fklocks-2.patch

application/octet-stream

Filename: fklocks-2.patch
Type: application/octet-stream
Part: 0
Message: Re: FOR KEY LOCK foreign keys
*** a/src/backend/access/heap/heapam.c
--- b/src/backend/access/heap/heapam.c
***************
*** 2450,2455 **** heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
--- 2450,2456 ----
  	HTSU_Result result;
  	TransactionId xid = GetCurrentTransactionId();
  	Bitmapset  *hot_attrs;
+ 	Bitmapset  *keylck_attrs;
  	ItemId		lp;
  	HeapTupleData oldtup;
  	HeapTuple	heaptup;
***************
*** 2466,2471 **** heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
--- 2467,2473 ----
  	bool		have_tuple_lock = false;
  	bool		iscombo;
  	bool		use_hot_update = false;
+ 	bool		keylocked_update = false;
  	bool		all_visible_cleared = false;
  	bool		all_visible_cleared_new = false;
  
***************
*** 2483,2489 **** heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
  	 * Note that we get a copy here, so we need not worry about relcache flush
  	 * happening midway through.
  	 */
! 	hot_attrs = RelationGetIndexAttrBitmap(relation);
  
  	block = ItemPointerGetBlockNumber(otid);
  	buffer = ReadBuffer(relation, block);
--- 2485,2492 ----
  	 * Note that we get a copy here, so we need not worry about relcache flush
  	 * happening midway through.
  	 */
! 	hot_attrs = RelationGetIndexAttrBitmap(relation, false);
! 	keylck_attrs = RelationGetIndexAttrBitmap(relation, true);
  
  	block = ItemPointerGetBlockNumber(otid);
  	buffer = ReadBuffer(relation, block);
***************
*** 2524,2614 **** l2:
  	}
  	else if (result == HeapTupleBeingUpdated && wait)
  	{
- 		TransactionId xwait;
  		uint16		infomask;
  
- 		/* must copy state data before unlocking buffer */
- 		xwait = HeapTupleHeaderGetXmax(oldtup.t_data);
  		infomask = oldtup.t_data->t_infomask;
  
- 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
- 
  		/*
! 		 * Acquire tuple lock to establish our priority for the tuple (see
! 		 * heap_lock_tuple).  LockTuple will release us when we are
! 		 * next-in-line for the tuple.
! 		 *
! 		 * If we are forced to "start over" below, we keep the tuple lock;
! 		 * this arranges that we stay at the head of the line while rechecking
! 		 * tuple state.
  		 */
! 		if (!have_tuple_lock)
  		{
! 			LockTuple(relation, &(oldtup.t_self), ExclusiveLock);
! 			have_tuple_lock = true;
  		}
  
! 		/*
! 		 * Sleep until concurrent transaction ends.  Note that we don't care
! 		 * if the locker has an exclusive or shared lock, because we need
! 		 * exclusive.
! 		 */
! 
! 		if (infomask & HEAP_XMAX_IS_MULTI)
  		{
! 			/* wait for multixact */
! 			MultiXactIdWait((MultiXactId) xwait);
! 			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
  
  			/*
! 			 * If xwait had just locked the tuple then some other xact could
! 			 * update this tuple before we get to this point.  Check for xmax
! 			 * change, and start over if so.
  			 */
! 			if (!(oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
! 				!TransactionIdEquals(HeapTupleHeaderGetXmax(oldtup.t_data),
! 									 xwait))
! 				goto l2;
  
  			/*
! 			 * You might think the multixact is necessarily done here, but not
! 			 * so: it could have surviving members, namely our own xact or
! 			 * other subxacts of this backend.	It is legal for us to update
! 			 * the tuple in either case, however (the latter case is
! 			 * essentially a situation of upgrading our former shared lock to
! 			 * exclusive).	We don't bother changing the on-disk hint bits
! 			 * since we are about to overwrite the xmax altogether.
  			 */
! 		}
! 		else
! 		{
! 			/* wait for regular transaction to end */
! 			XactLockTableWait(xwait);
! 			LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
  
  			/*
! 			 * xwait is done, but if xwait had just locked the tuple then some
! 			 * other xact could update this tuple before we get to this point.
! 			 * Check for xmax change, and start over if so.
  			 */
! 			if ((oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
! 				!TransactionIdEquals(HeapTupleHeaderGetXmax(oldtup.t_data),
! 									 xwait))
! 				goto l2;
! 
! 			/* Otherwise check if it committed or aborted */
! 			UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
  		}
- 
- 		/*
- 		 * We may overwrite if previous xmax aborted, or if it committed but
- 		 * only locked the tuple without updating it.
- 		 */
- 		if (oldtup.t_data->t_infomask & (HEAP_XMAX_INVALID |
- 										 HEAP_IS_LOCKED))
- 			result = HeapTupleMayBeUpdated;
- 		else
- 			result = HeapTupleUpdated;
  	}
  
  	if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated)
--- 2527,2636 ----
  	}
  	else if (result == HeapTupleBeingUpdated && wait)
  	{
  		uint16		infomask;
  
  		infomask = oldtup.t_data->t_infomask;
  
  		/*
! 		 * if it's only key-locked and we're not updating an indexed column,
! 		 * we can act as though MayBeUpdated was returned, but the resulting tuple
! 		 * needs a bunch of fields copied from the original.
  		 */
! 		if ((infomask & HEAP_XMAX_KEY_LOCK) &&
! 			!(infomask & HEAP_XMAX_SHARED_LOCK) &&
! 			HeapSatisfiesHOTUpdate(relation, keylck_attrs,
! 								   &oldtup, newtup))
  		{
! 			result = HeapTupleMayBeUpdated;
! 			keylocked_update = true;
  		}
  
! 		if (!keylocked_update)
  		{
! 			TransactionId xwait;
! 
! 			/* must copy state data before unlocking buffer */
! 			xwait = HeapTupleHeaderGetXmax(oldtup.t_data);
! 
! 			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
  
  			/*
! 			 * Acquire tuple lock to establish our priority for the tuple (see
! 			 * heap_lock_tuple).  LockTuple will release us when we are
! 			 * next-in-line for the tuple.
! 			 *
! 			 * If we are forced to "start over" below, we keep the tuple lock;
! 			 * this arranges that we stay at the head of the line while rechecking
! 			 * tuple state.
  			 */
! 			if (!have_tuple_lock)
! 			{
! 				LockTuple(relation, &(oldtup.t_self), ExclusiveLock);
! 				have_tuple_lock = true;
! 			}
  
  			/*
! 			 * Sleep until concurrent transaction ends.  Note that we don't care
! 			 * whether the locker holds an exclusive, shared or key lock, because
! 			 * we need exclusive.
  			 */
! 
! 			if (infomask & HEAP_XMAX_IS_MULTI)
! 			{
! 				/* wait for multixact */
! 				MultiXactIdWait((MultiXactId) xwait);
! 				LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
! 
! 				/*
! 				 * If xwait had just locked the tuple then some other xact could
! 				 * update this tuple before we get to this point.  Check for xmax
! 				 * change, and start over if so.
! 				 */
! 				if (!(oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
! 					!TransactionIdEquals(HeapTupleHeaderGetXmax(oldtup.t_data),
! 										 xwait))
! 					goto l2;
! 
! 				/*
! 				 * You might think the multixact is necessarily done here, but not
! 				 * so: it could have surviving members, namely our own xact or
! 				 * other subxacts of this backend.	It is legal for us to update
! 				 * the tuple in either case, however (the latter case is
! 				 * essentially a situation of upgrading our former shared lock to
! 				 * exclusive).	We don't bother changing the on-disk hint bits
! 				 * since we are about to overwrite the xmax altogether.
! 				 */
! 			}
! 			else
! 			{
! 				/* wait for regular transaction to end */
! 				XactLockTableWait(xwait);
! 				LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
! 
! 				/*
! 				 * xwait is done, but if xwait had just locked the tuple then some
! 				 * other xact could update this tuple before we get to this point.
! 				 * Check for xmax change, and start over if so.
! 				 */
! 				if ((oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
! 					!TransactionIdEquals(HeapTupleHeaderGetXmax(oldtup.t_data),
! 										 xwait))
! 					goto l2;
! 
! 				/* Otherwise check if it committed or aborted */
! 				UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
! 			}
  
  			/*
! 			 * We may overwrite if previous xmax aborted, or if it committed but
! 			 * only locked the tuple without updating it.
  			 */
! 			if (oldtup.t_data->t_infomask & (HEAP_XMAX_INVALID |
! 											 HEAP_IS_LOCKED))
! 				result = HeapTupleMayBeUpdated;
! 			else
! 				result = HeapTupleUpdated;
  		}
  	}
  
  	if (crosscheck != InvalidSnapshot && result == HeapTupleMayBeUpdated)
***************
*** 2632,2637 **** l2:
--- 2654,2660 ----
  		if (vmbuffer != InvalidBuffer)
  			ReleaseBuffer(vmbuffer);
  		bms_free(hot_attrs);
+ 		bms_free(keylck_attrs);
  		return result;
  	}
  
***************
*** 2670,2682 **** l2:
  		Assert(!(newtup->t_data->t_infomask & HEAP_HASOID));
  	}
  
  	newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
  	newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
! 	newtup->t_data->t_infomask |= (HEAP_XMAX_INVALID | HEAP_UPDATED);
  	HeapTupleHeaderSetXmin(newtup->t_data, xid);
  	HeapTupleHeaderSetCmin(newtup->t_data, cid);
- 	HeapTupleHeaderSetXmax(newtup->t_data, 0);	/* for cleanliness */
  	newtup->t_tableOid = RelationGetRelid(relation);
  
  	/*
  	 * Replace cid with a combo cid if necessary.  Note that we already put
--- 2693,2721 ----
  		Assert(!(newtup->t_data->t_infomask & HEAP_HASOID));
  	}
  
+ 	/*
+ 	 * Prepare the new tuple with the appropriate initial values of Xmin and
+ 	 * Xmax, as well as initial infomask bits.
+ 	 */
  	newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
  	newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
! 	newtup->t_data->t_infomask |= HEAP_UPDATED;
  	HeapTupleHeaderSetXmin(newtup->t_data, xid);
  	HeapTupleHeaderSetCmin(newtup->t_data, cid);
  	newtup->t_tableOid = RelationGetRelid(relation);
+ 	if (keylocked_update)
+ 	{
+ 		HeapTupleHeaderSetXmax(newtup->t_data,
+ 							   HeapTupleHeaderGetXmax(oldtup.t_data));
+ 		newtup->t_data->t_infomask |= (oldtup.t_data->t_infomask & 
+ 									   (HEAP_XMAX_IS_MULTI |
+ 										HEAP_XMAX_KEY_LOCK));
+ 	}
+ 	else
+ 	{
+ 		newtup->t_data->t_infomask |= HEAP_XMAX_INVALID;
+ 		HeapTupleHeaderSetXmax(newtup->t_data, 0);	/* for cleanliness */
+ 	}
  
  	/*
  	 * Replace cid with a combo cid if necessary.  Note that we already put
***************
*** 2971,2976 **** l2:
--- 3010,3016 ----
  	}
  
  	bms_free(hot_attrs);
+ 	bms_free(keylck_attrs);
  
  	return HeapTupleMayBeUpdated;
  }
***************
*** 3203,3209 **** heap_lock_tuple(Relation relation, HeapTuple tuple, Buffer *buffer,
  	LOCKMODE	tuple_lock_type;
  	bool		have_tuple_lock = false;
  
! 	tuple_lock_type = (mode == LockTupleShared) ? ShareLock : ExclusiveLock;
  
  	*buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
  	LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
--- 3243,3261 ----
  	LOCKMODE	tuple_lock_type;
  	bool		have_tuple_lock = false;
  
! 	switch (mode)
! 	{
! 		case LockTupleShared:
! 		case LockTupleKeylock:
! 			tuple_lock_type = ShareLock;
! 			break;
! 		case LockTupleExclusive:
! 			tuple_lock_type = ExclusiveLock;
! 			break;
! 		default:
! 			elog(ERROR, "invalid tuple lock mode");
! 			tuple_lock_type = 0;	/* keep compiler quiet */
! 	}
  
  	*buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
  	LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
***************
*** 3242,3253 **** l3:
  		 * already.  We *must* succeed without trying to take the tuple lock,
  		 * else we will deadlock against anyone waiting to acquire exclusive
  		 * lock.  We don't need to make any state changes in this case.
  		 */
! 		if (mode == LockTupleShared &&
! 			(infomask & HEAP_XMAX_IS_MULTI) &&
  			MultiXactIdIsCurrent((MultiXactId) xwait))
  		{
- 			Assert(infomask & HEAP_XMAX_SHARED_LOCK);
  			/* Probably can't hold tuple lock here, but may as well check */
  			if (have_tuple_lock)
  				UnlockTuple(relation, tid, tuple_lock_type);
--- 3294,3312 ----
  		 * already.  We *must* succeed without trying to take the tuple lock,
  		 * else we will deadlock against anyone waiting to acquire exclusive
  		 * lock.  We don't need to make any state changes in this case.
+ 		 *
+ 		 * Likewise, if we wish to acquire a key lock, and the tuple is already
+ 		 * share- or key-locked by us, we effectively hold the lock already.
+ 		 *
+ 		 * Note we cannot do this if we're asking for share lock and the tuple
+ 		 * is only key-locked.
  		 */
! 		if ((infomask & HEAP_XMAX_IS_MULTI) &&
! 			(((mode == LockTupleShared) && (infomask & HEAP_XMAX_SHARED_LOCK)) ||
! 			 ((mode == LockTupleKeylock) &&
! 			  (infomask & (HEAP_XMAX_SHARED_LOCK | HEAP_XMAX_KEY_LOCK)))) &&
  			MultiXactIdIsCurrent((MultiXactId) xwait))
  		{
  			/* Probably can't hold tuple lock here, but may as well check */
  			if (have_tuple_lock)
  				UnlockTuple(relation, tid, tuple_lock_type);
***************
*** 3293,3298 **** l3:
--- 3352,3372 ----
  			if (!(tuple->t_data->t_infomask & HEAP_XMAX_SHARED_LOCK))
  				goto l3;
  		}
+ 		else if (mode == LockTupleKeylock &&
+ 				 (infomask & (HEAP_XMAX_SHARED_LOCK | HEAP_XMAX_KEY_LOCK)))
+ 		{
+ 			/*
+ 			 * As above: acquiring keylock when there's at least one share- or
+ 			 * key-locker already.  We need not wait for him/them to complete.
+ 			 */
+ 			LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ 
+ 			/*
+ 			 * Make sure it's still an appropriate lock, else start over.
+ 			 */
+ 			if (!(tuple->t_data->t_infomask & (HEAP_XMAX_SHARED_LOCK | HEAP_XMAX_KEY_LOCK)))
+ 				goto l3;
+ 		}
  		else if (infomask & HEAP_XMAX_IS_MULTI)
  		{
  			/* wait for multixact to end */
***************
*** 3400,3407 **** l3:
  	if (!(old_infomask & (HEAP_XMAX_INVALID |
  						  HEAP_XMAX_COMMITTED |
  						  HEAP_XMAX_IS_MULTI)) &&
! 		(mode == LockTupleShared ?
  		 (old_infomask & HEAP_IS_LOCKED) :
  		 (old_infomask & HEAP_XMAX_EXCL_LOCK)) &&
  		TransactionIdIsCurrentTransactionId(xmax))
  	{
--- 3474,3483 ----
  	if (!(old_infomask & (HEAP_XMAX_INVALID |
  						  HEAP_XMAX_COMMITTED |
  						  HEAP_XMAX_IS_MULTI)) &&
! 		(mode == LockTupleKeylock ?
  		 (old_infomask & HEAP_IS_LOCKED) :
+ 		 mode == LockTupleShared ?
+ 		 (old_infomask & (HEAP_XMAX_SHARED_LOCK | HEAP_XMAX_EXCL_LOCK)) :
  		 (old_infomask & HEAP_XMAX_EXCL_LOCK)) &&
  		TransactionIdIsCurrentTransactionId(xmax))
  	{
***************
*** 3425,3434 **** l3:
  									HEAP_IS_LOCKED |
  									HEAP_MOVED);
  
! 	if (mode == LockTupleShared)
  	{
  		/*
! 		 * If this is the first acquisition of a shared lock in the current
  		 * transaction, set my per-backend OldestMemberMXactId setting. We can
  		 * be certain that the transaction will never become a member of any
  		 * older MultiXactIds than that.  (We have to do this even if we end
--- 3501,3510 ----
  									HEAP_IS_LOCKED |
  									HEAP_MOVED);
  
! 	if (mode == LockTupleShared || mode == LockTupleKeylock)
  	{
  		/*
! 		 * If this is the first acquisition of a keylock or shared lock in the current
  		 * transaction, set my per-backend OldestMemberMXactId setting. We can
  		 * be certain that the transaction will never become a member of any
  		 * older MultiXactIds than that.  (We have to do this even if we end
***************
*** 3437,3443 **** l3:
  		 */
  		MultiXactIdSetOldestMember();
  
! 		new_infomask |= HEAP_XMAX_SHARED_LOCK;
  
  		/*
  		 * Check to see if we need a MultiXactId because there are multiple
--- 3513,3520 ----
  		 */
  		MultiXactIdSetOldestMember();
  
! 		new_infomask |= mode == LockTupleShared ? HEAP_XMAX_SHARED_LOCK :
! 			HEAP_XMAX_KEY_LOCK;
  
  		/*
  		 * Check to see if we need a MultiXactId because there are multiple
***************
*** 3537,3543 **** l3:
  		xlrec.target.tid = tuple->t_self;
  		xlrec.locking_xid = xid;
  		xlrec.xid_is_mxact = ((new_infomask & HEAP_XMAX_IS_MULTI) != 0);
! 		xlrec.shared_lock = (mode == LockTupleShared);
  		rdata[0].data = (char *) &xlrec;
  		rdata[0].len = SizeOfHeapLock;
  		rdata[0].buffer = InvalidBuffer;
--- 3614,3620 ----
  		xlrec.target.tid = tuple->t_self;
  		xlrec.locking_xid = xid;
  		xlrec.xid_is_mxact = ((new_infomask & HEAP_XMAX_IS_MULTI) != 0);
! 		xlrec.lock_strength = mode;
  		rdata[0].data = (char *) &xlrec;
  		rdata[0].len = SizeOfHeapLock;
  		rdata[0].buffer = InvalidBuffer;
***************
*** 4987,4996 **** heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
  						  HEAP_MOVED);
  	if (xlrec->xid_is_mxact)
  		htup->t_infomask |= HEAP_XMAX_IS_MULTI;
! 	if (xlrec->shared_lock)
  		htup->t_infomask |= HEAP_XMAX_SHARED_LOCK;
  	else
  		htup->t_infomask |= HEAP_XMAX_EXCL_LOCK;
  	HeapTupleHeaderClearHotUpdated(htup);
  	HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
  	HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
--- 5064,5078 ----
  						  HEAP_MOVED);
  	if (xlrec->xid_is_mxact)
  		htup->t_infomask |= HEAP_XMAX_IS_MULTI;
! 	if (xlrec->lock_strength == LockTupleShared)
  		htup->t_infomask |= HEAP_XMAX_SHARED_LOCK;
+ 	else if (xlrec->lock_strength == LockTupleKeylock)
+ 		htup->t_infomask |= HEAP_XMAX_KEY_LOCK;
  	else
+ 	{
+ 		Assert(xlrec->lock_strength == LockTupleExclusive);
  		htup->t_infomask |= HEAP_XMAX_EXCL_LOCK;
+ 	}
  	HeapTupleHeaderClearHotUpdated(htup);
  	HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
  	HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
***************
*** 5194,5203 **** heap_desc(StringInfo buf, uint8 xl_info, char *rec)
  	{
  		xl_heap_lock *xlrec = (xl_heap_lock *) rec;
  
! 		if (xlrec->shared_lock)
  			appendStringInfo(buf, "shared_lock: ");
! 		else
  			appendStringInfo(buf, "exclusive_lock: ");
  		if (xlrec->xid_is_mxact)
  			appendStringInfo(buf, "mxid ");
  		else
--- 5276,5289 ----
  	{
  		xl_heap_lock *xlrec = (xl_heap_lock *) rec;
  
! 		if (xlrec->lock_strength == LockTupleShared)
  			appendStringInfo(buf, "shared_lock: ");
! 		else if (xlrec->lock_strength == LockTupleKeylock)
! 			appendStringInfo(buf, "key_lock: ");
! 		else if (xlrec->lock_strength == LockTupleExclusive)
  			appendStringInfo(buf, "exclusive_lock: ");
+ 		else
+ 			appendStringInfo(buf, "unknown_type_lock: ");
  		if (xlrec->xid_is_mxact)
  			appendStringInfo(buf, "mxid ");
  		else
*** a/src/backend/catalog/index.c
--- b/src/backend/catalog/index.c
***************
*** 2986,2992 **** reindex_relation(Oid relid, int flags)
  
  	/* Ensure rd_indexattr is valid; see comments for RelationSetIndexList */
  	if (is_pg_class)
! 		(void) RelationGetIndexAttrBitmap(rel);
  
  	PG_TRY();
  	{
--- 2986,2992 ----
  
  	/* Ensure rd_indexattr is valid; see comments for RelationSetIndexList */
  	if (is_pg_class)
! 		(void) RelationGetIndexAttrBitmap(rel, false);
  
  	PG_TRY();
  	{
*** a/src/backend/executor/execMain.c
--- b/src/backend/executor/execMain.c
***************
*** 801,807 **** InitPlan(QueryDesc *queryDesc, int eflags)
  	}
  
  	/*
! 	 * Similarly, we have to lock relations selected FOR UPDATE/FOR SHARE
  	 * before we initialize the plan tree, else we'd be risking lock upgrades.
  	 * While we are at it, build the ExecRowMark list.
  	 */
--- 801,807 ----
  	}
  
  	/*
! 	 * Similarly, we have to lock relations selected FOR UPDATE/FOR SHARE/FOR KEY LOCK
  	 * before we initialize the plan tree, else we'd be risking lock upgrades.
  	 * While we are at it, build the ExecRowMark list.
  	 */
***************
*** 821,826 **** InitPlan(QueryDesc *queryDesc, int eflags)
--- 821,827 ----
  		{
  			case ROW_MARK_EXCLUSIVE:
  			case ROW_MARK_SHARE:
+ 			case ROW_MARK_KEYLOCK:
  				relid = getrelid(rc->rti, rangeTable);
  				relation = heap_open(relid, RowShareLock);
  				break;
*** a/src/backend/executor/nodeLockRows.c
--- b/src/backend/executor/nodeLockRows.c
***************
*** 111,120 **** lnext:
  		tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
  
  		/* okay, try to lock the tuple */
! 		if (erm->markType == ROW_MARK_EXCLUSIVE)
! 			lockmode = LockTupleExclusive;
! 		else
! 			lockmode = LockTupleShared;
  
  		test = heap_lock_tuple(erm->relation, &tuple, &buffer,
  							   &update_ctid, &update_xmax,
--- 111,132 ----
  		tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
  
  		/* okay, try to lock the tuple */
! 		switch (erm->markType)
! 		{
! 			case ROW_MARK_EXCLUSIVE:
! 				lockmode = LockTupleExclusive;
! 				break;
! 			case ROW_MARK_SHARE:
! 				lockmode = LockTupleShared;
! 				break;
! 			case ROW_MARK_KEYLOCK:
! 				lockmode = LockTupleKeylock;
! 				break;
! 			default:
! 				elog(ERROR, "unsupported rowmark type");
! 				lockmode = LockTupleExclusive;	/* keep compiler quiet */
! 				break;
! 		}
  
  		test = heap_lock_tuple(erm->relation, &tuple, &buffer,
  							   &update_ctid, &update_xmax,
*** a/src/backend/nodes/copyfuncs.c
--- b/src/backend/nodes/copyfuncs.c
***************
*** 2008,2014 **** _copyRowMarkClause(RowMarkClause *from)
  	RowMarkClause *newnode = makeNode(RowMarkClause);
  
  	COPY_SCALAR_FIELD(rti);
! 	COPY_SCALAR_FIELD(forUpdate);
  	COPY_SCALAR_FIELD(noWait);
  	COPY_SCALAR_FIELD(pushedDown);
  
--- 2008,2014 ----
  	RowMarkClause *newnode = makeNode(RowMarkClause);
  
  	COPY_SCALAR_FIELD(rti);
! 	COPY_SCALAR_FIELD(strength);
  	COPY_SCALAR_FIELD(noWait);
  	COPY_SCALAR_FIELD(pushedDown);
  
***************
*** 2366,2372 **** _copyLockingClause(LockingClause *from)
  	LockingClause *newnode = makeNode(LockingClause);
  
  	COPY_NODE_FIELD(lockedRels);
! 	COPY_SCALAR_FIELD(forUpdate);
  	COPY_SCALAR_FIELD(noWait);
  
  	return newnode;
--- 2366,2372 ----
  	LockingClause *newnode = makeNode(LockingClause);
  
  	COPY_NODE_FIELD(lockedRels);
! 	COPY_SCALAR_FIELD(strength);
  	COPY_SCALAR_FIELD(noWait);
  
  	return newnode;
*** a/src/backend/nodes/equalfuncs.c
--- b/src/backend/nodes/equalfuncs.c
***************
*** 2291,2297 **** static bool
  _equalLockingClause(LockingClause *a, LockingClause *b)
  {
  	COMPARE_NODE_FIELD(lockedRels);
! 	COMPARE_SCALAR_FIELD(forUpdate);
  	COMPARE_SCALAR_FIELD(noWait);
  
  	return true;
--- 2291,2297 ----
  _equalLockingClause(LockingClause *a, LockingClause *b)
  {
  	COMPARE_NODE_FIELD(lockedRels);
! 	COMPARE_SCALAR_FIELD(strength);
  	COMPARE_SCALAR_FIELD(noWait);
  
  	return true;
***************
*** 2362,2368 **** static bool
  _equalRowMarkClause(RowMarkClause *a, RowMarkClause *b)
  {
  	COMPARE_SCALAR_FIELD(rti);
! 	COMPARE_SCALAR_FIELD(forUpdate);
  	COMPARE_SCALAR_FIELD(noWait);
  	COMPARE_SCALAR_FIELD(pushedDown);
  
--- 2362,2368 ----
  _equalRowMarkClause(RowMarkClause *a, RowMarkClause *b)
  {
  	COMPARE_SCALAR_FIELD(rti);
! 	COMPARE_SCALAR_FIELD(strength);
  	COMPARE_SCALAR_FIELD(noWait);
  	COMPARE_SCALAR_FIELD(pushedDown);
  
*** a/src/backend/nodes/outfuncs.c
--- b/src/backend/nodes/outfuncs.c
***************
*** 2070,2076 **** _outLockingClause(StringInfo str, LockingClause *node)
  	WRITE_NODE_TYPE("LOCKINGCLAUSE");
  
  	WRITE_NODE_FIELD(lockedRels);
! 	WRITE_BOOL_FIELD(forUpdate);
  	WRITE_BOOL_FIELD(noWait);
  }
  
--- 2070,2076 ----
  	WRITE_NODE_TYPE("LOCKINGCLAUSE");
  
  	WRITE_NODE_FIELD(lockedRels);
! 	WRITE_ENUM_FIELD(strength, LockClauseStrength);
  	WRITE_BOOL_FIELD(noWait);
  }
  
***************
*** 2247,2253 **** _outRowMarkClause(StringInfo str, RowMarkClause *node)
  	WRITE_NODE_TYPE("ROWMARKCLAUSE");
  
  	WRITE_UINT_FIELD(rti);
! 	WRITE_BOOL_FIELD(forUpdate);
  	WRITE_BOOL_FIELD(noWait);
  	WRITE_BOOL_FIELD(pushedDown);
  }
--- 2247,2253 ----
  	WRITE_NODE_TYPE("ROWMARKCLAUSE");
  
  	WRITE_UINT_FIELD(rti);
! 	WRITE_ENUM_FIELD(strength, LockClauseStrength);
  	WRITE_BOOL_FIELD(noWait);
  	WRITE_BOOL_FIELD(pushedDown);
  }
*** a/src/backend/nodes/readfuncs.c
--- b/src/backend/nodes/readfuncs.c
***************
*** 301,307 **** _readRowMarkClause(void)
  	READ_LOCALS(RowMarkClause);
  
  	READ_UINT_FIELD(rti);
! 	READ_BOOL_FIELD(forUpdate);
  	READ_BOOL_FIELD(noWait);
  	READ_BOOL_FIELD(pushedDown);
  
--- 301,307 ----
  	READ_LOCALS(RowMarkClause);
  
  	READ_UINT_FIELD(rti);
! 	READ_ENUM_FIELD(strength, LockClauseStrength);
  	READ_BOOL_FIELD(noWait);
  	READ_BOOL_FIELD(pushedDown);
  
*** a/src/backend/optimizer/plan/initsplan.c
--- b/src/backend/optimizer/plan/initsplan.c
***************
*** 563,573 **** make_outerjoininfo(PlannerInfo *root,
  	Assert(jointype != JOIN_RIGHT);
  
  	/*
! 	 * Presently the executor cannot support FOR UPDATE/SHARE marking of rels
  	 * appearing on the nullable side of an outer join. (It's somewhat unclear
  	 * what that would mean, anyway: what should we mark when a result row is
  	 * generated from no element of the nullable relation?)  So, complain if
! 	 * any nullable rel is FOR UPDATE/SHARE.
  	 *
  	 * You might be wondering why this test isn't made far upstream in the
  	 * parser.	It's because the parser hasn't got enough info --- consider
--- 563,573 ----
  	Assert(jointype != JOIN_RIGHT);
  
  	/*
! 	 * Presently the executor cannot support FOR UPDATE/SHARE/KEY LOCK marking of rels
  	 * appearing on the nullable side of an outer join. (It's somewhat unclear
  	 * what that would mean, anyway: what should we mark when a result row is
  	 * generated from no element of the nullable relation?)  So, complain if
! 	 * any nullable rel is FOR UPDATE/SHARE/KEY LOCK.
  	 *
  	 * You might be wondering why this test isn't made far upstream in the
  	 * parser.	It's because the parser hasn't got enough info --- consider
***************
*** 585,591 **** make_outerjoininfo(PlannerInfo *root,
  			(jointype == JOIN_FULL && bms_is_member(rc->rti, left_rels)))
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 					 errmsg("SELECT FOR UPDATE/SHARE cannot be applied to the nullable side of an outer join")));
  	}
  
  	sjinfo->syn_lefthand = left_rels;
--- 585,591 ----
  			(jointype == JOIN_FULL && bms_is_member(rc->rti, left_rels)))
  			ereport(ERROR,
  					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 					 errmsg("SELECT FOR UPDATE/SHARE/KEY LOCK cannot be applied to the nullable side of an outer join")));
  	}
  
  	sjinfo->syn_lefthand = left_rels;
*** a/src/backend/optimizer/plan/planner.c
--- b/src/backend/optimizer/plan/planner.c
***************
*** 1837,1843 **** preprocess_rowmarks(PlannerInfo *root)
  	if (parse->rowMarks)
  	{
  		/*
! 		 * We've got trouble if FOR UPDATE/SHARE appears inside grouping,
  		 * since grouping renders a reference to individual tuple CTIDs
  		 * invalid.  This is also checked at parse time, but that's
  		 * insufficient because of rule substitution, query pullup, etc.
--- 1837,1843 ----
  	if (parse->rowMarks)
  	{
  		/*
! 		 * We've got trouble if FOR UPDATE/SHARE/KEY LOCK appears inside grouping,
  		 * since grouping renders a reference to individual tuple CTIDs
  		 * invalid.  This is also checked at parse time, but that's
  		 * insufficient because of rule substitution, query pullup, etc.
***************
*** 1847,1853 **** preprocess_rowmarks(PlannerInfo *root)
  	else
  	{
  		/*
! 		 * We only need rowmarks for UPDATE, DELETE, or FOR UPDATE/SHARE.
  		 */
  		if (parse->commandType != CMD_UPDATE &&
  			parse->commandType != CMD_DELETE)
--- 1847,1853 ----
  	else
  	{
  		/*
! 		 * We only need rowmarks for UPDATE, DELETE, or FOR UPDATE/SHARE/KEY LOCK.
  		 */
  		if (parse->commandType != CMD_UPDATE &&
  			parse->commandType != CMD_DELETE)
***************
*** 1857,1863 **** preprocess_rowmarks(PlannerInfo *root)
  	/*
  	 * We need to have rowmarks for all base relations except the target. We
  	 * make a bitmapset of all base rels and then remove the items we don't
! 	 * need or have FOR UPDATE/SHARE marks for.
  	 */
  	rels = get_base_rel_indexes((Node *) parse->jointree);
  	if (parse->resultRelation)
--- 1857,1863 ----
  	/*
  	 * We need to have rowmarks for all base relations except the target. We
  	 * make a bitmapset of all base rels and then remove the items we don't
! 	 * need or have FOR UPDATE/SHARE/KEY LOCK marks for.
  	 */
  	rels = get_base_rel_indexes((Node *) parse->jointree);
  	if (parse->resultRelation)
***************
*** 1894,1903 **** preprocess_rowmarks(PlannerInfo *root)
  		newrc = makeNode(PlanRowMark);
  		newrc->rti = newrc->prti = rc->rti;
  		newrc->rowmarkId = ++(root->glob->lastRowMarkId);
! 		if (rc->forUpdate)
! 			newrc->markType = ROW_MARK_EXCLUSIVE;
! 		else
! 			newrc->markType = ROW_MARK_SHARE;
  		newrc->noWait = rc->noWait;
  		newrc->isParent = false;
  
--- 1894,1913 ----
  		newrc = makeNode(PlanRowMark);
  		newrc->rti = newrc->prti = rc->rti;
  		newrc->rowmarkId = ++(root->glob->lastRowMarkId);
! 		switch (rc->strength)
! 		{
! 			case LCS_FORUPDATE:
! 				newrc->markType = ROW_MARK_EXCLUSIVE;
! 				break;
! 			case LCS_FORSHARE:
! 				newrc->markType = ROW_MARK_SHARE;
! 				break;
! 			case LCS_FORKEYLOCK:
! 				newrc->markType = ROW_MARK_KEYLOCK;
! 				break;
! 			default:
! 				elog(ERROR, "unsupported rowmark type %d", rc->strength);
! 		}
  		newrc->noWait = rc->noWait;
  		newrc->isParent = false;
  
*** a/src/backend/parser/analyze.c
--- b/src/backend/parser/analyze.c
***************
*** 2310,2316 **** transformLockingClause(ParseState *pstate, Query *qry, LockingClause *lc,
  	/* make a clause we can pass down to subqueries to select all rels */
  	allrels = makeNode(LockingClause);
  	allrels->lockedRels = NIL;	/* indicates all rels */
! 	allrels->forUpdate = lc->forUpdate;
  	allrels->noWait = lc->noWait;
  
  	if (lockedRels == NIL)
--- 2310,2316 ----
  	/* make a clause we can pass down to subqueries to select all rels */
  	allrels = makeNode(LockingClause);
  	allrels->lockedRels = NIL;	/* indicates all rels */
! 	allrels->strength = lc->strength;
  	allrels->noWait = lc->noWait;
  
  	if (lockedRels == NIL)
***************
*** 2329,2340 **** transformLockingClause(ParseState *pstate, Query *qry, LockingClause *lc,
  					if (rte->relkind == RELKIND_FOREIGN_TABLE)
  						break;
  					applyLockingClause(qry, i,
! 									   lc->forUpdate, lc->noWait, pushedDown);
  					rte->requiredPerms |= ACL_SELECT_FOR_UPDATE;
  					break;
  				case RTE_SUBQUERY:
  					applyLockingClause(qry, i,
! 									   lc->forUpdate, lc->noWait, pushedDown);
  
  					/*
  					 * FOR UPDATE/SHARE of subquery is propagated to all of
--- 2329,2340 ----
  					if (rte->relkind == RELKIND_FOREIGN_TABLE)
  						break;
  					applyLockingClause(qry, i,
! 									   lc->strength, lc->noWait, pushedDown);
  					rte->requiredPerms |= ACL_SELECT_FOR_UPDATE;
  					break;
  				case RTE_SUBQUERY:
  					applyLockingClause(qry, i,
! 									   lc->strength, lc->noWait, pushedDown);
  
  					/*
  					 * FOR UPDATE/SHARE of subquery is propagated to all of
***************
*** 2384,2396 **** transformLockingClause(ParseState *pstate, Query *qry, LockingClause *lc,
  											 rte->eref->aliasname),
  									  parser_errposition(pstate, thisrel->location)));
  							applyLockingClause(qry, i,
! 											   lc->forUpdate, lc->noWait,
  											   pushedDown);
  							rte->requiredPerms |= ACL_SELECT_FOR_UPDATE;
  							break;
  						case RTE_SUBQUERY:
  							applyLockingClause(qry, i,
! 											   lc->forUpdate, lc->noWait,
  											   pushedDown);
  							/* see comment above */
  							transformLockingClause(pstate, rte->subquery,
--- 2384,2396 ----
  											 rte->eref->aliasname),
  									  parser_errposition(pstate, thisrel->location)));
  							applyLockingClause(qry, i,
! 											   lc->strength, lc->noWait,
  											   pushedDown);
  							rte->requiredPerms |= ACL_SELECT_FOR_UPDATE;
  							break;
  						case RTE_SUBQUERY:
  							applyLockingClause(qry, i,
! 											   lc->strength, lc->noWait,
  											   pushedDown);
  							/* see comment above */
  							transformLockingClause(pstate, rte->subquery,
***************
*** 2443,2449 **** transformLockingClause(ParseState *pstate, Query *qry, LockingClause *lc,
   */
  void
  applyLockingClause(Query *qry, Index rtindex,
! 				   bool forUpdate, bool noWait, bool pushedDown)
  {
  	RowMarkClause *rc;
  
--- 2443,2449 ----
   */
  void
  applyLockingClause(Query *qry, Index rtindex,
! 				   LockClauseStrength strength, bool noWait, bool pushedDown)
  {
  	RowMarkClause *rc;
  
***************
*** 2455,2464 **** applyLockingClause(Query *qry, Index rtindex,
  	if ((rc = get_parse_rowmark(qry, rtindex)) != NULL)
  	{
  		/*
! 		 * If the same RTE is specified both FOR UPDATE and FOR SHARE, treat
! 		 * it as FOR UPDATE.  (Reasonable, since you can't take both a shared
! 		 * and exclusive lock at the same time; it'll end up being exclusive
! 		 * anyway.)
  		 *
  		 * We also consider that NOWAIT wins if it's specified both ways. This
  		 * is a bit more debatable but raising an error doesn't seem helpful.
--- 2455,2464 ----
  	if ((rc = get_parse_rowmark(qry, rtindex)) != NULL)
  	{
  		/*
! 		 * If the same RTE is specified for more than one locking strength,
! 		 * treat it as the strongest.  (Reasonable, since you can't take both a
! 		 * shared and exclusive lock at the same time; it'll end up being
! 		 * exclusive anyway.)
  		 *
  		 * We also consider that NOWAIT wins if it's specified both ways. This
  		 * is a bit more debatable but raising an error doesn't seem helpful.
***************
*** 2467,2473 **** applyLockingClause(Query *qry, Index rtindex,
  		 *
  		 * And of course pushedDown becomes false if any clause is explicit.
  		 */
! 		rc->forUpdate |= forUpdate;
  		rc->noWait |= noWait;
  		rc->pushedDown &= pushedDown;
  		return;
--- 2467,2473 ----
  		 *
  		 * And of course pushedDown becomes false if any clause is explicit.
  		 */
! 		rc->strength = Max(rc->strength, strength);
  		rc->noWait |= noWait;
  		rc->pushedDown &= pushedDown;
  		return;
***************
*** 2476,2482 **** applyLockingClause(Query *qry, Index rtindex,
  	/* Make a new RowMarkClause */
  	rc = makeNode(RowMarkClause);
  	rc->rti = rtindex;
! 	rc->forUpdate = forUpdate;
  	rc->noWait = noWait;
  	rc->pushedDown = pushedDown;
  	qry->rowMarks = lappend(qry->rowMarks, rc);
--- 2476,2482 ----
  	/* Make a new RowMarkClause */
  	rc = makeNode(RowMarkClause);
  	rc->rti = rtindex;
! 	rc->strength = strength;
  	rc->noWait = noWait;
  	rc->pushedDown = pushedDown;
  	qry->rowMarks = lappend(qry->rowMarks, rc);
*** a/src/backend/parser/gram.y
--- b/src/backend/parser/gram.y
***************
*** 8760,8766 **** for_locking_item:
  				{
  					LockingClause *n = makeNode(LockingClause);
  					n->lockedRels = $3;
! 					n->forUpdate = TRUE;
  					n->noWait = $4;
  					$$ = (Node *) n;
  				}
--- 8760,8766 ----
  				{
  					LockingClause *n = makeNode(LockingClause);
  					n->lockedRels = $3;
! 					n->strength = LCS_FORUPDATE;
  					n->noWait = $4;
  					$$ = (Node *) n;
  				}
***************
*** 8768,8777 **** for_locking_item:
  				{
  					LockingClause *n = makeNode(LockingClause);
  					n->lockedRels = $3;
! 					n->forUpdate = FALSE;
  					n->noWait = $4;
  					$$ = (Node *) n;
  				}
  		;
  
  locked_rels_list:
--- 8768,8785 ----
  				{
  					LockingClause *n = makeNode(LockingClause);
  					n->lockedRels = $3;
! 					n->strength = LCS_FORSHARE;
  					n->noWait = $4;
  					$$ = (Node *) n;
  				}
+ 			| FOR KEY LOCK_P locked_rels_list opt_nowait
+ 				{
+ 					LockingClause *n = makeNode(LockingClause);
+ 					n->lockedRels = $4;
+ 					n->strength = LCS_FORKEYLOCK;
+ 					n->noWait = $5;
+ 					$$ = (Node *) n;
+ 				}
  		;
  
  locked_rels_list:
*** a/src/backend/rewrite/rewriteHandler.c
--- b/src/backend/rewrite/rewriteHandler.c
***************
*** 56,62 **** static void rewriteValuesRTE(RangeTblEntry *rte, Relation target_relation,
  static void rewriteTargetListUD(Query *parsetree, RangeTblEntry *target_rte,
  					Relation target_relation);
  static void markQueryForLocking(Query *qry, Node *jtnode,
! 					bool forUpdate, bool noWait, bool pushedDown);
  static List *matchLocks(CmdType event, RuleLock *rulelocks,
  		   int varno, Query *parsetree);
  static Query *fireRIRrules(Query *parsetree, List *activeRIRs,
--- 56,62 ----
  static void rewriteTargetListUD(Query *parsetree, RangeTblEntry *target_rte,
  					Relation target_relation);
  static void markQueryForLocking(Query *qry, Node *jtnode,
! 					LockClauseStrength strength, bool noWait, bool pushedDown);
  static List *matchLocks(CmdType event, RuleLock *rulelocks,
  		   int varno, Query *parsetree);
  static Query *fireRIRrules(Query *parsetree, List *activeRIRs,
***************
*** 1402,1409 **** ApplyRetrieveRule(Query *parsetree,
  	rte->modifiedCols = NULL;
  
  	/*
! 	 * If FOR UPDATE/SHARE of view, mark all the contained tables as implicit
! 	 * FOR UPDATE/SHARE, the same as the parser would have done if the view's
  	 * subquery had been written out explicitly.
  	 *
  	 * Note: we don't consider forUpdatePushedDown here; such marks will be
--- 1402,1409 ----
  	rte->modifiedCols = NULL;
  
  	/*
! 	 * If FOR UPDATE/SHARE/KEY LOCK of view, mark all the contained tables as implicit
! 	 * FOR UPDATE/SHARE/KEY LOCK, the same as the parser would have done if the view's
  	 * subquery had been written out explicitly.
  	 *
  	 * Note: we don't consider forUpdatePushedDown here; such marks will be
***************
*** 1411,1423 **** ApplyRetrieveRule(Query *parsetree,
  	 */
  	if (rc != NULL)
  		markQueryForLocking(rule_action, (Node *) rule_action->jointree,
! 							rc->forUpdate, rc->noWait, true);
  
  	return parsetree;
  }
  
  /*
!  * Recursively mark all relations used by a view as FOR UPDATE/SHARE.
   *
   * This may generate an invalid query, eg if some sub-query uses an
   * aggregate.  We leave it to the planner to detect that.
--- 1411,1423 ----
  	 */
  	if (rc != NULL)
  		markQueryForLocking(rule_action, (Node *) rule_action->jointree,
! 							rc->strength, rc->noWait, true);
  
  	return parsetree;
  }
  
  /*
!  * Recursively mark all relations used by a view as FOR UPDATE/SHARE/KEY LOCK.
   *
   * This may generate an invalid query, eg if some sub-query uses an
   * aggregate.  We leave it to the planner to detect that.
***************
*** 1429,1435 **** ApplyRetrieveRule(Query *parsetree,
   */
  static void
  markQueryForLocking(Query *qry, Node *jtnode,
! 					bool forUpdate, bool noWait, bool pushedDown)
  {
  	if (jtnode == NULL)
  		return;
--- 1429,1435 ----
   */
  static void
  markQueryForLocking(Query *qry, Node *jtnode,
! 					LockClauseStrength strength, bool noWait, bool pushedDown)
  {
  	if (jtnode == NULL)
  		return;
***************
*** 1443,1458 **** markQueryForLocking(Query *qry, Node *jtnode,
  			/* ignore foreign tables */
  			if (rte->relkind != RELKIND_FOREIGN_TABLE)
  			{
! 				applyLockingClause(qry, rti, forUpdate, noWait, pushedDown);
  				rte->requiredPerms |= ACL_SELECT_FOR_UPDATE;
  			}
  		}
  		else if (rte->rtekind == RTE_SUBQUERY)
  		{
! 			applyLockingClause(qry, rti, forUpdate, noWait, pushedDown);
! 			/* FOR UPDATE/SHARE of subquery is propagated to subquery's rels */
  			markQueryForLocking(rte->subquery, (Node *) rte->subquery->jointree,
! 								forUpdate, noWait, true);
  		}
  		/* other RTE types are unaffected by FOR UPDATE */
  	}
--- 1443,1458 ----
  			/* ignore foreign tables */
  			if (rte->relkind != RELKIND_FOREIGN_TABLE)
  			{
! 				applyLockingClause(qry, rti, strength, noWait, pushedDown);
  				rte->requiredPerms |= ACL_SELECT_FOR_UPDATE;
  			}
  		}
  		else if (rte->rtekind == RTE_SUBQUERY)
  		{
! 			applyLockingClause(qry, rti, strength, noWait, pushedDown);
! 			/* FOR UPDATE/SHARE/KEY LOCK of subquery is propagated to subquery's rels */
  			markQueryForLocking(rte->subquery, (Node *) rte->subquery->jointree,
! 								strength, noWait, true);
  		}
  		/* other RTE types are unaffected by FOR UPDATE */
  	}
***************
*** 1462,1475 **** markQueryForLocking(Query *qry, Node *jtnode,
  		ListCell   *l;
  
  		foreach(l, f->fromlist)
! 			markQueryForLocking(qry, lfirst(l), forUpdate, noWait, pushedDown);
  	}
  	else if (IsA(jtnode, JoinExpr))
  	{
  		JoinExpr   *j = (JoinExpr *) jtnode;
  
! 		markQueryForLocking(qry, j->larg, forUpdate, noWait, pushedDown);
! 		markQueryForLocking(qry, j->rarg, forUpdate, noWait, pushedDown);
  	}
  	else
  		elog(ERROR, "unrecognized node type: %d",
--- 1462,1475 ----
  		ListCell   *l;
  
  		foreach(l, f->fromlist)
! 			markQueryForLocking(qry, lfirst(l), strength, noWait, pushedDown);
  	}
  	else if (IsA(jtnode, JoinExpr))
  	{
  		JoinExpr   *j = (JoinExpr *) jtnode;
  
! 		markQueryForLocking(qry, j->larg, strength, noWait, pushedDown);
! 		markQueryForLocking(qry, j->rarg, strength, noWait, pushedDown);
  	}
  	else
  		elog(ERROR, "unrecognized node type: %d",
*** a/src/backend/tcop/utility.c
--- b/src/backend/tcop/utility.c
***************
*** 130,136 **** CommandIsReadOnly(Node *parsetree)
  				if (stmt->intoClause != NULL)
  					return false;		/* SELECT INTO */
  				else if (stmt->rowMarks != NIL)
! 					return false;		/* SELECT FOR UPDATE/SHARE */
  				else if (stmt->hasModifyingCTE)
  					return false;		/* data-modifying CTE */
  				else
--- 130,136 ----
  				if (stmt->intoClause != NULL)
  					return false;		/* SELECT INTO */
  				else if (stmt->rowMarks != NIL)
! 					return false;		/* SELECT FOR UPDATE/SHARE/KEY LOCK */
  				else if (stmt->hasModifyingCTE)
  					return false;		/* data-modifying CTE */
  				else
***************
*** 2181,2190 **** CreateCommandTag(Node *parsetree)
  						else if (stmt->rowMarks != NIL)
  						{
  							/* not 100% but probably close enough */
! 							if (((PlanRowMark *) linitial(stmt->rowMarks))->markType == ROW_MARK_EXCLUSIVE)
! 								tag = "SELECT FOR UPDATE";
! 							else
! 								tag = "SELECT FOR SHARE";
  						}
  						else
  							tag = "SELECT";
--- 2181,2201 ----
  						else if (stmt->rowMarks != NIL)
  						{
  							/* not 100% but probably close enough */
! 							switch (((RowMarkClause *) linitial(stmt->rowMarks))->strength)
! 							{
! 								case LCS_FORUPDATE:
! 									tag = "SELECT FOR UPDATE";
! 									break;
! 								case LCS_FORSHARE:
! 									tag = "SELECT FOR SHARE";
! 									break;
! 								case LCS_FORKEYLOCK:
! 									tag = "SELECT FOR KEY LOCK";
! 									break;
! 								default:
! 									tag =  "???";
! 									break;
! 							}
  						}
  						else
  							tag = "SELECT";
***************
*** 2231,2240 **** CreateCommandTag(Node *parsetree)
  						else if (stmt->rowMarks != NIL)
  						{
  							/* not 100% but probably close enough */
! 							if (((RowMarkClause *) linitial(stmt->rowMarks))->forUpdate)
! 								tag = "SELECT FOR UPDATE";
! 							else
! 								tag = "SELECT FOR SHARE";
  						}
  						else
  							tag = "SELECT";
--- 2242,2262 ----
  						else if (stmt->rowMarks != NIL)
  						{
  							/* not 100% but probably close enough */
! 							switch (((RowMarkClause *) linitial(stmt->rowMarks))->strength)
! 							{
! 								case LCS_FORUPDATE:
! 									tag = "SELECT FOR UPDATE";
! 									break;
! 								case LCS_FORSHARE:
! 									tag = "SELECT FOR SHARE";
! 									break;
! 								case LCS_FORKEYLOCK:
! 									tag = "SELECT FOR KEY LOCK";
! 									break;
! 								default:
! 									tag =  "???";
! 									break;
! 							}
  						}
  						else
  							tag = "SELECT";
*** a/src/backend/utils/adt/ri_triggers.c
--- b/src/backend/utils/adt/ri_triggers.c
***************
*** 309,315 **** RI_FKey_check(PG_FUNCTION_ARGS)
  	 * Get the relation descriptors of the FK and PK tables.
  	 *
  	 * pk_rel is opened in RowShareLock mode since that's what our eventual
! 	 * SELECT FOR SHARE will get on it.
  	 */
  	fk_rel = trigdata->tg_relation;
  	pk_rel = heap_open(riinfo.pk_relid, RowShareLock);
--- 309,315 ----
  	 * Get the relation descriptors of the FK and PK tables.
  	 *
  	 * pk_rel is opened in RowShareLock mode since that's what our eventual
! 	 * SELECT FOR KEY LOCK will get on it.
  	 */
  	fk_rel = trigdata->tg_relation;
  	pk_rel = heap_open(riinfo.pk_relid, RowShareLock);
***************
*** 339,350 **** RI_FKey_check(PG_FUNCTION_ARGS)
  
  			/* ---------
  			 * The query string built is
! 			 *	SELECT 1 FROM ONLY <pktable>
  			 * ----------
  			 */
  			quoteRelationName(pkrelname, pk_rel);
  			snprintf(querystr, sizeof(querystr),
! 					 "SELECT 1 FROM ONLY %s x FOR SHARE OF x",
  					 pkrelname);
  
  			/* Prepare and save the plan */
--- 339,350 ----
  
  			/* ---------
  			 * The query string built is
! 			 *	SELECT 1 FROM ONLY <pktable> x FOR KEY LOCK OF x
  			 * ----------
  			 */
  			quoteRelationName(pkrelname, pk_rel);
  			snprintf(querystr, sizeof(querystr),
! 					 "SELECT 1 FROM ONLY %s x FOR KEY LOCK OF x",
  					 pkrelname);
  
  			/* Prepare and save the plan */
***************
*** 464,470 **** RI_FKey_check(PG_FUNCTION_ARGS)
  
  		/* ----------
  		 * The query string built is
! 		 *	SELECT 1 FROM ONLY <pktable> WHERE pkatt1 = $1 [AND ...] FOR SHARE
  		 * The type id's for the $ parameters are those of the
  		 * corresponding FK attributes.
  		 * ----------
--- 464,471 ----
  
  		/* ----------
  		 * The query string built is
! 		 *	SELECT 1 FROM ONLY <pktable> x WHERE pkatt1 = $1 [AND ...]
! 		 *	       FOR KEY LOCK OF x
  		 * The type id's for the $ parameters are those of the
  		 * corresponding FK attributes.
  		 * ----------
***************
*** 488,494 **** RI_FKey_check(PG_FUNCTION_ARGS)
  			querysep = "AND";
  			queryoids[i] = fk_type;
  		}
! 		appendStringInfo(&querybuf, " FOR SHARE OF x");
  
  		/* Prepare and save the plan */
  		qplan = ri_PlanCheck(querybuf.data, riinfo.nkeys, queryoids,
--- 489,495 ----
  			querysep = "AND";
  			queryoids[i] = fk_type;
  		}
! 		appendStringInfo(&querybuf, " FOR KEY LOCK OF x");
  
  		/* Prepare and save the plan */
  		qplan = ri_PlanCheck(querybuf.data, riinfo.nkeys, queryoids,
***************
*** 626,632 **** ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel,
  
  		/* ----------
  		 * The query string built is
! 		 *	SELECT 1 FROM ONLY <pktable> WHERE pkatt1 = $1 [AND ...] FOR SHARE
  		 * The type id's for the $ parameters are those of the
  		 * PK attributes themselves.
  		 * ----------
--- 627,634 ----
  
  		/* ----------
  		 * The query string built is
! 		 *	SELECT 1 FROM ONLY <pktable> x WHERE pkatt1 = $1 [AND ...]
! 		 *	       FOR KEY LOCK OF x
  		 * The type id's for the $ parameters are those of the
  		 * PK attributes themselves.
  		 * ----------
***************
*** 649,655 **** ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel,
  			querysep = "AND";
  			queryoids[i] = pk_type;
  		}
! 		appendStringInfo(&querybuf, " FOR SHARE OF x");
  
  		/* Prepare and save the plan */
  		qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids,
--- 651,657 ----
  			querysep = "AND";
  			queryoids[i] = pk_type;
  		}
! 		appendStringInfo(&querybuf, " FOR KEY LOCK OF x");
  
  		/* Prepare and save the plan */
  		qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids,
***************
*** 713,719 **** RI_FKey_noaction_del(PG_FUNCTION_ARGS)
  	 * Get the relation descriptors of the FK and PK tables and the old tuple.
  	 *
  	 * fk_rel is opened in RowShareLock mode since that's what our eventual
! 	 * SELECT FOR SHARE will get on it.
  	 */
  	fk_rel = heap_open(riinfo.fk_relid, RowShareLock);
  	pk_rel = trigdata->tg_relation;
--- 715,721 ----
  	 * Get the relation descriptors of the FK and PK tables and the old tuple.
  	 *
  	 * fk_rel is opened in RowShareLock mode since that's what our eventual
! 	 * SELECT FOR KEY LOCK will get on it.
  	 */
  	fk_rel = heap_open(riinfo.fk_relid, RowShareLock);
  	pk_rel = trigdata->tg_relation;
***************
*** 781,787 **** RI_FKey_noaction_del(PG_FUNCTION_ARGS)
  
  				/* ----------
  				 * The query string built is
! 				 *	SELECT 1 FROM ONLY <fktable> WHERE $1 = fkatt1 [AND ...]
  				 * The type id's for the $ parameters are those of the
  				 * corresponding PK attributes.
  				 * ----------
--- 783,790 ----
  
  				/* ----------
  				 * The query string built is
! 				 *	SELECT 1 FROM ONLY <fktable> x WHERE $1 = fkatt1 [AND ...]
! 				 *	       FOR KEY LOCK OF x
  				 * The type id's for the $ parameters are those of the
  				 * corresponding PK attributes.
  				 * ----------
***************
*** 806,812 **** RI_FKey_noaction_del(PG_FUNCTION_ARGS)
  					querysep = "AND";
  					queryoids[i] = pk_type;
  				}
! 				appendStringInfo(&querybuf, " FOR SHARE OF x");
  
  				/* Prepare and save the plan */
  				qplan = ri_PlanCheck(querybuf.data, riinfo.nkeys, queryoids,
--- 809,815 ----
  					querysep = "AND";
  					queryoids[i] = pk_type;
  				}
! 				appendStringInfo(&querybuf, " FOR KEY LOCK OF x");
  
  				/* Prepare and save the plan */
  				qplan = ri_PlanCheck(querybuf.data, riinfo.nkeys, queryoids,
***************
*** 891,897 **** RI_FKey_noaction_upd(PG_FUNCTION_ARGS)
  	 * old tuple.
  	 *
  	 * fk_rel is opened in RowShareLock mode since that's what our eventual
! 	 * SELECT FOR SHARE will get on it.
  	 */
  	fk_rel = heap_open(riinfo.fk_relid, RowShareLock);
  	pk_rel = trigdata->tg_relation;
--- 894,900 ----
  	 * old tuple.
  	 *
  	 * fk_rel is opened in RowShareLock mode since that's what our eventual
! 	 * SELECT FOR KEY LOCK will get on it.
  	 */
  	fk_rel = heap_open(riinfo.fk_relid, RowShareLock);
  	pk_rel = trigdata->tg_relation;
***************
*** 994,1000 **** RI_FKey_noaction_upd(PG_FUNCTION_ARGS)
  					querysep = "AND";
  					queryoids[i] = pk_type;
  				}
! 				appendStringInfo(&querybuf, " FOR SHARE OF x");
  
  				/* Prepare and save the plan */
  				qplan = ri_PlanCheck(querybuf.data, riinfo.nkeys, queryoids,
--- 997,1003 ----
  					querysep = "AND";
  					queryoids[i] = pk_type;
  				}
! 				appendStringInfo(&querybuf, " FOR KEY LOCK OF x");
  
  				/* Prepare and save the plan */
  				qplan = ri_PlanCheck(querybuf.data, riinfo.nkeys, queryoids,
***************
*** 1432,1438 **** RI_FKey_restrict_del(PG_FUNCTION_ARGS)
  	 * Get the relation descriptors of the FK and PK tables and the old tuple.
  	 *
  	 * fk_rel is opened in RowShareLock mode since that's what our eventual
! 	 * SELECT FOR SHARE will get on it.
  	 */
  	fk_rel = heap_open(riinfo.fk_relid, RowShareLock);
  	pk_rel = trigdata->tg_relation;
--- 1435,1441 ----
  	 * Get the relation descriptors of the FK and PK tables and the old tuple.
  	 *
  	 * fk_rel is opened in RowShareLock mode since that's what our eventual
! 	 * SELECT FOR KEY LOCK will get on it.
  	 */
  	fk_rel = heap_open(riinfo.fk_relid, RowShareLock);
  	pk_rel = trigdata->tg_relation;
***************
*** 1490,1496 **** RI_FKey_restrict_del(PG_FUNCTION_ARGS)
  
  				/* ----------
  				 * The query string built is
! 				 *	SELECT 1 FROM ONLY <fktable> WHERE $1 = fkatt1 [AND ...]
  				 * The type id's for the $ parameters are those of the
  				 * corresponding PK attributes.
  				 * ----------
--- 1493,1500 ----
  
  				/* ----------
  				 * The query string built is
! 				 *	SELECT 1 FROM ONLY <fktable> x WHERE $1 = fkatt1 [AND ...]
! 				 *	       FOR KEY LOCK OF x
  				 * The type id's for the $ parameters are those of the
  				 * corresponding PK attributes.
  				 * ----------
***************
*** 1515,1521 **** RI_FKey_restrict_del(PG_FUNCTION_ARGS)
  					querysep = "AND";
  					queryoids[i] = pk_type;
  				}
! 				appendStringInfo(&querybuf, " FOR SHARE OF x");
  
  				/* Prepare and save the plan */
  				qplan = ri_PlanCheck(querybuf.data, riinfo.nkeys, queryoids,
--- 1519,1525 ----
  					querysep = "AND";
  					queryoids[i] = pk_type;
  				}
! 				appendStringInfo(&querybuf, " FOR KEY LOCK OF x");
  
  				/* Prepare and save the plan */
  				qplan = ri_PlanCheck(querybuf.data, riinfo.nkeys, queryoids,
***************
*** 1605,1611 **** RI_FKey_restrict_upd(PG_FUNCTION_ARGS)
  	 * old tuple.
  	 *
  	 * fk_rel is opened in RowShareLock mode since that's what our eventual
! 	 * SELECT FOR SHARE will get on it.
  	 */
  	fk_rel = heap_open(riinfo.fk_relid, RowShareLock);
  	pk_rel = trigdata->tg_relation;
--- 1609,1615 ----
  	 * old tuple.
  	 *
  	 * fk_rel is opened in RowShareLock mode since that's what our eventual
! 	 * SELECT FOR KEY LOCK will get on it.
  	 */
  	fk_rel = heap_open(riinfo.fk_relid, RowShareLock);
  	pk_rel = trigdata->tg_relation;
***************
*** 1673,1679 **** RI_FKey_restrict_upd(PG_FUNCTION_ARGS)
  
  				/* ----------
  				 * The query string built is
! 				 *	SELECT 1 FROM ONLY <fktable> WHERE $1 = fkatt1 [AND ...]
  				 * The type id's for the $ parameters are those of the
  				 * corresponding PK attributes.
  				 * ----------
--- 1677,1684 ----
  
  				/* ----------
  				 * The query string built is
! 				 *	SELECT 1 FROM ONLY <fktable> x WHERE $1 = fkatt1 [AND ...]
! 				 *	       FOR KEY LOCK OF x
  				 * The type id's for the $ parameters are those of the
  				 * corresponding PK attributes.
  				 * ----------
***************
*** 1698,1704 **** RI_FKey_restrict_upd(PG_FUNCTION_ARGS)
  					querysep = "AND";
  					queryoids[i] = pk_type;
  				}
! 				appendStringInfo(&querybuf, " FOR SHARE OF x");
  
  				/* Prepare and save the plan */
  				qplan = ri_PlanCheck(querybuf.data, riinfo.nkeys, queryoids,
--- 1703,1709 ----
  					querysep = "AND";
  					queryoids[i] = pk_type;
  				}
! 				appendStringInfo(&querybuf, " FOR KEY LOCK OF x");
  
  				/* Prepare and save the plan */
  				qplan = ri_PlanCheck(querybuf.data, riinfo.nkeys, queryoids,
*** a/src/backend/utils/adt/ruleutils.c
--- b/src/backend/utils/adt/ruleutils.c
***************
*** 2857,2868 **** get_select_query_def(Query *query, deparse_context *context,
  			if (rc->pushedDown)
  				continue;
  
! 			if (rc->forUpdate)
! 				appendContextKeyword(context, " FOR UPDATE",
! 									 -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
! 			else
! 				appendContextKeyword(context, " FOR SHARE",
! 									 -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
  			appendStringInfo(buf, " OF %s",
  							 quote_identifier(rte->eref->aliasname));
  			if (rc->noWait)
--- 2857,2880 ----
  			if (rc->pushedDown)
  				continue;
  
! 			switch (rc->strength)
! 			{
! 				case LCS_FORKEYLOCK:
! 					appendContextKeyword(context, " FOR KEY LOCK",
! 										 -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
! 					break;
! 				case LCS_FORSHARE:
! 					appendContextKeyword(context, " FOR SHARE",
! 										 -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
! 					break;
! 				case LCS_FORUPDATE:
! 					appendContextKeyword(context, " FOR UPDATE",
! 										 -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
! 					break;
! 				default:
! 					elog(ERROR, "unrecognized row locking clause %d", rc->strength);
! 			}
! 
  			appendStringInfo(buf, " OF %s",
  							 quote_identifier(rte->eref->aliasname));
  			if (rc->noWait)
*** a/src/backend/utils/cache/relcache.c
--- b/src/backend/utils/cache/relcache.c
***************
*** 3614,3619 **** RelationGetIndexPredicate(Relation relation)
--- 3614,3622 ----
   * simple index keys, but attributes used in expressions and partial-index
   * predicates.)
   *
+  * If "keyAttrs" is true, only attributes that can be referenced by foreign
+  * keys are considered.
+  *
   * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
   * we can include system attributes (e.g., OID) in the bitmap representation.
   *
***************
*** 3625,3640 **** RelationGetIndexPredicate(Relation relation)
   * be bms_free'd when not needed anymore.
   */
  Bitmapset *
! RelationGetIndexAttrBitmap(Relation relation)
  {
  	Bitmapset  *indexattrs;
  	List	   *indexoidlist;
  	ListCell   *l;
  	MemoryContext oldcxt;
  
  	/* Quick exit if we already computed the result. */
  	if (relation->rd_indexattr != NULL)
! 		return bms_copy(relation->rd_indexattr);
  
  	/* Fast path if definitely no indexes */
  	if (!RelationGetForm(relation)->relhasindex)
--- 3628,3644 ----
   * be bms_free'd when not needed anymore.
   */
  Bitmapset *
! RelationGetIndexAttrBitmap(Relation relation, bool keyAttrs)
  {
  	Bitmapset  *indexattrs;
+ 	Bitmapset  *uindexattrs;
  	List	   *indexoidlist;
  	ListCell   *l;
  	MemoryContext oldcxt;
  
  	/* Quick exit if we already computed the result. */
  	if (relation->rd_indexattr != NULL)
! 		return bms_copy(keyAttrs ? relation->rd_keyattr : relation->rd_indexattr);
  
  	/* Fast path if definitely no indexes */
  	if (!RelationGetForm(relation)->relhasindex)
***************
*** 3653,3678 **** RelationGetIndexAttrBitmap(Relation relation)
--- 3657,3694 ----
  	 * For each index, add referenced attributes to indexattrs.
  	 */
  	indexattrs = NULL;
+ 	uindexattrs = NULL;
  	foreach(l, indexoidlist)
  	{
  		Oid			indexOid = lfirst_oid(l);
  		Relation	indexDesc;
  		IndexInfo  *indexInfo;
  		int			i;
+ 		bool		isKey;
  
  		indexDesc = index_open(indexOid, AccessShareLock);
  
  		/* Extract index key information from the index's pg_index row */
  		indexInfo = BuildIndexInfo(indexDesc);
  
+ 		/* Can this index be referenced by a foreign key? */
+ 		isKey = indexInfo->ii_Unique &&
+ 				indexInfo->ii_Expressions == NIL &&
+ 				indexInfo->ii_Predicate == NIL;
+ 
  		/* Collect simple attribute references */
  		for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
  		{
  			int			attrnum = indexInfo->ii_KeyAttrNumbers[i];
  
  			if (attrnum != 0)
+ 			{
  				indexattrs = bms_add_member(indexattrs,
  							   attrnum - FirstLowInvalidHeapAttributeNumber);
+ 				if (isKey)
+ 					uindexattrs = bms_add_member(uindexattrs,
+ 											   	 attrnum - FirstLowInvalidHeapAttributeNumber);
+ 			}
  		}
  
  		/* Collect all attributes used in expressions, too */
***************
*** 3689,3698 **** RelationGetIndexAttrBitmap(Relation relation)
  	/* Now save a copy of the bitmap in the relcache entry. */
  	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
  	relation->rd_indexattr = bms_copy(indexattrs);
  	MemoryContextSwitchTo(oldcxt);
  
  	/* We return our original working copy for caller to play with */
! 	return indexattrs;
  }
  
  /*
--- 3705,3715 ----
  	/* Now save a copy of the bitmap in the relcache entry. */
  	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
  	relation->rd_indexattr = bms_copy(indexattrs);
+ 	relation->rd_keyattr = bms_copy(uindexattrs);
  	MemoryContextSwitchTo(oldcxt);
  
  	/* We return our original working copy for caller to play with */
! 	return keyAttrs ? uindexattrs : indexattrs;
  }
  
  /*
*** a/src/include/access/heapam.h
--- b/src/include/access/heapam.h
***************
*** 31,38 ****
--- 31,44 ----
  
  typedef struct BulkInsertStateData *BulkInsertState;
  
+ /*
+  * This enum mirrors LockClauseStrength precisely, but we define it separately
+  * to avoid having to share otherwise unrelated headers.  To go from one to
+  * the other, we wade through the planner using a third enum, RowMarkType.
+  */
  typedef enum
  {
+ 	LockTupleKeylock,
  	LockTupleShared,
  	LockTupleExclusive
  } LockTupleMode;
*** a/src/include/access/htup.h
--- b/src/include/access/htup.h
***************
*** 163,174 **** typedef HeapTupleHeaderData *HeapTupleHeader;
  #define HEAP_HASVARWIDTH		0x0002	/* has variable-width attribute(s) */
  #define HEAP_HASEXTERNAL		0x0004	/* has external stored attribute(s) */
  #define HEAP_HASOID				0x0008	/* has an object-id field */
! /* bit 0x0010 is available */
  #define HEAP_COMBOCID			0x0020	/* t_cid is a combo cid */
  #define HEAP_XMAX_EXCL_LOCK		0x0040	/* xmax is exclusive locker */
  #define HEAP_XMAX_SHARED_LOCK	0x0080	/* xmax is shared locker */
! /* if either LOCK bit is set, xmax hasn't deleted the tuple, only locked it */
! #define HEAP_IS_LOCKED	(HEAP_XMAX_EXCL_LOCK | HEAP_XMAX_SHARED_LOCK)
  #define HEAP_XMIN_COMMITTED		0x0100	/* t_xmin committed */
  #define HEAP_XMIN_INVALID		0x0200	/* t_xmin invalid/aborted */
  #define HEAP_XMAX_COMMITTED		0x0400	/* t_xmax committed */
--- 163,177 ----
  #define HEAP_HASVARWIDTH		0x0002	/* has variable-width attribute(s) */
  #define HEAP_HASEXTERNAL		0x0004	/* has external stored attribute(s) */
  #define HEAP_HASOID				0x0008	/* has an object-id field */
! #define HEAP_XMAX_KEY_LOCK		0x0010	/* xmax is a "key" locker */
  #define HEAP_COMBOCID			0x0020	/* t_cid is a combo cid */
  #define HEAP_XMAX_EXCL_LOCK		0x0040	/* xmax is exclusive locker */
  #define HEAP_XMAX_SHARED_LOCK	0x0080	/* xmax is shared locker */
! /* if either SHARE or KEY lock bit is set, this is a "shared" lock */
! #define HEAP_IS_SHARE_LOCKED (HEAP_XMAX_SHARED_LOCK | HEAP_XMAX_KEY_LOCK)
! /* if any LOCK bit is set, xmax hasn't deleted the tuple, only locked it */
! #define HEAP_IS_LOCKED	(HEAP_XMAX_EXCL_LOCK | HEAP_XMAX_SHARED_LOCK | \
! 						 HEAP_XMAX_KEY_LOCK)
  #define HEAP_XMIN_COMMITTED		0x0100	/* t_xmin committed */
  #define HEAP_XMIN_INVALID		0x0200	/* t_xmin invalid/aborted */
  #define HEAP_XMAX_COMMITTED		0x0400	/* t_xmax committed */
***************
*** 726,735 **** typedef struct xl_heap_lock
  	xl_heaptid	target;			/* locked tuple id */
  	TransactionId locking_xid;	/* might be a MultiXactId not xid */
  	bool		xid_is_mxact;	/* is it? */
! 	bool		shared_lock;	/* shared or exclusive row lock? */
  } xl_heap_lock;
  
! #define SizeOfHeapLock	(offsetof(xl_heap_lock, shared_lock) + sizeof(bool))
  
  /* This is what we need to know about in-place update */
  typedef struct xl_heap_inplace
--- 729,738 ----
  	xl_heaptid	target;			/* locked tuple id */
  	TransactionId locking_xid;	/* might be a MultiXactId not xid */
  	bool		xid_is_mxact;	/* is it? */
! 	int8		lock_strength;	/* keylock, shared, exclusive lock? */
  } xl_heap_lock;
  
! #define SizeOfHeapLock	(offsetof(xl_heap_lock, lock_strength) + sizeof(int8))
  
  /* This is what we need to know about in-place update */
  typedef struct xl_heap_inplace
***************
*** 767,774 **** extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
  extern CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup);
  extern CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup);
  extern void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup,
! 						  CommandId *cmax,
! 						  bool *iscombo);
  
  /* ----------------
   *		fastgetattr
--- 770,776 ----
  extern CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup);
  extern CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup);
  extern void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup,
! 						  CommandId *cmax, bool *iscombo);
  
  /* ----------------
   *		fastgetattr
*** a/src/include/access/xlog_internal.h
--- b/src/include/access/xlog_internal.h
***************
*** 71,77 **** typedef struct XLogContRecord
  /*
   * Each page of XLOG file has a header like this:
   */
! #define XLOG_PAGE_MAGIC 0xD068	/* can be used as WAL version indicator */
  
  typedef struct XLogPageHeaderData
  {
--- 71,77 ----
  /*
   * Each page of XLOG file has a header like this:
   */
! #define XLOG_PAGE_MAGIC 0xD069	/* can be used as WAL version indicator */
  
  typedef struct XLogPageHeaderData
  {
*** a/src/include/nodes/execnodes.h
--- b/src/include/nodes/execnodes.h
***************
*** 408,414 **** typedef struct EState
   * ExecRowMark -
   *	   runtime representation of FOR UPDATE/SHARE clauses
   *
!  * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we should have an
   * ExecRowMark for each non-target relation in the query (except inheritance
   * parent RTEs, which can be ignored at runtime).  See PlanRowMark for details
   * about most of the fields.  In addition to fields directly derived from
--- 408,414 ----
   * ExecRowMark -
   *	   runtime representation of FOR UPDATE/SHARE clauses
   *
!  * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE/KEY LOCK, we should have an
   * ExecRowMark for each non-target relation in the query (except inheritance
   * parent RTEs, which can be ignored at runtime).  See PlanRowMark for details
   * about most of the fields.  In addition to fields directly derived from
*** a/src/include/nodes/parsenodes.h
--- b/src/include/nodes/parsenodes.h
***************
*** 119,125 **** typedef struct Query
  	bool		hasDistinctOn;	/* distinctClause is from DISTINCT ON */
  	bool		hasRecursive;	/* WITH RECURSIVE was specified */
  	bool		hasModifyingCTE;	/* has INSERT/UPDATE/DELETE in WITH */
! 	bool		hasForUpdate;	/* FOR UPDATE or FOR SHARE was specified */
  
  	List	   *cteList;		/* WITH list (of CommonTableExpr's) */
  
--- 119,125 ----
  	bool		hasDistinctOn;	/* distinctClause is from DISTINCT ON */
  	bool		hasRecursive;	/* WITH RECURSIVE was specified */
  	bool		hasModifyingCTE;	/* has INSERT/UPDATE/DELETE in WITH */
! 	bool		hasForUpdate;	/* FOR UPDATE/SHARE/KEY LOCK was specified */
  
  	List	   *cteList;		/* WITH list (of CommonTableExpr's) */
  
***************
*** 569,586 **** typedef struct DefElem
  } DefElem;
  
  /*
!  * LockingClause - raw representation of FOR UPDATE/SHARE options
   *
   * Note: lockedRels == NIL means "all relations in query".	Otherwise it
   * is a list of RangeVar nodes.  (We use RangeVar mainly because it carries
   * a location field --- currently, parse analysis insists on unqualified
   * names in LockingClause.)
   */
  typedef struct LockingClause
  {
  	NodeTag		type;
  	List	   *lockedRels;		/* FOR UPDATE or FOR SHARE relations */
! 	bool		forUpdate;		/* true = FOR UPDATE, false = FOR SHARE */
  	bool		noWait;			/* NOWAIT option */
  } LockingClause;
  
--- 569,594 ----
  } DefElem;
  
  /*
!  * LockingClause - raw representation of FOR UPDATE/SHARE/KEY LOCK options
   *
   * Note: lockedRels == NIL means "all relations in query".	Otherwise it
   * is a list of RangeVar nodes.  (We use RangeVar mainly because it carries
   * a location field --- currently, parse analysis insists on unqualified
   * names in LockingClause.)
   */
+ typedef enum LockClauseStrength
+ {
+ 	/* order is important -- see applyLockingClause */
+ 	LCS_FORKEYLOCK,				/* FOR KEY LOCK */
+ 	LCS_FORSHARE,				/* FOR SHARE */
+ 	LCS_FORUPDATE				/* FOR UPDATE */
+ } LockClauseStrength;
+ 
  typedef struct LockingClause
  {
  	NodeTag		type;
  	List	   *lockedRels;		/* FOR UPDATE or FOR SHARE relations */
! 	LockClauseStrength strength;	/* FOR KEY LOCK, FOR SHARE or FOR UPDATE */
  	bool		noWait;			/* NOWAIT option */
  } LockingClause;
  
***************
*** 863,880 **** typedef struct WindowClause
   *	   parser output representation of FOR UPDATE/SHARE clauses
   *
   * Query.rowMarks contains a separate RowMarkClause node for each relation
!  * identified as a FOR UPDATE/SHARE target.  If FOR UPDATE/SHARE is applied
!  * to a subquery, we generate RowMarkClauses for all normal and subquery rels
!  * in the subquery, but they are marked pushedDown = true to distinguish them
!  * from clauses that were explicitly written at this query level.  Also,
!  * Query.hasForUpdate tells whether there were explicit FOR UPDATE/SHARE
!  * clauses in the current query level.
   */
  typedef struct RowMarkClause
  {
  	NodeTag		type;
  	Index		rti;			/* range table index of target relation */
! 	bool		forUpdate;		/* true = FOR UPDATE, false = FOR SHARE */
  	bool		noWait;			/* NOWAIT option */
  	bool		pushedDown;		/* pushed down from higher query level? */
  } RowMarkClause;
--- 871,888 ----
   *	   parser output representation of FOR UPDATE/SHARE clauses
   *
   * Query.rowMarks contains a separate RowMarkClause node for each relation
!  * identified as a FOR UPDATE/SHARE/KEY LOCK target.  If one of these clauses
!  * is applied to a subquery, we generate RowMarkClauses for all normal and
!  * subquery rels in the subquery, but they are marked pushedDown = true to
!  * distinguish them from clauses that were explicitly written at this query
!  * level.  Also, Query.hasForUpdate tells whether there were explicit FOR
!  * UPDATE/SHARE/KEY LOCK clauses in the current query level.
   */
  typedef struct RowMarkClause
  {
  	NodeTag		type;
  	Index		rti;			/* range table index of target relation */
! 	LockClauseStrength strength;	/* FOR KEY LOCK, FOR SHARE or FOR UPDATE */
  	bool		noWait;			/* NOWAIT option */
  	bool		pushedDown;		/* pushed down from higher query level? */
  } RowMarkClause;
*** a/src/include/nodes/plannodes.h
--- b/src/include/nodes/plannodes.h
***************
*** 722,728 **** typedef struct Limit
   * RowMarkType -
   *	  enums for types of row-marking operations
   *
!  * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we have to uniquely
   * identify all the source rows, not only those from the target relations, so
   * that we can perform EvalPlanQual rechecking at need.  For plain tables we
   * can just fetch the TID, the same as for a target relation.  Otherwise (for
--- 722,728 ----
   * RowMarkType -
   *	  enums for types of row-marking operations
   *
!  * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE/KEY LOCK, we have to uniquely
   * identify all the source rows, not only those from the target relations, so
   * that we can perform EvalPlanQual rechecking at need.  For plain tables we
   * can just fetch the TID, the same as for a target relation.  Otherwise (for
***************
*** 734,752 **** typedef enum RowMarkType
  {
  	ROW_MARK_EXCLUSIVE,			/* obtain exclusive tuple lock */
  	ROW_MARK_SHARE,				/* obtain shared tuple lock */
  	ROW_MARK_REFERENCE,			/* just fetch the TID */
  	ROW_MARK_COPY				/* physically copy the row value */
  } RowMarkType;
  
! #define RowMarkRequiresRowShareLock(marktype)  ((marktype) <= ROW_MARK_SHARE)
  
  /*
   * PlanRowMark -
   *	   plan-time representation of FOR UPDATE/SHARE clauses
   *
!  * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, we create a separate
   * PlanRowMark node for each non-target relation in the query.	Relations that
!  * are not specified as FOR UPDATE/SHARE are marked ROW_MARK_REFERENCE (if
   * real tables) or ROW_MARK_COPY (if not).
   *
   * Initially all PlanRowMarks have rti == prti and isParent == false.
--- 734,753 ----
  {
  	ROW_MARK_EXCLUSIVE,			/* obtain exclusive tuple lock */
  	ROW_MARK_SHARE,				/* obtain shared tuple lock */
+ 	ROW_MARK_KEYLOCK,			/* obtain keylock tuple lock */
  	ROW_MARK_REFERENCE,			/* just fetch the TID */
  	ROW_MARK_COPY				/* physically copy the row value */
  } RowMarkType;
  
! #define RowMarkRequiresRowShareLock(marktype)  ((marktype) <= ROW_MARK_KEYLOCK)
  
  /*
   * PlanRowMark -
   *	   plan-time representation of FOR UPDATE/SHARE clauses
   *
!  * When doing UPDATE, DELETE, or SELECT FOR UPDATE/SHARE/KEY LOCK, we create a separate
   * PlanRowMark node for each non-target relation in the query.	Relations that
!  * are not specified as FOR UPDATE/SHARE/KEY LOCK are marked ROW_MARK_REFERENCE (if
   * real tables) or ROW_MARK_COPY (if not).
   *
   * Initially all PlanRowMarks have rti == prti and isParent == false.
*** a/src/include/parser/analyze.h
--- b/src/include/parser/analyze.h
***************
*** 31,36 **** extern bool analyze_requires_snapshot(Node *parseTree);
  
  extern void CheckSelectLocking(Query *qry);
  extern void applyLockingClause(Query *qry, Index rtindex,
! 				   bool forUpdate, bool noWait, bool pushedDown);
  
  #endif   /* ANALYZE_H */
--- 31,36 ----
  
  extern void CheckSelectLocking(Query *qry);
  extern void applyLockingClause(Query *qry, Index rtindex,
! 				   LockClauseStrength strength, bool noWait, bool pushedDown);
  
  #endif   /* ANALYZE_H */
*** a/src/include/utils/rel.h
--- b/src/include/utils/rel.h
***************
*** 103,108 **** typedef struct RelationData
--- 103,109 ----
  	Oid			rd_id;			/* relation's object id */
  	List	   *rd_indexlist;	/* list of OIDs of indexes on relation */
  	Bitmapset  *rd_indexattr;	/* identifies columns used in indexes */
+ 	Bitmapset  *rd_keyattr;		/* cols that can be ref'd by foreign keys */
  	Oid			rd_oidindex;	/* OID of unique index on OID, if any */
  	LockInfoData rd_lockInfo;	/* lock mgr's info for locking relation */
  	RuleLock   *rd_rules;		/* rewrite rules */
*** a/src/include/utils/relcache.h
--- b/src/include/utils/relcache.h
***************
*** 42,48 **** extern List *RelationGetIndexList(Relation relation);
  extern Oid	RelationGetOidIndex(Relation relation);
  extern List *RelationGetIndexExpressions(Relation relation);
  extern List *RelationGetIndexPredicate(Relation relation);
! extern Bitmapset *RelationGetIndexAttrBitmap(Relation relation);
  extern void RelationGetExclusionInfo(Relation indexRelation,
  						 Oid **operators,
  						 Oid **procs,
--- 42,48 ----
  extern Oid	RelationGetOidIndex(Relation relation);
  extern List *RelationGetIndexExpressions(Relation relation);
  extern List *RelationGetIndexPredicate(Relation relation);
! extern Bitmapset *RelationGetIndexAttrBitmap(Relation relation, bool keyAttrs);
  extern void RelationGetExclusionInfo(Relation indexRelation,
  						 Oid **operators,
  						 Oid **procs,
*** a/src/test/isolation/expected/fk-contention.out
--- b/src/test/isolation/expected/fk-contention.out
***************
*** 7,15 **** step upd:  UPDATE foo SET b = 'Hello World';
  
  starting permutation: ins upd com
  step ins:  INSERT INTO bar VALUES (42); 
! step upd:  UPDATE foo SET b = 'Hello World';  <waiting ...>
  step com:  COMMIT; 
- step upd: <... completed>
  
  starting permutation: upd ins com
  step upd:  UPDATE foo SET b = 'Hello World'; 
--- 7,14 ----
  
  starting permutation: ins upd com
  step ins:  INSERT INTO bar VALUES (42); 
! step upd:  UPDATE foo SET b = 'Hello World'; 
  step com:  COMMIT; 
  
  starting permutation: upd ins com
  step upd:  UPDATE foo SET b = 'Hello World'; 
*** a/src/test/isolation/expected/fk-deadlock.out
--- b/src/test/isolation/expected/fk-deadlock.out
***************
*** 20,60 **** step s2c:  COMMIT;
  starting permutation: s1i s2i s1u s2u s1c s2c
  step s1i:  INSERT INTO child VALUES (1, 1); 
  step s2i:  INSERT INTO child VALUES (2, 1); 
! step s1u:  UPDATE parent SET aux = 'bar';  <waiting ...>
! step s2u:  UPDATE parent SET aux = 'baz'; 
! step s1u: <... completed>
! ERROR:  deadlock detected
  step s1c:  COMMIT; 
  step s2c:  COMMIT; 
  
  starting permutation: s1i s2i s2u s1u s2c s1c
  step s1i:  INSERT INTO child VALUES (1, 1); 
  step s2i:  INSERT INTO child VALUES (2, 1); 
! step s2u:  UPDATE parent SET aux = 'baz';  <waiting ...>
! step s1u:  UPDATE parent SET aux = 'bar'; 
! ERROR:  deadlock detected
! step s2u: <... completed>
  step s2c:  COMMIT; 
  step s1c:  COMMIT; 
  
  starting permutation: s2i s1i s1u s2u s1c s2c
  step s2i:  INSERT INTO child VALUES (2, 1); 
  step s1i:  INSERT INTO child VALUES (1, 1); 
! step s1u:  UPDATE parent SET aux = 'bar';  <waiting ...>
! step s2u:  UPDATE parent SET aux = 'baz'; 
! step s1u: <... completed>
! ERROR:  deadlock detected
  step s1c:  COMMIT; 
  step s2c:  COMMIT; 
  
  starting permutation: s2i s1i s2u s1u s2c s1c
  step s2i:  INSERT INTO child VALUES (2, 1); 
  step s1i:  INSERT INTO child VALUES (1, 1); 
! step s2u:  UPDATE parent SET aux = 'baz';  <waiting ...>
! step s1u:  UPDATE parent SET aux = 'bar'; 
! ERROR:  deadlock detected
! step s2u: <... completed>
  step s2c:  COMMIT; 
  step s1c:  COMMIT; 
  
  starting permutation: s2i s2u s1i s2c s1u s1c
--- 20,56 ----
  starting permutation: s1i s2i s1u s2u s1c s2c
  step s1i:  INSERT INTO child VALUES (1, 1); 
  step s2i:  INSERT INTO child VALUES (2, 1); 
! step s1u:  UPDATE parent SET aux = 'bar'; 
! step s2u:  UPDATE parent SET aux = 'baz';  <waiting ...>
  step s1c:  COMMIT; 
+ step s2u: <... completed>
  step s2c:  COMMIT; 
  
  starting permutation: s1i s2i s2u s1u s2c s1c
  step s1i:  INSERT INTO child VALUES (1, 1); 
  step s2i:  INSERT INTO child VALUES (2, 1); 
! step s2u:  UPDATE parent SET aux = 'baz'; 
! step s1u:  UPDATE parent SET aux = 'bar';  <waiting ...>
  step s2c:  COMMIT; 
+ step s1u: <... completed>
  step s1c:  COMMIT; 
  
  starting permutation: s2i s1i s1u s2u s1c s2c
  step s2i:  INSERT INTO child VALUES (2, 1); 
  step s1i:  INSERT INTO child VALUES (1, 1); 
! step s1u:  UPDATE parent SET aux = 'bar'; 
! step s2u:  UPDATE parent SET aux = 'baz';  <waiting ...>
  step s1c:  COMMIT; 
+ step s2u: <... completed>
  step s2c:  COMMIT; 
  
  starting permutation: s2i s1i s2u s1u s2c s1c
  step s2i:  INSERT INTO child VALUES (2, 1); 
  step s1i:  INSERT INTO child VALUES (1, 1); 
! step s2u:  UPDATE parent SET aux = 'baz'; 
! step s1u:  UPDATE parent SET aux = 'bar';  <waiting ...>
  step s2c:  COMMIT; 
+ step s1u: <... completed>
  step s1c:  COMMIT; 
  
  starting permutation: s2i s2u s1i s2c s1u s1c
*** a/src/test/isolation/expected/fk-deadlock2.out
--- b/src/test/isolation/expected/fk-deadlock2.out
***************
*** 100,107 **** step s1c:  COMMIT;
  starting permutation: s2u1 s2u2 s1u1 s2c s1u2 s1c
  step s2u1:  UPDATE B SET Col2 = 1 WHERE BID = 2; 
  step s2u2:  UPDATE B SET Col2 = 1 WHERE BID = 2; 
! step s1u1:  UPDATE A SET Col1 = 1 WHERE AID = 1;  <waiting ...>
  step s2c:  COMMIT; 
- step s1u1: <... completed>
  step s1u2:  UPDATE B SET Col2 = 1 WHERE BID = 2; 
  step s1c:  COMMIT; 
--- 100,106 ----
  starting permutation: s2u1 s2u2 s1u1 s2c s1u2 s1c
  step s2u1:  UPDATE B SET Col2 = 1 WHERE BID = 2; 
  step s2u2:  UPDATE B SET Col2 = 1 WHERE BID = 2; 
! step s1u1:  UPDATE A SET Col1 = 1 WHERE AID = 1; 
  step s2c:  COMMIT; 
  step s1u2:  UPDATE B SET Col2 = 1 WHERE BID = 2; 
  step s1c:  COMMIT;