diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 1e2aa9f..2dc99a6 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -4685,7 +4685,6 @@ MaxLivePostmasterChildren(void) extern slock_t *ShmemLock; extern LWLock *LWLockArray; extern slock_t *ProcStructLock; -extern PROC_HDR *ProcGlobal; extern PGPROC *AuxiliaryProcs; extern PMSignalData *PMSignalState; extern pgsocket pgStatSock; diff --git a/src/backend/storage/lmgr/README b/src/backend/storage/lmgr/README index 87fd312..8c01bed 100644 --- a/src/backend/storage/lmgr/README +++ b/src/backend/storage/lmgr/README @@ -60,20 +60,29 @@ identical lock mode sets. See src/tools/backend/index.html and src/include/storage/lock.h for more details. (Lock modes are also called lock types in some places in the code and documentation.) -There are two fundamental lock structures in shared memory: the -per-lockable-object LOCK struct, and the per-lock-and-requestor PROCLOCK -struct. A LOCK object exists for each lockable object that currently has -locks held or requested on it. A PROCLOCK struct exists for each backend -that is holding or requesting lock(s) on each LOCK object. - -In addition to these, each backend maintains an unshared LOCALLOCK structure -for each lockable object and lock mode that it is currently holding or -requesting. The shared lock structures only allow a single lock grant to -be made per lockable object/lock mode/backend. Internally to a backend, -however, the same lock may be requested and perhaps released multiple times -in a transaction, and it can also be held both transactionally and session- -wide. The internal request counts are held in LOCALLOCK so that the shared -data structures need not be accessed to alter them. +There are two main methods for recording locks in shared memory. The primary +mechanism uses two main structures: the per-lockable-object LOCK struct, and +the per-lock-and-requestor PROCLOCK struct. 
A LOCK object exists for each +lockable object that currently has locks held or requested on it. A PROCLOCK +struct exists for each backend that is holding or requesting lock(s) on each +LOCK object. + +There is also a special "fast path" mechanism which backends may use to +record a limited number of locks with very specific characteristics: they must +use the DEFAULT lockmethod; they must represent a lock on a database relation +(not a shared relation); they must be a "weak" lock which is unlikely to +conflict (AccessShareLock, RowShareLock, or RowExclusiveLock); and the system +must be able to quickly verify that no conflicting locks could possibly be +present. See "Fast Path Locking", below, for more details. + +Each backend also maintains an unshared LOCALLOCK structure for each lockable +object and lock mode that it is currently holding or requesting. The shared +lock structures only allow a single lock grant to be made per lockable +object/lock mode/backend. Internally to a backend, however, the same lock may +be requested and perhaps released multiple times in a transaction, and it can +also be held both transactionally and session-wide. The internal request +counts are held in LOCALLOCK so that the shared data structures need not be +accessed to alter them. --------------------------------------------------------------------------- @@ -250,6 +259,65 @@ tradeoff: we could instead recalculate the partition number from the LOCKTAG when needed. +Fast Path Locking +----------------- + +Fast path locking is a special purpose mechanism designed to reduce the +overhead of taking and releasing weak relation locks. SELECT, INSERT, +UPDATE, and DELETE must acquire a lock on every relation they operate on, +as well as various system catalogs that can be used internally. These locks +are notable not only for the very high frequency with which they are taken +and released, but also for the fact that they virtually never conflict.
+Many DML operations can proceed in parallel against the same table at the +same time; only DDL operations such as CLUSTER, ALTER TABLE, or DROP -- or +explicit user action such as LOCK TABLE -- will create lock conflicts with +the "weak" locks (AccessShareLock, RowShareLock, RowExclusiveLock) acquired +by DML operations. + +The primary locking mechanism does not cope well with this workload. Even +though the lock manager locks are partitioned, the locktag for any given +relation still falls in one, and only one, partition. Thus, if many short +queries are accessing the same relation, the lock manager partition lock for +that partition becomes a contention bottleneck. This effect is measurable +even on 2-core servers, and becomes very pronounced as core count increases. + +To alleviate this bottleneck, beginning in PostgreSQL 9.2, each backend is +permitted to record a limited number of locks on unshared relations in an +array within its PGPROC structure, rather than using the primary lock table. +This is called the "fast path" mechanism, and can only be used when the +locker can verify that no conflicting locks can possibly exist. + +A key point of this algorithm is that it must be possible to verify the +absence of possibly conflicting locks without fighting over a shared LWLock or +spinlock. Otherwise, this effort would simply move the contention bottleneck +from one place to another. We accomplish this using an array of 1024 integer +counters, which are in effect a 1024-way partitioning of the lock space. Each +counter records the number of "strong" locks (that is, ShareLock, +ShareRowExclusiveLock, ExclusiveLock, and AccessExclusiveLock) on unshared +relations that fall into that partition. When this counter is non-zero, the +fast path mechanism may not be used for relation locks in that partition. 
A +strong locker bumps the counter and then scans each per-backend array for +matching fast-path locks; any which are found must be transferred to the +primary lock table before attempting to acquire the lock, to ensure proper +lock conflict and deadlock detection. + +On an SMP system, we must guarantee proper memory synchronization. Here we +rely on the fact that LWLock acquisition acts as a memory sequence point: if +A performs a store, A and B both acquire an LWLock in either order, and B +then performs a load on the same memory location, it is guaranteed to see +A's store. In this case, each backend's fast-path lock queue is protected +by an LWLock. A backend wishing to acquire a fast-path lock grabs this +LWLock before examining FastPathStrongLocks to check for the presence of a +conflicting strong lock. And the backend attempting to acquire a strong +lock, because it must transfer any matching weak locks taken via the fast-path +mechanism to the shared lock table, will acquire every LWLock protecting +a backend fast-path queue in turn. Thus, if we examine FastPathStrongLocks +and see a zero, then either the value is truly zero, or if it is a stale value, +the strong locker has yet to acquire the per-backend LWLock we now hold (or, +indeed, even the first per-backend LWLock) and will notice any weak lock we +take when it does. + + The Deadlock Detection Algorithm -------------------------------- diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index e3ad319..42b2db7 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -112,6 +112,87 @@ static const char *const lock_mode_names[] = "AccessExclusiveLock" }; +/* + * Count of the number of fast path lock slots we believe to be used.
This + * might be higher than the real number if another backend has transferred + * our locks to the primary lock table, but it can never be lower than the + * real value, since only we can acquire locks on our own behalf. + */ +static int FastPathLocalUseCount = 0; + +/* Macros for manipulating proc->fpLockBits */ +#define FAST_PATH_BITS_PER_SLOT 3 +#define FAST_PATH_LOCKNUMBER_OFFSET 1 +#define FAST_PATH_MASK ((1 << FAST_PATH_BITS_PER_SLOT) - 1) +#define FAST_PATH_GET_BITS(proc, n) \ + (((proc)->fpLockBits >> (FAST_PATH_BITS_PER_SLOT * n)) & FAST_PATH_MASK) +#define FAST_PATH_BIT_POSITION(n, l) \ + (AssertMacro((l) >= FAST_PATH_LOCKNUMBER_OFFSET), \ + AssertMacro((l) < FAST_PATH_BITS_PER_SLOT+FAST_PATH_LOCKNUMBER_OFFSET), \ + AssertMacro((n) < FP_LOCK_SLOTS_PER_BACKEND), \ + ((l) - FAST_PATH_LOCKNUMBER_OFFSET + FAST_PATH_BITS_PER_SLOT * (n))) +#define FAST_PATH_SET_LOCKMODE(proc, n, l) \ + (proc)->fpLockBits |= UINT64CONST(1) << FAST_PATH_BIT_POSITION(n, l) +#define FAST_PATH_CLEAR_LOCKMODE(proc, n, l) \ + (proc)->fpLockBits &= ~(UINT64CONST(1) << FAST_PATH_BIT_POSITION(n, l)) +#define FAST_PATH_CHECK_LOCKMODE(proc, n, l) \ + ((proc)->fpLockBits & (UINT64CONST(1) << FAST_PATH_BIT_POSITION(n, l))) + +/* + * The fast-path lock mechanism is concerned only with relation locks on + * unshared relations by backends bound to a database. The fast-path + * mechanism exists mostly to accelerate acquisition and release of locks + * that rarely conflict. Because ShareUpdateExclusiveLock is + * self-conflicting, it can't use the fast-path mechanism; but it also does + * not conflict with any of the locks that do, so we can ignore it completely.
+ */ +#define FastPathTag(locktag) \ + ((locktag)->locktag_lockmethodid == DEFAULT_LOCKMETHOD && \ + (locktag)->locktag_type == LOCKTAG_RELATION && \ + (locktag)->locktag_field1 == MyDatabaseId && \ + MyDatabaseId != InvalidOid) +#define FastPathWeakMode(mode) ((mode) < ShareUpdateExclusiveLock) +#define FastPathStrongMode(mode) ((mode) > ShareUpdateExclusiveLock) +#define FastPathRelevantMode(mode) ((mode) != ShareUpdateExclusiveLock) + +static bool FastPathGrantLock(Oid relid, LOCKMODE lockmode); +static bool FastPathUnGrantLock(Oid relid, LOCKMODE lockmode); +static bool FastPathTransferLocks(LockMethod lockMethodTable, + const LOCKTAG *locktag, uint32 hashcode); +static PROCLOCK *FastPathGetLockEntry(LOCALLOCK *locallock); + +/* ZZZ: Remove this. */ +#define DEBUG_FAST_LOCK 0 + +/* + * To make the fast-path lock mechanism work, we must have some way of + * preventing the use of the fast-path when a conflicting lock might be + * present. We partition the locktag space into + * FAST_PATH_STRONG_LOCK_HASH_PARTITIONS partitions, and maintain an integer + * count of the number of "strong" lockers in each partition. When any + * "strong" lockers are present (which is hopefully not very often), the + * fast-path mechanism can't be used, and we must fall back to the slower + * method of pushing matching locks directly into the main lock tables. + * + * The deadlock detector does not know anything about the fast path mechanism, + * so any locks that might be involved in a deadlock must be transferred from + * the fast-path queues to the main lock table.
+ */ + +#define FAST_PATH_STRONG_LOCK_HASH_BITS 10 +#define FAST_PATH_STRONG_LOCK_HASH_PARTITIONS \ + (1 << FAST_PATH_STRONG_LOCK_HASH_BITS) +#define FastPathStrongLockHashPartition(hashcode) \ + ((hashcode) % FAST_PATH_STRONG_LOCK_HASH_PARTITIONS) + +typedef struct +{ + slock_t mutex; + uint32 count[FAST_PATH_STRONG_LOCK_HASH_PARTITIONS]; +} FastPathStrongLockData; + +FastPathStrongLockData *FastPathStrongLocks; + #ifndef LOCK_DEBUG static bool Dummy_trace = false; #endif @@ -254,6 +335,8 @@ PROCLOCK_PRINT(const char *where, const PROCLOCK *proclockP) static uint32 proclock_hash(const void *key, Size keysize); static void RemoveLocalLock(LOCALLOCK *locallock); +static PROCLOCK *SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc, + const LOCKTAG *locktag, uint32 hashcode, LOCKMODE lockmode); static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner); static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner); static void ReleaseLockForOwner(LOCALLOCK *locallock, ResourceOwner owner); @@ -262,6 +345,9 @@ static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode, static void CleanUpLock(LOCK *lock, PROCLOCK *proclock, LockMethod lockMethodTable, uint32 hashcode, bool wakeupNeeded); +static void LockRefindAndRelease(LockMethod lockMethodTable, PGPROC *proc, + LOCKTAG *locktag, LOCKMODE lockmode, + bool decrement_strong_lock_count); /* @@ -283,6 +369,7 @@ InitLocks(void) int hash_flags; long init_table_size, max_table_size; + bool found; /* * Compute init/max size to request for lock hashtables. Note these @@ -329,6 +416,14 @@ InitLocks(void) hash_flags); /* + * Allocate fast-path structures. + */ + FastPathStrongLocks = ShmemInitStruct("Fast Path Strong Lock Data", + sizeof(FastPathStrongLockData), &found); + if (!found) + SpinLockInit(&FastPathStrongLocks->mutex); + + /* * Allocate non-shared hash table for LOCALLOCK structs. This stores lock * counts and resource owner information. 
* @@ -492,12 +587,9 @@ LockAcquireExtended(const LOCKTAG *locktag, LOCALLOCK *locallock; LOCK *lock; PROCLOCK *proclock; - PROCLOCKTAG proclocktag; bool found; ResourceOwner owner; uint32 hashcode; - uint32 proclock_hashcode; - int partition; LWLockId partitionLock; int status; bool log_lock = false; @@ -553,6 +645,7 @@ LockAcquireExtended(const LOCKTAG *locktag, locallock->nLocks = 0; locallock->numLockOwners = 0; locallock->maxLockOwners = 8; + locallock->holdsStrongLockCount = 0; locallock->lockOwners = NULL; locallock->lockOwners = (LOCALLOCKOWNER *) MemoryContextAlloc(TopMemoryContext, @@ -571,6 +664,7 @@ LockAcquireExtended(const LOCKTAG *locktag, locallock->maxLockOwners = newsize; } } + hashcode = locallock->hashcode; /* * If we already hold the lock, we can just increase the count locally. @@ -600,16 +694,260 @@ LockAcquireExtended(const LOCKTAG *locktag, log_lock = true; } + /* Locks that participate in the fast path require special handling. */ + if (FastPathTag(locktag) && FastPathRelevantMode(lockmode)) + { + uint32 fasthashcode; + + fasthashcode = FastPathStrongLockHashPartition(hashcode); + + /* + * If we remember having filled up the fast path array, we don't + * attempt to make any further use of it until we release some locks. + * It's possible that some other backend has transferred some of those + * locks to the shared hash table, leaving space free, but it's not + * worth acquiring the LWLock just to check. It's also possible that + * we're acquiring a second or third lock type on a relation we have + * already locked using the fast-path, but for now we don't worry about + * that case either. + */ + if (FastPathWeakMode(lockmode) + && FastPathLocalUseCount < FP_LOCK_SLOTS_PER_BACKEND) + { + bool acquired; + + /* + * LWLockAcquire acts as a memory sequencing point, so it's safe + * to assume that any strong locker whose increment to + * FastPathStrongLocks->count becomes visible after we test it has + * yet to begin to transfer fast-path locks.
+ */ + LWLockAcquire(MyProc->fpLWLock, LW_EXCLUSIVE); + if (FastPathStrongLocks->count[fasthashcode] != 0) + acquired = false; + else + acquired = FastPathGrantLock(locktag->locktag_field2, lockmode); + LWLockRelease(MyProc->fpLWLock); + if (acquired) + { + GrantLockLocal(locallock, owner); +#if DEBUG_FAST_LOCK > 1 + elog(WARNING, "pid %d fast acquire rel %u/%u mode %s used %d", + MyProcPid, locktag->locktag_field1, locktag->locktag_field2, + lock_mode_names[lockmode], FastPathLocalUseCount); +#endif + return LOCKACQUIRE_OK; + } +#if DEBUG_FAST_LOCK > 0 + elog(WARNING, "pid %d FALLTHROUGH acquire rel %u/%u mode %s partition %d", + MyProcPid, locktag->locktag_field1, locktag->locktag_field2, + lock_mode_names[lockmode], fasthashcode); +#endif + } + else if (FastPathStrongMode(lockmode)) + { + /* + * Adding to a memory location is not atomic, so we take a + * spinlock to ensure we don't collide with someone else trying + * to bump the count at the same time. + * + * XXX: It might be worth considering using an atomic fetch-and-add + * instruction here, on architectures where that is supported. + */ +#if DEBUG_FAST_LOCK > 0 + elog(WARNING, "pid %d EMBARGO acquire rel %u/%u mode %s partition %d", + MyProcPid, locktag->locktag_field1, locktag->locktag_field2, + lock_mode_names[lockmode], fasthashcode); +#endif + Assert(locallock->holdsStrongLockCount == 0); + SpinLockAcquire(&FastPathStrongLocks->mutex); + FastPathStrongLocks->count[fasthashcode]++; + locallock->holdsStrongLockCount = 1; + SpinLockRelease(&FastPathStrongLocks->mutex); + if (!FastPathTransferLocks(lockMethodTable, locktag, hashcode)) + { + if (reportMemoryError) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of shared memory"), + errhint("You might need to increase max_locks_per_transaction."))); + else + return LOCKACQUIRE_NOT_AVAIL; + } + } + } + /* * Otherwise we've got to mess with the shared lock table. 
*/ - hashcode = locallock->hashcode; - partition = LockHashPartition(hashcode); partitionLock = LockHashPartitionLock(hashcode); LWLockAcquire(partitionLock, LW_EXCLUSIVE); /* + * Find or create a proclock entry with this tag + */ + proclock = SetupLockInTable(lockMethodTable, MyProc, locktag, + hashcode, lockmode); + if (!proclock) + { + LWLockRelease(partitionLock); + if (reportMemoryError) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of shared memory"), + errhint("You might need to increase max_locks_per_transaction."))); + else + return LOCKACQUIRE_NOT_AVAIL; + } + locallock->proclock = proclock; + lock = proclock->tag.myLock; + locallock->lock = lock; + + /* + * If lock requested conflicts with locks requested by waiters, must join + * wait queue. Otherwise, check for conflict with already-held locks. + * (That's last because most complex check.) + */ + if (lockMethodTable->conflictTab[lockmode] & lock->waitMask) + status = STATUS_FOUND; + else + status = LockCheckConflicts(lockMethodTable, lockmode, + lock, proclock, MyProc); + + if (status == STATUS_OK) + { + /* No conflict with held or previously requested locks */ + GrantLock(lock, proclock, lockmode); + GrantLockLocal(locallock, owner); + } + else + { + Assert(status == STATUS_FOUND); + + /* + * We can't acquire the lock immediately. If caller specified no + * blocking, remove useless table entries and return NOT_AVAIL without + * waiting. 
+ */ + if (dontWait) + { + if (proclock->holdMask == 0) + { + uint32 proclock_hashcode; + + proclock_hashcode = ProcLockHashCode(&proclock->tag, hashcode); + SHMQueueDelete(&proclock->lockLink); + SHMQueueDelete(&proclock->procLink); + if (!hash_search_with_hash_value(LockMethodProcLockHash, + (void *) &(proclock->tag), + proclock_hashcode, + HASH_REMOVE, + NULL)) + elog(PANIC, "proclock table corrupted"); + } + else + PROCLOCK_PRINT("LockAcquire: NOWAIT", proclock); + lock->nRequested--; + lock->requested[lockmode]--; + LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode); + Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0)); + Assert(lock->nGranted <= lock->nRequested); + LWLockRelease(partitionLock); + if (locallock->nLocks == 0) + RemoveLocalLock(locallock); + return LOCKACQUIRE_NOT_AVAIL; + } + + /* + * In Hot Standby perform early deadlock detection in normal backends. + * If deadlock found we release partition lock but do not return. + */ + if (RecoveryInProgress() && !InRecovery) + CheckRecoveryConflictDeadlock(partitionLock); + + /* + * Set bitmask of locks this process already holds on this object. + */ + MyProc->heldLocks = proclock->holdMask; + + /* + * Sleep till someone wakes me up. + */ + + TRACE_POSTGRESQL_LOCK_WAIT_START(locktag->locktag_field1, + locktag->locktag_field2, + locktag->locktag_field3, + locktag->locktag_field4, + locktag->locktag_type, + lockmode); + + WaitOnLock(locallock, owner); + + TRACE_POSTGRESQL_LOCK_WAIT_DONE(locktag->locktag_field1, + locktag->locktag_field2, + locktag->locktag_field3, + locktag->locktag_field4, + locktag->locktag_type, + lockmode); + + /* + * NOTE: do not do any material change of state between here and + * return. All required changes in locktable state must have been + * done when the lock was granted to us --- see notes in WaitOnLock. + */ + + /* + * Check the proclock entry status, in case something in the ipc + * communication doesn't work correctly. 
+ */ + if (!(proclock->holdMask & LOCKBIT_ON(lockmode))) + { + PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock); + LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode); + /* Should we retry ? */ + LWLockRelease(partitionLock); + elog(ERROR, "LockAcquire failed"); + } + PROCLOCK_PRINT("LockAcquire: granted", proclock); + LOCK_PRINT("LockAcquire: granted", lock, lockmode); + } + + LWLockRelease(partitionLock); + + /* + * Emit a WAL record if acquisition of this lock need to be replayed in a + * standby server. + */ + if (log_lock) + { + /* + * Decode the locktag back to the original values, to avoid sending + * lots of empty bytes with every message. See lock.h to check how a + * locktag is defined for LOCKTAG_RELATION + */ + LogAccessExclusiveLock(locktag->locktag_field1, + locktag->locktag_field2); + } + + return LOCKACQUIRE_OK; +} + +/* + * Find or create LOCK and PROCLOCK objects as needed for a new lock + * request. + */ +static PROCLOCK * +SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc, + const LOCKTAG *locktag, uint32 hashcode, LOCKMODE lockmode) +{ + LOCK *lock; + PROCLOCK *proclock; + PROCLOCKTAG proclocktag; + uint32 proclock_hashcode; + bool found; + + /* * Find or create a lock with this tag. * * Note: if the locallock object already existed, it might have a pointer @@ -623,17 +961,7 @@ LockAcquireExtended(const LOCKTAG *locktag, HASH_ENTER_NULL, &found); if (!lock) - { - LWLockRelease(partitionLock); - if (reportMemoryError) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of shared memory"), - errhint("You might need to increase max_locks_per_transaction."))); - else - return LOCKACQUIRE_NOT_AVAIL; - } - locallock->lock = lock; + return NULL; /* * if it's a new lock object, initialize it @@ -662,7 +990,7 @@ LockAcquireExtended(const LOCKTAG *locktag, * Create the hash key for the proclock table. 
*/ proclocktag.myLock = lock; - proclocktag.myProc = MyProc; + proclocktag.myProc = proc; proclock_hashcode = ProcLockHashCode(&proclocktag, hashcode); @@ -693,27 +1021,21 @@ LockAcquireExtended(const LOCKTAG *locktag, NULL)) elog(PANIC, "lock table corrupted"); } - LWLockRelease(partitionLock); - if (reportMemoryError) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of shared memory"), - errhint("You might need to increase max_locks_per_transaction."))); - else - return LOCKACQUIRE_NOT_AVAIL; + return NULL; } - locallock->proclock = proclock; /* * If new, initialize the new entry */ if (!found) { + uint32 partition = LockHashPartition(hashcode); + proclock->holdMask = 0; proclock->releaseMask = 0; /* Add proclock to appropriate lists */ SHMQueueInsertBefore(&lock->procLocks, &proclock->lockLink); - SHMQueueInsertBefore(&(MyProc->myProcLocks[partition]), + SHMQueueInsertBefore(&(proc->myProcLocks[partition]), &proclock->procLink); PROCLOCK_PRINT("LockAcquire: new", proclock); } @@ -779,130 +1101,7 @@ LockAcquireExtended(const LOCKTAG *locktag, lock->tag.locktag_field1, lock->tag.locktag_field2, lock->tag.locktag_field3); - /* - * If lock requested conflicts with locks requested by waiters, must join - * wait queue. Otherwise, check for conflict with already-held locks. - * (That's last because most complex check.) - */ - if (lockMethodTable->conflictTab[lockmode] & lock->waitMask) - status = STATUS_FOUND; - else - status = LockCheckConflicts(lockMethodTable, lockmode, - lock, proclock, MyProc); - - if (status == STATUS_OK) - { - /* No conflict with held or previously requested locks */ - GrantLock(lock, proclock, lockmode); - GrantLockLocal(locallock, owner); - } - else - { - Assert(status == STATUS_FOUND); - - /* - * We can't acquire the lock immediately. If caller specified no - * blocking, remove useless table entries and return NOT_AVAIL without - * waiting. 
- */ - if (dontWait) - { - if (proclock->holdMask == 0) - { - SHMQueueDelete(&proclock->lockLink); - SHMQueueDelete(&proclock->procLink); - if (!hash_search_with_hash_value(LockMethodProcLockHash, - (void *) &(proclock->tag), - proclock_hashcode, - HASH_REMOVE, - NULL)) - elog(PANIC, "proclock table corrupted"); - } - else - PROCLOCK_PRINT("LockAcquire: NOWAIT", proclock); - lock->nRequested--; - lock->requested[lockmode]--; - LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode); - Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0)); - Assert(lock->nGranted <= lock->nRequested); - LWLockRelease(partitionLock); - if (locallock->nLocks == 0) - RemoveLocalLock(locallock); - return LOCKACQUIRE_NOT_AVAIL; - } - - /* - * In Hot Standby perform early deadlock detection in normal backends. - * If deadlock found we release partition lock but do not return. - */ - if (RecoveryInProgress() && !InRecovery) - CheckRecoveryConflictDeadlock(partitionLock); - - /* - * Set bitmask of locks this process already holds on this object. - */ - MyProc->heldLocks = proclock->holdMask; - - /* - * Sleep till someone wakes me up. - */ - - TRACE_POSTGRESQL_LOCK_WAIT_START(locktag->locktag_field1, - locktag->locktag_field2, - locktag->locktag_field3, - locktag->locktag_field4, - locktag->locktag_type, - lockmode); - - WaitOnLock(locallock, owner); - - TRACE_POSTGRESQL_LOCK_WAIT_DONE(locktag->locktag_field1, - locktag->locktag_field2, - locktag->locktag_field3, - locktag->locktag_field4, - locktag->locktag_type, - lockmode); - - /* - * NOTE: do not do any material change of state between here and - * return. All required changes in locktable state must have been - * done when the lock was granted to us --- see notes in WaitOnLock. - */ - - /* - * Check the proclock entry status, in case something in the ipc - * communication doesn't work correctly. 
- */ - if (!(proclock->holdMask & LOCKBIT_ON(lockmode))) - { - PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock); - LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode); - /* Should we retry ? */ - LWLockRelease(partitionLock); - elog(ERROR, "LockAcquire failed"); - } - PROCLOCK_PRINT("LockAcquire: granted", proclock); - LOCK_PRINT("LockAcquire: granted", lock, lockmode); - } - - LWLockRelease(partitionLock); - - /* - * Emit a WAL record if acquisition of this lock need to be replayed in a - * standby server. - */ - if (log_lock) - { - /* - * Decode the locktag back to the original values, to avoid sending - * lots of empty bytes with every message. See lock.h to check how a - * locktag is defined for LOCKTAG_RELATION - */ - LogAccessExclusiveLock(locktag->locktag_field1, - locktag->locktag_field2); - } - - return LOCKACQUIRE_OK; + return proclock; } /* @@ -913,6 +1112,25 @@ RemoveLocalLock(LOCALLOCK *locallock) { pfree(locallock->lockOwners); locallock->lockOwners = NULL; + if (locallock->holdsStrongLockCount) + { + uint32 fasthashcode; + fasthashcode = FastPathStrongLockHashPartition(locallock->hashcode); + +#if DEBUG_FAST_LOCK > 0 + { + LOCKTAG *locktag = &locallock->tag.lock; + elog(WARNING, "pid %d DE-EMBARGO rel %u/%u mode %s partition %d", + MyProcPid, locktag->locktag_field1, locktag->locktag_field2, + lock_mode_names[locallock->tag.mode], fasthashcode); + } +#endif + SpinLockAcquire(&FastPathStrongLocks->mutex); + Assert(FastPathStrongLocks->count[fasthashcode] > 0); + FastPathStrongLocks->count[fasthashcode]--; + locallock->holdsStrongLockCount = 0; + SpinLockRelease(&FastPathStrongLocks->mutex); + } if (!hash_search(LockMethodLocalHash, (void *) &(locallock->tag), HASH_REMOVE, NULL)) @@ -1439,6 +1657,36 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) if (locallock->nLocks > 0) return TRUE; + /* Locks that participate in the fast path require special handling. 
*/ + if (FastPathTag(locktag) && FastPathWeakMode(lockmode) + && FastPathLocalUseCount > 0) + { + bool released; + + /* + * We might not find the lock here, even if we originally entered + * it here. Another backend may have moved it to the main table. + */ + LWLockAcquire(MyProc->fpLWLock, LW_EXCLUSIVE); + released = FastPathUnGrantLock(locktag->locktag_field2, lockmode); + LWLockRelease(MyProc->fpLWLock); + if (released) + { +#if DEBUG_FAST_LOCK > 1 + elog(WARNING, "pid %d fast release rel %u/%u mode %s used %d", + MyProcPid, locktag->locktag_field1, locktag->locktag_field2, + lock_mode_names[lockmode], FastPathLocalUseCount); +#endif + RemoveLocalLock(locallock); + return TRUE; + } +#if DEBUG_FAST_LOCK > 0 + elog(WARNING, "pid %d FALLTHROUGH release rel %u/%u mode %s", + MyProcPid, locktag->locktag_field1, locktag->locktag_field2, + lock_mode_names[lockmode]); +#endif + } + /* * Otherwise we've got to mess with the shared lock table. */ @@ -1447,11 +1695,34 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) LWLockAcquire(partitionLock, LW_EXCLUSIVE); /* - * We don't need to re-find the lock or proclock, since we kept their - * addresses in the locallock table, and they couldn't have been removed - * while we were holding a lock on them. + * Normally, we don't need to re-find the lock or proclock, since we kept + * their addresses in the locallock table, and they couldn't have been + * removed while we were holding a lock on them. But it's possible that + * the locks have been moved to the main hash table by another backend, in + * which case we might need to go look them up after all. 
*/ lock = locallock->lock; + if (!lock) + { + PROCLOCKTAG proclocktag; + bool found; + + Assert(FastPathTag(locktag) && FastPathWeakMode(lockmode)); + lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash, + (void *) locktag, + locallock->hashcode, + HASH_FIND, + &found); + Assert(found && lock != NULL); + locallock->lock = lock; + + proclocktag.myLock = lock; + proclocktag.myProc = MyProc; + locallock->proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash, + (void *) &proclocktag, + HASH_FIND, &found); + Assert(found); + } LOCK_PRINT("LockRelease: found", lock, lockmode); proclock = locallock->proclock; PROCLOCK_PRINT("LockRelease: found", proclock); @@ -1529,6 +1800,7 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) LOCK *lock; PROCLOCK *proclock; int partition; + bool have_fast_path_lwlock = false; if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) elog(ERROR, "unrecognized lock method: %d", lockmethodid); @@ -1554,11 +1826,69 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) { if (locallock->proclock == NULL || locallock->lock == NULL) { + LOCKMODE lockmode = locallock->tag.mode; + Oid relid; + /* - * We must've run out of shared memory while trying to set up this - * lock. Just forget the local entry. + * If the LOCALLOCK entry is unused, we must've run out of shared + * memory while trying to set up this lock. Just forget the local + * entry. */ - Assert(locallock->nLocks == 0); + if (locallock->nLocks == 0) + { + RemoveLocalLock(locallock); + continue; + } + + /* + * Otherwise, we should be dealing with a lock acquired via the + * fast-path. If not, we've got trouble. + */ + if (!FastPathTag(&locallock->tag.lock) + || !FastPathWeakMode(lockmode)) + elog(PANIC, "locallock table corrupted"); + + /* + * If we don't currently hold the LWLock that protects our + * fast-path data structures, we must acquire it before + * attempting to release the lock via the fast-path. 
+ */ + if (!have_fast_path_lwlock) + { + LWLockAcquire(MyProc->fpLWLock, LW_EXCLUSIVE); + have_fast_path_lwlock = true; + } + + /* Attempt fast-path release. */ + relid = locallock->tag.lock.locktag_field2; + if (FastPathUnGrantLock(relid, lockmode)) + { +#if DEBUG_FAST_LOCK > 1 + elog(WARNING, "pid %d fast ReleaseAll rel %u/%u mode %s used %d", + MyProcPid, locallock->tag.lock.locktag_field1, relid, + lock_mode_names[lockmode], FastPathLocalUseCount); +#endif + RemoveLocalLock(locallock); + continue; + } + + /* + * Our lock, originally taken via the fast path, has been + * transferred to the main lock table. That's going to require + * some extra work, so release our fast-path lock before starting. + */ + LWLockRelease(MyProc->fpLWLock); + have_fast_path_lwlock = false; + + /* + * Now dump the lock. We haven't got a pointer to the LOCK or + * PROCLOCK in this case, so we have to handle this a bit + * differently than a normal lock release. Unfortunately, this + * requires an extra LWLock acquire-and-release cycle on the + * partitionLock, but hopefully it shouldn't happen often. + */ + LockRefindAndRelease(lockMethodTable, MyProc, + &locallock->tag.lock, lockmode, false); RemoveLocalLock(locallock); continue; } @@ -1606,6 +1936,9 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) RemoveLocalLock(locallock); } + if (have_fast_path_lwlock) + LWLockRelease(MyProc->fpLWLock); + /* * Now, scan each lock partition separately. */ @@ -1824,6 +2157,235 @@ LockReassignCurrentOwner(void) } } +/* + * FastPathGrantLock + * Grant lock using per-backend fast-path array, if there is space. + */ +static bool +FastPathGrantLock(Oid relid, LOCKMODE lockmode) +{ + uint32 f; + uint32 unused_slot = FP_LOCK_SLOTS_PER_BACKEND; + + /* Scan for existing entry for this relid, remembering empty slot. 
*/ + for (f = 0; f < FP_LOCK_SLOTS_PER_BACKEND; ++f) + { + if (FAST_PATH_GET_BITS(MyProc, f) == 0) + unused_slot = f; + else if (MyProc->fpRelId[f] == relid) + { + Assert(!FAST_PATH_CHECK_LOCKMODE(MyProc, f, lockmode)); + FAST_PATH_SET_LOCKMODE(MyProc, f, lockmode); + return true; + } + } + + /* If no existing entry, use any empty slot. */ + if (unused_slot < FP_LOCK_SLOTS_PER_BACKEND) + { + MyProc->fpRelId[unused_slot] = relid; + FAST_PATH_SET_LOCKMODE(MyProc, unused_slot, lockmode); + ++FastPathLocalUseCount; + return true; + } + + /* No existing entry, and no empty slot. */ + return false; +} + +/* + * FastPathUnGrantLock + * Release fast-path lock, if present. Update backend-private local + * use count, while we're at it. + */ +static bool +FastPathUnGrantLock(Oid relid, LOCKMODE lockmode) +{ + uint32 f; + bool result = false; + + FastPathLocalUseCount = 0; + for (f = 0; f < FP_LOCK_SLOTS_PER_BACKEND; ++f) + { + if (MyProc->fpRelId[f] == relid + && FAST_PATH_CHECK_LOCKMODE(MyProc, f, lockmode)) + { + Assert(!result); + FAST_PATH_CLEAR_LOCKMODE(MyProc, f, lockmode); + result = true; + } + if (FAST_PATH_GET_BITS(MyProc, f) != 0) + ++FastPathLocalUseCount; + } + return result; +} + +/* + * FastPathTransferLocks + * Transfer locks matching the given lock tag from per-backend fast-path + * arrays to the shared hash table. + */ +static bool +FastPathTransferLocks(LockMethod lockMethodTable, const LOCKTAG *locktag, + uint32 hashcode) +{ + LWLockId partitionLock = LockHashPartitionLock(hashcode); + Oid relid = locktag->locktag_field2; + uint32 i; + + /* + * Every PGPROC that can potentially hold a fast-path lock is present + * in ProcGlobal->allProcs. Prepared transactions are not, but + * any outstanding fast-path locks held by prepared transactions are + * transferred to the main lock table. 
+ */ + for (i = 0; i < ProcGlobal->allProcCount; ++i) + { + PGPROC *proc = &ProcGlobal->allProcs[i]; + uint32 f; + + LWLockAcquire(proc->fpLWLock, LW_EXCLUSIVE); + + /* + * If the target backend isn't referencing the same database as we are, + * then we needn't examine the individual relation IDs at all; none of + * them can be relevant. + * + * proc->databaseId is set at backend startup time and never changes + * thereafter, so it might be safe to perform this test before + * acquiring proc->fpLWLock. In particular, it's certainly safe to + * assume that if the target backend holds any fast-path locks, it must + * have performed a memory-fencing operation (in particular, an LWLock + * acquisition) since setting proc->databaseId. However, it's less + * clear that our backend is certain to have performed a memory fencing + * operation since the other backend set proc->databaseId. So for now, + * we test it after acquiring the LWLock just to be safe. + */ + if (proc->databaseId != MyDatabaseId) + { + LWLockRelease(proc->fpLWLock); + continue; + } + + for (f = 0; f < FP_LOCK_SLOTS_PER_BACKEND; ++f) + { + uint32 lockmode; + + /* Look for an allocated slot matching the given relid. */ + if (relid != proc->fpRelId[f] || FAST_PATH_GET_BITS(proc, f) == 0) + continue; + + /* Find or create lock object. 
*/ + LWLockAcquire(partitionLock, LW_EXCLUSIVE); + for (lockmode = FAST_PATH_LOCKNUMBER_OFFSET; + lockmode < FAST_PATH_LOCKNUMBER_OFFSET+FAST_PATH_BITS_PER_SLOT; + ++lockmode) + { + PROCLOCK *proclock; + + if (!FAST_PATH_CHECK_LOCKMODE(proc, f, lockmode)) + continue; + proclock = SetupLockInTable(lockMethodTable, proc, locktag, + hashcode, lockmode); + if (!proclock) + { + LWLockRelease(partitionLock); + return false; + } + GrantLock(proclock->tag.myLock, proclock, lockmode); + FAST_PATH_CLEAR_LOCKMODE(proc, f, lockmode); + } + LWLockRelease(partitionLock); + } + LWLockRelease(proc->fpLWLock); + } + return true; +} + +/* + * FastPathGetLockEntry + * Return the PROCLOCK for a lock originally taken via the fast-path, + * transferring it to the primary lock table if necessary. + */ +static PROCLOCK * +FastPathGetLockEntry(LOCALLOCK *locallock) +{ + LockMethod lockMethodTable = LockMethods[DEFAULT_LOCKMETHOD]; + LOCKTAG *locktag = &locallock->tag.lock; + PROCLOCK *proclock = NULL; + LWLockId partitionLock = LockHashPartitionLock(locallock->hashcode); + Oid relid = locktag->locktag_field2; + uint32 f; + + LWLockAcquire(MyProc->fpLWLock, LW_EXCLUSIVE); + + for (f = 0; f < FP_LOCK_SLOTS_PER_BACKEND; ++f) + { + uint32 lockmode; + + /* Look for an allocated slot matching the given relid. */ + if (relid != MyProc->fpRelId[f] || FAST_PATH_GET_BITS(MyProc, f) == 0) + continue; + + /* If we don't have a lock of the given mode, forget it! */ + lockmode = locallock->tag.mode; + if (!FAST_PATH_CHECK_LOCKMODE(MyProc, f, lockmode)) + break; + + /* Find or create lock object. 
*/ + LWLockAcquire(partitionLock, LW_EXCLUSIVE); + + proclock = SetupLockInTable(lockMethodTable, MyProc, locktag, + locallock->hashcode, lockmode); + if (!proclock) + { + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of shared memory"), + errhint("You might need to increase max_locks_per_transaction."))); + } + GrantLock(proclock->tag.myLock, proclock, lockmode); + FAST_PATH_CLEAR_LOCKMODE(MyProc, f, lockmode); + + LWLockRelease(partitionLock); + } + + LWLockRelease(MyProc->fpLWLock); + + /* Lock may have already been transferred by some other backend. */ + if (proclock == NULL) + { + LOCK *lock; + PROCLOCKTAG proclocktag; + uint32 proclock_hashcode; + + LWLockAcquire(partitionLock, LW_SHARED); + + lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash, + (void *) locktag, + locallock->hashcode, + HASH_FIND, + NULL); + if (!lock) + elog(ERROR, "failed to re-find shared lock object"); + + proclocktag.myLock = lock; + proclocktag.myProc = MyProc; + + proclock_hashcode = ProcLockHashCode(&proclocktag, locallock->hashcode); + proclock = (PROCLOCK *) + hash_search_with_hash_value(LockMethodProcLockHash, + (void *) &proclocktag, + proclock_hashcode, + HASH_FIND, + NULL); + if (!proclock) + elog(ERROR, "failed to re-find shared proclock object"); + LWLockRelease(partitionLock); + } + + return proclock; +} /* * GetLockConflicts @@ -1945,6 +2507,98 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode) return vxids; } +/* + * Find a lock in the shared lock table and release it. It is the caller's + * responsibility to verify that this is a sane thing to do. (For example, it + * would be bad to release a lock here if there might still be a LOCALLOCK + * object with pointers to it.) 
+ * + * We currently use this in two situations: first, to release locks held by + * prepared transactions on commit (see lock_twophase_postcommit); and second, + * to release locks taken via the fast-path, transferred to the main hash + * table, and then released (see LockReleaseAll). + */ +static void +LockRefindAndRelease(LockMethod lockMethodTable, PGPROC *proc, + LOCKTAG *locktag, LOCKMODE lockmode, + bool decrement_strong_lock_count) +{ + LOCK *lock; + PROCLOCK *proclock; + PROCLOCKTAG proclocktag; + uint32 hashcode; + uint32 proclock_hashcode; + LWLockId partitionLock; + bool wakeupNeeded; + + hashcode = LockTagHashCode(locktag); + partitionLock = LockHashPartitionLock(hashcode); + + LWLockAcquire(partitionLock, LW_EXCLUSIVE); + + /* + * Re-find the lock object (it had better be there). + */ + lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash, + (void *) locktag, + hashcode, + HASH_FIND, + NULL); + if (!lock) + elog(PANIC, "failed to re-find shared lock object"); + + /* + * Re-find the proclock object (ditto). + */ + proclocktag.myLock = lock; + proclocktag.myProc = proc; + + proclock_hashcode = ProcLockHashCode(&proclocktag, hashcode); + + proclock = (PROCLOCK *) hash_search_with_hash_value(LockMethodProcLockHash, + (void *) &proclocktag, + proclock_hashcode, + HASH_FIND, + NULL); + if (!proclock) + elog(PANIC, "failed to re-find shared proclock object"); + + /* + * Double-check that we are actually holding a lock of the type we want to + * release. + */ + if (!(proclock->holdMask & LOCKBIT_ON(lockmode))) + { + PROCLOCK_PRINT("lock_twophase_postcommit: WRONGTYPE", proclock); + LWLockRelease(partitionLock); + elog(WARNING, "you don't own a lock of type %s", + lockMethodTable->lockModeNames[lockmode]); + return; + } + + /* + * Do the releasing. CleanUpLock will waken any now-wakable waiters. 
+ */ + wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable); + + CleanUpLock(lock, proclock, + lockMethodTable, hashcode, + wakeupNeeded); + + LWLockRelease(partitionLock); + + /* + * Decrement strong lock count. This logic is needed only for 2PC. + */ + if (decrement_strong_lock_count + && FastPathTag(&lock->tag) && FastPathStrongMode(lockmode)) + { + uint32 fasthashcode = FastPathStrongLockHashPartition(hashcode); + SpinLockAcquire(&FastPathStrongLocks->mutex); + FastPathStrongLocks->count[fasthashcode]--; + SpinLockRelease(&FastPathStrongLocks->mutex); + } +} /* * AtPrepare_Locks @@ -1966,8 +2620,10 @@ AtPrepare_Locks(void) LOCALLOCK *locallock; /* - * We don't need to touch shared memory for this --- all the necessary - * state information is in the locallock table. + * For the most part, we don't need to touch shared memory for this --- + * all the necessary state information is in the locallock table. + * Fast-path locks are an exception, however: we move any such locks + * to the main table before allowing PREPARE TRANSACTION to succeed. */ hash_seq_init(&status, LockMethodLocalHash); @@ -2001,6 +2657,24 @@ AtPrepare_Locks(void) } /* + * If the local lock was taken via the fast-path, we need to move it + * to the primary lock table, or just get a pointer to the existing + * primary lock table if by chance it's already been transferred. + */ + if (locallock->proclock == NULL) + { + locallock->proclock = FastPathGetLockEntry(locallock); + locallock->lock = locallock->proclock->tag.myLock; + } + + /* + * Arrange not to release any strong lock count held by this lock + * entry. We must retain the count until the prepared transaction + * is committed or rolled back. + */ + locallock->holdsStrongLockCount = 0; + + /* * Create a 2PC record. 
*/ memcpy(&(record.locktag), &(locallock->tag.lock), sizeof(LOCKTAG)); @@ -2658,6 +3332,18 @@ lock_twophase_recover(TransactionId xid, uint16 info, */ GrantLock(lock, proclock, lockmode); + /* + * Bump strong lock count, to make sure any fast-path lock requests won't + * be granted without consulting the primary lock table. + */ + if (FastPathTag(&lock->tag) && FastPathStrongMode(lockmode)) + { + uint32 fasthashcode = FastPathStrongLockHashPartition(hashcode); + SpinLockAcquire(&FastPathStrongLocks->mutex); + FastPathStrongLocks->count[fasthashcode]++; + SpinLockRelease(&FastPathStrongLocks->mutex); + } + LWLockRelease(partitionLock); } @@ -2704,81 +3390,18 @@ lock_twophase_postcommit(TransactionId xid, uint16 info, TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata; PGPROC *proc = TwoPhaseGetDummyProc(xid); LOCKTAG *locktag; - LOCKMODE lockmode; LOCKMETHODID lockmethodid; - LOCK *lock; - PROCLOCK *proclock; - PROCLOCKTAG proclocktag; - uint32 hashcode; - uint32 proclock_hashcode; - LWLockId partitionLock; LockMethod lockMethodTable; - bool wakeupNeeded; Assert(len == sizeof(TwoPhaseLockRecord)); locktag = &rec->locktag; - lockmode = rec->lockmode; lockmethodid = locktag->locktag_lockmethodid; if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) elog(ERROR, "unrecognized lock method: %d", lockmethodid); lockMethodTable = LockMethods[lockmethodid]; - hashcode = LockTagHashCode(locktag); - partitionLock = LockHashPartitionLock(hashcode); - - LWLockAcquire(partitionLock, LW_EXCLUSIVE); - - /* - * Re-find the lock object (it had better be there). - */ - lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash, - (void *) locktag, - hashcode, - HASH_FIND, - NULL); - if (!lock) - elog(PANIC, "failed to re-find shared lock object"); - - /* - * Re-find the proclock object (ditto). 
- */ - proclocktag.myLock = lock; - proclocktag.myProc = proc; - - proclock_hashcode = ProcLockHashCode(&proclocktag, hashcode); - - proclock = (PROCLOCK *) hash_search_with_hash_value(LockMethodProcLockHash, - (void *) &proclocktag, - proclock_hashcode, - HASH_FIND, - NULL); - if (!proclock) - elog(PANIC, "failed to re-find shared proclock object"); - - /* - * Double-check that we are actually holding a lock of the type we want to - * release. - */ - if (!(proclock->holdMask & LOCKBIT_ON(lockmode))) - { - PROCLOCK_PRINT("lock_twophase_postcommit: WRONGTYPE", proclock); - LWLockRelease(partitionLock); - elog(WARNING, "you don't own a lock of type %s", - lockMethodTable->lockModeNames[lockmode]); - return; - } - - /* - * Do the releasing. CleanUpLock will waken any now-wakable waiters. - */ - wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable); - - CleanUpLock(lock, proclock, - lockMethodTable, hashcode, - wakeupNeeded); - - LWLockRelease(partitionLock); + LockRefindAndRelease(lockMethodTable, proc, locktag, rec->lockmode, true); } /* diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 0fe7ce4..8fae67e 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -167,6 +167,9 @@ NumLWLocks(void) /* bufmgr.c needs two for each shared buffer */ numLocks += 2 * NBuffers; + /* lock.c needs one per backend */ + numLocks += MaxBackends; + /* clog.c needs one per CLOG buffer */ numLocks += NUM_CLOG_BUFFERS; diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index ee03316..9cc376e 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -67,7 +67,7 @@ PGPROC *MyProc = NULL; NON_EXEC_STATIC slock_t *ProcStructLock = NULL; /* Pointers to shared-memory structures */ -NON_EXEC_STATIC PROC_HDR *ProcGlobal = NULL; +PROC_HDR *ProcGlobal = NULL; NON_EXEC_STATIC PGPROC *AuxiliaryProcs = NULL; /* If we are waiting for a lock, this points to the 
associated LOCALLOCK */ @@ -160,6 +160,7 @@ InitProcGlobal(void) PGPROC *procs; int i; bool found; + uint32 TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS; /* Create the ProcGlobal shared structure */ ProcGlobal = (PROC_HDR *) @@ -167,68 +168,61 @@ InitProcGlobal(void) Assert(!found); /* - * Create the PGPROC structures for auxiliary (bgwriter) processes, too. - * These do not get linked into the freeProcs list. - */ - AuxiliaryProcs = (PGPROC *) - ShmemInitStruct("AuxiliaryProcs", NUM_AUXILIARY_PROCS * sizeof(PGPROC), - &found); - Assert(!found); - - /* * Initialize the data structures. */ + ProcGlobal->spins_per_delay = DEFAULT_SPINS_PER_DELAY; ProcGlobal->freeProcs = NULL; ProcGlobal->autovacFreeProcs = NULL; - ProcGlobal->spins_per_delay = DEFAULT_SPINS_PER_DELAY; - - /* - * Pre-create the PGPROC structures and create a semaphore for each. - */ - procs = (PGPROC *) ShmemAlloc((MaxConnections) * sizeof(PGPROC)); - if (!procs) - ereport(FATAL, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of shared memory"))); - MemSet(procs, 0, MaxConnections * sizeof(PGPROC)); - for (i = 0; i < MaxConnections; i++) - { - PGSemaphoreCreate(&(procs[i].sem)); - procs[i].links.next = (SHM_QUEUE *) ProcGlobal->freeProcs; - ProcGlobal->freeProcs = &procs[i]; - InitSharedLatch(&procs[i].waitLatch); - } - /* - * Likewise for the PGPROCs reserved for autovacuum. + * Create and initialize all the PGPROC structures we'll need. * - * Note: the "+1" here accounts for the autovac launcher + * There are three separate consumers of PGPROC structures: (1) normal + * backends, (2) autovacuum workers and the autovacuum launcher, and (3) + * auxiliary processes. Each PGPROC structure is dedicated to exactly + * one of these purposes, and they do not move between groups. 
*/ - procs = (PGPROC *) ShmemAlloc((autovacuum_max_workers + 1) * sizeof(PGPROC)); + procs = (PGPROC *) ShmemAlloc(TotalProcs * sizeof(PGPROC)); + ProcGlobal->allProcs = procs; + ProcGlobal->allProcCount = TotalProcs; if (!procs) ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory"))); - MemSet(procs, 0, (autovacuum_max_workers + 1) * sizeof(PGPROC)); - for (i = 0; i < autovacuum_max_workers + 1; i++) + MemSet(procs, 0, TotalProcs * sizeof(PGPROC)); + for (i = 0; i < TotalProcs; i++) { + /* Common initialization for all PGPROCs, regardless of type. */ PGSemaphoreCreate(&(procs[i].sem)); - procs[i].links.next = (SHM_QUEUE *) ProcGlobal->autovacFreeProcs; - ProcGlobal->autovacFreeProcs = &procs[i]; + procs[i].fpLWLock = LWLockAssign(); InitSharedLatch(&procs[i].waitLatch); + + /* + * Newly created PGPROCs for normal backends or for autovacuum must + * be queued up on the appropriate free list. Because there can only + * ever be a small, fixed number of auxiliary processes, no free + * list is used in that case; InitAuxiliaryProcess() instead uses a + * linear search. + */ + if (i < MaxConnections) + { + /* PGPROC for normal backend, add to freeProcs list */ + procs[i].links.next = (SHM_QUEUE *) ProcGlobal->freeProcs; + ProcGlobal->freeProcs = &procs[i]; + } + else if (i < MaxBackends) + { + /* PGPROC for AV launcher/worker, add to autovacFreeProcs list */ + procs[i].links.next = (SHM_QUEUE *) ProcGlobal->autovacFreeProcs; + ProcGlobal->autovacFreeProcs = &procs[i]; + } } /* - * And auxiliary procs. + * Save a pointer to the block of PGPROC structures reserved for + * auxiliary processes.
*/ - MemSet(AuxiliaryProcs, 0, NUM_AUXILIARY_PROCS * sizeof(PGPROC)); - for (i = 0; i < NUM_AUXILIARY_PROCS; i++) - { - AuxiliaryProcs[i].pid = 0; /* marks auxiliary proc as not in use */ - PGSemaphoreCreate(&(AuxiliaryProcs[i].sem)); - InitSharedLatch(&procs[i].waitLatch); - } + AuxiliaryProcs = &procs[MaxBackends]; /* Create ProcStructLock spinlock, too */ ProcStructLock = (slock_t *) ShmemAlloc(sizeof(slock_t)); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 7ec961f..6df878d 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -412,6 +412,7 @@ typedef struct LOCALLOCK int64 nLocks; /* total number of times lock is held */ int numLockOwners; /* # of relevant ResourceOwners */ int maxLockOwners; /* allocated size of array */ + int holdsStrongLockCount; /* did we bump FastPathStrongLocks? */ LOCALLOCKOWNER *lockOwners; /* dynamically resizable array */ } LOCALLOCK; diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 4819cb8..938f47d 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -51,6 +51,15 @@ struct XidCache #define PROC_VACUUM_STATE_MASK (0x0E) /* + * We allow a small number of "weak" relation locks (AccessShareLock, + * RowShareLock, RowExclusiveLock) to be recorded in the PGPROC structure + * rather than the main lock table. This eases contention on the lock + * manager LWLocks in workloads that have a small number of "hot" tables. + * See storage/lmgr/lock.c for full details. + */ +#define FP_LOCK_SLOTS_PER_BACKEND 16 + +/* * Each backend has a PGPROC struct in shared memory. There is also a list of * currently-unused PGPROC structs that will be reallocated to new backends.
* @@ -136,6 +145,11 @@ struct PGPROC */ SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS]; + /* Info about fast-path locks taken by this backend */ + LWLockId fpLWLock; /* protects the fields below */ + uint64 fpLockBits; /* lock modes held for each fast-path slot */ + Oid fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* slots for rel oids */ + struct XidCache subxids; /* cache for subtransaction XIDs */ }; @@ -150,6 +164,10 @@ extern PGDLLIMPORT PGPROC *MyProc; */ typedef struct PROC_HDR { + /* Array of PGPROC structures (not including dummies for prepared txns) */ + PGPROC *allProcs; + /* Length of allProcs array */ + uint32 allProcCount; /* Head of list of free PGPROC structures */ PGPROC *freeProcs; /* Head of list of autovacuum's free PGPROC structures */ @@ -163,6 +181,8 @@ typedef struct PROC_HDR int startupBufferPinWaitBufId; } PROC_HDR; +extern PROC_HDR *ProcGlobal; + /* * We set aside some extra PGPROC structures for auxiliary processes, * ie things that aren't full-fledged backends but need shmem access.