Skip to content

Commit f374fb4

Browse files
anarazelmichaelpq
authored andcommitted
lwlock: Fix quadratic behavior with very long wait lists
Until now LWLockDequeueSelf() sequentially searched the list of waiters to see if the current proc is still is on the list of waiters, or has already been removed. In extreme workloads, where the wait lists are very long, this leads to a quadratic behavior. #backends iterating over a list #backends long. Additionally, the likelihood of needing to call LWLockDequeueSelf() in the first place also increases with the increased length of the wait queue, as it becomes more likely that a lock is released while waiting for the wait list lock, which is held for longer during lock release. Due to the exponential back-off in perform_spin_delay() this is surprisingly hard to detect. We should make that easier, e.g. by adding a wait event around the pg_usleep() - but that's a separate patch. The fix is simple - track whether a proc is currently waiting in the wait list or already removed but waiting to be woken up in PGPROC->lwWaiting. In some workloads with a lot of clients contending for a small number of lwlocks (e.g. WALWriteLock), the fix can substantially increase throughput. This has been originally fixed for 16~ with a4adc31 without a backpatch, and we have heard complaints from users impacted by this quadratic behavior in older versions as well. Author: Andres Freund <andres@anarazel.de> Reviewed-by: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Discussion: https://postgr.es/m/20221027165914.2hofzp4cvutj6gin@awork3.anarazel.de Discussion: https://postgr.es/m/CALj2ACXktNbG=K8Xi7PSqbofTZozavhaxjatVc14iYaLu4Maag@mail.gmail.com Backpatch-through: 12
1 parent 90ccc9b commit f374fb4

File tree

5 files changed

+42
-27
lines changed

5 files changed

+42
-27
lines changed

src/backend/access/transam/twophase.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
486486
proc->roleId = owner;
487487
proc->tempNamespaceId = InvalidOid;
488488
proc->isBackgroundWorker = false;
489-
proc->lwWaiting = false;
489+
proc->lwWaiting = LW_WS_NOT_WAITING;
490490
proc->lwWaitMode = 0;
491491
proc->waitLock = NULL;
492492
proc->waitProcLock = NULL;

src/backend/storage/lmgr/lwlock.c

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -987,6 +987,15 @@ LWLockWakeup(LWLock *lock)
987987
wokeup_somebody = true;
988988
}
989989

990+
/*
991+
* Signal that the process isn't on the wait list anymore. This allows
992+
* LWLockDequeueSelf() to remove itself of the waitlist with a
993+
* proclist_delete(), rather than having to check if it has been
994+
* removed from the list.
995+
*/
996+
Assert(waiter->lwWaiting == LW_WS_WAITING);
997+
waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
998+
990999
/*
9911000
* Once we've woken up an exclusive lock, there's no point in waking
9921001
* up anybody else.
@@ -1044,7 +1053,7 @@ LWLockWakeup(LWLock *lock)
10441053
* another lock.
10451054
*/
10461055
pg_write_barrier();
1047-
waiter->lwWaiting = false;
1056+
waiter->lwWaiting = LW_WS_NOT_WAITING;
10481057
PGSemaphoreUnlock(waiter->sem);
10491058
}
10501059
}
@@ -1065,15 +1074,15 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
10651074
if (MyProc == NULL)
10661075
elog(PANIC, "cannot wait without a PGPROC structure");
10671076

1068-
if (MyProc->lwWaiting)
1077+
if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
10691078
elog(PANIC, "queueing for lock while waiting on another one");
10701079

10711080
LWLockWaitListLock(lock);
10721081

10731082
/* setting the flag is protected by the spinlock */
10741083
pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
10751084

1076-
MyProc->lwWaiting = true;
1085+
MyProc->lwWaiting = LW_WS_WAITING;
10771086
MyProc->lwWaitMode = mode;
10781087

10791088
/* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
@@ -1100,8 +1109,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
11001109
static void
11011110
LWLockDequeueSelf(LWLock *lock)
11021111
{
1103-
bool found = false;
1104-
proclist_mutable_iter iter;
1112+
bool on_waitlist;
11051113

11061114
#ifdef LWLOCK_STATS
11071115
lwlock_stats *lwstats;
@@ -1114,18 +1122,13 @@ LWLockDequeueSelf(LWLock *lock)
11141122
LWLockWaitListLock(lock);
11151123

11161124
/*
1117-
* Can't just remove ourselves from the list, but we need to iterate over
1118-
* all entries as somebody else could have dequeued us.
1125+
* Remove ourselves from the waitlist, unless we've already been
1126+
* removed. The removal happens with the wait list lock held, so there's
1127+
* no race in this check.
11191128
*/
1120-
proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
1121-
{
1122-
if (iter.cur == MyProc->pgprocno)
1123-
{
1124-
found = true;
1125-
proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
1126-
break;
1127-
}
1128-
}
1129+
on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
1130+
if (on_waitlist)
1131+
proclist_delete(&lock->waiters, MyProc->pgprocno, lwWaitLink);
11291132

11301133
if (proclist_is_empty(&lock->waiters) &&
11311134
(pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
@@ -1137,8 +1140,8 @@ LWLockDequeueSelf(LWLock *lock)
11371140
LWLockWaitListUnlock(lock);
11381141

11391142
/* clear waiting state again, nice for debugging */
1140-
if (found)
1141-
MyProc->lwWaiting = false;
1143+
if (on_waitlist)
1144+
MyProc->lwWaiting = LW_WS_NOT_WAITING;
11421145
else
11431146
{
11441147
int extraWaits = 0;
@@ -1162,7 +1165,7 @@ LWLockDequeueSelf(LWLock *lock)
11621165
for (;;)
11631166
{
11641167
PGSemaphoreLock(MyProc->sem);
1165-
if (!MyProc->lwWaiting)
1168+
if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
11661169
break;
11671170
extraWaits++;
11681171
}
@@ -1313,7 +1316,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
13131316
for (;;)
13141317
{
13151318
PGSemaphoreLock(proc->sem);
1316-
if (!proc->lwWaiting)
1319+
if (proc->lwWaiting == LW_WS_NOT_WAITING)
13171320
break;
13181321
extraWaits++;
13191322
}
@@ -1478,7 +1481,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
14781481
for (;;)
14791482
{
14801483
PGSemaphoreLock(proc->sem);
1481-
if (!proc->lwWaiting)
1484+
if (proc->lwWaiting == LW_WS_NOT_WAITING)
14821485
break;
14831486
extraWaits++;
14841487
}
@@ -1694,7 +1697,7 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
16941697
for (;;)
16951698
{
16961699
PGSemaphoreLock(proc->sem);
1697-
if (!proc->lwWaiting)
1700+
if (proc->lwWaiting == LW_WS_NOT_WAITING)
16981701
break;
16991702
extraWaits++;
17001703
}
@@ -1772,6 +1775,10 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
17721775

17731776
proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
17741777
proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
1778+
1779+
/* see LWLockWakeup() */
1780+
Assert(waiter->lwWaiting == LW_WS_WAITING);
1781+
waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
17751782
}
17761783

17771784
/* We are done updating shared state of the lock itself. */
@@ -1787,7 +1794,7 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
17871794
proclist_delete(&wakeup, iter.cur, lwWaitLink);
17881795
/* check comment in LWLockWakeup() about this barrier */
17891796
pg_write_barrier();
1790-
waiter->lwWaiting = false;
1797+
waiter->lwWaiting = LW_WS_NOT_WAITING;
17911798
PGSemaphoreUnlock(waiter->sem);
17921799
}
17931800
}

src/backend/storage/lmgr/proc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ InitProcess(void)
397397
/* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */
398398
if (IsAutoVacuumWorkerProcess())
399399
MyProc->statusFlags |= PROC_IS_AUTOVACUUM;
400-
MyProc->lwWaiting = false;
400+
MyProc->lwWaiting = LW_WS_NOT_WAITING;
401401
MyProc->lwWaitMode = 0;
402402
MyProc->waitLock = NULL;
403403
MyProc->waitProcLock = NULL;
@@ -579,7 +579,7 @@ InitAuxiliaryProcess(void)
579579
MyProc->isBackgroundWorker = IsBackgroundWorker;
580580
MyProc->delayChkptFlags = 0;
581581
MyProc->statusFlags = 0;
582-
MyProc->lwWaiting = false;
582+
MyProc->lwWaiting = LW_WS_NOT_WAITING;
583583
MyProc->lwWaitMode = 0;
584584
MyProc->waitLock = NULL;
585585
MyProc->waitProcLock = NULL;

src/include/storage/lwlock.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,14 @@
2323

2424
struct PGPROC;
2525

26+
/* what state of the wait process is a backend in */
27+
typedef enum LWLockWaitState
28+
{
29+
LW_WS_NOT_WAITING, /* not currently waiting / woken up */
30+
LW_WS_WAITING, /* currently waiting */
31+
LW_WS_PENDING_WAKEUP /* removed from waitlist, but not yet signalled */
32+
} LWLockWaitState;
33+
2634
/*
2735
* Code outside of lwlock.c should not manipulate the contents of this
2836
* structure directly, but we have to declare it here to allow LWLocks to be

src/include/storage/proc.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ struct PGPROC
211211
bool recoveryConflictPending;
212212

213213
/* Info about LWLock the process is currently waiting for, if any. */
214-
bool lwWaiting; /* true if waiting for an LW lock */
214+
uint8 lwWaiting; /* see LWLockWaitState */
215215
uint8 lwWaitMode; /* lwlock mode being waited for */
216216
proclist_node lwWaitLink; /* position in LW lock wait list */
217217

0 commit comments

Comments
 (0)