Skip to content

Commit

Permalink
Merge 7aaa87a into f0a985e
Browse files Browse the repository at this point in the history
  • Loading branch information
jamadden committed May 21, 2021
2 parents f0a985e + 7aaa87a commit 1b4d569
Show file tree
Hide file tree
Showing 8 changed files with 125 additions and 164 deletions.
4 changes: 3 additions & 1 deletion CHANGES.rst
Expand Up @@ -5,7 +5,9 @@
3.5.0a2 (unreleased)
====================

- Nothing changed yet.
- Revert :issue:`469` and return to taking shared locks before
exclusive locks. Testing in a large, busy application indicated that
performance was overall slightly worse this way.


3.5.0a1 (2021-05-17)
Expand Down
1 change: 0 additions & 1 deletion src/relstorage/adapters/adapter.py
Expand Up @@ -217,7 +217,6 @@ def after_lock_share():
# We go ahead and compare the readCurrent TIDs here, so
# that we don't have to make the call to detect conflicts
# or even lock rows if there are readCurrent violations.
# Recall this function is called twice.
for oid_int, expect_tid_int in read_current_oids.items():
actual_tid_int = current.get(oid_int, 0)
if actual_tid_int != expect_tid_int:
Expand Down
25 changes: 3 additions & 22 deletions src/relstorage/adapters/interfaces.py
Expand Up @@ -1534,8 +1534,9 @@ def lock_objects_and_detect_conflicts(
deadlock detection, and starting with MySQL 8, it supports
``NOWAIT``.
In both implementations, though, deadlocks lead to annoying
database server error logs, so they should be avoided.
Transactions that deadlock would have been doomed anyway;
a deadlock is just another way of saying there will be a
readCurrent conflict.
.. rubric:: Lock Order
Expand Down Expand Up @@ -1631,26 +1632,6 @@ def lock_objects_and_detect_conflicts(
immediately able to determine that 2 has been modified and
quickly raise an exception without holding any other locks.
However, this does introduce a problem of its own: The deadlocks
we talked about above are common in this scenario. While that's
fine in-and-of-itself, it does mean that we're possibly missing out
on the chance to catch and resolve conflicts (because the deadlock
errors immediately abort the transaction). Since most databases
support NOWAIT or a good approximation of it, we can avoid the deadlocks
by taking the exclusive locks first, and then taking share locks NOWAIT
(taking the share lock NOWAIT will immediately abort if some other transaction
is already holding an exclusive lock; the reverse is not true).
But what about fast notification of transactions that
are going to have to abort anyway? For databases that implement all of this
in a single step in the database, we can first do a no-lock check
of ``readCurrent`` items, and if anything has changed, we can
bail immediately. It's only in the case that some other transaction is
actually modifying an object that we want to ``readCurrent`` on, *and*
some other transaction is modifying an object that we also want to modify
(so we have to wait on an exclusive lock) that we wind up waiting
longer than we did before to abort a doomed transaction.
:param cursor: The store cursor.
:param read_current_oids: A mapping from oid integer to tid
integer that the transaction expects.
Expand Down
15 changes: 5 additions & 10 deletions src/relstorage/adapters/locker.py
Expand Up @@ -183,22 +183,17 @@ def lock_current_objects(self, cursor, read_current_oid_ints, shared_locks_block
# locked when they are returned by the cursor, so we must
# consume all the rows.

# Lock order is explained in IRelStorageAdapter.lock_objects_and_detect_conflicts.

after_lock_share() # Call once for a quick check
# Lock rows we need shared access to, typically without blocking.
if read_current_oid_ints:
self._lock_readCurrent_oids_for_share(cursor, read_current_oid_ints,
shared_locks_block)
after_lock_share()

# Lock the rows we need exclusive access to.
# This will block for up to ``commit_lock_timeout``,
# possibly * N
self._lock_rows_being_modified(cursor)

# Next lock rows we need shared access to, typically without blocking.
if read_current_oid_ints:
self._lock_readCurrent_oids_for_share(cursor, read_current_oid_ints,
shared_locks_block)
after_lock_share() # call again to verify


def _lock_readCurrent_oids_for_share(self, cursor, current_oids, shared_locks_block):
_, table = self._get_current_objects_query
oids_to_lock = sorted(set(current_oids))
Expand Down
Expand Up @@ -82,40 +82,6 @@ label_proc:BEGIN
SET i = i + 1;
END WHILE;


-- Now return any such rows that we locked
-- that are in conflict.
-- No point doing any more work taking exclusive locks, etc,
-- because we will immediately abort, but we must be careful not to return
-- an extra result set that's empty: that interferes with our socket IO
-- waiting for query results.
SELECT zoid, {CURRENT_OBJECT}.tid
INTO con_oid, con_tid
FROM {CURRENT_OBJECT}
INNER JOIN temp_read_current USING (zoid)
WHERE temp_read_current.tid <> {CURRENT_OBJECT}.tid
LIMIT 1;

IF con_oid IS NOT NULL THEN
SELECT con_oid, con_tid, NULL as prev_tid, NULL as state;
ROLLBACK; -- release locks.
LEAVE label_proc; -- return
END IF;

END IF;


INSERT INTO temp_locked_zoid
SELECT o.zoid
FROM {CURRENT_OBJECT} o FORCE INDEX (PRIMARY)
INNER JOIN temp_store t FORCE INDEX (PRIMARY)
ON o.zoid = t.zoid
WHERE t.prev_tid <> 0
ORDER BY t.zoid
FOR UPDATE;

IF read_current_oids_tids IS NOT NULL THEN

-- The timeout only goes to 1; this procedure seems to always take roughly 2s
-- to actually detect a lock timeout problem, however.
-- TODO: Can we apply the MAX_EXECUTION_TIME() optimizer hint?
Expand Down Expand Up @@ -159,6 +125,18 @@ label_proc:BEGIN

END IF;


INSERT INTO temp_locked_zoid
SELECT o.zoid
FROM {CURRENT_OBJECT} o FORCE INDEX (PRIMARY)
INNER JOIN temp_store t FORCE INDEX (PRIMARY)
ON o.zoid = t.zoid
WHERE t.prev_tid <> 0
ORDER BY t.zoid
FOR UPDATE;



SELECT cur.zoid, cur.tid, temp_store.prev_tid, {OBJECT_STATE_NAME}.state
FROM {CURRENT_OBJECT} cur
INNER JOIN temp_store USING (zoid)
Expand Down
2 changes: 1 addition & 1 deletion src/relstorage/adapters/postgresql/locker.py
Expand Up @@ -62,7 +62,7 @@ def _modify_commit_lock_kind(self, kind, exc):
exc_str = str(exc).lower()
if 'for update' in exc_str:
kind = UnableToLockRowsToModifyError
elif 'return query' in exc_str or 'readCurrent' in exc_str:
elif 'return query' in exc_str or 'readcurrent' in exc_str:
kind = UnableToLockRowsToReadCurrentError
return kind

Expand Down
Expand Up @@ -9,87 +9,56 @@ BEGIN

-- Order matters here.
--
-- We want to first take exclusive locks for the modified objects
-- (and wait if needed) and then take NOWAIT shared locks for the
-- readCurrent objects.
-- We've tried it both ways: Shared locks and then exclusive locks,
-- or exclusive locks and then shared locks.
--
-- If we do it the other way around (RelStorage < 3.5), we can
-- *easily* deadlock in lock order on the server, which would result
-- in us raising ``UnableToLockRows...`` exception, which
-- immediately aborts the transaction and doesn't allow for any
-- conflict resolution.
-- The shared-then-exclusive can fairly easily deadlock; this gets
-- detected by PostgreSQL which kills one of the transactions to
-- resolve the situation. How long it takes to find the deadlock is
-- configurable (up to 1s by default, but typically it happens much
-- faster because each involved PostgreSQL worker is checking in a
-- slightly different schedule). Busy servers are sometimes
-- configured to check less frequently, though. The killed transaction
-- results in an unresolvable conflict and has to be retried. (There was
-- going to be a conflict anyway, almost certainly a ReadConflictError,
-- which is also unresolvable).
--
-- However, taking exclusive first and only then shared NOWAIT
-- doesn't deadlock (I think), certainly not as easily. This means a
-- transaction might have to wait, or timeout, but if it gets
-- through, it has a chance to resolve conflicts and commit.
-- The other way, exclusive-then-shared, eliminates the deadlocks.
-- However, in tests of a busy system, taking the exclusive locks
-- first and then raising a ReadConflictError from the shared locks,
-- actually made performance overall *worse*. The theory is that we
-- spent more time waiting for locks we weren't going to be able to
-- use anyway.
--
-- One of the appealing properties of taking the shared first,
-- though, is that if we *are* doomed to failure (because of a
-- ReadCurrent error), we can know it immediately. We can still make
-- that check before we try to wait for exclusive locks, but since
-- we can't actually lock those rows yet, we need to perform the
-- check *again*, after taking the exclusive locks (and this time
-- lock them to be sure they don't change).

-- lock in share should NOWAIT
-- Thus, take shared locks first.

IF read_current_oids IS NOT NULL THEN
-- A pre-check: If we can detect a single readCurrent conflict,
-- do so, send it to the server where it will raise VoteConflictError,
-- and just bail. We could make this raise an exception and abort the
-- transaction, but we're not holding any locks here, and the error message
-- from VoteConflictError is more informative.
-- XXX: SELECT FOR SHARE does disk I/O! This can become expensive
-- and possibly lead to database issues.
-- See https://buttondown.email/nelhage/archive/22ab771c-25b4-4cd9-b316-31a86f737acc
-- We document this in docs/postgresql/index.rst
--
-- Because of that, and because there are ZODB tests that expect
-- to get a ReadConflictError with an OID and TID in it
-- (check_checkCurrentSerialInTransaction), we first run a check
-- to see if there is definitely an issue. If there is, we just
-- return. Only if we can't be sure do we go on and take out
-- shared locks. We need to do this because if the shared locks detect a
-- conflict, we want to immediately release those locks, and the only way to
-- do that in PostgreSQL is to raise an exception (unlike MySQL we can't
-- ROLLBACK the transaction).
RETURN QUERY -- This just adds to the result table; a final bare ``RETURN`` actually ends execution
SELECT {CURRENT_OBJECT}.zoid, {CURRENT_OBJECT}.tid, NULL::BIGINT, NULL::BYTEA
FROM {CURRENT_OBJECT}
INNER JOIN unnest(read_current_oids, read_current_tids) t(zoid, tid)
USING (zoid)
WHERE {CURRENT_OBJECT}.tid <> t.tid
LIMIT 1;
SELECT {CURRENT_OBJECT}.zoid, {CURRENT_OBJECT}.tid, NULL::BIGINT, NULL::BYTEA
FROM {CURRENT_OBJECT}
INNER JOIN unnest(read_current_oids, read_current_tids) t(zoid, tid)
USING (zoid)
WHERE {CURRENT_OBJECT}.tid <> t.tid
LIMIT 1;

IF FOUND THEN
RETURN;
END IF;
END IF;


-- Unlike MySQL, we can simply do the SELECT (with PERFORM) for its
-- side effects to lock the rows.
-- This one will block. (We set the PG configuration variable ``lock_timeout``
-- from the ``commit-lock-timeout`` configuration variable to determine how long.)

-- A note on the query: PostgreSQL will typically choose a
-- sequential scan on the temp_store table and do a nested loop join
-- against the object_state_pkey index culminating in a sort after
-- the join. This is the same whether we write a WHERE IN (SELECT)
-- or an INNER JOIN. (This is without a WHERE prev_tid <> 0 clause.)

-- That changes substantially if we ANALYZE the temp table;
-- depending on the data, it might do a MERGE join with an index
-- scan on temp_store_pkey (lots of data) or it might do a
-- sequential scan and sort in memory before doing a MERGE join
-- (little data). (Again without the WHERE prev_tid clause.)

-- However, ANALYZE takes time, often more time than it takes to actually
-- do the nested loop join.

-- If we add a WHERE prev_tid clause, even if we ANALYZE, it will
-- choose a sequential scan on temp_store with a FILTER. Given that most
-- transactions contain relatively few objects, and that it would do a
-- sequential scan /anyway/, this is fine, and worth it to avoid probing
-- the main index for new objects.

-- TODO: Is it worth doing partitioning and putting prev_tid = 0 in their own
-- partition? prev_tid isn't the primary key, zoid is, though, and range
-- partitioning has to include the primary key when there is one.

PERFORM {CURRENT_OBJECT}.zoid
FROM {CURRENT_OBJECT}
INNER JOIN temp_store USING(zoid)
WHERE temp_store.prev_tid <> 0
ORDER BY {CURRENT_OBJECT}.zoid
FOR UPDATE OF {CURRENT_OBJECT};

IF read_current_oids IS NOT NULL THEN

-- Doing this in a single query takes some effort to make sure
-- that the required rows all get locked. The optimizer is smart
Expand All @@ -111,11 +80,7 @@ BEGIN
-- keyword alone would be enough to fool it (the plan doesn't
-- change on 11 when we use the keyword)).

-- XXX: SELECT FOR SHARE does disk I/O! This can become expensive
-- and possibly lead to database issues.
-- See https://buttondown.email/nelhage/archive/22ab771c-25b4-4cd9-b316-31a86f737acc
-- We document this in docs/postgresql/index.rst
RETURN QUERY -- This just adds to the result table; a final bare ``RETURN`` actually ends execution
RETURN QUERY
WITH locked AS (
SELECT {CURRENT_OBJECT}.zoid, {CURRENT_OBJECT}.tid, t.tid AS desired
FROM {CURRENT_OBJECT}
Expand All @@ -130,13 +95,52 @@ BEGIN
-- If that failed to get a lock because it is being modified by another transaction,
-- it raised an exception.
IF FOUND THEN
-- We're holding exclusive locks here, so abort the transaction
-- We're holding shared locks here, so abort the transaction
-- and release them; don't wait for Python to do it.
RAISE EXCEPTION USING ERRCODE = 'lock_not_available'
-- Unfortunately, this means we lose the ability to report exactly the
-- object that conflicted.
RAISE EXCEPTION USING ERRCODE = 'lock_not_available',
HINT = 'readCurrent';
END IF;
END IF;

-- Unlike MySQL, we can simply do the SELECT (with PERFORM) for its
-- side effects to lock the rows.
-- This one will block. (We set the PG configuration variable ``lock_timeout``
-- from the ``commit-lock-timeout`` configuration variable to determine how long.)

-- A note on the query: PostgreSQL will typically choose a
-- sequential scan on the temp_store table and do a nested loop join
-- against the object_state_pkey index culminating in a sort after
-- the join. This is the same whether we write a WHERE IN (SELECT)
-- or an INNER JOIN. (This is without a WHERE prev_tid <> 0 clause.)

-- That changes substantially if we ANALYZE the temp table;
-- depending on the data, it might do a MERGE join with an index
-- scan on temp_store_pkey (lots of data) or it might do a
-- sequential scan and sort in memory before doing a MERGE join
-- (little data). (Again without the WHERE prev_tid clause.)

-- However, ANALYZE takes time, often more time than it takes to actually
-- do the nested loop join.

-- If we add a WHERE prev_tid clause, even if we ANALYZE, it will
-- choose a sequential scan on temp_store with a FILTER. Given that most
-- transactions contain relatively few objects, and that it would do a
-- sequential scan /anyway/, this is fine, and worth it to avoid probing
-- the main index for new objects.

-- TODO: Is it worth doing partitioning and putting prev_tid = 0 in their own
-- partition? prev_tid isn't the primary key, zoid is, though, and range
-- partitioning has to include the primary key when there is one.

PERFORM {CURRENT_OBJECT}.zoid
FROM {CURRENT_OBJECT}
INNER JOIN temp_store USING(zoid)
WHERE temp_store.prev_tid <> 0
ORDER BY {CURRENT_OBJECT}.zoid
FOR UPDATE OF {CURRENT_OBJECT};


RETURN QUERY
SELECT cur.zoid, cur.tid,
Expand Down

0 comments on commit 1b4d569

Please sign in to comment.