Skip to content

Commit

Permalink
Merge 7aaa87a into f0a985e
Browse files Browse the repository at this point in the history
  • Loading branch information
jamadden committed May 21, 2021
2 parents f0a985e + 7aaa87a commit 1b4d569
Show file tree
Hide file tree
Showing 8 changed files with 125 additions and 164 deletions.
4 changes: 3 additions & 1 deletion CHANGES.rst
Expand Up @@ -5,7 +5,9 @@
3.5.0a2 (unreleased)
====================

- Nothing changed yet.
- Revert :issue:`469` and return to taking shared locks before
exclusive locks. Testing in a large, busy application indicated that
performance was overall slightly worse this way.


3.5.0a1 (2021-05-17)
Expand Down
1 change: 0 additions & 1 deletion src/relstorage/adapters/adapter.py
Expand Up @@ -217,7 +217,6 @@ def after_lock_share():
# We go ahead and compare the readCurrent TIDs here, so
# that we don't have to make the call to detect conflicts
# or even lock rows if there are readCurrent violations.
# Recall this function is called twice.
for oid_int, expect_tid_int in read_current_oids.items():
actual_tid_int = current.get(oid_int, 0)
if actual_tid_int != expect_tid_int:
Expand Down
25 changes: 3 additions & 22 deletions src/relstorage/adapters/interfaces.py
Expand Up @@ -1534,8 +1534,9 @@ def lock_objects_and_detect_conflicts(
deadlock detection, and starting with MySQL 8, it supports
``NOWAIT``.
In both implementations, though, deadlocks lead to annoying
database server error logs, so they should be avoided.
Transactions that deadlock would have been doomed anyway;
a deadlock is just another way of saying there will be a
readCurrent conflict.
.. rubric:: Lock Order
Expand Down Expand Up @@ -1631,26 +1632,6 @@ def lock_objects_and_detect_conflicts(
immediately able to determine that 2 has been modified and
quickly raise an exception without holding any other locks.
However, this does introduce a problem of its own: The deadlocks
we talked about above are common in this scenario. While that's
fine in-and-of-itself, it does mean that we're possibly missing out
on the chance to catch and resolve conflicts (because the deadlock
errors immediately abort the transaction). Since most databases
support NOWAIT or a good approximation of it, we can avoid the deadlocks
by taking the exclusive locks first, and then taking share locks NOWAIT
(taking the share lock NOWAIT will immediately abort if some other transaction
is already holding an exclusive lock; the reverse is not true).
But what about fast notification of transactions that
are going to have to abort anyway? For databases that implement all of this
in a single step in the database, we can first do a no-lock check
of ``readCurrent`` items, and if anything has changed, we can
bail immediately. It's only in the case that some other transaction is
actually modifying an object that we want to ``readCurrent`` on, *and*
some other transaction is modifying an object that we also want to modify
(so we have to wait on an exclusive lock) that we wind up waiting
longer than we did before to abort a doomed transaction.
:param cursor: The store cursor.
:param read_current_oids: A mapping from oid integer to tid
integer that the transaction expects.
Expand Down
15 changes: 5 additions & 10 deletions src/relstorage/adapters/locker.py
Expand Up @@ -183,22 +183,17 @@ def lock_current_objects(self, cursor, read_current_oid_ints, shared_locks_block
# locked when they are returned by the cursor, so we must
# consume all the rows.

# Lock order is explained in IRelStorageAdapter.lock_objects_and_detect_conflicts.

after_lock_share() # Call once for a quick check
# Lock rows we need shared access to, typically without blocking.
if read_current_oid_ints:
self._lock_readCurrent_oids_for_share(cursor, read_current_oid_ints,
shared_locks_block)
after_lock_share()

# Lock the rows we need exclusive access to.
# This will block for up to ``commit_lock_timeout``,
# possibly * N
self._lock_rows_being_modified(cursor)

# Next lock rows we need shared access to, typically without blocking.
if read_current_oid_ints:
self._lock_readCurrent_oids_for_share(cursor, read_current_oid_ints,
shared_locks_block)
after_lock_share() # call again to verify


def _lock_readCurrent_oids_for_share(self, cursor, current_oids, shared_locks_block):
_, table = self._get_current_objects_query
oids_to_lock = sorted(set(current_oids))
Expand Down
Expand Up @@ -82,40 +82,6 @@ label_proc:BEGIN
SET i = i + 1;
END WHILE;


-- Now return any such rows that we locked
-- that are in conflict.
-- No point doing any more work taking exclusive locks, etc,
-- because we will immediately abort, but we must be careful not to return
-- an extra result set that's empty: that interferes with our socket IO
-- waiting for query results.
SELECT zoid, {CURRENT_OBJECT}.tid
INTO con_oid, con_tid
FROM {CURRENT_OBJECT}
INNER JOIN temp_read_current USING (zoid)
WHERE temp_read_current.tid <> {CURRENT_OBJECT}.tid
LIMIT 1;

IF con_oid IS NOT NULL THEN
SELECT con_oid, con_tid, NULL as prev_tid, NULL as state;
ROLLBACK; -- release locks.
LEAVE label_proc; -- return
END IF;

END IF;


INSERT INTO temp_locked_zoid
SELECT o.zoid
FROM {CURRENT_OBJECT} o FORCE INDEX (PRIMARY)
INNER JOIN temp_store t FORCE INDEX (PRIMARY)
ON o.zoid = t.zoid
WHERE t.prev_tid <> 0
ORDER BY t.zoid
FOR UPDATE;

IF read_current_oids_tids IS NOT NULL THEN

-- The timeout only goes to 1; this procedure seems to always take roughly 2s
-- to actually detect a lock timeout problem, however.
-- TODO: Can we apply the MAX_EXECUTION_TIME() optimizer hint?
Expand Down Expand Up @@ -159,6 +125,18 @@ label_proc:BEGIN

END IF;


INSERT INTO temp_locked_zoid
SELECT o.zoid
FROM {CURRENT_OBJECT} o FORCE INDEX (PRIMARY)
INNER JOIN temp_store t FORCE INDEX (PRIMARY)
ON o.zoid = t.zoid
WHERE t.prev_tid <> 0
ORDER BY t.zoid
FOR UPDATE;



SELECT cur.zoid, cur.tid, temp_store.prev_tid, {OBJECT_STATE_NAME}.state
FROM {CURRENT_OBJECT} cur
INNER JOIN temp_store USING (zoid)
Expand Down
2 changes: 1 addition & 1 deletion src/relstorage/adapters/postgresql/locker.py
Expand Up @@ -62,7 +62,7 @@ def _modify_commit_lock_kind(self, kind, exc):
exc_str = str(exc).lower()
if 'for update' in exc_str:
kind = UnableToLockRowsToModifyError
elif 'return query' in exc_str or 'readCurrent' in exc_str:
elif 'return query' in exc_str or 'readcurrent' in exc_str:
kind = UnableToLockRowsToReadCurrentError
return kind

Expand Down
Expand Up @@ -9,87 +9,56 @@ BEGIN

-- Order matters here.
--
-- We want to first take exclusive locks for the modified objects
-- (and wait if needed) and then take NOWAIT shared locks for the
-- readCurrent objects.
-- We've tried it both ways: Shared locks and then exclusive locks,
-- or exclusive locks and then shared locks.
--
-- If we do it the other way around (RelStorage < 3.5), we can
-- *easily* deadlock in lock order on the server, which would result
-- in us raising ``UnableToLockRows...`` exception, which
-- immediately aborts the transaction and doesn't allow for any
-- conflict resolution.
-- The shared-then-exclusive can fairly easily deadlock; this gets
-- detected by PostgreSQL which kills one of the transactions to
-- resolve the situation. How long it takes to find the deadlock is
-- configurable (up to 1s by default, but typically it happens much
-- faster because each involved PostgreSQL worker is checking in a
-- slightly different schedule). Busy servers are sometimes
-- configured to check less frequently, though. The killed transaction
-- results in an unresolvable conflict and has to be retried. (There was
-- going to be a conflict anyway, almost certainly a ReadConflictError,
-- which is also unresolvable).
--
-- However, taking exclusive first and only then shared NOWAIT
-- doesn't deadlock (I think), certainly not as easily. This means a
-- transaction might have to wait, or timeout, but if it gets
-- through, it has a chance to resolve conflicts and commit.
-- The other way, exclusive-then-shared, eliminates the deadlocks.
-- However, in tests of a busy system, taking the exclusive locks
-- first and then raising a ReadConflictError from the shared locks,
-- actually made performance overall *worse*. The theory is that we
-- spent more time waiting for locks we weren't going to be able to
-- use anyway.
--
-- One of the appealing properties of taking the shared first,
-- though, is that if we *are* doomed to failure (because of a
-- ReadCurrent error), we can know it immediately. We can still make
-- that check before we try to wait for exclusive locks, but since
-- we can't actually lock those rows yet, we need to perform the
-- check *again*, after taking the exclusive locks (and this time
-- lock them to be sure they don't change).

-- lock in share should NOWAIT
-- Thus, take shared locks first.

IF read_current_oids IS NOT NULL THEN
-- A pre-check: If we can detect a single readCurrent conflict,
-- do so, send it to the server where it will raise VoteConflictError,
-- and just bail. We could make this raise an exception and abort the
-- transaction, but we're not holding any locks here, and the error message
-- from VoteConflictError is more informative.
-- XXX: SELECT FOR SHARE does disk I/O! This can become expensive
-- and possibly lead to database issues.
-- See https://buttondown.email/nelhage/archive/22ab771c-25b4-4cd9-b316-31a86f737acc
-- We document this in docs/postgresql/index.rst
--
-- Because of that, and because there are ZODB tests that expect
-- to get a ReadConflictError with an OID and TID in it
-- (check_checkCurrentSerialInTransaction), we first run a check
-- to see if there is definitely an issue. If there is, we just
-- return. Only if we can't be sure do we go on and take out
-- shared locks. We need to do this because if the shared locks detect a
-- conflict, we want to immediately release those locks, and the only way to
-- do that in PostgreSQL is to raise an exception (unlike MySQL we can't
-- ROLLBACK the transaction).
RETURN QUERY -- This just adds to the result table; a final bare ``RETURN`` actually ends execution
SELECT {CURRENT_OBJECT}.zoid, {CURRENT_OBJECT}.tid, NULL::BIGINT, NULL::BYTEA
FROM {CURRENT_OBJECT}
INNER JOIN unnest(read_current_oids, read_current_tids) t(zoid, tid)
USING (zoid)
WHERE {CURRENT_OBJECT}.tid <> t.tid
LIMIT 1;
SELECT {CURRENT_OBJECT}.zoid, {CURRENT_OBJECT}.tid, NULL::BIGINT, NULL::BYTEA
FROM {CURRENT_OBJECT}
INNER JOIN unnest(read_current_oids, read_current_tids) t(zoid, tid)
USING (zoid)
WHERE {CURRENT_OBJECT}.tid <> t.tid
LIMIT 1;

IF FOUND THEN
RETURN;
END IF;
END IF;


-- Unlike MySQL, we can simply do the SELECT (with PERFORM) for its
-- side effects to lock the rows.
-- This one will block. (We set the PG configuration variable ``lock_timeout``
-- from the ``commit-lock-timeout`` configuration variable to determine how long.)

-- A note on the query: PostgreSQL will typically choose a
-- sequential scan on the temp_store table and do a nested loop join
-- against the object_state_pkey index culminating in a sort after
-- the join. This is the same whether we write a WHERE IN (SELECT)
-- or an INNER JOIN. (This is without a WHERE prev_tid <> 0 clause.)

-- That changes substantially if we ANALYZE the temp table;
-- depending on the data, it might do a MERGE join with an index
-- scan on temp_store_pkey (lots of data) or it might do a
-- sequential scan and sort in memory before doing a MERGE join
-- (little data). (Again without the WHERE prev_tid clause.)

-- However, ANALYZE takes time, often more time than it takes to actually
-- do the nested loop join.

-- If we add a WHERE prev_tid clause, even if we ANALYZE, it will
-- choose a sequential scan on temp_store with a FILTER. Given that most
-- transactions contain relatively few objects, and that it would do a
-- sequential scan /anyway/, this is fine, and worth it to avoid probing
-- the main index for new objects.

-- TODO: Is it worth doing partitioning and putting prev_tid = 0 in their own
-- partition? prev_tid isn't the primary key, zoid is, though, and range
-- partitioning has to include the primary key when there is one.

PERFORM {CURRENT_OBJECT}.zoid
FROM {CURRENT_OBJECT}
INNER JOIN temp_store USING(zoid)
WHERE temp_store.prev_tid <> 0
ORDER BY {CURRENT_OBJECT}.zoid
FOR UPDATE OF {CURRENT_OBJECT};

IF read_current_oids IS NOT NULL THEN

-- Doing this in a single query takes some effort to make sure
-- that the required rows all get locked. The optimizer is smart
Expand All @@ -111,11 +80,7 @@ BEGIN
-- keyword alone would be enough to fool it (the plan doesn't
-- change on 11 when we use the keyword)).

-- XXX: SELECT FOR SHARE does disk I/O! This can become expensive
-- and possibly lead to database issues.
-- See https://buttondown.email/nelhage/archive/22ab771c-25b4-4cd9-b316-31a86f737acc
-- We document this in docs/postgresql/index.rst
RETURN QUERY -- This just adds to the result table; a final bare ``RETURN`` actually ends execution
RETURN QUERY
WITH locked AS (
SELECT {CURRENT_OBJECT}.zoid, {CURRENT_OBJECT}.tid, t.tid AS desired
FROM {CURRENT_OBJECT}
Expand All @@ -130,13 +95,52 @@ BEGIN
-- If that failed to get a lock because it is being modified by another transaction,
-- it raised an exception.
IF FOUND THEN
-- We're holding exclusive locks here, so abort the transaction
-- We're holding shared locks here, so abort the transaction
-- and release them; don't wait for Python to do it.
RAISE EXCEPTION USING ERRCODE = 'lock_not_available'
-- Unfortunately, this means we lose the ability to report exactly the
-- object that conflicted.
RAISE EXCEPTION USING ERRCODE = 'lock_not_available',
HINT = 'readCurrent';
END IF;
END IF;

-- Unlike MySQL, we can simply do the SELECT (with PERFORM) for its
-- side effects to lock the rows.
-- This one will block. (We set the PG configuration variable ``lock_timeout``
-- from the ``commit-lock-timeout`` configuration variable to determine how long.)

-- A note on the query: PostgreSQL will typically choose a
-- sequential scan on the temp_store table and do a nested loop join
-- against the object_state_pkey index culminating in a sort after
-- the join. This is the same whether we write a WHERE IN (SELECT)
-- or an INNER JOIN. (This is without a WHERE prev_tid <> 0 clause.)

-- That changes substantially if we ANALYZE the temp table;
-- depending on the data, it might do a MERGE join with an index
-- scan on temp_store_pkey (lots of data) or it might do a
-- sequential scan and sort in memory before doing a MERGE join
-- (little data). (Again without the WHERE prev_tid clause.)

-- However, ANALYZE takes time, often more time than it takes to actually
-- do the nested loop join.

-- If we add a WHERE prev_tid clause, even if we ANALYZE, it will
-- choose a sequential scan on temp_store with a FILTER. Given that most
-- transactions contain relatively few objects, and that it would do a
-- sequential scan /anyway/, this is fine, and worth it to avoid probing
-- the main index for new objects.

-- TODO: Is it worth doing partitioning and putting prev_tid = 0 in their own
-- partition? prev_tid isn't the primary key, zoid is, though, and range
-- partitioning has to include the primary key when there is one.

PERFORM {CURRENT_OBJECT}.zoid
FROM {CURRENT_OBJECT}
INNER JOIN temp_store USING(zoid)
WHERE temp_store.prev_tid <> 0
ORDER BY {CURRENT_OBJECT}.zoid
FOR UPDATE OF {CURRENT_OBJECT};


RETURN QUERY
SELECT cur.zoid, cur.tid,
Expand Down

0 comments on commit 1b4d569

Please sign in to comment.