Bug#36186180 Updating histograms on offloaded tables can deadlock

Tobias Christiani · Tobias Christiani · commit 65a9cf1dd6a0 · 2024-01-18T16:20:13.000+01:00
When running ANALYZE TABLE UPDATE HISTOGRAM on a table that has been
loaded into the secondary engine the UPDATE HISTOGRAM command updates
both the primary and secondary table share with a snapshot of the
histograms. If, while histograms are being updated, another thread runs
a query that is offloaded to the secondary engine and this triggers the
creation of the secondary table share, then we can end up with a
deadlock where: 1) the histogram update thread is waiting on the
secondary share to be constructed before it can release histogram MDL,
and 2) the offloaded query is waiting for histogram MDL to be released
before it can finish constructing the secondary share because part of
the work involved in constructing the table share involves retrieving
the histograms on the table from the dictionary.

This problem does not happen for tables that exist only in the primary
engine since we lock and open such tables before acquiring MDL on the
histograms. However, for tables that also exist in the secondary engine
we attempt to retrieve the secondary share at the point where we update
the primary share, so after having acquired MDL on the histograms, and
this creates the possibility of a deadlock.

This patch removes histograms from the secondary engine table share
altogether, thus avoiding the need to update both the primary
and secondary share when updating histograms. When optimizing an
offloaded query we are still able to use the histograms on the primary
table share. This saves memory by avoiding keeping additional copies of
the histograms on the secondary table share and avoids the type of lock
order violation causing the bug.

Change-Id: I36c22d217837a5c6e3e21151c9d1b353cdd86f77
diff --git a/mysql-test/suite/secondary_engine/r/histogram_deadlock.result b/mysql-test/suite/secondary_engine/r/histogram_deadlock.result
@@ -0,0 +1,18 @@
+#
+# Bug#36186180 Updating histograms on offloaded tables can deadlock
+#
+CREATE TABLE t1(x INT) SECONDARY_ENGINE MOCK;
+INSERT INTO t1 VALUES (1), (2), (3);
+ALTER TABLE t1 SECONDARY_LOAD;
+connect  con1,localhost,root;
+SET DEBUG_SYNC="histogram_update_mdl_acquired SIGNAL histogram_update_ongoing WAIT_FOR secondary_engine_share_open_in_progress";
+ANALYZE TABLE t1 UPDATE HISTOGRAM ON x;
+connection default;
+SET DEBUG_SYNC="now WAIT_FOR histogram_update_ongoing";
+SET DEBUG_SYNC="table_share_open_in_progress SIGNAL secondary_engine_share_open_in_progress";
+SELECT * FROM t1;
+x
+connection con1;
+Table	Op	Msg_type	Msg_text
+test.t1	histogram	status	Histogram statistics created for column 'x'.
+DROP TABLE t1;
diff --git a/mysql-test/suite/secondary_engine/t/histogram_deadlock.test b/mysql-test/suite/secondary_engine/t/histogram_deadlock.test
@@ -0,0 +1,34 @@
+--source include/have_debug_sync.inc
+--disable_query_log
+eval INSTALL PLUGIN mock SONAME '$MOCK_PLUGIN';
+--enable_query_log
+
+--echo #
+--echo # Bug#36186180 Updating histograms on offloaded tables can deadlock
+--echo #
+
+CREATE TABLE t1(x INT) SECONDARY_ENGINE MOCK;
+INSERT INTO t1 VALUES (1), (2), (3);
+ALTER TABLE t1 SECONDARY_LOAD;
+
+--enable_connect_log
+--connect (con1,localhost,root)
+SET DEBUG_SYNC="histogram_update_mdl_acquired SIGNAL histogram_update_ongoing WAIT_FOR secondary_engine_share_open_in_progress";
+--send ANALYZE TABLE t1 UPDATE HISTOGRAM ON x
+
+--connection default
+SET DEBUG_SYNC="now WAIT_FOR histogram_update_ongoing";
+SET DEBUG_SYNC="table_share_open_in_progress SIGNAL secondary_engine_share_open_in_progress";
+# Without the fix this query will deadlock because it gets stuck waiting for UPDATE HISTOGRAM
+# to release histogram MDL while opening the secondary share, and at the same time the histogram update
+# is stuck waiting for the secondary share to be opened.
+SELECT * FROM t1;
+
+--connection con1
+--reap;
+
+DROP TABLE t1;
+
+--disable_query_log
+UNINSTALL PLUGIN mock;
+--enable_query_log
diff --git a/sql/dd_table_share.cc b/sql/dd_table_share.cc
@@ -274,7 +274,7 @@ static bool prepare_share(THD *thd, TABLE_SHARE *share,
 
   // Setup other fields =====================================================
 
-  if (share->tmp_table == NO_TMP_TABLE) {
+  if (share->tmp_table == NO_TMP_TABLE && share->is_primary_engine()) {
     share->m_histograms = new (&share->mem_root) Table_histograms_collection();
     if (share->m_histograms == nullptr) return true;  // OOM.
   }
diff --git a/sql/handler.h b/sql/handler.h
@@ -5147,6 +5147,7 @@ class handler {
     table_share = share;
   }
   const TABLE_SHARE *get_table_share() const { return table_share; }
+  const TABLE *get_table() const { return table; }
 
   /* Estimates calculation */
 
diff --git a/sql/histograms/histogram.cc b/sql/histograms/histogram.cc
@@ -1486,53 +1486,15 @@ bool update_share_histograms(THD *thd, Table_ref *table) {
       MDL_key::TABLE, table->db, table->table_name, MDL_SHARED_READ));
   assert(table->table != nullptr);
 
-  // If the table has a shadow copy in a secondary engine we must retrieve the
-  // TABLE_SHARE for the secondary engine as well.
   TABLE_SHARE *share = table->table->s;
-  TABLE_SHARE *secondary_share = nullptr;
-  if (share->has_secondary_engine()) {
-    mysql_mutex_lock(&LOCK_open);
-    std::string secondary_key =
-        create_table_def_key_secondary(table->db, table->table_name);
-    secondary_share = get_table_share(
-        thd, table->db, table->table_name, secondary_key.c_str(),
-        secondary_key.length(), /*open_view=*/false, /*open_secondary=*/true);
-    mysql_mutex_unlock(&LOCK_open);
-  }
-  if (share->has_secondary_engine() && secondary_share == nullptr) return true;
-
-  auto share_guard = create_scope_guard([secondary_share]() {
-    if (secondary_share != nullptr) {
-      mysql_mutex_lock(&LOCK_open);
-      release_table_share(secondary_share);
-      mysql_mutex_unlock(&LOCK_open);
-    }
-  });
-
-  // Create Table_histograms objects for the primary and secondary share (if it
-  // exists) together with scope guards to clean up in case of failure.
   Table_histograms *table_histograms =
       Table_histograms::create(key_memory_table_share);
   if (table_histograms == nullptr) return true;
   auto table_histograms_guard =
       create_scope_guard([table_histograms]() { table_histograms->destroy(); });
 
-  Table_histograms *table_histograms_secondary = nullptr;
-  if (secondary_share != nullptr) {
-    table_histograms_secondary =
-        Table_histograms::create(key_memory_table_share);
-    if (table_histograms_secondary == nullptr) return true;
-  }
-
-  auto table_histograms_secondary_guard =
-      create_scope_guard([table_histograms_secondary]() {
-        if (table_histograms_secondary != nullptr) {
-          table_histograms_secondary->destroy();
-        }
-      });
-
   // Retrieve histograms from the data dictionary and add them to the
-  // TABLE_SHARE.
+  // set of table_histograms that is to be inserted into the TABLE_SHARE.
   for (size_t i = 0; i < share->fields; ++i) {
     const Field *field = share->field[i];
     if (field->is_hidden_by_system()) continue;
@@ -1543,44 +1505,20 @@ bool update_share_histograms(THD *thd, Table_ref *table) {
       return true;
     }
 
-    if (histogram != nullptr) {
-      if (table_histograms->insert_histogram(field->field_index(), histogram)) {
-        return true;
-      }
-      if (table_histograms_secondary &&
-          table_histograms_secondary->insert_histogram(field->field_index(),
-                                                       histogram)) {
-        return true;
-      }
+    if (histogram != nullptr &&
+        table_histograms->insert_histogram(field->field_index(), histogram)) {
+      return true;
     }
   }
 
-  // Disable the scope guard that would release the secondary share and attempt
-  // to insert the new histogram snapshots and release the secondary share if it
-  // was acquired. Since acquiring/releasing shares and modifying the collection
-  // of histograms on the share is protected by LOCK_open we attempt to reduce
-  // the number of lock/unlock pairs by grouping these operations together.
-  share_guard.commit();
-
-  bool error = false;
   mysql_mutex_lock(&LOCK_open);
-  if (share->m_histograms->insert(table_histograms)) {
-    error = true;
-  } else {
+  const bool error = share->m_histograms->insert(table_histograms);
+  mysql_mutex_unlock(&LOCK_open);
+  if (!error) {
     // If the insertion succeeded ownership responsibility was passed on, so we
     // can disable the scope guard that would free the Table_histograms object.
     table_histograms_guard.commit();
   }
-
-  if (secondary_share != nullptr) {
-    if (secondary_share->m_histograms->insert(table_histograms_secondary)) {
-      error = true;
-    } else {
-      table_histograms_secondary_guard.commit();
-    }
-    release_table_share(secondary_share);
-  }
-  mysql_mutex_unlock(&LOCK_open);
   return error;
 }
 
diff --git a/sql/histograms/histogram.h b/sql/histograms/histogram.h
@@ -805,20 +805,16 @@ bool auto_update_table_histograms(THD *thd, Table_ref *table);
 /**
   Retrieve an updated snapshot of the histograms on a table directly from the
   dictionary (in an inefficient manner, querying all columns) and inserts this
-  snapshot in the Table_histograms_collection on the TABLE_SHARE. If the table
-  has a secondary engine we also insert a new snapshot on the secondary share.
+  snapshot in the Table_histograms_collection on the TABLE_SHARE.
 
   @param thd The current thread.
   @param table The table to retrieve updated histograms for.
 
   @note This function assumes that the table is opened and generally depends on
   the surrounding context. It also locks/unlocks LOCK_OPEN.
 
-  @return False on success. Returns true if an error occurred in which case it
-  can have happened that none of the shares were updated, or that only one of
-  the shares (primary and secondary) were updated, even though we intended to
-  update both. In other words if this function returns true we do not know to
-  what extent the share(s) reflect the dictionary state.
+  @return False on success. Returns true if an error occurred in which case the
+  TABLE_SHARE will not have been updated.
 */
 bool update_share_histograms(THD *thd, Table_ref *table);
 
diff --git a/sql/histograms/table_histograms.h b/sql/histograms/table_histograms.h
@@ -100,7 +100,7 @@ object, because the appropriate protection should already be in place. For
 example, for the insert() in sql_base.cc:get_table_share() we do not use mutex
 protection since we are in the process of constructing the TABLE_SHARE.
 
--- insert() in sql_admin.cc:update_share_histograms(): protected by LOCK_open.
+-- insert() in histogram.cc:update_share_histograms(): protected by LOCK_open.
 
 -- acquire() in table.cc:open_table_from_share(): protected by LOCK_open.
 
diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc
@@ -1785,6 +1785,7 @@ bool Sql_cmd_analyze_table::handle_histogram_command_inner(
   });
 
   if (open_table_and_lock_histograms(thd, table, results)) return true;
+  DEBUG_SYNC(thd, "histogram_update_mdl_acquired");
 
   // UPDATE/DROP histograms. Commit on success. Rollback on error.
   switch (get_histogram_command()) {
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
@@ -830,6 +830,7 @@ TABLE_SHARE *get_table_share(THD *thd, const char *db, const char *table_name,
   */
   share->increment_ref_count();      // Mark in use
   share->m_open_in_progress = true;  // Mark being opened
+  DEBUG_SYNC(thd, "table_share_open_in_progress");
 
   /*
     Temporarily release LOCK_open before opening the table definition,
@@ -891,8 +892,8 @@ TABLE_SHARE *get_table_share(THD *thd, const char *db, const char *table_name,
       /*
         Read any existing histogram statistics from the data dictionary and
         store a copy of them in the TABLE_SHARE. We only perform this step for
-        non-temporary tables, since temporary tables have share->m_histograms
-        set to nullptr.
+        non-temporary and primary engine tables. When these conditions are not
+        met m_histograms is nullptr.
 
         We need to do this outside the protection of LOCK_open, since the data
         dictionary might have to open tables in order to read histogram data
diff --git a/sql/table.cc b/sql/table.cc
@@ -7850,8 +7850,11 @@ void TABLE::disable_logical_diffs_for_current_row(const Field *field) const {
 }
 
 const histograms::Histogram *TABLE::find_histogram(uint field_index) const {
-  if (histograms == nullptr) return nullptr;
-  return histograms->find_histogram(field_index);
+  const handler *primary = get_primary_handler();
+  if (primary == nullptr) return nullptr;
+  const TABLE *table = primary->get_table();
+  if (table == nullptr || table->histograms == nullptr) return nullptr;
+  return table->histograms->find_histogram(field_index);
 }
 
 //////////////////////////////////////////////////////////////////////////
diff --git a/sql/table.h b/sql/table.h
@@ -2469,6 +2469,12 @@ struct TABLE {
   /**
     Find the histogram for the given field index.
 
+    @note If this is called on a TABLE object that belongs to a secondary
+    engine, it will take a round-trip through the handler in order to obtain the
+    histogram from the TABLE object associated with the primary engine. This is
+    done to avoid storing histograms on both the primary and secondary
+    TABLE_SHARE.
+
     @param field_index The index of the field we want to find a histogram for.
 
     @retval nullptr if no histogram is found.

Original file line number	Diff line number	Diff line change
`@@ -274,7 +274,7 @@ static bool prepare_share(THD thd, TABLE_SHARE share,`
`274`	`274`
`275`	`275`	`// Setup other fields =====================================================`
`276`	`276`
`277`		`- if (share->tmp_table == NO_TMP_TABLE) {`
	`277`	`+ if (share->tmp_table == NO_TMP_TABLE && share->is_primary_engine()) {`
`278`	`278`	`share->m_histograms = new (&share->mem_root) Table_histograms_collection();`
`279`	`279`	`if (share->m_histograms == nullptr) return true; // OOM.`
`280`	`280`	`}`
Original file line number	Diff line number	Diff line change
`@@ -5147,6 +5147,7 @@ class handler {`
`5147`	`5147`	`table_share = share;`
`5148`	`5148`	`}`
`5149`	`5149`	`const TABLE_SHARE *get_table_share() const { return table_share; }`
	`5150`	`+ const TABLE *get_table() const { return table; }`
`5150`	`5151`
`5151`	`5152`	`/* Estimates calculation */`
`5152`	`5153`
Original file line number	Diff line number	Diff line change
`@@ -7850,8 +7850,11 @@ void TABLE::disable_logical_diffs_for_current_row(const Field *field) const {`
`7850`	`7850`	`}`
`7851`	`7851`
`7852`	`7852`	`const histograms::Histogram *TABLE::find_histogram(uint field_index) const {`
`7853`		`- if (histograms == nullptr) return nullptr;`
`7854`		`- return histograms->find_histogram(field_index);`
	`7853`	`+ const handler *primary = get_primary_handler();`
	`7854`	`+ if (primary == nullptr) return nullptr;`
	`7855`	`+ const TABLE *table = primary->get_table();`
	`7856`	`+ if (table == nullptr \|\| table->histograms == nullptr) return nullptr;`
	`7857`	`+ return table->histograms->find_histogram(field_index);`
`7855`	`7858`	`}`
`7856`	`7859`
`7857`	`7860`	`//////////////////////////////////////////////////////////////////////////`