
Buffer pool list scan optimization

Summary:
Feature: Optimize buffer pool list scans and fix reporting

This patch includes:
  -- backport of upstream work on buffer pool list scans (a simplified
     sketch of the hazard-pointer idea appears after this list):
     revno: 6228
     revision-id: sunny.bains@oracle.com-20130808223745-7lzp6h40vx1ndc7k
     parent: marc.alff@oracle.com-20130808135008-p5irewph4fm1yu19
     committer: Sunny Bains <Sunny.Bains@Oracle.Com>
     branch nick: trunk
     timestamp: Fri 2013-08-09 08:37:45 +1000
     message:
       WL#7047 - Optimize buffer pool list scans and related batch processing code

       Reduce excessive scanning of pages when doing flush list batches.
       The fix is to introduce the concept of a "Hazard Pointer"; this
       reduces the time complexity of the scan from O(n*n) to O(n).

       The concept of a hazard pointer is reversed in this work. In the
       academic sense, a hazard pointer is a pointer that the thread
       working on an object declares as such, and no other thread is
       allowed to touch that object until the declaring thread is done.

       In this WL we still declare the pointer as a hazard pointer, but
       if another thread attempts to work on it, that thread is allowed
       to do so as long as it adjusts the hazard pointer to the next
       valid value. We use hazard pointers solely for reverse traversal
       of lists within a buffer pool instance.

       Add an event to control the background flush thread. The
       background flush thread's wait has been converted to an os event
       timed wait so that it can be signalled by threads that want to
       kick-start a background flush when the buffer pool is running low
       on free/dirty pages.

  -- fix for MySQL bug#71411
     buf_flush_LRU() returns the number of pages processed, but two
     distinct kinds of processing can happen: a page can get evicted or
     it can get flushed. These two counts are quite distinct and should
     not be mixed (a sketch of the split counters follows this list).
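
Keeping the two counts apart can look roughly like the sketch below; the
names lru_batch_counts_t, do_lru_batch, try_evict and flush_page are made
up for illustration and are not the actual InnoDB API:

// Hedged sketch: count flushed and evicted pages separately instead of
// returning a single mixed total, mirroring the split of the
// buffer_LRU_batch_* monitor counters into *_flush_* and *_evict_* variants.
#include <cstddef>
#include <vector>

struct Page {
    bool dirty;               // a dirty page must be written out first
};

struct lru_batch_counts_t {
    std::size_t flushed = 0;  // pages written to disk by this batch
    std::size_t evicted = 0;  // clean pages freed straight from the LRU tail
};

bool try_evict(Page& p)  { return !p.dirty; }   // placeholder predicate
void flush_page(Page& p) { p.dirty = false; }   // placeholder write-out

// Process up to 'max' pages from the LRU tail and report both counts,
// so that monitoring can expose flush and evict metrics separately.
lru_batch_counts_t do_lru_batch(std::vector<Page>& lru, std::size_t max) {
    lru_batch_counts_t n;
    for (auto it = lru.rbegin();
         it != lru.rend() && n.flushed + n.evicted < max; ++it) {
        if (try_evict(*it)) {
            ++n.evicted;
        } else {
            flush_page(*it);
            ++n.flushed;
        }
    }
    return n;
}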

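And a minimal, self-contained sketch of the reversed hazard pointer
described in the WL#7047 message above, written as a mutex-based toy
model; HazardPointer, Page, list_remove and reverse_scan are illustrative
names, while the actual implementation lives in the FlushHp/LRUHp classes
added by this patch:

// Hedged sketch of the "reversed" hazard pointer used for reverse list
// traversal. The scanner publishes where it will resume before dropping
// the list mutex; any thread that removes that page moves the pointer one
// step towards the head, so the scan never has to restart from the tail.
#include <mutex>

struct Page {
    Page* prev = nullptr;   // towards the list head
    Page* next = nullptr;   // towards the list tail
};

class HazardPointer {
public:
    // The scanner publishes its resume position (list mutex held).
    void  set(Page* p) { m_hp = p; }
    Page* get() const  { return m_hp; }

    // Called by whoever removes 'p' from the list (list mutex held):
    // if 'p' is the hazard pointer, step back to the previous page so
    // the scanner can resume without rescanning the whole list.
    void adjust(const Page* p) {
        if (m_hp == p) {
            m_hp = m_hp->prev;
        }
    }

private:
    Page* m_hp = nullptr;
};

// Unlinking a page must adjust the hazard pointer first.
void list_remove(Page*& tail, HazardPointer& hp, Page* p, std::mutex& m) {
    std::lock_guard<std::mutex> guard(m);
    hp.adjust(p);
    if (p->prev != nullptr) p->prev->next = p->next;
    if (p->next != nullptr) p->next->prev = p->prev; else tail = p->prev;
}

// Reverse scan (tail to head). Without the hazard pointer the scan would
// have to restart from the tail after every unlock, giving O(n*n) work.
void reverse_scan(Page* tail, HazardPointer& hp, std::mutex& m) {
    m.lock();
    for (Page* p = tail; p != nullptr; p = hp.get()) {
        hp.set(p->prev);    // where to resume, even if 'p' is removed
        m.unlock();
        /* flush or evict 'p' here, without holding the list mutex */
        m.lock();
    }
    m.unlock();
}
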
Test Plan: mtr

Reviewers: pivanof, liang.guo.752, CalvinSun

Reviewed By: pivanof

CC: jtolmer, MarkCallaghan, flamingcow, jeremycole, andrew-ford, pengt, steaphan

Differential Revision: https://reviews.facebook.net/D16629
1 parent 5e92130, commit d086837b5487647b130ce471c45f8c9093e87855, committed by inaam-rana on Mar 6, 2014
@@ -43,7 +43,6 @@ buffer_data_written disabled
buffer_flush_batch_scanned disabled
buffer_flush_batch_num_scan disabled
buffer_flush_batch_scanned_per_call disabled
-buffer_flush_batch_rescan disabled
buffer_flush_batch_total_pages disabled
buffer_flush_batches disabled
buffer_flush_batch_pages disabled
@@ -68,9 +67,12 @@ buffer_flush_background_pages disabled
buffer_LRU_batch_scanned disabled
buffer_LRU_batch_num_scan disabled
buffer_LRU_batch_scanned_per_call disabled
-buffer_LRU_batch_total_pages disabled
-buffer_LRU_batches disabled
-buffer_LRU_batch_pages disabled
+buffer_LRU_batch_flush_total_pages disabled
+buffer_LRU_batches_flush disabled
+buffer_LRU_batch_flush_pages disabled
+buffer_LRU_batch_evict_total_pages disabled
+buffer_LRU_batches_evict disabled
+buffer_LRU_batch_evict_pages disabled
buffer_LRU_single_flush_scanned disabled
buffer_LRU_single_flush_num_scan disabled
buffer_LRU_single_flush_scanned_per_call disabled
@@ -54,6 +54,8 @@ Created 11/5/1995 Heikki Tuuri
#include "srv0mon.h"
#include "buf0checksum.h"
+#include <new>
+
/*
IMPLEMENTATION OF THE BUFFER POOL
=================================
@@ -1303,6 +1305,19 @@ buf_pool_init_instance(
buf_pool->try_LRU_scan = TRUE;
+ /* Initialize the hazard pointer for flush_list batches */
+ new(&buf_pool->flush_hp)
+ FlushHp(buf_pool, &buf_pool->flush_list_mutex);
+
+ /* Initialize the hazard pointer for LRU batches */
+ new(&buf_pool->lru_hp) LRUHp(buf_pool, &buf_pool->mutex);
+
+ /* Initialize the iterator for LRU scan search */
+ new(&buf_pool->lru_scan_itr) LRUItr(buf_pool, &buf_pool->mutex);
+
+ /* Initialize the iterator for single page scan search */
+ new(&buf_pool->single_scan_itr) LRUItr(buf_pool, &buf_pool->mutex);
+
buf_pool_mutex_exit(buf_pool);
return(DB_SUCCESS);
@@ -1393,6 +1408,8 @@ buf_pool_init(
btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);
+ buf_flush_event = os_event_create();
+
return(DB_SUCCESS);
}
@@ -1509,6 +1526,10 @@ buf_relocate(
memcpy(dpage, bpage, sizeof *dpage);
+ /* Important that we adjust the hazard pointer before
+ removing bpage from LRU list. */
+ buf_LRU_adjust_hp(buf_pool, bpage);
+
ut_d(bpage->in_LRU_list = FALSE);
ut_d(bpage->in_page_hash = FALSE);
@@ -1547,6 +1568,84 @@ buf_relocate(
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
}
+/** Hazard Pointer implementation. */
+
+/** Set current value
+@param bpage buffer block to be set as hp */
+void
+HazardPointer::set(buf_page_t* bpage)
+{
+ ut_ad(mutex_own(m_mutex));
+ ut_ad(!bpage || buf_pool_from_bpage(bpage) == m_buf_pool);
+ ut_ad(!bpage || buf_page_in_file(bpage));
+
+ m_hp = bpage;
+}
+
+/** Checks if a bpage is the hp
+@param bpage buffer block to be compared
+@return true if it is hp */
+
+bool
+HazardPointer::is_hp(const buf_page_t* bpage)
+{
+ ut_ad(mutex_own(m_mutex));
+ ut_ad(!m_hp || buf_pool_from_bpage(m_hp) == m_buf_pool);
+ ut_ad(!bpage || buf_pool_from_bpage(bpage) == m_buf_pool);
+
+ return(bpage == m_hp);
+}
+
+/** Adjust the value of hp. This happens when some other thread working
+on the same list attempts to remove the hp from the list.
+@param bpage buffer block to be compared */
+
+void
+FlushHp::adjust(const buf_page_t* bpage)
+{
+ ut_ad(bpage != NULL);
+
+ /** We only support reverse traversal for now. */
+ if (is_hp(bpage)) {
+ m_hp = UT_LIST_GET_PREV(list, m_hp);
+ }
+
+ ut_ad(!m_hp || m_hp->in_flush_list);
+}
+
+/** Adjust the value of hp. This happens when some other thread working
+on the same list attempts to remove the hp from the list.
+@param bpage buffer block to be compared */
+
+void
+LRUHp::adjust(const buf_page_t* bpage)
+{
+ ut_ad(bpage);
+
+ /** We only support reverse traversal for now. */
+ if (is_hp(bpage)) {
+ m_hp = UT_LIST_GET_PREV(LRU, m_hp);
+ }
+
+ ut_ad(!m_hp || m_hp->in_LRU_list);
+}
+
+/** Selects from where to start a scan. If we have scanned too deep into
+the LRU list it resets the value to the tail of the LRU list.
+@return buf_page_t from where to start scan. */
+
+buf_page_t*
+LRUItr::start()
+{
+ ut_ad(mutex_own(m_mutex));
+
+ if (!m_hp || m_hp->old) {
+ m_hp = UT_LIST_GET_LAST(m_buf_pool->LRU);
+ }
+
+ return(m_hp);
+}
+
/********************************************************************//**
Determine if a block is a sentinel for a buffer pool watch.
@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
@@ -4037,7 +4136,10 @@ UNIV_INTERN
bool
buf_page_io_complete(
/*=================*/
- buf_page_t* bpage) /*!< in: pointer to the block in question */
+ buf_page_t* bpage, /*!< in: pointer to the block in question */
+ bool evict) /*!< in: whether or not to evict the page
+ from LRU list. */
+
{
enum buf_io_fix io_type;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
@@ -4218,6 +4320,7 @@ buf_page_io_complete(
id. */
buf_page_set_io_fix(bpage, BUF_IO_NONE);
+ buf_page_monitor(bpage, io_type);
switch (io_type) {
case BUF_IO_READ:
@@ -4234,6 +4337,8 @@ buf_page_io_complete(
BUF_IO_READ);
}
+ mutex_exit(buf_page_get_mutex(bpage));
+
break;
case BUF_IO_WRITE:
@@ -4249,14 +4354,30 @@ buf_page_io_complete(
buf_pool->stat.n_pages_written++;
+ /* In case of flush batches i.e.: BUF_FLUSH_LIST and
+ BUF_FLUSH_LRU this function is always called from IO
+ helper thread. In this case, we decide whether or not
+ to evict the page based on flush type. The value
+ passed as evict is the default value in function
+ definition which is false.
+ We always evict in case of LRU batch and never evict
+ in case of flush list batch. For single page flush
+ the caller sets the appropriate value. */
+ if (buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU) {
+ evict = true;
+ }
+
+ mutex_exit(buf_page_get_mutex(bpage));
+ if (evict) {
+ buf_LRU_free_page(bpage, true);
+ }
+
break;
default:
ut_error;
}
- buf_page_monitor(bpage, io_type);
-
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
fprintf(stderr, "Has %s page space %lu page no %lu\n",
@@ -4266,7 +4387,6 @@ buf_page_io_complete(
}
#endif /* UNIV_DEBUG */
- mutex_exit(buf_page_get_mutex(bpage));
buf_pool_mutex_exit(buf_pool);
return(true);