mm: multigenerational lru: eviction
The eviction consumes old generations. Given an lruvec, it scans pages
on lrugen->lists[] indexed by min_seq%MAX_NR_GENS. A feedback loop
modeled after the PID controller monitors refaults over anon and file
types and decides which type to evict when both are available from the
same generation.
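
To make the idea concrete, here is a rough userspace sketch of how such a choice could be made from refault statistics. It is illustrative only: the struct, field names, and the cross-multiplication trick are made up for this example, and the real feedback loop also keeps exponential moving averages and per-generation histories rather than two raw counters.

#include <stdio.h>

/* Per-type counters, loosely mirroring "refaults vs. evictions" for
 * anon and file pages. Illustrative only, not kernel code. */
struct type_stats {
	unsigned long refaulted;	/* refaults observed recently */
	unsigned long evicted;		/* pages evicted recently */
};

/* Pick the type whose pages refault less often, i.e. the one we are
 * less likely to regret evicting. Cross-multiplying the two ratios
 * avoids division by zero and floating point. */
static int choose_eviction_type(const struct type_stats *anon,
				const struct type_stats *file)
{
	unsigned long anon_score = anon->refaulted * (file->evicted + 1);
	unsigned long file_score = file->refaulted * (anon->evicted + 1);

	/* 0 = evict anon first, 1 = evict file first */
	return file_score <= anon_score;
}

int main(void)
{
	struct type_stats anon = { .refaulted = 50,  .evicted = 1000 };
	struct type_stats file = { .refaulted = 400, .evicted = 2000 };

	/* anon refaults ~5% of the time, file ~20% -> evict anon first */
	printf("evict %s first\n",
	       choose_eviction_type(&anon, &file) ? "file" : "anon");
	return 0;
}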

Each generation is divided into multiple tiers. Tiers represent
different ranges of numbers of accesses thru file descriptors. A page
accessed N times thru file descriptors is in tier order_base_2(N). The
feedback loop also monitors refaults over all tiers and decides from
which tiers (N>1) to promote pages, using the first tier (N=0,1) as a
baseline.
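
As a concrete illustration of the mapping, order_base_2(N) is ceil(log2(N)): N=1 lands in tier 0, N=2 in tier 1, N=3 or 4 in tier 2, N=5..8 in tier 3, and so on. A standalone userspace stand-in (tier_of() below is not the kernel's order_base_2(), just an equivalent for this example):

#include <stdio.h>

/* Userspace stand-in for order_base_2(): ceil(log2(n)), with n == 1
 * mapping to 0. Illustrative only. */
static unsigned int tier_of(unsigned long n)
{
	unsigned int tier = 0;

	while ((1UL << tier) < n)
		tier++;
	return tier;
}

int main(void)
{
	/* N accesses thru file descriptors -> tier order_base_2(N):
	 * N=1 -> 0, N=2 -> 1, N=3,4 -> 2, N=5..8 -> 3, ... */
	for (unsigned long n = 1; n <= 8; n++)
		printf("N=%lu -> tier %u\n", n, tier_of(n));
	return 0;
}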

The eviction sorts a page according to the gen counter if the aging
has found this page accessed thru page tables, which completes the
promotion of this page. The eviction also promotes a page to the next
generation (min_seq+1 rather than max_seq) if this page was accessed
multiple times thru file descriptors and the feedback loop has
detected higher refaults from the tier this page is in. This approach
has the following advantages:
1) It removes the cost of activation (recall the terms) in the
   buffered access path by inferring whether pages accessed multiple
   times thru file descriptors are statistically hot and thus worth
   promoting in the eviction path.
2) It takes pages accessed thru page tables into account and avoids
   overprotecting pages accessed multiple times thru file descriptors.
3) More tiers, which require additional bits in folio->flags, provide
   better protection for pages accessed more than twice thru file
   descriptors, when under heavy buffered I/O workloads.

The eviction increments min_seq when lrugen->lists[] indexed by
min_seq%MAX_NR_GENS is empty.
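
Taken together, the per-page decision can be pictured with the small userspace model below. Everything in it (page_model, tier_refaults_more_than_baseline, sort_or_evict) is hypothetical and only sketches the idea; it is not the patch's actual control flow or data structures.

#include <stdio.h>
#include <stdbool.h>

#define MAX_NR_GENS 4

/* Toy stand-in for a page: only the bits the decision below needs. */
struct page_model {
	unsigned long gen;	/* generation stamped by the aging */
	unsigned int tier;	/* order_base_2 of accesses thru file descriptors */
};

/* Hypothetical feedback result; in the real code this comes from comparing
 * a tier's refaults against the first-tier baseline. */
static bool tier_refaults_more_than_baseline(unsigned int tier)
{
	return tier > 1;
}

/* What the eviction does with one page taken off the oldest list
 * (indexed by min_seq % MAX_NR_GENS). */
static const char *sort_or_evict(struct page_model *p, unsigned long min_seq)
{
	/* The aging saw a page-table access and bumped the gen counter:
	 * re-sort the page into that newer generation's list. */
	if (p->gen != min_seq % MAX_NR_GENS)
		return "re-sort into its own generation";

	/* Accessed multiple times thru file descriptors, and its tier
	 * refaults more than the baseline: promote to min_seq+1. */
	if (p->tier > 1 && tier_refaults_more_than_baseline(p->tier)) {
		p->gen = (min_seq + 1) % MAX_NR_GENS;
		return "promote to min_seq+1";
	}

	return "evict";
}

int main(void)
{
	unsigned long min_seq = 0;
	struct page_model cold   = { .gen = 0, .tier = 0 };
	struct page_model hot_fd = { .gen = 0, .tier = 2 };
	struct page_model aged   = { .gen = 2, .tier = 0 };

	printf("cold page:   %s\n", sort_or_evict(&cold, min_seq));
	printf("hot fd page: %s\n", sort_or_evict(&hot_fd, min_seq));
	printf("aged page:   %s\n", sort_or_evict(&aged, min_seq));
	return 0;
}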

Signed-off-by: Yu Zhao <yuzhao@google.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
yuzhaogoogle authored and xanmod committed Jan 12, 2022
1 parent 22bfc00 commit 811d1a9
Showing 5 changed files with 767 additions and 3 deletions.
10 changes: 10 additions & 0 deletions include/linux/mm_inline.h
@@ -115,6 +115,14 @@ static inline int lru_hist_from_seq(unsigned long seq)
	return seq % NR_HIST_GENS;
}

static inline int lru_tier_from_refs(int refs)
{
	VM_BUG_ON(refs > BIT(LRU_REFS_WIDTH));

	/* see the comment on MAX_NR_TIERS */
	return order_base_2(refs + 1);
}

static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
{
	unsigned long max_seq = lruvec->lrugen.max_seq;
@@ -243,6 +251,8 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio,
	gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;

	new_flags &= ~LRU_GEN_MASK;
	if ((new_flags & LRU_REFS_FLAGS) != LRU_REFS_FLAGS)
		new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS);
	/* for shrink_page_list() */
	if (reclaiming)
		new_flags &= ~(BIT(PG_referenced) | BIT(PG_reclaim));
28 changes: 28 additions & 0 deletions include/linux/mmzone.h
@@ -336,6 +336,25 @@ struct page_vma_mapped_walk;
#define MIN_NR_GENS 2U
#define MAX_NR_GENS ((unsigned int)CONFIG_NR_LRU_GENS)

/*
 * Each generation is divided into multiple tiers. Tiers represent different
 * ranges of numbers of accesses thru file descriptors. A page accessed N times
 * thru file descriptors is in tier order_base_2(N). A page in the first tier
 * (N=0,1) is marked by PG_referenced unless it was faulted in thru page tables
 * or read ahead. A page in any other tier (N>1) is marked by PG_referenced and
 * PG_workingset. Additional bits in folio->flags are required to support more
 * than two tiers.
 *
 * In contrast to moving across generations (promotion), moving across tiers
 * only requires operations on folio->flags and therefore has a negligible cost
 * in the buffered access path. In the eviction path, comparisons of
 * refaulted/(evicted+promoted) from the first tier and the rest infer whether
 * pages accessed multiple times thru file descriptors are statistically hot
 * and thus worth promoting.
 */
#define MAX_NR_TIERS ((unsigned int)CONFIG_TIERS_PER_GEN)
#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset))

/* whether to keep historical stats for evicted generations */
#ifdef CONFIG_LRU_GEN_STATS
#define NR_HIST_GENS ((unsigned int)CONFIG_NR_LRU_GENS)
@@ -354,6 +373,15 @@ struct lru_gen_struct {
	struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
	/* the sizes of the above lists */
	unsigned long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
	/* the exponential moving average of refaulted */
	unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS];
	/* the exponential moving average of evicted+promoted */
	unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS];
	/* the first tier doesn't need promotion, hence the minus one */
	unsigned long promoted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1];
	/* can be modified without holding the lru lock */
	atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
	atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
	/* whether the multigenerational lru is enabled */
	bool enabled;
};
42 changes: 42 additions & 0 deletions mm/swap.c
@@ -407,6 +407,43 @@ static void __lru_cache_activate_folio(struct folio *folio)
	local_unlock(&lru_pvecs.lock);
}

#ifdef CONFIG_LRU_GEN
static void folio_inc_refs(struct folio *folio)
{
	unsigned long refs;
	unsigned long old_flags, new_flags;

	if (folio_test_unevictable(folio))
		return;

	/* see the comment on MAX_NR_TIERS */
	do {
		new_flags = old_flags = READ_ONCE(folio->flags);

		if (!(new_flags & BIT(PG_referenced))) {
			new_flags |= BIT(PG_referenced);
			continue;
		}

		if (!(new_flags & BIT(PG_workingset))) {
			new_flags |= BIT(PG_workingset);
			continue;
		}

		refs = new_flags & LRU_REFS_MASK;
		refs = min(refs + BIT(LRU_REFS_PGOFF), LRU_REFS_MASK);

		new_flags &= ~LRU_REFS_MASK;
		new_flags |= refs;
	} while (new_flags != old_flags &&
		 cmpxchg(&folio->flags, old_flags, new_flags) != old_flags);
}
#else
static void folio_inc_refs(struct folio *folio)
{
}
#endif /* CONFIG_LRU_GEN */

/*
 * Mark a page as having seen activity.
 *
@@ -419,6 +456,11 @@ static void __lru_cache_activate_folio(struct folio *folio)
 */
void folio_mark_accessed(struct folio *folio)
{
	if (lru_gen_enabled()) {
		folio_inc_refs(folio);
		return;
	}

	if (!folio_test_referenced(folio)) {
		folio_set_referenced(folio);
	} else if (folio_test_unevictable(folio)) {