mm: multigenerational lru: mm_struct list

In order to scan page tables, we add infrastructure to maintain either a
system-wide mm_struct list or per-memcg mm_struct lists, and to track
whether an mm_struct is currently in use or has been used since the last
scan.
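
For context, a minimal sketch (not part of this commit) of the shape such a list might take; the patch's actual definition lives in a file not shown in this excerpt, and every name below is hypothetical:

/* Hypothetical sketch only -- not the structure this patch defines. */
struct mm_list_sketch {
	/* mm_struct::lrugen.list nodes are chained here */
	struct list_head fifo;
	/* position from which the next walker continues */
	struct list_head *head;
	/* protects the list and the walk position */
	spinlock_t lock;
};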

Multiple threads can concurrently work on the same mm_struct list, and
each of them will be given a different mm_struct belonging to a
process that has been scheduled since the last scan.
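
And a hedged sketch of that hand-out loop, reusing the hypothetical structure above; mm_list_get_next() is illustrative only, not an interface this patch adds:

/*
 * Illustrative only: each concurrent walker is handed a different
 * mm_struct, and mm_structs that have not been used since the last
 * scan are skipped. A real implementation would also pin the chosen
 * mm_struct (e.g. with mmgrab()) before dropping the lock.
 */
static struct mm_struct *mm_list_get_next(struct mm_list_sketch *mm_list)
{
	struct mm_struct *mm = NULL;

	spin_lock(&mm_list->lock);
	while (mm_list->head != &mm_list->fifo) {
		mm = list_entry(mm_list->head, struct mm_struct, lrugen.list);
		mm_list->head = mm_list->head->next;
		/* lrugen.nodes is set on every context switch away from mm */
		if (lru_gen_mm_is_active(mm) || !nodes_empty(mm->lrugen.nodes))
			break;
		mm = NULL;
	}
	spin_unlock(&mm_list->lock);

	return mm;
}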

Signed-off-by: Yu Zhao <yuzhao@google.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
yuzhaogoogle authored and xanmod committed on Jun 28, 2021
1 parent 6726876, commit f537af6
Showing 9 changed files with 481 additions and 0 deletions.

fs/exec.c: 2 additions & 0 deletions

@@ -1008,6 +1008,7 @@ static int exec_mmap(struct mm_struct *mm)
 	active_mm = tsk->active_mm;
 	tsk->active_mm = mm;
 	tsk->mm = mm;
+	lru_gen_add_mm(mm);
 	/*
 	 * This prevents preemption while active_mm is being loaded and
 	 * it and mm are being updated, which could cause problems for
@@ -1018,6 +1019,7 @@ static int exec_mmap(struct mm_struct *mm)
 	if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
 		local_irq_enable();
 	activate_mm(active_mm, mm);
+	lru_gen_switch_mm(active_mm, mm);
 	if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
 		local_irq_enable();
 	tsk->mm->vmacache_seqnum = 0;

include/linux/memcontrol.h: 6 additions & 0 deletions

@@ -230,6 +230,8 @@ struct obj_cgroup {
 	};
 };

+struct lru_gen_mm_list;
+
 /*
  * The memory controller data structure. The memory controller controls both
  * page cache and RSS per cgroup. We would eventually like to provide
@@ -349,6 +351,10 @@
 	struct deferred_split deferred_split_queue;
 #endif

+#ifdef CONFIG_LRU_GEN
+	struct lru_gen_mm_list *mm_list;
+#endif
+
 	struct mem_cgroup_per_node *nodeinfo[0];
 	/* WARNING: nodeinfo must be the last member here */
 };

include/linux/mm_types.h: 107 additions & 0 deletions

@@ -15,6 +15,8 @@
 #include <linux/page-flags-layout.h>
 #include <linux/workqueue.h>
 #include <linux/seqlock.h>
+#include <linux/nodemask.h>
+#include <linux/mmdebug.h>

 #include <asm/mmu.h>

@@ -574,6 +576,22 @@ struct mm_struct {

 #ifdef CONFIG_IOMMU_SUPPORT
 	u32 pasid;
 #endif
+#ifdef CONFIG_LRU_GEN
+	struct {
+		/* the node of a global or per-memcg mm_struct list */
+		struct list_head list;
+#ifdef CONFIG_MEMCG
+		/* points to the memcg of the owner task above */
+		struct mem_cgroup *memcg;
+#endif
+		/* whether this mm_struct has been used since the last walk */
+		nodemask_t nodes;
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+		/* the number of CPUs using this mm_struct */
+		atomic_t nr_cpus;
+#endif
+	} lrugen;
+#endif
 } __randomize_layout;

@@ -601,6 +619,95 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 	return (struct cpumask *)&mm->cpu_bitmap;
 }

+#ifdef CONFIG_LRU_GEN
+
+void lru_gen_init_mm(struct mm_struct *mm);
+void lru_gen_add_mm(struct mm_struct *mm);
+void lru_gen_del_mm(struct mm_struct *mm);
+#ifdef CONFIG_MEMCG
+int lru_gen_alloc_mm_list(struct mem_cgroup *memcg);
+void lru_gen_free_mm_list(struct mem_cgroup *memcg);
+void lru_gen_migrate_mm(struct mm_struct *mm);
+#endif
+
+/* Track the usage of each mm_struct so that we can skip inactive ones. */
+static inline void lru_gen_switch_mm(struct mm_struct *old, struct mm_struct *new)
+{
+	/* exclude init_mm, efi_mm, etc. */
+	if (!core_kernel_data((unsigned long)old)) {
+		VM_BUG_ON(old == &init_mm);
+
+		nodes_setall(old->lrugen.nodes);
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+		atomic_dec(&old->lrugen.nr_cpus);
+		VM_BUG_ON_MM(atomic_read(&old->lrugen.nr_cpus) < 0, old);
+#endif
+	} else
+		VM_BUG_ON_MM(READ_ONCE(old->lrugen.list.prev) ||
+			     READ_ONCE(old->lrugen.list.next), old);
+
+	if (!core_kernel_data((unsigned long)new)) {
+		VM_BUG_ON(new == &init_mm);
+
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+		atomic_inc(&new->lrugen.nr_cpus);
+		VM_BUG_ON_MM(atomic_read(&new->lrugen.nr_cpus) < 0, new);
+#endif
+	} else
+		VM_BUG_ON_MM(READ_ONCE(new->lrugen.list.prev) ||
+			     READ_ONCE(new->lrugen.list.next), new);
+}
+
+/* Return whether this mm_struct is being used on any CPUs. */
+static inline bool lru_gen_mm_is_active(struct mm_struct *mm)
+{
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+	return !cpumask_empty(mm_cpumask(mm));
+#else
+	return atomic_read(&mm->lrugen.nr_cpus);
+#endif
+}
+
+#else /* CONFIG_LRU_GEN */
+
+static inline void lru_gen_init_mm(struct mm_struct *mm)
+{
+}
+
+static inline void lru_gen_add_mm(struct mm_struct *mm)
+{
+}
+
+static inline void lru_gen_del_mm(struct mm_struct *mm)
+{
+}
+
+#ifdef CONFIG_MEMCG
+static inline int lru_gen_alloc_mm_list(struct mem_cgroup *memcg)
+{
+	return 0;
+}
+
+static inline void lru_gen_free_mm_list(struct mem_cgroup *memcg)
+{
+}
+
+static inline void lru_gen_migrate_mm(struct mm_struct *mm)
+{
+}
+#endif
+
+static inline void lru_gen_switch_mm(struct mm_struct *old, struct mm_struct *new)
+{
+}
+
+static inline bool lru_gen_mm_is_active(struct mm_struct *mm)
+{
+	return false;
+}
+
+#endif /* CONFIG_LRU_GEN */
+
 struct mmu_gather;
 extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
 extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);

kernel/exit.c: 1 addition & 0 deletions

@@ -422,6 +422,7 @@ void mm_update_next_owner(struct mm_struct *mm)
 		goto retry;
 	}
 	WRITE_ONCE(mm->owner, c);
+	lru_gen_migrate_mm(mm);
 	task_unlock(c);
 	put_task_struct(c);
 }

kernel/fork.c: 10 additions & 0 deletions

@@ -674,6 +674,7 @@ static void check_mm(struct mm_struct *mm)
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
 #endif
+	VM_BUG_ON_MM(lru_gen_mm_is_active(mm), mm);
 }

 #define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
@@ -1066,6 +1067,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 		goto fail_nocontext;

 	mm->user_ns = get_user_ns(user_ns);
+	lru_gen_init_mm(mm);
 	return mm;

 fail_nocontext:
@@ -1108,6 +1110,7 @@ static inline void __mmput(struct mm_struct *mm)
 	}
 	if (mm->binfmt)
 		module_put(mm->binfmt->module);
+	lru_gen_del_mm(mm);
 	mmdrop(mm);
 }

@@ -2533,6 +2536,13 @@ pid_t kernel_clone(struct kernel_clone_args *args)
 		get_task_struct(p);
 	}

+	if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) {
+		/* lock the task to synchronize with memcg migration */
+		task_lock(p);
+		lru_gen_add_mm(p->mm);
+		task_unlock(p);
+	}
+
 	wake_up_new_task(p);

 	/* forking complete and child started to run, tell ptracer */

kernel/kthread.c: 1 addition & 0 deletions

@@ -1350,6 +1350,7 @@ void kthread_use_mm(struct mm_struct *mm)
 	tsk->mm = mm;
 	membarrier_update_current_mm(mm);
 	switch_mm_irqs_off(active_mm, mm, tsk);
+	lru_gen_switch_mm(active_mm, mm);
 	local_irq_enable();
 	task_unlock(tsk);
 #ifdef finish_arch_post_lock_switch

kernel/sched/core.c: 2 additions & 0 deletions

@@ -4323,6 +4323,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 		 * finish_task_switch()'s mmdrop().
 		 */
 		switch_mm_irqs_off(prev->active_mm, next->mm, next);
+		lru_gen_switch_mm(prev->active_mm, next->mm);

 		if (!prev->mm) {			// from kernel
 			/* will mmdrop() in finish_task_switch(). */
@@ -7603,6 +7604,7 @@ void idle_task_exit(void)

 	if (mm != &init_mm) {
 		switch_mm(mm, &init_mm, current);
+		lru_gen_switch_mm(mm, &init_mm);
 		finish_arch_post_lock_switch();
 	}


mm/memcontrol.c: 28 additions & 0 deletions

@@ -4981,6 +4981,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 	for_each_node(node)
 		free_mem_cgroup_per_node_info(memcg, node);
 	free_percpu(memcg->vmstats_percpu);
+	lru_gen_free_mm_list(memcg);
 	kfree(memcg);
 }

@@ -5030,6 +5031,9 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 		if (alloc_mem_cgroup_per_node_info(memcg, node))
 			goto fail;

+	if (lru_gen_alloc_mm_list(memcg))
+		goto fail;
+
 	if (memcg_wb_domain_init(memcg, GFP_KERNEL))
 		goto fail;

@@ -5991,6 +5995,29 @@ static void mem_cgroup_move_task(void)
 }
 #endif

+#ifdef CONFIG_LRU_GEN
+static void mem_cgroup_attach(struct cgroup_taskset *tset)
+{
+	struct cgroup_subsys_state *css;
+	struct task_struct *task = NULL;
+
+	cgroup_taskset_for_each_leader(task, css, tset)
+		;
+
+	if (!task)
+		return;
+
+	task_lock(task);
+	if (task->mm && task->mm->owner == task)
+		lru_gen_migrate_mm(task->mm);
+	task_unlock(task);
+}
+#else
+static void mem_cgroup_attach(struct cgroup_taskset *tset)
+{
+}
+#endif
+
 static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
 {
 	if (value == PAGE_COUNTER_MAX)
@@ -6332,6 +6359,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
 	.css_reset = mem_cgroup_css_reset,
 	.css_rstat_flush = mem_cgroup_css_rstat_flush,
 	.can_attach = mem_cgroup_can_attach,
+	.attach = mem_cgroup_attach,
 	.cancel_attach = mem_cgroup_cancel_attach,
 	.post_attach = mem_cgroup_move_task,
 	.dfl_cftypes = memory_files,
