Skip to content
Permalink
Browse files
CacULE 5.14
  • Loading branch information
damentz committed Sep 14, 2021
1 parent 7d2a07b commit 9e3f9e4d524cfe1d20d03a731930e4d6ebacbfd8
Show file tree
Hide file tree
Showing 11 changed files with 1,228 additions and 6 deletions.
@@ -1084,6 +1084,10 @@ reboot-cmd (SPARC only)
ROM/Flash boot loader. Maybe to tell it what to do after
rebooting. ???

sched_interactivity_factor (CacULE scheduler only)
==================================================
Sets the value *m* for interactivity score calculations. See
Figure 1 in https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf

sched_energy_aware
==================
@@ -0,0 +1,76 @@
======================================
The CacULE Scheduler by Hamad Al Marri.
======================================

1. Overview
=============

The CacULE CPU scheduler is based on interactivity score mechanism.
The interactivity score is inspired by the ULE scheduler (FreeBSD
scheduler).

1.1 About CacULE Scheduler
--------------------------

- Each CPU has its own runqueue.

- NORMAL runqueue is a linked list of sched_entities (instead of RB-Tree).

- RT and other runqueues are just the same as the CFS's.

- Waking tasks preempt the currently running task if their interactivity score
value is higher.


1.2. Complexity
----------------

The complexity of Enqueue and Dequeue a task is O(1).

The complexity of picking the next task is O(n), where n is the number of tasks
in a runqueue (each CPU has its own runqueue).

Note: O(n) sounds scary, but usually for a machine with 4 CPUs that is used
for desktop or mobile jobs, the maximum number of runnable tasks might not
exceed 10 (at pick-next-task time) - the idle tasks are excluded since they
are dequeued when sleeping and enqueued when they wake up.


2. The CacULE Interactivity Score
=======================================================

The interactivity score is inspired by the ULE scheduler (FreeBSD scheduler).
For more information see: https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf
CacULE doesn't replace CFS with ULE, it only changes the CFS' pick next task
mechanism to ULE's interactivity score mechanism for picking next task to run.


2.3 sched_interactivity_factor
------------------------------
Sets the value *m* for interactivity score calculations. See Figure 1 in
https://web.cs.ucdavis.edu/~roper/ecs150/ULE.pdf
The default value in CacULE is 10, which means that the Maximum Interactive
Score is 20 (since m = Maximum Interactive Score / 2).
You can tune sched_interactivity_factor with sysctl command:

sysctl kernel.sched_interactivity_factor=50

This command changes the sched_interactivity_factor from 10 to 50.


3. Scheduling policies
=======================

CacULE, like CFS, implements three scheduling policies:

- SCHED_NORMAL (traditionally called SCHED_OTHER): The scheduling
policy that is used for regular tasks.

- SCHED_BATCH: Does not preempt nearly as often as regular tasks
would, thereby allowing tasks to run longer and make better use of
caches but at the cost of interactivity. This is well suited for
batch jobs.

- SCHED_IDLE: This is even weaker than nice 19, but it is not a true
idle timer scheduler in order to avoid getting into priority
inversion problems which would deadlock the machine.
@@ -462,10 +462,23 @@ struct sched_statistics {
#endif
};

#ifdef CONFIG_CACULE_SCHED
/*
 * Per-entity node in CacULE's doubly linked NORMAL runqueue
 * (used instead of the CFS RB-tree run_node; embedded in sched_entity).
 *
 * Kernel coding style: the pointer '*' binds to the member name,
 * not the type (fixed from "struct cacule_node* next").
 */
struct cacule_node {
	struct cacule_node	*next;			/* next entity in the run list */
	struct cacule_node	*prev;			/* previous entity in the run list */
	u64			cacule_start_time;	/* set from sched_clock() in wake_up_new_task() */
	u64			last_run;		/* NOTE(review): presumably last time this entity ran; no user visible here — confirm */
	u64			vruntime;		/* zeroed in __sched_fork(); CacULE's own virtual runtime */
};
#endif

struct sched_entity {
/* For load-balancing: */
struct load_weight load;
struct rb_node run_node;
#ifdef CONFIG_CACULE_SCHED
struct cacule_node cacule_node;
#endif
struct list_head group_node;
unsigned int on_rq;

@@ -32,6 +32,16 @@ extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;

#ifdef CONFIG_CACULE_SCHED
extern unsigned int interactivity_factor;
extern unsigned int cacule_max_lifetime;
extern unsigned int cache_factor;
extern unsigned int cache_divisor;
extern unsigned int starve_factor;
extern unsigned int starve_divisor;
extern int cacule_yield;
#endif

enum sched_tunable_scaling {
SCHED_TUNABLESCALING_NONE,
SCHED_TUNABLESCALING_LOG,
@@ -837,6 +837,51 @@ config UCLAMP_BUCKETS_COUNT

endmenu

config CACULE_SCHED
bool "CacULE CPU scheduler"
default y
help
The CacULE CPU scheduler is based on interactivity score mechanism.
The interactivity score is inspired by the ULE scheduler (FreeBSD
scheduler).

If unsure, say Y here.

config CACULE_RDB
bool "RDB (Response Driven Balancer)"
default y
depends on CACULE_SCHED
help
This is an experimental load balancer for CacULE. It is a lightweight
load balancer which is a replacement of CFS load balancer. It migrates
tasks based on their interactivity scores.

If unsure, say Y here.

config RDB_INTERVAL
int "RDB load balancer interval"
default 19
depends on CACULE_RDB
help
This is an interval to control load balance time period.
The trigger_load_balance runs in every tick. For High HZ values, the
load balance could be overwhelming. RDB load balance includes rq locking
which can reduce the performance. The balance interval can help to avoid
running load balance on every tick. For example, RDB_INTERVAL=3 will
only run load balance every 3ms. Setting RDB_INTERVAL depends on HZ.
If you want the load balancer to run every 2ms while HZ=500, then it is
not needed and better to set RDB_INTERVAL=0, since 500HZ already gives a
2ms tick (1000ms / 500HZ = 2ms). However, if you have 1000HZ and want to avoid load
balancer from running every 1ms, you could set RDB_INTERVAL=4ms for
example to make load balancer run every 4ms. Less RDB_INTERVAL values
(or 0 to disable) could make sure tasks are balanced ASAP, but with
the cost of locking/blocking time. High RDB_INTERVAL values can relax
balancing locking but with the cost of imbalanced workload for that
period of time (i.e. if RDB_INTERVAL=100ms) there will be no balancing
for 100ms (except for newidle_balance which is not affected by RDB_INTERVAL).

If in doubt, use the default value.

#
# For architectures that want to enable the support for NUMA-affine scheduler
# balancing logic:
@@ -1234,6 +1279,7 @@ config SCHED_AUTOGROUP
select CGROUPS
select CGROUP_SCHED
select FAIR_GROUP_SCHED
default y
help
This option optimizes the scheduler for common desktop workloads by
automatically creating and populating task groups. This separation
@@ -46,6 +46,9 @@ choice
1000 Hz is the preferred choice for desktop systems and other
systems requiring fast interactive responses to events.

config HZ_2000
bool "2000 HZ"

endchoice

config HZ
@@ -54,6 +57,7 @@ config HZ
default 250 if HZ_250
default 300 if HZ_300
default 1000 if HZ_1000
default 2000 if HZ_2000

config SCHED_HRTICK
def_bool HIGH_RES_TIMERS
@@ -3943,6 +3943,11 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0;
p->se.vruntime = 0;

#ifdef CONFIG_CACULE_SCHED
p->se.cacule_node.vruntime = 0;
#endif

INIT_LIST_HEAD(&p->se.group_node);

#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -4215,6 +4220,10 @@ void wake_up_new_task(struct task_struct *p)
update_rq_clock(rq);
post_init_entity_util_avg(p);

#ifdef CONFIG_CACULE_SCHED
p->se.cacule_node.cacule_start_time = sched_clock();
#endif

activate_task(rq, p, ENQUEUE_NOCLOCK);
trace_sched_wakeup_new(p);
check_preempt_curr(rq, p, WF_FORK);
@@ -5026,7 +5035,9 @@ static void sched_tick_remote(struct work_struct *work)
struct rq *rq = cpu_rq(cpu);
struct task_struct *curr;
struct rq_flags rf;
#if !defined(CONFIG_CACULE_SCHED)
u64 delta;
#endif
int os;

/*
@@ -5046,6 +5057,7 @@ static void sched_tick_remote(struct work_struct *work)

update_rq_clock(rq);

#if !defined(CONFIG_CACULE_SCHED)
if (!is_idle_task(curr)) {
/*
* Make sure the next tick runs within a reasonable
@@ -5054,6 +5066,8 @@ static void sched_tick_remote(struct work_struct *work)
delta = rq_clock_task(rq) - curr->se.exec_start;
WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
}
#endif

curr->sched_class->task_tick(rq, curr, 0);

calc_load_nohz_remote(rq);
@@ -8980,6 +8994,14 @@ void __init sched_init(void)
BUG_ON(&dl_sched_class + 1 != &stop_sched_class);
#endif

#ifdef CONFIG_CACULE_SCHED
#ifdef CONFIG_CACULE_RDB
printk(KERN_INFO "CacULE CPU scheduler (RDB) v5.14 by Hamad Al Marri.");
#else
printk(KERN_INFO "CacULE CPU scheduler v5.14 by Hamad Al Marri.");
#endif
#endif

wait_bit_init();

#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -560,8 +560,11 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)

void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
spread, rq0_min_vruntime, spread0;
s64 MIN_vruntime = -1, max_vruntime = -1,
#if !defined(CONFIG_CACULE_SCHED)
min_vruntime, rq0_min_vruntime, spread0,
#endif
spread;
struct rq *rq = cpu_rq(cpu);
struct sched_entity *last;
unsigned long flags;
@@ -582,21 +585,27 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
last = __pick_last_entity(cfs_rq);
if (last)
max_vruntime = last->vruntime;
#if !defined(CONFIG_CACULE_SCHED)
min_vruntime = cfs_rq->min_vruntime;
rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
#endif
raw_spin_rq_unlock_irqrestore(rq, flags);
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
SPLIT_NS(MIN_vruntime));
#if !defined(CONFIG_CACULE_SCHED)
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime",
SPLIT_NS(min_vruntime));
#endif
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "max_vruntime",
SPLIT_NS(max_vruntime));
spread = max_vruntime - MIN_vruntime;
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread",
SPLIT_NS(spread));
#if !defined(CONFIG_CACULE_SCHED)
spread0 = min_vruntime - rq0_min_vruntime;
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0",
SPLIT_NS(spread0));
#endif
SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
cfs_rq->nr_spread_over);
SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running);

0 comments on commit 9e3f9e4

Please sign in to comment.