scx_lavd: prioritize the turbo boost-able cores
Signed-off-by: Changwoo Min <changwoo@igalia.com>
parent: cd5b2bf664
commit: 9807e561f0
scheds/rust/scx_lavd/src/bpf/main.bpf.c

@@ -82,6 +82,7 @@ enum consts {
 	LAVD_SYS_STAT_INTERVAL_NS = (25ULL * NSEC_PER_MSEC),
 	LAVD_CC_PER_CORE_MAX_CTUIL = 500,	/* maximum per-core CPU utilization */
+	LAVD_CC_PER_TURBO_CORE_MAX_CTUIL = 750,	/* maximum per-core CPU utilization for a turbo core */
 	LAVD_CC_NR_ACTIVE_MIN = 1,		/* num of mininum active cores */
 	LAVD_CC_NR_OVRFLW = 1,			/* num of overflow cores */
 	LAVD_CC_CPU_PIN_INTERVAL = (3ULL * LAVD_TIME_ONE_SEC),
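Both limits are on LAVD's per-core utilization scale; assuming the same 0..1000 convention as the capacity field below ("CPU capacity based on 1000"), core compaction now tolerates up to 75% utilization on a turbo-capable core before counting it as overloaded, versus 50% on a regular core.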
@@ -193,6 +194,7 @@ struct cpu_ctx {
 	 */
 	u16	capacity;	/* CPU capacity based on 1000 */
 	u8	big_core;	/* is it a big core? */
+	u8	turbo_core;	/* is it a turbo core? */
 	u8	cpdom_id;	/* compute domain id (== dsq_id) */
 	u8	cpdom_alt_id;	/* compute domain id of anternative type (== dsq_id) */
 	u8	cpdom_poll_pos;	/* index to check if a DSQ of a compute domain is starving */
@@ -200,10 +200,11 @@ static volatile u64 nr_cpus_big;
 static struct sys_stat	__sys_stats[2];
 static volatile int	__sys_stat_idx;
 
-private(LAVD) struct bpf_cpumask __kptr *active_cpumask; /* CPU mask for active CPUs */
-private(LAVD) struct bpf_cpumask __kptr *ovrflw_cpumask; /* CPU mask for overflow CPUs */
+private(LAVD) struct bpf_cpumask __kptr *turbo_cpumask; /* CPU mask for turbo CPUs */
 private(LAVD) struct bpf_cpumask __kptr *big_cpumask; /* CPU mask for big CPUs */
 private(LAVD) struct bpf_cpumask __kptr *little_cpumask; /* CPU mask for little CPUs */
+private(LAVD) struct bpf_cpumask __kptr *active_cpumask; /* CPU mask for active CPUs */
+private(LAVD) struct bpf_cpumask __kptr *ovrflw_cpumask; /* CPU mask for overflow CPUs */
 private(LAVD) struct bpf_cpumask cpdom_cpumask[LAVD_CPDOM_MAX_NR]; /* CPU mask for each compute domain */
 
 /*
@@ -229,6 +230,7 @@ static u64 cur_svc_time;
  */
 const volatile bool	no_core_compaction;
 const volatile bool	no_freq_scaling;
+const volatile bool	no_prefer_turbo_core;
 const volatile u32	is_smt_active;
 const volatile u8	verbose;
 
@@ -659,8 +661,14 @@ static void collect_sys_stat(struct sys_stat_ctx *c)
 		c->new_util = (compute * LAVD_CPU_UTIL_MAX) / c->duration;
 		cpuc->util = calc_avg(cpuc->util, c->new_util);
 
-		if (cpuc->util > LAVD_CC_PER_CORE_MAX_CTUIL)
-			c->nr_violation += 1000;
+		if (cpuc->turbo_core) {
+			if (cpuc->util > LAVD_CC_PER_TURBO_CORE_MAX_CTUIL)
+				c->nr_violation += 1000;
+		}
+		else {
+			if (cpuc->util > LAVD_CC_PER_CORE_MAX_CTUIL)
+				c->nr_violation += 1000;
+		}
 
 		/*
 		 * Accmulate system-wide idle time
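For illustration, the per-core check reduces to the standalone sketch below. The harness is hypothetical; the constants mirror the enum above, and the 0..1000 utilization scale is an assumption based on the new_util formula.

#include <stdbool.h>
#include <stdio.h>

/* Values mirrored from the BPF-side enum consts above. */
#define LAVD_CC_PER_CORE_MAX_CTUIL       500  /* 50% on an assumed 0..1000 scale */
#define LAVD_CC_PER_TURBO_CORE_MAX_CTUIL 750  /* 75% on an assumed 0..1000 scale */

struct core {
        int  util;        /* smoothed per-core utilization */
        bool turbo_core;  /* capacity equals the turbo capacity */
};

/* Mirrors the violation accounting in collect_sys_stat(): every core running
 * above its type-specific limit adds 1000 to the violation score that core
 * compaction consults elsewhere when sizing the active CPU set. */
static int nr_violation(const struct core *cores, int n)
{
        int violation = 0;

        for (int i = 0; i < n; i++) {
                int limit = cores[i].turbo_core ?
                        LAVD_CC_PER_TURBO_CORE_MAX_CTUIL :
                        LAVD_CC_PER_CORE_MAX_CTUIL;
                if (cores[i].util > limit)
                        violation += 1000;
        }
        return violation;
}

int main(void)
{
        struct core cores[] = {
                { .util = 600, .turbo_core = true  },  /* below its 750 limit */
                { .util = 600, .turbo_core = false },  /* above its 500 limit */
        };

        printf("%d\n", nr_violation(cores, 2));  /* prints 1000 */
        return 0;
}

The same utilization thus counts against a regular core but not a turbo core, biasing the scheduler toward loading turbo cores harder before it treats the system as overcommitted.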
@@ -1391,21 +1399,41 @@ static s32 pick_idle_cpu(struct task_struct *p, struct task_ctx *taskc,
 	if (bpf_cpumask_empty(cast_mask(a_cpumask)))
 		goto start_omask;
 
-	if (is_perf_cri(taskc, stat_cur))
+	if (is_perf_cri(taskc, stat_cur) || no_core_compaction) {
 		bpf_cpumask_and(t_cpumask, cast_mask(a_cpumask), cast_mask(big));
-	else
+	}
+	else {
 		bpf_cpumask_and(t_cpumask, cast_mask(a_cpumask), cast_mask(little));
+		goto start_llc_mask;
+	}
 
-	bpf_cpumask_and(t2_cpumask, cast_mask(t_cpumask), cast_mask(cpdom_mask_prev));
+	/*
+	 * Pick an idle core among turbo boost-enabled CPUs with a matching
+	 * core type.
+	 */
+start_turbo_mask:
+	if (no_prefer_turbo_core || !turbo_cpumask)
+		goto start_llc_mask;
+
+	bpf_cpumask_and(t2_cpumask, cast_mask(t_cpumask), cast_mask(turbo_cpumask));
 	if (bpf_cpumask_empty(cast_mask(t2_cpumask)))
-		goto start_tmask;
+		goto start_llc_mask;
 
 	cpu_id = pick_idle_cpu_in(t2_cpumask);
 	if (cpu_id >= 0) {
 		*is_idle = true;
 		goto unlock_out;
 	}
 
 	/*
 	 * Pick an idle core among active CPUs with a matching core type within
 	 * the prev CPU's LLC domain.
 	 */
-start_t2mask:
+start_llc_mask:
+	bpf_cpumask_and(t2_cpumask, cast_mask(t_cpumask), cast_mask(cpdom_mask_prev));
+	if (bpf_cpumask_empty(cast_mask(t2_cpumask)))
+		goto start_tmask;
+
 	cpu_id = pick_idle_cpu_in(t2_cpumask);
 	if (cpu_id >= 0) {
 		*is_idle = true;
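The control flow above establishes a strict preference order. A compressed model follows; the mask type and helpers are hypothetical stand-ins for the BPF cpumask machinery, and the tail steps (start_tmask and the active/overflow fallbacks) are summarized from the gotos rather than shown in this hunk.

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long mask_t;  /* one bit per CPU (hypothetical) */

static mask_t mask_and(mask_t a, mask_t b) { return a & b; }

static int pick_idle_in(mask_t m)
{
        for (int cpu = 0; cpu < 64; cpu++)
                if (m & (1ULL << cpu))
                        return cpu;  /* pretend the lowest set CPU is idle */
        return -1;
}

static int pick_idle_cpu_model(mask_t type_mask, bool on_big_cores,
                               mask_t turbo, mask_t prev_llc,
                               mask_t active, mask_t ovrflw,
                               bool no_prefer_turbo_core)
{
        int cpu;

        /* start_turbo_mask: only placements on big cores (or with core
         * compaction disabled) try turbo cores first; little-core
         * placements jump straight to the LLC step via start_llc_mask. */
        if (on_big_cores && !no_prefer_turbo_core &&
            (cpu = pick_idle_in(mask_and(type_mask, turbo))) >= 0)
                return cpu;

        /* start_llc_mask: stay within the previous CPU's LLC domain. */
        if ((cpu = pick_idle_in(mask_and(type_mask, prev_llc))) >= 0)
                return cpu;

        /* start_tmask: any core of the matching type, then any active
         * CPU, and finally the overflow set (start_omask). */
        if ((cpu = pick_idle_in(type_mask)) >= 0)
                return cpu;
        if ((cpu = pick_idle_in(active)) >= 0)
                return cpu;
        return pick_idle_in(ovrflw);
}

int main(void)
{
        /* Hypothetical 8-CPU system: CPUs 0-3 big, 0-1 turbo, LLC = 0-3. */
        mask_t big = 0x0f, turbo = 0x03, llc = 0x0f, active = 0x3f, ovf = 0xc0;

        printf("%d\n", pick_idle_cpu_model(big, true, turbo, llc,
                                           active, ovf, false));  /* prints 0 */
        return 0;
}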
@@ -2755,6 +2783,10 @@ static int init_cpumasks(void)
 	if (err)
 		goto out;
 
+	err = calloc_cpumask(&turbo_cpumask);
+	if (err)
+		goto out;
+
 	err = calloc_cpumask(&big_cpumask);
 	if (err)
 		goto out;
@@ -2777,11 +2809,6 @@ out:
 
 static u16 get_cpuperf_cap(s32 cpu)
 {
-	/*
-	 * If CPU's capacitiy values are all 1024, then let's just use the
-	 * capacity value from userspace, which are calculated using each CPU's
-	 * maximum frequency.
-	 */
 	if (cpu >= 0 && cpu < LAVD_CPU_ID_MAX)
 		return __cpu_capacity_hint[cpu];
 
@@ -2789,25 +2816,51 @@ static u16 get_cpuperf_cap(s32 cpu)
 	return 1;
 }
 
+static u16 get_cputurbo_cap(void)
+{
+	u16 turbo_cap = 0;
+	int nr_turbo = 0, cpu;
+
+	/*
+	 * Find the maximum CPU frequency
+	 */
+	for (cpu = 0; cpu < LAVD_CPU_ID_MAX; cpu++) {
+		if (__cpu_capacity_hint[cpu] > turbo_cap) {
+			turbo_cap = __cpu_capacity_hint[cpu];
+			nr_turbo++;
+		}
+	}
+
+	/*
+	 * If all CPU's frequencies are the same, ignore the turbo.
+	 */
+	if (nr_turbo <= 1)
+		turbo_cap = 0;
+
+	return turbo_cap;
+}
+
 static s32 init_per_cpu_ctx(u64 now)
 {
 	struct cpu_ctx *cpuc;
-	struct bpf_cpumask *big, *little, *active, *ovrflw, *cd_cpumask;
+	struct bpf_cpumask *turbo, *big, *little, *active, *ovrflw, *cd_cpumask;
 	struct cpdom_ctx *cpdomc;
 	int cpu, i, j, err = 0;
 	u64 cpdom_id;
 	u32 sum_capacity = 0, avg_capacity;
+	u16 turbo_cap;
 
 	bpf_rcu_read_lock();
 
 	/*
 	 * Prepare cpumasks.
 	 */
+	turbo = turbo_cpumask;
 	big = big_cpumask;
 	little = little_cpumask;
 	active = active_cpumask;
 	ovrflw = ovrflw_cpumask;
-	if (!big|| !little || !active || !ovrflw) {
+	if (!turbo || !big|| !little || !active || !ovrflw) {
 		scx_bpf_error("Failed to prepare cpumasks.");
 		err = -ENOMEM;
 		goto unlock_out;
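get_cputurbo_cap() infers turbo capability purely from the userspace-provided capacity hints: any CPU whose capacity equals the global maximum is later flagged as a turbo core, and nr_turbo <= 1 (the running maximum never increased after its first assignment) is treated as a homogeneous system where turbo should be ignored. Note that nr_turbo counts increases of the running maximum during the scan, not the number of CPUs sitting at that maximum. The removed get_cpuperf_cap() comment says the hints are "calculated using each CPU's maximum frequency" in userspace; a minimal sketch of one plausible derivation follows, where the sysfs path and 1024 scaling are assumptions for illustration, not the exact scx_lavd userspace code.

#include <stdio.h>

#define NR_CPUS 64  /* assumed upper bound for this sketch */

/* Read a CPU's maximum frequency in kHz from cpufreq sysfs. */
static long max_freq_khz(int cpu)
{
        char path[128];
        long khz = -1;
        FILE *f;

        snprintf(path, sizeof(path),
                 "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpu);
        f = fopen(path, "r");
        if (!f)
                return -1;  /* offline CPU or no cpufreq driver */
        if (fscanf(f, "%ld", &khz) != 1)
                khz = -1;
        fclose(f);
        return khz;
}

int main(void)
{
        long freq[NR_CPUS], top = 0;
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                freq[cpu] = max_freq_khz(cpu);
                if (freq[cpu] > top)
                        top = freq[cpu];
        }
        for (cpu = 0; cpu < NR_CPUS && top > 0; cpu++) {
                if (freq[cpu] < 0)
                        continue;
                /* Capacity hint scaled so the fastest CPU gets 1024; CPUs
                 * that land exactly on the maximum are the ones
                 * get_cputurbo_cap() would flag as turbo cores. */
                printf("cpu%d capacity=%ld\n", cpu, freq[cpu] * 1024 / top);
        }
        return 0;
}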
@@ -2848,6 +2901,11 @@ static s32 init_per_cpu_ctx(u64 now)
 		sum_capacity += cpuc->capacity;
 	}
 
+	/*
+	 * Get turbo capacitiy.
+	 */
+	turbo_cap = get_cputurbo_cap();
+
 	/*
 	 * Classify CPU into BIG or little cores based on their average capacity.
 	 */
||||
@ -2874,6 +2932,10 @@ static s32 init_per_cpu_ctx(u64 now)
|
||||
bpf_cpumask_set_cpu(cpu, little);
|
||||
bpf_cpumask_set_cpu(cpu, ovrflw);
|
||||
}
|
||||
|
||||
cpuc->turbo_core = cpuc->capacity == turbo_cap;
|
||||
if (cpuc->turbo_core)
|
||||
bpf_cpumask_set_cpu(cpu, turbo);
|
||||
}
|
||||
|
||||
/*
|
||||
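To make the classification concrete: on a hypothetical 4+4 hybrid machine where the performance cores report capacity 1024 and the efficiency cores 614, turbo_cap comes back as 1024, the four performance cores land in both the big and turbo cpumasks, and the efficiency cores stay little. On a homogeneous machine where every core reports 1024, nr_turbo stays at 1, turbo_cap is 0, no core matches it, and the empty turbo mask makes pick_idle_cpu() fall through to the LLC step.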
scheds/rust/scx_lavd/src/main.rs

@@ -101,6 +101,10 @@ struct Opts {
     #[clap(long = "prefer-little-core", action = clap::ArgAction::SetTrue)]
     prefer_little_core: bool,
 
+    /// Do not specifically prefer to schedule on turbo cores.
+    #[clap(long = "no-prefer-turbo-core", action = clap::ArgAction::SetTrue)]
+    no_prefer_turbo_core: bool,
+
     /// Disable controlling the CPU frequency. In order to improve latency and responsiveness of
     /// performance-critical tasks, scx_lavd increases the CPU frequency even if CPU usage is low.
     /// See main.bpf.c for more info. Normally set by the power mode, but can be set independently
@@ -129,18 +133,21 @@ impl Opts {
             self.no_core_compaction = true;
             self.prefer_smt_core = false;
             self.prefer_little_core = false;
+            self.no_prefer_turbo_core = false;
             self.no_freq_scaling = true;
         }
         if self.powersave {
             self.no_core_compaction = false;
             self.prefer_smt_core = true;
             self.prefer_little_core = true;
+            self.no_prefer_turbo_core = true;
             self.no_freq_scaling = false;
         }
         if self.balanced {
             self.no_core_compaction = false;
             self.prefer_smt_core = false;
             self.prefer_little_core = false;
+            self.no_prefer_turbo_core = false;
             self.no_freq_scaling = false;
         }
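Net effect of the power-mode presets on the new knob:

    performance: no_prefer_turbo_core = false (turbo cores preferred)
    balanced:    no_prefer_turbo_core = false (turbo cores preferred)
    powersave:   no_prefer_turbo_core = true  (turbo preference off)

which is consistent with powersave's other choices (prefer SMT and little cores) that trade peak performance for power.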
@@ -516,6 +523,7 @@ impl<'a> Scheduler<'a> {
         skel.maps.bss_data.nr_cpus_onln = nr_cpus_onln;
         skel.maps.rodata_data.no_core_compaction = opts.no_core_compaction;
         skel.maps.rodata_data.no_freq_scaling = opts.no_freq_scaling;
+        skel.maps.rodata_data.no_prefer_turbo_core = opts.no_prefer_turbo_core;
         skel.maps.rodata_data.is_smt_active = match FlatTopology::is_smt_active() {
             Ok(ret) => (ret == 1) as u32,
             Err(_) => 0,
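The flag is copied into the BPF skeleton's read-only data before load, so the preference can also be switched off per invocation with the new CLI flag:

    scx_lavd --no-prefer-turbo-core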