mirror of
https://github.com/JakeHillion/scx.git
synced 2024-11-26 03:20:24 +00:00
Merge pull request #591 from multics69/lavd-turbo3
scx_lavd: introduce "autopilot" mode and misc. optimization & bug fix
This commit is contained in:
commit
4d8bf870a1
@ -89,6 +89,10 @@ enum consts {
|
||||
LAVD_CC_CPU_PIN_INTERVAL_DIV = (LAVD_CC_CPU_PIN_INTERVAL /
|
||||
LAVD_SYS_STAT_INTERVAL_NS),
|
||||
|
||||
LAVD_AP_LOW_UTIL = 50, /* powersave mode when cpu util <= 5% */
|
||||
LAVD_AP_HIGH_UTIL = 300, /* balanced mode when 5% < cpu util <= 30%,
|
||||
performance mode when cpu util > 30% */
|
||||
|
||||
LAVD_CPDOM_MAX_NR = 32, /* maximum number of compute domain */
|
||||
LAVD_CPDOM_MAX_DIST = 4, /* maximum distance from one compute domain to another */
|
||||
LAVD_CPDOM_STARV_NS = (5ULL * NSEC_PER_MSEC),
|
||||
@ -299,6 +303,8 @@ enum {
|
||||
LAVD_PM_PERFORMANCE = 0,
|
||||
LAVD_PM_BALANCED = 1,
|
||||
LAVD_PM_POWERSAVE = 2,
|
||||
|
||||
LAVD_PM_MAX = 3
|
||||
};
|
||||
|
||||
struct power_arg {
|
||||
|
@ -229,10 +229,11 @@ static u64 cur_svc_time;
|
||||
/*
|
||||
* Options
|
||||
*/
|
||||
volatile bool no_core_compaction;
|
||||
volatile bool no_freq_scaling;
|
||||
volatile bool no_prefer_turbo_core;
|
||||
volatile bool is_powersave_mode;
|
||||
volatile bool no_core_compaction;
|
||||
volatile bool no_freq_scaling;
|
||||
volatile bool no_prefer_turbo_core;
|
||||
volatile bool is_powersave_mode;
|
||||
const volatile bool is_autopilot_on;
|
||||
const volatile u32 is_smt_active;
|
||||
const volatile u8 verbose;
|
||||
|
||||
@ -315,7 +316,7 @@ struct {
|
||||
} introspec_msg SEC(".maps");
|
||||
|
||||
static u16 get_nice_prio(struct task_struct *p);
|
||||
static void adjust_slice_boost(struct cpu_ctx *cpuc, struct task_ctx *taskc);
|
||||
static int reinit_active_cpumask_for_performance(void);
|
||||
|
||||
static u64 sigmoid_u64(u64 v, u64 max)
|
||||
{
|
||||
@ -593,6 +594,7 @@ static void init_sys_stat_ctx(struct sys_stat_ctx *c)
|
||||
c->stat_next = get_sys_stat_next();
|
||||
c->now = bpf_ktime_get_ns();
|
||||
c->duration = c->now - c->stat_cur->last_update_clk;
|
||||
c->stat_next->last_update_clk = c->now;
|
||||
}
|
||||
|
||||
static void collect_sys_stat(struct sys_stat_ctx *c)
|
||||
@ -649,7 +651,7 @@ static void collect_sys_stat(struct sys_stat_ctx *c)
|
||||
bool ret = __sync_bool_compare_and_swap(
|
||||
&cpuc->idle_start_clk, old_clk, c->now);
|
||||
if (ret) {
|
||||
c->idle_total += c->now - old_clk;
|
||||
cpuc->idle_total += c->now - old_clk;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -660,6 +662,7 @@ static void collect_sys_stat(struct sys_stat_ctx *c)
|
||||
u64 compute = 0;
|
||||
if (c->duration > cpuc->idle_total)
|
||||
compute = c->duration - cpuc->idle_total;
|
||||
|
||||
c->new_util = (compute * LAVD_CPU_UTIL_MAX) / c->duration;
|
||||
cpuc->util = calc_avg(cpuc->util, c->new_util);
|
||||
|
||||
@ -691,9 +694,9 @@ static void calc_sys_stat(struct sys_stat_ctx *c)
|
||||
c->duration_total = c->duration * nr_cpus_onln;
|
||||
if (c->duration_total > c->idle_total)
|
||||
c->compute_total = c->duration_total - c->idle_total;
|
||||
|
||||
c->new_util = (c->compute_total * LAVD_CPU_UTIL_MAX) /
|
||||
c->duration_total;
|
||||
else
|
||||
c->compute_total = 0;
|
||||
c->new_util = (c->compute_total * LAVD_CPU_UTIL_MAX)/c->duration_total;
|
||||
|
||||
if (c->sched_nr == 0) {
|
||||
/*
|
||||
@ -757,7 +760,6 @@ static void do_update_sys_stat(void)
|
||||
/*
|
||||
* Make the next version atomically visible.
|
||||
*/
|
||||
c.stat_next->last_update_clk = c.now;
|
||||
flip_sys_stat();
|
||||
}
|
||||
|
||||
@ -772,7 +774,7 @@ static u64 calc_nr_active_cpus(struct sys_stat *stat_cur)
|
||||
nr_active /= (LAVD_CC_PER_CORE_MAX_CTUIL * 1000);
|
||||
|
||||
/*
|
||||
* If a few CPUs are particularly busy, boost the overflow CPUs by 2x.
|
||||
* If a few CPUs are particularly busy, boost the active CPUs more.
|
||||
*/
|
||||
nr_active += min(LAVD_CC_NR_OVRFLW, (stat_cur->nr_violation) / 1000);
|
||||
nr_active = max(min(nr_active, nr_cpus_onln),
|
||||
@ -900,10 +902,91 @@ unlock_out:
|
||||
bpf_rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
 * Switch the scheduler to the given power profile (performance, balanced,
 * or powersave) by flipping the global tuning knobs. @util is used only
 * for debug logging.
 *
 * Returns 0 on success (or when already in the requested mode), and
 * -EINVAL for an unknown @power_mode.
 */
int do_set_power_profile(s32 power_mode, int util)
{
	static s32 cur_mode = LAVD_PM_MAX;

	/*
	 * Skip setting the mode if already in the same mode.
	 */
	if (cur_mode == power_mode)
		return 0;

	/*
	 * Change the power mode.
	 */
	switch (power_mode) {
	case LAVD_PM_PERFORMANCE:
		no_core_compaction = true;
		no_freq_scaling = true;
		no_prefer_turbo_core = false;
		is_powersave_mode = false;

		/*
		 * Since the core compaction becomes off, we need to
		 * reinitialize the active and overflow cpumask for performance
		 * mode.
		 */
		reinit_active_cpumask_for_performance();
		debugln("Set the scheduler's power profile to performance mode: %d", util);
		break;
	case LAVD_PM_BALANCED:
		no_core_compaction = false;
		no_freq_scaling = false;
		no_prefer_turbo_core = false;
		is_powersave_mode = false;
		debugln("Set the scheduler's power profile to balanced mode: %d", util);
		break;
	case LAVD_PM_POWERSAVE:
		no_core_compaction = false;
		no_freq_scaling = false;
		no_prefer_turbo_core = true;
		is_powersave_mode = true;
		debugln("Set the scheduler's power profile to power-save mode: %d", util);
		break;
	default:
		/*
		 * Do not cache an invalid mode. The original code updated
		 * cur_mode before validating, so a repeated invalid request
		 * was wrongly reported as a no-op success (returned 0).
		 */
		return -EINVAL;
	}

	/*
	 * Remember the mode only after it has been validated and applied.
	 */
	cur_mode = power_mode;

	return 0;
}
|
||||
|
||||
static int do_autopilot(void)
|
||||
{
|
||||
struct sys_stat *stat_cur = get_sys_stat_cur();
|
||||
|
||||
/*
|
||||
* If the CPU utiulization is very low (say <= 5%), it means high
|
||||
* performance is not required. We run the scheduler in powersave mode
|
||||
* to save energy consumption.
|
||||
*/
|
||||
if (stat_cur->util <= LAVD_AP_LOW_UTIL)
|
||||
return do_set_power_profile(LAVD_PM_POWERSAVE, stat_cur->util);
|
||||
|
||||
/*
|
||||
* If the CPU utiulization is moderate (say > 5%, <= 30%), we run the
|
||||
* scheduler in balanced mode. Actually, balanced mode can save energy
|
||||
* consumption only under moderate CPU load.
|
||||
*/
|
||||
if (stat_cur->util <= LAVD_AP_HIGH_UTIL)
|
||||
return do_set_power_profile(LAVD_PM_BALANCED, stat_cur->util);
|
||||
|
||||
/*
|
||||
* If the CPU utilization is high enough (say > 30%), we run the
|
||||
* scheduler in performance mode. The system indeed needs perrformance
|
||||
* also there is little energy benefit even under balanced mode anyway.
|
||||
*/
|
||||
return do_set_power_profile(LAVD_PM_PERFORMANCE, stat_cur->util);
|
||||
}
|
||||
|
||||
/*
 * Periodic statistics pass: refresh the system-wide stats first, then let
 * the autopilot (if enabled) re-pick the power profile based on the fresh
 * utilization, and finally run core compaction unless it is disabled
 * (the autopilot step may itself toggle no_core_compaction).
 */
static void update_sys_stat(void)
{
	do_update_sys_stat();

	if (is_autopilot_on)
		do_autopilot();

	if (!no_core_compaction)
		do_core_compaction();
}
|
||||
@ -1356,11 +1439,23 @@ static s32 pick_idle_cpu(struct task_struct *p, struct task_ctx *taskc,
|
||||
struct bpf_cpumask *active, *ovrflw, *big, *little, *cpdom_mask_prev;
|
||||
s32 cpu_id;
|
||||
|
||||
bpf_rcu_read_lock();
|
||||
/*
|
||||
* If a task can run only on a single CPU (e.g., per-CPU kworker), we
|
||||
* simply check if a task is still pinned on the prev_cpu and go.
|
||||
*/
|
||||
if (p->nr_cpus_allowed == 1 &&
|
||||
bpf_cpumask_test_cpu(prev_cpu, p->cpus_ptr)) {
|
||||
if (scx_bpf_test_and_clear_cpu_idle(prev_cpu))
|
||||
*is_idle = true;
|
||||
cpu_id = prev_cpu;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Prepare cpumaks.
|
||||
*/
|
||||
bpf_rcu_read_lock();
|
||||
|
||||
cpuc = get_cpu_ctx();
|
||||
cpuc_prev = get_cpu_ctx_id(prev_cpu);
|
||||
if (!cpuc || !cpuc_prev || !taskc) {
|
||||
@ -1515,6 +1610,7 @@ start_any_mask:
|
||||
*/
|
||||
unlock_out:
|
||||
bpf_rcu_read_unlock();
|
||||
out:
|
||||
return cpu_id;
|
||||
}
|
||||
|
||||
@ -2852,6 +2948,50 @@ static u16 get_cputurbo_cap(void)
|
||||
return turbo_cap;
|
||||
}
|
||||
|
||||
/*
 * Reinitialize the active and overflow cpumasks for performance mode.
 *
 * Once core compaction is turned off (performance mode), all online CPUs
 * should be usable again: big cores are added to the active mask and the
 * remaining cores to the overflow mask.
 *
 * Returns 0 on success, -ENOMEM if the cpumasks are not ready, or -ESRCH
 * if a per-CPU context lookup fails.
 */
static int reinit_active_cpumask_for_performance(void)
{
	struct cpu_ctx *cpuc;
	struct bpf_cpumask *active, *ovrflw;
	int cpu, err = 0;

	barrier();
	bpf_rcu_read_lock();

	/*
	 * Prepare cpumasks.
	 */
	active = active_cpumask;
	ovrflw = ovrflw_cpumask;
	if (!active || !ovrflw) {
		scx_bpf_error("Failed to prepare cpumasks.");
		err = -ENOMEM;
		goto unlock_out;
	}

	/*
	 * Once core compaction becomes off in performance mode,
	 * reinitialize active/overflow cpumasks to reflect the mode change.
	 */
	bpf_for(cpu, 0, nr_cpus_onln) {
		cpuc = get_cpu_ctx_id(cpu);
		if (!cpuc) {
			scx_bpf_error("Failed to lookup cpu_ctx: %d", cpu);
			err = -ESRCH;
			goto unlock_out;
		}

		/* Big cores become active; the rest go to overflow. */
		if (cpuc->big_core)
			bpf_cpumask_set_cpu(cpu, active);
		else
			bpf_cpumask_set_cpu(cpu, ovrflw);
	}

unlock_out:
	bpf_rcu_read_unlock();
	return err;
}
|
||||
|
||||
static s32 init_per_cpu_ctx(u64 now)
|
||||
{
|
||||
struct cpu_ctx *cpuc;
|
||||
@ -3081,30 +3221,7 @@ void BPF_STRUCT_OPS(lavd_exit, struct scx_exit_info *ei)
|
||||
/*
 * Userspace-invoked (SEC("syscall")) entry point to change the power
 * profile. The mode-specific knob handling lives in do_set_power_profile();
 * the merge residue that duplicated the old inline switch here (and left
 * the delegating call unreachable after `return 0;`) is removed.
 *
 * Returns 0 on success or -EINVAL for an unknown power mode.
 */
SEC("syscall")
int set_power_profile(struct power_arg *input)
{
	return do_set_power_profile(input->power_mode, 0);
}
|
||||
|
||||
SCX_OPS_DEFINE(lavd_ops,
|
||||
|
@ -72,9 +72,14 @@ use rlimit::{getrlimit, setrlimit, Resource};
|
||||
/// See the more detailed overview of the LAVD design at main.bpf.c.
|
||||
#[derive(Debug, Parser)]
|
||||
struct Opts {
|
||||
/// Automatically decide the power mode based on the current energy profile.
|
||||
#[clap(long = "auto", action = clap::ArgAction::SetTrue)]
|
||||
auto: bool,
|
||||
/// Automatically decide the scheduler's power mode based on system load.
|
||||
/// This is a recommended mode if you don't understand the following options:
|
||||
#[clap(long = "autopilot", action = clap::ArgAction::SetTrue)]
|
||||
autopilot: bool,
|
||||
|
||||
/// Automatically decide the scheduler's power mode based on the system's energy profile.
|
||||
#[clap(long = "autopower", action = clap::ArgAction::SetTrue)]
|
||||
autopower: bool,
|
||||
|
||||
/// Run in performance mode to get maximum performance.
|
||||
#[clap(long = "performance", action = clap::ArgAction::SetTrue)]
|
||||
@ -551,6 +556,7 @@ impl<'a> Scheduler<'a> {
|
||||
Ok(ret) => (ret == 1) as u32,
|
||||
Err(_) => 0,
|
||||
};
|
||||
skel.maps.rodata_data.is_autopilot_on = opts.autopilot;
|
||||
skel.maps.rodata_data.verbose = opts.verbose;
|
||||
}
|
||||
|
||||
@ -692,32 +698,43 @@ impl<'a> Scheduler<'a> {
|
||||
res.unwrap_or_else(|_| "none".to_string())
|
||||
}
|
||||
|
||||
fn update_power_profile(&mut self) -> bool {
|
||||
fn update_power_profile(&mut self, prev_profile: String) -> (bool, String) {
|
||||
const LAVD_PM_PERFORMANCE: s32 = 0;
|
||||
const LAVD_PM_BALANCED: s32 = 1;
|
||||
const LAVD_PM_POWERSAVE: s32 = 2;
|
||||
|
||||
let profile = Self::read_energy_profile();
|
||||
if profile == "performance" {
|
||||
let _ = self.set_power_profile(LAVD_PM_PERFORMANCE);
|
||||
} else if profile == "balance_performance" {
|
||||
let _ = self.set_power_profile(LAVD_PM_BALANCED);
|
||||
} else if profile == "power" {
|
||||
let _ = self.set_power_profile(LAVD_PM_POWERSAVE);
|
||||
} else {
|
||||
return false;
|
||||
if profile == prev_profile {
|
||||
// If the profile is the same, skip updaring the profile for BPF.
|
||||
return (true, profile);
|
||||
}
|
||||
|
||||
true
|
||||
if profile == "performance" {
|
||||
let _ = self.set_power_profile(LAVD_PM_PERFORMANCE);
|
||||
info!("Set the scheduler's power profile to performance mode.");
|
||||
} else if profile == "balance_performance" {
|
||||
let _ = self.set_power_profile(LAVD_PM_BALANCED);
|
||||
info!("Set the scheduler's power profile to balanced mode.");
|
||||
} else if profile == "power" {
|
||||
let _ = self.set_power_profile(LAVD_PM_POWERSAVE);
|
||||
info!("Set the scheduler's power profile to power-save mode.");
|
||||
} else {
|
||||
// We don't know how to handle an unknown energy profile,
|
||||
// so we just give up updating the profile from now on.
|
||||
return (false, profile);
|
||||
}
|
||||
|
||||
(true, profile)
|
||||
}
|
||||
|
||||
fn run(&mut self, auto: bool, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
|
||||
fn run(&mut self, autopower: bool, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
|
||||
let (res_ch, req_ch) = self.stats_server.channels();
|
||||
let mut auto = auto;
|
||||
let mut autopower = autopower;
|
||||
let mut profile = "unknown".to_string();
|
||||
|
||||
while !shutdown.load(Ordering::Relaxed) && !self.exited() {
|
||||
if auto {
|
||||
auto = self.update_power_profile();
|
||||
if autopower {
|
||||
(autopower, profile) = self.update_power_profile(profile);
|
||||
}
|
||||
|
||||
match req_ch.recv_timeout(Duration::from_secs(1)) {
|
||||
@ -799,7 +816,7 @@ fn main() -> Result<()> {
|
||||
*build_id::SCX_FULL_VERSION
|
||||
);
|
||||
info!("scx_lavd scheduler starts running.");
|
||||
if !sched.run(opts.auto, shutdown.clone())?.should_restart() {
|
||||
if !sched.run(opts.autopower, shutdown.clone())?.should_restart() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user