Merge pull request #621 from multics69/lavd-greedy-fix

scx_lavd: improve greedy ratio calculation and more
This commit is contained in:
Changwoo Min 2024-09-07 10:52:00 +09:00 committed by GitHub
commit 17e0e08e6e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 70 additions and 54 deletions

View File

@ -445,6 +445,12 @@ static bool is_greedy(struct task_ctx *taskc)
return taskc->greedy_ratio > 1000;
}
static bool is_eligible(struct task_ctx *taskc)
{
return !is_greedy(taskc);
}
static __always_inline
int submit_task_ctx(struct task_struct *p, struct task_ctx *taskc, u32 cpu_id)
{
@ -764,6 +770,7 @@ static void calc_sys_stat(struct sys_stat_ctx *c)
static void update_sys_stat_next(struct sys_stat_ctx *c)
{
static int cnt = 0;
u64 avg_svc_time = 0;
/*
* Update the CPU utilization to the next version.
@ -788,8 +795,10 @@ static void update_sys_stat_next(struct sys_stat_ctx *c)
stat_next->nr_violation =
calc_avg32(stat_cur->nr_violation, c->nr_violation);
stat_next->avg_svc_time = (c->nr_sched == 0) ? 0 :
c->tot_svc_time / c->nr_sched;
if (c->nr_sched > 0)
avg_svc_time = c->tot_svc_time / c->nr_sched;
stat_next->avg_svc_time =
calc_avg(stat_cur->avg_svc_time, avg_svc_time);
stat_next->nr_queued_task =
calc_avg(stat_cur->nr_queued_task, c->nr_queued_task);
@ -1168,11 +1177,6 @@ static u64 calc_freq_factor(u64 freq)
return ft + 1;
}
/*
 * A task is eligible as long as its greedy ratio does not exceed 1000.
 */
static bool is_eligible(struct task_ctx *taskc)
{
	bool over_limit = taskc->greedy_ratio > 1000;

	return !over_limit;
}
static s64 calc_static_prio_factor(struct task_struct *p)
{
/*
@ -1386,12 +1390,6 @@ static void update_stat_for_running(struct task_struct *p,
*/
advance_cur_logical_clk(taskc);
/*
* Update the current service time if necessary.
*/
if (cur_svc_time < taskc->svc_time)
WRITE_ONCE(cur_svc_time, taskc->svc_time);
/*
* Since this is the start of a new schedule for @p, we update run
* frequency in a second using an exponential weighted moving average.
@ -1460,34 +1458,22 @@ static void update_stat_for_running(struct task_struct *p,
if (taskc->victim_cpu >= 0)
cpuc->nr_preemption++;
if (is_lat_cri(taskc, stat_cur)) {
if (is_lat_cri(taskc, stat_cur))
cpuc->nr_lat_cri++;
// debugln("------------------------ lc = %llu", cpuc->nr__cri);
}
if (is_perf_cri(taskc, stat_cur)) {
if (is_perf_cri(taskc, stat_cur))
cpuc->nr_perf_cri++;
// debugln("------------------------ pc = %llu", cpuc->nr_perf_cri);
}
if (is_greedy(taskc))
cpuc->nr_greedy++;
}
/*
 * Compute the service time to charge for the task's latest run: the
 * interval between last_running_clk and last_stopping_clk, scaled by the
 * inverse of the task's weight.
 */
static u64 calc_svc_time(struct task_struct *p, struct task_ctx *taskc)
{
	/* NOTE(review): assumes both clock fields are u64 -- confirm. */
	u64 ran_for = taskc->last_stopping_clk - taskc->last_running_clk;

	return ran_for / p->scx.weight;
}
static void update_stat_for_stopping(struct task_struct *p,
struct task_ctx *taskc,
struct cpu_ctx *cpuc)
{
u64 now = bpf_ktime_get_ns();
u64 old_run_time_ns, suspended_duration, task_svc_time;
u64 old_run_time_ns, suspended_duration, task_run_time;
/*
* Update task's run_time. When a task is scheduled consecutively
@ -1500,13 +1486,12 @@ static void update_stat_for_stopping(struct task_struct *p,
*/
old_run_time_ns = taskc->run_time_ns;
suspended_duration = get_suspended_duration_and_reset(cpuc);
taskc->acc_run_time_ns += now - taskc->last_running_clk -
suspended_duration;
taskc->run_time_ns = calc_avg(taskc->run_time_ns,
taskc->acc_run_time_ns);
task_run_time = now - taskc->last_running_clk - suspended_duration;
taskc->acc_run_time_ns += task_run_time;
taskc->run_time_ns = calc_avg(taskc->run_time_ns, taskc->acc_run_time_ns);
taskc->last_stopping_clk = now;
task_svc_time = calc_svc_time(p, taskc);
taskc->svc_time += task_svc_time;
taskc->svc_time += task_run_time / p->scx.weight;
taskc->victim_cpu = (s32)LAVD_CPU_ID_NONE;
/*
@ -1519,7 +1504,13 @@ static void update_stat_for_stopping(struct task_struct *p,
/*
* Increase total service time of this CPU.
*/
cpuc->tot_svc_time += task_svc_time;
cpuc->tot_svc_time += taskc->svc_time;
/*
* Update the current service time if necessary.
*/
if (READ_ONCE(cur_svc_time) < taskc->svc_time)
WRITE_ONCE(cur_svc_time, taskc->svc_time);
}
static void update_stat_for_quiescent(struct task_struct *p,
@ -3248,8 +3239,10 @@ static s32 init_per_cpu_ctx(u64 now)
}
cpuc->turbo_core = cpuc->capacity == turbo_cap;
if (cpuc->turbo_core)
if (cpuc->turbo_core) {
bpf_cpumask_set_cpu(cpu, turbo);
debugln("CPU %d is a turbo core.", cpu);
}
}
/*

View File

@ -74,7 +74,7 @@ use rlimit::{getrlimit, setrlimit, Resource};
#[derive(Debug, Parser)]
struct Opts {
/// Automatically decide the scheduler's power mode based on system load.
/// This is a recommended mode if you don't understand the following options:
/// This is a default mode if you don't specify the following options:
#[clap(long = "autopilot", action = clap::ArgAction::SetTrue)]
autopilot: bool,
@ -151,22 +151,41 @@ struct Opts {
}
impl Opts {
/// Returns `true` when every boolean option checked here is unset and
/// neither `monitor` nor `monitor_sched_samples` carries a value.
fn nothing_specified(&self) -> bool {
    // Boolean flags: all of them must still be false.
    !self.autopilot
        && !self.autopower
        && !self.performance
        && !self.powersave
        && !self.balanced
        && !self.no_core_compaction
        && !self.prefer_smt_core
        && !self.prefer_little_core
        && !self.no_prefer_turbo_core
        && !self.no_freq_scaling
        // Optional values: both must be absent.
        && self.monitor.is_none()
        && self.monitor_sched_samples.is_none()
}
fn proc(&mut self) -> Option<&mut Self> {
if self.nothing_specified() {
self.autopilot = true;
info!("Autopilot mode is enabled by default.");
return Some(self);
}
if self.performance {
self.no_core_compaction = true;
self.prefer_smt_core = false;
self.prefer_little_core = false;
self.no_prefer_turbo_core = false;
self.no_freq_scaling = true;
}
if self.powersave {
} else if self.powersave {
self.no_core_compaction = false;
self.prefer_smt_core = true;
self.prefer_little_core = true;
self.no_prefer_turbo_core = true;
self.no_freq_scaling = false;
}
if self.balanced {
} else if self.balanced {
self.no_core_compaction = false;
self.prefer_smt_core = false;
self.prefer_little_core = false;
@ -757,7 +776,7 @@ impl<'a> Scheduler<'a> {
uei_exited!(&self.skel, uei)
}
fn set_power_profile(&mut self, mode: i32) -> Result<(), u32> {
fn set_power_profile(&mut self, mode: u32) -> Result<(), u32> {
let prog = &mut self.skel.progs.set_power_profile;
let mut args = power_arg {
power_mode: mode as c_int,
@ -792,10 +811,6 @@ impl<'a> Scheduler<'a> {
}
fn update_power_profile(&mut self, prev_profile: String) -> (bool, String) {
const LAVD_PM_PERFORMANCE: s32 = 0;
const LAVD_PM_BALANCED: s32 = 1;
const LAVD_PM_POWERSAVE: s32 = 2;
let profile = Self::read_energy_profile();
if profile == prev_profile {
// If the profile is the same, skip updating the profile for BPF.
@ -820,11 +835,19 @@ impl<'a> Scheduler<'a> {
(true, profile)
}
fn run(&mut self, autopower: bool, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
fn run(&mut self, opts: &Opts, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
let (res_ch, req_ch) = self.stats_server.channels();
let mut autopower = autopower;
let mut autopower = opts.autopower;
let mut profile = "unknown".to_string();
if opts.performance {
let _ = self.set_power_profile(LAVD_PM_PERFORMANCE);
} else if opts.powersave {
let _ = self.set_power_profile(LAVD_PM_POWERSAVE);
} else {
let _ = self.set_power_profile(LAVD_PM_BALANCED);
}
while !shutdown.load(Ordering::Relaxed) && !self.exited() {
if autopower {
(autopower, profile) = self.update_power_profile(profile);
@ -877,7 +900,6 @@ fn init_log(opts: &Opts) {
fn main() -> Result<()> {
let mut opts = Opts::parse();
opts.proc().unwrap();
if opts.version {
println!("scx_lavd {}", *build_id::SCX_FULL_VERSION);
@ -890,6 +912,8 @@ fn main() -> Result<()> {
}
init_log(&opts);
opts.proc().unwrap();
debug!("{:#?}", opts);
let shutdown = Arc::new(AtomicBool::new(false));
@ -925,7 +949,7 @@ fn main() -> Result<()> {
*build_id::SCX_FULL_VERSION
);
info!("scx_lavd scheduler starts running.");
if !sched.run(opts.autopower, shutdown.clone())?.should_restart() {
if !sched.run(&opts, shutdown.clone())?.should_restart() {
break;
}
}

View File

@ -72,7 +72,7 @@ impl SysStats {
pub fn format_header<W: Write>(w: &mut W) -> Result<()> {
writeln!(
w,
"\x1b[93m| {:8} | {:9} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |\x1b[0m",
"\x1b[93m| {:8} | {:13} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |\x1b[0m",
"MSEQ",
"SVC_TIME",
"# Q TASK",
@ -101,7 +101,7 @@ impl SysStats {
writeln!(
w,
"| {:8} | {:9} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |",
"| {:8} | {:13} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |",
self.mseq,
self.avg_svc_time,
self.nr_queued_task,
@ -406,6 +406,5 @@ pub fn monitor(intv: Duration, shutdown: Arc<AtomicBool>) -> Result<()> {
intv,
|| shutdown.load(Ordering::Relaxed),
|sysstats| sysstats.format(&mut std::io::stdout()),
);
Ok(())
)
}