Merge pull request #621 from multics69/lavd-greedy-fix

scx_lavd: improve greedy ratio calculation and more
This commit is contained in:
Changwoo Min 2024-09-07 10:52:00 +09:00 committed by GitHub
commit 17e0e08e6e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 70 additions and 54 deletions

View File

@ -445,6 +445,12 @@ static bool is_greedy(struct task_ctx *taskc)
return taskc->greedy_ratio > 1000; return taskc->greedy_ratio > 1000;
} }
static bool is_eligible(struct task_ctx *taskc)
{
return !is_greedy(taskc);
}
static __always_inline static __always_inline
int submit_task_ctx(struct task_struct *p, struct task_ctx *taskc, u32 cpu_id) int submit_task_ctx(struct task_struct *p, struct task_ctx *taskc, u32 cpu_id)
{ {
@ -764,6 +770,7 @@ static void calc_sys_stat(struct sys_stat_ctx *c)
static void update_sys_stat_next(struct sys_stat_ctx *c) static void update_sys_stat_next(struct sys_stat_ctx *c)
{ {
static int cnt = 0; static int cnt = 0;
u64 avg_svc_time = 0;
/* /*
* Update the CPU utilization to the next version. * Update the CPU utilization to the next version.
@ -788,8 +795,10 @@ static void update_sys_stat_next(struct sys_stat_ctx *c)
stat_next->nr_violation = stat_next->nr_violation =
calc_avg32(stat_cur->nr_violation, c->nr_violation); calc_avg32(stat_cur->nr_violation, c->nr_violation);
stat_next->avg_svc_time = (c->nr_sched == 0) ? 0 : if (c->nr_sched > 0)
c->tot_svc_time / c->nr_sched; avg_svc_time = c->tot_svc_time / c->nr_sched;
stat_next->avg_svc_time =
calc_avg(stat_cur->avg_svc_time, avg_svc_time);
stat_next->nr_queued_task = stat_next->nr_queued_task =
calc_avg(stat_cur->nr_queued_task, c->nr_queued_task); calc_avg(stat_cur->nr_queued_task, c->nr_queued_task);
@ -1168,11 +1177,6 @@ static u64 calc_freq_factor(u64 freq)
return ft + 1; return ft + 1;
} }
static bool is_eligible(struct task_ctx *taskc)
{
return taskc->greedy_ratio <= 1000;
}
static s64 calc_static_prio_factor(struct task_struct *p) static s64 calc_static_prio_factor(struct task_struct *p)
{ {
/* /*
@ -1386,12 +1390,6 @@ static void update_stat_for_running(struct task_struct *p,
*/ */
advance_cur_logical_clk(taskc); advance_cur_logical_clk(taskc);
/*
* Update the current service time if necessary.
*/
if (cur_svc_time < taskc->svc_time)
WRITE_ONCE(cur_svc_time, taskc->svc_time);
/* /*
* Since this is the start of a new schedule for @p, we update run * Since this is the start of a new schedule for @p, we update run
* frequency in a second using an exponential weighted moving average. * frequency in a second using an exponential weighted moving average.
@ -1460,34 +1458,22 @@ static void update_stat_for_running(struct task_struct *p,
if (taskc->victim_cpu >= 0) if (taskc->victim_cpu >= 0)
cpuc->nr_preemption++; cpuc->nr_preemption++;
if (is_lat_cri(taskc, stat_cur)) { if (is_lat_cri(taskc, stat_cur))
cpuc->nr_lat_cri++; cpuc->nr_lat_cri++;
// debugln("------------------------ lc = %llu", cpuc->nr__cri);
}
if (is_perf_cri(taskc, stat_cur)) { if (is_perf_cri(taskc, stat_cur))
cpuc->nr_perf_cri++; cpuc->nr_perf_cri++;
// debugln("------------------------ pc = %llu", cpuc->nr_perf_cri);
}
if (is_greedy(taskc)) if (is_greedy(taskc))
cpuc->nr_greedy++; cpuc->nr_greedy++;
} }
/*
 * Compute the service time to charge for the task's most recent run:
 * the span between its last running and last stopping timestamps,
 * divided by the task's scx weight.
 */
static u64 calc_svc_time(struct task_struct *p, struct task_ctx *taskc)
{
	u64 ran_for = taskc->last_stopping_clk - taskc->last_running_clk;

	return ran_for / p->scx.weight;
}
static void update_stat_for_stopping(struct task_struct *p, static void update_stat_for_stopping(struct task_struct *p,
struct task_ctx *taskc, struct task_ctx *taskc,
struct cpu_ctx *cpuc) struct cpu_ctx *cpuc)
{ {
u64 now = bpf_ktime_get_ns(); u64 now = bpf_ktime_get_ns();
u64 old_run_time_ns, suspended_duration, task_svc_time; u64 old_run_time_ns, suspended_duration, task_run_time;
/* /*
* Update task's run_time. When a task is scheduled consecutively * Update task's run_time. When a task is scheduled consecutively
@ -1500,13 +1486,12 @@ static void update_stat_for_stopping(struct task_struct *p,
*/ */
old_run_time_ns = taskc->run_time_ns; old_run_time_ns = taskc->run_time_ns;
suspended_duration = get_suspended_duration_and_reset(cpuc); suspended_duration = get_suspended_duration_and_reset(cpuc);
taskc->acc_run_time_ns += now - taskc->last_running_clk - task_run_time = now - taskc->last_running_clk - suspended_duration;
suspended_duration; taskc->acc_run_time_ns += task_run_time;
taskc->run_time_ns = calc_avg(taskc->run_time_ns, taskc->run_time_ns = calc_avg(taskc->run_time_ns, taskc->acc_run_time_ns);
taskc->acc_run_time_ns);
taskc->last_stopping_clk = now; taskc->last_stopping_clk = now;
task_svc_time = calc_svc_time(p, taskc);
taskc->svc_time += task_svc_time; taskc->svc_time += task_run_time / p->scx.weight;
taskc->victim_cpu = (s32)LAVD_CPU_ID_NONE; taskc->victim_cpu = (s32)LAVD_CPU_ID_NONE;
/* /*
@ -1519,7 +1504,13 @@ static void update_stat_for_stopping(struct task_struct *p,
/* /*
* Increase total service time of this CPU. * Increase total service time of this CPU.
*/ */
cpuc->tot_svc_time += task_svc_time; cpuc->tot_svc_time += taskc->svc_time;
/*
* Update the current service time if necessary.
*/
if (READ_ONCE(cur_svc_time) < taskc->svc_time)
WRITE_ONCE(cur_svc_time, taskc->svc_time);
} }
static void update_stat_for_quiescent(struct task_struct *p, static void update_stat_for_quiescent(struct task_struct *p,
@ -3248,8 +3239,10 @@ static s32 init_per_cpu_ctx(u64 now)
} }
cpuc->turbo_core = cpuc->capacity == turbo_cap; cpuc->turbo_core = cpuc->capacity == turbo_cap;
if (cpuc->turbo_core) if (cpuc->turbo_core) {
bpf_cpumask_set_cpu(cpu, turbo); bpf_cpumask_set_cpu(cpu, turbo);
debugln("CPU %d is a turbo core.", cpu);
}
} }
/* /*

View File

@ -74,7 +74,7 @@ use rlimit::{getrlimit, setrlimit, Resource};
#[derive(Debug, Parser)] #[derive(Debug, Parser)]
struct Opts { struct Opts {
/// Automatically decide the scheduler's power mode based on system load. /// Automatically decide the scheduler's power mode based on system load.
/// This is a recommended mode if you don't understand the following options: /// This is a default mode if you don't specify the following options:
#[clap(long = "autopilot", action = clap::ArgAction::SetTrue)] #[clap(long = "autopilot", action = clap::ArgAction::SetTrue)]
autopilot: bool, autopilot: bool,
@ -151,22 +151,41 @@ struct Opts {
} }
impl Opts { impl Opts {
/// Reports whether none of the power-mode, core-preference,
/// frequency-scaling, or monitor options is set.
///
/// Returns `true` only when every boolean flag checked below is `false`
/// and both `monitor` and `monitor_sched_samples` are `None`. Fields of
/// `Opts` that are not listed here are not consulted.
fn nothing_specified(&self) -> bool {
    !self.autopilot
        && !self.autopower
        && !self.performance
        && !self.powersave
        && !self.balanced
        && !self.no_core_compaction
        && !self.prefer_smt_core
        && !self.prefer_little_core
        && !self.no_prefer_turbo_core
        && !self.no_freq_scaling
        && self.monitor.is_none()
        && self.monitor_sched_samples.is_none()
}
fn proc(&mut self) -> Option<&mut Self> { fn proc(&mut self) -> Option<&mut Self> {
if self.nothing_specified() {
self.autopilot = true;
info!("Autopilot mode is enabled by default.");
return Some(self);
}
if self.performance { if self.performance {
self.no_core_compaction = true; self.no_core_compaction = true;
self.prefer_smt_core = false; self.prefer_smt_core = false;
self.prefer_little_core = false; self.prefer_little_core = false;
self.no_prefer_turbo_core = false; self.no_prefer_turbo_core = false;
self.no_freq_scaling = true; self.no_freq_scaling = true;
} } else if self.powersave {
if self.powersave {
self.no_core_compaction = false; self.no_core_compaction = false;
self.prefer_smt_core = true; self.prefer_smt_core = true;
self.prefer_little_core = true; self.prefer_little_core = true;
self.no_prefer_turbo_core = true; self.no_prefer_turbo_core = true;
self.no_freq_scaling = false; self.no_freq_scaling = false;
} } else if self.balanced {
if self.balanced {
self.no_core_compaction = false; self.no_core_compaction = false;
self.prefer_smt_core = false; self.prefer_smt_core = false;
self.prefer_little_core = false; self.prefer_little_core = false;
@ -757,7 +776,7 @@ impl<'a> Scheduler<'a> {
uei_exited!(&self.skel, uei) uei_exited!(&self.skel, uei)
} }
fn set_power_profile(&mut self, mode: i32) -> Result<(), u32> { fn set_power_profile(&mut self, mode: u32) -> Result<(), u32> {
let prog = &mut self.skel.progs.set_power_profile; let prog = &mut self.skel.progs.set_power_profile;
let mut args = power_arg { let mut args = power_arg {
power_mode: mode as c_int, power_mode: mode as c_int,
@ -792,10 +811,6 @@ impl<'a> Scheduler<'a> {
} }
fn update_power_profile(&mut self, prev_profile: String) -> (bool, String) { fn update_power_profile(&mut self, prev_profile: String) -> (bool, String) {
const LAVD_PM_PERFORMANCE: s32 = 0;
const LAVD_PM_BALANCED: s32 = 1;
const LAVD_PM_POWERSAVE: s32 = 2;
let profile = Self::read_energy_profile(); let profile = Self::read_energy_profile();
if profile == prev_profile { if profile == prev_profile {
// If the profile is the same, skip updating the profile for BPF. // If the profile is the same, skip updating the profile for BPF.
@ -820,11 +835,19 @@ impl<'a> Scheduler<'a> {
(true, profile) (true, profile)
} }
fn run(&mut self, autopower: bool, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> { fn run(&mut self, opts: &Opts, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
let (res_ch, req_ch) = self.stats_server.channels(); let (res_ch, req_ch) = self.stats_server.channels();
let mut autopower = autopower; let mut autopower = opts.autopower;
let mut profile = "unknown".to_string(); let mut profile = "unknown".to_string();
if opts.performance {
let _ = self.set_power_profile(LAVD_PM_PERFORMANCE);
} else if opts.powersave {
let _ = self.set_power_profile(LAVD_PM_POWERSAVE);
} else {
let _ = self.set_power_profile(LAVD_PM_BALANCED);
}
while !shutdown.load(Ordering::Relaxed) && !self.exited() { while !shutdown.load(Ordering::Relaxed) && !self.exited() {
if autopower { if autopower {
(autopower, profile) = self.update_power_profile(profile); (autopower, profile) = self.update_power_profile(profile);
@ -877,7 +900,6 @@ fn init_log(opts: &Opts) {
fn main() -> Result<()> { fn main() -> Result<()> {
let mut opts = Opts::parse(); let mut opts = Opts::parse();
opts.proc().unwrap();
if opts.version { if opts.version {
println!("scx_lavd {}", *build_id::SCX_FULL_VERSION); println!("scx_lavd {}", *build_id::SCX_FULL_VERSION);
@ -890,6 +912,8 @@ fn main() -> Result<()> {
} }
init_log(&opts); init_log(&opts);
opts.proc().unwrap();
debug!("{:#?}", opts); debug!("{:#?}", opts);
let shutdown = Arc::new(AtomicBool::new(false)); let shutdown = Arc::new(AtomicBool::new(false));
@ -925,7 +949,7 @@ fn main() -> Result<()> {
*build_id::SCX_FULL_VERSION *build_id::SCX_FULL_VERSION
); );
info!("scx_lavd scheduler starts running."); info!("scx_lavd scheduler starts running.");
if !sched.run(opts.autopower, shutdown.clone())?.should_restart() { if !sched.run(&opts, shutdown.clone())?.should_restart() {
break; break;
} }
} }

View File

@ -72,7 +72,7 @@ impl SysStats {
pub fn format_header<W: Write>(w: &mut W) -> Result<()> { pub fn format_header<W: Write>(w: &mut W) -> Result<()> {
writeln!( writeln!(
w, w,
"\x1b[93m| {:8} | {:9} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |\x1b[0m", "\x1b[93m| {:8} | {:13} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |\x1b[0m",
"MSEQ", "MSEQ",
"SVC_TIME", "SVC_TIME",
"# Q TASK", "# Q TASK",
@ -101,7 +101,7 @@ impl SysStats {
writeln!( writeln!(
w, w,
"| {:8} | {:9} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |", "| {:8} | {:13} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |",
self.mseq, self.mseq,
self.avg_svc_time, self.avg_svc_time,
self.nr_queued_task, self.nr_queued_task,
@ -406,6 +406,5 @@ pub fn monitor(intv: Duration, shutdown: Arc<AtomicBool>) -> Result<()> {
intv, intv,
|| shutdown.load(Ordering::Relaxed), || shutdown.load(Ordering::Relaxed),
|sysstats| sysstats.format(&mut std::io::stdout()), |sysstats| sysstats.format(&mut std::io::stdout()),
); )
Ok(())
} }