Merge pull request #566 from multics69/lavd-turbo

scx_lavd: prioritize the turbo boost-able cores
Commit 09cff560aa by Changwoo Min, 2024-08-27 08:47:25 +09:00 (committed by GitHub)
5 changed files with 112 additions and 35 deletions

Cargo.lock

@@ -1087,7 +1087,6 @@ dependencies = [
"libbpf-rs",
"libc",
"log",
"nix 0.29.0",
"ordered-float 3.9.2",
"plain",
"rlimit",

Cargo.toml

@@ -27,7 +27,6 @@ simplelog = "0.12"
static_assertions = "1.1.0"
rlimit = "0.10.1"
plain = "0.2.3"
nix = { version = "0.29.0", features = ["signal"] }
[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }

src/bpf/intf.h

@@ -82,6 +82,7 @@ enum consts {
LAVD_SYS_STAT_INTERVAL_NS = (25ULL * NSEC_PER_MSEC),
LAVD_CC_PER_CORE_MAX_CTUIL = 500, /* maximum per-core CPU utilization */
LAVD_CC_PER_TURBO_CORE_MAX_CTUIL = 750, /* maximum per-core CPU utilization for a turbo core */
LAVD_CC_NR_ACTIVE_MIN = 1, /* minimum number of active cores */
LAVD_CC_NR_OVRFLW = 1, /* num of overflow cores */
LAVD_CC_CPU_PIN_INTERVAL = (3ULL * LAVD_TIME_ONE_SEC),
@@ -193,6 +194,7 @@ struct cpu_ctx {
*/
u16 capacity; /* CPU capacity based on 1000 */
u8 big_core; /* is it a big core? */
u8 turbo_core; /* is it a turbo core? */
u8 cpdom_id; /* compute domain id (== dsq_id) */
u8 cpdom_alt_id; /* compute domain id of alternative type (== dsq_id) */
u8 cpdom_poll_pos; /* index to check if a DSQ of a compute domain is starving */

src/bpf/main.bpf.c

@@ -200,10 +200,11 @@ static volatile u64 nr_cpus_big;
static struct sys_stat __sys_stats[2];
static volatile int __sys_stat_idx;
private(LAVD) struct bpf_cpumask __kptr *active_cpumask; /* CPU mask for active CPUs */
private(LAVD) struct bpf_cpumask __kptr *ovrflw_cpumask; /* CPU mask for overflow CPUs */
private(LAVD) struct bpf_cpumask __kptr *turbo_cpumask; /* CPU mask for turbo CPUs */
private(LAVD) struct bpf_cpumask __kptr *big_cpumask; /* CPU mask for big CPUs */
private(LAVD) struct bpf_cpumask __kptr *little_cpumask; /* CPU mask for little CPUs */
private(LAVD) struct bpf_cpumask __kptr *active_cpumask; /* CPU mask for active CPUs */
private(LAVD) struct bpf_cpumask __kptr *ovrflw_cpumask; /* CPU mask for overflow CPUs */
private(LAVD) struct bpf_cpumask cpdom_cpumask[LAVD_CPDOM_MAX_NR]; /* CPU mask for each compute domain */
/*
@@ -229,6 +230,7 @@ static u64 cur_svc_time;
*/
const volatile bool no_core_compaction;
const volatile bool no_freq_scaling;
const volatile bool no_prefer_turbo_core;
const volatile u32 is_smt_active;
const volatile u8 verbose;
@@ -659,8 +661,14 @@ static void collect_sys_stat(struct sys_stat_ctx *c)
c->new_util = (compute * LAVD_CPU_UTIL_MAX) / c->duration;
cpuc->util = calc_avg(cpuc->util, c->new_util);
if (cpuc->util > LAVD_CC_PER_CORE_MAX_CTUIL)
c->nr_violation += 1000;
if (cpuc->turbo_core) {
if (cpuc->util > LAVD_CC_PER_TURBO_CORE_MAX_CTUIL)
c->nr_violation += 1000;
}
else {
if (cpuc->util > LAVD_CC_PER_CORE_MAX_CTUIL)
c->nr_violation += 1000;
}
/*
* Accumulate system-wide idle time
@@ -1391,21 +1399,41 @@ static s32 pick_idle_cpu(struct task_struct *p, struct task_ctx *taskc,
if (bpf_cpumask_empty(cast_mask(a_cpumask)))
goto start_omask;
if (is_perf_cri(taskc, stat_cur))
if (is_perf_cri(taskc, stat_cur) || no_core_compaction ) {
bpf_cpumask_and(t_cpumask, cast_mask(a_cpumask), cast_mask(big));
else
}
else {
bpf_cpumask_and(t_cpumask, cast_mask(a_cpumask), cast_mask(little));
goto start_llc_mask;
}
bpf_cpumask_and(t2_cpumask, cast_mask(t_cpumask), cast_mask(cpdom_mask_prev));
/*
* Pick an idle core among turbo boost-enabled CPUs with a matching
* core type.
*/
start_turbo_mask:
if (no_prefer_turbo_core || !turbo_cpumask)
goto start_llc_mask;
bpf_cpumask_and(t2_cpumask, cast_mask(t_cpumask), cast_mask(turbo_cpumask));
if (bpf_cpumask_empty(cast_mask(t2_cpumask)))
goto start_tmask;
goto start_llc_mask;
cpu_id = pick_idle_cpu_in(t2_cpumask);
if (cpu_id >= 0) {
*is_idle = true;
goto unlock_out;
}
/*
* Pick an idle core among active CPUs with a matching core type within
* the prev CPU's LLC domain.
*/
start_t2mask:
start_llc_mask:
bpf_cpumask_and(t2_cpumask, cast_mask(t_cpumask), cast_mask(cpdom_mask_prev));
if (bpf_cpumask_empty(cast_mask(t2_cpumask)))
goto start_tmask;
cpu_id = pick_idle_cpu_in(t2_cpumask);
if (cpu_id >= 0) {
*is_idle = true;
@@ -2755,6 +2783,10 @@ static int init_cpumasks(void)
if (err)
goto out;
err = calloc_cpumask(&turbo_cpumask);
if (err)
goto out;
err = calloc_cpumask(&big_cpumask);
if (err)
goto out;
@@ -2777,11 +2809,6 @@ out:
static u16 get_cpuperf_cap(s32 cpu)
{
/*
* If the CPU capacity values are all 1024, then let's just use the
* capacity values from userspace, which are calculated using each CPU's
* maximum frequency.
*/
if (cpu >= 0 && cpu < LAVD_CPU_ID_MAX)
return __cpu_capacity_hint[cpu];
@@ -2789,25 +2816,51 @@ static u16 get_cpuperf_cap(s32 cpu)
return 1;
}
static u16 get_cputurbo_cap(void)
{
u16 turbo_cap = 0;
int nr_turbo = 0, cpu;
/*
* Find the maximum CPU frequency
*/
for (cpu = 0; cpu < LAVD_CPU_ID_MAX; cpu++) {
if (__cpu_capacity_hint[cpu] > turbo_cap) {
turbo_cap = __cpu_capacity_hint[cpu];
nr_turbo++;
}
}
/*
* If all CPUs' frequencies are the same, ignore the turbo.
*/
if (nr_turbo <= 1)
turbo_cap = 0;
return turbo_cap;
}
static s32 init_per_cpu_ctx(u64 now)
{
struct cpu_ctx *cpuc;
struct bpf_cpumask *big, *little, *active, *ovrflw, *cd_cpumask;
struct bpf_cpumask *turbo, *big, *little, *active, *ovrflw, *cd_cpumask;
struct cpdom_ctx *cpdomc;
int cpu, i, j, err = 0;
u64 cpdom_id;
u32 sum_capacity = 0, avg_capacity;
u16 turbo_cap;
bpf_rcu_read_lock();
/*
* Prepare cpumasks.
*/
turbo = turbo_cpumask;
big = big_cpumask;
little = little_cpumask;
active = active_cpumask;
ovrflw = ovrflw_cpumask;
if (!big|| !little || !active || !ovrflw) {
if (!turbo || !big|| !little || !active || !ovrflw) {
scx_bpf_error("Failed to prepare cpumasks.");
err = -ENOMEM;
goto unlock_out;
@@ -2848,6 +2901,11 @@ static s32 init_per_cpu_ctx(u64 now)
sum_capacity += cpuc->capacity;
}
/*
* Get turbo capacity.
*/
turbo_cap = get_cputurbo_cap();
/*
* Classify CPU into BIG or little cores based on their average capacity.
*/
@@ -2874,6 +2932,10 @@ static s32 init_per_cpu_ctx(u64 now)
bpf_cpumask_set_cpu(cpu, little);
bpf_cpumask_set_cpu(cpu, ovrflw);
}
cpuc->turbo_core = cpuc->capacity == turbo_cap;
if (cpuc->turbo_core)
bpf_cpumask_set_cpu(cpu, turbo);
}
/*

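For orientation, the sketch below condenses the idle-CPU preference order that pick_idle_cpu() follows after this change: for a performance-critical task, turbo-capable CPUs of the matching (big) core type are tried first unless --no-prefer-turbo-core is given, then CPUs in the previous CPU's LLC/compute domain, then any active CPU of that type, and finally the overflow CPUs. This is a simplified user-space sketch, not code from the patch: cpumask_t, find_idle_in(), struct cpus, and the collapsed fallback steps are illustrative assumptions layered on top of what the diff shows.

```c
/*
 * Illustrative user-space model of the cpumask walk in pick_idle_cpu()
 * after this patch. Types and helpers here are made up for the example;
 * the real code uses bpf_cpumask objects and has more fallback stages.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t cpumask_t;	/* one bit per CPU, up to 64 CPUs */

struct cpus {
	cpumask_t active;	/* active_cpumask */
	cpumask_t ovrflw;	/* ovrflw_cpumask */
	cpumask_t turbo;	/* turbo_cpumask (new in this commit) */
	cpumask_t big, little;	/* big_cpumask / little_cpumask */
	cpumask_t llc_prev;	/* cpdom_mask_prev: prev CPU's compute domain */
};

/* Lowest-numbered CPU that is both a candidate and idle, or -1 if none. */
static int find_idle_in(cpumask_t candidates, cpumask_t idle)
{
	cpumask_t m = candidates & idle;

	for (int cpu = 0; cpu < 64; cpu++)
		if (m & (1ULL << cpu))
			return cpu;
	return -1;
}

static int pick_idle_cpu_sim(const struct cpus *c, cpumask_t idle,
			     int is_perf_cri, int no_prefer_turbo_core)
{
	/* Perf-critical tasks target big cores, the rest little cores. */
	cpumask_t t = c->active & (is_perf_cri ? c->big : c->little);
	int cpu;

	/* 1. Turbo-capable CPUs of that core type (skipped on the little
	 *    path and when --no-prefer-turbo-core is set). */
	if (is_perf_cri && !no_prefer_turbo_core &&
	    (cpu = find_idle_in(t & c->turbo, idle)) >= 0)
		return cpu;
	/* 2. Same core type within the previous CPU's LLC/compute domain. */
	if ((cpu = find_idle_in(t & c->llc_prev, idle)) >= 0)
		return cpu;
	/* 3. Any active CPU of that core type. */
	if ((cpu = find_idle_in(t, idle)) >= 0)
		return cpu;
	/* 4. Overflow CPUs as the last resort. */
	return find_idle_in(c->ovrflw, idle);
}

int main(void)
{
	/* 8 CPUs: 0-3 little, 4-7 big, 6-7 turbo-capable, 5-7 currently idle. */
	struct cpus c = {
		.active = 0xff, .ovrflw = 0x00, .turbo = 0xc0,
		.big = 0xf0, .little = 0x0f, .llc_prev = 0x30,
	};
	cpumask_t idle = 0xe0;

	printf("perf-critical, turbo preferred -> CPU %d\n",
	       pick_idle_cpu_sim(&c, idle, 1, 0));	/* picks CPU 6 */
	printf("perf-critical, --no-prefer-turbo-core -> CPU %d\n",
	       pick_idle_cpu_sim(&c, idle, 1, 1));	/* picks CPU 5 */
	return 0;
}
```

The actual BPF code reaches the same effect with bpf_cpumask_and() on t2_cpumask and the start_turbo_mask/start_llc_mask labels shown in the hunk above.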
src/main.rs

@@ -30,6 +30,7 @@ use std::mem::MaybeUninit;
use std::str;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::thread::ThreadId;
use std::time::Duration;
@@ -59,7 +60,6 @@ use scx_utils::Topology;
use scx_utils::UserExitInfo;
use itertools::iproduct;
use nix::sys::signal;
use plain::Plain;
use rlimit::{getrlimit, setrlimit, Resource};
@@ -101,6 +101,10 @@ struct Opts {
#[clap(long = "prefer-little-core", action = clap::ArgAction::SetTrue)]
prefer_little_core: bool,
/// Do not specifically prefer to schedule on turbo cores.
#[clap(long = "no-prefer-turbo-core", action = clap::ArgAction::SetTrue)]
no_prefer_turbo_core: bool,
/// Disable controlling the CPU frequency. In order to improve latency and responsiveness of
/// performance-critical tasks, scx_lavd increases the CPU frequency even if CPU usage is low.
/// See main.bpf.c for more info. Normally set by the power mode, but can be set independently
@@ -117,6 +121,10 @@ struct Opts {
/// times to increase verbosity.
#[clap(short = 'v', long, action = clap::ArgAction::Count)]
verbose: u8,
/// Print scheduler version and exit.
#[clap(short = 'V', long, action = clap::ArgAction::SetTrue)]
version: bool,
}
impl Opts {
@@ -125,18 +133,21 @@ impl Opts {
self.no_core_compaction = true;
self.prefer_smt_core = false;
self.prefer_little_core = false;
self.no_prefer_turbo_core = false;
self.no_freq_scaling = true;
}
if self.powersave {
self.no_core_compaction = false;
self.prefer_smt_core = true;
self.prefer_little_core = true;
self.no_prefer_turbo_core = true;
self.no_freq_scaling = false;
}
if self.balanced {
self.no_core_compaction = false;
self.prefer_smt_core = false;
self.prefer_little_core = false;
self.no_prefer_turbo_core = false;
self.no_freq_scaling = false;
}
@@ -512,6 +523,7 @@ impl<'a> Scheduler<'a> {
skel.maps.bss_data.nr_cpus_onln = nr_cpus_onln;
skel.maps.rodata_data.no_core_compaction = opts.no_core_compaction;
skel.maps.rodata_data.no_freq_scaling = opts.no_freq_scaling;
skel.maps.rodata_data.no_prefer_turbo_core = opts.no_prefer_turbo_core;
skel.maps.rodata_data.is_smt_active = match FlatTopology::is_smt_active() {
Ok(ret) => (ret == 1) as u32,
Err(_) => 0,
@@ -623,10 +635,14 @@ impl<'a> Scheduler<'a> {
})
}
fn run(&mut self) -> Result<UserExitInfo> {
pub fn exited(&mut self) -> bool {
uei_exited!(&self.skel, uei)
}
fn run(&mut self, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
let (res_ch, req_ch) = self.stats_server.channels();
while self.running() {
while !shutdown.load(Ordering::Relaxed) && !self.exited() {
match req_ch.recv_timeout(Duration::from_secs(1)) {
Ok(req) => {
let res = self.stats_req_to_res(&req)?;
@@ -676,26 +692,25 @@ extern "C" fn handle_sigint(_: libc::c_int, _: *mut libc::siginfo_t, _: *mut lib
RUNNING.store(false, Ordering::SeqCst);
}
fn init_signal_handlers() {
// Ctrl-c for termination
unsafe {
let sigint_action = signal::SigAction::new(
signal::SigHandler::SigAction(handle_sigint),
signal::SaFlags::empty(),
signal::SigSet::empty(),
);
signal::sigaction(signal::SIGINT, &sigint_action).unwrap();
}
}
fn main() -> Result<()> {
let mut opts = Opts::parse();
opts.proc().unwrap();
if opts.version {
println!("scx_lavd {}", *build_id::SCX_FULL_VERSION);
return Ok(());
}
init_log(&opts);
init_signal_handlers();
debug!("{:#?}", opts);
let shutdown = Arc::new(AtomicBool::new(false));
let shutdown_clone = shutdown.clone();
ctrlc::set_handler(move || {
shutdown_clone.store(true, Ordering::Relaxed);
})
.context("Error setting Ctrl-C handler")?;
if let Some(nr_samples) = opts.monitor_sched_samples {
let jh = std::thread::spawn(move || stats::monitor_sched_samples(nr_samples).unwrap());
let _ = jh.join();
@@ -710,7 +725,7 @@ fn main() -> Result<()> {
*build_id::SCX_FULL_VERSION
);
info!("scx_lavd scheduler starts running.");
if !sched.run()?.should_restart() {
if !sched.run(shutdown.clone())?.should_restart() {
break;
}
}