scx_bpfland: get rid of preferred domain
Using the turbo-boosted CPUs as the preferred scheduling domain seems to be beneficial only in a very few corner cases, for example on battery-powered devices with an aggressive cpufreq governor that constantly tries to scale down the frequency (and even in that case it is probably better not to force tasks onto the fast CPUs, to save power). In practice the preferred domain seems to introduce more overhead than benefit overall, so let's get rid of it. This can be improved in the future by adding multiple user-configurable scheduling domains.

Signed-off-by: Andrea Righi <andrea.righi@linux.dev>
parent 4fb2b09a6e
commit 079a53c689
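Conceptually, the change collapses the idle-CPU search from two passes (preferred domain first, then primary domain as fallback) into a single pass over the primary domain. A minimal, self-contained sketch of the before/after control flow, using plain bitmasks in place of struct bpf_cpumask (illustrative only, not the scheduler's actual code):

/* before_after.c: the shape of the change, not scx_bpfland's real code */
#include <stdio.h>

#define NR_CPUS 8

static unsigned int preferred_mask = 0x0c; /* e.g. turbo-boosted CPUs 2-3 */
static unsigned int primary_mask   = 0x0f; /* primary scheduling domain   */
static unsigned int idle_mask      = 0x0a; /* currently idle CPUs 1 and 3 */

static int first_idle_in(unsigned int domain)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		if (domain & idle_mask & (1u << cpu))
			return cpu;
	return -1;
}

/* Before: try the preferred domain first, then fall back to primary. */
static int pick_idle_cpu_old(void)
{
	int cpu = first_idle_in(preferred_mask);

	return cpu >= 0 ? cpu : first_idle_in(primary_mask);
}

/* After: a single pass over the primary domain. */
static int pick_idle_cpu_new(void)
{
	return first_idle_in(primary_mask);
}

int main(void)
{
	/* old picks turbo CPU 3; new simply picks the first idle CPU 1 */
	printf("old: CPU %d, new: CPU %d\n",
	       pick_idle_cpu_old(), pick_idle_cpu_new());
	return 0;
}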
scheds/rust/scx_bpfland/src/bpf/main.bpf.c

@@ -139,11 +139,6 @@ UEI_DEFINE(uei);
  */
 private(BPFLAND) struct bpf_cpumask __kptr *primary_cpumask;
 
-/*
- * Mask of preferred CPUs in the system.
- */
-private(BPFLAND) struct bpf_cpumask __kptr *preferred_cpumask;
-
 /*
  * Mask of offline CPUs, used to properly support CPU hotplugging.
  */
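Both masks follow the same lifecycle: a bpf_cpumask is created from a sleepable context and published into the global __kptr slot. A minimal sketch of that pattern with illustrative names (the file's actual helper, init_cpumask(), is not part of this hunk):

private(EXAMPLE) struct bpf_cpumask __kptr *example_cpumask;

/* Create a cpumask and publish it into the global kptr slot. */
static int example_init_cpumask(void)
{
	struct bpf_cpumask *mask, *old;

	mask = bpf_cpumask_create();
	if (!mask)
		return -ENOMEM;

	/* Atomically install the new mask; release any previous owner. */
	old = bpf_kptr_xchg(&example_cpumask, mask);
	if (old)
		bpf_cpumask_release(old);

	return 0;
}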
@@ -290,7 +285,7 @@ static bool is_task_interactive(struct task_struct *p)
  */
 static inline bool is_kthread(const struct task_struct *p)
 {
-	return !!(p->flags & PF_KTHREAD);
+	return p->flags & PF_KTHREAD;
 }
 
 /*
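This cleanup is safe because is_kthread() returns bool: C's conversion to _Bool already maps any nonzero value to 1, so the explicit !! is redundant. A standalone demo (PF_KTHREAD value copied from the kernel headers; not scx code):

#include <stdbool.h>
#include <stdio.h>

#define PF_KTHREAD 0x00200000 /* kernel's "I am a kernel thread" flag */

int main(void)
{
	unsigned int flags = PF_KTHREAD | 0x1;
	bool a = !!(flags & PF_KTHREAD); /* explicit normalization */
	bool b = flags & PF_KTHREAD;     /* implicit: nonzero -> true */

	printf("%d %d\n", a, b); /* prints: 1 1 */
	return 0;
}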
@@ -538,11 +533,10 @@ static void handle_sync_wakeup(struct task_struct *p)
  * to handle these mistakes in favor of a more efficient response and a reduced
  * scheduling overhead.
  */
-static s32 pick_idle_cpu(struct task_struct *p,
-			 s32 prev_cpu, u64 wake_flags, bool do_preferred)
+static s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu, u64 wake_flags)
 {
 	const struct cpumask *online_cpumask, *idle_smtmask, *idle_cpumask;
-	struct bpf_cpumask *primary, *preferred, *l2_domain, *l3_domain;
+	struct bpf_cpumask *primary, *l2_domain, *l3_domain;
 	struct bpf_cpumask *p_mask, *l2_mask, *l3_mask;
 	struct task_ctx *tctx;
 	struct cpu_ctx *cctx;
@@ -558,9 +552,6 @@ static s32 pick_idle_cpu(struct task_struct *p,
 	primary = primary_cpumask;
 	if (!primary)
 		return -ENOENT;
-	preferred = preferred_cpumask;
-	if (!preferred)
-		return -ENOENT;
 
 	/*
 	 * If the task isn't allowed to use its previously used CPU it means
@@ -574,14 +565,9 @@ static s32 pick_idle_cpu(struct task_struct *p,
 	/*
 	 * For tasks that can run only on a single CPU, we can simply verify if
 	 * their only allowed CPU is still idle.
-	 *
-	 * Moreover, if local_kthreads is enabled, always allow to dispatch
-	 * per-CPU kthreads directly to their target CPU, independently on the
-	 * idle state.
 	 */
 	if (p->nr_cpus_allowed == 1) {
-		if ((is_kthread(p) && local_kthreads) ||
-		    scx_bpf_test_and_clear_cpu_idle(prev_cpu))
+		if (scx_bpf_test_and_clear_cpu_idle(prev_cpu))
 			return prev_cpu;
 		return -ENOENT;
 	}
@@ -628,18 +614,9 @@ static s32 pick_idle_cpu(struct task_struct *p,
 
 	/*
 	 * Determine the task's scheduling domain.
-	 *
-	 * Try to dispatch on the preferred CPUs first. If we can't find any
-	 * idle CPU, re-try again with the primary scheduling domain.
 	 */
-	if (do_preferred &&
-	    !bpf_cpumask_empty(cast_mask(preferred)) &&
-	    !bpf_cpumask_equal(cast_mask(preferred), cast_mask(primary))) {
-		bpf_cpumask_and(p_mask, p->cpus_ptr, cast_mask(preferred));
-	} else {
-		bpf_cpumask_and(p_mask, p->cpus_ptr, cast_mask(primary));
-		do_preferred = false;
-	}
+	bpf_cpumask_and(p_mask, p->cpus_ptr, cast_mask(primary));
 
 	/*
 	 * Determine the L2 cache domain as the intersection of the task's
@@ -730,15 +707,6 @@ static s32 pick_idle_cpu(struct task_struct *p,
 			goto out_put_cpumask;
 		}
 
-		/*
-		 * When considering the preferred domain (first idle CPU
-		 * selection pass) try to stay on the same LLC.
-		 */
-		if (do_preferred) {
-			cpu = -ENOENT;
-			goto out_put_cpumask;
-		}
-
 		/*
 		 * Search for any other full-idle core in the primary domain.
 		 */
@@ -780,15 +748,6 @@ static s32 pick_idle_cpu(struct task_struct *p,
 		goto out_put_cpumask;
 	}
 
-	/*
-	 * When considering the preferred domain (first idle CPU selection
-	 * pass) try to stay on the same LLC.
-	 */
-	if (do_preferred) {
-		cpu = -ENOENT;
-		goto out_put_cpumask;
-	}
-
 	/*
 	 * Search for any idle CPU in the scheduling domain.
 	 */
@@ -815,7 +774,7 @@ s32 BPF_STRUCT_OPS(bpfland_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
 {
 	s32 cpu;
 
-	cpu = pick_idle_cpu(p, prev_cpu, wake_flags, true);
+	cpu = pick_idle_cpu(p, prev_cpu, wake_flags);
 	if (cpu >= 0 && !dispatch_direct_cpu(p, cpu, 0)) {
 		__sync_fetch_and_add(&nr_direct_dispatches, 1);
 		return cpu;
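This hunk relies on a sched_ext property: ops.select_cpu() is allowed to dispatch the task directly to a CPU's local DSQ, in which case ops.enqueue() is skipped for that task. A minimal sketch of the idiom using the default idle-selection kfunc (illustrative, not bpfland's dispatch_direct_cpu(); kfunc names as they existed at the time of this commit):

s32 BPF_STRUCT_OPS(example_select_cpu, struct task_struct *p,
		   s32 prev_cpu, u64 wake_flags)
{
	bool is_idle = false;
	s32 cpu;

	/* Default idle-CPU selection; sets is_idle if cpu is idle. */
	cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
	if (is_idle)
		/* Direct dispatch: ops.enqueue() will be skipped for p. */
		scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);

	return cpu;
}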
@@ -832,16 +791,18 @@ void BPF_STRUCT_OPS(bpfland_enqueue, struct task_struct *p, u64 enq_flags)
 {
-	struct bpf_cpumask *primary;
 	u64 deadline = task_deadline(p);
-	s32 cpu, prev_cpu = scx_bpf_task_cpu(p);
+	s32 cpu;
 
 	/*
-	 * If we couldn't find an idle CPU in ops.select_cpu(), give the task
-	 * another chance here to keep using the same CPU / cache / domain.
+	 * If local_kthreads is enabled, always dispatch per-CPU kthreads
+	 * directly to their target CPU.
 	 */
-	cpu = pick_idle_cpu(p, prev_cpu, 0, false);
-	if (cpu >= 0 && !dispatch_direct_cpu(p, cpu, 0)) {
-		__sync_fetch_and_add(&nr_direct_dispatches, 1);
-		return;
+	if (local_kthreads && is_kthread(p) && p->nr_cpus_allowed == 1) {
+		cpu = scx_bpf_task_cpu(p);
+		if (!dispatch_direct_cpu(p, cpu, enq_flags)) {
+			__sync_fetch_and_add(&nr_direct_dispatches, 1);
+			return;
+		}
 	}
 
 	/*
@@ -868,14 +829,9 @@ void BPF_STRUCT_OPS(bpfland_enqueue, struct task_struct *p, u64 enq_flags)
 	 * task, wake them up to see whether they'd be able to steal the just
 	 * queued task.
 	 */
-	primary = primary_cpumask;
-	if (!primary)
-		return;
-	if (bpf_cpumask_subset(cast_mask(primary), p->cpus_ptr)) {
-		cpu = scx_bpf_pick_idle_cpu(cast_mask(primary), 0);
-		if (cpu >= 0)
-			scx_bpf_kick_cpu(cpu, 0);
-	}
+	cpu = scx_bpf_pick_idle_cpu(cast_mask(p->cpus_ptr), 0);
+	if (cpu >= 0)
+		scx_bpf_kick_cpu(cpu, 0);
 }
 
 /*
@@ -1366,34 +1322,6 @@ int enable_sibling_cpu(struct domain_arg *input)
 	return err;
 }
 
-SEC("syscall")
-int enable_preferred_cpu(struct cpu_arg *input)
-{
-	struct bpf_cpumask *mask;
-	int err = 0;
-
-	/* Make sure the primary CPU mask is initialized */
-	err = init_cpumask(&preferred_cpumask);
-	if (err)
-		return err;
-	/*
-	 * Enable the target CPU in the preferred scheduling domain.
-	 */
-	bpf_rcu_read_lock();
-	mask = preferred_cpumask;
-	if (mask) {
-		s32 cpu = input->cpu_id;
-
-		if (cpu < 0)
-			bpf_cpumask_clear(mask);
-		else
-			bpf_cpumask_set_cpu(cpu, mask);
-	}
-	bpf_rcu_read_unlock();
-
-	return err;
-}
-
 SEC("syscall")
 int enable_primary_cpu(struct cpu_arg *input)
 {
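enable_preferred_cpu() is a SEC("syscall") program: it attaches to nothing and runs only when user space invokes it via BPF_PROG_TEST_RUN, which is how the Rust side (the ProgramInput/test_run() helper removed below) drives it. For reference, a hedged C/libbpf sketch of the same invocation, assuming an already-loaded skeleton exposing the program:

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

struct cpu_arg { int cpu_id; };

/* Run a SEC("syscall") prog once, passing cpu_arg as its context. */
static int run_cpu_prog(struct bpf_program *prog, int cpu)
{
	struct cpu_arg args = { .cpu_id = cpu };
	LIBBPF_OPTS(bpf_test_run_opts, opts,
		.ctx_in = &args,
		.ctx_size_in = sizeof(args),
	);
	int err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);

	if (err)
		return err;         /* test_run syscall itself failed */
	return opts.retval;         /* BPF program's return value     */
}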
@@ -1481,11 +1409,6 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(bpfland_init)
 	if (err)
 		return err;
 
-	/* Initialize the preferred scheduling domain */
-	err = init_cpumask(&preferred_cpumask);
-	if (err)
-		return err;
-
 	return 0;
 }
scheds/rust/scx_bpfland/src/main.rs

@@ -52,7 +52,6 @@ const SCHEDULER_NAME: &'static str = "scx_bpfland";
 
 #[derive(PartialEq)]
 enum Powermode {
-    Turbo,
     Performance,
     Powersave,
     Any,
@@ -66,8 +65,6 @@ fn get_primary_cpus(mode: Powermode) -> std::io::Result<Vec<usize>> {
         .into_iter()
         .flat_map(|core| core.cpus())
         .filter_map(|(cpu_id, cpu)| match (&mode, &cpu.core_type) {
-            // Turbo mode: only add turbo-boosted CPUs
-            (Powermode::Turbo, CoreType::Big { turbo: true }) |
             // Performance mode: add all the Big CPUs (either Turbo or non-Turbo)
             (Powermode::Performance, CoreType::Big { .. }) |
             // Powersave mode: add all the Little CPUs
@@ -158,15 +155,6 @@ struct Opts {
     #[clap(short = 'k', long, action = clap::ArgAction::SetTrue)]
     local_kthreads: bool,
 
-    /// Specifies a group of preferred CPUs, represented as a bitmask in hex (e.g., 0xff), that the
-    /// scheduler will try to prioritize to dispatch tasks.
-    ///
-    /// Special values:
-    /// - "auto" = automaticlly detect the fastest CPUs based on the current scheduler and system
-    ///   energy profiles.
-    #[clap(short = 'M', long, default_value = "auto")]
-    preferred_domain: String,
-
     /// Specifies the initial set of CPUs, represented as a bitmask in hex (e.g., 0xff), that the
     /// scheduler will use to dispatch tasks, until the system becomes saturated, at which point
     /// tasks may overflow to other available CPUs.
@@ -288,9 +276,6 @@ impl<'a> Scheduler<'a> {
 
         // Initialize the primary scheduling domain and the preferred domain.
         let energy_profile = Self::read_energy_profile();
-        if let Err(err) = Self::init_preferred_domain(&mut skel, &opts.preferred_domain) {
-            warn!("failed to initialize preferred domain: error {}", err);
-        }
         if let Err(err) = Self::init_energy_domain(&mut skel, &opts.primary_domain, &energy_profile)
         {
             warn!("failed to initialize primary domain: error {}", err);
@@ -372,28 +357,6 @@ impl<'a> Scheduler<'a> {
         res.unwrap_or_else(|_: String| "none".to_string())
     }
 
-    fn enable_preferred_cpu(skel: &mut BpfSkel<'_>, cpu: i32) -> Result<(), u32> {
-        let prog = &mut skel.progs.enable_preferred_cpu;
-        let mut args = cpu_arg {
-            cpu_id: cpu as c_int,
-        };
-        let input = ProgramInput {
-            context_in: Some(unsafe {
-                std::slice::from_raw_parts_mut(
-                    &mut args as *mut _ as *mut u8,
-                    std::mem::size_of_val(&args),
-                )
-            }),
-            ..Default::default()
-        };
-        let out = prog.test_run(input).unwrap();
-        if out.return_value != 0 {
-            return Err(out.return_value);
-        }
-
-        Ok(())
-    }
-
     fn epp_to_cpumask(profile: Powermode) -> Result<Cpumask> {
         let mut cpus = get_primary_cpus(profile).unwrap_or(Vec::new());
         if cpus.is_empty() {
@@ -402,32 +365,6 @@ impl<'a> Scheduler<'a> {
         Cpumask::from_str(&cpus_to_cpumask(&cpus))
     }
 
-    fn init_preferred_domain(skel: &mut BpfSkel<'_>, preferred_domain: &String) -> Result<()> {
-        let domain = match preferred_domain.as_str() {
-            "auto" => Self::epp_to_cpumask(Powermode::Turbo)?,
-            &_ => Cpumask::from_str(&preferred_domain)?,
-        };
-
-        info!("preferred CPU domain = 0x{:x}", domain);
-
-        // Clear the preferred domain by passing a negative CPU id.
-        if let Err(err) = Self::enable_preferred_cpu(skel, -1) {
-            warn!("failed to reset preferred domain: error {}", err);
-        }
-        for cpu in 0..*NR_CPU_IDS {
-            if domain.test_cpu(cpu) {
-                if let Err(err) = Self::enable_preferred_cpu(skel, cpu as i32) {
-                    warn!(
-                        "failed to add CPU {} to preferred domain: error {}",
-                        cpu, err
-                    );
-                }
-            }
-        }
-
-        Ok(())
-    }
-
     fn init_energy_domain(
         skel: &mut BpfSkel<'_>,
         primary_domain: &String,
@@ -504,11 +441,6 @@ impl<'a> Scheduler<'a> {
         self.energy_profile = energy_profile.clone();
 
         if self.opts.primary_domain == "auto" {
-            if let Err(err) =
-                Self::init_preferred_domain(&mut self.skel, &self.opts.preferred_domain)
-            {
-                warn!("failed to refresh preferred domain: error {}", err);
-            }
             if let Err(err) = Self::init_energy_domain(
                 &mut self.skel,
                 &self.opts.primary_domain,