scx_rustland_core: switch to FIFO when system is underutilized

Provide a knob in scx_rustland_core to automatically turn the scheduler
into a simple FIFO when the system is underutilized.

This choice is based on the assumption that, when the system is
underutilized (fewer tasks running than the number of available CPUs),
the best scheduling policy is FIFO.

With this option enabled the scheduler starts in FIFO mode. If most of
the CPUs are busy (nr_running >= num_cpus - 1), the scheduler
immediately exits from FIFO mode and starts to apply the logic
implemented by the user-space component. Then the scheduler can switch
back to FIFO if there are no tasks waiting to be scheduled (evaluated
using a moving average).

This option can be enabled/disabled by the user-space scheduler using
the fifo_sched parameter in BpfScheduler: if set, the BPF component will
periodically check for system utilization and switch back and forth to
FIFO mode based on that.

This improves the performance of workloads that use only a small number
of the available CPUs in the system, while still maintaining the same
good level of performance for interactive tasks when the system is
overcommitted.

In certain video games, such as Baldur's Gate 3 or Counter-Strike 2,
running in "normal" system conditions, we can experience a boost in fps
of approximately 4-8% with this change applied.

Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
This commit is contained in:
Andrea Righi 2024-05-07 07:56:49 +02:00
parent 3ad634c293
commit d25675ff44
4 changed files with 93 additions and 4 deletions

View File

@ -183,6 +183,7 @@ impl<'cb> BpfScheduler<'cb> {
exit_dump_len: u32,
full_user: bool,
low_power: bool,
fifo_sched: bool,
debug: bool,
) -> Result<Self> {
// Open the BPF prog first for verification.
@ -247,6 +248,7 @@ impl<'cb> BpfScheduler<'cb> {
skel.rodata_mut().debug = debug;
skel.rodata_mut().full_user = full_user;
skel.rodata_mut().low_power = low_power;
skel.rodata_mut().fifo_sched = fifo_sched;
// Attach BPF scheduler.
let mut skel = scx_ops_load!(skel, rustland, uei)?;

View File

@ -105,6 +105,19 @@ const volatile bool full_user;
*/
const volatile bool low_power;
/*
* Automatically switch to simple FIFO scheduling during periods of system
* underutilization to minimize unnecessary scheduling overhead.
*
* 'fifo_sched' can be used by the user-space scheduler to enable/disable this
* behavior.
*
* 'is_fifo_enabled' indicates whether the scheduling has switched to FIFO mode
* or regular scheduling mode.
*/
const volatile bool fifo_sched;
static bool is_fifo_enabled;
/* Allow to use bpf_printk() only when @debug is set */
#define dbg_msg(_fmt, ...) do { \
if (debug) \
@ -200,6 +213,13 @@ struct {
__type(value, struct usersched_timer);
} usersched_timer SEC(".maps");
/*
* Time period of the scheduler heartbeat, used to periodically kick the
* scheduler and check if we need to switch to FIFO mode or regular
* scheduling (default 100ms).
*/
#define USERSCHED_TIMER_NS (NSEC_PER_SEC / 10)
/*
* Map of allocated CPUs.
*/
@ -533,6 +553,20 @@ void BPF_STRUCT_OPS(rustland_enqueue, struct task_struct *p, u64 enq_flags)
return;
}
/*
* Dispatch directly to the target CPU DSQ if the scheduler is set to
* FIFO mode.
*/
if (is_fifo_enabled) {
s32 cpu = scx_bpf_task_cpu(p);
scx_bpf_dispatch(p, cpu_to_dsq(cpu), slice_ns, enq_flags);
scx_bpf_kick_cpu(cpu, __COMPAT_SCX_KICK_IDLE);
__sync_fetch_and_add(&nr_kernel_dispatches, 1);
return;
}
/*
* Add tasks to the @queued list, they will be processed by the
* user-space scheduler.
@ -739,7 +773,6 @@ void BPF_STRUCT_OPS(rustland_cpu_release, s32 cpu,
set_usersched_needed();
}
/*
* A new task @p is being created.
*
@ -791,6 +824,43 @@ void BPF_STRUCT_OPS(rustland_exit_task, struct task_struct *p,
__sync_fetch_and_add(&nr_queued, 1);
}
/*
 * Decide whether the scheduler should operate in FIFO mode.
 *
 * Returns true when FIFO mode should be active and false when tasks should be
 * handled by the regular (user-space) scheduling logic. The result is always
 * false when 'fifo_sched' is not set.
 *
 * The two transitions use different conditions:
 *  - entering FIFO mode requires the moving average of waiting tasks to be 0;
 *  - leaving FIFO mode requires almost all the CPUs to be busy.
 */
static bool should_enable_fifo(void)
{
	/* Moving average of waiting tasks, preserved across invocations */
	static u64 avg_waiting;
	/* Tasks that are waiting to be scheduled right now */
	u64 cur_waiting = nr_queued + nr_scheduled;

	/* Automatic FIFO switching has not been requested */
	if (!fifo_sched)
		return false;

	if (!is_fifo_enabled) {
		/*
		 * Regular mode: fold the current sample into the moving
		 * average, so that a single idle sample is not enough to
		 * bounce between FIFO mode and user-space mode.
		 *
		 * Enter FIFO mode only when, on average, no tasks are waiting
		 * to be scheduled.
		 */
		avg_waiting = (avg_waiting + cur_waiting) / 2;
		return avg_waiting == 0;
	}

	/*
	 * FIFO mode: remain in it as long as fewer than
	 * (num_possible_cpus - 1) tasks are running, i.e. until almost all
	 * the CPUs are busy.
	 */
	return nr_running < num_possible_cpus - 1;
}
/*
* Heartbeat scheduler timer callback.
*
@ -807,8 +877,11 @@ static int usersched_timer_fn(void *map, int *key, struct bpf_timer *timer)
/* Kick the scheduler */
set_usersched_needed();
/* Update flag that determines if FIFO scheduling needs to be enabled */
is_fifo_enabled = should_enable_fifo();
/* Re-arm the timer */
err = bpf_timer_start(timer, NSEC_PER_SEC, 0);
err = bpf_timer_start(timer, USERSCHED_TIMER_NS, 0);
if (err)
scx_bpf_error("Failed to arm stats timer");
@ -831,7 +904,7 @@ static int usersched_timer_init(void)
}
bpf_timer_init(timer, &usersched_timer, CLOCK_BOOTTIME);
bpf_timer_set_callback(timer, usersched_timer_fn);
err = bpf_timer_start(timer, NSEC_PER_SEC, 0);
err = bpf_timer_start(timer, USERSCHED_TIMER_NS, 0);
if (err)
scx_bpf_error("Failed to arm scheduler timer");

View File

@ -26,7 +26,7 @@ struct Scheduler<'a> {
impl<'a> Scheduler<'a> {
fn init() -> Result<Self> {
let topo = Topology::new().expect("Failed to build host topology");
let bpf = BpfScheduler::init(5000, topo.nr_cpus_possible() as i32, false, 0, false, false, false)?;
let bpf = BpfScheduler::init(5000, topo.nr_cpus_possible() as i32, false, 0, false, false, true, false)?;
Ok(Self { bpf })
}

View File

@ -137,6 +137,19 @@ struct Opts {
#[clap(short = 'l', long, action = clap::ArgAction::SetTrue)]
low_power: bool,
/// By default the scheduler automatically transitions to FIFO mode when the system is
/// underutilized. This allows to reduce unnecessary scheduling overhead and boost performance
/// when the system is not running at full capacity.
///
/// Be aware that FIFO mode can lead to less predictable performance. Therefore, use this
/// option if performance predictability is important, such as when running real-time audio
/// applications or during live streaming. Conversely, avoid using this option when you care
/// about maximizing performance, such as gaming.
///
/// Set this option to disable this automatic transition.
#[clap(short = 'f', long, action = clap::ArgAction::SetTrue)]
disable_fifo: bool,
/// If specified, only tasks which have their scheduling policy set to
/// SCHED_EXT using sched_setscheduler(2) are switched. Otherwise, all
/// tasks are switched.
@ -304,6 +317,7 @@ impl<'a> Scheduler<'a> {
opts.exit_dump_len,
opts.full_user,
opts.low_power,
!opts.disable_fifo,
opts.debug,
)?;
info!("{} scheduler attached - {} CPUs", SCHEDULER_NAME, nr_cpus);