scx_rustland_core: implement effective time slice on a per-task basis

Drop the global effective time slice and use the finer-grained
per-task time slice to implement the dynamic time-slice capability.

This reduces the scheduler's overhead (the volatile global time-slice
variable shared between user space and BPF is dropped) and provides
finer-grained control over each task's time slice.

Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
Author: Andrea Righi
Date: 2024-05-09 07:03:36 +02:00
Parent: 382ef72999
Commit: f052493005
3 changed files with 16 additions and 46 deletions
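
At a glance, the change replaces a single global knob with a per-task
value set at dispatch time. A minimal sketch of the two styles, using
only the method names that appear in the diffs below (surrounding code
omitted):

    // Before: one volatile global, shared between user space and BPF,
    // applied to every task:
    //
    //     self.bpf.set_effective_slice_us(slice_us);
    //
    // After: the user-space scheduler stamps each task as it is
    // dispatched, where 0 means "fall back to the default slice_ns":
    //
    //     dispatched_task.set_slice_ns(task_slice_ns);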


@@ -293,24 +293,6 @@ impl<'cb> BpfScheduler<'cb> {
         }
     }
 
-    // Override the default scheduler time slice (in us).
-    #[allow(dead_code)]
-    pub fn set_effective_slice_us(&mut self, slice_us: u64) {
-        self.skel.bss_mut().effective_slice_ns = slice_us * 1000;
-    }
-
-    // Get current value of time slice (slice_ns).
-    #[allow(dead_code)]
-    pub fn get_effective_slice_us(&mut self) -> u64 {
-        let slice_ns = self.skel.bss().effective_slice_ns;
-
-        if slice_ns > 0 {
-            slice_ns / 1000
-        } else {
-            self.skel.rodata().slice_ns / 1000
-        }
-    }
-
     // Counter of queued tasks.
     #[allow(dead_code)]
     pub fn nr_queued_mut(&mut self) -> &mut u64 {


@@ -52,12 +52,6 @@ u32 usersched_pid; /* User-space scheduler PID */
 const volatile bool switch_partial; /* Switch all tasks or SCHED_EXT tasks */
 const volatile u64 slice_ns = SCX_SLICE_DFL; /* Base time slice duration */
 
-/*
- * Effective time slice: allow the scheduler to override the default time slice
- * (slice_ns) if this one is set.
- */
-volatile u64 effective_slice_ns;
-
 /*
  * Number of tasks that are queued for scheduling.
  *
@@ -321,8 +315,7 @@ dispatch_task(struct task_struct *p, u64 dsq_id,
               u64 cpumask_cnt, u64 task_slice_ns, u64 enq_flags)
 {
         struct task_ctx *tctx;
-        u64 slice = task_slice_ns ? :
-                    __sync_fetch_and_add(&effective_slice_ns, 0) ? : slice_ns;
+        u64 slice = task_slice_ns ? : slice_ns;
         u64 curr_cpumask_cnt;
         bool force_shared = false;
         s32 cpu;
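
In the removed chain, `a ? : b` is the GNU C conditional with omitted
middle operand: it yields `a` when `a` is nonzero, `b` otherwise, and
`__sync_fetch_and_add(&effective_slice_ns, 0)` served as an atomic read
of the volatile global. The same selection logic, restated as a small
Rust sketch (names taken from the diff):

    // Old chain: task_slice_ns, else effective_slice_ns (atomic read),
    // else slice_ns.
    // New chain: task_slice_ns, else slice_ns.
    fn pick_slice(task_slice_ns: u64, slice_ns: u64) -> u64 {
        if task_slice_ns != 0 { task_slice_ns } else { slice_ns }
    }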


@@ -150,9 +150,8 @@ struct Opts {
 }
 
 // Time constants.
-const USEC_PER_NSEC: u64 = 1_000;
 const NSEC_PER_USEC: u64 = 1_000;
-const MSEC_PER_SEC: u64 = 1_000;
+const NSEC_PER_MSEC: u64 = 1_000_000;
 const NSEC_PER_SEC: u64 = 1_000_000_000;
 
 // Basic item stored in the task information map.
@@ -375,9 +374,6 @@ impl<'a> Scheduler<'a> {
         // Cache the current timestamp.
         let now = Self::now();
 
-        // Get the current effective time slice.
-        let slice_ns = self.bpf.get_effective_slice_us() * MSEC_PER_SEC;
-
         // Update dynamic slice boost.
         //
         // The slice boost is dynamically adjusted as a function of the amount of CPUs
@@ -445,7 +441,7 @@ impl<'a> Scheduler<'a> {
         //
         // Moreover, limiting the accounted time slice to slice_ns, allows to prevent starving the
         // current task for too long in the scheduler task pool.
-        task_info.vruntime = self.min_vruntime + slice.clamp(1, slice_ns);
+        task_info.vruntime = self.min_vruntime + slice.clamp(1, self.slice_ns);
 
         // Update total task cputime.
         task_info.sum_exec_runtime = task.sum_exec_runtime;
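
The clamp bounds how far one update can advance a task's vruntime: the
lower bound of 1 guarantees forward progress past min_vruntime, while
the upper bound of slice_ns caps the penalty for a long-running task,
per the comment in the hunk above. A worked illustration of the
arithmetic (all values assumed, 5 ms base slice):

    fn main() {
        // Assumed values for illustration only.
        let min_vruntime: u64 = 1_000_000;
        let slice_ns: u64 = 5_000_000;

        // used runtime -> accounted advancement:
        //          0 ->         1 (a sleeper still moves forward)
        //  2_000_000 -> 2_000_000 (normal case)
        // 50_000_000 -> 5_000_000 (capped at one full slice)
        for used in [0u64, 2_000_000, 50_000_000] {
            println!("{} -> {}", used, min_vruntime + used.clamp(1, slice_ns));
        }
    }
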
@@ -503,21 +499,24 @@ impl<'a> Scheduler<'a> {
         }
     }
 
-    // Dynamically adjust the time slice based on the amount of waiting tasks.
-    fn scale_slice_ns(&mut self) {
-        let nr_scheduled = self.task_pool.tasks.len() as u64;
-        let slice_us_max = self.slice_ns / NSEC_PER_USEC;
-
+    // Return the target time slice, proportionally adjusted based on the total amount of tasks
+    // waiting to be scheduled (more tasks waiting => shorter time slice).
+    fn effective_slice_ns(&mut self, nr_scheduled: u64) -> u64 {
+        // Scale time slice as a function of nr_scheduled, but never scale below 250 us.
+        //
+        // The goal here is to adjust the time slice allocated to tasks based on the number of
+        // tasks currently awaiting scheduling. When the system is heavily loaded, shorter time
+        // slices are assigned to provide more opportunities for all tasks to receive CPU time.
         let scaling = ((nr_scheduled + 1) / 2).max(1);
-        let slice_us = (slice_us_max / scaling).max(USEC_PER_NSEC / 4);
+        let slice_ns = (self.slice_ns / scaling).max(NSEC_PER_MSEC / 4);
 
-        // Apply new scaling.
-        self.bpf.set_effective_slice_us(slice_us);
+        slice_ns
     }
 
     // Dispatch tasks from the task pool in order (sending them to the BPF dispatcher).
     fn dispatch_tasks(&mut self) {
+        let nr_scheduled = self.task_pool.tasks.len() as u64;
 
         // Dispatch only a batch of tasks equal to the amount of idle CPUs in the system.
         //
         // This allows to have more tasks sitting in the task pool, reducing the pressure on the
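
Plugging numbers into the new effective_slice_ns() makes the behavior
concrete: the slice is divided by roughly half the number of waiting
tasks, with a 250 us floor. A self-contained sketch of the same formula
(a 5 ms base slice is assumed for illustration):

    const NSEC_PER_MSEC: u64 = 1_000_000;

    // Same scaling as effective_slice_ns() in the hunk above.
    fn effective_slice_ns(slice_ns: u64, nr_scheduled: u64) -> u64 {
        let scaling = ((nr_scheduled + 1) / 2).max(1);
        (slice_ns / scaling).max(NSEC_PER_MSEC / 4)
    }

    fn main() {
        let base = 5 * NSEC_PER_MSEC;
        for nr in [0u64, 1, 4, 10, 100] {
            // 0 or 1 waiting -> 5000 us; 4 -> 2500 us; 10 -> 1000 us;
            // 100 -> 250 us (clamped at the floor).
            println!("nr_scheduled={:3} -> {} us",
                     nr, effective_slice_ns(base, nr) / 1_000);
        }
    }
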
@@ -546,6 +545,8 @@ impl<'a> Scheduler<'a> {
                 // maximum static time slice allowed.
                 dispatched_task.set_slice_ns(self.slice_ns);
                 dispatched_task.set_flag(RL_PREEMPT_CPU);
+            } else {
+                dispatched_task.set_slice_ns(self.effective_slice_ns(nr_scheduled));
             }
 
             // Send task to the BPF dispatcher.
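
The dispatch-time branch reads as: a task flagged to preempt a CPU
(RL_PREEMPT_CPU) keeps the maximum static slice, everyone else receives
the load-scaled one. Restated as a sketch (the boolean condition is
simplified; the real check lives in the surrounding loop, which this
hunk does not show):

    fn slice_for_dispatch(preempting: bool, slice_ns: u64, scaled_ns: u64) -> u64 {
        if preempting {
            slice_ns  // full static slice for preempting tasks
        } else {
            scaled_ns // effective_slice_ns(nr_scheduled) for the rest
        }
    }
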
@@ -576,9 +577,6 @@ impl<'a> Scheduler<'a> {
             self.drain_queued_tasks();
             self.dispatch_tasks();
 
-            // Adjust the dynamic time slice immediately after dispatching the tasks.
-            self.scale_slice_ns();
-
             // Yield to avoid using too much CPU from the scheduler itself.
             thread::yield_now();
         }
@@ -702,9 +700,6 @@ impl<'a> Scheduler<'a> {
         // Show total page faults of the user-space scheduler.
         self.print_faults();
 
-        // Show current used time slice.
-        info!("time slice = {} us", self.bpf.get_effective_slice_us());
-
         // Show current slice boost.
         info!("slice boost = {}", self.eff_slice_boost);