scx_rustland_core: implement effective time slice on a per-task basis

Drop the global effective time-slice and use the more fine-grained
per-task time-slice to implement the dynamic time-slice capability.

This reduces the scheduler's overhead (the global volatile time-slice
variable shared between user-space and BPF is gone) and provides more
fine-grained control over each task's time slice.

Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
Author: Andrea Righi <andrea.righi@canonical.com>
Date:   2024-05-09 07:03:36 +02:00
parent 382ef72999
commit f052493005
3 changed files with 16 additions and 46 deletions
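
Background for the change: before this commit, user space published a single mutable slice value that every BPF dispatch had to read; after it, the slice travels inside each dispatched task. The following plain-Rust sketch is only an analogy of that difference, not code from the repository (BASE_SLICE_NS, old_pick_slice, and new_pick_slice are illustrative names):

use std::sync::atomic::{AtomicU64, Ordering};

// Old scheme (analogy): one global slice shared between the user-space
// writer and the dispatch-path readers, so every dispatch pays for an
// atomic load of shared state.
static EFFECTIVE_SLICE_NS: AtomicU64 = AtomicU64::new(0);

const BASE_SLICE_NS: u64 = 5_000_000; // illustrative 5 ms default

fn old_pick_slice() -> u64 {
    match EFFECTIVE_SLICE_NS.load(Ordering::SeqCst) {
        0 => BASE_SLICE_NS, // 0 means "no override set"
        ns => ns,
    }
}

// New scheme (analogy): the slice is a plain field of the task message,
// so the dispatch path only reads data it already owns.
struct DispatchedTask {
    slice_ns: u64, // 0 means "use the default"
}

fn new_pick_slice(task: &DispatchedTask) -> u64 {
    if task.slice_ns > 0 { task.slice_ns } else { BASE_SLICE_NS }
}

fn main() {
    EFFECTIVE_SLICE_NS.store(1_000_000, Ordering::SeqCst);
    assert_eq!(old_pick_slice(), 1_000_000);
    assert_eq!(new_pick_slice(&DispatchedTask { slice_ns: 0 }), BASE_SLICE_NS);
    assert_eq!(new_pick_slice(&DispatchedTask { slice_ns: 250_000 }), 250_000);
}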


@@ -293,24 +293,6 @@ impl<'cb> BpfScheduler<'cb> {
         }
     }

-    // Override the default scheduler time slice (in us).
-    #[allow(dead_code)]
-    pub fn set_effective_slice_us(&mut self, slice_us: u64) {
-        self.skel.bss_mut().effective_slice_ns = slice_us * 1000;
-    }
-
-    // Get current value of time slice (slice_ns).
-    #[allow(dead_code)]
-    pub fn get_effective_slice_us(&mut self) -> u64 {
-        let slice_ns = self.skel.bss().effective_slice_ns;
-
-        if slice_ns > 0 {
-            slice_ns / 1000
-        } else {
-            self.skel.rodata().slice_ns / 1000
-        }
-    }
-
     // Counter of queued tasks.
     #[allow(dead_code)]
     pub fn nr_queued_mut(&mut self) -> &mut u64 {
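
The getter removed above encodes a common libbpf skeleton layout: the runtime override lives in the writable .bss section (skel.bss()), the load-time default in read-only .rodata (skel.rodata()), and zero means "unset". A standalone sketch of just that fallback, with Bss and Rodata as stand-ins for the generated skeleton types:

// Stand-ins for the skeleton's generated .bss / .rodata views.
struct Bss { effective_slice_ns: u64 }   // writable at run time; 0 = unset
struct Rodata { slice_ns: u64 }          // fixed when the BPF program loads

// The same fallback the removed getter performed, in isolation (ns -> us).
fn effective_slice_us(bss: &Bss, rodata: &Rodata) -> u64 {
    let slice_ns = bss.effective_slice_ns;

    if slice_ns > 0 {
        slice_ns / 1000 // override set: report it
    } else {
        rodata.slice_ns / 1000 // fall back to the read-only default
    }
}

fn main() {
    let rodata = Rodata { slice_ns: 5_000_000 };
    assert_eq!(effective_slice_us(&Bss { effective_slice_ns: 0 }, &rodata), 5_000);
    assert_eq!(effective_slice_us(&Bss { effective_slice_ns: 2_000_000 }, &rodata), 2_000);
}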


@@ -52,12 +52,6 @@ u32 usersched_pid; /* User-space scheduler PID */
 const volatile bool switch_partial;	/* Switch all tasks or SCHED_EXT tasks */
 const volatile u64 slice_ns = SCX_SLICE_DFL;	/* Base time slice duration */

-/*
- * Effective time slice: allow the scheduler to override the default time slice
- * (slice_ns) if this one is set.
- */
-volatile u64 effective_slice_ns;
-
 /*
  * Number of tasks that are queued for scheduling.
  *
@@ -321,8 +315,7 @@ dispatch_task(struct task_struct *p, u64 dsq_id,
 	  u64 cpumask_cnt, u64 task_slice_ns, u64 enq_flags)
 {
 	struct task_ctx *tctx;
-	u64 slice = task_slice_ns ? :
-		__sync_fetch_and_add(&effective_slice_ns, 0) ? : slice_ns;
+	u64 slice = task_slice_ns ? : slice_ns;
 	u64 curr_cpumask_cnt;
 	bool force_shared = false;
 	s32 cpu;
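
After the change, the BPF side keeps a single fallback. task_slice_ns ? : slice_ns is GCC's binary conditional (a ? : b evaluates to a when a is non-zero, else b), so the per-task slice wins whenever user space set one. The dropped __sync_fetch_and_add(&effective_slice_ns, 0) was an atomic read expressed as "add 0", which is exactly the per-dispatch cost the commit message refers to. The same selection written out in Rust:

// Rust rendering of the C expression `task_slice_ns ? : slice_ns`.
fn pick_slice(task_slice_ns: u64, slice_ns: u64) -> u64 {
    if task_slice_ns != 0 { task_slice_ns } else { slice_ns }
}

fn main() {
    assert_eq!(pick_slice(0, 5_000_000), 5_000_000);     // unset: base slice
    assert_eq!(pick_slice(250_000, 5_000_000), 250_000); // per-task slice wins
}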


@@ -150,9 +150,8 @@ struct Opts {
 }

 // Time constants.
-const USEC_PER_NSEC: u64 = 1_000;
 const NSEC_PER_USEC: u64 = 1_000;
-const MSEC_PER_SEC: u64 = 1_000;
+const NSEC_PER_MSEC: u64 = 1_000_000;
 const NSEC_PER_SEC: u64 = 1_000_000_000;

 // Basic item stored in the task information map.
@@ -375,9 +374,6 @@ impl<'a> Scheduler<'a> {
         // Cache the current timestamp.
         let now = Self::now();

-        // Get the current effective time slice.
-        let slice_ns = self.bpf.get_effective_slice_us() * MSEC_PER_SEC;
-
         // Update dynamic slice boost.
         //
         // The slice boost is dynamically adjusted as a function of the amount of CPUs
@@ -445,7 +441,7 @@ impl<'a> Scheduler<'a> {
         //
         // Moreover, limiting the accounted time slice to slice_ns, allows to prevent starving the
         // current task for too long in the scheduler task pool.
-        task_info.vruntime = self.min_vruntime + slice.clamp(1, slice_ns);
+        task_info.vruntime = self.min_vruntime + slice.clamp(1, self.slice_ns);

         // Update total task cputime.
         task_info.sum_exec_runtime = task.sum_exec_runtime;
@@ -503,21 +499,24 @@ impl<'a> Scheduler<'a> {
         }
     }

-    // Dynamically adjust the time slice based on the amount of waiting tasks.
-    fn scale_slice_ns(&mut self) {
-        let nr_scheduled = self.task_pool.tasks.len() as u64;
-        let slice_us_max = self.slice_ns / NSEC_PER_USEC;
-
+    // Return the target time slice, proportionally adjusted based on the total amount of tasks
+    // waiting to be scheduled (more tasks waiting => shorter time slice).
+    fn effective_slice_ns(&mut self, nr_scheduled: u64) -> u64 {
         // Scale time slice as a function of nr_scheduled, but never scale below 250 us.
+        //
+        // The goal here is to adjust the time slice allocated to tasks based on the number of
+        // tasks currently awaiting scheduling. When the system is heavily loaded, shorter time
+        // slices are assigned to provide more opportunities for all tasks to receive CPU time.
         let scaling = ((nr_scheduled + 1) / 2).max(1);
-        let slice_us = (slice_us_max / scaling).max(USEC_PER_NSEC / 4);
+        let slice_ns = (self.slice_ns / scaling).max(NSEC_PER_MSEC / 4);

-        // Apply new scaling.
-        self.bpf.set_effective_slice_us(slice_us);
+        slice_ns
     }

     // Dispatch tasks from the task pool in order (sending them to the BPF dispatcher).
     fn dispatch_tasks(&mut self) {
+        let nr_scheduled = self.task_pool.tasks.len() as u64;
+
         // Dispatch only a batch of tasks equal to the amount of idle CPUs in the system.
         //
         // This allows to have more tasks sitting in the task pool, reducing the pressure on the
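
The new helper halves the slice roughly once per two waiting tasks and floors the result at 250 us (NSEC_PER_MSEC / 4). A standalone model of the same arithmetic, using an illustrative 5 ms base (the real base is slice_ns, which defaults to SCX_SLICE_DFL):

const NSEC_PER_MSEC: u64 = 1_000_000;

// Standalone model of effective_slice_ns() as introduced by this commit.
fn effective_slice_ns(base_slice_ns: u64, nr_scheduled: u64) -> u64 {
    // One halving step for roughly every two tasks waiting in the pool...
    let scaling = ((nr_scheduled + 1) / 2).max(1);
    // ...but never scale below 250 us.
    (base_slice_ns / scaling).max(NSEC_PER_MSEC / 4)
}

fn main() {
    let base = 5_000_000; // illustrative 5 ms base slice
    assert_eq!(effective_slice_ns(base, 0), 5_000_000); // empty pool: full slice
    assert_eq!(effective_slice_ns(base, 1), 5_000_000); // scaling = 1
    assert_eq!(effective_slice_ns(base, 7), 1_250_000); // scaling = 4
    assert_eq!(effective_slice_ns(base, 100), 250_000); // clamped at the 250 us floor
}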
@@ -546,6 +545,8 @@ impl<'a> Scheduler<'a> {
                 // maximum static time slice allowed.
                 dispatched_task.set_slice_ns(self.slice_ns);
                 dispatched_task.set_flag(RL_PREEMPT_CPU);
+            } else {
+                dispatched_task.set_slice_ns(self.effective_slice_ns(nr_scheduled));
             }

             // Send task to the BPF dispatcher.
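
Note that nr_scheduled is sampled once at the top of dispatch_tasks(), so the whole batch is scaled against one snapshot of pool pressure, while tasks on the preemption path keep the full static slice. A compact model of that flow (Task, preempt, and the pool shape are stand-ins; the preemption condition itself is outside this hunk):

struct Task {
    slice_ns: u64,
    preempt: bool, // stand-in for the RL_PREEMPT_CPU condition
}

fn dispatch_tasks(pool: &mut [Task], static_slice_ns: u64) {
    // Snapshot once: every task in this batch sees the same pool pressure.
    let nr_scheduled = pool.len() as u64;
    let scaling = ((nr_scheduled + 1) / 2).max(1);
    let scaled_slice_ns = (static_slice_ns / scaling).max(250_000);

    for task in pool.iter_mut() {
        task.slice_ns = if task.preempt {
            static_slice_ns // preemption path: maximum static slice
        } else {
            scaled_slice_ns // everyone else: load-scaled slice
        };
    }
}

fn main() {
    let mut pool: Vec<Task> =
        (0..8).map(|i| Task { slice_ns: 0, preempt: i == 0 }).collect();
    dispatch_tasks(&mut pool, 5_000_000);
    assert_eq!(pool[0].slice_ns, 5_000_000); // preempting task: full slice
    assert_eq!(pool[1].slice_ns, 1_250_000); // 8 waiting -> scaling = 4
}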
@@ -576,9 +577,6 @@ impl<'a> Scheduler<'a> {
             self.drain_queued_tasks();
             self.dispatch_tasks();

-            // Adjust the dynamic time slice immediately after dispatching the tasks.
-            self.scale_slice_ns();
-
             // Yield to avoid using too much CPU from the scheduler itself.
             thread::yield_now();
         }
@@ -702,9 +700,6 @@ impl<'a> Scheduler<'a> {
         // Show total page faults of the user-space scheduler.
         self.print_faults();

-        // Show current used time slice.
-        info!("time slice = {} us", self.bpf.get_effective_slice_us());
-
         // Show current slice boost.
         info!("slice boost = {}", self.eff_slice_boost);