Mirror of https://github.com/JakeHillion/scx.git (synced 2024-11-29 20:50:22 +00:00)
scx_rustland_core: implement effective time slice on a per-task basis
Drop the global effective time slice and implement the dynamic time-slice capability on a per-task basis.

This reduces the scheduler's overhead (by dropping the volatile time-slice variable shared between user space and BPF) and provides more fine-grained control over each task's time slice.

Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
This commit is contained in:
parent 382ef72999
commit f052493005
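For context, here is a minimal standalone Rust sketch of the per-task slice scaling that this commit switches to. It mirrors the effective_slice_ns() helper introduced in the diff below, but is simplified: the base slice is passed in as a plain argument, and the 5 ms value used in the example is an arbitrary assumption, not a value taken from the tree.

// Sketch only: compute a per-task time slice from the number of tasks
// waiting to be scheduled (more waiting tasks => shorter slice).
const NSEC_PER_MSEC: u64 = 1_000_000;

fn effective_slice_ns(base_slice_ns: u64, nr_scheduled: u64) -> u64 {
    // Divide the slice by roughly half the number of waiting tasks,
    // but never drop below 250 us.
    let scaling = ((nr_scheduled + 1) / 2).max(1);
    (base_slice_ns / scaling).max(NSEC_PER_MSEC / 4)
}

fn main() {
    // 5 ms base slice, 8 tasks waiting: scaling = 4, slice shrinks to 1.25 ms.
    assert_eq!(effective_slice_ns(5 * NSEC_PER_MSEC, 8), 1_250_000);
    // Nothing waiting: the full base slice is kept.
    assert_eq!(effective_slice_ns(5 * NSEC_PER_MSEC, 0), 5 * NSEC_PER_MSEC);
}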
@@ -293,24 +293,6 @@ impl<'cb> BpfScheduler<'cb> {
         }
     }
 
-    // Override the default scheduler time slice (in us).
-    #[allow(dead_code)]
-    pub fn set_effective_slice_us(&mut self, slice_us: u64) {
-        self.skel.bss_mut().effective_slice_ns = slice_us * 1000;
-    }
-
-    // Get current value of time slice (slice_ns).
-    #[allow(dead_code)]
-    pub fn get_effective_slice_us(&mut self) -> u64 {
-        let slice_ns = self.skel.bss().effective_slice_ns;
-
-        if slice_ns > 0 {
-            slice_ns / 1000
-        } else {
-            self.skel.rodata().slice_ns / 1000
-        }
-    }
-
     // Counter of queued tasks.
     #[allow(dead_code)]
     pub fn nr_queued_mut(&mut self) -> &mut u64 {
@@ -52,12 +52,6 @@ u32 usersched_pid; /* User-space scheduler PID */
 const volatile bool switch_partial; /* Switch all tasks or SCHED_EXT tasks */
 const volatile u64 slice_ns = SCX_SLICE_DFL; /* Base time slice duration */
 
-/*
- * Effective time slice: allow the scheduler to override the default time slice
- * (slice_ns) if this one is set.
- */
-volatile u64 effective_slice_ns;
-
 /*
  * Number of tasks that are queued for scheduling.
  *
@@ -321,8 +315,7 @@ dispatch_task(struct task_struct *p, u64 dsq_id,
 	      u64 cpumask_cnt, u64 task_slice_ns, u64 enq_flags)
 {
 	struct task_ctx *tctx;
-	u64 slice = task_slice_ns ? :
-		    __sync_fetch_and_add(&effective_slice_ns, 0) ? : slice_ns;
+	u64 slice = task_slice_ns ? : slice_ns;
 	u64 curr_cpumask_cnt;
 	bool force_shared = false;
 	s32 cpu;
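Note: `x ? : y` is the GCC/Clang conditional operator with an omitted middle operand; it evaluates to `x` if `x` is nonzero and to `y` otherwise. With the shared `effective_slice_ns` global gone, a task whose slice was not explicitly set by the user-space scheduler now falls back directly to the read-only `slice_ns` default, so this path no longer needs an atomic read of a variable shared with user space.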
@@ -150,9 +150,8 @@ struct Opts {
 }
 
 // Time constants.
-const USEC_PER_NSEC: u64 = 1_000;
 const NSEC_PER_USEC: u64 = 1_000;
-const MSEC_PER_SEC: u64 = 1_000;
+const NSEC_PER_MSEC: u64 = 1_000_000;
 const NSEC_PER_SEC: u64 = 1_000_000_000;
 
 // Basic item stored in the task information map.
@@ -375,9 +374,6 @@ impl<'a> Scheduler<'a> {
         // Cache the current timestamp.
         let now = Self::now();
 
-        // Get the current effective time slice.
-        let slice_ns = self.bpf.get_effective_slice_us() * MSEC_PER_SEC;
-
         // Update dynamic slice boost.
         //
         // The slice boost is dynamically adjusted as a function of the amount of CPUs
@@ -445,7 +441,7 @@ impl<'a> Scheduler<'a> {
         //
         // Moreover, limiting the accounted time slice to slice_ns, allows to prevent starving the
         // current task for too long in the scheduler task pool.
-        task_info.vruntime = self.min_vruntime + slice.clamp(1, slice_ns);
+        task_info.vruntime = self.min_vruntime + slice.clamp(1, self.slice_ns);
 
         // Update total task cputime.
         task_info.sum_exec_runtime = task.sum_exec_runtime;
@@ -503,21 +499,24 @@ impl<'a> Scheduler<'a> {
         }
     }
 
-    // Dynamically adjust the time slice based on the amount of waiting tasks.
-    fn scale_slice_ns(&mut self) {
-        let nr_scheduled = self.task_pool.tasks.len() as u64;
-        let slice_us_max = self.slice_ns / NSEC_PER_USEC;
-
+    // Return the target time slice, proportionally adjusted based on the total amount of tasks
+    // waiting to be scheduled (more tasks waiting => shorter time slice).
+    fn effective_slice_ns(&mut self, nr_scheduled: u64) -> u64 {
         // Scale time slice as a function of nr_scheduled, but never scale below 250 us.
+        //
+        // The goal here is to adjust the time slice allocated to tasks based on the number of
+        // tasks currently awaiting scheduling. When the system is heavily loaded, shorter time
+        // slices are assigned to provide more opportunities for all tasks to receive CPU time.
         let scaling = ((nr_scheduled + 1) / 2).max(1);
-        let slice_us = (slice_us_max / scaling).max(USEC_PER_NSEC / 4);
+        let slice_ns = (self.slice_ns / scaling).max(NSEC_PER_MSEC / 4);
 
-        // Apply new scaling.
-        self.bpf.set_effective_slice_us(slice_us);
+        slice_ns
     }
 
     // Dispatch tasks from the task pool in order (sending them to the BPF dispatcher).
     fn dispatch_tasks(&mut self) {
+        let nr_scheduled = self.task_pool.tasks.len() as u64;
+
         // Dispatch only a batch of tasks equal to the amount of idle CPUs in the system.
         //
         // This allows to have more tasks sitting in the task pool, reducing the pressure on the
@@ -546,6 +545,8 @@ impl<'a> Scheduler<'a> {
                 // maximum static time slice allowed.
                 dispatched_task.set_slice_ns(self.slice_ns);
                 dispatched_task.set_flag(RL_PREEMPT_CPU);
+            } else {
+                dispatched_task.set_slice_ns(self.effective_slice_ns(nr_scheduled));
             }
 
             // Send task to the BPF dispatcher.
@@ -576,9 +577,6 @@ impl<'a> Scheduler<'a> {
             self.drain_queued_tasks();
             self.dispatch_tasks();
 
-            // Adjust the dynamic time slice immediately after dispatching the tasks.
-            self.scale_slice_ns();
-
             // Yield to avoid using too much CPU from the scheduler itself.
             thread::yield_now();
         }
@@ -702,9 +700,6 @@ impl<'a> Scheduler<'a> {
         // Show total page faults of the user-space scheduler.
         self.print_faults();
 
-        // Show current used time slice.
-        info!("time slice = {} us", self.bpf.get_effective_slice_us());
-
         // Show current slice boost.
         info!("slice boost = {}", self.eff_slice_boost);
 