mirror of
https://github.com/sched-ext/scx.git
synced 2024-11-28 21:50:23 +00:00
Merge pull request #64 from arighi/improve-interactive-workloads
scx_rustland: improve interactive workloads
This commit is contained in: commit 9f1a3973d8
@@ -319,19 +319,6 @@ static bool is_task_cpu_available(struct task_struct *p, u64 enq_flags)
 	if (is_kthread(p) && p->nr_cpus_allowed == 1)
 		return true;
 
-	/*
-	 * Moreover, immediately dispatch kthreads that still have more than
-	 * half of their runtime budget. As they are likely to release the CPU
-	 * soon, granting them a substantial priority boost can enhance the
-	 * overall system performance.
-	 *
-	 * In the event that one of these kthreads turns into a CPU hog, it
-	 * will deplete its runtime budget and therefore it will be scheduled
-	 * like any other normal task.
-	 */
-	if (is_kthread(p) && p->scx.slice > slice_ns / 2)
-		return true;
-
 	/*
 	 * For regular tasks always rely on force_local to determine if we can
 	 * bypass the scheduler.
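With this hunk, only per-CPU kthreads (nr_cpus_allowed == 1) keep the in-kernel fast path; a kthread that still holds slice budget is no longer boosted and is handed to the user-space scheduler like any other task. A minimal Rust model of the resulting decision, assuming force_local remains the only bypass for regular tasks (function and parameter names here are illustrative, mirroring the BPF side):

// Illustrative model of the simplified fast-path predicate, not the BPF code itself.
fn is_task_cpu_available(is_kthread: bool, nr_cpus_allowed: u32, force_local: bool) -> bool {
    // Per-CPU kthreads are still dispatched directly.
    if is_kthread && nr_cpus_allowed == 1 {
        return true;
    }
    // The slice-budget boost is gone: everything else relies on force_local.
    force_local
}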
@@ -352,13 +339,17 @@ static void get_task_info(struct queued_task_ctx *task,
 			  const struct task_struct *p, bool exiting)
 {
 	task->pid = p->pid;
-	task->sum_exec_runtime = p->se.sum_exec_runtime;
-	task->weight = p->scx.weight;
 	/*
 	 * Use a negative CPU number to notify that the task is exiting, so
 	 * that we can free up its resources in the user-space scheduler.
 	 */
-	task->cpu = exiting ? -1 : scx_bpf_task_cpu(p);
+	if (exiting) {
+		task->cpu = -1;
+		return;
+	}
+	task->sum_exec_runtime = p->se.sum_exec_runtime;
+	task->weight = p->scx.weight;
+	task->cpu = scx_bpf_task_cpu(p);
 }
 
 /*
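The negative CPU number is the contract between get_task_info() and the user-space scheduler: an entry with cpu == -1 means "this task is exiting, free its state". A hedged Rust sketch of a consumer honoring that contract (QueuedTaskCtx and the tracking map here are illustrative, not the exact scx_rustland types):

use std::collections::HashMap;

// Illustrative mirror of the BPF-side struct (field set trimmed for brevity).
struct QueuedTaskCtx {
    pid: i32,
    cpu: i32, // -1 marks an exiting task
    sum_exec_runtime: u64,
}

fn handle_queued(tracked: &mut HashMap<i32, u64>, task: &QueuedTaskCtx) {
    if task.cpu < 0 {
        // The task is exiting: drop its per-task state.
        tracked.remove(&task.pid);
        return;
    }
    // Otherwise record the latest cputime reported by the kernel part.
    tracked.insert(task.pid, task.sum_exec_runtime);
}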
@@ -568,7 +559,7 @@ s32 BPF_STRUCT_OPS(rustland_prep_enable, struct task_struct *p,
  */
 void BPF_STRUCT_OPS(rustland_disable, struct task_struct *p)
 {
-	struct queued_task_ctx task;
+	struct queued_task_ctx task = {};
 
 	dbg_msg("exiting: pid=%d", task.pid);
 	get_task_info(&task, p, true);
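The `= {}` matters because get_task_info() now returns early for exiting tasks, leaving sum_exec_runtime and weight unwritten on that path, and because dbg_msg() here reads task.pid before the struct is filled; zero-initializing the stack variable keeps both reads well defined (the BPF verifier can also reject reads of uninitialized stack memory). The Rust analogue of the C aggregate initializer is Default::default(); a tiny illustrative sketch:

// Illustrative Rust analogue of `struct queued_task_ctx task = {};`.
#[derive(Default, Debug)]
struct QueuedTaskCtx {
    pid: i32,
    cpu: i32,
    sum_exec_runtime: u64,
    weight: u64,
}

fn main() {
    let task = QueuedTaskCtx::default(); // every field starts at zero
    println!("exiting: {:?}", task); // prints zeros, never garbage
}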
@@ -314,23 +314,40 @@ impl<'a> Scheduler<'a> {
         idle_cpus
     }
 
-    // Update task's vruntime based on the information collected from the kernel part.
+    // Update task's vruntime based on the information collected from the kernel.
+    //
+    // This method implements the main task ordering logic of the scheduler.
     fn update_enqueued(
         task_info: &mut TaskInfo,
         sum_exec_runtime: u64,
         weight: u64,
         min_vruntime: u64,
-        max_slice_ns: u64,
+        slice_ns: u64,
     ) {
-        // Add cputime delta normalized by weight to the vruntime (if delta > 0).
-        if sum_exec_runtime > task_info.sum_exec_runtime {
-            let delta = (sum_exec_runtime - task_info.sum_exec_runtime) * 100 / weight;
-            // Never account more than max_slice_ns. This helps to prevent starving a task for too
-            // long in the scheduler task pool.
-            task_info.vruntime += delta.min(max_slice_ns);
+        // Scale the maximum allowed time slice by a factor of 10 to increase the
+        // range of allowed time delta and give a better chance to prioritize tasks
+        // with shorter time delta / higher weight.
+        let max_slice_ns = slice_ns * 10;
+
+        // Evaluate last time slot used by the task, scaled by its priority (weight).
+        let mut delta = (sum_exec_runtime - task_info.sum_exec_runtime) * 100 / weight;
+
+        // Account (max_slice_ns / 2) to new tasks to avoid granting excessive priority without
+        // understanding their nature. This allows to mitigate potential system starvation caused
+        // by spawning a massive amount of tasks (e.g., fork-bomb attacks).
+        if task_info.sum_exec_runtime == 0 {
+            delta = max_slice_ns / 2;
         }
-        // Make sure vruntime is moving forward (> current minimum).
-        task_info.vruntime = task_info.vruntime.max(min_vruntime);
+
+        // Never account more than max_slice_ns, to prevent starving a task for too long in the
+        // scheduler task pool, but still give a range large enough to be able to prioritize
+        // tasks with short delta / higher weight.
+        task_info.vruntime += delta.min(max_slice_ns);
+
+        // Also make sure that the global vruntime is always progressing (at least by +1)
+        // during each scheduler run, to prevent excessive starvation of the other tasks
+        // sitting in the self.task_pool tree, waiting to be dispatched.
+        task_info.vruntime = task_info.vruntime.max(min_vruntime + 1);
 
         // Update total task cputime.
         task_info.sum_exec_runtime = sum_exec_runtime;
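Taken together, the new body orders tasks by weighted cputime: the last slot a task used is normalized as delta * 100 / weight (100 corresponding to the default scx weight), brand-new tasks are charged max_slice_ns / 2, and the result is clamped to max_slice_ns and floored at min_vruntime + 1. A self-contained model with a worked comparison (the 5 ms slice and the runtimes are illustrative values, not scx_rustland defaults):

// Minimal model of the new update_enqueued() logic for experimentation.
struct TaskInfo {
    sum_exec_runtime: u64,
    vruntime: u64,
}

fn update_enqueued(ti: &mut TaskInfo, sum_exec_runtime: u64, weight: u64,
                   min_vruntime: u64, slice_ns: u64) {
    let max_slice_ns = slice_ns * 10;
    let mut delta = (sum_exec_runtime - ti.sum_exec_runtime) * 100 / weight;
    if ti.sum_exec_runtime == 0 {
        delta = max_slice_ns / 2; // new-task charge (fork-bomb mitigation)
    }
    ti.vruntime += delta.min(max_slice_ns);
    ti.vruntime = ti.vruntime.max(min_vruntime + 1);
    ti.sum_exec_runtime = sum_exec_runtime;
}

fn main() {
    let slice_ns = 5_000_000; // assume a 5 ms base slice for the example
    // Two tasks that each ran 1 ms since the last update, default vs. double weight.
    let mut a = TaskInfo { sum_exec_runtime: 1, vruntime: 0 };
    let mut b = TaskInfo { sum_exec_runtime: 1, vruntime: 0 };
    update_enqueued(&mut a, 1_000_001, 100, 0, slice_ns);
    update_enqueued(&mut b, 1_000_001, 200, 0, slice_ns);
    // The heavier task accrues half the vruntime, so it sorts earlier
    // in the task pool and is dispatched sooner.
    assert!(b.vruntime < a.vruntime);
    println!("a.vruntime={} b.vruntime={}", a.vruntime, b.vruntime);
}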
@@ -362,7 +379,7 @@ impl<'a> Scheduler<'a> {
             .tasks
             .entry(task.pid)
             .or_insert_with_key(|&_pid| TaskInfo {
-                sum_exec_runtime: task.sum_exec_runtime,
+                sum_exec_runtime: 0,
                 vruntime: self.min_vruntime,
             });
 
@@ -371,12 +388,7 @@ impl<'a> Scheduler<'a> {
                 task_info,
                 task.sum_exec_runtime,
                 task.weight,
-                // Make sure the global vruntime is always progressing (at least by +1)
-                // during each scheduler run, providing a priority boost to newer tasks
-                // (that is still beneficial for potential short-lived tasks), while also
-                // preventing excessive starvation of the other tasks sitting in the
-                // self.task_pool tree, waiting to be dispatched.
-                self.min_vruntime + 1,
+                self.min_vruntime,
                 self.skel.rodata().slice_ns,
             );
 
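Note how this call-site change pairs with the update_enqueued() rewrite above: the +1 progression guarantee (and the comment explaining it) moved into the method itself, which now applies max(min_vruntime + 1) internally, so the caller passes self.min_vruntime unchanged along with the raw slice_ns for the method to scale.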