Merge pull request #64 from arighi/improve-interactive-workloads

scx_rustland: improve interactive workloads
David Vernet 2024-01-03 12:10:26 -06:00 committed by GitHub
commit 9f1a3973d8
2 changed files with 37 additions and 34 deletions

@@ -319,19 +319,6 @@ static bool is_task_cpu_available(struct task_struct *p, u64 enq_flags)
if (is_kthread(p) && p->nr_cpus_allowed == 1)
return true;
/*
* Moreover, immediately dispatch kthreads that still have more than
* half of their runtime budget. As they are likely to release the CPU
* soon, granting them a substantial priority boost can enhance the
* overall system performance.
*
* In the event that one of these kthreads turns into a CPU hog, it
* will deplete its runtime budget and therefore it will be scheduled
* like any other normal task.
*/
if (is_kthread(p) && p->scx.slice > slice_ns / 2)
return true;
/*
* For regular tasks always rely on force_local to determine if we can
* bypass the scheduler.
@@ -352,13 +339,17 @@ static void get_task_info(struct queued_task_ctx *task,
const struct task_struct *p, bool exiting)
{
task->pid = p->pid;
task->sum_exec_runtime = p->se.sum_exec_runtime;
task->weight = p->scx.weight;
/*
* Use a negative CPU number to notify that the task is exiting, so
* that we can free up its resources in the user-space scheduler.
*/
task->cpu = exiting ? -1 : scx_bpf_task_cpu(p);
if (exiting) {
task->cpu = -1;
return;
}
task->sum_exec_runtime = p->se.sum_exec_runtime;
task->weight = p->scx.weight;
task->cpu = scx_bpf_task_cpu(p);
}
/*
@@ -568,7 +559,7 @@ s32 BPF_STRUCT_OPS(rustland_prep_enable, struct task_struct *p,
*/
void BPF_STRUCT_OPS(rustland_disable, struct task_struct *p)
{
struct queued_task_ctx task;
struct queued_task_ctx task = {};
dbg_msg("exiting: pid=%d", task.pid);
get_task_info(&task, p, true);
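For reference, the user-space scheduler can detect the exiting case by checking for the negative CPU number described in the comment above. Below is a minimal hypothetical sketch of such a consumer; the struct and field names are assumptions made for illustration and are not the actual scx_rustland user-space code:

use std::collections::HashMap;

// Hypothetical mirror of the kernel-side queued_task_ctx (illustrative only).
struct QueuedTask {
    pid: i32,
    cpu: i32,
}

// Hypothetical per-task state kept by the user-space scheduler.
struct TaskInfo {
    sum_exec_runtime: u64,
    vruntime: u64,
}

// Free the user-space state of a task once the kernel reports cpu == -1.
// Returns true if the task was treated as exiting.
fn handle_exit_notification(tasks: &mut HashMap<i32, TaskInfo>, task: &QueuedTask) -> bool {
    if task.cpu < 0 {
        tasks.remove(&task.pid);
        return true;
    }
    false
}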

@@ -314,23 +314,40 @@ impl<'a> Scheduler<'a> {
idle_cpus
}
// Update task's vruntime based on the information collected from the kernel part.
// Update task's vruntime based on the information collected from the kernel.
//
// This method implements the main task ordering logic of the scheduler.
fn update_enqueued(
task_info: &mut TaskInfo,
sum_exec_runtime: u64,
weight: u64,
min_vruntime: u64,
max_slice_ns: u64,
slice_ns: u64,
) {
// Add cputime delta normalized by weight to the vruntime (if delta > 0).
if sum_exec_runtime > task_info.sum_exec_runtime {
let delta = (sum_exec_runtime - task_info.sum_exec_runtime) * 100 / weight;
// Never account more than max_slice_ns. This helps to prevent starving a task for too
// long in the scheduler task pool.
task_info.vruntime += delta.min(max_slice_ns);
// Scale the maximum allowed time slice by a factor of 10 to increase the
// range of allowed time delta and give a better chance to prioritize tasks
// with shorter time delta / higher weight.
let max_slice_ns = slice_ns * 10;
// Evaluate last time slot used by the task, scaled by its priority (weight).
let mut delta = (sum_exec_runtime - task_info.sum_exec_runtime) * 100 / weight;
// Account (max_slice_ns / 2) to new tasks to avoid granting excessive priority without
// understanding their nature. This helps mitigate potential system starvation caused
// by spawning a massive number of tasks (e.g., fork-bomb attacks).
if task_info.sum_exec_runtime == 0 {
delta = max_slice_ns / 2;
}
// Make sure vruntime is moving forward (> current minimum).
task_info.vruntime = task_info.vruntime.max(min_vruntime);
// Never account more than max_slice_ns, to prevent starving a task for too long in the
// scheduler task pool, but still give a range large enough to be able to prioritize
// tasks with short delta / higher weight.
task_info.vruntime += delta.min(max_slice_ns);
// Also make sure that the global vruntime is always progressing (at least by +1)
// during each scheduler run, to prevent excessive starvation of the other tasks
// sitting in the self.task_pool tree, waiting to be dispatched.
task_info.vruntime = task_info.vruntime.max(min_vruntime + 1);
// Update total task cputime.
task_info.sum_exec_runtime = sum_exec_runtime;
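To make the ordering rule above easier to follow, here is a free-standing sketch that approximates the logic of update_enqueued; the function name, parameter layout, and the numbers in the example below are illustrative and not part of the commit:

// Free-standing approximation of the vruntime accounting introduced above
// (illustrative only; the authoritative logic is Scheduler::update_enqueued).
fn vruntime_update(
    vruntime: u64,         // task's current vruntime
    prev_runtime: u64,     // sum_exec_runtime seen at the previous update
    sum_exec_runtime: u64, // sum_exec_runtime just reported by the kernel
    weight: u64,           // scx weight (100 = default priority)
    min_vruntime: u64,     // global minimum vruntime
    slice_ns: u64,         // base time slice
) -> u64 {
    // Allow deltas up to 10x the base slice to widen the priority range.
    let max_slice_ns = slice_ns * 10;
    // CPU time used since the last update, scaled by weight:
    // a higher weight results in a smaller charged delta.
    let mut delta = (sum_exec_runtime - prev_runtime) * 100 / weight;
    // Brand new tasks (no runtime observed yet) are charged half of the
    // maximum slice, so a burst of forked tasks cannot monopolize the pool.
    if prev_runtime == 0 {
        delta = max_slice_ns / 2;
    }
    // Never lag behind the global minimum, cap the charged delta, and
    // always advance by at least +1 over the global minimum.
    (vruntime.max(min_vruntime) + delta.min(max_slice_ns)).max(min_vruntime + 1)
}

For example, with a 5 ms base slice a default-weight task (weight = 100) that ran for 1 ms since its last update is charged 1 ms of vruntime, while a weight-1000 task that ran for the same time is charged only 0.1 ms, so it sorts earlier in the task pool on the next dispatch round.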
@@ -362,7 +379,7 @@ impl<'a> Scheduler<'a> {
.tasks
.entry(task.pid)
.or_insert_with_key(|&_pid| TaskInfo {
sum_exec_runtime: task.sum_exec_runtime,
sum_exec_runtime: 0,
vruntime: self.min_vruntime,
});
@@ -371,12 +388,7 @@ impl<'a> Scheduler<'a> {
task_info,
task.sum_exec_runtime,
task.weight,
// Make sure the global vruntime is always progressing (at least by +1)
// during each scheduler run, providing a priority boost to newer tasks
// (that is still beneficial for potential short-lived tasks), while also
// preventing excessive starvation of the other tasks sitting in the
// self.task_pool tree, waiting to be dispatched.
self.min_vruntime + 1,
self.min_vruntime,
self.skel.rodata().slice_ns,
);