scx_rustland_core: pass nvcsw, slice and dsq_vtime to user-space

Provide additional task metrics to user-space schedulers via QueuedTask:
 - nvcsw: total number of voluntary context switches
 - slice: task time slice "budget" (from p->scx.slice)
 - dsq_vtime: current task vtime (from p->scx.dsq_vtime)

In this way user-space schedulers can quickly access these metrics to
implement better scheduling policies.

Signed-off-by: Andrea Righi <andrea.righi@linux.dev>
This commit is contained in:
Andrea Righi 2024-10-11 20:57:23 +02:00
parent 1bbae64dc7
commit be681c731a
3 changed files with 27 additions and 12 deletions

View File

@ -82,7 +82,10 @@ pub struct QueuedTask {
pub cpu: i32, // CPU where the task is running
pub flags: u64, // task enqueue flags
pub sum_exec_runtime: u64, // Total cpu time
pub nvcsw: u64, // Total amount of voluntary context switches
pub weight: u64, // Task static priority
pub slice: u64, // Time slice budget
pub vtime: u64, // Current vruntime
cpumask_cnt: u64, // cpumask generation counter (private)
}
@ -107,9 +110,9 @@ impl DispatchedTask {
pid: task.pid,
cpu: task.cpu,
flags: task.flags,
cpumask_cnt: task.cpumask_cnt,
slice_ns: 0, // use default time slice
vtime: 0,
cpumask_cnt: task.cpumask_cnt,
}
}
}
@ -144,7 +147,10 @@ impl EnqueuedMessage {
cpu: self.inner.cpu,
flags: self.inner.flags,
sum_exec_runtime: self.inner.sum_exec_runtime,
nvcsw: self.inner.nvcsw,
weight: self.inner.weight,
slice: self.inner.slice,
vtime: self.inner.vtime,
cpumask_cnt: self.inner.cpumask_cnt,
}
}

View File

@ -83,9 +83,12 @@ struct queued_task_ctx {
	s32 pid;
	s32 cpu;              /* CPU where the task is running */
	u64 flags;            /* task enqueue flags */
	u64 sum_exec_runtime; /* Total cpu time */
	u64 nvcsw;            /* Total number of voluntary context switches */
	u64 weight;           /* Task static priority */
	u64 slice;            /* Time slice budget (from p->scx.slice) */
	u64 vtime;            /* Current task's vruntime (from p->scx.dsq_vtime) */
	u64 cpumask_cnt;      /* cpumask generation counter */
};
/*

View File

@ -690,6 +690,9 @@ static void dispatch_task(const struct dispatched_task_ctx *task)
if (!p)
return;
dbg_msg("dispatch: pid=%d (%s) cpu=0x%lx vtime=%llu slice=%llu",
p->pid, p->comm, task->cpu, task->vtime, task->slice_ns);
/*
* Update task's time slice in its context.
*/
@ -808,10 +811,13 @@ static void get_task_info(struct queued_task_ctx *task,
struct task_ctx *tctx = try_lookup_task_ctx(p);
task->pid = p->pid;
task->sum_exec_runtime = p->se.sum_exec_runtime;
task->flags = enq_flags;
task->weight = p->scx.weight;
task->cpu = scx_bpf_task_cpu(p);
task->flags = enq_flags;
task->sum_exec_runtime = p->se.sum_exec_runtime;
task->nvcsw = p->nvcsw;
task->weight = p->scx.weight;
task->slice = p->scx.slice;
task->vtime = p->scx.dsq_vtime;
task->cpumask_cnt = tctx ? tctx->cpumask_cnt : 0;
}
@ -903,6 +909,7 @@ static bool dispatch_user_scheduler(void)
scx_bpf_error("Failed to find usersched task %d", usersched_pid);
return false;
}
/*
* Use the highest vtime possible to give the scheduler itself the
* lowest priority possible.
@ -997,6 +1004,11 @@ void BPF_STRUCT_OPS(rustland_dispatch, s32 cpu, struct task_struct *prev)
*/
bpf_user_ringbuf_drain(&dispatched, handle_dispatched_task, NULL, 0);
/*
* Check if the user-space scheduler needs to run.
*/
dispatch_user_scheduler();
/*
* Try to steal a task dispatched to CPUs that may have gone offline
* (this allows to prevent indefinite task stalls).
@ -1013,13 +1025,7 @@ void BPF_STRUCT_OPS(rustland_dispatch, s32 cpu, struct task_struct *prev)
/*
* Consume a task from the shared DSQ.
*/
if (scx_bpf_consume(SHARED_DSQ))
return;
/*
* Check if the user-space scheduler needs to run.
*/
dispatch_user_scheduler();
scx_bpf_consume(SHARED_DSQ);
}
/*