scx_rustland_core: pass nvcsw, slice and dsq_vtime to user-space

Provide additional task metrics to user-space schedulers via QueuedTask:
 - nvcsw: total number of voluntary context switches
 - slice: task time slice "budget" (from p->scx.slice)
 - dsq_vtime: current task vtime (from p->scx.dsq_vtime)

This way, user-space schedulers can quickly access these metrics to
implement better scheduling policies.

Signed-off-by: Andrea Righi <andrea.righi@linux.dev>
Author: Andrea Righi
Date:   2024-10-11 20:57:23 +02:00
Commit: be681c731a (parent: 1bbae64dc7)
3 changed files with 27 additions and 12 deletions
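To illustrate how a user-space scheduler might consume the new metrics, here is a minimal Rust sketch. The field names mirror the ones this commit adds to QueuedTask, but the TaskMetrics struct and the charging heuristic are hypothetical, not part of the scx_rustland_core API:

    // Minimal stand-in for the metrics this commit adds to QueuedTask.
    struct TaskMetrics {
        nvcsw: u64,  // total voluntary context switches
        weight: u64, // task static priority
        slice: u64,  // time slice budget (p->scx.slice)
        vtime: u64,  // current vtime (p->scx.dsq_vtime)
    }

    // Hypothetical vruntime update: charge the consumed slice inversely to
    // the task's static priority, so higher-weight tasks accumulate vtime
    // more slowly (CFS-style weighting).
    fn charge_vtime(m: &TaskMetrics, used_ns: u64) -> u64 {
        m.vtime + used_ns * 100 / m.weight.max(1)
    }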


@@ -82,7 +82,10 @@ pub struct QueuedTask {
     pub cpu: i32,              // CPU where the task is running
     pub flags: u64,            // task enqueue flags
     pub sum_exec_runtime: u64, // Total cpu time
+    pub nvcsw: u64,            // Total amount of voluntary context switches
     pub weight: u64,           // Task static priority
+    pub slice: u64,            // Time slice budget
+    pub vtime: u64,            // Current vruntime
     cpumask_cnt: u64,          // cpumask generation counter (private)
 }
@@ -107,9 +110,9 @@ impl DispatchedTask {
             pid: task.pid,
             cpu: task.cpu,
             flags: task.flags,
-            cpumask_cnt: task.cpumask_cnt,
             slice_ns: 0, // use default time slice
             vtime: 0,
+            cpumask_cnt: task.cpumask_cnt,
         }
     }
 }
@@ -144,7 +147,10 @@ impl EnqueuedMessage {
             cpu: self.inner.cpu,
             flags: self.inner.flags,
             sum_exec_runtime: self.inner.sum_exec_runtime,
+            nvcsw: self.inner.nvcsw,
             weight: self.inner.weight,
+            slice: self.inner.slice,
+            vtime: self.inner.vtime,
             cpumask_cnt: self.inner.cpumask_cnt,
         }
     }
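As the conversion above shows, a DispatchedTask starts with slice_ns = 0 (meaning "use the default time slice") and vtime = 0, both of which a scheduler can override before dispatching. A minimal sketch of one such override, assuming those field semantics; the scaling heuristic itself is hypothetical:

    // Hypothetical slice refinement: scale the time slice down as the number
    // of waiting tasks grows, keeping worst-case scheduling latency bounded.
    fn scaled_slice_ns(default_slice_ns: u64, nr_waiting: u64) -> u64 {
        if nr_waiting > 1 {
            default_slice_ns / nr_waiting
        } else {
            0 // 0 = let the BPF side apply the default time slice
        }
    }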


@@ -83,9 +83,12 @@ struct queued_task_ctx {
        s32 pid;
        s32 cpu; /* CPU where the task is running */
        u64 flags; /* task enqueue flags */
-       u64 cpumask_cnt; /* cpumask generation counter */
        u64 sum_exec_runtime; /* Total cpu time */
+       u64 nvcsw; /* Total amount of voluntary context switches */
        u64 weight; /* Task static priority */
+       u64 slice; /* Time slice budget */
+       u64 vtime; /* Current task's vruntime */
+       u64 cpumask_cnt; /* cpumask generation counter */
 };

 /*
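Note that the field order in queued_task_ctx tracks the Rust QueuedTask above: this is the struct actually copied to user-space, so both definitions must agree on the layout, which is presumably why cpumask_cnt moves to the end on both sides of the interface in the same commit.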


@@ -690,6 +690,9 @@ static void dispatch_task(const struct dispatched_task_ctx *task)
        if (!p)
                return;

+       dbg_msg("dispatch: pid=%d (%s) cpu=0x%lx vtime=%llu slice=%llu",
+               p->pid, p->comm, task->cpu, task->vtime, task->slice_ns);
+
        /*
         * Update task's time slice in its context.
         */
@@ -808,10 +811,13 @@ static void get_task_info(struct queued_task_ctx *task,
        struct task_ctx *tctx = try_lookup_task_ctx(p);

        task->pid = p->pid;
-       task->sum_exec_runtime = p->se.sum_exec_runtime;
-       task->flags = enq_flags;
-       task->weight = p->scx.weight;
        task->cpu = scx_bpf_task_cpu(p);
+       task->flags = enq_flags;
+       task->sum_exec_runtime = p->se.sum_exec_runtime;
+       task->nvcsw = p->nvcsw;
+       task->weight = p->scx.weight;
+       task->slice = p->scx.slice;
+       task->vtime = p->scx.dsq_vtime;
        task->cpumask_cnt = tctx ? tctx->cpumask_cnt : 0;
 }
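Since nvcsw and sum_exec_runtime are forwarded as monotonically growing totals, a user-space policy will typically turn them into per-task deltas between consecutive enqueues. A minimal sketch, assuming a plain HashMap keyed by pid (the helper itself is hypothetical):

    use std::collections::HashMap;

    // Return the number of voluntary context switches since this pid was
    // last seen, updating the stored snapshot (0 the first time around).
    fn nvcsw_delta(prev: &mut HashMap<i32, u64>, pid: i32, nvcsw: u64) -> u64 {
        let last = prev.insert(pid, nvcsw).unwrap_or(nvcsw);
        nvcsw.saturating_sub(last)
    }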
@@ -903,6 +909,7 @@ static bool dispatch_user_scheduler(void)
                scx_bpf_error("Failed to find usersched task %d", usersched_pid);
                return false;
        }
+
        /*
         * Use the highest vtime possible to give the scheduler itself the
         * lowest priority possible.
@@ -997,6 +1004,11 @@ void BPF_STRUCT_OPS(rustland_dispatch, s32 cpu, struct task_struct *prev)
         */
        bpf_user_ringbuf_drain(&dispatched, handle_dispatched_task, NULL, 0);

+       /*
+        * Check if the user-space scheduler needs to run.
+        */
+       dispatch_user_scheduler();
+
        /*
         * Try to steal a task dispatched to CPUs that may have gone offline
         * (this allows to prevent indefinite task stalls).
@@ -1013,13 +1025,7 @@ void BPF_STRUCT_OPS(rustland_dispatch, s32 cpu, struct task_struct *prev)
        /*
         * Consume a task from the shared DSQ.
         */
-       if (scx_bpf_consume(SHARED_DSQ))
-               return;
-
-       /*
-        * Check if the user-space scheduler needs to run.
-        */
-       dispatch_user_scheduler();
+       scx_bpf_consume(SHARED_DSQ);
 }

 /*
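The net effect of the last two hunks: dispatch_user_scheduler() used to run only when scx_bpf_consume(SHARED_DSQ) dispatched nothing, whereas it now runs unconditionally, earlier in the dispatch path, so the user-space scheduler gets a chance to run on every dispatch iteration even while the shared DSQ still holds tasks.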