Mirror of https://github.com/sched-ext/scx.git, synced 2024-12-01 07:00:23 +00:00
scx_bpfland: always rely on prev_cpu with single-CPU tasks
When selecting an idle CPU for tasks that can only run on a single CPU, always check whether the previously used CPU is still usable, instead of trying to figure out the single allowed CPU by looking at the task's cpumask.

Apparently, single-CPU tasks can report a prev_cpu that is not in the allowed cpumask when they rapidly change affinity. This could lead to stalls, because we may end up dispatching the kthread to a per-CPU DSQ that is not compatible with its allowed cpumask.

Example:

  kworker/u32:2[173797] triggered exit kind 1026:
    runnable task stall (kworker/2:1[70] failed to run for 7.552s)
  ...
  R kworker/2:1[70] -7552ms
      scx_state/flags=3/0x9 dsq_flags=0x1 ops_state/qseq=0/0
      sticky/holding_cpu=-1/-1 dsq_id=0x8 dsq_vtime=234483011369
      cpus=04

In this case kworker/2:1 can only run on CPU #2 (cpus=0x4), but it is dispatched to dsq_id=0x8, which can only be consumed by CPU 8 => stall.

To prevent this, do not try to figure out the best idle CPU for tasks that are changing affinity; just dispatch them to a global DSQ (either priority or regular, depending on their interactive state).

Moreover, introduce an explicit error check in dispatch_direct_cpu() to improve detection of similar issues in the future, and drop lookup_task_ctx() in favor of try_lookup_task_ctx(), since we can now safely handle all the cases where the task context is not found.

Signed-off-by: Andrea Righi <andrea.righi@linux.dev>
parent cc3f696c4b
commit 7cc18460b9
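For orientation before the diff below, this is the essence of the new single-CPU fast path in pick_idle_cpu(), reduced to a stand-alone sketch: prev_cpu is trusted only if it is still in the task's cpumask, online and (unless local_kthreads is set) idle. The real code stores the result in cpu and jumps to out_put_cpumask to release the idle-cpumask references instead of returning directly; online_cpumask and local_kthreads are pre-existing scheduler variables.

    /* Sketch of the new check: reuse prev_cpu instead of deriving the CPU from the cpumask. */
    if (p->nr_cpus_allowed == 1) {
            if (bpf_cpumask_test_cpu(prev_cpu, p->cpus_ptr) &&
                bpf_cpumask_test_cpu(prev_cpu, online_cpumask) &&
                (local_kthreads || scx_bpf_test_and_clear_cpu_idle(prev_cpu)))
                    return prev_cpu; /* prev_cpu still matches the task's affinity */
            return -ENOENT;          /* affinity is changing: caller falls back to a global DSQ */
    }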
@@ -249,23 +249,6 @@ struct task_ctx *try_lookup_task_ctx(const struct task_struct *p)
 				    (struct task_struct *)p, 0, 0);
 }
 
-/*
- * Return a local task context from a generic task, failing if it doesn't
- * exist.
- */
-struct task_ctx *lookup_task_ctx(const struct task_struct *p)
-{
-	struct task_ctx *tctx;
-
-	tctx = try_lookup_task_ctx(p);
-	if (!tctx) {
-		scx_bpf_error("Failed to lookup task ctx for %d (%s)",
-			      p->pid, p->comm);
-		return NULL;
-	}
-	return tctx;
-}
-
 /*
  * Return true if interactive tasks classification via voluntary context
  * switches is enabled, false otherwise.
@@ -469,8 +452,11 @@ static int dispatch_direct_cpu(struct task_struct *p, s32 cpu, u64 enq_flags)
 	 * Make sure we can dispatch the task to the target CPU according to
 	 * its cpumask.
 	 */
-	if (!bpf_cpumask_test_cpu(cpu, p->cpus_ptr))
+	if (!bpf_cpumask_test_cpu(cpu, p->cpus_ptr)) {
+		scx_bpf_error("%d %s can't be dispatched to CPU %d",
+			      p->pid, p->comm, cpu);
 		return -EINVAL;
+	}
 
 	scx_bpf_dispatch_vtime(p, dsq_id, SCX_SLICE_DFL, deadline, enq_flags);
 
@@ -521,23 +507,10 @@ static s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu)
 
 	tctx = try_lookup_task_ctx(p);
 	if (!tctx)
-		return prev_cpu;
+		return -ENOENT;
 	cctx = try_lookup_cpu_ctx(prev_cpu);
 	if (!cctx)
-		return prev_cpu;
-
-	/*
-	 * For tasks that can run only on a single CPU, we can simply verify if
-	 * their only allowed CPU is idle.
-	 */
-	if (p->nr_cpus_allowed == 1) {
-		cpu = bpf_cpumask_first(p->cpus_ptr);
-
-		if (scx_bpf_test_and_clear_cpu_idle(cpu))
-			return cpu;
-
 		return -ENOENT;
-	}
 
 	primary = primary_cpumask;
 	if (!primary)
@@ -554,6 +527,24 @@ static s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu)
 	idle_smtmask = scx_bpf_get_idle_smtmask();
 	idle_cpumask = scx_bpf_get_idle_cpumask();
 
+	/*
+	 * For tasks that can run only on a single CPU, we can simply verify if
+	 * their only allowed CPU is still usable, online and idle.
+	 *
+	 * Moreover, if local_kthreads is enabled, always dispatch per-CPU
+	 * kthreads directly to their target CPU, independently on its idle
+	 * state.
+	 */
+	if (p->nr_cpus_allowed == 1) {
+		if (bpf_cpumask_test_cpu(prev_cpu, p->cpus_ptr) &&
+		    bpf_cpumask_test_cpu(prev_cpu, online_cpumask) &&
+		    (local_kthreads || scx_bpf_test_and_clear_cpu_idle(prev_cpu)))
+			cpu = prev_cpu;
+		else
+			cpu = -ENOENT;
+		goto out_put_cpumask;
+	}
+
 	/*
 	 * Scheduling domains of the previously used CPU.
 	 */
@@ -798,19 +789,8 @@ void BPF_STRUCT_OPS(bpfland_enqueue, struct task_struct *p, u64 enq_flags)
 		handle_sync_wakeup(p);
 
 	/*
-	 * Always dispatch per-CPU kthreads directly on their target CPU if
-	 * local_kthreads is enabled.
-	 */
-	if (local_kthreads && is_kthread(p) && p->nr_cpus_allowed == 1) {
-		if (!dispatch_direct_cpu(p, prev_cpu, enq_flags)) {
-			__sync_fetch_and_add(&nr_direct_dispatches, 1);
-			return;
-		}
-	}
-
-	/*
-	 * Second chance to find an idle CPU and try to contain the task on the
-	 * local CPU / cache / domain.
+	 * If we couldn't find an idle CPU in ops.select_cpu(), give the task
+	 * another chance here to keep using the same CPU / cache / domain.
 	 */
 	cpu = pick_idle_cpu(p, prev_cpu);
 	if (cpu >= 0 && !dispatch_direct_cpu(p, cpu, 0)) {
@@ -1053,7 +1033,7 @@ void BPF_STRUCT_OPS(bpfland_stopping, struct task_struct *p, bool runnable)
 
 	__sync_fetch_and_sub(&nr_running, 1);
 
-	tctx = lookup_task_ctx(p);
+	tctx = try_lookup_task_ctx(p);
 	if (!tctx)
 		return;
 
@@ -1130,7 +1110,7 @@ void BPF_STRUCT_OPS(bpfland_enable, struct task_struct *p)
 	p->scx.dsq_vtime = vtime_now;
 
 	/* Initialize voluntary context switch timestamp */
-	tctx = lookup_task_ctx(p);
+	tctx = try_lookup_task_ctx(p);
 	if (!tctx)
 		return;
 	tctx->nvcsw = p->nvcsw;
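To connect the pieces, the enqueue path above can be read as the following sketch: if pick_idle_cpu() fails (for example because a single-CPU task is caught changing affinity) or dispatch_direct_cpu() now returns an error, the task simply falls through to a global DSQ. The first half mirrors the hunk in bpfland_enqueue(); the fallback half is illustrative, and the is_task_interactive() and task_deadline() helpers plus the prio_dsq_id/shared_dsq_id names are assumptions, not necessarily the scheduler's exact identifiers.

    cpu = pick_idle_cpu(p, prev_cpu);
    if (cpu >= 0 && !dispatch_direct_cpu(p, cpu, 0)) {
            __sync_fetch_and_add(&nr_direct_dispatches, 1);
            return;
    }

    /*
     * Fallback (illustrative): queue the task on a global DSQ, keeping
     * interactive tasks on the priority queue. Names below are assumed.
     */
    dsq_id = is_task_interactive(p) ? prio_dsq_id : shared_dsq_id;
    scx_bpf_dispatch_vtime(p, dsq_id, SCX_SLICE_DFL, task_deadline(p), enq_flags);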