scx_layered: Refactor topo preemption

Refactor topology preemption logic so the non-topology-aware code is
contained in a separate function. This should make maintaining the
non-topology-aware code path far easier.

Signed-off-by: Daniel Hodges <hodges.daniel.scott@gmail.com>
This commit is contained in:
Daniel Hodges 2024-10-09 21:24:07 -04:00
parent e7b1feed5a
commit fe00e2c7be
No known key found for this signature in database
GPG Key ID: D295F6D6F3E97B18

View File

@ -885,8 +885,9 @@ bool try_preempt_cpu(s32 cand, struct task_struct *p, struct cpu_ctx *cctx,
} }
static __always_inline static __always_inline
void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx, void try_preempt_no_topo(s32 task_cpu, struct task_struct *p,
bool preempt_first, u64 enq_flags) struct task_ctx *tctx, bool preempt_first,
u64 enq_flags)
{ {
struct bpf_cpumask *attempted, *topo_cpus; struct bpf_cpumask *attempted, *topo_cpus;
struct cache_ctx *cachec; struct cache_ctx *cachec;
@ -896,7 +897,8 @@ void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx,
struct node_ctx *nodec; struct node_ctx *nodec;
u32 idx; u32 idx;
if (!(layer = lookup_layer(tctx->layer)) || !(cctx = lookup_cpu_ctx(-1)) || if (!(layer = lookup_layer(tctx->layer)) ||
!(cctx = lookup_cpu_ctx(-1)) ||
!(layer_cpumask = (lookup_layer_cpumask(layer->idx)))) !(layer_cpumask = (lookup_layer_cpumask(layer->idx))))
return; return;
@ -925,66 +927,151 @@ void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx,
return; return;
} }
if (!disable_topology) { bpf_for(idx, 0, nr_possible_cpus) {
if (!(cachec = lookup_cache_ctx(cctx->cache_idx)) || s32 cand = (preempt_cursor + idx) % nr_possible_cpus;
!(nodec = lookup_node_ctx(cctx->node_idx)))
if (try_preempt_cpu(cand, p, cctx, tctx, layer, false))
return; return;
}
attempted = bpf_cpumask_create(); lstat_inc(LSTAT_PREEMPT_FAIL, layer, cctx);
if (!attempted)
goto preempt_fail;
topo_cpus = bpf_cpumask_create(); preempt_fail:
if (!topo_cpus) { lstat_inc(LSTAT_PREEMPT_FAIL, layer, cctx);
}
static __always_inline
void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx,
bool preempt_first, u64 enq_flags)
{
if (disable_topology)
return try_preempt_no_topo(task_cpu, p, tctx, preempt_first,
enq_flags);
struct bpf_cpumask *attempted, *topo_cpus;
struct cache_ctx *cachec;
struct cpumask *layer_cpumask;
struct cpu_ctx *cctx;
struct layer *layer;
struct node_ctx *nodec;
u32 idx;
if (!(layer = lookup_layer(tctx->layer)) ||
!(cctx = lookup_cpu_ctx(-1)) ||
!(layer_cpumask = (lookup_layer_cpumask(layer->idx))))
return;
if (preempt_first) {
/*
* @p prefers to preempt its previous CPU even when there are
* other idle CPUs.
*/
if (try_preempt_cpu(task_cpu, p, cctx, tctx, layer, true))
return;
/* we skipped idle CPU picking in select_cpu. Do it here. */
if (pick_idle_cpu_and_kick(p, task_cpu, cctx, tctx, layer))
return;
} else {
/*
* If we aren't in the wakeup path, layered_select_cpu() hasn't
* run and thus we haven't looked for and kicked an idle CPU.
* Let's do it now.
*/
if (!(enq_flags & SCX_ENQ_WAKEUP) &&
pick_idle_cpu_and_kick(p, task_cpu, cctx, tctx, layer))
return;
if (!layer->preempt)
return;
if (try_preempt_cpu(task_cpu, p, cctx, tctx, layer, false))
return;
}
if (!(cachec = lookup_cache_ctx(cctx->cache_idx)) ||
!(nodec = lookup_node_ctx(cctx->node_idx)))
return;
attempted = bpf_cpumask_create();
if (!attempted)
goto preempt_fail;
topo_cpus = bpf_cpumask_create();
if (!topo_cpus) {
bpf_cpumask_release(attempted);
goto preempt_fail;
}
if (!cachec->cpumask) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
goto preempt_fail;
}
bpf_cpumask_copy(topo_cpus, cast_mask(cachec->cpumask));
bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask);
/*
* First try preempting in the local LLC of available cpus in the layer mask
*/
bpf_for(idx, 0, cachec->nr_cpus) {
s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus));
trace("PREEMPT attempt on cpu %d from cpu %d",
preempt_cpu, bpf_get_smp_processor_id());
if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) {
bpf_cpumask_release(attempted); bpf_cpumask_release(attempted);
goto preempt_fail; bpf_cpumask_release(topo_cpus);
return;
} }
bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus);
bpf_cpumask_set_cpu(preempt_cpu, attempted);
}
if (!cachec->cpumask) { /*
* Next try node local LLCs in the layer cpumask
*/
if (!nodec->cpumask) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
goto preempt_fail;
}
bpf_cpumask_copy(topo_cpus, cast_mask(nodec->cpumask));
bpf_cpumask_xor(topo_cpus, cast_mask(attempted), cast_mask(topo_cpus));
bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask);
bpf_for(idx, 0, nodec->nr_cpus) {
s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus));
if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
lstat_inc(LSTAT_PREEMPT_XLLC, layer, cctx);
return;
}
bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus);
bpf_cpumask_set_cpu(preempt_cpu, attempted);
if (bpf_cpumask_empty(cast_mask(topo_cpus)))
break;
}
/*
* Finally try across nodes
*/
if (xnuma_preemption) {
if (!all_cpumask) {
bpf_cpumask_release(attempted); bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus); bpf_cpumask_release(topo_cpus);
goto preempt_fail; goto preempt_fail;
} }
bpf_cpumask_copy(topo_cpus, cast_mask(all_cpumask));
bpf_cpumask_copy(topo_cpus, cast_mask(cachec->cpumask));
bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask);
/*
* First try preempting in the local LLC of available cpus in the layer mask
*/
bpf_for(idx, 0, cachec->nr_cpus) {
s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus));
trace("PREEMPT attempt on cpu %d from cpu %d",
preempt_cpu, bpf_get_smp_processor_id());
if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
return;
}
bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus);
bpf_cpumask_set_cpu(preempt_cpu, attempted);
}
/*
* Next try node local LLCs in the layer cpumask
*/
if (!nodec->cpumask) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
goto preempt_fail;
}
bpf_cpumask_copy(topo_cpus, cast_mask(nodec->cpumask));
bpf_cpumask_xor(topo_cpus, cast_mask(attempted), cast_mask(topo_cpus)); bpf_cpumask_xor(topo_cpus, cast_mask(attempted), cast_mask(topo_cpus));
bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask); bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask);
bpf_for(idx, 0, nodec->nr_cpus) { bpf_for(idx, 0, nr_possible_cpus) {
s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus)); s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus));
if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) { if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) {
bpf_cpumask_release(attempted); bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus); bpf_cpumask_release(topo_cpus);
lstat_inc(LSTAT_PREEMPT_XLLC, layer, cctx); lstat_inc(LSTAT_PREEMPT_XNUMA, layer, cctx);
return; return;
} }
bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus); bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus);
@ -992,45 +1079,9 @@ void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx,
if (bpf_cpumask_empty(cast_mask(topo_cpus))) if (bpf_cpumask_empty(cast_mask(topo_cpus)))
break; break;
} }
/*
* Finally try across nodes
*/
if (xnuma_preemption) {
if (!all_cpumask) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
goto preempt_fail;
}
bpf_cpumask_copy(topo_cpus, cast_mask(all_cpumask));
bpf_cpumask_xor(topo_cpus, cast_mask(attempted), cast_mask(topo_cpus));
bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask);
bpf_for(idx, 0, nr_possible_cpus) {
s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus));
if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
lstat_inc(LSTAT_PREEMPT_XNUMA, layer, cctx);
return;
}
bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus);
bpf_cpumask_set_cpu(preempt_cpu, attempted);
if (bpf_cpumask_empty(cast_mask(topo_cpus)))
break;
}
}
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
} else {
bpf_for(idx, 0, nr_possible_cpus) {
s32 cand = (preempt_cursor + idx) % nr_possible_cpus;
if (try_preempt_cpu(cand, p, cctx, tctx, layer, false))
return;
}
} }
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
lstat_inc(LSTAT_PREEMPT_FAIL, layer, cctx); lstat_inc(LSTAT_PREEMPT_FAIL, layer, cctx);