scx_layered: Refactor topo preemption

Refactor the topology preemption logic so that the non-topology-aware code is contained in a separate function. This should make maintaining the non-topology-aware code path far easier. Signed-off-by: Daniel Hodges <hodges.daniel.scott@gmail.com>
2024-11-25 19:10:23 +00:00 · 2024-10-09 21:24:07 -04:00 · 2024-10-09 21:24:07 -04:00 · fe00e2c7be
commit fe00e2c7be
parent e7b1feed5a
1 changed files with 135 additions and 84 deletions
--- a/scheds/rust/scx_layered/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_layered/src/bpf/main.bpf.c
@ -885,8 +885,9 @@ bool try_preempt_cpu(s32 cand, struct task_struct *p, struct cpu_ctx *cctx,
 }
 static __always_inline
-void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx,
+void try_preempt_no_topo(s32 task_cpu, struct task_struct *p,
-		 bool preempt_first, u64 enq_flags)
+			 struct task_ctx *tctx, bool preempt_first,
 			 u64 enq_flags)
 {
 	struct bpf_cpumask *attempted, *topo_cpus;
 	struct cache_ctx *cachec;
@ -896,7 +897,8 @@ void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx,
 	struct node_ctx *nodec;
 	u32 idx;
-	if (!(layer = lookup_layer(tctx->layer)) || !(cctx = lookup_cpu_ctx(-1)) ||
+	if (!(layer = lookup_layer(tctx->layer)) ||
 	    !(cctx = lookup_cpu_ctx(-1)) ||
 	    !(layer_cpumask = (lookup_layer_cpumask(layer->idx))))
 		return;
@ -925,66 +927,151 @@ void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx,
 			return;
 	}
-	if (!disable_topology) {
+	bpf_for(idx, 0, nr_possible_cpus) {
-		if (!(cachec = lookup_cache_ctx(cctx->cache_idx)) ||
+		s32 cand = (preempt_cursor + idx) % nr_possible_cpus;
-		    !(nodec = lookup_node_ctx(cctx->node_idx)))
+
 		if (try_preempt_cpu(cand, p, cctx, tctx, layer, false))
 			return;
 	}
-		attempted = bpf_cpumask_create();
+	lstat_inc(LSTAT_PREEMPT_FAIL, layer, cctx);
 		if (!attempted)
 			goto preempt_fail;
-		topo_cpus = bpf_cpumask_create();
+preempt_fail:
-		if (!topo_cpus) {
+	lstat_inc(LSTAT_PREEMPT_FAIL, layer, cctx);
 }
 static __always_inline
 void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx,
 		 bool preempt_first, u64 enq_flags)
 {
 	if (disable_topology)
 		return try_preempt_no_topo(task_cpu, p, tctx, preempt_first,
 					   enq_flags);
 	struct bpf_cpumask *attempted, *topo_cpus;
 	struct cache_ctx *cachec;
 	struct cpumask *layer_cpumask;
 	struct cpu_ctx *cctx;
 	struct layer *layer;
 	struct node_ctx *nodec;
 	u32 idx;
 	if (!(layer = lookup_layer(tctx->layer)) ||
 	    !(cctx = lookup_cpu_ctx(-1)) ||
 	    !(layer_cpumask = (lookup_layer_cpumask(layer->idx))))
 		return;
 	if (preempt_first) {
 		/*
 		 * @p prefers to preempt its previous CPU even when there are
 		 * other idle CPUs.
 		 */
 		if (try_preempt_cpu(task_cpu, p, cctx, tctx, layer, true))
 			return;
 		/* we skipped idle CPU picking in select_cpu. Do it here. */
 		if (pick_idle_cpu_and_kick(p, task_cpu, cctx, tctx, layer))
 			return;
 	} else {
 		/*
 		 * If we aren't in the wakeup path, layered_select_cpu() hasn't
 		 * run and thus we haven't looked for and kicked an idle CPU.
 		 * Let's do it now.
 		 */
 		if (!(enq_flags & SCX_ENQ_WAKEUP) &&
 		    pick_idle_cpu_and_kick(p, task_cpu, cctx, tctx, layer))
 			return;
 		if (!layer->preempt)
 			return;
 		if (try_preempt_cpu(task_cpu, p, cctx, tctx, layer, false))
 			return;
 	}
 	if (!(cachec = lookup_cache_ctx(cctx->cache_idx)) ||
 	    !(nodec = lookup_node_ctx(cctx->node_idx)))
 		return;
 	attempted = bpf_cpumask_create();
 	if (!attempted)
 		goto preempt_fail;
 	topo_cpus = bpf_cpumask_create();
 	if (!topo_cpus) {
 		bpf_cpumask_release(attempted);
 		goto preempt_fail;
 	}
 	if (!cachec->cpumask) {
 		bpf_cpumask_release(attempted);
 		bpf_cpumask_release(topo_cpus);
 		goto preempt_fail;
 	}
 	bpf_cpumask_copy(topo_cpus, cast_mask(cachec->cpumask));
 	bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask);
 	/*
 	 * First try preempting in the local LLC of available cpus in the layer mask
 	 */
 	bpf_for(idx, 0, cachec->nr_cpus) {
 		s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus));
 		trace("PREEMPT attempt on cpu %d from cpu %d",
 		      preempt_cpu, bpf_get_smp_processor_id());
 		if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) {
 			bpf_cpumask_release(attempted);
-			goto preempt_fail;
+			bpf_cpumask_release(topo_cpus);
 			return;
 		}
 		bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus);
 		bpf_cpumask_set_cpu(preempt_cpu, attempted);
 	}
-		if (!cachec->cpumask) {
+	/*
 	 * Next try node local LLCs in the layer cpumask
 	 */
 	if (!nodec->cpumask) {
 		bpf_cpumask_release(attempted);
 		bpf_cpumask_release(topo_cpus);
 		goto preempt_fail;
 	}
 	bpf_cpumask_copy(topo_cpus, cast_mask(nodec->cpumask));
 	bpf_cpumask_xor(topo_cpus, cast_mask(attempted), cast_mask(topo_cpus));
 	bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask);
 	bpf_for(idx, 0, nodec->nr_cpus) {
 		s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus));
 		if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) {
 			bpf_cpumask_release(attempted);
 			bpf_cpumask_release(topo_cpus);
 			lstat_inc(LSTAT_PREEMPT_XLLC, layer, cctx);
 			return;
 		}
 		bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus);
 		bpf_cpumask_set_cpu(preempt_cpu, attempted);
 		if (bpf_cpumask_empty(cast_mask(topo_cpus)))
 			break;
 	}
 	/*
 	 * Finally try across nodes
 	 */
 	if (xnuma_preemption) {
 		if (!all_cpumask) {
 			bpf_cpumask_release(attempted);
 			bpf_cpumask_release(topo_cpus);
 			goto preempt_fail;
 		}
-
+		bpf_cpumask_copy(topo_cpus, cast_mask(all_cpumask));
 		bpf_cpumask_copy(topo_cpus, cast_mask(cachec->cpumask));
 		bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask);
 		/*
 		 * First try preempting in the local LLC of available cpus in the layer mask
 		 */
 		bpf_for(idx, 0, cachec->nr_cpus) {
 			s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus));
 			trace("PREEMPT attempt on cpu %d from cpu %d",
 			      preempt_cpu, bpf_get_smp_processor_id());
 			if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) {
 				bpf_cpumask_release(attempted);
 				bpf_cpumask_release(topo_cpus);
 				return;
 			}
 			bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus);
 			bpf_cpumask_set_cpu(preempt_cpu, attempted);
 		}
 		/*
 		 * Next try node local LLCs in the layer cpumask
 		 */
 		if (!nodec->cpumask) {
 			bpf_cpumask_release(attempted);
 			bpf_cpumask_release(topo_cpus);
 			goto preempt_fail;
 		}
 		bpf_cpumask_copy(topo_cpus, cast_mask(nodec->cpumask));
 		bpf_cpumask_xor(topo_cpus, cast_mask(attempted), cast_mask(topo_cpus));
 		bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask);
-		bpf_for(idx, 0, nodec->nr_cpus) {
+		bpf_for(idx, 0, nr_possible_cpus) {
 			s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus));
 			if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) {
 				bpf_cpumask_release(attempted);
 				bpf_cpumask_release(topo_cpus);
-				lstat_inc(LSTAT_PREEMPT_XLLC, layer, cctx);
+				lstat_inc(LSTAT_PREEMPT_XNUMA, layer, cctx);
 				return;
 			}
 			bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus);
@ -992,45 +1079,9 @@ void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx,
 			if (bpf_cpumask_empty(cast_mask(topo_cpus)))
 				break;
 		}
 		/*
 		 * Finally try across nodes
 		 */
 		if (xnuma_preemption) {
 			if (!all_cpumask) {
 				bpf_cpumask_release(attempted);
 				bpf_cpumask_release(topo_cpus);
 				goto preempt_fail;
 			}
 			bpf_cpumask_copy(topo_cpus, cast_mask(all_cpumask));
 			bpf_cpumask_xor(topo_cpus, cast_mask(attempted), cast_mask(topo_cpus));
 			bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask);
 			bpf_for(idx, 0, nr_possible_cpus) {
 				s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus));
 				if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) {
 					bpf_cpumask_release(attempted);
 					bpf_cpumask_release(topo_cpus);
 					lstat_inc(LSTAT_PREEMPT_XNUMA, layer, cctx);
 					return;
 				}
 				bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus);
 				bpf_cpumask_set_cpu(preempt_cpu, attempted);
 				if (bpf_cpumask_empty(cast_mask(topo_cpus)))
 					break;
 			}
 		}
 		bpf_cpumask_release(attempted);
 		bpf_cpumask_release(topo_cpus);
 	} else {
 		bpf_for(idx, 0, nr_possible_cpus) {
 			s32 cand = (preempt_cursor + idx) % nr_possible_cpus;
 			if (try_preempt_cpu(cand, p, cctx, tctx, layer, false))
 				return;
 		}
 	}
 	bpf_cpumask_release(attempted);
 	bpf_cpumask_release(topo_cpus);
 	lstat_inc(LSTAT_PREEMPT_FAIL, layer, cctx);