scx_layered: Refactor topo preemption

Refactor the topology-aware preemption logic so that the non-topology-aware
code is contained in a separate function. This should make the
non-topology-aware code path far easier to maintain.

Signed-off-by: Daniel Hodges <hodges.daniel.scott@gmail.com>
Daniel Hodges 2024-10-09 21:24:07 -04:00
parent e7b1feed5a
commit fe00e2c7be
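
For orientation, the shape of the refactor in the diff below is roughly as follows: try_preempt() now dispatches to try_preempt_no_topo() when topology awareness is disabled, and try_preempt_no_topo() carries the simple round-robin scan over all possible CPUs. This is only a condensed sketch with the shared lookups and the preempt_first handling elided; the complete bodies are in the diff.

static __always_inline
void try_preempt_no_topo(s32 task_cpu, struct task_struct *p,
			 struct task_ctx *tctx, bool preempt_first,
			 u64 enq_flags)
{
	struct cpumask *layer_cpumask;
	struct cpu_ctx *cctx;
	struct layer *layer;
	u32 idx;

	if (!(layer = lookup_layer(tctx->layer)) ||
	    !(cctx = lookup_cpu_ctx(-1)) ||
	    !(layer_cpumask = (lookup_layer_cpumask(layer->idx))))
		return;

	/* preempt_first / idle-CPU kick handling elided; see the diff below. */

	/*
	 * No topology information: scan every possible CPU round-robin,
	 * starting from preempt_cursor.
	 */
	bpf_for(idx, 0, nr_possible_cpus) {
		s32 cand = (preempt_cursor + idx) % nr_possible_cpus;

		if (try_preempt_cpu(cand, p, cctx, tctx, layer, false))
			return;
	}

	lstat_inc(LSTAT_PREEMPT_FAIL, layer, cctx);
}

static __always_inline
void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx,
		 bool preempt_first, u64 enq_flags)
{
	if (disable_topology)
		return try_preempt_no_topo(task_cpu, p, tctx, preempt_first,
					   enq_flags);

	/*
	 * Topology-aware path only: after the same lookups and preempt_first
	 * handling, the search escalates from the local LLC to node-local
	 * LLCs and finally, when xnuma_preemption is set, to CPUs on other
	 * NUMA nodes, incrementing LSTAT_PREEMPT_FAIL if nothing is found.
	 */
}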


@@ -885,8 +885,9 @@ bool try_preempt_cpu(s32 cand, struct task_struct *p, struct cpu_ctx *cctx,
}
static __always_inline
void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx,
bool preempt_first, u64 enq_flags)
void try_preempt_no_topo(s32 task_cpu, struct task_struct *p,
struct task_ctx *tctx, bool preempt_first,
u64 enq_flags)
{
struct bpf_cpumask *attempted, *topo_cpus;
struct cache_ctx *cachec;
@@ -896,7 +897,8 @@ void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx,
struct node_ctx *nodec;
u32 idx;
if (!(layer = lookup_layer(tctx->layer)) || !(cctx = lookup_cpu_ctx(-1)) ||
if (!(layer = lookup_layer(tctx->layer)) ||
!(cctx = lookup_cpu_ctx(-1)) ||
!(layer_cpumask = (lookup_layer_cpumask(layer->idx))))
return;
@@ -925,66 +927,151 @@ void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx,
return;
}
if (!disable_topology) {
if (!(cachec = lookup_cache_ctx(cctx->cache_idx)) ||
!(nodec = lookup_node_ctx(cctx->node_idx)))
bpf_for(idx, 0, nr_possible_cpus) {
s32 cand = (preempt_cursor + idx) % nr_possible_cpus;
if (try_preempt_cpu(cand, p, cctx, tctx, layer, false))
return;
}
attempted = bpf_cpumask_create();
if (!attempted)
goto preempt_fail;
lstat_inc(LSTAT_PREEMPT_FAIL, layer, cctx);
topo_cpus = bpf_cpumask_create();
if (!topo_cpus) {
preempt_fail:
lstat_inc(LSTAT_PREEMPT_FAIL, layer, cctx);
}
static __always_inline
void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx,
bool preempt_first, u64 enq_flags)
{
if (disable_topology)
return try_preempt_no_topo(task_cpu, p, tctx, preempt_first,
enq_flags);
struct bpf_cpumask *attempted, *topo_cpus;
struct cache_ctx *cachec;
struct cpumask *layer_cpumask;
struct cpu_ctx *cctx;
struct layer *layer;
struct node_ctx *nodec;
u32 idx;
if (!(layer = lookup_layer(tctx->layer)) ||
!(cctx = lookup_cpu_ctx(-1)) ||
!(layer_cpumask = (lookup_layer_cpumask(layer->idx))))
return;
if (preempt_first) {
/*
* @p prefers to preempt its previous CPU even when there are
* other idle CPUs.
*/
if (try_preempt_cpu(task_cpu, p, cctx, tctx, layer, true))
return;
/* we skipped idle CPU picking in select_cpu. Do it here. */
if (pick_idle_cpu_and_kick(p, task_cpu, cctx, tctx, layer))
return;
} else {
/*
* If we aren't in the wakeup path, layered_select_cpu() hasn't
* run and thus we haven't looked for and kicked an idle CPU.
* Let's do it now.
*/
if (!(enq_flags & SCX_ENQ_WAKEUP) &&
pick_idle_cpu_and_kick(p, task_cpu, cctx, tctx, layer))
return;
if (!layer->preempt)
return;
if (try_preempt_cpu(task_cpu, p, cctx, tctx, layer, false))
return;
}
if (!(cachec = lookup_cache_ctx(cctx->cache_idx)) ||
!(nodec = lookup_node_ctx(cctx->node_idx)))
return;
attempted = bpf_cpumask_create();
if (!attempted)
goto preempt_fail;
topo_cpus = bpf_cpumask_create();
if (!topo_cpus) {
bpf_cpumask_release(attempted);
goto preempt_fail;
}
if (!cachec->cpumask) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
goto preempt_fail;
}
bpf_cpumask_copy(topo_cpus, cast_mask(cachec->cpumask));
bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask);
/*
* First try preempting in the local LLC of available cpus in the layer mask
*/
bpf_for(idx, 0, cachec->nr_cpus) {
s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus));
trace("PREEMPT attempt on cpu %d from cpu %d",
preempt_cpu, bpf_get_smp_processor_id());
if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) {
bpf_cpumask_release(attempted);
goto preempt_fail;
bpf_cpumask_release(topo_cpus);
return;
}
bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus);
bpf_cpumask_set_cpu(preempt_cpu, attempted);
}
if (!cachec->cpumask) {
/*
* Next try node local LLCs in the layer cpumask
*/
if (!nodec->cpumask) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
goto preempt_fail;
}
bpf_cpumask_copy(topo_cpus, cast_mask(nodec->cpumask));
bpf_cpumask_xor(topo_cpus, cast_mask(attempted), cast_mask(topo_cpus));
bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask);
bpf_for(idx, 0, nodec->nr_cpus) {
s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus));
if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
lstat_inc(LSTAT_PREEMPT_XLLC, layer, cctx);
return;
}
bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus);
bpf_cpumask_set_cpu(preempt_cpu, attempted);
if (bpf_cpumask_empty(cast_mask(topo_cpus)))
break;
}
/*
* Finally try across nodes
*/
if (xnuma_preemption) {
if (!all_cpumask) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
goto preempt_fail;
}
bpf_cpumask_copy(topo_cpus, cast_mask(cachec->cpumask));
bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask);
/*
* First try preempting in the local LLC of available cpus in the layer mask
*/
bpf_for(idx, 0, cachec->nr_cpus) {
s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus));
trace("PREEMPT attempt on cpu %d from cpu %d",
preempt_cpu, bpf_get_smp_processor_id());
if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
return;
}
bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus);
bpf_cpumask_set_cpu(preempt_cpu, attempted);
}
/*
* Next try node local LLCs in the layer cpumask
*/
if (!nodec->cpumask) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
goto preempt_fail;
}
bpf_cpumask_copy(topo_cpus, cast_mask(nodec->cpumask));
bpf_cpumask_copy(topo_cpus, cast_mask(all_cpumask));
bpf_cpumask_xor(topo_cpus, cast_mask(attempted), cast_mask(topo_cpus));
bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask);
bpf_for(idx, 0, nodec->nr_cpus) {
bpf_for(idx, 0, nr_possible_cpus) {
s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus));
if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
lstat_inc(LSTAT_PREEMPT_XLLC, layer, cctx);
lstat_inc(LSTAT_PREEMPT_XNUMA, layer, cctx);
return;
}
bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus);
@@ -992,45 +1079,9 @@ void try_preempt(s32 task_cpu, struct task_struct *p, struct task_ctx *tctx,
if (bpf_cpumask_empty(cast_mask(topo_cpus)))
break;
}
/*
* Finally try across nodes
*/
if (xnuma_preemption) {
if (!all_cpumask) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
goto preempt_fail;
}
bpf_cpumask_copy(topo_cpus, cast_mask(all_cpumask));
bpf_cpumask_xor(topo_cpus, cast_mask(attempted), cast_mask(topo_cpus));
bpf_cpumask_and(topo_cpus, cast_mask(topo_cpus), layer_cpumask);
bpf_for(idx, 0, nr_possible_cpus) {
s32 preempt_cpu = bpf_cpumask_any_distribute(cast_mask(topo_cpus));
if (try_preempt_cpu(preempt_cpu, p, cctx, tctx, layer, false)) {
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
lstat_inc(LSTAT_PREEMPT_XNUMA, layer, cctx);
return;
}
bpf_cpumask_clear_cpu(preempt_cpu, topo_cpus);
bpf_cpumask_set_cpu(preempt_cpu, attempted);
if (bpf_cpumask_empty(cast_mask(topo_cpus)))
break;
}
}
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
} else {
bpf_for(idx, 0, nr_possible_cpus) {
s32 cand = (preempt_cursor + idx) % nr_possible_cpus;
if (try_preempt_cpu(cand, p, cctx, tctx, layer, false))
return;
}
}
bpf_cpumask_release(attempted);
bpf_cpumask_release(topo_cpus);
lstat_inc(LSTAT_PREEMPT_FAIL, layer, cctx);