layered: Set layered cpumask in scheduler init call

In layered_init, we currently set all bits in every layer's cpumask,
and then asynchronously update the cpumasks at a later time to reflect
their actual values at runtime. Now that the layered code initializes
the cpumasks before the scheduler is attached, we can instead have the
init path refresh and initialize the cpumasks directly.

Signed-off-by: David Vernet <void@manifault.com>
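
For reference, here is a minimal sketch of what the per-layer loop in
layered_init looks like after this change, assembled from the hunks
below. The wrapper lookup and cpumask allocation at the top of the loop
are assumed from context rather than shown in the diff, so they may not
match the real function exactly.

/*
 * Sketch only: assembled from the hunks below, not the full layered_init().
 * The cpumaskw lookup and bpf_cpumask_create() call are assumed from context.
 */
s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
{
	int i;

	bpf_for(i, 0, nr_layers) {
		struct layer_cpumask_wrapper *cpumaskw;
		struct bpf_cpumask *cpumask;

		if (!(cpumaskw = bpf_map_lookup_elem(&layer_cpumasks, &i)))
			return -ENOENT;

		if (!(cpumask = bpf_cpumask_create()))
			return -ENOMEM;

		/*
		 * No more bpf_cpumask_setall(): stash the new cpumask in the
		 * wrapper, then populate it right away from the layer config
		 * instead of waiting for a later asynchronous refresh.
		 */
		cpumask = bpf_kptr_xchg(&cpumaskw->cpumask, cpumask);
		if (cpumask)
			bpf_cpumask_release(cpumask);

		refresh_cpumasks(i);
	}

	return 0;
}
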
@@ -140,21 +140,34 @@ static struct cpumask *lookup_layer_cpumask(int idx)
 	}
 }
 
+struct layer *lookup_layer(int idx)
+{
+	if (idx < 0 || idx >= nr_layers) {
+		scx_bpf_error("invalid layer %d", idx);
+		return NULL;
+	}
+	return &layers[idx];
+}
+
 static void refresh_cpumasks(int idx)
 {
 	struct layer_cpumask_wrapper *cpumaskw;
-	struct layer *layer;
 	int cpu, total = 0;
+	struct layer *layer = lookup_layer(idx);
 
-	if (!__sync_val_compare_and_swap(&layers[idx].refresh_cpus, 1, 0))
+	if (!layer)
+		return;
+
+	if (!__sync_val_compare_and_swap(&layer->refresh_cpus, 1, 0))
 		return;
 
 	cpumaskw = bpf_map_lookup_elem(&layer_cpumasks, &idx);
 
+	bpf_rcu_read_lock();
 	bpf_for(cpu, 0, nr_possible_cpus) {
 		u8 *u8_ptr;
 
-		if ((u8_ptr = MEMBER_VPTR(layers, [idx].cpus[cpu / 8]))) {
+		if ((u8_ptr = &layer->cpus[cpu / 8])) {
 			/*
 			 * XXX - The following test should be outside the loop
 			 * but that makes the verifier think that
@@ -162,6 +175,7 @@ static void refresh_cpumasks(int idx)
 			 */
 			barrier_var(cpumaskw);
 			if (!cpumaskw || !cpumaskw->cpumask) {
+				bpf_rcu_read_unlock();
 				scx_bpf_error("can't happen");
 				return;
 			}
@@ -176,13 +190,7 @@ static void refresh_cpumasks(int idx)
 			scx_bpf_error("can't happen");
 		}
 	}
+	bpf_rcu_read_unlock();
 
-	// XXX - shouldn't be necessary
-	layer = MEMBER_VPTR(layers, [idx]);
-	if (!layer) {
-		scx_bpf_error("can't happen");
-		return;
-	}
-
 	layer->nr_cpus = total;
 	__sync_fetch_and_add(&layer->cpus_seq, 1);
@@ -240,15 +248,6 @@ struct task_ctx *lookup_task_ctx(struct task_struct *p)
 	}
 }
 
-struct layer *lookup_layer(int idx)
-{
-	if (idx < 0 || idx >= nr_layers) {
-		scx_bpf_error("invalid layer %d", idx);
-		return NULL;
-	}
-	return &layers[idx];
-}
-
 /*
  * Because the layer membership is by the default hierarchy cgroups rather than
  * the CPU controller membership, we can't use ops.cgroup_move(). Let's iterate
@@ -925,16 +924,11 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
 		if (!cpumask)
 			return -ENOMEM;
 
-		/*
-		 * Start all layers with full cpumask so that everything runs
-		 * everywhere. This will soon be updated by refresh_cpumasks()
-		 * once the scheduler starts running.
-		 */
-		bpf_cpumask_setall(cpumask);
-
 		cpumask = bpf_kptr_xchg(&cpumaskw->cpumask, cpumask);
 		if (cpumask)
 			bpf_cpumask_release(cpumask);
+
+		refresh_cpumasks(i);
 	}
 
 	return 0;