Revert "layered: Set layered cpumask in scheduler init call"

This reverts commit 56ff3437a2.

For some reason, we non-deterministically fail to see the updated layer
values in BPF if we initialize them before attaching. Let's revert just
this part so the breakage stops blocking us, and we can figure out the
root cause asynchronously.

Signed-off-by: David Vernet <void@manifault.com>
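
For context, a minimal, hypothetical userspace-side sketch of the ordering hazard described above (the "example" skeleton name and the refresh_cpus write are illustrative; the real scx_layered loader is written in Rust and differs). Writes to BPF global data are only reliably observed by BPF code that runs after the write completes, so consuming them from an init path that races with attach is timing-dependent:

/* Hypothetical libbpf loader sketch; the "example" skeleton is illustrative. */
#include <bpf/libbpf.h>
#include "example.skel.h"

int main(void)
{
	struct example_bpf *skel = example_bpf__open_and_load();

	if (!skel)
		return 1;

	/* Userspace publishes layer config into BPF global data... */
	skel->bss->layers[0].refresh_cpus = 1;

	/*
	 * ...but an init callback that runs as part of attach may or may
	 * not observe the write above, depending on timing. Deferring the
	 * consumption to a post-attach path (as this revert does by
	 * dropping the refresh_cpumasks() call from layered_init()) avoids
	 * the race.
	 */
	if (example_bpf__attach(skel)) {
		example_bpf__destroy(skel);
		return 1;
	}

	example_bpf__destroy(skel);
	return 0;
}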
@@ -140,34 +140,21 @@ static struct cpumask *lookup_layer_cpumask(int idx)
 	}
 }
 
-struct layer *lookup_layer(int idx)
-{
-	if (idx < 0 || idx >= nr_layers) {
-		scx_bpf_error("invalid layer %d", idx);
-		return NULL;
-	}
-	return &layers[idx];
-}
-
 static void refresh_cpumasks(int idx)
 {
 	struct layer_cpumask_wrapper *cpumaskw;
+	struct layer *layer;
 	int cpu, total = 0;
-	struct layer *layer = lookup_layer(idx);
 
-	if (!layer)
-		return;
-
-	if (!__sync_val_compare_and_swap(&layer->refresh_cpus, 1, 0))
+	if (!__sync_val_compare_and_swap(&layers[idx].refresh_cpus, 1, 0))
 		return;
 
 	cpumaskw = bpf_map_lookup_elem(&layer_cpumasks, &idx);
 
-	bpf_rcu_read_lock();
 	bpf_for(cpu, 0, nr_possible_cpus) {
 		u8 *u8_ptr;
 
-		if ((u8_ptr = &layer->cpus[cpu / 8])) {
+		if ((u8_ptr = MEMBER_VPTR(layers, [idx].cpus[cpu / 8]))) {
			/*
			 * XXX - The following test should be outside the loop
			 * but that makes the verifier think that
@@ -175,7 +162,6 @@ static void refresh_cpumasks(int idx)
 		 */
 		barrier_var(cpumaskw);
 		if (!cpumaskw || !cpumaskw->cpumask) {
-			bpf_rcu_read_unlock();
 			scx_bpf_error("can't happen");
 			return;
 		}
@@ -190,7 +176,13 @@ static void refresh_cpumasks(int idx)
 			scx_bpf_error("can't happen");
 		}
 	}
-	bpf_rcu_read_unlock();
+
+	// XXX - shouldn't be necessary
+	layer = MEMBER_VPTR(layers, [idx]);
+	if (!layer) {
+		scx_bpf_error("can't happen");
+		return;
+	}
 
 	layer->nr_cpus = total;
 	__sync_fetch_and_add(&layer->cpus_seq, 1);
@@ -248,6 +240,15 @@ struct task_ctx *lookup_task_ctx(struct task_struct *p)
 	}
 }
 
+struct layer *lookup_layer(int idx)
+{
+	if (idx < 0 || idx >= nr_layers) {
+		scx_bpf_error("invalid layer %d", idx);
+		return NULL;
+	}
+	return &layers[idx];
+}
+
 /*
  * Because the layer membership is by the default hierarchy cgroups rather than
  * the CPU controller membership, we can't use ops.cgroup_move(). Let's iterate
@@ -921,11 +922,16 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
 		if (!cpumask)
 			return -ENOMEM;
+
+		/*
+		 * Start all layers with full cpumask so that everything runs
+		 * everywhere. This will soon be updated by refresh_cpumasks()
+		 * once the scheduler starts running.
+		 */
 		bpf_cpumask_setall(cpumask);
+
 		cpumask = bpf_kptr_xchg(&cpumaskw->cpumask, cpumask);
 		if (cpumask)
 			bpf_cpumask_release(cpumask);
-
-		refresh_cpumasks(i);
 	}
 
 	return 0;
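
With the revert applied, every layer starts with a full cpumask out of layered_init(), and the masks are narrowed only once the scheduler is attached and running. A hedged sketch of how that runtime refresh might be driven (the hook name and call site are illustrative, not the exact scx_layered code path; refresh_cpumasks() and nr_layers are from the diff above):

/*
 * Illustrative runtime hook: userspace rewrites layers[idx].cpus and
 * sets refresh_cpus = 1; the CAS inside refresh_cpumasks() then lets
 * exactly one caller apply each pending update.
 */
void BPF_STRUCT_OPS(layered_dispatch_sketch, s32 cpu, struct task_struct *prev)
{
	int idx;

	/* Consume any pending cpumask updates before dispatching. */
	bpf_for(idx, 0, nr_layers)
		refresh_cpumasks(idx);

	/* ... task dispatch logic would follow here ... */
}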