scx_layered: Use PROG_RUN to update cpumasks

Use BPF PROG_RUN from userspace to update layer cpumasks rather than relying on scheduler ticks. This should be a lower-overhead approach, since an extra BPF program no longer needs to be called on every CPU during each tick. Signed-off-by: Daniel Hodges <hodges.daniel.scott@gmail.com>
2024-11-23 03:11:49 +00:00 · 2024-11-20 13:22:35 -08:00 · 2024-11-20 13:22:35 -08:00 · a86e62aa21
commit a86e62aa21
parent dabb4aff41
2 changed files with 18 additions and 16 deletions
--- a/scheds/rust/scx_layered/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_layered/src/bpf/main.bpf.c
@ -322,8 +322,10 @@ static bool refresh_cpumasks(int idx)
 	if (!__sync_val_compare_and_swap(&layer->refresh_cpus, 1, 0))
 		return false;

+	bpf_rcu_read_lock();
 	if (!(cpumaskw = bpf_map_lookup_elem(&layer_cpumasks, &idx)) ||
 	    !(layer_cpumask = cpumaskw->cpumask)) {
+		bpf_rcu_read_unlock();
 		scx_bpf_error("can't happen");
 		return false;
 	}
@ -332,6 +334,7 @@ static bool refresh_cpumasks(int idx)
 		u8 *u8_ptr;

 		if (!(cctx = lookup_cpu_ctx(cpu))) {
+			bpf_rcu_read_unlock();
 			scx_bpf_error("unknown cpu");
 			return false;
 		}
@ -351,6 +354,7 @@ static bool refresh_cpumasks(int idx)

 	layer->nr_cpus = total;
 	__sync_fetch_and_add(&layer->cpus_seq, 1);
+	bpf_rcu_read_unlock();
 	trace("LAYER[%d] now has %d cpus, seq=%llu", idx, layer->nr_cpus, layer->cpus_seq);
 	return total > 0;
 }
@ -359,14 +363,17 @@ static bool refresh_cpumasks(int idx)
 // defined after some helpers, but before it's helpers are used.
 #include "cost.bpf.c"

-SEC("fentry")
-int BPF_PROG(sched_tick_fentry)
+/*
+ * Refreshes all layer cpumasks. This is called via BPF_PROG_RUN from userspace.
+ */
+SEC("syscall")
+int BPF_PROG(refresh_layer_cpumasks)
 {
 	int idx;

-	if (bpf_get_smp_processor_id() == 0)
-		bpf_for(idx, 0, nr_layers)
-			refresh_cpumasks(idx);
+	bpf_for(idx, 0, nr_layers)
+		refresh_cpumasks(idx);
+
 	return 0;
 }

--- a/scheds/rust/scx_layered/src/main.rs
+++ b/scheds/rust/scx_layered/src/main.rs
@ -34,6 +34,7 @@ use libbpf_rs::skel::Skel;
 use libbpf_rs::skel::SkelBuilder;
 use libbpf_rs::MapCore as _;
 use libbpf_rs::OpenObject;
+use libbpf_rs::ProgramInput;
 use log::debug;
 use log::info;
 use log::trace;
@ -1461,17 +1462,6 @@ impl<'a> Scheduler<'a> {
        skel.maps.rodata_data.slice_ns = scx_enums.SCX_SLICE_DFL;
        skel.maps.rodata_data.max_exec_ns = 20 * scx_enums.SCX_SLICE_DFL;

-        // scheduler_tick() got renamed to sched_tick() during v6.10-rc.
-        let sched_tick_name = match compat::ksym_exists("sched_tick")? {
-            true => "sched_tick",
-            false => "scheduler_tick",
-        };
-
-        skel.progs
-            .sched_tick_fentry
-            .set_attach_target(0, Some(sched_tick_name.into()))
-            .context("Failed to set attach target for sched_tick_fentry()")?;
-
        // Initialize skel according to @opts.
        skel.struct_ops.layered_mut().exit_dump_len = opts.exit_dump_len;

@ -1715,6 +1705,11 @@ impl<'a> Scheduler<'a> {
                    self.nr_layer_cpus_ranges[lidx].1.max(layer.nr_cpus),
                );
            }
+            let input = ProgramInput {
+                ..Default::default()
+            };
+            let prog = &mut self.skel.progs.refresh_layer_cpumasks;
+            let _ = prog.test_run(input);
        }

        let _ = self.update_netdev_cpumasks();