Merge pull request #872 from multics69/lavd-opt-dispatch-v2

scx_lavd: optimize ops.dispatch() and DSQ placement
Changwoo Min 2024-11-01 19:52:58 +09:00 committed by GitHub
commit cf6aa5d63b
4 changed files with 33 additions and 14 deletions
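In short, the diff below does two things: each compute domain's DSQ is now created on that domain's own NUMA node instead of with no node preference, and a new nr_cpdoms counter lets the hot paths iterate only over compute domains that actually exist rather than always scanning all LAVD_CPDOM_MAX_NR slots. For context, a sketch of the kfunc involved, as I understand the sched_ext API; the helper below is hypothetical and not part of the patch:

/* sched_ext kfunc used by this patch: create a DSQ, optionally on a NUMA node. */
s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym;

/*
 * Hypothetical helper, not from the patch: create a compute domain's DSQ on
 * its own NUMA node, falling back to "no preference" (-1) when the node id
 * is unknown.
 */
static s32 create_cpdom_dsq(u64 dsq_id, s32 node_id)
{
	return scx_bpf_create_dsq(dsq_id, node_id >= 0 ? node_id : -1);
}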

View File

@@ -61,9 +61,10 @@ enum consts_internal {
  * - system > numa node > llc domain > compute domain per core type (P or E)
  */
 struct cpdom_ctx {
-	u64	last_consume_clk;	/* when the associated DSQ was consumed */
 	u64	id;	/* id of this compute domain (== dsq_id) */
 	u64	alt_id;	/* id of the closest compute domain of alternative type (== dsq id) */
+	u64	last_consume_clk;	/* when the associated DSQ was consumed */
+	u8	node_id;	/* numa domain id */
 	u8	is_big;	/* is it a big core or little core? */
 	u8	is_active;	/* if this compute domain is active */
 	u8	nr_neighbors[LAVD_CPDOM_MAX_DIST]; /* number of neighbors per distance */
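Note that id doubles as the domain's DSQ id and alt_id names the closest domain of the other core type, so switching between the two needs no extra lookup table. A tiny illustration of that pairing (hypothetical helper, not part of scx_lavd):

/*
 * Hypothetical helper: pick this domain's DSQ id or its alternative-type
 * peer's. Both fields already are DSQ ids, per the comments above.
 */
static u64 cpdom_dsq_id(struct cpdom_ctx *cpdomc, bool use_alt)
{
	return use_alt ? cpdomc->alt_id : cpdomc->id;
}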

View File

@@ -1144,7 +1144,13 @@ static bool consume_starving_task(s32 cpu, struct cpu_ctx *cpuc, u64 now)
 	bool ret = false;
 	int i;
 
-	bpf_for(i, 0, LAVD_CPDOM_MAX_NR) {
+	if (nr_cpdoms == 1)
+		return false;
+
+	bpf_for(i, 0, nr_cpdoms) {
+		if (i >= LAVD_CPDOM_MAX_NR)
+			break;
+
 		dsq_id = (dsq_id + i) % LAVD_CPDOM_MAX_NR;
 
 		if (dsq_id == cpuc->cpdom_id)
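The rewritten loop scans only the nr_cpdoms domains discovered at init time, while the explicit i >= LAVD_CPDOM_MAX_NR check presumably keeps the index provably within the fixed-size per-domain arrays for the BPF verifier, since nr_cpdoms is only known at runtime. The same pattern in isolation, with illustrative names rather than scx_lavd's:

/* Runtime-bounded, compile-time-capped loop pattern (illustrative names). */
#define MAX_ITEMS	64

static u64 item_vals[MAX_ITEMS];
static int nr_items;	/* discovered at init time, always <= MAX_ITEMS */

static u64 sum_items(void)
{
	u64 sum = 0;
	int i;

	bpf_for(i, 0, nr_items) {
		if (i >= MAX_ITEMS)	/* keeps item_vals[i] provably in bounds */
			break;
		sum += item_vals[i];
	}
	return sum;
}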
@@ -1393,21 +1399,21 @@ void BPF_STRUCT_OPS(lavd_tick, struct task_struct *p_run)
 	struct task_ctx *taskc_run;
 	bool preempted = false;
 
+	cpuc_run = get_cpu_ctx();
+	taskc_run = get_task_ctx(p_run);
+	if (!cpuc_run || !taskc_run)
+		goto update_cpuperf;
+
 	/*
 	 * If a task is eligible, don't consider its being preempted.
 	 */
-	if (is_eligible(p_run))
+	if (is_eligible(taskc_run))
 		goto update_cpuperf;
 
 	/*
 	 * Try to yield the current CPU if there is a higher priority task in
 	 * the run queue.
 	 */
-	cpuc_run = get_cpu_ctx();
-	taskc_run = get_task_ctx(p_run);
-	if (!cpuc_run || !taskc_run)
-		goto update_cpuperf;
-
 	preempted = try_yield_current_cpu(p_run, cpuc_run, taskc_run);
 
 	/*
@@ -1819,13 +1825,19 @@ static s32 init_cpdoms(u64 now)
 		WRITE_ONCE(cpdomc->last_consume_clk, now);
 
 		/*
-		 * Create an associated DSQ.
+		 * Create an associated DSQ on its associated NUMA domain.
 		 */
-		err = scx_bpf_create_dsq(cpdomc->id, -1);
+		err = scx_bpf_create_dsq(cpdomc->id, cpdomc->node_id);
 		if (err) {
-			scx_bpf_error("Failed to create a DSQ for cpdom %llu", cpdomc->id);
+			scx_bpf_error("Failed to create a DSQ for cpdom %llu on NUMA node %d",
+				      cpdomc->id, cpdomc->node_id);
 			return err;
 		}
+
+		/*
+		 * Update the number of compute domains.
+		 */
+		nr_cpdoms = i + 1;
 	}
 
 	return 0;
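Because init_cpdoms() walks the domains in index order, nr_cpdoms ends up equal to the number of initialized domains once the loop finishes, which is what the dispatch-side loops above rely on. For completeness, a minimal sketch of the consumer side of such a per-domain DSQ; this is illustrative only, not scx_lavd's actual dispatch path, and it assumes the scx_bpf_consume() kfunc in use around the time of this commit:

/* Assumed declaration of the consume kfunc available around this commit. */
bool scx_bpf_consume(u64 dsq_id) __ksym;

/*
 * Illustrative only, not scx_lavd's dispatch logic: drain the DSQ of the
 * CPU's own compute domain into the local CPU. Returns true if a task was
 * moved.
 */
static bool consume_own_cpdom(struct cpu_ctx *cpuc)
{
	return scx_bpf_consume(cpuc->cpdom_id);
}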
@@ -2001,7 +2013,10 @@ static s32 init_per_cpu_ctx(u64 now)
 	/*
 	 * Initialize compute domain id.
 	 */
-	bpf_for(cpdom_id, 0, LAVD_CPDOM_MAX_NR) {
+	bpf_for(cpdom_id, 0, nr_cpdoms) {
+		if (cpdom_id >= LAVD_CPDOM_MAX_NR)
+			break;
+
 		cpdomc = MEMBER_VPTR(cpdom_ctxs, [cpdom_id]);
 		cd_cpumask = MEMBER_VPTR(cpdom_cpumask, [cpdom_id]);
 		if (!cpdomc || !cd_cpumask) {
@@ -2124,3 +2139,4 @@ SCX_OPS_DEFINE(lavd_ops,
 	       .flags = SCX_OPS_KEEP_BUILTIN_IDLE,
 	       .timeout_ms = 30000U,
 	       .name = "lavd");

View File

@@ -19,6 +19,7 @@ const volatile u16 cpu_order_performance[LAVD_CPU_ID_MAX]; /* CPU preference order for performance mode */
 const volatile u16 cpu_order_powersave[LAVD_CPU_ID_MAX]; /* CPU preference order for powersave mode */
 const volatile u16 __cpu_capacity_hint[LAVD_CPU_ID_MAX]; /* CPU capacity based on 1000 */
 struct cpdom_ctx cpdom_ctxs[LAVD_CPDOM_MAX_NR]; /* contexts for compute domains */
+static int nr_cpdoms; /* number of compute domains */
 
 /*

View File

@@ -298,12 +298,12 @@ impl FlatTopology {
         // Build a vector of cpu flat ids.
         let mut base_freq = 0;
         let mut avg_freq = 0;
-        for (node_id, node) in topo.nodes().iter().enumerate() {
+        for (node_pos, node) in topo.nodes().iter().enumerate() {
             for (llc_pos, (_llc_id, llc)) in node.llcs().iter().enumerate() {
                 for (core_pos, (_core_id, core)) in llc.cores().iter().enumerate() {
                     for (cpu_pos, (cpu_id, cpu)) in core.cpus().iter().enumerate() {
                         let cpu_fid = CpuFlatId {
-                            node_id,
+                            node_id: node.id(),
                             llc_pos,
                             max_freq: cpu.max_freq(),
                             core_pos,
@@ -549,6 +549,7 @@ impl<'a> Scheduler<'a> {
         for (k, v) in topo.cpdom_map.iter() {
             skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].id = v.cpdom_id as u64;
             skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].alt_id = v.cpdom_alt_id.get() as u64;
+            skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].node_id = k.node_id as u8;
             skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].is_big = k.is_big as u8;
             skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].is_active = 1;
             for cpu_id in v.cpu_ids.iter() {