/*
 * Patch summary (scx_lavd): record the NUMA node of each compute domain,
 * pass that node when creating the domain's DSQ, and bound the
 * compute-domain loops by a runtime count (nr_cpdoms).
 *
 * scheds/rust/scx_lavd/src/bpf/lavd.bpf.h
 *   - struct cpdom_ctx: last_consume_clk moves to the first field, and a
 *     new u8 node_id field is added in front of is_big.
 *
 * scheds/rust/scx_lavd/src/bpf/main.bpf.c
 *   - consume_starving_task(): returns false right away when
 *     nr_cpdoms == 1; the loop bound changes from LAVD_CPDOM_MAX_NR to
 *     nr_cpdoms, with an explicit break once i >= LAVD_CPDOM_MAX_NR.
 *   - lavd_tick(): the get_cpu_ctx() / get_task_ctx() lookups and their
 *     NULL check now run before the eligibility test, and is_eligible()
 *     receives taskc_run instead of p_run.
 *   - init_cpdoms(): scx_bpf_create_dsq() is passed cpdomc->node_id
 *     instead of -1, the error message also reports the node, and
 *     nr_cpdoms is set to i + 1 once the DSQ was created without error.
 *   - init_per_cpu_ctx(): the compute-domain loop is bounded by nr_cpdoms,
 *     again with a break at LAVD_CPDOM_MAX_NR.
 *   - The last hunk of this file only appends an empty line after
 *     SCX_OPS_DEFINE(lavd_ops, ...).
 *
 * scheds/rust/scx_lavd/src/bpf/power.bpf.c
 *   - Adds the file-scope counter "static int nr_cpdoms".
 *
 * scheds/rust/scx_lavd/src/main.rs
 *   - impl FlatTopology hunk: the enumerate() index is renamed to node_pos
 *     and CpuFlatId.node_id is now taken from node.id().
 *   - impl Scheduler hunk: cpdom_ctxs[v.cpdom_id].node_id is filled from
 *     k.node_id cast to u8.
 *
 * NOTE(review): in consume_starving_task() the unchanged context line still
 * wraps dsq_id with "% LAVD_CPDOM_MAX_NR" although the loop now stops at
 * nr_cpdoms -- confirm that probing ids beyond nr_cpdoms is intended.
 * NOTE(review): node_pos is not referenced in the visible part of the
 * main.rs hunk -- check the rest of the loop for an unused-variable warning.
 * NOTE(review): the empty line appended at the end of main.bpf.c looks
 * unintentional -- confirm or drop that hunk.
 */
diff --git a/scheds/rust/scx_lavd/src/bpf/lavd.bpf.h b/scheds/rust/scx_lavd/src/bpf/lavd.bpf.h index 1287bb2..5423a1e 100644 --- a/scheds/rust/scx_lavd/src/bpf/lavd.bpf.h +++ b/scheds/rust/scx_lavd/src/bpf/lavd.bpf.h @@ -61,9 +61,10 @@ enum consts_internal { * - system > numa node > llc domain > compute domain per core type (P or E) */ struct cpdom_ctx { + u64 last_consume_clk; /* when the associated DSQ was consumed */ u64 id; /* id of this compute domain (== dsq_id) */ u64 alt_id; /* id of the closest compute domain of alternative type (== dsq id) */ - u64 last_consume_clk; /* when the associated DSQ was consumed */ + u8 node_id; /* numa domain id */ u8 is_big; /* is it a big core or little core? */ u8 is_active; /* if this compute domain is active */ u8 nr_neighbors[LAVD_CPDOM_MAX_DIST]; /* number of neighbors per distance */ diff --git a/scheds/rust/scx_lavd/src/bpf/main.bpf.c b/scheds/rust/scx_lavd/src/bpf/main.bpf.c index 262f7f7..6b6aabe 100644 --- a/scheds/rust/scx_lavd/src/bpf/main.bpf.c +++ b/scheds/rust/scx_lavd/src/bpf/main.bpf.c @@ -1144,7 +1144,13 @@ static bool consume_starving_task(s32 cpu, struct cpu_ctx *cpuc, u64 now) bool ret = false; int i; - bpf_for(i, 0, LAVD_CPDOM_MAX_NR) { + if (nr_cpdoms == 1) + return false; + + bpf_for(i, 0, nr_cpdoms) { + if (i >= LAVD_CPDOM_MAX_NR) + break; + dsq_id = (dsq_id + i) % LAVD_CPDOM_MAX_NR; if (dsq_id == cpuc->cpdom_id) @@ -1393,21 +1399,21 @@ void BPF_STRUCT_OPS(lavd_tick, struct task_struct *p_run) struct task_ctx *taskc_run; bool preempted = false; + cpuc_run = get_cpu_ctx(); + taskc_run = get_task_ctx(p_run); + if (!cpuc_run || !taskc_run) + goto update_cpuperf; + /* * If a task is eligible, don't consider its being preempted. */ - if (is_eligible(p_run)) + if (is_eligible(taskc_run)) goto update_cpuperf; /* * Try to yield the current CPU if there is a higher priority task in * the run queue. 
*/ - cpuc_run = get_cpu_ctx(); - taskc_run = get_task_ctx(p_run); - if (!cpuc_run || !taskc_run) - goto update_cpuperf; - preempted = try_yield_current_cpu(p_run, cpuc_run, taskc_run); /* @@ -1819,13 +1825,19 @@ static s32 init_cpdoms(u64 now) WRITE_ONCE(cpdomc->last_consume_clk, now); /* - * Create an associated DSQ. + * Create an associated DSQ on its associated NUMA domain. */ - err = scx_bpf_create_dsq(cpdomc->id, -1); + err = scx_bpf_create_dsq(cpdomc->id, cpdomc->node_id); if (err) { - scx_bpf_error("Failed to create a DSQ for cpdom %llu", cpdomc->id); + scx_bpf_error("Failed to create a DSQ for cpdom %llu on NUMA node %d", + cpdomc->id, cpdomc->node_id); return err; } + + /* + * Update the number of compute domains. + */ + nr_cpdoms = i + 1; } return 0; @@ -2001,7 +2013,10 @@ static s32 init_per_cpu_ctx(u64 now) /* * Initialize compute domain id. */ - bpf_for(cpdom_id, 0, LAVD_CPDOM_MAX_NR) { + bpf_for(cpdom_id, 0, nr_cpdoms) { + if (cpdom_id >= LAVD_CPDOM_MAX_NR) + break; + cpdomc = MEMBER_VPTR(cpdom_ctxs, [cpdom_id]); cd_cpumask = MEMBER_VPTR(cpdom_cpumask, [cpdom_id]); if (!cpdomc || !cd_cpumask) { @@ -2124,3 +2139,4 @@ SCX_OPS_DEFINE(lavd_ops, .flags = SCX_OPS_KEEP_BUILTIN_IDLE, .timeout_ms = 30000U, .name = "lavd"); + diff --git a/scheds/rust/scx_lavd/src/bpf/power.bpf.c b/scheds/rust/scx_lavd/src/bpf/power.bpf.c index 3967f6f..bafeb4a 100644 --- a/scheds/rust/scx_lavd/src/bpf/power.bpf.c +++ b/scheds/rust/scx_lavd/src/bpf/power.bpf.c @@ -19,6 +19,7 @@ const volatile u16 cpu_order_performance[LAVD_CPU_ID_MAX]; /* CPU preference ord const volatile u16 cpu_order_powersave[LAVD_CPU_ID_MAX]; /* CPU preference order for powersave mode */ const volatile u16 __cpu_capacity_hint[LAVD_CPU_ID_MAX]; /* CPU capacity based on 1000 */ struct cpdom_ctx cpdom_ctxs[LAVD_CPDOM_MAX_NR]; /* contexts for compute domains */ +static int nr_cpdoms; /* number of compute domains */ /* diff --git a/scheds/rust/scx_lavd/src/main.rs b/scheds/rust/scx_lavd/src/main.rs index 
82b14e3..cb5307e 100644 --- a/scheds/rust/scx_lavd/src/main.rs +++ b/scheds/rust/scx_lavd/src/main.rs @@ -298,12 +298,12 @@ impl FlatTopology { // Build a vector of cpu flat ids. let mut base_freq = 0; let mut avg_freq = 0; - for (node_id, node) in topo.nodes().iter().enumerate() { + for (node_pos, node) in topo.nodes().iter().enumerate() { for (llc_pos, (_llc_id, llc)) in node.llcs().iter().enumerate() { for (core_pos, (_core_id, core)) in llc.cores().iter().enumerate() { for (cpu_pos, (cpu_id, cpu)) in core.cpus().iter().enumerate() { let cpu_fid = CpuFlatId { - node_id, + node_id: node.id(), llc_pos, max_freq: cpu.max_freq(), core_pos, @@ -549,6 +549,7 @@ impl<'a> Scheduler<'a> { for (k, v) in topo.cpdom_map.iter() { skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].id = v.cpdom_id as u64; skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].alt_id = v.cpdom_alt_id.get() as u64; + skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].node_id = k.node_id as u8; skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].is_big = k.is_big as u8; skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].is_active = 1; for cpu_id in v.cpu_ids.iter() {