diff --git a/scheds/rust/scx_layered/src/bpf/intf.h b/scheds/rust/scx_layered/src/bpf/intf.h
index 5090c62..c6d8e19 100644
--- a/scheds/rust/scx_layered/src/bpf/intf.h
+++ b/scheds/rust/scx_layered/src/bpf/intf.h
@@ -37,7 +37,6 @@ enum consts {
 
 /* Statistics */
 enum global_stat_idx {
-	GSTAT_TASK_CTX_FREE_FAILED,
 	GSTAT_EXCL_IDLE,
 	GSTAT_EXCL_WAKEUP,
 	NR_GSTATS,
diff --git a/scheds/rust/scx_layered/src/bpf/main.bpf.c b/scheds/rust/scx_layered/src/bpf/main.bpf.c
index 0b40228..dd06a5c 100644
--- a/scheds/rust/scx_layered/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_layered/src/bpf/main.bpf.c
@@ -238,31 +238,25 @@ struct task_ctx {
 };
 
 struct {
-	__uint(type, BPF_MAP_TYPE_HASH);
-	__type(key, pid_t);
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
 	__type(value, struct task_ctx);
-	__uint(max_entries, MAX_TASKS);
-	__uint(map_flags, 0);
 } task_ctxs SEC(".maps");
 
 static struct task_ctx *lookup_task_ctx_may_fail(struct task_struct *p)
 {
-	s32 pid = p->pid;
-
-	return bpf_map_lookup_elem(&task_ctxs, &pid);
+	return bpf_task_storage_get(&task_ctxs, p, 0, 0);
 }
 
 static struct task_ctx *lookup_task_ctx(struct task_struct *p)
 {
-	struct task_ctx *tctx;
-	s32 pid = p->pid;
+	struct task_ctx *tctx = lookup_task_ctx_may_fail(p);
 
-	if ((tctx = bpf_map_lookup_elem(&task_ctxs, &pid))) {
-		return tctx;
-	} else {
+	if (!tctx)
 		scx_bpf_error("task_ctx lookup failed");
-		return NULL;
-	}
+
+	return tctx;
 }
 
 static struct layer *lookup_layer(int idx)
@@ -852,29 +846,21 @@ s32 BPF_STRUCT_OPS(layered_init_task, struct task_struct *p,
	 * fail spuriously due to BPF recursion protection triggering
	 * unnecessarily.
	 */
-	if ((ret = bpf_map_update_elem(&task_ctxs, &pid, &tctx_init, 0 /*BPF_NOEXIST*/))) {
-		scx_bpf_error("task_ctx allocation failure, ret=%d", ret);
-		return ret;
-	}
-
-	/*
-	 * Read the entry from the map immediately so we can add the cpumask
-	 * with bpf_kptr_xchg().
-	 */
-	if (!(tctx = lookup_task_ctx(p)))
-		return -ENOENT;
-
-	cpumask = bpf_cpumask_create();
-	if (!cpumask) {
-		bpf_map_delete_elem(&task_ctxs, &pid);
+	tctx = bpf_task_storage_get(&task_ctxs, p, 0,
+				    BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!tctx) {
+		scx_bpf_error("task_ctx allocation failure");
 		return -ENOMEM;
 	}
 
+	cpumask = bpf_cpumask_create();
+	if (!cpumask)
+		return -ENOMEM;
+
 	cpumask = bpf_kptr_xchg(&tctx->layered_cpumask, cpumask);
 	if (cpumask) {
 		/* Should never happen as we just inserted it above. */
 		bpf_cpumask_release(cpumask);
-		bpf_map_delete_elem(&task_ctxs, &pid);
 		return -EINVAL;
 	}
 
@@ -901,16 +887,6 @@ void BPF_STRUCT_OPS(layered_exit_task, struct task_struct *p,
 
 	if (tctx->layer >= 0 && tctx->layer < nr_layers)
 		__sync_fetch_and_add(&layers[tctx->layer].nr_tasks, -1);
-
-	/*
-	 * XXX - There's no reason delete should fail here but BPF's recursion
-	 * protection can unnecessarily fail the operation. The fact that
-	 * deletions aren't reliable means that we sometimes leak task_ctx and
-	 * can't use BPF_NOEXIST on allocation in .prep_enable().
-	 */
-	ret = bpf_map_delete_elem(&task_ctxs, &pid);
-	if (ret)
-		gstat_inc(GSTAT_TASK_CTX_FREE_FAILED, cctx);
 }
 
 s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
diff --git a/scheds/rust/scx_layered/src/main.rs b/scheds/rust/scx_layered/src/main.rs
index 36e1627..05c81d4 100644
--- a/scheds/rust/scx_layered/src/main.rs
+++ b/scheds/rust/scx_layered/src/main.rs
@@ -199,7 +199,7 @@ lazy_static::lazy_static! {
 /// scx_layered will print out a set of statistics every monitoring
 /// interval.
 ///
-/// tot= 117909 local=86.20 open_idle= 0.21 affn_viol= 1.37 tctx_err=9 proc=6ms
+/// tot= 117909 local=86.20 open_idle= 0.21 affn_viol= 1.37 proc=6ms
 /// busy= 34.2 util= 1733.6 load= 21744.1 fallback_cpu= 1
 ///   batch : util/frac= 11.8/ 0.7 load/frac= 29.7: 0.1 tasks= 2597
 ///           tot= 3478 local=67.80 open_idle= 0.00 preempt= 0.00 affn_viol= 0.00
@@ -1084,7 +1084,6 @@ struct OpenMetricsStats {
     local: Gauge,
     open_idle: Gauge,
     affn_viol: Gauge,
-    tctx_err: Gauge,
     excl_idle: Gauge,
     excl_wakeup: Gauge,
     proc_ms: Gauge,
@@ -1137,7 +1136,6 @@ impl OpenMetricsStats {
             affn_viol,
             "% which violated configured policies due to CPU affinity restrictions"
         );
-        register!(tctx_err, "Failures to free task contexts");
         register!(
             excl_idle,
             "Number of times a CPU skipped dispatching due to sibling running an exclusive task"
@@ -1508,10 +1506,6 @@ impl<'a> Scheduler<'a> {
         self.om_stats
             .affn_viol
             .set(lsum_pct(bpf_intf::layer_stat_idx_LSTAT_AFFN_VIOL));
-        self.om_stats.tctx_err.set(
-            stats.prev_bpf_stats.gstats
-                [bpf_intf::global_stat_idx_GSTAT_TASK_CTX_FREE_FAILED as usize] as i64,
-        );
         self.om_stats.excl_idle.set(
             stats.bpf_stats.gstats[bpf_intf::global_stat_idx_GSTAT_EXCL_IDLE as usize] as f64
                 / total as f64,
@@ -1527,12 +1521,11 @@ impl<'a> Scheduler<'a> {
 
         if !self.om_format {
             info!(
-                "tot={:7} local={} open_idle={} affn_viol={} tctx_err={} proc={:?}ms",
+                "tot={:7} local={} open_idle={} affn_viol={} proc={:?}ms",
                 self.om_stats.total.get(),
                 fmt_pct(self.om_stats.local.get()),
                 fmt_pct(self.om_stats.open_idle.get()),
                 fmt_pct(self.om_stats.affn_viol.get()),
-                self.om_stats.tctx_err.get(),
                 self.om_stats.proc_ms.get(),
             );