Merge pull request #200 from sched-ext/layered_delete

layered: Use TLS map instead of hash map
Tejun Heo 2024-03-27 17:09:20 -10:00 committed by GitHub
commit 340938025f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 18 additions and 50 deletions
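
The change replaces the pid-keyed BPF_MAP_TYPE_HASH holding per-task contexts with a BPF_MAP_TYPE_TASK_STORAGE map, so each task_ctx hangs directly off its task_struct: lookups take the task pointer instead of a pid key, allocation happens on first access with BPF_LOCAL_STORAGE_GET_F_CREATE, and the storage is reclaimed automatically when the task exits. A minimal, self-contained sketch of that pattern follows; the program, map, and struct names are illustrative and not taken from scx_layered:

/* example_tls.bpf.c - illustrative sketch only, not part of this commit */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

struct example_task_ctx {
	u64 wakeups;
};

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);	/* required for task storage maps */
	__type(key, int);
	__type(value, struct example_task_ctx);
} example_ctxs SEC(".maps");

SEC("tp_btf/sched_wakeup")
int BPF_PROG(example_wakeup, struct task_struct *p)
{
	struct example_task_ctx *tctx;

	/* Allocate on first use; the storage is freed together with the
	 * task, so no explicit delete path is needed. */
	tctx = bpf_task_storage_get(&example_ctxs, p, 0,
				    BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (tctx)
		tctx->wakeups++;
	return 0;
}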

View File

@@ -37,7 +37,6 @@ enum consts {
 /* Statistics */
 enum global_stat_idx {
-	GSTAT_TASK_CTX_FREE_FAILED,
 	GSTAT_EXCL_IDLE,
 	GSTAT_EXCL_WAKEUP,
 	NR_GSTATS,

View File

@@ -238,31 +238,25 @@ struct task_ctx {
 };

 struct {
-	__uint(type, BPF_MAP_TYPE_HASH);
-	__type(key, pid_t);
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
 	__type(value, struct task_ctx);
-	__uint(max_entries, MAX_TASKS);
-	__uint(map_flags, 0);
 } task_ctxs SEC(".maps");

 static struct task_ctx *lookup_task_ctx_may_fail(struct task_struct *p)
 {
-	s32 pid = p->pid;
-
-	return bpf_map_lookup_elem(&task_ctxs, &pid);
+	return bpf_task_storage_get(&task_ctxs, p, 0, 0);
 }

 static struct task_ctx *lookup_task_ctx(struct task_struct *p)
 {
-	struct task_ctx *tctx;
-	s32 pid = p->pid;
+	struct task_ctx *tctx = lookup_task_ctx_may_fail(p);

-	if ((tctx = bpf_map_lookup_elem(&task_ctxs, &pid))) {
-		return tctx;
-	} else {
+	if (!tctx)
 		scx_bpf_error("task_ctx lookup failed");
-		return NULL;
-	}
+
+	return tctx;
 }

 static struct layer *lookup_layer(int idx)
@@ -852,29 +846,21 @@ s32 BPF_STRUCT_OPS(layered_init_task, struct task_struct *p,
 	 * fail spuriously due to BPF recursion protection triggering
 	 * unnecessarily.
 	 */
-	if ((ret = bpf_map_update_elem(&task_ctxs, &pid, &tctx_init, 0 /*BPF_NOEXIST*/))) {
-		scx_bpf_error("task_ctx allocation failure, ret=%d", ret);
-		return ret;
-	}
-
-	/*
-	 * Read the entry from the map immediately so we can add the cpumask
-	 * with bpf_kptr_xchg().
-	 */
-	if (!(tctx = lookup_task_ctx(p)))
-		return -ENOENT;
-
-	cpumask = bpf_cpumask_create();
-	if (!cpumask) {
-		bpf_map_delete_elem(&task_ctxs, &pid);
+	tctx = bpf_task_storage_get(&task_ctxs, p, 0,
+				    BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!tctx) {
+		scx_bpf_error("task_ctx allocation failure");
 		return -ENOMEM;
 	}

+	cpumask = bpf_cpumask_create();
+	if (!cpumask)
+		return -ENOMEM;
+
 	cpumask = bpf_kptr_xchg(&tctx->layered_cpumask, cpumask);
 	if (cpumask) {
 		/* Should never happen as we just inserted it above. */
 		bpf_cpumask_release(cpumask);
-		bpf_map_delete_elem(&task_ctxs, &pid);
 		return -EINVAL;
 	}
@@ -901,16 +887,6 @@ void BPF_STRUCT_OPS(layered_exit_task, struct task_struct *p,
 	if (tctx->layer >= 0 && tctx->layer < nr_layers)
 		__sync_fetch_and_add(&layers[tctx->layer].nr_tasks, -1);
-
-	/*
-	 * XXX - There's no reason delete should fail here but BPF's recursion
-	 * protection can unnecessarily fail the operation. The fact that
-	 * deletions aren't reliable means that we sometimes leak task_ctx and
-	 * can't use BPF_NOEXIST on allocation in .prep_enable().
-	 */
-	ret = bpf_map_delete_elem(&task_ctxs, &pid);
-	if (ret)
-		gstat_inc(GSTAT_TASK_CTX_FREE_FAILED, cctx);
 }

 s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
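
Because task-local storage is torn down together with the task, the explicit bpf_map_delete_elem() and the GSTAT_TASK_CTX_FREE_FAILED bookkeeping above can simply go away, and the init path pairs bpf_task_storage_get(..., BPF_LOCAL_STORAGE_GET_F_CREATE) with bpf_kptr_xchg() to install the per-task cpumask. A rough sketch of that kptr-in-task-storage pattern, using illustrative names and assuming the same vmlinux.h/kfunc declarations the file above already pulls in:

/* Illustrative sketch, not scx_layered's actual code. */
struct example_ctx {
	struct bpf_cpumask __kptr *cpumask;
};

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct example_ctx);
} example_cpumask_ctxs SEC(".maps");

static int example_attach_cpumask(struct task_struct *p)
{
	struct example_ctx *ctx;
	struct bpf_cpumask *mask;

	ctx = bpf_task_storage_get(&example_cpumask_ctxs, p, 0,
				   BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (!ctx)
		return -ENOMEM;

	mask = bpf_cpumask_create();
	if (!mask)
		return -ENOMEM;

	/* Hand ownership of the new cpumask to the map value; release
	 * whatever was installed before, if anything. */
	mask = bpf_kptr_xchg(&ctx->cpumask, mask);
	if (mask)
		bpf_cpumask_release(mask);
	return 0;
}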

View File

@@ -199,7 +199,7 @@ lazy_static::lazy_static! {
 /// scx_layered will print out a set of statistics every monitoring
 /// interval.
 ///
-/// tot= 117909 local=86.20 open_idle= 0.21 affn_viol= 1.37 tctx_err=9 proc=6ms
+/// tot= 117909 local=86.20 open_idle= 0.21 affn_viol= 1.37 proc=6ms
 /// busy= 34.2 util= 1733.6 load= 21744.1 fallback_cpu= 1
 /// batch : util/frac= 11.8/ 0.7 load/frac= 29.7: 0.1 tasks= 2597
 /// tot= 3478 local=67.80 open_idle= 0.00 preempt= 0.00 affn_viol= 0.00
@@ -1084,7 +1084,6 @@ struct OpenMetricsStats {
     local: Gauge<f64, AtomicU64>,
     open_idle: Gauge<f64, AtomicU64>,
     affn_viol: Gauge<f64, AtomicU64>,
-    tctx_err: Gauge<i64, AtomicI64>,
     excl_idle: Gauge<f64, AtomicU64>,
     excl_wakeup: Gauge<f64, AtomicU64>,
     proc_ms: Gauge<i64, AtomicI64>,
@@ -1137,7 +1136,6 @@ impl OpenMetricsStats {
             affn_viol,
             "% which violated configured policies due to CPU affinity restrictions"
         );
-        register!(tctx_err, "Failures to free task contexts");
         register!(
             excl_idle,
             "Number of times a CPU skipped dispatching due to sibling running an exclusive task"
@@ -1508,10 +1506,6 @@ impl<'a> Scheduler<'a> {
         self.om_stats
             .affn_viol
             .set(lsum_pct(bpf_intf::layer_stat_idx_LSTAT_AFFN_VIOL));
-        self.om_stats.tctx_err.set(
-            stats.prev_bpf_stats.gstats
-                [bpf_intf::global_stat_idx_GSTAT_TASK_CTX_FREE_FAILED as usize] as i64,
-        );
         self.om_stats.excl_idle.set(
             stats.bpf_stats.gstats[bpf_intf::global_stat_idx_GSTAT_EXCL_IDLE as usize] as f64
                 / total as f64,
@@ -1527,12 +1521,11 @@ impl<'a> Scheduler<'a> {
         if !self.om_format {
             info!(
-                "tot={:7} local={} open_idle={} affn_viol={} tctx_err={} proc={:?}ms",
+                "tot={:7} local={} open_idle={} affn_viol={} proc={:?}ms",
                 self.om_stats.total.get(),
                 fmt_pct(self.om_stats.local.get()),
                 fmt_pct(self.om_stats.open_idle.get()),
                 fmt_pct(self.om_stats.affn_viol.get()),
-                self.om_stats.tctx_err.get(),
                 self.om_stats.proc_ms.get(),
             );