scx_flatcg: Keep cgroup rb nodes stashed

The flatcg scheduler uses an rb_node type - struct cgv_node - to keep
track of vtime. On cgroup init, a cgv_node is created and stashed in a
hashmap - cgv_node_stash - for later use. In cgrp_enqueued and
try_pick_next_cgroup, the node is inserted into the rbtree; before this
patch's changes, that required removing it from the stash first.
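
For reference, the pre-patch enqueue path had to take ownership of the
stashed node before inserting it, roughly like this (simplified sketch;
error reporting and stat counters omitted):

  struct cgv_node_stash *stash;
  struct cgv_node *cgv_node;

  stash = bpf_map_lookup_elem(&cgv_node_stash, &cgid);
  if (!stash)
      return;

  /* NULL means the node is already on the rbtree */
  cgv_node = bpf_kptr_xchg(&stash->node, NULL);
  if (!cgv_node)
      return;

  bpf_spin_lock(&cgv_tree_lock);
  bpf_rbtree_add(&cgv_tree, &cgv_node->rb_node, cgv_node_less);
  bpf_spin_unlock(&cgv_tree_lock);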

This patch makes cgv_node refcounted, which allows keeping it in the
stash for the entirety of the cgroup's lifetime. Unnecessary
bpf_kptr_xchg's and other boilerplate can be removed as a result.
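
With the refcount in place, cgrp_enqueued can instead take a new
reference on the stashed node and hand that reference to the rbtree,
leaving the stash entry untouched; roughly (same simplifications as
the sketch above):

  stash = bpf_map_lookup_elem(&cgv_node_stash, &cgid);
  if (!stash || !stash->node)
      return;

  /* the stash keeps its reference; the acquired one goes to the tree */
  cgv_node = bpf_refcount_acquire(stash->node);
  if (!cgv_node)
      return;

  bpf_spin_lock(&cgv_tree_lock);
  bpf_rbtree_add(&cgv_tree, &cgv_node->rb_node, cgv_node_less);
  bpf_spin_unlock(&cgv_tree_lock);

On the try_pick_next_cgroup side, a node removed from the tree that
does not get re-inserted can simply be bpf_obj_drop'd, since the stash
still holds its own reference.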

Note that in addition to bpf_refcount patches, which have been upstream
for quite some time, this change depends on a more recent series [0].

  [0]: https://lore.kernel.org/bpf/20231107085639.3016113-1-davemarchevsky@fb.com/

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Author: Dave Marchevsky <davemarchevsky@fb.com>
Date:   2023-12-14 11:25:31 -08:00
Parent: b7c06b9ed9
Commit: 3b7f33ea1b
2 changed files with 11 additions and 18 deletions

@@ -100,6 +100,7 @@ struct cgv_node {
 	struct bpf_rb_node	rb_node;
 	__u64			cvtime;
 	__u64			cgid;
+	struct bpf_refcount	refcount;
 };
 
 private(CGV_TREE) struct bpf_spin_lock cgv_tree_lock;
@@ -289,14 +290,17 @@ static void cgrp_enqueued(struct cgroup *cgrp, struct fcg_cgrp_ctx *cgc)
 	}
 
 	stash = bpf_map_lookup_elem(&cgv_node_stash, &cgid);
-	if (!stash) {
+	if (!stash || !stash->node) {
 		scx_bpf_error("cgv_node lookup failed for cgid %llu", cgid);
 		return;
 	}
 
-	/* NULL if the node is already on the rbtree */
-	cgv_node = bpf_kptr_xchg(&stash->node, NULL);
+	cgv_node = bpf_refcount_acquire(stash->node);
 	if (!cgv_node) {
+		/*
+		 * Node never leaves cgv_node_stash, this should only happen if
+		 * fcg_cgroup_exit deletes the stashed node
+		 */
 		stat_inc(FCG_STAT_ENQ_RACE);
 		return;
 	}
@@ -609,7 +613,6 @@ void BPF_STRUCT_OPS(fcg_cgroup_set_weight, struct cgroup *cgrp, u32 weight)
 static bool try_pick_next_cgroup(u64 *cgidp)
 {
 	struct bpf_rb_node *rb_node;
-	struct cgv_node_stash *stash;
 	struct cgv_node *cgv_node;
 	struct fcg_cgrp_ctx *cgc;
 	struct cgroup *cgrp;
@@ -693,12 +696,6 @@ static bool try_pick_next_cgroup(u64 *cgidp)
 	return true;
 
 out_stash:
-	stash = bpf_map_lookup_elem(&cgv_node_stash, &cgid);
-	if (!stash) {
-		stat_inc(FCG_STAT_PNC_GONE);
-		goto out_free;
-	}
-
 	/*
 	 * Paired with cmpxchg in cgrp_enqueued(). If they see the following
 	 * transition, they'll enqueue the cgroup. If they are earlier, we'll
@@ -711,15 +708,8 @@ out_stash:
 		bpf_rbtree_add(&cgv_tree, &cgv_node->rb_node, cgv_node_less);
 		bpf_spin_unlock(&cgv_tree_lock);
 		stat_inc(FCG_STAT_PNC_RACE);
-	} else {
-		cgv_node = bpf_kptr_xchg(&stash->node, cgv_node);
-		if (cgv_node) {
-			scx_bpf_error("unexpected !NULL cgv_node stash");
-			goto out_free;
-		}
+		return false;
 	}
 
-	return false;
-
 out_free:
 	bpf_obj_drop(cgv_node);

@@ -200,6 +200,9 @@ int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
 
 struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym;
 
+extern void *bpf_refcount_acquire_impl(void *kptr, void *meta) __ksym;
+#define bpf_refcount_acquire(kptr) bpf_refcount_acquire_impl(kptr, NULL)
+
 /* task */
 struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
 struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;