Mirror of https://github.com/sched-ext/scx.git
Merge pull request #82 from sched-ext/htejun

    scx_flatcg: Fix fallout from direct dispatch API update

Commit b32d73ae4e
@@ -307,6 +307,17 @@ static void cgrp_enqueued(struct cgroup *cgrp, struct fcg_cgrp_ctx *cgc)
 	bpf_spin_unlock(&cgv_tree_lock);
 }
 
+static void set_bypassed_at(struct task_struct *p, struct fcg_task_ctx *taskc)
+{
+	/*
+	 * Tell fcg_stopping() that this bypassed the regular scheduling path
+	 * and should be force charged to the cgroup. 0 is used to indicate that
+	 * the task isn't bypassing, so if the current runtime is 0, go back by
+	 * one nanosecond.
+	 */
+	taskc->bypassed_at = p->se.sum_exec_runtime ?: (u64)-1;
+}
+
 s32 BPF_STRUCT_OPS(fcg_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
 {
 	struct fcg_task_ctx *taskc;
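For context, the new set_bypassed_at() marker is consumed when the task later stops running. Below is a rough sketch of that consumer side, under the assumption that fcg_stopping() charges the runtime accumulated since bypassed_at to the task's cgroup and then clears the marker; it is an illustration rather than the actual fcg_stopping() body, and the cvtime_delta field name is an assumption.

/* Sketch only: how a stopping callback could consume bypassed_at. */
static void sketch_charge_bypassed(struct task_struct *p,
				   struct fcg_task_ctx *taskc,
				   struct fcg_cgrp_ctx *cgc)
{
	u64 used;

	/* 0 means the task went through the regular scheduling path. */
	if (!taskc->bypassed_at)
		return;

	/* Runtime accumulated since the task bypassed the cgroup queues. */
	used = p->se.sum_exec_runtime - taskc->bypassed_at;
	taskc->bypassed_at = 0;

	/* Force-charge it to the cgroup (field name assumed). */
	__sync_fetch_and_add(&cgc->cvtime_delta, used);
}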
@@ -324,35 +335,12 @@ s32 BPF_STRUCT_OPS(fcg_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake
 	/*
 	 * If select_cpu_dfl() is recommending local enqueue, the target CPU is
 	 * idle. Follow it and charge the cgroup later in fcg_stopping() after
-	 * the fact. Use the same mechanism to deal with tasks with custom
-	 * affinities so that we don't have to worry about per-cgroup dq's
-	 * containing tasks that can't be executed from some CPUs.
+	 * the fact.
 	 */
-	if (is_idle || p->nr_cpus_allowed != nr_cpus) {
-		/*
-		 * Tell fcg_stopping() that this bypassed the regular scheduling
-		 * path and should be force charged to the cgroup. 0 is used to
-		 * indicate that the task isn't bypassing, so if the current
-		 * runtime is 0, go back by one nanosecond.
-		 */
-		taskc->bypassed_at = p->se.sum_exec_runtime ?: (u64)-1;
-
-		/*
-		 * The global dq is deprioritized as we don't want to let tasks
-		 * to boost themselves by constraining its cpumask. The
-		 * deprioritization is rather severe, so let's not apply that to
-		 * per-cpu kernel threads. This is ham-fisted. We probably wanna
-		 * implement per-cgroup fallback dq's instead so that we have
-		 * more control over when tasks with custom cpumask get issued.
-		 */
-		if (is_idle ||
-		    (p->nr_cpus_allowed == 1 && (p->flags & PF_KTHREAD))) {
-			stat_inc(FCG_STAT_LOCAL);
-			scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
-		} else {
-			stat_inc(FCG_STAT_GLOBAL);
-			scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
-		}
+	if (is_idle) {
+		set_bypassed_at(p, taskc);
+		stat_inc(FCG_STAT_LOCAL);
+		scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
 	}
 
 	return cpu;
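This hunk is the heart of the fix in scx_flatcg.bpf.c: fcg_select_cpu() now direct-dispatches only when the chosen CPU is idle, and the handling of tasks with custom affinities (which may end up on the global DSQ) moves into fcg_enqueue() in the next hunk. As a minimal sketch of the resulting select_cpu pattern, written against the scx_bpf_select_cpu_dfl() helper current sched_ext provides and the scx_bpf_dispatch() call already used above; this is illustrative, not flatcg's exact code:

/* Sketch: pick a CPU with the default helper and direct-dispatch only
 * when that CPU is idle; everything else is left to ops.enqueue(). */
s32 BPF_STRUCT_OPS(sketch_select_cpu, struct task_struct *p, s32 prev_cpu,
		   u64 wake_flags)
{
	bool is_idle = false;
	s32 cpu;

	cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
	if (is_idle)
		/* The selected CPU is idle: queue directly on its local DSQ. */
		scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);

	return cpu;
}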
@@ -370,6 +358,32 @@ void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags)
 		return;
 	}
 
+	/*
+	 * Use the direct dispatching and force charging to deal with tasks with
+	 * custom affinities so that we don't have to worry about per-cgroup
+	 * dq's containing tasks that can't be executed from some CPUs.
+	 */
+	if (p->nr_cpus_allowed != nr_cpus) {
+		set_bypassed_at(p, taskc);
+
+		/*
+		 * The global dq is deprioritized as we don't want to let tasks
+		 * to boost themselves by constraining its cpumask. The
+		 * deprioritization is rather severe, so let's not apply that to
+		 * per-cpu kernel threads. This is ham-fisted. We probably wanna
+		 * implement per-cgroup fallback dq's instead so that we have
+		 * more control over when tasks with custom cpumask get issued.
+		 */
+		if (p->nr_cpus_allowed == 1 && (p->flags & PF_KTHREAD)) {
+			stat_inc(FCG_STAT_LOCAL);
+			scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, enq_flags);
+		} else {
+			stat_inc(FCG_STAT_GLOBAL);
+			scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+		}
+		return;
+	}
+
 	cgrp = scx_bpf_task_cgroup(p);
 	cgc = find_cgrp_ctx(cgrp);
 	if (!cgc)
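A note on the p->nr_cpus_allowed != nr_cpus test above: any task whose allowed cpumask is narrower than the whole machine is treated as having a custom affinity and bypasses the per-cgroup DSQs. A small sketch of the supporting pieces, assuming (as in other scx example schedulers) that nr_cpus is a global the userspace loader fills in before attaching; the helper name is hypothetical.

/* Assumption: filled in by the userspace loader before attach,
 * e.g. from libbpf_num_possible_cpus(). */
const volatile u32 nr_cpus = 1;

/* Hypothetical helper: does this task run on fewer CPUs than the machine has? */
static bool has_custom_affinity(const struct task_struct *p)
{
	return p->nr_cpus_allowed != nr_cpus;
}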
@@ -696,6 +710,7 @@ out_stash:
 		bpf_spin_lock(&cgv_tree_lock);
 		bpf_rbtree_add(&cgv_tree, &cgv_node->rb_node, cgv_node_less);
 		bpf_spin_unlock(&cgv_tree_lock);
+		stat_inc(FCG_STAT_PNC_RACE);
 	} else {
 		cgv_node = bpf_kptr_xchg(&stash->node, cgv_node);
 		if (cgv_node) {
@@ -717,6 +732,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev)
 	struct fcg_cgrp_ctx *cgc;
 	struct cgroup *cgrp;
 	u64 now = bpf_ktime_get_ns();
+	bool picked_next = false;
 
 	cpuc = find_cpu_ctx();
 	if (!cpuc)
@@ -772,9 +788,20 @@ pick_next_cgroup:
 	}
 
 	bpf_repeat(CGROUP_MAX_RETRIES) {
-		if (try_pick_next_cgroup(&cpuc->cur_cgid))
+		if (try_pick_next_cgroup(&cpuc->cur_cgid)) {
+			picked_next = true;
 			break;
+		}
 	}
+
+	/*
+	 * This only happens if try_pick_next_cgroup() races against enqueue
+	 * path for more than CGROUP_MAX_RETRIES times, which is extremely
+	 * unlikely and likely indicates an underlying bug. There shouldn't be
+	 * any stall risk as the race is against enqueue.
+	 */
+	if (!picked_next)
+		stat_inc(FCG_STAT_PNC_FAIL);
 }
 
 s32 BPF_STRUCT_OPS(fcg_init_task, struct task_struct *p,
@@ -186,29 +186,31 @@ int main(int argc, char **argv)
 
 		printf("\n[SEQ %6lu cpu=%5.1lf hweight_gen=%" PRIu64 "]\n",
 		       seq++, cpu_util * 100.0, skel->data->hweight_gen);
-		printf(" act:%6llu deact:%6llu local:%6llu global:%6llu\n",
-		       stats[FCG_STAT_ACT],
-		       stats[FCG_STAT_DEACT],
-		       stats[FCG_STAT_LOCAL],
-		       stats[FCG_STAT_GLOBAL]);
-		printf("HWT skip:%6llu race:%6llu cache:%6llu update:%6llu\n",
-		       stats[FCG_STAT_HWT_SKIP],
-		       stats[FCG_STAT_HWT_RACE],
-		       stats[FCG_STAT_HWT_CACHE],
-		       stats[FCG_STAT_HWT_UPDATES]);
+		printf(" act:%6llu deact:%6llu global:%6llu local:%6llu\n",
+		       stats[FCG_STAT_ACT],
+		       stats[FCG_STAT_DEACT],
+		       stats[FCG_STAT_GLOBAL],
+		       stats[FCG_STAT_LOCAL]);
+		printf("HWT cache:%6llu update:%6llu skip:%6llu race:%6llu\n",
+		       stats[FCG_STAT_HWT_CACHE],
+		       stats[FCG_STAT_HWT_UPDATES],
+		       stats[FCG_STAT_HWT_SKIP],
+		       stats[FCG_STAT_HWT_RACE]);
 		printf("ENQ skip:%6llu race:%6llu\n",
 		       stats[FCG_STAT_ENQ_SKIP],
 		       stats[FCG_STAT_ENQ_RACE]);
 		printf("CNS keep:%6llu expire:%6llu empty:%6llu gone:%6llu\n",
 		       stats[FCG_STAT_CNS_KEEP],
 		       stats[FCG_STAT_CNS_EXPIRE],
 		       stats[FCG_STAT_CNS_EMPTY],
 		       stats[FCG_STAT_CNS_GONE]);
-		printf("PNC nocgrp:%6llu next:%6llu empty:%6llu gone:%6llu\n",
-		       stats[FCG_STAT_PNC_NO_CGRP],
-		       stats[FCG_STAT_PNC_NEXT],
-		       stats[FCG_STAT_PNC_EMPTY],
-		       stats[FCG_STAT_PNC_GONE]);
+		printf("PNC next:%6llu empty:%6llu nocgrp:%6llu gone:%6llu race:%6llu fail:%6llu\n",
+		       stats[FCG_STAT_PNC_NEXT],
+		       stats[FCG_STAT_PNC_EMPTY],
+		       stats[FCG_STAT_PNC_NO_CGRP],
+		       stats[FCG_STAT_PNC_GONE],
+		       stats[FCG_STAT_PNC_RACE],
+		       stats[FCG_STAT_PNC_FAIL]);
 		printf("BAD remove:%6llu\n",
 		       acc_stats[FCG_STAT_BAD_REMOVAL]);
 		fflush(stdout);
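This hunk is in the userspace loader (presumably scx_flatcg.c) and only reorders and extends the printed counters, which are indexed by enum fcg_stat_idx from the next hunk. As a sketch of how such per-CPU counters are usually aggregated on the userspace side before printing, assuming the BPF program keeps them in a per-CPU array map; the plumbing here is illustrative rather than scx_flatcg.c's exact code:

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

/* Sum one per-CPU counter slot into a single value. */
static __u64 sum_stat(int stats_map_fd, __u32 idx)
{
	int nr_cpus = libbpf_num_possible_cpus();

	if (nr_cpus <= 0)
		return 0;

	__u64 vals[nr_cpus];
	__u64 sum = 0;

	/* For per-CPU maps, the lookup fills one value per possible CPU. */
	if (bpf_map_lookup_elem(stats_map_fd, &idx, vals))
		return 0;

	for (int cpu = 0; cpu < nr_cpus; cpu++)
		sum += vals[cpu];
	return sum;
}

The map file descriptor would typically come from bpf_map__fd() on the skeleton's stats map.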
@@ -28,6 +28,8 @@ enum fcg_stat_idx {
 	FCG_STAT_PNC_NEXT,
 	FCG_STAT_PNC_EMPTY,
 	FCG_STAT_PNC_GONE,
+	FCG_STAT_PNC_RACE,
+	FCG_STAT_PNC_FAIL,
 
 	FCG_STAT_BAD_REMOVAL,
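On the BPF side, the two new enum values are bumped via stat_inc(), whose usual shape in the scx example schedulers is a per-CPU array keyed by the stat index. The following is a sketch under that assumption; the map layout and the FCG_NR_STATS terminator are assumed, not copied from the actual sources.

/* Sketch: per-CPU stat counters indexed by enum fcg_stat_idx. */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u64));
	__uint(max_entries, FCG_NR_STATS);	/* assumed final count entry of the enum */
} stats SEC(".maps");

static void stat_inc(enum fcg_stat_idx idx)
{
	u32 idx_v = idx;
	u64 *cnt_p = bpf_map_lookup_elem(&stats, &idx_v);

	if (cnt_p)
		(*cnt_p)++;
}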