scx_userland: survive dispatch failures

If the scheduler fails to dispatch a task we immediately give up,
exiting with an error like the following:

 Failed to dispatch task 251 in 1
 EXIT: BPF scheduler unregistered

This scenario can be simulated by dramatically decreasing the value of
MAX_ENQUEUED_TASKS.

We can make the scheduler a little more robust simply by re-adding the
task that cannot be dispatched to vruntime_head and by not dispatching
any additional tasks in the same batch.

This can give the scheduler enough room, under such a "dispatch overload"
condition, to catch up and resume normal execution without crashing.

Moreover, introduce nr_vruntime_failed to report failed dispatch events
in the scheduler's statistics.

Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
This commit is contained in:
Andrea Righi 2023-12-13 21:53:41 +01:00
parent a68885f92f
commit 48bba8e4f6

View File

@ -57,7 +57,7 @@ static struct scx_userland *skel;
static struct bpf_link *ops_link;
/* Stats collected in user space. */
static __u64 nr_vruntime_enqueues, nr_vruntime_dispatches;
static __u64 nr_vruntime_enqueues, nr_vruntime_dispatches, nr_vruntime_failed;
/* The data structure containing tasks that are enqueued in user space. */
struct enqueued_task {
@ -145,8 +145,7 @@ static int dispatch_task(__s32 pid)
err = bpf_map_update_elem(dispatched_fd, NULL, &pid, 0);
if (err) {
fprintf(stderr, "Failed to dispatch task %d\n", pid);
exit_req = 1;
nr_vruntime_failed++;
} else {
nr_vruntime_dispatches++;
}
@ -256,8 +255,12 @@ static void dispatch_batch(void)
LIST_REMOVE(task, entries);
err = dispatch_task(pid);
if (err) {
fprintf(stderr, "Failed to dispatch task %d in %u\n",
pid, i);
/*
* If we fail to dispatch, put the task back to the
* vruntime_head list and stop dispatching additional
* tasks in this batch.
*/
LIST_INSERT_HEAD(&vruntime_head, task, entries);
return;
}
}
@ -287,6 +290,7 @@ static void *run_stats_printer(void *arg)
printf("|-----------------------|\n");
printf("| enq: %10llu |\n", nr_vruntime_enqueues);
printf("| disp: %10llu |\n", nr_vruntime_dispatches);
printf("| failed: %10llu |\n", nr_vruntime_failed);
printf("o-----------------------o\n");
printf("\n\n");
sleep(1);