scx: Sync from kernel, some schedulers are broken

Sync from kernel to receive new vmlinux.h and the updates to common headers.
This includes the following updates:

- scx_bpf_switch_all() is replaced by SCX_OPS_SWITCH_PARTIAL flag.

- sched_ext_ops.exit_dump_len added to allow customizing dump buffer size.

- scx_bpf_exit() added.

- Common headers updated to provide backward compatibility in a way which
  hides most complexities from scheduler implementations.

scx_simple, qmap, central and flatcg are updated accordingly. Other
schedulers are broken for the moment.
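
With the updated common headers, a scheduler no longer calls
scx_bpf_switch_all() from ops.init(); switching behavior is requested from
user space through an ops flag instead. A minimal sketch of the pattern using
the compat helpers added here (the example_* and opt_partial names are
illustrative; the real changes are in the qmap diff below):

  /* BPF side: call the compat wrapper unconditionally from ops.init(). It
   * invokes scx_bpf_switch_all() only on kernels that predate
   * SCX_OPS_SWITCH_PARTIAL and is a no-op otherwise. */
  s32 BPF_STRUCT_OPS_SLEEPABLE(example_init)
  {
          __COMPAT_scx_bpf_switch_all();
          return 0;
  }

  /* user space side: opt into partial switching through the ops flag,
   * which compat.h resolves to 0 on kernels without the feature */
  if (opt_partial)
          skel->struct_ops.example_ops->flags |= __COMPAT_SCX_OPS_SWITCH_PARTIAL;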
Tejun Heo, 2024-03-07 08:05:18 -10:00
parent 04c9e7fe9d, commit 9447cb27b2
15 changed files with 141510 additions and 183 deletions

scheds/c/scx_central.bpf.c

@@ -55,7 +55,6 @@ enum {
 	TIMER_INTERVAL_NS	= 1 * MS_TO_NS,
 };
 
-const volatile bool switch_partial;
 const volatile s32 central_cpu;
 const volatile u32 nr_cpu_ids = 1;	/* !0 for veristat, set during init */
 const volatile u64 slice_ns = SCX_SLICE_DFL;
@@ -65,7 +64,7 @@ u64 nr_total, nr_locals, nr_queued, nr_lost_pids;
 u64 nr_timers, nr_dispatches, nr_mismatches, nr_retries;
 u64 nr_overflows;
 
-struct user_exit_info uei;
+UEI_DEFINE(uei);
 
 struct {
 	__uint(type, BPF_MAP_TYPE_QUEUE);
@@ -176,7 +175,7 @@ static bool dispatch_to_cpu(s32 cpu)
 		scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, SCX_SLICE_INF, 0);
 
 		if (cpu != central_cpu)
-			__COMPAT_scx_bpf_kick_cpu_IDLE(cpu);
+			scx_bpf_kick_cpu(cpu, __COMPAT_SCX_KICK_IDLE);
 
 		bpf_task_release(p);
 		return true;
@@ -306,9 +305,6 @@ int BPF_STRUCT_OPS_SLEEPABLE(central_init)
 	struct bpf_timer *timer;
 	int ret;
 
-	if (!switch_partial)
-		scx_bpf_switch_all();
-
 	ret = scx_bpf_create_dsq(FALLBACK_DSQ_ID, -1);
 	if (ret)
 		return ret;
@@ -344,24 +340,22 @@ int BPF_STRUCT_OPS_SLEEPABLE(central_init)
 
 void BPF_STRUCT_OPS(central_exit, struct scx_exit_info *ei)
 {
-	uei_record(&uei, ei);
+	UEI_RECORD(uei, ei);
 }
 
-SEC(".struct_ops.link")
-struct sched_ext_ops central_ops = {
-	/*
-	 * We are offloading all scheduling decisions to the central CPU and
-	 * thus being the last task on a given CPU doesn't mean anything
-	 * special. Enqueue the last tasks like any other tasks.
-	 */
-	.flags		= SCX_OPS_ENQ_LAST,
-	.select_cpu	= (void *)central_select_cpu,
-	.enqueue	= (void *)central_enqueue,
-	.dispatch	= (void *)central_dispatch,
-	.running	= (void *)central_running,
-	.stopping	= (void *)central_stopping,
-	.init		= (void *)central_init,
-	.exit		= (void *)central_exit,
-	.name		= "central",
-};
+SCX_OPS_DEFINE(central_ops,
+	       /*
+		* We are offloading all scheduling decisions to the central CPU
+		* and thus being the last task on a given CPU doesn't mean
+		* anything special. Enqueue the last tasks like any other tasks.
+		*/
+	       .flags		= SCX_OPS_ENQ_LAST,
+	       .select_cpu	= (void *)central_select_cpu,
+	       .enqueue		= (void *)central_enqueue,
+	       .dispatch	= (void *)central_dispatch,
+	       .running		= (void *)central_running,
+	       .stopping	= (void *)central_stopping,
+	       .init		= (void *)central_init,
+	       .exit		= (void *)central_exit,
+	       .name		= "central");

scheds/c/scx_central.c

@@ -24,7 +24,6 @@ const char help_fmt[] =
 "\n"
 "  -s SLICE_US   Override slice duration\n"
 "  -c CPU        Override the central CPU (default: 0)\n"
-"  -p            Switch only tasks on SCHED_EXT policy intead of all\n"
 "  -h            Display this help and exit\n";
 
 static volatile int exit_req;
@@ -61,9 +60,6 @@ int main(int argc, char **argv)
 		case 'c':
 			skel->rodata->central_cpu = strtoul(optarg, NULL, 0);
 			break;
-		case 'p':
-			skel->rodata->switch_partial = true;
-			break;
 		default:
 			fprintf(stderr, help_fmt, basename(argv[0]));
 			return opt != 'h';
@@ -74,7 +70,7 @@ int main(int argc, char **argv)
 	RESIZE_ARRAY(data, cpu_gimme_task, skel->rodata->nr_cpu_ids);
 	RESIZE_ARRAY(data, cpu_started_at, skel->rodata->nr_cpu_ids);
 
-	SCX_BUG_ON(scx_central__load(skel), "Failed to load skel");
+	SCX_OPS_LOAD(skel, central_ops, scx_central, uei);
 
 	/*
 	 * Affinitize the loading thread to the central CPU, as:
@@ -96,13 +92,12 @@ int main(int argc, char **argv)
 		   skel->rodata->central_cpu, skel->rodata->nr_cpu_ids - 1);
 	CPU_FREE(cpuset);
 
-	link = bpf_map__attach_struct_ops(skel->maps.central_ops);
-	SCX_BUG_ON(!link, "Failed to attach struct_ops");
+	link = SCX_OPS_ATTACH(skel, central_ops);
 
 	if (!skel->data->timer_pinned)
 		printf("WARNING : BPF_F_TIMER_CPU_PIN not available, timer not pinned to central\n");
 
-	while (!exit_req && !uei_exited(&skel->bss->uei)) {
+	while (!exit_req && !UEI_EXITED(skel, uei)) {
 		printf("[SEQ %llu]\n", seq++);
 		printf("total   :%10" PRIu64 "    local:%10" PRIu64 "   queued:%10" PRIu64 "  lost:%10" PRIu64 "\n",
 		       skel->bss->nr_total,
@@ -121,7 +116,7 @@ int main(int argc, char **argv)
 	}
 
 	bpf_link__destroy(link);
-	uei_print(&skel->bss->uei);
+	UEI_REPORT(skel, uei);
 	scx_central__destroy(skel);
 	return 0;
 }

scheds/c/scx_flatcg.bpf.c

@@ -56,10 +56,9 @@ char _license[] SEC("license") = "GPL";
 const volatile u32 nr_cpus = 32;	/* !0 for veristat, set during init */
 const volatile u64 cgrp_slice_ns = SCX_SLICE_DFL;
 const volatile bool fifo_sched;
-const volatile bool switch_partial;
 
 u64 cvtime_now;
-struct user_exit_info uei;
+UEI_DEFINE(uei);
 
 struct {
 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
@@ -917,34 +916,24 @@ void BPF_STRUCT_OPS(fcg_cgroup_move, struct task_struct *p,
 	p->scx.dsq_vtime = to_cgc->tvtime_now + vtime_delta;
 }
 
-s32 BPF_STRUCT_OPS(fcg_init)
-{
-	if (!switch_partial)
-		scx_bpf_switch_all();
-	return 0;
-}
-
 void BPF_STRUCT_OPS(fcg_exit, struct scx_exit_info *ei)
 {
-	uei_record(&uei, ei);
+	UEI_RECORD(uei, ei);
 }
 
-SEC(".struct_ops.link")
-struct sched_ext_ops flatcg_ops = {
-	.select_cpu		= (void *)fcg_select_cpu,
-	.enqueue		= (void *)fcg_enqueue,
-	.dispatch		= (void *)fcg_dispatch,
-	.runnable		= (void *)fcg_runnable,
-	.running		= (void *)fcg_running,
-	.stopping		= (void *)fcg_stopping,
-	.quiescent		= (void *)fcg_quiescent,
-	.init_task		= (void *)fcg_init_task,
-	.cgroup_set_weight	= (void *)fcg_cgroup_set_weight,
-	.cgroup_init		= (void *)fcg_cgroup_init,
-	.cgroup_exit		= (void *)fcg_cgroup_exit,
-	.cgroup_move		= (void *)fcg_cgroup_move,
-	.init			= (void *)fcg_init,
-	.exit			= (void *)fcg_exit,
-	.flags			= SCX_OPS_CGROUP_KNOB_WEIGHT | SCX_OPS_ENQ_EXITING,
-	.name			= "flatcg",
-};
+SCX_OPS_DEFINE(flatcg_ops,
+	       .select_cpu		= (void *)fcg_select_cpu,
+	       .enqueue			= (void *)fcg_enqueue,
+	       .dispatch		= (void *)fcg_dispatch,
+	       .runnable		= (void *)fcg_runnable,
+	       .running			= (void *)fcg_running,
+	       .stopping		= (void *)fcg_stopping,
+	       .quiescent		= (void *)fcg_quiescent,
+	       .init_task		= (void *)fcg_init_task,
+	       .cgroup_set_weight	= (void *)fcg_cgroup_set_weight,
+	       .cgroup_init		= (void *)fcg_cgroup_init,
+	       .cgroup_exit		= (void *)fcg_cgroup_exit,
+	       .cgroup_move		= (void *)fcg_cgroup_move,
+	       .exit			= (void *)fcg_exit,
+	       .flags			= SCX_OPS_CGROUP_KNOB_WEIGHT | SCX_OPS_ENQ_EXITING,
+	       .name			= "flatcg");

scheds/c/scx_flatcg.c

@@ -31,7 +31,6 @@ const char help_fmt[] =
 "  -s SLICE_US   Override slice duration\n"
 "  -i INTERVAL   Report interval\n"
 "  -f            Use FIFO scheduling instead of weighted vtime scheduling\n"
-"  -p            Switch only tasks on SCHED_EXT policy intead of all\n"
 "  -h            Display this help and exit\n";
 
 static volatile int exit_req;
@@ -150,9 +149,6 @@ int main(int argc, char **argv)
 		case 'f':
 			skel->rodata->fifo_sched = true;
 			break;
-		case 'p':
-			skel->rodata->switch_partial = true;
-			break;
 		case 'h':
 		default:
 			fprintf(stderr, help_fmt, basename(argv[0]));
@@ -165,12 +161,10 @@ int main(int argc, char **argv)
 	       (double)intv_ts.tv_sec + (double)intv_ts.tv_nsec / 1000000000.0,
 	       dump_cgrps);
 
-	SCX_BUG_ON(scx_flatcg__load(skel), "Failed to load skel");
-
-	link = bpf_map__attach_struct_ops(skel->maps.flatcg_ops);
-	SCX_BUG_ON(!link, "Failed to attach struct_ops");
+	SCX_OPS_LOAD(skel, flatcg_ops, scx_flatcg, uei);
+	link = SCX_OPS_ATTACH(skel, flatcg_ops);
 
-	while (!exit_req && !uei_exited(&skel->bss->uei)) {
+	while (!exit_req && !UEI_EXITED(skel, uei)) {
 		__u64 acc_stats[FCG_NR_STATS];
 		__u64 stats[FCG_NR_STATS];
 		float cpu_util;
@@ -219,7 +213,7 @@ int main(int argc, char **argv)
 	}
 
 	bpf_link__destroy(link);
-	uei_print(&skel->bss->uei);
+	UEI_REPORT(skel, uei);
 	scx_flatcg__destroy(skel);
 	return 0;
 }

scheds/c/scx_qmap.bpf.c

@@ -35,7 +35,7 @@ const volatile s32 disallow_tgid;
 
 u32 test_error_cnt;
 
-struct user_exit_info uei;
+UEI_DEFINE(uei);
 
 struct qmap {
 	__uint(type, BPF_MAP_TYPE_QUEUE);
@@ -192,7 +192,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
 		scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, 0, enq_flags);
 		cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
 		if (cpu >= 0)
-			__COMPAT_scx_bpf_kick_cpu_IDLE(cpu);
+			scx_bpf_kick_cpu(cpu, __COMPAT_SCX_KICK_IDLE);
 		return;
 	}
@@ -374,27 +374,25 @@ s32 BPF_STRUCT_OPS(qmap_init_task, struct task_struct *p,
 
 s32 BPF_STRUCT_OPS(qmap_init)
 {
-	if (!switch_partial)
-		scx_bpf_switch_all();
+	__COMPAT_scx_bpf_switch_all();
 	return 0;
 }
 
 void BPF_STRUCT_OPS(qmap_exit, struct scx_exit_info *ei)
 {
-	uei_record(&uei, ei);
+	UEI_RECORD(uei, ei);
 }
 
-SEC(".struct_ops.link")
-struct sched_ext_ops qmap_ops = {
-	.select_cpu		= (void *)qmap_select_cpu,
-	.enqueue		= (void *)qmap_enqueue,
-	.dequeue		= (void *)qmap_dequeue,
-	.dispatch		= (void *)qmap_dispatch,
-	.core_sched_before	= (void *)qmap_core_sched_before,
-	.cpu_release		= (void *)qmap_cpu_release,
-	.init_task		= (void *)qmap_init_task,
-	.init			= (void *)qmap_init,
-	.exit			= (void *)qmap_exit,
-	.flags			= SCX_OPS_ENQ_LAST,
-	.timeout_ms		= 5000U,
-	.name			= "qmap",
-};
+SCX_OPS_DEFINE(qmap_ops,
+	       .select_cpu		= (void *)qmap_select_cpu,
+	       .enqueue			= (void *)qmap_enqueue,
+	       .dequeue			= (void *)qmap_dequeue,
+	       .dispatch		= (void *)qmap_dispatch,
+	       .core_sched_before	= (void *)qmap_core_sched_before,
+	       .cpu_release		= (void *)qmap_cpu_release,
+	       .init_task		= (void *)qmap_init_task,
+	       .init			= (void *)qmap_init,
+	       .exit			= (void *)qmap_exit,
+	       .flags			= SCX_OPS_ENQ_LAST,
+	       .timeout_ms		= 5000U,
+	       .name			= "qmap");

scheds/c/scx_qmap.c

@@ -19,7 +19,8 @@ const char help_fmt[] =
 "\n"
 "See the top-level comment in .bpf.c for more details.\n"
 "\n"
-"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-d PID] [-p]\n"
+"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-d PID]\n"
+"       [-D LEN] [-p]\n"
 "\n"
 "  -s SLICE_US   Override slice duration\n"
 "  -e COUNT      Trigger scx_bpf_error() after COUNT enqueues\n"
@@ -27,6 +28,7 @@ const char help_fmt[] =
 "  -T COUNT      Stall every COUNT'th kernel thread\n"
 "  -l COUNT      Trigger dispatch infinite looping after COUNT dispatches\n"
 "  -d PID        Disallow a process from switching into SCHED_EXT (-1 for self)\n"
+"  -D LEN        Set scx_exit_info.dump buffer length\n"
 "  -p            Switch only tasks on SCHED_EXT policy intead of all\n"
 "  -h            Display this help and exit\n";
@@ -51,7 +53,7 @@ int main(int argc, char **argv)
 	skel = scx_qmap__open();
 	SCX_BUG_ON(!skel, "Failed to open skel");
 
-	while ((opt = getopt(argc, argv, "s:e:t:T:l:d:ph")) != -1) {
+	while ((opt = getopt(argc, argv, "s:e:t:T:l:d:D:ph")) != -1) {
 		switch (opt) {
 		case 's':
 			skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
@@ -73,8 +75,12 @@ int main(int argc, char **argv)
 			if (skel->rodata->disallow_tgid < 0)
 				skel->rodata->disallow_tgid = getpid();
 			break;
+		case 'D':
+			skel->struct_ops.qmap_ops->exit_dump_len = strtoul(optarg, NULL, 0);
+			break;
 		case 'p':
-			skel->rodata->switch_partial = true;
+			skel->struct_ops.qmap_ops->flags |= __COMPAT_SCX_OPS_SWITCH_PARTIAL;
 			break;
 		default:
 			fprintf(stderr, help_fmt, basename(argv[0]));
@@ -82,12 +88,10 @@ int main(int argc, char **argv)
 		}
 	}
 
-	SCX_BUG_ON(scx_qmap__load(skel), "Failed to load skel");
-
-	link = bpf_map__attach_struct_ops(skel->maps.qmap_ops);
-	SCX_BUG_ON(!link, "Failed to attach struct_ops");
+	SCX_OPS_LOAD(skel, qmap_ops, scx_qmap, uei);
+	link = SCX_OPS_ATTACH(skel, qmap_ops);
 
-	while (!exit_req && !uei_exited(&skel->bss->uei)) {
+	while (!exit_req && !UEI_EXITED(skel, uei)) {
 		long nr_enqueued = skel->bss->nr_enqueued;
 		long nr_dispatched = skel->bss->nr_dispatched;
@@ -100,7 +104,7 @@ int main(int argc, char **argv)
 	}
 
 	bpf_link__destroy(link);
-	uei_print(&skel->bss->uei);
+	UEI_REPORT(skel, uei);
 	scx_qmap__destroy(skel);
 	return 0;
 }

scheds/c/scx_simple.bpf.c

@@ -25,10 +25,9 @@
 
 char _license[] SEC("license") = "GPL";
 
 const volatile bool fifo_sched;
-const volatile bool switch_partial;
 
 static u64 vtime_now;
-struct user_exit_info uei;
+UEI_DEFINE(uei);
 
 #define SHARED_DSQ 0
@@ -130,26 +129,21 @@ void BPF_STRUCT_OPS(simple_enable, struct task_struct *p)
 
 s32 BPF_STRUCT_OPS_SLEEPABLE(simple_init)
 {
-	if (!switch_partial)
-		scx_bpf_switch_all();
-
 	return scx_bpf_create_dsq(SHARED_DSQ, -1);
 }
 
 void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei)
 {
-	uei_record(&uei, ei);
+	UEI_RECORD(uei, ei);
 }
 
-SEC(".struct_ops.link")
-struct sched_ext_ops simple_ops = {
-	.select_cpu	= (void *)simple_select_cpu,
-	.enqueue	= (void *)simple_enqueue,
-	.dispatch	= (void *)simple_dispatch,
-	.running	= (void *)simple_running,
-	.stopping	= (void *)simple_stopping,
-	.enable		= (void *)simple_enable,
-	.init		= (void *)simple_init,
-	.exit		= (void *)simple_exit,
-	.name		= "simple",
-};
+SCX_OPS_DEFINE(simple_ops,
+	       .select_cpu	= (void *)simple_select_cpu,
+	       .enqueue		= (void *)simple_enqueue,
+	       .dispatch	= (void *)simple_dispatch,
+	       .running		= (void *)simple_running,
+	       .stopping	= (void *)simple_stopping,
+	       .enable		= (void *)simple_enable,
+	       .init		= (void *)simple_init,
+	       .exit		= (void *)simple_exit,
+	       .name		= "simple");

scheds/c/scx_simple.c

@@ -20,7 +20,6 @@ const char help_fmt[] =
 "Usage: %s [-f] [-p]\n"
 "\n"
 "  -f            Use FIFO scheduling instead of weighted vtime scheduling\n"
-"  -p            Switch only tasks on SCHED_EXT policy intead of all\n"
 "  -h            Display this help and exit\n";
 
 static volatile int exit_req;
@@ -69,21 +68,16 @@ int main(int argc, char **argv)
 		case 'f':
 			skel->rodata->fifo_sched = true;
 			break;
-		case 'p':
-			skel->rodata->switch_partial = true;
-			break;
 		default:
 			fprintf(stderr, help_fmt, basename(argv[0]));
 			return opt != 'h';
 		}
 	}
 
-	SCX_BUG_ON(scx_simple__load(skel), "Failed to load skel");
-
-	link = bpf_map__attach_struct_ops(skel->maps.simple_ops);
-	SCX_BUG_ON(!link, "Failed to attach struct_ops");
+	SCX_OPS_LOAD(skel, simple_ops, scx_simple, uei);
+	link = SCX_OPS_ATTACH(skel, simple_ops);
 
-	while (!exit_req && !uei_exited(&skel->bss->uei)) {
+	while (!exit_req && !UEI_EXITED(skel, uei)) {
 		__u64 stats[2];
 
 		read_stats(skel, stats);
@@ -93,7 +87,7 @@ int main(int argc, char **argv)
 	}
 
 	bpf_link__destroy(link);
-	uei_print(&skel->bss->uei);
+	UEI_REPORT(skel, uei);
 	scx_simple__destroy(skel);
 	return 0;
 }

scheds/include/scx/common.bpf.h

@@ -29,31 +29,55 @@ static inline void ___vmlinux_h_sanity_check___(void)
 }
 
 void scx_bpf_error_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym;
+void scx_bpf_exit_bstr(s64 exit_code, char *fmt,
+		       unsigned long long *data, u32 data__sz) __ksym;
 
 static inline __attribute__((format(printf, 1, 2)))
-void ___scx_bpf_error_format_checker(const char *fmt, ...) {}
+void ___scx_bpf_exit_format_checker(const char *fmt, ...) {}
+
+/*
+ * Helper macro for initializing the fmt and variadic argument inputs to both
+ * bstr exit kfuncs. Callers to this function should use ___fmt and ___param to
+ * refer to the initialized list of inputs to the bstr kfunc.
+ */
+#define scx_bpf_exit_preamble(fmt, args...)					\
+	static char ___fmt[] = fmt;						\
+	/*									\
+	 * Note that __param[] must have at least one				\
+	 * element to keep the verifier happy.					\
+	 */									\
+	unsigned long long ___param[___bpf_narg(args) ?: 1] = {};		\
+										\
+	_Pragma("GCC diagnostic push")						\
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")			\
+	___bpf_fill(___param, args);						\
+	_Pragma("GCC diagnostic pop")						\
+
+/*
+ * scx_bpf_exit() wraps the scx_bpf_exit_bstr() kfunc with variadic arguments
+ * instead of an array of u64. Using this macro will cause the scheduler to
+ * exit cleanly with the specified exit code being passed to user space.
+ */
+#define scx_bpf_exit(code, fmt, args...)					\
+({										\
+	scx_bpf_exit_preamble(fmt, args)					\
+	scx_bpf_exit_bstr(code, ___fmt, ___param, sizeof(___param));		\
+	___scx_bpf_exit_format_checker(fmt, ##args);				\
+})
 
 /*
  * scx_bpf_error() wraps the scx_bpf_error_bstr() kfunc with variadic arguments
- * instead of an array of u64. Note that __param[] must have at least one
- * element to keep the verifier happy.
+ * instead of an array of u64. Invoking this macro will cause the scheduler to
+ * exit in an erroneous state, with diagnostic information being passed to the
+ * user.
  */
 #define scx_bpf_error(fmt, args...)						\
 ({										\
-	static char ___fmt[] = fmt;						\
-	unsigned long long ___param[___bpf_narg(args) ?: 1] = {};		\
-										\
-	_Pragma("GCC diagnostic push")						\
-	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")			\
-	___bpf_fill(___param, args);						\
-	_Pragma("GCC diagnostic pop")						\
-										\
+	scx_bpf_exit_preamble(fmt, args)					\
 	scx_bpf_error_bstr(___fmt, ___param, sizeof(___param));			\
-										\
-	___scx_bpf_error_format_checker(fmt, ##args);				\
+	___scx_bpf_exit_format_checker(fmt, ##args);				\
 })
 
-void scx_bpf_switch_all(void) __ksym;
 s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym;
 bool scx_bpf_consume(u64 dsq_id) __ksym;
 void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym;

scheds/include/scx/common.h

@@ -15,8 +15,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
-
-#include "user_exit_info.h"
+#include <errno.h>
 
 typedef uint8_t u8;
 typedef uint16_t u16;
@@ -66,4 +65,7 @@ typedef int64_t s64;
 		bpf_map__initial_value(skel->maps.elfsec##_##arr, &__sz);	\
 	} while (0)
 
+#include "user_exit_info.h"
+#include "compat.h"
+
 #endif	/* __SCHED_EXT_COMMON_H */

scheds/include/scx/compat.bpf.h

@@ -7,12 +7,41 @@
 #ifndef __SCX_COMPAT_BPF_H
 #define __SCX_COMPAT_BPF_H
 
-static inline void __COMPAT_scx_bpf_kick_cpu_IDLE(s32 cpu)
-{
-	if (bpf_core_enum_value_exists(enum scx_kick_flags, SCX_KICK_IDLE))
-		scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
-	else
-		scx_bpf_kick_cpu(cpu, 0);
-}
+#define __COMPAT_ENUM_OR_ZERO(__type, __ent)					\
+({										\
+	__type __ret = 0;							\
+	if (bpf_core_enum_value_exists(__type, __ent))				\
+		__ret = __ent;							\
+	__ret;									\
+})
+
+/*
+ * %SCX_KICK_IDLE is a later addition. To support both before and after, use
+ * %__COMPAT_SCX_KICK_IDLE which becomes 0 on kernels which don't support it.
+ */
+#define __COMPAT_SCX_KICK_IDLE							\
+	__COMPAT_ENUM_OR_ZERO(enum scx_kick_flags, SCX_KICK_IDLE)
+
+/*
+ * scx_switch_all() was replaced by %SCX_OPS_SWITCH_PARTIAL. See
+ * %__COMPAT_SCX_OPS_SWITCH_PARTIAL in compat.h.
+ */
+void scx_bpf_switch_all(void) __ksym __weak;
+static inline void __COMPAT_scx_bpf_switch_all(void)
+{
+	if (!bpf_core_enum_value_exists(enum scx_ops_flags, SCX_OPS_SWITCH_PARTIAL))
+		scx_bpf_switch_all();
+}
 
-#endif
+/*
+ * Define sched_ext_ops. This may be expanded to define multiple variants for
+ * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
+ */
+#define SCX_OPS_DEFINE(__name, ...)						\
+	SEC(".struct_ops.link")							\
+	struct sched_ext_ops __name = {						\
+		__VA_ARGS__,							\
+	};
+
+#endif	/* __SCX_COMPAT_BPF_H */

scheds/include/scx/compat.h (new file)

@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2024 Tejun Heo <tj@kernel.org>
+ * Copyright (c) 2024 David Vernet <dvernet@meta.com>
+ */
+#ifndef __SCX_COMPAT_H
+#define __SCX_COMPAT_H
+
+#include <bpf/btf.h>
+
+struct btf *__COMPAT_vmlinux_btf __attribute__((weak));
+
+static inline void __COMPAT_load_vmlinux_btf(void)
+{
+	if (!__COMPAT_vmlinux_btf) {
+		__COMPAT_vmlinux_btf = btf__load_vmlinux_btf();
+		SCX_BUG_ON(!__COMPAT_vmlinux_btf, "btf__load_vmlinux_btf()");
+	}
+}
+
+static inline bool __COMPAT_read_enum(const char *type, const char *name, u64 *v)
+{
+	const struct btf_type *t;
+	const char *n;
+	s32 tid;
+	int i;
+
+	__COMPAT_load_vmlinux_btf();
+
+	tid = btf__find_by_name_kind(__COMPAT_vmlinux_btf, type, BTF_KIND_ENUM);
+	if (tid < 0)
+		return false;
+
+	t = btf__type_by_id(__COMPAT_vmlinux_btf, tid);
+	SCX_BUG_ON(!t, "btf__type_by_id(%d)", tid);
+
+	if (btf_is_enum(t)) {
+		struct btf_enum *e = btf_enum(t);
+
+		for (i = 0; i < BTF_INFO_VLEN(t->info); i++) {
+			n = btf__name_by_offset(__COMPAT_vmlinux_btf, e[i].name_off);
+			SCX_BUG_ON(!n, "btf__name_by_offset()");
+			if (!strcmp(n, name)) {
+				*v = e[i].val;
+				return true;
+			}
+		}
+	} else if (btf_is_enum64(t)) {
+		struct btf_enum64 *e = btf_enum64(t);
+
+		for (i = 0; i < BTF_INFO_VLEN(t->info); i++) {
+			n = btf__name_by_offset(__COMPAT_vmlinux_btf, e[i].name_off);
+			SCX_BUG_ON(!n, "btf__name_by_offset()");
+			if (!strcmp(n, name)) {
+				*v = btf_enum64_value(&e[i]);
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+#define __COMPAT_ENUM_OR_ZERO(__type, __ent)					\
+({										\
+	u64 __val = 0;								\
+	__COMPAT_read_enum(__type, __ent, &__val);				\
+	__val;									\
+})
+
+static inline bool __COMPAT_struct_has_field(const char *type, const char *field)
+{
+	const struct btf_type *t;
+	const struct btf_member *m;
+	const char *n;
+	s32 tid;
+	int i;
+
+	__COMPAT_load_vmlinux_btf();
+
+	tid = btf__find_by_name_kind(__COMPAT_vmlinux_btf, type, BTF_KIND_STRUCT);
+	if (tid < 0)
+		return false;
+
+	t = btf__type_by_id(__COMPAT_vmlinux_btf, tid);
+	SCX_BUG_ON(!t, "btf__type_by_id(%d)", tid);
+
+	m = btf_members(t);
+
+	for (i = 0; i < BTF_INFO_VLEN(t->info); i++) {
+		n = btf__name_by_offset(__COMPAT_vmlinux_btf, m[i].name_off);
+		SCX_BUG_ON(!n, "btf__name_by_offset()");
+		if (!strcmp(n, field))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * An ops flag, %SCX_OPS_SWITCH_PARTIAL, replaced scx_bpf_switch_all() which had
+ * to be called from ops.init(). To support both before and after, use both
+ * %__COMPAT_SCX_OPS_SWITCH_PARTIAL and %__COMPAT_scx_bpf_switch_all() defined
+ * in compat.bpf.h.
+ */
+#define __COMPAT_SCX_OPS_SWITCH_PARTIAL						\
+	__COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL")
+
+/*
+ * struct sched_ext_ops can change over time. If compat.bpf.h::SCX_OPS_DEFINE()
+ * is used to define ops and compat.h::SCX_OPS_LOAD/ATTACH() are used to load
+ * and attach it, backward compatibility is automatically maintained where
+ * reasonable.
+ *
+ * - sched_ext_ops.exit_dump_len was added later. On kernels which don't support
+ *   it, the value is ignored and a warning is triggered if the value is
+ *   requested to be non-zero.
+ */
+#define SCX_OPS_LOAD(__skel, __ops_name, __scx_name, __uei_name) ({		\
+	UEI_SET_SIZE(__skel, __ops_name, __uei_name);				\
+	if (!__COMPAT_struct_has_field("sched_ext_ops", "exit_dump_len") &&	\
+	    (__skel)->struct_ops.__ops_name->exit_dump_len) {			\
+		fprintf(stderr, "WARNING: kernel doesn't support setting exit dump len\n"); \
+		(__skel)->struct_ops.__ops_name->exit_dump_len = 0;		\
+	}									\
+	SCX_BUG_ON(__scx_name##__load((__skel)), "Failed to load skel");	\
+})
+
+#define SCX_OPS_ATTACH(__skel, __ops_name) ({					\
+	struct bpf_link *__link;						\
+	__link = bpf_map__attach_struct_ops((__skel)->maps.__ops_name);	\
+	SCX_BUG_ON(!__link, "Failed to attach struct_ops");			\
+	__link;									\
+})
+
+#endif	/* __SCX_COMPAT_H */
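
Taken together with the user_exit_info.h changes below, the intended loading
sequence in a scheduler's user space binary looks roughly like this. A sketch
assembled from the scx_simple.c changes above (the sleep(1) poll stands in for
each scheduler's stats loop), not code carried by this commit:

  struct scx_simple *skel;
  struct bpf_link *link;

  skel = scx_simple__open();
  SCX_BUG_ON(!skel, "Failed to open skel");

  /* sizes the exit dump buffer via UEI_SET_SIZE(), then loads the skel */
  SCX_OPS_LOAD(skel, simple_ops, scx_simple, uei);
  link = SCX_OPS_ATTACH(skel, simple_ops);

  while (!exit_req && !UEI_EXITED(skel, uei))
          sleep(1);

  bpf_link__destroy(link);
  UEI_REPORT(skel, uei);          /* prints reason/msg and any debug dump */
  scx_simple__destroy(skel);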

scheds/include/scx/user_exit_info.h

@@ -11,16 +11,16 @@
 #define __USER_EXIT_INFO_H
 
 enum uei_sizes {
-	UEI_REASON_SIZE	= 128,
-	UEI_MSG_SIZE	= 1024,
-	UEI_DUMP_SIZE	= 32768,
+	UEI_REASON_LEN		= 128,
+	UEI_MSG_LEN		= 1024,
+	UEI_DUMP_DFL_LEN	= 32768,
 };
 
 struct user_exit_info {
 	int		kind;
-	char		reason[UEI_REASON_SIZE];
-	char		msg[UEI_MSG_SIZE];
-	char		dump[UEI_DUMP_SIZE];
+	s64		exit_code;
+	char		reason[UEI_REASON_LEN];
+	char		msg[UEI_MSG_LEN];
 };
 
 #ifdef __bpf__
@@ -28,40 +28,56 @@ struct user_exit_info {
 #include "vmlinux.h"
 #include <bpf/bpf_core_read.h>
 
-static inline void uei_record(struct user_exit_info *uei,
-			      const struct scx_exit_info *ei)
-{
-	bpf_probe_read_kernel_str(uei->reason, sizeof(uei->reason), ei->reason);
-	bpf_probe_read_kernel_str(uei->msg, sizeof(uei->msg), ei->msg);
-	bpf_probe_read_kernel_str(uei->dump, sizeof(uei->dump), ei->dump);
-	/* use __sync to force memory barrier */
-	__sync_val_compare_and_swap(&uei->kind, uei->kind, ei->kind);
-}
+#define UEI_DEFINE(__name)							\
+	char RESIZABLE_ARRAY(data, __name##_dump);				\
+	const volatile u32 __name##_dump_len;					\
+	struct user_exit_info __name SEC(".data")
+
+#define UEI_RECORD(__uei_name, __ei) ({						\
+	bpf_probe_read_kernel_str(__uei_name.reason,				\
+				  sizeof(__uei_name.reason), (__ei)->reason);	\
+	bpf_probe_read_kernel_str(__uei_name.msg,				\
+				  sizeof(__uei_name.msg), (__ei)->msg);		\
+	bpf_probe_read_kernel_str(__uei_name##_dump,				\
+				  __uei_name##_dump_len, (__ei)->dump);		\
+	if (bpf_core_field_exists((__ei)->exit_code))				\
+		__uei_name.exit_code = (__ei)->exit_code;			\
+	/* use __sync to force memory barrier */				\
+	__sync_val_compare_and_swap(&__uei_name.kind, __uei_name.kind,		\
+				    (__ei)->kind);				\
+})
 
 #else	/* !__bpf__ */
 
 #include <stdio.h>
 #include <stdbool.h>
 
-static inline bool uei_exited(struct user_exit_info *uei)
-{
-	/* use __sync to force memory barrier */
-	return __sync_val_compare_and_swap(&uei->kind, -1, -1);
-}
+/* no need to call the following explicitly if SCX_OPS_LOAD() is used */
+#define UEI_SET_SIZE(__skel, __ops_name, __uei_name) ({				\
+	u32 __len = (__skel)->struct_ops.__ops_name->exit_dump_len ?: UEI_DUMP_DFL_LEN; \
+	(__skel)->rodata->__uei_name##_dump_len = __len;			\
+	RESIZE_ARRAY(data, __uei_name##_dump, __len);				\
+})
 
-static inline void uei_print(const struct user_exit_info *uei)
-{
-	if (uei->dump[0] != '\0') {
-		fputs("\nDEBUG DUMP\n", stderr);
-		fputs("================================================================================\n\n", stderr);
-		fputs(uei->dump, stderr);
-		fputs("\n================================================================================\n\n", stderr);
-	}
-	fprintf(stderr, "EXIT: %s", uei->reason);
-	if (uei->msg[0] != '\0')
-		fprintf(stderr, " (%s)", uei->msg);
-	fputs("\n", stderr);
-}
+#define UEI_EXITED(__skel, __uei_name) ({					\
+	/* use __sync to force memory barrier */				\
+	__sync_val_compare_and_swap(&(__skel)->data->__uei_name.kind, -1, -1);	\
+})
+
+#define UEI_REPORT(__skel, __uei_name) ({					\
+	struct user_exit_info *__uei = &(__skel)->data->__uei_name;		\
+	char *__uei_dump = (__skel)->data_##__uei_name##_dump->__uei_name##_dump; \
+	if (__uei_dump[0] != '\0') {						\
+		fputs("\nDEBUG DUMP\n", stderr);				\
+		fputs("================================================================================\n\n", stderr); \
+		fputs(__uei_dump, stderr);					\
+		fputs("\n================================================================================\n\n", stderr); \
+	}									\
+	fprintf(stderr, "EXIT: %s", __uei->reason);				\
+	if (__uei->msg[0] != '\0')						\
+		fprintf(stderr, " (%s)", __uei->msg);				\
+	fputs("\n", stderr);							\
+})
 
 #endif	/* __bpf__ */
 #endif	/* __USER_EXIT_INFO_H */

scheds/include/vmlinux/vmlinux-v6.9-ge34c7df6e8fa.h (new file; diff suppressed because it is too large)

scheds/include/vmlinux/vmlinux.h (symlink)

@@ -1 +1 @@
-vmlinux-v6.7-g6851d5f5be95.h
+vmlinux-v6.9-ge34c7df6e8fa.h