mirror of
https://github.com/sched-ext/scx.git
synced 2024-11-29 14:10:24 +00:00
Sync from kernel (670bdab6073)
And fix build breakage in scx_utils due to an enum type rename.
This commit is contained in:
parent
3ee64a1301
commit
cf66e58118
@ -45,7 +45,7 @@ impl Builder {
|
||||
let bindings = bindgen::Builder::default()
|
||||
.header("bindings.h")
|
||||
.allowlist_type("scx_exit_kind")
|
||||
.allowlist_type("scx_internal_consts")
|
||||
.allowlist_type("scx_consts")
|
||||
.parse_callbacks(Box::new(bindgen::CargoCallbacks))
|
||||
.generate()
|
||||
.expect("Unable to generate bindings");
|
||||
|
@ -43,7 +43,7 @@ pub use builder::Builder;
|
||||
|
||||
mod user_exit_info;
|
||||
pub use user_exit_info::ScxExitKind;
|
||||
pub use user_exit_info::ScxInternalConsts;
|
||||
pub use user_exit_info::ScxConsts;
|
||||
pub use user_exit_info::UeiDumpPtr;
|
||||
pub use user_exit_info::UserExitInfo;
|
||||
pub use user_exit_info::UEI_DUMP_PTR_MUTEX;
|
||||
|
@ -29,8 +29,8 @@ pub enum ScxExitKind {
|
||||
ErrorStall = bindings::scx_exit_kind_SCX_EXIT_ERROR_STALL as isize,
|
||||
}
|
||||
|
||||
pub enum ScxInternalConsts {
|
||||
ExitDumpDflLen = bindings::scx_internal_consts_SCX_EXIT_DUMP_DFL_LEN as isize,
|
||||
pub enum ScxConsts {
|
||||
ExitDumpDflLen = bindings::scx_consts_SCX_EXIT_DUMP_DFL_LEN as isize,
|
||||
}
|
||||
|
||||
/// Takes a reference to C struct user_exit_info and reads it into
|
||||
@ -65,7 +65,7 @@ macro_rules! uei_set_size {
|
||||
($skel: expr, $ops: ident, $uei:ident) => {{
|
||||
scx_utils::paste! {
|
||||
let len = match $skel.struct_ops.$ops().exit_dump_len {
|
||||
0 => scx_utils::ScxInternalConsts::ExitDumpDflLen as u32,
|
||||
0 => scx_utils::ScxConsts::ExitDumpDflLen as u32,
|
||||
v => v,
|
||||
};
|
||||
$skel.rodata_mut().[<$uei _dump_len>] = len;
|
||||
|
@ -24,10 +24,19 @@ const char help_fmt[] =
|
||||
"\n"
|
||||
" -s SLICE_US Override slice duration\n"
|
||||
" -c CPU Override the central CPU (default: 0)\n"
|
||||
" -v Print libbpf debug messages\n"
|
||||
" -h Display this help and exit\n";
|
||||
|
||||
static bool verbose;
|
||||
static volatile int exit_req;
|
||||
|
||||
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
|
||||
{
|
||||
if (level == LIBBPF_DEBUG && !verbose)
|
||||
return 0;
|
||||
return vfprintf(stderr, format, args);
|
||||
}
|
||||
|
||||
static void sigint_handler(int dummy)
|
||||
{
|
||||
exit_req = 1;
|
||||
@ -37,22 +46,20 @@ int main(int argc, char **argv)
|
||||
{
|
||||
struct scx_central *skel;
|
||||
struct bpf_link *link;
|
||||
__u64 seq = 0;
|
||||
__u64 seq = 0, ecode;
|
||||
__s32 opt;
|
||||
cpu_set_t *cpuset;
|
||||
|
||||
libbpf_set_print(libbpf_print_fn);
|
||||
signal(SIGINT, sigint_handler);
|
||||
signal(SIGTERM, sigint_handler);
|
||||
|
||||
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
|
||||
|
||||
skel = scx_central__open();
|
||||
SCX_BUG_ON(!skel, "Failed to open skel");
|
||||
restart:
|
||||
skel = SCX_OPS_OPEN(central_ops, scx_central);
|
||||
|
||||
skel->rodata->central_cpu = 0;
|
||||
skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
|
||||
|
||||
while ((opt = getopt(argc, argv, "s:c:ph")) != -1) {
|
||||
while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
|
||||
switch (opt) {
|
||||
case 's':
|
||||
skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
|
||||
@ -60,6 +67,9 @@ int main(int argc, char **argv)
|
||||
case 'c':
|
||||
skel->rodata->central_cpu = strtoul(optarg, NULL, 0);
|
||||
break;
|
||||
case 'v':
|
||||
verbose = true;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, help_fmt, basename(argv[0]));
|
||||
return opt != 'h';
|
||||
@ -116,7 +126,10 @@ int main(int argc, char **argv)
|
||||
}
|
||||
|
||||
bpf_link__destroy(link);
|
||||
UEI_REPORT(skel, uei);
|
||||
ecode = UEI_REPORT(skel, uei);
|
||||
scx_central__destroy(skel);
|
||||
|
||||
if (UEI_ECODE_RESTART(ecode))
|
||||
goto restart;
|
||||
return 0;
|
||||
}
|
||||
|
@ -26,15 +26,24 @@ const char help_fmt[] =
|
||||
"\n"
|
||||
"See the top-level comment in .bpf.c for more details.\n"
|
||||
"\n"
|
||||
"Usage: %s [-s SLICE_US] [-i INTERVAL] [-f]\n"
|
||||
"Usage: %s [-s SLICE_US] [-i INTERVAL] [-f] [-v]\n"
|
||||
"\n"
|
||||
" -s SLICE_US Override slice duration\n"
|
||||
" -i INTERVAL Report interval\n"
|
||||
" -f Use FIFO scheduling instead of weighted vtime scheduling\n"
|
||||
" -v Print libbpf debug messages\n"
|
||||
" -h Display this help and exit\n";
|
||||
|
||||
static bool verbose;
|
||||
static volatile int exit_req;
|
||||
|
||||
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
|
||||
{
|
||||
if (level == LIBBPF_DEBUG && !verbose)
|
||||
return 0;
|
||||
return vfprintf(stderr, format, args);
|
||||
}
|
||||
|
||||
static void sigint_handler(int dummy)
|
||||
{
|
||||
exit_req = 1;
|
||||
@ -119,18 +128,17 @@ int main(int argc, char **argv)
|
||||
__u64 last_stats[FCG_NR_STATS] = {};
|
||||
unsigned long seq = 0;
|
||||
__s32 opt;
|
||||
__u64 ecode;
|
||||
|
||||
libbpf_set_print(libbpf_print_fn);
|
||||
signal(SIGINT, sigint_handler);
|
||||
signal(SIGTERM, sigint_handler);
|
||||
|
||||
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
|
||||
|
||||
skel = scx_flatcg__open();
|
||||
SCX_BUG_ON(!skel, "Failed to open skel");
|
||||
restart:
|
||||
skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);
|
||||
|
||||
skel->rodata->nr_cpus = libbpf_num_possible_cpus();
|
||||
|
||||
while ((opt = getopt(argc, argv, "s:i:dfph")) != -1) {
|
||||
while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) {
|
||||
double v;
|
||||
|
||||
switch (opt) {
|
||||
@ -149,6 +157,9 @@ int main(int argc, char **argv)
|
||||
case 'f':
|
||||
skel->rodata->fifo_sched = true;
|
||||
break;
|
||||
case 'v':
|
||||
verbose = true;
|
||||
break;
|
||||
case 'h':
|
||||
default:
|
||||
fprintf(stderr, help_fmt, basename(argv[0]));
|
||||
@ -213,7 +224,10 @@ int main(int argc, char **argv)
|
||||
}
|
||||
|
||||
bpf_link__destroy(link);
|
||||
UEI_REPORT(skel, uei);
|
||||
ecode = UEI_REPORT(skel, uei);
|
||||
scx_flatcg__destroy(skel);
|
||||
|
||||
if (UEI_ECODE_RESTART(ecode))
|
||||
goto restart;
|
||||
return 0;
|
||||
}
|
||||
|
@ -23,6 +23,12 @@
|
||||
* Copyright (c) 2022 David Vernet <dvernet@meta.com>
|
||||
*/
|
||||
#include <scx/common.bpf.h>
|
||||
#include <string.h>
|
||||
|
||||
enum consts {
|
||||
ONE_SEC_IN_NS = 1000000000,
|
||||
SHARED_DSQ = 0,
|
||||
};
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
@ -30,6 +36,9 @@ const volatile u64 slice_ns = SCX_SLICE_DFL;
|
||||
const volatile u32 stall_user_nth;
|
||||
const volatile u32 stall_kernel_nth;
|
||||
const volatile u32 dsp_inf_loop_after;
|
||||
const volatile u32 dsp_batch;
|
||||
const volatile bool print_shared_dsq;
|
||||
const volatile char exp_prefix[17];
|
||||
const volatile s32 disallow_tgid;
|
||||
const volatile bool switch_partial;
|
||||
|
||||
@ -62,6 +71,18 @@ struct {
|
||||
},
|
||||
};
|
||||
|
||||
/*
|
||||
* If enabled, CPU performance target is set according to the queue index
|
||||
* using the following table.
|
||||
*/
|
||||
static const u32 qidx_to_cpuperf_target[] = {
|
||||
[0] = SCX_CPUPERF_ONE * 0 / 4,
|
||||
[1] = SCX_CPUPERF_ONE * 1 / 4,
|
||||
[2] = SCX_CPUPERF_ONE * 2 / 4,
|
||||
[3] = SCX_CPUPERF_ONE * 3 / 4,
|
||||
[4] = SCX_CPUPERF_ONE * 4 / 4,
|
||||
};
|
||||
|
||||
/*
|
||||
* Per-queue sequence numbers to implement core-sched ordering.
|
||||
*
|
||||
@ -86,17 +107,25 @@ struct {
|
||||
__type(value, struct task_ctx);
|
||||
} task_ctx_stor SEC(".maps");
|
||||
|
||||
/* Per-cpu dispatch index and remaining count */
|
||||
struct cpu_ctx {
|
||||
u64 dsp_idx; /* dispatch index */
|
||||
u64 dsp_cnt; /* remaining count */
|
||||
u32 avg_weight;
|
||||
u32 cpuperf_target;
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
|
||||
__uint(max_entries, 2);
|
||||
__uint(max_entries, 1);
|
||||
__type(key, u32);
|
||||
__type(value, u64);
|
||||
} dispatch_idx_cnt SEC(".maps");
|
||||
__type(value, struct cpu_ctx);
|
||||
} cpu_ctx_stor SEC(".maps");
|
||||
|
||||
/* Statistics */
|
||||
u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued;
|
||||
u64 nr_core_sched_execed;
|
||||
u64 nr_core_sched_execed, nr_expedited;
|
||||
u32 cpuperf_min, cpuperf_avg, cpuperf_max;
|
||||
u32 cpuperf_target_min, cpuperf_target_avg, cpuperf_target_max;
|
||||
|
||||
s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p,
|
||||
s32 prev_cpu, u64 wake_flags)
|
||||
@ -189,7 +218,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
|
||||
if (enq_flags & SCX_ENQ_REENQ) {
|
||||
s32 cpu;
|
||||
|
||||
scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, 0, enq_flags);
|
||||
scx_bpf_dispatch(p, SHARED_DSQ, 0, enq_flags);
|
||||
cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
|
||||
if (cpu >= 0)
|
||||
scx_bpf_kick_cpu(cpu, __COMPAT_SCX_KICK_IDLE);
|
||||
@ -204,7 +233,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
|
||||
|
||||
/* Queue on the selected FIFO. If the FIFO overflows, punt to the shared DSQ. */
|
||||
if (bpf_map_push_elem(ring, &pid, 0)) {
|
||||
scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, slice_ns, enq_flags);
|
||||
scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, enq_flags);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -233,18 +262,49 @@ static void update_core_sched_head_seq(struct task_struct *p)
|
||||
scx_bpf_error("task_ctx lookup failed");
|
||||
}
|
||||
|
||||
static bool consume_shared_dsq(void)
|
||||
{
|
||||
struct task_struct *p;
|
||||
bool consumed;
|
||||
|
||||
if (exp_prefix[0] == '\0')
|
||||
return scx_bpf_consume(SHARED_DSQ);
|
||||
|
||||
/*
|
||||
* To demonstrate the use of scx_bpf_consume_task(), implement a silly
|
||||
* selective priority boosting mechanism by scanning SHARED_DSQ looking
|
||||
* for matching comms and consume them first. This makes a difference only
|
||||
* when dsp_batch is larger than 1.
|
||||
*/
|
||||
consumed = false;
|
||||
__COMPAT_DSQ_FOR_EACH(p, SHARED_DSQ, 0) {
|
||||
char comm[sizeof(exp_prefix)];
|
||||
|
||||
memcpy(comm, p->comm, sizeof(exp_prefix) - 1);
|
||||
|
||||
if (!bpf_strncmp(comm, sizeof(exp_prefix),
|
||||
(const char *)exp_prefix) &&
|
||||
__COMPAT_scx_bpf_consume_task(BPF_FOR_EACH_ITER, p)) {
|
||||
consumed = true;
|
||||
__sync_fetch_and_add(&nr_expedited, 1);
|
||||
}
|
||||
}
|
||||
|
||||
return consumed || scx_bpf_consume(SHARED_DSQ);
|
||||
}
|
||||
|
||||
void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev)
|
||||
{
|
||||
u32 zero = 0, one = 1;
|
||||
u64 *idx = bpf_map_lookup_elem(&dispatch_idx_cnt, &zero);
|
||||
u64 *cnt = bpf_map_lookup_elem(&dispatch_idx_cnt, &one);
|
||||
struct task_struct *p;
|
||||
struct cpu_ctx *cpuc;
|
||||
u32 zero = 0, batch = dsp_batch ?: 1;
|
||||
void *fifo;
|
||||
s32 pid;
|
||||
int i;
|
||||
s32 i, pid;
|
||||
|
||||
if (consume_shared_dsq())
|
||||
return;
|
||||
|
||||
if (dsp_inf_loop_after && nr_dispatched > dsp_inf_loop_after) {
|
||||
struct task_struct *p;
|
||||
|
||||
/*
|
||||
* PID 2 should be kthreadd which should mostly be idle and off
|
||||
* the scheduler. Let's keep dispatching it to force the kernel
|
||||
@ -252,49 +312,80 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev)
|
||||
*/
|
||||
p = bpf_task_from_pid(2);
|
||||
if (p) {
|
||||
scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, slice_ns, 0);
|
||||
scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, 0);
|
||||
bpf_task_release(p);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!idx || !cnt) {
|
||||
scx_bpf_error("failed to lookup idx[%p], cnt[%p]", idx, cnt);
|
||||
if (!(cpuc = bpf_map_lookup_elem(&cpu_ctx_stor, &zero))) {
|
||||
scx_bpf_error("failed to look up cpu_ctx");
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < 5; i++) {
|
||||
/* Advance the dispatch cursor and pick the fifo. */
|
||||
if (!*cnt) {
|
||||
*idx = (*idx + 1) % 5;
|
||||
*cnt = 1 << *idx;
|
||||
if (!cpuc->dsp_cnt) {
|
||||
cpuc->dsp_idx = (cpuc->dsp_idx + 1) % 5;
|
||||
cpuc->dsp_cnt = 1 << cpuc->dsp_idx;
|
||||
}
|
||||
(*cnt)--;
|
||||
|
||||
fifo = bpf_map_lookup_elem(&queue_arr, idx);
|
||||
fifo = bpf_map_lookup_elem(&queue_arr, &cpuc->dsp_idx);
|
||||
if (!fifo) {
|
||||
scx_bpf_error("failed to find ring %llu", *idx);
|
||||
scx_bpf_error("failed to find ring %llu", cpuc->dsp_idx);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Dispatch or advance. */
|
||||
if (!bpf_map_pop_elem(fifo, &pid)) {
|
||||
struct task_struct *p;
|
||||
bpf_repeat(BPF_MAX_LOOPS) {
|
||||
if (bpf_map_pop_elem(fifo, &pid))
|
||||
break;
|
||||
|
||||
p = bpf_task_from_pid(pid);
|
||||
if (p) {
|
||||
update_core_sched_head_seq(p);
|
||||
__sync_fetch_and_add(&nr_dispatched, 1);
|
||||
scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, slice_ns, 0);
|
||||
bpf_task_release(p);
|
||||
if (!p)
|
||||
continue;
|
||||
|
||||
update_core_sched_head_seq(p);
|
||||
__sync_fetch_and_add(&nr_dispatched, 1);
|
||||
scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, 0);
|
||||
bpf_task_release(p);
|
||||
batch--;
|
||||
cpuc->dsp_cnt--;
|
||||
if (!batch || !scx_bpf_dispatch_nr_slots()) {
|
||||
consume_shared_dsq();
|
||||
return;
|
||||
}
|
||||
if (!cpuc->dsp_cnt)
|
||||
break;
|
||||
}
|
||||
|
||||
*cnt = 0;
|
||||
cpuc->dsp_cnt = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void BPF_STRUCT_OPS(qmap_tick, struct task_struct *p)
|
||||
{
|
||||
struct cpu_ctx *cpuc;
|
||||
u32 zero = 0;
|
||||
int idx;
|
||||
|
||||
if (!(cpuc = bpf_map_lookup_elem(&cpu_ctx_stor, &zero))) {
|
||||
scx_bpf_error("failed to look up cpu_ctx");
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use the running avg of weights to select the target cpuperf level.
|
||||
* This is a demonstration of the cpuperf feature rather than a
|
||||
* practical strategy to regulate CPU frequency.
|
||||
*/
|
||||
cpuc->avg_weight = cpuc->avg_weight * 3 / 4 + p->scx.weight / 4;
|
||||
idx = weight_to_idx(cpuc->avg_weight);
|
||||
cpuc->cpuperf_target = qidx_to_cpuperf_target[idx];
|
||||
|
||||
scx_bpf_cpuperf_set(scx_bpf_task_cpu(p), cpuc->cpuperf_target);
|
||||
}
|
||||
|
||||
/*
|
||||
* The distance from the head of the queue scaled by the weight of the queue.
|
||||
* The lower the number, the older the task and the higher the priority.
|
||||
@ -371,11 +462,189 @@ s32 BPF_STRUCT_OPS(qmap_init_task, struct task_struct *p,
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
s32 BPF_STRUCT_OPS(qmap_init)
|
||||
/*
|
||||
* Print out the online and possible CPU map using bpf_printk() as a
|
||||
* demonstration of using the cpumask kfuncs and ops.cpu_on/offline().
|
||||
*/
|
||||
static void print_cpus(void)
|
||||
{
|
||||
const struct cpumask *possible, *online;
|
||||
s32 cpu;
|
||||
char buf[128] = "", *p;
|
||||
int idx;
|
||||
|
||||
if (!__COMPAT_HAS_CPUMASKS)
|
||||
return;
|
||||
|
||||
possible = scx_bpf_get_possible_cpumask();
|
||||
online = scx_bpf_get_online_cpumask();
|
||||
|
||||
idx = 0;
|
||||
bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) {
|
||||
if (!(p = MEMBER_VPTR(buf, [idx++])))
|
||||
break;
|
||||
if (bpf_cpumask_test_cpu(cpu, online))
|
||||
*p++ = 'O';
|
||||
else if (bpf_cpumask_test_cpu(cpu, possible))
|
||||
*p++ = 'X';
|
||||
else
|
||||
*p++ = ' ';
|
||||
|
||||
if ((cpu & 7) == 7) {
|
||||
if (!(p = MEMBER_VPTR(buf, [idx++])))
|
||||
break;
|
||||
*p++ = '|';
|
||||
}
|
||||
}
|
||||
buf[sizeof(buf) - 1] = '\0';
|
||||
|
||||
scx_bpf_put_cpumask(online);
|
||||
scx_bpf_put_cpumask(possible);
|
||||
|
||||
bpf_printk("CPUS: |%s", buf);
|
||||
}
|
||||
|
||||
void BPF_STRUCT_OPS(qmap_cpu_online, s32 cpu)
|
||||
{
|
||||
bpf_printk("CPU %d coming online", cpu);
|
||||
/* @cpu is already online at this point */
|
||||
print_cpus();
|
||||
}
|
||||
|
||||
void BPF_STRUCT_OPS(qmap_cpu_offline, s32 cpu)
|
||||
{
|
||||
bpf_printk("CPU %d going offline", cpu);
|
||||
/* @cpu is still online at this point */
|
||||
print_cpus();
|
||||
}
|
||||
|
||||
struct monitor_timer {
|
||||
struct bpf_timer timer;
|
||||
};
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(max_entries, 1);
|
||||
__type(key, u32);
|
||||
__type(value, struct monitor_timer);
|
||||
} central_timer SEC(".maps");
|
||||
|
||||
/*
|
||||
* Print out the min, avg and max performance levels of CPUs every second to
|
||||
* demonstrate the cpuperf interface.
|
||||
*/
|
||||
static void monitor_cpuperf(void)
|
||||
{
|
||||
u32 zero = 0;
|
||||
u32 nr_cpu_ids = scx_bpf_nr_cpu_ids();
|
||||
u64 cap_sum = 0, cur_sum = 0, cur_min = SCX_CPUPERF_ONE, cur_max = 0;
|
||||
u64 target_sum = 0, target_min = SCX_CPUPERF_ONE, target_max = 0;
|
||||
const struct cpumask *online;
|
||||
int i, nr_online_cpus = 0;
|
||||
|
||||
online = scx_bpf_get_online_cpumask();
|
||||
|
||||
bpf_for(i, 0, nr_cpu_ids) {
|
||||
struct cpu_ctx *cpuc;
|
||||
u32 cap, cur;
|
||||
|
||||
if (!bpf_cpumask_test_cpu(i, online))
|
||||
continue;
|
||||
nr_online_cpus++;
|
||||
|
||||
/* collect the capacity and current cpuperf */
|
||||
cap = scx_bpf_cpuperf_cap(i);
|
||||
cur = scx_bpf_cpuperf_cur(i);
|
||||
|
||||
cur_min = cur < cur_min ? cur : cur_min;
|
||||
cur_max = cur > cur_max ? cur : cur_max;
|
||||
|
||||
/*
|
||||
* $cur is relative to $cap. Scale it down accordingly so that
|
||||
* it's in the same scale as other CPUs and $cur_sum/$cap_sum
|
||||
* makes sense.
|
||||
*/
|
||||
cur_sum += cur * cap / SCX_CPUPERF_ONE;
|
||||
cap_sum += cap;
|
||||
|
||||
if (!(cpuc = bpf_map_lookup_percpu_elem(&cpu_ctx_stor, &zero, i))) {
|
||||
scx_bpf_error("failed to look up cpu_ctx");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* collect target */
|
||||
cur = cpuc->cpuperf_target;
|
||||
target_sum += cur;
|
||||
target_min = cur < target_min ? cur : target_min;
|
||||
target_max = cur > target_max ? cur : target_max;
|
||||
}
|
||||
|
||||
cpuperf_min = cur_min;
|
||||
cpuperf_avg = cur_sum * SCX_CPUPERF_ONE / cap_sum;
|
||||
cpuperf_max = cur_max;
|
||||
|
||||
cpuperf_target_min = target_min;
|
||||
cpuperf_target_avg = target_sum / nr_online_cpus;
|
||||
cpuperf_target_max = target_max;
|
||||
out:
|
||||
scx_bpf_put_cpumask(online);
|
||||
}
|
||||
|
||||
/*
|
||||
* Dump the currently queued tasks in the shared DSQ to demonstrate the usage of
|
||||
* scx_bpf_dsq_nr_queued() and the DSQ iterator. Raise the dispatch batch count to
|
||||
* see meaningful dumps in the trace pipe.
|
||||
*/
|
||||
static void dump_shared_dsq(void)
|
||||
{
|
||||
struct task_struct *p;
|
||||
s32 nr;
|
||||
|
||||
if (!(nr = scx_bpf_dsq_nr_queued(SHARED_DSQ)))
|
||||
return;
|
||||
|
||||
bpf_printk("Dumping %d tasks in SHARED_DSQ in reverse order", nr);
|
||||
|
||||
bpf_rcu_read_lock();
|
||||
__COMPAT_DSQ_FOR_EACH(p, SHARED_DSQ, SCX_DSQ_ITER_REV)
|
||||
bpf_printk("%s[%d]", p->comm, p->pid);
|
||||
bpf_rcu_read_unlock();
|
||||
}
|
||||
|
||||
static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
|
||||
{
|
||||
monitor_cpuperf();
|
||||
|
||||
if (print_shared_dsq)
|
||||
dump_shared_dsq();
|
||||
|
||||
bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init)
|
||||
{
|
||||
u32 key = 0;
|
||||
struct bpf_timer *timer;
|
||||
s32 ret;
|
||||
|
||||
if (!switch_partial)
|
||||
__COMPAT_scx_bpf_switch_all();
|
||||
return 0;
|
||||
|
||||
print_cpus();
|
||||
|
||||
ret = scx_bpf_create_dsq(SHARED_DSQ, -1);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
timer = bpf_map_lookup_elem(&central_timer, &key);
|
||||
if (!timer)
|
||||
return -ESRCH;
|
||||
|
||||
bpf_timer_init(timer, &central_timer, CLOCK_MONOTONIC);
|
||||
bpf_timer_set_callback(timer, monitor_timerfn);
|
||||
|
||||
return bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
|
||||
}
|
||||
|
||||
void BPF_STRUCT_OPS(qmap_exit, struct scx_exit_info *ei)
|
||||
@ -388,9 +657,12 @@ SCX_OPS_DEFINE(qmap_ops,
|
||||
.enqueue = (void *)qmap_enqueue,
|
||||
.dequeue = (void *)qmap_dequeue,
|
||||
.dispatch = (void *)qmap_dispatch,
|
||||
.tick = (void *)qmap_tick,
|
||||
.core_sched_before = (void *)qmap_core_sched_before,
|
||||
.cpu_release = (void *)qmap_cpu_release,
|
||||
.init_task = (void *)qmap_init_task,
|
||||
.cpu_online = (void *)qmap_cpu_online,
|
||||
.cpu_offline = (void *)qmap_cpu_offline,
|
||||
.init = (void *)qmap_init,
|
||||
.exit = (void *)qmap_exit,
|
||||
.flags = SCX_OPS_ENQ_LAST,
|
||||
|
@ -19,21 +19,34 @@ const char help_fmt[] =
|
||||
"\n"
|
||||
"See the top-level comment in .bpf.c for more details.\n"
|
||||
"\n"
|
||||
"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-d PID]\n"
|
||||
" [-D LEN] [-p]\n"
|
||||
"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-b COUNT]\n"
|
||||
" [-P] [-E PREFIX] [-d PID] [-D LEN] [-p] [-v]\n"
|
||||
"\n"
|
||||
" -s SLICE_US Override slice duration\n"
|
||||
" -e COUNT Trigger scx_bpf_error() after COUNT enqueues\n"
|
||||
" -t COUNT Stall every COUNT'th user thread\n"
|
||||
" -T COUNT Stall every COUNT'th kernel thread\n"
|
||||
" -l COUNT Trigger dispatch infinite looping after COUNT dispatches\n"
|
||||
" -b COUNT Dispatch upto COUNT tasks together\n"
|
||||
" -P Print out DSQ content to trace_pipe every second, use with -b\n"
|
||||
" -E PREFIX Expedite consumption of threads w/ matching comm, use with -b\n"
|
||||
" (e.g. match shell on a loaded system)\n"
|
||||
" -d PID Disallow a process from switching into SCHED_EXT (-1 for self)\n"
|
||||
" -D LEN Set scx_exit_info.dump buffer length\n"
|
||||
" -p Switch only tasks on SCHED_EXT policy intead of all\n"
|
||||
" -v Print libbpf debug messages\n"
|
||||
" -h Display this help and exit\n";
|
||||
|
||||
static bool verbose;
|
||||
static volatile int exit_req;
|
||||
|
||||
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
|
||||
{
|
||||
if (level == LIBBPF_DEBUG && !verbose)
|
||||
return 0;
|
||||
return vfprintf(stderr, format, args);
|
||||
}
|
||||
|
||||
static void sigint_handler(int dummy)
|
||||
{
|
||||
exit_req = 1;
|
||||
@ -45,15 +58,13 @@ int main(int argc, char **argv)
|
||||
struct bpf_link *link;
|
||||
int opt;
|
||||
|
||||
libbpf_set_print(libbpf_print_fn);
|
||||
signal(SIGINT, sigint_handler);
|
||||
signal(SIGTERM, sigint_handler);
|
||||
|
||||
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
|
||||
skel = SCX_OPS_OPEN(qmap_ops, scx_qmap);
|
||||
|
||||
skel = scx_qmap__open();
|
||||
SCX_BUG_ON(!skel, "Failed to open skel");
|
||||
|
||||
while ((opt = getopt(argc, argv, "s:e:t:T:l:d:D:ph")) != -1) {
|
||||
while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PE:d:D:pvh")) != -1) {
|
||||
switch (opt) {
|
||||
case 's':
|
||||
skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
|
||||
@ -70,6 +81,16 @@ int main(int argc, char **argv)
|
||||
case 'l':
|
||||
skel->rodata->dsp_inf_loop_after = strtoul(optarg, NULL, 0);
|
||||
break;
|
||||
case 'b':
|
||||
skel->rodata->dsp_batch = strtoul(optarg, NULL, 0);
|
||||
break;
|
||||
case 'P':
|
||||
skel->rodata->print_shared_dsq = true;
|
||||
break;
|
||||
case 'E':
|
||||
strncpy(skel->rodata->exp_prefix, optarg,
|
||||
sizeof(skel->rodata->exp_prefix) - 1);
|
||||
break;
|
||||
case 'd':
|
||||
skel->rodata->disallow_tgid = strtol(optarg, NULL, 0);
|
||||
if (skel->rodata->disallow_tgid < 0)
|
||||
@ -82,12 +103,19 @@ int main(int argc, char **argv)
|
||||
skel->rodata->switch_partial = true;
|
||||
skel->struct_ops.qmap_ops->flags |= __COMPAT_SCX_OPS_SWITCH_PARTIAL;
|
||||
break;
|
||||
case 'v':
|
||||
verbose = true;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, help_fmt, basename(argv[0]));
|
||||
return opt != 'h';
|
||||
}
|
||||
}
|
||||
|
||||
if (!__COMPAT_HAS_DSQ_ITER &&
|
||||
(skel->rodata->print_shared_dsq || strlen(skel->rodata->exp_prefix)))
|
||||
fprintf(stderr, "kernel doesn't support DSQ iteration\n");
|
||||
|
||||
SCX_OPS_LOAD(skel, qmap_ops, scx_qmap, uei);
|
||||
link = SCX_OPS_ATTACH(skel, qmap_ops);
|
||||
|
||||
@ -95,10 +123,18 @@ int main(int argc, char **argv)
|
||||
long nr_enqueued = skel->bss->nr_enqueued;
|
||||
long nr_dispatched = skel->bss->nr_dispatched;
|
||||
|
||||
printf("enq=%lu, dsp=%lu, delta=%ld, reenq=%" PRIu64 ", deq=%" PRIu64 ", core=%" PRIu64 "\n",
|
||||
printf("stats : enq=%lu dsp=%lu delta=%ld reenq=%"PRIu64" deq=%"PRIu64" core=%"PRIu64" exp=%"PRIu64"\n",
|
||||
nr_enqueued, nr_dispatched, nr_enqueued - nr_dispatched,
|
||||
skel->bss->nr_reenqueued, skel->bss->nr_dequeued,
|
||||
skel->bss->nr_core_sched_execed);
|
||||
skel->bss->nr_core_sched_execed, skel->bss->nr_expedited);
|
||||
if (__COMPAT_has_ksym("scx_bpf_cpuperf_cur"))
|
||||
printf("cpuperf: cur min/avg/max=%u/%u/%u target min/avg/max=%u/%u/%u\n",
|
||||
skel->bss->cpuperf_min,
|
||||
skel->bss->cpuperf_avg,
|
||||
skel->bss->cpuperf_max,
|
||||
skel->bss->cpuperf_target_min,
|
||||
skel->bss->cpuperf_target_avg,
|
||||
skel->bss->cpuperf_target_max);
|
||||
fflush(stdout);
|
||||
sleep(1);
|
||||
}
|
||||
@ -106,5 +142,9 @@ int main(int argc, char **argv)
|
||||
bpf_link__destroy(link);
|
||||
UEI_REPORT(skel, uei);
|
||||
scx_qmap__destroy(skel);
|
||||
/*
|
||||
* scx_qmap implements ops.cpu_on/offline() and doesn't need to restart
|
||||
* on CPU hotplug events.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
@ -129,7 +129,6 @@ void BPF_STRUCT_OPS(simple_enable, struct task_struct *p)
|
||||
|
||||
s32 BPF_STRUCT_OPS_SLEEPABLE(simple_init)
|
||||
{
|
||||
__COMPAT_scx_bpf_switch_all();
|
||||
return scx_bpf_create_dsq(SHARED_DSQ, -1);
|
||||
}
|
||||
|
||||
|
@ -17,13 +17,22 @@ const char help_fmt[] =
|
||||
"\n"
|
||||
"See the top-level comment in .bpf.c for more details.\n"
|
||||
"\n"
|
||||
"Usage: %s [-f]\n"
|
||||
"Usage: %s [-f] [-v]\n"
|
||||
"\n"
|
||||
" -f Use FIFO scheduling instead of weighted vtime scheduling\n"
|
||||
" -v Print libbpf debug messages\n"
|
||||
" -h Display this help and exit\n";
|
||||
|
||||
static bool verbose;
|
||||
static volatile int exit_req;
|
||||
|
||||
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
|
||||
{
|
||||
if (level == LIBBPF_DEBUG && !verbose)
|
||||
return 0;
|
||||
return vfprintf(stderr, format, args);
|
||||
}
|
||||
|
||||
static void sigint_handler(int simple)
|
||||
{
|
||||
exit_req = 1;
|
||||
@ -54,20 +63,22 @@ int main(int argc, char **argv)
|
||||
struct scx_simple *skel;
|
||||
struct bpf_link *link;
|
||||
__u32 opt;
|
||||
__u64 ecode;
|
||||
|
||||
libbpf_set_print(libbpf_print_fn);
|
||||
signal(SIGINT, sigint_handler);
|
||||
signal(SIGTERM, sigint_handler);
|
||||
restart:
|
||||
skel = SCX_OPS_OPEN(simple_ops, scx_simple);
|
||||
|
||||
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
|
||||
|
||||
skel = scx_simple__open();
|
||||
SCX_BUG_ON(!skel, "Failed to open skel");
|
||||
|
||||
while ((opt = getopt(argc, argv, "fh")) != -1) {
|
||||
while ((opt = getopt(argc, argv, "fvh")) != -1) {
|
||||
switch (opt) {
|
||||
case 'f':
|
||||
skel->rodata->fifo_sched = true;
|
||||
break;
|
||||
case 'v':
|
||||
verbose = true;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, help_fmt, basename(argv[0]));
|
||||
return opt != 'h';
|
||||
@ -87,7 +98,10 @@ int main(int argc, char **argv)
|
||||
}
|
||||
|
||||
bpf_link__destroy(link);
|
||||
UEI_REPORT(skel, uei);
|
||||
ecode = UEI_REPORT(skel, uei);
|
||||
scx_simple__destroy(skel);
|
||||
|
||||
if (UEI_ECODE_RESTART(ecode))
|
||||
goto restart;
|
||||
return 0;
|
||||
}
|
||||
|
@ -28,9 +28,54 @@ static inline void ___vmlinux_h_sanity_check___(void)
|
||||
"bpftool generated vmlinux.h is missing high bits for 64bit enums, upgrade clang and pahole");
|
||||
}
|
||||
|
||||
s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym;
|
||||
s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym;
|
||||
void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym;
|
||||
void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym;
|
||||
u32 scx_bpf_dispatch_nr_slots(void) __ksym;
|
||||
void scx_bpf_dispatch_cancel(void) __ksym;
|
||||
bool scx_bpf_consume(u64 dsq_id) __ksym;
|
||||
bool __scx_bpf_consume_task(unsigned long it, struct task_struct *p) __ksym __weak;
|
||||
u32 scx_bpf_reenqueue_local(void) __ksym;
|
||||
void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym;
|
||||
s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym;
|
||||
void scx_bpf_destroy_dsq(u64 dsq_id) __ksym;
|
||||
int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, bool rev) __ksym __weak;
|
||||
struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) __ksym __weak;
|
||||
void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) __ksym __weak;
|
||||
void scx_bpf_exit_bstr(s64 exit_code, char *fmt, unsigned long long *data, u32 data__sz) __ksym __weak;
|
||||
void scx_bpf_error_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym;
|
||||
void scx_bpf_exit_bstr(s64 exit_code, char *fmt,
|
||||
unsigned long long *data, u32 data__sz) __ksym __weak;
|
||||
u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym __weak;
|
||||
u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym __weak;
|
||||
void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym __weak;
|
||||
u32 scx_bpf_nr_cpu_ids(void) __ksym __weak;
|
||||
const struct cpumask *scx_bpf_get_possible_cpumask(void) __ksym __weak;
|
||||
const struct cpumask *scx_bpf_get_online_cpumask(void) __ksym __weak;
|
||||
void scx_bpf_put_cpumask(const struct cpumask *cpumask) __ksym __weak;
|
||||
const struct cpumask *scx_bpf_get_idle_cpumask(void) __ksym;
|
||||
const struct cpumask *scx_bpf_get_idle_smtmask(void) __ksym;
|
||||
void scx_bpf_put_idle_cpumask(const struct cpumask *cpumask) __ksym;
|
||||
bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) __ksym;
|
||||
s32 scx_bpf_pick_idle_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
|
||||
s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
|
||||
bool scx_bpf_task_running(const struct task_struct *p) __ksym;
|
||||
s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
|
||||
struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym;
|
||||
|
||||
/*
|
||||
* Use the following as @it when calling scx_bpf_consume_task() from within
|
||||
* bpf_for_each() loops.
|
||||
*/
|
||||
#define BPF_FOR_EACH_ITER (&___it)
|
||||
|
||||
/* hopefully temporary wrapper to work around BPF restriction */
|
||||
static inline bool scx_bpf_consume_task(struct bpf_iter_scx_dsq *it,
|
||||
struct task_struct *p)
|
||||
{
|
||||
unsigned long ptr;
|
||||
bpf_probe_read_kernel(&ptr, sizeof(ptr), it);
|
||||
return __scx_bpf_consume_task(ptr, p);
|
||||
}
|
||||
|
||||
static inline __attribute__((format(printf, 1, 2)))
|
||||
void ___scx_bpf_exit_format_checker(const char *fmt, ...) {}
|
||||
@ -40,18 +85,18 @@ void ___scx_bpf_exit_format_checker(const char *fmt, ...) {}
|
||||
* bstr exit kfuncs. Callers to this function should use ___fmt and ___param to
|
||||
* refer to the initialized list of inputs to the bstr kfunc.
|
||||
*/
|
||||
#define scx_bpf_exit_preamble(fmt, args...) \
|
||||
static char ___fmt[] = fmt; \
|
||||
/* \
|
||||
* Note that __param[] must have at least one \
|
||||
* element to keep the verifier happy. \
|
||||
*/ \
|
||||
unsigned long long ___param[___bpf_narg(args) ?: 1] = {}; \
|
||||
\
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
|
||||
___bpf_fill(___param, args); \
|
||||
_Pragma("GCC diagnostic pop") \
|
||||
#define scx_bpf_exit_preamble(fmt, args...) \
|
||||
static char ___fmt[] = fmt; \
|
||||
/* \
|
||||
* Note that __param[] must have at least one \
|
||||
* element to keep the verifier happy. \
|
||||
*/ \
|
||||
unsigned long long ___param[___bpf_narg(args) ?: 1] = {}; \
|
||||
\
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
|
||||
___bpf_fill(___param, args); \
|
||||
_Pragma("GCC diagnostic pop") \
|
||||
|
||||
/*
|
||||
* scx_bpf_exit() wraps the scx_bpf_exit_bstr() kfunc with variadic arguments
|
||||
@ -78,30 +123,6 @@ void ___scx_bpf_exit_format_checker(const char *fmt, ...) {}
|
||||
___scx_bpf_exit_format_checker(fmt, ##args); \
|
||||
})
|
||||
|
||||
s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym;
|
||||
bool scx_bpf_consume(u64 dsq_id) __ksym;
|
||||
void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym;
|
||||
void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym;
|
||||
u32 scx_bpf_dispatch_nr_slots(void) __ksym;
|
||||
void scx_bpf_dispatch_cancel(void) __ksym;
|
||||
void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym;
|
||||
s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym;
|
||||
bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) __ksym;
|
||||
s32 scx_bpf_pick_idle_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
|
||||
s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
|
||||
const struct cpumask *scx_bpf_get_idle_cpumask(void) __ksym;
|
||||
const struct cpumask *scx_bpf_get_idle_smtmask(void) __ksym;
|
||||
void scx_bpf_put_idle_cpumask(const struct cpumask *cpumask) __ksym;
|
||||
void scx_bpf_destroy_dsq(u64 dsq_id) __ksym;
|
||||
s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym;
|
||||
bool scx_bpf_task_running(const struct task_struct *p) __ksym;
|
||||
s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
|
||||
struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym;
|
||||
u32 scx_bpf_reenqueue_local(void) __ksym;
|
||||
u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym;
|
||||
u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym;
|
||||
void scx_bpf_cpuperf_set(u32 cpu, u32 perf) __ksym __weak;
|
||||
|
||||
#define BPF_STRUCT_OPS(name, args...) \
|
||||
SEC("struct_ops/"#name) \
|
||||
BPF_PROG(name, ##args)
|
||||
@ -156,7 +177,8 @@ BPF_PROG(name, ##args)
|
||||
* be a pointer to the area. Use `MEMBER_VPTR(*ptr, .member)` instead of
|
||||
* `MEMBER_VPTR(ptr, ->member)`.
|
||||
*/
|
||||
#define MEMBER_VPTR(base, member) (typeof((base) member) *)({ \
|
||||
#define MEMBER_VPTR(base, member) (typeof((base) member) *) \
|
||||
({ \
|
||||
u64 __base = (u64)&(base); \
|
||||
u64 __addr = (u64)&((base) member) - __base; \
|
||||
_Static_assert(sizeof(base) >= sizeof((base) member), \
|
||||
@ -186,18 +208,19 @@ BPF_PROG(name, ##args)
|
||||
* size of the array to compute the max, which will result in rejection by
|
||||
* the verifier.
|
||||
*/
|
||||
#define ARRAY_ELEM_PTR(arr, i, n) (typeof(arr[i]) *)({ \
|
||||
u64 __base = (u64)arr; \
|
||||
u64 __addr = (u64)&(arr[i]) - __base; \
|
||||
asm volatile ( \
|
||||
"if %0 <= %[max] goto +2\n" \
|
||||
"%0 = 0\n" \
|
||||
"goto +1\n" \
|
||||
"%0 += %1\n" \
|
||||
: "+r"(__addr) \
|
||||
: "r"(__base), \
|
||||
[max]"r"(sizeof(arr[0]) * ((n) - 1))); \
|
||||
__addr; \
|
||||
#define ARRAY_ELEM_PTR(arr, i, n) (typeof(arr[i]) *)				\
({										\
	u64 __arr_start = (u64)arr;						\
	u64 __elem = (u64)&(arr[i]) - __arr_start;				\
	/*									\
	 * __elem starts out as the byte offset of arr[i]. When it does not	\
	 * exceed the offset of the last element, the base is added back to	\
	 * form the element address; otherwise it is zeroed, so the macro	\
	 * evaluates to a NULL pointer.						\
	 */									\
	asm volatile (								\
		"if %[off] <= %[max] goto +2\n"					\
		"%[off] = 0\n"							\
		"goto +1\n"							\
		"%[off] += %[base]\n"						\
		: [off]"+r"(__elem)						\
		: [base]"r"(__arr_start),					\
		  [max]"r"(sizeof(arr[0]) * ((n) - 1)));			\
	__elem;									\
})
|
||||
|
||||
/*
|
||||
@ -227,7 +250,7 @@ int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
|
||||
|
||||
struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym;
|
||||
|
||||
extern void *bpf_refcount_acquire_impl(void *kptr, void *meta) __ksym;
|
||||
void *bpf_refcount_acquire_impl(void *kptr, void *meta) __ksym;
|
||||
#define bpf_refcount_acquire(kptr) bpf_refcount_acquire_impl(kptr, NULL)
|
||||
|
||||
/* task */
|
||||
|
@ -18,13 +18,15 @@
|
||||
/*
|
||||
* %SCX_KICK_IDLE is a later addition. To support both before and after, use
|
||||
* %__COMPAT_SCX_KICK_IDLE which becomes 0 on kernels which don't support it.
|
||||
* Users can use %SCX_KICK_IDLE directly in the future.
|
||||
*/
|
||||
#define __COMPAT_SCX_KICK_IDLE \
|
||||
__COMPAT_ENUM_OR_ZERO(enum scx_kick_flags, SCX_KICK_IDLE)
|
||||
|
||||
/*
|
||||
* scx_switch_all() was replaced by %SCX_OPS_SWITCH_PARTIAL. See
|
||||
* %__COMPAT_SCX_OPS_SWITCH_PARTIAL in compat.h.
|
||||
* %__COMPAT_SCX_OPS_SWITCH_PARTIAL in compat.h. This can be dropped in the
|
||||
* future.
|
||||
*/
|
||||
void scx_bpf_switch_all(void) __ksym __weak;
|
||||
|
||||
@ -34,6 +36,67 @@ static inline void __COMPAT_scx_bpf_switch_all(void)
|
||||
scx_bpf_switch_all();
|
||||
}
|
||||
|
||||
/*
 * scx_bpf_exit() is a new addition. Fall back to scx_bpf_error() if
 * unavailable. Users can use scx_bpf_exit() directly in the future.
 *
 * @code is only used on the scx_bpf_exit() path. The ## before args drops the
 * preceding comma when the macro is invoked with a format string and no
 * further arguments.
 */
#define __COMPAT_scx_bpf_exit(code, fmt, args...)				\
({										\
	if (bpf_ksym_exists(scx_bpf_exit_bstr))					\
		scx_bpf_exit((code), fmt, ##args);				\
	else									\
		scx_bpf_error(fmt, ##args);					\
})
|
||||
|
||||
/*
|
||||
* scx_bpf_nr_cpu_ids(), scx_bpf_get_possible/online_cpumask() are new. No good
|
||||
* way to noop these kfuncs. Provide a test macro. Users can assume existence in
|
||||
* the future.
|
||||
*/
|
||||
#define __COMPAT_HAS_CPUMASKS \
|
||||
bpf_ksym_exists(scx_bpf_nr_cpu_ids)
|
||||
|
||||
/*
|
||||
* cpuperf is new. The following become no-ops on older kernels. Callers can be
|
||||
* updated to call cpuperf kfuncs directly in the future.
|
||||
*/
|
||||
/*
 * Returns scx_bpf_cpuperf_cap(@cpu) when that kfunc exists on the running
 * kernel, and the constant 1024 otherwise.
 */
static inline u32 __COMPAT_scx_bpf_cpuperf_cap(s32 cpu)
{
	if (!bpf_ksym_exists(scx_bpf_cpuperf_cap))
		return 1024;

	return scx_bpf_cpuperf_cap(cpu);
}
|
||||
|
||||
/*
 * Returns scx_bpf_cpuperf_cur(@cpu) when that kfunc exists on the running
 * kernel, and the constant 1024 otherwise.
 */
static inline u32 __COMPAT_scx_bpf_cpuperf_cur(s32 cpu)
{
	if (!bpf_ksym_exists(scx_bpf_cpuperf_cur))
		return 1024;

	return scx_bpf_cpuperf_cur(cpu);
}
|
||||
|
||||
/*
 * Forwards @cpu and @perf to scx_bpf_cpuperf_set() when that kfunc exists on
 * the running kernel; does nothing otherwise.
 */
static inline void __COMPAT_scx_bpf_cpuperf_set(s32 cpu, u32 perf)
{
	/* plain call: a void function must not return an expression */
	if (bpf_ksym_exists(scx_bpf_cpuperf_set))
		scx_bpf_cpuperf_set(cpu, perf);
}
|
||||
|
||||
/*
|
||||
* Iteration and scx_bpf_consume_task() are new. The following become noop on
|
||||
* older kernels. The users can switch to bpf_for_each(scx_dsq) and directly
|
||||
* call scx_bpf_consume_task() in the future.
|
||||
*/
|
||||
/*
 * Statement prefix: the bpf_for_each(scx_dsq, ...) loop, and therefore the
 * body that follows the macro, is entered only when bpf_iter_scx_dsq_new
 * exists. The "if (!...) {} else" shape keeps an `else` written after the
 * loop body from binding to the macro's internal `if`.
 */
#define __COMPAT_DSQ_FOR_EACH(p, dsq_id, flags)					\
	if (!bpf_ksym_exists(bpf_iter_scx_dsq_new)) {				\
	} else									\
		bpf_for_each(scx_dsq, (p), (dsq_id), (flags))
|
||||
|
||||
/*
 * Compat stand-in for scx_bpf_consume_task(). Both @it and @p are unused and
 * the function returns false unconditionally.
 * NOTE(review): nothing here checks for kernel support or forwards to
 * scx_bpf_consume_task(), so this never consumes a task even on kernels that
 * could -- confirm the stub is intentional.
 */
static inline bool __COMPAT_scx_bpf_consume_task(struct bpf_iter_scx_dsq *it,
|
||||
struct task_struct *p)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Define sched_ext_ops. This may be expanded to define multiple variants for
|
||||
* backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
|
||||
|
@ -8,6 +8,9 @@
|
||||
#define __SCX_COMPAT_H
|
||||
|
||||
#include <bpf/btf.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
struct btf *__COMPAT_vmlinux_btf __attribute__((weak));
|
||||
|
||||
@ -69,6 +72,12 @@ static inline bool __COMPAT_read_enum(const char *type, const char *name, u64 *v
|
||||
__val; \
|
||||
})
|
||||
|
||||
static inline bool __COMPAT_has_ksym(const char *ksym)
|
||||
{
|
||||
__COMPAT_load_vmlinux_btf();
|
||||
return btf__find_by_name(__COMPAT_vmlinux_btf, ksym) >= 0;
|
||||
}
|
||||
|
||||
static inline bool __COMPAT_struct_has_field(const char *type, const char *field)
|
||||
{
|
||||
const struct btf_type *t;
|
||||
@ -101,27 +110,79 @@ static inline bool __COMPAT_struct_has_field(const char *type, const char *field
|
||||
* An ops flag, %SCX_OPS_SWITCH_PARTIAL, replaced scx_bpf_switch_all() which had
|
||||
* to be called from ops.init(). To support both before and after, use both
|
||||
* %__COMPAT_SCX_OPS_SWITCH_PARTIAL and %__COMPAT_scx_bpf_switch_all() defined
|
||||
* in compat.bpf.h.
|
||||
* in compat.bpf.h. Users can switch to directly using %SCX_OPS_SWITCH_PARTIAL
|
||||
* in the future.
|
||||
*/
|
||||
#define __COMPAT_SCX_OPS_SWITCH_PARTIAL \
|
||||
__COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL")
|
||||
|
||||
/*
|
||||
* scx_bpf_nr_cpu_ids(), scx_bpf_get_possible/online_cpumask() are new. Users
|
||||
* will be able to assume existence in the future.
|
||||
*/
|
||||
#define __COMPAT_HAS_CPUMASKS \
|
||||
__COMPAT_has_ksym("scx_bpf_nr_cpu_ids")
|
||||
|
||||
/*
|
||||
* DSQ iterator is new. Users will be able to assume existence in the future.
|
||||
*/
|
||||
#define __COMPAT_HAS_DSQ_ITER \
|
||||
__COMPAT_has_ksym("bpf_iter_scx_dsq_new")
|
||||
|
||||
/*
 * Read the hotplug sequence number from /sys/kernel/sched_ext/hotplug_seq.
 *
 * Returns the parsed value, or -ENOENT when the file cannot be opened. A
 * failed or empty read, and a value that ends up negative, trip SCX_BUG_ON().
 */
static inline long scx_hotplug_seq(void)
{
	int fd;
	char buf[32];
	ssize_t len;
	long val;

	fd = open("/sys/kernel/sched_ext/hotplug_seq", O_RDONLY);
	if (fd < 0)
		return -ENOENT;

	/* read at most sizeof(buf) - 1 bytes to leave room for the NUL */
	len = read(fd, buf, sizeof(buf) - 1);
	/* %zd is the matching conversion for ssize_t */
	SCX_BUG_ON(len <= 0, "read failed (%zd)", len);
	buf[len] = 0;
	close(fd);

	val = strtoul(buf, NULL, 10);
	/* val is signed; print it as such so a negative value is readable */
	SCX_BUG_ON(val < 0, "invalid num hotplug events: %ld", val);

	return val;
}
|
||||
|
||||
/*
|
||||
* struct sched_ext_ops can change over time. If compat.bpf.h::SCX_OPS_DEFINE()
|
||||
* is used to define ops and compat.h::SCX_OPS_LOAD/ATTACH() are used to load
|
||||
* and attach it, backward compatibility is automatically maintained where
|
||||
* reasonable.
|
||||
*
|
||||
* - sched_ext_ops.exit_dump_len was added later. On kernels which don't support
|
||||
* it, the value is ignored and a warning is triggered if the value is
|
||||
* requested to be non-zero.
|
||||
* - ops.tick(): Ignored on older kernels with a warning.
|
||||
* - ops.exit_dump_len: Cleared to zero on older kernels with a warning.
|
||||
* - ops.hotplug_seq: Ignored on older kernels.
|
||||
*/
|
||||
/*
 * Open the skeleton named @__scx_name via __scx_name##__open(), bug out if
 * that fails, and evaluate to the skeleton pointer. When struct sched_ext_ops
 * has a hotplug_seq field, the @__ops_name struct_ops' hotplug_seq is set
 * from scx_hotplug_seq() first.
 */
#define SCX_OPS_OPEN(__ops_name, __scx_name) ({					\
	struct __scx_name *__skel = __scx_name##__open();			\
										\
	SCX_BUG_ON(!__skel, "Could not open " #__scx_name);			\
	if (__COMPAT_struct_has_field("sched_ext_ops", "hotplug_seq")) {	\
		__skel->struct_ops.__ops_name->hotplug_seq =			\
			scx_hotplug_seq();					\
	}									\
	__skel;									\
})
|
||||
|
||||
/*
 * Load the skeleton @__skel after sizing the exit-info dump buffer with
 * UEI_SET_SIZE(). Two settings are cleared, each with a warning on stderr,
 * when struct sched_ext_ops lacks the corresponding field: a non-zero
 * exit_dump_len is reset to 0 and a non-NULL tick is reset to NULL. A failed
 * __scx_name##__load() trips SCX_BUG_ON().
 */
#define SCX_OPS_LOAD(__skel, __ops_name, __scx_name, __uei_name) ({		\
	UEI_SET_SIZE(__skel, __ops_name, __uei_name);				\
	if (!__COMPAT_struct_has_field("sched_ext_ops", "exit_dump_len") &&	\
	    (__skel)->struct_ops.__ops_name->exit_dump_len) {			\
		fprintf(stderr, "WARNING: kernel doesn't support setting exit dump len\n"); \
		(__skel)->struct_ops.__ops_name->exit_dump_len = 0;		\
	}									\
	if (!__COMPAT_struct_has_field("sched_ext_ops", "tick") &&		\
	    (__skel)->struct_ops.__ops_name->tick) {				\
		fprintf(stderr, "WARNING: kernel doesn't support ops.tick()\n"); \
		(__skel)->struct_ops.__ops_name->tick = NULL;			\
	}									\
	SCX_BUG_ON(__scx_name##__load((__skel)), "Failed to load skel");	\
})
|
||||
|
@ -77,7 +77,35 @@ struct user_exit_info {
|
||||
if (__uei->msg[0] != '\0') \
|
||||
fprintf(stderr, " (%s)", __uei->msg); \
|
||||
fputs("\n", stderr); \
|
||||
__uei->exit_code; \
|
||||
})
|
||||
|
||||
/*
|
||||
* We can't import vmlinux.h while compiling user C code. Let's duplicate
|
||||
* scx_exit_code definition.
|
||||
*/
|
||||
enum scx_exit_code {
|
||||
/* Reasons */
|
||||
SCX_ECODE_RSN_HOTPLUG = 1LLU << 32,
|
||||
|
||||
/* Actions */
|
||||
SCX_ECODE_ACT_RESTART = 1LLU << 48,
|
||||
};
|
||||
|
||||
enum uei_ecode_mask {
|
||||
UEI_ECODE_USER_MASK = ((1LLU << 32) - 1),
|
||||
UEI_ECODE_SYS_RSN_MASK = ((1LLU << 16) - 1) << 32,
|
||||
UEI_ECODE_SYS_ACT_MASK = ((1LLU << 16) - 1) << 48,
|
||||
};
|
||||
|
||||
/*
|
||||
* These macros interpret the ecode returned from UEI_REPORT().
|
||||
*/
|
||||
#define UEI_ECODE_USER(__ecode) ((__ecode) & UEI_ECODE_USER_MASK)
|
||||
#define UEI_ECODE_SYS_RSN(__ecode) ((__ecode) & UEI_ECODE_SYS_RSN_MASK)
|
||||
#define UEI_ECODE_SYS_ACT(__ecode) ((__ecode) & UEI_ECODE_SYS_ACT_MASK)
|
||||
|
||||
#define UEI_ECODE_RESTART(__ecode) (UEI_ECODE_SYS_ACT((__ecode)) == SCX_ECODE_ACT_RESTART)
|
||||
|
||||
#endif /* __bpf__ */
|
||||
#endif /* __USER_EXIT_INFO_H */
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1 +1 @@
|
||||
vmlinux-v6.9-g5dc95302301f.h
|
||||
vmlinux-v6.9-g73f4013eb1eb.h
|
Loading…
Reference in New Issue
Block a user