Merge pull request #253 from sched-ext/htejun/sync-kernel

Sync to the latest kernel
2024-11-26 04:30:23 +00:00 · 2024-04-29 10:16:35 -10:00 · 2024-04-29 10:16:35 -10:00 · b1bb2a5c5f
commit b1bb2a5c5f
parent 3ee64a1301 c77d101655
20 changed files with 115548 additions and 114769 deletions
--- a/rust/scx_utils/src/builder.rs
+++ b/rust/scx_utils/src/builder.rs
@ -45,7 +45,7 @@ impl Builder {
        let bindings = bindgen::Builder::default()
            .header("bindings.h")
            .allowlist_type("scx_exit_kind")
-            .allowlist_type("scx_internal_consts")
+            .allowlist_type("scx_consts")
            .parse_callbacks(Box::new(bindgen::CargoCallbacks))
            .generate()
            .expect("Unable to generate bindings");
--- a/rust/scx_utils/src/lib.rs
+++ b/rust/scx_utils/src/lib.rs
@ -43,7 +43,7 @@ pub use builder::Builder;
 mod user_exit_info;
 pub use user_exit_info::ScxExitKind;
-pub use user_exit_info::ScxInternalConsts;
+pub use user_exit_info::ScxConsts;
 pub use user_exit_info::UeiDumpPtr;
 pub use user_exit_info::UserExitInfo;
 pub use user_exit_info::UEI_DUMP_PTR_MUTEX;
--- a/rust/scx_utils/src/user_exit_info.rs
+++ b/rust/scx_utils/src/user_exit_info.rs
@ -29,8 +29,8 @@ pub enum ScxExitKind {
    ErrorStall = bindings::scx_exit_kind_SCX_EXIT_ERROR_STALL as isize,
 }
-pub enum ScxInternalConsts {
+pub enum ScxConsts {
-    ExitDumpDflLen = bindings::scx_internal_consts_SCX_EXIT_DUMP_DFL_LEN as isize,
+    ExitDumpDflLen = bindings::scx_consts_SCX_EXIT_DUMP_DFL_LEN as isize,
 }
 /// Takes a reference to C struct user_exit_info and reads it into
@ -65,7 +65,7 @@ macro_rules! uei_set_size {
    ($skel: expr, $ops: ident, $uei:ident) => {{
        scx_utils::paste! {
            let len = match $skel.struct_ops.$ops().exit_dump_len {
-                0 => scx_utils::ScxInternalConsts::ExitDumpDflLen as u32,
+                0 => scx_utils::ScxConsts::ExitDumpDflLen as u32,
                v => v,
            };
            $skel.rodata_mut().[<$uei _dump_len>] = len;
--- a/scheds/c/scx_central.c
+++ b/scheds/c/scx_central.c
@ -24,10 +24,19 @@ const char help_fmt[] =
 "\n"
 "  -s SLICE_US   Override slice duration\n"
 "  -c CPU        Override the central CPU (default: 0)\n"
 "  -v            Print libbpf debug messages\n"
 "  -h            Display this help and exit\n";
 static bool verbose;
 static volatile int exit_req;
 /*
 * libbpf log callback. LIBBPF_DEBUG messages are dropped unless verbose is
 * set; everything else is formatted to stderr.
 */
 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
 {
 	if (verbose || level != LIBBPF_DEBUG)
 		return vfprintf(stderr, format, args);
 	return 0;
 }
 static void sigint_handler(int dummy)
 {
 	exit_req = 1;
@ -37,22 +46,20 @@ int main(int argc, char **argv)
 {
 	struct scx_central *skel;
 	struct bpf_link *link;
-	__u64 seq = 0;
+	__u64 seq = 0, ecode;
 	__s32 opt;
 	cpu_set_t *cpuset;
 	libbpf_set_print(libbpf_print_fn);
 	signal(SIGINT, sigint_handler);
 	signal(SIGTERM, sigint_handler);
-
+restart:
-	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+	skel = SCX_OPS_OPEN(central_ops, scx_central);
 	skel = scx_central__open();
 	SCX_BUG_ON(!skel, "Failed to open skel");
 	skel->rodata->central_cpu = 0;
 	skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
-	while ((opt = getopt(argc, argv, "s:c:ph")) != -1) {
+	while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
 		switch (opt) {
 		case 's':
 			skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
@ -60,6 +67,9 @@ int main(int argc, char **argv)
 		case 'c':
 			skel->rodata->central_cpu = strtoul(optarg, NULL, 0);
 			break;
 		case 'v':
 			verbose = true;
 			break;
 		default:
 			fprintf(stderr, help_fmt, basename(argv[0]));
 			return opt != 'h';
@ -116,7 +126,10 @@ int main(int argc, char **argv)
 	}
 	bpf_link__destroy(link);
-	UEI_REPORT(skel, uei);
+	ecode = UEI_REPORT(skel, uei);
 	scx_central__destroy(skel);
 	if (UEI_ECODE_RESTART(ecode))
 		goto restart;
 	return 0;
 }
--- a/scheds/c/scx_flatcg.c
+++ b/scheds/c/scx_flatcg.c
@ -26,15 +26,24 @@ const char help_fmt[] =
 "\n"
 "See the top-level comment in .bpf.c for more details.\n"
 "\n"
-"Usage: %s [-s SLICE_US] [-i INTERVAL] [-f]\n"
+"Usage: %s [-s SLICE_US] [-i INTERVAL] [-f] [-v]\n"
 "\n"
 "  -s SLICE_US   Override slice duration\n"
 "  -i INTERVAL   Report interval\n"
 "  -f            Use FIFO scheduling instead of weighted vtime scheduling\n"
 "  -v            Print libbpf debug messages\n"
 "  -h            Display this help and exit\n";
 static bool verbose;
 static volatile int exit_req;
 /*
 * libbpf log callback. LIBBPF_DEBUG messages are dropped unless verbose is
 * set; everything else is formatted to stderr.
 */
 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
 {
 	if (verbose || level != LIBBPF_DEBUG)
 		return vfprintf(stderr, format, args);
 	return 0;
 }
 static void sigint_handler(int dummy)
 {
 	exit_req = 1;
@ -119,18 +128,17 @@ int main(int argc, char **argv)
 	__u64 last_stats[FCG_NR_STATS] = {};
 	unsigned long seq = 0;
 	__s32 opt;
 	__u64 ecode;
 	libbpf_set_print(libbpf_print_fn);
 	signal(SIGINT, sigint_handler);
 	signal(SIGTERM, sigint_handler);
-
+restart:
-	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+	skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);
 	skel = scx_flatcg__open();
 	SCX_BUG_ON(!skel, "Failed to open skel");
 	skel->rodata->nr_cpus = libbpf_num_possible_cpus();
-	while ((opt = getopt(argc, argv, "s:i:dfph")) != -1) {
+	while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) {
 		double v;
 		switch (opt) {
@ -149,6 +157,9 @@ int main(int argc, char **argv)
 		case 'f':
 			skel->rodata->fifo_sched = true;
 			break;
 		case 'v':
 			verbose = true;
 			break;
 		case 'h':
 		default:
 			fprintf(stderr, help_fmt, basename(argv[0]));
@ -213,7 +224,10 @@ int main(int argc, char **argv)
 	}
 	bpf_link__destroy(link);
-	UEI_REPORT(skel, uei);
+	ecode = UEI_REPORT(skel, uei);
 	scx_flatcg__destroy(skel);
 	if (UEI_ECODE_RESTART(ecode))
 		goto restart;
 	return 0;
 }
--- a/scheds/c/scx_nest.c
+++ b/scheds/c/scx_nest.c
@ -29,10 +29,19 @@ const char help_fmt[] =
 "  -i ITERS      Number of successive placement failures tolerated before trying to aggressively expand primary nest (default 2), or 0 to disable\n"
 "  -s SLICE_US   Override slice duration in us (default 20000us / 20ms)\n"
 "  -I            First try to find a fully idle core, and then any idle core, when searching nests. Default behavior is to ignore hypertwins and check for any idle core.\n"
 "  -v            Print libbpf debug messages\n"
 "  -h            Display this help and exit\n";
 static bool verbose;
 static volatile int exit_req;
 /*
 * libbpf log callback. LIBBPF_DEBUG messages are dropped unless verbose is
 * set; everything else is formatted to stderr.
 */
 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
 {
 	if (verbose || level != LIBBPF_DEBUG)
 		return vfprintf(stderr, format, args);
 	return 0;
 }
 static void sigint_handler(int nest)
 {
 	exit_req = 1;
@ -152,19 +161,18 @@ int main(int argc, char **argv)
 	struct scx_nest *skel;
 	struct bpf_link *link;
 	__u32 opt;
 	__u64 ecode;
 	libbpf_set_print(libbpf_print_fn);
 	signal(SIGINT, sigint_handler);
 	signal(SIGTERM, sigint_handler);
-
+restart:
-	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+	skel = SCX_OPS_OPEN(nest_ops, scx_nest);
 	skel = scx_nest__open();
 	SCX_BUG_ON(!skel, "Failed to open skel");
 	skel->rodata->nr_cpus = libbpf_num_possible_cpus();
 	skel->rodata->sampling_cadence_ns = SAMPLING_CADENCE_S * 1000 * 1000 * 1000;
-	while ((opt = getopt(argc, argv, "hId:m:i:s:")) != -1) {
+	while ((opt = getopt(argc, argv, "d:m:i:Is:vh")) != -1) {
 		switch (opt) {
 		case 'd':
 			skel->rodata->p_remove_ns = strtoull(optarg, NULL, 0) * 1000;
@ -181,6 +189,9 @@ int main(int argc, char **argv)
 		case 's':
 			skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
 			break;
 		case 'v':
 			verbose = true;
 			break;
 		default:
 			fprintf(stderr, help_fmt, basename(argv[0]));
 			return opt != 'h';
@ -216,7 +227,10 @@ int main(int argc, char **argv)
 	}
 	bpf_link__destroy(link);
-	UEI_REPORT(skel, uei);
+	ecode = UEI_REPORT(skel, uei);
 	scx_nest__destroy(skel);
 	if (UEI_ECODE_RESTART(ecode))
 		goto restart;
 	return 0;
 }
--- a/scheds/c/scx_pair.c
+++ b/scheds/c/scx_pair.c
@ -23,10 +23,19 @@ const char help_fmt[] =
 "Usage: %s [-S STRIDE]\n"
 "\n"
 "  -S STRIDE     Override CPU pair stride (default: nr_cpus_ids / 2)\n"
 "  -v            Print libbpf debug messages\n"
 "  -h            Display this help and exit\n";
 static bool verbose;
 static volatile int exit_req;
 /*
 * libbpf log callback. LIBBPF_DEBUG messages are dropped unless verbose is
 * set; everything else is formatted to stderr.
 */
 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
 {
 	if (verbose || level != LIBBPF_DEBUG)
 		return vfprintf(stderr, format, args);
 	return 0;
 }
 static void sigint_handler(int dummy)
 {
 	exit_req = 1;
@ -36,27 +45,28 @@ int main(int argc, char **argv)
 {
 	struct scx_pair *skel;
 	struct bpf_link *link;
-	__u64 seq = 0;
+	__u64 seq = 0, ecode;
 	__s32 stride, i, opt, outer_fd;
 	libbpf_set_print(libbpf_print_fn);
 	signal(SIGINT, sigint_handler);
 	signal(SIGTERM, sigint_handler);
-
+restart:
-	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+	skel = SCX_OPS_OPEN(pair_ops, scx_pair);
 	skel = scx_pair__open();
 	SCX_BUG_ON(!skel, "Failed to open skel");
 	skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
 	/* pair up the earlier half to the latter by default, override with -S */
 	stride = skel->rodata->nr_cpu_ids / 2;
-	while ((opt = getopt(argc, argv, "S:ph")) != -1) {
+	while ((opt = getopt(argc, argv, "S:vh")) != -1) {
 		switch (opt) {
 		case 'S':
 			stride = strtoul(optarg, NULL, 0);
 			break;
 		case 'v':
 			verbose = true;
 			break;
 		default:
 			fprintf(stderr, help_fmt, basename(argv[0]));
 			return opt != 'h';
@ -158,7 +168,10 @@ int main(int argc, char **argv)
 	}
 	bpf_link__destroy(link);
-	UEI_REPORT(skel, uei);
+	ecode = UEI_REPORT(skel, uei);
 	scx_pair__destroy(skel);
 	if (UEI_ECODE_RESTART(ecode))
 		goto restart;
 	return 0;
 }
--- a/scheds/c/scx_qmap.bpf.c
+++ b/scheds/c/scx_qmap.bpf.c
@ -23,6 +23,12 @@
 * Copyright (c) 2022 David Vernet <dvernet@meta.com>
 */
 #include <scx/common.bpf.h>
 #include <string.h>
 /* Scheduler-wide constants used by this file. */
 enum consts {
 	/* interval passed to bpf_timer_start() for the monitor timer */
 	ONE_SEC_IN_NS		= 1000000000,
 	/* ID of the DSQ created in qmap_init() and consumed in dispatch */
 	SHARED_DSQ		= 0,
 };
 char _license[] SEC("license") = "GPL";
@ -30,6 +36,9 @@ const volatile u64 slice_ns = SCX_SLICE_DFL;
 const volatile u32 stall_user_nth;
 const volatile u32 stall_kernel_nth;
 const volatile u32 dsp_inf_loop_after;
 const volatile u32 dsp_batch;
 const volatile bool print_shared_dsq;
 const volatile char exp_prefix[17];
 const volatile s32 disallow_tgid;
 const volatile bool switch_partial;
@ -62,6 +71,18 @@ struct {
 	},
 };
 /*
 * If enabled, the CPU performance target is chosen from the queue index
 * using the following table. Each entry is a fraction of SCX_CPUPERF_ONE,
 * rising in quarter steps from 0 for index 0 to SCX_CPUPERF_ONE for index 4.
 */
 static const u32 qidx_to_cpuperf_target[] = {
 	[0] = SCX_CPUPERF_ONE * 0 / 4,
 	[1] = SCX_CPUPERF_ONE * 1 / 4,
 	[2] = SCX_CPUPERF_ONE * 2 / 4,
 	[3] = SCX_CPUPERF_ONE * 3 / 4,
 	[4] = SCX_CPUPERF_ONE * 4 / 4,
 };
 /*
 * Per-queue sequence numbers to implement core-sched ordering.
 *
@ -86,17 +107,25 @@ struct {
 	__type(value, struct task_ctx);
 } task_ctx_stor SEC(".maps");
-/* Per-cpu dispatch index and remaining count */
+struct cpu_ctx {
 	u64	dsp_idx;	/* dispatch index */
 	u64	dsp_cnt;	/* remaining count */
 	u32	avg_weight;
 	u32	cpuperf_target;
 };
 struct {
 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-	__uint(max_entries, 2);
+	__uint(max_entries, 1);
 	__type(key, u32);
-	__type(value, u64);
+	__type(value, struct cpu_ctx);
-} dispatch_idx_cnt SEC(".maps");
+} cpu_ctx_stor SEC(".maps");
 /* Statistics */
 u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued;
-u64 nr_core_sched_execed;
+u64 nr_core_sched_execed, nr_expedited;
 u32 cpuperf_min, cpuperf_avg, cpuperf_max;
 u32 cpuperf_target_min, cpuperf_target_avg, cpuperf_target_max;
 s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p,
 		   s32 prev_cpu, u64 wake_flags)
@ -189,7 +218,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
 	if (enq_flags & SCX_ENQ_REENQ) {
 		s32 cpu;
-		scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, 0, enq_flags);
+		scx_bpf_dispatch(p, SHARED_DSQ, 0, enq_flags);
 		cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
 		if (cpu >= 0)
 			scx_bpf_kick_cpu(cpu, __COMPAT_SCX_KICK_IDLE);
@ -204,7 +233,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
 	/* Queue on the selected FIFO. If the FIFO overflows, punt to the shared DSQ. */
 	if (bpf_map_push_elem(ring, &pid, 0)) {
-		scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, slice_ns, enq_flags);
+		scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, enq_flags);
 		return;
 	}
@ -233,18 +262,49 @@ static void update_core_sched_head_seq(struct task_struct *p)
 		scx_bpf_error("task_ctx lookup failed");
 }
 /*
 * Consume from SHARED_DSQ. When exp_prefix is set, tasks whose comm matches
 * it are consumed first; otherwise, or if none matched, fall back to a plain
 * scx_bpf_consume(). Returns true if anything was consumed.
 */
 static bool consume_shared_dsq(void)
 {
 	struct task_struct *p;
 	bool consumed;
 	/* no expedite prefix configured, plain consumption is all there is */
 	if (exp_prefix[0] == '\0')
 		return scx_bpf_consume(SHARED_DSQ);
 	/*
 	 * To demonstrate the use of scx_bpf_consume_task(), implement silly
 	 * selective priority boosting mechanism by scanning SHARED_DSQ looking
 	 * for matching comms and consume them first. This makes difference only
 	 * when dsp_batch is larger than 1.
 	 */
 	consumed = false;
 	__COMPAT_DSQ_FOR_EACH(p, SHARED_DSQ, 0) {
 		char comm[sizeof(exp_prefix)];
 		memcpy(comm, p->comm, sizeof(exp_prefix) - 1);
 		/*
 		 * Only sizeof(exp_prefix) - 1 bytes are copied above while the
 		 * comparison below is told the full buffer size. Terminate the
 		 * buffer so that the last byte is never read uninitialized.
 		 */
 		comm[sizeof(comm) - 1] = '\0';
 		if (!bpf_strncmp(comm, sizeof(exp_prefix),
 				 (const char *)exp_prefix) &&
 		    __COMPAT_scx_bpf_consume_task(BPF_FOR_EACH_ITER, p)) {
 			consumed = true;
 			__sync_fetch_and_add(&nr_expedited, 1);
 		}
 	}
 	return consumed || scx_bpf_consume(SHARED_DSQ);
 }
 void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev)
 {
-	u32 zero = 0, one = 1;
+	struct task_struct *p;
-	u64 *idx = bpf_map_lookup_elem(&dispatch_idx_cnt, &zero);
+	struct cpu_ctx *cpuc;
-	u64 *cnt = bpf_map_lookup_elem(&dispatch_idx_cnt, &one);
+	u32 zero = 0, batch = dsp_batch ?: 1;
 	void *fifo;
-	s32 pid;
+	s32 i, pid;
-	int i;
+
 	if (consume_shared_dsq())
 		return;
 	if (dsp_inf_loop_after && nr_dispatched > dsp_inf_loop_after) {
 		struct task_struct *p;
 		/*
 		 * PID 2 should be kthreadd which should mostly be idle and off
 		 * the scheduler. Let's keep dispatching it to force the kernel
@ -252,49 +312,80 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev)
 		 */
 		p = bpf_task_from_pid(2);
 		if (p) {
-			scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, slice_ns, 0);
+			scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, 0);
 			bpf_task_release(p);
 			return;
 		}
 	}
-	if (!idx || !cnt) {
+	if (!(cpuc = bpf_map_lookup_elem(&cpu_ctx_stor, &zero))) {
-		scx_bpf_error("failed to lookup idx[%p], cnt[%p]", idx, cnt);
+		scx_bpf_error("failed to look up cpu_ctx");
 		return;
 	}
 	for (i = 0; i < 5; i++) {
 		/* Advance the dispatch cursor and pick the fifo. */
-		if (!*cnt) {
+		if (!cpuc->dsp_cnt) {
-			*idx = (*idx + 1) % 5;
+			cpuc->dsp_idx = (cpuc->dsp_idx + 1) % 5;
-			*cnt = 1 << *idx;
+			cpuc->dsp_cnt = 1 << cpuc->dsp_idx;
 		}
 		(*cnt)--;
-		fifo = bpf_map_lookup_elem(&queue_arr, idx);
+		fifo = bpf_map_lookup_elem(&queue_arr, &cpuc->dsp_idx);
 		if (!fifo) {
-			scx_bpf_error("failed to find ring %llu", *idx);
+			scx_bpf_error("failed to find ring %llu", cpuc->dsp_idx);
 			return;
 		}
 		/* Dispatch or advance. */
-		if (!bpf_map_pop_elem(fifo, &pid)) {
+		bpf_repeat(BPF_MAX_LOOPS) {
-			struct task_struct *p;
+			if (bpf_map_pop_elem(fifo, &pid))
 				break;
 			p = bpf_task_from_pid(pid);
-			if (p) {
+			if (!p)
-				update_core_sched_head_seq(p);
+				continue;
-				__sync_fetch_and_add(&nr_dispatched, 1);
+
-				scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, slice_ns, 0);
+			update_core_sched_head_seq(p);
-				bpf_task_release(p);
+			__sync_fetch_and_add(&nr_dispatched, 1);
 			scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, 0);
 			bpf_task_release(p);
 			batch--;
 			cpuc->dsp_cnt--;
 			if (!batch || !scx_bpf_dispatch_nr_slots()) {
 				consume_shared_dsq();
 				return;
 			}
 			if (!cpuc->dsp_cnt)
 				break;
 		}
-		*cnt = 0;
+		cpuc->dsp_cnt = 0;
 	}
 }
 /*
 * ops.tick(): fold @p's weight into this CPU's running average and set the
 * cpuperf target that corresponds to the averaged weight.
 */
 void BPF_STRUCT_OPS(qmap_tick, struct task_struct *p)
 {
 	u32 key = 0;
 	struct cpu_ctx *cpuc = bpf_map_lookup_elem(&cpu_ctx_stor, &key);
 	int qidx;
 	if (!cpuc) {
 		scx_bpf_error("failed to look up cpu_ctx");
 		return;
 	}
 	/*
 	 * The running average of weights picks the target cpuperf level. This
 	 * only demonstrates the cpuperf feature; it is not meant as a
 	 * practical way of regulating CPU frequency.
 	 */
 	cpuc->avg_weight = cpuc->avg_weight * 3 / 4 + p->scx.weight / 4;
 	qidx = weight_to_idx(cpuc->avg_weight);
 	cpuc->cpuperf_target = qidx_to_cpuperf_target[qidx];
 	scx_bpf_cpuperf_set(scx_bpf_task_cpu(p), cpuc->cpuperf_target);
 }
 /*
 * The distance from the head of the queue scaled by the weight of the queue.
 * The lower the number, the older the task and the higher the priority.
@ -371,11 +462,189 @@ s32 BPF_STRUCT_OPS(qmap_init_task, struct task_struct *p,
 		return -ENOMEM;
 }
-s32 BPF_STRUCT_OPS(qmap_init)
+/*
 * Print out the online and possible CPU map using bpf_printk() as a
 * demonstration of using the cpumask kfuncs and ops.cpu_on/offline().
 */
 static void print_cpus(void)
 {
 	const struct cpumask *possible, *online;
 	s32 cpu;
 	char buf[128] = "", *p;
 	int idx;
 	/* nothing to print when the cpumask helpers are unavailable */
 	if (!__COMPAT_HAS_CPUMASKS)
 		return;
 	possible = scx_bpf_get_possible_cpumask();
 	online = scx_bpf_get_online_cpumask();
 	idx = 0;
 	bpf_for(cpu, 0, scx_bpf_nr_cpu_ids()) {
 		/* NOTE(review): presumably NULL once idx runs past buf - confirm */
 		if (!(p = MEMBER_VPTR(buf, [idx++])))
 			break;
 		/* 'O' = online, 'X' = possible but not online, ' ' = neither */
 		if (bpf_cpumask_test_cpu(cpu, online))
 			*p++ = 'O';
 		else if (bpf_cpumask_test_cpu(cpu, possible))
 			*p++ = 'X';
 		else
 			*p++ = ' ';
 		/* emit a '|' separator after every eighth CPU */
 		if ((cpu & 7) == 7) {
 			if (!(p = MEMBER_VPTR(buf, [idx++])))
 				break;
 			*p++ = '|';
 		}
 	}
 	/* force termination regardless of how far the loop got */
 	buf[sizeof(buf) - 1] = '\0';
 	scx_bpf_put_cpumask(online);
 	scx_bpf_put_cpumask(possible);
 	bpf_printk("CPUS: |%s", buf);
 }
 /* ops.cpu_online(): log the event and print the updated CPU map. */
 void BPF_STRUCT_OPS(qmap_cpu_online, s32 cpu)
 {
 	bpf_printk("CPU %d coming online", cpu);
 	/* @cpu is already online at this point */
 	print_cpus();
 }
 /* ops.cpu_offline(): log the event and print the CPU map as it stands. */
 void BPF_STRUCT_OPS(qmap_cpu_offline, s32 cpu)
 {
 	bpf_printk("CPU %d going offline", cpu);
 	/* @cpu is still online at this point */
 	print_cpus();
 }
 /* Value type of the central_timer map; wraps the monitor's bpf_timer. */
 struct monitor_timer {
 	struct bpf_timer timer;
 };
 /* Single-entry array map holding the timer that runs monitor_timerfn(). */
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__uint(max_entries, 1);
 	__type(key, u32);
 	__type(value, struct monitor_timer);
 } central_timer SEC(".maps");
 /*
 * Print out the min, avg and max performance levels of CPUs every second to
 * demonstrate the cpuperf interface.
 */
 static void monitor_cpuperf(void)
 {
 	u32 zero = 0;
 	u32 nr_cpu_ids = scx_bpf_nr_cpu_ids();
 	u64 cap_sum = 0, cur_sum = 0, cur_min = SCX_CPUPERF_ONE, cur_max = 0;
 	u64 target_sum = 0, target_min = SCX_CPUPERF_ONE, target_max = 0;
 	const struct cpumask *online;
 	int i, nr_online_cpus = 0;
 	online = scx_bpf_get_online_cpumask();
 	bpf_for(i, 0, nr_cpu_ids) {
 		struct cpu_ctx *cpuc;
 		u32 cap, cur;
 		/* only online CPUs contribute to the statistics */
 		if (!bpf_cpumask_test_cpu(i, online))
 			continue;
 		nr_online_cpus++;
 		/* collect the capacity and current cpuperf */
 		cap = scx_bpf_cpuperf_cap(i);
 		cur = scx_bpf_cpuperf_cur(i);
 		cur_min = cur < cur_min ? cur : cur_min;
 		cur_max = cur > cur_max ? cur : cur_max;
 		/*
 		 * $cur is relative to $cap. Scale it down accordingly so that
 		 * it's in the same scale as other CPUs and $cur_sum/$cap_sum
 		 * makes sense.
 		 */
 		cur_sum += cur * cap / SCX_CPUPERF_ONE;
 		cap_sum += cap;
 		/* read CPU i's own copy of the per-CPU context */
 		if (!(cpuc = bpf_map_lookup_percpu_elem(&cpu_ctx_stor, &zero, i))) {
 			scx_bpf_error("failed to look up cpu_ctx");
 			goto out;
 		}
 		/* collect target */
 		cur = cpuc->cpuperf_target;
 		target_sum += cur;
 		target_min = cur < target_min ? cur : target_min;
 		target_max = cur > target_max ? cur : target_max;
 	}
 	/*
 	 * Publish the results through the global statistics variables.
 	 * NOTE(review): cap_sum and nr_online_cpus are used as divisors
 	 * without a zero check - confirm neither can be zero here.
 	 */
 	cpuperf_min = cur_min;
 	cpuperf_avg = cur_sum * SCX_CPUPERF_ONE / cap_sum;
 	cpuperf_max = cur_max;
 	cpuperf_target_min = target_min;
 	cpuperf_target_avg = target_sum / nr_online_cpus;
 	cpuperf_target_max = target_max;
 out:
 	scx_bpf_put_cpumask(online);
 }
 /*
 * Dump the currently queued tasks in the shared DSQ to demonstrate the usage of
 * scx_bpf_dsq_nr_queued() and DSQ iterator. Raise the dispatch batch count to
 * see meaningful dumps in the trace pipe.
 */
 static void dump_shared_dsq(void)
 {
 	struct task_struct *p;
 	s32 nr;
 	if (!(nr = scx_bpf_dsq_nr_queued(SHARED_DSQ)))
 		return;
 	bpf_printk("Dumping %d tasks in SHARED_DSQ in reverse order", nr);
 	bpf_rcu_read_lock();
 	__COMPAT_DSQ_FOR_EACH(p, SHARED_DSQ, SCX_DSQ_ITER_REV)
 		bpf_printk("%s[%d]", p->comm, p->pid);
 	bpf_rcu_read_unlock();
 }
 /*
 * Timer callback: refresh the cpuperf statistics, optionally dump SHARED_DSQ
 * when print_shared_dsq is set, then re-arm the timer for another
 * ONE_SEC_IN_NS. Always returns 0.
 */
 static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
 {
 	monitor_cpuperf();
 	if (print_shared_dsq)
 		dump_shared_dsq();
 	/* re-arm; the return value of bpf_timer_start() is not checked here */
 	bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
 	return 0;
 }
 s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init)
 {
 	u32 key = 0;
 	struct bpf_timer *timer;
 	s32 ret;
 	if (!switch_partial)
 		__COMPAT_scx_bpf_switch_all();
-	return 0;
+
 	print_cpus();
 	ret = scx_bpf_create_dsq(SHARED_DSQ, -1);
 	if (ret)
 		return ret;
 	timer = bpf_map_lookup_elem(&central_timer, &key);
 	if (!timer)
 		return -ESRCH;
 	bpf_timer_init(timer, &central_timer, CLOCK_MONOTONIC);
 	bpf_timer_set_callback(timer, monitor_timerfn);
 	return bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
 }
 void BPF_STRUCT_OPS(qmap_exit, struct scx_exit_info *ei)
@ -388,9 +657,12 @@ SCX_OPS_DEFINE(qmap_ops,
 	       .enqueue			= (void *)qmap_enqueue,
 	       .dequeue			= (void *)qmap_dequeue,
 	       .dispatch		= (void *)qmap_dispatch,
 	       .tick			= (void *)qmap_tick,
 	       .core_sched_before	= (void *)qmap_core_sched_before,
 	       .cpu_release		= (void *)qmap_cpu_release,
 	       .init_task		= (void *)qmap_init_task,
 	       .cpu_online		= (void *)qmap_cpu_online,
 	       .cpu_offline		= (void *)qmap_cpu_offline,
 	       .init			= (void *)qmap_init,
 	       .exit			= (void *)qmap_exit,
 	       .flags			= SCX_OPS_ENQ_LAST,
--- a/scheds/c/scx_qmap.c
+++ b/scheds/c/scx_qmap.c
@ -19,21 +19,34 @@ const char help_fmt[] =
 "\n"
 "See the top-level comment in .bpf.c for more details.\n"
 "\n"
-"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-d PID]\n"
+"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-b COUNT]\n"
-"       [-D LEN] [-p]\n"
+"       [-P] [-E PREFIX] [-d PID] [-D LEN] [-p] [-v]\n"
 "\n"
 "  -s SLICE_US   Override slice duration\n"
 "  -e COUNT      Trigger scx_bpf_error() after COUNT enqueues\n"
 "  -t COUNT      Stall every COUNT'th user thread\n"
 "  -T COUNT      Stall every COUNT'th kernel thread\n"
 "  -l COUNT      Trigger dispatch infinite looping after COUNT dispatches\n"
 "  -b COUNT      Dispatch upto COUNT tasks together\n"
 "  -P            Print out DSQ content to trace_pipe every second, use with -b\n"
 "  -E PREFIX     Expedite consumption of threads w/ matching comm, use with -b\n"
 "                (e.g. match shell on a loaded system)\n"
 "  -d PID        Disallow a process from switching into SCHED_EXT (-1 for self)\n"
 "  -D LEN        Set scx_exit_info.dump buffer length\n"
 "  -p            Switch only tasks on SCHED_EXT policy intead of all\n"
 "  -v            Print libbpf debug messages\n"
 "  -h            Display this help and exit\n";
 static bool verbose;
 static volatile int exit_req;
 /*
 * libbpf log callback. LIBBPF_DEBUG messages are dropped unless verbose is
 * set; everything else is formatted to stderr.
 */
 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
 {
 	if (verbose || level != LIBBPF_DEBUG)
 		return vfprintf(stderr, format, args);
 	return 0;
 }
 static void sigint_handler(int dummy)
 {
 	exit_req = 1;
@ -45,15 +58,13 @@ int main(int argc, char **argv)
 	struct bpf_link *link;
 	int opt;
 	libbpf_set_print(libbpf_print_fn);
 	signal(SIGINT, sigint_handler);
 	signal(SIGTERM, sigint_handler);
-	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+	skel = SCX_OPS_OPEN(qmap_ops, scx_qmap);
-	skel = scx_qmap__open();
+	while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PE:d:D:pvh")) != -1) {
 	SCX_BUG_ON(!skel, "Failed to open skel");
 	while ((opt = getopt(argc, argv, "s:e:t:T:l:d:D:ph")) != -1) {
 		switch (opt) {
 		case 's':
 			skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
@ -70,6 +81,16 @@ int main(int argc, char **argv)
 		case 'l':
 			skel->rodata->dsp_inf_loop_after = strtoul(optarg, NULL, 0);
 			break;
 		case 'b':
 			skel->rodata->dsp_batch = strtoul(optarg, NULL, 0);
 			break;
 		case 'P':
 			skel->rodata->print_shared_dsq = true;
 			break;
 		case 'E':
 			strncpy(skel->rodata->exp_prefix, optarg,
 				sizeof(skel->rodata->exp_prefix) - 1);
 			break;
 		case 'd':
 			skel->rodata->disallow_tgid = strtol(optarg, NULL, 0);
 			if (skel->rodata->disallow_tgid < 0)
@ -82,12 +103,19 @@ int main(int argc, char **argv)
 			skel->rodata->switch_partial = true;
 			skel->struct_ops.qmap_ops->flags |= __COMPAT_SCX_OPS_SWITCH_PARTIAL;
 			break;
 		case 'v':
 			verbose = true;
 			break;
 		default:
 			fprintf(stderr, help_fmt, basename(argv[0]));
 			return opt != 'h';
 		}
 	}
 	if (!__COMPAT_HAS_DSQ_ITER &&
 	    (skel->rodata->print_shared_dsq || strlen(skel->rodata->exp_prefix)))
 		fprintf(stderr, "kernel doesn't support DSQ iteration\n");
 	SCX_OPS_LOAD(skel, qmap_ops, scx_qmap, uei);
 	link = SCX_OPS_ATTACH(skel, qmap_ops);
@ -95,10 +123,18 @@ int main(int argc, char **argv)
 		long nr_enqueued = skel->bss->nr_enqueued;
 		long nr_dispatched = skel->bss->nr_dispatched;
-		printf("enq=%lu, dsp=%lu, delta=%ld, reenq=%" PRIu64 ", deq=%" PRIu64 ", core=%" PRIu64 "\n",
+		printf("stats  : enq=%lu dsp=%lu delta=%ld reenq=%"PRIu64" deq=%"PRIu64" core=%"PRIu64" exp=%"PRIu64"\n",
 		       nr_enqueued, nr_dispatched, nr_enqueued - nr_dispatched,
 		       skel->bss->nr_reenqueued, skel->bss->nr_dequeued,
-		       skel->bss->nr_core_sched_execed);
+		       skel->bss->nr_core_sched_execed, skel->bss->nr_expedited);
 		if (__COMPAT_has_ksym("scx_bpf_cpuperf_cur"))
 			printf("cpuperf: cur min/avg/max=%u/%u/%u target min/avg/max=%u/%u/%u\n",
 			       skel->bss->cpuperf_min,
 			       skel->bss->cpuperf_avg,
 			       skel->bss->cpuperf_max,
 			       skel->bss->cpuperf_target_min,
 			       skel->bss->cpuperf_target_avg,
 			       skel->bss->cpuperf_target_max);
 		fflush(stdout);
 		sleep(1);
 	}
@ -106,5 +142,9 @@ int main(int argc, char **argv)
 	bpf_link__destroy(link);
 	UEI_REPORT(skel, uei);
 	scx_qmap__destroy(skel);
 	/*
 	 * scx_qmap implements ops.cpu_on/offline() and doesn't need to restart
 	 * on CPU hotplug events.
 	 */
 	return 0;
 }
--- a/scheds/c/scx_simple.bpf.c
+++ b/scheds/c/scx_simple.bpf.c
@ -129,7 +129,6 @@ void BPF_STRUCT_OPS(simple_enable, struct task_struct *p)
 /*
 * ops.init(): switch all tasks over via the compat helper and create
 * SHARED_DSQ. Returns the result of scx_bpf_create_dsq().
 */
 s32 BPF_STRUCT_OPS_SLEEPABLE(simple_init)
 {
 	__COMPAT_scx_bpf_switch_all();
 	return scx_bpf_create_dsq(SHARED_DSQ, -1);
 }
--- a/scheds/c/scx_simple.c
+++ b/scheds/c/scx_simple.c
@ -17,13 +17,22 @@ const char help_fmt[] =
 "\n"
 "See the top-level comment in .bpf.c for more details.\n"
 "\n"
-"Usage: %s [-f]\n"
+"Usage: %s [-f] [-v]\n"
 "\n"
 "  -f            Use FIFO scheduling instead of weighted vtime scheduling\n"
 "  -v            Print libbpf debug messages\n"
 "  -h            Display this help and exit\n";
 static bool verbose;
 static volatile int exit_req;
 /*
 * libbpf log callback. LIBBPF_DEBUG messages are dropped unless verbose is
 * set; everything else is formatted to stderr.
 */
 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
 {
 	if (verbose || level != LIBBPF_DEBUG)
 		return vfprintf(stderr, format, args);
 	return 0;
 }
 static void sigint_handler(int simple)
 {
 	exit_req = 1;
@ -54,20 +63,22 @@ int main(int argc, char **argv)
 	struct scx_simple *skel;
 	struct bpf_link *link;
 	__u32 opt;
 	__u64 ecode;
 	libbpf_set_print(libbpf_print_fn);
 	signal(SIGINT, sigint_handler);
 	signal(SIGTERM, sigint_handler);
 restart:
 	skel = SCX_OPS_OPEN(simple_ops, scx_simple);
-	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+	while ((opt = getopt(argc, argv, "fvh")) != -1) {
 	skel = scx_simple__open();
 	SCX_BUG_ON(!skel, "Failed to open skel");
 	while ((opt = getopt(argc, argv, "fh")) != -1) {
 		switch (opt) {
 		case 'f':
 			skel->rodata->fifo_sched = true;
 			break;
 		case 'v':
 			verbose = true;
 			break;
 		default:
 			fprintf(stderr, help_fmt, basename(argv[0]));
 			return opt != 'h';
@ -87,7 +98,10 @@ int main(int argc, char **argv)
 	}
 	bpf_link__destroy(link);
-	UEI_REPORT(skel, uei);
+	ecode = UEI_REPORT(skel, uei);
 	scx_simple__destroy(skel);
 	if (UEI_ECODE_RESTART(ecode))
 		goto restart;
 	return 0;
 }
--- a/scheds/c/scx_userland.c
+++ b/scheds/c/scx_userland.c
@ -41,6 +41,7 @@ const char help_fmt[] =
 "Usage: %s [-b BATCH]\n"
 "\n"
 "  -b BATCH      The number of tasks to batch when dispatching (default: 8)\n"
 "  -v            Print libbpf debug messages\n"
 "  -h            Display this help and exit\n";
 /* Defined in UAPI */
@ -49,6 +50,7 @@ const char help_fmt[] =
 /* Number of tasks to batch when dispatching to user space. */
 static __u32 batch_size = 8;
 static bool verbose;
 static volatile int exit_req;
 static int enqueued_fd, dispatched_fd;
@ -96,6 +98,13 @@ static int pid_max;
 static double min_vruntime;
 /*
 * libbpf log callback. LIBBPF_DEBUG messages are dropped unless verbose is
 * set; everything else is formatted to stderr.
 */
 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
 {
 	if (verbose || level != LIBBPF_DEBUG)
 		return vfprintf(stderr, format, args);
 	return 0;
 }
 static void sigint_handler(int userland)
 {
 	exit_req = 1;
@ -337,7 +346,7 @@ static void print_example_warning(const char *sched)
 	printf(warning_fmt, sched);
 }
-static void bootstrap(int argc, char **argv)
+static void pre_bootstrap(int argc, char **argv)
 {
 	int err;
 	__u32 opt;
@ -349,9 +358,9 @@ static void bootstrap(int argc, char **argv)
 	if (err)
 		exit(err);
 	libbpf_set_print(libbpf_print_fn);
 	signal(SIGINT, sigint_handler);
 	signal(SIGTERM, sigint_handler);
 	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
 	/*
 	 * Enforce that the user scheduler task is managed by sched_ext. The
@ -363,11 +372,14 @@ static void bootstrap(int argc, char **argv)
 	err = syscall(__NR_sched_setscheduler, getpid(), SCHED_EXT, &sched_param);
 	SCX_BUG_ON(err, "Failed to set scheduler to SCHED_EXT");
-	while ((opt = getopt(argc, argv, "b:ph")) != -1) {
+	while ((opt = getopt(argc, argv, "b:vh")) != -1) {
 		switch (opt) {
 		case 'b':
 			batch_size = strtoul(optarg, NULL, 0);
 			break;
 		case 'v':
 			verbose = true;
 			break;
 		default:
 			fprintf(stderr, help_fmt, basename(argv[0]));
 			exit(opt != 'h');
@ -381,9 +393,11 @@ static void bootstrap(int argc, char **argv)
 	 */
 	err = mlockall(MCL_CURRENT | MCL_FUTURE);
 	SCX_BUG_ON(err, "Failed to prefault and lock address space");
 }
-	skel = scx_userland__open();
+static void bootstrap(char *comm)
-	SCX_BUG_ON(!skel, "Failed to open skel");
+{
 	skel = SCX_OPS_OPEN(userland_ops, scx_userland);
 	skel->rodata->num_possible_cpus = libbpf_num_possible_cpus();
 	assert(skel->rodata->num_possible_cpus > 0);
@ -399,7 +413,7 @@ static void bootstrap(int argc, char **argv)
 	SCX_BUG_ON(spawn_stats_thread(), "Failed to spawn stats thread");
-	print_example_warning(basename(argv[0]));
+	print_example_warning(basename(comm));
 	ops_link = SCX_OPS_ATTACH(skel, userland_ops);
 }
@ -428,12 +442,19 @@ static void sched_main_loop(void)
 int main(int argc, char **argv)
 {
-	bootstrap(argc, argv);
+	__u64 ecode;
 	pre_bootstrap(argc, argv);
 restart:
 	bootstrap(argv[0]);
 	sched_main_loop();
 	exit_req = 1;
 	bpf_link__destroy(ops_link);
-	UEI_REPORT(skel, uei);
+	ecode = UEI_REPORT(skel, uei);
 	scx_userland__destroy(skel);
 	if (UEI_ECODE_RESTART(ecode))
 		goto restart;
 	return 0;
 }
--- a/scheds/include/scx/common.bpf.h
+++ b/scheds/include/scx/common.bpf.h
@ -28,9 +28,54 @@ static inline void ___vmlinux_h_sanity_check___(void)
 		       "bpftool generated vmlinux.h is missing high bits for 64bit enums, upgrade clang and pahole");
 }
 s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym;
 s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym;
 void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym;
 void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym;
 u32 scx_bpf_dispatch_nr_slots(void) __ksym;
 void scx_bpf_dispatch_cancel(void) __ksym;
 bool scx_bpf_consume(u64 dsq_id) __ksym;
 bool __scx_bpf_consume_task(unsigned long it, struct task_struct *p) __ksym __weak;
 u32 scx_bpf_reenqueue_local(void) __ksym;
 void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym;
 s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym;
 void scx_bpf_destroy_dsq(u64 dsq_id) __ksym;
 int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, bool rev) __ksym __weak;
 struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) __ksym __weak;
 void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) __ksym __weak;
 void scx_bpf_exit_bstr(s64 exit_code, char *fmt, unsigned long long *data, u32 data__sz) __ksym __weak;
 void scx_bpf_error_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym;
-void scx_bpf_exit_bstr(s64 exit_code, char *fmt,
+u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym __weak;
-		       unsigned long long *data, u32 data__sz) __ksym __weak;
+u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym __weak;
 void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym __weak;
 u32 scx_bpf_nr_cpu_ids(void) __ksym __weak;
 const struct cpumask *scx_bpf_get_possible_cpumask(void) __ksym __weak;
 const struct cpumask *scx_bpf_get_online_cpumask(void) __ksym __weak;
 void scx_bpf_put_cpumask(const struct cpumask *cpumask) __ksym __weak;
 const struct cpumask *scx_bpf_get_idle_cpumask(void) __ksym;
 const struct cpumask *scx_bpf_get_idle_smtmask(void) __ksym;
 void scx_bpf_put_idle_cpumask(const struct cpumask *cpumask) __ksym;
 bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) __ksym;
 s32 scx_bpf_pick_idle_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
 s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
 bool scx_bpf_task_running(const struct task_struct *p) __ksym;
 s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
 struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym;
 /*
 * Use the following as @it when calling scx_bpf_consume_task() from within
 * bpf_for_each() loops.
 */
 #define BPF_FOR_EACH_ITER	(&___it)
 /*
 * Hopefully temporary wrapper to work around BPF restriction.
 *
 * Copies the first sizeof(unsigned long) bytes found at @it into a local
 * with bpf_probe_read_kernel() and passes that value, together with @p, to
 * the __scx_bpf_consume_task() kfunc. Returns whatever the kfunc returns.
 */
 static inline bool scx_bpf_consume_task(struct bpf_iter_scx_dsq *it,
 					struct task_struct *p)
 {
 	unsigned long ptr;
 	/* NOTE(review): the result of the read is not checked here. */
 	bpf_probe_read_kernel(&ptr, sizeof(ptr), it);
 	return __scx_bpf_consume_task(ptr, p);
 }
 static inline __attribute__((format(printf, 1, 2)))
 void ___scx_bpf_exit_format_checker(const char *fmt, ...) {}
@ -40,18 +85,18 @@ void ___scx_bpf_exit_format_checker(const char *fmt, ...) {}
 * bstr exit kfuncs. Callers to this function should use ___fmt and ___param to
 * refer to the initialized list of inputs to the bstr kfunc.
 */
-#define scx_bpf_exit_preamble(fmt, args...)				\
+#define scx_bpf_exit_preamble(fmt, args...)					\
-	static char ___fmt[] = fmt;					\
+	static char ___fmt[] = fmt;						\
-	/*								\
+	/*									\
-	 * Note that __param[] must have at least one			\
+	 * Note that __param[] must have at least one				\
-	 * element to keep the verifier happy.				\
+	 * element to keep the verifier happy.					\
-	 */								\
+	 */									\
-	unsigned long long ___param[___bpf_narg(args) ?: 1] = {};	\
+	unsigned long long ___param[___bpf_narg(args) ?: 1] = {};		\
-									\
+										\
-	_Pragma("GCC diagnostic push")					\
+	_Pragma("GCC diagnostic push")						\
-	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		\
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")			\
-	___bpf_fill(___param, args);					\
+	___bpf_fill(___param, args);						\
-	_Pragma("GCC diagnostic pop")					\
+	_Pragma("GCC diagnostic pop")						\
 /*
 * scx_bpf_exit() wraps the scx_bpf_exit_bstr() kfunc with variadic arguments
@ -78,30 +123,6 @@ void ___scx_bpf_exit_format_checker(const char *fmt, ...) {}
 	___scx_bpf_exit_format_checker(fmt, ##args);				\
 })
 s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym;
 bool scx_bpf_consume(u64 dsq_id) __ksym;
 void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym;
 void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym;
 u32 scx_bpf_dispatch_nr_slots(void) __ksym;
 void scx_bpf_dispatch_cancel(void) __ksym;
 void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym;
 s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym;
 bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) __ksym;
 s32 scx_bpf_pick_idle_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
 s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
 const struct cpumask *scx_bpf_get_idle_cpumask(void) __ksym;
 const struct cpumask *scx_bpf_get_idle_smtmask(void) __ksym;
 void scx_bpf_put_idle_cpumask(const struct cpumask *cpumask) __ksym;
 void scx_bpf_destroy_dsq(u64 dsq_id) __ksym;
 s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym;
 bool scx_bpf_task_running(const struct task_struct *p) __ksym;
 s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
 struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym;
 u32 scx_bpf_reenqueue_local(void) __ksym;
 u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym;
 u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym;
 void scx_bpf_cpuperf_set(u32 cpu, u32 perf) __ksym __weak;
 #define BPF_STRUCT_OPS(name, args...)						\
 SEC("struct_ops/"#name)								\
 BPF_PROG(name, ##args)
@ -156,7 +177,8 @@ BPF_PROG(name, ##args)
 * be a pointer to the area. Use `MEMBER_VPTR(*ptr, .member)` instead of
 * `MEMBER_VPTR(ptr, ->member)`.
 */
-#define MEMBER_VPTR(base, member) (typeof((base) member) *)({			\
+#define MEMBER_VPTR(base, member) (typeof((base) member) *)			\
 ({										\
 	u64 __base = (u64)&(base);						\
 	u64 __addr = (u64)&((base) member) - __base;				\
 	_Static_assert(sizeof(base) >= sizeof((base) member),			\
@ -186,18 +208,19 @@ BPF_PROG(name, ##args)
 * size of the array to compute the max, which will result in rejection by
 * the verifier.
 */
-#define ARRAY_ELEM_PTR(arr, i, n) (typeof(arr[i]) *)({	  \
+#define ARRAY_ELEM_PTR(arr, i, n) (typeof(arr[i]) *)				\
-	u64 __base = (u64)arr;				  \
+({										\
-	u64 __addr = (u64)&(arr[i]) - __base;		  \
+	u64 __base = (u64)arr;							\
-	asm volatile (					  \
+	u64 __addr = (u64)&(arr[i]) - __base;					\
-		"if %0 <= %[max] goto +2\n"		  \
+	asm volatile (								\
-		"%0 = 0\n"				  \
+		"if %0 <= %[max] goto +2\n"					\
-		"goto +1\n"				  \
+		"%0 = 0\n"							\
-		"%0 += %1\n"				  \
+		"goto +1\n"							\
-		: "+r"(__addr)				  \
+		"%0 += %1\n"							\
-		: "r"(__base),				  \
+		: "+r"(__addr)							\
-		  [max]"r"(sizeof(arr[0]) * ((n) - 1)));  \
+		: "r"(__base),							\
-	__addr;						  \
+		  [max]"r"(sizeof(arr[0]) * ((n) - 1)));			\
 	__addr;									\
 })
 /*
@ -227,7 +250,7 @@ int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
 struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym;
-extern void *bpf_refcount_acquire_impl(void *kptr, void *meta) __ksym;
+void *bpf_refcount_acquire_impl(void *kptr, void *meta) __ksym;
 #define bpf_refcount_acquire(kptr) bpf_refcount_acquire_impl(kptr, NULL)
 /* task */
--- a/scheds/include/scx/compat.bpf.h
+++ b/scheds/include/scx/compat.bpf.h
@ -18,13 +18,15 @@
 /*
 * %SCX_KICK_IDLE is a later addition. To support both before and after, use
 * %__COMPAT_SCX_KICK_IDLE which becomes 0 on kernels which don't support it.
 * Users can use %SCX_KICK_IDLE directly in the future.
 */
 #define __COMPAT_SCX_KICK_IDLE							\
 	__COMPAT_ENUM_OR_ZERO(enum scx_kick_flags, SCX_KICK_IDLE)
 /*
 * scx_switch_all() was replaced by %SCX_OPS_SWITCH_PARTIAL. See
- * %__COMPAT_SCX_OPS_SWITCH_PARTIAL in compat.h.
+ * %__COMPAT_SCX_OPS_SWITCH_PARTIAL in compat.h. This can be dropped in the
 * future.
 */
 void scx_bpf_switch_all(void) __ksym __weak;
@ -34,6 +36,67 @@ static inline void __COMPAT_scx_bpf_switch_all(void)
 		scx_bpf_switch_all();
 }
 /*
 * scx_bpf_exit() is a new addition. Fall back to scx_bpf_error() if
 * unavailable. Users can use scx_bpf_exit() directly in the future.
 *
 * @code is forwarded only to scx_bpf_exit(); the scx_bpf_error() fallback
 * has no exit code parameter and drops it. The variadic arguments are pasted
 * with `##args` so that the macro can also be invoked with a bare format
 * string: without `##`, an empty @args would leave a dangling comma in both
 * calls and fail to compile.
 */
 #define __COMPAT_scx_bpf_exit(code, fmt, args...)				\
 ({										\
 	if (bpf_ksym_exists(scx_bpf_exit_bstr))					\
 		scx_bpf_exit((code), fmt, ##args);				\
 	else									\
 		scx_bpf_error(fmt, ##args);					\
 })
 /*
 * scx_bpf_nr_cpu_ids(), scx_bpf_get_possible/online_cpumask() are new. No good
 * way to noop these kfuncs. Provide a test macro. Users can assume existence in
 * the future.
 */
 #define __COMPAT_HAS_CPUMASKS							\
 	bpf_ksym_exists(scx_bpf_nr_cpu_ids)
 /*
 * cpuperf is new. The following become noops on older kernels. Callers can be
 * updated to call cpuperf kfuncs directly in the future.
 */
 /*
 * Return scx_bpf_cpuperf_cap(@cpu) when that kfunc symbol exists; otherwise
 * return the fixed fallback value 1024.
 * NOTE(review): 1024 presumably stands for the full-scale perf value - confirm.
 */
 static inline u32 __COMPAT_scx_bpf_cpuperf_cap(s32 cpu)
 {
 	/* kfunc not available: hand back the constant fallback */
 	if (!bpf_ksym_exists(scx_bpf_cpuperf_cap))
 		return 1024;
 	return scx_bpf_cpuperf_cap(cpu);
 }
 /*
 * Return scx_bpf_cpuperf_cur(@cpu) when that kfunc symbol exists; otherwise
 * return the fixed fallback value 1024.
 * NOTE(review): 1024 presumably stands for the full-scale perf value - confirm.
 */
 static inline u32 __COMPAT_scx_bpf_cpuperf_cur(s32 cpu)
 {
 	/* kfunc not available: hand back the constant fallback */
 	if (!bpf_ksym_exists(scx_bpf_cpuperf_cur))
 		return 1024;
 	return scx_bpf_cpuperf_cur(cpu);
 }
 /*
 * Forward @cpu and @perf to scx_bpf_cpuperf_set() when that kfunc symbol
 * exists; do nothing otherwise.
 */
 static inline void __COMPAT_scx_bpf_cpuperf_set(s32 cpu, u32 perf)
 {
 	/* kfunc not available: silently skip the request */
 	if (!bpf_ksym_exists(scx_bpf_cpuperf_set))
 		return;
 	scx_bpf_cpuperf_set(cpu, perf);
 }
 /*
 * Iteration and scx_bpf_consume_task() are new. The following become noop on
 * older kernels. The users can switch to bpf_for_each(scx_dsq) and directly
 * call scx_bpf_consume_task() in the future.
 *
 * __COMPAT_DSQ_FOR_EACH() expands to an unbraced `if` testing whether
 * bpf_iter_scx_dsq_new exists, followed by bpf_for_each(scx_dsq, ...) with
 * @p, @dsq_id and @flags; the statement the caller writes after the macro
 * completes that expansion.
 * NOTE(review): because the `if` is unbraced, an `else` written right after
 * the caller's statement would attach to this hidden `if` - confirm no
 * caller relies on that.
 */
 #define __COMPAT_DSQ_FOR_EACH(p, dsq_id, flags)					\
 	if (bpf_ksym_exists(bpf_iter_scx_dsq_new))				\
 		bpf_for_each(scx_dsq, (p), (dsq_id), (flags))
 /*
 * Compat stand-in with the same parameters as scx_bpf_consume_task(). As
 * written it uses neither @it nor @p and unconditionally returns false.
 */
 static inline bool __COMPAT_scx_bpf_consume_task(struct bpf_iter_scx_dsq *it,
 						 struct task_struct *p)
 {
 	return false;
 }
 /*
 * Define sched_ext_ops. This may be expanded to define multiple variants for
 * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
--- a/scheds/include/scx/compat.h
+++ b/scheds/include/scx/compat.h
@ -8,6 +8,9 @@
 #define __SCX_COMPAT_H
 #include <bpf/btf.h>
 #include <fcntl.h>
 #include <stdlib.h>
 #include <unistd.h>
 struct btf *__COMPAT_vmlinux_btf __attribute__((weak));
@ -69,6 +72,12 @@ static inline bool __COMPAT_read_enum(const char *type, const char *name, u64 *v
 	__val;									\
 })
 /*
 * Return true if @ksym can be found by name in __COMPAT_vmlinux_btf.
 *
 * Calls __COMPAT_load_vmlinux_btf() first, then looks @ksym up with
 * btf__find_by_name(); any non-negative result counts as found.
 */
 static inline bool __COMPAT_has_ksym(const char *ksym)
 {
 	__COMPAT_load_vmlinux_btf();
 	return btf__find_by_name(__COMPAT_vmlinux_btf, ksym) >= 0;
 }
 static inline bool __COMPAT_struct_has_field(const char *type, const char *field)
 {
 	const struct btf_type *t;
@ -101,27 +110,79 @@ static inline bool __COMPAT_struct_has_field(const char *type, const char *field
 * An ops flag, %SCX_OPS_SWITCH_PARTIAL, replaced scx_bpf_switch_all() which had
 * to be called from ops.init(). To support both before and after, use both
 * %__COMPAT_SCX_OPS_SWITCH_PARTIAL and %__COMPAT_scx_bpf_switch_all() defined
- * in compat.bpf.h.
+ * in compat.bpf.h. Users can switch to directly using %SCX_OPS_SWITCH_PARTIAL
 * in the future.
 */
 #define __COMPAT_SCX_OPS_SWITCH_PARTIAL						\
 	__COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL")
 /*
 * scx_bpf_nr_cpu_ids(), scx_bpf_get_possible/online_cpumask() are new. Users
 * will be able to assume existence in the future.
 */
 #define __COMPAT_HAS_CPUMASKS							\
 	__COMPAT_has_ksym("scx_bpf_nr_cpu_ids")
 /*
 * DSQ iterator is new. Users will be able to assume existence in the future.
 */
 #define __COMPAT_HAS_DSQ_ITER							\
 	__COMPAT_has_ksym("bpf_iter_scx_dsq_new")
 /*
 * Read the number stored in /sys/kernel/sched_ext/hotplug_seq.
 *
 * Returns -ENOENT if the file cannot be opened. Otherwise reads at most
 * sizeof(buf) - 1 bytes, parses them as a base-10 number and returns the
 * result. A failed or empty read, or a parsed value that is negative once
 * stored in the signed long, trips SCX_BUG_ON().
 */
 static inline long scx_hotplug_seq(void)
 {
 	int fd;
 	char buf[32];
 	ssize_t len;
 	long val;
 	fd = open("/sys/kernel/sched_ext/hotplug_seq", O_RDONLY);
 	/* every open() failure is reported to the caller as -ENOENT */
 	if (fd < 0)
 		return -ENOENT;
 	/* read one byte less than the buffer so the NUL below always fits */
 	len = read(fd, buf, sizeof(buf) - 1);
 	SCX_BUG_ON(len <= 0, "read failed (%ld)", len);
 	buf[len] = 0;
 	close(fd);
 	/* parsed as unsigned, then checked for sign after landing in a long */
 	val = strtoul(buf, NULL, 10);
 	SCX_BUG_ON(val < 0, "invalid num hotplug events: %lu", val);
 	return val;
 }
 /*
 * struct sched_ext_ops can change over time. If compat.bpf.h::SCX_OPS_DEFINE()
 * is used to define ops and compat.h::SCX_OPS_LOAD/ATTACH() are used to load
 * and attach it, backward compatibility is automatically maintained where
 * reasonable.
 *
- * - sched_ext_ops.exit_dump_len was added later. On kernels which don't support
+ * - ops.tick(): Ignored on older kernels with a warning.
- *   it, the value is ignored and a warning is triggered if the value is
+ * - ops.exit_dump_len: Cleared to zero on older kernels with a warning.
- *   requested to be non-zero.
+ * - ops.hotplug_seq: Ignored on older kernels.
 */
 /*
 * Open the skeleton with <__scx_name>__open() and check the result for NULL
 * through SCX_BUG_ON(). If __COMPAT_struct_has_field() reports that
 * sched_ext_ops has a hotplug_seq field, store the value returned by
 * scx_hotplug_seq() into the hotplug_seq member of the struct_ops named
 * @__ops_name. The expression evaluates to the opened skeleton pointer.
 */
 #define SCX_OPS_OPEN(__ops_name, __scx_name) ({					\
 	struct __scx_name *__skel;						\
 										\
 	__skel = __scx_name##__open();						\
 	SCX_BUG_ON(!__skel, "Could not open " #__scx_name);			\
 										\
 	if (__COMPAT_struct_has_field("sched_ext_ops", "hotplug_seq"))		\
 		__skel->struct_ops.__ops_name->hotplug_seq = scx_hotplug_seq();	\
 	__skel; 								\
 })
 #define SCX_OPS_LOAD(__skel, __ops_name, __scx_name, __uei_name) ({		\
 	UEI_SET_SIZE(__skel, __ops_name, __uei_name);				\
-	if (__COMPAT_struct_has_field("sched_ext_ops", "exit_dump_len") &&	\
+	if (!__COMPAT_struct_has_field("sched_ext_ops", "exit_dump_len") &&	\
 	    (__skel)->struct_ops.__ops_name->exit_dump_len) {			\
 		fprintf(stderr, "WARNING: kernel doesn't support setting exit dump len\n"); \
-		(__skel)->struct_ops.__ops_name->exit_dump_len = 0;	\
+		(__skel)->struct_ops.__ops_name->exit_dump_len = 0;		\
 	}									\
 	if (!__COMPAT_struct_has_field("sched_ext_ops", "tick") &&		\
 	    (__skel)->struct_ops.__ops_name->tick) {				\
 		fprintf(stderr, "WARNING: kernel doesn't support ops.tick()\n"); \
 		(__skel)->struct_ops.__ops_name->tick = NULL;			\
 	}									\
 	SCX_BUG_ON(__scx_name##__load((__skel)), "Failed to load skel");	\
 })
--- a/scheds/include/scx/user_exit_info.h
+++ b/scheds/include/scx/user_exit_info.h
@ -77,7 +77,35 @@ struct user_exit_info {
 	if (__uei->msg[0] != '\0')						\
 		fprintf(stderr, " (%s)", __uei->msg);				\
 	fputs("\n", stderr);							\
 	__uei->exit_code;							\
 })
 /*
 * We can't import vmlinux.h while compiling user C code. Let's duplicate
 * scx_exit_code definition.
 */
 enum scx_exit_code {
 	/* Reasons */
 	SCX_ECODE_RSN_HOTPLUG		= 1LLU << 32,
 	/* Actions */
 	SCX_ECODE_ACT_RESTART		= 1LLU << 48,
 };
 /*
 * Bit masks that split a 64-bit exit code into three fields.
 */
 enum uei_ecode_mask {
 	/* bits 0-31 */
 	UEI_ECODE_USER_MASK		= ((1LLU << 32) - 1),
 	/* bits 32-47; SCX_ECODE_RSN_HOTPLUG (1LLU << 32) falls in this field */
 	UEI_ECODE_SYS_RSN_MASK		= ((1LLU << 16) - 1) << 32,
 	/* bits 48-63; SCX_ECODE_ACT_RESTART (1LLU << 48) falls in this field */
 	UEI_ECODE_SYS_ACT_MASK		= ((1LLU << 16) - 1) << 48,
 };
 /*
 * These macros interpret the ecode returned from UEI_REPORT().
 */
 #define UEI_ECODE_USER(__ecode)		((__ecode) & UEI_ECODE_USER_MASK)
 #define UEI_ECODE_SYS_RSN(__ecode)	((__ecode) & UEI_ECODE_SYS_RSN_MASK)
 #define UEI_ECODE_SYS_ACT(__ecode)	((__ecode) & UEI_ECODE_SYS_ACT_MASK)
 #define UEI_ECODE_RESTART(__ecode)	(UEI_ECODE_SYS_ACT((__ecode)) == SCX_ECODE_ACT_RESTART)
 #endif	/* __bpf__ */
 #endif	/* __USER_EXIT_INFO_H */
--- a/scheds/include/vmlinux/vmlinux-v6.9-g73f4013eb1eb.h
+++ b/scheds/include/vmlinux/vmlinux-v6.9-g73f4013eb1eb.h
--- a/scheds/include/vmlinux/vmlinux.h
+++ b/scheds/include/vmlinux/vmlinux.h
@ -1 +1 @@
-vmlinux-v6.9-g5dc95302301f.h
+vmlinux-v6.9-g73f4013eb1eb.h
--- a/scheds/rust/scx_layered/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_layered/src/bpf/main.bpf.c
@ -763,8 +763,8 @@ void BPF_STRUCT_OPS(layered_running, struct task_struct *p)
 		}
 	}
-	if (bpf_ksym_exists(scx_bpf_cpuperf_set) && layer->perf > 0)
+	if (layer->perf > 0)
-		scx_bpf_cpuperf_set(cpu, layer->perf);
+		__COMPAT_scx_bpf_cpuperf_set(cpu, layer->perf);
 	cctx->maybe_idle = false;
 }
--- a/scheds/rust/scx_rusty/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_rusty/src/bpf/main.bpf.c
@ -1421,18 +1421,12 @@ static s32 initialize_cpu(s32 cpu)
 void BPF_STRUCT_OPS(rusty_cpu_online, s32 cpu)
 {
-	if (bpf_ksym_exists(scx_bpf_exit_bstr))
+	__COMPAT_scx_bpf_exit(RUSTY_EXIT_HOTPLUG, "CPU %d went online", cpu);
 		scx_bpf_exit(RUSTY_EXIT_HOTPLUG, "CPU %d went online", cpu);
 	else
 		scx_bpf_error("CPU %d went online", cpu);
 }
 void BPF_STRUCT_OPS(rusty_cpu_offline, s32 cpu)
 {
-	if (bpf_ksym_exists(scx_bpf_exit_bstr))
+	__COMPAT_scx_bpf_exit(RUSTY_EXIT_HOTPLUG, "CPU %d went offline", cpu);
 		scx_bpf_exit(RUSTY_EXIT_HOTPLUG, "CPU %d went offline", cpu);
 	else
 		scx_bpf_error("CPU %d went offline", cpu);
 }
 s32 BPF_STRUCT_OPS_SLEEPABLE(rusty_init)
		`@ -1 +1 @@`
			`vmlinux-v6.9-g5dc95302301f.h`				`vmlinux-v6.9-g73f4013eb1eb.h`