scx_lavd: split main.bpf.c into multiple files

As the main.bpf.c file grows, it becomes harder to maintain,
so split it into multiple logical files. There is no
functional change.

Signed-off-by: Changwoo Min <changwoo@igalia.com>
Author: Changwoo Min
Date:   2024-10-05 00:25:40 +09:00
Commit: 7c5c83a3a2 (parent db0f83ce89)
9 changed files with 1867 additions and 1800 deletions


@@ -47,56 +47,11 @@ extern void bpf_iter_task_destroy(struct bpf_iter_task *it) __weak __ksym;
 /*
 * common constants
 */
-enum consts {
-CLOCK_BOOTTIME = 7,
-CACHELINE_SIZE = 64,
-NSEC_PER_USEC = 1000ULL,
-NSEC_PER_MSEC = (1000ULL * NSEC_PER_USEC),
-LAVD_TIME_ONE_SEC = (1000ULL * NSEC_PER_MSEC),
-LAVD_TIME_INFINITY_NS = SCX_SLICE_INF,
-LAVD_MAX_RETRY = 4,
-LAVD_TARGETED_LATENCY_NS = (20ULL * NSEC_PER_MSEC),
-LAVD_SLICE_MIN_NS = (300ULL * NSEC_PER_USEC), /* min time slice */
-LAVD_SLICE_MAX_NS = (3ULL * NSEC_PER_MSEC), /* max time slice */
-LAVD_SLICE_UNDECIDED = SCX_SLICE_INF,
-LAVD_LC_FREQ_MAX = 1000000,
-LAVD_LC_RUNTIME_MAX = LAVD_TARGETED_LATENCY_NS,
-LAVD_LC_RUNTIME_SHIFT = 15,
-LAVD_LC_WAKEUP_FT = 30,
-LAVD_LC_KTHREAD_FT = 30,
-LAVD_SLICE_BOOST_MAX_FT = 3, /* maximum additional 3x of slice */
-LAVD_SLICE_BOOST_MAX_STEP = 6, /* 6 slice exhausitions in a row */
-LAVD_NEW_PROC_PENALITY = 5,
-LAVD_GREEDY_RATIO_NEW = (1000 * LAVD_NEW_PROC_PENALITY),
-LAVD_CPU_UTIL_MAX = 1000, /* 100.0% */
-LAVD_CPU_UTIL_MAX_FOR_CPUPERF = 850, /* 85.0% */
-LAVD_CPU_ID_HERE = ((u32)-2),
-LAVD_CPU_ID_NONE = ((u32)-1),
+enum {
 LAVD_CPU_ID_MAX = 512,
-LAVD_PREEMPT_KICK_MARGIN = (1ULL * NSEC_PER_MSEC),
-LAVD_PREEMPT_TICK_MARGIN = (100ULL * NSEC_PER_USEC),
-LAVD_SYS_STAT_INTERVAL_NS = (50ULL * NSEC_PER_MSEC),
-LAVD_SYS_STAT_DECAY_TIMES = (2ULL * LAVD_TIME_ONE_SEC) / LAVD_SYS_STAT_INTERVAL_NS,
-LAVD_CC_PER_CORE_MAX_CTUIL = 500, /* maximum per-core CPU utilization */
-LAVD_CC_PER_TURBO_CORE_MAX_CTUIL = 750, /* maximum per-core CPU utilization for a turbo core */
-LAVD_CC_NR_ACTIVE_MIN = 1, /* num of mininum active cores */
-LAVD_CC_NR_OVRFLW = 1, /* num of overflow cores */
-LAVD_CC_CPU_PIN_INTERVAL = (1ULL * LAVD_TIME_ONE_SEC),
-LAVD_CC_CPU_PIN_INTERVAL_DIV = (LAVD_CC_CPU_PIN_INTERVAL /
-LAVD_SYS_STAT_INTERVAL_NS),
-LAVD_AP_HIGH_UTIL = 700, /* balanced mode when 10% < cpu util <= 40%,
-performance mode when cpu util > 40% */
 LAVD_CPDOM_MAX_NR = 32, /* maximum number of compute domain */
 LAVD_CPDOM_MAX_DIST = 4, /* maximum distance from one compute domain to another */
-LAVD_CPDOM_STARV_NS = (5ULL * NSEC_PER_MSEC),
 LAVD_STATUS_STR_LEN = 5, /* {LR: Latency-critical, Regular}
 {HI: performance-Hungry, performance-Insensitive}
@@ -139,100 +94,6 @@ struct sys_stat {
 volatile u64 nr_lc_on_big; /* latency-critical tasks scheduled on big core */
 };
-/*
-* Compute domain context
-* - system > numa node > llc domain > compute domain per core type (P or E)
-*/
-struct cpdom_ctx {
-u64 id; /* id of this compute domain (== dsq_id) */
-u64 alt_id; /* id of the closest compute domain of alternative type (== dsq id) */
-u64 last_consume_clk; /* when the associated DSQ was consumed */
-u8 is_big; /* is it a big core or little core? */
-u8 is_active; /* if this compute domain is active */
-u8 nr_neighbors[LAVD_CPDOM_MAX_DIST]; /* number of neighbors per distance */
-u64 neighbor_bits[LAVD_CPDOM_MAX_DIST]; /* bitmask of neighbor bitmask per distance */
-u64 __cpumask[LAVD_CPU_ID_MAX/64]; /* cpumasks belongs to this compute domain */
-} __attribute__((aligned(CACHELINE_SIZE)));
-/*
-* CPU context
-*/
-struct cpu_ctx {
-/*
-* Information used to keep track of CPU utilization
-*/
-volatile u64 util; /* average of the CPU utilization */
-volatile u64 idle_total; /* total idle time so far */
-volatile u64 idle_start_clk; /* when the CPU becomes idle */
-/*
-* Information used to keep track of load
-*/
-volatile u64 load_actual; /* actual load of runnable tasks */
-volatile u64 load_run_time_ns; /* total runtime of runnable tasks */
-volatile u64 tot_svc_time; /* total service time on a CPU */
-volatile u64 last_kick_clk; /* when the CPU was kicked */
-/*
-* Information for cpu hotplug
-*/
-u64 online_clk; /* when a CPU becomes online */
-u64 offline_clk; /* when a CPU becomes offline */
-/*
-* Information used to keep track of latency criticality
-*/
-volatile u32 max_lat_cri; /* maximum latency criticality */
-volatile u32 sum_lat_cri; /* sum of latency criticality */
-volatile u32 nr_sched; /* number of schedules */
-/*
-* Information used to keep track of performance criticality
-*/
-volatile u64 sum_perf_cri; /* sum of performance criticality */
-volatile u64 min_perf_cri; /* mininum performance criticality */
-volatile u64 max_perf_cri; /* maximum performance criticality */
-/*
-* Information of a current running task for preemption
-*/
-volatile u64 stopping_tm_est_ns; /* estimated stopping time */
-volatile u16 lat_cri; /* latency criticality */
-volatile u8 is_online; /* is this CPU online? */
-s32 cpu_id; /* cpu id */
-/*
-* Information for CPU frequency scaling
-*/
-u32 cpuperf_cur; /* CPU's current performance target */
-u32 cpuperf_task; /* task's CPU performance target */
-u32 cpuperf_avg; /* EWMA of task's CPU performance target */
-/*
-* Fields for core compaction
-*
-*/
-u16 capacity; /* CPU capacity based on 1000 */
-u8 big_core; /* is it a big core? */
-u8 turbo_core; /* is it a turbo core? */
-u8 cpdom_id; /* compute domain id (== dsq_id) */
-u8 cpdom_alt_id; /* compute domain id of anternative type (== dsq_id) */
-u8 cpdom_poll_pos; /* index to check if a DSQ of a compute domain is starving */
-struct bpf_cpumask __kptr *tmp_a_mask; /* temporary cpu mask */
-struct bpf_cpumask __kptr *tmp_o_mask; /* temporary cpu mask */
-struct bpf_cpumask __kptr *tmp_t_mask; /* temporary cpu mask */
-struct bpf_cpumask __kptr *tmp_t2_mask; /* temporary cpu mask */
-/*
-* Information for statistics.
-*/
-volatile u32 nr_migration; /* number of migrations */
-volatile u32 nr_preemption; /* number of migrations */
-volatile u32 nr_greedy; /* number of greedy tasks scheduled */
-volatile u32 nr_perf_cri;
-volatile u32 nr_lat_cri;
-} __attribute__((aligned(CACHELINE_SIZE)));
 /*
 * Task context
 */


@@ -0,0 +1,116 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2023, 2024 Valve Corporation.
* Author: Changwoo Min <changwoo@igalia.com>
*/
/*
* To be included in main.bpf.c
*/
/*
* Introspection commands
*/
struct introspec intrspc;
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 16 * 1024 /* 16 KB */);
} introspec_msg SEC(".maps");
static __always_inline
int submit_task_ctx(struct task_struct *p, struct task_ctx *taskc, u32 cpu_id)
{
struct sys_stat *stat_cur = get_sys_stat_cur();
struct cpu_ctx *cpuc;
struct msg_task_ctx *m;
cpuc = get_cpu_ctx_id(cpu_id);
if (!cpuc)
return -EINVAL;
m = bpf_ringbuf_reserve(&introspec_msg, sizeof(*m), 0);
if (!m)
return -ENOMEM;
m->hdr.kind = LAVD_MSG_TASKC;
m->taskc_x.pid = p->pid;
memcpy(m->taskc_x.comm, p->comm, TASK_COMM_LEN);
m->taskc_x.static_prio = get_nice_prio(p);
m->taskc_x.cpu_util = cpuc->util / 10;
m->taskc_x.cpu_id = cpu_id;
m->taskc_x.avg_lat_cri = stat_cur->avg_lat_cri;
m->taskc_x.thr_perf_cri = stat_cur->thr_perf_cri;
m->taskc_x.nr_active = stat_cur->nr_active;
m->taskc_x.cpuperf_cur = cpuc->cpuperf_cur;
m->taskc_x.stat[0] = is_lat_cri(taskc, stat_cur) ? 'L' : 'R';
m->taskc_x.stat[1] = is_perf_cri(taskc, stat_cur) ? 'H' : 'I';
m->taskc_x.stat[2] = cpuc->big_core ? 'B' : 'T';
m->taskc_x.stat[3] = is_greedy(taskc) ? 'G' : 'E';
m->taskc_x.stat[4] = taskc->victim_cpu >= 0 ? 'P' : 'N';
m->taskc_x.stat[5] = '\0';
memcpy(&m->taskc, taskc, sizeof(m->taskc));
bpf_ringbuf_submit(m, 0);
return 0;
}
static void proc_introspec_sched_n(struct task_struct *p,
struct task_ctx *taskc, u32 cpu_id)
{
u64 cur_nr, prev_nr;
int i;
/* introspec_arg is the number of schedules remaining */
cur_nr = intrspc.arg;
/*
* Note that the bounded retry (@LAVD_MAX_RETRY) does *not* guarantee
* that introspec_arg is decremented. However, that is unlikely to happen,
* and even if it does, it merely delays a message delivery, since other
* threads will eventually succeed in the CAS operation. So this is good
* enough. ;-)
*/
for (i = 0; cur_nr > 0 && i < LAVD_MAX_RETRY; i++) {
prev_nr = __sync_val_compare_and_swap(
&intrspc.arg, cur_nr, cur_nr - 1);
/* CAS success: submit a message and done */
if (prev_nr == cur_nr) {
submit_task_ctx(p, taskc, cpu_id);
break;
}
/* CAS failure: retry */
cur_nr = prev_nr;
}
}
static void proc_introspec_pid(struct task_struct *p, struct task_ctx *taskc,
u32 cpu_id)
{
if (p->pid == intrspc.arg)
submit_task_ctx(p, taskc, cpu_id);
}
static void try_proc_introspec_cmd(struct task_struct *p,
struct task_ctx *taskc, u32 cpu_id)
{
if (LAVD_CPU_ID_HERE == cpu_id)
cpu_id = bpf_get_smp_processor_id();
switch(intrspc.cmd) {
case LAVD_CMD_SCHED_N:
proc_introspec_sched_n(p, taskc, cpu_id);
break;
case LAVD_CMD_NOP:
/* do nothing */
break;
default:
scx_bpf_error("Unknown introspec command: %d", intrspc.cmd);
break;
}
}
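Purely as an illustration and not part of this commit: a minimal hypothetical userspace consumer of the introspec_msg ring buffer, written against libbpf's ring buffer API. The simplified message struct below is an assumption for the sketch; the real struct msg_task_ctx layout lives in the scheduler's interface header, and the in-tree consumer is the Rust side of scx_lavd.

#include <bpf/libbpf.h>
#include <stdio.h>

/* Hypothetical, simplified message layout for this sketch only;
 * the real struct msg_task_ctx is defined in the scheduler's intf.h. */
struct sample_msg {
	unsigned int kind;	/* e.g. LAVD_MSG_TASKC */
	int pid;
	char comm[16];
	char stat[6];		/* e.g. "LHBGP", as built by submit_task_ctx() */
};

static int handle_msg(void *ctx, void *data, size_t size)
{
	const struct sample_msg *m = data;

	if (size >= sizeof(*m))
		printf("pid=%d comm=%.16s stat=%.5s\n", m->pid, m->comm, m->stat);
	return 0;
}

/* Poll the ring buffer given the fd of the "introspec_msg" map. */
int consume_introspec(int map_fd)
{
	struct ring_buffer *rb = ring_buffer__new(map_fd, handle_msg, NULL, NULL);
	int err;

	if (!rb)
		return -1;
	while ((err = ring_buffer__poll(rb, 100 /* ms */)) >= 0)
		;	/* handle_msg() runs once per submitted record */
	ring_buffer__free(rb);
	return err;
}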


@@ -0,0 +1,158 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2023, 2024 Valve Corporation.
* Author: Changwoo Min <changwoo@igalia.com>
*/
#ifndef __LAVD_H
#define __LAVD_H
/*
* common constants
*/
enum consts_internal {
CLOCK_BOOTTIME = 7,
CACHELINE_SIZE = 64,
NSEC_PER_USEC = 1000ULL,
NSEC_PER_MSEC = (1000ULL * NSEC_PER_USEC),
LAVD_TIME_ONE_SEC = (1000ULL * NSEC_PER_MSEC),
LAVD_TIME_INFINITY_NS = SCX_SLICE_INF,
LAVD_MAX_RETRY = 4,
LAVD_TARGETED_LATENCY_NS = (20ULL * NSEC_PER_MSEC),
LAVD_SLICE_MIN_NS = (300ULL * NSEC_PER_USEC), /* min time slice */
LAVD_SLICE_MAX_NS = (3ULL * NSEC_PER_MSEC), /* max time slice */
LAVD_SLICE_UNDECIDED = SCX_SLICE_INF,
LAVD_LC_FREQ_MAX = 1000000,
LAVD_LC_RUNTIME_MAX = LAVD_TARGETED_LATENCY_NS,
LAVD_LC_RUNTIME_SHIFT = 15,
LAVD_LC_WAKEUP_FT = 30,
LAVD_LC_KTHREAD_FT = 30,
LAVD_SLICE_BOOST_MAX_FT = 3, /* maximum additional 3x of slice */
LAVD_SLICE_BOOST_MAX_STEP = 6, /* 6 slice exhaustions in a row */
LAVD_NEW_PROC_PENALITY = 5,
LAVD_GREEDY_RATIO_NEW = (1000 * LAVD_NEW_PROC_PENALITY),
LAVD_CPU_UTIL_MAX = 1000, /* 100.0% */
LAVD_CPU_UTIL_MAX_FOR_CPUPERF = 850, /* 85.0% */
LAVD_CPU_ID_HERE = ((u32)-2),
LAVD_CPU_ID_NONE = ((u32)-1),
LAVD_PREEMPT_KICK_MARGIN = (1ULL * NSEC_PER_MSEC),
LAVD_PREEMPT_TICK_MARGIN = (100ULL * NSEC_PER_USEC),
LAVD_SYS_STAT_INTERVAL_NS = (50ULL * NSEC_PER_MSEC),
LAVD_SYS_STAT_DECAY_TIMES = (2ULL * LAVD_TIME_ONE_SEC) / LAVD_SYS_STAT_INTERVAL_NS,
LAVD_CC_PER_CORE_MAX_CTUIL = 500, /* maximum per-core CPU utilization */
LAVD_CC_PER_TURBO_CORE_MAX_CTUIL = 750, /* maximum per-core CPU utilization for a turbo core */
LAVD_CC_NR_ACTIVE_MIN = 1, /* minimum number of active cores */
LAVD_CC_NR_OVRFLW = 1, /* number of overflow cores */
LAVD_CC_CPU_PIN_INTERVAL = (1ULL * LAVD_TIME_ONE_SEC),
LAVD_CC_CPU_PIN_INTERVAL_DIV = (LAVD_CC_CPU_PIN_INTERVAL /
LAVD_SYS_STAT_INTERVAL_NS),
LAVD_AP_HIGH_UTIL = 700, /* balanced mode when LAVD_AP_LOW_UTIL < cpu util <= 70%,
performance mode when cpu util > 70% */
LAVD_CPDOM_STARV_NS = (5ULL * NSEC_PER_MSEC),
};
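As a quick aside (not part of lavd.h): a standalone plain-C check of the derived constants above. With a 50 msec stat interval, LAVD_SYS_STAT_DECAY_TIMES works out to 40 intervals (2 seconds) and LAVD_CC_CPU_PIN_INTERVAL_DIV to 20 intervals (1 second).

#include <assert.h>
#include <stdio.h>

/* Reproduce the derived-constant arithmetic from enum consts_internal. */
#define NSEC_PER_USEC			1000ULL
#define NSEC_PER_MSEC			(1000ULL * NSEC_PER_USEC)
#define LAVD_TIME_ONE_SEC		(1000ULL * NSEC_PER_MSEC)
#define LAVD_SYS_STAT_INTERVAL_NS	(50ULL * NSEC_PER_MSEC)
#define LAVD_SYS_STAT_DECAY_TIMES	((2ULL * LAVD_TIME_ONE_SEC) / LAVD_SYS_STAT_INTERVAL_NS)
#define LAVD_CC_CPU_PIN_INTERVAL	(1ULL * LAVD_TIME_ONE_SEC)
#define LAVD_CC_CPU_PIN_INTERVAL_DIV	(LAVD_CC_CPU_PIN_INTERVAL / LAVD_SYS_STAT_INTERVAL_NS)

int main(void)
{
	assert(LAVD_SYS_STAT_DECAY_TIMES == 40);	/* decay every 2 sec of intervals */
	assert(LAVD_CC_CPU_PIN_INTERVAL_DIV == 20);	/* ~1 sec of stat intervals */
	printf("decay every %llu intervals, pin interval %llu intervals\n",
	       LAVD_SYS_STAT_DECAY_TIMES, LAVD_CC_CPU_PIN_INTERVAL_DIV);
	return 0;
}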
/*
* Compute domain context
* - system > numa node > llc domain > compute domain per core type (P or E)
*/
struct cpdom_ctx {
u64 id; /* id of this compute domain (== dsq_id) */
u64 alt_id; /* id of the closest compute domain of alternative type (== dsq id) */
u64 last_consume_clk; /* when the associated DSQ was consumed */
u8 is_big; /* is it a big core or little core? */
u8 is_active; /* if this compute domain is active */
u8 nr_neighbors[LAVD_CPDOM_MAX_DIST]; /* number of neighbors per distance */
u64 neighbor_bits[LAVD_CPDOM_MAX_DIST]; /* bitmask of neighbors per distance */
u64 __cpumask[LAVD_CPU_ID_MAX/64]; /* cpumask of CPUs belonging to this compute domain */
} __attribute__((aligned(CACHELINE_SIZE)));
/*
* CPU context
*/
struct cpu_ctx {
/*
* Information used to keep track of CPU utilization
*/
volatile u64 util; /* average of the CPU utilization */
volatile u64 idle_total; /* total idle time so far */
volatile u64 idle_start_clk; /* when the CPU becomes idle */
/*
* Information used to keep track of load
*/
volatile u64 load_actual; /* actual load of runnable tasks */
volatile u64 load_run_time_ns; /* total runtime of runnable tasks */
volatile u64 tot_svc_time; /* total service time on a CPU */
volatile u64 last_kick_clk; /* when the CPU was kicked */
/*
* Information for cpu hotplug
*/
u64 online_clk; /* when a CPU becomes online */
u64 offline_clk; /* when a CPU becomes offline */
/*
* Information used to keep track of latency criticality
*/
volatile u32 max_lat_cri; /* maximum latency criticality */
volatile u32 sum_lat_cri; /* sum of latency criticality */
volatile u32 nr_sched; /* number of schedules */
/*
* Information used to keep track of performance criticality
*/
volatile u64 sum_perf_cri; /* sum of performance criticality */
volatile u64 min_perf_cri; /* minimum performance criticality */
volatile u64 max_perf_cri; /* maximum performance criticality */
/*
* Information of a current running task for preemption
*/
volatile u64 stopping_tm_est_ns; /* estimated stopping time */
volatile u16 lat_cri; /* latency criticality */
volatile u8 is_online; /* is this CPU online? */
s32 cpu_id; /* cpu id */
/*
* Information for CPU frequency scaling
*/
u32 cpuperf_cur; /* CPU's current performance target */
u32 cpuperf_task; /* task's CPU performance target */
u32 cpuperf_avg; /* EWMA of task's CPU performance target */
/*
* Fields for core compaction
*
*/
u16 capacity; /* CPU capacity based on 1000 */
u8 big_core; /* is it a big core? */
u8 turbo_core; /* is it a turbo core? */
u8 cpdom_id; /* compute domain id (== dsq_id) */
u8 cpdom_alt_id; /* compute domain id of alternative type (== dsq_id) */
u8 cpdom_poll_pos; /* index to check if a DSQ of a compute domain is starving */
struct bpf_cpumask __kptr *tmp_a_mask; /* temporary cpu mask */
struct bpf_cpumask __kptr *tmp_o_mask; /* temporary cpu mask */
struct bpf_cpumask __kptr *tmp_t_mask; /* temporary cpu mask */
struct bpf_cpumask __kptr *tmp_t2_mask; /* temporary cpu mask */
/*
* Information for statistics.
*/
volatile u32 nr_migration; /* number of migrations */
volatile u32 nr_preemption; /* number of preemptions */
volatile u32 nr_greedy; /* number of greedy tasks scheduled */
volatile u32 nr_perf_cri;
volatile u32 nr_lat_cri;
} __attribute__((aligned(CACHELINE_SIZE)));
#endif /* __LAVD_H */

(File diff suppressed because it is too large.)


@@ -0,0 +1,565 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2023, 2024 Valve Corporation.
* Author: Changwoo Min <changwoo@igalia.com>
*/
/*
* To be included in main.bpf.c
*/
/*
* CPU topology
*/
static u64 LAVD_AP_LOW_UTIL;
static bool have_turbo_core;
static bool have_little_core;
const volatile u16 cpu_order_performance[LAVD_CPU_ID_MAX]; /* CPU preference order for performance and balanced mode */
const volatile u16 cpu_order_powersave[LAVD_CPU_ID_MAX]; /* CPU preference order for powersave mode */
const volatile u16 __cpu_capacity_hint[LAVD_CPU_ID_MAX]; /* CPU capacity based on 1000 */
struct cpdom_ctx cpdom_ctxs[LAVD_CPDOM_MAX_NR]; /* contexts for compute domains */
/*
* Big core's compute ratio among currently active cores
*/
static u32 cur_big_core_ratio;
/*
* Big core's compute ratio when all cores are active
*/
static u32 default_big_core_ratio;
/*
* Statistics
*/
volatile int power_mode;
volatile u64 last_power_mode_clk;
volatile u64 performance_mode_ns;
volatile u64 balanced_mode_ns;
volatile u64 powersave_mode_ns;
static u64 calc_nr_active_cpus(struct sys_stat *stat_cur)
{
u64 nr_active;
/*
* nr_active = ceil(nr_cpus_onln * cpu_util * per_core_max_util)
*/
nr_active = (nr_cpus_onln * stat_cur->util * 1000) + 500;
nr_active /= (LAVD_CC_PER_CORE_MAX_CTUIL * 1000);
/*
* If a few CPUs are particularly busy, boost the active CPUs more.
*/
nr_active += min(LAVD_CC_NR_OVRFLW, (stat_cur->nr_violation) / 1000);
nr_active = max(min(nr_active, nr_cpus_onln),
LAVD_CC_NR_ACTIVE_MIN);
return nr_active;
}
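As a standalone aside (not part of this file), the same fixed-point arithmetic with assumed inputs, to show how the rounding and clamping behave:

#include <stdio.h>

#define LAVD_CC_PER_CORE_MAX_CTUIL	500	/* 50.0% */
#define LAVD_CC_NR_OVRFLW		1
#define LAVD_CC_NR_ACTIVE_MIN		1

#define min(x, y)	((x) < (y) ? (x) : (y))
#define max(x, y)	((x) > (y) ? (x) : (y))

/* Same arithmetic as calc_nr_active_cpus(), with plain parameters. */
static unsigned long long nr_active_cpus(unsigned long long nr_cpus_onln,
					 unsigned long long util,	/* 0..1000 */
					 unsigned long long nr_violation)
{
	unsigned long long nr_active;

	nr_active = (nr_cpus_onln * util * 1000) + 500;
	nr_active /= (LAVD_CC_PER_CORE_MAX_CTUIL * 1000);
	nr_active += min(LAVD_CC_NR_OVRFLW, nr_violation / 1000);
	return max(min(nr_active, nr_cpus_onln), LAVD_CC_NR_ACTIVE_MIN);
}

int main(void)
{
	/* 8 CPUs at 60% utilization: (8 * 600) / 500 = 9, clamped to 8 online CPUs. */
	printf("%llu\n", nr_active_cpus(8, 600, 0));	/* prints 8 */
	/* 8 CPUs at 25% utilization: (8 * 250) / 500 = 4 active CPUs. */
	printf("%llu\n", nr_active_cpus(8, 250, 0));	/* prints 4 */
	return 0;
}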
static bool clear_cpu_periodically(u32 cpu, struct bpf_cpumask *cpumask)
{
u32 clear;
/*
* If the CPU is on, we clear the bit once every
* LAVD_CC_CPU_PIN_INTERVAL_DIV calls on average. Hence, the bit will be
* probabilistically cleared about once every LAVD_CC_CPU_PIN_INTERVAL.
*/
clear = !(bpf_get_prandom_u32() % LAVD_CC_CPU_PIN_INTERVAL_DIV);
if (clear)
bpf_cpumask_clear_cpu(cpu, cpumask);
return clear;
}
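And a tiny standalone simulation of the probabilistic clearing (assuming the divisor evaluates to 20, as the constants imply): roughly one call in twenty clears the bit, i.e. about once per LAVD_CC_CPU_PIN_INTERVAL.

#include <stdio.h>
#include <stdlib.h>

#define LAVD_CC_CPU_PIN_INTERVAL_DIV	20	/* 1 sec / 50 msec stat interval */

int main(void)
{
	int i, cleared = 0, calls = 100000;

	srand(1);
	for (i = 0; i < calls; i++)
		if (!(rand() % LAVD_CC_CPU_PIN_INTERVAL_DIV))
			cleared++;	/* same test as clear_cpu_periodically() */
	printf("cleared %d of %d calls (expected about %d)\n",
	       cleared, calls, calls / LAVD_CC_CPU_PIN_INTERVAL_DIV);
	return 0;
}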
static void do_core_compaction(void)
{
struct sys_stat *stat_cur = get_sys_stat_cur();
struct cpu_ctx *cpuc;
struct bpf_cpumask *active, *ovrflw;
int nr_cpus, nr_active, nr_active_old, cpu, i;
u32 sum_capacity = 0, big_capacity = 0;
bool clear;
const volatile u16 *cpu_order;
bpf_rcu_read_lock();
/*
* Prepare cpumasks.
*/
active = active_cpumask;
ovrflw = ovrflw_cpumask;
if (!active || !ovrflw) {
scx_bpf_error("Failed to prepare cpumasks.");
goto unlock_out;
}
/*
* Decide which CPU order to use according to the current power mode.
*/
if (is_powersave_mode)
cpu_order = cpu_order_powersave;
else
cpu_order = cpu_order_performance;
/*
* Assign active and overflow cores
*/
nr_active_old = stat_cur->nr_active;
nr_active = calc_nr_active_cpus(stat_cur);
nr_cpus = nr_active + LAVD_CC_NR_OVRFLW;
bpf_for(i, 0, nr_cpu_ids) {
if (i >= LAVD_CPU_ID_MAX)
break;
/*
* Skip offline cpu
*/
cpu = cpu_order[i];
cpuc = get_cpu_ctx_id(cpu);
if (!cpuc || !cpuc->is_online) {
bpf_cpumask_clear_cpu(cpu, active);
bpf_cpumask_clear_cpu(cpu, ovrflw);
continue;
}
/*
* Assign an online cpu to active and overflow cpumasks
*/
if (i < nr_cpus) {
if (i < nr_active) {
bpf_cpumask_set_cpu(cpu, active);
bpf_cpumask_clear_cpu(cpu, ovrflw);
}
else {
bpf_cpumask_set_cpu(cpu, ovrflw);
bpf_cpumask_clear_cpu(cpu, active);
}
scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
/*
* Calculate big capacity ratio among active cores.
*/
sum_capacity += cpuc->capacity;
if (cpuc->big_core)
big_capacity += cpuc->capacity;
}
else {
if (i < nr_active_old) {
bpf_cpumask_clear_cpu(cpu, active);
bpf_cpumask_clear_cpu(cpu, ovrflw);
}
else {
/*
* This is the case when a CPU belongs to the
* overflow set even though that CPU was not an
* overflow set initially. This can happen only
* when a pinned userspace task ran on this
* CPU. In this case, we keep the CPU in an
* overflow set since the CPU will be used
* anyway for the task. This will promote equal
* use of all used CPUs, lowering the energy
* consumption by avoiding a few CPUs being
* turbo-boosted. Hence, we do not clear the
* overflow cpumask here for a while,
* approximately for LAVD_CC_CPU_PIN_INTERVAL.
*/
bpf_cpumask_clear_cpu(cpu, active);
clear = clear_cpu_periodically(cpu, ovrflw);
if (!clear)
scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
}
}
}
cur_big_core_ratio = (1000 * big_capacity) / sum_capacity;
stat_cur->nr_active = nr_active;
unlock_out:
bpf_rcu_read_unlock();
}
static void update_power_mode_time(void)
{
u64 now = bpf_ktime_get_ns();
u64 delta;
if (last_power_mode_clk == 0)
last_power_mode_clk = now;
delta = now - last_power_mode_clk;
last_power_mode_clk = now;
switch (power_mode) {
case LAVD_PM_PERFORMANCE:
__sync_fetch_and_add(&performance_mode_ns, delta);
break;
case LAVD_PM_BALANCED:
__sync_fetch_and_add(&balanced_mode_ns, delta);
break;
case LAVD_PM_POWERSAVE:
__sync_fetch_and_add(&powersave_mode_ns, delta);
break;
}
}
static int do_set_power_profile(s32 pm, int util)
{
/*
* Skip setting the mode if already in the same mode.
*/
if (power_mode == pm)
return 0;
/*
* Update power mode time
*/
update_power_mode_time();
power_mode = pm;
/*
* Change the power mode.
*/
switch (pm) {
case LAVD_PM_PERFORMANCE:
no_core_compaction = true;
no_freq_scaling = true;
no_prefer_turbo_core = false;
is_powersave_mode = false;
/*
* Since the core compaction becomes off, we need to
* reinitialize the active and overflow cpumask for performance
* mode.
*
* Note that a verifier in an old kernel does not allow calling
* bpf_cpumask_set_cpu(), so we defer the actual update to our
* timer handler, update_sys_stat().
*/
reinit_cpumask_for_performance = true;
debugln("Set the scheduler's power profile to performance mode: %d", util);
break;
case LAVD_PM_BALANCED:
no_core_compaction = false;
no_freq_scaling = false;
no_prefer_turbo_core = false;
is_powersave_mode = false;
reinit_cpumask_for_performance = false;
debugln("Set the scheduler's power profile to balanced mode: %d", util);
break;
case LAVD_PM_POWERSAVE:
no_core_compaction = false;
no_freq_scaling = false;
no_prefer_turbo_core = true;
is_powersave_mode = true;
reinit_cpumask_for_performance = false;
debugln("Set the scheduler's power profile to power-save mode: %d", util);
break;
default:
return -EINVAL;
}
return 0;
}
static int do_autopilot(void)
{
struct sys_stat *stat_cur = get_sys_stat_cur();
/*
* If the CPU utilization is very low (say <= 5%), it means high
* performance is not required. We run the scheduler in powersave mode
* to save energy consumption.
*/
if (stat_cur->util <= LAVD_AP_LOW_UTIL)
return do_set_power_profile(LAVD_PM_POWERSAVE, stat_cur->util);
/*
* If the CPU utilization is moderate (say > 5%, <= 30%), we run the
* scheduler in balanced mode. Actually, balanced mode can save energy
* consumption only under moderate CPU load.
*/
if (stat_cur->util <= LAVD_AP_HIGH_UTIL)
return do_set_power_profile(LAVD_PM_BALANCED, stat_cur->util);
/*
* If the CPU utilization is high enough (say > 30%), we run the
* scheduler in performance mode. The system indeed needs performance,
* and there is also little energy benefit under balanced mode anyway.
*/
return do_set_power_profile(LAVD_PM_PERFORMANCE, stat_cur->util);
}
static void update_thr_perf_cri(void)
{
struct sys_stat *stat_cur = get_sys_stat_cur();
u32 little_core_ratio, delta, diff, thr;
if (no_core_compaction || !have_little_core)
cur_big_core_ratio = default_big_core_ratio;
/*
* If all active cores are big, all tasks should run on the big cores.
*/
if (cur_big_core_ratio == 1000) {
stat_cur->thr_perf_cri = 0;
return;
}
/*
* We approximate the distribution of performance criticality of tasks
* using min, avg, and max performance criticality of a given period.
*
* min_perf_cri
* | avg_perf_cri
* | | max_perf_cri
* | | |
* <--------><----------------------->
*
* Half of the compute capacity should be assigned to the below-average
* tasks (< avg_perf_cri), and the other half should be assigned to the
* above-average tasks (>= avg_perf_cri).
*
* <------------><------------------->
* | | |
* | | 1000
* | 1000 - big_core_ratio (i.e., little_core_ratio)
* 0
*/
little_core_ratio = 1000 - cur_big_core_ratio;
if (little_core_ratio < 500) {
/*
* min_perf_cri
* | avg_perf_cri
* | | max_perf_cri
* | | |
* <--------><----------------------->
*
* <-///-><-------------------------->
* | | |
* | | 1000
* | little_core_ratio
* 0
*/
delta = stat_cur->avg_perf_cri - stat_cur->min_perf_cri;
diff = (delta * little_core_ratio) / 1000;
thr = diff + stat_cur->min_perf_cri;
}
else {
/*
* min_perf_cri
* | avg_perf_cri
* | | max_perf_cri
* | | |
* <--------><----------------------->
*
* <---------------------><-////////->
* | | |
* | | 1000
* | little_core_ratio
* 0
*/
delta = stat_cur->max_perf_cri - stat_cur->avg_perf_cri;
diff = (delta * cur_big_core_ratio) / 1000;
thr = stat_cur->max_perf_cri - diff;
}
stat_cur->thr_perf_cri = thr;
}
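A standalone worked example of the threshold split above, with assumed numbers (min/avg/max performance criticality of 100/400/900), not part of this file:

#include <stdio.h>

/* Same threshold computation as update_thr_perf_cri(), with plain inputs. */
static unsigned int thr_perf_cri(unsigned int min_pc, unsigned int avg_pc,
				 unsigned int max_pc, unsigned int big_core_ratio)
{
	unsigned int little_core_ratio = 1000 - big_core_ratio;
	unsigned int delta, diff;

	if (little_core_ratio < 500) {
		delta = avg_pc - min_pc;
		diff = (delta * little_core_ratio) / 1000;
		return min_pc + diff;
	}
	delta = max_pc - avg_pc;
	diff = (delta * big_core_ratio) / 1000;
	return max_pc - diff;
}

int main(void)
{
	/* Big cores hold 70% of the capacity (little_core_ratio = 300):
	 * thr = 100 + (400 - 100) * 300 / 1000 = 190. */
	printf("%u\n", thr_perf_cri(100, 400, 900, 700));	/* prints 190 */
	/* Big cores hold only 30% of the capacity (little_core_ratio = 700):
	 * thr = 900 - (900 - 400) * 300 / 1000 = 750. */
	printf("%u\n", thr_perf_cri(100, 400, 900, 300));	/* prints 750 */
	return 0;
}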
static int reinit_active_cpumask_for_performance(void)
{
struct cpu_ctx *cpuc;
struct bpf_cpumask *active, *ovrflw;
int cpu, err = 0;
barrier();
bpf_rcu_read_lock();
/*
* Prepare cpumasks.
*/
active = active_cpumask;
ovrflw = ovrflw_cpumask;
if (!active || !ovrflw) {
scx_bpf_error("Failed to prepare cpumasks.");
err = -ENOMEM;
goto unlock_out;
}
/*
* Once core compaction becomes off in performance mode,
* reinitialize active/overflow cpumasks to reflect the mode change.
*/
bpf_for(cpu, 0, nr_cpu_ids) {
cpuc = get_cpu_ctx_id(cpu);
if (!cpuc) {
scx_bpf_error("Failed to lookup cpu_ctx: %d", cpu);
err = -ESRCH;
goto unlock_out;
}
if (cpuc->big_core)
bpf_cpumask_set_cpu(cpu, active);
else
bpf_cpumask_set_cpu(cpu, ovrflw);
}
unlock_out:
bpf_rcu_read_unlock();
return err;
}
static int calc_cpuperf_target(struct sys_stat *stat_cur,
struct task_ctx *taskc, struct cpu_ctx *cpuc)
{
u64 max_load, cpu_load;
u32 cpuperf_target;
if (!stat_cur || !taskc || !cpuc)
return -EINVAL;
if (no_freq_scaling) {
cpuc->cpuperf_task = SCX_CPUPERF_ONE;
cpuc->cpuperf_avg = SCX_CPUPERF_ONE;
return 0;
}
/*
* We determine the clock frequency of a CPU using two factors: 1) the
* current CPU utilization (cpuc->util) and 2) the current task's
* performance criticality (taskc->perf_cri) compared to the
* system-wide average performance criticality
* (stat_cur->thr_perf_cri).
*
* When the current CPU utilization is 85% and the current task's
* performance criticality is the same as the system-wide average
* criticality, we set the target CPU frequency to the maximum.
*
* In other words, even if CPU utilization is not so high, the target
* CPU frequency could be high when the task's performance criticality
* is high enough (i.e., boosting CPU frequency). On the other hand,
* the target CPU frequency could be low even if CPU utilization is
* high when a non-performance-critical task is running (i.e.,
* deboosting CPU frequency).
*/
max_load = stat_cur->thr_perf_cri * LAVD_CPU_UTIL_MAX_FOR_CPUPERF;
cpu_load = taskc->perf_cri * cpuc->util;
cpuperf_target = (cpu_load * SCX_CPUPERF_ONE) / max_load;
cpuperf_target = min(cpuperf_target, SCX_CPUPERF_ONE);
cpuc->cpuperf_task = cpuperf_target;
cpuc->cpuperf_avg = calc_avg32(cpuc->cpuperf_avg, cpuperf_target);
return 0;
}
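To make the scaling rule concrete, a standalone arithmetic check with assumed inputs, and assuming SCX_CPUPERF_ONE is 1024 (sched_ext's full-performance value): a task right at the criticality threshold on a CPU at 85% utilization maps to the maximum target, while a less critical task on a half-utilized CPU lands proportionally lower.

#include <stdio.h>

#define SCX_CPUPERF_ONE			1024	/* assumed sched_ext full-perf value */
#define LAVD_CPU_UTIL_MAX_FOR_CPUPERF	850	/* 85.0% */

#define min(x, y)	((x) < (y) ? (x) : (y))

/* Same scaling as calc_cpuperf_target(): perf_cri * util vs. thr_perf_cri * 85%. */
static unsigned int cpuperf_target(unsigned long long thr_perf_cri,
				   unsigned long long perf_cri,
				   unsigned long long util /* 0..1000 */)
{
	unsigned long long max_load = thr_perf_cri * LAVD_CPU_UTIL_MAX_FOR_CPUPERF;
	unsigned long long cpu_load = perf_cri * util;
	unsigned long long target = (cpu_load * SCX_CPUPERF_ONE) / max_load;

	return min(target, SCX_CPUPERF_ONE);
}

int main(void)
{
	/* Task at the threshold criticality, CPU at 85% utilization -> maximum. */
	printf("%u\n", cpuperf_target(500, 500, 850));	/* prints 1024 */
	/* Half as critical on a 50%-utilized CPU -> roughly 30% of maximum. */
	printf("%u\n", cpuperf_target(500, 250, 500));	/* prints 301 */
	return 0;
}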
static bool try_increase_cpuperf_target(struct cpu_ctx *cpuc)
{
/*
* When a task becomes running, update the CPU's performance target only
* if the task's target performance is higher. This helps adapt quickly
* to workload changes by rapidly increasing the CPU's performance
* target.
*/
u32 target;
if (!cpuc)
return false;
target = max(cpuc->cpuperf_task, cpuc->cpuperf_avg);
if (cpuc->cpuperf_cur < target) {
cpuc->cpuperf_cur = target;
scx_bpf_cpuperf_set(cpuc->cpu_id, target);
return true;
}
return false;
}
static bool try_decrease_cpuperf_target(struct cpu_ctx *cpuc)
{
/*
* Upon every tick interval, we try to decrease the CPU's performance
* target if the current one is higher than both the current task's
* target and the EWMA of past targets. This helps adapt gradually to
* workload changes upon sudden load drops.
*/
u32 target;
if (!cpuc)
return false;
target = max(cpuc->cpuperf_task, cpuc->cpuperf_avg);
if (cpuc->cpuperf_cur != target) {
cpuc->cpuperf_cur = target;
scx_bpf_cpuperf_set(cpuc->cpu_id, target);
return true;
}
return false;
}
static u16 get_cpuperf_cap(s32 cpu)
{
if (cpu >= 0 && cpu < nr_cpu_ids && cpu < LAVD_CPU_ID_MAX)
return __cpu_capacity_hint[cpu];
debugln("Infeasible CPU id: %d", cpu);
return 0;
}
static u16 get_cputurbo_cap(void)
{
u16 turbo_cap = 0;
int nr_turbo = 0, cpu;
/*
* Find the maximum CPU frequency
*/
for (cpu = 0; cpu < nr_cpu_ids && cpu < LAVD_CPU_ID_MAX; cpu++) {
if (__cpu_capacity_hint[cpu] > turbo_cap) {
turbo_cap = __cpu_capacity_hint[cpu];
nr_turbo++;
}
}
/*
* If all CPU's frequencies are the same, ignore the turbo.
*/
if (nr_turbo <= 1)
turbo_cap = 0;
return turbo_cap;
}
static void init_autopilot_low_util(void)
{
if (nr_cpus_big < nr_cpus_onln) {
/*
* When there are little cores, we move up to the balanced mode
* if one little core is fully utilized.
*/
LAVD_AP_LOW_UTIL = 1000 / nr_cpus_onln;
}
else {
/*
* When there are only big cores, we move up to the balanced
* mode if two big cores are fully utilized.
*/
LAVD_AP_LOW_UTIL = (2 * 1000) / nr_cpus_onln;
}
}
SEC("syscall")
int set_power_profile(struct power_arg *input)
{
return do_set_power_profile(input->power_mode, 0);
}


@@ -0,0 +1,326 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* To be included in main.bpf.c
*/
/*
* Preemption related ones
*/
struct preemption_info {
u64 stopping_tm_est_ns;
u64 last_kick_clk;
u64 lat_cri;
struct cpu_ctx *cpuc;
};
static u64 get_est_stopping_time(struct task_ctx *taskc)
{
return bpf_ktime_get_ns() + taskc->run_time_ns;
}
static int comp_preemption_info(struct preemption_info *prm_a,
struct preemption_info *prm_b)
{
/*
* Check whether one's latency priority _or_ deadline is smaller than
* the other's.
*/
if ((prm_a->lat_cri < prm_b->lat_cri) ||
(prm_a->stopping_tm_est_ns < prm_b->stopping_tm_est_ns))
return -1;
if ((prm_a->lat_cri > prm_b->lat_cri) ||
(prm_a->stopping_tm_est_ns > prm_b->stopping_tm_est_ns))
return 1;
return 0;
}
static bool can_task1_kick_task2(struct preemption_info *prm_task1,
struct preemption_info *prm_task2)
{
return comp_preemption_info(prm_task1, prm_task2) < 0;
}
static bool can_cpu1_kick_cpu2(struct preemption_info *prm_cpu1,
struct preemption_info *prm_cpu2,
struct cpu_ctx *cpuc2)
{
/*
* Set a CPU information
*/
prm_cpu2->stopping_tm_est_ns = cpuc2->stopping_tm_est_ns;
prm_cpu2->lat_cri = cpuc2->lat_cri;
prm_cpu2->cpuc = cpuc2;
prm_cpu2->last_kick_clk = cpuc2->last_kick_clk;
/*
* If that CPU runs a lower priority task, that's a victim
* candidate.
*/
return can_task1_kick_task2(prm_cpu1, prm_cpu2);
}
static bool is_worth_kick_other_task(struct task_ctx *taskc)
{
/*
* The scx_bpf_kick_cpu() used for preemption is expensive as an IPI is
* involved. Hence, we first judiciously check whether it is worth
* trying to victimize another CPU as the current task is urgent
* enough.
*/
struct sys_stat *stat_cur = get_sys_stat_cur();
return (taskc->lat_cri >= stat_cur->thr_lat_cri);
}
static bool can_cpu_be_kicked(u64 now, struct cpu_ctx *cpuc)
{
return cpuc->is_online &&
(now - cpuc->last_kick_clk) >= LAVD_PREEMPT_KICK_MARGIN;
}
static struct cpu_ctx *find_victim_cpu(const struct cpumask *cpumask,
struct task_ctx *taskc,
u64 *p_old_last_kick_clk)
{
/*
* We see preemption as a load-balancing problem. In a system with N
* CPUs, ideally, the top N tasks with the highest latency priorities
* should run on the N CPUs all the time. This is the same as the
* load-balancing problem; the load-balancing problem finds a least
* loaded server, and the preemption problem finds a CPU running a
* least latency critical task. Hence, we use the 'power of two random
* choices' technique.
*/
u64 now = bpf_ktime_get_ns();
struct cpu_ctx *cpuc;
struct preemption_info prm_task, prm_cpus[2], *victim_cpu;
int cpu, nr_cpus;
int i, v = 0, cur_cpu = bpf_get_smp_processor_id();
int ret;
/*
* Get task's preemption information for comparison.
*/
prm_task.stopping_tm_est_ns = get_est_stopping_time(taskc) +
LAVD_PREEMPT_KICK_MARGIN;
prm_task.lat_cri = taskc->lat_cri;
prm_task.cpuc = cpuc = get_cpu_ctx();
if (!cpuc) {
scx_bpf_error("Failed to lookup the current cpu_ctx");
goto null_out;
}
prm_task.last_kick_clk = cpuc->last_kick_clk;
/*
* First, test the current CPU since it can skip the expensive IPI.
*/
if (can_cpu_be_kicked(now, cpuc) &&
bpf_cpumask_test_cpu(cur_cpu, cpumask) &&
can_cpu1_kick_cpu2(&prm_task, &prm_cpus[0], cpuc)) {
victim_cpu = &prm_task;
goto bingo_out;
}
/*
* If the current CPU cannot be a victim, let's check if it is worth to
* try to kick other CPU at the expense of IPI.
*/
if (!is_worth_kick_other_task(taskc))
goto null_out;
/*
* Randomly find _two_ CPUs that run lower-priority tasks than @p. To
* traverse CPUs in a random order, we start from a random CPU ID in a
* random direction (left or right). The random-order traversal helps
* to mitigate the thundering herd problem. Otherwise, all CPUs may end
* up finding the same victim CPU.
*
* In the worst case, the current logic traverses _all_ CPUs. It would
* be too expensive to perform on every task enqueue. We need to revisit
* this if the traversal cost becomes problematic.
*/
barrier();
nr_cpus = bpf_cpumask_weight(cpumask);
bpf_for(i, 0, nr_cpus) {
/*
* Decide a CPU ID to examine.
*/
cpu = bpf_cpumask_any_distribute(cpumask);
if (cpu >= nr_cpu_ids || cur_cpu == cpu)
continue;
/*
* Check whether that CPU is qualified to run @p.
*/
cpuc = get_cpu_ctx_id(cpu);
if (!cpuc) {
scx_bpf_error("Failed to lookup cpu_ctx: %d", cpu);
goto null_out;
}
if (!can_cpu_be_kicked(now, cpuc))
continue;
/*
* If that CPU runs a lower priority task, that's a victim
* candidate.
*/
ret = can_cpu1_kick_cpu2(&prm_task, &prm_cpus[v], cpuc);
if (ret == true && ++v >= 2)
break;
}
/*
* Choose a final victim CPU.
*/
switch(v) {
case 2: /* two candidates */
victim_cpu = can_task1_kick_task2(&prm_cpus[0], &prm_cpus[1]) ?
&prm_cpus[0] : &prm_cpus[1];
goto bingo_out;
case 1: /* one candidate */
victim_cpu = &prm_cpus[0];
goto bingo_out;
case 0: /* no candidate */
goto null_out;
default:/* something wrong */
goto null_out;
}
bingo_out:
taskc->victim_cpu = victim_cpu->cpuc->cpu_id;
*p_old_last_kick_clk = victim_cpu->last_kick_clk;
return victim_cpu->cpuc;
null_out:
taskc->victim_cpu = (s32)LAVD_CPU_ID_NONE;
return NULL;
}
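The "power of two random choices" idea referenced above, shown in isolation as a generic standalone sketch (not the scheduler's code): sampling two random candidates and keeping the better one already avoids the worst picks without scanning every CPU.

#include <stdio.h>
#include <stdlib.h>

/* Generic "power of two random choices": pick two random candidates and
 * keep the better (here: lower-loaded) one.  find_victim_cpu() applies the
 * same idea to CPUs, comparing latency criticality instead of a load value. */
static int pick_two_choices(const unsigned int *load, int n)
{
	int a = rand() % n;
	int b = rand() % n;

	return load[a] <= load[b] ? a : b;
}

int main(void)
{
	unsigned int load[8] = { 90, 10, 70, 30, 80, 20, 60, 40 };
	int i, idx;

	srand(42);
	for (i = 0; i < 4; i++) {
		idx = pick_two_choices(load, 8);
		printf("picked cpu %d (load %u)\n", idx, load[idx]);
	}
	return 0;
}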
static bool kick_cpu(struct cpu_ctx *victim_cpuc, u64 victim_last_kick_clk)
{
/*
* If the current CPU is a victim, we just reset the current task's
* time slice as an optimization. Otherwise, kick the remote CPU for
* preemption.
*
* Kicking the victim CPU does _not_ guarantee that task @p will run on
* that CPU. Enqueuing @p to the global queue is one operation, and
* kicking the victim is another asynchronous operation. However, it is
* okay because, anyway, the victim CPU will run a higher-priority task
* than @p.
*/
if (bpf_get_smp_processor_id() == victim_cpuc->cpu_id) {
struct task_struct *tsk = bpf_get_current_task_btf();
tsk->scx.slice = 0;
return true;
}
/*
* Kick the remote victim CPU if it is not victimized yet by another
* concurrent kick task.
*/
bool ret = __sync_bool_compare_and_swap(&victim_cpuc->last_kick_clk,
victim_last_kick_clk,
bpf_ktime_get_ns());
if (ret)
scx_bpf_kick_cpu(victim_cpuc->cpu_id, SCX_KICK_PREEMPT);
return ret;
}
static bool try_find_and_kick_victim_cpu(struct task_struct *p,
struct task_ctx *taskc,
struct cpu_ctx *cpuc_cur,
u64 dsq_id)
{
struct bpf_cpumask *cd_cpumask, *cpumask;
struct cpdom_ctx *cpdomc;
struct cpu_ctx *victim_cpuc;
u64 victim_last_kick_clk;
bool ret = false;
/*
* Prepare a cpumask so we find a victim within @p's compute domain.
*/
cpumask = cpuc_cur->tmp_t_mask;
cpdomc = MEMBER_VPTR(cpdom_ctxs, [dsq_id]);
cd_cpumask = MEMBER_VPTR(cpdom_cpumask, [dsq_id]);
if (!cpdomc || !cd_cpumask || !cpumask)
return false;
bpf_cpumask_and(cpumask, cast_mask(cd_cpumask), p->cpus_ptr);
/*
* Find a victim CPU among CPUs that run lower-priority tasks.
*/
victim_cpuc = find_victim_cpu(cast_mask(cpumask), taskc, &victim_last_kick_clk);
/*
* If a victim CPU is chosen, preempt the victim by kicking it.
*/
if (victim_cpuc)
ret = kick_cpu(victim_cpuc, victim_last_kick_clk);
if (!ret)
taskc->victim_cpu = (s32)LAVD_CPU_ID_NONE;
return ret;
}
static bool try_yield_current_cpu(struct task_struct *p_run,
struct cpu_ctx *cpuc_run,
struct task_ctx *taskc_run)
{
struct task_struct *p_wait;
struct task_ctx *taskc_wait;
struct preemption_info prm_run, prm_wait;
s32 cpu_id = scx_bpf_task_cpu(p_run), wait_vtm_cpu_id;
bool ret = false;
/*
* If there is a higher priority task waiting on the global rq, the
* currently running task yields the CPU by shrinking its time slice to
* zero.
*/
prm_run.stopping_tm_est_ns = taskc_run->last_running_clk +
taskc_run->run_time_ns -
LAVD_PREEMPT_TICK_MARGIN;
prm_run.lat_cri = taskc_run->lat_cri;
bpf_rcu_read_lock();
bpf_for_each(scx_dsq, p_wait, cpuc_run->cpdom_id, 0) {
taskc_wait = get_task_ctx(p_wait);
if (!taskc_wait)
break;
wait_vtm_cpu_id = taskc_wait->victim_cpu;
if (wait_vtm_cpu_id != (s32)LAVD_CPU_ID_NONE)
break;
prm_wait.stopping_tm_est_ns = get_est_stopping_time(taskc_wait);
prm_wait.lat_cri = taskc_wait->lat_cri;
if (can_task1_kick_task2(&prm_wait, &prm_run)) {
/*
* The atomic CAS guarantees that only one task yields its
* CPU for the waiting task.
*/
ret = __sync_bool_compare_and_swap(
&taskc_wait->victim_cpu,
(s32)LAVD_CPU_ID_NONE, cpu_id);
if (ret)
p_run->scx.slice = 0;
}
/*
* Test only the first entry on the DSQ.
*/
break;
}
bpf_rcu_read_unlock();
return ret;
}


@@ -0,0 +1,376 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2023, 2024 Valve Corporation.
* Author: Changwoo Min <changwoo@igalia.com>
*/
/*
* To be included in main.bpf.c
*/
/*
* Timer for updating the system-wide status periodically
*/
struct update_timer {
struct bpf_timer timer;
};
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
__type(key, u32);
__type(value, struct update_timer);
} update_timer SEC(".maps");
struct sys_stat_ctx {
struct sys_stat *stat_cur;
struct sys_stat *stat_next;
u64 now;
u64 duration;
u64 duration_total;
u64 idle_total;
u64 compute_total;
u64 load_actual;
u64 tot_svc_time;
u64 nr_queued_task;
u64 load_run_time_ns;
s32 max_lat_cri;
s32 avg_lat_cri;
u64 sum_lat_cri;
u32 nr_sched;
u32 nr_migration;
u32 nr_preemption;
u32 nr_greedy;
u32 nr_perf_cri;
u32 nr_lat_cri;
u32 nr_big;
u32 nr_pc_on_big;
u32 nr_lc_on_big;
u64 min_perf_cri;
u64 avg_perf_cri;
u64 max_perf_cri;
u64 sum_perf_cri;
u32 thr_perf_cri;
u64 new_util;
u32 nr_violation;
};
static void init_sys_stat_ctx(struct sys_stat_ctx *c)
{
memset(c, 0, sizeof(*c));
c->stat_cur = get_sys_stat_cur();
c->stat_next = get_sys_stat_next();
c->min_perf_cri = 1000;
c->now = bpf_ktime_get_ns();
c->duration = c->now - c->stat_cur->last_update_clk;
c->stat_next->last_update_clk = c->now;
}
static void collect_sys_stat(struct sys_stat_ctx *c)
{
u64 dsq_id;
int cpu, nr;
bpf_for(cpu, 0, nr_cpu_ids) {
struct cpu_ctx *cpuc = get_cpu_ctx_id(cpu);
if (!cpuc) {
c->compute_total = 0;
break;
}
/*
* Accumulate cpus' loads.
*/
c->load_actual += cpuc->load_actual;
c->load_run_time_ns += cpuc->load_run_time_ns;
c->tot_svc_time += cpuc->tot_svc_time;
cpuc->tot_svc_time = 0;
/*
* Accumulate statistics.
*/
if (cpuc->big_core) {
c->nr_big += cpuc->nr_sched;
c->nr_pc_on_big += cpuc->nr_perf_cri;
c->nr_lc_on_big += cpuc->nr_lat_cri;
}
c->nr_perf_cri += cpuc->nr_perf_cri;
cpuc->nr_perf_cri = 0;
c->nr_lat_cri += cpuc->nr_lat_cri;
cpuc->nr_lat_cri = 0;
c->nr_migration += cpuc->nr_migration;
cpuc->nr_migration = 0;
c->nr_preemption += cpuc->nr_preemption;
cpuc->nr_preemption = 0;
c->nr_greedy += cpuc->nr_greedy;
cpuc->nr_greedy = 0;
/*
* Accumulate task's latency criticality information.
*
* While updating cpu->* is racy, the resulting impact on
* accuracy should be small and very rare and thus should be
* fine.
*/
c->sum_lat_cri += cpuc->sum_lat_cri;
cpuc->sum_lat_cri = 0;
c->nr_sched += cpuc->nr_sched;
cpuc->nr_sched = 0;
if (cpuc->max_lat_cri > c->max_lat_cri)
c->max_lat_cri = cpuc->max_lat_cri;
cpuc->max_lat_cri = 0;
/*
* Accumulate task's performance criticality information.
*/
if (cpuc->min_perf_cri < c->min_perf_cri)
c->min_perf_cri = cpuc->min_perf_cri;
cpuc->min_perf_cri = 1000;
if (cpuc->max_perf_cri > c->max_perf_cri)
c->max_perf_cri = cpuc->max_perf_cri;
cpuc->max_perf_cri = 0;
c->sum_perf_cri += cpuc->sum_perf_cri;
cpuc->sum_perf_cri = 0;
/*
* If the CPU is in an idle state (i.e., idle_start_clk is
* non-zero), accumulate the current idle period so far.
*/
for (int i = 0; i < LAVD_MAX_RETRY; i++) {
u64 old_clk = cpuc->idle_start_clk;
if (old_clk == 0)
break;
bool ret = __sync_bool_compare_and_swap(
&cpuc->idle_start_clk, old_clk, c->now);
if (ret) {
cpuc->idle_total += c->now - old_clk;
break;
}
}
/*
* Calculate per-CPU utilization
*/
u64 compute = 0;
if (c->duration > cpuc->idle_total)
compute = c->duration - cpuc->idle_total;
c->new_util = (compute * LAVD_CPU_UTIL_MAX) / c->duration;
cpuc->util = calc_avg(cpuc->util, c->new_util);
if (cpuc->turbo_core) {
if (cpuc->util > LAVD_CC_PER_TURBO_CORE_MAX_CTUIL)
c->nr_violation += 1000;
}
else {
if (cpuc->util > LAVD_CC_PER_CORE_MAX_CTUIL)
c->nr_violation += 1000;
}
/*
* Accumulate system-wide idle time
*/
c->idle_total += cpuc->idle_total;
cpuc->idle_total = 0;
}
bpf_for(dsq_id, 0, LAVD_CPDOM_MAX_NR) {
nr = scx_bpf_dsq_nr_queued(dsq_id);
if (nr > 0)
c->nr_queued_task += nr;
}
}
static void calc_sys_stat(struct sys_stat_ctx *c)
{
c->duration_total = c->duration * nr_cpus_onln;
if (c->duration_total > c->idle_total)
c->compute_total = c->duration_total - c->idle_total;
else
c->compute_total = 0;
c->new_util = (c->compute_total * LAVD_CPU_UTIL_MAX)/c->duration_total;
if (c->nr_sched == 0) {
/*
* When a system is completely idle, it is indeed possible that
* nothing is scheduled for an interval.
*/
c->max_lat_cri = c->stat_cur->max_lat_cri;
c->avg_lat_cri = c->stat_cur->avg_lat_cri;
c->min_perf_cri = c->stat_cur->min_perf_cri;
c->max_perf_cri = c->stat_cur->max_perf_cri;
c->avg_perf_cri = c->stat_cur->avg_perf_cri;
}
else {
c->avg_lat_cri = c->sum_lat_cri / c->nr_sched;
c->avg_perf_cri = c->sum_perf_cri / c->nr_sched;
}
}
static void update_sys_stat_next(struct sys_stat_ctx *c)
{
static int cnt = 0;
u64 avg_svc_time = 0;
/*
* Update the CPU utilization to the next version.
*/
struct sys_stat *stat_cur = c->stat_cur;
struct sys_stat *stat_next = c->stat_next;
stat_next->load_actual =
calc_avg(stat_cur->load_actual, c->load_actual);
stat_next->util =
calc_avg(stat_cur->util, c->new_util);
stat_next->max_lat_cri =
calc_avg32(stat_cur->max_lat_cri, c->max_lat_cri);
stat_next->avg_lat_cri =
calc_avg32(stat_cur->avg_lat_cri, c->avg_lat_cri);
stat_next->thr_lat_cri = stat_next->max_lat_cri -
((stat_next->max_lat_cri - stat_next->avg_lat_cri) >> 1);
stat_next->min_perf_cri =
calc_avg32(stat_cur->min_perf_cri, c->min_perf_cri);
stat_next->avg_perf_cri =
calc_avg32(stat_cur->avg_perf_cri, c->avg_perf_cri);
stat_next->max_perf_cri =
calc_avg32(stat_cur->max_perf_cri, c->max_perf_cri);
stat_next->thr_perf_cri =
c->stat_cur->thr_perf_cri; /* will be updated later */
stat_next->nr_violation =
calc_avg32(stat_cur->nr_violation, c->nr_violation);
if (c->nr_sched > 0)
avg_svc_time = c->tot_svc_time / c->nr_sched;
stat_next->avg_svc_time =
calc_avg(stat_cur->avg_svc_time, avg_svc_time);
stat_next->nr_queued_task =
calc_avg(stat_cur->nr_queued_task, c->nr_queued_task);
/*
* Periodically halve the statistics (every LAVD_SYS_STAT_DECAY_TIMES
* intervals) so that they mostly reflect recent history.
*/
if (cnt++ == LAVD_SYS_STAT_DECAY_TIMES) {
cnt = 0;
stat_next->nr_sched >>= 1;
stat_next->nr_migration >>= 1;
stat_next->nr_preemption >>= 1;
stat_next->nr_greedy >>= 1;
stat_next->nr_perf_cri >>= 1;
stat_next->nr_lat_cri >>= 1;
stat_next->nr_big >>= 1;
stat_next->nr_pc_on_big >>= 1;
stat_next->nr_lc_on_big >>= 1;
__sync_fetch_and_sub(&performance_mode_ns, performance_mode_ns/2);
__sync_fetch_and_sub(&balanced_mode_ns, balanced_mode_ns/2);
__sync_fetch_and_sub(&powersave_mode_ns, powersave_mode_ns/2);
}
stat_next->nr_sched += c->nr_sched;
stat_next->nr_migration += c->nr_migration;
stat_next->nr_preemption += c->nr_preemption;
stat_next->nr_greedy += c->nr_greedy;
stat_next->nr_perf_cri += c->nr_perf_cri;
stat_next->nr_lat_cri += c->nr_lat_cri;
stat_next->nr_big += c->nr_big;
stat_next->nr_pc_on_big += c->nr_pc_on_big;
stat_next->nr_lc_on_big += c->nr_lc_on_big;
update_power_mode_time();
}
static void do_update_sys_stat(void)
{
struct sys_stat_ctx c;
/*
* Collect and prepare the next version of stat.
*/
init_sys_stat_ctx(&c);
collect_sys_stat(&c);
calc_sys_stat(&c);
update_sys_stat_next(&c);
/*
* Make the next version atomically visible.
*/
flip_sys_stat();
}
static void update_sys_stat(void)
{
do_update_sys_stat();
if (is_autopilot_on)
do_autopilot();
if (!no_core_compaction)
do_core_compaction();
update_thr_perf_cri();
if (reinit_cpumask_for_performance) {
reinit_cpumask_for_performance = false;
reinit_active_cpumask_for_performance();
}
}
static int update_timer_cb(void *map, int *key, struct bpf_timer *timer)
{
int err;
update_sys_stat();
err = bpf_timer_start(timer, LAVD_SYS_STAT_INTERVAL_NS, 0);
if (err)
scx_bpf_error("Failed to arm update timer");
return 0;
}
static s32 init_sys_stat(u64 now)
{
struct bpf_timer *timer;
u32 key = 0;
int err;
memset(__sys_stats, 0, sizeof(__sys_stats));
__sys_stats[0].last_update_clk = now;
__sys_stats[1].last_update_clk = now;
__sys_stats[0].nr_active = nr_cpus_big;
__sys_stats[1].nr_active = nr_cpus_big;
timer = bpf_map_lookup_elem(&update_timer, &key);
if (!timer) {
scx_bpf_error("Failed to lookup update timer");
return -ESRCH;
}
bpf_timer_init(timer, &update_timer, CLOCK_BOOTTIME);
bpf_timer_set_callback(timer, update_timer_cb);
err = bpf_timer_start(timer, LAVD_SYS_STAT_INTERVAL_NS, 0);
if (err) {
scx_bpf_error("Failed to arm update timer");
return err;
}
return 0;
}


@@ -0,0 +1,298 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2023, 2024 Valve Corporation.
* Author: Changwoo Min <changwoo@igalia.com>
*/
/*
* To be included in main.bpf.c
*/
/*
* Sched related globals
*/
private(LAVD) struct bpf_cpumask __kptr *turbo_cpumask; /* CPU mask for turbo CPUs */
private(LAVD) struct bpf_cpumask __kptr *big_cpumask; /* CPU mask for big CPUs */
private(LAVD) struct bpf_cpumask __kptr *little_cpumask; /* CPU mask for little CPUs */
private(LAVD) struct bpf_cpumask __kptr *active_cpumask; /* CPU mask for active CPUs */
private(LAVD) struct bpf_cpumask __kptr *ovrflw_cpumask; /* CPU mask for overflow CPUs */
private(LAVD) struct bpf_cpumask cpdom_cpumask[LAVD_CPDOM_MAX_NR]; /* CPU mask for each compute domain */
const volatile u64 nr_cpu_ids; /* maximum CPU IDs */
static volatile u64 nr_cpus_onln; /* current number of online CPUs */
static volatile u64 nr_cpus_big;
struct sys_stat __sys_stats[2];
volatile int __sys_stat_idx;
/*
* Options
*/
volatile bool no_core_compaction;
volatile bool no_freq_scaling;
volatile bool no_prefer_turbo_core;
volatile bool is_powersave_mode;
volatile bool reinit_cpumask_for_performance;
const volatile bool is_autopilot_on;
const volatile u32 is_smt_active;
const volatile u8 verbose;
/*
* Exit information
*/
UEI_DEFINE(uei);
/*
* per-CPU globals
*/
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__type(key, u32);
__type(value, struct cpu_ctx);
__uint(max_entries, 1);
} cpu_ctx_stor SEC(".maps");
/*
* Per-task scheduling context
*/
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, struct task_ctx);
} task_ctx_stor SEC(".maps");
#define debugln(fmt, ...) \
({ \
if (verbose > 0) \
bpf_printk("[%s:%d] " fmt, __func__, __LINE__, \
##__VA_ARGS__); \
})
#define traceln(fmt, ...) \
({ \
if (verbose > 1) \
bpf_printk("[%s:%d] " fmt, __func__, __LINE__, \
##__VA_ARGS__); \
})
#ifndef min
#define min(X, Y) (((X) < (Y)) ? (X) : (Y))
#endif
#ifndef max
#define max(X, Y) (((X) < (Y)) ? (Y) : (X))
#endif
static struct sys_stat *get_sys_stat_cur(void)
{
if (READ_ONCE(__sys_stat_idx) == 0)
return &__sys_stats[0];
return &__sys_stats[1];
}
static struct sys_stat *get_sys_stat_next(void)
{
if (READ_ONCE(__sys_stat_idx) == 0)
return &__sys_stats[1];
return &__sys_stats[0];
}
static void flip_sys_stat(void)
{
WRITE_ONCE(__sys_stat_idx, __sys_stat_idx ^ 0x1);
}
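The three helpers above implement a simple double-buffer (ping-pong) scheme for the statistics; a minimal standalone sketch of the same pattern (using volatile here instead of READ_ONCE/WRITE_ONCE):

#include <stdio.h>

/* Readers use the "cur" slot; the updater fills the "next" slot and then
 * flips the index so the new snapshot becomes visible in one step. */
struct stat_snapshot { unsigned long long util; };

static struct stat_snapshot stats[2];
static volatile int stat_idx;

static struct stat_snapshot *stat_cur(void)  { return &stats[stat_idx]; }
static struct stat_snapshot *stat_next(void) { return &stats[stat_idx ^ 1]; }
static void stat_flip(void)                  { stat_idx ^= 1; }

int main(void)
{
	stat_next()->util = 42;	/* build the next snapshot off to the side */
	stat_flip();		/* publish it */
	printf("util = %llu\n", stat_cur()->util);	/* prints 42 */
	return 0;
}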
static u64 sigmoid_u64(u64 v, u64 max)
{
/*
* An integer approximation of the sigmoid function. It is convenient
* to use the sigmoid function since it has a known upper and lower
* bound, [0, max].
*
* |
* | +------ <= max
* | /
* | /
* |/
* +------------->
*/
return (v > max) ? max : v;
}
static u64 rsigmoid_u64(u64 v, u64 max)
{
/*
* A horizontally flipped version of the sigmoid function. Again, it is
* convenient since the upper and lower bound of the function is known,
* [0, max].
*
*
* |
* |\ <= max
* | \
* | \
* | \
* +----+-------->
*/
return (v >= max) ? 0 : max - v;
}
static struct task_ctx *try_get_task_ctx(struct task_struct *p)
{
return bpf_task_storage_get(&task_ctx_stor, p, 0, 0);
}
static struct task_ctx *get_task_ctx(struct task_struct *p)
{
struct task_ctx *taskc;
taskc = try_get_task_ctx(p);
if (!taskc)
scx_bpf_error("task_ctx lookup failed for %s[%d]",
p->comm, p->pid);
return taskc;
}
static struct cpu_ctx *get_cpu_ctx(void)
{
const u32 idx = 0;
struct cpu_ctx *cpuc;
cpuc = bpf_map_lookup_elem(&cpu_ctx_stor, &idx);
if (!cpuc)
scx_bpf_error("cpu_ctx lookup failed for current cpu");
return cpuc;
}
static struct cpu_ctx *get_cpu_ctx_id(s32 cpu_id)
{
const u32 idx = 0;
struct cpu_ctx *cpuc;
cpuc = bpf_map_lookup_percpu_elem(&cpu_ctx_stor, &idx, cpu_id);
if (!cpuc)
scx_bpf_error("cpu_ctx lookup failed for %d", cpu_id);
return cpuc;
}
static u32 calc_avg32(u32 old_val, u32 new_val)
{
/*
* Calculate the exponential weighted moving average (EWMA).
* - EWMA = (0.75 * old) + (0.25 * new)
*/
return (old_val - (old_val >> 2)) + (new_val >> 2);
}
static u64 calc_avg(u64 old_val, u64 new_val)
{
/*
* Calculate the exponential weighted moving average (EWMA).
* - EWMA = (0.75 * old) + (0.25 * new)
*/
return (old_val - (old_val >> 2)) + (new_val >> 2);
}
static u64 calc_avg_freq(u64 old_freq, u64 interval)
{
u64 new_freq, ewma_freq;
/*
* Calculate the exponential weighted moving average (EWMA) of a
* frequency with a new interval measured.
*/
new_freq = LAVD_TIME_ONE_SEC / interval;
ewma_freq = calc_avg(old_freq, new_freq);
return ewma_freq;
}
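A quick standalone check of the shift-based EWMA above: each update moves a quarter of the way toward the new sample, so a step change converges geometrically.

#include <stdio.h>

/* Same shift arithmetic as calc_avg(): EWMA = 0.75 * old + 0.25 * new. */
static unsigned long long ewma(unsigned long long old_val,
			       unsigned long long new_val)
{
	return (old_val - (old_val >> 2)) + (new_val >> 2);
}

int main(void)
{
	unsigned long long v = 0;
	int i;

	/* Feed a constant sample of 1000; the average closes ~25% of the
	 * remaining gap per step: 250, 438, 579, 685, ... */
	for (i = 0; i < 8; i++) {
		v = ewma(v, 1000);
		printf("step %d: %llu\n", i + 1, v);
	}
	return 0;
}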
static bool is_lat_cri(struct task_ctx *taskc, struct sys_stat *stat_cur)
{
return taskc->lat_cri >= stat_cur->avg_lat_cri;
}
static bool is_perf_cri(struct task_ctx *taskc, struct sys_stat *stat_cur)
{
if (READ_ONCE(taskc->on_big) && READ_ONCE(taskc->on_little))
return taskc->perf_cri >= stat_cur->thr_perf_cri;
return READ_ONCE(taskc->on_big);
}
static bool is_greedy(struct task_ctx *taskc)
{
return taskc->greedy_ratio > 1000;
}
static bool is_eligible(struct task_ctx *taskc)
{
return !is_greedy(taskc);
}
static bool have_scheduled(struct task_ctx *taskc)
{
/*
* If the task's time slice hasn't been updated yet, the task has not
* been scheduled by this scheduler.
*/
return taskc->slice_ns != 0;
}
static u16 get_nice_prio(struct task_struct *p)
{
u16 prio = p->static_prio - MAX_RT_PRIO; /* [0, 40) */
return prio;
}
static bool use_full_cpus(void)
{
struct sys_stat *stat_cur = get_sys_stat_cur();
return (stat_cur->nr_active + LAVD_CC_NR_OVRFLW) >= nr_cpus_onln;
}
static u64 pick_any_bit(u64 bitmap, u64 nuance)
{
u64 i, pos;
bpf_for(i, 0, 64) {
pos = (i + nuance) % 64;
if (bitmap & (1LLU << pos))
return pos;
}
return -ENOENT;
}
static void set_on_core_type(struct task_ctx *taskc,
const struct cpumask *cpumask)
{
bool on_big = false, on_little = false;
struct cpu_ctx *cpuc;
int cpu;
bpf_for(cpu, 0, nr_cpu_ids) {
if (!bpf_cpumask_test_cpu(cpu, cpumask))
continue;
cpuc = get_cpu_ctx_id(cpu);
if (!cpuc) {
scx_bpf_error("Failed to look up cpu_ctx: %d", cpu);
return;
}
if (cpuc->big_core)
on_big = true;
else
on_little = true;
if (on_big && on_little)
break;
}
WRITE_ONCE(taskc->on_big, on_big);
WRITE_ONCE(taskc->on_little, on_little);
}


@@ -65,8 +65,6 @@ use stats::StatsReq;
 use stats::StatsRes;
 use stats::SysStats;
-const LAVD_CPU_ID_MAX: usize = bpf_intf::consts_LAVD_CPU_ID_MAX as usize;
 /// scx_lavd: Latency-criticality Aware Virtual Deadline (LAVD) scheduler
 ///
 /// The rust part is minimal. It processes command line options and logs out
@@ -484,7 +482,7 @@ struct Scheduler<'a> {
 impl<'a> Scheduler<'a> {
 fn init(opts: &'a Opts, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
-if *NR_CPU_IDS > LAVD_CPU_ID_MAX {
+if *NR_CPU_IDS > LAVD_CPU_ID_MAX as usize {
 panic!(
 "Num possible CPU IDs ({}) exceeds maximum of ({})",
 *NR_CPU_IDS, LAVD_CPU_ID_MAX
@@ -559,15 +557,13 @@ impl<'a> Scheduler<'a> {
 skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].__cpumask[i] |= 0x01 << j;
 }
-const LAVD_CPDOM_MAX_NR: u8 = 32;
-const LAVD_CPDOM_MAX_DIST: usize = 4;
-if v.neighbor_map.borrow().iter().len() > LAVD_CPDOM_MAX_DIST {
+if v.neighbor_map.borrow().iter().len() > LAVD_CPDOM_MAX_DIST as usize {
 panic!("The processor topology is too complex to handle in BPF.");
 }
 for (k, (_d, neighbors)) in v.neighbor_map.borrow().iter().enumerate() {
 let nr_neighbors = neighbors.borrow().len() as u8;
-if nr_neighbors > LAVD_CPDOM_MAX_NR {
+if nr_neighbors > LAVD_CPDOM_MAX_NR as u8 {
 panic!("The processor topology is too complex to handle in BPF.");
 }
 skel.maps.bss_data.cpdom_ctxs[v.cpdom_id].nr_neighbors[k] = nr_neighbors;
@@ -666,12 +662,8 @@ impl<'a> Scheduler<'a> {
 return 100. * x as f64 / y as f64;
 }
-fn get_power_mode(power_mode: s32) -> &'static str {
-const LAVD_PM_PERFORMANCE: s32 = 0;
-const LAVD_PM_BALANCED: s32 = 1;
-const LAVD_PM_POWERSAVE: s32 = 2;
-match power_mode {
+fn get_power_mode(power_mode: i32) -> &'static str {
+match power_mode as u32 {
 LAVD_PM_PERFORMANCE => {
 return &"performance";
 }