diff --git a/scheds/rust/scx_lavd/src/bpf/intf.h b/scheds/rust/scx_lavd/src/bpf/intf.h
index c2f82ce..a882ba6 100644
--- a/scheds/rust/scx_lavd/src/bpf/intf.h
+++ b/scheds/rust/scx_lavd/src/bpf/intf.h
@@ -54,7 +54,7 @@ enum consts {
 	NSEC_PER_MSEC		= (1000ULL * NSEC_PER_USEC),
 	LAVD_TIME_ONE_SEC	= (1000ULL * NSEC_PER_MSEC),
 	LAVD_TIME_INFINITY_NS	= SCX_SLICE_INF,
-	LAVD_MAX_CAS_RETRY	= 8,
+	LAVD_MAX_CAS_RETRY	= 4,
 
 	LAVD_TARGETED_LATENCY_NS	= (15 * NSEC_PER_MSEC),
 	LAVD_SLICE_MIN_NS	= ( 1 * NSEC_PER_MSEC), /* min time slice */
@@ -109,18 +109,18 @@ struct sys_stat {
 	volatile u64	load_ideal;	/* average ideal load of runnable tasks */
 	volatile u64	load_actual;	/* average actual load of runnable tasks */
 
-	volatile u64	avg_lat_cri;	/* average latency criticality (LC) */
-	volatile u64	max_lat_cri;	/* maximum latency criticality (LC) */
-	volatile u64	min_lat_cri;	/* minimum latency criticality (LC) */
-	volatile u64	thr_lat_cri;	/* latency criticality threshold for kicking */
+	volatile u32	avg_lat_cri;	/* average latency criticality (LC) */
+	volatile u32	max_lat_cri;	/* maximum latency criticality (LC) */
+	volatile u32	min_lat_cri;	/* minimum latency criticality (LC) */
+	volatile u32	thr_lat_cri;	/* latency criticality threshold for kicking */
 
-	volatile s64	inc1k_low;	/* increment from low LC to priority mapping */
-	volatile s64	inc1k_high;	/* increment from high LC to priority mapping */
+	volatile s32	inc1k_low;	/* increment from low LC to priority mapping */
+	volatile s32	inc1k_high;	/* increment from high LC to priority mapping */
 
-	volatile u64	avg_perf_cri;	/* average performance criticality */
+	volatile u32	avg_perf_cri;	/* average performance criticality */
 
-	volatile u64	nr_violation;	/* number of utilization violation */
-	volatile int	nr_active;	/* number of active cores */
+	volatile u32	nr_violation;	/* number of utilization violation */
+	volatile u32	nr_active;	/* number of active cores */
 };
 
 /*
@@ -151,10 +151,10 @@ struct cpu_ctx {
 	/*
 	 * Information used to keep track of latency criticality
 	 */
-	volatile u64	max_lat_cri;	/* maximum latency criticality */
-	volatile u64	min_lat_cri;	/* minimum latency criticality */
-	volatile u64	sum_lat_cri;	/* sum of latency criticality */
-	volatile u64	sched_nr;	/* number of schedules */
+	volatile u32	max_lat_cri;	/* maximum latency criticality */
+	volatile u32	min_lat_cri;	/* minimum latency criticality */
+	volatile u32	sum_lat_cri;	/* sum of latency criticality */
+	volatile u32	sched_nr;	/* number of schedules */
 
 	/*
 	 * Information used to keep track of performance criticality
@@ -200,6 +200,7 @@ struct task_ctx {
 	u64	run_time_ns;	/* average runtime per schedule */
 	u64	run_freq;	/* scheduling frequency in a second */
 	u64	wait_freq;	/* waiting frequency in a second */
+	u64	wake_freq;	/* waking-up frequency in a second */
 
 	u64	load_actual;	/* task load derived from run_time and run_freq */
 
@@ -209,17 +210,16 @@ struct task_ctx {
 	u64	vdeadline_delta_ns;	/* time delta until task's virtual deadline */
 	u64	eligible_delta_ns;	/* time delta until task becomes eligible */
 	u64	slice_ns;	/* time slice */
-	u64	greedy_ratio;	/* task's overscheduling ratio compared to its nice priority */
-	u64	lat_cri;	/* calculated latency criticality */
+	u32	greedy_ratio;	/* task's overscheduling ratio compared to its nice priority */
+	u32	lat_cri;	/* calculated latency criticality */
 	volatile s32	victim_cpu;
 	u16	slice_boost_prio; /* how many times a task fully consumed the slice */
 	u16	lat_prio;	/* latency priority */
-	s16	lat_boost_prio;	/* DEBUG */
 
 	/*
 	 * Task's performance criticality
 	 */
-	u64	perf_cri;	/* performance criticality of a task */
+	u32	perf_cri;	/* performance criticality of a task */
 };
 
 struct task_ctx_x {
@@ -229,8 +229,8 @@ struct task_ctx_x {
 	u32	cpu_id;		/* where a task ran */
 	u64	cpu_util;	/* cpu utilization in [0..100] */
 	u64	sys_load_factor; /* system load factor in [0..100..] */
-	u64	avg_lat_cri;	/* average latency criticality */
-	u64	avg_perf_cri;	/* average performance criticality */
+	u32	avg_perf_cri;	/* average performance criticality */
+	u32	avg_lat_cri;	/* average latency criticality */
 	u32	nr_active;	/* number of active cores */
 	u32	cpuperf_cur;	/* CPU's current performance target */
 };
diff --git a/scheds/rust/scx_lavd/src/bpf/main.bpf.c b/scheds/rust/scx_lavd/src/bpf/main.bpf.c
index d8f12b1..d50f9c3 100644
--- a/scheds/rust/scx_lavd/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_lavd/src/bpf/main.bpf.c
@@ -765,16 +765,16 @@ struct sys_stat_ctx {
 	u64		load_actual;
 	u64		load_ideal;
 	u64		load_run_time_ns;
-	s64		max_lat_cri;
-	s64		min_lat_cri;
-	s64		avg_lat_cri;
+	s32		max_lat_cri;
+	s32		min_lat_cri;
+	s32		avg_lat_cri;
 	u64		sum_lat_cri;
-	u64		sched_nr;
+	u32		sched_nr;
 	u64		sum_perf_cri;
-	u64		avg_perf_cri;
+	u32		avg_perf_cri;
 	u64		new_util;
 	u64		new_load_factor;
-	u64		nr_violation;
+	u32		nr_violation;
 };
 
 static void init_sys_stat_ctx(struct sys_stat_ctx *c)
@@ -919,18 +919,18 @@ static void update_sys_stat_next(struct sys_stat_ctx *c)
 		calc_avg(stat_cur->load_factor, c->new_load_factor);
 
 	stat_next->min_lat_cri =
-		calc_avg(stat_cur->min_lat_cri, c->min_lat_cri);
+		calc_avg32(stat_cur->min_lat_cri, c->min_lat_cri);
 	stat_next->max_lat_cri =
-		calc_avg(stat_cur->max_lat_cri, c->max_lat_cri);
+		calc_avg32(stat_cur->max_lat_cri, c->max_lat_cri);
 	stat_next->avg_lat_cri =
-		calc_avg(stat_cur->avg_lat_cri, c->avg_lat_cri);
+		calc_avg32(stat_cur->avg_lat_cri, c->avg_lat_cri);
 	stat_next->thr_lat_cri = stat_next->max_lat_cri -
 		((stat_next->max_lat_cri - stat_next->avg_lat_cri) >> 1);
 
 	stat_next->avg_perf_cri =
-		calc_avg(stat_cur->avg_perf_cri, c->avg_perf_cri);
+		calc_avg32(stat_cur->avg_perf_cri, c->avg_perf_cri);
 	stat_next->nr_violation =
-		calc_avg(stat_cur->nr_violation, c->nr_violation);
+		calc_avg32(stat_cur->nr_violation, c->nr_violation);
 }
 
 static void calc_inc1k(struct sys_stat_ctx *c)
@@ -1129,10 +1129,10 @@ static int update_timer_cb(void *map, int *key, struct bpf_timer *timer)
 	return 0;
 }
 
-static u64 calc_greedy_ratio(struct task_struct *p, struct task_ctx *taskc)
+static u32 calc_greedy_ratio(struct task_struct *p, struct task_ctx *taskc)
 {
 	struct sys_stat *stat_cur = get_sys_stat_cur();
-	u64 ratio;
+	u32 ratio;
 
 	/*
 	 * The greedy ratio of a task represents how much time the task
@@ -1173,12 +1173,12 @@ static u64 calc_lat_factor(u64 lat_prio)
 	return LAVD_ELIGIBLE_TIME_LAT_FT * (NICE_WIDTH - lat_prio);
 }
 
-static u64 calc_greedy_factor(struct task_ctx *taskc)
+static u32 calc_greedy_factor(struct task_ctx *taskc)
 {
-	u64 greedy_ratio = taskc->greedy_ratio;
+	u32 greedy_ratio = taskc->greedy_ratio;
 	s16 lat_prio = taskc->lat_prio;
-	u64 greedy_threshold;
-	u64 gr_ft;
+	u32 greedy_threshold;
+	u32 gr_ft;
 
 	if (lat_prio < 0)
 		lat_prio = 0;
@@ -1308,7 +1308,7 @@ static int sum_prios_for_lat(struct task_struct *p, int nice_prio,
 	return prio;
 }
 
-static int map_lat_cri_to_lat_prio(u64 lat_cri)
+static int map_lat_cri_to_lat_prio(u32 lat_cri)
 {
 	/*
 	 * Latency criticality is an absolute metric representing how
@@ -1326,7 +1326,7 @@
 	 */
 	struct sys_stat *stat_cur = get_sys_stat_cur();
-	s64 base_lat_cri, inc1k;
+	s32 base_lat_cri, inc1k;
 	int base_prio, lat_prio;
 
 	/*
@@ -1440,7 +1440,6 @@
 out:
 	static_prio = get_nice_prio(p);
 	taskc->lat_prio = sum_prios_for_lat(p, static_prio, boost);
-	taskc->lat_boost_prio = boost;
 
 	return boost;
 }
@@ -1546,7 +1545,8 @@ static u64 cap_time_slice_ns(u64 slice)
 static u64 calc_time_slice(struct task_struct *p, struct task_ctx *taskc)
 {
 	struct sys_stat *stat_cur = get_sys_stat_cur();
-	u64 slice, share, gr_ft;
+	u64 slice, share;
+	u32 gr_ft;
 
 	/*
 	 * The time slice should be short enough to schedule all runnable tasks
@@ -2138,6 +2138,19 @@ static void put_local_rq_no_fail(struct task_struct *p, struct task_ctx *taskc,
 	scx_bpf_dispatch(p, SCX_DSQ_LOCAL, LAVD_SLICE_UNDECIDED, enq_flags);
 }
 
+static bool could_run_on_prev(struct task_struct *p, s32 prev_cpu,
+			      struct bpf_cpumask *a_cpumask,
+			      struct bpf_cpumask *o_cpumask)
+{
+	bool ret;
+
+	ret = bpf_cpumask_test_cpu(prev_cpu, p->cpus_ptr) &&
+	      (bpf_cpumask_test_cpu(prev_cpu, cast_mask(a_cpumask)) ||
+	       bpf_cpumask_test_cpu(prev_cpu, cast_mask(o_cpumask)));
+
+	return ret;
+}
+
 static s32 pick_cpu(struct task_struct *p, struct task_ctx *taskc,
 		    s32 prev_cpu, u64 wake_flags, bool *is_idle)
 {
@@ -2169,9 +2182,9 @@ static s32 pick_cpu(struct task_struct *p, struct task_ctx *taskc,
 	bpf_cpumask_and(a_cpumask, p->cpus_ptr, cast_mask(active));
 
 	/*
-	 * First, try to stay on the previous core if it is active.
+	 * First, try to stay on the previous core if it is in the active or overflow set.
 	 */
-	if (bpf_cpumask_test_cpu(prev_cpu, cast_mask(a_cpumask)) &&
+	if (could_run_on_prev(p, prev_cpu, a_cpumask, o_cpumask) &&
 	    scx_bpf_test_and_clear_cpu_idle(prev_cpu)) {
 		cpu_id = prev_cpu;
 		goto unlock_out;
diff --git a/scheds/rust/scx_lavd/src/main.rs b/scheds/rust/scx_lavd/src/main.rs
index c4db816..4e17b59 100644
--- a/scheds/rust/scx_lavd/src/main.rs
+++ b/scheds/rust/scx_lavd/src/main.rs
@@ -192,10 +192,10 @@ impl<'a> Scheduler<'a> {
             | {:4} | {:4} | {:9} \
             | {:6} | {:8} | {:7} \
             | {:8} | {:7} | {:8} \
-            | {:7} | {:7} | {:9} \
-            | {:9} | {:9} | {:9} \
+            | {:7} | {:9} | {:9} \
+            | {:9} | {:9} | {:8} \
             | {:8} | {:8} | {:8} \
-            | {:8} | {:6} | {:6} |",
+            | {:6} | {:6} |",
             "mseq",
             "pid",
             "comm",
@@ -208,7 +208,6 @@
             "lat_prio",
             "avg_lc",
             "st_prio",
-            "lat_bst",
             "slc_bst",
             "run_freq",
             "run_tm_ns",
@@ -232,10 +231,10 @@
             | {:4} | {:4} | {:9} \
             | {:6} | {:8} | {:7} \
             | {:8} | {:7} | {:8} \
-            | {:7} | {:7} | {:9} \
-            | {:9} | {:9} | {:9} \
+            | {:7} | {:9} | {:9} \
+            | {:9} | {:9} | {:8} \
             | {:8} | {:8} | {:8} \
-            | {:8} | {:6} | {:6} |",
+            | {:6} | {:6} |",
             mseq,
             tx.pid,
             tx_comm,
@@ -248,7 +247,6 @@
             tc.lat_prio,
             tx.avg_lat_cri,
             tx.static_prio,
-            tc.lat_boost_prio,
             tc.slice_boost_prio,
             tc.run_freq,
             tc.run_time_ns,
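
Note on calc_avg32(): the update_sys_stat_next() hunk switches the lat_cri, perf_cri, and nr_violation averaging from calc_avg() to calc_avg32(), but the helper's definition is not part of this diff. Judging from the call sites, it is simply a u32 counterpart of calc_avg(), matching the struct fields narrowed above; a minimal sketch of what such a pair could look like (the exact weighting is an assumption, not confirmed by this diff):

static u64 calc_avg(u64 old_val, u64 new_val)
{
	/* Exponentially weighted moving average: 3/4 old + 1/4 new. */
	return (old_val - (old_val >> 2)) + (new_val >> 2);
}

static u32 calc_avg32(u32 old_val, u32 new_val)
{
	/*
	 * Same moving average on 32-bit operands, avoiding a widening
	 * conversion for the u32 fields introduced in struct sys_stat.
	 */
	return (old_val - (old_val >> 2)) + (new_val >> 2);
}

The narrowing to u32/s32 shrinks the hot sys_stat, cpu_ctx, and task_ctx structures; the averaged criticality and violation counts fit comfortably in 32 bits. Similarly, halving LAVD_MAX_CAS_RETRY from 8 to 4 makes the compare-and-swap retry loops elsewhere in main.bpf.c (not shown in this diff) give up sooner under contention.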