scx_lavd: use lat_cri instead of lat_prio universally

Signed-off-by: Changwoo Min <changwoo@igalia.com>
2024-11-28 13:40:28 +00:00 · 2024-07-19 23:51:49 +09:00 · 2024-07-19 23:51:49 +09:00 · 67a6deb983
commit 67a6deb983
parent 6f10d6907c
3 changed files with 31 additions and 166 deletions
--- a/scheds/rust/scx_lavd/src/bpf/intf.h
+++ b/scheds/rust/scx_lavd/src/bpf/intf.h
@ -74,12 +74,10 @@ enum consts {
 	LAVD_SLICE_BOOST_MAX_FT		= 2, /* maximum additional 2x of slice */
-	LAVD_SLICE_BOOST_MAX_STEP	= 8, /* 8 slice exhausitions in a row */
+	LAVD_SLICE_BOOST_MAX_STEP	= 8, /* 8 slice exhaustions in a row */
 	LAVD_GREEDY_RATIO_MAX		= USHRT_MAX,
-	LAVD_LAT_PRIO_NEW		= 10,
-	LAVD_LAT_PRIO_IDLE		= USHRT_MAX,
-	LAVD_LAT_WEIGHT_FT		= 88761,

 	LAVD_ELIGIBLE_TIME_LAT_FT	= 16,
 	LAVD_ELIGIBLE_TIME_MAX		= (10 * LAVD_TARGETED_LATENCY_NS),
+	LAVD_REFILL_NR			= 2,

 	LAVD_CPU_UTIL_MAX		= 1000, /* 100.0% */
 	LAVD_CPU_UTIL_MAX_FOR_CPUPERF	= 850, /* 85.0% */
@ -87,7 +85,6 @@ enum consts {
 	LAVD_CPU_ID_NONE		= ((u32)-1),
 	LAVD_CPU_ID_MAX			= 512,

-	LAVD_PREEMPT_KICK_LAT_PRIO	= 15,
 	LAVD_PREEMPT_KICK_MARGIN	= (2 * NSEC_PER_USEC),
 	LAVD_PREEMPT_TICK_MARGIN	= (1 * NSEC_PER_USEC),

@ -119,9 +116,6 @@ struct sys_stat {
 	volatile u32	min_lat_cri;	/* minimum latency criticality (LC) */
 	volatile u32	thr_lat_cri;	/* latency criticality threshold for kicking */

-	volatile s32	inc1k_low;	/* increment from low LC to priority mapping */
-	volatile s32	inc1k_high;	/* increment from high LC to priority mapping */
-
 	volatile u32	avg_perf_cri;	/* average performance criticality */

 	volatile u32	nr_violation;	/* number of utilization violation */
@ -171,7 +165,7 @@ struct cpu_ctx {
 	 * Information of a current running task for preemption
 	 */
 	volatile u64	stopping_tm_est_ns; /* estimated stopping time */
-	volatile u16	lat_prio;	/* latency priority */
+	volatile u16	lat_cri;	/* latency criticality */
 	volatile u8	is_online;	/* is this CPU online? */
 	s32		cpu_id;		/* cpu id */

@ -222,7 +216,6 @@ struct task_ctx {
 	u32	lat_cri;		/* calculated latency criticality */
 	volatile s32 victim_cpu;
 	u16	slice_boost_prio;	/* how many times a task fully consumed the slice */
-	u16	lat_prio;		/* latency priority */

 	/*
 	 * Task's performance criticality
--- a/scheds/rust/scx_lavd/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_lavd/src/bpf/main.bpf.c
@ -308,7 +308,7 @@ struct {
 struct preemption_info {
 	u64		stopping_tm_est_ns;
 	u64		last_kick_clk;
-	u16		lat_prio;
+	u64		lat_cri;
 	struct cpu_ctx	*cpuc;
 };

@ -767,33 +767,6 @@ static void update_sys_stat_next(struct sys_stat_ctx *c)
 				  c->tot_svc_time / c->sched_nr;
 }

-static void calc_inc1k(struct sys_stat_ctx *c)
-{
-	/*
-	 * Calculate the increment for mapping from latency criticality to
-	 * priority.
-	 *  - Case 1. inc1k_low:   [min_lc, avg_lc) -> [half_range, 0)
-	 *  - Case 2. inc1k_high:  [avg_lc, max_lc] -> [0, -half_range)
-	 */
-	struct sys_stat *stat_next = c->stat_next;
-
-	if (stat_next->avg_lat_cri == stat_next->min_lat_cri)
-		stat_next->inc1k_low = 0;
-	else {
-		stat_next->inc1k_low = ((LAVD_BOOST_RANGE >> 1) * 1000) /
-					(stat_next->avg_lat_cri -
-					 stat_next->min_lat_cri);
-	}
-
-	if ((stat_next->max_lat_cri + 1) == stat_next->avg_lat_cri)
-		stat_next->inc1k_high = 0;
-	else {	
-		stat_next->inc1k_high = ((LAVD_BOOST_RANGE >> 1) * 1000) /
-					 (stat_next->max_lat_cri + 1 -
-					  stat_next->avg_lat_cri);
-	}
-}
-
 static void do_update_sys_stat(void)
 {
 	struct sys_stat_ctx c;
@ -805,7 +778,6 @@ static void do_update_sys_stat(void)
 	collect_sys_stat(&c);
 	calc_sys_stat(&c);
 	update_sys_stat_next(&c);
-	calc_inc1k(&c);

 	/*
 	 * Make the next version atomically visible.
@ -997,11 +969,6 @@ static u64 calc_freq_factor(u64 freq)
 	return ft + 1;
 }

-static u64 calc_lat_factor(u64 lat_prio)
-{
-	return LAVD_ELIGIBLE_TIME_LAT_FT * (NICE_WIDTH - lat_prio);
-}
-
 static bool is_eligible(struct task_ctx *taskc)
 {
 	return taskc->greedy_ratio <= 1000;
@ -1042,7 +1009,6 @@ static u64 calc_eligible_delta(struct task_struct *p, struct task_ctx *taskc)
 	 *	when the task become eligible.
 	 */
 	u64 delta_ns;
-	u64 lat_ft;

 	/*
 	 * Get how greedy this task has been to enforce fairness if necessary.
@ -1059,13 +1025,8 @@ static u64 calc_eligible_delta(struct task_struct *p, struct task_ctx *taskc)
 		goto out;
 	}

-	/*
-	 * As a task is more latency-critical, it will have a shorter but more
-	 * frequent ineligibility durations.
-	 */
-	lat_ft = calc_lat_factor(taskc->lat_prio);
 	delta_ns = (LAVD_TIME_ONE_SEC / (1000 * taskc->run_freq)) *
-		   (taskc->greedy_ratio / (lat_ft + 1));
+		   (taskc->greedy_ratio);

 	if (delta_ns > LAVD_ELIGIBLE_TIME_MAX)
 		delta_ns = LAVD_ELIGIBLE_TIME_MAX;
@ -1092,55 +1053,6 @@ static int sum_prios_for_lat(struct task_struct *p, int nice_prio,
 	return prio;
 }

-static int map_lat_cri_to_lat_prio(u32 lat_cri)
-{
-	/*
-	 * Latency criticality is an absolute metric representing how
-	 * latency-critical a task is. However, latency priority is a relative
-	 * metric compared to the other co-running tasks. Especially when the
-	 * task's latency criticalities are in a small range, the relative
-	 * metric is advantageous in mitigating integer truncation errors. In
-	 * the relative metric, we map
-	 *
-	 *  - Case 1. inc1k_low:   [min_lc, avg_lc) -> [boost_range/2,  0)
-	 *  - Case 2. inc1k_high:  [avg_lc, max_lc] -> [0, -boost_range/2)
-	 *
-	 * Hence, latency priority 20 now means that a task has an average
-	 * latency criticality among the co-running tasks.
-	 */
-
-	struct sys_stat *stat_cur = get_sys_stat_cur();
-	s32 base_lat_cri, inc1k;
-	int base_prio, lat_prio;
-
-	/*
-	 * Set up params for the Case 1 and 2.
-	 */
-	if (lat_cri < stat_cur->avg_lat_cri) {
-		inc1k = stat_cur->inc1k_low;
-		base_lat_cri = stat_cur->min_lat_cri;
-		base_prio = LAVD_BOOST_RANGE >> 1;
-	}
-	else {
-		inc1k = stat_cur->inc1k_high;
-		base_lat_cri = stat_cur->avg_lat_cri;
-		base_prio = 0;
-	}
-
-	/*
-	 * Task's lat_cri could be more up-to-date than stat_cur's one. In this
-	 * case, just take the stat_cur's one.
-	 */
-	if (lat_cri >= base_lat_cri) {
-		lat_prio = base_prio -
-			   (((lat_cri - base_lat_cri) * inc1k + 500) / 1000);
-	}
-	else
-		lat_prio = base_prio;
-
-	return lat_prio;
-}
-
 static u64 calc_starvation_factor(struct task_ctx *taskc)
 {
 	struct sys_stat *stat_cur = get_sys_stat_cur();
@ -1154,23 +1066,11 @@ static u64 calc_starvation_factor(struct task_ctx *taskc)
 	return ratio + 1;
 }

-static int boost_lat(struct task_struct *p, struct task_ctx *taskc,
-		     struct cpu_ctx *cpuc, bool is_wakeup)
+static void boost_lat(struct task_struct *p, struct task_ctx *taskc,
+		      struct cpu_ctx *cpuc, bool is_wakeup)
 {
 	u64 starvation_ft, wait_freq_ft, wake_freq_ft;
 	u64 lat_cri_raw;
-	u16 static_prio;
-	int boost;
-
-	/*
-	 * If a task has yet to be scheduled (i.e., a freshly forked task or a
-	 * task just under sched_ext), don't boost its priority before knowing
-	 * its property.
-	 */
-	if (!have_scheduled(taskc)) {
-		boost = LAVD_LAT_PRIO_NEW;
-		goto out;
-	}

 	/*
 	 * A task is more latency-critical as its wait or wake frequencies
@ -1189,16 +1089,9 @@ static int boost_lat(struct task_struct *p, struct task_ctx *taskc,
 	/*
 	 * Wake frequency and wait frequency represent how much a task is used
 	 * for a producer and a consumer, respectively. If both are high, the
-	 * task is in the middle of a task chain. We multiply frequencies --
-	 * wait_freq * wake_freq * wake_freq -- to amplify the subtle
-	 * differences in frequencies than simple addition. Also, we square
-	 * wake_freq to prioritize scheduling of a producer task. That's
-	 * because if the scheduling of a producer task is delayed, all the
-	 * following consumer tasks are also delayed.
+	 * task is in the middle of a task chain.
 	 */
-	lat_cri_raw = wait_freq_ft *
-		      wake_freq_ft * wake_freq_ft *
-		      starvation_ft;
+	lat_cri_raw = wait_freq_ft * wake_freq_ft * starvation_ft;

 	/*
 	 * The ratio above tends to follow an exponentially skewed
@ -1211,21 +1104,7 @@ static int boost_lat(struct task_struct *p, struct task_ctx *taskc,
 	 * conversion, we mitigate the exponentially skewed distribution to
 	 * non-linear distribution.
 	 */
-	taskc->lat_cri = log2_u64(lat_cri_raw + 1);
-
-	/*
-	 * Convert @p's latency criticality to its boost priority linearly.
-	 * When a task is wakening up, boost its latency boost priority by 1.
-	 */
-	boost = map_lat_cri_to_lat_prio(taskc->lat_cri);
-	if (is_wakeup)
-		boost -= LAVD_BOOST_WAKEUP_LAT;
-
-out:
-	static_prio = get_nice_prio(p);
-	taskc->lat_prio = sum_prios_for_lat(p, static_prio, boost);
-
-	return boost;
+	taskc->lat_cri = log2_u64(lat_cri_raw + 1) + is_wakeup;
 }

 static u64 calc_virtual_deadline_delta(struct task_struct *p,
@ -1556,10 +1435,10 @@ static int comp_preemption_info(struct preemption_info *prm_a,
 	/*
-	 * Check if one's latency priority _or_ deadline is smaller or not.
+	 * Check if one's latency criticality _or_ deadline is smaller or not.
 	 */
-	if ((prm_a->lat_prio < prm_b->lat_prio) ||
+	if ((prm_a->lat_cri < prm_b->lat_cri) ||
 	    (prm_a->stopping_tm_est_ns < prm_b->stopping_tm_est_ns))
 		return -1;
-	if ((prm_a->lat_prio > prm_b->lat_prio) ||
+	if ((prm_a->lat_cri > prm_b->lat_cri) ||
 	    (prm_a->stopping_tm_est_ns > prm_b->stopping_tm_est_ns))
 		return 1;
 	return 0;
@ -1593,7 +1472,7 @@ static  bool can_cpu1_kick_cpu2(struct preemption_info *prm_cpu1,
 	 * Set a CPU information
 	 */
 	prm_cpu2->stopping_tm_est_ns = cpuc2->stopping_tm_est_ns;
-	prm_cpu2->lat_prio = cpuc2->lat_prio;
+	prm_cpu2->lat_cri = cpuc2->lat_cri;
 	prm_cpu2->cpuc = cpuc2;
 	prm_cpu2->last_kick_clk = cpuc2->last_kick_clk;

@ -1613,12 +1492,8 @@ static bool is_worth_kick_other_task(struct task_ctx *taskc)
 	 * enough.
 	 */
 	struct sys_stat *stat_cur = get_sys_stat_cur();
-	bool ret;

-	ret = (taskc->lat_prio <= LAVD_PREEMPT_KICK_LAT_PRIO) &&
-	      (taskc->lat_cri >= stat_cur->thr_lat_cri);
-
-	return ret;
+	return (taskc->lat_cri >= stat_cur->thr_lat_cri);
 }

 static bool can_cpu_be_kicked(u64 now, struct cpu_ctx *cpuc)
@ -1652,7 +1527,7 @@ static struct cpu_ctx *find_victim_cpu(const struct cpumask *cpumask,
 	 */
 	prm_task.stopping_tm_est_ns = get_est_stopping_time(taskc) +
 				      LAVD_PREEMPT_KICK_MARGIN;
-	prm_task.lat_prio = taskc->lat_prio;
+	prm_task.lat_cri = taskc->lat_cri;
 	prm_task.cpuc = cpuc = get_cpu_ctx();
 	if (!cpuc) {
 		scx_bpf_error("Failed to lookup the current cpu_ctx");
@ -1819,7 +1694,7 @@ static bool try_yield_current_cpu(struct task_struct *p_run,
 	prm_run.stopping_tm_est_ns = taskc_run->last_running_clk +
 				     taskc_run->run_time_ns -
 				     LAVD_PREEMPT_TICK_MARGIN;
-	prm_run.lat_prio = taskc_run->lat_prio;
+	prm_run.lat_cri = taskc_run->lat_cri;

 	bpf_rcu_read_lock();
 	bpf_for_each(scx_dsq, p_wait, LAVD_ELIGIBLE_DSQ, 0) {
@ -1832,7 +1707,7 @@ static bool try_yield_current_cpu(struct task_struct *p_run,
 			break;

 		prm_wait.stopping_tm_est_ns = get_est_stopping_time(taskc_wait);
-		prm_wait.lat_prio = taskc_wait->lat_prio;
+		prm_wait.lat_cri = taskc_wait->lat_cri;

 		if (can_task1_kick_task2(&prm_wait, &prm_run)) {
 			/*
@ -2506,7 +2381,7 @@ void BPF_STRUCT_OPS(lavd_running, struct task_struct *p)
 	/*
 	 * Update running task's information for preemption
 	 */
-	cpuc->lat_prio = taskc->lat_prio;
+	cpuc->lat_cri = taskc->lat_cri;
 	cpuc->stopping_tm_est_ns = get_est_stopping_time(taskc);

 	/*
@ -2613,7 +2488,7 @@ static void cpu_ctx_init_online(struct cpu_ctx *cpuc, u32 cpu_id, u64 now)
 {
 	cpuc->idle_start_clk = 0;
 	cpuc->cpu_id = cpu_id;
-	cpuc->lat_prio = LAVD_LAT_PRIO_IDLE;
+	cpuc->lat_cri = 0;
 	cpuc->stopping_tm_est_ns = LAVD_TIME_INFINITY_NS;
 	WRITE_ONCE(cpuc->online_clk, now);
 	barrier();
@ -2629,7 +2504,7 @@ static void cpu_ctx_init_offline(struct cpu_ctx *cpuc, u32 cpu_id, u64 now)
 	cpuc->is_online = false;
 	barrier();

-	cpuc->lat_prio = LAVD_LAT_PRIO_IDLE;
+	cpuc->lat_cri = 0;
 	cpuc->stopping_tm_est_ns = LAVD_TIME_INFINITY_NS;
 }

@ -2690,7 +2565,7 @@ void BPF_STRUCT_OPS(lavd_update_idle, s32 cpu, bool idle)
 	 */
 	if (idle) {
 		cpuc->idle_start_clk = bpf_ktime_get_ns();
-		cpuc->lat_prio = LAVD_LAT_PRIO_IDLE;
+		cpuc->lat_cri = 0;
 		cpuc->stopping_tm_est_ns = LAVD_TIME_INFINITY_NS;
 	}
 	/*
@ -2727,7 +2602,6 @@ static void init_task_ctx(struct task_struct *p, struct task_ctx *taskc)
 	taskc->last_running_clk = now; /* for run_time_ns */
 	taskc->last_stopping_clk = now; /* for run_time_ns */
 	taskc->run_time_ns = LAVD_SLICE_MAX_NS;
-	taskc->lat_prio = get_nice_prio(p);
 	taskc->run_freq = 0;
 	taskc->greedy_ratio = 1000;
 	taskc->victim_cpu = (s32)LAVD_CPU_ID_NONE;
--- a/scheds/rust/scx_lavd/src/main.rs
+++ b/scheds/rust/scx_lavd/src/main.rs
@ -190,13 +190,13 @@ impl<'a> Scheduler<'a> {
        if mseq % 32 == 1 {
            info!(
                "| {:6} | {:7} | {:17} \
-                   | {:4} | {:4} | {:9} \
+                   | {:4} | {:4} | {:12} \
                   | {:14} | {:8} | {:7} \
-                   | {:8} | {:4} | {:7} \
-                   | {:8} | {:7} | {:9} \
-                   | {:9} | {:9} | {:9} \
+                   | {:8} | {:7} | {:8} \
+                   | {:7} | {:9} | {:9} \
+                   | {:9} | {:9} | {:8} \
                   | {:8} | {:8} | {:8} \
-                   | {:8} | {:6} | {:6} |",
+                   | {:6} | {:6} |",
                "mseq",
                "pid",
                "comm",
@ -206,8 +206,7 @@ impl<'a> Scheduler<'a> {
                "eli_ns",
                "slc_ns",
                "grdy_rt",
-                "lat_prio",
-                "lc",
+                "lat_cri",
                "avg_lc",
                "st_prio",
                "slc_bst",
@ -230,13 +229,13 @@ impl<'a> Scheduler<'a> {

        info!(
            "| {:6} | {:7} | {:17} \
-               | {:4} | {:4} | {:9} \
+               | {:4} | {:4} | {:12} \
               | {:14} | {:8} | {:7} \
-               | {:8} | {:4} | {:7} \
-               | {:8} | {:7} | {:9} \
-               | {:9} | {:9} | {:9} \
+               | {:8} | {:7} | {:8} \
+               | {:7} | {:9} | {:9} \
+               | {:9} | {:9} | {:8} \
               | {:8} | {:8} | {:8} \
-               | {:8} | {:6} | {:6} |",
+               | {:6} | {:6} |",
            mseq,
            tx.pid,
            tx_comm,
@ -246,7 +245,6 @@ impl<'a> Scheduler<'a> {
            tc.eligible_delta_ns,
            tc.slice_ns,
            tc.greedy_ratio,
-            tc.lat_prio,
            tc.lat_cri,
            tx.avg_lat_cri,
            tx.static_prio,