Merge pull request #818 from multics69/lavd-tuning

scx_lavd: add missing reset_lock_futex_boost()
Changwoo Min authored 2024-10-20 01:41:54 +00:00, committed by GitHub
commit bf1b014d63
6 changed files with 39 additions and 58 deletions
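
At a high level, the change does two things. First, reset_lock_futex_boost() is now invoked on every path that recomputes a task's scheduling state — deadline calculation, stopping, quiescent, and dispatch — and instead of applying the lock-holder boost at reset time, the reset latches a one-shot need_lock_boost flag that calc_lat_cri() later consumes. Second, the now-unused load_actual/load_run_time_ns accounting is removed throughout. The userspace model below illustrates the one-shot latch; the types are simplified stand-ins, and is_lock_holder()'s body and the 500 boost factor are assumptions, not code from this commit:

	#include <stdbool.h>
	#include <stdio.h>

	struct task_ctx { int lock_boost, futex_boost; bool need_lock_boost; };
	struct cpu_ctx  { unsigned long nr_lhp; bool lock_holder; };

	/* assumed: a task "holds a lock" while either boost count is positive */
	static bool is_lock_holder(struct task_ctx *t)
	{
		return t->lock_boost > 0 || t->futex_boost > 0;
	}

	static void reset_lock_futex_boost(struct task_ctx *t, struct cpu_ctx *c)
	{
		if (is_lock_holder(t)) {
			t->need_lock_boost = true;	/* latch: boost later, exactly once */
			c->nr_lhp++;			/* lock-holder stat moves here too */
		}
		t->lock_boost = t->futex_boost = 0;
		c->lock_holder = false;
	}

	int main(void)
	{
		struct task_ctx t = { .lock_boost = 1 };	/* task holds a lock */
		struct cpu_ctx c = { 0 };
		unsigned long lat_cri = 1000;

		reset_lock_futex_boost(&t, &c);
		reset_lock_futex_boost(&t, &c);	/* second reset: counters already 0 */
		if (t.need_lock_boost) {	/* consumed exactly once */
			t.need_lock_boost = false;
			lat_cri += lat_cri * 500 / 1000;	/* 500 is illustrative */
		}
		printf("lat_cri=%lu nr_lhp=%lu\n", lat_cri, c.nr_lhp);	/* 1500 1 */
		return 0;
	}

Even with the reset run twice, the boost and the nr_lhp statistic fire only once.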


@@ -67,7 +67,6 @@ struct sys_stat {
 	volatile u64	last_update_clk;
 	volatile u64	util;		/* average of the CPU utilization */
-	volatile u64	load_actual;	/* average actual load of runnable tasks */
 	volatile u64	avg_svc_time;	/* average service time per task */
 	volatile u64	nr_queued_task;
@@ -117,7 +116,6 @@ struct task_ctx {
 	u64	wait_freq;	/* waiting frequency in a second */
 	u64	wake_freq;	/* waking-up frequency in a second */
-	u64	load_actual;	/* task load derived from run_time and run_freq */
 	u64	svc_time;	/* total CPU time consumed for this task */

 	/*
@@ -132,9 +130,10 @@ struct task_ctx {
 	u32	lat_cri_waker;	/* waker's latency criticality */
 	volatile s32	victim_cpu;
 	u16	slice_boost_prio; /* how many times a task fully consumed the slice */
-	u8	wakeup_ft;	/* regular wakeup = 1, sync wakeup = 2 */
 	volatile s16	lock_boost;	/* lock boost count */
 	volatile s16	futex_boost;	/* futex boost count */
+	volatile u8	need_lock_boost; /* need to boost lock for deadline calculation */
+	u8	wakeup_ft;	/* regular wakeup = 1, sync wakeup = 2 */
 	volatile u32	*futex_uaddr;	/* futex uaddr */

 	/*
@@ -176,7 +175,7 @@ enum {
 };

 enum {
-	LAVD_MSG_TASKC = 0x1,
+	LAVD_MSG_TASKC = 0x1
 };

 struct introspec {


@@ -90,8 +90,6 @@ struct cpu_ctx {
 	/*
 	 * Information used to keep track of load
 	 */
-	volatile u64	load_actual;	/* actual load of runnable tasks */
-	volatile u64	load_run_time_ns; /* total runtime of runnable tasks */
 	volatile u64	tot_svc_time;	/* total service time on a CPU */
 	volatile u64	last_kick_clk;	/* when the CPU was kicked */


@@ -93,11 +93,17 @@ static void dec_futex_boost(u32 *uaddr)
 		try_dec_futex_boost(taskc, cpuc, uaddr);
 }

-static void reset_lock_futex_boost(struct task_ctx *taskc)
+static void reset_lock_futex_boost(struct task_ctx *taskc, struct cpu_ctx *cpuc)
 {
+	if (is_lock_holder(taskc)) {
+		taskc->need_lock_boost = true;
+		cpuc->nr_lhp++;
+	}
+
 	taskc->lock_boost = 0;
 	taskc->futex_boost = 0;
 	taskc->futex_uaddr = NULL;
+	cpuc->lock_holder = false;
 }

 /**
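
Every caller now has to supply the current CPU's context in addition to the task's. A sketch of the calling convention, assuming scx_lavd's usual per-CPU lookup helper get_cpu_ctx() (the helper is not part of this diff):

	struct cpu_ctx *cpuc = get_cpu_ctx();	/* assumed lookup helper */
	if (!cpuc)
		return;
	reset_lock_futex_boost(taskc, cpuc);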


@@ -307,22 +307,18 @@ static u64 calc_lat_cri(struct task_struct *p, struct task_ctx *taskc,
 	if (is_kernel_task(p))
 		lat_cri += LAVD_LC_KTHREAD_FT;

+	/*
+	 * Reset task's lock and futex boost count
+	 * for a lock holder to be boosted only once.
+	 */
+	reset_lock_futex_boost(taskc, cpuc_cur);
+
 	/*
 	 * Prioritize a lock holder for faster system-wide forward progress.
 	 */
-	if (is_lock_holder(taskc)) {
+	if (taskc->need_lock_boost) {
+		taskc->need_lock_boost = false;
 		lat_cri += (lat_cri * LAVD_LC_LOCK_HOLDER_FT) / 1000;
-
-		/*
-		 * Update statistics.
-		 */
-		cpuc_cur->nr_lhp++;
-
-		/*
-		 * Reset task's lock and futex boost count
-		 * for a lock holder to be boosted only once.
-		 */
-		reset_lock_futex_boost(taskc);
 	}

 	/*
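
The boost is a per-mille scale factor applied at most once per reset cycle: with lat_cri = 2000 and LAVD_LC_LOCK_HOLDER_FT = 500 (an illustrative value; the constant's definition is not shown in this diff), the holder gains 2000 * 500 / 1000 = 1000, a 50% bump, and clearing need_lock_boost right away prevents a second application before the next reset.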
@@ -356,16 +352,6 @@ static void calc_virtual_deadline_delta(struct task_struct *p,
 	taskc->vdeadline_delta_ns = deadline;
 }

-static u64 calc_task_load_actual(struct task_ctx *taskc)
-{
-	/*
-	 * The actual load is the CPU time consumed in a time interval, which
-	 * can be calculated from task's average run time and frequency.
-	 */
-	const s64 interval_adj = LAVD_TIME_ONE_SEC / LAVD_SYS_STAT_INTERVAL_NS;
-	return (taskc->run_time_ns * taskc->run_freq) / interval_adj;
-}
-
 static u64 clamp_time_slice_ns(u64 slice)
 {
 	if (slice < LAVD_SLICE_MIN_NS)
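
For reference, the deleted helper estimated a task's CPU consumption per stat interval as average run time times run frequency. Worked through with assumed numbers: run_time_ns = 1,000,000 (1 ms per run) and run_freq = 50 runs/s give 50 ms of CPU per second; with a 250 ms stat interval, interval_adj = LAVD_TIME_ONE_SEC / LAVD_SYS_STAT_INTERVAL_NS = 4, so the estimate is 1,000,000 * 50 / 4 = 12,500,000 ns per interval (the 250 ms figure is an assumption, since the constant's value is not in the diff).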
@@ -448,10 +434,7 @@ static void update_stat_for_runnable(struct task_struct *p,
 	/*
 	 * Reflect task's load immediately.
 	 */
-	taskc->load_actual = calc_task_load_actual(taskc);
 	taskc->acc_run_time_ns = 0;
-	cpuc->load_actual += taskc->load_actual;
-	cpuc->load_run_time_ns += clamp_time_slice_ns(taskc->run_time_ns);
 }

 static void advance_cur_logical_clk(struct task_ctx *taskc)
@@ -612,13 +595,6 @@ static void update_stat_for_stopping(struct task_struct *p,
 	 */
 	taskc->lat_cri_waker = 0;

-	/*
-	 * After getting updated task's runtime, compensate CPU's total
-	 * runtime.
-	 */
-	cpuc->load_run_time_ns = cpuc->load_run_time_ns -
-				 clamp_time_slice_ns(old_run_time_ns) +
-				 clamp_time_slice_ns(taskc->run_time_ns);
 	/*
 	 * Increase total service time of this CPU.
 	 */
@@ -629,6 +605,12 @@
 	 */
 	if (READ_ONCE(cur_svc_time) < taskc->svc_time)
 		WRITE_ONCE(cur_svc_time, taskc->svc_time);
+
+	/*
+	 * Reset task's lock and futex boost count
+	 * for a lock holder to be boosted only once.
+	 */
+	reset_lock_futex_boost(taskc, cpuc);
 }

 static void update_stat_for_quiescent(struct task_struct *p,
@@ -636,11 +618,10 @@ static void update_stat_for_quiescent(struct task_struct *p,
 					struct cpu_ctx *cpuc)
 {
 	/*
-	 * When quiescent, reduce the per-CPU task load. Per-CPU task load will
-	 * be aggregated periodically at update_sys_cpu_load().
+	 * Reset task's lock and futex boost count
+	 * for a lock holder to be boosted only once.
 	 */
-	cpuc->load_actual -= taskc->load_actual;
-	cpuc->load_run_time_ns -= clamp_time_slice_ns(taskc->run_time_ns);
+	reset_lock_futex_boost(taskc, cpuc);
 }

 static bool match_task_core_type(struct task_ctx *taskc,
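
Together with the calc_lat_cri() hunk above, the reset now runs at every point where a task's claim to the boost naturally expires: when its deadline is recomputed, when it stops running, when it goes idle (the stopping/quiescent update helpers are presumably wired to the corresponding sched_ext callbacks, per their names), and, in the hunk below, when dispatch decides whether to keep running the previous task.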
@@ -1174,8 +1155,7 @@ void BPF_STRUCT_OPS(lavd_dispatch, s32 cpu, struct task_struct *prev)
 	u64 now = bpf_ktime_get_ns();
 	struct cpu_ctx *cpuc;
 	struct bpf_cpumask *active, *ovrflw;
-	struct task_struct *p;
-	struct task_ctx *taskc;
+	struct task_struct *p, *taskc;
 	u64 dsq_id = 0;
 	bool try_consume = false;
@@ -1289,20 +1269,27 @@ consume_out:
 	 */
 	if (!try_consume)
 		return;

 	if (consume_task(cpu, cpuc, now))
 		return;

 	/*
-	 * If no other task is consumed, the scheduler will keep continue to
-	 * run the prev task, so let's re-assigne its time slice.
+	 * Reset prev task's lock and futex boost count
+	 * for a lock holder to be boosted only once.
 	 */
-	if (prev && (prev->scx.flags & SCX_TASK_QUEUED)) {
+	if (prev) {
 		taskc = get_task_ctx(prev);
 		if (!taskc) {
 			scx_bpf_error("Failed to look up task context");
 			return;
 		}
-		prev->scx.slice = calc_time_slice(prev, taskc);
+
+		reset_lock_futex_boost(taskc, cpuc);
+
+		/*
+		 * If nothing to run, continue to run the previous task.
+		 */
+		if (prev->scx.flags & SCX_TASK_QUEUED)
+			prev->scx.slice = calc_time_slice(prev, taskc);
 	}
 }
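
Note the relaxed guard: the old code touched prev only when it was still queued (prev && (prev->scx.flags & SCX_TASK_QUEUED)), which is exactly how a departing lock holder could escape with stale boost counts. The new code resets the boost state for any surviving prev and keeps only the slice reassignment behind the SCX_TASK_QUEUED test.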


@@ -30,10 +30,8 @@ struct sys_stat_ctx {
 	u64	duration_total;
 	u64	idle_total;
 	u64	compute_total;
-	u64	load_actual;
 	u64	tot_svc_time;
 	u64	nr_queued_task;
-	u64	load_run_time_ns;
 	s32	max_lat_cri;
 	s32	avg_lat_cri;
 	u64	sum_lat_cri;
@@ -83,8 +81,6 @@ static void collect_sys_stat(struct sys_stat_ctx *c)
 	/*
 	 * Accumulate cpus' loads.
 	 */
-	c->load_actual += cpuc->load_actual;
-	c->load_run_time_ns += cpuc->load_run_time_ns;
 	c->tot_svc_time += cpuc->tot_svc_time;
 	cpuc->tot_svc_time = 0;
@@ -233,8 +229,6 @@ static void update_sys_stat_next(struct sys_stat_ctx *c)
 	struct sys_stat *stat_cur = c->stat_cur;
 	struct sys_stat *stat_next = c->stat_next;

-	stat_next->load_actual =
-		calc_avg(stat_cur->load_actual, c->load_actual);
 	stat_next->util =
 		calc_avg(stat_cur->util, c->new_util);
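
stat_next->util keeps being smoothed the same way the dropped load_actual average was. calc_avg() itself is not shown in this diff; a plausible EWMA shape for it, keeping 3/4 of the old value and blending in 1/4 of the new sample:

	typedef unsigned long long u64;	/* stand-in for the BPF typedef */

	/* assumed shape, not taken from this commit */
	static u64 calc_avg(u64 old_val, u64 new_val)
	{
		return (old_val - (old_val >> 2)) + (new_val >> 2);
	}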


@@ -51,9 +51,6 @@ rawtracepoint:sched_switch
 }

 interval:s:1 {
-	$scx_ops = kaddr("scx_ops");
-	$ops = (struct sched_ext_ops*)$scx_ops;
-	printf("scheduler: %s\n", $ops->name);
 	print(@avg_lat);
 	print(@usec_hist);
 	print(@dsq_lat);