Merge pull request #204 from multics69/scx-lavd-runtime-runfreq

scx_lavd: directly accumulate a task's runtime across consecutive runs
Changwoo Min 2024-03-31 16:23:29 +09:00 committed by GitHub
commit 048662a692
2 changed files with 103 additions and 97 deletions


@@ -61,12 +61,11 @@ enum consts {
LAVD_BOOST_RANGE = 14, /* 35% of nice range */
LAVD_BOOST_WAKEUP_LAT = 1,
LAVD_SLICE_BOOST_MAX_PRIO = (LAVD_SLICE_MAX_NS/LAVD_SLICE_MIN_NS),
LAVD_SLICE_BOOST_MAX_STEP = 3,
LAVD_GREEDY_RATIO_MAX = USHRT_MAX,
LAVD_ELIGIBLE_TIME_LAT_FT = 2,
LAVD_ELIGIBLE_TIME_MAX = (LAVD_TARGETED_LATENCY_NS >> 1),
LAVD_ELIGIBLE_TIME_MAX = LAVD_TARGETED_LATENCY_NS,
LAVD_CPU_UTIL_MAX = 1000, /* 100.0% */
LAVD_CPU_UTIL_INTERVAL_NS = (100 * NSEC_PER_MSEC), /* 100 msec */
@@ -121,26 +120,34 @@ struct cpu_ctx {
struct task_ctx {
/*
* Essential task running statistics for latency criticality calculation
* Clocks when a task state transition happens for task statistics calculation
*/
u64 last_start_clk; /* last time when scheduled in */
u64 last_stop_clk; /* last time when scheduled out */
u64 run_time_ns; /* average runtime per schedule */
u64 run_freq; /* scheduling frequency in a second */
u64 last_wait_clk; /* last time when a task waits for an event */
u64 wait_freq; /* waiting frequency in a second */
u64 wake_freq; /* waking-up frequency in a second */
u64 last_wake_clk; /* last time when a task wakes up others */
u64 last_runnable_clk; /* last time when a task wakes up others */
u64 last_running_clk; /* last time when scheduled in */
u64 last_stopping_clk; /* last time when scheduled out */
u64 last_quiescent_clk; /* last time when a task waits for an event */
u64 load_actual; /* task load derived from run_time and run_freq */
u64 vdeadline_delta_ns;
u64 eligible_delta_ns;
u64 slice_ns;
u64 greedy_ratio;
u64 lat_cri;
u16 slice_boost_prio;/* how many times a task fully consumed the slice */
u16 lat_prio; /* latency priority */
s16 lat_boost_prio; /* DEBUG */
/*
* Task running statistics for latency criticality calculation
*/
u64 acc_run_time_ns; /* accumulated runtime from runnable to quiescent state */
u64 run_time_ns; /* average runtime per schedule */
u64 run_freq; /* scheduling frequency in a second */
u64 wait_freq; /* waiting frequency in a second */
u64 wake_freq; /* waking-up frequency in a second */
u64 load_actual; /* task load derived from run_time and run_freq */
/*
* Task deadline and time slice
*/
u64 vdeadline_delta_ns; /* time delta until task's virtual deadline */
u64 eligible_delta_ns; /* time delta until task becomes eligible */
u64 slice_ns; /* time slice */
u64 greedy_ratio; /* task's overscheduling ratio compared to its nice priority */
u64 lat_cri; /* calculated latency criticality */
u16 slice_boost_prio; /* how many times a task fully consumed the slice */
u16 lat_prio; /* latency priority */
s16 lat_boost_prio; /* DEBUG */
};
struct task_ctx_x {
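
For orientation, the renamed clock fields above map onto the four sched_ext callbacks this commit reworks. Below is a simplified, userspace-only sketch of that bookkeeping; the helper names and the plain clock_gettime() stand in for the BPF code and are illustrative assumptions, not the scheduler's actual implementation.

#include <stdint.h>
#include <time.h>

typedef uint64_t u64;

struct task_clocks {
	u64 last_runnable_clk;	/* stamped on the waker in ops.runnable() when it wakes another task */
	u64 last_running_clk;	/* stamped in ops.running(), i.e. scheduled in */
	u64 last_stopping_clk;	/* stamped in ops.stopping(), i.e. scheduled out */
	u64 last_quiescent_clk;	/* stamped in ops.quiescent(), i.e. waiting for an event */
	u64 acc_run_time_ns;	/* accumulated runtime from runnable to quiescent */
};

static u64 now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (u64)ts.tv_sec * 1000000000ULL + (u64)ts.tv_nsec;
}

/* runnable -> running -> stopping -> (running -> stopping ...) -> quiescent */
static void on_runnable(struct task_clocks *c)  { c->acc_run_time_ns = 0; }
static void on_running(struct task_clocks *c)   { c->last_running_clk = now_ns(); }
static void on_stopping(struct task_clocks *c)
{
	u64 now = now_ns();

	c->acc_run_time_ns += now - c->last_running_clk;
	c->last_stopping_clk = now;
}
static void on_quiescent(struct task_clocks *c) { c->last_quiescent_clk = now_ns(); }
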


@@ -1171,10 +1171,7 @@ static u64 calc_slice_share(struct task_struct *p, struct task_ctx *taskc)
* scheduler tries to allocate a longer time slice.
*/
u64 share = get_task_load_ideal(p);
u64 slice_boost_step = min(taskc->slice_boost_prio,
LAVD_SLICE_BOOST_MAX_STEP);
share += (share * slice_boost_step) / LAVD_SLICE_BOOST_MAX_STEP;
share += (share * taskc->slice_boost_prio) / LAVD_SLICE_BOOST_MAX_STEP;
return share;
}
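
With LAVD_SLICE_BOOST_MAX_STEP = 3, the simplified formula above scales the ideal share linearly from 1x at boost 0 up to 2x at boost 3; the clamp the removed min() provided now comes from adjust_slice_boost() never raising slice_boost_prio above LAVD_SLICE_BOOST_MAX_STEP (see the adjust_slice_boost() hunk further down). A small standalone example, with an arbitrary ideal share of 1000 assumed purely for illustration:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;
typedef uint16_t u16;

#define LAVD_SLICE_BOOST_MAX_STEP	3

static u64 calc_slice_share_sketch(u64 ideal_share, u16 slice_boost_prio)
{
	u64 share = ideal_share;

	/* grows linearly: +1/3 of the ideal share per boost step, up to 2x */
	share += (share * slice_boost_prio) / LAVD_SLICE_BOOST_MAX_STEP;
	return share;
}

int main(void)
{
	for (u16 prio = 0; prio <= LAVD_SLICE_BOOST_MAX_STEP; prio++)
		printf("slice_boost_prio=%u -> share=%llu\n", (unsigned)prio,
		       (unsigned long long)calc_slice_share_sketch(1000, prio));
	/* prints 1000, 1333, 1666, 2000 */
	return 0;
}
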
@@ -1221,28 +1218,34 @@ static u64 calc_time_slice(struct task_struct *p, struct task_ctx *taskc)
return slice;
}
static void update_stat_for_enq(struct task_struct *p, struct task_ctx *taskc,
struct cpu_ctx *cpuc)
static void update_stat_for_runnable(struct task_struct *p,
struct task_ctx *taskc,
struct cpu_ctx *cpuc)
{
/*
* Reflect task's load immediately.
*/
taskc->load_actual = calc_task_load_actual(taskc);
taskc->acc_run_time_ns = 0;
cpuc->load_actual += taskc->load_actual;
cpuc->load_ideal += get_task_load_ideal(p);
}
static void update_stat_for_run(struct task_struct *p, struct task_ctx *taskc,
struct cpu_ctx *cpuc)
static void update_stat_for_running(struct task_struct *p,
struct task_ctx *taskc,
struct cpu_ctx *cpuc)
{
u64 now, wait_period, interval;
u64 wait_period, interval;
u64 now = bpf_ktime_get_ns();
if (!have_scheduled(taskc))
goto clk_out;
/*
* Since this is the start of a new schedule for @p, we update run
* frequency in a second using an exponential weighted moving average.
*/
now = bpf_ktime_get_ns();
wait_period = now - taskc->last_stop_clk;
wait_period = now - taskc->last_quiescent_clk;
interval = taskc->run_time_ns + wait_period;
taskc->run_freq = calc_avg_freq(taskc->run_freq, interval);
@@ -1250,51 +1253,43 @@ static void update_stat_for_run(struct task_struct *p, struct task_ctx *taskc,
* Update per-CPU latency criticality information for ever-scheduled
* tasks
*/
if (have_scheduled(taskc)) {
if (cpuc->max_lat_cri < taskc->lat_cri)
cpuc->max_lat_cri = taskc->lat_cri;
if (cpuc->min_lat_cri > taskc->lat_cri)
cpuc->min_lat_cri = taskc->lat_cri;
cpuc->sum_lat_cri += taskc->lat_cri;
cpuc->sched_nr++;
}
if (cpuc->max_lat_cri < taskc->lat_cri)
cpuc->max_lat_cri = taskc->lat_cri;
if (cpuc->min_lat_cri > taskc->lat_cri)
cpuc->min_lat_cri = taskc->lat_cri;
cpuc->sum_lat_cri += taskc->lat_cri;
cpuc->sched_nr++;
clk_out:
/*
* Update task state when starts running.
*/
taskc->last_start_clk = now;
taskc->last_running_clk = now;
}
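
calc_avg_freq() itself is not part of this diff, so the sketch below assumes it turns the interval into a per-second frequency and folds it into an exponentially weighted moving average; the 3:1 weighting is an assumption for illustration, and the in-tree helper may weight differently. The same helper also drives wake_freq and wait_freq in the later hunks.

#include <stdint.h>

typedef uint64_t u64;

#define NSEC_PER_SEC	1000000000ULL

/* Assumed shape of calc_avg_freq(): EWMA of the observed per-second frequency. */
static u64 calc_avg_freq_sketch(u64 old_freq, u64 interval_ns)
{
	u64 new_freq = interval_ns ? NSEC_PER_SEC / interval_ns : 0;

	return (old_freq * 3 + new_freq) / 4;	/* 3 parts history, 1 part new */
}

/*
 * In update_stat_for_running() the interval is run_time_ns plus the time since
 * last_quiescent_clk, i.e. roughly one full wait-plus-run period, so run_freq
 * approximates how many times per second the task gets scheduled.
 */
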
static void update_stat_for_stop(struct task_struct *p, struct task_ctx *taskc,
struct cpu_ctx *cpuc)
static void update_stat_for_stopping(struct task_struct *p,
struct task_ctx *taskc,
struct cpu_ctx *cpuc)
{
u64 now, run_time_ns, run_time_boosted_ns;
now = bpf_ktime_get_ns();
u64 now = bpf_ktime_get_ns();
/*
* Adjust slice boost for the task's next schedule. Note that the
* updating slice_boost_prio should be done before updating
* run_time_boosted_ns, since the run_time_boosted_ns calculation
* requires updated slice_boost_prio.
* Update task's run_time. When a task is scheduled consecutively
* without ops.quiescent(), the task's runtime is accumulated for
* statistics. Suppose a task is scheduled 2ms, 2ms, and 2ms with the
* time slice exhausted. If 6ms of time slice was given in the first
* place, the task will entirely consume the time slice. Hence, the
* consecutive execution is accumulated and reflected in the
* calculation of runtime statistics.
*/
taskc->last_stop_clk = now;
adjust_slice_boost(cpuc, taskc);
/*
* Update task's run_time. If a task got slice-boosted -- in other
* words, its time slices have been fully consumed multiple times,
* stretch the measured runtime according to the slice_boost_prio.
* The stretched runtime more accurately reflects the actual runtime
* per schedule as if a large enough time slice was given in the first
* place.
*/
run_time_ns = now - taskc->last_start_clk;
run_time_boosted_ns = run_time_ns * (1 + taskc->slice_boost_prio);
taskc->run_time_ns = calc_avg(taskc->run_time_ns, run_time_boosted_ns);
taskc->acc_run_time_ns += now - taskc->last_running_clk;
taskc->run_time_ns = calc_avg(taskc->run_time_ns,
taskc->acc_run_time_ns);
taskc->last_stopping_clk = now;
}
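
Following the comment's 2 ms + 2 ms + 2 ms example: because ops.runnable() resets acc_run_time_ns and ops.quiescent() is never reached in between, each ops.stopping() grows the accumulator (2, 4, then 6 ms) and folds the running total into the average, so the runtime sample covers the whole consecutive run rather than a single 2 ms slice. A minimal standalone trace of that bookkeeping; calc_avg() is assumed to be a simple 3:1 weighted average here, and the in-tree helper may differ.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

#define NSEC_PER_MSEC	1000000ULL

/* Assumed averaging helper; the in-tree calc_avg() may weight differently. */
static u64 calc_avg_sketch(u64 old_val, u64 new_val)
{
	return (old_val * 3 + new_val) / 4;
}

int main(void)
{
	u64 acc_run_time_ns = 0;		/* reset in ops.runnable()         */
	u64 run_time_ns = 6 * NSEC_PER_MSEC;	/* prior average, for illustration */

	/*
	 * Three consecutive 2 ms runs with the slice exhausted each time and no
	 * ops.quiescent() in between: each ops.stopping() adds its segment to
	 * the accumulator and folds the running total into the average, instead
	 * of averaging each 2 ms slice in isolation.
	 */
	for (int i = 0; i < 3; i++) {
		acc_run_time_ns += 2 * NSEC_PER_MSEC;	/* now - last_running_clk */
		run_time_ns = calc_avg_sketch(run_time_ns, acc_run_time_ns);
	}

	printf("acc_run_time_ns=%llu avg run_time_ns=%llu\n",
	       (unsigned long long)acc_run_time_ns,
	       (unsigned long long)run_time_ns);
	return 0;
}
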
static void update_stat_for_quiescent(struct task_struct *p, struct task_ctx *taskc,
static void update_stat_for_quiescent(struct task_struct *p,
struct task_ctx *taskc,
struct cpu_ctx *cpuc)
{
/*
@@ -1463,17 +1458,17 @@ void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
struct task_ctx *p_taskc, *waker_taskc;
u64 now, interval;
cpuc = get_cpu_ctx();
p_taskc = get_task_ctx(p);
if (!cpuc || !p_taskc)
return;
/*
* Add task load based on the current statistics regardless of a target
* rq. Statistics will be adjusted when more accurate statistics become
* available (ops.running).
*/
update_stat_for_enq(p, p_taskc, cpuc);
cpuc = get_cpu_ctx();
p_taskc = get_task_ctx(p);
if (!cpuc || !p_taskc)
return;
update_stat_for_runnable(p, p_taskc, cpuc);
/*
* When a task @p is wakened up, the wake frequency of its waker task
@@ -1494,28 +1489,25 @@ void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
}
now = bpf_ktime_get_ns();
interval = now - waker_taskc->last_wake_clk;
interval = now - waker_taskc->last_runnable_clk;
waker_taskc->wake_freq = calc_avg_freq(waker_taskc->wake_freq, interval);
waker_taskc->last_wake_clk = now;
waker_taskc->last_runnable_clk = now;
}
void BPF_STRUCT_OPS(lavd_running, struct task_struct *p)
{
struct task_ctx *taskc;
struct cpu_ctx *cpuc;
struct task_ctx *taskc;
/*
* Update task statistics then adjust task load based on the update.
* Update task statistics
*/
taskc = get_task_ctx(p);
if (!taskc)
return;
cpuc = get_cpu_ctx();
if (!cpuc)
taskc = get_task_ctx(p);
if (!cpuc || !taskc)
return;
update_stat_for_run(p, taskc, cpuc);
update_stat_for_running(p, taskc, cpuc);
/*
* Calculate task's time slice based on updated load.
@@ -1535,12 +1527,12 @@ static bool slice_fully_consumed(struct cpu_ctx *cpuc, struct task_ctx *taskc)
/*
* Sanity check just to make sure the runtime is positive.
*/
if (taskc->last_stop_clk < taskc->last_start_clk) {
if (taskc->last_stopping_clk < taskc->last_running_clk) {
scx_bpf_error("run_time_ns is negative: 0x%llu - 0x%llu",
taskc->last_stop_clk, taskc->last_start_clk);
taskc->last_stopping_clk, taskc->last_running_clk);
}
run_time_ns = taskc->last_stop_clk - taskc->last_start_clk;
run_time_ns = taskc->last_stopping_clk - taskc->last_running_clk;
return run_time_ns >= taskc->slice_ns;
}
@@ -1553,7 +1545,7 @@ static void adjust_slice_boost(struct cpu_ctx *cpuc, struct task_ctx *taskc)
* fully consumed, decrease the slice boost priority by half.
*/
if (slice_fully_consumed(cpuc, taskc)) {
if (taskc->slice_boost_prio < LAVD_SLICE_BOOST_MAX_PRIO)
if (taskc->slice_boost_prio < LAVD_SLICE_BOOST_MAX_STEP)
taskc->slice_boost_prio++;
}
else {
@@ -1568,17 +1560,19 @@ void BPF_STRUCT_OPS(lavd_stopping, struct task_struct *p, bool runnable)
struct task_ctx *taskc;
/*
* Reduce the task load.
* Update task statistics
*/
cpuc = get_cpu_ctx();
if (!cpuc)
return;
taskc = get_task_ctx(p);
if (!taskc)
if (!cpuc || !taskc)
return;
update_stat_for_stop(p, taskc, cpuc);
update_stat_for_stopping(p, taskc, cpuc);
/*
* Adjust slice boost for the task's next schedule.
*/
adjust_slice_boost(cpuc, taskc);
}
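
The increment branch of adjust_slice_boost() is visible in the hunk further up, capped at LAVD_SLICE_BOOST_MAX_STEP, which also bounds the share calculation; the decay branch is truncated in this view, so the halving below is an assumption taken from the surrounding comment. A hedged sketch of the dynamics now driven from lavd_stopping():

#include <stdint.h>
#include <stdbool.h>

typedef uint16_t u16;

#define LAVD_SLICE_BOOST_MAX_STEP	3

static void adjust_slice_boost_sketch(u16 *slice_boost_prio, bool slice_fully_consumed)
{
	if (slice_fully_consumed) {
		/* reward full slice consumption, capped at the boost ceiling */
		if (*slice_boost_prio < LAVD_SLICE_BOOST_MAX_STEP)
			(*slice_boost_prio)++;
	} else {
		/* assumed from the comment: decay the boost by half otherwise */
		*slice_boost_prio >>= 1;
	}
}
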
void BPF_STRUCT_OPS(lavd_quiescent, struct task_struct *p, u64 deq_flags)
@@ -1587,6 +1581,9 @@ void BPF_STRUCT_OPS(lavd_quiescent, struct task_struct *p, u64 deq_flags)
struct task_ctx *taskc;
u64 now, interval;
/*
* Subtract task load from the current CPU's load.
*/
cpuc = get_cpu_ctx();
taskc = get_task_ctx(p);
if (!cpuc || !taskc)
@@ -1606,9 +1603,9 @@ void BPF_STRUCT_OPS(lavd_quiescent, struct task_struct *p, u64 deq_flags)
* When a task @p goes to sleep, its associated wait_freq is updated.
*/
now = bpf_ktime_get_ns();
interval = now - taskc->last_wait_clk;
interval = now - taskc->last_quiescent_clk;
taskc->wait_freq = calc_avg_freq(taskc->wait_freq, interval);
taskc->last_wait_clk = now;
taskc->last_quiescent_clk = now;
}
void BPF_STRUCT_OPS(lavd_cpu_online, s32 cpu)
@@ -1712,14 +1709,16 @@ s32 BPF_STRUCT_OPS(lavd_init_task, struct task_struct *p,
/*
* Initialize @p's context.
* Initialize @p's context with the current clock and default load.
*/
now = bpf_ktime_get_ns();
taskc->last_start_clk = now;
taskc->last_stop_clk = now;
taskc->last_wait_clk = now;
taskc->last_wake_clk = now;
taskc->last_runnable_clk = now;
taskc->last_running_clk = now;
taskc->last_stopping_clk = now;
taskc->last_quiescent_clk = now;
taskc->greedy_ratio = 1000;
taskc->run_time_ns = LAVD_LC_RUNTIME_MAX;
taskc->run_freq = 1;
/*
* When a task is forked, we immediately reflect changes to the current