scx_bpfland: rework lowlatency mode to adjust task priorities

Rework lowlatency mode as follows (see the sketch below):
 - introduce a dynamic task priority: the task's weight multiplied by
   its average rate of voluntary context switches
 - use the dynamic priority to determine the task's vruntime (instead
   of the static task weight)
 - evaluate the task's minimum vruntime as a function of its dynamic
   priority (tasks with a higher dynamic priority can have a smaller
   vruntime than tasks with a lower dynamic priority)
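
A minimal, standalone sketch of the new priority math (plain C with
integer parameters instead of the BPF task context; it mirrors
task_dyn_prio(), task_prio(), task_lag() and scale_inverse_fair() from
the diff below, and the numbers in main() are only illustrative):

  #include <stdio.h>
  #include <stdint.h>

  /* Dynamic priority: static weight times the latency weight, i.e. the
   * clamped average rate of voluntary context switches per second
   * (the multiplier is only applied in lowlatency mode). */
  static uint64_t task_prio(uint64_t weight, uint64_t lat_weight, int lowlatency)
  {
          uint64_t dyn = lowlatency ? (lat_weight > 1 ? lat_weight : 1) : 1;

          return weight * dyn;
  }

  /* Allowed vruntime lag: a higher dynamic priority lets the task's
   * vruntime fall further behind the global vtime. */
  static uint64_t task_lag(uint64_t slice_lag, uint64_t prio)
  {
          return slice_lag * prio / 100;
  }

  /* Charge the used time slice inversely to the dynamic priority. */
  static uint64_t scale_inverse_fair(uint64_t value, uint64_t prio)
  {
          return value * 100 / prio;
  }

  int main(void)
  {
          /* weight=100 (default), ~50 voluntary switches/sec, lowlatency on */
          uint64_t prio = task_prio(100, 50, 1);

          printf("prio=%llu lag_ns=%llu charged_ns=%llu\n",
                 (unsigned long long)prio,
                 (unsigned long long)task_lag(5000000ULL, prio),
                 (unsigned long long)scale_inverse_fair(5000000ULL, prio));
          return 0;
  }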

The dynamic priority makes it possible to maintain good system
responsiveness even without classifying tasks as "interactive" or
"regular"; therefore, in lowlatency mode only the shared DSQ is used
(the priority DSQ is disabled). The averaging that feeds this priority
is sketched below.
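
For reference, the latency weight that drives the dynamic priority is
refreshed roughly once per second from the task's voluntary
context-switch rate and clamped to MAX_LATENCY_WEIGHT. A simplified
standalone sketch (the helper names avg_clamp()/update_lat_weight() and
the plain (old + new) / 2 averaging are placeholders for the
scheduler's calc_avg_clamp() logic):

  #include <stdint.h>

  #define NSEC_PER_SEC       1000000000ULL
  #define MAX_LATENCY_WEIGHT 1000ULL  /* cap for the dynamic multiplier */

  /* Placeholder moving average, clamped to [low, high]. */
  static uint64_t avg_clamp(uint64_t old_val, uint64_t new_val,
                            uint64_t low, uint64_t high)
  {
          uint64_t avg = (old_val + new_val) / 2;

          if (avg < low)
                  return low;
          if (avg > high)
                  return high;
          return avg;
  }

  /* Fold the voluntary context switches observed over the last delta_t
   * nanoseconds into the task's latency weight. */
  static uint64_t update_lat_weight(uint64_t lat_weight, uint64_t delta_nvcsw,
                                    uint64_t delta_t)
  {
          uint64_t avg_nvcsw = delta_nvcsw * NSEC_PER_SEC / delta_t;

          return avg_clamp(lat_weight, avg_nvcsw, 0, MAX_LATENCY_WEIGHT);
  }

Without --lowlatency the multiplier stays at 1, so the scheduler falls
back to plain weight-based vruntime ordering plus the interactive /
regular classification.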

Using a separate priority queue to dispatch "interactive" tasks makes
the scheduler less fair, but it keeps latency-sensitive tasks
responsive even when a very large number of tasks is running in the
system (e.g., `stress-ng -c 1024` or similar scenarios), where relying
solely on the dynamic priority may not be sufficient.

On the other hand, disabling the classification of "interactive" tasks
results in a fairer scheduler and more predictable performance, making
it better suited for soft real-time applications (e.g., audio and
multimedia).

Therefore, the --lowlatency option is retained to allow users to choose
between more predictable performance (by disabling the interactive task
classification) and a more responsive system (the default).

Signed-off-by: Andrea Righi <andrea.righi@linux.dev>
Andrea Righi 2024-10-09 15:15:33 +02:00
parent d336892c71
commit 4d68133f3b
3 changed files with 152 additions and 175 deletions


@ -26,15 +26,20 @@ const volatile bool debug;
*/
#define SHARED_DSQ 1
/*
* Maximum multiplier for the dynamic task priority.
*/
#define MAX_LATENCY_WEIGHT 1000
/*
* Default task time slice.
*/
const volatile u64 slice_ns = 5ULL * NSEC_PER_MSEC;
const volatile u64 slice_max = 5ULL * NSEC_PER_MSEC;
/*
* Time slice used when the system is overcommitted.
*/
const volatile u64 slice_ns_min = 500ULL * NSEC_PER_USEC;
const volatile u64 slice_min = 1ULL * NSEC_PER_MSEC;
/*
* Maximum time slice lag.
@ -43,7 +48,7 @@ const volatile u64 slice_ns_min = 500ULL * NSEC_PER_USEC;
* tasks at the cost of making regular and newly created tasks less responsive
* (0 = disabled).
*/
const volatile s64 slice_ns_lag;
const volatile s64 slice_lag = 5ULL * NSEC_PER_MSEC;
/*
* When enabled always dispatch all kthreads directly.
@ -56,23 +61,13 @@ const volatile s64 slice_ns_lag;
const volatile bool local_kthreads;
/*
* Boost interactive tasks, by shortening their deadline as a function of their
* average amount of voluntary context switches.
* With lowlatency enabled, instead of classifying tasks as interactive or
* non-interactive, they all get a dynamic priority, which is adjusted as a
* function of their average rate of voluntary context switches.
*
* Tasks are already classified as interactive if their average amount of
* context switches exceeds nvcsw_avg_thresh, which grants them higher
* priority.
*
* When this option is enabled, tasks will receive a deadline boost in addition
* to their interactive vs. regular classification, with the boost being
* proportional to their average number of context switches.
*
* This ensures that within the main scheduling classes (interactive and
* regular), tasks that more frequently voluntarily yield the CPU receive an
* even higher priority.
*
* This option is particularly useful in soft real-time scenarios, such as
* audio processing, multimedia, etc.
* This option guarantees less spiky behavior and can be particularly
* useful in soft real-time scenarios, such as audio processing, multimedia,
* etc.
*/
const volatile bool lowlatency;
@ -108,7 +103,7 @@ volatile s64 cpufreq_perf_lvl;
* consuming a task, the scheduler will be forced to consume a task from the
* corresponding DSQ.
*/
const volatile u64 starvation_thresh_ns = 5ULL * NSEC_PER_MSEC;
const volatile u64 starvation_thresh_ns = 5000ULL * NSEC_PER_MSEC;
static u64 starvation_shared_ts;
/*
@ -120,7 +115,12 @@ volatile u64 nr_kthread_dispatches, nr_direct_dispatches,
/*
* Amount of currently running tasks.
*/
volatile u64 nr_running, nr_waiting, nr_interactive, nr_online_cpus;
volatile u64 nr_running, nr_interactive, nr_shared_waiting, nr_prio_waiting;
/*
* Amount of online CPUs.
*/
volatile u64 nr_online_cpus;
/*
* Exit information.
@ -193,18 +193,17 @@ struct task_ctx {
*/
u64 nvcsw;
u64 nvcsw_ts;
u64 avg_nvcsw;
/*
* Task's latency priority.
*/
u64 lat_weight;
/*
* Task's average used time slice.
*/
u64 avg_runtime;
/*
* Last task's execution time.
*/
u64 last_running;
/*
* Task's deadline.
*/
@ -233,15 +232,6 @@ struct task_ctx *try_lookup_task_ctx(const struct task_struct *p)
(struct task_struct *)p, 0, 0);
}
/*
* Return true if interactive tasks classification via voluntary context
* switches is enabled, false otherwise.
*/
static bool is_nvcsw_enabled(void)
{
return !!nvcsw_max_thresh;
}
/*
* Compare two vruntime values, returns true if the first value is less than
* the second one.
@ -253,19 +243,6 @@ static inline bool vtime_before(u64 a, u64 b)
return (s64)(a - b) < 0;
}
/*
* Return true if the task is interactive, false otherwise.
*/
static bool is_task_interactive(struct task_struct *p)
{
struct task_ctx *tctx;
tctx = try_lookup_task_ctx(p);
if (!tctx)
return false;
return tctx->is_interactive;
}
/*
* Return true if the target task @p is a kernel thread.
*/
@ -313,11 +290,46 @@ static u64 calc_avg_clamp(u64 old_val, u64 new_val, u64 low, u64 high)
}
/*
* Return a value inversely proportional to a weight.
* Return the dynamic priority multiplier (only applied in lowlatency mode).
*
* The multiplier is evaluated as a function of the task's average rate of
* voluntary context switches per second.
*/
static u64 scale_inverse_fair(u64 value, u64 weight)
static u64 task_dyn_prio(struct task_struct *p)
{
return value * 100 / weight;
struct task_ctx *tctx;
if (!lowlatency)
return 1;
tctx = try_lookup_task_ctx(p);
if (!tctx)
return 1;
return MAX(tctx->lat_weight, 1);
}
/*
* Return task's dynamic priority.
*/
static u64 task_prio(struct task_struct *p)
{
return p->scx.weight * task_dyn_prio(p);
}
/*
* Return the task's allowed lag: used to determine how early its vruntime can
* be.
*/
static u64 task_lag(struct task_struct *p)
{
return slice_lag * task_prio(p) / 100;
}
/*
* Return a value inversely proportional to the task's weight.
*/
static u64 scale_inverse_fair(struct task_struct *p, u64 value)
{
return value * 100 / task_prio(p);
}
/*
@ -326,41 +338,19 @@ static u64 scale_inverse_fair(u64 value, u64 weight)
*/
static s64 task_compute_dl(struct task_struct *p ,struct task_ctx *tctx)
{
/*
* The amount of voluntary context switches contributes to determine
* the task's priority.
*/
u64 task_prio = p->scx.weight + tctx->avg_nvcsw;
/*
* If not in "lowlatency" mode, always apply a pure vruntime based
* scheduling.
*/
if (!lowlatency)
return 0;
/*
* If the task has not ran during the previous slice_ns period, use its
* vruntime as deadline to give it a priority boost. This allows to
* speed up tasks that are mostly sleeping and they suddenly need to
* react fast.
*/
if (vtime_before(tctx->last_running + slice_ns, bpf_ktime_get_ns()))
return 0;
/*
* Return the deadline as a function of the average runtime and the
* evaluated task's dynamic priority.
*/
return scale_inverse_fair(tctx->avg_runtime, task_prio);
return scale_inverse_fair(p, tctx->avg_runtime);
}
/*
* Return task's evaluated deadline.
* Return task's evaluated vruntime.
*/
static inline u64 task_deadline(struct task_struct *p)
{
u64 min_vruntime = vtime_now - slice_ns_lag;
u64 min_vruntime = vtime_now - task_lag(p);
struct task_ctx *tctx;
tctx = try_lookup_task_ctx(p);
@ -368,16 +358,7 @@ static inline u64 task_deadline(struct task_struct *p)
return min_vruntime;
/*
* Limit the vruntime to (vtime_now - slice_ns_lag) to avoid
* excessively penalizing tasks.
*
* A positive slice_ns_lag can enhance vruntime scheduling
* effectiveness, but it may lead to more "spikey" performance as tasks
* could remain in the queue for too long.
*
* Instead, a negative slice_ns_lag can result in more consistent
* performance (less spikey), smoothing the reordering of the vruntime
* scheduling and making the scheduler closer to a FIFO.
* Limit the vruntime to avoid excessively penalizing tasks.
*/
if (vtime_before(p->scx.dsq_vtime, min_vruntime)) {
p->scx.dsq_vtime = min_vruntime;
@ -387,36 +368,36 @@ static inline u64 task_deadline(struct task_struct *p)
return tctx->deadline;
}
/*
* Return the amount of tasks waiting to be dispatched.
*/
static u64 nr_tasks_waiting(void)
{
return scx_bpf_dsq_nr_queued(PRIO_DSQ) +
scx_bpf_dsq_nr_queued(SHARED_DSQ);
}
/*
* Evaluate the task's time slice as a function of the total number of tasks
* that are waiting to be dispatched and the task's weight.
*/
static inline void task_refill_slice(struct task_struct *p)
{
u64 slice;
u64 curr_prio_waiting = scx_bpf_dsq_nr_queued(PRIO_DSQ);
u64 curr_shared_waiting = scx_bpf_dsq_nr_queued(SHARED_DSQ);
u64 scale_factor;
/*
* Refresh the amount of waiting tasks to get a more accurate scaling
* factor for the time slice.
*/
nr_waiting = (nr_waiting + nr_tasks_waiting()) / 2;
nr_prio_waiting = calc_avg(nr_prio_waiting, curr_prio_waiting);
nr_shared_waiting = calc_avg(nr_shared_waiting, curr_shared_waiting);
slice = slice_ns / (nr_waiting + 1);
p->scx.slice = CLAMP(slice, slice_ns_min, slice_ns);
/*
* Scale the time slice by a factor inversely proportional to the
* total number of tasks that are waiting (use a more immediate metric
* in lowlatency mode and an average in normal mode).
*/
if (lowlatency)
scale_factor = curr_shared_waiting + 1;
else
scale_factor = nr_prio_waiting + nr_shared_waiting + 1;
p->scx.slice = CLAMP(slice_max / scale_factor, slice_min, slice_max);
}
/*
* Return true if priority DSQ is congested, false otherwise.
*/
static bool is_prio_congested(void)
{
return scx_bpf_dsq_nr_queued(PRIO_DSQ) > nr_online_cpus * 4;
@ -439,7 +420,7 @@ static void handle_sync_wakeup(struct task_struct *p)
* the tasks that are already classified as interactive.
*/
tctx = try_lookup_task_ctx(p);
if (tctx && is_nvcsw_enabled() && !is_prio_congested())
if (tctx && !is_prio_congested())
tctx->is_interactive = true;
}
@ -738,8 +719,13 @@ static void kick_task_cpu(struct task_struct *p)
*/
void BPF_STRUCT_OPS(bpfland_enqueue, struct task_struct *p, u64 enq_flags)
{
struct task_ctx *tctx;
s32 dsq_id;
tctx = try_lookup_task_ctx(p);
if (!tctx)
return;
/*
* Per-CPU kthreads are critical for system responsiveness so make sure
* they are dispatched before any other task.
@ -757,12 +743,10 @@ void BPF_STRUCT_OPS(bpfland_enqueue, struct task_struct *p, u64 enq_flags)
* Dispatch interactive tasks to the priority DSQ and regular tasks to
* the shared DSQ.
*
* However, avoid queuing too many tasks to the priority DSQ: if we
* have a storm of interactive tasks (more than 4x the amount of CPUs
* that can consume them) we can just dispatch them to the shared DSQ
* and simply rely on the vruntime logic.
* When lowlatency is enabled, the separate priority DSQ is disabled,
* so in this case always dispatch to the shared DSQ.
*/
if (is_task_interactive(p)) {
if (!lowlatency && tctx->is_interactive) {
dsq_id = PRIO_DSQ;
__sync_fetch_and_add(&nr_prio_dispatches, 1);
} else {
@ -863,7 +847,7 @@ void BPF_STRUCT_OPS(bpfland_dispatch, s32 cpu, struct task_struct *prev)
* Scale target CPU frequency based on the performance level selected
* from user-space and the CPU utilization.
*/
static void update_cpuperf_target(struct task_struct *p)
static void update_cpuperf_target(struct task_struct *p, struct task_ctx *tctx)
{
u64 now = bpf_ktime_get_ns();
s32 cpu = scx_bpf_task_cpu(p);
@ -882,7 +866,7 @@ static void update_cpuperf_target(struct task_struct *p)
/*
* Auto mode: always set max performance for interactive tasks.
*/
if (is_task_interactive(p)) {
if (tctx->is_interactive) {
scx_bpf_cpuperf_set(cpu, SCX_CPUPERF_ONE);
return;
}
@ -916,46 +900,28 @@ void BPF_STRUCT_OPS(bpfland_running, struct task_struct *p)
{
struct task_ctx *tctx;
__sync_fetch_and_add(&nr_running, 1);
/*
* Refresh task's time slice immediately before it starts to run on its
* assigned CPU.
*/
task_refill_slice(p);
tctx = try_lookup_task_ctx(p);
if (!tctx)
return;
/*
* Adjust target CPU frequency before the task starts to run.
*/
update_cpuperf_target(p);
update_cpuperf_target(p, tctx);
tctx = try_lookup_task_ctx(p);
if (tctx) {
/*
* Update CPU interactive state.
*/
if (tctx->is_interactive)
__sync_fetch_and_add(&nr_interactive, 1);
/*
* Update task's running timestamp.
*/
tctx->last_running = bpf_ktime_get_ns();
}
__sync_fetch_and_add(&nr_running, 1);
}
static void update_task_interactive(struct task_ctx *tctx)
{
/*
* Classify the task based on the average of voluntary context
* switches.
*
* If the task has an average greater than the global average
* (nvcsw_avg_thresh) it is classified as interactive, otherwise the
* task is classified as regular.
* Update CPU interactive state.
*/
if (is_nvcsw_enabled())
tctx->is_interactive = tctx->avg_nvcsw >= nvcsw_avg_thresh;
if (tctx->is_interactive)
__sync_fetch_and_add(&nr_interactive, 1);
}
/*
@ -964,7 +930,7 @@ static void update_task_interactive(struct task_ctx *tctx)
*/
void BPF_STRUCT_OPS(bpfland_stopping, struct task_struct *p, bool runnable)
{
u64 now = bpf_ktime_get_ns(), task_slice;
u64 now = bpf_ktime_get_ns(), slice;
s32 cpu = scx_bpf_task_cpu(p);
s64 delta_t;
struct cpu_ctx *cctx;
@ -986,22 +952,23 @@ void BPF_STRUCT_OPS(bpfland_stopping, struct task_struct *p, bool runnable)
/*
* Update task's average runtime.
*/
task_slice = p->se.sum_exec_runtime - tctx->sum_exec_runtime;
slice = p->se.sum_exec_runtime - tctx->sum_exec_runtime;
if (lowlatency)
slice = CLAMP(slice, slice_min, slice_max);
tctx->sum_exec_runtime = p->se.sum_exec_runtime;
tctx->avg_runtime = calc_avg(tctx->avg_runtime, task_slice);
tctx->avg_runtime = calc_avg(tctx->avg_runtime, slice);
/*
* Update task vruntime and deadline, charging the weighted used time
* slice.
* Update task vruntime charging the weighted used time slice.
*/
task_slice = scale_inverse_fair(task_slice, p->scx.weight);
p->scx.dsq_vtime += task_slice;
slice = scale_inverse_fair(p, slice);
p->scx.dsq_vtime += slice;
tctx->deadline = p->scx.dsq_vtime + task_compute_dl(p, tctx);
/*
* Update global vruntime.
*/
vtime_now += task_slice;
vtime_now += slice;
/*
* Refresh voluntary context switch metrics.
@ -1009,23 +976,25 @@ void BPF_STRUCT_OPS(bpfland_stopping, struct task_struct *p, bool runnable)
* Evaluate the average number of voluntary context switches per second
* using an exponentially weighted moving average, see calc_avg().
*/
if (!lowlatency && !is_nvcsw_enabled())
return;
delta_t = (s64)(now - tctx->nvcsw_ts);
if (delta_t > NSEC_PER_SEC) {
u64 delta_nvcsw = p->nvcsw - tctx->nvcsw;
u64 avg_nvcsw = delta_nvcsw * NSEC_PER_SEC / delta_t;
u64 max_lat_weight = lowlatency ? MAX_LATENCY_WEIGHT :
MIN(nvcsw_max_thresh, MAX_LATENCY_WEIGHT);
/*
* Evaluate the average nvcsw for the task, limited to the
* range [0 .. 1000] to prevent excessive spikes.
*/
tctx->avg_nvcsw = calc_avg_clamp(tctx->avg_nvcsw, avg_nvcsw,
0, MAX(nvcsw_max_thresh, 1000));
tctx->nvcsw = p->nvcsw;
tctx->nvcsw_ts = now;
/*
* Evaluate the latency weight of the task as its average rate
* of voluntary context switches (limited to the max_lat_weight
* to prevent excessive spikes).
*/
tctx->lat_weight = calc_avg_clamp(tctx->lat_weight, avg_nvcsw,
0, max_lat_weight);
/*
* Update the global voluntary context switches average using
* an exponentially weighted moving average (EWMA) with the
* formula:
@ -1039,13 +1008,19 @@ void BPF_STRUCT_OPS(bpfland_stopping, struct task_struct *p, bool runnable)
* Additionally, restrict the global nvcsw_avg_thresh average
* to the range [1 .. nvcsw_max_thresh] to always allow the
* classification of some tasks as interactive.
*/
*/
nvcsw_avg_thresh = calc_avg_clamp(nvcsw_avg_thresh, avg_nvcsw,
1, nvcsw_max_thresh);
/*
* Reresh task status: interactive or regular.
* Classify the task based on the average of voluntary context
* switches.
*
* If the task's average exceeds the nvcsw_max_thresh threshold it is
* classified as interactive, otherwise the task is classified as
* regular.
*/
update_task_interactive(tctx);
tctx->is_interactive = tctx->lat_weight >= nvcsw_max_thresh;
}
}
@ -1064,12 +1039,9 @@ void BPF_STRUCT_OPS(bpfland_enable, struct task_struct *p)
tctx->sum_exec_runtime = p->se.sum_exec_runtime;
tctx->nvcsw = p->nvcsw;
tctx->nvcsw_ts = now;
tctx->avg_nvcsw = p->nvcsw * NSEC_PER_SEC / tctx->nvcsw_ts;
tctx->avg_runtime = slice_ns;
tctx->lat_weight = p->nvcsw * NSEC_PER_SEC / tctx->nvcsw_ts;
tctx->avg_runtime = slice_max;
tctx->deadline = vtime_now;
tctx->last_running = now;
update_task_interactive(tctx);
}
s32 BPF_STRUCT_OPS(bpfland_init_task, struct task_struct *p,


@ -138,11 +138,12 @@ struct Opts {
#[clap(short = 'l', long, allow_hyphen_values = true, default_value = "0")]
slice_us_lag: i64,
/// Shorten interactive tasks' deadline based on their average amount of voluntary context
/// switches.
/// With lowlatency enabled, instead of classifying tasks as interactive or non-interactive,
/// they all get a dynamic priority, which is adjusted as a function of their average rate of
/// voluntary context switches.
///
/// Enabling this option can be beneficial in soft real-time scenarios, such as audio
/// processing, multimedia, etc.
/// This option guarantees less spiky behavior and can be particularly useful in soft
/// real-time scenarios, such as audio processing, multimedia, etc.
#[clap(short = 'L', long, action = clap::ArgAction::SetTrue)]
lowlatency: bool,
@ -260,9 +261,9 @@ impl<'a> Scheduler<'a> {
skel.maps.rodata_data.smt_enabled = smt_enabled;
skel.maps.rodata_data.lowlatency = opts.lowlatency;
skel.maps.rodata_data.local_kthreads = opts.local_kthreads;
skel.maps.rodata_data.slice_ns = opts.slice_us * 1000;
skel.maps.rodata_data.slice_ns_min = opts.slice_us_min * 1000;
skel.maps.rodata_data.slice_ns_lag = opts.slice_us_lag * 1000;
skel.maps.rodata_data.slice_max = opts.slice_us * 1000;
skel.maps.rodata_data.slice_min = opts.slice_us_min * 1000;
skel.maps.rodata_data.slice_lag = opts.slice_us_lag * 1000;
skel.maps.rodata_data.starvation_thresh_ns = opts.starvation_thresh_us * 1000;
skel.maps.rodata_data.nvcsw_max_thresh = opts.nvcsw_max_thresh;
@ -555,7 +556,8 @@ impl<'a> Scheduler<'a> {
nr_running: self.skel.maps.bss_data.nr_running,
nr_cpus: self.skel.maps.bss_data.nr_online_cpus,
nr_interactive: self.skel.maps.bss_data.nr_interactive,
nr_waiting: self.skel.maps.bss_data.nr_waiting,
nr_prio_waiting: self.skel.maps.bss_data.nr_prio_waiting,
nr_shared_waiting: self.skel.maps.bss_data.nr_shared_waiting,
nvcsw_avg_thresh: self.skel.maps.bss_data.nvcsw_avg_thresh,
nr_kthread_dispatches: self.skel.maps.bss_data.nr_kthread_dispatches,
nr_direct_dispatches: self.skel.maps.bss_data.nr_direct_dispatches,


@ -21,8 +21,10 @@ pub struct Metrics {
pub nr_cpus: u64,
#[stat(desc = "Number of running interactive tasks")]
pub nr_interactive: u64,
#[stat(desc = "Average amount of tasks waiting to be dispatched")]
pub nr_waiting: u64,
#[stat(desc = "Average amount of regular tasks waiting to be dispatched")]
pub nr_shared_waiting: u64,
#[stat(desc = "Average amount of interactive tasks waiting to be dispatched")]
pub nr_prio_waiting: u64,
#[stat(desc = "Average of voluntary context switches")]
pub nvcsw_avg_thresh: u64,
#[stat(desc = "Number of kthread direct dispatches")]
@ -39,12 +41,13 @@ impl Metrics {
fn format<W: Write>(&self, w: &mut W) -> Result<()> {
writeln!(
w,
"[{}] tasks -> run: {:>2}/{:<2} int: {:<2} wait: {:<4} | nvcsw: {:<4} | dispatch -> kth: {:<5} dir: {:<5} pri: {:<5} shr: {:<5}",
"[{}] tasks -> r: {:>2}/{:<2} i: {:<2} pw: {:<4} w: {:<4} | nvcsw: {:<4} | dispatch -> k: {:<5} d: {:<5} p: {:<5} s: {:<5}",
crate::SCHEDULER_NAME,
self.nr_running,
self.nr_cpus,
self.nr_interactive,
self.nr_waiting,
self.nr_prio_waiting,
self.nr_shared_waiting,
self.nvcsw_avg_thresh,
self.nr_kthread_dispatches,
self.nr_direct_dispatches,