Merge pull request #199 from sched-ext/lavd-task-states

scx_lavd: Clean up task state transition tracking
Changwoo Min 2024-03-28 13:43:56 +09:00 committed by GitHub
commit 360d4ec457
3 changed files with 40 additions and 105 deletions


@@ -119,19 +119,6 @@ struct cpu_ctx {
volatile u64 sched_nr; /* number of schedules */
};
/*
* Per-task scheduling context
*/
enum task_stat {
_LAVD_TASK_STAT_MIN = 0,
LAVD_TASK_STAT_STOPPING = _LAVD_TASK_STAT_MIN,
LAVD_TASK_STAT_ENQ,
LAVD_TASK_STAT_RUNNING,
_LAVD_TASK_STAT_MAX = LAVD_TASK_STAT_RUNNING,
};
struct task_ctx {
/*
* Essential task running statistics for latency criticality calculation
@@ -151,7 +138,6 @@ struct task_ctx {
u64 slice_ns;
u64 greedy_ratio;
u64 lat_cri;
u16 stat; /* NIL -> ENQ -> RUN -> STOP -> NIL ... */
u16 slice_boost_prio;/* how many times a task fully consumed the slice */
u16 lat_prio; /* latency priority */
s16 lat_boost_prio; /* DEBUG */


@@ -1220,58 +1220,6 @@ static u64 calc_time_slice(struct task_struct *p, struct task_ctx *taskc)
return slice;
}
static bool transit_task_stat(struct task_ctx *taskc, int tgt_stat)
{
/*
* Update task loads only when the state transition is valid. So far,
* two types of invalid state transitions have been observed, and there
* are reasons for that. The two are as follows:
*
* - ENQ -> ENQ: This transition can happen because scx_lavd does not
* provide ops.dequeue. When task attributes are updated (e.g., nice
* level, allowed cpus and so on), the scx core will dequeue the task
* and re-enqueue it (ENQ->DEQ->ENQ). However, when ops.dequeue() is
* not provided, the dequeue operation is done by the scx core.
* Hence, ignoring the dequeue operation is completely fine.
*
* - STOPPING -> RUNNING: This can happen because there are several
* special cases where the scx core skips enqueue, including: 1) bypass
* mode is turned on (this is turned on during both init and exit;
* it is also used across suspend/resume operations), 2)
* SCX_OPS_ENQ_EXITING is not set and an exiting task was woken up,
* and 3) the associated CPU is not fully online. However, for
* accuracy, we avoid collecting time & frequency statistics in such
* special cases.
*
*  initial state
*  -------------
*       |
*       \/
* [STOPPING] --> [ENQ] --> [RUNNING]
*     /\                       |
*      |                       |
*      +-----------------------+
*/
const static int valid_tgt_stat[] = {
[LAVD_TASK_STAT_STOPPING] = LAVD_TASK_STAT_ENQ,
[LAVD_TASK_STAT_ENQ] = LAVD_TASK_STAT_RUNNING,
[LAVD_TASK_STAT_RUNNING] = LAVD_TASK_STAT_STOPPING,
};
int src_stat = taskc->stat;
if (src_stat < _LAVD_TASK_STAT_MIN || src_stat > _LAVD_TASK_STAT_MAX) {
scx_bpf_error("Invalid task state: %d", src_stat);
return false;
}
if (valid_tgt_stat[src_stat] == tgt_stat) {
taskc->stat = tgt_stat;
return true;
}
return false;
}
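The removed state machine above can be exercised on its own with a minimal userspace sketch like the one below. It mirrors the deleted enum task_stat and transition table; the fprintf() stands in for scx_bpf_error() purely for illustration and is not part of scx_lavd.

#include <stdio.h>
#include <stdbool.h>

/* Mirrors the removed enum task_stat. */
enum task_stat {
	_LAVD_TASK_STAT_MIN = 0,
	LAVD_TASK_STAT_STOPPING = _LAVD_TASK_STAT_MIN,
	LAVD_TASK_STAT_ENQ,
	LAVD_TASK_STAT_RUNNING,
	_LAVD_TASK_STAT_MAX = LAVD_TASK_STAT_RUNNING,
};

struct task_ctx {
	int stat;	/* STOP -> ENQ -> RUN -> STOP ... */
};

static bool transit_task_stat(struct task_ctx *taskc, int tgt_stat)
{
	/* The only valid successor of each state. */
	static const int valid_tgt_stat[] = {
		[LAVD_TASK_STAT_STOPPING] = LAVD_TASK_STAT_ENQ,
		[LAVD_TASK_STAT_ENQ]      = LAVD_TASK_STAT_RUNNING,
		[LAVD_TASK_STAT_RUNNING]  = LAVD_TASK_STAT_STOPPING,
	};
	int src_stat = taskc->stat;

	if (src_stat < _LAVD_TASK_STAT_MIN || src_stat > _LAVD_TASK_STAT_MAX) {
		fprintf(stderr, "Invalid task state: %d\n", src_stat);
		return false;
	}
	if (valid_tgt_stat[src_stat] == tgt_stat) {
		taskc->stat = tgt_stat;
		return true;
	}
	return false;
}

int main(void)
{
	struct task_ctx taskc = { .stat = LAVD_TASK_STAT_STOPPING };

	/* STOP -> ENQ -> RUN is accepted; a repeated ENQ is dropped. */
	printf("ENQ: %d\n", transit_task_stat(&taskc, LAVD_TASK_STAT_ENQ));
	printf("ENQ: %d\n", transit_task_stat(&taskc, LAVD_TASK_STAT_ENQ));
	printf("RUN: %d\n", transit_task_stat(&taskc, LAVD_TASK_STAT_RUNNING));
	return 0;
}

Compiled as-is it prints 1, 0, 1: the repeated ENQ -> ENQ transition is rejected, which is exactly why the old callers only updated load statistics when transit_task_stat() returned true.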
static void update_stat_for_enq(struct task_struct *p, struct task_ctx *taskc,
struct cpu_ctx *cpuc)
{
@@ -1323,13 +1271,6 @@ static void update_stat_for_stop(struct task_struct *p, struct task_ctx *taskc,
now = bpf_ktime_get_ns();
/*
* When stopped, reduce the per-CPU task load. Per-CPU task load will
* be aggregated periodically at update_sys_cpu_load().
*/
cpuc->load_actual -= taskc->load_actual;
cpuc->load_ideal -= get_task_load_ideal(p);
/*
* Update task's run_time. If a task got slice-boosted -- in other
* words, its time slices have been fully consumed multiple times,
@@ -1344,6 +1285,17 @@ static void update_stat_for_stop(struct task_struct *p, struct task_ctx *taskc,
taskc->last_stop_clk = now;
}
static void update_stat_for_quiescent(struct task_struct *p, struct task_ctx *taskc,
struct cpu_ctx *cpuc)
{
/*
* When quiescent, reduce the per-CPU task load. Per-CPU task load will
* be aggregated periodically at update_sys_cpu_load().
*/
cpuc->load_actual -= taskc->load_actual;
cpuc->load_ideal -= get_task_load_ideal(p);
}
static void calc_when_to_run(struct task_struct *p, struct task_ctx *taskc,
struct cpu_ctx *cpuc, u64 enq_flags)
{
@@ -1379,14 +1331,6 @@ static bool put_local_rq(struct task_struct *p, struct task_ctx *taskc,
if (!is_eligible(taskc))
return false;
/*
* Add task load based on the current statistics regardless of a target
* rq. Statistics will be adjusted when more accurate statistics
* become available (ops.running).
*/
if (transit_task_stat(taskc, LAVD_TASK_STAT_ENQ))
update_stat_for_enq(p, taskc, cpuc);
/*
* This task should be scheduled as soon as possible (e.g., it was just
* woken up), so the deadline is of no use; it is enqueued into a local DSQ, which
@@ -1416,12 +1360,6 @@ static bool put_global_rq(struct task_struct *p, struct task_ctx *taskc,
*/
calc_when_to_run(p, taskc, cpuc, enq_flags);
/*
* Reflect task's load immediately.
*/
if (transit_task_stat(taskc, LAVD_TASK_STAT_ENQ))
update_stat_for_enq(p, taskc, cpuc);
/*
* Enqueue the task to the global runqueue based on its virtual
* deadline.
@@ -1511,10 +1449,23 @@ void BPF_STRUCT_OPS(lavd_dispatch, s32 cpu, struct task_struct *prev)
void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
{
struct cpu_ctx *cpuc;
struct task_struct *waker;
struct task_ctx *taskc;
struct task_ctx *p_taskc, *waker_taskc;
u64 now, interval;
cpuc = get_cpu_ctx();
p_taskc = get_task_ctx(p);
if (!cpuc || !p_taskc)
return;
/*
* Add task load based on the current statistics regardless of a target
* rq. Statistics will be adjusted when more accurate statistics become
* available (ops.running).
*/
update_stat_for_enq(p, p_taskc, cpuc);
/*
* When a task @p is woken up, the wake frequency of its waker task
* is updated. The @current task is a waker and @p is a waiter, which
@@ -1524,8 +1475,8 @@ void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
return;
waker = bpf_get_current_task_btf();
taskc = try_get_task_ctx(waker);
if (!taskc) {
waker_taskc = try_get_task_ctx(waker);
if (!waker_taskc) {
/*
* In this case, the waker could be an idle task
* (swapper/_[_]), so we just ignore it.
@@ -1534,9 +1485,9 @@ void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
}
now = bpf_ktime_get_ns();
interval = now - taskc->last_wake_clk;
taskc->wake_freq = calc_avg_freq(taskc->wake_freq, interval);
taskc->last_wake_clk = now;
interval = now - waker_taskc->last_wake_clk;
waker_taskc->wake_freq = calc_avg_freq(waker_taskc->wake_freq, interval);
waker_taskc->last_wake_clk = now;
}
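The waker-side bookkeeping above measures the interval since the waker's last recorded wakeup and folds it into a running wake-frequency estimate via calc_avg_freq(), whose body is not part of this diff. The sketch below is a plausible userspace model only: the 3:1 weighting, the ONE_SEC_NS constant, and the note_wakeup() helper are my assumptions for illustration, not the scheduler's actual formula.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

/* Illustrative constant: one second in nanoseconds. */
#define ONE_SEC_NS	(1000ULL * 1000 * 1000)

/*
 * Hypothetical stand-in for calc_avg_freq(): blend the previous frequency
 * estimate with the frequency implied by the latest interval (events per
 * second), weighting history 3:1. The real scx_lavd formula may differ.
 */
static u64 calc_avg_freq(u64 old_freq, u64 interval_ns)
{
	u64 new_freq = interval_ns ? ONE_SEC_NS / interval_ns : old_freq;

	return (3 * old_freq + new_freq) / 4;
}

/* Only the two fields the wakeup path touches. */
struct waker_ctx {
	u64 wake_freq;		/* estimated wakeups per second */
	u64 last_wake_clk;	/* timestamp of the previous wakeup, ns */
};

/* Mirrors the tail of lavd_runnable(): update the waker's wake_freq. */
static void note_wakeup(struct waker_ctx *waker, u64 now_ns)
{
	u64 interval = now_ns - waker->last_wake_clk;

	waker->wake_freq = calc_avg_freq(waker->wake_freq, interval);
	waker->last_wake_clk = now_ns;
}

int main(void)
{
	struct waker_ctx w = { 0 };

	note_wakeup(&w, 10ULL * 1000 * 1000);	/* t = 10 ms */
	note_wakeup(&w, 20ULL * 1000 * 1000);	/* t = 20 ms */
	printf("wake_freq ~= %llu/s\n", (unsigned long long)w.wake_freq);
	return 0;
}

With two wakeups 10 ms apart the estimate moves toward 100 wakeups/s, the kind of signal the scheduler can draw on when estimating latency criticality.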
void BPF_STRUCT_OPS(lavd_running, struct task_struct *p)
@@ -1555,8 +1506,7 @@ void BPF_STRUCT_OPS(lavd_running, struct task_struct *p)
if (!cpuc)
return;
if (transit_task_stat(taskc, LAVD_TASK_STAT_RUNNING))
update_stat_for_run(p, taskc, cpuc);
update_stat_for_run(p, taskc, cpuc);
/*
* Calculate the task's time slice based on the updated load.
@@ -1619,8 +1569,7 @@ void BPF_STRUCT_OPS(lavd_stopping, struct task_struct *p, bool runnable)
if (!taskc)
return;
if (transit_task_stat(taskc, LAVD_TASK_STAT_STOPPING))
update_stat_for_stop(p, taskc, cpuc);
update_stat_for_stop(p, taskc, cpuc);
/*
* Adjust slice boost for the task's next schedule.
@@ -1630,9 +1579,17 @@ void BPF_STRUCT_OPS(lavd_stopping, struct task_struct *p, bool runnable)
void BPF_STRUCT_OPS(lavd_quiescent, struct task_struct *p, u64 deq_flags)
{
struct cpu_ctx *cpuc;
struct task_ctx *taskc;
u64 now, interval;
cpuc = get_cpu_ctx();
taskc = get_task_ctx(p);
if (!cpuc || !taskc)
return;
update_stat_for_quiescent(p, taskc, cpuc);
/*
* If a task @p is dequeued from a run queue for some reason
* other than going to sleep, it is an implementation-level side
@@ -1644,10 +1601,6 @@ void BPF_STRUCT_OPS(lavd_quiescent, struct task_struct *p, u64 deq_flags)
/*
* When a task @p goes to sleep, its associated wait_freq is updated.
*/
taskc = get_task_ctx(p);
if (!taskc)
return;
now = bpf_ktime_get_ns();
interval = now - taskc->last_wait_clk;
taskc->wait_freq = calc_avg_freq(taskc->wait_freq, interval);
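Taken together, the load bookkeeping now pairs ops.runnable with ops.quiescent instead of being gated by the old state machine at enqueue/stop time: the load is charged once when the task becomes runnable and uncharged once when it goes to sleep, while running/stopping cycles in between leave it untouched. The following is a minimal userspace model of just that pairing; it assumes, per the comments in the hunks above, that update_stat_for_enq() adds the task's load to the per-CPU counters, and it folds get_task_load_ideal() into a stored per-task field for simplicity.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

/* Toy contexts with only the load fields touched by the two helpers. */
struct cpu_ctx  { u64 load_actual; u64 load_ideal; };
struct task_ctx { u64 load_actual; u64 load_ideal; };

/* ops.runnable: charge the task's load to its CPU (per the diff's comment,
 * "regardless of a target rq"; adjusted later at ops.running). */
static void update_stat_for_enq(struct task_ctx *taskc, struct cpu_ctx *cpuc)
{
	cpuc->load_actual += taskc->load_actual;
	cpuc->load_ideal  += taskc->load_ideal;
}

/* ops.quiescent: the task went to sleep, so uncharge its load. Before this
 * commit the uncharge lived in update_stat_for_stop(), i.e., it ran whenever
 * the RUNNING -> STOPPING transition was accepted, even if the task stayed
 * runnable. */
static void update_stat_for_quiescent(struct task_ctx *taskc, struct cpu_ctx *cpuc)
{
	cpuc->load_actual -= taskc->load_actual;
	cpuc->load_ideal  -= taskc->load_ideal;
}

int main(void)
{
	struct cpu_ctx cpuc = { 0, 0 };
	struct task_ctx taskc = { .load_actual = 700, .load_ideal = 1000 };

	update_stat_for_enq(&taskc, &cpuc);		/* task wakes up      */
	/* ... any number of running <-> stopping cycles happen here ...   */
	update_stat_for_quiescent(&taskc, &cpuc);	/* task goes to sleep */

	printf("cpu load_actual=%llu load_ideal=%llu\n",
	       (unsigned long long)cpuc.load_actual,
	       (unsigned long long)cpuc.load_ideal);	/* both back to 0 */
	return 0;
}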


@@ -10,10 +10,6 @@ pub use bpf_skel::*;
pub mod bpf_intf;
pub use bpf_intf::*;
extern crate libc;
extern crate plain;
extern crate static_assertions;
use std::mem;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;