Mirror of https://github.com/sched-ext/scx.git (synced 2024-11-25 04:00:24 +00:00)

commit 048662a692
Merge pull request #204 from multics69/scx-lavd-runtime-runfreq

scx_lavd: directly accumulate task's runtimes for consecutive runnings
@@ -61,12 +61,11 @@ enum consts {
         LAVD_BOOST_RANGE                = 14, /* 35% of nice range */
         LAVD_BOOST_WAKEUP_LAT           = 1,
-        LAVD_SLICE_BOOST_MAX_PRIO       = (LAVD_SLICE_MAX_NS/LAVD_SLICE_MIN_NS),
         LAVD_SLICE_BOOST_MAX_STEP       = 3,
         LAVD_GREEDY_RATIO_MAX           = USHRT_MAX,
 
         LAVD_ELIGIBLE_TIME_LAT_FT       = 2,
-        LAVD_ELIGIBLE_TIME_MAX          = (LAVD_TARGETED_LATENCY_NS >> 1),
+        LAVD_ELIGIBLE_TIME_MAX          = LAVD_TARGETED_LATENCY_NS,
 
         LAVD_CPU_UTIL_MAX               = 1000, /* 100.0% */
         LAVD_CPU_UTIL_INTERVAL_NS       = (100 * NSEC_PER_MSEC), /* 100 msec */
@@ -121,26 +120,34 @@ struct cpu_ctx {
 
 struct task_ctx {
         /*
-         * Essential task running statistics for latency criticality calculation
+         * Clocks when a task state transition happens for task statistics calculation
         */
-        u64     last_start_clk;         /* last time when scheduled in */
-        u64     last_stop_clk;          /* last time when scheduled out */
-        u64     run_time_ns;            /* average runtime per schedule */
-        u64     run_freq;               /* scheduling frequency in a second */
-        u64     last_wait_clk;          /* last time when a task waits for an event */
-        u64     wait_freq;              /* waiting frequency in a second */
-        u64     wake_freq;              /* waking-up frequency in a second */
-        u64     last_wake_clk;          /* last time when a task wakes up others */
+        u64     last_runnable_clk;      /* last time when a task wakes up others */
+        u64     last_running_clk;       /* last time when scheduled in */
+        u64     last_stopping_clk;      /* last time when scheduled out */
+        u64     last_quiescent_clk;     /* last time when a task waits for an event */
 
-        u64     load_actual;            /* task load derived from run_time and run_freq */
-        u64     vdeadline_delta_ns;
-        u64     eligible_delta_ns;
-        u64     slice_ns;
-        u64     greedy_ratio;
-        u64     lat_cri;
-        u16     slice_boost_prio;       /* how many times a task fully consumed the slice */
-        u16     lat_prio;               /* latency priority */
-        s16     lat_boost_prio;         /* DEBUG */
+        /*
+         * Task running statistics for latency criticality calculation
+         */
+        u64     acc_run_time_ns;        /* accmulated runtime from runnable to quiescent state */
+        u64     run_time_ns;            /* average runtime per schedule */
+        u64     run_freq;               /* scheduling frequency in a second */
+        u64     wait_freq;              /* waiting frequency in a second */
+        u64     wake_freq;              /* waking-up frequency in a second */
+        u64     load_actual;            /* task load derived from run_time and run_freq */
+
+        /*
+         * Task deadline and time slice
+         */
+        u64     vdeadline_delta_ns;     /* time delta until task's virtual deadline */
+        u64     eligible_delta_ns;      /* time delta until task becomes eligible */
+        u64     slice_ns;               /* time slice */
+        u64     greedy_ratio;           /* task's overscheduling ratio compared to its nice priority */
+        u64     lat_cri;                /* calculated latency criticality */
+        u16     slice_boost_prio;       /* how many times a task fully consumed the slice */
+        u16     lat_prio;               /* latency priority */
+        s16     lat_boost_prio;         /* DEBUG */
 };
 
 struct task_ctx_x {
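The renamed clock fields track the four sched_ext state transitions that the statistics in the later hunks are computed from. Below is a minimal userspace sketch (plain C, not scx_lavd code) of how those hunks derive a per-schedule run span and a wait period from these clocks:

/*
 * Illustrative sketch only -- not scx_lavd code. It mirrors how the later
 * hunks derive per-schedule statistics from the renamed clocks.
 */
#include <stdint.h>

typedef uint64_t u64;

struct task_clocks {
        u64 last_runnable_clk;
        u64 last_running_clk;
        u64 last_stopping_clk;
        u64 last_quiescent_clk;
};

/* Time spent on the CPU during the last schedule (see slice_fully_consumed()). */
static u64 last_run_span(const struct task_clocks *c)
{
        return c->last_stopping_clk - c->last_running_clk;
}

/* Gap since the task last went quiescent, fed into the run_freq average
 * (see update_stat_for_running()). */
static u64 wait_period(const struct task_clocks *c, u64 now)
{
        return now - c->last_quiescent_clk;
}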
@@ -1171,10 +1171,7 @@ static u64 calc_slice_share(struct task_struct *p, struct task_ctx *taskc)
         * scheduler tries to allocate a longer time slice.
         */
        u64 share = get_task_load_ideal(p);
-       u64 slice_boost_step = min(taskc->slice_boost_prio,
-                                  LAVD_SLICE_BOOST_MAX_STEP);
 
-       share += (share * slice_boost_step) / LAVD_SLICE_BOOST_MAX_STEP;
+       share += (share * taskc->slice_boost_prio) / LAVD_SLICE_BOOST_MAX_STEP;
 
        return share;
 }
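The dropped min() clamp appears redundant because adjust_slice_boost() (later in this diff) now caps slice_boost_prio at LAVD_SLICE_BOOST_MAX_STEP before it ever reaches calc_slice_share(). A standalone sketch of the share arithmetic with made-up inputs:

/* Sketch only: the share boost from calc_slice_share(), with hypothetical
 * numbers. With LAVD_SLICE_BOOST_MAX_STEP == 3 (see the enum hunk above),
 * a task whose slice_boost_prio is saturated at 3 gets its share doubled. */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

#define LAVD_SLICE_BOOST_MAX_STEP 3

static u64 boosted_share(u64 share, u64 slice_boost_prio)
{
        /* Same arithmetic as the new calc_slice_share() line in the diff. */
        return share + (share * slice_boost_prio) / LAVD_SLICE_BOOST_MAX_STEP;
}

int main(void)
{
        u64 share = 1000;       /* hypothetical get_task_load_ideal(p) */

        for (u64 prio = 0; prio <= LAVD_SLICE_BOOST_MAX_STEP; prio++)
                printf("slice_boost_prio=%llu -> share=%llu\n",
                       (unsigned long long)prio,
                       (unsigned long long)boosted_share(share, prio));
        /* prints 1000, 1333, 1666, 2000 */
        return 0;
}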
@@ -1221,28 +1218,34 @@ static u64 calc_time_slice(struct task_struct *p, struct task_ctx *taskc)
        return slice;
 }
 
-static void update_stat_for_enq(struct task_struct *p, struct task_ctx *taskc,
-                               struct cpu_ctx *cpuc)
+static void update_stat_for_runnable(struct task_struct *p,
+                                    struct task_ctx *taskc,
+                                    struct cpu_ctx *cpuc)
 {
        /*
         * Reflect task's load immediately.
         */
        taskc->load_actual = calc_task_load_actual(taskc);
+       taskc->acc_run_time_ns = 0;
        cpuc->load_actual += taskc->load_actual;
        cpuc->load_ideal += get_task_load_ideal(p);
 }
 
-static void update_stat_for_run(struct task_struct *p, struct task_ctx *taskc,
-                               struct cpu_ctx *cpuc)
+static void update_stat_for_running(struct task_struct *p,
+                                   struct task_ctx *taskc,
+                                   struct cpu_ctx *cpuc)
 {
-       u64 now, wait_period, interval;
+       u64 wait_period, interval;
+       u64 now = bpf_ktime_get_ns();
+
+       if (!have_scheduled(taskc))
+               goto clk_out;
 
        /*
         * Since this is the start of a new schedule for @p, we update run
         * frequency in a second using an exponential weighted moving average.
         */
-       now = bpf_ktime_get_ns();
-       wait_period = now - taskc->last_stop_clk;
+       wait_period = now - taskc->last_quiescent_clk;
        interval = taskc->run_time_ns + wait_period;
        taskc->run_freq = calc_avg_freq(taskc->run_freq, interval);
 
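update_stat_for_running() folds the observed scheduling interval (the task's average runtime plus the wait since it last went quiescent) into run_freq with an exponential weighted moving average; the same calc_avg_freq() helper is reused for wait_freq and wake_freq in later hunks. Its body is not part of this diff, so the sketch below assumes a plausible shape (interval converted to events per second, then a 50/50 blend) purely for illustration:

/* Sketch, assuming a plausible calc_avg()/calc_avg_freq(); the real helpers
 * are defined elsewhere in scx_lavd and may differ in detail. */
#include <stdint.h>

typedef uint64_t u64;

#define NSEC_PER_SEC 1000000000ull

/* Simple 50/50 exponentially weighted moving average (assumed). */
static u64 calc_avg(u64 old_val, u64 new_val)
{
        return (old_val >> 1) + (new_val >> 1);
}

/* Convert "one event every @interval ns" into events per second, then blend. */
static u64 calc_avg_freq(u64 old_freq, u64 interval)
{
        u64 new_freq = interval ? NSEC_PER_SEC / interval : old_freq;

        return calc_avg(old_freq, new_freq);
}

/* Mirrors the run_freq update in update_stat_for_running():
 *   interval = run_time_ns + (now - last_quiescent_clk) */
static u64 update_run_freq(u64 run_freq, u64 run_time_ns, u64 wait_period)
{
        return calc_avg_freq(run_freq, run_time_ns + wait_period);
}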
@@ -1250,51 +1253,43 @@ static void update_stat_for_run(struct task_struct *p, struct task_ctx *taskc,
         * Update per-CPU latency criticality information for ever-scheduled
         * tasks
         */
-       if (have_scheduled(taskc)) {
-               if (cpuc->max_lat_cri < taskc->lat_cri)
-                       cpuc->max_lat_cri = taskc->lat_cri;
-               if (cpuc->min_lat_cri > taskc->lat_cri)
-                       cpuc->min_lat_cri = taskc->lat_cri;
-               cpuc->sum_lat_cri += taskc->lat_cri;
-               cpuc->sched_nr++;
-       }
+       if (cpuc->max_lat_cri < taskc->lat_cri)
+               cpuc->max_lat_cri = taskc->lat_cri;
+       if (cpuc->min_lat_cri > taskc->lat_cri)
+               cpuc->min_lat_cri = taskc->lat_cri;
+       cpuc->sum_lat_cri += taskc->lat_cri;
+       cpuc->sched_nr++;
 
+clk_out:
        /*
         * Update task state when starts running.
         */
-       taskc->last_start_clk = now;
+       taskc->last_running_clk = now;
 }
 
-static void update_stat_for_stop(struct task_struct *p, struct task_ctx *taskc,
-                                struct cpu_ctx *cpuc)
+static void update_stat_for_stopping(struct task_struct *p,
+                                    struct task_ctx *taskc,
+                                    struct cpu_ctx *cpuc)
 {
-       u64 now, run_time_ns, run_time_boosted_ns;
-
-       now = bpf_ktime_get_ns();
+       u64 now = bpf_ktime_get_ns();
 
        /*
-        * Adjust slice boost for the task's next schedule. Note that the
-        * updating slice_boost_prio should be done before updating
-        * run_time_boosted_ns, since the run_time_boosted_ns calculation
-        * requires updated slice_boost_prio.
+        * Update task's run_time. When a task is scheduled consecutively
+        * without ops.quiescent(), the task's runtime is accumulated for
+        * statistics. Suppose a task is scheduled 2ms, 2ms, and 2ms with the
+        * time slice exhausted. If 6ms of time slice was given in the first
+        * place, the task will entirely consume the time slice. Hence, the
+        * consecutive execution is accumulated and reflected in the
+        * calculation of runtime statistics.
         */
-       taskc->last_stop_clk = now;
-       adjust_slice_boost(cpuc, taskc);
-
-       /*
-        * Update task's run_time. If a task got slice-boosted -- in other
-        * words, its time slices have been fully consumed multiple times,
-        * stretch the measured runtime according to the slice_boost_prio.
-        * The stretched runtime more accurately reflects the actual runtime
-        * per schedule as if a large enough time slice was given in the first
-        * place.
-        */
-       run_time_ns = now - taskc->last_start_clk;
-       run_time_boosted_ns = run_time_ns * (1 + taskc->slice_boost_prio);
-       taskc->run_time_ns = calc_avg(taskc->run_time_ns, run_time_boosted_ns);
+       taskc->acc_run_time_ns += now - taskc->last_running_clk;
+       taskc->run_time_ns = calc_avg(taskc->run_time_ns,
+                                     taskc->acc_run_time_ns);
+       taskc->last_stopping_clk = now;
 }
 
-static void update_stat_for_quiescent(struct task_struct *p, struct task_ctx *taskc,
+static void update_stat_for_quiescent(struct task_struct *p,
+                                     struct task_ctx *taskc,
                                      struct cpu_ctx *cpuc)
 {
        /*
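Working the comment's example through: three back-to-back 2ms runs with no ops.quiescent() in between accumulate into a single 6ms sample, because acc_run_time_ns is only reset in update_stat_for_runnable(), i.e. when the task becomes runnable again after sleeping. The sketch below assumes calc_avg() is a simple 50/50 moving average; the real helper lives elsewhere in scx_lavd and may differ:

/* Sketch of the accumulation in update_stat_for_stopping(), using the 2ms
 * example from the comment above. calc_avg() is an assumption. */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

static u64 calc_avg(u64 old_val, u64 new_val)
{
        return (old_val >> 1) + (new_val >> 1);
}

int main(void)
{
        u64 acc_run_time_ns = 0;        /* reset in update_stat_for_runnable() */
        u64 run_time_ns = 6000000;      /* prior average: 6 ms (made up)       */
        u64 spans_ns[] = { 2000000, 2000000, 2000000 }; /* three 2 ms runs     */

        /* Three consecutive ops.stopping() calls with no ops.quiescent() in
         * between: the accumulated runtime, not each 2 ms span, feeds the
         * average. */
        for (int i = 0; i < 3; i++) {
                acc_run_time_ns += spans_ns[i];  /* now - last_running_clk */
                run_time_ns = calc_avg(run_time_ns, acc_run_time_ns);
        }
        printf("acc=%llu avg=%llu\n",            /* acc=6000000 avg=5000000 */
               (unsigned long long)acc_run_time_ns,
               (unsigned long long)run_time_ns);
        return 0;
}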
@@ -1463,17 +1458,17 @@ void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
        struct task_ctx *p_taskc, *waker_taskc;
        u64 now, interval;
 
-       cpuc = get_cpu_ctx();
-       p_taskc = get_task_ctx(p);
-       if (!cpuc || !p_taskc)
-               return;
-
-       /*
-        * Add task load based on the current statistics regardless of a target
-        * rq. Statistics will be adjusted when more accurate statistics become
-        * available (ops.running).
-        */
-       update_stat_for_enq(p, p_taskc, cpuc);
+       cpuc = get_cpu_ctx();
+       p_taskc = get_task_ctx(p);
+       if (!cpuc || !p_taskc)
+               return;
+
+       update_stat_for_runnable(p, p_taskc, cpuc);
 
        /*
         * When a task @p is wakened up, the wake frequency of its waker task
@@ -1494,28 +1489,25 @@ void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
        }
 
        now = bpf_ktime_get_ns();
-       interval = now - waker_taskc->last_wake_clk;
+       interval = now - waker_taskc->last_runnable_clk;
        waker_taskc->wake_freq = calc_avg_freq(waker_taskc->wake_freq, interval);
-       waker_taskc->last_wake_clk = now;
+       waker_taskc->last_runnable_clk = now;
 }
 
 void BPF_STRUCT_OPS(lavd_running, struct task_struct *p)
 {
-       struct task_ctx *taskc;
        struct cpu_ctx *cpuc;
+       struct task_ctx *taskc;
 
        /*
-        * Update task statistics then adjust task load based on the update.
+        * Update task statistics
         */
-       taskc = get_task_ctx(p);
-       if (!taskc)
-               return;
-
        cpuc = get_cpu_ctx();
-       if (!cpuc)
+       taskc = get_task_ctx(p);
+       if (!cpuc || !taskc)
                return;
 
-       update_stat_for_run(p, taskc, cpuc);
+       update_stat_for_running(p, taskc, cpuc);
 
        /*
         * Calcualte task's time slice based on updated load.
@@ -1535,12 +1527,12 @@ static bool slice_fully_consumed(struct cpu_ctx *cpuc, struct task_ctx *taskc)
        /*
         * Sanity check just to make sure the runtime is positive.
         */
-       if (taskc->last_stop_clk < taskc->last_start_clk) {
+       if (taskc->last_stopping_clk < taskc->last_running_clk) {
                scx_bpf_error("run_time_ns is negative: 0x%llu - 0x%llu",
-                             taskc->last_stop_clk, taskc->last_start_clk);
+                             taskc->last_stopping_clk, taskc->last_running_clk);
        }
 
-       run_time_ns = taskc->last_stop_clk - taskc->last_start_clk;
+       run_time_ns = taskc->last_stopping_clk - taskc->last_running_clk;
 
        return run_time_ns >= taskc->slice_ns;
 }
@@ -1553,7 +1545,7 @@ static void adjust_slice_boost(struct cpu_ctx *cpuc, struct task_ctx *taskc)
         * fully consumed, decrease the slice boost priority by half.
         */
        if (slice_fully_consumed(cpuc, taskc)) {
-               if (taskc->slice_boost_prio < LAVD_SLICE_BOOST_MAX_PRIO)
+               if (taskc->slice_boost_prio < LAVD_SLICE_BOOST_MAX_STEP)
                        taskc->slice_boost_prio++;
        }
        else {
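With LAVD_SLICE_BOOST_MAX_PRIO gone from the enum, slice_boost_prio is now capped directly at LAVD_SLICE_BOOST_MAX_STEP, which is also why calc_slice_share() above no longer needs its min() clamp. The else branch is only partially visible in this hunk; the sketch below fills in the "decrease by half" behaviour as an assumption based on the surrounding comment:

/* Sketch of the slice boost dynamics after this change. The increment path
 * matches the diff; the halving path is assumed from the comment above and
 * is not code shown in this hunk. */
#include <stdint.h>
#include <stdbool.h>

typedef uint16_t u16;

#define LAVD_SLICE_BOOST_MAX_STEP 3

static void adjust_slice_boost_sketch(u16 *slice_boost_prio, bool fully_consumed)
{
        if (fully_consumed) {
                /* Ran to the end of its slice: boost, capped at MAX_STEP. */
                if (*slice_boost_prio < LAVD_SLICE_BOOST_MAX_STEP)
                        (*slice_boost_prio)++;
        } else {
                /* Yielded early: decay the boost (assumed halving). */
                *slice_boost_prio /= 2;
        }
}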
@@ -1568,17 +1560,19 @@ void BPF_STRUCT_OPS(lavd_stopping, struct task_struct *p, bool runnable)
        struct task_ctx *taskc;
 
        /*
-        * Reduce the task load.
+        * Update task statistics
         */
        cpuc = get_cpu_ctx();
-       if (!cpuc)
-               return;
-
        taskc = get_task_ctx(p);
-       if (!taskc)
+       if (!cpuc || !taskc)
                return;
 
-       update_stat_for_stop(p, taskc, cpuc);
+       update_stat_for_stopping(p, taskc, cpuc);
+
+       /*
+        * Adjust slice boost for the task's next schedule.
+        */
+       adjust_slice_boost(cpuc, taskc);
 }
 
 void BPF_STRUCT_OPS(lavd_quiescent, struct task_struct *p, u64 deq_flags)
@@ -1587,6 +1581,9 @@ void BPF_STRUCT_OPS(lavd_quiescent, struct task_struct *p, u64 deq_flags)
        struct task_ctx *taskc;
        u64 now, interval;
 
+       /*
+        * Substract task load from the current CPU's load.
+        */
        cpuc = get_cpu_ctx();
        taskc = get_task_ctx(p);
        if (!cpuc || !taskc)
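The new comment spells out that the quiescent path undoes the per-CPU load that update_stat_for_runnable() added (cpuc->load_actual += ..., cpuc->load_ideal += ... in the hunk above). The subtraction itself is not visible in this hunk, so the sketch below pairs the two sides as an assumption:

/* Sketch of the per-CPU load bookkeeping implied by the comments: the add
 * side mirrors update_stat_for_runnable() in this diff; the subtract side is
 * assumed from the "Substract task load" comment and is not shown here. */
#include <stdint.h>

typedef uint64_t u64;

struct cpu_load {
        u64 load_actual;
        u64 load_ideal;
};

static void add_task_load(struct cpu_load *cpu, u64 load_actual, u64 load_ideal)
{
        cpu->load_actual += load_actual;        /* ops.runnable() path */
        cpu->load_ideal  += load_ideal;
}

static void sub_task_load(struct cpu_load *cpu, u64 load_actual, u64 load_ideal)
{
        cpu->load_actual -= load_actual;        /* ops.quiescent() path (assumed) */
        cpu->load_ideal  -= load_ideal;
}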
@@ -1606,9 +1603,9 @@ void BPF_STRUCT_OPS(lavd_quiescent, struct task_struct *p, u64 deq_flags)
         * When a task @p goes to sleep, its associated wait_freq is updated.
         */
        now = bpf_ktime_get_ns();
-       interval = now - taskc->last_wait_clk;
+       interval = now - taskc->last_quiescent_clk;
        taskc->wait_freq = calc_avg_freq(taskc->wait_freq, interval);
-       taskc->last_wait_clk = now;
+       taskc->last_quiescent_clk = now;
 }
 
 void BPF_STRUCT_OPS(lavd_cpu_online, s32 cpu)
@@ -1712,14 +1709,16 @@ s32 BPF_STRUCT_OPS(lavd_init_task, struct task_struct *p,
 
 
        /*
-        * Initialize @p's context.
+        * Initialize @p's context with the current clock and default load.
         */
        now = bpf_ktime_get_ns();
-       taskc->last_start_clk = now;
-       taskc->last_stop_clk = now;
-       taskc->last_wait_clk = now;
-       taskc->last_wake_clk = now;
+       taskc->last_runnable_clk = now;
+       taskc->last_running_clk = now;
+       taskc->last_stopping_clk = now;
+       taskc->last_quiescent_clk = now;
        taskc->greedy_ratio = 1000;
+       taskc->run_time_ns = LAVD_LC_RUNTIME_MAX;
+       taskc->run_freq = 1;
 
        /*
         * When a task is forked, we immediately reflect changes to the current