Mirror of https://github.com/JakeHillion/scx.git
synced 2024-11-26 11:30:22 +00:00
Merge pull request #457 from multics69/lavd-amp-v2
scx_lavd: support two-level scheduling for heavy-loaded cases (like bpfland)
This commit is contained in: commit 643edb5431
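Editorial summary of the change below (not part of the commit message): the single LAVD_GLOBAL_DSQ is replaced by two global run queues, LAVD_LATENCY_CRITICAL_DSQ and LAVD_REGULAR_DSQ. On enqueue, a task whose latency criticality is above the running system average is routed to the latency-critical queue; on dispatch, that queue is consumed first, and LAVD_DSQ_STARVE_TIMEOUT bounds how long either queue can go unserved. The sketch below condenses that routing; the helper name route_task_to_dsq is hypothetical, and the real logic lives in put_global_rq() and consume_task() in the hunks that follow.

/* Condensed, illustrative sketch of the two-level enqueue path; not verbatim from the commit. */
static void route_task_to_dsq(struct task_struct *p, struct task_ctx *taskc, u64 enq_flags)
{
        struct sys_stat *stat_cur = get_sys_stat_cur();
        u64 dsq_id = LAVD_REGULAR_DSQ;

        /*
         * Tasks above the average latency criticality (or all tasks, when
         * two-level scheduling is disabled) go to the latency-critical DSQ.
         */
        if (no_2_level_scheduling || taskc->lat_cri > stat_cur->avg_lat_cri)
                dsq_id = LAVD_LATENCY_CRITICAL_DSQ;

        /* Within the chosen DSQ, tasks are ordered by virtual deadline. */
        scx_bpf_dispatch_vtime(p, dsq_id, LAVD_SLICE_UNDECIDED,
                               taskc->vdeadline_log_clk, enq_flags);
}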
@@ -56,7 +56,7 @@ enum consts {
        LAVD_TIME_INFINITY_NS = SCX_SLICE_INF,
        LAVD_MAX_RETRY = 4,

        LAVD_TARGETED_LATENCY_NS = (15ULL * NSEC_PER_MSEC),
        LAVD_TARGETED_LATENCY_NS = (20ULL * NSEC_PER_MSEC),
        LAVD_SLICE_MIN_NS = (30ULL * NSEC_PER_USEC), /* min time slice */
        LAVD_SLICE_MAX_NS = ( 3ULL * NSEC_PER_MSEC), /* max time slice */
        LAVD_SLICE_UNDECIDED = SCX_SLICE_INF,
@@ -90,7 +90,9 @@ enum consts {
        LAVD_CC_CPU_PIN_INTERVAL_DIV = (LAVD_CC_CPU_PIN_INTERVAL /
                                        LAVD_SYS_STAT_INTERVAL_NS),

        LAVD_GLOBAL_DSQ = 0, /* a global DSQ for eligible tasks */
        LAVD_LATENCY_CRITICAL_DSQ = 0, /* a global DSQ for latency-critical tasks */
        LAVD_REGULAR_DSQ = 1, /* a global DSQ for non-latency-critical tasks */
        LAVD_DSQ_STARVE_TIMEOUT = (5ULL * NSEC_PER_USEC),
};

/*
@@ -105,7 +107,6 @@ struct sys_stat {

        volatile u32 avg_lat_cri; /* average latency criticality (LC) */
        volatile u32 max_lat_cri; /* maximum latency criticality (LC) */
        volatile u32 min_lat_cri; /* minimum latency criticality (LC) */
        volatile u32 thr_lat_cri; /* latency criticality threshold for kicking */

        volatile u32 avg_perf_cri; /* average performance criticality */
@@ -143,7 +144,6 @@ struct cpu_ctx {
         * Information used to keep track of latency criticality
         */
        volatile u32 max_lat_cri; /* maximum latency criticality */
        volatile u32 min_lat_cri; /* minimum latency criticality */
        volatile u32 sum_lat_cri; /* sum of latency criticality */
        volatile u32 sched_nr; /* number of schedules */
@@ -206,6 +206,7 @@ struct task_ctx {
        u64 slice_ns; /* time slice */
        u32 greedy_ratio; /* task's overscheduling ratio compared to its nice priority */
        u32 lat_cri; /* calculated latency criticality */
        u32 starv_cri; /* calculated starvation criticality */
        volatile s32 victim_cpu;
        u16 slice_boost_prio; /* how many times a task fully consumed the slice */
@@ -224,11 +224,18 @@ static u64 cur_logical_clk;
 */
static u64 cur_svc_time;

/*
 * Last task consumption time
 */
static u64 lat_cri_rq_clk; /* last task consumption time for latency-critical tasks */
static u64 regular_rq_clk; /* last task consumption time for regular tasks */

/*
 * Options
 */
const volatile bool no_freq_scaling;
const volatile bool no_core_compaction;
const volatile bool no_2_level_scheduling;
const volatile bool no_freq_scaling;
const volatile u8 verbose;

UEI_DEFINE(uei);
@@ -580,7 +587,6 @@ struct sys_stat_ctx {
        u64 tot_svc_time;
        u64 load_run_time_ns;
        s32 max_lat_cri;
        s32 min_lat_cri;
        s32 avg_lat_cri;
        u64 sum_lat_cri;
        u32 sched_nr;
@@ -598,7 +604,6 @@ static void init_sys_stat_ctx(struct sys_stat_ctx *c)
        c->stat_next = get_sys_stat_next();
        c->now = bpf_ktime_get_ns();
        c->duration = c->now - c->stat_cur->last_update_clk;
        c->min_lat_cri = UINT_MAX;
}

static void collect_sys_stat(struct sys_stat_ctx *c)
@@ -636,10 +641,6 @@ static void collect_sys_stat(struct sys_stat_ctx *c)
        c->max_lat_cri = cpuc->max_lat_cri;
        cpuc->max_lat_cri = 0;

        if (cpuc->min_lat_cri < c->min_lat_cri)
                c->min_lat_cri = cpuc->min_lat_cri;
        cpuc->min_lat_cri = UINT_MAX;

        /*
         * Accumulate task's performance criticality information.
         */
@@ -697,7 +698,6 @@ static void calc_sys_stat(struct sys_stat_ctx *c)
         * When a system is completely idle, it is indeed possible that
         * nothing is scheduled for an interval.
         */
        c->min_lat_cri = c->stat_cur->min_lat_cri;
        c->max_lat_cri = c->stat_cur->max_lat_cri;
        c->avg_lat_cri = c->stat_cur->avg_lat_cri;
        c->avg_perf_cri = c->stat_cur->avg_perf_cri;
@@ -721,8 +721,6 @@ static void update_sys_stat_next(struct sys_stat_ctx *c)
        stat_next->util =
                calc_avg(stat_cur->util, c->new_util);

        stat_next->min_lat_cri =
                calc_avg32(stat_cur->min_lat_cri, c->min_lat_cri);
        stat_next->max_lat_cri =
                calc_avg32(stat_cur->max_lat_cri, c->max_lat_cri);
        stat_next->avg_lat_cri =
@@ -907,7 +905,7 @@ static int update_timer_cb(void *map, int *key, struct bpf_timer *timer)
        return 0;
}

static u32 calc_greedy_ratio(struct task_struct *p, struct task_ctx *taskc)
static u32 calc_greedy_ratio(struct task_ctx *taskc)
{
        struct sys_stat *stat_cur = get_sys_stat_cur();
        u32 ratio;
@@ -953,21 +951,12 @@ static bool is_eligible(struct task_ctx *taskc)
        return taskc->greedy_ratio <= 1000;
}

static bool is_wakeup_wf(u64 wake_flags)
{
        /*
         * We don't need to test SCX_WAKE_SYNC because SCX_WAKE_SYNC should
         * only be set when SCX_WAKE_TTWU is set.
         */
        return wake_flags & SCX_WAKE_TTWU;
}

static bool is_wakeup_ef(u64 enq_flags)
{
        return enq_flags & SCX_ENQ_WAKEUP;
}

static u64 calc_eligible_delta(struct task_struct *p, struct task_ctx *taskc)
static u64 calc_eligible_delta(struct task_ctx *taskc)
{
        /*
         * We calculate how long a task should be ineligible for execution. To
@@ -995,7 +984,7 @@ static u64 calc_eligible_delta(struct task_struct *p, struct task_ctx *taskc)
         * If a task is too greedy to be eligible, don't put it into the local
         * rq, so another eligible task can be picked later.
         */
        calc_greedy_ratio(p, taskc);
        calc_greedy_ratio(taskc);

        /*
         * Considering the task's greedy ratio, decide if the task is now eligible.
@@ -1036,23 +1025,6 @@ out:
        return delta_ns;
}

static int sum_prios_for_lat(struct task_struct *p, int nice_prio,
                             int lat_boost_prio)
{
        int prio;

        /*
         * Bound the final scheduler priority to NICE_WIDTH, [0, 40).
         */
        prio = nice_prio + lat_boost_prio;
        if (prio >= NICE_WIDTH)
                prio = NICE_WIDTH - 1;
        else if (prio < 0)
                prio = 0;

        return prio;
}

static u64 calc_starvation_factor(struct task_ctx *taskc)
{
        struct sys_stat *stat_cur = get_sys_stat_cur();
@@ -1066,15 +1038,24 @@ static u64 calc_starvation_factor(struct task_ctx *taskc)
        return ratio + 1;
}

static void boost_lat(struct task_struct *p, struct task_ctx *taskc,
                      struct cpu_ctx *cpuc, bool is_wakeup)
static s64 calc_static_prio_factor(struct task_struct *p)
{
        u64 starvation_ft, wait_freq_ft, wake_freq_ft;
        u64 lat_cri_raw;
        /*
         * A nicer task with >20 static priority will get penalized with
         * negative latency-criticality. However, a greedier task with <20
         * static priority will get boosted.
         */
        return (20 - get_nice_prio(p)) >> 1;
}

static void calc_lat_cri(struct task_struct *p, struct task_ctx *taskc)
{
        u64 wait_freq_ft, wake_freq_ft;
        s64 lat_cri_raw, lat_cri;

        /*
         * A task is more latency-critical as its wait or wake frequencies
         * (i.e., wait_freq and wake_freq) and starvation factors are higher.
         * (i.e., wait_freq and wake_freq) are higher.
         *
         * Since those frequencies are unbounded and their upper limits are
         * unknown, we transform them using sigmoid-like functions. For wait
@@ -1084,40 +1065,48 @@ static void boost_lat(struct task_struct *p, struct task_ctx *taskc,
         */
        wait_freq_ft = calc_freq_factor(taskc->wait_freq);
        wake_freq_ft = calc_freq_factor(taskc->wake_freq);
        starvation_ft = calc_starvation_factor(taskc);

        /*
         * Wake frequency and wait frequency represent how much a task is used
         * for a producer and a consumer, respectively. If both are high, the
         * task is in the middle of a task chain.
         * task is in the middle of a task chain. We prioritize a producer.
         */
        lat_cri_raw = wait_freq_ft * wake_freq_ft * starvation_ft;
        lat_cri_raw = wait_freq_ft * wake_freq_ft * wake_freq_ft;

        /*
         * The ratio above tends to follow an exponentially skewed
         * distribution, so we linearize it using log2 before converting it to
         * a boost priority. We add +1 to guarantee the latency criticality
         * (log2-ed) is always positive.
         *
         * Note that the priority-to-weight conversion table is non-linear.
         * Through this process -- log2(ratio) then priority to weight
         * conversion, we mitigate the exponentially skewed distribution to
         * non-linear distribution.
         */
        taskc->lat_cri = log2_u64(lat_cri_raw + 1) + is_wakeup;
        lat_cri = log2_u64(lat_cri_raw + 1);

        /*
         * A user-provided nice value is a strong hint for latency-criticality.
         */
        lat_cri += calc_static_prio_factor(p);
        lat_cri = max(lat_cri, 1);

        taskc->lat_cri = lat_cri;
}

static void calc_starv_cri(struct task_ctx *taskc, bool is_wakeup)
{
        taskc->starv_cri = log2_u64(calc_starvation_factor(taskc) + 1) +
                           is_wakeup;
}

static void calc_virtual_deadline_delta(struct task_struct *p,
                                        struct task_ctx *taskc,
                                        struct cpu_ctx *cpuc,
                                        u64 enq_flags)
                                        struct task_ctx *taskc, u64 enq_flags)
{
        bool is_wakeup;

        is_wakeup = is_wakeup_ef(enq_flags);
        boost_lat(p, taskc, cpuc, is_wakeup);
        taskc->vdeadline_delta_ns = (taskc->run_time_ns *
                                     LAVD_VDL_LOOSENESS_FT) / taskc->lat_cri;
        calc_lat_cri(p, taskc);
        calc_starv_cri(taskc, is_wakeup);
        taskc->vdeadline_delta_ns =
                (taskc->run_time_ns * LAVD_VDL_LOOSENESS_FT) /
                (taskc->lat_cri + taskc->starv_cri);
}

static u64 calc_task_load_actual(struct task_ctx *taskc)
@@ -1139,6 +1128,12 @@ static u64 clamp_time_slice_ns(u64 slice)
        return slice;
}

static s32 nr_queued_tasks(void)
{
        return scx_bpf_dsq_nr_queued(LAVD_LATENCY_CRITICAL_DSQ) +
               scx_bpf_dsq_nr_queued(LAVD_REGULAR_DSQ);
}

static u64 calc_time_slice(struct task_struct *p, struct task_ctx *taskc,
                           struct cpu_ctx *cpuc)
{
@@ -1149,7 +1144,7 @@ static u64 calc_time_slice(struct task_struct *p, struct task_ctx *taskc,
         * The time slice should be short enough to schedule all runnable tasks
         * at least once within a targeted latency.
         */
        nr_queued = scx_bpf_dsq_nr_queued(LAVD_GLOBAL_DSQ) + 1;
        nr_queued = nr_queued_tasks() + 1;
        slice = (LAVD_TARGETED_LATENCY_NS * stat_cur->nr_active) / nr_queued;

        /*
@@ -1243,7 +1238,7 @@ static void advance_cur_logical_clk(struct task_ctx *taskc)
         * Advance the clock up to the task's deadline. When overloaded,
         * advance the clock slower so others can jump into the run queue.
         */
        nr_queued = max(scx_bpf_dsq_nr_queued(LAVD_GLOBAL_DSQ), 1);
        nr_queued = max(nr_queued_tasks(), 1);
        delta = (vlc - clc) / nr_queued;
        new_clk = clc + delta;

@@ -1257,7 +1252,7 @@ static void update_stat_for_running(struct task_struct *p,
        u64 wait_period, interval;
        u64 now = bpf_ktime_get_ns();
        u64 load_actual_ft, wait_freq_ft, wake_freq_ft;
        u64 perf_cri_raw;
        u64 perf_cri, perf_cri_raw;

        /*
         * Update the current logical clock.
@@ -1286,8 +1281,6 @@ static void update_stat_for_running(struct task_struct *p,
         */
        if (cpuc->max_lat_cri < taskc->lat_cri)
                cpuc->max_lat_cri = taskc->lat_cri;
        if (cpuc->min_lat_cri > taskc->lat_cri)
                cpuc->min_lat_cri = taskc->lat_cri;
        cpuc->sum_lat_cri += taskc->lat_cri;
        cpuc->sched_nr++;

@@ -1314,9 +1307,13 @@ static void update_stat_for_running(struct task_struct *p,
        load_actual_ft = calc_runtime_factor(taskc->load_actual);
        wait_freq_ft = calc_freq_factor(taskc->wait_freq);
        wake_freq_ft = calc_freq_factor(taskc->wake_freq);
        perf_cri_raw = load_actual_ft * p->scx.weight *
                       wait_freq_ft * wake_freq_ft;
        taskc->perf_cri = log2_u64(perf_cri_raw + 1);

        perf_cri_raw = load_actual_ft * p->scx.weight;
        perf_cri = log2_u64(perf_cri_raw + 1);
        perf_cri_raw = wait_freq_ft * wake_freq_ft * wake_freq_ft;
        perf_cri += log2_u64(perf_cri_raw + 1);

        taskc->perf_cri = perf_cri;
        cpuc->sum_perf_cri += taskc->perf_cri;

        /*
@@ -1386,7 +1383,7 @@ static void update_stat_for_quiescent(struct task_struct *p,
}

static void calc_when_to_run(struct task_struct *p, struct task_ctx *taskc,
                             struct cpu_ctx *cpuc, u64 enq_flags)
                             u64 enq_flags)
{
        u64 vlc;

@@ -1396,8 +1393,8 @@ static void calc_when_to_run(struct task_struct *p, struct task_ctx *taskc,
         * urgent it is - vdeadline_delta_ns - and when it becomes eligible if
         * overscheduled - eligible_time_ns.
         */
        calc_virtual_deadline_delta(p, taskc, cpuc, enq_flags);
        calc_eligible_delta(p, taskc);
        calc_virtual_deadline_delta(p, taskc, enq_flags);
        calc_eligible_delta(taskc);

        /*
         * Update the logical clock of the virtual deadline including
@@ -1681,7 +1678,7 @@ static bool try_yield_current_cpu(struct task_struct *p_run,
        prm_run.lat_cri = taskc_run->lat_cri;

        bpf_rcu_read_lock();
        bpf_for_each(scx_dsq, p_wait, LAVD_GLOBAL_DSQ, 0) {
        bpf_for_each(scx_dsq, p_wait, LAVD_LATENCY_CRITICAL_DSQ, 0) {
                taskc_wait = get_task_ctx(p_wait);
                if (!taskc_wait)
                        break;
@@ -1706,7 +1703,7 @@ static bool try_yield_current_cpu(struct task_struct *p_run,
                }

                /*
                 * Test only the first entry on the LAVD_GLOBAL_DSQ.
                 * Test only the first entry on the LAVD_LATENCY_CRITICAL_DSQ.
                 */
                break;
        }
@@ -1715,11 +1712,19 @@ static bool try_yield_current_cpu(struct task_struct *p_run,
        return ret;
}

static bool is_lat_cri_task(struct task_ctx *taskc)
{
        struct sys_stat *stat_cur = get_sys_stat_cur();

        return taskc->lat_cri > stat_cur->avg_lat_cri;
}

static void put_global_rq(struct task_struct *p, struct task_ctx *taskc,
                          struct cpu_ctx *cpuc, u64 enq_flags)
{
        struct task_ctx *taskc_run;
        struct task_struct *p_run;
        u64 dsq_id = LAVD_REGULAR_DSQ;

        /*
         * Calculate when a task can be scheduled.
@@ -1727,7 +1732,7 @@ static void put_global_rq(struct task_struct *p, struct task_ctx *taskc,
         * Note that the task's time slice will be calculated and reassigned
         * right before running at ops.running().
         */
        calc_when_to_run(p, taskc, cpuc, enq_flags);
        calc_when_to_run(p, taskc, enq_flags);

        /*
         * If a task is eligible, dispatch to the eligible DSQ.
@@ -1749,9 +1754,15 @@ static void put_global_rq(struct task_struct *p, struct task_ctx *taskc,
        }

        /*
         * Enqueue the task to the eligible DSQ based on its virtual deadline.
         * Enqueue the task to one of the DSQs based on its virtual deadline.
         *
         * Note that, with no_2_level_scheduling, all tasks are considered
         * latency-critical and they're all enqueued to the
         * LAVD_LATENCY_CRITICAL_DSQ.
         */
        scx_bpf_dispatch_vtime(p, LAVD_GLOBAL_DSQ, LAVD_SLICE_UNDECIDED,
        if (no_2_level_scheduling || is_lat_cri_task(taskc))
                dsq_id = LAVD_LATENCY_CRITICAL_DSQ;
        scx_bpf_dispatch_vtime(p, dsq_id, LAVD_SLICE_UNDECIDED,
                               taskc->vdeadline_log_clk, enq_flags);
        return;
}
@@ -1883,8 +1894,9 @@ s32 BPF_STRUCT_OPS(lavd_select_cpu, struct task_struct *p, s32 prev_cpu,
                return prev_cpu;

        cpu_id = pick_cpu(p, taskc, prev_cpu, wake_flags, &found_idle);
        if (found_idle)
        if (found_idle) {
                return cpu_id;
        }

        return prev_cpu;
}
@@ -1926,16 +1938,63 @@ static bool use_full_cpus(void)
               ((stat_cur->nr_active + LAVD_CC_NR_OVRFLW) >= nr_cpus_onln);
}

static bool consume_lat_cri_task(u64 now)
{
        if (scx_bpf_consume(LAVD_LATENCY_CRITICAL_DSQ)) {
                WRITE_ONCE(lat_cri_rq_clk, now);
                return true;
        }
        return false;
}

static bool consume_regular_task(u64 now)
{
        if (scx_bpf_consume(LAVD_REGULAR_DSQ)) {
                WRITE_ONCE(regular_rq_clk, now);
                return true;
        }
        return false;
}

static bool consume_starving_task(u64 now)
{
        u64 clk;

        clk = READ_ONCE(lat_cri_rq_clk) + LAVD_DSQ_STARVE_TIMEOUT;
        if (clk < now && consume_lat_cri_task(now))
                return true;

        clk = READ_ONCE(regular_rq_clk) + LAVD_DSQ_STARVE_TIMEOUT;
        if (clk < now && consume_regular_task(now))
                return true;
        return false;
}

static bool consume_task(u64 now)
{
        if (!no_2_level_scheduling && consume_starving_task(now))
                return true;

        if (consume_lat_cri_task(now))
                return true;

        if (!no_2_level_scheduling && consume_regular_task(now))
                return true;
        return false;
}

void BPF_STRUCT_OPS(lavd_dispatch, s32 cpu, struct task_struct *prev)
{
        u64 now = bpf_ktime_get_ns();
        struct bpf_cpumask *active, *ovrflw;
        struct task_struct *p;
        bool ret = false;

        /*
         * If all CPUs are in use, directly consume without checking CPU masks.
         */
        if (use_full_cpus()) {
                scx_bpf_consume(LAVD_GLOBAL_DSQ);
                consume_task(now);
                return;
        }

@@ -1956,7 +2015,7 @@ void BPF_STRUCT_OPS(lavd_dispatch, s32 cpu, struct task_struct *prev)
         */
        if (bpf_cpumask_test_cpu(cpu, cast_mask(active)) ||
            bpf_cpumask_test_cpu(cpu, cast_mask(ovrflw))) {
                scx_bpf_consume(LAVD_GLOBAL_DSQ);
                consume_task(now);
                goto unlock_out;
        }

@@ -1964,14 +2023,14 @@ void BPF_STRUCT_OPS(lavd_dispatch, s32 cpu, struct task_struct *prev)
         * If this CPU is not either in active or overflow CPUs, it tries to
         * find and run a task pinned to run on this CPU.
         */
        bpf_for_each(scx_dsq, p, LAVD_GLOBAL_DSQ, 0) {
        bpf_for_each(scx_dsq, p, LAVD_LATENCY_CRITICAL_DSQ, 0) {
                /*
                 * Prioritize kernel tasks because most kernel tasks are pinned
                 * to a particular CPU and latency-critical (e.g., ksoftirqd,
                 * kworker, etc).
                 */
                if (is_kernel_task(p)) {
                        scx_bpf_consume(LAVD_GLOBAL_DSQ);
                        ret = consume_task(now);
                        bpf_cpumask_set_cpu(cpu, ovrflw);
                        break;
                }
@@ -2003,7 +2062,7 @@ void BPF_STRUCT_OPS(lavd_dispatch, s32 cpu, struct task_struct *prev)
                 * cores. We will optimize this path after introducing per-core
                 * DSQ.
                 */
                scx_bpf_consume(LAVD_GLOBAL_DSQ);
                ret = consume_task(now);

                /*
                 * This is the first time a particular pinned user-space task
@@ -2019,7 +2078,36 @@ release_break:
                bpf_task_release(p);
                break;
        }

        /*
         * With no_2_level_scheduling, all tasks are considered
         * latency-critical, so we don't need to check the regular queue.
         */
        if (no_2_level_scheduling || ret)
                goto unlock_out;

        bpf_for_each(scx_dsq, p, LAVD_REGULAR_DSQ, 0) {
                if (is_kernel_task(p)) {
                        consume_task(now);
                        bpf_cpumask_set_cpu(cpu, ovrflw);
                        break;
                }

                p = bpf_task_from_pid(p->pid);
                if (!p)
                        goto unlock_out;

                if (bpf_cpumask_intersects(cast_mask(active), p->cpus_ptr) ||
                    bpf_cpumask_intersects(cast_mask(ovrflw), p->cpus_ptr))
                        goto release_break2;

                consume_task(now);
                bpf_cpumask_set_cpu(cpu, ovrflw);
release_break2:
                bpf_task_release(p);
                break;
        }

unlock_out:
        bpf_rcu_read_unlock();
        return;
@@ -2507,13 +2595,19 @@ s32 BPF_STRUCT_OPS(lavd_init_task, struct task_struct *p,
        return 0;
}

static s32 init_dsq(void)
static s32 init_dsqs(void)
{
        int err;

        err = scx_bpf_create_dsq(LAVD_GLOBAL_DSQ, -1);
        err = scx_bpf_create_dsq(LAVD_LATENCY_CRITICAL_DSQ, -1);
        if (err) {
                scx_bpf_error("Failed to create an eligible DSQ");
                scx_bpf_error("Failed to create a latency critical DSQ");
                return err;
        }

        err = scx_bpf_create_dsq(LAVD_REGULAR_DSQ, -1);
        if (err) {
                scx_bpf_error("Failed to create a regular DSQ");
                return err;
        }

@@ -2658,9 +2752,9 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(lavd_init)
        int err;

        /*
         * Create a central task queue.
         * Create central task queues.
         */
        err = init_dsq();
        err = init_dsqs();
        if (err)
                return err;

@@ -2719,6 +2813,6 @@ SCX_OPS_DEFINE(lavd_ops,
               .init_task = (void *)lavd_init_task,
               .init = (void *)lavd_init,
               .exit = (void *)lavd_exit,
               .flags = /* SCX_OPS_ENQ_LAST | */ SCX_OPS_KEEP_BUILTIN_IDLE,
               .flags = SCX_OPS_KEEP_BUILTIN_IDLE,
               .timeout_ms = 30000U,
               .name = "lavd");

@@ -60,11 +60,16 @@ struct Opts {
    #[clap(long = "prefer-smt-core", action = clap::ArgAction::SetTrue)]
    prefer_smt_core: bool,

    /// Disable 2-level scheduling, which segregates latency-critical tasks from regular tasks
    #[clap(long = "no-2-level-scheduling", action = clap::ArgAction::SetTrue)]
    no_2_level_scheduling: bool,

    /// Disable frequency scaling by scx_lavd
    #[clap(long = "no-freq-scaling", action = clap::ArgAction::SetTrue)]
    no_freq_scaling: bool,

    /// The number of scheduling samples to be reported every second (default: 1)
    /// The number of scheduling samples to be reported every second
    /// (default: 1, 0 = disable logging)
    #[clap(short = 's', long, default_value = "1")]
    nr_sched_samples: u64,
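Editorial note on usage (illustrative, not from the commit): two-level scheduling is on by default, and the clap field above is presumably wired to the BPF-side const volatile no_2_level_scheduling switch (the plumbing code is not part of the hunks shown). Assuming the scheduler is launched through its usual scx_lavd binary, the old single-queue behavior can be restored for comparison with:

        scx_lavd --no-2-level-scheduling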