scx_layered: Add per layer timeslice

Allow setting a different timeslice per layer.
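
For example (illustrative values; the field names follow the existing layer
spec format, and slice_us defaults to 0, which falls back to the global
--slice-us):

  {
    "name": "batch",
    "comment": "low priority batch work",
    "matches": [[{"CgroupPrefix": "batch/"}]],
    "kind": {
      "Grouped": {
        "util_range": [0.1, 0.6],
        "slice_us": 5000
      }
    }
  }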

Signed-off-by: Daniel Hodges <hodges.daniel.scott@gmail.com>
Author: Daniel Hodges
Date:   2024-08-27 13:27:30 -07:00
parent 3219c6dd22
commit cc450f1a4b
3 changed files with 36 additions and 11 deletions


@@ -141,6 +141,7 @@ struct layer {
        u64 min_exec_ns;
        u64 max_exec_ns;
        u64 yield_step_ns;
+       u64 slice_ns;
        bool open;
        bool preempt;
        bool preempt_first;


@@ -569,6 +569,7 @@ s32 BPF_STRUCT_OPS(layered_select_cpu, struct task_struct *p, s32 prev_cpu, u64

        if (cpu >= 0) {
                lstat_inc(LSTAT_SEL_LOCAL, layer, cctx);
+               u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;
                scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, 0);
                return cpu;
        } else {
@@ -658,6 +659,7 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)

        try_preempt_first = cctx->try_preempt_first;
        cctx->try_preempt_first = false;
+       u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;

        if (cctx->yielding) {
                lstat_inc(LSTAT_YIELD, layer, cctx);
@@ -677,8 +679,8 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
         * Limit the amount of budget that an idling task can accumulate
         * to one slice.
         */
-       if (vtime_before(vtime, layer->vtime_now - slice_ns))
-               vtime = layer->vtime_now - slice_ns;
+       if (vtime_before(vtime, layer->vtime_now - layer_slice_ns))
+               vtime = layer->vtime_now - layer_slice_ns;

        /*
         * Special-case per-cpu kthreads which aren't in a preempting layer so
@@ -695,7 +697,7 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
                    !bpf_cpumask_test_cpu(task_cpu, layer_cpumask))
                        lstat_inc(LSTAT_AFFN_VIOL, layer, cctx);

-               scx_bpf_dispatch(p, HI_FALLBACK_DSQ, slice_ns, enq_flags);
+               scx_bpf_dispatch(p, HI_FALLBACK_DSQ, layer_slice_ns, enq_flags);
                goto find_cpu;
        }
@@ -718,17 +720,17 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
                 * starvation. For now, we just dispatch all affinitized tasks
                 * to HI_FALLBACK_DSQ to avoid this starvation issue.
                 */
-               scx_bpf_dispatch(p, HI_FALLBACK_DSQ, slice_ns, enq_flags);
+               scx_bpf_dispatch(p, HI_FALLBACK_DSQ, layer_slice_ns, enq_flags);
                goto find_cpu;
        }

        if (disable_topology) {
-               scx_bpf_dispatch_vtime(p, tctx->layer, slice_ns, vtime, enq_flags);
+               scx_bpf_dispatch_vtime(p, tctx->layer, layer_slice_ns, vtime, enq_flags);
        } else {
                u32 llc_id = cpu_to_llc_id(tctx->last_cpu >= 0 ? tctx->last_cpu :
                                           bpf_get_smp_processor_id());
                idx = layer_dsq_id(layer->idx, llc_id);
-               scx_bpf_dispatch_vtime(p, idx, slice_ns, vtime, enq_flags);
+               scx_bpf_dispatch_vtime(p, idx, layer_slice_ns, vtime, enq_flags);
        }

find_cpu:
@@ -788,8 +790,9 @@ static bool keep_running(struct cpu_ctx *cctx, struct task_struct *p)
        if (!(tctx = lookup_task_ctx(p)) || !(layer = lookup_layer(tctx->layer)))
                goto no;

+       u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;
        /* @p has fully consumed its slice and still wants to run */
-       cctx->ran_current_for += slice_ns;
+       cctx->ran_current_for += layer_slice_ns;

        /*
         * There wasn't anything in the local or global DSQ, but there may be
@@ -814,6 +817,7 @@ static bool keep_running(struct cpu_ctx *cctx, struct task_struct *p)
         */
        if (disable_topology) {
                if (!scx_bpf_dsq_nr_queued(layer->idx)) {
+                       p->scx.slice = layer_slice_ns;
                        lstat_inc(LSTAT_KEEP, layer, cctx);
                        return true;
                }
@@ -822,6 +826,7 @@ static bool keep_running(struct cpu_ctx *cctx, struct task_struct *p)
                                            tctx->last_cpu :
                                            bpf_get_smp_processor_id());
                if (!scx_bpf_dsq_nr_queued(dsq_id)) {
+                       p->scx.slice = layer_slice_ns;
                        lstat_inc(LSTAT_KEEP, layer, cctx);
                        return true;
                }
@@ -847,6 +852,7 @@ static bool keep_running(struct cpu_ctx *cctx, struct task_struct *p)
                scx_bpf_put_idle_cpumask(idle_cpumask);

                if (has_idle) {
+                       p->scx.slice = layer_slice_ns;
                        lstat_inc(LSTAT_KEEP, layer, cctx);
                        return true;
                }
@@ -1288,10 +1294,11 @@ void BPF_STRUCT_OPS(layered_stopping, struct task_struct *p, bool runnable)
        cctx->current_preempt = false;
        cctx->prev_exclusive = cctx->current_exclusive;
        cctx->current_exclusive = false;
+       u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;

        /* scale the execution time by the inverse of the weight and charge */
-       if (cctx->yielding && used < slice_ns)
-               used = slice_ns;
+       if (cctx->yielding && used < layer_slice_ns)
+               used = layer_slice_ns;
        p->scx.dsq_vtime += used * 100 / p->scx.weight;
        cctx->maybe_idle = true;
}
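
The pattern repeated above resolves the effective slice with a zero-means-unset
convention: a layer whose slice_ns is 0 did not request its own timeslice and
uses the global default. A minimal sketch of that logic as a helper (hypothetical,
not part of this commit):

        /* Effective timeslice for a layer: the per-layer value if set,
         * otherwise the global default slice_ns. */
        static __always_inline u64 layer_slice(struct layer *layer)
        {
                return layer->slice_ns > 0 ? layer->slice_ns : slice_ns;
        }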


@@ -93,6 +93,7 @@ lazy_static::lazy_static! {
                preempt: false,
                preempt_first: false,
                exclusive: false,
+               slice_us: 20000,
                perf: 1024,
                nodes: vec![],
                llcs: vec![],
@@ -111,6 +112,7 @@ lazy_static::lazy_static! {
                preempt: true,
                preempt_first: false,
                exclusive: true,
+               slice_us: 20000,
                perf: 1024,
                nodes: vec![],
                llcs: vec![],
@@ -128,6 +130,7 @@ lazy_static::lazy_static! {
                preempt: false,
                preempt_first: false,
                exclusive: false,
+               slice_us: 20000,
                perf: 1024,
                nodes: vec![],
                llcs: vec![],
@@ -450,6 +453,8 @@ enum LayerKind {
        #[serde(default)]
        yield_ignore: f64,
        #[serde(default)]
+       slice_us: u64,
+       #[serde(default)]
        preempt: bool,
        #[serde(default)]
        preempt_first: bool,
@@ -471,6 +476,8 @@ enum LayerKind {
        #[serde(default)]
        yield_ignore: f64,
        #[serde(default)]
+       slice_us: u64,
+       #[serde(default)]
        preempt: bool,
        #[serde(default)]
        preempt_first: bool,
@@ -489,6 +496,8 @@ enum LayerKind {
        #[serde(default)]
        yield_ignore: f64,
        #[serde(default)]
+       slice_us: u64,
+       #[serde(default)]
        preempt: bool,
        #[serde(default)]
        preempt_first: bool,
@@ -1464,6 +1473,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
                    preempt_first,
                    exclusive,
                    nodes,
+                   slice_us,
                    ..
                }
                | LayerKind::Grouped {
@@ -1474,6 +1484,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
                    preempt_first,
                    exclusive,
                    nodes,
+                   slice_us,
                    ..
                }
                | LayerKind::Open {
@@ -1484,15 +1495,21 @@ impl<'a, 'b> Scheduler<'a, 'b> {
                    preempt_first,
                    exclusive,
                    nodes,
+                   slice_us,
                    ..
                } => {
+                   layer.slice_ns = if *slice_us > 0 {
+                       *slice_us * 1000
+                   } else {
+                       opts.slice_us * 1000
+                   };
                    layer.min_exec_ns = min_exec_us * 1000;
                    layer.yield_step_ns = if *yield_ignore > 0.999 {
                        0
                    } else if *yield_ignore < 0.001 {
-                       opts.slice_us * 1000
+                       layer.slice_ns
                    } else {
-                       ((opts.slice_us * 1000) as f64 * (1.0 - *yield_ignore)) as u64
+                       (layer.slice_ns as f64 * (1.0 - *yield_ignore)) as u64
                    };
                    layer.preempt.write(*preempt);
                    layer.preempt_first.write(*preempt_first);
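
One consequence of the last hunk: yield_step_ns now scales with the layer's own
slice rather than the global opts.slice_us. For an illustrative layer with
slice_us = 5000 and yield_ignore = 0.5, yield_step_ns = (5000 * 1000) * (1.0 - 0.5)
= 2,500,000 ns, so a yielding task gives up half of that layer's slice rather
than half of the global one.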