scx_layered: Add weighted layer DSQ iteration

Add a flag to control DSQ iteration across layers by layer weight. This
helps prevent starvation by iterating over layers with the lowest weight
first.

Signed-off-by: Daniel Hodges <hodges.daniel.scott@gmail.com>
commit f3b3d4f19c
parent bd75ac8dbf
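The change in a nutshell: the Rust loader records each layer's weight, sorts the layer indices so the lowest weight comes first, and publishes that order to BPF through the read-only `layer_iteration_order` array; when `layer_weight_dsq_iter` is enabled, `layered_dispatch` walks layer DSQs in that order instead of the per-CPU rotation from `iter_layer_cpu_ctx()`. Below is a minimal standalone sketch of the ordering step only; `weighted_iteration_order` and the sample weights are illustrative assumptions, not part of the scheduler.

```rust
// Illustrative sketch: mirrors how the loader builds the weighted iteration
// order (lowest weight first) before copying it into the read-only
// `layer_iteration_order` array consumed by the BPF side.
fn weighted_iteration_order(layer_weights: &[usize]) -> Vec<u32> {
    let mut order: Vec<usize> = (0..layer_weights.len()).collect();
    // Stable ascending sort: the lowest-weight layer is visited first and
    // equal-weight layers keep their configured relative order.
    order.sort_by(|i, j| layer_weights[*i].cmp(&layer_weights[*j]));
    order.into_iter().map(|i| i as u32).collect()
}

fn main() {
    // Hypothetical weights for four layers.
    let weights = [100, 10, 50, 10];
    assert_eq!(weighted_iteration_order(&weights), vec![1, 3, 2, 0]);
    println!("iteration order: {:?}", weighted_iteration_order(&weights));
}
```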
--- a/scheds/rust/scx_layered/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_layered/src/bpf/main.bpf.c
@@ -32,8 +32,10 @@ const volatile u32 nr_llcs = 32; /* !0 for veristat, set during init */
 const volatile bool smt_enabled = true;
 const volatile bool disable_topology = false;
 const volatile bool xnuma_preemption = false;
+const volatile bool layer_weight_dsq_iter = false;
 const volatile s32 __sibling_cpu[MAX_CPUS];
 const volatile unsigned char all_cpus[MAX_CPUS_U8];
+const volatile u32 layer_iteration_order[MAX_LAYERS];

 private(all_cpumask) struct bpf_cpumask __kptr *all_cpumask;
 struct layer layers[MAX_LAYERS];
@@ -79,6 +81,12 @@ static u32 cpu_ctx_layer_idx_inc(struct cpu_ctx *cctx)
 	return cctx->layer_idx;
 }

+// Returns the iterator index of a layer ordered by weight.
+static u32 iter_layer_weight_ctx(int idx)
+{
+	return *MEMBER_VPTR(layer_iteration_order, [idx]);
+}
+
 static __noinline u32 iter_layer_cpu_ctx(u32 layer_idx, int idx)
 {
 	u32 offset;
@@ -1160,14 +1168,16 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev)

 	/* consume preempting layers first */
 	bpf_for(idx, 0, nr_layers) {
-		layer_idx = iter_layer_cpu_ctx(cctx->layer_idx, idx);
 		if (disable_topology) {
-			if (MEMBER_VPTR(layers, [layer_idx].preempt) && scx_bpf_consume(layer_idx))
+			if (MEMBER_VPTR(layers, [idx].preempt) && scx_bpf_consume(idx))
 				return;
 		} else {
+			layer_idx = layer_weight_dsq_iter ? iter_layer_weight_ctx(idx) :
+				iter_layer_cpu_ctx(cctx->layer_idx, idx);
 			bpf_for(llc_id, 0, nr_llcs) {
 				dsq_id = layer_dsq_id(layer_idx, llc_id);
-				if (MEMBER_VPTR(layers, [layer_idx].preempt) && scx_bpf_consume(dsq_id))
+				if (MEMBER_VPTR(layers, [layer_idx].preempt) &&
+				    scx_bpf_consume(dsq_id))
 					return;
 			}
 		}
@@ -1179,21 +1189,23 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev)

 	/* consume !open layers second */
 	bpf_for(idx, 0, nr_layers) {
-		layer_idx = iter_layer_cpu_ctx(cctx->layer_idx, idx);
 		if (disable_topology) {
-			struct layer *layer = &layers[layer_idx];
+			layer_idx = idx;
+			struct layer *layer = &layers[idx];
 			struct cpumask *layer_cpumask;

 			/* consume matching layers */
-			if (!(layer_cpumask = lookup_layer_cpumask(layer_idx)))
+			if (!(layer_cpumask = lookup_layer_cpumask(idx)))
 				return;

 			if (bpf_cpumask_test_cpu(cpu, layer_cpumask) ||
 			    (cpu == fallback_cpu && layer->nr_cpus == 0)) {
-				if (scx_bpf_consume(layer_idx))
+				if (scx_bpf_consume(idx))
 					return;
 			}
 		} else {
+			layer_idx = layer_weight_dsq_iter ? iter_layer_weight_ctx(idx) :
+				iter_layer_cpu_ctx(cctx->layer_idx, idx);
 			bpf_for(llc_id, 0, nr_llcs) {
 				struct layer *layer = &layers[layer_idx];
 				struct cpumask *layer_cpumask;
@@ -1205,7 +1217,7 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev)

 				if (bpf_cpumask_test_cpu(cpu, layer_cpumask) ||
 				    (cpu <= nr_possible_cpus && cpu == fallback_cpu &&
-				     layer->nr_cpus == 0)) {
+				     MEMBER_VPTR(layer, ->nr_cpus) == 0)) {
 					if (scx_bpf_consume(dsq_id))
 						return;
 				}
@@ -1215,12 +1227,13 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev)

 	/* consume !preempting open layers */
 	bpf_for(idx, 0, nr_layers) {
-		layer_idx = iter_layer_cpu_ctx(cctx->layer_idx, idx);
 		if (disable_topology) {
-			if (!layers[layer_idx].preempt && layers[layer_idx].open &&
-			    scx_bpf_consume(layer_idx))
+			if (!layers[idx].preempt && layers[idx].open &&
+			    scx_bpf_consume(idx))
 				return;
 		} else {
+			layer_idx = layer_weight_dsq_iter ? iter_layer_weight_ctx(idx) :
+				iter_layer_cpu_ctx(cctx->layer_idx, idx);
 			bpf_for(llc_id, 0, nr_llcs) {
 				dsq_id = layer_dsq_id(layer_idx, llc_id);

@@ -1885,6 +1898,14 @@ void BPF_STRUCT_OPS(layered_dump, struct scx_dump_ctx *dctx)
 		     dsq_first_runnable_for_ms(LO_FALLBACK_DSQ, now));
 }

+static void print_iter_order() {
+	int i;
+
+	bpf_for(i, 0, nr_layers) {
+		trace("ITER order i: %d %d\n", i, *MEMBER_VPTR(layer_iteration_order, [i]));
+	}
+}
+
 s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
 {
 	struct bpf_cpumask *cpumask;
@@ -2059,6 +2080,8 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
 		}
 	}

+	print_iter_order();
+
 	return 0;
 }

--- a/scheds/rust/scx_layered/src/main.rs
+++ b/scheds/rust/scx_layered/src/main.rs
@@ -414,6 +414,12 @@ struct Opts {
     #[clap(long, default_value = "0.0")]
     layer_growth_weight_disable: f64,

+    /// When iterating over layer DSQs use the weight of the layer for iteration
+    /// order. The default iteration order is semi-random except when topology
+    /// awareness is disabled.
+    #[clap(long)]
+    layer_weight_dsq_iter: bool,
+
     /// Enable stats monitoring with the specified interval.
     #[clap(long)]
     stats: Option<f64>,
@@ -1555,6 +1561,9 @@ impl<'a, 'b> Scheduler<'a, 'b> {
         skel.maps.rodata_data.nr_layers = specs.len() as u32;
         let mut perf_set = false;

+        let mut layer_iteration_order = (0..specs.len()).collect::<Vec<_>>();
+        let mut layer_weights: Vec<usize> = vec![];
+
         for (spec_i, spec) in specs.iter().enumerate() {
             let layer = &mut skel.maps.bss_data.layers[spec_i];

@@ -1675,6 +1684,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
             } else {
                 DEFAULT_LAYER_WEIGHT
             };
+            layer_weights.push(layer.weight.try_into().unwrap());
             layer.perf = u32::try_from(*perf)?;
             layer.node_mask = nodemask_from_nodes(nodes) as u64;
             for topo_node in topo.nodes() {
@@ -1696,6 +1706,11 @@ impl<'a, 'b> Scheduler<'a, 'b> {
             perf_set |= layer.perf > 0;
         }

+        layer_iteration_order.sort_by(|i, j| layer_weights[*i].cmp(&layer_weights[*j]));
+        for (idx, layer_idx) in layer_iteration_order.iter().enumerate() {
+            skel.maps.rodata_data.layer_iteration_order[idx] = *layer_idx as u32;
+        }
+
         if perf_set && !compat::ksym_exists("scx_bpf_cpuperf_set")? {
             warn!("cpufreq support not available, ignoring perf configurations");
         }
@@ -1775,6 +1790,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
         skel.maps.rodata_data.smt_enabled = cpu_pool.nr_cpus > cpu_pool.nr_cores;
         skel.maps.rodata_data.disable_topology = opts.disable_topology;
         skel.maps.rodata_data.xnuma_preemption = opts.xnuma_preemption;
+        skel.maps.rodata_data.layer_weight_dsq_iter = opts.layer_weight_dsq_iter;
         for (cpu, sib) in cpu_pool.sibling_cpu.iter().enumerate() {
             skel.maps.rodata_data.__sibling_cpu[cpu] = *sib;
         }