Merge branch 'main' into layered-dispatch-local

This commit is contained in:
Daniel Hodges 2024-11-14 16:10:12 -05:00 committed by GitHub
commit 3a3a7d71ad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 145 additions and 75 deletions

View File

@ -15,7 +15,19 @@ static __always_inline int fallback_dsq_cost_id(u64 fallback_dsq)
scx_bpf_error("invalid fallback dsq");
return 0;
}
return (int)fallback_dsq - HI_FALLBACK_DSQ_BASE;
return nr_layers + (int)fallback_dsq - HI_FALLBACK_DSQ_BASE;
}
/*
 * Converts an LLC id to the cost id (budget index) of that LLC's fallback
 * DSQ. Fallback budgets are indexed after the per-layer budgets, so the
 * result is nr_layers + fallback_llc.
 *
 * Ids outside [0, MAX_LLCS) are rejected: scx_bpf_error() is raised and 0 is
 * returned. Negative ids are rejected explicitly because the parameter is
 * signed and a negative value would produce an index below nr_layers, i.e.
 * one that addresses a layer budget instead of a fallback budget.
 */
static __always_inline int fallback_llc_cost_id(int fallback_llc)
{
	if (fallback_llc < 0 || fallback_llc >= MAX_LLCS) {
		scx_bpf_error("invalid fallback llc");
		return 0;
	}

	return nr_layers + fallback_llc;
}
/*
@ -24,16 +36,8 @@ static __always_inline int fallback_dsq_cost_id(u64 fallback_dsq)
static u64 budget_id_to_fallback_dsq(u32 budget_id)
{
if (budget_id == MAX_GLOBAL_BUDGETS)
return LO_FALLBACK_DSQ;
return HI_FALLBACK_DSQ_BASE + budget_id;
}
/*
* Returns true if the cost has preferred fallback DSQ budget
*/
static bool has_pref_fallback_budget(struct cost *costc)
{
return costc->pref_budget > nr_layers && costc->pref_budget <= MAX_GLOBAL_BUDGETS;
return (u64)LO_FALLBACK_DSQ;
return (u64)HI_FALLBACK_DSQ_BASE + (u64)budget_id - nr_layers;
}
/*
@ -125,7 +129,7 @@ static struct cost *initialize_cost(u32 cost_idx, u32 parent_idx,
static __noinline void initialize_budget(struct cost *costc, u32 budget_id,
s64 capacity)
{
if (budget_id >= MAX_GLOBAL_BUDGETS) {
if (budget_id > MAX_GLOBAL_BUDGETS) {
scx_bpf_error("invalid budget id %d", budget_id);
return;
}
@ -140,11 +144,10 @@ static void calc_preferred_cost(struct cost *costc)
{
u32 layer_id, id, budget_id, pref_budget = 0, max_layer = 0;
s64 max_budget = 0;
u64 dsq_id;
u32 rotation = bpf_get_smp_processor_id() % nr_layers;
bpf_for(id, 0, nr_layers) {
/*
/*
* If there are two equally weighted layers that have the same
* budget, we rely on rotating the layers based on the CPU. This
* may not work well on machines with few cores.
@ -157,14 +160,16 @@ static void calc_preferred_cost(struct cost *costc)
if (costc->budget[layer_id] > max_budget) {
max_budget = costc->budget[layer_id];
max_layer = layer_id;
pref_budget = max_layer;
pref_budget = layer_id;
}
}
// Hi fallback DSQs
bpf_for(id, 0, nr_llcs) {
if (costc->budget[id] > max_budget) {
max_budget = costc->budget[id];
pref_budget = id;
budget_id = fallback_llc_cost_id(id);
if (costc->budget[budget_id] >= max_budget) {
max_budget = costc->budget[budget_id];
pref_budget = budget_id;
trace("COST pref fallback %d", budget_id);
}
}
budget_id = fallback_dsq_cost_id(LO_FALLBACK_DSQ);
@ -178,34 +183,85 @@ static void calc_preferred_cost(struct cost *costc)
costc->pref_layer = max_layer;
costc->pref_budget = pref_budget;
if (costc->idx == 0 && pref_budget > nr_layers)
trace("COST pref_layer %d pref_budget %d budget %lld",
max_layer, pref_budget, costc->budget[pref_budget]);
return;
}
/*
* Returns true if the cost has preferred fallback DSQ budget
*/
static bool has_pref_fallback_budget(struct cost *costc)
{
return costc->pref_budget >= nr_layers &&
costc->pref_budget < MAX_GLOBAL_BUDGETS;
}
/*
 * Returns whether @budget_id is allowed to preempt @cur_budget according to
 * the budgets tracked in @costc.
 *
 * - If either id is out of range (>= MAX_GLOBAL_BUDGETS), returns false.
 * - Fallback DSQ budgets are always allowed to preempt.
 * - Otherwise the preempting budget must hold strictly more budget than the
 *   currently running one.
 */
static __always_inline bool has_preempt_budget(struct cost *costc,
					       u32 cur_budget, u32 budget_id)
{
	if (cur_budget >= MAX_GLOBAL_BUDGETS ||
	    budget_id >= MAX_GLOBAL_BUDGETS)
		return false;

	/*
	 * Fallback DSQs are always allowed to preempt. Fallback budget ids
	 * start at nr_layers inclusive (fallback_llc_cost_id(0) == nr_layers,
	 * and has_pref_fallback_budget() uses the same lower bound), so the
	 * comparison must be >= rather than >.
	 */
	if (budget_id >= nr_layers)
		return true;

	return costc->budget[budget_id] > costc->budget[cur_budget];
}
/*
* Refreshes the budget of a cost.
*/
int refresh_budget(int cost_id)
{
struct cost *costc;
s64 capacity;
if (!(costc = lookup_cost(cost_id))) {
scx_bpf_error("failed to lookup cost %d", cost_id);
return 0;
}
u32 layer_id, id;
u32 budget_id, id;
u32 rotation = bpf_get_smp_processor_id() % nr_layers;
bpf_for(id, 0, nr_layers) {
layer_id = rotate_layer_id(id, rotation);
if (layer_id > nr_layers) {
scx_bpf_error("invalid layer");
budget_id = rotate_layer_id(id, rotation);
if (budget_id > nr_layers) {
scx_bpf_error("invalid budget id");
return 0;
}
s64 capacity = costc->capacity[layer_id];
__sync_lock_test_and_set(MEMBER_VPTR(*costc, .budget[layer_id]),
capacity = costc->capacity[budget_id];
__sync_lock_test_and_set(MEMBER_VPTR(*costc, .budget[budget_id]),
capacity);
}
// Hi fallback DSQs
bpf_for(id, 0, nr_llcs) {
budget_id = fallback_llc_cost_id(id);
capacity = costc->capacity[budget_id];
__sync_lock_test_and_set(MEMBER_VPTR(*costc, .budget[budget_id]),
capacity);
}
budget_id = fallback_dsq_cost_id(LO_FALLBACK_DSQ);
if (budget_id > MAX_GLOBAL_BUDGETS) {
scx_bpf_error("invalid budget");
return 0;
}
capacity = costc->capacity[budget_id];
__sync_lock_test_and_set(MEMBER_VPTR(*costc, .budget[budget_id]),
capacity);
trace("COST refreshed budget %d", cost_id);
return 0;
}
@ -223,11 +279,11 @@ int refresh_budgets(void)
/*
* Acquires a budget from a parent cost account.
*/
s64 acquire_budget(struct cost *costc, u32 layer_id, s64 amount)
s64 acquire_budget(struct cost *costc, u32 budget_id, s64 amount)
{
s64 budget = 0;
if (layer_id >= MAX_LAYERS || layer_id < 0) {
if (budget_id >= MAX_GLOBAL_BUDGETS) {
scx_bpf_error("invalid parent cost");
return budget;
}
@ -235,14 +291,15 @@ s64 acquire_budget(struct cost *costc, u32 layer_id, s64 amount)
if (!costc || !costc->has_parent)
return budget;
struct cost *parent_cost;
if (!(parent_cost = lookup_cost(costc->idx)))
if (!(parent_cost = lookup_cost(costc->idx))) {
scx_bpf_error("failed to find parent");
return budget;
}
__sync_fetch_and_sub(&parent_cost->budget[layer_id], amount);
__sync_fetch_and_sub(&parent_cost->budget[budget_id], amount);
if (parent_cost->budget[layer_id] < 0)
if (parent_cost->budget[budget_id] <= 0)
refresh_budgets();
return amount;
@ -253,7 +310,7 @@ s64 acquire_budget(struct cost *costc, u32 layer_id, s64 amount)
* acquire budget by either retrieving budget from the global context or
* refreshing all budgets.
*/
int record_cpu_cost(struct cost *costc, u32 budget_id, s64 amount)
int record_cpu_cost(struct cost *costc, u32 budget_id, s64 amount, u64 slice_ns)
{
if (budget_id > MAX_GLOBAL_BUDGETS || !costc) {
scx_bpf_error("invalid budget %d", budget_id);
@ -262,10 +319,11 @@ int record_cpu_cost(struct cost *costc, u32 budget_id, s64 amount)
__sync_fetch_and_sub(&costc->budget[budget_id], amount);
if (costc->budget[budget_id] <= 0) {
if (costc->budget[budget_id] <= 0 ||
costc->budget[budget_id] < slice_ns) {
if (costc->has_parent) {
s64 budget = acquire_budget(costc, budget_id,
costc->capacity[budget_id] + amount);
s64 req_budget = costc->capacity[budget_id] - costc->budget[budget_id];
s64 budget = acquire_budget(costc, budget_id, req_budget);
if (budget > 0) {
__sync_fetch_and_add(&costc->budget[budget_id],
costc->capacity[budget_id]);
@ -296,7 +354,7 @@ __weak int has_budget(struct cost *costc, struct layer *layer)
s64 budget = *MEMBER_VPTR(*costc, .budget[layer_id]);
u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;
if (budget > layer_slice_ns)
if (budget >= layer_slice_ns)
return slice_ns;
return 0;
@ -310,7 +368,7 @@ static void initialize_budgets(u64 refresh_intvl_ns)
struct layer *layer;
struct cost *costc, *global_costc;
int layer_id, llc_id;
u64 dsq_id, layer_weight_dur, layer_weight_sum = 0;
u64 layer_weight_dur, layer_weight_sum = 0;
s32 cpu;
u32 budget_id;
@ -339,7 +397,7 @@ static void initialize_budgets(u64 refresh_intvl_ns)
return;
}
layer_weight_dur = (layer->weight * ((u64)refresh_intvl_ns * slice_ns * nr_possible_cpus)) /
layer_weight_dur = (layer->weight * ((u64)refresh_intvl_ns * nr_possible_cpus)) /
layer_weight_sum;
initialize_budget(global_costc, layer_id, (s64)layer_weight_dur);
trace("COST GLOBAL[%d][%s] budget %lld",
@ -354,7 +412,7 @@ static void initialize_budgets(u64 refresh_intvl_ns)
scx_bpf_error("failed to cpu budget: %d", cpu);
return;
}
layer_weight_dur = (layer->weight * slice_ns * refresh_intvl_ns) /
layer_weight_dur = (layer->weight * refresh_intvl_ns) /
layer_weight_sum;
initialize_budget(costc, layer_id, (s64)layer_weight_dur);
if (cpu == 0)
@ -367,16 +425,15 @@ static void initialize_budgets(u64 refresh_intvl_ns)
* XXX: since any task from any layer can get kicked to the fallback
* DSQ we use the default slice to calculate the default budget.
*/
layer_weight_dur = (LO_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns * nr_possible_cpus) /
layer_weight_dur = (LO_FALLBACK_DSQ_WEIGHT * refresh_intvl_ns * nr_possible_cpus) /
layer_weight_sum;
initialize_budget(global_costc, fallback_dsq_cost_id(LO_FALLBACK_DSQ),
(s64)layer_weight_dur);
budget_id = fallback_dsq_cost_id(LO_FALLBACK_DSQ);
initialize_budget(global_costc, budget_id, (s64)layer_weight_dur);
bpf_for(llc_id, 0, nr_llcs) {
dsq_id = llc_hi_fallback_dsq_id(llc_id);
budget_id = fallback_dsq_cost_id(dsq_id);
budget_id = fallback_llc_cost_id(llc_id);
layer_weight_dur = (HI_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns * nr_possible_cpus) /
layer_weight_dur = (HI_FALLBACK_DSQ_WEIGHT * refresh_intvl_ns * nr_possible_cpus) /
layer_weight_sum;
initialize_budget(global_costc, budget_id, (s64)layer_weight_dur);
@ -390,14 +447,14 @@ static void initialize_budgets(u64 refresh_intvl_ns)
// On the first iteration, always set up the lo fallback DSQ budget.
if (llc_id == 0) {
budget_id = fallback_dsq_cost_id(LO_FALLBACK_DSQ);
layer_weight_dur = (LO_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns) /
layer_weight_dur = (LO_FALLBACK_DSQ_WEIGHT * refresh_intvl_ns) /
layer_weight_sum;
initialize_budget(costc, budget_id,
(s64)layer_weight_dur);
initialize_budget(costc, budget_id, (s64)layer_weight_dur);
}
layer_weight_dur = (HI_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns) /
layer_weight_dur = (HI_FALLBACK_DSQ_WEIGHT * refresh_intvl_ns) /
layer_weight_sum;
budget_id = fallback_llc_cost_id(llc_id);
initialize_budget(costc, budget_id, (s64)layer_weight_dur);
if (cpu == 0 && llc_id == 0 && budget_id < MAX_GLOBAL_BUDGETS)
trace("COST CPU DSQ[%d][%d] budget %lld",

View File

@ -15,8 +15,8 @@
enum cost_consts {
COST_GLOBAL_KEY = 0,
HI_FALLBACK_DSQ_WEIGHT = 50,
LO_FALLBACK_DSQ_WEIGHT = 10,
HI_FALLBACK_DSQ_WEIGHT = 95,
LO_FALLBACK_DSQ_WEIGHT = 85,
/*
* Max global budgets map fallback DSQs (per LLC) as well as layers.

View File

@ -93,6 +93,11 @@ static struct layer *lookup_layer(int idx)
return &layers[idx];
}
/*
 * Returns the slice duration to use for @layer: the layer's own slice_ns
 * when it is greater than zero, otherwise the file-level slice_ns default.
 */
static __always_inline u64 layer_slice_ns(struct layer *layer)
{
	if (layer->slice_ns > 0)
		return layer->slice_ns;

	return slice_ns;
}
static __always_inline
int rotate_layer_id(u32 base_layer_id, u32 rotation)
{
@ -777,8 +782,8 @@ s32 BPF_STRUCT_OPS(layered_select_cpu, struct task_struct *p, s32 prev_cpu, u64
if (cpu >= 0) {
lstat_inc(LSTAT_SEL_LOCAL, layer, cctx);
u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;
scx_bpf_dispatch(p, SCX_DSQ_LOCAL, layer_slice_ns, 0);
u64 slice_ns = layer_slice_ns(layer);
scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, 0);
return cpu;
} else {
return prev_cpu;
@ -818,8 +823,12 @@ bool try_preempt_cpu(s32 cand, struct task_struct *p, struct cpu_ctx *cctx,
if (!(cand_cctx = lookup_cpu_ctx(cand)) || cand_cctx->current_preempt)
return false;
if (!(costc = lookup_cpu_cost(cand)) || has_budget(costc, layer) == 0)
if (!(costc = lookup_cpu_cost(cand)) ||
has_budget(costc, layer) == 0 ||
!has_preempt_budget(costc, cand_cctx->layer_idx, tctx->layer)) {
trace("COST layer %s not enough budget to preempt", layer->name);
return false;
}
/*
* If exclusive, we want to make sure the sibling CPU, if there's
@ -1067,7 +1076,7 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
try_preempt_first = cctx->try_preempt_first;
cctx->try_preempt_first = false;
u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;
u64 slice_ns = layer_slice_ns(layer);
if (cctx->yielding) {
lstat_inc(LSTAT_YIELD, layer, cctx);
@ -1087,8 +1096,8 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
* Limit the amount of budget that an idling task can accumulate
* to one slice.
*/
if (vtime_before(vtime, layer->vtime_now - layer_slice_ns))
vtime = layer->vtime_now - layer_slice_ns;
if (vtime_before(vtime, layer->vtime_now - slice_ns))
vtime = layer->vtime_now - slice_ns;
/*
* Special-case per-cpu kthreads which aren't in a preempting layer so
@ -1139,13 +1148,13 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
if (disable_topology) {
tctx->last_dsq = tctx->layer;
scx_bpf_dispatch_vtime(p, tctx->layer, layer_slice_ns, vtime, enq_flags);
scx_bpf_dispatch_vtime(p, tctx->layer, slice_ns, vtime, enq_flags);
} else {
u32 llc_id = cpu_to_llc_id(tctx->last_cpu >= 0 ? tctx->last_cpu :
bpf_get_smp_processor_id());
idx = layer_dsq_id(layer->idx, llc_id);
tctx->last_dsq = idx;
scx_bpf_dispatch_vtime(p, idx, layer_slice_ns, vtime, enq_flags);
scx_bpf_dispatch_vtime(p, idx, slice_ns, vtime, enq_flags);
}
preempt:
@ -1167,9 +1176,9 @@ static bool keep_running(struct cpu_ctx *cctx, struct task_struct *p)
if (!(tctx = lookup_task_ctx(p)) || !(layer = lookup_layer(tctx->layer)))
goto no;
u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;
u64 slice_ns = layer_slice_ns(layer);
/* @p has fully consumed its slice and still wants to run */
cctx->ran_current_for += layer_slice_ns;
cctx->ran_current_for += slice_ns;
/*
* There wasn't anything in the local or global DSQ, but there may be
@ -1194,7 +1203,7 @@ static bool keep_running(struct cpu_ctx *cctx, struct task_struct *p)
*/
if (disable_topology) {
if (!scx_bpf_dsq_nr_queued(layer->idx)) {
p->scx.slice = layer_slice_ns;
p->scx.slice = slice_ns;
lstat_inc(LSTAT_KEEP, layer, cctx);
return true;
}
@ -1203,7 +1212,7 @@ static bool keep_running(struct cpu_ctx *cctx, struct task_struct *p)
tctx->last_cpu :
bpf_get_smp_processor_id());
if (!scx_bpf_dsq_nr_queued(dsq_id)) {
p->scx.slice = layer_slice_ns;
p->scx.slice = slice_ns;
lstat_inc(LSTAT_KEEP, layer, cctx);
return true;
}
@ -1230,7 +1239,7 @@ static bool keep_running(struct cpu_ctx *cctx, struct task_struct *p)
scx_bpf_put_idle_cpumask(idle_cpumask);
if (has_idle) {
p->scx.slice = layer_slice_ns;
p->scx.slice = slice_ns;
lstat_inc(LSTAT_KEEP, layer, cctx);
return true;
}
@ -1672,11 +1681,14 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev)
u32 my_llc_id = cpu_to_llc_id(cpu);
/*
* If one of the fallback DSQs has the most budget then consume from
* it to prevent starvation.
* If one of the fallback DSQs has the most budget then consume from it
* to prevent starvation.
*/
if (has_pref_fallback_budget(costc)) {
dsq_id = budget_id_to_fallback_dsq(costc->pref_budget);
trace("COST consuming fallback %lld", dsq_id);
if (dsq_id > LO_FALLBACK_DSQ)
scx_bpf_error("invalid fallback dsq %lld", dsq_id);
if (scx_bpf_consume(dsq_id))
return;
}
@ -2126,17 +2138,18 @@ void BPF_STRUCT_OPS(layered_stopping, struct task_struct *p, bool runnable)
} else {
budget_id = layer->idx;
}
record_cpu_cost(costc, budget_id, (s64)used);
u64 slice_ns = layer_slice_ns(layer);
record_cpu_cost(costc, budget_id, (s64)used, slice_ns);
cctx->layer_cycles[lidx] += used;
cctx->current_preempt = false;
cctx->prev_exclusive = cctx->current_exclusive;
cctx->current_exclusive = false;
u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;
/* scale the execution time by the inverse of the weight and charge */
if (cctx->yielding && used < layer_slice_ns)
used = layer_slice_ns;
if (cctx->yielding && used < slice_ns)
used = slice_ns;
p->scx.dsq_vtime += used * 100 / p->scx.weight;
cctx->maybe_idle = true;
}
@ -2347,19 +2360,19 @@ int dump_cost(void)
bpf_for(i, 0, nr_llcs) {
u64 dsq_id = llc_hi_fallback_dsq_id(i);
u32 budget_id = fallback_dsq_cost_id(dsq_id);
scx_bpf_dump("COST FALLBACK[%d][%d] budget=%lld capacity=%lld\n",
scx_bpf_dump("COST FALLBACK[%llu][%d] budget=%lld capacity=%lld\n",
dsq_id, budget_id,
costc->budget[budget_id], costc->capacity[budget_id]);
}
// Per CPU costs
bpf_for(i, 0, nr_possible_cpus) {
if (!(costc = lookup_cpu_cost(j))) {
if (!(costc = lookup_cpu_cost(i))) {
scx_bpf_error("unabled to lookup layer %d", i);
continue;
}
bpf_for(j, 0, nr_layers) {
layer = lookup_layer(i);
layer = lookup_layer(j);
if (!layer) {
scx_bpf_error("unabled to lookup layer %d", i);
continue;
@ -2373,8 +2386,8 @@ int dump_cost(void)
u32 budget_id = fallback_dsq_cost_id(dsq_id);
if (budget_id >= MAX_GLOBAL_BUDGETS)
continue;
scx_bpf_dump("COST CPU[%d]FALLBACK[%d][%d] budget=%lld capacity=%lld\n",
i, j, dsq_id, budget_id,
scx_bpf_dump("COST CPU[%d]FALLBACK[%llu][%d] budget=%lld capacity=%lld\n",
i, dsq_id, budget_id,
costc->budget[budget_id], costc->capacity[budget_id]);
}
}
@ -2793,7 +2806,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
}
}
}
initialize_budgets(1000LLU * NSEC_PER_MSEC);
initialize_budgets(15LLU * NSEC_PER_SEC);
ret = start_layered_timers();
if (ret < 0)
return ret;