Merge pull request #923 from hodgesds/layered-dsq-preempt-fix

scx_layered: Fix cost accounting for fallback dsqs
This commit is contained in:
Daniel Hodges 2024-11-14 13:47:11 +00:00 committed by GitHub
commit db46e27651
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 124 additions and 59 deletions

View File

@ -15,7 +15,19 @@ static __always_inline int fallback_dsq_cost_id(u64 fallback_dsq)
scx_bpf_error("invalid fallback dsq");
return 0;
}
return (int)fallback_dsq - HI_FALLBACK_DSQ_BASE;
return nr_layers + (int)fallback_dsq - HI_FALLBACK_DSQ_BASE;
}
/*
* Converts a llc DSQ to a cost id for accessing a cost struct.
*/
static __always_inline int fallback_llc_cost_id(int fallback_llc)
{
if (fallback_llc > MAX_LLCS) {
scx_bpf_error("invalid fallback llc");
return 0;
}
return nr_layers + fallback_llc;
}
/*
@ -24,16 +36,8 @@ static __always_inline int fallback_dsq_cost_id(u64 fallback_dsq)
static u64 budget_id_to_fallback_dsq(u32 budget_id)
{
if (budget_id == MAX_GLOBAL_BUDGETS)
return LO_FALLBACK_DSQ;
return HI_FALLBACK_DSQ_BASE + budget_id;
}
/*
* Returns true if the cost has preferred fallback DSQ budget
*/
static bool has_pref_fallback_budget(struct cost *costc)
{
return costc->pref_budget > nr_layers && costc->pref_budget <= MAX_GLOBAL_BUDGETS;
return (u64)LO_FALLBACK_DSQ;
return (u64)HI_FALLBACK_DSQ_BASE + (u64)budget_id - nr_layers;
}
/*
@ -125,7 +129,7 @@ static struct cost *initialize_cost(u32 cost_idx, u32 parent_idx,
static __noinline void initialize_budget(struct cost *costc, u32 budget_id,
s64 capacity)
{
if (budget_id >= MAX_GLOBAL_BUDGETS) {
if (budget_id > MAX_GLOBAL_BUDGETS) {
scx_bpf_error("invalid budget id %d", budget_id);
return;
}
@ -140,11 +144,10 @@ static void calc_preferred_cost(struct cost *costc)
{
u32 layer_id, id, budget_id, pref_budget = 0, max_layer = 0;
s64 max_budget = 0;
u64 dsq_id;
u32 rotation = bpf_get_smp_processor_id() % nr_layers;
bpf_for(id, 0, nr_layers) {
/*
/*
* If there is two equally weighted layers that have the same
* budget we rely on rotating the layers based on the cpu. This
* may not work well on low core machines.
@ -157,14 +160,16 @@ static void calc_preferred_cost(struct cost *costc)
if (costc->budget[layer_id] > max_budget) {
max_budget = costc->budget[layer_id];
max_layer = layer_id;
pref_budget = max_layer;
pref_budget = layer_id;
}
}
// Hi fallback DSQs
bpf_for(id, 0, nr_llcs) {
if (costc->budget[id] > max_budget) {
max_budget = costc->budget[id];
pref_budget = id;
budget_id = fallback_llc_cost_id(id);
if (costc->budget[budget_id] >= max_budget) {
max_budget = costc->budget[budget_id];
pref_budget = budget_id;
trace("COST pref fallback %d", budget_id);
}
}
budget_id = fallback_dsq_cost_id(LO_FALLBACK_DSQ);
@ -178,34 +183,85 @@ static void calc_preferred_cost(struct cost *costc)
costc->pref_layer = max_layer;
costc->pref_budget = pref_budget;
if (costc->idx == 0 && pref_budget > nr_layers)
trace("COST pref_layer %d pref_budget %d budget %lld",
max_layer, pref_budget, costc->budget[pref_budget]);
return;
}
/*
* Returns true if the cost has preferred fallback DSQ budget
*/
static bool has_pref_fallback_budget(struct cost *costc)
{
return costc->pref_budget >= nr_layers &&
costc->pref_budget < MAX_GLOBAL_BUDGETS;
}
/*
* Returns if a budget is allowed to preempt another budget. In general if the
* preempting budget is greater than the running budget then it is allowed to
* preempt.
*/
static __always_inline bool has_preempt_budget(struct cost *costc,
u32 cur_budget, u32 budget_id)
{
if (cur_budget >= MAX_GLOBAL_BUDGETS ||
budget_id >= MAX_GLOBAL_BUDGETS)
return false;
/*
* Fallback DSQs are always allowed to preempt
*/
if (budget_id > nr_layers)
return true;
return costc->budget[budget_id] > costc->budget[cur_budget];
}
/*
* Refreshes the budget of a cost.
*/
int refresh_budget(int cost_id)
{
struct cost *costc;
s64 capacity;
if (!(costc = lookup_cost(cost_id))) {
scx_bpf_error("failed to lookup cost %d", cost_id);
return 0;
}
u32 layer_id, id;
u32 budget_id, id;
u32 rotation = bpf_get_smp_processor_id() % nr_layers;
bpf_for(id, 0, nr_layers) {
layer_id = rotate_layer_id(id, rotation);
if (layer_id > nr_layers) {
scx_bpf_error("invalid layer");
budget_id = rotate_layer_id(id, rotation);
if (budget_id > nr_layers) {
scx_bpf_error("invalid budget id");
return 0;
}
s64 capacity = costc->capacity[layer_id];
__sync_lock_test_and_set(MEMBER_VPTR(*costc, .budget[layer_id]),
capacity = costc->capacity[budget_id];
__sync_lock_test_and_set(MEMBER_VPTR(*costc, .budget[budget_id]),
capacity);
}
// Hi fallback DSQs
bpf_for(id, 0, nr_llcs) {
budget_id = fallback_llc_cost_id(id);
capacity = costc->capacity[budget_id];
__sync_lock_test_and_set(MEMBER_VPTR(*costc, .budget[budget_id]),
capacity);
}
budget_id = fallback_dsq_cost_id(LO_FALLBACK_DSQ);
if (budget_id > MAX_GLOBAL_BUDGETS) {
scx_bpf_error("invalid budget");
return 0;
}
capacity = costc->capacity[budget_id];
__sync_lock_test_and_set(MEMBER_VPTR(*costc, .budget[budget_id]),
capacity);
trace("COST refreshed budget %d", cost_id);
return 0;
}
@ -223,11 +279,11 @@ int refresh_budgets(void)
/*
* Acquires a budget from a parent cost account.
*/
s64 acquire_budget(struct cost *costc, u32 layer_id, s64 amount)
s64 acquire_budget(struct cost *costc, u32 budget_id, s64 amount)
{
s64 budget = 0;
if (layer_id >= MAX_LAYERS || layer_id < 0) {
if (budget_id >= MAX_GLOBAL_BUDGETS) {
scx_bpf_error("invalid parent cost");
return budget;
}
@ -235,14 +291,15 @@ s64 acquire_budget(struct cost *costc, u32 layer_id, s64 amount)
if (!costc || !costc->has_parent)
return budget;
struct cost *parent_cost;
if (!(parent_cost = lookup_cost(costc->idx)))
if (!(parent_cost = lookup_cost(costc->idx))) {
scx_bpf_error("failed to find parent");
return budget;
}
__sync_fetch_and_sub(&parent_cost->budget[layer_id], amount);
__sync_fetch_and_sub(&parent_cost->budget[budget_id], amount);
if (parent_cost->budget[layer_id] < 0)
if (parent_cost->budget[budget_id] <= 0)
refresh_budgets();
return amount;
@ -253,7 +310,7 @@ s64 acquire_budget(struct cost *costc, u32 layer_id, s64 amount)
* acquire budget by either retrieving budget from the global context or
* refreshing all budgets.
*/
int record_cpu_cost(struct cost *costc, u32 budget_id, s64 amount)
int record_cpu_cost(struct cost *costc, u32 budget_id, s64 amount, u64 slice_ns)
{
if (budget_id > MAX_GLOBAL_BUDGETS || !costc) {
scx_bpf_error("invalid budget %d", budget_id);
@ -262,10 +319,11 @@ int record_cpu_cost(struct cost *costc, u32 budget_id, s64 amount)
__sync_fetch_and_sub(&costc->budget[budget_id], amount);
if (costc->budget[budget_id] <= 0) {
if (costc->budget[budget_id] <= 0 ||
costc->budget[budget_id] < slice_ns) {
if (costc->has_parent) {
s64 budget = acquire_budget(costc, budget_id,
costc->capacity[budget_id] + amount);
s64 req_budget = costc->capacity[budget_id] - costc->budget[budget_id];
s64 budget = acquire_budget(costc, budget_id, req_budget);
if (budget > 0) {
__sync_fetch_and_add(&costc->budget[budget_id],
costc->capacity[budget_id]);
@ -296,7 +354,7 @@ __weak int has_budget(struct cost *costc, struct layer *layer)
s64 budget = *MEMBER_VPTR(*costc, .budget[layer_id]);
u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;
if (budget > layer_slice_ns)
if (budget >= layer_slice_ns)
return slice_ns;
return 0;
@ -310,7 +368,7 @@ static void initialize_budgets(u64 refresh_intvl_ns)
struct layer *layer;
struct cost *costc, *global_costc;
int layer_id, llc_id;
u64 dsq_id, layer_weight_dur, layer_weight_sum = 0;
u64 layer_weight_dur, layer_weight_sum = 0;
s32 cpu;
u32 budget_id;
@ -339,7 +397,7 @@ static void initialize_budgets(u64 refresh_intvl_ns)
return;
}
layer_weight_dur = (layer->weight * ((u64)refresh_intvl_ns * slice_ns * nr_possible_cpus)) /
layer_weight_dur = (layer->weight * ((u64)refresh_intvl_ns * nr_possible_cpus)) /
layer_weight_sum;
initialize_budget(global_costc, layer_id, (s64)layer_weight_dur);
trace("COST GLOBAL[%d][%s] budget %lld",
@ -354,7 +412,7 @@ static void initialize_budgets(u64 refresh_intvl_ns)
scx_bpf_error("failed to cpu budget: %d", cpu);
return;
}
layer_weight_dur = (layer->weight * slice_ns * refresh_intvl_ns) /
layer_weight_dur = (layer->weight * refresh_intvl_ns) /
layer_weight_sum;
initialize_budget(costc, layer_id, (s64)layer_weight_dur);
if (cpu == 0)
@ -367,16 +425,15 @@ static void initialize_budgets(u64 refresh_intvl_ns)
* XXX: since any task from any layer can get kicked to the fallback
* DSQ we use the default slice to calculate the default budget.
*/
layer_weight_dur = (LO_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns * nr_possible_cpus) /
layer_weight_dur = (LO_FALLBACK_DSQ_WEIGHT * refresh_intvl_ns * nr_possible_cpus) /
layer_weight_sum;
initialize_budget(global_costc, fallback_dsq_cost_id(LO_FALLBACK_DSQ),
(s64)layer_weight_dur);
budget_id = fallback_dsq_cost_id(LO_FALLBACK_DSQ);
initialize_budget(global_costc, budget_id, (s64)layer_weight_dur);
bpf_for(llc_id, 0, nr_llcs) {
dsq_id = llc_hi_fallback_dsq_id(llc_id);
budget_id = fallback_dsq_cost_id(dsq_id);
budget_id = fallback_llc_cost_id(llc_id);
layer_weight_dur = (HI_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns * nr_possible_cpus) /
layer_weight_dur = (HI_FALLBACK_DSQ_WEIGHT * refresh_intvl_ns * nr_possible_cpus) /
layer_weight_sum;
initialize_budget(global_costc, budget_id, (s64)layer_weight_dur);
@ -390,14 +447,14 @@ static void initialize_budgets(u64 refresh_intvl_ns)
// On first iteration always setup the lo fallback dsq budget.
if (llc_id == 0) {
budget_id = fallback_dsq_cost_id(LO_FALLBACK_DSQ);
layer_weight_dur = (LO_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns) /
layer_weight_dur = (LO_FALLBACK_DSQ_WEIGHT * refresh_intvl_ns) /
layer_weight_sum;
initialize_budget(costc, budget_id,
(s64)layer_weight_dur);
initialize_budget(costc, budget_id, (s64)layer_weight_dur);
}
layer_weight_dur = (HI_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns) /
layer_weight_dur = (HI_FALLBACK_DSQ_WEIGHT * refresh_intvl_ns) /
layer_weight_sum;
budget_id = fallback_llc_cost_id(llc_id);
initialize_budget(costc, budget_id, (s64)layer_weight_dur);
if (cpu == 0 && llc_id == 0 && budget_id < MAX_GLOBAL_BUDGETS)
trace("COST CPU DSQ[%d][%d] budget %lld",

View File

@ -15,8 +15,8 @@
enum cost_consts {
COST_GLOBAL_KEY = 0,
HI_FALLBACK_DSQ_WEIGHT = 50,
LO_FALLBACK_DSQ_WEIGHT = 10,
HI_FALLBACK_DSQ_WEIGHT = 95,
LO_FALLBACK_DSQ_WEIGHT = 85,
/*
* Max global budgets map fallback DSQs (per LLC) as well as layers.

View File

@ -822,8 +822,12 @@ bool try_preempt_cpu(s32 cand, struct task_struct *p, struct cpu_ctx *cctx,
if (!(cand_cctx = lookup_cpu_ctx(cand)) || cand_cctx->current_preempt)
return false;
if (!(costc = lookup_cpu_cost(cand)) || has_budget(costc, layer) == 0)
if (!(costc = lookup_cpu_cost(cand)) ||
has_budget(costc, layer) == 0 ||
!has_preempt_budget(costc, cand_cctx->layer_idx, tctx->layer)) {
trace("COST layer %s not enough budget to preempt", layer->name);
return false;
}
/*
* If exclusive, we want to make sure the sibling CPU, if there's
@ -1604,11 +1608,14 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev)
u32 my_llc_id = cpu_to_llc_id(cpu);
/*
* If one of the fallback DSQs has the most budget then consume from
* it to prevent starvation.
* If one of the fallback DSQs has the most budget then consume from it
* to prevent starvation.
*/
if (has_pref_fallback_budget(costc)) {
dsq_id = budget_id_to_fallback_dsq(costc->pref_budget);
trace("COST consuming fallback %lld", dsq_id);
if (dsq_id > LO_FALLBACK_DSQ)
scx_bpf_error("invalid fallback dsq %lld", dsq_id);
if (scx_bpf_consume(dsq_id))
return;
}
@ -2058,13 +2065,14 @@ void BPF_STRUCT_OPS(layered_stopping, struct task_struct *p, bool runnable)
} else {
budget_id = layer->idx;
}
record_cpu_cost(costc, budget_id, (s64)used);
u64 slice_ns = layer_slice_ns(layer);
record_cpu_cost(costc, budget_id, (s64)used, slice_ns);
cctx->layer_cycles[lidx] += used;
cctx->current_preempt = false;
cctx->prev_exclusive = cctx->current_exclusive;
cctx->current_exclusive = false;
u64 slice_ns = layer_slice_ns(layer);
/* scale the execution time by the inverse of the weight and charge */
if (cctx->yielding && used < slice_ns)
@ -2279,7 +2287,7 @@ int dump_cost(void)
bpf_for(i, 0, nr_llcs) {
u64 dsq_id = llc_hi_fallback_dsq_id(i);
u32 budget_id = fallback_dsq_cost_id(dsq_id);
scx_bpf_dump("COST FALLBACK[%d][%d] budget=%lld capacity=%lld\n",
scx_bpf_dump("COST FALLBACK[%llu][%d] budget=%lld capacity=%lld\n",
dsq_id, budget_id,
costc->budget[budget_id], costc->capacity[budget_id]);
}
@ -2305,8 +2313,8 @@ int dump_cost(void)
u32 budget_id = fallback_dsq_cost_id(dsq_id);
if (budget_id >= MAX_GLOBAL_BUDGETS)
continue;
scx_bpf_dump("COST CPU[%d]FALLBACK[%d][%d] budget=%lld capacity=%lld\n",
i, j, dsq_id, budget_id,
scx_bpf_dump("COST CPU[%d]FALLBACK[%llu][%d] budget=%lld capacity=%lld\n",
i, dsq_id, budget_id,
costc->budget[budget_id], costc->capacity[budget_id]);
}
}
@ -2725,7 +2733,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
}
}
}
initialize_budgets(1000LLU * NSEC_PER_MSEC);
initialize_budgets(15LLU * NSEC_PER_SEC);
ret = start_layered_timers();
if (ret < 0)
return ret;