Merge pull request #896 from hodgesds/layered-dsq-cost

scx_layered: Add fallback DSQ cost accounting
Daniel Hodges 2024-11-07 13:35:04 +00:00 committed by GitHub
commit d6ba3b79d7
3 changed files with 227 additions and 73 deletions

cost.bpf.c

@ -1,23 +1,40 @@
/* Copyright (c) Meta Platforms, Inc. and affiliates. */
#include "cost.bpf.h"
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
/*
* Cost accounting struct that is used in both the per CPU and global context.
* Budgets are allowed to recurse to parent structs.
*/
struct cost {
s64 budget[MAX_LAYERS];
s64 capacity[MAX_LAYERS];
u32 pref_layer;
u32 idx;
bool overflow;
bool has_parent;
bool drain_fallback;
};
/*
* Converts a fallback DSQ to a cost id for accessing a cost struct.
*/
static __always_inline int fallback_dsq_cost_id(u64 fallback_dsq)
{
if (fallback_dsq < HI_FALLBACK_DSQ_BASE) {
scx_bpf_error("invalid fallback dsq");
return 0;
}
return (int)fallback_dsq - HI_FALLBACK_DSQ_BASE;
}
/*
* Returns the fallback DSQ id for a budget id.
*/
static u64 budget_id_to_fallback_dsq(u32 budget_id)
{
if (budget_id == MAX_GLOBAL_BUDGETS)
return LO_FALLBACK_DSQ;
return HI_FALLBACK_DSQ_BASE + budget_id;
}
/*
* Returns true if the cost's preferred budget is a fallback DSQ budget
*/
static bool has_pref_fallback_budget(struct cost *costc)
{
return costc->pref_budget > nr_layers && costc->pref_budget <= MAX_GLOBAL_BUDGETS;
}
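Taken together, the converters above are meant to round-trip between fallback DSQ ids and budget ids, with the lo fallback DSQ special-cased to the last budget slot. A minimal sketch of the intended mapping, not part of the commit, assuming hi fallback DSQs are numbered HI_FALLBACK_DSQ_BASE + llc_id as llc_hi_fallback_dsq_id() does in main.bpf.c:

	/* Illustration only: round-tripping fallback DSQ ids and budget ids. */
	static __always_inline void fallback_id_round_trip_example(void)
	{
		u32 llc_id = 2;
		u64 hi_dsq = HI_FALLBACK_DSQ_BASE + llc_id;

		/* hi fallback DSQ -> budget id -> same DSQ */
		u32 budget_id = fallback_dsq_cost_id(hi_dsq);      /* == llc_id */
		u64 dsq_id = budget_id_to_fallback_dsq(budget_id); /* == hi_dsq */

		/* the lo fallback DSQ maps to the last budget slot */
		u64 lo_dsq = budget_id_to_fallback_dsq(MAX_GLOBAL_BUDGETS); /* == LO_FALLBACK_DSQ */

		(void)dsq_id; (void)lo_dsq;
	}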
/*
* Map used for global cost accounting. Can be extended to support NUMA nodes.
@ -26,7 +43,7 @@ struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, u32);
__type(value, struct cost);
__uint(max_entries, MAX_NUMA_NODES + 1);
__uint(max_entries, 1);
__uint(map_flags, 0);
} cost_data SEC(".maps");
@ -72,6 +89,7 @@ static __always_inline struct cost *lookup_cpu_cost(s32 cpu)
return costc;
}
/*
* Initializes a cost.
*/
@ -102,39 +120,65 @@ static struct cost *initialize_cost(u32 cost_idx, u32 parent_idx,
}
/*
* Initializes the cost of a layer.
* Initializes a budget.
*/
static void initialize_cost_layer(struct cost *costc, u32 layer_id, s64 capacity)
static void initialize_budget(struct cost *costc, u32 budget_id, s64 capacity)
{
costc->capacity[layer_id] = capacity;
costc->budget[layer_id] = capacity;
if (budget_id >= MAX_GLOBAL_BUDGETS) {
scx_bpf_error("invalid budget id %d", budget_id);
return;
}
costc->capacity[budget_id] = capacity;
costc->budget[budget_id] = capacity;
}
/*
* Returns the preferred layer based on the layer with the maximum budget.
* Calculates the preferred budget and layer based on the maximum budget.
*/
static u32 preferred_cost(struct cost *costc)
static void calc_preferred_cost(struct cost *costc)
{
u32 layer_id, id, max_layer = 0;
u32 layer_id, id, budget_id, pref_budget = 0, max_layer = 0;
s64 max_budget = 0;
u64 dsq_id;
u32 rotation = bpf_get_smp_processor_id() % nr_layers;
bpf_for(id, 0, nr_layers) {
// If there is two equally weighted layers that have the same
// budget we rely on rotating the layers based on the cpu. This
// may not work well on low core machines.
/*
* If there are two equally weighted layers that have the same
* budget, we rely on rotating the layers based on the cpu. This
* may not work well on low core machines.
*/
layer_id = rotate_layer_id(id, rotation);
if (layer_id > nr_layers) {
scx_bpf_error("invalid layer");
return 0;
return;
}
if (costc->budget[layer_id] > max_budget) {
max_budget = costc->budget[layer_id];
max_layer = layer_id;
pref_budget = max_layer;
}
}
// Hi fallback DSQs
bpf_for(id, 0, nr_llcs) {
if (costc->budget[id] > max_budget) {
max_budget = costc->budget[id];
pref_budget = id;
}
}
budget_id = fallback_dsq_cost_id(LO_FALLBACK_DSQ);
if (budget_id > MAX_GLOBAL_BUDGETS) {
scx_bpf_error("invalid budget");
return;
}
if (costc->budget[budget_id] > max_budget) {
pref_budget = budget_id;
}
return max_layer;
costc->pref_layer = max_layer;
costc->pref_budget = pref_budget;
return;
}
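The rotation mentioned in the comment above staggers the scan's starting layer per CPU so that ties between equally budgeted layers don't always resolve to the lowest layer id. A sketch of the idea, assuming rotate_layer_id() is a simple modular rotation (its real definition lives elsewhere in the scheduler and may differ):

	/* Sketch only: rotation staggers the starting layer per CPU. */
	static __always_inline u32 rotate_layer_id_sketch(u32 id, u32 rotation)
	{
		/* e.g. nr_layers == 4, rotation == 1: scan order 1, 2, 3, 0 */
		return (id + rotation) % nr_layers;
	}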
/*
@ -208,34 +252,26 @@ s64 acquire_budget(struct cost *costc, u32 layer_id, s64 amount)
* acquire budget by either retrieving budget from the global context or
* refreshing all budgets.
*/
static int record_cpu_cost(struct cost *costc, u32 layer_id, s64 amount)
int record_cpu_cost(struct cost *costc, u32 budget_id, s64 amount)
{
if (layer_id >= MAX_LAYERS || !costc) {
scx_bpf_error("invalid layer %d", layer_id);
if (budget_id > MAX_GLOBAL_BUDGETS || !costc) {
scx_bpf_error("invalid budget %d", budget_id);
return 0;
}
__sync_fetch_and_sub(&costc->budget[layer_id], amount);
__sync_fetch_and_sub(&costc->budget[budget_id], amount);
if (costc->budget[layer_id] <= 0) {
costc->drain_fallback = true;
if (costc->budget[budget_id] <= 0) {
if (costc->has_parent) {
s64 budget = acquire_budget(costc, layer_id,
costc->capacity[layer_id] + amount);
s64 budget = acquire_budget(costc, budget_id,
costc->capacity[budget_id] + amount);
if (budget > 0) {
__sync_fetch_and_add(MEMBER_VPTR(*costc, .budget[layer_id]),
costc->capacity[layer_id]);
__sync_fetch_and_add(&costc->budget[budget_id],
costc->capacity[budget_id]);
}
}
}
u32 pref_layer = preferred_cost(costc);
if (pref_layer > nr_layers) {
scx_bpf_error("invalid pref_layer");
return 0;
}
costc->pref_layer = pref_layer;
calc_preferred_cost(costc);
return 0;
}
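In other words, a per-CPU budget drains locally and only reaches for the shared global cost once it goes negative. A worked example with hypothetical numbers: with a per-CPU capacity of 1,000,000 and 300,000 budget remaining, charging 500,000 leaves the slot at -200,000; the CPU then asks acquire_budget() for capacity + amount = 1,500,000 from its parent, and if that succeeds it adds the full capacity back, ending at 800,000.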
@ -271,11 +307,11 @@ int has_budget(struct cost *costc, struct layer *layer)
static void initialize_budgets(u64 refresh_intvl_ns)
{
struct layer *layer;
struct cost *costc;
int layer_id;
u64 layer_weight_dur, layer_weight_sum = 0;
struct cost *costc, *global_costc;
int layer_id, llc_id;
u64 dsq_id, layer_weight_dur, layer_weight_sum = 0;
s32 cpu;
u32 global = 0;
u32 budget_id;
bpf_for(layer_id, 0, nr_layers) {
layer = &layers[layer_id];
@ -285,41 +321,86 @@ static void initialize_budgets(u64 refresh_intvl_ns)
}
layer_weight_sum += layer->weight;
}
layer_weight_sum += HI_FALLBACK_DSQ_WEIGHT;
layer_weight_sum += LO_FALLBACK_DSQ_WEIGHT;
bpf_for(layer_id, 0, nr_layers) {
costc = initialize_cost(global, global, false, false, false);
if (!costc) {
global_costc = initialize_cost(COST_GLOBAL_KEY, COST_GLOBAL_KEY,
false, false, false);
if (!global_costc) {
scx_bpf_error("failed to initialize global budget");
return;
}
bpf_for(layer_id, 0, nr_layers) {
layer = &layers[layer_id];
if (!layer) {
scx_bpf_error("failed to lookup layer %d", layer_id);
return;
}
u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;
layer_weight_dur = (layer->weight * ((u64)refresh_intvl_ns * nr_possible_cpus)) /
layer_weight_dur = (layer->weight * ((u64)refresh_intvl_ns * slice_ns * nr_possible_cpus)) /
layer_weight_sum;
initialize_cost_layer(costc, layer_id, (s64)layer_weight_dur);
initialize_budget(global_costc, layer_id, (s64)layer_weight_dur);
trace("COST GLOBAL[%d][%s] budget %lld",
layer_id, layer->name, costc->budget[layer_id]);
layer_id, layer->name, global_costc->budget[layer_id]);
// TODO: add L3 budgets for topology awareness
bpf_for(cpu, 0, nr_possible_cpus) {
costc = initialize_cost(cpu, global, true,
costc = initialize_cost(cpu, COST_GLOBAL_KEY, true,
true, false);
if (!costc) {
scx_bpf_error("failed to cpu budget: %d", cpu);
return;
}
layer_weight_dur = (layer->weight * layer_slice_ns * refresh_intvl_ns) /
layer_weight_dur = (layer->weight * slice_ns * refresh_intvl_ns) /
layer_weight_sum;
initialize_cost_layer(costc, layer_id, (s64)layer_weight_dur);
initialize_budget(costc, layer_id, (s64)layer_weight_dur);
if (cpu == 0)
trace("COST CPU[%d][%d][%s] budget %lld",
cpu, layer_id, layer->name, costc->budget[layer_id]);
}
}
/*
* XXX: since any task from any layer can get kicked to the fallback
* DSQ we use the default slice to calculate the default budget.
*/
layer_weight_dur = (LO_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns * nr_possible_cpus) /
layer_weight_sum;
initialize_budget(global_costc, fallback_dsq_cost_id(LO_FALLBACK_DSQ),
(s64)layer_weight_dur);
bpf_for(llc_id, 0, nr_llcs) {
dsq_id = llc_hi_fallback_dsq_id(llc_id);
budget_id = fallback_dsq_cost_id(dsq_id);
layer_weight_dur = (HI_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns * nr_possible_cpus) /
layer_weight_sum;
initialize_budget(global_costc, budget_id, (s64)layer_weight_dur);
bpf_for(cpu, 0, nr_possible_cpus) {
costc = lookup_cpu_cost(cpu);
if (!costc) {
scx_bpf_error("failed to cpu budget: %d", cpu);
return;
}
// On the first iteration, always set up the lo fallback DSQ budget.
if (llc_id == 0) {
budget_id = fallback_dsq_cost_id(LO_FALLBACK_DSQ);
layer_weight_dur = (LO_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns) /
layer_weight_sum;
initialize_budget(costc, budget_id,
(s64)layer_weight_dur);
}
layer_weight_dur = (HI_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns) /
layer_weight_sum;
initialize_budget(costc, budget_id, (s64)layer_weight_dur);
if (cpu == 0 && llc_id == 0)
trace("COST CPU DSQ[%d][%d] budget %lld",
cpu, budget_id, costc->budget[budget_id]);
}
}
}
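The split above is purely weight-proportional, with the two fallback weights counted in the same sum as the layers. For example, with two layers of weight 100 each (hypothetical values) plus HI_FALLBACK_DSQ_WEIGHT = 50 and LO_FALLBACK_DSQ_WEIGHT = 10, layer_weight_sum is 260, so each layer receives 100/260 of the budget pool per refresh interval, each LLC's hi fallback DSQ receives 50/260, and the lo fallback DSQ receives 10/260.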

cost.bpf.h

@ -0,0 +1,46 @@
/* Copyright (c) Meta Platforms, Inc. and affiliates. */
#ifndef __LAYERED_COST_H
#define __LAYERED_COST_H
#ifdef LSP
#define __bpf__
#ifndef LSP_INC
#include "../../../../include/scx/common.bpf.h"
#endif
#endif
#include "intf.h"
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
enum cost_consts {
COST_GLOBAL_KEY = 0,
HI_FALLBACK_DSQ_WEIGHT = 50,
LO_FALLBACK_DSQ_WEIGHT = 10,
/*
* Max global budgets covers fallback DSQs (one per LLC) as well as
* layers. This is so that budgets can easily be calculated across
* fallback DSQs and layer weights. The cost accounting could instead
* be done at the DSQ level, which would simplify some things at the
* cost of growing the cost struct.
*/
MAX_GLOBAL_BUDGETS = MAX_LLCS + MAX_LAYERS + 1,
};
/*
* Cost accounting struct that is used in both the per CPU and global context.
* Budgets are allowed to recurse to parent structs.
*/
struct cost {
s64 budget[MAX_GLOBAL_BUDGETS];
s64 capacity[MAX_GLOBAL_BUDGETS];
u32 pref_budget; // the budget id with the most budget
u32 pref_layer; // the layer with the most budget
u32 idx;
bool overflow;
bool has_parent;
};
#endif /* __LAYERED_COST_H */
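The same struct serves both contexts: the global entry sits at COST_GLOBAL_KEY in the cost_data map, while per-CPU entries point at it as their parent so record_cpu_cost() can refill a drained slot from the global budget. A minimal usage sketch, illustrative only (the helper name and used_ns value are hypothetical):

	/* Sketch only: charging run time against the current CPU's cost struct. */
	static __always_inline void charge_runtime_example(u32 budget_id, u64 used_ns)
	{
		struct cost *costc = lookup_cpu_cost(bpf_get_smp_processor_id());

		if (!costc)
			return;
		/* drains the per-CPU slot; refills from the parent cost if it goes negative */
		record_cpu_cost(costc, budget_id, (s64)used_ns);
	}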

main.bpf.c

@ -133,6 +133,11 @@ static u64 llc_hi_fallback_dsq_id(u32 llc_id)
return HI_FALLBACK_DSQ_BASE + llc_id;
}
static inline bool is_fallback_dsq(u64 dsq_id)
{
return dsq_id > HI_FALLBACK_DSQ_BASE && dsq_id <= LO_FALLBACK_DSQ;
}
static u64 llc_hi_fallback_dsq_iter_offset(int llc_offset, int idx)
{
int offset = llc_offset + idx;
@ -383,6 +388,7 @@ struct task_ctx {
bool all_cpus_allowed;
u64 runnable_at;
u64 running_at;
u64 last_dsq;
};
struct {
@ -1076,6 +1082,7 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
lstat_inc(LSTAT_AFFN_VIOL, layer, cctx);
idx = cpu_hi_fallback_dsq_id(task_cpu);
tctx->last_dsq = idx;
scx_bpf_dispatch(p, idx, slice_ns, enq_flags);
goto preempt;
}
@ -1102,15 +1109,18 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
*/
idx = cpu_hi_fallback_dsq_id(task_cpu);
scx_bpf_dispatch(p, idx, slice_ns, enq_flags);
tctx->last_dsq = idx;
goto preempt;
}
if (disable_topology) {
tctx->last_dsq = tctx->layer;
scx_bpf_dispatch_vtime(p, tctx->layer, layer_slice_ns, vtime, enq_flags);
} else {
u32 llc_id = cpu_to_llc_id(tctx->last_cpu >= 0 ? tctx->last_cpu :
bpf_get_smp_processor_id());
idx = layer_dsq_id(layer->idx, llc_id);
tctx->last_dsq = idx;
scx_bpf_dispatch_vtime(p, idx, layer_slice_ns, vtime, enq_flags);
}
@ -1247,6 +1257,16 @@ void layered_dispatch_no_topo(s32 cpu, struct task_struct *prev)
return;
}
/*
* If one of the fallback DSQs has the most budget then consume from
* it to prevent starvation.
*/
if (has_pref_fallback_budget(costc)) {
dsq_id = budget_id_to_fallback_dsq(costc->pref_budget);
if (scx_bpf_consume(dsq_id))
return;
}
/* consume preempting layers first */
bpf_for(idx, 0, nr_layers) {
layer_idx = rotate_layer_id(costc->pref_layer, idx);
@ -1444,20 +1464,18 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev)
return;
}
u32 my_llc_id = cpu_to_llc_id(cpu);
/*
* Fallback DSQs don't have cost accounting. When the budget runs out
* for a layer we do an extra consume of the fallback DSQ to ensure
* that it doesn't stall out when the system is being saturated.
* If one of the fallback DSQs has the most budget then consume from
* it to prevent starvation.
*/
if (costc->drain_fallback) {
costc->drain_fallback = false;
dsq_id = cpu_hi_fallback_dsq_id(cpu);
if (has_pref_fallback_budget(costc)) {
dsq_id = budget_id_to_fallback_dsq(costc->pref_budget);
if (scx_bpf_consume(dsq_id))
return;
}
u32 my_llc_id = cpu_to_llc_id(cpu);
/* consume preempting layers first */
if (consume_preempting(costc, my_llc_id) == 0)
return;
@ -1878,6 +1896,7 @@ void BPF_STRUCT_OPS(layered_stopping, struct task_struct *p, bool runnable)
struct task_ctx *tctx;
struct layer *layer;
struct cost *costc;
u32 budget_id;
s32 lidx;
u64 used;
@ -1895,7 +1914,15 @@ void BPF_STRUCT_OPS(layered_stopping, struct task_struct *p, bool runnable)
used = layer->min_exec_ns;
}
record_cpu_cost(costc, layer->idx, (s64)used);
// If the task ran on the hi fallback dsq then the cost should be
// charged to it.
if (is_fallback_dsq(tctx->last_dsq)) {
budget_id = fallback_dsq_cost_id(tctx->last_dsq);
} else {
budget_id = layer->idx;
}
record_cpu_cost(costc, budget_id, (s64)used);
cctx->layer_cycles[lidx] += used;
cctx->current_preempt = false;
cctx->prev_exclusive = cctx->current_exclusive;
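Taken as a whole, the flow is: layered_enqueue() records the DSQ a task is dispatched to in tctx->last_dsq, layered_stopping() charges the consumed run time to the matching budget via record_cpu_cost(), and both dispatch paths check pref_budget so that a fallback DSQ holding the most budget is consumed ahead of the layers, preventing it from stalling out.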