mirror of https://github.com/JakeHillion/scx.git
synced 2024-11-25 11:00:24 +00:00

Merge pull request #896 from hodgesds/layered-dsq-cost

scx_layered: Add fallback DSQ cost accounting

commit d6ba3b79d7
scheds/rust/scx_layered/src/bpf/cost.bpf.c
@@ -1,23 +1,40 @@
 /* Copyright (c) Meta Platforms, Inc. and affiliates. */
+#include "cost.bpf.h"
 #include <bpf/bpf_core_read.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
-/*
- * Cost accounting struct that is used in both the per CPU and global context.
- * Budgets are allowed to recurse to parent structs.
- */
-struct cost {
-    s64 budget[MAX_LAYERS];
-    s64 capacity[MAX_LAYERS];
-    u32 pref_layer;
-    u32 idx;
-    bool overflow;
-    bool has_parent;
-    bool drain_fallback;
-};
-
+/*
+ * Converts a fallback DSQ to a cost id for accessing a cost struct.
+ */
+static __always_inline int fallback_dsq_cost_id(u64 fallback_dsq)
+{
+    if (fallback_dsq < HI_FALLBACK_DSQ_BASE) {
+        scx_bpf_error("invalid fallback dsq");
+        return 0;
+    }
+    return (int)fallback_dsq - HI_FALLBACK_DSQ_BASE;
+}
+
+/*
+ * Returns the fallback DSQ id for a budget id.
+ */
+static u64 budget_id_to_fallback_dsq(u32 budget_id)
+{
+    if (budget_id == MAX_GLOBAL_BUDGETS)
+        return LO_FALLBACK_DSQ;
+    return HI_FALLBACK_DSQ_BASE + budget_id;
+}
+
+/*
+ * Returns true if the cost has preferred fallback DSQ budget.
+ */
+static bool has_pref_fallback_budget(struct cost *costc)
+{
+    return costc->pref_budget > nr_layers && costc->pref_budget <= MAX_GLOBAL_BUDGETS;
+}
+
 /*
  * Map used for global cost accounting. Can be extended to support NUMA nodes.
@@ -26,7 +43,7 @@ struct {
     __uint(type, BPF_MAP_TYPE_ARRAY);
     __type(key, u32);
     __type(value, struct cost);
-    __uint(max_entries, MAX_NUMA_NODES + 1);
+    __uint(max_entries, 1);
     __uint(map_flags, 0);
 } cost_data SEC(".maps");
@@ -72,6 +89,7 @@ static __always_inline struct cost *lookup_cpu_cost(s32 cpu)
     return costc;
 }
 
+
 /*
  * Initializes a cost.
  */
@@ -102,39 +120,65 @@ static struct cost *initialize_cost(u32 cost_idx, u32 parent_idx,
 }
 
 /*
- * Initializes the cost of a layer.
+ * Initializes a budget.
  */
-static void initialize_cost_layer(struct cost *costc, u32 layer_id, s64 capacity)
+static void initialize_budget(struct cost *costc, u32 budget_id, s64 capacity)
 {
-    costc->capacity[layer_id] = capacity;
-    costc->budget[layer_id] = capacity;
+    if (budget_id >= MAX_GLOBAL_BUDGETS) {
+        scx_bpf_error("invalid budget id %d", budget_id);
+        return;
+    }
+    costc->capacity[budget_id] = capacity;
+    costc->budget[budget_id] = capacity;
 }
 
 /*
- * Returns the preferred layer based on the layer with the maximum budget.
+ * Calculates the preferred budget and layer based on the maximum budget.
  */
-static u32 preferred_cost(struct cost *costc)
+static void calc_preferred_cost(struct cost *costc)
 {
-    u32 layer_id, id, max_layer = 0;
+    u32 layer_id, id, budget_id, pref_budget = 0, max_layer = 0;
     s64 max_budget = 0;
+    u64 dsq_id;
     u32 rotation = bpf_get_smp_processor_id() % nr_layers;
 
     bpf_for(id, 0, nr_layers) {
-        // If there are two equally weighted layers that have the same
-        // budget we rely on rotating the layers based on the cpu. This
-        // may not work well on low core machines.
+        /*
+         * If there are two equally weighted layers that have the same
+         * budget we rely on rotating the layers based on the cpu. This
+         * may not work well on low core machines.
+         */
         layer_id = rotate_layer_id(id, rotation);
         if (layer_id > nr_layers) {
             scx_bpf_error("invalid layer");
-            return 0;
+            return;
         }
         if (costc->budget[layer_id] > max_budget) {
             max_budget = costc->budget[layer_id];
             max_layer = layer_id;
+            pref_budget = max_layer;
         }
     }
+    // Hi fallback DSQs
+    bpf_for(id, 0, nr_llcs) {
+        if (costc->budget[id] > max_budget) {
+            max_budget = costc->budget[id];
+            pref_budget = id;
+        }
+    }
+    budget_id = fallback_dsq_cost_id(LO_FALLBACK_DSQ);
+    if (budget_id > MAX_GLOBAL_BUDGETS) {
+        scx_bpf_error("invalid budget");
+        return;
+    }
+    if (costc->budget[budget_id] > max_budget) {
+        pref_budget = budget_id;
+    }
 
-    return max_layer;
+    costc->pref_layer = max_layer;
+    costc->pref_budget = pref_budget;
+
+    return;
 }
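
The tie-breaking comment above leans on rotate_layer_id(), which is defined elsewhere in the scheduler and not shown in this diff. The standalone C sketch below illustrates the assumed idea (start each CPU's scan at a different offset so equal budgets do not always resolve to the same layer); the modulo rotation and NR_LAYERS value are assumptions for illustration, not the scheduler's actual implementation.

/* Sketch of per-CPU tie-breaking rotation; assumed semantics, not the
 * scheduler's actual rotate_layer_id(). */
#include <stdint.h>
#include <stdio.h>

#define NR_LAYERS 4 /* assumed */

static uint32_t rotate_layer_id(uint32_t id, uint32_t rotation)
{
    return (id + rotation) % NR_LAYERS; /* assumed behavior */
}

int main(void)
{
    /* cpu 0 scans 0,1,2,3; cpu 1 scans 1,2,3,0; ties resolve differently */
    for (uint32_t cpu = 0; cpu < 2; cpu++) {
        uint32_t rotation = cpu % NR_LAYERS;
        printf("cpu%u order:", cpu);
        for (uint32_t id = 0; id < NR_LAYERS; id++)
            printf(" %u", rotate_layer_id(id, rotation));
        printf("\n");
    }
    return 0;
}
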
@@ -208,34 +252,26 @@ s64 acquire_budget(struct cost *costc, u32 layer_id, s64 amount)
 /*
  * acquire budget by either retrieving budget from the global context or
  * refreshing all budgets.
  */
-static int record_cpu_cost(struct cost *costc, u32 layer_id, s64 amount)
+int record_cpu_cost(struct cost *costc, u32 budget_id, s64 amount)
 {
-    if (layer_id >= MAX_LAYERS || !costc) {
-        scx_bpf_error("invalid layer %d", layer_id);
+    if (budget_id > MAX_GLOBAL_BUDGETS || !costc) {
+        scx_bpf_error("invalid budget %d", budget_id);
         return 0;
     }
 
-    __sync_fetch_and_sub(&costc->budget[layer_id], amount);
+    __sync_fetch_and_sub(&costc->budget[budget_id], amount);
 
-    if (costc->budget[layer_id] <= 0) {
-        costc->drain_fallback = true;
+    if (costc->budget[budget_id] <= 0) {
         if (costc->has_parent) {
-            s64 budget = acquire_budget(costc, layer_id,
-                                        costc->capacity[layer_id] + amount);
+            s64 budget = acquire_budget(costc, budget_id,
+                                        costc->capacity[budget_id] + amount);
             if (budget > 0) {
-                __sync_fetch_and_add(MEMBER_VPTR(*costc, .budget[layer_id]),
-                                     costc->capacity[layer_id]);
+                __sync_fetch_and_add(&costc->budget[budget_id],
+                                     costc->capacity[budget_id]);
             }
         }
     }
 
-    u32 pref_layer = preferred_cost(costc);
-    if (pref_layer > nr_layers) {
-        scx_bpf_error("invalid pref_layer");
-        return 0;
-    }
-
-    costc->pref_layer = pref_layer;
+    calc_preferred_cost(costc);
 
     return 0;
 }
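
As a rough illustration of the charge-then-refill flow in record_cpu_cost(), the standalone sketch below models a per-CPU budget that, once exhausted, pulls a full capacity refill from its parent. It is a deliberate simplification of the patch: no atomics, a single budget id, and acquire_budget()'s capacity+amount recursion collapsed into one parent check.

/* Simplified, single-threaded model of record_cpu_cost()'s refill path. */
#include <stdint.h>
#include <stdio.h>

struct toy_cost {
    int64_t budget;
    int64_t capacity;
    struct toy_cost *parent; /* NULL for the global cost */
};

static void toy_record_cost(struct toy_cost *c, int64_t amount)
{
    c->budget -= amount;
    if (c->budget <= 0 && c->parent) {
        /* acquire_budget() analogue: take a refill from the parent */
        if (c->parent->budget >= c->capacity) {
            c->parent->budget -= c->capacity;
            c->budget += c->capacity;
        }
    }
}

int main(void)
{
    struct toy_cost global = { .budget = 100, .capacity = 100, .parent = NULL };
    struct toy_cost cpu = { .budget = 10, .capacity = 10, .parent = &global };

    toy_record_cost(&cpu, 12); /* overdraws, triggers a refill */
    printf("cpu budget %lld, global budget %lld\n",
           (long long)cpu.budget, (long long)global.budget);
    return 0;
}
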
@@ -271,11 +307,11 @@ int has_budget(struct cost *costc, struct layer *layer)
 static void initialize_budgets(u64 refresh_intvl_ns)
 {
     struct layer *layer;
-    struct cost *costc;
-    int layer_id;
-    u64 layer_weight_dur, layer_weight_sum = 0;
+    struct cost *costc, *global_costc;
+    int layer_id, llc_id;
+    u64 dsq_id, layer_weight_dur, layer_weight_sum = 0;
     s32 cpu;
-    u32 global = 0;
+    u32 budget_id;
 
     bpf_for(layer_id, 0, nr_layers) {
         layer = &layers[layer_id];
@@ -285,41 +321,86 @@ static void initialize_budgets(u64 refresh_intvl_ns)
         }
         layer_weight_sum += layer->weight;
     }
+    layer_weight_sum += HI_FALLBACK_DSQ_WEIGHT;
+    layer_weight_sum += LO_FALLBACK_DSQ_WEIGHT;
+
+    global_costc = initialize_cost(COST_GLOBAL_KEY, COST_GLOBAL_KEY,
+                                   false, false, false);
+    if (!global_costc) {
+        scx_bpf_error("failed to initialize global budget");
+        return;
+    }
 
     bpf_for(layer_id, 0, nr_layers) {
-        costc = initialize_cost(global, global, false, false, false);
-        if (!costc) {
-            scx_bpf_error("failed to initialize global budget");
-            return;
-        }
-
         layer = &layers[layer_id];
         if (!layer) {
             scx_bpf_error("failed to lookup layer %d", layer_id);
             return;
         }
-        u64 layer_slice_ns = layer->slice_ns > 0 ? layer->slice_ns : slice_ns;
 
-        layer_weight_dur = (layer->weight * ((u64)refresh_intvl_ns * nr_possible_cpus)) /
+        layer_weight_dur = (layer->weight * ((u64)refresh_intvl_ns * slice_ns * nr_possible_cpus)) /
                            layer_weight_sum;
-        initialize_cost_layer(costc, layer_id, (s64)layer_weight_dur);
+        initialize_budget(global_costc, layer_id, (s64)layer_weight_dur);
         trace("COST GLOBAL[%d][%s] budget %lld",
-              layer_id, layer->name, costc->budget[layer_id]);
+              layer_id, layer->name, global_costc->budget[layer_id]);
 
         // TODO: add L3 budgets for topology awareness
 
         bpf_for(cpu, 0, nr_possible_cpus) {
-            costc = initialize_cost(cpu, global, true,
-                                    true, false);
+            costc = initialize_cost(cpu, COST_GLOBAL_KEY, true,
+                                    true, false);
             if (!costc) {
                 scx_bpf_error("failed to cpu budget: %d", cpu);
                 return;
             }
-            layer_weight_dur = (layer->weight * layer_slice_ns * refresh_intvl_ns) /
+            layer_weight_dur = (layer->weight * slice_ns * refresh_intvl_ns) /
                                layer_weight_sum;
-            initialize_cost_layer(costc, layer_id, (s64)layer_weight_dur);
-            trace("COST CPU[%d][%d][%s] budget %lld",
-                  cpu, layer_id, layer->name, costc->budget[layer_id]);
+            initialize_budget(costc, layer_id, (s64)layer_weight_dur);
+            if (cpu == 0)
+                trace("COST CPU[%d][%d][%s] budget %lld",
+                      cpu, layer_id, layer->name, costc->budget[layer_id]);
         }
     }
 
+    /*
+     * XXX: since any task from any layer can get kicked to the fallback
+     * DSQ we use the default slice to calculate the default budget.
+     */
+    layer_weight_dur = (LO_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns * nr_possible_cpus) /
+                       layer_weight_sum;
+    initialize_budget(global_costc, fallback_dsq_cost_id(LO_FALLBACK_DSQ),
+                      (s64)layer_weight_dur);
+
+    bpf_for(llc_id, 0, nr_llcs) {
+        dsq_id = llc_hi_fallback_dsq_id(llc_id);
+        budget_id = fallback_dsq_cost_id(dsq_id);
+
+        layer_weight_dur = (HI_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns * nr_possible_cpus) /
+                           layer_weight_sum;
+        initialize_budget(global_costc, budget_id, (s64)layer_weight_dur);
+
+        bpf_for(cpu, 0, nr_possible_cpus) {
+            costc = lookup_cpu_cost(cpu);
+            if (!costc) {
+                scx_bpf_error("failed to cpu budget: %d", cpu);
+                return;
+            }
+
+            // On first iteration always setup the lo fallback dsq budget.
+            if (llc_id == 0) {
+                budget_id = fallback_dsq_cost_id(LO_FALLBACK_DSQ);
+                layer_weight_dur = (LO_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns) /
                                    layer_weight_sum;
+                initialize_budget(costc, budget_id,
+                                  (s64)layer_weight_dur);
+            }
+
+            layer_weight_dur = (HI_FALLBACK_DSQ_WEIGHT * slice_ns * refresh_intvl_ns) /
+                               layer_weight_sum;
+            initialize_budget(costc, budget_id, (s64)layer_weight_dur);
+            if (cpu == 0 && llc_id == 0)
+                trace("COST CPU DSQ[%d][%d] budget %lld",
+                      cpu, budget_id, costc->budget[budget_id]);
+        }
+    }
 }
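
To make the weight math above concrete, here is a standalone C sketch (not part of the patch) that mirrors the global budget formula: each budget gets a share of refresh_intvl_ns * slice_ns * nr_possible_cpus proportional to its weight. All numeric values here (slice, refresh interval, CPU count, weights) are illustrative assumptions, not the scheduler's defaults.

/* Standalone sketch of the global budget arithmetic used above.
 * All values are illustrative, not the scheduler's defaults. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t slice_ns = 20000000ULL;          /* assumed 20ms slice */
    uint64_t refresh_intvl_ns = 100000000ULL; /* assumed 100ms refresh */
    uint64_t nr_possible_cpus = 8;            /* assumed */
    uint64_t layer_weights[2] = { 100, 300 }; /* assumed layer weights */
    uint64_t hi_fallback_weight = 50, lo_fallback_weight = 10;

    uint64_t weight_sum = layer_weights[0] + layer_weights[1] +
                          hi_fallback_weight + lo_fallback_weight;

    for (int i = 0; i < 2; i++) {
        /* mirrors: (layer->weight * ((u64)refresh_intvl_ns * slice_ns *
         * nr_possible_cpus)) / layer_weight_sum */
        uint64_t budget = (layer_weights[i] * refresh_intvl_ns *
                           slice_ns * nr_possible_cpus) / weight_sum;
        printf("layer %d budget: %llu\n", i, (unsigned long long)budget);
    }
    return 0;
}

A heavier-weighted layer therefore starts each refresh interval with proportionally more runtime budget, and the two fallback weights reserve a fixed slice of the same pool.
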

scheds/rust/scx_layered/src/bpf/cost.bpf.h (new file, 46 lines)
@@ -0,0 +1,46 @@
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+#ifndef __LAYERED_COST_H
+#define __LAYERED_COST_H
+
+#ifdef LSP
+#define __bpf__
+#ifndef LSP_INC
+#include "../../../../include/scx/common.bpf.h"
+#endif
+#endif
+#include "intf.h"
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+enum cost_consts {
+    COST_GLOBAL_KEY = 0,
+    HI_FALLBACK_DSQ_WEIGHT = 50,
+    LO_FALLBACK_DSQ_WEIGHT = 10,
+
+    /*
+     * Max global budgets map fallback DSQs (per LLC) as well as layers.
+     * This is so that budgets can easily be calculated between fallback
+     * dsqs and weights. The cost accounting could be done at the DSQ
+     * level, which would simplify some things at the cost of the size of
+     * the cost struct.
+     */
+    MAX_GLOBAL_BUDGETS = MAX_LLCS + MAX_LAYERS + 1,
+};
+
+/*
+ * Cost accounting struct that is used in both the per CPU and global context.
+ * Budgets are allowed to recurse to parent structs.
+ */
+struct cost {
+    s64 budget[MAX_GLOBAL_BUDGETS];
+    s64 capacity[MAX_GLOBAL_BUDGETS];
+    u32 pref_budget; // the cost with the most budget
+    u32 pref_layer;  // the layer with the most budget
+    u32 idx;
+    bool overflow;
+    bool has_parent;
+};
+
+
+#endif /* __LAYERED_COST_H */
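
The header's comment describes a single budget-id space: layer ids come first, per-LLC hi fallback DSQs map onto ids via fallback_dsq_cost_id(), and the lo fallback DSQ sits at the top. The standalone sketch below round-trips the hi fallback mapping using the same arithmetic as the patch; HI_FALLBACK_DSQ_BASE and the LLC count are placeholder assumptions, since the real values live in intf.h.

/* Standalone sketch of the hi fallback DSQ <-> budget id mapping.
 * HI_FALLBACK_DSQ_BASE is a placeholder; the real value comes from intf.h. */
#include <assert.h>
#include <stdint.h>

#define HI_FALLBACK_DSQ_BASE 0x1000ULL /* assumed for illustration */

static int fallback_dsq_cost_id(uint64_t fallback_dsq)
{
    /* same arithmetic as the patch, minus the scx_bpf_error() check */
    return (int)(fallback_dsq - HI_FALLBACK_DSQ_BASE);
}

static uint64_t budget_id_to_fallback_dsq(uint32_t budget_id)
{
    return HI_FALLBACK_DSQ_BASE + budget_id;
}

int main(void)
{
    /* each LLC's hi fallback DSQ maps to budget id == llc id */
    for (uint64_t llc = 0; llc < 4; llc++) {
        uint64_t dsq = HI_FALLBACK_DSQ_BASE + llc;
        assert(fallback_dsq_cost_id(dsq) == (int)llc);
        assert(budget_id_to_fallback_dsq((uint32_t)llc) == dsq);
    }
    return 0;
}
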

scheds/rust/scx_layered/src/bpf/main.bpf.c
@@ -133,6 +133,11 @@ static u64 llc_hi_fallback_dsq_id(u32 llc_id)
     return HI_FALLBACK_DSQ_BASE + llc_id;
 }
 
+static inline bool is_fallback_dsq(u64 dsq_id)
+{
+    return dsq_id > HI_FALLBACK_DSQ_BASE && dsq_id <= LO_FALLBACK_DSQ;
+}
+
 static u64 llc_hi_fallback_dsq_iter_offset(int llc_offset, int idx)
 {
     int offset = llc_offset + idx;
@@ -383,6 +388,7 @@ struct task_ctx {
     bool all_cpus_allowed;
     u64 runnable_at;
     u64 running_at;
+    u64 last_dsq;
 };
 
 struct {
@@ -1076,6 +1082,7 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
         lstat_inc(LSTAT_AFFN_VIOL, layer, cctx);
 
         idx = cpu_hi_fallback_dsq_id(task_cpu);
+        tctx->last_dsq = idx;
         scx_bpf_dispatch(p, idx, slice_ns, enq_flags);
         goto preempt;
     }
@@ -1102,15 +1109,18 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
          */
         idx = cpu_hi_fallback_dsq_id(task_cpu);
         scx_bpf_dispatch(p, idx, slice_ns, enq_flags);
+        tctx->last_dsq = idx;
         goto preempt;
     }
 
     if (disable_topology) {
+        tctx->last_dsq = tctx->layer;
         scx_bpf_dispatch_vtime(p, tctx->layer, layer_slice_ns, vtime, enq_flags);
     } else {
         u32 llc_id = cpu_to_llc_id(tctx->last_cpu >= 0 ? tctx->last_cpu :
                                    bpf_get_smp_processor_id());
         idx = layer_dsq_id(layer->idx, llc_id);
+        tctx->last_dsq = idx;
         scx_bpf_dispatch_vtime(p, idx, layer_slice_ns, vtime, enq_flags);
     }
@@ -1247,6 +1257,16 @@ void layered_dispatch_no_topo(s32 cpu, struct task_struct *prev)
         return;
     }
 
+    /*
+     * If one of the fallback DSQs has the most budget then consume from
+     * it to prevent starvation.
+     */
+    if (has_pref_fallback_budget(costc)) {
+        dsq_id = budget_id_to_fallback_dsq(costc->pref_budget);
+        if (scx_bpf_consume(dsq_id))
+            return;
+    }
+
     /* consume preempting layers first */
     bpf_for(idx, 0, nr_layers) {
         layer_idx = rotate_layer_id(costc->pref_layer, idx);
@@ -1444,20 +1464,18 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev)
         return;
     }
 
-    u32 my_llc_id = cpu_to_llc_id(cpu);
-
     /*
-     * Fallback DSQs don't have cost accounting. When the budget runs out
-     * for a layer we do an extra consume of the fallback DSQ to ensure
-     * that it doesn't stall out when the system is being saturated.
+     * If one of the fallback DSQs has the most budget then consume from
+     * it to prevent starvation.
      */
-    if (costc->drain_fallback) {
-        costc->drain_fallback = false;
-        dsq_id = cpu_hi_fallback_dsq_id(cpu);
+    if (has_pref_fallback_budget(costc)) {
+        dsq_id = budget_id_to_fallback_dsq(costc->pref_budget);
         if (scx_bpf_consume(dsq_id))
             return;
     }
 
+    u32 my_llc_id = cpu_to_llc_id(cpu);
+
     /* consume preempting layers first */
     if (consume_preempting(costc, my_llc_id) == 0)
         return;
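
Both dispatch paths above gate the starvation-avoidance consume on has_pref_fallback_budget(), which treats any pref_budget id above the layer ids as a fallback budget. A standalone model of that bounds test, with assumed layer and budget counts:

/* Standalone model of has_pref_fallback_budget()'s gate. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_LAYERS          4  /* assumed */
#define MAX_GLOBAL_BUDGETS 16 /* assumed */

static bool has_pref_fallback_budget(uint32_t pref_budget)
{
    /* same bounds test as the patch: ids above the layers are fallbacks */
    return pref_budget > NR_LAYERS && pref_budget <= MAX_GLOBAL_BUDGETS;
}

int main(void)
{
    printf("layer budget 2 -> %d\n", has_pref_fallback_budget(2));    /* 0 */
    printf("fallback budget 6 -> %d\n", has_pref_fallback_budget(6)); /* 1 */
    return 0;
}
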
@@ -1878,6 +1896,7 @@ void BPF_STRUCT_OPS(layered_stopping, struct task_struct *p, bool runnable)
     struct task_ctx *tctx;
     struct layer *layer;
     struct cost *costc;
+    u32 budget_id;
     s32 lidx;
     u64 used;
 
@@ -1895,7 +1914,15 @@ void BPF_STRUCT_OPS(layered_stopping, struct task_struct *p, bool runnable)
         used = layer->min_exec_ns;
     }
 
-    record_cpu_cost(costc, layer->idx, (s64)used);
+    // If the task ran on the hi fallback dsq then the cost should be
+    // charged to it.
+    if (is_fallback_dsq(tctx->last_dsq)) {
+        budget_id = fallback_dsq_cost_id(tctx->last_dsq);
+    } else {
+        budget_id = layer->idx;
+    }
+    record_cpu_cost(costc, budget_id, (s64)used);
 
     cctx->layer_cycles[lidx] += used;
     cctx->current_preempt = false;
     cctx->prev_exclusive = cctx->current_exclusive;
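
Putting the pieces together: the charging decision added to layered_stopping() picks the fallback budget when the task last ran on a fallback DSQ (via the last_dsq field recorded at enqueue time), and otherwise charges the task's layer. The standalone model below mirrors that selection with the patch's range test; HI_FALLBACK_DSQ_BASE, MAX_LLCS, and LO_FALLBACK_DSQ are assumed placeholder values.

/* Standalone model of the budget-id selection added to layered_stopping(). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HI_FALLBACK_DSQ_BASE 0x1000ULL /* assumed */
#define MAX_LLCS             64        /* assumed */
#define LO_FALLBACK_DSQ      (HI_FALLBACK_DSQ_BASE + MAX_LLCS) /* assumed */

static bool is_fallback_dsq(uint64_t dsq_id)
{
    /* same range test as the patch */
    return dsq_id > HI_FALLBACK_DSQ_BASE && dsq_id <= LO_FALLBACK_DSQ;
}

static uint32_t pick_budget_id(uint64_t last_dsq, uint32_t layer_idx)
{
    if (is_fallback_dsq(last_dsq))
        return (uint32_t)(last_dsq - HI_FALLBACK_DSQ_BASE); /* fallback_dsq_cost_id() */
    return layer_idx; /* charge the layer's own budget */
}

int main(void)
{
    printf("layer dsq -> budget %u\n", pick_budget_id(3, 2));
    printf("hi fallback dsq -> budget %u\n",
           pick_budget_id(HI_FALLBACK_DSQ_BASE + 1, 2));
    return 0;
}
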