Mirror of https://github.com/sched-ext/scx.git (synced 2024-12-04 08:17:11 +00:00)
scx_lavd: Perform load balancing at consume_task()

Upon ops.dispatch(), perform load balancing based on the plan set up earlier,
stealing a task from a stealee domain into a stealer domain. To avoid a
thundering herd of concurrent stealers, a stealer steals a task only
probabilistically. To keep the task migration distance short, the stealing
probability decreases exponentially with each hop of distance. Finally, within
one stat cycle (50 ms), a stealer migrates only one task from a stealee, so
load balancing proceeds gradually.

Signed-off-by: Changwoo Min <changwoo@igalia.com>
Commit 047e8c81e9 (parent 4f1ffc1bc6)
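The stealing policy described above is probabilistic on two axes: whether a stealer attempts a steal at all on a given dispatch, and how far away it is willing to reach. The user-space sketch below illustrates only that decision logic under assumed placeholder values; X_PROB_FT, MAX_DIST, and rand() stand in for LAVD_CPDOM_X_PROB_FT, LAVD_CPDOM_MAX_DIST, and bpf_get_prandom_u32(), and it is not the BPF code itself.

/* steal_decision_sketch.c -- user-space sketch of the probabilistic stealing
 * decision; the constants and rand() are placeholders, not scx_lavd values. */
#include <stdio.h>
#include <stdlib.h>

#define X_PROB_FT 5	/* stand-in for LAVD_CPDOM_X_PROB_FT */
#define MAX_DIST  4	/* stand-in for LAVD_CPDOM_MAX_DIST */

/* True with probability x/y, mirroring prob_x_out_of_y() in the patch. */
static int prob_x_out_of_y(unsigned int x, unsigned int y)
{
	return (unsigned int)rand() % y < x;
}

/*
 * Return the farthest distance level a stealer examines on one dispatch,
 * or -1 if it makes no stealing attempt at all.
 */
static int steal_distance(unsigned int nr_cpus)
{
	/* Go/no-go gate: only about 1 in nr_cpus * X_PROB_FT dispatches try. */
	if (!prob_x_out_of_y(1, nr_cpus * X_PROB_FT))
		return -1;

	for (int i = 0; i < MAX_DIST; i++) {
		/*
		 * Pretend stealing at this distance failed; go one hop
		 * farther only with probability 1/X_PROB_FT, so reaching
		 * level i happens with probability (1/X_PROB_FT)^i.
		 */
		if (!prob_x_out_of_y(1, X_PROB_FT))
			return i;
	}
	return MAX_DIST - 1;
}

int main(void)
{
	int hist[MAX_DIST + 1] = { 0 };

	for (int n = 0; n < 1000000; n++) {
		int d = steal_distance(8);
		hist[d < 0 ? MAX_DIST : d]++;
	}

	for (int i = 0; i < MAX_DIST; i++)
		printf("farthest level examined %d: %d dispatches\n", i, hist[i]);
	printf("no attempt: %d dispatches\n", hist[MAX_DIST]);
	return 0;
}

Running it shows the count of dispatches that reach each additional hop shrinking by roughly a factor of X_PROB_FT, which is what keeps most migrations close to the stealer domain.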
@@ -51,7 +51,9 @@ enum consts_internal {
 					       performance mode when cpu util > 40% */
 
 	LAVD_CPDOM_STARV_NS = (2 * LAVD_SLICE_MAX_NS_DFL),
-	LAVD_CPDOM_MIGRATION_SHIFT = 2, /* 1/2**2 = +/- 25% */
+	LAVD_CPDOM_MIGRATION_SHIFT = 3, /* 1/2**3 = +/- 12.5% */
+	LAVD_CPDOM_X_PROB_FT = (LAVD_SYS_STAT_INTERVAL_NS /
+				(2 * LAVD_SLICE_MAX_NS_DFL)), /* roughly twice per interval */
 };
 
 /*
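The "roughly twice per interval" comment on LAVD_CPDOM_X_PROB_FT follows from a short expected-value argument: a CPU reaches ops.dispatch at most about LAVD_SYS_STAT_INTERVAL_NS / LAVD_SLICE_MAX_NS_DFL times per stat cycle, and each dispatch attempts a steal with probability 1 / (nr_cpus * LAVD_CPDOM_X_PROB_FT), so a domain of nr_cpus CPUs expects about two attempts per cycle regardless of the actual constant values. A small check with assumed example numbers (the 50 ms interval comes from the commit message; the 5 ms slice and 8 CPUs are made up for illustration):

/* x_prob_ft_check.c -- back-of-the-envelope check of the "roughly twice per
 * interval" comment; the values below are assumed examples, not the actual
 * scx_lavd defaults. */
#include <stdio.h>

int main(void)
{
	double interval_ns = 50.0e6;	/* assumed LAVD_SYS_STAT_INTERVAL_NS: 50 ms stat cycle */
	double slice_ns    = 5.0e6;	/* assumed LAVD_SLICE_MAX_NS_DFL: 5 ms max slice */
	double nr_cpus     = 8.0;	/* CPUs in the stealer domain */

	double x_prob_ft   = interval_ns / (2.0 * slice_ns);	/* LAVD_CPDOM_X_PROB_FT */

	/* Each CPU dispatches at most about interval/slice times per cycle, and
	 * each dispatch attempts a steal with probability 1/(nr_cpus * X_PROB_FT),
	 * so the per-domain expectation is: */
	double attempts    = nr_cpus * (interval_ns / slice_ns) / (nr_cpus * x_prob_ft);

	printf("X_PROB_FT = %.1f, expected steal attempts per interval = %.1f\n",
	       x_prob_ft, attempts);	/* prints 2.0 for any choice of values */
	return 0;
}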
@@ -1129,18 +1129,109 @@ static bool consume_dsq(u64 dsq_id)
 	return false;
 }
 
+static bool try_to_steal_task(struct cpdom_ctx *cpdomc)
+{
+	struct cpdom_ctx *cpdomc_pick;
+	u64 nr_nbr, dsq_id;
+	s64 nuance;
+
+	/*
+	 * If all CPUs are not used -- i.e., the system is under-utilized,
+	 * there is no point of load balancing. It is better to make an
+	 * effort to increase the system utilization.
+	 */
+	if (!use_full_cpus())
+		return false;
+
+	/*
+	 * Probabilistically make a go or no go decision to avoid the
+	 * thundering herd problem. In other words, one out of nr_cpus
+	 * will try to steal a task at a moment.
+	 */
+	if (!prob_x_out_of_y(1, cpdomc->nr_cpus * LAVD_CPDOM_X_PROB_FT))
+		return false;
+
+	/*
+	 * Traverse neighbor compute domains in distance order.
+	 */
+	nuance = bpf_get_prandom_u32();
+	for (int i = 0; i < LAVD_CPDOM_MAX_DIST; i++) {
+		nr_nbr = min(cpdomc->nr_neighbors[i], LAVD_CPDOM_MAX_NR);
+		if (nr_nbr == 0)
+			break;
+
+		/*
+		 * Traverse neighbor in the same distance in arbitrary order.
+		 */
+		for (int j = 0; j < LAVD_CPDOM_MAX_NR; j++, nuance++) {
+			if (j >= nr_nbr)
+				break;
+
+			dsq_id = pick_any_bit(cpdomc->neighbor_bits[i], nuance);
+			if (dsq_id == -ENOENT)
+				continue;
+
+			cpdomc_pick = MEMBER_VPTR(cpdom_ctxs, [dsq_id]);
+			if (!cpdomc_pick) {
+				scx_bpf_error("Failed to lookup cpdom_ctx for %llu", dsq_id);
+				return false;
+			}
+
+			if (!cpdomc_pick->is_stealee || !cpdomc_pick->is_active)
+				continue;
+
+			/*
+			 * If task stealing is successful, mark the stealer
+			 * and the stealee's job done. By marking done,
+			 * those compute domains would not be involved in
+			 * load balancing until the end of this round,
+			 * so this helps gradual migration. Note that multiple
+			 * stealers can steal tasks from the same stealee.
+			 * However, we don't coordinate concurrent stealing
+			 * because the chance is low and there is no harm
+			 * in slight over-stealing.
+			 */
+			if (consume_dsq(dsq_id)) {
+				WRITE_ONCE(cpdomc_pick->is_stealee, false);
+				WRITE_ONCE(cpdomc->is_stealer, false);
+				return true;
+			}
+		}
+
+		/*
+		 * Now, we need to steal a task from a farther neighbor
+		 * for load balancing. Since task migration from a farther
+		 * neighbor is more expensive (e.g., crossing a NUMA boundary),
+		 * we will do this with a lot of hesitation. The chance of
+		 * further migration will decrease exponentially as distance
+		 * increases, so, on the other hand, it increases the chance
+		 * of closer migration.
+		 */
+		if (!prob_x_out_of_y(1, LAVD_CPDOM_X_PROB_FT))
+			break;
+	}
+
+	return false;
+}
+
 static bool force_to_steal_task(struct cpdom_ctx *cpdomc)
 {
 	struct cpdom_ctx *cpdomc_pick;
 	u64 nr_nbr, dsq_id;
 	s64 nuance;
 
 	/*
 	 * Traverse neighbor compute domains in distance order.
 	 */
+	nuance = bpf_get_prandom_u32();
 	for (int i = 0; i < LAVD_CPDOM_MAX_DIST; i++) {
 		nr_nbr = min(cpdomc->nr_neighbors[i], LAVD_CPDOM_MAX_NR);
 		if (nr_nbr == 0)
 			break;
 
-		nuance = bpf_get_prandom_u32();
 		/*
 		 * Traverse neighbor in the same distance in arbitrary order.
 		 */
 		for (int j = 0; j < LAVD_CPDOM_MAX_NR; j++, nuance++) {
 			if (j >= nr_nbr)
 				break;
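pick_any_bit() is defined elsewhere in scx_lavd and is not part of this diff. The sketch below only illustrates the idea its use here suggests: select one set bit (one neighbor domain) from a bitmask, with the pseudo-random nuance acting as an offset so concurrent stealers start from different neighbors rather than all probing the same stealee first. The selection rule shown is an assumption for illustration, not the real helper.

/* pick_any_bit_sketch.c -- illustrative stand-in for pick_any_bit(): choose
 * the (nuance mod popcount)-th set bit of a mask. The real scx_lavd helper
 * may differ; this only demonstrates offset-based neighbor selection. */
#include <stdint.h>
#include <stdio.h>

static int64_t pick_any_bit(uint64_t mask, uint64_t nuance)
{
	int nbits = __builtin_popcountll(mask);

	if (nbits == 0)
		return -1;				/* no neighbor at this distance */

	int target = (int)(nuance % (uint64_t)nbits);	/* per-stealer offset */
	for (int bit = 0; bit < 64; bit++) {
		if (!(mask & (1ULL << bit)))
			continue;
		if (target-- == 0)
			return bit;			/* chosen neighbor (DSQ id) */
	}
	return -1;
}

int main(void)
{
	uint64_t neighbors = 0x2C;	/* domains 2, 3, and 5 are neighbors */

	/* Different nuance values rotate which neighbor is probed first. */
	for (uint64_t nuance = 0; nuance < 6; nuance++)
		printf("nuance %llu -> domain %lld\n",
		       (unsigned long long)nuance,
		       (long long)pick_any_bit(neighbors, nuance));
	return 0;
}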
@@ -1171,10 +1262,23 @@ static bool consume_task(struct cpu_ctx *cpuc)
 	struct cpdom_ctx *cpdomc;
 	u64 dsq_id;
 
-	/*
-	 * Try to consume from CPU's associated DSQ.
-	 */
 	dsq_id = cpuc->cpdom_id;
+	cpdomc = MEMBER_VPTR(cpdom_ctxs, [dsq_id]);
+	if (!cpdomc) {
+		scx_bpf_error("Failed to lookup cpdom_ctx for %llu", dsq_id);
+		return false;
+	}
+
+	/*
+	 * If the current compute domain is a stealer, try to steal
+	 * a task from any of stealee domains probabilistically.
+	 */
+	if (cpdomc->is_stealer && try_to_steal_task(cpdomc))
+		goto x_domain_migration_out;
+
+	/*
+	 * Try to consume a task from CPU's associated DSQ.
+	 */
 	if (consume_dsq(dsq_id))
 		return true;
 
@@ -1182,12 +1286,6 @@ static bool consume_task(struct cpu_ctx *cpuc)
 	 * If there is no task in the assssociated DSQ, traverse neighbor
 	 * compute domains in distance order -- task stealing.
 	 */
-	cpdomc = MEMBER_VPTR(cpdom_ctxs, [dsq_id]);
-	if (!cpdomc) {
-		scx_bpf_error("Failed to lookup cpdom_ctx for %llu", dsq_id);
-		return false;
-	}
-
 	if (force_to_steal_task(cpdomc))
 		goto x_domain_migration_out;
 
@@ -1337,10 +1435,7 @@ consume_out:
 	/*
 	 * Consume a task if requested.
 	 */
-	if (!try_consume)
-		return;
-
-	if (consume_task(cpuc))
+	if (try_consume && consume_task(cpuc))
 		return;
 
 	/*
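Putting the consume_task() and dispatch-path hunks together, the consume path now checks three things in order: a planned, probabilistic steal if this domain is a stealer; then the CPU's own per-domain DSQ; then a forced steal once the local DSQ turns out to be empty. The mock below restates that ordering with stub types and helpers so it runs in user space; it is a condensed restatement of the hunks above, not the scx_lavd code.

/* consume_flow_sketch.c -- user-space mock of the new consume_task() ordering;
 * the cpdom type and helpers are stubs, not the scx_lavd definitions. */
#include <stdbool.h>
#include <stdio.h>

struct cpdom_ctx { bool is_stealer; };

/* Stubs standing in for the BPF-side helpers. */
static bool try_to_steal_task(struct cpdom_ctx *c)   { return c->is_stealer; }
static bool consume_dsq(void)                        { return false; }	/* pretend the local DSQ is empty */
static bool force_to_steal_task(struct cpdom_ctx *c) { (void)c; return true; }

/* Ordering established by the hunks above:
 * stealer check -> own DSQ -> forced steal from a neighbor. */
static bool consume_task(struct cpdom_ctx *cpdomc)
{
	if (cpdomc->is_stealer && try_to_steal_task(cpdomc)) {
		puts("planned steal (stealer domain)");
		return true;
	}
	if (consume_dsq()) {
		puts("consumed from own DSQ");
		return true;
	}
	if (force_to_steal_task(cpdomc)) {
		puts("forced steal (own DSQ empty)");
		return true;
	}
	return false;
}

int main(void)
{
	struct cpdom_ctx stealer = { .is_stealer = true };
	struct cpdom_ctx normal  = { .is_stealer = false };

	consume_task(&stealer);	/* takes the planned-steal path */
	consume_task(&normal);	/* falls through to the forced steal */
	return 0;
}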
@@ -299,3 +299,14 @@ static void set_on_core_type(struct task_ctx *taskc,
 	WRITE_ONCE(taskc->on_big, on_big);
 	WRITE_ONCE(taskc->on_little, on_little);
 }
+
+static bool prob_x_out_of_y(u32 x, u32 y)
+{
+	/*
+	 * [0, r, y)
+	 *     ---- x?
+	 */
+	u32 r = bpf_get_prandom_u32() % y;
+	return r < x;
+}
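prob_x_out_of_y() draws r uniformly from [0, y) and returns true when r < x, i.e., with probability x/y; that is what the [0, r, y) comment is sketching. A quick user-space check of the hit rate, with rand() standing in for bpf_get_prandom_u32():

/* prob_check.c -- empirical check that prob_x_out_of_y(x, y) fires about
 * x/y of the time; rand() stands in for bpf_get_prandom_u32(). */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static bool prob_x_out_of_y(unsigned int x, unsigned int y)
{
	unsigned int r = (unsigned int)rand() % y;
	return r < x;
}

int main(void)
{
	const int trials = 1000000;
	int hits = 0;

	/* Expect roughly 1 in 8 calls to return true. */
	for (int i = 0; i < trials; i++)
		hits += prob_x_out_of_y(1, 8);

	printf("hit rate: %.4f (expected ~0.1250)\n", (double)hits / trials);
	return 0;
}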