Compare commits

4 Commits

Author SHA1 Message Date
Alexander Zaitsev
294e9564e4
Merge 59ad671520 into fa1e12aeb6
2024-11-30 15:27:25 -05:00
Tejun Heo
fa1e12aeb6
Merge pull request #1013 from sched-ext/htejun/layered-updates
scx_layered: Deprecate idle_smt layer config
2024-11-30 16:30:59 +00:00
Tejun Heo
b7b15ac4b1
scx_layered: Deprecate idle_smt layer config
24fba4ab8d ("scx_layered: Add idle smt layer configuration") added the
idle_smt layer config but

- It flipped the default from preferring idle cores to having no
  preference.

- It misdocumented what the flag meant. The flag doesn't make the scheduler
  pick only idle cores; it tries to pick an idle core first and, if that
  fails, picks any idle CPU.

A follow-up commit, 637fc3f6e1 ("scx_layered: Use layer idle_smt option"),
made things more confusing: if idle_smt is set, the idle-core prioritizing
logic is disabled.

The first commit disables idle-core prioritization by overriding
idle_smtmask to idle_cpumask when idle_smt is *clear*; the second disables
the same prioritization by skipping the code path when the flag is *set*.
That is, either setting of the flag ended up doing exactly the same thing:
no idle-core prioritization.
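
In boiled-down form, the combined effect looks like this (a self-contained
sketch with illustrative names, not the scheduler's real symbols):

#include <stdbool.h>
#include <stdio.h>

/* Models how the two commits each disable idle-core prioritization,
 * whichever value the flag has. */
static bool prefers_idle_cores(bool idle_smt)
{
	/* First commit: flag clear -> idle_smtmask is overridden to
	 * idle_cpumask, so the idle-core preference degenerates. */
	bool mask_overridden = !idle_smt;

	/* Second commit: flag set -> the idle-core path is skipped. */
	bool path_skipped = idle_smt;

	return !(mask_overridden || path_skipped);
}

int main(void)
{
	/* Prints 0 for both values: the flag changes nothing. */
	printf("idle_smt=0: %d\n", prefers_idle_cores(false));
	printf("idle_smt=1: %d\n", prefers_idle_cores(true));
	return 0;
}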

Recently, 75dd81e3e6 ("scx_layered: Improve topology aware select_cpu()")
restored the flag's effect by dropping the cpumask override. However, that
left only the second commit's logic, so the actual behavior is now the
opposite of the documented one: setting idle_smt disables idle-core
preference rather than enabling it.

This flag is hopeless. History aside, the name itself is too confusing.
idle_smt - is it saying that the flag prefers an idle SMT *thread* or an
idle SMT *core*? The name is borrowed from idle_cpumask/idle_smtmask, but
in that pairing the meaning of the former is unambiguous, which in turn
makes the latter hard to misread. A standalone flag has no such context.
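
For reference, the mask pair the name was borrowed from, as exposed by
sched_ext's idle-tracking kfuncs (a minimal BPF-side sketch; cpu_core_idle
is a hypothetical helper):

/* idle_cpumask: bit set => that hardware thread is idle.
 * idle_smtmask: bit set => the thread's whole core is idle.
 * Side by side the names disambiguate each other; a lone idle_smt
 * flag gets no such help. */
static bool cpu_core_idle(s32 cpu)
{
	const struct cpumask *idle_cores = scx_bpf_get_idle_smtmask();
	bool core_idle = bpf_cpumask_test_cpu(cpu, idle_cores);

	scx_bpf_put_idle_cpumask(idle_cores);
	return core_idle;
}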

Preferring idle cores was one of the drivers of performance gain identified
during earlier ads experiments. Let's just drop the flag to restore the
previous behavior and retry if necessary.
2024-11-30 00:19:39 -10:00
Alexander Zaitsev
59ad671520
feat: enable LTO
Enable Link-Time Optimization (LTO) for the Rust schedulers. It reduces
binary size and allows the compiler to perform more aggressive
optimizations.

More details in https://github.com/sched-ext/scx/issues/1010
2024-11-29 11:55:38 +01:00
5 changed files with 24 additions and 25 deletions


@@ -13,3 +13,6 @@ members = ["rust/scx_stats",
            "scheds/rust/scx_layered",
            "scheds/rust/scx_mitosis"]
 resolver = "2"
+
+[profile.release]
+lto = true


@@ -245,7 +245,6 @@ struct layer {
 	bool			preempt;
 	bool			preempt_first;
 	bool			exclusive;
-	bool			idle_smt;
 	int			growth_algo;
 	u32			owned_usage_target_ppk;


@@ -580,7 +580,7 @@ static void maybe_refresh_layered_cpus_node(struct task_struct *p, struct task_c
 }
 
 static s32 pick_idle_cpu_from(const struct cpumask *cand_cpumask, s32 prev_cpu,
-			      const struct cpumask *idle_smtmask, bool pref_idle_smt)
+			      const struct cpumask *idle_smtmask)
 {
 	bool prev_in_cand = bpf_cpumask_test_cpu(prev_cpu, cand_cpumask);
 	s32 cpu;
@@ -589,7 +589,7 @@ static s32 pick_idle_cpu_from(const struct cpumask *cand_cpumask, s32 prev_cpu,
 	 * If CPU has SMT, any wholly idle CPU is likely a better pick than
 	 * partially idle @prev_cpu.
 	 */
-	if (smt_enabled && !pref_idle_smt) {
+	if (smt_enabled) {
 		if (prev_in_cand &&
 		    bpf_cpumask_test_cpu(prev_cpu, idle_smtmask) &&
 		    scx_bpf_test_and_clear_cpu_idle(prev_cpu))
@@ -654,8 +654,7 @@ s32 pick_idle_big_little(struct layer *layer, struct task_ctx *taskc,
 		bpf_cpumask_and(tmp_cpumask, cast_mask(taskc->layered_mask),
 				cast_mask(big_cpumask));
 		cpu = pick_idle_cpu_from(cast_mask(tmp_cpumask),
-					 prev_cpu, idle_smtmask,
-					 layer->idle_smt);
+					 prev_cpu, idle_smtmask);
 		goto out_put;
 	}
 	case GROWTH_ALGO_LITTLE_BIG: {
@@ -669,8 +668,7 @@ s32 pick_idle_big_little(struct layer *layer, struct task_ctx *taskc,
 		bpf_cpumask_and(tmp_cpumask, cast_mask(taskc->layered_mask),
 				cast_mask(tmp_cpumask));
 		cpu = pick_idle_cpu_from(cast_mask(tmp_cpumask),
-					 prev_cpu, idle_smtmask,
-					 layer->idle_smt);
+					 prev_cpu, idle_smtmask);
 		goto out_put;
 	}
 	default:
@@ -763,8 +761,7 @@ s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu,
 			cpu = -1;
 			goto out_put;
 		}
-		if ((cpu = pick_idle_cpu_from(cpumask, prev_cpu, idle_smtmask,
-					      layer->idle_smt)) >= 0)
+		if ((cpu = pick_idle_cpu_from(cpumask, prev_cpu, idle_smtmask)) >= 0)
 			goto out_put;
 	}
@@ -778,21 +775,18 @@ s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu,
 			cpu = -1;
 			goto out_put;
 		}
-		if ((cpu = pick_idle_cpu_from(cpumask, prev_cpu, idle_smtmask,
-					      layer->idle_smt)) >= 0)
+		if ((cpu = pick_idle_cpu_from(cpumask, prev_cpu, idle_smtmask)) >= 0)
 			goto out_put;
 	}
 
-	if ((cpu = pick_idle_cpu_from(layered_cpumask, prev_cpu,
-				      idle_smtmask, layer->idle_smt)) >= 0)
+	if ((cpu = pick_idle_cpu_from(layered_cpumask, prev_cpu, idle_smtmask)) >= 0)
 		goto out_put;
 
 	/*
 	 * If the layer is an open one, we can try the whole machine.
 	 */
 	if (layer->kind != LAYER_KIND_CONFINED &&
-	    ((cpu = pick_idle_cpu_from(p->cpus_ptr, prev_cpu,
-				       idle_smtmask, layer->idle_smt)) >= 0)) {
+	    ((cpu = pick_idle_cpu_from(p->cpus_ptr, prev_cpu, idle_smtmask)) >= 0)) {
 		lstat_inc(LSTAT_OPEN_IDLE, layer, cpuc);
 		goto out_put;
 	}
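
With the flag gone, pick_idle_cpu_from() unconditionally prefers wholly
idle cores on SMT machines. The preference order reduces to roughly the
following sketch (illustrative only, built from the code above plus the
standard scx_bpf_pick_idle_cpu() kfunc; pick_idle_sketch is a hypothetical
name):

/* Preference order: 1) prev_cpu if its whole core is idle, 2) any
 * candidate whose whole core is idle, 3) prev_cpu if itself idle,
 * 4) any idle candidate. Assumes the smt_enabled global seen above. */
static s32 pick_idle_sketch(const struct cpumask *cand, s32 prev_cpu,
			    const struct cpumask *idle_smtmask)
{
	s32 cpu;

	if (smt_enabled) {
		if (bpf_cpumask_test_cpu(prev_cpu, cand) &&
		    bpf_cpumask_test_cpu(prev_cpu, idle_smtmask) &&
		    scx_bpf_test_and_clear_cpu_idle(prev_cpu))
			return prev_cpu;

		if ((cpu = scx_bpf_pick_idle_cpu(cand, SCX_PICK_IDLE_CORE)) >= 0)
			return cpu;
	}

	if (bpf_cpumask_test_cpu(prev_cpu, cand) &&
	    scx_bpf_test_and_clear_cpu_idle(prev_cpu))
		return prev_cpu;

	return scx_bpf_pick_idle_cpu(cand, 0);
}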


@@ -89,8 +89,8 @@ pub struct LayerCommon {
     pub exclusive: bool,
     #[serde(default)]
     pub weight: u32,
-    #[serde(default)]
-    pub idle_smt: bool,
+    #[serde(default, skip_serializing)]
+    pub idle_smt: Option<bool>,
     #[serde(default)]
     pub growth_algo: LayerGrowthAlgo,
     #[serde(default)]
#[serde(default)]


@@ -105,7 +105,7 @@ lazy_static! {
             preempt: false,
             preempt_first: false,
             exclusive: false,
-            idle_smt: false,
+            idle_smt: None,
             slice_us: 20000,
             weight: DEFAULT_LAYER_WEIGHT,
             growth_algo: LayerGrowthAlgo::Sticky,
@@ -129,7 +129,7 @@ lazy_static! {
             preempt: true,
             preempt_first: false,
             exclusive: true,
-            idle_smt: false,
+            idle_smt: None,
             slice_us: 20000,
             weight: DEFAULT_LAYER_WEIGHT,
             growth_algo: LayerGrowthAlgo::Sticky,
@@ -155,7 +155,7 @@ lazy_static! {
             preempt: true,
             preempt_first: false,
             exclusive: false,
-            idle_smt: false,
+            idle_smt: None,
             slice_us: 800,
             weight: DEFAULT_LAYER_WEIGHT,
             growth_algo: LayerGrowthAlgo::Topo,
@@ -178,7 +178,7 @@ lazy_static! {
             preempt: false,
             preempt_first: false,
             exclusive: false,
-            idle_smt: false,
+            idle_smt: None,
             slice_us: 20000,
             weight: DEFAULT_LAYER_WEIGHT,
             growth_algo: LayerGrowthAlgo::Linear,
@@ -323,8 +323,7 @@
 /// utilization to determine the infeasible adjusted weight with higher
 /// weights having a larger adjustment in adjusted utilization.
 ///
-/// - idle_smt: When selecting an idle CPU for task task migration use
-///   only idle SMT CPUs. The default is to select any idle cpu.
+/// - idle_smt: *** DEPRECATED ****
 ///
 /// - growth_algo: When a layer is allocated new CPUs different algorithms can
 ///   be used to determine which CPU should be allocated next. The default
@@ -1160,7 +1159,6 @@ impl<'a> Scheduler<'a> {
             preempt,
             preempt_first,
             exclusive,
-            idle_smt,
             growth_algo,
             nodes,
             slice_us,
@@ -1187,7 +1185,6 @@ impl<'a> Scheduler<'a> {
             layer.preempt.write(*preempt);
             layer.preempt_first.write(*preempt_first);
             layer.exclusive.write(*exclusive);
-            layer.idle_smt.write(*idle_smt);
             layer.growth_algo = growth_algo.as_bpf_enum();
             layer.weight = if *weight <= MAX_LAYER_WEIGHT && *weight >= MIN_LAYER_WEIGHT {
                 *weight
@@ -2315,6 +2312,12 @@ fn main() -> Result<()> {
         );
     }
 
+    for spec in &layer_config.specs {
+        if spec.kind.common().idle_smt.is_some() {
+            warn!("Layer {} has deprecated flag \"idle_smt\"", &spec.name);
+        }
+    }
+
     debug!("specs={}", serde_json::to_string_pretty(&layer_config)?);
     verify_layer_specs(&layer_config.specs)?;