scx_layered: Deprecate idle_smt layer config

24fba4ab8d ("scx_layered: Add idle smt layer configuration") added the
idle_smt layer config, but:

- It flipped the default from preferring idle cores to having no
  preference.

- It misdocumented what the flag meant. It doesn't pick only idle cores; it
  tries to pick an idle core first and, failing that, picks any idle CPU
  (see the sketch below).
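
The selection order the second point describes, sketched with the generic
sched_ext kfunc scx_bpf_pick_idle_cpu() rather than the actual scx_layered
code:

	static s32 pick_cpu_prefer_idle_core(const struct cpumask *cpus_allowed)
	{
		s32 cpu;

		/* first try a wholly idle core, ie. all SMT siblings idle */
		cpu = scx_bpf_pick_idle_cpu(cpus_allowed, SCX_PICK_IDLE_CORE);
		if (cpu >= 0)
			return cpu;

		/* failing that, settle for any idle CPU */
		return scx_bpf_pick_idle_cpu(cpus_allowed, 0);
	}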

A follow-up commit 637fc3f6e1 ("scx_layered: Use layer idle_smt option")
made things more confusing: if idle_smt is set, the idle core prioritizing
logic is disabled.

The first commit disables idle core prioritization by overriding
idle_smtmask to be idle_cpumask when idle_smt is *clear*, and the second
commit disables the same logic by skipping the code path when the flag is
*set*. i.e. both settings of the flag ended up doing exactly the same thing.
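
Put together, the two mechanisms looked roughly like the following (a
sketch, not the actual code; pref_idle_smt carries the layer's idle_smt
value):

	/* commit 1: with the flag *clear*, the override below turns the
	 * "wholly idle core" test into a plain "any idle CPU" test */
	if (!pref_idle_smt)
		idle_smtmask = idle_cpumask;

	/* commit 2: with the flag *set*, the preference path is skipped */
	if (smt_enabled && !pref_idle_smt) {
		/* ... prefer CPUs in idle_smtmask, ie. wholly idle cores ... */
	}

	/* either way, idle cores get no preference over other idle CPUs */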

Recently, 75dd81e3e6 ("scx_layered: Improve topology aware select_cpu()")
restored the function of the flag by dropping the cpumask override. However,
that left only the logic of the second commit, which implemented the reverse
behavior, so the actual behavior became the opposite of the documented one.

This flag is hopeless. History aside, the name itself is too confusing.
idle_smt - is it saying that the flag prefers an idle SMT *thread* or an
idle SMT *core*? The name is carried over from idle_cpumask/idle_smtmask,
but there the meaning of the former is clear, which also makes it difficult
to confuse what the latter means.
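
For reference, these are the sched_ext kfuncs those names come from (both
masks must be released with scx_bpf_put_idle_cpumask() when done):

	const struct cpumask *idle_cpumask = scx_bpf_get_idle_cpumask();  /* any idle CPU */
	const struct cpumask *idle_smtmask = scx_bpf_get_idle_smtmask();  /* wholly idle cores only */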

Preferring idle cores was one of the drivers of the performance gains
identified during earlier ads experiments. Let's just drop the flag to
restore the previous behavior and revisit if necessary.
commit b7b15ac4b1
parent 14af41d0dd
Author: Tejun Heo
Date:   2024-11-29 23:43:40 -10:00

4 changed files with 21 additions and 25 deletions


@@ -245,7 +245,6 @@ struct layer {
        bool preempt;
        bool preempt_first;
        bool exclusive;
-       bool idle_smt;
        int growth_algo;
        u32 owned_usage_target_ppk;


@@ -580,7 +580,7 @@ static void maybe_refresh_layered_cpus_node(struct task_struct *p, struct task_c
 }
 
 static s32 pick_idle_cpu_from(const struct cpumask *cand_cpumask, s32 prev_cpu,
-                              const struct cpumask *idle_smtmask, bool pref_idle_smt)
+                              const struct cpumask *idle_smtmask)
 {
        bool prev_in_cand = bpf_cpumask_test_cpu(prev_cpu, cand_cpumask);
        s32 cpu;
@@ -589,7 +589,7 @@ static s32 pick_idle_cpu_from(const struct cpumask *cand_cpumask, s32 prev_cpu,
         * If CPU has SMT, any wholly idle CPU is likely a better pick than
         * partially idle @prev_cpu.
         */
-       if (smt_enabled && !pref_idle_smt) {
+       if (smt_enabled) {
                if (prev_in_cand &&
                    bpf_cpumask_test_cpu(prev_cpu, idle_smtmask) &&
                    scx_bpf_test_and_clear_cpu_idle(prev_cpu))
@@ -654,8 +654,7 @@ s32 pick_idle_big_little(struct layer *layer, struct task_ctx *taskc,
                bpf_cpumask_and(tmp_cpumask, cast_mask(taskc->layered_mask),
                                cast_mask(big_cpumask));
                cpu = pick_idle_cpu_from(cast_mask(tmp_cpumask),
-                                        prev_cpu, idle_smtmask,
-                                        layer->idle_smt);
+                                        prev_cpu, idle_smtmask);
                goto out_put;
        }
        case GROWTH_ALGO_LITTLE_BIG: {
@@ -669,8 +668,7 @@ s32 pick_idle_big_little(struct layer *layer, struct task_ctx *taskc,
                bpf_cpumask_and(tmp_cpumask, cast_mask(taskc->layered_mask),
                                cast_mask(tmp_cpumask));
                cpu = pick_idle_cpu_from(cast_mask(tmp_cpumask),
-                                        prev_cpu, idle_smtmask,
-                                        layer->idle_smt);
+                                        prev_cpu, idle_smtmask);
                goto out_put;
        }
        default:
@@ -763,8 +761,7 @@ s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu,
                        cpu = -1;
                        goto out_put;
                }
-               if ((cpu = pick_idle_cpu_from(cpumask, prev_cpu, idle_smtmask,
-                                             layer->idle_smt)) >= 0)
+               if ((cpu = pick_idle_cpu_from(cpumask, prev_cpu, idle_smtmask)) >= 0)
                        goto out_put;
        }
@@ -778,21 +775,18 @@ s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu,
                        cpu = -1;
                        goto out_put;
                }
-               if ((cpu = pick_idle_cpu_from(cpumask, prev_cpu, idle_smtmask,
-                                             layer->idle_smt)) >= 0)
+               if ((cpu = pick_idle_cpu_from(cpumask, prev_cpu, idle_smtmask)) >= 0)
                        goto out_put;
        }
 
-       if ((cpu = pick_idle_cpu_from(layered_cpumask, prev_cpu,
-                                     idle_smtmask, layer->idle_smt)) >= 0)
+       if ((cpu = pick_idle_cpu_from(layered_cpumask, prev_cpu, idle_smtmask)) >= 0)
                goto out_put;
 
        /*
         * If the layer is an open one, we can try the whole machine.
         */
        if (layer->kind != LAYER_KIND_CONFINED &&
-           ((cpu = pick_idle_cpu_from(p->cpus_ptr, prev_cpu,
-                                      idle_smtmask, layer->idle_smt)) >= 0)) {
+           ((cpu = pick_idle_cpu_from(p->cpus_ptr, prev_cpu, idle_smtmask)) >= 0)) {
                lstat_inc(LSTAT_OPEN_IDLE, layer, cpuc);
                goto out_put;
        }


@@ -89,8 +89,8 @@ pub struct LayerCommon {
     pub exclusive: bool,
     #[serde(default)]
     pub weight: u32,
-    #[serde(default)]
-    pub idle_smt: bool,
+    #[serde(default, skip_serializing)]
+    pub idle_smt: Option<bool>,
     #[serde(default)]
     pub growth_algo: LayerGrowthAlgo,
     #[serde(default)]


@@ -105,7 +105,7 @@ lazy_static! {
             preempt: false,
             preempt_first: false,
             exclusive: false,
-            idle_smt: false,
+            idle_smt: None,
             slice_us: 20000,
             weight: DEFAULT_LAYER_WEIGHT,
             growth_algo: LayerGrowthAlgo::Sticky,
@@ -129,7 +129,7 @@ lazy_static! {
             preempt: true,
             preempt_first: false,
             exclusive: true,
-            idle_smt: false,
+            idle_smt: None,
             slice_us: 20000,
             weight: DEFAULT_LAYER_WEIGHT,
             growth_algo: LayerGrowthAlgo::Sticky,
@@ -155,7 +155,7 @@ lazy_static! {
             preempt: true,
             preempt_first: false,
             exclusive: false,
-            idle_smt: false,
+            idle_smt: None,
             slice_us: 800,
             weight: DEFAULT_LAYER_WEIGHT,
             growth_algo: LayerGrowthAlgo::Topo,
@@ -178,7 +178,7 @@ lazy_static! {
             preempt: false,
             preempt_first: false,
             exclusive: false,
-            idle_smt: false,
+            idle_smt: None,
             slice_us: 20000,
             weight: DEFAULT_LAYER_WEIGHT,
             growth_algo: LayerGrowthAlgo::Linear,
@@ -323,8 +323,7 @@ lazy_static! {
 /// utilization to determine the infeasible adjusted weight with higher
 /// weights having a larger adjustment in adjusted utilization.
 ///
-/// - idle_smt: When selecting an idle CPU for task task migration use
-///   only idle SMT CPUs. The default is to select any idle cpu.
+/// - idle_smt: *** DEPRECATED ***
 ///
 /// - growth_algo: When a layer is allocated new CPUs different algorithms can
 ///   be used to determine which CPU should be allocated next. The default
@@ -1160,7 +1159,6 @@ impl<'a> Scheduler<'a> {
             preempt,
             preempt_first,
             exclusive,
-            idle_smt,
             growth_algo,
             nodes,
             slice_us,
@@ -1187,7 +1185,6 @@ impl<'a> Scheduler<'a> {
             layer.preempt.write(*preempt);
             layer.preempt_first.write(*preempt_first);
             layer.exclusive.write(*exclusive);
-            layer.idle_smt.write(*idle_smt);
             layer.growth_algo = growth_algo.as_bpf_enum();
             layer.weight = if *weight <= MAX_LAYER_WEIGHT && *weight >= MIN_LAYER_WEIGHT {
                 *weight
@@ -2315,6 +2312,12 @@ fn main() -> Result<()> {
         );
     }
 
+    for spec in &layer_config.specs {
+        if spec.kind.common().idle_smt.is_some() {
+            warn!("Layer {} has deprecated flag \"idle_smt\"", &spec.name);
+        }
+    }
+
     debug!("specs={}", serde_json::to_string_pretty(&layer_config)?);
     verify_layer_specs(&layer_config.specs)?;