Compare commits

4 Commits

Author SHA1 Message Date
Alexander Zaitsev
294e9564e4
Merge 59ad671520 into fa1e12aeb6
2024-11-30 15:27:25 -05:00
Tejun Heo
fa1e12aeb6
Merge pull request #1013 from sched-ext/htejun/layered-updates
scx_layered: Deprecate idle_smt layer config
2024-11-30 16:30:59 +00:00
Tejun Heo
b7b15ac4b1
scx_layered: Deprecate idle_smt layer config
24fba4ab8d ("scx_layered: Add idle smt layer configuration") added the
idle_smt layer config but

- It flipped the default from preferring idle cores to having no
  preference.

- It misdocumented what the flag meant. The flag doesn't make the scheduler
  pick only idle cores; it tries to pick an idle core first and, if that
  fails, picks any idle CPU.

A follow-up commit, 637fc3f6e1 ("scx_layered: Use layer idle_smt option"),
made things more confusing: if idle_smt is set, the idle-core prioritizing
logic is disabled.

The first commit disables idle-core prioritization by overriding
idle_smtmask to idle_cpumask when idle_smt is *clear*; the second disables
the same prioritization by skipping the code path when the flag is *set*.
That is, either setting of the flag ended up doing exactly the same thing:
no idle-core prioritization.
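
In boiled-down form, the combined effect looks like this (a self-contained
sketch with illustrative names, not the scheduler's real symbols):

#include <stdbool.h>
#include <stdio.h>

/* Models how the two commits each disable idle-core prioritization,
 * whichever value the flag has. */
static bool prefers_idle_cores(bool idle_smt)
{
	/* First commit: flag clear -> idle_smtmask is overridden to
	 * idle_cpumask, so the idle-core preference degenerates. */
	bool mask_overridden = !idle_smt;

	/* Second commit: flag set -> the idle-core path is skipped. */
	bool path_skipped = idle_smt;

	return !(mask_overridden || path_skipped);
}

int main(void)
{
	/* Prints 0 for both values: the flag changes nothing. */
	printf("idle_smt=0: %d\n", prefers_idle_cores(false));
	printf("idle_smt=1: %d\n", prefers_idle_cores(true));
	return 0;
}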

Recently, 75dd81e3e6 ("scx_layered: Improve topology aware select_cpu()")
restored the flag's effect by dropping the cpumask override. However, that
left only the second commit's logic, so the actual behavior is now the
opposite of the documented one: setting idle_smt disables idle-core
preference rather than enabling it.

This flag is hopeless. History aside, the name itself is too confusing.
idle_smt - is it saying that the flag prefers an idle SMT *thread* or an
idle SMT *core*? The name is borrowed from idle_cpumask/idle_smtmask, but
in that pairing the meaning of the former is unambiguous, which in turn
makes the latter hard to misread. A standalone flag has no such context.
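
For reference, the mask pair the name was borrowed from, as exposed by
sched_ext's idle-tracking kfuncs (a minimal BPF-side sketch; cpu_core_idle
is a hypothetical helper):

/* idle_cpumask: bit set => that hardware thread is idle.
 * idle_smtmask: bit set => the thread's whole core is idle.
 * Side by side the names disambiguate each other; a lone idle_smt
 * flag gets no such help. */
static bool cpu_core_idle(s32 cpu)
{
	const struct cpumask *idle_cores = scx_bpf_get_idle_smtmask();
	bool core_idle = bpf_cpumask_test_cpu(cpu, idle_cores);

	scx_bpf_put_idle_cpumask(idle_cores);
	return core_idle;
}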

Preferring idle cores was one of the drivers of performance gain identified
during earlier ads experiments. Let's just drop the flag to restore the
previous behavior and retry if necessary.
2024-11-30 00:19:39 -10:00
Alexander Zaitsev
59ad671520
feat: enable LTO
Enable Link-Time Optimization (LTO) for the Rust schedulers. It reduces
binary size and allows the compiler to perform more aggressive
optimizations.

More details in https://github.com/sched-ext/scx/issues/1010
2024-11-29 11:55:38 +01:00
5 changed files with 24 additions and 25 deletions


@@ -13,3 +13,6 @@ members = ["rust/scx_stats",
            "scheds/rust/scx_layered",
            "scheds/rust/scx_mitosis"]
 resolver = "2"
+
+[profile.release]
+lto = true


@@ -245,7 +245,6 @@ struct layer {
 	bool			preempt;
 	bool			preempt_first;
 	bool			exclusive;
-	bool			idle_smt;
 	int			growth_algo;
 	u32			owned_usage_target_ppk;


@@ -580,7 +580,7 @@ static void maybe_refresh_layered_cpus_node(struct task_struct *p, struct task_c
 }
 
 static s32 pick_idle_cpu_from(const struct cpumask *cand_cpumask, s32 prev_cpu,
-			      const struct cpumask *idle_smtmask, bool pref_idle_smt)
+			      const struct cpumask *idle_smtmask)
 {
 	bool prev_in_cand = bpf_cpumask_test_cpu(prev_cpu, cand_cpumask);
 	s32 cpu;
@@ -589,7 +589,7 @@ static s32 pick_idle_cpu_from(const struct cpumask *cand_cpumask, s32 prev_cpu,
 	 * If CPU has SMT, any wholly idle CPU is likely a better pick than
 	 * partially idle @prev_cpu.
 	 */
-	if (smt_enabled && !pref_idle_smt) {
+	if (smt_enabled) {
 		if (prev_in_cand &&
 		    bpf_cpumask_test_cpu(prev_cpu, idle_smtmask) &&
 		    scx_bpf_test_and_clear_cpu_idle(prev_cpu))
@@ -654,8 +654,7 @@ s32 pick_idle_big_little(struct layer *layer, struct task_ctx *taskc,
 		bpf_cpumask_and(tmp_cpumask, cast_mask(taskc->layered_mask),
 				cast_mask(big_cpumask));
 		cpu = pick_idle_cpu_from(cast_mask(tmp_cpumask),
-					 prev_cpu, idle_smtmask,
-					 layer->idle_smt);
+					 prev_cpu, idle_smtmask);
 		goto out_put;
 	}
 	case GROWTH_ALGO_LITTLE_BIG: {
@@ -669,8 +668,7 @@ s32 pick_idle_big_little(struct layer *layer, struct task_ctx *taskc,
 		bpf_cpumask_and(tmp_cpumask, cast_mask(taskc->layered_mask),
 				cast_mask(tmp_cpumask));
 		cpu = pick_idle_cpu_from(cast_mask(tmp_cpumask),
-					 prev_cpu, idle_smtmask,
-					 layer->idle_smt);
+					 prev_cpu, idle_smtmask);
 		goto out_put;
 	}
 	default:
@@ -763,8 +761,7 @@ s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu,
 			cpu = -1;
 			goto out_put;
 		}
-		if ((cpu = pick_idle_cpu_from(cpumask, prev_cpu, idle_smtmask,
-					      layer->idle_smt)) >= 0)
+		if ((cpu = pick_idle_cpu_from(cpumask, prev_cpu, idle_smtmask)) >= 0)
 			goto out_put;
 	}
@@ -778,21 +775,18 @@ s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu,
 			cpu = -1;
 			goto out_put;
 		}
-		if ((cpu = pick_idle_cpu_from(cpumask, prev_cpu, idle_smtmask,
-					      layer->idle_smt)) >= 0)
+		if ((cpu = pick_idle_cpu_from(cpumask, prev_cpu, idle_smtmask)) >= 0)
 			goto out_put;
 	}
 
-	if ((cpu = pick_idle_cpu_from(layered_cpumask, prev_cpu,
-				      idle_smtmask, layer->idle_smt)) >= 0)
+	if ((cpu = pick_idle_cpu_from(layered_cpumask, prev_cpu, idle_smtmask)) >= 0)
 		goto out_put;
 
 	/*
 	 * If the layer is an open one, we can try the whole machine.
 	 */
 	if (layer->kind != LAYER_KIND_CONFINED &&
-	    ((cpu = pick_idle_cpu_from(p->cpus_ptr, prev_cpu,
-				       idle_smtmask, layer->idle_smt)) >= 0)) {
+	    ((cpu = pick_idle_cpu_from(p->cpus_ptr, prev_cpu, idle_smtmask)) >= 0)) {
 		lstat_inc(LSTAT_OPEN_IDLE, layer, cpuc);
 		goto out_put;
 	}
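
With the flag gone, pick_idle_cpu_from() unconditionally prefers wholly
idle cores on SMT machines. The preference order reduces to roughly the
following sketch (illustrative only, built from the code above plus the
standard scx_bpf_pick_idle_cpu() kfunc; pick_idle_sketch is a hypothetical
name):

/* Preference order: 1) prev_cpu if its whole core is idle, 2) any
 * candidate whose whole core is idle, 3) prev_cpu if itself idle,
 * 4) any idle candidate. Assumes the smt_enabled global seen above. */
static s32 pick_idle_sketch(const struct cpumask *cand, s32 prev_cpu,
			    const struct cpumask *idle_smtmask)
{
	s32 cpu;

	if (smt_enabled) {
		if (bpf_cpumask_test_cpu(prev_cpu, cand) &&
		    bpf_cpumask_test_cpu(prev_cpu, idle_smtmask) &&
		    scx_bpf_test_and_clear_cpu_idle(prev_cpu))
			return prev_cpu;

		if ((cpu = scx_bpf_pick_idle_cpu(cand, SCX_PICK_IDLE_CORE)) >= 0)
			return cpu;
	}

	if (bpf_cpumask_test_cpu(prev_cpu, cand) &&
	    scx_bpf_test_and_clear_cpu_idle(prev_cpu))
		return prev_cpu;

	return scx_bpf_pick_idle_cpu(cand, 0);
}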


@@ -89,8 +89,8 @@ pub struct LayerCommon {
     pub exclusive: bool,
     #[serde(default)]
     pub weight: u32,
-    #[serde(default)]
-    pub idle_smt: bool,
+    #[serde(default, skip_serializing)]
+    pub idle_smt: Option<bool>,
     #[serde(default)]
     pub growth_algo: LayerGrowthAlgo,
     #[serde(default)]
#[serde(default)]


@@ -105,7 +105,7 @@ lazy_static! {
             preempt: false,
             preempt_first: false,
             exclusive: false,
-            idle_smt: false,
+            idle_smt: None,
             slice_us: 20000,
             weight: DEFAULT_LAYER_WEIGHT,
             growth_algo: LayerGrowthAlgo::Sticky,
@@ -129,7 +129,7 @@ lazy_static! {
             preempt: true,
             preempt_first: false,
             exclusive: true,
-            idle_smt: false,
+            idle_smt: None,
             slice_us: 20000,
             weight: DEFAULT_LAYER_WEIGHT,
             growth_algo: LayerGrowthAlgo::Sticky,
@@ -155,7 +155,7 @@ lazy_static! {
             preempt: true,
             preempt_first: false,
             exclusive: false,
-            idle_smt: false,
+            idle_smt: None,
             slice_us: 800,
             weight: DEFAULT_LAYER_WEIGHT,
             growth_algo: LayerGrowthAlgo::Topo,
@@ -178,7 +178,7 @@ lazy_static! {
             preempt: false,
             preempt_first: false,
             exclusive: false,
-            idle_smt: false,
+            idle_smt: None,
             slice_us: 20000,
             weight: DEFAULT_LAYER_WEIGHT,
             growth_algo: LayerGrowthAlgo::Linear,
@@ -323,8 +323,7 @@
 /// utilization to determine the infeasible adjusted weight with higher
 /// weights having a larger adjustment in adjusted utilization.
 ///
-/// - idle_smt: When selecting an idle CPU for task task migration use
-///   only idle SMT CPUs. The default is to select any idle cpu.
+/// - idle_smt: *** DEPRECATED ****
 ///
 /// - growth_algo: When a layer is allocated new CPUs different algorithms can
 ///   be used to determine which CPU should be allocated next. The default
@@ -1160,7 +1159,6 @@ impl<'a> Scheduler<'a> {
             preempt,
             preempt_first,
             exclusive,
-            idle_smt,
             growth_algo,
             nodes,
             slice_us,
@@ -1187,7 +1185,6 @@ impl<'a> Scheduler<'a> {
             layer.preempt.write(*preempt);
             layer.preempt_first.write(*preempt_first);
             layer.exclusive.write(*exclusive);
-            layer.idle_smt.write(*idle_smt);
             layer.growth_algo = growth_algo.as_bpf_enum();
             layer.weight = if *weight <= MAX_LAYER_WEIGHT && *weight >= MIN_LAYER_WEIGHT {
                 *weight
@@ -2315,6 +2312,12 @@ fn main() -> Result<()> {
         );
     }
 
+    for spec in &layer_config.specs {
+        if spec.kind.common().idle_smt.is_some() {
+            warn!("Layer {} has deprecated flag \"idle_smt\"", &spec.name);
+        }
+    }
+
     debug!("specs={}", serde_json::to_string_pretty(&layer_config)?);
     verify_layer_specs(&layer_config.specs)?;