Merge branch 'main' into htejun/scx_rusty

2024-11-24 20:00:22 +00:00 · 2024-08-23 07:48:07 -10:00 · 2024-08-23 07:48:07 -10:00 · 8c8912ccea
commit 8c8912ccea
parent 44a0f1b124 e635e7eac8
28 changed files with 240 additions and 216 deletions
--- a/meson-scripts/test_sched
+++ b/meson-scripts/test_sched
@ -13,12 +13,18 @@ GUEST_TIMEOUT=60
 # List of schedulers to test
 #
 # TODO:
-#   - scx_layered: temporarily excluded because it
-#     cannot run with a default configuration
 #   - scx_flatcg, scx_pair: excluded until cgroup support lands upstream
 #   - scx_mitosis: not ready yet
 #
-SCHEDULERS="scx_simple scx_central scx_nest scx_rusty scx_rustland scx_bpfland"
+declare -A SCHEDS
+
+SCHEDS["scx_simple"]=""
+SCHEDS["scx_central"]=""
+SCHEDS["scx_nest"]=""
+SCHEDS["scx_rusty"]=""
+SCHEDS["scx_rustland"]=""
+SCHEDS["scx_bpfland"]=""
+SCHEDS["scx_layered"]="--run-example"

 if [ ! -x `which vng` ]; then
    echo "vng not found, please install virtme-ng to enable testing"
@ -30,7 +36,8 @@ if [ $# -lt 1 ]; then
 fi
 kernel=$1

-for sched in ${SCHEDULERS}; do
+for sched in ${!SCHEDS[@]}; do
+    args=${SCHEDS[$sched]}
    sched_path=$(find -type f -executable -name ${sched})
    if [ ! -n "${sched_path}" ]; then
        echo "${sched}: binary not found"
@ -42,7 +49,7 @@ for sched in ${SCHEDULERS}; do
    rm -f /tmp/output
    timeout --preserve-status ${GUEST_TIMEOUT} \
        vng --force-9p -v -r ${kernel} -- \
-            "timeout --foreground --preserve-status ${TEST_TIMEOUT} ${sched_path}" \
+            "timeout --foreground --preserve-status ${TEST_TIMEOUT} ${sched_path} ${args}" \
                2> >(tee /tmp/output) </dev/null
        grep -v " Speculative Return Stack Overflow" /tmp/output | \
            sed -n -e '/\bBUG:/q1' \
--- a/meson.build
+++ b/meson.build
@ -1,5 +1,5 @@
 project('sched_ext schedulers', 'c',
-        version: '1.0.2',
+        version: '1.0.3',
        license: 'GPL-2.0',)

 if meson.version().version_compare('<1.2')
--- a/rust/scx_rustland_core/Cargo.toml
+++ b/rust/scx_rustland_core/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "scx_rustland_core"
-version = "1.0.2"
+version = "1.0.3"
 edition = "2021"
 authors = ["Andrea Righi <andrea.righi@linux.dev>"]
 license = "GPL-2.0-only"
@ -12,12 +12,12 @@ anyhow = "1.0.65"
 plain = "0.2.3"
 libbpf-rs = "0.24.1"
 libc = "0.2.137"
-scx_utils = { path = "../scx_utils", version = "1.0.2" }
+scx_utils = { path = "../scx_utils", version = "1.0.3" }

 [build-dependencies]
 tar = "0.4"
 walkdir = "2.4"
-scx_utils = { path = "../scx_utils", version = "1.0.2" }
+scx_utils = { path = "../scx_utils", version = "1.0.3" }

 [lib]
 name = "scx_rustland_core"
--- a/rust/scx_stats/Cargo.toml
+++ b/rust/scx_stats/Cargo.toml
@ -2,7 +2,7 @@ workspace = { members = ["scx_stats_derive"] }

 [package]
 name = "scx_stats"
-version = "0.2.0"
+version = "1.0.3"
 edition = "2021"
 authors = ["Tejun Heo <tj@kernel.org>"]
 license = "GPL-2.0-only"
--- a/rust/scx_stats/scx_stats_derive/Cargo.toml
+++ b/rust/scx_stats/scx_stats_derive/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "scx_stats_derive"
-version = "0.2.0"
+version = "1.0.3"
 edition = "2021"
 authors = ["Tejun Heo <tj@kernel.org>"]
 license = "GPL-2.0-only"
@ -13,6 +13,6 @@ proc-macro = true
 [dependencies]
 proc-macro2 = "1.0"
 quote = "1.0"
-scx_stats = { path = "..", version = "0.2.0" }
+scx_stats = { path = "..", version = "1.0.3" }
 serde_json = "1.0"
 syn = { version = "2.0", features = ["extra-traits", "full"] }
--- a/rust/scx_utils/Cargo.toml
+++ b/rust/scx_utils/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "scx_utils"
-version = "1.0.2"
+version = "1.0.3"
 edition = "2021"
 authors = ["Tejun Heo <tj@kernel.org>"]
 license = "GPL-2.0-only"
@ -21,7 +21,7 @@ libbpf-rs = "0.24.1"
 log = "0.4.17"
 paste = "1.0"
 regex = "1.10"
-scx_stats = { path = "../scx_stats", version = "0.2.0" }
+scx_stats = { path = "../scx_stats", version = "1.0.3" }
 serde = { version = "1.0", features = ["derive"] }
 sscanf = "0.4"
 tar = "0.4"
--- a/rust/scx_utils/src/lib.rs
+++ b/rust/scx_utils/src/lib.rs
@ -65,8 +65,8 @@ pub use topology::Cpu;
 pub use topology::Node;
 pub use topology::Topology;
 pub use topology::TopologyMap;
-pub use topology::NR_CPU_IDS;
 pub use topology::NR_CPUS_POSSIBLE;
+pub use topology::NR_CPU_IDS;

 mod cpumask;
 pub use cpumask::Cpumask;
--- a/rust/scx_utils/src/topology.rs
+++ b/rust/scx_utils/src/topology.rs
@ -76,30 +76,31 @@ use sscanf::sscanf;
 use std::collections::BTreeMap;
 use std::path::Path;
 use std::slice::Iter;
-use std::sync::LazyLock;

-/// The maximum possible number of CPU IDs in the system. As mentioned above,
-/// this is different than the number of possible CPUs on the system (though
-/// very seldom is). This number may differ from the number of possible CPUs on
-/// the system when e.g. there are fully disabled CPUs in the middle of the
-/// range of possible CPUs (i.e. CPUs that may not be onlined).
-pub static NR_CPU_IDS: LazyLock<usize> = LazyLock::new(|| {
-    read_cpu_ids().unwrap().last().unwrap() + 1
-});
+lazy_static::lazy_static! {
+    /// The maximum possible number of CPU IDs in the system. As mentioned
+    /// above, this is different from the number of possible CPUs on the
+    /// system (though it very seldom is). This number may differ from the
+    /// number of possible CPUs on the system when e.g. there are fully
+    /// disabled CPUs in the middle of the range of possible CPUs (i.e. CPUs
+    /// that may not be onlined).
+    pub static ref NR_CPU_IDS: usize = read_cpu_ids().unwrap().last().unwrap() + 1;

-/// The number of possible CPUs that may be active on the system. Note that this
-/// value is separate from the number of possible _CPU IDs_ in the system, as
-/// there may be gaps in what CPUs are allowed to be onlined. For example, some
-/// BIOS implementations may report spans of disabled CPUs that may not be
-/// onlined, whose IDs are lower than the IDs of other CPUs that may be onlined.
-pub static NR_CPUS_POSSIBLE: LazyLock<usize> =
-    LazyLock::new(|| libbpf_rs::num_possible_cpus().unwrap());
+    /// The number of possible CPUs that may be active on the system. Note
+    /// that this value is separate from the number of possible _CPU IDs_ in
+    /// the system, as there may be gaps in what CPUs are allowed to be
+    /// onlined. For example, some BIOS implementations may report spans of
+    /// disabled CPUs that may not be onlined, whose IDs are lower than the
+    /// IDs of other CPUs that may be onlined.
+    pub static ref NR_CPUS_POSSIBLE: usize = libbpf_rs::num_possible_cpus().unwrap();
+}

 #[derive(Debug, Clone)]
 pub struct Cpu {
    id: usize,
    min_freq: usize,
    max_freq: usize,
+    base_freq: usize,
    trans_lat_ns: usize,
    l2_id: usize,
    l3_id: usize,
@ -122,6 +123,14 @@ impl Cpu {
        self.max_freq
    }

+    /// Get the base operational frequency of this CPU
+    ///
+    /// This is only available on Intel Turbo Boost CPUs; if not available, this simply returns
+    /// the maximum frequency.
+    pub fn base_freq(&self) -> usize {
+        self.base_freq
+    }
+
    /// Get the transition latency of the CPU in nanoseconds
    pub fn trans_lat_ns(&self) -> usize {
        self.trans_lat_ns
@ -421,10 +430,8 @@ fn create_insert_cpu(cpu_id: usize, node: &mut Node, online_mask: &Cpumask) -> R
    // if there's no cache information then we have no option but to assume a single unified cache
    // per node.
    let cache_path = cpu_path.join("cache");
-    let l2_id =
-        read_file_usize(&cache_path.join(format!("index{}", 2)).join("id")).unwrap_or(0);
-    let l3_id =
-        read_file_usize(&cache_path.join(format!("index{}", 3)).join("id")).unwrap_or(0);
+    let l2_id = read_file_usize(&cache_path.join(format!("index{}", 2)).join("id")).unwrap_or(0);
+    let l3_id = read_file_usize(&cache_path.join(format!("index{}", 3)).join("id")).unwrap_or(0);
    // Assume that LLC is always 3.
    let llc_id = l3_id;

@ -433,6 +440,7 @@ fn create_insert_cpu(cpu_id: usize, node: &mut Node, online_mask: &Cpumask) -> R
    let freq_path = cpu_path.join("cpufreq");
    let min_freq = read_file_usize(&freq_path.join("scaling_min_freq")).unwrap_or(0);
    let max_freq = read_file_usize(&freq_path.join("scaling_max_freq")).unwrap_or(0);
+    let base_freq = read_file_usize(&freq_path.join("base_frequency")).unwrap_or(max_freq);
    let trans_lat_ns = read_file_usize(&freq_path.join("cpuinfo_transition_latency")).unwrap_or(0);

    let cache = node.llcs.entry(llc_id).or_insert(Cache {
@ -453,6 +461,7 @@ fn create_insert_cpu(cpu_id: usize, node: &mut Node, online_mask: &Cpumask) -> R
            id: cpu_id,
            min_freq: min_freq,
            max_freq: max_freq,
+            base_freq: base_freq,
            trans_lat_ns: trans_lat_ns,
            l2_id: l2_id,
            l3_id: l3_id,
@ -502,7 +511,7 @@ fn create_default_node(online_mask: &Cpumask) -> Result<Vec<Node>> {

    let cpu_ids = read_cpu_ids()?;
    for cpu_id in cpu_ids.iter() {
-	create_insert_cpu(*cpu_id, &mut node, &online_mask)?;
+        create_insert_cpu(*cpu_id, &mut node, &online_mask)?;
    }

    nodes.push(node);
--- a/scheds/rust/scx_bpfland/Cargo.lock
+++ b/scheds/rust/scx_bpfland/Cargo.lock
@ -1371,7 +1371,7 @@ dependencies = [

 [[package]]
 name = "scx_bpfland"
-version = "1.0.2"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "clap",
@ -1387,7 +1387,7 @@ dependencies = [

 [[package]]
 name = "scx_utils"
-version = "1.0.2"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "bindgen",
--- a/scheds/rust/scx_bpfland/Cargo.toml
+++ b/scheds/rust/scx_bpfland/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "scx_bpfland"
-version = "1.0.2"
+version = "1.0.3"
 authors = ["Andrea Righi <andrea.righi@linux.dev>"]
 edition = "2021"
 description = "A vruntime-based sched_ext scheduler that prioritizes interactive workloads. https://github.com/sched-ext/scx/tree/main"
@ -12,14 +12,14 @@ ctrlc = { version = "3.1", features = ["termination"] }
 clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
 libbpf-rs = "0.24.1"
 log = "0.4.17"
-scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
+scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
 simplelog = "0.12"
 rlimit = "0.10.1"
 metrics = "0.23.0"
 metrics-exporter-prometheus = "0.15.0"

 [build-dependencies]
-scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
+scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }

 [features]
 enable_backtrace = []
--- a/scheds/rust/scx_bpfland/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_bpfland/src/bpf/main.bpf.c
@ -240,6 +240,15 @@ struct task_ctx *lookup_task_ctx(const struct task_struct *p)
 	return tctx;
 }

+/*
+ * Return true if interactive tasks classification via voluntary context
+ * switches is enabled, false otherwise.
+ */
+static bool is_nvcsw_enabled(void)
+{
+	return !!nvcsw_max_thresh;
+}
+
 /*
 * Return true if the task is interactive, false otherwise.
 */
@ -261,15 +270,6 @@ static inline bool is_kthread(const struct task_struct *p)
 	return !!(p->flags & PF_KTHREAD);
 }

-/*
- * Return true if interactive tasks classification via voluntary context
- * switches is enabled, false otherwise.
- */
-static bool is_nvcsw_enabled(void)
-{
-	return !!nvcsw_max_thresh;
-}
-
 /*
 * Access a cpumask in read-only mode (typically to check bits).
 */
@ -413,7 +413,6 @@ static u64 cpu_to_dsq(s32 cpu)
 static int dispatch_direct_cpu(struct task_struct *p, s32 cpu, u64 enq_flags)
 {
 	struct bpf_cpumask *offline;
-	u64 slice = task_slice(p);
 	u64 vtime = task_vtime(p);
 	u64 dsq_id = cpu_to_dsq(cpu);

@ -424,7 +423,7 @@ static int dispatch_direct_cpu(struct task_struct *p, s32 cpu, u64 enq_flags)
 	if (!bpf_cpumask_test_cpu(cpu, p->cpus_ptr))
 		return -EINVAL;

-	scx_bpf_dispatch_vtime(p, dsq_id, slice, vtime, enq_flags);
+	scx_bpf_dispatch_vtime(p, dsq_id, SCX_SLICE_DFL, vtime, enq_flags);

 	/*
 	 * If the CPU has gone offline notify that the task needs to be
@ -699,6 +698,9 @@ static void handle_sync_wakeup(struct task_struct *p)
 {
 	struct task_ctx *tctx;

+	if (!is_nvcsw_enabled())
+		return;
+
 	/*
 	 * If we are waking up a task immediately promote it as interactive, so
 	 * that it can be dispatched as soon as possible on the first CPU
@ -722,7 +724,6 @@ static void handle_sync_wakeup(struct task_struct *p)
 void BPF_STRUCT_OPS(bpfland_enqueue, struct task_struct *p, u64 enq_flags)
 {
 	u64 vtime = task_vtime(p);
-	u64 slice = task_slice(p);

 	/*
 	 * If the system is saturated and we couldn't dispatch directly in
@ -754,10 +755,12 @@ void BPF_STRUCT_OPS(bpfland_enqueue, struct task_struct *p, u64 enq_flags)
 	 * and simply rely on the vruntime logic.
 	 */
 	if (is_task_interactive(p)) {
-		scx_bpf_dispatch_vtime(p, prio_dsq_id, slice, vtime, enq_flags);
+		scx_bpf_dispatch_vtime(p, prio_dsq_id, SCX_SLICE_DFL,
+				       vtime, enq_flags);
 		__sync_fetch_and_add(&nr_prio_dispatches, 1);
 	} else {
-		scx_bpf_dispatch_vtime(p, shared_dsq_id, slice, vtime, enq_flags);
+		scx_bpf_dispatch_vtime(p, shared_dsq_id, SCX_SLICE_DFL,
+				       vtime, enq_flags);
 		__sync_fetch_and_add(&nr_shared_dispatches, 1);
 	}
 }
@ -900,7 +903,22 @@ void BPF_STRUCT_OPS(bpfland_dispatch, s32 cpu, struct task_struct *prev)
 	/*
 	 * Lastly, consume regular tasks from the shared DSQ.
 	 */
-	consume_regular_task(now);
+	if (consume_regular_task(now))
+		return;
+
+	/*
+	 * If the current task expired its time slice, but no other task wants
+	 * to run, simply replenish its time slice and let it run for another
+	 * round on the same CPU.
+	 *
+	 * Note that bpfland_stopping() won't be called if we replenish the
+	 * time slice here. As a result, the nvcsw statistics won't be updated,
+	 * but this isn't an issue, because these statistics are only relevant
+	 * when the system is overloaded, which isn't the case when there are
+	 * no other tasks to run.
+	 */
+	if (prev && (prev->scx.flags & SCX_TASK_QUEUED))
+		prev->scx.slice = task_slice(prev);
 }

 void BPF_STRUCT_OPS(bpfland_running, struct task_struct *p)
@ -910,11 +928,10 @@ void BPF_STRUCT_OPS(bpfland_running, struct task_struct *p)
 		vtime_now = p->scx.dsq_vtime;

 	/*
-	 * Ensure time slice never exceeds slice_ns when a task is started on a
-	 * CPU.
+	 * Refresh task's time slice immediately before it starts to run on its
+	 * assigned CPU.
 	 */
-	if (p->scx.slice > slice_ns)
-		p->scx.slice = slice_ns;
+	p->scx.slice = task_slice(p);

 	/* Update CPU interactive state */
 	if (is_task_interactive(p))
@ -933,7 +950,8 @@ static void update_task_interactive(struct task_ctx *tctx)
 	 * (nvcsw_avg_thresh) it is classified as interactive, otherwise the
 	 * task is classified as regular.
 	 */
-	tctx->is_interactive = tctx->avg_nvcsw >= nvcsw_avg_thresh;
+	if (is_nvcsw_enabled())
+		tctx->is_interactive = tctx->avg_nvcsw >= nvcsw_avg_thresh;
 }

 /*
--- a/scheds/rust/scx_bpfland/src/main.rs
+++ b/scheds/rust/scx_bpfland/src/main.rs
@ -54,11 +54,12 @@ const SCHEDULER_NAME: &'static str = "scx_bpfland";
 fn get_primary_cpus(powersave: bool) -> std::io::Result<Vec<usize>> {
    let topo = Topology::new().unwrap();

-    // Iterate over each CPU directory and collect CPU ID and its max frequency.
+    // Iterate over each CPU in the topology and collect its ID and base operational frequency to
+    // distinguish between fast and slow cores.
    let mut cpu_freqs = Vec::new();
    for core in topo.cores().into_iter() {
        for (cpu_id, cpu) in core.cpus() {
-            cpu_freqs.push((*cpu_id, cpu.max_freq()));
+            cpu_freqs.push((*cpu_id, cpu.base_freq()));
        }
    }
    if cpu_freqs.is_empty() {
@ -327,7 +328,7 @@ impl<'a> Scheduler<'a> {
        let topo = Topology::new().unwrap();

        // Initialize the primary scheduling domain (based on the --primary-domain option).
-        Self::init_primary_domain(&mut skel, &topo, &opts.primary_domain)?;
+        Self::init_primary_domain(&mut skel, &opts.primary_domain)?;

        // Initialize L2 cache domains.
        if !opts.disable_l2 {
@ -382,7 +383,6 @@ impl<'a> Scheduler<'a> {

    fn init_primary_domain(
        skel: &mut BpfSkel<'_>,
-        topo: &Topology,
        primary_domain: &Cpumask,
    ) -> Result<()> {
        info!("primary CPU domain = 0x{:x}", primary_domain);
--- a/scheds/rust/scx_lavd/Cargo.lock
+++ b/scheds/rust/scx_lavd/Cargo.lock
@ -948,7 +948,7 @@ dependencies = [

 [[package]]
 name = "scx_lavd"
-version = "1.0.2"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "bitvec",
@ -970,7 +970,7 @@ dependencies = [

 [[package]]
 name = "scx_utils"
-version = "1.0.2"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "bindgen",
--- a/scheds/rust/scx_lavd/Cargo.toml
+++ b/scheds/rust/scx_lavd/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "scx_lavd"
-version = "1.0.2"
+version = "1.0.3"
 authors = ["Changwoo Min <changwoo@igalia.com>", "Igalia"]
 edition = "2021"
 description = "A Latency-criticality Aware Virtual Deadline (LAVD) scheduler based on sched_ext, which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. https://github.com/sched-ext/scx/tree/main"
@ -17,7 +17,7 @@ libbpf-rs = "0.24.1"
 libc = "0.2.137"
 log = "0.4.17"
 ordered-float = "3.4.0"
-scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
+scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
 simplelog = "0.12"
 static_assertions = "1.1.0"
 rlimit = "0.10.1"
@ -25,7 +25,7 @@ plain = "0.2.3"
 nix = { version = "0.29.0", features = ["signal"] }

 [build-dependencies]
-scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
+scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }

 [features]
 enable_backtrace = []
--- a/scheds/rust/scx_layered/Cargo.lock
+++ b/scheds/rust/scx_layered/Cargo.lock
@ -1023,7 +1023,7 @@ dependencies = [

 [[package]]
 name = "scx_layered"
-version = "1.0.2"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "bitvec",
@ -1046,7 +1046,7 @@ dependencies = [

 [[package]]
 name = "scx_stats"
-version = "0.2.0"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "crossbeam",
@ -1060,7 +1060,7 @@ dependencies = [

 [[package]]
 name = "scx_stats_derive"
-version = "0.2.0"
+version = "1.0.3"
 dependencies = [
 "proc-macro2",
 "quote",
@ -1071,7 +1071,7 @@ dependencies = [

 [[package]]
 name = "scx_utils"
-version = "1.0.2"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "bindgen",
--- a/scheds/rust/scx_layered/Cargo.toml
+++ b/scheds/rust/scx_layered/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "scx_layered"
-version = "1.0.2"
+version = "1.0.3"
 authors = ["Tejun Heo <htejun@meta.com>", "Meta"]
 edition = "2021"
 description = "A highly configurable multi-layer BPF / user space hybrid scheduler used within sched_ext, which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. https://github.com/sched-ext/scx/tree/main"
@ -18,15 +18,15 @@ lazy_static = "1.4"
 libbpf-rs = "0.24.1"
 libc = "0.2.137"
 log = "0.4.17"
-scx_stats = { path = "../../../rust/scx_stats", version = "0.2.0" }
-scx_stats_derive = { path = "../../../rust/scx_stats/scx_stats_derive", version = "0.2.0" }
-scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
+scx_stats = { path = "../../../rust/scx_stats", version = "1.0.3" }
+scx_stats_derive = { path = "../../../rust/scx_stats/scx_stats_derive", version = "1.0.3" }
+scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 simplelog = "0.12"

 [build-dependencies]
-scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
+scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }

 [features]
 enable_backtrace = []
--- a/scheds/rust/scx_layered/src/bpf/intf.h
+++ b/scheds/rust/scx_layered/src/bpf/intf.h
@ -53,7 +53,6 @@ enum layer_stat_idx {
 	LSTAT_SEL_LOCAL,
 	LSTAT_ENQ_WAKEUP,
 	LSTAT_ENQ_EXPIRE,
-	LSTAT_ENQ_LAST,
 	LSTAT_ENQ_REENQ,
 	LSTAT_MIN_EXEC,
 	LSTAT_MIN_EXEC_NS,
--- a/scheds/rust/scx_layered/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_layered/src/bpf/main.bpf.c
@ -667,12 +667,6 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
 	if (enq_flags & SCX_ENQ_REENQ) {
 		lstat_inc(LSTAT_ENQ_REENQ, layer, cctx);
 	} else {
-		if (enq_flags & SCX_ENQ_LAST) {
-			lstat_inc(LSTAT_ENQ_LAST, layer, cctx);
-			scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, 0);
-			return;
-		}
-
 		if (enq_flags & SCX_ENQ_WAKEUP)
 			lstat_inc(LSTAT_ENQ_WAKEUP, layer, cctx);
 		else
@ -1698,5 +1692,4 @@ SCX_OPS_DEFINE(layered,
 	       .dump			= (void *)layered_dump,
 	       .init			= (void *)layered_init,
 	       .exit			= (void *)layered_exit,
-	       .flags			= SCX_OPS_ENQ_LAST,
 	       .name			= "layered");
--- a/scheds/rust/scx_layered/src/main.rs
+++ b/scheds/rust/scx_layered/src/main.rs
@ -71,9 +71,70 @@ const NR_LSTATS: usize = bpf_intf::layer_stat_idx_NR_LSTATS as usize;
 const NR_LAYER_MATCH_KINDS: usize = bpf_intf::layer_match_kind_NR_LAYER_MATCH_KINDS as usize;
 const CORE_CACHE_LEVEL: u32 = 2;

+#[rustfmt::skip]
 lazy_static::lazy_static! {
    static ref NR_POSSIBLE_CPUS: usize = libbpf_rs::num_possible_cpus().unwrap();
    static ref USAGE_DECAY: f64 = 0.5f64.powf(1.0 / USAGE_HALF_LIFE_F64);
+    static ref EXAMPLE_CONFIG: LayerConfig =
+	LayerConfig {
+            specs: vec![
+		LayerSpec {
+                    name: "batch".into(),
+                    comment: Some("tasks under system.slice or tasks with nice value > 0".into()),
+                    matches: vec![
+			vec![LayerMatch::CgroupPrefix("system.slice/".into())],
+			vec![LayerMatch::NiceAbove(0)],
+                    ],
+                    kind: LayerKind::Confined {
+			cpus_range: Some((0, 16)),
+			util_range: (0.8, 0.9),
+			min_exec_us: 1000,
+			yield_ignore: 0.0,
+			preempt: false,
+			preempt_first: false,
+			exclusive: false,
+			perf: 1024,
+			nodes: vec![],
+			llcs: vec![],
+                    },
+		},
+		LayerSpec {
+                    name: "immediate".into(),
+                    comment: Some("tasks under workload.slice with nice value < 0".into()),
+                    matches: vec![vec![
+			LayerMatch::CgroupPrefix("workload.slice/".into()),
+			LayerMatch::NiceBelow(0),
+                    ]],
+                    kind: LayerKind::Open {
+			min_exec_us: 100,
+			yield_ignore: 0.25,
+			preempt: true,
+			preempt_first: false,
+			exclusive: true,
+			perf: 1024,
+			nodes: vec![],
+			llcs: vec![],
+                    },
+		},
+		LayerSpec {
+                    name: "normal".into(),
+                    comment: Some("the rest".into()),
+                    matches: vec![vec![]],
+                    kind: LayerKind::Grouped {
+			cpus_range: None,
+			util_range: (0.5, 0.6),
+			min_exec_us: 200,
+			yield_ignore: 0.0,
+			preempt: false,
+			preempt_first: false,
+			exclusive: false,
+			perf: 1024,
+			nodes: vec![],
+			llcs: vec![],
+                    },
+		},
+            ],
+	};
 }

 /// scx_layered: A highly configurable multi-layer sched_ext scheduler
@ -347,6 +408,10 @@ struct Opts {
    #[clap(long)]
    monitor: Option<f64>,

+    /// Run with example layer specifications (useful for e.g. CI pipelines)
+    #[clap(long)]
+    run_example: bool,
+
    /// Layer specification. See --help.
    specs: Vec<String>,
 }
@ -999,7 +1064,7 @@ impl Layer {
        cpu_pool: &CpuPool,
        name: &str,
        kind: LayerKind,
-        topo: &Topology
+        topo: &Topology,
    ) -> Result<Self> {
        let mut cpus = bitvec![0; cpu_pool.nr_cpus];
        cpus.fill(false);
@ -1072,39 +1137,28 @@ impl Layer {
            }
        }

-        let mut nodes = topo.nodes().iter().collect::<Vec<_>>().clone();
-        let num_nodes = nodes.len();
        let is_left = idx % 2 == 0;
-        let rot_by = |idx, len| -> usize { if idx <= len { idx } else { idx % len } };
-        if is_left {
-            nodes.rotate_left(rot_by(idx, num_nodes));
-        } else {
-            nodes.rotate_right(rot_by(idx, num_nodes));
-        }
+        let rot_by = |idx, len| -> usize {
+            if idx <= len {
+                idx
+            } else {
+                idx % len
+            }
+        };

        let mut core_order = vec![];
-        for node in nodes.iter() {
-            let mut llcs = node.llcs().clone().into_values().collect::<Vec<_>>().clone();
-            let num_llcs = llcs.len();
-            if is_left {
-                llcs.rotate_left(rot_by(idx, num_llcs));
-            } else {
-                llcs.rotate_right(rot_by(idx, num_llcs));
-            }
-
-            for llc in llcs.iter() {
-                let mut llc_cores = llc.cores().clone().into_values().collect::<Vec<_>>().clone();
-                let num_cores = llc_cores.len();
+        for i in 0..topo.cores().len() {
+            core_order.push(i);
+        }

+        for node in topo.nodes().iter() {
+            for (_, llc) in node.llcs() {
+                let llc_cores = llc.cores().len();
+                let rot = rot_by(llc_cores + (idx << 1), llc_cores);
                if is_left {
-                    llc_cores.rotate_left(rot_by(idx, num_cores));
+                    core_order.rotate_left(rot);
                } else {
-                    llc_cores.rotate_right(rot_by(idx, num_cores));
-                }
-
-
-                for llc_core in llc_cores.iter() {
-                    core_order.push(llc_core.id());
+                    core_order.rotate_right(rot);
                }
            }
        }
@ -1523,7 +1577,13 @@ impl<'a, 'b> Scheduler<'a, 'b> {

        let mut layers = vec![];
        for (idx, spec) in layer_specs.iter().enumerate() {
-            layers.push(Layer::new(idx, &cpu_pool, &spec.name, spec.kind.clone(), &topo)?);
+            layers.push(Layer::new(
+                idx,
+                &cpu_pool,
+                &spec.name,
+                spec.kind.clone(),
+                &topo,
+            )?);
        }

        // Other stuff.
@ -1578,7 +1638,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {

    fn refresh_cpumasks(&mut self) -> Result<()> {
        let mut updated = false;
-	let num_layers = self.layers.len();
+        let num_layers = self.layers.len();

        for idx in 0..num_layers {
            match self.layers[idx].kind {
@ -1748,71 +1808,11 @@ impl<'a, 'b> Drop for Scheduler<'a, 'b> {
 }

 fn write_example_file(path: &str) -> Result<()> {
-    let example = LayerConfig {
-        specs: vec![
-            LayerSpec {
-                name: "batch".into(),
-                comment: Some("tasks under system.slice or tasks with nice value > 0".into()),
-                matches: vec![
-                    vec![LayerMatch::CgroupPrefix("system.slice/".into())],
-                    vec![LayerMatch::NiceAbove(0)],
-                ],
-                kind: LayerKind::Confined {
-                    cpus_range: Some((0, 16)),
-                    util_range: (0.8, 0.9),
-                    min_exec_us: 1000,
-                    yield_ignore: 0.0,
-                    preempt: false,
-                    preempt_first: false,
-                    exclusive: false,
-                    perf: 1024,
-                    nodes: vec![],
-                    llcs: vec![],
-                },
-            },
-            LayerSpec {
-                name: "immediate".into(),
-                comment: Some("tasks under workload.slice with nice value < 0".into()),
-                matches: vec![vec![
-                    LayerMatch::CgroupPrefix("workload.slice/".into()),
-                    LayerMatch::NiceBelow(0),
-                ]],
-                kind: LayerKind::Open {
-                    min_exec_us: 100,
-                    yield_ignore: 0.25,
-                    preempt: true,
-                    preempt_first: false,
-                    exclusive: true,
-                    perf: 1024,
-                    nodes: vec![],
-                    llcs: vec![],
-                },
-            },
-            LayerSpec {
-                name: "normal".into(),
-                comment: Some("the rest".into()),
-                matches: vec![vec![]],
-                kind: LayerKind::Grouped {
-                    cpus_range: None,
-                    util_range: (0.5, 0.6),
-                    min_exec_us: 200,
-                    yield_ignore: 0.0,
-                    preempt: false,
-                    preempt_first: false,
-                    exclusive: false,
-                    perf: 1024,
-                    nodes: vec![],
-                    llcs: vec![],
-                },
-            },
-        ],
-    };
-
    let mut f = fs::OpenOptions::new()
        .create_new(true)
        .write(true)
        .open(path)?;
-    Ok(f.write_all(serde_json::to_string_pretty(&example)?.as_bytes())?)
+    Ok(f.write_all(serde_json::to_string_pretty(&*EXAMPLE_CONFIG)?.as_bytes())?)
 }

 fn verify_layer_specs(specs: &[LayerSpec]) -> Result<()> {
@ -1956,7 +1956,11 @@ fn main() -> Result<()> {
        return Ok(());
    }

-    let mut layer_config = LayerConfig { specs: vec![] };
+    let mut layer_config = match opts.run_example {
+        true => EXAMPLE_CONFIG.clone(),
+        false => LayerConfig { specs: vec![] },
+    };
+
    for (idx, input) in opts.specs.iter().enumerate() {
        layer_config.specs.append(
            &mut LayerSpec::parse(input)
--- a/scheds/rust/scx_layered/src/stats.rs
+++ b/scheds/rust/scx_layered/src/stats.rs
@ -68,8 +68,6 @@ pub struct LayerStats {
    pub enq_wakeup: f64,
    #[stat(desc = "layer: % enqueued after slice expiration")]
    pub enq_expire: f64,
-    #[stat(desc = "layer: % enqueued as last runnable task on CPU")]
-    pub enq_last: f64,
    #[stat(desc = "layer: % re-enqueued due to RT preemption")]
    pub enq_reenq: f64,
    #[stat(desc = "layer: # times exec duration < min_exec_us")]
@ -146,7 +144,6 @@ impl LayerStats {
        let ltotal = lstat(bpf_intf::layer_stat_idx_LSTAT_SEL_LOCAL)
            + lstat(bpf_intf::layer_stat_idx_LSTAT_ENQ_WAKEUP)
            + lstat(bpf_intf::layer_stat_idx_LSTAT_ENQ_EXPIRE)
-            + lstat(bpf_intf::layer_stat_idx_LSTAT_ENQ_LAST)
            + lstat(bpf_intf::layer_stat_idx_LSTAT_ENQ_REENQ);
        let lstat_pct = |sidx| {
            if ltotal != 0 {
@ -179,7 +176,6 @@ impl LayerStats {
            sel_local: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_SEL_LOCAL),
            enq_wakeup: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_ENQ_WAKEUP),
            enq_expire: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_ENQ_EXPIRE),
-            enq_last: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_ENQ_LAST),
            enq_reenq: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_ENQ_REENQ),
            min_exec: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_MIN_EXEC),
            min_exec_us: (lstat(bpf_intf::layer_stat_idx_LSTAT_MIN_EXEC_NS) / 1000) as u64,
@ -221,13 +217,12 @@ impl LayerStats {

        writeln!(
            w,
-            "  {:<width$}  tot={:7} local={} wake/exp/last/reenq={}/{}/{}/{}",
+            "  {:<width$}  tot={:7} local={} wake/exp/reenq={}/{}/{}",
            "",
            self.total,
            fmt_pct(self.sel_local),
            fmt_pct(self.enq_wakeup),
            fmt_pct(self.enq_expire),
-            fmt_pct(self.enq_last),
            fmt_pct(self.enq_reenq),
            width = header_width,
        )?;
@ -348,7 +343,6 @@ impl SysStats {
        let total = lsum(bpf_intf::layer_stat_idx_LSTAT_SEL_LOCAL)
            + lsum(bpf_intf::layer_stat_idx_LSTAT_ENQ_WAKEUP)
            + lsum(bpf_intf::layer_stat_idx_LSTAT_ENQ_EXPIRE)
-            + lsum(bpf_intf::layer_stat_idx_LSTAT_ENQ_LAST)
            + lsum(bpf_intf::layer_stat_idx_LSTAT_ENQ_REENQ);
        let lsum_pct = |idx| {
            if total != 0 {
--- a/scheds/rust/scx_mitosis/Cargo.toml
+++ b/scheds/rust/scx_mitosis/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "scx_mitosis"
-version = "0.0.3"
+version = "0.0.4"
 authors = ["Dan Schatzberg <dschatzberg@meta.com>", "Meta"]
 edition = "2021"
 description = "A dynamic affinity scheduler used within sched_ext, which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. https://github.com/sched-ext/scx/tree/main"
@ -20,13 +20,13 @@ libc = "0.2.137"
 log = "0.4.17"
 maplit = "1.0.2"
 prometheus-client = "0.19"
-scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
+scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 simplelog = "0.12"

 [build-dependencies]
-scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
+scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }

 [features]
 enable_backtrace = []
--- a/scheds/rust/scx_rlfifo/Cargo.lock
+++ b/scheds/rust/scx_rlfifo/Cargo.lock
@ -868,7 +868,7 @@ dependencies = [

 [[package]]
 name = "scx_rlfifo"
-version = "1.0.2"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "ctrlc",
@ -881,7 +881,7 @@ dependencies = [

 [[package]]
 name = "scx_rustland_core"
-version = "1.0.2"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "libbpf-rs",
@ -894,7 +894,7 @@ dependencies = [

 [[package]]
 name = "scx_utils"
-version = "1.0.2"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "bindgen",
--- a/scheds/rust/scx_rlfifo/Cargo.toml
+++ b/scheds/rust/scx_rlfifo/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "scx_rlfifo"
-version = "1.0.2"
+version = "1.0.3"
 authors = ["Andrea Righi <andrea.righi@linux.dev>"]
 edition = "2021"
 description = "A simple FIFO scheduler in Rust that runs in user-space"
@ -12,12 +12,12 @@ plain = "0.2.3"
 ctrlc = { version = "3.1", features = ["termination"] }
 libbpf-rs = "0.24.1"
 libc = "0.2.137"
-scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
-scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.2" }
+scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
+scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.3" }

 [build-dependencies]
-scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
-scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.2" }
+scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
+scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.3" }

 [features]
 enable_backtrace = []
--- a/scheds/rust/scx_rustland/Cargo.lock
+++ b/scheds/rust/scx_rustland/Cargo.lock
@ -939,7 +939,7 @@ dependencies = [

 [[package]]
 name = "scx_rustland"
-version = "1.0.2"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "clap",
@ -957,7 +957,7 @@ dependencies = [

 [[package]]
 name = "scx_rustland_core"
-version = "1.0.2"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "libbpf-rs",
@ -970,7 +970,7 @@ dependencies = [

 [[package]]
 name = "scx_utils"
-version = "1.0.2"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "bindgen",
--- a/scheds/rust/scx_rustland/Cargo.toml
+++ b/scheds/rust/scx_rustland/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "scx_rustland"
-version = "1.0.2"
+version = "1.0.3"
 authors = ["Andrea Righi <andrea.righi@linux.dev>"]
 edition = "2021"
 description = "A BPF component (dispatcher) that implements the low level sched-ext functionalities and a user-space counterpart (scheduler), written in Rust, that implements the actual scheduling policy. This is used within sched_ext, which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. https://github.com/sched-ext/scx/tree/main"
@ -16,13 +16,13 @@ libbpf-rs = "0.24.1"
 libc = "0.2.137"
 log = "0.4.17"
 ordered-float = "3.4.0"
-scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
-scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.2" }
+scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
+scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.3" }
 simplelog = "0.12"

 [build-dependencies]
-scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
-scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.2" }
+scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
+scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.3" }

 [features]
 enable_backtrace = []
--- a/scheds/rust/scx_rusty/Cargo.lock
+++ b/scheds/rust/scx_rusty/Cargo.lock
@ -1032,7 +1032,7 @@ dependencies = [

 [[package]]
 name = "scx_rusty"
-version = "1.0.2"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "chrono",
@ -1055,7 +1055,7 @@ dependencies = [

 [[package]]
 name = "scx_stats"
-version = "0.2.0"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "crossbeam",
@ -1069,7 +1069,7 @@ dependencies = [

 [[package]]
 name = "scx_stats_derive"
-version = "0.2.0"
+version = "1.0.3"
 dependencies = [
 "proc-macro2",
 "quote",
@ -1080,7 +1080,7 @@ dependencies = [

 [[package]]
 name = "scx_utils"
-version = "1.0.2"
+version = "1.0.3"
 dependencies = [
 "anyhow",
 "bindgen",
--- a/scheds/rust/scx_rusty/Cargo.toml
+++ b/scheds/rust/scx_rusty/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "scx_rusty"
-version = "1.0.2"
+version = "1.0.3"
 authors = ["Dan Schatzberg <dschatzberg@meta.com>", "Meta"]
 edition = "2021"
 description = "A multi-domain, BPF / user space hybrid scheduler used within sched_ext, which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. https://github.com/sched-ext/scx/tree/main"
@ -17,16 +17,16 @@ libbpf-rs = "0.24.1"
 libc = "0.2.137"
 log = "0.4.17"
 ordered-float = "3.4.0"
-scx_stats = { path = "../../../rust/scx_stats", version = "0.2.0" }
-scx_stats_derive = { path = "../../../rust/scx_stats/scx_stats_derive", version = "0.2.0" }
-scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
+scx_stats = { path = "../../../rust/scx_stats", version = "1.0.3" }
+scx_stats_derive = { path = "../../../rust/scx_stats/scx_stats_derive", version = "1.0.3" }
+scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
 serde = { version = "1.0", features = ["derive"] }
 simplelog = "0.12"
 sorted-vec = "0.8.3"
 static_assertions = "1.1.0"

 [build-dependencies]
-scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
+scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }

 [features]
 enable_backtrace = []
--- a/services/scx
+++ b/services/scx
@ -1,5 +1,5 @@
 # List of scx_schedulers: scx_bpfland scx_central scx_lavd scx_layered scx_nest scx_qmap scx_rlfifo scx_rustland scx_rusty scx_simple scx_userland
-SCX_SCHEDULER=scx_rusty
+SCX_SCHEDULER=scx_bpfland

 # Set custom flags for each scheduler, below is an example of how to use
-#SCX_FLAGS='-u 3000 -i 0.5 -I 0.025 -l 0.5 -b -k'
+#SCX_FLAGS='-s 20000 -S 1000 -c 0 -k'