mirror of
https://github.com/sched-ext/scx.git
synced 2024-11-24 11:50:23 +00:00
Merge pull request #781 from JakeHillion/pr781
layered: move configuration into library component
This commit is contained in:
commit
eb59085e61
161
scheds/rust/scx_layered/src/config.rs
Normal file
161
scheds/rust/scx_layered/src/config.rs
Normal file
@ -0,0 +1,161 @@
|
||||
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
|
||||
// This software may be used and distributed according to the terms of the
|
||||
// GNU General Public License version 2.
|
||||
use std::fs;
|
||||
use std::io::Read;
|
||||
|
||||
use anyhow::Result;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::LayerGrowthAlgo;
|
||||
|
||||
/// Top-level layer configuration.
///
/// `#[serde(transparent)]` makes this (de)serialize as the bare JSON array of
/// layer specs it wraps, so a config file is simply `[ {spec}, {spec}, ... ]`.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(transparent)]
pub struct LayerConfig {
    /// Ordered list of layer specifications.
    pub specs: Vec<LayerSpec>,
}
|
||||
|
||||
/// A single layer definition: a name, the match rules that assign tasks to
/// the layer, and the layer's scheduling policy (`kind`).
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct LayerSpec {
    /// Layer name.
    pub name: String,
    /// Free-form human-readable note; not interpreted by the scheduler.
    pub comment: Option<String>,
    /// Match rules in OR-of-ANDs form: the outer Vec is a disjunction, each
    /// inner Vec a conjunction of `LayerMatch` conditions.
    /// NOTE(review): OR-of-ANDs interpretation inferred from the nested shape —
    /// confirm against the BPF-side matcher.
    pub matches: Vec<Vec<LayerMatch>>,
    /// Scheduling policy and its tunables for this layer.
    pub kind: LayerKind,
}
|
||||
|
||||
impl LayerSpec {
|
||||
pub fn parse(input: &str) -> Result<Vec<Self>> {
|
||||
let config: LayerConfig = if input.starts_with("f:") || input.starts_with("file:") {
|
||||
let mut f = fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(input.split_once(':').unwrap().1)?;
|
||||
let mut content = String::new();
|
||||
f.read_to_string(&mut content)?;
|
||||
serde_json::from_str(&content)?
|
||||
} else {
|
||||
serde_json::from_str(input)?
|
||||
};
|
||||
Ok(config.specs)
|
||||
}
|
||||
|
||||
pub fn nodes(&self) -> Vec<usize> {
|
||||
match &self.kind {
|
||||
LayerKind::Confined { nodes, .. }
|
||||
| LayerKind::Open { nodes, .. }
|
||||
| LayerKind::Grouped { nodes, .. } => nodes.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn llcs(&self) -> Vec<usize> {
|
||||
match &self.kind {
|
||||
LayerKind::Confined { llcs, .. }
|
||||
| LayerKind::Open { llcs, .. }
|
||||
| LayerKind::Grouped { llcs, .. } => llcs.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A single task-matching condition used to place tasks into a layer.
///
/// NOTE(review): the matching itself happens elsewhere (BPF side); the
/// per-variant descriptions below are inferred from the names — confirm
/// against the matcher implementation.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum LayerMatch {
    /// Task's cgroup path starts with this prefix.
    CgroupPrefix(String),
    /// Task's comm (thread name) starts with this prefix.
    CommPrefix(String),
    /// Process comm (thread-group leader name) starts with this prefix.
    PcommPrefix(String),
    /// Nice value strictly above this.
    NiceAbove(i32),
    /// Nice value strictly below this.
    NiceBelow(i32),
    /// Nice value exactly equal.
    NiceEquals(i32),
    /// User ID match.
    UIDEquals(u32),
    /// Group ID match.
    GIDEquals(u32),
    /// Process ID match.
    PIDEquals(u32),
    /// Parent process ID match.
    PPIDEquals(u32),
    /// Thread group ID match.
    TGIDEquals(u32),
}
|
||||
|
||||
/// Scheduling policy for a layer. All tunables are `#[serde(default)]` so a
/// config file only needs to spell out the fields it wants to override.
///
/// `Confined` and `Grouped` carry `util_range`/`cpus_range` capacity controls
/// that `Open` lacks; the remaining tunables are identical across variants.
/// NOTE(review): the behavioral difference between the three variants is
/// implemented elsewhere (scheduler core / BPF) — not visible in this file.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum LayerKind {
    Confined {
        /// (low, high) utilization targets driving CPU allocation —
        /// presumably fractions in [0, 1]; confirm against the sizing logic.
        util_range: (f64, f64),
        /// Optional (min, max) bound on the number of CPUs.
        #[serde(default)]
        cpus_range: Option<(usize, usize)>,
        #[serde(default)]
        min_exec_us: u64,
        #[serde(default)]
        yield_ignore: f64,
        #[serde(default)]
        slice_us: u64,
        #[serde(default)]
        preempt: bool,
        #[serde(default)]
        preempt_first: bool,
        #[serde(default)]
        exclusive: bool,
        #[serde(default)]
        weight: u32,
        #[serde(default)]
        idle_smt: bool,
        /// CPU-core growth ordering policy; see `LayerGrowthAlgo`.
        #[serde(default)]
        growth_algo: LayerGrowthAlgo,
        #[serde(default)]
        perf: u64,
        /// NUMA node restriction; empty by default (see `LayerSpec::nodes`).
        #[serde(default)]
        nodes: Vec<usize>,
        /// LLC restriction; empty by default (see `LayerSpec::llcs`).
        #[serde(default)]
        llcs: Vec<usize>,
    },
    Grouped {
        /// (low, high) utilization targets driving CPU allocation.
        util_range: (f64, f64),
        /// Optional (min, max) bound on the number of CPUs.
        #[serde(default)]
        cpus_range: Option<(usize, usize)>,
        #[serde(default)]
        min_exec_us: u64,
        #[serde(default)]
        yield_ignore: f64,
        #[serde(default)]
        slice_us: u64,
        #[serde(default)]
        preempt: bool,
        #[serde(default)]
        preempt_first: bool,
        #[serde(default)]
        exclusive: bool,
        #[serde(default)]
        weight: u32,
        #[serde(default)]
        idle_smt: bool,
        #[serde(default)]
        growth_algo: LayerGrowthAlgo,
        #[serde(default)]
        perf: u64,
        #[serde(default)]
        nodes: Vec<usize>,
        #[serde(default)]
        llcs: Vec<usize>,
    },
    Open {
        // No util_range / cpus_range: an Open layer has no capacity sizing
        // controls of its own.
        #[serde(default)]
        min_exec_us: u64,
        #[serde(default)]
        yield_ignore: f64,
        #[serde(default)]
        slice_us: u64,
        #[serde(default)]
        preempt: bool,
        #[serde(default)]
        preempt_first: bool,
        #[serde(default)]
        exclusive: bool,
        #[serde(default)]
        weight: u32,
        #[serde(default)]
        idle_smt: bool,
        #[serde(default)]
        growth_algo: LayerGrowthAlgo,
        #[serde(default)]
        perf: u64,
        #[serde(default)]
        nodes: Vec<usize>,
        #[serde(default)]
        llcs: Vec<usize>,
    },
}
|
@ -6,7 +6,6 @@ use serde::Serialize;
|
||||
|
||||
use crate::bpf_intf;
|
||||
use crate::CpuPool;
|
||||
use crate::IteratorInterleaver;
|
||||
use crate::LayerSpec;
|
||||
|
||||
#[derive(Clone, Debug, Parser, Serialize, Deserialize)]
|
||||
@ -239,3 +238,52 @@ impl<'a> LayerCoreOrderGenerator<'a> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Round-robin interleaver over a collection of iterators.
///
/// Yields one item from each inner iterator in turn, skipping exhausted
/// ones, and terminates once a complete sweep over all inner iterators
/// produces nothing.
struct IteratorInterleaver<T>
where
    T: Iterator,
{
    /// True while the sweep in progress has yielded no item yet; a second
    /// consecutive fruitless sweep means every inner iterator is exhausted.
    empty: bool,
    /// Position of the next inner iterator to poll.
    index: usize,
    /// The inner iterators being interleaved.
    iters: Vec<T>,
}

impl<T> IteratorInterleaver<T>
where
    T: Iterator,
{
    /// Build an interleaver over `iters`; polling starts at the first one.
    fn new(iters: Vec<T>) -> Self {
        Self {
            empty: false,
            index: 0,
            iters,
        }
    }
}

impl<T> Iterator for IteratorInterleaver<T>
where
    T: Iterator,
{
    type Item = T::Item;

    fn next(&mut self) -> Option<T::Item> {
        // Iterative form of the original tail recursion: keep advancing the
        // round-robin cursor until an item turns up or a full sweep comes
        // back empty twice in a row.
        loop {
            match self.iters.get_mut(self.index) {
                Some(current) => {
                    self.index += 1;
                    if let Some(item) = current.next() {
                        self.empty = false;
                        return Some(item);
                    }
                    // Exhausted inner iterator: fall through to the next one.
                }
                None => {
                    // Wrapped past the last iterator: start a new sweep.
                    self.index = 0;
                    if self.empty {
                        // Previous sweep produced nothing — we are done.
                        return None;
                    }
                    self.empty = true;
                }
            }
        }
    }
}
|
||||
|
299
scheds/rust/scx_layered/src/lib.rs
Normal file
299
scheds/rust/scx_layered/src/lib.rs
Normal file
@ -0,0 +1,299 @@
|
||||
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
|
||||
// This software may be used and distributed according to the terms of the
|
||||
// GNU General Public License version 2.
|
||||
mod config;
|
||||
mod layer_core_growth;
|
||||
|
||||
pub mod bpf_intf;
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use anyhow::bail;
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use bitvec::prelude::*;
|
||||
pub use config::LayerConfig;
|
||||
pub use config::LayerKind;
|
||||
pub use config::LayerMatch;
|
||||
pub use config::LayerSpec;
|
||||
pub use layer_core_growth::LayerGrowthAlgo;
|
||||
use log::debug;
|
||||
use log::info;
|
||||
use scx_utils::Core;
|
||||
use scx_utils::Topology;
|
||||
|
||||
const MAX_CPUS: usize = bpf_intf::consts_MAX_CPUS as usize;
|
||||
const CORE_CACHE_LEVEL: u32 = 2;
|
||||
|
||||
lazy_static::lazy_static! {
|
||||
static ref NR_POSSIBLE_CPUS: usize = libbpf_rs::num_possible_cpus().unwrap();
|
||||
}
|
||||
|
||||
#[derive(Debug)]
/// `CpuPool` represents the CPU core and logical CPU topology within the system.
/// It manages the mapping and availability of physical and logical cores, including
/// how resources are allocated for tasks across the available CPUs.
///
/// "Core" here means a group of logical CPUs sharing the cache at
/// `CORE_CACHE_LEVEL`, as discovered from sysfs in `CpuPool::new`.
pub struct CpuPool {
    /// The number of physical cores available on the system.
    pub nr_cores: usize,

    /// The total number of logical CPUs (including SMT threads).
    /// This can be larger than `nr_cores` if SMT is enabled,
    /// where each physical core may have a couple logical cores.
    pub nr_cpus: usize,

    /// A bit mask representing all online logical CPUs.
    /// Each bit corresponds to whether a logical CPU is online and available
    /// for processing tasks.
    pub all_cpus: BitVec,

    /// A vector of bit masks, each representing the mapping between
    /// physical cores and the logical CPUs that run on them.
    /// The index in the vector is the physical core, and each bit in the
    /// corresponding `BitVec` marks a logical CPU belonging to that core.
    core_cpus: Vec<BitVec>,

    /// Maps each logical CPU to its SMT sibling CPU on the same physical
    /// core, or -1 if it has none. Only the first pair found per core is
    /// linked (see the sibling-building loop in `new`).
    pub sibling_cpu: Vec<i32>,

    /// Maps each logical CPU index to the ID of the physical core it
    /// belongs to (`cpu_core[cpu] == core`). Indexed by CPU, not by core —
    /// see its use in `cpus_to_cores`.
    cpu_core: Vec<usize>,

    /// A bit mask representing all available (unallocated) physical cores.
    /// Each bit corresponds to whether a physical core is free for
    /// allocation via `alloc_cpus`.
    available_cores: BitVec,

    /// The first logical CPU of core 0. Used as the fallback CPU when no
    /// core is available.
    first_cpu: usize,

    /// The first CPU of the first available core, or `first_cpu` if no core
    /// is free; kept up to date by `update_fallback_cpu`.
    pub fallback_cpu: usize,

    /// Maps a core's topology coordinates `(node_id, llc_id, core_id)` to a
    /// dense sequential ID assigned in `new` while walking the `Topology`.
    core_topology_to_id: BTreeMap<(usize, usize, usize), usize>,
}
|
||||
|
||||
impl CpuPool {
    /// Discover the CPU topology from sysfs and build the pool with every
    /// core initially available.
    ///
    /// CPUs are grouped into "cores" by their shared cache ID at
    /// `CORE_CACHE_LEVEL`; a CPU whose cache-id file is missing is treated
    /// as offline.
    ///
    /// # Errors
    /// Fails if the possible-CPU count exceeds `MAX_CPUS`, or if a sysfs
    /// cache-id file exists but cannot be read or parsed.
    pub fn new(topo: &Topology) -> Result<Self> {
        if *NR_POSSIBLE_CPUS > MAX_CPUS {
            bail!(
                "NR_POSSIBLE_CPUS {} > MAX_CPUS {}",
                *NR_POSSIBLE_CPUS,
                MAX_CPUS
            );
        }

        let mut cpu_to_cache = vec![]; // (cpu_id, Option<cache_id>)
        let mut cache_ids = BTreeSet::<usize>::new();
        let mut nr_offline = 0;

        // Build cpu -> cache ID mapping.
        for cpu in 0..*NR_POSSIBLE_CPUS {
            let path = format!(
                "/sys/devices/system/cpu/cpu{}/cache/index{}/id",
                cpu, CORE_CACHE_LEVEL
            );
            let id = match std::fs::read_to_string(&path) {
                Ok(val) => Some(val.trim().parse::<usize>().with_context(|| {
                    format!("Failed to parse {:?}'s content {:?}", &path, &val)
                })?),
                // A missing file means the CPU is offline, not an error.
                Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                    nr_offline += 1;
                    None
                }
                Err(e) => return Err(e).with_context(|| format!("Failed to open {:?}", &path)),
            };

            cpu_to_cache.push(id);
            if let Some(id) = id {
                cache_ids.insert(id);
            }
        }

        let nr_cpus = *NR_POSSIBLE_CPUS - nr_offline;

        // Cache IDs may have holes. Assign consecutive core IDs to existing
        // cache IDs.
        let mut cache_to_core = BTreeMap::<usize, usize>::new();
        let mut nr_cores = 0;
        for cache_id in cache_ids.iter() {
            cache_to_core.insert(*cache_id, nr_cores);
            nr_cores += 1;
        }

        // Build core -> cpumask and cpu -> core mappings.
        let mut all_cpus = bitvec![0; *NR_POSSIBLE_CPUS];
        let mut core_cpus = vec![bitvec![0; *NR_POSSIBLE_CPUS]; nr_cores];
        let mut cpu_core = vec![];

        for (cpu, cache) in cpu_to_cache.iter().enumerate().take(*NR_POSSIBLE_CPUS) {
            if let Some(cache_id) = cache {
                let core_id = cache_to_core[cache_id];
                all_cpus.set(cpu, true);
                core_cpus[core_id].set(cpu, true);
                cpu_core.push(core_id);
            }
        }

        // Build sibling_cpu[]: cross-link only the first two CPUs found on
        // each core (the `break` stops after one pair).
        let mut sibling_cpu = vec![-1i32; *NR_POSSIBLE_CPUS];
        for cpus in &core_cpus {
            let mut first = -1i32;
            for cpu in cpus.iter_ones() {
                if first < 0 {
                    first = cpu as i32;
                } else {
                    sibling_cpu[first as usize] = cpu as i32;
                    sibling_cpu[cpu as usize] = first;
                    break;
                }
            }
        }

        // Build core_topology_to_id: dense IDs in Topology iteration order.
        let mut core_topology_to_id = BTreeMap::new();
        let mut next_topo_id: usize = 0;
        for node in topo.nodes() {
            for llc in node.llcs().values() {
                for core in llc.cores().values() {
                    core_topology_to_id
                        .insert((core.node_id, core.llc_id, core.id()), next_topo_id);
                    next_topo_id += 1;
                }
            }
        }

        info!(
            "CPUs: online/possible={}/{} nr_cores={}",
            nr_cpus, *NR_POSSIBLE_CPUS, nr_cores,
        );
        debug!("CPUs: siblings={:?}", &sibling_cpu[..nr_cpus]);

        let first_cpu = core_cpus[0].first_one().unwrap();

        let mut cpu_pool = Self {
            nr_cores,
            nr_cpus,
            all_cpus,
            core_cpus,
            sibling_cpu,
            cpu_core,
            // Every core starts out available.
            available_cores: bitvec![1; nr_cores],
            first_cpu,
            fallback_cpu: first_cpu,
            core_topology_to_id,
        };
        cpu_pool.update_fallback_cpu();
        Ok(cpu_pool)
    }

    /// Point `fallback_cpu` at the first CPU of the first available core,
    /// or at `first_cpu` when every core is allocated.
    fn update_fallback_cpu(&mut self) {
        match self.available_cores.first_one() {
            Some(next) => self.fallback_cpu = self.core_cpus[next].first_one().unwrap(),
            None => self.fallback_cpu = self.first_cpu,
        }
    }

    /// Allocate one whole core: walk `core_alloc_order` and claim the first
    /// core that is available and fully inside `allowed_cpus`, returning its
    /// CPU mask. Returns `None` when no such core exists.
    pub fn alloc_cpus<'a>(
        &'a mut self,
        allowed_cpus: &BitVec,
        core_alloc_order: &[usize],
    ) -> Option<&'a BitVec> {
        let available_cpus = self.available_cpus_in_mask(&allowed_cpus);
        // `?` via ok(): a partially-covered core makes the request unsatisfiable.
        let available_cores = self.cpus_to_cores(&available_cpus).ok()?;

        for alloc_core in core_alloc_order {
            match available_cores.get(*alloc_core) {
                Some(bit) => {
                    if *bit {
                        self.available_cores.set(*alloc_core, false);
                        self.update_fallback_cpu();
                        return Some(&self.core_cpus[*alloc_core]);
                    }
                }
                None => {
                    // Out-of-range core index in the order list; skip it.
                    continue;
                }
            }
        }
        None
    }

    /// Convert a CPU mask into the mask of cores it covers.
    ///
    /// # Errors
    /// Fails if the mask covers any core only partially — callers deal in
    /// whole cores.
    fn cpus_to_cores(&self, cpus_to_match: &BitVec) -> Result<BitVec> {
        let mut cpus = cpus_to_match.clone();
        let mut cores = bitvec![0; self.nr_cores];

        while let Some(cpu) = cpus.first_one() {
            let core = self.cpu_core[cpu];

            // Any CPU of this core missing from the input mask means a
            // partial cover.
            if (self.core_cpus[core].clone() & !cpus.clone()).count_ones() != 0 {
                bail!(
                    "CPUs {} partially intersect with core {} ({})",
                    cpus_to_match,
                    core,
                    self.core_cpus[core],
                );
            }

            cpus &= !self.core_cpus[core].clone();
            cores.set(core, true);
        }

        Ok(cores)
    }

    /// Return the cores covered by `cpus_to_free` to the pool.
    ///
    /// # Errors
    /// Fails if the mask partially covers a core, or if any of the cores is
    /// already free (double free).
    pub fn free<'a>(&'a mut self, cpus_to_free: &BitVec) -> Result<()> {
        let cores = self.cpus_to_cores(cpus_to_free)?;
        if (self.available_cores.clone() & &cores).any() {
            bail!("Some of CPUs {} are already free", cpus_to_free);
        }
        self.available_cores |= cores;
        self.update_fallback_cpu();
        Ok(())
    }

    /// Pick the next core to release from candidate mask `cands`: the core
    /// containing the highest-numbered candidate CPU. `Ok(None)` when the
    /// mask is empty.
    ///
    /// # Errors
    /// Fails if `cands` covers that core only partially.
    pub fn next_to_free<'a>(&'a self, cands: &BitVec) -> Result<Option<&'a BitVec>> {
        let last = match cands.last_one() {
            Some(ret) => ret,
            None => return Ok(None),
        };
        let core = self.cpu_core[last];
        if (self.core_cpus[core].clone() & !cands.clone()).count_ones() != 0 {
            bail!(
                "CPUs{} partially intersect with core {} ({})",
                cands,
                core,
                self.core_cpus[core]
            );
        }

        Ok(Some(&self.core_cpus[core]))
    }

    /// CPUs in `allowed_cpus` that sit on currently-available cores.
    pub fn available_cpus_in_mask(&self, allowed_cpus: &BitVec) -> BitVec {
        let mut cpus = bitvec![0; self.nr_cpus];
        for core in self.available_cores.iter_ones() {
            let mut core_cpus = self.core_cpus[core].clone();
            core_cpus &= allowed_cpus;
            cpus |= core_cpus;
        }
        cpus
    }

    /// Dense topology ID assigned to `core` in `new`.
    ///
    /// # Panics
    /// Panics if the core's `(node, llc, id)` triple was not seen when the
    /// pool was built.
    fn get_core_topological_id(&self, core: &Core) -> usize {
        *self
            .core_topology_to_id
            .get(&(core.node_id, core.llc_id, core.id()))
            .expect("unrecognised core")
    }
}
|
@ -3,17 +3,12 @@
|
||||
// This software may be used and distributed according to the terms of the
|
||||
// GNU General Public License version 2.
|
||||
mod bpf_skel;
|
||||
mod layer_core_growth;
|
||||
mod stats;
|
||||
|
||||
pub use bpf_skel::*;
|
||||
pub mod bpf_intf;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::BTreeSet;
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::CString;
|
||||
use std::fs;
|
||||
use std::io::Read;
|
||||
use std::io::Write;
|
||||
use std::mem::MaybeUninit;
|
||||
use std::ops::Sub;
|
||||
@ -30,10 +25,10 @@ use anyhow::bail;
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use bitvec::prelude::*;
|
||||
pub use bpf_skel::*;
|
||||
use clap::Parser;
|
||||
use clap::ValueEnum;
|
||||
use crossbeam::channel::RecvTimeoutError;
|
||||
use layer_core_growth::LayerGrowthAlgo;
|
||||
use libbpf_rs::skel::OpenSkel;
|
||||
use libbpf_rs::skel::Skel;
|
||||
use libbpf_rs::skel::SkelBuilder;
|
||||
@ -43,6 +38,7 @@ use log::debug;
|
||||
use log::info;
|
||||
use log::trace;
|
||||
use log::warn;
|
||||
use scx_layered::*;
|
||||
use scx_stats::prelude::*;
|
||||
use scx_utils::compat;
|
||||
use scx_utils::init_libbpf_logging;
|
||||
@ -53,7 +49,6 @@ use scx_utils::scx_ops_open;
|
||||
use scx_utils::uei_exited;
|
||||
use scx_utils::uei_report;
|
||||
use scx_utils::Cache;
|
||||
use scx_utils::Core;
|
||||
use scx_utils::CoreType;
|
||||
use scx_utils::LoadAggregator;
|
||||
use scx_utils::Topology;
|
||||
@ -66,7 +61,6 @@ use stats::StatsRes;
|
||||
use stats::SysStats;
|
||||
|
||||
const RAVG_FRAC_BITS: u32 = bpf_intf::ravg_consts_RAVG_FRAC_BITS;
|
||||
const MAX_CPUS: usize = bpf_intf::consts_MAX_CPUS as usize;
|
||||
const MAX_PATH: usize = bpf_intf::consts_MAX_PATH as usize;
|
||||
const MAX_COMM: usize = bpf_intf::consts_MAX_COMM as usize;
|
||||
const MAX_LAYER_WEIGHT: u32 = bpf_intf::consts_MAX_LAYER_WEIGHT;
|
||||
@ -79,7 +73,6 @@ const USAGE_HALF_LIFE_F64: f64 = USAGE_HALF_LIFE as f64 / 1_000_000_000.0;
|
||||
const NR_GSTATS: usize = bpf_intf::global_stat_idx_NR_GSTATS as usize;
|
||||
const NR_LSTATS: usize = bpf_intf::layer_stat_idx_NR_LSTATS as usize;
|
||||
const NR_LAYER_MATCH_KINDS: usize = bpf_intf::layer_match_kind_NR_LAYER_MATCH_KINDS as usize;
|
||||
const CORE_CACHE_LEVEL: u32 = 2;
|
||||
|
||||
#[rustfmt::skip]
|
||||
lazy_static::lazy_static! {
|
||||
@ -475,21 +468,6 @@ struct Opts {
|
||||
specs: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
enum LayerMatch {
|
||||
CgroupPrefix(String),
|
||||
CommPrefix(String),
|
||||
PcommPrefix(String),
|
||||
NiceAbove(i32),
|
||||
NiceBelow(i32),
|
||||
NiceEquals(i32),
|
||||
UIDEquals(u32),
|
||||
GIDEquals(u32),
|
||||
PIDEquals(u32),
|
||||
PPIDEquals(u32),
|
||||
TGIDEquals(u32),
|
||||
}
|
||||
|
||||
#[derive(ValueEnum, Clone, Debug, Parser, PartialEq, Serialize, Deserialize)]
|
||||
#[clap(rename_all = "snake_case")]
|
||||
enum DsqIterAlgo {
|
||||
@ -521,138 +499,6 @@ impl Default for DsqIterAlgo {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
enum LayerKind {
|
||||
Confined {
|
||||
util_range: (f64, f64),
|
||||
#[serde(default)]
|
||||
cpus_range: Option<(usize, usize)>,
|
||||
#[serde(default)]
|
||||
min_exec_us: u64,
|
||||
#[serde(default)]
|
||||
yield_ignore: f64,
|
||||
#[serde(default)]
|
||||
slice_us: u64,
|
||||
#[serde(default)]
|
||||
preempt: bool,
|
||||
#[serde(default)]
|
||||
preempt_first: bool,
|
||||
#[serde(default)]
|
||||
exclusive: bool,
|
||||
#[serde(default)]
|
||||
weight: u32,
|
||||
#[serde(default)]
|
||||
idle_smt: bool,
|
||||
#[serde(default)]
|
||||
growth_algo: LayerGrowthAlgo,
|
||||
#[serde(default)]
|
||||
perf: u64,
|
||||
#[serde(default)]
|
||||
nodes: Vec<usize>,
|
||||
#[serde(default)]
|
||||
llcs: Vec<usize>,
|
||||
},
|
||||
Grouped {
|
||||
util_range: (f64, f64),
|
||||
#[serde(default)]
|
||||
cpus_range: Option<(usize, usize)>,
|
||||
#[serde(default)]
|
||||
min_exec_us: u64,
|
||||
#[serde(default)]
|
||||
yield_ignore: f64,
|
||||
#[serde(default)]
|
||||
slice_us: u64,
|
||||
#[serde(default)]
|
||||
preempt: bool,
|
||||
#[serde(default)]
|
||||
preempt_first: bool,
|
||||
#[serde(default)]
|
||||
exclusive: bool,
|
||||
#[serde(default)]
|
||||
weight: u32,
|
||||
#[serde(default)]
|
||||
idle_smt: bool,
|
||||
#[serde(default)]
|
||||
growth_algo: LayerGrowthAlgo,
|
||||
#[serde(default)]
|
||||
perf: u64,
|
||||
#[serde(default)]
|
||||
nodes: Vec<usize>,
|
||||
#[serde(default)]
|
||||
llcs: Vec<usize>,
|
||||
},
|
||||
Open {
|
||||
#[serde(default)]
|
||||
min_exec_us: u64,
|
||||
#[serde(default)]
|
||||
yield_ignore: f64,
|
||||
#[serde(default)]
|
||||
slice_us: u64,
|
||||
#[serde(default)]
|
||||
preempt: bool,
|
||||
#[serde(default)]
|
||||
preempt_first: bool,
|
||||
#[serde(default)]
|
||||
exclusive: bool,
|
||||
#[serde(default)]
|
||||
weight: u32,
|
||||
#[serde(default)]
|
||||
idle_smt: bool,
|
||||
#[serde(default)]
|
||||
growth_algo: LayerGrowthAlgo,
|
||||
#[serde(default)]
|
||||
perf: u64,
|
||||
#[serde(default)]
|
||||
nodes: Vec<usize>,
|
||||
#[serde(default)]
|
||||
llcs: Vec<usize>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
struct LayerSpec {
|
||||
name: String,
|
||||
comment: Option<String>,
|
||||
matches: Vec<Vec<LayerMatch>>,
|
||||
kind: LayerKind,
|
||||
}
|
||||
|
||||
impl LayerSpec {
|
||||
fn parse(input: &str) -> Result<Vec<Self>> {
|
||||
let config: LayerConfig = if input.starts_with("f:") || input.starts_with("file:") {
|
||||
let mut f = fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(input.split_once(':').unwrap().1)?;
|
||||
let mut content = String::new();
|
||||
f.read_to_string(&mut content)?;
|
||||
serde_json::from_str(&content)?
|
||||
} else {
|
||||
serde_json::from_str(input)?
|
||||
};
|
||||
Ok(config.specs)
|
||||
}
|
||||
fn nodes(&self) -> Vec<usize> {
|
||||
match &self.kind {
|
||||
LayerKind::Confined { nodes, .. }
|
||||
| LayerKind::Open { nodes, .. }
|
||||
| LayerKind::Grouped { nodes, .. } => nodes.clone(),
|
||||
}
|
||||
}
|
||||
fn llcs(&self) -> Vec<usize> {
|
||||
match &self.kind {
|
||||
LayerKind::Confined { llcs, .. }
|
||||
| LayerKind::Open { llcs, .. }
|
||||
| LayerKind::Grouped { llcs, .. } => llcs.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
#[serde(transparent)]
|
||||
struct LayerConfig {
|
||||
specs: Vec<LayerSpec>,
|
||||
}
|
||||
|
||||
fn now_monotonic() -> u64 {
|
||||
let mut time = libc::timespec {
|
||||
tv_sec: 0,
|
||||
@ -1074,318 +920,6 @@ impl Stats {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
/// `CpuPool` represents the CPU core and logical CPU topology within the system.
|
||||
/// It manages the mapping and availability of physical and logical cores, including
|
||||
/// how resources are allocated for tasks across the available CPUs.
|
||||
struct CpuPool {
|
||||
/// The number of physical cores available on the system.
|
||||
nr_cores: usize,
|
||||
|
||||
/// The total number of logical CPUs (including SMT threads).
|
||||
/// This can be larger than `nr_cores` if SMT is enabled,
|
||||
/// where each physical core may have a couple logical cores.
|
||||
nr_cpus: usize,
|
||||
|
||||
/// A bit mask representing all online logical cores.
|
||||
/// Each bit corresponds to whether a logical core (CPU) is online and available
|
||||
/// for processing tasks.
|
||||
all_cpus: BitVec,
|
||||
|
||||
/// A vector of bit masks, each representing the mapping between
|
||||
/// physical cores and the logical cores that run on them.
|
||||
/// The index in the vector represents the physical core, and each bit in the
|
||||
/// corresponding `BitVec` represents whether a logical core belongs to that physical core.
|
||||
core_cpus: Vec<BitVec>,
|
||||
|
||||
/// A vector that maps the index of each logical core to the sibling core.
|
||||
/// This represents the "next sibling" core within a package in systems that support SMT.
|
||||
/// The sibling core is the other logical core that shares the physical resources
|
||||
/// of the same physical core.
|
||||
sibling_cpu: Vec<i32>,
|
||||
|
||||
/// A list of physical core IDs.
|
||||
/// Each entry in this vector corresponds to a unique physical core.
|
||||
cpu_core: Vec<usize>,
|
||||
|
||||
/// A bit mask representing all available physical cores.
|
||||
/// Each bit corresponds to whether a physical core is available for task scheduling.
|
||||
available_cores: BitVec,
|
||||
|
||||
/// The ID of the first physical core in the system.
|
||||
/// This core is often used as a default for initializing tasks.
|
||||
first_cpu: usize,
|
||||
|
||||
/// The ID of the next free CPU or the fallback CPU if none are available.
|
||||
/// This is used to allocate resources when a task needs to be assigned to a core.
|
||||
fallback_cpu: usize,
|
||||
|
||||
/// A mapping of node IDs to last-level cache (LLC) IDs.
|
||||
/// The map allows for the identification of which last-level cache
|
||||
/// corresponds to each CPU based on its core topology.
|
||||
core_topology_to_id: BTreeMap<(usize, usize, usize), usize>,
|
||||
}
|
||||
|
||||
impl CpuPool {
|
||||
fn new(topo: &Topology) -> Result<Self> {
|
||||
if *NR_POSSIBLE_CPUS > MAX_CPUS {
|
||||
bail!(
|
||||
"NR_POSSIBLE_CPUS {} > MAX_CPUS {}",
|
||||
*NR_POSSIBLE_CPUS,
|
||||
MAX_CPUS
|
||||
);
|
||||
}
|
||||
|
||||
let mut cpu_to_cache = vec![]; // (cpu_id, Option<cache_id>)
|
||||
let mut cache_ids = BTreeSet::<usize>::new();
|
||||
let mut nr_offline = 0;
|
||||
|
||||
// Build cpu -> cache ID mapping.
|
||||
for cpu in 0..*NR_POSSIBLE_CPUS {
|
||||
let path = format!(
|
||||
"/sys/devices/system/cpu/cpu{}/cache/index{}/id",
|
||||
cpu, CORE_CACHE_LEVEL
|
||||
);
|
||||
let id = match std::fs::read_to_string(&path) {
|
||||
Ok(val) => Some(val.trim().parse::<usize>().with_context(|| {
|
||||
format!("Failed to parse {:?}'s content {:?}", &path, &val)
|
||||
})?),
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
|
||||
nr_offline += 1;
|
||||
None
|
||||
}
|
||||
Err(e) => return Err(e).with_context(|| format!("Failed to open {:?}", &path)),
|
||||
};
|
||||
|
||||
cpu_to_cache.push(id);
|
||||
if let Some(id) = id {
|
||||
cache_ids.insert(id);
|
||||
}
|
||||
}
|
||||
|
||||
let nr_cpus = *NR_POSSIBLE_CPUS - nr_offline;
|
||||
|
||||
// Cache IDs may have holes. Assign consecutive core IDs to existing
|
||||
// cache IDs.
|
||||
let mut cache_to_core = BTreeMap::<usize, usize>::new();
|
||||
let mut nr_cores = 0;
|
||||
for cache_id in cache_ids.iter() {
|
||||
cache_to_core.insert(*cache_id, nr_cores);
|
||||
nr_cores += 1;
|
||||
}
|
||||
|
||||
// Build core -> cpumask and cpu -> core mappings.
|
||||
let mut all_cpus = bitvec![0; *NR_POSSIBLE_CPUS];
|
||||
let mut core_cpus = vec![bitvec![0; *NR_POSSIBLE_CPUS]; nr_cores];
|
||||
let mut cpu_core = vec![];
|
||||
|
||||
for (cpu, cache) in cpu_to_cache.iter().enumerate().take(*NR_POSSIBLE_CPUS) {
|
||||
if let Some(cache_id) = cache {
|
||||
let core_id = cache_to_core[cache_id];
|
||||
all_cpus.set(cpu, true);
|
||||
core_cpus[core_id].set(cpu, true);
|
||||
cpu_core.push(core_id);
|
||||
}
|
||||
}
|
||||
|
||||
// Build sibling_cpu[]
|
||||
let mut sibling_cpu = vec![-1i32; *NR_POSSIBLE_CPUS];
|
||||
for cpus in &core_cpus {
|
||||
let mut first = -1i32;
|
||||
for cpu in cpus.iter_ones() {
|
||||
if first < 0 {
|
||||
first = cpu as i32;
|
||||
} else {
|
||||
sibling_cpu[first as usize] = cpu as i32;
|
||||
sibling_cpu[cpu as usize] = first;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build core_topology_to_id
|
||||
let mut core_topology_to_id = BTreeMap::new();
|
||||
let mut next_topo_id: usize = 0;
|
||||
for node in topo.nodes() {
|
||||
for llc in node.llcs().values() {
|
||||
for core in llc.cores().values() {
|
||||
core_topology_to_id
|
||||
.insert((core.node_id, core.llc_id, core.id()), next_topo_id);
|
||||
next_topo_id += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
"CPUs: online/possible={}/{} nr_cores={}",
|
||||
nr_cpus, *NR_POSSIBLE_CPUS, nr_cores,
|
||||
);
|
||||
debug!("CPUs: siblings={:?}", &sibling_cpu[..nr_cpus]);
|
||||
|
||||
let first_cpu = core_cpus[0].first_one().unwrap();
|
||||
|
||||
let mut cpu_pool = Self {
|
||||
nr_cores,
|
||||
nr_cpus,
|
||||
all_cpus,
|
||||
core_cpus,
|
||||
sibling_cpu,
|
||||
cpu_core,
|
||||
available_cores: bitvec![1; nr_cores],
|
||||
first_cpu,
|
||||
fallback_cpu: first_cpu,
|
||||
core_topology_to_id,
|
||||
};
|
||||
cpu_pool.update_fallback_cpu();
|
||||
Ok(cpu_pool)
|
||||
}
|
||||
|
||||
fn update_fallback_cpu(&mut self) {
|
||||
match self.available_cores.first_one() {
|
||||
Some(next) => self.fallback_cpu = self.core_cpus[next].first_one().unwrap(),
|
||||
None => self.fallback_cpu = self.first_cpu,
|
||||
}
|
||||
}
|
||||
|
||||
fn alloc_cpus<'a>(&'a mut self, layer: &Layer) -> Option<&'a BitVec> {
|
||||
let available_cpus = self.available_cpus_in_mask(&layer.allowed_cpus);
|
||||
let available_cores = self.cpus_to_cores(&available_cpus).ok()?;
|
||||
|
||||
for alloc_core in layer.core_alloc_order() {
|
||||
match available_cores.get(*alloc_core) {
|
||||
Some(bit) => {
|
||||
if *bit {
|
||||
self.available_cores.set(*alloc_core, false);
|
||||
self.update_fallback_cpu();
|
||||
return Some(&self.core_cpus[*alloc_core]);
|
||||
}
|
||||
}
|
||||
None => {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn cpus_to_cores(&self, cpus_to_match: &BitVec) -> Result<BitVec> {
|
||||
let mut cpus = cpus_to_match.clone();
|
||||
let mut cores = bitvec![0; self.nr_cores];
|
||||
|
||||
while let Some(cpu) = cpus.first_one() {
|
||||
let core = self.cpu_core[cpu];
|
||||
|
||||
if (self.core_cpus[core].clone() & !cpus.clone()).count_ones() != 0 {
|
||||
bail!(
|
||||
"CPUs {} partially intersect with core {} ({})",
|
||||
cpus_to_match,
|
||||
core,
|
||||
self.core_cpus[core],
|
||||
);
|
||||
}
|
||||
|
||||
cpus &= !self.core_cpus[core].clone();
|
||||
cores.set(core, true);
|
||||
}
|
||||
|
||||
Ok(cores)
|
||||
}
|
||||
|
||||
fn free<'a>(&'a mut self, cpus_to_free: &BitVec) -> Result<()> {
|
||||
let cores = self.cpus_to_cores(cpus_to_free)?;
|
||||
if (self.available_cores.clone() & &cores).any() {
|
||||
bail!("Some of CPUs {} are already free", cpus_to_free);
|
||||
}
|
||||
self.available_cores |= cores;
|
||||
self.update_fallback_cpu();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn next_to_free<'a>(&'a self, cands: &BitVec) -> Result<Option<&'a BitVec>> {
|
||||
let last = match cands.last_one() {
|
||||
Some(ret) => ret,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let core = self.cpu_core[last];
|
||||
if (self.core_cpus[core].clone() & !cands.clone()).count_ones() != 0 {
|
||||
bail!(
|
||||
"CPUs{} partially intersect with core {} ({})",
|
||||
cands,
|
||||
core,
|
||||
self.core_cpus[core]
|
||||
);
|
||||
}
|
||||
|
||||
Ok(Some(&self.core_cpus[core]))
|
||||
}
|
||||
|
||||
fn available_cpus_in_mask(&self, allowed_cpus: &BitVec) -> BitVec {
|
||||
let mut cpus = bitvec![0; self.nr_cpus];
|
||||
for core in self.available_cores.iter_ones() {
|
||||
let mut core_cpus = self.core_cpus[core].clone();
|
||||
core_cpus &= allowed_cpus;
|
||||
cpus |= core_cpus;
|
||||
}
|
||||
cpus
|
||||
}
|
||||
|
||||
fn get_core_topological_id(&self, core: &Core) -> usize {
|
||||
*self
|
||||
.core_topology_to_id
|
||||
.get(&(core.node_id, core.llc_id, core.id()))
|
||||
.expect("unrecognised core")
|
||||
}
|
||||
}
|
||||
|
||||
struct IteratorInterleaver<T>
|
||||
where
|
||||
T: Iterator,
|
||||
{
|
||||
empty: bool,
|
||||
index: usize,
|
||||
iters: Vec<T>,
|
||||
}
|
||||
|
||||
impl<T> IteratorInterleaver<T>
|
||||
where
|
||||
T: Iterator,
|
||||
{
|
||||
fn new(iters: Vec<T>) -> Self {
|
||||
Self {
|
||||
empty: false,
|
||||
index: 0,
|
||||
iters,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Iterator for IteratorInterleaver<T>
|
||||
where
|
||||
T: Iterator,
|
||||
{
|
||||
type Item = T::Item;
|
||||
|
||||
fn next(&mut self) -> Option<T::Item> {
|
||||
if let Some(iter) = self.iters.get_mut(self.index) {
|
||||
self.index += 1;
|
||||
if let Some(value) = iter.next() {
|
||||
self.empty = false;
|
||||
Some(value)
|
||||
} else {
|
||||
self.next()
|
||||
}
|
||||
} else {
|
||||
self.index = 0;
|
||||
if self.empty {
|
||||
None
|
||||
} else {
|
||||
self.empty = true;
|
||||
self.next()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Layer {
|
||||
name: String,
|
||||
@ -1506,10 +1040,6 @@ impl Layer {
|
||||
})
|
||||
}
|
||||
|
||||
fn core_alloc_order(&self) -> &Vec<usize> {
|
||||
&self.core_order
|
||||
}
|
||||
|
||||
fn grow_confined_or_grouped(
|
||||
&mut self,
|
||||
cpu_pool: &mut CpuPool,
|
||||
@ -1542,7 +1072,10 @@ impl Layer {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
let new_cpus = match cpu_pool.alloc_cpus(&self).clone() {
|
||||
let new_cpus = match cpu_pool
|
||||
.alloc_cpus(&self.allowed_cpus, &self.core_order)
|
||||
.clone()
|
||||
{
|
||||
Some(ret) => ret.clone(),
|
||||
None => {
|
||||
trace!("layer-{} can't grow, no CPUs", &self.name);
|
||||
|
Loading…
Reference in New Issue
Block a user