mirror of
https://github.com/sched-ext/scx.git
synced 2024-11-24 11:50:23 +00:00
Merge pull request #781 from JakeHillion/pr781
layered: move configuration into library component
This commit is contained in:
commit
eb59085e61
161
scheds/rust/scx_layered/src/config.rs
Normal file
161
scheds/rust/scx_layered/src/config.rs
Normal file
@ -0,0 +1,161 @@
|
||||
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
|
||||
// This software may be used and distributed according to the terms of the
|
||||
// GNU General Public License version 2.
|
||||
use std::fs;
|
||||
use std::io::Read;
|
||||
|
||||
use anyhow::Result;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::LayerGrowthAlgo;
|
||||
|
||||
/// Top-level layer configuration.
///
/// `#[serde(transparent)]` makes this (de)serialize as the bare JSON array of
/// layer specs it wraps, so a config file is simply `[ {spec}, {spec}, ... ]`.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(transparent)]
pub struct LayerConfig {
    /// Ordered list of layer specifications.
    pub specs: Vec<LayerSpec>,
}
|
||||
|
||||
/// A single layer definition: a name, the match rules that assign tasks to
/// the layer, and the layer's scheduling policy (`kind`).
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct LayerSpec {
    /// Layer name.
    pub name: String,
    /// Free-form human-readable note; not interpreted by the scheduler.
    pub comment: Option<String>,
    /// Match rules in OR-of-ANDs form: the outer Vec is a disjunction, each
    /// inner Vec a conjunction of `LayerMatch` conditions.
    /// NOTE(review): OR-of-ANDs interpretation inferred from the nested shape —
    /// confirm against the BPF-side matcher.
    pub matches: Vec<Vec<LayerMatch>>,
    /// Scheduling policy and its tunables for this layer.
    pub kind: LayerKind,
}
|
||||
|
||||
impl LayerSpec {
|
||||
pub fn parse(input: &str) -> Result<Vec<Self>> {
|
||||
let config: LayerConfig = if input.starts_with("f:") || input.starts_with("file:") {
|
||||
let mut f = fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(input.split_once(':').unwrap().1)?;
|
||||
let mut content = String::new();
|
||||
f.read_to_string(&mut content)?;
|
||||
serde_json::from_str(&content)?
|
||||
} else {
|
||||
serde_json::from_str(input)?
|
||||
};
|
||||
Ok(config.specs)
|
||||
}
|
||||
|
||||
pub fn nodes(&self) -> Vec<usize> {
|
||||
match &self.kind {
|
||||
LayerKind::Confined { nodes, .. }
|
||||
| LayerKind::Open { nodes, .. }
|
||||
| LayerKind::Grouped { nodes, .. } => nodes.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn llcs(&self) -> Vec<usize> {
|
||||
match &self.kind {
|
||||
LayerKind::Confined { llcs, .. }
|
||||
| LayerKind::Open { llcs, .. }
|
||||
| LayerKind::Grouped { llcs, .. } => llcs.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A single task-matching condition used to place tasks into a layer.
///
/// NOTE(review): the matching itself happens elsewhere (BPF side); the
/// per-variant descriptions below are inferred from the names — confirm
/// against the matcher implementation.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum LayerMatch {
    /// Task's cgroup path starts with this prefix.
    CgroupPrefix(String),
    /// Task's comm (thread name) starts with this prefix.
    CommPrefix(String),
    /// Process comm (thread-group leader name) starts with this prefix.
    PcommPrefix(String),
    /// Nice value strictly above this.
    NiceAbove(i32),
    /// Nice value strictly below this.
    NiceBelow(i32),
    /// Nice value exactly equal.
    NiceEquals(i32),
    /// User ID match.
    UIDEquals(u32),
    /// Group ID match.
    GIDEquals(u32),
    /// Process ID match.
    PIDEquals(u32),
    /// Parent process ID match.
    PPIDEquals(u32),
    /// Thread group ID match.
    TGIDEquals(u32),
}
|
||||
|
||||
/// Scheduling policy for a layer. All tunables are `#[serde(default)]` so a
/// config file only needs to spell out the fields it wants to override.
///
/// `Confined` and `Grouped` carry `util_range`/`cpus_range` capacity controls
/// that `Open` lacks; the remaining tunables are identical across variants.
/// NOTE(review): the behavioral difference between the three variants is
/// implemented elsewhere (scheduler core / BPF) — not visible in this file.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum LayerKind {
    Confined {
        /// (low, high) utilization targets driving CPU allocation —
        /// presumably fractions in [0, 1]; confirm against the sizing logic.
        util_range: (f64, f64),
        /// Optional (min, max) bound on the number of CPUs.
        #[serde(default)]
        cpus_range: Option<(usize, usize)>,
        #[serde(default)]
        min_exec_us: u64,
        #[serde(default)]
        yield_ignore: f64,
        #[serde(default)]
        slice_us: u64,
        #[serde(default)]
        preempt: bool,
        #[serde(default)]
        preempt_first: bool,
        #[serde(default)]
        exclusive: bool,
        #[serde(default)]
        weight: u32,
        #[serde(default)]
        idle_smt: bool,
        /// CPU-core growth ordering policy; see `LayerGrowthAlgo`.
        #[serde(default)]
        growth_algo: LayerGrowthAlgo,
        #[serde(default)]
        perf: u64,
        /// NUMA node restriction; empty by default (see `LayerSpec::nodes`).
        #[serde(default)]
        nodes: Vec<usize>,
        /// LLC restriction; empty by default (see `LayerSpec::llcs`).
        #[serde(default)]
        llcs: Vec<usize>,
    },
    Grouped {
        /// (low, high) utilization targets driving CPU allocation.
        util_range: (f64, f64),
        /// Optional (min, max) bound on the number of CPUs.
        #[serde(default)]
        cpus_range: Option<(usize, usize)>,
        #[serde(default)]
        min_exec_us: u64,
        #[serde(default)]
        yield_ignore: f64,
        #[serde(default)]
        slice_us: u64,
        #[serde(default)]
        preempt: bool,
        #[serde(default)]
        preempt_first: bool,
        #[serde(default)]
        exclusive: bool,
        #[serde(default)]
        weight: u32,
        #[serde(default)]
        idle_smt: bool,
        #[serde(default)]
        growth_algo: LayerGrowthAlgo,
        #[serde(default)]
        perf: u64,
        #[serde(default)]
        nodes: Vec<usize>,
        #[serde(default)]
        llcs: Vec<usize>,
    },
    Open {
        // No util_range / cpus_range: an Open layer has no capacity sizing
        // controls of its own.
        #[serde(default)]
        min_exec_us: u64,
        #[serde(default)]
        yield_ignore: f64,
        #[serde(default)]
        slice_us: u64,
        #[serde(default)]
        preempt: bool,
        #[serde(default)]
        preempt_first: bool,
        #[serde(default)]
        exclusive: bool,
        #[serde(default)]
        weight: u32,
        #[serde(default)]
        idle_smt: bool,
        #[serde(default)]
        growth_algo: LayerGrowthAlgo,
        #[serde(default)]
        perf: u64,
        #[serde(default)]
        nodes: Vec<usize>,
        #[serde(default)]
        llcs: Vec<usize>,
    },
}
|
@ -6,7 +6,6 @@ use serde::Serialize;
|
||||
|
||||
use crate::bpf_intf;
|
||||
use crate::CpuPool;
|
||||
use crate::IteratorInterleaver;
|
||||
use crate::LayerSpec;
|
||||
|
||||
#[derive(Clone, Debug, Parser, Serialize, Deserialize)]
|
||||
@ -239,3 +238,52 @@ impl<'a> LayerCoreOrderGenerator<'a> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Round-robin interleaver over a collection of iterators.
///
/// Yields one item from each inner iterator in turn, skipping exhausted
/// ones, and terminates once a complete sweep over all inner iterators
/// produces nothing.
struct IteratorInterleaver<T>
where
    T: Iterator,
{
    /// True while the sweep in progress has yielded no item yet; a second
    /// consecutive fruitless sweep means every inner iterator is exhausted.
    empty: bool,
    /// Position of the next inner iterator to poll.
    index: usize,
    /// The inner iterators being interleaved.
    iters: Vec<T>,
}

impl<T> IteratorInterleaver<T>
where
    T: Iterator,
{
    /// Build an interleaver over `iters`; polling starts at the first one.
    fn new(iters: Vec<T>) -> Self {
        Self {
            empty: false,
            index: 0,
            iters,
        }
    }
}

impl<T> Iterator for IteratorInterleaver<T>
where
    T: Iterator,
{
    type Item = T::Item;

    fn next(&mut self) -> Option<T::Item> {
        // Iterative form of the original tail recursion: keep advancing the
        // round-robin cursor until an item turns up or a full sweep comes
        // back empty twice in a row.
        loop {
            match self.iters.get_mut(self.index) {
                Some(current) => {
                    self.index += 1;
                    if let Some(item) = current.next() {
                        self.empty = false;
                        return Some(item);
                    }
                    // Exhausted inner iterator: fall through to the next one.
                }
                None => {
                    // Wrapped past the last iterator: start a new sweep.
                    self.index = 0;
                    if self.empty {
                        // Previous sweep produced nothing — we are done.
                        return None;
                    }
                    self.empty = true;
                }
            }
        }
    }
}
|
||||
|
299
scheds/rust/scx_layered/src/lib.rs
Normal file
299
scheds/rust/scx_layered/src/lib.rs
Normal file
@ -0,0 +1,299 @@
|
||||
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
|
||||
// This software may be used and distributed according to the terms of the
|
||||
// GNU General Public License version 2.
|
||||
mod config;
|
||||
mod layer_core_growth;
|
||||
|
||||
pub mod bpf_intf;
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use anyhow::bail;
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use bitvec::prelude::*;
|
||||
pub use config::LayerConfig;
|
||||
pub use config::LayerKind;
|
||||
pub use config::LayerMatch;
|
||||
pub use config::LayerSpec;
|
||||
pub use layer_core_growth::LayerGrowthAlgo;
|
||||
use log::debug;
|
||||
use log::info;
|
||||
use scx_utils::Core;
|
||||
use scx_utils::Topology;
|
||||
|
||||
const MAX_CPUS: usize = bpf_intf::consts_MAX_CPUS as usize;
|
||||
const CORE_CACHE_LEVEL: u32 = 2;
|
||||
|
||||
lazy_static::lazy_static! {
|
||||
static ref NR_POSSIBLE_CPUS: usize = libbpf_rs::num_possible_cpus().unwrap();
|
||||
}
|
||||
|
||||
#[derive(Debug)]
/// `CpuPool` represents the CPU core and logical CPU topology within the system.
/// It manages the mapping and availability of physical and logical cores, including
/// how resources are allocated for tasks across the available CPUs.
///
/// "Core" here means a group of logical CPUs sharing the cache at
/// `CORE_CACHE_LEVEL`, as discovered from sysfs in `CpuPool::new`.
pub struct CpuPool {
    /// The number of physical cores available on the system.
    pub nr_cores: usize,

    /// The total number of logical CPUs (including SMT threads).
    /// This can be larger than `nr_cores` if SMT is enabled,
    /// where each physical core may have a couple logical cores.
    pub nr_cpus: usize,

    /// A bit mask representing all online logical CPUs.
    /// Each bit corresponds to whether a logical CPU is online and available
    /// for processing tasks.
    pub all_cpus: BitVec,

    /// A vector of bit masks, each representing the mapping between
    /// physical cores and the logical CPUs that run on them.
    /// The index in the vector is the physical core, and each bit in the
    /// corresponding `BitVec` marks a logical CPU belonging to that core.
    core_cpus: Vec<BitVec>,

    /// Maps each logical CPU to its SMT sibling CPU on the same physical
    /// core, or -1 if it has none. Only the first pair found per core is
    /// linked (see the sibling-building loop in `new`).
    pub sibling_cpu: Vec<i32>,

    /// Maps each logical CPU index to the ID of the physical core it
    /// belongs to (`cpu_core[cpu] == core`). Indexed by CPU, not by core —
    /// see its use in `cpus_to_cores`.
    cpu_core: Vec<usize>,

    /// A bit mask representing all available (unallocated) physical cores.
    /// Each bit corresponds to whether a physical core is free for
    /// allocation via `alloc_cpus`.
    available_cores: BitVec,

    /// The first logical CPU of core 0. Used as the fallback CPU when no
    /// core is available.
    first_cpu: usize,

    /// The first CPU of the first available core, or `first_cpu` if no core
    /// is free; kept up to date by `update_fallback_cpu`.
    pub fallback_cpu: usize,

    /// Maps a core's topology coordinates `(node_id, llc_id, core_id)` to a
    /// dense sequential ID assigned in `new` while walking the `Topology`.
    core_topology_to_id: BTreeMap<(usize, usize, usize), usize>,
}
|
||||
|
||||
impl CpuPool {
    /// Discover the CPU topology from sysfs and build the pool with every
    /// core initially available.
    ///
    /// CPUs are grouped into "cores" by their shared cache ID at
    /// `CORE_CACHE_LEVEL`; a CPU whose cache-id file is missing is treated
    /// as offline.
    ///
    /// # Errors
    /// Fails if the possible-CPU count exceeds `MAX_CPUS`, or if a sysfs
    /// cache-id file exists but cannot be read or parsed.
    pub fn new(topo: &Topology) -> Result<Self> {
        if *NR_POSSIBLE_CPUS > MAX_CPUS {
            bail!(
                "NR_POSSIBLE_CPUS {} > MAX_CPUS {}",
                *NR_POSSIBLE_CPUS,
                MAX_CPUS
            );
        }

        let mut cpu_to_cache = vec![]; // (cpu_id, Option<cache_id>)
        let mut cache_ids = BTreeSet::<usize>::new();
        let mut nr_offline = 0;

        // Build cpu -> cache ID mapping.
        for cpu in 0..*NR_POSSIBLE_CPUS {
            let path = format!(
                "/sys/devices/system/cpu/cpu{}/cache/index{}/id",
                cpu, CORE_CACHE_LEVEL
            );
            let id = match std::fs::read_to_string(&path) {
                Ok(val) => Some(val.trim().parse::<usize>().with_context(|| {
                    format!("Failed to parse {:?}'s content {:?}", &path, &val)
                })?),
                // A missing file means the CPU is offline, not an error.
                Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                    nr_offline += 1;
                    None
                }
                Err(e) => return Err(e).with_context(|| format!("Failed to open {:?}", &path)),
            };

            cpu_to_cache.push(id);
            if let Some(id) = id {
                cache_ids.insert(id);
            }
        }

        let nr_cpus = *NR_POSSIBLE_CPUS - nr_offline;

        // Cache IDs may have holes. Assign consecutive core IDs to existing
        // cache IDs.
        let mut cache_to_core = BTreeMap::<usize, usize>::new();
        let mut nr_cores = 0;
        for cache_id in cache_ids.iter() {
            cache_to_core.insert(*cache_id, nr_cores);
            nr_cores += 1;
        }

        // Build core -> cpumask and cpu -> core mappings.
        let mut all_cpus = bitvec![0; *NR_POSSIBLE_CPUS];
        let mut core_cpus = vec![bitvec![0; *NR_POSSIBLE_CPUS]; nr_cores];
        let mut cpu_core = vec![];

        for (cpu, cache) in cpu_to_cache.iter().enumerate().take(*NR_POSSIBLE_CPUS) {
            if let Some(cache_id) = cache {
                let core_id = cache_to_core[cache_id];
                all_cpus.set(cpu, true);
                core_cpus[core_id].set(cpu, true);
                cpu_core.push(core_id);
            }
        }

        // Build sibling_cpu[]: cross-link only the first two CPUs found on
        // each core (the `break` stops after one pair).
        let mut sibling_cpu = vec![-1i32; *NR_POSSIBLE_CPUS];
        for cpus in &core_cpus {
            let mut first = -1i32;
            for cpu in cpus.iter_ones() {
                if first < 0 {
                    first = cpu as i32;
                } else {
                    sibling_cpu[first as usize] = cpu as i32;
                    sibling_cpu[cpu as usize] = first;
                    break;
                }
            }
        }

        // Build core_topology_to_id: dense IDs in Topology iteration order.
        let mut core_topology_to_id = BTreeMap::new();
        let mut next_topo_id: usize = 0;
        for node in topo.nodes() {
            for llc in node.llcs().values() {
                for core in llc.cores().values() {
                    core_topology_to_id
                        .insert((core.node_id, core.llc_id, core.id()), next_topo_id);
                    next_topo_id += 1;
                }
            }
        }

        info!(
            "CPUs: online/possible={}/{} nr_cores={}",
            nr_cpus, *NR_POSSIBLE_CPUS, nr_cores,
        );
        debug!("CPUs: siblings={:?}", &sibling_cpu[..nr_cpus]);

        let first_cpu = core_cpus[0].first_one().unwrap();

        let mut cpu_pool = Self {
            nr_cores,
            nr_cpus,
            all_cpus,
            core_cpus,
            sibling_cpu,
            cpu_core,
            // Every core starts out available.
            available_cores: bitvec![1; nr_cores],
            first_cpu,
            fallback_cpu: first_cpu,
            core_topology_to_id,
        };
        cpu_pool.update_fallback_cpu();
        Ok(cpu_pool)
    }

    /// Point `fallback_cpu` at the first CPU of the first available core,
    /// or at `first_cpu` when every core is allocated.
    fn update_fallback_cpu(&mut self) {
        match self.available_cores.first_one() {
            Some(next) => self.fallback_cpu = self.core_cpus[next].first_one().unwrap(),
            None => self.fallback_cpu = self.first_cpu,
        }
    }

    /// Allocate one whole core: walk `core_alloc_order` and claim the first
    /// core that is available and fully inside `allowed_cpus`, returning its
    /// CPU mask. Returns `None` when no such core exists.
    pub fn alloc_cpus<'a>(
        &'a mut self,
        allowed_cpus: &BitVec,
        core_alloc_order: &[usize],
    ) -> Option<&'a BitVec> {
        let available_cpus = self.available_cpus_in_mask(&allowed_cpus);
        // `?` via ok(): a partially-covered core makes the request unsatisfiable.
        let available_cores = self.cpus_to_cores(&available_cpus).ok()?;

        for alloc_core in core_alloc_order {
            match available_cores.get(*alloc_core) {
                Some(bit) => {
                    if *bit {
                        self.available_cores.set(*alloc_core, false);
                        self.update_fallback_cpu();
                        return Some(&self.core_cpus[*alloc_core]);
                    }
                }
                None => {
                    // Out-of-range core index in the order list; skip it.
                    continue;
                }
            }
        }
        None
    }

    /// Convert a CPU mask into the mask of cores it covers.
    ///
    /// # Errors
    /// Fails if the mask covers any core only partially — callers deal in
    /// whole cores.
    fn cpus_to_cores(&self, cpus_to_match: &BitVec) -> Result<BitVec> {
        let mut cpus = cpus_to_match.clone();
        let mut cores = bitvec![0; self.nr_cores];

        while let Some(cpu) = cpus.first_one() {
            let core = self.cpu_core[cpu];

            // Any CPU of this core missing from the input mask means a
            // partial cover.
            if (self.core_cpus[core].clone() & !cpus.clone()).count_ones() != 0 {
                bail!(
                    "CPUs {} partially intersect with core {} ({})",
                    cpus_to_match,
                    core,
                    self.core_cpus[core],
                );
            }

            cpus &= !self.core_cpus[core].clone();
            cores.set(core, true);
        }

        Ok(cores)
    }

    /// Return the cores covered by `cpus_to_free` to the pool.
    ///
    /// # Errors
    /// Fails if the mask partially covers a core, or if any of the cores is
    /// already free (double free).
    pub fn free<'a>(&'a mut self, cpus_to_free: &BitVec) -> Result<()> {
        let cores = self.cpus_to_cores(cpus_to_free)?;
        if (self.available_cores.clone() & &cores).any() {
            bail!("Some of CPUs {} are already free", cpus_to_free);
        }
        self.available_cores |= cores;
        self.update_fallback_cpu();
        Ok(())
    }

    /// Pick the next core to release from candidate mask `cands`: the core
    /// containing the highest-numbered candidate CPU. `Ok(None)` when the
    /// mask is empty.
    ///
    /// # Errors
    /// Fails if `cands` covers that core only partially.
    pub fn next_to_free<'a>(&'a self, cands: &BitVec) -> Result<Option<&'a BitVec>> {
        let last = match cands.last_one() {
            Some(ret) => ret,
            None => return Ok(None),
        };
        let core = self.cpu_core[last];
        if (self.core_cpus[core].clone() & !cands.clone()).count_ones() != 0 {
            bail!(
                "CPUs{} partially intersect with core {} ({})",
                cands,
                core,
                self.core_cpus[core]
            );
        }

        Ok(Some(&self.core_cpus[core]))
    }

    /// CPUs in `allowed_cpus` that sit on currently-available cores.
    pub fn available_cpus_in_mask(&self, allowed_cpus: &BitVec) -> BitVec {
        let mut cpus = bitvec![0; self.nr_cpus];
        for core in self.available_cores.iter_ones() {
            let mut core_cpus = self.core_cpus[core].clone();
            core_cpus &= allowed_cpus;
            cpus |= core_cpus;
        }
        cpus
    }

    /// Dense topology ID assigned to `core` in `new`.
    ///
    /// # Panics
    /// Panics if the core's `(node, llc, id)` triple was not seen when the
    /// pool was built.
    fn get_core_topological_id(&self, core: &Core) -> usize {
        *self
            .core_topology_to_id
            .get(&(core.node_id, core.llc_id, core.id()))
            .expect("unrecognised core")
    }
}
|
@ -3,17 +3,12 @@
|
||||
// This software may be used and distributed according to the terms of the
|
||||
// GNU General Public License version 2.
|
||||
mod bpf_skel;
|
||||
mod layer_core_growth;
|
||||
mod stats;
|
||||
|
||||
pub use bpf_skel::*;
|
||||
pub mod bpf_intf;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::BTreeSet;
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::CString;
|
||||
use std::fs;
|
||||
use std::io::Read;
|
||||
use std::io::Write;
|
||||
use std::mem::MaybeUninit;
|
||||
use std::ops::Sub;
|
||||
@ -30,10 +25,10 @@ use anyhow::bail;
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use bitvec::prelude::*;
|
||||
pub use bpf_skel::*;
|
||||
use clap::Parser;
|
||||
use clap::ValueEnum;
|
||||
use crossbeam::channel::RecvTimeoutError;
|
||||
use layer_core_growth::LayerGrowthAlgo;
|
||||
use libbpf_rs::skel::OpenSkel;
|
||||
use libbpf_rs::skel::Skel;
|
||||
use libbpf_rs::skel::SkelBuilder;
|
||||
@ -43,6 +38,7 @@ use log::debug;
|
||||
use log::info;
|
||||
use log::trace;
|
||||
use log::warn;
|
||||
use scx_layered::*;
|
||||
use scx_stats::prelude::*;
|
||||
use scx_utils::compat;
|
||||
use scx_utils::init_libbpf_logging;
|
||||
@ -53,7 +49,6 @@ use scx_utils::scx_ops_open;
|
||||
use scx_utils::uei_exited;
|
||||
use scx_utils::uei_report;
|
||||
use scx_utils::Cache;
|
||||
use scx_utils::Core;
|
||||
use scx_utils::CoreType;
|
||||
use scx_utils::LoadAggregator;
|
||||
use scx_utils::Topology;
|
||||
@ -66,7 +61,6 @@ use stats::StatsRes;
|
||||
use stats::SysStats;
|
||||
|
||||
const RAVG_FRAC_BITS: u32 = bpf_intf::ravg_consts_RAVG_FRAC_BITS;
|
||||
const MAX_CPUS: usize = bpf_intf::consts_MAX_CPUS as usize;
|
||||
const MAX_PATH: usize = bpf_intf::consts_MAX_PATH as usize;
|
||||
const MAX_COMM: usize = bpf_intf::consts_MAX_COMM as usize;
|
||||
const MAX_LAYER_WEIGHT: u32 = bpf_intf::consts_MAX_LAYER_WEIGHT;
|
||||
@ -79,7 +73,6 @@ const USAGE_HALF_LIFE_F64: f64 = USAGE_HALF_LIFE as f64 / 1_000_000_000.0;
|
||||
const NR_GSTATS: usize = bpf_intf::global_stat_idx_NR_GSTATS as usize;
|
||||
const NR_LSTATS: usize = bpf_intf::layer_stat_idx_NR_LSTATS as usize;
|
||||
const NR_LAYER_MATCH_KINDS: usize = bpf_intf::layer_match_kind_NR_LAYER_MATCH_KINDS as usize;
|
||||
const CORE_CACHE_LEVEL: u32 = 2;
|
||||
|
||||
#[rustfmt::skip]
|
||||
lazy_static::lazy_static! {
|
||||
@ -475,21 +468,6 @@ struct Opts {
|
||||
specs: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
enum LayerMatch {
|
||||
CgroupPrefix(String),
|
||||
CommPrefix(String),
|
||||
PcommPrefix(String),
|
||||
NiceAbove(i32),
|
||||
NiceBelow(i32),
|
||||
NiceEquals(i32),
|
||||
UIDEquals(u32),
|
||||
GIDEquals(u32),
|
||||
PIDEquals(u32),
|
||||
PPIDEquals(u32),
|
||||
TGIDEquals(u32),
|
||||
}
|
||||
|
||||
#[derive(ValueEnum, Clone, Debug, Parser, PartialEq, Serialize, Deserialize)]
|
||||
#[clap(rename_all = "snake_case")]
|
||||
enum DsqIterAlgo {
|
||||
@ -521,138 +499,6 @@ impl Default for DsqIterAlgo {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
enum LayerKind {
|
||||
Confined {
|
||||
util_range: (f64, f64),
|
||||
#[serde(default)]
|
||||
cpus_range: Option<(usize, usize)>,
|
||||
#[serde(default)]
|
||||
min_exec_us: u64,
|
||||
#[serde(default)]
|
||||
yield_ignore: f64,
|
||||
#[serde(default)]
|
||||
slice_us: u64,
|
||||
#[serde(default)]
|
||||
preempt: bool,
|
||||
#[serde(default)]
|
||||
preempt_first: bool,
|
||||
#[serde(default)]
|
||||
exclusive: bool,
|
||||
#[serde(default)]
|
||||
weight: u32,
|
||||
#[serde(default)]
|
||||
idle_smt: bool,
|
||||
#[serde(default)]
|
||||
growth_algo: LayerGrowthAlgo,
|
||||
#[serde(default)]
|
||||
perf: u64,
|
||||
#[serde(default)]
|
||||
nodes: Vec<usize>,
|
||||
#[serde(default)]
|
||||
llcs: Vec<usize>,
|
||||
},
|
||||
Grouped {
|
||||
util_range: (f64, f64),
|
||||
#[serde(default)]
|
||||
cpus_range: Option<(usize, usize)>,
|
||||
#[serde(default)]
|
||||
min_exec_us: u64,
|
||||
#[serde(default)]
|
||||
yield_ignore: f64,
|
||||
#[serde(default)]
|
||||
slice_us: u64,
|
||||
#[serde(default)]
|
||||
preempt: bool,
|
||||
#[serde(default)]
|
||||
preempt_first: bool,
|
||||
#[serde(default)]
|
||||
exclusive: bool,
|
||||
#[serde(default)]
|
||||
weight: u32,
|
||||
#[serde(default)]
|
||||
idle_smt: bool,
|
||||
#[serde(default)]
|
||||
growth_algo: LayerGrowthAlgo,
|
||||
#[serde(default)]
|
||||
perf: u64,
|
||||
#[serde(default)]
|
||||
nodes: Vec<usize>,
|
||||
#[serde(default)]
|
||||
llcs: Vec<usize>,
|
||||
},
|
||||
Open {
|
||||
#[serde(default)]
|
||||
min_exec_us: u64,
|
||||
#[serde(default)]
|
||||
yield_ignore: f64,
|
||||
#[serde(default)]
|
||||
slice_us: u64,
|
||||
#[serde(default)]
|
||||
preempt: bool,
|
||||
#[serde(default)]
|
||||
preempt_first: bool,
|
||||
#[serde(default)]
|
||||
exclusive: bool,
|
||||
#[serde(default)]
|
||||
weight: u32,
|
||||
#[serde(default)]
|
||||
idle_smt: bool,
|
||||
#[serde(default)]
|
||||
growth_algo: LayerGrowthAlgo,
|
||||
#[serde(default)]
|
||||
perf: u64,
|
||||
#[serde(default)]
|
||||
nodes: Vec<usize>,
|
||||
#[serde(default)]
|
||||
llcs: Vec<usize>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
struct LayerSpec {
|
||||
name: String,
|
||||
comment: Option<String>,
|
||||
matches: Vec<Vec<LayerMatch>>,
|
||||
kind: LayerKind,
|
||||
}
|
||||
|
||||
impl LayerSpec {
|
||||
fn parse(input: &str) -> Result<Vec<Self>> {
|
||||
let config: LayerConfig = if input.starts_with("f:") || input.starts_with("file:") {
|
||||
let mut f = fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(input.split_once(':').unwrap().1)?;
|
||||
let mut content = String::new();
|
||||
f.read_to_string(&mut content)?;
|
||||
serde_json::from_str(&content)?
|
||||
} else {
|
||||
serde_json::from_str(input)?
|
||||
};
|
||||
Ok(config.specs)
|
||||
}
|
||||
fn nodes(&self) -> Vec<usize> {
|
||||
match &self.kind {
|
||||
LayerKind::Confined { nodes, .. }
|
||||
| LayerKind::Open { nodes, .. }
|
||||
| LayerKind::Grouped { nodes, .. } => nodes.clone(),
|
||||
}
|
||||
}
|
||||
fn llcs(&self) -> Vec<usize> {
|
||||
match &self.kind {
|
||||
LayerKind::Confined { llcs, .. }
|
||||
| LayerKind::Open { llcs, .. }
|
||||
| LayerKind::Grouped { llcs, .. } => llcs.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
#[serde(transparent)]
|
||||
struct LayerConfig {
|
||||
specs: Vec<LayerSpec>,
|
||||
}
|
||||
|
||||
fn now_monotonic() -> u64 {
|
||||
let mut time = libc::timespec {
|
||||
tv_sec: 0,
|
||||
@ -1074,318 +920,6 @@ impl Stats {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
/// `CpuPool` represents the CPU core and logical CPU topology within the system.
|
||||
/// It manages the mapping and availability of physical and logical cores, including
|
||||
/// how resources are allocated for tasks across the available CPUs.
|
||||
struct CpuPool {
|
||||
/// The number of physical cores available on the system.
|
||||
nr_cores: usize,
|
||||
|
||||
/// The total number of logical CPUs (including SMT threads).
|
||||
/// This can be larger than `nr_cores` if SMT is enabled,
|
||||
/// where each physical core may have a couple logical cores.
|
||||
nr_cpus: usize,
|
||||
|
||||
/// A bit mask representing all online logical cores.
|
||||
/// Each bit corresponds to whether a logical core (CPU) is online and available
|
||||
/// for processing tasks.
|
||||
all_cpus: BitVec,
|
||||
|
||||
/// A vector of bit masks, each representing the mapping between
|
||||
/// physical cores and the logical cores that run on them.
|
||||
/// The index in the vector represents the physical core, and each bit in the
|
||||
/// corresponding `BitVec` represents whether a logical core belongs to that physical core.
|
||||
core_cpus: Vec<BitVec>,
|
||||
|
||||
/// A vector that maps the index of each logical core to the sibling core.
|
||||
/// This represents the "next sibling" core within a package in systems that support SMT.
|
||||
/// The sibling core is the other logical core that shares the physical resources
|
||||
/// of the same physical core.
|
||||
sibling_cpu: Vec<i32>,
|
||||
|
||||
/// A list of physical core IDs.
|
||||
/// Each entry in this vector corresponds to a unique physical core.
|
||||
cpu_core: Vec<usize>,
|
||||
|
||||
/// A bit mask representing all available physical cores.
|
||||
/// Each bit corresponds to whether a physical core is available for task scheduling.
|
||||
available_cores: BitVec,
|
||||
|
||||
/// The ID of the first physical core in the system.
|
||||
/// This core is often used as a default for initializing tasks.
|
||||
first_cpu: usize,
|
||||
|
||||
/// The ID of the next free CPU or the fallback CPU if none are available.
|
||||
/// This is used to allocate resources when a task needs to be assigned to a core.
|
||||
fallback_cpu: usize,
|
||||
|
||||
/// A mapping of node IDs to last-level cache (LLC) IDs.
|
||||
/// The map allows for the identification of which last-level cache
|
||||
/// corresponds to each CPU based on its core topology.
|
||||
core_topology_to_id: BTreeMap<(usize, usize, usize), usize>,
|
||||
}
|
||||
|
||||
impl CpuPool {
|
||||
fn new(topo: &Topology) -> Result<Self> {
|
||||
if *NR_POSSIBLE_CPUS > MAX_CPUS {
|
||||
bail!(
|
||||
"NR_POSSIBLE_CPUS {} > MAX_CPUS {}",
|
||||
*NR_POSSIBLE_CPUS,
|
||||
MAX_CPUS
|
||||
);
|
||||
}
|
||||
|
||||
let mut cpu_to_cache = vec![]; // (cpu_id, Option<cache_id>)
|
||||
let mut cache_ids = BTreeSet::<usize>::new();
|
||||
let mut nr_offline = 0;
|
||||
|
||||
// Build cpu -> cache ID mapping.
|
||||
for cpu in 0..*NR_POSSIBLE_CPUS {
|
||||
let path = format!(
|
||||
"/sys/devices/system/cpu/cpu{}/cache/index{}/id",
|
||||
cpu, CORE_CACHE_LEVEL
|
||||
);
|
||||
let id = match std::fs::read_to_string(&path) {
|
||||
Ok(val) => Some(val.trim().parse::<usize>().with_context(|| {
|
||||
format!("Failed to parse {:?}'s content {:?}", &path, &val)
|
||||
})?),
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
|
||||
nr_offline += 1;
|
||||
None
|
||||
}
|
||||
Err(e) => return Err(e).with_context(|| format!("Failed to open {:?}", &path)),
|
||||
};
|
||||
|
||||
cpu_to_cache.push(id);
|
||||
if let Some(id) = id {
|
||||
cache_ids.insert(id);
|
||||
}
|
||||
}
|
||||
|
||||
let nr_cpus = *NR_POSSIBLE_CPUS - nr_offline;
|
||||
|
||||
// Cache IDs may have holes. Assign consecutive core IDs to existing
|
||||
// cache IDs.
|
||||
let mut cache_to_core = BTreeMap::<usize, usize>::new();
|
||||
let mut nr_cores = 0;
|
||||
for cache_id in cache_ids.iter() {
|
||||
cache_to_core.insert(*cache_id, nr_cores);
|
||||
nr_cores += 1;
|
||||
}
|
||||
|
||||
// Build core -> cpumask and cpu -> core mappings.
|
||||
let mut all_cpus = bitvec![0; *NR_POSSIBLE_CPUS];
|
||||
let mut core_cpus = vec![bitvec![0; *NR_POSSIBLE_CPUS]; nr_cores];
|
||||
let mut cpu_core = vec![];
|
||||
|
||||
for (cpu, cache) in cpu_to_cache.iter().enumerate().take(*NR_POSSIBLE_CPUS) {
|
||||
if let Some(cache_id) = cache {
|
||||
let core_id = cache_to_core[cache_id];
|
||||
all_cpus.set(cpu, true);
|
||||
core_cpus[core_id].set(cpu, true);
|
||||
cpu_core.push(core_id);
|
||||
}
|
||||
}
|
||||
|
||||
// Build sibling_cpu[]
|
||||
let mut sibling_cpu = vec![-1i32; *NR_POSSIBLE_CPUS];
|
||||
for cpus in &core_cpus {
|
||||
let mut first = -1i32;
|
||||
for cpu in cpus.iter_ones() {
|
||||
if first < 0 {
|
||||
first = cpu as i32;
|
||||
} else {
|
||||
sibling_cpu[first as usize] = cpu as i32;
|
||||
sibling_cpu[cpu as usize] = first;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build core_topology_to_id
|
||||
let mut core_topology_to_id = BTreeMap::new();
|
||||
let mut next_topo_id: usize = 0;
|
||||
for node in topo.nodes() {
|
||||
for llc in node.llcs().values() {
|
||||
for core in llc.cores().values() {
|
||||
core_topology_to_id
|
||||
.insert((core.node_id, core.llc_id, core.id()), next_topo_id);
|
||||
next_topo_id += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
"CPUs: online/possible={}/{} nr_cores={}",
|
||||
nr_cpus, *NR_POSSIBLE_CPUS, nr_cores,
|
||||
);
|
||||
debug!("CPUs: siblings={:?}", &sibling_cpu[..nr_cpus]);
|
||||
|
||||
let first_cpu = core_cpus[0].first_one().unwrap();
|
||||
|
||||
let mut cpu_pool = Self {
|
||||
nr_cores,
|
||||
nr_cpus,
|
||||
all_cpus,
|
||||
core_cpus,
|
||||
sibling_cpu,
|
||||
cpu_core,
|
||||
available_cores: bitvec![1; nr_cores],
|
||||
first_cpu,
|
||||
fallback_cpu: first_cpu,
|
||||
core_topology_to_id,
|
||||
};
|
||||
cpu_pool.update_fallback_cpu();
|
||||
Ok(cpu_pool)
|
||||
}
|
||||
|
||||
fn update_fallback_cpu(&mut self) {
|
||||
match self.available_cores.first_one() {
|
||||
Some(next) => self.fallback_cpu = self.core_cpus[next].first_one().unwrap(),
|
||||
None => self.fallback_cpu = self.first_cpu,
|
||||
}
|
||||
}
|
||||
|
||||
fn alloc_cpus<'a>(&'a mut self, layer: &Layer) -> Option<&'a BitVec> {
|
||||
let available_cpus = self.available_cpus_in_mask(&layer.allowed_cpus);
|
||||
let available_cores = self.cpus_to_cores(&available_cpus).ok()?;
|
||||
|
||||
for alloc_core in layer.core_alloc_order() {
|
||||
match available_cores.get(*alloc_core) {
|
||||
Some(bit) => {
|
||||
if *bit {
|
||||
self.available_cores.set(*alloc_core, false);
|
||||
self.update_fallback_cpu();
|
||||
return Some(&self.core_cpus[*alloc_core]);
|
||||
}
|
||||
}
|
||||
None => {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn cpus_to_cores(&self, cpus_to_match: &BitVec) -> Result<BitVec> {
|
||||
let mut cpus = cpus_to_match.clone();
|
||||
let mut cores = bitvec![0; self.nr_cores];
|
||||
|
||||
while let Some(cpu) = cpus.first_one() {
|
||||
let core = self.cpu_core[cpu];
|
||||
|
||||
if (self.core_cpus[core].clone() & !cpus.clone()).count_ones() != 0 {
|
||||
bail!(
|
||||
"CPUs {} partially intersect with core {} ({})",
|
||||
cpus_to_match,
|
||||
core,
|
||||
self.core_cpus[core],
|
||||
);
|
||||
}
|
||||
|
||||
cpus &= !self.core_cpus[core].clone();
|
||||
cores.set(core, true);
|
||||
}
|
||||
|
||||
Ok(cores)
|
||||
}
|
||||
|
||||
fn free<'a>(&'a mut self, cpus_to_free: &BitVec) -> Result<()> {
|
||||
let cores = self.cpus_to_cores(cpus_to_free)?;
|
||||
if (self.available_cores.clone() & &cores).any() {
|
||||
bail!("Some of CPUs {} are already free", cpus_to_free);
|
||||
}
|
||||
self.available_cores |= cores;
|
||||
self.update_fallback_cpu();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn next_to_free<'a>(&'a self, cands: &BitVec) -> Result<Option<&'a BitVec>> {
|
||||
let last = match cands.last_one() {
|
||||
Some(ret) => ret,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let core = self.cpu_core[last];
|
||||
if (self.core_cpus[core].clone() & !cands.clone()).count_ones() != 0 {
|
||||
bail!(
|
||||
"CPUs{} partially intersect with core {} ({})",
|
||||
cands,
|
||||
core,
|
||||
self.core_cpus[core]
|
||||
);
|
||||
}
|
||||
|
||||
Ok(Some(&self.core_cpus[core]))
|
||||
}
|
||||
|
||||
fn available_cpus_in_mask(&self, allowed_cpus: &BitVec) -> BitVec {
|
||||
let mut cpus = bitvec![0; self.nr_cpus];
|
||||
for core in self.available_cores.iter_ones() {
|
||||
let mut core_cpus = self.core_cpus[core].clone();
|
||||
core_cpus &= allowed_cpus;
|
||||
cpus |= core_cpus;
|
||||
}
|
||||
cpus
|
||||
}
|
||||
|
||||
fn get_core_topological_id(&self, core: &Core) -> usize {
|
||||
*self
|
||||
.core_topology_to_id
|
||||
.get(&(core.node_id, core.llc_id, core.id()))
|
||||
.expect("unrecognised core")
|
||||
}
|
||||
}
|
||||
|
||||
struct IteratorInterleaver<T>
|
||||
where
|
||||
T: Iterator,
|
||||
{
|
||||
empty: bool,
|
||||
index: usize,
|
||||
iters: Vec<T>,
|
||||
}
|
||||
|
||||
impl<T> IteratorInterleaver<T>
|
||||
where
|
||||
T: Iterator,
|
||||
{
|
||||
fn new(iters: Vec<T>) -> Self {
|
||||
Self {
|
||||
empty: false,
|
||||
index: 0,
|
||||
iters,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Iterator for IteratorInterleaver<T>
|
||||
where
|
||||
T: Iterator,
|
||||
{
|
||||
type Item = T::Item;
|
||||
|
||||
fn next(&mut self) -> Option<T::Item> {
|
||||
if let Some(iter) = self.iters.get_mut(self.index) {
|
||||
self.index += 1;
|
||||
if let Some(value) = iter.next() {
|
||||
self.empty = false;
|
||||
Some(value)
|
||||
} else {
|
||||
self.next()
|
||||
}
|
||||
} else {
|
||||
self.index = 0;
|
||||
if self.empty {
|
||||
None
|
||||
} else {
|
||||
self.empty = true;
|
||||
self.next()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Layer {
|
||||
name: String,
|
||||
@ -1506,10 +1040,6 @@ impl Layer {
|
||||
})
|
||||
}
|
||||
|
||||
fn core_alloc_order(&self) -> &Vec<usize> {
|
||||
&self.core_order
|
||||
}
|
||||
|
||||
fn grow_confined_or_grouped(
|
||||
&mut self,
|
||||
cpu_pool: &mut CpuPool,
|
||||
@ -1542,7 +1072,10 @@ impl Layer {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
let new_cpus = match cpu_pool.alloc_cpus(&self).clone() {
|
||||
let new_cpus = match cpu_pool
|
||||
.alloc_cpus(&self.allowed_cpus, &self.core_order)
|
||||
.clone()
|
||||
{
|
||||
Some(ret) => ret.clone(),
|
||||
None => {
|
||||
trace!("layer-{} can't grow, no CPUs", &self.name);
|
||||
|
Loading…
Reference in New Issue
Block a user