Merge branch 'main' into scx-fair

Andrea Righi 2024-11-18 07:42:09 +01:00 committed by GitHub
commit 5b4b6df5e4
5 changed files with 208 additions and 266 deletions

View File

@@ -2631,7 +2631,8 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
         bpf_for(i, 0, nr_possible_cpus) {
                 const volatile u8 *u8_ptr;

-                init_antistall_dsq = bpf_map_lookup_percpu_elem(&antistall_cpu_dsq, &zero, i);
+                init_antistall_dsq = bpf_map_lookup_percpu_elem(&antistall_cpu_dsq,
+                                                                &zero, i);
                 if (init_antistall_dsq) {
                         *init_antistall_dsq = SCX_DSQ_INVALID;
                 }

View File

@@ -41,20 +41,20 @@ impl LayerSpec {
         Ok(config.specs)
     }

-    pub fn nodes(&self) -> Vec<usize> {
-        match &self.kind {
-            LayerKind::Confined { nodes, .. }
-            | LayerKind::Open { nodes, .. }
-            | LayerKind::Grouped { nodes, .. } => nodes.clone(),
-        }
+    pub fn nodes(&self) -> &Vec<usize> {
+        &self.kind.common().nodes
     }

-    pub fn llcs(&self) -> Vec<usize> {
-        match &self.kind {
-            LayerKind::Confined { llcs, .. }
-            | LayerKind::Open { llcs, .. }
-            | LayerKind::Grouped { llcs, .. } => llcs.clone(),
-        }
+    pub fn llcs(&self) -> &Vec<usize> {
+        &self.kind.common().llcs
+    }
+
+    pub fn nodes_mut(&mut self) -> &mut Vec<usize> {
+        &mut self.kind.common_mut().nodes
+    }
+
+    pub fn llcs_mut(&mut self) -> &mut Vec<usize> {
+        &mut self.kind.common_mut().llcs
     }
 }
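
The four accessors above lean on common() and common_mut() helpers that a later hunk adds to LayerKind. A minimal, self-contained sketch of that pattern (simplified stand-in types with a trimmed field set, not the actual scx_layered definitions) shows how a single accessor pair replaces the per-variant matches and allows in-place mutation without cloning:

    // Stand-ins for the real config types; only a few fields are kept.
    #[derive(Clone, Debug, Default)]
    struct LayerCommon {
        nodes: Vec<usize>,
        llcs: Vec<usize>,
        preempt: bool,
    }

    #[allow(dead_code)]
    #[derive(Clone, Debug)]
    enum LayerKind {
        Confined { util_range: (f64, f64), common: LayerCommon },
        Grouped { util_range: (f64, f64), common: LayerCommon },
        Open { common: LayerCommon },
    }

    impl LayerKind {
        // One accessor instead of a three-arm match at every call site.
        fn common(&self) -> &LayerCommon {
            match self {
                LayerKind::Confined { common, .. }
                | LayerKind::Grouped { common, .. }
                | LayerKind::Open { common, .. } => common,
            }
        }

        fn common_mut(&mut self) -> &mut LayerCommon {
            match self {
                LayerKind::Confined { common, .. }
                | LayerKind::Grouped { common, .. }
                | LayerKind::Open { common, .. } => common,
            }
        }
    }

    fn main() {
        let mut kind = LayerKind::Open { common: LayerCommon::default() };

        // Mutate the shared fields in place, then read them back by reference.
        kind.common_mut().nodes.push(0);
        kind.common_mut().llcs.clear();
        assert_eq!(kind.common().nodes, vec![0]);
        assert!(!kind.common().preempt);
        println!("{:?}", kind);
    }
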
@@ -73,91 +73,55 @@ pub enum LayerMatch {
     TGIDEquals(u32),
 }

+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct LayerCommon {
+    #[serde(default)]
+    pub min_exec_us: u64,
+    #[serde(default)]
+    pub yield_ignore: f64,
+    #[serde(default)]
+    pub slice_us: u64,
+    #[serde(default)]
+    pub preempt: bool,
+    #[serde(default)]
+    pub preempt_first: bool,
+    #[serde(default)]
+    pub exclusive: bool,
+    #[serde(default)]
+    pub weight: u32,
+    #[serde(default)]
+    pub idle_smt: bool,
+    #[serde(default)]
+    pub growth_algo: LayerGrowthAlgo,
+    #[serde(default)]
+    pub perf: u64,
+    #[serde(default)]
+    pub nodes: Vec<usize>,
+    #[serde(default)]
+    pub llcs: Vec<usize>,
+}
+
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub enum LayerKind {
     Confined {
         util_range: (f64, f64),
         #[serde(default)]
         cpus_range: Option<(usize, usize)>,
-        #[serde(default)]
-        min_exec_us: u64,
-        #[serde(default)]
-        yield_ignore: f64,
-        #[serde(default)]
-        slice_us: u64,
-        #[serde(default)]
-        preempt: bool,
-        #[serde(default)]
-        preempt_first: bool,
-        #[serde(default)]
-        exclusive: bool,
-        #[serde(default)]
-        weight: u32,
-        #[serde(default)]
-        idle_smt: bool,
-        #[serde(default)]
-        growth_algo: LayerGrowthAlgo,
-        #[serde(default)]
-        perf: u64,
-        #[serde(default)]
-        nodes: Vec<usize>,
-        #[serde(default)]
-        llcs: Vec<usize>,
+
+        #[serde(flatten)]
+        common: LayerCommon,
     },
     Grouped {
         util_range: (f64, f64),
         #[serde(default)]
         cpus_range: Option<(usize, usize)>,
-        #[serde(default)]
-        min_exec_us: u64,
-        #[serde(default)]
-        yield_ignore: f64,
-        #[serde(default)]
-        slice_us: u64,
-        #[serde(default)]
-        preempt: bool,
-        #[serde(default)]
-        preempt_first: bool,
-        #[serde(default)]
-        exclusive: bool,
-        #[serde(default)]
-        weight: u32,
-        #[serde(default)]
-        idle_smt: bool,
-        #[serde(default)]
-        growth_algo: LayerGrowthAlgo,
-        #[serde(default)]
-        perf: u64,
-        #[serde(default)]
-        nodes: Vec<usize>,
-        #[serde(default)]
-        llcs: Vec<usize>,
+
+        #[serde(flatten)]
+        common: LayerCommon,
     },
     Open {
-        #[serde(default)]
-        min_exec_us: u64,
-        #[serde(default)]
-        yield_ignore: f64,
-        #[serde(default)]
-        slice_us: u64,
-        #[serde(default)]
-        preempt: bool,
-        #[serde(default)]
-        preempt_first: bool,
-        #[serde(default)]
-        exclusive: bool,
-        #[serde(default)]
-        weight: u32,
-        #[serde(default)]
-        idle_smt: bool,
-        #[serde(default)]
-        growth_algo: LayerGrowthAlgo,
-        #[serde(default)]
-        perf: u64,
-        #[serde(default)]
-        nodes: Vec<usize>,
-        #[serde(default)]
-        llcs: Vec<usize>,
+        #[serde(flatten)]
+        common: LayerCommon,
     },
 }
@@ -169,4 +133,20 @@ impl LayerKind {
             LayerKind::Open { .. } => bpf_intf::layer_kind_LAYER_KIND_OPEN as i32,
         }
     }
+
+    pub fn common(&self) -> &LayerCommon {
+        match self {
+            LayerKind::Confined { common, .. }
+            | LayerKind::Grouped { common, .. }
+            | LayerKind::Open { common, .. } => common,
+        }
+    }
+
+    pub fn common_mut(&mut self) -> &mut LayerCommon {
+        match self {
+            LayerKind::Confined { common, .. }
+            | LayerKind::Grouped { common, .. }
+            | LayerKind::Open { common, .. } => common,
+        }
+    }
 }
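
Because the shared fields are pulled into LayerCommon with #[serde(flatten)] and keep their #[serde(default)] markers, existing layer configurations continue to parse exactly as before: the tunables still sit at the same level as util_range and cpus_range in the serialized form. A small self-contained sketch of that behavior, using simplified types and a trimmed field set rather than the real scx_layered definitions, and assuming serde (with the derive feature) and serde_json as dependencies:

    use serde::{Deserialize, Serialize};

    #[derive(Clone, Debug, Serialize, Deserialize)]
    struct LayerCommon {
        #[serde(default)]
        slice_us: u64,
        #[serde(default)]
        preempt: bool,
    }

    #[derive(Clone, Debug, Serialize, Deserialize)]
    enum LayerKind {
        Confined {
            util_range: (f64, f64),
            // Flattened: the serialized fields stay at the same level as util_range.
            #[serde(flatten)]
            common: LayerCommon,
        },
    }

    fn main() -> Result<(), serde_json::Error> {
        // No nested "common" object in the input, just like a pre-refactor config.
        let json = r#"{ "Confined": { "util_range": [0.8, 0.9], "slice_us": 20000 } }"#;
        let kind: LayerKind = serde_json::from_str(json)?;

        match &kind {
            LayerKind::Confined { common, .. } => {
                assert_eq!(common.slice_us, 20000);
                assert!(!common.preempt); // missing fields fall back to their defaults
            }
        }

        // The flattened layout is preserved when serializing back out as well.
        println!("{}", serde_json::to_string(&kind)?);
        Ok(())
    }
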

View File

@@ -12,6 +12,7 @@ use std::collections::BTreeMap;
 use anyhow::bail;
 use anyhow::Result;
 use bitvec::prelude::*;
+pub use config::LayerCommon;
 pub use config::LayerConfig;
 pub use config::LayerKind;
 pub use config::LayerMatch;

View File

@@ -28,6 +28,7 @@ use bitvec::prelude::*;
 pub use bpf_skel::*;
 use clap::Parser;
 use crossbeam::channel::RecvTimeoutError;
+use lazy_static::lazy_static;
 use libbpf_rs::skel::OpenSkel;
 use libbpf_rs::skel::Skel;
 use libbpf_rs::skel::SkelBuilder;
@@ -74,108 +75,112 @@ const NR_LSTATS: usize = bpf_intf::layer_stat_idx_NR_LSTATS as usize;
 const NR_LAYER_MATCH_KINDS: usize = bpf_intf::layer_match_kind_NR_LAYER_MATCH_KINDS as usize;
 const MAX_LAYER_NAME: usize = bpf_intf::consts_MAX_LAYER_NAME as usize;

-#[rustfmt::skip]
-lazy_static::lazy_static! {
+lazy_static! {
     static ref NR_POSSIBLE_CPUS: usize = libbpf_rs::num_possible_cpus().unwrap();
     static ref USAGE_DECAY: f64 = 0.5f64.powf(1.0 / USAGE_HALF_LIFE_F64);
-    static ref EXAMPLE_CONFIG: LayerConfig =
-        LayerConfig {
-            specs: vec![
-                LayerSpec {
-                    name: "batch".into(),
-                    comment: Some("tasks under system.slice or tasks with nice value > 0".into()),
-                    matches: vec![
-                        vec![LayerMatch::CgroupPrefix("system.slice/".into())],
-                        vec![LayerMatch::NiceAbove(0)],
-                    ],
-                    kind: LayerKind::Confined {
-                        cpus_range: Some((0, 16)),
-                        util_range: (0.8, 0.9),
+    static ref EXAMPLE_CONFIG: LayerConfig = LayerConfig {
+        specs: vec![
+            LayerSpec {
+                name: "batch".into(),
+                comment: Some("tasks under system.slice or tasks with nice value > 0".into()),
+                matches: vec![
+                    vec![LayerMatch::CgroupPrefix("system.slice/".into())],
+                    vec![LayerMatch::NiceAbove(0)],
+                ],
+                kind: LayerKind::Confined {
+                    util_range: (0.8, 0.9),
+                    cpus_range: Some((0, 16)),
+                    common: LayerCommon {
                         min_exec_us: 1000,
                         yield_ignore: 0.0,
                         preempt: false,
                         preempt_first: false,
                         exclusive: false,
                         idle_smt: false,
                         slice_us: 20000,
                         weight: DEFAULT_LAYER_WEIGHT,
                         growth_algo: LayerGrowthAlgo::Sticky,
                         perf: 1024,
                         nodes: vec![],
                         llcs: vec![],
                     },
                 },
-                LayerSpec {
-                    name: "immediate".into(),
-                    comment: Some("tasks under workload.slice with nice value < 0".into()),
-                    matches: vec![vec![
-                        LayerMatch::CgroupPrefix("workload.slice/".into()),
-                        LayerMatch::NiceBelow(0),
-                    ]],
-                    kind: LayerKind::Open {
+            },
+            LayerSpec {
+                name: "immediate".into(),
+                comment: Some("tasks under workload.slice with nice value < 0".into()),
+                matches: vec![vec![
+                    LayerMatch::CgroupPrefix("workload.slice/".into()),
+                    LayerMatch::NiceBelow(0),
+                ]],
+                kind: LayerKind::Open {
+                    common: LayerCommon {
                         min_exec_us: 100,
                         yield_ignore: 0.25,
                         preempt: true,
                         preempt_first: false,
                         exclusive: true,
                         idle_smt: false,
                         slice_us: 20000,
                         weight: DEFAULT_LAYER_WEIGHT,
                         growth_algo: LayerGrowthAlgo::Sticky,
                         perf: 1024,
                         nodes: vec![],
                         llcs: vec![],
                     },
                 },
-                LayerSpec {
-                    name: "stress-ng".into(),
-                    comment: Some("stress-ng test layer".into()),
-                    matches: vec![vec![
-                        LayerMatch::CommPrefix("stress-ng".into()),
-                    ],
-                    vec![
-                        LayerMatch::PcommPrefix("stress-ng".into()),
-                    ]],
-                    kind: LayerKind::Confined {
-                        cpus_range: None,
-                        min_exec_us: 800,
-                        yield_ignore: 0.0,
-                        util_range: (0.2, 0.8),
+            },
+            LayerSpec {
+                name: "stress-ng".into(),
+                comment: Some("stress-ng test layer".into()),
+                matches: vec![
+                    vec![LayerMatch::CommPrefix("stress-ng".into()),],
+                    vec![LayerMatch::PcommPrefix("stress-ng".into()),]
+                ],
+                kind: LayerKind::Confined {
+                    cpus_range: None,
+                    util_range: (0.2, 0.8),
+                    common: LayerCommon {
+                        min_exec_us: 800,
+                        yield_ignore: 0.0,
                         preempt: true,
                         preempt_first: false,
                         exclusive: false,
                         idle_smt: false,
                         slice_us: 800,
                         weight: DEFAULT_LAYER_WEIGHT,
                         growth_algo: LayerGrowthAlgo::Topo,
                         perf: 1024,
                         nodes: vec![],
                         llcs: vec![],
                     },
                 },
-                LayerSpec {
-                    name: "normal".into(),
-                    comment: Some("the rest".into()),
-                    matches: vec![vec![]],
-                    kind: LayerKind::Grouped {
-                        cpus_range: None,
-                        util_range: (0.5, 0.6),
+            },
+            LayerSpec {
+                name: "normal".into(),
+                comment: Some("the rest".into()),
+                matches: vec![vec![]],
+                kind: LayerKind::Grouped {
+                    cpus_range: None,
+                    util_range: (0.5, 0.6),
+                    common: LayerCommon {
                         min_exec_us: 200,
                         yield_ignore: 0.0,
                         preempt: false,
                         preempt_first: false,
                         exclusive: false,
                         idle_smt: false,
                         slice_us: 20000,
                         weight: DEFAULT_LAYER_WEIGHT,
                         growth_algo: LayerGrowthAlgo::Linear,
                         perf: 1024,
                         nodes: vec![],
                         llcs: vec![],
                     },
                 },
-            ],
-        };
+            },
+        ],
+    };
 }

 /// scx_layered: A highly configurable multi-layer sched_ext scheduler
@@ -923,8 +928,7 @@ impl Layer {
             LayerKind::Confined {
                 cpus_range,
                 util_range,
-                nodes,
-                llcs,
+                common: LayerCommon { nodes, llcs, .. },
                 ..
             } => {
                 let cpus_range = cpus_range.unwrap_or((0, std::usize::MAX));
@@ -962,7 +966,14 @@ impl Layer {
                     bail!("invalid util_range {:?}", util_range);
                 }
             }
-            LayerKind::Grouped { nodes, llcs, .. } | LayerKind::Open { nodes, llcs, .. } => {
+            LayerKind::Grouped {
+                common: LayerCommon { nodes, llcs, .. },
+                ..
+            }
+            | LayerKind::Open {
+                common: LayerCommon { nodes, llcs, .. },
+                ..
+            } => {
                 if nodes.len() == 0 && llcs.len() == 0 {
                     allowed_cpus.fill(true);
                 } else {
@@ -987,23 +998,13 @@ impl Layer {
             }
         }

-        let layer_growth_algo = match &kind {
-            LayerKind::Confined { growth_algo, .. }
-            | LayerKind::Grouped { growth_algo, .. }
-            | LayerKind::Open { growth_algo, .. } => growth_algo.clone(),
-        };
-        let preempt = match &kind {
-            LayerKind::Confined { preempt, .. }
-            | LayerKind::Grouped { preempt, .. }
-            | LayerKind::Open { preempt, .. } => preempt.clone(),
-        };
+        let layer_growth_algo = kind.common().growth_algo.clone();
+        let preempt = kind.common().preempt;

         let core_order = layer_growth_algo.layer_core_order(cpu_pool, spec, idx, topo);

         debug!(
             "layer: {} algo: {:?} core order: {:?}",
-            name,
-            layer_growth_algo.clone(),
-            core_order
+            name, &layer_growth_algo, core_order
         );

         Ok(Self {
@@ -1289,8 +1290,8 @@ impl<'a> Scheduler<'a> {
             layer.nr_match_ors = spec.matches.len() as u32;
             layer.kind = spec.kind.as_bpf_enum();

-            match &spec.kind {
-                LayerKind::Confined {
+            {
+                let LayerCommon {
                     min_exec_us,
                     yield_ignore,
                     perf,
@@ -1303,70 +1304,42 @@ impl<'a> Scheduler<'a> {
                     slice_us,
                     weight,
                     ..
-                }
-                | LayerKind::Grouped {
-                    min_exec_us,
-                    yield_ignore,
-                    perf,
-                    preempt,
-                    preempt_first,
-                    exclusive,
-                    idle_smt,
-                    growth_algo,
-                    nodes,
-                    slice_us,
-                    weight,
-                    ..
-                }
-                | LayerKind::Open {
-                    min_exec_us,
-                    yield_ignore,
-                    perf,
-                    preempt,
-                    preempt_first,
-                    exclusive,
-                    idle_smt,
-                    growth_algo,
-                    nodes,
-                    slice_us,
-                    weight,
-                    ..
-                } => {
-                    layer.slice_ns = if *slice_us > 0 {
-                        *slice_us * 1000
-                    } else {
-                        opts.slice_us * 1000
-                    };
-                    layer.min_exec_ns = min_exec_us * 1000;
-                    layer.yield_step_ns = if *yield_ignore > 0.999 {
-                        0
-                    } else if *yield_ignore < 0.001 {
-                        layer.slice_ns
-                    } else {
-                        (layer.slice_ns as f64 * (1.0 - *yield_ignore)) as u64
-                    };
-                    let mut layer_name: String = spec.name.clone();
-                    layer_name.truncate(MAX_LAYER_NAME);
-                    copy_into_cstr(&mut layer.name, layer_name.as_str());
-                    layer.preempt.write(*preempt);
-                    layer.preempt_first.write(*preempt_first);
-                    layer.exclusive.write(*exclusive);
-                    layer.idle_smt.write(*idle_smt);
-                    layer.growth_algo = growth_algo.as_bpf_enum();
-                    layer.weight = if *weight <= MAX_LAYER_WEIGHT && *weight >= MIN_LAYER_WEIGHT {
-                        *weight
-                    } else {
-                        DEFAULT_LAYER_WEIGHT
-                    };
-                    layer_weights.push(layer.weight.try_into().unwrap());
-                    layer.perf = u32::try_from(*perf)?;
-                    layer.node_mask = nodemask_from_nodes(nodes) as u64;
-                    for topo_node in topo.nodes() {
-                        if !nodes.contains(&topo_node.id()) {
-                            continue;
-                        }
-                        layer.cache_mask |= cachemask_from_llcs(&topo_node.llcs()) as u64;
-                    }
-                }
+                } = spec.kind.common();
+
+                layer.slice_ns = if *slice_us > 0 {
+                    *slice_us * 1000
+                } else {
+                    opts.slice_us * 1000
+                };
+                layer.min_exec_ns = min_exec_us * 1000;
+                layer.yield_step_ns = if *yield_ignore > 0.999 {
+                    0
+                } else if *yield_ignore < 0.001 {
+                    layer.slice_ns
+                } else {
+                    (layer.slice_ns as f64 * (1.0 - *yield_ignore)) as u64
+                };
+                let mut layer_name: String = spec.name.clone();
+                layer_name.truncate(MAX_LAYER_NAME);
+                copy_into_cstr(&mut layer.name, layer_name.as_str());
+                layer.preempt.write(*preempt);
+                layer.preempt_first.write(*preempt_first);
+                layer.exclusive.write(*exclusive);
+                layer.idle_smt.write(*idle_smt);
+                layer.growth_algo = growth_algo.as_bpf_enum();
+                layer.weight = if *weight <= MAX_LAYER_WEIGHT && *weight >= MIN_LAYER_WEIGHT {
+                    *weight
+                } else {
+                    DEFAULT_LAYER_WEIGHT
+                };
+                layer_weights.push(layer.weight.try_into().unwrap());
+                layer.perf = u32::try_from(*perf)?;
+                layer.node_mask = nodemask_from_nodes(nodes) as u64;
+                for topo_node in topo.nodes() {
+                    if !nodes.contains(&topo_node.id()) {
+                        continue;
+                    }
+                    layer.cache_mask |= cachemask_from_llcs(&topo_node.llcs()) as u64;
+                }
             }
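
The timing math in this hunk is carried over unchanged from the old match arm; only the destructuring around it changed. As a standalone worked example of what it computes (free functions with made-up names standing in for the field assignments above, not scheduler code; opts_slice_us stands in for the opts.slice_us fallback used in the diff):

    // Sketch of the slice/yield calculation shown in the hunk above.
    fn slice_ns(slice_us: u64, opts_slice_us: u64) -> u64 {
        // A per-layer slice_us of 0 means "use the global default".
        if slice_us > 0 {
            slice_us * 1000
        } else {
            opts_slice_us * 1000
        }
    }

    // yield_ignore near 0.0 makes a yield step over the full slice, near 1.0
    // makes yields a no-op, and values in between scale the step linearly.
    fn yield_step_ns(slice_ns: u64, yield_ignore: f64) -> u64 {
        if yield_ignore > 0.999 {
            0
        } else if yield_ignore < 0.001 {
            slice_ns
        } else {
            (slice_ns as f64 * (1.0 - yield_ignore)) as u64
        }
    }

    fn main() {
        let slice = slice_ns(20000, 5000); // the layer overrides a 5 ms default
        assert_eq!(slice, 20_000_000); // 20 ms expressed in ns

        assert_eq!(yield_step_ns(slice, 0.0), 20_000_000); // full step
        assert_eq!(yield_step_ns(slice, 1.0), 0); // yields ignored
        assert_eq!(yield_step_ns(slice, 0.25), 15_000_000); // 75% of the slice
    }
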
@@ -1449,14 +1422,8 @@ impl<'a> Scheduler<'a> {
                 .into_iter()
                 .cloned()
                 .map(|mut s| {
-                    match &mut s.kind {
-                        LayerKind::Confined { nodes, llcs, .. }
-                        | LayerKind::Open { nodes, llcs, .. }
-                        | LayerKind::Grouped { nodes, llcs, .. } => {
-                            nodes.truncate(0);
-                            llcs.truncate(0);
-                        }
-                    };
+                    s.kind.common_mut().nodes.clear();
+                    s.kind.common_mut().llcs.clear();
                     s
                 })
                 .collect()

View File

@@ -25,7 +25,6 @@ use serde::Serialize;
 use crate::bpf_intf;
 use crate::BpfStats;
 use crate::Layer;
-use crate::LayerKind;
 use crate::Stats;

 fn fmt_pct(v: f64) -> String {
@@ -174,12 +173,6 @@ impl LayerStats {
             if b != 0.0 { a / b * 100.0 } else { 0.0 }
         };

-        let is_excl = match &layer.kind {
-            LayerKind::Confined { exclusive, .. }
-            | LayerKind::Grouped { exclusive, .. }
-            | LayerKind::Open { exclusive, .. } => *exclusive,
-        } as u32;
-
         Self {
             util: stats.layer_utils[lidx] * 100.0,
             util_frac: calc_frac(stats.layer_utils[lidx], stats.total_util),
@@ -206,7 +199,7 @@ impl LayerStats {
             keep: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_KEEP),
             keep_fail_max_exec: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_KEEP_FAIL_MAX_EXEC),
             keep_fail_busy: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_KEEP_FAIL_BUSY),
-            is_excl,
+            is_excl: layer.kind.common().exclusive as u32,
             excl_collision: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_EXCL_COLLISION),
             excl_preempt: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_EXCL_PREEMPT),
             kick: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_KICK),