This commit is contained in:
Daniel Hodges 2024-11-18 07:46:38 -10:00 committed by GitHub
commit 24afdb0080
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 155 additions and 1 deletions

View File

@ -68,7 +68,7 @@ use std::ops::BitOrAssign;
use std::ops::BitXor;
use std::ops::BitXorAssign;
#[derive(Debug, Eq, Clone, Ord, PartialEq, PartialOrd)]
#[derive(Debug, Eq, Clone, Hash, Ord, PartialEq, PartialOrd)]
/// A CPU mask backed by a bit vector using `u64` storage words in `Lsb0` bit order.
///
/// NOTE(review): presumably bit `n` corresponds to CPU `n` — confirm against the
/// accessor methods of this type.
pub struct Cpumask {
// Backing bits of the mask; exposed as raw `u64` words via `as_raw_slice`.
mask: BitVec<u64, Lsb0>,
}
@ -146,6 +146,10 @@ impl Cpumask {
}
}
/// Build a Cpumask that takes ownership of an already populated bit vector.
///
/// The bit vector is stored as-is; no length check or other validation is done.
pub fn from_bitvec(bitvec: BitVec<u64, Lsb0>) -> Self {
    let mask = bitvec;
    Self { mask }
}
/// Return a slice of u64's whose bits reflect the Cpumask.
pub fn as_raw_slice(&self) -> &[u64] {
self.mask.as_raw_slice()

View File

@ -88,5 +88,9 @@ pub use misc::monitor_stats;
pub use misc::normalize_load_metric;
pub use misc::set_rlimit_infinity;
mod netdev;
pub use netdev::read_netdevs;
pub use netdev::NetDev;
pub mod enums;
pub use enums::scx_enums;

View File

@ -0,0 +1,83 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2.
use std::collections::BTreeMap;
use std::fs;
use std::path::Path;
use crate::misc::read_file_usize;
use crate::Cpumask;
use anyhow::Result;
/// A network device together with the CPU affinity state of its MSI interrupts.
///
/// Instances are produced by `read_netdevs`, which only reports interfaces that
/// have a `device/msi_irqs` directory under `/sys/class/net/<iface>`.
#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct NetDev {
// Interface name, taken from the entry name under `/sys/class/net`.
pub iface: String,
// Value read from `/sys/class/net/<iface>/device/node`; 0 when that file
// cannot be read as a usize.
pub node: usize,
// IRQ number -> mask read from `/proc/irq/<irq>/smp_affinity`. This is the
// map that `apply_cpumasks` writes back.
pub irqs: BTreeMap<usize, Cpumask>,
// IRQ number -> mask read from `/proc/irq/<irq>/affinity_hint`.
pub irq_hints: BTreeMap<usize, Cpumask>,
}
impl NetDev {
    /// Write every mask in `self.irqs` to the matching
    /// `/proc/irq/<irq>/smp_affinity` file.
    ///
    /// Each mask is rendered with the `{:#x}` format of `Cpumask`.
    ///
    /// # Errors
    ///
    /// Returns the first write error encountered; IRQs that come later in the
    /// map are not written in that case.
    pub fn apply_cpumasks(&self) -> Result<()> {
        for (irq, cpumask) in &self.irqs {
            let target = format!("/proc/irq/{}/smp_affinity", irq);
            let encoded = format!("{:#x}", cpumask);
            fs::write(target, encoded)?;
        }
        Ok(())
    }
}
pub fn read_netdevs() -> Result<BTreeMap<String, NetDev>> {
let mut netdevs: BTreeMap<String, NetDev> = BTreeMap::new();
for entry in fs::read_dir("/sys/class/net")? {
let entry = entry?;
let iface = entry.file_name().to_string_lossy().into_owned();
let raw_path = format!("/sys/class/net/{}/device/msi_irqs", iface);
let msi_irqs_path = Path::new(&raw_path);
if !msi_irqs_path.exists() {
continue;
}
let node_path_raw = format!("/sys/class/net/{}/device/node", iface);
let node_path = Path::new(&node_path_raw);
let node = read_file_usize(node_path).unwrap_or(0);
let mut irqs = BTreeMap::new();
let mut irq_hints = BTreeMap::new();
for entry in fs::read_dir(msi_irqs_path)? {
let entry = entry.unwrap();
let irq = entry.file_name().to_string_lossy().into_owned();
if let Ok(irq) = irq.parse::<usize>() {
let affinity_raw_path = format!("/proc/irq/{}/smp_affinity", irq);
let smp_affinity_path = Path::new(&affinity_raw_path);
let smp_affinity = fs::read_to_string(smp_affinity_path)?
.replace(",", "")
.replace("\n", "");
let cpumask = Cpumask::from_str(&smp_affinity)?;
irqs.insert(irq, cpumask);
// affinity hints
let affinity_hint_raw_path = format!("/proc/irq/{}/affinity_hint", irq);
let affinity_hint_path = Path::new(&affinity_hint_raw_path);
let affinity_hint = fs::read_to_string(affinity_hint_path)?
.replace(",", "")
.replace("\n", "");
let hint_cpumask = Cpumask::from_str(&affinity_hint)?;
irq_hints.insert(irq, hint_cpumask);
}
}
netdevs.insert(
iface.clone(),
NetDev {
iface,
node,
irqs,
irq_hints,
},
);
}
Ok(netdevs)
}

View File

@ -222,6 +222,15 @@ impl CpuPool {
Ok(Some(&self.core_cpus[core]))
}
/// Return a bitmap of every CPU that belongs to a currently available core.
///
/// The result is `self.nr_cpus` bits long; a bit is set when the matching bit is
/// set in the `core_cpus` entry of at least one core marked in `available_cores`.
pub fn available_cpus(&self) -> BitVec<u64, Lsb0> {
    let mut cpus = bitvec![u64, Lsb0; 0; self.nr_cpus];
    for core in self.available_cores.iter_ones() {
        // Borrow the core's bits directly; cloning the vector first is unnecessary.
        cpus |= self.core_cpus[core].as_bitslice();
    }
    cpus
}
pub fn available_cpus_in_mask(&self, allowed_cpus: &BitVec) -> BitVec {
let mut cpus = bitvec![0; self.nr_cpus];
for core in self.available_cores.iter_ones() {

View File

@ -44,6 +44,7 @@ use scx_utils::compat;
use scx_utils::import_enums;
use scx_utils::init_libbpf_logging;
use scx_utils::ravg::ravg_read;
use scx_utils::read_netdevs;
use scx_utils::scx_enums;
use scx_utils::scx_ops_attach;
use scx_utils::scx_ops_load;
@ -53,6 +54,7 @@ use scx_utils::uei_report;
use scx_utils::Cache;
use scx_utils::CoreType;
use scx_utils::LoadAggregator;
use scx_utils::NetDev;
use scx_utils::Topology;
use scx_utils::UserExitInfo;
use stats::LayerStats;
@ -471,6 +473,10 @@ struct Opts {
#[clap(long, default_value = "false")]
disable_antistall: bool,
/// Enable netdev IRQ balancing
#[clap(long, default_value = "false")]
netdev_irq_balance: bool,
/// Maximum task runnable_at delay (in seconds) before antistall turns on
#[clap(long, default_value = "3")]
antistall_sec: u64,
@ -1215,6 +1221,8 @@ struct Scheduler<'a> {
nr_layer_cpus_ranges: Vec<(usize, usize)>,
processing_dur: Duration,
topo: Topology,
netdevs: BTreeMap<String, NetDev>,
stats_server: StatsServer<StatsReq, StatsRes>,
}
@ -1399,6 +1407,12 @@ impl<'a> Scheduler<'a> {
let topo = Topology::new()?;
let cpu_pool = CpuPool::new(&topo)?;
let netdevs = if opts.netdev_irq_balance {
read_netdevs()?
} else {
BTreeMap::new()
};
let disable_topology = if let Some(val) = opts.disable_topology {
val
} else {
@ -1523,6 +1537,8 @@ impl<'a> Scheduler<'a> {
proc_reader,
skel,
topo,
netdevs,
stats_server,
};
@ -1542,6 +1558,43 @@ impl<'a> Scheduler<'a> {
bpf_layer.refresh_cpus = 1;
}
/// Re-point every tracked netdev IRQ at the currently available CPUs of the
/// device's node and write the result back through `NetDev::apply_cpumasks`.
///
/// For each netdev the topology node whose id equals `netdev.node` is looked
/// up. Each IRQ mask is cleared and refilled with the available CPUs that fall
/// inside that node's span; if none do, the whole node span is used instead.
///
/// # Errors
///
/// Fails if no topology node matches `netdev.node`, or if applying the masks
/// fails. Netdevs that come later in the map are not processed in that case.
fn update_netdev_cpumasks(&mut self) -> Result<()> {
    let available_cpus = self.cpu_pool.available_cpus();
    // NOTE(review): `is_empty()` tests for a zero-length bit vector, not for
    // "no bits set"; the no-available-CPU case is covered by the per-IRQ
    // fallback further down.
    if available_cpus.is_empty() {
        return Ok(());
    }

    for (iface, netdev) in self.netdevs.iter_mut() {
        // `find` scans all nodes. The previous `take_while(..).next()` stopped
        // at the first non-matching node and so only ever matched the first one.
        let node = self
            .topo
            .nodes()
            .into_iter()
            .find(|n| n.id() == netdev.node)
            .ok_or_else(|| anyhow!("Failed to get netdev node"))?;
        let node_cpus = node.span();

        for (irq, irqmask) in netdev.irqs.iter_mut() {
            irqmask.clear();
            for cpu in available_cpus.iter_ones() {
                if !node_cpus.test_cpu(cpu) {
                    continue;
                }
                // The result of set_cpu is intentionally ignored, as before.
                let _ = irqmask.set_cpu(cpu);
            }
            // If no CPUs are available in the node then spread the load across the node
            if irqmask.weight() == 0 {
                for cpu in node_cpus.as_raw_bitvec().iter_ones() {
                    let _ = irqmask.set_cpu(cpu);
                }
            }
            // Log after the fallback so the trace shows the mask that is applied.
            trace!("{} updating irq {} cpumask {:?}", iface, irq, irqmask);
        }
        netdev.apply_cpumasks()?;
    }

    Ok(())
}
fn set_bpf_layer_preemption(layer: &mut Layer, bpf_layer: &mut types::layer, preempt: bool) {
layer.preempt = preempt;
bpf_layer.preempt.write(preempt);
@ -1656,6 +1709,7 @@ impl<'a> Scheduler<'a> {
}
}
let _ = self.update_netdev_cpumasks();
Ok(())
}