Merge pull request #436 from danielocfb/topic/inlined-skeleton-members

rust: Updated libbpf-rs & libbpf-cargo to 0.24
This commit is contained in:
Tejun Heo 2024-08-08 14:41:47 -10:00 committed by GitHub
commit b264787dde
21 changed files with 189 additions and 156 deletions
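
Most of the churn below is mechanical: libbpf-rs 0.24 makes the caller own the storage backing the open BPF object (a `MaybeUninit<OpenObject>` that must outlive the skeleton) and replaces the generated skeleton accessor methods (`rodata_mut()`, `bss_mut()`, `data()`, `maps()`, `maps_mut()`, `progs_mut()`) with plain fields. A minimal before/after sketch, assuming a hypothetical skeleton generated by libbpf-cargo for `example.bpf.c` with a `debug` variable in .rodata, a `counter` in .bss, and an `events` ring buffer map (none of these names come from this commit):

    use std::mem::MaybeUninit;

    use anyhow::Result;
    use libbpf_rs::skel::{OpenSkel, Skel, SkelBuilder};
    use libbpf_rs::RingBufferBuilder;

    // Hypothetical skeleton module generated by libbpf-cargo for example.bpf.c.
    use crate::example_skel::ExampleSkelBuilder;

    fn open_and_load(verbose: bool) -> Result<()> {
        // 0.24: the open object lives in caller-owned storage that must
        // outlive the skeleton borrowing it.
        let mut open_object = MaybeUninit::uninit();

        let mut builder = ExampleSkelBuilder::default();
        builder.obj_builder.debug(verbose);

        // 0.23: builder.open()?
        let mut open_skel = builder.open(&mut open_object)?;

        // 0.23: open_skel.rodata_mut().debug = true;
        //       open_skel.bss_mut().counter = 0;
        // 0.24: the data sections are plain fields under `maps`
        //       (rodata_data, bss_data, data_data).
        open_skel.maps.rodata_data.debug = true;
        open_skel.maps.bss_data.counter = 0;

        let mut skel = open_skel.load()?;
        skel.attach()?;

        // 0.23: skel.maps().events() / skel.maps_mut().events()
        // 0.24: maps are plain fields; map helpers such as lookup()/update()
        //       now come from the libbpf_rs::MapCore trait, which is why
        //       several files below add `use libbpf_rs::MapCore as _;`.
        let mut rb = RingBufferBuilder::new();
        rb.add(&skel.maps.events, |_data: &[u8]| 0)?;
        let _ring = rb.build()?;

        Ok(())
    }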

View File

@ -10,7 +10,7 @@ description = "Framework to implement sched_ext schedulers running in user space
[dependencies]
anyhow = "1.0.65"
plain = "0.2.3"
libbpf-rs = "0.23.1"
libbpf-rs = "0.24"
libc = "0.2.137"
scx_utils = { path = "../scx_utils", version = "1.0.2" }

View File

@ -3,6 +3,8 @@
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2.
use std::mem::MaybeUninit;
use crate::bpf_intf;
use crate::bpf_skel::*;
@ -14,6 +16,7 @@ use anyhow::Result;
use plain::Plain;
use libbpf_rs::OpenObject;
use libbpf_rs::skel::OpenSkel;
use libbpf_rs::skel::Skel;
use libbpf_rs::skel::SkelBuilder;
@ -189,6 +192,7 @@ fn is_smt_active() -> std::io::Result<bool> {
impl<'cb> BpfScheduler<'cb> {
pub fn init(
open_object: &'cb mut MaybeUninit<OpenObject>,
exit_dump_len: u32,
partial: bool,
slice_us: u64,
@ -200,7 +204,7 @@ impl<'cb> BpfScheduler<'cb> {
// Open the BPF prog first for verification.
let mut skel_builder = BpfSkelBuilder::default();
skel_builder.obj_builder.debug(verbose);
let mut skel = scx_ops_open!(skel_builder, rustland)?;
let mut skel = scx_ops_open!(skel_builder, open_object, rustland)?;
// Lock all the memory to prevent page faults that could trigger potential deadlocks during
// scheduling.
@ -242,7 +246,7 @@ impl<'cb> BpfScheduler<'cb> {
}
// Check host topology to determine if we need to enable SMT capabilities.
skel.rodata_mut().smt_enabled = is_smt_active()?;
skel.maps.rodata_data.smt_enabled = is_smt_active()?;
// Set scheduler options (defined in the BPF part).
if partial {
@ -250,26 +254,26 @@ impl<'cb> BpfScheduler<'cb> {
}
skel.struct_ops.rustland_mut().exit_dump_len = exit_dump_len;
skel.bss_mut().usersched_pid = std::process::id();
skel.rodata_mut().slice_ns = slice_us * 1000;
skel.rodata_mut().full_user = full_user;
skel.rodata_mut().low_power = low_power;
skel.rodata_mut().debug = debug;
skel.maps.bss_data.usersched_pid = std::process::id();
skel.maps.rodata_data.slice_ns = slice_us * 1000;
skel.maps.rodata_data.full_user = full_user;
skel.maps.rodata_data.low_power = low_power;
skel.maps.rodata_data.debug = debug;
// Attach BPF scheduler.
let mut skel = scx_ops_load!(skel, rustland, uei)?;
let struct_ops = Some(scx_ops_attach!(skel, rustland)?);
// Build the ring buffer of queued tasks.
let maps = skel.maps();
let queued_ring_buffer = maps.queued();
let maps = &skel.maps;
let queued_ring_buffer = &maps.queued;
let mut rbb = libbpf_rs::RingBufferBuilder::new();
rbb.add(queued_ring_buffer, callback)
.expect("failed to add ringbuf callback");
let queued = rbb.build().expect("failed to build ringbuf");
// Build the user ring buffer of dispatched tasks.
let dispatched = libbpf_rs::UserRingBuffer::new(&maps.dispatched())
let dispatched = libbpf_rs::UserRingBuffer::new(&maps.dispatched)
.expect("failed to create user ringbuf");
// Make sure to use the SCHED_EXT class at least for the scheduler itself.
@ -297,71 +301,71 @@ impl<'cb> BpfScheduler<'cb> {
// busy loop, causing unnecessary high CPU consumption.
pub fn update_tasks(&mut self, nr_queued: Option<u64>, nr_scheduled: Option<u64>) {
if let Some(queued) = nr_queued {
self.skel.bss_mut().nr_queued = queued;
self.skel.maps.bss_data.nr_queued = queued;
}
if let Some(scheduled) = nr_scheduled {
self.skel.bss_mut().nr_scheduled = scheduled;
self.skel.maps.bss_data.nr_scheduled = scheduled;
}
}
// Counter of the online CPUs.
#[allow(dead_code)]
pub fn nr_online_cpus_mut(&mut self) -> &mut u64 {
&mut self.skel.bss_mut().nr_online_cpus
&mut self.skel.maps.bss_data.nr_online_cpus
}
// Counter of currently running tasks.
#[allow(dead_code)]
pub fn nr_running_mut(&mut self) -> &mut u64 {
&mut self.skel.bss_mut().nr_running
&mut self.skel.maps.bss_data.nr_running
}
// Counter of queued tasks.
#[allow(dead_code)]
pub fn nr_queued_mut(&mut self) -> &mut u64 {
&mut self.skel.bss_mut().nr_queued
&mut self.skel.maps.bss_data.nr_queued
}
// Counter of scheduled tasks.
#[allow(dead_code)]
pub fn nr_scheduled_mut(&mut self) -> &mut u64 {
&mut self.skel.bss_mut().nr_scheduled
&mut self.skel.maps.bss_data.nr_scheduled
}
// Counter of user dispatch events.
#[allow(dead_code)]
pub fn nr_user_dispatches_mut(&mut self) -> &mut u64 {
&mut self.skel.bss_mut().nr_user_dispatches
&mut self.skel.maps.bss_data.nr_user_dispatches
}
// Counter of user kernel events.
#[allow(dead_code)]
pub fn nr_kernel_dispatches_mut(&mut self) -> &mut u64 {
&mut self.skel.bss_mut().nr_kernel_dispatches
&mut self.skel.maps.bss_data.nr_kernel_dispatches
}
// Counter of cancel dispatch events.
#[allow(dead_code)]
pub fn nr_cancel_dispatches_mut(&mut self) -> &mut u64 {
&mut self.skel.bss_mut().nr_cancel_dispatches
&mut self.skel.maps.bss_data.nr_cancel_dispatches
}
// Counter of dispatches bounced to the shared DSQ.
#[allow(dead_code)]
pub fn nr_bounce_dispatches_mut(&mut self) -> &mut u64 {
&mut self.skel.bss_mut().nr_bounce_dispatches
&mut self.skel.maps.bss_data.nr_bounce_dispatches
}
// Counter of failed dispatch events.
#[allow(dead_code)]
pub fn nr_failed_dispatches_mut(&mut self) -> &mut u64 {
&mut self.skel.bss_mut().nr_failed_dispatches
&mut self.skel.maps.bss_data.nr_failed_dispatches
}
// Counter of scheduler congestion events.
#[allow(dead_code)]
pub fn nr_sched_congested_mut(&mut self) -> &mut u64 {
&mut self.skel.bss_mut().nr_sched_congested
&mut self.skel.maps.bss_data.nr_sched_congested
}
// Set scheduling class for the scheduler itself to SCHED_EXT

View File

@ -16,8 +16,8 @@ bindgen = ">=0.68, <0.70"
glob = "0.3"
hex = "0.4.3"
lazy_static = "1.4"
libbpf-cargo = "0.23.1"
libbpf-rs = "0.23.1"
libbpf-cargo = "0.24.1"
libbpf-rs = "0.24.1"
log = "0.4.17"
paste = "1.0"
regex = "1.10"

View File

@ -177,11 +177,11 @@ pub fn check_min_requirements() -> Result<()> {
#[rustfmt::skip]
#[macro_export]
macro_rules! scx_ops_open {
($builder: expr, $ops: ident) => { 'block: {
($builder: expr, $obj_ref: expr, $ops: ident) => { 'block: {
scx_utils::paste! {
scx_utils::unwrap_or_break!(scx_utils::compat::check_min_requirements(), 'block);
let mut skel = match $builder.open().context("Failed to open BPF program") {
let mut skel = match $builder.open($obj_ref).context("Failed to open BPF program") {
Ok(val) => val,
Err(e) => break 'block Err(e),
};
@ -239,8 +239,8 @@ macro_rules! scx_ops_attach {
.context("Failed to attach non-struct_ops BPF programs")
.and_then(|_| {
$skel
.maps_mut()
.$ops()
.maps
.$ops
.attach_struct_ops()
.context("Failed to attach struct_ops BPF programs")
})
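
With `scx_ops_open!` now threading the open-object reference through to `SkelBuilder::open()`, every scheduler in this diff adopts the same calling pattern: the `MaybeUninit<OpenObject>` is hoisted out of the init/restart loop and passed into init. A condensed, hypothetical sketch of that pattern (`my_ops` and the `debug` rodata field stand in for a real struct_ops and its tunables; the generated `BpfSkelBuilder` comes from the scheduler's own skeleton):

    use std::mem::MaybeUninit;

    use anyhow::{Context, Result};
    use libbpf_rs::skel::{OpenSkel, Skel, SkelBuilder};
    use scx_utils::{scx_ops_attach, scx_ops_load, scx_ops_open, uei_exited};

    fn run(verbose: bool) -> Result<()> {
        // Caller-owned storage for the open object; must outlive the skeleton,
        // so it sits outside any scheduler restart loop.
        let mut open_object = MaybeUninit::uninit();

        let mut skel_builder = BpfSkelBuilder::default();
        skel_builder.obj_builder.debug(verbose);

        // 0.23: scx_ops_open!(skel_builder, my_ops)?
        // The schedulers below already hold a `&mut MaybeUninit<OpenObject>`
        // and pass it directly as `open_object`.
        let mut skel = scx_ops_open!(skel_builder, &mut open_object, my_ops)?;
        skel.maps.rodata_data.debug = true;

        let mut skel = scx_ops_load!(skel, my_ops, uei)?;
        let _struct_ops = scx_ops_attach!(skel, my_ops)?;

        // uei_exited! now reads the exit info through skel.maps.data_data.
        while !uei_exited!(&skel, uei) {
            std::thread::sleep(std::time::Duration::from_secs(1));
        }
        Ok(())
    }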

View File

@ -51,7 +51,7 @@ pub enum ScxConsts {
macro_rules! uei_read {
($skel: expr, $uei:ident) => {{
scx_utils::paste! {
let bpf_uei = $skel.data().$uei;
let bpf_uei = $skel.maps.data_data.$uei;
let bpf_dump = scx_utils::UEI_DUMP_PTR_MUTEX.lock().unwrap().ptr;
let exit_code_ptr = match scx_utils::compat::struct_has_field("scx_exit_info", "exit_code") {
Ok(true) => &bpf_uei.exit_code as *const _,
@ -80,14 +80,14 @@ macro_rules! uei_set_size {
0 => scx_utils::ScxConsts::ExitDumpDflLen as u32,
v => v,
};
$skel.rodata_mut().[<$uei _dump_len>] = len;
$skel.maps_mut().[<data_ $uei _dump>]().set_value_size(len).unwrap();
$skel.maps.rodata_data.[<$uei _dump_len>] = len;
$skel.maps.[<data_ $uei _dump>].set_value_size(len).unwrap();
let mut ptr = scx_utils::UEI_DUMP_PTR_MUTEX.lock().unwrap();
*ptr = scx_utils::UeiDumpPtr { ptr:
$skel
.maps()
.[<data_ $uei _dump>]()
.maps
.[<data_ $uei _dump>]
.initial_value()
.unwrap()
.as_ptr() as *const _,
@ -101,7 +101,7 @@ macro_rules! uei_set_size {
#[macro_export]
macro_rules! uei_exited {
($skel: expr, $uei:ident) => {{
let bpf_uei = $skel.data().uei;
let bpf_uei = $skel.maps.data_data.uei;
(unsafe { std::ptr::read_volatile(&bpf_uei.kind as *const _) } != 0)
}};
}

View File

@ -10,7 +10,7 @@ license = "GPL-2.0-only"
anyhow = "1.0.65"
ctrlc = { version = "3.1", features = ["termination"] }
clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
libbpf-rs = "0.23.1"
libbpf-rs = "0.24.1"
log = "0.4.17"
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
simplelog = "0.12"

View File

@ -12,6 +12,7 @@ pub use bpf_intf::*;
use std::fs::File;
use std::io::Read;
use std::mem::MaybeUninit;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::sync::Arc;
@ -30,6 +31,7 @@ use metrics_exporter_prometheus::PrometheusBuilder;
use rlimit::{getrlimit, setrlimit, Resource};
use libbpf_rs::OpenObject;
use libbpf_rs::skel::OpenSkel;
use libbpf_rs::skel::Skel;
use libbpf_rs::skel::SkelBuilder;
@ -169,7 +171,7 @@ struct Scheduler<'a> {
}
impl<'a> Scheduler<'a> {
fn init(opts: &'a Opts) -> Result<Self> {
fn init(opts: &'a Opts, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
let (soft_limit, _) = getrlimit(Resource::MEMLOCK).unwrap();
setrlimit(Resource::MEMLOCK, soft_limit, rlimit::INFINITY).unwrap();
@ -191,19 +193,19 @@ impl<'a> Scheduler<'a> {
// Initialize BPF connector.
let mut skel_builder = BpfSkelBuilder::default();
skel_builder.obj_builder.debug(opts.verbose);
let mut skel = scx_ops_open!(skel_builder, bpfland_ops)?;
let mut skel = scx_ops_open!(skel_builder, open_object, bpfland_ops)?;
skel.struct_ops.bpfland_ops_mut().exit_dump_len = opts.exit_dump_len;
// Override default BPF scheduling parameters.
skel.rodata_mut().debug = opts.debug;
skel.rodata_mut().smt_enabled = smt_enabled;
skel.rodata_mut().local_kthreads = opts.local_kthreads;
skel.rodata_mut().slice_ns = opts.slice_us * 1000;
skel.rodata_mut().slice_ns_min = opts.slice_us_min * 1000;
skel.rodata_mut().slice_ns_lag = opts.slice_us_lag * 1000;
skel.rodata_mut().starvation_thresh_ns = opts.starvation_thresh_us * 1000;
skel.rodata_mut().nvcsw_max_thresh = opts.nvcsw_max_thresh;
skel.maps.rodata_data.debug = opts.debug;
skel.maps.rodata_data.smt_enabled = smt_enabled;
skel.maps.rodata_data.local_kthreads = opts.local_kthreads;
skel.maps.rodata_data.slice_ns = opts.slice_us * 1000;
skel.maps.rodata_data.slice_ns_min = opts.slice_us_min * 1000;
skel.maps.rodata_data.slice_ns_lag = opts.slice_us_lag * 1000;
skel.maps.rodata_data.starvation_thresh_ns = opts.starvation_thresh_us * 1000;
skel.maps.rodata_data.nvcsw_max_thresh = opts.nvcsw_max_thresh;
// Attach the scheduler.
let mut skel = scx_ops_load!(skel, bpfland_ops, uei)?;
@ -231,14 +233,14 @@ impl<'a> Scheduler<'a> {
}
fn update_stats(&mut self) {
let nr_cpus = self.skel.bss().nr_online_cpus;
let nr_running = self.skel.bss().nr_running;
let nr_interactive = self.skel.bss().nr_interactive;
let nr_waiting = self.skel.bss().nr_waiting;
let nvcsw_avg_thresh = self.skel.bss().nvcsw_avg_thresh;
let nr_direct_dispatches = self.skel.bss().nr_direct_dispatches;
let nr_prio_dispatches = self.skel.bss().nr_prio_dispatches;
let nr_shared_dispatches = self.skel.bss().nr_shared_dispatches;
let nr_cpus = self.skel.maps.bss_data.nr_online_cpus;
let nr_running = self.skel.maps.bss_data.nr_running;
let nr_interactive = self.skel.maps.bss_data.nr_interactive;
let nr_waiting = self.skel.maps.bss_data.nr_waiting;
let nvcsw_avg_thresh = self.skel.maps.bss_data.nvcsw_avg_thresh;
let nr_direct_dispatches = self.skel.maps.bss_data.nr_direct_dispatches;
let nr_prio_dispatches = self.skel.maps.bss_data.nr_prio_dispatches;
let nr_shared_dispatches = self.skel.maps.bss_data.nr_shared_dispatches;
// Update Prometheus statistics.
self.metrics
@ -328,8 +330,9 @@ fn main() -> Result<()> {
})
.context("Error setting Ctrl-C handler")?;
let mut open_object = MaybeUninit::uninit();
loop {
let mut sched = Scheduler::init(&opts)?;
let mut sched = Scheduler::init(&opts, &mut open_object)?;
if !sched.run(shutdown.clone())?.should_restart() {
break;
}

View File

@ -13,7 +13,7 @@ clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
ctrlc = { version = "3.1", features = ["termination"] }
fb_procfs = "0.7"
hex = "0.4.3"
libbpf-rs = "0.23.1"
libbpf-rs = "0.24.1"
libc = "0.2.137"
log = "0.4.17"
ordered-float = "3.4.0"

View File

@ -12,6 +12,7 @@ pub mod bpf_intf;
pub use bpf_intf::*;
use std::mem;
use std::mem::MaybeUninit;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::time::Duration;
@ -24,6 +25,7 @@ use std::str;
use anyhow::Context;
use anyhow::Result;
use clap::Parser;
use libbpf_rs::OpenObject;
use libbpf_rs::skel::OpenSkel;
use libbpf_rs::skel::Skel;
use libbpf_rs::skel::SkelBuilder;
@ -230,7 +232,10 @@ struct Scheduler<'a> {
}
impl<'a> Scheduler<'a> {
fn init(opts: &'a Opts) -> Result<Self> {
fn init(
opts: &'a Opts,
open_object: &'a mut MaybeUninit<OpenObject>,
) -> Result<Self> {
// Increase MEMLOCK size since the BPF scheduler might use
// more than the current limit
let (soft_limit, _) = getrlimit(Resource::MEMLOCK).unwrap();
@ -239,23 +244,23 @@ impl<'a> Scheduler<'a> {
// Open the BPF prog first for verification.
let mut skel_builder = BpfSkelBuilder::default();
skel_builder.obj_builder.debug(opts.verbose > 0);
let mut skel = scx_ops_open!(skel_builder, lavd_ops)?;
let mut skel = scx_ops_open!(skel_builder, open_object, lavd_ops)?;
// Initialize CPU order topologically sorted by a cpu, node, llc, max_freq, and core order
let topo = FlatTopology::new(opts.prefer_smt_core).expect("Failed to build host topology");
for (pos, cpu) in topo.cpu_fids().iter().enumerate() {
skel.rodata_mut().cpu_order[pos] = cpu.cpu_id as u16;
skel.rodata_mut().__cpu_capacity_hint[cpu.cpu_id] = cpu.cpu_cap as u16;
skel.maps.rodata_data.cpu_order[pos] = cpu.cpu_id as u16;
skel.maps.rodata_data.__cpu_capacity_hint[cpu.cpu_id] = cpu.cpu_cap as u16;
}
debug!("{}", topo);
// Initialize skel according to @opts.
let nr_cpus_onln = topo.nr_cpus_online() as u64;
skel.bss_mut().nr_cpus_onln = nr_cpus_onln;
skel.maps.bss_data.nr_cpus_onln = nr_cpus_onln;
skel.struct_ops.lavd_ops_mut().exit_dump_len = opts.exit_dump_len;
skel.rodata_mut().no_core_compaction = opts.no_core_compaction;
skel.rodata_mut().no_freq_scaling = opts.no_freq_scaling;
skel.rodata_mut().verbose = opts.verbose;
skel.maps.rodata_data.no_core_compaction = opts.no_core_compaction;
skel.maps.rodata_data.no_freq_scaling = opts.no_freq_scaling;
skel.maps.rodata_data.verbose = opts.verbose;
let intrspc = introspec::init(opts);
// Attach.
@ -263,8 +268,7 @@ impl<'a> Scheduler<'a> {
let struct_ops = Some(scx_ops_attach!(skel, lavd_ops)?);
// Build a ring buffer for instrumentation
let mut maps = skel.maps_mut();
let rb_map = maps.introspec_msg();
let rb_map = &mut skel.maps.introspec_msg;
let mut builder = libbpf_rs::RingBufferBuilder::new();
builder.add(rb_map, Scheduler::print_bpf_msg).unwrap();
let rb_mgr = builder.build().unwrap();
@ -390,9 +394,9 @@ impl<'a> Scheduler<'a> {
}
self.intrspc.requested = true as u8;
self.skel.bss_mut().intrspc.cmd = self.intrspc.cmd;
self.skel.bss_mut().intrspc.arg = self.intrspc.arg;
self.skel.bss_mut().intrspc.requested = self.intrspc.requested;
self.skel.maps.bss_data.intrspc.cmd = self.intrspc.cmd;
self.skel.maps.bss_data.intrspc.arg = self.intrspc.arg;
self.skel.maps.bss_data.intrspc.requested = self.intrspc.requested;
interval_ms
}
@ -472,8 +476,9 @@ fn main() -> Result<()> {
init_log(&opts);
init_signal_handlers();
let mut open_object = MaybeUninit::uninit();
loop {
let mut sched = Scheduler::init(&opts)?;
let mut sched = Scheduler::init(&opts, &mut open_object)?;
info!(
"scx_lavd scheduler is initialized (build ID: {})",
*build_id::SCX_FULL_VERSION

View File

@ -13,7 +13,7 @@ clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
ctrlc = { version = "3.1", features = ["termination"] }
fb_procfs = "0.7"
lazy_static = "1.4"
libbpf-rs = "0.23.1"
libbpf-rs = "0.24.1"
libc = "0.2.137"
log = "0.4.17"
prometheus-client = "0.19"

View File

@ -14,6 +14,7 @@ use std::ffi::CString;
use std::fs;
use std::io::Read;
use std::io::Write;
use std::mem::MaybeUninit;
use std::ops::Sub;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
@ -28,6 +29,8 @@ use anyhow::Context;
use anyhow::Result;
use bitvec::prelude::*;
use clap::Parser;
use libbpf_rs::MapCore as _;
use libbpf_rs::OpenObject;
use libbpf_rs::skel::OpenSkel;
use libbpf_rs::skel::Skel;
use libbpf_rs::skel::SkelBuilder;
@ -558,8 +561,8 @@ fn cachemask_from_llcs(llcs: &BTreeMap<usize, Cache>) -> usize {
fn read_cpu_ctxs(skel: &BpfSkel) -> Result<Vec<bpf_intf::cpu_ctx>> {
let mut cpu_ctxs = vec![];
let cpu_ctxs_vec = skel
.maps()
.cpu_ctxs()
.maps
.cpu_ctxs
.lookup_percpu(&0u32.to_ne_bytes(), libbpf_rs::MapFlags::ANY)
.context("Failed to lookup cpu_ctx")?
.unwrap();
@ -651,7 +654,7 @@ impl Stats {
fn read_layer_loads(skel: &mut BpfSkel, nr_layers: usize) -> (f64, Vec<f64>) {
let now_mono = now_monotonic();
let layer_loads: Vec<f64> = skel
.bss()
.maps.bss_data
.layers
.iter()
.take(nr_layers)
@ -684,7 +687,7 @@ impl Stats {
}
fn new(skel: &mut BpfSkel, proc_reader: &procfs::ProcReader) -> Result<Self> {
let nr_layers = skel.rodata().nr_layers as usize;
let nr_layers = skel.maps.rodata_data.nr_layers as usize;
let bpf_stats = BpfStats::read(&read_cpu_ctxs(skel)?, nr_layers);
Ok(Self {
@ -718,7 +721,7 @@ impl Stats {
let cpu_ctxs = read_cpu_ctxs(skel)?;
let nr_layer_tasks: Vec<usize> = skel
.bss()
.maps.bss_data
.layers
.iter()
.take(self.nr_layers)
@ -1266,11 +1269,11 @@ struct Scheduler<'a, 'b> {
impl<'a, 'b> Scheduler<'a, 'b> {
fn init_layers(skel: &mut OpenBpfSkel, opts: &Opts, specs: &Vec<LayerSpec>, topo: &Topology) -> Result<()> {
skel.rodata_mut().nr_layers = specs.len() as u32;
skel.maps.rodata_data.nr_layers = specs.len() as u32;
let mut perf_set = false;
for (spec_i, spec) in specs.iter().enumerate() {
let layer = &mut skel.bss_mut().layers[spec_i];
let layer = &mut skel.maps.bss_data.layers[spec_i];
for (or_i, or) in spec.matches.iter().enumerate() {
for (and_i, and) in or.iter().enumerate() {
@ -1386,26 +1389,26 @@ impl<'a, 'b> Scheduler<'a, 'b> {
}
fn init_nodes(skel: &mut OpenBpfSkel, _opts: &Opts, topo: &Topology) {
skel.rodata_mut().nr_nodes = topo.nodes().len() as u32;
skel.rodata_mut().nr_llcs = 0;
skel.maps.rodata_data.nr_nodes = topo.nodes().len() as u32;
skel.maps.rodata_data.nr_llcs = 0;
for node in topo.nodes() {
info!("configuring node {}, LLCs {:?}", node.id(), node.llcs().len());
skel.rodata_mut().nr_llcs += node.llcs().len() as u32;
skel.maps.rodata_data.nr_llcs += node.llcs().len() as u32;
for (_, llc) in node.llcs() {
info!("configuring llc {:?} for node {:?}", llc.id(), node.id());
skel.rodata_mut().llc_numa_id_map[llc.id()] = node.id() as u32;
skel.maps.rodata_data.llc_numa_id_map[llc.id()] = node.id() as u32;
}
}
for (_, cpu) in topo.cpus() {
skel.rodata_mut().cpu_llc_id_map[cpu.id()] = cpu.llc_id() as u32;
skel.maps.rodata_data.cpu_llc_id_map[cpu.id()] = cpu.llc_id() as u32;
}
}
fn init(opts: &Opts, layer_specs: &'b Vec<LayerSpec>) -> Result<Self> {
fn init(opts: &Opts, layer_specs: &'b Vec<LayerSpec>, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
let nr_layers = layer_specs.len();
let topo = Topology::new()?;
let cpu_pool = CpuPool::new()?;
@ -1414,7 +1417,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
let mut skel_builder = BpfSkelBuilder::default();
skel_builder.obj_builder.debug(opts.verbose > 1);
init_libbpf_logging(None);
let mut skel = scx_ops_open!(skel_builder, layered)?;
let mut skel = scx_ops_open!(skel_builder, open_object, layered)?;
// scheduler_tick() got renamed to sched_tick() during v6.10-rc.
let sched_tick_name = match compat::ksym_exists("sched_tick")? {
@ -1422,29 +1425,29 @@ impl<'a, 'b> Scheduler<'a, 'b> {
false => "scheduler_tick",
};
skel.progs_mut()
.sched_tick_fentry()
skel.progs
.sched_tick_fentry
.set_attach_target(0, Some(sched_tick_name.into()))
.context("Failed to set attach target for sched_tick_fentry()")?;
// Initialize skel according to @opts.
skel.struct_ops.layered_mut().exit_dump_len = opts.exit_dump_len;
skel.rodata_mut().debug = opts.verbose as u32;
skel.rodata_mut().slice_ns = opts.slice_us * 1000;
skel.rodata_mut().max_exec_ns = if opts.max_exec_us > 0 {
skel.maps.rodata_data.debug = opts.verbose as u32;
skel.maps.rodata_data.slice_ns = opts.slice_us * 1000;
skel.maps.rodata_data.max_exec_ns = if opts.max_exec_us > 0 {
opts.max_exec_us * 1000
} else {
opts.slice_us * 1000 * 20
};
skel.rodata_mut().nr_possible_cpus = *NR_POSSIBLE_CPUS as u32;
skel.rodata_mut().smt_enabled = cpu_pool.nr_cpus > cpu_pool.nr_cores;
skel.rodata_mut().disable_topology = opts.disable_topology;
skel.maps.rodata_data.nr_possible_cpus = *NR_POSSIBLE_CPUS as u32;
skel.maps.rodata_data.smt_enabled = cpu_pool.nr_cpus > cpu_pool.nr_cores;
skel.maps.rodata_data.disable_topology = opts.disable_topology;
for (cpu, sib) in cpu_pool.sibling_cpu.iter().enumerate() {
skel.rodata_mut().__sibling_cpu[cpu] = *sib;
skel.maps.rodata_data.__sibling_cpu[cpu] = *sib;
}
for cpu in cpu_pool.all_cpus.iter_ones() {
skel.rodata_mut().all_cpus[cpu / 8] |= 1 << (cpu % 8);
skel.maps.rodata_data.all_cpus[cpu / 8] |= 1 << (cpu % 8);
}
Self::init_layers(&mut skel, opts, layer_specs, &topo)?;
Self::init_nodes(&mut skel, opts, &topo);
@ -1497,7 +1500,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
Ok(sched)
}
fn update_bpf_layer_cpumask(layer: &Layer, bpf_layer: &mut bpf_types::layer) {
fn update_bpf_layer_cpumask(layer: &Layer, bpf_layer: &mut types::layer) {
for bit in 0..layer.cpus.len() {
if layer.cpus[bit] {
bpf_layer.cpus[bit / 8] |= 1 << (bit % 8);
@ -1542,7 +1545,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
{
Self::update_bpf_layer_cpumask(
&self.layers[idx],
&mut self.skel.bss_mut().layers[idx],
&mut self.skel.maps.bss_data.layers[idx],
);
updated = true;
}
@ -1554,7 +1557,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
if updated {
for idx in 0..self.layers.len() {
let layer = &mut self.layers[idx];
let bpf_layer = &mut self.skel.bss_mut().layers[idx];
let bpf_layer = &mut self.skel.maps.bss_data.layers[idx];
match &layer.kind {
LayerKind::Open { .. } => {
let available_cpus = self.cpu_pool.available_cpus_in_mask(&layer.allowed_cpus);
@ -1569,7 +1572,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
}
}
self.skel.bss_mut().fallback_cpu = self.cpu_pool.fallback_cpu as u32;
self.skel.maps.bss_data.fallback_cpu = self.cpu_pool.fallback_cpu as u32;
for (lidx, layer) in self.layers.iter().enumerate() {
self.nr_layer_cpus_min_max[lidx] = (
@ -2183,8 +2186,9 @@ fn main() -> Result<()> {
}
}
let mut open_object = MaybeUninit::uninit();
loop {
let mut sched = Scheduler::init(&opts, &layer_config.specs)?;
let mut sched = Scheduler::init(&opts, &layer_config.specs, &mut open_object)?;
if !sched.run(shutdown.clone())?.should_restart() {
break;
}

View File

@ -15,7 +15,7 @@ ctrlc = { version = "3.1", features = ["termination"] }
fb_procfs = "0.7"
itertools = "0.12.1"
lazy_static = "1.4"
libbpf-rs = "0.23.1"
libbpf-rs = "0.24.1"
libc = "0.2.137"
log = "0.4.17"
maplit = "1.0.2"

View File

@ -262,13 +262,13 @@ struct Scheduler<'a> {
}
impl<'a> Scheduler<'a> {
fn init(opts: &Opts) -> Result<Self> {
fn init(opts: &Opts, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
let mut cpu_pool = CpuPool::new()?;
let mut skel_builder = BpfSkelBuilder::default();
skel_builder.obj_builder.debug(opts.verbose > 1);
init_libbpf_logging(None);
let mut skel = scx_ops_open!(skel_builder, mitosis)?;
let mut skel = scx_ops_open!(skel_builder, open_object, mitosis)?;
// scheduler_tick() got renamed to sched_tick() during v6.10-rc.
let sched_tick_name = match compat::ksym_exists("sched_tick")? {
@ -284,12 +284,12 @@ impl<'a> Scheduler<'a> {
skel.struct_ops.mitosis_mut().exit_dump_len = opts.exit_dump_len;
if opts.verbose >= 1 {
skel.rodata_mut().debug = true;
skel.maps.rodata_data.debug = true;
}
skel.rodata_mut().nr_possible_cpus = *NR_POSSIBLE_CPUS as u32;
skel.maps.rodata_data.nr_possible_cpus = *NR_POSSIBLE_CPUS as u32;
for cpu in cpu_pool.all_cpus.iter_ones() {
skel.rodata_mut().all_cpus[cpu / 8] |= 1 << (cpu % 8);
skel.bss_mut().cells[0].cpus[cpu / 8] |= 1 << (cpu % 8);
skel.maps.rodata_data.all_cpus[cpu / 8] |= 1 << (cpu % 8);
skel.maps.bss_data.cells[0].cpus[cpu / 8] |= 1 << (cpu % 8);
}
for _ in 0..cpu_pool.all_cpus.count_ones() {
cpu_pool.alloc();
@ -345,7 +345,7 @@ impl<'a> Scheduler<'a> {
let total_load = self.collect_cgroup_load()?;
self.debug()?;
let mut reconfigured = false;
if self.skel.bss().user_global_seq != self.skel.bss().global_seq {
if self.skel.maps.bss_data.user_global_seq != self.skel.maps.bss_data.global_seq {
trace!("BPF reconfiguration still in progress, skipping further changes");
continue;
} else if self.last_reconfiguration.elapsed() >= self.reconfiguration_interval {
@ -396,13 +396,13 @@ impl<'a> Scheduler<'a> {
})?;
trace!("Assigned {} to {}", cgroup, cell_idx);
}
self.skel.bss_mut().update_cell_assignment = true;
self.skel.maps.bss_data.update_cell_assignment = true;
Ok(())
}
fn trigger_reconfiguration(&mut self) {
trace!("Triggering Reconfiguration");
self.skel.bss_mut().user_global_seq += 1;
self.skel.maps.bss_data.user_global_seq += 1;
}
/// Iterate through each cg in the cgroupfs, read its load from BPF and
@ -639,7 +639,7 @@ impl<'a> Scheduler<'a> {
.free(&mut cell.cpu_assignment)
.ok_or(anyhow!("No cpus to free"))?;
trace!("Freeing {} from Cell {}", freed_cpu, cell_idx);
self.skel.bss_mut().cells[*cell_idx as usize].cpus[freed_cpu / 8] &=
self.skel.maps.bss_data.cells[*cell_idx as usize].cpus[freed_cpu / 8] &=
!(1 << freed_cpu % 8);
}
}
@ -656,10 +656,10 @@ impl<'a> Scheduler<'a> {
.ok_or(anyhow!("No cpus to allocate"))?;
trace!("Allocating {} to Cell {}", new_cpu, cell_idx);
cell.cpu_assignment.set(new_cpu, true);
self.skel.bss_mut().cells[*cell_idx as usize].cpus[new_cpu / 8] |= 1 << new_cpu % 8;
self.skel.maps.bss_data.cells[*cell_idx as usize].cpus[new_cpu / 8] |= 1 << new_cpu % 8;
}
}
for (cell_idx, cell) in self.skel.bss().cells.iter().enumerate() {
for (cell_idx, cell) in self.skel.maps.bss_data.cells.iter().enumerate() {
trace!("Cell {} Cpumask {:X?}", cell_idx, cell.cpus);
}
Ok(())
@ -738,8 +738,8 @@ impl<'a> Scheduler<'a> {
cell1.cgroups.append(&mut cell2.cgroups);
// XXX: I don't love manipulating the CPU mask here and not in assign_cpus
for cpu in cell2.cpu_assignment.iter_ones() {
self.skel.bss_mut().cells[merge.cell1 as usize].cpus[cpu / 8] |= 1 << cpu % 8;
self.skel.bss_mut().cells[merge.cell2 as usize].cpus[cpu / 8] &= !(1 << cpu % 8);
self.skel.maps.bss_data.cells[merge.cell1 as usize].cpus[cpu / 8] |= 1 << cpu % 8;
self.skel.maps.bss_data.cells[merge.cell2 as usize].cpus[cpu / 8] &= !(1 << cpu % 8);
}
cell1.cpu_assignment |= cell2.cpu_assignment;
cell1.load += cell2.load;
@ -827,8 +827,9 @@ fn main() -> Result<()> {
})
.context("Error setting Ctrl-C handler")?;
let mut open_object = MaybeUninit::uninit();
loop {
let mut sched = Scheduler::init(&opts)?;
let mut sched = Scheduler::init(&opts, &mut open_object)?;
if !sched.run(shutdown.clone())?.should_restart() {
break;
}

View File

@ -10,7 +10,7 @@ license = "GPL-2.0-only"
anyhow = "1.0.65"
plain = "0.2.3"
ctrlc = { version = "3.1", features = ["termination"] }
libbpf-rs = "0.23.1"
libbpf-rs = "0.24"
libc = "0.2.137"
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.2" }

View File

@ -11,6 +11,9 @@ use bpf::*;
use scx_utils::UserExitInfo;
use libbpf_rs::OpenObject;
use std::mem::MaybeUninit;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::sync::Arc;
@ -24,8 +27,9 @@ struct Scheduler<'a> {
}
impl<'a> Scheduler<'a> {
fn init() -> Result<Self> {
fn init(open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
let bpf = BpfScheduler::init(
open_object,
0, // exit_dump_len (buffer size of exit info)
false, // partial (include all tasks if false)
5000, // slice_ns (default task time slice)
@ -141,8 +145,9 @@ fn main() -> Result<()> {
shutdown_clone.store(true, Ordering::Relaxed);
})?;
let mut open_object = MaybeUninit::uninit();
loop {
let mut sched = Scheduler::init()?;
let mut sched = Scheduler::init(&mut open_object)?;
if !sched.run(shutdown.clone())?.should_restart() {
break;
}

View File

@ -12,7 +12,7 @@ plain = "0.2.3"
clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
ctrlc = { version = "3.1", features = ["termination"] }
fb_procfs = "0.7"
libbpf-rs = "0.23.1"
libbpf-rs = "0.24.1"
libc = "0.2.137"
log = "0.4.17"
ordered-float = "3.4.0"

View File

@ -17,7 +17,7 @@ use std::thread;
use std::collections::BTreeSet;
use std::collections::HashMap;
use std::mem::MaybeUninit;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::sync::Arc;
@ -30,6 +30,7 @@ use std::path::Path;
use anyhow::Context;
use anyhow::Result;
use clap::Parser;
use libbpf_rs::OpenObject;
use log::info;
use log::warn;
@ -275,13 +276,17 @@ struct Scheduler<'a> {
}
impl<'a> Scheduler<'a> {
fn init(opts: &Opts) -> Result<Self> {
fn init(
opts: &Opts,
open_object: &'a mut MaybeUninit<OpenObject>,
) -> Result<Self> {
// Initialize core mapping topology.
let topo = Topology::new().expect("Failed to build host topology");
let topo_map = TopologyMap::new(&topo).expect("Failed to generate topology map");
// Low-level BPF connector.
let bpf = BpfScheduler::init(
open_object,
opts.exit_dump_len,
opts.partial,
opts.slice_us,
@ -693,8 +698,9 @@ fn main() -> Result<()> {
})
.context("Error setting Ctrl-C handler")?;
let mut open_object = MaybeUninit::uninit();
loop {
let mut sched = Scheduler::init(&opts)?;
let mut sched = Scheduler::init(&opts, &mut open_object)?;
// Start the scheduler.
if !sched.run(shutdown.clone())?.should_restart() {
break;

View File

@ -11,7 +11,7 @@ anyhow = "1.0.65"
clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
ctrlc = { version = "3.1", features = ["termination"] }
fb_procfs = "0.7"
libbpf-rs = "0.23.1"
libbpf-rs = "0.24.1"
libc = "0.2.137"
log = "0.4.17"
ordered-float = "3.4.0"

View File

@ -143,6 +143,7 @@ use std::sync::Arc;
use anyhow::bail;
use anyhow::Context;
use anyhow::Result;
use libbpf_rs::MapCore as _;
use log::debug;
use log::warn;
use ordered_float::OrderedFloat;
@ -368,7 +369,7 @@ impl Domain {
let dom_id: u32 = other.id.try_into().unwrap();
// Ask BPF code to execute the migration.
if let Err(e) = skel.maps_mut().lb_data().update(
if let Err(e) = skel.maps.lb_data.update(
&cpid,
&dom_id.to_ne_bytes(),
libbpf_rs::MapFlags::NO_EXIST,
@ -603,9 +604,8 @@ impl<'a, 'b> LoadBalancer<'a, 'b> {
fn calculate_load_avgs(&mut self) -> Result<LoadLedger> {
const NUM_BUCKETS: u64 = bpf_intf::consts_LB_LOAD_BUCKETS as u64;
let now_mono = now_monotonic();
let load_half_life = self.skel.rodata().load_half_life;
let maps = self.skel.maps();
let dom_data = maps.dom_data();
let load_half_life = self.skel.maps.rodata_data.load_half_life;
let dom_data = &self.skel.maps.dom_data;
let mut aggregator = LoadAggregator::new(self.dom_group.weight(), !self.lb_apply_weight.clone());
@ -680,20 +680,19 @@ impl<'a, 'b> LoadBalancer<'a, 'b> {
// Read active_pids and update read_idx and gen.
const MAX_PIDS: u64 = bpf_intf::consts_MAX_DOM_ACTIVE_PIDS as u64;
let active_pids = &mut self.skel.bss_mut().dom_active_pids[dom.id];
let active_pids = &mut self.skel.maps.bss_data.dom_active_pids[dom.id];
let (mut ridx, widx) = (active_pids.read_idx, active_pids.write_idx);
active_pids.read_idx = active_pids.write_idx;
active_pids.gen += 1;
let active_pids = &self.skel.bss().dom_active_pids[dom.id];
let active_pids = &self.skel.maps.bss_data.dom_active_pids[dom.id];
if widx - ridx > MAX_PIDS {
ridx = widx - MAX_PIDS;
}
// Read task_ctx and load.
let load_half_life = self.skel.rodata().load_half_life;
let maps = self.skel.maps();
let task_data = maps.task_data();
let load_half_life = self.skel.maps.rodata_data.load_half_life;
let task_data = &self.skel.maps.task_data;
let now_mono = now_monotonic();
for idx in ridx..widx {
@ -1093,7 +1092,7 @@ impl<'a, 'b> LoadBalancer<'a, 'b> {
}
fn perform_balancing(&mut self) -> Result<()> {
clear_map(self.skel.maps().lb_data());
clear_map(&self.skel.maps.lb_data);
// First balance load between the NUMA nodes. Balancing here has a
// higher cost function than balancing between domains inside of NUMA

View File

@ -16,6 +16,7 @@ pub mod load_balance;
use load_balance::LoadBalancer;
use load_balance::NumaStat;
use std::mem::MaybeUninit;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::sync::Arc;
@ -31,6 +32,8 @@ use anyhow::bail;
use anyhow::Context;
use anyhow::Result;
use clap::Parser;
use libbpf_rs::MapCore as _;
use libbpf_rs::OpenObject;
use libbpf_rs::skel::OpenSkel;
use libbpf_rs::skel::Skel;
use libbpf_rs::skel::SkelBuilder;
@ -305,13 +308,16 @@ struct Scheduler<'a> {
}
impl<'a> Scheduler<'a> {
fn init(opts: &Opts) -> Result<Self> {
fn init(
opts: &Opts,
open_object: &'a mut MaybeUninit<OpenObject>,
) -> Result<Self> {
// Open the BPF prog first for verification.
let mut skel_builder = BpfSkelBuilder::default();
skel_builder.obj_builder.debug(opts.verbose > 0);
init_libbpf_logging(None);
info!("Running scx_rusty (build ID: {})", *build_id::SCX_FULL_VERSION);
let mut skel = scx_ops_open!(skel_builder, rusty).unwrap();
let mut skel = scx_ops_open!(skel_builder, open_object, rusty).unwrap();
// Initialize skel according to @opts.
let top = Arc::new(Topology::new()?);
@ -334,21 +340,21 @@ impl<'a> Scheduler<'a> {
);
}
skel.rodata_mut().nr_nodes = domains.nr_nodes() as u32;
skel.rodata_mut().nr_doms = domains.nr_doms() as u32;
skel.rodata_mut().nr_cpu_ids = top.nr_cpu_ids() as u32;
skel.maps.rodata_data.nr_nodes = domains.nr_nodes() as u32;
skel.maps.rodata_data.nr_doms = domains.nr_doms() as u32;
skel.maps.rodata_data.nr_cpu_ids = top.nr_cpu_ids() as u32;
// Any CPU with dom > MAX_DOMS is considered offline by default. There
// are a few places in the BPF code where we skip over offlined CPUs
// (e.g. when initializing or refreshing tune params), and elsewhere the
// scheduler will error if we try to schedule from them.
for cpu in 0..top.nr_cpu_ids() {
skel.rodata_mut().cpu_dom_id_map[cpu] = u32::MAX;
skel.maps.rodata_data.cpu_dom_id_map[cpu] = u32::MAX;
}
for (id, dom) in domains.doms().iter() {
for cpu in dom.mask().into_iter() {
skel.rodata_mut().cpu_dom_id_map[cpu] = id
skel.maps.rodata_data.cpu_dom_id_map[cpu] = id
.clone()
.try_into()
.expect("Domain ID could not fit into 32 bits");
@ -364,17 +370,17 @@ impl<'a> Scheduler<'a> {
}
let raw_numa_slice = numa_mask.as_raw_slice();
let node_cpumask_slice = &mut skel.rodata_mut().numa_cpumasks[numa];
let node_cpumask_slice = &mut skel.maps.rodata_data.numa_cpumasks[numa];
let (left, _) = node_cpumask_slice.split_at_mut(raw_numa_slice.len());
left.clone_from_slice(raw_numa_slice);
info!("NUMA[{:02}] mask= {}", numa, numa_mask);
for dom in node_domains.iter() {
let raw_dom_slice = dom.mask_slice();
let dom_cpumask_slice = &mut skel.rodata_mut().dom_cpumasks[dom.id()];
let dom_cpumask_slice = &mut skel.maps.rodata_data.dom_cpumasks[dom.id()];
let (left, _) = dom_cpumask_slice.split_at_mut(raw_dom_slice.len());
left.clone_from_slice(raw_dom_slice);
skel.rodata_mut().dom_numa_id_map[dom.id()] =
skel.maps.rodata_data.dom_numa_id_map[dom.id()] =
numa.try_into().expect("NUMA ID could not fit into 32 bits");
info!(" DOM[{:02}] mask= {}", dom.id(), dom.mask());
@ -386,14 +392,14 @@ impl<'a> Scheduler<'a> {
}
skel.struct_ops.rusty_mut().exit_dump_len = opts.exit_dump_len;
skel.rodata_mut().load_half_life = (opts.load_half_life * 1000000000.0) as u32;
skel.rodata_mut().kthreads_local = opts.kthreads_local;
skel.rodata_mut().fifo_sched = opts.fifo_sched;
skel.rodata_mut().greedy_threshold = opts.greedy_threshold;
skel.rodata_mut().greedy_threshold_x_numa = opts.greedy_threshold_x_numa;
skel.rodata_mut().direct_greedy_numa = opts.direct_greedy_numa;
skel.rodata_mut().mempolicy_affinity = opts.mempolicy_affinity;
skel.rodata_mut().debug = opts.verbose as u32;
skel.maps.rodata_data.load_half_life = (opts.load_half_life * 1000000000.0) as u32;
skel.maps.rodata_data.kthreads_local = opts.kthreads_local;
skel.maps.rodata_data.fifo_sched = opts.fifo_sched;
skel.maps.rodata_data.greedy_threshold = opts.greedy_threshold;
skel.maps.rodata_data.greedy_threshold_x_numa = opts.greedy_threshold_x_numa;
skel.maps.rodata_data.direct_greedy_numa = opts.direct_greedy_numa;
skel.maps.rodata_data.mempolicy_affinity = opts.mempolicy_affinity;
skel.maps.rodata_data.debug = opts.verbose as u32;
// Attach.
let mut skel = scx_ops_load!(skel, rusty, uei)?;
@ -487,8 +493,7 @@ impl<'a> Scheduler<'a> {
}
fn read_bpf_stats(&mut self) -> Result<Vec<u64>> {
let mut maps = self.skel.maps_mut();
let stats_map = maps.stats();
let stats_map = &mut self.skel.maps.stats;
let mut stats: Vec<u64> = Vec::new();
let zero_vec =
vec![vec![0u8; stats_map.value_size() as usize]; self.top.nr_cpus_possible()];
@ -686,8 +691,9 @@ fn main() -> Result<()> {
.expect("failed to install log recorder");
}
let mut open_object = MaybeUninit::uninit();
loop {
let mut sched = Scheduler::init(&opts)?;
let mut sched = Scheduler::init(&opts, &mut open_object)?;
if !sched.run(shutdown.clone())?.should_restart() {
break;
}

View File

@ -160,7 +160,7 @@ impl Tuner {
}
}
let ti = &mut skel.bss_mut().tune_input;
let ti = &mut skel.maps.bss_data.tune_input;
let write_to_bpf = |target: &mut [u64; 8], mask: &Cpumask| {
let raw_slice = mask.as_raw_slice();
let (left, _) = target.split_at_mut(raw_slice.len());