Mirror of https://github.com/sched-ext/scx.git, synced 2024-12-03 15:57:12 +00:00

commit b264787dde

Merge pull request #436 from danielocfb/topic/inlined-skeleton-members

rust: Updated libbpf-rs & libbpf-cargo to 0.24
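The change is mechanical but wide: libbpf-rs 0.24 generates skeletons whose maps, programs, and datasec views are plain inlined fields (`skel.maps.rodata_data`, `skel.progs.foo`) instead of accessor methods (`skel.rodata_mut()`, `skel.progs_mut().foo()`), and `SkelBuilder::open` now borrows caller-provided storage for the underlying `OpenObject`. A minimal before/after sketch of the pattern every hunk below follows; the skeleton name `ExampleSkel` and its member names are illustrative, not part of this commit:

```rust
use std::mem::MaybeUninit;

use anyhow::Result;
use libbpf_rs::skel::{OpenSkel, Skel, SkelBuilder};
use libbpf_rs::OpenObject;

fn open_and_load(open_object: &mut MaybeUninit<OpenObject>) -> Result<()> {
    // 0.23: ExampleSkelBuilder::default().open()?
    // 0.24: the caller owns the storage the open object lives in, so the
    // skeleton can borrow from it for its whole lifetime.
    let mut open_skel = ExampleSkelBuilder::default().open(open_object)?;

    // 0.23: open_skel.rodata_mut().debug = true;
    open_skel.maps.rodata_data.debug = true; // datasecs are fields under `maps`

    let skel = open_skel.load()?;
    // 0.23: skel.maps().some_map(); skel.progs().some_prog();
    let _map = &skel.maps.some_map;   // maps are plain fields now
    let _prog = &skel.progs.some_prog; // so are programs
    Ok(())
}
```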
@@ -10,7 +10,7 @@ description = "Framework to implement sched_ext schedulers running in user space
 [dependencies]
 anyhow = "1.0.65"
 plain = "0.2.3"
-libbpf-rs = "0.23.1"
+libbpf-rs = "0.24"
 libc = "0.2.137"
 scx_utils = { path = "../scx_utils", version = "1.0.2" }
@@ -3,6 +3,8 @@
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2.

+use std::mem::MaybeUninit;
+
 use crate::bpf_intf;
 use crate::bpf_skel::*;
@@ -14,6 +16,7 @@ use anyhow::Result;

 use plain::Plain;

+use libbpf_rs::OpenObject;
 use libbpf_rs::skel::OpenSkel;
 use libbpf_rs::skel::Skel;
 use libbpf_rs::skel::SkelBuilder;
@@ -189,6 +192,7 @@ fn is_smt_active() -> std::io::Result<bool> {

 impl<'cb> BpfScheduler<'cb> {
     pub fn init(
+        open_object: &'cb mut MaybeUninit<OpenObject>,
         exit_dump_len: u32,
         partial: bool,
         slice_us: u64,
@@ -200,7 +204,7 @@ impl<'cb> BpfScheduler<'cb> {
         // Open the BPF prog first for verification.
         let mut skel_builder = BpfSkelBuilder::default();
         skel_builder.obj_builder.debug(verbose);
-        let mut skel = scx_ops_open!(skel_builder, rustland)?;
+        let mut skel = scx_ops_open!(skel_builder, open_object, rustland)?;

         // Lock all the memory to prevent page faults that could trigger potential deadlocks during
         // scheduling.
@@ -242,7 +246,7 @@ impl<'cb> BpfScheduler<'cb> {
         }

         // Check host topology to determine if we need to enable SMT capabilities.
-        skel.rodata_mut().smt_enabled = is_smt_active()?;
+        skel.maps.rodata_data.smt_enabled = is_smt_active()?;

         // Set scheduler options (defined in the BPF part).
         if partial {
@@ -250,26 +254,26 @@ impl<'cb> BpfScheduler<'cb> {
         }
         skel.struct_ops.rustland_mut().exit_dump_len = exit_dump_len;

-        skel.bss_mut().usersched_pid = std::process::id();
-        skel.rodata_mut().slice_ns = slice_us * 1000;
-        skel.rodata_mut().full_user = full_user;
-        skel.rodata_mut().low_power = low_power;
-        skel.rodata_mut().debug = debug;
+        skel.maps.bss_data.usersched_pid = std::process::id();
+        skel.maps.rodata_data.slice_ns = slice_us * 1000;
+        skel.maps.rodata_data.full_user = full_user;
+        skel.maps.rodata_data.low_power = low_power;
+        skel.maps.rodata_data.debug = debug;

         // Attach BPF scheduler.
         let mut skel = scx_ops_load!(skel, rustland, uei)?;
         let struct_ops = Some(scx_ops_attach!(skel, rustland)?);

         // Build the ring buffer of queued tasks.
-        let maps = skel.maps();
-        let queued_ring_buffer = maps.queued();
+        let maps = &skel.maps;
+        let queued_ring_buffer = &maps.queued;
         let mut rbb = libbpf_rs::RingBufferBuilder::new();
         rbb.add(queued_ring_buffer, callback)
             .expect("failed to add ringbuf callback");
         let queued = rbb.build().expect("failed to build ringbuf");

         // Build the user ring buffer of dispatched tasks.
-        let dispatched = libbpf_rs::UserRingBuffer::new(&maps.dispatched())
+        let dispatched = libbpf_rs::UserRingBuffer::new(&maps.dispatched)
             .expect("failed to create user ringbuf");

         // Make sure to use the SCHED_EXT class at least for the scheduler itself.
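Condensing the ring-buffer wiring above: under 0.24 the maps are passed as direct field references rather than via accessor calls. A sketch, using the `queued` and `dispatched` map names from this file and an illustrative `callback`:

```rust
// `skel` is the loaded skeleton from above; `callback` is a user-supplied
// FnMut(&[u8]) -> i32 invoked for every record posted by the BPF side.
let mut rbb = libbpf_rs::RingBufferBuilder::new();
rbb.add(&skel.maps.queued, callback)?; // 0.23: rbb.add(skel.maps().queued(), ...)
let queued = rbb.build()?;

// User ring buffers carry data the other way (user space -> BPF).
let dispatched = libbpf_rs::UserRingBuffer::new(&skel.maps.dispatched)?;
```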
@@ -297,71 +301,71 @@ impl<'cb> BpfScheduler<'cb> {
     // busy loop, causing unnecessary high CPU consumption.
     pub fn update_tasks(&mut self, nr_queued: Option<u64>, nr_scheduled: Option<u64>) {
         if let Some(queued) = nr_queued {
-            self.skel.bss_mut().nr_queued = queued;
+            self.skel.maps.bss_data.nr_queued = queued;
         }
         if let Some(scheduled) = nr_scheduled {
-            self.skel.bss_mut().nr_scheduled = scheduled;
+            self.skel.maps.bss_data.nr_scheduled = scheduled;
         }
     }

     // Counter of the online CPUs.
     #[allow(dead_code)]
     pub fn nr_online_cpus_mut(&mut self) -> &mut u64 {
-        &mut self.skel.bss_mut().nr_online_cpus
+        &mut self.skel.maps.bss_data.nr_online_cpus
     }

     // Counter of currently running tasks.
     #[allow(dead_code)]
     pub fn nr_running_mut(&mut self) -> &mut u64 {
-        &mut self.skel.bss_mut().nr_running
+        &mut self.skel.maps.bss_data.nr_running
     }

     // Counter of queued tasks.
     #[allow(dead_code)]
     pub fn nr_queued_mut(&mut self) -> &mut u64 {
-        &mut self.skel.bss_mut().nr_queued
+        &mut self.skel.maps.bss_data.nr_queued
     }

     // Counter of scheduled tasks.
     #[allow(dead_code)]
     pub fn nr_scheduled_mut(&mut self) -> &mut u64 {
-        &mut self.skel.bss_mut().nr_scheduled
+        &mut self.skel.maps.bss_data.nr_scheduled
     }

     // Counter of user dispatch events.
     #[allow(dead_code)]
     pub fn nr_user_dispatches_mut(&mut self) -> &mut u64 {
-        &mut self.skel.bss_mut().nr_user_dispatches
+        &mut self.skel.maps.bss_data.nr_user_dispatches
     }

     // Counter of kernel dispatch events.
     #[allow(dead_code)]
     pub fn nr_kernel_dispatches_mut(&mut self) -> &mut u64 {
-        &mut self.skel.bss_mut().nr_kernel_dispatches
+        &mut self.skel.maps.bss_data.nr_kernel_dispatches
     }

     // Counter of cancel dispatch events.
     #[allow(dead_code)]
     pub fn nr_cancel_dispatches_mut(&mut self) -> &mut u64 {
-        &mut self.skel.bss_mut().nr_cancel_dispatches
+        &mut self.skel.maps.bss_data.nr_cancel_dispatches
     }

     // Counter of dispatches bounced to the shared DSQ.
     #[allow(dead_code)]
     pub fn nr_bounce_dispatches_mut(&mut self) -> &mut u64 {
-        &mut self.skel.bss_mut().nr_bounce_dispatches
+        &mut self.skel.maps.bss_data.nr_bounce_dispatches
     }

     // Counter of failed dispatch events.
     #[allow(dead_code)]
     pub fn nr_failed_dispatches_mut(&mut self) -> &mut u64 {
-        &mut self.skel.bss_mut().nr_failed_dispatches
+        &mut self.skel.maps.bss_data.nr_failed_dispatches
     }

     // Counter of scheduler congestion events.
     #[allow(dead_code)]
     pub fn nr_sched_congested_mut(&mut self) -> &mut u64 {
-        &mut self.skel.bss_mut().nr_sched_congested
+        &mut self.skel.maps.bss_data.nr_sched_congested
     }

     // Set scheduling class for the scheduler itself to SCHED_EXT
@@ -16,8 +16,8 @@ bindgen = ">=0.68, <0.70"
 glob = "0.3"
 hex = "0.4.3"
 lazy_static = "1.4"
-libbpf-cargo = "0.23.1"
-libbpf-rs = "0.23.1"
+libbpf-cargo = "0.24.1"
+libbpf-rs = "0.24.1"
 log = "0.4.17"
 paste = "1.0"
 regex = "1.10"
@@ -177,11 +177,11 @@ pub fn check_min_requirements() -> Result<()> {
 #[rustfmt::skip]
 #[macro_export]
 macro_rules! scx_ops_open {
-    ($builder: expr, $ops: ident) => { 'block: {
+    ($builder: expr, $obj_ref: expr, $ops: ident) => { 'block: {
         scx_utils::paste! {
             scx_utils::unwrap_or_break!(scx_utils::compat::check_min_requirements(), 'block);

-            let mut skel = match $builder.open().context("Failed to open BPF program") {
+            let mut skel = match $builder.open($obj_ref).context("Failed to open BPF program") {
                 Ok(val) => val,
                 Err(e) => break 'block Err(e),
             };
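The new `$obj_ref` parameter is the storage that libbpf-rs 0.24's `SkelBuilder::open` borrows for the `OpenObject`; the macro just forwards it. Call sites therefore look roughly like this (hedged sketch; `rustland` stands in for whichever struct_ops name the scheduler opens):

```rust
// The MaybeUninit must outlive the skeleton, which is why the schedulers in
// this diff declare it in main() and pass it down into init().
let mut open_object = std::mem::MaybeUninit::uninit();
let skel_builder = BpfSkelBuilder::default();
let mut skel = scx_ops_open!(skel_builder, &mut open_object, rustland)?;
```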
@@ -239,8 +239,8 @@ macro_rules! scx_ops_attach {
             .context("Failed to attach non-struct_ops BPF programs")
             .and_then(|_| {
                 $skel
-                    .maps_mut()
-                    .$ops()
+                    .maps
+                    .$ops
                     .attach_struct_ops()
                     .context("Failed to attach struct_ops BPF programs")
             })
@@ -51,7 +51,7 @@ pub enum ScxConsts {
 macro_rules! uei_read {
     ($skel: expr, $uei:ident) => {{
         scx_utils::paste! {
-            let bpf_uei = $skel.data().$uei;
+            let bpf_uei = $skel.maps.data_data.$uei;
             let bpf_dump = scx_utils::UEI_DUMP_PTR_MUTEX.lock().unwrap().ptr;
             let exit_code_ptr = match scx_utils::compat::struct_has_field("scx_exit_info", "exit_code") {
                 Ok(true) => &bpf_uei.exit_code as *const _,
@@ -80,14 +80,14 @@ macro_rules! uei_set_size {
                 0 => scx_utils::ScxConsts::ExitDumpDflLen as u32,
                 v => v,
             };
-            $skel.rodata_mut().[<$uei _dump_len>] = len;
-            $skel.maps_mut().[<data_ $uei _dump>]().set_value_size(len).unwrap();
+            $skel.maps.rodata_data.[<$uei _dump_len>] = len;
+            $skel.maps.[<data_ $uei _dump>].set_value_size(len).unwrap();

             let mut ptr = scx_utils::UEI_DUMP_PTR_MUTEX.lock().unwrap();
             *ptr = scx_utils::UeiDumpPtr { ptr:
                 $skel
-                    .maps()
-                    .[<data_ $uei _dump>]()
+                    .maps
+                    .[<data_ $uei _dump>]
                     .initial_value()
                     .unwrap()
                     .as_ptr() as *const _,
@@ -101,7 +101,7 @@ macro_rules! uei_set_size {
 #[macro_export]
 macro_rules! uei_exited {
     ($skel: expr, $uei:ident) => {{
-        let bpf_uei = $skel.data().uei;
+        let bpf_uei = $skel.maps.data_data.uei;
         (unsafe { std::ptr::read_volatile(&bpf_uei.kind as *const _) } != 0)
     }};
 }
@ -10,7 +10,7 @@ license = "GPL-2.0-only"
|
||||
anyhow = "1.0.65"
|
||||
ctrlc = { version = "3.1", features = ["termination"] }
|
||||
clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
|
||||
libbpf-rs = "0.23.1"
|
||||
libbpf-rs = "0.24.1"
|
||||
log = "0.4.17"
|
||||
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
|
||||
simplelog = "0.12"
|
||||
|
@@ -12,6 +12,7 @@ pub use bpf_intf::*;

 use std::fs::File;
 use std::io::Read;
+use std::mem::MaybeUninit;
 use std::sync::atomic::AtomicBool;
 use std::sync::atomic::Ordering;
 use std::sync::Arc;
@@ -30,6 +31,7 @@ use metrics_exporter_prometheus::PrometheusBuilder;

 use rlimit::{getrlimit, setrlimit, Resource};

+use libbpf_rs::OpenObject;
 use libbpf_rs::skel::OpenSkel;
 use libbpf_rs::skel::Skel;
 use libbpf_rs::skel::SkelBuilder;
@@ -169,7 +171,7 @@ struct Scheduler<'a> {
 }

 impl<'a> Scheduler<'a> {
-    fn init(opts: &'a Opts) -> Result<Self> {
+    fn init(opts: &'a Opts, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
         let (soft_limit, _) = getrlimit(Resource::MEMLOCK).unwrap();
         setrlimit(Resource::MEMLOCK, soft_limit, rlimit::INFINITY).unwrap();
@@ -191,19 +193,19 @@ impl<'a> Scheduler<'a> {
         // Initialize BPF connector.
         let mut skel_builder = BpfSkelBuilder::default();
         skel_builder.obj_builder.debug(opts.verbose);
-        let mut skel = scx_ops_open!(skel_builder, bpfland_ops)?;
+        let mut skel = scx_ops_open!(skel_builder, open_object, bpfland_ops)?;

         skel.struct_ops.bpfland_ops_mut().exit_dump_len = opts.exit_dump_len;

         // Override default BPF scheduling parameters.
-        skel.rodata_mut().debug = opts.debug;
-        skel.rodata_mut().smt_enabled = smt_enabled;
-        skel.rodata_mut().local_kthreads = opts.local_kthreads;
-        skel.rodata_mut().slice_ns = opts.slice_us * 1000;
-        skel.rodata_mut().slice_ns_min = opts.slice_us_min * 1000;
-        skel.rodata_mut().slice_ns_lag = opts.slice_us_lag * 1000;
-        skel.rodata_mut().starvation_thresh_ns = opts.starvation_thresh_us * 1000;
-        skel.rodata_mut().nvcsw_max_thresh = opts.nvcsw_max_thresh;
+        skel.maps.rodata_data.debug = opts.debug;
+        skel.maps.rodata_data.smt_enabled = smt_enabled;
+        skel.maps.rodata_data.local_kthreads = opts.local_kthreads;
+        skel.maps.rodata_data.slice_ns = opts.slice_us * 1000;
+        skel.maps.rodata_data.slice_ns_min = opts.slice_us_min * 1000;
+        skel.maps.rodata_data.slice_ns_lag = opts.slice_us_lag * 1000;
+        skel.maps.rodata_data.starvation_thresh_ns = opts.starvation_thresh_us * 1000;
+        skel.maps.rodata_data.nvcsw_max_thresh = opts.nvcsw_max_thresh;

         // Attach the scheduler.
         let mut skel = scx_ops_load!(skel, bpfland_ops, uei)?;
@@ -231,14 +233,14 @@ impl<'a> Scheduler<'a> {
     }

     fn update_stats(&mut self) {
-        let nr_cpus = self.skel.bss().nr_online_cpus;
-        let nr_running = self.skel.bss().nr_running;
-        let nr_interactive = self.skel.bss().nr_interactive;
-        let nr_waiting = self.skel.bss().nr_waiting;
-        let nvcsw_avg_thresh = self.skel.bss().nvcsw_avg_thresh;
-        let nr_direct_dispatches = self.skel.bss().nr_direct_dispatches;
-        let nr_prio_dispatches = self.skel.bss().nr_prio_dispatches;
-        let nr_shared_dispatches = self.skel.bss().nr_shared_dispatches;
+        let nr_cpus = self.skel.maps.bss_data.nr_online_cpus;
+        let nr_running = self.skel.maps.bss_data.nr_running;
+        let nr_interactive = self.skel.maps.bss_data.nr_interactive;
+        let nr_waiting = self.skel.maps.bss_data.nr_waiting;
+        let nvcsw_avg_thresh = self.skel.maps.bss_data.nvcsw_avg_thresh;
+        let nr_direct_dispatches = self.skel.maps.bss_data.nr_direct_dispatches;
+        let nr_prio_dispatches = self.skel.maps.bss_data.nr_prio_dispatches;
+        let nr_shared_dispatches = self.skel.maps.bss_data.nr_shared_dispatches;

         // Update Prometheus statistics.
         self.metrics
@@ -328,8 +330,9 @@ fn main() -> Result<()> {
     })
     .context("Error setting Ctrl-C handler")?;

+    let mut open_object = MaybeUninit::uninit();
     loop {
-        let mut sched = Scheduler::init(&opts)?;
+        let mut sched = Scheduler::init(&opts, &mut open_object)?;
         if !sched.run(shutdown.clone())?.should_restart() {
             break;
         }
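Worth noting in this and every later `main()` hunk: `open_object` is declared once, outside the restart loop. Each `Scheduler::init` borrows it mutably for the scheduler's lifetime, and the borrow ends when `sched` is dropped at the bottom of an iteration, so the next iteration can re-borrow the same storage. The loop above, annotated as a sketch:

```rust
let mut open_object = MaybeUninit::uninit();
loop {
    // Mutable borrow of `open_object` starts here...
    let mut sched = Scheduler::init(&opts, &mut open_object)?;
    if !sched.run(shutdown.clone())?.should_restart() {
        break;
    }
    // ...and ends here when `sched` is dropped, freeing the storage for the
    // next restart without reallocating it.
}
```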
@@ -13,7 +13,7 @@ clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
 ctrlc = { version = "3.1", features = ["termination"] }
 fb_procfs = "0.7"
 hex = "0.4.3"
-libbpf-rs = "0.23.1"
+libbpf-rs = "0.24.1"
 libc = "0.2.137"
 log = "0.4.17"
 ordered-float = "3.4.0"
@@ -12,6 +12,7 @@ pub mod bpf_intf;
 pub use bpf_intf::*;

 use std::mem;
+use std::mem::MaybeUninit;
 use std::sync::atomic::AtomicBool;
 use std::sync::atomic::Ordering;
 use std::time::Duration;
@@ -24,6 +25,7 @@ use std::str;
 use anyhow::Context;
 use anyhow::Result;
 use clap::Parser;
+use libbpf_rs::OpenObject;
 use libbpf_rs::skel::OpenSkel;
 use libbpf_rs::skel::Skel;
 use libbpf_rs::skel::SkelBuilder;
@@ -230,7 +232,10 @@ struct Scheduler<'a> {
 }

 impl<'a> Scheduler<'a> {
-    fn init(opts: &'a Opts) -> Result<Self> {
+    fn init(
+        opts: &'a Opts,
+        open_object: &'a mut MaybeUninit<OpenObject>,
+    ) -> Result<Self> {
         // Increase MEMLOCK size since the BPF scheduler might use
         // more than the current limit
         let (soft_limit, _) = getrlimit(Resource::MEMLOCK).unwrap();
@@ -239,23 +244,23 @@ impl<'a> Scheduler<'a> {
         // Open the BPF prog first for verification.
         let mut skel_builder = BpfSkelBuilder::default();
         skel_builder.obj_builder.debug(opts.verbose > 0);
-        let mut skel = scx_ops_open!(skel_builder, lavd_ops)?;
+        let mut skel = scx_ops_open!(skel_builder, open_object, lavd_ops)?;

         // Initialize CPU order topologically sorted by a cpu, node, llc, max_freq, and core order
         let topo = FlatTopology::new(opts.prefer_smt_core).expect("Failed to build host topology");
         for (pos, cpu) in topo.cpu_fids().iter().enumerate() {
-            skel.rodata_mut().cpu_order[pos] = cpu.cpu_id as u16;
-            skel.rodata_mut().__cpu_capacity_hint[cpu.cpu_id] = cpu.cpu_cap as u16;
+            skel.maps.rodata_data.cpu_order[pos] = cpu.cpu_id as u16;
+            skel.maps.rodata_data.__cpu_capacity_hint[cpu.cpu_id] = cpu.cpu_cap as u16;
         }
         debug!("{}", topo);

         // Initialize skel according to @opts.
         let nr_cpus_onln = topo.nr_cpus_online() as u64;
-        skel.bss_mut().nr_cpus_onln = nr_cpus_onln;
+        skel.maps.bss_data.nr_cpus_onln = nr_cpus_onln;
         skel.struct_ops.lavd_ops_mut().exit_dump_len = opts.exit_dump_len;
-        skel.rodata_mut().no_core_compaction = opts.no_core_compaction;
-        skel.rodata_mut().no_freq_scaling = opts.no_freq_scaling;
-        skel.rodata_mut().verbose = opts.verbose;
+        skel.maps.rodata_data.no_core_compaction = opts.no_core_compaction;
+        skel.maps.rodata_data.no_freq_scaling = opts.no_freq_scaling;
+        skel.maps.rodata_data.verbose = opts.verbose;
         let intrspc = introspec::init(opts);

         // Attach.
@@ -263,8 +268,7 @@ impl<'a> Scheduler<'a> {
         let struct_ops = Some(scx_ops_attach!(skel, lavd_ops)?);

         // Build a ring buffer for instrumentation
-        let mut maps = skel.maps_mut();
-        let rb_map = maps.introspec_msg();
+        let rb_map = &mut skel.maps.introspec_msg;
         let mut builder = libbpf_rs::RingBufferBuilder::new();
         builder.add(rb_map, Scheduler::print_bpf_msg).unwrap();
         let rb_mgr = builder.build().unwrap();
@@ -390,9 +394,9 @@ impl<'a> Scheduler<'a> {
         }
         self.intrspc.requested = true as u8;

-        self.skel.bss_mut().intrspc.cmd = self.intrspc.cmd;
-        self.skel.bss_mut().intrspc.arg = self.intrspc.arg;
-        self.skel.bss_mut().intrspc.requested = self.intrspc.requested;
+        self.skel.maps.bss_data.intrspc.cmd = self.intrspc.cmd;
+        self.skel.maps.bss_data.intrspc.arg = self.intrspc.arg;
+        self.skel.maps.bss_data.intrspc.requested = self.intrspc.requested;

         interval_ms
     }
@@ -472,8 +476,9 @@ fn main() -> Result<()> {
     init_log(&opts);
    init_signal_handlers();

+    let mut open_object = MaybeUninit::uninit();
     loop {
-        let mut sched = Scheduler::init(&opts)?;
+        let mut sched = Scheduler::init(&opts, &mut open_object)?;
         info!(
             "scx_lavd scheduler is initialized (build ID: {})",
             *build_id::SCX_FULL_VERSION
@@ -13,7 +13,7 @@ clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
 ctrlc = { version = "3.1", features = ["termination"] }
 fb_procfs = "0.7"
 lazy_static = "1.4"
-libbpf-rs = "0.23.1"
+libbpf-rs = "0.24.1"
 libc = "0.2.137"
 log = "0.4.17"
 prometheus-client = "0.19"
@@ -14,6 +14,7 @@ use std::ffi::CString;
 use std::fs;
 use std::io::Read;
 use std::io::Write;
+use std::mem::MaybeUninit;
 use std::ops::Sub;
 use std::sync::atomic::AtomicBool;
 use std::sync::atomic::Ordering;
@@ -28,6 +29,8 @@ use anyhow::Context;
 use anyhow::Result;
 use bitvec::prelude::*;
 use clap::Parser;
+use libbpf_rs::MapCore as _;
+use libbpf_rs::OpenObject;
 use libbpf_rs::skel::OpenSkel;
 use libbpf_rs::skel::Skel;
 use libbpf_rs::skel::SkelBuilder;
@@ -558,8 +561,8 @@ fn cachemask_from_llcs(llcs: &BTreeMap<usize, Cache>) -> usize {
 fn read_cpu_ctxs(skel: &BpfSkel) -> Result<Vec<bpf_intf::cpu_ctx>> {
     let mut cpu_ctxs = vec![];
     let cpu_ctxs_vec = skel
-        .maps()
-        .cpu_ctxs()
+        .maps
+        .cpu_ctxs
         .lookup_percpu(&0u32.to_ne_bytes(), libbpf_rs::MapFlags::ANY)
         .context("Failed to lookup cpu_ctx")?
         .unwrap();
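The `use libbpf_rs::MapCore as _;` import added earlier in this file exists because, in 0.24, map operations such as `lookup_percpu` and `update` are defined on the `MapCore` trait that the inlined map fields implement; the anonymous import brings the methods into scope without binding a name. Reduced to its essentials, the per-CPU read above is:

```rust
use libbpf_rs::MapCore as _; // provides lookup_percpu() on map fields

// Returns one serialized value per possible CPU for the entry at key 0.
let per_cpu_vals = skel
    .maps
    .cpu_ctxs
    .lookup_percpu(&0u32.to_ne_bytes(), libbpf_rs::MapFlags::ANY)?
    .unwrap();
```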
@@ -651,7 +654,7 @@ impl Stats {
     fn read_layer_loads(skel: &mut BpfSkel, nr_layers: usize) -> (f64, Vec<f64>) {
         let now_mono = now_monotonic();
         let layer_loads: Vec<f64> = skel
-            .bss()
+            .maps.bss_data
             .layers
             .iter()
             .take(nr_layers)
@@ -684,7 +687,7 @@ impl Stats {
     }

     fn new(skel: &mut BpfSkel, proc_reader: &procfs::ProcReader) -> Result<Self> {
-        let nr_layers = skel.rodata().nr_layers as usize;
+        let nr_layers = skel.maps.rodata_data.nr_layers as usize;
         let bpf_stats = BpfStats::read(&read_cpu_ctxs(skel)?, nr_layers);

         Ok(Self {
@@ -718,7 +721,7 @@ impl Stats {
         let cpu_ctxs = read_cpu_ctxs(skel)?;

         let nr_layer_tasks: Vec<usize> = skel
-            .bss()
+            .maps.bss_data
             .layers
             .iter()
             .take(self.nr_layers)
@@ -1266,11 +1269,11 @@ struct Scheduler<'a, 'b> {

 impl<'a, 'b> Scheduler<'a, 'b> {
     fn init_layers(skel: &mut OpenBpfSkel, opts: &Opts, specs: &Vec<LayerSpec>, topo: &Topology) -> Result<()> {
-        skel.rodata_mut().nr_layers = specs.len() as u32;
+        skel.maps.rodata_data.nr_layers = specs.len() as u32;
         let mut perf_set = false;

         for (spec_i, spec) in specs.iter().enumerate() {
-            let layer = &mut skel.bss_mut().layers[spec_i];
+            let layer = &mut skel.maps.bss_data.layers[spec_i];

             for (or_i, or) in spec.matches.iter().enumerate() {
                 for (and_i, and) in or.iter().enumerate() {
@@ -1386,26 +1389,26 @@ impl<'a, 'b> Scheduler<'a, 'b> {
     }

     fn init_nodes(skel: &mut OpenBpfSkel, _opts: &Opts, topo: &Topology) {
-        skel.rodata_mut().nr_nodes = topo.nodes().len() as u32;
-        skel.rodata_mut().nr_llcs = 0;
+        skel.maps.rodata_data.nr_nodes = topo.nodes().len() as u32;
+        skel.maps.rodata_data.nr_llcs = 0;

         for node in topo.nodes() {
             info!("configuring node {}, LLCs {:?}", node.id(), node.llcs().len());
-            skel.rodata_mut().nr_llcs += node.llcs().len() as u32;
+            skel.maps.rodata_data.nr_llcs += node.llcs().len() as u32;

             for (_, llc) in node.llcs() {
                 info!("configuring llc {:?} for node {:?}", llc.id(), node.id());
-                skel.rodata_mut().llc_numa_id_map[llc.id()] = node.id() as u32;
+                skel.maps.rodata_data.llc_numa_id_map[llc.id()] = node.id() as u32;
             }
         }

         for (_, cpu) in topo.cpus() {
-            skel.rodata_mut().cpu_llc_id_map[cpu.id()] = cpu.llc_id() as u32;
+            skel.maps.rodata_data.cpu_llc_id_map[cpu.id()] = cpu.llc_id() as u32;
         }
     }

-    fn init(opts: &Opts, layer_specs: &'b Vec<LayerSpec>) -> Result<Self> {
+    fn init(opts: &Opts, layer_specs: &'b Vec<LayerSpec>, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
         let nr_layers = layer_specs.len();
         let topo = Topology::new()?;
         let cpu_pool = CpuPool::new()?;
|
||||
let mut skel_builder = BpfSkelBuilder::default();
|
||||
skel_builder.obj_builder.debug(opts.verbose > 1);
|
||||
init_libbpf_logging(None);
|
||||
let mut skel = scx_ops_open!(skel_builder, layered)?;
|
||||
let mut skel = scx_ops_open!(skel_builder, open_object, layered)?;
|
||||
|
||||
// scheduler_tick() got renamed to sched_tick() during v6.10-rc.
|
||||
let sched_tick_name = match compat::ksym_exists("sched_tick")? {
|
||||
@ -1422,29 +1425,29 @@ impl<'a, 'b> Scheduler<'a, 'b> {
|
||||
false => "scheduler_tick",
|
||||
};
|
||||
|
||||
skel.progs_mut()
|
||||
.sched_tick_fentry()
|
||||
skel.progs
|
||||
.sched_tick_fentry
|
||||
.set_attach_target(0, Some(sched_tick_name.into()))
|
||||
.context("Failed to set attach target for sched_tick_fentry()")?;
|
||||
|
||||
// Initialize skel according to @opts.
|
||||
skel.struct_ops.layered_mut().exit_dump_len = opts.exit_dump_len;
|
||||
|
||||
skel.rodata_mut().debug = opts.verbose as u32;
|
||||
skel.rodata_mut().slice_ns = opts.slice_us * 1000;
|
||||
skel.rodata_mut().max_exec_ns = if opts.max_exec_us > 0 {
|
||||
skel.maps.rodata_data.debug = opts.verbose as u32;
|
||||
skel.maps.rodata_data.slice_ns = opts.slice_us * 1000;
|
||||
skel.maps.rodata_data.max_exec_ns = if opts.max_exec_us > 0 {
|
||||
opts.max_exec_us * 1000
|
||||
} else {
|
||||
opts.slice_us * 1000 * 20
|
||||
};
|
||||
skel.rodata_mut().nr_possible_cpus = *NR_POSSIBLE_CPUS as u32;
|
||||
skel.rodata_mut().smt_enabled = cpu_pool.nr_cpus > cpu_pool.nr_cores;
|
||||
skel.rodata_mut().disable_topology = opts.disable_topology;
|
||||
skel.maps.rodata_data.nr_possible_cpus = *NR_POSSIBLE_CPUS as u32;
|
||||
skel.maps.rodata_data.smt_enabled = cpu_pool.nr_cpus > cpu_pool.nr_cores;
|
||||
skel.maps.rodata_data.disable_topology = opts.disable_topology;
|
||||
for (cpu, sib) in cpu_pool.sibling_cpu.iter().enumerate() {
|
||||
skel.rodata_mut().__sibling_cpu[cpu] = *sib;
|
||||
skel.maps.rodata_data.__sibling_cpu[cpu] = *sib;
|
||||
}
|
||||
for cpu in cpu_pool.all_cpus.iter_ones() {
|
||||
skel.rodata_mut().all_cpus[cpu / 8] |= 1 << (cpu % 8);
|
||||
skel.maps.rodata_data.all_cpus[cpu / 8] |= 1 << (cpu % 8);
|
||||
}
|
||||
Self::init_layers(&mut skel, opts, layer_specs, &topo)?;
|
||||
Self::init_nodes(&mut skel, opts, &topo);
|
||||
@@ -1497,7 +1500,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
         Ok(sched)
     }

-    fn update_bpf_layer_cpumask(layer: &Layer, bpf_layer: &mut bpf_types::layer) {
+    fn update_bpf_layer_cpumask(layer: &Layer, bpf_layer: &mut types::layer) {
         for bit in 0..layer.cpus.len() {
             if layer.cpus[bit] {
                 bpf_layer.cpus[bit / 8] |= 1 << (bit % 8);
@@ -1542,7 +1545,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
             {
                 Self::update_bpf_layer_cpumask(
                     &self.layers[idx],
-                    &mut self.skel.bss_mut().layers[idx],
+                    &mut self.skel.maps.bss_data.layers[idx],
                 );
                 updated = true;
             }
@@ -1554,7 +1557,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
         if updated {
             for idx in 0..self.layers.len() {
                 let layer = &mut self.layers[idx];
-                let bpf_layer = &mut self.skel.bss_mut().layers[idx];
+                let bpf_layer = &mut self.skel.maps.bss_data.layers[idx];
                 match &layer.kind {
                     LayerKind::Open { .. } => {
                         let available_cpus = self.cpu_pool.available_cpus_in_mask(&layer.allowed_cpus);
@@ -1569,7 +1572,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
                 }
             }

-            self.skel.bss_mut().fallback_cpu = self.cpu_pool.fallback_cpu as u32;
+            self.skel.maps.bss_data.fallback_cpu = self.cpu_pool.fallback_cpu as u32;

             for (lidx, layer) in self.layers.iter().enumerate() {
                 self.nr_layer_cpus_min_max[lidx] = (
@@ -2183,8 +2186,9 @@ fn main() -> Result<()> {
         }
     }

+    let mut open_object = MaybeUninit::uninit();
     loop {
-        let mut sched = Scheduler::init(&opts, &layer_config.specs)?;
+        let mut sched = Scheduler::init(&opts, &layer_config.specs, &mut open_object)?;
         if !sched.run(shutdown.clone())?.should_restart() {
             break;
         }
@@ -15,7 +15,7 @@ ctrlc = { version = "3.1", features = ["termination"] }
 fb_procfs = "0.7"
 itertools = "0.12.1"
 lazy_static = "1.4"
-libbpf-rs = "0.23.1"
+libbpf-rs = "0.24.1"
 libc = "0.2.137"
 log = "0.4.17"
 maplit = "1.0.2"
@@ -262,13 +262,13 @@ struct Scheduler<'a> {
 }

 impl<'a> Scheduler<'a> {
-    fn init(opts: &Opts) -> Result<Self> {
+    fn init(opts: &Opts, open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
         let mut cpu_pool = CpuPool::new()?;

         let mut skel_builder = BpfSkelBuilder::default();
         skel_builder.obj_builder.debug(opts.verbose > 1);
         init_libbpf_logging(None);
-        let mut skel = scx_ops_open!(skel_builder, mitosis)?;
+        let mut skel = scx_ops_open!(skel_builder, open_object, mitosis)?;

         // scheduler_tick() got renamed to sched_tick() during v6.10-rc.
         let sched_tick_name = match compat::ksym_exists("sched_tick")? {
@@ -284,12 +284,12 @@ impl<'a> Scheduler<'a> {
         skel.struct_ops.mitosis_mut().exit_dump_len = opts.exit_dump_len;

         if opts.verbose >= 1 {
-            skel.rodata_mut().debug = true;
+            skel.maps.rodata_data.debug = true;
         }
-        skel.rodata_mut().nr_possible_cpus = *NR_POSSIBLE_CPUS as u32;
+        skel.maps.rodata_data.nr_possible_cpus = *NR_POSSIBLE_CPUS as u32;
         for cpu in cpu_pool.all_cpus.iter_ones() {
-            skel.rodata_mut().all_cpus[cpu / 8] |= 1 << (cpu % 8);
-            skel.bss_mut().cells[0].cpus[cpu / 8] |= 1 << (cpu % 8);
+            skel.maps.rodata_data.all_cpus[cpu / 8] |= 1 << (cpu % 8);
+            skel.maps.bss_data.cells[0].cpus[cpu / 8] |= 1 << (cpu % 8);
         }
         for _ in 0..cpu_pool.all_cpus.count_ones() {
             cpu_pool.alloc();
@@ -345,7 +345,7 @@ impl<'a> Scheduler<'a> {
             let total_load = self.collect_cgroup_load()?;
             self.debug()?;
             let mut reconfigured = false;
-            if self.skel.bss().user_global_seq != self.skel.bss().global_seq {
+            if self.skel.maps.bss_data.user_global_seq != self.skel.maps.bss_data.global_seq {
                 trace!("BPF reconfiguration still in progress, skipping further changes");
                 continue;
             } else if self.last_reconfiguration.elapsed() >= self.reconfiguration_interval {
@@ -396,13 +396,13 @@ impl<'a> Scheduler<'a> {
             })?;
             trace!("Assigned {} to {}", cgroup, cell_idx);
         }
-        self.skel.bss_mut().update_cell_assignment = true;
+        self.skel.maps.bss_data.update_cell_assignment = true;
         Ok(())
     }

     fn trigger_reconfiguration(&mut self) {
         trace!("Triggering Reconfiguration");
-        self.skel.bss_mut().user_global_seq += 1;
+        self.skel.maps.bss_data.user_global_seq += 1;
     }

     /// Iterate through each cg in the cgroupfs, read its load from BPF and
@@ -639,7 +639,7 @@ impl<'a> Scheduler<'a> {
                     .free(&mut cell.cpu_assignment)
                     .ok_or(anyhow!("No cpus to free"))?;
                 trace!("Freeing {} from Cell {}", freed_cpu, cell_idx);
-                self.skel.bss_mut().cells[*cell_idx as usize].cpus[freed_cpu / 8] &=
+                self.skel.maps.bss_data.cells[*cell_idx as usize].cpus[freed_cpu / 8] &=
                     !(1 << freed_cpu % 8);
             }
         }
@@ -656,10 +656,10 @@ impl<'a> Scheduler<'a> {
                     .ok_or(anyhow!("No cpus to allocate"))?;
                 trace!("Allocating {} to Cell {}", new_cpu, cell_idx);
                 cell.cpu_assignment.set(new_cpu, true);
-                self.skel.bss_mut().cells[*cell_idx as usize].cpus[new_cpu / 8] |= 1 << new_cpu % 8;
+                self.skel.maps.bss_data.cells[*cell_idx as usize].cpus[new_cpu / 8] |= 1 << new_cpu % 8;
             }
         }
-        for (cell_idx, cell) in self.skel.bss().cells.iter().enumerate() {
+        for (cell_idx, cell) in self.skel.maps.bss_data.cells.iter().enumerate() {
             trace!("Cell {} Cpumask {:X?}", cell_idx, cell.cpus);
         }
         Ok(())
@@ -738,8 +738,8 @@ impl<'a> Scheduler<'a> {
         cell1.cgroups.append(&mut cell2.cgroups);
         // XXX: I don't love manipulating the CPU mask here and not in assign_cpus
         for cpu in cell2.cpu_assignment.iter_ones() {
-            self.skel.bss_mut().cells[merge.cell1 as usize].cpus[cpu / 8] |= 1 << cpu % 8;
-            self.skel.bss_mut().cells[merge.cell2 as usize].cpus[cpu / 8] &= !(1 << cpu % 8);
+            self.skel.maps.bss_data.cells[merge.cell1 as usize].cpus[cpu / 8] |= 1 << cpu % 8;
+            self.skel.maps.bss_data.cells[merge.cell2 as usize].cpus[cpu / 8] &= !(1 << cpu % 8);
         }
         cell1.cpu_assignment |= cell2.cpu_assignment;
         cell1.load += cell2.load;
@@ -827,8 +827,9 @@ fn main() -> Result<()> {
     })
     .context("Error setting Ctrl-C handler")?;

+    let mut open_object = MaybeUninit::uninit();
     loop {
-        let mut sched = Scheduler::init(&opts)?;
+        let mut sched = Scheduler::init(&opts, &mut open_object)?;
         if !sched.run(shutdown.clone())?.should_restart() {
             break;
         }
@ -10,7 +10,7 @@ license = "GPL-2.0-only"
|
||||
anyhow = "1.0.65"
|
||||
plain = "0.2.3"
|
||||
ctrlc = { version = "3.1", features = ["termination"] }
|
||||
libbpf-rs = "0.23.1"
|
||||
libbpf-rs = "0.24"
|
||||
libc = "0.2.137"
|
||||
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
|
||||
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.2" }
|
||||
|
@@ -11,6 +11,9 @@ use bpf::*;

 use scx_utils::UserExitInfo;

+use libbpf_rs::OpenObject;
+
+use std::mem::MaybeUninit;
 use std::sync::atomic::AtomicBool;
 use std::sync::atomic::Ordering;
 use std::sync::Arc;
@@ -24,8 +27,9 @@ struct Scheduler<'a> {
 }

 impl<'a> Scheduler<'a> {
-    fn init() -> Result<Self> {
+    fn init(open_object: &'a mut MaybeUninit<OpenObject>) -> Result<Self> {
         let bpf = BpfScheduler::init(
+            open_object,
             0,     // exit_dump_len (buffer size of exit info)
             false, // partial (include all tasks if false)
             5000,  // slice_ns (default task time slice)
@@ -141,8 +145,9 @@ fn main() -> Result<()> {
         shutdown_clone.store(true, Ordering::Relaxed);
     })?;

+    let mut open_object = MaybeUninit::uninit();
     loop {
-        let mut sched = Scheduler::init()?;
+        let mut sched = Scheduler::init(&mut open_object)?;
         if !sched.run(shutdown.clone())?.should_restart() {
             break;
         }
@@ -12,7 +12,7 @@ plain = "0.2.3"
 clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
 ctrlc = { version = "3.1", features = ["termination"] }
 fb_procfs = "0.7"
-libbpf-rs = "0.23.1"
+libbpf-rs = "0.24.1"
 libc = "0.2.137"
 log = "0.4.17"
 ordered-float = "3.4.0"
@@ -17,7 +17,7 @@ use std::thread;

 use std::collections::BTreeSet;
 use std::collections::HashMap;
-
+use std::mem::MaybeUninit;
 use std::sync::atomic::AtomicBool;
 use std::sync::atomic::Ordering;
 use std::sync::Arc;
@@ -30,6 +30,7 @@ use std::path::Path;
 use anyhow::Context;
 use anyhow::Result;
 use clap::Parser;
+use libbpf_rs::OpenObject;
 use log::info;
 use log::warn;
@@ -275,13 +276,17 @@ struct Scheduler<'a> {
 }

 impl<'a> Scheduler<'a> {
-    fn init(opts: &Opts) -> Result<Self> {
+    fn init(
+        opts: &Opts,
+        open_object: &'a mut MaybeUninit<OpenObject>,
+    ) -> Result<Self> {
         // Initialize core mapping topology.
         let topo = Topology::new().expect("Failed to build host topology");
         let topo_map = TopologyMap::new(&topo).expect("Failed to generate topology map");

         // Low-level BPF connector.
         let bpf = BpfScheduler::init(
+            open_object,
             opts.exit_dump_len,
             opts.partial,
             opts.slice_us,
@@ -693,8 +698,9 @@ fn main() -> Result<()> {
     })
     .context("Error setting Ctrl-C handler")?;

+    let mut open_object = MaybeUninit::uninit();
     loop {
-        let mut sched = Scheduler::init(&opts)?;
+        let mut sched = Scheduler::init(&opts, &mut open_object)?;
         // Start the scheduler.
         if !sched.run(shutdown.clone())?.should_restart() {
             break;
@@ -11,7 +11,7 @@ anyhow = "1.0.65"
 clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
 ctrlc = { version = "3.1", features = ["termination"] }
 fb_procfs = "0.7"
-libbpf-rs = "0.23.1"
+libbpf-rs = "0.24.1"
 libc = "0.2.137"
 log = "0.4.17"
 ordered-float = "3.4.0"
@@ -143,6 +143,7 @@ use std::sync::Arc;
 use anyhow::bail;
 use anyhow::Context;
 use anyhow::Result;
+use libbpf_rs::MapCore as _;
 use log::debug;
 use log::warn;
 use ordered_float::OrderedFloat;
@@ -368,7 +369,7 @@ impl Domain {
         let dom_id: u32 = other.id.try_into().unwrap();

         // Ask BPF code to execute the migration.
-        if let Err(e) = skel.maps_mut().lb_data().update(
+        if let Err(e) = skel.maps.lb_data.update(
             &cpid,
             &dom_id.to_ne_bytes(),
             libbpf_rs::MapFlags::NO_EXIST,
@@ -603,9 +604,8 @@ impl<'a, 'b> LoadBalancer<'a, 'b> {
     fn calculate_load_avgs(&mut self) -> Result<LoadLedger> {
         const NUM_BUCKETS: u64 = bpf_intf::consts_LB_LOAD_BUCKETS as u64;
         let now_mono = now_monotonic();
-        let load_half_life = self.skel.rodata().load_half_life;
-        let maps = self.skel.maps();
-        let dom_data = maps.dom_data();
+        let load_half_life = self.skel.maps.rodata_data.load_half_life;
+        let dom_data = &self.skel.maps.dom_data;

         let mut aggregator = LoadAggregator::new(self.dom_group.weight(), !self.lb_apply_weight.clone());
@@ -680,20 +680,19 @@ impl<'a, 'b> LoadBalancer<'a, 'b> {

         // Read active_pids and update read_idx and gen.
         const MAX_PIDS: u64 = bpf_intf::consts_MAX_DOM_ACTIVE_PIDS as u64;
-        let active_pids = &mut self.skel.bss_mut().dom_active_pids[dom.id];
+        let active_pids = &mut self.skel.maps.bss_data.dom_active_pids[dom.id];
         let (mut ridx, widx) = (active_pids.read_idx, active_pids.write_idx);
         active_pids.read_idx = active_pids.write_idx;
         active_pids.gen += 1;

-        let active_pids = &self.skel.bss().dom_active_pids[dom.id];
+        let active_pids = &self.skel.maps.bss_data.dom_active_pids[dom.id];
         if widx - ridx > MAX_PIDS {
             ridx = widx - MAX_PIDS;
         }

         // Read task_ctx and load.
-        let load_half_life = self.skel.rodata().load_half_life;
-        let maps = self.skel.maps();
-        let task_data = maps.task_data();
+        let load_half_life = self.skel.maps.rodata_data.load_half_life;
+        let task_data = &self.skel.maps.task_data;
         let now_mono = now_monotonic();

         for idx in ridx..widx {
@@ -1093,7 +1092,7 @@ impl<'a, 'b> LoadBalancer<'a, 'b> {
     }

     fn perform_balancing(&mut self) -> Result<()> {
-        clear_map(self.skel.maps().lb_data());
+        clear_map(&self.skel.maps.lb_data);

         // First balance load between the NUMA nodes. Balancing here has a
         // higher cost function than balancing between domains inside of NUMA
@@ -16,6 +16,7 @@ pub mod load_balance;
 use load_balance::LoadBalancer;
 use load_balance::NumaStat;

+use std::mem::MaybeUninit;
 use std::sync::atomic::AtomicBool;
 use std::sync::atomic::Ordering;
 use std::sync::Arc;
@@ -31,6 +32,8 @@ use anyhow::bail;
 use anyhow::Context;
 use anyhow::Result;
 use clap::Parser;
+use libbpf_rs::MapCore as _;
+use libbpf_rs::OpenObject;
 use libbpf_rs::skel::OpenSkel;
 use libbpf_rs::skel::Skel;
 use libbpf_rs::skel::SkelBuilder;
@@ -305,13 +308,16 @@ struct Scheduler<'a> {
 }

 impl<'a> Scheduler<'a> {
-    fn init(opts: &Opts) -> Result<Self> {
+    fn init(
+        opts: &Opts,
+        open_object: &'a mut MaybeUninit<OpenObject>,
+    ) -> Result<Self> {
         // Open the BPF prog first for verification.
         let mut skel_builder = BpfSkelBuilder::default();
         skel_builder.obj_builder.debug(opts.verbose > 0);
         init_libbpf_logging(None);
         info!("Running scx_rusty (build ID: {})", *build_id::SCX_FULL_VERSION);
-        let mut skel = scx_ops_open!(skel_builder, rusty).unwrap();
+        let mut skel = scx_ops_open!(skel_builder, open_object, rusty).unwrap();

         // Initialize skel according to @opts.
         let top = Arc::new(Topology::new()?);
@@ -334,21 +340,21 @@ impl<'a> Scheduler<'a> {
             );
         }

-        skel.rodata_mut().nr_nodes = domains.nr_nodes() as u32;
-        skel.rodata_mut().nr_doms = domains.nr_doms() as u32;
-        skel.rodata_mut().nr_cpu_ids = top.nr_cpu_ids() as u32;
+        skel.maps.rodata_data.nr_nodes = domains.nr_nodes() as u32;
+        skel.maps.rodata_data.nr_doms = domains.nr_doms() as u32;
+        skel.maps.rodata_data.nr_cpu_ids = top.nr_cpu_ids() as u32;

         // Any CPU with dom > MAX_DOMS is considered offline by default. There
         // are a few places in the BPF code where we skip over offlined CPUs
         // (e.g. when initializing or refreshing tune params), and elsewhere the
         // scheduler will error if we try to schedule from them.
         for cpu in 0..top.nr_cpu_ids() {
-            skel.rodata_mut().cpu_dom_id_map[cpu] = u32::MAX;
+            skel.maps.rodata_data.cpu_dom_id_map[cpu] = u32::MAX;
         }

         for (id, dom) in domains.doms().iter() {
             for cpu in dom.mask().into_iter() {
-                skel.rodata_mut().cpu_dom_id_map[cpu] = id
+                skel.maps.rodata_data.cpu_dom_id_map[cpu] = id
                     .clone()
                     .try_into()
                     .expect("Domain ID could not fit into 32 bits");
@@ -364,17 +370,17 @@ impl<'a> Scheduler<'a> {
         }

         let raw_numa_slice = numa_mask.as_raw_slice();
-        let node_cpumask_slice = &mut skel.rodata_mut().numa_cpumasks[numa];
+        let node_cpumask_slice = &mut skel.maps.rodata_data.numa_cpumasks[numa];
         let (left, _) = node_cpumask_slice.split_at_mut(raw_numa_slice.len());
         left.clone_from_slice(raw_numa_slice);
         info!("NUMA[{:02}] mask= {}", numa, numa_mask);

         for dom in node_domains.iter() {
             let raw_dom_slice = dom.mask_slice();
-            let dom_cpumask_slice = &mut skel.rodata_mut().dom_cpumasks[dom.id()];
+            let dom_cpumask_slice = &mut skel.maps.rodata_data.dom_cpumasks[dom.id()];
             let (left, _) = dom_cpumask_slice.split_at_mut(raw_dom_slice.len());
             left.clone_from_slice(raw_dom_slice);
-            skel.rodata_mut().dom_numa_id_map[dom.id()] =
+            skel.maps.rodata_data.dom_numa_id_map[dom.id()] =
                 numa.try_into().expect("NUMA ID could not fit into 32 bits");

             info!(" DOM[{:02}] mask= {}", dom.id(), dom.mask());
@@ -386,14 +392,14 @@ impl<'a> Scheduler<'a> {
         }
         skel.struct_ops.rusty_mut().exit_dump_len = opts.exit_dump_len;

-        skel.rodata_mut().load_half_life = (opts.load_half_life * 1000000000.0) as u32;
-        skel.rodata_mut().kthreads_local = opts.kthreads_local;
-        skel.rodata_mut().fifo_sched = opts.fifo_sched;
-        skel.rodata_mut().greedy_threshold = opts.greedy_threshold;
-        skel.rodata_mut().greedy_threshold_x_numa = opts.greedy_threshold_x_numa;
-        skel.rodata_mut().direct_greedy_numa = opts.direct_greedy_numa;
-        skel.rodata_mut().mempolicy_affinity = opts.mempolicy_affinity;
-        skel.rodata_mut().debug = opts.verbose as u32;
+        skel.maps.rodata_data.load_half_life = (opts.load_half_life * 1000000000.0) as u32;
+        skel.maps.rodata_data.kthreads_local = opts.kthreads_local;
+        skel.maps.rodata_data.fifo_sched = opts.fifo_sched;
+        skel.maps.rodata_data.greedy_threshold = opts.greedy_threshold;
+        skel.maps.rodata_data.greedy_threshold_x_numa = opts.greedy_threshold_x_numa;
+        skel.maps.rodata_data.direct_greedy_numa = opts.direct_greedy_numa;
+        skel.maps.rodata_data.mempolicy_affinity = opts.mempolicy_affinity;
+        skel.maps.rodata_data.debug = opts.verbose as u32;

         // Attach.
         let mut skel = scx_ops_load!(skel, rusty, uei)?;
@@ -487,8 +493,7 @@ impl<'a> Scheduler<'a> {
     }

     fn read_bpf_stats(&mut self) -> Result<Vec<u64>> {
-        let mut maps = self.skel.maps_mut();
-        let stats_map = maps.stats();
+        let stats_map = &mut self.skel.maps.stats;
         let mut stats: Vec<u64> = Vec::new();
         let zero_vec =
             vec![vec![0u8; stats_map.value_size() as usize]; self.top.nr_cpus_possible()];
@@ -686,8 +691,9 @@ fn main() -> Result<()> {
             .expect("failed to install log recorder");
     }

+    let mut open_object = MaybeUninit::uninit();
     loop {
-        let mut sched = Scheduler::init(&opts)?;
+        let mut sched = Scheduler::init(&opts, &mut open_object)?;
         if !sched.run(shutdown.clone())?.should_restart() {
             break;
         }
@@ -160,7 +160,7 @@ impl Tuner {
             }
         }

-        let ti = &mut skel.bss_mut().tune_input;
+        let ti = &mut skel.maps.bss_data.tune_input;
         let write_to_bpf = |target: &mut [u64; 8], mask: &Cpumask| {
             let raw_slice = mask.as_raw_slice();
             let (left, _) = target.split_at_mut(raw_slice.len());