Merge pull request #207 from sched-ext/api-updates

scx: Apply API updates from sched_ext
Tejun Heo 2024-04-02 14:26:42 -10:00 committed by GitHub
commit a60737a6bf
47 changed files with 127557 additions and 551256 deletions


@ -1,5 +1,5 @@
project('sched_ext schedulers', 'c',
version: '0.1.7',
version: '0.1.8',
license: 'GPL-2.0')
if meson.version().version_compare('<1.2')
@ -104,7 +104,7 @@ if should_build_libbpf
libbpf_local_h = ['.@0@/libbpf/src/usr/include'.format(meson.current_build_dir().replace(meson.current_source_dir(), ''))]
message('Fetching libbpf repo')
libbpf_commit = '4f875865b772c4f534bc0a665bbd988193825bd4'
libbpf_commit = '6d3595d215b014d3eddb88038d686e1c20781534'
run_command(fetch_libbpf, meson.current_build_dir(), libbpf_commit, check: true)
make_jobs = 1
@ -156,7 +156,7 @@ endif
if should_build_bpftool
message('Fetching bpftool repo')
bpftool_commit = '8328f373c0ef27fda14f12e67f1d6ed882dd2e81'
bpftool_commit = '20ce6933869b70bacfdd0dd1a8399199290bf8ff'
run_command(fetch_bpftool, meson.current_build_dir(), bpftool_commit, check: true)
bpftool_target = custom_target('bpftool_target',


@ -1,6 +1,6 @@
[package]
name = "scx_rustland_core"
version = "0.1.0"
version = "0.1.1"
edition = "2021"
authors = ["Andrea Righi <andrea.righi@canonical.com>"]
license = "GPL-2.0-only"
@ -15,12 +15,12 @@ include = [
[dependencies]
anyhow = "1.0"
libbpf-rs = "0.22.0"
libbpf-rs = "0.23"
libc = "0.2.137"
buddy-alloc = "0.5.1"
scx_utils = { path = "../scx_utils", version = "0.6" }
scx_utils = { path = "../scx_utils", version = "0.7" }
[build-dependencies]
tar = "0.4"
walkdir = "2.4"
scx_utils = { path = "../scx_utils", version = "0.6" }
scx_utils = { path = "../scx_utils", version = "0.7" }


@ -10,12 +10,14 @@ use anyhow::Context;
use anyhow::Result;
use libbpf_rs::skel::OpenSkel as _;
use libbpf_rs::skel::Skel as _;
use libbpf_rs::skel::SkelBuilder as _;
use libc::{sched_param, sched_setscheduler};
use scx_utils::compat;
use scx_utils::init_libbpf_logging;
use scx_utils::scx_ops_attach;
use scx_utils::scx_ops_load;
use scx_utils::uei_exited;
use scx_utils::uei_report;
@ -67,10 +69,10 @@ pub struct QueuedTask {
// Task queued for dispatching to the BPF component (see bpf_intf::dispatched_task_ctx).
#[derive(Debug, PartialEq, Eq, PartialOrd, Clone)]
pub struct DispatchedTask {
pid: i32, // pid that uniquely identifies a task
cpu: i32, // target CPU selected by the scheduler
slice_ns: u64, // time slice assigned to the task (0 = default)
cpumask_cnt: u64, // cpumask generation counter (private)
pid: i32, // pid that uniquely identifies a task
cpu: i32, // target CPU selected by the scheduler
slice_ns: u64, // time slice assigned to the task (0 = default)
cpumask_cnt: u64, // cpumask generation counter (private)
}
impl DispatchedTask {
@ -83,7 +85,7 @@ impl DispatchedTask {
pid: task.pid,
cpu: task.cpu,
cpumask_cnt: task.cpumask_cnt,
slice_ns: 0 // use default time slice
slice_ns: 0, // use default time slice
}
}
@ -177,6 +179,7 @@ impl<'cb> BpfScheduler<'cb> {
slice_us: u64,
nr_cpus_online: i32,
partial: bool,
exit_dump_len: u32,
full_user: bool,
debug: bool,
) -> Result<Self> {
@ -231,6 +234,11 @@ impl<'cb> BpfScheduler<'cb> {
skel.rodata_mut().num_possible_cpus = nr_cpus_online;
// Set scheduler options (defined in the BPF part).
if partial {
skel.struct_ops.rustland_mut().flags |= *compat::SCX_OPS_SWITCH_PARTIAL;
}
skel.struct_ops.rustland_mut().exit_dump_len = exit_dump_len;
skel.bss_mut().usersched_pid = std::process::id();
skel.rodata_mut().slice_ns = slice_us * 1000;
skel.rodata_mut().switch_partial = partial;
@ -238,14 +246,8 @@ impl<'cb> BpfScheduler<'cb> {
skel.rodata_mut().full_user = full_user;
// Attach BPF scheduler.
let mut skel = skel.load().context("Failed to load BPF program")?;
skel.attach().context("Failed to attach BPF program")?;
let struct_ops = Some(
skel.maps_mut()
.rustland()
.attach_struct_ops()
.context("Failed to attach struct ops")?,
);
let mut skel = scx_ops_load!(skel, rustland, uei)?;
let struct_ops = Some(scx_ops_attach!(skel, rustland)?);
// Build the ring buffer of queued tasks.
let binding = skel.maps();
@ -395,13 +397,13 @@ impl<'cb> BpfScheduler<'cb> {
// Read exit code from the BPF part.
pub fn exited(&mut self) -> bool {
uei_exited!(&self.skel.bss().uei)
uei_exited!(&self.skel, uei)
}
// Called on exit to shutdown and report exit message from the BPF part.
pub fn shutdown_and_report(&mut self) -> Result<()> {
self.struct_ops.take();
uei_report!(self.skel.bss().uei)
uei_report!(&self.skel, uei)
}
}


@ -31,7 +31,7 @@
char _license[] SEC("license") = "GPL";
struct user_exit_info uei;
UEI_DEFINE(uei);
/*
* Maximum amount of CPUs supported by this scheduler (this defines the size of
@ -394,7 +394,7 @@ dispatch_task(struct task_struct *p, u64 dsq_id,
p->pid, p->comm, dsq_id);
/* Wake up the target CPU (only if idle) */
__COMPAT_scx_bpf_kick_cpu_IDLE(cpu);
scx_bpf_kick_cpu(cpu, __COMPAT_SCX_KICK_IDLE);
break;
}
}
@ -863,7 +863,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(rustland_init)
if (err)
return err;
if (!switch_partial)
scx_bpf_switch_all();
__COMPAT_scx_bpf_switch_all();
return 0;
}
@ -873,27 +873,25 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(rustland_init)
*/
void BPF_STRUCT_OPS(rustland_exit, struct scx_exit_info *ei)
{
uei_record(&uei, ei);
UEI_RECORD(uei, ei);
}
/*
* Scheduling class declaration.
*/
SEC(".struct_ops.link")
struct sched_ext_ops rustland = {
.select_cpu = (void *)rustland_select_cpu,
.enqueue = (void *)rustland_enqueue,
.dispatch = (void *)rustland_dispatch,
.running = (void *)rustland_running,
.stopping = (void *)rustland_stopping,
.update_idle = (void *)rustland_update_idle,
.set_cpumask = (void *)rustland_set_cpumask,
.cpu_release = (void *)rustland_cpu_release,
.init_task = (void *)rustland_init_task,
.exit_task = (void *)rustland_exit_task,
.init = (void *)rustland_init,
.exit = (void *)rustland_exit,
.flags = SCX_OPS_ENQ_LAST | SCX_OPS_KEEP_BUILTIN_IDLE,
.timeout_ms = 5000,
.name = "rustland",
};
SCX_OPS_DEFINE(rustland,
.select_cpu = (void *)rustland_select_cpu,
.enqueue = (void *)rustland_enqueue,
.dispatch = (void *)rustland_dispatch,
.running = (void *)rustland_running,
.stopping = (void *)rustland_stopping,
.update_idle = (void *)rustland_update_idle,
.set_cpumask = (void *)rustland_set_cpumask,
.cpu_release = (void *)rustland_cpu_release,
.init_task = (void *)rustland_init_task,
.exit_task = (void *)rustland_exit_task,
.init = (void *)rustland_init,
.exit = (void *)rustland_exit,
.flags = SCX_OPS_ENQ_LAST | SCX_OPS_KEEP_BUILTIN_IDLE,
.timeout_ms = 5000,
.name = "rustland");


@ -1,6 +1,6 @@
[package]
name = "scx_utils"
version = "0.6.0"
version = "0.7.0"
edition = "2021"
authors = ["Tejun Heo <tj@kernel.org>"]
license = "GPL-2.0-only"
@ -16,10 +16,11 @@ bindgen = ">=0.68, <0.70"
glob = "0.3"
hex = "0.4.3"
lazy_static = "1.4"
libbpf-cargo = "0.22"
libbpf-rs = "0.22.0"
buddy-alloc = "0.5.1"
log = "0.4.17"
libbpf-cargo = "0.23"
libbpf-rs = "0.23"
buddy-alloc = "0.5"
log = "0.4"
paste = "1.0"
regex = "1.10"
sscanf = "0.4"
tar = "0.4"


@ -478,14 +478,6 @@ impl BpfBuilder {
self
}
fn cflags_string(&self) -> String {
self.cflags
.iter()
.map(|x| x.as_str())
.collect::<Vec<&str>>()
.join(" ")
}
fn bindgen_bpf_intf(&self, deps: &mut BTreeSet<String>) -> Result<()> {
let (input, output) = match &self.intf_input_output {
Some(pair) => pair,
@ -538,7 +530,7 @@ impl BpfBuilder {
.source(input)
.obj(&obj)
.clang(&self.clang.0)
.clang_args(self.cflags_string())
.clang_args(&self.cflags)
.build_and_generate(&skel_path)?;
match &self.skel_deps {


@ -45,6 +45,7 @@ impl Builder {
let bindings = bindgen::Builder::default()
.header("bindings.h")
.allowlist_type("scx_exit_kind")
.allowlist_type("scx_internal_consts")
.parse_callbacks(Box::new(bindgen::CargoCallbacks))
.generate()
.expect("Unable to generate bindings");


@ -0,0 +1,180 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2.
use anyhow::{anyhow, bail, Context, Result};
use libbpf_rs::libbpf_sys::*;
use std::ffi::c_void;
use std::ffi::CStr;
use std::ffi::CString;
use std::mem::size_of;
use std::slice::from_raw_parts;
lazy_static::lazy_static! {
pub static ref SCX_OPS_SWITCH_PARTIAL: u64 =
read_enum("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL").unwrap_or(0);
}
fn load_vmlinux_btf() -> &'static mut btf {
let btf = unsafe { btf__load_vmlinux_btf() };
if btf.is_null() {
panic!("btf__load_vmlinux_btf() returned NULL");
}
unsafe { &mut *btf }
}
lazy_static::lazy_static! {
static ref VMLINUX_BTF: &'static mut btf = load_vmlinux_btf();
}
fn btf_kind(t: &btf_type) -> u32 {
(t.info >> 24) & 0x1f
}
fn btf_vlen(t: &btf_type) -> u32 {
t.info & 0xffff
}
fn btf_type_plus_1(t: &btf_type) -> *const c_void {
let ptr_val = t as *const btf_type as usize;
(ptr_val + size_of::<btf_type>()) as *const c_void
}
fn btf_enum(t: &btf_type) -> &[btf_enum] {
let ptr = btf_type_plus_1(t);
unsafe { from_raw_parts(ptr as *const btf_enum, btf_vlen(t) as usize) }
}
fn btf_enum64(t: &btf_type) -> &[btf_enum64] {
let ptr = btf_type_plus_1(t);
unsafe { from_raw_parts(ptr as *const btf_enum64, btf_vlen(t) as usize) }
}
fn btf_members(t: &btf_type) -> &[btf_member] {
let ptr = btf_type_plus_1(t);
unsafe { from_raw_parts(ptr as *const btf_member, btf_vlen(t) as usize) }
}
fn btf_name_str_by_offset(btf: &btf, name_off: u32) -> Result<&str> {
let n = unsafe { btf__name_by_offset(btf, name_off) };
if n.is_null() {
bail!("btf__name_by_offset() returned NULL");
}
Ok(unsafe { CStr::from_ptr(n) }
.to_str()
.with_context(|| format!("Failed to convert {:?} to string", n))?)
}
pub fn read_enum(type_name: &str, name: &str) -> Result<u64> {
let btf: &btf = *VMLINUX_BTF;
let type_name = CString::new(type_name).unwrap();
let tid = unsafe { btf__find_by_name(btf, type_name.as_ptr()) };
if tid < 0 {
bail!("type {:?} doesn't exist, ret={}", type_name, tid);
}
let t = unsafe { btf__type_by_id(btf, tid as _) };
if t.is_null() {
bail!("btf__type_by_id({}) returned NULL", tid);
}
let t = unsafe { &*t };
match btf_kind(t) {
BTF_KIND_ENUM => {
for e in btf_enum(t).iter() {
if btf_name_str_by_offset(btf, e.name_off)? == name {
return Ok(e.val as u64);
}
}
}
BTF_KIND_ENUM64 => {
for e in btf_enum64(t).iter() {
if btf_name_str_by_offset(btf, e.name_off)? == name {
return Ok(((e.val_hi32 as u64) << 32) | (e.val_lo32) as u64);
}
}
}
_ => (),
}
Err(anyhow!("{:?} doesn't exist in {:?}", name, type_name))
}
pub fn struct_has_field(type_name: &str, field: &str) -> Result<bool> {
let btf: &btf = *VMLINUX_BTF;
let type_name = CString::new(type_name).unwrap();
let tid = unsafe { btf__find_by_name_kind(btf, type_name.as_ptr(), BTF_KIND_STRUCT) };
if tid < 0 {
bail!("type {:?} doesn't exist, ret={}", type_name, tid);
}
let t = unsafe { btf__type_by_id(btf, tid as _) };
if t.is_null() {
bail!("btf__type_by_id({}) returned NULL", tid);
}
let t = unsafe { &*t };
for m in btf_members(t).iter() {
if btf_name_str_by_offset(btf, m.name_off)? == field {
return Ok(true);
}
}
return Ok(false);
}
/// struct sched_ext_ops can change over time. If
/// compat.bpf.h::SCX_OPS_DEFINE() is used to define ops and scx_ops_load!()
/// and scx_ops_attach!() are used to load and attach it, backward
/// compatibility is automatically maintained where reasonable.
///
/// - sched_ext_ops.exit_dump_len was added later. On kernels which don't
/// support it, the value is ignored and a warning is triggered if the value
/// is requested to be non-zero.
#[macro_export]
macro_rules! scx_ops_load {
($skel: expr, $ops: ident, $uei: ident) => {{
scx_utils::paste! {
scx_utils::uei_set_size!($skel, $ops, $uei);
let ops = $skel.struct_ops.[<$ops _mut>]();
let has_field = scx_utils::compat::struct_has_field("sched_ext_ops", "exit_dump_len")?;
if !has_field && ops.exit_dump_len != 0 {
scx_utils::warn!("Kernel doesn't support setting exit dump len");
ops.exit_dump_len = 0;
}
$skel.load().context("Failed to load BPF program")
}
}};
}
/// Must be used together with scx_ops_load!(). See there.
#[macro_export]
macro_rules! scx_ops_attach {
($skel: expr, $ops: ident) => {{
$skel
.maps_mut()
.$ops()
.attach_struct_ops()
.context("Failed to attach struct ops")
}};
}
#[cfg(test)]
mod tests {
#[test]
fn test_read_enum() {
assert_eq!(super::read_enum("pid_type", "PIDTYPE_TGID").unwrap(), 1);
}
#[test]
fn test_struct_has_field() {
assert!(super::struct_has_field("task_struct", "flags").unwrap());
assert!(!super::struct_has_field("task_struct", "NO_SUCH_FIELD").unwrap());
assert!(super::struct_has_field("NO_SUCH_STRUCT", "NO_SUCH_FIELD").is_err());
}
}


@ -30,6 +30,9 @@
//! Utility modules which can be useful for userspace component of sched_ext
//! schedulers.
pub use paste::paste;
pub use log::warn;
mod bindings;
mod bpf_builder;
@ -38,21 +41,26 @@ pub use bpf_builder::BpfBuilder;
mod builder;
pub use builder::Builder;
pub mod ravg;
mod user_exit_info;
pub use user_exit_info::ScxExitKind;
pub use user_exit_info::ScxInternalConsts;
pub use user_exit_info::UeiDumpPtr;
pub use user_exit_info::UserExitInfo;
pub use user_exit_info::UEI_DUMP_PTR_MUTEX;
pub mod compat;
mod libbpf_logger;
pub use libbpf_logger::init_libbpf_logging;
mod user_exit_info;
pub use user_exit_info::UserExitInfo;
pub use user_exit_info::ScxExitKind;
pub mod ravg;
mod topology;
pub use topology::Topology;
pub use topology::Cpu;
pub use topology::Core;
pub use topology::Cache;
pub use topology::Core;
pub use topology::Cpu;
pub use topology::Node;
pub use topology::Topology;
mod cpumask;
pub use cpumask::Cpumask;


@ -7,6 +7,16 @@ use anyhow::bail;
use anyhow::Result;
use std::ffi::CStr;
use std::os::raw::c_char;
use std::sync::Mutex;
pub struct UeiDumpPtr {
pub ptr: *const c_char,
}
unsafe impl Send for UeiDumpPtr {}
pub static UEI_DUMP_PTR_MUTEX: Mutex<UeiDumpPtr> = Mutex::new(UeiDumpPtr {
ptr: std::ptr::null(),
});
pub enum ScxExitKind {
None = bindings::scx_exit_kind_SCX_EXIT_NONE as isize,
@ -18,29 +28,63 @@ pub enum ScxExitKind {
ErrorStall = bindings::scx_exit_kind_SCX_EXIT_ERROR_STALL as isize,
}
pub enum ScxInternalConsts {
ExitDumpDflLen = bindings::scx_internal_consts_SCX_EXIT_DUMP_DFL_LEN as isize,
}
/// Takes a reference to C struct user_exit_info and reads it into
/// UserExitInfo. See UserExitInfo.
#[macro_export]
macro_rules! uei_read {
($bpf_uei:expr) => {{
{
let bpf_uei = $bpf_uei;
($skel: expr, $uei:ident) => {{
scx_utils::paste! {
let bpf_uei = $skel.data().$uei;
let bpf_dump = scx_utils::UEI_DUMP_PTR_MUTEX.lock().unwrap().ptr;
scx_utils::UserExitInfo::new(
&bpf_uei.kind as *const _,
bpf_uei.reason.as_ptr() as *const _,
bpf_uei.msg.as_ptr() as *const _,
bpf_uei.dump.as_ptr() as *const _,
bpf_dump,
)
}
}};
}
/// Resize debug dump area according to ops.exit_dump_len. If this macro is
/// not called, debug dump area is not allocated and debug dump won't be
/// printed out.
#[macro_export]
macro_rules! uei_set_size {
($skel: expr, $ops: ident, $uei:ident) => {{
scx_utils::paste! {
let len = match $skel.struct_ops.$ops().exit_dump_len {
0 => scx_utils::ScxInternalConsts::ExitDumpDflLen as u32,
v => v,
};
$skel.rodata_mut().[<$uei _dump_len>] = len;
$skel.maps_mut().[<data_ $uei _dump>]().set_value_size(len).unwrap();
let mut ptr = scx_utils::UEI_DUMP_PTR_MUTEX.lock().unwrap();
*ptr = scx_utils::UeiDumpPtr { ptr:
$skel
.maps()
.[<data_ $uei _dump>]()
.initial_value()
.unwrap()
.as_ptr() as *const _,
};
}
}};
}
/// Takes a reference to C struct user_exit_info and test whether the BPF
/// scheduler has exited. See UserExitInfo.
#[macro_export]
macro_rules! uei_exited {
($bpf_uei:expr) => {{
(unsafe { std::ptr::read_volatile(&$bpf_uei.kind as *const _) } != 0)
($skel: expr, $uei:ident) => {{
let bpf_uei = $skel.data().uei;
(unsafe { std::ptr::read_volatile(&bpf_uei.kind as *const _) } != 0)
}};
}
@ -48,8 +92,8 @@ macro_rules! uei_exited {
/// UserExitInfo::report() on it. See UserExitInfo.
#[macro_export]
macro_rules! uei_report {
($bpf_uei:expr) => {{
scx_utils::uei_read!($bpf_uei).report()
($skel: expr, $uei:ident) => {{
scx_utils::uei_read!($skel, $uei).report()
}};
}
@ -78,7 +122,7 @@ impl UserExitInfo {
) -> Self {
let kind = unsafe { std::ptr::read_volatile(kind_ptr) };
let (reason, msg, dump) = (
let (reason, msg) = (
Some(
unsafe { CStr::from_ptr(reason_ptr) }
.to_str()
@ -93,14 +137,19 @@ impl UserExitInfo {
.to_string(),
)
.filter(|s| !s.is_empty()),
);
let dump = if dump_ptr.is_null() {
None
} else {
Some(
unsafe { CStr::from_ptr(dump_ptr) }
.to_str()
.expect("Failed to convert msg to string")
.to_string(),
)
.filter(|s| !s.is_empty()),
);
.filter(|s| !s.is_empty())
};
Self {
kind,
@ -118,12 +167,12 @@ impl UserExitInfo {
return Ok(());
}
if let Some(dump) = &self.dump {
eprintln!("\nDEBUG DUMP");
eprintln!("================================================================================\n");
eprintln!("{}", dump);
eprintln!("================================================================================\n");
}
if let Some(dump) = &self.dump {
eprintln!("\nDEBUG DUMP");
eprintln!("================================================================================\n");
eprintln!("{}", dump);
eprintln!("================================================================================\n");
}
let why = match (&self.reason, &self.msg) {
(Some(reason), None) => format!("EXIT: {}", reason),


@ -55,7 +55,6 @@ enum {
TIMER_INTERVAL_NS = 1 * MS_TO_NS,
};
const volatile bool switch_partial;
const volatile s32 central_cpu;
const volatile u32 nr_cpu_ids = 1; /* !0 for veristat, set during init */
const volatile u64 slice_ns = SCX_SLICE_DFL;
@ -65,7 +64,7 @@ u64 nr_total, nr_locals, nr_queued, nr_lost_pids;
u64 nr_timers, nr_dispatches, nr_mismatches, nr_retries;
u64 nr_overflows;
struct user_exit_info uei;
UEI_DEFINE(uei);
struct {
__uint(type, BPF_MAP_TYPE_QUEUE);
@ -176,7 +175,7 @@ static bool dispatch_to_cpu(s32 cpu)
scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, SCX_SLICE_INF, 0);
if (cpu != central_cpu)
__COMPAT_scx_bpf_kick_cpu_IDLE(cpu);
scx_bpf_kick_cpu(cpu, __COMPAT_SCX_KICK_IDLE);
bpf_task_release(p);
return true;
@ -306,9 +305,6 @@ int BPF_STRUCT_OPS_SLEEPABLE(central_init)
struct bpf_timer *timer;
int ret;
if (!switch_partial)
scx_bpf_switch_all();
ret = scx_bpf_create_dsq(FALLBACK_DSQ_ID, -1);
if (ret)
return ret;
@ -344,24 +340,22 @@ int BPF_STRUCT_OPS_SLEEPABLE(central_init)
void BPF_STRUCT_OPS(central_exit, struct scx_exit_info *ei)
{
uei_record(&uei, ei);
UEI_RECORD(uei, ei);
}
SEC(".struct_ops.link")
struct sched_ext_ops central_ops = {
/*
* We are offloading all scheduling decisions to the central CPU and
* thus being the last task on a given CPU doesn't mean anything
* special. Enqueue the last tasks like any other tasks.
*/
.flags = SCX_OPS_ENQ_LAST,
SCX_OPS_DEFINE(central_ops,
/*
* We are offloading all scheduling decisions to the central CPU
* and thus being the last task on a given CPU doesn't mean
* anything special. Enqueue the last tasks like any other tasks.
*/
.flags = SCX_OPS_ENQ_LAST,
.select_cpu = (void *)central_select_cpu,
.enqueue = (void *)central_enqueue,
.dispatch = (void *)central_dispatch,
.running = (void *)central_running,
.stopping = (void *)central_stopping,
.init = (void *)central_init,
.exit = (void *)central_exit,
.name = "central",
};
.select_cpu = (void *)central_select_cpu,
.enqueue = (void *)central_enqueue,
.dispatch = (void *)central_dispatch,
.running = (void *)central_running,
.stopping = (void *)central_stopping,
.init = (void *)central_init,
.exit = (void *)central_exit,
.name = "central");


@ -20,11 +20,10 @@ const char help_fmt[] =
"\n"
"See the top-level comment in .bpf.c for more details.\n"
"\n"
"Usage: %s [-s SLICE_US] [-c CPU] [-p]\n"
"Usage: %s [-s SLICE_US] [-c CPU]\n"
"\n"
" -s SLICE_US Override slice duration\n"
" -c CPU Override the central CPU (default: 0)\n"
" -p Switch only tasks on SCHED_EXT policy intead of all\n"
" -h Display this help and exit\n";
static volatile int exit_req;
@ -61,9 +60,6 @@ int main(int argc, char **argv)
case 'c':
skel->rodata->central_cpu = strtoul(optarg, NULL, 0);
break;
case 'p':
skel->rodata->switch_partial = true;
break;
default:
fprintf(stderr, help_fmt, basename(argv[0]));
return opt != 'h';
@ -74,7 +70,7 @@ int main(int argc, char **argv)
RESIZE_ARRAY(data, cpu_gimme_task, skel->rodata->nr_cpu_ids);
RESIZE_ARRAY(data, cpu_started_at, skel->rodata->nr_cpu_ids);
SCX_BUG_ON(scx_central__load(skel), "Failed to load skel");
SCX_OPS_LOAD(skel, central_ops, scx_central, uei);
/*
* Affinitize the loading thread to the central CPU, as:
@ -96,13 +92,12 @@ int main(int argc, char **argv)
skel->rodata->central_cpu, skel->rodata->nr_cpu_ids - 1);
CPU_FREE(cpuset);
link = bpf_map__attach_struct_ops(skel->maps.central_ops);
SCX_BUG_ON(!link, "Failed to attach struct_ops");
link = SCX_OPS_ATTACH(skel, central_ops);
if (!skel->data->timer_pinned)
printf("WARNING : BPF_F_TIMER_CPU_PIN not available, timer not pinned to central\n");
while (!exit_req && !uei_exited(&skel->bss->uei)) {
while (!exit_req && !UEI_EXITED(skel, uei)) {
printf("[SEQ %llu]\n", seq++);
printf("total :%10" PRIu64 " local:%10" PRIu64 " queued:%10" PRIu64 " lost:%10" PRIu64 "\n",
skel->bss->nr_total,
@ -121,7 +116,7 @@ int main(int argc, char **argv)
}
bpf_link__destroy(link);
uei_print(&skel->bss->uei);
UEI_REPORT(skel, uei);
scx_central__destroy(skel);
return 0;
}


@ -56,10 +56,9 @@ char _license[] SEC("license") = "GPL";
const volatile u32 nr_cpus = 32; /* !0 for veristat, set during init */
const volatile u64 cgrp_slice_ns = SCX_SLICE_DFL;
const volatile bool fifo_sched;
const volatile bool switch_partial;
u64 cvtime_now;
struct user_exit_info uei;
UEI_DEFINE(uei);
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
@ -917,34 +916,24 @@ void BPF_STRUCT_OPS(fcg_cgroup_move, struct task_struct *p,
p->scx.dsq_vtime = to_cgc->tvtime_now + vtime_delta;
}
s32 BPF_STRUCT_OPS(fcg_init)
{
if (!switch_partial)
scx_bpf_switch_all();
return 0;
}
void BPF_STRUCT_OPS(fcg_exit, struct scx_exit_info *ei)
{
uei_record(&uei, ei);
UEI_RECORD(uei, ei);
}
SEC(".struct_ops.link")
struct sched_ext_ops flatcg_ops = {
.select_cpu = (void *)fcg_select_cpu,
.enqueue = (void *)fcg_enqueue,
.dispatch = (void *)fcg_dispatch,
.runnable = (void *)fcg_runnable,
.running = (void *)fcg_running,
.stopping = (void *)fcg_stopping,
.quiescent = (void *)fcg_quiescent,
.init_task = (void *)fcg_init_task,
.cgroup_set_weight = (void *)fcg_cgroup_set_weight,
.cgroup_init = (void *)fcg_cgroup_init,
.cgroup_exit = (void *)fcg_cgroup_exit,
.cgroup_move = (void *)fcg_cgroup_move,
.init = (void *)fcg_init,
.exit = (void *)fcg_exit,
.flags = SCX_OPS_CGROUP_KNOB_WEIGHT | SCX_OPS_ENQ_EXITING,
.name = "flatcg",
};
SCX_OPS_DEFINE(flatcg_ops,
.select_cpu = (void *)fcg_select_cpu,
.enqueue = (void *)fcg_enqueue,
.dispatch = (void *)fcg_dispatch,
.runnable = (void *)fcg_runnable,
.running = (void *)fcg_running,
.stopping = (void *)fcg_stopping,
.quiescent = (void *)fcg_quiescent,
.init_task = (void *)fcg_init_task,
.cgroup_set_weight = (void *)fcg_cgroup_set_weight,
.cgroup_init = (void *)fcg_cgroup_init,
.cgroup_exit = (void *)fcg_cgroup_exit,
.cgroup_move = (void *)fcg_cgroup_move,
.exit = (void *)fcg_exit,
.flags = SCX_OPS_CGROUP_KNOB_WEIGHT | SCX_OPS_ENQ_EXITING,
.name = "flatcg");


@ -26,12 +26,11 @@ const char help_fmt[] =
"\n"
"See the top-level comment in .bpf.c for more details.\n"
"\n"
"Usage: %s [-s SLICE_US] [-i INTERVAL] [-f] [-p]\n"
"Usage: %s [-s SLICE_US] [-i INTERVAL] [-f]\n"
"\n"
" -s SLICE_US Override slice duration\n"
" -i INTERVAL Report interval\n"
" -f Use FIFO scheduling instead of weighted vtime scheduling\n"
" -p Switch only tasks on SCHED_EXT policy intead of all\n"
" -h Display this help and exit\n";
static volatile int exit_req;
@ -150,9 +149,6 @@ int main(int argc, char **argv)
case 'f':
skel->rodata->fifo_sched = true;
break;
case 'p':
skel->rodata->switch_partial = true;
break;
case 'h':
default:
fprintf(stderr, help_fmt, basename(argv[0]));
@ -165,12 +161,10 @@ int main(int argc, char **argv)
(double)intv_ts.tv_sec + (double)intv_ts.tv_nsec / 1000000000.0,
dump_cgrps);
SCX_BUG_ON(scx_flatcg__load(skel), "Failed to load skel");
SCX_OPS_LOAD(skel, flatcg_ops, scx_flatcg, uei);
link = SCX_OPS_ATTACH(skel, flatcg_ops);
link = bpf_map__attach_struct_ops(skel->maps.flatcg_ops);
SCX_BUG_ON(!link, "Failed to attach struct_ops");
while (!exit_req && !uei_exited(&skel->bss->uei)) {
while (!exit_req && !UEI_EXITED(skel, uei)) {
__u64 acc_stats[FCG_NR_STATS];
__u64 stats[FCG_NR_STATS];
float cpu_util;
@ -219,7 +213,7 @@ int main(int argc, char **argv)
}
bpf_link__destroy(link);
uei_print(&skel->bss->uei);
UEI_REPORT(skel, uei);
scx_flatcg__destroy(skel);
return 0;
}


@ -59,7 +59,7 @@ u64 stats_primary_mask, stats_reserved_mask, stats_other_mask, stats_idle_mask;
static s32 nr_reserved;
static u64 vtime_now;
struct user_exit_info uei;
UEI_DEFINE(uei);
extern unsigned long CONFIG_HZ __kconfig;
@ -236,15 +236,6 @@ s32 BPF_STRUCT_OPS(nest_select_cpu, struct task_struct *p, s32 prev_cpu,
struct pcpu_ctx *pcpu_ctx;
bool direct_to_primary = false, reset_impatient = true;
/*
* Don't bother trying to find an idle core if a task is doing an
* exec(). We would have already tried to find a core on fork(), and if
* we were successful in doing so, the task will already be running on
* what was previously an idle core.
*/
if (wake_flags & SCX_WAKE_EXEC)
return prev_cpu;
tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0);
if (!tctx)
return -ENOENT;
@ -591,7 +582,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(nest_init)
struct bpf_timer *timer;
u32 key = 0;
scx_bpf_switch_all();
__COMPAT_scx_bpf_switch_all();
err = scx_bpf_create_dsq(FALLBACK_DSQ_ID, NUMA_NO_NODE);
if (err) {
@ -652,20 +643,19 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(nest_init)
void BPF_STRUCT_OPS(nest_exit, struct scx_exit_info *ei)
{
uei_record(&uei, ei);
UEI_RECORD(uei, ei);
}
SEC(".struct_ops.link")
struct sched_ext_ops nest_ops = {
.select_cpu = (void *)nest_select_cpu,
.enqueue = (void *)nest_enqueue,
.dispatch = (void *)nest_dispatch,
.running = (void *)nest_running,
.stopping = (void *)nest_stopping,
.init_task = (void *)nest_init_task,
.enable = (void *)nest_enable,
.init = (void *)nest_init,
.exit = (void *)nest_exit,
.flags = 0,
.name = "nest",
};
SCX_OPS_DEFINE(nest_ops,
.select_cpu = (void *)nest_select_cpu,
.enqueue = (void *)nest_enqueue,
.dispatch = (void *)nest_dispatch,
.running = (void *)nest_running,
.stopping = (void *)nest_stopping,
.init_task = (void *)nest_init_task,
.enable = (void *)nest_enable,
.init = (void *)nest_init,
.exit = (void *)nest_exit,
.flags = 0,
.name = "nest");


@ -187,12 +187,10 @@ int main(int argc, char **argv)
}
}
SCX_BUG_ON(scx_nest__load(skel), "Failed to load skel");
SCX_OPS_LOAD(skel, nest_ops, scx_nest, uei);
link = SCX_OPS_ATTACH(skel, nest_ops);
link = bpf_map__attach_struct_ops(skel->maps.nest_ops);
SCX_BUG_ON(!link, "Failed to attach struct_ops");
while (!exit_req && !uei_exited(&skel->bss->uei)) {
while (!exit_req && !UEI_EXITED(skel, uei)) {
u64 stats[NEST_STAT(NR)];
enum nest_stat_idx i;
enum nest_stat_group last_grp = -1;
@ -218,7 +216,7 @@ int main(int argc, char **argv)
}
bpf_link__destroy(link);
uei_print(&skel->bss->uei);
UEI_REPORT(skel, uei);
scx_nest__destroy(skel);
return 0;
}


@ -120,8 +120,6 @@
char _license[] SEC("license") = "GPL";
const volatile bool switch_partial;
/* !0 for veristat, set during init */
const volatile u32 nr_cpu_ids = 1;
@ -239,7 +237,7 @@ u64 nr_total, nr_dispatched, nr_missing, nr_kicks, nr_preemptions;
u64 nr_exps, nr_exp_waits, nr_exp_empty;
u64 nr_cgrp_next, nr_cgrp_coll, nr_cgrp_empty;
struct user_exit_info uei;
UEI_DEFINE(uei);
static bool time_before(u64 a, u64 b)
{
@ -600,27 +598,17 @@ void BPF_STRUCT_OPS(pair_cgroup_exit, struct cgroup *cgrp)
}
}
s32 BPF_STRUCT_OPS(pair_init)
{
if (!switch_partial)
scx_bpf_switch_all();
return 0;
}
void BPF_STRUCT_OPS(pair_exit, struct scx_exit_info *ei)
{
uei_record(&uei, ei);
UEI_RECORD(uei, ei);
}
SEC(".struct_ops.link")
struct sched_ext_ops pair_ops = {
.enqueue = (void *)pair_enqueue,
.dispatch = (void *)pair_dispatch,
.cpu_acquire = (void *)pair_cpu_acquire,
.cpu_release = (void *)pair_cpu_release,
.cgroup_init = (void *)pair_cgroup_init,
.cgroup_exit = (void *)pair_cgroup_exit,
.init = (void *)pair_init,
.exit = (void *)pair_exit,
.name = "pair",
};
SCX_OPS_DEFINE(pair_ops,
.enqueue = (void *)pair_enqueue,
.dispatch = (void *)pair_dispatch,
.cpu_acquire = (void *)pair_cpu_acquire,
.cpu_release = (void *)pair_cpu_release,
.cgroup_init = (void *)pair_cgroup_init,
.cgroup_exit = (void *)pair_cgroup_exit,
.exit = (void *)pair_exit,
.name = "pair");


@ -20,10 +20,9 @@ const char help_fmt[] =
"\n"
"See the top-level comment in .bpf.c for more details.\n"
"\n"
"Usage: %s [-S STRIDE] [-p]\n"
"Usage: %s [-S STRIDE]\n"
"\n"
" -S STRIDE Override CPU pair stride (default: nr_cpus_ids / 2)\n"
" -p Switch only tasks on SCHED_EXT policy intead of all\n"
" -h Display this help and exit\n";
static volatile int exit_req;
@ -58,9 +57,6 @@ int main(int argc, char **argv)
case 'S':
stride = strtoul(optarg, NULL, 0);
break;
case 'p':
skel->rodata->switch_partial = true;
break;
default:
fprintf(stderr, help_fmt, basename(argv[0]));
return opt != 'h';
@ -103,7 +99,7 @@ int main(int argc, char **argv)
}
printf("\n");
SCX_BUG_ON(scx_pair__load(skel), "Failed to load skel");
SCX_OPS_LOAD(skel, pair_ops, scx_pair, uei);
/*
* Populate the cgrp_q_arr map which is an array containing per-cgroup
@ -138,10 +134,9 @@ int main(int argc, char **argv)
/*
* Fully initialized, attach and run.
*/
link = bpf_map__attach_struct_ops(skel->maps.pair_ops);
SCX_BUG_ON(!link, "Failed to attach struct_ops");
link = SCX_OPS_ATTACH(skel, pair_ops);
while (!exit_req && !uei_exited(&skel->bss->uei)) {
while (!exit_req && !UEI_EXITED(skel, uei)) {
printf("[SEQ %llu]\n", seq++);
printf(" total:%10" PRIu64 " dispatch:%10" PRIu64 " missing:%10" PRIu64 "\n",
skel->bss->nr_total,
@ -163,7 +158,7 @@ int main(int argc, char **argv)
}
bpf_link__destroy(link);
uei_print(&skel->bss->uei);
UEI_REPORT(skel, uei);
scx_pair__destroy(skel);
return 0;
}


@ -27,15 +27,15 @@
char _license[] SEC("license") = "GPL";
const volatile u64 slice_ns = SCX_SLICE_DFL;
const volatile bool switch_partial;
const volatile u32 stall_user_nth;
const volatile u32 stall_kernel_nth;
const volatile u32 dsp_inf_loop_after;
const volatile s32 disallow_tgid;
const volatile bool switch_partial;
u32 test_error_cnt;
struct user_exit_info uei;
UEI_DEFINE(uei);
struct qmap {
__uint(type, BPF_MAP_TYPE_QUEUE);
@ -192,7 +192,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, 0, enq_flags);
cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
if (cpu >= 0)
__COMPAT_scx_bpf_kick_cpu_IDLE(cpu);
scx_bpf_kick_cpu(cpu, __COMPAT_SCX_KICK_IDLE);
return;
}
@ -374,27 +374,25 @@ s32 BPF_STRUCT_OPS(qmap_init_task, struct task_struct *p,
s32 BPF_STRUCT_OPS(qmap_init)
{
if (!switch_partial)
scx_bpf_switch_all();
__COMPAT_scx_bpf_switch_all();
return 0;
}
void BPF_STRUCT_OPS(qmap_exit, struct scx_exit_info *ei)
{
uei_record(&uei, ei);
UEI_RECORD(uei, ei);
}
SEC(".struct_ops.link")
struct sched_ext_ops qmap_ops = {
.select_cpu = (void *)qmap_select_cpu,
.enqueue = (void *)qmap_enqueue,
.dequeue = (void *)qmap_dequeue,
.dispatch = (void *)qmap_dispatch,
.core_sched_before = (void *)qmap_core_sched_before,
.cpu_release = (void *)qmap_cpu_release,
.init_task = (void *)qmap_init_task,
.init = (void *)qmap_init,
.exit = (void *)qmap_exit,
.flags = SCX_OPS_ENQ_LAST,
.timeout_ms = 5000U,
.name = "qmap",
};
SCX_OPS_DEFINE(qmap_ops,
.select_cpu = (void *)qmap_select_cpu,
.enqueue = (void *)qmap_enqueue,
.dequeue = (void *)qmap_dequeue,
.dispatch = (void *)qmap_dispatch,
.core_sched_before = (void *)qmap_core_sched_before,
.cpu_release = (void *)qmap_cpu_release,
.init_task = (void *)qmap_init_task,
.init = (void *)qmap_init,
.exit = (void *)qmap_exit,
.flags = SCX_OPS_ENQ_LAST,
.timeout_ms = 5000U,
.name = "qmap");


@ -19,7 +19,8 @@ const char help_fmt[] =
"\n"
"See the top-level comment in .bpf.c for more details.\n"
"\n"
"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-d PID] [-p]\n"
"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-d PID]\n"
" [-D LEN] [-p]\n"
"\n"
" -s SLICE_US Override slice duration\n"
" -e COUNT Trigger scx_bpf_error() after COUNT enqueues\n"
@ -27,6 +28,7 @@ const char help_fmt[] =
" -T COUNT Stall every COUNT'th kernel thread\n"
" -l COUNT Trigger dispatch infinite looping after COUNT dispatches\n"
" -d PID Disallow a process from switching into SCHED_EXT (-1 for self)\n"
" -D LEN Set scx_exit_info.dump buffer length\n"
" -p Switch only tasks on SCHED_EXT policy intead of all\n"
" -h Display this help and exit\n";
@ -51,7 +53,7 @@ int main(int argc, char **argv)
skel = scx_qmap__open();
SCX_BUG_ON(!skel, "Failed to open skel");
while ((opt = getopt(argc, argv, "s:e:t:T:l:d:ph")) != -1) {
while ((opt = getopt(argc, argv, "s:e:t:T:l:d:D:ph")) != -1) {
switch (opt) {
case 's':
skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
@ -73,8 +75,12 @@ int main(int argc, char **argv)
if (skel->rodata->disallow_tgid < 0)
skel->rodata->disallow_tgid = getpid();
break;
case 'D':
skel->struct_ops.qmap_ops->exit_dump_len = strtoul(optarg, NULL, 0);
break;
case 'p':
skel->rodata->switch_partial = true;
skel->struct_ops.qmap_ops->flags |= __COMPAT_SCX_OPS_SWITCH_PARTIAL;
break;
default:
fprintf(stderr, help_fmt, basename(argv[0]));
@ -82,12 +88,10 @@ int main(int argc, char **argv)
}
}
SCX_BUG_ON(scx_qmap__load(skel), "Failed to load skel");
SCX_OPS_LOAD(skel, qmap_ops, scx_qmap, uei);
link = SCX_OPS_ATTACH(skel, qmap_ops);
link = bpf_map__attach_struct_ops(skel->maps.qmap_ops);
SCX_BUG_ON(!link, "Failed to attach struct_ops");
while (!exit_req && !uei_exited(&skel->bss->uei)) {
while (!exit_req && !UEI_EXITED(skel, uei)) {
long nr_enqueued = skel->bss->nr_enqueued;
long nr_dispatched = skel->bss->nr_dispatched;
@ -100,7 +104,7 @@ int main(int argc, char **argv)
}
bpf_link__destroy(link);
uei_print(&skel->bss->uei);
UEI_REPORT(skel, uei);
scx_qmap__destroy(skel);
return 0;
}


@ -25,10 +25,9 @@
char _license[] SEC("license") = "GPL";
const volatile bool fifo_sched;
const volatile bool switch_partial;
static u64 vtime_now;
struct user_exit_info uei;
UEI_DEFINE(uei);
#define SHARED_DSQ 0
@ -130,26 +129,21 @@ void BPF_STRUCT_OPS(simple_enable, struct task_struct *p)
s32 BPF_STRUCT_OPS_SLEEPABLE(simple_init)
{
if (!switch_partial)
scx_bpf_switch_all();
return scx_bpf_create_dsq(SHARED_DSQ, -1);
}
void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei)
{
uei_record(&uei, ei);
UEI_RECORD(uei, ei);
}
SEC(".struct_ops.link")
struct sched_ext_ops simple_ops = {
.select_cpu = (void *)simple_select_cpu,
.enqueue = (void *)simple_enqueue,
.dispatch = (void *)simple_dispatch,
.running = (void *)simple_running,
.stopping = (void *)simple_stopping,
.enable = (void *)simple_enable,
.init = (void *)simple_init,
.exit = (void *)simple_exit,
.name = "simple",
};
SCX_OPS_DEFINE(simple_ops,
.select_cpu = (void *)simple_select_cpu,
.enqueue = (void *)simple_enqueue,
.dispatch = (void *)simple_dispatch,
.running = (void *)simple_running,
.stopping = (void *)simple_stopping,
.enable = (void *)simple_enable,
.init = (void *)simple_init,
.exit = (void *)simple_exit,
.name = "simple");


@ -17,10 +17,9 @@ const char help_fmt[] =
"\n"
"See the top-level comment in .bpf.c for more details.\n"
"\n"
"Usage: %s [-f] [-p]\n"
"Usage: %s [-f]\n"
"\n"
" -f Use FIFO scheduling instead of weighted vtime scheduling\n"
" -p Switch only tasks on SCHED_EXT policy intead of all\n"
" -h Display this help and exit\n";
static volatile int exit_req;
@ -64,26 +63,21 @@ int main(int argc, char **argv)
skel = scx_simple__open();
SCX_BUG_ON(!skel, "Failed to open skel");
while ((opt = getopt(argc, argv, "fph")) != -1) {
while ((opt = getopt(argc, argv, "fh")) != -1) {
switch (opt) {
case 'f':
skel->rodata->fifo_sched = true;
break;
case 'p':
skel->rodata->switch_partial = true;
break;
default:
fprintf(stderr, help_fmt, basename(argv[0]));
return opt != 'h';
}
}
SCX_BUG_ON(scx_simple__load(skel), "Failed to load skel");
SCX_OPS_LOAD(skel, simple_ops, scx_simple, uei);
link = SCX_OPS_ATTACH(skel, simple_ops);
link = bpf_map__attach_struct_ops(skel->maps.simple_ops);
SCX_BUG_ON(!link, "Failed to attach struct_ops");
while (!exit_req && !uei_exited(&skel->bss->uei)) {
while (!exit_req && !UEI_EXITED(skel, uei)) {
__u64 stats[2];
read_stats(skel, stats);
@ -93,7 +87,7 @@ int main(int argc, char **argv)
}
bpf_link__destroy(link);
uei_print(&skel->bss->uei);
UEI_REPORT(skel, uei);
scx_simple__destroy(skel);
return 0;
}


@ -30,7 +30,6 @@
char _license[] SEC("license") = "GPL";
const volatile bool switch_partial;
const volatile s32 usersched_pid;
/* !0 for veristat, set during init */
@ -56,7 +55,7 @@ volatile u64 nr_queued;
*/
volatile u64 nr_scheduled;
struct user_exit_info uei;
UEI_DEFINE(uei);
/*
* The map containing tasks that are enqueued in user space from the kernel.
@ -324,25 +323,22 @@ s32 BPF_STRUCT_OPS(userland_init)
return -EINVAL;
}
if (!switch_partial)
scx_bpf_switch_all();
return 0;
}
void BPF_STRUCT_OPS(userland_exit, struct scx_exit_info *ei)
{
uei_record(&uei, ei);
UEI_RECORD(uei, ei);
}
SEC(".struct_ops.link")
struct sched_ext_ops userland_ops = {
.select_cpu = (void *)userland_select_cpu,
.enqueue = (void *)userland_enqueue,
.dispatch = (void *)userland_dispatch,
.update_idle = (void *)userland_update_idle,
.init_task = (void *)userland_init_task,
.init = (void *)userland_init,
.exit = (void *)userland_exit,
.flags = SCX_OPS_ENQ_LAST | SCX_OPS_KEEP_BUILTIN_IDLE,
.name = "userland",
};
SCX_OPS_DEFINE(userland_ops,
.select_cpu = (void *)userland_select_cpu,
.enqueue = (void *)userland_enqueue,
.dispatch = (void *)userland_dispatch,
.update_idle = (void *)userland_update_idle,
.init_task = (void *)userland_init_task,
.init = (void *)userland_init,
.exit = (void *)userland_exit,
.flags = SCX_OPS_ENQ_LAST |
SCX_OPS_KEEP_BUILTIN_IDLE,
.name = "userland");


@ -38,10 +38,9 @@ const char help_fmt[] =
"\n"
"Try to reduce `sysctl kernel.pid_max` if this program triggers OOMs.\n"
"\n"
"Usage: %s [-b BATCH] [-p]\n"
"Usage: %s [-b BATCH]\n"
"\n"
" -b BATCH The number of tasks to batch when dispatching (default: 8)\n"
" -p Don't switch all, switch only tasks on SCHED_EXT policy\n"
" -h Display this help and exit\n";
/* Defined in UAPI */
@ -345,7 +344,6 @@ static void bootstrap(int argc, char **argv)
struct sched_param sched_param = {
.sched_priority = sched_get_priority_max(SCHED_EXT),
};
bool switch_partial = false;
err = init_tasks();
if (err)
@ -370,9 +368,6 @@ static void bootstrap(int argc, char **argv)
case 'b':
batch_size = strtoul(optarg, NULL, 0);
break;
case 'p':
switch_partial = true;
break;
default:
fprintf(stderr, help_fmt, basename(argv[0]));
exit(opt != 'h');
@ -394,9 +389,8 @@ static void bootstrap(int argc, char **argv)
assert(skel->rodata->num_possible_cpus > 0);
skel->rodata->usersched_pid = getpid();
assert(skel->rodata->usersched_pid > 0);
skel->rodata->switch_partial = switch_partial;
SCX_BUG_ON(scx_userland__load(skel), "Failed to load skel");
SCX_OPS_LOAD(skel, userland_ops, scx_userland, uei);
enqueued_fd = bpf_map__fd(skel->maps.enqueued);
dispatched_fd = bpf_map__fd(skel->maps.dispatched);
@ -406,8 +400,7 @@ static void bootstrap(int argc, char **argv)
SCX_BUG_ON(spawn_stats_thread(), "Failed to spawn stats thread");
print_example_warning(basename(argv[0]));
ops_link = bpf_map__attach_struct_ops(skel->maps.userland_ops);
SCX_BUG_ON(!ops_link, "Failed to attach struct_ops");
ops_link = SCX_OPS_ATTACH(skel, userland_ops);
}
static void sched_main_loop(void)
@ -440,7 +433,7 @@ int main(int argc, char **argv)
exit_req = 1;
bpf_link__destroy(ops_link);
uei_print(&skel->bss->uei);
UEI_REPORT(skel, uei);
scx_userland__destroy(skel);
return 0;
}


@ -29,31 +29,55 @@ static inline void ___vmlinux_h_sanity_check___(void)
}
void scx_bpf_error_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym;
void scx_bpf_exit_bstr(s64 exit_code, char *fmt,
unsigned long long *data, u32 data__sz) __ksym;
static inline __attribute__((format(printf, 1, 2)))
void ___scx_bpf_error_format_checker(const char *fmt, ...) {}
void ___scx_bpf_exit_format_checker(const char *fmt, ...) {}
/*
* Helper macro for initializing the fmt and variadic argument inputs to both
* bstr exit kfuncs. Callers to this function should use ___fmt and ___param to
* refer to the initialized list of inputs to the bstr kfunc.
*/
#define scx_bpf_exit_preamble(fmt, args...) \
static char ___fmt[] = fmt; \
/* \
* Note that __param[] must have at least one \
* element to keep the verifier happy. \
*/ \
unsigned long long ___param[___bpf_narg(args) ?: 1] = {}; \
\
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
___bpf_fill(___param, args); \
_Pragma("GCC diagnostic pop") \
/*
* scx_bpf_exit() wraps the scx_bpf_exit_bstr() kfunc with variadic arguments
* instead of an array of u64. Using this macro will cause the scheduler to
* exit cleanly with the specified exit code being passed to user space.
*/
#define scx_bpf_exit(code, fmt, args...) \
({ \
scx_bpf_exit_preamble(fmt, args) \
scx_bpf_exit_bstr(code, ___fmt, ___param, sizeof(___param)); \
___scx_bpf_exit_format_checker(fmt, ##args); \
})
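A minimal usage sketch of scx_bpf_exit() (editor's illustration, not part of this diff; the configuration check and the MAX_CPUS constant are hypothetical):

	if (nr_cpu_ids > MAX_CPUS)
		/* shut the scheduler down cleanly; the exit code reaches user space */
		scx_bpf_exit(1, "nr_cpu_ids %d exceeds supported maximum %d",
			     nr_cpu_ids, MAX_CPUS);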
/*
* scx_bpf_error() wraps the scx_bpf_error_bstr() kfunc with variadic arguments
* instead of an array of u64. Note that __param[] must have at least one
* element to keep the verifier happy.
* instead of an array of u64. Invoking this macro will cause the scheduler to
* exit in an erroneous state, with diagnostic information being passed to the
* user.
*/
#define scx_bpf_error(fmt, args...) \
({ \
static char ___fmt[] = fmt; \
unsigned long long ___param[___bpf_narg(args) ?: 1] = {}; \
\
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
___bpf_fill(___param, args); \
_Pragma("GCC diagnostic pop") \
\
scx_bpf_exit_preamble(fmt, args) \
scx_bpf_error_bstr(___fmt, ___param, sizeof(___param)); \
\
___scx_bpf_error_format_checker(fmt, ##args); \
___scx_bpf_exit_format_checker(fmt, ##args); \
})
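And a corresponding sketch for scx_bpf_error(), mirroring the task-storage lookup pattern used by the schedulers in this tree (the map and struct names are illustrative):

	struct task_ctx *tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0);
	if (!tctx) {
		/* aborts the scheduler; the message is reported through scx_exit_info */
		scx_bpf_error("task_ctx lookup failed for pid %d", p->pid);
		return;
	}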
void scx_bpf_switch_all(void) __ksym;
s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym;
bool scx_bpf_consume(u64 dsq_id) __ksym;
void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym;


@ -15,8 +15,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "user_exit_info.h"
#include <errno.h>
typedef uint8_t u8;
typedef uint16_t u16;
@ -66,4 +65,7 @@ typedef int64_t s64;
bpf_map__initial_value(skel->maps.elfsec##_##arr, &__sz); \
} while (0)
#include "user_exit_info.h"
#include "compat.h"
#endif /* __SCHED_EXT_COMMON_H */


@ -7,12 +7,41 @@
#ifndef __SCX_COMPAT_BPF_H
#define __SCX_COMPAT_BPF_H
static inline void __COMPAT_scx_bpf_kick_cpu_IDLE(s32 cpu)
#define __COMPAT_ENUM_OR_ZERO(__type, __ent) \
({ \
__type __ret = 0; \
if (bpf_core_enum_value_exists(__type, __ent)) \
__ret = __ent; \
__ret; \
})
/*
* %SCX_KICK_IDLE is a later addition. To support both before and after, use
* %__COMPAT_SCX_KICK_IDLE which becomes 0 on kernels which don't support it.
*/
#define __COMPAT_SCX_KICK_IDLE \
__COMPAT_ENUM_OR_ZERO(enum scx_kick_flags, SCX_KICK_IDLE)
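For example, a dispatch path can always pass the flag; on kernels without SCX_KICK_IDLE it evaluates to 0 and degrades to a plain kick (sketch, matching the scx_qmap change above):

	s32 cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
	if (cpu >= 0)
		scx_bpf_kick_cpu(cpu, __COMPAT_SCX_KICK_IDLE);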
/*
* scx_switch_all() was replaced by %SCX_OPS_SWITCH_PARTIAL. See
* %__COMPAT_SCX_OPS_SWITCH_PARTIAL in compat.h.
*/
void scx_bpf_switch_all(void) __ksym __weak;
static inline void __COMPAT_scx_bpf_switch_all(void)
{
if (bpf_core_enum_value_exists(enum scx_kick_flags, SCX_KICK_IDLE))
scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
else
scx_bpf_kick_cpu(cpu, 0);
if (!bpf_core_enum_value_exists(enum scx_ops_flags, SCX_OPS_SWITCH_PARTIAL))
scx_bpf_switch_all();
}
#endif
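A typical ops.init() using this shim looks like the following (sketch; SHARED_DSQ is an illustrative DSQ id, as in scx_simple):

	s32 BPF_STRUCT_OPS_SLEEPABLE(example_init)
	{
		/* no-op on kernels that already have SCX_OPS_SWITCH_PARTIAL */
		__COMPAT_scx_bpf_switch_all();
		return scx_bpf_create_dsq(SHARED_DSQ, -1);
	}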
/*
* Define sched_ext_ops. This may be expanded to define multiple variants for
* backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
*/
#define SCX_OPS_DEFINE(__name, ...) \
SEC(".struct_ops.link") \
struct sched_ext_ops __name = { \
__VA_ARGS__, \
};
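Usage sketch (the example_* callbacks are placeholders; the real conversions are in the scheduler diffs above):

	SCX_OPS_DEFINE(example_ops,
		       .select_cpu	= (void *)example_select_cpu,
		       .enqueue		= (void *)example_enqueue,
		       .dispatch	= (void *)example_dispatch,
		       .exit		= (void *)example_exit,
		       .name		= "example");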
#endif /* __SCX_COMPAT_BPF_H */

scheds/include/scx/compat.h (new file, 136 lines)

@ -0,0 +1,136 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
* Copyright (c) 2024 Tejun Heo <tj@kernel.org>
* Copyright (c) 2024 David Vernet <dvernet@meta.com>
*/
#ifndef __SCX_COMPAT_H
#define __SCX_COMPAT_H
#include <bpf/btf.h>
struct btf *__COMPAT_vmlinux_btf __attribute__((weak));
static inline void __COMPAT_load_vmlinux_btf(void)
{
if (!__COMPAT_vmlinux_btf) {
__COMPAT_vmlinux_btf = btf__load_vmlinux_btf();
SCX_BUG_ON(!__COMPAT_vmlinux_btf, "btf__load_vmlinux_btf()");
}
}
static inline bool __COMPAT_read_enum(const char *type, const char *name, u64 *v)
{
const struct btf_type *t;
const char *n;
s32 tid;
int i;
__COMPAT_load_vmlinux_btf();
tid = btf__find_by_name(__COMPAT_vmlinux_btf, type);
if (tid < 0)
return false;
t = btf__type_by_id(__COMPAT_vmlinux_btf, tid);
SCX_BUG_ON(!t, "btf__type_by_id(%d)", tid);
if (btf_is_enum(t)) {
struct btf_enum *e = btf_enum(t);
for (i = 0; i < BTF_INFO_VLEN(t->info); i++) {
n = btf__name_by_offset(__COMPAT_vmlinux_btf, e[i].name_off);
SCX_BUG_ON(!n, "btf__name_by_offset()");
if (!strcmp(n, name)) {
*v = e[i].val;
return true;
}
}
} else if (btf_is_enum64(t)) {
struct btf_enum64 *e = btf_enum64(t);
for (i = 0; i < BTF_INFO_VLEN(t->info); i++) {
n = btf__name_by_offset(__COMPAT_vmlinux_btf, e[i].name_off);
SCX_BUG_ON(!n, "btf__name_by_offset()");
if (!strcmp(n, name)) {
*v = btf_enum64_value(&e[i]);
return true;
}
}
}
return false;
}
#define __COMPAT_ENUM_OR_ZERO(__type, __ent) \
({ \
u64 __val = 0; \
__COMPAT_read_enum(__type, __ent, &__val); \
__val; \
})
static inline bool __COMPAT_struct_has_field(const char *type, const char *field)
{
const struct btf_type *t;
const struct btf_member *m;
const char *n;
s32 tid;
int i;
__COMPAT_load_vmlinux_btf();
tid = btf__find_by_name_kind(__COMPAT_vmlinux_btf, type, BTF_KIND_STRUCT);
if (tid < 0)
return false;
t = btf__type_by_id(__COMPAT_vmlinux_btf, tid);
SCX_BUG_ON(!t, "btf__type_by_id(%d)", tid);
m = btf_members(t);
for (i = 0; i < BTF_INFO_VLEN(t->info); i++) {
n = btf__name_by_offset(__COMPAT_vmlinux_btf, m[i].name_off);
SCX_BUG_ON(!n, "btf__name_by_offset()");
if (!strcmp(n, field))
return true;
}
return false;
}
/*
* An ops flag, %SCX_OPS_SWITCH_PARTIAL, replaced scx_bpf_switch_all() which had
* to be called from ops.init(). To support both before and after, use both
* %__COMPAT_SCX_OPS_SWITCH_PARTIAL and %__COMPAT_scx_bpf_switch_all() defined
* in compat.bpf.h.
*/
#define __COMPAT_SCX_OPS_SWITCH_PARTIAL \
__COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL")
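For instance, a loader that still exposes a partial-switch option can apply it as below; the OR becomes a no-op on kernels that predate the flag (sketch, matching the scx_qmap loader change above; example_ops is a placeholder name):

	case 'p':
		skel->struct_ops.example_ops->flags |= __COMPAT_SCX_OPS_SWITCH_PARTIAL;
		break;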
/*
* struct sched_ext_ops can change over time. If compat.bpf.h::SCX_OPS_DEFINE()
* is used to define ops and compat.h::SCX_OPS_LOAD/ATTACH() are used to load
* and attach it, backward compatibility is automatically maintained where
* reasonable.
*
* - sched_ext_ops.exit_dump_len was added later. On kernels which don't support
* it, the value is ignored and a warning is triggered if the value is
* requested to be non-zero.
*/
#define SCX_OPS_LOAD(__skel, __ops_name, __scx_name, __uei_name) ({ \
UEI_SET_SIZE(__skel, __ops_name, __uei_name); \
if (__COMPAT_struct_has_field("sched_ext_ops", "exit_dump_len") && \
(__skel)->struct_ops.__ops_name->exit_dump_len) { \
fprintf(stderr, "WARNING: kernel doesn't support setting exit dump len\n"); \
(__skel)->struct_ops.__ops_name->exit_dump_len = 0; \
} \
SCX_BUG_ON(__scx_name##__load((__skel)), "Failed to load skel"); \
})
#define SCX_OPS_ATTACH(__skel, __ops_name) ({ \
struct bpf_link *__link; \
__link = bpf_map__attach_struct_ops((__skel)->maps.__ops_name); \
SCX_BUG_ON(!__link, "Failed to attach struct_ops"); \
__link; \
})
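Putting it together, a userspace loader after this change follows roughly this sequence (sketch; the scx_example names are placeholders, compare with the scx_simple changes above):

	skel = scx_example__open();
	SCX_BUG_ON(!skel, "Failed to open skel");
	SCX_OPS_LOAD(skel, example_ops, scx_example, uei);
	link = SCX_OPS_ATTACH(skel, example_ops);
	while (!exit_req && !UEI_EXITED(skel, uei))
		sleep(1);
	bpf_link__destroy(link);
	UEI_REPORT(skel, uei);
	scx_example__destroy(skel);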
#endif /* __SCX_COMPAT_H */


@ -11,16 +11,16 @@
#define __USER_EXIT_INFO_H
enum uei_sizes {
UEI_REASON_SIZE = 128,
UEI_MSG_SIZE = 1024,
UEI_DUMP_SIZE = 32768,
UEI_REASON_LEN = 128,
UEI_MSG_LEN = 1024,
UEI_DUMP_DFL_LEN = 32768,
};
struct user_exit_info {
int kind;
char reason[UEI_REASON_SIZE];
char msg[UEI_MSG_SIZE];
char dump[UEI_DUMP_SIZE];
s64 exit_code;
char reason[UEI_REASON_LEN];
char msg[UEI_MSG_LEN];
};
#ifdef __bpf__
@ -28,40 +28,56 @@ struct user_exit_info {
#include "vmlinux.h"
#include <bpf/bpf_core_read.h>
static inline void uei_record(struct user_exit_info *uei,
const struct scx_exit_info *ei)
{
bpf_probe_read_kernel_str(uei->reason, sizeof(uei->reason), ei->reason);
bpf_probe_read_kernel_str(uei->msg, sizeof(uei->msg), ei->msg);
bpf_probe_read_kernel_str(uei->dump, sizeof(uei->dump), ei->dump);
/* use __sync to force memory barrier */
__sync_val_compare_and_swap(&uei->kind, uei->kind, ei->kind);
}
#define UEI_DEFINE(__name) \
char RESIZABLE_ARRAY(data, __name##_dump); \
const volatile u32 __name##_dump_len; \
struct user_exit_info __name SEC(".data")
#define UEI_RECORD(__uei_name, __ei) ({ \
bpf_probe_read_kernel_str(__uei_name.reason, \
sizeof(__uei_name.reason), (__ei)->reason); \
bpf_probe_read_kernel_str(__uei_name.msg, \
sizeof(__uei_name.msg), (__ei)->msg); \
bpf_probe_read_kernel_str(__uei_name##_dump, \
__uei_name##_dump_len, (__ei)->dump); \
if (bpf_core_field_exists((__ei)->exit_code)) \
__uei_name.exit_code = (__ei)->exit_code; \
/* use __sync to force memory barrier */ \
__sync_val_compare_and_swap(&__uei_name.kind, __uei_name.kind, \
(__ei)->kind); \
})
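On the BPF side the pair is used as in the scheduler conversions above (sketch):

	UEI_DEFINE(uei);

	void BPF_STRUCT_OPS(example_exit, struct scx_exit_info *ei)
	{
		UEI_RECORD(uei, ei);
	}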
#else /* !__bpf__ */
#include <stdio.h>
#include <stdbool.h>
static inline bool uei_exited(struct user_exit_info *uei)
{
/* use __sync to force memory barrier */
return __sync_val_compare_and_swap(&uei->kind, -1, -1);
}
/* no need to call the following explicitly if SCX_OPS_LOAD() is used */
#define UEI_SET_SIZE(__skel, __ops_name, __uei_name) ({ \
u32 __len = (__skel)->struct_ops.__ops_name->exit_dump_len ?: UEI_DUMP_DFL_LEN; \
(__skel)->rodata->__uei_name##_dump_len = __len; \
RESIZE_ARRAY(data, __uei_name##_dump, __len); \
})
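Explicit-call sketch for loaders that do not go through SCX_OPS_LOAD() (placeholder names):

	UEI_SET_SIZE(skel, example_ops, uei);
	SCX_BUG_ON(scx_example__load(skel), "Failed to load skel");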
static inline void uei_print(const struct user_exit_info *uei)
{
if (uei->dump[0] != '\0') {
fputs("\nDEBUG DUMP\n", stderr);
fputs("================================================================================\n\n", stderr);
fputs(uei->dump, stderr);
fputs("\n================================================================================\n\n", stderr);
}
fprintf(stderr, "EXIT: %s", uei->reason);
if (uei->msg[0] != '\0')
fprintf(stderr, " (%s)", uei->msg);
fputs("\n", stderr);
}
#define UEI_EXITED(__skel, __uei_name) ({ \
/* use __sync to force memory barrier */ \
__sync_val_compare_and_swap(&(__skel)->data->__uei_name.kind, -1, -1); \
})
#define UEI_REPORT(__skel, __uei_name) ({ \
struct user_exit_info *__uei = &(__skel)->data->__uei_name; \
char *__uei_dump = (__skel)->data_##__uei_name##_dump->__uei_name##_dump; \
if (__uei_dump[0] != '\0') { \
fputs("\nDEBUG DUMP\n", stderr); \
fputs("================================================================================\n\n", stderr); \
fputs(__uei_dump, stderr); \
fputs("\n================================================================================\n\n", stderr); \
} \
fprintf(stderr, "EXIT: %s", __uei->reason); \
if (__uei->msg[0] != '\0') \
fprintf(stderr, " (%s)", __uei->msg); \
fputs("\n", stderr); \
})
#endif /* __bpf__ */
#endif /* __USER_EXIT_INFO_H */

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -1 +1 @@
vmlinux-v6.7-g6851d5f5be95.h
vmlinux-v6.9-g5dc95302301f.h


@ -1,6 +1,6 @@
[package]
name = "scx_lavd"
version = "0.1.0"
version = "0.1.1"
authors = ["Changwoo Min <changwoo@igalia.com>", "Igalia"]
edition = "2021"
description = "A Latency-criticality Aware Virtual Deadline (LAVD) scheduler based on sched_ext, which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. https://github.com/sched-ext/scx/tree/main"
@ -13,11 +13,11 @@ clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
ctrlc = { version = "3.1", features = ["termination"] }
fb_procfs = "0.7.0"
hex = "0.4.3"
libbpf-rs = "0.22.0"
libbpf-rs = "0.23"
libc = "0.2.137"
log = "0.4.17"
ordered-float = "3.4.0"
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
scx_utils = { path = "../../../rust/scx_utils", version = "0.7" }
simplelog = "0.12.0"
static_assertions = "1.1.0"
rlimit = "0.10.1"
@ -25,7 +25,7 @@ plain = "0.2.3"
nix = "0.28.0"
[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
scx_utils = { path = "../../../rust/scx_utils", version = "0.7" }
[features]
enable_backtrace = []


@ -123,10 +123,10 @@ volatile u64 nr_cpus_onln;
static struct sys_cpu_util __sys_cpu_util[2];
static volatile int __sys_cpu_util_idx;
struct user_exit_info uei;
const volatile u8 verbose;
UEI_DEFINE(uei);
#define debugln(fmt, ...) \
({ \
if (verbose > 0) \
@ -1778,33 +1778,30 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(lavd_init)
/*
* Switch all tasks to scx tasks.
*/
scx_bpf_switch_all();
__COMPAT_scx_bpf_switch_all();
return err;
}
void BPF_STRUCT_OPS(lavd_exit, struct scx_exit_info *ei)
{
uei_record(&uei, ei);
UEI_RECORD(uei, ei);
}
SEC(".struct_ops.link")
struct sched_ext_ops lavd_ops = {
.select_cpu = (void *)lavd_select_cpu,
.enqueue = (void *)lavd_enqueue,
.dispatch = (void *)lavd_dispatch,
.runnable = (void *)lavd_runnable,
.running = (void *)lavd_running,
.stopping = (void *)lavd_stopping,
.quiescent = (void *)lavd_quiescent,
.cpu_online = (void *)lavd_cpu_online,
.cpu_offline = (void *)lavd_cpu_offline,
.update_idle = (void *)lavd_update_idle,
.init_task = (void *)lavd_init_task,
.init = (void *)lavd_init,
.exit = (void *)lavd_exit,
.flags = SCX_OPS_KEEP_BUILTIN_IDLE,
.timeout_ms = 5000U,
.name = "lavd",
};
SCX_OPS_DEFINE(lavd_ops,
.select_cpu = (void *)lavd_select_cpu,
.enqueue = (void *)lavd_enqueue,
.dispatch = (void *)lavd_dispatch,
.runnable = (void *)lavd_runnable,
.running = (void *)lavd_running,
.stopping = (void *)lavd_stopping,
.quiescent = (void *)lavd_quiescent,
.cpu_online = (void *)lavd_cpu_online,
.cpu_offline = (void *)lavd_cpu_offline,
.update_idle = (void *)lavd_update_idle,
.init_task = (void *)lavd_init_task,
.init = (void *)lavd_init,
.exit = (void *)lavd_exit,
.flags = SCX_OPS_KEEP_BUILTIN_IDLE,
.timeout_ms = 5000U,
.name = "lavd");


@ -23,9 +23,10 @@ use anyhow::Context;
use anyhow::Result;
use clap::Parser;
use libbpf_rs::skel::OpenSkel as _;
use libbpf_rs::skel::Skel as _;
use libbpf_rs::skel::SkelBuilder as _;
use log::info;
use scx_utils::scx_ops_attach;
use scx_utils::scx_ops_load;
use scx_utils::uei_exited;
use scx_utils::uei_report;
use scx_utils::Topology;
@ -51,6 +52,10 @@ struct Opts {
#[clap(short = 'p', long, default_value = "0")]
pid_traced: u64,
/// Exit debug dump buffer length. 0 indicates default.
#[clap(long, default_value = "0")]
exit_dump_len: u32,
/// Enable verbose output including libbpf details. Specify multiple
/// times to increase verbosity.
#[clap(short = 'v', long, action = clap::ArgAction::Count)]
@ -111,18 +116,13 @@ impl<'a> Scheduler<'a> {
let topo = Topology::new().expect("Failed to build host topology");
let nr_cpus_onln = topo.span().weight() as u64;
skel.bss_mut().nr_cpus_onln = nr_cpus_onln;
skel.struct_ops.lavd_ops_mut().exit_dump_len = opts.exit_dump_len;
skel.rodata_mut().verbose = opts.verbose;
let intrspc = introspec::init(opts);
// Attach.
let mut skel = skel.load().context("Failed to load BPF program")?;
skel.attach().context("Failed to attach BPF program")?;
let struct_ops = Some(
skel.maps_mut()
.lavd_ops()
.attach_struct_ops()
.context("Failed to attach scx_lavd struct ops")?,
);
let mut skel = scx_ops_load!(skel, lavd_ops, uei)?;
let struct_ops = Some(scx_ops_attach!(skel, lavd_ops)?);
// Build a ring buffer for instrumentation
let mut maps = skel.maps_mut();
@ -262,7 +262,7 @@ impl<'a> Scheduler<'a> {
}
fn running(&mut self) -> bool {
RUNNING.load(Ordering::Relaxed) && !uei_exited!(&self.skel.bss().uei)
RUNNING.load(Ordering::Relaxed) && !uei_exited!(&self.skel, uei)
}
fn run(&mut self) -> Result<()> {
@ -275,7 +275,7 @@ impl<'a> Scheduler<'a> {
self.rb_mgr.consume().unwrap();
self.struct_ops.take();
uei_report!(&self.skel.bss().uei)
uei_report!(&self.skel, uei)
}
}


@ -1,6 +1,6 @@
[package]
name = "scx_layered"
version = "0.0.6"
version = "0.0.7"
authors = ["Tejun Heo <htejun@meta.com>", "Meta"]
edition = "2021"
description = "A highly configurable multi-layer BPF / user space hybrid scheduler used within sched_ext, which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. https://github.com/sched-ext/scx/tree/main"
@ -13,17 +13,17 @@ clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
ctrlc = { version = "3.1", features = ["termination"] }
fb_procfs = "0.7"
lazy_static = "1.4"
libbpf-rs = "0.22"
libbpf-rs = "0.23"
libc = "0.2"
log = "0.4"
prometheus-client = "0.19"
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
scx_utils = { path = "../../../rust/scx_utils", version = "0.7" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
simplelog = "0.12"
[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
scx_utils = { path = "../../../rust/scx_utils", version = "0.7" }
[features]
enable_backtrace = []

View File

@ -30,7 +30,7 @@ static u32 preempt_cursor;
#include "util.bpf.c"
struct user_exit_info uei;
UEI_DEFINE(uei);
static inline bool vtime_before(u64 a, u64 b)
{
@ -894,7 +894,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
struct bpf_cpumask *cpumask;
int i, j, k, nr_online_cpus, ret;
scx_bpf_switch_all();
__COMPAT_scx_bpf_switch_all();
cpumask = bpf_cpumask_create();
if (!cpumask)
@ -1020,23 +1020,21 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(layered_init)
void BPF_STRUCT_OPS(layered_exit, struct scx_exit_info *ei)
{
uei_record(&uei, ei);
UEI_RECORD(uei, ei);
}
SEC(".struct_ops.link")
struct sched_ext_ops layered = {
.select_cpu = (void *)layered_select_cpu,
.enqueue = (void *)layered_enqueue,
.dispatch = (void *)layered_dispatch,
.runnable = (void *)layered_runnable,
.running = (void *)layered_running,
.stopping = (void *)layered_stopping,
.quiescent = (void *)layered_quiescent,
.set_weight = (void *)layered_set_weight,
.set_cpumask = (void *)layered_set_cpumask,
.init_task = (void *)layered_init_task,
.exit_task = (void *)layered_exit_task,
.init = (void *)layered_init,
.exit = (void *)layered_exit,
.name = "layered",
};
SCX_OPS_DEFINE(layered,
.select_cpu = (void *)layered_select_cpu,
.enqueue = (void *)layered_enqueue,
.dispatch = (void *)layered_dispatch,
.runnable = (void *)layered_runnable,
.running = (void *)layered_running,
.stopping = (void *)layered_stopping,
.quiescent = (void *)layered_quiescent,
.set_weight = (void *)layered_set_weight,
.set_cpumask = (void *)layered_set_cpumask,
.init_task = (void *)layered_init_task,
.exit_task = (void *)layered_exit_task,
.init = (void *)layered_init,
.exit = (void *)layered_exit,
.name = "layered");

View File

@ -29,7 +29,6 @@ use anyhow::Result;
use bitvec::prelude::*;
use clap::Parser;
use libbpf_rs::skel::OpenSkel as _;
use libbpf_rs::skel::Skel as _;
use libbpf_rs::skel::SkelBuilder as _;
use log::debug;
use log::info;
@ -41,6 +40,8 @@ use prometheus_client::metrics::gauge::Gauge;
use prometheus_client::registry::Registry;
use scx_utils::init_libbpf_logging;
use scx_utils::ravg::ravg_read;
use scx_utils::scx_ops_attach;
use scx_utils::scx_ops_load;
use scx_utils::uei_exited;
use scx_utils::uei_report;
use serde::Deserialize;
@ -271,6 +272,10 @@ struct Opts {
#[clap(short = 'n', long)]
no_load_frac_limit: bool,
/// Exit debug dump buffer length. 0 indicates default.
#[clap(long, default_value = "0")]
exit_dump_len: u32,
/// Enable verbose output including libbpf details. Specify multiple
/// times to increase verbosity.
#[clap(short = 'v', long, action = clap::ArgAction::Count)]
@ -303,28 +308,28 @@ enum LayerMatch {
enum LayerKind {
Confined {
util_range: (f64, f64),
#[serde(default)]
#[serde(default)]
cpus_range: Option<(usize, usize)>,
#[serde(default)]
#[serde(default)]
min_exec_us: u64,
},
Grouped {
util_range: (f64, f64),
#[serde(default)]
#[serde(default)]
cpus_range: Option<(usize, usize)>,
#[serde(default)]
#[serde(default)]
min_exec_us: u64,
#[serde(default)]
#[serde(default)]
preempt: bool,
#[serde(default)]
#[serde(default)]
exclusive: bool,
},
Open {
#[serde(default)]
#[serde(default)]
min_exec_us: u64,
#[serde(default)]
#[serde(default)]
preempt: bool,
#[serde(default)]
#[serde(default)]
exclusive: bool,
},
}
@ -1282,10 +1287,10 @@ impl<'a> Scheduler<'a> {
exclusive,
..
} => {
layer.open = true;
layer.open.write(true);
layer.min_exec_ns = min_exec_us * 1000;
layer.preempt = *preempt;
layer.exclusive = *exclusive;
layer.preempt.write(*preempt);
layer.exclusive.write(*exclusive);
}
}
}
@ -1304,6 +1309,8 @@ impl<'a> Scheduler<'a> {
let mut skel = skel_builder.open().context("Failed to open BPF program")?;
// Initialize skel according to @opts.
skel.struct_ops.layered_mut().exit_dump_len = opts.exit_dump_len;
skel.rodata_mut().debug = opts.verbose as u32;
skel.rodata_mut().slice_ns = opts.slice_us * 1000;
skel.rodata_mut().nr_possible_cpus = *NR_POSSIBLE_CPUS as u32;
@ -1316,7 +1323,8 @@ impl<'a> Scheduler<'a> {
}
Self::init_layers(&mut skel, &layer_specs)?;
let mut skel = skel.load().context("Failed to load BPF program")?;
let mut skel = scx_ops_load!(skel, layered, uei)?;
let mut layers = vec![];
for spec in layer_specs.iter() {
layers.push(Layer::new(&mut cpu_pool, &spec.name, spec.kind.clone())?);
@ -1357,24 +1365,13 @@ impl<'a> Scheduler<'a> {
// huge problem in the interim until we figure it out.
// Attach.
sched
.skel
.attach()
.context("Failed to attach BPF program")?;
sched.struct_ops = Some(
sched
.skel
.maps_mut()
.layered()
.attach_struct_ops()
.context("Failed to attach layered struct ops")?,
);
sched.struct_ops = Some(scx_ops_attach!(sched.skel, layered)?);
info!("Layered Scheduler Attached");
Ok(sched)
}
fn update_bpf_layer_cpumask(layer: &Layer, bpf_layer: &mut bpf_bss_types::layer) {
fn update_bpf_layer_cpumask(layer: &Layer, bpf_layer: &mut bpf_types::layer) {
for bit in 0..layer.cpus.len() {
if layer.cpus[bit] {
bpf_layer.cpus[bit / 8] |= 1 << (bit % 8);
@ -1709,7 +1706,7 @@ impl<'a> Scheduler<'a> {
let mut next_sched_at = now + self.sched_intv;
let mut next_monitor_at = now + self.monitor_intv;
while !shutdown.load(Ordering::Relaxed) && !uei_exited!(&self.skel.bss().uei) {
while !shutdown.load(Ordering::Relaxed) && !uei_exited!(&self.skel, uei) {
let now = Instant::now();
if now >= next_sched_at {
@ -1734,7 +1731,7 @@ impl<'a> Scheduler<'a> {
}
self.struct_ops.take();
uei_report!(&self.skel.bss().uei)
uei_report!(&self.skel, uei)
}
}
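
For reference, a sketch of the exit-handling side that the hunks above converge on: the user exit info is now queried through the skeleton via `uei_exited!()` and reported with `uei_report!()` instead of reading `skel.bss().uei` directly. `ExampleScheduler`, its `skel`/`struct_ops` fields, and `step()` are illustrative placeholders assumed to match the scheduler structs used in this repository.

```rust
// Sketch (not part of this patch): polling for BPF-side exit and reporting
// the user_exit_info on shutdown.
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;

use anyhow::Result;
use scx_utils::uei_exited;
use scx_utils::uei_report;

impl<'a> ExampleScheduler<'a> {
    fn run(&mut self, shutdown: &AtomicBool) -> Result<()> {
        // Keep scheduling until userspace asks to stop or the BPF side exits.
        while !shutdown.load(Ordering::Relaxed) && !uei_exited!(&self.skel, uei) {
            self.step()?; // periodic load-balancing / stats work (placeholder)
        }

        // Drop the struct_ops link, then report why the BPF scheduler exited.
        self.struct_ops.take();
        uei_report!(&self.skel, uei)
    }
}
```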

View File

@ -1,6 +1,6 @@
[package]
name = "scx_rlfifo"
version = "0.0.1"
version = "0.0.2"
authors = ["Andrea Righi <andrea.righi@canonical.com>", "Canonical"]
edition = "2021"
description = "A simple FIFO scheduler in Rust that runs in user-space"
@ -9,13 +9,13 @@ license = "GPL-2.0-only"
[dependencies]
anyhow = "1.0.65"
ctrlc = { version = "3.1", features = ["termination"] }
libbpf-rs = "0.22.0"
libbpf-rs = "0.23"
libc = "0.2.137"
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
scx_utils = { path = "../../../rust/scx_utils", version = "0.7" }
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "0.1" }
[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
scx_utils = { path = "../../../rust/scx_utils", version = "0.7" }
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "0.1" }
[features]

View File

@ -26,7 +26,7 @@ struct Scheduler<'a> {
impl<'a> Scheduler<'a> {
fn init() -> Result<Self> {
let topo = Topology::new().expect("Failed to build host topology");
let bpf = BpfScheduler::init(5000, topo.nr_cpus_possible() as i32, false, false, false)?;
let bpf = BpfScheduler::init(5000, topo.nr_cpus_possible() as i32, false, 0, false, false)?;
Ok(Self { bpf })
}
@ -75,9 +75,12 @@ impl<'a> Scheduler<'a> {
println!(
"user={} kernel={} cancel={} bounce={} fail={} cong={}",
nr_user_dispatches, nr_kernel_dispatches,
nr_cancel_dispatches, nr_bounce_dispatches,
nr_failed_dispatches, nr_sched_congested,
nr_user_dispatches,
nr_kernel_dispatches,
nr_cancel_dispatches,
nr_bounce_dispatches,
nr_failed_dispatches,
nr_sched_congested,
);
}
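
The extra `0` threaded into the `BpfScheduler::init()` call above is the new exit dump length. As a hedged reference, here is the same call as a fragment with each positional argument annotated; the parameter names follow `BpfScheduler::init()` in scx_rustland_core, and `topo` is the host `Topology` built just before the call.

```rust
// Fragment (not part of this patch): the updated scx_rustland_core init
// call with its positional arguments annotated.
let bpf = BpfScheduler::init(
    5000,                           // slice_us: default time slice, in microseconds
    topo.nr_cpus_possible() as i32, // nr_cpus_online: number of CPUs to manage
    false,                          // partial: switch only tasks that opt in to SCHED_EXT
    0,                              // exit_dump_len: exit debug dump buffer length (0 = default)
    false,                          // full_user: process all scheduling events in user space
    false,                          // debug: report BPF scheduling events via trace_pipe
)?;
```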

View File

@ -1,6 +1,6 @@
[package]
name = "scx_rustland"
version = "0.0.3"
version = "0.0.4"
authors = ["Andrea Righi <andrea.righi@canonical.com>", "Canonical"]
edition = "2021"
description = "A BPF component (dispatcher) that implements the low level sched-ext functionalities and a user-space counterpart (scheduler), written in Rust, that implements the actual scheduling policy. This is used within sched_ext, which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. https://github.com/sched-ext/scx/tree/main"
@ -11,16 +11,16 @@ anyhow = "1.0.65"
clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
ctrlc = { version = "3.1", features = ["termination"] }
fb_procfs = "0.7.0"
libbpf-rs = "0.22.0"
libbpf-rs = "0.23"
libc = "0.2.137"
log = "0.4.17"
ordered-float = "3.4.0"
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
scx_utils = { path = "../../../rust/scx_utils", version = "0.7" }
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "0.1" }
simplelog = "0.12.0"
[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
scx_utils = { path = "../../../rust/scx_utils", version = "0.7" }
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "0.1" }
[features]

View File

@ -122,6 +122,10 @@ struct Opts {
#[clap(short = 'p', long, action = clap::ArgAction::SetTrue)]
partial: bool,
/// Exit debug dump buffer length. 0 indicates default.
#[clap(long, default_value = "0")]
exit_dump_len: u32,
/// If specified, all the BPF scheduling events will be reported in
/// debugfs (e.g., /sys/kernel/debug/tracing/trace_pipe).
#[clap(short = 'd', long, action = clap::ArgAction::SetTrue)]
@ -269,6 +273,7 @@ impl<'a> Scheduler<'a> {
opts.slice_us,
nr_online_cpus as i32,
opts.partial,
opts.exit_dump_len,
opts.full_user,
opts.debug,
)?;
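
For reference, a self-contained sketch of how the new exit dump option is declared with clap and then forwarded, mirroring the hunks above. The abbreviated `Opts` struct and the `main()` wrapper are illustrative; only the `exit_dump_len` field matches the patch.

```rust
// Sketch (not part of this patch): declaring --exit-dump-len and reading it.
use clap::Parser;

#[derive(Debug, Parser)]
struct Opts {
    /// Exit debug dump buffer length. 0 indicates default.
    #[clap(long, default_value = "0")]
    exit_dump_len: u32,
}

fn main() {
    let opts = Opts::parse();
    // In the schedulers above this value is forwarded to BpfScheduler::init()
    // or written to skel.struct_ops.<ops>_mut().exit_dump_len before load.
    println!("exit_dump_len = {}", opts.exit_dump_len);
}
```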

View File

@ -1,6 +1,6 @@
[package]
name = "scx_rusty"
version = "0.5.4"
version = "0.5.5"
authors = ["Dan Schatzberg <dschatzberg@meta.com>", "Meta"]
edition = "2021"
description = "A multi-domain, BPF / user space hybrid scheduler used within sched_ext, which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. https://github.com/sched-ext/scx/tree/main"
@ -11,17 +11,17 @@ anyhow = "1.0.65"
clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
ctrlc = { version = "3.1", features = ["termination"] }
fb_procfs = "0.7.0"
libbpf-rs = "0.22.0"
libbpf-rs = "0.23"
libc = "0.2.137"
log = "0.4.17"
ordered-float = "3.4.0"
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
scx_utils = { path = "../../../rust/scx_utils", version = "0.7" }
simplelog = "0.12.0"
sorted-vec = "0.8.3"
static_assertions = "1.1.0"
[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
scx_utils = { path = "../../../rust/scx_utils", version = "0.7" }
[features]
enable_backtrace = []

View File

@ -48,7 +48,7 @@
char _license[] SEC("license") = "GPL";
struct user_exit_info uei;
UEI_DEFINE(uei);
/*
* const volatiles are set during initialization and treated as consts by the
@ -1455,7 +1455,7 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(rusty_init)
bpf_cpumask_release(cpumask);
if (!switch_partial)
scx_bpf_switch_all();
__COMPAT_scx_bpf_switch_all();
bpf_for(i, 0, nr_nodes) {
ret = create_node(i);
@ -1482,23 +1482,21 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(rusty_init)
void BPF_STRUCT_OPS(rusty_exit, struct scx_exit_info *ei)
{
uei_record(&uei, ei);
UEI_RECORD(uei, ei);
}
SEC(".struct_ops.link")
struct sched_ext_ops rusty = {
.select_cpu = (void *)rusty_select_cpu,
.enqueue = (void *)rusty_enqueue,
.dispatch = (void *)rusty_dispatch,
.runnable = (void *)rusty_runnable,
.running = (void *)rusty_running,
.stopping = (void *)rusty_stopping,
.quiescent = (void *)rusty_quiescent,
.set_weight = (void *)rusty_set_weight,
.set_cpumask = (void *)rusty_set_cpumask,
.init_task = (void *)rusty_init_task,
.exit_task = (void *)rusty_exit_task,
.init = (void *)rusty_init,
.exit = (void *)rusty_exit,
.name = "rusty",
};
SCX_OPS_DEFINE(rusty,
.select_cpu = (void *)rusty_select_cpu,
.enqueue = (void *)rusty_enqueue,
.dispatch = (void *)rusty_dispatch,
.runnable = (void *)rusty_runnable,
.running = (void *)rusty_running,
.stopping = (void *)rusty_stopping,
.quiescent = (void *)rusty_quiescent,
.set_weight = (void *)rusty_set_weight,
.set_cpumask = (void *)rusty_set_cpumask,
.init_task = (void *)rusty_init_task,
.exit_task = (void *)rusty_exit_task,
.init = (void *)rusty_init,
.exit = (void *)rusty_exit,
.name = "rusty");

View File

@ -32,10 +32,12 @@ use anyhow::Context;
use anyhow::Result;
use clap::Parser;
use libbpf_rs::skel::OpenSkel as _;
use libbpf_rs::skel::Skel as _;
use libbpf_rs::skel::SkelBuilder as _;
use log::info;
use scx_utils::compat;
use scx_utils::init_libbpf_logging;
use scx_utils::scx_ops_attach;
use scx_utils::scx_ops_load;
use scx_utils::uei_exited;
use scx_utils::uei_report;
use scx_utils::Cpumask;
@ -172,6 +174,10 @@ struct Opts {
#[clap(short = 'p', long, action = clap::ArgAction::SetTrue)]
partial: bool,
/// Exit debug dump buffer length. 0 indicates default.
#[clap(long, default_value = "0")]
exit_dump_len: u32,
/// Enable verbose output including libbpf details. Specify multiple
/// times to increase verbosity.
#[clap(short = 'v', long, action = clap::ArgAction::Count)]
@ -260,8 +266,10 @@ impl<'a> Scheduler<'a> {
for (id, dom) in domains.doms().iter() {
for cpu in dom.mask().into_iter() {
skel.rodata_mut().cpu_dom_id_map[cpu] =
id.clone().try_into().expect("Domain ID could not fit into 32 bits");
skel.rodata_mut().cpu_dom_id_map[cpu] = id
.clone()
.try_into()
.expect("Domain ID could not fit into 32 bits");
}
}
@ -291,6 +299,11 @@ impl<'a> Scheduler<'a> {
}
}
if opts.partial {
skel.struct_ops.rusty_mut().flags |= *compat::SCX_OPS_SWITCH_PARTIAL;
}
skel.struct_ops.rusty_mut().exit_dump_len = opts.exit_dump_len;
skel.rodata_mut().slice_ns = opts.slice_us * 1000;
skel.rodata_mut().load_half_life = (opts.load_half_life * 1000000000.0) as u32;
skel.rodata_mut().kthreads_local = opts.kthreads_local;
@ -302,14 +315,8 @@ impl<'a> Scheduler<'a> {
skel.rodata_mut().debug = opts.verbose as u32;
// Attach.
let mut skel = skel.load().context("Failed to load BPF program")?;
skel.attach().context("Failed to attach BPF program")?;
let struct_ops = Some(
skel.maps_mut()
.rusty()
.attach_struct_ops()
.context("Failed to attach rusty struct ops")?,
);
let mut skel = scx_ops_load!(skel, rusty, uei)?;
let struct_ops = Some(scx_ops_attach!(skel, rusty)?);
info!("Rusty Scheduler Attached");
// Other stuff.
@ -325,7 +332,7 @@ impl<'a> Scheduler<'a> {
balance_load: !opts.no_load_balance,
balanced_kworkers: opts.balanced_kworkers,
top: top,
top,
dom_group: domains.clone(),
proc_reader,
@ -394,7 +401,8 @@ impl<'a> Scheduler<'a> {
let mut maps = self.skel.maps_mut();
let stats_map = maps.stats();
let mut stats: Vec<u64> = Vec::new();
let zero_vec = vec![vec![0u8; stats_map.value_size() as usize]; self.top.nr_cpus_possible()];
let zero_vec =
vec![vec![0u8; stats_map.value_size() as usize]; self.top.nr_cpus_possible()];
for stat in 0..bpf_intf::stat_idx_RUSTY_NR_STATS {
let cpu_stat_vec = stats_map
@ -524,7 +532,7 @@ impl<'a> Scheduler<'a> {
let mut next_tune_at = now + self.tune_interval;
let mut next_sched_at = now + self.sched_interval;
while !shutdown.load(Ordering::Relaxed) && !uei_exited!(&self.skel.bss().uei) {
while !shutdown.load(Ordering::Relaxed) && !uei_exited!(&self.skel, uei) {
let now = Instant::now();
if now >= next_tune_at {
@ -550,8 +558,8 @@ impl<'a> Scheduler<'a> {
);
}
self.struct_ops.take();
uei_report!(&self.skel.bss().uei)
self.struct_ops.take();
uei_report!(&self.skel, uei)
}
}
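
Finally, a sketch of the pre-load configuration pattern used in the rusty hunk above: partial switching is now requested through the `scx_utils::compat` flag on the struct_ops map rather than a BPF-side `switch_partial` rodata variable. `ExampleOpenSkel` and `example_ops` are illustrative placeholders for a libbpf-rs generated open skeleton and its struct_ops name.

```rust
// Sketch (not part of this patch): applying runtime options to an open
// (not yet loaded) skeleton.
use scx_utils::compat;

fn apply_opts(skel: &mut ExampleOpenSkel, partial: bool, exit_dump_len: u32) {
    if partial {
        // SCX_OPS_SWITCH_PARTIAL: only tasks that explicitly select the
        // SCHED_EXT scheduling policy are switched to this scheduler.
        skel.struct_ops.example_ops_mut().flags |= *compat::SCX_OPS_SWITCH_PARTIAL;
    }
    // Size of the kernel's exit debug dump buffer; 0 keeps the default.
    skel.struct_ops.example_ops_mut().exit_dump_len = exit_dump_len;
}
```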