mirror of
https://github.com/JakeHillion/scx.git
synced 2024-12-03 22:37:11 +00:00
Merge pull request #161 from sched-ext/scx-user
Introduce scx_rustland_core: a generic layer to implement user-space schedulers in Rust
This commit is contained in:
commit
7278d88632
@ -1 +1,2 @@
|
|||||||
subdir('scx_utils')
|
subdir('scx_utils')
|
||||||
|
subdir('scx_rustland_core')
|
||||||
|
1
rust/scx_rustland_core/.gitignore
vendored
Normal file
1
rust/scx_rustland_core/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
Cargo.lock
|
26
rust/scx_rustland_core/Cargo.toml
Normal file
26
rust/scx_rustland_core/Cargo.toml
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
[package]
|
||||||
|
name = "scx_rustland_core"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
authors = ["Andrea Righi <andrea.righi@canonical.com>"]
|
||||||
|
license = "GPL-2.0-only"
|
||||||
|
repository = "https://github.com/sched-ext/scx"
|
||||||
|
description = "Framework to implement sched_ext schedulers running in user space"
|
||||||
|
|
||||||
|
include = [
|
||||||
|
"src/bpf/intf.h",
|
||||||
|
"src/bpf/main.bpf.c",
|
||||||
|
"src/bpf.rs",
|
||||||
|
]
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0"
|
||||||
|
libbpf-rs = "0.22.0"
|
||||||
|
libc = "0.2.137"
|
||||||
|
buddy-alloc = "0.5.1"
|
||||||
|
scx_utils = { path = "../scx_utils", version = "0.6" }
|
||||||
|
|
||||||
|
[build-dependencies]
|
||||||
|
tar = "0.4"
|
||||||
|
walkdir = "2.4"
|
||||||
|
scx_utils = { path = "../scx_utils", version = "0.6" }
|
1
rust/scx_rustland_core/LICENSE
Symbolic link
1
rust/scx_rustland_core/LICENSE
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../../LICENSE
|
78
rust/scx_rustland_core/README.md
Normal file
78
rust/scx_rustland_core/README.md
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
# Framework to implement sched_ext schedulers running in user-space
|
||||||
|
|
||||||
|
[sched_ext](https://github.com/sched-ext/scx) is a Linux kernel feature
|
||||||
|
which enables implementing kernel thread schedulers in BPF and dynamically
|
||||||
|
loading them.
|
||||||
|
|
||||||
|
This crate provides a generic layer that can be used to implement sched-ext
|
||||||
|
schedulers that run in user-space.
|
||||||
|
|
||||||
|
It provides a generic BPF abstraction that is completely agnostic of the
|
||||||
|
particular scheduling policy implemented in user-space.
|
||||||
|
|
||||||
|
Developers can use such abstraction to implement schedulers using pure Rust
|
||||||
|
code, without having to deal with any internal kernel / BPF internal details.
|
||||||
|
|
||||||
|
## API
|
||||||
|
|
||||||
|
The main BPF interface is provided by the `BpfScheduler` struct. When this
|
||||||
|
object is initialized it will take care of registering and initializing the BPF
|
||||||
|
component.
|
||||||
|
|
||||||
|
The scheduler then can use `BpfScheduler` instance to receive tasks (in the
|
||||||
|
form of `QueuedTask` objects) and dispatch tasks (in the form of DispatchedTask
|
||||||
|
objects), using respectively the methods `dequeue_task()` and `dispatch_task()`.
|
||||||
|
|
||||||
|
Example usage (FIFO scheduler):
|
||||||
|
```
|
||||||
|
struct Scheduler<'a> {
|
||||||
|
bpf: BpfScheduler<'a>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Scheduler<'a> {
|
||||||
|
fn init() -> Result<Self> {
|
||||||
|
let topo = Topology::new().expect("Failed to build host topology");
|
||||||
|
let bpf = BpfScheduler::init(5000, topo.nr_cpus() as i32, false, false, false)?;
|
||||||
|
Ok(Self { bpf })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn schedule(&mut self) {
|
||||||
|
match self.bpf.dequeue_task() {
|
||||||
|
Ok(Some(task)) => {
|
||||||
|
// task.cpu < 0 is used to to notify an exiting task, in this
|
||||||
|
// case we can simply ignore it.
|
||||||
|
if task.cpu >= 0 {
|
||||||
|
let _ = self.bpf.dispatch_task(&DispatchedTask {
|
||||||
|
pid: task.pid,
|
||||||
|
cpu: task.cpu,
|
||||||
|
cpumask_cnt: task.cpumask_cnt,
|
||||||
|
payload: 0,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(None) => {
|
||||||
|
// Notify the BPF component that all tasks have been dispatched.
|
||||||
|
self.bpf.update_tasks(Some(0), Some(0))?
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Moreover, a CPU ownership map (that keeps track of which PID runs on which CPU)
|
||||||
|
can be accessed using the method `get_cpu_pid()`. This also allows to keep
|
||||||
|
track of the idle and busy CPUs, with the corresponding PIDs associated to
|
||||||
|
them.
|
||||||
|
|
||||||
|
BPF counters and statistics can be accessed using the methods `nr_*_mut()`, in
|
||||||
|
particular `nr_queued_mut()` and `nr_scheduled_mut()` can be updated to notify
|
||||||
|
the BPF component if the user-space scheduler has still some pending work to do
|
||||||
|
or not.
|
||||||
|
|
||||||
|
Lastly, the methods `exited()` and `shutdown_and_report()` can be used
|
||||||
|
respectively to test whether the BPF component exited, and to shutdown and
|
||||||
|
report the exit message.
|
1
rust/scx_rustland_core/bindings.h
Normal file
1
rust/scx_rustland_core/bindings.h
Normal file
@ -0,0 +1 @@
|
|||||||
|
#include "bpf_h/vmlinux/vmlinux.h"
|
1
rust/scx_rustland_core/bpf_h
Symbolic link
1
rust/scx_rustland_core/bpf_h
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../../scheds/include
|
10
rust/scx_rustland_core/build.rs
Normal file
10
rust/scx_rustland_core/build.rs
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
//
|
||||||
|
// This software may be used and distributed according to the terms of the
|
||||||
|
// GNU General Public License version 2.
|
||||||
|
|
||||||
|
use scx_utils::Builder;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
Builder::new().build()
|
||||||
|
}
|
7
rust/scx_rustland_core/meson.build
Normal file
7
rust/scx_rustland_core/meson.build
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
custom_target('scx_rustland_core',
|
||||||
|
output: '@PLAINNAME@.__PHONY__',
|
||||||
|
input: 'Cargo.toml',
|
||||||
|
command: [cargo, 'build', '--manifest-path=@INPUT@', '--target-dir=@OUTDIR@',
|
||||||
|
cargo_build_args],
|
||||||
|
env: cargo_env,
|
||||||
|
build_by_default: true)
|
@ -15,7 +15,7 @@ const HEAP_SIZE: usize = 64 * 1024 * 1024; // 64M
|
|||||||
const LEAF_SIZE: usize = 64;
|
const LEAF_SIZE: usize = 64;
|
||||||
|
|
||||||
#[repr(align(4096))]
|
#[repr(align(4096))]
|
||||||
pub struct AlignedHeap<const N: usize>([u8; N]);
|
struct AlignedHeap<const N: usize>([u8; N]);
|
||||||
|
|
||||||
// Statically pre-allocated memory arena.
|
// Statically pre-allocated memory arena.
|
||||||
static mut FAST_HEAP: AlignedHeap<FAST_HEAP_SIZE> = AlignedHeap([0u8; FAST_HEAP_SIZE]);
|
static mut FAST_HEAP: AlignedHeap<FAST_HEAP_SIZE> = AlignedHeap([0u8; FAST_HEAP_SIZE]);
|
||||||
@ -26,25 +26,25 @@ static mut HEAP: AlignedHeap<HEAP_SIZE> = AlignedHeap([0u8; HEAP_SIZE]);
|
|||||||
// To prevent potential deadlock conditions under heavy loads, any scheduler that delegates
|
// To prevent potential deadlock conditions under heavy loads, any scheduler that delegates
|
||||||
// scheduling decisions to user-space should avoid triggering page faults.
|
// scheduling decisions to user-space should avoid triggering page faults.
|
||||||
//
|
//
|
||||||
// To address this issue, replace the global allocator with a custom one (RustLandAllocator),
|
// To address this issue, replace the global allocator with a custom one (UserAllocator),
|
||||||
// designed to operate on a pre-allocated buffer. This, coupled with the memory locking achieved
|
// designed to operate on a pre-allocated buffer. This, coupled with the memory locking achieved
|
||||||
// through mlockall(), prevents page faults from occurring during the execution of the user-space
|
// through mlockall(), prevents page faults from occurring during the execution of the user-space
|
||||||
// scheduler.
|
// scheduler.
|
||||||
#[cfg_attr(not(test), global_allocator)]
|
#[cfg_attr(not(test), global_allocator)]
|
||||||
pub static ALLOCATOR: RustLandAllocator = unsafe {
|
pub static ALLOCATOR: UserAllocator = unsafe {
|
||||||
let fast_param = FastAllocParam::new(FAST_HEAP.0.as_ptr(), FAST_HEAP_SIZE);
|
let fast_param = FastAllocParam::new(FAST_HEAP.0.as_ptr(), FAST_HEAP_SIZE);
|
||||||
let buddy_param = BuddyAllocParam::new(HEAP.0.as_ptr(), HEAP_SIZE, LEAF_SIZE);
|
let buddy_param = BuddyAllocParam::new(HEAP.0.as_ptr(), HEAP_SIZE, LEAF_SIZE);
|
||||||
RustLandAllocator {
|
UserAllocator {
|
||||||
arena: NonThreadsafeAlloc::new(fast_param, buddy_param),
|
arena: NonThreadsafeAlloc::new(fast_param, buddy_param),
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Main allocator class.
|
// Main allocator class.
|
||||||
pub struct RustLandAllocator {
|
pub struct UserAllocator {
|
||||||
pub arena: NonThreadsafeAlloc,
|
arena: NonThreadsafeAlloc,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RustLandAllocator {
|
impl UserAllocator {
|
||||||
pub fn lock_memory(&self) {
|
pub fn lock_memory(&self) {
|
||||||
unsafe {
|
unsafe {
|
||||||
VM.save();
|
VM.save();
|
||||||
@ -75,7 +75,7 @@ impl RustLandAllocator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Override global allocator methods.
|
// Override global allocator methods.
|
||||||
unsafe impl GlobalAlloc for RustLandAllocator {
|
unsafe impl GlobalAlloc for UserAllocator {
|
||||||
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
|
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
|
||||||
self.arena.alloc(layout)
|
self.arena.alloc(layout)
|
||||||
}
|
}
|
6
rust/scx_rustland_core/src/bindings.rs
Normal file
6
rust/scx_rustland_core/src/bindings.rs
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#![allow(non_upper_case_globals)]
|
||||||
|
#![allow(non_camel_case_types)]
|
||||||
|
#![allow(non_snake_case)]
|
||||||
|
#![allow(dead_code)]
|
||||||
|
|
||||||
|
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
|
@ -15,13 +15,12 @@ use libbpf_rs::skel::SkelBuilder as _;
|
|||||||
|
|
||||||
use libc::{sched_param, sched_setscheduler};
|
use libc::{sched_param, sched_setscheduler};
|
||||||
|
|
||||||
mod alloc;
|
|
||||||
use alloc::*;
|
|
||||||
|
|
||||||
use scx_utils::init_libbpf_logging;
|
use scx_utils::init_libbpf_logging;
|
||||||
use scx_utils::uei_exited;
|
use scx_utils::uei_exited;
|
||||||
use scx_utils::uei_report;
|
use scx_utils::uei_report;
|
||||||
|
|
||||||
|
use scx_rustland_core::ALLOCATOR;
|
||||||
|
|
||||||
// Defined in UAPI
|
// Defined in UAPI
|
||||||
const SCHED_EXT: i32 = 7;
|
const SCHED_EXT: i32 = 7;
|
||||||
|
|
||||||
@ -31,7 +30,7 @@ const SCHED_EXT: i32 = 7;
|
|||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub const NO_CPU: i32 = -1;
|
pub const NO_CPU: i32 = -1;
|
||||||
|
|
||||||
/// scx_rustland: provide high-level abstractions to interact with the BPF component.
|
/// High-level Rust abstraction to interact with a generic sched-ext BPF component.
|
||||||
///
|
///
|
||||||
/// Overview
|
/// Overview
|
||||||
/// ========
|
/// ========
|
||||||
@ -40,7 +39,7 @@ pub const NO_CPU: i32 = -1;
|
|||||||
/// initialized it will take care of registering and initializing the BPF component.
|
/// initialized it will take care of registering and initializing the BPF component.
|
||||||
///
|
///
|
||||||
/// The scheduler then can use BpfScheduler() instance to receive tasks (in the form of QueuedTask
|
/// The scheduler then can use BpfScheduler() instance to receive tasks (in the form of QueuedTask
|
||||||
/// object) and dispatch tasks (in the form of DispatchedTask objects), using respectively the
|
/// objects) and dispatch tasks (in the form of DispatchedTask objects), using respectively the
|
||||||
/// methods dequeue_task() and dispatch_task().
|
/// methods dequeue_task() and dispatch_task().
|
||||||
///
|
///
|
||||||
/// The CPU ownership map can be accessed using the method get_cpu_pid(), this also allows to keep
|
/// The CPU ownership map can be accessed using the method get_cpu_pid(), this also allows to keep
|
||||||
@ -51,85 +50,8 @@ pub const NO_CPU: i32 = -1;
|
|||||||
/// user-space scheduler has some pending work to do or not.
|
/// user-space scheduler has some pending work to do or not.
|
||||||
///
|
///
|
||||||
/// Finally the methods exited() and shutdown_and_report() can be used respectively to test
|
/// Finally the methods exited() and shutdown_and_report() can be used respectively to test
|
||||||
|
/// whether the BPF component exited, and to shutdown and report the exit message.
|
||||||
/// whether the BPF component exited, and to shutdown and report exit message.
|
/// whether the BPF component exited, and to shutdown and report exit message.
|
||||||
///
|
|
||||||
/// Example
|
|
||||||
/// =======
|
|
||||||
///
|
|
||||||
/// Following you can find bare minimum template that can be used to implement a simple FIFO
|
|
||||||
/// scheduler using the BPF abstraction:
|
|
||||||
///
|
|
||||||
/// mod bpf_skel;
|
|
||||||
/// pub use bpf_skel::*;
|
|
||||||
/// mod bpf;
|
|
||||||
/// pub mod bpf_intf;
|
|
||||||
/// use bpf::*;
|
|
||||||
///
|
|
||||||
/// use std::thread;
|
|
||||||
///
|
|
||||||
/// use std::sync::atomic::AtomicBool;
|
|
||||||
/// use std::sync::atomic::Ordering;
|
|
||||||
/// use std::sync::Arc;
|
|
||||||
///
|
|
||||||
/// use anyhow::Result;
|
|
||||||
///
|
|
||||||
/// struct Scheduler<'a> {
|
|
||||||
/// bpf: BpfScheduler<'a>,
|
|
||||||
/// }
|
|
||||||
///
|
|
||||||
/// impl<'a> Scheduler<'a> {
|
|
||||||
/// fn init() -> Result<Self> {
|
|
||||||
/// let bpf = BpfScheduler::init(20000, false, false)?;
|
|
||||||
/// Ok(Self { bpf })
|
|
||||||
/// }
|
|
||||||
///
|
|
||||||
/// fn dispatch_tasks(&mut self) {
|
|
||||||
/// loop {
|
|
||||||
/// match self.bpf.dequeue_task() {
|
|
||||||
/// Ok(Some(task)) => {
|
|
||||||
/// if task.cpu >= 0 {
|
|
||||||
/// let _ = self.bpf.dispatch_task(
|
|
||||||
/// &DispatchedTask {
|
|
||||||
/// pid: task.pid,
|
|
||||||
/// cpu: task.cpu,
|
|
||||||
/// payload: 0,
|
|
||||||
/// }
|
|
||||||
/// );
|
|
||||||
/// }
|
|
||||||
/// }
|
|
||||||
/// Ok(None) => {
|
|
||||||
/// *self.bpf.nr_queued_mut() = 0;
|
|
||||||
/// *self.bpf.nr_scheduled_mut() = 0;
|
|
||||||
/// break;
|
|
||||||
/// }
|
|
||||||
/// Err(_) => {
|
|
||||||
/// break;
|
|
||||||
/// }
|
|
||||||
/// }
|
|
||||||
/// }
|
|
||||||
/// }
|
|
||||||
///
|
|
||||||
/// fn run(&mut self, shutdown: Arc<AtomicBool>) -> Result<()> {
|
|
||||||
/// while !shutdown.load(Ordering::Relaxed) && !self.bpf.exited() {
|
|
||||||
/// self.dispatch_tasks();
|
|
||||||
/// thread::yield_now();
|
|
||||||
/// }
|
|
||||||
///
|
|
||||||
/// Ok(())
|
|
||||||
/// }
|
|
||||||
/// }
|
|
||||||
///
|
|
||||||
/// fn main() -> Result<()> {
|
|
||||||
/// let mut sched = Scheduler::init()?;
|
|
||||||
/// let shutdown = Arc::new(AtomicBool::new(false));
|
|
||||||
/// let shutdown_clone = shutdown.clone();
|
|
||||||
/// ctrlc::set_handler(move || {
|
|
||||||
/// shutdown_clone.store(true, Ordering::Relaxed);
|
|
||||||
/// })?;
|
|
||||||
///
|
|
||||||
/// sched.run(shutdown)
|
|
||||||
/// }
|
|
||||||
///
|
|
||||||
|
|
||||||
// Task queued for scheduling from the BPF component (see bpf_intf::queued_task_ctx).
|
// Task queued for scheduling from the BPF component (see bpf_intf::queued_task_ctx).
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@ -241,16 +163,31 @@ impl<'cb> BpfScheduler<'cb> {
|
|||||||
ALLOCATOR.lock_memory();
|
ALLOCATOR.lock_memory();
|
||||||
|
|
||||||
// Copy one item from the ring buffer.
|
// Copy one item from the ring buffer.
|
||||||
|
//
|
||||||
|
// # Safety
|
||||||
|
//
|
||||||
|
// Each invocation of the callback will trigger the copy of exactly one QueuedTask item to
|
||||||
|
// BUF. The caller must be synchronize to ensure that multiple invocations of the callback
|
||||||
|
// are not happening at the same time, but this is implicitly guaranteed by the fact that
|
||||||
|
// the caller is a single-thread process (for now).
|
||||||
|
//
|
||||||
|
// Use of a `str` whose contents are not valid UTF-8 is undefined behavior.
|
||||||
fn callback(data: &[u8]) -> i32 {
|
fn callback(data: &[u8]) -> i32 {
|
||||||
unsafe {
|
unsafe {
|
||||||
|
// SAFETY: copying from the BPF ring buffer to BUF is safe, since the size of BUF
|
||||||
|
// is exactly the size of QueuedTask and the callback operates in chunks of
|
||||||
|
// QueuedTask items. It also copies exactly one QueuedTask at a time, this is
|
||||||
|
// guaranteed by the error code returned by this callback (see below). From a
|
||||||
|
// thread-safety perspective this is also correct, assuming the caller is a
|
||||||
|
// single-thread process (as it is for now).
|
||||||
BUF.0.copy_from_slice(data);
|
BUF.0.copy_from_slice(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return an unsupported error to stop early and consume only one item.
|
// Return an unsupported error to stop early and consume only one item.
|
||||||
//
|
//
|
||||||
// NOTE: this is quite a hack. I wish libbpf would honor stopping after the first item
|
// NOTE: this is quite a hack. I wish libbpf would honor stopping after the first item
|
||||||
// is consumed, upon returnin a non-zero positive value here, but it doesn't seem to be
|
// is consumed, upon returning a non-zero positive value here, but it doesn't seem to
|
||||||
// the case:
|
// be the case:
|
||||||
//
|
//
|
||||||
// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/ringbuf.c?h=v6.8-rc5#n260
|
// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/ringbuf.c?h=v6.8-rc5#n260
|
||||||
//
|
//
|
||||||
@ -305,6 +242,23 @@ impl<'cb> BpfScheduler<'cb> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update the amount of tasks that have been queued to the user-space scheduler and dispatched.
|
||||||
|
//
|
||||||
|
// This method is used to notify the BPF component if the user-space scheduler has still some
|
||||||
|
// pending actions to complete (based on the counter of queued and scheduled tasks).
|
||||||
|
//
|
||||||
|
// NOTE: do not set allow(dead_code) for this method, any scheduler must use this method at
|
||||||
|
// some point, otherwise the BPF component will keep waking-up the user-space scheduler in a
|
||||||
|
// busy loop, causing unnecessary high CPU consumption.
|
||||||
|
pub fn update_tasks(&mut self, nr_queued: Option<u64>, nr_scheduled: Option<u64>) {
|
||||||
|
if let Some(queued) = nr_queued {
|
||||||
|
self.skel.bss_mut().nr_queued = queued;
|
||||||
|
}
|
||||||
|
if let Some(scheduled) = nr_scheduled {
|
||||||
|
self.skel.bss_mut().nr_scheduled = scheduled;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Override the default scheduler time slice (in us).
|
// Override the default scheduler time slice (in us).
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub fn set_effective_slice_us(&mut self, slice_us: u64) {
|
pub fn set_effective_slice_us(&mut self, slice_us: u64) {
|
||||||
@ -324,11 +278,13 @@ impl<'cb> BpfScheduler<'cb> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Counter of queued tasks.
|
// Counter of queued tasks.
|
||||||
|
#[allow(dead_code)]
|
||||||
pub fn nr_queued_mut(&mut self) -> &mut u64 {
|
pub fn nr_queued_mut(&mut self) -> &mut u64 {
|
||||||
&mut self.skel.bss_mut().nr_queued
|
&mut self.skel.bss_mut().nr_queued
|
||||||
}
|
}
|
||||||
|
|
||||||
// Counter of scheduled tasks.
|
// Counter of scheduled tasks.
|
||||||
|
#[allow(dead_code)]
|
||||||
pub fn nr_scheduled_mut(&mut self) -> &mut u64 {
|
pub fn nr_scheduled_mut(&mut self) -> &mut u64 {
|
||||||
&mut self.skel.bss_mut().nr_scheduled
|
&mut self.skel.bss_mut().nr_scheduled
|
||||||
}
|
}
|
@ -1,19 +1,13 @@
|
|||||||
/* Copyright (c) Andrea Righi <andrea.righi@canonical.com> */
|
/* Copyright (c) Andrea Righi <andrea.righi@canonical.com> */
|
||||||
/*
|
/*
|
||||||
* scx_rustland: simple user-space scheduler written in Rust
|
* scx_rustland_core: BPF backend for schedulers running in user-space.
|
||||||
*
|
*
|
||||||
* The main goal of this scheduler is be an "easy to read" template that can be
|
* This BPF backend implements the low level sched-ext functionalities for a
|
||||||
* used to quickly test more complex scheduling policies. For this reason this
|
* user-space counterpart, that implements the actual scheduling policy.
|
||||||
* scheduler is mostly focused on simplicity and code readability.
|
|
||||||
*
|
*
|
||||||
* The scheduler is made of a BPF component (dispatcher) that implements the
|
* The BPF part collects total cputime and weight from the tasks that need to
|
||||||
* low level sched-ext functionalities and a user-space counterpart
|
* run, then it sends all details to the user-space scheduler that decides the
|
||||||
* (scheduler), written in Rust, that implements the actual scheduling policy.
|
* best order of execution of the tasks (based on the collected metrics).
|
||||||
*
|
|
||||||
* The BPF dispatcher collects total cputime and weight from the tasks that
|
|
||||||
* need to run, then it sends all details to the user-space scheduler that
|
|
||||||
* decides the best order of execution of the tasks (based on the collected
|
|
||||||
* metrics).
|
|
||||||
*
|
*
|
||||||
* The user-space scheduler then returns to the BPF component the list of tasks
|
* The user-space scheduler then returns to the BPF component the list of tasks
|
||||||
* to be dispatched in the proper order.
|
* to be dispatched in the proper order.
|
9
rust/scx_rustland_core/src/bpf_intf.rs
Normal file
9
rust/scx_rustland_core/src/bpf_intf.rs
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
// This software may be used and distributed according to the terms of the
|
||||||
|
// GNU General Public License version 2.
|
||||||
|
|
||||||
|
#![allow(non_upper_case_globals)]
|
||||||
|
#![allow(non_camel_case_types)]
|
||||||
|
#![allow(non_snake_case)]
|
||||||
|
#![allow(dead_code)]
|
||||||
|
|
||||||
|
include!(concat!(env!("OUT_DIR"), "/bpf_intf.rs"));
|
4
rust/scx_rustland_core/src/bpf_skel.rs
Normal file
4
rust/scx_rustland_core/src/bpf_skel.rs
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
// This software may be used and distributed according to the terms of the
|
||||||
|
// GNU General Public License version 2.
|
||||||
|
|
||||||
|
include!(concat!(env!("OUT_DIR"), "/bpf_skel.rs"));
|
7
rust/scx_rustland_core/src/lib.rs
Normal file
7
rust/scx_rustland_core/src/lib.rs
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
mod bindings;
|
||||||
|
|
||||||
|
mod alloc;
|
||||||
|
pub use alloc::ALLOCATOR;
|
||||||
|
|
||||||
|
mod rustland_builder;
|
||||||
|
pub use rustland_builder::RustLandBuilder;
|
50
rust/scx_rustland_core/src/rustland_builder.rs
Normal file
50
rust/scx_rustland_core/src/rustland_builder.rs
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
// Copyright (c) Andrea Righi <andrea.righi@canonical.com>
|
||||||
|
|
||||||
|
// This software may be used and distributed according to the terms of the
|
||||||
|
// GNU General Public License version 2.
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::Write;
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
use scx_utils::BpfBuilder;
|
||||||
|
|
||||||
|
pub struct RustLandBuilder {
|
||||||
|
inner_builder: BpfBuilder,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RustLandBuilder {
|
||||||
|
pub fn new() -> Result<Self> {
|
||||||
|
Ok(Self {
|
||||||
|
inner_builder: BpfBuilder::new()?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create_file(&self, file_name: &str, content: &[u8]) {
|
||||||
|
let path = Path::new(file_name);
|
||||||
|
let mut file = File::create(&path).expect("Unable to create file");
|
||||||
|
file.write_all(content).expect("Unable to write to file");
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build(&mut self) -> Result<()> {
|
||||||
|
// Embed the BPF source files.
|
||||||
|
let intf = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/bpf/intf.h"));
|
||||||
|
let skel = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/bpf/main.bpf.c"));
|
||||||
|
let bpf = include_bytes!(concat!(env!("CARGO_MANIFEST_DIR"), "/src/bpf.rs"));
|
||||||
|
|
||||||
|
// Generate BPF backend code (C).
|
||||||
|
self.create_file("intf.h", intf);
|
||||||
|
self.create_file("main.bpf.c", skel);
|
||||||
|
|
||||||
|
self.inner_builder.enable_intf("intf.h", "bpf_intf.rs");
|
||||||
|
self.inner_builder.enable_skel("main.bpf.c", "bpf");
|
||||||
|
|
||||||
|
// Generate user-space BPF connector code (Rust).
|
||||||
|
self.create_file("src/bpf.rs", bpf);
|
||||||
|
|
||||||
|
// Build the scheduler.
|
||||||
|
self.inner_builder.build()
|
||||||
|
}
|
||||||
|
}
|
@ -18,10 +18,12 @@ hex = "0.4.3"
|
|||||||
lazy_static = "1.4"
|
lazy_static = "1.4"
|
||||||
libbpf-cargo = "0.22"
|
libbpf-cargo = "0.22"
|
||||||
libbpf-rs = "0.22.0"
|
libbpf-rs = "0.22.0"
|
||||||
|
buddy-alloc = "0.5.1"
|
||||||
log = "0.4.17"
|
log = "0.4.17"
|
||||||
regex = "1.10"
|
regex = "1.10"
|
||||||
sscanf = "0.4"
|
sscanf = "0.4"
|
||||||
tar = "0.4"
|
tar = "0.4"
|
||||||
|
walkdir = "2.4"
|
||||||
version-compare = "0.1"
|
version-compare = "0.1"
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
|
@ -3,51 +3,8 @@
|
|||||||
// This software may be used and distributed according to the terms of the
|
// This software may be used and distributed according to the terms of the
|
||||||
// GNU General Public License version 2.
|
// GNU General Public License version 2.
|
||||||
|
|
||||||
use std::env;
|
include!("src/builder.rs");
|
||||||
use std::fs::File;
|
|
||||||
use std::path::PathBuf;
|
|
||||||
|
|
||||||
const BPF_H: &str = "bpf_h";
|
|
||||||
|
|
||||||
fn gen_bpf_h() {
|
|
||||||
let file =
|
|
||||||
File::create(PathBuf::from(env::var("OUT_DIR").unwrap()).join(format!("{}.tar", BPF_H)))
|
|
||||||
.unwrap();
|
|
||||||
let mut ar = tar::Builder::new(file);
|
|
||||||
|
|
||||||
ar.follow_symlinks(false);
|
|
||||||
ar.append_dir_all(".", BPF_H).unwrap();
|
|
||||||
ar.finish().unwrap();
|
|
||||||
|
|
||||||
for ent in walkdir::WalkDir::new(BPF_H) {
|
|
||||||
let ent = ent.unwrap();
|
|
||||||
if !ent.file_type().is_dir() {
|
|
||||||
println!("cargo:rerun-if-changed={}", ent.path().to_string_lossy());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn gen_bindings() {
|
|
||||||
// FIXME - bindgen's API changed between 0.68 and 0.69 so that
|
|
||||||
// `bindgen::CargoCallbacks::new()` should be used instead of
|
|
||||||
// `bindgen::CargoCallbacks`. Unfortunately, as of Dec 2023, fedora is
|
|
||||||
// shipping 0.68. To accommodate fedora, allow both 0.68 and 0.69 of
|
|
||||||
// bindgen and suppress deprecation warning. Remove the following once
|
|
||||||
// fedora can be updated to bindgen >= 0.69.
|
|
||||||
#[allow(deprecated)]
|
|
||||||
let bindings = bindgen::Builder::default()
|
|
||||||
.header("bindings.h")
|
|
||||||
.allowlist_type("scx_exit_kind")
|
|
||||||
.parse_callbacks(Box::new(bindgen::CargoCallbacks))
|
|
||||||
.generate()
|
|
||||||
.expect("Unable to generate bindings");
|
|
||||||
|
|
||||||
bindings
|
|
||||||
.write_to_file(PathBuf::from(env::var("OUT_DIR").unwrap()).join("bindings.rs"))
|
|
||||||
.expect("Couldn't write bindings");
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
gen_bpf_h();
|
Builder::new().build()
|
||||||
gen_bindings();
|
|
||||||
}
|
}
|
||||||
|
61
rust/scx_utils/src/builder.rs
Normal file
61
rust/scx_utils/src/builder.rs
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
//
|
||||||
|
// This software may be used and distributed according to the terms of the
|
||||||
|
// GNU General Public License version 2.
|
||||||
|
|
||||||
|
use std::env;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
const BPF_H: &str = "bpf_h";
|
||||||
|
|
||||||
|
pub struct Builder;
|
||||||
|
|
||||||
|
impl Builder {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Builder
|
||||||
|
}
|
||||||
|
|
||||||
|
fn gen_bpf_h(&self) {
|
||||||
|
let out_dir = env::var("OUT_DIR").unwrap();
|
||||||
|
let file = File::create(PathBuf::from(&out_dir).join(format!("{}.tar", BPF_H))).unwrap();
|
||||||
|
let mut ar = tar::Builder::new(file);
|
||||||
|
|
||||||
|
ar.follow_symlinks(false);
|
||||||
|
ar.append_dir_all(".", BPF_H).unwrap();
|
||||||
|
ar.finish().unwrap();
|
||||||
|
|
||||||
|
for ent in walkdir::WalkDir::new(BPF_H) {
|
||||||
|
let ent = ent.unwrap();
|
||||||
|
if !ent.file_type().is_dir() {
|
||||||
|
println!("cargo:rerun-if-changed={}", ent.path().to_string_lossy());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn gen_bindings(&self) {
|
||||||
|
let out_dir = env::var("OUT_DIR").unwrap();
|
||||||
|
// FIXME - bindgen's API changed between 0.68 and 0.69 so that
|
||||||
|
// `bindgen::CargoCallbacks::new()` should be used instead of
|
||||||
|
// `bindgen::CargoCallbacks`. Unfortunately, as of Dec 2023, fedora is
|
||||||
|
// shipping 0.68. To accommodate fedora, allow both 0.68 and 0.69 of
|
||||||
|
// bindgen and suppress deprecation warning. Remove the following once
|
||||||
|
// fedora can be updated to bindgen >= 0.69.
|
||||||
|
#[allow(deprecated)]
|
||||||
|
let bindings = bindgen::Builder::default()
|
||||||
|
.header("bindings.h")
|
||||||
|
.allowlist_type("scx_exit_kind")
|
||||||
|
.parse_callbacks(Box::new(bindgen::CargoCallbacks))
|
||||||
|
.generate()
|
||||||
|
.expect("Unable to generate bindings");
|
||||||
|
|
||||||
|
bindings
|
||||||
|
.write_to_file(PathBuf::from(&out_dir).join("bindings.rs"))
|
||||||
|
.expect("Couldn't write bindings");
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build(self) {
|
||||||
|
self.gen_bpf_h();
|
||||||
|
self.gen_bindings();
|
||||||
|
}
|
||||||
|
}
|
@ -35,6 +35,9 @@ mod bindings;
|
|||||||
mod bpf_builder;
|
mod bpf_builder;
|
||||||
pub use bpf_builder::BpfBuilder;
|
pub use bpf_builder::BpfBuilder;
|
||||||
|
|
||||||
|
mod builder;
|
||||||
|
pub use builder::Builder;
|
||||||
|
|
||||||
pub mod ravg;
|
pub mod ravg;
|
||||||
|
|
||||||
mod libbpf_logger;
|
mod libbpf_logger;
|
||||||
|
@ -15,3 +15,4 @@ main.rs or \*.bpf.c files.
|
|||||||
- [scx_layered](scx_layered/README.md)
|
- [scx_layered](scx_layered/README.md)
|
||||||
- [scx_rusty](scx_rusty/README.md)
|
- [scx_rusty](scx_rusty/README.md)
|
||||||
- [scx_rustland](scx_rustland/README.md)
|
- [scx_rustland](scx_rustland/README.md)
|
||||||
|
- [scx_rlfifo](scx_rlfifo/README.md)
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
subdir('scx_layered')
|
subdir('scx_layered')
|
||||||
subdir('scx_rusty')
|
subdir('scx_rusty')
|
||||||
subdir('scx_rustland')
|
subdir('scx_rustland')
|
||||||
|
subdir('scx_rlfifo')
|
||||||
|
6
scheds/rust/scx_rlfifo/.gitignore
vendored
Normal file
6
scheds/rust/scx_rlfifo/.gitignore
vendored
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
src/bpf/.output
|
||||||
|
intf.h
|
||||||
|
main.bpf.c
|
||||||
|
bpf.rs
|
||||||
|
Cargo.lock
|
||||||
|
target
|
22
scheds/rust/scx_rlfifo/Cargo.toml
Normal file
22
scheds/rust/scx_rlfifo/Cargo.toml
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
[package]
|
||||||
|
name = "scx_rlfifo"
|
||||||
|
version = "0.0.1"
|
||||||
|
authors = ["Andrea Righi <andrea.righi@canonical.com>", "Canonical"]
|
||||||
|
edition = "2021"
|
||||||
|
description = "A simple FIFO scheduler in Rust that runs in user-space"
|
||||||
|
license = "GPL-2.0-only"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0.65"
|
||||||
|
ctrlc = { version = "3.1", features = ["termination"] }
|
||||||
|
libbpf-rs = "0.22.0"
|
||||||
|
libc = "0.2.137"
|
||||||
|
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
|
||||||
|
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "0.1" }
|
||||||
|
|
||||||
|
[build-dependencies]
|
||||||
|
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
|
||||||
|
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "0.1" }
|
||||||
|
|
||||||
|
[features]
|
||||||
|
enable_backtrace = []
|
1
scheds/rust/scx_rlfifo/LICENSE
Symbolic link
1
scheds/rust/scx_rlfifo/LICENSE
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../../../LICENSE
|
20
scheds/rust/scx_rlfifo/README.md
Normal file
20
scheds/rust/scx_rlfifo/README.md
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# scx_rlfifo
|
||||||
|
|
||||||
|
This is a single user-defined scheduler used within [sched_ext](https://github.com/sched-ext/scx/tree/main), which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. [Read more about sched_ext](https://github.com/sched-ext/scx/tree/main).
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
scx_rlfifo is a simple FIFO scheduler runs in user-space, based on the
|
||||||
|
scx_rustland_core framework.
|
||||||
|
|
||||||
|
## Typical Use Case
|
||||||
|
|
||||||
|
This scheduler is provided as a simple template that can be used as a baseline
|
||||||
|
to test more complex scheduling policies.
|
||||||
|
|
||||||
|
## Production Ready?
|
||||||
|
|
||||||
|
Definitely not. Using this scheduler in a production environment is not
|
||||||
|
recommended, unless there are specific requirements that necessitate a basic
|
||||||
|
FIFO scheduling approach. Even then, it's still recommended to use the kernel's
|
||||||
|
SCHED_FIFO real-time class.
|
9
scheds/rust/scx_rlfifo/build.rs
Normal file
9
scheds/rust/scx_rlfifo/build.rs
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
// This software may be used and distributed according to the terms of the
|
||||||
|
// GNU General Public License version 2.
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
scx_rustland_core::RustLandBuilder::new()
|
||||||
|
.unwrap()
|
||||||
|
.build()
|
||||||
|
.unwrap();
|
||||||
|
}
|
7
scheds/rust/scx_rlfifo/meson.build
Normal file
7
scheds/rust/scx_rlfifo/meson.build
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
custom_target('scx_rlfifo',
|
||||||
|
output: '@PLAINNAME@.__PHONY__',
|
||||||
|
input: 'Cargo.toml',
|
||||||
|
command: [cargo, 'build', '--manifest-path=@INPUT@', '--target-dir=@OUTDIR@',
|
||||||
|
cargo_build_args],
|
||||||
|
env: cargo_env,
|
||||||
|
build_by_default: true)
|
8
scheds/rust/scx_rlfifo/rustfmt.toml
Normal file
8
scheds/rust/scx_rlfifo/rustfmt.toml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# Get help on options with `rustfmt --help=config`
|
||||||
|
# Please keep these in alphabetical order.
|
||||||
|
edition = "2021"
|
||||||
|
group_imports = "StdExternalCrate"
|
||||||
|
imports_granularity = "Item"
|
||||||
|
merge_derives = false
|
||||||
|
use_field_init_shorthand = true
|
||||||
|
version = "Two"
|
9
scheds/rust/scx_rlfifo/src/bpf_intf.rs
Normal file
9
scheds/rust/scx_rlfifo/src/bpf_intf.rs
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
// This software may be used and distributed according to the terms of the
|
||||||
|
// GNU General Public License version 2.
|
||||||
|
|
||||||
|
#![allow(non_upper_case_globals)]
|
||||||
|
#![allow(non_camel_case_types)]
|
||||||
|
#![allow(non_snake_case)]
|
||||||
|
#![allow(dead_code)]
|
||||||
|
|
||||||
|
include!(concat!(env!("OUT_DIR"), "/bpf_intf.rs"));
|
4
scheds/rust/scx_rlfifo/src/bpf_skel.rs
Normal file
4
scheds/rust/scx_rlfifo/src/bpf_skel.rs
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
// This software may be used and distributed according to the terms of the
|
||||||
|
// GNU General Public License version 2.
|
||||||
|
|
||||||
|
include!(concat!(env!("OUT_DIR"), "/bpf_skel.rs"));
|
117
scheds/rust/scx_rlfifo/src/main.rs
Normal file
117
scheds/rust/scx_rlfifo/src/main.rs
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
// Copyright (c) Andrea Righi <andrea.righi@canonical.com>
|
||||||
|
|
||||||
|
// This software may be used and distributed according to the terms of the
|
||||||
|
// GNU General Public License version 2.
|
||||||
|
mod bpf_skel;
|
||||||
|
pub use bpf_skel::*;
|
||||||
|
pub mod bpf_intf;
|
||||||
|
|
||||||
|
mod bpf;
|
||||||
|
use bpf::*;
|
||||||
|
|
||||||
|
use scx_utils::Topology;
|
||||||
|
|
||||||
|
use std::sync::atomic::AtomicBool;
|
||||||
|
use std::sync::atomic::Ordering;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use std::time::{Duration, SystemTime};
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
|
||||||
|
struct Scheduler<'a> {
|
||||||
|
bpf: BpfScheduler<'a>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Scheduler<'a> {
|
||||||
|
fn init() -> Result<Self> {
|
||||||
|
let topo = Topology::new().expect("Failed to build host topology");
|
||||||
|
let bpf = BpfScheduler::init(5000, topo.nr_cpus() as i32, false, false, false)?;
|
||||||
|
Ok(Self { bpf })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn now() -> u64 {
|
||||||
|
SystemTime::now()
|
||||||
|
.duration_since(SystemTime::UNIX_EPOCH)
|
||||||
|
.unwrap()
|
||||||
|
.as_secs()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dispatch_tasks(&mut self) {
|
||||||
|
loop {
|
||||||
|
// Get queued taks and dispatch them in order (FIFO).
|
||||||
|
match self.bpf.dequeue_task() {
|
||||||
|
Ok(Some(task)) => {
|
||||||
|
// task.cpu < 0 is used to to notify an exiting task, in this
|
||||||
|
// case we can simply ignore the task.
|
||||||
|
if task.cpu >= 0 {
|
||||||
|
let _ = self.bpf.dispatch_task(&DispatchedTask {
|
||||||
|
pid: task.pid,
|
||||||
|
cpu: task.cpu,
|
||||||
|
cpumask_cnt: task.cpumask_cnt,
|
||||||
|
payload: 0,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// Give the task a chance to run and prevent overflowing the dispatch queue.
|
||||||
|
std::thread::yield_now();
|
||||||
|
}
|
||||||
|
Ok(None) => {
|
||||||
|
// Notify the BPF component that all tasks have been scheduled and dispatched.
|
||||||
|
self.bpf.update_tasks(Some(0), Some(0));
|
||||||
|
|
||||||
|
// All queued tasks have been dipatched, add a short sleep to reduce
|
||||||
|
// scheduler's CPU consuption.
|
||||||
|
std::thread::sleep(Duration::from_millis(1));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn print_stats(&mut self) {
|
||||||
|
let nr_user_dispatches = *self.bpf.nr_user_dispatches_mut();
|
||||||
|
let nr_kernel_dispatches = *self.bpf.nr_kernel_dispatches_mut();
|
||||||
|
let nr_cancel_dispatches = *self.bpf.nr_cancel_dispatches_mut();
|
||||||
|
let nr_bounce_dispatches = *self.bpf.nr_bounce_dispatches_mut();
|
||||||
|
let nr_failed_dispatches = *self.bpf.nr_failed_dispatches_mut();
|
||||||
|
let nr_sched_congested = *self.bpf.nr_sched_congested_mut();
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"user={} kernel={} cancel={} bounce={} fail={} cong={}",
|
||||||
|
nr_user_dispatches, nr_kernel_dispatches,
|
||||||
|
nr_cancel_dispatches, nr_bounce_dispatches,
|
||||||
|
nr_failed_dispatches, nr_sched_congested,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(&mut self, shutdown: Arc<AtomicBool>) -> Result<()> {
|
||||||
|
let mut prev_ts = Self::now();
|
||||||
|
|
||||||
|
while !shutdown.load(Ordering::Relaxed) && !self.bpf.exited() {
|
||||||
|
self.dispatch_tasks();
|
||||||
|
|
||||||
|
let curr_ts = Self::now();
|
||||||
|
if curr_ts > prev_ts {
|
||||||
|
self.print_stats();
|
||||||
|
prev_ts = curr_ts;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.bpf.shutdown_and_report()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() -> Result<()> {
|
||||||
|
let mut sched = Scheduler::init()?;
|
||||||
|
let shutdown = Arc::new(AtomicBool::new(false));
|
||||||
|
let shutdown_clone = shutdown.clone();
|
||||||
|
|
||||||
|
ctrlc::set_handler(move || {
|
||||||
|
shutdown_clone.store(true, Ordering::Relaxed);
|
||||||
|
})?;
|
||||||
|
|
||||||
|
sched.run(shutdown)
|
||||||
|
}
|
3
scheds/rust/scx_rustland/.gitignore
vendored
3
scheds/rust/scx_rustland/.gitignore
vendored
@ -1,3 +1,6 @@
|
|||||||
src/bpf/.output
|
src/bpf/.output
|
||||||
|
intf.h
|
||||||
|
main.bpf.c
|
||||||
|
bpf.rs
|
||||||
Cargo.lock
|
Cargo.lock
|
||||||
target
|
target
|
||||||
|
@ -8,21 +8,20 @@ license = "GPL-2.0-only"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0.65"
|
anyhow = "1.0.65"
|
||||||
bitvec = { version = "1.0", features = ["serde"] }
|
|
||||||
clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
|
clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
|
||||||
ctrlc = { version = "3.1", features = ["termination"] }
|
ctrlc = { version = "3.1", features = ["termination"] }
|
||||||
fb_procfs = "0.7.0"
|
fb_procfs = "0.7.0"
|
||||||
hex = "0.4.3"
|
|
||||||
libbpf-rs = "0.22.0"
|
libbpf-rs = "0.22.0"
|
||||||
libc = "0.2.137"
|
libc = "0.2.137"
|
||||||
buddy-alloc = "0.5.1"
|
|
||||||
log = "0.4.17"
|
log = "0.4.17"
|
||||||
ordered-float = "3.4.0"
|
ordered-float = "3.4.0"
|
||||||
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
|
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
|
||||||
|
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "0.1" }
|
||||||
simplelog = "0.12.0"
|
simplelog = "0.12.0"
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
|
scx_utils = { path = "../../../rust/scx_utils", version = "0.6" }
|
||||||
|
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "0.1" }
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
enable_backtrace = []
|
enable_backtrace = []
|
||||||
|
@ -4,25 +4,24 @@ This is a single user-defined scheduler used within [sched_ext](https://github.c
|
|||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
scx_rustland is made of a BPF component (dispatcher) that implements the low
|
scx_rustland is made of a BPF component (scx_rustland_core) that implements the
|
||||||
level sched-ext functionalities and a user-space counterpart (scheduler),
|
low level sched-ext functionalities and a user-space counterpart (scheduler),
|
||||||
written in Rust, that implements the actual scheduling policy.
|
written in Rust, that implements the actual scheduling policy.
|
||||||
|
|
||||||
The BPF dispatcher is completely agnostic of the particular scheduling policy
|
|
||||||
implemented in user-space. For this reason developers that are willing to use
|
|
||||||
this scheduler to experiment scheduling policies should be able to simply
|
|
||||||
modify the Rust component, without having to deal with any internal kernel /
|
|
||||||
BPF details.
|
|
||||||
|
|
||||||
## How To Install
|
## How To Install
|
||||||
|
|
||||||
Available as a [Rust crate](https://crates.io/crates/scx_rustland): `cargo add scx_rustland`
|
Available as a [Rust crate](https://crates.io/crates/scx_rustland): `cargo add scx_rustland`
|
||||||
|
|
||||||
## Typical Use Case
|
## Typical Use Case
|
||||||
|
|
||||||
scx_rustland is designed to be "easy to read" template that can be used by any
|
scx_rustland is designed to prioritize interactive workloads over background
|
||||||
developer to quickly experiment more complex scheduling policies, that can be
|
CPU-intensive workloads. For this reason the typical use case of this scheduler
|
||||||
fully implemented in Rust.
|
involves low-latency interactive applications, such as gaming, video
|
||||||
|
conferencing and live streaming.
|
||||||
|
|
||||||
|
scx_rustland is also designed to be an "easy to read" template that can be used
|
||||||
|
by any developer to quickly experiment more complex scheduling policies fully
|
||||||
|
implemented in Rust.
|
||||||
|
|
||||||
## Production Ready?
|
## Production Ready?
|
||||||
|
|
||||||
@ -32,20 +31,17 @@ with a certain cost.
|
|||||||
|
|
||||||
However, a scheduler entirely implemented in user-space holds the potential for
|
However, a scheduler entirely implemented in user-space holds the potential for
|
||||||
seamless integration with sophisticated libraries, tracing tools, external
|
seamless integration with sophisticated libraries, tracing tools, external
|
||||||
services (e.g., AI), etc. Hence, there might be situations where the benefits
|
services (e.g., AI), etc.
|
||||||
outweigh the overhead, justifying the use of this scheduler in a production
|
|
||||||
environment.
|
Hence, there might be situations where the benefits outweigh the overhead,
|
||||||
|
justifying the use of this scheduler in a production environment.
|
||||||
|
|
||||||
## Demo
|
## Demo
|
||||||
|
|
||||||
[scx_rustland-terraria](https://github.com/sched-ext/scx/assets/1051723/42ec3bf2-9f1f-4403-80ab-bf5d66b7c2d5)
|
[scx_rustland-terraria](https://github.com/sched-ext/scx/assets/1051723/42ec3bf2-9f1f-4403-80ab-bf5d66b7c2d5)
|
||||||
|
|
||||||
For this demo the scheduler includes an extra patch to impose a "time slice
|
The key takeaway of this demo is to demonstrate that , despite the overhead of
|
||||||
penalty" on new short-lived tasks. While this approach might not be suitable
|
running a scheduler in user-space, we can still obtain interesting results and,
|
||||||
for general usage, it can yield significant advantages in this specific
|
in this particular case, even outperform the default Linux scheduler (EEVDF) in
|
||||||
scenario.
|
terms of application responsiveness (fps), while a CPU intensive workload
|
||||||
|
(parallel kernel build) is running in the background.
|
||||||
The key takeaway is to demonstrate the ease and safety of conducting
|
|
||||||
experiments like this, as we operate in user-space, and we can accomplish
|
|
||||||
everything simply by modifying the Rust code, that is completely abstracted
|
|
||||||
from the underlying BPF/kernel internal details.
|
|
||||||
|
@ -2,10 +2,8 @@
|
|||||||
// GNU General Public License version 2.
|
// GNU General Public License version 2.
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
scx_utils::BpfBuilder::new()
|
scx_rustland_core::RustLandBuilder::new()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.enable_intf("src/bpf/intf.h", "bpf_intf.rs")
|
|
||||||
.enable_skel("src/bpf/main.bpf.c", "bpf")
|
|
||||||
.build()
|
.build()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
@ -33,15 +33,18 @@ use log::warn;
|
|||||||
|
|
||||||
const SCHEDULER_NAME: &'static str = "RustLand";
|
const SCHEDULER_NAME: &'static str = "RustLand";
|
||||||
|
|
||||||
/// scx_rustland: simple user-space scheduler written in Rust
|
/// scx_rustland: user-space scheduler written in Rust
|
||||||
///
|
///
|
||||||
/// The main goal of this scheduler is be an "easy to read" template that can be used to quickly
|
/// scx_rustland is designed to prioritize interactive workloads over background CPU-intensive
|
||||||
/// test more complex scheduling policies. For this reason this scheduler is mostly focused on
|
/// workloads. For this reason the typical use case of this scheduler involves low-latency
|
||||||
/// simplicity and code readability.
|
/// interactive applications, such as gaming, video conferencing and live streaming.
|
||||||
///
|
///
|
||||||
/// The scheduler is made of a BPF component (dispatcher) that implements the low level sched-ext
|
/// scx_rustland is also designed to be an "easy to read" template that can be used by any
|
||||||
/// functionalities and a user-space counterpart (scheduler), written in Rust, that implements the
|
/// developer to quickly experiment more complex scheduling policies fully implemented in Rust.
|
||||||
/// actual scheduling policy.
|
///
|
||||||
|
/// The scheduler is made of a BPF component (scx_rustland_core) that implements the low level
|
||||||
|
/// sched-ext functionalities and a user-space counterpart (scheduler), written in Rust, that
|
||||||
|
/// implements the actual scheduling policy.
|
||||||
///
|
///
|
||||||
/// The default scheduling policy implemented in the user-space scheduler is a based on virtual
|
/// The default scheduling policy implemented in the user-space scheduler is a based on virtual
|
||||||
/// runtime (vruntime):
|
/// runtime (vruntime):
|
||||||
@ -65,10 +68,6 @@ const SCHEDULER_NAME: &'static str = "RustLand";
|
|||||||
///
|
///
|
||||||
/// === Troubleshooting ===
|
/// === Troubleshooting ===
|
||||||
///
|
///
|
||||||
/// - Disable HyperThreading / SMT if you notice poor performance (add "nosmt" to the kernel boot
|
|
||||||
/// parameters): this scheduler is not NUMA-aware and it implements a simple policy of handling
|
|
||||||
/// SMT cores.
|
|
||||||
///
|
|
||||||
/// - Adjust the time slice boost parameter (option `-b`) to enhance the responsiveness of
|
/// - Adjust the time slice boost parameter (option `-b`) to enhance the responsiveness of
|
||||||
/// low-latency applications (i.e., online gaming, live streaming, video conferencing etc.).
|
/// low-latency applications (i.e., online gaming, live streaming, video conferencing etc.).
|
||||||
///
|
///
|
||||||
@ -473,8 +472,7 @@ impl<'a> Scheduler<'a> {
|
|||||||
// Reset nr_queued and update nr_scheduled, to notify the dispatcher that
|
// Reset nr_queued and update nr_scheduled, to notify the dispatcher that
|
||||||
// queued tasks are drained, but there is still some work left to do in the
|
// queued tasks are drained, but there is still some work left to do in the
|
||||||
// scheduler.
|
// scheduler.
|
||||||
*self.bpf.nr_queued_mut() = 0;
|
self.bpf.update_tasks(Some(0), Some(self.task_pool.tasks.len() as u64));
|
||||||
*self.bpf.nr_scheduled_mut() = self.task_pool.tasks.len() as u64;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
@ -533,10 +531,10 @@ impl<'a> Scheduler<'a> {
|
|||||||
None => break,
|
None => break,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Reset nr_scheduled to notify the dispatcher that all the tasks received by the scheduler
|
// Update nr_scheduled to notify the dispatcher that all the tasks received by the
|
||||||
// has been dispatched, so there is no reason to re-activate the scheduler, unless more
|
// scheduler has been dispatched, so there is no reason to re-activate the scheduler,
|
||||||
// tasks are queued.
|
// unless more tasks are queued.
|
||||||
self.bpf.skel.bss_mut().nr_scheduled = self.task_pool.tasks.len() as u64;
|
self.bpf.update_tasks(None, Some(self.task_pool.tasks.len() as u64));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Main scheduling function (called in a loop to periodically drain tasks from the queued list
|
// Main scheduling function (called in a loop to periodically drain tasks from the queued list
|
||||||
|
Loading…
Reference in New Issue
Block a user