Merge branch 'main' into htejun/scx_rusty

This commit is contained in:
Tejun Heo 2024-08-23 07:48:07 -10:00
commit 8c8912ccea
28 changed files with 240 additions and 216 deletions

View File

@ -13,12 +13,18 @@ GUEST_TIMEOUT=60
# List of schedulers to test
#
# TODO:
# - scx_layered: temporarily excluded because it
# cannot run with a default configuration
# - scx_flatcg, scx_pair: excluded until cgroup support lands upstream
# - scx_mitosis: not ready yet
#
SCHEDULERS="scx_simple scx_central scx_nest scx_rusty scx_rustland scx_bpfland"
declare -A SCHEDS
SCHEDS["scx_simple"]=""
SCHEDS["scx_central"]=""
SCHEDS["scx_nest"]=""
SCHEDS["scx_rusty"]=""
SCHEDS["scx_rustland"]=""
SCHEDS["scx_bpfland"]=""
SCHEDS["scx_layered"]="--run-example"
if [ ! -x `which vng` ]; then
echo "vng not found, please install virtme-ng to enable testing"
@ -30,7 +36,8 @@ if [ $# -lt 1 ]; then
fi
kernel=$1
for sched in ${SCHEDULERS}; do
for sched in ${!SCHEDS[@]}; do
args=${SCHEDS[$sched]}
sched_path=$(find -type f -executable -name ${sched})
if [ ! -n "${sched_path}" ]; then
echo "${sched}: binary not found"
@ -42,7 +49,7 @@ for sched in ${SCHEDULERS}; do
rm -f /tmp/output
timeout --preserve-status ${GUEST_TIMEOUT} \
vng --force-9p -v -r ${kernel} -- \
"timeout --foreground --preserve-status ${TEST_TIMEOUT} ${sched_path}" \
"timeout --foreground --preserve-status ${TEST_TIMEOUT} ${sched_path} ${args}" \
2> >(tee /tmp/output) </dev/null
grep -v " Speculative Return Stack Overflow" /tmp/output | \
sed -n -e '/\bBUG:/q1' \

View File

@ -1,5 +1,5 @@
project('sched_ext schedulers', 'c',
version: '1.0.2',
version: '1.0.3',
license: 'GPL-2.0',)
if meson.version().version_compare('<1.2')

View File

@ -1,6 +1,6 @@
[package]
name = "scx_rustland_core"
version = "1.0.2"
version = "1.0.3"
edition = "2021"
authors = ["Andrea Righi <andrea.righi@linux.dev>"]
license = "GPL-2.0-only"
@ -12,12 +12,12 @@ anyhow = "1.0.65"
plain = "0.2.3"
libbpf-rs = "0.24.1"
libc = "0.2.137"
scx_utils = { path = "../scx_utils", version = "1.0.2" }
scx_utils = { path = "../scx_utils", version = "1.0.3" }
[build-dependencies]
tar = "0.4"
walkdir = "2.4"
scx_utils = { path = "../scx_utils", version = "1.0.2" }
scx_utils = { path = "../scx_utils", version = "1.0.3" }
[lib]
name = "scx_rustland_core"

View File

@ -2,7 +2,7 @@ workspace = { members = ["scx_stats_derive"] }
[package]
name = "scx_stats"
version = "0.2.0"
version = "1.0.3"
edition = "2021"
authors = ["Tejun Heo <tj@kernel.org>"]
license = "GPL-2.0-only"

View File

@ -1,6 +1,6 @@
[package]
name = "scx_stats_derive"
version = "0.2.0"
version = "1.0.3"
edition = "2021"
authors = ["Tejun Heo <tj@kernel.org>"]
license = "GPL-2.0-only"
@ -13,6 +13,6 @@ proc-macro = true
[dependencies]
proc-macro2 = "1.0"
quote = "1.0"
scx_stats = { path = "..", version = "0.2.0" }
scx_stats = { path = "..", version = "1.0.3" }
serde_json = "1.0"
syn = { version = "2.0", features = ["extra-traits", "full"] }

View File

@ -1,6 +1,6 @@
[package]
name = "scx_utils"
version = "1.0.2"
version = "1.0.3"
edition = "2021"
authors = ["Tejun Heo <tj@kernel.org>"]
license = "GPL-2.0-only"
@ -21,7 +21,7 @@ libbpf-rs = "0.24.1"
log = "0.4.17"
paste = "1.0"
regex = "1.10"
scx_stats = { path = "../scx_stats", version = "0.2.0" }
scx_stats = { path = "../scx_stats", version = "1.0.3" }
serde = { version = "1.0", features = ["derive"] }
sscanf = "0.4"
tar = "0.4"

View File

@ -65,8 +65,8 @@ pub use topology::Cpu;
pub use topology::Node;
pub use topology::Topology;
pub use topology::TopologyMap;
pub use topology::NR_CPU_IDS;
pub use topology::NR_CPUS_POSSIBLE;
pub use topology::NR_CPU_IDS;
mod cpumask;
pub use cpumask::Cpumask;

View File

@ -76,30 +76,31 @@ use sscanf::sscanf;
use std::collections::BTreeMap;
use std::path::Path;
use std::slice::Iter;
use std::sync::LazyLock;
/// The maximum possible number of CPU IDs in the system. As mentioned above,
/// this is different than the number of possible CPUs on the system (though
/// very seldom is). This number may differ from the number of possible CPUs on
/// the system when e.g. there are fully disabled CPUs in the middle of the
/// range of possible CPUs (i.e. CPUs that may not be onlined).
pub static NR_CPU_IDS: LazyLock<usize> = LazyLock::new(|| {
read_cpu_ids().unwrap().last().unwrap() + 1
});
lazy_static::lazy_static! {
/// The maximum possible number of CPU IDs in the system. As mentioned
/// above, this is different from the number of possible CPUs on the
/// system (though the two very seldom differ). This number may differ from the
/// number of possible CPUs on the system when e.g. there are fully
/// disabled CPUs in the middle of the range of possible CPUs (i.e. CPUs
/// that may not be onlined).
pub static ref NR_CPU_IDS: usize = read_cpu_ids().unwrap().last().unwrap() + 1;
/// The number of possible CPUs that may be active on the system. Note that this
/// value is separate from the number of possible _CPU IDs_ in the system, as
/// there may be gaps in what CPUs are allowed to be onlined. For example, some
/// BIOS implementations may report spans of disabled CPUs that may not be
/// onlined, whose IDs are lower than the IDs of other CPUs that may be onlined.
pub static NR_CPUS_POSSIBLE: LazyLock<usize> =
LazyLock::new(|| libbpf_rs::num_possible_cpus().unwrap());
/// The number of possible CPUs that may be active on the system. Note
/// that this value is separate from the number of possible _CPU IDs_ in
/// the system, as there may be gaps in what CPUs are allowed to be
/// onlined. For example, some BIOS implementations may report spans of
/// disabled CPUs that may not be onlined, whose IDs are lower than the
/// IDs of other CPUs that may be onlined.
pub static ref NR_CPUS_POSSIBLE: usize = libbpf_rs::num_possible_cpus().unwrap();
}
#[derive(Debug, Clone)]
pub struct Cpu {
id: usize,
min_freq: usize,
max_freq: usize,
base_freq: usize,
trans_lat_ns: usize,
l2_id: usize,
l3_id: usize,
@ -122,6 +123,14 @@ impl Cpu {
self.max_freq
}
/// Get the base operational frequency of this CPU.
///
/// The value is read from the CPU's `cpufreq/base_frequency` file, which is only available on
/// Intel Turbo Boost CPUs. If that file cannot be read, this returns the same value as
/// `max_freq()`.
pub fn base_freq(&self) -> usize {
self.base_freq
}
/// Get the transition latency of the CPU in nanoseconds
pub fn trans_lat_ns(&self) -> usize {
self.trans_lat_ns
@ -421,10 +430,8 @@ fn create_insert_cpu(cpu_id: usize, node: &mut Node, online_mask: &Cpumask) -> R
// if there's no cache information then we have no option but to assume a single unified cache
// per node.
let cache_path = cpu_path.join("cache");
let l2_id =
read_file_usize(&cache_path.join(format!("index{}", 2)).join("id")).unwrap_or(0);
let l3_id =
read_file_usize(&cache_path.join(format!("index{}", 3)).join("id")).unwrap_or(0);
let l2_id = read_file_usize(&cache_path.join(format!("index{}", 2)).join("id")).unwrap_or(0);
let l3_id = read_file_usize(&cache_path.join(format!("index{}", 3)).join("id")).unwrap_or(0);
// Assume that LLC is always 3.
let llc_id = l3_id;
@ -433,6 +440,7 @@ fn create_insert_cpu(cpu_id: usize, node: &mut Node, online_mask: &Cpumask) -> R
let freq_path = cpu_path.join("cpufreq");
let min_freq = read_file_usize(&freq_path.join("scaling_min_freq")).unwrap_or(0);
let max_freq = read_file_usize(&freq_path.join("scaling_max_freq")).unwrap_or(0);
let base_freq = read_file_usize(&freq_path.join("base_frequency")).unwrap_or(max_freq);
let trans_lat_ns = read_file_usize(&freq_path.join("cpuinfo_transition_latency")).unwrap_or(0);
let cache = node.llcs.entry(llc_id).or_insert(Cache {
@ -453,6 +461,7 @@ fn create_insert_cpu(cpu_id: usize, node: &mut Node, online_mask: &Cpumask) -> R
id: cpu_id,
min_freq: min_freq,
max_freq: max_freq,
base_freq: base_freq,
trans_lat_ns: trans_lat_ns,
l2_id: l2_id,
l3_id: l3_id,
@ -502,7 +511,7 @@ fn create_default_node(online_mask: &Cpumask) -> Result<Vec<Node>> {
let cpu_ids = read_cpu_ids()?;
for cpu_id in cpu_ids.iter() {
create_insert_cpu(*cpu_id, &mut node, &online_mask)?;
create_insert_cpu(*cpu_id, &mut node, &online_mask)?;
}
nodes.push(node);

View File

@ -1371,7 +1371,7 @@ dependencies = [
[[package]]
name = "scx_bpfland"
version = "1.0.2"
version = "1.0.3"
dependencies = [
"anyhow",
"clap",
@ -1387,7 +1387,7 @@ dependencies = [
[[package]]
name = "scx_utils"
version = "1.0.2"
version = "1.0.3"
dependencies = [
"anyhow",
"bindgen",

View File

@ -1,6 +1,6 @@
[package]
name = "scx_bpfland"
version = "1.0.2"
version = "1.0.3"
authors = ["Andrea Righi <andrea.righi@linux.dev>"]
edition = "2021"
description = "A vruntime-based sched_ext scheduler that prioritizes interactive workloads. https://github.com/sched-ext/scx/tree/main"
@ -12,14 +12,14 @@ ctrlc = { version = "3.1", features = ["termination"] }
clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
libbpf-rs = "0.24.1"
log = "0.4.17"
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
simplelog = "0.12"
rlimit = "0.10.1"
metrics = "0.23.0"
metrics-exporter-prometheus = "0.15.0"
[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
[features]
enable_backtrace = []

View File

@ -240,6 +240,15 @@ struct task_ctx *lookup_task_ctx(const struct task_struct *p)
return tctx;
}
/*
* Return true if interactive task classification via voluntary context
* switches is enabled, false otherwise.
*
* The feature is considered enabled when nvcsw_max_thresh is non-zero.
*/
static bool is_nvcsw_enabled(void)
{
return !!nvcsw_max_thresh;
}
/*
* Return true if the task is interactive, false otherwise.
*/
@ -261,15 +270,6 @@ static inline bool is_kthread(const struct task_struct *p)
return !!(p->flags & PF_KTHREAD);
}
/*
* Return true if interactive tasks classification via voluntary context
* switches is enabled, false otherwise.
*/
static bool is_nvcsw_enabled(void)
{
return !!nvcsw_max_thresh;
}
/*
* Access a cpumask in read-only mode (typically to check bits).
*/
@ -413,7 +413,6 @@ static u64 cpu_to_dsq(s32 cpu)
static int dispatch_direct_cpu(struct task_struct *p, s32 cpu, u64 enq_flags)
{
struct bpf_cpumask *offline;
u64 slice = task_slice(p);
u64 vtime = task_vtime(p);
u64 dsq_id = cpu_to_dsq(cpu);
@ -424,7 +423,7 @@ static int dispatch_direct_cpu(struct task_struct *p, s32 cpu, u64 enq_flags)
if (!bpf_cpumask_test_cpu(cpu, p->cpus_ptr))
return -EINVAL;
scx_bpf_dispatch_vtime(p, dsq_id, slice, vtime, enq_flags);
scx_bpf_dispatch_vtime(p, dsq_id, SCX_SLICE_DFL, vtime, enq_flags);
/*
* If the CPU has gone offline notify that the task needs to be
@ -699,6 +698,9 @@ static void handle_sync_wakeup(struct task_struct *p)
{
struct task_ctx *tctx;
if (!is_nvcsw_enabled())
return;
/*
* If we are waking up a task immediately promote it as interactive, so
* that it can be dispatched as soon as possible on the first CPU
@ -722,7 +724,6 @@ static void handle_sync_wakeup(struct task_struct *p)
void BPF_STRUCT_OPS(bpfland_enqueue, struct task_struct *p, u64 enq_flags)
{
u64 vtime = task_vtime(p);
u64 slice = task_slice(p);
/*
* If the system is saturated and we couldn't dispatch directly in
@ -754,10 +755,12 @@ void BPF_STRUCT_OPS(bpfland_enqueue, struct task_struct *p, u64 enq_flags)
* and simply rely on the vruntime logic.
*/
if (is_task_interactive(p)) {
scx_bpf_dispatch_vtime(p, prio_dsq_id, slice, vtime, enq_flags);
scx_bpf_dispatch_vtime(p, prio_dsq_id, SCX_SLICE_DFL,
vtime, enq_flags);
__sync_fetch_and_add(&nr_prio_dispatches, 1);
} else {
scx_bpf_dispatch_vtime(p, shared_dsq_id, slice, vtime, enq_flags);
scx_bpf_dispatch_vtime(p, shared_dsq_id, SCX_SLICE_DFL,
vtime, enq_flags);
__sync_fetch_and_add(&nr_shared_dispatches, 1);
}
}
@ -900,7 +903,22 @@ void BPF_STRUCT_OPS(bpfland_dispatch, s32 cpu, struct task_struct *prev)
/*
* Lastly, consume regular tasks from the shared DSQ.
*/
consume_regular_task(now);
if (consume_regular_task(now))
return;
/*
* If the current task expired its time slice, but no other task wants
* to run, simply replenish its time slice and let it run for another
* round on the same CPU.
*
* Note that bpfland_stopping() won't be called if we replenish the
* time slice here. As a result, the nvcsw statistics won't be updated,
* but this isn't an issue, because these statistics are only relevant
* when the system is overloaded, which isn't the case when there are
* no other tasks to run.
*/
if (prev && (prev->scx.flags & SCX_TASK_QUEUED))
prev->scx.slice = task_slice(prev);
}
void BPF_STRUCT_OPS(bpfland_running, struct task_struct *p)
@ -910,11 +928,10 @@ void BPF_STRUCT_OPS(bpfland_running, struct task_struct *p)
vtime_now = p->scx.dsq_vtime;
/*
* Ensure time slice never exceeds slice_ns when a task is started on a
* CPU.
* Refresh task's time slice immediately before it starts to run on its
* assigned CPU.
*/
if (p->scx.slice > slice_ns)
p->scx.slice = slice_ns;
p->scx.slice = task_slice(p);
/* Update CPU interactive state */
if (is_task_interactive(p))
@ -933,7 +950,8 @@ static void update_task_interactive(struct task_ctx *tctx)
* (nvcsw_avg_thresh) it is classified as interactive, otherwise the
* task is classified as regular.
*/
tctx->is_interactive = tctx->avg_nvcsw >= nvcsw_avg_thresh;
if (is_nvcsw_enabled())
tctx->is_interactive = tctx->avg_nvcsw >= nvcsw_avg_thresh;
}
/*

View File

@ -54,11 +54,12 @@ const SCHEDULER_NAME: &'static str = "scx_bpfland";
fn get_primary_cpus(powersave: bool) -> std::io::Result<Vec<usize>> {
let topo = Topology::new().unwrap();
// Iterate over each CPU directory and collect CPU ID and its max frequency.
// Iterate over each CPU directory and collect CPU ID and its base operational frequency to
// distinguish between fast and slow cores.
let mut cpu_freqs = Vec::new();
for core in topo.cores().into_iter() {
for (cpu_id, cpu) in core.cpus() {
cpu_freqs.push((*cpu_id, cpu.max_freq()));
cpu_freqs.push((*cpu_id, cpu.base_freq()));
}
}
if cpu_freqs.is_empty() {
@ -327,7 +328,7 @@ impl<'a> Scheduler<'a> {
let topo = Topology::new().unwrap();
// Initialize the primary scheduling domain (based on the --primary-domain option).
Self::init_primary_domain(&mut skel, &topo, &opts.primary_domain)?;
Self::init_primary_domain(&mut skel, &opts.primary_domain)?;
// Initialize L2 cache domains.
if !opts.disable_l2 {
@ -382,7 +383,6 @@ impl<'a> Scheduler<'a> {
fn init_primary_domain(
skel: &mut BpfSkel<'_>,
topo: &Topology,
primary_domain: &Cpumask,
) -> Result<()> {
info!("primary CPU domain = 0x{:x}", primary_domain);

View File

@ -948,7 +948,7 @@ dependencies = [
[[package]]
name = "scx_lavd"
version = "1.0.2"
version = "1.0.3"
dependencies = [
"anyhow",
"bitvec",
@ -970,7 +970,7 @@ dependencies = [
[[package]]
name = "scx_utils"
version = "1.0.2"
version = "1.0.3"
dependencies = [
"anyhow",
"bindgen",

View File

@ -1,6 +1,6 @@
[package]
name = "scx_lavd"
version = "1.0.2"
version = "1.0.3"
authors = ["Changwoo Min <changwoo@igalia.com>", "Igalia"]
edition = "2021"
description = "A Latency-criticality Aware Virtual Deadline (LAVD) scheduler based on sched_ext, which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. https://github.com/sched-ext/scx/tree/main"
@ -17,7 +17,7 @@ libbpf-rs = "0.24.1"
libc = "0.2.137"
log = "0.4.17"
ordered-float = "3.4.0"
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
simplelog = "0.12"
static_assertions = "1.1.0"
rlimit = "0.10.1"
@ -25,7 +25,7 @@ plain = "0.2.3"
nix = { version = "0.29.0", features = ["signal"] }
[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
[features]
enable_backtrace = []

View File

@ -1023,7 +1023,7 @@ dependencies = [
[[package]]
name = "scx_layered"
version = "1.0.2"
version = "1.0.3"
dependencies = [
"anyhow",
"bitvec",
@ -1046,7 +1046,7 @@ dependencies = [
[[package]]
name = "scx_stats"
version = "0.2.0"
version = "1.0.3"
dependencies = [
"anyhow",
"crossbeam",
@ -1060,7 +1060,7 @@ dependencies = [
[[package]]
name = "scx_stats_derive"
version = "0.2.0"
version = "1.0.3"
dependencies = [
"proc-macro2",
"quote",
@ -1071,7 +1071,7 @@ dependencies = [
[[package]]
name = "scx_utils"
version = "1.0.2"
version = "1.0.3"
dependencies = [
"anyhow",
"bindgen",

View File

@ -1,6 +1,6 @@
[package]
name = "scx_layered"
version = "1.0.2"
version = "1.0.3"
authors = ["Tejun Heo <htejun@meta.com>", "Meta"]
edition = "2021"
description = "A highly configurable multi-layer BPF / user space hybrid scheduler used within sched_ext, which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. https://github.com/sched-ext/scx/tree/main"
@ -18,15 +18,15 @@ lazy_static = "1.4"
libbpf-rs = "0.24.1"
libc = "0.2.137"
log = "0.4.17"
scx_stats = { path = "../../../rust/scx_stats", version = "0.2.0" }
scx_stats_derive = { path = "../../../rust/scx_stats/scx_stats_derive", version = "0.2.0" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
scx_stats = { path = "../../../rust/scx_stats", version = "1.0.3" }
scx_stats_derive = { path = "../../../rust/scx_stats/scx_stats_derive", version = "1.0.3" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
simplelog = "0.12"
[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
[features]
enable_backtrace = []

View File

@ -53,7 +53,6 @@ enum layer_stat_idx {
LSTAT_SEL_LOCAL,
LSTAT_ENQ_WAKEUP,
LSTAT_ENQ_EXPIRE,
LSTAT_ENQ_LAST,
LSTAT_ENQ_REENQ,
LSTAT_MIN_EXEC,
LSTAT_MIN_EXEC_NS,

View File

@ -667,12 +667,6 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
if (enq_flags & SCX_ENQ_REENQ) {
lstat_inc(LSTAT_ENQ_REENQ, layer, cctx);
} else {
if (enq_flags & SCX_ENQ_LAST) {
lstat_inc(LSTAT_ENQ_LAST, layer, cctx);
scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, 0);
return;
}
if (enq_flags & SCX_ENQ_WAKEUP)
lstat_inc(LSTAT_ENQ_WAKEUP, layer, cctx);
else
@ -1698,5 +1692,4 @@ SCX_OPS_DEFINE(layered,
.dump = (void *)layered_dump,
.init = (void *)layered_init,
.exit = (void *)layered_exit,
.flags = SCX_OPS_ENQ_LAST,
.name = "layered");

View File

@ -71,9 +71,70 @@ const NR_LSTATS: usize = bpf_intf::layer_stat_idx_NR_LSTATS as usize;
const NR_LAYER_MATCH_KINDS: usize = bpf_intf::layer_match_kind_NR_LAYER_MATCH_KINDS as usize;
const CORE_CACHE_LEVEL: u32 = 2;
#[rustfmt::skip]
lazy_static::lazy_static! {
// Number of possible CPUs as reported by libbpf. The unwrap() panics on
// first access if libbpf fails to provide the value.
static ref NR_POSSIBLE_CPUS: usize = libbpf_rs::num_possible_cpus().unwrap();
// Decay factor computed as 0.5^(1 / USAGE_HALF_LIFE_F64); raising it to the
// power USAGE_HALF_LIFE_F64 yields exactly one half.
// NOTE(review): the unit of USAGE_HALF_LIFE_F64 is not visible here.
static ref USAGE_DECAY: f64 = 0.5f64.powf(1.0 / USAGE_HALF_LIFE_F64);
// Built-in example layer configuration made of three layers: "batch"
// (Confined), "immediate" (Open) and "normal" (Grouped). It is the value
// serialized by write_example_file() and the starting configuration used
// when the --run-example option is given.
static ref EXAMPLE_CONFIG: LayerConfig =
LayerConfig {
specs: vec![
LayerSpec {
name: "batch".into(),
comment: Some("tasks under system.slice or tasks with nice value > 0".into()),
// Two match groups, one condition each.
// NOTE(review): judging by the comment string above, groups are
// OR-ed and conditions inside a group are AND-ed — confirm.
matches: vec![
vec![LayerMatch::CgroupPrefix("system.slice/".into())],
vec![LayerMatch::NiceAbove(0)],
],
kind: LayerKind::Confined {
cpus_range: Some((0, 16)),
util_range: (0.8, 0.9),
min_exec_us: 1000,
yield_ignore: 0.0,
preempt: false,
preempt_first: false,
exclusive: false,
perf: 1024,
nodes: vec![],
llcs: vec![],
},
},
LayerSpec {
name: "immediate".into(),
comment: Some("tasks under workload.slice with nice value < 0".into()),
// A single match group holding both conditions.
matches: vec![vec![
LayerMatch::CgroupPrefix("workload.slice/".into()),
LayerMatch::NiceBelow(0),
]],
kind: LayerKind::Open {
min_exec_us: 100,
yield_ignore: 0.25,
preempt: true,
preempt_first: false,
exclusive: true,
perf: 1024,
nodes: vec![],
llcs: vec![],
},
},
LayerSpec {
name: "normal".into(),
comment: Some("the rest".into()),
// A single empty match group.
// NOTE(review): presumably matches every remaining task, in line
// with the "the rest" comment — confirm against the matcher.
matches: vec![vec![]],
kind: LayerKind::Grouped {
cpus_range: None,
util_range: (0.5, 0.6),
min_exec_us: 200,
yield_ignore: 0.0,
preempt: false,
preempt_first: false,
exclusive: false,
perf: 1024,
nodes: vec![],
llcs: vec![],
},
},
],
};
}
/// scx_layered: A highly configurable multi-layer sched_ext scheduler
@ -347,6 +408,10 @@ struct Opts {
#[clap(long)]
monitor: Option<f64>,
/// Run with example layer specifications (useful for e.g. CI pipelines)
#[clap(long)]
run_example: bool,
/// Layer specification. See --help.
specs: Vec<String>,
}
@ -999,7 +1064,7 @@ impl Layer {
cpu_pool: &CpuPool,
name: &str,
kind: LayerKind,
topo: &Topology
topo: &Topology,
) -> Result<Self> {
let mut cpus = bitvec![0; cpu_pool.nr_cpus];
cpus.fill(false);
@ -1072,39 +1137,28 @@ impl Layer {
}
}
let mut nodes = topo.nodes().iter().collect::<Vec<_>>().clone();
let num_nodes = nodes.len();
let is_left = idx % 2 == 0;
let rot_by = |idx, len| -> usize { if idx <= len { idx } else { idx % len } };
if is_left {
nodes.rotate_left(rot_by(idx, num_nodes));
} else {
nodes.rotate_right(rot_by(idx, num_nodes));
}
let rot_by = |idx, len| -> usize {
if idx <= len {
idx
} else {
idx % len
}
};
let mut core_order = vec![];
for node in nodes.iter() {
let mut llcs = node.llcs().clone().into_values().collect::<Vec<_>>().clone();
let num_llcs = llcs.len();
if is_left {
llcs.rotate_left(rot_by(idx, num_llcs));
} else {
llcs.rotate_right(rot_by(idx, num_llcs));
}
for llc in llcs.iter() {
let mut llc_cores = llc.cores().clone().into_values().collect::<Vec<_>>().clone();
let num_cores = llc_cores.len();
for i in 0..topo.cores().len() {
core_order.push(i);
}
for node in topo.nodes().iter() {
for (_, llc) in node.llcs() {
let llc_cores = llc.cores().len();
let rot = rot_by(llc_cores + (idx << 1), llc_cores);
if is_left {
llc_cores.rotate_left(rot_by(idx, num_cores));
core_order.rotate_left(rot);
} else {
llc_cores.rotate_right(rot_by(idx, num_cores));
}
for llc_core in llc_cores.iter() {
core_order.push(llc_core.id());
core_order.rotate_right(rot);
}
}
}
@ -1523,7 +1577,13 @@ impl<'a, 'b> Scheduler<'a, 'b> {
let mut layers = vec![];
for (idx, spec) in layer_specs.iter().enumerate() {
layers.push(Layer::new(idx, &cpu_pool, &spec.name, spec.kind.clone(), &topo)?);
layers.push(Layer::new(
idx,
&cpu_pool,
&spec.name,
spec.kind.clone(),
&topo,
)?);
}
// Other stuff.
@ -1578,7 +1638,7 @@ impl<'a, 'b> Scheduler<'a, 'b> {
fn refresh_cpumasks(&mut self) -> Result<()> {
let mut updated = false;
let num_layers = self.layers.len();
let num_layers = self.layers.len();
for idx in 0..num_layers {
match self.layers[idx].kind {
@ -1748,71 +1808,11 @@ impl<'a, 'b> Drop for Scheduler<'a, 'b> {
}
fn write_example_file(path: &str) -> Result<()> {
let example = LayerConfig {
specs: vec![
LayerSpec {
name: "batch".into(),
comment: Some("tasks under system.slice or tasks with nice value > 0".into()),
matches: vec![
vec![LayerMatch::CgroupPrefix("system.slice/".into())],
vec![LayerMatch::NiceAbove(0)],
],
kind: LayerKind::Confined {
cpus_range: Some((0, 16)),
util_range: (0.8, 0.9),
min_exec_us: 1000,
yield_ignore: 0.0,
preempt: false,
preempt_first: false,
exclusive: false,
perf: 1024,
nodes: vec![],
llcs: vec![],
},
},
LayerSpec {
name: "immediate".into(),
comment: Some("tasks under workload.slice with nice value < 0".into()),
matches: vec![vec![
LayerMatch::CgroupPrefix("workload.slice/".into()),
LayerMatch::NiceBelow(0),
]],
kind: LayerKind::Open {
min_exec_us: 100,
yield_ignore: 0.25,
preempt: true,
preempt_first: false,
exclusive: true,
perf: 1024,
nodes: vec![],
llcs: vec![],
},
},
LayerSpec {
name: "normal".into(),
comment: Some("the rest".into()),
matches: vec![vec![]],
kind: LayerKind::Grouped {
cpus_range: None,
util_range: (0.5, 0.6),
min_exec_us: 200,
yield_ignore: 0.0,
preempt: false,
preempt_first: false,
exclusive: false,
perf: 1024,
nodes: vec![],
llcs: vec![],
},
},
],
};
let mut f = fs::OpenOptions::new()
.create_new(true)
.write(true)
.open(path)?;
Ok(f.write_all(serde_json::to_string_pretty(&example)?.as_bytes())?)
Ok(f.write_all(serde_json::to_string_pretty(&*EXAMPLE_CONFIG)?.as_bytes())?)
}
fn verify_layer_specs(specs: &[LayerSpec]) -> Result<()> {
@ -1956,7 +1956,11 @@ fn main() -> Result<()> {
return Ok(());
}
let mut layer_config = LayerConfig { specs: vec![] };
let mut layer_config = match opts.run_example {
true => EXAMPLE_CONFIG.clone(),
false => LayerConfig { specs: vec![] },
};
for (idx, input) in opts.specs.iter().enumerate() {
layer_config.specs.append(
&mut LayerSpec::parse(input)

View File

@ -68,8 +68,6 @@ pub struct LayerStats {
pub enq_wakeup: f64,
#[stat(desc = "layer: % enqueued after slice expiration")]
pub enq_expire: f64,
#[stat(desc = "layer: % enqueued as last runnable task on CPU")]
pub enq_last: f64,
#[stat(desc = "layer: % re-enqueued due to RT preemption")]
pub enq_reenq: f64,
#[stat(desc = "layer: # times exec duration < min_exec_us")]
@ -146,7 +144,6 @@ impl LayerStats {
let ltotal = lstat(bpf_intf::layer_stat_idx_LSTAT_SEL_LOCAL)
+ lstat(bpf_intf::layer_stat_idx_LSTAT_ENQ_WAKEUP)
+ lstat(bpf_intf::layer_stat_idx_LSTAT_ENQ_EXPIRE)
+ lstat(bpf_intf::layer_stat_idx_LSTAT_ENQ_LAST)
+ lstat(bpf_intf::layer_stat_idx_LSTAT_ENQ_REENQ);
let lstat_pct = |sidx| {
if ltotal != 0 {
@ -179,7 +176,6 @@ impl LayerStats {
sel_local: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_SEL_LOCAL),
enq_wakeup: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_ENQ_WAKEUP),
enq_expire: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_ENQ_EXPIRE),
enq_last: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_ENQ_LAST),
enq_reenq: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_ENQ_REENQ),
min_exec: lstat_pct(bpf_intf::layer_stat_idx_LSTAT_MIN_EXEC),
min_exec_us: (lstat(bpf_intf::layer_stat_idx_LSTAT_MIN_EXEC_NS) / 1000) as u64,
@ -221,13 +217,12 @@ impl LayerStats {
writeln!(
w,
" {:<width$} tot={:7} local={} wake/exp/last/reenq={}/{}/{}/{}",
" {:<width$} tot={:7} local={} wake/exp/reenq={}/{}/{}",
"",
self.total,
fmt_pct(self.sel_local),
fmt_pct(self.enq_wakeup),
fmt_pct(self.enq_expire),
fmt_pct(self.enq_last),
fmt_pct(self.enq_reenq),
width = header_width,
)?;
@ -348,7 +343,6 @@ impl SysStats {
let total = lsum(bpf_intf::layer_stat_idx_LSTAT_SEL_LOCAL)
+ lsum(bpf_intf::layer_stat_idx_LSTAT_ENQ_WAKEUP)
+ lsum(bpf_intf::layer_stat_idx_LSTAT_ENQ_EXPIRE)
+ lsum(bpf_intf::layer_stat_idx_LSTAT_ENQ_LAST)
+ lsum(bpf_intf::layer_stat_idx_LSTAT_ENQ_REENQ);
let lsum_pct = |idx| {
if total != 0 {

View File

@ -1,6 +1,6 @@
[package]
name = "scx_mitosis"
version = "0.0.3"
version = "0.0.4"
authors = ["Dan Schatzberg <dschatzberg@meta.com>", "Meta"]
edition = "2021"
description = "A dynamic affinity scheduler used within sched_ext, which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. https://github.com/sched-ext/scx/tree/main"
@ -20,13 +20,13 @@ libc = "0.2.137"
log = "0.4.17"
maplit = "1.0.2"
prometheus-client = "0.19"
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
simplelog = "0.12"
[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
[features]
enable_backtrace = []

View File

@ -868,7 +868,7 @@ dependencies = [
[[package]]
name = "scx_rlfifo"
version = "1.0.2"
version = "1.0.3"
dependencies = [
"anyhow",
"ctrlc",
@ -881,7 +881,7 @@ dependencies = [
[[package]]
name = "scx_rustland_core"
version = "1.0.2"
version = "1.0.3"
dependencies = [
"anyhow",
"libbpf-rs",
@ -894,7 +894,7 @@ dependencies = [
[[package]]
name = "scx_utils"
version = "1.0.2"
version = "1.0.3"
dependencies = [
"anyhow",
"bindgen",

View File

@ -1,6 +1,6 @@
[package]
name = "scx_rlfifo"
version = "1.0.2"
version = "1.0.3"
authors = ["Andrea Righi <andrea.righi@linux.dev>"]
edition = "2021"
description = "A simple FIFO scheduler in Rust that runs in user-space"
@ -12,12 +12,12 @@ plain = "0.2.3"
ctrlc = { version = "3.1", features = ["termination"] }
libbpf-rs = "0.24.1"
libc = "0.2.137"
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.2" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.3" }
[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.2" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.3" }
[features]
enable_backtrace = []

View File

@ -939,7 +939,7 @@ dependencies = [
[[package]]
name = "scx_rustland"
version = "1.0.2"
version = "1.0.3"
dependencies = [
"anyhow",
"clap",
@ -957,7 +957,7 @@ dependencies = [
[[package]]
name = "scx_rustland_core"
version = "1.0.2"
version = "1.0.3"
dependencies = [
"anyhow",
"libbpf-rs",
@ -970,7 +970,7 @@ dependencies = [
[[package]]
name = "scx_utils"
version = "1.0.2"
version = "1.0.3"
dependencies = [
"anyhow",
"bindgen",

View File

@ -1,6 +1,6 @@
[package]
name = "scx_rustland"
version = "1.0.2"
version = "1.0.3"
authors = ["Andrea Righi <andrea.righi@linux.dev>"]
edition = "2021"
description = "A BPF component (dispatcher) that implements the low level sched-ext functionalities and a user-space counterpart (scheduler), written in Rust, that implements the actual scheduling policy. This is used within sched_ext, which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. https://github.com/sched-ext/scx/tree/main"
@ -16,13 +16,13 @@ libbpf-rs = "0.24.1"
libc = "0.2.137"
log = "0.4.17"
ordered-float = "3.4.0"
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.2" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.3" }
simplelog = "0.12"
[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.2" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
scx_rustland_core = { path = "../../../rust/scx_rustland_core", version = "1.0.3" }
[features]
enable_backtrace = []

View File

@ -1032,7 +1032,7 @@ dependencies = [
[[package]]
name = "scx_rusty"
version = "1.0.2"
version = "1.0.3"
dependencies = [
"anyhow",
"chrono",
@ -1055,7 +1055,7 @@ dependencies = [
[[package]]
name = "scx_stats"
version = "0.2.0"
version = "1.0.3"
dependencies = [
"anyhow",
"crossbeam",
@ -1069,7 +1069,7 @@ dependencies = [
[[package]]
name = "scx_stats_derive"
version = "0.2.0"
version = "1.0.3"
dependencies = [
"proc-macro2",
"quote",
@ -1080,7 +1080,7 @@ dependencies = [
[[package]]
name = "scx_utils"
version = "1.0.2"
version = "1.0.3"
dependencies = [
"anyhow",
"bindgen",

View File

@ -1,6 +1,6 @@
[package]
name = "scx_rusty"
version = "1.0.2"
version = "1.0.3"
authors = ["Dan Schatzberg <dschatzberg@meta.com>", "Meta"]
edition = "2021"
description = "A multi-domain, BPF / user space hybrid scheduler used within sched_ext, which is a Linux kernel feature which enables implementing kernel thread schedulers in BPF and dynamically loading them. https://github.com/sched-ext/scx/tree/main"
@ -17,16 +17,16 @@ libbpf-rs = "0.24.1"
libc = "0.2.137"
log = "0.4.17"
ordered-float = "3.4.0"
scx_stats = { path = "../../../rust/scx_stats", version = "0.2.0" }
scx_stats_derive = { path = "../../../rust/scx_stats/scx_stats_derive", version = "0.2.0" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
scx_stats = { path = "../../../rust/scx_stats", version = "1.0.3" }
scx_stats_derive = { path = "../../../rust/scx_stats/scx_stats_derive", version = "1.0.3" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
serde = { version = "1.0", features = ["derive"] }
simplelog = "0.12"
sorted-vec = "0.8.3"
static_assertions = "1.1.0"
[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.2" }
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }
[features]
enable_backtrace = []

View File

@ -1,5 +1,5 @@
# List of scx_schedulers: scx_bpfland scx_central scx_lavd scx_layered scx_nest scx_qmap scx_rlfifo scx_rustland scx_rusty scx_simple scx_userland
SCX_SCHEDULER=scx_rusty
SCX_SCHEDULER=scx_bpfland
# Set custom flags for each scheduler, below is an example of how to use
#SCX_FLAGS='-u 3000 -i 0.5 -I 0.025 -l 0.5 -b -k'
#SCX_FLAGS='-s 20000 -S 1000 -c 0 -k'