Merge pull request #612 from multics69/lavd-monitor

scx_lavd: add --monitor flag and two micro-optimizations
This commit is contained in:
Changwoo Min 2024-09-06 09:33:55 +09:00 committed by GitHub
commit e3243c5d51
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 523 additions and 68 deletions

10
scheds/rust/Cargo.lock generated
View File

@ -527,6 +527,15 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "gpoint"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c00f1d62d57408109a871dd9e12b76645ec4284406d5ec838d277777ef1ef6c"
dependencies = [
"libc",
]
[[package]]
name = "hashbrown"
version = "0.14.5"
@ -1152,6 +1161,7 @@ dependencies = [
"crossbeam",
"ctrlc",
"fb_procfs",
"gpoint",
"hex",
"itertools 0.13.0",
"libbpf-rs",

View File

@ -27,6 +27,7 @@ simplelog = "0.12"
static_assertions = "1.1.0"
rlimit = "0.10.1"
plain = "0.2.3"
gpoint = "0.2"
[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }

View File

@ -81,11 +81,12 @@ enum consts {
LAVD_PREEMPT_TICK_MARGIN = (100ULL * NSEC_PER_USEC),
LAVD_SYS_STAT_INTERVAL_NS = (50ULL * NSEC_PER_MSEC),
LAVD_SYS_STAT_DECAY_TIMES = (2ULL * LAVD_TIME_ONE_SEC) / LAVD_SYS_STAT_INTERVAL_NS,
LAVD_CC_PER_CORE_MAX_CTUIL = 500, /* maximum per-core CPU utilization */
LAVD_CC_PER_TURBO_CORE_MAX_CTUIL = 750, /* maximum per-core CPU utilization for a turbo core */
LAVD_CC_NR_ACTIVE_MIN = 1, /* num of minimum active cores */
LAVD_CC_NR_OVRFLW = 1, /* num of overflow cores */
LAVD_CC_CPU_PIN_INTERVAL = (2ULL * LAVD_TIME_ONE_SEC),
LAVD_CC_CPU_PIN_INTERVAL = (1ULL * LAVD_TIME_ONE_SEC),
LAVD_CC_CPU_PIN_INTERVAL_DIV = (LAVD_CC_CPU_PIN_INTERVAL /
LAVD_SYS_STAT_INTERVAL_NS),
@ -122,6 +123,16 @@ struct sys_stat {
volatile u32 nr_violation; /* number of utilization violation */
volatile u32 nr_active; /* number of active cores */
volatile u64 nr_sched; /* total scheduling so far */
volatile u64 nr_migration; /* number of task migration */
volatile u64 nr_preemption; /* number of preemption */
volatile u64 nr_greedy; /* number of greedy tasks scheduled */
volatile u64 nr_perf_cri; /* number of performance-critical tasks scheduled */
volatile u64 nr_lat_cri; /* number of latency-critical tasks scheduled */
volatile u64 nr_big; /* scheduled on big core */
volatile u64 nr_pc_on_big; /* performance-critical tasks scheduled on big core */
volatile u64 nr_lc_on_big; /* latency-critical tasks scheduled on big core */
};
/*
@ -169,7 +180,7 @@ struct cpu_ctx {
*/
volatile u32 max_lat_cri; /* maximum latency criticality */
volatile u32 sum_lat_cri; /* sum of latency criticality */
volatile u32 sched_nr; /* number of schedules */
volatile u32 nr_sched; /* number of schedules */
/*
* Information used to keep track of performance criticality
@ -205,6 +216,15 @@ struct cpu_ctx {
struct bpf_cpumask __kptr *tmp_o_mask; /* temporary cpu mask */
struct bpf_cpumask __kptr *tmp_t_mask; /* temporary cpu mask */
struct bpf_cpumask __kptr *tmp_t2_mask; /* temporary cpu mask */
/*
* Information for statistics.
*/
volatile u32 nr_migration; /* number of migrations */
volatile u32 nr_preemption; /* number of preemptions */
volatile u32 nr_greedy; /* number of greedy tasks scheduled */
volatile u32 nr_perf_cri;
volatile u32 nr_lat_cri;
} __attribute__((aligned(CACHELINE_SIZE)));
/*
@ -242,12 +262,18 @@ struct task_ctx {
volatile s32 victim_cpu;
u16 slice_boost_prio; /* how many times a task fully consumed the slice */
u8 wakeup_ft; /* regular wakeup = 1, sync wakeup = 2 */
/*
* Task's performance criticality
*/
u8 on_big; /* executable on a big core */
u8 on_little; /* executable on a little core */
u32 perf_cri; /* performance criticality of a task */
/*
* Information for statistics collection
*/
u32 cpu_id; /* CPU ID scheduled on */
};
/*

View File

@ -197,8 +197,8 @@ char _license[] SEC("license") = "GPL";
volatile u64 nr_cpus_onln;
static volatile u64 nr_cpus_big;
static struct sys_stat __sys_stats[2];
static volatile int __sys_stat_idx;
struct sys_stat __sys_stats[2];
volatile int __sys_stat_idx;
private(LAVD) struct bpf_cpumask __kptr *turbo_cpumask; /* CPU mask for turbo CPUs */
private(LAVD) struct bpf_cpumask __kptr *big_cpumask; /* CPU mask for big CPUs */
@ -240,6 +240,18 @@ const volatile bool is_autopilot_on;
const volatile u32 is_smt_active;
const volatile u8 verbose;
/*
* Statistics
*/
volatile int power_mode;
volatile u64 last_power_mode_clk;
volatile u64 performance_mode_ns;
volatile u64 balanced_mode_ns;
volatile u64 powersave_mode_ns;
/*
* Exit information
*/
UEI_DEFINE(uei);
#define debugln(fmt, ...) \
@ -320,6 +332,7 @@ struct {
static u16 get_nice_prio(struct task_struct *p);
static int reinit_active_cpumask_for_performance(void);
static void update_power_mode_time(void);
static u64 sigmoid_u64(u64 v, u64 max)
{
@ -582,7 +595,15 @@ struct sys_stat_ctx {
s32 max_lat_cri;
s32 avg_lat_cri;
u64 sum_lat_cri;
u32 sched_nr;
u32 nr_sched;
u32 nr_migration;
u32 nr_preemption;
u32 nr_greedy;
u32 nr_perf_cri;
u32 nr_lat_cri;
u32 nr_big;
u32 nr_pc_on_big;
u32 nr_lc_on_big;
u64 sum_perf_cri;
u32 avg_perf_cri;
u64 new_util;
@ -618,6 +639,30 @@ static void collect_sys_stat(struct sys_stat_ctx *c)
c->load_actual += cpuc->load_actual;
c->load_run_time_ns += cpuc->load_run_time_ns;
c->tot_svc_time += cpuc->tot_svc_time;
cpuc->tot_svc_time = 0;
/*
* Accumulate statistics.
*/
if (cpuc->big_core) {
c->nr_big += cpuc->nr_sched;
c->nr_pc_on_big += cpuc->nr_perf_cri;
c->nr_lc_on_big += cpuc->nr_lat_cri;
}
c->nr_perf_cri += cpuc->nr_perf_cri;
cpuc->nr_perf_cri = 0;
c->nr_lat_cri += cpuc->nr_lat_cri;
cpuc->nr_lat_cri = 0;
c->nr_migration += cpuc->nr_migration;
cpuc->nr_migration = 0;
c->nr_preemption += cpuc->nr_preemption;
cpuc->nr_preemption = 0;
c->nr_greedy += cpuc->nr_greedy;
cpuc->nr_greedy = 0;
/*
* Accumulate task's latency criticality information.
@ -629,8 +674,8 @@ static void collect_sys_stat(struct sys_stat_ctx *c)
c->sum_lat_cri += cpuc->sum_lat_cri;
cpuc->sum_lat_cri = 0;
c->sched_nr += cpuc->sched_nr;
cpuc->sched_nr = 0;
c->nr_sched += cpuc->nr_sched;
cpuc->nr_sched = 0;
if (cpuc->max_lat_cri > c->max_lat_cri)
c->max_lat_cri = cpuc->max_lat_cri;
@ -701,7 +746,7 @@ static void calc_sys_stat(struct sys_stat_ctx *c)
c->compute_total = 0;
c->new_util = (c->compute_total * LAVD_CPU_UTIL_MAX)/c->duration_total;
if (c->sched_nr == 0) {
if (c->nr_sched == 0) {
/*
* When a system is completely idle, it is indeed possible
* nothing scheduled for an interval.
@ -711,13 +756,15 @@ static void calc_sys_stat(struct sys_stat_ctx *c)
c->avg_perf_cri = c->stat_cur->avg_perf_cri;
}
else {
c->avg_lat_cri = c->sum_lat_cri / c->sched_nr;
c->avg_perf_cri = c->sum_perf_cri / c->sched_nr;
c->avg_lat_cri = c->sum_lat_cri / c->nr_sched;
c->avg_perf_cri = c->sum_perf_cri / c->nr_sched;
}
}
static void update_sys_stat_next(struct sys_stat_ctx *c)
{
static int cnt = 0;
/*
* Update the CPU utilization to the next version.
*/
@ -741,11 +788,45 @@ static void update_sys_stat_next(struct sys_stat_ctx *c)
stat_next->nr_violation =
calc_avg32(stat_cur->nr_violation, c->nr_violation);
stat_next->avg_svc_time = (c->sched_nr == 0) ? 0 :
c->tot_svc_time / c->sched_nr;
stat_next->avg_svc_time = (c->nr_sched == 0) ? 0 :
c->tot_svc_time / c->nr_sched;
stat_next->nr_queued_task =
calc_avg(stat_cur->nr_queued_task, c->nr_queued_task);
/*
* Periodically (every LAVD_SYS_STAT_DECAY_TIMES intervals) halve the
* statistics so that they decay toward recent behavior.
*/
if (cnt++ == LAVD_SYS_STAT_DECAY_TIMES) {
cnt = 0;
stat_next->nr_sched >>= 1;
stat_next->nr_migration >>= 1;
stat_next->nr_preemption >>= 1;
stat_next->nr_greedy >>= 1;
stat_next->nr_perf_cri >>= 1;
stat_next->nr_lat_cri >>= 1;
stat_next->nr_big >>= 1;
stat_next->nr_pc_on_big >>= 1;
stat_next->nr_lc_on_big >>= 1;
__sync_fetch_and_sub(&performance_mode_ns, performance_mode_ns/2);
__sync_fetch_and_sub(&balanced_mode_ns, balanced_mode_ns/2);
__sync_fetch_and_sub(&powersave_mode_ns, powersave_mode_ns/2);
}
stat_next->nr_sched += c->nr_sched;
stat_next->nr_migration += c->nr_migration;
stat_next->nr_preemption += c->nr_preemption;
stat_next->nr_greedy += c->nr_greedy;
stat_next->nr_perf_cri += c->nr_perf_cri;
stat_next->nr_lat_cri += c->nr_lat_cri;
stat_next->nr_big += c->nr_big;
stat_next->nr_pc_on_big += c->nr_pc_on_big;
stat_next->nr_lc_on_big += c->nr_lc_on_big;
update_power_mode_time();
}
static void do_update_sys_stat(void)
@ -905,21 +986,49 @@ unlock_out:
bpf_rcu_read_unlock();
}
int do_set_power_profile(s32 power_mode, int util)
static void update_power_mode_time(void)
{
static s32 cur_mode = LAVD_PM_MAX;
u64 now = bpf_ktime_get_ns();
u64 delta;
if (last_power_mode_clk == 0)
last_power_mode_clk = now;
delta = now - last_power_mode_clk;
last_power_mode_clk = now;
switch (power_mode) {
case LAVD_PM_PERFORMANCE:
__sync_fetch_and_add(&performance_mode_ns, delta);
break;
case LAVD_PM_BALANCED:
__sync_fetch_and_add(&balanced_mode_ns, delta);
break;
case LAVD_PM_POWERSAVE:
__sync_fetch_and_add(&powersave_mode_ns, delta);
break;
}
}
static int do_set_power_profile(s32 pm, int util)
{
/*
* Skip setting the mode if already in the same mode.
*/
if (cur_mode == power_mode)
if (power_mode == pm)
return 0;
cur_mode = power_mode;
/*
* Update power mode time
*/
update_power_mode_time();
power_mode = pm;
/*
* Change the power mode.
*/
switch (power_mode) {
switch (pm) {
case LAVD_PM_PERFORMANCE:
no_core_compaction = true;
no_freq_scaling = true;
@ -1184,13 +1293,6 @@ static u64 calc_time_slice(struct task_struct *p, struct task_ctx *taskc,
taskc->slice_boost_prio) / LAVD_SLICE_BOOST_MAX_STEP;
}
/*
* Boost time slice based on CPU's capacity to assign a longer time
* slice for a more performant CPU for making each CPU's job processing
* throughput similar.
*/
slice = slice * cpuc->capacity / 1024;
/*
* If a task has yet to be scheduled (i.e., a freshly forked task or a
* task just under sched_ext), don't give a fair amount of time slice
@ -1274,6 +1376,7 @@ static void update_stat_for_running(struct task_struct *p,
struct task_ctx *taskc,
struct cpu_ctx *cpuc)
{
struct sys_stat *stat_cur = get_sys_stat_cur();
u64 wait_period, interval;
u64 now = bpf_ktime_get_ns();
u64 wait_freq_ft, wake_freq_ft, perf_cri;
@ -1306,7 +1409,7 @@ static void update_stat_for_running(struct task_struct *p,
if (cpuc->max_lat_cri < taskc->lat_cri)
cpuc->max_lat_cri = taskc->lat_cri;
cpuc->sum_lat_cri += taskc->lat_cri;
cpuc->sched_nr++;
cpuc->nr_sched++;
/*
* It is clear there is no need to consider the suspended duration
@ -1345,6 +1448,30 @@ static void update_stat_for_running(struct task_struct *p,
* Update task state when starts running.
*/
taskc->last_running_clk = now;
/*
* Update statistics information.
*/
if (taskc->cpu_id != cpuc->cpu_id) {
taskc->cpu_id = cpuc->cpu_id;
cpuc->nr_migration++;
}
if (taskc->victim_cpu >= 0)
cpuc->nr_preemption++;
if (is_lat_cri(taskc, stat_cur)) {
cpuc->nr_lat_cri++;
// debugln("------------------------ lc = %llu", cpuc->nr__cri);
}
if (is_perf_cri(taskc, stat_cur)) {
cpuc->nr_perf_cri++;
// debugln("------------------------ pc = %llu", cpuc->nr_perf_cri);
}
if (is_greedy(taskc))
cpuc->nr_greedy++;
}
static u64 calc_svc_time(struct task_struct *p, struct task_ctx *taskc)
@ -1626,13 +1753,14 @@ start_omask:
/*
* If the task cannot run on either active or overflow cores,
* stay on the previous core (if it is okay) or one of its taskset.
* Then, put the CPU to the overflow set.
*/
start_any_mask:
if (bpf_cpumask_test_cpu(prev_cpu, p->cpus_ptr))
cpu_id = prev_cpu;
else {
start_any_mask:
else
cpu_id = bpf_cpumask_any_distribute(p->cpus_ptr);
}
bpf_cpumask_set_cpu(cpu_id, ovrflw);
/*
* Note that we don't need to kick the picked CPU here since the

View File

@ -12,6 +12,7 @@ pub mod bpf_intf;
pub use bpf_intf::*;
mod stats;
use stats::SysStats;
use stats::SchedSample;
use stats::SchedSamples;
use stats::StatsReq;
@ -122,6 +123,14 @@ struct Opts {
#[clap(long = "no-freq-scaling", action = clap::ArgAction::SetTrue)]
no_freq_scaling: bool,
/// Enable stats monitoring with the specified interval.
#[clap(long)]
stats: Option<f64>,
/// Run in stats monitoring mode with the specified interval. Scheduler is not launched.
#[clap(long)]
monitor: Option<f64>,
/// Run in monitoring mode. Show the specified number of scheduling
/// samples every second.
#[clap(long)]
@ -135,6 +144,10 @@ struct Opts {
/// Print scheduler version and exit.
#[clap(short = 'V', long, action = clap::ArgAction::SetTrue)]
version: bool,
/// Show descriptions for statistics.
#[clap(long)]
help_stats: bool,
}
impl Opts {
@ -448,8 +461,9 @@ struct Scheduler<'a> {
rb_mgr: libbpf_rs::RingBuffer<'static>,
intrspc: introspec,
intrspc_rx: Receiver<SchedSample>,
sampler_tid: Option<ThreadId>,
monitor_tid: Option<ThreadId>,
stats_server: StatsServer<StatsReq, StatsRes>,
mseq_id: u64,
}
impl<'a> Scheduler<'a> {
@ -494,8 +508,9 @@ impl<'a> Scheduler<'a> {
rb_mgr,
intrspc: introspec::new(),
intrspc_rx,
sampler_tid: None,
monitor_tid: None,
stats_server,
mseq_id: 0,
})
}
@ -626,19 +641,97 @@ impl<'a> Scheduler<'a> {
self.skel.maps.bss_data.intrspc.cmd = LAVD_CMD_NOP;
}
/// Return `x` as a percentage of `y`.
///
/// Returns `0.0` when `y` is zero to avoid printing NaN/inf in the stats
/// output (e.g., `nr_sched` or the accumulated mode time can be zero right
/// after startup).
fn get_pc(x: u64, y: u64) -> f64 {
    if y == 0 {
        return 0.0;
    }
    100.0 * x as f64 / y as f64
}
/// Map a power-mode constant coming from the BPF side to its display name.
fn get_power_mode(power_mode: s32) -> &'static str {
    // Mirror of the LAVD_PM_* values defined on the BPF side.
    const LAVD_PM_PERFORMANCE: s32 = 0;
    const LAVD_PM_BALANCED: s32 = 1;
    const LAVD_PM_POWERSAVE: s32 = 2;

    match power_mode {
        LAVD_PM_PERFORMANCE => "performance",
        LAVD_PM_BALANCED => "balanced",
        LAVD_PM_POWERSAVE => "powersave",
        _ => "unknown",
    }
}
fn stats_req_to_res(&mut self, req: &StatsReq) -> Result<StatsRes> {
Ok(match req {
StatsReq::NewSampler(tid) => {
self.rb_mgr.consume().unwrap();
self.sampler_tid = Some(*tid);
self.monitor_tid = Some(*tid);
StatsRes::Ack
}
StatsReq::SysStatsReq {
tid,
} => {
if Some(*tid) != self.monitor_tid {
return Ok(StatsRes::Bye);
}
self.mseq_id += 1;
let bss_data = &self.skel.maps.bss_data;
let st = bss_data.__sys_stats[0];
let mseq = self.mseq_id;
let avg_svc_time = st.avg_svc_time;
let nr_queued_task = st.nr_queued_task;
let nr_active = st.nr_active;
let nr_sched = st.nr_sched;
let pc_migration = Self::get_pc(st.nr_migration, nr_sched);
let pc_preemption = Self::get_pc(st.nr_preemption, nr_sched);
let pc_greedy = Self::get_pc(st.nr_greedy, nr_sched);
let pc_pc = Self::get_pc(st.nr_perf_cri, nr_sched);
let pc_lc = Self::get_pc(st.nr_lat_cri, nr_sched);
let nr_big = st.nr_big;
let pc_big = Self::get_pc(nr_big, nr_sched);
let pc_pc_on_big = Self::get_pc(st.nr_pc_on_big, nr_big);
let pc_lc_on_big = Self::get_pc(st.nr_lc_on_big, nr_big);
let power_mode = Self::get_power_mode(bss_data.power_mode);
let total_time = bss_data.performance_mode_ns +
bss_data.balanced_mode_ns +
bss_data.powersave_mode_ns;
let pc_performance = Self::get_pc(bss_data.performance_mode_ns, total_time);
let pc_balanced = Self::get_pc(bss_data.balanced_mode_ns, total_time);
let pc_powersave = Self::get_pc(bss_data.powersave_mode_ns, total_time);
StatsRes::SysStats(SysStats {
mseq,
avg_svc_time,
nr_queued_task,
nr_active,
nr_sched,
pc_migration,
pc_preemption,
pc_greedy,
pc_pc,
pc_lc,
pc_big,
pc_pc_on_big,
pc_lc_on_big,
power_mode: power_mode.to_string(),
pc_performance,
pc_balanced,
pc_powersave,
})
}
StatsReq::SchedSamplesNr {
tid,
nr_samples,
interval_ms,
} => {
if Some(*tid) != self.sampler_tid {
if Some(*tid) != self.monitor_tid {
return Ok(StatsRes::Bye);
}
@ -791,6 +884,11 @@ fn main() -> Result<()> {
return Ok(());
}
if opts.help_stats {
stats::server_data(0).describe_meta(&mut std::io::stdout(), None)?;
return Ok(());
}
init_log(&opts);
debug!("{:#?}", opts);
@ -808,6 +906,17 @@ fn main() -> Result<()> {
return Ok(());
}
if let Some(intv) = opts.monitor.or(opts.stats) {
let shutdown_copy = shutdown.clone();
let jh = std::thread::spawn(move || {
stats::monitor(Duration::from_secs_f64(intv), shutdown_copy).unwrap()
});
if opts.monitor.is_some() {
let _ = jh.join();
return Ok(());
}
}
let mut open_object = MaybeUninit::uninit();
loop {
let mut sched = Scheduler::init(&opts, &mut open_object)?;

View File

@ -11,30 +11,165 @@ use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::thread::ThreadId;
use std::time::Duration;
use gpoint::GPoint;
/// System-wide scheduling statistics periodically reported through the
/// stats server (consumed by `--stats` / `--monitor`).
#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
#[stat(top)]
pub struct SysStats {
    #[stat(desc = "Sequence ID of this message")]
    pub mseq: u64,
    #[stat(desc = "Average runtime per schedule")]
    pub avg_svc_time: u64,
    #[stat(desc = "Number of runnable tasks in runqueues")]
    pub nr_queued_task: u64,
    #[stat(desc = "Number of active CPUs when core compaction is enabled")]
    pub nr_active: u32,
    #[stat(desc = "Number of context switches")]
    pub nr_sched: u64,
    #[stat(desc = "% of task migration")]
    pub pc_migration: f64,
    #[stat(desc = "% of task preemption")]
    pub pc_preemption: f64,
    #[stat(desc = "% of greedy tasks")]
    pub pc_greedy: f64,
    #[stat(desc = "% of performance-critical tasks")]
    pub pc_pc: f64,
    #[stat(desc = "% of latency-critical tasks")]
    pub pc_lc: f64,
    #[stat(desc = "% of tasks scheduled on big cores")]
    pub pc_big: f64,
    #[stat(desc = "% of performance-critical tasks scheduled on big cores")]
    pub pc_pc_on_big: f64,
    #[stat(desc = "% of latency-critical tasks scheduled on big cores")]
    pub pc_lc_on_big: f64,
    #[stat(desc = "Current power mode")]
    pub power_mode: String,
    #[stat(desc = "% of performance mode")]
    pub pc_performance: f64,
    #[stat(desc = "% of balanced mode")]
    pub pc_balanced: f64,
    #[stat(desc = "% of powersave mode")]
    pub pc_powersave: f64,
}
impl SysStats {
    /// Write the column-header row. The `\x1b[93m`/`\x1b[0m` escapes render
    /// the header in bright yellow on ANSI terminals.
    pub fn format_header<W: Write>(w: &mut W) -> Result<()> {
        writeln!(
            w,
            "\x1b[93m| {:8} | {:9} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |\x1b[0m",
            "MSEQ",
            "SVC_TIME",
            "# Q TASK",
            "# ACT CPU",
            "# SCHED",
            "MIGRATE%",
            "PREEMPT%",
            "GREEDY%",
            "PERF-CR%",
            "LAT-CR%",
            "BIG%",
            "PC/BIG%",
            "LC/BIG%",
            "POWER MODE",
            "PERFORMANCE%",
            "BALANCED%",
            "POWERSAVE%",
        )?;
        Ok(())
    }

    /// Write one statistics row. Re-emits the header every 10 rows
    /// (whenever mseq % 10 == 1) so it stays visible while scrolling.
    /// The column widths must match format_header() above.
    fn format<W: Write>(&self, w: &mut W) -> Result<()> {
        if self.mseq % 10 == 1 {
            Self::format_header(w)?;
        }

        writeln!(
            w,
            "| {:8} | {:9} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |",
            self.mseq,
            self.avg_svc_time,
            self.nr_queued_task,
            self.nr_active,
            self.nr_sched,
            // GPoint prints floats in printf "%g" style (compact form).
            GPoint(self.pc_migration),
            GPoint(self.pc_preemption),
            GPoint(self.pc_greedy),
            GPoint(self.pc_pc),
            GPoint(self.pc_lc),
            GPoint(self.pc_big),
            GPoint(self.pc_pc_on_big),
            GPoint(self.pc_lc_on_big),
            self.power_mode,
            GPoint(self.pc_performance),
            GPoint(self.pc_balanced),
            GPoint(self.pc_powersave),
        )?;
        Ok(())
    }
}
/// A single per-task scheduling sample, emitted in the `--monitor-sched-samples`
/// style monitoring path.
#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
pub struct SchedSample {
    #[stat(desc = "Sequence ID of this message")]
    pub mseq: u64,
    #[stat(desc = "Process ID")]
    pub pid: i32,
    #[stat(desc = "Task name")]
    pub comm: String,
    #[stat(desc = "LR: 'L'atency-critical or 'R'egular, HI: performance-'H'ungry or performance-'I'nsensitive, BT: 'B'ig or li'T'tle, EG: 'E'ligible or 'G'reedy, PN: 'P'reempting or 'N'ot")]
    pub stat: String,
    #[stat(desc = "CPU id where this task is scheduled on")]
    pub cpu_id: u32,
    #[stat(desc = "Victim CPU to be preempted out (-1 = no preemption)")]
    pub victim_cpu: i32,
    #[stat(desc = "Assigned virtual deadline")]
    pub vdeadline_delta_ns: u64,
    #[stat(desc = "Assigned time slice")]
    pub slice_ns: u64,
    #[stat(desc = "How greedy this task is in using CPU time (1000 = fair)")]
    pub greedy_ratio: u32,
    #[stat(desc = "Latency criticality of this task")]
    pub lat_cri: u32,
    #[stat(desc = "Average latency criticality in a system")]
    pub avg_lat_cri: u32,
    #[stat(desc = "Static priority (20 == nice 0)")]
    pub static_prio: u16,
    #[stat(desc = "Slice boost factor (number of consecutive full slice exhaustions)")]
    pub slice_boost_prio: u16,
    #[stat(desc = "How often this task is scheduled per second")]
    pub run_freq: u64,
    #[stat(desc = "Average runtime per schedule")]
    pub run_time_ns: u64,
    #[stat(desc = "How frequently this task waits for other tasks")]
    pub wait_freq: u64,
    #[stat(desc = "How frequently this task wakes other tasks")]
    pub wake_freq: u64,
    #[stat(desc = "Performance criticality of this task")]
    pub perf_cri: u32,
    #[stat(desc = "Average performance criticality in a system")]
    pub avg_perf_cri: u32,
    #[stat(desc = "Target performance level of this CPU")]
    pub cpuperf_cur: u32,
    #[stat(desc = "CPU utilization of this particular CPU")]
    pub cpu_util: u64,
    #[stat(desc = "Number of active CPUs when core compaction is enabled")]
    pub nr_active: u32,
}
@ -42,42 +177,42 @@ impl SchedSample {
pub fn format_header<W: Write>(w: &mut W) -> Result<()> {
writeln!(
w,
"| {:6} | {:7} | {:17} \
"\x1b[93m| {:6} | {:7} | {:17} \
| {:5} | {:4} | {:4} \
| {:14} | {:8} | {:7} \
| {:8} | {:7} | {:8} \
| {:7} | {:9} | {:9} \
| {:9} | {:9} | {:8} \
| {:8} | {:8} | {:8} \
| {:6} |",
"mseq",
"pid",
"comm",
"stat",
"cpu",
"vtmc",
"vddln_ns",
"slc_ns",
"grdy_rt",
"lat_cri",
"avg_lc",
"st_prio",
"slc_bst",
"run_freq",
"run_tm_ns",
"wait_freq",
"wake_freq",
"perf_cri",
"avg_pc",
"cpufreq",
"cpu_util",
"nr_act",
| {:6} |\x1b[0m",
"MSEQ",
"PID",
"COMM",
"STAT",
"CPU",
"VTMC",
"VDDLN_NS",
"SLC_NS",
"GRDY_RT",
"LAT_CRI",
"AVG_LC",
"ST_PRIO",
"SLC_BST",
"RUN_FREQ",
"RUN_TM_NS",
"WAIT_FREQ",
"WAKE_FREQ",
"PERF_CRI",
"AVG_PC",
"CPUFREQ",
"CPU_UTIL",
"NR_ACT",
)?;
Ok(())
}
pub fn format<W: Write>(&self, w: &mut W) -> Result<()> {
if self.mseq % 32 == 1 {
if self.mseq % 10 == 1 {
Self::format_header(w)?;
}
@ -126,6 +261,9 @@ pub struct SchedSamples {
#[derive(Debug)]
pub enum StatsReq {
NewSampler(ThreadId),
SysStatsReq {
tid: ThreadId,
},
SchedSamplesNr {
tid: ThreadId,
nr_samples: u64,
@ -134,7 +272,15 @@ pub enum StatsReq {
}
impl StatsReq {
fn from_args(
fn from_args_stats(
tid: ThreadId,
) -> Result<Self> {
Ok(Self::SysStatsReq {
tid,
})
}
fn from_args_samples(
tid: ThreadId,
nr_cpus_onln: u64,
args: &BTreeMap<String, String>,
@ -164,12 +310,36 @@ impl StatsReq {
/// Responses sent from the scheduler back to a stats client.
pub enum StatsRes {
    Ack,                        // request accepted (e.g., new sampler registered)
    Bye,                        // requester is no longer the current monitor; disconnect
    SysStats(SysStats),         // system-wide statistics snapshot
    SchedSamples(SchedSamples), // batch of per-task scheduling samples
}
pub fn server_data(nr_cpus_onln: u64) -> StatsServerData<StatsReq, StatsRes> {
let samples_open: Box<dyn StatsOpener<StatsReq, StatsRes>> =
Box::new(move |(req_ch, res_ch)| {
let open: Box<dyn StatsOpener<StatsReq, StatsRes>> = Box::new(move |(req_ch, res_ch)| {
let tid = std::thread::current().id();
req_ch.send(StatsReq::NewSampler(tid))?;
match res_ch.recv()? {
StatsRes::Ack => {}
res => bail!("invalid response: {:?}", &res),
}
let read: Box<dyn StatsReader<StatsReq, StatsRes>> =
Box::new(move |_args, (req_ch, res_ch)| {
let req = StatsReq::from_args_stats(tid)?;
req_ch.send(req)?;
let stats = match res_ch.recv()? {
StatsRes::SysStats(v) => v,
StatsRes::Bye => bail!("preempted by another sampler"),
res => bail!("invalid response: {:?}", &res),
};
stats.to_json()
});
Ok(read)
});
let samples_open: Box<dyn StatsOpener<StatsReq, StatsRes>> = Box::new(move |(req_ch, res_ch)| {
let tid = std::thread::current().id();
req_ch.send(StatsReq::NewSampler(tid))?;
match res_ch.recv()? {
@ -179,7 +349,7 @@ pub fn server_data(nr_cpus_onln: u64) -> StatsServerData<StatsReq, StatsRes> {
let read: Box<dyn StatsReader<StatsReq, StatsRes>> =
Box::new(move |args, (req_ch, res_ch)| {
let req = StatsReq::from_args(tid, nr_cpus_onln, args)?;
let req = StatsReq::from_args_samples(tid, nr_cpus_onln, args)?;
req_ch.send(req)?;
let samples = match res_ch.recv()? {
@ -194,6 +364,14 @@ pub fn server_data(nr_cpus_onln: u64) -> StatsServerData<StatsReq, StatsRes> {
});
StatsServerData::new()
.add_meta(SysStats::meta())
.add_ops(
"top",
StatsOps {
open: open,
close: None,
},
)
.add_meta(SchedSample::meta())
.add_ops(
"sched_samples",
@ -205,13 +383,6 @@ pub fn server_data(nr_cpus_onln: u64) -> StatsServerData<StatsReq, StatsRes> {
}
pub fn monitor_sched_samples(nr_samples: u64, shutdown: Arc<AtomicBool>) -> Result<()> {
println!("## stats");
println!(" LR: 'L'atency-critical or 'R'egular");
println!(" HI: performance-'H'ungry or performance-'I'nsensitive");
println!(" BT: 'B'ig or li'T'tle");
println!(" EG: 'E'ligigle or 'G'reedy");
println!(" PN: 'P'reempting or 'N'ot");
scx_utils::monitor_stats::<SchedSamples>(
&vec![
("target".into(), "sched_samples".into()),
@ -228,3 +399,13 @@ pub fn monitor_sched_samples(nr_samples: u64, shutdown: Arc<AtomicBool>) -> Resu
},
)
}
/// Run the system-stats monitoring loop: fetch `SysStats` from the stats
/// server every `intv` and print each snapshot to stdout, until `shutdown`
/// is set.
pub fn monitor(intv: Duration, shutdown: Arc<AtomicBool>) -> Result<()> {
    // NOTE(review): the value returned by monitor_stats is discarded here —
    // if it is a Result, consider propagating it instead of returning Ok(()).
    scx_utils::monitor_stats::<SysStats>(
        &vec![],
        intv,
        || shutdown.load(Ordering::Relaxed),
        |sysstats| sysstats.format(&mut std::io::stdout()),
    );
    Ok(())
}