Merge pull request #441 from hodgesds/layered-metrics-refactor

scx_layered: Add separate module for metrics
This commit is contained in:
Tejun Heo 2024-07-19 06:37:51 -10:00 committed by GitHub
commit 2e22a53560
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 199 additions and 193 deletions

View File

@ -5,6 +5,8 @@
mod bpf_skel;
pub use bpf_skel::*;
pub mod bpf_intf;
pub mod metrics;
use metrics::OpenMetricsStats;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
@ -14,8 +16,6 @@ use std::io::Read;
use std::io::Write;
use std::ops::Sub;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicI64;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::time::Duration;
@ -36,9 +36,6 @@ use log::info;
use log::trace;
use log::warn;
use prometheus_client::encoding::text::encode;
use prometheus_client::metrics::family::Family;
use prometheus_client::metrics::gauge::Gauge;
use prometheus_client::registry::Registry;
use scx_utils::compat;
use scx_utils::init_libbpf_logging;
use scx_utils::ravg::ravg_read;
@ -1132,194 +1129,6 @@ impl Layer {
}
}
// Container for every OpenMetrics gauge exported by the scheduler, together
// with the prometheus-client Registry they are registered in.  Each field is
// registered under its own identifier with a help string in
// OpenMetricsStats::new(); see those help strings for the precise meaning of
// each metric.
//
// Plain Gauge fields are scheduler-wide statistics.  The Family fields
// (prefixed `l_`) hold one gauge per layer, keyed by a vector of
// (label-name, label-value) pairs.
#[derive(Default)]
struct OpenMetricsStats {
    // Registry that all gauges below are registered into; used to encode
    // the metrics for exposition.
    registry: Registry,
    // Total scheduling events in the period.
    total: Gauge<i64, AtomicI64>,
    // % that got scheduled directly into an idle CPU.
    local: Gauge<f64, AtomicU64>,
    // % of open layer tasks scheduled into occupied idle CPUs.
    open_idle: Gauge<f64, AtomicU64>,
    // % which violated configured policies due to CPU affinity restrictions.
    affn_viol: Gauge<f64, AtomicU64>,
    // Number of times a CPU skipped dispatching due to a sibling running an
    // exclusive task.
    excl_idle: Gauge<f64, AtomicU64>,
    // Number of times an idle sibling CPU was woken up after an exclusive
    // task finished.
    excl_wakeup: Gauge<f64, AtomicU64>,
    // CPU time this binary has consumed during the period.
    proc_ms: Gauge<i64, AtomicI64>,
    // CPU busy % (100% means all CPUs were fully occupied).
    busy: Gauge<f64, AtomicU64>,
    // CPU utilization % (100% means one CPU was fully occupied).
    util: Gauge<f64, AtomicU64>,
    // Sum of weight * duty_cycle for all tasks.
    load: Gauge<f64, AtomicU64>,
    // Per-layer statistics.  Each metric's meaning matches the help string
    // passed to register!() for it in OpenMetricsStats::new().
    l_util: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_util_frac: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_load: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_load_frac: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_tasks: Family<Vec<(String, String)>, Gauge<i64, AtomicI64>>,
    l_total: Family<Vec<(String, String)>, Gauge<i64, AtomicI64>>,
    l_sel_local: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_enq_wakeup: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_enq_expire: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_enq_last: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_enq_reenq: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_min_exec: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_min_exec_us: Family<Vec<(String, String)>, Gauge<i64, AtomicI64>>,
    l_open_idle: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_preempt: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_preempt_first: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_preempt_idle: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_preempt_fail: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_affn_viol: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_keep: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_keep_fail_max_exec: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_keep_fail_busy: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_excl_collision: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_excl_preempt: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_kick: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_yield: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_yield_ignore: Family<Vec<(String, String)>, Gauge<i64, AtomicI64>>,
    l_migration: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    l_cur_nr_cpus: Family<Vec<(String, String)>, Gauge<i64, AtomicI64>>,
    l_min_nr_cpus: Family<Vec<(String, String)>, Gauge<i64, AtomicI64>>,
    l_max_nr_cpus: Family<Vec<(String, String)>, Gauge<i64, AtomicI64>>,
}
impl OpenMetricsStats {
    /// Build a fresh stats container and register every gauge field, under
    /// its own identifier and with a human-readable help string, in the
    /// embedded registry so it appears in the encoded metrics output.
    fn new() -> OpenMetricsStats {
        let mut metrics = OpenMetricsStats {
            registry: <Registry>::default(),
            ..Default::default()
        };
        // Helper macro to reduce on some of the boilerplate:
        // $i: The identifier of the metric to register
        // $help: The Help text associated with the metric
        macro_rules! register {
            ($i:ident, $help:expr) => {
                metrics
                    .registry
                    .register(stringify!($i), $help, metrics.$i.clone())
            };
        }
        register!(total, "Total scheduling events in the period");
        register!(local, "% that got scheduled directly into an idle CPU");
        register!(
            open_idle,
            "% of open layer tasks scheduled into occupied idle CPUs"
        );
        register!(
            affn_viol,
            "% which violated configured policies due to CPU affinity restrictions"
        );
        register!(
            excl_idle,
            "Number of times a CPU skipped dispatching due to sibling running an exclusive task"
        );
        register!(
            excl_wakeup,
            "Number of times an idle sibling CPU was woken up after an exclusive task is finished"
        );
        register!(
            proc_ms,
            "CPU time this binary has consumed during the period"
        );
        register!(busy, "CPU busy % (100% means all CPUs were fully occupied)");
        register!(
            util,
            "CPU utilization % (100% means one CPU was fully occupied)"
        );
        register!(load, "Sum of weight * duty_cycle for all tasks");
        register!(
            l_util,
            "CPU utilization of the layer (100% means one CPU was fully occupied)"
        );
        register!(
            l_util_frac,
            "Fraction of total CPU utilization consumed by the layer"
        );
        register!(l_load, "Sum of weight * duty_cycle for tasks in the layer");
        register!(l_load_frac, "Fraction of total load consumed by the layer");
        register!(l_tasks, "Number of tasks in the layer");
        register!(l_total, "Number of scheduling events in the layer");
        register!(
            l_sel_local,
            "% of scheduling events directly into an idle CPU"
        );
        register!(
            l_enq_wakeup,
            "% of scheduling events enqueued to layer after wakeup"
        );
        register!(
            l_enq_expire,
            "% of scheduling events enqueued to layer after slice expiration"
        );
        register!(
            l_enq_last,
            "% of scheduling events enqueued as last runnable task on CPU"
        );
        register!(
            l_enq_reenq,
            "% of scheduling events re-enqueued due to RT preemption"
        );
        register!(
            l_min_exec,
            "Number of times execution duration was shorter than min_exec_us"
        );
        register!(
            l_min_exec_us,
            "Total execution duration extended due to min_exec_us"
        );
        register!(
            l_open_idle,
            "% of scheduling events into idle CPUs occupied by other layers"
        );
        register!(
            l_preempt,
            "% of scheduling events that preempted other tasks"
        );
        register!(
            l_preempt_first,
            "% of scheduling events that first-preempted other tasks"
        );
        register!(
            l_preempt_idle,
            "% of scheduling events that idle-preempted other tasks"
        );
        register!(
            l_preempt_fail,
            "% of scheduling events that attempted to preempt other tasks but failed"
        );
        register!(
            l_affn_viol,
            "% of scheduling events that violated configured policies due to CPU affinity restrictions"
        );
        register!(
            l_keep,
            "% of scheduling events that continued executing after slice expiration"
        );
        register!(
            l_keep_fail_max_exec,
            "% of scheduling events that weren't allowed to continue executing after slice expiration due to overrunning max_exec duration limit"
        );
        register!(
            l_keep_fail_busy,
            "% of scheduling events that weren't allowed to continue executing after slice expiration to accommodate other tasks"
        );
        register!(
            l_excl_collision,
            "Number of times an exclusive task skipped a CPU as the sibling was also exclusive"
        );
        register!(
            l_excl_preempt,
            "Number of times a sibling CPU was preempted for an exclusive task"
        );
        // Fixed typo in user-facing help text: "schduling" -> "scheduling".
        register!(
            l_kick,
            "% of scheduling events that kicked a CPU from enqueue path"
        );
        register!(l_yield, "% of scheduling events that yielded");
        register!(l_yield_ignore, "Number of times yield was ignored");
        register!(l_migration, "% of scheduling events that migrated across CPUs");
        register!(l_cur_nr_cpus, "Current # of CPUs assigned to the layer");
        register!(l_min_nr_cpus, "Minimum # of CPUs assigned to the layer");
        register!(l_max_nr_cpus, "Maximum # of CPUs assigned to the layer");
        metrics
    }
}
struct Scheduler<'a, 'b> {
skel: BpfSkel<'a>,
struct_ops: Option<libbpf_rs::Link>,

View File

@ -0,0 +1,197 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2.
use std::sync::atomic::AtomicI64;
use std::sync::atomic::AtomicU64;
use prometheus_client::metrics::family::Family;
use prometheus_client::metrics::gauge::Gauge;
use prometheus_client::registry::Registry;
// Container for every OpenMetrics gauge exported by the scheduler, together
// with the prometheus-client Registry they are registered in.  Each field is
// registered under its own identifier with a help string in
// OpenMetricsStats::new(); see those help strings for the precise meaning of
// each metric.
//
// Plain Gauge fields are scheduler-wide statistics.  The Family fields
// (prefixed `l_`) hold one gauge per layer, keyed by a vector of
// (label-name, label-value) pairs.
#[derive(Default)]
pub struct OpenMetricsStats {
    // Registry that all gauges below are registered into; used to encode
    // the metrics for exposition.
    pub registry: Registry,
    // Total scheduling events in the period.
    pub total: Gauge<i64, AtomicI64>,
    // % that got scheduled directly into an idle CPU.
    pub local: Gauge<f64, AtomicU64>,
    // % of open layer tasks scheduled into occupied idle CPUs.
    pub open_idle: Gauge<f64, AtomicU64>,
    // % which violated configured policies due to CPU affinity restrictions.
    pub affn_viol: Gauge<f64, AtomicU64>,
    // Number of times a CPU skipped dispatching due to a sibling running an
    // exclusive task.
    pub excl_idle: Gauge<f64, AtomicU64>,
    // Number of times an idle sibling CPU was woken up after an exclusive
    // task finished.
    pub excl_wakeup: Gauge<f64, AtomicU64>,
    // CPU time this binary has consumed during the period.
    pub proc_ms: Gauge<i64, AtomicI64>,
    // CPU busy % (100% means all CPUs were fully occupied).
    pub busy: Gauge<f64, AtomicU64>,
    // CPU utilization % (100% means one CPU was fully occupied).
    pub util: Gauge<f64, AtomicU64>,
    // Sum of weight * duty_cycle for all tasks.
    pub load: Gauge<f64, AtomicU64>,
    // Per-layer statistics.  Each metric's meaning matches the help string
    // passed to register!() for it in OpenMetricsStats::new().
    pub l_util: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_util_frac: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_load: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_load_frac: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_tasks: Family<Vec<(String, String)>, Gauge<i64, AtomicI64>>,
    pub l_total: Family<Vec<(String, String)>, Gauge<i64, AtomicI64>>,
    pub l_sel_local: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_enq_wakeup: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_enq_expire: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_enq_last: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_enq_reenq: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_min_exec: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_min_exec_us: Family<Vec<(String, String)>, Gauge<i64, AtomicI64>>,
    pub l_open_idle: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_preempt: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_preempt_first: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_preempt_idle: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_preempt_fail: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_affn_viol: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_keep: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_keep_fail_max_exec: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_keep_fail_busy: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_excl_collision: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_excl_preempt: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_kick: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_yield: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_yield_ignore: Family<Vec<(String, String)>, Gauge<i64, AtomicI64>>,
    pub l_migration: Family<Vec<(String, String)>, Gauge<f64, AtomicU64>>,
    pub l_cur_nr_cpus: Family<Vec<(String, String)>, Gauge<i64, AtomicI64>>,
    pub l_min_nr_cpus: Family<Vec<(String, String)>, Gauge<i64, AtomicI64>>,
    pub l_max_nr_cpus: Family<Vec<(String, String)>, Gauge<i64, AtomicI64>>,
}
impl OpenMetricsStats {
    /// Build a fresh stats container and register every gauge field, under
    /// its own identifier and with a human-readable help string, in the
    /// embedded registry so it appears in the encoded metrics output.
    pub fn new() -> OpenMetricsStats {
        let mut metrics = OpenMetricsStats {
            registry: <Registry>::default(),
            ..Default::default()
        };
        // Helper macro to reduce on some of the boilerplate:
        // $i: The identifier of the metric to register
        // $help: The Help text associated with the metric
        macro_rules! register {
            ($i:ident, $help:expr) => {
                metrics
                    .registry
                    .register(stringify!($i), $help, metrics.$i.clone())
            };
        }
        register!(total, "Total scheduling events in the period");
        register!(local, "% that got scheduled directly into an idle CPU");
        register!(
            open_idle,
            "% of open layer tasks scheduled into occupied idle CPUs"
        );
        register!(
            affn_viol,
            "% which violated configured policies due to CPU affinity restrictions"
        );
        register!(
            excl_idle,
            "Number of times a CPU skipped dispatching due to sibling running an exclusive task"
        );
        register!(
            excl_wakeup,
            "Number of times an idle sibling CPU was woken up after an exclusive task is finished"
        );
        register!(
            proc_ms,
            "CPU time this binary has consumed during the period"
        );
        register!(busy, "CPU busy % (100% means all CPUs were fully occupied)");
        register!(
            util,
            "CPU utilization % (100% means one CPU was fully occupied)"
        );
        register!(load, "Sum of weight * duty_cycle for all tasks");
        register!(
            l_util,
            "CPU utilization of the layer (100% means one CPU was fully occupied)"
        );
        register!(
            l_util_frac,
            "Fraction of total CPU utilization consumed by the layer"
        );
        register!(l_load, "Sum of weight * duty_cycle for tasks in the layer");
        register!(l_load_frac, "Fraction of total load consumed by the layer");
        register!(l_tasks, "Number of tasks in the layer");
        register!(l_total, "Number of scheduling events in the layer");
        register!(
            l_sel_local,
            "% of scheduling events directly into an idle CPU"
        );
        register!(
            l_enq_wakeup,
            "% of scheduling events enqueued to layer after wakeup"
        );
        register!(
            l_enq_expire,
            "% of scheduling events enqueued to layer after slice expiration"
        );
        register!(
            l_enq_last,
            "% of scheduling events enqueued as last runnable task on CPU"
        );
        register!(
            l_enq_reenq,
            "% of scheduling events re-enqueued due to RT preemption"
        );
        register!(
            l_min_exec,
            "Number of times execution duration was shorter than min_exec_us"
        );
        register!(
            l_min_exec_us,
            "Total execution duration extended due to min_exec_us"
        );
        register!(
            l_open_idle,
            "% of scheduling events into idle CPUs occupied by other layers"
        );
        register!(
            l_preempt,
            "% of scheduling events that preempted other tasks"
        );
        register!(
            l_preempt_first,
            "% of scheduling events that first-preempted other tasks"
        );
        register!(
            l_preempt_idle,
            "% of scheduling events that idle-preempted other tasks"
        );
        register!(
            l_preempt_fail,
            "% of scheduling events that attempted to preempt other tasks but failed"
        );
        register!(
            l_affn_viol,
            "% of scheduling events that violated configured policies due to CPU affinity restrictions"
        );
        register!(
            l_keep,
            "% of scheduling events that continued executing after slice expiration"
        );
        register!(
            l_keep_fail_max_exec,
            "% of scheduling events that weren't allowed to continue executing after slice expiration due to overrunning max_exec duration limit"
        );
        register!(
            l_keep_fail_busy,
            "% of scheduling events that weren't allowed to continue executing after slice expiration to accommodate other tasks"
        );
        register!(
            l_excl_collision,
            "Number of times an exclusive task skipped a CPU as the sibling was also exclusive"
        );
        register!(
            l_excl_preempt,
            "Number of times a sibling CPU was preempted for an exclusive task"
        );
        // Fixed typo in user-facing help text: "schduling" -> "scheduling".
        register!(
            l_kick,
            "% of scheduling events that kicked a CPU from enqueue path"
        );
        register!(l_yield, "% of scheduling events that yielded");
        register!(l_yield_ignore, "Number of times yield was ignored");
        register!(l_migration, "% of scheduling events that migrated across CPUs");
        register!(l_cur_nr_cpus, "Current # of CPUs assigned to the layer");
        register!(l_min_nr_cpus, "Minimum # of CPUs assigned to the layer");
        register!(l_max_nr_cpus, "Maximum # of CPUs assigned to the layer");
        metrics
    }
}