mirror of
https://github.com/sched-ext/scx.git
synced 2024-11-24 11:50:23 +00:00
Merge pull request #763 from ryantimwilson/rusty-default-weights-fix
[rusty] Fix load stats when host is under-utilized
This commit is contained in:
commit
2b5829e275
@ -83,4 +83,5 @@ pub use log_recorder::LogRecorderBuilder;
|
||||
|
||||
mod misc;
|
||||
pub use misc::monitor_stats;
|
||||
pub use misc::normalize_load_metric;
|
||||
pub use misc::set_rlimit_infinity;
|
||||
|
@ -90,3 +90,25 @@ pub fn read_file_usize(path: &Path) -> Result<usize> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Load is reported as weight * duty cycle
|
||||
*
|
||||
* In the Linux kernel, EEVDF uses default weight = 1 s.t.
|
||||
* load for a nice-0 thread runnable for time slice = 1
|
||||
*
|
||||
* To conform with cgroup weights convention, sched-ext uses
|
||||
* the convention of default weight = 100 with the formula
|
||||
* 100 * nice ^ 1.5. This means load for a nice-0 thread
|
||||
* runnable for time slice = 100.
|
||||
*
|
||||
* To ensure we report load metrics consistently with the Linux
|
||||
* kernel, we divide load by 100.0 prior to reporting metrics.
|
||||
* This is also more intuitive for users since 1 CPU roughly
|
||||
* means 1 unit of load.
|
||||
*
|
||||
* We only do this prior to reporting as it's easier to work with
|
||||
* weight as integers in BPF / userspace than floating point.
|
||||
*/
|
||||
/// Scales a raw load value down by 100.0 and returns the result; intended to be
/// applied just before a load metric is reported.
pub fn normalize_load_metric(metric: f64) -> f64 {
|
||||
// 100.0 corresponds to the sched-ext default weight of 100 described in the
// block comment preceding this function.
metric / 100.0
|
||||
}
|
||||
|
@ -18,6 +18,7 @@ use log::warn;
|
||||
use scx_stats::prelude::*;
|
||||
use scx_stats_derive::stat_doc;
|
||||
use scx_stats_derive::Stats;
|
||||
use scx_utils::normalize_load_metric;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
@ -182,7 +183,7 @@ impl LayerStats {
|
||||
Self {
|
||||
util: stats.layer_utils[lidx] * 100.0,
|
||||
util_frac: calc_frac(stats.layer_utils[lidx], stats.total_util),
|
||||
load: stats.layer_loads[lidx],
|
||||
load: normalize_load_metric(stats.layer_loads[lidx]),
|
||||
load_adj: calc_frac(stats.layer_load_sums[lidx], stats.total_load_sum),
|
||||
dcycle: calc_frac(stats.layer_dcycle_sums[lidx], stats.total_dcycle_sum),
|
||||
load_frac: calc_frac(stats.layer_loads[lidx], stats.total_load),
|
||||
@ -411,7 +412,7 @@ impl SysStats {
|
||||
proc_ms: stats.processing_dur.as_millis() as u64,
|
||||
busy: stats.cpu_busy * 100.0,
|
||||
util: stats.total_util * 100.0,
|
||||
load: stats.total_load,
|
||||
load: normalize_load_metric(stats.total_load),
|
||||
fallback_cpu: fallback_cpu as u32,
|
||||
layers: BTreeMap::new(),
|
||||
})
|
||||
|
@ -155,6 +155,7 @@ use crate::stats::DomainStats;
|
||||
use crate::stats::NodeStats;
|
||||
use crate::DomainGroup;
|
||||
|
||||
const DEFAULT_WEIGHT: f64 = bpf_intf::consts_LB_DEFAULT_WEIGHT as f64;
|
||||
const RAVG_FRAC_BITS: u32 = bpf_intf::ravg_consts_RAVG_FRAC_BITS;
|
||||
|
||||
fn now_monotonic() -> u64 {
|
||||
@ -446,20 +447,16 @@ impl NumaNode {
|
||||
}
|
||||
|
||||
fn stats(&self) -> NodeStats {
|
||||
let mut stats = NodeStats {
|
||||
load: self.load.load_sum(),
|
||||
imbal: self.load.imbal(),
|
||||
delta: self.load.delta(),
|
||||
doms: BTreeMap::new(),
|
||||
};
|
||||
let mut stats = NodeStats::new(
|
||||
self.load.load_sum(),
|
||||
self.load.imbal(),
|
||||
self.load.delta(),
|
||||
BTreeMap::new(),
|
||||
);
|
||||
for dom in self.domains.iter() {
|
||||
stats.doms.insert(
|
||||
dom.id,
|
||||
DomainStats {
|
||||
load: dom.load.load_sum(),
|
||||
imbal: dom.load.imbal(),
|
||||
delta: dom.load.delta(),
|
||||
},
|
||||
DomainStats::new(dom.load.load_sum(), dom.load.imbal(), dom.load.delta()),
|
||||
);
|
||||
}
|
||||
stats
|
||||
@ -542,8 +539,13 @@ impl<'a, 'b> LoadBalancer<'a, 'b> {
|
||||
|
||||
let (dom_loads, total_load) = if !self.lb_apply_weight {
|
||||
(
|
||||
ledger.dom_dcycle_sums().to_vec(),
|
||||
ledger.global_dcycle_sum(),
|
||||
ledger
|
||||
.dom_dcycle_sums()
|
||||
.to_vec()
|
||||
.into_iter()
|
||||
.map(|d| DEFAULT_WEIGHT * d)
|
||||
.collect(),
|
||||
DEFAULT_WEIGHT * ledger.global_dcycle_sum(),
|
||||
)
|
||||
} else {
|
||||
self.infeas_threshold = ledger.effective_max_weight();
|
||||
@ -696,10 +698,12 @@ impl<'a, 'b> LoadBalancer<'a, 'b> {
|
||||
RAVG_FRAC_BITS,
|
||||
);
|
||||
|
||||
if self.lb_apply_weight {
|
||||
let weight = (task_ctx.weight as f64).min(self.infeas_threshold);
|
||||
load *= weight;
|
||||
}
|
||||
let weight = if self.lb_apply_weight {
|
||||
(task_ctx.weight as f64).min(self.infeas_threshold)
|
||||
} else {
|
||||
DEFAULT_WEIGHT
|
||||
};
|
||||
load *= weight;
|
||||
|
||||
dom.tasks.insert(TaskInfo {
|
||||
tptr,
|
||||
|
@ -12,6 +12,7 @@ use chrono::Local;
|
||||
use scx_stats::prelude::*;
|
||||
use scx_stats_derive::stat_doc;
|
||||
use scx_stats_derive::Stats;
|
||||
use scx_utils::normalize_load_metric;
|
||||
use scx_utils::Cpumask;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
@ -39,6 +40,14 @@ pub struct DomainStats {
|
||||
}
|
||||
|
||||
impl DomainStats {
|
||||
/// Builds a `DomainStats` from raw `load`, `imbal` and `delta` values.
///
/// Each of the three inputs is passed through `normalize_load_metric` before
/// being stored, so callers hand in un-normalized values.
pub fn new(load: f64, imbal: f64, delta: f64) -> Self {
|
||||
Self {
|
||||
load: normalize_load_metric(load),
|
||||
imbal: normalize_load_metric(imbal),
|
||||
delta: normalize_load_metric(delta),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn format<W: Write>(&self, w: &mut W, id: usize) -> Result<()> {
|
||||
writeln!(
|
||||
w,
|
||||
@ -67,6 +76,15 @@ pub struct NodeStats {
|
||||
}
|
||||
|
||||
impl NodeStats {
|
||||
/// Builds a `NodeStats` from raw `load`, `imbal` and `delta` values plus a map
/// of per-domain stats.
///
/// `load`, `imbal` and `delta` are each passed through `normalize_load_metric`
/// before being stored; `doms` is stored as given, without further processing.
pub fn new(load: f64, imbal: f64, delta: f64, doms: BTreeMap<usize, DomainStats>) -> Self {
|
||||
Self {
|
||||
load: normalize_load_metric(load),
|
||||
imbal: normalize_load_metric(imbal),
|
||||
delta: normalize_load_metric(delta),
|
||||
doms,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn format<W: Write>(&self, w: &mut W, id: usize) -> Result<()> {
|
||||
writeln!(
|
||||
w,
|
||||
|
Loading…
Reference in New Issue
Block a user