mirror of
https://github.com/JakeHillion/scx.git
synced 2024-11-26 03:20:24 +00:00
Merge pull request #514 from sched-ext/htejun/scx_stats
scx_stats, scx_layered: Implement independent stats client sessions
This commit is contained in:
commit
3498a2b899
@ -11,6 +11,7 @@ description = "Statistics transport library for sched_ext schedulers"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.65"
|
||||
crossbeam = "0.8.4"
|
||||
libc = "0.2.137"
|
||||
log = "0.4.17"
|
||||
quote = "1.0"
|
||||
|
@ -90,7 +90,7 @@ The statistics server which serves the above structs through a UNIX domain
|
||||
socket can be launched as follows:
|
||||
|
||||
```rust
|
||||
ScxStatsServer::new()
|
||||
let _server = ScxStatsServer::new()
|
||||
.set_path(&path)
|
||||
.add_stats_meta(ClusterStats::meta())
|
||||
.add_stats_meta(DomainStats::meta())
|
||||
@ -111,9 +111,9 @@ return `serde_json::Value`. Note that `scx_stats::ToJson` automatically adds
|
||||
`.to_json()` to structs which implement both `scx_stats::Meta` and
|
||||
`serde::Serialize`.
|
||||
|
||||
The above will launch the statistics server listening on `@path`. The client
|
||||
side is also simple. Taken from
|
||||
[`examples/client.rs`](./examples/client.rs):
|
||||
The above will launch the statistics server listening on `@path`. Note that
|
||||
the server will shutdown when `_server` variable is dropped. The client side
|
||||
is also simple. Taken from [`examples/client.rs`](./examples/client.rs):
|
||||
|
||||
```rust
|
||||
let mut client = ScxStatsClient::new().set_path(path).connect().unwrap();
|
||||
|
@ -41,6 +41,8 @@ fn main() {
|
||||
println!("{:#?}", &resp);
|
||||
|
||||
println!("\n===== Requesting \"stats_meta\" but receiving with serde_json::Value:");
|
||||
let resp = client.request::<serde_json::Value>("stats_meta", vec![]).unwrap();
|
||||
let resp = client
|
||||
.request::<serde_json::Value>("stats_meta", vec![])
|
||||
.unwrap();
|
||||
println!("{}", serde_json::to_string_pretty(&resp).unwrap());
|
||||
}
|
||||
|
@ -1,33 +1,41 @@
|
||||
use scx_stats::{ScxStatsServer, Meta, ToJson};
|
||||
use log::{debug, warn};
|
||||
use scx_stats::{Meta, ScxStatsServer, ToJson};
|
||||
use scx_stats_derive::Stats;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::BTreeMap;
|
||||
use std::env::args;
|
||||
use std::io::Read;
|
||||
use std::thread::{current, spawn, ThreadId};
|
||||
|
||||
// Hacky definition sharing. See stats_def.rs.h.
|
||||
include!("stats_defs.rs.h");
|
||||
|
||||
fn main() {
|
||||
simple_logger::SimpleLogger::new()
|
||||
.with_level(log::LevelFilter::Info)
|
||||
.env()
|
||||
.init()
|
||||
.unwrap();
|
||||
|
||||
let stats = ClusterStats {
|
||||
name: "test cluster".into(),
|
||||
at: 12345,
|
||||
bitmap: vec![0xdeadbeef, 0xbeefdead],
|
||||
bitmap: vec![0xdeadbeef, 0xbeefdead],
|
||||
doms_dict: BTreeMap::from([
|
||||
(
|
||||
0,
|
||||
DomainStats {
|
||||
name: "domain 0".into(),
|
||||
events: 1234,
|
||||
pressure: 1.234,
|
||||
events: 1234,
|
||||
pressure: 1.234,
|
||||
},
|
||||
),
|
||||
(
|
||||
3,
|
||||
DomainStats {
|
||||
name: "domain 3".into(),
|
||||
events: 5678,
|
||||
pressure: 5.678,
|
||||
events: 5678,
|
||||
pressure: 5.678,
|
||||
},
|
||||
),
|
||||
]),
|
||||
@ -36,14 +44,38 @@ fn main() {
|
||||
std::assert_eq!(args().len(), 2, "Usage: server UNIX_SOCKET_PATH");
|
||||
let path = args().nth(1).unwrap();
|
||||
|
||||
ScxStatsServer::new()
|
||||
// If communication from the stats generating closure is not necessary,
|
||||
// ScxStatsServer::<(), ()> can be used. This example sends thread ID
|
||||
// and receives the formatted string just for demonstration.
|
||||
let server = ScxStatsServer::<ThreadId, String>::new()
|
||||
.set_path(&path)
|
||||
.add_stats_meta(ClusterStats::meta())
|
||||
.add_stats_meta(DomainStats::meta())
|
||||
.add_stats("top", Box::new(move |_| stats.to_json()))
|
||||
.add_stats(
|
||||
"top",
|
||||
Box::new(move |_args, (tx, rx)| {
|
||||
let id = current().id();
|
||||
let res = tx.send(id);
|
||||
debug!("Sendt {:?} {:?}", id, &res);
|
||||
let res = rx.recv();
|
||||
debug!("Recevied {:?}", res);
|
||||
stats.to_json()
|
||||
}),
|
||||
)
|
||||
.launch()
|
||||
.unwrap();
|
||||
|
||||
debug!("Doing unnecessary server channel handling");
|
||||
let (tx, rx) = server.channels();
|
||||
spawn(move || {
|
||||
while let Ok(id) = rx.recv() {
|
||||
if let Err(e) = tx.send(format!("hello {:?}", &id)) {
|
||||
warn!("Server channel errored ({:?})", &e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
println!(
|
||||
"Server listening. Run `client {:?}`.\n\
|
||||
Use `socat - UNIX-CONNECT:{:?}` for raw connection.\n\
|
||||
|
@ -7,7 +7,10 @@ pub use stats::{
|
||||
};
|
||||
|
||||
mod server;
|
||||
pub use server::{ScxStatsErrno, ScxStatsRequest, ScxStatsResponse, ScxStatsServer, ToJson};
|
||||
pub use server::{
|
||||
ScxStatsErrno, ScxStatsOps, ScxStatsRequest, ScxStatsResponse, ScxStatsServer, StatsCloser,
|
||||
StatsOpener, StatsReader, StatsReaderSend, StatsReaderSync, ToJson,
|
||||
};
|
||||
|
||||
mod client;
|
||||
pub use client::ScxStatsClient;
|
||||
|
@ -1,18 +1,104 @@
|
||||
use crate::ScxStatsClient;
|
||||
use crate::{Meta, ScxStatsMeta};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use log::warn;
|
||||
use crossbeam::channel::{unbounded, Receiver, RecvError, Select, SendError, Sender};
|
||||
use log::{debug, error, warn};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use std::collections::BTreeMap;
|
||||
use std::io::{BufRead, BufReader, Write};
|
||||
use std::os::unix::net::{UnixListener, UnixStream};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::thread::spawn;
|
||||
|
||||
type StatMap = BTreeMap<
|
||||
String,
|
||||
Arc<Mutex<Box<dyn FnMut(&BTreeMap<String, String>) -> Result<serde_json::Value> + Send>>>,
|
||||
>;
|
||||
pub trait StatsReader<Req, Res>:
|
||||
FnMut(&BTreeMap<String, String>, (&Sender<Req>, &Receiver<Res>)) -> Result<Value>
|
||||
{
|
||||
}
|
||||
impl<
|
||||
Req,
|
||||
Res,
|
||||
T: FnMut(&BTreeMap<String, String>, (&Sender<Req>, &Receiver<Res>)) -> Result<Value>,
|
||||
> StatsReader<Req, Res> for T
|
||||
{
|
||||
}
|
||||
|
||||
pub trait StatsReaderSend<Req, Res>:
|
||||
FnMut(&BTreeMap<String, String>, (&Sender<Req>, &Receiver<Res>)) -> Result<Value> + Send
|
||||
{
|
||||
}
|
||||
impl<
|
||||
Req,
|
||||
Res,
|
||||
T: FnMut(&BTreeMap<String, String>, (&Sender<Req>, &Receiver<Res>)) -> Result<Value> + Send,
|
||||
> StatsReaderSend<Req, Res> for T
|
||||
{
|
||||
}
|
||||
|
||||
pub trait StatsReaderSync<Req, Res>:
|
||||
Fn(&BTreeMap<String, String>, (&Sender<Req>, &Receiver<Res>)) -> Result<Value> + Send + Sync
|
||||
{
|
||||
}
|
||||
impl<
|
||||
Req,
|
||||
Res,
|
||||
T: Fn(&BTreeMap<String, String>, (&Sender<Req>, &Receiver<Res>)) -> Result<Value>
|
||||
+ Send
|
||||
+ Sync,
|
||||
> StatsReaderSync<Req, Res> for T
|
||||
{
|
||||
}
|
||||
|
||||
pub trait StatsOpener<Req, Res>:
|
||||
FnMut((&Sender<Req>, &Receiver<Res>)) -> Result<Box<dyn StatsReader<Req, Res>>> + Send
|
||||
{
|
||||
}
|
||||
impl<
|
||||
Req,
|
||||
Res,
|
||||
T: FnMut((&Sender<Req>, &Receiver<Res>)) -> Result<Box<dyn StatsReader<Req, Res>>> + Send,
|
||||
> StatsOpener<Req, Res> for T
|
||||
{
|
||||
}
|
||||
|
||||
pub trait StatsCloser<Req, Res>: FnOnce((&Sender<Req>, &Receiver<Res>)) + Send {}
|
||||
impl<Req, Res, T: FnOnce((&Sender<Req>, &Receiver<Res>)) + Send> StatsCloser<Req, Res> for T {}
|
||||
|
||||
pub struct ScxStatsOps<Req, Res> {
|
||||
pub open: Box<dyn StatsOpener<Req, Res>>,
|
||||
pub close: Option<Box<dyn StatsCloser<Req, Res>>>,
|
||||
}
|
||||
|
||||
struct ScxStatsOpenOps<Req, Res> {
|
||||
map: BTreeMap<
|
||||
String,
|
||||
(
|
||||
Arc<Mutex<ScxStatsOps<Req, Res>>>,
|
||||
Box<dyn StatsReader<Req, Res>>,
|
||||
ChannelPair<Req, Res>,
|
||||
),
|
||||
>,
|
||||
}
|
||||
|
||||
impl<Req, Res> ScxStatsOpenOps<Req, Res> {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
map: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Req, Res> std::ops::Drop for ScxStatsOpenOps<Req, Res> {
|
||||
fn drop(&mut self) {
|
||||
for (_, (ops, _, ch)) in self.map.iter_mut() {
|
||||
if let Some(close) = ops.lock().unwrap().close.take() {
|
||||
close((&ch.req, &ch.res));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct ScxStatsRequest {
|
||||
@ -33,7 +119,7 @@ impl ScxStatsRequest {
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct ScxStatsResponse {
|
||||
pub errno: i32,
|
||||
pub args: BTreeMap<String, serde_json::Value>,
|
||||
pub args: BTreeMap<String, Value>,
|
||||
}
|
||||
|
||||
pub struct ScxStatsErrno(pub i32);
|
||||
@ -50,25 +136,72 @@ impl std::fmt::Debug for ScxStatsErrno {
|
||||
}
|
||||
}
|
||||
|
||||
struct ScxStatsServerData {
|
||||
struct ChannelPair<Req, Res> {
|
||||
req: Sender<Req>,
|
||||
res: Receiver<Res>,
|
||||
}
|
||||
|
||||
impl<Req, Res> ChannelPair<Req, Res> {
|
||||
fn bidi() -> (ChannelPair<Req, Res>, ChannelPair<Res, Req>) {
|
||||
let (req, res) = (unbounded::<Req>(), unbounded::<Res>());
|
||||
(
|
||||
ChannelPair {
|
||||
req: req.0,
|
||||
res: res.1,
|
||||
},
|
||||
ChannelPair {
|
||||
req: res.0,
|
||||
res: req.1,
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Req, Res> Clone for ChannelPair<Req, Res> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
req: self.req.clone(),
|
||||
res: self.res.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct ScxStatsServerData<Req, Res> {
|
||||
stats_meta: BTreeMap<String, ScxStatsMeta>,
|
||||
stats: StatMap,
|
||||
stats_ops: BTreeMap<String, Arc<Mutex<ScxStatsOps<Req, Res>>>>,
|
||||
}
|
||||
|
||||
struct ScxStatsServerInner {
|
||||
struct ScxStatsServerInner<Req, Res>
|
||||
where
|
||||
Req: Send + 'static,
|
||||
Res: Send + 'static,
|
||||
{
|
||||
listener: UnixListener,
|
||||
data: Arc<Mutex<ScxStatsServerData>>,
|
||||
data: Arc<Mutex<ScxStatsServerData<Req, Res>>>,
|
||||
inner_ch: ChannelPair<Req, Res>,
|
||||
exit: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
impl ScxStatsServerInner {
|
||||
impl<Req, Res> ScxStatsServerInner<Req, Res>
|
||||
where
|
||||
Req: Send + 'static,
|
||||
Res: Send + 'static,
|
||||
{
|
||||
fn new(
|
||||
listener: UnixListener,
|
||||
stats_meta: BTreeMap<String, ScxStatsMeta>,
|
||||
stats: StatMap,
|
||||
stats_ops: BTreeMap<String, Arc<Mutex<ScxStatsOps<Req, Res>>>>,
|
||||
inner_ch: ChannelPair<Req, Res>,
|
||||
exit: Arc<AtomicBool>,
|
||||
) -> Self {
|
||||
Self {
|
||||
listener,
|
||||
data: Arc::new(Mutex::new(ScxStatsServerData { stats_meta, stats })),
|
||||
data: Arc::new(Mutex::new(ScxStatsServerData {
|
||||
stats_meta,
|
||||
stats_ops,
|
||||
})),
|
||||
inner_ch,
|
||||
exit,
|
||||
}
|
||||
}
|
||||
|
||||
@ -86,7 +219,9 @@ impl ScxStatsServerInner {
|
||||
|
||||
fn handle_request(
|
||||
line: String,
|
||||
data: &Arc<Mutex<ScxStatsServerData>>,
|
||||
data: &Arc<Mutex<ScxStatsServerData<Req, Res>>>,
|
||||
ch: &ChannelPair<Req, Res>,
|
||||
open_ops: &mut ScxStatsOpenOps<Req, Res>,
|
||||
) -> Result<ScxStatsResponse> {
|
||||
let req: ScxStatsRequest = serde_json::from_str(&line)?;
|
||||
|
||||
@ -97,13 +232,22 @@ impl ScxStatsServerInner {
|
||||
None => "top",
|
||||
};
|
||||
|
||||
let handler = match data.lock().unwrap().stats.get(target) {
|
||||
let ops = match data.lock().unwrap().stats_ops.get(target) {
|
||||
Some(v) => v.clone(),
|
||||
None => Err(anyhow!("unknown stat target {:?}", req)
|
||||
.context(ScxStatsErrno(libc::EINVAL)))?,
|
||||
};
|
||||
|
||||
let resp = handler.lock().unwrap()(&req.args)?;
|
||||
if !open_ops.map.contains_key(target) {
|
||||
let read = (ops.lock().unwrap().open)((&ch.req, &ch.res))?;
|
||||
open_ops
|
||||
.map
|
||||
.insert(target.into(), (ops.clone(), read, ch.clone()));
|
||||
}
|
||||
|
||||
let read = &mut open_ops.map.get_mut(target).unwrap().1;
|
||||
|
||||
let resp = read(&req.args, (&ch.req, &ch.res))?;
|
||||
|
||||
Self::build_resp(0, &resp)
|
||||
}
|
||||
@ -112,17 +256,27 @@ impl ScxStatsServerInner {
|
||||
}
|
||||
}
|
||||
|
||||
fn serve(mut stream: UnixStream, data: Arc<Mutex<ScxStatsServerData>>) -> Result<()> {
|
||||
let mut reader = BufReader::new(stream.try_clone()?);
|
||||
fn serve(
|
||||
mut stream: UnixStream,
|
||||
data: Arc<Mutex<ScxStatsServerData<Req, Res>>>,
|
||||
inner_ch: ChannelPair<Req, Res>,
|
||||
exit: Arc<AtomicBool>,
|
||||
) -> Result<()> {
|
||||
let mut stream_reader = BufReader::new(stream.try_clone()?);
|
||||
let mut open_ops = ScxStatsOpenOps::new();
|
||||
|
||||
loop {
|
||||
let mut line = String::new();
|
||||
reader.read_line(&mut line)?;
|
||||
stream_reader.read_line(&mut line)?;
|
||||
if line.len() == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
if exit.load(Ordering::Relaxed) {
|
||||
debug!("server exiting due to exit");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let resp = match Self::handle_request(line, &data) {
|
||||
let resp = match Self::handle_request(line, &data, &inner_ch, &mut open_ops) {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
let errno = match e.downcast_ref::<ScxStatsErrno>() {
|
||||
@ -138,37 +292,151 @@ impl ScxStatsServerInner {
|
||||
}
|
||||
}
|
||||
|
||||
fn listen(self) {
|
||||
loop {
|
||||
for stream in self.listener.incoming() {
|
||||
match stream {
|
||||
Ok(stream) => {
|
||||
let data = self.data.clone();
|
||||
spawn(move || {
|
||||
if let Err(e) = Self::serve(stream, data) {
|
||||
warn!("stat communication errored ({})", &e);
|
||||
fn proxy(inner_ch: ChannelPair<Req, Res>, add_res: Receiver<ChannelPair<Res, Req>>) {
|
||||
let mut chs_cursor = 0;
|
||||
let mut chs = BTreeMap::<u64, ChannelPair<Res, Req>>::new();
|
||||
let mut ch_to_add: Option<ChannelPair<Res, Req>> = None;
|
||||
let mut idx_to_drop: Option<u64> = None;
|
||||
|
||||
'outer: loop {
|
||||
if let Some(new_ch) = ch_to_add.take() {
|
||||
let idx = chs_cursor;
|
||||
chs_cursor += 1;
|
||||
chs.insert(idx, new_ch);
|
||||
debug!("proxy: added new channel idx={}, total={}", idx, chs.len());
|
||||
}
|
||||
|
||||
if let Some(idx) = idx_to_drop.take() {
|
||||
debug!("proxy: dropping channel {}, total={}", idx, chs.len());
|
||||
chs.remove(&idx).unwrap();
|
||||
}
|
||||
|
||||
let mut sel = Select::new();
|
||||
let inner_idx = sel.recv(&inner_ch.res);
|
||||
let add_idx = sel.recv(&add_res);
|
||||
|
||||
let mut chs_sel_idx = BTreeMap::<usize, u64>::new();
|
||||
for (idx, cp) in chs.iter() {
|
||||
let sel_idx = sel.recv(&cp.res);
|
||||
chs_sel_idx.insert(sel_idx, *idx);
|
||||
}
|
||||
|
||||
'select: loop {
|
||||
let oper = sel.select();
|
||||
match oper.index() {
|
||||
sel_idx if sel_idx == add_idx => match oper.recv(&add_res) {
|
||||
Ok(ch) => {
|
||||
ch_to_add = Some(ch);
|
||||
debug!("proxy: received new channel from add_res");
|
||||
break 'select;
|
||||
}
|
||||
Err(RecvError) => {
|
||||
debug!("proxy: add_res disconnected, terminating");
|
||||
break 'outer;
|
||||
}
|
||||
},
|
||||
sel_idx if sel_idx == inner_idx => match oper.recv(&inner_ch.res) {
|
||||
Ok(_) => {
|
||||
error!("proxy: unexpected data in ScxStatsServer.channels().0");
|
||||
panic!();
|
||||
}
|
||||
Err(RecvError) => break 'outer,
|
||||
},
|
||||
sel_idx => {
|
||||
let idx = chs_sel_idx.get(&sel_idx).unwrap();
|
||||
let pair = chs.get(idx).unwrap();
|
||||
|
||||
let req = match oper.recv(&pair.res) {
|
||||
Ok(v) => v,
|
||||
Err(RecvError) => {
|
||||
idx_to_drop = Some(*idx);
|
||||
break 'select;
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
match inner_ch.req.send(req) {
|
||||
Ok(()) => {}
|
||||
Err(SendError(..)) => break 'outer,
|
||||
}
|
||||
|
||||
let resp = match inner_ch.res.recv() {
|
||||
Ok(v) => v,
|
||||
Err(RecvError) => break 'outer,
|
||||
};
|
||||
|
||||
match pair.req.send(resp) {
|
||||
Ok(()) => {}
|
||||
Err(SendError(..)) => {
|
||||
idx_to_drop = Some(*idx);
|
||||
break 'select;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => warn!("failed to accept stat connection ({})", &e),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn listen(self) {
|
||||
let inner_ch_copy = self.inner_ch.clone();
|
||||
let (add_req, add_res) = unbounded::<ChannelPair<Res, Req>>();
|
||||
|
||||
spawn(move || Self::proxy(inner_ch_copy, add_res));
|
||||
|
||||
for stream in self.listener.incoming() {
|
||||
if self.exit.load(Ordering::Relaxed) {
|
||||
debug!("listener exiting");
|
||||
break;
|
||||
}
|
||||
match stream {
|
||||
Ok(stream) => {
|
||||
let data = self.data.clone();
|
||||
let exit = self.exit.clone();
|
||||
|
||||
let (req_pair, res_pair) = ChannelPair::<Req, Res>::bidi();
|
||||
match add_req.send(res_pair) {
|
||||
Ok(()) => debug!("sent new channel to proxy"),
|
||||
Err(e) => warn!("ScxStatsServer::proxy() failed ({})", &e),
|
||||
}
|
||||
|
||||
spawn(move || {
|
||||
if let Err(e) = Self::serve(stream, data, req_pair, exit) {
|
||||
warn!("stat communication errored ({})", &e);
|
||||
}
|
||||
});
|
||||
}
|
||||
Err(e) => warn!("failed to accept stat connection ({})", &e),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ScxStatsServer {
|
||||
pub struct ScxStatsServer<Req, Res>
|
||||
where
|
||||
Req: Send + 'static,
|
||||
Res: Send + 'static,
|
||||
{
|
||||
base_path: PathBuf,
|
||||
sched_path: PathBuf,
|
||||
stats_path: PathBuf,
|
||||
path: Option<PathBuf>,
|
||||
|
||||
stats_meta_holder: BTreeMap<String, ScxStatsMeta>,
|
||||
stats_holder: StatMap,
|
||||
stats_ops_holder: BTreeMap<String, Arc<Mutex<ScxStatsOps<Req, Res>>>>,
|
||||
|
||||
outer_ch: ChannelPair<Res, Req>,
|
||||
inner_ch: Option<ChannelPair<Req, Res>>,
|
||||
exit: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
impl ScxStatsServer {
|
||||
impl<Req, Res> ScxStatsServer<Req, Res>
|
||||
where
|
||||
Req: Send + 'static,
|
||||
Res: Send + 'static,
|
||||
{
|
||||
pub fn new() -> Self {
|
||||
let (ich, och) = ChannelPair::<Req, Res>::bidi();
|
||||
|
||||
Self {
|
||||
base_path: PathBuf::from("/var/run/scx"),
|
||||
sched_path: PathBuf::from("root"),
|
||||
@ -176,7 +444,11 @@ impl ScxStatsServer {
|
||||
path: None,
|
||||
|
||||
stats_meta_holder: BTreeMap::new(),
|
||||
stats_holder: BTreeMap::new(),
|
||||
stats_ops_holder: BTreeMap::new(),
|
||||
|
||||
outer_ch: och,
|
||||
inner_ch: Some(ich),
|
||||
exit: Arc::new(AtomicBool::new(false)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -185,16 +457,28 @@ impl ScxStatsServer {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn add_stats(
|
||||
mut self,
|
||||
name: &str,
|
||||
fetch: Box<dyn FnMut(&BTreeMap<String, String>) -> Result<serde_json::Value> + Send>,
|
||||
) -> Self {
|
||||
self.stats_holder
|
||||
.insert(name.to_string(), Arc::new(Mutex::new(Box::new(fetch))));
|
||||
pub fn add_stats_ops(mut self, name: &str, ops: ScxStatsOps<Req, Res>) -> Self {
|
||||
self.stats_ops_holder
|
||||
.insert(name.to_string(), Arc::new(Mutex::new(ops)));
|
||||
self
|
||||
}
|
||||
|
||||
pub fn add_stats(self, name: &str, fetch: Box<dyn StatsReaderSend<Req, Res>>) -> Self {
|
||||
let wrapped_fetch = Mutex::new(fetch);
|
||||
let read: Box<dyn StatsReaderSync<Req, Res>> =
|
||||
Box::new(move |args, chan| wrapped_fetch.lock().unwrap()(args, chan));
|
||||
let wrapped_read = Arc::new(read);
|
||||
let ops = ScxStatsOps {
|
||||
open: Box::new(move |_| {
|
||||
let copy = wrapped_read.clone();
|
||||
Ok(Box::new(move |args, chan| copy(args, chan)))
|
||||
}),
|
||||
close: None,
|
||||
};
|
||||
|
||||
self.add_stats_ops(name, ops)
|
||||
}
|
||||
|
||||
pub fn set_base_path<P: AsRef<Path>>(mut self, path: P) -> Self {
|
||||
self.base_path = PathBuf::from(path.as_ref());
|
||||
self
|
||||
@ -238,24 +522,47 @@ impl ScxStatsServer {
|
||||
let mut stats_meta = BTreeMap::new();
|
||||
let mut stats = BTreeMap::new();
|
||||
std::mem::swap(&mut stats_meta, &mut self.stats_meta_holder);
|
||||
std::mem::swap(&mut stats, &mut self.stats_holder);
|
||||
std::mem::swap(&mut stats, &mut self.stats_ops_holder);
|
||||
|
||||
let inner = ScxStatsServerInner::new(listener, stats_meta, stats);
|
||||
let inner = ScxStatsServerInner::new(
|
||||
listener,
|
||||
stats_meta,
|
||||
stats,
|
||||
self.inner_ch.take().unwrap(),
|
||||
self.exit.clone(),
|
||||
);
|
||||
|
||||
spawn(move || inner.listen());
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
pub fn channels(&self) -> (Sender<Res>, Receiver<Req>) {
|
||||
(self.outer_ch.req.clone(), self.outer_ch.res.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl<Req, Res> std::ops::Drop for ScxStatsServer<Req, Res>
|
||||
where
|
||||
Req: Send + 'static,
|
||||
Res: Send + 'static,
|
||||
{
|
||||
fn drop(&mut self) {
|
||||
self.exit.store(true, Ordering::Relaxed);
|
||||
if let Some(path) = self.path.as_ref() {
|
||||
let _ = ScxStatsClient::new().set_path(path).connect();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ToJson {
|
||||
fn to_json(&self) -> Result<serde_json::Value>;
|
||||
fn to_json(&self) -> Result<Value>;
|
||||
}
|
||||
|
||||
impl<T> ToJson for T
|
||||
where
|
||||
T: Meta + Serialize,
|
||||
{
|
||||
fn to_json(&self) -> Result<serde_json::Value> {
|
||||
fn to_json(&self) -> Result<Value> {
|
||||
Ok(serde_json::to_value(self)?)
|
||||
}
|
||||
}
|
||||
|
43
scheds/rust/scx_layered/Cargo.lock
generated
43
scheds/rust/scx_layered/Cargo.lock
generated
@ -338,6 +338,38 @@ version = "0.8.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8"
|
||||
dependencies = [
|
||||
"crossbeam-channel",
|
||||
"crossbeam-deque",
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-queue",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-deque"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
|
||||
dependencies = [
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.18"
|
||||
@ -347,6 +379,15 @@ dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-queue"
|
||||
version = "0.3.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.20"
|
||||
@ -988,6 +1029,7 @@ dependencies = [
|
||||
"bitvec",
|
||||
"chrono",
|
||||
"clap",
|
||||
"crossbeam",
|
||||
"ctrlc",
|
||||
"fb_procfs",
|
||||
"lazy_static",
|
||||
@ -1007,6 +1049,7 @@ name = "scx_stats"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"crossbeam",
|
||||
"libc",
|
||||
"log",
|
||||
"quote",
|
||||
|
@ -11,6 +11,7 @@ anyhow = "1.0.65"
|
||||
bitvec = "1.0"
|
||||
chrono = "0.4"
|
||||
clap = { version = "4.1", features = ["derive", "env", "unicode", "wrap_help"] }
|
||||
crossbeam = "0.8.4"
|
||||
ctrlc = { version = "3.1", features = ["termination"] }
|
||||
fb_procfs = "0.7"
|
||||
lazy_static = "1.4"
|
||||
|
@ -7,9 +7,12 @@ mod stats;
|
||||
pub use bpf_skel::*;
|
||||
pub mod bpf_intf;
|
||||
use stats::LayerStats;
|
||||
use stats::StatsReq;
|
||||
use stats::StatsRes;
|
||||
use stats::SysStats;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::BTreeSet;
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::CString;
|
||||
use std::fs;
|
||||
use std::io::Read;
|
||||
@ -19,7 +22,8 @@ use std::ops::Sub;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
use std::thread::spawn;
|
||||
use std::thread::ThreadId;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
@ -30,6 +34,7 @@ use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use bitvec::prelude::*;
|
||||
use clap::Parser;
|
||||
use crossbeam::channel::RecvTimeoutError;
|
||||
use libbpf_rs::skel::OpenSkel;
|
||||
use libbpf_rs::skel::Skel;
|
||||
use libbpf_rs::skel::SkelBuilder;
|
||||
@ -39,6 +44,7 @@ use log::debug;
|
||||
use log::info;
|
||||
use log::trace;
|
||||
use log::warn;
|
||||
use scx_stats::ScxStatsServer;
|
||||
use scx_utils::compat;
|
||||
use scx_utils::init_libbpf_logging;
|
||||
use scx_utils::ravg::ravg_read;
|
||||
@ -234,10 +240,12 @@ lazy_static::lazy_static! {
|
||||
/// Monitoring Statistics
|
||||
/// =====================
|
||||
///
|
||||
/// scx_stat server will listen on /var/run/scx/root/stat. Monitor
|
||||
/// statistics by runing `scx_layered monitor`.
|
||||
/// Run with `--monitor INTERVAL` added to enable stats monitoring. There is
|
||||
/// also scx_stat server listening on /var/run/scx/root/stat and you can
|
||||
/// monitor statistics by running `scx_layered --monitor INTERVAL`
|
||||
/// separately.
|
||||
///
|
||||
/// $ scx_layered --monitor
|
||||
/// $ scx_layered --monitor 1
|
||||
/// tot= 117909 local=86.20 open_idle= 0.21 affn_viol= 1.37 proc=6ms
|
||||
/// busy= 34.2 util= 1733.6 load= 21744.1 fallback_cpu= 1
|
||||
/// batch : util/frac= 11.8/ 0.7 load/frac= 29.7: 0.1 tasks= 2597
|
||||
@ -307,10 +315,6 @@ struct Opts {
|
||||
#[clap(short = 'i', long, default_value = "0.1")]
|
||||
interval: f64,
|
||||
|
||||
/// Statistics interval in seconds.
|
||||
#[clap(short = 'I', long, default_value = "2.0")]
|
||||
stats_interval: f64,
|
||||
|
||||
/// Disable load-fraction based max layer CPU limit. ***NOTE***
|
||||
/// load-fraction calculation is currently broken due to lack of
|
||||
/// infeasible weight adjustments. Setting this option is recommended.
|
||||
@ -335,9 +339,10 @@ struct Opts {
|
||||
#[clap(short = 'e', long)]
|
||||
example: Option<String>,
|
||||
|
||||
/// Run in monitor mode.
|
||||
/// Enable stats monitoring with the specified interval. If no layer
|
||||
/// specs are specified, run in monitor mode.
|
||||
#[clap(long)]
|
||||
monitor: bool,
|
||||
monitor: Option<f64>,
|
||||
|
||||
/// Layer specification. See --help.
|
||||
specs: Vec<String>,
|
||||
@ -609,6 +614,7 @@ impl<'a, 'b> Sub<&'b BpfStats> for &'a BpfStats {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct Stats {
|
||||
nr_layers: usize,
|
||||
at: Instant,
|
||||
@ -627,6 +633,9 @@ struct Stats {
|
||||
|
||||
bpf_stats: BpfStats,
|
||||
prev_bpf_stats: BpfStats,
|
||||
|
||||
processing_dur: Duration,
|
||||
prev_processing_dur: Duration,
|
||||
}
|
||||
|
||||
impl Stats {
|
||||
@ -668,7 +677,8 @@ impl Stats {
|
||||
|
||||
fn new(skel: &mut BpfSkel, proc_reader: &procfs::ProcReader) -> Result<Self> {
|
||||
let nr_layers = skel.maps.rodata_data.nr_layers as usize;
|
||||
let bpf_stats = BpfStats::read(&read_cpu_ctxs(skel)?, nr_layers);
|
||||
let cpu_ctxs = read_cpu_ctxs(skel)?;
|
||||
let bpf_stats = BpfStats::read(&cpu_ctxs, nr_layers);
|
||||
|
||||
Ok(Self {
|
||||
at: Instant::now(),
|
||||
@ -681,13 +691,16 @@ impl Stats {
|
||||
|
||||
total_util: 0.0,
|
||||
layer_utils: vec![0.0; nr_layers],
|
||||
prev_layer_cycles: vec![0; nr_layers],
|
||||
prev_layer_cycles: Self::read_layer_cycles(&cpu_ctxs, nr_layers),
|
||||
|
||||
cpu_busy: 0.0,
|
||||
prev_total_cpu: read_total_cpu(&proc_reader)?,
|
||||
|
||||
bpf_stats: bpf_stats.clone(),
|
||||
prev_bpf_stats: bpf_stats,
|
||||
|
||||
processing_dur: Default::default(),
|
||||
prev_processing_dur: Default::default(),
|
||||
})
|
||||
}
|
||||
|
||||
@ -696,6 +709,7 @@ impl Stats {
|
||||
skel: &mut BpfSkel,
|
||||
proc_reader: &procfs::ProcReader,
|
||||
now: Instant,
|
||||
cur_processing_dur: Duration,
|
||||
) -> Result<()> {
|
||||
let elapsed = now.duration_since(self.at).as_secs_f64() as f64;
|
||||
let cpu_ctxs = read_cpu_ctxs(skel)?;
|
||||
@ -732,6 +746,10 @@ impl Stats {
|
||||
let cur_bpf_stats = BpfStats::read(&cpu_ctxs, self.nr_layers);
|
||||
let bpf_stats = &cur_bpf_stats - &self.prev_bpf_stats;
|
||||
|
||||
let processing_dur = cur_processing_dur
|
||||
.checked_sub(self.prev_processing_dur)
|
||||
.unwrap();
|
||||
|
||||
*self = Self {
|
||||
at: now,
|
||||
nr_layers: self.nr_layers,
|
||||
@ -750,6 +768,9 @@ impl Stats {
|
||||
|
||||
bpf_stats,
|
||||
prev_bpf_stats: cur_bpf_stats,
|
||||
|
||||
processing_dur,
|
||||
prev_processing_dur: cur_processing_dur,
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
@ -1225,7 +1246,6 @@ struct Scheduler<'a, 'b> {
|
||||
layer_specs: &'b Vec<LayerSpec>,
|
||||
|
||||
sched_intv: Duration,
|
||||
stats_intv: Duration,
|
||||
no_load_frac_limit: bool,
|
||||
|
||||
cpu_pool: CpuPool,
|
||||
@ -1233,13 +1253,11 @@ struct Scheduler<'a, 'b> {
|
||||
|
||||
proc_reader: procfs::ProcReader,
|
||||
sched_stats: Stats,
|
||||
report_stats: Stats,
|
||||
|
||||
nr_layer_cpus_min_max: Vec<(usize, usize)>,
|
||||
nr_layer_cpus_ranges: Vec<(usize, usize)>,
|
||||
processing_dur: Duration,
|
||||
prev_processing_dur: Duration,
|
||||
|
||||
sys_stats: Arc<Mutex<SysStats>>,
|
||||
stats_server: ScxStatsServer<StatsReq, StatsRes>,
|
||||
}
|
||||
|
||||
impl<'a, 'b> Scheduler<'a, 'b> {
|
||||
@ -1448,33 +1466,6 @@ impl<'a, 'b> Scheduler<'a, 'b> {
|
||||
|
||||
// Other stuff.
|
||||
let proc_reader = procfs::ProcReader::new();
|
||||
let sys_stats = Arc::new(Mutex::new(SysStats::default()));
|
||||
|
||||
let mut sched = Self {
|
||||
struct_ops: None,
|
||||
layer_specs,
|
||||
|
||||
sched_intv: Duration::from_secs_f64(opts.interval),
|
||||
stats_intv: Duration::from_secs_f64(opts.stats_interval),
|
||||
no_load_frac_limit: opts.no_load_frac_limit,
|
||||
|
||||
cpu_pool,
|
||||
layers,
|
||||
|
||||
sched_stats: Stats::new(&mut skel, &proc_reader)?,
|
||||
report_stats: Stats::new(&mut skel, &proc_reader)?,
|
||||
|
||||
nr_layer_cpus_min_max: vec![(0, 0); nr_layers],
|
||||
processing_dur: Duration::from_millis(0),
|
||||
prev_processing_dur: Duration::from_millis(0),
|
||||
|
||||
proc_reader,
|
||||
skel,
|
||||
|
||||
sys_stats: sys_stats.clone(),
|
||||
};
|
||||
|
||||
stats::launch_server(sys_stats)?;
|
||||
|
||||
// XXX If we try to refresh the cpumasks here before attaching, we
|
||||
// sometimes (non-deterministically) don't see the updated values in
|
||||
@ -1483,7 +1474,30 @@ impl<'a, 'b> Scheduler<'a, 'b> {
|
||||
// huge problem in the interim until we figure it out.
|
||||
|
||||
// Attach.
|
||||
sched.struct_ops = Some(scx_ops_attach!(sched.skel, layered)?);
|
||||
let struct_ops = scx_ops_attach!(skel, layered)?;
|
||||
let stats_server = stats::launch_server()?;
|
||||
|
||||
let sched = Self {
|
||||
struct_ops: Some(struct_ops),
|
||||
layer_specs,
|
||||
|
||||
sched_intv: Duration::from_secs_f64(opts.interval),
|
||||
no_load_frac_limit: opts.no_load_frac_limit,
|
||||
|
||||
cpu_pool,
|
||||
layers,
|
||||
|
||||
sched_stats: Stats::new(&mut skel, &proc_reader)?,
|
||||
|
||||
nr_layer_cpus_ranges: vec![(0, 0); nr_layers],
|
||||
processing_dur: Default::default(),
|
||||
|
||||
proc_reader,
|
||||
skel,
|
||||
|
||||
stats_server,
|
||||
};
|
||||
|
||||
info!("Layered Scheduler Attached. Run `scx_layered --monitor` for metrics.");
|
||||
|
||||
Ok(sched)
|
||||
@ -1565,9 +1579,9 @@ impl<'a, 'b> Scheduler<'a, 'b> {
|
||||
self.skel.maps.bss_data.fallback_cpu = self.cpu_pool.fallback_cpu as u32;
|
||||
|
||||
for (lidx, layer) in self.layers.iter().enumerate() {
|
||||
self.nr_layer_cpus_min_max[lidx] = (
|
||||
self.nr_layer_cpus_min_max[lidx].0.min(layer.nr_cpus),
|
||||
self.nr_layer_cpus_min_max[lidx].1.max(layer.nr_cpus),
|
||||
self.nr_layer_cpus_ranges[lidx] = (
|
||||
self.nr_layer_cpus_ranges[lidx].0.min(layer.nr_cpus),
|
||||
self.nr_layer_cpus_ranges[lidx].1.max(layer.nr_cpus),
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -1577,48 +1591,39 @@ impl<'a, 'b> Scheduler<'a, 'b> {
|
||||
|
||||
fn step(&mut self) -> Result<()> {
|
||||
let started_at = Instant::now();
|
||||
self.sched_stats
|
||||
.refresh(&mut self.skel, &self.proc_reader, started_at)?;
|
||||
|
||||
self.sched_stats.refresh(
|
||||
&mut self.skel,
|
||||
&self.proc_reader,
|
||||
started_at,
|
||||
self.processing_dur,
|
||||
)?;
|
||||
self.refresh_cpumasks()?;
|
||||
|
||||
self.processing_dur += Instant::now().duration_since(started_at);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn refresh_sys_stats(&mut self) -> Result<()> {
|
||||
let started_at = Instant::now();
|
||||
self.report_stats
|
||||
.refresh(&mut self.skel, &self.proc_reader, started_at)?;
|
||||
let stats = &self.report_stats;
|
||||
fn generate_sys_stats(
|
||||
&mut self,
|
||||
stats: &Stats,
|
||||
cpus_ranges: &mut Vec<(usize, usize)>,
|
||||
) -> Result<SysStats> {
|
||||
let bstats = &stats.bpf_stats;
|
||||
|
||||
let processing_dur = self.processing_dur - self.prev_processing_dur;
|
||||
self.prev_processing_dur = self.processing_dur;
|
||||
|
||||
let mut sys_stats = SysStats::new(
|
||||
stats,
|
||||
bstats,
|
||||
&self.stats_intv,
|
||||
&processing_dur,
|
||||
self.cpu_pool.fallback_cpu,
|
||||
)?;
|
||||
let mut sys_stats = SysStats::new(stats, bstats, self.cpu_pool.fallback_cpu)?;
|
||||
|
||||
for (lidx, (spec, layer)) in self.layer_specs.iter().zip(self.layers.iter()).enumerate() {
|
||||
let layer_stats =
|
||||
LayerStats::new(lidx, layer, stats, bstats, self.nr_layer_cpus_min_max[lidx]);
|
||||
let layer_stats = LayerStats::new(lidx, layer, stats, bstats, cpus_ranges[lidx]);
|
||||
sys_stats.layers.insert(spec.name.to_string(), layer_stats);
|
||||
self.nr_layer_cpus_min_max[lidx] = (layer.nr_cpus, layer.nr_cpus);
|
||||
cpus_ranges[lidx] = (layer.nr_cpus, layer.nr_cpus);
|
||||
}
|
||||
|
||||
*self.sys_stats.lock().unwrap() = sys_stats;
|
||||
Ok(())
|
||||
Ok(sys_stats)
|
||||
}
|
||||
|
||||
fn run(&mut self, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
|
||||
let now = Instant::now();
|
||||
let mut next_sched_at = now + self.sched_intv;
|
||||
let mut next_stats_at = now + self.stats_intv;
|
||||
let (res_ch, req_ch) = self.stats_server.channels();
|
||||
let mut next_sched_at = Instant::now() + self.sched_intv;
|
||||
let mut cpus_ranges = HashMap::<ThreadId, Vec<(usize, usize)>>::new();
|
||||
|
||||
while !shutdown.load(Ordering::Relaxed) && !uei_exited!(&self.skel, uei) {
|
||||
let now = Instant::now();
|
||||
@ -1630,18 +1635,40 @@ impl<'a, 'b> Scheduler<'a, 'b> {
|
||||
}
|
||||
}
|
||||
|
||||
if now >= next_stats_at {
|
||||
self.refresh_sys_stats()?;
|
||||
while next_stats_at < now {
|
||||
next_stats_at += self.stats_intv;
|
||||
match req_ch.recv_deadline(next_sched_at) {
|
||||
Ok(StatsReq::Hello(tid)) => {
|
||||
cpus_ranges.insert(
|
||||
tid,
|
||||
self.layers.iter().map(|l| (l.nr_cpus, l.nr_cpus)).collect(),
|
||||
);
|
||||
let stats = Stats::new(&mut self.skel, &self.proc_reader)?;
|
||||
res_ch.send(StatsRes::Hello(stats))?;
|
||||
}
|
||||
}
|
||||
Ok(StatsReq::Refresh(tid, mut stats)) => {
|
||||
// Propagate self's layer cpu ranges into each stat's.
|
||||
for i in 0..self.nr_layer_cpus_ranges.len() {
|
||||
for (_, ranges) in cpus_ranges.iter_mut() {
|
||||
ranges[i] = (
|
||||
ranges[i].0.min(self.nr_layer_cpus_ranges[i].0),
|
||||
ranges[i].1.max(self.nr_layer_cpus_ranges[i].1),
|
||||
);
|
||||
}
|
||||
self.nr_layer_cpus_ranges[i] =
|
||||
(self.layers[i].nr_cpus, self.layers[i].nr_cpus);
|
||||
}
|
||||
|
||||
std::thread::sleep(
|
||||
next_sched_at
|
||||
.min(next_stats_at)
|
||||
.duration_since(Instant::now()),
|
||||
);
|
||||
stats.refresh(&mut self.skel, &self.proc_reader, now, self.processing_dur)?;
|
||||
let sys_stats =
|
||||
self.generate_sys_stats(&stats, cpus_ranges.get_mut(&tid).unwrap())?;
|
||||
res_ch.send(StatsRes::Refreshed((stats, sys_stats)))?;
|
||||
}
|
||||
Ok(StatsReq::Bye(tid)) => {
|
||||
cpus_ranges.remove(&tid);
|
||||
res_ch.send(StatsRes::Bye)?;
|
||||
}
|
||||
Err(RecvTimeoutError::Timeout) => {}
|
||||
Err(e) => Err(e)?,
|
||||
}
|
||||
}
|
||||
|
||||
self.struct_ops.take();
|
||||
@ -1850,10 +1877,6 @@ fn main() -> Result<()> {
|
||||
})
|
||||
.context("Error setting Ctrl-C handler")?;
|
||||
|
||||
if opts.monitor {
|
||||
return stats::monitor(shutdown.clone());
|
||||
}
|
||||
|
||||
if let Some(path) = &opts.example {
|
||||
write_example_file(path)?;
|
||||
return Ok(());
|
||||
@ -1867,6 +1890,16 @@ fn main() -> Result<()> {
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(intv) = opts.monitor {
|
||||
let shutdown_copy = shutdown.clone();
|
||||
let jh =
|
||||
spawn(move || stats::monitor(Duration::from_secs_f64(intv), shutdown_copy).unwrap());
|
||||
if layer_config.specs.len() == 0 {
|
||||
let _ = jh.join();
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
debug!("specs={}", serde_json::to_string_pretty(&layer_config)?);
|
||||
verify_layer_specs(&layer_config.specs)?;
|
||||
|
||||
|
@ -3,14 +3,18 @@ use crate::BpfStats;
|
||||
use crate::Layer;
|
||||
use crate::LayerKind;
|
||||
use crate::Stats;
|
||||
use anyhow::Result;
|
||||
use anyhow::{bail, Result};
|
||||
use bitvec::prelude::*;
|
||||
use chrono::DateTime;
|
||||
use chrono::Local;
|
||||
use log::warn;
|
||||
use log::{info, warn};
|
||||
use scx_stats::Meta;
|
||||
use scx_stats::ScxStatsClient;
|
||||
use scx_stats::ScxStatsOps;
|
||||
use scx_stats::ScxStatsServer;
|
||||
use scx_stats::StatsCloser;
|
||||
use scx_stats::StatsOpener;
|
||||
use scx_stats::StatsReader;
|
||||
use scx_stats::ToJson;
|
||||
use scx_stats_derive::Stats;
|
||||
use serde::Deserialize;
|
||||
@ -20,7 +24,9 @@ use std::io::Write;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
use std::thread::current;
|
||||
use std::thread::sleep;
|
||||
use std::thread::ThreadId;
|
||||
use std::time::Duration;
|
||||
use std::time::SystemTime;
|
||||
use std::time::UNIX_EPOCH;
|
||||
@ -44,7 +50,7 @@ fn fmt_num(v: u64) -> String {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
|
||||
#[stat(_om_prefix = "l_", _om_label="layer_name")]
|
||||
#[stat(_om_prefix = "l_", _om_label = "layer_name")]
|
||||
pub struct LayerStats {
|
||||
#[stat(desc = "layer: CPU utilization (100% means one full CPU)")]
|
||||
pub util: f64,
|
||||
@ -306,8 +312,6 @@ impl LayerStats {
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)]
|
||||
#[stat(top)]
|
||||
pub struct SysStats {
|
||||
#[stat(desc = "update interval", _om_skip)]
|
||||
pub intv: f64,
|
||||
#[stat(desc = "timestamp", _om_skip)]
|
||||
pub at: f64,
|
||||
#[stat(desc = "# sched events duringg the period")]
|
||||
@ -341,13 +345,7 @@ pub struct SysStats {
|
||||
}
|
||||
|
||||
impl SysStats {
|
||||
pub fn new(
|
||||
stats: &Stats,
|
||||
bstats: &BpfStats,
|
||||
intv: &Duration,
|
||||
proc_dur: &Duration,
|
||||
fallback_cpu: usize,
|
||||
) -> Result<Self> {
|
||||
pub fn new(stats: &Stats, bstats: &BpfStats, fallback_cpu: usize) -> Result<Self> {
|
||||
let lsum = |idx| stats.bpf_stats.lstats_sums[idx as usize];
|
||||
let total = lsum(bpf_intf::layer_stat_idx_LSTAT_SEL_LOCAL)
|
||||
+ lsum(bpf_intf::layer_stat_idx_LSTAT_ENQ_WAKEUP)
|
||||
@ -363,7 +361,6 @@ impl SysStats {
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
intv: intv.as_secs_f64(),
|
||||
at: SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs_f64(),
|
||||
total,
|
||||
local: lsum_pct(bpf_intf::layer_stat_idx_LSTAT_SEL_LOCAL),
|
||||
@ -375,7 +372,7 @@ impl SysStats {
|
||||
/ total as f64,
|
||||
excl_wakeup: bstats.gstats[bpf_intf::global_stat_idx_GSTAT_EXCL_WAKEUP as usize] as f64
|
||||
/ total as f64,
|
||||
proc_ms: proc_dur.as_millis() as u64,
|
||||
proc_ms: stats.processing_dur.as_millis() as u64,
|
||||
busy: stats.cpu_busy * 100.0,
|
||||
util: stats.total_util * 100.0,
|
||||
load: stats.total_load,
|
||||
@ -429,32 +426,101 @@ impl SysStats {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn launch_server(sys_stats: Arc<Mutex<SysStats>>) -> Result<()> {
|
||||
ScxStatsServer::new()
|
||||
.add_stats_meta(LayerStats::meta())
|
||||
.add_stats_meta(SysStats::meta())
|
||||
.add_stats(
|
||||
"top",
|
||||
Box::new(move |_| sys_stats.lock().unwrap().to_json()),
|
||||
)
|
||||
.launch()?;
|
||||
Ok(())
|
||||
#[derive(Debug)]
|
||||
pub enum StatsReq {
|
||||
Hello(ThreadId),
|
||||
Refresh(ThreadId, Stats),
|
||||
Bye(ThreadId),
|
||||
}
|
||||
|
||||
pub fn monitor(shutdown: Arc<AtomicBool>) -> Result<()> {
|
||||
let mut client = ScxStatsClient::new().connect()?;
|
||||
let mut last_at = 0.0;
|
||||
#[derive(Debug)]
|
||||
pub enum StatsRes {
|
||||
Hello(Stats),
|
||||
Refreshed((Stats, SysStats)),
|
||||
Bye,
|
||||
}
|
||||
|
||||
pub fn launch_server() -> Result<ScxStatsServer<StatsReq, StatsRes>> {
|
||||
let open: Box<dyn StatsOpener<StatsReq, StatsRes>> = Box::new(move |(req_ch, res_ch)| {
|
||||
let tid = current().id();
|
||||
req_ch.send(StatsReq::Hello(tid))?;
|
||||
let mut stats = Some(match res_ch.recv()? {
|
||||
StatsRes::Hello(v) => v,
|
||||
res => bail!("invalid response to Hello: {:?}", &res),
|
||||
});
|
||||
|
||||
let read: Box<dyn StatsReader<StatsReq, StatsRes>> =
|
||||
Box::new(move |_args, (req_ch, res_ch)| {
|
||||
req_ch.send(StatsReq::Refresh(tid, stats.take().unwrap()))?;
|
||||
let (new_stats, sys_stats) = match res_ch.recv()? {
|
||||
StatsRes::Refreshed(v) => v,
|
||||
res => bail!("invalid response to Refresh: {:?}", &res),
|
||||
};
|
||||
stats = Some(new_stats);
|
||||
sys_stats.to_json()
|
||||
});
|
||||
|
||||
Ok(read)
|
||||
});
|
||||
|
||||
let close: Box<dyn StatsCloser<StatsReq, StatsRes>> = Box::new(move |(req_ch, res_ch)| {
|
||||
req_ch.send(StatsReq::Bye(current().id())).unwrap();
|
||||
match res_ch.recv().unwrap() {
|
||||
StatsRes::Bye => {}
|
||||
res => panic!("invalid response to Bye: {:?}", &res),
|
||||
}
|
||||
});
|
||||
|
||||
Ok(ScxStatsServer::new()
|
||||
.add_stats_meta(LayerStats::meta())
|
||||
.add_stats_meta(SysStats::meta())
|
||||
.add_stats_ops(
|
||||
"top",
|
||||
ScxStatsOps {
|
||||
open,
|
||||
close: Some(close),
|
||||
},
|
||||
)
|
||||
.launch()?)
|
||||
}
|
||||
|
||||
pub fn monitor(intv: Duration, shutdown: Arc<AtomicBool>) -> Result<()> {
|
||||
let mut retry_cnt: u32 = 0;
|
||||
while !shutdown.load(Ordering::Relaxed) {
|
||||
let sst = client.request::<SysStats>("stats", vec![])?;
|
||||
if sst.at != last_at {
|
||||
let mut client = match ScxStatsClient::new().connect() {
|
||||
Ok(v) => v,
|
||||
Err(e) => match e.downcast_ref::<std::io::Error>() {
|
||||
Some(ioe) if ioe.kind() == std::io::ErrorKind::ConnectionRefused => {
|
||||
if retry_cnt == 1 {
|
||||
info!("Stats server not avaliable, retrying...");
|
||||
}
|
||||
retry_cnt += 1;
|
||||
sleep(Duration::from_secs(1));
|
||||
continue;
|
||||
}
|
||||
_ => Err(e)?,
|
||||
},
|
||||
};
|
||||
retry_cnt = 0;
|
||||
|
||||
while !shutdown.load(Ordering::Relaxed) {
|
||||
let sst = match client.request::<SysStats>("stats", vec![]) {
|
||||
Ok(v) => v,
|
||||
Err(e) => match e.downcast_ref::<std::io::Error>() {
|
||||
Some(ioe) => {
|
||||
info!("Connection to stats_server failed ({})", &ioe);
|
||||
sleep(Duration::from_secs(1));
|
||||
break;
|
||||
}
|
||||
None => Err(e)?,
|
||||
},
|
||||
};
|
||||
let dt = DateTime::<Local>::from(UNIX_EPOCH + Duration::from_secs_f64(sst.at));
|
||||
println!("###### {} ######", dt.to_rfc2822());
|
||||
sst.format_all(&mut std::io::stdout())?;
|
||||
last_at = sst.at;
|
||||
sleep(intv);
|
||||
}
|
||||
|
||||
std::thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user