diff --git a/scheds/rust/scx_lavd/src/bpf/intf.h b/scheds/rust/scx_lavd/src/bpf/intf.h index 026ebef..21d7ca5 100644 --- a/scheds/rust/scx_lavd/src/bpf/intf.h +++ b/scheds/rust/scx_lavd/src/bpf/intf.h @@ -291,4 +291,18 @@ struct msg_task_ctx { struct task_ctx_x taskc_x; }; + +/* + * BPF syscall + */ +enum { + LAVD_PM_PERFORMANCE = 0, + LAVD_PM_BALANCED = 1, + LAVD_PM_POWERSAVE = 2, +}; + +struct power_arg { + s32 power_mode; +}; + #endif /* __INTF_H */ diff --git a/scheds/rust/scx_lavd/src/bpf/main.bpf.c b/scheds/rust/scx_lavd/src/bpf/main.bpf.c index 2e7c145..20b927e 100644 --- a/scheds/rust/scx_lavd/src/bpf/main.bpf.c +++ b/scheds/rust/scx_lavd/src/bpf/main.bpf.c @@ -210,7 +210,8 @@ private(LAVD) struct bpf_cpumask cpdom_cpumask[LAVD_CPDOM_MAX_NR]; /* CPU mask f /* * CPU topology */ -const volatile u16 cpu_order[LAVD_CPU_ID_MAX]; /* CPU preference order */ +const volatile u16 cpu_order_performance[LAVD_CPU_ID_MAX]; /* CPU preference order for performance and balanced mode */ +const volatile u16 cpu_order_powersave[LAVD_CPU_ID_MAX]; /* CPU preference order for powersave mode */ const volatile u16 __cpu_capacity_hint[LAVD_CPU_ID_MAX]; /* CPU capacity based on 1000 */ struct cpdom_ctx cpdom_ctxs[LAVD_CPDOM_MAX_NR]; /* contexts for compute domains */ @@ -228,9 +229,10 @@ static u64 cur_svc_time; /* * Options */ -const volatile bool no_core_compaction; -const volatile bool no_freq_scaling; -const volatile bool no_prefer_turbo_core; +volatile bool no_core_compaction; +volatile bool no_freq_scaling; +volatile bool no_prefer_turbo_core; +volatile bool is_powersave_mode; const volatile u32 is_smt_active; const volatile u8 verbose; @@ -807,6 +809,7 @@ static void do_core_compaction(void) struct bpf_cpumask *active, *ovrflw; int nr_cpus, nr_active, nr_active_old, cpu, i; bool clear; + const volatile u16 *cpu_order; bpf_rcu_read_lock(); @@ -820,6 +823,14 @@ static void do_core_compaction(void) goto unlock_out; } + /* + * Decide which CPU order to use according to the current power mode. 
+ */ + if (is_powersave_mode) + cpu_order = cpu_order_powersave; + else + cpu_order = cpu_order_performance; + /* * Assign active and overflow cores */ @@ -2957,7 +2968,7 @@ static s32 init_per_cpu_ctx(u64 now) u64 cpumask = cpdomc->__cpumask[i]; bpf_for(j, 0, 64) { if (cpumask & 0x1LLU << j) { - cpu = (i * 64) + j; + cpu = (i * 64) + j; bpf_cpumask_set_cpu(cpu, cd_cpumask); cpuc = get_cpu_ctx_id(cpu); if (!cpuc) { @@ -3067,6 +3078,35 @@ void BPF_STRUCT_OPS(lavd_exit, struct scx_exit_info *ei) UEI_RECORD(uei, ei); } +SEC("syscall") +int set_power_profile(struct power_arg *input) +{ + switch (input->power_mode) { + case LAVD_PM_PERFORMANCE: + no_core_compaction = true; + no_freq_scaling = true; + no_prefer_turbo_core = false; + is_powersave_mode = false; + break; + case LAVD_PM_BALANCED: + no_core_compaction = false; + no_freq_scaling = false; + no_prefer_turbo_core = false; + is_powersave_mode = false; + break; + case LAVD_PM_POWERSAVE: + no_core_compaction = false; + no_freq_scaling = false; + no_prefer_turbo_core = true; + is_powersave_mode = true; + break; + default: + return -EINVAL; + } + + return 0; +} + SCX_OPS_DEFINE(lavd_ops, .select_cpu = (void *)lavd_select_cpu, .enqueue = (void *)lavd_enqueue, diff --git a/scheds/rust/scx_lavd/src/main.rs b/scheds/rust/scx_lavd/src/main.rs index aa20c4d..e5f9e97 100644 --- a/scheds/rust/scx_lavd/src/main.rs +++ b/scheds/rust/scx_lavd/src/main.rs @@ -23,6 +23,7 @@ use std::cell::RefCell; use std::collections::BTreeMap; use std::fs::File; use std::io::Read; +use std::ffi::c_int; use std::ffi::CStr; use std::fmt; use std::mem; @@ -46,6 +47,7 @@ use libbpf_rs::skel::OpenSkel; use libbpf_rs::skel::Skel; use libbpf_rs::skel::SkelBuilder; use libbpf_rs::OpenObject; +use libbpf_rs::ProgramInput; use log::debug; use log::info; use log::warn; @@ -63,8 +65,6 @@ use itertools::iproduct; use plain::Plain; use rlimit::{getrlimit, setrlimit, Resource}; -static RUNNING: AtomicBool = AtomicBool::new(true); - /// scx_lavd: 
Latency-criticality Aware Virtual Deadline (LAVD) scheduler /// /// The rust part is minimal. It processes command line options and logs out @@ -72,6 +72,10 @@ static RUNNING: AtomicBool = AtomicBool::new(true); /// See the more detailed overview of the LAVD design at main.bpf.c. #[derive(Debug, Parser)] struct Opts { + /// Automatically decide the power mode based on the current energy profile. + #[clap(long = "auto", action = clap::ArgAction::SetTrue)] + auto: bool, + /// Run in performance mode to get maximum performance. #[clap(long = "performance", action = clap::ArgAction::SetTrue)] performance: bool, @@ -198,15 +202,19 @@ struct ComputeDomainValue { #[derive(Debug)] struct FlatTopology { - cpu_fids: Vec, + cpu_fids_performance: Vec, + cpu_fids_powersave: Vec, cpdom_map: BTreeMap, nr_cpus_online: usize, } impl fmt::Display for FlatTopology { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for cpu_fid in self.cpu_fids.iter() { - write!(f, "\nCPU: {:?}", cpu_fid).ok(); + for cpu_fid in self.cpu_fids_performance.iter() { + write!(f, "\nCPU in performance: {:?}", cpu_fid).ok(); + } + for cpu_fid in self.cpu_fids_powersave.iter() { + write!(f, "\nCPU in powersave: {:?}", cpu_fid).ok(); } for (k, v) in self.cpdom_map.iter() { write!(f, "\nCPDOM: {:?} {:?}", k, v).ok(); @@ -217,13 +225,19 @@ impl fmt::Display for FlatTopology { impl FlatTopology { /// Build a flat-structured topology - pub fn new(opts: &Opts) -> Result { - let (cpu_fids, avg_freq, nr_cpus_online) = - Self::build_cpu_fids(opts.prefer_smt_core, opts.prefer_little_core).unwrap(); - let cpdom_map = Self::build_cpdom(&cpu_fids, avg_freq).unwrap(); + pub fn new() -> Result { + let (cpu_fids_performance, avg_freq, nr_cpus_online) = + Self::build_cpu_fids(false, false).unwrap(); + let (cpu_fids_powersave, _, _) = + Self::build_cpu_fids(true, true).unwrap(); + + // Note that building compute domains does not depend on the CPU order, + // so it is okay to use any cpu_fids_*. 
+ let cpdom_map = Self::build_cpdom(&cpu_fids_performance, avg_freq).unwrap(); Ok(FlatTopology { - cpu_fids, + cpu_fids_performance, + cpu_fids_powersave, cpdom_map, nr_cpus_online, }) @@ -281,8 +295,8 @@ impl FlatTopology { cpu_fid.cpu_cap = ((cpu_fid.max_freq * 1024) / base_freq) as usize; } } else { - // Unfortunately, the frequency information in sysfs seems not always correct in some - // distributions. + // Unfortunately, the frequency information in sysfs seems not + // always correct in some distributions. for cpu_fid in cpu_fids.iter_mut() { cpu_fid.cpu_cap = 1024 as usize; } @@ -445,7 +459,7 @@ impl<'a> Scheduler<'a> { let mut skel = scx_ops_open!(skel_builder, open_object, lavd_ops)?; // Initialize CPU topology - let topo = FlatTopology::new(&opts).unwrap(); + let topo = FlatTopology::new().unwrap(); Self::init_cpus(&mut skel, &topo); // Initialize skel according to @opts. @@ -482,10 +496,13 @@ impl<'a> Scheduler<'a> { fn init_cpus(skel: &mut OpenBpfSkel, topo: &FlatTopology) { // Initialize CPU order topologically sorted // by a cpu, node, llc, max_freq, and core order - for (pos, cpu) in topo.cpu_fids.iter().enumerate() { - skel.maps.rodata_data.cpu_order[pos] = cpu.cpu_id as u16; + for (pos, cpu) in topo.cpu_fids_performance.iter().enumerate() { + skel.maps.rodata_data.cpu_order_performance[pos] = cpu.cpu_id as u16; skel.maps.rodata_data.__cpu_capacity_hint[cpu.cpu_id] = cpu.cpu_cap as u16; } + for (pos, cpu) in topo.cpu_fids_powersave.iter().enumerate() { + skel.maps.rodata_data.cpu_order_powersave[pos] = cpu.cpu_id as u16; + } debug!("{:#?}", topo); // Initialize compute domain contexts @@ -519,11 +536,16 @@ impl<'a> Scheduler<'a> { } } + fn is_powersave_mode(opts: &Opts) -> bool { + opts.prefer_smt_core && opts.prefer_little_core + } + fn init_globals(skel: &mut OpenBpfSkel, opts: &Opts, nr_cpus_onln: u64) { skel.maps.bss_data.nr_cpus_onln = nr_cpus_onln; - skel.maps.rodata_data.no_core_compaction = opts.no_core_compaction; - 
skel.maps.rodata_data.no_freq_scaling = opts.no_freq_scaling; - skel.maps.rodata_data.no_prefer_turbo_core = opts.no_prefer_turbo_core; + skel.maps.bss_data.no_core_compaction = opts.no_core_compaction; + skel.maps.bss_data.no_freq_scaling = opts.no_freq_scaling; + skel.maps.bss_data.no_prefer_turbo_core = opts.no_prefer_turbo_core; + skel.maps.bss_data.is_powersave_mode = Self::is_powersave_mode(&opts); skel.maps.rodata_data.is_smt_active = match FlatTopology::is_smt_active() { Ok(ret) => (ret == 1) as u32, Err(_) => 0, @@ -635,10 +657,68 @@ impl<'a> Scheduler<'a> { uei_exited!(&self.skel, uei) } - fn run(&mut self, shutdown: Arc) -> Result { + fn set_power_profile(&mut self, mode: i32) -> Result<(), u32> { + let prog = &mut self.skel.progs.set_power_profile; + let mut args = power_arg { + power_mode: mode as c_int, + }; + let input = ProgramInput { + context_in: Some(unsafe { + std::slice::from_raw_parts_mut( + &mut args as *mut _ as *mut u8, + std::mem::size_of_val(&args), + ) + }), + ..Default::default() + }; + let out = prog.test_run(input).unwrap(); + if out.return_value != 0 { + return Err(out.return_value); + } + + Ok(()) + } + + fn read_energy_profile() -> String { + let res = + File::open("/sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference") + .and_then(|mut file| { + let mut contents = String::new(); + file.read_to_string(&mut contents)?; + Ok(contents.trim().to_string()) + }); + + res.unwrap_or_else(|_| "none".to_string()) + } + + fn update_power_profile(&mut self) -> bool { + const LAVD_PM_PERFORMANCE: s32 = 0; + const LAVD_PM_BALANCED: s32 = 1; + const LAVD_PM_POWERSAVE: s32 = 2; + + let profile = Self::read_energy_profile(); + if profile == "performance" { + let _ = self.set_power_profile(LAVD_PM_PERFORMANCE); + } else if profile == "balance_performance" { + let _ = self.set_power_profile(LAVD_PM_BALANCED); + } else if profile == "power" { + let _ = self.set_power_profile(LAVD_PM_POWERSAVE); + } else { + return false; + } + + true 
+ } + + fn run(&mut self, auto: bool, shutdown: Arc) -> Result { let (res_ch, req_ch) = self.stats_server.channels(); + let mut auto = auto; while !shutdown.load(Ordering::Relaxed) && !self.exited() { + if auto { + auto = self.update_power_profile(); + } + match req_ch.recv_timeout(Duration::from_secs(1)) { Ok(req) => { let res = self.stats_req_to_res(&req)?; @@ -718,7 +798,7 @@ fn main() -> Result<()> { *build_id::SCX_FULL_VERSION ); info!("scx_lavd scheduler starts running."); - if !sched.run(shutdown.clone())?.should_restart() { + if !sched.run(opts.auto, shutdown.clone())?.should_restart() { break; } }