Merge pull request #566 from multics69/lavd-turbo

scx_lavd: prioritize the turbo boost-able cores
Commit 09cff560aa by Changwoo Min, 2024-08-27 08:47:25 +09:00 (committed by GitHub)
5 changed files with 112 additions and 35 deletions

Cargo.lock

@@ -1087,7 +1087,6 @@ dependencies = [
"libbpf-rs",
"libc",
"log",
"nix 0.29.0",
"ordered-float 3.9.2",
"plain",
"rlimit",

Cargo.toml

@@ -27,7 +27,6 @@ simplelog = "0.12"
static_assertions = "1.1.0"
rlimit = "0.10.1"
plain = "0.2.3"
nix = { version = "0.29.0", features = ["signal"] }
[build-dependencies]
scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" }

src/bpf/intf.h

@@ -82,6 +82,7 @@ enum consts {
LAVD_SYS_STAT_INTERVAL_NS = (25ULL * NSEC_PER_MSEC),
LAVD_CC_PER_CORE_MAX_CTUIL = 500, /* maximum per-core CPU utilization */
LAVD_CC_PER_TURBO_CORE_MAX_CTUIL = 750, /* maximum per-core CPU utilization for a turbo core */
LAVD_CC_NR_ACTIVE_MIN = 1, /* minimum number of active cores */
LAVD_CC_NR_OVRFLW = 1, /* num of overflow cores */
LAVD_CC_CPU_PIN_INTERVAL = (3ULL * LAVD_TIME_ONE_SEC),
@@ -193,6 +194,7 @@ struct cpu_ctx {
*/
u16 capacity; /* CPU capacity based on 1000 */
u8 big_core; /* is it a big core? */
u8 turbo_core; /* is it a turbo core? */
u8 cpdom_id; /* compute domain id (== dsq_id) */
u8 cpdom_alt_id; /* compute domain id of alternative type (== dsq_id) */
u8 cpdom_poll_pos; /* index to check if a DSQ of a compute domain is starving */

src/bpf/main.bpf.c

@@ -200,10 +200,11 @@ static volatile u64 nr_cpus_big;
static struct sys_stat __sys_stats[2];
static volatile int __sys_stat_idx;
private(LAVD) struct bpf_cpumask __kptr *active_cpumask; /* CPU mask for active CPUs */
private(LAVD) struct bpf_cpumask __kptr *ovrflw_cpumask; /* CPU mask for overflow CPUs */
private(LAVD) struct bpf_cpumask __kptr *turbo_cpumask; /* CPU mask for turbo CPUs */
private(LAVD) struct bpf_cpumask __kptr *big_cpumask; /* CPU mask for big CPUs */
private(LAVD) struct bpf_cpumask __kptr *little_cpumask; /* CPU mask for little CPUs */
private(LAVD) struct bpf_cpumask __kptr *active_cpumask; /* CPU mask for active CPUs */
private(LAVD) struct bpf_cpumask __kptr *ovrflw_cpumask; /* CPU mask for overflow CPUs */
private(LAVD) struct bpf_cpumask cpdom_cpumask[LAVD_CPDOM_MAX_NR]; /* CPU mask for each compute domain */
/*
@@ -229,6 +230,7 @@ static u64 cur_svc_time;
*/
const volatile bool no_core_compaction;
const volatile bool no_freq_scaling;
const volatile bool no_prefer_turbo_core;
const volatile u32 is_smt_active;
const volatile u8 verbose;
@@ -659,8 +661,14 @@ static void collect_sys_stat(struct sys_stat_ctx *c)
c->new_util = (compute * LAVD_CPU_UTIL_MAX) / c->duration;
cpuc->util = calc_avg(cpuc->util, c->new_util);
if (cpuc->util > LAVD_CC_PER_CORE_MAX_CTUIL)
c->nr_violation += 1000;
if (cpuc->turbo_core) {
if (cpuc->util > LAVD_CC_PER_TURBO_CORE_MAX_CTUIL)
c->nr_violation += 1000;
}
else {
if (cpuc->util > LAVD_CC_PER_CORE_MAX_CTUIL)
c->nr_violation += 1000;
}
/*
* Accumulate system-wide idle time
@@ -1391,21 +1399,41 @@ static s32 pick_idle_cpu(struct task_struct *p, struct task_ctx *taskc,
if (bpf_cpumask_empty(cast_mask(a_cpumask)))
goto start_omask;
if (is_perf_cri(taskc, stat_cur))
if (is_perf_cri(taskc, stat_cur) || no_core_compaction ) {
bpf_cpumask_and(t_cpumask, cast_mask(a_cpumask), cast_mask(big));
else
}
else {
bpf_cpumask_and(t_cpumask, cast_mask(a_cpumask), cast_mask(little));
goto start_llc_mask;
}
bpf_cpumask_and(t2_cpumask, cast_mask(t_cpumask), cast_mask(cpdom_mask_prev));
/*
* Pick an idle core among turbo boost-enabled CPUs with a matching
* core type.
*/
start_turbo_mask:
if (no_prefer_turbo_core || !turbo_cpumask)
goto start_llc_mask;
bpf_cpumask_and(t2_cpumask, cast_mask(t_cpumask), cast_mask(turbo_cpumask));
if (bpf_cpumask_empty(cast_mask(t2_cpumask)))
goto start_tmask;
goto start_llc_mask;
cpu_id = pick_idle_cpu_in(t2_cpumask);
if (cpu_id >= 0) {
*is_idle = true;
goto unlock_out;
}
/*
* Pick an idle core among active CPUs with a matching core type within
* the prev CPU's LLC domain.
*/
start_t2mask:
start_llc_mask:
bpf_cpumask_and(t2_cpumask, cast_mask(t_cpumask), cast_mask(cpdom_mask_prev));
if (bpf_cpumask_empty(cast_mask(t2_cpumask)))
goto start_tmask;
cpu_id = pick_idle_cpu_in(t2_cpumask);
if (cpu_id >= 0) {
*is_idle = true;
@@ -2755,6 +2783,10 @@ static int init_cpumasks(void)
if (err)
goto out;
err = calloc_cpumask(&turbo_cpumask);
if (err)
goto out;
err = calloc_cpumask(&big_cpumask);
if (err)
goto out;
@@ -2777,11 +2809,6 @@ out:
static u16 get_cpuperf_cap(s32 cpu)
{
/*
* If the CPU capacity values are all 1024, then let's just use the
* capacity values from userspace, which are calculated using each CPU's
* maximum frequency.
*/
if (cpu >= 0 && cpu < LAVD_CPU_ID_MAX)
return __cpu_capacity_hint[cpu];
@@ -2789,25 +2816,51 @@ static u16 get_cpuperf_cap(s32 cpu)
return 1;
}
static u16 get_cputurbo_cap(void)
{
u16 turbo_cap = 0;
int nr_turbo = 0, cpu;
/*
* Find the maximum CPU frequency
*/
for (cpu = 0; cpu < LAVD_CPU_ID_MAX; cpu++) {
if (__cpu_capacity_hint[cpu] > turbo_cap) {
turbo_cap = __cpu_capacity_hint[cpu];
nr_turbo++;
}
}
/*
* If all CPUs' frequencies are the same, ignore the turbo.
*/
if (nr_turbo <= 1)
turbo_cap = 0;
return turbo_cap;
}
static s32 init_per_cpu_ctx(u64 now)
{
struct cpu_ctx *cpuc;
struct bpf_cpumask *big, *little, *active, *ovrflw, *cd_cpumask;
struct bpf_cpumask *turbo, *big, *little, *active, *ovrflw, *cd_cpumask;
struct cpdom_ctx *cpdomc;
int cpu, i, j, err = 0;
u64 cpdom_id;
u32 sum_capacity = 0, avg_capacity;
u16 turbo_cap;
bpf_rcu_read_lock();
/*
* Prepare cpumasks.
*/
turbo = turbo_cpumask;
big = big_cpumask;
little = little_cpumask;
active = active_cpumask;
ovrflw = ovrflw_cpumask;
if (!big|| !little || !active || !ovrflw) {
if (!turbo || !big|| !little || !active || !ovrflw) {
scx_bpf_error("Failed to prepare cpumasks.");
err = -ENOMEM;
goto unlock_out;
@@ -2848,6 +2901,11 @@ static s32 init_per_cpu_ctx(u64 now)
sum_capacity += cpuc->capacity;
}
/*
* Get turbo capacity.
*/
turbo_cap = get_cputurbo_cap();
/*
* Classify CPU into BIG or little cores based on their average capacity.
*/
@@ -2874,6 +2932,10 @@ static s32 init_per_cpu_ctx(u64 now)
bpf_cpumask_set_cpu(cpu, little);
bpf_cpumask_set_cpu(cpu, ovrflw);
}
cpuc->turbo_core = cpuc->capacity == turbo_cap;
if (cpuc->turbo_core)
bpf_cpumask_set_cpu(cpu, turbo);
}
/*

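For orientation, the sketch below condenses the idle-CPU preference order that pick_idle_cpu() follows after this change: for a performance-critical task, turbo-capable CPUs of the matching (big) core type are tried first unless --no-prefer-turbo-core is given, then CPUs in the previous CPU's LLC/compute domain, then any active CPU of that type, and finally the overflow CPUs. This is a simplified user-space sketch, not code from the patch: cpumask_t, find_idle_in(), struct cpus, and the collapsed fallback steps are illustrative assumptions layered on top of what the diff shows.

```c
/*
 * Illustrative user-space model of the cpumask walk in pick_idle_cpu()
 * after this patch. Types and helpers here are made up for the example;
 * the real code uses bpf_cpumask objects and has more fallback stages.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t cpumask_t;	/* one bit per CPU, up to 64 CPUs */

struct cpus {
	cpumask_t active;	/* active_cpumask */
	cpumask_t ovrflw;	/* ovrflw_cpumask */
	cpumask_t turbo;	/* turbo_cpumask (new in this commit) */
	cpumask_t big, little;	/* big_cpumask / little_cpumask */
	cpumask_t llc_prev;	/* cpdom_mask_prev: prev CPU's compute domain */
};

/* Lowest-numbered CPU that is both a candidate and idle, or -1 if none. */
static int find_idle_in(cpumask_t candidates, cpumask_t idle)
{
	cpumask_t m = candidates & idle;

	for (int cpu = 0; cpu < 64; cpu++)
		if (m & (1ULL << cpu))
			return cpu;
	return -1;
}

static int pick_idle_cpu_sim(const struct cpus *c, cpumask_t idle,
			     int is_perf_cri, int no_prefer_turbo_core)
{
	/* Perf-critical tasks target big cores, the rest little cores. */
	cpumask_t t = c->active & (is_perf_cri ? c->big : c->little);
	int cpu;

	/* 1. Turbo-capable CPUs of that core type (skipped on the little
	 *    path and when --no-prefer-turbo-core is set). */
	if (is_perf_cri && !no_prefer_turbo_core &&
	    (cpu = find_idle_in(t & c->turbo, idle)) >= 0)
		return cpu;
	/* 2. Same core type within the previous CPU's LLC/compute domain. */
	if ((cpu = find_idle_in(t & c->llc_prev, idle)) >= 0)
		return cpu;
	/* 3. Any active CPU of that core type. */
	if ((cpu = find_idle_in(t, idle)) >= 0)
		return cpu;
	/* 4. Overflow CPUs as the last resort. */
	return find_idle_in(c->ovrflw, idle);
}

int main(void)
{
	/* 8 CPUs: 0-3 little, 4-7 big, 6-7 turbo-capable, 5-7 currently idle. */
	struct cpus c = {
		.active = 0xff, .ovrflw = 0x00, .turbo = 0xc0,
		.big = 0xf0, .little = 0x0f, .llc_prev = 0x30,
	};
	cpumask_t idle = 0xe0;

	printf("perf-critical, turbo preferred -> CPU %d\n",
	       pick_idle_cpu_sim(&c, idle, 1, 0));	/* picks CPU 6 */
	printf("perf-critical, --no-prefer-turbo-core -> CPU %d\n",
	       pick_idle_cpu_sim(&c, idle, 1, 1));	/* picks CPU 5 */
	return 0;
}
```

The actual BPF code reaches the same effect with bpf_cpumask_and() on t2_cpumask and the start_turbo_mask/start_llc_mask labels shown in the hunk above.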
src/main.rs

@@ -30,6 +30,7 @@ use std::mem::MaybeUninit;
use std::str;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::thread::ThreadId;
use std::time::Duration;
@@ -59,7 +60,6 @@ use scx_utils::Topology;
use scx_utils::UserExitInfo;
use itertools::iproduct;
use nix::sys::signal;
use plain::Plain;
use rlimit::{getrlimit, setrlimit, Resource};
@@ -101,6 +101,10 @@ struct Opts {
#[clap(long = "prefer-little-core", action = clap::ArgAction::SetTrue)]
prefer_little_core: bool,
/// Do not specifically prefer to schedule on turbo cores.
#[clap(long = "no-prefer-turbo-core", action = clap::ArgAction::SetTrue)]
no_prefer_turbo_core: bool,
/// Disable controlling the CPU frequency. In order to improve latency and responsiveness of
/// performance-critical tasks, scx_lavd increases the CPU frequency even if CPU usage is low.
/// See main.bpf.c for more info. Normally set by the power mode, but can be set independently
@@ -117,6 +121,10 @@ struct Opts {
/// times to increase verbosity.
#[clap(short = 'v', long, action = clap::ArgAction::Count)]
verbose: u8,
/// Print scheduler version and exit.
#[clap(short = 'V', long, action = clap::ArgAction::SetTrue)]
version: bool,
}
impl Opts {
@@ -125,18 +133,21 @@ impl Opts {
self.no_core_compaction = true;
self.prefer_smt_core = false;
self.prefer_little_core = false;
self.no_prefer_turbo_core = false;
self.no_freq_scaling = true;
}
if self.powersave {
self.no_core_compaction = false;
self.prefer_smt_core = true;
self.prefer_little_core = true;
self.no_prefer_turbo_core = true;
self.no_freq_scaling = false;
}
if self.balanced {
self.no_core_compaction = false;
self.prefer_smt_core = false;
self.prefer_little_core = false;
self.no_prefer_turbo_core = false;
self.no_freq_scaling = false;
}
@@ -512,6 +523,7 @@ impl<'a> Scheduler<'a> {
skel.maps.bss_data.nr_cpus_onln = nr_cpus_onln;
skel.maps.rodata_data.no_core_compaction = opts.no_core_compaction;
skel.maps.rodata_data.no_freq_scaling = opts.no_freq_scaling;
skel.maps.rodata_data.no_prefer_turbo_core = opts.no_prefer_turbo_core;
skel.maps.rodata_data.is_smt_active = match FlatTopology::is_smt_active() {
Ok(ret) => (ret == 1) as u32,
Err(_) => 0,
@@ -623,10 +635,14 @@ impl<'a> Scheduler<'a> {
})
}
fn run(&mut self) -> Result<UserExitInfo> {
pub fn exited(&mut self) -> bool {
uei_exited!(&self.skel, uei)
}
fn run(&mut self, shutdown: Arc<AtomicBool>) -> Result<UserExitInfo> {
let (res_ch, req_ch) = self.stats_server.channels();
while self.running() {
while !shutdown.load(Ordering::Relaxed) && !self.exited() {
match req_ch.recv_timeout(Duration::from_secs(1)) {
Ok(req) => {
let res = self.stats_req_to_res(&req)?;
@@ -676,26 +692,25 @@ extern "C" fn handle_sigint(_: libc::c_int, _: *mut libc::siginfo_t, _: *mut lib
RUNNING.store(false, Ordering::SeqCst);
}
fn init_signal_handlers() {
// Ctrl-c for termination
unsafe {
let sigint_action = signal::SigAction::new(
signal::SigHandler::SigAction(handle_sigint),
signal::SaFlags::empty(),
signal::SigSet::empty(),
);
signal::sigaction(signal::SIGINT, &sigint_action).unwrap();
}
}
fn main() -> Result<()> {
let mut opts = Opts::parse();
opts.proc().unwrap();
if opts.version {
println!("scx_lavd {}", *build_id::SCX_FULL_VERSION);
return Ok(());
}
init_log(&opts);
init_signal_handlers();
debug!("{:#?}", opts);
let shutdown = Arc::new(AtomicBool::new(false));
let shutdown_clone = shutdown.clone();
ctrlc::set_handler(move || {
shutdown_clone.store(true, Ordering::Relaxed);
})
.context("Error setting Ctrl-C handler")?;
if let Some(nr_samples) = opts.monitor_sched_samples {
let jh = std::thread::spawn(move || stats::monitor_sched_samples(nr_samples).unwrap());
let _ = jh.join();
@@ -710,7 +725,7 @@ fn main() -> Result<()> {
*build_id::SCX_FULL_VERSION
);
info!("scx_lavd scheduler starts running.");
if !sched.run()?.should_restart() {
if !sched.run(shutdown.clone())?.should_restart() {
break;
}
}