scx_lavd: prioritize the turbo boost-able cores

Signed-off-by: Changwoo Min <changwoo@igalia.com>
Changwoo Min 2024-08-26 17:52:13 +09:00
parent cd5b2bf664
commit 9807e561f0
3 changed files with 88 additions and 16 deletions


@@ -82,6 +82,7 @@ enum consts {
 	LAVD_SYS_STAT_INTERVAL_NS = (25ULL * NSEC_PER_MSEC),
 	LAVD_CC_PER_CORE_MAX_CTUIL = 500,		/* maximum per-core CPU utilization */
+	LAVD_CC_PER_TURBO_CORE_MAX_CTUIL = 750,		/* maximum per-core CPU utilization for a turbo core */
 	LAVD_CC_NR_ACTIVE_MIN = 1,			/* num of minimum active cores */
 	LAVD_CC_NR_OVRFLW = 1,				/* num of overflow cores */
 	LAVD_CC_CPU_PIN_INTERVAL = (3ULL * LAVD_TIME_ONE_SEC),
@@ -193,6 +194,7 @@ struct cpu_ctx {
 	 */
 	u16	capacity;	/* CPU capacity based on 1000 */
 	u8	big_core;	/* is it a big core? */
+	u8	turbo_core;	/* is it a turbo core? */
 	u8	cpdom_id;	/* compute domain id (== dsq_id) */
 	u8	cpdom_alt_id;	/* compute domain id of alternative type (== dsq_id) */
 	u8	cpdom_poll_pos;	/* index to check if a DSQ of a compute domain is starving */


@@ -200,10 +200,11 @@ static volatile u64 nr_cpus_big;
 static struct sys_stat __sys_stats[2];
 static volatile int __sys_stat_idx;
 
-private(LAVD) struct bpf_cpumask __kptr *active_cpumask; /* CPU mask for active CPUs */
-private(LAVD) struct bpf_cpumask __kptr *ovrflw_cpumask; /* CPU mask for overflow CPUs */
+private(LAVD) struct bpf_cpumask __kptr *turbo_cpumask; /* CPU mask for turbo CPUs */
 private(LAVD) struct bpf_cpumask __kptr *big_cpumask; /* CPU mask for big CPUs */
 private(LAVD) struct bpf_cpumask __kptr *little_cpumask; /* CPU mask for little CPUs */
+private(LAVD) struct bpf_cpumask __kptr *active_cpumask; /* CPU mask for active CPUs */
+private(LAVD) struct bpf_cpumask __kptr *ovrflw_cpumask; /* CPU mask for overflow CPUs */
 private(LAVD) struct bpf_cpumask cpdom_cpumask[LAVD_CPDOM_MAX_NR]; /* CPU mask for each compute domain */
 
 /*
@@ -229,6 +230,7 @@ static u64 cur_svc_time;
  */
 const volatile bool	no_core_compaction;
 const volatile bool	no_freq_scaling;
+const volatile bool	no_prefer_turbo_core;
 const volatile u32	is_smt_active;
 const volatile u8	verbose;
@@ -659,8 +661,14 @@ static void collect_sys_stat(struct sys_stat_ctx *c)
 		c->new_util = (compute * LAVD_CPU_UTIL_MAX) / c->duration;
 		cpuc->util = calc_avg(cpuc->util, c->new_util);
 
-		if (cpuc->util > LAVD_CC_PER_CORE_MAX_CTUIL)
-			c->nr_violation += 1000;
+		if (cpuc->turbo_core) {
+			if (cpuc->util > LAVD_CC_PER_TURBO_CORE_MAX_CTUIL)
+				c->nr_violation += 1000;
+		}
+		else {
+			if (cpuc->util > LAVD_CC_PER_CORE_MAX_CTUIL)
+				c->nr_violation += 1000;
+		}
 
 		/*
 		 * Accumulate system-wide idle time
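
On the scheduler's utilization scale (LAVD_CPU_UTIL_MAX), the new branch lets a turbo core reach 750 before it counts as overloaded, versus 500 for an ordinary core, so turbo cores absorb more load before contributing to nr_violation. A minimal sketch of the equivalent logic (util_ceiling is a hypothetical helper, not part of the patch):

	/*
	 * Sketch: the turbo-aware check above collapses into a
	 * per-core utilization ceiling.
	 */
	static u64 util_ceiling(struct cpu_ctx *cpuc)
	{
		return cpuc->turbo_core ? LAVD_CC_PER_TURBO_CORE_MAX_CTUIL
					: LAVD_CC_PER_CORE_MAX_CTUIL;
	}

	/* ... then in collect_sys_stat(): */
	if (cpuc->util > util_ceiling(cpuc))
		c->nr_violation += 1000;

Fewer violations on turbo cores presumably keeps the core-compaction logic from expanding the active CPU set as eagerly.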
@@ -1391,21 +1399,41 @@ static s32 pick_idle_cpu(struct task_struct *p, struct task_ctx *taskc,
 	if (bpf_cpumask_empty(cast_mask(a_cpumask)))
 		goto start_omask;
 
-	if (is_perf_cri(taskc, stat_cur))
+	if (is_perf_cri(taskc, stat_cur) || no_core_compaction ) {
 		bpf_cpumask_and(t_cpumask, cast_mask(a_cpumask), cast_mask(big));
-	else
+	}
+	else {
 		bpf_cpumask_and(t_cpumask, cast_mask(a_cpumask), cast_mask(little));
+		goto start_llc_mask;
+	}
 
-	bpf_cpumask_and(t2_cpumask, cast_mask(t_cpumask), cast_mask(cpdom_mask_prev));
+	/*
+	 * Pick an idle core among turbo boost-enabled CPUs with a matching
+	 * core type.
+	 */
+start_turbo_mask:
+	if (no_prefer_turbo_core || !turbo_cpumask)
+		goto start_llc_mask;
+
+	bpf_cpumask_and(t2_cpumask, cast_mask(t_cpumask), cast_mask(turbo_cpumask));
 	if (bpf_cpumask_empty(cast_mask(t2_cpumask)))
-		goto start_tmask;
+		goto start_llc_mask;
+
+	cpu_id = pick_idle_cpu_in(t2_cpumask);
+	if (cpu_id >= 0) {
+		*is_idle = true;
+		goto unlock_out;
+	}
 
 	/*
 	 * Pick an idle core among active CPUs with a matching core type within
 	 * the prev CPU's LLC domain.
 	 */
-start_t2mask:
+start_llc_mask:
+	bpf_cpumask_and(t2_cpumask, cast_mask(t_cpumask), cast_mask(cpdom_mask_prev));
+	if (bpf_cpumask_empty(cast_mask(t2_cpumask)))
+		goto start_tmask;
 
 	cpu_id = pick_idle_cpu_in(t2_cpumask);
 	if (cpu_id >= 0) {
 		*is_idle = true;
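
Taken together, the rewritten block gives pick_idle_cpu() the following search ladder; a simplified reading of the control flow above (the later start_tmask/start_omask fallbacks are outside the shown context):

	/*
	 * Idle-CPU search order after this change (sketch):
	 *
	 * 1. t_cpumask: active CPUs of the matching core type -- big for
	 *    perf-critical tasks (or when core compaction is off), little
	 *    otherwise; the little path jumps straight to start_llc_mask.
	 * 2. start_turbo_mask: t_cpumask & turbo_cpumask -- new: try an
	 *    idle turbo core first, unless --no-prefer-turbo-core is set.
	 * 3. start_llc_mask: t_cpumask & cpdom_mask_prev -- an idle core
	 *    in the prev CPU's LLC domain.
	 * 4. start_tmask / start_omask: any matching active core, then
	 *    the overflow set.
	 */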
@@ -2755,6 +2783,10 @@ static int init_cpumasks(void)
 	if (err)
 		goto out;
 
+	err = calloc_cpumask(&turbo_cpumask);
+	if (err)
+		goto out;
+
 	err = calloc_cpumask(&big_cpumask);
 	if (err)
 		goto out;
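
calloc_cpumask() itself is outside this diff; a sketch of the usual scx allocation idiom it follows (an assumption about its body, not the verbatim helper):

	static int calloc_cpumask(struct bpf_cpumask **p_cpumask)
	{
		struct bpf_cpumask *cpumask;

		/* Allocate a BPF-managed cpumask object. */
		cpumask = bpf_cpumask_create();
		if (!cpumask)
			return -ENOMEM;

		/* Publish it into the global __kptr slot, releasing any
		 * mask that was previously stored there. */
		cpumask = bpf_kptr_xchg(p_cpumask, cpumask);
		if (cpumask)
			bpf_cpumask_release(cpumask);

		return 0;
	}

This is also why pick_idle_cpu() checks turbo_cpumask for NULL above: the kptr stays NULL until this allocation succeeds.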
@@ -2777,11 +2809,6 @@ out:
 static u16 get_cpuperf_cap(s32 cpu)
 {
-	/*
-	 * If the CPU capacity values are all 1024, then let's just use the
-	 * capacity values from userspace, which are calculated using each
-	 * CPU's maximum frequency.
-	 */
 	if (cpu >= 0 && cpu < LAVD_CPU_ID_MAX)
 		return __cpu_capacity_hint[cpu];
@@ -2789,25 +2816,51 @@ static u16 get_cpuperf_cap(s32 cpu)
 	return 1;
 }
 
+static u16 get_cputurbo_cap(void)
+{
+	u16 turbo_cap = 0;
+	int nr_turbo = 0, cpu;
+
+	/*
+	 * Find the maximum CPU frequency
+	 */
+	for (cpu = 0; cpu < LAVD_CPU_ID_MAX; cpu++) {
+		if (__cpu_capacity_hint[cpu] > turbo_cap) {
+			turbo_cap = __cpu_capacity_hint[cpu];
+			nr_turbo++;
+		}
+	}
+
+	/*
+	 * If all CPUs' frequencies are the same, ignore the turbo.
+	 */
+	if (nr_turbo <= 1)
+		turbo_cap = 0;
+
+	return turbo_cap;
+}
+
 static s32 init_per_cpu_ctx(u64 now)
 {
 	struct cpu_ctx *cpuc;
-	struct bpf_cpumask *big, *little, *active, *ovrflw, *cd_cpumask;
+	struct bpf_cpumask *turbo, *big, *little, *active, *ovrflw, *cd_cpumask;
 	struct cpdom_ctx *cpdomc;
 	int cpu, i, j, err = 0;
 	u64 cpdom_id;
 	u32 sum_capacity = 0, avg_capacity;
+	u16 turbo_cap;
 
 	bpf_rcu_read_lock();
 
 	/*
 	 * Prepare cpumasks.
 	 */
+	turbo = turbo_cpumask;
 	big = big_cpumask;
 	little = little_cpumask;
 	active = active_cpumask;
 	ovrflw = ovrflw_cpumask;
-	if (!big|| !little || !active || !ovrflw) {
+	if (!turbo || !big|| !little || !active || !ovrflw) {
 		scx_bpf_error("Failed to prepare cpumasks.");
 		err = -ENOMEM;
 		goto unlock_out;
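
Worth noting about get_cputurbo_cap(): nr_turbo counts how often the running maximum is raised, not how many CPUs reach it, so a single pass cleanly rejects homogeneous machines. Two illustrative inputs (capacity values hypothetical):

	/*
	 * __cpu_capacity_hint = {1024, 1024, 1024, 1024}
	 *   -> maximum found once, nr_turbo == 1, turbo_cap reset to 0:
	 *      no turbo cores on a homogeneous machine.
	 *
	 * __cpu_capacity_hint = {768, 768, 1024, 1024}
	 *   -> turbo_cap == 1024 (nr_turbo == 2); the two 1024-capacity
	 *      CPUs get cpuc->turbo_core set in the hunk below and are
	 *      added to turbo_cpumask.
	 */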
@@ -2848,6 +2901,11 @@ static s32 init_per_cpu_ctx(u64 now)
 		sum_capacity += cpuc->capacity;
 	}
 
+	/*
+	 * Get turbo capacity.
+	 */
+	turbo_cap = get_cputurbo_cap();
+
 	/*
 	 * Classify CPU into BIG or little cores based on their average capacity.
 	 */
@@ -2874,6 +2932,10 @@ static s32 init_per_cpu_ctx(u64 now)
 			bpf_cpumask_set_cpu(cpu, little);
 			bpf_cpumask_set_cpu(cpu, ovrflw);
 		}
+
+		cpuc->turbo_core = cpuc->capacity == turbo_cap;
+		if (cpuc->turbo_core)
+			bpf_cpumask_set_cpu(cpu, turbo);
 	}
 
 	/*


@@ -101,6 +101,10 @@ struct Opts {
     #[clap(long = "prefer-little-core", action = clap::ArgAction::SetTrue)]
     prefer_little_core: bool,
 
+    /// Do not specifically prefer to schedule on turbo cores.
+    #[clap(long = "no-prefer-turbo-core", action = clap::ArgAction::SetTrue)]
+    no_prefer_turbo_core: bool,
+
     /// Disable controlling the CPU frequency. In order to improve latency and responsiveness of
     /// performance-critical tasks, scx_lavd increases the CPU frequency even if CPU usage is low.
     /// See main.bpf.c for more info. Normally set by the power mode, but can be set independently
@@ -129,18 +133,21 @@ impl Opts {
             self.no_core_compaction = true;
             self.prefer_smt_core = false;
             self.prefer_little_core = false;
+            self.no_prefer_turbo_core = false;
             self.no_freq_scaling = true;
         }
         if self.powersave {
             self.no_core_compaction = false;
             self.prefer_smt_core = true;
             self.prefer_little_core = true;
+            self.no_prefer_turbo_core = true;
             self.no_freq_scaling = false;
         }
         if self.balanced {
             self.no_core_compaction = false;
             self.prefer_smt_core = false;
             self.prefer_little_core = false;
+            self.no_prefer_turbo_core = false;
             self.no_freq_scaling = false;
         }
@@ -516,6 +523,7 @@ impl<'a> Scheduler<'a> {
         skel.maps.bss_data.nr_cpus_onln = nr_cpus_onln;
         skel.maps.rodata_data.no_core_compaction = opts.no_core_compaction;
         skel.maps.rodata_data.no_freq_scaling = opts.no_freq_scaling;
+        skel.maps.rodata_data.no_prefer_turbo_core = opts.no_prefer_turbo_core;
         skel.maps.rodata_data.is_smt_active = match FlatTopology::is_smt_active() {
             Ok(ret) => (ret == 1) as u32,
             Err(_) => 0,
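
On the BPF side the knob lands in read-only data, so it is fixed at scheduler load time. Turbo preference is on by default; of the three profiles, only powersave opts out. A usage sketch (the profile flag names are assumed from the Opts fields above):

	$ scx_lavd                          # prefer idle turbo cores (default)
	$ scx_lavd --no-prefer-turbo-core   # disable the turbo-core preference
	$ scx_lavd --powersave              # profile that also disables it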