scx_lavd: accumulate more system-wide statistics

Signed-off-by: Changwoo Min <changwoo@igalia.com>
Changwoo Min 2024-09-04 16:33:19 +09:00
parent e5d27d0553
commit f490a55d54
2 changed files with 175 additions and 15 deletions


@@ -81,6 +81,7 @@ enum consts {
LAVD_PREEMPT_TICK_MARGIN = (100ULL * NSEC_PER_USEC),
LAVD_SYS_STAT_INTERVAL_NS = (50ULL * NSEC_PER_MSEC),
LAVD_SYS_STAT_DECAY_TIMES = (2ULL * LAVD_TIME_ONE_SEC) / LAVD_SYS_STAT_INTERVAL_NS,
LAVD_CC_PER_CORE_MAX_CTUIL = 500, /* maximum per-core CPU utilization */
LAVD_CC_PER_TURBO_CORE_MAX_CTUIL = 750, /* maximum per-core CPU utilization for a turbo core */
LAVD_CC_NR_ACTIVE_MIN = 1, /* minimum number of active cores */
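For reference, a back-of-the-envelope check of the decay cadence these constants imply, assuming LAVD_TIME_ONE_SEC is one second in nanoseconds:

	LAVD_SYS_STAT_DECAY_TIMES
	    = (2 * LAVD_TIME_ONE_SEC) / LAVD_SYS_STAT_INTERVAL_NS
	    = (2 * 1,000,000,000) / (50 * 1,000,000)
	    = 40 stat intervals, i.e., one halving roughly every two seconds.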
@@ -122,6 +123,16 @@ struct sys_stat {
volatile u32 nr_violation; /* number of utilization violations */
volatile u32 nr_active; /* number of active cores */
volatile u64 nr_sched; /* total number of schedules so far */
volatile u64 nr_migration; /* number of task migrations */
volatile u64 nr_preemption; /* number of preemptions */
volatile u64 nr_greedy; /* number of greedy tasks scheduled */
volatile u64 nr_perf_cri; /* number of performance-critical tasks scheduled */
volatile u64 nr_lat_cri; /* number of latency-critical tasks scheduled */
volatile u64 nr_big; /* number of tasks scheduled on a big core */
volatile u64 nr_pc_on_big; /* number of performance-critical tasks scheduled on a big core */
volatile u64 nr_lc_on_big; /* number of latency-critical tasks scheduled on a big core */
};
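A hypothetical reader-side helper (not part of this commit) showing how these raw counters can be turned into ratios; calc_stat_ratios and its output parameters are illustrative names:

	static void calc_stat_ratios(const struct sys_stat *s,
				     u64 *mig_pct, u64 *preempt_pct, u64 *big_pct)
	{
		/* guard against division by zero on a freshly started system */
		if (!s->nr_sched)
			return;
		*mig_pct     = s->nr_migration  * 100 / s->nr_sched; /* migrations per 100 schedules */
		*preempt_pct = s->nr_preemption * 100 / s->nr_sched; /* preemptions per 100 schedules */
		*big_pct     = s->nr_big        * 100 / s->nr_sched; /* share of schedules on big cores */
	}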
/*
@@ -169,7 +180,7 @@ struct cpu_ctx {
*/
volatile u32 max_lat_cri; /* maximum latency criticality */
volatile u32 sum_lat_cri; /* sum of latency criticality */
volatile u32 sched_nr; /* number of schedules */
volatile u32 nr_sched; /* number of schedules */
/*
* Information used to keep track of performance criticality
@@ -205,6 +216,15 @@ struct cpu_ctx {
struct bpf_cpumask __kptr *tmp_o_mask; /* temporary cpu mask */
struct bpf_cpumask __kptr *tmp_t_mask; /* temporary cpu mask */
struct bpf_cpumask __kptr *tmp_t2_mask; /* temporary cpu mask */
/*
* Information for statistics.
*/
volatile u32 nr_migration; /* number of migrations */
volatile u32 nr_preemption; /* number of preemptions */
volatile u32 nr_greedy; /* number of greedy tasks scheduled */
volatile u32 nr_perf_cri; /* number of performance-critical tasks scheduled */
volatile u32 nr_lat_cri; /* number of latency-critical tasks scheduled */
} __attribute__((aligned(CACHELINE_SIZE)));
/*
@@ -242,12 +262,18 @@ struct task_ctx {
volatile s32 victim_cpu;
u16 slice_boost_prio; /* how many times a task fully consumed the slice */
u8 wakeup_ft; /* regular wakeup = 1, sync wakeup = 2 */
/*
* Task's performance criticality
*/
u8 on_big; /* executable on a big core */
u8 on_little; /* executable on a little core */
u32 perf_cri; /* performance criticality of a task */
/*
* Information for statistics collection
*/
u32 cpu_id; /* the CPU the task was last scheduled on */
};
/*


@@ -240,6 +240,18 @@ const volatile bool is_autopilot_on;
const volatile u32 is_smt_active;
const volatile u8 verbose;
/*
* Statistics
*/
volatile int power_mode;
volatile u64 last_power_mode_clk;
volatile u64 performance_mode_ns;
volatile u64 balanced_mode_ns;
volatile u64 powersave_mode_ns;
/*
* Exit information
*/
UEI_DEFINE(uei);
#define debugln(fmt, ...) \
@@ -320,6 +332,7 @@ struct {
static u16 get_nice_prio(struct task_struct *p);
static int reinit_active_cpumask_for_performance(void);
static void update_power_mode_time(void);
static u64 sigmoid_u64(u64 v, u64 max)
{
@@ -582,7 +595,15 @@ struct sys_stat_ctx {
s32 max_lat_cri;
s32 avg_lat_cri;
u64 sum_lat_cri;
u32 sched_nr;
u32 nr_sched;
u32 nr_migration;
u32 nr_preemption;
u32 nr_greedy;
u32 nr_perf_cri;
u32 nr_lat_cri;
u32 nr_big;
u32 nr_pc_on_big;
u32 nr_lc_on_big;
u64 sum_perf_cri;
u32 avg_perf_cri;
u64 new_util;
@@ -618,6 +639,30 @@ static void collect_sys_stat(struct sys_stat_ctx *c)
c->load_actual += cpuc->load_actual;
c->load_run_time_ns += cpuc->load_run_time_ns;
c->tot_svc_time += cpuc->tot_svc_time;
cpuc->tot_svc_time = 0;
/*
* Accumulate statistics.
*/
if (cpuc->big_core) {
c->nr_big += cpuc->nr_sched;
c->nr_pc_on_big += cpuc->nr_perf_cri;
c->nr_lc_on_big += cpuc->nr_lat_cri;
}
c->nr_perf_cri += cpuc->nr_perf_cri;
cpuc->nr_perf_cri = 0;
c->nr_lat_cri += cpuc->nr_lat_cri;
cpuc->nr_lat_cri = 0;
c->nr_migration += cpuc->nr_migration;
cpuc->nr_migration = 0;
c->nr_preemption += cpuc->nr_preemption;
cpuc->nr_preemption = 0;
c->nr_greedy += cpuc->nr_greedy;
cpuc->nr_greedy = 0;
/*
* Accumulate task's latency criticality information.
@@ -629,8 +674,8 @@ static void collect_sys_stat(struct sys_stat_ctx *c)
c->sum_lat_cri += cpuc->sum_lat_cri;
cpuc->sum_lat_cri = 0;
c->sched_nr += cpuc->sched_nr;
cpuc->sched_nr = 0;
c->nr_sched += cpuc->nr_sched;
cpuc->nr_sched = 0;
if (cpuc->max_lat_cri > c->max_lat_cri)
c->max_lat_cri = cpuc->max_lat_cri;
@@ -701,7 +746,7 @@ static void calc_sys_stat(struct sys_stat_ctx *c)
c->compute_total = 0;
c->new_util = (c->compute_total * LAVD_CPU_UTIL_MAX)/c->duration_total;
if (c->sched_nr == 0) {
if (c->nr_sched == 0) {
/*
* When a system is completely idle, it is indeed possible that
* nothing is scheduled for an interval.
@@ -711,13 +756,15 @@ static void calc_sys_stat(struct sys_stat_ctx *c)
c->avg_perf_cri = c->stat_cur->avg_perf_cri;
}
else {
c->avg_lat_cri = c->sum_lat_cri / c->sched_nr;
c->avg_perf_cri = c->sum_perf_cri / c->sched_nr;
c->avg_lat_cri = c->sum_lat_cri / c->nr_sched;
c->avg_perf_cri = c->sum_perf_cri / c->nr_sched;
}
}
static void update_sys_stat_next(struct sys_stat_ctx *c)
{
static int cnt = 0;
/*
* Update the CPU utilization to the next version.
*/
@@ -741,11 +788,45 @@ static void update_sys_stat_next(struct sys_stat_ctx *c)
stat_next->nr_violation =
calc_avg32(stat_cur->nr_violation, c->nr_violation);
stat_next->avg_svc_time = (c->sched_nr == 0) ? 0 :
c->tot_svc_time / c->sched_nr;
stat_next->avg_svc_time = (c->nr_sched == 0) ? 0 :
c->tot_svc_time / c->nr_sched;
stat_next->nr_queued_task =
calc_avg(stat_cur->nr_queued_task, c->nr_queued_task);
/*
* Halve the statistics every LAVD_SYS_STAT_DECAY_TIMES intervals so
* that they only retain recent history.
*/
if (cnt++ == LAVD_SYS_STAT_DECAY_TIMES) {
cnt = 0;
stat_next->nr_sched >>= 1;
stat_next->nr_migration >>= 1;
stat_next->nr_preemption >>= 1;
stat_next->nr_greedy >>= 1;
stat_next->nr_perf_cri >>= 1;
stat_next->nr_lat_cri >>= 1;
stat_next->nr_big >>= 1;
stat_next->nr_pc_on_big >>= 1;
stat_next->nr_lc_on_big >>= 1;
__sync_fetch_and_sub(&performance_mode_ns, performance_mode_ns/2);
__sync_fetch_and_sub(&balanced_mode_ns, balanced_mode_ns/2);
__sync_fetch_and_sub(&powersave_mode_ns, powersave_mode_ns/2);
}
stat_next->nr_sched += c->nr_sched;
stat_next->nr_migration += c->nr_migration;
stat_next->nr_preemption += c->nr_preemption;
stat_next->nr_greedy += c->nr_greedy;
stat_next->nr_perf_cri += c->nr_perf_cri;
stat_next->nr_lat_cri += c->nr_lat_cri;
stat_next->nr_big += c->nr_big;
stat_next->nr_pc_on_big += c->nr_pc_on_big;
stat_next->nr_lc_on_big += c->nr_lc_on_big;
update_power_mode_time();
}
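The periodic halving above is a cheap exponential decay: with k events accumulated per decay period, a counter follows S' = S/2 + k, whose fixed point is S = 2k, so each statistic hovers around twice the most recent period's count. A standalone sketch of the convergence (illustrative values, plain C):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long s = 0;
		const unsigned long long k = 1000;	/* events per decay period */

		/* iterate S' = S/2 + k; converges to ~2k within a few periods */
		for (int i = 0; i < 16; i++)
			s = s / 2 + k;
		printf("steady state = %llu (expect ~%llu)\n", s, 2 * k);
		return 0;
	}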
static void do_update_sys_stat(void)
@@ -905,21 +986,49 @@ unlock_out:
bpf_rcu_read_unlock();
}
int do_set_power_profile(s32 power_mode, int util)
static void update_power_mode_time(void)
{
static s32 cur_mode = LAVD_PM_MAX;
u64 now = bpf_ktime_get_ns();
u64 delta;
if (last_power_mode_clk == 0)
last_power_mode_clk = now;
delta = now - last_power_mode_clk;
last_power_mode_clk = now;
switch (power_mode) {
case LAVD_PM_PERFORMANCE:
__sync_fetch_and_add(&performance_mode_ns, delta);
break;
case LAVD_PM_BALANCED:
__sync_fetch_and_add(&balanced_mode_ns, delta);
break;
case LAVD_PM_POWERSAVE:
__sync_fetch_and_add(&powersave_mode_ns, delta);
break;
}
}
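Since the three *_mode_ns counters are decayed in lockstep with the other statistics, a monitor can derive residency shares directly; a hypothetical helper (mode_residency_pct is an illustrative name, not part of this commit):

	static u64 mode_residency_pct(u64 mode_ns)
	{
		u64 total = performance_mode_ns + balanced_mode_ns +
			    powersave_mode_ns;

		/* residency of one mode as a percentage of all tracked time */
		return total ? (mode_ns * 100 / total) : 0;
	}

For example, mode_residency_pct(powersave_mode_ns) would yield the recent share of time spent in powersave mode.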
static int do_set_power_profile(s32 pm, int util)
{
/*
* Skip setting the mode if already in the same mode.
*/
if (cur_mode == power_mode)
if (power_mode == pm)
return 0;
cur_mode = power_mode;
/*
* Update the time accounting of the outgoing power mode.
*/
update_power_mode_time();
power_mode = pm;
/*
* Change the power mode.
*/
switch (power_mode) {
switch (pm) {
case LAVD_PM_PERFORMANCE:
no_core_compaction = true;
no_freq_scaling = true;
@@ -1274,6 +1383,7 @@ static void update_stat_for_running(struct task_struct *p,
struct task_ctx *taskc,
struct cpu_ctx *cpuc)
{
struct sys_stat *stat_cur = get_sys_stat_cur();
u64 wait_period, interval;
u64 now = bpf_ktime_get_ns();
u64 wait_freq_ft, wake_freq_ft, perf_cri;
@@ -1306,7 +1416,7 @@ static void update_stat_for_running(struct task_struct *p,
if (cpuc->max_lat_cri < taskc->lat_cri)
cpuc->max_lat_cri = taskc->lat_cri;
cpuc->sum_lat_cri += taskc->lat_cri;
cpuc->sched_nr++;
cpuc->nr_sched++;
/*
* It is clear there is no need to consider the suspended duration
@@ -1345,6 +1455,30 @@ static void update_stat_for_running(struct task_struct *p,
* Update task state when starts running.
*/
taskc->last_running_clk = now;
/*
* Update statistics information.
*/
if (taskc->cpu_id != cpuc->cpu_id) {
taskc->cpu_id = cpuc->cpu_id;
cpuc->nr_migration++;
}
if (taskc->victim_cpu >= 0)
cpuc->nr_preemption++;
if (is_lat_cri(taskc, stat_cur)) {
cpuc->nr_lat_cri++;
// debugln("------------------------ lc = %llu", cpuc->nr__cri);
}
if (is_perf_cri(taskc, stat_cur)) {
cpuc->nr_perf_cri++;
// debugln("------------------------ pc = %llu", cpuc->nr_perf_cri);
}
if (is_greedy(taskc))
cpuc->nr_greedy++;
}
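The is_lat_cri(), is_perf_cri(), and is_greedy() predicates used above are defined elsewhere in main.bpf.c; a plausible shape for the first two, assuming each simply compares the task against the current system-wide average (an assumption, not the verbatim upstream code):

	static bool is_lat_cri(struct task_ctx *taskc, struct sys_stat *stat_cur)
	{
		/* assumed: a task is latency-critical iff above the running average */
		return taskc->lat_cri >= stat_cur->avg_lat_cri;
	}

	static bool is_perf_cri(struct task_ctx *taskc, struct sys_stat *stat_cur)
	{
		/* assumed: a task is performance-critical iff above the running average */
		return taskc->perf_cri >= stat_cur->avg_perf_cri;
	}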
static u64 calc_svc_time(struct task_struct *p, struct task_ctx *taskc)