diff --git a/scheds/rust/scx_lavd/src/bpf/intf.h b/scheds/rust/scx_lavd/src/bpf/intf.h
index 5029147..a5354aa 100644
--- a/scheds/rust/scx_lavd/src/bpf/intf.h
+++ b/scheds/rust/scx_lavd/src/bpf/intf.h
@@ -81,6 +81,7 @@ enum consts {
 	LAVD_PREEMPT_TICK_MARGIN	= (100ULL * NSEC_PER_USEC),
 
 	LAVD_SYS_STAT_INTERVAL_NS	= (50ULL * NSEC_PER_MSEC),
+	LAVD_SYS_STAT_DECAY_TIMES	= (2ULL * LAVD_TIME_ONE_SEC) / LAVD_SYS_STAT_INTERVAL_NS,
 	LAVD_CC_PER_CORE_MAX_CTUIL	= 500,	/* maximum per-core CPU utilization */
 	LAVD_CC_PER_TURBO_CORE_MAX_CTUIL = 750,	/* maximum per-core CPU utilization for a turbo core */
 	LAVD_CC_NR_ACTIVE_MIN		= 1,	/* num of mininum active cores */
@@ -122,6 +123,16 @@ struct sys_stat {
 
 	volatile u32	nr_violation;	/* number of utilization violation */
 	volatile u32	nr_active;	/* number of active cores */
+
+	volatile u64	nr_sched;	/* total number of schedules so far */
+	volatile u64	nr_migration;	/* number of task migrations */
+	volatile u64	nr_preemption;	/* number of preemptions */
+	volatile u64	nr_greedy;	/* number of greedy tasks scheduled */
+	volatile u64	nr_perf_cri;	/* number of performance-critical tasks scheduled */
+	volatile u64	nr_lat_cri;	/* number of latency-critical tasks scheduled */
+	volatile u64	nr_big;		/* number of tasks scheduled on a big core */
+	volatile u64	nr_pc_on_big;	/* number of performance-critical tasks scheduled on a big core */
+	volatile u64	nr_lc_on_big;	/* number of latency-critical tasks scheduled on a big core */
 };
 
 /*
@@ -169,7 +180,7 @@ struct cpu_ctx {
 	 */
 	volatile u32	max_lat_cri;	/* maximum latency criticality */
 	volatile u32	sum_lat_cri;	/* sum of latency criticality */
-	volatile u32	sched_nr;	/* number of schedules */
+	volatile u32	nr_sched;	/* number of schedules */
 
 	/*
 	 * Information used to keep track of performance criticality
@@ -205,6 +216,15 @@ struct cpu_ctx {
 	struct bpf_cpumask __kptr *tmp_o_mask;	/* temporary cpu mask */
 	struct bpf_cpumask __kptr *tmp_t_mask;	/* temporary cpu mask */
 	struct bpf_cpumask __kptr *tmp_t2_mask;	/* temporary cpu mask */
+
+	/*
+	 * Information for statistics.
+	 */
+	volatile u32	nr_migration;	/* number of migrations */
+	volatile u32	nr_preemption;	/* number of preemptions */
+	volatile u32	nr_greedy;	/* number of greedy tasks scheduled */
+	volatile u32	nr_perf_cri;	/* number of performance-critical tasks scheduled */
+	volatile u32	nr_lat_cri;	/* number of latency-critical tasks scheduled */
 } __attribute__((aligned(CACHELINE_SIZE)));
 
 /*
@@ -242,12 +262,18 @@ struct task_ctx {
 	volatile s32	victim_cpu;
 	u16	slice_boost_prio;	/* how many times a task fully consumed the slice */
 	u8	wakeup_ft;		/* regular wakeup = 1, sync wakeup = 2 */
+
 	/*
 	 * Task's performance criticality
 	 */
 	u8	on_big;			/* executable on a big core */
 	u8	on_little;		/* executable on a little core */
 	u32	perf_cri;		/* performance criticality of a task */
+
+	/*
+	 * Information for statistics collection.
+	 */
+	u32	cpu_id;			/* the CPU ID a task was last scheduled on */
 };
 
 /*
diff --git a/scheds/rust/scx_lavd/src/bpf/main.bpf.c b/scheds/rust/scx_lavd/src/bpf/main.bpf.c
index 88c6e2d..d28a007 100644
--- a/scheds/rust/scx_lavd/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_lavd/src/bpf/main.bpf.c
@@ -240,6 +240,18 @@ const volatile bool is_autopilot_on;
 const volatile u32 is_smt_active;
 const volatile u8 verbose;
 
+/*
+ * Statistics
+ */
+volatile int power_mode;
+volatile u64 last_power_mode_clk;
+volatile u64 performance_mode_ns;
+volatile u64 balanced_mode_ns;
+volatile u64 powersave_mode_ns;
+
+/*
+ * Exit information
+ */
 UEI_DEFINE(uei);
 
 #define debugln(fmt, ...)						\
@@ -320,6 +332,7 @@
 
 static u16 get_nice_prio(struct task_struct *p);
 static int reinit_active_cpumask_for_performance(void);
+static void update_power_mode_time(void);
 
 static u64 sigmoid_u64(u64 v, u64 max)
 {
@@ -582,7 +595,15 @@ struct sys_stat_ctx {
 	s32		max_lat_cri;
 	s32		avg_lat_cri;
 	u64		sum_lat_cri;
-	u32		sched_nr;
+	u32		nr_sched;
+	u32		nr_migration;
+	u32		nr_preemption;
+	u32		nr_greedy;
+	u32		nr_perf_cri;
+	u32		nr_lat_cri;
+	u32		nr_big;
+	u32		nr_pc_on_big;
+	u32		nr_lc_on_big;
 	u64		sum_perf_cri;
 	u32		avg_perf_cri;
 	u64		new_util;
@@ -618,6 +639,30 @@ static void collect_sys_stat(struct sys_stat_ctx *c)
 		c->load_actual += cpuc->load_actual;
 		c->load_run_time_ns += cpuc->load_run_time_ns;
 		c->tot_svc_time += cpuc->tot_svc_time;
+		cpuc->tot_svc_time = 0;
+
+		/*
+		 * Accumulate statistics.
+		 */
+		if (cpuc->big_core) {
+			c->nr_big += cpuc->nr_sched;
+			c->nr_pc_on_big += cpuc->nr_perf_cri;
+			c->nr_lc_on_big += cpuc->nr_lat_cri;
+		}
+		c->nr_perf_cri += cpuc->nr_perf_cri;
+		cpuc->nr_perf_cri = 0;
+
+		c->nr_lat_cri += cpuc->nr_lat_cri;
+		cpuc->nr_lat_cri = 0;
+
+		c->nr_migration += cpuc->nr_migration;
+		cpuc->nr_migration = 0;
+
+		c->nr_preemption += cpuc->nr_preemption;
+		cpuc->nr_preemption = 0;
+
+		c->nr_greedy += cpuc->nr_greedy;
+		cpuc->nr_greedy = 0;
 
 		/*
 		 * Accumulate task's latency criticlity information.
@@ -629,8 +674,8 @@
 		c->sum_lat_cri += cpuc->sum_lat_cri;
 		cpuc->sum_lat_cri = 0;
 
-		c->sched_nr += cpuc->sched_nr;
-		cpuc->sched_nr = 0;
+		c->nr_sched += cpuc->nr_sched;
+		cpuc->nr_sched = 0;
 
 		if (cpuc->max_lat_cri > c->max_lat_cri)
 			c->max_lat_cri = cpuc->max_lat_cri;
@@ -701,7 +746,7 @@ static void calc_sys_stat(struct sys_stat_ctx *c)
 		c->compute_total = 0;
 	c->new_util = (c->compute_total * LAVD_CPU_UTIL_MAX)/c->duration_total;
 
-	if (c->sched_nr == 0) {
+	if (c->nr_sched == 0) {
 		/*
 		 * When a system is completely idle, it is indeed possible
 		 * nothing scheduled for an interval.
@@ -711,13 +756,15 @@
 		c->avg_perf_cri = c->stat_cur->avg_perf_cri;
 	}
 	else {
-		c->avg_lat_cri = c->sum_lat_cri / c->sched_nr;
-		c->avg_perf_cri = c->sum_perf_cri / c->sched_nr;
+		c->avg_lat_cri = c->sum_lat_cri / c->nr_sched;
+		c->avg_perf_cri = c->sum_perf_cri / c->nr_sched;
 	}
 }
 
 static void update_sys_stat_next(struct sys_stat_ctx *c)
 {
+	static int cnt = 0;
+
 	/*
 	 * Update the CPU utilization to the next version.
 	 */
@@ -741,11 +788,45 @@ static void update_sys_stat_next(struct sys_stat_ctx *c)
 	stat_next->nr_violation =
 		calc_avg32(stat_cur->nr_violation, c->nr_violation);
 
-	stat_next->avg_svc_time = (c->sched_nr == 0) ? 0 :
-		c->tot_svc_time / c->sched_nr;
+	stat_next->avg_svc_time = (c->nr_sched == 0) ? 0 :
+		c->tot_svc_time / c->nr_sched;
 
 	stat_next->nr_queued_task =
 		calc_avg(stat_cur->nr_queued_task, c->nr_queued_task);
+
+
+	/*
+	 * Halve the statistics every two seconds (LAVD_SYS_STAT_DECAY_TIMES
+	 * update intervals) so the counters decay toward recent behavior.
+	 */
+	if (cnt++ == LAVD_SYS_STAT_DECAY_TIMES) {
+		cnt = 0;
+		stat_next->nr_sched >>= 1;
+		stat_next->nr_migration >>= 1;
+		stat_next->nr_preemption >>= 1;
+		stat_next->nr_greedy >>= 1;
+		stat_next->nr_perf_cri >>= 1;
+		stat_next->nr_lat_cri >>= 1;
+		stat_next->nr_big >>= 1;
+		stat_next->nr_pc_on_big >>= 1;
+		stat_next->nr_lc_on_big >>= 1;
+
+		__sync_fetch_and_sub(&performance_mode_ns, performance_mode_ns/2);
+		__sync_fetch_and_sub(&balanced_mode_ns, balanced_mode_ns/2);
+		__sync_fetch_and_sub(&powersave_mode_ns, powersave_mode_ns/2);
+	}
+
+	stat_next->nr_sched += c->nr_sched;
+	stat_next->nr_migration += c->nr_migration;
+	stat_next->nr_preemption += c->nr_preemption;
+	stat_next->nr_greedy += c->nr_greedy;
+	stat_next->nr_perf_cri += c->nr_perf_cri;
+	stat_next->nr_lat_cri += c->nr_lat_cri;
+	stat_next->nr_big += c->nr_big;
+	stat_next->nr_pc_on_big += c->nr_pc_on_big;
+	stat_next->nr_lc_on_big += c->nr_lc_on_big;
+
+	update_power_mode_time();
 }
 
 static void do_update_sys_stat(void)
@@ -905,21 +986,49 @@ unlock_out:
 	bpf_rcu_read_unlock();
 }
 
-int do_set_power_profile(s32 power_mode, int util)
+static void update_power_mode_time(void)
 {
-	static s32 cur_mode = LAVD_PM_MAX;
+	u64 now = bpf_ktime_get_ns();
+	u64 delta;
+	if (last_power_mode_clk == 0)
+		last_power_mode_clk = now;
+
+	delta = now - last_power_mode_clk;
+	last_power_mode_clk = now;
+
+	switch (power_mode) {
+	case LAVD_PM_PERFORMANCE:
+		__sync_fetch_and_add(&performance_mode_ns, delta);
+		break;
+	case LAVD_PM_BALANCED:
+		__sync_fetch_and_add(&balanced_mode_ns, delta);
+		break;
+	case LAVD_PM_POWERSAVE:
+		__sync_fetch_and_add(&powersave_mode_ns, delta);
+		break;
+	}
+}
+
+
+static int do_set_power_profile(s32 pm, int util)
+{
 
 	/*
 	 * Skip setting the mode if alreay in the same mode.
 	 */
-	if (cur_mode == power_mode)
+	if (power_mode == pm)
 		return 0;
-	cur_mode = power_mode;
+
+	/*
+	 * Update the time spent in the current power mode before switching.
+	 */
+	update_power_mode_time();
+	power_mode = pm;
 
 	/*
 	 * Change the power mode.
 	 */
-	switch (power_mode) {
+	switch (pm) {
 	case LAVD_PM_PERFORMANCE:
 		no_core_compaction = true;
 		no_freq_scaling = true;
@@ -1274,6 +1383,7 @@ static void update_stat_for_running(struct task_struct *p,
 					struct task_ctx *taskc,
 					struct cpu_ctx *cpuc)
 {
+	struct sys_stat *stat_cur = get_sys_stat_cur();
 	u64 wait_period, interval;
 	u64 now = bpf_ktime_get_ns();
 	u64 wait_freq_ft, wake_freq_ft, perf_cri;
@@ -1306,7 +1416,7 @@ static void update_stat_for_running(struct task_struct *p,
 	if (cpuc->max_lat_cri < taskc->lat_cri)
 		cpuc->max_lat_cri = taskc->lat_cri;
 	cpuc->sum_lat_cri += taskc->lat_cri;
-	cpuc->sched_nr++;
+	cpuc->nr_sched++;
 
 	/*
 	 * It is clear there is no need to consider the suspended duration
@@ -1345,6 +1455,26 @@ static void update_stat_for_running(struct task_struct *p,
 	 * Update task state when starts running.
 	 */
 	taskc->last_running_clk = now;
+
+	/*
+	 * Update statistics information.
+	 */
+	if (taskc->cpu_id != cpuc->cpu_id) {
+		taskc->cpu_id = cpuc->cpu_id;
+		cpuc->nr_migration++;
+	}
+
+	if (taskc->victim_cpu >= 0)
+		cpuc->nr_preemption++;
+
+	if (is_lat_cri(taskc, stat_cur))
+		cpuc->nr_lat_cri++;
+
+	if (is_perf_cri(taskc, stat_cur))
+		cpuc->nr_perf_cri++;
+
+	if (is_greedy(taskc))
+		cpuc->nr_greedy++;
 }
 
 static u64 calc_svc_time(struct task_struct *p, struct task_ctx *taskc)
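As a sanity check on the decay period above, a standalone userspace sketch (not part of the patch; the constants are mirrored from intf.h, and the nanosecond-based values of NSEC_PER_USEC and LAVD_TIME_ONE_SEC are assumptions) shows that the counters are halved every 40 update intervals, i.e., every 2 seconds:

#include <stdio.h>

/* Mirrored from intf.h; the nanosecond-based definitions are assumptions. */
#define NSEC_PER_USEC			1000ULL
#define NSEC_PER_MSEC			(1000ULL * NSEC_PER_USEC)
#define LAVD_TIME_ONE_SEC		(1000ULL * NSEC_PER_MSEC)
#define LAVD_SYS_STAT_INTERVAL_NS	(50ULL * NSEC_PER_MSEC)
#define LAVD_SYS_STAT_DECAY_TIMES \
	((2ULL * LAVD_TIME_ONE_SEC) / LAVD_SYS_STAT_INTERVAL_NS)

int main(void)
{
	/* 2 sec / 50 msec = 40 stat-update intervals between halvings. */
	printf("halve stats every %llu intervals (%llu msec)\n",
	       LAVD_SYS_STAT_DECAY_TIMES,
	       LAVD_SYS_STAT_DECAY_TIMES * LAVD_SYS_STAT_INTERVAL_NS / NSEC_PER_MSEC);
	return 0;
}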
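Because all of the counters are halved on the same schedule, ratios between them survive the decay, which is what a userspace consumer would presumably report. A hypothetical reader-side helper (only the struct sys_stat field names come from the patch; every other name below is invented for illustration) could derive rates like this:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

/* Hypothetical snapshot of the new counters in struct sys_stat. */
struct sys_stat_snap {
	u64 nr_sched, nr_migration, nr_preemption, nr_greedy;
	u64 nr_perf_cri, nr_lat_cri, nr_big, nr_pc_on_big, nr_lc_on_big;
};

/* Percentage of part in whole, guarding against an empty denominator. */
static double pct(u64 part, u64 whole)
{
	return whole ? 100.0 * (double)part / (double)whole : 0.0;
}

static void report(const struct sys_stat_snap *s)
{
	/*
	 * Numerators and denominators decay together, so these ratios
	 * are effectively weighted toward the last few seconds.
	 */
	printf("mig=%.1f%% preempt=%.1f%% greedy=%.1f%% "
	       "pc_on_big=%.1f%% lc_on_big=%.1f%%\n",
	       pct(s->nr_migration, s->nr_sched),
	       pct(s->nr_preemption, s->nr_sched),
	       pct(s->nr_greedy, s->nr_sched),
	       pct(s->nr_pc_on_big, s->nr_perf_cri),
	       pct(s->nr_lc_on_big, s->nr_lat_cri));
}

int main(void)
{
	/* Made-up sample values purely to exercise the helper. */
	struct sys_stat_snap s = {
		.nr_sched = 1000, .nr_migration = 120, .nr_preemption = 40,
		.nr_greedy = 15, .nr_perf_cri = 300, .nr_lat_cri = 250,
		.nr_big = 600, .nr_pc_on_big = 270, .nr_lc_on_big = 200,
	};
	report(&s);
	return 0;
}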