Merge pull request #410 from sched-ext/bpfland-smooth-perf
scx_bpfland: enhance performance consistency and predictability
This commit is contained in:
commit 86d2f50230
@@ -15,7 +15,9 @@
 #define CLAMP(val, lo, hi) MIN(MAX(val, lo), hi)
 
 enum consts {
-	NSEC_PER_SEC = 1000000000ULL,
+	NSEC_PER_USEC = 1000ULL,
+	NSEC_PER_MSEC = (1000ULL * NSEC_PER_USEC),
+	NSEC_PER_SEC = (1000ULL * NSEC_PER_MSEC),
 };
 
 #ifndef __VMLINUX_H__
@@ -34,12 +34,12 @@ const volatile bool debug;
 /*
  * Default task time slice.
  */
-const volatile u64 slice_ns = SCX_SLICE_DFL;
+const volatile u64 slice_ns = 5ULL * NSEC_PER_MSEC;
 
 /*
  * Time slice used when system is over commissioned.
  */
-const volatile u64 slice_ns_min = 500000;
+const volatile u64 slice_ns_min = 500ULL * NSEC_PER_USEC;
 
 /*
  * Maximum time slice lag.
@@ -50,22 +50,35 @@ const volatile u64 slice_ns_min = 500000;
  */
 const volatile u64 slice_ns_lag;
 
 /*
  * Enable built-in idle selection logic.
  */
 const volatile bool builtin_idle;
 
 /*
  * Threshold of voluntary context switches used to classify a task as
  * interactive.
  */
-const volatile u64 nvcsw_thresh = 10;
+const volatile u64 nvcsw_thresh = 10ULL;
 
+/*
+ * Time threshold to prevent task starvation.
+ *
+ * The scheduler processes tasks from various DSQs in the following order:
+ *
+ *   per-CPU DSQs => priority DSQ => shared DSQ
+ *
+ * Tasks in the shared DSQ may be starved by those in the priority DSQ, which
+ * in turn may be starved by tasks in any per-CPU DSQ.
+ *
+ * To mitigate this, store the timestamp of the last task consumption from
+ * both the priority DSQ and the shared DSQ. If the starvation_thresh_ns
+ * threshold is exceeded without consuming a task, the scheduler will be
+ * forced to consume a task from the corresponding DSQ.
+ */
+const volatile u64 starvation_thresh_ns = 5ULL * NSEC_PER_MSEC;
+static u64 starvation_shared_ts;
+static u64 starvation_prio_ts;
+
 /*
  * Scheduling statistics.
  */
-volatile u64 nr_direct_dispatches, nr_kthread_dispatches,
-	     nr_shared_dispatches, nr_prio_dispatches;
+volatile u64 nr_direct_dispatches, nr_shared_dispatches, nr_prio_dispatches;
 
 /*
  * Amount of currently running tasks.
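
Note on the constants above: a DSQ counts as starving once more than
starvation_thresh_ns has elapsed since a task was last consumed from it, at
which point the dispatch path is forced to serve it. A minimal standalone
illustration of that time check in plain C (the helper and variable names are
illustrative only; the scheduler's actual check uses vtime_before() inside
consume_starving_tasks(), shown later in this diff):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_MSEC 1000000ULL

    /* Stand-ins for the globals declared above. */
    static const uint64_t starvation_thresh_ns = 5ULL * NSEC_PER_MSEC;
    static uint64_t starvation_shared_ts;

    /* Return true if the shared DSQ has not been served within the threshold. */
    static bool shared_dsq_is_starving(uint64_t now)
    {
            return now - starvation_shared_ts > starvation_thresh_ns;
    }

    int main(void)
    {
            starvation_shared_ts = 1000 * NSEC_PER_MSEC; /* last served at t = 1000 ms */
            printf("%d\n", shared_dsq_is_starving(1004 * NSEC_PER_MSEC)); /* 0: only 4 ms elapsed */
            printf("%d\n", shared_dsq_is_starving(1006 * NSEC_PER_MSEC)); /* 1: 6 ms > 5 ms threshold */
            return 0;
    }

The real code compares timestamps with vtime_before() so the check stays valid
across counter wrap-around; the plain subtraction above is the simplified form.
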
@@ -166,6 +179,19 @@ static int calloc_cpumask(struct bpf_cpumask **p_cpumask)
 	return 0;
 }
 
+/*
+ * Set the state of a CPU in a cpumask.
+ */
+static bool set_cpu_state(struct bpf_cpumask *cpumask, s32 cpu, bool state)
+{
+	if (!cpumask)
+		return false;
+	if (state)
+		return bpf_cpumask_test_and_set_cpu(cpu, cpumask);
+	else
+		return bpf_cpumask_test_and_clear_cpu(cpu, cpumask);
+}
+
 /*
  * Exponential weighted moving average (EWMA).
  *
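
The EWMA referenced in this comment is what lets the scheduler smooth noisy
per-task signals (for instance the voluntary context switch rate behind
nvcsw_thresh) instead of reacting to a single spike. A standalone sketch of a
typical integer EWMA with a 1/4 weight for the new sample (the weight and the
helper name are assumptions, not taken from this commit):

    #include <stdint.h>
    #include <stdio.h>

    /* avg' = 3/4 * avg + 1/4 * sample, in pure integer arithmetic. */
    static uint64_t ewma_update(uint64_t avg, uint64_t sample)
    {
            return avg - (avg >> 2) + (sample >> 2);
    }

    int main(void)
    {
            uint64_t avg = 0;
            uint64_t samples[] = { 40, 40, 8, 8, 8 }; /* e.g. voluntary switches per interval */

            for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                    avg = ewma_update(avg, samples[i]);
                    printf("sample=%llu avg=%llu\n",
                           (unsigned long long)samples[i], (unsigned long long)avg);
            }
            return 0;
    }

With this weighting a task needs several consecutive intervals of high (or low)
activity before its average, and therefore its classification, actually moves.
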
@@ -212,9 +238,7 @@ static inline u64 task_vtime(struct task_struct *p)
  */
 static inline u64 task_slice(struct task_struct *p)
 {
-	u64 slice = p->scx.slice;
-
-	return MAX(slice, slice_ns_min);
+	return MAX(p->scx.slice, slice_ns_min);
 }
 
 /*
@@ -230,22 +254,6 @@ static u64 cpu_to_dsq(s32 cpu)
 	return (u64)cpu;
 }
 
-/*
- * Dispatch a per-CPU kthread directly to the local CPU DSQ.
- */
-static void dispatch_kthread(struct task_struct *p, u64 enq_flags)
-{
-	u64 slice = task_slice(p);
-
-	/*
-	 * Use the local CPU DSQ directly for per-CPU kthreads, to give them
-	 * maximum priority.
-	 */
-	scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice, enq_flags);
-
-	__sync_fetch_and_add(&nr_kthread_dispatches, 1);
-}
-
 /*
  * Dispatch a task directly to the assigned CPU DSQ (used when an idle CPU is
  * found).
@@ -253,6 +261,7 @@ static void dispatch_kthread(struct task_struct *p, u64 enq_flags)
 static int dispatch_direct_cpu(struct task_struct *p, s32 cpu)
 {
 	u64 slice = task_slice(p);
+	u64 vtime = task_vtime(p);
 	u64 dsq_id = cpu_to_dsq(cpu);
 
 	/*
@@ -262,14 +271,7 @@ static int dispatch_direct_cpu(struct task_struct *p, s32 cpu)
 	if (!bpf_cpumask_test_cpu(cpu, p->cpus_ptr))
 		return -EINVAL;
 
-	/*
-	 * We don't need to use vtime here, because we basically dispatch one
-	 * task at a time when the corresponding CPU is idle.
-	 *
-	 * We could also use SCX_DSQ_LOCAL, but we want to distinguish regular
-	 * tasks from per-CPU kthreads to give more priority to the latter.
-	 */
-	scx_bpf_dispatch(p, dsq_id, slice, 0);
+	scx_bpf_dispatch_vtime(p, dsq_id, slice, vtime, 0);
 	__sync_fetch_and_add(&nr_direct_dispatches, 1);
 
 	/*
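
Switching dispatch_direct_cpu() to scx_bpf_dispatch_vtime() means that even
directly dispatched tasks are ordered by virtual time rather than FIFO.
task_vtime() itself is not part of this hunk; a plausible sketch of the usual
pattern, where a sleeping task's vtime is not allowed to lag the global clock
by more than slice_ns_lag (the exact formula is an assumption, not taken from
this diff):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins for the scheduler's vtime_now and slice_ns_lag globals. */
    static uint64_t vtime_now;
    static uint64_t slice_ns_lag = 20ULL * 1000 * 1000; /* 20 ms of allowed lag (illustrative) */

    /*
     * Clamp a task's stored vtime so it cannot claim more than slice_ns_lag
     * worth of credit over the currently running tasks.
     */
    static uint64_t task_vtime_sketch(uint64_t task_dsq_vtime)
    {
            uint64_t min_vtime = vtime_now - slice_ns_lag;

            if ((int64_t)(task_dsq_vtime - min_vtime) < 0)
                    return min_vtime;
            return task_dsq_vtime;
    }

    int main(void)
    {
            vtime_now = 1000ULL * 1000 * 1000; /* pretend the global clock is at 1 s */

            /* A task that slept for a long time is clamped to now - lag... */
            printf("%llu\n", (unsigned long long)task_vtime_sketch(0));
            /* ...while a recently queued task keeps its own vtime. */
            printf("%llu\n", (unsigned long long)task_vtime_sketch(vtime_now - 1000));
            return 0;
    }
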
@@ -286,54 +288,20 @@ static int dispatch_direct_cpu(struct task_struct *p, s32 cpu)
 	return 0;
 }
 
-/*
- * Dispatch a regular task.
- */
-static void dispatch_task(struct task_struct *p, u64 enq_flags)
-{
-	u64 vtime = task_vtime(p);
-	u64 slice = task_slice(p);
-	struct task_ctx *tctx;
-
-	tctx = lookup_task_ctx(p);
-	if (!tctx)
-		return;
-	/*
-	 * Always dispatch interactive tasks to the priority DSQ and regular
-	 * tasks to the shared DSQ.
-	 */
-	if (tctx->is_interactive) {
-		scx_bpf_dispatch_vtime(p, PRIO_DSQ, slice, vtime, enq_flags);
-		__sync_fetch_and_add(&nr_prio_dispatches, 1);
-	} else {
-		scx_bpf_dispatch_vtime(p, SHARED_DSQ, slice, vtime, enq_flags);
-		__sync_fetch_and_add(&nr_shared_dispatches, 1);
-	}
-}
-
 /*
  * Find an idle CPU in the system.
  *
  * NOTE: the idle CPU selection doesn't need to be formally perfect, it is
  * totally fine to accept racy conditions and potentially make mistakes, by
  * picking CPUs that are not idle or even offline, the logic has been designed
  * to handle these mistakes in favor of a more efficient response and a reduced
  * scheduling overhead.
  */
 static s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu, u64 wake_flags)
 {
 	const struct cpumask *online_cpumask, *idle_smtmask, *idle_cpumask;
-	bool prev_in_cand;
 	s32 cpu;
 
 	if (builtin_idle) {
 		bool is_idle = false;
 
 		/*
 		 * Find an idle CPU using the sched_ext built-in idle selection
 		 * logic.
 		 */
 		cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
 		if (is_idle)
 			return cpu;
 
 		return -ENOENT;
 	}
 
 	/*
 	 * Acquire the CPU masks to determine the online and idle CPUs in the
 	 * system.
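
The NOTE above works because claiming a CPU is an atomic test-and-clear of its
idle bit: two wakers may race toward the same candidate, but only one of them
observes the bit set, and the loser simply falls through to the next choice.
A condensed sketch of that claim pattern using a plain atomic flag (the names
are illustrative; the real helper is scx_bpf_test_and_clear_cpu_idle()):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* One flag per CPU: true while the CPU is idle and unclaimed. */
    static atomic_bool cpu_idle[64];

    /* Atomically claim @cpu if it is still idle; false means another waker won. */
    static bool try_claim_idle_cpu(int cpu)
    {
            return atomic_exchange(&cpu_idle[cpu], false);
    }

    int main(void)
    {
            atomic_store(&cpu_idle[3], true);
            printf("first claim: %d\n", try_claim_idle_cpu(3));  /* 1: we won */
            printf("second claim: %d\n", try_claim_idle_cpu(3)); /* 0: already claimed */
            return 0;
    }
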
@@ -342,14 +310,28 @@ static s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu, u64 wake_flags)
 	idle_smtmask = scx_bpf_get_idle_smtmask();
 	idle_cpumask = scx_bpf_get_idle_cpumask();
 
-	prev_in_cand = bpf_cpumask_test_cpu(prev_cpu, p->cpus_ptr);
+	/*
+	 * For tasks that can run only on a single CPU, we can simply verify if
+	 * their only allowed CPU is idle.
+	 */
+	if (p->nr_cpus_allowed == 1) {
+		cpu = bpf_cpumask_first(p->cpus_ptr);
+
+		if (scx_bpf_test_and_clear_cpu_idle(cpu))
+			goto out_put_cpumask;
+		else
+			goto out_not_found;
+	}
+
 	/*
 	 * Find the best idle CPU, prioritizing full idle cores in SMT systems.
 	 */
 	if (smt_enabled) {
 		/*
 		 * If the task can still run on the previously used CPU and
 		 * it's a full-idle core, keep using it.
 		 */
-		if (prev_in_cand &&
+		if (bpf_cpumask_test_cpu(prev_cpu, p->cpus_ptr) &&
 		    bpf_cpumask_test_cpu(prev_cpu, idle_smtmask) &&
 		    scx_bpf_test_and_clear_cpu_idle(prev_cpu)) {
 			cpu = prev_cpu;
@@ -366,10 +348,11 @@ static s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu, u64 wake_flags)
 	}
 
 	/*
-	 * If a full-idle core can't be found (or if it's not an SMT system)
+	 * If a full-idle core can't be found (or if this is not an SMT system)
 	 * try to re-use the same CPU, even if it's not in a full-idle core.
 	 */
-	if (prev_in_cand && scx_bpf_test_and_clear_cpu_idle(prev_cpu)) {
+	if (bpf_cpumask_test_cpu(prev_cpu, p->cpus_ptr) &&
+	    scx_bpf_test_and_clear_cpu_idle(prev_cpu)) {
 		cpu = prev_cpu;
 		goto out_put_cpumask;
 	}
@@ -390,16 +373,16 @@ static s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu, u64 wake_flags)
 		struct task_ctx *tctx;
 
 		/*
-		 * If we are waking up a task and we couldn't find any idle CPU
-		 * to use, at least set the task as interactive, so that it can
-		 * be dispatched as soon as possible on the first CPU
-		 * available.
+		 * If we are waking up a task and we can't use the current CPU
+		 * at least set the task as interactive, so that it can be
+		 * dispatched as soon as possible on the first CPU available.
 		 */
 		tctx = lookup_task_ctx(p);
 		if (tctx)
 			tctx->is_interactive = true;
 	}
 
 out_not_found:
 	/*
	 * If all the previous attempts have failed, dispatch the task to the
	 * first CPU that will become available.
@@ -425,10 +408,18 @@ s32 BPF_STRUCT_OPS(bpfland_select_cpu, struct task_struct *p, s32 prev_cpu, u64
 	return prev_cpu;
 }
 
+/*
+ * Dispatch all the other tasks that were not dispatched directly in
+ * select_cpu().
+ */
 void BPF_STRUCT_OPS(bpfland_enqueue, struct task_struct *p, u64 enq_flags)
 {
+	u64 vtime = task_vtime(p);
+	u64 slice = task_slice(p);
+	struct task_ctx *tctx;
+
 	/*
-	 * Always dispatch per-CPU kthreads immediately.
+	 * Always dispatch per-CPU kthreads directly on their target CPU.
 	 *
 	 * This allows to prioritize critical kernel threads that may
 	 * potentially slow down the entire system if they are blocked for too
@@ -440,15 +431,26 @@ void BPF_STRUCT_OPS(bpfland_enqueue, struct task_struct *p, u64 enq_flags)
 	 * this scheduler is desktop usage, this shouldn't be a problem.
 	 */
 	if (is_kthread(p) && p->nr_cpus_allowed == 1) {
-		dispatch_kthread(p, enq_flags);
+		s32 cpu = scx_bpf_task_cpu(p);
+		dispatch_direct_cpu(p, cpu);
 		return;
 	}
 
+	tctx = lookup_task_ctx(p);
+	if (!tctx)
+		return;
+
 	/*
-	 * Dispatch all the other tasks that were not dispatched directly in
-	 * select_cpu().
+	 * Always dispatch interactive tasks to the priority DSQ and regular
+	 * tasks to the shared DSQ.
 	 */
-	dispatch_task(p, enq_flags);
+	if (tctx->is_interactive) {
+		scx_bpf_dispatch_vtime(p, PRIO_DSQ, slice, vtime, enq_flags);
+		__sync_fetch_and_add(&nr_prio_dispatches, 1);
+	} else {
+		scx_bpf_dispatch_vtime(p, SHARED_DSQ, slice, vtime, enq_flags);
+		__sync_fetch_and_add(&nr_shared_dispatches, 1);
+	}
 }
 
 /*
@@ -456,18 +458,17 @@ void BPF_STRUCT_OPS(bpfland_enqueue, struct task_struct *p, u64 enq_flags)
  *
  * These tasks will be consumed on other active CPUs to prevent indefinite
  * stalling.
+ *
+ * Return true if one task is consumed, false otherwise.
  */
-static int dispatch_offline_cpus(s32 cpu)
+static bool consume_offline_cpus(s32 cpu)
 {
 	u64 cpu_max = scx_bpf_nr_cpu_ids();
 	struct bpf_cpumask *offline;
-	int ret = -ENOENT;
 
-	bpf_rcu_read_lock();
-
 	offline = offline_cpumask;
 	if (!offline)
-		goto out_rcu;
+		return false;
 
 	/*
 	 * Cycle through all the CPUs and evenly consume tasks from the DSQs of
@@ -482,22 +483,81 @@ static int dispatch_offline_cpus(s32 cpu)
 		 * This CPU is offline, if a task has been dispatched there
 		 * consume it immediately on the current CPU.
 		 */
-		if (scx_bpf_consume(cpu_to_dsq(cpu))) {
-			ret = 0;
-			goto out_rcu;
-		}
+		if (scx_bpf_consume(cpu_to_dsq(cpu)))
+			return true;
 	}
-out_rcu:
-	bpf_rcu_read_unlock();
 
+	return false;
 }
 
+/*
+ * Consume a task from the priority DSQ, transferring it to the local CPU DSQ.
+ *
+ * Return true if a task is consumed, false otherwise.
+ */
+static bool consume_prio_task(u64 now)
+{
+	bool ret;
+
+	ret = scx_bpf_consume(PRIO_DSQ);
+	if (ret)
+		starvation_prio_ts = now;
+
+	return ret;
+}
+
+/*
+ * Consume a task from the shared DSQ, transferring it to the local CPU DSQ.
+ *
+ * Return true if a task is consumed, false otherwise.
+ */
+static bool consume_regular_task(u64 now)
+{
+	bool ret;
+
+	ret = scx_bpf_consume(SHARED_DSQ);
+	if (ret)
+		starvation_shared_ts = now;
+
+	return ret;
+}
+
+/*
+ * Consume tasks that are potentially starving.
+ *
+ * In order to limit potential starvation conditions the scheduler uses a
+ * time-based threshold to ensure that at least one task from the
+ * lower-priority DSQs is periodically consumed.
+ */
+static bool consume_starving_tasks(u64 now)
+{
+	if (!starvation_thresh_ns)
+		return false;
+
+	if (vtime_before(starvation_shared_ts + starvation_thresh_ns, now))
+		if (consume_regular_task(now))
+			return true;
+
+	if (vtime_before(starvation_prio_ts + starvation_thresh_ns, now))
+		if (consume_prio_task(now))
+			return true;
+
+	return false;
+}
+
 void BPF_STRUCT_OPS(bpfland_dispatch, s32 cpu, struct task_struct *prev)
 {
+	u64 now = bpf_ktime_get_ns();
+
 	/*
-	 * First consume directly dispatched tasks, so that they can
-	 * immediately use the CPU assigned in select_cpu().
+	 * Make sure we are not starving tasks from the lower priority DSQs.
 	 */
+	if (consume_starving_tasks(now))
+		return;
+
+	/*
+	 * Consume directly dispatched tasks, so that they can immediately use
+	 * the CPU assigned in select_cpu().
+	 */
 	if (scx_bpf_consume(cpu_to_dsq(cpu)))
 		return;
@@ -506,28 +566,19 @@ void BPF_STRUCT_OPS(bpfland_dispatch, s32 cpu, struct task_struct *prev)
 	 * Try also to steal tasks directly dispatched to CPUs that have gone
 	 * offline (this allows to prevent indefinite task stalls).
 	 */
-	if (!dispatch_offline_cpus(cpu))
+	if (consume_offline_cpus(cpu))
 		return;
 
 	/*
 	 * Then always consume interactive tasks before regular tasks.
 	 *
 	 * This is fine and we shouldn't have starvation, because interactive
 	 * tasks are classified by their amount of voluntary context switches,
 	 * so they should naturally release the CPU quickly and give a chance
 	 * to the regular tasks to run.
 	 *
 	 * TODO: Add a tunable setting to limit the number of priority tasks
 	 * dispatched. Once this limit is reached, at least one regular task
 	 * must be dispatched.
 	 */
-	if (scx_bpf_consume(PRIO_DSQ))
+	if (consume_prio_task(now))
 		return;
 
 	/*
 	 * Lastly, consume regular tasks from the shared DSQ.
 	 */
-	scx_bpf_consume(SHARED_DSQ);
+	consume_regular_task(now);
 }
 
 void BPF_STRUCT_OPS(bpfland_running, struct task_struct *p)
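
The TODO above asks for a cap on how many interactive tasks may be consumed
back-to-back before a regular task is forced in. One possible shape of such a
limiter, sketched as a toy model in plain C (the limit, the counters and the
function names are hypothetical, not part of this commit):

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical tunable: interactive tasks allowed before forcing a regular one. */
    #define PRIO_DISPATCH_LIMIT 4

    /* Toy stand-ins for the two DSQs: just counters of queued tasks. */
    static unsigned prio_queued = 100, shared_queued = 100;
    static unsigned prio_streak;

    static bool consume_prio(void)   { if (!prio_queued) return false; prio_queued--; return true; }
    static bool consume_shared(void) { if (!shared_queued) return false; shared_queued--; return true; }

    /* Prefer the priority DSQ, but break the streak after PRIO_DISPATCH_LIMIT in a row. */
    static const char *dispatch_one(void)
    {
            if (prio_streak >= PRIO_DISPATCH_LIMIT && consume_shared()) {
                    prio_streak = 0;
                    return "shared";
            }
            if (consume_prio()) {
                    prio_streak++;
                    return "prio";
            }
            prio_streak = 0;
            return consume_shared() ? "shared" : "idle";
    }

    int main(void)
    {
            for (int i = 0; i < 10; i++)
                    printf("%s ", dispatch_one()); /* prio x4, shared, prio x4, shared */
            printf("\n");
            return 0;
    }

Until such a cap exists, the scheduler relies on interactive tasks blocking
quickly (which is what classified them as interactive in the first place),
with consume_starving_tasks() as the backstop for the shared DSQ.
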
@@ -536,6 +587,13 @@ void BPF_STRUCT_OPS(bpfland_running, struct task_struct *p)
 	if (vtime_before(vtime_now, p->scx.dsq_vtime))
 		vtime_now = p->scx.dsq_vtime;
 
+	/*
+	 * Ensure time slice never exceeds slice_ns when a task is started on a
+	 * CPU.
+	 */
+	if (p->scx.slice > slice_ns)
+		p->scx.slice = slice_ns;
+
 	__sync_fetch_and_add(&nr_running, 1);
 }
 
@@ -604,37 +662,16 @@ void BPF_STRUCT_OPS(bpfland_enable, struct task_struct *p)
 	p->scx.dsq_vtime = vtime_now;
 }
 
-/*
- * Set the offline state of a CPU, updating the global offline cpumask.
- */
-static void set_cpu_offline(s32 cpu, bool state)
-{
-	struct bpf_cpumask *offline;
-
-	bpf_rcu_read_lock();
-
-	offline = offline_cpumask;
-	if (!offline)
-		goto out_rcu;
-	if (state)
-		bpf_cpumask_set_cpu(cpu, offline);
-	else
-		bpf_cpumask_clear_cpu(cpu, offline);
-
-out_rcu:
-	bpf_rcu_read_unlock();
-}
-
 void BPF_STRUCT_OPS(bpfland_cpu_online, s32 cpu)
 {
 	/* Set the CPU state to online */
-	set_cpu_offline(cpu, false);
+	set_cpu_state(offline_cpumask, cpu, false);
 }
 
 void BPF_STRUCT_OPS(bpfland_cpu_offline, s32 cpu)
 {
 	/* Set the CPU state to offline */
-	set_cpu_offline(cpu, true);
+	set_cpu_state(offline_cpumask, cpu, true);
 }
 
 s32 BPF_STRUCT_OPS(bpfland_init_task, struct task_struct *p,
@@ -649,7 +686,7 @@ s32 BPF_STRUCT_OPS(bpfland_init_task, struct task_struct *p,
 
 s32 BPF_STRUCT_OPS_SLEEPABLE(bpfland_init)
 {
-	struct bpf_cpumask *offline;
+	struct bpf_cpumask *mask;
 	int err;
 	s32 cpu;
 
@@ -678,14 +715,12 @@
 	}
 
 	/* Initialize the offline CPU mask */
-	bpf_rcu_read_lock();
 	err = calloc_cpumask(&offline_cpumask);
-	offline = offline_cpumask;
-	if (err || !offline)
-		err = -ENOMEM;
-	bpf_rcu_read_unlock();
+	mask = offline_cpumask;
+	if (!mask)
+		err = -ENOMEM;
 
-	return 0;
+	return err;
 }
 
 void BPF_STRUCT_OPS(bpfland_exit, struct scx_exit_info *ei)
@@ -59,7 +59,7 @@ struct Opts {
     exit_dump_len: u32,
 
     /// Maximum scheduling slice duration in microseconds.
-    #[clap(short = 's', long, default_value = "20000")]
+    #[clap(short = 's', long, default_value = "5000")]
     slice_us: u64,
 
     /// Minimum scheduling slice duration in microseconds.
@@ -78,9 +78,10 @@
     #[clap(short = 'c', long, default_value = "10")]
    nvcsw_thresh: u64,
 
     /// Enable direct dispatch via sched_ext built-in idle selection logic.
     #[clap(short = 'i', long, action = clap::ArgAction::SetTrue)]
     builtin_idle: bool,
 
+    /// Prevent the starvation making sure that at least one lower priority task is scheduled every
+    /// starvation_thresh_us (0 = disable starvation prevention).
+    #[clap(short = 't', long, default_value = "5000")]
+    starvation_thresh_us: u64,
+
     /// Enable the Prometheus endpoint for metrics on port 9000.
     #[clap(short = 'p', long, action = clap::ArgAction::SetTrue)]
@@ -101,7 +102,6 @@
 
 struct Metrics {
     nr_running: Gauge,
-    nr_kthread_dispatches: Gauge,
     nr_direct_dispatches: Gauge,
     nr_prio_dispatches: Gauge,
     nr_shared_dispatches: Gauge,
@@ -113,9 +113,6 @@ impl Metrics {
             nr_running: gauge!(
                 "nr_running", "info" => "Number of running tasks"
             ),
-            nr_kthread_dispatches: gauge!(
-                "nr_kthread_dispatches", "info" => "Number of kthread dispatches"
-            ),
             nr_direct_dispatches: gauge!(
                 "nr_direct_dispatches", "info" => "Number of direct dispatches"
             ),
@@ -173,8 +170,8 @@ impl<'a> Scheduler<'a> {
         skel.rodata_mut().slice_ns = opts.slice_us * 1000;
         skel.rodata_mut().slice_ns_min = opts.slice_us_min * 1000;
         skel.rodata_mut().slice_ns_lag = opts.slice_us_lag * 1000;
+        skel.rodata_mut().starvation_thresh_ns = opts.starvation_thresh_us * 1000;
         skel.rodata_mut().nvcsw_thresh = opts.nvcsw_thresh;
         skel.rodata_mut().builtin_idle = opts.builtin_idle;
 
         // Attach the scheduler.
         let mut skel = scx_ops_load!(skel, bpfland_ops, uei)?;
@@ -196,18 +193,14 @@ impl<'a> Scheduler<'a> {
     }
 
     fn update_stats(&mut self) {
-        let nr_running = self.skel.bss().nr_running;
         let nr_cpus = libbpf_rs::num_possible_cpus().unwrap();
-        let nr_kthread_dispatches = self.skel.bss().nr_kthread_dispatches;
+        let nr_running = self.skel.bss().nr_running;
         let nr_direct_dispatches = self.skel.bss().nr_direct_dispatches;
         let nr_prio_dispatches = self.skel.bss().nr_prio_dispatches;
         let nr_shared_dispatches = self.skel.bss().nr_shared_dispatches;
 
         // Update Prometheus statistics.
         self.metrics.nr_running.set(nr_running as f64);
-        self.metrics
-            .nr_kthread_dispatches
-            .set(nr_kthread_dispatches as f64);
         self.metrics
             .nr_direct_dispatches
             .set(nr_direct_dispatches as f64);
@@ -219,10 +212,9 @@ impl<'a> Scheduler<'a> {
             .set(nr_shared_dispatches as f64);
 
         // Log scheduling statistics.
-        info!("running={}/{} nr_kthread_dispatches={} nr_direct_dispatches={} nr_prio_dispatches={} nr_shared_dispatches={}",
+        info!("running={}/{} direct_dispatches={} prio_dispatches={} shared_dispatches={}",
             nr_running,
             nr_cpus,
-            nr_kthread_dispatches,
             nr_direct_dispatches,
             nr_prio_dispatches,
             nr_shared_dispatches);