mirror of
https://github.com/sched-ext/scx.git
synced 2024-11-28 13:40:28 +00:00
scx_bpfland: introduce performance/powersave primary domain
The primary scheduling domain represents a group of CPUs in the system where the scheduler will initially attempt to assign tasks. Tasks will only be dispatched to CPUs within this primary domain until they are fully utilized, after which tasks may overflow to other available CPUs. The primary scheduling domain can be defined using the option `--primary-domain CPUMASK` (by default all the CPUs in the system are used as primary domain). This change introduces two new special values for the CPUMASK argument: - `performance`: automatically detect the fastest CPUs in the system and use them as primary scheduling domain, - `powersave`: automatically detect the slowest CPUs in the system and use them as primary scheduling domain. The current logic only supports creating two groups: fast and slow CPUs. The fast CPU group is created by excluding CPUs with the lowest frequency from the overall set, which means that within the fast CPU group, CPUs may have different maximum frequencies. When using the `performance` mode the fast CPUs will be used as primary domain, whereas in `powersave` mode, the slow CPUs will be used instead. This option is particularly useful in hybrid architectures (with P-cores and E-cores), as it allows the use of bpfland to prioritize task scheduling on either P-cores or E-cores, depending on the desired performance profile. 
Example: - Dell Precision 5480 - CPU: 13th Gen Intel(R) Core(TM) i7-13800H - P-cores: 0-11 / max freq: 5.2GHz - E-cores: 12-19 / max freq: 4.0GHz $ scx_bpfland --primary-domain performance 0[||||||||| 24.5%] 10[|||||||| 22.8%] 1[|||||| 14.9%] 11[||||||||||||| 36.9%] 2[|||||| 16.2%] 12[ 0.0%] 3[||||||||| 25.3%] 13[ 0.0%] 4[||||||||||| 33.3%] 14[ 0.0%] 5[|||| 9.9%] 15[ 0.0%] 6[||||||||||| 31.5%] 16[ 0.0%] 7[||||||| 17.4%] 17[ 0.0%] 8[|||||||| 23.4%] 18[ 0.0%] 9[||||||||| 26.1%] 19[ 0.0%] Avg power consumption: 3.29W $ scx_bpfland --primary-domain powersave 0[| 2.5%] 10[ 0.0%] 1[ 0.0%] 11[ 0.0%] 2[ 0.0%] 12[|||| 8.0%] 3[ 0.0%] 13[||||||||||||||||||||| 64.2%] 4[ 0.0%] 14[|||||||||| 29.6%] 5[ 0.0%] 15[||||||||||||||||| 52.5%] 6[ 0.0%] 16[||||||||| 24.7%] 7[ 0.0%] 17[|||||||||| 30.4%] 8[ 0.0%] 18[||||||| 22.4%] 9[ 0.0%] 19[||||| 12.4%] Avg power consumption: 2.17W (Info collected from htop and turbostat) Signed-off-by: Andrea Righi <andrea.righi@linux.dev>
This commit is contained in:
parent
174993f9d2
commit
f8a2445869
@ -109,9 +109,107 @@ impl CpuMask {
|
||||
}
|
||||
}
|
||||
|
||||
/// Read the CPU ID and maximum frequency from one entry of the sysfs CPU directory.
///
/// Returns `None` when the entry name is not `cpu<N>` (e.g. `cpufreq`, `cpuidle`), or when
/// its `cpufreq/cpuinfo_max_freq` file is missing, unreadable or does not contain an
/// unsigned integer. Such entries are simply skipped by the caller.
fn read_cpu_max_freq(path: &std::path::Path) -> Option<(usize, u64)> {
    let cpu_id = path
        .file_name()?
        .to_str()?
        .strip_prefix("cpu")?
        .parse::<usize>()
        .ok()?;

    // No separate existence / is-a-directory probe: reading the file fails in exactly the
    // same cases and avoids checking and reading in two separate steps.
    let max_freq = std::fs::read_to_string(path.join("cpufreq/cpuinfo_max_freq")).ok()?;
    let freq = max_freq.trim().parse::<u64>().ok()?;

    Some((cpu_id, freq))
}

/// Detect the CPUs that should form the primary scheduling domain.
///
/// Every `cpu<N>` entry under `/sys/devices/system/cpu/` that exposes a readable
/// `cpufreq/cpuinfo_max_freq` value is classified by that value:
///
/// - `powersave == true`: return the CPUs whose maximum frequency equals the smallest one;
/// - `powersave == false`: return every CPU whose maximum frequency is above the smallest
///   one (the returned CPUs may therefore have different maximum frequencies).
///
/// When all CPUs report the same maximum frequency, all of them are returned regardless of
/// `powersave`. When no CPU reports a frequency, an empty vector is returned.
///
/// The CPU IDs are returned in directory iteration order, which is not sorted.
///
/// # Errors
///
/// Returns the underlying I/O error if the sysfs CPU directory itself cannot be read.
/// Errors on individual entries are not reported; those entries are skipped.
fn get_primary_cpus(powersave: bool) -> std::io::Result<Vec<usize>> {
    // Collect (CPU ID, max frequency) for each usable CPU directory.
    let cpu_freqs: Vec<(usize, u64)> = std::fs::read_dir("/sys/devices/system/cpu/")?
        .filter_map(|entry| entry.ok())
        .filter_map(|entry| read_cpu_max_freq(&entry.path()))
        .collect();

    // Find the smallest maximum frequency; no entries means nothing to select.
    let min_freq = match cpu_freqs.iter().map(|&(_, freq)| freq).min() {
        Some(freq) => freq,
        None => return Ok(Vec::new()),
    };

    // With a single frequency class there is no fast/slow split: keep every CPU.
    let uniform = cpu_freqs.iter().all(|&(_, freq)| freq == min_freq);

    Ok(cpu_freqs
        .into_iter()
        // powersave keeps the slowest group, performance keeps everything else.
        .filter(|&(_, freq)| uniform || (freq == min_freq) == powersave)
        .map(|(cpu_id, _)| cpu_id)
        .collect())
}
|
||||
|
||||
/// Convert a list of CPU IDs to the corresponding hexadecimal cpumask string.
///
/// The mask can have any size: it is built as a sequence of bytes, one bit per CPU, and
/// printed most-significant byte first with a `0x` prefix and two hex digits per byte
/// (so the output is zero-padded to a whole number of bytes, e.g. CPU 0 gives `0x01`).
/// Duplicate IDs are harmless. An empty list yields `0x0`.
fn cpus_to_cpumask(cpus: &[usize]) -> String {
    // Determine the maximum CPU ID; an empty list maps to the empty mask.
    let max_cpu_id = match cpus.iter().max() {
        Some(&cpu_id) => cpu_id,
        None => return String::from("0x0"),
    };

    // Create a byte vector with enough bytes to cover all CPU IDs.
    let mut bitmask = vec![0u8; max_cpu_id / 8 + 1];

    // Set the appropriate bit for each CPU ID (byte 0 holds CPUs 0-7, and so on).
    for &cpu_id in cpus {
        bitmask[cpu_id / 8] |= 1u8 << (cpu_id % 8);
    }

    // Emit the bytes from the highest to the lowest so the string reads as one number.
    let hex_str: String = bitmask
        .iter()
        .rev()
        .map(|byte| format!("{:02x}", byte))
        .collect();

    format!("0x{}", hex_str)
}
|
||||
|
||||
// Custom parser function for cpumask using CpuMask's from_str method
|
||||
fn parse_cpumask(hex_str: &str) -> Result<CpuMask, std::num::ParseIntError> {
|
||||
CpuMask::from_str(hex_str)
|
||||
fn parse_cpumask(cpu_str: &str) -> Result<CpuMask, std::num::ParseIntError> {
|
||||
if cpu_str == "performance" {
|
||||
let cpus = get_primary_cpus(false).unwrap();
|
||||
CpuMask::from_str(&cpus_to_cpumask(&cpus))
|
||||
} else if cpu_str == "powersave" {
|
||||
let cpus = get_primary_cpus(true).unwrap();
|
||||
CpuMask::from_str(&cpus_to_cpumask(&cpus))
|
||||
} else {
|
||||
CpuMask::from_str(cpu_str)
|
||||
}
|
||||
}
|
||||
|
||||
/// scx_bpfland: a vruntime-based sched_ext scheduler that prioritizes interactive workloads.
|
||||
@ -159,7 +257,11 @@ struct Opts {
|
||||
/// scheduler will use to dispatch tasks, until the system becomes saturated, at which point
|
||||
/// tasks may overflow to other available CPUs.
|
||||
///
|
||||
/// (empty string = all CPUs are used for initial scheduling)
|
||||
/// Special values:
|
||||
/// - "performance" = automatically detect and use the fastest CPUs
|
||||
/// - "powersave" = automatically detect and use the slowest CPUs
|
||||
///
|
||||
/// By default all CPUs are used for the primary scheduling domain.
|
||||
#[clap(short = 'm', long, default_value = "", value_parser = parse_cpumask)]
|
||||
primary_domain: CpuMask,
|
||||
|
||||
@ -294,7 +396,7 @@ impl<'a> Scheduler<'a> {
|
||||
// Load the BPF program for validation.
|
||||
let mut skel = scx_ops_load!(skel, bpfland_ops, uei)?;
|
||||
|
||||
// Initialize primary domain CPUs.
|
||||
// Initialize the primary scheduling domain (based on the --primary-domain option).
|
||||
Self::init_primary_domain(&mut skel, &opts.primary_domain)?;
|
||||
|
||||
// Initialize L2 cache domains.
|
||||
|
Loading…
Reference in New Issue
Block a user