diff --git a/rust/scx_rustland_core/assets/bpf.rs b/rust/scx_rustland_core/assets/bpf.rs
index a2a9b4d..f32c739 100644
--- a/rust/scx_rustland_core/assets/bpf.rs
+++ b/rust/scx_rustland_core/assets/bpf.rs
@@ -57,9 +57,6 @@ pub const RL_PREEMPT_CPU: u64 = bpf_intf::RL_PREEMPT_CPU as u64;
 /// objects) and dispatch tasks (in the form of DispatchedTask objects), using respectively the
 /// methods dequeue_task() and dispatch_task().
 ///
-/// The CPU ownership map can be accessed using the method get_cpu_pid(), this also allows to keep
-/// track of the idle and busy CPUs, with the corresponding PIDs associated to them.
-///
 /// BPF counters and statistics can be accessed using the methods nr_*_mut(), in particular
 /// nr_queued_mut() and nr_scheduled_mut() can be updated to notify the BPF component if the
 /// user-space scheduler has some pending work to do or not.
@@ -384,14 +381,6 @@ impl<'cb> BpfScheduler<'cb> {
         unsafe { pthread_setschedparam(pthread_self(), SCHED_EXT, &param as *const sched_param) }
     }
 
-    // Get the pid running on a certain CPU, if no tasks are running return 0.
-    #[allow(dead_code)]
-    pub fn get_cpu_pid(&self, cpu: i32) -> u32 {
-        let cpu_map_ptr = self.skel.bss().cpu_map.as_ptr();
-
-        unsafe { *cpu_map_ptr.offset(cpu as isize) }
-    }
-
     // Receive a task to be scheduled from the BPF dispatcher.
     //
     // NOTE: if task.cpu is negative the task is exiting and it does not require to be scheduled.
diff --git a/rust/scx_rustland_core/assets/bpf/main.bpf.c b/rust/scx_rustland_core/assets/bpf/main.bpf.c
index 36d5def..f2be7c0 100644
--- a/rust/scx_rustland_core/assets/bpf/main.bpf.c
+++ b/rust/scx_rustland_core/assets/bpf/main.bpf.c
@@ -274,39 +274,6 @@ struct {
  */
 #define USERSCHED_TIMER_NS (NSEC_PER_SEC / 10)
 
-/*
- * Map of allocated CPUs.
- */
-volatile u32 cpu_map[MAX_CPUS];
-
-/*
- * Assign a task to a CPU (used in .running() and .stopping()).
- *
- * If pid == 0 the CPU will be considered idle.
- */
-static void set_cpu_owner(u32 cpu, u32 pid)
-{
-        if (cpu >= MAX_CPUS) {
-                scx_bpf_error("Invalid cpu: %d", cpu);
-                return;
-        }
-        cpu_map[cpu] = pid;
-}
-
-/*
- * Get the pid of the task that is currently running on @cpu.
- *
- * Return 0 if the CPU is idle.
- */
-static __maybe_unused u32 get_cpu_owner(u32 cpu)
-{
-        if (cpu >= MAX_CPUS) {
-                scx_bpf_error("Invalid cpu: %d", cpu);
-                return 0;
-        }
-        return cpu_map[cpu];
-}
-
 /*
  * Return true if the target task @p is the user-space scheduler.
  */
@@ -937,10 +904,8 @@ void BPF_STRUCT_OPS(rustland_running, struct task_struct *p)
          * Mark the CPU as busy by setting the pid as owner (ignoring the
          * user-space scheduler).
          */
-        if (!is_usersched_task(p)) {
-                set_cpu_owner(cpu, p->pid);
+        if (!is_usersched_task(p))
                 __sync_fetch_and_add(&nr_running, 1);
-        }
 }
 
 /*
@@ -955,7 +920,6 @@ void BPF_STRUCT_OPS(rustland_stopping, struct task_struct *p, bool runnable)
          * Mark the CPU as idle by setting the owner to 0.
          */
         if (!is_usersched_task(p)) {
-                set_cpu_owner(scx_bpf_task_cpu(p), 0);
                 __sync_fetch_and_sub(&nr_running, 1);
                 /*
                  * Kick the user-space scheduler immediately when a task
diff --git a/scheds/rust/scx_rlfifo/src/main.rs b/scheds/rust/scx_rlfifo/src/main.rs
index e76310a..332e2f6 100644
--- a/scheds/rust/scx_rlfifo/src/main.rs
+++ b/scheds/rust/scx_rlfifo/src/main.rs
@@ -9,7 +9,6 @@ pub mod bpf_intf;
 mod bpf;
 use bpf::*;
 
-use scx_utils::Topology;
 use scx_utils::UserExitInfo;
 
 use std::sync::atomic::AtomicBool;
diff --git a/scheds/rust/scx_rustland/src/main.rs b/scheds/rust/scx_rustland/src/main.rs
index fae5ff9..9c05f28 100644
--- a/scheds/rust/scx_rustland/src/main.rs
+++ b/scheds/rust/scx_rustland/src/main.rs
@@ -301,31 +301,6 @@ impl<'a> Scheduler<'a> {
         })
     }
 
-    // Return the amount of idle cores.
-    //
-    // On SMT systems consider only one CPU for each fully idle core, to avoid disrupting
-    // performnance too much by running multiple tasks in the same core.
-    fn nr_idle_cpus(&mut self) -> usize {
-        let mut idle_cpu_count = 0;
-
-        // Count the number of cores where all the CPUs are idle.
-        for core in self.topo_map.iter() {
-            let mut all_idle = true;
-            for cpu_id in core {
-                if self.bpf.get_cpu_pid(*cpu_id as i32) != 0 {
-                    all_idle = false;
-                    break;
-                }
-            }
-
-            if all_idle {
-                idle_cpu_count += 1;
-            }
-        }
-
-        idle_cpu_count
-    }
-
     // Return current timestamp in ns.
     fn now() -> u64 {
         let ts = SystemTime::now()
@@ -427,57 +402,47 @@ impl<'a> Scheduler<'a> {
         nr_queued + nr_scheduled
     }
 
-    // Return the target time slice, proportionally adjusted based on the total amount of tasks
-    // waiting to be scheduled (more tasks waiting => shorter time slice).
-    // Dispatch tasks from the task pool in order (sending them to the BPF dispatcher).
-    fn dispatch_tasks(&mut self) {
-        // Dispatch only a batch of tasks equal to the amount of idle CPUs in the system.
-        //
-        // This allows to have more tasks sitting in the task pool, reducing the pressure on the
-        // dispatcher queues and giving a chance to higher priority tasks to come in and get
-        // dispatched earlier, mitigating potential priority inversion issues.
-        let nr_tasks = self.nr_idle_cpus().max(1);
-        for _ in 0..nr_tasks {
-            match self.task_pool.pop() {
-                Some(task) => {
-                    // Update global minimum vruntime.
-                    if self.min_vruntime < task.vruntime {
-                        self.min_vruntime = task.vruntime;
-                    }
+    // Dispatch the first task from the task pool (sending it to the BPF dispatcher).
+    fn dispatch_task(&mut self) {
+        match self.task_pool.pop() {
+            Some(task) => {
+                // Update global minimum vruntime.
+                if self.min_vruntime < task.vruntime {
+                    self.min_vruntime = task.vruntime;
+                }
 
-                    // Scale time slice based on the amount of tasks that are waiting in the
-                    // scheduler's queue and the previously unused time slice budget, but make sure
-                    // to assign at least slice_us_min.
-                    let slice_ns = (self.slice_ns / (self.nr_tasks_waiting() + 1)).max(self.slice_ns_min);
+                // Scale time slice based on the amount of tasks that are waiting in the
+                // scheduler's queue and the previously unused time slice budget, but make sure
+                // to assign at least slice_us_min.
+                let slice_ns = (self.slice_ns / (self.nr_tasks_waiting() + 1)).max(self.slice_ns_min);
 
-                    // Create a new task to dispatch.
-                    let mut dispatched_task = DispatchedTask::new(&task.qtask);
+                // Create a new task to dispatch.
+                let mut dispatched_task = DispatchedTask::new(&task.qtask);
 
-                    // Assign the time slice to the task.
-                    dispatched_task.set_slice_ns(slice_ns);
+                // Assign the time slice to the task.
+                dispatched_task.set_slice_ns(slice_ns);
 
-                    // Dispatch task on the first CPU available if it is classified as
-                    // interactive, non-interactive tasks will continue to run on the same CPU.
-                    if task.is_interactive {
-                        dispatched_task.set_flag(RL_CPU_ANY);
-                    }
+                // Dispatch task on the first CPU available if it is classified as
+                // interactive, non-interactive tasks will continue to run on the same CPU.
+                if task.is_interactive {
+                    dispatched_task.set_flag(RL_CPU_ANY);
+                }
 
-                    // Send task to the BPF dispatcher.
-                    match self.bpf.dispatch_task(&dispatched_task) {
-                        Ok(_) => {}
-                        Err(_) => {
-                            /*
-                             * Re-add the task to the dispatched list in case of failure and stop
-                             * dispatching.
-                             */
-                            self.task_pool.push(task);
-                            break;
-                        }
-                    }
+                // Send task to the BPF dispatcher.
+                match self.bpf.dispatch_task(&dispatched_task) {
+                    Ok(_) => {}
+                    Err(_) => {
+                        /*
+                         * Re-add the task to the dispatched list in case of failure and stop
+                         * dispatching.
+                         */
+                        self.task_pool.push(task);
+                    }
                 }
-                None => break,
             }
+            None => {}
         }
+
         // Update nr_scheduled to notify the dispatcher that all the tasks received by the
         // scheduler has been dispatched, so there is no reason to re-activate the scheduler,
         // unless more tasks are queued.
@@ -489,7 +454,7 @@ impl<'a> Scheduler<'a> {
     // and dispatch them to the BPF part via the dispatched list).
     fn schedule(&mut self) {
         self.drain_queued_tasks();
-        self.dispatch_tasks();
+        self.dispatch_task();
         // Yield to avoid using too much CPU from the scheduler itself.
         thread::yield_now();
     }
@@ -518,37 +483,6 @@ impl<'a> Scheduler<'a> {
         }
     }
 
-    // Get the current CPU where the scheduler is running.
-    fn get_current_cpu() -> io::Result<i32> {
-        // Open /proc/self/stat file
-        let path = Path::new("/proc/self/stat");
-        let mut file = File::open(path)?;
-
-        // Read the content of the file into a String
-        let mut content = String::new();
-        file.read_to_string(&mut content)?;
-
-        // Split the content into fields using whitespace as the delimiter
-        let fields: Vec<&str> = content.split_whitespace().collect();
-
-        // Parse the 39th field as an i32 and return it.
-        if let Some(field) = fields.get(38) {
-            if let Ok(value) = field.parse::<i32>() {
-                Ok(value)
-            } else {
-                Err(io::Error::new(
-                    io::ErrorKind::InvalidData,
-                    "Unable to parse current CPU information as i32",
-                ))
-            }
-        } else {
-            Err(io::Error::new(
-                io::ErrorKind::InvalidData,
-                "Unable to get current CPU information",
-            ))
-        }
-    }
-
     // Print critical user-space scheduler statistics.
     fn print_faults(&mut self) {
         // Get counters of scheduling failures.
@@ -620,23 +554,6 @@ impl<'a> Scheduler<'a> {
         // Show total page faults of the user-space scheduler.
         self.print_faults();
 
-        // Show tasks that are currently running on each core and CPU.
-        let sched_cpu = match Self::get_current_cpu() {
-            Ok(cpu_info) => cpu_info,
-            Err(_) => -1,
-        };
-        info!("Running tasks:");
-        for (core_id, core) in self.topo_map.iter().enumerate() {
-            for cpu_id in core {
-                let pid = if *cpu_id as i32 == sched_cpu {
-                    "[self]".to_string()
-                } else {
-                    self.bpf.get_cpu_pid(*cpu_id as i32).to_string()
-                };
-                info!("  core {:2} cpu {:2} pid={}", core_id, cpu_id, pid);
-            }
-        }
-
         log::logger().flush();
     }
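
For readers following the scx_rustland hunks above, here is a minimal, self-contained sketch of the one-task-per-iteration dispatch pattern that the new dispatch_task() implements. This is not the scx code: the task pool is reduced to a plain BinaryHeap keyed on vruntime, try_dispatch() is a hypothetical stand-in for BpfScheduler::dispatch_task(), and SLICE_NS / SLICE_NS_MIN are assumed placeholder values rather than the scheduler's actual defaults.

use std::cmp::Reverse;
use std::collections::BinaryHeap;

const SLICE_NS: u64 = 5_000_000; // assumed 5 ms base slice (placeholder)
const SLICE_NS_MIN: u64 = 500_000; // assumed 0.5 ms lower bound (placeholder)

struct Scheduler {
    // Reverse<> turns BinaryHeap's max-heap ordering into "pop the lowest
    // vruntime first", mimicking the ordered task pool.
    task_pool: BinaryHeap<Reverse<u64>>,
    min_vruntime: u64,
}

impl Scheduler {
    // Pop a single task and dispatch it; re-queue it if dispatching fails,
    // so it is retried on the next scheduling iteration.
    fn dispatch_task(&mut self) {
        if let Some(Reverse(vruntime)) = self.task_pool.pop() {
            // Track the global minimum vruntime, as in the patch above.
            if self.min_vruntime < vruntime {
                self.min_vruntime = vruntime;
            }

            // More tasks waiting => shorter slice, bounded by SLICE_NS_MIN.
            let waiting = self.task_pool.len() as u64;
            let slice_ns = (SLICE_NS / (waiting + 1)).max(SLICE_NS_MIN);

            if try_dispatch(vruntime, slice_ns).is_err() {
                self.task_pool.push(Reverse(vruntime));
            }
        }
    }
}

// Hypothetical dispatch hook: the real code sends a DispatchedTask to the
// BPF component, which can fail when the dispatch queue is full.
fn try_dispatch(_vruntime: u64, _slice_ns: u64) -> Result<(), ()> {
    Ok(())
}

fn main() {
    let mut sched = Scheduler {
        task_pool: BinaryHeap::new(),
        min_vruntime: 0,
    };
    for v in [300u64, 100, 200] {
        sched.task_pool.push(Reverse(v));
    }
    sched.dispatch_task(); // dispatches the task with the lowest vruntime (100)
    assert_eq!(sched.min_vruntime, 100);
}

The re-queue-on-failure branch mirrors the patch: a task that cannot be handed to the BPF side is pushed back into the pool and simply retried on the next call to dispatch_task(), which is what lets the patch drop the old break-based batch loop without losing tasks.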