diff --git a/scheds/rust/scx_layered/src/bpf/cost.bpf.c b/scheds/rust/scx_layered/src/bpf/cost.bpf.c
index 6d8e5cd..a7e2e10 100644
--- a/scheds/rust/scx_layered/src/bpf/cost.bpf.c
+++ b/scheds/rust/scx_layered/src/bpf/cost.bpf.c
@@ -15,6 +15,7 @@ struct cost {
 	u32 idx;
 	bool overflow;
 	bool has_parent;
+	bool drain_fallback;
 };
 
@@ -217,6 +218,7 @@ static int record_cpu_cost(struct cost *costc, u32 layer_id, s64 amount)
 	__sync_fetch_and_sub(&costc->budget[layer_id], amount);
 
 	if (costc->budget[layer_id] <= 0) {
+		costc->drain_fallback = true;
 		if (costc->has_parent) {
 			s64 budget = acquire_budget(costc, layer_id,
 						    costc->capacity[layer_id] + amount);
diff --git a/scheds/rust/scx_layered/src/bpf/main.bpf.c b/scheds/rust/scx_layered/src/bpf/main.bpf.c
index 3ffe833..a7b1cbf 100644
--- a/scheds/rust/scx_layered/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_layered/src/bpf/main.bpf.c
@@ -1067,7 +1067,7 @@ void BPF_STRUCT_OPS(layered_enqueue, struct task_struct *p, u64 enq_flags)
 	 * usually important for system performance and responsiveness.
 	 */
 	if (!layer->preempt &&
-	    (p->flags & PF_KTHREAD) && p->nr_cpus_allowed == 1) {
+	    (p->flags & PF_KTHREAD) && p->nr_cpus_allowed < nr_possible_cpus) {
 		struct cpumask *layer_cpumask;
 
 		if (!layer->open &&
@@ -1444,6 +1444,18 @@ void BPF_STRUCT_OPS(layered_dispatch, s32 cpu, struct task_struct *prev)
 		return;
 	}
 
+	/*
+	 * Fallback DSQs don't have cost accounting. When the budget runs out
+	 * for a layer we do an extra consume of the fallback DSQ to ensure
+	 * that it doesn't stall out when the system is being saturated.
+	 */
+	if (costc->drain_fallback) {
+		costc->drain_fallback = false;
+		dsq_id = cpu_hi_fallback_dsq_id(cpu);
+		if (scx_bpf_consume(dsq_id))
+			return;
+	}
+
 	u32 my_llc_id = cpu_to_llc_id(cpu);
 
 	/* consume preempting layers first */
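
For context, a minimal userspace sketch of the interaction the patch introduces between the two files: record_cpu_cost() marks drain_fallback when a layer's budget goes non-positive, and layered_dispatch() consumes the CPU's hi fallback DSQ at most once before clearing the flag. Only the drain_fallback flow mirrors the diff; the single-layer struct, the consume_fallback_dsq() stub, and main() are illustrative assumptions, not part of the scheduler.

	#include <stdbool.h>
	#include <stdio.h>

	/* Simplified stand-in for struct cost: one layer, one budget. */
	struct cost {
		long long budget;
		long long capacity;
		bool drain_fallback;
	};

	/* Illustrative stub: pretend the CPU's hi fallback DSQ always has work. */
	static bool consume_fallback_dsq(void)
	{
		puts("consumed hi fallback DSQ");
		return true;
	}

	/* Mirrors record_cpu_cost(): charge the layer and flag a fallback drain
	 * once the budget is exhausted. */
	static void record_cpu_cost(struct cost *costc, long long amount)
	{
		costc->budget -= amount;
		if (costc->budget <= 0)
			costc->drain_fallback = true;
	}

	/* Mirrors the new block in layered_dispatch(): drain the fallback DSQ at
	 * most once per budget exhaustion, then clear the flag. */
	static void dispatch(struct cost *costc)
	{
		if (costc->drain_fallback) {
			costc->drain_fallback = false;
			if (consume_fallback_dsq())
				return;
		}
		puts("normal layer dispatch path");
	}

	int main(void)
	{
		struct cost c = { .budget = 10, .capacity = 10 };

		record_cpu_cost(&c, 15);	/* exhausts the budget, sets the flag */
		dispatch(&c);			/* drains the fallback DSQ once */
		dispatch(&c);			/* back to the normal path */
		return 0;
	}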