author | Ralf Baechle <ralf@linux-mips.org> | 2000-07-10 23:18:26 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-07-10 23:18:26 +0000 |
commit | c7c4310f7fc1485925d800628bf50b3aeab535ef (patch) | |
tree | b12aa4be0e8fb82aaaea97fb475e793e8a347c49 /kernel/sched.c | |
parent | 1ffd1d069ca4c5ffe16fea6175dab1b9bbb15820 (diff) |
Merge with Linux 2.4.0-test3-pre8. Linus has accepted most of what
I've sent him, so we're very close to full integration of the MIPS
port into his sources.
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 181 |
1 files changed, 115 insertions, 66 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 3b007e5e8..dd055d92f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -78,18 +78,20 @@ static union {
 } aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};
 
 #define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
+#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
 
 struct kernel_stat kstat = { 0 };
 
 #ifdef CONFIG_SMP
 
 #define idle_task(cpu) (init_tasks[cpu_number_map(cpu)])
-#define can_schedule(p)	(!(p)->has_cpu)
+#define can_schedule(p,cpu) ((!(p)->has_cpu) && \
+				((p)->cpus_allowed & (1 << cpu)))
 
 #else
 
 #define idle_task(cpu) (&init_task)
-#define can_schedule(p) (1)
+#define can_schedule(p,cpu) (1)
 
 #endif
 
@@ -119,7 +121,7 @@ static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struc
 	 * into account).
 	 */
 	if (p->policy != SCHED_OTHER) {
-		weight = 1000 + p->rt_priority;
+		weight = 1000 + 2*DEF_PRIORITY + p->rt_priority;
 		goto out;
 	}
 
@@ -183,87 +185,108 @@ static inline int preemption_goodness(struct task_struct * prev, struct task_str
  * up unlocking it early, so the caller must not unlock the
  * runqueue, it's always done by reschedule_idle().
  */
-static inline void reschedule_idle(struct task_struct * p, unsigned long flags)
+static void reschedule_idle(struct task_struct * p, unsigned long flags)
 {
 #ifdef CONFIG_SMP
-	int this_cpu = smp_processor_id(), target_cpu;
-	struct task_struct *tsk;
-	int cpu, best_cpu, i;
+	int this_cpu = smp_processor_id();
+	struct task_struct *tsk, *target_tsk;
+	int cpu, best_cpu, i, max_prio;
+	cycles_t oldest_idle;
 
 	/*
 	 * shortcut if the woken up task's last CPU is
 	 * idle now.
 	 */
 	best_cpu = p->processor;
-	tsk = idle_task(best_cpu);
-	if (cpu_curr(best_cpu) == tsk)
-		goto send_now;
+	if (can_schedule(p, best_cpu)) {
+		tsk = idle_task(best_cpu);
+		if (cpu_curr(best_cpu) == tsk)
+			goto send_now_idle;
+
+		/*
+		 * Maybe this process has enough priority to preempt
+		 * its preferred CPU. (this is a shortcut):
+		 */
+		tsk = cpu_curr(best_cpu);
+		if (preemption_goodness(tsk, p, best_cpu) > 1)
+			goto preempt_now;
+	}
 
 	/*
 	 * We know that the preferred CPU has a cache-affine current
 	 * process, lets try to find a new idle CPU for the woken-up
-	 * process:
+	 * process. Select the least recently active idle CPU. (that
+	 * one will have the least active cache context.) Also find
+	 * the executing process which has the least priority.
 	 */
-	for (i = smp_num_cpus - 1; i >= 0; i--) {
+	oldest_idle = -1ULL;
+	target_tsk = NULL;
+	max_prio = 1;
+
+	for (i = 0; i < smp_num_cpus; i++) {
 		cpu = cpu_logical_map(i);
-		if (cpu == best_cpu)
+		if (!can_schedule(p, cpu))
 			continue;
 		tsk = cpu_curr(cpu);
 		/*
-		 * We use the last available idle CPU. This creates
+		 * We use the first available idle CPU. This creates
 		 * a priority list between idle CPUs, but this is not
 		 * a problem.
 		 */
-		if (tsk == idle_task(cpu))
-			goto send_now;
-	}
-
-	/*
-	 * No CPU is idle, but maybe this process has enough priority
-	 * to preempt it's preferred CPU.
-	 */
-	tsk = cpu_curr(best_cpu);
-	if (preemption_goodness(tsk, p, best_cpu) > 0)
-		goto send_now;
+		if (tsk == idle_task(cpu)) {
+			if (last_schedule(cpu) < oldest_idle) {
+				oldest_idle = last_schedule(cpu);
+				target_tsk = tsk;
+			}
+		} else {
+			if (oldest_idle == -1ULL) {
+				int prio = preemption_goodness(tsk, p, cpu);
 
-	/*
-	 * We will get here often - or in the high CPU contention
-	 * case. No CPU is idle and this process is either lowprio or
-	 * the preferred CPU is highprio. Try to preempt some other CPU
-	 * only if it's RT or if it's iteractive and the preferred
-	 * cpu won't reschedule shortly.
-	 */
-	if (p->avg_slice < cacheflush_time || (p->policy & ~SCHED_YIELD) != SCHED_OTHER) {
-		for (i = smp_num_cpus - 1; i >= 0; i--) {
-			cpu = cpu_logical_map(i);
-			if (cpu == best_cpu)
-				continue;
-			tsk = cpu_curr(cpu);
-			if (preemption_goodness(tsk, p, cpu) > 0)
-				goto send_now;
+				if (prio > max_prio) {
+					max_prio = prio;
+					target_tsk = tsk;
+				}
+			}
 		}
 	}
+	tsk = target_tsk;
+	if (tsk) {
+		if (oldest_idle != -1ULL)
+			goto send_now_idle;
+		goto preempt_now;
+	}
 
 	spin_unlock_irqrestore(&runqueue_lock, flags);
 	return;
 
-send_now:
-	target_cpu = tsk->processor;
+send_now_idle:
+	/*
+	 * If need_resched == -1 then we can skip sending the IPI
+	 * altogether, tsk->need_resched is actively watched by the
+	 * idle thread.
+	 */
+	if (!tsk->need_resched)
+		smp_send_reschedule(tsk->processor);
+	tsk->need_resched = 1;
+	spin_unlock_irqrestore(&runqueue_lock, flags);
+	return;
+
+preempt_now:
 	tsk->need_resched = 1;
 	spin_unlock_irqrestore(&runqueue_lock, flags);
 	/*
 	 * the APIC stuff can go outside of the lock because
 	 * it uses no task information, only CPU#.
 	 */
-	if (target_cpu != this_cpu)
-		smp_send_reschedule(target_cpu);
+	if (tsk->processor != this_cpu)
+		smp_send_reschedule(tsk->processor);
 	return;
 #else /* UP */
 	int this_cpu = smp_processor_id();
 	struct task_struct *tsk;
 
 	tsk = cpu_curr(this_cpu);
-	if (preemption_goodness(tsk, p, this_cpu) > 0)
+	if (preemption_goodness(tsk, p, this_cpu) > 1)
 		tsk->need_resched = 1;
 	spin_unlock_irqrestore(&runqueue_lock, flags);
 #endif
@@ -413,10 +436,12 @@ static inline void __schedule_tail(struct task_struct *prev)
 		unsigned long flags;
 
 		spin_lock_irqsave(&runqueue_lock, flags);
+		prev->has_cpu = 0;
 		reschedule_idle(prev, flags); // spin_unlocks runqueue
+	} else {
+		wmb();
+		prev->has_cpu = 0;
 	}
-	wmb();
-	prev->has_cpu = 0;
 #endif /* CONFIG_SMP */
 }
 
@@ -501,7 +526,7 @@ repeat_schedule:
 still_running_back:
 	list_for_each(tmp, &runqueue_head) {
 		p = list_entry(tmp, struct task_struct, run_list);
-		if (can_schedule(p)) {
+		if (can_schedule(p, this_cpu)) {
 			int weight = goodness(p, this_cpu, prev->active_mm);
 			if (weight > c)
 				c = weight, next = p;
@@ -540,13 +565,6 @@ still_running_back:
 		t = get_cycles();
 		this_slice = t - sched_data->last_schedule;
 		sched_data->last_schedule = t;
-
-		/*
-		 * Exponentially fading average calculation, with
-		 * some weight so it doesnt get fooled easily by
-		 * smaller irregularities.
-		 */
-		prev->avg_slice = (this_slice*1 + prev->avg_slice*1)/2;
 	}
 
 	/*
@@ -641,15 +659,20 @@ scheduling_in_interrupt:
 	return;
 }
 
-static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, const int sync)
+static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
+				     const int sync)
 {
 	struct list_head *tmp, *head;
-	struct task_struct *p;
+	struct task_struct *p, *best_exclusive;
 	unsigned long flags;
+	int best_cpu, irq;
 
 	if (!q)
 		goto out;
 
+	best_cpu = smp_processor_id();
+	irq = in_interrupt();
+	best_exclusive = NULL;
 	wq_write_lock_irqsave(&q->lock, flags);
 
 #if WAITQUEUE_DEBUG
@@ -661,10 +684,13 @@ static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, con
 	if (!head->next || !head->prev)
 		WQ_BUG();
 #endif
-	list_for_each(tmp, head) {
+	tmp = head->next;
+	while (tmp != head) {
 		unsigned int state;
 		wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
 
+		tmp = tmp->next;
+
 #if WAITQUEUE_DEBUG
 		CHECK_MAGIC(curr->__magic);
 #endif
@@ -674,15 +700,37 @@ static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, con
 #if WAITQUEUE_DEBUG
 			curr->__waker = (long)__builtin_return_address(0);
 #endif
-			if (sync)
-				wake_up_process_synchronous(p);
-			else
-				wake_up_process(p);
-			if (state & mode & TASK_EXCLUSIVE)
-				break;
+			/*
+			 * If waking up from an interrupt context then
+			 * prefer processes which are affine to this
+			 * CPU.
+			 */
+			if (irq && (state & mode & TASK_EXCLUSIVE)) {
+				if (!best_exclusive)
+					best_exclusive = p;
+				else if ((p->processor == best_cpu) &&
					(best_exclusive->processor != best_cpu))
+						best_exclusive = p;
+			} else {
+				if (sync)
+					wake_up_process_synchronous(p);
+				else
+					wake_up_process(p);
+				if (state & mode & TASK_EXCLUSIVE)
+					break;
+			}
 		}
 	}
+	if (best_exclusive)
+		best_exclusive->state = TASK_RUNNING;
 	wq_write_unlock_irqrestore(&q->lock, flags);
+
+	if (best_exclusive) {
+		if (sync)
+			wake_up_process_synchronous(best_exclusive);
+		else
+			wake_up_process(best_exclusive);
+	}
 out:
 	return;
 }
@@ -697,6 +745,7 @@ void __wake_up_sync(wait_queue_head_t *q, unsigned int mode)
 	__wake_up_common(q, mode, 1);
 }
 
+
 #define	SLEEP_ON_VAR			\
 	unsigned long flags;		\
 	wait_queue_t wait;		\
@@ -798,7 +847,7 @@ asmlinkage long sys_nice(int increment)
 	 * timeslice instead (default 200 ms). The rounding is
 	 * why we want to avoid negative values.
 	 */
-	newprio = (newprio * DEF_PRIORITY + 10) / 20;
+	newprio = (newprio * DEF_PRIORITY + 10)/20;
 	increment = newprio;
 	if (increase)
 		increment = -increment;
@@ -812,7 +861,7 @@ asmlinkage long sys_nice(int increment)
 	 */
 	newprio = current->priority - increment;
 	if ((signed) newprio < 1)
-		newprio = 1;
+		newprio = DEF_PRIORITY/20;
 	if (newprio > DEF_PRIORITY*2)
 		newprio = DEF_PRIORITY*2;
 	current->priority = newprio;