path: root/kernel/sched.c
author     Ralf Baechle <ralf@linux-mips.org>  2000-07-10 23:18:26 +0000
committer  Ralf Baechle <ralf@linux-mips.org>  2000-07-10 23:18:26 +0000
commit     c7c4310f7fc1485925d800628bf50b3aeab535ef (patch)
tree       b12aa4be0e8fb82aaaea97fb475e793e8a347c49  /kernel/sched.c
parent     1ffd1d069ca4c5ffe16fea6175dab1b9bbb15820 (diff)
Merge with Linux 2.4.0-test3-pre8. Linus has accepted most of what
I've sent him, so we're very close to full integration of the MIPS port into his sources.
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  181
1 file changed, 115 insertions(+), 66 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 3b007e5e8..dd055d92f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -78,18 +78,20 @@ static union {
} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};
#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
+#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
struct kernel_stat kstat = { 0 };
#ifdef CONFIG_SMP
#define idle_task(cpu) (init_tasks[cpu_number_map(cpu)])
-#define can_schedule(p) (!(p)->has_cpu)
+#define can_schedule(p,cpu) ((!(p)->has_cpu) && \
+ ((p)->cpus_allowed & (1 << cpu)))
#else
#define idle_task(cpu) (&init_task)
-#define can_schedule(p) (1)
+#define can_schedule(p,cpu) (1)
#endif
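
The SMP version of can_schedule() now takes the target CPU and additionally requires that CPU to be set in the task's cpus_allowed mask, so a woken task is only ever dispatched to a processor it is permitted to run on. A minimal stand-alone sketch of the same test, with a simplified struct standing in for the kernel's task_struct:

#include <stdio.h>

/* Simplified stand-ins for the two fields can_schedule(p, cpu) reads. */
struct task {
        int has_cpu;                /* already executing on some CPU?   */
        unsigned long cpus_allowed; /* bitmask of CPUs the task may use */
};

/* Same condition as the SMP macro above: not currently running
 * anywhere, and the target CPU is allowed by the affinity mask. */
static int can_schedule(const struct task *p, int cpu)
{
        return !p->has_cpu && (p->cpus_allowed & (1UL << cpu));
}

int main(void)
{
        struct task p = { 0, 0x5 };     /* may run on CPUs 0 and 2 */
        int cpu;

        for (cpu = 0; cpu < 4; cpu++)
                printf("cpu %d: %s\n", cpu, can_schedule(&p, cpu) ? "yes" : "no");
        return 0;
}
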
@@ -119,7 +121,7 @@ static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struc
* into account).
*/
if (p->policy != SCHED_OTHER) {
- weight = 1000 + p->rt_priority;
+ weight = 1000 + 2*DEF_PRIORITY + p->rt_priority;
goto out;
}
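
For realtime policies the weight is now offset by 2*DEF_PRIORITY on top of the old 1000 + rt_priority, keeping every realtime task well clear of the largest goodness a SCHED_OTHER task can accumulate from its counter, priority and the same-CPU/same-mm bonuses. A cut-down sketch of that ordering, assuming the 2.4-era DEF_PRIORITY of 20 and omitting the bonuses and mm handling of the full goodness():

#include <stdio.h>

#define DEF_PRIORITY    20      /* assumed 2.4-era value */
#define SCHED_OTHER     0
#define SCHED_FIFO      1

struct task { int policy, rt_priority, counter, priority; };

/* Cut-down goodness(): only the realtime vs. timesharing split from
 * the hunk above; the same-CPU and same-mm bonuses are left out. */
static int goodness(const struct task *p)
{
        if (p->policy != SCHED_OTHER)
                return 1000 + 2*DEF_PRIORITY + p->rt_priority;
        if (!p->counter)
                return 0;
        return p->counter + p->priority;
}

int main(void)
{
        struct task rt = { SCHED_FIFO, 1, 0, 0 };
        struct task ts = { SCHED_OTHER, 0, 2*DEF_PRIORITY, 2*DEF_PRIORITY };

        /* Even a high-counter, high-priority timesharing task stays
         * far below the lowest realtime weight. */
        printf("realtime=%d timesharing=%d\n", goodness(&rt), goodness(&ts));
        return 0;
}
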
@@ -183,87 +185,108 @@ static inline int preemption_goodness(struct task_struct * prev, struct task_str
* up unlocking it early, so the caller must not unlock the
* runqueue, it's always done by reschedule_idle().
*/
-static inline void reschedule_idle(struct task_struct * p, unsigned long flags)
+static void reschedule_idle(struct task_struct * p, unsigned long flags)
{
#ifdef CONFIG_SMP
- int this_cpu = smp_processor_id(), target_cpu;
- struct task_struct *tsk;
- int cpu, best_cpu, i;
+ int this_cpu = smp_processor_id();
+ struct task_struct *tsk, *target_tsk;
+ int cpu, best_cpu, i, max_prio;
+ cycles_t oldest_idle;
/*
* shortcut if the woken up task's last CPU is
* idle now.
*/
best_cpu = p->processor;
- tsk = idle_task(best_cpu);
- if (cpu_curr(best_cpu) == tsk)
- goto send_now;
+ if (can_schedule(p, best_cpu)) {
+ tsk = idle_task(best_cpu);
+ if (cpu_curr(best_cpu) == tsk)
+ goto send_now_idle;
+
+ /*
+ * Maybe this process has enough priority to preempt
+ * its preferred CPU. (this is a shortcut):
+ */
+ tsk = cpu_curr(best_cpu);
+ if (preemption_goodness(tsk, p, best_cpu) > 1)
+ goto preempt_now;
+ }
/*
* We know that the preferred CPU has a cache-affine current
* process, lets try to find a new idle CPU for the woken-up
- * process:
+ * process. Select the least recently active idle CPU. (that
+ * one will have the least active cache context.) Also find
+ * the executing process which has the least priority.
*/
- for (i = smp_num_cpus - 1; i >= 0; i--) {
+ oldest_idle = -1ULL;
+ target_tsk = NULL;
+ max_prio = 1;
+
+ for (i = 0; i < smp_num_cpus; i++) {
cpu = cpu_logical_map(i);
- if (cpu == best_cpu)
+ if (!can_schedule(p, cpu))
continue;
tsk = cpu_curr(cpu);
/*
- * We use the last available idle CPU. This creates
+ * We use the first available idle CPU. This creates
* a priority list between idle CPUs, but this is not
* a problem.
*/
- if (tsk == idle_task(cpu))
- goto send_now;
- }
-
- /*
- * No CPU is idle, but maybe this process has enough priority
- * to preempt it's preferred CPU.
- */
- tsk = cpu_curr(best_cpu);
- if (preemption_goodness(tsk, p, best_cpu) > 0)
- goto send_now;
+ if (tsk == idle_task(cpu)) {
+ if (last_schedule(cpu) < oldest_idle) {
+ oldest_idle = last_schedule(cpu);
+ target_tsk = tsk;
+ }
+ } else {
+ if (oldest_idle == -1ULL) {
+ int prio = preemption_goodness(tsk, p, cpu);
- /*
- * We will get here often - or in the high CPU contention
- * case. No CPU is idle and this process is either lowprio or
- * the preferred CPU is highprio. Try to preempt some other CPU
- * only if it's RT or if it's iteractive and the preferred
- * cpu won't reschedule shortly.
- */
- if (p->avg_slice < cacheflush_time || (p->policy & ~SCHED_YIELD) != SCHED_OTHER) {
- for (i = smp_num_cpus - 1; i >= 0; i--) {
- cpu = cpu_logical_map(i);
- if (cpu == best_cpu)
- continue;
- tsk = cpu_curr(cpu);
- if (preemption_goodness(tsk, p, cpu) > 0)
- goto send_now;
+ if (prio > max_prio) {
+ max_prio = prio;
+ target_tsk = tsk;
+ }
+ }
}
}
+ tsk = target_tsk;
+ if (tsk) {
+ if (oldest_idle != -1ULL)
+ goto send_now_idle;
+ goto preempt_now;
+ }
spin_unlock_irqrestore(&runqueue_lock, flags);
return;
-send_now:
- target_cpu = tsk->processor;
+send_now_idle:
+ /*
+ * If need_resched == -1 then we can skip sending the IPI
+ * altogether, tsk->need_resched is actively watched by the
+ * idle thread.
+ */
+ if (!tsk->need_resched)
+ smp_send_reschedule(tsk->processor);
+ tsk->need_resched = 1;
+ spin_unlock_irqrestore(&runqueue_lock, flags);
+ return;
+
+preempt_now:
tsk->need_resched = 1;
spin_unlock_irqrestore(&runqueue_lock, flags);
/*
* the APIC stuff can go outside of the lock because
* it uses no task information, only CPU#.
*/
- if (target_cpu != this_cpu)
- smp_send_reschedule(target_cpu);
+ if (tsk->processor != this_cpu)
+ smp_send_reschedule(tsk->processor);
return;
#else /* UP */
int this_cpu = smp_processor_id();
struct task_struct *tsk;
tsk = cpu_curr(this_cpu);
- if (preemption_goodness(tsk, p, this_cpu) > 0)
+ if (preemption_goodness(tsk, p, this_cpu) > 1)
tsk->need_resched = 1;
spin_unlock_irqrestore(&runqueue_lock, flags);
#endif
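
In the rewritten reschedule_idle(), the scan over CPUs does two things at once: among idle CPUs it remembers the one whose last_schedule timestamp is oldest (least recently active, so presumably the coldest cache), and only if no allowed CPU is idle does it remember the running task that the woken process beats by a preemption_goodness margin greater than 1. A user-space sketch of just that selection policy, with hypothetical per-CPU arrays and a stubbed-out preemption gain standing in for the kernel's data:

#include <stdio.h>

#define NR_CPUS 4

/* Hypothetical per-CPU data standing in for the kernel's aligned_data
 * and cpu_curr()/idle_task() state. */
static int cpu_is_idle[NR_CPUS] = { 0, 1, 0, 1 };
static unsigned long long last_sched[NR_CPUS] = { 40, 35, 50, 10 };
/* Stub for preemption_goodness(): how much the woken task beats the
 * task currently running on each CPU. */
static int preempt_gain[NR_CPUS] = { 0, 0, 3, 0 };

/* Mirror of the selection loop above: prefer the least recently active
 * idle CPU; only if no allowed CPU is idle, pick the CPU whose current
 * task is beaten by the largest margin greater than 1 (max_prio). */
static int pick_target_cpu(unsigned long cpus_allowed)
{
        unsigned long long oldest_idle = ~0ULL;
        int target = -1, max_prio = 1, cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                if (!(cpus_allowed & (1UL << cpu)))
                        continue;
                if (cpu_is_idle[cpu]) {
                        if (last_sched[cpu] < oldest_idle) {
                                oldest_idle = last_sched[cpu];
                                target = cpu;
                        }
                } else if (oldest_idle == ~0ULL && preempt_gain[cpu] > max_prio) {
                        max_prio = preempt_gain[cpu];
                        target = cpu;
                }
        }
        return target;
}

int main(void)
{
        /* CPU 3 wins: it is idle and has the oldest timestamp, so the
         * preemptable CPU 2 is never considered. */
        printf("target cpu: %d\n", pick_target_cpu(0xfUL));
        return 0;
}
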
@@ -413,10 +436,12 @@ static inline void __schedule_tail(struct task_struct *prev)
unsigned long flags;
spin_lock_irqsave(&runqueue_lock, flags);
+ prev->has_cpu = 0;
reschedule_idle(prev, flags); // spin_unlocks runqueue
+ } else {
+ wmb();
+ prev->has_cpu = 0;
}
- wmb();
- prev->has_cpu = 0;
#endif /* CONFIG_SMP */
}
@@ -501,7 +526,7 @@ repeat_schedule:
still_running_back:
list_for_each(tmp, &runqueue_head) {
p = list_entry(tmp, struct task_struct, run_list);
- if (can_schedule(p)) {
+ if (can_schedule(p, this_cpu)) {
int weight = goodness(p, this_cpu, prev->active_mm);
if (weight > c)
c = weight, next = p;
@@ -540,13 +565,6 @@ still_running_back:
t = get_cycles();
this_slice = t - sched_data->last_schedule;
sched_data->last_schedule = t;
-
- /*
- * Exponentially fading average calculation, with
- * some weight so it doesnt get fooled easily by
- * smaller irregularities.
- */
- prev->avg_slice = (this_slice*1 + prev->avg_slice*1)/2;
}
/*
@@ -641,15 +659,20 @@ scheduling_in_interrupt:
return;
}
-static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, const int sync)
+static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
+ const int sync)
{
struct list_head *tmp, *head;
- struct task_struct *p;
+ struct task_struct *p, *best_exclusive;
unsigned long flags;
+ int best_cpu, irq;
if (!q)
goto out;
+ best_cpu = smp_processor_id();
+ irq = in_interrupt();
+ best_exclusive = NULL;
wq_write_lock_irqsave(&q->lock, flags);
#if WAITQUEUE_DEBUG
@@ -661,10 +684,13 @@ static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, con
if (!head->next || !head->prev)
WQ_BUG();
#endif
- list_for_each(tmp, head) {
+ tmp = head->next;
+ while (tmp != head) {
unsigned int state;
wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
+ tmp = tmp->next;
+
#if WAITQUEUE_DEBUG
CHECK_MAGIC(curr->__magic);
#endif
@@ -674,15 +700,37 @@ static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, con
#if WAITQUEUE_DEBUG
curr->__waker = (long)__builtin_return_address(0);
#endif
- if (sync)
- wake_up_process_synchronous(p);
- else
- wake_up_process(p);
- if (state & mode & TASK_EXCLUSIVE)
- break;
+ /*
+ * If waking up from an interrupt context then
+ * prefer processes which are affine to this
+ * CPU.
+ */
+ if (irq && (state & mode & TASK_EXCLUSIVE)) {
+ if (!best_exclusive)
+ best_exclusive = p;
+ else if ((p->processor == best_cpu) &&
+ (best_exclusive->processor != best_cpu))
+ best_exclusive = p;
+ } else {
+ if (sync)
+ wake_up_process_synchronous(p);
+ else
+ wake_up_process(p);
+ if (state & mode & TASK_EXCLUSIVE)
+ break;
+ }
}
}
+ if (best_exclusive)
+ best_exclusive->state = TASK_RUNNING;
wq_write_unlock_irqrestore(&q->lock, flags);
+
+ if (best_exclusive) {
+ if (sync)
+ wake_up_process_synchronous(best_exclusive);
+ else
+ wake_up_process(best_exclusive);
+ }
out:
return;
}
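
The exclusive-wakeup path above changes behaviour only when the wakeup comes from interrupt context: rather than waking the first TASK_EXCLUSIVE waiter in queue order, the loop remembers one candidate, upgrading it to a waiter whose last processor matches the waking CPU, and wakes that single task after dropping the queue lock. A rough sketch of that preference rule over a hypothetical waiter array:

#include <stdio.h>

/* Hypothetical waiter record: the CPU it last ran on and whether it
 * sleeps in TASK_EXCLUSIVE mode. */
struct waiter { int processor; int exclusive; };

/* Same preference as the interrupt-context branch above: keep the
 * first exclusive waiter, but upgrade to one whose last CPU matches
 * the waking CPU if we come across it. */
static const struct waiter *pick_exclusive(const struct waiter *w, int n,
                                           int best_cpu)
{
        const struct waiter *best = NULL;
        int i;

        for (i = 0; i < n; i++) {
                if (!w[i].exclusive)
                        continue;
                if (!best)
                        best = &w[i];
                else if (w[i].processor == best_cpu &&
                         best->processor != best_cpu)
                        best = &w[i];
        }
        return best;
}

int main(void)
{
        struct waiter q[] = { { 2, 1 }, { 0, 1 }, { 1, 0 } };
        const struct waiter *w = pick_exclusive(q, 3, 0);

        /* The second waiter wins: it is exclusive and last ran on the
         * waking CPU 0, even though it is not first in queue order. */
        printf("woken waiter last ran on cpu %d\n", w ? w->processor : -1);
        return 0;
}
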
@@ -697,6 +745,7 @@ void __wake_up_sync(wait_queue_head_t *q, unsigned int mode)
__wake_up_common(q, mode, 1);
}
+
#define SLEEP_ON_VAR \
unsigned long flags; \
wait_queue_t wait; \
@@ -798,7 +847,7 @@ asmlinkage long sys_nice(int increment)
* timeslice instead (default 200 ms). The rounding is
* why we want to avoid negative values.
*/
- newprio = (newprio * DEF_PRIORITY + 10) / 20;
+ newprio = (newprio * DEF_PRIORITY + 10)/20;
increment = newprio;
if (increase)
increment = -increment;
@@ -812,7 +861,7 @@ asmlinkage long sys_nice(int increment)
*/
newprio = current->priority - increment;
if ((signed) newprio < 1)
- newprio = 1;
+ newprio = DEF_PRIORITY/20;
if (newprio > DEF_PRIORITY*2)
newprio = DEF_PRIORITY*2;
current->priority = newprio;
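
The two sys_nice() hunks keep the same arithmetic: the (absolute) nice increment is scaled onto the priority range with (newprio * DEF_PRIORITY + 10)/20, and the lower clamp is now written DEF_PRIORITY/20, which still evaluates to 1 for the 2.4-era DEF_PRIORITY of 20 but follows DEF_PRIORITY if that value ever changes. A condensed sketch of the mapping and clamping, with the permission checks left out and the sign handling reconstructed from the visible increase/increment usage:

#include <stdio.h>

#define DEF_PRIORITY 20 /* assumed 2.4-era value; priorities run 1..2*DEF_PRIORITY */

/* Condensed version of the sys_nice() arithmetic shown in the two
 * hunks above; capability checks and surrounding code are omitted. */
static int apply_nice(int priority, int increment)
{
        int increase = 0, newprio;

        if (increment < 0) {
                increase = 1;
                increment = -increment;
        }
        /* scale the nice step onto the priority range; see the
         * rounding note in the hunk above */
        newprio = (increment * DEF_PRIORITY + 10)/20;
        increment = increase ? -newprio : newprio;

        newprio = priority - increment;
        if (newprio < DEF_PRIORITY/20)  /* lower clamp, == 1 here */
                newprio = DEF_PRIORITY/20;
        if (newprio > DEF_PRIORITY*2)
                newprio = DEF_PRIORITY*2;
        return newprio;
}

int main(void)
{
        printf("nice +10 from default: priority %d\n", apply_nice(DEF_PRIORITY, 10));
        printf("nice -19 from default: priority %d\n", apply_nice(DEF_PRIORITY, -19));
        return 0;
}
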