Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 1492
1 file changed, 1095 insertions, 397 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 93003dfc1..8f88f88a3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2,16 +2,17 @@ * linux/kernel/sched.c * * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1996-04-21 Modified by Ulrich Windl to make NTP work */ /* * 'sched.c' is the main kernel file. It contains scheduling primitives * (sleep_on, wakeup, schedule etc) as well as a number of simple system - * call functions (type getpid(), which just extracts a field from + * call functions (type getpid()), which just extract a field from * current-task */ -#include <linux/config.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/timer.h> @@ -26,20 +27,23 @@ #include <linux/tqueue.h> #include <linux/resource.h> #include <linux/mm.h> +#include <linux/smp.h> #include <asm/system.h> #include <asm/io.h> -#include <asm/segment.h> +#include <asm/uaccess.h> #include <asm/pgtable.h> - -#define TIMER_IRQ 0 +#include <asm/mmu_context.h> #include <linux/timex.h> /* * kernel variables */ -long tick = 1000000 / HZ; /* timer interrupt period */ + +int securelevel = 0; /* system security level */ + +long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */ volatile struct timeval xtime; /* The current time */ int tickadj = 500/HZ; /* microsecs */ @@ -50,17 +54,19 @@ DECLARE_TASK_QUEUE(tq_scheduler); /* * phase-lock loop variables */ -int time_status = TIME_BAD; /* clock synchronization status */ -long time_offset = 0; /* time adjustment (us) */ -long time_constant = 0; /* pll time constant */ -long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ -long time_precision = 1; /* clock precision (us) */ -long time_maxerror = 0x70000000;/* maximum error */ -long time_esterror = 0x70000000;/* estimated error */ -long time_phase = 0; /* phase offset (scaled us) */ -long time_freq = 0; /* frequency offset (scaled ppm) */ -long time_adj = 0; /* tick adjust (scaled 1 / HZ) */ -long time_reftime = 0; /* time at last adjustment (s) */ +/* TIME_ERROR prevents overwriting the CMOS clock */ +int time_state = TIME_ERROR; /* clock synchronization status */ +int time_status = STA_UNSYNC; /* clock status bits */ +long time_offset = 0; /* time adjustment (us) */ +long time_constant = 2; /* pll time constant */ +long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ +long time_precision = 1; /* clock precision (us) */ +long time_maxerror = MAXPHASE; /* maximum error (us) */ +long time_esterror = MAXPHASE; /* estimated error (us) */ +long time_phase = 0; /* phase offset (scaled us) */ +long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC; /* frequency offset (scaled ppm) */ +long time_adj = 0; /* tick adjust (scaled 1 / HZ) */ +long time_reftime = 0; /* time at last adjustment (s) */ long time_adjust = 0; long time_adjust_step = 0; @@ -69,132 +75,354 @@ int need_resched = 0; unsigned long event = 0; extern int _setitimer(int, struct itimerval *, struct itimerval *); -unsigned long * prof_buffer = NULL; +unsigned int * prof_buffer = NULL; unsigned long prof_len = 0; +unsigned long prof_shift = 0; #define _S(nr) (1<<((nr)-1)) extern void mem_use(void); -extern int timer_interrupt(void); - +#ifdef __mips__ +unsigned long init_kernel_stack[2048] = { STACK_MAGIC, }; +unsigned long init_user_stack[2048] = { STACK_MAGIC, }; +#else unsigned long init_kernel_stack[1024] = { STACK_MAGIC, }; unsigned long init_user_stack[1024] = { STACK_MAGIC, }; +#endif static struct vm_area_struct init_mmap = INIT_MMAP; +static struct fs_struct init_fs = INIT_FS; +static struct files_struct 
init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS; + +struct mm_struct init_mm = INIT_MM; struct task_struct init_task = INIT_TASK; unsigned long volatile jiffies=0; -struct task_struct *current = &init_task; +struct task_struct *current_set[NR_CPUS]; struct task_struct *last_task_used_math = NULL; struct task_struct * task[NR_TASKS] = {&init_task, }; struct kernel_stat kstat = { 0 }; -unsigned long itimer_ticks = 0; -unsigned long itimer_next = ~0; +static inline void add_to_runqueue(struct task_struct * p) +{ +#ifdef __SMP__ + int cpu=smp_processor_id(); +#endif +#if 1 /* sanity tests */ + if (p->next_run || p->prev_run) { + printk("task already on run-queue\n"); + return; + } +#endif + if (p->counter > current->counter + 3) + need_resched = 1; + nr_running++; + (p->prev_run = init_task.prev_run)->next_run = p; + p->next_run = &init_task; + init_task.prev_run = p; +#ifdef __SMP__ + /* this is safe only if called with cli()*/ + while(set_bit(31,&smp_process_available)) + { + while(test_bit(31,&smp_process_available)) + { + if(clear_bit(cpu,&smp_invalidate_needed)) + { + local_flush_tlb(); + set_bit(cpu,&cpu_callin_map[0]); + } + } + } + smp_process_available++; + clear_bit(31,&smp_process_available); + if ((0!=p->pid) && smp_threads_ready) + { + int i; + for (i=0;i<smp_num_cpus;i++) + { + if (0==current_set[cpu_logical_map[i]]->pid) + { + smp_message_pass(cpu_logical_map[i], MSG_RESCHEDULE, 0L, 0); + break; + } + } + } +#endif +} + +static inline void del_from_runqueue(struct task_struct * p) +{ + struct task_struct *next = p->next_run; + struct task_struct *prev = p->prev_run; + +#if 1 /* sanity tests */ + if (!next || !prev) { + printk("task not on run-queue\n"); + return; + } +#endif + if (p == &init_task) { + static int nr = 0; + if (nr < 5) { + nr++; + printk("idle task may not sleep\n"); + } + return; + } + nr_running--; + next->prev_run = prev; + prev->next_run = next; + p->next_run = NULL; + p->prev_run = NULL; +} + +static inline void move_last_runqueue(struct task_struct * p) +{ + struct task_struct *next = p->next_run; + struct task_struct *prev = p->prev_run; + + /* remove from list */ + next->prev_run = prev; + prev->next_run = next; + /* add back to list */ + p->next_run = &init_task; + prev = init_task.prev_run; + init_task.prev_run = p; + p->prev_run = prev; + prev->next_run = p; +} + +/* + * Wake up a process. Put it on the run-queue if it's not + * already there. The "current" process is always on the + * run-queue (except when the actual re-schedule is in + * progress), and as such you're allowed to do the simpler + * "current->state = TASK_RUNNING" to mark yourself runnable + * without the overhead of this. + */ +inline void wake_up_process(struct task_struct * p) +{ + unsigned long flags; + + save_flags(flags); + cli(); + p->state = TASK_RUNNING; + if (!p->next_run) + add_to_runqueue(p); + restore_flags(flags); +} + +static void process_timeout(unsigned long __data) +{ + struct task_struct * p = (struct task_struct *) __data; + + p->timeout = 0; + wake_up_process(p); +} + +/* + * This is the function that decides how desirable a process is.. + * You can weigh different processes against each other depending + * on what CPU they've run on lately etc to try to handle cache + * and TLB miss penalties. + * + * Return values: + * -1000: never select this + * 0: out of time, recalculate counters (but it might still be + * selected) + * +ve: "goodness" value (the larger, the better) + * +1000: realtime process, select this. 
+ */ +static inline int goodness(struct task_struct * p, struct task_struct * prev, int this_cpu) +{ + int weight; + +#ifdef __SMP__ + /* We are not permitted to run a task someone else is running */ + if (p->processor != NO_PROC_ID) + return -1000; +#ifdef PAST_2_0 + /* This process is locked to a processor group */ + if (p->processor_mask && !(p->processor_mask & (1<<this_cpu)) + return -1000; +#endif +#endif + + /* + * Realtime process, select the first one on the + * runqueue (taking priorities within processes + * into account). + */ + if (p->policy != SCHED_OTHER) + return 1000 + p->rt_priority; + + /* + * Give the process a first-approximation goodness value + * according to the number of clock-ticks it has left. + * + * Don't do any other calculations if the time slice is + * over.. + */ + weight = p->counter; + if (weight) { + +#ifdef __SMP__ + /* Give a largish advantage to the same processor... */ + /* (this is equivalent to penalizing other processors) */ + if (p->last_processor == this_cpu) + weight += PROC_CHANGE_PENALTY; +#endif + + /* .. and a slight advantage to the current process */ + if (p == prev) + weight += 1; + } + + return weight; +} /* * 'schedule()' is the scheduler function. It's a very simple and nice * scheduler: it's not perfect, but certainly works for most things. - * The one thing you might take a look at is the signal-handler code here. + * + * The goto is "interesting". * * NOTE!! Task 0 is the 'idle' task, which gets called when no other * tasks can run. It can not be killed, and it cannot sleep. The 'state' * information in task[0] is never used. - * - * The "confuse_gcc" goto is used only to get better assembly code.. - * Dijkstra probably hates me. */ asmlinkage void schedule(void) { int c; struct task_struct * p; - struct task_struct * next; - unsigned long ticks; + struct task_struct * prev, * next; + unsigned long timeout = 0; + int this_cpu=smp_processor_id(); /* check alarm, wake up any interruptible tasks that have got a signal */ - if (intr_count) { - printk("Aiee: scheduling in interrupt\n"); + if (intr_count) + goto scheduling_in_interrupt; + + if (bh_active & bh_mask) { + intr_count = 1; + do_bottom_half(); intr_count = 0; } + run_task_queue(&tq_scheduler); - cli(); - ticks = itimer_ticks; - itimer_ticks = 0; - itimer_next = ~0; - sti(); + need_resched = 0; - nr_running = 0; - p = &init_task; - for (;;) { - if ((p = p->next_task) == &init_task) - goto confuse_gcc1; - if (ticks && p->it_real_value) { - if (p->it_real_value <= ticks) { - send_sig(SIGALRM, p, 1); - if (!p->it_real_incr) { - p->it_real_value = 0; - goto end_itimer; - } - do { - p->it_real_value += p->it_real_incr; - } while (p->it_real_value <= ticks); + prev = current; + cli(); + /* move an exhausted RR process to be last.. 
*/ + if (!prev->counter && prev->policy == SCHED_RR) { + prev->counter = prev->priority; + move_last_runqueue(prev); + } + switch (prev->state) { + case TASK_INTERRUPTIBLE: + if (prev->signal & ~prev->blocked) + goto makerunnable; + timeout = prev->timeout; + if (timeout && (timeout <= jiffies)) { + prev->timeout = 0; + timeout = 0; + makerunnable: + prev->state = TASK_RUNNING; + break; } - p->it_real_value -= ticks; - if (p->it_real_value < itimer_next) - itimer_next = p->it_real_value; - } -end_itimer: - if (p->state != TASK_INTERRUPTIBLE) - continue; - if (p->signal & ~p->blocked) { - p->state = TASK_RUNNING; - continue; - } - if (p->timeout && p->timeout <= jiffies) { - p->timeout = 0; - p->state = TASK_RUNNING; - } + default: + del_from_runqueue(prev); + case TASK_RUNNING: } -confuse_gcc1: + p = init_task.next_run; + sti(); + +#ifdef __SMP__ + /* + * This is safe as we do not permit re-entry of schedule() + */ + prev->processor = NO_PROC_ID; +#define idle_task (task[cpu_number_map[this_cpu]]) +#else +#define idle_task (&init_task) +#endif +/* + * Note! there may appear new tasks on the run-queue during this, as + * interrupts are enabled. However, they will be put on front of the + * list, so our list starting at "p" is essentially fixed. + */ /* this is the scheduler proper: */ -#if 0 - /* give processes that go to sleep a bit higher priority.. */ - /* This depends on the values for TASK_XXX */ - /* This gives smoother scheduling for some things, but */ - /* can be very unfair under some circumstances, so.. */ - if (TASK_UNINTERRUPTIBLE >= (unsigned) current->state && - current->counter < current->priority*2) { - ++current->counter; - } -#endif c = -1000; - next = p = &init_task; - for (;;) { - if ((p = p->next_task) == &init_task) - goto confuse_gcc2; - if (p->state == TASK_RUNNING) { - nr_running++; - if (p->counter > c) - c = p->counter, next = p; - } + next = idle_task; + while (p != &init_task) { + int weight = goodness(p, prev, this_cpu); + if (weight > c) + c = weight, next = p; + p = p->next_run; } -confuse_gcc2: + + /* if all runnable processes have "counter == 0", re-calculate counters */ if (!c) { for_each_task(p) p->counter = (p->counter >> 1) + p->priority; } - if (current == next) - return; - kstat.context_swtch++; +#ifdef __SMP__ + /* + * Allocate process to CPU + */ + + next->processor = this_cpu; + next->last_processor = this_cpu; +#endif +#ifdef __SMP_PROF__ + /* mark processor running an idle thread */ + if (0==next->pid) + set_bit(this_cpu,&smp_idle_map); + else + clear_bit(this_cpu,&smp_idle_map); +#endif + if (prev != next) { + struct timer_list timer; + + kstat.context_swtch++; + if (timeout) { + init_timer(&timer); + timer.expires = timeout; + timer.data = (unsigned long) prev; + timer.function = process_timeout; + add_timer(&timer); + } + + get_mmu_context(next); + switch_to(prev,next); + if (timeout) + del_timer(&timer); + } + return; - switch_to(next); +scheduling_in_interrupt: + printk("Aiee: scheduling in interrupt %p\n", + return_address()); +/* + * System is probably fucked up anyway beyond a save landing; prevent + * messages on the screen from scrolling away. + */ +while(1); } +#ifndef __alpha__ + +/* + * For backwards compatibility? This can be done in libc so Alpha + * and all newer ports shouldn't need it. 
+ */ asmlinkage int sys_pause(void) { current->state = TASK_INTERRUPTIBLE; @@ -202,6 +430,8 @@ asmlinkage int sys_pause(void) return -ERESTARTNOHAND; } +#endif + /* * wake_up doesn't wake up stopped processes - they have to be awakened * with signals or similar. @@ -212,70 +442,139 @@ asmlinkage int sys_pause(void) */ void wake_up(struct wait_queue **q) { - struct wait_queue *tmp; - struct task_struct * p; + struct wait_queue *next; + struct wait_queue *head; - if (!q || !(tmp = *q)) + if (!q || !(next = *q)) return; - do { - if ((p = tmp->task) != NULL) { + head = WAIT_QUEUE_HEAD(q); + while (next != head) { + struct task_struct *p = next->task; + next = next->next; + if (p != NULL) { if ((p->state == TASK_UNINTERRUPTIBLE) || - (p->state == TASK_INTERRUPTIBLE)) { - p->state = TASK_RUNNING; - if (p->counter > current->counter + 3) - need_resched = 1; - } + (p->state == TASK_INTERRUPTIBLE)) + wake_up_process(p); } - if (!tmp->next) { - printk("wait_queue is bad (eip = %p)\n", - __builtin_return_address(0)); - printk(" q = %p\n",q); - printk(" *q = %p\n",*q); - printk(" tmp = %p\n",tmp); - break; - } - tmp = tmp->next; - } while (tmp != *q); + if (!next) + goto bad; + } + return; +bad: + printk("wait_queue is bad (eip = %p)\n", + __builtin_return_address(0)); + printk(" q = %p\n",q); + printk(" *q = %p\n",*q); } void wake_up_interruptible(struct wait_queue **q) { - struct wait_queue *tmp; - struct task_struct * p; + struct wait_queue *next; + struct wait_queue *head; - if (!q || !(tmp = *q)) + if (!q || !(next = *q)) return; - do { - if ((p = tmp->task) != NULL) { - if (p->state == TASK_INTERRUPTIBLE) { - p->state = TASK_RUNNING; - if (p->counter > current->counter + 3) - need_resched = 1; - } + head = WAIT_QUEUE_HEAD(q); + while (next != head) { + struct task_struct *p = next->task; + next = next->next; + if (p != NULL) { + if (p->state == TASK_INTERRUPTIBLE) + wake_up_process(p); } - if (!tmp->next) { - printk("wait_queue is bad (eip = %p)\n", - __builtin_return_address(0)); - printk(" q = %p\n",q); - printk(" *q = %p\n",*q); - printk(" tmp = %p\n",tmp); - break; - } - tmp = tmp->next; - } while (tmp != *q); + if (!next) + goto bad; + } + return; +bad: + printk("wait_queue is bad (eip = %p)\n", + __builtin_return_address(0)); + printk(" q = %p\n",q); + printk(" *q = %p\n",*q); +} + +/* + * Semaphores are implemented using a two-way counter: + * The "count" variable is decremented for each process + * that tries to sleep, while the "waiting" variable is + * incremented _while_ the process is sleeping on that + * semaphore. + * + * Notably, the inline "up()" and "down()" functions can + * efficiently test if they need to do any extra work (up + * needs to do something only if count was negative before + * the increment operation. + */ +static inline void normalize_semaphore(struct semaphore *sem) +{ + atomic_add(xchg(&sem->waiting,0), &sem->count); +} + +/* + * When __up() is called, the count was negative before + * incrementing it, and we need to wake up somebody. In + * most cases "waiting" will be positive, and the normalization + * will allow things to continue. However, if somebody has + * /just/ done a down(), it may be that count was negative + * without waiting being positive (or in the generic case + * "count is more negative than waiting is positive"), and + * the waiter needs to check this itself (see __down). 
+ * + * Note that these functions are only called when there is + * contention on the lock, and as such all this is the + * "non-critical" part of the whole semaphore business. The + * critical part is the inline stuff in <asm/semaphore.h> + * where we want to avoid any extra jumps and calls. + */ +void __up(struct semaphore *sem) +{ + normalize_semaphore(sem); + wake_up(&sem->wait); } void __down(struct semaphore * sem) { - struct wait_queue wait = { current, NULL }; + struct task_struct *tsk = current; + struct wait_queue wait = { tsk, NULL }; + + /* + * The order here is important. We add ourselves to the + * wait queues and mark ourselves sleeping _first_. That + * way, if a "up()" comes in here, we'll either get + * woken up (up happens after the wait queues are set up) + * OR we'll have "waiting > 0". + */ + tsk->state = TASK_UNINTERRUPTIBLE; add_wait_queue(&sem->wait, &wait); - current->state = TASK_UNINTERRUPTIBLE; - while (sem->count <= 0) { - schedule(); - current->state = TASK_UNINTERRUPTIBLE; + atomic_inc(&sem->waiting); + + /* + * Ok, we're set up. The only race here is really that + * an "up()" might have incremented count before we got + * here, so we check "count+waiting". If that is larger + * than zero, we shouldn't sleep, but re-try the lock. + */ + if (sem->count+sem->waiting <= 0) { + /* + * If "count+waiting" <= 0, we have to wait + * for a up(), which will normalize the count. + * Remember, at this point we have decremented + * count, and incremented up, so if count is + * zero or positive we need to return to re-try + * the lock. It _may_ be that both count and + * waiting is zero and that it is still locked, + * but we still want to re-try the lock in that + * case to make count go negative again so that + * the optimized "up()" wake_up sequence works. + */ + do { + schedule(); + tsk->state = TASK_UNINTERRUPTIBLE; + } while (sem->count < 0); } - current->state = TASK_RUNNING; + tsk->state = TASK_RUNNING; remove_wait_queue(&sem->wait, &wait); + normalize_semaphore(sem); } static inline void __sleep_on(struct wait_queue **p, int state) @@ -288,11 +587,13 @@ static inline void __sleep_on(struct wait_queue **p, int state) if (current == task[0]) panic("task[0] trying to sleep"); current->state = state; - add_wait_queue(p, &wait); save_flags(flags); + cli(); + __add_wait_queue(p, &wait); sti(); schedule(); - remove_wait_queue(p, &wait); + cli(); + __remove_wait_queue(p, &wait); restore_flags(flags); } @@ -311,7 +612,7 @@ void sleep_on(struct wait_queue **p) * and the sorting routine counts on this.. 
*/ static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL }; -#define SLOW_BUT_DEBUGGING_TIMERS 1 +#define SLOW_BUT_DEBUGGING_TIMERS 0 void add_timer(struct timer_list * timer) { @@ -326,7 +627,6 @@ void add_timer(struct timer_list * timer) } #endif p = &timer_head; - timer->expires += jiffies; save_flags(flags); cli(); do { @@ -341,42 +641,66 @@ void add_timer(struct timer_list * timer) int del_timer(struct timer_list * timer) { - unsigned long flags; -#if SLOW_BUT_DEBUGGING_TIMERS - struct timer_list * p; - - p = &timer_head; - save_flags(flags); - cli(); - while ((p = p->next) != &timer_head) { - if (p == timer) { - timer->next->prev = timer->prev; - timer->prev->next = timer->next; + int ret = 0; + if (timer->next) { + unsigned long flags; + struct timer_list * next; + save_flags(flags); + cli(); + if ((next = timer->next) != NULL) { + (next->prev = timer->prev)->next = next; timer->next = timer->prev = NULL; - restore_flags(flags); - timer->expires -= jiffies; - return 1; + ret = 1; } + restore_flags(flags); } - if (timer->next || timer->prev) - printk("del_timer() called from %p with timer not initialized\n", - __builtin_return_address(0)); - restore_flags(flags); - return 0; -#else - save_flags(flags); + return ret; +} + +static inline void run_timer_list(void) +{ + struct timer_list * timer; + cli(); - if (timer->next) { + while ((timer = timer_head.next) != &timer_head && timer->expires <= jiffies) { + void (*fn)(unsigned long) = timer->function; + unsigned long data = timer->data; timer->next->prev = timer->prev; timer->prev->next = timer->next; timer->next = timer->prev = NULL; - restore_flags(flags); - timer->expires -= jiffies; - return 1; + sti(); + fn(data); + cli(); } - restore_flags(flags); - return 0; -#endif + sti(); +} + +static inline void run_old_timers(void) +{ + struct timer_struct *tp; + unsigned long mask; + + for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) { + if (mask > timer_active) + break; + if (!(mask & timer_active)) + continue; + if (tp->expires > jiffies) + continue; + timer_active &= ~mask; + tp->fn(); + sti(); + } +} + +void tqueue_bh(void) +{ + run_task_queue(&tq_timer); +} + +void immediate_bh(void) +{ + run_task_queue(&tq_immediate); } unsigned long timer_active = 0; @@ -403,21 +727,25 @@ static unsigned long count_active_tasks(void) (*p)->state == TASK_UNINTERRUPTIBLE || (*p)->state == TASK_SWAPPING)) nr += FIXED_1; +#ifdef __SMP__ + nr-=(smp_num_cpus-1)*FIXED_1; +#endif return nr; } -static inline void calc_load(void) +static inline void calc_load(unsigned long ticks) { unsigned long active_tasks; /* fixed-point */ static int count = LOAD_FREQ; - if (count-- > 0) - return; - count = LOAD_FREQ; - active_tasks = count_active_tasks(); - CALC_LOAD(avenrun[0], EXP_1, active_tasks); - CALC_LOAD(avenrun[1], EXP_5, active_tasks); - CALC_LOAD(avenrun[2], EXP_15, active_tasks); + count -= ticks; + if (count < 0) { + count += LOAD_FREQ; + active_tasks = count_active_tasks(); + CALC_LOAD(avenrun[0], EXP_1, active_tasks); + CALC_LOAD(avenrun[1], EXP_5, active_tasks); + CALC_LOAD(avenrun[2], EXP_15, active_tasks); + } } /* @@ -428,138 +756,138 @@ static inline void calc_load(void) * They were originally developed for SUN and DEC kernels. * All the kudos should go to Dave for this stuff. * - * These were ported to Linux by Philip Gladstone. */ static void second_overflow(void) { - long ltemp; - - /* Bump the maxerror field */ - time_maxerror = (0x70000000-time_maxerror < time_tolerance) ? 
- 0x70000000 : (time_maxerror + time_tolerance); - - /* Run the PLL */ - if (time_offset < 0) { - ltemp = (-(time_offset+1) >> (SHIFT_KG + time_constant)) + 1; - time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); - time_offset += (time_adj * HZ) >> (SHIFT_SCALE - SHIFT_UPDATE); - time_adj = - time_adj; - } else if (time_offset > 0) { - ltemp = ((time_offset-1) >> (SHIFT_KG + time_constant)) + 1; - time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); - time_offset -= (time_adj * HZ) >> (SHIFT_SCALE - SHIFT_UPDATE); - } else { - time_adj = 0; - } - - time_adj += (time_freq >> (SHIFT_KF + SHIFT_HZ - SHIFT_SCALE)) - + FINETUNE; - - /* Handle the leap second stuff */ - switch (time_status) { - case TIME_INS: - /* ugly divide should be replaced */ - if (xtime.tv_sec % 86400 == 0) { - xtime.tv_sec--; /* !! */ - time_status = TIME_OOP; - printk("Clock: inserting leap second 23:59:60 UTC\n"); - } - break; - - case TIME_DEL: - /* ugly divide should be replaced */ - if (xtime.tv_sec % 86400 == 86399) { - xtime.tv_sec++; - time_status = TIME_OK; - printk("Clock: deleting leap second 23:59:59 UTC\n"); - } - break; - - case TIME_OOP: - time_status = TIME_OK; - break; + long ltemp; + + /* Bump the maxerror field */ + time_maxerror += time_tolerance >> SHIFT_USEC; + if ( time_maxerror > MAXPHASE ) + time_maxerror = MAXPHASE; + + /* + * Leap second processing. If in leap-insert state at + * the end of the day, the system clock is set back one + * second; if in leap-delete state, the system clock is + * set ahead one second. The microtime() routine or + * external clock driver will insure that reported time + * is always monotonic. The ugly divides should be + * replaced. + */ + switch (time_state) { + + case TIME_OK: + if (time_status & STA_INS) + time_state = TIME_INS; + else if (time_status & STA_DEL) + time_state = TIME_DEL; + break; + + case TIME_INS: + if (xtime.tv_sec % 86400 == 0) { + xtime.tv_sec--; + time_state = TIME_OOP; + printk("Clock: inserting leap second 23:59:60 UTC\n"); } -} - -/* - * disregard lost ticks for now.. We don't care enough. - */ -static void timer_bh(void * unused) -{ - unsigned long mask; - struct timer_struct *tp; - struct timer_list * timer; + break; - cli(); - while ((timer = timer_head.next) != &timer_head && timer->expires < jiffies) { - void (*fn)(unsigned long) = timer->function; - unsigned long data = timer->data; - timer->next->prev = timer->prev; - timer->prev->next = timer->next; - timer->next = timer->prev = NULL; - sti(); - fn(data); - cli(); - } - sti(); - - for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) { - if (mask > timer_active) - break; - if (!(mask & timer_active)) - continue; - if (tp->expires > jiffies) - continue; - timer_active &= ~mask; - tp->fn(); - sti(); + case TIME_DEL: + if ((xtime.tv_sec + 1) % 86400 == 0) { + xtime.tv_sec++; + time_state = TIME_WAIT; + printk("Clock: deleting leap second 23:59:59 UTC\n"); } + break; + + case TIME_OOP: + time_state = TIME_WAIT; + break; + + case TIME_WAIT: + if (!(time_status & (STA_INS | STA_DEL))) + time_state = TIME_OK; + } + + /* + * Compute the phase adjustment for the next second. In + * PLL mode, the offset is reduced by a fixed factor + * times the time constant. In FLL mode the offset is + * used directly. In either mode, the maximum phase + * adjustment for each second is clamped so as to spread + * the adjustment over not more than the number of + * seconds between updates. 
+ */ + if (time_offset < 0) { + ltemp = -time_offset; + if (!(time_status & STA_FLL)) + ltemp >>= SHIFT_KG + time_constant; + if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) + ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; + time_offset += ltemp; + time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); + } else { + ltemp = time_offset; + if (!(time_status & STA_FLL)) + ltemp >>= SHIFT_KG + time_constant; + if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) + ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; + time_offset -= ltemp; + time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); + } + + /* + * Compute the frequency estimate and additional phase + * adjustment due to frequency error for the next + * second. When the PPS signal is engaged, gnaw on the + * watchdog counter and update the frequency computed by + * the pll and the PPS signal. + */ + pps_valid++; + if (pps_valid == PPS_VALID) { + pps_jitter = MAXTIME; + pps_stabil = MAXFREQ; + time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER | + STA_PPSWANDER | STA_PPSERROR); + } + ltemp = time_freq + pps_freq; + if (ltemp < 0) + time_adj -= -ltemp >> + (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); + else + time_adj += ltemp >> + (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); + +#if HZ == 100 + /* compensate for (HZ==100) != 128. Add 25% to get 125; => only 3% error */ + if (time_adj < 0) + time_adj -= -time_adj >> 2; + else + time_adj += time_adj >> 2; +#endif } -void tqueue_bh(void * unused) -{ - run_task_queue(&tq_timer); -} - -void immediate_bh(void * unused) -{ - run_task_queue(&tq_immediate); -} - -/* - * The int argument is really a (struct pt_regs *), in case the - * interrupt wants to know from where it was called. The timer - * irq uses this to decide if it should update the user or system - * times. - */ -static void do_timer(int irq, struct pt_regs * regs) +/* in the NTP reference this is called "hardclock()" */ +static void update_wall_time_one_tick(void) { - unsigned long mask; - struct timer_struct *tp; - /* last time the cmos clock got updated */ - static long last_rtc_update=0; - extern int set_rtc_mmss(unsigned long); - - long ltemp, psecs; - - /* Advance the phase, once it gets to one microsecond, then + /* + * Advance the phase, once it gets to one microsecond, then * advance the tick more. */ time_phase += time_adj; - if (time_phase < -FINEUSEC) { - ltemp = -time_phase >> SHIFT_SCALE; + if (time_phase <= -FINEUSEC) { + long ltemp = -time_phase >> SHIFT_SCALE; time_phase += ltemp << SHIFT_SCALE; xtime.tv_usec += tick + time_adjust_step - ltemp; } - else if (time_phase > FINEUSEC) { - ltemp = time_phase >> SHIFT_SCALE; + else if (time_phase >= FINEUSEC) { + long ltemp = time_phase >> SHIFT_SCALE; time_phase -= ltemp << SHIFT_SCALE; xtime.tv_usec += tick + time_adjust_step + ltemp; } else xtime.tv_usec += tick + time_adjust_step; - if (time_adjust) - { + if (time_adjust) { /* We are doing an adjtime thing. * * Modify the value of the tick for next time. @@ -570,123 +898,240 @@ static void do_timer(int irq, struct pt_regs * regs) * in the range -tickadj .. 
+tickadj */ if (time_adjust > tickadj) - time_adjust_step = tickadj; + time_adjust_step = tickadj; else if (time_adjust < -tickadj) - time_adjust_step = -tickadj; + time_adjust_step = -tickadj; else - time_adjust_step = time_adjust; + time_adjust_step = time_adjust; /* Reduce by this step the amount of time left */ time_adjust -= time_adjust_step; } else time_adjust_step = 0; +} + +/* + * Using a loop looks inefficient, but "ticks" is + * usually just one (we shouldn't be losing ticks, + * we're doing this this way mainly for interrupt + * latency reasons, not because we think we'll + * have lots of lost timer ticks + */ +static void update_wall_time(unsigned long ticks) +{ + do { + ticks--; + update_wall_time_one_tick(); + } while (ticks); if (xtime.tv_usec >= 1000000) { xtime.tv_usec -= 1000000; xtime.tv_sec++; second_overflow(); } +} - /* If we have an externally synchronized Linux clock, then update - * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be - * called as close as possible to 500 ms before the new second starts. - */ - if (time_status != TIME_BAD && xtime.tv_sec > last_rtc_update + 660 && - xtime.tv_usec > 500000 - (tick >> 1) && - xtime.tv_usec < 500000 + (tick >> 1)) - if (set_rtc_mmss(xtime.tv_sec) == 0) - last_rtc_update = xtime.tv_sec; - else - last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */ - - jiffies++; - calc_load(); - if (user_mode(regs)) { - current->utime++; - if (current != task[0]) { - if (current->priority < 15) - kstat.cpu_nice++; - else - kstat.cpu_user++; +static inline void do_process_times(struct task_struct *p, + unsigned long user, unsigned long system) +{ + long psecs; + + p->utime += user; + p->stime += system; + + psecs = (p->stime + p->utime) / HZ; + if (psecs > p->rlim[RLIMIT_CPU].rlim_cur) { + /* Send SIGXCPU every second.. */ + if (psecs * HZ == p->stime + p->utime) + send_sig(SIGXCPU, p, 1); + /* and SIGKILL when we go over max.. 
*/ + if (psecs > p->rlim[RLIMIT_CPU].rlim_max) + send_sig(SIGKILL, p, 1); + } +} + +static inline void do_it_virt(struct task_struct * p, unsigned long ticks) +{ + unsigned long it_virt = p->it_virt_value; + + if (it_virt) { + if (it_virt <= ticks) { + it_virt = ticks + p->it_virt_incr; + send_sig(SIGVTALRM, p, 1); } - /* Update ITIMER_VIRT for current task if not in a system call */ - if (current->it_virt_value && !(--current->it_virt_value)) { - current->it_virt_value = current->it_virt_incr; - send_sig(SIGVTALRM,current,1); + p->it_virt_value = it_virt - ticks; + } +} + +static inline void do_it_prof(struct task_struct * p, unsigned long ticks) +{ + unsigned long it_prof = p->it_prof_value; + + if (it_prof) { + if (it_prof <= ticks) { + it_prof = ticks + p->it_prof_incr; + send_sig(SIGPROF, p, 1); } - } else { - current->stime++; - if(current != task[0]) - kstat.cpu_system++; -#ifdef CONFIG_PROFILE - if (prof_buffer && current != task[0]) { - extern int _stext; - unsigned long eip = regs->eip - (unsigned long) &_stext; - eip >>= CONFIG_PROFILE_SHIFT; - if (eip < prof_len) - prof_buffer[eip]++; + p->it_prof_value = it_prof - ticks; + } +} + +static __inline__ void update_one_process(struct task_struct *p, + unsigned long ticks, unsigned long user, unsigned long system) +{ + do_process_times(p, user, system); + do_it_virt(p, user); + do_it_prof(p, ticks); +} + +static void update_process_times(unsigned long ticks, unsigned long system) +{ +#ifndef __SMP__ + struct task_struct * p = current; + unsigned long user = ticks - system; + if (p->pid) { + p->counter -= ticks; + if (p->counter < 0) { + p->counter = 0; + need_resched = 1; } -#endif + if (p->priority < DEF_PRIORITY) + kstat.cpu_nice += user; + else + kstat.cpu_user += user; + kstat.cpu_system += system; } - /* - * check the cpu time limit on the process. - */ - if ((current->rlim[RLIMIT_CPU].rlim_max != RLIM_INFINITY) && - (((current->stime + current->utime) / HZ) >= current->rlim[RLIMIT_CPU].rlim_max)) - send_sig(SIGKILL, current, 1); - if ((current->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) && - (((current->stime + current->utime) % HZ) == 0)) { - psecs = (current->stime + current->utime) / HZ; - /* send when equal */ - if (psecs == current->rlim[RLIMIT_CPU].rlim_cur) - send_sig(SIGXCPU, current, 1); - /* and every five seconds thereafter. */ - else if ((psecs > current->rlim[RLIMIT_CPU].rlim_cur) && - ((psecs - current->rlim[RLIMIT_CPU].rlim_cur) % 5) == 0) - send_sig(SIGXCPU, current, 1); - } - - if (current != task[0] && 0 > --current->counter) { - current->counter = 0; - need_resched = 1; + update_one_process(p, ticks, user, system); +#else + int cpu,j; + cpu = smp_processor_id(); + for (j=0;j<smp_num_cpus;j++) + { + int i = cpu_logical_map[j]; + struct task_struct *p; + +#ifdef __SMP_PROF__ + if (test_bit(i,&smp_idle_map)) + smp_idle_count[i]++; +#endif + p = current_set[i]; + /* + * Do we have a real process? + */ + if (p->pid) { + /* assume user-mode process */ + unsigned long utime = ticks; + unsigned long stime = 0; + if (cpu == i) { + utime = ticks-system; + stime = system; + } else if (smp_proc_in_lock[j]) { + utime = 0; + stime = ticks; + } + update_one_process(p, ticks, utime, stime); + + if (p->priority < DEF_PRIORITY) + kstat.cpu_nice += utime; + else + kstat.cpu_user += utime; + kstat.cpu_system += stime; + + p->counter -= ticks; + if (p->counter >= 0) + continue; + p->counter = 0; + } else { + /* + * Idle processor found, do we have anything + * we could run? 
+ */ + if (!(0x7fffffff & smp_process_available)) + continue; + } + /* Ok, we should reschedule, do the magic */ + if (i==cpu) + need_resched = 1; + else + smp_message_pass(i, MSG_RESCHEDULE, 0L, 0); } - /* Update ITIMER_PROF for the current task */ - if (current->it_prof_value && !(--current->it_prof_value)) { - current->it_prof_value = current->it_prof_incr; - send_sig(SIGPROF,current,1); +#endif +} + +static unsigned long lost_ticks = 0; +static unsigned long lost_ticks_system = 0; + +static inline void update_times(void) +{ + unsigned long ticks; + + ticks = xchg(&lost_ticks, 0); + + if (ticks) { + unsigned long system; + + system = xchg(&lost_ticks_system, 0); + calc_load(ticks); + update_wall_time(ticks); + update_process_times(ticks, system); } - for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) { - if (mask > timer_active) - break; - if (!(mask & timer_active)) - continue; - if (tp->expires > jiffies) - continue; - mark_bh(TIMER_BH); +} + +static void timer_bh(void) +{ + update_times(); + run_old_timers(); + run_timer_list(); +} + +void do_timer(struct pt_regs * regs) +{ + (*(unsigned long *)&jiffies)++; + lost_ticks++; + mark_bh(TIMER_BH); + if (!user_mode(regs)) { + lost_ticks_system++; + if (prof_buffer && current->pid) { + extern int _stext; + unsigned long ip = instruction_pointer(regs); + ip -= (unsigned long) &_stext; + ip >>= prof_shift; + if (ip < prof_len) + prof_buffer[ip]++; + } } - cli(); - itimer_ticks++; - if (itimer_ticks > itimer_next) - need_resched = 1; - if (timer_head.next->expires < jiffies) - mark_bh(TIMER_BH); - if (tq_timer != &tq_last) + if (tq_timer) mark_bh(TQUEUE_BH); - sti(); } -asmlinkage int sys_alarm(long seconds) +#ifndef __alpha__ + +/* + * For backwards compatibility? This can be done in libc so Alpha + * and all newer ports shouldn't need it. + */ +asmlinkage unsigned int sys_alarm(unsigned int seconds) { struct itimerval it_new, it_old; + unsigned int oldalarm; it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; it_new.it_value.tv_sec = seconds; it_new.it_value.tv_usec = 0; _setitimer(ITIMER_REAL, &it_new, &it_old); - return(it_old.it_value.tv_sec + (it_old.it_value.tv_usec / 1000000)); + oldalarm = it_old.it_value.tv_sec; + /* ehhh.. We can't return 0 if we have an alarm pending.. */ + /* And we'd better return too much than too little anyway */ + if (it_old.it_value.tv_usec) + oldalarm++; + return oldalarm; } +/* + * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this + * should be moved into arch/i386 instead? + */ asmlinkage int sys_getpid(void) { return current->pid; @@ -717,47 +1162,286 @@ asmlinkage int sys_getegid(void) return current->egid; } -asmlinkage int sys_nice(long increment) +/* + * This has been replaced by sys_setpriority. Maybe it should be + * moved into the arch dependent tree for those ports that require + * it for backward compatibility? + */ +asmlinkage int sys_nice(int increment) { - int newprio; - - if (increment < 0 && !suser()) - return -EPERM; + unsigned long newprio; + int increase = 0; + + newprio = increment; + if (increment < 0) { + if (!suser()) + return -EPERM; + newprio = -increment; + increase = 1; + } + if (newprio > 40) + newprio = 40; + /* + * do a "normalization" of the priority (traditionally + * unix nice values are -20..20, linux doesn't really + * use that kind of thing, but uses the length of the + * timeslice instead (default 150 msec). The rounding is + * why we want to avoid negative values. 
+ */ + newprio = (newprio * DEF_PRIORITY + 10) / 20; + increment = newprio; + if (increase) + increment = -increment; newprio = current->priority - increment; - if (newprio < 1) + if ((signed) newprio < 1) newprio = 1; - if (newprio > 35) - newprio = 35; + if (newprio > DEF_PRIORITY*2) + newprio = DEF_PRIORITY*2; current->priority = newprio; return 0; } +#endif + +static struct task_struct *find_process_by_pid(pid_t pid) +{ + struct task_struct *p; + + p = current; + if (pid) { + for_each_task(p) { + if (p->pid == pid) + goto found; + } + p = NULL; + } +found: + return p; +} + +static int setscheduler(pid_t pid, int policy, + struct sched_param *param) +{ + struct sched_param lp; + struct task_struct *p; + + if (!param || pid < 0) + return -EINVAL; + + if (copy_from_user(&lp, param, sizeof(struct sched_param))) + return -EFAULT; + + p = find_process_by_pid(pid); + if (!p) + return -ESRCH; + + if (policy < 0) + policy = p->policy; + else if (policy != SCHED_FIFO && policy != SCHED_RR && + policy != SCHED_OTHER) + return -EINVAL; + + /* + * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid + * priority for SCHED_OTHER is 0. + */ + if (lp.sched_priority < 0 || lp.sched_priority > 99) + return -EINVAL; + if ((policy == SCHED_OTHER) != (lp.sched_priority == 0)) + return -EINVAL; + + if ((policy == SCHED_FIFO || policy == SCHED_RR) && !suser()) + return -EPERM; + if ((current->euid != p->euid) && (current->euid != p->uid) && + !suser()) + return -EPERM; + + p->policy = policy; + p->rt_priority = lp.sched_priority; + cli(); + if (p->next_run) + move_last_runqueue(p); + sti(); + schedule(); + + return 0; +} + +asmlinkage int sys_sched_setscheduler(pid_t pid, int policy, + struct sched_param *param) +{ + return setscheduler(pid, policy, param); +} + +asmlinkage int sys_sched_setparam(pid_t pid, struct sched_param *param) +{ + return setscheduler(pid, -1, param); +} + +asmlinkage int sys_sched_getscheduler(pid_t pid) +{ + struct task_struct *p; + + if (pid < 0) + return -EINVAL; + + p = find_process_by_pid(pid); + if (!p) + return -ESRCH; + + return p->policy; +} + +asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param) +{ + struct task_struct *p; + struct sched_param lp; + + if (!param || pid < 0) + return -EINVAL; + + p = find_process_by_pid(pid); + if (!p) + return -ESRCH; + + lp.sched_priority = p->rt_priority; + return copy_to_user(param, &lp, sizeof(struct sched_param)) ? -EFAULT : 0; +} + +asmlinkage int sys_sched_yield(void) +{ + cli(); + move_last_runqueue(current); + sti(); + return 0; +} + +asmlinkage int sys_sched_get_priority_max(int policy) +{ + switch (policy) { + case SCHED_FIFO: + case SCHED_RR: + return 99; + case SCHED_OTHER: + return 0; + } + + return -EINVAL; +} + +asmlinkage int sys_sched_get_priority_min(int policy) +{ + switch (policy) { + case SCHED_FIFO: + case SCHED_RR: + return 1; + case SCHED_OTHER: + return 0; + } + + return -EINVAL; +} + +asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval) +{ + struct timespec t; + + t.tv_sec = 0; + t.tv_nsec = 0; /* <-- Linus, please fill correct value in here */ + return -ENOSYS; /* and then delete this line. Thanks! */ + return copy_to_user(interval, &t, sizeof(struct timespec)) ? -EFAULT : 0; +} + +/* + * change timeval to jiffies, trying to avoid the + * most obvious overflows.. 
+ */ +static unsigned long timespectojiffies(struct timespec *value) +{ + unsigned long sec = (unsigned) value->tv_sec; + long nsec = value->tv_nsec; + + if (sec > (LONG_MAX / HZ)) + return LONG_MAX; + nsec += 1000000000L / HZ - 1; + nsec /= 1000000000L / HZ; + return HZ * sec + nsec; +} + +static void jiffiestotimespec(unsigned long jiffies, struct timespec *value) +{ + value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ); + value->tv_sec = jiffies / HZ; + return; +} + +asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp) +{ + int error; + struct timespec t; + unsigned long expire; + + error = copy_from_user(&t, rqtp, sizeof(struct timespec)); + if (error) + return -EFAULT; + + if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0) + return -EINVAL; + + if (t.tv_sec == 0 && t.tv_nsec <= 2000000L && + current->policy != SCHED_OTHER) { + /* + * Short delay requests up to 2 ms will be handled with + * high precision by a busy wait for all real-time processes. + */ + udelay((t.tv_nsec + 999) / 1000); + return 0; + } + + expire = timespectojiffies(&t) + (t.tv_sec || t.tv_nsec) + jiffies; + current->timeout = expire; + current->state = TASK_INTERRUPTIBLE; + schedule(); + + if (expire > jiffies) { + if (rmtp) { + jiffiestotimespec(expire - jiffies - + (expire > jiffies + 1), &t); + if (copy_to_user(rmtp, &t, sizeof(struct timespec))) + return -EFAULT; + } + return -EINTR; + } + + return 0; +} + static void show_task(int nr,struct task_struct * p) { unsigned long free; - static char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" }; + static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" }; printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr); if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *)) printk(stat_nam[p->state]); else printk(" "); -#ifdef __i386__ +#if ((~0UL) == 0xffffffff) if (p == current) printk(" current "); else - printk(" %08lX ", ((unsigned long *)p->tss.esp)[3]); -#elif defined (__mips__) + printk(" %08lX ", thread_saved_pc(&p->tss)); +#else if (p == current) - printk(" current "); + printk(" current task "); else - printk(" "); + printk(" %016lx ", thread_saved_pc(&p->tss)); #endif - for (free = 1; free < 1024 ; free++) { + for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) { if (((unsigned long *)p->kernel_stack_page)[free]) break; } - printk("%5lu %5d %6d ", free << 2, p->pid, p->p_pptr->pid); + printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid); if (p->p_cptr) printk("%5d ", p->p_cptr->pid); else @@ -776,8 +1460,15 @@ void show_state(void) { int i; - printk(" free sibling\n"); +#if ((~0UL) == 0xffffffff) + printk("\n" + " free sibling\n"); printk(" task PC stack pid father child younger older\n"); +#else + printk("\n" + " free sibling\n"); + printk(" task PC stack pid father child younger older\n"); +#endif for (i=0 ; i<NR_TASKS ; i++) if (task[i]) show_task(i,task[i]); @@ -785,12 +1476,19 @@ void show_state(void) void sched_init(void) { - bh_base[TIMER_BH].routine = timer_bh; - bh_base[TQUEUE_BH].routine = tqueue_bh; - bh_base[IMMEDIATE_BH].routine = immediate_bh; - if (request_irq(TIMER_IRQ, do_timer, 0, "timer") != 0) - panic("Could not allocate timer IRQ!"); - enable_bh(TIMER_BH); - enable_bh(TQUEUE_BH); - enable_bh(IMMEDIATE_BH); + /* + * We have to do a little magic to get the first + * process right in SMP mode. 
+ */ + int cpu=smp_processor_id(); +#ifndef __SMP__ + current_set[cpu]=&init_task; +#else + init_task.processor=cpu; + for(cpu = 0; cpu < NR_CPUS; cpu++) + current_set[cpu] = &init_task; +#endif + init_bh(TIMER_BH, timer_bh); + init_bh(TQUEUE_BH, tqueue_bh); + init_bh(IMMEDIATE_BH, immediate_bh); } |
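
The new initializers long tick = (1000000 + HZ/2) / HZ and long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC round the tick length to the nearest microsecond and hand the leftover rounding error to the NTP frequency offset instead of dropping it. The standalone program below only reproduces that arithmetic; it assumes SHIFT_USEC is 16 (the <linux/timex.h> scaling of this era) and shows the HZ=100 and HZ=1024 cases.

/* Arithmetic behind the new initializers in this patch:
 *     tick      = (1000000 + HZ/2) / HZ;
 *     time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
 * The residual equals 1000000 - tick*HZ: the microseconds per second by
 * which the rounded tick is off, which the PLL then corrects as ppm.
 * SHIFT_USEC is assumed to be 16 here. */
#include <stdio.h>

#define SHIFT_USEC 16

static void show(long hz)
{
    long tick = (1000000 + hz / 2) / hz;
    long residual_ppm = (1000000 + hz / 2) % hz - hz / 2;

    printf("HZ=%4ld: tick=%5ld us, tick*HZ=%7ld us/s, residual=%4ld ppm, time_freq=%ld\n",
           hz, tick, tick * hz, residual_ppm,
           residual_ppm * (1L << SHIFT_USEC));  /* same scaling as << SHIFT_USEC */
}

int main(void)
{
    show(100);    /* exact at HZ=100: tick=10000, time_freq=0              */
    show(1024);   /* tick=977 runs 448 us/s fast, so time_freq is -448 ppm */
    return 0;
}

At HZ=100 the division is exact, so i386 behaviour does not change; on clocks like HZ=1024 the tick cannot be exact, and the remaining 448 ppm error is now recorded in time_freq from boot instead of silently accumulating.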
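
The heart of the rewritten schedule() is goodness(): real-time tasks always score 1000 plus their rt_priority, ordinary tasks score their remaining time slice, plus a PROC_CHANGE_PENALTY bonus on SMP for staying on the last CPU and a +1 bonus for the previously running task, and the run-queue scan simply keeps the highest score. The sketch below models that selection loop in user space; toy_task, the three-entry array and the PROC_CHANGE_PENALTY value of 15 are illustrative assumptions, not the kernel's definitions.

/* Standalone model of the goodness()-based pick in schedule().
 * Types and constants here are illustrative, not the kernel's. */
#include <stdio.h>

#define SCHED_OTHER 0
#define SCHED_FIFO  1
#define SCHED_RR    2
#define PROC_CHANGE_PENALTY 15   /* assumed per-arch cache-affinity bonus */

struct toy_task {
    int policy;
    int rt_priority;
    int counter;          /* remaining time slice, in ticks */
    int last_processor;
};

/* Mirrors the weighting rules: realtime tasks always win (1000 + prio),
 * otherwise the remaining slice decides, with a bonus for the CPU the
 * task last ran on and +1 for the previously running task. */
static int goodness(const struct toy_task *p, const struct toy_task *prev, int this_cpu)
{
    int weight;

    if (p->policy != SCHED_OTHER)
        return 1000 + p->rt_priority;

    weight = p->counter;
    if (weight) {
        if (p->last_processor == this_cpu)
            weight += PROC_CHANGE_PENALTY;
        if (p == prev)
            weight += 1;
    }
    return weight;
}

int main(void)
{
    struct toy_task run_queue[3] = {
        { SCHED_OTHER, 0, 4, 0 },   /* interactive task, slice left */
        { SCHED_OTHER, 0, 0, 1 },   /* exhausted slice: weight 0    */
        { SCHED_RR,   10, 2, 1 },   /* realtime: always weight 1010 */
    };
    int this_cpu = 0, best = -1000, pick = -1;

    for (int i = 0; i < 3; i++) {
        int w = goodness(&run_queue[i], &run_queue[0], this_cpu);
        if (w > best) {
            best = w;
            pick = i;
        }
    }
    printf("picked task %d with goodness %d\n", pick, best);
    return 0;
}

If the best score is 0, every runnable task has exhausted its slice, and schedule() recomputes all counters as shown in the next note.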
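
That recalculation, p->counter = (p->counter >> 1) + p->priority, runs over all tasks, so a task that keeps sleeping carries half of its unused slice forward and converges toward 2*priority - 1 ticks, which is what favours interactive processes when they finally wake. A minimal trace of the recurrence, assuming a priority of 20 ticks (DEF_PRIORITY at HZ=100):

/* Iterate counter = counter/2 + priority for a task that stays asleep
 * across recalculations; the value converges to 2*priority - 1. */
#include <stdio.h>

int main(void)
{
    int priority = 20;   /* assumed DEF_PRIORITY */
    int counter = 0;

    for (int round = 1; round <= 7; round++) {
        counter = (counter >> 1) + priority;
        printf("after recalculation %d: counter = %d\n", round, counter);
    }
    return 0;
}

After six rounds the counter settles at 39 and stays there.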
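
sys_nice() no longer treats the increment as a raw priority delta: it first clamps the traditional -20..20 nice step to 40, rescales it onto the time-slice range with (newprio * DEF_PRIORITY + 10) / 20, and then clamps the resulting priority to 1..2*DEF_PRIORITY. The user-space sketch below repeats that arithmetic; DEF_PRIORITY is assumed to be 20 ticks, and the suser() permission check for negative increments is only noted in a comment.

/* Model of the nice-value normalization added to sys_nice(): a traditional
 * -20..20 nice increment is rescaled onto the scheduler's time-slice
 * priority.  DEF_PRIORITY is assumed to be 20 ticks. */
#include <stdio.h>

#define DEF_PRIORITY 20

static int apply_nice(int priority, int increment)
{
    unsigned long newprio;
    int increase = 0;

    newprio = increment;
    if (increment < 0) {     /* raising priority needs suser() in the kernel */
        newprio = -increment;
        increase = 1;
    }
    if (newprio > 40)
        newprio = 40;
    /* rescale 0..40 onto 0..2*DEF_PRIORITY, rounding to nearest */
    newprio = (newprio * DEF_PRIORITY + 10) / 20;
    increment = increase ? -(int) newprio : (int) newprio;

    priority -= increment;
    if (priority < 1)
        priority = 1;
    if (priority > DEF_PRIORITY * 2)
        priority = DEF_PRIORITY * 2;
    return priority;
}

int main(void)
{
    printf("nice +10 from default: priority %d -> %d ticks\n",
           DEF_PRIORITY, apply_nice(DEF_PRIORITY, 10));
    printf("nice -20 from default: priority %d -> %d ticks\n",
           DEF_PRIORITY, apply_nice(DEF_PRIORITY, -20));
    return 0;
}

So a nice +10 halves the default slice from 20 to 10 ticks, and nice -20 (root only) doubles it to the 40-tick ceiling.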
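
The new sys_nanosleep() converts the request with timespectojiffies(), which clamps oversized values and rounds the nanosecond part up to the next tick, and then adds one extra jiffy for any non-zero request so the sleep can end late but never early (sub-2 ms requests from real-time tasks are instead busy-waited with udelay()). The fragment below reproduces only the conversion, assuming HZ=100; toy_timespec stands in for struct timespec.

/* Worked example of the timespec -> jiffies rounding used by sys_nanosleep().
 * HZ is assumed to be 100, i.e. one tick every 10 ms. */
#include <stdio.h>
#include <limits.h>

#define HZ 100

struct toy_timespec { long tv_sec; long tv_nsec; };

static unsigned long to_jiffies(const struct toy_timespec *t)
{
    unsigned long sec = (unsigned long) t->tv_sec;
    long nsec = t->tv_nsec;

    if (sec > (LONG_MAX / HZ))          /* clamp instead of overflowing */
        return LONG_MAX;
    nsec += 1000000000L / HZ - 1;       /* round up to the next tick   */
    nsec /= 1000000000L / HZ;
    return HZ * sec + nsec;
}

int main(void)
{
    struct toy_timespec req = { 0, 1500000 };   /* ask for 1.5 ms */

    /* 1.5 ms rounds up to 1 jiffy; sys_nanosleep() then adds one more
     * jiffy for any non-zero request, so the timer fires after 2 ticks. */
    printf("1.5 ms -> %lu jiffies before the +1 safety jiffy\n",
           to_jiffies(&req));
    return 0;
}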