diff options
Diffstat (limited to 'arch/i386/kernel/smp.c')
-rw-r--r-- | arch/i386/kernel/smp.c | 486 |
1 files changed, 24 insertions, 462 deletions
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 9acf81556..05e0d1d23 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -2,7 +2,7 @@ * Intel SMP support routines. * * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> - * (c) 1998-99 Ingo Molnar <mingo@redhat.com> + * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> * * This code is released under the GNU public license version 2 or * later. @@ -11,18 +11,18 @@ #include <linux/init.h> #include <linux/mm.h> -#include <linux/spinlock.h> -#include <linux/kernel_stat.h> -#include <linux/smp_lock.h> #include <linux/irq.h> - #include <linux/delay.h> +#include <linux/spinlock.h> +#include <linux/smp_lock.h> +#include <linux/kernel_stat.h> #include <linux/mc146818rtc.h> + #include <asm/mtrr.h> #include <asm/pgalloc.h> /* - * Some notes on processor bugs: + * Some notes on x86 processor bugs affecting SMP operation: * * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. * The Linux implications for SMP are handled as follows: @@ -381,7 +381,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) static inline void do_flush_tlb_all_local(void) { - local_flush_tlb(); + __flush_tlb_all(); if (!current->mm && current->active_mm) { unsigned long cpu = smp_processor_id(); @@ -397,9 +397,7 @@ static void flush_tlb_all_ipi(void* info) void flush_tlb_all(void) { - if (cpu_online_map ^ (1 << smp_processor_id())) - while (smp_call_function (flush_tlb_all_ipi,0,0,1) == -EBUSY) - mb(); + smp_call_function (flush_tlb_all_ipi,0,1,1); do_flush_tlb_all_local(); } @@ -438,50 +436,44 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, * [SUMMARY] Run a function on all other CPUs. * <func> The function to run. This must be fast and non-blocking. * <info> An arbitrary pointer to pass to the function. - * <nonatomic> If true, we might schedule away to lock the mutex + * <nonatomic> currently unused. * <wait> If true, wait (atomically) until function has completed on other CPUs. * [RETURNS] 0 on success, else a negative status code. Does not return until * remote CPUs are nearly ready to execute <<func>> or are or have executed. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler, you may call it from a bottom half handler. */ { struct call_data_struct data; int ret, cpus = smp_num_cpus-1; - static DECLARE_MUTEX(lock); - unsigned long timeout; + static spinlock_t lock = SPIN_LOCK_UNLOCKED; - if (nonatomic) - down(&lock); - else - if (down_trylock(&lock)) - return -EBUSY; + if(cpus == 0) + return 0; - call_data = &data; data.func = func; data.info = info; atomic_set(&data.started, 0); data.wait = wait; if (wait) atomic_set(&data.finished, 0); - mb(); + spin_lock_bh(&lock); + call_data = &data; /* Send a message to all other CPUs and wait for them to respond */ send_IPI_allbutself(CALL_FUNCTION_VECTOR); /* Wait for response */ - timeout = jiffies + HZ; - while ((atomic_read(&data.started) != cpus) - && time_before(jiffies, timeout)) + /* FIXME: lock-up detection, backtrace on lock-up */ + while(atomic_read(&data.started) != cpus) barrier(); - ret = -ETIMEDOUT; - if (atomic_read(&data.started) != cpus) - goto out; + ret = 0; if (wait) while (atomic_read(&data.finished) != cpus) barrier(); -out: - call_data = NULL; - up(&lock); + spin_unlock_bh(&lock); return 0; } @@ -504,14 +496,12 @@ static void stop_this_cpu (void * dummy) void smp_send_stop(void) { - unsigned long flags; + smp_call_function(stop_this_cpu, NULL, 1, 0); + smp_num_cpus = 1; - __save_flags(flags); __cli(); - smp_call_function(stop_this_cpu, NULL, 1, 0); disable_local_APIC(); - __restore_flags(flags); - + __sti(); } /* @@ -561,431 +551,3 @@ asmlinkage void smp_call_function_interrupt(void) atomic_inc(&call_data->finished); } -/* - * This interrupt should _never_ happen with our APIC/SMP architecture - */ -asmlinkage void smp_spurious_interrupt(void) -{ - ack_APIC_irq(); - /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - printk("spurious APIC interrupt on CPU#%d, should never happen.\n", - smp_processor_id()); -} - -/* - * This interrupt should never happen with our APIC/SMP architecture - */ - -static spinlock_t err_lock = SPIN_LOCK_UNLOCKED; - -asmlinkage void smp_error_interrupt(void) -{ - unsigned long v; - - spin_lock(&err_lock); - - v = apic_read(APIC_ESR); - printk("APIC error interrupt on CPU#%d, should never happen.\n", - smp_processor_id()); - printk("... APIC ESR0: %08lx\n", v); - - apic_write(APIC_ESR, 0); - v |= apic_read(APIC_ESR); - printk("... APIC ESR1: %08lx\n", v); - /* - * Be a bit more verbose. (multiple bits can be set) - */ - if (v & 0x01) - printk("... bit 0: APIC Send CS Error (hw problem).\n"); - if (v & 0x02) - printk("... bit 1: APIC Receive CS Error (hw problem).\n"); - if (v & 0x04) - printk("... bit 2: APIC Send Accept Error.\n"); - if (v & 0x08) - printk("... bit 3: APIC Receive Accept Error.\n"); - if (v & 0x10) - printk("... bit 4: Reserved!.\n"); - if (v & 0x20) - printk("... bit 5: Send Illegal Vector (kernel bug).\n"); - if (v & 0x40) - printk("... bit 6: Received Illegal Vector.\n"); - if (v & 0x80) - printk("... bit 7: Illegal Register Address.\n"); - - ack_APIC_irq(); - - irq_err_count++; - - spin_unlock(&err_lock); -} - -/* - * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts - * per second. We assume that the caller has already set up the local - * APIC. - * - * The APIC timer is not exactly sync with the external timer chip, it - * closely follows bus clocks. - */ - -int prof_multiplier[NR_CPUS] = { 1, }; -int prof_old_multiplier[NR_CPUS] = { 1, }; -int prof_counter[NR_CPUS] = { 1, }; - -/* - * The timer chip is already set up at HZ interrupts per second here, - * but we do not accept timer interrupts yet. We only allow the BP - * to calibrate. - */ -static unsigned int __init get_8254_timer_count(void) -{ - extern rwlock_t xtime_lock; - unsigned long flags; - - unsigned int count; - - write_lock_irqsave(&xtime_lock, flags); - - outb_p(0x00, 0x43); - count = inb_p(0x40); - count |= inb_p(0x40) << 8; - - write_unlock_irqrestore(&xtime_lock, flags); - - return count; -} - -void __init wait_8254_wraparound(void) -{ - unsigned int curr_count, prev_count=~0; - int delta; - - curr_count = get_8254_timer_count(); - - do { - prev_count = curr_count; - curr_count = get_8254_timer_count(); - delta = curr_count-prev_count; - - /* - * This limit for delta seems arbitrary, but it isn't, it's - * slightly above the level of error a buggy Mercury/Neptune - * chipset timer can cause. - */ - - } while (delta<300); -} - -/* - * This function sets up the local APIC timer, with a timeout of - * 'clocks' APIC bus clock. During calibration we actually call - * this function twice on the boot CPU, once with a bogus timeout - * value, second time for real. The other (noncalibrating) CPUs - * call this function only once, with the real, calibrated value. - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. - */ - -#define APIC_DIVISOR 16 - -void __setup_APIC_LVTT(unsigned int clocks) -{ - unsigned int lvtt1_value, tmp_value; - - tmp_value = apic_read(APIC_LVTT); - lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV) | - APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; - apic_write(APIC_LVTT, lvtt1_value); - - /* - * Divide PICLK by 16 - */ - tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, (tmp_value - & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) - | APIC_TDR_DIV_16); - - tmp_value = apic_read(APIC_TMICT); - apic_write(APIC_TMICT, clocks/APIC_DIVISOR); -} - -void setup_APIC_timer(void * data) -{ - unsigned int clocks = (unsigned int) data, slice, t0, t1, nr; - unsigned long flags; - int delta; - - __save_flags(flags); - __sti(); - /* - * ok, Intel has some smart code in their APIC that knows - * if a CPU was in 'hlt' lowpower mode, and this increases - * its APIC arbitration priority. To avoid the external timer - * IRQ APIC event being in synchron with the APIC clock we - * introduce an interrupt skew to spread out timer events. - * - * The number of slices within a 'big' timeslice is smp_num_cpus+1 - */ - - slice = clocks / (smp_num_cpus+1); - nr = cpu_number_map[smp_processor_id()] + 1; - printk("cpu: %d, clocks: %d, slice: %d, nr: %d.\n", - smp_processor_id(), clocks, slice, nr); - /* - * Wait for IRQ0's slice: - */ - wait_8254_wraparound(); - - __setup_APIC_LVTT(clocks); - - t0 = apic_read(APIC_TMCCT)*APIC_DIVISOR; - do { - t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR; - delta = (int)(t0 - t1 - slice*nr); - } while (delta < 0); - - __setup_APIC_LVTT(clocks); - - printk("CPU%d<C0:%d,C:%d,D:%d,S:%d,C:%d>\n", - smp_processor_id(), t0, t1, delta, slice, clocks); - - __restore_flags(flags); -} - -/* - * In this function we calibrate APIC bus clocks to the external - * timer. Unfortunately we cannot use jiffies and the timer irq - * to calibrate, since some later bootup code depends on getting - * the first irq? Ugh. - * - * We want to do the calibration only once since we - * want to have local timer irqs syncron. CPUs connected - * by the same APIC bus have the very same bus frequency. - * And we want to have irqs off anyways, no accidental - * APIC irq that way. - */ - -int __init calibrate_APIC_clock(void) -{ - unsigned long long t1 = 0, t2 = 0; - long tt1, tt2; - long result; - int i; - const int LOOPS = HZ/10; - - printk("calibrating APIC timer ... "); - - /* - * Put whatever arbitrary (but long enough) timeout - * value into the APIC clock, we just want to get the - * counter running for calibration. - */ - __setup_APIC_LVTT(1000000000); - - /* - * The timer chip counts down to zero. Let's wait - * for a wraparound to start exact measurement: - * (the current tick might have been already half done) - */ - - wait_8254_wraparound(); - - /* - * We wrapped around just now. Let's start: - */ - if (cpu_has_tsc) - rdtscll(t1); - tt1 = apic_read(APIC_TMCCT); - - /* - * Let's wait LOOPS wraprounds: - */ - for (i = 0; i < LOOPS; i++) - wait_8254_wraparound(); - - tt2 = apic_read(APIC_TMCCT); - if (cpu_has_tsc) - rdtscll(t2); - - /* - * The APIC bus clock counter is 32 bits only, it - * might have overflown, but note that we use signed - * longs, thus no extra care needed. - * - * underflown to be exact, as the timer counts down ;) - */ - - result = (tt1-tt2)*APIC_DIVISOR/LOOPS; - - if (cpu_has_tsc) - printk("\n..... CPU clock speed is %ld.%04ld MHz.\n", - ((long)(t2-t1)/LOOPS)/(1000000/HZ), - ((long)(t2-t1)/LOOPS)%(1000000/HZ)); - - printk("..... host bus clock speed is %ld.%04ld MHz.\n", - result/(1000000/HZ), - result%(1000000/HZ)); - - return result; -} - -static unsigned int calibration_result; - -void __init setup_APIC_clocks(void) -{ - unsigned long flags; - - __save_flags(flags); - __cli(); - - calibration_result = calibrate_APIC_clock(); - - smp_call_function(setup_APIC_timer, (void *)calibration_result, 1, 1); - - /* - * Now set up the timer for real. - */ - setup_APIC_timer((void *)calibration_result); - - __restore_flags(flags); -} - -/* - * the frequency of the profiling timer can be changed - * by writing a multiplier value into /proc/profile. - */ -int setup_profiling_timer(unsigned int multiplier) -{ - int i; - - /* - * Sanity check. [at least 500 APIC cycles should be - * between APIC interrupts as a rule of thumb, to avoid - * irqs flooding us] - */ - if ( (!multiplier) || (calibration_result/multiplier < 500)) - return -EINVAL; - - /* - * Set the new multiplier for each CPU. CPUs don't start using the - * new values until the next timer interrupt in which they do process - * accounting. At that time they also adjust their APIC timers - * accordingly. - */ - for (i = 0; i < NR_CPUS; ++i) - prof_multiplier[i] = multiplier; - - return 0; -} - -#undef APIC_DIVISOR - -/* - * Local timer interrupt handler. It does both profiling and - * process statistics/rescheduling. - * - * We do profiling in every local tick, statistics/rescheduling - * happen only every 'profiling multiplier' ticks. The default - * multiplier is 1 and it can be changed by writing the new multiplier - * value into /proc/profile. - */ - -inline void smp_local_timer_interrupt(struct pt_regs * regs) -{ - int user = (user_mode(regs) != 0); - int cpu = smp_processor_id(); - - /* - * The profiling function is SMP safe. (nothing can mess - * around with "current", and the profiling counters are - * updated with atomic operations). This is especially - * useful with a profiling multiplier != 1 - */ - if (!user) - x86_do_profile(regs->eip); - - if (--prof_counter[cpu] <= 0) { - int system = 1 - user; - struct task_struct * p = current; - - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - prof_counter[cpu] = prof_multiplier[cpu]; - if (prof_counter[cpu] != prof_old_multiplier[cpu]) { - __setup_APIC_LVTT(calibration_result/prof_counter[cpu]); - prof_old_multiplier[cpu] = prof_counter[cpu]; - } - - /* - * After doing the above, we need to make like - * a normal interrupt - otherwise timer interrupts - * ignore the global interrupt lock, which is the - * WrongThing (tm) to do. - */ - - irq_enter(cpu, 0); - update_one_process(p, 1, user, system, cpu); - if (p->pid) { - p->counter -= 1; - if (p->counter <= 0) { - p->counter = 0; - p->need_resched = 1; - } - if (p->priority < DEF_PRIORITY) { - kstat.cpu_nice += user; - kstat.per_cpu_nice[cpu] += user; - } else { - kstat.cpu_user += user; - kstat.per_cpu_user[cpu] += user; - } - kstat.cpu_system += system; - kstat.per_cpu_system[cpu] += system; - - } - irq_exit(cpu, 0); - } - - /* - * We take the 'long' return path, and there every subsystem - * grabs the apropriate locks (kernel lock/ irq lock). - * - * we might want to decouple profiling from the 'long path', - * and do the profiling totally in assembly. - * - * Currently this isn't too much of an issue (performance wise), - * we can take more than 100K local irqs per second on a 100 MHz P5. - */ -} - -/* - * Local APIC timer interrupt. This is the most natural way for doing - * local interrupts, but local timer interrupts can be emulated by - * broadcast interrupts too. [in case the hw doesnt support APIC timers] - * - * [ if a single-CPU system runs an SMP kernel then we call the local - * interrupt as well. Thus we cannot inline the local irq ... ] - */ -unsigned int apic_timer_irqs [NR_CPUS] = { 0, }; - -void smp_apic_timer_interrupt(struct pt_regs * regs) -{ - /* - * the NMI deadlock-detector uses this. - */ - apic_timer_irqs[smp_processor_id()]++; - - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow. - */ - ack_APIC_irq(); - smp_local_timer_interrupt(regs); -} - |