/*
 *  Intel MP v1.1/v1.4 specification support routines for multi-pentium
 *  hosts.
 *
 *  (c) 1995 Alan Cox, CymruNET Ltd
 *  Supported by Caldera http://www.caldera.com.
 *  Much of the core SMP work is based on previous work by Thomas Radke, to
 *  whom a great many thanks are extended.
 *
 *  Thanks to Intel for making available several different Pentium and
 *  Pentium Pro MP machines.
 *
 *  This code is released under the GNU public license version 2 or
 *  later.
 *
 *  Fixes
 *      Felix Koop          :   NR_CPUS used properly
 *      Jose Renau          :   Handle single CPU case.
 *      Alan Cox            :   By repeated request 8) - Total BogoMIP report.
 *      Greg Wright         :   Fix for kernel stacks panic.
 *      Erich Boleyn        :   MP v1.4 and additional changes.
 *      Matthias Sattler    :   Changes for 2.1 kernel map.
 *      Michel Lespinasse   :   Changes for 2.1 kernel map.
 *      Michael Chastain    :   Change trampoline.S to gnu as.
 *      Alan Cox            :   Dumb bug: 'B' step PPro's are fine
 *      Ingo Molnar         :   Added APIC timers, based on code
 *                              from Jose Renau
 */

/* (include list reconstructed; the header names were lost in this copy) */
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/timer.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/mc146818rtc.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <asm/i82489.h>
#include <asm/pgtable.h>
#include <asm/bitops.h>
#include <asm/smp.h>
#include <asm/io.h>

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>

#include "irq.h"

extern unsigned long start_kernel, _etext;
extern void update_one_process( struct task_struct *p,
                unsigned long ticks, unsigned long user,
                unsigned long system);

/*
 *  Some notes on processor bugs:
 *
 *  Pentium and Pentium Pro (and all CPUs) have bugs. The Linux issues
 *  for SMP are handled as follows.
 *
 *  Pentium Pro
 *      Occasional delivery of a 'spurious interrupt' as trap #16. This
 *      is very very rare. The kernel logs the event and recovers.
 *
 *  Pentium
 *      There is a marginal case where REP MOVS on 100MHz SMP
 *      machines with B stepping processors can fail. XXX should provide
 *      an L1cache=Writethrough or L1cache=off option.
 *
 *      B stepping CPUs may hang. There are hardware workarounds
 *      for this. We warn about it in case your board doesn't have the
 *      workarounds. Basically that's so I can tell anyone with a
 *      B stepping CPU and SMP problems "tough".
 *
 *  Specific items [From Pentium Processor Specification Update]
 *
 *  1AP.    Linux doesn't use remote read
 *  2AP.    Linux doesn't trust APIC errors
 *  3AP.    We work around this
 *  4AP.    Linux never generates 3 interrupts of the same priority
 *          to cause a lost local interrupt.
 *  5AP.    Remote read is never used
 *  9AP.    XXX NEED TO CHECK WE HANDLE THIS XXX
 *  10AP.   XXX NEED TO CHECK WE HANDLE THIS XXX
 *  11AP.   Linux reads the APIC between writes to avoid this, as per
 *          the documentation. Make sure you preserve this as it affects
 *          the C stepping chips too.
 *
 *  If this sounds worrying, believe me these bugs are ___RARE___ and
 *  there's nothing of note with C stepping and upwards.
 */
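/*
 *  For reference, the 11AP workaround mentioned above is the
 *  read-before-write idiom used throughout this file (a sketch only;
 *  APIC_SOME_REG stands for whichever register is being touched):
 *
 *      tmp = apic_read(APIC_SOME_REG);     dummy read settles the APIC
 *      apic_write(APIC_SOME_REG, value);   now safe on B/C steppings
 *
 *  See setup_APIC_timer() below, which follows exactly this pattern.
 */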
/*
 *  Why isn't this somewhere standard ??
 */

extern __inline int max(int a,int b)
{
    if(a>b)
        return a;
    return b;
}

static int smp_b_stepping = 0;              /* Set if we find a B stepping CPU */
static int max_cpus = -1;                   /* Setup configured maximum number of CPUs to activate */
int smp_found_config=0;                     /* Have we found an SMP box */
unsigned long cpu_present_map = 0;          /* Bitmask of existing CPUs */
int smp_num_cpus = 1;                       /* Total count of live CPUs */
int smp_threads_ready=0;                    /* Set when the idlers are all forked */
volatile int cpu_number_map[NR_CPUS];       /* which CPU maps to which logical number */
volatile int cpu_logical_map[NR_CPUS];      /* which logical number maps to which CPU */
volatile unsigned long cpu_callin_map[NR_CPUS] = {0,};  /* We always use 0, the rest is ready for parallel delivery */
volatile unsigned long smp_invalidate_needed;   /* Used for the invalidate map that's also checked in the spinlock */
volatile unsigned long kstack_ptr;          /* Stack vector for booting CPUs */
struct cpuinfo_x86 cpu_data[NR_CPUS];       /* Per-CPU bogomips and other parameters */
static unsigned int num_processors = 1;     /* Internal processor count */
static unsigned long io_apic_addr = 0xFEC00000; /* Address of the I/O APIC (not yet used) */
unsigned char boot_cpu_id = 0;              /* Processor that is doing the boot up */
static int smp_activated = 0;               /* Tripped once we need to start cross invalidating */
int apic_version[NR_CPUS];                  /* APIC version number */
static volatile int smp_commenced=0;        /* Tripped when we start scheduling */
unsigned long apic_addr = 0xFEE00000;       /* Address of APIC (defaults to 0xFEE00000) */
unsigned long nlong = 0;                    /* dummy used for apic_reg address + 0x20 */
unsigned char *apic_reg=((unsigned char *)(&nlong))-0x20;   /* Later set to the ioremap() of the APIC */
unsigned long apic_retval;                  /* Just debugging the assembler.. */
static volatile unsigned char smp_cpu_in_msg[NR_CPUS];  /* True if this processor is sending an IPI */
volatile unsigned long kernel_flag=0;       /* Kernel spinlock */
volatile unsigned char active_kernel_processor = NO_PROC_ID;    /* Processor holding kernel spinlock */
volatile unsigned long kernel_counter=0;    /* Number of times the processor holds the lock */
volatile unsigned long syscall_count=0;     /* Number of times the processor holds the syscall lock */
volatile unsigned long ipi_count;           /* Number of IPIs delivered */

#ifdef __SMP_PROF__
volatile unsigned long smp_spins[NR_CPUS]={0};          /* Count interrupt spins */
volatile unsigned long smp_spins_syscall[NR_CPUS]={0};  /* Count syscall spins */
volatile unsigned long smp_spins_syscall_cur[NR_CPUS]={0};  /* Count spins for the actual syscall */
volatile unsigned long smp_spins_sys_idle[NR_CPUS]={0}; /* Count spins for sys_idle */
volatile unsigned long smp_idle_count[1+NR_CPUS]={0,};  /* Count idle ticks */
/* Count local APIC timer ticks */
volatile unsigned long smp_local_timer_ticks[1+NR_CPUS]={0,};
#endif

#if defined (__SMP_PROF__)
volatile unsigned long smp_idle_map=0;      /* Map for idle processors */
#endif

volatile unsigned long smp_proc_in_lock[NR_CPUS] = {0,};    /* for computing process time */
volatile int smp_process_available=0;

/*#define SMP_DEBUG*/

#ifdef SMP_DEBUG
#define SMP_PRINTK(x)   printk x
#else
#define SMP_PRINTK(x)
#endif
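/*
 *  Illustrative only (hence #if 0): the invariant the two CPU maps
 *  above maintain once the secondaries are up. do_boot_cpu() below
 *  sets cpu_number_map[phys] = logical and cpu_logical_map[logical]
 *  = phys, so the two arrays are inverse permutations of each other
 *  over the live CPUs.
 */
#if 0
static void check_cpu_maps(void)
{
    int n;
    for (n = 0; n < smp_num_cpus; n++) {
        /* logical -> physical -> logical must round-trip */
        if (cpu_number_map[cpu_logical_map[n]] != n)
            printk("cpu map mismatch at logical %d\n", n);
    }
}
#endif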
/*
 *  Setup routine for controlling SMP activation
 *
 *  Command-line option of "nosmp" or "maxcpus=0" will disable SMP
 *  activation entirely (the MPS table probe still happens, though).
 *
 *  Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
 *  greater than 0, limits the maximum number of CPUs activated in
 *  SMP mode to <NUM>.
 */
__initfunc(void smp_setup(char *str, int *ints))
{
    if (ints && ints[0] > 0)
        max_cpus = ints[1];
    else
        max_cpus = 0;
}

static inline void ack_APIC_irq (void)
{
    /* Clear the IPI */

    /* Dummy read */
    apic_read(APIC_SPIV);

    /* Docs say use 0 for future compatibility */
    apic_write(APIC_EOI, 0);
}

/*
 *  Checksum an MP configuration block.
 */
static int mpf_checksum(unsigned char *mp, int len)
{
    int sum=0;
    while(len--)
        sum+=*mp++;
    return sum&0xFF;
}

/*
 *  Processor encoding in an MP configuration block
 */
static char *mpc_family(int family,int model)
{
    static char n[32];
    static char *model_defs[]=
    {
        "80486DX","80486DX",
        "80486SX","80486DX/2 or 80487",
        "80486SL","Intel5X2(tm)",
        "Unknown","Unknown",
        "80486DX/4"
    };
    if(family==0x6)
        return("Pentium(tm) Pro");
    if(family==0x5)
        return("Pentium(tm)");
    if(family==0x0F && model==0x0F)
        return("Special controller");
    if(family==0x04 && model<9)
        return model_defs[model];
    sprintf(n,"Unknown CPU [%d:%d]",family, model);
    return n;
}

/*
 *  Read the MPC
 */
__initfunc(static int smp_read_mpc(struct mp_config_table *mpc))
{
    char str[16];
    int count=sizeof(*mpc);
    int apics=0;
    unsigned char *mpt=((unsigned char *)mpc)+count;

    if(memcmp(mpc->mpc_signature,MPC_SIGNATURE,4))
    {
        printk("Bad signature [%c%c%c%c].\n",
            mpc->mpc_signature[0],
            mpc->mpc_signature[1],
            mpc->mpc_signature[2],
            mpc->mpc_signature[3]);
        return 1;
    }
    if(mpf_checksum((unsigned char *)mpc,mpc->mpc_length))
    {
        printk("Checksum error.\n");
        return 1;
    }
    if(mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04)
    {
        printk("Bad Config Table version (%d)!!\n",mpc->mpc_spec);
        return 1;
    }
    memcpy(str,mpc->mpc_oem,8);
    str[8]=0;
    printk("OEM ID: %s ",str);
    memcpy(str,mpc->mpc_productid,12);
    str[12]=0;
    printk("Product ID: %s ",str);
    printk("APIC at: 0x%lX\n",mpc->mpc_lapic);

    /* set the local APIC address */
    apic_addr = (unsigned long)phys_to_virt((unsigned long)mpc->mpc_lapic);

    /*
     *  Now process the configuration blocks.
     */
    while(count<mpc->mpc_length)
    {
        switch(*mpt)
        {
            case MP_PROCESSOR:
            {
                struct mpc_config_processor *m=
                    (struct mpc_config_processor *)mpt;
                if(m->mpc_cpuflag&CPU_ENABLED)
                {
                    printk("Processor #%d %s APIC version %d\n",
                        m->mpc_apicid,
                        mpc_family((m->mpc_cpufeature&
                            CPU_FAMILY_MASK)>>8,
                            (m->mpc_cpufeature&
                                CPU_MODEL_MASK)>>4),
                        m->mpc_apicver);
#ifdef SMP_DEBUG
                    if(m->mpc_featureflag&(1<<0))
                        printk("    Floating point unit present.\n");
                    if(m->mpc_featureflag&(1<<7))
                        printk("    Machine Exception supported.\n");
                    if(m->mpc_featureflag&(1<<8))
                        printk("    64 bit compare & exchange supported.\n");
                    if(m->mpc_featureflag&(1<<9))
                        printk("    Internal APIC present.\n");
#endif
                    if(m->mpc_cpuflag&CPU_BOOTPROCESSOR)
                    {
                        SMP_PRINTK(("    Bootup CPU\n"));
                        boot_cpu_id=m->mpc_apicid;
                    }
                    else    /* Boot CPU already counted */
                        num_processors++;
                    if(m->mpc_apicid>NR_CPUS)
                        printk("Processor #%d unused. (Max %d processors).\n",
                            m->mpc_apicid, NR_CPUS);
                    else
                    {
                        cpu_present_map|=(1<<m->mpc_apicid);
                        apic_version[m->mpc_apicid]=m->mpc_apicver;
                    }
                }
                mpt+=sizeof(*m);
                count+=sizeof(*m);
                break;
            }
            case MP_BUS:
            {
                struct mpc_config_bus *m=
                    (struct mpc_config_bus *)mpt;
                memcpy(str,m->mpc_bustype,6);
                str[6]=0;
                SMP_PRINTK(("Bus #%d is %s\n",
                    m->mpc_busid,
                    str));
                mpt+=sizeof(*m);
                count+=sizeof(*m);
                break;
            }
            case MP_IOAPIC:
            {
                struct mpc_config_ioapic *m=
                    (struct mpc_config_ioapic *)mpt;
                if(m->mpc_flags&MPC_APIC_USABLE)
                {
                    apics++;
                    printk("I/O APIC #%d Version %d at 0x%lX.\n",
                        m->mpc_apicid,m->mpc_apicver,
                        m->mpc_apicaddr);
                    io_apic_addr = (unsigned long)phys_to_virt(m->mpc_apicaddr);
                }
                mpt+=sizeof(*m);
                count+=sizeof(*m);
                break;
            }
            case MP_INTSRC:
            {
                struct mpc_config_intsrc *m=
                    (struct mpc_config_intsrc *)mpt;
                mpt+=sizeof(*m);
                count+=sizeof(*m);
                break;
            }
            case MP_LINTSRC:
            {
                struct mpc_config_intlocal *m=
                    (struct mpc_config_intlocal *)mpt;
                mpt+=sizeof(*m);
                count+=sizeof(*m);
                break;
            }
        }
    }
    if(apics>1)
        printk("Warning: Multiple APICs not supported.\n");
    return num_processors;
}

/*
 *  Scan the memory blocks for an SMP configuration block.
 */
__initfunc(int smp_scan_config(unsigned long base, unsigned long length))
{
    unsigned long *bp=phys_to_virt(base);
    struct intel_mp_floating *mpf;

    SMP_PRINTK(("Scan SMP from %p for %ld bytes.\n",
        bp,length));
    if(sizeof(*mpf)!=16)
        printk("Error: MPF size\n");

    while(length>0)
    {
        if(*bp==SMP_MAGIC_IDENT)
        {
            mpf=(struct intel_mp_floating *)bp;
            if(mpf->mpf_length==1 &&
                !mpf_checksum((unsigned char *)bp,16) &&
                (mpf->mpf_specification == 1
                 || mpf->mpf_specification == 4) )
            {
                printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
                if(mpf->mpf_feature2&(1<<7))
                    printk("    IMCR and PIC compatibility mode.\n");
                else
                    printk("    Virtual Wire compatibility mode.\n");
                smp_found_config=1;
                /*
                 *  Now see if we need to read further.
                 */
                if(mpf->mpf_feature1!=0)
                {
                    unsigned long cfg;

                    /*
                     *  We need to know what the local
                     *  APIC id of the boot CPU is!
                     */

/*
 *
 *  HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
 *
 *  It's not just a crazy hack...  ;-)
 */
                    /*
                     *  Standard page mapping
                     *  functions don't work yet.
                     *  We know that page 0 is not
                     *  used. Steal it for now!
                     */

                    cfg=pg0[0];
                    pg0[0] = (apic_addr | 7);
                    local_flush_tlb();

                    boot_cpu_id = GET_APIC_ID(*((volatile unsigned long *) APIC_ID));

                    /*
                     *  Give it back
                     */

                    pg0[0]= cfg;
                    local_flush_tlb();

/*
 *
 *  END OF HACK   END OF HACK   END OF HACK   END OF HACK   END OF HACK
 *
 */

                    /*
                     *  2 CPUs, numbered 0 & 1.
                     */
                    cpu_present_map=3;
                    num_processors=2;
                    printk("I/O APIC at 0xFEC00000.\n");
                    printk("Bus #0 is ");
                }
                switch(mpf->mpf_feature1)
                {
                    case 1:
                    case 5:
                        printk("ISA\n");
                        break;
                    case 2:
                        printk("EISA with no IRQ8 chaining\n");
                        break;
                    case 6:
                    case 3:
                        printk("EISA\n");
                        break;
                    case 4:
                    case 7:
                        printk("MCA\n");
                        break;
                    case 0:
                        break;
                    default:
                        printk("???\nUnknown standard configuration %d\n",
                            mpf->mpf_feature1);
                        return 1;
                }
                if(mpf->mpf_feature1>4)
                {
                    printk("Bus #1 is PCI\n");

                    /*
                     *  Set local APIC version to
                     *  the integrated form.
                     *  It's initialized to zero
                     *  otherwise, representing
                     *  a discrete 82489DX.
                     */
                    apic_version[0] = 0x10;
                    apic_version[1] = 0x10;
                }
                /*
                 *  Read the physical hardware table.
                 *  Anything here will override the
                 *  defaults.
                 */
                if(mpf->mpf_physptr)
                    smp_read_mpc((void *)mpf->mpf_physptr);

                /*
                 *  Now that the boot CPU id is known,
                 *  set some other information about it.
                 */
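                /*
                 *  Worth spelling out (a reading of the declarations
                 *  above, not new behaviour): apic_reg points 0x20
                 *  bytes below nlong, and APIC_ID is register offset
                 *  0x20, so until the real APIC mapping is installed
                 *  an apic_read(APIC_ID) lands on nlong. Preloading
                 *  nlong with boot_cpu_id<<24 therefore makes
                 *  GET_APIC_ID() return the boot CPU's id during
                 *  early bring-up.
                 */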
                nlong = boot_cpu_id<<24;    /* Dummy 'self' for bootup */
                cpu_logical_map[0] = boot_cpu_id;
                global_irq_holder = boot_cpu_id;
                current->processor = boot_cpu_id;

                printk("Processors: %d\n", num_processors);
                /*
                 *  Only use the first configuration found.
                 */
                return 1;
            }
        }
        bp+=4;
        length-=16;
    }

    return 0;
}

/*
 *  Trampoline 80x86 program as an array.
 */

extern unsigned char trampoline_data [];
extern unsigned char trampoline_end  [];
static unsigned char *trampoline_base;

/*
 *  Currently trivial. Write the real->protected mode
 *  bootstrap into the page concerned. The caller
 *  has made sure it's suitably aligned.
 */
__initfunc(static unsigned long setup_trampoline(void))
{
    memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
    return virt_to_phys(trampoline_base);
}

/*
 *  We are called very early to get the low memory for the
 *  SMP bootup trampoline page.
 */
__initfunc(unsigned long smp_alloc_memory(unsigned long mem_base))
{
    if (virt_to_phys((void *)mem_base) >= 0x9F000)
        panic("smp_alloc_memory: Insufficient low memory for kernel trampoline 0x%lx.\n", mem_base);
    trampoline_base = (void *)mem_base;
    return mem_base + PAGE_SIZE;
}

/*
 *  The bootstrap kernel entry code has set these up. Save them for
 *  a given CPU.
 */
__initfunc(void smp_store_cpu_info(int id))
{
    struct cpuinfo_x86 *c=&cpu_data[id];

    c->hard_math=hard_math;     /* Always assumed same currently */
    c->x86=x86;
    c->x86_model=x86_model;
    c->x86_mask=x86_mask;
    /*
     *  Mask B, Pentium, but not Pentium MMX
     */
    if(x86_mask>=1 && x86_mask<=4 && x86==5 &&
        (x86_model>=0&&x86_model<=3))
        smp_b_stepping=1;       /* Remember we have B step Pentia with bugs */
    c->x86_capability=x86_capability;
    c->fdiv_bug=fdiv_bug;
    c->wp_works_ok=wp_works_ok; /* Always assumed the same currently */
    c->hlt_works_ok=hlt_works_ok;
    c->have_cpuid=have_cpuid;
    c->udelay_val=loops_per_sec;
    strcpy(c->x86_vendor_id, x86_vendor_id);
}

/*
 *  Architecture specific routine called by the kernel just before init is
 *  fired off. This allows the BP to have everything in order [we hope].
 *  At the end of this all the APs will hit the system scheduling and off
 *  we go. Each AP will load the system gdt's and jump through the kernel
 *  init into idle(). At this point the scheduler will one day take over
 *  and give them jobs to do. smp_callin is a standard routine
 *  we use to track CPUs as they power up.
 */

__initfunc(void smp_commence(void))
{
    /*
     *  Lets the callins below out of their loop.
     */
    SMP_PRINTK(("Setting commenced=1, go go go\n"));
    smp_commenced=1;
}

__initfunc(void smp_callin(void))
{
    extern void calibrate_delay(void);
    int cpuid=GET_APIC_ID(apic_read(APIC_ID));
    unsigned long l;

    /*
     *  Activate our APIC
     */

    SMP_PRINTK(("CALLIN %d %d\n",hard_smp_processor_id(), smp_processor_id()));
    l=apic_read(APIC_SPIV);
    l|=(1<<8);      /* Enable */
    apic_write(APIC_SPIV,l);

    /*
     *  Set up our APIC timer.
     */
    setup_APIC_clock ();

    sti();
    /*
     *  Get our bogomips.
     */
    calibrate_delay();
    SMP_PRINTK(("Stack at about %p\n",&cpuid));

    /*
     *  Save our processor parameters
     */
    smp_store_cpu_info(cpuid);

    /*
     *  Allow the master to continue.
     */
    set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]);
}

static int cpucount = 0;

extern int cpu_idle(void * unused);
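/*
 *  The boot handshake between the BP and an AP, pieced together from
 *  the routines above and below (a summary, not new protocol):
 *
 *  BP: do_boot_cpu(i) sends INIT then STARTUP IPIs, and polls
 *      cpu_callin_map[0] waiting for bit i to appear.
 *  AP: start_secondary() -> smp_callin(): enables its local APIC,
 *      calibrates bogomips, then sets its cpu_callin_map bit.
 *  AP: spins on !smp_commenced until the BP finally runs
 *      smp_commence(), then enters cpu_idle().
 */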
/*
 *  Activate a secondary processor.
 */
__initfunc(int start_secondary(void *unused))
{
    smp_callin();
    while (!smp_commenced)
        barrier();
    return cpu_idle(NULL);
}

/*
 *  Everything has been set up for the secondary
 *  CPUs - they just need to reload everything
 *  from the task structure.
 */
__initfunc(void initialize_secondary(void))
{
    struct thread_struct * p = &current->tss;

    /*
     *  We don't actually need to load the full TSS,
     *  basically just the stack pointer and the eip.
     */

    asm volatile("lldt %%ax": :"a" (p->ldt));
    asm volatile("ltr %%ax": :"a" (p->tr));
    asm volatile(
        "movl %0,%%esp\n\t"
        "jmp *%1"
        :
        :"r" (p->esp),"r" (p->eip));
}

extern struct {
    void * esp;
    unsigned short ss;
} stack_start;

__initfunc(static void do_boot_cpu(int i))
{
    unsigned long cfg;
    pgd_t maincfg;
    struct task_struct *idle;
    unsigned long send_status, accept_status;
    int timeout, num_starts, j;
    unsigned long start_eip;

    /*
     *  We need an idle process for each processor.
     */
    kernel_thread(start_secondary, NULL, CLONE_PID);
    cpucount++;

    idle = task[cpucount];
    if (!idle)
        panic("No idle process for CPU %d\n", i);

    idle->processor = i;
    cpu_logical_map[cpucount] = i;
    cpu_number_map[i] = cpucount;

    /* start_eip had better be page-aligned! */
    start_eip = setup_trampoline();

    printk("Booting processor %d eip %lx: ", i, start_eip);     /* So we see what's up */
    stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);

    /*
     *  This grunge runs the startup process for
     *  the targeted processor.
     */

    SMP_PRINTK(("Setting warm reset code and vector.\n"));

    CMOS_WRITE(0xa, 0xf);
    local_flush_tlb();
    SMP_PRINTK(("1.\n"));
    *((volatile unsigned short *) phys_to_virt(0x469)) = start_eip >> 4;
    SMP_PRINTK(("2.\n"));
    *((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf;
    SMP_PRINTK(("3.\n"));

    maincfg=swapper_pg_dir[0];
    ((unsigned long *)swapper_pg_dir)[0]=0x102007;

    /*
     *  Be paranoid about clearing APIC errors.
     */

    if ( apic_version[i] & 0xF0 )
    {
        apic_write(APIC_ESR, 0);
        accept_status = (apic_read(APIC_ESR) & 0xEF);
    }

    /*
     *  Status is now clean
     */

    send_status =   0;
    accept_status = 0;

    /*
     *  Starting actual IPI sequence...
     */

    SMP_PRINTK(("Asserting INIT.\n"));

    /*
     *  Turn INIT on
     */

    cfg=apic_read(APIC_ICR2);
    cfg&=0x00FFFFFF;
    apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i));      /* Target chip */
    cfg=apic_read(APIC_ICR);
    cfg&=~0xCDFFF;                                          /* Clear bits */
    cfg |= (APIC_DEST_FIELD | APIC_DEST_LEVELTRIG
        | APIC_DEST_ASSERT | APIC_DEST_DM_INIT);
    apic_write(APIC_ICR, cfg);                              /* Send IPI */

    udelay(200);
    SMP_PRINTK(("Deasserting INIT.\n"));

    cfg=apic_read(APIC_ICR2);
    cfg&=0x00FFFFFF;
    apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i));      /* Target chip */
    cfg=apic_read(APIC_ICR);
    cfg&=~0xCDFFF;                                          /* Clear bits */
    cfg |= (APIC_DEST_FIELD | APIC_DEST_LEVELTRIG
        | APIC_DEST_DM_INIT);
    apic_write(APIC_ICR, cfg);                              /* Send IPI */

    /*
     *  Should we send STARTUP IPIs ?
     *
     *  Determine this based on the APIC version.
     *  If we don't have an integrated APIC, don't
     *  send the STARTUP IPIs.
     */

    if ( apic_version[i] & 0xF0 )
        num_starts = 2;
    else
        num_starts = 0;
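    /*
     *  A note on what each STARTUP IPI below encodes (illustrative
     *  numbers, per the MP spec's universal start-up algorithm): the
     *  low byte of the ICR value carries the vector, which for a
     *  STARTUP IPI is the physical page number of the real-mode
     *  entry point. So if the trampoline page were at 0x9E000,
     *  start_eip>>12 would be 0x9E and the woken AP would begin
     *  executing in real mode at CS:IP = 9E00:0000, linear 0x9E000.
     */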
    /*
     *  Run STARTUP IPI loop.
     */

    for (j = 1; !(send_status || accept_status)
          && (j <= num_starts) ; j++)
    {
        SMP_PRINTK(("Sending STARTUP #%d.\n",j));
        apic_write(APIC_ESR, 0);
        SMP_PRINTK(("After apic_write.\n"));

        /*
         *  STARTUP IPI
         */

        cfg=apic_read(APIC_ICR2);
        cfg&=0x00FFFFFF;
        apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i));  /* Target chip */
        cfg=apic_read(APIC_ICR);
        cfg&=~0xCDFFF;                                      /* Clear bits */
        cfg |= (APIC_DEST_FIELD
            | APIC_DEST_DM_STARTUP
            | (start_eip >> 12));                           /* Boot on the stack */
        SMP_PRINTK(("Before start apic_write.\n"));
        apic_write(APIC_ICR, cfg);                          /* Kick the second */

        SMP_PRINTK(("Startup point 1.\n"));

        timeout = 0;
        do {
            SMP_PRINTK(("Sleeping.\n")); udelay(1000000);
            udelay(10);
        } while ( (send_status = (apic_read(APIC_ICR) & 0x1000))
              && (timeout++ < 1000));
        udelay(200);

        accept_status = (apic_read(APIC_ESR) & 0xEF);
    }
    SMP_PRINTK(("After Startup.\n"));

    if (send_status)        /* APIC never delivered?? */
        printk("APIC never delivered???\n");
    if (accept_status)      /* Send accept error */
        printk("APIC delivery error (%lx).\n", accept_status);

    if( !(send_status || accept_status) )
    {
        for(timeout=0;timeout<50000;timeout++)
        {
            if(cpu_callin_map[0]&(1<<i))
                break;              /* It has booted */
            udelay(100);            /* Wait 5s total for a response */
        }
        /*
         *  (NB: reconstructed; the original booted/stuck diagnostics
         *  that lived here were lost in this copy.)
         */
        if(cpu_callin_map[0]&(1<<i))
            SMP_PRINTK(("CPU has booted.\n"));
        else
            printk("Not responding.\n");
    }

    swapper_pg_dir[0]=maincfg;
    local_flush_tlb();
}

unsigned int prof_multiplier[NR_CPUS];
unsigned int prof_counter[NR_CPUS];

/*
 *  Cycle through the processors, booting each secondary.
 */
__initfunc(void smp_boot_cpus(void))
{
    int i;
    unsigned long cfg;

    /*
     *  Initialize the logical to physical CPU number mapping
     *  and the per-CPU profiling counter/multiplier.
     */
    for (i = 0; i < NR_CPUS; i++) {
        cpu_number_map[i] = -1;
        prof_counter[i] = 1;
        prof_multiplier[i] = 1;
    }

    /*
     *  Register the boot CPU, then scan the CPU present map and
     *  fire up the other CPUs.
     *  (NB: minimal reconstruction; the original boot-CPU APIC
     *  setup that lived here was lost in this copy.)
     */
    smp_store_cpu_info(boot_cpu_id);
    cpu_number_map[boot_cpu_id] = 0;

    for(i=0;i<NR_CPUS;i++)
    {
        /*
         *  Don't even attempt to start the boot CPU!
         */
        if (i == boot_cpu_id)
            continue;

        if ((cpu_present_map & (1 << i))
            && (max_cpus < 0 || max_cpus > cpucount+1))
        {
            do_boot_cpu(i);
        }

        /*
         *  Make sure we unmap all failed CPUs
         */
        if (cpu_number_map[i] == -1)
            cpu_present_map &= ~(1 << i);
    }

    /*
     *  Cleanup possible dangling ends...
     */

    /*
     *  Install writable page 0 entry.
     */
    cfg = pg0[0];
    pg0[0] = 3;     /* writeable, present, addr 0 */
    local_flush_tlb();

    /*
     *  Paranoid: Set warm reset code and vector here back
     *  to default values.
     */
    CMOS_WRITE(0, 0xf);
    *((volatile long *) phys_to_virt(0x467)) = 0;

    /*
     *  Restore old page 0 entry.
     */
    pg0[0] = cfg;
    local_flush_tlb();

    /*
     *  Allow the user to impress friends.
     */

    SMP_PRINTK(("Before bogomips.\n"));
    if(cpucount==0)
    {
        printk("Error: only one processor found.\n");
        cpu_present_map=(1<<hard_smp_processor_id());
    }
    else
    {
        /*
         *  (NB: reconstructed; the original BogoMIPS summary and the
         *  IPI/message-passing routines that followed it were lost
         *  in this copy.)
         */
        unsigned long bogosum=0;
        for(i=0;i<32;i++)
        {
            if(cpu_present_map&(1<<i))
                bogosum+=cpu_data[i].udelay_val;
        }
        printk("Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
            cpucount+1,
            (bogosum+2500)/500000,
            ((bogosum+2500)/5000)%100);
        smp_activated=1;
        smp_num_cpus=cpucount+1;
    }
    if(smp_b_stepping)
        printk("WARNING: SMP operation may be unreliable with B stepping processors.\n");
}

/*
 *  Local timer interrupt handler. It does both profiling and
 *  process statistics/rescheduling.
 */
void smp_local_timer_interrupt(struct pt_regs * regs)
{
    int cpu = smp_processor_id();

    /*
     *  The profiling function is SMP safe. (nothing can mess
     *  around with "current", and the profiling counters are
     *  updated with atomic operations.)
     */
    if (!user_mode(regs))
        x86_do_profile(regs->eip);

    if (!--prof_counter[cpu]) {
        int user=0,system=0;
        struct task_struct * p = current;

        /*
         *  After doing the above, we need to make like
         *  a normal interrupt - otherwise timer interrupts
         *  ignore the global interrupt lock, which is the
         *  WrongThing (tm) to do.
         */

        if (user_mode(regs))
            user=1;
        else
            system=1;

        irq_enter(cpu, 0);
        if (p->pid) {
            update_one_process(p, 1, user, system);

            p->counter -= 1;
            if (p->counter < 0) {
                p->counter = 0;
                need_resched = 1;
            }
            if (p->priority < DEF_PRIORITY)
                kstat.cpu_nice += user;
            else
                kstat.cpu_user += user;

            kstat.cpu_system += system;

        } else {
#ifdef __SMP_PROF__
            if (test_bit(cpu,&smp_idle_map))
                smp_idle_count[cpu]++;
#endif
        }
        prof_counter[cpu]=prof_multiplier[cpu];
        irq_exit(cpu, 0);
    }

#ifdef __SMP_PROF__
    smp_local_timer_ticks[cpu]++;
#endif

    /*
     *  We take the 'long' return path, and there every subsystem
     *  grabs the appropriate locks (kernel lock/irq lock).
     *
     *  We might want to decouple profiling from the 'long path',
     *  and do the profiling totally in assembly.
     *
     *  Currently this isn't too much of an issue (performance-wise);
     *  we can take more than 100K local irqs per second on a 100 MHz P5.
     */
}
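/*
 *  On the counter mechanics above (a summary of behaviour already in
 *  the code): prof_counter[cpu] is reloaded from prof_multiplier[cpu]
 *  each time it reaches zero, so with the default multiplier of 1 the
 *  statistics/rescheduling path runs on every local APIC tick, while
 *  a multiplier of N would run it on every Nth tick only.
 */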
/*
 *  Local APIC timer interrupt. This is the most natural way for doing
 *  local interrupts, but local timer interrupts can be emulated by
 *  broadcast interrupts too. [in case the hw doesn't support APIC timers]
 *
 *  [ if a single-CPU system runs an SMP kernel then we call the local
 *    interrupt as well. Thus we cannot inline the local irq ... ]
 */
void smp_apic_timer_interrupt(struct pt_regs * regs)
{
    /*
     *  NOTE! We'd better ACK the irq immediately,
     *  because timer handling can be slow, and we
     *  want to be able to accept NMI tlb invalidates
     *  during this time.
     */
    ack_APIC_irq ();
    smp_local_timer_interrupt(regs);
}

/*
 *  Reschedule call back
 */
asmlinkage void smp_reschedule_interrupt(void)
{
    int cpu = smp_processor_id();

    ack_APIC_irq();
    /*
     *  This looks silly, but we actually do need to wait
     *  for the global interrupt lock.
     */
    irq_enter(cpu, 0);
    need_resched=1;
    irq_exit(cpu, 0);
}

/*
 *  Invalidate call-back
 */
asmlinkage void smp_invalidate_interrupt(void)
{
    if (test_and_clear_bit(smp_processor_id(), &smp_invalidate_needed))
        local_flush_tlb();

    ack_APIC_irq ();
}

/*
 *  CPU halt call-back
 */
asmlinkage void smp_stop_cpu_interrupt(void)
{
    if (cpu_data[smp_processor_id()].hlt_works_ok)
        for(;;) __asm__("hlt");
    for (;;)
        ;
}

/*
 *  This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
 *  per second. We assume that the caller has already set up the local
 *  APIC at apic_addr.
 *
 *  The APIC timer is not exactly in sync with the external timer chip;
 *  it closely follows bus clocks.
 */

#define RTDSC(x)    __asm__ __volatile__ (  ".byte 0x0f,0x31" \
                :"=a" (((unsigned long*)&x)[0]), \
                 "=d" (((unsigned long*)&x)[1]))

/*
 *  The timer chip is already set up at HZ interrupts per second here,
 *  but we do not accept timer interrupts yet. We only allow the BP
 *  to calibrate.
 */
__initfunc(static unsigned int get_8254_timer_count (void))
{
    unsigned int count;

    outb_p(0x00, 0x43);
    count = inb_p(0x40);
    count |= inb_p(0x40) << 8;

    return count;
}

/*
 *  This function sets up the local APIC timer, with a timeout of
 *  'clocks' APIC bus clocks. During calibration we actually call
 *  this function twice, once with a bogus timeout value, the second
 *  time for real. The other (noncalibrating) CPUs call this
 *  function only once, with the real value.
 *
 *  We are strictly in irqs-off mode here, as we do not want an
 *  APIC interrupt to go off accidentally.
 *
 *  We do reads before writes even if unnecessary, to get around the
 *  APIC double write bug.
 */

#define APIC_DIVISOR 16

void setup_APIC_timer (unsigned int clocks)
{
    unsigned long lvtt1_value;
    unsigned int tmp_value;

    /*
     *  Unfortunately the local APIC timer cannot be set up into NMI
     *  mode. With the IO APIC we can re-route the external timer
     *  interrupt and broadcast it as an NMI to all CPUs, so no pain.
     *
     *  NOTE: this trap vector (0x41) and the gate in
     *  BUILD_SMP_TIMER_INTERRUPT should be the same ;)
     */
    tmp_value = apic_read(APIC_LVTT);
    lvtt1_value = APIC_LVT_TIMER_PERIODIC | 0x41;
    apic_write(APIC_LVTT , lvtt1_value);

    /*
     *  Divide PICLK by 16
     */
    tmp_value = apic_read(APIC_TDCR);
    apic_write(APIC_TDCR , (tmp_value & ~APIC_TDR_DIV_1 )
                | APIC_TDR_DIV_16);

    tmp_value = apic_read(APIC_TMICT);
    apic_write(APIC_TMICT, clocks/APIC_DIVISOR);
}

__initfunc(void wait_8254_wraparound (void))
{
    unsigned int curr_count, prev_count=~0;
    int delta;

    curr_count = get_8254_timer_count();

    do {
        prev_count = curr_count;
        curr_count = get_8254_timer_count();
        delta = curr_count-prev_count;

        /*
         *  This limit for delta seems arbitrary, but it isn't, it's
         *  slightly above the level of error a buggy Mercury/Neptune
         *  chipset timer can cause.
         */

    } while (delta<300);
}
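/*
 *  Worked numbers for the wraparound test above (illustrative,
 *  assuming the standard 1.193182 MHz PIT input clock and HZ=100):
 *  counter 0 counts down from LATCH (about 11932) to 0 every 10 ms,
 *  so between two back-to-back reads delta is a small negative value;
 *  at the reload instant it jumps to nearly +11932, comfortably above
 *  the +300 threshold, which is how the wraparound is detected.
 */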
/*
 *  In this function we calibrate the APIC bus clocks to the external
 *  timer. Unfortunately we cannot use jiffies and the timer irq
 *  to calibrate, since some later bootup code depends on getting
 *  the first irq? Ugh.
 *
 *  We want to do the calibration only once, since we
 *  want to have local timer irqs in sync. CPUs connected
 *  by the same APIC bus have the very same bus frequency.
 *  And we want to have irqs off anyway, no accidental
 *  APIC irq that way.
 */
__initfunc(int calibrate_APIC_clock (void))
{
    unsigned long long t1,t2;
    long tt1,tt2;
    long calibration_result;
    int i;

    printk("calibrating APIC timer ... ");

    /*
     *  Put whatever arbitrary (but long enough) timeout
     *  value into the APIC clock, we just want to get the
     *  counter running for calibration.
     */
    setup_APIC_timer(1000000000);

    /*
     *  The timer chip counts down to zero. Let's wait
     *  for a wraparound to start exact measurement:
     *  (the current tick might have been already half done)
     */
    wait_8254_wraparound ();

    /*
     *  We wrapped around just now, let's start:
     */
    RTDSC(t1);
    tt1=apic_read(APIC_TMCCT);

#define LOOPS (HZ/10)
    /*
     *  Let's wait LOOPS wraparounds:
     */
    for (i=0; i<LOOPS; i++)
        wait_8254_wraparound();
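    /*
     *  What the measurement yields (a sketch of the arithmetic
     *  implied above): LOOPS = HZ/10 wraparounds of the 8254 span
     *  exactly 100 ms. The drop in APIC_TMCCT over that window,
     *  scaled back up by APIC_DIVISOR (the divide-by-16 programmed
     *  in setup_APIC_timer), gives the number of APIC bus clocks per
     *  1/HZ-second tick, which is the value a CPU must feed to
     *  setup_APIC_timer() to get HZ local timer interrupts per second.
     */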