summaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel/smp.c
diff options
context:
space:
mode:
authorRalf Baechle <ralf@linux-mips.org>1997-04-29 21:13:14 +0000
committer <ralf@linux-mips.org>1997-04-29 21:13:14 +0000
commit19c9bba94152148523ba0f7ef7cffe3d45656b11 (patch)
tree40b1cb534496a7f1ca0f5c314a523c69f1fee464 /arch/i386/kernel/smp.c
parent7206675c40394c78a90e74812bbdbf8cf3cca1be (diff)
Import of Linux/MIPS 2.1.36
Diffstat (limited to 'arch/i386/kernel/smp.c')
-rw-r--r--arch/i386/kernel/smp.c790
1 files changed, 652 insertions, 138 deletions
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 05c80b711..a1590f500 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -19,6 +19,12 @@
* Alan Cox : By repeated request 8) - Total BogoMIP report.
* Greg Wright : Fix for kernel stacks panic.
* Erich Boleyn : MP v1.4 and additional changes.
+ * Matthias Sattler : Changes for 2.1 kernel map.
+ * Michel Lespinasse : Changes for 2.1 kernel map.
+ * Michael Chastain : Change trampoline.S to gnu as.
+ * Alan Cox : Dumb bug: 'B' step PPro's are fine
+ * Ingo Molnar : Added APIC timers, based on code
+ * from Jose Renau
*/
#include <linux/kernel.h>
@@ -31,11 +37,62 @@
#include <linux/mc146818rtc.h>
#include <asm/i82489.h>
#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
#include <asm/pgtable.h>
#include <asm/bitops.h>
#include <asm/pgtable.h>
#include <asm/smp.h>
+#include <asm/io.h>
+#include "irq.h"
+
+extern unsigned long start_kernel, _etext;
+extern void update_one_process( struct task_struct *p,
+ unsigned long ticks, unsigned long user,
+ unsigned long system);
+void setup_APIC_clock (void);
+
+/*
+ * Some notes on processor bugs:
+ *
+ * Pentium and Pentium Pro (and all CPU's) have bugs. The Linux issues
+ * for SMP are handled as follows.
+ *
+ * Pentium Pro
+ * Occasional delivery of 'spurious interrupt' as trap #16. This
+ * is very very rare. The kernel logs the event and recovers
+ *
+ * Pentium
+ * There is a marginal case where REP MOVS on 100MHz SMP
+ * machines with B stepping processors can fail. XXX should provide
+ * an L1cache=Writethrough or L1cache=off option.
+ *
+ * B stepping CPUs may hang. There are hardware workarounds
+ * for this. We warn about it in case your board doesn't have the
+ * workarounds. Basically that's so I can tell anyone with a B stepping
+ * CPU and SMP problems "tough".
+ *
+ * Specific items [From Pentium Processor Specification Update]
+ *
+ * 1AP. Linux doesn't use remote read
+ * 2AP. Linux doesn't trust APIC errors
+ * 3AP. We work around this
+ * 4AP. Linux never generates 3 interrupts of the same priority
+ * to cause a lost local interrupt.
+ * 5AP. Remote read is never used
+ * 9AP. XXX NEED TO CHECK WE HANDLE THIS XXX
+ * 10AP. XXX NEED TO CHECK WE HANDLE THIS XXX
+ * 11AP. Linux reads the APIC between writes to avoid this, as per
+ * the documentation. Make sure you preserve this as it affects
+ * the C stepping chips too.
+ *
+ * If this sounds worrying believe me these bugs are ___RARE___ and
+ * there's about nothing of note with C stepping upwards.
+ */
+
+
/*
* Why isn't this somewhere standard ??
*/
@@ -47,7 +104,9 @@ extern __inline int max(int a,int b)
return b;
}
+static int smp_b_stepping = 0; /* Set if we find a B stepping CPU */
+static int max_cpus = -1; /* Setup configured maximum number of CPUs to activate */
int smp_found_config=0; /* Have we found an SMP box */
unsigned long cpu_present_map = 0; /* Bitmask of existing CPU's */
@@ -57,6 +116,7 @@ volatile int cpu_number_map[NR_CPUS]; /* which CPU maps to which logical numbe
volatile int cpu_logical_map[NR_CPUS]; /* which logical number maps to which CPU */
volatile unsigned long cpu_callin_map[NR_CPUS] = {0,}; /* We always use 0 the rest is ready for parallel delivery */
volatile unsigned long smp_invalidate_needed; /* Used for the invalidate map that's also checked in the spinlock */
+volatile unsigned long kstack_ptr; /* Stack vector for booting CPU's */
struct cpuinfo_x86 cpu_data[NR_CPUS]; /* Per cpu bogomips and other parameters */
static unsigned int num_processors = 1; /* Internal processor count */
static unsigned long io_apic_addr = 0xFEC00000; /* Address of the I/O apic (not yet used) */
@@ -65,16 +125,13 @@ static unsigned char *kstack_base,*kstack_end; /* Kernel stack list pointers
static int smp_activated = 0; /* Tripped once we need to start cross invalidating */
int apic_version[NR_CPUS]; /* APIC version number */
static volatile int smp_commenced=0; /* Tripped when we start scheduling */
-unsigned long apic_addr=0xFEE00000; /* Address of APIC (defaults to 0xFEE00000) */
+unsigned long apic_addr = 0xFEE00000; /* Address of APIC (defaults to 0xFEE00000) */
unsigned long nlong = 0; /* dummy used for apic_reg address + 0x20 */
unsigned char *apic_reg=((unsigned char *)(&nlong))-0x20;/* Later set to the ioremap() of the APIC */
unsigned long apic_retval; /* Just debugging the assembler.. */
unsigned char *kernel_stacks[NR_CPUS]; /* Kernel stack pointers for CPU's (debugging) */
static volatile unsigned char smp_cpu_in_msg[NR_CPUS]; /* True if this processor is sending an IPI */
-static volatile unsigned long smp_msg_data; /* IPI data pointer */
-static volatile int smp_src_cpu; /* IPI sender processor */
-static volatile int smp_msg_id; /* Message being sent */
volatile unsigned long kernel_flag=0; /* Kernel spinlock */
volatile unsigned char active_kernel_processor = NO_PROC_ID; /* Processor holding kernel spinlock */
@@ -88,13 +145,17 @@ volatile unsigned long smp_spins_syscall[NR_CPUS]={0}; /* Count syscall spins
volatile unsigned long smp_spins_syscall_cur[NR_CPUS]={0};/* Count spins for the actual syscall */
volatile unsigned long smp_spins_sys_idle[NR_CPUS]={0}; /* Count spins for sys_idle */
volatile unsigned long smp_idle_count[1+NR_CPUS]={0,}; /* Count idle ticks */
+
+/* Count local APIC timer ticks */
+volatile unsigned long smp_local_timer_ticks[1+NR_CPUS]={0,};
+
#endif
#if defined (__SMP_PROF__)
volatile unsigned long smp_idle_map=0; /* Map for idle processors */
#endif
volatile unsigned long smp_proc_in_lock[NR_CPUS] = {0,};/* for computing process time */
-volatile unsigned long smp_process_available=0;
+volatile int smp_process_available=0;
/*#define SMP_DEBUG*/
@@ -104,6 +165,35 @@ volatile unsigned long smp_process_available=0;
#define SMP_PRINTK(x)
#endif
+/*
+ * Setup routine for controlling SMP activation
+ *
+ * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
+ * activation entirely (the MPS table probe still happens, though).
+ *
+ * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
+ * greater than 0, limits the maximum number of CPUs activated in
+ * SMP mode to <NUM>.
+ */
+
+__initfunc(void smp_setup(char *str, int *ints))
+{
+ if (ints && ints[0] > 0)
+ max_cpus = ints[1];
+ else
+ max_cpus = 0;
+}
+
+static inline void ack_APIC_irq (void)
+{
+ /* Clear the IPI */
+
+ /* Dummy read */
+ apic_read(APIC_SPIV);
+
+ /* Docs say use 0 for future compatibility */
+ apic_write(APIC_EOI, 0);
+}
/*
* Checksum an MP configuration block.
@@ -148,7 +238,7 @@ static char *mpc_family(int family,int model)
* Read the MPC
*/
-static int smp_read_mpc(struct mp_config_table *mpc)
+__initfunc(static int smp_read_mpc(struct mp_config_table *mpc))
{
char str[16];
int count=sizeof(*mpc);
@@ -183,7 +273,7 @@ static int smp_read_mpc(struct mp_config_table *mpc)
printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
/* set the local APIC address */
- apic_addr = mpc->mpc_lapic;
+ apic_addr = (unsigned long)phys_to_virt((unsigned long)mpc->mpc_lapic);
/*
* Now process the configuration blocks.
@@ -259,7 +349,7 @@ static int smp_read_mpc(struct mp_config_table *mpc)
printk("I/O APIC #%d Version %d at 0x%lX.\n",
m->mpc_apicid,m->mpc_apicver,
m->mpc_apicaddr);
- io_apic_addr = m->mpc_apicaddr;
+ io_apic_addr = (unsigned long)phys_to_virt(m->mpc_apicaddr);
}
mpt+=sizeof(*m);
count+=sizeof(*m);
@@ -293,9 +383,9 @@ static int smp_read_mpc(struct mp_config_table *mpc)
* Scan the memory blocks for an SMP configuration block.
*/
-int smp_scan_config(unsigned long base, unsigned long length)
+__initfunc(int smp_scan_config(unsigned long base, unsigned long length))
{
- unsigned long *bp=(unsigned long *)base;
+ unsigned long *bp=phys_to_virt(base);
struct intel_mp_floating *mpf;
SMP_PRINTK(("Scan SMP from %p for %ld bytes.\n",
@@ -422,6 +512,7 @@ int smp_scan_config(unsigned long base, unsigned long length)
*/
nlong = boot_cpu_id<<24; /* Dummy 'self' for bootup */
cpu_logical_map[0] = boot_cpu_id;
+ global_irq_holder = boot_cpu_id;
printk("Processors: %d\n", num_processors);
/*
@@ -441,9 +532,8 @@ int smp_scan_config(unsigned long base, unsigned long length)
* Trampoline 80x86 program as an array.
*/
-static unsigned char trampoline_data[]={
-#include "trampoline.hex"
-};
+extern unsigned char trampoline_data [];
+extern unsigned char trampoline_end [];
/*
* Currently trivial. Write the real->protected mode
@@ -451,9 +541,9 @@ static unsigned char trampoline_data[]={
* has made sure it's suitably aligned.
*/
-static void install_trampoline(unsigned char *mp)
+__initfunc(static void install_trampoline(unsigned char *mp))
{
- memcpy(mp,trampoline_data,sizeof(trampoline_data));
+ memcpy(mp, trampoline_data, trampoline_end - trampoline_data);
}
/*
@@ -463,16 +553,17 @@ static void install_trampoline(unsigned char *mp)
* other things).
*/
-unsigned long smp_alloc_memory(unsigned long mem_base)
+__initfunc(unsigned long smp_alloc_memory(unsigned long mem_base))
{
int size=(num_processors-1)*PAGE_SIZE; /* Number of stacks needed */
+
/*
* Our stacks have to be below the 1Mb line, and mem_base on entry
* is 4K aligned.
*/
- if(mem_base+size>=0x9F000)
- panic("smp_alloc_memory: Insufficient low memory for kernel stacks.\n");
+ if(virt_to_phys((void *)(mem_base+size))>=0x9F000)
+ panic("smp_alloc_memory: Insufficient low memory for kernel stacks 0x%lx.\n", mem_base);
kstack_base=(void *)mem_base;
mem_base+=size;
kstack_end=(void *)mem_base;
@@ -483,7 +574,7 @@ unsigned long smp_alloc_memory(unsigned long mem_base)
* Hand out stacks one at a time.
*/
-static void *get_kernel_stack(void)
+__initfunc(static void *get_kernel_stack(void))
{
void *stack=kstack_base;
if(kstack_base>=kstack_end)
@@ -498,13 +589,18 @@ static void *get_kernel_stack(void)
* a given CPU
*/
-void smp_store_cpu_info(int id)
+__initfunc(void smp_store_cpu_info(int id))
{
struct cpuinfo_x86 *c=&cpu_data[id];
c->hard_math=hard_math; /* Always assumed same currently */
c->x86=x86;
c->x86_model=x86_model;
c->x86_mask=x86_mask;
+ /*
+ * Mask B, Pentium, but not Pentium MMX
+ */
+ if(x86_mask>=1 && x86_mask<=4 && x86==5 && (x86_model>=0&&x86_model<=3))
+ smp_b_stepping=1; /* Remember we have B step Pentia with bugs */
c->x86_capability=x86_capability;
c->fdiv_bug=fdiv_bug;
c->wp_works_ok=wp_works_ok; /* Always assumed the same currently */
@@ -524,15 +620,16 @@ void smp_store_cpu_info(int id)
* we use to track CPU's as they power up.
*/
-void smp_commence(void)
+__initfunc(void smp_commence(void))
{
/*
* Lets the callin's below out of their loop.
*/
+ SMP_PRINTK(("Setting commenced=1, go go go\n"));
smp_commenced=1;
}
-
-void smp_callin(void)
+
+__initfunc(void smp_callin(void))
{
extern void calibrate_delay(void);
int cpuid=GET_APIC_ID(apic_read(APIC_ID));
@@ -546,11 +643,19 @@ void smp_callin(void)
l=apic_read(APIC_SPIV);
l|=(1<<8); /* Enable */
apic_write(APIC_SPIV,l);
+
+ /*
+ * Set up our APIC timer.
+ */
+ setup_APIC_clock ();
+
sti();
/*
* Get our bogomips.
*/
calibrate_delay();
+ SMP_PRINTK(("Stack at about %p\n",&cpuid));
+
/*
* Save our processor parameters
*/
@@ -563,28 +668,37 @@ void smp_callin(void)
* Until we are ready for SMP scheduling
*/
load_ldt(0);
-/* printk("Testing faulting...\n");
- *(long *)0=1; OOPS... */
local_flush_tlb();
- while(!smp_commenced);
- if (cpu_number_map[cpuid] == -1)
- while(1);
- local_flush_tlb();
- SMP_PRINTK(("Commenced..\n"));
+ while (cpu_number_map[cpuid] == -1)
+ barrier();
+
+ while(!task[cpuid] || current_set[cpuid] != task[cpu_number_map[cpuid]])
+ barrier();
+
+ local_flush_tlb();
load_TR(cpu_number_map[cpuid]);
-/* while(1);*/
+
+ while(!smp_commenced)
+ barrier();
+
+ local_flush_tlb();
+
+ SMP_PRINTK(("Commenced..\n"));
+ local_flush_tlb();
+ sti();
}
/*
- * Cycle through the processors sending pentium IPI's to boot each.
+ * Cycle through the processors sending APIC IPI's to boot each.
*/
-void smp_boot_cpus(void)
+__initfunc(void smp_boot_cpus(void))
{
int i;
int cpucount=0;
unsigned long cfg;
+ pgd_t maincfg;
void *stack;
extern unsigned long init_user_stack[];
@@ -608,6 +722,16 @@ void smp_boot_cpus(void)
active_kernel_processor=boot_cpu_id;
/*
+ * If SMP should be disabled, then really disable it!
+ */
+
+ if (!max_cpus && smp_found_config)
+ {
+ smp_found_config = 0;
+ printk("SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+ }
+
+ /*
* If we don't conform to the Intel MPS standard, get out
* of here now!
*/
@@ -671,7 +795,12 @@ void smp_boot_cpus(void)
apic_write(APIC_SPIV,cfg);
udelay(10);
-
+
+ /*
+ * Set up our local APIC timer:
+ */
+ setup_APIC_clock ();
+
/*
* Now scan the cpu present map and fire up the other CPUs.
*/
@@ -686,7 +815,8 @@ void smp_boot_cpus(void)
if (i == boot_cpu_id)
continue;
- if (cpu_present_map & (1 << i))
+ if ((cpu_present_map & (1 << i))
+ && (max_cpus < 0 || max_cpus > cpucount+1))
{
unsigned long send_status, accept_status;
int timeout, num_starts, j;
@@ -698,7 +828,8 @@ void smp_boot_cpus(void)
stack=get_kernel_stack(); /* We allocated these earlier */
if(stack==NULL)
panic("No memory for processor stacks.\n");
- kernel_stacks[i]=stack;
+
+ kernel_stacks[i]=(void *)phys_to_virt((unsigned long)stack);
install_trampoline(stack);
printk("Booting processor %d stack %p: ",i,stack); /* So we set what's up */
@@ -719,8 +850,11 @@ void smp_boot_cpus(void)
CMOS_WRITE(0xa, 0xf);
pg0[0]=7;
local_flush_tlb();
- *((volatile unsigned short *) 0x469) = ((unsigned long)stack)>>4;
- *((volatile unsigned short *) 0x467) = 0;
+ SMP_PRINTK(("1.\n"));
+ *((volatile unsigned short *) phys_to_virt(0x469)) = ((unsigned long)stack)>>4;
+ SMP_PRINTK(("2.\n"));
+ *((volatile unsigned short *) phys_to_virt(0x467)) = 0;
+ SMP_PRINTK(("3.\n"));
/*
* Protect it again
@@ -729,6 +863,17 @@ void smp_boot_cpus(void)
pg0[0]= cfg;
local_flush_tlb();
+ /* walken modif
+ * enable mapping of the first 4M at virtual
+ * address zero
+ */
+
+ maincfg=swapper_pg_dir[0];
+ ((unsigned long *)swapper_pg_dir)[0]=0x102007;
+
+ /* no need to local_flush_tlb :
+ we are setting this up for the slave processor ! */
+
/*
* Be paranoid about clearing APIC errors.
*/
@@ -800,7 +945,8 @@ void smp_boot_cpus(void)
SMP_PRINTK(("Sending STARTUP #%d.\n",j));
apic_write(APIC_ESR, 0);
-
+ SMP_PRINTK(("After apic_write.\n"));
+
/*
* STARTUP IPI
*/
@@ -812,11 +958,14 @@ void smp_boot_cpus(void)
cfg&=~0xCDFFF; /* Clear bits */
cfg |= (APIC_DEST_FIELD
| APIC_DEST_DM_STARTUP
- | (((int) stack) >> 12) ); /* Boot on the stack */
+ | (((int)virt_to_phys(stack)) >> 12)); /* Boot on the stack */
+ SMP_PRINTK(("Before start apic_write.\n"));
apic_write(APIC_ICR, cfg); /* Kick the second */
+ SMP_PRINTK(("Startup point 1.\n"));
timeout = 0;
do {
+ SMP_PRINTK(("Sleeping.\n")); udelay(1000000);
udelay(10);
} while ( (send_status = (apic_read(APIC_ICR) & 0x1000))
&& (timeout++ < 1000));
@@ -824,6 +973,7 @@ void smp_boot_cpus(void)
accept_status = (apic_read(APIC_ESR) & 0xEF);
}
+ SMP_PRINTK(("After Startup.\n"));
if (send_status) /* APIC never delivered?? */
printk("APIC never delivered???\n");
@@ -847,15 +997,24 @@ void smp_boot_cpus(void)
}
else
{
- if(*((volatile unsigned char *)8192)==0xA5)
+ if(*((volatile unsigned char *)phys_to_virt(8192))==0xA5)
printk("Stuck ??\n");
else
printk("Not responding.\n");
}
}
+ SMP_PRINTK(("CPU has booted.\n"));
+
+ /* walken modif
+ * restore mapping of the first 4M
+ */
+
+ swapper_pg_dir[0]=maincfg;
+
+ local_flush_tlb();
/* mark "stuck" area as not stuck */
- *((volatile unsigned long *)8192) = 0;
+ *((volatile unsigned long *)phys_to_virt(8192)) = 0;
}
/*
@@ -885,7 +1044,7 @@ void smp_boot_cpus(void)
CMOS_WRITE(0, 0xf);
- *((volatile long *) 0x467) = 0;
+ *((volatile long *) phys_to_virt(0x467)) = 0;
/*
* Restore old page 0 entry.
@@ -898,6 +1057,7 @@ void smp_boot_cpus(void)
* Allow the user to impress friends.
*/
+ SMP_PRINTK(("Before bogomips.\n"));
if(cpucount==0)
{
printk("Error: only one processor found.\n");
@@ -915,12 +1075,15 @@ void smp_boot_cpus(void)
cpucount+1,
(bogosum+2500)/500000,
((bogosum+2500)/5000)%100);
+ SMP_PRINTK(("Before bogocount - setting activated=1.\n"));
smp_activated=1;
smp_num_cpus=cpucount+1;
}
+ if(smp_b_stepping)
+ printk("WARNING: SMP operation may be unreliable with B stepping processors.\n");
+ SMP_PRINTK(("Boot done.\n"));
}
-
/*
* A non wait message cannot pass data or cpu source info. This current setup
* is only safe because the kernel lock owner is the only person who can send a message.
@@ -936,12 +1099,12 @@ void smp_boot_cpus(void)
void smp_message_pass(int target, int msg, unsigned long data, int wait)
{
+ unsigned long flags;
unsigned long cfg;
unsigned long target_map;
int p=smp_processor_id();
- int irq=0x2d; /* IRQ 13 */
+ int irq;
int ct=0;
- static volatile int message_cpu = NO_PROC_ID;
/*
* During boot up send no messages
@@ -956,11 +1119,25 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
* message at this time. The reschedule cannot wait
* but is not critical.
*/
-
- if(msg==MSG_RESCHEDULE) /* Reschedules we do via trap 0x30 */
- {
- irq=0x30;
- if(smp_cpu_in_msg[p])
+
+ switch (msg) {
+ case MSG_RESCHEDULE:
+ irq = 0x30;
+ if (smp_cpu_in_msg[p])
+ return;
+ break;
+
+ case MSG_INVALIDATE_TLB:
+ /* make this a NMI some day */
+ irq = 0x31;
+ break;
+
+ case MSG_STOP_CPU:
+ irq = 0x40;
+ break;
+
+ default:
+ printk("Unknown SMP message %d\n", msg);
return;
}
@@ -972,31 +1149,12 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
* I got to notice this bug...
*/
- if(message_cpu!=NO_PROC_ID && msg!=MSG_STOP_CPU && msg!=MSG_RESCHEDULE)
- {
- panic("CPU #%d: Message pass %d but pass in progress by %d of %d\n",
- smp_processor_id(),msg,message_cpu, smp_msg_id);
- }
- message_cpu=smp_processor_id();
-
-
/*
* We are busy
*/
smp_cpu_in_msg[p]++;
- /*
- * Reschedule is currently special
- */
-
- if(msg!=MSG_RESCHEDULE)
- {
- smp_src_cpu=p;
- smp_msg_id=msg;
- smp_msg_data=data;
- }
-
/* printk("SMP message pass #%d to %d of %d\n",
p, msg, target);*/
@@ -1019,12 +1177,14 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
*/
if(ct==1000)
- printk("CPU #%d: previous IPI still not cleared after 10mS", smp_processor_id());
+ printk("CPU #%d: previous IPI still not cleared after 10mS\n", p);
/*
* Program the APIC to deliver the IPI
*/
-
+
+ __save_flags(flags);
+ __cli();
cfg=apic_read(APIC_ICR2);
cfg&=0x00FFFFFF;
apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(target)); /* Target chip */
@@ -1040,7 +1200,7 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
{
cfg|=APIC_DEST_ALLBUT;
target_map=cpu_present_map;
- cpu_callin_map[0]=(1<<smp_src_cpu);
+ cpu_callin_map[0]=(1<<p);
}
else if(target==MSG_ALL)
{
@@ -1058,7 +1218,8 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
* Send the IPI. The write to APIC_ICR fires this off.
*/
- apic_write(APIC_ICR, cfg);
+ apic_write(APIC_ICR, cfg);
+ __restore_flags(flags);
/*
* Spin waiting for completion
@@ -1066,11 +1227,30 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
switch(wait)
{
+ int stuck;
case 1:
- while(cpu_callin_map[0]!=target_map); /* Spin on the pass */
+ stuck = 50000000;
+ while(cpu_callin_map[0]!=target_map) {
+ --stuck;
+ if (!stuck) {
+ printk("stuck on target_map IPI wait\n");
+ break;
+ }
+ }
break;
case 2:
- while(smp_invalidate_needed); /* Wait for invalidate map to clear */
+ stuck = 50000000;
+ /* Wait for invalidate map to clear */
+ while (smp_invalidate_needed) {
+ /* Take care of "crossing" invalidates */
+ if (test_bit(p, &smp_invalidate_needed))
+ clear_bit(p, &smp_invalidate_needed);
+ --stuck;
+ if (!stuck) {
+ printk("stuck on smp_invalidate_needed IPI wait\n");
+ break;
+ }
+ }
break;
}
@@ -1079,7 +1259,6 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
*/
smp_cpu_in_msg[p]--;
- message_cpu=NO_PROC_ID;
}
/*
@@ -1091,10 +1270,12 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
void smp_flush_tlb(void)
{
unsigned long flags;
- if(smp_activated && smp_processor_id()!=active_kernel_processor)
- panic("CPU #%d:Attempted flush tlb IPI when not AKP(=%d)\n",smp_processor_id(),active_kernel_processor);
+ if(smp_activated && smp_processor_id()!=active_kernel_processor) {
+ printk("CPU #%d:Attempted flush tlb IPI when not AKP(=%d)\n",smp_processor_id(),active_kernel_processor);
+ *(char *)0=0;
+ }
/* printk("SMI-");*/
-
+
/*
* The assignment is safe because it's volatile so the compiler cannot reorder it,
* because the i586 has strict memory ordering and because only the kernel lock holder
@@ -1102,15 +1283,15 @@ void smp_flush_tlb(void)
* bus locked or.
*/
- smp_invalidate_needed=cpu_present_map&~(1<<smp_processor_id());
+ smp_invalidate_needed=cpu_present_map;
/*
* Processors spinning on the lock will see this IRQ late. The smp_invalidate_needed map will
* ensure they don't do a spurious flush tlb or miss one.
*/
- save_flags(flags);
- cli();
+ __save_flags(flags);
+ __cli();
smp_message_pass(MSG_ALL_BUT_SELF, MSG_INVALIDATE_TLB, 0L, 2);
/*
@@ -1119,7 +1300,7 @@ void smp_flush_tlb(void)
local_flush_tlb();
- restore_flags(flags);
+ __restore_flags(flags);
/*
* Completed.
@@ -1128,77 +1309,410 @@ void smp_flush_tlb(void)
/* printk("SMID\n");*/
}
-/*
- * Reschedule call back
+/*
+ * Local timer interrupt handler. It does both profiling and
+ * process statistics/rescheduling.
+ *
+ * We do profiling on every local tick; statistics/rescheduling
+ * happen only every 'profiling multiplier' ticks. The default
+ * multiplier is 1 and it can be changed by writing a 4-byte multiplier
+ * value into /proc/profile.
*/
-void smp_reschedule_irq(int cpl, struct pt_regs *regs)
+unsigned int prof_multiplier[NR_CPUS];
+unsigned int prof_counter[NR_CPUS];
+
+static inline void smp_local_timer_interrupt(struct pt_regs * regs)
{
-#ifdef DEBUGGING_SMP_RESCHED
- static int ct=0;
- if(ct==0)
- {
- printk("Beginning scheduling on CPU#%d\n",smp_processor_id());
- ct=1;
+ int cpu = smp_processor_id();
+
+ /*
+ * The profiling function is SMP safe. (nothing can mess
+ * around with "current", and the profiling counters are
+ * updated with atomic operations). This is especially
+ * useful with a profiling multiplier != 1
+ */
+ if (!user_mode(regs))
+ x86_do_profile (regs->eip);
+
+ if (!--prof_counter[cpu]) {
+ int user=0,system=0;
+ struct task_struct * p = current;
+
+ /*
+ * After doing the above, we need to make like
+ * a normal interrupt - otherwise timer interrupts
+ * ignore the global interrupt lock, which is the
+ * WrongThing (tm) to do.
+ */
+
+ if (user_mode(regs))
+ user=1;
+ else
+ system=1;
+
+ irq_enter(cpu, 0);
+ if (p->pid) {
+
+ update_one_process(p, 1, user, system);
+
+ p->counter -= 1;
+ if (p->counter < 0) {
+ p->counter = 0;
+ need_resched = 1;
+ }
+ if (p->priority < DEF_PRIORITY)
+ kstat.cpu_nice += user;
+ else
+ kstat.cpu_user += user;
+
+ kstat.cpu_system += system;
+
+ } else {
+#ifdef __SMP_PROF__
+ if (test_bit(cpu,&smp_idle_map))
+ smp_idle_count[cpu]++;
+#endif
+ }
+ prof_counter[cpu]=prof_multiplier[cpu];
+
+ irq_exit(cpu, 0);
}
-#endif
- if(smp_processor_id()!=active_kernel_processor)
- panic("SMP Reschedule on CPU #%d, but #%d is active.\n",
- smp_processor_id(), active_kernel_processor);
- need_resched=1;
+#ifdef __SMP_PROF__
+ smp_local_timer_ticks[cpu]++;
+#endif
+ /*
+ * We take the 'long' return path, and there every subsystem
+ * grabs the appropriate locks (kernel lock / irq lock).
+ *
+ * FIXME: we want to decouple profiling from the 'long path'.
+ *
+ * Currently this isn't too much of an issue (performance-wise),
+ * we can take more than 100K local irqs per second on a 100 MHz P5.
+ * [ although we notice need_resched too early, thus the way we
+ * schedule (deliver signals and handle bhs) changes. ]
+ *
+ * Possibly we could solve these problems with 'smart irqs'.
+ */
+}
+
+/*
+ * Local APIC timer interrupt. This is the most natural way of doing
+ * local interrupts, but local timer interrupts can be emulated by
+ * broadcast interrupts too. [in case the hw doesn't support APIC timers]
+ */
+void smp_apic_timer_interrupt(struct pt_regs * regs)
+{
+ /*
+ * NOTE! We'd better ACK the irq immediately,
+ * because timer handling can be slow, and we
+ * want to be able to accept NMI tlb invalidates
+ * during this time.
+ */
+ ack_APIC_irq ();
+
+ smp_local_timer_interrupt(regs);
+}
+/*
+ * Reschedule call back
+ */
+asmlinkage void smp_reschedule_interrupt(void)
+{
+ int cpu = smp_processor_id();
+
+ ack_APIC_irq();
/*
- * Clear the IPI
+ * This looks silly, but we actually do need to wait
+ * for the global interrupt lock.
*/
- apic_read(APIC_SPIV); /* Dummy read */
- apic_write(APIC_EOI, 0); /* Docs say use 0 for future compatibility */
+ irq_enter(cpu, 0);
+ need_resched=1;
+ irq_exit(cpu, 0);
+}
+
+/*
+ * Invalidate call-back
+ */
+asmlinkage void smp_invalidate_interrupt(void)
+{
+ if (clear_bit(smp_processor_id(), &smp_invalidate_needed))
+ local_flush_tlb();
+
+ ack_APIC_irq ();
}
/*
- * Message call back.
+ * CPU halt call-back
*/
-
-void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs)
+asmlinkage void smp_stop_cpu_interrupt(void)
{
- int i=smp_processor_id();
-/* static int n=0;
- if(n++<NR_CPUS)
- printk("IPI %d->%d(%d,%ld)\n",smp_src_cpu,i,smp_msg_id,smp_msg_data);*/
- switch(smp_msg_id)
- {
- case 0: /* IRQ 13 testing - boring */
- return;
-
+ if (cpu_data[smp_processor_id()].hlt_works_ok)
+ for(;;) __asm__("hlt");
+ for (;;) ;
+}
+
+/*
+ * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
+ * per second. We assume that the caller has already set up the local
+ * APIC at apic_addr.
+ *
+ * The APIC timer is not exactly in sync with the external timer chip; it
+ * closely follows bus clocks.
+ */
+
+#define RTDSC(x) __asm__ __volatile__ ( ".byte 0x0f,0x31" \
+ :"=a" (((unsigned long*)&x)[0]), \
+ "=d" (((unsigned long*)&x)[1]))
+
+/*
+ * The timer chip is already set up at HZ interrupts per second here,
+ * but we do not accept timer interrupts yet. We only allow the BP
+ * to calibrate.
+ */
+static unsigned int get_8254_timer_count (void)
+{
+ unsigned int count;
+
+ outb_p(0x00, 0x43);
+ count = inb_p(0x40);
+ count |= inb_p(0x40) << 8;
+
+ return count;
+}
+
+/*
+ * This function sets up the local APIC timer, with a timeout of
+ * 'clocks' APIC bus clock. During calibration we actually call
+ * this function twice, once with a bogus timeout value, second
+ * time for real. The other (noncalibrating) CPUs call this
+ * function only once, with the real value.
+ *
+ * We are strictly in irqs off mode here, as we do not want to
+ * get an APIC interrupt go off accidentally.
+ *
+ * We do reads before writes even if unnecessary, to get around the
+ * APIC double write bug.
+ */
+
+#define APIC_DIVISOR 16
+
+void setup_APIC_timer (unsigned int clocks)
+{
+ unsigned long lvtt1_value;
+ unsigned int tmp_value;
+
+ /*
+ * Unfortunately the local APIC timer cannot be set up into NMI
+ * mode. With the IO APIC we can re-route the external timer
+ * interrupt and broadcast it as an NMI to all CPUs, so no pain.
+ *
+ * NOTE: this trap vector (0x41) and the gate in BUILD_SMP_TIMER_INTERRUPT
+ * should be the same ;)
+ */
+ tmp_value = apic_read(APIC_LVTT);
+ lvtt1_value = APIC_LVT_TIMER_PERIODIC | 0x41;
+ apic_write(APIC_LVTT , lvtt1_value);
+
+ /*
+ * Divide PICLK by 16
+ */
+ tmp_value = apic_read(APIC_TDCR);
+ apic_write(APIC_TDCR , (tmp_value & ~APIC_TDR_DIV_1 )
+ | APIC_TDR_DIV_16);
+
+ tmp_value = apic_read(APIC_TMICT);
+ apic_write(APIC_TMICT, clocks/APIC_DIVISOR);
+}
+
+void wait_8254_wraparound (void)
+{
+ unsigned int curr_count, prev_count=~0;
+ int delta;
+
+ curr_count = get_8254_timer_count();
+
+ do {
+ prev_count = curr_count;
+ curr_count = get_8254_timer_count();
+ delta = curr_count-prev_count;
+
+ /*
+ * This limit for delta seems arbitrary, but it isn't; it's
+ * slightly above the level of error a buggy Mercury/Neptune
+ * chipset timer can cause.
+ */
+
+ } while (delta<300);
+}
+
+/*
+ * In this function we calibrate APIC bus clocks to the external
+ * timer. Unfortunately we cannot use jiffies and the timer irq
+ * to calibrate, since some later bootup code depends on getting
+ * the first irq? Ugh.
+ *
+ * We want to do the calibration only once since we
+ * want to have local timer irqs synchronized. CPUs connected
+ * by the same APIC bus have the very same bus frequency.
+ * And we want to have irqs off anyway, so no accidental
+ * APIC irq can occur that way.
+ */
+
+int calibrate_APIC_clock (void)
+{
+ unsigned long long t1,t2;
+ long tt1,tt2;
+ long calibration_result;
+
+ printk("calibrating APIC timer ... ");
+
+ /*
+ * Put whatever arbitrary (but long enough) timeout
+ * value into the APIC clock, we just want to get the
+ * counter running for calibration.
+ */
+ setup_APIC_timer(1000000000);
+
+ /*
+ * The timer chip counts down to zero. Lets wait
+ * for a wraparound to start exact measurement:
+ * (the current tick might have been already half done)
+ */
+
+ wait_8254_wraparound ();
+
+ /*
+ * We wrapped around just now, lets start:
+ */
+ RTDSC(t1);
+ tt1=apic_read(APIC_TMCCT);
+
+ /*
+ * let's wait until we get to the next wraparound:
+ */
+ wait_8254_wraparound ();
+
+ tt2=apic_read(APIC_TMCCT);
+ RTDSC(t2);
+
+ /*
+ * The APIC bus clock counter is 32 bits only, it
+ * might have overflowed, but note that we use signed
+ * longs, thus no extra care needed.
+ *
+ * underflowed to be exact, as the timer counts down ;)
+ */
+
+ calibration_result = (tt1-tt2)*APIC_DIVISOR;
+
+ printk("\n..... %ld CPU clocks in 1 timer chip tick.\n",
+ (unsigned long)(t2-t1));
+
+ printk("..... %ld APIC bus clocks in 1 timer chip tick.\n",
+ calibration_result);
+
+
+ printk("..... CPU clock speed is %ld.%ld MHz.\n",
+ ((long)(t2-t1))/(1000000/HZ),
+ ((long)(t2-t1))%(1000000/HZ) );
+
+ printk("..... APIC bus clock speed is %ld.%ld MHz.\n",
+ calibration_result/(1000000/HZ),
+ calibration_result%(1000000/HZ) );
+
+ return calibration_result;
+}
+
+static unsigned int calibration_result;
+
+void setup_APIC_clock (void)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+
+ static volatile int calibration_lock;
+
+ save_flags(flags);
+ cli();
+
+ printk("setup_APIC_clock() called.\n");
+
+ /*
+ * [ setup_APIC_clock() is called from all CPUs, but we want
+ * to do this part of the setup only once ... and it fits
+ * here best ]
+ */
+ if (!set_bit(0,&calibration_lock)) {
+
+ calibration_result=calibrate_APIC_clock();
/*
- * A TLB flush is needed.
- */
-
- case MSG_INVALIDATE_TLB:
- if(clear_bit(i,(unsigned long *)&smp_invalidate_needed))
- local_flush_tlb();
- set_bit(i, (unsigned long *)&cpu_callin_map[0]);
- /* cpu_callin_map[0]|=1<<smp_processor_id();*/
- break;
-
+ * Signal completion to the other CPU[s]:
+ */
+ calibration_lock = 3;
+
+ } else {
/*
- * Halt other CPU's for a panic or reboot
+ * Other CPU is calibrating, wait for finish:
*/
- case MSG_STOP_CPU:
- while(1)
- {
- if(cpu_data[smp_processor_id()].hlt_works_ok)
- __asm__("hlt");
- }
- default:
- printk("CPU #%d sent invalid cross CPU message to CPU #%d: %X(%lX).\n",
- smp_src_cpu,smp_processor_id(),smp_msg_id,smp_msg_data);
- break;
+ printk("waiting for other CPU calibrating APIC timer ... ");
+ while (calibration_lock == 1);
+ printk("done, continuing.\n");
}
+
+/*
+ * Now set up the timer for real. Profiling multiplier is 1.
+ */
+ setup_APIC_timer (calibration_result);
+
+ prof_counter[cpu] = prof_multiplier[cpu] = 1;
+
/*
- * Clear the IPI, so we can receive future IPI's
+ * FIXME: i sporadically see booting problems (keyboard irq is
+ * lost, looks like the timer irq isn't working or some irq
+ * lock is messed up). Once we reboot the bug doesn't show
+ * up anymore.
+ *
+ * i'm quite certain it's a timing problem/race condition in
+ * the bootup logic, not a hw bug. It might have gone away in
+ * the meantime, tell me if you see it.
*/
-
- apic_read(APIC_SPIV); /* Dummy read */
- apic_write(APIC_EOI, 0); /* Docs say use 0 for future compatibility */
+
+ ack_APIC_irq ();
+
+ restore_flags(flags);
}
+
+/*
+ * the frequency of the profiling timer can be changed
+ * by writing 4 bytes into /proc/profile.
+ *
+ * usually you want to run this on all CPUs ;)
+ */
+int setup_profiling_timer (unsigned int multiplier)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+
+ /*
+ * Sanity check. [at least 500 APIC cycles should be
+ * between APIC interrupts as a rule of thumb, rather be
+ * careful as irq flooding renders the system unusable]
+ */
+ if ( (!multiplier) || (calibration_result/multiplier < 500))
+ return -EINVAL;
+
+ save_flags(flags);
+ cli();
+ setup_APIC_timer (calibration_result/multiplier);
+ prof_multiplier[cpu]=multiplier;
+ restore_flags(flags);
+
+ return 0;
+}
+
+#undef APIC_DIVISOR
+#undef RTDSC
+
+