summaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel
diff options
context:
space:
mode:
authorRalf Baechle <ralf@linux-mips.org>1997-06-01 03:16:17 +0000
committerRalf Baechle <ralf@linux-mips.org>1997-06-01 03:16:17 +0000
commitd8d9b8f76f22b7a16a83e261e64f89ee611f49df (patch)
tree3067bc130b80d52808e6390c9fc7fc087ec1e33c /arch/i386/kernel
parent19c9bba94152148523ba0f7ef7cffe3d45656b11 (diff)
Initial revision
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--arch/i386/kernel/Makefile2
-rw-r--r--arch/i386/kernel/bios32.c72
-rw-r--r--arch/i386/kernel/entry.S81
-rw-r--r--arch/i386/kernel/head.S136
-rw-r--r--arch/i386/kernel/i386_ksyms.c10
-rw-r--r--arch/i386/kernel/init_task.c22
-rw-r--r--arch/i386/kernel/irq.c271
-rw-r--r--arch/i386/kernel/irq.h187
-rw-r--r--arch/i386/kernel/process.c26
-rw-r--r--arch/i386/kernel/ptrace.c28
-rw-r--r--arch/i386/kernel/setup.c14
-rw-r--r--arch/i386/kernel/signal.c10
-rw-r--r--arch/i386/kernel/smp.c773
-rw-r--r--arch/i386/kernel/time.c7
-rw-r--r--arch/i386/kernel/trampoline.S43
-rw-r--r--arch/i386/kernel/traps.c77
-rw-r--r--arch/i386/kernel/vm86.c20
17 files changed, 808 insertions, 971 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index e04fb5efb..9491ef562 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -15,7 +15,7 @@ else
$(CC) -D__ASSEMBLY__ -traditional -c $< -o $*.o
endif
-all: kernel.o head.o
+all: kernel.o head.o init_task.o
O_TARGET := kernel.o
O_OBJS := process.o signal.o entry.o traps.o irq.o vm86.o bios32.o \
diff --git a/arch/i386/kernel/bios32.c b/arch/i386/kernel/bios32.c
index e128000c3..157e62b2d 100644
--- a/arch/i386/kernel/bios32.c
+++ b/arch/i386/kernel/bios32.c
@@ -1,6 +1,8 @@
/*
* bios32.c - BIOS32, PCI BIOS functions.
*
+ * $Id: bios32.c,v 1.11 1997/05/07 13:35:21 mj Exp $
+ *
* Sponsored by
* iX Multiuser Multitasking Magazine
* Hannover, Germany
@@ -52,6 +54,11 @@
* Feb 3, 1997 : Set internal functions to static, save/restore flags
* avoid dead locks reading broken PCI BIOS, werner@suse.de
*
+ * Apr 26, 1997 : Fixed case when there is BIOS32, but not PCI BIOS
+ * (mj@atrey.karlin.mff.cuni.cz)
+ *
+ * May 7, 1997 : Added some missing cli()'s. [mj]
+ *
*/
#include <linux/config.h>
@@ -158,7 +165,7 @@ static unsigned long bios32_service(unsigned long service)
unsigned long entry; /* %edx */
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
__asm__("lcall (%%edi)"
: "=a" (return_code),
"=b" (address),
@@ -173,10 +180,10 @@ static unsigned long bios32_service(unsigned long service)
case 0:
return address + entry;
case 0x80: /* Not present */
- printk("bios32_service(%ld) : not present\n", service);
+ printk("bios32_service(0x%lx) : not present\n", service);
return 0;
default: /* Shouldn't happen */
- printk("bios32_service(%ld) : returned 0x%x, mail drew@colorado.edu\n",
+ printk("bios32_service(0x%lx) : returned 0x%x, mail drew@colorado.edu\n",
service, return_code);
return 0;
}
@@ -189,7 +196,7 @@ static struct {
} pci_indirect = { 0, KERNEL_CS };
-__initfunc(static unsigned long check_pcibios(unsigned long memory_start, unsigned long memory_end))
+__initfunc(static int check_pcibios(void))
{
unsigned long signature;
unsigned char present_status;
@@ -201,7 +208,7 @@ __initfunc(static unsigned long check_pcibios(unsigned long memory_start, unsign
if ((pcibios_entry = bios32_service(PCI_SERVICE))) {
pci_indirect.address = pcibios_entry | PAGE_OFFSET;
- save_flags(flags);
+ save_flags(flags); cli();
__asm__("lcall (%%edi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
@@ -212,7 +219,7 @@ __initfunc(static unsigned long check_pcibios(unsigned long memory_start, unsign
: "1" (PCIBIOS_PCI_BIOS_PRESENT),
"D" (&pci_indirect)
: "bx", "cx");
- restore_flags(flags);
+ restore_flags(flags);
present_status = (pack >> 16) & 0xff;
major_revision = (pack >> 8) & 0xff;
@@ -232,9 +239,10 @@ __initfunc(static unsigned long check_pcibios(unsigned long memory_start, unsign
if (pcibios_entry) {
printk ("pcibios_init : PCI BIOS revision %x.%02x entry at 0x%lx\n",
major_revision, minor_revision, pcibios_entry);
+ return 1;
}
}
- return memory_start;
+ return 0;
}
@@ -245,7 +253,7 @@ static int pci_bios_find_class (unsigned int class_code, unsigned short index,
unsigned long ret;
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
__asm__ ("lcall (%%edi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
@@ -270,7 +278,7 @@ static int pci_bios_find_device (unsigned short vendor, unsigned short device_id
unsigned short ret;
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
__asm__("lcall (%%edi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
@@ -295,7 +303,7 @@ static int pci_bios_read_config_byte(unsigned char bus,
unsigned long bx = (bus << 8) | device_fn;
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
__asm__("lcall (%%esi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
@@ -317,7 +325,7 @@ static int pci_bios_read_config_word (unsigned char bus,
unsigned long bx = (bus << 8) | device_fn;
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
__asm__("lcall (%%esi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
@@ -339,7 +347,7 @@ static int pci_bios_read_config_dword (unsigned char bus,
unsigned long bx = (bus << 8) | device_fn;
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
__asm__("lcall (%%esi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
@@ -361,7 +369,7 @@ static int pci_bios_write_config_byte (unsigned char bus,
unsigned long bx = (bus << 8) | device_fn;
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
__asm__("lcall (%%esi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
@@ -383,7 +391,7 @@ static int pci_bios_write_config_word (unsigned char bus,
unsigned long bx = (bus << 8) | device_fn;
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
__asm__("lcall (%%esi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
@@ -405,7 +413,7 @@ static int pci_bios_write_config_dword (unsigned char bus,
unsigned long bx = (bus << 8) | device_fn;
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
__asm__("lcall (%%esi)\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
@@ -476,7 +484,7 @@ static int pci_direct_find_class (unsigned int class_code, unsigned short index,
struct pci_dev *dev;
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
for (dev = pci_devices; dev; dev = dev->next) {
if (dev->class == class_code) {
if (curr == index) {
@@ -502,7 +510,7 @@ static int pci_conf1_read_config_byte(unsigned char bus, unsigned char device_fn
{
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
outl(CONFIG_CMD(bus,device_fn,where), 0xCF8);
switch (where & 3) {
case 0: *value = inb(0xCFC);
@@ -523,7 +531,7 @@ static int pci_conf1_read_config_word (unsigned char bus,
{
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
outl(CONFIG_CMD(bus,device_fn,where), 0xCF8);
if (where & 2)
*value = inw(0xCFE);
@@ -538,7 +546,7 @@ static int pci_conf1_read_config_dword (unsigned char bus, unsigned char device_
{
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
outl(CONFIG_CMD(bus,device_fn,where), 0xCF8);
*value = inl(0xCFC);
restore_flags(flags);
@@ -550,7 +558,7 @@ static int pci_conf1_write_config_byte (unsigned char bus, unsigned char device_
{
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
outl(CONFIG_CMD(bus,device_fn,where), 0xCF8);
outb(value, 0xCFC);
restore_flags(flags);
@@ -562,7 +570,7 @@ static int pci_conf1_write_config_word (unsigned char bus, unsigned char device_
{
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
outl(CONFIG_CMD(bus,device_fn,where), 0xCF8);
outw(value, 0xCFC);
restore_flags(flags);
@@ -574,7 +582,7 @@ static int pci_conf1_write_config_dword (unsigned char bus, unsigned char device
{
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
outl(CONFIG_CMD(bus,device_fn,where), 0xCF8);
outl(value, 0xCFC);
restore_flags(flags);
@@ -610,7 +618,7 @@ static int pci_conf2_read_config_byte(unsigned char bus, unsigned char device_fn
if (device_fn & 0x80)
return PCIBIOS_DEVICE_NOT_FOUND;
- save_flags(flags);
+ save_flags(flags); cli();
outb (FUNC(device_fn), 0xCF8);
outb (bus, 0xCFA);
*value = inb(IOADDR(device_fn,where));
@@ -626,7 +634,7 @@ static int pci_conf2_read_config_word (unsigned char bus, unsigned char device_f
if (device_fn & 0x80)
return PCIBIOS_DEVICE_NOT_FOUND;
- save_flags(flags);
+ save_flags(flags); cli();
outb (FUNC(device_fn), 0xCF8);
outb (bus, 0xCFA);
*value = inw(IOADDR(device_fn,where));
@@ -642,7 +650,7 @@ static int pci_conf2_read_config_dword (unsigned char bus, unsigned char device_
if (device_fn & 0x80)
return PCIBIOS_DEVICE_NOT_FOUND;
- save_flags(flags);
+ save_flags(flags); cli();
outb (FUNC(device_fn), 0xCF8);
outb (bus, 0xCFA);
*value = inl (IOADDR(device_fn,where));
@@ -656,7 +664,7 @@ static int pci_conf2_write_config_byte (unsigned char bus, unsigned char device_
{
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
outb (FUNC(device_fn), 0xCF8);
outb (bus, 0xCFA);
outb (value, IOADDR(device_fn,where));
@@ -670,7 +678,7 @@ static int pci_conf2_write_config_word (unsigned char bus, unsigned char device_
{
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
outb (FUNC(device_fn), 0xCF8);
outb (bus, 0xCFA);
outw (value, IOADDR(device_fn,where));
@@ -684,7 +692,7 @@ static int pci_conf2_write_config_dword (unsigned char bus, unsigned char device
{
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
outb (FUNC(device_fn), 0xCF8);
outb (bus, 0xCFA);
outl (value, IOADDR(device_fn,where));
@@ -716,7 +724,7 @@ __initfunc(static struct pci_access *check_direct_pci(void))
unsigned int tmp;
unsigned long flags;
- save_flags(flags);
+ save_flags(flags); cli();
/*
* check if configuration type 1 works
@@ -912,13 +920,11 @@ __initfunc(unsigned long pcibios_init(unsigned long memory_start, unsigned long
bios32_entry = check->fields.entry;
printk ("pcibios_init : BIOS32 Service Directory entry at 0x%lx\n", bios32_entry);
bios32_indirect.address = bios32_entry + PAGE_OFFSET;
- access_pci = &pci_bios_access;
}
}
}
- if (bios32_entry) {
- memory_start = check_pcibios (memory_start, memory_end);
- }
+ if (bios32_entry && check_pcibios())
+ access_pci = &pci_bios_access;
#endif
return memory_start;
}
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 84fe0c7fd..ac67da797 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -110,62 +110,45 @@ ENOSYS = 38
addl $4,%esp; \
iret
-#ifdef __SMP__
-/* Get the processor ID multiplied by 4 */
-#define GET_PROCESSOR_OFFSET(reg) \
- movl SYMBOL_NAME(apic_reg), reg; \
- movl 32(reg), reg; \
- shrl $22, reg; \
- andl $0x3C, reg;
-
-#define GET_CURRENT(reg) \
- GET_PROCESSOR_OFFSET(reg) \
- movl SYMBOL_NAME(current_set)(reg),reg
-
-#else
-
#define GET_CURRENT(reg) \
- movl SYMBOL_NAME(current_set),reg
-
-#endif
+ movl %esp, reg; \
+ andl $-8192, reg;
ENTRY(lcall7)
pushfl # We get a different stack layout with call gates,
pushl %eax # which has to be cleaned up later..
SAVE_ALL
- GET_CURRENT(%ebx)
movl EIP(%esp),%eax # due to call gates, this is eflags, not eip..
movl CS(%esp),%edx # this is eip..
movl EFLAGS(%esp),%ecx # and this is cs..
movl %eax,EFLAGS(%esp) #
movl %edx,EIP(%esp) # Now we move them to their "normal" places
movl %ecx,CS(%esp) #
- movl %esp,%eax
- GET_CURRENT(%edx)
- pushl %eax
- movl exec_domain(%edx),%edx # Get the execution domain
+ movl %esp,%ebx
+ pushl %ebx
+ andl $-8192,%ebx # GET_CURRENT
+ movl exec_domain(%ebx),%edx # Get the execution domain
movl 4(%edx),%edx # Get the lcall7 handler for the domain
call *%edx
popl %eax
jmp ret_from_sys_call
+
#ifdef __SMP__
ALIGN
.globl ret_from_smpfork
ret_from_smpfork:
+ GET_CURRENT(%ebx)
btrl $0, SYMBOL_NAME(scheduler_lock)
jmp ret_from_sys_call
#endif /* __SMP__ */
- ALIGN
-handle_bottom_half:
- pushl $2f
- jmp SYMBOL_NAME(do_bottom_half)
-
- ALIGN
-reschedule:
- pushl $ret_from_sys_call
- jmp SYMBOL_NAME(schedule) # test
+/*
+ * Return to user mode is not as complex as all this looks,
+ * but we want the default path for a system call return to
+ * go as quickly as possible which is why some of this is
+ * less clear than it otherwise should be.
+ */
ENTRY(system_call)
pushl %eax # save orig_eax
@@ -180,16 +163,11 @@ ENTRY(system_call)
ALIGN
.globl ret_from_sys_call
.globl ret_from_intr
-ret_from_intr:
ret_from_sys_call:
- GET_CURRENT(%ebx)
movl SYMBOL_NAME(bh_mask),%eax
andl SYMBOL_NAME(bh_active),%eax
jne handle_bottom_half
-2: movl EFLAGS(%esp),%eax # mix EFLAGS and CS
- movb CS(%esp),%al
- testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor?
- je 1f
+ret_with_reschedule:
cmpl $0,SYMBOL_NAME(need_resched)
jne reschedule
movl blocked(%ebx),%eax
@@ -197,7 +175,6 @@ ret_from_sys_call:
notl %eax
andl signal(%ebx),%eax
jne signal_return
-1:
RESTORE_ALL
ALIGN
signal_return:
@@ -230,6 +207,30 @@ badsys:
movl $-ENOSYS,EAX(%esp)
jmp ret_from_sys_call
+ ALIGN
+ret_from_exception:
+ movl SYMBOL_NAME(bh_mask),%eax
+ andl SYMBOL_NAME(bh_active),%eax
+ jne handle_bottom_half
+ ALIGN
+ret_from_intr:
+ GET_CURRENT(%ebx)
+ movl EFLAGS(%esp),%eax # mix EFLAGS and CS
+ movb CS(%esp),%al
+ testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor?
+ jne ret_with_reschedule
+ RESTORE_ALL
+
+ ALIGN
+handle_bottom_half:
+ pushl $ret_from_intr
+ jmp SYMBOL_NAME(do_bottom_half)
+
+ ALIGN
+reschedule:
+ pushl $ret_from_sys_call
+ jmp SYMBOL_NAME(schedule) # test
+
ENTRY(divide_error)
pushl $0 # no error code
@@ -260,7 +261,7 @@ error_code:
GET_CURRENT(%ebx)
call *%ecx
addl $8,%esp
- jmp ret_from_sys_call
+ jmp ret_from_exception
ENTRY(coprocessor_error)
pushl $0
@@ -271,7 +272,7 @@ ENTRY(device_not_available)
pushl $-1 # mark this as an int
SAVE_ALL
GET_CURRENT(%ebx)
- pushl $ret_from_sys_call
+ pushl $ret_from_exception
movl %cr0,%eax
testl $0x4,%eax # EM (math emulation bit)
je SYMBOL_NAME(math_state_restore)
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 2bd095997..a42b87b1b 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -39,19 +39,21 @@ startup_32:
jz 1f
/*
* New page tables may be in 4Mbyte page mode and may
- * be using the global pages.
+ * be using the global pages.
+ *
+ * NOTE! We have to correct for the fact that we're
+ * not yet offset 0xC0000000..
*/
+#define cr4_bits mmu_cr4_features-0xC0000000
#ifdef GAS_KNOWS_CR4
movl %cr4,%eax # Turn on 4Mb pages
- orl $16+128,%eax
+ orl cr4_bits,%eax
movl %eax,%cr4
#else
.byte 0x0f,0x20,0xe0
- orl $16+128,%eax
+ orl cr4_bits,%eax
.byte 0x0f,0x22,0xe0
#endif
- movl %eax,%cr3 /* flush TLB as per app note */
- movl %cr0,%eax
#endif
/*
* Setup paging (the tables are already set up, just switch them on)
@@ -67,24 +69,16 @@ startup_32:
movl $1f,%eax
jmp *%eax /* make sure eip is relocated */
1:
+ /* Set up the stack pointer */
+ lss stack_start,%esp
#ifdef __SMP__
orw %bx,%bx
jz 1f /* Initial CPU cleans BSS */
-/*
- * Set up the stack
- */
- movl $(KERNEL_DS),%eax /* walken modif */
- mov %ax,%ss
- xorl %eax,%eax
- movw %cx, %ax
- movl %eax,%esp
- addl $0xC0000000, %esp /* shift it to the upper mapping */
pushl $0
popfl
jmp checkCPUtype
1:
- lss stack_start,%esp
#endif __SMP__
/*
* Clear BSS first so that there are no surprises...
@@ -305,15 +299,53 @@ rp_sidt:
jne rp_sidt
ret
+ENTRY(stack_start)
+ .long SYMBOL_NAME(init_task_union)+8192
+ .long KERNEL_DS
+
+/* This is the default interrupt "handler" :-) */
+int_msg:
+ .asciz "Unknown interrupt\n"
+ ALIGN
+ignore_int:
+ cld
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ push %ds
+ movl $(KERNEL_DS),%eax
+ mov %ax,%ds
+ mov %ax,%es
+ mov %ax,%fs
+ pushl $int_msg
+ call SYMBOL_NAME(printk)
+ popl %eax
+ pop %ds
+ popl %edx
+ popl %ecx
+ popl %eax
+ iret
+
+/*
+ * The interrupt descriptor table has room for 256 idt's
+ */
+ ALIGN
+.word 0
+idt_descr:
+ .word 256*8-1 # idt contains 256 entries
+ .long SYMBOL_NAME(idt)
+
+ ALIGN
+.word 0
+gdt_descr:
+#ifdef CONFIG_APM
+ .word (11+2*NR_TASKS)*8-1
+#else
+ .word (8+2*NR_TASKS)*8-1
+#endif
+ .long SYMBOL_NAME(gdt)
/*
- * page 0 is made non-existent, so that kernel NULL pointer references get
- * caught. Thus the swapper page directory has been moved to 0x101000
- * with the introduction of the compressed boot code. Theoretically,
- * the original design of overlaying the startup code with the swapper
- * page directory is still possible --- it would reduce the size of the kernel
- * by 2-3k. This would be a good thing to do at some point.....
- *
* This is initialized to create a identity-mapping at 0-4M (for bootup
* purposes) and another mapping of the 0-4M area at virtual address
* 0xC0000000.
@@ -471,63 +503,29 @@ ENTRY(empty_bad_page_table)
ENTRY(empty_zero_page)
.org 0x6000
-
-stack_start:
- .long SYMBOL_NAME(init_user_stack)+4096
- .long KERNEL_DS
-
-/* This is the default interrupt "handler" :-) */
-int_msg:
- .asciz "Unknown interrupt\n"
- ALIGN
-ignore_int:
- cld
- pushl %eax
- pushl %ecx
- pushl %edx
- push %ds
- push %es
- push %fs
- movl $(KERNEL_DS),%eax
- mov %ax,%ds
- mov %ax,%es
- mov %ax,%fs
- pushl $int_msg
- call SYMBOL_NAME(printk)
- popl %eax
- pop %fs
- pop %es
- pop %ds
- popl %edx
- popl %ecx
- popl %eax
- iret
+ENTRY(this_must_match_init_task)
/*
- * The interrupt descriptor table has room for 256 idt's
+ * This starts the data section. Note that the above is all
+ * in the text section because it has alignment requirements
+ * that we cannot fulfill any other way.
*/
- ALIGN
-.word 0
-idt_descr:
- .word 256*8-1 # idt contains 256 entries
- .long SYMBOL_NAME(idt)
+.data
+ALIGN
+/* 256 quadwords - 2048 bytes of idt */
ENTRY(idt)
.fill 256,8,0 # idt is uninitialized
- ALIGN
-.word 0
-gdt_descr:
-#ifdef CONFIG_APM
- .word (11+2*NR_TASKS)*8-1
-#else
- .word (8+2*NR_TASKS)*8-1
-#endif
- .long SYMBOL_NAME(gdt)
-
/*
* This gdt setup gives the kernel a 1GB address space at virtual
* address 0xC0000000 - space enough for expansion, I hope.
+ *
+ * This contains up to 8192 quadwords depending on NR_TASKS - 64kB of
+ * gdt entries. Ugh.
+ *
+ * NOTE! Make sure the gdt descriptor in head.S matches this if you
+ * change anything.
*/
ENTRY(gdt)
.quad 0x0000000000000000 /* NULL descriptor */
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
index 8c16f0204..daa6baf42 100644
--- a/arch/i386/kernel/i386_ksyms.c
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -19,6 +19,11 @@ extern void dump_thread(struct pt_regs *, struct user *);
extern int dump_fpu(elf_fpregset_t *);
extern void __lock_kernel(void);
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
+extern struct drive_info_struct drive_info;
+EXPORT_SYMBOL(drive_info);
+#endif
+
/* platform dependent support */
EXPORT_SYMBOL(EISA_bus);
EXPORT_SYMBOL(MCA_bus);
@@ -39,12 +44,13 @@ EXPORT_SYMBOL(csum_partial_copy);
#ifdef __SMP__
EXPORT_SYMBOL(apic_reg); /* Needed internally for the I386 inlines */
EXPORT_SYMBOL(cpu_data);
-EXPORT_SYMBOL(kernel_flag);
-EXPORT_SYMBOL(active_kernel_processor);
+EXPORT_SYMBOL_NOVERS(kernel_flag);
+EXPORT_SYMBOL_NOVERS(active_kernel_processor);
EXPORT_SYMBOL(smp_invalidate_needed);
EXPORT_SYMBOL_NOVERS(__lock_kernel);
/* Global SMP irq stuff */
+EXPORT_SYMBOL(synchronize_irq);
EXPORT_SYMBOL(global_irq_holder);
EXPORT_SYMBOL(__global_cli);
EXPORT_SYMBOL(__global_sti);
diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c
new file mode 100644
index 000000000..cc0a19231
--- /dev/null
+++ b/arch/i386/kernel/init_task.c
@@ -0,0 +1,22 @@
+#include <linux/mm.h>
+#include <linux/sched.h>
+
+#include <asm/pgtable.h>
+
+static struct vm_area_struct init_mmap = INIT_MMAP;
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS;
+struct mm_struct init_mm = INIT_MM;
+
+/*
+ * Initial task structure.
+ *
+ * We need to make sure that this is 8192-byte aligned due to the
+ * way process stacks are handled. This is done by making sure
+ * the linker maps this in the .text segment right after head.S,
+ * and making head.S ensure the proper alignment.
+ *
+ * The things we do for performance..
+ */
+union task_union init_task_union __attribute__((__section__(".text"))) = { INIT_TASK };
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 95a7b525f..e5fb5acb1 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -44,9 +44,6 @@ extern volatile unsigned long smp_local_timer_ticks[1+NR_CPUS];
#define CR0_NE 32
-static unsigned char cache_21 = 0xff;
-static unsigned char cache_A1 = 0xff;
-
unsigned int local_irq_count[NR_CPUS];
#ifdef __SMP__
atomic_t __intel_bh_counter;
@@ -58,51 +55,84 @@ int __intel_bh_counter;
static unsigned int int_count[NR_CPUS][NR_IRQS] = {{0},};
#endif
-static inline void mask_irq(unsigned int irq_nr)
-{
- unsigned char mask;
+/*
+ * This contains the irq mask for both irq controllers
+ */
+static unsigned int cached_irq_mask = 0xffff;
+
+#define cached_21 (((char *)(&cached_irq_mask))[0])
+#define cached_A1 (((char *)(&cached_irq_mask))[1])
- mask = 1 << (irq_nr & 7);
- if (irq_nr < 8) {
- cache_21 |= mask;
- outb(cache_21,0x21);
+spinlock_t irq_controller_lock;
+
+/*
+ * This is always called from an interrupt context
+ * with local interrupts disabled. Don't worry about
+ * irq-safe locks.
+ *
+ * Note that we always ack the primary irq controller,
+ * even if the interrupt came from the secondary, as
+ * the primary will still have routed it. Oh, the joys
+ * of PC hardware.
+ */
+static inline void mask_and_ack_irq(int irq_nr)
+{
+ spin_lock(&irq_controller_lock);
+ cached_irq_mask |= 1 << irq_nr;
+ if (irq_nr & 8) {
+ inb(0xA1); /* DUMMY */
+ outb(cached_A1,0xA1);
+ outb(0x20,0xA0);
} else {
- cache_A1 |= mask;
- outb(cache_A1,0xA1);
+ inb(0x21); /* DUMMY */
+ outb(cached_21,0x21);
}
+ outb(0x20,0x20);
+ spin_unlock(&irq_controller_lock);
}
-static inline void unmask_irq(unsigned int irq_nr)
+static inline void set_irq_mask(int irq_nr)
{
- unsigned char mask;
-
- mask = ~(1 << (irq_nr & 7));
- if (irq_nr < 8) {
- cache_21 &= mask;
- outb(cache_21,0x21);
+ if (irq_nr & 8) {
+ outb(cached_A1,0xA1);
} else {
- cache_A1 &= mask;
- outb(cache_A1,0xA1);
+ outb(cached_21,0x21);
}
}
+/*
+ * These have to be protected by the spinlock
+ * before being called.
+ */
+static inline void mask_irq(unsigned int irq_nr)
+{
+ cached_irq_mask |= 1 << irq_nr;
+ set_irq_mask(irq_nr);
+}
+
+static inline void unmask_irq(unsigned int irq_nr)
+{
+ cached_irq_mask &= ~(1 << irq_nr);
+ set_irq_mask(irq_nr);
+}
+
void disable_irq(unsigned int irq_nr)
{
unsigned long flags;
- save_flags(flags);
- cli();
+ spin_lock_irqsave(&irq_controller_lock, flags);
mask_irq(irq_nr);
- restore_flags(flags);
+ spin_unlock_irqrestore(&irq_controller_lock, flags);
+ synchronize_irq();
}
void enable_irq(unsigned int irq_nr)
{
unsigned long flags;
- save_flags(flags);
- cli();
+
+ spin_lock_irqsave(&irq_controller_lock, flags);
unmask_irq(irq_nr);
- restore_flags(flags);
+ spin_unlock_irqrestore(&irq_controller_lock, flags);
}
/*
@@ -133,7 +163,8 @@ void enable_irq(unsigned int irq_nr)
#error make irq stub building NR_IRQS dependent and remove me.
#endif
-BUILD_TIMER_IRQ(FIRST,0,0x01)
+BUILD_COMMON_IRQ()
+BUILD_IRQ(FIRST,0,0x01)
BUILD_IRQ(FIRST,1,0x02)
BUILD_IRQ(FIRST,2,0x04)
BUILD_IRQ(FIRST,3,0x08)
@@ -157,10 +188,6 @@ BUILD_SMP_INTERRUPT(stop_cpu_interrupt)
BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt)
#endif
-/*
- * Pointers to the low-level handlers: first the general ones, then the
- * fast ones, then the bad ones.
- */
static void (*interrupt[17])(void) = {
IRQ0_interrupt, IRQ1_interrupt, IRQ2_interrupt, IRQ3_interrupt,
IRQ4_interrupt, IRQ5_interrupt, IRQ6_interrupt, IRQ7_interrupt,
@@ -168,28 +195,6 @@ static void (*interrupt[17])(void) = {
IRQ12_interrupt, IRQ13_interrupt, IRQ14_interrupt, IRQ15_interrupt
};
-static void (*fast_interrupt[16])(void) = {
- fast_IRQ0_interrupt, fast_IRQ1_interrupt,
- fast_IRQ2_interrupt, fast_IRQ3_interrupt,
- fast_IRQ4_interrupt, fast_IRQ5_interrupt,
- fast_IRQ6_interrupt, fast_IRQ7_interrupt,
- fast_IRQ8_interrupt, fast_IRQ9_interrupt,
- fast_IRQ10_interrupt, fast_IRQ11_interrupt,
- fast_IRQ12_interrupt, fast_IRQ13_interrupt,
- fast_IRQ14_interrupt, fast_IRQ15_interrupt
-};
-
-static void (*bad_interrupt[16])(void) = {
- bad_IRQ0_interrupt, bad_IRQ1_interrupt,
- bad_IRQ2_interrupt, bad_IRQ3_interrupt,
- bad_IRQ4_interrupt, bad_IRQ5_interrupt,
- bad_IRQ6_interrupt, bad_IRQ7_interrupt,
- bad_IRQ8_interrupt, bad_IRQ9_interrupt,
- bad_IRQ10_interrupt, bad_IRQ11_interrupt,
- bad_IRQ12_interrupt, bad_IRQ13_interrupt,
- bad_IRQ14_interrupt, bad_IRQ15_interrupt
-};
-
/*
* Initial irq handlers.
*/
@@ -240,14 +245,10 @@ int get_irq_list(char *buf)
action = irq_action[i];
if (!action)
continue;
- len += sprintf(buf+len, "%2d: %10u %c %s",
- i, kstat.interrupts[i],
- (action->flags & SA_INTERRUPT) ? '+' : ' ',
- action->name);
+ len += sprintf(buf+len, "%2d: %10u %s",
+ i, kstat.interrupts[i], action->name);
for (action=action->next; action; action = action->next) {
- len += sprintf(buf+len, ",%s %s",
- (action->flags & SA_INTERRUPT) ? " +" : "",
- action->name);
+ len += sprintf(buf+len, ", %s", action->name);
}
len += sprintf(buf+len, "\n");
}
@@ -298,13 +299,9 @@ int get_smp_prof_list(char *buf) {
for (j=0;j<smp_num_cpus;j++)
len+=sprintf(buf+len, "%10d ",
int_count[cpu_logical_map[j]][i]);
- len += sprintf(buf+len, "%c %s",
- (action->flags & SA_INTERRUPT) ? '+' : ' ',
- action->name);
+ len += sprintf(buf+len, " %s", action->name);
for (action=action->next; action; action = action->next) {
- len += sprintf(buf+len, ",%s %s",
- (action->flags & SA_INTERRUPT) ? " +" : "",
- action->name);
+ len += sprintf(buf+len, ", %s", action->name);
}
len += sprintf(buf+len, "\n");
}
@@ -393,16 +390,8 @@ static inline void check_smp_invalidate(int cpu)
static unsigned long previous_irqholder;
-#undef INIT_STUCK
-#define INIT_STUCK 100000000
-
-#undef STUCK
-#define STUCK \
-if (!--stuck) {printk("wait_on_irq CPU#%d stuck at %08lx, waiting for %08lx (local=%d, global=%d)\n", cpu, where, previous_irqholder, local_count, atomic_read(&global_irq_count)); stuck = INIT_STUCK; }
-
static inline void wait_on_irq(int cpu, unsigned long where)
{
- int stuck = INIT_STUCK;
int local_count = local_irq_count[cpu];
/* Are we the only one in an interrupt context? */
@@ -421,13 +410,12 @@ static inline void wait_on_irq(int cpu, unsigned long where)
* their things before trying to get the lock again.
*/
for (;;) {
- STUCK;
check_smp_invalidate(cpu);
if (atomic_read(&global_irq_count))
continue;
if (global_irq_lock)
continue;
- if (!set_bit(0,&global_irq_lock))
+ if (!test_and_set_bit(0,&global_irq_lock))
break;
}
atomic_add(local_count, &global_irq_count);
@@ -456,28 +444,18 @@ void synchronize_irq(void)
}
}
-#undef INIT_STUCK
-#define INIT_STUCK 10000000
-
-#undef STUCK
-#define STUCK \
-if (!--stuck) {printk("get_irqlock stuck at %08lx, waiting for %08lx\n", where, previous_irqholder); stuck = INIT_STUCK;}
-
static inline void get_irqlock(int cpu, unsigned long where)
{
- int stuck = INIT_STUCK;
-
- if (set_bit(0,&global_irq_lock)) {
+ if (test_and_set_bit(0,&global_irq_lock)) {
/* do we already hold the lock? */
if ((unsigned char) cpu == global_irq_holder)
return;
/* Uhhuh.. Somebody else got it. Wait.. */
do {
do {
- STUCK;
check_smp_invalidate(cpu);
} while (test_bit(0,&global_irq_lock));
- } while (set_bit(0,&global_irq_lock));
+ } while (test_and_set_bit(0,&global_irq_lock));
}
/*
* Ok, we got the lock bit.
@@ -519,7 +497,8 @@ void __global_restore_flags(unsigned long flags)
{
switch (flags) {
case 0:
- __global_sti();
+ release_irqlock(smp_processor_id());
+ __sti();
break;
case 1:
__global_cli();
@@ -533,56 +512,58 @@ void __global_restore_flags(unsigned long flags)
#endif
/*
- * do_IRQ handles IRQ's that have been installed without the
- * SA_INTERRUPT flag: it uses the full signal-handling return
- * and runs with other interrupts enabled. All relatively slow
- * IRQ's should use this format: notably the keyboard/timer
- * routines.
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
*/
-asmlinkage void do_IRQ(int irq, struct pt_regs * regs)
+asmlinkage void do_IRQ(struct pt_regs regs)
{
+ int irq = regs.orig_eax & 0xff;
struct irqaction * action;
- int do_random, cpu = smp_processor_id();
+ int status, cpu;
+
+ /*
+ * mask and ack quickly, we don't want the irq controller
+ * thinking we're snobs just because some other CPU has
+ * disabled global interrupts (we have already done the
+ * INT_ACK cycles, it's too late to try to pretend to the
+ * controller that we aren't taking the interrupt).
+ */
+ mask_and_ack_irq(irq);
+ cpu = smp_processor_id();
irq_enter(cpu, irq);
kstat.interrupts[irq]++;
- /* slow interrupts run with interrupts enabled */
- __sti();
+ /* Return with this interrupt masked if no action */
+ status = 0;
action = *(irq + irq_action);
- do_random = 0;
- while (action) {
- do_random |= action->flags;
- action->handler(irq, action->dev_id, regs);
- action = action->next;
+ if (action) {
+ do {
+ status |= action->flags;
+ action->handler(irq, action->dev_id, &regs);
+ action = action->next;
+ } while (action);
+ if (status & SA_SAMPLE_RANDOM)
+ add_interrupt_randomness(irq);
+
+ __cli();
+ spin_lock(&irq_controller_lock);
+ unmask_irq(irq);
+ spin_unlock(&irq_controller_lock);
}
- if (do_random & SA_SAMPLE_RANDOM)
- add_interrupt_randomness(irq);
- irq_exit(cpu, irq);
-}
-/*
- * do_fast_IRQ handles IRQ's that don't need the fancy interrupt return
- * stuff - the handler is also running with interrupts disabled unless
- * it explicitly enables them later.
- */
-asmlinkage void do_fast_IRQ(int irq)
-{
- struct irqaction * action;
- int do_random, cpu = smp_processor_id();
-
- irq_enter(cpu, irq);
- kstat.interrupts[irq]++;
- action = *(irq + irq_action);
- do_random = 0;
- while (action) {
- do_random |= action->flags;
- action->handler(irq, action->dev_id, NULL);
- action = action->next;
- }
- if (do_random & SA_SAMPLE_RANDOM)
- add_interrupt_randomness(irq);
irq_exit(cpu, irq);
+ /*
+ * This should be conditional: we should really get
+ * a return code from the irq handler to tell us
+ * whether the handler wants us to do software bottom
+ * half handling or not..
+ */
+ if (1) {
+ if (bh_active & bh_mask)
+ do_bottom_half();
+ }
}
int setup_x86_irq(int irq, struct irqaction * new)
@@ -597,10 +578,6 @@ int setup_x86_irq(int irq, struct irqaction * new)
if (!(old->flags & new->flags & SA_SHIRQ))
return -EBUSY;
- /* Can't share interrupts unless both are same type */
- if ((old->flags ^ new->flags) & SA_INTERRUPT)
- return -EBUSY;
-
/* add new interrupt at end of irq queue */
do {
p = &old->next;
@@ -617,11 +594,9 @@ int setup_x86_irq(int irq, struct irqaction * new)
*p = new;
if (!shared) {
- if (new->flags & SA_INTERRUPT)
- set_intr_gate(0x20+irq,fast_interrupt[irq]);
- else
- set_intr_gate(0x20+irq,interrupt[irq]);
+ spin_lock(&irq_controller_lock);
unmask_irq(irq);
+ spin_unlock(&irq_controller_lock);
}
restore_flags(flags);
return 0;
@@ -676,10 +651,6 @@ void free_irq(unsigned int irq, void *dev_id)
save_flags(flags);
cli();
*p = action->next;
- if (!irq[irq_action]) {
- mask_irq(irq);
- set_intr_gate(0x20+irq,bad_interrupt[irq]);
- }
restore_flags(flags);
kfree(action);
return;
@@ -689,7 +660,7 @@ void free_irq(unsigned int irq, void *dev_id)
unsigned long probe_irq_on (void)
{
- unsigned int i, irqs = 0, irqmask;
+ unsigned int i, irqs = 0;
unsigned long delay;
/* first, enable any unassigned irqs */
@@ -705,19 +676,17 @@ unsigned long probe_irq_on (void)
/* about 100ms delay */;
/* now filter out any obviously spurious interrupts */
- irqmask = (((unsigned int)cache_A1)<<8) | (unsigned int)cache_21;
- return irqs & ~irqmask;
+ return irqs & ~cached_irq_mask;
}
int probe_irq_off (unsigned long irqs)
{
- unsigned int i, irqmask;
+ unsigned int i;
- irqmask = (((unsigned int)cache_A1)<<8) | (unsigned int)cache_21;
#ifdef DEBUG
- printk("probe_irq_off: irqs=0x%04lx irqmask=0x%04x\n", irqs, irqmask);
+ printk("probe_irq_off: irqs=0x%04lx irqmask=0x%04x\n", irqs, cached_irq_mask);
#endif
- irqs &= irqmask;
+ irqs &= cached_irq_mask;
if (!irqs)
return 0;
i = ffz(~irqs);
@@ -729,10 +698,6 @@ int probe_irq_off (unsigned long irqs)
__initfunc(void init_IRQ(void))
{
int i;
- static unsigned char smptrap=0;
- if(smptrap)
- return;
- smptrap=1;
/* set the clock to 100 Hz */
outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
@@ -740,7 +705,7 @@ __initfunc(void init_IRQ(void))
outb(LATCH >> 8 , 0x40); /* MSB */
for (i = 0; i < NR_IRQS ; i++)
- set_intr_gate(0x20+i,bad_interrupt[i]);
+ set_intr_gate(0x20+i,interrupt[i]);
#ifdef __SMP__
/*
diff --git a/arch/i386/kernel/irq.h b/arch/i386/kernel/irq.h
index 3a349f20a..1f9e89399 100644
--- a/arch/i386/kernel/irq.h
+++ b/arch/i386/kernel/irq.h
@@ -33,7 +33,6 @@ static inline void irq_enter(int cpu, int irq)
static inline void irq_exit(int cpu, int irq)
{
- __cli();
hardirq_exit(cpu);
release_irqlock(cpu);
}
@@ -63,125 +62,12 @@ static inline void irq_exit(int cpu, int irq)
"mov %dx,%ds\n\t" \
"mov %dx,%es\n\t"
-/*
- * SAVE_MOST/RESTORE_MOST is used for the faster version of IRQ handlers,
- * installed by using the SA_INTERRUPT flag. These kinds of IRQ's don't
- * call the routines that do signal handling etc on return, and can have
- * more relaxed register-saving etc. They are also atomic, and are thus
- * suited for small, fast interrupts like the serial lines or the harddisk
- * drivers, which don't actually need signal handling etc.
- *
- * Also note that we actually save only those registers that are used in
- * C subroutines (%eax, %edx and %ecx), so if you do something weird,
- * you're on your own. The only segments that are saved (not counting the
- * automatic stack and code segment handling) are %ds and %es, and they
- * point to kernel space. No messing around with %fs here.
- */
-#define SAVE_MOST \
- "cld\n\t" \
- "push %es\n\t" \
- "push %ds\n\t" \
- "pushl %eax\n\t" \
- "pushl %edx\n\t" \
- "pushl %ecx\n\t" \
- "movl $" STR(KERNEL_DS) ",%edx\n\t" \
- "mov %dx,%ds\n\t" \
- "mov %dx,%es\n\t"
-
-#define RESTORE_MOST \
- "popl %ecx\n\t" \
- "popl %edx\n\t" \
- "popl %eax\n\t" \
- "pop %ds\n\t" \
- "pop %es\n\t" \
- "iret"
-
-/*
- * Some fast irq handlers might want to access saved registers (mostly
- * cs or flags)
- */
-
-struct fast_irq_regs {
- long ecx;
- long edx;
- long eax;
- int xds;
- int xes;
- long eip;
- int xcs;
- long eflags;
- long esp;
- int xss;
-};
-
-/*
- * The "inb" instructions are not needed, but seem to change the timings
- * a bit - without them it seems that the harddisk driver won't work on
- * all hardware. Arghh.
- */
-#define ACK_FIRST(mask,nr) \
- "inb $0x21,%al\n\t" \
- "jmp 1f\n" \
- "1:\tjmp 1f\n" \
- "1:\torb $" #mask ","SYMBOL_NAME_STR(cache_21)"\n\t" \
- "movb "SYMBOL_NAME_STR(cache_21)",%al\n\t" \
- "outb %al,$0x21\n\t" \
- "jmp 1f\n" \
- "1:\tjmp 1f\n" \
- "1:\tmovb $0x20,%al\n\t" \
- "outb %al,$0x20\n\t"
-
-#define ACK_SECOND(mask,nr) \
- "inb $0xA1,%al\n\t" \
- "jmp 1f\n" \
- "1:\tjmp 1f\n" \
- "1:\torb $" #mask ","SYMBOL_NAME_STR(cache_A1)"\n\t" \
- "movb "SYMBOL_NAME_STR(cache_A1)",%al\n\t" \
- "outb %al,$0xA1\n\t" \
- "jmp 1f\n" \
- "1:\tjmp 1f\n" \
- "1:\tmovb $0x20,%al\n\t" \
- "outb %al,$0xA0\n\t" \
- "jmp 1f\n" \
- "1:\tjmp 1f\n" \
- "1:\toutb %al,$0x20\n\t"
-
-#define UNBLK_FIRST(mask) \
- "inb $0x21,%al\n\t" \
- "jmp 1f\n" \
- "1:\tjmp 1f\n" \
- "1:\tandb $~(" #mask "),"SYMBOL_NAME_STR(cache_21)"\n\t" \
- "movb "SYMBOL_NAME_STR(cache_21)",%al\n\t" \
- "outb %al,$0x21\n\t"
-
-#define UNBLK_SECOND(mask) \
- "inb $0xA1,%al\n\t" \
- "jmp 1f\n" \
- "1:\tjmp 1f\n" \
- "1:\tandb $~(" #mask "),"SYMBOL_NAME_STR(cache_A1)"\n\t" \
- "movb "SYMBOL_NAME_STR(cache_A1)",%al\n\t" \
- "outb %al,$0xA1\n\t"
-
#define IRQ_NAME2(nr) nr##_interrupt(void)
#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
-#define FAST_IRQ_NAME(nr) IRQ_NAME2(fast_IRQ##nr)
-#define BAD_IRQ_NAME(nr) IRQ_NAME2(bad_IRQ##nr)
-
-#ifdef __SMP__
-
-#define GET_CURRENT \
- "movl "SYMBOL_NAME_STR(apic_reg)", %ebx\n\t" \
- "movl 32(%ebx), %ebx\n\t" \
- "shrl $22,%ebx\n\t" \
- "andl $0x3C,%ebx\n\t" \
- "movl " SYMBOL_NAME_STR(current_set) "(,%ebx),%ebx\n\t"
-
-#else
#define GET_CURRENT \
- "movl " SYMBOL_NAME_STR(current_set) ",%ebx\n\t"
-
-#endif
+ "movl %esp, %ebx\n\t" \
+ "andl $-8192, %ebx\n\t"
#ifdef __SMP__
@@ -205,66 +91,30 @@ __asm__( \
"\n"__ALIGN_STR"\n" \
SYMBOL_NAME_STR(x) ":\n\t" \
"pushl $-1\n\t" \
- SAVE_ALL \
- "movl %esp,%eax\n\t" \
- "pushl %eax\n\t" \
+ SAVE_ALL \
+ "movl %esp,%eax\n\t" \
+ "pushl %eax\n\t" \
"call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
- "addl $4,%esp\n\t" \
+ "addl $4,%esp\n\t" \
"jmp ret_from_intr\n");
#endif /* __SMP__ */
-#define BUILD_IRQ(chip,nr,mask) \
-asmlinkage void IRQ_NAME(nr); \
-asmlinkage void FAST_IRQ_NAME(nr); \
-asmlinkage void BAD_IRQ_NAME(nr); \
+#define BUILD_COMMON_IRQ() \
__asm__( \
-"\n"__ALIGN_STR"\n" \
-SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
- "pushl $-"#nr"-2\n\t" \
+ "\n" __ALIGN_STR"\n" \
+ "common_interrupt:\n\t" \
SAVE_ALL \
- ACK_##chip(mask,(nr&7)) \
- "movl %esp,%eax\n\t" \
- "pushl %eax\n\t" \
- "pushl $" #nr "\n\t" \
- "call "SYMBOL_NAME_STR(do_IRQ)"\n\t" \
- "addl $8,%esp\n\t" \
- UNBLK_##chip(mask) \
- "jmp ret_from_intr\n" \
-"\n"__ALIGN_STR"\n" \
-SYMBOL_NAME_STR(fast_IRQ) #nr "_interrupt:\n\t" \
- SAVE_MOST \
- ACK_##chip(mask,(nr&7)) \
- "pushl $" #nr "\n\t" \
- "call "SYMBOL_NAME_STR(do_fast_IRQ)"\n\t" \
- "addl $4,%esp\n\t" \
- UNBLK_##chip(mask) \
- RESTORE_MOST \
-"\n"__ALIGN_STR"\n" \
-SYMBOL_NAME_STR(bad_IRQ) #nr "_interrupt:\n\t" \
- SAVE_MOST \
- ACK_##chip(mask,(nr&7)) \
- RESTORE_MOST);
-
-#define BUILD_TIMER_IRQ(chip,nr,mask) \
+ "pushl $ret_from_intr\n\t" \
+ "jmp "SYMBOL_NAME_STR(do_IRQ));
+
+#define BUILD_IRQ(chip,nr,mask) \
asmlinkage void IRQ_NAME(nr); \
-asmlinkage void FAST_IRQ_NAME(nr); \
-asmlinkage void BAD_IRQ_NAME(nr); \
__asm__( \
"\n"__ALIGN_STR"\n" \
-SYMBOL_NAME_STR(fast_IRQ) #nr "_interrupt:\n\t" \
-SYMBOL_NAME_STR(bad_IRQ) #nr "_interrupt:\n\t" \
SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
- "pushl $-"#nr"-2\n\t" \
- SAVE_ALL \
- ACK_##chip(mask,(nr&7)) \
- "movl %esp,%eax\n\t" \
- "pushl %eax\n\t" \
- "pushl $" #nr "\n\t" \
- "call "SYMBOL_NAME_STR(do_IRQ)"\n\t" \
- "addl $8,%esp\n\t" \
- UNBLK_##chip(mask) \
- "jmp ret_from_intr\n");
+ "pushl $"#nr"-256\n\t" \
+ "jmp common_interrupt");
/*
* x86 profiling function, SMP safe. We might want to do this in
@@ -276,15 +126,14 @@ static inline void x86_do_profile (unsigned long eip)
extern int _stext;
eip -= (unsigned long) &_stext;
eip >>= prof_shift;
- if (eip < prof_len)
- atomic_inc((atomic_t *)&prof_buffer[eip]);
- else
/*
* Dont ignore out-of-bounds EIP values silently,
* put them into the last histogram slot, so if
* present, they will show up as a sharp peak.
*/
- atomic_inc((atomic_t *)&prof_buffer[prof_len-1]);
+ if (eip > prof_len-1)
+ eip = prof_len-1;
+ atomic_inc((atomic_t *)&prof_buffer[eip]);
}
}
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index fe4723951..33842a21f 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -30,6 +30,7 @@
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/reboot.h>
+#include <linux/init.h>
#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF)
#include <linux/apm_bios.h>
#endif
@@ -149,7 +150,8 @@ int cpu_idle(void *unused)
current->priority = -100;
while(1)
{
- if(cpu_data[smp_processor_id()].hlt_works_ok && !hlt_counter && !need_resched)
+ if(cpu_data[smp_processor_id()].hlt_works_ok &&
+ !hlt_counter && !need_resched)
__asm("hlt");
/*
* tq_scheduler currently assumes we're running in a process
@@ -183,7 +185,7 @@ static long no_idt[2] = {0, 0};
static int reboot_mode = 0;
static int reboot_thru_bios = 0;
-void reboot_setup(char *str, int *ints)
+__initfunc(void reboot_setup(char *str, int *ints))
{
while(1) {
switch (*str) {
@@ -324,11 +326,14 @@ void machine_restart(char * __unused)
pg0 [0] = 7;
- /* Use `swapper_pg_dir' as our page directory. Don't bother with
- `SET_PAGE_DIR' because interrupts are disabled and we're rebooting.
- This instruction flushes the TLB. */
+ /*
+ * Use `swapper_pg_dir' as our page directory. We bother with
+ * `SET_PAGE_DIR' even though we are rebooting: if the way we set the
+ * root page dir changes in the future, we won't break a seldom used
+ * feature ;)
+ */
- __asm__ __volatile__ ("movl %0,%%cr3" : : "a" (swapper_pg_dir) : "memory");
+ SET_PAGE_DIR(current,swapper_pg_dir);
/* Write 0x1234 to absolute memory location 0x472. The BIOS reads
this on booting to tell it to "Bypass memory test (also warm
@@ -473,6 +478,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
int i;
struct pt_regs * childregs;
+ p->tss.tr = _TSS(nr);
+ p->tss.ldt = _LDT(nr);
p->tss.es = KERNEL_DS;
p->tss.cs = KERNEL_CS;
p->tss.ss = KERNEL_DS;
@@ -480,9 +487,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
p->tss.fs = USER_DS;
p->tss.gs = USER_DS;
p->tss.ss0 = KERNEL_DS;
- p->tss.esp0 = p->kernel_stack_page + PAGE_SIZE;
- p->tss.tr = _TSS(nr);
- childregs = ((struct pt_regs *) (p->kernel_stack_page + PAGE_SIZE)) - 1;
+ p->tss.esp0 = 2*PAGE_SIZE + (unsigned long) p;
+ childregs = ((struct pt_regs *) (p->tss.esp0)) - 1;
p->tss.esp = (unsigned long) childregs;
#ifdef __SMP__
p->tss.eip = (unsigned long) ret_from_smpfork;
@@ -496,7 +502,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
childregs->eax = 0;
childregs->esp = esp;
p->tss.back_link = 0;
- p->tss.ldt = _LDT(nr);
if (p->ldt) {
p->ldt = (struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
if (p->ldt != NULL)
@@ -512,6 +517,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
p->tss.io_bitmap[i] = ~0;
if (last_task_used_math == current)
__asm__("clts ; fnsave %0 ; frstor %0":"=m" (p->tss.i387));
+
return 0;
}
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index 920d1bc1c..0dfffd672 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -34,18 +34,6 @@
*/
#define EFL_OFFSET ((EFL-2)*4-sizeof(struct pt_regs))
-/* change a pid into a task struct. */
-static inline struct task_struct * get_task(int pid)
-{
- int i;
-
- for (i = 1; i < NR_TASKS; i++) {
- if (task[i] != NULL && (task[i]->pid == pid))
- return task[i];
- }
- return NULL;
-}
-
/*
* this routine will get a word off of the processes privileged stack.
* the offset is how far from the base addr as stored in the TSS.
@@ -95,7 +83,7 @@ static unsigned long get_long(struct task_struct * tsk,
repeat:
pgdir = pgd_offset(vma->vm_mm, addr);
if (pgd_none(*pgdir)) {
- do_no_page(tsk, vma, addr, 0);
+ handle_mm_fault(tsk, vma, addr, 0);
goto repeat;
}
if (pgd_bad(*pgdir)) {
@@ -105,7 +93,7 @@ repeat:
}
pgmiddle = pmd_offset(pgdir, addr);
if (pmd_none(*pgmiddle)) {
- do_no_page(tsk, vma, addr, 0);
+ handle_mm_fault(tsk, vma, addr, 0);
goto repeat;
}
if (pmd_bad(*pgmiddle)) {
@@ -115,7 +103,7 @@ repeat:
}
pgtable = pte_offset(pgmiddle, addr);
if (!pte_present(*pgtable)) {
- do_no_page(tsk, vma, addr, 0);
+ handle_mm_fault(tsk, vma, addr, 0);
goto repeat;
}
page = pte_page(*pgtable);
@@ -146,7 +134,7 @@ static void put_long(struct task_struct * tsk, struct vm_area_struct * vma, unsi
repeat:
pgdir = pgd_offset(vma->vm_mm, addr);
if (!pgd_present(*pgdir)) {
- do_no_page(tsk, vma, addr, 1);
+ handle_mm_fault(tsk, vma, addr, 1);
goto repeat;
}
if (pgd_bad(*pgdir)) {
@@ -156,7 +144,7 @@ repeat:
}
pgmiddle = pmd_offset(pgdir, addr);
if (pmd_none(*pgmiddle)) {
- do_no_page(tsk, vma, addr, 1);
+ handle_mm_fault(tsk, vma, addr, 1);
goto repeat;
}
if (pmd_bad(*pgmiddle)) {
@@ -166,12 +154,12 @@ repeat:
}
pgtable = pte_offset(pgmiddle, addr);
if (!pte_present(*pgtable)) {
- do_no_page(tsk, vma, addr, 1);
+ handle_mm_fault(tsk, vma, addr, 1);
goto repeat;
}
page = pte_page(*pgtable);
if (!pte_write(*pgtable)) {
- do_wp_page(tsk, vma, addr, 1);
+ handle_mm_fault(tsk, vma, addr, 1);
goto repeat;
}
/* this is a hack for non-kernel-mapped video buffers and similar */
@@ -381,7 +369,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
if (pid == 1) /* you may not mess with init */
goto out;
ret = -ESRCH;
- if (!(child = get_task(pid)))
+ if (!(child = find_task_by_pid(pid)))
goto out;
ret = -EPERM;
if (request == PTRACE_ATTACH) {
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index ec5954771..f62744d11 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -247,7 +247,7 @@ static const char * i586model(unsigned int nr)
static const char * i686model(unsigned int nr)
{
static const char *model[] = {
- "PPro A-step", "Pentium Pro"
+ "PPro A-step", "Pentium Pro", "2", "Pentium II"
};
if (nr < sizeof(model)/sizeof(char *))
return model[nr];
@@ -279,9 +279,10 @@ static const char * getmodel(int x86, int model)
int get_cpuinfo(char * buffer)
{
int i, len = 0;
+ int sep_bug;
static const char *x86_cap_flags[] = {
"fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
- "cx8", "apic", "10", "11", "mtrr", "pge", "mca", "cmov",
+ "cx8", "apic", "10", "sep", "mtrr", "pge", "mca", "cmov",
"16", "17", "18", "19", "20", "21", "22", "mmx",
"24", "25", "26", "27", "28", "29", "30", "31"
};
@@ -321,10 +322,18 @@ int get_cpuinfo(char * buffer)
else
len += sprintf(buffer+len,
"stepping\t: unknown\n");
+
+ sep_bug = CD(have_cpuid) &&
+ (CD(x86_capability) & 0x800) &&
+ !memcmp(x86_vendor_id, "GenuineIntel", 12) &&
+ CD(x86) == 6 &&
+ CD(x86_model) < 3 &&
+ CD(x86_mask) < 3;
len += sprintf(buffer+len,
"fdiv_bug\t: %s\n"
"hlt_bug\t\t: %s\n"
+ "sep_bug\t\t: %s\n"
"fpu\t\t: %s\n"
"fpu_exception\t: %s\n"
"cpuid\t\t: %s\n"
@@ -332,6 +341,7 @@ int get_cpuinfo(char * buffer)
"flags\t\t:",
CD(fdiv_bug) ? "yes" : "no",
CD(hlt_works_ok) ? "no" : "yes",
+ sep_bug ? "yes" : "no",
CD(hard_math) ? "yes" : "no",
(CD(hard_math) && ignore_irq13)
? "yes" : "no",
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index 970c8c5d7..3141c5318 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -318,6 +318,14 @@ asmlinkage int do_signal(unsigned long oldmask, struct pt_regs * regs)
unsigned long signr;
struct sigaction * sa;
+ /*
+ * We want the common case to go fast, which
+ * is why we may in certain cases get here from
+ * kernel mode. Just return without doing anything
+ * if so.
+ */
+ if ((regs->xcs & 3) != 3)
+ return 1;
mask = ~current->blocked;
while ((signr = current->signal & mask)) {
/*
@@ -384,10 +392,12 @@ asmlinkage int do_signal(unsigned long oldmask, struct pt_regs * regs)
case SIGQUIT: case SIGILL: case SIGTRAP:
case SIGABRT: case SIGFPE: case SIGSEGV:
+ lock_kernel();
if (current->binfmt && current->binfmt->core_dump) {
if (current->binfmt->core_dump(signr, regs))
signr |= 0x80;
}
+ unlock_kernel();
/* fall through */
default:
spin_lock_irq(&current->sigmask_lock);
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index a1590f500..1dc615501 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -1,5 +1,5 @@
/*
- * Intel MP v1.1/v1.4 specification support routines for multi-pentium
+ * Intel MP v1.1/v1.4 specification support routines for multi-pentium
* hosts.
*
* (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net>
@@ -46,14 +46,15 @@
#include <asm/smp.h>
#include <asm/io.h>
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+
#include "irq.h"
extern unsigned long start_kernel, _etext;
extern void update_one_process( struct task_struct *p,
- unsigned long ticks, unsigned long user,
- unsigned long system);
-void setup_APIC_clock (void);
-
+ unsigned long ticks, unsigned long user,
+ unsigned long system);
/*
* Some notes on processor bugs:
*
@@ -67,7 +68,7 @@ void setup_APIC_clock (void);
* Pentium
* There is a marginal case where REP MOVS on 100MHz SMP
* machines with B stepping processors can fail. XXX should provide
- * an L1cache=Writethrough or L1cache=off option.
+ * an L1cache=Writethrough or L1cache=off option.
*
* B stepping CPU's may hang. There are hardware work arounds
* for this. We warn about it in case your board doesnt have the work
@@ -91,12 +92,12 @@ void setup_APIC_clock (void);
* If this sounds worrying believe me these bugs are ___RARE___ and
* there's about nothing of note with C stepping upwards.
*/
-
-
+
+
/*
* Why isn't this somewhere standard ??
*/
-
+
extern __inline int max(int a,int b)
{
if(a>b)
@@ -121,7 +122,6 @@ struct cpuinfo_x86 cpu_data[NR_CPUS]; /* Per cpu bogomips and other parameters
static unsigned int num_processors = 1; /* Internal processor count */
static unsigned long io_apic_addr = 0xFEC00000; /* Address of the I/O apic (not yet used) */
unsigned char boot_cpu_id = 0; /* Processor that is doing the boot up */
-static unsigned char *kstack_base,*kstack_end; /* Kernel stack list pointers */
static int smp_activated = 0; /* Tripped once we need to start cross invalidating */
int apic_version[NR_CPUS]; /* APIC version number */
static volatile int smp_commenced=0; /* Tripped when we start scheduling */
@@ -129,7 +129,6 @@ unsigned long apic_addr = 0xFEE00000; /* Address of APIC (defaults to 0xFEE000
unsigned long nlong = 0; /* dummy used for apic_reg address + 0x20 */
unsigned char *apic_reg=((unsigned char *)(&nlong))-0x20;/* Later set to the ioremap() of the APIC */
unsigned long apic_retval; /* Just debugging the assembler.. */
-unsigned char *kernel_stacks[NR_CPUS]; /* Kernel stack pointers for CPU's (debugging) */
static volatile unsigned char smp_cpu_in_msg[NR_CPUS]; /* True if this processor is sending an IPI */
@@ -195,10 +194,10 @@ static inline void ack_APIC_irq (void)
apic_write(APIC_EOI, 0);
}
-/*
+/*
* Checksum an MP configuration block.
*/
-
+
static int mpf_checksum(unsigned char *mp, int len)
{
int sum=0;
@@ -210,7 +209,7 @@ static int mpf_checksum(unsigned char *mp, int len)
/*
* Processor encoding in an MP configuration block
*/
-
+
static char *mpc_family(int family,int model)
{
static char n[32];
@@ -274,11 +273,11 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc))
/* set the local APIC address */
apic_addr = (unsigned long)phys_to_virt((unsigned long)mpc->mpc_lapic);
-
+
/*
* Now process the configuration blocks.
*/
-
+
while(count<mpc->mpc_length)
{
switch(*mpt)
@@ -290,13 +289,13 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc))
if(m->mpc_cpuflag&CPU_ENABLED)
{
printk("Processor #%d %s APIC version %d\n",
- m->mpc_apicid,
+ m->mpc_apicid,
mpc_family((m->mpc_cpufeature&
CPU_FAMILY_MASK)>>8,
(m->mpc_cpufeature&
CPU_MODEL_MASK)>>4),
m->mpc_apicver);
-#ifdef SMP_DEBUG
+#ifdef SMP_DEBUG
if(m->mpc_featureflag&(1<<0))
printk(" Floating point unit present.\n");
if(m->mpc_featureflag&(1<<7))
@@ -305,7 +304,7 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc))
printk(" 64 bit compare & exchange supported.\n");
if(m->mpc_featureflag&(1<<9))
printk(" Internal APIC present.\n");
-#endif
+#endif
if(m->mpc_cpuflag&CPU_BOOTPROCESSOR)
{
SMP_PRINTK((" Bootup CPU\n"));
@@ -313,10 +312,10 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc))
}
else /* Boot CPU already counted */
num_processors++;
-
+
if(m->mpc_apicid>NR_CPUS)
printk("Processor #%d unused. (Max %d processors).\n",m->mpc_apicid, NR_CPUS);
- else
+ else
{
cpu_present_map|=(1<<m->mpc_apicid);
apic_version[m->mpc_apicid]=m->mpc_apicver;
@@ -337,7 +336,7 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc))
str));
mpt+=sizeof(*m);
count+=sizeof(*m);
- break;
+ break;
}
case MP_IOAPIC:
{
@@ -346,20 +345,20 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc))
if(m->mpc_flags&MPC_APIC_USABLE)
{
apics++;
- printk("I/O APIC #%d Version %d at 0x%lX.\n",
- m->mpc_apicid,m->mpc_apicver,
- m->mpc_apicaddr);
- io_apic_addr = (unsigned long)phys_to_virt(m->mpc_apicaddr);
- }
- mpt+=sizeof(*m);
- count+=sizeof(*m);
- break;
+ printk("I/O APIC #%d Version %d at 0x%lX.\n",
+ m->mpc_apicid,m->mpc_apicver,
+ m->mpc_apicaddr);
+ io_apic_addr = (unsigned long)phys_to_virt(m->mpc_apicaddr);
+ }
+ mpt+=sizeof(*m);
+ count+=sizeof(*m);
+ break;
}
case MP_INTSRC:
{
struct mpc_config_intsrc *m=
(struct mpc_config_intsrc *)mpt;
-
+
mpt+=sizeof(*m);
count+=sizeof(*m);
break;
@@ -376,29 +375,29 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc))
}
if(apics>1)
printk("Warning: Multiple APIC's not supported.\n");
- return num_processors;
+ return num_processors;
}
/*
* Scan the memory blocks for an SMP configuration block.
*/
-
+
__initfunc(int smp_scan_config(unsigned long base, unsigned long length))
{
unsigned long *bp=phys_to_virt(base);
struct intel_mp_floating *mpf;
-
+
SMP_PRINTK(("Scan SMP from %p for %ld bytes.\n",
bp,length));
if(sizeof(*mpf)!=16)
printk("Error: MPF size\n");
-
+
while(length>0)
{
if(*bp==SMP_MAGIC_IDENT)
{
mpf=(struct intel_mp_floating *)bp;
- if(mpf->mpf_length==1 &&
+ if(mpf->mpf_length==1 &&
!mpf_checksum((unsigned char *)bp,16) &&
(mpf->mpf_specification == 1
|| mpf->mpf_specification == 4) )
@@ -433,7 +432,7 @@ __initfunc(int smp_scan_config(unsigned long base, unsigned long length))
* We know that page 0 is not
* used. Steal it for now!
*/
-
+
cfg=pg0[0];
pg0[0] = (apic_addr | 7);
local_flush_tlb();
@@ -451,7 +450,7 @@ __initfunc(int smp_scan_config(unsigned long base, unsigned long length))
*
* END OF HACK END OF HACK END OF HACK END OF HACK END OF HACK
*
- */
+ */
/*
* 2 CPUs, numbered 0 & 1.
*/
@@ -513,6 +512,7 @@ __initfunc(int smp_scan_config(unsigned long base, unsigned long length))
nlong = boot_cpu_id<<24; /* Dummy 'self' for bootup */
cpu_logical_map[0] = boot_cpu_id;
global_irq_holder = boot_cpu_id;
+ current->processor = boot_cpu_id;
printk("Processors: %d\n", num_processors);
/*
@@ -534,61 +534,37 @@ __initfunc(int smp_scan_config(unsigned long base, unsigned long length))
extern unsigned char trampoline_data [];
extern unsigned char trampoline_end [];
+static unsigned char *trampoline_base;
/*
* Currently trivial. Write the real->protected mode
* bootstrap into the page concerned. The caller
* has made sure it's suitably aligned.
*/
-
-__initfunc(static void install_trampoline(unsigned char *mp))
+
+__initfunc(static unsigned long setup_trampoline(void))
{
- memcpy(mp, trampoline_data, trampoline_end - trampoline_data);
+ memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
+ return virt_to_phys(trampoline_base);
}
/*
- * We are called very early to get the low memory for the trampoline/kernel stacks
- * This has to be done by mm/init.c to parcel us out nice low memory. We allocate
- * the kernel stacks at 4K, 8K, 12K... currently (0-03FF is preserved for SMM and
- * other things).
+ * We are called very early to get the low memory for the
+ * SMP bootup trampoline page.
*/
-
__initfunc(unsigned long smp_alloc_memory(unsigned long mem_base))
{
- int size=(num_processors-1)*PAGE_SIZE; /* Number of stacks needed */
-
- /*
- * Our stacks have to be below the 1Mb line, and mem_base on entry
- * is 4K aligned.
- */
-
- if(virt_to_phys((void *)(mem_base+size))>=0x9F000)
- panic("smp_alloc_memory: Insufficient low memory for kernel stacks 0x%lx.\n", mem_base);
- kstack_base=(void *)mem_base;
- mem_base+=size;
- kstack_end=(void *)mem_base;
- return mem_base;
-}
-
-/*
- * Hand out stacks one at a time.
- */
-
-__initfunc(static void *get_kernel_stack(void))
-{
- void *stack=kstack_base;
- if(kstack_base>=kstack_end)
- return NULL;
- kstack_base+=PAGE_SIZE;
- return stack;
+ if (virt_to_phys((void *)mem_base) >= 0x9F000)
+ panic("smp_alloc_memory: Insufficient low memory for kernel trampoline 0x%lx.\n", mem_base);
+ trampoline_base = (void *)mem_base;
+ return mem_base + PAGE_SIZE;
}
-
/*
* The bootstrap kernel entry code has set these up. Save them for
* a given CPU
*/
-
+
__initfunc(void smp_store_cpu_info(int id))
{
struct cpuinfo_x86 *c=&cpu_data[id];
@@ -615,7 +591,7 @@ __initfunc(void smp_store_cpu_info(int id))
* fired off. This allows the BP to have everything in order [we hope].
* At the end of this all the AP's will hit the system scheduling and off
* we go. Each AP will load the system gdt's and jump through the kernel
- * init into idle(). At this point the scheduler will one day take over
+ * init into idle(). At this point the scheduler will one day take over
* and give them jobs to do. smp_callin is a standard routine
* we use to track CPU's as they power up.
*/
@@ -634,74 +610,276 @@ __initfunc(void smp_callin(void))
extern void calibrate_delay(void);
int cpuid=GET_APIC_ID(apic_read(APIC_ID));
unsigned long l;
-
+
/*
* Activate our APIC
*/
-
- SMP_PRINTK(("CALLIN %d\n",smp_processor_id()));
+
+ SMP_PRINTK(("CALLIN %d %d\n",hard_smp_processor_id(), smp_processor_id()));
l=apic_read(APIC_SPIV);
l|=(1<<8); /* Enable */
apic_write(APIC_SPIV,l);
/*
- * Set up our APIC timer.
+ * Set up our APIC timer.
*/
setup_APIC_clock ();
sti();
/*
* Get our bogomips.
- */
+ */
calibrate_delay();
SMP_PRINTK(("Stack at about %p\n",&cpuid));
-
+
/*
* Save our processor parameters
*/
smp_store_cpu_info(cpuid);
+
/*
* Allow the master to continue.
- */
+ */
set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]);
+}
+
+static int cpucount = 0;
+
+extern int cpu_idle(void * unused);
+
+/*
+ * Activate a secondary processor.
+ */
+__initfunc(int start_secondary(void *unused))
+{
+ smp_callin();
+ while (!smp_commenced)
+ barrier();
+ return cpu_idle(NULL);
+}
+
+/*
+ * Everything has been set up for the secondary
+ * CPU's - they just need to reload everything
+ * from the task structure
+ */
+__initfunc(void initialize_secondary(void))
+{
+ struct thread_struct * p = &current->tss;
+
/*
- * Until we are ready for SMP scheduling
+ * We don't actually need to load the full TSS,
+ * basically just the stack pointer and the eip.
*/
- load_ldt(0);
- local_flush_tlb();
-
- while (cpu_number_map[cpuid] == -1)
- barrier();
+ asm volatile("lldt %%ax": :"a" (p->ldt));
+ asm volatile("ltr %%ax": :"a" (p->tr));
+ asm volatile(
+ "movl %0,%%esp\n\t"
+ "jmp *%1"
+ :
+ :"r" (p->esp),"r" (p->eip));
+}
- while(!task[cpuid] || current_set[cpuid] != task[cpu_number_map[cpuid]])
- barrier();
+extern struct {
+ void * esp;
+ unsigned short ss;
+} stack_start;
- local_flush_tlb();
- load_TR(cpu_number_map[cpuid]);
+__initfunc(static void do_boot_cpu(int i))
+{
+ unsigned long cfg;
+ pgd_t maincfg;
+ struct task_struct *idle;
+ unsigned long send_status, accept_status;
+ int timeout, num_starts, j;
+ unsigned long start_eip;
- while(!smp_commenced)
- barrier();
-
+ /*
+ * We need an idle process for each processor.
+ */
+
+ kernel_thread(start_secondary, NULL, CLONE_PID);
+ cpucount++;
+
+ idle = task[cpucount];
+ if (!idle)
+ panic("No idle process for CPU %d\n", i);
+
+ idle->processor = i;
+ cpu_logical_map[cpucount] = i;
+ cpu_number_map[i] = cpucount;
+
+ /* start_eip had better be page-aligned! */
+ start_eip = setup_trampoline();
+
+ printk("Booting processor %d eip %lx: ", i, start_eip); /* So we see what's up */
+ stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
+
+ /*
+ * This grunge runs the startup process for
+ * the targeted processor.
+ */
+
+ SMP_PRINTK(("Setting warm reset code and vector.\n"));
+
+ CMOS_WRITE(0xa, 0xf);
local_flush_tlb();
+ SMP_PRINTK(("1.\n"));
+ *((volatile unsigned short *) phys_to_virt(0x469)) = start_eip >> 4;
+ SMP_PRINTK(("2.\n"));
+ *((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf;
+ SMP_PRINTK(("3.\n"));
+
+ maincfg=swapper_pg_dir[0];
+ ((unsigned long *)swapper_pg_dir)[0]=0x102007;
+
+ /*
+ * Be paranoid about clearing APIC errors.
+ */
+
+ if ( apic_version[i] & 0xF0 )
+ {
+ apic_write(APIC_ESR, 0);
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ }
+
+ /*
+ * Status is now clean
+ */
- SMP_PRINTK(("Commenced..\n"));
+ send_status = 0;
+ accept_status = 0;
+
+ /*
+ * Starting actual IPI sequence...
+ */
+
+ SMP_PRINTK(("Asserting INIT.\n"));
+
+ /*
+ * Turn INIT on
+ */
+
+ cfg=apic_read(APIC_ICR2);
+ cfg&=0x00FFFFFF;
+ apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */
+ cfg=apic_read(APIC_ICR);
+ cfg&=~0xCDFFF; /* Clear bits */
+ cfg |= (APIC_DEST_FIELD | APIC_DEST_LEVELTRIG
+ | APIC_DEST_ASSERT | APIC_DEST_DM_INIT);
+ apic_write(APIC_ICR, cfg); /* Send IPI */
+
+ udelay(200);
+ SMP_PRINTK(("Deasserting INIT.\n"));
+
+ cfg=apic_read(APIC_ICR2);
+ cfg&=0x00FFFFFF;
+ apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */
+ cfg=apic_read(APIC_ICR);
+ cfg&=~0xCDFFF; /* Clear bits */
+ cfg |= (APIC_DEST_FIELD | APIC_DEST_LEVELTRIG
+ | APIC_DEST_DM_INIT);
+ apic_write(APIC_ICR, cfg); /* Send IPI */
+
+ /*
+ * Should we send STARTUP IPIs ?
+ *
+ * Determine this based on the APIC version.
+ * If we don't have an integrated APIC, don't
+ * send the STARTUP IPIs.
+ */
+
+ if ( apic_version[i] & 0xF0 )
+ num_starts = 2;
+ else
+ num_starts = 0;
+
+ /*
+ * Run STARTUP IPI loop.
+ */
+
+ for (j = 1; !(send_status || accept_status)
+ && (j <= num_starts) ; j++)
+ {
+ SMP_PRINTK(("Sending STARTUP #%d.\n",j));
+ apic_write(APIC_ESR, 0);
+ SMP_PRINTK(("After apic_write.\n"));
+
+ /*
+ * STARTUP IPI
+ */
+
+ cfg=apic_read(APIC_ICR2);
+ cfg&=0x00FFFFFF;
+ apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */
+ cfg=apic_read(APIC_ICR);
+ cfg&=~0xCDFFF; /* Clear bits */
+ cfg |= (APIC_DEST_FIELD
+ | APIC_DEST_DM_STARTUP
+ | (start_eip >> 12)); /* Boot on the stack */
+ SMP_PRINTK(("Before start apic_write.\n"));
+ apic_write(APIC_ICR, cfg); /* Kick the second */
+
+ SMP_PRINTK(("Startup point 1.\n"));
+ timeout = 0;
+ do {
+ SMP_PRINTK(("Sleeping.\n")); udelay(1000000);
+ udelay(10);
+ } while ( (send_status = (apic_read(APIC_ICR) & 0x1000))
+ && (timeout++ < 1000));
+ udelay(200);
+ accept_status = (apic_read(APIC_ESR) & 0xEF);
+ }
+ SMP_PRINTK(("After Startup.\n"));
+
+ if (send_status) /* APIC never delivered?? */
+ printk("APIC never delivered???\n");
+ if (accept_status) /* Send accept error */
+ printk("APIC delivery error (%lx).\n", accept_status);
+
+ if( !(send_status || accept_status) )
+ {
+ for(timeout=0;timeout<50000;timeout++)
+ {
+ if(cpu_callin_map[0]&(1<<i))
+ break; /* It has booted */
+ udelay(100); /* Wait 5s total for a response */
+ }
+ if(cpu_callin_map[0]&(1<<i))
+ {
+ /* number CPUs logically, starting from 1 (BSP is 0) */
+#if 0
+ cpu_number_map[i] = cpucount;
+ cpu_logical_map[cpucount] = i;
+#endif
+ }
+ else
+ {
+ if(*((volatile unsigned char *)phys_to_virt(8192))==0xA5)
+ printk("Stuck ??\n");
+ else
+ printk("Not responding.\n");
+ }
+ }
+ SMP_PRINTK(("CPU has booted.\n"));
+
+ swapper_pg_dir[0]=maincfg;
local_flush_tlb();
- sti();
+
+ /* mark "stuck" area as not stuck */
+ *((volatile unsigned long *)phys_to_virt(8192)) = 0;
}
+
/*
* Cycle through the processors sending APIC IPI's to boot each.
*/
-
+
__initfunc(void smp_boot_cpus(void))
{
int i;
- int cpucount=0;
unsigned long cfg;
- pgd_t maincfg;
- void *stack;
- extern unsigned long init_user_stack[];
-
+
/*
* Initialize the logical to physical cpu number mapping
*/
@@ -712,12 +890,10 @@ __initfunc(void smp_boot_cpus(void))
/*
* Setup boot CPU information
*/
-
- kernel_stacks[boot_cpu_id]=(void *)init_user_stack; /* Set up for boot processor first */
smp_store_cpu_info(boot_cpu_id); /* Final full version of the data */
- cpu_present_map |= (1 << smp_processor_id());
+ cpu_present_map |= (1 << hard_smp_processor_id());
cpu_number_map[boot_cpu_id] = 0;
active_kernel_processor=boot_cpu_id;
@@ -744,11 +920,11 @@ __initfunc(void smp_boot_cpus(void))
*/
apic_reg = ioremap(apic_addr,4096);
-
+
if(apic_reg == NULL)
panic("Unable to map local apic.\n");
-
-#ifdef SMP_DEBUG
+
+#ifdef SMP_DEBUG
{
int reg;
@@ -785,11 +961,11 @@ __initfunc(void smp_boot_cpus(void))
SMP_PRINTK(("Getting LVT1: %x\n", reg));
}
#endif
-
+
/*
* Enable the local APIC
*/
-
+
cfg=apic_read(APIC_SPIV);
cfg|=(1<<8); /* Enable APIC */
apic_write(APIC_SPIV,cfg);
@@ -798,15 +974,15 @@ __initfunc(void smp_boot_cpus(void))
/*
* Set up our local APIC timer:
- */
+ */
setup_APIC_clock ();
/*
* Now scan the cpu present map and fire up the other CPUs.
*/
-
+
SMP_PRINTK(("CPU map: %lx\n", cpu_present_map));
-
+
for(i=0;i<NR_CPUS;i++)
{
/*
@@ -814,213 +990,17 @@ __initfunc(void smp_boot_cpus(void))
*/
if (i == boot_cpu_id)
continue;
-
+
if ((cpu_present_map & (1 << i))
&& (max_cpus < 0 || max_cpus > cpucount+1))
{
- unsigned long send_status, accept_status;
- int timeout, num_starts, j;
-
- /*
- * We need a kernel stack for each processor.
- */
-
- stack=get_kernel_stack(); /* We allocated these earlier */
- if(stack==NULL)
- panic("No memory for processor stacks.\n");
-
- kernel_stacks[i]=(void *)phys_to_virt((unsigned long)stack);
- install_trampoline(stack);
-
- printk("Booting processor %d stack %p: ",i,stack); /* So we set what's up */
-
- /*
- * This grunge runs the startup process for
- * the targeted processor.
- */
-
- SMP_PRINTK(("Setting warm reset code and vector.\n"));
-
- /*
- * Install a writable page 0 entry.
- */
-
- cfg=pg0[0];
-
- CMOS_WRITE(0xa, 0xf);
- pg0[0]=7;
- local_flush_tlb();
- SMP_PRINTK(("1.\n"));
- *((volatile unsigned short *) phys_to_virt(0x469)) = ((unsigned long)stack)>>4;
- SMP_PRINTK(("2.\n"));
- *((volatile unsigned short *) phys_to_virt(0x467)) = 0;
- SMP_PRINTK(("3.\n"));
-
- /*
- * Protect it again
- */
-
- pg0[0]= cfg;
- local_flush_tlb();
-
- /* walken modif
- * enable mapping of the first 4M at virtual
- * address zero
- */
-
- maincfg=swapper_pg_dir[0];
- ((unsigned long *)swapper_pg_dir)[0]=0x102007;
-
- /* no need to local_flush_tlb :
- we are setting this up for the slave processor ! */
-
- /*
- * Be paranoid about clearing APIC errors.
- */
-
- if ( apic_version[i] & 0xF0 )
- {
- apic_write(APIC_ESR, 0);
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- }
-
- /*
- * Status is now clean
- */
-
- send_status = 0;
- accept_status = 0;
-
- /*
- * Starting actual IPI sequence...
- */
-
- SMP_PRINTK(("Asserting INIT.\n"));
-
- /*
- * Turn INIT on
- */
-
- cfg=apic_read(APIC_ICR2);
- cfg&=0x00FFFFFF;
- apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */
- cfg=apic_read(APIC_ICR);
- cfg&=~0xCDFFF; /* Clear bits */
- cfg |= (APIC_DEST_FIELD | APIC_DEST_LEVELTRIG
- | APIC_DEST_ASSERT | APIC_DEST_DM_INIT);
- apic_write(APIC_ICR, cfg); /* Send IPI */
-
- udelay(200);
- SMP_PRINTK(("Deasserting INIT.\n"));
-
- cfg=apic_read(APIC_ICR2);
- cfg&=0x00FFFFFF;
- apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */
- cfg=apic_read(APIC_ICR);
- cfg&=~0xCDFFF; /* Clear bits */
- cfg |= (APIC_DEST_FIELD | APIC_DEST_LEVELTRIG
- | APIC_DEST_DM_INIT);
- apic_write(APIC_ICR, cfg); /* Send IPI */
-
- /*
- * Should we send STARTUP IPIs ?
- *
- * Determine this based on the APIC version.
- * If we don't have an integrated APIC, don't
- * send the STARTUP IPIs.
- */
-
- if ( apic_version[i] & 0xF0 )
- num_starts = 2;
- else
- num_starts = 0;
-
- /*
- * Run STARTUP IPI loop.
- */
-
- for (j = 1; !(send_status || accept_status)
- && (j <= num_starts) ; j++)
- {
- SMP_PRINTK(("Sending STARTUP #%d.\n",j));
-
- apic_write(APIC_ESR, 0);
- SMP_PRINTK(("After apic_write.\n"));
-
- /*
- * STARTUP IPI
- */
-
- cfg=apic_read(APIC_ICR2);
- cfg&=0x00FFFFFF;
- apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */
- cfg=apic_read(APIC_ICR);
- cfg&=~0xCDFFF; /* Clear bits */
- cfg |= (APIC_DEST_FIELD
- | APIC_DEST_DM_STARTUP
- | (((int)virt_to_phys(stack)) >> 12)); /* Boot on the stack */
- SMP_PRINTK(("Before start apic_write.\n"));
- apic_write(APIC_ICR, cfg); /* Kick the second */
-
- SMP_PRINTK(("Startup point 1.\n"));
- timeout = 0;
- do {
- SMP_PRINTK(("Sleeping.\n")); udelay(1000000);
- udelay(10);
- } while ( (send_status = (apic_read(APIC_ICR) & 0x1000))
- && (timeout++ < 1000));
- udelay(200);
-
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- }
- SMP_PRINTK(("After Startup.\n"));
-
- if (send_status) /* APIC never delivered?? */
- printk("APIC never delivered???\n");
- if (accept_status) /* Send accept error */
- printk("APIC delivery error (%lx).\n", accept_status);
-
- if( !(send_status || accept_status) )
- {
- for(timeout=0;timeout<50000;timeout++)
- {
- if(cpu_callin_map[0]&(1<<i))
- break; /* It has booted */
- udelay(100); /* Wait 5s total for a response */
- }
- if(cpu_callin_map[0]&(1<<i))
- {
- cpucount++;
- /* number CPUs logically, starting from 1 (BSP is 0) */
- cpu_number_map[i] = cpucount;
- cpu_logical_map[cpucount] = i;
- }
- else
- {
- if(*((volatile unsigned char *)phys_to_virt(8192))==0xA5)
- printk("Stuck ??\n");
- else
- printk("Not responding.\n");
- }
- }
- SMP_PRINTK(("CPU has booted.\n"));
-
- /* walken modif
- * restore mapping of the first 4M
- */
-
- swapper_pg_dir[0]=maincfg;
-
- local_flush_tlb();
-
- /* mark "stuck" area as not stuck */
- *((volatile unsigned long *)phys_to_virt(8192)) = 0;
+ do_boot_cpu(i);
}
-
- /*
+
+ /*
* Make sure we unmap all failed CPUs
*/
-
+
if (cpu_number_map[i] == -1)
cpu_present_map &= ~(1 << i);
}
@@ -1056,12 +1036,12 @@ __initfunc(void smp_boot_cpus(void))
/*
* Allow the user to impress friends.
*/
-
+
SMP_PRINTK(("Before bogomips.\n"));
if(cpucount==0)
{
printk("Error: only one processor found.\n");
- cpu_present_map=(1<<smp_processor_id());
+ cpu_present_map=(1<<hard_smp_processor_id());
}
else
{
@@ -1071,8 +1051,8 @@ __initfunc(void smp_boot_cpus(void))
if(cpu_present_map&(1<<i))
bogosum+=cpu_data[i].udelay_val;
}
- printk("Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
- cpucount+1,
+ printk("Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+ cpucount+1,
(bogosum+2500)/500000,
((bogosum+2500)/5000)%100);
SMP_PRINTK(("Before bogocount - setting activated=1.\n"));
@@ -1096,7 +1076,7 @@ __initfunc(void smp_boot_cpus(void))
* IDE disk problems), and other messages sent with IRQ's enabled in a civilised fashion. That
* will also boost performance.
*/
-
+
void smp_message_pass(int target, int msg, unsigned long data, int wait)
{
unsigned long flags;
@@ -1109,11 +1089,11 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
/*
* During boot up send no messages
*/
-
+
if(!smp_activated || !smp_commenced)
return;
-
-
+
+
/*
* Skip the reschedule if we are waiting to clear a
* message at this time. The reschedule cannot wait
@@ -1121,7 +1101,7 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
*/
switch (msg) {
- case MSG_RESCHEDULE:
+ case MSG_RESCHEDULE:
irq = 0x30;
if (smp_cpu_in_msg[p])
return;
@@ -1148,21 +1128,21 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
* no data and can occur during a flush.. guess what panic
* I got to notice this bug...
*/
-
+
/*
* We are busy
*/
-
- smp_cpu_in_msg[p]++;
+ smp_cpu_in_msg[p]++;
+
/* printk("SMP message pass #%d to %d of %d\n",
p, msg, target);*/
-
+
/*
* Wait for the APIC to become ready - this should never occur. Its
* a debugging check really.
*/
-
+
while(ct<1000)
{
cfg=apic_read(APIC_ICR);
@@ -1171,14 +1151,14 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
ct++;
udelay(10);
}
-
+
/*
* Just pray... there is nothing more we can do
*/
-
+
if(ct==1000)
printk("CPU #%d: previous IPI still not cleared after 10mS\n", p);
-
+
/*
* Program the APIC to deliver the IPI
*/
@@ -1190,12 +1170,12 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(target)); /* Target chip */
cfg=apic_read(APIC_ICR);
cfg&=~0xFDFFF; /* Clear bits */
- cfg|=APIC_DEST_FIELD|APIC_DEST_DM_FIXED|irq; /* Send an IRQ 13 */
+ cfg|=APIC_DEST_FIELD|APIC_DEST_DM_FIXED|irq; /* Send an IRQ 13 */
/*
* Set the target requirement
*/
-
+
if(target==MSG_ALL_BUT_SELF)
{
cfg|=APIC_DEST_ALLBUT;
@@ -1213,18 +1193,18 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
target_map=(1<<target);
cpu_callin_map[0]=0;
}
-
+
/*
* Send the IPI. The write to APIC_ICR fires this off.
*/
-
+
apic_write(APIC_ICR, cfg);
__restore_flags(flags);
-
+
/*
* Spin waiting for completion
*/
-
+
switch(wait)
{
int stuck;
@@ -1247,17 +1227,17 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
clear_bit(p, &smp_invalidate_needed);
--stuck;
if (!stuck) {
- printk("stuck on smp_invalidate_needed IPI wait\n");
+ printk("stuck on smp_invalidate_needed IPI wait (CPU#%d)\n",p);
break;
}
}
break;
}
-
+
/*
* Record our completion
*/
-
+
smp_cpu_in_msg[p]--;
}
@@ -1266,14 +1246,17 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
* even with IRQ's off. We have to avoid a pair of crossing flushes
* or we are doomed. See the notes about smp_message_pass.
*/
-
+
void smp_flush_tlb(void)
{
unsigned long flags;
+
+#if 0
if(smp_activated && smp_processor_id()!=active_kernel_processor) {
printk("CPU #%d:Attempted flush tlb IPI when not AKP(=%d)\n",smp_processor_id(),active_kernel_processor);
*(char *)0=0;
}
+#endif
/* printk("SMI-");*/
/*
@@ -1282,30 +1265,30 @@ void smp_flush_tlb(void)
* may issue a tlb flush. If you break any one of those three change this to an atomic
* bus locked or.
*/
-
+
smp_invalidate_needed=cpu_present_map;
-
+
/*
* Processors spinning on the lock will see this IRQ late. The smp_invalidate_needed map will
* ensure they don't do a spurious flush tlb or miss one.
*/
-
+
__save_flags(flags);
__cli();
smp_message_pass(MSG_ALL_BUT_SELF, MSG_INVALIDATE_TLB, 0L, 2);
-
+
/*
* Flush the local TLB
*/
-
- local_flush_tlb();
+ local_flush_tlb();
+
__restore_flags(flags);
-
+
/*
* Completed.
*/
-
+
/* printk("SMID\n");*/
}
@@ -1315,14 +1298,14 @@ void smp_flush_tlb(void)
*
* We do profiling in every local tick, statistics/rescheduling
* happen only every 'profiling multiplier' ticks. The default
- * multiplier is 1 and it can be changed by writing a 4 bytes multiplier
+ * multiplier is 1 and it can be changed by writing the new multiplier
* value into /proc/profile.
*/
unsigned int prof_multiplier[NR_CPUS];
unsigned int prof_counter[NR_CPUS];
-static inline void smp_local_timer_interrupt(struct pt_regs * regs)
+void smp_local_timer_interrupt(struct pt_regs * regs)
{
int cpu = smp_processor_id();
@@ -1367,7 +1350,7 @@ static inline void smp_local_timer_interrupt(struct pt_regs * regs)
kstat.cpu_user += user;
kstat.cpu_system += system;
-
+
} else {
#ifdef __SMP_PROF__
if (test_bit(cpu,&smp_idle_map))
@@ -1386,14 +1369,11 @@ static inline void smp_local_timer_interrupt(struct pt_regs * regs)
* We take the 'long' return path, and there every subsystem
* grabs the apropriate locks (kernel lock/ irq lock).
*
- * FIXME: we want to decouple profiling from the 'long path'.
+ * we might want to decouple profiling from the 'long path',
+ * and do the profiling totally in assembly.
*
* Currently this isnt too much of an issue (performancewise),
* we can take more than 100K local irqs per second on a 100 MHz P5.
- * [ although we notice need_resched too early, thus the way we
- * schedule (deliver signals and handle bhs) changes. ]
- *
- * Possibly we could solve these problems with 'smart irqs'.
*/
}
@@ -1401,6 +1381,9 @@ static inline void smp_local_timer_interrupt(struct pt_regs * regs)
* Local APIC timer interrupt. This is the most natural way for doing
* local interrupts, but local timer interrupts can be emulated by
* broadcast interrupts too. [in case the hw doesnt support APIC timers]
+ *
+ * [ if a single-CPU system runs an SMP kernel then the regular timer
+ * interrupt calls the local one as well. Thus we cannot inline it ... ]
*/
void smp_apic_timer_interrupt(struct pt_regs * regs)
{
@@ -1415,7 +1398,7 @@ void smp_apic_timer_interrupt(struct pt_regs * regs)
smp_local_timer_interrupt(regs);
}
-/*
+/*
* Reschedule call back
*/
asmlinkage void smp_reschedule_interrupt(void)
@@ -1437,11 +1420,11 @@ asmlinkage void smp_reschedule_interrupt(void)
*/
asmlinkage void smp_invalidate_interrupt(void)
{
- if (clear_bit(smp_processor_id(), &smp_invalidate_needed))
+ if (test_and_clear_bit(smp_processor_id(), &smp_invalidate_needed))
local_flush_tlb();
ack_APIC_irq ();
-}
+}
/*
* CPU halt call-back
@@ -1471,11 +1454,11 @@ asmlinkage void smp_stop_cpu_interrupt(void)
* but we do not accept timer interrupts yet. We only allow the BP
* to calibrate.
*/
-static unsigned int get_8254_timer_count (void)
+__initfunc(static unsigned int get_8254_timer_count (void))
{
unsigned int count;
- outb_p(0x00, 0x43);
+ outb_p(0x00, 0x43);
count = inb_p(0x40);
count |= inb_p(0x40) << 8;
@@ -1500,7 +1483,7 @@ static unsigned int get_8254_timer_count (void)
void setup_APIC_timer (unsigned int clocks)
{
- unsigned long lvtt1_value;
+ unsigned long lvtt1_value;
unsigned int tmp_value;
/*
@@ -1508,8 +1491,8 @@ void setup_APIC_timer (unsigned int clocks)
* mode. With the IO APIC we can re-route the external timer
* interrupt and broadcast it as an NMI to all CPUs, so no pain.
*
- * NOTE: this trap vector (0x41) and the gate in BUILD_SMP_TIMER_INTERRUPT
- * should be the same ;)
+ * NOTE: this trap vector (0x41) and the gate in
+ * BUILD_SMP_TIMER_INTERRUPT should be the same ;)
*/
tmp_value = apic_read(APIC_LVTT);
lvtt1_value = APIC_LVT_TIMER_PERIODIC | 0x41;
@@ -1526,7 +1509,7 @@ void setup_APIC_timer (unsigned int clocks)
apic_write(APIC_TMICT, clocks/APIC_DIVISOR);
}
-void wait_8254_wraparound (void)
+__initfunc(void wait_8254_wraparound (void))
{
unsigned int curr_count, prev_count=~0;
int delta;
@@ -1560,11 +1543,12 @@ void wait_8254_wraparound (void)
* APIC irq that way.
*/
-int calibrate_APIC_clock (void)
+__initfunc(int calibrate_APIC_clock (void))
{
unsigned long long t1,t2;
long tt1,tt2;
long calibration_result;
+ int i;
printk("calibrating APIC timer ... ");
@@ -1589,10 +1573,12 @@ int calibrate_APIC_clock (void)
RTDSC(t1);
tt1=apic_read(APIC_TMCCT);
+#define LOOPS (HZ/10)
/*
- * lets wait until we get to the next wrapround:
+ * let's wait for LOOPS wraparounds:
*/
- wait_8254_wraparound ();
+ for (i=0; i<LOOPS; i++)
+ wait_8254_wraparound ();
tt2=apic_read(APIC_TMCCT);
RTDSC(t2);
@@ -1605,46 +1591,47 @@ int calibrate_APIC_clock (void)
* underflown to be exact, as the timer counts down ;)
*/
- calibration_result = (tt1-tt2)*APIC_DIVISOR;
+ calibration_result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
- printk("\n..... %ld CPU clocks in 1 timer chip tick.\n",
- (unsigned long)(t2-t1));
+ SMP_PRINTK(("\n..... %ld CPU clocks in 1 timer chip tick.",
+ (unsigned long)(t2-t1)/LOOPS));
- printk("..... %ld APIC bus clocks in 1 timer chip tick.\n",
- calibration_result);
+ SMP_PRINTK(("\n..... %ld APIC bus clocks in 1 timer chip tick.",
+ calibration_result));
- printk("..... CPU clock speed is %ld.%ld MHz.\n",
- ((long)(t2-t1))/(1000000/HZ),
- ((long)(t2-t1))%(1000000/HZ) );
+ printk("\n..... CPU clock speed is %ld.%04ld MHz.\n",
+ ((long)(t2-t1)/LOOPS)/(1000000/HZ),
+ ((long)(t2-t1)/LOOPS)%(1000000/HZ) );
- printk("..... APIC bus clock speed is %ld.%ld MHz.\n",
+ printk("..... APIC bus clock speed is %ld.%04ld MHz.\n",
calibration_result/(1000000/HZ),
calibration_result%(1000000/HZ) );
+#undef LOOPS
return calibration_result;
}
static unsigned int calibration_result;
-void setup_APIC_clock (void)
+__initfunc(void setup_APIC_clock (void))
{
int cpu = smp_processor_id();
- unsigned long flags;
+ unsigned long flags;
static volatile int calibration_lock;
save_flags(flags);
cli();
- printk("setup_APIC_clock() called.\n");
+ SMP_PRINTK(("setup_APIC_clock() called.\n"));
/*
* [ setup_APIC_clock() is called from all CPUs, but we want
* to do this part of the setup only once ... and it fits
* here best ]
*/
- if (!set_bit(0,&calibration_lock)) {
+ if (!test_and_set_bit(0,&calibration_lock)) {
calibration_result=calibrate_APIC_clock();
/*
@@ -1656,9 +1643,9 @@ void setup_APIC_clock (void)
/*
* Other CPU is calibrating, wait for finish:
*/
- printk("waiting for other CPU calibrating APIC timer ... ");
+ SMP_PRINTK(("waiting for other CPU calibrating APIC ... "));
while (calibration_lock == 1);
- printk("done, continuing.\n");
+ SMP_PRINTK(("done, continuing.\n"));
}
/*
@@ -1669,16 +1656,8 @@ void setup_APIC_clock (void)
prof_counter[cpu] = prof_multiplier[cpu] = 1;
/*
- * FIXME: i sporadically see booting problems (keyboard irq is
- * lost, looks like the timer irq isnt working or some irq
- * lock is messed up). Once we reboot the bug doesnt showu
- * up anymore.
- *
- * i'm quite certain it's a timing problem/race condition in
- * the bootup logic, not a hw bug. It might have been gone
- * meanwhile, tell me if you see it.
+ * We ACK the APIC, just in case there is something pending.
*/
-
ack_APIC_irq ();
restore_flags(flags);
@@ -1686,7 +1665,7 @@ void setup_APIC_clock (void)
/*
* the frequency of the profiling timer can be changed
- * by writing 4 bytes into /proc/profile.
+ * by writing a multiplier value into /proc/profile.
*
* usually you want to run this on all CPUs ;)
*/
@@ -1697,8 +1676,8 @@ int setup_profiling_timer (unsigned int multiplier)
/*
* Sanity check. [at least 500 APIC cycles should be
- * between APIC interrupts as a rule of thumb, rather be
- * careful as irq flooding renders the system unusable]
+ * between APIC interrupts as a rule of thumb, to avoid
+ * irqs flooding us]
*/
if ( (!multiplier) || (calibration_result/multiplier < 500))
return -EINVAL;
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 6a7d6b461..e45cc7279 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -23,6 +23,7 @@
#include <linux/time.h>
#include <linux/delay.h>
#include <linux/init.h>
+#include <linux/smp.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -379,11 +380,15 @@ static inline void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
do_timer(regs);
/*
* In the SMP case we use the local APIC timer interrupt to do the
- * profiling.
+ * profiling, except when we simulate SMP mode on a uniprocessor
+ * system; in that case we have to call the local interrupt handler.
*/
#ifndef __SMP__
if (!user_mode(regs))
x86_do_profile(regs->eip);
+#else
+ if (!smp_found_config)
+ smp_local_timer_interrupt(regs);
#endif
/*
diff --git a/arch/i386/kernel/trampoline.S b/arch/i386/kernel/trampoline.S
index 63bc51c5e..d0a726f6b 100644
--- a/arch/i386/kernel/trampoline.S
+++ b/arch/i386/kernel/trampoline.S
@@ -21,13 +21,9 @@
* and IP is zero. Thus, data addresses need to be absolute
* (no relocation) and are taken with regard to r_base.
*
- * On the transition to protected mode, this page appears at
- * address 8192, so protected mode addresses are with regard
- * to p_base.
- *
* If you work on this file, check the object module with objdump
* --full-contents --reloc to make sure there are no relocation
- * entries.
+ * entries except for the gdt one.
*/
#include <linux/linkage.h>
@@ -39,15 +35,10 @@
ENTRY(trampoline_data)
r_base = .
-p_base = . - 8192
mov %cs, %ax # Code and data in the same place
mov %ax, %ds
- mov %ax, %cx # Pass stack info to the 32bit boot
- shl $4, %cx # Segment -> Offset
- add $4096, %cx # End of page is wanted
-
mov $1, %bx # Flag an SMP trampoline
cli # We should be safe anyway
@@ -71,37 +62,7 @@ idt_48:
gdt_48:
.word 0x0800 # gdt limit = 2048, 256 GDT entries
- .word gdt - p_base, 0x0 # gdt base = gdt (first SMP CPU)
- # we load the others with first table
- # saves rewriting gdt_48 for each
-gdt:
- .word 0, 0, 0, 0 # dummy
-
- .word 0, 0, 0, 0 # unused
-
-# walken modif
-
- .word 0xFFFF # 4 Gb - (0x100000*0x1000 = 4Gb)
- .word 0x0000 # base address = 0
- .word 0x9A00 # code read / exec
- .word 0x00CF # granularity = 4096, 386 (+5th nibble of limit)
-
- .word 0xFFFF # 4 Gb - (0x100000*0x1000 = 4Gb)
- .word 0x0000 # base address = 0
- .word 0x9200 # data read / write
- .word 0x00CF # granularity = 4096, 386 (+5th nibble of limit)
-
-# walken modif
-
-# .word 0x07FF # 8 Mb - limit = 2047 (2048 * 4096 = 8 Mb)
-# .word 0x0000 # base address = 0
-# .word 0x9A00 # code read / exec
-# .word 0x00C0 # granularity = 4096, 386
-
-# .word 0x07FF # 8 Mb - limit = 2047 (2048 * 4096 = 8 Mb)
-# .word 0x0000 # base address = 0
-# .word 0x9200 # data read / write
-# .word 0x00C0 # granularity = 4096, 386
+ .long gdt-0xc0000000 # gdt base = gdt (first SMP CPU)
.globl SYMBOL_NAME(trampoline_end)
SYMBOL_NAME_LABEL(trampoline_end)
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 905cf5b13..696e37004 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -23,10 +23,12 @@
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
+#include <linux/delay.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/io.h>
+#include <asm/spinlock.h>
asmlinkage int system_call(void);
asmlinkage void lcall7(void);
@@ -121,7 +123,7 @@ static void show_registers(struct pt_regs *regs)
unsigned long esp;
unsigned short ss;
unsigned long *stack, addr, module_start, module_end;
- extern char start_kernel, _etext;
+ extern char _stext, _etext;
esp = (unsigned long) &regs->esp;
ss = KERNEL_DS;
@@ -129,8 +131,8 @@ static void show_registers(struct pt_regs *regs)
esp = regs->esp;
ss = regs->xss & 0xffff;
}
- printk("CPU: %d\n", smp_processor_id());
- printk("EIP: %04x:[<%08lx>]\nEFLAGS: %08lx\n", 0xffff & regs->xcs,regs->eip,regs->eflags);
+ printk("CPU: %d\nEIP: %04x:[<%08lx>]\nEFLAGS: %08lx\n",
+ smp_processor_id(), 0xffff & regs->xcs, regs->eip, regs->eflags);
printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
regs->eax, regs->ebx, regs->ecx, regs->edx);
printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
@@ -138,10 +140,8 @@ static void show_registers(struct pt_regs *regs)
printk("ds: %04x es: %04x ss: %04x\n",
regs->xds & 0xffff, regs->xes & 0xffff, ss);
store_TR(i);
- if (STACK_MAGIC != *(unsigned long *)current->kernel_stack_page)
- printk("Corrupted stack page\n");
printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)\nStack: ",
- current->comm, current->pid, 0xffff & i, current->kernel_stack_page);
+ current->comm, current->pid, 0xffff & i, 4096+(unsigned long)current);
stack = (unsigned long *) esp;
for(i=0; i < kstack_depth_to_print; i++) {
if (((long) stack & 4095) == 0)
@@ -166,7 +166,7 @@ static void show_registers(struct pt_regs *regs)
* down the cause of the crash will be able to figure
* out the call path that was taken.
*/
- if (((addr >= (unsigned long) &start_kernel) &&
+ if (((addr >= (unsigned long) &_stext) &&
(addr <= (unsigned long) &_etext)) ||
((addr >= module_start) && (addr <= module_end))) {
if (i && ((i % 8) == 0))
@@ -181,13 +181,19 @@ static void show_registers(struct pt_regs *regs)
printk("\n");
}
+spinlock_t die_lock;
+
/*static*/ void die_if_kernel(const char * str, struct pt_regs * regs, long err)
{
if ((regs->eflags & VM_MASK) || (3 & regs->xcs) == 3)
return;
console_verbose();
+ spin_lock_irq(&die_lock);
printk("%s: %04lx\n", str, err & 0xffff);
show_registers(regs);
+do { int i=2000000000; while (i) i--; } while (0);
+do { int i=2000000000; while (i) i--; } while (0);
+ spin_unlock_irq(&die_lock);
do_exit(SIGSEGV);
}
@@ -235,18 +241,45 @@ out:
unlock_kernel();
}
-asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
+static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
{
- printk("NMI\n"); show_registers(regs);
-#ifdef CONFIG_SMP_NMI_INVAL
- smp_flush_tlb_rcv();
-#else
-#ifndef CONFIG_IGNORE_NMI
printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
- printk("You probably have a hardware problem with your RAM chips or a\n");
- printk("power saving mode enabled.\n");
-#endif
-#endif
+ printk("You probably have a hardware problem with your RAM chips\n");
+}
+
+static void io_check_error(unsigned char reason, struct pt_regs * regs)
+{
+ unsigned long i;
+
+ printk("NMI: IOCK error (debug interrupt?)\n");
+ show_registers(regs);
+
+ /* Re-enable the IOCK line, wait for a few seconds */
+ reason |= 8;
+ outb(reason, 0x61);
+ i = 2000;
+ while (--i) udelay(1000);
+ reason &= ~8;
+ outb(reason, 0x61);
+}
+
+static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+{
+ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
+ printk("Dazed and confused, but trying to continue\n");
+ printk("Do you have a strange power saving mode enabled?\n");
+}
+
+asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
+{
+ unsigned char reason = inb(0x61);
+
+ if (reason & 0x80)
+ mem_parity_error(reason, regs);
+ if (reason & 0x40)
+ io_check_error(reason, regs);
+ if (!(reason & 0xc0))
+ unknown_nmi_error(reason, regs);
}
asmlinkage void do_debug(struct pt_regs * regs, long error_code)
@@ -380,15 +413,7 @@ __initfunc(void trap_init(void))
{
int i;
struct desc_struct * p;
- static int smptrap=0;
-
- if(smptrap)
- {
- __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
- load_ldt(0);
- return;
- }
- smptrap++;
+
if (readl(0x0FFFD9) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
EISA_bus = 1;
set_call_gate(&default_ldt,lcall7);
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index bfba24327..a09fa6419 100644
--- a/arch/i386/kernel/vm86.c
+++ b/arch/i386/kernel/vm86.c
@@ -81,8 +81,8 @@ asmlinkage struct pt_regs * save_v86_state(struct kernel_vm86_regs * regs)
printk("vm86: could not access userspace vm86_info\n");
do_exit(SIGSEGV);
}
- current->tss.esp0 = current->saved_kernel_stack;
- current->saved_kernel_stack = 0;
+ current->tss.esp0 = current->tss.saved_esp0;
+ current->tss.saved_esp0 = 0;
ret = KVM86->regs32;
unlock_kernel();
return ret;
@@ -137,7 +137,7 @@ asmlinkage int sys_vm86old(struct vm86_struct * v86)
lock_kernel();
tsk = current;
- if (tsk->saved_kernel_stack)
+ if (tsk->tss.saved_esp0)
goto out;
tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1);
tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
@@ -187,7 +187,7 @@ asmlinkage int sys_vm86(unsigned long subfunction, struct vm86plus_struct * v86)
/* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
ret = -EPERM;
- if (tsk->saved_kernel_stack)
+ if (tsk->tss.saved_esp0)
goto out;
tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1);
tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
@@ -247,7 +247,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
* Save old state, set default return value (%eax) to 0
*/
info->regs32->eax = 0;
- tsk->saved_kernel_stack = tsk->tss.esp0;
+ tsk->tss.saved_esp0 = tsk->tss.esp0;
tsk->tss.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
tsk->tss.screen_bitmap = info->screen_bitmap;
@@ -601,11 +601,17 @@ static inline void free_vm86_irq(int irqnumber)
static inline int task_valid(struct task_struct *tsk)
{
struct task_struct *p;
+ int ret = 0;
+ read_lock(&tasklist_lock);
for_each_task(p) {
- if ((p == tsk) && (p->sig)) return 1;
+ if ((p == tsk) && (p->sig)) {
+ ret = 1;
+ break;
+ }
}
- return 0;
+ read_unlock(&tasklist_lock);
+ return ret;
}
static inline void handle_irq_zombies(void)