From d8d9b8f76f22b7a16a83e261e64f89ee611f49df Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sun, 1 Jun 1997 03:16:17 +0000 Subject: Initial revision --- arch/i386/kernel/Makefile | 2 +- arch/i386/kernel/bios32.c | 72 ++-- arch/i386/kernel/entry.S | 81 ++--- arch/i386/kernel/head.S | 136 ++++---- arch/i386/kernel/i386_ksyms.c | 10 +- arch/i386/kernel/init_task.c | 22 ++ arch/i386/kernel/irq.c | 271 +++++++-------- arch/i386/kernel/irq.h | 187 +--------- arch/i386/kernel/process.c | 26 +- arch/i386/kernel/ptrace.c | 28 +- arch/i386/kernel/setup.c | 14 +- arch/i386/kernel/signal.c | 10 + arch/i386/kernel/smp.c | 773 ++++++++++++++++++++---------------------- arch/i386/kernel/time.c | 7 +- arch/i386/kernel/trampoline.S | 43 +-- arch/i386/kernel/traps.c | 77 +++-- arch/i386/kernel/vm86.c | 20 +- 17 files changed, 808 insertions(+), 971 deletions(-) create mode 100644 arch/i386/kernel/init_task.c (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index e04fb5efb..9491ef562 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -15,7 +15,7 @@ else $(CC) -D__ASSEMBLY__ -traditional -c $< -o $*.o endif -all: kernel.o head.o +all: kernel.o head.o init_task.o O_TARGET := kernel.o O_OBJS := process.o signal.o entry.o traps.o irq.o vm86.o bios32.o \ diff --git a/arch/i386/kernel/bios32.c b/arch/i386/kernel/bios32.c index e128000c3..157e62b2d 100644 --- a/arch/i386/kernel/bios32.c +++ b/arch/i386/kernel/bios32.c @@ -1,6 +1,8 @@ /* * bios32.c - BIOS32, PCI BIOS functions. * + * $Id: bios32.c,v 1.11 1997/05/07 13:35:21 mj Exp $ + * * Sponsored by * iX Multiuser Multitasking Magazine * Hannover, Germany @@ -52,6 +54,11 @@ * Feb 3, 1997 : Set internal functions to static, save/restore flags * avoid dead locks reading broken PCI BIOS, werner@suse.de * + * Apr 26, 1997 : Fixed case when there is BIOS32, but not PCI BIOS + * (mj@atrey.karlin.mff.cuni.cz) + * + * May 7, 1997 : Added some missing cli()'s. 
[mj] + * */ #include @@ -158,7 +165,7 @@ static unsigned long bios32_service(unsigned long service) unsigned long entry; /* %edx */ unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%edi)" : "=a" (return_code), "=b" (address), @@ -173,10 +180,10 @@ static unsigned long bios32_service(unsigned long service) case 0: return address + entry; case 0x80: /* Not present */ - printk("bios32_service(%ld) : not present\n", service); + printk("bios32_service(0x%lx) : not present\n", service); return 0; default: /* Shouldn't happen */ - printk("bios32_service(%ld) : returned 0x%x, mail drew@colorado.edu\n", + printk("bios32_service(0x%lx) : returned 0x%x, mail drew@colorado.edu\n", service, return_code); return 0; } @@ -189,7 +196,7 @@ static struct { } pci_indirect = { 0, KERNEL_CS }; -__initfunc(static unsigned long check_pcibios(unsigned long memory_start, unsigned long memory_end)) +__initfunc(static int check_pcibios(void)) { unsigned long signature; unsigned char present_status; @@ -201,7 +208,7 @@ __initfunc(static unsigned long check_pcibios(unsigned long memory_start, unsign if ((pcibios_entry = bios32_service(PCI_SERVICE))) { pci_indirect.address = pcibios_entry | PAGE_OFFSET; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%edi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -212,7 +219,7 @@ __initfunc(static unsigned long check_pcibios(unsigned long memory_start, unsign : "1" (PCIBIOS_PCI_BIOS_PRESENT), "D" (&pci_indirect) : "bx", "cx"); - restore_flags(flags); + restore_flags(flags); present_status = (pack >> 16) & 0xff; major_revision = (pack >> 8) & 0xff; @@ -232,9 +239,10 @@ __initfunc(static unsigned long check_pcibios(unsigned long memory_start, unsign if (pcibios_entry) { printk ("pcibios_init : PCI BIOS revision %x.%02x entry at 0x%lx\n", major_revision, minor_revision, pcibios_entry); + return 1; } } - return memory_start; + return 0; } @@ -245,7 +253,7 @@ static int pci_bios_find_class (unsigned int 
class_code, unsigned short index, unsigned long ret; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__ ("lcall (%%edi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -270,7 +278,7 @@ static int pci_bios_find_device (unsigned short vendor, unsigned short device_id unsigned short ret; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%edi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -295,7 +303,7 @@ static int pci_bios_read_config_byte(unsigned char bus, unsigned long bx = (bus << 8) | device_fn; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%esi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -317,7 +325,7 @@ static int pci_bios_read_config_word (unsigned char bus, unsigned long bx = (bus << 8) | device_fn; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%esi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -339,7 +347,7 @@ static int pci_bios_read_config_dword (unsigned char bus, unsigned long bx = (bus << 8) | device_fn; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%esi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -361,7 +369,7 @@ static int pci_bios_write_config_byte (unsigned char bus, unsigned long bx = (bus << 8) | device_fn; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%esi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -383,7 +391,7 @@ static int pci_bios_write_config_word (unsigned char bus, unsigned long bx = (bus << 8) | device_fn; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%esi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -405,7 +413,7 @@ static int pci_bios_write_config_dword (unsigned char bus, unsigned long bx = (bus << 8) | device_fn; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); __asm__("lcall (%%esi)\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" @@ -476,7 +484,7 @@ static int pci_direct_find_class (unsigned int 
class_code, unsigned short index, struct pci_dev *dev; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); for (dev = pci_devices; dev; dev = dev->next) { if (dev->class == class_code) { if (curr == index) { @@ -502,7 +510,7 @@ static int pci_conf1_read_config_byte(unsigned char bus, unsigned char device_fn { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outl(CONFIG_CMD(bus,device_fn,where), 0xCF8); switch (where & 3) { case 0: *value = inb(0xCFC); @@ -523,7 +531,7 @@ static int pci_conf1_read_config_word (unsigned char bus, { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outl(CONFIG_CMD(bus,device_fn,where), 0xCF8); if (where & 2) *value = inw(0xCFE); @@ -538,7 +546,7 @@ static int pci_conf1_read_config_dword (unsigned char bus, unsigned char device_ { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outl(CONFIG_CMD(bus,device_fn,where), 0xCF8); *value = inl(0xCFC); restore_flags(flags); @@ -550,7 +558,7 @@ static int pci_conf1_write_config_byte (unsigned char bus, unsigned char device_ { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outl(CONFIG_CMD(bus,device_fn,where), 0xCF8); outb(value, 0xCFC); restore_flags(flags); @@ -562,7 +570,7 @@ static int pci_conf1_write_config_word (unsigned char bus, unsigned char device_ { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outl(CONFIG_CMD(bus,device_fn,where), 0xCF8); outw(value, 0xCFC); restore_flags(flags); @@ -574,7 +582,7 @@ static int pci_conf1_write_config_dword (unsigned char bus, unsigned char device { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outl(CONFIG_CMD(bus,device_fn,where), 0xCF8); outl(value, 0xCFC); restore_flags(flags); @@ -610,7 +618,7 @@ static int pci_conf2_read_config_byte(unsigned char bus, unsigned char device_fn if (device_fn & 0x80) return PCIBIOS_DEVICE_NOT_FOUND; - save_flags(flags); + save_flags(flags); cli(); outb (FUNC(device_fn), 
0xCF8); outb (bus, 0xCFA); *value = inb(IOADDR(device_fn,where)); @@ -626,7 +634,7 @@ static int pci_conf2_read_config_word (unsigned char bus, unsigned char device_f if (device_fn & 0x80) return PCIBIOS_DEVICE_NOT_FOUND; - save_flags(flags); + save_flags(flags); cli(); outb (FUNC(device_fn), 0xCF8); outb (bus, 0xCFA); *value = inw(IOADDR(device_fn,where)); @@ -642,7 +650,7 @@ static int pci_conf2_read_config_dword (unsigned char bus, unsigned char device_ if (device_fn & 0x80) return PCIBIOS_DEVICE_NOT_FOUND; - save_flags(flags); + save_flags(flags); cli(); outb (FUNC(device_fn), 0xCF8); outb (bus, 0xCFA); *value = inl (IOADDR(device_fn,where)); @@ -656,7 +664,7 @@ static int pci_conf2_write_config_byte (unsigned char bus, unsigned char device_ { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outb (FUNC(device_fn), 0xCF8); outb (bus, 0xCFA); outb (value, IOADDR(device_fn,where)); @@ -670,7 +678,7 @@ static int pci_conf2_write_config_word (unsigned char bus, unsigned char device_ { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outb (FUNC(device_fn), 0xCF8); outb (bus, 0xCFA); outw (value, IOADDR(device_fn,where)); @@ -684,7 +692,7 @@ static int pci_conf2_write_config_dword (unsigned char bus, unsigned char device { unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); outb (FUNC(device_fn), 0xCF8); outb (bus, 0xCFA); outl (value, IOADDR(device_fn,where)); @@ -716,7 +724,7 @@ __initfunc(static struct pci_access *check_direct_pci(void)) unsigned int tmp; unsigned long flags; - save_flags(flags); + save_flags(flags); cli(); /* * check if configuration type 1 works @@ -912,13 +920,11 @@ __initfunc(unsigned long pcibios_init(unsigned long memory_start, unsigned long bios32_entry = check->fields.entry; printk ("pcibios_init : BIOS32 Service Directory entry at 0x%lx\n", bios32_entry); bios32_indirect.address = bios32_entry + PAGE_OFFSET; - access_pci = &pci_bios_access; } } } - if (bios32_entry) { - 
memory_start = check_pcibios (memory_start, memory_end); - } + if (bios32_entry && check_pcibios()) + access_pci = &pci_bios_access; #endif return memory_start; } diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 84fe0c7fd..ac67da797 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -110,62 +110,45 @@ ENOSYS = 38 addl $4,%esp; \ iret -#ifdef __SMP__ -/* Get the processor ID multiplied by 4 */ -#define GET_PROCESSOR_OFFSET(reg) \ - movl SYMBOL_NAME(apic_reg), reg; \ - movl 32(reg), reg; \ - shrl $22, reg; \ - andl $0x3C, reg; - -#define GET_CURRENT(reg) \ - GET_PROCESSOR_OFFSET(reg) \ - movl SYMBOL_NAME(current_set)(reg),reg - -#else - #define GET_CURRENT(reg) \ - movl SYMBOL_NAME(current_set),reg - -#endif + movl %esp, reg; \ + andl $-8192, reg; ENTRY(lcall7) pushfl # We get a different stack layout with call gates, pushl %eax # which has to be cleaned up later.. SAVE_ALL - GET_CURRENT(%ebx) movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. movl CS(%esp),%edx # this is eip.. movl EFLAGS(%esp),%ecx # and this is cs.. 
movl %eax,EFLAGS(%esp) # movl %edx,EIP(%esp) # Now we move them to their "normal" places movl %ecx,CS(%esp) # - movl %esp,%eax - GET_CURRENT(%edx) - pushl %eax - movl exec_domain(%edx),%edx # Get the execution domain + movl %esp,%ebx + pushl %ebx + andl $-8192,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain movl 4(%edx),%edx # Get the lcall7 handler for the domain call *%edx popl %eax jmp ret_from_sys_call + #ifdef __SMP__ ALIGN .globl ret_from_smpfork ret_from_smpfork: + GET_CURRENT(%ebx) btrl $0, SYMBOL_NAME(scheduler_lock) jmp ret_from_sys_call #endif /* __SMP__ */ - ALIGN -handle_bottom_half: - pushl $2f - jmp SYMBOL_NAME(do_bottom_half) - - ALIGN -reschedule: - pushl $ret_from_sys_call - jmp SYMBOL_NAME(schedule) # test +/* + * Return to user mode is not as complex as all this looks, + * but we want the default path for a system call return to + * go as quickly as possible which is why some of this is + * less clear than it otherwise should be. + */ ENTRY(system_call) pushl %eax # save orig_eax @@ -180,16 +163,11 @@ ENTRY(system_call) ALIGN .globl ret_from_sys_call .globl ret_from_intr -ret_from_intr: ret_from_sys_call: - GET_CURRENT(%ebx) movl SYMBOL_NAME(bh_mask),%eax andl SYMBOL_NAME(bh_active),%eax jne handle_bottom_half -2: movl EFLAGS(%esp),%eax # mix EFLAGS and CS - movb CS(%esp),%al - testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor? 
- je 1f +ret_with_reschedule: cmpl $0,SYMBOL_NAME(need_resched) jne reschedule movl blocked(%ebx),%eax @@ -197,7 +175,6 @@ ret_from_sys_call: notl %eax andl signal(%ebx),%eax jne signal_return -1: RESTORE_ALL ALIGN signal_return: @@ -230,6 +207,30 @@ badsys: movl $-ENOSYS,EAX(%esp) jmp ret_from_sys_call + ALIGN +ret_from_exception: + movl SYMBOL_NAME(bh_mask),%eax + andl SYMBOL_NAME(bh_active),%eax + jne handle_bottom_half + ALIGN +ret_from_intr: + GET_CURRENT(%ebx) + movl EFLAGS(%esp),%eax # mix EFLAGS and CS + movb CS(%esp),%al + testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor? + jne ret_with_reschedule + RESTORE_ALL + + ALIGN +handle_bottom_half: + pushl $ret_from_intr + jmp SYMBOL_NAME(do_bottom_half) + + ALIGN +reschedule: + pushl $ret_from_sys_call + jmp SYMBOL_NAME(schedule) # test + ENTRY(divide_error) pushl $0 # no error code @@ -260,7 +261,7 @@ error_code: GET_CURRENT(%ebx) call *%ecx addl $8,%esp - jmp ret_from_sys_call + jmp ret_from_exception ENTRY(coprocessor_error) pushl $0 @@ -271,7 +272,7 @@ ENTRY(device_not_available) pushl $-1 # mark this as an int SAVE_ALL GET_CURRENT(%ebx) - pushl $ret_from_sys_call + pushl $ret_from_exception movl %cr0,%eax testl $0x4,%eax # EM (math emulation bit) je SYMBOL_NAME(math_state_restore) diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 2bd095997..a42b87b1b 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -39,19 +39,21 @@ startup_32: jz 1f /* * New page tables may be in 4Mbyte page mode and may - * be using the global pages. + * be using the global pages. + * + * NOTE! We have to correct for the fact that we're + * not yet offset 0xC0000000.. 
*/ +#define cr4_bits mmu_cr4_features-0xC0000000 #ifdef GAS_KNOWS_CR4 movl %cr4,%eax # Turn on 4Mb pages - orl $16+128,%eax + orl cr4_bits,%eax movl %eax,%cr4 #else .byte 0x0f,0x20,0xe0 - orl $16+128,%eax + orl cr4_bits,%eax .byte 0x0f,0x22,0xe0 #endif - movl %eax,%cr3 /* flush TLB as per app note */ - movl %cr0,%eax #endif /* * Setup paging (the tables are already set up, just switch them on) @@ -67,24 +69,16 @@ startup_32: movl $1f,%eax jmp *%eax /* make sure eip is relocated */ 1: + /* Set up the stack pointer */ + lss stack_start,%esp #ifdef __SMP__ orw %bx,%bx jz 1f /* Initial CPU cleans BSS */ -/* - * Set up the stack - */ - movl $(KERNEL_DS),%eax /* walken modif */ - mov %ax,%ss - xorl %eax,%eax - movw %cx, %ax - movl %eax,%esp - addl $0xC0000000, %esp /* shift it to the upper mapping */ pushl $0 popfl jmp checkCPUtype 1: - lss stack_start,%esp #endif __SMP__ /* * Clear BSS first so that there are no surprises... @@ -305,15 +299,53 @@ rp_sidt: jne rp_sidt ret +ENTRY(stack_start) + .long SYMBOL_NAME(init_task_union)+8192 + .long KERNEL_DS + +/* This is the default interrupt "handler" :-) */ +int_msg: + .asciz "Unknown interrupt\n" + ALIGN +ignore_int: + cld + pushl %eax + pushl %ecx + pushl %edx + push %ds + movl $(KERNEL_DS),%eax + mov %ax,%ds + mov %ax,%es + mov %ax,%fs + pushl $int_msg + call SYMBOL_NAME(printk) + popl %eax + pop %ds + popl %edx + popl %ecx + popl %eax + iret + +/* + * The interrupt descriptor table has room for 256 idt's + */ + ALIGN +.word 0 +idt_descr: + .word 256*8-1 # idt contains 256 entries + .long SYMBOL_NAME(idt) + + ALIGN +.word 0 +gdt_descr: +#ifdef CONFIG_APM + .word (11+2*NR_TASKS)*8-1 +#else + .word (8+2*NR_TASKS)*8-1 +#endif + .long SYMBOL_NAME(gdt) /* - * page 0 is made non-existent, so that kernel NULL pointer references get - * caught. Thus the swapper page directory has been moved to 0x101000 - * with the introduction of the compressed boot code. 
Theoretically, - * the original design of overlaying the startup code with the swapper - * page directory is still possible --- it would reduce the size of the kernel - * by 2-3k. This would be a good thing to do at some point..... - * * This is initialized to create a identity-mapping at 0-4M (for bootup * purposes) and another mapping of the 0-4M area at virtual address * 0xC0000000. @@ -471,63 +503,29 @@ ENTRY(empty_bad_page_table) ENTRY(empty_zero_page) .org 0x6000 - -stack_start: - .long SYMBOL_NAME(init_user_stack)+4096 - .long KERNEL_DS - -/* This is the default interrupt "handler" :-) */ -int_msg: - .asciz "Unknown interrupt\n" - ALIGN -ignore_int: - cld - pushl %eax - pushl %ecx - pushl %edx - push %ds - push %es - push %fs - movl $(KERNEL_DS),%eax - mov %ax,%ds - mov %ax,%es - mov %ax,%fs - pushl $int_msg - call SYMBOL_NAME(printk) - popl %eax - pop %fs - pop %es - pop %ds - popl %edx - popl %ecx - popl %eax - iret +ENTRY(this_must_match_init_task) /* - * The interrupt descriptor table has room for 256 idt's + * This starts the data section. Note that the above is all + * in the text section because it has alignment requirements + * that we cannot fulfill any other way. */ - ALIGN -.word 0 -idt_descr: - .word 256*8-1 # idt contains 256 entries - .long SYMBOL_NAME(idt) +.data +ALIGN +/* 256 quadwords - 2048 bytes of idt */ ENTRY(idt) .fill 256,8,0 # idt is uninitialized - ALIGN -.word 0 -gdt_descr: -#ifdef CONFIG_APM - .word (11+2*NR_TASKS)*8-1 -#else - .word (8+2*NR_TASKS)*8-1 -#endif - .long SYMBOL_NAME(gdt) - /* * This gdt setup gives the kernel a 1GB address space at virtual * address 0xC0000000 - space enough for expansion, I hope. + * + * This contains up to 8192 quadwords depending on NR_TASKS - 64kB of + * gdt entries. Ugh. + * + * NOTE! Make sure the gdt descriptor in head.S matches this if you + * change anything. 
*/ ENTRY(gdt) .quad 0x0000000000000000 /* NULL descriptor */ diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 8c16f0204..daa6baf42 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -19,6 +19,11 @@ extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu(elf_fpregset_t *); extern void __lock_kernel(void); +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) +extern struct drive_info_struct drive_info; +EXPORT_SYMBOL(drive_info); +#endif + /* platform dependent support */ EXPORT_SYMBOL(EISA_bus); EXPORT_SYMBOL(MCA_bus); @@ -39,12 +44,13 @@ EXPORT_SYMBOL(csum_partial_copy); #ifdef __SMP__ EXPORT_SYMBOL(apic_reg); /* Needed internally for the I386 inlines */ EXPORT_SYMBOL(cpu_data); -EXPORT_SYMBOL(kernel_flag); -EXPORT_SYMBOL(active_kernel_processor); +EXPORT_SYMBOL_NOVERS(kernel_flag); +EXPORT_SYMBOL_NOVERS(active_kernel_processor); EXPORT_SYMBOL(smp_invalidate_needed); EXPORT_SYMBOL_NOVERS(__lock_kernel); /* Global SMP irq stuff */ +EXPORT_SYMBOL(synchronize_irq); EXPORT_SYMBOL(global_irq_holder); EXPORT_SYMBOL(__global_cli); EXPORT_SYMBOL(__global_sti); diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c new file mode 100644 index 000000000..cc0a19231 --- /dev/null +++ b/arch/i386/kernel/init_task.c @@ -0,0 +1,22 @@ +#include +#include + +#include + +static struct vm_area_struct init_mmap = INIT_MMAP; +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS; +struct mm_struct init_mm = INIT_MM; + +/* + * Initial task structure. + * + * We need to make sure that this is 8192-byte aligned due to the + * way process stacks are handled. This is done by making sure + * the linker maps this in the .text segment right after head.S, + * and making head.S ensure the proper alignment. 
+ * + * The things we do for performance.. + */ +union task_union init_task_union __attribute__((__section__(".text"))) = { INIT_TASK }; diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 95a7b525f..e5fb5acb1 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -44,9 +44,6 @@ extern volatile unsigned long smp_local_timer_ticks[1+NR_CPUS]; #define CR0_NE 32 -static unsigned char cache_21 = 0xff; -static unsigned char cache_A1 = 0xff; - unsigned int local_irq_count[NR_CPUS]; #ifdef __SMP__ atomic_t __intel_bh_counter; @@ -58,51 +55,84 @@ int __intel_bh_counter; static unsigned int int_count[NR_CPUS][NR_IRQS] = {{0},}; #endif -static inline void mask_irq(unsigned int irq_nr) -{ - unsigned char mask; +/* + * This contains the irq mask for both irq controllers + */ +static unsigned int cached_irq_mask = 0xffff; + +#define cached_21 (((char *)(&cached_irq_mask))[0]) +#define cached_A1 (((char *)(&cached_irq_mask))[1]) - mask = 1 << (irq_nr & 7); - if (irq_nr < 8) { - cache_21 |= mask; - outb(cache_21,0x21); +spinlock_t irq_controller_lock; + +/* + * This is always called from an interrupt context + * with local interrupts disabled. Don't worry about + * irq-safe locks. + * + * Note that we always ack the primary irq controller, + * even if the interrupt came from the secondary, as + * the primary will still have routed it. Oh, the joys + * of PC hardware. 
+ */ +static inline void mask_and_ack_irq(int irq_nr) +{ + spin_lock(&irq_controller_lock); + cached_irq_mask |= 1 << irq_nr; + if (irq_nr & 8) { + inb(0xA1); /* DUMMY */ + outb(cached_A1,0xA1); + outb(0x20,0xA0); } else { - cache_A1 |= mask; - outb(cache_A1,0xA1); + inb(0x21); /* DUMMY */ + outb(cached_21,0x21); } + outb(0x20,0x20); + spin_unlock(&irq_controller_lock); } -static inline void unmask_irq(unsigned int irq_nr) +static inline void set_irq_mask(int irq_nr) { - unsigned char mask; - - mask = ~(1 << (irq_nr & 7)); - if (irq_nr < 8) { - cache_21 &= mask; - outb(cache_21,0x21); + if (irq_nr & 8) { + outb(cached_A1,0xA1); } else { - cache_A1 &= mask; - outb(cache_A1,0xA1); + outb(cached_21,0x21); } } +/* + * These have to be protected by the spinlock + * before being called. + */ +static inline void mask_irq(unsigned int irq_nr) +{ + cached_irq_mask |= 1 << irq_nr; + set_irq_mask(irq_nr); +} + +static inline void unmask_irq(unsigned int irq_nr) +{ + cached_irq_mask &= ~(1 << irq_nr); + set_irq_mask(irq_nr); +} + void disable_irq(unsigned int irq_nr) { unsigned long flags; - save_flags(flags); - cli(); + spin_lock_irqsave(&irq_controller_lock, flags); mask_irq(irq_nr); - restore_flags(flags); + spin_unlock_irqrestore(&irq_controller_lock, flags); + synchronize_irq(); } void enable_irq(unsigned int irq_nr) { unsigned long flags; - save_flags(flags); - cli(); + + spin_lock_irqsave(&irq_controller_lock, flags); unmask_irq(irq_nr); - restore_flags(flags); + spin_unlock_irqrestore(&irq_controller_lock, flags); } /* @@ -133,7 +163,8 @@ void enable_irq(unsigned int irq_nr) #error make irq stub building NR_IRQS dependent and remove me. 
#endif -BUILD_TIMER_IRQ(FIRST,0,0x01) +BUILD_COMMON_IRQ() +BUILD_IRQ(FIRST,0,0x01) BUILD_IRQ(FIRST,1,0x02) BUILD_IRQ(FIRST,2,0x04) BUILD_IRQ(FIRST,3,0x08) @@ -157,10 +188,6 @@ BUILD_SMP_INTERRUPT(stop_cpu_interrupt) BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt) #endif -/* - * Pointers to the low-level handlers: first the general ones, then the - * fast ones, then the bad ones. - */ static void (*interrupt[17])(void) = { IRQ0_interrupt, IRQ1_interrupt, IRQ2_interrupt, IRQ3_interrupt, IRQ4_interrupt, IRQ5_interrupt, IRQ6_interrupt, IRQ7_interrupt, @@ -168,28 +195,6 @@ static void (*interrupt[17])(void) = { IRQ12_interrupt, IRQ13_interrupt, IRQ14_interrupt, IRQ15_interrupt }; -static void (*fast_interrupt[16])(void) = { - fast_IRQ0_interrupt, fast_IRQ1_interrupt, - fast_IRQ2_interrupt, fast_IRQ3_interrupt, - fast_IRQ4_interrupt, fast_IRQ5_interrupt, - fast_IRQ6_interrupt, fast_IRQ7_interrupt, - fast_IRQ8_interrupt, fast_IRQ9_interrupt, - fast_IRQ10_interrupt, fast_IRQ11_interrupt, - fast_IRQ12_interrupt, fast_IRQ13_interrupt, - fast_IRQ14_interrupt, fast_IRQ15_interrupt -}; - -static void (*bad_interrupt[16])(void) = { - bad_IRQ0_interrupt, bad_IRQ1_interrupt, - bad_IRQ2_interrupt, bad_IRQ3_interrupt, - bad_IRQ4_interrupt, bad_IRQ5_interrupt, - bad_IRQ6_interrupt, bad_IRQ7_interrupt, - bad_IRQ8_interrupt, bad_IRQ9_interrupt, - bad_IRQ10_interrupt, bad_IRQ11_interrupt, - bad_IRQ12_interrupt, bad_IRQ13_interrupt, - bad_IRQ14_interrupt, bad_IRQ15_interrupt -}; - /* * Initial irq handlers. */ @@ -240,14 +245,10 @@ int get_irq_list(char *buf) action = irq_action[i]; if (!action) continue; - len += sprintf(buf+len, "%2d: %10u %c %s", - i, kstat.interrupts[i], - (action->flags & SA_INTERRUPT) ? '+' : ' ', - action->name); + len += sprintf(buf+len, "%2d: %10u %s", + i, kstat.interrupts[i], action->name); for (action=action->next; action; action = action->next) { - len += sprintf(buf+len, ",%s %s", - (action->flags & SA_INTERRUPT) ? 
" +" : "", - action->name); + len += sprintf(buf+len, ", %s", action->name); } len += sprintf(buf+len, "\n"); } @@ -298,13 +299,9 @@ int get_smp_prof_list(char *buf) { for (j=0;jflags & SA_INTERRUPT) ? '+' : ' ', - action->name); + len += sprintf(buf+len, " %s", action->name); for (action=action->next; action; action = action->next) { - len += sprintf(buf+len, ",%s %s", - (action->flags & SA_INTERRUPT) ? " +" : "", - action->name); + len += sprintf(buf+len, ", %s", action->name); } len += sprintf(buf+len, "\n"); } @@ -393,16 +390,8 @@ static inline void check_smp_invalidate(int cpu) static unsigned long previous_irqholder; -#undef INIT_STUCK -#define INIT_STUCK 100000000 - -#undef STUCK -#define STUCK \ -if (!--stuck) {printk("wait_on_irq CPU#%d stuck at %08lx, waiting for %08lx (local=%d, global=%d)\n", cpu, where, previous_irqholder, local_count, atomic_read(&global_irq_count)); stuck = INIT_STUCK; } - static inline void wait_on_irq(int cpu, unsigned long where) { - int stuck = INIT_STUCK; int local_count = local_irq_count[cpu]; /* Are we the only one in an interrupt context? */ @@ -421,13 +410,12 @@ static inline void wait_on_irq(int cpu, unsigned long where) * their things before trying to get the lock again. */ for (;;) { - STUCK; check_smp_invalidate(cpu); if (atomic_read(&global_irq_count)) continue; if (global_irq_lock) continue; - if (!set_bit(0,&global_irq_lock)) + if (!test_and_set_bit(0,&global_irq_lock)) break; } atomic_add(local_count, &global_irq_count); @@ -456,28 +444,18 @@ void synchronize_irq(void) } } -#undef INIT_STUCK -#define INIT_STUCK 10000000 - -#undef STUCK -#define STUCK \ -if (!--stuck) {printk("get_irqlock stuck at %08lx, waiting for %08lx\n", where, previous_irqholder); stuck = INIT_STUCK;} - static inline void get_irqlock(int cpu, unsigned long where) { - int stuck = INIT_STUCK; - - if (set_bit(0,&global_irq_lock)) { + if (test_and_set_bit(0,&global_irq_lock)) { /* do we already hold the lock? 
*/ if ((unsigned char) cpu == global_irq_holder) return; /* Uhhuh.. Somebody else got it. Wait.. */ do { do { - STUCK; check_smp_invalidate(cpu); } while (test_bit(0,&global_irq_lock)); - } while (set_bit(0,&global_irq_lock)); + } while (test_and_set_bit(0,&global_irq_lock)); } /* * Ok, we got the lock bit. @@ -519,7 +497,8 @@ void __global_restore_flags(unsigned long flags) { switch (flags) { case 0: - __global_sti(); + release_irqlock(smp_processor_id()); + __sti(); break; case 1: __global_cli(); @@ -533,56 +512,58 @@ void __global_restore_flags(unsigned long flags) #endif /* - * do_IRQ handles IRQ's that have been installed without the - * SA_INTERRUPT flag: it uses the full signal-handling return - * and runs with other interrupts enabled. All relatively slow - * IRQ's should use this format: notably the keyboard/timer - * routines. + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). */ -asmlinkage void do_IRQ(int irq, struct pt_regs * regs) +asmlinkage void do_IRQ(struct pt_regs regs) { + int irq = regs.orig_eax & 0xff; struct irqaction * action; - int do_random, cpu = smp_processor_id(); + int status, cpu; + + /* + * mask and ack quickly, we don't want the irq controller + * thinking we're snobs just because some other CPU has + * disabled global interrupts (we have already done the + * INT_ACK cycles, it's too late to try to pretend to the + * controller that we aren't taking the interrupt). 
+ */ + mask_and_ack_irq(irq); + cpu = smp_processor_id(); irq_enter(cpu, irq); kstat.interrupts[irq]++; - /* slow interrupts run with interrupts enabled */ - __sti(); + /* Return with this interrupt masked if no action */ + status = 0; action = *(irq + irq_action); - do_random = 0; - while (action) { - do_random |= action->flags; - action->handler(irq, action->dev_id, regs); - action = action->next; + if (action) { + do { + status |= action->flags; + action->handler(irq, action->dev_id, ®s); + action = action->next; + } while (action); + if (status & SA_SAMPLE_RANDOM) + add_interrupt_randomness(irq); + + __cli(); + spin_lock(&irq_controller_lock); + unmask_irq(irq); + spin_unlock(&irq_controller_lock); } - if (do_random & SA_SAMPLE_RANDOM) - add_interrupt_randomness(irq); - irq_exit(cpu, irq); -} -/* - * do_fast_IRQ handles IRQ's that don't need the fancy interrupt return - * stuff - the handler is also running with interrupts disabled unless - * it explicitly enables them later. - */ -asmlinkage void do_fast_IRQ(int irq) -{ - struct irqaction * action; - int do_random, cpu = smp_processor_id(); - - irq_enter(cpu, irq); - kstat.interrupts[irq]++; - action = *(irq + irq_action); - do_random = 0; - while (action) { - do_random |= action->flags; - action->handler(irq, action->dev_id, NULL); - action = action->next; - } - if (do_random & SA_SAMPLE_RANDOM) - add_interrupt_randomness(irq); irq_exit(cpu, irq); + /* + * This should be conditional: we should really get + * a return code from the irq handler to tell us + * whether the handler wants us to do software bottom + * half handling or not.. 
+ */ + if (1) { + if (bh_active & bh_mask) + do_bottom_half(); + } } int setup_x86_irq(int irq, struct irqaction * new) @@ -597,10 +578,6 @@ int setup_x86_irq(int irq, struct irqaction * new) if (!(old->flags & new->flags & SA_SHIRQ)) return -EBUSY; - /* Can't share interrupts unless both are same type */ - if ((old->flags ^ new->flags) & SA_INTERRUPT) - return -EBUSY; - /* add new interrupt at end of irq queue */ do { p = &old->next; @@ -617,11 +594,9 @@ int setup_x86_irq(int irq, struct irqaction * new) *p = new; if (!shared) { - if (new->flags & SA_INTERRUPT) - set_intr_gate(0x20+irq,fast_interrupt[irq]); - else - set_intr_gate(0x20+irq,interrupt[irq]); + spin_lock(&irq_controller_lock); unmask_irq(irq); + spin_unlock(&irq_controller_lock); } restore_flags(flags); return 0; @@ -676,10 +651,6 @@ void free_irq(unsigned int irq, void *dev_id) save_flags(flags); cli(); *p = action->next; - if (!irq[irq_action]) { - mask_irq(irq); - set_intr_gate(0x20+irq,bad_interrupt[irq]); - } restore_flags(flags); kfree(action); return; @@ -689,7 +660,7 @@ void free_irq(unsigned int irq, void *dev_id) unsigned long probe_irq_on (void) { - unsigned int i, irqs = 0, irqmask; + unsigned int i, irqs = 0; unsigned long delay; /* first, enable any unassigned irqs */ @@ -705,19 +676,17 @@ unsigned long probe_irq_on (void) /* about 100ms delay */; /* now filter out any obviously spurious interrupts */ - irqmask = (((unsigned int)cache_A1)<<8) | (unsigned int)cache_21; - return irqs & ~irqmask; + return irqs & ~cached_irq_mask; } int probe_irq_off (unsigned long irqs) { - unsigned int i, irqmask; + unsigned int i; - irqmask = (((unsigned int)cache_A1)<<8) | (unsigned int)cache_21; #ifdef DEBUG - printk("probe_irq_off: irqs=0x%04lx irqmask=0x%04x\n", irqs, irqmask); + printk("probe_irq_off: irqs=0x%04lx irqmask=0x%04x\n", irqs, cached_irq_mask); #endif - irqs &= irqmask; + irqs &= cached_irq_mask; if (!irqs) return 0; i = ffz(~irqs); @@ -729,10 +698,6 @@ int probe_irq_off (unsigned long 
irqs) __initfunc(void init_IRQ(void)) { int i; - static unsigned char smptrap=0; - if(smptrap) - return; - smptrap=1; /* set the clock to 100 Hz */ outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */ @@ -740,7 +705,7 @@ __initfunc(void init_IRQ(void)) outb(LATCH >> 8 , 0x40); /* MSB */ for (i = 0; i < NR_IRQS ; i++) - set_intr_gate(0x20+i,bad_interrupt[i]); + set_intr_gate(0x20+i,interrupt[i]); #ifdef __SMP__ /* diff --git a/arch/i386/kernel/irq.h b/arch/i386/kernel/irq.h index 3a349f20a..1f9e89399 100644 --- a/arch/i386/kernel/irq.h +++ b/arch/i386/kernel/irq.h @@ -33,7 +33,6 @@ static inline void irq_enter(int cpu, int irq) static inline void irq_exit(int cpu, int irq) { - __cli(); hardirq_exit(cpu); release_irqlock(cpu); } @@ -63,125 +62,12 @@ static inline void irq_exit(int cpu, int irq) "mov %dx,%ds\n\t" \ "mov %dx,%es\n\t" -/* - * SAVE_MOST/RESTORE_MOST is used for the faster version of IRQ handlers, - * installed by using the SA_INTERRUPT flag. These kinds of IRQ's don't - * call the routines that do signal handling etc on return, and can have - * more relaxed register-saving etc. They are also atomic, and are thus - * suited for small, fast interrupts like the serial lines or the harddisk - * drivers, which don't actually need signal handling etc. - * - * Also note that we actually save only those registers that are used in - * C subroutines (%eax, %edx and %ecx), so if you do something weird, - * you're on your own. The only segments that are saved (not counting the - * automatic stack and code segment handling) are %ds and %es, and they - * point to kernel space. No messing around with %fs here. 
- */ -#define SAVE_MOST \ - "cld\n\t" \ - "push %es\n\t" \ - "push %ds\n\t" \ - "pushl %eax\n\t" \ - "pushl %edx\n\t" \ - "pushl %ecx\n\t" \ - "movl $" STR(KERNEL_DS) ",%edx\n\t" \ - "mov %dx,%ds\n\t" \ - "mov %dx,%es\n\t" - -#define RESTORE_MOST \ - "popl %ecx\n\t" \ - "popl %edx\n\t" \ - "popl %eax\n\t" \ - "pop %ds\n\t" \ - "pop %es\n\t" \ - "iret" - -/* - * Some fast irq handlers might want to access saved registers (mostly - * cs or flags) - */ - -struct fast_irq_regs { - long ecx; - long edx; - long eax; - int xds; - int xes; - long eip; - int xcs; - long eflags; - long esp; - int xss; -}; - -/* - * The "inb" instructions are not needed, but seem to change the timings - * a bit - without them it seems that the harddisk driver won't work on - * all hardware. Arghh. - */ -#define ACK_FIRST(mask,nr) \ - "inb $0x21,%al\n\t" \ - "jmp 1f\n" \ - "1:\tjmp 1f\n" \ - "1:\torb $" #mask ","SYMBOL_NAME_STR(cache_21)"\n\t" \ - "movb "SYMBOL_NAME_STR(cache_21)",%al\n\t" \ - "outb %al,$0x21\n\t" \ - "jmp 1f\n" \ - "1:\tjmp 1f\n" \ - "1:\tmovb $0x20,%al\n\t" \ - "outb %al,$0x20\n\t" - -#define ACK_SECOND(mask,nr) \ - "inb $0xA1,%al\n\t" \ - "jmp 1f\n" \ - "1:\tjmp 1f\n" \ - "1:\torb $" #mask ","SYMBOL_NAME_STR(cache_A1)"\n\t" \ - "movb "SYMBOL_NAME_STR(cache_A1)",%al\n\t" \ - "outb %al,$0xA1\n\t" \ - "jmp 1f\n" \ - "1:\tjmp 1f\n" \ - "1:\tmovb $0x20,%al\n\t" \ - "outb %al,$0xA0\n\t" \ - "jmp 1f\n" \ - "1:\tjmp 1f\n" \ - "1:\toutb %al,$0x20\n\t" - -#define UNBLK_FIRST(mask) \ - "inb $0x21,%al\n\t" \ - "jmp 1f\n" \ - "1:\tjmp 1f\n" \ - "1:\tandb $~(" #mask "),"SYMBOL_NAME_STR(cache_21)"\n\t" \ - "movb "SYMBOL_NAME_STR(cache_21)",%al\n\t" \ - "outb %al,$0x21\n\t" - -#define UNBLK_SECOND(mask) \ - "inb $0xA1,%al\n\t" \ - "jmp 1f\n" \ - "1:\tjmp 1f\n" \ - "1:\tandb $~(" #mask "),"SYMBOL_NAME_STR(cache_A1)"\n\t" \ - "movb "SYMBOL_NAME_STR(cache_A1)",%al\n\t" \ - "outb %al,$0xA1\n\t" - #define IRQ_NAME2(nr) nr##_interrupt(void) #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) -#define 
FAST_IRQ_NAME(nr) IRQ_NAME2(fast_IRQ##nr) -#define BAD_IRQ_NAME(nr) IRQ_NAME2(bad_IRQ##nr) - -#ifdef __SMP__ - -#define GET_CURRENT \ - "movl "SYMBOL_NAME_STR(apic_reg)", %ebx\n\t" \ - "movl 32(%ebx), %ebx\n\t" \ - "shrl $22,%ebx\n\t" \ - "andl $0x3C,%ebx\n\t" \ - "movl " SYMBOL_NAME_STR(current_set) "(,%ebx),%ebx\n\t" - -#else #define GET_CURRENT \ - "movl " SYMBOL_NAME_STR(current_set) ",%ebx\n\t" - -#endif + "movl %esp, %ebx\n\t" \ + "andl $-8192, %ebx\n\t" #ifdef __SMP__ @@ -205,66 +91,30 @@ __asm__( \ "\n"__ALIGN_STR"\n" \ SYMBOL_NAME_STR(x) ":\n\t" \ "pushl $-1\n\t" \ - SAVE_ALL \ - "movl %esp,%eax\n\t" \ - "pushl %eax\n\t" \ + SAVE_ALL \ + "movl %esp,%eax\n\t" \ + "pushl %eax\n\t" \ "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \ - "addl $4,%esp\n\t" \ + "addl $4,%esp\n\t" \ "jmp ret_from_intr\n"); #endif /* __SMP__ */ -#define BUILD_IRQ(chip,nr,mask) \ -asmlinkage void IRQ_NAME(nr); \ -asmlinkage void FAST_IRQ_NAME(nr); \ -asmlinkage void BAD_IRQ_NAME(nr); \ +#define BUILD_COMMON_IRQ() \ __asm__( \ -"\n"__ALIGN_STR"\n" \ -SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \ - "pushl $-"#nr"-2\n\t" \ + "\n" __ALIGN_STR"\n" \ + "common_interrupt:\n\t" \ SAVE_ALL \ - ACK_##chip(mask,(nr&7)) \ - "movl %esp,%eax\n\t" \ - "pushl %eax\n\t" \ - "pushl $" #nr "\n\t" \ - "call "SYMBOL_NAME_STR(do_IRQ)"\n\t" \ - "addl $8,%esp\n\t" \ - UNBLK_##chip(mask) \ - "jmp ret_from_intr\n" \ -"\n"__ALIGN_STR"\n" \ -SYMBOL_NAME_STR(fast_IRQ) #nr "_interrupt:\n\t" \ - SAVE_MOST \ - ACK_##chip(mask,(nr&7)) \ - "pushl $" #nr "\n\t" \ - "call "SYMBOL_NAME_STR(do_fast_IRQ)"\n\t" \ - "addl $4,%esp\n\t" \ - UNBLK_##chip(mask) \ - RESTORE_MOST \ -"\n"__ALIGN_STR"\n" \ -SYMBOL_NAME_STR(bad_IRQ) #nr "_interrupt:\n\t" \ - SAVE_MOST \ - ACK_##chip(mask,(nr&7)) \ - RESTORE_MOST); - -#define BUILD_TIMER_IRQ(chip,nr,mask) \ + "pushl $ret_from_intr\n\t" \ + "jmp "SYMBOL_NAME_STR(do_IRQ)); + +#define BUILD_IRQ(chip,nr,mask) \ asmlinkage void IRQ_NAME(nr); \ -asmlinkage void FAST_IRQ_NAME(nr); \ -asmlinkage void 
BAD_IRQ_NAME(nr); \ __asm__( \ "\n"__ALIGN_STR"\n" \ -SYMBOL_NAME_STR(fast_IRQ) #nr "_interrupt:\n\t" \ -SYMBOL_NAME_STR(bad_IRQ) #nr "_interrupt:\n\t" \ SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \ - "pushl $-"#nr"-2\n\t" \ - SAVE_ALL \ - ACK_##chip(mask,(nr&7)) \ - "movl %esp,%eax\n\t" \ - "pushl %eax\n\t" \ - "pushl $" #nr "\n\t" \ - "call "SYMBOL_NAME_STR(do_IRQ)"\n\t" \ - "addl $8,%esp\n\t" \ - UNBLK_##chip(mask) \ - "jmp ret_from_intr\n"); + "pushl $"#nr"-256\n\t" \ + "jmp common_interrupt"); /* * x86 profiling function, SMP safe. We might want to do this in @@ -276,15 +126,14 @@ static inline void x86_do_profile (unsigned long eip) extern int _stext; eip -= (unsigned long) &_stext; eip >>= prof_shift; - if (eip < prof_len) - atomic_inc((atomic_t *)&prof_buffer[eip]); - else /* * Dont ignore out-of-bounds EIP values silently, * put them into the last histogram slot, so if * present, they will show up as a sharp peak. */ - atomic_inc((atomic_t *)&prof_buffer[prof_len-1]); + if (eip > prof_len-1) + eip = prof_len-1; + atomic_inc((atomic_t *)&prof_buffer[eip]); } } diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index fe4723951..33842a21f 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -30,6 +30,7 @@ #include #include #include +#include #if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF) #include #endif @@ -149,7 +150,8 @@ int cpu_idle(void *unused) current->priority = -100; while(1) { - if(cpu_data[smp_processor_id()].hlt_works_ok && !hlt_counter && !need_resched) + if(cpu_data[smp_processor_id()].hlt_works_ok && + !hlt_counter && !need_resched) __asm("hlt"); /* * tq_scheduler currently assumes we're running in a process @@ -183,7 +185,7 @@ static long no_idt[2] = {0, 0}; static int reboot_mode = 0; static int reboot_thru_bios = 0; -void reboot_setup(char *str, int *ints) +__initfunc(void reboot_setup(char *str, int *ints)) { while(1) { switch (*str) { @@ -324,11 +326,14 @@ void machine_restart(char * 
__unused) pg0 [0] = 7; - /* Use `swapper_pg_dir' as our page directory. Don't bother with - `SET_PAGE_DIR' because interrupts are disabled and we're rebooting. - This instruction flushes the TLB. */ + /* + * Use `swapper_pg_dir' as our page directory. We bother with + * `SET_PAGE_DIR' because although might be rebooting, but if we change + * the way we set root page dir in the future, then we wont break a + * seldom used feature ;) + */ - __asm__ __volatile__ ("movl %0,%%cr3" : : "a" (swapper_pg_dir) : "memory"); + SET_PAGE_DIR(current,swapper_pg_dir); /* Write 0x1234 to absolute memory location 0x472. The BIOS reads this on booting to tell it to "Bypass memory test (also warm @@ -473,6 +478,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, int i; struct pt_regs * childregs; + p->tss.tr = _TSS(nr); + p->tss.ldt = _LDT(nr); p->tss.es = KERNEL_DS; p->tss.cs = KERNEL_CS; p->tss.ss = KERNEL_DS; @@ -480,9 +487,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, p->tss.fs = USER_DS; p->tss.gs = USER_DS; p->tss.ss0 = KERNEL_DS; - p->tss.esp0 = p->kernel_stack_page + PAGE_SIZE; - p->tss.tr = _TSS(nr); - childregs = ((struct pt_regs *) (p->kernel_stack_page + PAGE_SIZE)) - 1; + p->tss.esp0 = 2*PAGE_SIZE + (unsigned long) p; + childregs = ((struct pt_regs *) (p->tss.esp0)) - 1; p->tss.esp = (unsigned long) childregs; #ifdef __SMP__ p->tss.eip = (unsigned long) ret_from_smpfork; @@ -496,7 +502,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, childregs->eax = 0; childregs->esp = esp; p->tss.back_link = 0; - p->tss.ldt = _LDT(nr); if (p->ldt) { p->ldt = (struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE); if (p->ldt != NULL) @@ -512,6 +517,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, p->tss.io_bitmap[i] = ~0; if (last_task_used_math == current) __asm__("clts ; fnsave %0 ; frstor %0":"=m" (p->tss.i387)); + return 0; } diff --git a/arch/i386/kernel/ptrace.c 
b/arch/i386/kernel/ptrace.c index 920d1bc1c..0dfffd672 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -34,18 +34,6 @@ */ #define EFL_OFFSET ((EFL-2)*4-sizeof(struct pt_regs)) -/* change a pid into a task struct. */ -static inline struct task_struct * get_task(int pid) -{ - int i; - - for (i = 1; i < NR_TASKS; i++) { - if (task[i] != NULL && (task[i]->pid == pid)) - return task[i]; - } - return NULL; -} - /* * this routine will get a word off of the processes privileged stack. * the offset is how far from the base addr as stored in the TSS. @@ -95,7 +83,7 @@ static unsigned long get_long(struct task_struct * tsk, repeat: pgdir = pgd_offset(vma->vm_mm, addr); if (pgd_none(*pgdir)) { - do_no_page(tsk, vma, addr, 0); + handle_mm_fault(tsk, vma, addr, 0); goto repeat; } if (pgd_bad(*pgdir)) { @@ -105,7 +93,7 @@ repeat: } pgmiddle = pmd_offset(pgdir, addr); if (pmd_none(*pgmiddle)) { - do_no_page(tsk, vma, addr, 0); + handle_mm_fault(tsk, vma, addr, 0); goto repeat; } if (pmd_bad(*pgmiddle)) { @@ -115,7 +103,7 @@ repeat: } pgtable = pte_offset(pgmiddle, addr); if (!pte_present(*pgtable)) { - do_no_page(tsk, vma, addr, 0); + handle_mm_fault(tsk, vma, addr, 0); goto repeat; } page = pte_page(*pgtable); @@ -146,7 +134,7 @@ static void put_long(struct task_struct * tsk, struct vm_area_struct * vma, unsi repeat: pgdir = pgd_offset(vma->vm_mm, addr); if (!pgd_present(*pgdir)) { - do_no_page(tsk, vma, addr, 1); + handle_mm_fault(tsk, vma, addr, 1); goto repeat; } if (pgd_bad(*pgdir)) { @@ -156,7 +144,7 @@ repeat: } pgmiddle = pmd_offset(pgdir, addr); if (pmd_none(*pgmiddle)) { - do_no_page(tsk, vma, addr, 1); + handle_mm_fault(tsk, vma, addr, 1); goto repeat; } if (pmd_bad(*pgmiddle)) { @@ -166,12 +154,12 @@ repeat: } pgtable = pte_offset(pgmiddle, addr); if (!pte_present(*pgtable)) { - do_no_page(tsk, vma, addr, 1); + handle_mm_fault(tsk, vma, addr, 1); goto repeat; } page = pte_page(*pgtable); if (!pte_write(*pgtable)) { - do_wp_page(tsk, vma, addr, 
1); + handle_mm_fault(tsk, vma, addr, 1); goto repeat; } /* this is a hack for non-kernel-mapped video buffers and similar */ @@ -381,7 +369,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) if (pid == 1) /* you may not mess with init */ goto out; ret = -ESRCH; - if (!(child = get_task(pid))) + if (!(child = find_task_by_pid(pid))) goto out; ret = -EPERM; if (request == PTRACE_ATTACH) { diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index ec5954771..f62744d11 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -247,7 +247,7 @@ static const char * i586model(unsigned int nr) static const char * i686model(unsigned int nr) { static const char *model[] = { - "PPro A-step", "Pentium Pro" + "PPro A-step", "Pentium Pro", "2", "Pentium II" }; if (nr < sizeof(model)/sizeof(char *)) return model[nr]; @@ -279,9 +279,10 @@ static const char * getmodel(int x86, int model) int get_cpuinfo(char * buffer) { int i, len = 0; + int sep_bug; static const char *x86_cap_flags[] = { "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", - "cx8", "apic", "10", "11", "mtrr", "pge", "mca", "cmov", + "cx8", "apic", "10", "sep", "mtrr", "pge", "mca", "cmov", "16", "17", "18", "19", "20", "21", "22", "mmx", "24", "25", "26", "27", "28", "29", "30", "31" }; @@ -321,10 +322,18 @@ int get_cpuinfo(char * buffer) else len += sprintf(buffer+len, "stepping\t: unknown\n"); + + sep_bug = CD(have_cpuid) && + (CD(x86_capability) & 0x800) && + !memcmp(x86_vendor_id, "GenuineIntel", 12) && + CD(x86) == 6 && + CD(x86_model) < 3 && + CD(x86_mask) < 3; len += sprintf(buffer+len, "fdiv_bug\t: %s\n" "hlt_bug\t\t: %s\n" + "sep_bug\t\t: %s\n" "fpu\t\t: %s\n" "fpu_exception\t: %s\n" "cpuid\t\t: %s\n" @@ -332,6 +341,7 @@ int get_cpuinfo(char * buffer) "flags\t\t:", CD(fdiv_bug) ? "yes" : "no", CD(hlt_works_ok) ? "no" : "yes", + sep_bug ? "yes" : "no", CD(hard_math) ? "yes" : "no", (CD(hard_math) && ignore_irq13) ? 
"yes" : "no", diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 970c8c5d7..3141c5318 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -318,6 +318,14 @@ asmlinkage int do_signal(unsigned long oldmask, struct pt_regs * regs) unsigned long signr; struct sigaction * sa; + /* + * We want the common case to go fast, which + * is why we may in certain cases get here from + * kernel mode. Just return without doing anything + * if so. + */ + if ((regs->xcs & 3) != 3) + return 1; mask = ~current->blocked; while ((signr = current->signal & mask)) { /* @@ -384,10 +392,12 @@ asmlinkage int do_signal(unsigned long oldmask, struct pt_regs * regs) case SIGQUIT: case SIGILL: case SIGTRAP: case SIGABRT: case SIGFPE: case SIGSEGV: + lock_kernel(); if (current->binfmt && current->binfmt->core_dump) { if (current->binfmt->core_dump(signr, regs)) signr |= 0x80; } + unlock_kernel(); /* fall through */ default: spin_lock_irq(&current->sigmask_lock); diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index a1590f500..1dc615501 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -1,5 +1,5 @@ /* - * Intel MP v1.1/v1.4 specification support routines for multi-pentium + * Intel MP v1.1/v1.4 specification support routines for multi-pentium * hosts. * * (c) 1995 Alan Cox, CymruNET Ltd @@ -46,14 +46,15 @@ #include #include +#define __KERNEL_SYSCALLS__ +#include + #include "irq.h" extern unsigned long start_kernel, _etext; extern void update_one_process( struct task_struct *p, - unsigned long ticks, unsigned long user, - unsigned long system); -void setup_APIC_clock (void); - + unsigned long ticks, unsigned long user, + unsigned long system); /* * Some notes on processor bugs: * @@ -67,7 +68,7 @@ void setup_APIC_clock (void); * Pentium * There is a marginal case where REP MOVS on 100MHz SMP * machines with B stepping processors can fail. XXX should provide - * an L1cache=Writethrough or L1cache=off option. 
+ * an L1cache=Writethrough or L1cache=off option. * * B stepping CPU's may hang. There are hardware work arounds * for this. We warn about it in case your board doesnt have the work @@ -91,12 +92,12 @@ void setup_APIC_clock (void); * If this sounds worrying believe me these bugs are ___RARE___ and * there's about nothing of note with C stepping upwards. */ - - + + /* * Why isn't this somewhere standard ?? */ - + extern __inline int max(int a,int b) { if(a>b) @@ -121,7 +122,6 @@ struct cpuinfo_x86 cpu_data[NR_CPUS]; /* Per cpu bogomips and other parameters static unsigned int num_processors = 1; /* Internal processor count */ static unsigned long io_apic_addr = 0xFEC00000; /* Address of the I/O apic (not yet used) */ unsigned char boot_cpu_id = 0; /* Processor that is doing the boot up */ -static unsigned char *kstack_base,*kstack_end; /* Kernel stack list pointers */ static int smp_activated = 0; /* Tripped once we need to start cross invalidating */ int apic_version[NR_CPUS]; /* APIC version number */ static volatile int smp_commenced=0; /* Tripped when we start scheduling */ @@ -129,7 +129,6 @@ unsigned long apic_addr = 0xFEE00000; /* Address of APIC (defaults to 0xFEE000 unsigned long nlong = 0; /* dummy used for apic_reg address + 0x20 */ unsigned char *apic_reg=((unsigned char *)(&nlong))-0x20;/* Later set to the ioremap() of the APIC */ unsigned long apic_retval; /* Just debugging the assembler.. */ -unsigned char *kernel_stacks[NR_CPUS]; /* Kernel stack pointers for CPU's (debugging) */ static volatile unsigned char smp_cpu_in_msg[NR_CPUS]; /* True if this processor is sending an IPI */ @@ -195,10 +194,10 @@ static inline void ack_APIC_irq (void) apic_write(APIC_EOI, 0); } -/* +/* * Checksum an MP configuration block. 
*/ - + static int mpf_checksum(unsigned char *mp, int len) { int sum=0; @@ -210,7 +209,7 @@ static int mpf_checksum(unsigned char *mp, int len) /* * Processor encoding in an MP configuration block */ - + static char *mpc_family(int family,int model) { static char n[32]; @@ -274,11 +273,11 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc)) /* set the local APIC address */ apic_addr = (unsigned long)phys_to_virt((unsigned long)mpc->mpc_lapic); - + /* * Now process the configuration blocks. */ - + while(countmpc_length) { switch(*mpt) @@ -290,13 +289,13 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc)) if(m->mpc_cpuflag&CPU_ENABLED) { printk("Processor #%d %s APIC version %d\n", - m->mpc_apicid, + m->mpc_apicid, mpc_family((m->mpc_cpufeature& CPU_FAMILY_MASK)>>8, (m->mpc_cpufeature& CPU_MODEL_MASK)>>4), m->mpc_apicver); -#ifdef SMP_DEBUG +#ifdef SMP_DEBUG if(m->mpc_featureflag&(1<<0)) printk(" Floating point unit present.\n"); if(m->mpc_featureflag&(1<<7)) @@ -305,7 +304,7 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc)) printk(" 64 bit compare & exchange supported.\n"); if(m->mpc_featureflag&(1<<9)) printk(" Internal APIC present.\n"); -#endif +#endif if(m->mpc_cpuflag&CPU_BOOTPROCESSOR) { SMP_PRINTK((" Bootup CPU\n")); @@ -313,10 +312,10 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc)) } else /* Boot CPU already counted */ num_processors++; - + if(m->mpc_apicid>NR_CPUS) printk("Processor #%d unused. 
(Max %d processors).\n",m->mpc_apicid, NR_CPUS); - else + else { cpu_present_map|=(1<mpc_apicid); apic_version[m->mpc_apicid]=m->mpc_apicver; @@ -337,7 +336,7 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc)) str)); mpt+=sizeof(*m); count+=sizeof(*m); - break; + break; } case MP_IOAPIC: { @@ -346,20 +345,20 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc)) if(m->mpc_flags&MPC_APIC_USABLE) { apics++; - printk("I/O APIC #%d Version %d at 0x%lX.\n", - m->mpc_apicid,m->mpc_apicver, - m->mpc_apicaddr); - io_apic_addr = (unsigned long)phys_to_virt(m->mpc_apicaddr); - } - mpt+=sizeof(*m); - count+=sizeof(*m); - break; + printk("I/O APIC #%d Version %d at 0x%lX.\n", + m->mpc_apicid,m->mpc_apicver, + m->mpc_apicaddr); + io_apic_addr = (unsigned long)phys_to_virt(m->mpc_apicaddr); + } + mpt+=sizeof(*m); + count+=sizeof(*m); + break; } case MP_INTSRC: { struct mpc_config_intsrc *m= (struct mpc_config_intsrc *)mpt; - + mpt+=sizeof(*m); count+=sizeof(*m); break; @@ -376,29 +375,29 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc)) } if(apics>1) printk("Warning: Multiple APIC's not supported.\n"); - return num_processors; + return num_processors; } /* * Scan the memory blocks for an SMP configuration block. */ - + __initfunc(int smp_scan_config(unsigned long base, unsigned long length)) { unsigned long *bp=phys_to_virt(base); struct intel_mp_floating *mpf; - + SMP_PRINTK(("Scan SMP from %p for %ld bytes.\n", bp,length)); if(sizeof(*mpf)!=16) printk("Error: MPF size\n"); - + while(length>0) { if(*bp==SMP_MAGIC_IDENT) { mpf=(struct intel_mp_floating *)bp; - if(mpf->mpf_length==1 && + if(mpf->mpf_length==1 && !mpf_checksum((unsigned char *)bp,16) && (mpf->mpf_specification == 1 || mpf->mpf_specification == 4) ) @@ -433,7 +432,7 @@ __initfunc(int smp_scan_config(unsigned long base, unsigned long length)) * We know that page 0 is not * used. Steal it for now! 
*/ - + cfg=pg0[0]; pg0[0] = (apic_addr | 7); local_flush_tlb(); @@ -451,7 +450,7 @@ __initfunc(int smp_scan_config(unsigned long base, unsigned long length)) * * END OF HACK END OF HACK END OF HACK END OF HACK END OF HACK * - */ + */ /* * 2 CPUs, numbered 0 & 1. */ @@ -513,6 +512,7 @@ __initfunc(int smp_scan_config(unsigned long base, unsigned long length)) nlong = boot_cpu_id<<24; /* Dummy 'self' for bootup */ cpu_logical_map[0] = boot_cpu_id; global_irq_holder = boot_cpu_id; + current->processor = boot_cpu_id; printk("Processors: %d\n", num_processors); /* @@ -534,61 +534,37 @@ __initfunc(int smp_scan_config(unsigned long base, unsigned long length)) extern unsigned char trampoline_data []; extern unsigned char trampoline_end []; +static unsigned char *trampoline_base; /* * Currently trivial. Write the real->protected mode * bootstrap into the page concerned. The caller * has made sure it's suitably aligned. */ - -__initfunc(static void install_trampoline(unsigned char *mp)) + +__initfunc(static unsigned long setup_trampoline(void)) { - memcpy(mp, trampoline_data, trampoline_end - trampoline_data); + memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); + return virt_to_phys(trampoline_base); } /* - * We are called very early to get the low memory for the trampoline/kernel stacks - * This has to be done by mm/init.c to parcel us out nice low memory. We allocate - * the kernel stacks at 4K, 8K, 12K... currently (0-03FF is preserved for SMM and - * other things). + * We are called very early to get the low memory for the + * SMP bootup trampoline page. */ - __initfunc(unsigned long smp_alloc_memory(unsigned long mem_base)) { - int size=(num_processors-1)*PAGE_SIZE; /* Number of stacks needed */ - - /* - * Our stacks have to be below the 1Mb line, and mem_base on entry - * is 4K aligned. 
- */ - - if(virt_to_phys((void *)(mem_base+size))>=0x9F000) - panic("smp_alloc_memory: Insufficient low memory for kernel stacks 0x%lx.\n", mem_base); - kstack_base=(void *)mem_base; - mem_base+=size; - kstack_end=(void *)mem_base; - return mem_base; -} - -/* - * Hand out stacks one at a time. - */ - -__initfunc(static void *get_kernel_stack(void)) -{ - void *stack=kstack_base; - if(kstack_base>=kstack_end) - return NULL; - kstack_base+=PAGE_SIZE; - return stack; + if (virt_to_phys((void *)mem_base) >= 0x9F000) + panic("smp_alloc_memory: Insufficient low memory for kernel trampoline 0x%lx.\n", mem_base); + trampoline_base = (void *)mem_base; + return mem_base + PAGE_SIZE; } - /* * The bootstrap kernel entry code has set these up. Save them for * a given CPU */ - + __initfunc(void smp_store_cpu_info(int id)) { struct cpuinfo_x86 *c=&cpu_data[id]; @@ -615,7 +591,7 @@ __initfunc(void smp_store_cpu_info(int id)) * fired off. This allows the BP to have everything in order [we hope]. * At the end of this all the AP's will hit the system scheduling and off * we go. Each AP will load the system gdt's and jump through the kernel - * init into idle(). At this point the scheduler will one day take over + * init into idle(). At this point the scheduler will one day take over * and give them jobs to do. smp_callin is a standard routine * we use to track CPU's as they power up. */ @@ -634,74 +610,276 @@ __initfunc(void smp_callin(void)) extern void calibrate_delay(void); int cpuid=GET_APIC_ID(apic_read(APIC_ID)); unsigned long l; - + /* * Activate our APIC */ - - SMP_PRINTK(("CALLIN %d\n",smp_processor_id())); + + SMP_PRINTK(("CALLIN %d %d\n",hard_smp_processor_id(), smp_processor_id())); l=apic_read(APIC_SPIV); l|=(1<<8); /* Enable */ apic_write(APIC_SPIV,l); /* - * Set up our APIC timer. + * Set up our APIC timer. */ setup_APIC_clock (); sti(); /* * Get our bogomips. 
- */ + calibrate_delay(); SMP_PRINTK(("Stack at about %p\n",&cpuid)); - + /* * Save our processor parameters */ smp_store_cpu_info(cpuid); + /* * Allow the master to continue. - */ + */ set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]); +} + +static int cpucount = 0; + +extern int cpu_idle(void * unused); + +/* + * Activate a secondary processor. + */ +__initfunc(int start_secondary(void *unused)) +{ + smp_callin(); + while (!smp_commenced) + barrier(); + return cpu_idle(NULL); +} + +/* + * Everything has been set up for the secondary + * CPU's - they just need to reload everything + * from the task structure + */ +__initfunc(void initialize_secondary(void)) +{ + struct thread_struct * p = &current->tss; + /* - * Until we are ready for SMP scheduling + * We don't actually need to load the full TSS, + * basically just the stack pointer and the eip. */ - load_ldt(0); - local_flush_tlb(); - - while (cpu_number_map[cpuid] == -1) - barrier(); + asm volatile("lldt %%ax": :"a" (p->ldt)); + asm volatile("ltr %%ax": :"a" (p->tr)); + asm volatile( + "movl %0,%%esp\n\t" + "jmp *%1" + : + :"r" (p->esp),"r" (p->eip)); } - while(!task[cpuid] || current_set[cpuid] != task[cpu_number_map[cpuid]]) - barrier(); +extern struct { + void * esp; + unsigned short ss; +} stack_start; - local_flush_tlb(); - load_TR(cpu_number_map[cpuid]); +__initfunc(static void do_boot_cpu(int i)) +{ + unsigned long cfg; + pgd_t maincfg; + struct task_struct *idle; + unsigned long send_status, accept_status; + int timeout, num_starts, j; + unsigned long start_eip; - while(!smp_commenced) - barrier(); - + /* + * We need an idle process for each processor. + */ + + kernel_thread(start_secondary, NULL, CLONE_PID); + cpucount++; + + idle = task[cpucount]; + if (!idle) + panic("No idle process for CPU %d\n", i); + + idle->processor = i; + cpu_logical_map[cpucount] = i; + cpu_number_map[i] = cpucount; + + /* start_eip had better be page-aligned! 
*/ + start_eip = setup_trampoline(); + + printk("Booting processor %d eip %lx: ", i, start_eip); /* So we see what's up */ + stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle); + + /* + * This grunge runs the startup process for + * the targeted processor. + */ + + SMP_PRINTK(("Setting warm reset code and vector.\n")); + + CMOS_WRITE(0xa, 0xf); local_flush_tlb(); + SMP_PRINTK(("1.\n")); + *((volatile unsigned short *) phys_to_virt(0x469)) = start_eip >> 4; + SMP_PRINTK(("2.\n")); + *((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf; + SMP_PRINTK(("3.\n")); + + maincfg=swapper_pg_dir[0]; + ((unsigned long *)swapper_pg_dir)[0]=0x102007; + + /* + * Be paranoid about clearing APIC errors. + */ + + if ( apic_version[i] & 0xF0 ) + { + apic_write(APIC_ESR, 0); + accept_status = (apic_read(APIC_ESR) & 0xEF); + } + + /* + * Status is now clean + */ - SMP_PRINTK(("Commenced..\n")); + send_status = 0; + accept_status = 0; + + /* + * Starting actual IPI sequence... + */ + + SMP_PRINTK(("Asserting INIT.\n")); + + /* + * Turn INIT on + */ + + cfg=apic_read(APIC_ICR2); + cfg&=0x00FFFFFF; + apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */ + cfg=apic_read(APIC_ICR); + cfg&=~0xCDFFF; /* Clear bits */ + cfg |= (APIC_DEST_FIELD | APIC_DEST_LEVELTRIG + | APIC_DEST_ASSERT | APIC_DEST_DM_INIT); + apic_write(APIC_ICR, cfg); /* Send IPI */ + + udelay(200); + SMP_PRINTK(("Deasserting INIT.\n")); + + cfg=apic_read(APIC_ICR2); + cfg&=0x00FFFFFF; + apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */ + cfg=apic_read(APIC_ICR); + cfg&=~0xCDFFF; /* Clear bits */ + cfg |= (APIC_DEST_FIELD | APIC_DEST_LEVELTRIG + | APIC_DEST_DM_INIT); + apic_write(APIC_ICR, cfg); /* Send IPI */ + + /* + * Should we send STARTUP IPIs ? + * + * Determine this based on the APIC version. + * If we don't have an integrated APIC, don't + * send the STARTUP IPIs. 
+ */ + + if ( apic_version[i] & 0xF0 ) + num_starts = 2; + else + num_starts = 0; + + /* + * Run STARTUP IPI loop. + */ + + for (j = 1; !(send_status || accept_status) + && (j <= num_starts) ; j++) + { + SMP_PRINTK(("Sending STARTUP #%d.\n",j)); + apic_write(APIC_ESR, 0); + SMP_PRINTK(("After apic_write.\n")); + + /* + * STARTUP IPI + */ + + cfg=apic_read(APIC_ICR2); + cfg&=0x00FFFFFF; + apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */ + cfg=apic_read(APIC_ICR); + cfg&=~0xCDFFF; /* Clear bits */ + cfg |= (APIC_DEST_FIELD + | APIC_DEST_DM_STARTUP + | (start_eip >> 12)); /* Boot on the stack */ + SMP_PRINTK(("Before start apic_write.\n")); + apic_write(APIC_ICR, cfg); /* Kick the second */ + + SMP_PRINTK(("Startup point 1.\n")); + timeout = 0; + do { + SMP_PRINTK(("Sleeping.\n")); udelay(1000000); + udelay(10); + } while ( (send_status = (apic_read(APIC_ICR) & 0x1000)) + && (timeout++ < 1000)); + udelay(200); + accept_status = (apic_read(APIC_ESR) & 0xEF); + } + SMP_PRINTK(("After Startup.\n")); + + if (send_status) /* APIC never delivered?? */ + printk("APIC never delivered???\n"); + if (accept_status) /* Send accept error */ + printk("APIC delivery error (%lx).\n", accept_status); + + if( !(send_status || accept_status) ) + { + for(timeout=0;timeout<50000;timeout++) + { + if(cpu_callin_map[0]&(1< cpucount+1)) { - unsigned long send_status, accept_status; - int timeout, num_starts, j; - - /* - * We need a kernel stack for each processor. - */ - - stack=get_kernel_stack(); /* We allocated these earlier */ - if(stack==NULL) - panic("No memory for processor stacks.\n"); - - kernel_stacks[i]=(void *)phys_to_virt((unsigned long)stack); - install_trampoline(stack); - - printk("Booting processor %d stack %p: ",i,stack); /* So we set what's up */ - - /* - * This grunge runs the startup process for - * the targeted processor. - */ - - SMP_PRINTK(("Setting warm reset code and vector.\n")); - - /* - * Install a writable page 0 entry. 
- */ - - cfg=pg0[0]; - - CMOS_WRITE(0xa, 0xf); - pg0[0]=7; - local_flush_tlb(); - SMP_PRINTK(("1.\n")); - *((volatile unsigned short *) phys_to_virt(0x469)) = ((unsigned long)stack)>>4; - SMP_PRINTK(("2.\n")); - *((volatile unsigned short *) phys_to_virt(0x467)) = 0; - SMP_PRINTK(("3.\n")); - - /* - * Protect it again - */ - - pg0[0]= cfg; - local_flush_tlb(); - - /* walken modif - * enable mapping of the first 4M at virtual - * address zero - */ - - maincfg=swapper_pg_dir[0]; - ((unsigned long *)swapper_pg_dir)[0]=0x102007; - - /* no need to local_flush_tlb : - we are setting this up for the slave processor ! */ - - /* - * Be paranoid about clearing APIC errors. - */ - - if ( apic_version[i] & 0xF0 ) - { - apic_write(APIC_ESR, 0); - accept_status = (apic_read(APIC_ESR) & 0xEF); - } - - /* - * Status is now clean - */ - - send_status = 0; - accept_status = 0; - - /* - * Starting actual IPI sequence... - */ - - SMP_PRINTK(("Asserting INIT.\n")); - - /* - * Turn INIT on - */ - - cfg=apic_read(APIC_ICR2); - cfg&=0x00FFFFFF; - apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */ - cfg=apic_read(APIC_ICR); - cfg&=~0xCDFFF; /* Clear bits */ - cfg |= (APIC_DEST_FIELD | APIC_DEST_LEVELTRIG - | APIC_DEST_ASSERT | APIC_DEST_DM_INIT); - apic_write(APIC_ICR, cfg); /* Send IPI */ - - udelay(200); - SMP_PRINTK(("Deasserting INIT.\n")); - - cfg=apic_read(APIC_ICR2); - cfg&=0x00FFFFFF; - apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */ - cfg=apic_read(APIC_ICR); - cfg&=~0xCDFFF; /* Clear bits */ - cfg |= (APIC_DEST_FIELD | APIC_DEST_LEVELTRIG - | APIC_DEST_DM_INIT); - apic_write(APIC_ICR, cfg); /* Send IPI */ - - /* - * Should we send STARTUP IPIs ? - * - * Determine this based on the APIC version. - * If we don't have an integrated APIC, don't - * send the STARTUP IPIs. - */ - - if ( apic_version[i] & 0xF0 ) - num_starts = 2; - else - num_starts = 0; - - /* - * Run STARTUP IPI loop. 
- */ - - for (j = 1; !(send_status || accept_status) - && (j <= num_starts) ; j++) - { - SMP_PRINTK(("Sending STARTUP #%d.\n",j)); - - apic_write(APIC_ESR, 0); - SMP_PRINTK(("After apic_write.\n")); - - /* - * STARTUP IPI - */ - - cfg=apic_read(APIC_ICR2); - cfg&=0x00FFFFFF; - apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */ - cfg=apic_read(APIC_ICR); - cfg&=~0xCDFFF; /* Clear bits */ - cfg |= (APIC_DEST_FIELD - | APIC_DEST_DM_STARTUP - | (((int)virt_to_phys(stack)) >> 12)); /* Boot on the stack */ - SMP_PRINTK(("Before start apic_write.\n")); - apic_write(APIC_ICR, cfg); /* Kick the second */ - - SMP_PRINTK(("Startup point 1.\n")); - timeout = 0; - do { - SMP_PRINTK(("Sleeping.\n")); udelay(1000000); - udelay(10); - } while ( (send_status = (apic_read(APIC_ICR) & 0x1000)) - && (timeout++ < 1000)); - udelay(200); - - accept_status = (apic_read(APIC_ESR) & 0xEF); - } - SMP_PRINTK(("After Startup.\n")); - - if (send_status) /* APIC never delivered?? */ - printk("APIC never delivered???\n"); - if (accept_status) /* Send accept error */ - printk("APIC delivery error (%lx).\n", accept_status); - - if( !(send_status || accept_status) ) - { - for(timeout=0;timeout<50000;timeout++) - { - if(cpu_callin_map[0]&(1< #include #include +#include #include #include @@ -379,11 +380,15 @@ static inline void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) do_timer(regs); /* * In the SMP case we use the local APIC timer interrupt to do the - * profiling. + * profiling, except when we simulate SMP mode on a uniprocessor + * system, in that case we have to call the local interrupt handler. */ #ifndef __SMP__ if (!user_mode(regs)) x86_do_profile(regs->eip); +#else + if (!smp_found_config) + smp_local_timer_interrupt(regs); #endif /* diff --git a/arch/i386/kernel/trampoline.S b/arch/i386/kernel/trampoline.S index 63bc51c5e..d0a726f6b 100644 --- a/arch/i386/kernel/trampoline.S +++ b/arch/i386/kernel/trampoline.S @@ -21,13 +21,9 @@ * and IP is zero. 
Thus, data addresses need to be absolute * (no relocation) and are taken with regard to r_base. * - * On the transition to protected mode, this page appears at - * address 8192, so protected mode addresses are with regard - * to p_base. - * * If you work on this file, check the object module with objdump * --full-contents --reloc to make sure there are no relocation - * entries. + * entries except for the gdt one.. */ #include @@ -39,15 +35,10 @@ ENTRY(trampoline_data) r_base = . -p_base = . - 8192 mov %cs, %ax # Code and data in the same place mov %ax, %ds - mov %ax, %cx # Pass stack info to the 32bit boot - shl $4, %cx # Segment -> Offset - add $4096, %cx # End of page is wanted - mov $1, %bx # Flag an SMP trampoline cli # We should be safe anyway @@ -71,37 +62,7 @@ idt_48: gdt_48: .word 0x0800 # gdt limit = 2048, 256 GDT entries - .word gdt - p_base, 0x0 # gdt base = gdt (first SMP CPU) - # we load the others with first table - # saves rewriting gdt_48 for each -gdt: - .word 0, 0, 0, 0 # dummy - - .word 0, 0, 0, 0 # unused - -# walken modif - - .word 0xFFFF # 4 Gb - (0x100000*0x1000 = 4Gb) - .word 0x0000 # base address = 0 - .word 0x9A00 # code read / exec - .word 0x00CF # granularity = 4096, 386 (+5th nibble of limit) - - .word 0xFFFF # 4 Gb - (0x100000*0x1000 = 4Gb) - .word 0x0000 # base address = 0 - .word 0x9200 # data read / write - .word 0x00CF # granularity = 4096, 386 (+5th nibble of limit) - -# walken modif - -# .word 0x07FF # 8 Mb - limit = 2047 (2048 * 4096 = 8 Mb) -# .word 0x0000 # base address = 0 -# .word 0x9A00 # code read / exec -# .word 0x00C0 # granularity = 4096, 386 - -# .word 0x07FF # 8 Mb - limit = 2047 (2048 * 4096 = 8 Mb) -# .word 0x0000 # base address = 0 -# .word 0x9200 # data read / write -# .word 0x00C0 # granularity = 4096, 386 + .long gdt-0xc0000000 # gdt base = gdt (first SMP CPU) .globl SYMBOL_NAME(trampoline_end) SYMBOL_NAME_LABEL(trampoline_end) diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 
905cf5b13..696e37004 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -23,10 +23,12 @@ #include #include #include +#include #include #include #include +#include asmlinkage int system_call(void); asmlinkage void lcall7(void); @@ -121,7 +123,7 @@ static void show_registers(struct pt_regs *regs) unsigned long esp; unsigned short ss; unsigned long *stack, addr, module_start, module_end; - extern char start_kernel, _etext; + extern char _stext, _etext; esp = (unsigned long) ®s->esp; ss = KERNEL_DS; @@ -129,8 +131,8 @@ static void show_registers(struct pt_regs *regs) esp = regs->esp; ss = regs->xss & 0xffff; } - printk("CPU: %d\n", smp_processor_id()); - printk("EIP: %04x:[<%08lx>]\nEFLAGS: %08lx\n", 0xffff & regs->xcs,regs->eip,regs->eflags); + printk("CPU: %d\nEIP: %04x:[<%08lx>]\nEFLAGS: %08lx\n", + smp_processor_id(), 0xffff & regs->xcs, regs->eip, regs->eflags); printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", regs->eax, regs->ebx, regs->ecx, regs->edx); printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", @@ -138,10 +140,8 @@ static void show_registers(struct pt_regs *regs) printk("ds: %04x es: %04x ss: %04x\n", regs->xds & 0xffff, regs->xes & 0xffff, ss); store_TR(i); - if (STACK_MAGIC != *(unsigned long *)current->kernel_stack_page) - printk("Corrupted stack page\n"); printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)\nStack: ", - current->comm, current->pid, 0xffff & i, current->kernel_stack_page); + current->comm, current->pid, 0xffff & i, 4096+(unsigned long)current); stack = (unsigned long *) esp; for(i=0; i < kstack_depth_to_print; i++) { if (((long) stack & 4095) == 0) @@ -166,7 +166,7 @@ static void show_registers(struct pt_regs *regs) * down the cause of the crash will be able to figure * out the call path that was taken. 
*/ - if (((addr >= (unsigned long) &start_kernel) && + if (((addr >= (unsigned long) &_stext) && (addr <= (unsigned long) &_etext)) || ((addr >= module_start) && (addr <= module_end))) { if (i && ((i % 8) == 0)) @@ -181,13 +181,19 @@ static void show_registers(struct pt_regs *regs) printk("\n"); } +spinlock_t die_lock; + /*static*/ void die_if_kernel(const char * str, struct pt_regs * regs, long err) { if ((regs->eflags & VM_MASK) || (3 & regs->xcs) == 3) return; console_verbose(); + spin_lock_irq(&die_lock); printk("%s: %04lx\n", str, err & 0xffff); show_registers(regs); +do { int i=2000000000; while (i) i--; } while (0); +do { int i=2000000000; while (i) i--; } while (0); + spin_unlock_irq(&die_lock); do_exit(SIGSEGV); } @@ -235,18 +241,45 @@ out: unlock_kernel(); } -asmlinkage void do_nmi(struct pt_regs * regs, long error_code) +static void mem_parity_error(unsigned char reason, struct pt_regs * regs) { - printk("NMI\n"); show_registers(regs); -#ifdef CONFIG_SMP_NMI_INVAL - smp_flush_tlb_rcv(); -#else -#ifndef CONFIG_IGNORE_NMI printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); - printk("You probably have a hardware problem with your RAM chips or a\n"); - printk("power saving mode enabled.\n"); -#endif -#endif + printk("You probably have a hardware problem with your RAM chips\n"); +} + +static void io_check_error(unsigned char reason, struct pt_regs * regs) +{ + unsigned long i; + + printk("NMI: IOCK error (debug interrupt?)\n"); + show_registers(regs); + + /* Re-enable the IOCK line, wait for a few seconds */ + reason |= 8; + outb(reason, 0x61); + i = 2000; + while (--i) udelay(1000); + reason &= ~8; + outb(reason, 0x61); +} + +static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +{ + printk("Uhhuh. 
NMI received for unknown reason %02x.\n", reason); + printk("Dazed and confused, but trying to continue\n"); + printk("Do you have a strange power saving mode enabled?\n"); +} + +asmlinkage void do_nmi(struct pt_regs * regs, long error_code) +{ + unsigned char reason = inb(0x61); + + if (reason & 0x80) + mem_parity_error(reason, regs); + if (reason & 0x40) + io_check_error(reason, regs); + if (!(reason & 0xc0)) + unknown_nmi_error(reason, regs); } asmlinkage void do_debug(struct pt_regs * regs, long error_code) @@ -380,15 +413,7 @@ __initfunc(void trap_init(void)) { int i; struct desc_struct * p; - static int smptrap=0; - - if(smptrap) - { - __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); - load_ldt(0); - return; - } - smptrap++; + if (readl(0x0FFFD9) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24)) EISA_bus = 1; set_call_gate(&default_ldt,lcall7); diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index bfba24327..a09fa6419 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -81,8 +81,8 @@ asmlinkage struct pt_regs * save_v86_state(struct kernel_vm86_regs * regs) printk("vm86: could not access userspace vm86_info\n"); do_exit(SIGSEGV); } - current->tss.esp0 = current->saved_kernel_stack; - current->saved_kernel_stack = 0; + current->tss.esp0 = current->tss.saved_esp0; + current->tss.saved_esp0 = 0; ret = KVM86->regs32; unlock_kernel(); return ret; @@ -137,7 +137,7 @@ asmlinkage int sys_vm86old(struct vm86_struct * v86) lock_kernel(); tsk = current; - if (tsk->saved_kernel_stack) + if (tsk->tss.saved_esp0) goto out; tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1); tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2, @@ -187,7 +187,7 @@ asmlinkage int sys_vm86(unsigned long subfunction, struct vm86plus_struct * v86) /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */ ret = -EPERM; - if (tsk->saved_kernel_stack) + if (tsk->tss.saved_esp0) goto out; tmp = copy_from_user(&info, v86, 
VM86_REGS_SIZE1); tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2, @@ -247,7 +247,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk * Save old state, set default return value (%eax) to 0 */ info->regs32->eax = 0; - tsk->saved_kernel_stack = tsk->tss.esp0; + tsk->tss.saved_esp0 = tsk->tss.esp0; tsk->tss.esp0 = (unsigned long) &info->VM86_TSS_ESP0; tsk->tss.screen_bitmap = info->screen_bitmap; @@ -601,11 +601,17 @@ static inline void free_vm86_irq(int irqnumber) static inline int task_valid(struct task_struct *tsk) { struct task_struct *p; + int ret = 0; + read_lock(&tasklist_lock); for_each_task(p) { - if ((p == tsk) && (p->sig)) return 1; + if ((p == tsk) && (p->sig)) { + ret = 1; + break; + } } - return 0; + read_unlock(&tasklist_lock); + return ret; } static inline void handle_irq_zombies(void) -- cgit v1.2.3