diff options
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r-- | arch/i386/kernel/Makefile | 9 | ||||
-rw-r--r-- | arch/i386/kernel/bios32.c | 448 | ||||
-rw-r--r-- | arch/i386/kernel/entry.S | 33 | ||||
-rw-r--r-- | arch/i386/kernel/head.S | 31 | ||||
-rw-r--r-- | arch/i386/kernel/i386_ksyms.c | 10 | ||||
-rw-r--r-- | arch/i386/kernel/io_apic.c | 127 | ||||
-rw-r--r-- | arch/i386/kernel/ioport.c | 2 | ||||
-rw-r--r-- | arch/i386/kernel/irq.c | 415 | ||||
-rw-r--r-- | arch/i386/kernel/irq.h | 13 | ||||
-rw-r--r-- | arch/i386/kernel/ldt.c | 26 | ||||
-rw-r--r-- | arch/i386/kernel/mca.c | 30 | ||||
-rw-r--r-- | arch/i386/kernel/mtrr.c | 1229 | ||||
-rw-r--r-- | arch/i386/kernel/process.c | 85 | ||||
-rw-r--r-- | arch/i386/kernel/signal.c | 10 | ||||
-rw-r--r-- | arch/i386/kernel/smp.c | 70 | ||||
-rw-r--r-- | arch/i386/kernel/traps.c | 6 | ||||
-rw-r--r-- | arch/i386/kernel/vm86.c | 2 |
17 files changed, 2013 insertions, 533 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index ce1e6652d..6f63d2c97 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -21,6 +21,7 @@ O_TARGET := kernel.o O_OBJS := process.o signal.o entry.o traps.o irq.o vm86.o \ ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o OX_OBJS := i386_ksyms.o +MX_OBJS := ifdef CONFIG_PCI O_OBJS += bios32.o @@ -30,6 +31,14 @@ ifdef CONFIG_MCA O_OBJS += mca.o endif +ifeq ($(CONFIG_MTRR),y) +OX_OBJS += mtrr.o +else + ifeq ($(CONFIG_MTRR),m) + MX_OBJS += mtrr.o + endif +endif + ifdef SMP diff --git a/arch/i386/kernel/bios32.c b/arch/i386/kernel/bios32.c index 7e865c417..f2955918a 100644 --- a/arch/i386/kernel/bios32.c +++ b/arch/i386/kernel/bios32.c @@ -1,7 +1,7 @@ /* - * bios32.c - BIOS32, PCI BIOS functions. + * bios32.c - Low-Level PCI Access * - * $Id: bios32.c,v 1.5 1997/12/02 01:48:00 ralf Exp $ + * $Id: bios32.c,v 1.29 1998/04/17 16:31:15 mj Exp $ * * Sponsored by * iX Multiuser Multitasking Magazine @@ -64,14 +64,16 @@ * * Aug 2, 1997 : Split to PCI BIOS handling and direct PCI access parts * and cleaned it up... Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * + * Feb 6, 1998 : No longer using BIOS to find devices and device classes. [mj] */ #include <linux/config.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/bios32.h> #include <linux/pci.h> #include <linux/init.h> +#include <linux/ioport.h> #include <asm/page.h> #include <asm/segment.h> @@ -85,14 +87,20 @@ #include "irq.h" +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + /* * Generic PCI access -- indirect calls according to detected HW. 
*/ struct pci_access { int pci_present; - int (*find_device)(unsigned short, unsigned short, unsigned short, unsigned char *, unsigned char *); - int (*find_class)(unsigned int, unsigned short, unsigned char *, unsigned char *); int (*read_config_byte)(unsigned char, unsigned char, unsigned char, unsigned char *); int (*read_config_word)(unsigned char, unsigned char, unsigned char, unsigned short *); int (*read_config_dword)(unsigned char, unsigned char, unsigned char, unsigned int *); @@ -108,8 +116,6 @@ static int pci_stub(void) static struct pci_access pci_access_none = { 0, /* No PCI present */ - (void *) pci_stub, /* No functions implemented */ - (void *) pci_stub, (void *) pci_stub, (void *) pci_stub, (void *) pci_stub, @@ -125,54 +131,10 @@ int pcibios_present(void) return access_pci->pci_present; } -int pcibios_find_class (unsigned int class_code, unsigned short index, - unsigned char *bus, unsigned char *device_fn) -{ - return access_pci->find_class(class_code, index, bus, device_fn); -} - -int pcibios_find_device (unsigned short vendor, unsigned short device_id, - unsigned short index, unsigned char *bus, unsigned char *device_fn) -{ - return access_pci->find_device(vendor, device_id, index, bus, device_fn); -} - int pcibios_read_config_byte (unsigned char bus, unsigned char device_fn, unsigned char where, unsigned char *value) { - int res; - - res = access_pci->read_config_byte(bus, device_fn, where, value); - -#ifdef __SMP__ -/* - * IOAPICs can take PCI IRQs directly, lets first check the mptable: - */ - if (where == PCI_INTERRUPT_LINE) { - int irq; - char pin; - - /* - * get the PCI IRQ INT _physical pin_ for this device - */ - access_pci->read_config_byte(bus, device_fn, - PCI_INTERRUPT_PIN, &pin); - /* - * subtle, PCI pins are numbered starting from 1 ... 
- */ - pin--; - - irq = IO_APIC_get_PCI_irq_vector (bus,PCI_SLOT(device_fn),pin); - if (irq != -1) - *value = (unsigned char) irq; - - printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n", - bus,PCI_SLOT(device_fn), pin, irq); - - } -#endif - - return res; + return access_pci->read_config_byte(bus, device_fn, where, value); } int pcibios_read_config_word (unsigned char bus, @@ -205,60 +167,19 @@ int pcibios_write_config_dword (unsigned char bus, return access_pci->write_config_dword(bus, device_fn, where, value); } -/* - * Direct access to PCI hardware... - */ - -/* - * Given the vendor and device ids, find the n'th instance of that device - * in the system. - */ +#define PCI_PROBE_BIOS 1 +#define PCI_PROBE_CONF1 2 +#define PCI_PROBE_CONF2 4 +#define PCI_NO_SORT 0x100 +#define PCI_BIOS_SORT 0x200 -#ifdef CONFIG_PCI_DIRECT - -static int pci_direct_find_device (unsigned short vendor, unsigned short device_id, - unsigned short index, unsigned char *bus, - unsigned char *devfn) -{ - unsigned int curr = 0; - struct pci_dev *dev; - - for (dev = pci_devices; dev; dev = dev->next) { - if (dev->vendor == vendor && dev->device == device_id) { - if (curr == index) { - *devfn = dev->devfn; - *bus = dev->bus->number; - return PCIBIOS_SUCCESSFUL; - } - ++curr; - } - } - return PCIBIOS_DEVICE_NOT_FOUND; -} +static unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2; /* - * Given the class, find the n'th instance of that device - * in the system. + * Direct access to PCI hardware... 
*/ -static int pci_direct_find_class (unsigned int class_code, unsigned short index, - unsigned char *bus, unsigned char *devfn) -{ - unsigned int curr = 0; - struct pci_dev *dev; - - for (dev = pci_devices; dev; dev = dev->next) { - if (dev->class == class_code) { - if (curr == index) { - *devfn = dev->devfn; - *bus = dev->bus->number; - return PCIBIOS_SUCCESSFUL; - } - ++curr; - } - } - return PCIBIOS_DEVICE_NOT_FOUND; -} +#ifdef CONFIG_PCI_DIRECT /* * Functions for accessing PCI configuration space with type 1 accesses @@ -346,8 +267,6 @@ static int pci_conf1_write_config_dword (unsigned char bus, unsigned char device static struct pci_access pci_direct_conf1 = { 1, - pci_direct_find_device, - pci_direct_find_class, pci_conf1_read_config_byte, pci_conf1_read_config_word, pci_conf1_read_config_dword, @@ -458,8 +377,6 @@ static int pci_conf2_write_config_dword (unsigned char bus, unsigned char device static struct pci_access pci_direct_conf2 = { 1, - pci_direct_find_device, - pci_direct_find_class, pci_conf2_read_config_byte, pci_conf2_read_config_word, pci_conf2_read_config_dword, @@ -470,39 +387,43 @@ static struct pci_access pci_direct_conf2 = { __initfunc(static struct pci_access *pci_check_direct(void)) { - unsigned int tmp; - unsigned long flags; + unsigned int tmp; + unsigned long flags; - save_flags(flags); cli(); + save_flags(flags); cli(); + + /* + * Check if configuration type 1 works. + */ + if (pci_probe & PCI_PROBE_CONF1) { + outb (0x01, 0xCFB); + tmp = inl (0xCF8); + outl (0x80000000, 0xCF8); + if (inl (0xCF8) == 0x80000000) { + outl (tmp, 0xCF8); + restore_flags(flags); + printk("PCI: Using configuration type 1\n"); + return &pci_direct_conf1; + } + outl (tmp, 0xCF8); + } + + /* + * Check if configuration type 2 works. 
+ */ + if (pci_probe & PCI_PROBE_CONF2) { + outb (0x00, 0xCFB); + outb (0x00, 0xCF8); + outb (0x00, 0xCFA); + if (inb (0xCF8) == 0x00 && inb (0xCFA) == 0x00) { + restore_flags(flags); + printk("PCI: Using configuration type 2\n"); + return &pci_direct_conf2; + } + } - /* - * Check if configuration type 1 works. - */ - outb (0x01, 0xCFB); - tmp = inl (0xCF8); - outl (0x80000000, 0xCF8); - if (inl (0xCF8) == 0x80000000) { - outl (tmp, 0xCF8); - restore_flags(flags); - printk("PCI: Using configuration type 1\n"); - return &pci_direct_conf1; - } - outl (tmp, 0xCF8); - - /* - * Check if configuration type 2 works. - */ - outb (0x00, 0xCFB); - outb (0x00, 0xCF8); - outb (0x00, 0xCFA); - if (inb (0xCF8) == 0x00 && inb (0xCFA) == 0x00) { restore_flags(flags); - printk("PCI: Using configuration type 2\n"); - return &pci_direct_conf2; - } - restore_flags(flags); - printk("PCI: PCI hardware not found (i.e., not present or not supported).\n"); - return NULL; + return NULL; } #endif @@ -599,7 +520,7 @@ static unsigned long bios32_service(unsigned long service) printk("bios32_service(0x%lx): not present\n", service); return 0; default: /* Shouldn't happen */ - printk("bios32_service(0x%lx): returned 0x%x, mail drew@colorado.edu\n", + printk("bios32_service(0x%lx): returned 0x%x, report to <mj@ucw.cz>.\n", service, return_code); return 0; } @@ -642,7 +563,7 @@ __initfunc(static int check_pcibios(void)) if (present_status || (signature != PCI_SIGNATURE)) { printk ("PCI: %s: BIOS32 Service Directory says PCI BIOS is present,\n" " but PCI_BIOS_PRESENT subfunction fails with present status of 0x%x\n" - " and signature of 0x%08lx (%c%c%c%c). Mail drew@Colorado.EDU\n", + " and signature of 0x%08lx (%c%c%c%c). Report to <mj@ucw.cz>.\n", (signature == PCI_SIGNATURE) ? 
"WARNING" : "ERROR", present_status, signature, (char) (signature >> 0), (char) (signature >> 8), @@ -660,6 +581,8 @@ __initfunc(static int check_pcibios(void)) return 0; } +#if 0 /* Not used */ + static int pci_bios_find_class (unsigned int class_code, unsigned short index, unsigned char *bus, unsigned char *device_fn) { @@ -684,8 +607,10 @@ static int pci_bios_find_class (unsigned int class_code, unsigned short index, return (int) (ret & 0xff00) >> 8; } -static int pci_bios_find_device (unsigned short vendor, unsigned short device_id, - unsigned short index, unsigned char *bus, unsigned char *device_fn) +#endif + +__initfunc(static int pci_bios_find_device (unsigned short vendor, unsigned short device_id, + unsigned short index, unsigned char *bus, unsigned char *device_fn)) { unsigned short bx; unsigned short ret; @@ -847,8 +772,6 @@ static int pci_bios_write_config_dword (unsigned char bus, static struct pci_access pci_bios_access = { 1, - pci_bios_find_device, - pci_bios_find_class, pci_bios_read_config_byte, pci_bios_read_config_word, pci_bios_read_config_dword, @@ -887,21 +810,17 @@ __initfunc(static struct pci_access *pci_find_bios(void)) if (sum != 0) continue; if (check->fields.revision != 0) { - printk("PCI: unsupported BIOS32 revision %d at 0x%p, mail drew@colorado.edu\n", + printk("PCI: unsupported BIOS32 revision %d at 0x%p, report to <mj@ucw.cz>\n", check->fields.revision, check); continue; } - printk ("PCI: BIOS32 Service Directory structure at 0x%p\n", check); + DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check); if (check->fields.entry >= 0x100000) { -#ifdef CONFIG_PCI_DIRECT - printk("PCI: BIOS32 entry in high memory, trying direct PCI access.\n"); - return pci_check_direct(); -#else - printk("PCI: BIOS32 entry in high memory, cannot use.\n"); -#endif + printk("PCI: BIOS32 entry (0x%p) in high memory, cannot use.\n", check); + return NULL; } else { bios32_entry = check->fields.entry; - printk ("PCI: BIOS32 Service Directory entry at 
0x%lx\n", bios32_entry); + DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry); bios32_indirect.address = bios32_entry + PAGE_OFFSET; if (check_pcibios()) return &pci_bios_access; @@ -912,36 +831,237 @@ __initfunc(static struct pci_access *pci_find_bios(void)) return NULL; } +/* + * Sort the device list according to PCI BIOS. + */ + +__initfunc(void pcibios_sort(void)) +{ + struct pci_dev *dev = pci_devices; + struct pci_dev **last = &pci_devices; + struct pci_dev *d, **dd, *e; + int idx; + unsigned char bus, devfn; + + DBG("PCI: Sorting device list...\n"); + while ((e = dev)) { + idx = 0; + while (pci_bios_find_device(e->vendor, e->device, idx, &bus, &devfn) == PCIBIOS_SUCCESSFUL) { + idx++; + for(dd=&dev; (d = *dd); dd = &d->next) { + if (d->bus->number == bus && d->devfn == devfn) { + *dd = d->next; + *last = d; + last = &d->next; + break; + } + } + if (!d) + printk("PCI: BIOS reporting unknown device %02x:%02x\n", bus, devfn); + } + if (!idx) { + printk("PCI: Device %02x:%02x not found by BIOS\n", + dev->bus->number, dev->devfn); + d = dev; + dev = dev->next; + *last = d; + last = &d->next; + } + } + *last = NULL; +} + #endif /* - * No fixup function used. + * Several BIOS'es forget to assign addresses to I/O ranges. + * We try to fix it here, expecting there are free addresses + * starting with 0x5800. Ugly, but until we come with better + * resource management, it's the only simple solution. 
*/ -__initfunc(unsigned long pcibios_fixup(unsigned long mem_start, unsigned long mem_end)) +static int pci_last_io_addr __initdata = 0x5800; + +__initfunc(void pcibios_fixup_io_addr(struct pci_dev *dev, int idx)) { - return mem_start; + unsigned short cmd; + unsigned int reg = PCI_BASE_ADDRESS_0 + 4*idx; + unsigned int size, addr, try; + unsigned int bus = dev->bus->number; + unsigned int devfn = dev->devfn; + + if (!pci_last_io_addr) { + printk("PCI: Unassigned I/O space for %02x:%02x\n", bus, devfn); + return; + } + pcibios_read_config_word(bus, devfn, PCI_COMMAND, &cmd); + pcibios_write_config_word(bus, devfn, PCI_COMMAND, cmd & ~PCI_COMMAND_IO); + pcibios_write_config_dword(bus, devfn, reg, ~0); + pcibios_read_config_dword(bus, devfn, reg, &size); + size = (~(size & PCI_BASE_ADDRESS_IO_MASK) & 0xffff) + 1; + addr = 0; + if (!size || size > 0x100) + printk("PCI: Unable to handle I/O allocation for %02x:%02x (%04x), tell <mj@ucw.cz>\n", bus, devfn, size); + else { + do { + addr = (pci_last_io_addr + size - 1) & ~(size-1); + pci_last_io_addr = addr + size; + } while (check_region(addr, size)); + printk("PCI: Assigning I/O space %04x-%04x to device %02x:%02x\n", addr, addr+size-1, bus, devfn); + pcibios_write_config_dword(bus, devfn, reg, addr | PCI_BASE_ADDRESS_SPACE_IO); + pcibios_read_config_dword(bus, devfn, reg, &try); + if ((try & PCI_BASE_ADDRESS_IO_MASK) != addr) { + addr = 0; + printk("PCI: Address setup failed, got %04x\n", try); + } else + dev->base_address[idx] = try; + } + if (!addr) { + pcibios_write_config_dword(bus, devfn, reg, 0); + dev->base_address[idx] = 0; + } + pcibios_write_config_word(bus, devfn, PCI_COMMAND, cmd); +} + +/* + * Arch-dependent fixups. We need to fix here base addresses, I/O + * and memory enables and IRQ's as the PCI BIOS'es are buggy as hell. 
+ */ + +__initfunc(void pcibios_fixup(void)) +{ + struct pci_dev *dev; + int i, has_io, has_mem; + unsigned short cmd; + + for(dev = pci_devices; dev; dev=dev->next) { + /* + * There are buggy BIOSes that forget to enable I/O and memory + * access to PCI devices. We try to fix this, but we need to + * be sure that the BIOS didn't forget to assign an address + * to the device. [mj] + */ + has_io = has_mem = 0; + for(i=0; i<6; i++) { + unsigned long a = dev->base_address[i]; + if (a & PCI_BASE_ADDRESS_SPACE_IO) { + has_io = 1; + a &= PCI_BASE_ADDRESS_IO_MASK; + if (!a || a == PCI_BASE_ADDRESS_IO_MASK) + pcibios_fixup_io_addr(dev, i); + } else if (a & PCI_BASE_ADDRESS_MEM_MASK) + has_mem = 1; + } + pci_read_config_word(dev, PCI_COMMAND, &cmd); + if (has_io && !(cmd & PCI_COMMAND_IO)) { + printk("PCI: Enabling I/O for device %02x:%02x\n", + dev->bus->number, dev->devfn); + cmd |= PCI_COMMAND_IO; + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + if (has_mem && !(cmd & PCI_COMMAND_MEMORY)) { + printk("PCI: Enabling memory for device %02x:%02x\n", + dev->bus->number, dev->devfn); + cmd |= PCI_COMMAND_MEMORY; + pci_write_config_word(dev, PCI_COMMAND, cmd); + } +#ifdef __SMP__ + /* + * Recalculate IRQ numbers if we use the I/O APIC + */ + { + int irq; + unsigned char pin; + + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if (pin) { + pin--; /* interrupt pins are numbered starting from 1 */ + irq = IO_APIC_get_PCI_irq_vector (dev->bus->number, PCI_SLOT(dev->devfn), pin); + if (irq >= 0) { + printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n", + dev->bus->number, PCI_SLOT(dev->devfn), pin, irq); + dev->irq = irq; + } + } + } +#endif + /* + * Fix out-of-range IRQ numbers and report bogus IRQ. + */ + if (dev->irq >= NR_IRQS) + dev->irq = 0; + } + +#ifdef CONFIG_PCI_BIOS + if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT)) + pcibios_sort(); +#endif } /* - * Initialization. Try all known PCI access methods. + * Initialization. 
Try all known PCI access methods. Note that we support + * using both PCI BIOS and direct access: in such cases, we use I/O ports + * to access config space, but we still keep BIOS order of cards to be + * compatible with 2.0.X. This should go away in 2.3. */ -__initfunc(unsigned long pcibios_init(unsigned long memory_start, unsigned long memory_end)) +__initfunc(void pcibios_init(void)) { - struct pci_access *a = NULL; + struct pci_access *bios = NULL; + struct pci_access *dir = NULL; #ifdef CONFIG_PCI_BIOS - a = pci_find_bios(); -#else + if ((pci_probe & PCI_PROBE_BIOS) && ((bios = pci_find_bios()))) + pci_probe |= PCI_BIOS_SORT; +#endif #ifdef CONFIG_PCI_DIRECT - a = pci_check_direct(); -#else -#error "You need to set CONFIG_PCI_BIOS or CONFIG_PCI_DIRECT if you want PCI support." + if (pci_probe & (PCI_PROBE_CONF1 | PCI_PROBE_CONF2)) + dir = pci_check_direct(); #endif + if (dir) + access_pci = dir; + else if (bios) + access_pci = bios; +} + +#if !defined(CONFIG_PCI_BIOS) && !defined(CONFIG_PCI_DIRECT) +#error PCI configured with neither PCI BIOS or PCI direct access support. 
#endif - if (a) - access_pci = a; - return memory_start; +__initfunc(char *pcibios_setup(char *str)) +{ + if (!strcmp(str, "off")) { + pci_probe = 0; + return NULL; + } else if (!strncmp(str, "io=", 3)) { + char *p; + unsigned int x = simple_strtoul(str+3, &p, 16); + if (p && *p) + return str; + pci_last_io_addr = x; + return NULL; + } +#ifdef CONFIG_PCI_BIOS + else if (!strcmp(str, "bios")) { + pci_probe = PCI_PROBE_BIOS; + return NULL; + } else if (!strcmp(str, "nobios")) { + pci_probe &= ~PCI_PROBE_BIOS; + return NULL; + } else if (!strcmp(str, "nosort")) { + pci_probe |= PCI_NO_SORT; + return NULL; + } +#endif +#ifdef CONFIG_PCI_DIRECT + else if (!strcmp(str, "conf1")) { + pci_probe = PCI_PROBE_CONF1; + return NULL; + } + else if (!strcmp(str, "conf2")) { + pci_probe = PCI_PROBE_CONF2; + return NULL; + } +#endif + return str; } diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 14b82b45b..b6541005f 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -81,8 +81,8 @@ ENOSYS = 38 #define SAVE_ALL \ cld; \ - push %es; \ - push %ds; \ + pushl %es; \ + pushl %ds; \ pushl %eax; \ pushl %ebp; \ pushl %edi; \ @@ -91,8 +91,8 @@ ENOSYS = 38 pushl %ecx; \ pushl %ebx; \ movl $(__KERNEL_DS),%edx; \ - mov %dx,%ds; \ - mov %dx,%es; + movl %dx,%ds; \ + movl %dx,%es; #define RESTORE_ALL \ popl %ebx; \ @@ -102,8 +102,8 @@ ENOSYS = 38 popl %edi; \ popl %ebp; \ popl %eax; \ - pop %ds; \ - pop %es; \ + popl %ds; \ + popl %es; \ addl $4,%esp; \ iret @@ -155,7 +155,7 @@ ENTRY(system_call) jae badsys testb $0x20,flags(%ebx) # PF_TRACESYS jne tracesys - call SYMBOL_NAME(sys_call_table)(,%eax,4) + call *SYMBOL_NAME(sys_call_table)(,%eax,4) movl %eax,EAX(%esp) # save the return value ALIGN .globl ret_from_sys_call @@ -193,7 +193,7 @@ tracesys: movl $-ENOSYS,EAX(%esp) call SYMBOL_NAME(syscall_trace) movl ORIG_EAX(%esp),%eax - call SYMBOL_NAME(sys_call_table)(,%eax,4) + call *SYMBOL_NAME(sys_call_table)(,%eax,4) movl %eax,EAX(%esp) # save the return 
value call SYMBOL_NAME(syscall_trace) jmp ret_from_sys_call @@ -231,7 +231,7 @@ ENTRY(divide_error) pushl $ SYMBOL_NAME(do_divide_error) ALIGN error_code: - push %ds + pushl %ds pushl %eax xorl %eax,%eax pushl %ebp @@ -241,17 +241,27 @@ error_code: decl %eax # eax = -1 pushl %ecx pushl %ebx +#if 1 xorl %ecx,%ecx # zero ecx cld mov %es,%cx # get the lower order bits of es +#else + cld +# Some older processors leave the top 16 bits of the 32 bit destination +# register undefined, rather than zeroed in the following instruction. +# This won't matter when restoring or loading a segment register from the +# stack. It may be a problem if any code reads the full 32 bit value. +# dosemu? kernel? Would somebody like to verify that this way is really OK? + movl %es,%cx +#endif xchgl %eax, ORIG_EAX(%esp) # orig_eax (get the error code. ) movl %esp,%edx xchgl %ecx, ES(%esp) # get the address and save es. pushl %eax # push the error code pushl %edx movl $(__KERNEL_DS),%edx - mov %dx,%ds - mov %dx,%es + movl %dx,%ds + movl %dx,%es GET_CURRENT(%ebx) call *%ecx addl $8,%esp @@ -533,6 +543,7 @@ ENTRY(sys_call_table) .long SYMBOL_NAME(sys_pread) /* 180 */ .long SYMBOL_NAME(sys_pwrite) .long SYMBOL_NAME(sys_chown) + .long SYMBOL_NAME(sys_getcwd) .rept NR_syscalls-182 .long SYMBOL_NAME(sys_ni_syscall) diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 048921838..86031f37f 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -45,10 +45,10 @@ startup_32: */ cld movl $(__KERNEL_DS),%eax - mov %ax,%ds - mov %ax,%es - mov %ax,%fs - mov %ax,%gs + movl %ax,%ds + movl %ax,%es + movl %ax,%fs + movl %ax,%gs #ifdef __SMP__ orw %bx,%bx jz 1f @@ -321,10 +321,10 @@ is386: pushl %ecx # restore original EFLAGS lidt idt_descr ljmp $(__KERNEL_CS),$1f 1: movl $(__KERNEL_DS),%eax # reload all the segment registers - mov %ax,%ds # after changing gdt. - mov %ax,%es - mov %ax,%fs - mov %ax,%gs + movl %ax,%ds # after changing gdt. 
+ movl %ax,%es + movl %ax,%fs + movl %ax,%gs #ifdef __SMP__ movl $(__KERNEL_DS), %eax mov %ax,%ss # Reload the stack pointer (segment only) @@ -404,16 +404,16 @@ ignore_int: pushl %eax pushl %ecx pushl %edx - push %es - push %ds + pushl %es + pushl %ds movl $(__KERNEL_DS),%eax - mov %ax,%ds - mov %ax,%es + movl %ax,%ds + movl %ax,%es pushl $int_msg call SYMBOL_NAME(printk) popl %eax - pop %ds - pop %es + popl %ds + popl %es popl %edx popl %ecx popl %eax @@ -619,9 +619,6 @@ ENTRY(idt_table) .fill 256,8,0 # idt is uninitialized /* - * This gdt setup gives the kernel a CONFIG_MAX_MEMSIZE sized address space at - * virtual address PAGE_OFFSET. - * * This contains up to 8192 quadwords depending on NR_TASKS - 64kB of * gdt entries. Ugh. * diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index d2837d648..66dec5fed 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -8,7 +8,6 @@ #include <linux/in6.h> #include <linux/interrupt.h> #include <linux/smp_lock.h> -#include <linux/pci.h> #include <asm/semaphore.h> #include <asm/processor.h> @@ -64,13 +63,14 @@ EXPORT_SYMBOL(__generic_copy_to_user); EXPORT_SYMBOL(strlen_user); #ifdef __SMP__ -EXPORT_SYMBOL(apic_reg); /* Needed internally for the I386 inlines */ EXPORT_SYMBOL(cpu_data); EXPORT_SYMBOL_NOVERS(kernel_flag); EXPORT_SYMBOL_NOVERS(active_kernel_processor); EXPORT_SYMBOL(smp_invalidate_needed); EXPORT_SYMBOL_NOVERS(__lock_kernel); EXPORT_SYMBOL(lk_lockmsg); +EXPORT_SYMBOL(__cpu_logical_map); +EXPORT_SYMBOL(smp_num_cpus); /* Global SMP irq stuff */ EXPORT_SYMBOL(synchronize_irq); @@ -82,6 +82,8 @@ EXPORT_SYMBOL(__global_cli); EXPORT_SYMBOL(__global_sti); EXPORT_SYMBOL(__global_save_flags); EXPORT_SYMBOL(__global_restore_flags); +EXPORT_SYMBOL(smp_message_pass); +EXPORT_SYMBOL(mtrr_hook); #endif #ifdef CONFIG_MCA @@ -97,7 +99,3 @@ EXPORT_SYMBOL(mca_set_adapter_procfn); EXPORT_SYMBOL(mca_isenabled); EXPORT_SYMBOL(mca_isadapter); #endif - -#if CONFIG_PCI 
-EXPORT_SYMBOL(pci_devices); -#endif diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 219e7f853..6e422614e 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -28,13 +28,11 @@ #include "irq.h" -#define IO_APIC_BASE 0xfec00000 - /* * volatile is justified in this case, it might change * spontaneously, GCC should not cache it */ -volatile unsigned int * io_apic_reg = NULL; +#define IO_APIC_BASE ((volatile int *)0xfec00000) /* * The structure of the IO-APIC: @@ -96,17 +94,19 @@ int nr_ioapic_registers = 0; /* # of IRQ routing registers */ int mp_irq_entries = 0; /* # of MP IRQ source entries */ struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* MP IRQ source entries */ +int mpc_default_type = 0; /* non-0 if default (table-less) + MP configuration */ unsigned int io_apic_read (unsigned int reg) { - *io_apic_reg = reg; - return *(io_apic_reg+4); + *IO_APIC_BASE = reg; + return *(IO_APIC_BASE+4); } void io_apic_write (unsigned int reg, unsigned int value) { - *io_apic_reg = reg; - *(io_apic_reg+4) = value; + *IO_APIC_BASE = reg; + *(IO_APIC_BASE+4) = value; } void enable_IO_APIC_irq (unsigned int irq) @@ -256,7 +256,7 @@ void setup_IO_APIC_irqs (void) /* * PCI IRQ redirection. Yes, limits are hardcoded. */ - if ((i>=16) && (i<=19)) { + if ((i>=16) && (i<=23)) { if (pirq_entries[i-16] != -1) { if (!pirq_entries[i-16]) { printk("disabling PIRQ%d\n", i-16); @@ -516,16 +516,16 @@ void print_IO_APIC (void) static void init_sym_mode (void) { printk("enabling Symmetric IO mode ... "); - outb (0x70, 0x22); - outb (0x01, 0x23); + outb_p (0x70, 0x22); + outb_p (0x01, 0x23); printk("...done.\n"); } void init_pic_mode (void) { printk("disabling Symmetric IO mode ... 
"); - outb (0x70, 0x22); - outb (0x00, 0x23); + outb_p (0x70, 0x22); + outb_p (0x00, 0x23); printk("...done.\n"); } @@ -579,17 +579,85 @@ static int ioapic_blacklisted (void) return in_ioapic_list(ioapic_blacklist); } +static void setup_ioapic_id (void) +{ + struct IO_APIC_reg_00 reg_00; -void setup_IO_APIC (void) + /* + * 'default' mptable configurations mean a hardwired setup, + * 2 CPUs, 16 APIC registers. IO-APIC ID is usually set to 0, + * setting it to ID 2 should be fine. + */ + + /* + * Sanity check, is ID 2 really free? Every APIC in the + * system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. + */ + if (cpu_present_map & (1<<0x2)) + panic("APIC ID 2 already used"); + + /* + * set the ID + */ + *(int *)®_00 = io_apic_read(0); + printk("... changing IO-APIC physical APIC ID to 2 ...\n"); + reg_00.ID = 0x2; + io_apic_write(0, *(int *)®_00); + + /* + * Sanity check + */ + *(int *)®_00 = io_apic_read(0); + if (reg_00.ID != 0x2) + panic("could not set ID"); +} + +static void construct_default_ISA_mptable (void) { - int i; + int i, pos=0; + + for (i=0; i<16; i++) { + if (!IO_APIC_IRQ(i)) + continue; + + mp_irqs[pos].mpc_irqtype = 0; + mp_irqs[pos].mpc_irqflag = 0; + mp_irqs[pos].mpc_srcbus = 0; + mp_irqs[pos].mpc_srcbusirq = i; + mp_irqs[pos].mpc_dstapic = 0; + mp_irqs[pos].mpc_dstirq = i; + pos++; + } + mp_irq_entries = pos; + mp_bus_id_to_type[0] = MP_BUS_ISA; + /* - * Map the IO APIC into kernel space + * MP specification 1.4 defines some extra rules for default + * configurations, fix them up here: */ + + switch (mpc_default_type) + { + case 2: + break; + default: + /* + * pin 2 is IRQ0: + */ + mp_irqs[0].mpc_dstirq = 2; + } - printk("mapping IO APIC from standard address.\n"); - io_apic_reg = ioremap_nocache(IO_APIC_BASE,4096); - printk("new virtual address: %p.\n",io_apic_reg); + setup_ioapic_id(); +} + +void setup_IO_APIC (void) +{ + int i; + + if (!pirqs_enabled) + for (i=0; i<MAX_PIRQS; i++) + 
pirq_entries[i]=-1; init_sym_mode(); { @@ -605,12 +673,6 @@ void setup_IO_APIC (void) for (i=0; i<nr_ioapic_registers; i++) clear_IO_APIC_irq (i); -#if DEBUG_1 - for (i=0; i<16; i++) - if (IO_APIC_IRQ(i)) - setup_IO_APIC_irq_ISA_default (i); -#endif - /* * the following IO-APIC's can be enabled: * @@ -634,7 +696,18 @@ void setup_IO_APIC (void) io_apic_irqs = 0; } + /* + * If there are no explicit mp irq entries: it's either one of the + * default configuration types or we are broken. In both cases it's + * fine to set up most of the low 16 IOAPIC pins to ISA defaults. + */ + if (!mp_irq_entries) { + printk("no explicit IRQ entries, using default mptable\n"); + construct_default_ISA_mptable(); + } + init_IO_APIC_traps(); + setup_IO_APIC_irqs (); if (!timer_irq_works ()) { @@ -644,9 +717,9 @@ void setup_IO_APIC (void) printk("..MP-BIOS bug: i8254 timer not connected to IO-APIC\n"); printk("..falling back to 8259A-based timer interrupt\n"); } - - printk("nr of MP irq sources: %d.\n", mp_irq_entries); - printk("nr of IOAPIC registers: %d.\n", nr_ioapic_registers); + + printk("nr of MP irq sources: %d.\n", mp_irq_entries); + printk("nr of IOAPIC registers: %d.\n", nr_ioapic_registers); print_IO_APIC(); } diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c index 44fd26530..19587312a 100644 --- a/arch/i386/kernel/ioport.c +++ b/arch/i386/kernel/ioport.c @@ -76,8 +76,6 @@ asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on) return 0; } -unsigned int *stack; - /* * sys_iopl has to be used when you want to access the IO ports * beyond the 0x3ff range: to get the full 65536 ports bitmapped diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 95ce9fb14..2b8b86cc7 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -68,10 +68,6 @@ static unsigned int cached_irq_mask = (1<<NR_IRQS)-1; spinlock_t irq_controller_lock; -static unsigned int irq_events [NR_IRQS] = { -1, }; -static int disabled_irq [NR_IRQS] = { 0, 
}; -static int ipi_pending [NR_IRQS] = { 0, }; - /* * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) * boards the timer interrupt and sometimes the keyboard interrupt is @@ -124,11 +120,34 @@ static struct hw_interrupt_type ioapic_irq_type = { }; #endif -struct hw_interrupt_type *irq_handles[NR_IRQS] = -{ - [0 ... 15] = &i8259A_irq_type /* standard ISA IRQs */ +/* + * Status: reason for being disabled: somebody has + * done a "disable_irq()" or we must not re-enter the + * already executing irq.. + */ +#define IRQ_INPROGRESS 1 +#define IRQ_DISABLED 2 + +/* + * This is the "IRQ descriptor", which contains various information + * about the irq, including what kind of hardware handling it has, + * whether it is disabled etc etc. + * + * Pad this out to 32 bytes for cache and indexing reasons. + */ +typedef struct { + unsigned int status; /* IRQ status - IRQ_INPROGRESS, IRQ_DISABLED */ + unsigned int events; /* Do we have any pending events? */ + unsigned int ipi; /* Have we sent off the pending IPI? */ + struct hw_interrupt_type *handler; /* handle/enable/disable functions */ + struct irqaction *action; /* IRQ action list */ + unsigned int unused[3]; +} irq_desc_t; + +irq_desc_t irq_desc[NR_IRQS] = { + [0 ... 15] = { 0, 0, 0, &i8259A_irq_type, }, /* standard ISA IRQs */ #ifdef __SMP__ - , [16 ... NR_IRQS-1] = &ioapic_irq_type /* 'high' PCI IRQs */ + [16 ... 
23] = { 0, 0, 0, &ioapic_irq_type, }, /* 'high' PCI IRQs */ #endif }; @@ -175,6 +194,7 @@ void set_8259A_irq_mask(unsigned int irq) void unmask_generic_irq(unsigned int irq) { + irq_desc[irq].status = 0; if (IO_APIC_IRQ(irq)) enable_IO_APIC_irq(irq); else { @@ -241,6 +261,7 @@ BUILD_IRQ(23) BUILD_SMP_INTERRUPT(reschedule_interrupt) BUILD_SMP_INTERRUPT(invalidate_interrupt) BUILD_SMP_INTERRUPT(stop_cpu_interrupt) +BUILD_SMP_INTERRUPT(mtrr_interrupt) /* * every pentium local APIC has two 'local interrupts', with a @@ -297,17 +318,6 @@ static struct irqaction irq13 = { math_error_irq, 0, 0, "fpu", NULL, NULL }; */ static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL}; -static struct irqaction *irq_action[NR_IRQS] = { - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL -#ifdef __SMP__ - ,NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL -#endif -}; - int get_irq_list(char *buf) { int i, j; @@ -320,7 +330,7 @@ int get_irq_list(char *buf) *p++ = '\n'; for (i = 0 ; i < NR_IRQS ; i++) { - action = irq_action[i]; + action = irq_desc[i].action; if (!action) continue; p += sprintf(p, "%3d: ",i); @@ -335,7 +345,7 @@ int get_irq_list(char *buf) if (IO_APIC_IRQ(i)) p += sprintf(p, " IO-APIC "); else - p += sprintf(p, " XT PIC "); + p += sprintf(p, " XT-PIC "); p += sprintf(p, " %s", action->name); for (action=action->next; action; action = action->next) { @@ -535,20 +545,31 @@ static inline void get_irqlock(int cpu) global_irq_holder = cpu; } +#define EFLAGS_IF_SHIFT 9 + /* * A global "cli()" while in an interrupt context * turns into just a local cli(). Interrupts * should use spinlocks for the (very unlikely) * case that they ever want to protect against * each other. + * + * If we already have local interrupts disabled, + * this will not turn a local disable into a + * global one (problems with spinlocks: this makes + * save_flags+cli+sti usable inside a spinlock). 
*/ void __global_cli(void) { - int cpu = smp_processor_id(); + unsigned int flags; - __cli(); - if (!local_irq_count[cpu]) - get_irqlock(cpu); + __save_flags(flags); + if (flags & (1 << EFLAGS_IF_SHIFT)) { + int cpu = smp_processor_id(); + __cli(); + if (!local_irq_count[cpu]) + get_irqlock(cpu); + } } void __global_sti(void) @@ -560,33 +581,53 @@ void __global_sti(void) __sti(); } +/* + * SMP flags value to restore to: + * 0 - global cli + * 1 - global sti + * 2 - local cli + * 3 - local sti + */ unsigned long __global_save_flags(void) { - if (!local_irq_count[smp_processor_id()]) - return global_irq_holder == (unsigned char) smp_processor_id(); - else { - unsigned long x; - __save_flags(x); - return x; + int retval; + int local_enabled; + unsigned long flags; + + __save_flags(flags); + local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1; + /* default to local */ + retval = 2 + local_enabled; + + /* check for global flags if we're not in an interrupt */ + if (!local_irq_count[smp_processor_id()]) { + if (local_enabled) + retval = 1; + if (global_irq_holder == (unsigned char) smp_processor_id()) + retval = 0; } + return retval; } void __global_restore_flags(unsigned long flags) { - if (!local_irq_count[smp_processor_id()]) { - switch (flags) { - case 0: - __global_sti(); - break; - case 1: - __global_cli(); - break; - default: - printk("global_restore_flags: %08lx (%08lx)\n", - flags, (&flags)[-1]); - } - } else - __restore_flags(flags); + switch (flags) { + case 0: + __global_cli(); + break; + case 1: + __global_sti(); + break; + case 2: + __cli(); + break; + case 3: + __sti(); + break; + default: + printk("global_restore_flags: %08lx (%08lx)\n", + flags, (&flags)[-1]); + } } #endif @@ -597,7 +638,7 @@ static int handle_IRQ_event(unsigned int irq, struct pt_regs * regs) int status; status = 0; - action = *(irq + irq_action); + action = irq_desc[irq].action; if (action) { status |= 1; @@ -618,125 +659,26 @@ static int handle_IRQ_event(unsigned int irq, struct pt_regs * 
regs) return status; } - -void disable_irq(unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&irq_controller_lock, flags); - irq_handles[irq]->disable(irq); - spin_unlock_irqrestore(&irq_controller_lock, flags); - - synchronize_irq(); -} - /* * disable/enable_irq() wait for all irq contexts to finish * executing. Also it's recursive. */ static void disable_8259A_irq(unsigned int irq) { - disabled_irq[irq]++; cached_irq_mask |= 1 << irq; set_8259A_irq_mask(irq); } -#ifdef __SMP__ -static void disable_ioapic_irq(unsigned int irq) -{ - disabled_irq[irq]++; - /* - * We do not disable IO-APIC irqs in hardware ... - */ -} -#endif - void enable_8259A_irq (unsigned int irq) { - unsigned long flags; - spin_lock_irqsave(&irq_controller_lock, flags); - if (disabled_irq[irq]) - disabled_irq[irq]--; - else { - spin_unlock_irqrestore(&irq_controller_lock, flags); - return; - } cached_irq_mask &= ~(1 << irq); set_8259A_irq_mask(irq); - spin_unlock_irqrestore(&irq_controller_lock, flags); -} - -#ifdef __SMP__ -void enable_ioapic_irq (unsigned int irq) -{ - unsigned long flags, should_handle_irq; - int cpu = smp_processor_id(); - - spin_lock_irqsave(&irq_controller_lock, flags); - if (disabled_irq[irq]) - disabled_irq[irq]--; - else { - spin_unlock_irqrestore(&irq_controller_lock, flags); - return; - } -#if 0 - /* - * In the SMP+IOAPIC case it might happen that there are an unspecified - * number of pending IRQ events unhandled. These cases are very rare, - * so we 'resend' these IRQs via IPIs, to the same CPU. It's much - * better to do it this way as thus we dont have to be aware of - * 'pending' interrupts in the IRQ path, except at this point. 
- */ - if (!disabled_irq[irq] && irq_events[irq]) { - if (!ipi_pending[irq]) { - ipi_pending[irq] = 1; - --irq_events[irq]; - send_IPI(cpu,IO_APIC_VECTOR(irq)); - } - } - spin_unlock_irqrestore(&irq_controller_lock, flags); -#else - if (!disabled_irq[irq] && irq_events[irq]) { - struct pt_regs regs; /* FIXME: these are fake currently */ - - disabled_irq[irq]++; - hardirq_enter(cpu); - spin_unlock(&irq_controller_lock); - - release_irqlock(cpu); - while (test_bit(0,&global_irq_lock)) mb(); -again: - handle_IRQ_event(irq, &regs); - - spin_lock(&irq_controller_lock); - disabled_irq[irq]--; - should_handle_irq=0; - if (--irq_events[irq] && !disabled_irq[irq]) { - should_handle_irq=1; - disabled_irq[irq]++; - } - spin_unlock(&irq_controller_lock); - - if (should_handle_irq) - goto again; - - irq_exit(cpu, irq); - __restore_flags(flags); - } else - spin_unlock_irqrestore(&irq_controller_lock, flags); -#endif -} -#endif - -void enable_irq(unsigned int irq) -{ - irq_handles[irq]->enable(irq); } void make_8259A_irq (unsigned int irq) { io_apic_irqs &= ~(1<<irq); - irq_handles[irq] = &i8259A_irq_type; + irq_desc[irq].handler = &i8259A_irq_type; disable_irq(irq); enable_irq(irq); } @@ -750,6 +692,7 @@ void make_8259A_irq (unsigned int irq) static inline void mask_and_ack_8259A(unsigned int irq) { spin_lock(&irq_controller_lock); + irq_desc[irq].status |= IRQ_INPROGRESS; cached_irq_mask |= 1 << irq; if (irq & 8) { inb(0xA1); /* DUMMY */ @@ -772,7 +715,8 @@ static void do_8259A_IRQ(unsigned int irq, int cpu, struct pt_regs * regs) if (handle_IRQ_event(irq, regs)) { spin_lock(&irq_controller_lock); - unmask_8259A(irq); + if (!(irq_desc[irq].status &= IRQ_DISABLED)) + unmask_8259A(irq); spin_unlock(&irq_controller_lock); } @@ -780,41 +724,119 @@ static void do_8259A_IRQ(unsigned int irq, int cpu, struct pt_regs * regs) } #ifdef __SMP__ + +/* + * In the SMP+IOAPIC case it might happen that there are an unspecified + * number of pending IRQ events unhandled. 
These cases are very rare, + * so we 'resend' these IRQs via IPIs, to the same CPU. It's much + * better to do it this way as thus we dont have to be aware of + * 'pending' interrupts in the IRQ path, except at this point. + */ +static void enable_ioapic_irq(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + if (desc->events && !desc->ipi) { + desc->ipi = 1; + send_IPI(APIC_DEST_SELF, IO_APIC_VECTOR(irq)); + } +} + +/* + * We do not actually disable IO-APIC irqs in hardware ... + */ +static void disable_ioapic_irq(unsigned int irq) +{ +} + static void do_ioapic_IRQ(unsigned int irq, int cpu, struct pt_regs * regs) { - int should_handle_irq = 0; + irq_desc_t *desc = irq_desc + irq; + + spin_lock(&irq_controller_lock); + /* Ack the irq inside the lock! */ ack_APIC_irq(); + desc->ipi = 0; - spin_lock(&irq_controller_lock); - if (ipi_pending[irq]) - ipi_pending[irq] = 0; + /* If the irq is disabled for whatever reason, just set a flag and return */ + if (desc->status & (IRQ_DISABLED | IRQ_INPROGRESS)) { + desc->events = 1; + spin_unlock(&irq_controller_lock); + return; + } - if (!irq_events[irq]++ && !disabled_irq[irq]) - should_handle_irq = 1; + desc->status = IRQ_INPROGRESS; + desc->events = 0; hardirq_enter(cpu); spin_unlock(&irq_controller_lock); - if (should_handle_irq) { - while (test_bit(0,&global_irq_lock)) mb(); -again: - handle_IRQ_event(irq, regs); + while (test_bit(0,&global_irq_lock)) barrier(); + + for (;;) { + int pending; + + /* If there is no IRQ handler, exit early, leaving the irq "in progress" */ + if (!handle_IRQ_event(irq, regs)) + goto no_handler; spin_lock(&irq_controller_lock); - should_handle_irq=0; - if (--irq_events[irq] && !disabled_irq[irq]) - should_handle_irq=1; + pending = desc->events; + desc->events = 0; + if (!pending) + break; spin_unlock(&irq_controller_lock); - - if (should_handle_irq) - goto again; } + desc->status &= IRQ_DISABLED; + spin_unlock(&irq_controller_lock); +no_handler: hardirq_exit(cpu); release_irqlock(cpu); 
} + #endif + +/* + * Generic enable/disable code: this just calls + * down into the PIC-specific version for the actual + * hardware disable after having gotten the irq + * controller lock. + */ +void disable_irq(unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&irq_controller_lock, flags); + /* + * At this point we may actually have a pending interrupt being active + * on another CPU. So don't touch the IRQ_INPROGRESS bit.. + */ + irq_desc[irq].status |= IRQ_DISABLED; + irq_desc[irq].handler->disable(irq); + spin_unlock_irqrestore(&irq_controller_lock, flags); + + synchronize_irq(); +} + +void enable_irq(unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&irq_controller_lock, flags); + /* + * In contrast to the above, we should _not_ have any concurrent + * interrupt activity here, so we just clear both disabled bits. + * + * This allows us to have IRQ_INPROGRESS set until we actually + * install a handler for this interrupt (make irq autodetection + * work by just looking at the status field for the irq) + */ + irq_desc[irq].status = 0; + irq_desc[irq].handler->enable(irq); + spin_unlock_irqrestore(&irq_controller_lock, flags); +} + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -845,7 +867,7 @@ asmlinkage void do_IRQ(struct pt_regs regs) int cpu = smp_processor_id(); kstat.irqs[cpu][irq]++; - irq_handles[irq]->handle(irq, cpu, &regs); + irq_desc[irq].handler->handle(irq, cpu, &regs); /* * This should be conditional: we should really get @@ -865,7 +887,7 @@ int setup_x86_irq(unsigned int irq, struct irqaction * new) struct irqaction *old, **p; unsigned long flags; - p = irq_action + irq; + p = &irq_desc[irq].action; if ((old = *p) != NULL) { /* Can't share interrupts unless both agree to */ if (!(old->flags & new->flags & SA_SHIRQ)) @@ -890,7 +912,7 @@ int setup_x86_irq(unsigned int irq, struct irqaction * new) spin_lock(&irq_controller_lock); #ifdef __SMP__ if 
(IO_APIC_IRQ(irq)) { - irq_handles[irq] = &ioapic_irq_type; + irq_desc[irq].handler = &ioapic_irq_type; /* * First disable it in the 8259A: */ @@ -948,7 +970,7 @@ void free_irq(unsigned int irq, void *dev_id) printk("Trying to free IRQ%d\n",irq); return; } - for (p = irq + irq_action; (action = *p) != NULL; p = &action->next) { + for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) { if (action->dev_id != dev_id) continue; @@ -964,32 +986,29 @@ void free_irq(unsigned int irq, void *dev_id) } /* - * probing is always single threaded [FIXME: is this true?] + * IRQ autodetection code.. + * + * This depends on the fact that any interrupt that + * comes in on to an unassigned handler will get stuck + * with "IRQ_INPROGRESS" asserted and the interrupt + * disabled. */ -static unsigned int probe_irqs[NR_CPUS][NR_IRQS]; - unsigned long probe_irq_on (void) { - unsigned int i, j, irqs = 0; + unsigned int i, irqs = 0; unsigned long delay; /* - * save current irq counts - */ - memcpy(probe_irqs,kstat.irqs,NR_CPUS*NR_IRQS*sizeof(int)); - - /* * first, enable any unassigned irqs */ + spin_lock_irq(&irq_controller_lock); for (i = NR_IRQS-1; i > 0; i--) { - if (!irq_action[i]) { - unsigned long flags; - spin_lock_irqsave(&irq_controller_lock, flags); + if (!irq_desc[i].action) { unmask_generic_irq(i); irqs |= (1 << i); - spin_unlock_irqrestore(&irq_controller_lock, flags); } } + spin_unlock_irq(&irq_controller_lock); /* * wait for spurious interrupts to increase counters @@ -1000,35 +1019,35 @@ unsigned long probe_irq_on (void) /* * now filter out any obviously spurious interrupts */ - for (i=0; i<NR_IRQS; i++) - for (j=0; j<NR_CPUS; j++) - if (kstat.irqs[j][i] != probe_irqs[j][i]) - irqs &= ~(1UL << i); + spin_lock_irq(&irq_controller_lock); + for (i=0; i<NR_IRQS; i++) { + if (irq_desc[i].status & IRQ_INPROGRESS) + irqs &= ~(1UL << i); + } + spin_unlock_irq(&irq_controller_lock); return irqs; } int probe_irq_off (unsigned long irqs) { - int i,j, irq_found = 
-1; + int i, irq_found = -1; + spin_lock_irq(&irq_controller_lock); for (i=0; i<NR_IRQS; i++) { - int sum = 0; - for (j=0; j<NR_CPUS; j++) { - sum += kstat.irqs[j][i]; - sum -= probe_irqs[j][i]; - } - if (sum && (irqs & (1UL << i))) { + if ((irqs & 1) && (irq_desc[i].status & IRQ_INPROGRESS)) { if (irq_found != -1) { irq_found = -irq_found; goto out; - } else - irq_found = i; + } + irq_found = i; } + irqs >>= 1; } if (irq_found == -1) irq_found = 0; out: + spin_unlock_irq(&irq_controller_lock); return irq_found; } @@ -1050,7 +1069,7 @@ void init_IO_APIC_traps(void) for (i = 0; i < NR_IRQS ; i++) if (IO_APIC_VECTOR(i) <= 0xfe) /* HACK */ { if (IO_APIC_IRQ(i)) { - irq_handles[i] = &ioapic_irq_type; + irq_desc[i].handler = &ioapic_irq_type; /* * First disable it in the 8259A: */ @@ -1071,10 +1090,9 @@ __initfunc(void init_IRQ(void)) outb_p(LATCH & 0xff , 0x40); /* LSB */ outb(LATCH >> 8 , 0x40); /* MSB */ - printk("INIT IRQ\n"); for (i=0; i<NR_IRQS; i++) { - irq_events[i] = 0; - disabled_irq[i] = 0; + irq_desc[i].events = 0; + irq_desc[i].status = 0; } /* * 16 old-style INTA-cycle interrupt gates: @@ -1110,6 +1128,9 @@ __initfunc(void init_IRQ(void)) /* self generated IPI for local APIC timer */ set_intr_gate(0x41, apic_timer_interrupt); + /* IPI for MTRR control */ + set_intr_gate(0x50, mtrr_interrupt); + #endif request_region(0x20,0x20,"pic1"); request_region(0xa0,0x20,"pic2"); diff --git a/arch/i386/kernel/irq.h b/arch/i386/kernel/irq.h index 9824026dc..81795c85c 100644 --- a/arch/i386/kernel/irq.h +++ b/arch/i386/kernel/irq.h @@ -23,10 +23,7 @@ void init_pic_mode (void); extern unsigned int io_apic_irqs; -extern inline int IO_APIC_VECTOR (int irq) -{ - return (0x51+(irq<<3)); -} +#define IO_APIC_VECTOR(irq) (0x51+((irq)<<3)) #define MAX_IRQ_SOURCES 128 #define MAX_MP_BUSSES 32 @@ -83,8 +80,8 @@ static inline void irq_exit(int cpu, unsigned int irq) #define SAVE_ALL \ "cld\n\t" \ - "push %es\n\t" \ - "push %ds\n\t" \ + "pushl %es\n\t" \ + "pushl %ds\n\t" \ "pushl 
%eax\n\t" \ "pushl %ebp\n\t" \ "pushl %edi\n\t" \ @@ -93,8 +90,8 @@ static inline void irq_exit(int cpu, unsigned int irq) "pushl %ecx\n\t" \ "pushl %ebx\n\t" \ "movl $" STR(__KERNEL_DS) ",%edx\n\t" \ - "mov %dx,%ds\n\t" \ - "mov %dx,%es\n\t" + "movl %dx,%ds\n\t" \ + "movl %dx,%es\n\t" #define IRQ_NAME2(nr) nr##_interrupt(void) #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c index 65c743195..64d4ab153 100644 --- a/arch/i386/kernel/ldt.c +++ b/arch/i386/kernel/ldt.c @@ -18,7 +18,7 @@ static int read_ldt(void * ptr, unsigned long bytecount) { - void * address = current->ldt; + void * address = current->mm->segments; unsigned long size; if (!ptr) @@ -37,6 +37,7 @@ static int write_ldt(void * ptr, unsigned long bytecount, int oldmode) { struct modify_ldt_ldt_s ldt_info; unsigned long *lp; + struct mm_struct * mm; int error, i; if (bytecount != sizeof(ldt_info)) @@ -48,19 +49,32 @@ static int write_ldt(void * ptr, unsigned long bytecount, int oldmode) if ((ldt_info.contents == 3 && (oldmode || ldt_info.seg_not_present == 0)) || ldt_info.entry_number >= LDT_ENTRIES) return -EINVAL; - if (!current->ldt) { + mm = current->mm; + + /* + * Horrible dependencies! Try to get rid of this. This is wrong, + * as it only reloads the ldt for the first process with this + * mm. The implications are that you should really make sure that + * you have a ldt before you do the first clone(), otherwise + * you get strange behaviour (the kernel is safe, it's just user + * space strangeness). + * + * For no good reason except historical, the GDT index of the LDT + * is chosen to follow the index number in the task[] array. 
+ */ + if (!mm->segments) { for (i=1 ; i<NR_TASKS ; i++) { if (task[i] == current) { - if (!(current->ldt = (struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE))) + if (!(mm->segments = (void *) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE))) return -ENOMEM; - memset(current->ldt, 0, LDT_ENTRIES*LDT_ENTRY_SIZE); - set_ldt_desc(gdt+(i<<1)+FIRST_LDT_ENTRY, current->ldt, LDT_ENTRIES); + memset(mm->segments, 0, LDT_ENTRIES*LDT_ENTRY_SIZE); + set_ldt_desc(gdt+(i<<1)+FIRST_LDT_ENTRY, mm->segments, LDT_ENTRIES); load_ldt(i); } } } - lp = (unsigned long *) &current->ldt[ldt_info.entry_number]; + lp = (unsigned long *) (LDT_ENTRY_SIZE * ldt_info.entry_number + (unsigned long) mm->segments); /* Allow LDTs to be cleared by the user. */ if (ldt_info.base_addr == 0 && ldt_info.limit == 0 && (oldmode || diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c index de6de8f14..ae67822bc 100644 --- a/arch/i386/kernel/mca.c +++ b/arch/i386/kernel/mca.c @@ -62,7 +62,7 @@ static struct MCA_info* mca_info = 0; /*--------------------------------------------------------------------*/ #ifdef CONFIG_PROC_FS -static long mca_do_proc_init( long memory_start, long memory_end ); +static void mca_do_proc_init( void ); static int mca_default_procfn( char* buf, int slot ); static ssize_t proc_mca_read( struct file*, char*, size_t, loff_t *); @@ -79,7 +79,7 @@ static struct inode_operations proc_mca_inode_operations = { /*--------------------------------------------------------------------*/ -__initfunc(long mca_init(long memory_start, long memory_end)) +__initfunc(void mca_init(void)) { unsigned int i, j; int foundscsi = 0; @@ -96,21 +96,14 @@ __initfunc(long mca_init(long memory_start, long memory_end)) */ if (!MCA_bus) - return memory_start; + return; cli(); /* * Allocate MCA_info structure (at address divisible by 8) */ - if( ((memory_start+7)&(~7)) > memory_end ) - { - /* uh oh */ - return memory_start; - } - - mca_info = (struct MCA_info*) ((memory_start+7)&(~7)); - memory_start = ((long)mca_info) 
+ sizeof(struct MCA_info); + mca_info = kmalloc(sizeof(struct MCA_info), GFP_ATOMIC); /* * Make sure adapter setup is off @@ -194,10 +187,8 @@ __initfunc(long mca_init(long memory_start, long memory_end)) request_region(0x100,0x08,"POS (MCA)"); #ifdef CONFIG_PROC_FS - memory_start = mca_do_proc_init( memory_start, memory_end ); + mca_do_proc_init(); #endif - - return memory_start; } /*--------------------------------------------------------------------*/ @@ -418,12 +409,12 @@ int get_mca_info(char *buf) /*--------------------------------------------------------------------*/ -__initfunc(long mca_do_proc_init( long memory_start, long memory_end )) +__initfunc(void mca_do_proc_init( void )) { int i = 0; struct proc_dir_entry* node = 0; - if( mca_info == 0 ) return memory_start; /* never happens */ + if( mca_info == 0 ) return; /* never happens */ proc_register( &proc_mca, &(struct proc_dir_entry) { PROC_MCA_REGISTERS, 3, "pos", S_IFREG|S_IRUGO, @@ -439,11 +430,7 @@ __initfunc(long mca_do_proc_init( long memory_start, long memory_end )) mca_info->slot[i].dev = 0; if( ! mca_isadapter( i ) ) continue; - if( memory_start + sizeof(struct proc_dir_entry) > memory_end ) { - continue; - } - node = (struct proc_dir_entry*) memory_start; - memory_start += sizeof(struct proc_dir_entry); + node = kmalloc(sizeof(struct proc_dir_entry), GFP_ATOMIC); if( i < MCA_MAX_SLOT_NR ) { node->low_ino = PROC_MCA_SLOT + i; @@ -464,7 +451,6 @@ __initfunc(long mca_do_proc_init( long memory_start, long memory_end )) proc_register( &proc_mca, node ); } - return memory_start; } /* mca_do_proc_init() */ /*--------------------------------------------------------------------*/ diff --git a/arch/i386/kernel/mtrr.c b/arch/i386/kernel/mtrr.c new file mode 100644 index 000000000..f2981c5cf --- /dev/null +++ b/arch/i386/kernel/mtrr.c @@ -0,0 +1,1229 @@ +/* Generic MTRR (Memory Type Range Register) driver. 
+ + Copyright (C) 1997-1998 Richard Gooch + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Richard Gooch may be reached by email at rgooch@atnf.csiro.au + The postal address is: + Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. + + Source: "Pentium Pro Family Developer's Manual, Volume 3: + Operating System Writer's Guide" (Intel document number 242692), + section 11.11.7 + + ChangeLog + + Prehistory Martin Tischhäuser <martin@ikcbarka.fzk.de> + Initial register-setting code (from proform-1.0). + 19971216 Richard Gooch <rgooch@atnf.csiro.au> + Original version for /proc/mtrr interface, SMP-safe. + v1.0 + 19971217 Richard Gooch <rgooch@atnf.csiro.au> + Bug fix for ioctls()'s. + Added sample code in Documentation/mtrr.txt + v1.1 + 19971218 Richard Gooch <rgooch@atnf.csiro.au> + Disallow overlapping regions. + 19971219 Jens Maurer <jmaurer@menuett.rhein-main.de> + Register-setting fixups. + v1.2 + 19971222 Richard Gooch <rgooch@atnf.csiro.au> + Fixups for kernel 2.1.75. + v1.3 + 19971229 David Wragg <dpw@doc.ic.ac.uk> + Register-setting fixups and conformity with Intel conventions. + 19971229 Richard Gooch <rgooch@atnf.csiro.au> + Cosmetic changes and wrote this ChangeLog ;-) + 19980106 Richard Gooch <rgooch@atnf.csiro.au> + Fixups for kernel 2.1.78. 
+ v1.4 + 19980119 David Wragg <dpw@doc.ic.ac.uk> + Included passive-release enable code (elsewhere in PCI setup). + v1.5 + 19980131 Richard Gooch <rgooch@atnf.csiro.au> + Replaced global kernel lock with private spinlock. + v1.6 + 19980201 Richard Gooch <rgooch@atnf.csiro.au> + Added wait for other CPUs to complete changes. + v1.7 + 19980202 Richard Gooch <rgooch@atnf.csiro.au> + Bug fix in definition of <set_mtrr> for UP. + v1.8 + 19980319 Richard Gooch <rgooch@atnf.csiro.au> + Fixups for kernel 2.1.90. + 19980323 Richard Gooch <rgooch@atnf.csiro.au> + Move SMP BIOS fixup before secondary CPUs call <calibrate_delay> + v1.9 + 19980325 Richard Gooch <rgooch@atnf.csiro.au> + Fixed test for overlapping regions: confused by adjacent regions + 19980326 Richard Gooch <rgooch@atnf.csiro.au> + Added wbinvd in <set_mtrr_prepare>. + 19980401 Richard Gooch <rgooch@atnf.csiro.au> + Bug fix for non-SMP compilation. + 19980418 David Wragg <dpw@doc.ic.ac.uk> + Fixed-MTRR synchronisation for SMP and use atomic operations + instead of spinlocks. + 19980418 Richard Gooch <rgooch@atnf.csiro.au> + Differentiate different MTRR register classes for BIOS fixup. + v1.10 + 19980419 David Wragg <dpw@doc.ic.ac.uk> + Bug fix in variable MTRR synchronisation. + v1.11 + 19980419 Richard Gooch <rgooch@atnf.csiro.au> + Fixups for kernel 2.1.97. + v1.12 + 19980421 Richard Gooch <rgooch@atnf.csiro.au> + Safer synchronisation across CPUs when changing MTRRs. + v1.13 + 19980423 Richard Gooch <rgooch@atnf.csiro.au> + Bugfix for SMP systems without MTRR support. + v1.14 + 19980427 Richard Gooch <rgooch@atnf.csiro.au> + Trap calls to <mtrr_add> and <mtrr_del> on non-MTRR machines. + v1.15 + 19980427 Richard Gooch <rgooch@atnf.csiro.au> + Use atomic bitops for setting SMP change mask. + v1.16 + 19980428 Richard Gooch <rgooch@atnf.csiro.au> + Removed spurious diagnostic message. + v1.17 + 19980429 Richard Gooch <rgooch@atnf.csiro.au> + Moved register-setting macros into this file. 
+ Moved setup code from init/main.c to i386-specific areas. + v1.18 +*/ +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/tty.h> +#include <linux/timer.h> +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/wait.h> +#include <linux/string.h> +#include <linux/malloc.h> +#include <linux/ioport.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include <linux/ctype.h> +#include <linux/proc_fs.h> +#include <linux/mm.h> +#include <linux/module.h> +#define MTRR_NEED_STRINGS +#include <asm/mtrr.h> +#include <linux/init.h> + +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/pgtable.h> +#include <asm/segment.h> +#include <asm/bitops.h> +#include <asm/smp_lock.h> +#include <asm/atomic.h> +#include <linux/smp.h> + +#define MTRR_VERSION "1.18 (19980429)" + +#define TRUE 1 +#define FALSE 0 + +#define X86_FEATURE_MTRR 0x1000 /* memory type registers */ + +#define MTRRcap_MSR 0x0fe +#define MTRRdefType_MSR 0x2ff + +#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) +#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) + +#define NUM_FIXED_RANGES 88 +#define MTRRfix64K_00000_MSR 0x250 +#define MTRRfix16K_80000_MSR 0x258 +#define MTRRfix16K_A0000_MSR 0x259 +#define MTRRfix4K_C0000_MSR 0x268 +#define MTRRfix4K_C8000_MSR 0x269 +#define MTRRfix4K_D0000_MSR 0x26a +#define MTRRfix4K_D8000_MSR 0x26b +#define MTRRfix4K_E0000_MSR 0x26c +#define MTRRfix4K_E8000_MSR 0x26d +#define MTRRfix4K_F0000_MSR 0x26e +#define MTRRfix4K_F8000_MSR 0x26f + +#ifdef __SMP__ +# define MTRR_CHANGE_MASK_FIXED 0x01 +# define MTRR_CHANGE_MASK_VARIABLE 0x02 +# define MTRR_CHANGE_MASK_DEFTYPE 0x04 +#endif + +/* In the processor's MTRR interface, the MTRR type is always held in + an 8 bit field: */ +typedef u8 mtrr_type; + +#define LINE_SIZE 80 +#define JIFFIE_TIMEOUT 100 + +#ifdef __SMP__ +# define set_mtrr(reg,base,size,type) set_mtrr_smp (reg, base, size, type) +#else +# define 
set_mtrr(reg,base,size,type) set_mtrr_up (reg, base, size, type,TRUE) +#endif + +#ifndef CONFIG_PROC_FS +# define compute_ascii() while (0) +#endif + +#ifdef CONFIG_PROC_FS +static char *ascii_buffer = NULL; +static unsigned int ascii_buf_bytes = 0; +#endif +static unsigned int *usage_table = NULL; +#ifdef __SMP__ +static spinlock_t main_lock = SPIN_LOCK_UNLOCKED; +#endif + +/* Private functions */ +#ifdef CONFIG_PROC_FS +static void compute_ascii (void); +#endif + + +struct set_mtrr_context +{ + unsigned long flags; + unsigned long deftype_lo; + unsigned long deftype_hi; + unsigned long cr4val; +}; + +/* + * Access to machine-specific registers (available on 586 and better only) + * Note: the rd* operations modify the parameters directly (without using + * pointer indirection), this allows gcc to optimize better + */ +#define rdmsr(msr,val1,val2) \ + __asm__ __volatile__("rdmsr" \ + : "=a" (val1), "=d" (val2) \ + : "c" (msr)) + +#define wrmsr(msr,val1,val2) \ + __asm__ __volatile__("wrmsr" \ + : /* no outputs */ \ + : "c" (msr), "a" (val1), "d" (val2)) + +#define rdtsc(low,high) \ + __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) + +#define rdpmc(counter,low,high) \ + __asm__ __volatile__("rdpmc" \ + : "=a" (low), "=d" (high) \ + : "c" (counter)) + + +/* Put the processor into a state where MTRRs can be safely set. */ +static void set_mtrr_prepare(struct set_mtrr_context *ctxt) +{ + unsigned long tmp; + + /* disable interrupts */ + save_flags(ctxt->flags); cli(); + + /* save value of CR4 and clear Page Global Enable (bit 7) */ + asm volatile ("movl %%cr4, %0\n\t" + "movl %0, %1\n\t" + "andb $0x7f, %b1\n\t" + "movl %1, %%cr4\n\t" + : "=r" (ctxt->cr4val), "=q" (tmp) : : "memory"); + + /* disable and flush caches. Note that wbinvd flushes the TLBs as + a side-effect. 
*/ + asm volatile ("movl %%cr0, %0\n\t" + "orl $0x40000000, %0\n\t" + "wbinvd\n\t" + "movl %0, %%cr0\n\t" + "wbinvd\n\t" + : "=r" (tmp) : : "memory"); + + /* disable MTRRs, and set the default type to uncached. */ + rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); + wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, ctxt->deftype_hi); +} /* End Function set_mtrr_prepare */ + + +/* Restore the processor after a set_mtrr_prepare */ +static void set_mtrr_done(struct set_mtrr_context *ctxt) +{ + unsigned long tmp; + + /* flush caches and TLBs */ + asm volatile ("wbinvd" : : : "memory" ); + + /* restore MTRRdefType */ + wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); + + /* enable caches */ + asm volatile ("movl %%cr0, %0\n\t" + "andl $0xbfffffff, %0\n\t" + "movl %0, %%cr0\n\t" + : "=r" (tmp) : : "memory"); + + /* restore value of CR4 */ + asm volatile ("movl %0, %%cr4" + : : "r" (ctxt->cr4val) : "memory"); + + /* re-enable interrupts (if enabled previously) */ + restore_flags(ctxt->flags); +} /* End Function set_mtrr_done */ + + +/* this function returns the number of variable MTRRs */ +static unsigned int get_num_var_ranges (void) +{ + unsigned long config, dummy; + + rdmsr(MTRRcap_MSR, config, dummy); + return (config & 0xff); +} /* End Function get_num_var_ranges */ + + +/* non-zero if we have the write-combining memory type. */ +static int have_wrcomb (void) +{ + unsigned long config, dummy; + + rdmsr(MTRRcap_MSR, config, dummy); + return (config & (1<<10)); +} + + +static void get_mtrr (unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type *type) +{ + unsigned long dummy, mask_lo, base_lo; + + rdmsr(MTRRphysMask_MSR(reg), mask_lo, dummy); + if ((mask_lo & 0x800) == 0) { + /* Invalid (i.e. free) range. */ + *base = 0; + *size = 0; + *type = 0; + return; + } + + rdmsr(MTRRphysBase_MSR(reg), base_lo, dummy); + + /* We ignore the extra address bits (32-35). 
If someone wants to + run x86 Linux on a machine with >4GB memory, this will be the + least of their problems. */ + + /* Clean up mask_lo so it gives the real address mask. */ + mask_lo = (mask_lo & 0xfffff000UL); + + /* This works correctly if size is a power of two, i.e. a + contiguous range. */ + *size = ~(mask_lo - 1); + + *base = (base_lo & 0xfffff000UL); + *type = (base_lo & 0xff); +} /* End Function get_mtrr */ + + +static void set_mtrr_up (unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type, int do_safe) +/* [SUMMARY] Set variable MTRR register on the local CPU. + <reg> The register to set. + <base> The base address of the region. + <size> The size of the region. If this is 0 the region is disabled. + <type> The type of the region. + <do_safe> If TRUE, do the change safely. If FALSE, safety measures should + be done externally. +*/ +{ + struct set_mtrr_context ctxt; + + if (do_safe) set_mtrr_prepare (&ctxt); + if (size == 0) + { + /* The invalid bit is kept in the mask, so we simply clear the + relevant mask register to disable a range. */ + wrmsr (MTRRphysMask_MSR (reg), 0, 0); + } + else + { + wrmsr (MTRRphysBase_MSR (reg), base | type, 0); + wrmsr (MTRRphysMask_MSR (reg), ~(size - 1) | 0x800, 0); + } + if (do_safe) set_mtrr_done (&ctxt); +} /* End Function set_mtrr_up */ + + +#ifdef __SMP__ + +struct mtrr_var_range +{ + unsigned long base_lo; + unsigned long base_hi; + unsigned long mask_lo; + unsigned long mask_hi; +}; + + +/* Get the MSR pair relating to a var range. */ +__initfunc(static void get_mtrr_var_range (unsigned int index, + struct mtrr_var_range *vr)) +{ + rdmsr (MTRRphysBase_MSR (index), vr->base_lo, vr->base_hi); + rdmsr (MTRRphysMask_MSR (index), vr->mask_lo, vr->mask_hi); +} /* End Function get_mtrr_var_range */ + + +/* Set the MSR pair relating to a var range. Returns TRUE if + changes are made. 
*/ +__initfunc(static int set_mtrr_var_range_testing (unsigned int index, + struct mtrr_var_range *vr)) +{ + unsigned int lo, hi; + int changed = FALSE; + + rdmsr(MTRRphysBase_MSR(index), lo, hi); + + if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) + || (vr->base_hi & 0xfUL) != (hi & 0xfUL)) { + wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); + changed = TRUE; + } + + rdmsr(MTRRphysMask_MSR(index), lo, hi); + + if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL) + || (vr->mask_hi & 0xfUL) != (hi & 0xfUL)) { + wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); + changed = TRUE; + } + + return changed; +} + + +__initfunc(static void get_fixed_ranges(mtrr_type *frs)) +{ + unsigned long *p = (unsigned long *)frs; + int i; + + rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]); + + for (i = 0; i < 2; i++) + rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i*2], p[3 + i*2]); + + for (i = 0; i < 8; i++) + rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i*2], p[7 + i*2]); +} + + +__initfunc(static int set_fixed_ranges_testing(mtrr_type *frs)) +{ + unsigned long *p = (unsigned long *)frs; + int changed = FALSE; + int i; + unsigned long lo, hi; + + rdmsr(MTRRfix64K_00000_MSR, lo, hi); + if (p[0] != lo || p[1] != hi) { + wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]); + changed = TRUE; + } + + for (i = 0; i < 2; i++) { + rdmsr(MTRRfix16K_80000_MSR + i, lo, hi); + if (p[2 + i*2] != lo || p[3 + i*2] != hi) { + wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i*2], p[3 + i*2]); + changed = TRUE; + } + } + + for (i = 0; i < 8; i++) { + rdmsr(MTRRfix4K_C0000_MSR + i, lo, hi); + if (p[6 + i*2] != lo || p[7 + i*2] != hi) { + wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i*2], p[7 + i*2]); + changed = TRUE; + } + } + + return changed; +} + + +struct mtrr_state +{ + unsigned int num_var_ranges; + struct mtrr_var_range *var_ranges; + mtrr_type fixed_ranges[NUM_FIXED_RANGES]; + unsigned char enabled; + mtrr_type def_type; +}; + + +/* Grab all of the mtrr state for this cpu into *state. 
*/ +__initfunc(static void get_mtrr_state(struct mtrr_state *state)) +{ + unsigned int nvrs, i; + struct mtrr_var_range *vrs; + unsigned long lo, dummy; + + nvrs = state->num_var_ranges = get_num_var_ranges(); + vrs = state->var_ranges + = kmalloc(nvrs * sizeof(struct mtrr_var_range), GFP_KERNEL); + if (vrs == NULL) + nvrs = state->num_var_ranges = 0; /* allocation failed: record no variable ranges */ + + for (i = 0; i < nvrs; i++) + get_mtrr_var_range(i, &vrs[i]); + + get_fixed_ranges(state->fixed_ranges); + + rdmsr(MTRRdefType_MSR, lo, dummy); + state->def_type = (lo & 0xff); + state->enabled = (lo & 0xc00) >> 10; /* bits 10-11 of the low word */ +} /* End Function get_mtrr_state */ + + +/* Free resources associated with a struct mtrr_state */ +__initfunc(static void finalize_mtrr_state(struct mtrr_state *state)) +{ + if (state->var_ranges) kfree (state->var_ranges); +} /* End Function finalize_mtrr_state */ + + +__initfunc(static unsigned long set_mtrr_state (struct mtrr_state *state, + struct set_mtrr_context *ctxt)) +/* [SUMMARY] Set the MTRR state for this CPU. + <state> The MTRR state information to read. + <ctxt> Some relevant CPU context. + [NOTE] The CPU must already be in a safe state for MTRR changes. + [RETURNS] 0 if no changes made, else a mask indicating what was changed. +*/ +{ + unsigned int i; + unsigned long change_mask = 0; + + for (i = 0; i < state->num_var_ranges; i++) + if (set_mtrr_var_range_testing(i, &state->var_ranges[i])) + change_mask |= MTRR_CHANGE_MASK_VARIABLE; + + if (set_fixed_ranges_testing(state->fixed_ranges)) + change_mask |= MTRR_CHANGE_MASK_FIXED; + + /* set_mtrr_restore restores the old value of MTRRdefType, + so to set it we fiddle with the saved value.
*/ + if ((ctxt->deftype_lo & 0xff) != state->def_type + || ((ctxt->deftype_lo & 0xc00) >> 10) != state->enabled) + { + ctxt->deftype_lo = (ctxt->deftype_lo & 0xfffff300UL) | state->def_type | (state->enabled << 10); /* overwrite the type (0xff) and enable (0xc00) fields; a plain OR could never clear a bit already set in the saved value */ + change_mask |= MTRR_CHANGE_MASK_DEFTYPE; + } + + return change_mask; +} /* End Function set_mtrr_state */ + + +static atomic_t undone_count; +static void (*handler_func) (struct set_mtrr_context *ctxt, void *info); +static void *handler_info; +static volatile int wait_barrier_execute = FALSE; +static volatile int wait_barrier_cache_enable = FALSE; + +static void sync_handler (void) +/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. + [RETURNS] Nothing. +*/ +{ + struct set_mtrr_context ctxt; + + set_mtrr_prepare (&ctxt); + /* Notify master CPU that I'm at the barrier and then wait */ + atomic_dec (&undone_count); + while (wait_barrier_execute) barrier (); + /* The master has cleared me to execute */ + (*handler_func) (&ctxt, handler_info); + /* Notify master CPU that I've executed the function */ + atomic_dec (&undone_count); + /* Wait for master to clear me to enable cache and return */ + while (wait_barrier_cache_enable) barrier (); + set_mtrr_done (&ctxt); +} /* End Function sync_handler */ + +static void do_all_cpus (void (*handler) (struct set_mtrr_context *ctxt, + void *info), + void *info, int local) +/* [SUMMARY] Execute a function on all CPUs, with caches flushed and disabled. + [PURPOSE] This function will synchronise all CPUs, flush and disable caches + on all CPUs, then call a specified function. When the specified function + finishes on all CPUs, caches are enabled on all CPUs. + <handler> The function to execute. + <info> An arbitrary information pointer which is passed to <<handler>>. + <local> If TRUE <<handler>> is executed locally. + [RETURNS] Nothing.
+*/ +{ + unsigned long timeout; + struct set_mtrr_context ctxt; + + mtrr_hook = sync_handler; + handler_func = handler; + handler_info = info; + wait_barrier_execute = TRUE; + wait_barrier_cache_enable = TRUE; + /* Send a message to all other CPUs and wait for them to enter the + barrier */ + atomic_set (&undone_count, smp_num_cpus - 1); + smp_message_pass (MSG_ALL_BUT_SELF, MSG_MTRR_CHANGE, 0, 0); + /* Wait for it to be done. NOTE(review): the plain (jiffies < timeout) test below is not wrap-safe if jiffies + JIFFIE_TIMEOUT overflows -- confirm */ + timeout = jiffies + JIFFIE_TIMEOUT; + while ( (atomic_read (&undone_count) > 0) && (jiffies < timeout) ) + barrier (); + if (atomic_read (&undone_count) > 0) + { + panic ("mtrr: timed out waiting for other CPUs\n"); + } + mtrr_hook = NULL; + /* All other CPUs should be waiting for the barrier, with their caches + already flushed and disabled. Prepare for function completion + notification */ + atomic_set (&undone_count, smp_num_cpus - 1); + /* Flush and disable the local CPU's cache and release the barrier, which + should cause the other CPUs to execute the function. Also execute it + locally if required */ + set_mtrr_prepare (&ctxt); + wait_barrier_execute = FALSE; + if (local) (*handler) (&ctxt, info); + /* Now wait for other CPUs to complete the function */ + while (atomic_read (&undone_count) > 0) barrier (); + /* Now all CPUs should have finished the function.
Release the barrier to + allow them to re-enable their caches and return from their interrupt, + then enable the local cache and return */ + wait_barrier_cache_enable = FALSE; + set_mtrr_done (&ctxt); + handler_func = NULL; + handler_info = NULL; +} /* End Function do_all_cpus */ + + +struct set_mtrr_data +{ + unsigned long smp_base; + unsigned long smp_size; + unsigned int smp_reg; + mtrr_type smp_type; +}; + +static void set_mtrr_handler (struct set_mtrr_context *ctxt, void *info) +{ + struct set_mtrr_data *data = info; + + set_mtrr_up (data->smp_reg, data->smp_base, data->smp_size, data->smp_type, + FALSE); +} /* End Function set_mtrr_handler */ + +static void set_mtrr_smp (unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +{ + struct set_mtrr_data data; + + data.smp_reg = reg; + data.smp_base = base; + data.smp_size = size; + data.smp_type = type; + do_all_cpus (set_mtrr_handler, &data, TRUE); +} /* End Function set_mtrr_smp */ + + +/* A warning that is common to the module and non-module cases. */ +/* Some BIOSes are broken and don't set all MTRRs the same on every CPU! */ +#ifdef MODULE +static void mtrr_state_warn (unsigned long mask) +#else +__initfunc(static void mtrr_state_warn (unsigned long mask)) +#endif +{ + if (!mask) return; + if (mask & MTRR_CHANGE_MASK_FIXED) + printk ("mtrr: your CPUs had inconsistent fixed MTRR settings\n"); + if (mask & MTRR_CHANGE_MASK_VARIABLE) + printk ("mtrr: your CPUs had inconsistent variable MTRR settings\n"); + if (mask & MTRR_CHANGE_MASK_DEFTYPE) + printk ("mtrr: your CPUs had inconsistent MTRRdefType settings\n"); + printk ("mtrr: probably your BIOS does not setup all CPUs\n"); +} /* End Function mtrr_state_warn */ + +#ifdef MODULE +/* As a module, copy the MTRR state using an IPI handler.
*/ + +static volatile unsigned long smp_changes_mask = 0; + +static void copy_mtrr_state_handler (struct set_mtrr_context *ctxt, void *info) +{ + unsigned long mask, count; + struct mtrr_state *smp_mtrr_state = info; + + mask = set_mtrr_state (smp_mtrr_state, ctxt); + /* Use the atomic bitops to update the global mask */ + for (count = 0; count < sizeof mask * 8; ++count) + { + if (mask & 0x01) set_bit (count, &smp_changes_mask); + mask >>= 1; + } +} /* End Function copy_mtrr_state_handler */ + +/* Copies the entire MTRR state of this cpu to all the others. */ +static void copy_mtrr_state (void) +{ + struct mtrr_state ms; + + get_mtrr_state (&ms); + do_all_cpus (copy_mtrr_state_handler, &ms, FALSE); + finalize_mtrr_state (&ms); + mtrr_state_warn (smp_changes_mask); +} /* End Function copy_mtrr_state */ + +#endif /* MODULE */ +#endif /* __SMP__ */ + +static char *attrib_to_str (int x) +{ + return (x <= 6) ? mtrr_strings[x] : "?"; +} /* End Function attrib_to_str */ + +static void init_table (void) +{ + int i, max; + + max = get_num_var_ranges (); + if ( ( usage_table = kmalloc (max * sizeof *usage_table, GFP_KERNEL) ) + == NULL ) + { + printk ("mtrr: could not allocate\n"); + return; + } + for (i = 0; i < max; i++) usage_table[i] = 1; +#ifdef CONFIG_PROC_FS + if ( ( ascii_buffer = kmalloc (max * LINE_SIZE, GFP_KERNEL) ) == NULL ) + { + printk ("mtrr: could not allocate\n"); + return; + } + ascii_buf_bytes = 0; + compute_ascii (); +#endif +} /* End Function init_table */ + +int mtrr_add (unsigned long base, unsigned long size, unsigned int type, + char increment) +/* [SUMMARY] Add an MTRR entry. + <base> The starting (base) address of the region. + <size> The size (in bytes) of the region. + <type> The type of the new region. + <increment> If true and the region already exists, the usage count will be + incremented. + [RETURNS] The MTRR register on success, else a negative number indicating + the error code. + [NOTE] This routine uses a spinlock. 
+*/ +{ + int i, max; + mtrr_type ltype; + unsigned long lbase, lsize, last; + + if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return -ENODEV; + if ( (base & 0xfff) || (size & 0xfff) ) + { + printk ("mtrr: size and base must be multiples of 4kB\n"); + printk ("mtrr: size: %lx base: %lx\n", size, base); + return -EINVAL; + } + if (base + size < 0x100000) + { + printk ("mtrr: cannot set region below 1 MByte (0x%lx,0x%lx)\n", + base, size); + return -EINVAL; + } + /* Check upper bits of base and last are equal and lower bits are 0 for + base and 1 for last */ + last = base + size - 1; + for (lbase = base; !(lbase & 1) && (last & 1); + lbase = lbase >> 1, last = last >> 1); + if (lbase != last) + { + printk ("mtrr: base(0x%lx) is not aligned on a size(0x%lx) boundary\n", + base, size); + return -EINVAL; + } + if (type >= MTRR_NUM_TYPES) + { + printk ("mtrr: type: %u illegal\n", type); + return -EINVAL; + } + /* If the type is WC, check that this processor supports it */ + if ( (type == MTRR_TYPE_WRCOMB) && !have_wrcomb () ) + { + printk ("mtrr: your processor doesn't support write-combining\n"); + return -ENOSYS; + } + increment = increment ? 
1 : 0; + max = get_num_var_ranges (); + /* Search for existing MTRR */ + spin_lock (&main_lock); + for (i = 0; i < max; ++i) + { + get_mtrr (i, &lbase, &lsize, <ype); + if (base >= lbase + lsize) continue; + if ( (base < lbase) && (base + size <= lbase) ) continue; + /* At this point we know there is some kind of overlap/enclosure */ + if ( (base < lbase) || (base + size > lbase + lsize) ) + { + spin_unlock (&main_lock); + printk ("mtrr: 0x%lx,0x%lx overlaps existing 0x%lx,0x%lx\n", + base, size, lbase, lsize); + return -EINVAL; + } + if (ltype != type) + { + spin_unlock (&main_lock); + printk ( "mtrr: type missmatch for %lx,%lx old: %s new: %s\n", + base, size, attrib_to_str (ltype), attrib_to_str (type) ); + return -EINVAL; + } + if (increment) ++usage_table[i]; + compute_ascii (); + spin_unlock (&main_lock); + return i; + } + /* Search for an empty MTRR */ + for (i = 0; i < max; ++i) + { + get_mtrr (i, &lbase, &lsize, <ype); + if (lsize > 0) continue; + set_mtrr (i, base, size, type); + usage_table[i] = 1; + compute_ascii (); + spin_unlock (&main_lock); + return i; + } + spin_unlock (&main_lock); + printk ("mtrr: no more MTRRs available\n"); + return -ENOSPC; +} /* End Function mtrr_add */ + +int mtrr_del (int reg, unsigned long base, unsigned long size) +/* [SUMMARY] Delete MTRR/decrement usage count. + <reg> The register. If this is less than 0 then <<base>> and <<size>> must + be supplied. + <base> The base address of the region. This is ignored if <<reg>> is >= 0. + <size> The size of the region. This is ignored if <<reg>> is >= 0. + [RETURNS] The register on success, else a negative number indicating + the error code. + [NOTE] This routine uses a spinlock. 
+*/ +{ + int i, max; + mtrr_type ltype; + unsigned long lbase, lsize; + + if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return -ENODEV; + max = get_num_var_ranges (); + spin_lock (&main_lock); + if (reg < 0) + { + /* Search for existing MTRR */ + for (i = 0; i < max; ++i) + { + get_mtrr (i, &lbase, &lsize, &ltype); + if ( (lbase == base) && (lsize == size) ) + { + reg = i; + break; + } + } + if (reg < 0) + { + spin_unlock (&main_lock); + printk ("mtrr: no MTRR for %lx,%lx found\n", base, size); + return -EINVAL; + } + } + if (reg >= max) + { + spin_unlock (&main_lock); + printk ("mtrr: register: %d too big\n", reg); + return -EINVAL; + } + get_mtrr (reg, &lbase, &lsize, &ltype); + if (lsize < 1) + { + spin_unlock (&main_lock); + printk ("mtrr: MTRR %d not used\n", reg); + return -EINVAL; + } + if (usage_table[reg] < 1) + { + spin_unlock (&main_lock); + printk ("mtrr: reg: %d has count=0\n", reg); + return -EINVAL; + } + if (--usage_table[reg] < 1) set_mtrr (reg, 0, 0, 0); + compute_ascii (); + spin_unlock (&main_lock); + return reg; +} /* End Function mtrr_del */ + +#ifdef CONFIG_PROC_FS + +static int mtrr_file_add (unsigned long base, unsigned long size, + unsigned int type, char increment, struct file *file) +{ + int reg, max; + unsigned int *fcount = file->private_data; + + max = get_num_var_ranges (); + if (fcount == NULL) + { + if ( ( fcount = kmalloc (max * sizeof *fcount, GFP_KERNEL) ) == NULL ) + { + printk ("mtrr: could not allocate\n"); + return -ENOMEM; + } + memset (fcount, 0, max * sizeof *fcount); + file->private_data = fcount; + } + reg = mtrr_add (base, size, type, 1); + if (reg >= 0) ++fcount[reg]; + return reg; +} /* End Function mtrr_file_add */ + +static int mtrr_file_del (unsigned long base, unsigned long size, + struct file *file) +{ + int reg; + unsigned int *fcount = file->private_data; + + reg = mtrr_del (-1, base, size); + if (reg < 0) return reg; + if ( (fcount != NULL) && (fcount[reg] > 0) ) --fcount[reg]; /* never let the unsigned per-file count wrap below zero */ + return reg; +} /* End Function mtrr_file_del
*/ + +static ssize_t mtrr_read (struct file *file, char *buf, size_t len, + loff_t *ppos) +{ + if (*ppos >= ascii_buf_bytes) return 0; + if (*ppos + len > ascii_buf_bytes) len = ascii_buf_bytes - *ppos; + if ( copy_to_user (buf, ascii_buffer + *ppos, len) ) return -EFAULT; + *ppos += len; + return len; +} /* End Function mtrr_read */ + +static ssize_t mtrr_write (struct file *file, const char *buf, size_t len, + loff_t *ppos) +/* Format of control line: + "base=%lx size=%lx type=%s" OR: + "disable=%d" +*/ +{ + int i, err; + unsigned long reg, base, size; + char *ptr; + char line[LINE_SIZE]; + + if ( !suser () ) return -EPERM; + /* Can't seek (pwrite) on this device */ + if (ppos != &file->f_pos) return -ESPIPE; + memset (line, 0, LINE_SIZE); + if (len > LINE_SIZE) len = LINE_SIZE; + if ( (len < 1) || copy_from_user (line, buf, len - 1) ) return -EFAULT; /* len == 0 would make len - 1 wrap around */ + ptr = line + strlen (line) - 1; + if ( (line[0] != '\0') && (*ptr == '\n') ) *ptr = '\0'; /* for an empty line ptr is line - 1: do not touch it */ + if ( !strncmp (line, "disable=", 8) ) + { + reg = simple_strtoul (line + 8, &ptr, 0); + err = mtrr_del (reg, 0, 0); + if (err < 0) return err; + return len; + } + if ( strncmp (line, "base=", 5) ) + { + printk ("mtrr: no \"base=\" in line: \"%s\"\n", line); + return -EINVAL; + } + base = simple_strtoul (line + 5, &ptr, 0); + for (; isspace (*ptr); ++ptr); + if ( strncmp (ptr, "size=", 5) ) + { + printk ("mtrr: no \"size=\" in line: \"%s\"\n", line); + return -EINVAL; + } + size = simple_strtoul (ptr + 5, &ptr, 0); + for (; isspace (*ptr); ++ptr); + if ( strncmp (ptr, "type=", 5) ) + { + printk ("mtrr: no \"type=\" in line: \"%s\"\n", line); + return -EINVAL; + } + ptr += 5; + for (; isspace (*ptr); ++ptr); + for (i = 0; i < MTRR_NUM_TYPES; ++i) + { + if ( strcmp (ptr, mtrr_strings[i]) ) continue; + err = mtrr_add (base, size, i, 1); + if (err < 0) return err; + return len; + } + printk ("mtrr: illegal type: \"%s\"\n", ptr); + return -EINVAL; +} /* End Function mtrr_write */ + +static int mtrr_ioctl (struct inode *inode, struct file *file, + unsigned int cmd,
unsigned long arg) +{ + int err; + mtrr_type type; + struct mtrr_sentry sentry; + struct mtrr_gentry gentry; + + switch (cmd) + { + default: + return -ENOIOCTLCMD; + case MTRRIOC_ADD_ENTRY: + if ( !suser () ) return -EPERM; + if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) ) + return -EFAULT; + err = mtrr_file_add (sentry.base, sentry.size, sentry.type, 1, file); + if (err < 0) return err; + break; + case MTRRIOC_SET_ENTRY: + if ( !suser () ) return -EPERM; + if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) ) + return -EFAULT; + err = mtrr_add (sentry.base, sentry.size, sentry.type, 0); + if (err < 0) return err; + break; + case MTRRIOC_DEL_ENTRY: + if ( !suser () ) return -EPERM; + if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) ) + return -EFAULT; + err = mtrr_file_del (sentry.base, sentry.size, file); + if (err < 0) return err; + break; + case MTRRIOC_GET_ENTRY: + if ( copy_from_user (&gentry, (void *) arg, sizeof gentry) ) + return -EFAULT; + if ( gentry.regnum >= get_num_var_ranges () ) return -EINVAL; + get_mtrr (gentry.regnum, &gentry.base, &gentry.size, &type); + gentry.type = type; + if ( copy_to_user ( (void *) arg, &gentry, sizeof gentry) ) + return -EFAULT; + break; + } + return 0; +} /* End Function mtrr_ioctl */ + +static int mtrr_open (struct inode *ino, struct file *filep) +{ + MOD_INC_USE_COUNT; + return 0; +} /* End Function mtrr_open */ + +static int mtrr_close (struct inode *ino, struct file *file) +{ + int i, max; + unsigned int *fcount = file->private_data; + + MOD_DEC_USE_COUNT; + if (fcount == NULL) return 0; + max = get_num_var_ranges (); + for (i = 0; i < max; ++i) + { + while (fcount[i] > 0) + { + if (mtrr_del (i, 0, 0) < 0) printk ("mtrr: reg %d not used\n", i); + --fcount[i]; + } + } + kfree (fcount); + file->private_data = NULL; + return 0; +} /* End Function mtrr_close */ + +static struct file_operations mtrr_fops = +{ + NULL, /* Seek */ + mtrr_read, /* Read */ + mtrr_write, /* Write */ + NULL, /* 
Readdir */ + NULL, /* Poll */ + mtrr_ioctl, /* IOctl */ + NULL, /* MMAP */ + mtrr_open, /* Open */ + mtrr_close, /* Release */ + NULL, /* Fsync */ + NULL, /* Fasync */ + NULL, /* CheckMediaChange */ + NULL, /* Revalidate */ + NULL, /* Lock */ +}; + +static struct inode_operations proc_mtrr_inode_operations = { + &mtrr_fops, /* default property file-ops */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + NULL, /* readlink */ + NULL, /* follow_link */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* bmap */ + NULL, /* truncate */ + NULL /* permission */ +}; + +static struct proc_dir_entry proc_root_mtrr = { + PROC_MTRR, 4, "mtrr", + S_IFREG | S_IWUSR | S_IRUGO, 1, 0, 0, + 0, &proc_mtrr_inode_operations +}; + +static void compute_ascii (void) +{ + char factor; + int i, max; + mtrr_type type; + unsigned long base, size; + + ascii_buf_bytes = 0; + max = get_num_var_ranges (); + for (i = 0; i < max; i++) + { + get_mtrr (i, &base, &size, &type); + if (size < 1) usage_table[i] = 0; + else + { + if (size < 0x100000) + { + /* 1MB */ + factor = 'k'; + size >>= 10; + } + else + { + factor = 'M'; + size >>= 20; + } + sprintf + (ascii_buffer + ascii_buf_bytes, + "reg%02i: base=0x%08lx (%4liMB), size=%4li%cB: %s, count=%d\n", + i, base, base>>20, size, factor, + attrib_to_str (type), usage_table[i]); + ascii_buf_bytes += strlen (ascii_buffer + ascii_buf_bytes); + } + } + proc_root_mtrr.size = ascii_buf_bytes; +} /* End Function compute_ascii */ + +#endif /* CONFIG_PROC_FS */ + +EXPORT_SYMBOL(mtrr_add); +EXPORT_SYMBOL(mtrr_del); + +#if defined(__SMP__) && !defined(MODULE) + +static volatile unsigned long smp_changes_mask __initdata = 0; +static struct mtrr_state smp_mtrr_state __initdata = {0, 0}; + +__initfunc(void mtrr_init_boot_cpu (void)) +{ + if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return; + printk("mtrr: v%s 
Richard Gooch (rgooch@atnf.csiro.au)\n", MTRR_VERSION); + + get_mtrr_state (&smp_mtrr_state); +} /* End Function mtrr_init_boot_cpu */ + +__initfunc(void mtrr_init_secondary_cpu (void)) +{ + unsigned long mask, count; + struct set_mtrr_context ctxt; + + if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return; + /* Note that this is not ideal, since the cache is only flushed/disabled + for this CPU while the MTRRs are changed, but changing this requires + more invasive changes to the way the kernel boots */ + set_mtrr_prepare (&ctxt); + mask = set_mtrr_state (&smp_mtrr_state, &ctxt); + set_mtrr_done (&ctxt); + /* Use the atomic bitops to update the global mask */ + for (count = 0; count < sizeof mask * 8; ++count) + { + if (mask & 0x01) set_bit (count, &smp_changes_mask); + mask >>= 1; + } +} /* End Function mtrr_init_secondary_cpu */ + +#endif + +#ifdef MODULE +int init_module (void) +#else +__initfunc(int mtrr_init(void)) +#endif +{ +# if !defined(__SMP__) || defined(MODULE) + if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return 0; + printk("mtrr: v%s Richard Gooch (rgooch@atnf.csiro.au)\n", MTRR_VERSION); +#endif + +# ifdef __SMP__ +# ifdef MODULE + copy_mtrr_state (); +# else /* MODULE */ + finalize_mtrr_state (&smp_mtrr_state); + mtrr_state_warn (smp_changes_mask); +# endif /* MODULE */ +# endif /* __SMP__ */ + +# ifdef CONFIG_PROC_FS + proc_register (&proc_root, &proc_root_mtrr); +# endif + + init_table (); + return 0; +} + +#ifdef MODULE +void cleanup_module (void) +{ + if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return; +# ifdef CONFIG_PROC_FS + proc_unregister (&proc_root, PROC_MTRR); +# endif +# ifdef __SMP__ + mtrr_hook = NULL; +# endif +} +#endif diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 6ba4e0ff8..a06477b9d 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -375,12 +375,12 @@ void machine_restart(char * __unused) registers don't have to be reloaded after 
switching to real mode: the values are consistent for real mode operation already. */ - __asm__ __volatile__ ("movw $0x0010,%%ax\n" - "\tmovw %%ax,%%ds\n" - "\tmovw %%ax,%%es\n" - "\tmovw %%ax,%%fs\n" - "\tmovw %%ax,%%gs\n" - "\tmovw %%ax,%%ss" : : : "eax"); + __asm__ __volatile__ ("movl $0x0010,%%eax\n" + "\tmovl %%ax,%%ds\n" + "\tmovl %%ax,%%es\n" + "\tmovl %%ax,%%fs\n" + "\tmovl %%ax,%%gs\n" + "\tmovl %%ax,%%ss" : : : "eax"); /* Jump to the 16-bit code that we copied earlier. It disables paging and the cache, switches to real mode, and jumps to the BIOS reset @@ -418,43 +418,37 @@ void show_regs(struct pt_regs * regs) 0xffff & regs->xds,0xffff & regs->xes); } +void release_segments(struct mm_struct *mm) +{ + void * ldt; + + /* forget local segments */ + __asm__ __volatile__("movl %w0,%%fs ; movl %w0,%%gs ; lldt %w0" + : /* no outputs */ + : "r" (0)); + current->tss.ldt = 0; + + ldt = mm->segments; + if (ldt) { + mm->segments = NULL; + vfree(ldt); + } +} + /* * Free current thread data structures etc.. 
*/ - void exit_thread(void) { /* forget lazy i387 state */ if (last_task_used_math == current) last_task_used_math = NULL; - /* forget local segments */ - __asm__ __volatile__("mov %w0,%%fs ; mov %w0,%%gs ; lldt %w0" - : /* no outputs */ - : "r" (0)); - current->tss.ldt = 0; - if (current->ldt) { - void * ldt = current->ldt; - current->ldt = NULL; - vfree(ldt); - } } void flush_thread(void) { int i; - if (current->ldt) { - free_page((unsigned long) current->ldt); - current->ldt = NULL; - for (i=1 ; i<NR_TASKS ; i++) { - if (task[i] == current) { - set_ldt_desc(gdt+(i<<1)+ - FIRST_LDT_ENTRY,&default_ldt, 1); - load_ldt(i); - } - } - } - for (i=0 ; i<8 ; i++) current->debugreg[i] = 0; @@ -479,13 +473,30 @@ void release_thread(struct task_struct *dead_task) { } +void copy_segments(int nr, struct task_struct *p, struct mm_struct *new_mm) +{ + int ldt_size = 1; + void * ldt = &default_ldt; + struct mm_struct * old_mm = current->mm; + + p->tss.ldt = _LDT(nr); + if (old_mm->segments) { + new_mm->segments = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE); + if (new_mm->segments) { + ldt = new_mm->segments; + ldt_size = LDT_ENTRIES; + memcpy(ldt, old_mm->segments, LDT_ENTRIES*LDT_ENTRY_SIZE); + } + } + set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY, ldt, ldt_size); +} + int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, struct task_struct * p, struct pt_regs * regs) { struct pt_regs * childregs; p->tss.tr = _TSS(nr); - p->tss.ldt = _LDT(nr); p->tss.es = __KERNEL_DS; p->tss.cs = __KERNEL_CS; p->tss.ss = __KERNEL_DS; @@ -508,16 +519,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, childregs->eax = 0; childregs->esp = esp; p->tss.back_link = 0; - if (p->ldt) { - p->ldt = (struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE); - if (p->ldt != NULL) - memcpy(p->ldt, current->ldt, LDT_ENTRIES*LDT_ENTRY_SIZE); - } set_tss_desc(gdt+(nr<<1)+FIRST_TSS_ENTRY,&(p->tss)); - if (p->ldt) - set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,p->ldt, 512); - else - 
set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,&default_ldt, 1); + /* * a bitmap offset pointing outside of the TSS limit causes a nicely * controllable SIGSEGV. The first sys_ioperm() call sets up the @@ -583,8 +586,8 @@ void dump_thread(struct pt_regs * regs, struct user * dump) dump->regs.eax = regs->eax; dump->regs.ds = regs->xds; dump->regs.es = regs->xes; - __asm__("mov %%fs,%0":"=r" (dump->regs.fs)); - __asm__("mov %%gs,%0":"=r" (dump->regs.gs)); + __asm__("movl %%fs,%0":"=r" (dump->regs.fs)); + __asm__("movl %%gs,%0":"=r" (dump->regs.gs)); dump->regs.orig_eax = regs->orig_eax; dump->regs.eip = regs->eip; dump->regs.cs = regs->xcs; diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 12a777b5c..c17c13590 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -199,7 +199,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc) && (tmp & 0x4) != 0x4 /* not a LDT selector */ \ && (tmp & 3) != 3) /* not a RPL3 GDT selector */ \ goto badframe; \ - __asm__ __volatile__("mov %w0,%%" #seg : : "r"(tmp)); } + __asm__ __volatile__("movl %w0,%%" #seg : : "r"(tmp)); } GET_SEG(gs); GET_SEG(fs); @@ -337,9 +337,9 @@ setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate, unsigned int tmp; tmp = 0; - __asm__("mov %%gs,%w0" : "=r"(tmp): "0"(tmp)); + __asm__("movl %%gs,%w0" : "=r"(tmp): "0"(tmp)); __put_user(tmp, (unsigned int *)&sc->gs); - __asm__("mov %%fs,%w0" : "=r"(tmp): "0"(tmp)); + __asm__("movl %%fs,%w0" : "=r"(tmp): "0"(tmp)); __put_user(tmp, (unsigned int *)&sc->fs); __put_user(regs->xes, (unsigned int *)&sc->es); @@ -427,7 +427,7 @@ static void setup_frame(int sig, struct k_sigaction *ka, regs->eip = (unsigned long) ka->sa.sa_handler; { unsigned long seg = __USER_DS; - __asm__("mov %w0,%%fs ; mov %w0,%%gs": "=r"(seg) : "0"(seg)); + __asm__("movl %w0,%%fs ; movl %w0,%%gs": "=r"(seg) : "0"(seg)); set_fs(USER_DS); regs->xds = seg; regs->xes = seg; @@ -492,7 +492,7 @@ static void setup_rt_frame(int sig, struct 
k_sigaction *ka, siginfo_t *info, regs->eip = (unsigned long) ka->sa.sa_handler; { unsigned long seg = __USER_DS; - __asm__("mov %w0,%%fs ; mov %w0,%%gs": "=r"(seg) : "0"(seg)); + __asm__("movl %w0,%%fs ; movl %w0,%%gs": "=r"(seg) : "0"(seg)); set_fs(USER_DS); regs->xds = seg; regs->xes = seg; diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 9ca377128..0793410a6 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -28,6 +28,7 @@ * Alan Cox : Added EBDA scanning */ +#include <linux/config.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/timer.h> @@ -47,6 +48,10 @@ #include <asm/smp.h> #include <asm/io.h> +#ifdef CONFIG_MTRR +# include <asm/mtrr.h> +#endif + #define __KERNEL_SYSCALLS__ #include <linux/unistd.h> @@ -128,9 +133,6 @@ unsigned char boot_cpu_id = 0; /* Processor that is doing the boot up */ static int smp_activated = 0; /* Tripped once we need to start cross invalidating */ int apic_version[NR_CPUS]; /* APIC version number */ static volatile int smp_commenced=0; /* Tripped when we start scheduling */ -unsigned long apic_addr = 0xFEE00000; /* Address of APIC (defaults to 0xFEE00000) */ -unsigned long nlong = 0; /* dummy used for apic_reg address + 0x20 */ -unsigned char *apic_reg=((unsigned char *)(&nlong))-0x20;/* Later set to the ioremap() of the APIC */ unsigned long apic_retval; /* Just debugging the assembler.. 
*/ static volatile unsigned char smp_cpu_in_msg[NR_CPUS]; /* True if this processor is sending an IPI */ @@ -150,8 +152,10 @@ const char lk_lockmsg[] = "lock from interrupt context at %p\n"; int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, }; extern int mp_irq_entries; extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; +extern int mpc_default_type; int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { -1, }; int mp_current_pci_id = 0; +unsigned long mp_lapic_addr = 0; /* #define SMP_DEBUG */ @@ -272,8 +276,8 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc)) printk("APIC at: 0x%lX\n",mpc->mpc_lapic); - /* set the local APIC address */ - apic_addr = (unsigned long)phys_to_virt((unsigned long)mpc->mpc_lapic); + /* save the local APIC address, it might be non-default */ + mp_lapic_addr = mpc->mpc_lapic; /* * Now process the configuration blocks. @@ -454,7 +458,7 @@ __initfunc(int smp_scan_config(unsigned long base, unsigned long length)) */ cfg=pg0[0]; - pg0[0] = (apic_addr | 7); + pg0[0] = (mp_lapic_addr | 7); local_flush_tlb(); boot_cpu_id = GET_APIC_ID(*((volatile unsigned long *) APIC_ID)); @@ -477,6 +481,14 @@ __initfunc(int smp_scan_config(unsigned long base, unsigned long length)) cpu_present_map=3; num_processors=2; printk("I/O APIC at 0xFEC00000.\n"); + + /* + * Save the default type number, we + * need it later to set the IO-APIC + * up properly: + */ + mpc_default_type = mpf->mpf_feature1; + printk("Bus #0 is "); } switch(mpf->mpf_feature1) @@ -525,11 +537,6 @@ __initfunc(int smp_scan_config(unsigned long base, unsigned long length)) if(mpf->mpf_physptr) smp_read_mpc((void *)mpf->mpf_physptr); - /* - * Now that the boot CPU id is known, - * set some other information about it. 
- */ - nlong = boot_cpu_id<<24; /* Dummy 'self' for bootup */ __cpu_logical_map[0] = boot_cpu_id; global_irq_holder = boot_cpu_id; current->processor = boot_cpu_id; @@ -667,6 +674,10 @@ extern int cpu_idle(void * unused); */ __initfunc(int start_secondary(void *unused)) { +#ifdef CONFIG_MTRR + /* Must be done before calibration delay is computed */ + mtrr_init_secondary_cpu (); +#endif smp_callin(); while (!smp_commenced) barrier(); @@ -727,7 +738,7 @@ __initfunc(static void do_boot_cpu(int i)) /* start_eip had better be page-aligned! */ start_eip = setup_trampoline(); - printk("Booting processor %d eip %lx: ", i, start_eip); /* So we see what's up */ + printk("Booting processor %d eip %lx\n", i, start_eip); /* So we see what's up */ stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle); /* @@ -906,6 +917,10 @@ __initfunc(void smp_boot_cpus(void)) int i; unsigned long cfg; +#ifdef CONFIG_MTRR + /* Must be done before other processors booted */ + mtrr_init_boot_cpu (); +#endif /* * Initialize the logical to physical cpu number mapping * and the per-CPU profiling counter/multiplier @@ -938,7 +953,7 @@ __initfunc(void smp_boot_cpus(void)) { printk(KERN_NOTICE "SMP motherboard not detected. 
Using dummy APIC emulation.\n"); io_apic_irqs = 0; - return; + goto smp_done; } /* @@ -951,15 +966,6 @@ __initfunc(void smp_boot_cpus(void)) printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); } - /* - * Map the local APIC into kernel space - */ - - apic_reg = ioremap(apic_addr,4096); - - if(apic_reg == NULL) - panic("Unable to map local apic."); - #ifdef SMP_DEBUG { int reg; @@ -1106,6 +1112,12 @@ __initfunc(void smp_boot_cpus(void)) * go and set it up: */ setup_IO_APIC(); + +smp_done: +#ifdef CONFIG_MTRR + /* Must be done after other processors booted */ + mtrr_init (); +#endif } @@ -1196,6 +1208,10 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait) irq = 0x40; break; + case MSG_MTRR_CHANGE: + irq = 0x50; + break; + default: printk("Unknown SMP message %d\n", msg); return; @@ -1494,10 +1510,18 @@ asmlinkage void smp_stop_cpu_interrupt(void) for (;;) ; } +void (*mtrr_hook) (void) = NULL; + +asmlinkage void smp_mtrr_interrupt(void) +{ + ack_APIC_irq (); + if (mtrr_hook) (*mtrr_hook) (); +} + /* * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts * per second. We assume that the caller has already set up the local - * APIC at apic_addr. + * APIC. * * The APIC timer is not exactly sync with the external timer chip, it * closely follows bus clocks. 
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index fdcf951f3..754e9371c 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -68,19 +68,19 @@ out: \ #define get_seg_byte(seg,addr) ({ \ register unsigned char __res; \ -__asm__("push %%fs;mov %%ax,%%fs;movb %%fs:%2,%%al;pop %%fs" \ +__asm__("pushl %%fs;movl %%ax,%%fs;movb %%fs:%2,%%al;popl %%fs" \ :"=a" (__res):"0" (seg),"m" (*(addr))); \ __res;}) #define get_seg_long(seg,addr) ({ \ register unsigned long __res; \ -__asm__("push %%fs;mov %%ax,%%fs;movl %%fs:%2,%%eax;pop %%fs" \ +__asm__("pushl %%fs;movl %%ax,%%fs;movl %%fs:%2,%%eax;popl %%fs" \ :"=a" (__res):"0" (seg),"m" (*(addr))); \ __res;}) #define _fs() ({ \ register unsigned short __res; \ -__asm__("mov %%fs,%%ax":"=a" (__res):); \ +__asm__("movl %%fs,%%ax":"=a" (__res):); \ __res;}) void page_exception(void); diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index 5ae87b06a..db7da10fc 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -255,7 +255,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk mark_screen_rdonly(tsk); unlock_kernel(); __asm__ __volatile__( - "xorl %%eax,%%eax; mov %%ax,%%fs; mov %%ax,%%gs\n\t" + "xorl %%eax,%%eax; movl %%ax,%%fs; movl %%ax,%%gs\n\t" "movl %0,%%esp\n\t" "jmp ret_from_sys_call" : /* no outputs */ |