summaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel
diff options
context:
space:
mode:
authorRalf Baechle <ralf@linux-mips.org>1998-05-07 02:55:41 +0000
committerRalf Baechle <ralf@linux-mips.org>1998-05-07 02:55:41 +0000
commitdcec8a13bf565e47942a1751a9cec21bec5648fe (patch)
tree548b69625b18cc2e88c3e68d0923be546c9ebb03 /arch/i386/kernel
parent2e0f55e79c49509b7ff70ff1a10e1e9e90a3dfd4 (diff)
o Merge with Linux 2.1.99.
o Fix ancient bug in the ELF loader making ldd crash.
o Fix ancient bug in the keyboard code for SGI, SNI and Jazz.
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--arch/i386/kernel/Makefile9
-rw-r--r--arch/i386/kernel/bios32.c448
-rw-r--r--arch/i386/kernel/entry.S33
-rw-r--r--arch/i386/kernel/head.S31
-rw-r--r--arch/i386/kernel/i386_ksyms.c10
-rw-r--r--arch/i386/kernel/io_apic.c127
-rw-r--r--arch/i386/kernel/ioport.c2
-rw-r--r--arch/i386/kernel/irq.c415
-rw-r--r--arch/i386/kernel/irq.h13
-rw-r--r--arch/i386/kernel/ldt.c26
-rw-r--r--arch/i386/kernel/mca.c30
-rw-r--r--arch/i386/kernel/mtrr.c1229
-rw-r--r--arch/i386/kernel/process.c85
-rw-r--r--arch/i386/kernel/signal.c10
-rw-r--r--arch/i386/kernel/smp.c70
-rw-r--r--arch/i386/kernel/traps.c6
-rw-r--r--arch/i386/kernel/vm86.c2
17 files changed, 2013 insertions, 533 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index ce1e6652d..6f63d2c97 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -21,6 +21,7 @@ O_TARGET := kernel.o
O_OBJS := process.o signal.o entry.o traps.o irq.o vm86.o \
ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o
OX_OBJS := i386_ksyms.o
+MX_OBJS :=
ifdef CONFIG_PCI
O_OBJS += bios32.o
@@ -30,6 +31,14 @@ ifdef CONFIG_MCA
O_OBJS += mca.o
endif
+ifeq ($(CONFIG_MTRR),y)
+OX_OBJS += mtrr.o
+else
+ ifeq ($(CONFIG_MTRR),m)
+ MX_OBJS += mtrr.o
+ endif
+endif
+
ifdef SMP
diff --git a/arch/i386/kernel/bios32.c b/arch/i386/kernel/bios32.c
index 7e865c417..f2955918a 100644
--- a/arch/i386/kernel/bios32.c
+++ b/arch/i386/kernel/bios32.c
@@ -1,7 +1,7 @@
/*
- * bios32.c - BIOS32, PCI BIOS functions.
+ * bios32.c - Low-Level PCI Access
*
- * $Id: bios32.c,v 1.5 1997/12/02 01:48:00 ralf Exp $
+ * $Id: bios32.c,v 1.29 1998/04/17 16:31:15 mj Exp $
*
* Sponsored by
* iX Multiuser Multitasking Magazine
@@ -64,14 +64,16 @@
*
* Aug 2, 1997 : Split to PCI BIOS handling and direct PCI access parts
* and cleaned it up... Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ * Feb 6, 1998 : No longer using BIOS to find devices and device classes. [mj]
*/
#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
-#include <linux/bios32.h>
#include <linux/pci.h>
#include <linux/init.h>
+#include <linux/ioport.h>
#include <asm/page.h>
#include <asm/segment.h>
@@ -85,14 +87,20 @@
#include "irq.h"
+#undef DEBUG /* while DEBUG is undefined, DBG() below expands to nothing */
+
+#ifdef DEBUG
+#define DBG(x...) printk(x) /* debug build: forward all arguments to printk() */
+#else
+#define DBG(x...) /* non-debug build: the call and its arguments are discarded */
+#endif
+
/*
* Generic PCI access -- indirect calls according to detected HW.
*/
struct pci_access {
int pci_present;
- int (*find_device)(unsigned short, unsigned short, unsigned short, unsigned char *, unsigned char *);
- int (*find_class)(unsigned int, unsigned short, unsigned char *, unsigned char *);
int (*read_config_byte)(unsigned char, unsigned char, unsigned char, unsigned char *);
int (*read_config_word)(unsigned char, unsigned char, unsigned char, unsigned short *);
int (*read_config_dword)(unsigned char, unsigned char, unsigned char, unsigned int *);
@@ -108,8 +116,6 @@ static int pci_stub(void)
static struct pci_access pci_access_none = {
0, /* No PCI present */
- (void *) pci_stub, /* No functions implemented */
- (void *) pci_stub,
(void *) pci_stub,
(void *) pci_stub,
(void *) pci_stub,
@@ -125,54 +131,10 @@ int pcibios_present(void)
return access_pci->pci_present;
}
-int pcibios_find_class (unsigned int class_code, unsigned short index,
- unsigned char *bus, unsigned char *device_fn)
-{
- return access_pci->find_class(class_code, index, bus, device_fn);
-}
-
-int pcibios_find_device (unsigned short vendor, unsigned short device_id,
- unsigned short index, unsigned char *bus, unsigned char *device_fn)
-{
- return access_pci->find_device(vendor, device_id, index, bus, device_fn);
-}
-
int pcibios_read_config_byte (unsigned char bus,
unsigned char device_fn, unsigned char where, unsigned char *value)
{
- int res;
-
- res = access_pci->read_config_byte(bus, device_fn, where, value);
-
-#ifdef __SMP__
-/*
- * IOAPICs can take PCI IRQs directly, lets first check the mptable:
- */
- if (where == PCI_INTERRUPT_LINE) {
- int irq;
- char pin;
-
- /*
- * get the PCI IRQ INT _physical pin_ for this device
- */
- access_pci->read_config_byte(bus, device_fn,
- PCI_INTERRUPT_PIN, &pin);
- /*
- * subtle, PCI pins are numbered starting from 1 ...
- */
- pin--;
-
- irq = IO_APIC_get_PCI_irq_vector (bus,PCI_SLOT(device_fn),pin);
- if (irq != -1)
- *value = (unsigned char) irq;
-
- printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n",
- bus,PCI_SLOT(device_fn), pin, irq);
-
- }
-#endif
-
- return res;
+ return access_pci->read_config_byte(bus, device_fn, where, value);
}
int pcibios_read_config_word (unsigned char bus,
@@ -205,60 +167,19 @@ int pcibios_write_config_dword (unsigned char bus,
return access_pci->write_config_dword(bus, device_fn, where, value);
}
-/*
- * Direct access to PCI hardware...
- */
-
-/*
- * Given the vendor and device ids, find the n'th instance of that device
- * in the system.
- */
+#define PCI_PROBE_BIOS 1 /* pci_probe bit: pcibios_init() may try pci_find_bios() */
+#define PCI_PROBE_CONF1 2 /* pci_probe bit: pci_check_direct() may try configuration type 1 */
+#define PCI_PROBE_CONF2 4 /* pci_probe bit: pci_check_direct() may try configuration type 2 */
+#define PCI_NO_SORT 0x100 /* pci_probe bit: set by the "nosort" option, suppresses pcibios_sort() */
+#define PCI_BIOS_SORT 0x200 /* pci_probe bit: set by pcibios_init() once a PCI BIOS was found */
-#ifdef CONFIG_PCI_DIRECT
-
-static int pci_direct_find_device (unsigned short vendor, unsigned short device_id,
- unsigned short index, unsigned char *bus,
- unsigned char *devfn)
-{
- unsigned int curr = 0;
- struct pci_dev *dev;
-
- for (dev = pci_devices; dev; dev = dev->next) {
- if (dev->vendor == vendor && dev->device == device_id) {
- if (curr == index) {
- *devfn = dev->devfn;
- *bus = dev->bus->number;
- return PCIBIOS_SUCCESSFUL;
- }
- ++curr;
- }
- }
- return PCIBIOS_DEVICE_NOT_FOUND;
-}
+static unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
/*
- * Given the class, find the n'th instance of that device
- * in the system.
+ * Direct access to PCI hardware...
*/
-static int pci_direct_find_class (unsigned int class_code, unsigned short index,
- unsigned char *bus, unsigned char *devfn)
-{
- unsigned int curr = 0;
- struct pci_dev *dev;
-
- for (dev = pci_devices; dev; dev = dev->next) {
- if (dev->class == class_code) {
- if (curr == index) {
- *devfn = dev->devfn;
- *bus = dev->bus->number;
- return PCIBIOS_SUCCESSFUL;
- }
- ++curr;
- }
- }
- return PCIBIOS_DEVICE_NOT_FOUND;
-}
+#ifdef CONFIG_PCI_DIRECT
/*
* Functions for accessing PCI configuration space with type 1 accesses
@@ -346,8 +267,6 @@ static int pci_conf1_write_config_dword (unsigned char bus, unsigned char device
static struct pci_access pci_direct_conf1 = {
1,
- pci_direct_find_device,
- pci_direct_find_class,
pci_conf1_read_config_byte,
pci_conf1_read_config_word,
pci_conf1_read_config_dword,
@@ -458,8 +377,6 @@ static int pci_conf2_write_config_dword (unsigned char bus, unsigned char device
static struct pci_access pci_direct_conf2 = {
1,
- pci_direct_find_device,
- pci_direct_find_class,
pci_conf2_read_config_byte,
pci_conf2_read_config_word,
pci_conf2_read_config_dword,
@@ -470,39 +387,43 @@ static struct pci_access pci_direct_conf2 = {
__initfunc(static struct pci_access *pci_check_direct(void))
{
- unsigned int tmp;
- unsigned long flags;
+ unsigned int tmp;
+ unsigned long flags;
- save_flags(flags); cli();
+ save_flags(flags); cli();
+
+ /*
+ * Check if configuration type 1 works.
+ */
+ if (pci_probe & PCI_PROBE_CONF1) {
+ outb (0x01, 0xCFB);
+ tmp = inl (0xCF8);
+ outl (0x80000000, 0xCF8);
+ if (inl (0xCF8) == 0x80000000) {
+ outl (tmp, 0xCF8);
+ restore_flags(flags);
+ printk("PCI: Using configuration type 1\n");
+ return &pci_direct_conf1;
+ }
+ outl (tmp, 0xCF8);
+ }
+
+ /*
+ * Check if configuration type 2 works.
+ */
+ if (pci_probe & PCI_PROBE_CONF2) {
+ outb (0x00, 0xCFB);
+ outb (0x00, 0xCF8);
+ outb (0x00, 0xCFA);
+ if (inb (0xCF8) == 0x00 && inb (0xCFA) == 0x00) {
+ restore_flags(flags);
+ printk("PCI: Using configuration type 2\n");
+ return &pci_direct_conf2;
+ }
+ }
- /*
- * Check if configuration type 1 works.
- */
- outb (0x01, 0xCFB);
- tmp = inl (0xCF8);
- outl (0x80000000, 0xCF8);
- if (inl (0xCF8) == 0x80000000) {
- outl (tmp, 0xCF8);
- restore_flags(flags);
- printk("PCI: Using configuration type 1\n");
- return &pci_direct_conf1;
- }
- outl (tmp, 0xCF8);
-
- /*
- * Check if configuration type 2 works.
- */
- outb (0x00, 0xCFB);
- outb (0x00, 0xCF8);
- outb (0x00, 0xCFA);
- if (inb (0xCF8) == 0x00 && inb (0xCFA) == 0x00) {
restore_flags(flags);
- printk("PCI: Using configuration type 2\n");
- return &pci_direct_conf2;
- }
- restore_flags(flags);
- printk("PCI: PCI hardware not found (i.e., not present or not supported).\n");
- return NULL;
+ return NULL;
}
#endif
@@ -599,7 +520,7 @@ static unsigned long bios32_service(unsigned long service)
printk("bios32_service(0x%lx): not present\n", service);
return 0;
default: /* Shouldn't happen */
- printk("bios32_service(0x%lx): returned 0x%x, mail drew@colorado.edu\n",
+ printk("bios32_service(0x%lx): returned 0x%x, report to <mj@ucw.cz>.\n",
service, return_code);
return 0;
}
@@ -642,7 +563,7 @@ __initfunc(static int check_pcibios(void))
if (present_status || (signature != PCI_SIGNATURE)) {
printk ("PCI: %s: BIOS32 Service Directory says PCI BIOS is present,\n"
" but PCI_BIOS_PRESENT subfunction fails with present status of 0x%x\n"
- " and signature of 0x%08lx (%c%c%c%c). Mail drew@Colorado.EDU\n",
+ " and signature of 0x%08lx (%c%c%c%c). Report to <mj@ucw.cz>.\n",
(signature == PCI_SIGNATURE) ? "WARNING" : "ERROR",
present_status, signature,
(char) (signature >> 0), (char) (signature >> 8),
@@ -660,6 +581,8 @@ __initfunc(static int check_pcibios(void))
return 0;
}
+#if 0 /* Not used */
+
static int pci_bios_find_class (unsigned int class_code, unsigned short index,
unsigned char *bus, unsigned char *device_fn)
{
@@ -684,8 +607,10 @@ static int pci_bios_find_class (unsigned int class_code, unsigned short index,
return (int) (ret & 0xff00) >> 8;
}
-static int pci_bios_find_device (unsigned short vendor, unsigned short device_id,
- unsigned short index, unsigned char *bus, unsigned char *device_fn)
+#endif
+
+__initfunc(static int pci_bios_find_device (unsigned short vendor, unsigned short device_id,
+ unsigned short index, unsigned char *bus, unsigned char *device_fn))
{
unsigned short bx;
unsigned short ret;
@@ -847,8 +772,6 @@ static int pci_bios_write_config_dword (unsigned char bus,
static struct pci_access pci_bios_access = {
1,
- pci_bios_find_device,
- pci_bios_find_class,
pci_bios_read_config_byte,
pci_bios_read_config_word,
pci_bios_read_config_dword,
@@ -887,21 +810,17 @@ __initfunc(static struct pci_access *pci_find_bios(void))
if (sum != 0)
continue;
if (check->fields.revision != 0) {
- printk("PCI: unsupported BIOS32 revision %d at 0x%p, mail drew@colorado.edu\n",
+ printk("PCI: unsupported BIOS32 revision %d at 0x%p, report to <mj@ucw.cz>\n",
check->fields.revision, check);
continue;
}
- printk ("PCI: BIOS32 Service Directory structure at 0x%p\n", check);
+ DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check);
if (check->fields.entry >= 0x100000) {
-#ifdef CONFIG_PCI_DIRECT
- printk("PCI: BIOS32 entry in high memory, trying direct PCI access.\n");
- return pci_check_direct();
-#else
- printk("PCI: BIOS32 entry in high memory, cannot use.\n");
-#endif
+ printk("PCI: BIOS32 entry (0x%p) in high memory, cannot use.\n", check);
+ return NULL;
} else {
bios32_entry = check->fields.entry;
- printk ("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry);
+ DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry);
bios32_indirect.address = bios32_entry + PAGE_OFFSET;
if (check_pcibios())
return &pci_bios_access;
@@ -912,36 +831,237 @@ __initfunc(static struct pci_access *pci_find_bios(void))
return NULL;
}
+/*
+ * Sort the device list according to PCI BIOS.
+ *
+ * Rebuilds the pci_devices list in the order in which the PCI BIOS
+ * enumerates devices.  "dev" is the not-yet-sorted remainder of the old
+ * list and "last" points at the tail link of the new list.  For the
+ * device at the head of the remainder, the BIOS is asked for every
+ * instance of the same vendor/device ID pair; each reported instance
+ * that is still in the remainder is unlinked and appended to the new
+ * list.
+ *
+ * The head device is appended as-is whenever the BIOS did not report
+ * the head device itself.  Testing only "did the BIOS report anything
+ * for these IDs" is not enough: if the BIOS reports other instances
+ * (or only locations we do not know about) but never the head device,
+ * the head would stay in the remainder forever and the outer loop
+ * would never terminate.  With the "found" flag every pass of the outer
+ * loop removes the head from the remainder.
+ */
+
+__initfunc(void pcibios_sort(void))
+{
+	struct pci_dev *dev = pci_devices;
+	struct pci_dev **last = &pci_devices;
+	struct pci_dev *d, **dd, *e;
+	int idx, found;
+	unsigned char bus, devfn;
+
+	DBG("PCI: Sorting device list...\n");
+	while ((e = dev)) {
+		idx = found = 0;
+		while (pci_bios_find_device(e->vendor, e->device, idx, &bus, &devfn) == PCIBIOS_SUCCESSFUL) {
+			idx++;
+			for(dd=&dev; (d = *dd); dd = &d->next) {
+				if (d->bus->number == bus && d->devfn == devfn) {
+					/* unlink from the remainder, append to the sorted list */
+					*dd = d->next;
+					*last = d;
+					last = &d->next;
+					if (d == e)
+						found = 1;
+					break;
+				}
+			}
+			if (!d)
+				printk("PCI: BIOS reporting unknown device %02x:%02x\n", bus, devfn);
+		}
+		if (!found) {
+			/* e was not moved above, so it is still the head of "dev" */
+			printk("PCI: Device %02x:%02x not found by BIOS\n",
+				dev->bus->number, dev->devfn);
+			d = dev;
+			dev = dev->next;
+			*last = d;
+			last = &d->next;
+		}
+	}
+	*last = NULL;
+}
+
#endif
/*
- * No fixup function used.
+ * Several BIOSes forget to assign addresses to I/O ranges.
+ * We try to fix it here, expecting there are free addresses
+ * starting at 0x5800. Ugly, but until we come up with better
+ * resource management, it's the only simple solution.
*/
-__initfunc(unsigned long pcibios_fixup(unsigned long mem_start, unsigned long mem_end))
+static int pci_last_io_addr __initdata = 0x5800;
+
+__initfunc(void pcibios_fixup_io_addr(struct pci_dev *dev, int idx))
{
- return mem_start;
+ unsigned short cmd;
+ unsigned int reg = PCI_BASE_ADDRESS_0 + 4*idx;
+ unsigned int size, addr, try;
+ unsigned int bus = dev->bus->number;
+ unsigned int devfn = dev->devfn;
+
+ if (!pci_last_io_addr) {
+ printk("PCI: Unassigned I/O space for %02x:%02x\n", bus, devfn);
+ return;
+ }
+ pcibios_read_config_word(bus, devfn, PCI_COMMAND, &cmd);
+ pcibios_write_config_word(bus, devfn, PCI_COMMAND, cmd & ~PCI_COMMAND_IO);
+ pcibios_write_config_dword(bus, devfn, reg, ~0);
+ pcibios_read_config_dword(bus, devfn, reg, &size);
+ size = (~(size & PCI_BASE_ADDRESS_IO_MASK) & 0xffff) + 1;
+ addr = 0;
+ if (!size || size > 0x100)
+ printk("PCI: Unable to handle I/O allocation for %02x:%02x (%04x), tell <mj@ucw.cz>\n", bus, devfn, size);
+ else {
+ do {
+ addr = (pci_last_io_addr + size - 1) & ~(size-1);
+ pci_last_io_addr = addr + size;
+ } while (check_region(addr, size));
+ printk("PCI: Assigning I/O space %04x-%04x to device %02x:%02x\n", addr, addr+size-1, bus, devfn);
+ pcibios_write_config_dword(bus, devfn, reg, addr | PCI_BASE_ADDRESS_SPACE_IO);
+ pcibios_read_config_dword(bus, devfn, reg, &try);
+ if ((try & PCI_BASE_ADDRESS_IO_MASK) != addr) {
+ addr = 0;
+ printk("PCI: Address setup failed, got %04x\n", try);
+ } else
+ dev->base_address[idx] = try;
+ }
+ if (!addr) {
+ pcibios_write_config_dword(bus, devfn, reg, 0);
+ dev->base_address[idx] = 0;
+ }
+ pcibios_write_config_word(bus, devfn, PCI_COMMAND, cmd);
+}
+
+/*
+ * Arch-dependent fixups. We need to fix here base addresses, I/O
+ * and memory enables and IRQ's as the PCI BIOS'es are buggy as hell.
+ *
+ * For every device on the pci_devices list this function:
+ *  - scans base_address[0..5]; an I/O entry whose address bits are all
+ *    zero or all ones is handed to pcibios_fixup_io_addr(),
+ *  - sets PCI_COMMAND_IO / PCI_COMMAND_MEMORY in the command register
+ *    when an I/O / memory base address was seen but the bit is clear,
+ *  - on __SMP__ builds, replaces dev->irq with the value returned by
+ *    IO_APIC_get_PCI_irq_vector() when that value is non-negative,
+ *  - resets dev->irq to 0 when it is >= NR_IRQS.
+ * Afterwards, with CONFIG_PCI_BIOS, pcibios_sort() is called if
+ * PCI_BIOS_SORT is set in pci_probe and PCI_NO_SORT is not.
+ */
+
+__initfunc(void pcibios_fixup(void))
+{
+ struct pci_dev *dev;
+ int i, has_io, has_mem;
+ unsigned short cmd;
+
+ for(dev = pci_devices; dev; dev=dev->next) {
+ /*
+ * There are buggy BIOSes that forget to enable I/O and memory
+ * access to PCI devices. We try to fix this, but we need to
+ * be sure that the BIOS didn't forget to assign an address
+ * to the device. [mj]
+ *
+ * NOTE(review): has_io is set before pcibios_fixup_io_addr()
+ * runs and is not cleared if that call leaves the base address
+ * at 0, so I/O may still be enabled below -- confirm intended.
+ */
+ has_io = has_mem = 0;
+ for(i=0; i<6; i++) {
+ unsigned long a = dev->base_address[i];
+ if (a & PCI_BASE_ADDRESS_SPACE_IO) {
+ has_io = 1;
+ a &= PCI_BASE_ADDRESS_IO_MASK;
+ if (!a || a == PCI_BASE_ADDRESS_IO_MASK) /* address bits all 0 or all 1: unassigned */
+ pcibios_fixup_io_addr(dev, i);
+ } else if (a & PCI_BASE_ADDRESS_MEM_MASK)
+ has_mem = 1;
+ }
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ if (has_io && !(cmd & PCI_COMMAND_IO)) {
+ printk("PCI: Enabling I/O for device %02x:%02x\n",
+ dev->bus->number, dev->devfn);
+ cmd |= PCI_COMMAND_IO;
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+ if (has_mem && !(cmd & PCI_COMMAND_MEMORY)) {
+ printk("PCI: Enabling memory for device %02x:%02x\n",
+ dev->bus->number, dev->devfn);
+ cmd |= PCI_COMMAND_MEMORY;
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+#ifdef __SMP__
+ /*
+ * Recalculate IRQ numbers if we use the I/O APIC
+ *
+ * A pin value of 0 read from PCI_INTERRUPT_PIN skips the lookup
+ * entirely; a negative lookup result leaves dev->irq untouched.
+ */
+ {
+ int irq;
+ unsigned char pin;
+
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (pin) {
+ pin--; /* interrupt pins are numbered starting from 1 */
+ irq = IO_APIC_get_PCI_irq_vector (dev->bus->number, PCI_SLOT(dev->devfn), pin);
+ if (irq >= 0) {
+ printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n",
+ dev->bus->number, PCI_SLOT(dev->devfn), pin, irq);
+ dev->irq = irq;
+ }
+ }
+ }
+#endif
+ /*
+ * Fix out-of-range IRQ numbers by resetting them to 0.
+ * (Nothing is printed here; the bogus value is silently dropped.)
+ */
+ if (dev->irq >= NR_IRQS)
+ dev->irq = 0;
+ }
+
+#ifdef CONFIG_PCI_BIOS
+ if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
+ pcibios_sort();
+#endif
}
/*
- * Initialization. Try all known PCI access methods.
+ * Initialization. Try all known PCI access methods. Note that we support
+ * using both PCI BIOS and direct access: in such cases, we use I/O ports
+ * to access config space, but we still keep BIOS order of cards to be
+ * compatible with 2.0.X. This should go away in 2.3.
*/
-__initfunc(unsigned long pcibios_init(unsigned long memory_start, unsigned long memory_end))
+__initfunc(void pcibios_init(void))
{
- struct pci_access *a = NULL;
+ struct pci_access *bios = NULL;
+ struct pci_access *dir = NULL;
#ifdef CONFIG_PCI_BIOS
- a = pci_find_bios();
-#else
+ if ((pci_probe & PCI_PROBE_BIOS) && ((bios = pci_find_bios())))
+ pci_probe |= PCI_BIOS_SORT;
+#endif
#ifdef CONFIG_PCI_DIRECT
- a = pci_check_direct();
-#else
-#error "You need to set CONFIG_PCI_BIOS or CONFIG_PCI_DIRECT if you want PCI support."
+ if (pci_probe & (PCI_PROBE_CONF1 | PCI_PROBE_CONF2))
+ dir = pci_check_direct();
#endif
+ if (dir)
+ access_pci = dir;
+ else if (bios)
+ access_pci = bios;
+}
+
+#if !defined(CONFIG_PCI_BIOS) && !defined(CONFIG_PCI_DIRECT)
+#error PCI configured with neither PCI BIOS or PCI direct access support.
#endif
- if (a)
- access_pci = a;
- return memory_start;
+__initfunc(char *pcibios_setup(char *str)) /* parse one PCI option string; returns NULL if it was recognized, str otherwise */
+{
+ if (!strcmp(str, "off")) {
+ pci_probe = 0; /* clears every probe and sort flag */
+ return NULL;
+ } else if (!strncmp(str, "io=", 3)) {
+ char *p;
+ unsigned int x = simple_strtoul(str+3, &p, 16); /* value after "io=" is parsed as hexadecimal */
+ if (p && *p) /* characters left over after the number: treat the option as unrecognized */
+ return str;
+ pci_last_io_addr = x; /* a value of 0 makes pcibios_fixup_io_addr() refuse to assign addresses */
+ return NULL;
+ }
+#ifdef CONFIG_PCI_BIOS
+ else if (!strcmp(str, "bios")) {
+ pci_probe = PCI_PROBE_BIOS; /* plain assignment: also drops the CONF1/CONF2 and sort bits */
+ return NULL;
+ } else if (!strcmp(str, "nobios")) {
+ pci_probe &= ~PCI_PROBE_BIOS;
+ return NULL;
+ } else if (!strcmp(str, "nosort")) {
+ pci_probe |= PCI_NO_SORT;
+ return NULL;
+ }
+#endif
+#ifdef CONFIG_PCI_DIRECT
+ else if (!strcmp(str, "conf1")) {
+ pci_probe = PCI_PROBE_CONF1; /* plain assignment: every other pci_probe bit is dropped */
+ return NULL;
+ }
+ else if (!strcmp(str, "conf2")) {
+ pci_probe = PCI_PROBE_CONF2; /* plain assignment: every other pci_probe bit is dropped */
+ return NULL;
+ }
+#endif
+ return str; /* not an option handled here */
}
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 14b82b45b..b6541005f 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -81,8 +81,8 @@ ENOSYS = 38
#define SAVE_ALL \
cld; \
- push %es; \
- push %ds; \
+ pushl %es; \
+ pushl %ds; \
pushl %eax; \
pushl %ebp; \
pushl %edi; \
@@ -91,8 +91,8 @@ ENOSYS = 38
pushl %ecx; \
pushl %ebx; \
movl $(__KERNEL_DS),%edx; \
- mov %dx,%ds; \
- mov %dx,%es;
+ movl %dx,%ds; \
+ movl %dx,%es;
#define RESTORE_ALL \
popl %ebx; \
@@ -102,8 +102,8 @@ ENOSYS = 38
popl %edi; \
popl %ebp; \
popl %eax; \
- pop %ds; \
- pop %es; \
+ popl %ds; \
+ popl %es; \
addl $4,%esp; \
iret
@@ -155,7 +155,7 @@ ENTRY(system_call)
jae badsys
testb $0x20,flags(%ebx) # PF_TRACESYS
jne tracesys
- call SYMBOL_NAME(sys_call_table)(,%eax,4)
+ call *SYMBOL_NAME(sys_call_table)(,%eax,4)
movl %eax,EAX(%esp) # save the return value
ALIGN
.globl ret_from_sys_call
@@ -193,7 +193,7 @@ tracesys:
movl $-ENOSYS,EAX(%esp)
call SYMBOL_NAME(syscall_trace)
movl ORIG_EAX(%esp),%eax
- call SYMBOL_NAME(sys_call_table)(,%eax,4)
+ call *SYMBOL_NAME(sys_call_table)(,%eax,4)
movl %eax,EAX(%esp) # save the return value
call SYMBOL_NAME(syscall_trace)
jmp ret_from_sys_call
@@ -231,7 +231,7 @@ ENTRY(divide_error)
pushl $ SYMBOL_NAME(do_divide_error)
ALIGN
error_code:
- push %ds
+ pushl %ds
pushl %eax
xorl %eax,%eax
pushl %ebp
@@ -241,17 +241,27 @@ error_code:
decl %eax # eax = -1
pushl %ecx
pushl %ebx
+#if 1
xorl %ecx,%ecx # zero ecx
cld
mov %es,%cx # get the lower order bits of es
+#else
+ cld
+# Some older processors leave the top 16 bits of the 32 bit destination
+# register undefined, rather than zeroed in the following instruction.
+# This won't matter when restoring or loading a segment register from the
+# stack. It may be a problem if any code reads the full 32 bit value.
+# dosemu? kernel? Would somebody like to verify that this way is really OK?
+ movl %es,%cx
+#endif
xchgl %eax, ORIG_EAX(%esp) # orig_eax (get the error code. )
movl %esp,%edx
xchgl %ecx, ES(%esp) # get the address and save es.
pushl %eax # push the error code
pushl %edx
movl $(__KERNEL_DS),%edx
- mov %dx,%ds
- mov %dx,%es
+ movl %dx,%ds
+ movl %dx,%es
GET_CURRENT(%ebx)
call *%ecx
addl $8,%esp
@@ -533,6 +543,7 @@ ENTRY(sys_call_table)
.long SYMBOL_NAME(sys_pread) /* 180 */
.long SYMBOL_NAME(sys_pwrite)
.long SYMBOL_NAME(sys_chown)
+ .long SYMBOL_NAME(sys_getcwd)
.rept NR_syscalls-182
.long SYMBOL_NAME(sys_ni_syscall)
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 048921838..86031f37f 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -45,10 +45,10 @@ startup_32:
*/
cld
movl $(__KERNEL_DS),%eax
- mov %ax,%ds
- mov %ax,%es
- mov %ax,%fs
- mov %ax,%gs
+ movl %ax,%ds
+ movl %ax,%es
+ movl %ax,%fs
+ movl %ax,%gs
#ifdef __SMP__
orw %bx,%bx
jz 1f
@@ -321,10 +321,10 @@ is386: pushl %ecx # restore original EFLAGS
lidt idt_descr
ljmp $(__KERNEL_CS),$1f
1: movl $(__KERNEL_DS),%eax # reload all the segment registers
- mov %ax,%ds # after changing gdt.
- mov %ax,%es
- mov %ax,%fs
- mov %ax,%gs
+ movl %ax,%ds # after changing gdt.
+ movl %ax,%es
+ movl %ax,%fs
+ movl %ax,%gs
#ifdef __SMP__
movl $(__KERNEL_DS), %eax
mov %ax,%ss # Reload the stack pointer (segment only)
@@ -404,16 +404,16 @@ ignore_int:
pushl %eax
pushl %ecx
pushl %edx
- push %es
- push %ds
+ pushl %es
+ pushl %ds
movl $(__KERNEL_DS),%eax
- mov %ax,%ds
- mov %ax,%es
+ movl %ax,%ds
+ movl %ax,%es
pushl $int_msg
call SYMBOL_NAME(printk)
popl %eax
- pop %ds
- pop %es
+ popl %ds
+ popl %es
popl %edx
popl %ecx
popl %eax
@@ -619,9 +619,6 @@ ENTRY(idt_table)
.fill 256,8,0 # idt is uninitialized
/*
- * This gdt setup gives the kernel a CONFIG_MAX_MEMSIZE sized address space at
- * virtual address PAGE_OFFSET.
- *
* This contains up to 8192 quadwords depending on NR_TASKS - 64kB of
* gdt entries. Ugh.
*
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
index d2837d648..66dec5fed 100644
--- a/arch/i386/kernel/i386_ksyms.c
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -8,7 +8,6 @@
#include <linux/in6.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
-#include <linux/pci.h>
#include <asm/semaphore.h>
#include <asm/processor.h>
@@ -64,13 +63,14 @@ EXPORT_SYMBOL(__generic_copy_to_user);
EXPORT_SYMBOL(strlen_user);
#ifdef __SMP__
-EXPORT_SYMBOL(apic_reg); /* Needed internally for the I386 inlines */
EXPORT_SYMBOL(cpu_data);
EXPORT_SYMBOL_NOVERS(kernel_flag);
EXPORT_SYMBOL_NOVERS(active_kernel_processor);
EXPORT_SYMBOL(smp_invalidate_needed);
EXPORT_SYMBOL_NOVERS(__lock_kernel);
EXPORT_SYMBOL(lk_lockmsg);
+EXPORT_SYMBOL(__cpu_logical_map);
+EXPORT_SYMBOL(smp_num_cpus);
/* Global SMP irq stuff */
EXPORT_SYMBOL(synchronize_irq);
@@ -82,6 +82,8 @@ EXPORT_SYMBOL(__global_cli);
EXPORT_SYMBOL(__global_sti);
EXPORT_SYMBOL(__global_save_flags);
EXPORT_SYMBOL(__global_restore_flags);
+EXPORT_SYMBOL(smp_message_pass);
+EXPORT_SYMBOL(mtrr_hook);
#endif
#ifdef CONFIG_MCA
@@ -97,7 +99,3 @@ EXPORT_SYMBOL(mca_set_adapter_procfn);
EXPORT_SYMBOL(mca_isenabled);
EXPORT_SYMBOL(mca_isadapter);
#endif
-
-#if CONFIG_PCI
-EXPORT_SYMBOL(pci_devices);
-#endif
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 219e7f853..6e422614e 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -28,13 +28,11 @@
#include "irq.h"
-#define IO_APIC_BASE 0xfec00000
-
/*
* volatile is justified in this case, it might change
* spontaneously, GCC should not cache it
*/
-volatile unsigned int * io_apic_reg = NULL;
+#define IO_APIC_BASE ((volatile int *)0xfec00000)
/*
* The structure of the IO-APIC:
@@ -96,17 +94,19 @@ int nr_ioapic_registers = 0; /* # of IRQ routing registers */
int mp_irq_entries = 0; /* # of MP IRQ source entries */
struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* MP IRQ source entries */
+int mpc_default_type = 0; /* non-0 if default (table-less)
+ MP configuration */
unsigned int io_apic_read (unsigned int reg)
{
- *io_apic_reg = reg;
- return *(io_apic_reg+4);
+ *IO_APIC_BASE = reg;
+ return *(IO_APIC_BASE+4);
}
void io_apic_write (unsigned int reg, unsigned int value)
{
- *io_apic_reg = reg;
- *(io_apic_reg+4) = value;
+ *IO_APIC_BASE = reg;
+ *(IO_APIC_BASE+4) = value;
}
void enable_IO_APIC_irq (unsigned int irq)
@@ -256,7 +256,7 @@ void setup_IO_APIC_irqs (void)
/*
* PCI IRQ redirection. Yes, limits are hardcoded.
*/
- if ((i>=16) && (i<=19)) {
+ if ((i>=16) && (i<=23)) {
if (pirq_entries[i-16] != -1) {
if (!pirq_entries[i-16]) {
printk("disabling PIRQ%d\n", i-16);
@@ -516,16 +516,16 @@ void print_IO_APIC (void)
static void init_sym_mode (void)
{
printk("enabling Symmetric IO mode ... ");
- outb (0x70, 0x22);
- outb (0x01, 0x23);
+ outb_p (0x70, 0x22);
+ outb_p (0x01, 0x23);
printk("...done.\n");
}
void init_pic_mode (void)
{
printk("disabling Symmetric IO mode ... ");
- outb (0x70, 0x22);
- outb (0x00, 0x23);
+ outb_p (0x70, 0x22);
+ outb_p (0x00, 0x23);
printk("...done.\n");
}
@@ -579,17 +579,85 @@ static int ioapic_blacklisted (void)
return in_ioapic_list(ioapic_blacklist);
}
+static void setup_ioapic_id (void)
+{
+ struct IO_APIC_reg_00 reg_00;
-void setup_IO_APIC (void)
+ /*
+ * 'default' mptable configurations mean a hardwired setup,
+ * 2 CPUs, 16 APIC registers. IO-APIC ID is usually set to 0,
+ * setting it to ID 2 should be fine.
+ */
+
+ /*
+ * Sanity check, is ID 2 really free? Every APIC in the
+ * system must have a unique ID or we get lots of nice
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
+ */
+ if (cpu_present_map & (1<<0x2))
+ panic("APIC ID 2 already used");
+
+ /*
+ * set the ID
+ */
+ *(int *)&reg_00 = io_apic_read(0);
+ printk("... changing IO-APIC physical APIC ID to 2 ...\n");
+ reg_00.ID = 0x2;
+ io_apic_write(0, *(int *)&reg_00);
+
+ /*
+ * Sanity check
+ */
+ *(int *)&reg_00 = io_apic_read(0);
+ if (reg_00.ID != 0x2)
+ panic("could not set ID");
+}
+
+static void construct_default_ISA_mptable (void)
{
- int i;
+ int i, pos=0;
+
+ for (i=0; i<16; i++) {
+ if (!IO_APIC_IRQ(i))
+ continue;
+
+ mp_irqs[pos].mpc_irqtype = 0;
+ mp_irqs[pos].mpc_irqflag = 0;
+ mp_irqs[pos].mpc_srcbus = 0;
+ mp_irqs[pos].mpc_srcbusirq = i;
+ mp_irqs[pos].mpc_dstapic = 0;
+ mp_irqs[pos].mpc_dstirq = i;
+ pos++;
+ }
+ mp_irq_entries = pos;
+ mp_bus_id_to_type[0] = MP_BUS_ISA;
+
/*
- * Map the IO APIC into kernel space
+ * MP specification 1.4 defines some extra rules for default
+ * configurations, fix them up here:
*/
+
+ switch (mpc_default_type)
+ {
+ case 2:
+ break;
+ default:
+ /*
+ * pin 2 is IRQ0:
+ */
+ mp_irqs[0].mpc_dstirq = 2;
+ }
- printk("mapping IO APIC from standard address.\n");
- io_apic_reg = ioremap_nocache(IO_APIC_BASE,4096);
- printk("new virtual address: %p.\n",io_apic_reg);
+ setup_ioapic_id();
+}
+
+void setup_IO_APIC (void)
+{
+ int i;
+
+ if (!pirqs_enabled)
+ for (i=0; i<MAX_PIRQS; i++)
+ pirq_entries[i]=-1;
init_sym_mode();
{
@@ -605,12 +673,6 @@ void setup_IO_APIC (void)
for (i=0; i<nr_ioapic_registers; i++)
clear_IO_APIC_irq (i);
-#if DEBUG_1
- for (i=0; i<16; i++)
- if (IO_APIC_IRQ(i))
- setup_IO_APIC_irq_ISA_default (i);
-#endif
-
/*
* the following IO-APIC's can be enabled:
*
@@ -634,7 +696,18 @@ void setup_IO_APIC (void)
io_apic_irqs = 0;
}
+ /*
+ * If there are no explicit mp irq entries: it's either one of the
+ * default configuration types or we are broken. In both cases it's
+ * fine to set up most of the low 16 IOAPIC pins to ISA defaults.
+ */
+ if (!mp_irq_entries) {
+ printk("no explicit IRQ entries, using default mptable\n");
+ construct_default_ISA_mptable();
+ }
+
init_IO_APIC_traps();
+
setup_IO_APIC_irqs ();
if (!timer_irq_works ()) {
@@ -644,9 +717,9 @@ void setup_IO_APIC (void)
printk("..MP-BIOS bug: i8254 timer not connected to IO-APIC\n");
printk("..falling back to 8259A-based timer interrupt\n");
}
-
- printk("nr of MP irq sources: %d.\n", mp_irq_entries);
- printk("nr of IOAPIC registers: %d.\n", nr_ioapic_registers);
+
+ printk("nr of MP irq sources: %d.\n", mp_irq_entries);
+ printk("nr of IOAPIC registers: %d.\n", nr_ioapic_registers);
print_IO_APIC();
}
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
index 44fd26530..19587312a 100644
--- a/arch/i386/kernel/ioport.c
+++ b/arch/i386/kernel/ioport.c
@@ -76,8 +76,6 @@ asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on)
return 0;
}
-unsigned int *stack;
-
/*
* sys_iopl has to be used when you want to access the IO ports
* beyond the 0x3ff range: to get the full 65536 ports bitmapped
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 95ce9fb14..2b8b86cc7 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -68,10 +68,6 @@ static unsigned int cached_irq_mask = (1<<NR_IRQS)-1;
spinlock_t irq_controller_lock;
-static unsigned int irq_events [NR_IRQS] = { -1, };
-static int disabled_irq [NR_IRQS] = { 0, };
-static int ipi_pending [NR_IRQS] = { 0, };
-
/*
* Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
* boards the timer interrupt and sometimes the keyboard interrupt is
@@ -124,11 +120,34 @@ static struct hw_interrupt_type ioapic_irq_type = {
};
#endif
-struct hw_interrupt_type *irq_handles[NR_IRQS] =
-{
- [0 ... 15] = &i8259A_irq_type /* standard ISA IRQs */
+/*
+ * Status: reason for being disabled: somebody has
+ * done a "disable_irq()" or we must not re-enter the
+ * already executing irq..
+ */
+#define IRQ_INPROGRESS 1
+#define IRQ_DISABLED 2
+
+/*
+ * This is the "IRQ descriptor", which contains various information
+ * about the irq, including what kind of hardware handling it has,
+ * whether it is disabled etc etc.
+ *
+ * Pad this out to 32 bytes for cache and indexing reasons.
+ * (unused[] is that padding; the 32-byte total assumes 4-byte ints
+ * and 4-byte pointers -- TODO confirm for the target.)
+ */
+typedef struct {
+ unsigned int status; /* IRQ status - IRQ_INPROGRESS, IRQ_DISABLED */
+ unsigned int events; /* Do we have any pending events? */
+ unsigned int ipi; /* Have we sent off the pending IPI? */
+ struct hw_interrupt_type *handler; /* handle/enable/disable functions */
+ struct irqaction *action; /* IRQ action list */
+ unsigned int unused[3]; /* padding only, see above */
+} irq_desc_t;
+
+irq_desc_t irq_desc[NR_IRQS] = {
+ [0 ... 15] = { 0, 0, 0, &i8259A_irq_type, }, /* standard ISA IRQs */
#ifdef __SMP__
- , [16 ... NR_IRQS-1] = &ioapic_irq_type /* 'high' PCI IRQs */
+ [16 ... 23] = { 0, 0, 0, &ioapic_irq_type, }, /* 'high' PCI IRQs */
#endif
};
@@ -175,6 +194,7 @@ void set_8259A_irq_mask(unsigned int irq)
void unmask_generic_irq(unsigned int irq)
{
+ irq_desc[irq].status = 0;
if (IO_APIC_IRQ(irq))
enable_IO_APIC_irq(irq);
else {
@@ -241,6 +261,7 @@ BUILD_IRQ(23)
BUILD_SMP_INTERRUPT(reschedule_interrupt)
BUILD_SMP_INTERRUPT(invalidate_interrupt)
BUILD_SMP_INTERRUPT(stop_cpu_interrupt)
+BUILD_SMP_INTERRUPT(mtrr_interrupt)
/*
* every pentium local APIC has two 'local interrupts', with a
@@ -297,17 +318,6 @@ static struct irqaction irq13 = { math_error_irq, 0, 0, "fpu", NULL, NULL };
*/
static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL};
-static struct irqaction *irq_action[NR_IRQS] = {
- NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL
-#ifdef __SMP__
- ,NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL
-#endif
-};
-
int get_irq_list(char *buf)
{
int i, j;
@@ -320,7 +330,7 @@ int get_irq_list(char *buf)
*p++ = '\n';
for (i = 0 ; i < NR_IRQS ; i++) {
- action = irq_action[i];
+ action = irq_desc[i].action;
if (!action)
continue;
p += sprintf(p, "%3d: ",i);
@@ -335,7 +345,7 @@ int get_irq_list(char *buf)
if (IO_APIC_IRQ(i))
p += sprintf(p, " IO-APIC ");
else
- p += sprintf(p, " XT PIC ");
+ p += sprintf(p, " XT-PIC ");
p += sprintf(p, " %s", action->name);
for (action=action->next; action; action = action->next) {
@@ -535,20 +545,31 @@ static inline void get_irqlock(int cpu)
global_irq_holder = cpu;
}
+#define EFLAGS_IF_SHIFT 9
+
/*
* A global "cli()" while in an interrupt context
* turns into just a local cli(). Interrupts
* should use spinlocks for the (very unlikely)
* case that they ever want to protect against
* each other.
+ *
+ * If we already have local interrupts disabled,
+ * this will not turn a local disable into a
+ * global one (problems with spinlocks: this makes
+ * save_flags+cli+sti usable inside a spinlock).
*/
void __global_cli(void)
{
- int cpu = smp_processor_id();
+ unsigned int flags;
- __cli();
- if (!local_irq_count[cpu])
- get_irqlock(cpu);
+ __save_flags(flags);
+ if (flags & (1 << EFLAGS_IF_SHIFT)) {
+ int cpu = smp_processor_id();
+ __cli();
+ if (!local_irq_count[cpu])
+ get_irqlock(cpu);
+ }
}
void __global_sti(void)
@@ -560,33 +581,53 @@ void __global_sti(void)
__sti();
}
+/*
+ * SMP flags value to restore to:
+ * 0 - global cli
+ * 1 - global sti
+ * 2 - local cli
+ * 3 - local sti
+ */
unsigned long __global_save_flags(void)
{
- if (!local_irq_count[smp_processor_id()])
- return global_irq_holder == (unsigned char) smp_processor_id();
- else {
- unsigned long x;
- __save_flags(x);
- return x;
+ int retval;
+ int local_enabled;
+ unsigned long flags;
+
+ __save_flags(flags);
+ local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1;
+ /* default to local */
+ retval = 2 + local_enabled;
+
+ /* check for global flags if we're not in an interrupt */
+ if (!local_irq_count[smp_processor_id()]) {
+ if (local_enabled)
+ retval = 1;
+ if (global_irq_holder == (unsigned char) smp_processor_id())
+ retval = 0;
}
+ return retval;
}
void __global_restore_flags(unsigned long flags)
{
- if (!local_irq_count[smp_processor_id()]) {
- switch (flags) {
- case 0:
- __global_sti();
- break;
- case 1:
- __global_cli();
- break;
- default:
- printk("global_restore_flags: %08lx (%08lx)\n",
- flags, (&flags)[-1]);
- }
- } else
- __restore_flags(flags);
+ switch (flags) {
+ case 0:
+ __global_cli();
+ break;
+ case 1:
+ __global_sti();
+ break;
+ case 2:
+ __cli();
+ break;
+ case 3:
+ __sti();
+ break;
+ default:
+ printk("global_restore_flags: %08lx (%08lx)\n",
+ flags, (&flags)[-1]);
+ }
}
#endif
@@ -597,7 +638,7 @@ static int handle_IRQ_event(unsigned int irq, struct pt_regs * regs)
int status;
status = 0;
- action = *(irq + irq_action);
+ action = irq_desc[irq].action;
if (action) {
status |= 1;
@@ -618,125 +659,26 @@ static int handle_IRQ_event(unsigned int irq, struct pt_regs * regs)
return status;
}
-
-void disable_irq(unsigned int irq)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- irq_handles[irq]->disable(irq);
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-
- synchronize_irq();
-}
-
/*
* disable/enable_irq() wait for all irq contexts to finish
* executing. Also it's recursive.
*/
static void disable_8259A_irq(unsigned int irq)
{
- disabled_irq[irq]++;
cached_irq_mask |= 1 << irq;
set_8259A_irq_mask(irq);
}
-#ifdef __SMP__
-static void disable_ioapic_irq(unsigned int irq)
-{
- disabled_irq[irq]++;
- /*
- * We do not disable IO-APIC irqs in hardware ...
- */
-}
-#endif
-
void enable_8259A_irq (unsigned int irq)
{
- unsigned long flags;
- spin_lock_irqsave(&irq_controller_lock, flags);
- if (disabled_irq[irq])
- disabled_irq[irq]--;
- else {
- spin_unlock_irqrestore(&irq_controller_lock, flags);
- return;
- }
cached_irq_mask &= ~(1 << irq);
set_8259A_irq_mask(irq);
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-
-#ifdef __SMP__
-void enable_ioapic_irq (unsigned int irq)
-{
- unsigned long flags, should_handle_irq;
- int cpu = smp_processor_id();
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- if (disabled_irq[irq])
- disabled_irq[irq]--;
- else {
- spin_unlock_irqrestore(&irq_controller_lock, flags);
- return;
- }
-#if 0
- /*
- * In the SMP+IOAPIC case it might happen that there are an unspecified
- * number of pending IRQ events unhandled. These cases are very rare,
- * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
- * better to do it this way as thus we dont have to be aware of
- * 'pending' interrupts in the IRQ path, except at this point.
- */
- if (!disabled_irq[irq] && irq_events[irq]) {
- if (!ipi_pending[irq]) {
- ipi_pending[irq] = 1;
- --irq_events[irq];
- send_IPI(cpu,IO_APIC_VECTOR(irq));
- }
- }
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-#else
- if (!disabled_irq[irq] && irq_events[irq]) {
- struct pt_regs regs; /* FIXME: these are fake currently */
-
- disabled_irq[irq]++;
- hardirq_enter(cpu);
- spin_unlock(&irq_controller_lock);
-
- release_irqlock(cpu);
- while (test_bit(0,&global_irq_lock)) mb();
-again:
- handle_IRQ_event(irq, &regs);
-
- spin_lock(&irq_controller_lock);
- disabled_irq[irq]--;
- should_handle_irq=0;
- if (--irq_events[irq] && !disabled_irq[irq]) {
- should_handle_irq=1;
- disabled_irq[irq]++;
- }
- spin_unlock(&irq_controller_lock);
-
- if (should_handle_irq)
- goto again;
-
- irq_exit(cpu, irq);
- __restore_flags(flags);
- } else
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-#endif
-}
-#endif
-
-void enable_irq(unsigned int irq)
-{
- irq_handles[irq]->enable(irq);
}
void make_8259A_irq (unsigned int irq)
{
io_apic_irqs &= ~(1<<irq);
- irq_handles[irq] = &i8259A_irq_type;
+ irq_desc[irq].handler = &i8259A_irq_type;
disable_irq(irq);
enable_irq(irq);
}
@@ -750,6 +692,7 @@ void make_8259A_irq (unsigned int irq)
static inline void mask_and_ack_8259A(unsigned int irq)
{
spin_lock(&irq_controller_lock);
+ irq_desc[irq].status |= IRQ_INPROGRESS;
cached_irq_mask |= 1 << irq;
if (irq & 8) {
inb(0xA1); /* DUMMY */
@@ -772,7 +715,8 @@ static void do_8259A_IRQ(unsigned int irq, int cpu, struct pt_regs * regs)
if (handle_IRQ_event(irq, regs)) {
spin_lock(&irq_controller_lock);
- unmask_8259A(irq);
+ if (!(irq_desc[irq].status &= IRQ_DISABLED))
+ unmask_8259A(irq);
spin_unlock(&irq_controller_lock);
}
@@ -780,41 +724,119 @@ static void do_8259A_IRQ(unsigned int irq, int cpu, struct pt_regs * regs)
}
#ifdef __SMP__
+
+/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we don't have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ *
+ * Concretely: if an event was recorded for this irq (desc->events) and
+ * no resend is already outstanding (desc->ipi clear), mark the resend
+ * as sent and raise the irq's vector on the current CPU. desc->ipi is
+ * cleared again by do_ioapic_IRQ() when the interrupt is taken.
+ *
+ * NOTE(review): no locking is done here; presumably this is reached via
+ * enable_irq(), which holds irq_controller_lock around the call to
+ * handler->enable -- confirm against the ioapic_irq_type initialiser.
+ */
+static void enable_ioapic_irq(unsigned int irq)
+{
+ irq_desc_t *desc = irq_desc + irq;
+ if (desc->events && !desc->ipi) {
+ desc->ipi = 1;
+ send_IPI(APIC_DEST_SELF, IO_APIC_VECTOR(irq));
+ }
+}
+
+/*
+ * We do not actually disable IO-APIC irqs in hardware ...
+ *
+ * The body is intentionally empty: do_ioapic_IRQ() checks the
+ * IRQ_DISABLED bit in the descriptor's status instead, and when it is
+ * set only records the interrupt in desc->events and returns.
+ */
+static void disable_ioapic_irq(unsigned int irq)
+{
+}
+
static void do_ioapic_IRQ(unsigned int irq, int cpu, struct pt_regs * regs)
{
- int should_handle_irq = 0;
+ irq_desc_t *desc = irq_desc + irq;
+
+ spin_lock(&irq_controller_lock);
+ /* Ack the irq inside the lock! */
ack_APIC_irq();
+ desc->ipi = 0;
- spin_lock(&irq_controller_lock);
- if (ipi_pending[irq])
- ipi_pending[irq] = 0;
+ /* If the irq is disabled for whatever reason, just set a flag and return */
+ if (desc->status & (IRQ_DISABLED | IRQ_INPROGRESS)) {
+ desc->events = 1;
+ spin_unlock(&irq_controller_lock);
+ return;
+ }
- if (!irq_events[irq]++ && !disabled_irq[irq])
- should_handle_irq = 1;
+ desc->status = IRQ_INPROGRESS;
+ desc->events = 0;
hardirq_enter(cpu);
spin_unlock(&irq_controller_lock);
- if (should_handle_irq) {
- while (test_bit(0,&global_irq_lock)) mb();
-again:
- handle_IRQ_event(irq, regs);
+ while (test_bit(0,&global_irq_lock)) barrier();
+
+ for (;;) {
+ int pending;
+
+ /* If there is no IRQ handler, exit early, leaving the irq "in progress" */
+ if (!handle_IRQ_event(irq, regs))
+ goto no_handler;
spin_lock(&irq_controller_lock);
- should_handle_irq=0;
- if (--irq_events[irq] && !disabled_irq[irq])
- should_handle_irq=1;
+ pending = desc->events;
+ desc->events = 0;
+ if (!pending)
+ break;
spin_unlock(&irq_controller_lock);
-
- if (should_handle_irq)
- goto again;
}
+ desc->status &= IRQ_DISABLED;
+ spin_unlock(&irq_controller_lock);
+no_handler:
hardirq_exit(cpu);
release_irqlock(cpu);
}
+
#endif
+
+/*
+ * Generic enable/disable code: this just calls
+ * down into the PIC-specific version for the actual
+ * hardware disable after having gotten the irq
+ * controller lock.
+ *
+ * disable_irq() sets IRQ_DISABLED in the irq's descriptor, invokes the
+ * descriptor's handler->disable hook, drops the lock and then calls
+ * synchronize_irq(). "irq" is used as an unchecked index into
+ * irq_desc[], so the caller must pass a value below NR_IRQS.
+ */
+void disable_irq(unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&irq_controller_lock, flags);
+ /*
+ * At this point we may actually have a pending interrupt being active
+ * on another CPU. So don't touch the IRQ_INPROGRESS bit..
+ */
+ irq_desc[irq].status |= IRQ_DISABLED;
+ irq_desc[irq].handler->disable(irq);
+ spin_unlock_irqrestore(&irq_controller_lock, flags);
+
+ synchronize_irq(); /* runs after the lock is released; presumably waits for handlers still running elsewhere -- confirm */
+}
+
+void enable_irq(unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&irq_controller_lock, flags);
+ /*
+ * In contrast to the above, we should _not_ have any concurrent
+ * interrupt activity here, so we just clear both disabled bits.
+ *
+ * This allows us to have IRQ_INPROGRESS set until we actually
+ * install a handler for this interrupt (make irq autodetection
+ * work by just looking at the status field for the irq)
+ *
+ * NOTE(review): status is a plain bit mask, not a counter, so
+ * disable_irq() calls do not nest: one enable_irq() re-enables the
+ * irq however many times disable_irq() was called before it.
+ * "irq" is used as an unchecked index into irq_desc[].
+ */
+ irq_desc[irq].status = 0;
+ irq_desc[irq].handler->enable(irq);
+ spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+
/*
* do_IRQ handles all normal device IRQ's (the special
* SMP cross-CPU interrupts have their own specific
@@ -845,7 +867,7 @@ asmlinkage void do_IRQ(struct pt_regs regs)
int cpu = smp_processor_id();
kstat.irqs[cpu][irq]++;
- irq_handles[irq]->handle(irq, cpu, &regs);
+ irq_desc[irq].handler->handle(irq, cpu, &regs);
/*
* This should be conditional: we should really get
@@ -865,7 +887,7 @@ int setup_x86_irq(unsigned int irq, struct irqaction * new)
struct irqaction *old, **p;
unsigned long flags;
- p = irq_action + irq;
+ p = &irq_desc[irq].action;
if ((old = *p) != NULL) {
/* Can't share interrupts unless both agree to */
if (!(old->flags & new->flags & SA_SHIRQ))
@@ -890,7 +912,7 @@ int setup_x86_irq(unsigned int irq, struct irqaction * new)
spin_lock(&irq_controller_lock);
#ifdef __SMP__
if (IO_APIC_IRQ(irq)) {
- irq_handles[irq] = &ioapic_irq_type;
+ irq_desc[irq].handler = &ioapic_irq_type;
/*
* First disable it in the 8259A:
*/
@@ -948,7 +970,7 @@ void free_irq(unsigned int irq, void *dev_id)
printk("Trying to free IRQ%d\n",irq);
return;
}
- for (p = irq + irq_action; (action = *p) != NULL; p = &action->next) {
+ for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) {
if (action->dev_id != dev_id)
continue;
@@ -964,32 +986,29 @@ void free_irq(unsigned int irq, void *dev_id)
}
/*
- * probing is always single threaded [FIXME: is this true?]
+ * IRQ autodetection code..
+ *
+ * This depends on the fact that any interrupt that
+ * comes in on to an unassigned handler will get stuck
+ * with "IRQ_INPROGRESS" asserted and the interrupt
+ * disabled.
*/
-static unsigned int probe_irqs[NR_CPUS][NR_IRQS];
-
unsigned long probe_irq_on (void)
{
- unsigned int i, j, irqs = 0;
+ unsigned int i, irqs = 0;
unsigned long delay;
/*
- * save current irq counts
- */
- memcpy(probe_irqs,kstat.irqs,NR_CPUS*NR_IRQS*sizeof(int));
-
- /*
* first, enable any unassigned irqs
*/
+ spin_lock_irq(&irq_controller_lock);
for (i = NR_IRQS-1; i > 0; i--) {
- if (!irq_action[i]) {
- unsigned long flags;
- spin_lock_irqsave(&irq_controller_lock, flags);
+ if (!irq_desc[i].action) {
unmask_generic_irq(i);
irqs |= (1 << i);
- spin_unlock_irqrestore(&irq_controller_lock, flags);
}
}
+ spin_unlock_irq(&irq_controller_lock);
/*
* wait for spurious interrupts to increase counters
@@ -1000,35 +1019,35 @@ unsigned long probe_irq_on (void)
/*
* now filter out any obviously spurious interrupts
*/
- for (i=0; i<NR_IRQS; i++)
- for (j=0; j<NR_CPUS; j++)
- if (kstat.irqs[j][i] != probe_irqs[j][i])
- irqs &= ~(1UL << i);
+ spin_lock_irq(&irq_controller_lock);
+ for (i=0; i<NR_IRQS; i++) {
+ if (irq_desc[i].status & IRQ_INPROGRESS)
+ irqs &= ~(1UL << i);
+ }
+ spin_unlock_irq(&irq_controller_lock);
return irqs;
}
int probe_irq_off (unsigned long irqs)
{
- int i,j, irq_found = -1;
+ int i, irq_found = -1;
+ spin_lock_irq(&irq_controller_lock);
for (i=0; i<NR_IRQS; i++) {
- int sum = 0;
- for (j=0; j<NR_CPUS; j++) {
- sum += kstat.irqs[j][i];
- sum -= probe_irqs[j][i];
- }
- if (sum && (irqs & (1UL << i))) {
+ if ((irqs & 1) && (irq_desc[i].status & IRQ_INPROGRESS)) {
if (irq_found != -1) {
irq_found = -irq_found;
goto out;
- } else
- irq_found = i;
+ }
+ irq_found = i;
}
+ irqs >>= 1;
}
if (irq_found == -1)
irq_found = 0;
out:
+ spin_unlock_irq(&irq_controller_lock);
return irq_found;
}
@@ -1050,7 +1069,7 @@ void init_IO_APIC_traps(void)
for (i = 0; i < NR_IRQS ; i++)
if (IO_APIC_VECTOR(i) <= 0xfe) /* HACK */ {
if (IO_APIC_IRQ(i)) {
- irq_handles[i] = &ioapic_irq_type;
+ irq_desc[i].handler = &ioapic_irq_type;
/*
* First disable it in the 8259A:
*/
@@ -1071,10 +1090,9 @@ __initfunc(void init_IRQ(void))
outb_p(LATCH & 0xff , 0x40); /* LSB */
outb(LATCH >> 8 , 0x40); /* MSB */
- printk("INIT IRQ\n");
for (i=0; i<NR_IRQS; i++) {
- irq_events[i] = 0;
- disabled_irq[i] = 0;
+ irq_desc[i].events = 0;
+ irq_desc[i].status = 0;
}
/*
* 16 old-style INTA-cycle interrupt gates:
@@ -1110,6 +1128,9 @@ __initfunc(void init_IRQ(void))
/* self generated IPI for local APIC timer */
set_intr_gate(0x41, apic_timer_interrupt);
+ /* IPI for MTRR control */
+ set_intr_gate(0x50, mtrr_interrupt);
+
#endif
request_region(0x20,0x20,"pic1");
request_region(0xa0,0x20,"pic2");
diff --git a/arch/i386/kernel/irq.h b/arch/i386/kernel/irq.h
index 9824026dc..81795c85c 100644
--- a/arch/i386/kernel/irq.h
+++ b/arch/i386/kernel/irq.h
@@ -23,10 +23,7 @@ void init_pic_mode (void);
extern unsigned int io_apic_irqs;
-extern inline int IO_APIC_VECTOR (int irq)
-{
- return (0x51+(irq<<3));
-}
+#define IO_APIC_VECTOR(irq) (0x51+((irq)<<3))
#define MAX_IRQ_SOURCES 128
#define MAX_MP_BUSSES 32
@@ -83,8 +80,8 @@ static inline void irq_exit(int cpu, unsigned int irq)
#define SAVE_ALL \
"cld\n\t" \
- "push %es\n\t" \
- "push %ds\n\t" \
+ "pushl %es\n\t" \
+ "pushl %ds\n\t" \
"pushl %eax\n\t" \
"pushl %ebp\n\t" \
"pushl %edi\n\t" \
@@ -93,8 +90,8 @@ static inline void irq_exit(int cpu, unsigned int irq)
"pushl %ecx\n\t" \
"pushl %ebx\n\t" \
"movl $" STR(__KERNEL_DS) ",%edx\n\t" \
- "mov %dx,%ds\n\t" \
- "mov %dx,%es\n\t"
+ "movl %dx,%ds\n\t" \
+ "movl %dx,%es\n\t"
#define IRQ_NAME2(nr) nr##_interrupt(void)
#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c
index 65c743195..64d4ab153 100644
--- a/arch/i386/kernel/ldt.c
+++ b/arch/i386/kernel/ldt.c
@@ -18,7 +18,7 @@
static int read_ldt(void * ptr, unsigned long bytecount)
{
- void * address = current->ldt;
+ void * address = current->mm->segments;
unsigned long size;
if (!ptr)
@@ -37,6 +37,7 @@ static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
{
struct modify_ldt_ldt_s ldt_info;
unsigned long *lp;
+ struct mm_struct * mm;
int error, i;
if (bytecount != sizeof(ldt_info))
@@ -48,19 +49,32 @@ static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
if ((ldt_info.contents == 3 && (oldmode || ldt_info.seg_not_present == 0)) || ldt_info.entry_number >= LDT_ENTRIES)
return -EINVAL;
- if (!current->ldt) {
+ mm = current->mm;
+
+ /*
+ * Horrible dependencies! Try to get rid of this. This is wrong,
+ * as it only reloads the ldt for the first process with this
+ * mm. The implications are that you should really make sure that
+ * you have a ldt before you do the first clone(), otherwise
+ * you get strange behaviour (the kernel is safe, it's just user
+ * space strangeness).
+ *
+ * For no good reason except historical, the GDT index of the LDT
+ * is chosen to follow the index number in the task[] array.
+ */
+ if (!mm->segments) {
for (i=1 ; i<NR_TASKS ; i++) {
if (task[i] == current) {
- if (!(current->ldt = (struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE)))
+ if (!(mm->segments = (void *) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE)))
return -ENOMEM;
- memset(current->ldt, 0, LDT_ENTRIES*LDT_ENTRY_SIZE);
- set_ldt_desc(gdt+(i<<1)+FIRST_LDT_ENTRY, current->ldt, LDT_ENTRIES);
+ memset(mm->segments, 0, LDT_ENTRIES*LDT_ENTRY_SIZE);
+ set_ldt_desc(gdt+(i<<1)+FIRST_LDT_ENTRY, mm->segments, LDT_ENTRIES);
load_ldt(i);
}
}
}
- lp = (unsigned long *) &current->ldt[ldt_info.entry_number];
+ lp = (unsigned long *) (LDT_ENTRY_SIZE * ldt_info.entry_number + (unsigned long) mm->segments);
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0
&& (oldmode ||
diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c
index de6de8f14..ae67822bc 100644
--- a/arch/i386/kernel/mca.c
+++ b/arch/i386/kernel/mca.c
@@ -62,7 +62,7 @@ static struct MCA_info* mca_info = 0;
/*--------------------------------------------------------------------*/
#ifdef CONFIG_PROC_FS
-static long mca_do_proc_init( long memory_start, long memory_end );
+static void mca_do_proc_init( void );
static int mca_default_procfn( char* buf, int slot );
static ssize_t proc_mca_read( struct file*, char*, size_t, loff_t *);
@@ -79,7 +79,7 @@ static struct inode_operations proc_mca_inode_operations = {
/*--------------------------------------------------------------------*/
-__initfunc(long mca_init(long memory_start, long memory_end))
+__initfunc(void mca_init(void))
{
unsigned int i, j;
int foundscsi = 0;
@@ -96,21 +96,14 @@ __initfunc(long mca_init(long memory_start, long memory_end))
*/
if (!MCA_bus)
- return memory_start;
+ return;
cli();
/*
* Allocate MCA_info structure (at address divisible by 8)
*/
- if( ((memory_start+7)&(~7)) > memory_end )
- {
- /* uh oh */
- return memory_start;
- }
-
- mca_info = (struct MCA_info*) ((memory_start+7)&(~7));
- memory_start = ((long)mca_info) + sizeof(struct MCA_info);
+ mca_info = kmalloc(sizeof(struct MCA_info), GFP_ATOMIC);
/*
* Make sure adapter setup is off
@@ -194,10 +187,8 @@ __initfunc(long mca_init(long memory_start, long memory_end))
request_region(0x100,0x08,"POS (MCA)");
#ifdef CONFIG_PROC_FS
- memory_start = mca_do_proc_init( memory_start, memory_end );
+ mca_do_proc_init();
#endif
-
- return memory_start;
}
/*--------------------------------------------------------------------*/
@@ -418,12 +409,12 @@ int get_mca_info(char *buf)
/*--------------------------------------------------------------------*/
-__initfunc(long mca_do_proc_init( long memory_start, long memory_end ))
+__initfunc(void mca_do_proc_init( void ))
{
int i = 0;
struct proc_dir_entry* node = 0;
- if( mca_info == 0 ) return memory_start; /* never happens */
+ if( mca_info == 0 ) return; /* never happens */
proc_register( &proc_mca, &(struct proc_dir_entry) {
PROC_MCA_REGISTERS, 3, "pos", S_IFREG|S_IRUGO,
@@ -439,11 +430,7 @@ __initfunc(long mca_do_proc_init( long memory_start, long memory_end ))
mca_info->slot[i].dev = 0;
if( ! mca_isadapter( i ) ) continue;
- if( memory_start + sizeof(struct proc_dir_entry) > memory_end ) {
- continue;
- }
- node = (struct proc_dir_entry*) memory_start;
- memory_start += sizeof(struct proc_dir_entry);
+ node = kmalloc(sizeof(struct proc_dir_entry), GFP_ATOMIC);
if( i < MCA_MAX_SLOT_NR ) {
node->low_ino = PROC_MCA_SLOT + i;
@@ -464,7 +451,6 @@ __initfunc(long mca_do_proc_init( long memory_start, long memory_end ))
proc_register( &proc_mca, node );
}
- return memory_start;
} /* mca_do_proc_init() */
/*--------------------------------------------------------------------*/
diff --git a/arch/i386/kernel/mtrr.c b/arch/i386/kernel/mtrr.c
new file mode 100644
index 000000000..f2981c5cf
--- /dev/null
+++ b/arch/i386/kernel/mtrr.c
@@ -0,0 +1,1229 @@
+/* Generic MTRR (Memory Type Range Register) driver.
+
+ Copyright (C) 1997-1998 Richard Gooch
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this library; if not, write to the Free
+ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Richard Gooch may be reached by email at rgooch@atnf.csiro.au
+ The postal address is:
+ Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+
+ Source: "Pentium Pro Family Developer's Manual, Volume 3:
+ Operating System Writer's Guide" (Intel document number 242692),
+ section 11.11.7
+
+ ChangeLog
+
+ Prehistory Martin Tischhäuser <martin@ikcbarka.fzk.de>
+ Initial register-setting code (from proform-1.0).
+ 19971216 Richard Gooch <rgooch@atnf.csiro.au>
+ Original version for /proc/mtrr interface, SMP-safe.
+ v1.0
+ 19971217 Richard Gooch <rgooch@atnf.csiro.au>
+ Bug fix for ioctls()'s.
+ Added sample code in Documentation/mtrr.txt
+ v1.1
+ 19971218 Richard Gooch <rgooch@atnf.csiro.au>
+ Disallow overlapping regions.
+ 19971219 Jens Maurer <jmaurer@menuett.rhein-main.de>
+ Register-setting fixups.
+ v1.2
+ 19971222 Richard Gooch <rgooch@atnf.csiro.au>
+ Fixups for kernel 2.1.75.
+ v1.3
+ 19971229 David Wragg <dpw@doc.ic.ac.uk>
+ Register-setting fixups and conformity with Intel conventions.
+ 19971229 Richard Gooch <rgooch@atnf.csiro.au>
+ Cosmetic changes and wrote this ChangeLog ;-)
+ 19980106 Richard Gooch <rgooch@atnf.csiro.au>
+ Fixups for kernel 2.1.78.
+ v1.4
+ 19980119 David Wragg <dpw@doc.ic.ac.uk>
+ Included passive-release enable code (elsewhere in PCI setup).
+ v1.5
+ 19980131 Richard Gooch <rgooch@atnf.csiro.au>
+ Replaced global kernel lock with private spinlock.
+ v1.6
+ 19980201 Richard Gooch <rgooch@atnf.csiro.au>
+ Added wait for other CPUs to complete changes.
+ v1.7
+ 19980202 Richard Gooch <rgooch@atnf.csiro.au>
+ Bug fix in definition of <set_mtrr> for UP.
+ v1.8
+ 19980319 Richard Gooch <rgooch@atnf.csiro.au>
+ Fixups for kernel 2.1.90.
+ 19980323 Richard Gooch <rgooch@atnf.csiro.au>
+ Move SMP BIOS fixup before secondary CPUs call <calibrate_delay>
+ v1.9
+ 19980325 Richard Gooch <rgooch@atnf.csiro.au>
+ Fixed test for overlapping regions: confused by adjacent regions
+ 19980326 Richard Gooch <rgooch@atnf.csiro.au>
+ Added wbinvd in <set_mtrr_prepare>.
+ 19980401 Richard Gooch <rgooch@atnf.csiro.au>
+ Bug fix for non-SMP compilation.
+ 19980418 David Wragg <dpw@doc.ic.ac.uk>
+ Fixed-MTRR synchronisation for SMP and use atomic operations
+ instead of spinlocks.
+ 19980418 Richard Gooch <rgooch@atnf.csiro.au>
+ Differentiate different MTRR register classes for BIOS fixup.
+ v1.10
+ 19980419 David Wragg <dpw@doc.ic.ac.uk>
+ Bug fix in variable MTRR synchronisation.
+ v1.11
+ 19980419 Richard Gooch <rgooch@atnf.csiro.au>
+ Fixups for kernel 2.1.97.
+ v1.12
+ 19980421 Richard Gooch <rgooch@atnf.csiro.au>
+ Safer synchronisation across CPUs when changing MTRRs.
+ v1.13
+ 19980423 Richard Gooch <rgooch@atnf.csiro.au>
+ Bugfix for SMP systems without MTRR support.
+ v1.14
+ 19980427 Richard Gooch <rgooch@atnf.csiro.au>
+ Trap calls to <mtrr_add> and <mtrr_del> on non-MTRR machines.
+ v1.15
+ 19980427 Richard Gooch <rgooch@atnf.csiro.au>
+ Use atomic bitops for setting SMP change mask.
+ v1.16
+ 19980428 Richard Gooch <rgooch@atnf.csiro.au>
+ Removed spurious diagnostic message.
+ v1.17
+ 19980429 Richard Gooch <rgooch@atnf.csiro.au>
+ Moved register-setting macros into this file.
+ Moved setup code from init/main.c to i386-specific areas.
+ v1.18
+*/
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/timer.h>
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/wait.h>
+#include <linux/string.h>
+#include <linux/malloc.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/ctype.h>
+#include <linux/proc_fs.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#define MTRR_NEED_STRINGS
+#include <asm/mtrr.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/segment.h>
+#include <asm/bitops.h>
+#include <asm/smp_lock.h>
+#include <asm/atomic.h>
+#include <linux/smp.h>
+
+#define MTRR_VERSION "1.18 (19980429)"
+
+#define TRUE 1
+#define FALSE 0
+
+#define X86_FEATURE_MTRR 0x1000 /* memory type registers */
+
+#define MTRRcap_MSR 0x0fe
+#define MTRRdefType_MSR 0x2ff
+
+#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
+#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
+
+#define NUM_FIXED_RANGES 88
+#define MTRRfix64K_00000_MSR 0x250
+#define MTRRfix16K_80000_MSR 0x258
+#define MTRRfix16K_A0000_MSR 0x259
+#define MTRRfix4K_C0000_MSR 0x268
+#define MTRRfix4K_C8000_MSR 0x269
+#define MTRRfix4K_D0000_MSR 0x26a
+#define MTRRfix4K_D8000_MSR 0x26b
+#define MTRRfix4K_E0000_MSR 0x26c
+#define MTRRfix4K_E8000_MSR 0x26d
+#define MTRRfix4K_F0000_MSR 0x26e
+#define MTRRfix4K_F8000_MSR 0x26f
+
+#ifdef __SMP__
+# define MTRR_CHANGE_MASK_FIXED 0x01
+# define MTRR_CHANGE_MASK_VARIABLE 0x02
+# define MTRR_CHANGE_MASK_DEFTYPE 0x04
+#endif
+
+/* In the processor's MTRR interface, the MTRR type is always held in
+ an 8 bit field: */
+typedef u8 mtrr_type;
+
+#define LINE_SIZE 80
+#define JIFFIE_TIMEOUT 100
+
+#ifdef __SMP__
+# define set_mtrr(reg,base,size,type) set_mtrr_smp (reg, base, size, type)
+#else
+# define set_mtrr(reg,base,size,type) set_mtrr_up (reg, base, size, type,TRUE)
+#endif
+
+#ifndef CONFIG_PROC_FS
+# define compute_ascii() while (0)
+#endif
+
+#ifdef CONFIG_PROC_FS
+static char *ascii_buffer = NULL;
+static unsigned int ascii_buf_bytes = 0;
+#endif
+static unsigned int *usage_table = NULL;
+#ifdef __SMP__
+static spinlock_t main_lock = SPIN_LOCK_UNLOCKED;
+#endif
+
+/* Private functions */
+#ifdef CONFIG_PROC_FS
+static void compute_ascii (void);
+#endif
+
+
+struct set_mtrr_context /* CPU state saved by set_mtrr_prepare() and
+   put back by set_mtrr_done(). */
+{
+ unsigned long flags; /* interrupt flags as stored by save_flags() */
+ unsigned long deftype_lo; /* MTRRdefType MSR, low half (EAX of rdmsr) */
+ unsigned long deftype_hi; /* MTRRdefType MSR, high half (EDX of rdmsr) */
+ unsigned long cr4val; /* CR4 as read before bit 7 was cleared */
+};
+
+/*
+ * Access to machine-specific registers (available on 586 and better only)
+ * Note: the rd* operations modify the parameters directly (without using
+ * pointer indirection), this allows gcc to optimize better
+ *
+ * Operand conventions, as fixed by the asm constraints below:
+ *   rdmsr(msr,val1,val2)  - msr goes in ECX; val1 receives EAX and val2
+ *                           receives EDX. val1/val2 must be lvalues.
+ *   wrmsr(msr,val1,val2)  - msr goes in ECX; val1 is loaded into EAX and
+ *                           val2 into EDX before the instruction.
+ *   rdtsc(low,high)       - low receives EAX, high receives EDX.
+ *   rdpmc(counter,low,high) - counter goes in ECX; low receives EAX,
+ *                           high receives EDX.
+ * The macros expand to a bare asm statement with no trailing semicolon,
+ * and the macro arguments are each used exactly once.
+ */
+#define rdmsr(msr,val1,val2) \
+ __asm__ __volatile__("rdmsr" \
+ : "=a" (val1), "=d" (val2) \
+ : "c" (msr))
+
+#define wrmsr(msr,val1,val2) \
+ __asm__ __volatile__("wrmsr" \
+ : /* no outputs */ \
+ : "c" (msr), "a" (val1), "d" (val2))
+
+#define rdtsc(low,high) \
+ __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
+
+#define rdpmc(counter,low,high) \
+ __asm__ __volatile__("rdpmc" \
+ : "=a" (low), "=d" (high) \
+ : "c" (counter))
+
+
+/* Put the processor into a state where MTRRs can be safely set.
+   <ctxt> Receives everything needed to undo the changes: the saved
+   interrupt flags, the previous CR4 value and the previous contents of
+   the MTRRdefType MSR. Hand the same structure to set_mtrr_done().
+   On return cli() has been called, bit 7 of CR4 is clear, bit 30 of CR0
+   is set and MTRRdefType has been rewritten with its low byte and bits
+   10-11 cleared. The statement order below is significant.
+*/
+static void set_mtrr_prepare(struct set_mtrr_context *ctxt)
+{
+ unsigned long tmp;
+
+ /* disable interrupts */
+ save_flags(ctxt->flags); cli();
+
+ /* save value of CR4 and clear Page Global Enable (bit 7) */
+ asm volatile ("movl %%cr4, %0\n\t"
+ "movl %0, %1\n\t"
+ "andb $0x7f, %b1\n\t" /* %b1 is the low byte of tmp: clears bit 7 only */
+ "movl %1, %%cr4\n\t"
+ : "=r" (ctxt->cr4val), "=q" (tmp) : : "memory"); /* "=q" because %b1 needs a byte-addressable register */
+
+ /* disable and flush caches. Note that wbinvd flushes the TLBs as
+ a side-effect.
+ NOTE(review): Intel documents wbinvd as a cache write-back and
+ invalidate; the TLB flush more likely comes from the CR4 write
+ above changing bit 7 -- confirm against the manual. */
+ asm volatile ("movl %%cr0, %0\n\t"
+ "orl $0x40000000, %0\n\t" /* set bit 30 of CR0 */
+ "wbinvd\n\t"
+ "movl %0, %%cr0\n\t"
+ "wbinvd\n\t"
+ : "=r" (tmp) : : "memory");
+
+ /* disable MTRRs, and set the default type to uncached. */
+ rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+ wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, ctxt->deftype_hi); /* 0xf300 keeps bits 8-9 and 12-15 only: type byte and bits 10-11 become 0 */
+} /* End Function set_mtrr_prepare */
+
+
+/* Restore the processor after a set_mtrr_prepare
+   <ctxt> The context previously filled in by set_mtrr_prepare(); it is
+   only read here.
+   The steps undo set_mtrr_prepare(): the saved MTRRdefType value is
+   written back, bit 30 of CR0 is cleared again, the saved CR4 value is
+   reloaded and finally the saved interrupt flags are restored.
+*/
+static void set_mtrr_done(struct set_mtrr_context *ctxt)
+{
+ unsigned long tmp;
+
+ /* flush caches and TLBs */
+ asm volatile ("wbinvd" : : : "memory" );
+
+ /* restore MTRRdefType */
+ wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+
+ /* enable caches */
+ asm volatile ("movl %%cr0, %0\n\t"
+ "andl $0xbfffffff, %0\n\t" /* clear bit 30 of CR0, set by set_mtrr_prepare() */
+ "movl %0, %%cr0\n\t"
+ : "=r" (tmp) : : "memory");
+
+ /* restore value of CR4 */
+ asm volatile ("movl %0, %%cr4"
+ : : "r" (ctxt->cr4val) : "memory");
+
+ /* re-enable interrupts (if enabled previously) */
+ restore_flags(ctxt->flags);
+} /* End Function set_mtrr_done */
+
+
+/* this function returns the number of variable MTRRs
+   The count is taken from the low 8 bits of the low half of the MTRRcap
+   MSR; the high half is read but ignored.
+*/
+static unsigned int get_num_var_ranges (void)
+{
+ unsigned long config, dummy;
+
+ rdmsr(MTRRcap_MSR, config, dummy);
+ return (config & 0xff);
+} /* End Function get_num_var_ranges */
+
+
+/* non-zero if we have the write-combining memory type.
+   Tests bit 10 of the low half of the MTRRcap MSR. The masked value is
+   returned as is, so the result is either 0 or 0x400, not 0 or 1.
+*/
+static int have_wrcomb (void)
+{
+ unsigned long config, dummy;
+
+ rdmsr(MTRRcap_MSR, config, dummy);
+ return (config & (1<<10));
+}
+
+
+static void get_mtrr (unsigned int reg, unsigned long *base,
+ unsigned long *size, mtrr_type *type) /* [SUMMARY] Read variable MTRR register on the local CPU.
+   <reg> The register to read. It is not range-checked here.
+   <base> Receives the base address of the region.
+   <size> Receives the size of the region.
+   <type> Receives the type of the region.
+   If the register is not in use, all three outputs are set to 0.
+   Only the low 32 bits of each MSR are examined.
+*/
+{
+ unsigned long dummy, mask_lo, base_lo;
+
+ rdmsr(MTRRphysMask_MSR(reg), mask_lo, dummy);
+ if ((mask_lo & 0x800) == 0) { /* bit 11 of the mask register: set by set_mtrr_up() for a range in use */
+ /* Invalid (i.e. free) range. */
+ *base = 0;
+ *size = 0;
+ *type = 0;
+ return;
+ }
+
+ rdmsr(MTRRphysBase_MSR(reg), base_lo, dummy);
+
+ /* We ignore the extra address bits (32-35). If someone wants to
+ run x86 Linux on a machine with >4GB memory, this will be the
+ least of their problems. */
+
+ /* Clean up mask_lo so it gives the real address mask. */
+ mask_lo = (mask_lo & 0xfffff000UL);
+
+ /* This works correctly if size is a power of two, i.e. a
+ contiguous range. */
+ *size = ~(mask_lo - 1); /* two's-complement negation of the mask: inverse of the ~(size - 1) written by set_mtrr_up() */
+
+ *base = (base_lo & 0xfffff000UL);
+ *type = (base_lo & 0xff);
+} /* End Function get_mtrr */
+
+
+static void set_mtrr_up (unsigned int reg, unsigned long base,
+ unsigned long size, mtrr_type type, int do_safe)
+/* [SUMMARY] Set variable MTRR register on the local CPU.
+ <reg> The register to set.
+ <base> The base address of the region.
+ <size> The size of the region. If this is 0 the region is disabled.
+ <type> The type of the region.
+ <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
+ be done externally.
+ [NOTE] "Safely" means bracketing the register writes with
+ set_mtrr_prepare() and set_mtrr_done().
+ [NOTE] No argument is validated here. The mask is computed as
+ ~(size - 1), which presumably requires <size> to be a power of two of
+ at least 4 kB and <base> to be aligned to it -- TODO confirm that the
+ callers check this.
+*/
+{
+ struct set_mtrr_context ctxt;
+
+ if (do_safe) set_mtrr_prepare (&ctxt);
+ if (size == 0)
+ {
+ /* The invalid bit is kept in the mask, so we simply clear the
+ relevant mask register to disable a range. */
+ wrmsr (MTRRphysMask_MSR (reg), 0, 0);
+ }
+ else
+ {
+ /* The high half of both MSRs is written as 0, so only 32-bit
+ physical addresses can be described. */
+ wrmsr (MTRRphysBase_MSR (reg), base | type, 0);
+ wrmsr (MTRRphysMask_MSR (reg), ~(size - 1) | 0x800, 0); /* 0x800 marks the range as in use; get_mtrr() tests it */
+ }
+ if (do_safe) set_mtrr_done (&ctxt);
+} /* End Function set_mtrr_up */
+
+
+#ifdef __SMP__
+
+struct mtrr_var_range /* Raw contents of the MSR pair that describes one
+   variable range, as transferred by get_mtrr_var_range() and
+   set_mtrr_var_range_testing(). */
+{
+ unsigned long base_lo; /* MTRRphysBase MSR, low half (EAX) */
+ unsigned long base_hi; /* MTRRphysBase MSR, high half (EDX) */
+ unsigned long mask_lo; /* MTRRphysMask MSR, low half (EAX) */
+ unsigned long mask_hi; /* MTRRphysMask MSR, high half (EDX) */
+};
+
+
+/* Get the MSR pair relating to a var range.
+   <index> The variable range register to read. It is not range-checked.
+   <vr> Receives the unmodified low and high halves of the base and
+   mask MSRs of the CPU this runs on.
+*/
+__initfunc(static void get_mtrr_var_range (unsigned int index,
+ struct mtrr_var_range *vr))
+{
+ rdmsr (MTRRphysBase_MSR (index), vr->base_lo, vr->base_hi);
+ rdmsr (MTRRphysMask_MSR (index), vr->mask_lo, vr->mask_hi);
+} /* End Function get_mtrr_var_range */
+
+
+/* Bring the base/mask MSR pair of variable range <index> into line with
+   <vr>.  Each MSR is only rewritten when its significant bits differ from
+   the wanted value.  Returns TRUE if either MSR was written. */
+__initfunc(static int set_mtrr_var_range_testing (unsigned int index,
+                                                  struct mtrr_var_range *vr))
+{
+    unsigned int cur_lo, cur_hi;
+    int modified = FALSE;
+
+    /* Base register: bits 8-11 of the low word and everything above bit 3
+       of the high word are ignored in the comparison. */
+    rdmsr(MTRRphysBase_MSR(index), cur_lo, cur_hi);
+    if (((vr->base_lo ^ cur_lo) & 0xfffff0ffUL) != 0
+        || ((vr->base_hi ^ cur_hi) & 0xfUL) != 0)
+    {
+        wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
+        modified = TRUE;
+    }
+
+    /* Mask register: bits 0-10 of the low word and everything above bit 3
+       of the high word are ignored in the comparison. */
+    rdmsr(MTRRphysMask_MSR(index), cur_lo, cur_hi);
+    if (((vr->mask_lo ^ cur_lo) & 0xfffff800UL) != 0
+        || ((vr->mask_hi ^ cur_hi) & 0xfUL) != 0)
+    {
+        wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
+        modified = TRUE;
+    }
+
+    return modified;
+}
+
+
+/* Read all fixed-range MTRR MSRs into <frs>, viewed as an array of
+   low/high word pairs: one 64K MSR, then two 16K MSRs, then eight 4K
+   MSRs. */
+__initfunc(static void get_fixed_ranges(mtrr_type *frs))
+{
+    unsigned long *dst = (unsigned long *) frs;
+    int n;
+
+    rdmsr(MTRRfix64K_00000_MSR, dst[0], dst[1]);
+    dst += 2;
+
+    for (n = 0; n < 2; n++, dst += 2)
+        rdmsr(MTRRfix16K_80000_MSR + n, dst[0], dst[1]);
+
+    for (n = 0; n < 8; n++, dst += 2)
+        rdmsr(MTRRfix4K_C0000_MSR + n, dst[0], dst[1]);
+}
+
+
+/* Make this CPU's fixed-range MTRR MSRs match <frs> (same layout as filled
+   in by get_fixed_ranges).  An MSR is only written when its current
+   contents differ.  Returns TRUE if any MSR was written. */
+__initfunc(static int set_fixed_ranges_testing(mtrr_type *frs))
+{
+    unsigned long *want = (unsigned long *) frs;
+    unsigned long cur_lo, cur_hi;
+    int modified = FALSE;
+    int n;
+
+    /* The single 64K-granularity MSR */
+    rdmsr(MTRRfix64K_00000_MSR, cur_lo, cur_hi);
+    if (want[0] != cur_lo || want[1] != cur_hi)
+    {
+        wrmsr(MTRRfix64K_00000_MSR, want[0], want[1]);
+        modified = TRUE;
+    }
+    want += 2;
+
+    /* The two 16K-granularity MSRs */
+    for (n = 0; n < 2; n++, want += 2)
+    {
+        rdmsr(MTRRfix16K_80000_MSR + n, cur_lo, cur_hi);
+        if (want[0] == cur_lo && want[1] == cur_hi) continue;
+        wrmsr(MTRRfix16K_80000_MSR + n, want[0], want[1]);
+        modified = TRUE;
+    }
+
+    /* The eight 4K-granularity MSRs */
+    for (n = 0; n < 8; n++, want += 2)
+    {
+        rdmsr(MTRRfix4K_C0000_MSR + n, cur_lo, cur_hi);
+        if (want[0] == cur_lo && want[1] == cur_hi) continue;
+        wrmsr(MTRRfix4K_C0000_MSR + n, want[0], want[1]);
+        modified = TRUE;
+    }
+
+    return modified;
+}
+
+
+/* Complete MTRR configuration of one CPU, as captured by get_mtrr_state
+ and applied by set_mtrr_state. */
+struct mtrr_state
+{
+ unsigned int num_var_ranges; /* Number of entries in var_ranges */
+ struct mtrr_var_range *var_ranges; /* kmalloc'ed array, may be NULL */
+ mtrr_type fixed_ranges[NUM_FIXED_RANGES];
+ unsigned char enabled; /* Bits 10-11 of MTRRdefType, shifted down */
+ mtrr_type def_type; /* Low byte of MTRRdefType */
+};
+
+
+/* Grab all of the mtrr state for this cpu into *state.
+ The variable-range array is allocated here and must be released with
+ finalize_mtrr_state. */
+__initfunc(static void get_mtrr_state(struct mtrr_state *state))
+{
+ unsigned int nvrs, i;
+ struct mtrr_var_range *vrs;
+ unsigned long lo, dummy;
+
+ nvrs = state->num_var_ranges = get_num_var_ranges();
+ vrs = state->var_ranges
+ = kmalloc(nvrs * sizeof(struct mtrr_var_range), GFP_KERNEL);
+ /* On allocation failure carry on with zero variable ranges; the fixed
+ ranges and default type are still captured. */
+ if (vrs == NULL)
+ nvrs = state->num_var_ranges = 0;
+
+ for (i = 0; i < nvrs; i++)
+ get_mtrr_var_range(i, &vrs[i]);
+
+ get_fixed_ranges(state->fixed_ranges);
+
+ rdmsr(MTRRdefType_MSR, lo, dummy);
+ state->def_type = (lo & 0xff);
+ state->enabled = (lo & 0xc00) >> 10;
+} /* End Function get_mtrr_state */
+
+
+/* Free resources associated with a struct mtrr_state.
+ Only the variable-range array is released; the pointer itself is left
+ unchanged, so the state must not be finalized twice. */
+__initfunc(static void finalize_mtrr_state(struct mtrr_state *state))
+{
+ if (state->var_ranges) kfree (state->var_ranges);
+} /* End Function finalize_mtrr_state */
+
+
+__initfunc(static unsigned long set_mtrr_state (struct mtrr_state *state,
+                                                struct set_mtrr_context *ctxt))
+/*  [SUMMARY] Set the MTRR state for this CPU.
+    <state> The MTRR state information to read.
+    <ctxt> Some relevant CPU context.
+    [NOTE] The CPU must already be in a safe state for MTRR changes.
+    [NOTE] MTRRdefType is not written here: the wanted value is stored in
+    <<ctxt>> for the caller's subsequent set_mtrr_done to apply.
+    [RETURNS] 0 if no changes made, else a mask indication what was changed.
+*/
+{
+    unsigned int i;
+    unsigned long change_mask = 0;
+
+    for (i = 0; i < state->num_var_ranges; i++)
+        if (set_mtrr_var_range_testing(i, &state->var_ranges[i]))
+            change_mask |= MTRR_CHANGE_MASK_VARIABLE;
+
+    if (set_fixed_ranges_testing(state->fixed_ranges))
+        change_mask |= MTRR_CHANGE_MASK_FIXED;
+
+    /*  set_mtrr_done restores the saved value of MTRRdefType, so to set it
+        we fiddle with the saved value.  */
+    if ((ctxt->deftype_lo & 0xff) != state->def_type
+        || ((ctxt->deftype_lo & 0xc00) >> 10) != state->enabled)
+    {
+        /*  Clear the old default-type byte (bits 0-7) and enable bits
+            (bits 10-11) first; merely OR-ing the new value in would leave
+            stale bits set and yield a type/enable state matching neither
+            CPU.  */
+        ctxt->deftype_lo &= 0xfffff300UL;
+        ctxt->deftype_lo |= (state->def_type | state->enabled << 10);
+        change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
+    }
+
+    return change_mask;
+} /* End Function set_mtrr_state */
+
+
+/* Shared state for the do_all_cpus / sync_handler rendezvous. */
+/* Number of other CPUs that have not yet reached the current phase. */
+static atomic_t undone_count;
+/* Function (and its argument) the other CPUs run once released. */
+static void (*handler_func) (struct set_mtrr_context *ctxt, void *info);
+static void *handler_info;
+/* While TRUE, other CPUs spin before running handler_func. */
+static volatile int wait_barrier_execute = FALSE;
+/* While TRUE, other CPUs spin before calling set_mtrr_done. */
+static volatile int wait_barrier_cache_enable = FALSE;
+
+static void sync_handler (void)
+/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
+ [PURPOSE] Counterpart of do_all_cpus: decrements undone_count once on
+ arrival and once after running the handler, spinning on the two barrier
+ flags in between. do_all_cpus re-arms undone_count between the phases.
+ [RETURNS] Nothing.
+*/
+{
+ struct set_mtrr_context ctxt;
+
+ set_mtrr_prepare (&ctxt);
+ /* Notify master CPU that I'm at the barrier and then wait */
+ atomic_dec (&undone_count);
+ while (wait_barrier_execute) barrier ();
+ /* The master has cleared me to execute */
+ (*handler_func) (&ctxt, handler_info);
+ /* Notify master CPU that I've executed the function */
+ atomic_dec (&undone_count);
+ /* Wait for master to clear me to enable cache and return */
+ while (wait_barrier_cache_enable) barrier ();
+ set_mtrr_done (&ctxt);
+} /* End Function sync_handler */
+
+static void do_all_cpus (void (*handler) (struct set_mtrr_context *ctxt,
+ void *info),
+ void *info, int local)
+/* [SUMMARY] Execute a function on all CPUs, with caches flushed and disabled.
+ [PURPOSE] This function will synchronise all CPUs, flush and disable caches
+ on all CPUs, then call a specified function. When the specified function
+ finishes on all CPUs, caches are enabled on all CPUs.
+ <handler> The function to execute.
+ <info> An arbitrary information pointer which is passed to <<handler>>.
+ <local> If TRUE <<handler>> is executed locally.
+ [NOTE] Panics if the other CPUs do not all reach the first barrier within
+ JIFFIE_TIMEOUT jiffies. The wait for handler completion has no timeout.
+ [RETURNS] Nothing.
+*/
+{
+ unsigned long timeout;
+ struct set_mtrr_context ctxt;
+
+ mtrr_hook = sync_handler;
+ handler_func = handler;
+ handler_info = info;
+ wait_barrier_execute = TRUE;
+ wait_barrier_cache_enable = TRUE;
+ /* Send a message to all other CPUs and wait for them to enter the
+ barrier */
+ atomic_set (&undone_count, smp_num_cpus - 1);
+ smp_message_pass (MSG_ALL_BUT_SELF, MSG_MTRR_CHANGE, 0, 0);
+ /* Wait for it to be done */
+ /* NOTE(review): plain "jiffies < timeout" does not appear to cope with
+ jiffies wrapping around during the wait - confirm. */
+ timeout = jiffies + JIFFIE_TIMEOUT;
+ while ( (atomic_read (&undone_count) > 0) && (jiffies < timeout) )
+ barrier ();
+ if (atomic_read (&undone_count) > 0)
+ {
+ panic ("mtrr: timed out waiting for other CPUs\n");
+ }
+ mtrr_hook = NULL;
+ /* All other CPUs should be waiting for the barrier, with their caches
+ already flushed and disabled. Prepare for function completion
+ notification */
+ atomic_set (&undone_count, smp_num_cpus - 1);
+ /* Flush and disable the local CPU's cache and release the barrier, which
+ should cause the other CPUs to execute the function. Also execute it
+ locally if required */
+ set_mtrr_prepare (&ctxt);
+ wait_barrier_execute = FALSE;
+ if (local) (*handler) (&ctxt, info);
+ /* Now wait for other CPUs to complete the function */
+ while (atomic_read (&undone_count) > 0) barrier ();
+ /* Now all CPUs should have finished the function. Release the barrier to
+ allow them to re-enable their caches and return from their interrupt,
+ then enable the local cache and return */
+ wait_barrier_cache_enable = FALSE;
+ set_mtrr_done (&ctxt);
+ handler_func = NULL;
+ handler_info = NULL;
+} /* End Function do_all_cpus */
+
+
+/* Arguments of one set_mtrr_up call, bundled so that set_mtrr_smp can hand
+ them to every CPU through do_all_cpus' single info pointer. */
+struct set_mtrr_data
+{
+ unsigned long smp_base;
+ unsigned long smp_size;
+ unsigned int smp_reg;
+ mtrr_type smp_type;
+};
+
+/* do_all_cpus handler: program one variable MTRR on the executing CPU from
+ the struct set_mtrr_data passed as <info>. <ctxt> is not used. */
+static void set_mtrr_handler (struct set_mtrr_context *ctxt, void *info)
+{
+ struct set_mtrr_data *data = info;
+
+ /* do_safe is FALSE: the callers in do_all_cpus/sync_handler have already
+ done set_mtrr_prepare and will do set_mtrr_done afterwards. */
+ set_mtrr_up (data->smp_reg, data->smp_base, data->smp_size, data->smp_type,
+ FALSE);
+} /* End Function set_mtrr_handler */
+
+/* Program variable MTRR <reg> with <base>, <size> and <type> on every CPU,
+   including the calling one, via the do_all_cpus rendezvous. */
+static void set_mtrr_smp (unsigned int reg, unsigned long base,
+                          unsigned long size, mtrr_type type)
+{
+    struct set_mtrr_data request;
+
+    request.smp_base = base;
+    request.smp_size = size;
+    request.smp_reg = reg;
+    request.smp_type = type;
+    do_all_cpus (set_mtrr_handler, &request, TRUE);
+} /* End Function set_mtrr_smp */
+
+
+/* A warning that is common to the module and non-module cases: report
+   which classes of MTRR setting (as flagged in <mask>) were found to be
+   inconsistent.  Some BIOSes are broken and do not program the MTRRs of
+   all CPUs identically. */
+#ifdef MODULE
+static void mtrr_state_warn (unsigned long mask)
+#else
+__initfunc(static void mtrr_state_warn (unsigned long mask))
+#endif
+{
+    if (mask != 0)
+    {
+        if (mask & MTRR_CHANGE_MASK_FIXED)
+            printk ("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
+        if (mask & MTRR_CHANGE_MASK_VARIABLE)
+            printk ("mtrr: your CPUs had inconsistent variable MTRR settings\n");
+        if (mask & MTRR_CHANGE_MASK_DEFTYPE)
+            printk ("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
+        printk ("mtrr: probably your BIOS does not setup all CPUs\n");
+    }
+} /* End Function mtrr_state_warn */
+
+#ifdef MODULE
+/* As a module, copy the MTRR state using an IPI handler. */
+
+/* Union of the change masks reported by all CPUs */
+static volatile unsigned long smp_changes_mask = 0;
+
+/* do_all_cpus handler: apply the struct mtrr_state passed as <info> to the
+   executing CPU and merge the resulting change mask into
+   smp_changes_mask. */
+static void copy_mtrr_state_handler (struct set_mtrr_context *ctxt, void *info)
+{
+    struct mtrr_state *wanted = info;
+    unsigned long changed, bit;
+
+    changed = set_mtrr_state (wanted, ctxt);
+    /*  Use the atomic bitops to update the global mask, one set bit at a
+        time  */
+    for (bit = 0; changed != 0; ++bit, changed >>= 1)
+    {
+        if (changed & 0x01) set_bit (bit, &smp_changes_mask);
+    }
+} /* End Function copy_mtrr_state_handler */
+
+/* Copies the entire MTRR state of this cpu to all the others.
+ The handler is not run locally (local == FALSE), since this CPU is the
+ source of the state. Any inconsistencies found are reported afterwards
+ through mtrr_state_warn. */
+static void copy_mtrr_state (void)
+{
+ struct mtrr_state ms;
+
+ get_mtrr_state (&ms);
+ do_all_cpus (copy_mtrr_state_handler, &ms, FALSE);
+ finalize_mtrr_state (&ms);
+ mtrr_state_warn (smp_changes_mask);
+} /* End Function copy_mtrr_state */
+
+#endif /* MODULE */
+#endif /* __SMP__ */
+
+/* Return the printable name of memory type <x>, or "?" when <x> lies
+   outside the range 0..6 covered by mtrr_strings.  <x> is signed, so the
+   lower bound is checked as well to avoid indexing before the table. */
+static char *attrib_to_str (int x)
+{
+    return (x >= 0 && x <= 6) ? mtrr_strings[x] : "?";
+} /* End Function attrib_to_str */
+
+/* Allocate and initialise the per-register usage count table and, when
+ /proc support is configured, the text buffer served by mtrr_read.
+ NOTE(review): failures are only reported with printk; callers get no
+ error indication, and if the second allocation fails usage_table stays
+ allocated while ascii_buffer is NULL - confirm later users cope. */
+static void init_table (void)
+{
+ int i, max;
+
+ max = get_num_var_ranges ();
+ if ( ( usage_table = kmalloc (max * sizeof *usage_table, GFP_KERNEL) )
+ == NULL )
+ {
+ printk ("mtrr: could not allocate\n");
+ return;
+ }
+ /* Every register starts with a count of 1; compute_ascii resets the
+ count of registers that turn out to be free to 0. */
+ for (i = 0; i < max; i++) usage_table[i] = 1;
+#ifdef CONFIG_PROC_FS
+ /* One LINE_SIZE slot per variable register */
+ if ( ( ascii_buffer = kmalloc (max * LINE_SIZE, GFP_KERNEL) ) == NULL )
+ {
+ printk ("mtrr: could not allocate\n");
+ return;
+ }
+ ascii_buf_bytes = 0;
+ compute_ascii ();
+#endif
+} /* End Function init_table */
+
+int mtrr_add (unsigned long base, unsigned long size, unsigned int type,
+ char increment)
+/* [SUMMARY] Add an MTRR entry.
+ <base> The starting (base) address of the region.
+ <size> The size (in bytes) of the region.
+ <type> The type of the new region.
+ <increment> If true and the region already exists, the usage count will be
+ incremented.
+ [RETURNS] The MTRR register on success, else a negative number indicating
+ the error code.
+ [NOTE] This routine uses a spinlock.
+ [NOTE] Error codes: -ENODEV if the CPU has no MTRRs, -EINVAL for a bad
+ base/size/type or a conflict with an existing region, -ENOSYS if
+ write-combining is requested but unsupported, -ENOSPC if no register is
+ free.
+*/
+{
+ int i, max;
+ mtrr_type ltype;
+ unsigned long lbase, lsize, last;
+
+ if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return -ENODEV;
+ if ( (base & 0xfff) || (size & 0xfff) )
+ {
+ printk ("mtrr: size and base must be multiples of 4kB\n");
+ printk ("mtrr: size: %lx base: %lx\n", size, base);
+ return -EINVAL;
+ }
+ if (base + size < 0x100000)
+ {
+ printk ("mtrr: cannot set region below 1 MByte (0x%lx,0x%lx)\n",
+ base, size);
+ return -EINVAL;
+ }
+ /* Check upper bits of base and last are equal and lower bits are 0 for
+ base and 1 for last */
+ /* Both values are shifted right for as long as base ends in 0 and last
+ ends in 1; what remains must be identical. */
+ last = base + size - 1;
+ for (lbase = base; !(lbase & 1) && (last & 1);
+ lbase = lbase >> 1, last = last >> 1);
+ if (lbase != last)
+ {
+ printk ("mtrr: base(0x%lx) is not aligned on a size(0x%lx) boundary\n",
+ base, size);
+ return -EINVAL;
+ }
+ if (type >= MTRR_NUM_TYPES)
+ {
+ printk ("mtrr: type: %u illegal\n", type);
+ return -EINVAL;
+ }
+ /* If the type is WC, check that this processor supports it */
+ if ( (type == MTRR_TYPE_WRCOMB) && !have_wrcomb () )
+ {
+ printk ("mtrr: your processor doesn't support write-combining\n");
+ return -ENOSYS;
+ }
+ increment = increment ? 1 : 0;
+ max = get_num_var_ranges ();
+ /* Search for existing MTRR */
+ spin_lock (&main_lock);
+ for (i = 0; i < max; ++i)
+ {
+ get_mtrr (i, &lbase, &lsize, &ltype);
+ /* Skip registers lying entirely below or entirely above the new
+ region. Free registers read back as base 0, size 0 and are skipped
+ by the first test. */
+ if (base >= lbase + lsize) continue;
+ if ( (base < lbase) && (base + size <= lbase) ) continue;
+ /* At this point we know there is some kind of overlap/enclosure */
+ /* Only a new region fully contained in the existing one is accepted */
+ if ( (base < lbase) || (base + size > lbase + lsize) )
+ {
+ spin_unlock (&main_lock);
+ printk ("mtrr: 0x%lx,0x%lx overlaps existing 0x%lx,0x%lx\n",
+ base, size, lbase, lsize);
+ return -EINVAL;
+ }
+ if (ltype != type)
+ {
+ spin_unlock (&main_lock);
+ printk ( "mtrr: type missmatch for %lx,%lx old: %s new: %s\n",
+ base, size, attrib_to_str (ltype), attrib_to_str (type) );
+ return -EINVAL;
+ }
+ if (increment) ++usage_table[i];
+ compute_ascii ();
+ spin_unlock (&main_lock);
+ return i;
+ }
+ /* Search for an empty MTRR */
+ for (i = 0; i < max; ++i)
+ {
+ get_mtrr (i, &lbase, &lsize, &ltype);
+ if (lsize > 0) continue;
+ set_mtrr (i, base, size, type);
+ usage_table[i] = 1;
+ compute_ascii ();
+ spin_unlock (&main_lock);
+ return i;
+ }
+ spin_unlock (&main_lock);
+ printk ("mtrr: no more MTRRs available\n");
+ return -ENOSPC;
+} /* End Function mtrr_add */
+
+int mtrr_del (int reg, unsigned long base, unsigned long size)
+/* [SUMMARY] Delete MTRR/decrement usage count.
+ <reg> The register. If this is less than 0 then <<base>> and <<size>> must
+ be supplied.
+ <base> The base address of the region. This is ignored if <<reg>> is >= 0.
+ <size> The size of the region. This is ignored if <<reg>> is >= 0.
+ [RETURNS] The register on success, else a negative number indicating
+ the error code.
+ [NOTE] This routine uses a spinlock.
+ [NOTE] The register is only disabled in hardware once its usage count
+ drops to 0. Returns -ENODEV if the CPU has no MTRRs and -EINVAL if the
+ register cannot be found, is out of range, is unused or has a zero count.
+*/
+{
+ int i, max;
+ mtrr_type ltype;
+ unsigned long lbase, lsize;
+
+ if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return -ENODEV;
+ max = get_num_var_ranges ();
+ spin_lock (&main_lock);
+ if (reg < 0)
+ {
+ /* Search for existing MTRR */
+ /* Only an exact base and size match selects a register */
+ for (i = 0; i < max; ++i)
+ {
+ get_mtrr (i, &lbase, &lsize, &ltype);
+ if ( (lbase == base) && (lsize == size) )
+ {
+ reg = i;
+ break;
+ }
+ }
+ if (reg < 0)
+ {
+ spin_unlock (&main_lock);
+ printk ("mtrr: no MTRR for %lx,%lx found\n", base, size);
+ return -EINVAL;
+ }
+ }
+ if (reg >= max)
+ {
+ spin_unlock (&main_lock);
+ printk ("mtrr: register: %d too big\n", reg);
+ return -EINVAL;
+ }
+ get_mtrr (reg, &lbase, &lsize, &ltype);
+ if (lsize < 1)
+ {
+ spin_unlock (&main_lock);
+ printk ("mtrr: MTRR %d not used\n", reg);
+ return -EINVAL;
+ }
+ if (usage_table[reg] < 1)
+ {
+ spin_unlock (&main_lock);
+ printk ("mtrr: reg: %d has count=0\n", reg);
+ return -EINVAL;
+ }
+ /* Last user gone: a size of 0 disables the register */
+ if (--usage_table[reg] < 1) set_mtrr (reg, 0, 0, 0);
+ compute_ascii ();
+ spin_unlock (&main_lock);
+ return reg;
+} /* End Function mtrr_del */
+
+#ifdef CONFIG_PROC_FS
+
+/* Add a region on behalf of an open file and remember it in the file's
+ private per-register count table, so that mtrr_close can undo it.
+ The table is allocated on first use and hung off file->private_data.
+ Returns the register number, or a negative error code.
+ NOTE(review): <increment> is not used; mtrr_add is always called with an
+ increment of 1. */
+static int mtrr_file_add (unsigned long base, unsigned long size,
+ unsigned int type, char increment, struct file *file)
+{
+ int reg, max;
+ unsigned int *fcount = file->private_data;
+
+ max = get_num_var_ranges ();
+ if (fcount == NULL)
+ {
+ if ( ( fcount = kmalloc (max * sizeof *fcount, GFP_KERNEL) ) == NULL )
+ {
+ printk ("mtrr: could not allocate\n");
+ return -ENOMEM;
+ }
+ memset (fcount, 0, max * sizeof *fcount);
+ file->private_data = fcount;
+ }
+ reg = mtrr_add (base, size, type, 1);
+ if (reg >= 0) ++fcount[reg];
+ return reg;
+} /* End Function mtrr_file_add */
+
+/* Delete the region <base>,<size> on behalf of an open file and drop the
+   file's private count for the register concerned.
+   Returns the register number, or a negative error code from mtrr_del. */
+static int mtrr_file_del (unsigned long base, unsigned long size,
+                          struct file *file)
+{
+    int reg;
+    unsigned int *fcount = file->private_data;
+
+    reg = mtrr_del (-1, base, size);
+    if (reg < 0) return reg;
+    /*  The per-file counts are unsigned: only decrement a count this file
+        actually holds.  Decrementing a zero count would wrap around and
+        make mtrr_close spin through ~2^32 bogus mtrr_del calls.  */
+    if ( (fcount != NULL) && (fcount[reg] > 0) ) --fcount[reg];
+    return reg;
+} /* End Function mtrr_file_del */
+
+/* Read handler for /proc/mtrr: copy out the part of the pre-formatted
+   ascii_buffer starting at *ppos, at most <len> bytes, and advance *ppos.
+   Returns the number of bytes copied, 0 at end of data, or -EFAULT. */
+static ssize_t mtrr_read (struct file *file, char *buf, size_t len,
+                          loff_t *ppos)
+{
+    loff_t offset = *ppos;
+
+    if (offset >= ascii_buf_bytes) return 0;
+    /*  Clip the request to what is left in the buffer  */
+    if (offset + len > ascii_buf_bytes) len = ascii_buf_bytes - offset;
+    if ( copy_to_user (buf, ascii_buffer + offset, len) ) return -EFAULT;
+    *ppos = offset + len;
+    return len;
+} /* End Function mtrr_read */
+
+static ssize_t mtrr_write (struct file *file, const char *buf, size_t len,
+                           loff_t *ppos)
+/*  Format of control line:
+    "base=%lx size=%lx type=%s"     OR:
+    "disable=%d"
+    Returns the number of bytes consumed (at most LINE_SIZE) on success,
+    else a negative error code.
+*/
+{
+    int i, err;
+    unsigned long reg, base, size;
+    size_t linelen;
+    char *ptr;
+    char line[LINE_SIZE];
+
+    if ( !suser () ) return -EPERM;
+    /*  Can't seek (pwrite) on this device  */
+    if (ppos != &file->f_pos) return -ESPIPE;
+    /*  A zero-length write carries no command.  It must be rejected here:
+        "len - 1" below would otherwise wrap around and copy_from_user
+        would overrun the on-stack line buffer.  */
+    if (len == 0) return -EINVAL;
+    memset (line, 0, LINE_SIZE);
+    if (len > LINE_SIZE) len = LINE_SIZE;
+    /*  Only len - 1 bytes are copied, so the line always stays
+        NUL-terminated.  NOTE(review): this drops the final byte of the
+        user's data, which is harmless only when that byte is the trailing
+        newline.  */
+    if ( copy_from_user (line, buf, len - 1) ) return -EFAULT;
+    /*  Strip a trailing newline, taking care not to look in front of the
+        buffer when the line is empty  */
+    linelen = strlen (line);
+    if ( (linelen > 0) && (line[linelen - 1] == '\n') )
+        line[linelen - 1] = '\0';
+    if ( !strncmp (line, "disable=", 8) )
+    {
+        reg = simple_strtoul (line + 8, &ptr, 0);
+        err = mtrr_del (reg, 0, 0);
+        if (err < 0) return err;
+        return len;
+    }
+    if ( strncmp (line, "base=", 5) )
+    {
+        printk ("mtrr: no \"base=\" in line: \"%s\"\n", line);
+        return -EINVAL;
+    }
+    base = simple_strtoul (line + 5, &ptr, 0);
+    for (; isspace (*ptr); ++ptr);
+    if ( strncmp (ptr, "size=", 5) )
+    {
+        printk ("mtrr: no \"size=\" in line: \"%s\"\n", line);
+        return -EINVAL;
+    }
+    size = simple_strtoul (ptr + 5, &ptr, 0);
+    for (; isspace (*ptr); ++ptr);
+    if ( strncmp (ptr, "type=", 5) )
+    {
+        printk ("mtrr: no \"type=\" in line: \"%s\"\n", line);
+        return -EINVAL;
+    }
+    ptr += 5;
+    for (; isspace (*ptr); ++ptr);
+    /*  The remainder of the line must be exactly one of the type names  */
+    for (i = 0; i < MTRR_NUM_TYPES; ++i)
+    {
+        if ( strcmp (ptr, mtrr_strings[i]) ) continue;
+        err = mtrr_add (base, size, i, 1);
+        if (err < 0) return err;
+        return len;
+    }
+    printk ("mtrr: illegal type: \"%s\"\n", ptr);
+    return -EINVAL;
+} /* End Function mtrr_write */
+
+/* ioctl handler for /proc/mtrr.
+ MTRRIOC_ADD_ENTRY and MTRRIOC_DEL_ENTRY go through the per-file
+ bookkeeping (mtrr_file_add / mtrr_file_del); MTRRIOC_SET_ENTRY calls
+ mtrr_add directly without incrementing an existing region's count;
+ MTRRIOC_GET_ENTRY reads back one register and needs no privilege.
+ Returns 0 on success, else a negative error code. */
+static int mtrr_ioctl (struct inode *inode, struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ int err;
+ mtrr_type type;
+ struct mtrr_sentry sentry;
+ struct mtrr_gentry gentry;
+
+ switch (cmd)
+ {
+ default:
+ return -ENOIOCTLCMD;
+ case MTRRIOC_ADD_ENTRY:
+ if ( !suser () ) return -EPERM;
+ if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) )
+ return -EFAULT;
+ err = mtrr_file_add (sentry.base, sentry.size, sentry.type, 1, file);
+ if (err < 0) return err;
+ break;
+ case MTRRIOC_SET_ENTRY:
+ if ( !suser () ) return -EPERM;
+ if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) )
+ return -EFAULT;
+ err = mtrr_add (sentry.base, sentry.size, sentry.type, 0);
+ if (err < 0) return err;
+ break;
+ case MTRRIOC_DEL_ENTRY:
+ if ( !suser () ) return -EPERM;
+ if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) )
+ return -EFAULT;
+ err = mtrr_file_del (sentry.base, sentry.size, file);
+ if (err < 0) return err;
+ break;
+ case MTRRIOC_GET_ENTRY:
+ if ( copy_from_user (&gentry, (void *) arg, sizeof gentry) )
+ return -EFAULT;
+ if ( gentry.regnum >= get_num_var_ranges () ) return -EINVAL;
+ get_mtrr (gentry.regnum, &gentry.base, &gentry.size, &type);
+ /* get_mtrr wants an mtrr_type, so go through a local */
+ gentry.type = type;
+ if ( copy_to_user ( (void *) arg, &gentry, sizeof gentry) )
+ return -EFAULT;
+ break;
+ }
+ return 0;
+} /* End Function mtrr_ioctl */
+
+/* Open handler: only bumps the module use count. The per-file count table
+ is allocated lazily by mtrr_file_add. */
+static int mtrr_open (struct inode *ino, struct file *filep)
+{
+ MOD_INC_USE_COUNT;
+ return 0;
+} /* End Function mtrr_open */
+
+/* Release handler: undo every region addition still recorded in the file's
+   private count table, then free the table. */
+static int mtrr_close (struct inode *ino, struct file *file)
+{
+    int reg, nregs;
+    unsigned int *fcount = file->private_data;
+
+    MOD_DEC_USE_COUNT;
+    /*  Nothing was ever added through this file  */
+    if (fcount == NULL) return 0;
+    nregs = get_num_var_ranges ();
+    for (reg = 0; reg < nregs; ++reg)
+    {
+        /*  One mtrr_del per addition this file still holds  */
+        for (; fcount[reg] > 0; --fcount[reg])
+        {
+            if (mtrr_del (reg, 0, 0) < 0)
+                printk ("mtrr: reg %d not used\n", reg);
+        }
+    }
+    kfree (fcount);
+    file->private_data = NULL;
+    return 0;
+} /* End Function mtrr_close */
+
+/* File operations for /proc/mtrr: read, write, ioctl, open and release are
+ implemented; every other slot is left NULL. */
+static struct file_operations mtrr_fops =
+{
+ NULL, /* Seek */
+ mtrr_read, /* Read */
+ mtrr_write, /* Write */
+ NULL, /* Readdir */
+ NULL, /* Poll */
+ mtrr_ioctl, /* IOctl */
+ NULL, /* MMAP */
+ mtrr_open, /* Open */
+ mtrr_close, /* Release */
+ NULL, /* Fsync */
+ NULL, /* Fasync */
+ NULL, /* CheckMediaChange */
+ NULL, /* Revalidate */
+ NULL, /* Lock */
+};
+
+/* Inode operations for /proc/mtrr: only supplies mtrr_fops as the file
+ operations; every other slot is left NULL. */
+static struct inode_operations proc_mtrr_inode_operations = {
+ &mtrr_fops, /* default property file-ops */
+ NULL, /* create */
+ NULL, /* lookup */
+ NULL, /* link */
+ NULL, /* unlink */
+ NULL, /* symlink */
+ NULL, /* mkdir */
+ NULL, /* rmdir */
+ NULL, /* mknod */
+ NULL, /* rename */
+ NULL, /* readlink */
+ NULL, /* follow_link */
+ NULL, /* readpage */
+ NULL, /* writepage */
+ NULL, /* bmap */
+ NULL, /* truncate */
+ NULL /* permission */
+};
+
+/* The /proc directory entry "mtrr" (name length 4): a regular file,
+ readable by everyone and writable by its owner. Its size field is kept
+ up to date by compute_ascii. */
+static struct proc_dir_entry proc_root_mtrr = {
+ PROC_MTRR, 4, "mtrr",
+ S_IFREG | S_IWUSR | S_IRUGO, 1, 0, 0,
+ 0, &proc_mtrr_inode_operations
+};
+
+/* Regenerate the text served by /proc/mtrr from the current hardware
+ state: one line per register in use, written back to back into
+ ascii_buffer. Also resets the usage count of every free register to 0
+ and updates ascii_buf_bytes and the proc entry's size. */
+static void compute_ascii (void)
+{
+ char factor;
+ int i, max;
+ mtrr_type type;
+ unsigned long base, size;
+
+ ascii_buf_bytes = 0;
+ max = get_num_var_ranges ();
+ for (i = 0; i < max; i++)
+ {
+ get_mtrr (i, &base, &size, &type);
+ /* Free register: no line is emitted */
+ if (size < 1) usage_table[i] = 0;
+ else
+ {
+ /* Sizes below 1MB are shown in kB, everything else in MB */
+ if (size < 0x100000)
+ {
+ /* 1MB */
+ factor = 'k';
+ size >>= 10;
+ }
+ else
+ {
+ factor = 'M';
+ size >>= 20;
+ }
+ sprintf
+ (ascii_buffer + ascii_buf_bytes,
+ "reg%02i: base=0x%08lx (%4liMB), size=%4li%cB: %s, count=%d\n",
+ i, base, base>>20, size, factor,
+ attrib_to_str (type), usage_table[i]);
+ ascii_buf_bytes += strlen (ascii_buffer + ascii_buf_bytes);
+ }
+ }
+ proc_root_mtrr.size = ascii_buf_bytes;
+} /* End Function compute_ascii */
+
+#endif /* CONFIG_PROC_FS */
+
+EXPORT_SYMBOL(mtrr_add);
+EXPORT_SYMBOL(mtrr_del);
+
+#if defined(__SMP__) && !defined(MODULE)
+
+/* Union of the change masks reported by the secondary CPUs during boot */
+static volatile unsigned long smp_changes_mask __initdata = 0;
+/* MTRR state captured by mtrr_init_boot_cpu and applied to each secondary
+ CPU by mtrr_init_secondary_cpu */
+static struct mtrr_state smp_mtrr_state __initdata = {0, 0};
+
+/* Capture the calling CPU's MTRR state into smp_mtrr_state so that
+ mtrr_init_secondary_cpu can apply it to the other CPUs. Does nothing if
+ the CPU has no MTRR support. */
+__initfunc(void mtrr_init_boot_cpu (void))
+{
+ if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return;
+ printk("mtrr: v%s Richard Gooch (rgooch@atnf.csiro.au)\n", MTRR_VERSION);
+
+ get_mtrr_state (&smp_mtrr_state);
+} /* End Function mtrr_init_boot_cpu */
+
+/* Apply the MTRR state saved in smp_mtrr_state to the calling CPU and
+   record what had to be changed in smp_changes_mask.  Does nothing if the
+   CPU has no MTRR support. */
+__initfunc(void mtrr_init_secondary_cpu (void))
+{
+    unsigned long changed, bit;
+    struct set_mtrr_context ctxt;
+
+    if ( (boot_cpu_data.x86_capability & X86_FEATURE_MTRR) == 0 ) return;
+    /*  Note that this is not ideal, since the cache is only flushed/disabled
+        for this CPU while the MTRRs are changed, but changing this requires
+        more invasive changes to the way the kernel boots  */
+    set_mtrr_prepare (&ctxt);
+    changed = set_mtrr_state (&smp_mtrr_state, &ctxt);
+    set_mtrr_done (&ctxt);
+    /*  Use the atomic bitops to update the global mask, one set bit at a
+        time  */
+    for (bit = 0; changed != 0; ++bit, changed >>= 1)
+    {
+        if (changed & 0x01) set_bit (bit, &smp_changes_mask);
+    }
+} /* End Function mtrr_init_secondary_cpu */
+
+#endif
+
+/* Driver initialisation, built either as init_module or as mtrr_init.
+ On SMP the per-CPU MTRR settings are reconciled first: as a module by
+ copying this CPU's state to the others, otherwise by releasing the state
+ saved during CPU bring-up and reporting what the secondary CPUs changed.
+ Then the /proc entry is registered and the usage table set up.
+ Always returns 0. */
+#ifdef MODULE
+int init_module (void)
+#else
+__initfunc(int mtrr_init(void))
+#endif
+{
+ /* In the built-in SMP case mtrr_init_boot_cpu has already printed the
+ banner. */
+# if !defined(__SMP__) || defined(MODULE)
+ if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return 0;
+ printk("mtrr: v%s Richard Gooch (rgooch@atnf.csiro.au)\n", MTRR_VERSION);
+#endif
+
+# ifdef __SMP__
+# ifdef MODULE
+ copy_mtrr_state ();
+# else /* MODULE */
+ finalize_mtrr_state (&smp_mtrr_state);
+ mtrr_state_warn (smp_changes_mask);
+# endif /* MODULE */
+# endif /* __SMP__ */
+
+# ifdef CONFIG_PROC_FS
+ proc_register (&proc_root, &proc_root_mtrr);
+# endif
+
+ init_table ();
+ return 0;
+}
+
+#ifdef MODULE
+/* Module unload: remove the /proc entry, detach the SMP hook and release
+   the tables allocated by init_table, which would otherwise be leaked on
+   every load/unload cycle. */
+void cleanup_module (void)
+{
+    if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return;
+# ifdef CONFIG_PROC_FS
+    proc_unregister (&proc_root, PROC_MTRR);
+    /*  Safe to free only now that the entry can no longer be read  */
+    if (ascii_buffer != NULL)
+    {
+        kfree (ascii_buffer);
+        ascii_buffer = NULL;
+    }
+    ascii_buf_bytes = 0;
+# endif
+# ifdef __SMP__
+    mtrr_hook = NULL;
+# endif
+    if (usage_table != NULL)
+    {
+        kfree (usage_table);
+        usage_table = NULL;
+    }
+}
+#endif
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 6ba4e0ff8..a06477b9d 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -375,12 +375,12 @@ void machine_restart(char * __unused)
registers don't have to be reloaded after switching to real mode:
the values are consistent for real mode operation already. */
- __asm__ __volatile__ ("movw $0x0010,%%ax\n"
- "\tmovw %%ax,%%ds\n"
- "\tmovw %%ax,%%es\n"
- "\tmovw %%ax,%%fs\n"
- "\tmovw %%ax,%%gs\n"
- "\tmovw %%ax,%%ss" : : : "eax");
+ __asm__ __volatile__ ("movl $0x0010,%%eax\n"
+ "\tmovl %%ax,%%ds\n"
+ "\tmovl %%ax,%%es\n"
+ "\tmovl %%ax,%%fs\n"
+ "\tmovl %%ax,%%gs\n"
+ "\tmovl %%ax,%%ss" : : : "eax");
/* Jump to the 16-bit code that we copied earlier. It disables paging
and the cache, switches to real mode, and jumps to the BIOS reset
@@ -418,43 +418,37 @@ void show_regs(struct pt_regs * regs)
0xffff & regs->xds,0xffff & regs->xes);
}
+/*
+ * Drop the local segment state: load %fs, %gs and the LDT register with
+ * selector 0, clear the current task's tss.ldt, then detach and vfree()
+ * the segment table hanging off <mm>, if there is one.
+ */
+void release_segments(struct mm_struct *mm)
+{
+ void * ldt;
+
+ /* forget local segments */
+ __asm__ __volatile__("movl %w0,%%fs ; movl %w0,%%gs ; lldt %w0"
+ : /* no outputs */
+ : "r" (0));
+ current->tss.ldt = 0;
+
+ /* clear the pointer before freeing what it pointed to */
+ ldt = mm->segments;
+ if (ldt) {
+ mm->segments = NULL;
+ vfree(ldt);
+ }
+}
+
/*
* Free current thread data structures etc..
*/
-
void exit_thread(void)
{
/* forget lazy i387 state */
if (last_task_used_math == current)
last_task_used_math = NULL;
- /* forget local segments */
- __asm__ __volatile__("mov %w0,%%fs ; mov %w0,%%gs ; lldt %w0"
- : /* no outputs */
- : "r" (0));
- current->tss.ldt = 0;
- if (current->ldt) {
- void * ldt = current->ldt;
- current->ldt = NULL;
- vfree(ldt);
- }
}
void flush_thread(void)
{
int i;
- if (current->ldt) {
- free_page((unsigned long) current->ldt);
- current->ldt = NULL;
- for (i=1 ; i<NR_TASKS ; i++) {
- if (task[i] == current) {
- set_ldt_desc(gdt+(i<<1)+
- FIRST_LDT_ENTRY,&default_ldt, 1);
- load_ldt(i);
- }
- }
- }
-
for (i=0 ; i<8 ; i++)
current->debugreg[i] = 0;
@@ -479,13 +473,30 @@ void release_thread(struct task_struct *dead_task)
{
}
+/*
+ * Set up the LDT for task slot <nr>: if the current mm has a private
+ * segment table, give <new_mm> its own vmalloc()ed copy of it; otherwise
+ * use the single-entry default_ldt. The chosen table is then installed in
+ * the task's GDT slot.
+ */
+void copy_segments(int nr, struct task_struct *p, struct mm_struct *new_mm)
+{
+ int ldt_size = 1;
+ void * ldt = &default_ldt;
+ struct mm_struct * old_mm = current->mm;
+
+ p->tss.ldt = _LDT(nr);
+ if (old_mm->segments) {
+ new_mm->segments = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
+ /* NOTE(review): if vmalloc fails the new task silently falls back to
+ default_ldt; no error is reported to the caller. */
+ if (new_mm->segments) {
+ ldt = new_mm->segments;
+ ldt_size = LDT_ENTRIES;
+ memcpy(ldt, old_mm->segments, LDT_ENTRIES*LDT_ENTRY_SIZE);
+ }
+ }
+ set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY, ldt, ldt_size);
+}
+
int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
struct task_struct * p, struct pt_regs * regs)
{
struct pt_regs * childregs;
p->tss.tr = _TSS(nr);
- p->tss.ldt = _LDT(nr);
p->tss.es = __KERNEL_DS;
p->tss.cs = __KERNEL_CS;
p->tss.ss = __KERNEL_DS;
@@ -508,16 +519,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
childregs->eax = 0;
childregs->esp = esp;
p->tss.back_link = 0;
- if (p->ldt) {
- p->ldt = (struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
- if (p->ldt != NULL)
- memcpy(p->ldt, current->ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
- }
set_tss_desc(gdt+(nr<<1)+FIRST_TSS_ENTRY,&(p->tss));
- if (p->ldt)
- set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,p->ldt, 512);
- else
- set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,&default_ldt, 1);
+
/*
* a bitmap offset pointing outside of the TSS limit causes a nicely
* controllable SIGSEGV. The first sys_ioperm() call sets up the
@@ -583,8 +586,8 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
dump->regs.eax = regs->eax;
dump->regs.ds = regs->xds;
dump->regs.es = regs->xes;
- __asm__("mov %%fs,%0":"=r" (dump->regs.fs));
- __asm__("mov %%gs,%0":"=r" (dump->regs.gs));
+ __asm__("movl %%fs,%0":"=r" (dump->regs.fs));
+ __asm__("movl %%gs,%0":"=r" (dump->regs.gs));
dump->regs.orig_eax = regs->orig_eax;
dump->regs.eip = regs->eip;
dump->regs.cs = regs->xcs;
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index 12a777b5c..c17c13590 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -199,7 +199,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc)
&& (tmp & 0x4) != 0x4 /* not a LDT selector */ \
&& (tmp & 3) != 3) /* not a RPL3 GDT selector */ \
goto badframe; \
- __asm__ __volatile__("mov %w0,%%" #seg : : "r"(tmp)); }
+ __asm__ __volatile__("movl %w0,%%" #seg : : "r"(tmp)); }
GET_SEG(gs);
GET_SEG(fs);
@@ -337,9 +337,9 @@ setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate,
unsigned int tmp;
tmp = 0;
- __asm__("mov %%gs,%w0" : "=r"(tmp): "0"(tmp));
+ __asm__("movl %%gs,%w0" : "=r"(tmp): "0"(tmp));
__put_user(tmp, (unsigned int *)&sc->gs);
- __asm__("mov %%fs,%w0" : "=r"(tmp): "0"(tmp));
+ __asm__("movl %%fs,%w0" : "=r"(tmp): "0"(tmp));
__put_user(tmp, (unsigned int *)&sc->fs);
__put_user(regs->xes, (unsigned int *)&sc->es);
@@ -427,7 +427,7 @@ static void setup_frame(int sig, struct k_sigaction *ka,
regs->eip = (unsigned long) ka->sa.sa_handler;
{
unsigned long seg = __USER_DS;
- __asm__("mov %w0,%%fs ; mov %w0,%%gs": "=r"(seg) : "0"(seg));
+ __asm__("movl %w0,%%fs ; movl %w0,%%gs": "=r"(seg) : "0"(seg));
set_fs(USER_DS);
regs->xds = seg;
regs->xes = seg;
@@ -492,7 +492,7 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->eip = (unsigned long) ka->sa.sa_handler;
{
unsigned long seg = __USER_DS;
- __asm__("mov %w0,%%fs ; mov %w0,%%gs": "=r"(seg) : "0"(seg));
+ __asm__("movl %w0,%%fs ; movl %w0,%%gs": "=r"(seg) : "0"(seg));
set_fs(USER_DS);
regs->xds = seg;
regs->xes = seg;
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 9ca377128..0793410a6 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -28,6 +28,7 @@
* Alan Cox : Added EBDA scanning
*/
+#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/timer.h>
@@ -47,6 +48,10 @@
#include <asm/smp.h>
#include <asm/io.h>
+#ifdef CONFIG_MTRR
+# include <asm/mtrr.h>
+#endif
+
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
@@ -128,9 +133,6 @@ unsigned char boot_cpu_id = 0; /* Processor that is doing the boot up */
static int smp_activated = 0; /* Tripped once we need to start cross invalidating */
int apic_version[NR_CPUS]; /* APIC version number */
static volatile int smp_commenced=0; /* Tripped when we start scheduling */
-unsigned long apic_addr = 0xFEE00000; /* Address of APIC (defaults to 0xFEE00000) */
-unsigned long nlong = 0; /* dummy used for apic_reg address + 0x20 */
-unsigned char *apic_reg=((unsigned char *)(&nlong))-0x20;/* Later set to the ioremap() of the APIC */
unsigned long apic_retval; /* Just debugging the assembler.. */
static volatile unsigned char smp_cpu_in_msg[NR_CPUS]; /* True if this processor is sending an IPI */
@@ -150,8 +152,10 @@ const char lk_lockmsg[] = "lock from interrupt context at %p\n";
int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, };
extern int mp_irq_entries;
extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
+extern int mpc_default_type;
int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { -1, };
int mp_current_pci_id = 0;
+unsigned long mp_lapic_addr = 0;
/* #define SMP_DEBUG */
@@ -272,8 +276,8 @@ __initfunc(static int smp_read_mpc(struct mp_config_table *mpc))
printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
- /* set the local APIC address */
- apic_addr = (unsigned long)phys_to_virt((unsigned long)mpc->mpc_lapic);
+ /* save the local APIC address, it might be non-default */
+ mp_lapic_addr = mpc->mpc_lapic;
/*
* Now process the configuration blocks.
@@ -454,7 +458,7 @@ __initfunc(int smp_scan_config(unsigned long base, unsigned long length))
*/
cfg=pg0[0];
- pg0[0] = (apic_addr | 7);
+ pg0[0] = (mp_lapic_addr | 7);
local_flush_tlb();
boot_cpu_id = GET_APIC_ID(*((volatile unsigned long *) APIC_ID));
@@ -477,6 +481,14 @@ __initfunc(int smp_scan_config(unsigned long base, unsigned long length))
cpu_present_map=3;
num_processors=2;
printk("I/O APIC at 0xFEC00000.\n");
+
+ /*
+ * Save the default type number, we
+ * need it later to set the IO-APIC
+ * up properly:
+ */
+ mpc_default_type = mpf->mpf_feature1;
+
printk("Bus #0 is ");
}
switch(mpf->mpf_feature1)
@@ -525,11 +537,6 @@ __initfunc(int smp_scan_config(unsigned long base, unsigned long length))
if(mpf->mpf_physptr)
smp_read_mpc((void *)mpf->mpf_physptr);
- /*
- * Now that the boot CPU id is known,
- * set some other information about it.
- */
- nlong = boot_cpu_id<<24; /* Dummy 'self' for bootup */
__cpu_logical_map[0] = boot_cpu_id;
global_irq_holder = boot_cpu_id;
current->processor = boot_cpu_id;
@@ -667,6 +674,10 @@ extern int cpu_idle(void * unused);
*/
__initfunc(int start_secondary(void *unused))
{
+#ifdef CONFIG_MTRR
+ /* Must be done before calibration delay is computed */
+ mtrr_init_secondary_cpu ();
+#endif
smp_callin();
while (!smp_commenced)
barrier();
@@ -727,7 +738,7 @@ __initfunc(static void do_boot_cpu(int i))
/* start_eip had better be page-aligned! */
start_eip = setup_trampoline();
- printk("Booting processor %d eip %lx: ", i, start_eip); /* So we see what's up */
+ printk("Booting processor %d eip %lx\n", i, start_eip); /* So we see what's up */
stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
/*
@@ -906,6 +917,10 @@ __initfunc(void smp_boot_cpus(void))
int i;
unsigned long cfg;
+#ifdef CONFIG_MTRR
+ /* Must be done before other processors booted */
+ mtrr_init_boot_cpu ();
+#endif
/*
* Initialize the logical to physical cpu number mapping
* and the per-CPU profiling counter/multiplier
@@ -938,7 +953,7 @@ __initfunc(void smp_boot_cpus(void))
{
printk(KERN_NOTICE "SMP motherboard not detected. Using dummy APIC emulation.\n");
io_apic_irqs = 0;
- return;
+ goto smp_done;
}
/*
@@ -951,15 +966,6 @@ __initfunc(void smp_boot_cpus(void))
printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
}
- /*
- * Map the local APIC into kernel space
- */
-
- apic_reg = ioremap(apic_addr,4096);
-
- if(apic_reg == NULL)
- panic("Unable to map local apic.");
-
#ifdef SMP_DEBUG
{
int reg;
@@ -1106,6 +1112,12 @@ __initfunc(void smp_boot_cpus(void))
* go and set it up:
*/
setup_IO_APIC();
+
+smp_done:
+#ifdef CONFIG_MTRR
+ /* Must be done after other processors booted */
+ mtrr_init ();
+#endif
}
@@ -1196,6 +1208,10 @@ void smp_message_pass(int target, int msg, unsigned long data, int wait)
irq = 0x40;
break;
+ case MSG_MTRR_CHANGE:
+ irq = 0x50;
+ break;
+
default:
printk("Unknown SMP message %d\n", msg);
return;
@@ -1494,10 +1510,18 @@ asmlinkage void smp_stop_cpu_interrupt(void)
for (;;) ;
}
+void (*mtrr_hook) (void) = NULL;
+
+asmlinkage void smp_mtrr_interrupt(void)
+{
+ ack_APIC_irq ();
+ if (mtrr_hook) (*mtrr_hook) ();
+}
+
/*
* This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
* per second. We assume that the caller has already set up the local
- * APIC at apic_addr.
+ * APIC.
*
* The APIC timer is not exactly sync with the external timer chip, it
* closely follows bus clocks.
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index fdcf951f3..754e9371c 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -68,19 +68,19 @@ out: \
#define get_seg_byte(seg,addr) ({ \
register unsigned char __res; \
-__asm__("push %%fs;mov %%ax,%%fs;movb %%fs:%2,%%al;pop %%fs" \
+__asm__("pushl %%fs;movl %%ax,%%fs;movb %%fs:%2,%%al;popl %%fs" \
:"=a" (__res):"0" (seg),"m" (*(addr))); \
__res;})
#define get_seg_long(seg,addr) ({ \
register unsigned long __res; \
-__asm__("push %%fs;mov %%ax,%%fs;movl %%fs:%2,%%eax;pop %%fs" \
+__asm__("pushl %%fs;movl %%ax,%%fs;movl %%fs:%2,%%eax;popl %%fs" \
:"=a" (__res):"0" (seg),"m" (*(addr))); \
__res;})
#define _fs() ({ \
register unsigned short __res; \
-__asm__("mov %%fs,%%ax":"=a" (__res):); \
+__asm__("movl %%fs,%%ax":"=a" (__res):); \
__res;})
void page_exception(void);
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index 5ae87b06a..db7da10fc 100644
--- a/arch/i386/kernel/vm86.c
+++ b/arch/i386/kernel/vm86.c
@@ -255,7 +255,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
mark_screen_rdonly(tsk);
unlock_kernel();
__asm__ __volatile__(
- "xorl %%eax,%%eax; mov %%ax,%%fs; mov %%ax,%%gs\n\t"
+ "xorl %%eax,%%eax; movl %%ax,%%fs; movl %%ax,%%gs\n\t"
"movl %0,%%esp\n\t"
"jmp ret_from_sys_call"
: /* no outputs */