author     Ralf Baechle <ralf@linux-mips.org>    1999-10-09 00:00:47 +0000
committer  Ralf Baechle <ralf@linux-mips.org>    1999-10-09 00:00:47 +0000
commit     d6434e1042f3b0a6dfe1b1f615af369486f9b1fa (patch)
tree       e2be02f33984c48ec019c654051d27964e42c441 /arch/i386/kernel
parent     609d1e803baf519487233b765eb487f9ec227a18 (diff)
Merge with 2.3.19.
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--  arch/i386/kernel/Makefile      |  12
-rw-r--r--  arch/i386/kernel/apm.c         | 345
-rw-r--r--  arch/i386/kernel/bios32.c      | 517
-rw-r--r--  arch/i386/kernel/entry.S       |  27
-rw-r--r--  arch/i386/kernel/head.S        |  10
-rw-r--r--  arch/i386/kernel/i386_ksyms.c  |   3
-rw-r--r--  arch/i386/kernel/i8259.c       | 409
-rw-r--r--  arch/i386/kernel/init_task.c   |  13
-rw-r--r--  arch/i386/kernel/io_apic.c     | 439
-rw-r--r--  arch/i386/kernel/ioport.c      |  25
-rw-r--r--  arch/i386/kernel/irq.c         | 640
-rw-r--r--  arch/i386/kernel/irq.h         | 255
-rw-r--r--  arch/i386/kernel/ldt.c         |  69
-rw-r--r--  arch/i386/kernel/mca.c         |  25
-rw-r--r--  arch/i386/kernel/mtrr.c        | 157
-rw-r--r--  arch/i386/kernel/process.c     | 311
-rw-r--r--  arch/i386/kernel/ptrace.c      |  30
-rw-r--r--  arch/i386/kernel/semaphore.c   | 220
-rw-r--r--  arch/i386/kernel/setup.c       | 594
-rw-r--r--  arch/i386/kernel/signal.c      |  22
-rw-r--r--  arch/i386/kernel/smp.c         | 236
-rw-r--r--  arch/i386/kernel/time.c        |  10
-rw-r--r--  arch/i386/kernel/traps.c       |  92
-rw-r--r--  arch/i386/kernel/visws_apic.c  |   3
-rw-r--r--  arch/i386/kernel/vm86.c        |  54

25 files changed, 2538 insertions(+), 1980 deletions(-)
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 0c3f24889..875f52d5a 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -13,8 +13,8 @@
all: kernel.o head.o init_task.o
O_TARGET := kernel.o
-O_OBJS := process.o signal.o entry.o traps.o irq.o vm86.o \
- ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o
+O_OBJS := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
+ ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o
OX_OBJS := i386_ksyms.o
MX_OBJS :=
@@ -34,8 +34,12 @@ else
endif
endif
-ifdef CONFIG_APM
+ifeq ($(CONFIG_APM),y)
OX_OBJS += apm.o
+else
+ ifeq ($(CONFIG_APM),m)
+ MX_OBJS += apm.o
+ endif
endif
ifdef CONFIG_SMP
@@ -50,7 +54,7 @@ ifdef CONFIG_X86_VISWS_APIC
O_OBJS += visws_apic.o
endif
-head.o: head.S $(TOPDIR)/include/linux/tasks.h
+head.o: head.S
$(CC) -D__ASSEMBLY__ $(AFLAGS) -traditional -c $*.S -o $*.o
include $(TOPDIR)/Rules.make
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 7931e8df8..3bafdfcfc 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -273,7 +273,6 @@ static void standby(void);
static void set_time(void);
static void check_events(void);
-static void do_apm_timer(unsigned long);
static int do_open(struct inode *, struct file *);
static int do_release(struct inode *, struct file *);
@@ -289,7 +288,7 @@ extern void apm_unregister_callback(int (*)(apm_event_t));
/*
* Local variables
*/
-static asmlinkage struct {
+static struct {
unsigned long offset;
unsigned short segment;
} apm_bios_entry;
@@ -314,11 +313,9 @@ static int got_clock_diff = 0;
static int debug = 0;
static int apm_disabled = 0;
-static DECLARE_WAIT_QUEUE_HEAD(process_list);
+static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
static struct apm_bios_struct * user_list = NULL;
-static struct timer_list apm_timer;
-
static char driver_version[] = "1.9"; /* no spaces */
#ifdef APM_DEBUG
@@ -543,6 +540,50 @@ static int apm_set_power_state(u_short state)
return set_power_state(0x0001, state);
}
+/*
+ * If no process has been interested in this
+ * CPU for some time, we want to wake up the
+ * power management thread - we probably want
+ * to conserve power.
+ */
+#define HARD_IDLE_TIMEOUT (HZ/3)
+
+/* This should wake up kapmd and ask it to slow the CPU */
+#define powermanagement_idle() do { } while (0)
+
+extern int hlt_counter;
+
+/*
+ * This is the idle thing.
+ */
+void apm_cpu_idle(void)
+{
+ unsigned int start_idle;
+
+ start_idle = jiffies;
+ while (1) {
+ if (!current->need_resched) {
+ if (jiffies - start_idle < HARD_IDLE_TIMEOUT) {
+ if (!current_cpu_data.hlt_works_ok)
+ continue;
+ if (hlt_counter)
+ continue;
+ asm volatile("sti ; hlt" : : : "memory");
+ continue;
+ }
+
+ /*
+ * Ok, do some power management - we've been idle for too long
+ */
+ powermanagement_idle();
+ }
+
+ schedule();
+ check_pgt_cache();
+ start_idle = jiffies;
+ }
+}
+
void apm_power_off(void)
{
/*
@@ -756,7 +797,7 @@ static int queue_event(apm_event_t event, struct apm_bios_struct *sender)
break;
}
}
- wake_up_interruptible(&process_list);
+ wake_up_interruptible(&apm_waitqueue);
return 1;
}
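
The "sti ; hlt" pair above is the classic x86 idle idiom: STI enables interrupts only after the *next* instruction has begun, so a wakeup interrupt cannot slip in between the need_resched test and the HLT and leave the CPU halted with work pending. As a stand-alone helper the idiom would look like this (a sketch; safe_halt is a hypothetical name, not part of this patch):

static inline void safe_halt(void)
{
	/*
	 * STI takes effect after the following instruction, so an
	 * interrupt arriving "in between" still terminates the HLT.
	 */
	__asm__ __volatile__("sti; hlt" : : : "memory");
}
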
@@ -942,15 +983,14 @@ static void check_events(void)
}
}
-static void do_apm_timer(unsigned long unused)
+static void apm_event_handler(void)
{
- int err;
-
- static int pending_count = 0;
+ static int pending_count = 0;
if (((standbys_pending > 0) || (suspends_pending > 0))
&& (apm_bios_info.version > 0x100)
&& (pending_count-- <= 0)) {
+ int err;
pending_count = 4;
err = apm_set_power_state(APM_STATE_BUSY);
@@ -961,14 +1001,9 @@ static void do_apm_timer(unsigned long unused)
if (!(((standbys_pending > 0) || (suspends_pending > 0))
&& (apm_bios_info.version == 0x100)))
check_events();
-
- init_timer(&apm_timer);
- apm_timer.expires = APM_CHECK_TIMEOUT + jiffies;
- add_timer(&apm_timer);
}
-/* Called from sys_idle, must make sure apm_enabled. */
-int apm_do_idle(void)
+static int apm_do_idle(void)
{
#ifdef CONFIG_APM_CPU_IDLE
u32 dummy;
@@ -979,30 +1014,74 @@ int apm_do_idle(void)
if (apm_bios_call_simple(0x5305, 0, 0, &dummy))
return 0;
+#ifdef ALWAYS_CALL_BUSY
+ clock_slowed = 1;
+#else
clock_slowed = (apm_bios_info.flags & APM_IDLE_SLOWS_CLOCK) != 0;
+#endif
return 1;
#else
return 0;
#endif
}
-/* Called from sys_idle, must make sure apm_enabled. */
-void apm_do_busy(void)
+static void apm_do_busy(void)
{
#ifdef CONFIG_APM_CPU_IDLE
u32 dummy;
- if (apm_enabled
-#ifndef ALWAYS_CALL_BUSY
- && clock_slowed
-#endif
- ) {
+ if (clock_slowed) {
(void) apm_bios_call_simple(0x5306, 0, 0, &dummy);
clock_slowed = 0;
}
#endif
}
+/*
+ * This is the APM thread main loop.
+ *
+ * Check whether we're the only running process to
+ * decide if we should just power down.
+ *
+ * Do this by checking the runqueue: if we're the
+ * only one, then the current process run_list will
+ * have both prev and next pointing to the same
+ * entry (the true idle process)
+ */
+#define system_idle() (current->run_list.next == current->run_list.prev)
+
+static void apm_mainloop(void)
+{
+ DECLARE_WAITQUEUE(wait, current);
+ apm_enabled = 1;
+
+ add_wait_queue(&apm_waitqueue, &wait);
+ current->state = TASK_INTERRUPTIBLE;
+ for (;;) {
+ /* Nothing to do, just sleep for the timeout */
+ schedule_timeout(APM_CHECK_TIMEOUT);
+
+ /*
+ * Ok, check all events, check for idle (and mark us sleeping
+ * so as not to count towards the load average)..
+ */
+ current->state = TASK_INTERRUPTIBLE;
+ apm_event_handler();
+ if (!system_idle())
+ continue;
+ if (apm_do_idle()) {
+ unsigned long start = jiffies;
+ do {
+ apm_do_idle();
+ if (jiffies - start > APM_CHECK_TIMEOUT)
+ break;
+ } while (system_idle());
+ apm_do_busy();
+ apm_event_handler();
+ }
+ }
+}
+
static int check_apm_bios_struct(struct apm_bios_struct *as, const char *func)
{
if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) {
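
apm_mainloop() above is the standard periodic-kernel-thread skeleton of this era: park the task on a wait queue, mark it TASK_INTERRUPTIBLE so the sleep is not counted toward the load average, and bound each nap with schedule_timeout(). Reduced to its essentials (names hypothetical, assuming the 2.3 scheduler API):

static void periodic_loop(wait_queue_head_t *q, unsigned long interval)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(q, &wait);
	current->state = TASK_INTERRUPTIBLE;
	for (;;) {
		/* sleeps `interval' jiffies, or less if woken early */
		schedule_timeout(interval);
		current->state = TASK_INTERRUPTIBLE;
		/* ... periodic work goes here ... */
	}
}
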
@@ -1027,15 +1106,15 @@ static ssize_t do_read(struct file *fp, char *buf, size_t count, loff_t *ppos)
if (queue_empty(as)) {
if (fp->f_flags & O_NONBLOCK)
return -EAGAIN;
- add_wait_queue(&process_list, &wait);
+ add_wait_queue(&apm_waitqueue, &wait);
repeat:
- current->state = TASK_INTERRUPTIBLE;
+ set_current_state(TASK_INTERRUPTIBLE);
if (queue_empty(as) && !signal_pending(current)) {
schedule();
goto repeat;
}
current->state = TASK_RUNNING;
- remove_wait_queue(&process_list, &wait);
+ remove_wait_queue(&apm_waitqueue, &wait);
}
i = count;
while ((i >= sizeof(event)) && !queue_empty(as)) {
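
Note the switch from a bare assignment to set_current_state() in do_read(): set_current_state() includes a memory barrier, so on SMP the new task state becomes visible before the sleep condition is re-tested, closing a missed-wakeup window. The canonical sleep loop of the period, for reference (a sketch; `queue' and `condition' stand in for the real wait queue and wakeup condition):

	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&queue, &wait);
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (condition)			/* event we are waiting for */
			break;
		if (signal_pending(current))
			break;
		schedule();
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&queue, &wait);
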
@@ -1069,7 +1148,7 @@ static unsigned int do_poll(struct file *fp, poll_table * wait)
as = fp->private_data;
if (check_apm_bios_struct(as, "select"))
return 0;
- poll_wait(fp, &process_list, wait);
+ poll_wait(fp, &apm_waitqueue, wait);
if (!queue_empty(as))
return POLLIN | POLLRDNORM;
return 0;
@@ -1263,7 +1342,97 @@ int apm_get_info(char *buf, char **start, off_t fpos, int length, int dummy)
return p - buf;
}
-void __init apm_setup(char *str, int *dummy)
+static int apm(void *unused)
+{
+ unsigned short bx;
+ unsigned short cx;
+ unsigned short dx;
+ unsigned short error;
+ char * power_stat;
+ char * bat_stat;
+
+ strcpy(current->comm, "kapmd");
+ sigfillset(&current->blocked);
+
+ if (apm_bios_info.version > 0x100) {
+ /*
+ * We only support BIOSs up to version 1.2
+ */
+ if (apm_bios_info.version > 0x0102)
+ apm_bios_info.version = 0x0102;
+ if (apm_driver_version(&apm_bios_info.version) != APM_SUCCESS) {
+ /* Fall back to an APM 1.0 connection. */
+ apm_bios_info.version = 0x100;
+ }
+ }
+ if (debug) {
+ printk(KERN_INFO "apm: Connection version %d.%d\n",
+ (apm_bios_info.version >> 8) & 0xff,
+ apm_bios_info.version & 0xff );
+
+ error = apm_get_power_status(&bx, &cx, &dx);
+ if (error)
+ printk(KERN_INFO "apm: power status not available\n");
+ else {
+ switch ((bx >> 8) & 0xff) {
+ case 0: power_stat = "off line"; break;
+ case 1: power_stat = "on line"; break;
+ case 2: power_stat = "on backup power"; break;
+ default: power_stat = "unknown"; break;
+ }
+ switch (bx & 0xff) {
+ case 0: bat_stat = "high"; break;
+ case 1: bat_stat = "low"; break;
+ case 2: bat_stat = "critical"; break;
+ case 3: bat_stat = "charging"; break;
+ default: bat_stat = "unknown"; break;
+ }
+ printk(KERN_INFO
+ "apm: AC %s, battery status %s, battery life ",
+ power_stat, bat_stat);
+ if ((cx & 0xff) == 0xff)
+ printk("unknown\n");
+ else
+ printk("%d%%\n", cx & 0xff);
+ if (apm_bios_info.version > 0x100) {
+ printk(KERN_INFO
+ "apm: battery flag 0x%02x, battery life ",
+ (cx >> 8) & 0xff);
+ if (dx == 0xffff)
+ printk("unknown\n");
+ else
+ printk("%d %s\n", dx & 0x7fff,
+ (dx & 0x8000) ?
+ "minutes" : "seconds");
+ }
+ }
+ }
+
+#ifdef CONFIG_APM_DO_ENABLE
+ if (apm_bios_info.flags & APM_BIOS_DISABLED) {
+ /*
+ * This call causes my NEC UltraLite Versa 33/C to hang if it
+ * is booted with PM disabled but not in the docking station.
+ * Unfortunate ...
+ */
+ error = apm_enable_power_management();
+ if (error) {
+ apm_error("enable power management", error);
+ return -1;
+ }
+ }
+#endif
+ if (((apm_bios_info.flags & APM_BIOS_DISENGAGED) == 0)
+ && (apm_bios_info.version > 0x0100)) {
+ if (apm_engage_power_management(0x0001) == APM_SUCCESS)
+ apm_bios_info.flags &= ~APM_BIOS_DISENGAGED;
+ }
+
+ apm_mainloop();
+ return 0;
+}
+
+static int __init apm_setup(char *str)
{
int invert;
@@ -1283,16 +1452,23 @@ void __init apm_setup(char *str, int *dummy)
if (str != NULL)
str += strspn(str, ", \t");
}
+ return 1;
}
-void __init apm_bios_init(void)
+__setup("apm=", apm_setup);
+
+/*
+ * Just start the APM thread. We do NOT want to do APM BIOS
+ * calls from anything but the APM thread, if for no other reason
+ * than the fact that we don't trust the APM BIOS. This way,
+ * most common APM BIOS problems that lead to protection errors
+ * etc will have at least some level of being contained...
+ *
+ * In short, if something bad happens, at least we have a choice
+ * of just killing the apm thread..
+ */
+static int __init apm_init(void)
{
- unsigned short bx;
- unsigned short cx;
- unsigned short dx;
- unsigned short error;
- char * power_stat;
- char * bat_stat;
static struct proc_dir_entry *ent;
if (apm_bios_info.version == 0) {
@@ -1339,6 +1515,15 @@ void __init apm_bios_init(void)
return;
}
+#ifdef CONFIG_SMP
+ if (smp_num_cpus > 1) {
+ printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n");
+ if (smp_hack)
+ smp_hack = 2;
+ return -1;
+ }
+#endif
+
/*
* Set up a segment that references the real mode segment 0x40
* that extends up to the end of page zero (that we have reserved).
@@ -1378,92 +1563,6 @@ void __init apm_bios_init(void)
(apm_bios_info.dseg_len - 1) & 0xffff);
}
#endif
-#ifdef CONFIG_SMP
- if (smp_num_cpus > 1) {
- printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n");
- if (smp_hack)
- smp_hack = 2;
- return;
- }
-#endif
- if (apm_bios_info.version > 0x100) {
- /*
- * We only support BIOSs up to version 1.2
- */
- if (apm_bios_info.version > 0x0102)
- apm_bios_info.version = 0x0102;
- if (apm_driver_version(&apm_bios_info.version) != APM_SUCCESS) {
- /* Fall back to an APM 1.0 connection. */
- apm_bios_info.version = 0x100;
- }
- }
- if (debug) {
- printk(KERN_INFO "apm: Connection version %d.%d\n",
- (apm_bios_info.version >> 8) & 0xff,
- apm_bios_info.version & 0xff );
-
- error = apm_get_power_status(&bx, &cx, &dx);
- if (error)
- printk(KERN_INFO "apm: power status not available\n");
- else {
- switch ((bx >> 8) & 0xff) {
- case 0: power_stat = "off line"; break;
- case 1: power_stat = "on line"; break;
- case 2: power_stat = "on backup power"; break;
- default: power_stat = "unknown"; break;
- }
- switch (bx & 0xff) {
- case 0: bat_stat = "high"; break;
- case 1: bat_stat = "low"; break;
- case 2: bat_stat = "critical"; break;
- case 3: bat_stat = "charging"; break;
- default: bat_stat = "unknown"; break;
- }
- printk(KERN_INFO
- "apm: AC %s, battery status %s, battery life ",
- power_stat, bat_stat);
- if ((cx & 0xff) == 0xff)
- printk("unknown\n");
- else
- printk("%d%%\n", cx & 0xff);
- if (apm_bios_info.version > 0x100) {
- printk(KERN_INFO
- "apm: battery flag 0x%02x, battery life ",
- (cx >> 8) & 0xff);
- if (dx == 0xffff)
- printk("unknown\n");
- else
- printk("%d %s\n", dx & 0x7fff,
- (dx & 0x8000) ?
- "minutes" : "seconds");
- }
- }
- }
-
-#ifdef CONFIG_APM_DO_ENABLE
- if (apm_bios_info.flags & APM_BIOS_DISABLED) {
- /*
- * This call causes my NEC UltraLite Versa 33/C to hang if it
- * is booted with PM disabled but not in the docking station.
- * Unfortunate ...
- */
- error = apm_enable_power_management();
- if (error) {
- apm_error("enable power management", error);
- return;
- }
- }
-#endif
- if (((apm_bios_info.flags & APM_BIOS_DISABLED) == 0)
- && (apm_bios_info.version > 0x0100)) {
- if (apm_engage_power_management(0x0001) == APM_SUCCESS)
- apm_bios_info.flags &= ~APM_BIOS_DISENGAGED;
- }
-
- init_timer(&apm_timer);
- apm_timer.function = do_apm_timer;
- apm_timer.expires = APM_CHECK_TIMEOUT + jiffies;
- add_timer(&apm_timer);
ent = create_proc_entry("apm", 0, 0);
if (ent != NULL)
@@ -1471,5 +1570,7 @@ void __init apm_bios_init(void)
misc_register(&apm_device);
- apm_enabled = 1;
+ kernel_thread(apm, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND | SIGCHLD);
}
+
+module_init(apm_init)
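
With this, apm_init() merely spawns kapmd, and all APM BIOS calls are confined to that thread, as the comment block above explains. The spawn-a-kernel-daemon pattern used here, boiled down (daemon and function names hypothetical):

static int kexampled(void *unused)
{
	/* detach from the spawning context: new name, block all signals */
	strcpy(current->comm, "kexampled");
	sigfillset(&current->blocked);

	for (;;) {
		/* ... daemon work, typically a wait-queue sleep ... */
	}
	return 0;
}

static int __init example_init(void)
{
	kernel_thread(kexampled, NULL,
		      CLONE_FS | CLONE_FILES | CLONE_SIGHAND | SIGCHLD);
	return 0;
}
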
diff --git a/arch/i386/kernel/bios32.c b/arch/i386/kernel/bios32.c
index 91d338b2c..f0c63c938 100644
--- a/arch/i386/kernel/bios32.c
+++ b/arch/i386/kernel/bios32.c
@@ -75,6 +75,8 @@
* Jan 23, 1999 : More improvements to peer host bridge logic. i450NX fixup. [mj]
*
* Feb 8, 1999 : Added UM8886BF I/O address fixup. [mj]
+ *
+ * August 1999 : New resource management and configuration access stuff. [mj]
*/
#include <linux/config.h>
@@ -85,15 +87,14 @@
#include <linux/ioport.h>
#include <linux/malloc.h>
#include <linux/smp_lock.h>
+#include <linux/irq.h>
+#include <linux/spinlock.h>
#include <asm/page.h>
#include <asm/segment.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/smp.h>
-#include <asm/spinlock.h>
-
-#include "irq.h"
#undef DEBUG
@@ -103,72 +104,6 @@
#define DBG(x...)
#endif
-/*
- * This interrupt-safe spinlock protects all accesses to PCI
- * configuration space.
- */
-
-spinlock_t pci_lock = SPIN_LOCK_UNLOCKED;
-
-/*
- * Generic PCI access -- indirect calls according to detected HW.
- */
-
-struct pci_access {
- int pci_present;
- int (*read_config_byte)(unsigned char, unsigned char, unsigned char, unsigned char *);
- int (*read_config_word)(unsigned char, unsigned char, unsigned char, unsigned short *);
- int (*read_config_dword)(unsigned char, unsigned char, unsigned char, unsigned int *);
- int (*write_config_byte)(unsigned char, unsigned char, unsigned char, unsigned char);
- int (*write_config_word)(unsigned char, unsigned char, unsigned char, unsigned short);
- int (*write_config_dword)(unsigned char, unsigned char, unsigned char, unsigned int);
-};
-
-static int pci_stub(void)
-{
- return PCIBIOS_FUNC_NOT_SUPPORTED;
-}
-
-static struct pci_access pci_access_none = {
- 0, /* No PCI present */
- (void *) pci_stub,
- (void *) pci_stub,
- (void *) pci_stub,
- (void *) pci_stub,
- (void *) pci_stub,
- (void *) pci_stub
-};
-
-static struct pci_access *access_pci = &pci_access_none;
-
-int pcibios_present(void)
-{
- return access_pci->pci_present;
-}
-
-#define PCI_byte_BAD 0
-#define PCI_word_BAD (pos & 1)
-#define PCI_dword_BAD (pos & 3)
-
-#define PCI_STUB(rw,size,type) \
-int pcibios_##rw##_config_##size (u8 bus, u8 dfn, u8 pos, type value) \
-{ \
- int res; \
- unsigned long flags; \
- if (PCI_##size##_BAD) return PCIBIOS_BAD_REGISTER_NUMBER; \
- spin_lock_irqsave(&pci_lock, flags); \
- res = access_pci->rw##_config_##size(bus, dfn, pos, value); \
- spin_unlock_irqrestore(&pci_lock, flags); \
- return res; \
-}
-
-PCI_STUB(read, byte, u8 *)
-PCI_STUB(read, word, u16 *)
-PCI_STUB(read, dword, u32 *)
-PCI_STUB(write, byte, u8)
-PCI_STUB(write, word, u16)
-PCI_STUB(write, dword, u32)
-
#define PCI_PROBE_BIOS 1
#define PCI_PROBE_CONF1 2
#define PCI_PROBE_CONF2 4
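
The removed pci_access table (a pci_present flag plus six bus/devfn-keyed function pointers, with a spinlocked pcibios_* stub generated around each) gives way to the generic struct pci_ops, whose methods take a struct pci_dev * instead of raw bus/devfn numbers. Judging from the call sites later in this patch (o->read_word(&dev, ...)) and the initializers below, the new table is shaped roughly like this (a sketch; field names inferred from this diff, not authoritative):

struct pci_ops {
	int (*read_byte)(struct pci_dev *, int where, u8 *val);
	int (*read_word)(struct pci_dev *, int where, u16 *val);
	int (*read_dword)(struct pci_dev *, int where, u32 *val);
	int (*write_byte)(struct pci_dev *, int where, u8 val);
	int (*write_word)(struct pci_dev *, int where, u16 val);
	int (*write_dword)(struct pci_dev *, int where, u32 val);
};
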
@@ -176,6 +111,7 @@ PCI_STUB(write, dword, u32)
#define PCI_BIOS_SORT 0x200
#define PCI_NO_CHECKS 0x400
#define PCI_NO_PEER_FIXUP 0x800
+#define PCI_ASSIGN_ROMS 0x1000
static unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
@@ -189,60 +125,53 @@ static unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CON
* Functions for accessing PCI configuration space with type 1 accesses
*/
-#define CONFIG_CMD(bus, device_fn, where) (0x80000000 | (bus << 16) | (device_fn << 8) | (where & ~3))
+#define CONFIG_CMD(dev, where) (0x80000000 | (dev->bus->number << 16) | (dev->devfn << 8) | (where & ~3))
-static int pci_conf1_read_config_byte(unsigned char bus, unsigned char device_fn,
- unsigned char where, unsigned char *value)
+static int pci_conf1_read_config_byte(struct pci_dev *dev, int where, u8 *value)
{
- outl(CONFIG_CMD(bus,device_fn,where), 0xCF8);
+ outl(CONFIG_CMD(dev,where), 0xCF8);
*value = inb(0xCFC + (where&3));
return PCIBIOS_SUCCESSFUL;
}
-static int pci_conf1_read_config_word (unsigned char bus,
- unsigned char device_fn, unsigned char where, unsigned short *value)
+static int pci_conf1_read_config_word(struct pci_dev *dev, int where, u16 *value)
{
- outl(CONFIG_CMD(bus,device_fn,where), 0xCF8);
+ outl(CONFIG_CMD(dev,where), 0xCF8);
*value = inw(0xCFC + (where&2));
return PCIBIOS_SUCCESSFUL;
}
-static int pci_conf1_read_config_dword (unsigned char bus, unsigned char device_fn,
- unsigned char where, unsigned int *value)
+static int pci_conf1_read_config_dword(struct pci_dev *dev, int where, u32 *value)
{
- outl(CONFIG_CMD(bus,device_fn,where), 0xCF8);
+ outl(CONFIG_CMD(dev,where), 0xCF8);
*value = inl(0xCFC);
return PCIBIOS_SUCCESSFUL;
}
-static int pci_conf1_write_config_byte (unsigned char bus, unsigned char device_fn,
- unsigned char where, unsigned char value)
+static int pci_conf1_write_config_byte(struct pci_dev *dev, int where, u8 value)
{
- outl(CONFIG_CMD(bus,device_fn,where), 0xCF8);
+ outl(CONFIG_CMD(dev,where), 0xCF8);
outb(value, 0xCFC + (where&3));
return PCIBIOS_SUCCESSFUL;
}
-static int pci_conf1_write_config_word (unsigned char bus, unsigned char device_fn,
- unsigned char where, unsigned short value)
+static int pci_conf1_write_config_word(struct pci_dev *dev, int where, u16 value)
{
- outl(CONFIG_CMD(bus,device_fn,where), 0xCF8);
+ outl(CONFIG_CMD(dev,where), 0xCF8);
outw(value, 0xCFC + (where&2));
return PCIBIOS_SUCCESSFUL;
}
-static int pci_conf1_write_config_dword (unsigned char bus, unsigned char device_fn,
- unsigned char where, unsigned int value)
+static int pci_conf1_write_config_dword(struct pci_dev *dev, int where, u32 value)
{
- outl(CONFIG_CMD(bus,device_fn,where), 0xCF8);
+ outl(CONFIG_CMD(dev,where), 0xCF8);
outl(value, 0xCFC);
return PCIBIOS_SUCCESSFUL;
}
#undef CONFIG_CMD
-static struct pci_access pci_direct_conf1 = {
- 1,
+static struct pci_ops pci_direct_conf1 = {
pci_conf1_read_config_byte,
pci_conf1_read_config_word,
pci_conf1_read_config_dword,
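
Configuration Mechanism #1, as the CONFIG_CMD macro shows, forms a 32-bit address in port 0xCF8 (bit 31 enables the cycle, bits 23:16 select the bus, 15:8 the devfn, 7:2 the dword-aligned register) and moves data through 0xCFC. A worked encoding:

/*
 * Example: bus 0, device 1, function 0 (devfn 0x08), register 0x10 (BAR0):
 *
 *	0x80000000 | (0 << 16) | (0x08 << 8) | (0x10 & ~3) == 0x80000810
 *
 * so: outl(0x80000810, 0xCF8); value = inl(0xCFC);
 */
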
@@ -255,86 +184,65 @@ static struct pci_access pci_direct_conf1 = {
* Functions for accessing PCI configuration space with type 2 accesses
*/
-#define IOADDR(devfn, where) ((0xC000 | ((devfn & 0x78) << 5)) + where)
-#define FUNC(devfn) (((devfn & 7) << 1) | 0xf0)
+#define IOADDR(devfn, where) ((0xC000 | ((devfn & 0x78) << 5)) + where)
+#define FUNC(devfn) (((devfn & 7) << 1) | 0xf0)
+#define SET(dev) if (dev->devfn & 0x80) return PCIBIOS_DEVICE_NOT_FOUND; \
+ outb(FUNC(dev->devfn), 0xCF8); \
+ outb(dev->bus->number, 0xCFA);
-static int pci_conf2_read_config_byte(unsigned char bus, unsigned char device_fn,
- unsigned char where, unsigned char *value)
+static int pci_conf2_read_config_byte(struct pci_dev *dev, int where, u8 *value)
{
- if (device_fn & 0x80)
- return PCIBIOS_DEVICE_NOT_FOUND;
- outb (FUNC(device_fn), 0xCF8);
- outb (bus, 0xCFA);
- *value = inb(IOADDR(device_fn,where));
+ SET(dev);
+ *value = inb(IOADDR(dev->devfn,where));
outb (0, 0xCF8);
return PCIBIOS_SUCCESSFUL;
}
-static int pci_conf2_read_config_word (unsigned char bus, unsigned char device_fn,
- unsigned char where, unsigned short *value)
+static int pci_conf2_read_config_word(struct pci_dev *dev, int where, u16 *value)
{
- if (device_fn & 0x80)
- return PCIBIOS_DEVICE_NOT_FOUND;
- outb (FUNC(device_fn), 0xCF8);
- outb (bus, 0xCFA);
- *value = inw(IOADDR(device_fn,where));
+ SET(dev);
+ *value = inw(IOADDR(dev->devfn,where));
outb (0, 0xCF8);
return PCIBIOS_SUCCESSFUL;
}
-static int pci_conf2_read_config_dword (unsigned char bus, unsigned char device_fn,
- unsigned char where, unsigned int *value)
+static int pci_conf2_read_config_dword(struct pci_dev *dev, int where, u32 *value)
{
- if (device_fn & 0x80)
- return PCIBIOS_DEVICE_NOT_FOUND;
- outb (FUNC(device_fn), 0xCF8);
- outb (bus, 0xCFA);
- *value = inl (IOADDR(device_fn,where));
+ SET(dev);
+ *value = inl (IOADDR(dev->devfn,where));
outb (0, 0xCF8);
return PCIBIOS_SUCCESSFUL;
}
-static int pci_conf2_write_config_byte (unsigned char bus, unsigned char device_fn,
- unsigned char where, unsigned char value)
+static int pci_conf2_write_config_byte(struct pci_dev *dev, int where, u8 value)
{
- if (device_fn & 0x80)
- return PCIBIOS_DEVICE_NOT_FOUND;
- outb (FUNC(device_fn), 0xCF8);
- outb (bus, 0xCFA);
- outb (value, IOADDR(device_fn,where));
+ SET(dev);
+ outb (value, IOADDR(dev->devfn,where));
outb (0, 0xCF8);
return PCIBIOS_SUCCESSFUL;
}
-static int pci_conf2_write_config_word (unsigned char bus, unsigned char device_fn,
- unsigned char where, unsigned short value)
+static int pci_conf2_write_config_word(struct pci_dev *dev, int where, u16 value)
{
- if (device_fn & 0x80)
- return PCIBIOS_DEVICE_NOT_FOUND;
- outb (FUNC(device_fn), 0xCF8);
- outb (bus, 0xCFA);
- outw (value, IOADDR(device_fn,where));
+ SET(dev);
+ outw (value, IOADDR(dev->devfn,where));
outb (0, 0xCF8);
return PCIBIOS_SUCCESSFUL;
}
-static int pci_conf2_write_config_dword (unsigned char bus, unsigned char device_fn,
- unsigned char where, unsigned int value)
+static int pci_conf2_write_config_dword(struct pci_dev *dev, int where, u32 value)
{
- if (device_fn & 0x80)
- return PCIBIOS_DEVICE_NOT_FOUND;
- outb (FUNC(device_fn), 0xCF8);
- outb (bus, 0xCFA);
- outl (value, IOADDR(device_fn,where));
+ SET(dev);
+ outl (value, IOADDR(dev->devfn,where));
outb (0, 0xCF8);
return PCIBIOS_SUCCESSFUL;
}
+#undef SET
#undef IOADDR
#undef FUNC
-static struct pci_access pci_direct_conf2 = {
- 1,
+static struct pci_ops pci_direct_conf2 = {
pci_conf2_read_config_byte,
pci_conf2_read_config_word,
pci_conf2_read_config_dword,
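
Configuration Mechanism #2 instead maps configuration space into the I/O range 0xC000-0xCFFF: the function number is written to 0xCF8, the bus number to 0xCFA, and device N then answers at 0xC000 + (N << 8), which is why the SET macro rejects devfn values of 0x80 and above (only 16 devices per bus). A worked address:

/*
 * Example: device 1, function 0 (devfn 0x08), register 0x10:
 *
 *	IOADDR(0x08, 0x10) = (0xC000 | ((0x08 & 0x78) << 5)) + 0x10
 *	                   = (0xC000 | 0x0100) + 0x10 = 0xC110
 */
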
@@ -353,9 +261,11 @@ static struct pci_access pci_direct_conf2 = {
* This should be close to trivial, but it isn't, because there are buggy
* chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID.
*/
-__initfunc(int pci_sanity_check(struct pci_access *a))
+static int __init pci_sanity_check(struct pci_ops *o)
{
- u16 dfn, x;
+ u16 x;
+ struct pci_bus bus; /* Fake bus and device */
+ struct pci_dev dev;
#ifdef CONFIG_VISWS
return 1; /* Lithium PCI Bridges are non-standard */
@@ -363,17 +273,19 @@ __initfunc(int pci_sanity_check(struct pci_access *a))
if (pci_probe & PCI_NO_CHECKS)
return 1;
- for(dfn=0; dfn < 0x100; dfn++)
- if ((!a->read_config_word(0, dfn, PCI_CLASS_DEVICE, &x) &&
+ bus.number = 0;
+ dev.bus = &bus;
+ for(dev.devfn=0; dev.devfn < 0x100; dev.devfn++)
+ if ((!o->read_word(&dev, PCI_CLASS_DEVICE, &x) &&
(x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA)) ||
- (!a->read_config_word(0, dfn, PCI_VENDOR_ID, &x) &&
+ (!o->read_word(&dev, PCI_VENDOR_ID, &x) &&
(x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ)))
return 1;
DBG("PCI: Sanity check failed\n");
return 0;
}
-__initfunc(static struct pci_access *pci_check_direct(void))
+static struct pci_ops * __init pci_check_direct(void)
{
unsigned int tmp;
unsigned long flags;
@@ -497,7 +409,7 @@ static unsigned long bios32_service(unsigned long service)
unsigned long entry; /* %edx */
unsigned long flags;
- spin_lock_irqsave(&pci_lock, flags);
+ __save_flags(flags); __cli();
__asm__("lcall (%%edi)"
: "=a" (return_code),
"=b" (address),
@@ -506,7 +418,7 @@ static unsigned long bios32_service(unsigned long service)
: "0" (service),
"1" (0),
"D" (&bios32_indirect));
- spin_unlock_irqrestore(&pci_lock, flags);
+ __restore_flags(flags);
switch (return_code) {
case 0:
@@ -528,7 +440,7 @@ static struct {
static int pci_bios_present;
-__initfunc(static int check_pcibios(void))
+static int __init check_pcibios(void)
{
u32 signature, eax, ebx, ecx;
u8 status, major_ver, minor_ver, hw_mech, last_bus;
@@ -602,8 +514,8 @@ static int pci_bios_find_class (unsigned int class_code, unsigned short index,
#endif
-__initfunc(static int pci_bios_find_device (unsigned short vendor, unsigned short device_id,
- unsigned short index, unsigned char *bus, unsigned char *device_fn))
+static int __init pci_bios_find_device (unsigned short vendor, unsigned short device_id,
+ unsigned short index, unsigned char *bus, unsigned char *device_fn)
{
unsigned short bx;
unsigned short ret;
@@ -624,11 +536,10 @@ __initfunc(static int pci_bios_find_device (unsigned short vendor, unsigned shor
return (int) (ret & 0xff00) >> 8;
}
-static int pci_bios_read_config_byte(unsigned char bus,
- unsigned char device_fn, unsigned char where, unsigned char *value)
+static int pci_bios_read_config_byte(struct pci_dev *dev, int where, u8 *value)
{
unsigned long ret;
- unsigned long bx = (bus << 8) | device_fn;
+ unsigned long bx = (dev->bus->number << 8) | dev->devfn;
__asm__("lcall (%%esi)\n\t"
"jc 1f\n\t"
@@ -643,11 +554,10 @@ static int pci_bios_read_config_byte(unsigned char bus,
return (int) (ret & 0xff00) >> 8;
}
-static int pci_bios_read_config_word (unsigned char bus,
- unsigned char device_fn, unsigned char where, unsigned short *value)
+static int pci_bios_read_config_word(struct pci_dev *dev, int where, u16 *value)
{
unsigned long ret;
- unsigned long bx = (bus << 8) | device_fn;
+ unsigned long bx = (dev->bus->number << 8) | dev->devfn;
__asm__("lcall (%%esi)\n\t"
"jc 1f\n\t"
@@ -662,11 +572,10 @@ static int pci_bios_read_config_word (unsigned char bus,
return (int) (ret & 0xff00) >> 8;
}
-static int pci_bios_read_config_dword (unsigned char bus,
- unsigned char device_fn, unsigned char where, unsigned int *value)
+static int pci_bios_read_config_dword(struct pci_dev *dev, int where, u32 *value)
{
unsigned long ret;
- unsigned long bx = (bus << 8) | device_fn;
+ unsigned long bx = (dev->bus->number << 8) | dev->devfn;
__asm__("lcall (%%esi)\n\t"
"jc 1f\n\t"
@@ -681,11 +590,10 @@ static int pci_bios_read_config_dword (unsigned char bus,
return (int) (ret & 0xff00) >> 8;
}
-static int pci_bios_write_config_byte (unsigned char bus,
- unsigned char device_fn, unsigned char where, unsigned char value)
+static int pci_bios_write_config_byte(struct pci_dev *dev, int where, u8 value)
{
unsigned long ret;
- unsigned long bx = (bus << 8) | device_fn;
+ unsigned long bx = (dev->bus->number << 8) | dev->devfn;
__asm__("lcall (%%esi)\n\t"
"jc 1f\n\t"
@@ -700,11 +608,10 @@ static int pci_bios_write_config_byte (unsigned char bus,
return (int) (ret & 0xff00) >> 8;
}
-static int pci_bios_write_config_word (unsigned char bus,
- unsigned char device_fn, unsigned char where, unsigned short value)
+static int pci_bios_write_config_word(struct pci_dev *dev, int where, u16 value)
{
unsigned long ret;
- unsigned long bx = (bus << 8) | device_fn;
+ unsigned long bx = (dev->bus->number << 8) | dev->devfn;
__asm__("lcall (%%esi)\n\t"
"jc 1f\n\t"
@@ -719,11 +626,10 @@ static int pci_bios_write_config_word (unsigned char bus,
return (int) (ret & 0xff00) >> 8;
}
-static int pci_bios_write_config_dword (unsigned char bus,
- unsigned char device_fn, unsigned char where, unsigned int value)
+static int pci_bios_write_config_dword(struct pci_dev *dev, int where, u32 value)
{
unsigned long ret;
- unsigned long bx = (bus << 8) | device_fn;
+ unsigned long bx = (dev->bus->number << 8) | dev->devfn;
__asm__("lcall (%%esi)\n\t"
"jc 1f\n\t"
@@ -742,8 +648,7 @@ static int pci_bios_write_config_dword (unsigned char bus,
* Function table for BIOS32 access
*/
-static struct pci_access pci_bios_access = {
- 1,
+static struct pci_ops pci_bios_access = {
pci_bios_read_config_byte,
pci_bios_read_config_word,
pci_bios_read_config_dword,
@@ -756,7 +661,7 @@ static struct pci_access pci_bios_access = {
* Try to find PCI BIOS.
*/
-__initfunc(static struct pci_access *pci_find_bios(void))
+static struct pci_ops * __init pci_find_bios(void)
{
union bios32 *check;
unsigned char sum;
@@ -855,26 +760,15 @@ static void __init pcibios_sort(void)
#endif
/*
- * Several BIOS'es forget to assign addresses to I/O ranges.
- * We try to fix it here, expecting there are free addresses
- * starting with 0x5800. Ugly, but until we come with better
- * resource management, it's the only simple solution.
+ * Several BIOS'es forget to assign addresses to I/O ranges. Try to fix it.
*/
-static int pci_last_io_addr __initdata = 0x5800;
-
static void __init pcibios_fixup_io_addr(struct pci_dev *dev, int idx)
{
- unsigned short cmd;
unsigned int reg = PCI_BASE_ADDRESS_0 + 4*idx;
- unsigned int size, addr, try;
- unsigned int bus = dev->bus->number;
- unsigned int devfn = dev->devfn;
+ struct resource *r = &dev->resource[idx];
+ unsigned int size = r->end - r->start + 1;
- if (!pci_last_io_addr) {
- printk("PCI: Unassigned I/O space for %02x:%02x\n", bus, devfn);
- return;
- }
if (((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && idx < 4) ||
(dev->class >> 8) == PCI_CLASS_DISPLAY_VGA) {
/*
@@ -888,33 +782,54 @@ static void __init pcibios_fixup_io_addr(struct pci_dev *dev, int idx)
*/
return;
}
- pcibios_read_config_word(bus, devfn, PCI_COMMAND, &cmd);
- pcibios_write_config_word(bus, devfn, PCI_COMMAND, cmd & ~PCI_COMMAND_IO);
- pcibios_write_config_dword(bus, devfn, reg, ~0);
- pcibios_read_config_dword(bus, devfn, reg, &size);
- size = (~(size & PCI_BASE_ADDRESS_IO_MASK) & 0xffff) + 1;
- addr = 0;
- if (!size || size > 0x100)
- printk("PCI: Unable to handle I/O allocation for %02x:%02x (%04x), tell <mj@ucw.cz>\n", bus, devfn, size);
- else {
- do {
- addr = (pci_last_io_addr + size - 1) & ~(size-1);
- pci_last_io_addr = addr + size;
- } while (check_region(addr, size));
- printk("PCI: Assigning I/O space %04x-%04x to device %02x:%02x\n", addr, addr+size-1, bus, devfn);
- pcibios_write_config_dword(bus, devfn, reg, addr | PCI_BASE_ADDRESS_SPACE_IO);
- pcibios_read_config_dword(bus, devfn, reg, &try);
- if ((try & PCI_BASE_ADDRESS_IO_MASK) != addr) {
- addr = 0;
- printk("PCI: Address setup failed, got %04x\n", try);
- } else
- dev->base_address[idx] = try;
+ /*
+ * We need to avoid collisions with `mirrored' VGA ports and other strange
+ * ISA hardware, so we always want the addresses kilobyte aligned.
+ */
+ if (!size || size > 256) {
+ printk(KERN_ERR "PCI: Cannot assign I/O space to device %s, %d bytes are too much.\n", dev->name, size);
+ return;
+ } else {
+ u32 try;
+
+ r->start = 0;
+ r->end = size - 1;
+ if (pci_assign_resource(dev, idx)) {
+ printk(KERN_ERR "PCI: Unable to find free %d bytes of I/O space for device %s.\n", size, dev->name);
+ return;
+ }
+ printk("PCI: Assigned I/O space %04lx-%04lx to device %s\n", r->start, r->end, dev->name);
+ pci_read_config_dword(dev, reg, &try);
+ if ((try & PCI_BASE_ADDRESS_IO_MASK) != r->start) {
+ r->start = 0;
+ pci_write_config_dword(dev, reg, 0);
+ printk(KERN_ERR "PCI: I/O address setup failed, got %04x\n", try);
+ }
}
- if (!addr) {
- pcibios_write_config_dword(bus, devfn, reg, 0);
- dev->base_address[idx] = 0;
+}
+
+/*
+ * Assign address to expansion ROM. This is a highly experimental feature
+ * and you must enable it by "pci=rom". It's even not guaranteed to work
+ * with all cards since the PCI specs allow address decoders to be shared
+ * between the ROM space and one of the standard regions (sigh!).
+ */
+static void __init pcibios_fixup_rom_addr(struct pci_dev *dev)
+{
+ int reg = (dev->hdr_type == 1) ? PCI_ROM_ADDRESS1 : PCI_ROM_ADDRESS;
+ struct resource *r = &dev->resource[PCI_ROM_RESOURCE];
+ unsigned long rom_size = r->end - r->start + 1;
+
+ r->start = 0;
+ r->end = rom_size - 1;
+ if (pci_assign_resource(dev, PCI_ROM_RESOURCE))
+ printk(KERN_ERR "PCI: Unable to find free space for expansion ROM of device %s (0x%lx bytes)\n",
+ dev->name, rom_size);
+ else {
+ DBG("PCI: Assigned address %08lx to expansion ROM of %s (0x%lx bytes)\n", r->start, dev->name, rom_size);
+ pci_write_config_dword(dev, reg, r->start | PCI_ROM_ADDRESS_ENABLE);
+ r->flags |= PCI_ROM_ADDRESS_ENABLE;
}
- pcibios_write_config_word(bus, devfn, PCI_COMMAND, cmd);
}
/*
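
The rewritten fixup no longer sizes the BAR itself; the generic probe has already recorded start and end in dev->resource[], so the size falls out as r->end - r->start + 1. That size ultimately comes from the classic BAR-sizing handshake the removed code performed inline, which for an I/O BAR looks like this (a sketch mirroring the deleted lines):

	u32 probe, size;

	/* write all-ones; the device hardwires the address bits it decodes */
	pci_write_config_dword(dev, reg, ~0);
	pci_read_config_dword(dev, reg, &probe);
	size = (~(probe & PCI_BASE_ADDRESS_IO_MASK) & 0xffff) + 1;
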
@@ -929,18 +844,25 @@ static void __init pcibios_fixup_ghosts(struct pci_bus *b)
struct pci_dev *d, *e, **z;
int mirror = PCI_DEVFN(16,0);
int seen_host_bridge = 0;
+ int i;
DBG("PCI: Scanning for ghost devices on bus %d\n", b->number);
for(d=b->devices; d && d->devfn < mirror; d=d->sibling) {
if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST)
seen_host_bridge++;
- for(e=d->next; e; e=e->sibling)
- if (e->devfn == d->devfn + mirror &&
- e->vendor == d->vendor &&
- e->device == d->device &&
- e->class == d->class &&
- !memcmp(e->base_address, d->base_address, sizeof(e->base_address)))
- break;
+ for(e=d->next; e; e=e->sibling) {
+ if (e->devfn != d->devfn + mirror ||
+ e->vendor != d->vendor ||
+ e->device != d->device ||
+ e->class != d->class)
+ continue;
+ for(i=0; i<PCI_NUM_RESOURCES; i++)
+ if (e->resource[i].start != d->resource[i].start ||
+ e->resource[i].end != d->resource[i].end ||
+ e->resource[i].flags != d->resource[i].flags)
+ continue;
+ break;
+ }
if (!e)
return;
}
@@ -966,12 +888,13 @@ static void __init pcibios_fixup_ghosts(struct pci_bus *b)
*/
static void __init pcibios_fixup_peer_bridges(void)
{
- struct pci_bus *b = &pci_root;
- int i, n, cnt=-1;
+ struct pci_bus *b = pci_root;
+ int n, cnt=-1;
struct pci_dev *d;
+ struct pci_ops *ops = pci_root->ops;
#ifdef CONFIG_VISWS
- pci_scan_peer_bridge(1);
+ pci_scan_bus(1, ops, NULL);
return;
#endif
@@ -981,7 +904,7 @@ static void __init pcibios_fixup_peer_bridges(void)
* since it reads bogus values for non-existent busses and
* chipsets supporting multiple primary busses use conf1 anyway.
*/
- if (access_pci == &pci_direct_conf2)
+ if (ops == &pci_direct_conf2)
return;
#endif
@@ -992,26 +915,31 @@ static void __init pcibios_fixup_peer_bridges(void)
while (n <= 0xff) {
int found = 0;
u16 l;
- for(i=0; i<256; i += 8)
- if (!pcibios_read_config_word(n, i, PCI_VENDOR_ID, &l) &&
+ struct pci_bus bus;
+ struct pci_dev dev;
+ bus.number = n;
+ bus.ops = ops;
+ dev.bus = &bus;
+ for(dev.devfn=0; dev.devfn<256; dev.devfn += 8)
+ if (!pci_read_config_word(&dev, PCI_VENDOR_ID, &l) &&
l != 0x0000 && l != 0xffff) {
#ifdef CONFIG_PCI_BIOS
if (pci_bios_present) {
int err, idx = 0;
u8 bios_bus, bios_dfn;
u16 d;
- pcibios_read_config_word(n, i, PCI_DEVICE_ID, &d);
- DBG("BIOS test for %02x:%02x (%04x:%04x)\n", n, i, l, d);
+ pci_read_config_word(&dev, PCI_DEVICE_ID, &d);
+ DBG("BIOS test for %02x:%02x (%04x:%04x)\n", n, dev.devfn, l, d);
while (!(err = pci_bios_find_device(l, d, idx, &bios_bus, &bios_dfn)) &&
- (bios_bus != n || bios_dfn != i))
+ (bios_bus != n || bios_dfn != dev.devfn))
idx++;
if (err)
break;
}
#endif
- DBG("Found device at %02x:%02x\n", n, i);
+ DBG("Found device at %02x:%02x\n", n, dev.devfn);
found++;
- if (!pcibios_read_config_word(n, i, PCI_CLASS_DEVICE, &l) &&
+ if (!pci_read_config_word(&dev, PCI_CLASS_DEVICE, &l) &&
l == PCI_CLASS_BRIDGE_HOST)
cnt++;
}
@@ -1019,8 +947,9 @@ static void __init pcibios_fixup_peer_bridges(void)
break;
if (found) {
printk("PCI: Discovered primary peer bus %02x\n", n);
- b = pci_scan_peer_bridge(n);
- n = b->subordinate;
+ b = pci_scan_bus(n, ops, NULL);
+ if (b)
+ n = b->subordinate;
}
n++;
}
@@ -1037,6 +966,7 @@ static void __init pci_fixup_i450nx(struct pci_dev *d)
*/
int pxb, reg;
u8 busno, suba, subb;
+ printk("PCI: Searching for i450NX host bridges on %s\n", d->name);
reg = 0xd0;
for(pxb=0; pxb<2; pxb++) {
pci_read_config_byte(d, reg++, &busno);
@@ -1044,9 +974,9 @@ static void __init pci_fixup_i450nx(struct pci_dev *d)
pci_read_config_byte(d, reg++, &subb);
DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
if (busno)
- pci_scan_peer_bridge(busno); /* Bus A */
+ pci_scan_bus(busno, pci_root->ops, NULL); /* Bus A */
if (suba < subb)
- pci_scan_peer_bridge(suba+1); /* Bus B */
+ pci_scan_bus(suba+1, pci_root->ops, NULL); /* Bus B */
}
pci_probe |= PCI_NO_PEER_FIXUP;
}
@@ -1059,35 +989,44 @@ static void __init pci_fixup_umc_ide(struct pci_dev *d)
*/
int i;
+ printk("PCI: Fixing base address flags for device %s\n", d->name);
for(i=0; i<4; i++)
- d->base_address[i] |= PCI_BASE_ADDRESS_SPACE_IO;
+ d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO;
}
-struct dev_ex {
- u16 vendor, device;
- void (*handler)(struct pci_dev *);
- char *comment;
+struct pci_fixup pcibios_fixups[] = {
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx },
+ { PCI_FIXUP_HEADER, PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide },
+ { 0 }
};
-static struct dev_ex __initdata dev_ex_table[] = {
- { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx, "Scanning peer host bridges" },
- { PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide, "Working around UM8886BF bugs" }
-};
+/*
+ * Allocate resources for all PCI devices. We need to do that before
+ * we try to fix up anything.
+ */
-static void __init pcibios_scan_buglist(struct pci_bus *b)
+static void __init pcibios_claim_resources(struct pci_bus *bus)
{
- struct pci_dev *d;
- int i;
+ struct pci_dev *dev;
+ int idx;
- for(d=b->devices; d; d=d->sibling)
- for(i=0; i<sizeof(dev_ex_table)/sizeof(dev_ex_table[0]); i++) {
- struct dev_ex *e = &dev_ex_table[i];
- if (e->vendor == d->vendor && e->device == d->device) {
- printk("PCI: %02x:%02x [%04x/%04x]: %s\n",
- b->number, d->devfn, d->vendor, d->device, e->comment);
- e->handler(d);
- }
+ while (bus) {
+ for (dev=bus->devices; dev; dev=dev->sibling)
+ for (idx = 0; idx < PCI_NUM_RESOURCES; idx++) {
+ struct resource *r = &dev->resource[idx];
+ struct resource *pr;
+ if (!r->start)
+ continue;
+ pr = pci_find_parent_resource(dev, r);
+ if (!pr || request_resource(pr, r) < 0) {
+ printk(KERN_ERR "PCI: Address space collision on region %d of device %s\n", idx, dev->name);
+ /* We probably should disable the region, shouldn't we? */
+ }
}
+ if (bus->children)
+ pcibios_claim_resources(bus->children);
+ bus = bus->next;
+ }
}
/*
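
pcibios_claim_resources() walks every bus and inserts each device's regions into the new hierarchical resource tree, so that later request_region()-style callers collide with PCI devices correctly. A driver-side claim against the same tree would look roughly like this (illustrative device name and port range, assuming the 2.3-era ioport_resource root):

static struct resource mydev_ports = { "mydev", 0x300, 0x31f };

	if (request_resource(&ioport_resource, &mydev_ports))
		printk(KERN_ERR "mydev: ports 0x300-0x31f already claimed\n");
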
@@ -1112,13 +1051,12 @@ static void __init pcibios_fixup_devices(void)
*/
has_io = has_mem = 0;
for(i=0; i<6; i++) {
- unsigned long a = dev->base_address[i];
- if (a & PCI_BASE_ADDRESS_SPACE_IO) {
+ struct resource *r = &dev->resource[i];
+ if (r->flags & PCI_BASE_ADDRESS_SPACE_IO) {
has_io = 1;
- a &= PCI_BASE_ADDRESS_IO_MASK;
- if (!a || a == PCI_BASE_ADDRESS_IO_MASK)
+ if (!r->start || r->start == PCI_BASE_ADDRESS_IO_MASK)
pcibios_fixup_io_addr(dev, i);
- } else if (a & PCI_BASE_ADDRESS_MEM_MASK)
+ } else if (r->start)
has_mem = 1;
}
/*
@@ -1133,18 +1071,21 @@ static void __init pcibios_fixup_devices(void)
((dev->class >> 8) != PCI_CLASS_STORAGE_IDE)) {
pci_read_config_word(dev, PCI_COMMAND, &cmd);
if (has_io && !(cmd & PCI_COMMAND_IO)) {
- printk("PCI: Enabling I/O for device %02x:%02x\n",
- dev->bus->number, dev->devfn);
+ printk("PCI: Enabling I/O for device %s\n", dev->name);
cmd |= PCI_COMMAND_IO;
pci_write_config_word(dev, PCI_COMMAND, cmd);
}
if (has_mem && !(cmd & PCI_COMMAND_MEMORY)) {
- printk("PCI: Enabling memory for device %02x:%02x\n",
- dev->bus->number, dev->devfn);
+ printk("PCI: Enabling memory for device %s\n", dev->name);
cmd |= PCI_COMMAND_MEMORY;
pci_write_config_word(dev, PCI_COMMAND, cmd);
}
}
+ /*
+ * Assign address to expansion ROM if requested.
+ */
+ if ((pci_probe & PCI_ASSIGN_ROMS) && dev->resource[PCI_ROM_RESOURCE].end)
+ pcibios_fixup_rom_addr(dev);
#if defined(CONFIG_X86_IO_APIC)
/*
* Recalculate IRQ numbers if we use the I/O APIC
@@ -1185,38 +1126,27 @@ static void __init pcibios_fixup_devices(void)
}
/*
- * Arch-dependent fixups.
+ * Called after each bus is probed, but before its children
+ * are examined.
*/
-__initfunc(void pcibios_fixup(void))
-{
- if (!(pci_probe & PCI_NO_PEER_FIXUP))
- pcibios_fixup_peer_bridges();
- pcibios_fixup_devices();
-
-#ifdef CONFIG_PCI_BIOS
- if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
- pcibios_sort();
-#endif
-}
-
-__initfunc(void pcibios_fixup_bus(struct pci_bus *b))
+void __init pcibios_fixup_bus(struct pci_bus *b)
{
pcibios_fixup_ghosts(b);
- pcibios_scan_buglist(b);
}
/*
* Initialization. Try all known PCI access methods. Note that we support
* using both PCI BIOS and direct access: in such cases, we use I/O ports
* to access config space, but we still keep BIOS order of cards to be
- * compatible with 2.0.X. This should go away in 2.3.
+ * compatible with 2.0.X. This should go away some day.
*/
-__initfunc(void pcibios_init(void))
+void __init pcibios_init(void)
{
- struct pci_access *bios = NULL;
- struct pci_access *dir = NULL;
+ struct pci_ops *bios = NULL;
+ struct pci_ops *dir = NULL;
+ struct pci_ops *ops;
#ifdef CONFIG_PCI_BIOS
if ((pci_probe & PCI_PROBE_BIOS) && ((bios = pci_find_bios()))) {
@@ -1229,23 +1159,33 @@ __initfunc(void pcibios_init(void))
dir = pci_check_direct();
#endif
if (dir)
- access_pci = dir;
+ ops = dir;
else if (bios)
- access_pci = bios;
+ ops = bios;
+ else {
+ printk("PCI: No PCI bus detected\n");
+ return;
+ }
+
+ printk("PCI: Probing PCI hardware\n");
+ pci_scan_bus(0, ops, NULL);
+
+ if (!(pci_probe & PCI_NO_PEER_FIXUP))
+ pcibios_fixup_peer_bridges();
+ pcibios_claim_resources(pci_root);
+ pcibios_fixup_devices();
+
+#ifdef CONFIG_PCI_BIOS
+ if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT))
+ pcibios_sort();
+#endif
}
-__initfunc(char *pcibios_setup(char *str))
+char * __init pcibios_setup(char *str)
{
if (!strcmp(str, "off")) {
pci_probe = 0;
return NULL;
- } else if (!strncmp(str, "io=", 3)) {
- char *p;
- unsigned int x = simple_strtoul(str+3, &p, 16);
- if (p && *p)
- return str;
- pci_last_io_addr = x;
- return NULL;
}
#ifdef CONFIG_PCI_BIOS
else if (!strcmp(str, "bios")) {
@@ -1272,6 +1212,9 @@ __initfunc(char *pcibios_setup(char *str))
else if (!strcmp(str, "nopeer")) {
pci_probe |= PCI_NO_PEER_FIXUP;
return NULL;
+ } else if (!strcmp(str, "rom")) {
+ pci_probe |= PCI_ASSIGN_ROMS;
+ return NULL;
}
return str;
}
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 3a5fc93a1..47f23b6b6 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -145,7 +145,30 @@ ENTRY(lcall7)
andl $-8192,%ebx # GET_CURRENT
movl exec_domain(%ebx),%edx # Get the execution domain
movl 4(%edx),%edx # Get the lcall7 handler for the domain
+ pushl $0x7
call *%edx
+ addl $4, %esp
+ popl %eax
+ jmp ret_from_sys_call
+
+ENTRY(lcall27)
+ pushfl # We get a different stack layout with call gates,
+ pushl %eax # which has to be cleaned up later..
+ SAVE_ALL
+ movl EIP(%esp),%eax # due to call gates, this is eflags, not eip..
+ movl CS(%esp),%edx # this is eip..
+ movl EFLAGS(%esp),%ecx # and this is cs..
+ movl %eax,EFLAGS(%esp) #
+ movl %edx,EIP(%esp) # Now we move them to their "normal" places
+ movl %ecx,CS(%esp) #
+ movl %esp,%ebx
+ pushl %ebx
+ andl $-8192,%ebx # GET_CURRENT
+ movl exec_domain(%ebx),%edx # Get the execution domain
+ movl 4(%edx),%edx # Get the lcall7 handler for the domain
+ pushl $0x27
+ call *%edx
+ addl $4, %esp
popl %eax
jmp ret_from_sys_call
@@ -153,11 +176,9 @@ ENTRY(lcall7)
ALIGN
.globl ret_from_fork
ret_from_fork:
-#ifdef __SMP__
pushl %ebx
call SYMBOL_NAME(schedule_tail)
addl $4, %esp
-#endif /* __SMP__ */
GET_CURRENT(%ebx)
jmp ret_from_sys_call
@@ -483,7 +504,7 @@ ENTRY(sys_call_table)
.long SYMBOL_NAME(sys_uname)
.long SYMBOL_NAME(sys_iopl) /* 110 */
.long SYMBOL_NAME(sys_vhangup)
- .long SYMBOL_NAME(sys_idle)
+ .long SYMBOL_NAME(sys_ni_syscall) /* old "idle" system call */
.long SYMBOL_NAME(sys_vm86old)
.long SYMBOL_NAME(sys_wait4)
.long SYMBOL_NAME(sys_swapoff) /* 115 */
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index acbc3e325..ac854e721 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -8,11 +8,12 @@
*/
.text
-#include <linux/tasks.h>
+#include <linux/threads.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/pgtable.h>
+#include <asm/desc.h>
#define CL_MAGIC_ADDR 0x90020
@@ -330,7 +331,7 @@ ignore_int:
* of tasks we can have..
*/
#define IDT_ENTRIES 256
-#define GDT_ENTRIES (12+2*NR_TASKS)
+#define GDT_ENTRIES (__TSS(NR_CPUS))
.globl SYMBOL_NAME(idt)
@@ -519,8 +520,7 @@ ENTRY(empty_zero_page)
ALIGN
/*
- * This contains up to 8192 quadwords depending on NR_TASKS - 64kB of
- * gdt entries. Ugh.
+ * This contains typically 140 quadwords, depending on NR_CPUS.
*
* NOTE! Make sure the gdt descriptor in head.S matches this if you
* change anything.
@@ -542,7 +542,7 @@ ENTRY(gdt_table)
.quad 0x00409a0000000000 /* 0x48 APM CS code */
.quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */
.quad 0x0040920000000000 /* 0x58 APM DS data */
- .fill 2*NR_TASKS,8,0 /* space for LDT's and TSS's etc */
+ .fill NR_CPUS*4,8,0 /* space for TSS's and LDT's */
/*
* This is to aid debugging, the various locking macros will be putting
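
The "typically 140 quadwords" in the comment above is simple arithmetic: 12 fixed descriptors (the null entry through the 0x58 APM data segment) plus the .fill of 4 descriptors per CPU. With the usual SMP setting of NR_CPUS = 32 that is 12 + 4*32 = 140 entries, versus the 2*NR_TASKS entries (up to 8192) that the old per-task TSS/LDT scheme needed.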
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
index afcfd274e..61422f372 100644
--- a/arch/i386/kernel/i386_ksyms.c
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -76,7 +76,6 @@ EXPORT_SYMBOL(strlen_user);
#ifdef __SMP__
EXPORT_SYMBOL(cpu_data);
EXPORT_SYMBOL(kernel_flag);
-EXPORT_SYMBOL(smp_invalidate_needed);
EXPORT_SYMBOL(cpu_number_map);
EXPORT_SYMBOL(__cpu_logical_map);
EXPORT_SYMBOL(smp_num_cpus);
@@ -89,6 +88,7 @@ EXPORT_SYMBOL(synchronize_bh);
EXPORT_SYMBOL(global_bh_count);
EXPORT_SYMBOL(global_bh_lock);
EXPORT_SYMBOL(global_irq_holder);
+EXPORT_SYMBOL(i386_bh_lock);
EXPORT_SYMBOL(__global_cli);
EXPORT_SYMBOL(__global_sti);
EXPORT_SYMBOL(__global_save_flags);
@@ -111,6 +111,7 @@ EXPORT_SYMBOL(mca_isadapter);
EXPORT_SYMBOL(mca_mark_as_used);
EXPORT_SYMBOL(mca_mark_as_unused);
EXPORT_SYMBOL(mca_find_unused_adapter);
+EXPORT_SYMBOL(mca_is_adapter_used);
#endif
#ifdef CONFIG_VT
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
new file mode 100644
index 000000000..ce4082848
--- /dev/null
+++ b/arch/i386/kernel/i8259.c
@@ -0,0 +1,409 @@
+#include <linux/config.h>
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/malloc.h>
+#include <linux/random.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/bitops.h>
+#include <asm/smp.h>
+#include <asm/pgtable.h>
+#include <asm/delay.h>
+#include <asm/desc.h>
+
+#include <linux/irq.h>
+
+
+/*
+ * Intel-specific "no controller" code. It is odd that the
+ * no-controller case should be architecture-dependent, but
+ * see the #ifdef __SMP__ in ack_none() below.
+ */
+
+static void enable_none(unsigned int irq) { }
+static unsigned int startup_none(unsigned int irq) { return 0; }
+static void disable_none(unsigned int irq) { }
+static void ack_none(unsigned int irq)
+{
+#ifdef __SMP__
+ /*
+ * [currently unexpected vectors happen only on SMP and APIC.
+ * if we want to have non-APIC and non-8259A controllers
+ * in the future with unexpected vectors, this ack should
+ * probably be made controller-specific.]
+ */
+ ack_APIC_irq();
+#endif
+}
+
+/* startup is the same as "enable", shutdown is same as "disable" */
+#define shutdown_none disable_none
+#define end_none enable_none
+
+struct hw_interrupt_type no_irq_type = {
+ "none",
+ startup_none,
+ shutdown_none,
+ enable_none,
+ disable_none,
+ ack_none,
+ end_none
+};
+
+
+/*
+ * This is the 'legacy' 8259A Programmable Interrupt Controller,
+ * present in the majority of PC/AT boxes, plus some generic
+ * x86-specific things, if generic specifics make any sense at all.
+ * This file should become arch/i386/kernel/irq.c once the old
+ * irq.c moves to arch-independent land.
+ */
+/*
+ * This builds up the IRQ handler stubs using some ugly macros in irq.h
+ *
+ * These macros create the low-level assembly IRQ routines that save
+ * register context and call do_IRQ(). do_IRQ() then does all the
+ * operations that are needed to keep the AT (or SMP IOAPIC)
+ * interrupt-controller happy.
+ */
+
+
+BUILD_COMMON_IRQ()
+
+#define BI(x,y) \
+ BUILD_IRQ(##x##y)
+
+#define BUILD_16_IRQS(x) \
+ BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
+ BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
+ BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
+ BI(x,c) BI(x,d) BI(x,e) BI(x,f)
+
+/*
+ * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
+ * (these are usually mapped to vectors 0x20-0x30)
+ */
+BUILD_16_IRQS(0x0)
+
+#ifdef CONFIG_X86_IO_APIC
+/*
+ * The IO-APIC gives us many more interrupt sources. Most of these
+ * are unused but an SMP system is supposed to have enough memory ...
+ * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
+ * across the spectrum, so we really want to be prepared to get all
+ * of these. Plus, more powerful systems might have more than 64
+ * IO-APIC registers.
+ *
+ * (these are usually mapped into the 0x30-0xff vector range)
+ */
+ BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
+BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
+BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
+BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
+#endif
+
+#undef BUILD_16_IRQS
+#undef BI
+
+
+#ifdef __SMP__
+/*
+ * The following vectors are part of the Linux architecture, there
+ * is no hardware IRQ pin equivalent for them, they are triggered
+ * through the ICC by us (IPIs)
+ */
+BUILD_SMP_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
+BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
+BUILD_SMP_INTERRUPT(stop_cpu_interrupt,STOP_CPU_VECTOR)
+BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
+BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+
+/*
+ * every pentium local APIC has two 'local interrupts', with a
+ * soft-definable vector attached to both interrupts, one of
+ * which is a timer interrupt, the other one is error counter
+ * overflow. Linux uses the local APIC timer interrupt to get
+ * a much simpler SMP time architecture:
+ */
+BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
+
+#endif
+
+#define IRQ(x,y) \
+ IRQ##x##y##_interrupt
+
+#define IRQLIST_16(x) \
+ IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
+ IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
+ IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
+ IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
+
+static void (*interrupt[NR_IRQS])(void) = {
+ IRQLIST_16(0x0),
+
+#ifdef CONFIG_X86_IO_APIC
+ IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
+ IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
+ IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
+ IRQLIST_16(0xc), IRQLIST_16(0xd)
+#endif
+};
+
+#undef IRQ
+#undef IRQLIST_16
+
+
+
+
+static void enable_8259A_irq(unsigned int irq);
+void disable_8259A_irq(unsigned int irq);
+
+/* shutdown is same as "disable" */
+#define end_8259A_irq enable_8259A_irq
+#define shutdown_8259A_irq disable_8259A_irq
+
+static void mask_and_ack_8259A(unsigned int);
+
+static unsigned int startup_8259A_irq(unsigned int irq)
+{
+ enable_8259A_irq(irq);
+ return 0; /* never anything pending */
+}
+
+static struct hw_interrupt_type i8259A_irq_type = {
+ "XT-PIC",
+ startup_8259A_irq,
+ shutdown_8259A_irq,
+ enable_8259A_irq,
+ disable_8259A_irq,
+ mask_and_ack_8259A,
+ end_8259A_irq
+};
+
+/*
+ * 8259A PIC functions to handle ISA devices:
+ */
+
+/*
+ * This contains the irq mask for both 8259A irq controllers,
+ */
+static unsigned int cached_irq_mask = 0xffff;
+
+#define __byte(x,y) (((unsigned char *)&(y))[x])
+#define cached_21 (__byte(0,cached_irq_mask))
+#define cached_A1 (__byte(1,cached_irq_mask))
+
+/*
+ * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
+ * boards the timer interrupt is not connected to any IO-APIC pin, it's
+ * fed to the CPU IRQ line directly.
+ *
+ * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
+ * this 'mixed mode' IRQ handling costs nothing because it's only used
+ * at IRQ setup time.
+ */
+unsigned long io_apic_irqs = 0;
+
+/*
+ * These have to be protected by the irq controller spinlock
+ * before being called.
+ */
+void disable_8259A_irq(unsigned int irq)
+{
+ unsigned int mask = 1 << irq;
+ cached_irq_mask |= mask;
+ if (irq & 8) {
+ outb(cached_A1,0xA1);
+ } else {
+ outb(cached_21,0x21);
+ }
+}
+
+static void enable_8259A_irq(unsigned int irq)
+{
+ unsigned int mask = ~(1 << irq);
+ cached_irq_mask &= mask;
+ if (irq & 8) {
+ outb(cached_A1,0xA1);
+ } else {
+ outb(cached_21,0x21);
+ }
+}
+
+int i8259A_irq_pending(unsigned int irq)
+{
+ unsigned int mask = 1<<irq;
+
+ if (irq < 8)
+ return (inb(0x20) & mask);
+ return (inb(0xA0) & (mask >> 8));
+}
+
+void make_8259A_irq(unsigned int irq)
+{
+ disable_irq_nosync(irq);
+ io_apic_irqs &= ~(1<<irq);
+ irq_desc[irq].handler = &i8259A_irq_type;
+ enable_irq(irq);
+}
+
+/*
+ * Careful! The 8259A is a fragile beast, it pretty
+ * much _has_ to be done exactly like this (mask it
+ * first, _then_ send the EOI, and the order of EOI
+ * to the two 8259s is important!
+ */
+static void mask_and_ack_8259A(unsigned int irq)
+{
+ cached_irq_mask |= 1 << irq;
+ if (irq & 8) {
+ inb(0xA1); /* DUMMY */
+ outb(cached_A1,0xA1);
+ outb(0x62,0x20); /* Specific EOI to cascade */
+ outb(0x20,0xA0);
+ } else {
+ inb(0x21); /* DUMMY */
+ outb(cached_21,0x21);
+ outb(0x20,0x20);
+ }
+}
+
+#ifndef CONFIG_VISWS
+/*
+ * Note that on a 486, we don't want to do a SIGFPE on an irq13
+ * as the irq is unreliable, and exception 16 works correctly
+ * (ie as explained in the intel literature). On a 386, you
+ * can't use exception 16 due to bad IBM design, so we have to
+ * rely on the less exact irq13.
+ *
+ * Careful.. Not only is IRQ13 unreliable, but it also
+ * leads to races. IBM designers who came up with it should
+ * be shot.
+ */
+
+static void math_error_irq(int cpl, void *dev_id, struct pt_regs *regs)
+{
+ outb(0,0xF0);
+ if (ignore_irq13 || !boot_cpu_data.hard_math)
+ return;
+ math_error();
+}
+
+static struct irqaction irq13 = { math_error_irq, 0, 0, "fpu", NULL, NULL };
+
+/*
+ * IRQ2 is cascade interrupt to second interrupt controller
+ */
+
+static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL};
+#endif
+
+
+void init_ISA_irqs (void)
+{
+ int i;
+
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc[i].status = IRQ_DISABLED;
+ irq_desc[i].action = 0;
+ irq_desc[i].depth = 0;
+
+ if (i < 16) {
+ /*
+ * 16 old-style INTA-cycle interrupts:
+ */
+ irq_desc[i].handler = &i8259A_irq_type;
+ } else {
+ /*
+ * 'high' PCI IRQs filled in on demand
+ */
+ irq_desc[i].handler = &no_irq_type;
+ }
+ }
+}
+
+void __init init_IRQ(void)
+{
+ int i;
+
+#ifndef CONFIG_X86_VISWS_APIC
+ init_ISA_irqs();
+#else
+ init_VISWS_APIC_irqs();
+#endif
+ /*
+ * Cover the whole vector space, no vector can escape
+ * us. (some of these will be overridden and become
+ * 'special' SMP interrupts)
+ */
+ for (i = 0; i < NR_IRQS; i++) {
+ int vector = FIRST_EXTERNAL_VECTOR + i;
+ if (vector != SYSCALL_VECTOR)
+ set_intr_gate(vector, interrupt[i]);
+ }
+
+#ifdef __SMP__
+
+	/*
+	 * IRQ0 must be given a fixed assignment and initialized
+	 * before init_IRQ_SMP.
+	 */
+ set_intr_gate(IRQ0_TRAP_VECTOR, interrupt[0]);
+
+ /*
+ * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+ * IPI, driven by wakeup.
+ */
+ set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+
+ /* IPI for invalidation */
+ set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+
+ /* IPI for CPU halt */
+ set_intr_gate(STOP_CPU_VECTOR, stop_cpu_interrupt);
+
+ /* self generated IPI for local APIC timer */
+ set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+
+ /* IPI for generic function call */
+ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+
+ /* IPI vector for APIC spurious interrupts */
+ set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
+#endif
+
+ /*
+ * Set the clock to HZ Hz, we already have a valid
+ * vector now:
+ */
+ outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(LATCH & 0xff , 0x40); /* LSB */
+ outb(LATCH >> 8 , 0x40); /* MSB */
+
+#ifndef CONFIG_VISWS
+ setup_irq(2, &irq2);
+ setup_irq(13, &irq13);
+#endif
+}
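For the curious, the PIT programming at the end of init_IRQ() works out as follows: control word 0x34 selects channel 0, lobyte/hibyte access, mode 2 (rate generator), binary counting. Assuming the usual 1193180 Hz i8253 input clock and HZ = 100, LATCH = (1193180 + 100/2) / 100 = 11932, so the counter wraps about 1193180 / 11932 = 100.007 times per second -- as close to HZ as the hardware divider allows.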
+
+#ifdef CONFIG_X86_IO_APIC
+void __init init_IRQ_SMP(void)
+{
+ int i;
+ for (i = 0; i < NR_IRQS ; i++)
+ if (IO_APIC_VECTOR(i) > 0)
+ set_intr_gate(IO_APIC_VECTOR(i), interrupt[i]);
+}
+#endif
diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c
index 0faa696a4..84fba5106 100644
--- a/arch/i386/kernel/init_task.c
+++ b/arch/i386/kernel/init_task.c
@@ -1,5 +1,6 @@
#include <linux/mm.h>
#include <linux/sched.h>
+#include <linux/init.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -7,7 +8,6 @@
static struct vm_area_struct init_mmap = INIT_MMAP;
static struct fs_struct init_fs = INIT_FS;
-static struct file * init_fd_array[NR_OPEN] = { NULL, };
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;
struct mm_struct init_mm = INIT_MM(init_mm);
@@ -22,4 +22,13 @@ struct mm_struct init_mm = INIT_MM(init_mm);
union task_union init_task_union
__attribute__((__section__(".data.init_task"))) =
{ INIT_TASK(init_task_union.task) };
-
+
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data.cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
+struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS };
+
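The "[0 ... NR_CPUS-1] = INIT_TSS" initializer is the GCC range-designator extension, not ISO C: every element in the range receives the same initializer. A tiny illustration:

	int prio[8] = { [0 ... 7] = -1 };	/* all eight elements become -1 */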
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 42ebd9643..34e3ff86f 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -5,6 +5,12 @@
*
* Many thanks to Stig Venaas for trying out countless experimental
* patches and reporting/debugging problems patiently!
+ *
+ * (c) 1999, Multiple IO-APIC support, developed by
+ * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
+ * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
+ * further tested and cleaned up by Zach Brown <zab@redhat.com>
+ * and Ingo Molnar <mingo@redhat.com>
*/
#include <linux/sched.h>
@@ -13,13 +19,13 @@
#include <linux/delay.h>
#include <asm/io.h>
-#include "irq.h"
+#include <linux/irq.h>
/*
* volatile is justified in this case, IO-APIC register contents
* might change spontaneously, GCC should not cache it
*/
-#define IO_APIC_BASE ((volatile int *)fix_to_virt(FIX_IO_APIC_BASE))
+#define IO_APIC_BASE(idx) ((volatile int *)__fix_to_virt(FIX_IO_APIC_BASE_0 + idx))
/*
* The structure of the IO-APIC:
@@ -45,9 +51,10 @@ struct IO_APIC_reg_02 {
} __attribute__ ((packed));
/*
- * # of IRQ routing registers
+ * # of IO-APICs and # of IRQ routing registers
*/
-int nr_ioapic_registers = 0;
+int nr_ioapics = 0;
+int nr_ioapic_registers[MAX_IO_APICS];
enum ioapic_irq_destination_types {
dest_Fixed = 0,
@@ -94,6 +101,7 @@ enum mp_irq_source_types {
mp_ExtINT = 3
};
+struct mpc_config_ioapic mp_apics[MAX_IO_APICS];/* I/O APIC entries */
int mp_irq_entries = 0; /* # of MP IRQ source entries */
struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* MP IRQ source entries */
@@ -108,34 +116,34 @@ int mpc_default_type = 0; /* non-0 if default (table-less)
* between pins and IRQs.
*/
-static inline unsigned int io_apic_read(unsigned int reg)
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
{
- *IO_APIC_BASE = reg;
- return *(IO_APIC_BASE+4);
+ *IO_APIC_BASE(apic) = reg;
+ return *(IO_APIC_BASE(apic)+4);
}
-static inline void io_apic_write(unsigned int reg, unsigned int value)
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
{
- *IO_APIC_BASE = reg;
- *(IO_APIC_BASE+4) = value;
+ *IO_APIC_BASE(apic) = reg;
+ *(IO_APIC_BASE(apic)+4) = value;
}
/*
* Re-write a value: to be used for read-modify-write
* cycles where the read already set up the index register.
*/
-static inline void io_apic_modify(unsigned int value)
+static inline void io_apic_modify(unsigned int apic, unsigned int value)
{
- *(IO_APIC_BASE+4) = value;
+ *(IO_APIC_BASE(apic)+4) = value;
}
/*
* Synchronize the IO-APIC and the CPU by doing
* a dummy read from the IO-APIC
*/
-static inline void io_apic_sync(void)
+static inline void io_apic_sync(unsigned int apic)
{
- (void) *(IO_APIC_BASE+4);
+ (void) *(IO_APIC_BASE(apic)+4);
}
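All four accessors above go through the IO-APIC's indirect register window: writing a register index at the base address selects a register, and the 32-bit word at base+4 (in int units, i.e. byte offset 0x10) is the data. For example, reading the 64-bit routing entry for pin 3 of IO-APIC 0 (index registers 0x16 and 0x17) would look like:

	low  = io_apic_read(0, 0x10 + 2*3);	/* low dword of the entry */
	high = io_apic_read(0, 0x11 + 2*3);	/* high dword */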
/*
@@ -146,7 +154,7 @@ static inline void io_apic_sync(void)
#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
static struct irq_pin_list {
- int pin, next;
+ int apic, pin, next;
} irq_2_pin[PIN_MAP_SIZE];
/*
@@ -154,7 +162,7 @@ static struct irq_pin_list {
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
-static void add_pin_to_irq(unsigned int irq, int pin)
+static void add_pin_to_irq(unsigned int irq, int apic, int pin)
{
static int first_free_entry = NR_IRQS;
struct irq_pin_list *entry = irq_2_pin + irq;
@@ -168,6 +176,7 @@ static void add_pin_to_irq(unsigned int irq, int pin)
if (++first_free_entry >= PIN_MAP_SIZE)
panic("io_apic.c: whoops");
}
+ entry->apic = apic;
entry->pin = pin;
}
@@ -183,9 +192,9 @@ static void name##_IO_APIC_irq(unsigned int irq) \
pin = entry->pin; \
if (pin == -1) \
break; \
- reg = io_apic_read(0x10 + R + pin*2); \
+ reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
reg ACTION; \
- io_apic_modify(reg); \
+ io_apic_modify(entry->apic, reg); \
if (!entry->next) \
break; \
entry = irq_2_pin + entry->next; \
@@ -197,12 +206,12 @@ static void name##_IO_APIC_irq(unsigned int irq) \
* We disable IO-APIC IRQs by setting their 'destination CPU mask' to
* zero. Trick by Ramesh Nalluri.
*/
-DO_ACTION( disable, 1, &= 0x00ffffff, io_apic_sync()) /* destination = 0x00 */
+DO_ACTION( disable, 1, &= 0x00ffffff, io_apic_sync(entry->apic))/* destination = 0x00 */
DO_ACTION( enable, 1, |= 0xff000000, ) /* destination = 0xff */
-DO_ACTION( mask, 0, |= 0x00010000, io_apic_sync()) /* mask = 1 */
+DO_ACTION( mask, 0, |= 0x00010000, io_apic_sync(entry->apic))/* mask = 1 */
DO_ACTION( unmask, 0, &= 0xfffeffff, ) /* mask = 0 */
-static void clear_IO_APIC_pin(unsigned int pin)
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
struct IO_APIC_route_entry entry;
@@ -211,16 +220,17 @@ static void clear_IO_APIC_pin(unsigned int pin)
*/
memset(&entry, 0, sizeof(entry));
entry.mask = 1;
- io_apic_write(0x10 + 2 * pin, *(((int *)&entry) + 0));
- io_apic_write(0x11 + 2 * pin, *(((int *)&entry) + 1));
+ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
}
static void clear_IO_APIC (void)
{
- int pin;
+ int apic, pin;
- for (pin = 0; pin < nr_ioapic_registers; pin++)
- clear_IO_APIC_pin(pin);
+ for (apic = 0; apic < nr_ioapics; apic++)
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+ clear_IO_APIC_pin(apic, pin);
}
/*
@@ -232,50 +242,54 @@ static void clear_IO_APIC (void)
int pirq_entries [MAX_PIRQS];
int pirqs_enabled;
-void __init ioapic_setup(char *str, int *ints)
+static int __init ioapic_setup(char *str)
{
extern int skip_ioapic_setup; /* defined in arch/i386/kernel/smp.c */
skip_ioapic_setup = 1;
+ return 1;
}
-void __init ioapic_pirq_setup(char *str, int *ints)
+__setup("noapic", ioapic_setup);
+
+static int __init ioapic_pirq_setup(char *str)
{
int i, max;
+ int ints[MAX_PIRQS+1];
+
+ get_options(str, ARRAY_SIZE(ints), ints);
for (i = 0; i < MAX_PIRQS; i++)
pirq_entries[i] = -1;
- if (!ints) {
- pirqs_enabled = 0;
- printk("PIRQ redirection, trusting MP-BIOS.\n");
+ pirqs_enabled = 1;
+ printk("PIRQ redirection, working around broken MP-BIOS.\n");
+ max = MAX_PIRQS;
+ if (ints[0] < MAX_PIRQS)
+ max = ints[0];
- } else {
- pirqs_enabled = 1;
- printk("PIRQ redirection, working around broken MP-BIOS.\n");
- max = MAX_PIRQS;
- if (ints[0] < MAX_PIRQS)
- max = ints[0];
-
- for (i = 0; i < max; i++) {
- printk("... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
- /*
- * PIRQs are mapped upside down, usually.
- */
- pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
- }
+ for (i = 0; i < max; i++) {
+ printk("... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+ /*
+ * PIRQs are mapped upside down, usually.
+ */
+ pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
}
+ return 1;
}
+__setup("pirq=", ioapic_pirq_setup);
+
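Both conversions above move from the old setup-function table to the 2.3 __setup() mechanism: the handler now receives the raw option string and parses it itself via get_options(). A hypothetical driver-style example (mydev_irq and the "mydev=" option are made up for illustration):

	static int mydev_irq;

	static int __init mydev_setup(char *str)
	{
		int ints[4];	/* ints[0] holds the count parsed */

		get_options(str, ARRAY_SIZE(ints), ints);
		if (ints[0] > 0)
			mydev_irq = ints[1];
		return 1;
	}
	__setup("mydev=", mydev_setup);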
/*
* Find the IRQ entry number of a certain pin.
*/
-static int __init find_irq_entry(int pin, int type)
+static int __init find_irq_entry(int apic, int pin, int type)
{
int i;
for (i = 0; i < mp_irq_entries; i++)
if ( (mp_irqs[i].mpc_irqtype == type) &&
+ (mp_irqs[i].mpc_dstapic == mp_apics[apic].mpc_apicid) &&
(mp_irqs[i].mpc_dstirq == pin))
return i;
@@ -305,23 +319,28 @@ static int __init find_timer_pin(int type)
/*
* Find a specific PCI IRQ entry.
- * Not an initfunc, possibly needed by modules
+ * Not an __init, possibly needed by modules
*/
+static int __init pin_2_irq(int idx, int apic, int pin);
int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pci_pin)
{
- int i;
+ int apic, i;
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].mpc_srcbus;
- if (IO_APIC_IRQ(mp_irqs[i].mpc_dstirq) &&
+ for (apic = 0; apic < nr_ioapics; apic++)
+ if (mp_apics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
+ break;
+
+ if ((apic || IO_APIC_IRQ(mp_irqs[i].mpc_dstirq)) &&
(mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
!mp_irqs[i].mpc_irqtype &&
(bus == mp_bus_id_to_pci_bus[mp_irqs[i].mpc_srcbus]) &&
(slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f)) &&
(pci_pin == (mp_irqs[i].mpc_srcbusirq & 3)))
- return mp_irqs[i].mpc_dstirq;
+ return pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
}
return -1;
}
@@ -491,9 +510,9 @@ static inline int irq_trigger(int idx)
return MPBIOS_trigger(idx);
}
-static int __init pin_2_irq(int idx, int pin)
+static int __init pin_2_irq(int idx, int apic, int pin)
{
- int irq;
+ int irq, i;
int bus = mp_irqs[idx].mpc_srcbus;
/*
@@ -513,9 +532,12 @@ static int __init pin_2_irq(int idx, int pin)
case MP_BUS_PCI: /* PCI pin */
{
/*
- * PCI IRQs are 'directly mapped'
+ * PCI IRQs are mapped in order
*/
- irq = pin;
+ i = irq = 0;
+ while (i < apic)
+ irq += nr_ioapic_registers[i++];
+ irq += pin;
break;
}
default:
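A quick worked example of the new ordered mapping: with two IO-APICs and nr_ioapic_registers[] = { 24, 16 }, pin 3 of the second IO-APIC (apic == 1) yields irq = 24 + 3 = 27, since all 24 pins of IO-APIC 0 come first.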
@@ -545,12 +567,14 @@ static int __init pin_2_irq(int idx, int pin)
static inline int IO_APIC_irq_trigger(int irq)
{
- int idx, pin;
+ int apic, idx, pin;
- for (pin = 0; pin < nr_ioapic_registers; pin++) {
- idx = find_irq_entry(pin,mp_INT);
- if ((idx != -1) && (irq == pin_2_irq(idx,pin)))
- return irq_trigger(idx);
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ idx = find_irq_entry(apic,pin,mp_INT);
+ if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
+ return irq_trigger(idx);
+ }
}
/*
* nonexistent IRQs are edge default
@@ -582,11 +606,12 @@ static int __init assign_irq_vector(int irq)
void __init setup_IO_APIC_irqs(void)
{
struct IO_APIC_route_entry entry;
- int pin, idx, bus, irq, first_notcon = 1;
+ int apic, pin, idx, irq, first_notcon = 1;
printk("init IO_APIC IRQs\n");
- for (pin = 0; pin < nr_ioapic_registers; pin++) {
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
/*
* add it to the IO-APIC irq-routing table:
@@ -598,13 +623,13 @@ void __init setup_IO_APIC_irqs(void)
entry.mask = 0; /* enable IRQ */
entry.dest.logical.logical_dest = 0; /* but no route */
- idx = find_irq_entry(pin,mp_INT);
+ idx = find_irq_entry(apic,pin,mp_INT);
if (idx == -1) {
if (first_notcon) {
- printk(" IO-APIC pin %d", pin);
+ printk(" IO-APIC (apicid-pin) %d-%d", mp_apics[apic].mpc_apicid, pin);
first_notcon = 0;
} else
- printk(", %d", pin);
+ printk(", %d-%d", mp_apics[apic].mpc_apicid, pin);
continue;
}
@@ -617,18 +642,17 @@ void __init setup_IO_APIC_irqs(void)
entry.dest.logical.logical_dest = 0xff;
}
- irq = pin_2_irq(idx,pin);
- add_pin_to_irq(irq, pin);
+ irq = pin_2_irq(idx,apic,pin);
+ add_pin_to_irq(irq, apic, pin);
- if (!IO_APIC_IRQ(irq))
+ if (!apic && !IO_APIC_IRQ(irq))
continue;
entry.vector = assign_irq_vector(irq);
- bus = mp_irqs[idx].mpc_srcbus;
-
- io_apic_write(0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(0x10+2*pin, *(((int *)&entry)+0));
+ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ }
}
if (!first_notcon)
@@ -638,7 +662,7 @@ void __init setup_IO_APIC_irqs(void)
/*
* Set up a certain pin as ExtINT delivered interrupt
*/
-void __init setup_ExtINT_pin(unsigned int pin, int irq)
+void __init setup_ExtINT_pin(unsigned int apic, unsigned int pin, int irq)
{
struct IO_APIC_route_entry entry;
@@ -662,8 +686,8 @@ void __init setup_ExtINT_pin(unsigned int pin, int irq)
entry.polarity = 0;
entry.trigger = 0;
- io_apic_write(0x10+2*pin, *(((int *)&entry)+0));
- io_apic_write(0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
}
void __init UNEXPECTED_IO_APIC(void)
@@ -674,17 +698,14 @@ void __init UNEXPECTED_IO_APIC(void)
void __init print_IO_APIC(void)
{
- int i;
+ int apic, i;
struct IO_APIC_reg_00 reg_00;
struct IO_APIC_reg_01 reg_01;
struct IO_APIC_reg_02 reg_02;
printk("number of MP IRQ sources: %d.\n", mp_irq_entries);
- printk("number of IO-APIC registers: %d.\n", nr_ioapic_registers);
-
- *(int *)&reg_00 = io_apic_read(0);
- *(int *)&reg_01 = io_apic_read(1);
- *(int *)&reg_02 = io_apic_read(2);
+ for (i = 0; i < nr_ioapics; i++)
+ printk("number of IO-APIC #%d registers: %d.\n", mp_apics[i].mpc_apicid, nr_ioapic_registers[i]);
/*
* We are a bit conservative about what we expect. We have to
@@ -692,6 +713,12 @@ void __init print_IO_APIC(void)
*/
printk("testing the IO APIC.......................\n");
+ for (apic = 0; apic < nr_ioapics; apic++) {
+
+ *(int *)&reg_00 = io_apic_read(apic, 0);
+ *(int *)&reg_01 = io_apic_read(apic, 1);
+ *(int *)&reg_02 = io_apic_read(apic, 2);
+ printk("\nIO APIC #%d......\n", mp_apics[apic].mpc_apicid);
printk(".... register #00: %08X\n", *(int *)&reg_00);
printk("....... : physical APIC id: %02X\n", reg_00.ID);
if (reg_00.__reserved_1 || reg_00.__reserved_2)
@@ -706,8 +733,6 @@ void __init print_IO_APIC(void)
(reg_01.entries != 0x3F) /* bigger Xeon boards */
)
UNEXPECTED_IO_APIC();
- if (reg_01.entries == 0x0f)
- printk("....... [IO-APIC cannot route PCI PIRQ 0-3]\n");
printk("....... : IO APIC version: %04X\n", reg_01.version);
if ( (reg_01.version != 0x10) && /* oldest IO-APICs */
@@ -731,8 +756,8 @@ void __init print_IO_APIC(void)
for (i = 0; i <= reg_01.entries; i++) {
struct IO_APIC_route_entry entry;
- *(((int *)&entry)+0) = io_apic_read(0x10+i*2);
- *(((int *)&entry)+1) = io_apic_read(0x11+i*2);
+ *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+ *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
printk(" %02x %03X %02X ",
i,
@@ -751,7 +776,7 @@ void __init print_IO_APIC(void)
entry.vector
);
}
-
+ }
printk(KERN_DEBUG "IRQ to pin mappings:\n");
for (i = 0; i < NR_IRQS; i++) {
struct irq_pin_list *entry = irq_2_pin + i;
@@ -796,9 +821,12 @@ static void __init init_sym_mode(void)
*/
{
struct IO_APIC_reg_01 reg_01;
+ int i;
- *(int *)&reg_01 = io_apic_read(1);
- nr_ioapic_registers = reg_01.entries+1;
+ for (i = 0; i < nr_ioapics; i++) {
+ *(int *)&reg_01 = io_apic_read(i, 1);
+ nr_ioapic_registers[i] = reg_01.entries+1;
+ }
}
/*
@@ -808,7 +836,7 @@ static void __init init_sym_mode(void)
}
/*
- * Not an initfunc, needed by the reboot code
+ * Not an __init, needed by the reboot code
*/
void init_pic_mode(void)
{
@@ -827,55 +855,6 @@ void init_pic_mode(void)
printk("...done.\n");
}
-char ioapic_OEM_ID [16];
-char ioapic_Product_ID [16];
-
-struct ioapic_list_entry {
- char * oem_id;
- char * product_id;
-};
-
-struct ioapic_list_entry __initdata ioapic_whitelist [] = {
-
- { "INTEL " , "PR440FX " },
- { "INTEL " , "82440FX " },
- { "AIR " , "KDI " },
- { 0 , 0 }
-};
-
-struct ioapic_list_entry __initdata ioapic_blacklist [] = {
-
- { "OEM00000" , "PROD00000000" },
- { 0 , 0 }
-};
-
-static int __init in_ioapic_list(struct ioapic_list_entry * table)
-{
- for ( ; table->oem_id ; table++)
- if ((!strcmp(table->oem_id,ioapic_OEM_ID)) &&
- (!strcmp(table->product_id,ioapic_Product_ID)))
- return 1;
- return 0;
-}
-
-static int __init ioapic_whitelisted(void)
-{
-/*
- * Right now, whitelist everything to see whether the new parsing
- * routines really do work for everybody.
- */
-#if 1
- return 1;
-#else
- return in_ioapic_list(ioapic_whitelist);
-#endif
-}
-
-static int __init ioapic_blacklisted(void)
-{
- return in_ioapic_list(ioapic_blacklist);
-}
-
static void __init setup_ioapic_id(void)
{
struct IO_APIC_reg_00 reg_00;
@@ -897,15 +876,15 @@ static void __init setup_ioapic_id(void)
/*
* Set the ID
*/
- *(int *)&reg_00 = io_apic_read(0);
+ *(int *)&reg_00 = io_apic_read(0, 0);
printk("...changing IO-APIC physical APIC ID to 2...\n");
reg_00.ID = 0x2;
- io_apic_write(0, *(int *)&reg_00);
+ io_apic_write(0, 0, *(int *)&reg_00);
/*
* Sanity check
*/
- *(int *)&reg_00 = io_apic_read(0);
+ *(int *)&reg_00 = io_apic_read(0, 0);
if (reg_00.ID != 0x2)
panic("could not set ID");
}
@@ -978,24 +957,13 @@ static int __init timer_irq_works(void)
* better to do it this way as thus we do not have to be aware of
* 'pending' interrupts in the IRQ path, except at this point.
*/
-static inline void self_IPI(unsigned int irq)
-{
- irq_desc_t *desc = irq_desc + irq;
- unsigned int status = desc->status;
-
- if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
- desc->status = status | IRQ_REPLAY;
- send_IPI_self(IO_APIC_VECTOR(irq));
- }
-}
-
/*
* Edge triggered needs to resend any interrupt
- * that was delayed.
+ * that was delayed, but this is now handled in the
+ * device-independent code.
*/
static void enable_edge_ioapic_irq(unsigned int irq)
{
- self_IPI(irq);
enable_IO_APIC_irq(irq);
}
@@ -1008,129 +976,52 @@ static void disable_edge_ioapic_irq(unsigned int irq)
* Starting up a edge-triggered IO-APIC interrupt is
* nasty - we need to make sure that we get the edge.
* If it is already asserted for some reason, we need
- * to fake an edge by marking it IRQ_PENDING..
+ * to return 1 to indicate that it was pending.
*
* This is not complete - we should be able to fake
* an edge even if it isn't on the 8259A...
*/
-static void startup_edge_ioapic_irq(unsigned int irq)
+static unsigned int startup_edge_ioapic_irq(unsigned int irq)
{
+ int was_pending = 0;
if (irq < 16) {
disable_8259A_irq(irq);
if (i8259A_irq_pending(irq))
- irq_desc[irq].status |= IRQ_PENDING;
+ was_pending = 1;
}
enable_edge_ioapic_irq(irq);
+ return was_pending;
}
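The generic layer uses this new return value to fake the lost edge; see the probe_irq_on() hunk later in this patch, which does exactly:

	if (irq_desc[i].handler->startup(i))
		irq_desc[i].status |= IRQ_PENDING;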
#define shutdown_edge_ioapic_irq disable_edge_ioapic_irq
+static void ack_edge_ioapic_irq(unsigned int i)
+{
+	ack_APIC_irq();
+}
+static void end_edge_ioapic_irq(unsigned int i) { }
+
/*
* Level triggered interrupts can just be masked,
* and shutting down and starting up the interrupt
- * is the same as enabling and disabling them.
+ * is the same as enabling and disabling them -- except
+ * that startup needs to return a "was pending" value.
*/
-#define startup_level_ioapic_irq unmask_IO_APIC_irq
-#define shutdown_level_ioapic_irq mask_IO_APIC_irq
-#define enable_level_ioapic_irq unmask_IO_APIC_irq
-#define disable_level_ioapic_irq mask_IO_APIC_irq
-
-static void do_edge_ioapic_IRQ(unsigned int irq, struct pt_regs * regs)
+static unsigned int startup_level_ioapic_irq(unsigned int irq)
{
- irq_desc_t *desc = irq_desc + irq;
- struct irqaction * action;
- unsigned int status;
-
- spin_lock(&irq_controller_lock);
-
- /*
- * Edge triggered IRQs can be acknowledged immediately
- * and do not need to be masked.
- */
- ack_APIC_irq();
- status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
- status |= IRQ_PENDING;
-
- /*
- * If the IRQ is disabled for whatever reason, we cannot
- * use the action we have.
- */
- action = NULL;
- if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
- action = desc->action;
- status &= ~IRQ_PENDING;
- status |= IRQ_INPROGRESS;
- }
- desc->status = status;
- spin_unlock(&irq_controller_lock);
-
- /*
- * If there is no IRQ handler or it was disabled, exit early.
- */
- if (!action)
- return;
-
- /*
- * Edge triggered interrupts need to remember
- * pending events.
- */
- for (;;) {
- handle_IRQ_event(irq, regs, action);
-
- spin_lock(&irq_controller_lock);
- if (!(desc->status & IRQ_PENDING))
- break;
- desc->status &= ~IRQ_PENDING;
- spin_unlock(&irq_controller_lock);
- }
- desc->status &= ~IRQ_INPROGRESS;
- spin_unlock(&irq_controller_lock);
+ unmask_IO_APIC_irq(irq);
+ return 0; /* don't check for pending */
}
-static void do_level_ioapic_IRQ(unsigned int irq, struct pt_regs * regs)
+#define shutdown_level_ioapic_irq mask_IO_APIC_irq
+#define enable_level_ioapic_irq unmask_IO_APIC_irq
+#define disable_level_ioapic_irq mask_IO_APIC_irq
+#define end_level_ioapic_irq unmask_IO_APIC_irq
+static void mask_and_ack_level_ioapic_irq(unsigned int i)
{
- irq_desc_t *desc = irq_desc + irq;
- struct irqaction * action;
- unsigned int status;
-
- spin_lock(&irq_controller_lock);
- /*
- * In the level triggered case we first disable the IRQ
- * in the IO-APIC, then we 'early ACK' the IRQ, then we
- * handle it and enable the IRQ when finished.
- *
- * disable has to happen before the ACK, to avoid IRQ storms.
- * So this all has to be within the spinlock.
- */
- mask_IO_APIC_irq(irq);
- status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
-
- /*
- * If the IRQ is disabled for whatever reason, we must
- * not enter the IRQ action.
- */
- action = NULL;
- if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
- action = desc->action;
- status |= IRQ_INPROGRESS;
- }
- desc->status = status;
-
+ mask_IO_APIC_irq(i);
ack_APIC_irq();
- spin_unlock(&irq_controller_lock);
-
- /* Exit early if we had no action or it was disabled */
- if (!action)
- return;
-
- handle_IRQ_event(irq, regs, action);
-
- spin_lock(&irq_controller_lock);
- desc->status &= ~IRQ_INPROGRESS;
- if (!(desc->status & IRQ_DISABLED))
- unmask_IO_APIC_irq(irq);
- spin_unlock(&irq_controller_lock);
}
/*
@@ -1146,18 +1037,20 @@ static struct hw_interrupt_type ioapic_edge_irq_type = {
"IO-APIC-edge",
startup_edge_ioapic_irq,
shutdown_edge_ioapic_irq,
- do_edge_ioapic_IRQ,
enable_edge_ioapic_irq,
- disable_edge_ioapic_irq
+ disable_edge_ioapic_irq,
+ ack_edge_ioapic_irq,
+ end_edge_ioapic_irq
};
static struct hw_interrupt_type ioapic_level_irq_type = {
"IO-APIC-level",
startup_level_ioapic_irq,
shutdown_level_ioapic_irq,
- do_level_ioapic_IRQ,
enable_level_ioapic_irq,
- disable_level_ioapic_irq
+ disable_level_ioapic_irq,
+ mask_and_ack_level_ioapic_irq,
+ end_level_ioapic_irq
};
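Both initializers now fill seven slots, matching the reworked controller descriptor in <linux/irq.h> (the old arch-private version, with its ->handle method, is deleted further down in this patch). The new layout is presumably along these lines -- a sketch, since the header itself is not shown here:

	struct hw_interrupt_type {
		const char *typename;
		unsigned int (*startup)(unsigned int irq);
		void (*shutdown)(unsigned int irq);
		void (*enable)(unsigned int irq);
		void (*disable)(unsigned int irq);
		void (*ack)(unsigned int irq);
		void (*end)(unsigned int irq);
	};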
static inline void init_IO_APIC_traps(void)
@@ -1227,7 +1120,10 @@ static inline void check_timer(void)
if (pin2 != -1) {
printk(".. (found pin %d) ...", pin2);
- setup_ExtINT_pin(pin2, 0);
+ /*
+ * legacy devices should be connected to IO APIC #0
+ */
+ setup_ExtINT_pin(0, pin2, 0);
make_8259A_irq(0);
}
@@ -1238,9 +1134,9 @@ static inline void check_timer(void)
* Just in case ...
*/
if (pin1 != -1)
- clear_IO_APIC_pin(pin1);
+ clear_IO_APIC_pin(0, pin1);
if (pin2 != -1)
- clear_IO_APIC_pin(pin2);
+ clear_IO_APIC_pin(0, pin2);
make_8259A_irq(0);
@@ -1273,29 +1169,8 @@ void __init setup_IO_APIC(void)
{
init_sym_mode();
- /*
- * Determine the range of IRQs handled by the IO-APIC. The
- * following boards can be fully enabled:
- *
- * - whitelisted ones
- * - those which have no PCI pins connected
- * - those for which the user has specified a pirq= parameter
- */
- if ( ioapic_whitelisted() ||
- (nr_ioapic_registers == 16) ||
- pirqs_enabled)
- {
- printk("ENABLING IO-APIC IRQs\n");
- io_apic_irqs = ~PIC_IRQS;
- } else {
- if (ioapic_blacklisted())
- printk(" blacklisted board, DISABLING IO-APIC IRQs\n");
- else
- printk(" unlisted board, DISABLING IO-APIC IRQs\n");
-
- printk(" see Documentation/IO-APIC.txt to enable them\n");
- io_apic_irqs = 0;
- }
+ printk("ENABLING IO-APIC IRQs\n");
+ io_apic_irqs = ~PIC_IRQS;
/*
* If there are no explicit MP IRQ entries, it's either one of the
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
index 445a26613..070667cbf 100644
--- a/arch/i386/kernel/ioport.c
+++ b/arch/i386/kernel/ioport.c
@@ -54,7 +54,8 @@ static void set_bitmap(unsigned long *bitmap, short base, short extent, int new_
*/
asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on)
{
- struct thread_struct * t = &current->tss;
+ struct thread_struct * t = &current->thread;
+ struct tss_struct * tss = init_tss + smp_processor_id();
if ((from + num <= from) || (from + num > IO_BITMAP_SIZE*32))
return -EINVAL;
@@ -65,14 +66,24 @@ asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on)
* IO bitmap up. ioperm() is much less timing critical than clone(),
* this is why we delay this operation until now:
*/
-#define IO_BITMAP_OFFSET offsetof(struct thread_struct,io_bitmap)
-
- if (t->bitmap != IO_BITMAP_OFFSET) {
- t->bitmap = IO_BITMAP_OFFSET;
+ if (!t->ioperm) {
+ /*
+ * just in case ...
+ */
memset(t->io_bitmap,0xff,(IO_BITMAP_SIZE+1)*4);
+ t->ioperm = 1;
+ /*
+ * this activates it in the TSS
+ */
+ tss->bitmap = IO_BITMAP_OFFSET;
}
-
- set_bitmap((unsigned long *)t->io_bitmap, from, num, !turn_on);
+
+ /*
+ * do it in the per-thread copy and in the TSS ...
+ */
+ set_bitmap(t->io_bitmap, from, num, !turn_on);
+ set_bitmap(tss->io_bitmap, from, num, !turn_on);
+
return 0;
}
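From user space nothing changes: ioperm(2) still takes a port range and an on/off flag, and still requires root. A sketch of a caller that pokes the conventional first parallel port (the 0x378 address is assumed for illustration):

	#include <sys/io.h>	/* glibc wrappers for ioperm()/outb() */

	if (ioperm(0x378, 3, 1) == 0)	/* ports 0x378..0x37a, turn on */
		outb(0x55, 0x378);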
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index ea218fe45..3106f1966 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -1,3 +1,8 @@
+/*
+ * Mostly architecture independent; some parts have moved to i8259.c.
+ * The beautiful visws architecture code still needs to be updated too,
+ * and, finally, the BUILD_IRQ and SMP_BUILD macros in irq.h need to be
+ * fixed.
+ */
/*
* linux/arch/i386/kernel/irq.c
*
@@ -15,7 +20,6 @@
* Naturally it's not a 1:1 relation, but there are similarities.
*/
-#include <linux/config.h>
#include <linux/ptrace.h>
#include <linux/errno.h>
#include <linux/kernel_stat.h>
@@ -27,20 +31,19 @@
#include <linux/malloc.h>
#include <linux/random.h>
#include <linux/smp.h>
-#include <linux/tasks.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <asm/system.h>
#include <asm/io.h>
-#include <asm/irq.h>
#include <asm/bitops.h>
#include <asm/smp.h>
#include <asm/pgtable.h>
#include <asm/delay.h>
#include <asm/desc.h>
+#include <asm/irq.h>
+#include <linux/irq.h>
-#include "irq.h"
unsigned int local_bh_count[NR_CPUS];
unsigned int local_irq_count[NR_CPUS];
@@ -68,297 +71,11 @@ atomic_t nmi_counter;
* system. We never hold this lock when we call the actual
* IRQ handler.
*/
-spinlock_t irq_controller_lock;
-
-/*
- * Dummy controller type for unused interrupts
- */
-static void do_none(unsigned int irq, struct pt_regs * regs)
-{
- /*
- * we are careful. While for ISA irqs it's common to happen
- * outside of any driver (think autodetection), this is not
- * at all nice for PCI interrupts. So we are stricter and
- * print a warning when such spurious interrupts happen.
- * Spurious interrupts can confuse other drivers if the PCI
- * IRQ line is shared.
- *
- * Such spurious interrupts are either driver bugs, or
- * sometimes hw (chipset) bugs.
- */
- printk("unexpected IRQ vector %d on CPU#%d!\n",irq, smp_processor_id());
-
-#ifdef __SMP__
- /*
- * [currently unexpected vectors happen only on SMP and APIC.
- * if we want to have non-APIC and non-8259A controllers
- * in the future with unexpected vectors, this ack should
- * probably be made controller-specific.]
- */
- ack_APIC_irq();
-#endif
-}
-static void enable_none(unsigned int irq) { }
-static void disable_none(unsigned int irq) { }
-
-/* startup is the same as "enable", shutdown is same as "disable" */
-#define startup_none enable_none
-#define shutdown_none disable_none
-
-struct hw_interrupt_type no_irq_type = {
- "none",
- startup_none,
- shutdown_none,
- do_none,
- enable_none,
- disable_none
-};
-
-/*
- * This is the 'legacy' 8259A Programmable Interrupt Controller,
- * present in the majority of PC/AT boxes.
- */
-
-static void do_8259A_IRQ(unsigned int irq, struct pt_regs * regs);
-static void enable_8259A_irq(unsigned int irq);
-void disable_8259A_irq(unsigned int irq);
-
-/* startup is the same as "enable", shutdown is same as "disable" */
-#define startup_8259A_irq enable_8259A_irq
-#define shutdown_8259A_irq disable_8259A_irq
-
-static struct hw_interrupt_type i8259A_irq_type = {
- "XT-PIC",
- startup_8259A_irq,
- shutdown_8259A_irq,
- do_8259A_IRQ,
- enable_8259A_irq,
- disable_8259A_irq
-};
-
+spinlock_t irq_controller_lock = SPIN_LOCK_UNLOCKED;
/*
* Controller mappings for all interrupt sources:
*/
-irq_desc_t irq_desc[NR_IRQS] = { [0 ... NR_IRQS-1] = { 0, &no_irq_type, }};
-
-
-/*
- * 8259A PIC functions to handle ISA devices:
- */
-
-/*
- * This contains the irq mask for both 8259A irq controllers,
- */
-static unsigned int cached_irq_mask = 0xffff;
-
-#define __byte(x,y) (((unsigned char *)&(y))[x])
-#define cached_21 (__byte(0,cached_irq_mask))
-#define cached_A1 (__byte(1,cached_irq_mask))
-
-/*
- * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
- * boards the timer interrupt is not connected to any IO-APIC pin, it's
- * fed to the CPU IRQ line directly.
- *
- * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
- * this 'mixed mode' IRQ handling costs nothing because it's only used
- * at IRQ setup time.
- */
-unsigned long io_apic_irqs = 0;
-
-/*
- * These have to be protected by the irq controller spinlock
- * before being called.
- */
-void disable_8259A_irq(unsigned int irq)
-{
- unsigned int mask = 1 << irq;
- cached_irq_mask |= mask;
- if (irq & 8) {
- outb(cached_A1,0xA1);
- } else {
- outb(cached_21,0x21);
- }
-}
-
-static void enable_8259A_irq(unsigned int irq)
-{
- unsigned int mask = ~(1 << irq);
- cached_irq_mask &= mask;
- if (irq & 8) {
- outb(cached_A1,0xA1);
- } else {
- outb(cached_21,0x21);
- }
-}
-
-int i8259A_irq_pending(unsigned int irq)
-{
- unsigned int mask = 1<<irq;
-
- if (irq < 8)
- return (inb(0x20) & mask);
- return (inb(0xA0) & (mask >> 8));
-}
-
-void make_8259A_irq(unsigned int irq)
-{
- disable_irq_nosync(irq);
- io_apic_irqs &= ~(1<<irq);
- irq_desc[irq].handler = &i8259A_irq_type;
- enable_irq(irq);
-}
-
-/*
- * Careful! The 8259A is a fragile beast, it pretty
- * much _has_ to be done exactly like this (mask it
- * first, _then_ send the EOI, and the order of EOI
- * to the two 8259s is important!
- */
-static inline void mask_and_ack_8259A(unsigned int irq)
-{
- cached_irq_mask |= 1 << irq;
- if (irq & 8) {
- inb(0xA1); /* DUMMY */
- outb(cached_A1,0xA1);
- outb(0x62,0x20); /* Specific EOI to cascade */
- outb(0x20,0xA0);
- } else {
- inb(0x21); /* DUMMY */
- outb(cached_21,0x21);
- outb(0x20,0x20);
- }
-}
-
-static void do_8259A_IRQ(unsigned int irq, struct pt_regs * regs)
-{
- struct irqaction * action;
- irq_desc_t *desc = irq_desc + irq;
-
- spin_lock(&irq_controller_lock);
- {
- unsigned int status;
- mask_and_ack_8259A(irq);
- status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
- action = NULL;
- if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
- action = desc->action;
- status |= IRQ_INPROGRESS;
- }
- desc->status = status;
- }
- spin_unlock(&irq_controller_lock);
-
- /* Exit early if we had no action or it was disabled */
- if (!action)
- return;
-
- handle_IRQ_event(irq, regs, action);
-
- spin_lock(&irq_controller_lock);
- {
- unsigned int status = desc->status & ~IRQ_INPROGRESS;
- desc->status = status;
- if (!(status & IRQ_DISABLED))
- enable_8259A_irq(irq);
- }
- spin_unlock(&irq_controller_lock);
-}
-
-/*
- * This builds up the IRQ handler stubs using some ugly macros in irq.h
- *
- * These macros create the low-level assembly IRQ routines that save
- * register context and call do_IRQ(). do_IRQ() then does all the
- * operations that are needed to keep the AT (or SMP IOAPIC)
- * interrupt-controller happy.
- */
-
-
-BUILD_COMMON_IRQ()
-
-#define BI(x,y) \
- BUILD_IRQ(##x##y)
-
-#define BUILD_16_IRQS(x) \
- BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
- BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
- BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
- BI(x,c) BI(x,d) BI(x,e) BI(x,f)
-
-/*
- * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
- * (these are usually mapped to vectors 0x20-0x30)
- */
-BUILD_16_IRQS(0x0)
-
-#ifdef CONFIG_X86_IO_APIC
-/*
- * The IO-APIC gives us many more interrupt sources. Most of these
- * are unused but an SMP system is supposed to have enough memory ...
- * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
- * across the spectrum, so we really want to be prepared to get all
- * of these. Plus, more powerful systems might have more than 64
- * IO-APIC registers.
- *
- * (these are usually mapped into the 0x30-0xff vector range)
- */
- BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
-BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
-BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
-BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
-#endif
-
-#undef BUILD_16_IRQS
-#undef BI
-
-
-#ifdef __SMP__
-/*
- * The following vectors are part of the Linux architecture, there
- * is no hardware IRQ pin equivalent for them, they are triggered
- * through the ICC by us (IPIs)
- */
-BUILD_SMP_INTERRUPT(reschedule_interrupt)
-BUILD_SMP_INTERRUPT(invalidate_interrupt)
-BUILD_SMP_INTERRUPT(stop_cpu_interrupt)
-BUILD_SMP_INTERRUPT(call_function_interrupt)
-BUILD_SMP_INTERRUPT(spurious_interrupt)
-
-/*
- * every pentium local APIC has two 'local interrupts', with a
- * soft-definable vector attached to both interrupts, one of
- * which is a timer interrupt, the other one is error counter
- * overflow. Linux uses the local APIC timer interrupt to get
- * a much simpler SMP time architecture:
- */
-BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt)
-
-#endif
-
-#define IRQ(x,y) \
- IRQ##x##y##_interrupt
-
-#define IRQLIST_16(x) \
- IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
- IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
- IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
- IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
-
-static void (*interrupt[NR_IRQS])(void) = {
- IRQLIST_16(0x0),
-
-#ifdef CONFIG_X86_IO_APIC
- IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
- IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
- IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
- IRQLIST_16(0xc), IRQLIST_16(0xd)
-#endif
-};
-
-#undef IRQ
-#undef IRQLIST_16
-
+irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = { [0 ... NR_IRQS-1] = { 0, &no_irq_type, }};
/*
* Special irq handlers.
@@ -366,36 +83,6 @@ static void (*interrupt[NR_IRQS])(void) = {
void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
-#ifndef CONFIG_VISWS
-/*
- * Note that on a 486, we don't want to do a SIGFPE on an irq13
- * as the irq is unreliable, and exception 16 works correctly
- * (ie as explained in the intel literature). On a 386, you
- * can't use exception 16 due to bad IBM design, so we have to
- * rely on the less exact irq13.
- *
- * Careful.. Not only is IRQ13 unreliable, but it is also
- * leads to races. IBM designers who came up with it should
- * be shot.
- */
-
-static void math_error_irq(int cpl, void *dev_id, struct pt_regs *regs)
-{
- outb(0,0xF0);
- if (ignore_irq13 || !boot_cpu_data.hard_math)
- return;
- math_error();
-}
-
-static struct irqaction irq13 = { math_error_irq, 0, 0, "fpu", NULL, NULL };
-
-/*
- * IRQ2 is cascade interrupt to second interrupt controller
- */
-
-static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL};
-#endif
-
/*
* Generic, controller-independent functions:
*/
@@ -438,10 +125,13 @@ int get_irq_list(char *buf)
return p - buf;
}
+
/*
* Global interrupt locks for SMP. Allow interrupts to come in on any
* CPU, yet make cli/sti act globally to protect critical regions..
*/
+spinlock_t i386_bh_lock = SPIN_LOCK_UNLOCKED;
+
#ifdef __SMP__
unsigned char global_irq_holder = NO_PROC_ID;
unsigned volatile int global_irq_lock;
@@ -461,7 +151,10 @@ atomic_t global_bh_lock;
static inline void check_smp_invalidate(int cpu)
{
if (test_bit(cpu, &smp_invalidate_needed)) {
+ struct mm_struct *mm = current->mm;
clear_bit(cpu, &smp_invalidate_needed);
+ if (mm)
+ atomic_set_mask(1 << cpu, &mm->cpu_vm_mask);
local_flush_tlb();
}
}
@@ -471,7 +164,6 @@ static void show(char * str)
int i;
unsigned long *stack;
int cpu = smp_processor_id();
- extern char *get_options(char *str, int *ints);
printk("\n%s, CPU %d:\n", str, cpu);
printk("irq: %d [%d %d]\n",
@@ -481,7 +173,7 @@ static void show(char * str)
stack = (unsigned long *) &stack;
for (i = 40; i ; i--) {
unsigned long x = *++stack;
- if (x > (unsigned long) &get_options && x < (unsigned long) &vsprintf) {
+ if (x > (unsigned long) &get_option && x < (unsigned long) &vsprintf) {
printk("<[%08lx]> ", x);
}
}
@@ -782,10 +474,16 @@ void enable_irq(unsigned int irq)
spin_lock_irqsave(&irq_controller_lock, flags);
switch (irq_desc[irq].depth) {
- case 1:
- irq_desc[irq].status &= ~IRQ_DISABLED;
+ case 1: {
+ unsigned int status = irq_desc[irq].status & ~IRQ_DISABLED;
+ irq_desc[irq].status = status;
+ if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+ irq_desc[irq].status = status | IRQ_REPLAY;
+ hw_resend_irq(irq_desc[irq].handler,irq);
+ }
irq_desc[irq].handler->enable(irq);
- /* fall throught */
+ /* fall-through */
+ }
default:
irq_desc[irq].depth--;
break;
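hw_resend_irq() replaces the open-coded self_IPI() that the io_apic.c part of this patch removes. A plausible sketch for the IO-APIC edge case -- the actual definition lives in an asm header not shown in this patch -- is simply

	#define hw_resend_irq(h, i)	send_IPI_self(IO_APIC_VECTOR(i))

i.e. re-raise the vector on the local CPU and let the normal interrupt entry path replay it.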
@@ -801,7 +499,7 @@ void enable_irq(unsigned int irq)
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
-asmlinkage void do_IRQ(struct pt_regs regs)
+asmlinkage unsigned int do_IRQ(struct pt_regs regs)
{
/*
* We ack quickly, we don't want the irq controller
@@ -813,76 +511,81 @@ asmlinkage void do_IRQ(struct pt_regs regs)
* 0 return value means that this irq is already being
* handled by some other CPU. (or is disabled)
*/
- int irq = regs.orig_eax & 0xff; /* subtle, see irq.h */
+ int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ code */
int cpu = smp_processor_id();
+ irq_desc_t *desc;
+ struct irqaction * action;
+ unsigned int status;
kstat.irqs[cpu][irq]++;
- irq_desc[irq].handler->handle(irq, &regs);
+ desc = irq_desc + irq;
+ spin_lock(&irq_controller_lock);
+ irq_desc[irq].handler->ack(irq);
+	/*
+	 * REPLAY is when Linux resends an IRQ that was dropped earlier;
+	 * WAITING is used by probe to mark irqs that are being tested.
+	 */
+ status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+ status |= IRQ_PENDING; /* we _want_ to handle it */
/*
- * This should be conditional: we should really get
- * a return code from the irq handler to tell us
- * whether the handler wants us to do software bottom
- * half handling or not..
+ * If the IRQ is disabled for whatever reason, we cannot
+ * use the action we have.
*/
- if (1) {
- if (bh_active & bh_mask)
- do_bottom_half();
+ action = NULL;
+ if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
+ action = desc->action;
+ status &= ~IRQ_PENDING; /* we commit to handling */
+ status |= IRQ_INPROGRESS; /* we are handling it */
}
-}
-
-int setup_x86_irq(unsigned int irq, struct irqaction * new)
-{
- int shared = 0;
- struct irqaction *old, **p;
- unsigned long flags;
+ desc->status = status;
+ spin_unlock(&irq_controller_lock);
/*
- * Some drivers like serial.c use request_irq() heavily,
- * so we have to be careful not to interfere with a
- * running system.
+ * If there is no IRQ handler or it was disabled, exit early.
+	 * Since we set PENDING, if another processor is handling
+	 * a different instance of this same irq, the other processor
+	 * will take care of it.
*/
- if (new->flags & SA_SAMPLE_RANDOM) {
- /*
- * This function might sleep, we want to call it first,
- * outside of the atomic block.
- * Yes, this might clear the entropy pool if the wrong
- * driver is attempted to be loaded, without actually
- * installing a new handler, but is this really a problem,
- * only the sysadmin is able to do this.
- */
- rand_initialize_irq(irq);
- }
+ if (!action)
+ return 1;
/*
- * The following block of code has to be executed atomically
+ * Edge triggered interrupts need to remember
+ * pending events.
+ * This applies to any hw interrupts that allow a second
+ * instance of the same irq to arrive while we are in do_IRQ
+ * or in the handler. But the code here only handles the _second_
+ * instance of the irq, not the third or fourth. So it is mostly
+ * useful for irq hardware that does not mask cleanly in an
+ * SMP environment.
*/
- spin_lock_irqsave(&irq_controller_lock,flags);
- p = &irq_desc[irq].action;
- if ((old = *p) != NULL) {
- /* Can't share interrupts unless both agree to */
- if (!(old->flags & new->flags & SA_SHIRQ)) {
- spin_unlock_irqrestore(&irq_controller_lock,flags);
- return -EBUSY;
- }
-
- /* add new interrupt at end of irq queue */
- do {
- p = &old->next;
- old = *p;
- } while (old);
- shared = 1;
+ for (;;) {
+ handle_IRQ_event(irq, &regs, action);
+ spin_lock(&irq_controller_lock);
+
+ if (!(desc->status & IRQ_PENDING))
+ break;
+ desc->status &= ~IRQ_PENDING;
+ spin_unlock(&irq_controller_lock);
}
+ desc->status &= ~IRQ_INPROGRESS;
+	if (!(desc->status & IRQ_DISABLED)) {
+ irq_desc[irq].handler->end(irq);
+ }
+ spin_unlock(&irq_controller_lock);
- *p = new;
-
- if (!shared) {
- irq_desc[irq].depth = 0;
- irq_desc[irq].status &= ~IRQ_DISABLED;
- irq_desc[irq].handler->startup(irq);
+ /*
+ * This should be conditional: we should really get
+ * a return code from the irq handler to tell us
+ * whether the handler wants us to do software bottom
+ * half handling or not..
+ */
+ if (1) {
+ if (bh_active & bh_mask)
+ do_bottom_half();
}
- spin_unlock_irqrestore(&irq_controller_lock,flags);
- return 0;
+ return 1;
}
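A two-CPU timeline makes the PENDING/INPROGRESS handshake above concrete:

	CPU0: acks, sets INPROGRESS, drops the lock, runs the handler
	CPU1: same irq arrives, acks, sees INPROGRESS, leaves PENDING set, returns
	CPU0: retakes the lock, sees PENDING, clears it, runs the handler again

so the second instance is never lost and never runs concurrently with the first.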
int request_irq(unsigned int irq,
@@ -911,8 +614,7 @@ int request_irq(unsigned int irq,
action->next = NULL;
action->dev_id = dev_id;
- retval = setup_x86_irq(irq, action);
-
+ retval = setup_irq(irq, action);
if (retval)
kfree(action);
return retval;
@@ -920,29 +622,40 @@ int request_irq(unsigned int irq,
void free_irq(unsigned int irq, void *dev_id)
{
- struct irqaction * action, **p;
+ struct irqaction **p;
unsigned long flags;
if (irq >= NR_IRQS)
return;
spin_lock_irqsave(&irq_controller_lock,flags);
- for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) {
- if (action->dev_id != dev_id)
- continue;
+ p = &irq_desc[irq].action;
+ for (;;) {
+ struct irqaction * action = *p;
+ if (action) {
+ struct irqaction **pp = p;
+ p = &action->next;
+ if (action->dev_id != dev_id)
+ continue;
- /* Found it - now free it */
- *p = action->next;
- kfree(action);
- if (!irq_desc[irq].action) {
- irq_desc[irq].status |= IRQ_DISABLED;
- irq_desc[irq].handler->shutdown(irq);
+ /* Found it - now remove it from the list of entries */
+ *pp = action->next;
+ if (!irq_desc[irq].action) {
+ irq_desc[irq].status |= IRQ_DISABLED;
+ irq_desc[irq].handler->shutdown(irq);
+ }
+ spin_unlock_irqrestore(&irq_controller_lock,flags);
+
+ /* Wait to make sure it's not being used on another CPU */
+ while (irq_desc[irq].status & IRQ_INPROGRESS)
+ barrier();
+ kfree(action);
+ return;
}
- goto out;
+ printk("Trying to free free IRQ%d\n",irq);
+ spin_unlock_irqrestore(&irq_controller_lock,flags);
+ return;
}
- printk("Trying to free free IRQ%d\n",irq);
-out:
- spin_unlock_irqrestore(&irq_controller_lock,flags);
}
/*
@@ -965,7 +678,8 @@ unsigned long probe_irq_on(void)
for (i = NR_IRQS-1; i > 0; i--) {
if (!irq_desc[i].action) {
irq_desc[i].status |= IRQ_AUTODETECT | IRQ_WAITING;
- irq_desc[i].handler->startup(i);
+			if (irq_desc[i].handler->startup(i))
+ irq_desc[i].status |= IRQ_PENDING;
}
}
spin_unlock_irq(&irq_controller_lock);
@@ -1028,102 +742,58 @@ int probe_irq_off(unsigned long unused)
return irq_found;
}
-void init_ISA_irqs (void)
+/* This was setup_x86_irq, but it seems pretty generic. */
+int setup_irq(unsigned int irq, struct irqaction * new)
{
- int i;
-
- for (i = 0; i < NR_IRQS; i++) {
- irq_desc[i].status = IRQ_DISABLED;
- irq_desc[i].action = 0;
- irq_desc[i].depth = 0;
-
- if (i < 16) {
- /*
- * 16 old-style INTA-cycle interrupts:
- */
- irq_desc[i].handler = &i8259A_irq_type;
- } else {
- /*
- * 'high' PCI IRQs filled in on demand
- */
- irq_desc[i].handler = &no_irq_type;
- }
- }
-}
-
-__initfunc(void init_IRQ(void))
-{
- int i;
+ int shared = 0;
+ struct irqaction *old, **p;
+ unsigned long flags;
-#ifndef CONFIG_X86_VISWS_APIC
- init_ISA_irqs();
-#else
- init_VISWS_APIC_irqs();
-#endif
/*
- * Cover the whole vector space, no vector can escape
- * us. (some of these will be overridden and become
- * 'special' SMP interrupts)
+ * Some drivers like serial.c use request_irq() heavily,
+ * so we have to be careful not to interfere with a
+ * running system.
*/
- for (i = 0; i < NR_IRQS; i++) {
- int vector = FIRST_EXTERNAL_VECTOR + i;
- if (vector != SYSCALL_VECTOR)
- set_intr_gate(vector, interrupt[i]);
+ if (new->flags & SA_SAMPLE_RANDOM) {
+ /*
+		 * This function might sleep, so we want to call it first,
+		 * outside of the atomic block.
+		 * Yes, this might clear the entropy pool if the wrong
+		 * driver is attempted to be loaded without actually
+		 * installing a new handler, but is this really a problem?
+		 * Only the sysadmin is able to do this.
+ */
+ rand_initialize_irq(irq);
}
-#ifdef __SMP__
-
/*
- IRQ0 must be given a fixed assignment and initialized
- before init_IRQ_SMP.
- */
- set_intr_gate(IRQ0_TRAP_VECTOR, interrupt[0]);
-
- /*
- * The reschedule interrupt is a CPU-to-CPU reschedule-helper
- * IPI, driven by wakeup.
+ * The following block of code has to be executed atomically
*/
- set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
- /* IPI for invalidation */
- set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
-
- /* IPI for CPU halt */
- set_intr_gate(STOP_CPU_VECTOR, stop_cpu_interrupt);
-
- /* self generated IPI for local APIC timer */
- set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
- /* IPI for generic function call */
- set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-
- /* IPI vector for APIC spurious interrupts */
- set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
-#endif
- request_region(0x20,0x20,"pic1");
- request_region(0xa0,0x20,"pic2");
+ spin_lock_irqsave(&irq_controller_lock,flags);
+ p = &irq_desc[irq].action;
+ if ((old = *p) != NULL) {
+ /* Can't share interrupts unless both agree to */
+ if (!(old->flags & new->flags & SA_SHIRQ)) {
+ spin_unlock_irqrestore(&irq_controller_lock,flags);
+ return -EBUSY;
+ }
- /*
- * Set the clock to 100 Hz, we already have a valid
- * vector now:
- */
- outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
- outb_p(LATCH & 0xff , 0x40); /* LSB */
- outb(LATCH >> 8 , 0x40); /* MSB */
+ /* add new interrupt at end of irq queue */
+ do {
+ p = &old->next;
+ old = *p;
+ } while (old);
+ shared = 1;
+ }
-#ifndef CONFIG_VISWS
- setup_x86_irq(2, &irq2);
- setup_x86_irq(13, &irq13);
-#endif
-}
+ *p = new;
-#ifdef CONFIG_X86_IO_APIC
-__initfunc(void init_IRQ_SMP(void))
-{
- int i;
- for (i = 0; i < NR_IRQS ; i++)
- if (IO_APIC_VECTOR(i) > 0)
- set_intr_gate(IO_APIC_VECTOR(i), interrupt[i]);
+ if (!shared) {
+ irq_desc[irq].depth = 0;
+ irq_desc[irq].status &= ~IRQ_DISABLED;
+ irq_desc[irq].handler->startup(irq);
+ }
+ spin_unlock_irqrestore(&irq_controller_lock,flags);
+ return 0;
}
-#endif
diff --git a/arch/i386/kernel/irq.h b/arch/i386/kernel/irq.h
deleted file mode 100644
index 1023cd4da..000000000
--- a/arch/i386/kernel/irq.h
+++ /dev/null
@@ -1,255 +0,0 @@
-#ifndef __irq_h
-#define __irq_h
-
-#include <asm/irq.h>
-
-/*
- * Interrupt controller descriptor. This is all we need
- * to describe about the low-level hardware.
- */
-struct hw_interrupt_type {
- const char * typename;
- void (*startup)(unsigned int irq);
- void (*shutdown)(unsigned int irq);
- void (*handle)(unsigned int irq, struct pt_regs * regs);
- void (*enable)(unsigned int irq);
- void (*disable)(unsigned int irq);
-};
-
-extern struct hw_interrupt_type no_irq_type;
-
-/*
- * IRQ line status.
- */
-#define IRQ_INPROGRESS 1 /* IRQ handler active - do not enter! */
-#define IRQ_DISABLED 2 /* IRQ disabled - do not enter! */
-#define IRQ_PENDING 4 /* IRQ pending - replay on enable */
-#define IRQ_REPLAY 8 /* IRQ has been replayed but not acked yet */
-#define IRQ_AUTODETECT 16 /* IRQ is being autodetected */
-#define IRQ_WAITING 32 /* IRQ not yet seen - for autodetection */
-
-/*
- * This is the "IRQ descriptor", which contains various information
- * about the irq, including what kind of hardware handling it has,
- * whether it is disabled etc etc.
- *
- * Pad this out to 32 bytes for cache and indexing reasons.
- */
-typedef struct {
- unsigned int status; /* IRQ status - IRQ_INPROGRESS, IRQ_DISABLED */
- struct hw_interrupt_type *handler; /* handle/enable/disable functions */
- struct irqaction *action; /* IRQ action list */
- unsigned int depth; /* Disable depth for nested irq disables */
-} irq_desc_t;
-
-/*
- * IDT vectors usable for external interrupt sources start
- * at 0x20:
- */
-#define FIRST_EXTERNAL_VECTOR 0x20
-
-#define SYSCALL_VECTOR 0x80
-
-/*
- * Vectors 0x20-0x2f are used for ISA interrupts.
- */
-
-/*
- * Special IRQ vectors used by the SMP architecture:
- *
- * (some of the following vectors are 'rare', they might be merged
- * into a single vector to save vector space. TLB, reschedule and
- * local APIC vectors are performance-critical.)
- */
-#define RESCHEDULE_VECTOR 0x30
-#define INVALIDATE_TLB_VECTOR 0x31
-#define STOP_CPU_VECTOR 0x40
-#define LOCAL_TIMER_VECTOR 0x41
-#define CALL_FUNCTION_VECTOR 0x50
-
-/*
- * First APIC vector available to drivers: (vectors 0x51-0xfe)
- */
-#define IRQ0_TRAP_VECTOR 0x51
-
-/*
- * This IRQ should never happen, but we print a message nevertheless.
- */
-#define SPURIOUS_APIC_VECTOR 0xff
-
-extern irq_desc_t irq_desc[NR_IRQS];
-extern int irq_vector[NR_IRQS];
-#define IO_APIC_VECTOR(irq) irq_vector[irq]
-
-extern void init_IRQ_SMP(void);
-extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
-extern int setup_x86_irq(unsigned int, struct irqaction *);
-
-/*
- * Various low-level irq details needed by irq.c, process.c,
- * time.c, io_apic.c and smp.c
- *
- * Interrupt entry/exit code at both C and assembly level
- */
-
-extern void no_action(int cpl, void *dev_id, struct pt_regs *regs);
-extern void mask_irq(unsigned int irq);
-extern void unmask_irq(unsigned int irq);
-extern void disable_8259A_irq(unsigned int irq);
-extern int i8259A_irq_pending(unsigned int irq);
-extern void ack_APIC_irq(void);
-extern void FASTCALL(send_IPI_self(int vector));
-extern void init_VISWS_APIC_irqs(void);
-extern void setup_IO_APIC(void);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
-extern void make_8259A_irq(unsigned int irq);
-extern void send_IPI(int dest, int vector);
-extern void init_pic_mode(void);
-extern void print_IO_APIC(void);
-
-extern unsigned long io_apic_irqs;
-
-extern char _stext, _etext;
-
-#define MAX_IRQ_SOURCES 128
-#define MAX_MP_BUSSES 32
-enum mp_bustype {
- MP_BUS_ISA,
- MP_BUS_EISA,
- MP_BUS_PCI
-};
-extern int mp_bus_id_to_type [MAX_MP_BUSSES];
-extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
-extern char ioapic_OEM_ID [16];
-extern char ioapic_Product_ID [16];
-
-extern spinlock_t irq_controller_lock;
-
-#ifdef __SMP__
-
-#include <asm/atomic.h>
-
-static inline void irq_enter(int cpu, unsigned int irq)
-{
- hardirq_enter(cpu);
- while (test_bit(0,&global_irq_lock)) {
- /* nothing */;
- }
-}
-
-static inline void irq_exit(int cpu, unsigned int irq)
-{
- hardirq_exit(cpu);
-}
-
-#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
-
-#else
-
-#define irq_enter(cpu, irq) (++local_irq_count[cpu])
-#define irq_exit(cpu, irq) (--local_irq_count[cpu])
-
-#define IO_APIC_IRQ(x) (0)
-
-#endif
-
-#define __STR(x) #x
-#define STR(x) __STR(x)
-
-#define SAVE_ALL \
- "cld\n\t" \
- "pushl %es\n\t" \
- "pushl %ds\n\t" \
- "pushl %eax\n\t" \
- "pushl %ebp\n\t" \
- "pushl %edi\n\t" \
- "pushl %esi\n\t" \
- "pushl %edx\n\t" \
- "pushl %ecx\n\t" \
- "pushl %ebx\n\t" \
- "movl $" STR(__KERNEL_DS) ",%edx\n\t" \
- "movl %dx,%ds\n\t" \
- "movl %dx,%es\n\t"
-
-#define IRQ_NAME2(nr) nr##_interrupt(void)
-#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
-
-#define GET_CURRENT \
- "movl %esp, %ebx\n\t" \
- "andl $-8192, %ebx\n\t"
-
-#ifdef __SMP__
-
-/*
- * SMP has a few special interrupts for IPI messages
- */
-
-#define BUILD_SMP_INTERRUPT(x) \
-asmlinkage void x(void); \
-__asm__( \
-"\n"__ALIGN_STR"\n" \
-SYMBOL_NAME_STR(x) ":\n\t" \
- "pushl $-1\n\t" \
- SAVE_ALL \
- "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
- "jmp ret_from_intr\n");
-
-#define BUILD_SMP_TIMER_INTERRUPT(x) \
-asmlinkage void x(struct pt_regs * regs); \
-__asm__( \
-"\n"__ALIGN_STR"\n" \
-SYMBOL_NAME_STR(x) ":\n\t" \
- "pushl $-1\n\t" \
- SAVE_ALL \
- "movl %esp,%eax\n\t" \
- "pushl %eax\n\t" \
- "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
- "addl $4,%esp\n\t" \
- "jmp ret_from_intr\n");
-
-#endif /* __SMP__ */
-
-#define BUILD_COMMON_IRQ() \
-__asm__( \
- "\n" __ALIGN_STR"\n" \
- "common_interrupt:\n\t" \
- SAVE_ALL \
- "pushl $ret_from_intr\n\t" \
- "jmp "SYMBOL_NAME_STR(do_IRQ));
-
-/*
- * subtle. orig_eax is used by the signal code to distinct between
- * system calls and interrupted 'random user-space'. Thus we have
- * to put a negative value into orig_eax here. (the problem is that
- * both system calls and IRQs want to have small integer numbers in
- * orig_eax, and the syscall code has won the optimization conflict ;)
- */
-#define BUILD_IRQ(nr) \
-asmlinkage void IRQ_NAME(nr); \
-__asm__( \
-"\n"__ALIGN_STR"\n" \
-SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
- "pushl $"#nr"-256\n\t" \
- "jmp common_interrupt");
-
-/*
- * x86 profiling function, SMP safe. We might want to do this in
- * assembly totally?
- */
-static inline void x86_do_profile (unsigned long eip)
-{
- if (prof_buffer) {
- eip -= (unsigned long) &_stext;
- eip >>= prof_shift;
- /*
- * Don't ignore out-of-bounds EIP values silently,
- * put them into the last histogram slot, so if
- * present, they will show up as a sharp peak.
- */
- if (eip > prof_len-1)
- eip = prof_len-1;
- atomic_inc((atomic_t *)&prof_buffer[eip]);
- }
-}
-
-#endif
diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c
index 25e8deec4..1c359b4f4 100644
--- a/arch/i386/kernel/ldt.c
+++ b/arch/i386/kernel/ldt.c
@@ -2,6 +2,7 @@
* linux/kernel/ldt.c
*
* Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
*/
#include <linux/errno.h>
@@ -17,19 +18,31 @@
#include <asm/ldt.h>
#include <asm/desc.h>
+/*
+ * read_ldt() is not really atomic - this is not a problem since
+ * synchronization of reads and writes done to the LDT has to be
+ * assured by user-space anyway. Writes are atomic, to protect
+ * the security checks done on new descriptors.
+ */
static int read_ldt(void * ptr, unsigned long bytecount)
{
- void * address = current->mm->segments;
+ int err;
unsigned long size;
+ struct mm_struct * mm = current->mm;
+
+ err = 0;
+ if (!mm->segments)
+ goto out;
- if (!ptr)
- return -EINVAL;
- if (!address)
- return 0;
size = LDT_ENTRIES*LDT_ENTRY_SIZE;
if (size > bytecount)
size = bytecount;
- return copy_to_user(ptr, address, size) ? -EFAULT : size;
+
+ err = size;
+ if (copy_to_user(ptr, mm->segments, size))
+ err = -EFAULT;
+out:
+ return err;
}
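A user-space view of the read path, via the raw syscall (func 0 = read), as a sketch:

	#include <unistd.h>
	#include <sys/syscall.h>
	#include <asm/ldt.h>	/* LDT_ENTRIES, LDT_ENTRY_SIZE */

	char buf[LDT_ENTRIES * LDT_ENTRY_SIZE];
	int n = syscall(SYS_modify_ldt, 0, buf, sizeof(buf));
	/* n = bytes copied, 0 if the task has no LDT yet */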
static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
@@ -64,31 +77,30 @@ static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
* you get strange behaviour (the kernel is safe, it's just user
* space strangeness).
*
- * For no good reason except historical, the GDT index of the LDT
- * is chosen to follow the index number in the task[] array.
+ * we have two choices: either we preallocate the LDT descriptor
+ * and can do a shared modify_ldt(), or we postallocate it and do
+ * an SMP message pass to update it. Currently we are a bit
+ * unfriendly to user-space and reload the LDT only on the next
+ * schedule. (this is only an issue on SMP)
+ *
+ * the GDT index of the LDT is allocated dynamically, and is
+ * limited by MAX_LDT_DESCRIPTORS.
*/
+ down(&mm->mmap_sem);
if (!mm->segments) {
- void * ldt;
+
error = -ENOMEM;
- ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
- if (!ldt)
- goto out;
- memset(ldt, 0, LDT_ENTRIES*LDT_ENTRY_SIZE);
+ mm->segments = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
+ if (!mm->segments)
+ goto out_unlock;
+
+ if (atomic_read(&mm->mm_users) > 1)
+ printk(KERN_WARNING "LDT allocated for cloned task!\n");
/*
- * Make sure someone else hasn't allocated it for us ...
+ * Possibly do an SMP cross-call to other CPUs to reload
+ * their LDTs?
*/
- if (!mm->segments) {
- int i = current->tarray_ptr - &task[0];
- mm->segments = ldt;
- set_ldt_desc(i, ldt, LDT_ENTRIES);
- current->tss.ldt = _LDT(i);
- load_ldt(i);
- if (atomic_read(&mm->count) > 1)
- printk(KERN_WARNING
- "LDT allocated for cloned task!\n");
- } else {
- vfree(ldt);
- }
+ load_LDT(mm);
}
lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->segments);
@@ -127,6 +139,9 @@ install:
*lp = entry_1;
*(lp+1) = entry_2;
error = 0;
+
+out_unlock:
+ up(&mm->mmap_sem);
out:
return error;
}
@@ -135,7 +150,6 @@ asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount)
{
int ret = -ENOSYS;
- lock_kernel();
switch (func) {
case 0:
ret = read_ldt(ptr, bytecount);
@@ -147,6 +161,5 @@ asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount)
ret = write_ldt(ptr, bytecount, 0);
break;
}
- unlock_kernel();
return ret;
}
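
The split above is visible from user space through sys_modify_ldt(). As a minimal user-space sketch (assuming <asm/ldt.h> exports LDT_ENTRY_SIZE as in this tree, and reading only the first few entries): func 0 lands in read_ldt() and returns the byte count copied.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/ldt.h>			/* LDT_ENTRY_SIZE */

int main(void)
{
	unsigned char buf[16 * LDT_ENTRY_SIZE];
	/* func 0 -> read_ldt(); no kernel lock taken, per the comment above */
	long n = syscall(SYS_modify_ldt, 0, buf, sizeof(buf));

	printf("modify_ldt(0, ...) returned %ld bytes\n", n);
	return n < 0;
}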
diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c
index 8bfd7fa45..792cc8c0d 100644
--- a/arch/i386/kernel/mca.c
+++ b/arch/i386/kernel/mca.c
@@ -210,7 +210,19 @@ static void mca_configure_adapter_status(int slot) {
/*--------------------------------------------------------------------*/
-__initfunc(void mca_init(void))
+struct resource mca_standard_resources[] = {
+ { "system control port B (MCA)", 0x60, 0x60 },
+ { "arbitration (MCA)", 0x90, 0x90 },
+ { "card Select Feedback (MCA)", 0x91, 0x91 },
+ { "system Control port A (MCA)", 0x92, 0x92 },
+ { "system board setup (MCA)", 0x94, 0x94 },
+ { "POS (MCA)", 0x96, 0x97 },
+ { "POS (MCA)", 0x100, 0x107 }
+};
+
+#define MCA_STANDARD_RESOURCES (sizeof(mca_standard_resources)/sizeof(struct resource))
+
+void __init mca_init(void)
{
unsigned int i, j;
unsigned long flags;
@@ -319,13 +331,8 @@ __initfunc(void mca_init(void))
restore_flags(flags);
- request_region(0x60,0x01,"system control port B (MCA)");
- request_region(0x90,0x01,"arbitration (MCA)");
- request_region(0x91,0x01,"card Select Feedback (MCA)");
- request_region(0x92,0x01,"system Control port A (MCA)");
- request_region(0x94,0x01,"system board setup (MCA)");
- request_region(0x96,0x02,"POS (MCA)");
- request_region(0x100,0x08,"POS (MCA)");
+ for (i = 0; i < MCA_STANDARD_RESOURCES; i++)
+ request_resource(&ioport_resource, mca_standard_resources + i);
#ifdef CONFIG_PROC_FS
mca_do_proc_init();
@@ -691,7 +698,7 @@ int get_mca_info(char *buf)
/*--------------------------------------------------------------------*/
-__initfunc(void mca_do_proc_init(void))
+void __init mca_do_proc_init(void)
{
int i;
struct proc_dir_entry* node = NULL;
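
The mca.c conversion trades seven open-coded request_region() calls for a table plus a loop. The shape of that pattern, sketched stand-alone — struct res and the printf stand in for the kernel's struct resource and request_resource():

#include <stdio.h>

struct res { const char *name; unsigned start, end; };

static const struct res mca_ports[] = {
	{ "system control port B (MCA)", 0x60,  0x60  },
	{ "arbitration (MCA)",           0x90,  0x90  },
	{ "POS (MCA)",                   0x100, 0x107 },
	/* ... one entry per fixed MCA port range ... */
};

#define NPORTS (sizeof(mca_ports) / sizeof(mca_ports[0]))

int main(void)
{
	unsigned i;

	for (i = 0; i < NPORTS; i++)	/* the kernel calls request_resource() here */
		printf("claim %#04x-%#04x: %s\n",
		       mca_ports[i].start, mca_ports[i].end, mca_ports[i].name);
	return 0;
}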
diff --git a/arch/i386/kernel/mtrr.c b/arch/i386/kernel/mtrr.c
index 084ad431c..f76c68f59 100644
--- a/arch/i386/kernel/mtrr.c
+++ b/arch/i386/kernel/mtrr.c
@@ -201,6 +201,28 @@
19990512 Richard Gooch <rgooch@atnf.csiro.au>
Minor cleanups.
v1.35
+ 19990707 Zoltan Boszormenyi <zboszor@mol.hu>
+ Check whether ARR3 is protected in cyrix_get_free_region()
+ and mtrr_del(). The code won't attempt to delete or change it
+ from now on if the BIOS protected ARR3. It silently skips ARR3
+ in cyrix_get_free_region() or returns with an error code from
+ mtrr_del().
+ 19990711 Zoltan Boszormenyi <zboszor@mol.hu>
+ Reset some bits in the CCRs in cyrix_arr_init() to disable SMM
+ if ARR3 isn't protected. This is needed because if SMM is active
+ and ARR3 isn't protected then deleting and setting ARR3 again
+ may lock up the processor. With SMM entirely disabled, it does
+ not happen.
+ 19990812 Zoltan Boszormenyi <zboszor@mol.hu>
+ Rearrange switch() statements so the driver accommodates
+ the fact that the AMD Athlon handles its MTRRs the same way
+ as Intel does.
+ 19990814 Zoltan Boszormenyi <zboszor@mol.hu>
+ Double check for Intel in mtrr_add()'s big switch() because
+ that revision check is only valid for Intel CPUs.
+ 19990819 Alan Cox <alan@redhat.com>
+ Tested Zoltan's changes on a pre production Athlon - 100%
+ success.
*/
#include <linux/types.h>
#include <linux/errno.h>
@@ -235,7 +257,7 @@
#include <asm/msr.h>
#include <asm/hardirq.h>
-#include "irq.h"
+#include <linux/irq.h>
#define MTRR_VERSION "1.35 (19990512)"
@@ -309,6 +331,7 @@ struct set_mtrr_context
unsigned long ccr3;
};
+static int arr3_protected;
/* Put the processor into a state where MTRRs can be safely set */
static void set_mtrr_prepare (struct set_mtrr_context *ctxt)
@@ -321,6 +344,8 @@ static void set_mtrr_prepare (struct set_mtrr_context *ctxt)
switch (boot_cpu_data.x86_vendor)
{
case X86_VENDOR_AMD:
+ if (boot_cpu_data.x86 >= 6) break; /* Athlon and post-Athlon CPUs */
+ /* else fall through */
case X86_VENDOR_CENTAUR:
return;
/*break;*/
@@ -344,6 +369,7 @@ static void set_mtrr_prepare (struct set_mtrr_context *ctxt)
switch (boot_cpu_data.x86_vendor)
{
+ case X86_VENDOR_AMD:
case X86_VENDOR_INTEL:
/* Disable MTRRs, and set the default type to uncached */
rdmsr (MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
@@ -365,6 +391,8 @@ static void set_mtrr_done (struct set_mtrr_context *ctxt)
switch (boot_cpu_data.x86_vendor)
{
case X86_VENDOR_AMD:
+ if (boot_cpu_data.x86 >= 6) break; /* Athlon and post-Athlon CPUs */
+ /* else fall through */
case X86_VENDOR_CENTAUR:
__restore_flags (ctxt->flags);
return;
@@ -376,6 +404,7 @@ static void set_mtrr_done (struct set_mtrr_context *ctxt)
/* Restore MTRRdefType */
switch (boot_cpu_data.x86_vendor)
{
+ case X86_VENDOR_AMD:
case X86_VENDOR_INTEL:
wrmsr (MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
break;
@@ -406,6 +435,9 @@ static unsigned int get_num_var_ranges (void)
switch (boot_cpu_data.x86_vendor)
{
+ case X86_VENDOR_AMD:
+ if (boot_cpu_data.x86 < 6) return 2; /* pre-Athlon CPUs */
+ /* else fall through */
case X86_VENDOR_INTEL:
rdmsr (MTRRcap_MSR, config, dummy);
return (config & 0xff);
@@ -416,9 +448,6 @@ static unsigned int get_num_var_ranges (void)
/* and Centaur has 8 MCR's */
return 8;
/*break;*/
- case X86_VENDOR_AMD:
- return 2;
- /*break;*/
}
return 0;
} /* End Function get_num_var_ranges */
@@ -430,12 +459,14 @@ static int have_wrcomb (void)
switch (boot_cpu_data.x86_vendor)
{
+ case X86_VENDOR_AMD:
+ if (boot_cpu_data.x86 < 6) return 1; /* pre-Athlon CPUs */
+ /* else fall through */
case X86_VENDOR_INTEL:
rdmsr (MTRRcap_MSR, config, dummy);
return (config & (1<<10));
/*break;*/
case X86_VENDOR_CYRIX:
- case X86_VENDOR_AMD:
case X86_VENDOR_CENTAUR:
return 1;
/*break;*/
@@ -731,8 +762,8 @@ struct mtrr_var_range
/* Get the MSR pair relating to a var range */
-__initfunc(static void get_mtrr_var_range (unsigned int index,
- struct mtrr_var_range *vr))
+static void __init get_mtrr_var_range (unsigned int index,
+ struct mtrr_var_range *vr)
{
rdmsr (MTRRphysBase_MSR (index), vr->base_lo, vr->base_hi);
rdmsr (MTRRphysMask_MSR (index), vr->mask_lo, vr->mask_hi);
@@ -741,8 +772,8 @@ __initfunc(static void get_mtrr_var_range (unsigned int index,
/* Set the MSR pair relating to a var range. Returns TRUE if
changes are made */
-__initfunc(static int set_mtrr_var_range_testing (unsigned int index,
- struct mtrr_var_range *vr))
+static int __init set_mtrr_var_range_testing (unsigned int index,
+ struct mtrr_var_range *vr)
{
unsigned int lo, hi;
int changed = FALSE;
@@ -764,7 +795,7 @@ __initfunc(static int set_mtrr_var_range_testing (unsigned int index,
return changed;
} /* End Function set_mtrr_var_range_testing */
-__initfunc(static void get_fixed_ranges(mtrr_type *frs))
+static void __init get_fixed_ranges(mtrr_type *frs)
{
unsigned long *p = (unsigned long *)frs;
int i;
@@ -777,7 +808,7 @@ __initfunc(static void get_fixed_ranges(mtrr_type *frs))
rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i*2], p[7 + i*2]);
} /* End Function get_fixed_ranges */
-__initfunc(static int set_fixed_ranges_testing(mtrr_type *frs))
+static int __init set_fixed_ranges_testing(mtrr_type *frs)
{
unsigned long *p = (unsigned long *)frs;
int changed = FALSE;
@@ -819,7 +850,7 @@ struct mtrr_state
/* Grab all of the MTRR state for this CPU into *state */
-__initfunc(static void get_mtrr_state(struct mtrr_state *state))
+static void __init get_mtrr_state(struct mtrr_state *state)
{
unsigned int nvrs, i;
struct mtrr_var_range *vrs;
@@ -842,14 +873,14 @@ __initfunc(static void get_mtrr_state(struct mtrr_state *state))
/* Free resources associated with a struct mtrr_state */
-__initfunc(static void finalize_mtrr_state(struct mtrr_state *state))
+static void __init finalize_mtrr_state(struct mtrr_state *state)
{
if (state->var_ranges) kfree (state->var_ranges);
} /* End Function finalize_mtrr_state */
-__initfunc(static unsigned long set_mtrr_state (struct mtrr_state *state,
- struct set_mtrr_context *ctxt))
+static unsigned long __init set_mtrr_state (struct mtrr_state *state,
+ struct set_mtrr_context *ctxt)
/* [SUMMARY] Set the MTRR state for this CPU.
<state> The MTRR state information to read.
<ctxt> Some relevant CPU context.
@@ -948,7 +979,7 @@ static void set_mtrr_smp (unsigned int reg, unsigned long base,
/* Some BIOSes are broken and don't set all MTRRs the same! */
-__initfunc(static void mtrr_state_warn (unsigned long mask))
+static void __init mtrr_state_warn(unsigned long mask)
{
if (!mask) return;
if (mask & MTRR_CHANGE_MASK_FIXED)
@@ -1030,6 +1061,7 @@ static int cyrix_get_free_region (unsigned long base, unsigned long size)
for (i = 0; i < 7; i++)
{
cyrix_get_arr (i, &lbase, &lsize, &ltype);
+ if ((i == 3) && arr3_protected) continue;
if (lsize < 1) return i;
}
/* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */
@@ -1062,13 +1094,30 @@ int mtrr_add (unsigned long base, unsigned long size, unsigned int type,
if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return -ENODEV;
switch (boot_cpu_data.x86_vendor)
{
+ case X86_VENDOR_AMD:
+ if (boot_cpu_data.x86 < 6) { /* pre-Athlon CPUs */
+ /* Apply the K6 block alignment and size rules
+ In order
+ o Uncached or gathering only
+ o 128K or bigger block
+ o Power of 2 block
+ o base suitably aligned to the power
+ */
+ if (type > MTRR_TYPE_WRCOMB || size < (1 << 17) ||
+ (size & ~(size-1))-size || (base & (size-1)))
+ return -EINVAL;
+ break;
+ } /* else fall through */
case X86_VENDOR_INTEL:
- /* For Intel PPro stepping <= 7, must be 4 MiB aligned */
- if ( (boot_cpu_data.x86 == 6) && (boot_cpu_data.x86_model == 1) &&
- (boot_cpu_data.x86_mask <= 7) && ( base & ( (1 << 22) - 1 ) ) )
- {
- printk ("mtrr: base(0x%lx) is not 4 MiB aligned\n", base);
- return -EINVAL;
+ /* Double check for Intel, we may run on Athlon. */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+ /* For Intel PPro stepping <= 7, must be 4 MiB aligned */
+ if ( (boot_cpu_data.x86 == 6) && (boot_cpu_data.x86_model == 1) &&
+ (boot_cpu_data.x86_mask <= 7) && ( base & ( (1 << 22) - 1 ) ) )
+ {
+ printk ("mtrr: base(0x%lx) is not 4 MiB aligned\n", base);
+ return -EINVAL;
+ }
}
/* Fall through */
case X86_VENDOR_CYRIX:
@@ -1105,18 +1154,6 @@ int mtrr_add (unsigned long base, unsigned long size, unsigned int type,
return -EINVAL;
}
break;
- case X86_VENDOR_AMD:
- /* Apply the K6 block alignment and size rules
- In order
- o Uncached or gathering only
- o 128K or bigger block
- o Power of 2 block
- o base suitably aligned to the power
- */
- if (type > MTRR_TYPE_WRCOMB || size < (1 << 17) ||
- (size & ~(size-1))-size || (base & (size-1)))
- return -EINVAL;
- break;
default:
return -EINVAL;
/*break;*/
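
The four K6 constraints checked under the new AMD case compress into one predicate. A stand-alone sketch — k6_region_ok() is an illustrative name, the MTRR_TYPE_* values are as in <asm/mtrr.h> — noting that the kernel's (size & ~(size-1))-size expression and the more common size & (size-1) both vanish exactly for powers of two:

#include <stdio.h>

#define MTRR_TYPE_UNCACHABLE 0
#define MTRR_TYPE_WRCOMB     1

static int k6_region_ok(unsigned long base, unsigned long size, unsigned type)
{
	if (type > MTRR_TYPE_WRCOMB)	/* uncached or write-gathering only */
		return 0;
	if (size < (1UL << 17))		/* 128K or bigger block */
		return 0;
	if (size & (size - 1))		/* power of 2 block */
		return 0;
	if (base & (size - 1))		/* base suitably aligned to the size */
		return 0;
	return 1;
}

int main(void)
{
	printf("%d\n", k6_region_ok(0x8000000, 1 << 20, MTRR_TYPE_WRCOMB));	/* 1 */
	printf("%d\n", k6_region_ok(0x8000000, 3 << 19, MTRR_TYPE_WRCOMB));	/* 0: not a power of 2 */
	return 0;
}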
@@ -1221,6 +1258,15 @@ int mtrr_del (int reg, unsigned long base, unsigned long size)
printk ("mtrr: register: %d too big\n", reg);
return -EINVAL;
}
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_CYRIX)
+ {
+ if ((reg == 3) && arr3_protected)
+ {
+ spin_unlock (&main_lock);
+ printk ("mtrr: ARR3 cannot be changed\n");
+ return -EINVAL;
+ }
+ }
(*get_mtrr) (reg, &lbase, &lsize, &ltype);
if (lsize < 1)
{
@@ -1532,7 +1578,7 @@ arr_state_t arr_state[8] __initdata = {
unsigned char ccr_state[7] __initdata = { 0, 0, 0, 0, 0, 0, 0 };
-__initfunc(static void cyrix_arr_init_secondary(void))
+static void __init cyrix_arr_init_secondary(void)
{
struct set_mtrr_context ctxt;
int i;
@@ -1565,7 +1611,7 @@ __initfunc(static void cyrix_arr_init_secondary(void))
* - (maybe) disable ARR3
* Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set)
*/
-__initfunc(static void cyrix_arr_init(void))
+static void __init cyrix_arr_init(void)
{
struct set_mtrr_context ctxt;
unsigned char ccr[7];
@@ -1585,22 +1631,22 @@ __initfunc(static void cyrix_arr_init(void))
ccr[5] = getCx86 (CX86_CCR5);
ccr[6] = getCx86 (CX86_CCR6);
- if (ccr[3] & 1)
+ if (ccr[3] & 1) {
ccrc[3] = 1;
- else {
+ arr3_protected = 1;
+ } else {
/* Disable SMM mode (bit 1), access to SMM memory (bit 2) and
* access to SMM memory through ARR3 (bit 7).
*/
-/*
if (ccr[1] & 0x80) { ccr[1] &= 0x7f; ccrc[1] |= 0x80; }
if (ccr[1] & 0x04) { ccr[1] &= 0xfb; ccrc[1] |= 0x04; }
if (ccr[1] & 0x02) { ccr[1] &= 0xfd; ccrc[1] |= 0x02; }
-*/
+ arr3_protected = 0;
if (ccr[6] & 0x02) {
ccr[6] &= 0xfd; ccrc[6] = 1; /* Disable write protection of ARR3. */
setCx86 (CX86_CCR6, ccr[6]);
}
- /* Disable ARR3. */
+ /* Disable ARR3. This is safe now that we disabled SMM. */
/* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */
}
/* If we changed CCR1 in memory, change it in the processor, too. */
@@ -1631,7 +1677,7 @@ __initfunc(static void cyrix_arr_init(void))
if ( ccrc[6] ) printk ("mtrr: ARR3 was write protected, unprotected\n");
} /* End Function cyrix_arr_init */
-__initfunc(static void centaur_mcr_init (void))
+static void __init centaur_mcr_init(void)
{
unsigned i;
struct set_mtrr_context ctxt;
@@ -1655,11 +1701,17 @@ __initfunc(static void centaur_mcr_init (void))
set_mtrr_done (&ctxt);
} /* End Function centaur_mcr_init */
-__initfunc(static void mtrr_setup (void))
+static void __init mtrr_setup(void)
{
printk ("mtrr: v%s Richard Gooch (rgooch@atnf.csiro.au)\n", MTRR_VERSION);
switch (boot_cpu_data.x86_vendor)
{
+ case X86_VENDOR_AMD:
+ if (boot_cpu_data.x86 < 6) { /* pre-Athlon CPUs */
+ get_mtrr = amd_get_mtrr;
+ set_mtrr_up = amd_set_mtrr_up;
+ break;
+ } /* else fall through */
case X86_VENDOR_INTEL:
get_mtrr = intel_get_mtrr;
set_mtrr_up = intel_set_mtrr_up;
@@ -1669,10 +1721,6 @@ __initfunc(static void mtrr_setup (void))
set_mtrr_up = cyrix_set_arr_up;
get_free_region = cyrix_get_free_region;
break;
- case X86_VENDOR_AMD:
- get_mtrr = amd_get_mtrr;
- set_mtrr_up = amd_set_mtrr_up;
- break;
case X86_VENDOR_CENTAUR:
get_mtrr = centaur_get_mcr;
set_mtrr_up = centaur_set_mcr_up;
@@ -1685,12 +1733,14 @@ __initfunc(static void mtrr_setup (void))
static volatile unsigned long smp_changes_mask __initdata = 0;
static struct mtrr_state smp_mtrr_state __initdata = {0, 0};
-__initfunc(void mtrr_init_boot_cpu (void))
+void __init mtrr_init_boot_cpu(void)
{
if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return;
mtrr_setup ();
switch (boot_cpu_data.x86_vendor)
{
+ case X86_VENDOR_AMD:
+ if (boot_cpu_data.x86 < 6) break; /* pre-Athlon CPUs */
case X86_VENDOR_INTEL:
get_mtrr_state (&smp_mtrr_state);
break;
@@ -1703,7 +1753,7 @@ __initfunc(void mtrr_init_boot_cpu (void))
}
} /* End Function mtrr_init_boot_cpu */
-__initfunc(static void intel_mtrr_init_secondary_cpu (void))
+static void __init intel_mtrr_init_secondary_cpu(void)
{
unsigned long mask, count;
struct set_mtrr_context ctxt;
@@ -1722,11 +1772,14 @@ __initfunc(static void intel_mtrr_init_secondary_cpu (void))
}
} /* End Function intel_mtrr_init_secondary_cpu */
-__initfunc(void mtrr_init_secondary_cpu (void))
+void __init mtrr_init_secondary_cpu(void)
{
if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return;
switch (boot_cpu_data.x86_vendor)
{
+ case X86_VENDOR_AMD:
+ /* Just for robustness: pre-Athlon CPUs cannot do SMP. */
+ if (boot_cpu_data.x86 < 6) break;
case X86_VENDOR_INTEL:
intel_mtrr_init_secondary_cpu ();
break;
@@ -1746,12 +1799,14 @@ __initfunc(void mtrr_init_secondary_cpu (void))
} /* End Function mtrr_init_secondary_cpu */
#endif /* __SMP__ */
-__initfunc(int mtrr_init(void))
+int __init mtrr_init(void)
{
if ( !(boot_cpu_data.x86_capability & X86_FEATURE_MTRR) ) return 0;
# ifdef __SMP__
switch (boot_cpu_data.x86_vendor)
{
+ case X86_VENDOR_AMD:
+ if (boot_cpu_data.x86 < 6) break; /* pre-Athlon CPUs */
case X86_VENDOR_INTEL:
finalize_mtrr_state (&smp_mtrr_state);
mtrr_state_warn (smp_changes_mask);
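
The family >= 6 test ("Athlon handles MTRRs like Intel") now appears in half a dozen switches above; mtrr_setup() is the one spot where it selects behaviour once, via the get_mtrr/set_mtrr_up pointers. A stand-alone sketch of that dispatch (names and printouts are illustrative, not kernel API):

#include <stdio.h>

typedef void (*get_fn)(void);

static void intel_get(void) { puts("intel_get_mtrr"); }
static void amd_get(void)   { puts("amd_get_mtrr"); }

/* mirrors mtrr_setup(): pre-Athlon AMD keeps its own accessors,
   family >= 6 falls through to the Intel ones */
static get_fn pick_get_mtrr(int vendor_is_amd, int family)
{
	if (vendor_is_amd && family < 6)
		return amd_get;
	return intel_get;
}

int main(void)
{
	pick_get_mtrr(1, 5)();	/* K6: amd_get_mtrr */
	pick_get_mtrr(1, 6)();	/* Athlon: intel_get_mtrr */
	return 0;
}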
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 08dde1ed7..4937efec2 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -40,24 +40,18 @@
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/desc.h>
+#include <asm/mmu_context.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif
-#include "irq.h"
+#include <linux/irq.h>
spinlock_t semaphore_wake_lock = SPIN_LOCK_UNLOCKED;
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-#ifdef CONFIG_APM
-extern int apm_do_idle(void);
-extern void apm_do_busy(void);
-#endif
-
-static int hlt_counter=0;
-
-#define HARD_IDLE_TIMEOUT (HZ / 3)
+int hlt_counter=0;
void disable_hlt(void)
{
@@ -69,103 +63,39 @@ void enable_hlt(void)
hlt_counter--;
}
-#ifndef __SMP__
-
-static void hard_idle(void)
-{
- while (!current->need_resched) {
- if (boot_cpu_data.hlt_works_ok && !hlt_counter) {
-#ifdef CONFIG_APM
- /* If the APM BIOS is not enabled, or there
- is an error calling the idle routine, we
- should hlt if possible. We need to check
- need_resched again because an interrupt
- may have occurred in apm_do_idle(). */
- start_bh_atomic();
- if (!apm_do_idle() && !current->need_resched)
- __asm__("hlt");
- end_bh_atomic();
-#else
- __asm__("hlt");
-#endif
- }
- if (current->need_resched)
- break;
- schedule();
- }
-#ifdef CONFIG_APM
- apm_do_busy();
-#endif
-}
-
/*
- * The idle loop on a uniprocessor i386..
- */
-static int cpu_idle(void *unused)
-{
- int work = 1;
- unsigned long start_idle = 0;
-
- /* endless idle loop with no priority at all */
- current->priority = 0;
- current->counter = -100;
- init_idle();
-
- for (;;) {
- if (work)
- start_idle = jiffies;
-
- if (jiffies - start_idle > HARD_IDLE_TIMEOUT)
- hard_idle();
- else {
- if (boot_cpu_data.hlt_works_ok && !hlt_counter && !current->need_resched)
- __asm__("hlt");
- }
-
- work = current->need_resched;
- schedule();
- check_pgt_cache();
- }
-}
-
-#else
+ * Power management idle function, if any.
+ */
+void (*acpi_idle)(void) = NULL;
/*
- * This is being executed in task 0 'user space'.
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
*/
-
-int cpu_idle(void *unused)
+void cpu_idle(void)
{
/* endless idle loop with no priority at all */
+ init_idle();
current->priority = 0;
current->counter = -100;
- init_idle();
- while(1) {
- if (current_cpu_data.hlt_works_ok && !hlt_counter &&
- !current->need_resched)
- __asm__("hlt");
- /*
- * although we are an idle CPU, we do not want to
- * get into the scheduler unnecessarily.
- */
- if (current->need_resched) {
- schedule();
- check_pgt_cache();
+ while (1) {
+ while (!current->need_resched) {
+ if (!current_cpu_data.hlt_works_ok)
+ continue;
+ if (hlt_counter)
+ continue;
+ asm volatile("sti ; hlt" : : : "memory");
}
+ schedule();
+ check_pgt_cache();
+ if (acpi_idle)
+ acpi_idle();
}
}
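
One subtlety in the rewritten idle loop deserves a note: "sti ; hlt" is safe because sti enables interrupts only after the following instruction completes, so a wakeup interrupt cannot land between the need_resched test and the hlt and strand the CPU asleep. A stand-in sketch of the control flow — a volatile flag replaces the real task state, and the asm stays in a comment:

#include <stdio.h>

static volatile int need_resched;

static void idle_body(void)
{
	while (!need_resched) {
		/* kernel: asm volatile("sti ; hlt" : : : "memory");
		   sti's one-instruction interrupt shadow covers the hlt,
		   closing the check-then-sleep race */
	}
	/* then: schedule(); check_pgt_cache(); optional acpi_idle() */
}

int main(void)
{
	need_resched = 1;	/* pretend a timer interrupt asked for a reschedule */
	idle_body();
	puts("would call schedule()");
	return 0;
}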
-#endif
-
-asmlinkage int sys_idle(void)
-{
- if (current->pid != 0)
- return -EPERM;
- cpu_idle(NULL);
- return 0;
-}
-
/*
* This routine reboots the machine by asking the keyboard
* controller to pulse the reset-line low. We try that for a while,
@@ -176,7 +106,7 @@ static long no_idt[2] = {0, 0};
static int reboot_mode = 0;
static int reboot_thru_bios = 0;
-__initfunc(void reboot_setup(char *str, int *ints))
+static int __init reboot_setup(char *str)
{
while(1) {
switch (*str) {
@@ -198,8 +128,10 @@ __initfunc(void reboot_setup(char *str, int *ints))
else
break;
}
+ return 1;
}
+__setup("reboot=", reboot_setup);
/* The following code and data reboots the machine by switching to real
mode and jumping to the BIOS reset entry point, as if the CPU has
@@ -321,13 +253,9 @@ void machine_restart(char * __unused)
pg0[0] = _PAGE_RW | _PAGE_PRESENT;
/*
- * Use `swapper_pg_dir' as our page directory. We bother with
- * `SET_PAGE_DIR' because although might be rebooting, but if we change
- * the way we set root page dir in the future, then we wont break a
- * seldom used feature ;)
+ * Use `swapper_pg_dir' as our page directory.
*/
-
- SET_PAGE_DIR(current,swapper_pg_dir);
+ asm volatile("movl %0,%%cr3": :"r" (__pa(swapper_pg_dir)));
/* Write 0x1234 to absolute memory location 0x472. The BIOS reads
this on booting to tell it to "Bypass memory test (also warm
@@ -405,6 +333,7 @@ void show_regs(struct pt_regs * regs)
regs->esi, regs->edi, regs->ebp);
printk(" DS: %04x ES: %04x\n",
0xffff & regs->xds,0xffff & regs->xes);
+
__asm__("movl %%cr0, %0": "=r" (cr0));
__asm__("movl %%cr2, %0": "=r" (cr2));
__asm__("movl %%cr3, %0": "=r" (cr3));
@@ -475,11 +404,19 @@ void free_task_struct(struct task_struct *p)
free_pages((unsigned long) p, 1);
}
+/*
+ * No need to lock the MM as we are the last user
+ */
void release_segments(struct mm_struct *mm)
{
- if (mm->segments) {
- void * ldt = mm->segments;
+ void * ldt = mm->segments;
+
+ /*
+ * free the LDT
+ */
+ if (ldt) {
mm->segments = NULL;
+ clear_LDT();
vfree(ldt);
}
}
@@ -492,10 +429,9 @@ void forget_segments(void)
: "r" (0));
/*
- * Get the LDT entry from init_task.
+ * Load the LDT entry of init_task.
*/
- current->tss.ldt = _LDT(0);
- load_ldt(0);
+ load_LDT(&init_mm);
}
/*
@@ -537,12 +473,9 @@ void exit_thread(void)
void flush_thread(void)
{
- int i;
struct task_struct *tsk = current;
- for (i=0 ; i<8 ; i++)
- tsk->tss.debugreg[i] = 0;
-
+ memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
/*
* Forget coprocessor state..
*/
@@ -552,33 +485,45 @@ void flush_thread(void)
void release_thread(struct task_struct *dead_task)
{
+ if (dead_task->mm) {
+ void * ldt = dead_task->mm->segments;
+
+ // temporary debugging check
+ if (ldt) {
+ printk("WARNING: dead process %8s still has LDT? <%p>\n",
+ dead_task->comm, ldt);
+ BUG();
+ }
+ }
}
/*
- * If new_mm is NULL, we're being called to set up the LDT descriptor
- * for a clone task. Each clone must have a separate entry in the GDT.
+ * we do not have to muck with descriptors here, that is
+ * done in switch_mm() as needed.
*/
-void copy_segments(int nr, struct task_struct *p, struct mm_struct *new_mm)
+void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
{
struct mm_struct * old_mm = current->mm;
void * old_ldt = old_mm->segments, * ldt = old_ldt;
- /* default LDT - use the one from init_task */
- p->tss.ldt = _LDT(0);
- if (old_ldt) {
- if (new_mm) {
- ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
- new_mm->segments = ldt;
- if (!ldt) {
- printk(KERN_WARNING "ldt allocation failed\n");
- return;
- }
- memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
- }
- p->tss.ldt = _LDT(nr);
- set_ldt_desc(nr, ldt, LDT_ENTRIES);
+ if (!old_mm->segments) {
+ /*
+ * default LDT - use the one from init_task
+ */
+ new_mm->segments = NULL;
return;
}
+
+ /*
+ * Completely new LDT, we initialize it from the parent:
+ */
+ ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
+ if (!ldt)
+ printk(KERN_WARNING "ldt allocation failed\n");
+ else
+ memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
+ new_mm->segments = ldt;
+ return;
}
/*
@@ -592,31 +537,21 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
{
struct pt_regs * childregs;
- childregs = ((struct pt_regs *) (2*PAGE_SIZE + (unsigned long) p)) - 1;
+ childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1;
*childregs = *regs;
childregs->eax = 0;
childregs->esp = esp;
- p->tss.esp = (unsigned long) childregs;
- p->tss.esp0 = (unsigned long) (childregs+1);
- p->tss.ss0 = __KERNEL_DS;
+ p->thread.esp = (unsigned long) childregs;
+ p->thread.esp0 = (unsigned long) (childregs+1);
- p->tss.tr = _TSS(nr);
- set_tss_desc(nr,&(p->tss));
- p->tss.eip = (unsigned long) ret_from_fork;
+ p->thread.eip = (unsigned long) ret_from_fork;
- savesegment(fs,p->tss.fs);
- savesegment(gs,p->tss.gs);
-
- /*
- * a bitmap offset pointing outside of the TSS limit causes a nicely
- * controllable SIGSEGV. The first sys_ioperm() call sets up the
- * bitmap properly.
- */
- p->tss.bitmap = sizeof(struct thread_struct);
+ savesegment(fs,p->thread.fs);
+ savesegment(gs,p->thread.gs);
unlazy_fpu(current);
- p->tss.i387 = current->tss.i387;
+ p->thread.i387 = current->thread.i387;
return 0;
}
@@ -632,7 +567,7 @@ int dump_fpu (struct pt_regs * regs, struct user_i387_struct* fpu)
fpvalid = tsk->used_math;
if (fpvalid) {
unlazy_fpu(tsk);
- memcpy(fpu,&tsk->tss.i387.hard,sizeof(*fpu));
+ memcpy(fpu,&tsk->thread.i387.hard,sizeof(*fpu));
}
return fpvalid;
@@ -654,7 +589,7 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
dump->u_dsize -= dump->u_tsize;
dump->u_ssize = 0;
for (i = 0; i < 8; i++)
- dump->u_debugreg[i] = current->tss.debugreg[i];
+ dump->u_debugreg[i] = current->thread.debugreg[i];
if (dump->start_stack < TASK_SIZE)
dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
@@ -683,11 +618,10 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
/*
* This special macro can be used to load a debugging register
*/
-#define loaddebug(tsk,register) \
+#define loaddebug(thread,register) \
__asm__("movl %0,%%db" #register \
: /* no output */ \
- :"r" (tsk->tss.debugreg[register]))
-
+ :"r" (thread->debugreg[register]))
/*
 * switch_to(x,y) should switch tasks from x to y.
@@ -712,60 +646,67 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
* More important, however, is the fact that this allows us much
* more flexibility.
*/
-void __switch_to(struct task_struct *prev, struct task_struct *next)
+extern int cpus_initialized;
+void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
- /* Do the FPU save and set TS if it wasn't set before.. */
- unlazy_fpu(prev);
+ struct thread_struct *prev = &prev_p->thread,
+ *next = &next_p->thread;
+ struct tss_struct *tss = init_tss + smp_processor_id();
+
+ unlazy_fpu(prev_p);
/*
- * Reload TR, LDT and the page table pointers..
- *
- * We need TR for the IO permission bitmask (and
- * the vm86 bitmasks in case we ever use enhanced
- * v86 mode properly).
- *
- * We may want to get rid of the TR register some
- * day, and copy the bitmaps around by hand. Oh,
- * well. In the meantime we have to clear the busy
- * bit in the TSS entry, ugh.
+ * Reload esp0, LDT and the page table pointer:
*/
- gdt_table[next->tss.tr >> 3].b &= 0xfffffdff;
- asm volatile("ltr %0": :"g" (*(unsigned short *)&next->tss.tr));
+ tss->esp0 = next->esp0;
/*
* Save away %fs and %gs. No need to save %es and %ds, as
* those are always kernel segments while inside the kernel.
*/
- asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->tss.fs));
- asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->tss.gs));
-
- /* Re-load LDT if necessary */
- if (next->mm->segments != prev->mm->segments)
- asm volatile("lldt %0": :"g" (*(unsigned short *)&next->tss.ldt));
-
- /* Re-load page tables */
- {
- unsigned long new_cr3 = next->tss.cr3;
- if (new_cr3 != prev->tss.cr3)
- asm volatile("movl %0,%%cr3": :"r" (new_cr3));
- }
+ asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs));
+ asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
/*
* Restore %fs and %gs.
*/
- loadsegment(fs,next->tss.fs);
- loadsegment(gs,next->tss.gs);
+ loadsegment(fs, next->fs);
+ loadsegment(gs, next->gs);
/*
* Now maybe reload the debug registers
*/
- if (next->tss.debugreg[7]){
- loaddebug(next,0);
- loaddebug(next,1);
- loaddebug(next,2);
- loaddebug(next,3);
- loaddebug(next,6);
- loaddebug(next,7);
+ if (next->debugreg[7]){
+ loaddebug(next, 0);
+ loaddebug(next, 1);
+ loaddebug(next, 2);
+ loaddebug(next, 3);
+ /* no 4 and 5 */
+ loaddebug(next, 6);
+ loaddebug(next, 7);
+ }
+
+ if (prev->ioperm || next->ioperm) {
+ if (next->ioperm) {
+ /*
+ * 4 cachelines copy ... not good, but not that
+ * bad either. Anyone got something better?
+ * This only affects processes which use ioperm().
+ * [Putting the TSSs into 4k-tlb mapped regions
+ * and playing VM tricks to switch the IO bitmap
+ * is not really acceptable.]
+ */
+ memcpy(tss->io_bitmap, next->io_bitmap,
+ IO_BITMAP_SIZE*sizeof(unsigned long));
+ tss->bitmap = IO_BITMAP_OFFSET;
+ } else
+ /*
+ * a bitmap offset pointing outside of the TSS limit
+ * causes a nicely controllable SIGSEGV if a process
+ * tries to use a port IO instruction. The first
+ * sys_ioperm() call sets up the bitmap properly.
+ */
+ tss->bitmap = INVALID_IO_BITMAP_OFFSET;
}
}
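
The ioperm() tail of the new __switch_to() is worth isolating: the 4-cacheline copy happens only when one of the two tasks actually uses ioperm(), and the common case is a single offset store that makes any port access fault. A sketch of that decision with stand-in types — fake_tss, fake_thr and the offset values are not the real TSS layout:

#include <stdio.h>
#include <string.h>

#define IO_BITMAP_WORDS 32		/* illustrative size */
#define VALID_OFFSET    0x68		/* hypothetical in-limit offset */
#define INVALID_OFFSET  0x8000		/* points past the TSS limit */

struct fake_tss { unsigned bitmap_off; unsigned long io_bitmap[IO_BITMAP_WORDS]; };
struct fake_thr { int ioperm; unsigned long io_bitmap[IO_BITMAP_WORDS]; };

/* mirrors the tail of __switch_to(): copy only when someone uses ioperm() */
static void switch_io_bitmap(struct fake_tss *tss,
			     const struct fake_thr *prev, const struct fake_thr *next)
{
	if (!prev->ioperm && !next->ioperm)
		return;			/* common case: nothing to do */
	if (next->ioperm) {
		memcpy(tss->io_bitmap, next->io_bitmap, sizeof(tss->io_bitmap));
		tss->bitmap_off = VALID_OFFSET;
	} else {
		/* out-of-limit offset => any IN/OUT faults with SIGSEGV */
		tss->bitmap_off = INVALID_OFFSET;
	}
}

int main(void)
{
	struct fake_tss tss = { INVALID_OFFSET, {0} };
	struct fake_thr a = { 0, {0} }, b = { 1, {0xff} };

	switch_io_bitmap(&tss, &a, &b);
	printf("bitmap offset now %#x\n", tss.bitmap_off);
	return 0;
}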
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index 9935cdf53..e86451291 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -45,7 +45,7 @@ static inline int get_stack_long(struct task_struct *task, int offset)
{
unsigned char *stack;
- stack = (unsigned char *)task->tss.esp0;
+ stack = (unsigned char *)task->thread.esp0;
stack += offset;
return (*((int *)stack));
}
@@ -61,7 +61,7 @@ static inline int put_stack_long(struct task_struct *task, int offset,
{
unsigned char * stack;
- stack = (unsigned char *) task->tss.esp0;
+ stack = (unsigned char *) task->thread.esp0;
stack += offset;
*(unsigned long *) stack = data;
return 0;
@@ -76,12 +76,12 @@ static int putreg(struct task_struct *child,
case FS:
if (value && (value & 3) != 3)
return -EIO;
- child->tss.fs = value;
+ child->thread.fs = value;
return 0;
case GS:
if (value && (value & 3) != 3)
return -EIO;
- child->tss.gs = value;
+ child->thread.gs = value;
return 0;
case DS:
case ES:
@@ -112,10 +112,10 @@ static unsigned long getreg(struct task_struct *child,
switch (regno >> 2) {
case FS:
- retval = child->tss.fs;
+ retval = child->thread.fs;
break;
case GS:
- retval = child->tss.gs;
+ retval = child->thread.gs;
break;
case DS:
case ES:
@@ -229,7 +229,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
addr <= (long) &dummy->u_debugreg[7]){
addr -= (long) &dummy->u_debugreg[0];
addr = addr >> 2;
- tmp = child->tss.debugreg[addr];
+ tmp = child->thread.debugreg[addr];
};
ret = put_user(tmp,(unsigned long *) data);
goto out;
@@ -278,7 +278,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
addr -= (long) &dummy->u_debugreg;
addr = addr >> 2;
- child->tss.debugreg[addr] = data;
+ child->thread.debugreg[addr] = data;
ret = 0;
goto out;
};
@@ -409,18 +409,18 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
ret = 0;
if ( !child->used_math ) {
/* Simulate an empty FPU. */
- child->tss.i387.hard.cwd = 0xffff037f;
- child->tss.i387.hard.swd = 0xffff0000;
- child->tss.i387.hard.twd = 0xffffffff;
+ child->thread.i387.hard.cwd = 0xffff037f;
+ child->thread.i387.hard.swd = 0xffff0000;
+ child->thread.i387.hard.twd = 0xffffffff;
}
#ifdef CONFIG_MATH_EMULATION
if ( boot_cpu_data.hard_math ) {
#endif
- __copy_to_user((void *)data, &child->tss.i387.hard,
+ __copy_to_user((void *)data, &child->thread.i387.hard,
sizeof(struct user_i387_struct));
#ifdef CONFIG_MATH_EMULATION
} else {
- save_i387_soft(&child->tss.i387.soft,
+ save_i387_soft(&child->thread.i387.soft,
(struct _fpstate *)data);
}
#endif
@@ -438,11 +438,11 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
#ifdef CONFIG_MATH_EMULATION
if ( boot_cpu_data.hard_math ) {
#endif
- __copy_from_user(&child->tss.i387.hard, (void *)data,
+ __copy_from_user(&child->thread.i387.hard, (void *)data,
sizeof(struct user_i387_struct));
#ifdef CONFIG_MATH_EMULATION
} else {
- restore_i387_soft(&child->tss.i387.soft,
+ restore_i387_soft(&child->thread.i387.soft,
(struct _fpstate *)data);
}
#endif
diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c
new file mode 100644
index 000000000..cf556282d
--- /dev/null
+++ b/arch/i386/kernel/semaphore.c
@@ -0,0 +1,220 @@
+/*
+ * i386 semaphore implementation.
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ */
+#include <linux/sched.h>
+
+#include <asm/semaphore.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is
+ * protected by the semaphore spinlock.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ * - only on a boundary condition do we need to care. When we go
+ * from a negative count to a non-negative, we wake people up.
+ * - when we go from a non-negative count to a negative, we must
+ * (a) synchronize with the "sleeper" count and (b) make sure
+ * that we're on the wakeup list before we synchronize so that
+ * we cannot lose wakeup events.
+ */
+
+void __up(struct semaphore *sem)
+{
+ wake_up(&sem->wait);
+}
+
+static spinlock_t semaphore_lock = SPIN_LOCK_UNLOCKED;
+
+void __down(struct semaphore * sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ tsk->state = TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ spin_lock_irq(&semaphore_lock);
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irq(&semaphore_lock);
+
+ schedule();
+ tsk->state = TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE;
+ spin_lock_irq(&semaphore_lock);
+ }
+ spin_unlock_irq(&semaphore_lock);
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+ wake_up(&sem->wait);
+}
+
+int __down_interruptible(struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ tsk->state = TASK_INTERRUPTIBLE|TASK_EXCLUSIVE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ spin_lock_irq(&semaphore_lock);
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * With signals pending, this turns into
+ * the trylock failure case - we won't be
+ * sleeping, and we can't get the lock as
+ * it has contention. Just correct the count
+ * and exit.
+ */
+ if (signal_pending(current)) {
+ retval = -EINTR;
+ sem->sleepers = 0;
+ atomic_add(sleepers, &sem->count);
+ break;
+ }
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock. The
+ * "-1" is because we're still hoping to get
+ * the lock.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irq(&semaphore_lock);
+
+ schedule();
+ tsk->state = TASK_INTERRUPTIBLE|TASK_EXCLUSIVE;
+ spin_lock_irq(&semaphore_lock);
+ }
+ spin_unlock_irq(&semaphore_lock);
+ tsk->state = TASK_RUNNING;
+ remove_wait_queue(&sem->wait, &wait);
+ wake_up(&sem->wait);
+ return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for
+ * having decremented the count.
+ *
+ * We could have done the trylock with a
+ * single "cmpxchg" without failure cases,
+ * but then it wouldn't work on a 386.
+ */
+int __down_trylock(struct semaphore * sem)
+{
+ int sleepers;
+
+ spin_lock_irq(&semaphore_lock);
+ sleepers = sem->sleepers + 1;
+ sem->sleepers = 0;
+
+ /*
+ * Add "everybody else" and us into it. They aren't
+ * playing, because we own the spinlock.
+ */
+ if (!atomic_add_negative(sleepers, &sem->count))
+ wake_up(&sem->wait);
+
+ spin_unlock_irq(&semaphore_lock);
+ return 1;
+}
+
+
+/*
+ * The semaphore operations have a special calling sequence that
+ * allow us to do a simpler in-line version of them. These routines
+ * need to convert that sequence back into the C sequence when
+ * there is contention on the semaphore.
+ *
+ * %ecx contains the semaphore pointer on entry. Save the C-clobbered
+ * registers (%eax, %edx and %ecx) except %eax when used as a return
+ * value..
+ */
+asm(
+".align 4\n"
+".globl __down_failed\n"
+"__down_failed:\n\t"
+ "pushl %eax\n\t"
+ "pushl %edx\n\t"
+ "pushl %ecx\n\t"
+ "call __down\n\t"
+ "popl %ecx\n\t"
+ "popl %edx\n\t"
+ "popl %eax\n\t"
+ "ret"
+);
+
+asm(
+".align 4\n"
+".globl __down_failed_interruptible\n"
+"__down_failed_interruptible:\n\t"
+ "pushl %edx\n\t"
+ "pushl %ecx\n\t"
+ "call __down_interruptible\n\t"
+ "popl %ecx\n\t"
+ "popl %edx\n\t"
+ "ret"
+);
+
+asm(
+".align 4\n"
+".globl __down_failed_trylock\n"
+"__down_failed_trylock:\n\t"
+ "pushl %edx\n\t"
+ "pushl %ecx\n\t"
+ "call __down_trylock\n\t"
+ "popl %ecx\n\t"
+ "popl %edx\n\t"
+ "ret"
+);
+
+asm(
+".align 4\n"
+".globl __up_wakeup\n"
+"__up_wakeup:\n\t"
+ "pushl %eax\n\t"
+ "pushl %edx\n\t"
+ "pushl %ecx\n\t"
+ "call __up\n\t"
+ "popl %ecx\n\t"
+ "popl %edx\n\t"
+ "popl %eax\n\t"
+ "ret"
+);
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index c0721b482..88ba3feeb 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -14,6 +14,17 @@
* Bart Hartgers <bart@etpmod.phys.tue.nl>, May 1999.
*
* Intel Mobile Pentium II detection fix. Sean Gilley, June 1999.
+ *
+ * IDT Winchip tweaks, misc clean ups.
+ * Dave Jones <dave@powertweak.com>, August 1999
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ *
+ * Better detection of Centaur/IDT WinChip models.
+ * Bart Hartgers <bart@etpmod.phys.tue.nl>, August 1999.
+ *
+ * Memory region support
+ * David Parsons <orc@pell.chi.il.us>, July-August 1999
*/
/*
@@ -35,12 +46,11 @@
#include <linux/delay.h>
#include <linux/config.h>
#include <linux/init.h>
-#ifdef CONFIG_APM
#include <linux/apm_bios.h>
-#endif
#ifdef CONFIG_BLK_DEV_RAM
#include <linux/blk.h>
#endif
+#include <linux/bigmem.h>
#include <asm/processor.h>
#include <linux/console.h>
#include <asm/uaccess.h>
@@ -49,6 +59,9 @@
#include <asm/smp.h>
#include <asm/cobalt.h>
#include <asm/msr.h>
+#include <asm/desc.h>
+#include <asm/e820.h>
+#include <asm/dma.h>
/*
* Machine setup..
@@ -57,6 +70,8 @@
char ignore_irq13 = 0; /* set if exception 16 works */
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
+unsigned long mmu_cr4_features __initdata = 0;
+
/*
* Bus types ..
*/
@@ -74,14 +89,14 @@ unsigned int mca_pentium_flag = 0;
*/
struct drive_info_struct { char dummy[32]; } drive_info;
struct screen_info screen_info;
-#ifdef CONFIG_APM
struct apm_bios_info apm_bios_info;
-#endif
struct sys_desc_table_struct {
unsigned short length;
unsigned char table[0];
};
+struct e820map e820 = { 0 };
+
unsigned char aux_device_present;
#ifdef CONFIG_BLK_DEV_RAM
@@ -91,7 +106,7 @@ extern int rd_image_start; /* starting block # of image */
#endif
extern int root_mountflags;
-extern int _etext, _edata, _end;
+extern int _text, _etext, _edata, _end;
extern unsigned long cpu_hz;
/*
@@ -101,6 +116,8 @@ extern unsigned long cpu_hz;
#define SCREEN_INFO (*(struct screen_info *) (PARAM+0))
#define EXT_MEM_K (*(unsigned short *) (PARAM+2))
#define ALT_MEM_K (*(unsigned long *) (PARAM+0x1e0))
+#define E820_MAP_NR (*(char*) (PARAM+E820NR))
+#define E820_MAP ((unsigned long *) (PARAM+E820MAP))
#define APM_BIOS_INFO (*(struct apm_bios_info *) (PARAM+0x40))
#define DRIVE_INFO (*(struct drive_info_struct *) (PARAM+0x80))
#define SYS_DESC_TABLE (*(struct sys_desc_table_struct*)(PARAM+0xa0))
@@ -249,12 +266,207 @@ visws_get_board_type_and_rev(void)
static char command_line[COMMAND_LINE_SIZE] = { 0, };
char saved_command_line[COMMAND_LINE_SIZE];
-__initfunc(void setup_arch(char **cmdline_p,
- unsigned long * memory_start_p, unsigned long * memory_end_p))
+struct resource standard_io_resources[] = {
+ { "dma1", 0x00, 0x1f, IORESOURCE_BUSY },
+ { "pic1", 0x20, 0x3f, IORESOURCE_BUSY },
+ { "timer", 0x40, 0x5f, IORESOURCE_BUSY },
+ { "keyboard", 0x60, 0x6f, IORESOURCE_BUSY },
+ { "dma page reg", 0x80, 0x8f, IORESOURCE_BUSY },
+ { "pic2", 0xa0, 0xbf, IORESOURCE_BUSY },
+ { "dma2", 0xc0, 0xdf, IORESOURCE_BUSY },
+ { "fpu", 0xf0, 0xff, IORESOURCE_BUSY }
+};
+
+#define STANDARD_IO_RESOURCES (sizeof(standard_io_resources)/sizeof(struct resource))
+
+/* System RAM - interrupted by the 640kB-1M hole */
+#define code_resource (ram_resources[3])
+#define data_resource (ram_resources[4])
+static struct resource ram_resources[] = {
+ { "System RAM", 0x000000, 0x09ffff, IORESOURCE_BUSY },
+ { "System RAM", 0x100000, 0x100000, IORESOURCE_BUSY },
+ { "Video RAM area", 0x0a0000, 0x0bffff, IORESOURCE_BUSY },
+ { "Kernel code", 0x100000, 0 },
+ { "Kernel data", 0, 0 }
+};
+
+/* System ROM resources */
+#define MAXROMS 6
+static struct resource rom_resources[MAXROMS] = {
+ { "System ROM", 0xF0000, 0xFFFFF, IORESOURCE_BUSY },
+ { "Video ROM", 0xc0000, 0xc7fff, IORESOURCE_BUSY }
+};
+
+#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
+
+static void __init probe_roms(void)
+{
+ int roms = 1;
+ unsigned long base;
+ unsigned char *romstart;
+
+ request_resource(&iomem_resource, rom_resources+0);
+
+ /* Video ROM is standard at C000:0000 - C7FF:0000, check signature */
+ for (base = 0xC0000; base < 0xE0000; base += 2048) {
+ romstart = bus_to_virt(base);
+ if (!romsignature(romstart))
+ continue;
+ request_resource(&iomem_resource, rom_resources + roms);
+ roms++;
+ break;
+ }
+
+ /* Extension roms at C800:0000 - DFFF:0000 */
+ for (base = 0xC8000; base < 0xE0000; base += 2048) {
+ unsigned long length;
+
+ romstart = bus_to_virt(base);
+ if (!romsignature(romstart))
+ continue;
+ length = romstart[2] * 512;
+ if (length) {
+ unsigned int i;
+ unsigned char chksum;
+
+ chksum = 0;
+ for (i = 0; i < length; i++)
+ chksum += romstart[i];
+
+ /* Good checksum? */
+ if (!chksum) {
+ rom_resources[roms].start = base;
+ rom_resources[roms].end = base + length - 1;
+ rom_resources[roms].name = "Extension ROM";
+ rom_resources[roms].flags = IORESOURCE_BUSY;
+
+ request_resource(&iomem_resource, rom_resources + roms);
+ roms++;
+ if (roms >= MAXROMS)
+ return;
+ }
+ }
+ }
+
+ /* Final check for motherboard extension rom at E000:0000 */
+ base = 0xE0000;
+ romstart = bus_to_virt(base);
+
+ if (romsignature(romstart)) {
+ rom_resources[roms].start = base;
+ rom_resources[roms].end = base + 65535;
+ rom_resources[roms].name = "Extension ROM";
+ rom_resources[roms].flags = IORESOURCE_BUSY;
+
+ request_resource(&iomem_resource, rom_resources + roms);
+ }
+}
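
probe_roms() accepts an extension ROM only when its bytes sum to zero mod 256 — the usual option-ROM convention where the last byte is a fix-up that makes the checksum vanish. A user-space sketch of both the signature and checksum tests, on a fabricated 512-byte image (little-endian assumed, as on i386):

#include <stdio.h>

int main(void)
{
	/* fabricated option ROM: 0x55 0xaa signature, length byte = 1 (1*512) */
	unsigned char rom[512] = { 0x55, 0xaa, 1 };
	unsigned i;
	unsigned char sum = 0;

	for (i = 0; i < sizeof(rom) - 1; i++)
		sum += rom[i];
	rom[sizeof(rom) - 1] = (unsigned char)-sum;	/* fix-up byte */

	for (sum = 0, i = 0; i < sizeof(rom); i++)
		sum += rom[i];
	printf("signature %s, checksum %s\n",
	       *(unsigned short *)rom == 0xaa55 ? "ok" : "bad",
	       sum == 0 ? "ok" : "bad");
	return 0;
}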
+
+unsigned long __init memparse(char *ptr, char **retptr)
+{
+ unsigned long ret;
+
+ ret = simple_strtoul(ptr, retptr, 0);
+
+ if (**retptr == 'K' || **retptr == 'k') {
+ ret <<= 10;
+ (*retptr)++;
+ }
+ else if (**retptr == 'M' || **retptr == 'm') {
+ ret <<= 20;
+ (*retptr)++;
+ }
+ return ret;
+} /* memparse */
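
memparse() takes a number in any base simple_strtoul's base 0 accepts (decimal, octal, 0x hex) plus an optional K/M suffix. The same logic checked in user space, with strtoul standing in and memparse_demo() a name local to this sketch:

#include <stdio.h>
#include <stdlib.h>

static unsigned long memparse_demo(const char *p, char **ret)
{
	unsigned long v = strtoul(p, ret, 0);

	if (**ret == 'K' || **ret == 'k') { v <<= 10; (*ret)++; }
	else if (**ret == 'M' || **ret == 'm') { v <<= 20; (*ret)++; }
	return v;
}

int main(void)
{
	char *rest;

	printf("%lu\n", memparse_demo("64M", &rest));	/* 67108864 */
	printf("%lu\n", memparse_demo("96k", &rest));	/* 98304 */
	return 0;
}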
+
+
+void __init add_memory_region(unsigned long start,
+ unsigned long size, int type)
+{
+ int x = e820.nr_map;
+
+ if (x == E820MAX) {
+ printk("Ooops! Too many entries in the memory map!\n");
+ return;
+ }
+
+ e820.map[x].addr = start;
+ e820.map[x].size = size;
+ e820.map[x].type = type;
+ e820.nr_map++;
+} /* add_memory_region */
+
+
+#define LOWMEMSIZE() ((*(unsigned short *)__va(0x413)) * 1024)
+
+
+void __init setup_memory_region(void)
+{
+#define E820_DEBUG 0
+#if E820_DEBUG
+ int i;
+#endif
+
+ /*
+ * If we're lucky and live on a modern system, the setup code
+ * will have given us a memory map that we can use to properly
+ * set up memory. If we aren't, we'll fake a memory map.
+ *
+ * We check to see that the memory map contains at least 2 elements
+ * before we'll use it, because the detection code in setup.S may
+ * not be perfect and most every PC known to man has two memory
+ * regions: one from 0 to 640k, and one from 1mb up. (The IBM
+ * thinkpad 560x, for example, does not cooperate with the memory
+ * detection code.)
+ */
+ if (E820_MAP_NR > 1) {
+ /* got a memory map; copy it into a safe place.
+ */
+ e820.nr_map = E820_MAP_NR;
+ if (e820.nr_map > E820MAX)
+ e820.nr_map = E820MAX;
+ memcpy(e820.map, E820_MAP, e820.nr_map * sizeof e820.map[0]);
+#if E820_DEBUG
+ for (i=0; i < e820.nr_map; i++) {
+ printk("e820: %ld @ %08lx ",
+ (unsigned long)(e820.map[i].size),
+ (unsigned long)(e820.map[i].addr));
+ switch (e820.map[i].type) {
+ case E820_RAM: printk("(usable)\n");
+ break;
+ case E820_RESERVED:
+ printk("(reserved)\n");
+ break;
+ case E820_ACPI:
+ printk("(ACPI data)\n");
+ break;
+ default: printk("type %lu\n", e820.map[i].type);
+ break;
+ }
+ }
+#endif
+ }
+ else {
+ /* otherwise fake a memory map; one section from 0k->640k,
+ * the next section from 1mb->appropriate_mem_k
+ */
+ unsigned long mem_size;
+
+ mem_size = (ALT_MEM_K < EXT_MEM_K) ? EXT_MEM_K : ALT_MEM_K;
+
+ add_memory_region(0, LOWMEMSIZE(), E820_RAM);
+ add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
+ }
+} /* setup_memory_region */
+
+
+void __init setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigned long * memory_end_p)
{
unsigned long memory_start, memory_end;
char c = ' ', *to = command_line, *from = COMMAND_LINE;
int len = 0;
+ int i;
+ int usermem=0;
#ifdef CONFIG_VISWS
visws_get_board_type_and_rev();
@@ -263,9 +475,7 @@ __initfunc(void setup_arch(char **cmdline_p,
ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV);
drive_info = DRIVE_INFO;
screen_info = SCREEN_INFO;
-#ifdef CONFIG_APM
apm_bios_info = APM_BIOS_INFO;
-#endif
if( SYS_DESC_TABLE.length != 0 ) {
MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
machine_id = SYS_DESC_TABLE.table[0];
@@ -273,29 +483,26 @@ __initfunc(void setup_arch(char **cmdline_p,
BIOS_revision = SYS_DESC_TABLE.table[2];
}
aux_device_present = AUX_DEVICE_INFO;
- memory_end = (1<<20) + (EXT_MEM_K<<10);
-#ifndef STANDARD_MEMORY_BIOS_CALL
- {
- unsigned long memory_alt_end = (1<<20) + (ALT_MEM_K<<10);
- /* printk(KERN_DEBUG "Memory sizing: %08x %08x\n", memory_end, memory_alt_end); */
- if (memory_alt_end > memory_end)
- memory_end = memory_alt_end;
- }
-#endif
- memory_end &= PAGE_MASK;
#ifdef CONFIG_BLK_DEV_RAM
rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
+ setup_memory_region();
+
if (!MOUNT_ROOT_RDONLY)
root_mountflags &= ~MS_RDONLY;
memory_start = (unsigned long) &_end;
- init_task.mm->start_code = PAGE_OFFSET;
- init_task.mm->end_code = (unsigned long) &_etext;
- init_task.mm->end_data = (unsigned long) &_edata;
- init_task.mm->brk = (unsigned long) &_end;
+ init_mm.start_code = (unsigned long) &_text;
+ init_mm.end_code = (unsigned long) &_etext;
+ init_mm.end_data = (unsigned long) &_edata;
+ init_mm.brk = (unsigned long) &_end;
+
+ code_resource.start = virt_to_bus(&_text);
+ code_resource.end = virt_to_bus(&_etext)-1;
+ data_resource.start = virt_to_bus(&_etext);
+ data_resource.end = virt_to_bus(&_edata)-1;
/* Save unparsed command line copy for /proc/cmdline */
memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
@@ -304,8 +511,10 @@ __initfunc(void setup_arch(char **cmdline_p,
for (;;) {
/*
* "mem=nopentium" disables the 4MB page tables.
- * "mem=XXX[kKmM]" overrides the BIOS-reported
- * memory size
+ * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
+ * to <mem>, overriding the bios size.
+ * "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from
+ * <start> to <start>+<mem>, overriding the bios size.
*/
if (c == ' ' && *(const unsigned long *)from == *(const unsigned long *)"mem=") {
if (to != command_line) to--;
@@ -313,14 +522,29 @@ __initfunc(void setup_arch(char **cmdline_p,
from += 9+4;
boot_cpu_data.x86_capability &= ~X86_FEATURE_PSE;
} else {
- memory_end = simple_strtoul(from+4, &from, 0);
- if ( *from == 'K' || *from == 'k' ) {
- memory_end = memory_end << 10;
- from++;
- } else if ( *from == 'M' || *from == 'm' ) {
- memory_end = memory_end << 20;
- from++;
+ /* If the user specifies memory size, we
+ * blow away any automatically generated
+ * size
+ */
+ unsigned long start_at, mem_size;
+
+ if (usermem == 0) {
+ /* first time in: zap the whitelist
+ * and reinitialize it with the
+ * standard low-memory region.
+ */
+ e820.nr_map = 0;
+ usermem = 1;
+ add_memory_region(0, LOWMEMSIZE(), E820_RAM);
+ }
+ mem_size = memparse(from+4, &from);
+ if (*from == '@')
+ start_at = memparse(from+1,&from);
+ else {
+ start_at = HIGH_MEMORY;
+ mem_size -= HIGH_MEMORY;
}
+ add_memory_region(start_at, mem_size, E820_RAM);
}
}
c = *(from++);
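
Worked through on a hypothetical command line "mem=96M@16M mem=32M" (HIGH_MEMORY being 1MB on i386), the loop above ends up issuing:

	add_memory_region(0, LOWMEMSIZE(), E820_RAM);			/* first mem= zaps the map, re-adds low RAM */
	add_memory_region(16 << 20, 96 << 20, E820_RAM);		/* "mem=96M@16M": explicit start */
	add_memory_region(1 << 20, (32 << 20) - (1 << 20), E820_RAM);	/* "mem=32M": bare form, from HIGH_MEMORY up */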
@@ -333,15 +557,47 @@ __initfunc(void setup_arch(char **cmdline_p,
*to = '\0';
*cmdline_p = command_line;
-#define VMALLOC_RESERVE (64 << 20) /* 64MB for vmalloc */
+#define VMALLOC_RESERVE (128 << 20) /* 128MB for vmalloc and initrd */
#define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE))
+ memory_end = 0;
+ for (i=0; i < e820.nr_map; i++) {
+ /* RAM? */
+ if (e820.map[i].type == E820_RAM) {
+ unsigned long end = e820.map[i].addr + e820.map[i].size;
+
+ if (end > memory_end)
+ memory_end = end;
+ }
+ }
+ memory_end &= PAGE_MASK;
+ ram_resources[1].end = memory_end-1;
+
+#ifdef CONFIG_BIGMEM
+ bigmem_start = bigmem_end = memory_end;
+#endif
if (memory_end > MAXMEM)
{
+#ifdef CONFIG_BIGMEM
+#define MAXBIGMEM ((unsigned long)(~(VMALLOC_RESERVE-1)))
+ bigmem_start = MAXMEM;
+ bigmem_end = (memory_end < MAXBIGMEM) ? memory_end : MAXBIGMEM;
+#endif
memory_end = MAXMEM;
+#ifdef CONFIG_BIGMEM
+ printk(KERN_NOTICE "%ldMB BIGMEM available.\n",
+ (bigmem_end-bigmem_start)>>20);
+#else
printk(KERN_WARNING "Warning only %ldMB will be used.\n",
MAXMEM>>20);
+#endif
}
+#if defined(CONFIG_BIGMEM) && defined(BIGMEM_DEBUG)
+ else {
+ memory_end -= memory_end/4;
+ bigmem_start = memory_end;
+ }
+#endif
memory_end += PAGE_OFFSET;
*memory_start_p = memory_start;
@@ -367,12 +623,20 @@ __initfunc(void setup_arch(char **cmdline_p,
}
#endif
+ /*
+ * Request the standard RAM and ROM resources -
+ * they eat up PCI memory space
+ */
+ request_resource(&iomem_resource, ram_resources+0);
+ request_resource(&iomem_resource, ram_resources+1);
+ request_resource(&iomem_resource, ram_resources+2);
+ request_resource(ram_resources+1, &code_resource);
+ request_resource(ram_resources+1, &data_resource);
+ probe_roms();
+
/* request I/O space for devices used on all i[345]86 PCs */
- request_region(0x00,0x20,"dma1");
- request_region(0x40,0x20,"timer");
- request_region(0x80,0x10,"dma page reg");
- request_region(0xc0,0x20,"dma2");
- request_region(0xf0,0x10,"fpu");
+ for (i = 0; i < STANDARD_IO_RESOURCES; i++)
+ request_resource(&ioport_resource, standard_io_resources+i);
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
@@ -381,13 +645,9 @@ __initfunc(void setup_arch(char **cmdline_p,
conswitchp = &dummy_con;
#endif
#endif
- /*
- * Check the bugs that will bite us before we get booting
- */
-
}
-__initfunc(static int get_model_name(struct cpuinfo_x86 *c))
+static int __init get_model_name(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, *v;
@@ -415,7 +675,7 @@ __initfunc(static int get_model_name(struct cpuinfo_x86 *c))
return 1;
}
-__initfunc(static int amd_model(struct cpuinfo_x86 *c))
+static int __init amd_model(struct cpuinfo_x86 *c)
{
u32 l, h;
unsigned long flags;
@@ -480,6 +740,19 @@ __initfunc(static int amd_model(struct cpuinfo_x86 *c))
break;
}
break;
+ case 6: /* An Athlon. We can probably trust the BIOS */
+ {
+
+ u32 ecx, edx, dummy;
+ cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
+ printk("L1 I Cache: %dK L1 D Cache: %dK\n",
+ ecx>>24, edx>>24);
+ cpuid(0x80000006, &dummy, &dummy, &ecx, &edx);
+ printk("L2 Cache: %dK\n", ecx>>16);
+ c->x86_cache_size = ecx>>16;
+ break;
+ }
+
}
return r;
}
@@ -544,7 +817,7 @@ static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock";
static char cyrix_model_mult1[] __initdata = "12??43";
static char cyrix_model_mult2[] __initdata = "12233445";
-__initfunc(static void cyrix_model(struct cpuinfo_x86 *c))
+static void __init cyrix_model(struct cpuinfo_x86 *c)
{
unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
char *buf = c->x86_model_id;
@@ -615,6 +888,15 @@ __initfunc(static void cyrix_model(struct cpuinfo_x86 *c))
c->x86_model = (dir1 & 0x20) ? 1 : 2;
c->x86_capability&=~X86_FEATURE_TSC;
}
+#ifdef CONFIG_PCI
+ /* It isn't really a PCI quirk directly, but the cure is the
+ same. The MediaGX has deep magic SMM stuff that handles the
+ SB emulation. It throws away the FIFO on disable_dma(), which
+ is wrong and ruins the audio. */
+
+ printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bug.\n");
+ isa_dma_bridge_buggy = 1;
+#endif
break;
case 5: /* 6x86MX/M II */
@@ -640,8 +922,8 @@ __initfunc(static void cyrix_model(struct cpuinfo_x86 *c))
dir0_msn = 0;
p = Cx486S_name[0];
break;
- break;
}
+ break;
default: /* unknown (shouldn't happen, we know everyone ;-) */
dir0_msn = 7;
@@ -652,7 +934,99 @@ __initfunc(static void cyrix_model(struct cpuinfo_x86 *c))
return;
}
-__initfunc(void get_cpu_vendor(struct cpuinfo_x86 *c))
+static void __init centaur_model(struct cpuinfo_x86 *c)
+{
+ enum {
+ ECX8=1<<1,
+ EIERRINT=1<<2,
+ DPM=1<<3,
+ DMCE=1<<4,
+ DSTPCLK=1<<5,
+ ELINEAR=1<<6,
+ DSMC=1<<7,
+ DTLOCK=1<<8,
+ EDCTLB=1<<8,
+ EMMX=1<<9,
+ DPDC=1<<11,
+ EBRPRED=1<<12,
+ DIC=1<<13,
+ DDC=1<<14,
+ DNA=1<<15,
+ ERETSTK=1<<16,
+ E2MMX=1<<19,
+ EAMD3D=1<<20,
+ };
+
+ char *name;
+ u32 fcr_set=0;
+ u32 fcr_clr=0;
+ u32 lo,hi,newlo;
+ u32 aa,bb,cc,dd;
+
+ switch(c->x86_model) {
+ case 4:
+ name="C6";
+ fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
+ fcr_clr=DPDC;
+ break;
+ case 8:
+ switch(c->x86_mask) {
+ default:
+ name="2";
+ break;
+ case 7 ... 9:
+ name="2A";
+ break;
+ case 10 ... 15:
+ name="2B";
+ break;
+ }
+ fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
+ fcr_clr=DPDC;
+ break;
+ case 9:
+ name="3";
+ fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
+ fcr_clr=DPDC;
+ break;
+ case 10:
+ name="4";
+ /* no info on the WC4 yet */
+ break;
+ default:
+ name="??";
+ }
+
+ /* get FCR */
+ rdmsr(0x107, lo, hi);
+
+ newlo=(lo|fcr_set) & (~fcr_clr);
+
+ if (newlo!=lo) {
+ printk("Centaur FCR was 0x%X now 0x%X\n", lo, newlo );
+ wrmsr(0x107, newlo, hi );
+ } else {
+ printk("Centaur FCR is 0x%X\n",lo);
+ }
+
+ /* Emulate MTRRs using Centaur's MCR. */
+ c->x86_capability |= X86_FEATURE_MTRR;
+ /* Report CX8 */
+ c->x86_capability |= X86_FEATURE_CX8;
+ /* Set 3DNow! on Winchip 2 and above. */
+ if (c->x86_model >=8)
+ c->x86_capability |= X86_FEATURE_AMD3D;
+ /* See if we can find out some more. */
+ cpuid(0x80000000,&aa,&bb,&cc,&dd);
+ if (aa>=0x80000005) { /* Yes, we can. */
+ cpuid(0x80000005,&aa,&bb,&cc,&dd);
+ /* Add L1 data and code cache sizes. */
+ c->x86_cache_size = (cc>>24)+(dd>>24);
+ }
+ sprintf( c->x86_model_id, "WinChip %s", name );
+}
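
centaur_model() is a textbook MSR read-modify-write: read FCR (MSR 0x107), OR in the wanted feature bits, clear the unwanted ones, and write back only if something changed, so an already-configured chip is left untouched. The same shape with plain integers standing in for rdmsr/wrmsr (the bit values here are fabricated for the demo):

#include <stdio.h>

int main(void)
{
	unsigned lo = 0x001, hi = 0;			/* pretend rdmsr(0x107, lo, hi) */
	unsigned fcr_set = 0x102, fcr_clr = 0x800;	/* fabricated fcr_set/fcr_clr masks */
	unsigned newlo = (lo | fcr_set) & ~fcr_clr;

	if (newlo != lo)
		printf("FCR was 0x%X now 0x%X\n", lo, newlo);	/* kernel: wrmsr(0x107, newlo, hi) */
	else
		printf("FCR is 0x%X\n", lo);
	(void)hi;
	return 0;
}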
+
+void __init get_cpu_vendor(struct cpuinfo_x86 *c)
{
char *v = c->x86_vendor_id;
@@ -701,18 +1075,20 @@ static struct cpu_model_info cpu_models[] __initdata = {
"K5", "K5", NULL, NULL,
"K6", "K6", "K6-2",
"K6-3", NULL, NULL, NULL, NULL, NULL, NULL }},
+ { X86_VENDOR_AMD, 6,
+ { "Athlon", "Athlon",
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL }},
{ X86_VENDOR_UMC, 4,
{ NULL, "U5D", "U5S", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL }},
- { X86_VENDOR_CENTAUR, 5,
- { NULL, NULL, NULL, NULL, "C6", NULL, NULL, NULL, "C6-2", NULL, NULL,
- NULL, NULL, NULL, NULL, NULL }},
{ X86_VENDOR_NEXGEN, 5,
{ "Nx586", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL }},
};
-__initfunc(void identify_cpu(struct cpuinfo_x86 *c))
+void __init identify_cpu(struct cpuinfo_x86 *c)
{
int i;
char *p = NULL;
@@ -733,6 +1109,11 @@ __initfunc(void identify_cpu(struct cpuinfo_x86 *c))
if (c->x86_vendor == X86_VENDOR_AMD && amd_model(c))
return;
+
+ if (c->x86_vendor == X86_VENDOR_CENTAUR) {
+ centaur_model(c);
+ return;
+ }
if (c->cpuid_level > 0 && c->x86_vendor == X86_VENDOR_INTEL)
{
@@ -809,7 +1190,6 @@ __initfunc(void identify_cpu(struct cpuinfo_x86 *c))
p = "Celeron (Dixon)";
}
}
-
}
if (p) {
@@ -824,7 +1204,7 @@ __initfunc(void identify_cpu(struct cpuinfo_x86 *c))
* Perform early boot up checks for a valid TSC. See arch/i386/kernel/time.c
*/
-__initfunc(void dodgy_tsc(void))
+void __init dodgy_tsc(void)
{
get_cpu_vendor(&boot_cpu_data);
@@ -841,7 +1221,7 @@ static char *cpu_vendor_names[] __initdata = {
"Intel", "Cyrix", "AMD", "UMC", "NexGen", "Centaur" };
-__initfunc(void print_cpu_info(struct cpuinfo_x86 *c))
+void __init print_cpu_info(struct cpuinfo_x86 *c)
{
char *vendor = NULL;
@@ -859,22 +1239,7 @@ __initfunc(void print_cpu_info(struct cpuinfo_x86 *c))
printk("%s", c->x86_model_id);
if (c->x86_mask || c->cpuid_level>=0)
- printk(" stepping %02x", c->x86_mask);
-
- if(c->x86_vendor == X86_VENDOR_CENTAUR)
- {
- u32 hv,lv;
- rdmsr(0x107, lv, hv);
- printk("\nCentaur FSR was 0x%X ",lv);
- lv|=(1<<8);
- lv|=(1<<7);
- /* lv|=(1<<6); - may help too if the board can cope */
- printk("now 0x%X", lv);
- wrmsr(0x107, lv, hv);
- /* Emulate MTRRs using Centaur's MCR. */
- c->x86_capability |= X86_FEATURE_MTRR;
- }
- printk("\n");
+ printk(" stepping %02x\n", c->x86_mask);
}
/*
@@ -909,7 +1274,7 @@ int get_cpuinfo(char * buffer)
c->x86 + '0',
c->x86_model,
c->x86_model_id[0] ? c->x86_model_id : "unknown");
-
+
if (c->x86_mask || c->cpuid_level >= 0)
p += sprintf(p, "stepping\t: %d\n", c->x86_mask);
else
@@ -925,14 +1290,20 @@ int get_cpuinfo(char * buffer)
p += sprintf(p, "cache size\t: %d KB\n", c->x86_cache_size);
/* Modify the capabilities according to chip type */
- if (c->x86_vendor == X86_VENDOR_CYRIX) {
+ switch (c->x86_vendor) {
+
+ case X86_VENDOR_CYRIX:
x86_cap_flags[24] = "cxmmx";
- } else if (c->x86_vendor == X86_VENDOR_AMD) {
- x86_cap_flags[16] = "fcmov";
- x86_cap_flags[31] = "3dnow";
+ break;
+
+ case X86_VENDOR_AMD:
if (c->x86 == 5 && c->x86_model == 6)
x86_cap_flags[10] = "sep";
- } else if (c->x86_vendor == X86_VENDOR_INTEL) {
+ x86_cap_flags[16] = "fcmov";
+ x86_cap_flags[31] = "3dnow";
+ break;
+
+ case X86_VENDOR_INTEL:
x86_cap_flags[6] = "pae";
x86_cap_flags[9] = "apic";
x86_cap_flags[14] = "mca";
@@ -940,6 +1311,16 @@ int get_cpuinfo(char * buffer)
x86_cap_flags[17] = "pse36";
x86_cap_flags[18] = "psn";
x86_cap_flags[24] = "osfxsr";
+ break;
+
+ case X86_VENDOR_CENTAUR:
+ if (c->x86_model >=8) /* Only Winchip2 and above */
+ x86_cap_flags[31] = "3dnow";
+ break;
+
+ default:
+ /* Unknown CPU manufacturer. Transmeta ? :-) */
+ break;
}
sep_bug = c->x86_vendor == X86_VENDOR_INTEL &&
@@ -978,3 +1359,64 @@ int get_cpuinfo(char * buffer)
}
return p - buffer;
}
+
+int cpus_initialized = 0;
+unsigned long cpu_initialized = 0;
+
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless; this function acts as a
+ * 'CPU state barrier', and nothing should get across it.
+ */
+void cpu_init (void)
+{
+ int nr = smp_processor_id();
+ struct tss_struct * t = &init_tss[nr];
+
+ if (test_and_set_bit(nr,&cpu_initialized)) {
+ printk("CPU#%d already initialized!\n", nr);
+ for (;;) __sti();
+ }
+ cpus_initialized++;
+ printk("Initializing CPU#%d\n", nr);
+
+ if (boot_cpu_data.x86_capability & X86_FEATURE_PSE)
+ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+
+ __asm__ __volatile__("lgdt %0": "=m" (gdt_descr));
+ __asm__ __volatile__("lidt %0": "=m" (idt_descr));
+
+ /*
+ * Delete NT
+ */
+ __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
+
+ /*
+ * set up and load the per-CPU TSS and LDT
+ */
+ atomic_inc(&init_mm.mm_count);
+ current->active_mm = &init_mm;
+ t->esp0 = current->thread.esp0;
+ set_tss_desc(nr,t);
+ gdt_table[__TSS(nr)].b &= 0xfffffdff;
+ load_TR(nr);
+ load_LDT(&init_mm);
+
+ /*
+ * Clear all 6 debug registers:
+ */
+
+#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) );
+
+ CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
+
+#undef CD
+
+ /*
+ * Force FPU initialization:
+ */
+ current->flags &= ~PF_USEDFPU;
+ current->used_math = 0;
+ stts();
+}
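
cpu_init() guards its per-CPU setup with test_and_set_bit() on a shared
bitmap, so every CPU runs it exactly once and a re-run is caught loudly.
A rough user-space equivalent with C11 atomics, where an atomic fetch-or
returns the old mask just like test_and_set_bit(); the "CPU number" is
simply a parameter here:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_ulong cpu_initialized;   /* one bit per "CPU" */

    static int init_once(int nr)
    {
        unsigned long bit = 1UL << nr;

        /* fetch-or returns the previous mask: nonzero bit => re-entry */
        if (atomic_fetch_or(&cpu_initialized, bit) & bit) {
            printf("CPU#%d already initialized!\n", nr);
            return -1;
        }
        printf("Initializing CPU#%d\n", nr);
        return 0;
    }

    int main(void)
    {
        init_once(0);
        init_once(0);   /* second call is rejected */
        return 0;
    }
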
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index 32e7c4c56..cc9a992da 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -155,7 +155,7 @@ static inline int restore_i387_hard(struct _fpstate *buf)
{
struct task_struct *tsk = current;
clear_fpu(tsk);
- return __copy_from_user(&tsk->tss.i387.hard, buf, sizeof(*buf));
+ return __copy_from_user(&tsk->thread.i387.hard, buf, sizeof(*buf));
}
static inline int restore_i387(struct _fpstate *buf)
@@ -167,7 +167,7 @@ static inline int restore_i387(struct _fpstate *buf)
if (boot_cpu_data.hard_math)
err = restore_i387_hard(buf);
else
- err = restore_i387_soft(&current->tss.i387.soft, buf);
+ err = restore_i387_soft(&current->thread.i387.soft, buf);
#endif
current->used_math = 1;
return err;
@@ -308,8 +308,8 @@ static inline int save_i387_hard(struct _fpstate * buf)
struct task_struct *tsk = current;
unlazy_fpu(tsk);
- tsk->tss.i387.hard.status = tsk->tss.i387.hard.swd;
- if (__copy_to_user(buf, &tsk->tss.i387.hard, sizeof(*buf)))
+ tsk->thread.i387.hard.status = tsk->thread.i387.hard.swd;
+ if (__copy_to_user(buf, &tsk->thread.i387.hard, sizeof(*buf)))
return -1;
return 1;
}
@@ -328,7 +328,7 @@ static int save_i387(struct _fpstate *buf)
return save_i387_hard(buf);
#else
return boot_cpu_data.hard_math ? save_i387_hard(buf)
- : save_i387_soft(&current->tss.i387.soft, buf);
+ : save_i387_soft(&current->thread.i387.soft, buf);
#endif
}
@@ -354,8 +354,8 @@ setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate,
err |= __put_user(regs->edx, &sc->edx);
err |= __put_user(regs->ecx, &sc->ecx);
err |= __put_user(regs->eax, &sc->eax);
- err |= __put_user(current->tss.trap_no, &sc->trapno);
- err |= __put_user(current->tss.error_code, &sc->err);
+ err |= __put_user(current->thread.trap_no, &sc->trapno);
+ err |= __put_user(current->thread.error_code, &sc->err);
err |= __put_user(regs->eip, &sc->eip);
err |= __put_user(regs->xcs, (unsigned int *)&sc->cs);
err |= __put_user(regs->eflags, &sc->eflags);
@@ -370,7 +370,7 @@ setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate,
/* non-iBCS2 extensions.. */
err |= __put_user(mask, &sc->oldmask);
- err |= __put_user(current->tss.cr2, &sc->cr2);
+ err |= __put_user(current->thread.cr2, &sc->cr2);
return err;
}
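
setup_sigcontext() folds the result of every __put_user() into err with
|= and tests once at the end, rather than branching after each store.
The same accumulate-then-check pattern in a stand-alone sketch, with
copy_field() as a made-up stand-in for __put_user():

    #include <stdio.h>

    struct sigctx { int eax, ebx, trapno; };

    static int faults;   /* simulate how many stores fault */

    /* stand-in for __put_user(): 0 on success, nonzero on fault */
    static int copy_field(int *dst, int val)
    {
        if (faults > 0) {
            faults--;
            return -1;
        }
        *dst = val;
        return 0;
    }

    static int fill_sigctx(struct sigctx *sc)
    {
        int err = 0;

        err |= copy_field(&sc->eax, 1);
        err |= copy_field(&sc->ebx, 2);
        err |= copy_field(&sc->trapno, 13);
        return err;   /* nonzero if any store failed */
    }

    int main(void)
    {
        struct sigctx sc;

        printf("clean run: %d\n", fill_sigctx(&sc));
        faults = 1;
        printf("faulting run: %d\n", fill_sigctx(&sc));
        return 0;
    }
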
@@ -687,12 +687,8 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
case SIGQUIT: case SIGILL: case SIGTRAP:
case SIGABRT: case SIGFPE: case SIGSEGV:
- lock_kernel();
- if (current->binfmt
- && current->binfmt->core_dump
- && current->binfmt->core_dump(signr, regs))
+ if (do_coredump(signr, regs))
exit_code |= 0x80;
- unlock_kernel();
/* FALLTHRU */
default:
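
The fatal-signal path above now calls one do_coredump() helper instead
of open-coding the binfmt lookup under the kernel lock at every call
site. A sketch of that wrap-the-boilerplate shape; every name below is
illustrative rather than the kernel's:

    #include <stdio.h>

    struct binfmt {
        int (*core_dump)(long signr);
    };

    static struct binfmt *current_binfmt;   /* NULL: nothing registered */

    static void lock_kernel(void)   { /* grab the big lock */ }
    static void unlock_kernel(void) { /* release it */ }

    /* one helper hides the lock and NULL checks callers used to repeat */
    static int do_coredump_sketch(long signr)
    {
        int ret = 0;

        lock_kernel();
        if (current_binfmt && current_binfmt->core_dump)
            ret = current_binfmt->core_dump(signr);
        unlock_kernel();
        return ret;
    }

    int main(void)
    {
        printf("dumped: %d\n", do_coredump_sketch(11));
        return 0;
    }
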
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index f092d0905..f44234eb7 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -42,7 +42,7 @@
#include <asm/mtrr.h>
#include <asm/msr.h>
-#include "irq.h"
+#include <linux/irq.h>
#define JIFFIE_TIMEOUT 100
@@ -104,7 +104,7 @@ int smp_found_config=0; /* Have we found an SMP box */
unsigned long cpu_present_map = 0; /* Bitmask of physically existing CPUs */
unsigned long cpu_online_map = 0; /* Bitmask of currently online CPUs */
-int smp_num_cpus = 1; /* Total count of live CPUs */
+int smp_num_cpus = 0; /* Total count of live CPUs */
int smp_threads_ready=0; /* Set when the idlers are all forked */
volatile int cpu_number_map[NR_CPUS]; /* which CPU maps to which logical number */
volatile int __cpu_logical_map[NR_CPUS]; /* which logical number maps to which CPU */
@@ -128,6 +128,8 @@ volatile unsigned long ipi_count; /* Number of IPIs delivered */
const char lk_lockmsg[] = "lock from interrupt context at %p\n";
int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, };
+extern int nr_ioapics;
+extern struct mpc_config_ioapic mp_apics [MAX_IO_APICS];
extern int mp_irq_entries;
extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
extern int mpc_default_type;
@@ -162,14 +164,22 @@ int skip_ioapic_setup = 0; /* 1 if "noapic" boot option passed */
* SMP mode to <NUM>.
*/
-void __init smp_setup(char *str, int *ints)
+static int __init nosmp(char *str)
{
- if (ints && ints[0] > 0)
- max_cpus = ints[1];
- else
- max_cpus = 0;
+ max_cpus = 0;
+ return 1;
+}
+
+__setup("nosmp", nosmp);
+
+static int __init maxcpus(char *str)
+{
+ get_option(&str, &max_cpus);
+ return 1;
}
+__setup("maxcpus=", maxcpus);
+
void ack_APIC_irq(void)
{
/* Clear the IPI */
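
smp_setup() gives way to the newer __setup() registration above, where
each boot option gets its own small handler and get_option() pulls out
the integer argument. A stand-alone sketch of that prefix-dispatch
style, with atoi() standing in for get_option():

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int max_cpus = 32;

    static int nosmp(char *str)   { (void)str; max_cpus = 0; return 1; }
    static int maxcpus(char *str) { max_cpus = atoi(str); return 1; }

    static struct { const char *prefix; int (*fn)(char *); } setups[] = {
        { "nosmp",    nosmp },
        { "maxcpus=", maxcpus },
    };

    static void parse_option(char *opt)
    {
        size_t i, n;

        for (i = 0; i < sizeof(setups) / sizeof(setups[0]); i++) {
            n = strlen(setups[i].prefix);
            if (!strncmp(opt, setups[i].prefix, n)) {
                setups[i].fn(opt + n);   /* handler sees the argument */
                return;
            }
        }
    }

    int main(void)
    {
        char opt[] = "maxcpus=2";

        parse_option(opt);
        printf("max_cpus = %d\n", max_cpus);   /* 2 */
        return 0;
    }
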
@@ -225,6 +235,7 @@ static char *mpc_family(int family,int model)
return n;
}
+
/*
* Read the MPC
*/
@@ -257,12 +268,10 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
}
memcpy(str,mpc->mpc_oem,8);
str[8]=0;
- memcpy(ioapic_OEM_ID,str,9);
printk("OEM ID: %s ",str);
memcpy(str,mpc->mpc_productid,12);
str[12]=0;
- memcpy(ioapic_Product_ID,str,13);
printk("Product ID: %s ",str);
printk("APIC at: 0x%lX\n",mpc->mpc_lapic);
@@ -367,11 +376,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
printk("I/O APIC #%d Version %d at 0x%lX.\n",
m->mpc_apicid,m->mpc_apicver,
m->mpc_apicaddr);
- /*
- * we use the first one only currently
- */
- if (ioapics == 1)
- mp_ioapic_addr = m->mpc_apicaddr;
+ if (nr_ioapics < MAX_IO_APICS)
+ mp_apics[nr_ioapics++] = *m;
}
mpt+=sizeof(*m);
count+=sizeof(*m);
@@ -403,9 +410,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
}
}
}
- if (ioapics > 1)
+ if (ioapics > MAX_IO_APICS)
{
- printk("Warning: Multiple IO-APICs not yet supported.\n");
+ printk("Warning: Max I/O APICs exceeded (max %d, found %d).\n", MAX_IO_APICS, ioapics);
printk("Warning: switching to non APIC mode.\n");
skip_ioapic_setup=1;
}
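
The MP-table walk now records every I/O APIC into mp_apics[], capped at
MAX_IO_APICS; testing the bound before the store keeps the write inside
the array. The check-before-store pattern in isolation (the names mirror
the kernel's, but this is a free-standing sketch):

    #include <stdio.h>

    #define MAX_IO_APICS 8

    struct ioapic_info { unsigned long addr; };

    static struct ioapic_info mp_apics[MAX_IO_APICS];
    static int nr_ioapics;

    /* returns 0 if recorded, -1 if the table is full */
    static int record_ioapic(unsigned long addr)
    {
        if (nr_ioapics >= MAX_IO_APICS)
            return -1;
        mp_apics[nr_ioapics++].addr = addr;
        return 0;
    }

    int main(void)
    {
        int i;

        for (i = 0; i < 10; i++)
            if (record_ioapic(0xfec00000UL + i * 0x1000))
                printf("dropped I/O APIC #%d (table full)\n", i);
        return 0;
    }
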
@@ -637,6 +644,8 @@ void __init init_smp_config (void)
#endif
}
+
+
/*
* Trampoline 80x86 program as an array.
*/
@@ -722,7 +731,11 @@ void __init enable_local_APIC(void)
value = apic_read(APIC_SPIV);
value |= (1<<8); /* Enable APIC (bit==1) */
+#if 0
value &= ~(1<<9); /* Enable focus processor (bit==0) */
+#else
+ value |= (1<<9); /* Disable focus processor (bit==1) */
+#endif
value |= 0xff; /* Set spurious IRQ vector to 0xff */
apic_write(APIC_SPIV,value);
@@ -771,18 +784,22 @@ unsigned long __init init_smp_mappings(unsigned long memory_start)
#ifdef CONFIG_X86_IO_APIC
{
- unsigned long ioapic_phys;
-
- if (smp_found_config) {
- ioapic_phys = mp_ioapic_addr;
- } else {
- ioapic_phys = __pa(memory_start);
- memset((void *)memory_start, 0, PAGE_SIZE);
- memory_start += PAGE_SIZE;
+ unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
+ int i;
+
+ for (i = 0; i < nr_ioapics; i++) {
+ if (smp_found_config) {
+ ioapic_phys = mp_apics[i].mpc_apicaddr;
+ } else {
+ ioapic_phys = __pa(memory_start);
+ memset((void *)memory_start, 0, PAGE_SIZE);
+ memory_start += PAGE_SIZE;
+ }
+ set_fixmap(idx,ioapic_phys);
+ printk("mapped IOAPIC to %08lx (%08lx)\n",
+ __fix_to_virt(idx), ioapic_phys);
+ idx++;
}
- set_fixmap(FIX_IO_APIC_BASE,ioapic_phys);
- printk("mapped IOAPIC to %08lx (%08lx)\n",
- fix_to_virt(FIX_IO_APIC_BASE), ioapic_phys);
}
#endif
@@ -870,7 +887,7 @@ void __init smp_callin(void)
int cpucount = 0;
-extern int cpu_idle(void * unused);
+extern int cpu_idle(void);
/*
* Activate a secondary processor.
@@ -882,10 +899,11 @@ int __init start_secondary(void *unused)
 * booting is so fragile that we want to limit the
* things done here to the most necessary things.
*/
+ cpu_init();
smp_callin();
while (!atomic_read(&smp_commenced))
/* nothing */ ;
- return cpu_idle(NULL);
+ return cpu_idle();
}
/*
@@ -896,15 +914,6 @@ int __init start_secondary(void *unused)
*/
void __init initialize_secondary(void)
{
- struct thread_struct * p = &current->tss;
-
- /*
- * Load up the LDT and the task register.
- */
- asm volatile("lldt %%ax": :"a" (p->ldt));
- asm volatile("ltr %%ax": :"a" (p->tr));
- stts();
-
/*
* We don't actually need to load the full TSS,
* basically just the stack pointer and the eip.
@@ -914,7 +923,7 @@ void __init initialize_secondary(void)
"movl %0,%%esp\n\t"
"jmp *%1"
:
- :"r" (p->esp),"r" (p->eip));
+ :"r" (current->thread.esp),"r" (current->thread.eip));
}
extern struct {
@@ -922,6 +931,14 @@ extern struct {
unsigned short ss;
} stack_start;
+static int __init fork_by_hand(void)
+{
+ struct pt_regs regs;
+ /* don't care about the eip and regs settings since we'll never
+ reschedule the forked task. */
+ return do_fork(CLONE_VM|CLONE_PID, 0, &regs);
+}
+
static void __init do_boot_cpu(int i)
{
unsigned long cfg;
@@ -931,13 +948,17 @@ static void __init do_boot_cpu(int i)
int timeout, num_starts, j;
unsigned long start_eip;
- /*
- * We need an idle process for each processor.
- */
- kernel_thread(start_secondary, NULL, CLONE_PID);
cpucount++;
+ /* We can't use kernel_thread since we must _avoid_
+ rescheduling the child. */
+ if (fork_by_hand() < 0)
+ panic("failed fork for CPU %d", i);
- idle = task[cpucount];
+ /*
+ * We remove it from the pidhash and the runqueue
+ * once we got the process:
+ */
+ idle = init_task.prev_task;
if (!idle)
panic("No idle process for CPU %d", i);
@@ -945,7 +966,11 @@ static void __init do_boot_cpu(int i)
__cpu_logical_map[cpucount] = i;
cpu_number_map[i] = cpucount;
idle->has_cpu = 1; /* we schedule the first task manually */
- idle->tss.eip = (unsigned long) start_secondary;
+ idle->thread.eip = (unsigned long) start_secondary;
+
+ del_from_runqueue(idle);
+ unhash_process(idle);
+ init_tasks[cpucount] = idle;
/* start_eip had better be page-aligned! */
start_eip = setup_trampoline();
@@ -1179,7 +1204,6 @@ void __init smp_boot_cpus(void)
/* Must be done before other processors booted */
mtrr_init_boot_cpu ();
#endif
- init_idle();
/*
* Initialize the logical to physical CPU number mapping
* and the per-CPU profiling counter/multiplier
@@ -1210,6 +1234,8 @@ void __init smp_boot_cpus(void)
cpu_number_map[boot_cpu_id] = 0;
+ init_idle();
+
/*
 * If we couldn't find an SMP configuration at boot time,
* get out of here now!
@@ -1222,6 +1248,7 @@ void __init smp_boot_cpus(void)
io_apic_irqs = 0;
#endif
cpu_online_map = cpu_present_map;
+ smp_num_cpus = 1;
goto smp_done;
}
@@ -1356,27 +1383,23 @@ void __init smp_boot_cpus(void)
*/
SMP_PRINTK(("Before bogomips.\n"));
- if (cpucount==0)
- {
+ if (!cpucount) {
printk(KERN_ERR "Error: only one processor found.\n");
cpu_online_map = (1<<hard_smp_processor_id());
- }
- else
- {
- unsigned long bogosum=0;
- for(i=0;i<32;i++)
- {
+ } else {
+ unsigned long bogosum = 0;
+ for(i = 0; i < 32; i++)
if (cpu_online_map&(1<<i))
bogosum+=cpu_data[i].loops_per_sec;
- }
printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
cpucount+1,
(bogosum+2500)/500000,
((bogosum+2500)/5000)%100);
SMP_PRINTK(("Before bogocount - setting activated=1.\n"));
- smp_activated=1;
- smp_num_cpus=cpucount+1;
+ smp_activated = 1;
}
+ smp_num_cpus = cpucount + 1;
+
if (smp_b_stepping)
printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
SMP_PRINTK(("Boot done.\n"));
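
The summary line turns the summed loops_per_sec into BogoMIPS with
fixed-point rounding: (bogosum+2500)/500000 yields the integer part and
((bogosum+2500)/5000)%100 the two decimals. The same arithmetic,
checkable on its own with made-up numbers:

    #include <stdio.h>

    int main(void)
    {
        /* two fake CPUs at ~400.39 BogoMIPS each (as loops_per_sec) */
        unsigned long bogosum = 200195000UL * 2;

        printf("Total of %d processors activated "
               "(%lu.%02lu BogoMIPS).\n",
               2,
               (bogosum + 2500) / 500000,        /* integer part */
               ((bogosum + 2500) / 5000) % 100); /* two decimals */
        return 0;   /* prints 800.78 */
    }
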
@@ -1392,6 +1415,11 @@ void __init smp_boot_cpus(void)
#endif
smp_done:
+ /*
+ * now we know the other CPUs have fired off and we know our
+ * APIC ID, so we can go init the TSS and stuff:
+ */
+ cpu_init();
}
@@ -1571,8 +1599,7 @@ static inline void send_IPI_single(int dest, int vector)
* bad as in the early days of SMP, so we might ease some of the
* paranoia here.
*/
-
-void smp_flush_tlb(void)
+static void flush_tlb_others(unsigned int cpumask)
{
int cpu = smp_processor_id();
int stuck;
@@ -1582,17 +1609,9 @@ void smp_flush_tlb(void)
* it's important that we do not generate any APIC traffic
* until the AP CPUs have booted up!
*/
- if (cpu_online_map) {
- /*
- * The assignment is safe because it's volatile so the
- * compiler cannot reorder it, because the i586 has
- * strict memory ordering and because only the kernel
- * lock holder may issue a tlb flush. If you break any
- * one of those three change this to an atomic bus
- * locked or.
- */
-
- smp_invalidate_needed = cpu_online_map;
+ cpumask &= cpu_online_map;
+ if (cpumask) {
+ atomic_set_mask(cpumask, &smp_invalidate_needed);
/*
* Processors spinning on some lock with IRQs disabled
@@ -1615,8 +1634,13 @@ void smp_flush_tlb(void)
/*
* Take care of "crossing" invalidates
*/
- if (test_bit(cpu, &smp_invalidate_needed))
- clear_bit(cpu, &smp_invalidate_needed);
+ if (test_bit(cpu, &smp_invalidate_needed)) {
+ struct mm_struct *mm = current->mm;
+ clear_bit(cpu, &smp_invalidate_needed);
+ if (mm)
+ atomic_set_mask(1 << cpu, &mm->cpu_vm_mask);
+ local_flush_tlb();
+ }
--stuck;
if (!stuck) {
printk("stuck on TLB IPI wait (CPU#%d)\n",cpu);
@@ -1625,12 +1649,57 @@ void smp_flush_tlb(void)
}
__restore_flags(flags);
}
+}
- /*
- * Flush the local TLB
- */
+/*
+ * Smarter SMP flushing macros.
+ * c/o Linus Torvalds.
+ *
+ * These mean you can really definitely utterly forget about
+ * writing to user space from interrupts. (It's not allowed anyway.)
+ */
+void flush_tlb_current_task(void)
+{
+ unsigned long vm_mask = 1 << current->processor;
+ struct mm_struct *mm = current->mm;
+ unsigned long cpu_mask = mm->cpu_vm_mask & ~vm_mask;
+
+ mm->cpu_vm_mask = vm_mask;
+ flush_tlb_others(cpu_mask);
local_flush_tlb();
+}
+
+void flush_tlb_mm(struct mm_struct * mm)
+{
+ unsigned long vm_mask = 1 << current->processor;
+ unsigned long cpu_mask = mm->cpu_vm_mask & ~vm_mask;
+ mm->cpu_vm_mask = 0;
+ if (current->active_mm == mm) {
+ mm->cpu_vm_mask = vm_mask;
+ local_flush_tlb();
+ }
+ flush_tlb_others(cpu_mask);
+}
+
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{
+ unsigned long vm_mask = 1 << current->processor;
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long cpu_mask = mm->cpu_vm_mask & ~vm_mask;
+
+ mm->cpu_vm_mask = 0;
+ if (current->active_mm == mm) {
+ __flush_tlb_one(va);
+ mm->cpu_vm_mask = vm_mask;
+ }
+ flush_tlb_others(cpu_mask);
+}
+
+void flush_tlb_all(void)
+{
+ flush_tlb_others(~(1 << current->processor));
+ local_flush_tlb();
}
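
The new flush_tlb_mm()/flush_tlb_page() compute which other CPUs
actually hold the mm (mm->cpu_vm_mask with the local bit removed), IPI
only those, and flush the local TLB in-line. A simplified sketch of just
the mask bookkeeping; printf() stands in for the IPI and the hardware
flush, and the active_mm handling is omitted:

    #include <stdio.h>

    struct mm { unsigned long cpu_vm_mask; };

    static void flush_tlb_others(unsigned long cpumask)
    {
        if (cpumask)
            printf("IPI cpus 0x%lx to flush\n", cpumask);
    }

    static void flush_tlb_mm(struct mm *mm, int this_cpu)
    {
        unsigned long vm_mask = 1UL << this_cpu;
        unsigned long cpu_mask = mm->cpu_vm_mask & ~vm_mask;

        mm->cpu_vm_mask = vm_mask;   /* only we hold it now */
        printf("local flush on cpu %d\n", this_cpu);
        flush_tlb_others(cpu_mask);
    }

    int main(void)
    {
        struct mm mm = { 0x5 };   /* loaded on cpus 0 and 2 */

        flush_tlb_mm(&mm, 0);     /* IPIs only cpu 2 */
        return 0;
    }
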
@@ -1853,13 +1922,24 @@ asmlinkage void smp_reschedule_interrupt(void)
}
/*
- * Invalidate call-back
+ * Invalidate call-back.
+ *
+ * Mark the CPU as a VM user if there is a active
+ * thread holding on to an mm at this time. This
+ * allows us to optimize CPU cross-calls even in the
+ * presense of lazy TLB handling.
*/
asmlinkage void smp_invalidate_interrupt(void)
{
- if (test_and_clear_bit(smp_processor_id(), &smp_invalidate_needed))
- local_flush_tlb();
+ struct task_struct *tsk = current;
+ unsigned int cpu = tsk->processor;
+ if (test_and_clear_bit(cpu, &smp_invalidate_needed)) {
+ struct mm_struct *mm = tsk->mm;
+ if (mm)
+ atomic_set_mask(1 << cpu, &mm->cpu_vm_mask);
+ local_flush_tlb();
+ }
ack_APIC_irq();
}
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 2ab29d479..9d18999a0 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -59,7 +59,7 @@
/*
* for x86_do_profile()
*/
-#include "irq.h"
+#include <linux/irq.h>
unsigned long cpu_hz; /* Detected as we calibrate the TSC */
@@ -547,7 +547,7 @@ static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0, "timer", NUL
#define CALIBRATE_LATCH (5 * LATCH)
#define CALIBRATE_TIME (5 * 1000020/HZ)
-__initfunc(static unsigned long calibrate_tsc(void))
+static unsigned long __init calibrate_tsc(void)
{
/* Set the Gate high, disable speaker */
outb((inb(0x61) & ~0x02) | 0x01, 0x61);
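
calibrate_tsc() times the TSC against the PIT: CALIBRATE_LATCH
(5*LATCH) ticks are loaded into channel 2, which takes CALIBRATE_TIME
(5*1000020/HZ) microseconds to count down, and the observed cycle delta
over that interval gives the clock rate. The kernel keeps the result as
a fixed-point quotient; this sketch just derives Hz from a made-up
cycle count:

    #include <stdio.h>

    #define HZ 100
    #define CALIBRATE_TIME (5 * 1000020 / HZ)   /* usecs per run */

    int main(void)
    {
        unsigned long long tsc_delta = 20015000ULL;  /* made-up cycles */

        /* cycles per usec, scaled to Hz; all integer math */
        unsigned long cpu_hz =
            (unsigned long)(tsc_delta * 1000000ULL / CALIBRATE_TIME);

        printf("Detected %lu.%06lu MHz.\n",
               cpu_hz / 1000000, cpu_hz % 1000000);
        return 0;
    }
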
@@ -612,7 +612,7 @@ bad_ctc:
return 0;
}
-__initfunc(void time_init(void))
+void __init time_init(void)
{
xtime.tv_sec = get_cmos_time();
xtime.tv_usec = 0;
@@ -681,8 +681,8 @@ __initfunc(void time_init(void))
co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
/* Wire cpu IDT entry to s/w handler (and Cobalt APIC to IDT) */
- setup_x86_irq(CO_IRQ_TIMER, &irq0);
+ setup_irq(CO_IRQ_TIMER, &irq0);
#else
- setup_x86_irq(0, &irq0);
+ setup_irq(0, &irq0);
#endif
}
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index cce35ac80..f3e6f75aa 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -20,6 +20,7 @@
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/delay.h>
+#include <linux/spinlock.h>
#ifdef CONFIG_MCA
#include <linux/mca.h>
@@ -29,7 +30,6 @@
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/io.h>
-#include <asm/spinlock.h>
#include <asm/atomic.h>
#include <asm/debugreg.h>
#include <asm/desc.h>
@@ -42,12 +42,14 @@
#include <asm/lithium.h>
#endif
-#include "irq.h"
+#include <linux/irq.h>
asmlinkage int system_call(void);
asmlinkage void lcall7(void);
+asmlinkage void lcall27(void);
-struct desc_struct default_ldt = { 0, 0 };
+struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 } };
/*
* The IDT has to be page-aligned to simplify the Pentium
@@ -65,10 +67,10 @@ static inline void console_verbose(void)
#define DO_ERROR(trapnr, signr, str, name, tsk) \
asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
{ \
- tsk->tss.error_code = error_code; \
- tsk->tss.trap_no = trapnr; \
- force_sig(signr, tsk); \
+ tsk->thread.error_code = error_code; \
+ tsk->thread.trap_no = trapnr; \
die_if_no_fixup(str,regs,error_code); \
+ force_sig(signr, tsk); \
}
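
DO_ERROR() stamps out one trap handler per vector, each recording
trap_no and error_code before raising the signal (note the hunk also
reorders die_if_no_fixup() ahead of force_sig()). A miniature of the
macro-generated-handler technique, with printf() in place of
force_sig():

    #include <stdio.h>

    struct task { int trap_no; long error_code; };
    static struct task current_task;

    #define DO_ERROR(trapnr, str, name)                      \
    static void do_##name(long error_code)                   \
    {                                                        \
        current_task.error_code = error_code;                \
        current_task.trap_no = (trapnr);                     \
        printf("%s: trap %d err %ld\n", (str),               \
               current_task.trap_no, error_code);            \
    }

    DO_ERROR(0, "divide error", divide_error)
    DO_ERROR(6, "invalid operand", invalid_op)

    int main(void)
    {
        do_divide_error(0);
        do_invalid_op(0);
        return 0;
    }
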
#define DO_VM86_ERROR(trapnr, signr, str, name, tsk) \
@@ -80,8 +82,8 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
goto out; \
/* else fall through */ \
} \
- tsk->tss.error_code = error_code; \
- tsk->tss.trap_no = trapnr; \
+ tsk->thread.error_code = error_code; \
+ tsk->thread.trap_no = trapnr; \
force_sig(signr, tsk); \
die_if_kernel(str,regs,error_code); \
out: \
@@ -143,10 +145,8 @@ static void show_registers(struct pt_regs *regs)
regs->esi, regs->edi, regs->ebp, esp);
printk("ds: %04x es: %04x ss: %04x\n",
regs->xds & 0xffff, regs->xes & 0xffff, ss);
- store_TR(i);
- printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)",
- current->comm, current->pid, 0xffff & i, 4096+(unsigned long)current);
-
+ printk("Process %s (pid: %d, stackpage=%08lx)",
+ current->comm, current->pid, 4096+(unsigned long)current);
/*
* When in-kernel, we also print out the stack and code at the
* time of the fault..
@@ -249,8 +252,8 @@ asmlinkage void cache_flush_denied(struct pt_regs * regs, long error_code)
return;
}
die_if_kernel("cache flush denied",regs,error_code);
- current->tss.error_code = error_code;
- current->tss.trap_no = 19;
+ current->thread.error_code = error_code;
+ current->thread.trap_no = 19;
force_sig(SIGSEGV, current);
}
@@ -262,8 +265,8 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
if (!(regs->xcs & 3))
goto gp_in_kernel;
- current->tss.error_code = error_code;
- current->tss.trap_no = 13;
+ current->thread.error_code = error_code;
+ current->thread.trap_no = 13;
force_sig(SIGSEGV, current);
return;
@@ -354,11 +357,17 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
unsigned int condition;
struct task_struct *tsk = current;
+ __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
+
+ /* Mask out spurious debug traps due to lazy DR7 setting */
+ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
+ if (!tsk->thread.debugreg[7])
+ goto clear_dr7;
+ }
+
if (regs->eflags & VM_MASK)
goto debug_vm86;
- __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
-
/* Mask out spurious TF errors due to lazy TF clearing */
if (condition & DR_STEP) {
/*
@@ -374,19 +383,13 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
goto clear_TF;
}
- /* Mast out spurious debug traps due to lazy DR7 setting */
- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
- if (!tsk->tss.debugreg[7])
- goto clear_dr7;
- }
-
/* If this is a kernel mode trap, we need to reset db7 to allow us to continue sanely */
if ((regs->xcs & 3) == 0)
goto clear_dr7;
/* Ok, finally something we can handle */
- tsk->tss.trap_no = 1;
- tsk->tss.error_code = error_code;
+ tsk->thread.trap_no = 1;
+ tsk->thread.error_code = error_code;
force_sig(SIGTRAP, tsk);
return;
@@ -422,8 +425,8 @@ void math_error(void)
*/
task = current;
save_fpu(task);
- task->tss.trap_no = 16;
- task->tss.error_code = 0;
+ task->thread.trap_no = 16;
+ task->thread.error_code = 0;
force_sig(SIGFPE, task);
}
@@ -453,7 +456,7 @@ asmlinkage void math_state_restore(struct pt_regs regs)
{
__asm__ __volatile__("clts"); /* Allow maths ops (or we recurse) */
if(current->used_math)
- __asm__("frstor %0": :"m" (current->tss.i387));
+ __asm__("frstor %0": :"m" (current->thread.i387));
else
{
/*
@@ -479,13 +482,14 @@ asmlinkage void math_emulate(long arg)
#endif /* CONFIG_MATH_EMULATION */
-__initfunc(void trap_init_f00f_bug(void))
+void __init trap_init_f00f_bug(void)
{
unsigned long page;
pgd_t * pgd;
pmd_t * pmd;
pte_t * pte;
+return; /* disables the f00f workaround below */
/*
* Allocate a new page in virtual address space,
* move the IDT into it and write protect this page.
@@ -570,12 +574,12 @@ __asm__ __volatile__ ("movw %3,0(%2)\n\t" \
void set_tss_desc(unsigned int n, void *addr)
{
- _set_tssldt_desc(gdt_table+FIRST_TSS_ENTRY+(n<<1), (int)addr, 235, 0x89);
+ _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 235, 0x89);
}
void set_ldt_desc(unsigned int n, void *addr, unsigned int size)
{
- _set_tssldt_desc(gdt_table+FIRST_LDT_ENTRY+(n<<1), (int)addr, ((size << 3) - 1), 0x82);
+ _set_tssldt_desc(gdt_table+__LDT(n), (int)addr, ((size << 3)-1), 0x82);
}
#ifdef CONFIG_X86_VISWS_APIC
@@ -672,7 +676,7 @@ void __init trap_init(void)
{
if (readl(0x0FFFD9) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
EISA_bus = 1;
- set_call_gate(&default_ldt,lcall7);
+
set_trap_gate(0,&divide_error);
set_trap_gate(1,&debug);
set_trap_gate(2,&nmi);
@@ -693,14 +697,22 @@ void __init trap_init(void)
set_trap_gate(17,&alignment_check);
set_system_gate(SYSCALL_VECTOR,&system_call);
- /* set up GDT task & ldt entries */
- set_tss_desc(0, &init_task.tss);
- set_ldt_desc(0, &default_ldt, 1);
+ /*
+ * the default LDT is a callgate to lcall7 for iBCS
+ * and a callgate to lcall27 for Solaris/x86 binaries
+ */
+ set_call_gate(&default_ldt[0],lcall7);
+ set_call_gate(&default_ldt[4],lcall27);
+
+ /*
+ * on SMP we do not yet know which CPU is on which TSS,
+ * so we delay this until smp_init(). (the CPU is already
+ * in a reasonable state, otherwise we wouldn't have gotten so far :)
+ */
+#ifndef __SMP__
+ cpu_init();
+#endif
- /* Clear NT, so that we won't have troubles with that later on */
- __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
- load_TR(0);
- load_ldt(0);
#ifdef CONFIG_X86_VISWS_APIC
superio_init();
lithium_init();
diff --git a/arch/i386/kernel/visws_apic.c b/arch/i386/kernel/visws_apic.c
index c12054689..de79fe61e 100644
--- a/arch/i386/kernel/visws_apic.c
+++ b/arch/i386/kernel/visws_apic.c
@@ -23,7 +23,6 @@
#include <linux/malloc.h>
#include <linux/random.h>
#include <linux/smp.h>
-#include <linux/tasks.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
@@ -103,7 +102,7 @@ static struct hw_interrupt_type cobalt_irq_type = {
/*
- * Not an initfunc, needed by the reboot code
+ * Not an __init, needed by the reboot code
*/
void init_pic_mode(void)
{
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index d181dc699..65dd7e9da 100644
--- a/arch/i386/kernel/vm86.c
+++ b/arch/i386/kernel/vm86.c
@@ -48,8 +48,8 @@
/*
* virtual flags (16 and 32-bit versions)
*/
-#define VFLAGS (*(unsigned short *)&(current->tss.v86flags))
-#define VEFLAGS (current->tss.v86flags)
+#define VFLAGS (*(unsigned short *)&(current->thread.v86flags))
+#define VEFLAGS (current->thread.v86flags)
#define set_flags(X,new,mask) \
((X) = ((X) & ~(mask)) | ((new) & (mask)))
@@ -65,25 +65,27 @@
asmlinkage struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs));
struct pt_regs * save_v86_state(struct kernel_vm86_regs * regs)
{
+ struct tss_struct *tss;
struct pt_regs *ret;
unsigned long tmp;
lock_kernel();
- if (!current->tss.vm86_info) {
+ if (!current->thread.vm86_info) {
printk("no vm86_info: BAD\n");
do_exit(SIGSEGV);
}
- set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->tss.v86mask);
- tmp = copy_to_user(&current->tss.vm86_info->regs,regs, VM86_REGS_SIZE1);
- tmp += copy_to_user(&current->tss.vm86_info->regs.VM86_REGS_PART2,
+ set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->thread.v86mask);
+ tmp = copy_to_user(&current->thread.vm86_info->regs,regs, VM86_REGS_SIZE1);
+ tmp += copy_to_user(&current->thread.vm86_info->regs.VM86_REGS_PART2,
&regs->VM86_REGS_PART2, VM86_REGS_SIZE2);
- tmp += put_user(current->tss.screen_bitmap,&current->tss.vm86_info->screen_bitmap);
+ tmp += put_user(current->thread.screen_bitmap,&current->thread.vm86_info->screen_bitmap);
if (tmp) {
printk("vm86: could not access userspace vm86_info\n");
do_exit(SIGSEGV);
}
- current->tss.esp0 = current->tss.saved_esp0;
- current->tss.saved_esp0 = 0;
+ tss = init_tss + smp_processor_id();
+ tss->esp0 = current->thread.esp0 = current->thread.saved_esp0;
+ current->thread.saved_esp0 = 0;
ret = KVM86->regs32;
unlock_kernel();
return ret;
@@ -138,7 +140,7 @@ asmlinkage int sys_vm86old(struct vm86_struct * v86)
lock_kernel();
tsk = current;
- if (tsk->tss.saved_esp0)
+ if (tsk->thread.saved_esp0)
goto out;
tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1);
tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
@@ -148,7 +150,7 @@ asmlinkage int sys_vm86old(struct vm86_struct * v86)
goto out;
memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
info.regs32 = (struct pt_regs *) &v86;
- tsk->tss.vm86_info = v86;
+ tsk->thread.vm86_info = v86;
do_sys_vm86(&info, tsk);
ret = 0; /* we never return here */
out:
@@ -188,7 +190,7 @@ asmlinkage int sys_vm86(unsigned long subfunction, struct vm86plus_struct * v86)
/* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
ret = -EPERM;
- if (tsk->tss.saved_esp0)
+ if (tsk->thread.saved_esp0)
goto out;
tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1);
tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
@@ -198,7 +200,7 @@ asmlinkage int sys_vm86(unsigned long subfunction, struct vm86plus_struct * v86)
goto out;
info.regs32 = (struct pt_regs *) &subfunction;
info.vm86plus.is_vm86pus = 1;
- tsk->tss.vm86_info = (struct vm86_struct *)v86;
+ tsk->thread.vm86_info = (struct vm86_struct *)v86;
do_sys_vm86(&info, tsk);
ret = 0; /* we never return here */
out:
@@ -209,6 +211,7 @@ out:
static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk)
{
+ struct tss_struct *tss;
/*
* make sure the vm86() system call doesn't try to do anything silly
*/
@@ -231,16 +234,16 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
switch (info->cpu_type) {
case CPU_286:
- tsk->tss.v86mask = 0;
+ tsk->thread.v86mask = 0;
break;
case CPU_386:
- tsk->tss.v86mask = NT_MASK | IOPL_MASK;
+ tsk->thread.v86mask = NT_MASK | IOPL_MASK;
break;
case CPU_486:
- tsk->tss.v86mask = AC_MASK | NT_MASK | IOPL_MASK;
+ tsk->thread.v86mask = AC_MASK | NT_MASK | IOPL_MASK;
break;
default:
- tsk->tss.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK;
+ tsk->thread.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK;
break;
}
@@ -248,10 +251,11 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
* Save old state, set default return value (%eax) to 0
*/
info->regs32->eax = 0;
- tsk->tss.saved_esp0 = tsk->tss.esp0;
- tsk->tss.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
+ tsk->thread.saved_esp0 = tsk->thread.esp0;
+ tss = init_tss + smp_processor_id();
+ tss->esp0 = tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
- tsk->tss.screen_bitmap = info->screen_bitmap;
+ tsk->thread.screen_bitmap = info->screen_bitmap;
if (info->flags & VM86_SCREEN_BITMAP)
mark_screen_rdonly(tsk);
unlock_kernel();
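
do_sys_vm86() (and save_v86_state() earlier in the file) must now update
esp0 in the per-CPU hardware TSS at the same moment as thread.esp0,
since there is one shared TSS per CPU rather than one per task. A sketch
of that keep-both-copies-in-sync assignment; the structs are
illustrative stand-ins:

    struct tss    { unsigned long esp0; };
    struct thread { unsigned long esp0, saved_esp0; };

    static struct tss init_tss[2];   /* one per CPU */

    static void enter_vm86(struct thread *t, int cpu,
                           unsigned long new_esp0)
    {
        t->saved_esp0 = t->esp0;
        /* chained assignment keeps hardware and task copies identical */
        init_tss[cpu].esp0 = t->esp0 = new_esp0;
    }

    static void leave_vm86(struct thread *t, int cpu)
    {
        init_tss[cpu].esp0 = t->esp0 = t->saved_esp0;
        t->saved_esp0 = 0;
    }

    int main(void)
    {
        struct thread t = { 0x1000, 0 };

        enter_vm86(&t, 0, 0x2000);
        leave_vm86(&t, 0);
        return (int)t.esp0 - 0x1000;   /* 0 if restored correctly */
    }
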
@@ -295,7 +299,7 @@ static inline void clear_TF(struct kernel_vm86_regs * regs)
static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs)
{
- set_flags(VEFLAGS, eflags, current->tss.v86mask);
+ set_flags(VEFLAGS, eflags, current->thread.v86mask);
set_flags(regs->eflags, eflags, SAFE_MASK);
if (eflags & IF_MASK)
set_IF(regs);
@@ -303,7 +307,7 @@ static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs
static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs)
{
- set_flags(VFLAGS, flags, current->tss.v86mask);
+ set_flags(VFLAGS, flags, current->thread.v86mask);
set_flags(regs->eflags, flags, SAFE_MASK);
if (flags & IF_MASK)
set_IF(regs);
@@ -315,7 +319,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs * regs)
if (VEFLAGS & VIF_MASK)
flags |= IF_MASK;
- return flags | (VEFLAGS & current->tss.v86mask);
+ return flags | (VEFLAGS & current->thread.v86mask);
}
static inline int is_revectored(int nr, struct revectored_struct * bitmap)
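
get_vflags() and the set_vflags_*() helpers merge guest-visible bits
into the flags image with the classic (X & ~mask) | (new & mask) idiom,
where v86mask selects which bits the vm86 task may see. The merge on
its own:

    #include <stdio.h>

    #define set_flags(X, new, mask) \
        ((X) = ((X) & ~(mask)) | ((new) & (mask)))

    int main(void)
    {
        unsigned long eflags = 0x00000202;  /* IF set */
        unsigned long guest  = 0x00004092;  /* NT plus arithmetic flags */
        unsigned long mask   = 0x00004000;  /* only NT may pass through */

        set_flags(eflags, guest, mask);
        printf("eflags = 0x%08lx\n", eflags);   /* 0x00004202 */
        return 0;
    }
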
@@ -447,8 +451,8 @@ int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno
spin_unlock_irqrestore(&current->sigmask_lock, flags);
}
send_sig(SIGTRAP, current, 1);
- current->tss.trap_no = trapno;
- current->tss.error_code = error_code;
+ current->thread.trap_no = trapno;
+ current->thread.error_code = error_code;
return 0;
}