summaryrefslogtreecommitdiffstats
path: root/arch/i386
diff options
context:
space:
mode:
authorRalf Baechle <ralf@linux-mips.org>1999-02-15 02:15:32 +0000
committerRalf Baechle <ralf@linux-mips.org>1999-02-15 02:15:32 +0000
commit86464aed71025541805e7b1515541aee89879e33 (patch)
treee01a457a4912a8553bc65524aa3125d51f29f810 /arch/i386
parent88f99939ecc6a95a79614574cb7d95ffccfc3466 (diff)
Merge with Linux 2.2.1.
Diffstat (limited to 'arch/i386')
-rw-r--r--arch/i386/Makefile4
-rw-r--r--arch/i386/boot/Makefile2
-rw-r--r--arch/i386/config.in64
-rw-r--r--arch/i386/defconfig79
-rw-r--r--arch/i386/kernel/Makefile12
-rw-r--r--arch/i386/kernel/apm.c206
-rw-r--r--arch/i386/kernel/bios32.c6
-rw-r--r--arch/i386/kernel/entry.S10
-rw-r--r--arch/i386/kernel/head.S12
-rw-r--r--arch/i386/kernel/i386_ksyms.c5
-rw-r--r--arch/i386/kernel/io_apic.c15
-rw-r--r--arch/i386/kernel/ioport.c10
-rw-r--r--arch/i386/kernel/irq.c403
-rw-r--r--arch/i386/kernel/irq.h58
-rw-r--r--arch/i386/kernel/mtrr.c8
-rw-r--r--arch/i386/kernel/process.c71
-rw-r--r--arch/i386/kernel/ptrace.c8
-rw-r--r--arch/i386/kernel/setup.c325
-rw-r--r--arch/i386/kernel/smp.c838
-rw-r--r--arch/i386/kernel/sys_i386.c96
-rw-r--r--arch/i386/kernel/time.c194
-rw-r--r--arch/i386/kernel/traps.c121
-rw-r--r--arch/i386/kernel/visws_apic.c407
-rw-r--r--arch/i386/lib/Makefile3
-rw-r--r--arch/i386/lib/checksum.S447
-rw-r--r--arch/i386/lib/checksum.c459
-rw-r--r--arch/i386/lib/delay.c12
-rw-r--r--arch/i386/lib/old-checksum.c19
-rw-r--r--arch/i386/lib/usercopy.c28
-rw-r--r--arch/i386/mm/init.c62
-rw-r--r--arch/i386/vmlinux.lds4
31 files changed, 2649 insertions, 1339 deletions
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index 3e5c4e7ff..322b53210 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -35,6 +35,10 @@ ifdef CONFIG_M586
CFLAGS := $(CFLAGS) -m486 -malign-loops=2 -malign-jumps=2 -malign-functions=2 -DCPU=586
endif
+ifdef CONFIG_M586TSC
+CFLAGS := $(CFLAGS) -m486 -malign-loops=2 -malign-jumps=2 -malign-functions=2 -DCPU=586
+endif
+
ifdef CONFIG_M686
CFLAGS := $(CFLAGS) -m486 -malign-loops=2 -malign-jumps=2 -malign-functions=2 -DCPU=686
endif
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index ff26d087c..64b9377a2 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -40,7 +40,7 @@ zlilo: $(CONFIGURE) $(BOOTIMAGE)
if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
install: $(CONFIGURE) $(BOOTIMAGE)
- sh -x ./install.sh $(VERSION).$(PATCHLEVEL).$(SUBLEVEL) $(BOOTIMAGE) $(TOPDIR)/System.map "$(INSTALL_PATH)"
+ sh -x ./install.sh $(KERNELRELEASE) $(BOOTIMAGE) $(TOPDIR)/System.map "$(INSTALL_PATH)"
tools/build: tools/build.c
$(HOSTCC) $(HOSTCFLAGS) -o $@ $< -I$(TOPDIR)/include
diff --git a/arch/i386/config.in b/arch/i386/config.in
index a22692bca..3c42052e4 100644
--- a/arch/i386/config.in
+++ b/arch/i386/config.in
@@ -14,12 +14,28 @@ comment 'Processor type and features'
choice 'Processor family' \
"386 CONFIG_M386 \
486/Cx486 CONFIG_M486 \
- Pentium/K5/5x86/6x86 CONFIG_M586 \
- PPro/K6/6x86MX CONFIG_M686" Pentium
-bool 'Math emulation' CONFIG_MATH_EMULATION
-if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
- bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
+ 586/K5/5x86/6x86 CONFIG_M586 \
+ Pentium/K6/TSC CONFIG_M586TSC \
+ PPro/6x86MX CONFIG_M686" PPro
+#
+# Define implied options from the CPU selection here
+#
+if [ "$CONFIG_M386" != "y" ]; then
+ define_bool CONFIG_X86_WP_WORKS_OK y
+ define_bool CONFIG_X86_INVLPG y
+ define_bool CONFIG_X86_BSWAP y
+ define_bool CONFIG_X86_POPAD_OK y
+fi
+if [ "$CONFIG_M686" = "y" -o "$CONFIG_M586TSC" = "y" ]; then
+ define_bool CONFIG_X86_TSC y
fi
+if [ "$CONFIG_M686" = "y" ]; then
+ define_bool CONFIG_X86_GOOD_APIC y
+fi
+
+bool 'Math emulation' CONFIG_MATH_EMULATION
+bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
+bool 'Symmetric multi-processing support' CONFIG_SMP
endmenu
mainmenu_option next_comment
@@ -37,8 +53,16 @@ comment 'General setup'
bool 'Networking support' CONFIG_NET
bool 'PCI support' CONFIG_PCI
if [ "$CONFIG_PCI" = "y" ]; then
- bool ' PCI BIOS support' CONFIG_PCI_BIOS
- bool ' PCI direct access support' CONFIG_PCI_DIRECT
+ choice 'PCI access mode' \
+ "BIOS CONFIG_PCI_GOBIOS \
+ Direct CONFIG_PCI_GODIRECT \
+ Any CONFIG_PCI_GOANY" Any
+ if [ "$CONFIG_PCI_GOBIOS" = "y" -o "$CONFIG_PCI_GOANY" = "y" ]; then
+ define_bool CONFIG_PCI_BIOS y
+ fi
+ if [ "$CONFIG_PCI_GODIRECT" = "y" -o "$CONFIG_PCI_GOANY" = "y" ]; then
+ define_bool CONFIG_PCI_DIRECT y
+ fi
bool ' PCI quirks' CONFIG_PCI_QUIRKS
if [ "$CONFIG_PCI_QUIRKS" = "y" -a "$CONFIG_EXPERIMENTAL" = "y" ]; then
bool ' PCI bridge optimization (experimental)' CONFIG_PCI_OPTIMIZE
@@ -46,6 +70,17 @@ if [ "$CONFIG_PCI" = "y" ]; then
bool ' Backward-compatible /proc/pci' CONFIG_PCI_OLD_PROC
fi
bool 'MCA support' CONFIG_MCA
+bool 'SGI Visual Workstation support' CONFIG_VISWS
+if [ "$CONFIG_VISWS" = "y" ]; then
+ define_bool CONFIG_X86_VISWS_APIC y
+ define_bool CONFIG_X86_LOCAL_APIC y
+else
+ if [ "$CONFIG_SMP" = "y" ]; then
+ define_bool CONFIG_X86_IO_APIC y
+ define_bool CONFIG_X86_LOCAL_APIC y
+ fi
+fi
+
bool 'System V IPC' CONFIG_SYSVIPC
bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT
bool 'Sysctl support' CONFIG_SYSCTL
@@ -73,6 +108,8 @@ if [ "$CONFIG_APM" = "y" ]; then
bool ' Power off on shutdown' CONFIG_APM_POWER_OFF
bool ' Ignore multiple suspend' CONFIG_APM_IGNORE_MULTIPLE_SUSPEND
bool ' Ignore multiple suspend/resume cycles' CONFIG_APM_IGNORE_SUSPEND_BOUNCE
+ bool ' RTC stores time in GMT' CONFIG_APM_RTC_IS_GMT
+ bool ' Allow interrupts during APM BIOS calls' CONFIG_APM_ALLOW_INTS
fi
endmenu
@@ -108,12 +145,15 @@ fi
source net/ax25/Config.in
+source net/irda/Config.in
+
mainmenu_option next_comment
comment 'ISDN subsystem'
-
-tristate 'ISDN support' CONFIG_ISDN
-if [ "$CONFIG_ISDN" != "n" ]; then
- source drivers/isdn/Config.in
+if [ "$CONFIG_NET" != "n" ]; then
+ tristate 'ISDN support' CONFIG_ISDN
+ if [ "$CONFIG_ISDN" != "n" ]; then
+ source drivers/isdn/Config.in
+ fi
fi
endmenu
@@ -130,8 +170,6 @@ source drivers/char/Config.in
source fs/Config.in
-source fs/nls/Config.in
-
if [ "$CONFIG_VT" = "y" ]; then
mainmenu_option next_comment
comment 'Console drivers'
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
index 78351e2d5..200716f59 100644
--- a/arch/i386/defconfig
+++ b/arch/i386/defconfig
@@ -12,9 +12,18 @@
#
# CONFIG_M386 is not set
# CONFIG_M486 is not set
-CONFIG_M586=y
-# CONFIG_M686 is not set
+# CONFIG_M586 is not set
+# CONFIG_M586TSC is not set
+CONFIG_M686=y
+CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INVLPG=y
+CONFIG_X86_BSWAP=y
+CONFIG_X86_POPAD_OK=y
+CONFIG_X86_TSC=y
+CONFIG_X86_GOOD_APIC=y
# CONFIG_MATH_EMULATION is not set
+# CONFIG_MTRR is not set
+CONFIG_SMP=y
#
# Loadable module support
@@ -28,11 +37,17 @@ CONFIG_MODULES=y
#
CONFIG_NET=y
CONFIG_PCI=y
+# CONFIG_PCI_GOBIOS is not set
+# CONFIG_PCI_GODIRECT is not set
+CONFIG_PCI_GOANY=y
CONFIG_PCI_BIOS=y
CONFIG_PCI_DIRECT=y
CONFIG_PCI_QUIRKS=y
CONFIG_PCI_OLD_PROC=y
# CONFIG_MCA is not set
+# CONFIG_VISWS is not set
+CONFIG_X86_IO_APIC=y
+CONFIG_X86_LOCAL_APIC=y
CONFIG_SYSVIPC=y
# CONFIG_BSD_PROCESS_ACCT is not set
CONFIG_SYSCTL=y
@@ -139,6 +154,7 @@ CONFIG_SCSI_CONSTANTS=y
# SCSI low-level drivers
#
# CONFIG_SCSI_7000FASST is not set
+# CONFIG_SCSI_ACARD is not set
# CONFIG_SCSI_AHA152X is not set
# CONFIG_SCSI_AHA1542 is not set
# CONFIG_SCSI_AHA1740 is not set
@@ -146,14 +162,16 @@ CONFIG_SCSI_CONSTANTS=y
# CONFIG_SCSI_ADVANSYS is not set
# CONFIG_SCSI_IN2000 is not set
# CONFIG_SCSI_AM53C974 is not set
+# CONFIG_SCSI_MEGARAID is not set
# CONFIG_SCSI_BUSLOGIC is not set
# CONFIG_SCSI_DTC3280 is not set
+# CONFIG_SCSI_EATA is not set
# CONFIG_SCSI_EATA_DMA is not set
# CONFIG_SCSI_EATA_PIO is not set
-# CONFIG_SCSI_EATA is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_GDTH is not set
# CONFIG_SCSI_GENERIC_NCR5380 is not set
+# CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_NCR53C406A is not set
# CONFIG_SCSI_NCR53C7xx is not set
CONFIG_SCSI_NCR53C8XX=y
@@ -206,6 +224,8 @@ CONFIG_EEXPRESS_PRO100=y
# CONFIG_NET_RADIO is not set
# CONFIG_TR is not set
# CONFIG_HOSTESS_SV11 is not set
+# CONFIG_COSA is not set
+# CONFIG_RCPCI is not set
# CONFIG_WAN_DRIVERS is not set
# CONFIG_LAPBETHER is not set
# CONFIG_X25_ASY is not set
@@ -237,6 +257,10 @@ CONFIG_SERIAL=y
CONFIG_UNIX98_PTYS=y
CONFIG_UNIX98_PTY_COUNT=256
CONFIG_MOUSE=y
+
+#
+# Mice
+#
# CONFIG_ATIXL_BUSMOUSE is not set
# CONFIG_BUSMOUSE is not set
# CONFIG_MS_BUSMOUSE is not set
@@ -245,9 +269,17 @@ CONFIG_82C710_MOUSE=y
# CONFIG_PC110_PAD is not set
# CONFIG_QIC02_TAPE is not set
# CONFIG_WATCHDOG is not set
+# CONFIG_NVRAM is not set
# CONFIG_RTC is not set
+
+#
+# Video For Linux
+#
# CONFIG_VIDEO_DEV is not set
-# CONFIG_NVRAM is not set
+
+#
+# Joystick support
+#
# CONFIG_JOYSTICK is not set
#
@@ -259,36 +291,43 @@ CONFIG_82C710_MOUSE=y
# Filesystems
#
# CONFIG_QUOTA is not set
-# CONFIG_MINIX_FS is not set
-CONFIG_EXT2_FS=y
-CONFIG_ISO9660_FS=y
-# CONFIG_JOLIET is not set
+CONFIG_AUTOFS_FS=y
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
# CONFIG_FAT_FS is not set
# CONFIG_MSDOS_FS is not set
# CONFIG_UMSDOS_FS is not set
# CONFIG_VFAT_FS is not set
+CONFIG_ISO9660_FS=y
+# CONFIG_JOLIET is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_NTFS_FS is not set
+# CONFIG_HPFS_FS is not set
CONFIG_PROC_FS=y
+CONFIG_DEVPTS_FS=y
+# CONFIG_ROMFS_FS is not set
+CONFIG_EXT2_FS=y
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+# CONFIG_CODA_FS is not set
CONFIG_NFS_FS=y
-CONFIG_NFSD=y
# CONFIG_NFSD_SUN is not set
CONFIG_SUNRPC=y
CONFIG_LOCKD=y
-# CONFIG_CODA_FS is not set
# CONFIG_SMB_FS is not set
# CONFIG_NCP_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_NTFS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_ROMFS_FS is not set
-CONFIG_AUTOFS_FS=y
-# CONFIG_UFS_FS is not set
+
+#
+# Partition Types
+#
# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MAC_PARTITION is not set
# CONFIG_SMD_DISKLABEL is not set
# CONFIG_SOLARIS_X86_PARTITION is not set
-CONFIG_DEVPTS_FS=y
-# CONFIG_MAC_PARTITION is not set
# CONFIG_NLS is not set
#
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index b8a94af18..0c3f24889 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -38,8 +38,16 @@ ifdef CONFIG_APM
OX_OBJS += apm.o
endif
-ifdef SMP
-O_OBJS += io_apic.o smp.o trampoline.o
+ifdef CONFIG_SMP
+O_OBJS += smp.o trampoline.o
+endif
+
+ifdef CONFIG_X86_IO_APIC
+O_OBJS += io_apic.o
+endif
+
+ifdef CONFIG_X86_VISWS_APIC
+O_OBJS += visws_apic.o
endif
head.o: head.S $(TOPDIR)/include/linux/tasks.h
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 1cbc824f5..6eba89025 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -31,6 +31,8 @@
* Aug 1998, Version 1.5
* Sep 1998, Version 1.6
* Nov 1998, Version 1.7
+ * Jan 1999, Version 1.8
+ * Jan 1999, Version 1.9
*
* History:
* 0.6b: first version in official kernel, Linux 1.3.46
@@ -72,6 +74,22 @@
* Make boot messages far less verbose by default
* Make asm safer
* Stephen Rothwell
+ * 1.8: Add CONFIG_APM_RTC_IS_GMT
+ * Richard Gooch <rgooch@atnf.csiro.au>
+ * change APM_NOINTS to CONFIG_APM_ALLOW_INTS
+ * remove dependency on CONFIG_PROC_FS
+ * Stephen Rothwell
+ * 1.9: Fix small typo. <laslo@ilo.opole.pl>
+ * Try to cope with BIOS's that need to have all display
+ * devices blanked and not just the first one.
+ * Ross Paterson <ross@soi.city.ac.uk>
+ * Fix segment limit setting; it has always been wrong as
+ * the segments needed to have byte granularity.
+ * Mark a few things __init.
+ * Add hack to allow power off of SMP systems by popular request.
+ * Use CONFIG_SMP instead of __SMP__
+ * Ignore BOUNCES for three seconds.
+ * Stephen Rothwell
*
* APM 1.1 Reference:
*
@@ -105,10 +123,8 @@
#include <linux/fcntl.h>
#include <linux/malloc.h>
#include <linux/linkage.h>
-#ifdef CONFIG_PROC_FS
#include <linux/stat.h>
#include <linux/proc_fs.h>
-#endif
#include <linux/miscdevice.h>
#include <linux/apm_bios.h>
#include <linux/init.h>
@@ -202,13 +218,6 @@ extern unsigned long get_cmos_time(void);
#define ALWAYS_CALL_BUSY
/*
- * Define to disable interrupts in APM BIOS calls (the CPU Idle BIOS call
- * should turn interrupts on before it does a 'hlt').
- * This reportedly needs undefining for the ThinkPad 600.
- */
-#define APM_NOINTS
-
-/*
* Define to make the APM BIOS calls zero all data segment registers (so
* that an incorrect BIOS implementation will cause a kernel panic if it
* tries to write to arbitrary memory).
@@ -216,7 +225,7 @@ extern unsigned long get_cmos_time(void);
#define APM_ZERO_SEGS
/*
- * Define to make all set_limit calls use 64k limits. The APM 1.1 BIOS is
+ * Define to make all _set_limit calls use 64k limits. The APM 1.1 BIOS is
* supposed to provide limit information that it recognizes. Many machines
* do this correctly, but many others do not restrict themselves to their
* claimed limit. When this happens, they will cause a segmentation
@@ -245,6 +254,12 @@ extern unsigned long get_cmos_time(void);
#define APM_CHECK_TIMEOUT (HZ)
/*
+ * If CONFIG_APM_IGNORE_SUSPEND_BOUNCE is defined then
+ * ignore suspend events for this amount of time
+ */
+#define BOUNCE_INTERVAL (3 * HZ)
+
+/*
* Save a segment register away
*/
#define savesegment(seg, where) \
@@ -266,9 +281,7 @@ static ssize_t do_read(struct file *, char *, size_t , loff_t *);
static unsigned int do_poll(struct file *, poll_table *);
static int do_ioctl(struct inode *, struct file *, u_int, u_long);
-#ifdef CONFIG_PROC_FS
static int apm_get_info(char *, char **, off_t, int, int);
-#endif
extern int apm_register_callback(int (*)(apm_event_t));
extern void apm_unregister_callback(int (*)(apm_event_t));
@@ -281,6 +294,7 @@ static asmlinkage struct {
unsigned short segment;
} apm_bios_entry;
static int apm_enabled = 0;
+static int smp_hack = 0;
#ifdef CONFIG_APM_CPU_IDLE
static int clock_slowed = 0;
#endif
@@ -290,8 +304,13 @@ static int standbys_pending = 0;
static int waiting_for_resume = 0;
#endif
+#ifdef CONFIG_APM_RTC_IS_GMT
+# define clock_cmos_diff 0
+# define got_clock_diff 1
+#else
static long clock_cmos_diff;
static int got_clock_diff = 0;
+#endif
static int debug = 0;
static int apm_disabled = 0;
@@ -300,7 +319,7 @@ static struct apm_bios_struct * user_list = NULL;
static struct timer_list apm_timer;
-static char driver_version[] = "1.7"; /* no spaces */
+static char driver_version[] = "1.9"; /* no spaces */
#ifdef APM_DEBUG
static char * apm_event_name[] = {
@@ -375,22 +394,22 @@ static const lookup_t error_table[] = {
#define ERROR_COUNT (sizeof(error_table)/sizeof(lookup_t))
/*
- * These are the actual BIOS calls. Depending on APM_ZERO_SEGS
- * and APM_NOINTS, we are being really paranoid here! Not only are
- * interrupts disabled, but all the segment registers (except SS) are
- * saved and zeroed this means that if the BIOS tries to reference any
- * data without explicitly loading the segment registers, the kernel will
- * fault immediately rather than have some unforeseen circumstances for
- * the rest of the kernel. And it will be very obvious! :-) Doing this
- * depends on CS referring to the same physical memory as DS so that DS
- * can be zeroed before the call. Unfortunately, we can't do anything
+ * These are the actual BIOS calls. Depending on APM_ZERO_SEGS and
+ * CONFIG_APM_ALLOW_INTS, we are being really paranoid here! Not only
+ * are interrupts disabled, but all the segment registers (except SS)
+ * are saved and zeroed; this means that if the BIOS tries to reference
+ * any data without explicitly loading the segment registers, the kernel
+ * will fault immediately rather than have some unforeseen circumstances
+ * for the rest of the kernel. And it will be very obvious! :-) Doing
+ * this depends on CS referring to the same physical memory as DS so that
+ * DS can be zeroed before the call. Unfortunately, we can't do anything
* about the stack segment/pointer. Also, we tell the compiler that
* everything could change.
*
* Also, we KNOW that for the non error case of apm_bios_call, there
* is no useful data returned in the low order 8 bits of eax.
*/
-#ifdef APM_NOINTS
+#ifndef CONFIG_APM_ALLOW_INTS
# define APM_DO_CLI __cli()
#else
# define APM_DO_CLI
@@ -526,7 +545,15 @@ static int apm_set_power_state(u_short state)
void apm_power_off(void)
{
- if (apm_enabled)
+ /*
+ * smp_hack == 2 means that we would have enabled APM support
+ * except there is more than one processor and so most of
+ * the APM stuff is unsafe. We will still try power down
+ * because is is useful to some people and they know what
+ * they are doing because they booted with the smp-power-off
+ * kernel option.
+ */
+ if (apm_enabled || (smp_hack == 2))
(void) apm_set_power_state(APM_STATE_OFF);
}
@@ -534,12 +561,19 @@ void apm_power_off(void)
/* Called by apm_display_blank and apm_display_unblank when apm_enabled. */
static int apm_set_display_power_state(u_short state)
{
- return set_power_state(0x0100, state);
+ int error;
+
+ /* Blank the first display device */
+ error = set_power_state(0x0100, state);
+ if (error == APM_BAD_DEVICE)
+ /* try to blank them all instead */
+ error = set_power_state(0x01ff, state);
+ return error;
}
#endif
#ifdef CONFIG_APM_DO_ENABLE
-static int apm_enable_power_management(void)
+static int __init apm_enable_power_management(void)
{
u32 eax;
@@ -568,12 +602,9 @@ static int apm_get_power_status(u_short *status, u_short *bat, u_short *life)
return APM_SUCCESS;
}
-#if 0
-/* not used anywhere */
-static int apm_get_battery_status(u_short which,
+static int apm_get_battery_status(u_short which, u_short *status,
u_short *bat, u_short *life, u_short *nbat)
{
- u_short status;
u32 eax;
u32 ebx;
u32 ecx;
@@ -585,20 +616,20 @@ static int apm_get_battery_status(u_short which,
if (which != 1)
return APM_BAD_DEVICE;
*nbat = 1;
- return apm_get_power_status(&status, bat, life);
+ return apm_get_power_status(status, bat, life);
}
if (apm_bios_call(0x530a, (0x8000 | (which)), 0, &eax,
&ebx, &ecx, &edx, &esi))
return (eax >> 8) & 0xff;
+ *status = ebx;
*bat = ecx;
*life = edx;
*nbat = esi;
return APM_SUCCESS;
}
-#endif
-static int apm_engage_power_management(u_short device)
+static int __init apm_engage_power_management(u_short device)
{
u32 eax;
@@ -747,14 +778,17 @@ static void suspend(void)
unsigned long flags;
int err;
- /* Estimate time zone so that set_time can
- update the clock */
+#ifndef CONFIG_APM_RTC_IS_GMT
+ /*
+ * Estimate time zone so that set_time can update the clock
+ */
save_flags(flags);
clock_cmos_diff = -get_cmos_time();
cli();
clock_cmos_diff += CURRENT_TIME;
got_clock_diff = 1;
restore_flags(flags);
+#endif
err = apm_set_power_state(APM_STATE_SUSPEND);
if (err)
@@ -826,7 +860,7 @@ static void check_events(void)
apm_event_t event;
#ifdef CONFIG_APM_IGNORE_SUSPEND_BOUNCE
static unsigned long last_resume = 0;
- static int did_resume = 0;
+ static int ignore_bounce = 0;
#endif
while ((event = get_event()) != 0) {
@@ -838,6 +872,11 @@ static void check_events(void)
printk(KERN_DEBUG "apm: received unknown "
"event 0x%02x\n", event);
#endif
+#ifdef CONFIG_APM_IGNORE_SUSPEND_BOUNCE
+ if (ignore_bounce
+ && ((jiffies - last_resume) > BOUNCE_INTERVAL))
+ ignore_bounce = 0;
+#endif
switch (event) {
case APM_SYS_STANDBY:
case APM_USER_STANDBY:
@@ -859,7 +898,7 @@ static void check_events(void)
#endif
case APM_SYS_SUSPEND:
#ifdef CONFIG_APM_IGNORE_SUSPEND_BOUNCE
- if (did_resume && ((jiffies - last_resume) < HZ))
+ if (ignore_bounce)
break;
#endif
#ifdef CONFIG_APM_IGNORE_MULTIPLE_SUSPEND
@@ -880,7 +919,7 @@ static void check_events(void)
#endif
#ifdef CONFIG_APM_IGNORE_SUSPEND_BOUNCE
last_resume = jiffies;
- did_resume = 1;
+ ignore_bounce = 1;
#endif
set_time();
send_event(event, 0, NULL);
@@ -1139,13 +1178,13 @@ static int do_open(struct inode * inode, struct file * filp)
return 0;
}
-#ifdef CONFIG_PROC_FS
int apm_get_info(char *buf, char **start, off_t fpos, int length, int dummy)
{
char * p;
unsigned short bx;
unsigned short cx;
unsigned short dx;
+ unsigned short nbat;
unsigned short error;
unsigned short ac_line_status = 0xff;
unsigned short battery_status = 0xff;
@@ -1167,13 +1206,8 @@ int apm_get_info(char *buf, char **start, off_t fpos, int length, int dummy)
if (apm_bios_info.version > 0x100) {
battery_flag = (cx >> 8) & 0xff;
if (dx != 0xffff) {
- if ((dx & 0x8000) == 0x8000) {
- units = "min";
- time_units = dx & 0x7ffe;
- } else {
- units = "sec";
- time_units = dx & 0x7fff;
- }
+ units = (dx & 0x8000) ? "min" : "sec";
+ time_units = dx & 0x7fff;
}
}
}
@@ -1228,7 +1262,6 @@ int apm_get_info(char *buf, char **start, off_t fpos, int length, int dummy)
return p - buf;
}
-#endif
void __init apm_setup(char *str, int *dummy)
{
@@ -1244,6 +1277,8 @@ void __init apm_setup(char *str, int *dummy)
str += 3;
if (strncmp(str, "debug", 5) == 0)
debug = !invert;
+ if (strncmp(str, "smp-power-off", 13) == 0)
+ smp_hack = !invert;
str = strchr(str, ',');
if (str != NULL)
str += strspn(str, ", \t");
@@ -1284,17 +1319,18 @@ void __init apm_bios_init(void)
/* BIOS < 1.2 doesn't set cseg_16_len */
if (apm_bios_info.version < 0x102)
- apm_bios_info.cseg_16_len = 0xFFFF; /* 64k */
+ apm_bios_info.cseg_16_len = 0; /* 64k */
if (debug) {
printk(KERN_INFO "apm: entry %x:%lx cseg16 %x dseg %x",
apm_bios_info.cseg, apm_bios_info.offset,
apm_bios_info.cseg_16, apm_bios_info.dseg);
if (apm_bios_info.version > 0x100)
- printk(" cseg len %x, cseg16 len %x, dseg len %x",
+ printk(" cseg len %x, dseg len %x",
apm_bios_info.cseg_len,
- apm_bios_info.cseg_16_len,
apm_bios_info.dseg_len);
+ if (apm_bios_info.version > 0x101)
+ printk(" cseg16 len %x", apm_bios_info.cseg_16_len);
printk("\n");
}
@@ -1302,12 +1338,6 @@ void __init apm_bios_init(void)
printk(KERN_NOTICE "apm: disabled on user request.\n");
return;
}
-#ifdef __SMP__
- if (smp_num_cpus > 1) {
- printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n");
- return;
- }
-#endif
/*
* Set up a segment that references the real mode segment 0x40
@@ -1317,7 +1347,7 @@ void __init apm_bios_init(void)
*/
set_base(gdt[APM_40 >> 3],
__va((unsigned long)0x40 << 4));
- set_limit(gdt[APM_40 >> 3], 4096 - (0x40 << 4));
+ _set_limit((char *)&gdt[APM_40 >> 3], 4095 - (0x40 << 4));
apm_bios_entry.offset = apm_bios_info.offset;
apm_bios_entry.segment = APM_CS;
@@ -1327,23 +1357,36 @@ void __init apm_bios_init(void)
__va((unsigned long)apm_bios_info.cseg_16 << 4));
set_base(gdt[APM_DS >> 3],
__va((unsigned long)apm_bios_info.dseg << 4));
- if (apm_bios_info.version == 0x100) {
- set_limit(gdt[APM_CS >> 3], 64 * 1024);
- set_limit(gdt[APM_CS_16 >> 3], 64 * 1024);
- set_limit(gdt[APM_DS >> 3], 64 * 1024);
- } else {
-#ifdef APM_RELAX_SEGMENTS
+#ifndef APM_RELAX_SEGMENTS
+ if (apm_bios_info.version == 0x100)
+#endif
+ {
/* For ASUS motherboard, Award BIOS rev 110 (and others?) */
- set_limit(gdt[APM_CS >> 3], 64 * 1024);
+ _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 - 1);
/* For some unknown machine. */
- set_limit(gdt[APM_CS_16 >> 3], 64 * 1024);
+ _set_limit((char *)&gdt[APM_CS_16 >> 3], 64 * 1024 - 1);
/* For the DEC Hinote Ultra CT475 (and others?) */
- set_limit(gdt[APM_DS >> 3], 64 * 1024);
-#else
- set_limit(gdt[APM_CS >> 3], apm_bios_info.cseg_len);
- set_limit(gdt[APM_CS_16 >> 3], apm_bios_info.cseg_16_len);
- set_limit(gdt[APM_DS >> 3], apm_bios_info.dseg_len);
+ _set_limit((char *)&gdt[APM_DS >> 3], 64 * 1024 - 1);
+ }
+#ifndef APM_RELAX_SEGMENTS
+ else {
+ _set_limit((char *)&gdt[APM_CS >> 3],
+ (apm_bios_info.cseg_len - 1) & 0xffff);
+ _set_limit((char *)&gdt[APM_CS_16 >> 3],
+ (apm_bios_info.cseg_16_len - 1) & 0xffff);
+ _set_limit((char *)&gdt[APM_DS >> 3],
+ (apm_bios_info.dseg_len - 1) & 0xffff);
+ }
+#endif
+#ifdef CONFIG_SMP
+ if (smp_num_cpus > 1) {
+ printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n");
+ if (smp_hack)
+ smp_hack = 2;
+ return;
+ }
#endif
+ if (apm_bios_info.version > 0x100) {
/*
* We only support BIOSs up to version 1.2
*/
@@ -1355,7 +1398,7 @@ void __init apm_bios_init(void)
}
}
if (debug) {
- printk(KERN_INFO "apm: onnection version %d.%d\n",
+ printk(KERN_INFO "apm: Connection version %d.%d\n",
(apm_bios_info.version >> 8) & 0xff,
apm_bios_info.version & 0xff );
@@ -1376,23 +1419,23 @@ void __init apm_bios_init(void)
case 3: bat_stat = "charging"; break;
default: bat_stat = "unknown"; break;
}
- printk(KERN_INFO "apm: AC %s, battery status %s, battery life ",
+ printk(KERN_INFO
+ "apm: AC %s, battery status %s, battery life ",
power_stat, bat_stat);
if ((cx & 0xff) == 0xff)
printk("unknown\n");
else
printk("%d%%\n", cx & 0xff);
if (apm_bios_info.version > 0x100) {
- printk("apm: battery flag 0x%02x, battery life ",
+ printk(KERN_INFO
+ "apm: battery flag 0x%02x, battery life ",
(cx >> 8) & 0xff);
if (dx == 0xffff)
printk("unknown\n");
- else {
- if ((dx & 0x8000))
- printk("%d minutes\n", dx & 0x7ffe );
- else
- printk("%d seconds\n", dx & 0x7fff );
- }
+ else
+ printk("%d %s\n", dx & 0x7fff,
+ (dx & 0x8000) ?
+ "minutes" : "seconds");
}
}
}
@@ -1422,10 +1465,9 @@ void __init apm_bios_init(void)
apm_timer.expires = APM_CHECK_TIMEOUT + jiffies;
add_timer(&apm_timer);
-#ifdef CONFIG_PROC_FS
ent = create_proc_entry("apm", 0, 0);
- ent->get_info = apm_get_info;
-#endif
+ if (ent != NULL)
+ ent->get_info = apm_get_info;
misc_register(&apm_device);
diff --git a/arch/i386/kernel/bios32.c b/arch/i386/kernel/bios32.c
index 9543fc7ba..e7383e55b 100644
--- a/arch/i386/kernel/bios32.c
+++ b/arch/i386/kernel/bios32.c
@@ -352,6 +352,10 @@ __initfunc(int pci_sanity_check(struct pci_access *a))
{
u16 dfn, x;
+#ifdef CONFIG_VISWS
+ return 1; /* Lithium PCI Bridges are non-standard */
+#endif
+
if (pci_probe & PCI_NO_CHECKS)
return 1;
for(dfn=0; dfn < 0x100; dfn++)
@@ -1051,7 +1055,7 @@ __initfunc(void pcibios_fixup_devices(void))
pci_write_config_word(dev, PCI_COMMAND, cmd);
}
}
-#ifdef __SMP__
+#if defined(CONFIG_X86_IO_APIC)
/*
* Recalculate IRQ numbers if we use the I/O APIC
*/
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 390c10c1f..0153c4b40 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -153,10 +153,10 @@ ENTRY(lcall7)
ALIGN
.globl ret_from_fork
ret_from_fork:
- GET_CURRENT(%ebx)
#ifdef __SMP__
- btrl $0, SYMBOL_NAME(scheduler_lock)
+ call SYMBOL_NAME(schedule_tail)
#endif /* __SMP__ */
+ GET_CURRENT(%ebx)
jmp ret_from_sys_call
/*
@@ -193,6 +193,7 @@ restore_all:
ALIGN
signal_return:
+ sti # we can get here from an interrupt handler
testl $(VM_MASK),EFLAGS(%esp)
movl %esp,%eax
jne v86_signal_return
@@ -558,13 +559,14 @@ ENTRY(sys_call_table)
.long SYMBOL_NAME(sys_sendfile)
.long SYMBOL_NAME(sys_ni_syscall) /* streams1 */
.long SYMBOL_NAME(sys_ni_syscall) /* streams2 */
+ .long SYMBOL_NAME(sys_vfork) /* 190 */
/*
- * NOTE!! This doesn' thave to be exact - we just have
+ * NOTE!! This doesn't have to be exact - we just have
* to make sure we have _enough_ of the "sys_ni_syscall"
* entries. Don't panic if you notice that this hasn't
* been shrunk every time we add a new system call.
*/
- .rept NR_syscalls-189
+ .rept NR_syscalls-190
.long SYMBOL_NAME(sys_ni_syscall)
.endr
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 7343cd64f..acbc3e325 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -534,10 +534,14 @@ ENTRY(gdt_table)
.quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */
.quad 0x0000000000000000 /* not used */
.quad 0x0000000000000000 /* not used */
- .quad 0x00c0920000000000 /* 0x40 APM set up for bad BIOS's */
- .quad 0x00c09a0000000000 /* 0x48 APM CS code */
- .quad 0x00809a0000000000 /* 0x50 APM CS 16 code (16 bit) */
- .quad 0x00c0920000000000 /* 0x58 APM DS data */
+ /*
+ * The APM segments have byte granularity and their bases
+ * and limits are set at run time.
+ */
+ .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */
+ .quad 0x00409a0000000000 /* 0x48 APM CS code */
+ .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */
+ .quad 0x0040920000000000 /* 0x58 APM DS data */
.fill 2*NR_TASKS,8,0 /* space for LDT's and TSS's etc */
/*
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
index e1833f43c..cd9074796 100644
--- a/arch/i386/kernel/i386_ksyms.c
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -58,6 +58,10 @@ EXPORT_SYMBOL_NOVERS(__put_user_1);
EXPORT_SYMBOL_NOVERS(__put_user_2);
EXPORT_SYMBOL_NOVERS(__put_user_4);
+EXPORT_SYMBOL(strtok);
+EXPORT_SYMBOL(strpbrk);
+EXPORT_SYMBOL(strstr);
+
EXPORT_SYMBOL(strncpy_from_user);
EXPORT_SYMBOL(__strncpy_from_user);
EXPORT_SYMBOL(clear_user);
@@ -83,7 +87,6 @@ EXPORT_SYMBOL(__global_cli);
EXPORT_SYMBOL(__global_sti);
EXPORT_SYMBOL(__global_save_flags);
EXPORT_SYMBOL(__global_restore_flags);
-EXPORT_SYMBOL(smp_message_pass);
EXPORT_SYMBOL(mtrr_hook);
#endif
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index fa8ef26bb..232abf78d 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -225,6 +225,13 @@ static void __init clear_IO_APIC_pin(unsigned int pin)
int pirq_entries [MAX_PIRQS];
int pirqs_enabled;
+void __init ioapic_setup(char *str, int *ints)
+{
+ extern int skip_ioapic_setup; /* defined in arch/i386/kernel/smp.c */
+
+ skip_ioapic_setup = 1;
+}
+
void __init ioapic_pirq_setup(char *str, int *ints)
{
int i, max;
@@ -675,7 +682,8 @@ void __init print_IO_APIC(void)
printk(".... register #01: %08X\n", *(int *)&reg_01);
printk("....... : max redirection entries: %04X\n", reg_01.entries);
if ( (reg_01.entries != 0x0f) && /* ISA-only Neptune boards */
- (reg_01.entries != 0x17) /* ISA+PCI boards */
+ (reg_01.entries != 0x17) && /* ISA+PCI boards */
+ (reg_01.entries != 0x3F) /* Xeon boards */
)
UNEXPECTED_IO_APIC();
if (reg_01.entries == 0x0f)
@@ -683,7 +691,8 @@ void __init print_IO_APIC(void)
printk("....... : IO APIC version: %04X\n", reg_01.version);
if ( (reg_01.version != 0x10) && /* oldest IO-APICs */
- (reg_01.version != 0x11) /* my IO-APIC */
+ (reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */
+ (reg_01.version != 0x13) /* Xeon IO-APICs */
)
UNEXPECTED_IO_APIC();
if (reg_01.__reserved_1 || reg_01.__reserved_2)
@@ -946,7 +955,7 @@ static inline void self_IPI(unsigned int irq)
if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
desc->status = status | IRQ_REPLAY;
- send_IPI(APIC_DEST_SELF, IO_APIC_VECTOR(irq));
+ send_IPI_self(IO_APIC_VECTOR(irq));
}
}
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
index 2e3beb11b..445a26613 100644
--- a/arch/i386/kernel/ioport.c
+++ b/arch/i386/kernel/ioport.c
@@ -58,7 +58,7 @@ asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on)
if ((from + num <= from) || (from + num > IO_BITMAP_SIZE*32))
return -EINVAL;
- if (!capable(CAP_SYS_RAWIO))
+ if (turn_on && !capable(CAP_SYS_RAWIO))
return -EPERM;
/*
* If it's the first ioperm() call in this thread's lifetime, set the
@@ -91,11 +91,15 @@ asmlinkage int sys_iopl(unsigned long unused)
{
struct pt_regs * regs = (struct pt_regs *) &unused;
unsigned int level = regs->ebx;
+ unsigned int old = (regs->eflags >> 12) & 3;
if (level > 3)
return -EINVAL;
- if (!capable(CAP_SYS_RAWIO))
- return -EPERM;
+ /* Trying to gain more privileges? */
+ if (level > old) {
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+ }
regs->eflags = (regs->eflags & 0xffffcfff) | (level << 12);
return 0;
}
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index e0fd62653..37878f59f 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -15,6 +15,7 @@
* Naturally it's not a 1:1 relation, but there are similarities.
*/
+#include <linux/config.h>
#include <linux/ptrace.h>
#include <linux/errno.h>
#include <linux/kernel_stat.h>
@@ -47,46 +48,28 @@ unsigned int local_irq_count[NR_CPUS];
atomic_t nmi_counter;
/*
- * About the IO-APIC, the architecture is 'merged' into our
- * current irq architecture, seemlessly. (i hope). It is only
- * visible through a few more more hardware interrupt lines, but
- * otherwise drivers are unaffected. The main code is believed
- * to be NR_IRQS-safe (nothing anymore thinks we have 16
- * irq lines only), but there might be some places left ...
+ * Linux has a controller-independent x86 interrupt architecture.
+ * Every controller has a 'controller-template' that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs assumed to be messaging to Pentium local-APICs)
+ *
+ * the code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
*/
/*
- * This contains the irq mask for both 8259A irq controllers,
+ * Micro-access to controllers is serialized over the whole
+ * system. We never hold this lock when we call the actual
+ * IRQ handler.
*/
-static unsigned int cached_irq_mask = 0xffff;
-
-#define __byte(x,y) (((unsigned char *)&(y))[x])
-#define __word(x,y) (((unsigned short *)&(y))[x])
-#define __long(x,y) (((unsigned int *)&(y))[x])
-
-#define cached_21 (__byte(0,cached_irq_mask))
-#define cached_A1 (__byte(1,cached_irq_mask))
-
spinlock_t irq_controller_lock;
-/*
- * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
- * boards the timer interrupt is not connected to any IO-APIC pin, it's
- * fed to the CPU IRQ line directly.
- *
- * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
- * this 'mixed mode' IRQ handling costs us one more branch in do_IRQ,
- * but we have _much_ higher compatibility and robustness this way.
- */
-unsigned long long io_apic_irqs = 0;
-
-static void do_8259A_IRQ(unsigned int irq, struct pt_regs * regs);
-static void enable_8259A_irq(unsigned int irq);
-void disable_8259A_irq(unsigned int irq);
-
-/* startup is the same as "enable", shutdown is same as "disable" */
-#define startup_8259A_irq enable_8259A_irq
-#define shutdown_8259A_irq disable_8259A_irq
/*
* Dummy controller type for unused interrupts
@@ -108,6 +91,19 @@ static struct hw_interrupt_type no_irq_type = {
disable_none
};
+/*
+ * This is the 'legacy' 8259A Programmable Interrupt Controller,
+ * present in the majority of PC/AT boxes.
+ */
+
+static void do_8259A_IRQ(unsigned int irq, struct pt_regs * regs);
+static void enable_8259A_irq(unsigned int irq);
+void disable_8259A_irq(unsigned int irq);
+
+/* startup is the same as "enable", shutdown is same as "disable" */
+#define startup_8259A_irq enable_8259A_irq
+#define shutdown_8259A_irq disable_8259A_irq
+
static struct hw_interrupt_type i8259A_irq_type = {
"XT-PIC",
startup_8259A_irq,
@@ -117,11 +113,38 @@ static struct hw_interrupt_type i8259A_irq_type = {
disable_8259A_irq
};
-irq_desc_t irq_desc[NR_IRQS] = {
- [0 ... 15] = { 0, &i8259A_irq_type, }, /* default to standard ISA IRQs */
- [16 ... NR_IRQS-1] = { 0, &no_irq_type, }, /* 'high' PCI IRQs filled in on demand */
-};
+/*
+ * Controller mappings for all interrupt sources:
+ */
+irq_desc_t irq_desc[NR_IRQS] = { [0 ... NR_IRQS-1] = { 0, &no_irq_type, }};
+
+
+/*
+ * 8259A PIC functions to handle ISA devices:
+ */
+
+/*
+ * This contains the irq mask for both 8259A irq controllers,
+ */
+static unsigned int cached_irq_mask = 0xffff;
+#define __byte(x,y) (((unsigned char *)&(y))[x])
+#define __word(x,y) (((unsigned short *)&(y))[x])
+#define __long(x,y) (((unsigned int *)&(y))[x])
+
+#define cached_21 (__byte(0,cached_irq_mask))
+#define cached_A1 (__byte(1,cached_irq_mask))
+
+/*
+ * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
+ * boards the timer interrupt is not connected to any IO-APIC pin, it's
+ * fed to the CPU IRQ line directly.
+ *
+ * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
+ * this 'mixed mode' IRQ handling costs us one more branch in do_IRQ,
+ * but we have _much_ higher compatibility and robustness this way.
+ */
+unsigned long long io_apic_irqs = 0;
/*
* These have to be protected by the irq controller spinlock
@@ -149,6 +172,77 @@ static void enable_8259A_irq(unsigned int irq)
}
}
+int i8259A_irq_pending(unsigned int irq)
+{
+ unsigned int mask = 1<<irq;
+
+ if (irq < 8)
+ return (inb(0x20) & mask);
+ return (inb(0xA0) & (mask >> 8));
+}
+
+void make_8259A_irq(unsigned int irq)
+{
+ disable_irq(irq);
+ __long(0,io_apic_irqs) &= ~(1<<irq);
+ irq_desc[irq].handler = &i8259A_irq_type;
+ enable_irq(irq);
+}
+
+/*
+ * Careful! The 8259A is a fragile beast, it pretty
+ * much _has_ to be done exactly like this (mask it
+ * first, _then_ send the EOI, and the order of EOI
+ * to the two 8259s is important!)
+ */
+static inline void mask_and_ack_8259A(unsigned int irq)
+{
+ cached_irq_mask |= 1 << irq;
+ if (irq & 8) {
+ inb(0xA1); /* DUMMY */
+ outb(cached_A1,0xA1);
+ outb(0x62,0x20); /* Specific EOI to cascade */
+ outb(0x20,0xA0);
+ } else {
+ inb(0x21); /* DUMMY */
+ outb(cached_21,0x21);
+ outb(0x20,0x20);
+ }
+}
+
+static void do_8259A_IRQ(unsigned int irq, struct pt_regs * regs)
+{
+ struct irqaction * action;
+ irq_desc_t *desc = irq_desc + irq;
+
+ spin_lock(&irq_controller_lock);
+ {
+ unsigned int status;
+ mask_and_ack_8259A(irq);
+ status = desc->status & ~IRQ_REPLAY;
+ action = NULL;
+ if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))
+ action = desc->action;
+ desc->status = status | IRQ_INPROGRESS;
+ }
+ spin_unlock(&irq_controller_lock);
+
+ /* Exit early if we had no action or it was disabled */
+ if (!action)
+ return;
+
+ handle_IRQ_event(irq, regs, action);
+
+ spin_lock(&irq_controller_lock);
+ {
+ unsigned int status = desc->status & ~IRQ_INPROGRESS;
+ desc->status = status;
+ if (!(status & IRQ_DISABLED))
+ enable_8259A_irq(irq);
+ }
+ spin_unlock(&irq_controller_lock);
+}
+
/*
* This builds up the IRQ handler stubs using some ugly macros in irq.h
*
@@ -168,8 +262,7 @@ BUILD_IRQ(4) BUILD_IRQ(5) BUILD_IRQ(6) BUILD_IRQ(7)
BUILD_IRQ(8) BUILD_IRQ(9) BUILD_IRQ(10) BUILD_IRQ(11)
BUILD_IRQ(12) BUILD_IRQ(13) BUILD_IRQ(14) BUILD_IRQ(15)
-#ifdef __SMP__
-
+#ifdef CONFIG_X86_IO_APIC
/*
* The IO-APIC gives us many more interrupt sources..
*/
@@ -185,11 +278,13 @@ BUILD_IRQ(48) BUILD_IRQ(49) BUILD_IRQ(50) BUILD_IRQ(51)
BUILD_IRQ(52) BUILD_IRQ(53) BUILD_IRQ(54) BUILD_IRQ(55)
BUILD_IRQ(56) BUILD_IRQ(57) BUILD_IRQ(58) BUILD_IRQ(59)
BUILD_IRQ(60) BUILD_IRQ(61) BUILD_IRQ(62) BUILD_IRQ(63)
+#endif
+#ifdef __SMP__
/*
* The following vectors are part of the Linux architecture, there
* is no hardware IRQ pin equivalent for them, they are triggered
- * through the ICC by us (IPIs), via smp_message_pass():
+ * through the ICC by us (IPIs)
*/
BUILD_SMP_INTERRUPT(reschedule_interrupt)
BUILD_SMP_INTERRUPT(invalidate_interrupt)
@@ -213,7 +308,7 @@ static void (*interrupt[NR_IRQS])(void) = {
IRQ4_interrupt, IRQ5_interrupt, IRQ6_interrupt, IRQ7_interrupt,
IRQ8_interrupt, IRQ9_interrupt, IRQ10_interrupt, IRQ11_interrupt,
IRQ12_interrupt, IRQ13_interrupt, IRQ14_interrupt, IRQ15_interrupt
-#ifdef __SMP__
+#ifdef CONFIG_X86_IO_APIC
,IRQ16_interrupt, IRQ17_interrupt, IRQ18_interrupt, IRQ19_interrupt,
IRQ20_interrupt, IRQ21_interrupt, IRQ22_interrupt, IRQ23_interrupt,
IRQ24_interrupt, IRQ25_interrupt, IRQ26_interrupt, IRQ27_interrupt,
@@ -231,12 +326,16 @@ static void (*interrupt[NR_IRQS])(void) = {
#endif
};
+
/*
* Initial irq handlers.
*/
-static void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
+void no_action(int cpl, void *dev_id, struct pt_regs *regs)
+{
+}
+#ifndef CONFIG_VISWS
/*
* Note that on a 486, we don't want to do a SIGFPE on an irq13
* as the irq is unreliable, and exception 16 works correctly
@@ -262,7 +361,13 @@ static struct irqaction irq13 = { math_error_irq, 0, 0, "fpu", NULL, NULL };
/*
* IRQ2 is cascade interrupt to second interrupt controller
*/
+
static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL};
+#endif
+
+/*
+ * Generic, controller-independent functions:
+ */
int get_irq_list(char *buf)
{
@@ -297,7 +402,7 @@ int get_irq_list(char *buf)
}
p += sprintf(p, "NMI: %10u\n", atomic_read(&nmi_counter));
#ifdef __SMP__
- p += sprintf(p, "IPI: %10lu\n", ipi_count);
+ p += sprintf(p, "ERR: %10lu\n", ipi_count);
#endif
return p - buf;
}
@@ -335,22 +440,22 @@ static void show(char * str)
int i;
unsigned long *stack;
int cpu = smp_processor_id();
+ extern char *get_options(char *str, int *ints);
printk("\n%s, CPU %d:\n", str, cpu);
printk("irq: %d [%d %d]\n",
atomic_read(&global_irq_count), local_irq_count[0], local_irq_count[1]);
printk("bh: %d [%d %d]\n",
atomic_read(&global_bh_count), local_bh_count[0], local_bh_count[1]);
- stack = (unsigned long *) &str;
+ stack = (unsigned long *) &stack;
for (i = 40; i ; i--) {
unsigned long x = *++stack;
- if (x > (unsigned long) &init_task_union && x < (unsigned long) &vsprintf) {
+ if (x > (unsigned long) &get_options && x < (unsigned long) &vsprintf) {
printk("<[%08lx]> ", x);
}
}
}
-
#define MAXCOUNT 100000000
static inline void wait_on_bh(void)
@@ -607,79 +712,6 @@ int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction *
return status;
}
-int i8259A_irq_pending(unsigned int irq)
-{
- unsigned int mask = 1<<irq;
-
- if (irq < 8)
- return (inb(0x20) & mask);
- return (inb(0xA0) & (mask >> 8));
-}
-
-
-void make_8259A_irq(unsigned int irq)
-{
- disable_irq(irq);
- __long(0,io_apic_irqs) &= ~(1<<irq);
- irq_desc[irq].handler = &i8259A_irq_type;
- enable_irq(irq);
-}
-
-/*
- * Careful! The 8259A is a fragile beast, it pretty
- * much _has_ to be done exactly like this (mask it
- * first, _then_ send the EOI, and the order of EOI
- * to the two 8259s is important!
- */
-static inline void mask_and_ack_8259A(unsigned int irq)
-{
- cached_irq_mask |= 1 << irq;
- if (irq & 8) {
- inb(0xA1); /* DUMMY */
- outb(cached_A1,0xA1);
- outb(0x62,0x20); /* Specific EOI to cascade */
- outb(0x20,0xA0);
- } else {
- inb(0x21); /* DUMMY */
- outb(cached_21,0x21);
- outb(0x20,0x20);
- }
-}
-
-static void do_8259A_IRQ(unsigned int irq, struct pt_regs * regs)
-{
- struct irqaction * action;
- irq_desc_t *desc = irq_desc + irq;
-
- spin_lock(&irq_controller_lock);
- {
- unsigned int status;
- mask_and_ack_8259A(irq);
- status = desc->status & ~IRQ_REPLAY;
- action = NULL;
- if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))
- action = desc->action;
- desc->status = status | IRQ_INPROGRESS;
- }
- spin_unlock(&irq_controller_lock);
-
- /* Exit early if we had no action or it was disabled */
- if (!action)
- return;
-
- handle_IRQ_event(irq, regs, action);
-
- spin_lock(&irq_controller_lock);
- {
- unsigned int status = desc->status & ~IRQ_INPROGRESS;
- desc->status = status;
- if (!(status & IRQ_DISABLED))
- enable_8259A_irq(irq);
- }
- spin_unlock(&irq_controller_lock);
-}
-
-
/*
* Generic enable/disable code: this just calls
* down into the PIC-specific version for the actual
@@ -691,8 +723,10 @@ void disable_irq(unsigned int irq)
unsigned long flags;
spin_lock_irqsave(&irq_controller_lock, flags);
- irq_desc[irq].status |= IRQ_DISABLED;
- irq_desc[irq].handler->disable(irq);
+ if (!irq_desc[irq].depth++) {
+ irq_desc[irq].status |= IRQ_DISABLED;
+ irq_desc[irq].handler->disable(irq);
+ }
spin_unlock_irqrestore(&irq_controller_lock, flags);
if (irq_desc[irq].status & IRQ_INPROGRESS)
@@ -704,16 +738,18 @@ void enable_irq(unsigned int irq)
unsigned long flags;
spin_lock_irqsave(&irq_controller_lock, flags);
- /*
- * In contrast to the above, we should _not_ have any concurrent
- * interrupt activity here, so we just clear both disabled bits.
- *
- * This allows us to have IRQ_INPROGRESS set until we actually
- * install a handler for this interrupt (make irq autodetection
- * work by just looking at the status field for the irq)
- */
- irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS);
- irq_desc[irq].handler->enable(irq);
+ switch (irq_desc[irq].depth) {
+ case 1:
+ irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS);
+ irq_desc[irq].handler->enable(irq);
+ /* fall through */
+ default:
+ irq_desc[irq].depth--;
+ break;
+ case 0:
+ printk("enable_irq() unbalanced from %p\n",
+ __builtin_return_address(0));
+ }
spin_unlock_irqrestore(&irq_controller_lock, flags);
}
@@ -798,6 +834,7 @@ int setup_x86_irq(unsigned int irq, struct irqaction * new)
*p = new;
if (!shared) {
+ irq_desc[irq].depth = 0;
irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS);
irq_desc[irq].handler->startup(irq);
}
@@ -894,7 +931,7 @@ unsigned long probe_irq_on(void)
/*
* Wait for spurious interrupts to trigger
*/
- for (delay = jiffies + HZ/10; delay > jiffies; )
+ for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
/* about 100ms delay */ synchronize_irq();
/*
@@ -949,21 +986,75 @@ int probe_irq_off(unsigned long unused)
return irq_found;
}
-__initfunc(void init_IRQ(void))
+/*
+ * Silly, horrible hack
+ */
+static char uglybuffer[10*256];
+
+__asm__("\n" __ALIGN_STR"\n"
+ "common_unexpected:\n\t"
+ SAVE_ALL
+ "pushl $ret_from_intr\n\t"
+ "jmp strange_interrupt");
+
+void strange_interrupt(int irqnum)
+{
+ printk("Unexpected interrupt %d\n", irqnum & 255);
+ for (;;);
+}
+
+extern int common_unexpected;
+__initfunc(void init_unexpected_irq(void))
{
int i;
+ for (i = 0; i < 256; i++) {
+ char *code = uglybuffer + 10*i;
+ unsigned long jumpto = (unsigned long) &common_unexpected;
- /* set the clock to 100 Hz */
- outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
- outb_p(LATCH & 0xff , 0x40); /* LSB */
- outb(LATCH >> 8 , 0x40); /* MSB */
+ jumpto -= (unsigned long)(code+10);
+ code[0] = 0x68; /* pushl */
+ *(int *)(code+1) = i - 512;
+ code[5] = 0xe9; /* jmp */
+ *(int *)(code+6) = jumpto;
+
+ set_intr_gate(i,code);
+ }
+}
+
+
+void init_ISA_irqs (void)
+{
+ int i;
- for (i=0; i<NR_IRQS; i++)
+ for (i = 0; i < NR_IRQS; i++) {
irq_desc[i].status = IRQ_DISABLED;
+ irq_desc[i].action = 0;
+ irq_desc[i].depth = 0;
+
+ if (i < 16) {
+ /*
+ * 16 old-style INTA-cycle interrupt gates:
+ */
+ irq_desc[i].handler = &i8259A_irq_type;
+ } else {
+ /*
+ * 'high' PCI IRQs filled in on demand
+ */
+ irq_desc[i].handler = &no_irq_type;
+ }
+ }
+}
+
+__initfunc(void init_IRQ(void))
+{
+ int i;
+
+#ifndef CONFIG_X86_VISWS_APIC
+ init_ISA_irqs();
+#else
+ init_VISWS_APIC_irqs();
+#endif
- /*
- * 16 old-style INTA-cycle interrupt gates:
- */
for (i = 0; i < 16; i++)
set_intr_gate(0x20+i,interrupt[i]);
@@ -983,31 +1074,41 @@ __initfunc(void init_IRQ(void))
*/
/* IPI for rescheduling */
- set_intr_gate(0x30, reschedule_interrupt);
+ set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
/* IPI for invalidation */
- set_intr_gate(0x31, invalidate_interrupt);
+ set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
/* IPI for CPU halt */
- set_intr_gate(0x40, stop_cpu_interrupt);
+ set_intr_gate(STOP_CPU_VECTOR, stop_cpu_interrupt);
/* self generated IPI for local APIC timer */
- set_intr_gate(0x41, apic_timer_interrupt);
+ set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
/* IPI for MTRR control */
- set_intr_gate(0x50, mtrr_interrupt);
+ set_intr_gate(MTRR_CHANGE_VECTOR, mtrr_interrupt);
/* IPI vector for APIC spurious interrupts */
- set_intr_gate(0xff, spurious_interrupt);
+ set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
#endif
request_region(0x20,0x20,"pic1");
request_region(0xa0,0x20,"pic2");
+
+ /*
+ * Set the clock to 100 Hz, we already have a valid
+ * vector now:
+ */
+ outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(LATCH & 0xff , 0x40); /* LSB */
+ outb(LATCH >> 8 , 0x40); /* MSB */
+
+#ifndef CONFIG_VISWS
setup_x86_irq(2, &irq2);
setup_x86_irq(13, &irq13);
+#endif
}
-#ifdef __SMP__
-
+#ifdef CONFIG_X86_IO_APIC
__initfunc(void init_IRQ_SMP(void))
{
int i;
@@ -1015,5 +1116,5 @@ __initfunc(void init_IRQ_SMP(void))
if (IO_APIC_VECTOR(i) > 0)
set_intr_gate(IO_APIC_VECTOR(i), interrupt[i]);
}
-
#endif
+
diff --git a/arch/i386/kernel/irq.h b/arch/i386/kernel/irq.h
index 9f0b3e457..982ab101e 100644
--- a/arch/i386/kernel/irq.h
+++ b/arch/i386/kernel/irq.h
@@ -37,10 +37,31 @@ typedef struct {
unsigned int status; /* IRQ status - IRQ_INPROGRESS, IRQ_DISABLED */
struct hw_interrupt_type *handler; /* handle/enable/disable functions */
struct irqaction *action; /* IRQ action list */
- unsigned int unused[3];
+ unsigned int depth; /* Disable depth for nested irq disables */
} irq_desc_t;
-#define IRQ0_TRAP_VECTOR 0x51
+/*
+ * Special IRQ vectors used by the SMP architecture:
+ *
+ * (some of the following vectors are 'rare', they might be merged
+ * into a single vector to save vector space. TLB, reschedule and
+ * local APIC vectors are performance-critical.)
+ */
+#define RESCHEDULE_VECTOR 0x30
+#define INVALIDATE_TLB_VECTOR 0x31
+#define STOP_CPU_VECTOR 0x40
+#define LOCAL_TIMER_VECTOR 0x41
+#define MTRR_CHANGE_VECTOR 0x50
+
+/*
+ * First vector available to drivers: (vectors 0x51-0xfe)
+ */
+#define IRQ0_TRAP_VECTOR 0x51
+
+/*
+ * This IRQ should never happen, but we print a message nevertheless.
+ */
+#define SPURIOUS_APIC_VECTOR 0xff
extern irq_desc_t irq_desc[NR_IRQS];
extern int irq_vector[NR_IRQS];
@@ -48,6 +69,7 @@ extern int irq_vector[NR_IRQS];
extern void init_IRQ_SMP(void);
extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
+extern int setup_x86_irq(unsigned int, struct irqaction *);
/*
* Various low-level irq details needed by irq.c, process.c,
@@ -56,17 +78,21 @@ extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
* Interrupt entry/exit code at both C and assembly level
*/
-void mask_irq(unsigned int irq);
-void unmask_irq(unsigned int irq);
-void disable_8259A_irq(unsigned int irq);
-int i8259A_irq_pending(unsigned int irq);
-void ack_APIC_irq(void);
-void setup_IO_APIC(void);
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
-void make_8259A_irq(unsigned int irq);
-void send_IPI(int dest, int vector);
-void init_pic_mode(void);
-void print_IO_APIC(void);
+extern void no_action(int cpl, void *dev_id, struct pt_regs *regs);
+extern void mask_irq(unsigned int irq);
+extern void unmask_irq(unsigned int irq);
+extern void disable_8259A_irq(unsigned int irq);
+extern int i8259A_irq_pending(unsigned int irq);
+extern void ack_APIC_irq(void);
+extern void FASTCALL(send_IPI_self(int vector));
+extern void smp_send_mtrr(void);
+extern void init_VISWS_APIC_irqs(void);
+extern void setup_IO_APIC(void);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+extern void make_8259A_irq(unsigned int irq);
+extern void send_IPI(int dest, int vector);
+extern void init_pic_mode(void);
+extern void print_IO_APIC(void);
extern unsigned long long io_apic_irqs;
@@ -81,11 +107,7 @@ extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
extern char ioapic_OEM_ID [16];
extern char ioapic_Product_ID [16];
-extern spinlock_t irq_controller_lock; /*
- * Protects both the 8259 and the
- * IO-APIC
- */
-
+extern spinlock_t irq_controller_lock;
#ifdef __SMP__
diff --git a/arch/i386/kernel/mtrr.c b/arch/i386/kernel/mtrr.c
index 324e8cec7..16c767b4a 100644
--- a/arch/i386/kernel/mtrr.c
+++ b/arch/i386/kernel/mtrr.c
@@ -164,6 +164,9 @@
#include <asm/bitops.h>
#include <asm/atomic.h>
+#include <asm/hardirq.h>
+#include "irq.h"
+
#define MTRR_VERSION "1.26 (19981001)"
#define TRUE 1
@@ -612,10 +615,11 @@ static void do_all_cpus (void (*handler) (struct set_mtrr_context *ctxt,
/* Send a message to all other CPUs and wait for them to enter the
barrier */
atomic_set (&undone_count, smp_num_cpus - 1);
- smp_message_pass (MSG_ALL_BUT_SELF, MSG_MTRR_CHANGE, 0, 0);
+ smp_send_mtrr();
/* Wait for it to be done */
timeout = jiffies + JIFFIE_TIMEOUT;
- while ( (atomic_read (&undone_count) > 0) && (jiffies < timeout) )
+ while ( (atomic_read (&undone_count) > 0) &&
+ time_before(jiffies, timeout) )
barrier ();
if (atomic_read (&undone_count) > 0)
{
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 00f39d4ed..00f7e0ba2 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -105,19 +105,24 @@ static void hard_idle(void)
*/
static int cpu_idle(void *unused)
{
- unsigned long start_idle = jiffies;
+ int work = 1;
+ unsigned long start_idle = 0;
/* endless idle loop with no priority at all */
+ current->priority = 0;
+ current->counter = -100;
for (;;) {
+ if (work)
+ start_idle = jiffies;
+
if (jiffies - start_idle > HARD_IDLE_TIMEOUT)
hard_idle();
else {
if (boot_cpu_data.hlt_works_ok && !hlt_counter && !current->need_resched)
__asm__("hlt");
}
- if (current->need_resched)
- start_idle = jiffies;
- current->policy = SCHED_YIELD;
+
+ work = current->need_resched;
schedule();
check_pgt_cache();
}
@@ -131,14 +136,21 @@ static int cpu_idle(void *unused)
int cpu_idle(void *unused)
{
-
/* endless idle loop with no priority at all */
+ current->priority = 0;
+ current->counter = -100;
while(1) {
- if (current_cpu_data.hlt_works_ok && !hlt_counter && !current->need_resched)
+ if (current_cpu_data.hlt_works_ok && !hlt_counter &&
+ !current->need_resched)
__asm__("hlt");
- current->policy = SCHED_YIELD;
- schedule();
- check_pgt_cache();
+ /*
+ * although we are an idle CPU, we do not want to
+ * get into the scheduler unnecessarily.
+ */
+ if (current->need_resched) {
+ schedule();
+ check_pgt_cache();
+ }
}
}
@@ -463,24 +475,27 @@ void free_task_struct(struct task_struct *p)
void release_segments(struct mm_struct *mm)
{
- /* forget local segments */
- __asm__ __volatile__("movl %w0,%%fs ; movl %w0,%%gs"
- : /* no outputs */
- : "r" (0));
if (mm->segments) {
void * ldt = mm->segments;
-
- /*
- * Get the LDT entry from init_task.
- */
- current->tss.ldt = _LDT(0);
- load_ldt(0);
-
mm->segments = NULL;
vfree(ldt);
}
}
+void forget_segments(void)
+{
+ /* forget local segments */
+ __asm__ __volatile__("movl %w0,%%fs ; movl %w0,%%gs"
+ : /* no outputs */
+ : "r" (0));
+
+ /*
+ * Get the LDT entry from init_task.
+ */
+ current->tss.ldt = _LDT(0);
+ load_ldt(0);
+}
+
/*
* Create a kernel thread
*/
@@ -579,7 +594,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
*childregs = *regs;
childregs->eax = 0;
childregs->esp = esp;
- childregs->eflags = regs->eflags & 0xffffcfff; /* iopl always 0 for a new process */
p->tss.esp = (unsigned long) childregs;
p->tss.esp0 = (unsigned long) (childregs+1);
@@ -771,6 +785,21 @@ asmlinkage int sys_clone(struct pt_regs regs)
}
/*
+ * This is trivial, and on the face of it looks like it
+ * could equally well be done in user mode.
+ *
+ * Not so, for quite unobvious reasons - register pressure.
+ * In user mode vfork() cannot have a stack frame, and if
+ * done by calling the "clone()" system call directly, you
+ * do not have enough call-clobbered registers to hold all
+ * the information you need.
+ */
+asmlinkage int sys_vfork(struct pt_regs regs)
+{
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, &regs);
+}
+
+/*
* sys_execve() executes a new program.
*/
asmlinkage int sys_execve(struct pt_regs regs)
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index 52a1543c3..b0eca4345 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -367,8 +367,6 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
ret = 0;
goto out;
}
- if (pid == 1) /* you may not mess with init */
- goto out;
ret = -ESRCH;
read_lock(&tasklist_lock);
child = find_task_by_pid(pid);
@@ -376,6 +374,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
if (!child)
goto out;
ret = -EPERM;
+ if (pid == 1) /* you may not mess with init */
+ goto out;
if (request == PTRACE_ATTACH) {
if (child == current)
goto out;
@@ -420,7 +420,9 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
case PTRACE_PEEKDATA: {
unsigned long tmp;
+ down(&child->mm->mmap_sem);
ret = read_long(child, addr, &tmp);
+ up(&child->mm->mmap_sem);
if (ret >= 0)
ret = put_user(tmp,(unsigned long *) data);
goto out;
@@ -451,7 +453,9 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
/* when I and D space are separate, this will have to be fixed. */
case PTRACE_POKETEXT: /* write the word at location addr. */
case PTRACE_POKEDATA:
+ down(&child->mm->mmap_sem);
ret = write_long(child,addr,data);
+ up(&child->mm->mmap_sem);
goto out;
case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 490c4db82..c3f34270a 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -38,6 +38,7 @@
#include <asm/system.h>
#include <asm/io.h>
#include <asm/smp.h>
+#include <asm/cobalt.h>
/*
* Machine setup..
@@ -45,7 +46,6 @@
char ignore_irq13 = 0; /* set if exception 16 works */
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
-static char Cx86_step[8]; /* decoded Cyrix step number */
/*
* Bus types ..
@@ -108,6 +108,133 @@ extern unsigned long cpu_hz;
#define RAMDISK_PROMPT_FLAG 0x8000
#define RAMDISK_LOAD_FLAG 0x4000
+#ifdef CONFIG_VISWS
+char visws_board_type = -1;
+char visws_board_rev = -1;
+
+#define PIIX_PM_START 0x0F80
+
+#define SIO_GPIO_START 0x0FC0
+
+#define SIO_PM_START 0x0FC8
+
+#define PMBASE PIIX_PM_START
+#define GPIREG0 (PMBASE+0x30)
+#define GPIREG(x) (GPIREG0+((x)/8))
+#define PIIX_GPI_BD_ID1 18
+#define PIIX_GPI_BD_REG GPIREG(PIIX_GPI_BD_ID1)
+
+#define PIIX_GPI_BD_SHIFT (PIIX_GPI_BD_ID1 % 8)
+
+#define SIO_INDEX 0x2e
+#define SIO_DATA 0x2f
+
+#define SIO_DEV_SEL 0x7
+#define SIO_DEV_ENB 0x30
+#define SIO_DEV_MSB 0x60
+#define SIO_DEV_LSB 0x61
+
+#define SIO_GP_DEV 0x7
+
+#define SIO_GP_BASE SIO_GPIO_START
+#define SIO_GP_MSB (SIO_GP_BASE>>8)
+#define SIO_GP_LSB (SIO_GP_BASE&0xff)
+
+#define SIO_GP_DATA1 (SIO_GP_BASE+0)
+
+#define SIO_PM_DEV 0x8
+
+#define SIO_PM_BASE SIO_PM_START
+#define SIO_PM_MSB (SIO_PM_BASE>>8)
+#define SIO_PM_LSB (SIO_PM_BASE&0xff)
+#define SIO_PM_INDEX (SIO_PM_BASE+0)
+#define SIO_PM_DATA (SIO_PM_BASE+1)
+
+#define SIO_PM_FER2 0x1
+
+#define SIO_PM_GP_EN 0x80
+
+static void
+visws_get_board_type_and_rev(void)
+{
+ int raw;
+
+ visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
+ >> PIIX_GPI_BD_SHIFT;
+/*
+ * Get Board rev.
+ * First, we have to initialize the 307 part to allow us access
+ * to the GPIO registers. Let's map them at 0x0fc0 which is right
+ * after the PIIX4 PM section.
+ */
+ outb_p(SIO_DEV_SEL, SIO_INDEX);
+ outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */
+
+ outb_p(SIO_DEV_MSB, SIO_INDEX);
+ outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */
+
+ outb_p(SIO_DEV_LSB, SIO_INDEX);
+ outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */
+
+ outb_p(SIO_DEV_ENB, SIO_INDEX);
+ outb_p(1, SIO_DATA); /* Enable GPIO registers. */
+
+/*
+ * Now, we have to map the power management section to write
+ * a bit which enables access to the GPIO registers.
+ * What lunatic came up with this shit?
+ */
+ outb_p(SIO_DEV_SEL, SIO_INDEX);
+ outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to PM regs. */
+
+ outb_p(SIO_DEV_MSB, SIO_INDEX);
+ outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */
+
+ outb_p(SIO_DEV_LSB, SIO_INDEX);
+ outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */
+
+ outb_p(SIO_DEV_ENB, SIO_INDEX);
+ outb_p(1, SIO_DATA); /* Enable PM registers. */
+
+/*
+ * Now, write the PM register which enables the GPIO registers.
+ */
+ outb_p(SIO_PM_FER2, SIO_PM_INDEX);
+ outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
+
+/*
+ * Now, initialize the GPIO registers.
+ * We want them all to be inputs which is the
+ * power on default, so let's leave them alone.
+ * So, let's just read the board rev!
+ */
+ raw = inb_p(SIO_GP_DATA1);
+ raw &= 0x7f; /* 7 bits of valid board revision ID. */
+
+ if (visws_board_type == VISWS_320) {
+ if (raw < 0x6) {
+ visws_board_rev = 4;
+ } else if (raw < 0xc) {
+ visws_board_rev = 5;
+ } else {
+ visws_board_rev = 6;
+
+ }
+ } else if (visws_board_type == VISWS_540) {
+ visws_board_rev = 2;
+ } else {
+ visws_board_rev = raw;
+ }
+
+ printk("Silicon Graphics %s (rev %d)\n",
+ visws_board_type == VISWS_320 ? "320" :
+ (visws_board_type == VISWS_540 ? "540" :
+ "unknown"),
+ visws_board_rev);
+ }
+#endif
+
+
static char command_line[COMMAND_LINE_SIZE] = { 0, };
char saved_command_line[COMMAND_LINE_SIZE];
@@ -123,6 +250,10 @@ __initfunc(void setup_arch(char **cmdline_p,
return;
smptrap=1;
+#ifdef CONFIG_VISWS
+ visws_get_board_type_and_rev();
+#endif
+
ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV);
drive_info = DRIVE_INFO;
screen_info = SCREEN_INFO;
@@ -146,12 +277,6 @@ __initfunc(void setup_arch(char **cmdline_p,
}
#endif
-#define VMALLOC_RESERVE (64 << 20) /* 64MB for vmalloc */
-#define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE))
-
- if (memory_end > MAXMEM)
- memory_end = MAXMEM;
-
memory_end &= PAGE_MASK;
#ifdef CONFIG_BLK_DEV_RAM
rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
@@ -201,10 +326,28 @@ __initfunc(void setup_arch(char **cmdline_p,
}
*to = '\0';
*cmdline_p = command_line;
+
+#define VMALLOC_RESERVE (64 << 20) /* 64MB for vmalloc */
+#define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE))
+
+ if (memory_end > MAXMEM)
+ {
+ memory_end = MAXMEM;
+ printk(KERN_WARNING "Warning only %ldMB will be used.\n",
+ MAXMEM>>20);
+ }
+
memory_end += PAGE_OFFSET;
*memory_start_p = memory_start;
*memory_end_p = memory_end;
+#ifdef __SMP__
+ /*
+ * Save possible boot-time SMP configuration:
+ */
+ init_smp_config();
+#endif
+
#ifdef CONFIG_BLK_DEV_INITRD
if (LOADER_TYPE) {
initrd_start = INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
@@ -232,6 +375,10 @@ __initfunc(void setup_arch(char **cmdline_p,
conswitchp = &dummy_con;
#endif
#endif
+ /*
+ * Check the bugs that will bite us before we get booting
+ */
+
}
__initfunc(static int amd_model(struct cpuinfo_x86 *c))
@@ -245,6 +392,7 @@ __initfunc(static int amd_model(struct cpuinfo_x86 *c))
cpuid(0x80000000, &n, &dummy, &dummy, &dummy);
if (n < 4)
return 0;
+ cpuid(0x80000001, &dummy, &dummy, &dummy, &(c->x86_capability));
v = (unsigned int *) c->x86_model_id;
cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
@@ -254,9 +402,9 @@ __initfunc(static int amd_model(struct cpuinfo_x86 *c))
}
/*
- * Use the Cyrix DEVID CPU registers if avail. to get more detailed info.
+ * Read Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
*/
-__initfunc(static void do_cyrix_devid(struct cpuinfo_x86 *c))
+static inline void do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
{
unsigned char ccr2, ccr3;
@@ -272,22 +420,28 @@ __initfunc(static void do_cyrix_devid(struct cpuinfo_x86 *c))
getCx86(0xc0); /* dummy */
if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */
- c->x86_model = 0xfd;
+ *dir0 = 0xfd;
else { /* Cx486S A step */
setCx86(CX86_CCR2, ccr2);
- c->x86_model = 0xfe;
+ *dir0 = 0xfe;
}
}
else {
setCx86(CX86_CCR3, ccr3); /* restore CCR3 */
/* read DIR0 and DIR1 CPU registers */
- c->x86_model = getCx86(CX86_DIR0);
- c->x86_mask = getCx86(CX86_DIR1);
+ *dir0 = getCx86(CX86_DIR0);
+ *dir1 = getCx86(CX86_DIR1);
}
sti();
}
+/*
+ * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in
+ * order to identify the Cyrix CPU model after we're out of setup.c
+ */
+unsigned char Cx86_dir0_msb __initdata = 0;
+
static char Cx86_model[][9] __initdata = {
"Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
"M II ", "Unknown"
@@ -305,27 +459,28 @@ static char Cx486D_name[][4] __initdata = {
static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock";
static char cyrix_model_mult1[] __initdata = "12??43";
static char cyrix_model_mult2[] __initdata = "12233445";
-static char cyrix_model_oldstep[] __initdata = "A step";
__initfunc(static void cyrix_model(struct cpuinfo_x86 *c))
{
- unsigned char dir0_msn, dir0_lsn, dir1;
+ unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
char *buf = c->x86_model_id;
const char *p = NULL;
- do_cyrix_devid(c);
+ do_cyrix_devid(&dir0, &dir1);
- dir0_msn = c->x86_model >> 4;
- dir0_lsn = c->x86_model & 0xf;
- dir1 = c->x86_mask;
+ Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family" */
+ dir0_lsn = dir0 & 0xf; /* model or clock multiplier */
- /* common case stepping number -- exceptions handled below */
- sprintf(Cx86_step, "%d.%d", (dir1 >> 4) + 1, dir1 & 0x0f);
+ /* common case step number/rev -- exceptions handled below */
+ c->x86_model = (dir1 >> 4) + 1;
+ c->x86_mask = dir1 & 0xf;
/* Now cook; the original recipe is by Channing Corn, from Cyrix.
* We do the same thing for each generation: we work out
- * the model, multiplier and stepping.
+ * the model, multiplier and stepping. Black magic included,
+ * to make the silicon step/rev numbers match the printed ones.
*/
+
switch (dir0_msn) {
unsigned char tmp;
@@ -349,37 +504,41 @@ __initfunc(static void cyrix_model(struct cpuinfo_x86 *c))
if (dir1 > 0x21) { /* 686L */
Cx86_cb[0] = 'L';
p = Cx86_cb;
- Cx86_step[0]++;
+ (c->x86_model)++;
} else /* 686 */
p = Cx86_cb+1;
break;
case 4: /* MediaGX/GXm */
+ /*
+ * Life sometimes gets weiiiiiiiird if we use this
+ * on the MediaGX. So we turn it off for now.
+ */
+
/* GXm supports extended cpuid levels 'ala' AMD */
if (c->cpuid_level == 2) {
amd_model(c); /* get CPU marketing name */
+ c->x86_capability&=~X86_FEATURE_TSC;
return;
}
else { /* MediaGX */
Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4';
p = Cx86_cb+2;
- Cx86_step[0] = (dir1 & 0x20) ? '1' : '2';
+ c->x86_model = (dir1 & 0x20) ? 1 : 2;
+ c->x86_capability&=~X86_FEATURE_TSC;
}
break;
case 5: /* 6x86MX/M II */
- /* the TSC is broken (for now) */
- c->x86_capability &= ~16;
-
if (dir1 > 7) dir0_msn++; /* M II */
tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0;
Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7];
p = Cx86_cb+tmp;
if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20))
- Cx86_step[0]++;
+ (c->x86_model)++;
break;
- case 0xf: /* Cyrix 486 without DIR registers */
+ case 0xf: /* Cyrix 486 without DEVID registers */
switch (dir0_lsn) {
case 0xd: /* either a 486SLC or DLC w/o DEVID */
dir0_msn = 0;
@@ -389,11 +548,13 @@ __initfunc(static void cyrix_model(struct cpuinfo_x86 *c))
case 0xe: /* a 486S A step */
dir0_msn = 0;
p = Cx486S_name[0];
- strcpy(Cx86_step, cyrix_model_oldstep);
- c->x86_mask = 1; /* must != 0 to print */
break;
break;
}
+
+ default: /* unknown (shouldn't happen, we know everyone ;-) */
+ dir0_msn = 7;
+ break;
}
strcpy(buf, Cx86_model[dir0_msn & 7]);
if (p) strcat(buf, p);
@@ -445,15 +606,15 @@ static struct cpu_model_info cpu_models[] __initdata = {
"486 DX/4", "486 DX/4-WB", NULL, NULL, NULL, NULL, "Am5x86-WT",
"Am5x86-WB" }},
{ X86_VENDOR_AMD, 5,
- { "K5/SSA5 (PR75, PR90, PR100)", "K5 (PR120, PR133)",
- "K5 (PR166)", "K5 (PR200)", NULL, NULL,
- "K6 (PR166 - PR266)", "K6 (PR166 - PR300)", "K6-2 (PR233 - PR333)",
- "K6-3 (PR300 - PR450)", NULL, NULL, NULL, NULL, NULL, NULL }},
+ { "K5/SSA5", "K5",
+ "K5", "K5", NULL, NULL,
+ "K6", "K6", "K6-2",
+ "K6-3", NULL, NULL, NULL, NULL, NULL, NULL }},
{ X86_VENDOR_UMC, 4,
{ NULL, "U5D", "U5S", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL }},
{ X86_VENDOR_CENTAUR, 5,
- { NULL, NULL, NULL, NULL, "C6", NULL, NULL, NULL, NULL, NULL, NULL,
+ { NULL, NULL, NULL, NULL, "C6", NULL, NULL, NULL, "C6-2", NULL, NULL,
NULL, NULL, NULL, NULL, NULL }},
{ X86_VENDOR_NEXGEN, 5,
{ "Nx586", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -479,6 +640,9 @@ __initfunc(void identify_cpu(struct cpuinfo_x86 *c))
return;
}
+ if (c->x86_vendor == X86_VENDOR_AMD && amd_model(c))
+ return;
+
for (i = 0; i < sizeof(cpu_models)/sizeof(struct cpu_model_info); i++) {
if (c->cpuid_level > 1) {
/* supports eax=2 call */
@@ -543,15 +707,39 @@ __initfunc(void identify_cpu(struct cpuinfo_x86 *c))
return;
}
- if (c->x86_vendor == X86_VENDOR_AMD && amd_model(c))
- return;
-
sprintf(c->x86_model_id, "%02x/%02x", c->x86_vendor, c->x86_model);
}
+/*
+ * Perform early boot up checks for a valid TSC. See arch/i386/kernel/time.c
+ */
+
+__initfunc(void dodgy_tsc(void))
+{
+	/* Work out who made the boot CPU before anything else. */
+	get_cpu_vendor(&boot_cpu_data);
+
+	/*
+	 * Only Cyrix parts get the additional model probe; for every
+	 * other vendor there is nothing more to do here.
+	 */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_CYRIX)
+		cyrix_model(&boot_cpu_data);
+}
+
+
+/*
+ * rdmsr(msr, val1, val2): execute the RDMSR instruction with the register
+ * index `msr' loaded into %ecx. On return val1 holds %eax and val2 holds
+ * %edx (per the x86 ISA these are the low and high 32 bits of the MSR).
+ * val1 and val2 must be lvalues, they are asm outputs.
+ */
+#define rdmsr(msr,val1,val2) \
+ __asm__ __volatile__("rdmsr" \
+ : "=a" (val1), "=d" (val2) \
+ : "c" (msr))
+
+/*
+ * wrmsr(msr, val1, val2): execute the WRMSR instruction with the register
+ * index `msr' in %ecx, val1 in %eax and val2 in %edx. The asm statement
+ * has no outputs; it is marked volatile so it is not optimised away.
+ */
+#define wrmsr(msr,val1,val2) \
+ __asm__ __volatile__("wrmsr" \
+ : /* no outputs */ \
+ : "c" (msr), "a" (val1), "d" (val2))
+
static char *cpu_vendor_names[] __initdata = {
"Intel", "Cyrix", "AMD", "UMC", "NexGen", "Centaur" };
+
__initfunc(void print_cpu_info(struct cpuinfo_x86 *c))
{
char *vendor = NULL;
@@ -569,11 +757,19 @@ __initfunc(void print_cpu_info(struct cpuinfo_x86 *c))
else
printk("%s", c->x86_model_id);
- if (c->x86_mask) {
- if (c->x86_vendor == X86_VENDOR_CYRIX)
- printk(" stepping %s", Cx86_step);
- else
- printk(" stepping %02x", c->x86_mask);
+ if (c->x86_mask || c->cpuid_level>=0)
+ printk(" stepping %02x", c->x86_mask);
+
+ if(c->x86_vendor == X86_VENDOR_CENTAUR)
+ {
+ u32 hv,lv;
+ rdmsr(0x107, lv, hv);
+ printk("\nCentaur FSR was 0x%X ",lv);
+ lv|=(1<<8);
+ lv|=(1<<7);
+ /* lv|=(1<<6); - may help too if the board can cope */
+ printk("now 0x%X", lv);
+ wrmsr(0x107, lv, hv);
}
printk("\n");
}
@@ -597,7 +793,7 @@ int get_cpuinfo(char * buffer)
for(n=0; n<NR_CPUS; n++, c++) {
#ifdef __SMP__
- if (!(cpu_present_map & (1<<n)))
+ if (!(cpu_online_map & (1<<n)))
continue;
#endif
p += sprintf(p,"processor\t: %d\n"
@@ -611,12 +807,9 @@ int get_cpuinfo(char * buffer)
c->x86_model,
c->x86_model_id[0] ? c->x86_model_id : "unknown");
- if (c->x86_mask) {
- if (c->x86_vendor == X86_VENDOR_CYRIX)
- p += sprintf(p, "stepping\t: %s\n", Cx86_step);
- else
- p += sprintf(p, "stepping\t: %d\n", c->x86_mask);
- } else
+ if (c->x86_mask)
+ p += sprintf(p, "stepping\t: %d\n", c->x86_mask);
+ else
p += sprintf(p, "stepping\t: unknown\n");
if (c->x86_capability & X86_FEATURE_TSC) {
@@ -629,21 +822,21 @@ int get_cpuinfo(char * buffer)
p += sprintf(p, "cache size\t: %d KB\n", c->x86_cache_size);
/* Modify the capabilities according to chip type */
- if (c->x86_mask) {
- if (c->x86_vendor == X86_VENDOR_CYRIX) {
- x86_cap_flags[24] = "cxmmx";
- } else if (c->x86_vendor == X86_VENDOR_AMD) {
- x86_cap_flags[16] = "fcmov";
- x86_cap_flags[31] = "amd3d";
- } else if (c->x86_vendor == X86_VENDOR_INTEL) {
- x86_cap_flags[6] = "pae";
- x86_cap_flags[9] = "apic";
- x86_cap_flags[12] = "mtrr";
- x86_cap_flags[14] = "mca";
- x86_cap_flags[16] = "pat";
- x86_cap_flags[17] = "pse36";
- x86_cap_flags[24] = "osfxsr";
- }
+ if (c->x86_vendor == X86_VENDOR_CYRIX) {
+ x86_cap_flags[24] = "cxmmx";
+ } else if (c->x86_vendor == X86_VENDOR_AMD) {
+ x86_cap_flags[16] = "fcmov";
+ x86_cap_flags[31] = "3dnow";
+ if (c->x86 == 5 && c->x86_model == 6)
+ x86_cap_flags[10] = "sep";
+ } else if (c->x86_vendor == X86_VENDOR_INTEL) {
+ x86_cap_flags[6] = "pae";
+ x86_cap_flags[9] = "apic";
+ x86_cap_flags[12] = "mtrr";
+ x86_cap_flags[14] = "mca";
+ x86_cap_flags[16] = "pat";
+ x86_cap_flags[17] = "pse36";
+ x86_cap_flags[24] = "osfxsr";
}
sep_bug = c->x86_vendor == X86_VENDOR_INTEL &&
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index a5f1f2de0..2960d521c 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -3,12 +3,14 @@
* hosts.
*
* (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net>
+ * (c) 1998 Ingo Molnar
+ *
* Supported by Caldera http://www.caldera.com.
* Much of the core SMP work is based on previous work by Thomas Radke, to
* whom a great many thanks are extended.
*
- * Thanks to Intel for making available several different Pentium and
- * Pentium Pro MP machines.
+ * Thanks to Intel for making available several different Pentium,
+ * Pentium Pro and Pentium-II/Xeon MP machines.
*
* This code is released under the GNU public license version 2 or
* later.
@@ -26,34 +28,17 @@
* Ingo Molnar : Added APIC timers, based on code
* from Jose Renau
* Alan Cox : Added EBDA scanning
+ * Ingo Molnar : various cleanups and rewrites
*/
#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/timer.h>
-#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/mc146818rtc.h>
-#include <asm/i82489.h>
-#include <linux/smp.h>
#include <linux/smp_lock.h>
-#include <linux/interrupt.h>
#include <linux/init.h>
-#include <asm/pgtable.h>
-#include <asm/bitops.h>
-#include <asm/pgtable.h>
-#include <asm/smp.h>
-#include <asm/io.h>
-
-#ifdef CONFIG_MTRR
-# include <asm/mtrr.h>
-#endif
-
-#define __KERNEL_SYSCALLS__
-#include <linux/unistd.h>
+#include <asm/mtrr.h>
#include "irq.h"
@@ -104,31 +89,24 @@ extern void update_one_process( struct task_struct *p,
spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;
/*
- * Why isn't this somewhere standard ??
- *
- * Maybe because this procedure is horribly buggy, and does
- * not deserve to live. Think about signedness issues for five
- * seconds to see why. - Linus
+ * function prototypes:
*/
-
-extern __inline int max(int a,int b)
-{
- if (a>b)
- return a;
- return b;
-}
+static void cache_APIC_registers (void);
+static void stop_this_cpu (void);
static int smp_b_stepping = 0; /* Set if we find a B stepping CPU */
static int max_cpus = -1; /* Setup configured maximum number of CPUs to activate */
int smp_found_config=0; /* Have we found an SMP box */
-unsigned long cpu_present_map = 0; /* Bitmask of existing CPUs */
+unsigned long cpu_present_map = 0; /* Bitmask of physically existing CPUs */
+unsigned long cpu_online_map = 0; /* Bitmask of currently online CPUs */
int smp_num_cpus = 1; /* Total count of live CPUs */
int smp_threads_ready=0; /* Set when the idlers are all forked */
volatile int cpu_number_map[NR_CPUS]; /* which CPU maps to which logical number */
volatile int __cpu_logical_map[NR_CPUS]; /* which logical number maps to which CPU */
-volatile unsigned long cpu_callin_map[NR_CPUS] = {0,}; /* We always use 0 the rest is ready for parallel delivery */
+static volatile unsigned long cpu_callin_map[NR_CPUS] = {0,}; /* We always use 0 the rest is ready for parallel delivery */
+static volatile unsigned long cpu_callout_map[NR_CPUS] = {0,}; /* We always use 0 the rest is ready for parallel delivery */
volatile unsigned long smp_invalidate_needed; /* Used for the invalidate map that's also checked in the spinlock */
volatile unsigned long kstack_ptr; /* Stack vector for booting CPUs */
struct cpuinfo_x86 cpu_data[NR_CPUS]; /* Per CPU bogomips and other parameters */
@@ -137,19 +115,13 @@ unsigned long mp_ioapic_addr = 0xFEC00000; /* Address of the I/O apic (not yet
unsigned char boot_cpu_id = 0; /* Processor that is doing the boot up */
static int smp_activated = 0; /* Tripped once we need to start cross invalidating */
int apic_version[NR_CPUS]; /* APIC version number */
-static volatile int smp_commenced=0; /* Tripped when we start scheduling */
unsigned long apic_retval; /* Just debugging the assembler.. */
-static volatile unsigned char smp_cpu_in_msg[NR_CPUS]; /* True if this processor is sending an IPI */
-
volatile unsigned long kernel_counter=0; /* Number of times the processor holds the lock */
volatile unsigned long syscall_count=0; /* Number of times the processor holds the syscall lock */
volatile unsigned long ipi_count; /* Number of IPIs delivered */
-volatile unsigned long smp_proc_in_lock[NR_CPUS] = {0,};/* for computing process time */
-volatile int smp_process_available=0;
-
const char lk_lockmsg[] = "lock from interrupt context at %p\n";
int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, };
@@ -159,6 +131,7 @@ extern int mpc_default_type;
int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { -1, };
int mp_current_pci_id = 0;
unsigned long mp_lapic_addr = 0;
+int skip_ioapic_setup = 0; /* 1 if "noapic" boot option passed */
/* #define SMP_DEBUG */
@@ -169,6 +142,11 @@ unsigned long mp_lapic_addr = 0;
#endif
/*
+ * IA s/w dev Vol 3, Section 7.4
+ */
+#define APIC_DEFAULT_PHYS_BASE 0xfee00000
+
+/*
* Setup routine for controlling SMP activation
*
* Command-line option of "nosmp" or "maxcpus=0" will disable SMP
@@ -199,6 +177,11 @@ void ack_APIC_irq(void)
}
/*
+ * Intel MP BIOS table parsing routines:
+ */
+
+#ifndef CONFIG_X86_VISWS_APIC
+/*
* Checksum an MP configuration block.
*/
@@ -250,7 +233,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4))
{
- printk("Bad signature [%c%c%c%c].\n",
+ panic("SMP mptable: bad signature [%c%c%c%c]!\n",
mpc->mpc_signature[0],
mpc->mpc_signature[1],
mpc->mpc_signature[2],
@@ -259,7 +242,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
}
if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length))
{
- printk("Checksum error.\n");
+ panic("SMP mptable: checksum error!\n");
return 1;
}
if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04)
@@ -405,7 +388,11 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
}
}
if (ioapics > 1)
+ {
printk("Warning: Multiple IO-APICs not yet supported.\n");
+ printk("Warning: switching to non APIC mode.\n");
+ skip_ioapic_setup=1;
+ }
return num_processors;
}
@@ -413,7 +400,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
* Scan the memory blocks for an SMP configuration block.
*/
-int __init smp_scan_config(unsigned long base, unsigned long length)
+static int __init smp_scan_config(unsigned long base, unsigned long length)
{
unsigned long *bp=phys_to_virt(base);
struct intel_mp_floating *mpf;
@@ -447,7 +434,7 @@ int __init smp_scan_config(unsigned long base, unsigned long length)
unsigned long cfg;
/* local APIC has default address */
- mp_lapic_addr = 0xFEE00000;
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
/*
* We need to know what the local
* APIC id of the boot CPU is!
@@ -564,6 +551,76 @@ int __init smp_scan_config(unsigned long base, unsigned long length)
return 0;
}
+void __init init_intel_smp (void)
+{
+	unsigned int ebda_base;
+
+	/*
+	 * FIXME: Linux assumes you have 640K of base ram..
+	 * this continues the error...
+	 *
+	 * Probe the conventional locations in order and stop at the
+	 * first one that reports a hit:
+	 *   1) the bottom 1K
+	 *   2) the top 1K of base RAM
+	 *   3) the 64K of bios
+	 */
+	if (smp_scan_config(0x0,0x400))
+		return;
+	if (smp_scan_config(639*0x400,0x400))
+		return;
+	if (smp_scan_config(0xF0000,0x10000))
+		return;
+
+	/*
+	 * Still nothing. If this is an SMP machine the configuration
+	 * may live in the extended BIOS data area of an EISA/MCA bus
+	 * machine. The word at 0x40E is a real-mode segment pointing
+	 * at the 4K EBDA; turn it into a physical address and scan it.
+	 *
+	 * NOTE! There are Linux loaders that will corrupt the EBDA
+	 * area, so an MP table found there may have been stomped on
+	 * during early boot and is less trustworthy. These loaders
+	 * are buggy and should be fixed.
+	 */
+	ebda_base = *(unsigned short *)phys_to_virt(0x40E);
+	ebda_base <<= 4;
+	smp_scan_config(ebda_base, 0x1000);
+
+	if (smp_found_config)
+		printk(KERN_WARNING "WARNING: MP table in the EBDA can be UNSAFE, contact linux-smp@vger.rutgers.edu if you experience SMP problems!\n");
+}
+
+#else
+
+/*
+ * The Visual Workstation is Intel MP compliant in the hardware
+ * sense, but it doesnt have a BIOS(-configuration table).
+ * No problem for Linux.
+ */
+void __init init_visws_smp(void)
+{
+	/* No table to parse here: the configuration is simply declared found. */
+	smp_found_config = 1;
+
+	/* The local APIC sits at the default physical base address. */
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+	/* Flag the APICs of CPU ids 0 and 1 as integrated. */
+	apic_version[0] |= 0x10;
+	apic_version[1] |= 0x10;
+
+	/* Or CPU id 1 into the map of present CPUs. */
+	cpu_present_map |= (1 << 1);
+}
+
+#endif
+
+/*
+ * Locate the SMP configuration of this machine:
+ * - Intel MP Configuration Table
+ * - or SGI Visual Workstation configuration
+ *
+ * The choice is made on CONFIG_X86_VISWS_APIC because that is the
+ * symbol under which init_intel_smp() and init_visws_smp() themselves
+ * are compiled. Selecting on a different symbol could end up calling
+ * whichever of the two functions was not built.
+ */
+void __init init_smp_config (void)
+{
+#ifndef CONFIG_X86_VISWS_APIC
+	init_intel_smp();
+#else
+	init_visws_smp();
+#endif
+}
+
/*
* Trampoline 80x86 program as an array.
*/
@@ -630,13 +687,17 @@ void __init smp_store_cpu_info(int id)
* we use to track CPUs as they power up.
*/
+static atomic_t smp_commenced = ATOMIC_INIT(0);
+
void __init smp_commence(void)
{
/*
* Lets the callins below out of their loop.
*/
SMP_PRINTK(("Setting commenced=1, go go go\n"));
- smp_commenced=1;
+
+ wmb();
+ atomic_set(&smp_commenced,1);
}
void __init enable_local_APIC(void)
@@ -653,6 +714,26 @@ void __init enable_local_APIC(void)
value &= ~APIC_TPRI_MASK; /* Set Task Priority to 'accept all' */
apic_write(APIC_TASKPRI,value);
+ /*
+ * Set arbitration priority to 0
+ */
+ value = apic_read(APIC_ARBPRI);
+ value &= ~APIC_ARBPRI_MASK;
+ apic_write(APIC_ARBPRI, value);
+
+ /*
+ * Set the logical destination ID to 'all', just to be safe.
+ * also, put the APIC into flat delivery mode.
+ */
+ value = apic_read(APIC_LDR);
+ value &= ~APIC_LDR_MASK;
+ value |= SET_APIC_LOGICAL_ID(0xff);
+ apic_write(APIC_LDR,value);
+
+ value = apic_read(APIC_DFR);
+ value |= SET_APIC_DFR(0xf);
+ apic_write(APIC_DFR, value);
+
udelay(100); /* B safe */
ack_APIC_irq();
udelay(100);
@@ -660,12 +741,11 @@ void __init enable_local_APIC(void)
unsigned long __init init_smp_mappings(unsigned long memory_start)
{
- unsigned long apic_phys, ioapic_phys;
+ unsigned long apic_phys;
memory_start = PAGE_ALIGN(memory_start);
if (smp_found_config) {
apic_phys = mp_lapic_addr;
- ioapic_phys = mp_ioapic_addr;
} else {
/*
* set up a fake all zeroes page to simulate the
@@ -674,30 +754,81 @@ unsigned long __init init_smp_mappings(unsigned long memory_start)
* this way if some buggy code writes to this page ...
*/
apic_phys = __pa(memory_start);
- ioapic_phys = __pa(memory_start+PAGE_SIZE);
- memset((void *)memory_start, 0, 2*PAGE_SIZE);
- memory_start += 2*PAGE_SIZE;
+ memset((void *)memory_start, 0, PAGE_SIZE);
+ memory_start += PAGE_SIZE;
}
-
set_fixmap(FIX_APIC_BASE,apic_phys);
- set_fixmap(FIX_IO_APIC_BASE,ioapic_phys);
-
printk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys);
- printk("mapped IOAPIC to %08lx (%08lx)\n", fix_to_virt(FIX_IO_APIC_BASE), ioapic_phys);
+
+#ifdef CONFIG_X86_IO_APIC
+ {
+ unsigned long ioapic_phys;
+
+ if (smp_found_config) {
+ ioapic_phys = mp_ioapic_addr;
+ } else {
+ ioapic_phys = __pa(memory_start);
+ memset((void *)memory_start, 0, PAGE_SIZE);
+ memory_start += PAGE_SIZE;
+ }
+ set_fixmap(FIX_IO_APIC_BASE,ioapic_phys);
+ printk("mapped IOAPIC to %08lx (%08lx)\n",
+ fix_to_virt(FIX_IO_APIC_BASE), ioapic_phys);
+ }
+#endif
return memory_start;
}
+extern void calibrate_delay(void);
+
void __init smp_callin(void)
{
- extern void calibrate_delay(void);
- int cpuid=GET_APIC_ID(apic_read(APIC_ID));
+ int cpuid;
+ unsigned long timeout;
+
+ /*
+ * (This works even if the APIC is not enabled.)
+ */
+ cpuid = GET_APIC_ID(apic_read(APIC_ID));
+
+ SMP_PRINTK(("CPU#%d waiting for CALLOUT\n", cpuid));
+
+ /*
+ * STARTUP IPIs are fragile beasts as they might sometimes
+ * trigger some glue motherboard logic. Complete APIC bus
+ * silence for 1 second, this overestimates the time the
+ * boot CPU is spending to send the up to 2 STARTUP IPIs
+ * by a factor of two. This should be enough.
+ */
+
+ /*
+ * Waiting 2s total for startup (udelay is not yet working)
+ */
+ timeout = jiffies + 2*HZ;
+ while (time_before(jiffies,timeout))
+ {
+ /*
+ * Has the boot CPU finished its STARTUP sequence?
+ */
+ if (test_bit(cpuid, (unsigned long *)&cpu_callout_map[0]))
+ break;
+ }
+
+ while (!time_before(jiffies,timeout)) {
+ printk("BUG: CPU%d started up but did not get a callout!\n",
+ cpuid);
+ stop_this_cpu();
+ }
/*
- * Activate our APIC
+ * the boot CPU has finished the init stage and is spinning
+ * on callin_map until we finish. We are free to set up this
+ * CPU, first the APIC. (this is probably redundant on most
+ * boards)
*/
- SMP_PRINTK(("CALLIN %d %d\n",hard_smp_processor_id(), smp_processor_id()));
+ SMP_PRINTK(("CALLIN, before enable_local_APIC().\n"));
enable_local_APIC();
/*
@@ -705,7 +836,12 @@ void __init smp_callin(void)
*/
setup_APIC_clock();
- sti();
+ __sti();
+
+#ifdef CONFIG_MTRR
+ /* Must be done before calibration delay is computed */
+ mtrr_init_secondary_cpu ();
+#endif
/*
* Get our bogomips.
*/
@@ -732,13 +868,14 @@ extern int cpu_idle(void * unused);
*/
int __init start_secondary(void *unused)
{
-#ifdef CONFIG_MTRR
- /* Must be done before calibration delay is computed */
- mtrr_init_secondary_cpu ();
-#endif
+ /*
+ * Don't put anything before smp_callin(); SMP
+ * booting is so fragile that we want to limit the
+ * things done here to the bare minimum.
+ */
smp_callin();
- while (!smp_commenced)
- barrier();
+ while (!atomic_read(&smp_commenced))
+ /* nothing */ ;
return cpu_idle(NULL);
}
@@ -761,11 +898,7 @@ void __init initialize_secondary(void)
/*
* We don't actually need to load the full TSS,
* basically just the stack pointer and the eip.
- *
- * Get the scheduler lock, because we're going
- * to release it as part of the "reschedule" return.
*/
- spin_lock(&scheduler_lock);
asm volatile(
"movl %0,%%esp\n\t"
@@ -859,8 +992,7 @@ static void __init do_boot_cpu(int i)
apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */
cfg=apic_read(APIC_ICR);
cfg&=~0xCDFFF; /* Clear bits */
- cfg |= (APIC_DEST_FIELD | APIC_DEST_LEVELTRIG
- | APIC_DEST_ASSERT | APIC_DEST_DM_INIT);
+ cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_ASSERT | APIC_DEST_DM_INIT);
apic_write(APIC_ICR, cfg); /* Send IPI */
udelay(200);
@@ -871,8 +1003,7 @@ static void __init do_boot_cpu(int i)
apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */
cfg=apic_read(APIC_ICR);
cfg&=~0xCDFFF; /* Clear bits */
- cfg |= (APIC_DEST_FIELD | APIC_DEST_LEVELTRIG
- | APIC_DEST_DM_INIT);
+ cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_DM_INIT);
apic_write(APIC_ICR, cfg); /* Send IPI */
/*
@@ -908,19 +1039,23 @@ static void __init do_boot_cpu(int i)
apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */
cfg=apic_read(APIC_ICR);
cfg&=~0xCDFFF; /* Clear bits */
- cfg |= (APIC_DEST_FIELD
- | APIC_DEST_DM_STARTUP
- | (start_eip >> 12)); /* Boot on the stack */
+ cfg |= (APIC_DEST_DM_STARTUP | (start_eip >> 12)); /* Boot on the stack */
SMP_PRINTK(("Before start apic_write.\n"));
apic_write(APIC_ICR, cfg); /* Kick the second */
SMP_PRINTK(("Startup point 1.\n"));
+
timeout = 0;
+ SMP_PRINTK(("Waiting for send to finish...\n"));
do {
- SMP_PRINTK(("Sleeping.\n")); mdelay(1000);
- udelay(10);
- } while ( (send_status = (apic_read(APIC_ICR) & 0x1000))
- && (timeout++ < 1000));
+ SMP_PRINTK(("+"));
+ udelay(100);
+ send_status = apic_read(APIC_ICR) & 0x1000;
+ } while (send_status && (timeout++ < 1000));
+
+ /*
+ * Give the other CPU some time to accept the IPI.
+ */
udelay(200);
accept_status = (apic_read(APIC_ESR) & 0xEF);
}
@@ -933,6 +1068,13 @@ static void __init do_boot_cpu(int i)
if ( !(send_status || accept_status) )
{
+ /*
+ * allow APs to start initializing.
+ */
+ SMP_PRINTK(("Before Callout %d.\n", i));
+ set_bit(i, (unsigned long *)&cpu_callout_map[0]);
+ SMP_PRINTK(("After Callout %d.\n", i));
+
for(timeout=0;timeout<50000;timeout++)
{
if (cpu_callin_map[0]&(1<<i))
@@ -973,6 +1115,43 @@ static void __init do_boot_cpu(int i)
*((volatile unsigned long *)phys_to_virt(8192)) = 0;
}
+cycles_t cacheflush_time;
+extern unsigned long cpu_hz;
+
+/*
+ * Compute cacheflush_time from cpu_hz and the boot CPU's cache size and
+ * report it.
+ *
+ * Rough estimation for SMP scheduling, this is the number of cycles it
+ * takes for a fully memory-limited process to flush the SMP-local cache.
+ *
+ * The caller visible in this file is smp_boot_cpus(), which is boot-time
+ * only, so this function is marked __init as well.
+ */
+static void __init smp_tune_scheduling (void)
+{
+	unsigned long cachesize;	/* boot CPU cache size in KB */
+	unsigned long cycles_per_usec;
+
+	if (!cpu_hz) {
+		/*
+		 * this basically disables processor-affinity
+		 * scheduling on SMP without a TSC.
+		 */
+		cacheflush_time = 0;
+		return;
+	}
+
+	/*
+	 * (For a P5 this pretty much means we will choose another idle
+	 * CPU almost always at wakeup time (this is due to the small
+	 * L1 cache), on PIIs it's around 50-100 usecs, depending on
+	 * the cache size)
+	 */
+	cachesize = boot_cpu_data.x86_cache_size;
+	if (cachesize == -1)
+		cachesize = 8; /* Pentiums */
+
+	cacheflush_time = cpu_hz/1024*cachesize/5000;
+
+	/*
+	 * Print the cutoff as usecs with two decimals. The quotients are
+	 * unsigned long expressions, so cast them to match the %ld
+	 * conversions; the printed digits are unchanged.
+	 */
+	cycles_per_usec = cpu_hz/1000000;
+	printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
+		(long)((long)cacheflush_time/cycles_per_usec),
+		(long)(((long)cacheflush_time*100/cycles_per_usec) % 100));
+}
+
unsigned int prof_multiplier[NR_CPUS];
unsigned int prof_counter[NR_CPUS];
@@ -983,7 +1162,6 @@ unsigned int prof_counter[NR_CPUS];
void __init smp_boot_cpus(void)
{
int i;
- unsigned long cfg;
#ifdef CONFIG_MTRR
/* Must be done before other processors booted */
@@ -1005,21 +1183,31 @@ void __init smp_boot_cpus(void)
*/
smp_store_cpu_info(boot_cpu_id); /* Final full version of the data */
+ smp_tune_scheduling();
printk("CPU%d: ", boot_cpu_id);
print_cpu_info(&cpu_data[boot_cpu_id]);
+ /*
+ * not necessary because the MP table should list the boot
+ * CPU too, but we do it for the sake of robustness anyway.
+ * (and for the case when a non-SMP board boots an SMP kernel)
+ */
cpu_present_map |= (1 << hard_smp_processor_id());
+
cpu_number_map[boot_cpu_id] = 0;
/*
- * If we don't conform to the Intel MPS standard, get out
- * of here now!
+ * If we couldnt find an SMP configuration at boot time,
+ * get out of here now!
*/
if (!smp_found_config)
{
printk(KERN_NOTICE "SMP motherboard not detected. Using dummy APIC emulation.\n");
+#ifndef CONFIG_VISWS
io_apic_irqs = 0;
+#endif
+ cpu_online_map = cpu_present_map;
goto smp_done;
}
@@ -1082,6 +1270,14 @@ void __init smp_boot_cpus(void)
* Now scan the CPU present map and fire up the other CPUs.
*/
+ /*
+ * Add all detected CPUs. (later on we can down individual
+ * CPUs which will change cpu_online_map but not necessarily
+ * cpu_present_map. We are pretty much ready for hot-swap CPUs.)
+ */
+ cpu_online_map = cpu_present_map;
+ mb();
+
SMP_PRINTK(("CPU map: %lx\n", cpu_present_map));
for(i=0;i<NR_CPUS;i++)
@@ -1092,7 +1288,7 @@ void __init smp_boot_cpus(void)
if (i == boot_cpu_id)
continue;
- if ((cpu_present_map & (1 << i))
+ if ((cpu_online_map & (1 << i))
&& (max_cpus < 0 || max_cpus > cpucount+1))
{
do_boot_cpu(i);
@@ -1102,9 +1298,9 @@ void __init smp_boot_cpus(void)
* Make sure we unmap all failed CPUs
*/
- if (cpu_number_map[i] == -1 && (cpu_present_map & (1 << i))) {
- printk("CPU #%d not responding. Removing from cpu_present_map.\n",i);
- cpu_present_map &= ~(1 << i);
+ if (cpu_number_map[i] == -1 && (cpu_online_map & (1 << i))) {
+ printk("CPU #%d not responding. Removing from cpu_online_map.\n",i);
+ cpu_online_map &= ~(1 << i);
}
}
@@ -1112,29 +1308,34 @@ void __init smp_boot_cpus(void)
* Cleanup possible dangling ends...
*/
- /*
- * Install writable page 0 entry.
- */
-
- cfg = pg0[0];
- pg0[0] = 3; /* writeable, present, addr 0 */
- local_flush_tlb();
+#ifndef CONFIG_VISWS
+ {
+ unsigned long cfg;
- /*
- * Paranoid: Set warm reset code and vector here back
- * to default values.
- */
+ /*
+ * Install writable page 0 entry.
+ */
+ cfg = pg0[0];
+ pg0[0] = 3; /* writeable, present, addr 0 */
+ local_flush_tlb();
+
+ /*
+ * Paranoid: Set warm reset code and vector here back
+ * to default values.
+ */
- CMOS_WRITE(0, 0xf);
+ CMOS_WRITE(0, 0xf);
- *((volatile long *) phys_to_virt(0x467)) = 0;
+ *((volatile long *) phys_to_virt(0x467)) = 0;
- /*
- * Restore old page 0 entry.
- */
+ /*
+ * Restore old page 0 entry.
+ */
- pg0[0] = cfg;
- local_flush_tlb();
+ pg0[0] = cfg;
+ local_flush_tlb();
+ }
+#endif
/*
* Allow the user to impress friends.
@@ -1144,14 +1345,14 @@ void __init smp_boot_cpus(void)
if (cpucount==0)
{
printk(KERN_ERR "Error: only one processor found.\n");
- cpu_present_map=(1<<hard_smp_processor_id());
+ cpu_online_map = (1<<hard_smp_processor_id());
}
else
{
unsigned long bogosum=0;
for(i=0;i<32;i++)
{
- if (cpu_present_map&(1<<i))
+ if (cpu_online_map&(1<<i))
bogosum+=cpu_data[i].loops_per_sec;
}
printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
@@ -1166,267 +1367,289 @@ void __init smp_boot_cpus(void)
printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
SMP_PRINTK(("Boot done.\n"));
+ cache_APIC_registers();
+#ifndef CONFIG_VISWS
/*
* Here we can be sure that there is an IO-APIC in the system. Let's
* go and set it up:
*/
- setup_IO_APIC();
+ if (!skip_ioapic_setup)
+ setup_IO_APIC();
+#endif
smp_done:
}
-void send_IPI(int dest, int vector)
-{
- unsigned long cfg;
- unsigned long flags;
-
- __save_flags(flags);
- __cli();
+/*
+ * the following functions deal with sending IPIs between CPUs.
+ *
+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ */
- /*
- * prepare target chip field
- */
- cfg = apic_read(APIC_ICR2) & 0x00FFFFFF;
- apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(dest));
+/*
+ * Silly serialization to work around CPU bug in P5s.
+ * We can safely turn it off on a 686.
+ */
+#ifdef CONFIG_X86_GOOD_APIC
+# define FORCE_APIC_SERIALIZATION 0
+#else
+# define FORCE_APIC_SERIALIZATION 1
+#endif
- cfg = apic_read(APIC_ICR);
- cfg &= ~0xFDFFF;
- cfg |= APIC_DEST_FIELD|APIC_DEST_DM_FIXED|vector;
- cfg |= dest;
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
-
- apic_write(APIC_ICR, cfg);
- __restore_flags(flags);
-}
+static unsigned int cached_APIC_ICR;
+static unsigned int cached_APIC_ICR2;
/*
- * A non wait message cannot pass data or CPU source info. This current setup
- * is only safe because the kernel lock owner is the only person who can send
- * a message.
- *
- * Wrapping this whole block in a spinlock is not the safe answer either. A
- * processor may get stuck with IRQs off waiting to send a message and thus
- * not replying to the person spinning for a reply.
+ * Caches reserved bits, APIC reads are (mildly) expensive
+ * and force otherwise unnecessary CPU synchronization.
*
- * In the end flush tlb ought to be the NMI and a very short function
- * (to avoid the old IDE disk problems), and other messages sent with IRQs
- * enabled in a civilised fashion. That will also boost performance.
+ * (We could cache other APIC registers too, but these are the
+ * main ones used in RL.)
*/
+#define slow_ICR (apic_read(APIC_ICR) & ~0xFDFFF)
+#define slow_ICR2 (apic_read(APIC_ICR2) & 0x00FFFFFF)
-void smp_message_pass(int target, int msg, unsigned long data, int wait)
+/*
+ * Take a snapshot of the masked ICR and ICR2 register values into
+ * cached_APIC_ICR / cached_APIC_ICR2, followed by a memory barrier.
+ *
+ * Defined static to agree with the static prototype near the top of
+ * this file.
+ */
+static void cache_APIC_registers (void)
{
- unsigned long cfg;
- unsigned long dest = 0;
- unsigned long target_map;
- int p=smp_processor_id();
- int irq;
- int ct=0;
+	cached_APIC_ICR = slow_ICR;
+	cached_APIC_ICR2 = slow_ICR2;
+	mb();
+}
+static inline unsigned int __get_ICR (void)
+{
+#if FORCE_APIC_SERIALIZATION
/*
- * During boot up send no messages
+	 * Wait for the APIC to become ready - this should never occur. It's
+	 * a debugging check really.
+	 *
+	 * The busy test (bit 12 of the ICR) has to be made on the raw
+	 * register value: slow_ICR masks the register with ~0xFDFFF, and
+	 * that mask clears bit 12, so a test on the masked value would
+	 * never see a pending IPI. The mask is applied only to the value
+	 * handed back to the caller.
*/
-
- if (!smp_activated || !smp_commenced)
- return;
+	int count = 0;
+	unsigned int raw;
+
+	while (count < 1000)
+	{
+		raw = apic_read(APIC_ICR);
+		if (!(raw & (1<<12))) {
+			/* account for the polls we had to spend waiting */
+			if (count)
+				atomic_add(count, (atomic_t*)&ipi_count);
+			return raw & ~0xFDFFF;
+		}
+		count++;
+		udelay(10);
+	}
+	printk("CPU #%d: previous IPI still not cleared after 10mS\n",
+		smp_processor_id());
+	/* give up waiting and hand back the last value read, masked */
+	return raw & ~0xFDFFF;
+#else
+	return cached_APIC_ICR;
+#endif
+}
- /*
- * Skip the reschedule if we are waiting to clear a
- * message at this time. The reschedule cannot wait
- * but is not critical.
- */
+static inline unsigned int __get_ICR2 (void)
+{
+	/*
+	 * Without forced serialization the cached copy of the masked
+	 * ICR2 value is handed out; with it, the register is read and
+	 * masked afresh on every call.
+	 */
+#if !FORCE_APIC_SERIALIZATION
+	return cached_APIC_ICR2;
+#else
+	return slow_ICR2;
+#endif
+}
- switch (msg) {
- case MSG_RESCHEDULE:
- irq = 0x30;
- if (smp_cpu_in_msg[p])
- return;
- break;
+static inline int __prepare_ICR (unsigned int shortcut, int vector)
+{
+ unsigned int cfg;
- case MSG_INVALIDATE_TLB:
- /* make this a NMI some day */
- irq = 0x31;
- break;
+ cfg = __get_ICR();
+ cfg |= APIC_DEST_DM_FIXED|shortcut|vector;
- case MSG_STOP_CPU:
- irq = 0x40;
- break;
+ return cfg;
+}
- case MSG_MTRR_CHANGE:
- irq = 0x50;
- break;
+static inline int __prepare_ICR2 (unsigned int dest)
+{
+ unsigned int cfg;
- default:
- printk("Unknown SMP message %d\n", msg);
- return;
- }
+ cfg = __get_ICR2();
+ cfg |= SET_APIC_DEST_FIELD(dest);
- /*
- * Sanity check we don't re-enter this across CPUs. Only the kernel
- * lock holder may send messages. For a STOP_CPU we are bringing the
- * entire box to the fastest halt we can. A reschedule carries
- * no data and can occur during a flush. Guess what panic
- * I got to notice this bug.
- */
-
- /*
- * We are busy.
- */
-
- smp_cpu_in_msg[p]++;
+ return cfg;
+}
-/* printk("SMP message pass #%d to %d of %d\n",
- p, msg, target);*/
+static inline void __send_IPI_shortcut(unsigned int shortcut, int vector)
+{
+ unsigned int cfg;
+/*
+ * Subtle. In the case of the 'never do double writes' workaround we
+ * have to lock out interrupts to be safe. Otherwise it's just one
+ * single atomic write to the APIC, no need for cli/sti.
+ */
+#if FORCE_APIC_SERIALIZATION
+ unsigned long flags;
- /*
- * Wait for the APIC to become ready - this should never occur. It's
- * a debugging check really.
- */
-
- while (ct<1000)
- {
- cfg=apic_read(APIC_ICR);
- if (!(cfg&(1<<12)))
- break;
- ct++;
- udelay(10);
- }
+ __save_flags(flags);
+ __cli();
+#endif
/*
- * Just pray... there is nothing more we can do
+ * No need to touch the target chip field
*/
-
- if (ct==1000)
- printk("CPU #%d: previous IPI still not cleared after 10mS\n", p);
+
+ cfg = __prepare_ICR(shortcut, vector);
/*
- * Set the target requirement
+ * Send the IPI. The write to APIC_ICR fires this off.
*/
-
- if (target==MSG_ALL_BUT_SELF)
- {
- dest=APIC_DEST_ALLBUT;
- target_map=cpu_present_map;
- cpu_callin_map[0]=(1<<p);
- }
- else if (target==MSG_ALL)
- {
- dest=APIC_DEST_ALLINC;
- target_map=cpu_present_map;
- cpu_callin_map[0]=0;
- }
- else
- {
- dest=0;
- target_map=(1<<target);
- cpu_callin_map[0]=0;
- }
+ apic_write(APIC_ICR, cfg);
+#if FORCE_APIC_SERIALIZATION
+ __restore_flags(flags);
+#endif
+}
+
+/*
+ * Send `vector' through __send_IPI_shortcut() with the APIC_DEST_ALLBUT
+ * destination shortcut.
+ */
+static inline void send_IPI_allbutself(int vector)
+{
+ __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
+}
+
+/*
+ * Send `vector' through __send_IPI_shortcut() with the APIC_DEST_ALLINC
+ * destination shortcut.
+ */
+static inline void send_IPI_all(int vector)
+{
+ __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
+}
+
+/*
+ * Send `vector' through __send_IPI_shortcut() with the APIC_DEST_SELF
+ * destination shortcut. This one is not static, so it is visible
+ * outside this file.
+ */
+void send_IPI_self(int vector)
+{
+ __send_IPI_shortcut(APIC_DEST_SELF, vector);
+}
+
+static inline void send_IPI_single(int dest, int vector)
+{
+ unsigned long cfg;
+#if FORCE_APIC_SERIALIZATION
+ unsigned long flags;
+
+ __save_flags(flags);
+ __cli();
+#endif
/*
- * Program the APIC to deliver the IPI
+ * prepare target chip field
*/
- send_IPI(dest,irq);
+ cfg = __prepare_ICR2(dest);
+ apic_write(APIC_ICR2, cfg);
/*
- * Spin waiting for completion
+ * program the ICR
*/
+ cfg = __prepare_ICR(0, vector);
- switch(wait)
- {
- int stuck;
- case 1:
- stuck = 50000000;
- while(cpu_callin_map[0]!=target_map) {
- --stuck;
- if (!stuck) {
- printk("stuck on target_map IPI wait\n");
- break;
- }
- }
- break;
- case 2:
- stuck = 50000000;
- /* Wait for invalidate map to clear */
- while (smp_invalidate_needed) {
- /* Take care of "crossing" invalidates */
- if (test_bit(p, &smp_invalidate_needed))
- clear_bit(p, &smp_invalidate_needed);
- --stuck;
- if (!stuck) {
- printk("stuck on smp_invalidate_needed IPI wait (CPU#%d)\n",p);
- break;
- }
- }
- break;
- }
-
/*
- * Record our completion
+ * Send the IPI. The write to APIC_ICR fires this off.
*/
-
- smp_cpu_in_msg[p]--;
+ apic_write(APIC_ICR, cfg);
+#if FORCE_APIC_SERIALIZATION
+ __restore_flags(flags);
+#endif
}
/*
- * This is fraught with deadlocks. Linus does a flush tlb at a whim
- * even with IRQs off. We have to avoid a pair of crossing flushes
- * or we are doomed. See the notes about smp_message_pass.
+ * This is fraught with deadlocks. Probably the situation is not that
+ * bad as in the early days of SMP, so we might ease some of the
+ * paranoia here.
*/
void smp_flush_tlb(void)
{
+ int cpu = smp_processor_id();
+ int stuck;
unsigned long flags;
-/* printk("SMI-");*/
-
/*
- * The assignment is safe because it's volatile so the compiler cannot reorder it,
- * because the i586 has strict memory ordering and because only the kernel lock holder
- * may issue a tlb flush. If you break any one of those three change this to an atomic
- * bus locked or.
+ * it's important that we do not generate any APIC traffic
+ * until the AP CPUs have booted up!
*/
+ if (cpu_online_map) {
+ /*
+ * The assignment is safe because it's volatile so the
+ * compiler cannot reorder it, because the i586 has
+ * strict memory ordering and because only the kernel
+ * lock holder may issue a tlb flush. If you break any
+ * one of those three change this to an atomic bus
+ * locked or.
+ */
- smp_invalidate_needed=cpu_present_map;
+ smp_invalidate_needed = cpu_online_map;
- /*
- * Processors spinning on the lock will see this IRQ late. The smp_invalidate_needed map will
- * ensure they don't do a spurious flush tlb or miss one.
- */
+ /*
+ * Processors spinning on some lock with IRQs disabled
+ * will see this IRQ late. The smp_invalidate_needed
+ * map will ensure they don't do a spurious flush tlb
+ * or miss one.
+ */
- __save_flags(flags);
- __cli();
- smp_message_pass(MSG_ALL_BUT_SELF, MSG_INVALIDATE_TLB, 0L, 2);
+ __save_flags(flags);
+ __cli();
+
+ send_IPI_allbutself(INVALIDATE_TLB_VECTOR);
+
+ /*
+ * Spin waiting for completion
+ */
+
+ stuck = 50000000;
+ while (smp_invalidate_needed) {
+ /*
+ * Take care of "crossing" invalidates
+ */
+ if (test_bit(cpu, &smp_invalidate_needed))
+ clear_bit(cpu, &smp_invalidate_needed);
+ --stuck;
+ if (!stuck) {
+ printk("stuck on TLB IPI wait (CPU#%d)\n",cpu);
+ break;
+ }
+ }
+ __restore_flags(flags);
+ }
/*
* Flush the local TLB
*/
-
local_flush_tlb();
- __restore_flags(flags);
-
- /*
- * Completed.
- */
-
-/* printk("SMID\n");*/
}
+/*
+ * this function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+
void smp_send_reschedule(int cpu)
{
- unsigned long flags;
+ send_IPI_single(cpu, RESCHEDULE_VECTOR);
+}
- __save_flags(flags);
- __cli();
- smp_message_pass(cpu, MSG_RESCHEDULE, 0L, 0);
- __restore_flags(flags);
+/*
+ * this function sends a 'stop' IPI to all other CPUs in the system.
+ * it goes straight through.
+ */
+
+void smp_send_stop(void)
+{
+ send_IPI_allbutself(STOP_CPU_VECTOR);
+}
+
+/*
+ * this function sends a 'reload MTRR state' IPI to all other CPUs
+ * in the system. it goes straight through, completion processing
+ * is done on the mtrr.c level.
+ */
+
+void smp_send_mtrr(void)
+{
+ send_IPI_allbutself(MTRR_CHANGE_VECTOR);
}
/*
@@ -1546,14 +1769,24 @@ asmlinkage void smp_invalidate_interrupt(void)
ack_APIC_irq();
}
+static void stop_this_cpu (void)
+{
+ /*
+ * Remove this CPU: clear its bit in cpu_online_map, then park
+ * it for good. This function never returns - the CPU sits in
+ * a "hlt" loop if hlt_works_ok is set for it, and in a plain
+ * busy loop otherwise.
+ */
+ clear_bit(smp_processor_id(), &cpu_online_map);
+
+ if (cpu_data[smp_processor_id()].hlt_works_ok)
+ for(;;) __asm__("hlt");
+ for (;;);
+}
+
/*
* CPU halt call-back
*/
asmlinkage void smp_stop_cpu_interrupt(void)
{
- if (cpu_data[smp_processor_id()].hlt_works_ok)
- for(;;) __asm__("hlt");
- for (;;) ;
+ stop_this_cpu();
}
void (*mtrr_hook) (void) = NULL;
@@ -1627,12 +1860,9 @@ void setup_APIC_timer(unsigned int clocks)
* Unfortunately the local APIC timer cannot be set up into NMI
* mode. With the IO APIC we can re-route the external timer
* interrupt and broadcast it as an NMI to all CPUs, so no pain.
- *
- * NOTE: this trap vector (0x41) and the gate in
- * BUILD_SMP_TIMER_INTERRUPT should be the same ;)
*/
tmp_value = apic_read(APIC_LVTT);
- lvtt1_value = APIC_LVT_TIMER_PERIODIC | 0x41;
+ lvtt1_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
apic_write(APIC_LVTT , lvtt1_value);
/*
@@ -1741,7 +1971,7 @@ int __init calibrate_APIC_clock(void)
((long)(t2-t1)/LOOPS)/(1000000/HZ),
((long)(t2-t1)/LOOPS)%(1000000/HZ) );
- printk("..... APIC bus clock speed is %ld.%04ld MHz.\n",
+ printk("..... system bus clock speed is %ld.%04ld MHz.\n",
calibration_result/(1000000/HZ),
calibration_result%(1000000/HZ) );
#undef LOOPS
diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c
index d95d64069..f7987718b 100644
--- a/arch/i386/kernel/sys_i386.c
+++ b/arch/i386/kernel/sys_i386.c
@@ -108,108 +108,94 @@ asmlinkage int old_select(struct sel_arg_struct *arg)
*
* This is really horribly ugly.
*/
-asmlinkage int sys_ipc (uint call, int first, int second, int third, void *ptr, long fifth)
+asmlinkage int sys_ipc (uint call, int first, int second,
+ int third, void *ptr, long fifth)
{
int version, ret;
- lock_kernel();
version = call >> 16; /* hack for backward compatibility */
call &= 0xffff;
if (call <= SEMCTL)
switch (call) {
case SEMOP:
- ret = sys_semop (first, (struct sembuf *)ptr, second);
- goto out;
+ return sys_semop (first, (struct sembuf *)ptr, second);
case SEMGET:
- ret = sys_semget (first, second, third);
- goto out;
+ return sys_semget (first, second, third);
case SEMCTL: {
union semun fourth;
- ret = -EINVAL;
if (!ptr)
- goto out;
- ret = -EFAULT;
+ return -EINVAL;
if (get_user(fourth.__pad, (void **) ptr))
- goto out;
- ret = sys_semctl (first, second, third, fourth);
- goto out;
+ return -EFAULT;
+ return sys_semctl (first, second, third, fourth);
}
default:
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
+
if (call <= MSGCTL)
switch (call) {
case MSGSND:
- ret = sys_msgsnd (first, (struct msgbuf *) ptr,
+ return sys_msgsnd (first, (struct msgbuf *) ptr,
second, third);
- goto out;
case MSGRCV:
switch (version) {
case 0: {
struct ipc_kludge tmp;
- ret = -EINVAL;
if (!ptr)
- goto out;
- ret = -EFAULT;
- if (copy_from_user(&tmp,(struct ipc_kludge *) ptr,
- sizeof (tmp)))
- goto out;
- ret = sys_msgrcv (first, tmp.msgp, second, tmp.msgtyp, third);
- goto out;
+ return -EINVAL;
+
+ if (copy_from_user(&tmp,
+ (struct ipc_kludge *) ptr,
+ sizeof (tmp)))
+ return -EFAULT;
+ return sys_msgrcv (first, tmp.msgp, second,
+ tmp.msgtyp, third);
}
- case 1: default:
- ret = sys_msgrcv (first, (struct msgbuf *) ptr, second, fifth, third);
- goto out;
+ default:
+ return sys_msgrcv (first,
+ (struct msgbuf *) ptr,
+ second, fifth, third);
}
case MSGGET:
- ret = sys_msgget ((key_t) first, second);
- goto out;
+ return sys_msgget ((key_t) first, second);
case MSGCTL:
- ret = sys_msgctl (first, second, (struct msqid_ds *) ptr);
- goto out;
+ return sys_msgctl (first, second,
+ (struct msqid_ds *) ptr);
default:
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
if (call <= SHMCTL)
switch (call) {
case SHMAT:
switch (version) {
- case 0: default: {
+ default: {
ulong raddr;
- ret = sys_shmat (first, (char *) ptr, second, &raddr);
+ ret = sys_shmat (first, (char *) ptr,
+ second, &raddr);
if (ret)
- goto out;
- ret = put_user (raddr, (ulong *) third);
- goto out;
+ return ret;
+ return put_user (raddr, (ulong *) third);
}
case 1: /* iBCS2 emulator entry point */
- ret = -EINVAL;
if (!segment_eq(get_fs(), get_ds()))
- goto out;
- ret = sys_shmat (first, (char *) ptr, second, (ulong *) third);
- goto out;
+ return -EINVAL;
+ return sys_shmat (first, (char *) ptr,
+ second, (ulong *) third);
}
case SHMDT:
- ret = sys_shmdt ((char *)ptr);
- goto out;
+ return sys_shmdt ((char *)ptr);
case SHMGET:
- ret = sys_shmget (first, second, third);
- goto out;
+ return sys_shmget (first, second, third);
case SHMCTL:
- ret = sys_shmctl (first, second, (struct shmid_ds *) ptr);
- goto out;
+ return sys_shmctl (first, second,
+ (struct shmid_ds *) ptr);
default:
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
- else
- ret = -EINVAL;
-out:
- unlock_kernel();
- return ret;
+
+ return -EINVAL;
}
/*
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index dbd6e1942..ec2ea5d60 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -12,6 +12,8 @@
* precision CMOS clock update
* 1996-05-03 Ingo Molnar
* fixed time warps in do_[slow|fast]_gettimeoffset()
+ * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
+ * "A Kernel Model for Precision Timekeeping" by Dave Mills
* 1998-09-05 (Various)
* More robust do_fast_gettimeoffset() algorithm implemented
* (works with APM, Cyrix 6x86MX and Centaur C6),
@@ -20,6 +22,12 @@
* (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
* Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
* ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
+ * 1998-12-16 Andrea Arcangeli
+ * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
+ * because was not accounting lost_ticks.
+ * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli
+ * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
+ * serialize accesses to xtime/lost_ticks).
*/
/* What about the "updated NTP code" stuff in 2.0 time.c? It's not in
@@ -57,12 +65,14 @@
#include <linux/timex.h>
#include <linux/config.h>
+#include <asm/fixmap.h>
+#include <asm/cobalt.h>
+
/*
* for x86_do_profile()
*/
#include "irq.h"
-extern int setup_x86_irq(int, struct irqaction *);
unsigned long cpu_hz; /* Detected as we calibrate the TSC */
@@ -78,7 +88,9 @@ static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
*/
static unsigned long fast_gettimeoffset_quotient=0;
-static unsigned long do_fast_gettimeoffset(void)
+extern rwlock_t xtime_lock;
+
+static inline unsigned long do_fast_gettimeoffset(void)
{
register unsigned long eax asm("ax");
register unsigned long edx asm("dx");
@@ -88,13 +100,12 @@ static unsigned long do_fast_gettimeoffset(void)
:"=a" (eax), "=d" (edx));
/* .. relative to previous jiffy (32 bits is enough) */
- edx = 0;
eax -= last_tsc_low; /* tsc_low delta */
/*
- * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient.
- * = (tsc_low delta) / (clocks_per_usec)
- * = (tsc_low delta) / (clocks_per_jiffy / usecs_per_jiffy)
+ * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
+ * = (tsc_low delta) * (usecs_per_clock)
+ * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
*
* Using a mull instead of a divl saves up to 31 clock cycles
* in the critical path.
@@ -102,13 +113,17 @@ static unsigned long do_fast_gettimeoffset(void)
__asm__("mull %2"
:"=a" (eax), "=d" (edx)
- :"r" (fast_gettimeoffset_quotient),
- "0" (eax), "1" (edx));
+ :"g" (fast_gettimeoffset_quotient),
+ "0" (eax));
/* our adjusted time offset in microseconds */
- return edx + delay_at_last_interrupt;
+ return delay_at_last_interrupt + edx;
}
+#define TICK_SIZE tick
+
+#ifndef CONFIG_X86_TSC
+
/* This function must be called with interrupts disabled
* It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs
*
@@ -141,8 +156,6 @@ static unsigned long do_fast_gettimeoffset(void)
* comp.protocols.time.ntp!
*/
-#define TICK_SIZE tick
-
static unsigned long do_slow_gettimeoffset(void)
{
int count;
@@ -224,28 +237,45 @@ static unsigned long do_slow_gettimeoffset(void)
static unsigned long (*do_gettimeoffset)(void) = do_slow_gettimeoffset;
+#else
+
+#define do_gettimeoffset() do_fast_gettimeoffset()
+
+#endif
+
/*
* This version of gettimeofday has microsecond resolution
* and better than microsecond precision on fast x86 machines with TSC.
*/
void do_gettimeofday(struct timeval *tv)
{
+ extern volatile unsigned long lost_ticks;
unsigned long flags;
+ unsigned long usec, sec;
+
+ read_lock_irqsave(&xtime_lock, flags);
+ usec = do_gettimeoffset();
+ {
+ unsigned long lost = lost_ticks;
+ if (lost)
+ usec += lost * (1000000 / HZ);
+ }
+ sec = xtime.tv_sec;
+ usec += xtime.tv_usec;
+ read_unlock_irqrestore(&xtime_lock, flags);
- save_flags(flags);
- cli();
- *tv = xtime;
- tv->tv_usec += do_gettimeoffset();
- if (tv->tv_usec >= 1000000) {
- tv->tv_usec -= 1000000;
- tv->tv_sec++;
+ while (usec >= 1000000) {
+ usec -= 1000000;
+ sec++;
}
- restore_flags(flags);
+
+ tv->tv_sec = sec;
+ tv->tv_usec = usec;
}
void do_settimeofday(struct timeval *tv)
{
- cli();
+ write_lock_irq(&xtime_lock);
/* This is revolting. We need to set the xtime.tv_usec
* correctly. However, the value in this location is
* the value at the last tick.
@@ -254,16 +284,18 @@ void do_settimeofday(struct timeval *tv)
*/
tv->tv_usec -= do_gettimeoffset();
- if (tv->tv_usec < 0) {
+ while (tv->tv_usec < 0) {
tv->tv_usec += 1000000;
tv->tv_sec--;
}
xtime = *tv;
- time_state = TIME_BAD;
- time_maxerror = MAXPHASE;
- time_esterror = MAXPHASE;
- sti();
+ time_adjust = 0; /* stop active adjtime() */
+ time_status |= STA_UNSYNC;
+ time_state = TIME_ERROR; /* p. 24, (a) */
+ time_maxerror = NTP_PHASE_LIMIT;
+ time_esterror = NTP_PHASE_LIMIT;
+ write_unlock_irq(&xtime_lock);
}
/*
@@ -338,8 +370,12 @@ static long last_rtc_update = 0;
* timer_interrupt() needs to keep up the real-time clock,
* as well as call the "do_timer()" routine every clocktick
*/
-static inline void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
+#ifdef CONFIG_VISWS
+ /* Clear the interrupt */
+ co_cpu_write(CO_CPU_STAT,co_cpu_read(CO_CPU_STAT) & ~CO_STAT_TIMEINTR);
+#endif
do_timer(regs);
/*
* In the SMP case we use the local APIC timer interrupt to do the
@@ -359,21 +395,15 @@ static inline void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
* CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
* called as close as possible to 500 ms before the new second starts.
*/
- if (time_state != TIME_BAD && xtime.tv_sec > last_rtc_update + 660 &&
- xtime.tv_usec > 500000 - (tick >> 1) &&
- xtime.tv_usec < 500000 + (tick >> 1)) {
+ if ((time_status & STA_UNSYNC) == 0 &&
+ xtime.tv_sec > last_rtc_update + 660 &&
+ xtime.tv_usec >= 500000 - ((unsigned) tick) / 2 &&
+ xtime.tv_usec <= 500000 + ((unsigned) tick) / 2) {
if (set_rtc_mmss(xtime.tv_sec) == 0)
last_rtc_update = xtime.tv_sec;
else
last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
}
-#if 0
- /* As we return to user mode fire off the other CPU schedulers.. this is
- basically because we don't yet share IRQ's around. This message is
- rigged to be safe on the 386 - basically it's a hack, so don't look
- closely for now.. */
- smp_message_pass(MSG_ALL_BUT_SELF, MSG_RESCHEDULE, 0L, 0);
-#endif
#ifdef CONFIG_MCA
if( MCA_bus ) {
@@ -392,36 +422,56 @@ static inline void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
#endif
}
+static int use_tsc = 0;
+
/*
* This is the same as the above, except we _also_ save the current
* Time Stamp Counter value at the time of the timer interrupt, so that
* we later on can estimate the time of day more exactly.
*/
-static void pentium_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
- int count, flags;
+ int count;
- /* It is important that these two operations happen almost at the
- * same time. We do the RDTSC stuff first, since it's faster. To
- * avoid any inconsistencies, we disable interrupts locally.
- */
+ /*
+ * Here we are in the timer irq handler. We already have irqs locally
+ * disabled but we don't know if the timer_bh is running on the other
+ * CPU. We need to avoid an SMP race with it. NOTE: we don't need
+ * the irq version of write_lock because, as just said, we have irqs
+ * locally disabled. -arca
+ */
+ write_lock(&xtime_lock);
+
+ if (use_tsc)
+ {
+ /*
+ * It is important that these two operations happen almost at
+ * the same time. We do the RDTSC stuff first, since it's
+ * faster. To avoid any inconsistencies, we need interrupts
+ * disabled locally.
+ */
+
+ /*
+ * Interrupts are already disabled locally since the timer irq
+ * has the SA_INTERRUPT flag set. -arca
+ */
- __save_flags(flags);
- __cli();
- /* read Pentium cycle counter */
- __asm__("rdtsc"
- :"=a" (last_tsc_low):: "eax", "edx");
+ /* read Pentium cycle counter */
+ __asm__("rdtsc" : "=a" (last_tsc_low) : : "edx");
- outb_p(0x00, 0x43); /* latch the count ASAP */
+ outb_p(0x00, 0x43); /* latch the count ASAP */
- count = inb_p(0x40); /* read the latched count */
- count |= inb(0x40) << 8;
+ count = inb_p(0x40); /* read the latched count */
+ count |= inb(0x40) << 8;
+
+ count = ((LATCH-1) - count) * TICK_SIZE;
+ delay_at_last_interrupt = (count + LATCH/2) / LATCH;
+ }
+
+ do_timer_interrupt(irq, NULL, regs);
+
+ write_unlock(&xtime_lock);
- count = ((LATCH-1) - count) * TICK_SIZE;
- delay_at_last_interrupt = (count + LATCH/2) / LATCH;
- __restore_flags(flags);
-
- timer_interrupt(irq, NULL, regs);
}
/* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
@@ -591,10 +641,25 @@ __initfunc(void time_init(void))
* to disk; this won't break the kernel, though, 'cuz we're
* smart. See arch/i386/kernel/apm.c.
*/
+ /*
+ * Firstly we have to do a CPU check for chips with
+ * a potentially buggy TSC. At this point we haven't run
+ * the ident/bugs checks so we must run this hook as it
+ * may turn off the TSC flag.
+ *
+ * NOTE: this doesn't yet handle SMP 486 machines where only
+ * some CPUs have a TSC. That's never worked and nobody has
+ * moaned; if you have the only one in the world - you fix it!
+ */
+
+ dodgy_tsc();
+
if (boot_cpu_data.x86_capability & X86_FEATURE_TSC) {
+#ifndef do_gettimeoffset
do_gettimeoffset = do_fast_gettimeoffset;
+#endif
do_get_fast_time = do_gettimeofday;
- irq0.handler = pentium_timer_interrupt;
+ use_tsc = 1;
fast_gettimeoffset_quotient = calibrate_tsc();
/* report CPU clock rate in Hz.
@@ -609,5 +674,22 @@ __initfunc(void time_init(void))
printk("Detected %ld Hz processor.\n", cpu_hz);
}
}
+
+#ifdef CONFIG_VISWS
+ printk("Starting Cobalt Timer system clock\n");
+
+ /* Set the countdown value */
+ co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
+
+ /* Start the timer */
+ co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
+
+ /* Enable (unmask) the timer interrupt */
+ co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
+
+ /* Wire cpu IDT entry to s/w handler (and Cobalt APIC to IDT) */
+ setup_x86_irq(CO_IRQ_TIMER, &irq0);
+#else
setup_x86_irq(0, &irq0);
+#endif
}
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 6e9a95423..f0dc06092 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -34,6 +34,14 @@
#include <asm/debugreg.h>
#include <asm/desc.h>
+#include <asm/smp.h>
+
+#ifdef CONFIG_X86_VISWS_APIC
+#include <asm/fixmap.h>
+#include <asm/cobalt.h>
+#include <asm/lithium.h>
+#endif
+
asmlinkage int system_call(void);
asmlinkage void lcall7(void);
@@ -499,15 +507,18 @@ __initfunc(void trap_init_f00f_bug(void))
}
#define _set_gate(gate_addr,type,dpl,addr) \
-__asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
- "movw %2,%%dx\n\t" \
+do { \
+ int __d0, __d1; \
+ __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
+ "movw %4,%%dx\n\t" \
"movl %%eax,%0\n\t" \
"movl %%edx,%1" \
:"=m" (*((long *) (gate_addr))), \
- "=m" (*(1+(long *) (gate_addr))) \
+ "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
:"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
- "d" ((char *) (addr)),"a" (__KERNEL_CS << 16) \
- :"ax","dx")
+ "3" ((char *) (addr)),"2" (__KERNEL_CS << 16)); \
+} while (0)
+
/*
* This needs to use 'idt_table' rather than 'idt', and
@@ -566,9 +577,100 @@ void set_ldt_desc(unsigned int n, void *addr, unsigned int size)
_set_tssldt_desc(gdt_table+FIRST_LDT_ENTRY+(n<<1), (int)addr, ((size << 3) - 1), 0x82);
}
+#ifdef CONFIG_X86_VISWS_APIC
+
+/*
+ * On Rev 005 motherboards legacy device interrupt lines are wired directly
+ * to Lithium from the 307. But the PROM leaves the interrupt type of each
+ * 307 logical device set appropriate for the 8259. Later we'll actually use
+ * the 8259, but for now we have to flip the interrupt types to
+ * level triggered, active lo as required by Lithium.
+ */
+
+#define REG 0x2e /* The register to read/write */
+#define DEV 0x07 /* Register: Logical device select */
+#define VAL 0x2f /* The value to read/write */
+
+static void
+superio_outb(int dev, int reg, int val)
+{
+ outb(DEV, REG);
+ outb(dev, VAL);
+ outb(reg, REG);
+ outb(val, VAL);
+}
+
+static int __attribute__ ((unused))
+superio_inb(int dev, int reg)
+{
+ outb(DEV, REG);
+ outb(dev, VAL);
+ outb(reg, REG);
+ return inb(VAL);
+}
+
+#define FLOP 3 /* floppy logical device */
+#define PPORT 4 /* parallel logical device */
+#define UART5 5 /* uart2 logical device (not wired up) */
+#define UART6 6 /* uart1 logical device (THIS is the serial port!) */
+#define IDEST 0x70 /* int. destination (which 307 IRQ line) reg. */
+#define ITYPE 0x71 /* interrupt type register */
+
+/* interrupt type bits */
+#define LEVEL 0x01 /* bit 0, 0 == edge triggered */
+#define ACTHI 0x02 /* bit 1, 0 == active lo */
+
+static void
+superio_init(void)
+{
+ if (visws_board_type == VISWS_320 && visws_board_rev == 5) {
+ superio_outb(UART6, IDEST, 0); /* 0 means no intr propagated */
+ printk("SGI 320 rev 5: disabling 307 uart1 interrupt\n");
+ }
+}
+
+static void
+lithium_init(void)
+{
+ set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
+ printk("Lithium PCI Bridge A, Bus Number: %d\n",
+ li_pcia_read16(LI_PCI_BUSNUM) & 0xff);
+ set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
+ printk("Lithium PCI Bridge B (PIIX4), Bus Number: %d\n",
+ li_pcib_read16(LI_PCI_BUSNUM) & 0xff);
+
+ /* XXX blindly enables all interrupts */
+ li_pcia_write16(LI_PCI_INTEN, 0xffff);
+ li_pcib_write16(LI_PCI_INTEN, 0xffff);
+}
+
+static void
+cobalt_init(void)
+{
+ /*
+ * On normal SMP PC this is used only with SMP, but we have to
+ * use it and set it up here to start the Cobalt clock
+ */
+ set_fixmap(FIX_APIC_BASE, APIC_PHYS_BASE);
+ printk("Local APIC ID %lx\n", apic_read(APIC_ID));
+ printk("Local APIC Version %lx\n", apic_read(APIC_VERSION));
+
+ set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
+ printk("Cobalt Revision %lx\n", co_cpu_read(CO_CPU_REV));
+
+ set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
+ printk("Cobalt APIC ID %lx\n", co_apic_read(CO_APIC_ID));
+
+ /* Enable Cobalt APIC being careful to NOT change the ID! */
+ co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID)|CO_APIC_ENABLE);
+
+ printk("Cobalt APIC enabled: ID reg %lx\n", co_apic_read(CO_APIC_ID));
+}
+#endif
void __init trap_init(void)
{
- int i;
+ /* Initially set up all of the IDT to jump to unexpected */
+ init_unexpected_irq();
if (readl(0x0FFFD9) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
EISA_bus = 1;
@@ -591,8 +693,6 @@ void __init trap_init(void)
set_trap_gate(15,&spurious_interrupt_bug);
set_trap_gate(16,&coprocessor_error);
set_trap_gate(17,&alignment_check);
- for (i=18;i<48;i++)
- set_trap_gate(i,&reserved);
set_system_gate(0x80,&system_call);
/* set up GDT task & ldt entries */
@@ -603,4 +703,9 @@ void __init trap_init(void)
__asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
load_TR(0);
load_ldt(0);
+#ifdef CONFIG_X86_VISWS_APIC
+ superio_init();
+ lithium_init();
+ cobalt_init();
+#endif
}
diff --git a/arch/i386/kernel/visws_apic.c b/arch/i386/kernel/visws_apic.c
new file mode 100644
index 000000000..f7dabc15d
--- /dev/null
+++ b/arch/i386/kernel/visws_apic.c
@@ -0,0 +1,407 @@
+/*
+ * linux/arch/i386/kernel/visws_apic.c
+ *
+ * Copyright (C) 1999 Bent Hagemark, Ingo Molnar
+ *
+ * SGI Visual Workstation interrupt controller
+ *
+ * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
+ * which serves as the main interrupt controller in the system. Non-legacy
+ * hardware in the system uses this controller directly. Legacy devices
+ * are connected to the PIIX4 which in turn has its 8259(s) connected to
+ * one of the Cobalt APIC entries.
+ */
+
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/malloc.h>
+#include <linux/random.h>
+#include <linux/smp.h>
+#include <linux/tasks.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/bitops.h>
+#include <asm/smp.h>
+#include <asm/pgtable.h>
+#include <asm/delay.h>
+#include <asm/desc.h>
+
+#include <asm/cobalt.h>
+
+#include "irq.h"
+
+/*
+ * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
+ * -- not the manner expected by the normal 8259 code in irq.c.
+ *
+ * there is a 'master' physical interrupt source that gets sent to
+ * the CPU. But in the chipset there are various 'virtual' interrupts
+ * waiting to be handled. We represent this to Linux through a 'master'
+ * interrupt controller type, and through a special virtual interrupt-
+ * controller. Device drivers only see the virtual interrupt sources.
+ */
+
+#define CO_IRQ_BASE 0x20 /* This is the 0x20 in init_IRQ()! */
+
+static void startup_piix4_master_irq(unsigned int irq);
+static void shutdown_piix4_master_irq(unsigned int irq);
+static void do_piix4_master_IRQ(unsigned int irq, struct pt_regs * regs);
+#define enable_piix4_master_irq startup_piix4_master_irq
+#define disable_piix4_master_irq shutdown_piix4_master_irq
+
+static struct hw_interrupt_type piix4_master_irq_type = {
+ "PIIX4-master",
+ startup_piix4_master_irq,
+ shutdown_piix4_master_irq,
+ do_piix4_master_IRQ,
+ enable_piix4_master_irq,
+ disable_piix4_master_irq
+};
+
+static void enable_piix4_virtual_irq(unsigned int irq);
+static void disable_piix4_virtual_irq(unsigned int irq);
+#define startup_piix4_virtual_irq enable_piix4_virtual_irq
+#define shutdown_piix4_virtual_irq disable_piix4_virtual_irq
+
+static struct hw_interrupt_type piix4_virtual_irq_type = {
+ "PIIX4-virtual",
+ startup_piix4_virtual_irq,
+ shutdown_piix4_virtual_irq,
+ 0, /* no handler, it's never called physically */
+ enable_piix4_virtual_irq,
+ disable_piix4_virtual_irq
+};
+
+/*
+ * This is the SGI Cobalt (IO-)APIC:
+ */
+
+static void do_cobalt_IRQ(unsigned int irq, struct pt_regs * regs);
+static void enable_cobalt_irq(unsigned int irq);
+static void disable_cobalt_irq(unsigned int irq);
+static void startup_cobalt_irq(unsigned int irq);
+#define shutdown_cobalt_irq disable_cobalt_irq
+
+static struct hw_interrupt_type cobalt_irq_type = {
+ "Cobalt-APIC",
+ startup_cobalt_irq,
+ shutdown_cobalt_irq,
+ do_cobalt_IRQ,
+ enable_cobalt_irq,
+ disable_cobalt_irq
+};
+
+
+/*
+ * Not an initfunc, needed by the reboot code
+ */
+void init_pic_mode(void)
+{
+ /* Nop on Cobalt */
+}
+
+/*
+ * Cobalt (IO)-APIC functions to handle PCI devices.
+ */
+
+static void disable_cobalt_irq(unsigned int irq)
+{
+ /* XXX undo the APIC entry here? */
+
+ /*
+ * definitely, we do not want to have IRQ storms from
+ * unused devices --mingo
+ */
+}
+
+static void enable_cobalt_irq(unsigned int irq)
+{
+}
+
+/*
+ * Set the given Cobalt APIC Redirection Table entry to point
+ * to the given IDT vector/index.
+ *
+ * "idtvec" is given relative to CO_IRQ_BASE: the low word of the
+ * entry is written as CO_APIC_LEVEL | (CO_IRQ_BASE + idtvec) and
+ * the high word is written as 0. The printk below reports idtvec
+ * as passed in, i.e. without CO_IRQ_BASE added.
+ */
+static void co_apic_set(int entry, int idtvec)
+{
+ co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (CO_IRQ_BASE+idtvec));
+ co_apic_write(CO_APIC_HI(entry), 0);
+
+ printk("Cobalt APIC Entry %d IDT Vector %d\n", entry, idtvec);
+}
+
+/*
+ * "irq" really just serves to identify the device. Here is where we
+ * map this to the Cobalt APIC entry where it's physically wired.
+ * This is called via request_irq -> setup_x86_irq -> irq_desc->startup()
+ *
+ * Known irqs get their Cobalt APIC entry programmed through
+ * co_apic_set(). CO_IRQ_SERIAL is accepted but nothing is programmed
+ * for it. A CO_IRQ_IDE request on a board type/rev combination that
+ * is not listed below returns without programming anything. Any
+ * other irq value panics.
+ */
+static void startup_cobalt_irq(unsigned int irq)
+{
+ /*
+ * These "irq"'s are wired to the same Cobalt APIC entries
+ * for all (known) motherboard types/revs
+ */
+ switch (irq) {
+ case CO_IRQ_TIMER: co_apic_set(CO_APIC_CPU, CO_IRQ_TIMER);
+ return;
+
+ case CO_IRQ_ENET: co_apic_set(CO_APIC_ENET, CO_IRQ_ENET);
+ return;
+
+ case CO_IRQ_SERIAL: return; /* XXX move to piix4-8259 "virtual" */
+
+ case CO_IRQ_8259: co_apic_set(CO_APIC_8259, CO_IRQ_8259);
+ return;
+
+ case CO_IRQ_IDE:
+ switch (visws_board_type) {
+ case VISWS_320:
+ switch (visws_board_rev) {
+ case 5:
+ co_apic_set(CO_APIC_0_5_IDE0, CO_IRQ_IDE);
+ co_apic_set(CO_APIC_0_5_IDE1, CO_IRQ_IDE);
+ return;
+ case 6:
+ co_apic_set(CO_APIC_0_6_IDE0, CO_IRQ_IDE);
+ co_apic_set(CO_APIC_0_6_IDE1, CO_IRQ_IDE);
+ return;
+ }
+ /*
+ * Unknown 320 revision: do not fall through into
+ * the 540 table, its entries belong to another board.
+ */
+ break;
+ case VISWS_540:
+ switch (visws_board_rev) {
+ case 2:
+ co_apic_set(CO_APIC_1_2_IDE0, CO_IRQ_IDE);
+ return;
+ }
+ break;
+ }
+ break;
+ default:
+ panic("huh?");
+ }
+}
+
+/*
+ * This is the handle() op in do_IRQ()
+ */
+static void do_cobalt_IRQ(unsigned int irq, struct pt_regs * regs)
+{
+ struct irqaction * action;
+ irq_desc_t *desc = irq_desc + irq;
+
+ spin_lock(&irq_controller_lock);
+ {
+ unsigned int status;
+ /* XXX APIC EOI? */
+ status = desc->status & ~IRQ_REPLAY;
+ action = NULL;
+ if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))
+ action = desc->action;
+ desc->status = status | IRQ_INPROGRESS;
+ }
+ spin_unlock(&irq_controller_lock);
+
+ /* Exit early if we had no action or it was disabled */
+ if (!action)
+ return;
+
+ handle_IRQ_event(irq, regs, action);
+
+ (void)co_cpu_read(CO_CPU_REV); /* Sync driver ack to its h/w */
+ apic_write(APIC_EOI, APIC_EIO_ACK); /* Send EOI to Cobalt APIC */
+
+ spin_lock(&irq_controller_lock);
+ {
+ unsigned int status = desc->status & ~IRQ_INPROGRESS;
+ desc->status = status;
+ if (!(status & IRQ_DISABLED))
+ enable_cobalt_irq(irq);
+ }
+ spin_unlock(&irq_controller_lock);
+}
+
+/*
+ * PIIX4-8259 master/virtual functions to handle:
+ *
+ * floppy
+ * parallel
+ * serial
+ * audio (?)
+ *
+ * None of these get Cobalt APIC entries, neither do they have IDT
+ * entries. These interrupts are purely virtual and distributed from
+ * the 'master' interrupt source: CO_IRQ_8259.
+ *
+ * When the 8259 interrupts, its handler figures out which of these
+ * devices is interrupting and dispatches to that device's handler.
+ *
+ * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/
+ * enable_irq gets the right irq. This 'master' irq is never directly
+ * manipulated by any driver.
+ */
+
+static void startup_piix4_master_irq(unsigned int irq)
+{
+ /* ICW1 */
+ outb(0x11, 0x20);
+ outb(0x11, 0xa0);
+
+ /* ICW2 */
+ outb(0x08, 0x21);
+ outb(0x70, 0xa1);
+
+ /* ICW3 */
+ outb(0x04, 0x21);
+ outb(0x02, 0xa1);
+
+ /* ICW4 */
+ outb(0x01, 0x21);
+ outb(0x01, 0xa1);
+
+ /* OCW1 - disable all interrupts in both 8259's */
+ outb(0xff, 0x21);
+ outb(0xff, 0xa1);
+
+ startup_cobalt_irq(irq);
+}
+
+static void shutdown_piix4_master_irq(unsigned int irq)
+{
+ /*
+ * [we skip the 8259 magic here, not strictly necessary]
+ */
+
+ shutdown_cobalt_irq(irq);
+}
+
+static void do_piix4_master_IRQ(unsigned int irq, struct pt_regs * regs)
+{
+ int realirq, mask;
+
+ /*
+ * Find out what's interrupting in the PIIX4 8259. "irq" is the
+ * master interrupt that fired; "realirq" is the byte the 8259
+ * returns in answer to the poll command issued below.
+ */
+
+ spin_lock(&irq_controller_lock);
+ outb(0x0c, 0x20); /* OCW3 Poll command */
+ realirq = inb(0x20);
+
+ if (!(realirq & 0x80)) {
+ /*
+ * Bit 7 == 0 means invalid/spurious
+ */
+ goto out_unlock;
+ }
+ realirq &= 0x7f;
+
+ /*
+ * mask and ack the 8259: if the line is already masked we bail
+ * out without sending the EOI, otherwise the line is masked
+ * while its handler runs.
+ *
+ * NOTE(review): realirq can be anything up to 0x7f after the
+ * masking above and is used as a shift count here - presumably
+ * the poll byte only ever carries a small line number; confirm
+ * against the 8259 documentation.
+ */
+ mask = inb(0x21);
+ if ((mask >> realirq) & 0x01)
+ /*
+ * This IRQ is masked... ignore
+ */
+ goto out_unlock;
+
+ outb(mask | (1<<realirq), 0x21);
+ /*
+ * OCW2 - non-specific EOI
+ */
+ outb(0x20, 0x20);
+
+ spin_unlock(&irq_controller_lock);
+
+ /*
+ * handle this 'virtual interrupt' as a Cobalt one now.
+ * The statistics counter is bumped for the master "irq", while
+ * do_cobalt_IRQ() is run for realirq. Afterwards the line is
+ * unmasked again unless its descriptor is marked IRQ_DISABLED.
+ */
+ kstat.irqs[smp_processor_id()][irq]++;
+ do_cobalt_IRQ(realirq, regs);
+
+ spin_lock(&irq_controller_lock);
+ {
+ irq_desc_t *desc = irq_desc + realirq;
+
+ if (!(desc->status & IRQ_DISABLED))
+ enable_piix4_virtual_irq(realirq);
+ }
+ spin_unlock(&irq_controller_lock);
+ return;
+
+out_unlock:
+ spin_unlock(&irq_controller_lock);
+ return;
+}
+
+static void enable_piix4_virtual_irq(unsigned int irq)
+{
+ /*
+ * assumes this irq is one of the legacy devices
+ *
+ * Unmask the line: read the 8259 mask from port 0x21, clear
+ * this irq's bit and write the mask back, then hand over to
+ * enable_cobalt_irq() (an empty function in this file).
+ */
+
+ unsigned int mask = inb(0x21);
+ mask &= ~(1 << irq);
+ outb(mask, 0x21);
+ enable_cobalt_irq(irq);
+}
+
+/*
+ * Mask a legacy interrupt line in the PIIX4 8259.
+ *
+ * assumes this irq is one of the legacy devices
+ */
+static void disable_piix4_virtual_irq(unsigned int irq)
+{
+ unsigned int mask;
+
+ disable_cobalt_irq(irq);
+
+ /*
+ * A set bit in the mask register at port 0x21 disables the
+ * line (writing 0xff there disables everything, see OCW1 in
+ * startup_piix4_master_irq()), so disabling must SET the bit.
+ * Clearing it is what enable_piix4_virtual_irq() does.
+ */
+ mask = inb(0x21);
+ mask |= (1 << irq);
+ outb(mask, 0x21);
+}
+
+static struct irqaction master_action =
+ { no_action, 0, 0, "PIIX4-8259", NULL, NULL };
+
+void init_VISWS_APIC_irqs(void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++) {
+ irq_desc[i].status = IRQ_DISABLED;
+ irq_desc[i].action = 0;
+ irq_desc[i].depth = 0;
+
+ /*
+ * Cobalt IRQs are mapped to standard ISA
+ * interrupt vectors:
+ */
+ switch (i) {
+ /*
+ * Only CO_IRQ_8259 will be raised
+ * externally.
+ */
+ case CO_IRQ_8259:
+ irq_desc[i].handler = &piix4_master_irq_type;
+ break;
+ case CO_IRQ_FLOPPY:
+ case CO_IRQ_PARLL:
+ irq_desc[i].handler = &piix4_virtual_irq_type;
+ break;
+ default:
+ irq_desc[i].handler = &cobalt_irq_type;
+ break;
+ }
+ }
+
+ /*
+ * The master interrupt is always present:
+ */
+ setup_x86_irq(CO_IRQ_8259, &master_action);
+}
+
diff --git a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
index 6490984b1..c2cb3e5a6 100644
--- a/arch/i386/lib/Makefile
+++ b/arch/i386/lib/Makefile
@@ -6,6 +6,7 @@
$(CC) -D__ASSEMBLY__ $(AFLAGS) -traditional -c $< -o $*.o
L_TARGET = lib.a
-L_OBJS = checksum.o semaphore.o delay.o usercopy.o getuser.o putuser.o
+L_OBJS = checksum.o old-checksum.o semaphore.o delay.o \
+ usercopy.o getuser.o putuser.o
include $(TOPDIR)/Rules.make
diff --git a/arch/i386/lib/checksum.S b/arch/i386/lib/checksum.S
new file mode 100644
index 000000000..46527a85d
--- /dev/null
+++ b/arch/i386/lib/checksum.S
@@ -0,0 +1,447 @@
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * IP/TCP/UDP checksumming routines
+ *
+ * Authors: Jorge Cwik, <jorge@laser.satlink.net>
+ * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ * Tom May, <ftom@netcom.com>
+ * Pentium Pro/II routines:
+ * Alexander Kjeldaas <astor@guardian.no>
+ * Finn Arne Gangstad <finnag@guardian.no>
+ * Lots of code moved from tcp.c and ip.c; see those files
+ * for more names.
+ *
+ * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
+ * handling.
+ * Andi Kleen, add zeroing on error
+ * converted to pure assembler
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/errno.h>
+
+/*
+ * computes a partial checksum, e.g. for TCP/UDP fragments
+ */
+
+/*
+unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
+ */
+
+.text
+.align 4
+.globl csum_partial
+
+#if CPU!=686
+
+ /*
+ * Experiments with Ethernet and SLIP connections show that buff
+ * is aligned on either a 2-byte or 4-byte boundary. We get at
+ * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
+ * Fortunately, it is easy to convert 2-byte alignment to 4-byte
+ * alignment for the unrolled loop.
+ */
+/*
+ * csum_partial, variant built when CPU != 686.
+ * Arguments are read from the stack (offsets below are after the two
+ * pushes): buff, len, sum.  The running sum lives in %eax and is the
+ * return value.  %esi (buffer pointer) and %ebx (scratch) are saved on
+ * entry and restored before ret; %ecx and %edx are used as counters.
+ */
+csum_partial:
+ pushl %esi
+ pushl %ebx
+ movl 20(%esp),%eax # Function arg: unsigned int sum
+ movl 16(%esp),%ecx # Function arg: int len
+ movl 12(%esp),%esi # Function arg: unsigned char *buff
+ testl $2, %esi # Check alignment.
+ jz 2f # Jump if alignment is ok.
+ subl $2, %ecx # Alignment uses up two bytes.
+ jae 1f # Jump if we had at least two bytes.
+ addl $2, %ecx # ecx was < 2. Deal with it.
+ jmp 4f
+1: movw (%esi), %bx # sum the leading 16-bit word
+ addl $2, %esi
+ addw %bx, %ax
+ adcl $0, %eax # add the carry of the 16-bit add back in
+2:
+ movl %ecx, %edx # keep the remaining length for the tail code
+ shrl $5, %ecx # ecx = number of 32-byte chunks
+ jz 2f
+ testl %esi, %esi # clears CF before the adcl chain
+1: movl (%esi), %ebx
+ adcl %ebx, %eax
+ movl 4(%esi), %ebx
+ adcl %ebx, %eax
+ movl 8(%esi), %ebx
+ adcl %ebx, %eax
+ movl 12(%esi), %ebx
+ adcl %ebx, %eax
+ movl 16(%esi), %ebx
+ adcl %ebx, %eax
+ movl 20(%esi), %ebx
+ adcl %ebx, %eax
+ movl 24(%esi), %ebx
+ adcl %ebx, %eax
+ movl 28(%esi), %ebx
+ adcl %ebx, %eax
+ lea 32(%esi), %esi # lea and dec leave CF untouched for the next round
+ dec %ecx
+ jne 1b
+ adcl $0, %eax # fold in the carry of the last adcl
+2: movl %edx, %ecx
+ andl $0x1c, %edx # bytes left in whole dwords (0..28)
+ je 4f
+ shrl $2, %edx # This clears CF
+3: adcl (%esi), %eax
+ lea 4(%esi), %esi
+ dec %edx
+ jne 3b
+ adcl $0, %eax
+4: andl $3, %ecx # 0..3 trailing bytes
+ jz 7f
+ cmpl $2, %ecx
+ jb 5f # exactly one byte left
+ movw (%esi),%cx
+ leal 2(%esi),%esi
+ je 6f # flags still from the cmpl: exactly two bytes left
+ shll $16,%ecx # three bytes: word goes to the upper half
+5: movb (%esi),%cl
+6: addl %ecx,%eax
+ adcl $0, %eax
+7:
+ popl %ebx
+ popl %esi
+ ret
+
+#else /* CPU==686 */
+
+/*
+ * csum_partial, variant built when CPU == 686.
+ * Arguments are read from the stack (offsets below are after the two
+ * pushes): buf, len, sum.  The running sum lives in %eax and is the
+ * return value.
+ *
+ * %esi and %ebx are callee-saved in the C calling convention and are
+ * both overwritten here, so they are pushed on entry and popped at the
+ * single exit (label 80).  %ecx and %edx are used as counters.
+ *
+ * The main loop is entered through a computed jump into the middle of
+ * the 32-entry adcl chain at label 40; the address arithmetic relies on
+ * every entry of that chain assembling to 3 bytes.
+ */
+csum_partial:
+ pushl %esi # callee-saved, used as the buffer pointer
+ pushl %ebx # callee-saved, used as scratch
+ movl 20(%esp),%eax # Function arg: unsigned int sum
+ movl 16(%esp),%ecx # Function arg: int len
+ movl 12(%esp),%esi # Function arg: const unsigned char *buf
+
+ testl $2, %esi
+ jnz 30f
+10:
+ movl %ecx, %edx # keep len for the tail code
+ movl %ecx, %ebx
+ andl $0x7c, %ebx # bytes handled by the partial first pass
+ shrl $7, %ecx # number of further full 128-byte passes
+ addl %ebx,%esi
+ shrl $2, %ebx
+ negl %ebx
+ lea 45f(%ebx,%ebx,2), %ebx # label 45 minus 3 bytes per dword
+ testl %esi, %esi # clears CF before entering the adcl chain
+ jmp *%ebx
+
+ # Handle 2-byte-aligned regions
+20: addw (%esi), %ax
+ lea 2(%esi), %esi
+ adcl $0, %eax
+ jmp 10b
+
+30: subl $2, %ecx
+ ja 20b
+ je 32f
+ addl $2, %ecx # ecx was < 2: restore len (0 or 1)
+ jz 80f # len == 0: nothing to add, do not touch the buffer
+ movzbl (%esi),%ebx # csumming 1 byte, 2-aligned
+ addl %ebx, %eax
+ adcl $0, %eax
+ jmp 80f
+32:
+ addw (%esi), %ax # csumming 2 bytes, 2-aligned
+ adcl $0, %eax
+ jmp 80f
+
+40:
+ addl -128(%esi), %eax
+ adcl -124(%esi), %eax
+ adcl -120(%esi), %eax
+ adcl -116(%esi), %eax
+ adcl -112(%esi), %eax
+ adcl -108(%esi), %eax
+ adcl -104(%esi), %eax
+ adcl -100(%esi), %eax
+ adcl -96(%esi), %eax
+ adcl -92(%esi), %eax
+ adcl -88(%esi), %eax
+ adcl -84(%esi), %eax
+ adcl -80(%esi), %eax
+ adcl -76(%esi), %eax
+ adcl -72(%esi), %eax
+ adcl -68(%esi), %eax
+ adcl -64(%esi), %eax
+ adcl -60(%esi), %eax
+ adcl -56(%esi), %eax
+ adcl -52(%esi), %eax
+ adcl -48(%esi), %eax
+ adcl -44(%esi), %eax
+ adcl -40(%esi), %eax
+ adcl -36(%esi), %eax
+ adcl -32(%esi), %eax
+ adcl -28(%esi), %eax
+ adcl -24(%esi), %eax
+ adcl -20(%esi), %eax
+ adcl -16(%esi), %eax
+ adcl -12(%esi), %eax
+ adcl -8(%esi), %eax
+ adcl -4(%esi), %eax
+45:
+ lea 128(%esi), %esi # advance; lea leaves CF untouched
+ adcl $0, %eax
+ dec %ecx
+ jge 40b
+ movl %edx, %ecx
+50: andl $3, %ecx
+ jz 80f
+
+ # Handle the last 1-3 bytes without jumping
+ notl %ecx # 1->2, 2->1, 3->0, higher bits are masked
+ movl $0xffffff,%ebx # by the shll and shrl instructions
+ shll $3,%ecx
+ shrl %cl,%ebx
+ andl -128(%esi),%ebx # esi is 4-aligned so should be ok
+ addl %ebx,%eax
+ adcl $0,%eax
+80:
+ popl %ebx # restore callee-saved registers
+ popl %esi
+ ret
+
+#endif /* CPU==686 */
+
+/*
+unsigned int csum_partial_copy_generic (const char *src, char *dst,
+ int len, int sum, int *src_err_ptr, int *dst_err_ptr)
+ */
+
+/*
+ * Copy from ds while checksumming, otherwise like csum_partial
+ *
+ * The macros SRC and DST specify the type of access for the instruction.
+ * thus we can call a custom exception handler for all access types.
+ *
+ * FIXME: could someone double-check whether I haven't mixed up some SRC and
+ * DST definitions? It's damn hard to trigger all cases. I hope I got
+ * them all but there's no guarantee.
+ */
+
+/*
+ * SRC(insn): emit insn at local label 9999 and add an __ex_table entry
+ * pairing that address with label 6001, the fixup code that reports the
+ * fault through src_err_ptr.
+ */
+#define SRC(y...) \
+ 9999: y; \
+ .section __ex_table, "a"; \
+ .long 9999b, 6001f ; \
+ .previous
+
+/*
+ * DST(insn): emit insn at local label 9999 and add an __ex_table entry
+ * pairing that address with label 6002, the fixup code that reports the
+ * fault through dst_err_ptr.
+ */
+#define DST(y...) \
+ 9999: y; \
+ .section __ex_table, "a"; \
+ .long 9999b, 6002f ; \
+ .previous
+
+.align 4
+.globl csum_partial_copy_generic
+
+#if CPU!=686
+
+#define ARGBASE 16
+#define FP 12
+
+/*
+ * csum_partial_copy_generic, variant built when CPU != 686.
+ * Copies len bytes from src to dst while accumulating the checksum in
+ * %eax, which is the return value.  Stack arguments are addressed as
+ * ARGBASE+n(%esp) once the frame below is set up: src, dst, len, sum,
+ * src_err_ptr, dst_err_ptr.  FP(%esp) is a scratch slot holding the
+ * remaining length across the unrolled loop.
+ *
+ * Loads wrapped in SRC() and stores wrapped in DST() get exception
+ * table entries pointing at labels 6001 and 6002 respectively.
+ */
+csum_partial_copy_generic:
+ subl $4,%esp # scratch slot, addressed as FP(%esp) after the pushes
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ movl ARGBASE+16(%esp),%eax # sum
+ movl ARGBASE+12(%esp),%ecx # len
+ movl ARGBASE+4(%esp),%esi # src
+ movl ARGBASE+8(%esp),%edi # dst
+
+ testl $2, %edi # Check alignment.
+ jz 2f # Jump if alignment is ok.
+ subl $2, %ecx # Alignment uses up two bytes.
+ jae 1f # Jump if we had at least two bytes.
+ addl $2, %ecx # ecx was < 2. Deal with it.
+ jmp 4f
+SRC(1: movw (%esi), %bx )
+ addl $2, %esi
+DST( movw %bx, (%edi) )
+ addl $2, %edi
+ addw %bx, %ax
+ adcl $0, %eax # add the carry of the 16-bit add back in
+2:
+ movl %ecx, FP(%esp) # save the remaining length; edx is needed as data below
+ shrl $5, %ecx # ecx = number of 32-byte chunks
+ jz 2f
+ testl %esi, %esi # clears CF before the adcl chain
+SRC(1: movl (%esi), %ebx )
+SRC( movl 4(%esi), %edx )
+ adcl %ebx, %eax
+DST( movl %ebx, (%edi) )
+ adcl %edx, %eax
+DST( movl %edx, 4(%edi) )
+
+SRC( movl 8(%esi), %ebx )
+SRC( movl 12(%esi), %edx )
+ adcl %ebx, %eax
+DST( movl %ebx, 8(%edi) )
+ adcl %edx, %eax
+DST( movl %edx, 12(%edi) )
+
+SRC( movl 16(%esi), %ebx )
+SRC( movl 20(%esi), %edx )
+ adcl %ebx, %eax
+DST( movl %ebx, 16(%edi) )
+ adcl %edx, %eax
+DST( movl %edx, 20(%edi) )
+
+SRC( movl 24(%esi), %ebx )
+SRC( movl 28(%esi), %edx )
+ adcl %ebx, %eax
+DST( movl %ebx, 24(%edi) )
+ adcl %edx, %eax
+DST( movl %edx, 28(%edi) )
+
+ lea 32(%esi), %esi # lea and dec leave CF untouched for the next round
+ lea 32(%edi), %edi
+ dec %ecx
+ jne 1b
+ adcl $0, %eax # fold in the carry of the last adcl
+2: movl FP(%esp), %edx
+ movl %edx, %ecx
+ andl $0x1c, %edx # bytes left in whole dwords (0..28)
+ je 4f
+ shrl $2, %edx # This clears CF
+SRC(3: movl (%esi), %ebx )
+ adcl %ebx, %eax
+DST( movl %ebx, (%edi) )
+ lea 4(%esi), %esi
+ lea 4(%edi), %edi
+ dec %edx
+ jne 3b
+ adcl $0, %eax
+4: andl $3, %ecx # 0..3 trailing bytes
+ jz 7f
+ cmpl $2, %ecx
+ jb 5f # exactly one byte left
+SRC( movw (%esi), %cx )
+ leal 2(%esi), %esi
+DST( movw %cx, (%edi) )
+ leal 2(%edi), %edi
+ je 6f # flags still from the cmpl: exactly two bytes left
+ shll $16,%ecx # three bytes: word goes to the upper half
+SRC(5: movb (%esi), %cl )
+DST( movb %cl, (%edi) )
+6: addl %ecx, %eax
+ adcl $0, %eax
+7:
+5000:
+
+# Exception handler:
+.section .fixup, "ax"
+
+6001:
+ movl ARGBASE+20(%esp), %ebx # src_err_ptr
+ movl $-EFAULT, (%ebx)
+
+ # zero the complete destination - computing the rest
+ # is too much work
+ movl ARGBASE+8(%esp), %edi # dst
+ movl ARGBASE+12(%esp), %ecx # len
+ xorl %eax,%eax # zero fill value; this also makes the returned sum 0
+ rep ; stosb
+
+ jmp 5000b
+
+6002:
+ movl ARGBASE+24(%esp), %ebx # dst_err_ptr
+ movl $-EFAULT,(%ebx)
+ jmp 5000b
+
+.previous
+
+ /* Common exit: label 5000 above continues here in the text section. */
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ecx # equivalent to addl $4,%esp
+ ret
+
+#else
+
+/* Version for PentiumII/PPro */
+
+/*
+ * ROUND1(x): copy the dword at x(%esi) to x(%edi) through %ebx and add
+ * it to %eax with a plain addl, so an incoming carry is not consumed.
+ * Used for the first dword of a full 64-byte pass.
+ */
+#define ROUND1(x) \
+ SRC(movl x(%esi), %ebx ) ; \
+ addl %ebx, %eax ; \
+ DST(movl %ebx, x(%edi) ) ;
+
+/*
+ * ROUND(x): same copy, but accumulated with adcl so the carry of the
+ * previous round is folded in.
+ */
+#define ROUND(x) \
+ SRC(movl x(%esi), %ebx ) ; \
+ adcl %ebx, %eax ; \
+ DST(movl %ebx, x(%edi) ) ;
+
+#define ARGBASE 12
+
+/*
+ * csum_partial_copy_generic, PentiumII/PPro variant.
+ * Copies len bytes from src to dst while accumulating the checksum in
+ * %eax, which is the return value.  Stack arguments are addressed as
+ * ARGBASE+n(%esp) after the three pushes: src, dst, len, sum,
+ * src_err_ptr, dst_err_ptr.
+ *
+ * The 16 ROUND copies form one 64-byte pass; the first, partial pass
+ * is entered through a computed jump into the middle of that sequence.
+ * NOTE(review): the address arithmetic assumes every ROUND expansion
+ * assembles to exactly 8 bytes - confirm if the macros are changed.
+ */
+csum_partial_copy_generic:
+ pushl %ebx
+ pushl %edi
+ pushl %esi
+ movl ARGBASE+4(%esp),%esi #src
+ movl ARGBASE+8(%esp),%edi #dst
+ movl ARGBASE+12(%esp),%ecx #len
+ movl ARGBASE+16(%esp),%eax #sum
+ movl %ecx, %edx # keep len for the tail code
+ movl %ecx, %ebx
+ shrl $6, %ecx # number of further full 64-byte passes
+ andl $0x3c, %ebx # bytes handled by the partial first pass
+ negl %ebx
+ subl %ebx, %esi # ebx is negative: moves both pointers forward
+ subl %ebx, %edi
+ lea 3f(%ebx,%ebx), %ebx # label 3 minus 2 * (len & 0x3c)
+ testl %esi, %esi # clears CF before entering the adcl chain
+ jmp *%ebx
+1: addl $64,%esi
+ addl $64,%edi
+ ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
+ ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
+ ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
+ ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4)
+3: adcl $0,%eax
+ dec %ecx
+ jge 1b
+4: andl $3, %edx # 0..3 trailing bytes
+ jz 7f
+ cmpl $2, %edx
+ jb 5f # exactly one byte left
+SRC( movw (%esi), %dx )
+ leal 2(%esi), %esi
+DST( movw %dx, (%edi) )
+ leal 2(%edi), %edi
+ je 6f # flags still from the cmpl: exactly two bytes left
+ shll $16,%edx # three bytes: word goes to the upper half
+5:
+SRC( movb (%esi), %dl )
+DST( movb %dl, (%edi) )
+6: addl %edx, %eax
+ adcl $0, %eax
+7:
+.section .fixup, "ax"
+6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr
+ movl $-EFAULT, (%ebx)
+ # zero the complete destination (computing the rest is too much work)
+ movl ARGBASE+8(%esp),%edi # dst
+ movl ARGBASE+12(%esp),%ecx # len
+ xorl %eax,%eax # zero fill value; this also makes the returned sum 0
+ rep; stosb
+ jmp 7b
+6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr
+ movl $-EFAULT, (%ebx)
+ jmp 7b
+.previous
+
+ /* Common exit: label 7 above continues here in the text section. */
+ popl %esi
+ popl %edi
+ popl %ebx
+ ret
+
+#undef ROUND
+#undef ROUND1
+
+#endif /* CPU==686 */
diff --git a/arch/i386/lib/checksum.c b/arch/i386/lib/checksum.c
deleted file mode 100644
index 51a9219db..000000000
--- a/arch/i386/lib/checksum.c
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- * INET An implementation of the TCP/IP protocol suite for the LINUX
- * operating system. INET is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * IP/TCP/UDP checksumming routines
- *
- * Authors: Jorge Cwik, <jorge@laser.satlink.net>
- * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
- * Tom May, <ftom@netcom.com>
- * Pentium Pro/II routines:
- * Alexander Kjeldaas <astor@guardian.no>
- * Finn Arne Gangstad <finnag@guardian.no>
- * Lots of code moved from tcp.c and ip.c; see those files
- * for more names.
- *
- * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
- * handling.
- * Andi Kleen, add zeroing on error, fix constraints.
- *
- * To fix:
- * Convert to pure asm, because this file is too hard
- * for gcc's register allocator and it is not clear if the
- * contraints are correct.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <net/checksum.h>
-
-/*
- * computes a partial checksum, e.g. for TCP/UDP fragments
- */
-
-#if CPU!=686
-
-unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) {
- /*
- * Experiments with Ethernet and SLIP connections show that buff
- * is aligned on either a 2-byte or 4-byte boundary. We get at
- * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
- * Fortunately, it is easy to convert 2-byte alignment to 4-byte
- * alignment for the unrolled loop.
- */
- __asm__("
- testl $2, %%esi # Check alignment.
- jz 2f # Jump if alignment is ok.
- subl $2, %%ecx # Alignment uses up two bytes.
- jae 1f # Jump if we had at least two bytes.
- addl $2, %%ecx # ecx was < 2. Deal with it.
- jmp 4f
-1: movw (%%esi), %%bx
- addl $2, %%esi
- addw %%bx, %%ax
- adcl $0, %%eax
-2:
- movl %%ecx, %%edx
- shrl $5, %%ecx
- jz 2f
- testl %%esi, %%esi
-1: movl (%%esi), %%ebx
- adcl %%ebx, %%eax
- movl 4(%%esi), %%ebx
- adcl %%ebx, %%eax
- movl 8(%%esi), %%ebx
- adcl %%ebx, %%eax
- movl 12(%%esi), %%ebx
- adcl %%ebx, %%eax
- movl 16(%%esi), %%ebx
- adcl %%ebx, %%eax
- movl 20(%%esi), %%ebx
- adcl %%ebx, %%eax
- movl 24(%%esi), %%ebx
- adcl %%ebx, %%eax
- movl 28(%%esi), %%ebx
- adcl %%ebx, %%eax
- lea 32(%%esi), %%esi
- dec %%ecx
- jne 1b
- adcl $0, %%eax
-2: movl %%edx, %%ecx
- andl $0x1c, %%edx
- je 4f
- shrl $2, %%edx # This clears CF
-3: adcl (%%esi), %%eax
- lea 4(%%esi), %%esi
- dec %%edx
- jne 3b
- adcl $0, %%eax
-4: andl $3, %%ecx
- jz 7f
- cmpl $2, %%ecx
- jb 5f
- movw (%%esi),%%cx
- leal 2(%%esi),%%esi
- je 6f
- shll $16,%%ecx
-5: movb (%%esi),%%cl
-6: addl %%ecx,%%eax
- adcl $0, %%eax
-7: "
- : "=a"(sum)
- : "0"(sum), "c"(len), "S"(buff)
- : "bx", "dx", "si", "cx", "memory");
- return(sum);
-}
-
-#else /* 686 */
-
-unsigned int csum_partial(const unsigned char * buf, int len, unsigned int sum) {
- __asm__ ("
- testl $2, %%esi
- jnz 30f
-10:
- movl %%ecx, %%edx
- movl %%ecx, %%ebx
- andl $0x7c, %%ebx
- shrl $7, %%ecx
- addl %%ebx,%%esi
- shrl $2, %%ebx
- negl %%ebx
- lea 45f(%%ebx,%%ebx,2), %%ebx
- testl %%esi, %%esi
- jmp *%%ebx
-
- # Handle 2-byte-aligned regions
-20: addw (%%esi), %%ax
- lea 2(%%esi), %%esi
- adcl $0, %%eax
- jmp 10b
-
-30: subl $2, %%ecx
- ja 20b
- je 32f
- movzbl (%%esi),%%ebx # csumming 1 byte, 2-aligned
- addl %%ebx, %%eax
- adcl $0, %%eax
- jmp 80f
-32:
- addw (%%esi), %%ax # csumming 2 bytes, 2-aligned
- adcl $0, %%eax
- jmp 80f
-
-40:
- addl -128(%%esi), %%eax
- adcl -124(%%esi), %%eax
- adcl -120(%%esi), %%eax
- adcl -116(%%esi), %%eax
- adcl -112(%%esi), %%eax
- adcl -108(%%esi), %%eax
- adcl -104(%%esi), %%eax
- adcl -100(%%esi), %%eax
- adcl -96(%%esi), %%eax
- adcl -92(%%esi), %%eax
- adcl -88(%%esi), %%eax
- adcl -84(%%esi), %%eax
- adcl -80(%%esi), %%eax
- adcl -76(%%esi), %%eax
- adcl -72(%%esi), %%eax
- adcl -68(%%esi), %%eax
- adcl -64(%%esi), %%eax
- adcl -60(%%esi), %%eax
- adcl -56(%%esi), %%eax
- adcl -52(%%esi), %%eax
- adcl -48(%%esi), %%eax
- adcl -44(%%esi), %%eax
- adcl -40(%%esi), %%eax
- adcl -36(%%esi), %%eax
- adcl -32(%%esi), %%eax
- adcl -28(%%esi), %%eax
- adcl -24(%%esi), %%eax
- adcl -20(%%esi), %%eax
- adcl -16(%%esi), %%eax
- adcl -12(%%esi), %%eax
- adcl -8(%%esi), %%eax
- adcl -4(%%esi), %%eax
-45:
- lea 128(%%esi), %%esi
- adcl $0, %%eax
- dec %%ecx
- jge 40b
- movl %%edx, %%ecx
-50: andl $3, %%ecx
- jz 80f
-
- # Handle the last 1-3 bytes without jumping
- notl %%ecx # 1->2, 2->1, 3->0, higher bits are masked
- movl $0xffffff,%%ebx # by the shll and shrl instructions
- shll $3,%%ecx
- shrl %%cl,%%ebx
- andl -128(%%esi),%%ebx # esi is 4-aligned so should be ok
- addl %%ebx,%%eax
- adcl $0,%%eax
-80: "
- : "=a"(sum)
- : "0"(sum), "c"(len), "S"(buf)
- : "bx", "dx", "cx", "si", "memory");
- return(sum);
-}
-
-#endif
-
-/*
- * Copy from ds while checksumming, otherwise like csum_partial
- *
- * The macros SRC and DST specify the type of access for the instruction.
- * thus we can call a custom exception handler for all access types.
- *
- * FIXME: could someone double-check whether I haven't mixed up some SRC and
- * DST definitions? It's damn hard to trigger all cases. I hope I got
- * them all but there's no guarantee.
- */
-
-#define SRC(y...) \
-" 9999: "#y"; \n \
- .section __ex_table, \"a\"; \n \
- .long 9999b, 6001f \n \
- .previous\n"
-
-#define DST(y...) \
-" 9999: "#y"; \n \
- .section __ex_table, \"a\"; \n \
- .long 9999b, 6002f \n \
- .previous\n"
-
-#if CPU!=686
-
-unsigned int csum_partial_copy_generic (const char *src, char *dst,
- int len, int sum, int *src_err_ptr, int *dst_err_ptr)
-{
- __u32 tmp_var;
-
- __asm__ __volatile__ ( "
- movl %6,%%edi
- testl $2, %%edi # Check alignment.
- jz 2f # Jump if alignment is ok.
- subl $2, %%ecx # Alignment uses up two bytes.
- jae 1f # Jump if we had at least two bytes.
- addl $2, %%ecx # ecx was < 2. Deal with it.
- jmp 4f
-"SRC( 1: movw (%%esi), %%bx )"
- addl $2, %%esi
-"DST( movw %%bx, (%%edi) )"
- addl $2, %%edi
- addw %%bx, %%ax
- adcl $0, %%eax
- 2:
- movl %%ecx, %8
- shrl $5, %%ecx
- jz 2f
- testl %%esi, %%esi
-"SRC( 1: movl (%%esi), %%ebx )"
-"SRC( movl 4(%%esi), %%edx )"
- adcl %%ebx, %%eax
-"DST( movl %%ebx, (%%edi) )"
- adcl %%edx, %%eax
-"DST( movl %%edx, 4(%%edi) )"
-
-"SRC( movl 8(%%esi), %%ebx )"
-"SRC( movl 12(%%esi), %%edx )"
- adcl %%ebx, %%eax
-"DST( movl %%ebx, 8(%%edi) )"
- adcl %%edx, %%eax
-"DST( movl %%edx, 12(%%edi) )"
-
-"SRC( movl 16(%%esi), %%ebx )"
-"SRC( movl 20(%%esi), %%edx )"
- adcl %%ebx, %%eax
-"DST( movl %%ebx, 16(%%edi) )"
- adcl %%edx, %%eax
-"DST( movl %%edx, 20(%%edi) )"
-
-"SRC( movl 24(%%esi), %%ebx )"
-"SRC( movl 28(%%esi), %%edx )"
- adcl %%ebx, %%eax
-"DST( movl %%ebx, 24(%%edi) )"
- adcl %%edx, %%eax
-"DST( movl %%edx, 28(%%edi) )"
-
-"SRC( lea 32(%%esi), %%esi )"
-"DST( lea 32(%%edi), %%edi )"
- dec %%ecx
- jne 1b
- adcl $0, %%eax
- 2: movl %8, %%edx
- movl %%edx, %%ecx
- andl $0x1c, %%edx
- je 4f
- shrl $2, %%edx # This clears CF
-"SRC( 3: movl (%%esi), %%ebx )"
- adcl %%ebx, %%eax
-"DST( movl %%ebx, (%%edi) )"
-"SRC( lea 4(%%esi), %%esi )"
-"DST( lea 4(%%edi), %%edi )"
- dec %%edx
- jne 3b
- adcl $0, %%eax
- 4: andl $3, %%ecx
- jz 7f
- cmpl $2, %%ecx
- jb 5f
-"SRC( movw (%%esi), %%cx )"
-"SRC( leal 2(%%esi), %%esi )"
-"DST( movw %%cx, (%%edi) )"
-"DST( leal 2(%%edi), %%edi )"
- je 6f
- shll $16,%%ecx
-"SRC( 5: movb (%%esi), %%cl )"
-"DST( movb %%cl, (%%edi) )"
- 6: addl %%ecx, %%eax
- adcl $0, %%eax
- 7:
-
-5000:
-
-# Exception handler:
-################################################
- #
-.section .fixup, \"ax\" #
- #
-6000: #
- #
- movl %7, (%%ebx) #
- #
-# zero the complete destination - computing the rest
-# is too much work
- movl %6, %%edi
- movl %9, %%ecx
- xorl %%eax,%%eax
- rep ; stosb
- #
- jmp 5000b #
- #
-6001: #
- movl %1, %%ebx #
- jmp 6000b #
- #
-6002: #
- movl %2, %%ebx #
- jmp 6000b #
- #
-.previous #
- #
-################################################
-
-"
- : "=a" (sum)
- : "m" (src_err_ptr), "m" (dst_err_ptr),
- "0" (sum), "c" (len), "S" (src), "m" (dst),
- "i" (-EFAULT), "m"(tmp_var),
- "m" (len)
- : "bx", "dx", "si", "di", "cx", "memory" );
-
- return(sum);
-}
-
-#else /* CPU == 686 */
-
-#define ROUND1(x) \
- SRC(movl x(%%esi), %%ebx ) \
- "addl %%ebx, %%eax\n" \
- DST(movl %%ebx, x(%%edi) )
-
-#define ROUND(x) \
- SRC(movl x(%%esi), %%ebx ) \
- "adcl %%ebx, %%eax\n" \
- DST(movl %%ebx, x(%%edi) )
-
-unsigned int csum_partial_copy_generic (const char *src, char *dst,
- int len, int sum, int *src_err_ptr, int *dst_err_ptr)
-{
- __asm__ __volatile__ ("
- movl %4,%%ecx
- movl %%ecx, %%edx
- movl %%ecx, %%ebx
- shrl $6, %%ecx
- andl $0x3c, %%ebx
- negl %%ebx
- subl %%ebx, %%esi
- subl %%ebx, %%edi
- lea 3f(%%ebx,%%ebx), %%ebx
- testl %%esi, %%esi
- jmp *%%ebx
-1: addl $64,%%esi
- addl $64,%%edi\n"
-ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
-ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
-ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
-ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4)
-"3: adcl $0,%%eax
- dec %%ecx
- jge 1b
-4: andl $3, %%edx
- jz 7f
- cmpl $2, %%edx
- jb 5f
- " SRC(movw (%%esi), %%dx )"
- leal 2(%%esi), %%esi
- " DST(movw %%dx, (%%edi) )"
- leal 2(%%edi), %%edi
- je 6f
- shll $16,%%edx
-5:" SRC(movb (%%esi), %%dl )"
- " DST(movb %%dl, (%%edi) )"
-6: addl %%edx, %%eax
- adcl $0, %%eax
-7:
-.section .fixup, \"ax\"
-6000: movl %7, (%%ebx)
-# zero the complete destination (computing the rest is too much work)
- movl %8,%%edi
- movl %4,%%ecx
- xorl %%eax,%%eax
- rep ; stosb
- jmp 7b
-6001: movl %1, %%ebx
- jmp 6000b
-6002: movl %2, %%ebx
- jmp 6000b
-.previous
- "
- : "=a"(sum)
- : "m"(src_err_ptr), "m"(dst_err_ptr),
- "0"(sum), "m"(len), "S"(src), "D" (dst),
- "i" (-EFAULT),
- "m" (dst)
- : "bx", "cx", "si", "di", "dx", "memory" );
- return(sum);
-}
-
-#undef ROUND
-#undef ROUND1
-
-#endif
-
-
-#undef SRC
-#undef DST
-
-/*
- * FIXME: old compatibility stuff, will be removed soon.
- */
-
-unsigned int csum_partial_copy( const char *src, char *dst, int len, int sum)
-{
- int src_err=0, dst_err=0;
-
- sum = csum_partial_copy_generic ( src, dst, len, sum, &src_err, &dst_err);
-
- if (src_err || dst_err)
- printk("old csum_partial_copy_fromuser(), tell mingo to convert me.\n");
-
- return sum;
-}
-
-
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
index ae8aec636..6918451a6 100644
--- a/arch/i386/lib/delay.c
+++ b/arch/i386/lib/delay.c
@@ -19,23 +19,23 @@
void __delay(unsigned long loops)
{
+ int d0;
__asm__ __volatile__(
"\tjmp 1f\n"
".align 16\n"
"1:\tjmp 2f\n"
".align 16\n"
"2:\tdecl %0\n\tjns 2b"
- :/* no outputs */
- :"a" (loops)
- :"ax");
+ :"=&a" (d0)
+ :"0" (loops));
}
inline void __const_udelay(unsigned long xloops)
{
+ int d0;
__asm__("mull %0"
- :"=d" (xloops)
- :"a" (xloops),"0" (current_cpu_data.loops_per_sec)
- :"ax");
+ :"=d" (xloops), "=&a" (d0)
+ :"1" (xloops),"0" (current_cpu_data.loops_per_sec));
__delay(xloops);
}
diff --git a/arch/i386/lib/old-checksum.c b/arch/i386/lib/old-checksum.c
new file mode 100644
index 000000000..ae3a38043
--- /dev/null
+++ b/arch/i386/lib/old-checksum.c
@@ -0,0 +1,19 @@
+/*
+ * FIXME: old compatibility stuff, will be removed soon.
+ */
+
+#include <net/checksum.h>
+
+unsigned int csum_partial_copy( const char *src, char *dst, int len, int sum)
+{
+	int read_fault = 0;
+	int write_fault = 0;
+
+	/*
+	 * Legacy entry point without error reporting: run the generic
+	 * routine with local fault flags and only log if either was set.
+	 */
+	sum = csum_partial_copy_generic(src, dst, len, sum,
+					&read_fault, &write_fault);
+
+	if (read_fault != 0 || write_fault != 0)
+		printk("old csum_partial_copy_fromuser(), tell mingo to convert me.\n");
+
+	return sum;
+}
+
+
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
index d5b052c20..f43be511f 100644
--- a/arch/i386/lib/usercopy.c
+++ b/arch/i386/lib/usercopy.c
@@ -29,6 +29,8 @@ __generic_copy_from_user(void *to, const void *from, unsigned long n)
*/
#define __do_strncpy_from_user(dst,src,count,res) \
+do { \
+ int __d0, __d1, __d2; \
__asm__ __volatile__( \
" testl %1,%1\n" \
" jz 2f\n" \
@@ -41,16 +43,18 @@ __generic_copy_from_user(void *to, const void *from, unsigned long n)
"1: subl %1,%0\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
- "3: movl %2,%0\n" \
+ "3: movl %5,%0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,3b\n" \
".previous" \
- : "=d"(res), "=c"(count) \
- : "i"(-EFAULT), "0"(count), "1"(count), "S"(src), "D"(dst) \
- : "si", "di", "ax", "memory")
+ : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
+ "=&D" (__d2) \
+ : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
+ : "memory"); \
+} while (0)
long
__strncpy_from_user(char *dst, const char *src, long count)
@@ -74,14 +78,16 @@ strncpy_from_user(char *dst, const char *src, long count)
* Zero Userspace
*/
-#define __do_clear_user(addr,size) \
- __asm__ __volatile__( \
+#define __do_clear_user(addr,size) \
+do { \
+ int __d0; \
+ __asm__ __volatile__( \
"0: rep; stosl\n" \
- " movl %1,%0\n" \
+ " movl %2,%0\n" \
"1: rep; stosb\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
- "3: lea 0(%1,%0,4),%0\n" \
+ "3: lea 0(%2,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
@@ -89,9 +95,9 @@ strncpy_from_user(char *dst, const char *src, long count)
" .long 0b,3b\n" \
" .long 1b,2b\n" \
".previous" \
- : "=&c"(size) \
- : "r"(size & 3), "0"(size / 4), "D"(addr), "a"(0) \
- : "di")
+ : "=&c"(size), "=&D" (__d0) \
+ : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \
+} while (0)
unsigned long
clear_user(void *to, unsigned long n)
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 693072b1a..dc96ad4bb 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -119,24 +119,28 @@ int do_check_pgt_cache(int low, int high)
pte_t * __bad_pagetable(void)
{
extern char empty_bad_page_table[PAGE_SIZE];
-
- __asm__ __volatile__("cld ; rep ; stosl":
- :"a" (pte_val(BAD_PAGE)),
- "D" ((long) empty_bad_page_table),
- "c" (PAGE_SIZE/4)
- :"di","cx");
+ int d0, d1;
+
+ __asm__ __volatile__("cld ; rep ; stosl"
+ : "=&D" (d0), "=&c" (d1)
+ : "a" (pte_val(BAD_PAGE)),
+ "0" ((long) empty_bad_page_table),
+ "1" (PAGE_SIZE/4)
+ : "memory");
return (pte_t *) empty_bad_page_table;
}
pte_t __bad_page(void)
{
extern char empty_bad_page[PAGE_SIZE];
-
- __asm__ __volatile__("cld ; rep ; stosl":
- :"a" (0),
- "D" ((long) empty_bad_page),
- "c" (PAGE_SIZE/4)
- :"di","cx");
+ int d0, d1;
+
+ __asm__ __volatile__("cld ; rep ; stosl"
+ : "=&D" (d0), "=&c" (d1)
+ : "a" (0),
+ "0" ((long) empty_bad_page),
+ "1" (PAGE_SIZE/4)
+ : "memory");
return pte_mkdirty(mk_pte((unsigned long) empty_bad_page, PAGE_SHARED));
}
@@ -275,38 +279,6 @@ __initfunc(unsigned long paging_init(unsigned long start_mem, unsigned long end_
* kernel.
* It may also hold the MP configuration table when we are booting SMP.
*/
-#ifdef __SMP__
- /*
- * FIXME: Linux assumes you have 640K of base ram..
- * this continues the error...
- *
- * 1) Scan the bottom 1K for a signature
- * 2) Scan the top 1K of base RAM
- * 3) Scan the 64K of bios
- */
- if (!smp_scan_config(0x0,0x400) &&
- !smp_scan_config(639*0x400,0x400) &&
- !smp_scan_config(0xF0000,0x10000)) {
- /*
- * If it is an SMP machine we should know now, unless the
- * configuration is in an EISA/MCA bus machine with an
- * extended bios data area.
- *
- * there is a real-mode segmented pointer pointing to the
- * 4K EBDA area at 0x40E, calculate and scan it here.
- *
- * NOTE! There are Linux loaders that will corrupt the EBDA
- * area, and as such this kind of SMP config may be less
- * trustworthy, simply because the SMP table may have been
- * stomped on during early boot.
- */
- address = *(unsigned short *)phys_to_virt(0x40E);
- address<<=4;
- smp_scan_config(address, 0x1000);
- if (smp_found_config)
- printk(KERN_WARNING "WARNING: MP table in the EBDA can be UNSAFE, contact linux-smp@vger.rutgers.edu if you experience SMP problems!\n");
- }
-#endif
start_mem = PAGE_ALIGN(start_mem);
address = PAGE_OFFSET;
pg_dir = swapper_pg_dir;
@@ -403,7 +375,7 @@ __initfunc(void test_wp_bit(void))
if (boot_cpu_data.wp_works_ok < 0) {
boot_cpu_data.wp_works_ok = 0;
printk("No.\n");
-#ifndef CONFIG_M386
+#ifdef CONFIG_X86_WP_WORKS_OK
panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
#endif
} else
diff --git a/arch/i386/vmlinux.lds b/arch/i386/vmlinux.lds
index c23007bc8..203b9a927 100644
--- a/arch/i386/vmlinux.lds
+++ b/arch/i386/vmlinux.lds
@@ -45,9 +45,13 @@ SECTIONS
. = ALIGN(4096);
__init_end = .;
+ . = ALIGN(32);
+ .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
. = ALIGN(4096);
.data.page_aligned : { *(.data.idt) }
+
__bss_start = .; /* BSS */
.bss : {
*(.bss)