author      Ralf Baechle <ralf@linux-mips.org>    1994-12-01 08:00:00 +0000
committer   <ralf@linux-mips.org>                 1994-12-01 08:00:00 +0000
commit      90ecc248e200fee448001248dde0ca540dd3ef64
tree        a3fe89494ce63b4835f0f9cf5c45e74cde88252b /arch/i386
parent      1513ff9b7899ab588401c89db0e99903dbf5f886
Import of Linux/MIPS 1.1.68
Diffstat (limited to 'arch/i386')
-rw-r--r--  arch/i386/Makefile        |    2
-rw-r--r--  arch/i386/bios32.c        |  476
-rw-r--r--  arch/i386/bootsect.S      |  460
-rw-r--r--  arch/i386/dummy.c         |   11
-rw-r--r--  arch/i386/head.S          |  349
-rw-r--r--  arch/i386/ioport.c        |  194
-rw-r--r--  arch/i386/irq.c           |  354
-rw-r--r--  arch/i386/ldt.c           |  103
-rw-r--r--  arch/i386/main.c          |  481
-rw-r--r--  arch/i386/mm/Makefile     |   30
-rw-r--r--  arch/i386/mm/kmalloc.c    |  362
-rw-r--r--  arch/i386/mm/memory.c     | 1320
-rw-r--r--  arch/i386/mm/mmap.c       |  470
-rw-r--r--  arch/i386/mm/mprotect.c   |  230
-rw-r--r--  arch/i386/mm/swap.c       | 1017
-rw-r--r--  arch/i386/mm/vmalloc.c    |  202
-rw-r--r--  arch/i386/ptrace.c        |  517
-rw-r--r--  arch/i386/sched.c         |  861
-rw-r--r--  arch/i386/signal.c        |  407
-rw-r--r--  arch/i386/traps.c         |  245
-rw-r--r--  arch/i386/vm86.c          |  404
21 files changed, 8494 insertions(+), 1 deletion(-)
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index 1b878067a..b1aebc71a 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -55,7 +55,7 @@ zlilo: $(CONFIGURE) zImage
LOWLDFLAGS =-qmagic -Ttext 0xfe0
HIGHLDFLAGS =-qmagic -Ttext 0xfffe0
-tools/system: boot/head.o init/main.o tools/version.o linuxsubdirs
+tools/system: boot/head.o init/main.o init/init.o tools/version.o linuxsubdirs
$(LD) $(LOWLDFLAGS) boot/head.o init/main.o tools/version.o \
$(ARCHIVES) \
$(FILESYSTEMS) \
diff --git a/arch/i386/bios32.c b/arch/i386/bios32.c
new file mode 100644
index 000000000..311dd111e
--- /dev/null
+++ b/arch/i386/bios32.c
@@ -0,0 +1,476 @@
+/*
+ * bios32.c - BIOS32, PCI BIOS functions.
+ *
+ * Sponsored by
+ * iX Multiuser Multitasking Magazine
+ * Hannover, Germany
+ * hm@ix.de
+ *
+ * Copyright 1993, 1994 Drew Eckhardt
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * Drew@Colorado.EDU
+ * +1 (303) 786-7975
+ *
+ * For more information, please consult
+ *
+ * PCI BIOS Specification Revision
+ * PCI Local Bus Specification
+ * PCI System Design Guide
+ *
+ * PCI Special Interest Group
+ * M/S HF3-15A
+ * 5200 N.E. Elam Young Parkway
+ * Hillsboro, Oregon 97124-6497
+ * +1 (503) 696-2000
+ * +1 (800) 433-5177
+ *
+ * Manuals are $25 each or $50 for all three, plus $7 shipping
+ * within the United States, $35 abroad.
+ *
+ *
+ * CHANGELOG :
+ * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION
+ * Revision 2.0 present on <thys@dennis.ee.up.ac.za>'s ASUS mainboard.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/bios32.h>
+#include <linux/pci.h>
+
+#include <asm/segment.h>
+
+/*
+ * It would seem some PCI bioses are buggy, so we don't actually use these
+ * routines unless we need to..
+ */
+#ifdef CONFIG_SCSI_NCR53C7xx
+ #define CONFIG_PCI
+#else
+ #undef CONFIG_PCI
+#endif
+
+#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX
+#define PCIBIOS_PCI_BIOS_PRESENT 0xb101
+#define PCIBIOS_FIND_PCI_DEVICE 0xb102
+#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103
+#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106
+#define PCIBIOS_READ_CONFIG_BYTE 0xb108
+#define PCIBIOS_READ_CONFIG_WORD 0xb109
+#define PCIBIOS_READ_CONFIG_DWORD 0xb10a
+#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b
+#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c
+#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d
+
+/* BIOS32 signature: "_32_" */
+#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
+
+/* PCI signature: "PCI " */
+#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24))
+
+/* PCI service signature: "$PCI" */
+#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24))
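These shift-and-add constants pack the four signature characters in little-endian byte order, so a 32-bit load from memory can be compared against them directly. A small host-side sketch of the equivalence (hypothetical check, assuming a little-endian machine such as the i386):

#include <assert.h>
#include <string.h>

int main(void)
{
	unsigned int sig = ('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24);
	char bytes[4];

	/* On a little-endian CPU the packed constant occupies the same
	 * four bytes in memory as the string "PCI ". */
	memcpy(bytes, &sig, 4);
	assert(memcmp(bytes, "PCI ", 4) == 0);
	return 0;
}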
+
+/*
+ * This is the standard structure used to identify the entry point
+ * to the BIOS32 Service Directory, as documented in
+ * Standard BIOS 32-bit Service Directory Proposal
+ * Revision 0.4 May 24, 1993
+ * Phoenix Technologies Ltd.
+ * Norwood, MA
+ * and the PCI BIOS specification.
+ */
+
+union bios32 {
+ struct {
+ unsigned long signature; /* _32_ */
+ unsigned long entry; /* 32 bit physical address */
+ unsigned char revision; /* Revision level, 0 */
+ unsigned char length; /* Length in paragraphs should be 01 */
+ unsigned char checksum; /* All bytes must add up to zero */
+ unsigned char reserved[5]; /* Must be zero */
+ } fields;
+ char chars[16];
+};
+
+/*
+ * Physical address of the service directory. I don't know if we're
+ * allowed to have more than one of these or not, so just in case
+ * we'll make bios32_init() take a memory start parameter and store
+ * the array there.
+ */
+
+static unsigned long bios32_entry = 0;
+static struct {
+ unsigned long address;
+ unsigned short segment;
+} bios32_indirect = { 0, KERNEL_CS };
+
+#ifdef CONFIG_PCI
+/*
+ * Returns the entry point for the given service, NULL on error
+ */
+
+static unsigned long bios32_service(unsigned long service)
+{
+ unsigned char return_code; /* %al */
+ unsigned long address; /* %ebx */
+ unsigned long length; /* %ecx */
+ unsigned long entry; /* %edx */
+
+ __asm__("lcall (%%edi)"
+ : "=a" (return_code),
+ "=b" (address),
+ "=c" (length),
+ "=d" (entry)
+ : "0" (service),
+ "1" (0),
+ "D" (&bios32_indirect));
+
+ switch (return_code) {
+ case 0:
+ return address + entry;
+ case 0x80: /* Not present */
+ printk("bios32_service(%ld) : not present\n", service);
+ return 0;
+ default: /* Shouldn't happen */
+ printk("bios32_service(%ld) : returned 0x%x, mail drew@colorado.edu\n",
+ service, return_code);
+ return 0;
+ }
+}
+
+static long pcibios_entry = 0;
+static struct {
+ unsigned long address;
+ unsigned short segment;
+} pci_indirect = { 0, KERNEL_CS };
+
+void NCR53c810_test(void);
+
+static unsigned long pcibios_init(unsigned long memory_start, unsigned long memory_end)
+{
+ unsigned long signature;
+ unsigned char present_status;
+ unsigned char major_revision;
+ unsigned char minor_revision;
+ int pack;
+
+ if ((pcibios_entry = bios32_service(PCI_SERVICE))) {
+ pci_indirect.address = pcibios_entry;
+
+ __asm__("lcall (%%edi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:\tshl $8, %%eax\n\t"
+ "movw %%bx, %%ax"
+ : "=d" (signature),
+ "=a" (pack)
+ : "1" (PCIBIOS_PCI_BIOS_PRESENT),
+ "D" (&pci_indirect)
+ : "bx", "cx");
+
+ present_status = (pack >> 16) & 0xff;
+ major_revision = (pack >> 8) & 0xff;
+ minor_revision = pack & 0xff;
+ if (present_status || (signature != PCI_SIGNATURE)) {
+ printk ("pcibios_init : %s : BIOS32 Service Directory says PCI BIOS is present,\n"
+ " but PCI_BIOS_PRESENT subfunction fails with present status of 0x%x\n"
+ " and signature of 0x%08lx (%c%c%c%c). mail drew@Colorado.EDU\n",
+ (signature == PCI_SIGNATURE) ? "WARNING" : "ERROR",
+ present_status, signature,
+ (char) (signature >> 0), (char) (signature >> 8),
+ (char) (signature >> 16), (char) (signature >> 24));
+
+ if (signature != PCI_SIGNATURE)
+ pcibios_entry = 0;
+ }
+ if (pcibios_entry) {
+ printk ("pcibios_init : PCI BIOS revision %x.%02x entry at 0x%lx\n",
+ major_revision, minor_revision, pcibios_entry);
+ }
+ }
+
+#if 0
+ NCR53c810_test();
+#endif
+ return memory_start;
+}
+
+int pcibios_present(void)
+{
+ return pcibios_entry ? 1 : 0;
+}
+
+int pcibios_find_class_code (unsigned long class_code, unsigned short index,
+ unsigned char *bus, unsigned char *device_fn)
+{
+ unsigned long bx;
+ unsigned long ret;
+
+ __asm__ ("lcall (%%edi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=b" (bx),
+ "=a" (ret)
+ : "1" (PCIBIOS_FIND_PCI_CLASS_CODE),
+ "c" (class_code),
+ "S" ((int) index),
+ "D" (&pci_indirect));
+ *bus = (bx >> 8) & 0xff;
+ *device_fn = bx & 0xff;
+ return (int) (ret & 0xff00) >> 8;
+}
+
+
+int pcibios_find_device (unsigned short vendor, unsigned short device_id,
+ unsigned short index, unsigned char *bus, unsigned char *device_fn)
+{
+ unsigned short bx;
+ unsigned short ret;
+
+ __asm__("lcall (%%edi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=b" (bx),
+ "=a" (ret)
+ : "1" (PCIBIOS_FIND_PCI_DEVICE),
+ "c" (device_id),
+ "d" (vendor),
+ "S" ((int) index),
+ "D" (&pci_indirect));
+ *bus = (bx >> 8) & 0xff;
+ *device_fn = bx & 0xff;
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_read_config_byte(unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned char *value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (ret)
+ : "1" (PCIBIOS_READ_CONFIG_BYTE),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_read_config_word (unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned short *value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (ret)
+ : "1" (PCIBIOS_READ_CONFIG_WORD),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_read_config_dword (unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned long *value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (ret)
+ : "1" (PCIBIOS_READ_CONFIG_DWORD),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_write_config_byte (unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned char value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (ret)
+ : "0" (PCIBIOS_WRITE_CONFIG_BYTE),
+ "c" (value),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_write_config_word (unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned short value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (ret)
+ : "0" (PCIBIOS_WRITE_CONFIG_WORD),
+ "c" (value),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_write_config_dword (unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned long value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (ret)
+ : "0" (PCIBIOS_WRITE_CONFIG_DWORD),
+ "c" (value),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+void NCR53c810_test(void)
+{
+ unsigned char bus, device_fn;
+ unsigned short index;
+ int ret;
+ unsigned char row, col;
+ unsigned long val;
+
+ for (index = 0; index < 4; ++index) {
+ ret = pcibios_find_device (
+ (unsigned short) PCI_VENDOR_ID_NCR,
+ (unsigned short) PCI_DEVICE_ID_NCR_53C810,
+ index, &bus, &device_fn);
+ if (ret)
+ break;
+ printk ("ncr53c810 : at PCI bus %d, device %d, function %d.",
+ bus, ((device_fn & 0xf8) >> 3), (device_fn & 7));
+ for (row = 0; row < 0x3c; row += 0x10) {
+ printk ("\n reg 0x%02x ", row);
+ for (col = 0; col < 0x10; col += 4) {
+ if (!(ret = pcibios_read_config_dword (bus, device_fn, row+col, &val)))
+ printk ("0x%08lx ", val);
+ else
+ printk ("error 0x%02x ", ret);
+ }
+ }
+ printk ("\n");
+ }
+}
+
+char *pcibios_strerror (int error)
+{
+ static char buf[80];
+
+ switch (error) {
+ case PCIBIOS_SUCCESSFUL:
+ return "SUCCESSFUL";
+
+ case PCIBIOS_FUNC_NOT_SUPPORTED:
+ return "FUNC_NOT_SUPPORTED";
+
+ case PCIBIOS_BAD_VENDOR_ID:
+ return "SUCCESSFUL";
+
+ case PCIBIOS_DEVICE_NOT_FOUND:
+ return "DEVICE_NOT_FOUND";
+
+ case PCIBIOS_BAD_REGISTER_NUMBER:
+ return "BAD_REGISTER_NUMBER";
+
+ default:
+ sprintf (buf, "UNKNOWN RETURN 0x%x", error);
+ return buf;
+ }
+}
+
+#endif
+
+unsigned long bios32_init(unsigned long memory_start, unsigned long memory_end)
+{
+ union bios32 *check;
+ unsigned char sum;
+ int i, length;
+
+ /*
+ * Follow the standard procedure for locating the BIOS32 Service
+ * directory by scanning the permissible address range from
+ * 0xe0000 through 0xfffff for a valid BIOS32 structure.
+ *
+ * The PCI BIOS doesn't seem to work too well on many machines,
+ * so we disable this unless it's really needed (NCR SCSI driver)
+ */
+
+ for (check = (union bios32 *) 0xe0000; check <= (union bios32 *) 0xffff0; ++check) {
+ if (check->fields.signature != BIOS32_SIGNATURE)
+ continue;
+ length = check->fields.length * 16;
+ if (!length)
+ continue;
+ sum = 0;
+ for (i = 0; i < length ; ++i)
+ sum += check->chars[i];
+ if (sum != 0)
+ continue;
+ if (check->fields.revision != 0) {
+ printk("bios32_init : unsupported revision %d at 0x%p, mail drew@colorado.edu\n",
+ check->fields.revision, check);
+ continue;
+ }
+ printk ("bios32_init : BIOS32 Service Directory structure at 0x%p\n", check);
+ if (!bios32_entry) {
+ bios32_indirect.address = bios32_entry = check->fields.entry;
+ printk ("bios32_init : BIOS32 Service Directory entry at 0x%lx\n", bios32_entry);
+ } else {
+ printk ("bios32_init : multiple entries, mail drew@colorado.edu\n");
+ /*
+ * Jeremy Fitzhardinge reports at least one PCI BIOS
+ * with two different service directories, and as both
+ * worked for him, we'll just mention the fact, and
+ * not actually disallow it..
+ */
+#if 0
+ return memory_start;
+#endif
+ }
+ }
+#ifdef CONFIG_PCI
+ if (bios32_entry) {
+ memory_start = pcibios_init (memory_start, memory_end);
+ }
+#endif
+ return memory_start;
+}
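The scan in bios32_init accepts a candidate header only if the dword signature reads "_32_", the length field is non-zero, and all length*16 bytes sum to zero modulo 256. The same acceptance test pulled out as a standalone predicate (a sketch mirroring the loop's logic, not a function from this file):

/* Return 1 if the paragraph-aligned candidate looks like a valid
 * BIOS32 Service Directory header, 0 otherwise. */
static int bios32_header_ok(const union bios32 *check)
{
	unsigned char sum = 0;
	int i, length;

	if (check->fields.signature != BIOS32_SIGNATURE)
		return 0;
	length = check->fields.length * 16;	/* length is in 16-byte paragraphs */
	if (!length)
		return 0;
	for (i = 0; i < length; i++)
		sum += check->chars[i];		/* all bytes must add up to zero */
	return sum == 0;
}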
diff --git a/arch/i386/bootsect.S b/arch/i386/bootsect.S
new file mode 100644
index 000000000..f6a0d3158
--- /dev/null
+++ b/arch/i386/bootsect.S
@@ -0,0 +1,460 @@
+!
+! SYS_SIZE is the number of clicks (16 bytes) to be loaded.
+! 0x7F00 is 0x7F000 bytes = 508kB, more than enough for current
+! versions of linux which compress the kernel
+!
+#include <linux/config.h>
+SYSSIZE = DEF_SYSSIZE
+!
+! bootsect.s Copyright (C) 1991, 1992 Linus Torvalds
+! modified by Drew Eckhardt
+! modified by Bruce Evans (bde)
+!
+! bootsect.s is loaded at 0x7c00 by the bios-startup routines, and moves
+! itself out of the way to address 0x90000, and jumps there.
+!
+! bde - should not jump blindly, there may be systems with only 512K low
+! memory. Use int 0x12 to get the top of memory, etc.
+!
+! It then loads 'setup' directly after itself (0x90200), and the system
+! at 0x10000, using BIOS interrupts.
+!
+! NOTE! currently system is at most (8*65536-4096) bytes long. This should
+! be no problem, even in the future. I want to keep it simple. This 508 kB
+! kernel size should be enough, especially as this doesn't contain the
+! buffer cache as in minix (and especially now that the kernel is
+! compressed :-)
+!
+! The loader has been made as simple as possible, and continuous
+! read errors will result in an unbreakable loop. Reboot by hand. It
+! loads pretty fast by getting whole tracks at a time whenever possible.
+
+.text
+
+SETUPSECS = 4 ! nr of setup-sectors
+BOOTSEG = 0x07C0 ! original address of boot-sector
+INITSEG = DEF_INITSEG ! we move boot here - out of the way
+SETUPSEG = DEF_SETUPSEG ! setup starts here
+SYSSEG = DEF_SYSSEG ! system loaded at 0x10000 (65536).
+
+! ROOT_DEV & SWAP_DEV are now written by "build".
+ROOT_DEV = 0
+SWAP_DEV = 0
+#ifndef SVGA_MODE
+#define SVGA_MODE ASK_VGA
+#endif
+#ifndef RAMDISK
+#define RAMDISK 0
+#endif
+#ifndef CONFIG_ROOT_RDONLY
+#define CONFIG_ROOT_RDONLY 0
+#endif
+
+! ld86 requires an entry symbol. This may as well be the usual one.
+.globl _main
+_main:
+#if 0 /* hook for debugger, harmless unless BIOS is fussy (old HP) */
+ int 3
+#endif
+ mov ax,#BOOTSEG
+ mov ds,ax
+ mov ax,#INITSEG
+ mov es,ax
+ mov cx,#256
+ sub si,si
+ sub di,di
+ cld
+ rep
+ movsw
+ jmpi go,INITSEG
+
+! ax and es already contain INITSEG
+
+go: mov di,#0x4000-12 ! 0x4000 is arbitrary value >= length of
+ ! bootsect + length of setup + room for stack
+ ! 12 is disk parm size
+
+! bde - changed 0xff00 to 0x4000 to use debugger at 0x6400 up (bde). We
+! wouldn't have to worry about this if we checked the top of memory. Also
+! my BIOS can be configured to put the wini drive tables in high memory
+! instead of in the vector table. The old stack might have clobbered the
+! drive table.
+
+ mov ds,ax
+ mov ss,ax ! put stack at INITSEG:0x4000-12.
+ mov sp,di
+/*
+ * Many BIOS's default disk parameter tables will not
+ * recognize multi-sector reads beyond the maximum sector number
+ * specified in the default diskette parameter tables - this may
+ * mean 7 sectors in some cases.
+ *
+ * Since single sector reads are slow and out of the question,
+ * we must take care of this by creating new parameter tables
+ * (for the first disk) in RAM. We will set the maximum sector
+ * count to 36 - the most we will encounter on an ED 2.88.
+ *
+ * High doesn't hurt. Low does.
+ *
+ * Segments are as follows: ds=es=ss=cs - INITSEG,
+ * fs = 0, gs is unused.
+ */
+
+! cx contains 0 from rep movsw above
+
+ mov fs,cx
+ mov bx,#0x78 ! fs:bx is parameter table address
+ push ds
+ seg fs
+ lds si,(bx) ! ds:si is source
+
+ mov cl,#6 ! copy 12 bytes
+ cld
+ push di
+
+ rep
+ movsw
+
+ pop di
+ pop ds
+
+ movb 4(di),*36 ! patch sector count
+
+ seg fs
+ mov (bx),di
+ seg fs
+ mov 2(bx),es
+
+! load the setup-sectors directly after the bootblock.
+! Note that 'es' is already set up.
+! Also cx is 0 from rep movsw above.
+
+load_setup:
+ xor ah,ah ! reset FDC
+ xor dl,dl
+ int 0x13
+
+ xor dx, dx ! drive 0, head 0
+ mov cl,#0x02 ! sector 2, track 0
+ mov bx,#0x0200 ! address = 512, in INITSEG
+ mov ah,#0x02 ! service 2, nr of sectors
+ mov al,setup_sects ! (assume all on head 0, track 0)
+ int 0x13 ! read it
+ jnc ok_load_setup ! ok - continue
+
+ push ax ! dump error code
+ call print_nl
+ mov bp, sp
+ call print_hex
+ pop ax
+
+ jmp load_setup
+
+ok_load_setup:
+
+! Get disk drive parameters, specifically nr of sectors/track
+
+#if 0
+
+! bde - the Phoenix BIOS manual says function 0x08 only works for fixed
+! disks. It doesn't work for one of my BIOS's (1987 Award). It was
+! fatal not to check the error code.
+
+ xor dl,dl
+ mov ah,#0x08 ! AH=8 is get drive parameters
+ int 0x13
+ xor ch,ch
+#else
+
+! It seems that there is no BIOS call to get the number of sectors. Guess
+! 36 sectors if sector 36 can be read, 18 sectors if sector 18 can be read,
+! 15 if sector 15 can be read. Otherwise guess 9.
+
+ mov si,#disksizes ! table of sizes to try
+
+probe_loop:
+ lodsb
+ cbw ! extend to word
+ mov sectors, ax
+ cmp si,#disksizes+4
+ jae got_sectors ! if all else fails, try 9
+ xchg ax, cx ! cx = track and sector
+ xor dx, dx ! drive 0, head 0
+ xor bl, bl
+ mov bh,setup_sects
+ inc bh
+ shl bh,#1 ! address after setup (es = cs)
+ mov ax,#0x0201 ! service 2, 1 sector
+ int 0x13
+ jc probe_loop ! try next value
+
+#endif
+
+got_sectors:
+
+! Restore es
+
+ mov ax,#INITSEG
+ mov es,ax
+
+! Print some inane message
+
+ mov ah,#0x03 ! read cursor pos
+ xor bh,bh
+ int 0x10
+
+ mov cx,#9
+ mov bx,#0x0007 ! page 0, attribute 7 (normal)
+ mov bp,#msg1
+ mov ax,#0x1301 ! write string, move cursor
+ int 0x10
+
+! ok, we've written the message, now
+! we want to load the system (at 0x10000)
+
+ mov ax,#SYSSEG
+ mov es,ax ! segment of 0x010000
+ call read_it
+ call kill_motor
+ call print_nl
+
+! After that we check which root-device to use. If the device is
+! defined (!= 0), nothing is done and the given device is used.
+! Otherwise, one of /dev/fd0H2880 (2,32) or /dev/PS0 (2,28) or /dev/at0 (2,8),
+! depending on the number of sectors we pretend to know we have.
+
+ seg cs
+ mov ax,root_dev
+ or ax,ax
+ jne root_defined
+ seg cs
+ mov bx,sectors
+	mov	ax,#0x0208		! /dev/at0 - 1.2Mb
+ cmp bx,#15
+ je root_defined
+ mov al,#0x1c ! /dev/PS0 - 1.44Mb
+ cmp bx,#18
+ je root_defined
+ mov al,#0x20 ! /dev/fd0H2880 - 2.88Mb
+ cmp bx,#36
+ je root_defined
+ mov al,#0 ! /dev/fd0 - autodetect
+root_defined:
+ seg cs
+ mov root_dev,ax
+
+! after that (everything loaded), we jump to
+! the setup-routine loaded directly after
+! the bootblock:
+
+ jmpi 0,SETUPSEG
+
+! This routine loads the system at address 0x10000, making sure
+! no 64kB boundaries are crossed. We try to load it as fast as
+! possible, loading whole tracks whenever we can.
+!
+! in: es - starting address segment (normally 0x1000)
+!
+sread: .word 0 ! sectors read of current track
+head: .word 0 ! current head
+track: .word 0 ! current track
+
+read_it:
+ mov al,setup_sects
+ inc al
+ mov sread,al
+ mov ax,es
+ test ax,#0x0fff
+die: jne die ! es must be at 64kB boundary
+ xor bx,bx ! bx is starting address within segment
+rp_read:
+ mov ax,es
+ sub ax,#SYSSEG
+ cmp ax,syssize ! have we loaded all yet?
+ jbe ok1_read
+ ret
+ok1_read:
+ mov ax,sectors
+ sub ax,sread
+ mov cx,ax
+ shl cx,#9
+ add cx,bx
+ jnc ok2_read
+ je ok2_read
+ xor ax,ax
+ sub ax,bx
+ shr ax,#9
+ok2_read:
+ call read_track
+ mov cx,ax
+ add ax,sread
+ cmp ax,sectors
+ jne ok3_read
+ mov ax,#1
+ sub ax,head
+ jne ok4_read
+ inc track
+ok4_read:
+ mov head,ax
+ xor ax,ax
+ok3_read:
+ mov sread,ax
+ shl cx,#9
+ add bx,cx
+ jnc rp_read
+ mov ax,es
+ add ah,#0x10
+ mov es,ax
+ xor bx,bx
+ jmp rp_read
+
+read_track:
+ pusha
+ pusha
+ mov ax, #0xe2e ! loading... message 2e = .
+ mov bx, #7
+ int 0x10
+ popa
+
+ mov dx,track
+ mov cx,sread
+ inc cx
+ mov ch,dl
+ mov dx,head
+ mov dh,dl
+ and dx,#0x0100
+ mov ah,#2
+
+ push dx ! save for error dump
+ push cx
+ push bx
+ push ax
+
+ int 0x13
+ jc bad_rt
+ add sp, #8
+ popa
+ ret
+
+bad_rt: push ax ! save error code
+ call print_all ! ah = error, al = read
+
+
+ xor ah,ah
+ xor dl,dl
+ int 0x13
+
+
+ add sp, #10
+ popa
+ jmp read_track
+
+/*
+ * print_all is for debugging purposes.
+ * It will print out all of the registers. The assumption is that this is
+ * called from a routine, with a stack frame like
+ * dx
+ * cx
+ * bx
+ * ax
+ * error
+ * ret <- sp
+ *
+*/
+
+print_all:
+ mov cx, #5 ! error code + 4 registers
+ mov bp, sp
+
+print_loop:
+ push cx ! save count left
+ call print_nl ! nl for readability
+
+ cmp cl, #5
+ jae no_reg ! see if register name is needed
+
+ mov ax, #0xe05 + 'A - 1
+ sub al, cl
+ int 0x10
+
+ mov al, #'X
+ int 0x10
+
+ mov al, #':
+ int 0x10
+
+no_reg:
+ add bp, #2 ! next register
+ call print_hex ! print it
+ pop cx
+ loop print_loop
+ ret
+
+print_nl:
+ mov ax, #0xe0d ! CR
+ int 0x10
+ mov al, #0xa ! LF
+ int 0x10
+ ret
+
+/*
+ * print_hex is for debugging purposes, and prints the word
+ * pointed to by ss:bp in hexadecimal.
+*/
+
+print_hex:
+ mov cx, #4 ! 4 hex digits
+ mov dx, (bp) ! load word into dx
+print_digit:
+ rol dx, #4 ! rotate so that lowest 4 bits are used
+ mov ax, #0xe0f ! ah = request, al = mask for nybble
+ and al, dl
+ add al, #0x90 ! convert al to ascii hex (four instructions)
+ daa
+ adc al, #0x40
+ daa
+ int 0x10
+ loop print_digit
+ ret
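The add/daa/adc/daa sequence above is the classic branch-free conversion of a nybble in al to an upper-case ASCII hex digit. Its effect, written out in C for reference (an illustrative equivalent, not code from the boot sector):

/* What "add al,#0x90 ; daa ; adc al,#0x40 ; daa" computes:
 * map 0x0-0x9 to '0'-'9' and 0xa-0xf to 'A'-'F'. */
static char nybble_to_ascii(unsigned char n)
{
	n &= 0x0f;
	return n < 10 ? '0' + n : 'A' + (n - 10);
}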
+
+
+/*
+ * This procedure turns off the floppy drive motor, so
+ * that we enter the kernel in a known state, and
+ * don't have to worry about it later.
+ */
+kill_motor:
+ push dx
+ mov dx,#0x3f2
+ xor al, al
+ outb
+ pop dx
+ ret
+
+sectors:
+ .word 0
+
+disksizes:
+ .byte 36,18,15,9
+
+msg1:
+ .byte 13,10
+ .ascii "Loading"
+
+.org 497
+setup_sects:
+ .byte SETUPSECS
+root_flags:
+ .word CONFIG_ROOT_RDONLY
+syssize:
+ .word SYSSIZE
+swap_dev:
+ .word SWAP_DEV
+ram_size:
+ .word RAMDISK
+vid_mode:
+ .word SVGA_MODE
+root_dev:
+ .word ROOT_DEV
+boot_flag:
+ .word 0xAA55
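The .org 497 trailer pins these words at fixed offsets in the 512-byte boot sector, where the 'build' tool and the setup code expect them. Read from C, the layout corresponds to a packed struct like this (a sketch for orientation; field names follow the labels above):

/* Bytes 497..511 of the boot sector, as laid out by the .org block. */
struct bootsect_trailer {
	unsigned char  setup_sects;	/* 497: number of setup sectors */
	unsigned short root_flags;	/* 498: CONFIG_ROOT_RDONLY */
	unsigned short syssize;		/* 500: kernel size in 16-byte clicks */
	unsigned short swap_dev;	/* 502: swap device number */
	unsigned short ram_size;	/* 504: RAMDISK size */
	unsigned short vid_mode;	/* 506: SVGA_MODE */
	unsigned short root_dev;	/* 508: ROOT_DEV */
	unsigned short boot_flag;	/* 510: 0xAA55 signature */
} __attribute__((packed));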
diff --git a/arch/i386/dummy.c b/arch/i386/dummy.c
new file mode 100644
index 000000000..dd2410a7a
--- /dev/null
+++ b/arch/i386/dummy.c
@@ -0,0 +1,11 @@
+/*
+ * This file handles system calls that are not available on all CPUs.
+ *
+ * Written by Ralf Baechle,
+ * Copyright (C) 1994 by Waldorf GMBH
+ */
+
+/*
+ * Nothing yet for i386...
+ */
+
diff --git a/arch/i386/head.S b/arch/i386/head.S
new file mode 100644
index 000000000..e720c14d0
--- /dev/null
+++ b/arch/i386/head.S
@@ -0,0 +1,349 @@
+/*
+ * linux/boot/head.S
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * head.S contains the 32-bit startup code.
+ */
+
+.text
+.globl _idt,_gdt
+.globl _swapper_pg_dir,_pg0
+.globl _empty_bad_page
+.globl _empty_bad_page_table
+.globl _empty_zero_page
+.globl _floppy_track_buffer
+
+#include <linux/tasks.h>
+#include <linux/segment.h>
+#define ASSEMBLER
+#include <linux/fd.h>
+
+#define CL_MAGIC_ADDR 0x90020
+#define CL_MAGIC 0xA33F
+#define CL_BASE_ADDR 0x90000
+#define CL_OFFSET 0x90022
+
+/*
+ * swapper_pg_dir is the main page directory, address 0x00001000 (or at
+ * address 0x00101000 for a compressed boot).
+ */
+startup_32:
+ cld
+ movl $(KERNEL_DS),%eax
+ mov %ax,%ds
+ mov %ax,%es
+ mov %ax,%fs
+ mov %ax,%gs
+ lss _stack_start,%esp
+/*
+ * Clear BSS first so that there are no surprises...
+ */
+ xorl %eax,%eax
+ movl $__edata,%edi
+ movl $__end,%ecx
+ subl %edi,%ecx
+ cld
+ rep
+ stosb
+/*
+ * start system 32-bit setup. We need to re-do some of the things done
+ * in 16-bit mode for the "real" operations.
+ */
+ call setup_idt
+ xorl %eax,%eax
+1: incl %eax # check that A20 really IS enabled
+ movl %eax,0x000000 # loop forever if it isn't
+ cmpl %eax,0x100000
+ je 1b
+/*
+ * Initialize eflags. Some BIOS's leave bits like NT set. This would
+ * confuse the debugger if this code is traced.
+ * XXX - best to initialize before switching to protected mode.
+ */
+ pushl $0
+ popfl
+/*
+ * Copy bootup parameters out of the way. First 2kB of
+ * _empty_zero_page is for boot parameters, second 2kB
+ * is for the command line.
+ */
+ movl $0x90000,%esi
+ movl $_empty_zero_page,%edi
+ movl $512,%ecx
+ cld
+ rep
+ movsl
+ xorl %eax,%eax
+ movl $512,%ecx
+ rep
+ stosl
+ cmpw $(CL_MAGIC),CL_MAGIC_ADDR
+ jne 1f
+ movl $_empty_zero_page+2048,%edi
+ movzwl CL_OFFSET,%esi
+ addl $(CL_BASE_ADDR),%esi
+ movl $2048,%ecx
+ rep
+ movsb
+1:
+/* check if it is 486 or 386. */
+/*
+ * XXX - this does a lot of unnecessary setup. Alignment checks don't
+ * apply at our cpl of 0 and the stack ought to be aligned already, and
+ * we don't need to preserve eflags.
+ */
+ movl %esp,%edi # save stack pointer
+ andl $0xfffffffc,%esp # align stack to avoid AC fault
+ movl $3,_x86
+ pushfl # push EFLAGS
+ popl %eax # get EFLAGS
+ movl %eax,%ecx # save original EFLAGS
+ xorl $0x40000,%eax # flip AC bit in EFLAGS
+ pushl %eax # copy to EFLAGS
+ popfl # set EFLAGS
+ pushfl # get new EFLAGS
+ popl %eax # put it in eax
+ xorl %ecx,%eax # change in flags
+ andl $0x40000,%eax # check if AC bit changed
+ je is386
+ movl $4,_x86
+ movl %ecx,%eax
+ xorl $0x200000,%eax # check ID flag
+ pushl %eax
+ popfl # if we are on a straight 486DX, SX, or
+ pushfl # 487SX we can't change it
+ popl %eax
+ xorl %ecx,%eax
+ andl $0x200000,%eax
+ je is486
+isnew: pushl %ecx # restore original EFLAGS
+ popfl
+ movl $1, %eax # Use the CPUID instruction to
+ .byte 0x0f, 0xa2 # check the processor type
+ andl $0xf00, %eax # Set _x86 with the family
+ shrl $8, %eax # returned.
+ movl %eax, _x86
+ movl %edi,%esp # restore esp
+ movl %cr0,%eax # 486+
+ andl $0x80000011,%eax # Save PG,PE,ET
+ orl $0x50022,%eax # set AM, WP, NE and MP
+ jmp 2f
+is486: pushl %ecx # restore original EFLAGS
+ popfl
+ movl %edi,%esp # restore esp
+ movl %cr0,%eax # 486
+ andl $0x80000011,%eax # Save PG,PE,ET
+ orl $0x50022,%eax # set AM, WP, NE and MP
+ jmp 2f
+is386: pushl %ecx # restore original EFLAGS
+ popfl
+ movl %edi,%esp # restore esp
+ movl %cr0,%eax # 386
+ andl $0x80000011,%eax # Save PG,PE,ET
+ orl $2,%eax # set MP
+2: movl %eax,%cr0
+ call check_x87
+ call setup_paging
+ lgdt gdt_descr
+ lidt idt_descr
+ ljmp $(KERNEL_CS),$1f
+1: movl $(KERNEL_DS),%eax # reload all the segment registers
+ mov %ax,%ds # after changing gdt.
+ mov %ax,%es
+ mov %ax,%fs
+ mov %ax,%gs
+ lss _stack_start,%esp
+ xorl %eax,%eax
+ lldt %ax
+ pushl %eax # These are the parameters to main :-)
+ pushl %eax
+ pushl %eax
+ cld # gcc2 wants the direction flag cleared at all times
+ call _start_kernel
+L6:
+ jmp L6 # main should never return here, but
+ # just in case, we know what happens.
+
+/*
+ * We depend on ET to be correct. This checks for 287/387.
+ */
+check_x87:
+ movl $0,_hard_math
+ clts
+ fninit
+ fstsw %ax
+ cmpb $0,%al
+ je 1f
+ movl %cr0,%eax /* no coprocessor: have to set bits */
+ xorl $4,%eax /* set EM */
+ movl %eax,%cr0
+ ret
+.align 2
+1: movl $1,_hard_math
+ .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
+ ret
+
+/*
+ * setup_idt
+ *
+ * sets up an idt with 256 entries pointing to
+ * ignore_int, interrupt gates. It doesn't actually load the
+ * idt - that can be done only after paging has been enabled
+ * and the kernel moved to 0xC0000000. Interrupts
+ * are enabled elsewhere, when we can be relatively
+ * sure everything is ok.
+ */
+setup_idt:
+ lea ignore_int,%edx
+ movl $(KERNEL_CS << 16),%eax
+ movw %dx,%ax /* selector = 0x0010 = cs */
+ movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
+
+ lea _idt,%edi
+ mov $256,%ecx
+rp_sidt:
+ movl %eax,(%edi)
+ movl %edx,4(%edi)
+ addl $8,%edi
+ dec %ecx
+ jne rp_sidt
+ ret
+
+
+/*
+ * Setup_paging
+ *
+ * This routine sets up paging by setting the page bit
+ * in cr0. The page tables are set up, identity-mapping
+ * the first 4MB. The rest are initialized later.
+ *
+ * (ref: added support for up to 32mb, 17Apr92) -- Rik Faith
+ * (ref: update, 25Sept92) -- croutons@crunchy.uucp
+ * (ref: 92.10.11 - Linus Torvalds. Corrected 16M limit - no upper memory limit)
+ */
+.align 2
+setup_paging:
+ movl $1024*2,%ecx /* 2 pages - swapper_pg_dir+1 page table */
+ xorl %eax,%eax
+ movl $_swapper_pg_dir,%edi /* swapper_pg_dir is at 0x1000 */
+ cld;rep;stosl
+/* Identity-map the kernel in low 4MB memory for ease of transition */
+ movl $_pg0+7,_swapper_pg_dir /* set present bit/user r/w */
+/* But the real place is at 0xC0000000 */
+ movl $_pg0+7,_swapper_pg_dir+3072 /* set present bit/user r/w */
+ movl $_pg0+4092,%edi
+ movl $0x03ff007,%eax /* 4Mb - 4096 + 7 (r/w user,p) */
+ std
+1: stosl /* fill the page backwards - more efficient :-) */
+ subl $0x1000,%eax
+ jge 1b
+ cld
+ movl $_swapper_pg_dir,%eax
+ movl %eax,%cr3 /* cr3 - page directory start */
+ movl %cr0,%eax
+ orl $0x80000000,%eax
+ movl %eax,%cr0 /* set paging (PG) bit */
+ ret /* this also flushes the prefetch-queue */
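The +7 added to the page-table addresses and to the entries written by the stosl loop sets the low three bits of each entry: present, read/write, and user. A small decoding sketch of that entry format (hypothetical helper, standard i386 paging layout):

#define PG_PRESENT 0x1	/* page is mapped */
#define PG_RW      0x2	/* writable */
#define PG_USER    0x4	/* accessible from user mode */

/* Split an entry such as 0x03ff007 into frame address and flags. */
static void decode_pte(unsigned long pte,
		       unsigned long *frame, unsigned long *flags)
{
	*frame = pte & 0xfffff000;	/* 4kB-aligned physical frame */
	*flags = pte & 0x00000fff;	/* 7 == present | r/w | user */
}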
+
+/*
+ * page 0 is made non-existent, so that kernel NULL pointer references get
+ * caught. Thus the swapper page directory has been moved to 0x1000
+ *
+ * XXX Actually, the swapper page directory is at 0x1000 plus 1 megabyte,
+ * with the introduction of the compressed boot code. Theoretically,
+ * the original design of overlaying the startup code with the swapper
+ * page directory is still possible --- it would reduce the size of the kernel
+ * by 2-3k. This would be a good thing to do at some point.....
+ */
+.org 0x1000
+_swapper_pg_dir:
+/*
+ * The page tables are initialized to only 4MB here - the final page
+ * tables are set up later depending on memory size.
+ */
+.org 0x2000
+_pg0:
+
+.org 0x3000
+_empty_bad_page:
+
+.org 0x4000
+_empty_bad_page_table:
+
+.org 0x5000
+_empty_zero_page:
+
+.org 0x6000
+/*
+ * floppy_track_buffer is used to buffer one track of floppy data: it
+ * has to be separate from the tmp_floppy area, as otherwise a single-
+ * sector read/write can mess it up. It can contain one full cylinder (sic) of
+ * data (36*2*512 bytes).
+ */
+_floppy_track_buffer:
+ .fill 512*2*MAX_BUFFER_SECTORS,1,0
+
+/* This is the default interrupt "handler" :-) */
+int_msg:
+ .asciz "Unknown interrupt\n"
+.align 2
+ignore_int:
+ cld
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ push %ds
+ push %es
+ push %fs
+ movl $(KERNEL_DS),%eax
+ mov %ax,%ds
+ mov %ax,%es
+ mov %ax,%fs
+ pushl $int_msg
+ call _printk
+ popl %eax
+ pop %fs
+ pop %es
+ pop %ds
+ popl %edx
+ popl %ecx
+ popl %eax
+ iret
+
+/*
+ * The interrupt descriptor table has room for 256 idt's
+ */
+.align 4
+.word 0
+idt_descr:
+ .word 256*8-1 # idt contains 256 entries
+ .long 0xc0000000+_idt
+
+.align 4
+_idt:
+ .fill 256,8,0 # idt is uninitialized
+
+.align 4
+.word 0
+gdt_descr:
+ .word (8+2*NR_TASKS)*8-1
+ .long 0xc0000000+_gdt
+
+/*
+ * This gdt setup gives the kernel a 1GB address space at virtual
+ * address 0xC0000000 - space enough for expansion, I hope.
+ */
+.align 4
+_gdt:
+ .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x0000000000000000 /* not used */
+ .quad 0xc0c39a000000ffff /* 0x10 kernel 1GB code at 0xC0000000 */
+ .quad 0xc0c392000000ffff /* 0x18 kernel 1GB data at 0xC0000000 */
+ .quad 0x00cbfa000000ffff /* 0x23 user 3GB code at 0x00000000 */
+ .quad 0x00cbf2000000ffff /* 0x2b user 3GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* not used */
+ .quad 0x0000000000000000 /* not used */
+ .fill 2*NR_TASKS,8,0 /* space for LDT's and TSS's etc */
diff --git a/arch/i386/ioport.c b/arch/i386/ioport.c
new file mode 100644
index 000000000..c61690e3c
--- /dev/null
+++ b/arch/i386/ioport.c
@@ -0,0 +1,194 @@
+/*
+ * linux/kernel/ioport.c
+ *
+ * This contains the io-permission bitmap code - written by obz, with changes
+ * by Linus.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+
+static unsigned long ioport_registrar[IO_BITMAP_SIZE] = {0, /* ... */};
+
+#define _IODEBUG
+
+#ifdef IODEBUG
+static char * ios(unsigned long l)
+{
+ static char str[33] = { '\0' };
+ int i;
+ unsigned long mask;
+
+ for (i = 0, mask = 0x80000000; i < 32; ++i, mask >>= 1)
+ str[i] = (l & mask) ? '1' : '0';
+ return str;
+}
+
+static void dump_io_bitmap(void)
+{
+ int i, j;
+ int numl = sizeof(current->tss.io_bitmap) >> 2;
+
+ for (i = j = 0; j < numl; ++i)
+ {
+ printk("%4d [%3x]: ", 64*i, 64*i);
+ printk("%s ", ios(current->tss.io_bitmap[j++]));
+ if (j < numl)
+ printk("%s", ios(current->tss.io_bitmap[j++]));
+ printk("\n");
+ }
+}
+#endif
+
+/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
+asmlinkage void set_bitmap(unsigned long *bitmap,
+ short base, short extent, int new_value)
+{
+ int mask;
+ unsigned long *bitmap_base = bitmap + (base >> 5);
+ unsigned short low_index = base & 0x1f;
+ int length = low_index + extent;
+
+ if (low_index != 0) {
+ mask = (~0 << low_index);
+ if (length < 32)
+ mask &= ~(~0 << length);
+ if (new_value)
+ *bitmap_base++ |= mask;
+ else
+ *bitmap_base++ &= ~mask;
+ length -= 32;
+ }
+
+ mask = (new_value ? ~0 : 0);
+ while (length >= 32) {
+ *bitmap_base++ = mask;
+ length -= 32;
+ }
+
+ if (length > 0) {
+ mask = ~(~0 << length);
+ if (new_value)
+ *bitmap_base++ |= mask;
+ else
+ *bitmap_base++ &= ~mask;
+ }
+}
+
+/* Check for set bits in BITMAP starting at BASE, going to EXTENT. */
+asmlinkage int check_bitmap(unsigned long *bitmap, short base, short extent)
+{
+ int mask;
+ unsigned long *bitmap_base = bitmap + (base >> 5);
+ unsigned short low_index = base & 0x1f;
+ int length = low_index + extent;
+
+ if (low_index != 0) {
+ mask = (~0 << low_index);
+ if (length < 32)
+ mask &= ~(~0 << length);
+ if (*bitmap_base++ & mask)
+ return 1;
+ length -= 32;
+ }
+ while (length >= 32) {
+ if (*bitmap_base++ != 0)
+ return 1;
+ length -= 32;
+ }
+
+ if (length > 0) {
+ mask = ~(~0 << length);
+ if (*bitmap_base++ & mask)
+ return 1;
+ }
+ return 0;
+}
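Both routines split a request into a misaligned leading partial word, a run of whole 32-bit words, and a trailing partial word. A short exercise of the pair (a hypothetical demo function, using the routines exactly as declared above):

static unsigned long map[IO_BITMAP_SIZE];	/* all bits start clear */

static void bitmap_demo(void)
{
	set_bitmap(map, 0x3f8, 8, 1);		/* claim ports 0x3f8..0x3ff */
	if (check_bitmap(map, 0x3f0, 16))	/* overlap is now detected */
		printk("0x3f0-0x3ff: region in use\n");
	set_bitmap(map, 0x3f8, 8, 0);		/* release the region */
}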
+
+/*
+ * this changes the io permissions bitmap in the current task.
+ */
+asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+ if (from + num <= from)
+ return -EINVAL;
+ if (from + num > IO_BITMAP_SIZE*32)
+ return -EINVAL;
+ if (!suser())
+ return -EPERM;
+
+#ifdef IODEBUG
+ printk("io: from=%d num=%d %s\n", from, num, (turn_on ? "on" : "off"));
+#endif
+ set_bitmap((unsigned long *)current->tss.io_bitmap, from, num, !turn_on);
+ return 0;
+}
+
+unsigned int *stack;
+
+/*
+ * sys_iopl has to be used when you want to access the IO ports
+ * beyond the 0x3ff range: to get the full 65536 ports bitmapped
+ * you'd need 8kB of bitmaps/process, which is a bit excessive.
+ *
+ * Here we just change the eflags value on the stack: we allow
+ * only the super-user to do it. This depends on the stack-layout
+ * on system-call entry - see also fork() and the signal handling
+ * code.
+ */
+asmlinkage int sys_iopl(long ebx,long ecx,long edx,
+ long esi, long edi, long ebp, long eax, long ds,
+ long es, long fs, long gs, long orig_eax,
+ long eip,long cs,long eflags,long esp,long ss)
+{
+ unsigned int level = ebx;
+
+ if (level > 3)
+ return -EINVAL;
+ if (!suser())
+ return -EPERM;
+ *(&eflags) = (eflags & 0xffffcfff) | (level << 12);
+ return 0;
+}
+
+
+void snarf_region(unsigned int from, unsigned int num)
+{
+ if (from > IO_BITMAP_SIZE*32)
+ return;
+ if (from + num > IO_BITMAP_SIZE*32)
+ num = IO_BITMAP_SIZE*32 - from;
+ set_bitmap(ioport_registrar, from, num, 1);
+ return;
+}
+
+void release_region(unsigned int from, unsigned int num)
+{
+ if (from > IO_BITMAP_SIZE*32)
+ return;
+ if (from + num > IO_BITMAP_SIZE*32)
+ num = IO_BITMAP_SIZE*32 - from;
+ set_bitmap(ioport_registrar, from, num, 0);
+ return;
+}
+
+int check_region(unsigned int from, unsigned int num)
+{
+ if (from > IO_BITMAP_SIZE*32)
+ return 0;
+ if (from + num > IO_BITMAP_SIZE*32)
+ num = IO_BITMAP_SIZE*32 - from;
+ return check_bitmap(ioport_registrar, from, num);
+}
+
+/* Called from init/main.c to reserve IO ports. */
+void reserve_setup(char *str, int *ints)
+{
+ int i;
+
+ for (i = 1; i < ints[0]; i += 2)
+ snarf_region(ints[i], ints[i+1]);
+}
diff --git a/arch/i386/irq.c b/arch/i386/irq.c
new file mode 100644
index 000000000..2de16db53
--- /dev/null
+++ b/arch/i386/irq.c
@@ -0,0 +1,354 @@
+/*
+ * linux/kernel/irq.c
+ *
+ * Copyright (C) 1992 Linus Torvalds
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ */
+
+/*
+ * IRQ's are in fact implemented a bit like signal handlers for the kernel.
+ * The same sigaction struct is used, and with similar semantics (ie there
+ * is a SA_INTERRUPT flag etc). Naturally it's not a 1:1 relation, but there
+ * are similarities.
+ *
+ * sa_handler(int irq_NR) is the default function called (NULL if none).
+ * sa_mask is horribly ugly (I won't even mention it)
+ * sa_flags contains various info: SA_INTERRUPT etc
+ * sa_restorer is unused
+ */
+
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+#define CR0_NE 32
+
+static unsigned char cache_21 = 0xff;
+static unsigned char cache_A1 = 0xff;
+
+unsigned long intr_count = 0;
+unsigned long bh_active = 0;
+unsigned long bh_mask = 0xFFFFFFFF;
+struct bh_struct bh_base[32];
+
+void disable_irq(unsigned int irq_nr)
+{
+ unsigned long flags;
+ unsigned char mask;
+
+ mask = 1 << (irq_nr & 7);
+ save_flags(flags);
+ if (irq_nr < 8) {
+ cli();
+ cache_21 |= mask;
+ outb(cache_21,0x21);
+ restore_flags(flags);
+ return;
+ }
+ cli();
+ cache_A1 |= mask;
+ outb(cache_A1,0xA1);
+ restore_flags(flags);
+}
+
+void enable_irq(unsigned int irq_nr)
+{
+ unsigned long flags;
+ unsigned char mask;
+
+ mask = ~(1 << (irq_nr & 7));
+ save_flags(flags);
+ if (irq_nr < 8) {
+ cli();
+ cache_21 &= mask;
+ outb(cache_21,0x21);
+ restore_flags(flags);
+ return;
+ }
+ cli();
+ cache_A1 &= mask;
+ outb(cache_A1,0xA1);
+ restore_flags(flags);
+}
+
+/*
+ * do_bottom_half() runs at normal kernel priority: all interrupts
+ * enabled. do_bottom_half() is atomic with respect to itself: a
+ * bottom_half handler need not be re-entrant.
+ */
+asmlinkage void do_bottom_half(void)
+{
+ unsigned long active;
+ unsigned long mask, left;
+ struct bh_struct *bh;
+
+ bh = bh_base;
+ active = bh_active & bh_mask;
+ for (mask = 1, left = ~0 ; left & active ; bh++,mask += mask,left += left) {
+ if (mask & active) {
+ void (*fn)(void *);
+ bh_active &= ~mask;
+ fn = bh->routine;
+ if (!fn)
+ goto bad_bh;
+ fn(bh->data);
+ }
+ }
+ return;
+bad_bh:
+ printk ("irq.c:bad bottom half entry\n");
+}
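A bottom half is armed by installing a routine in bh_base[] and raising the matching bit in bh_active; do_bottom_half later clears the bit and runs the routine with interrupts enabled. A minimal sketch of a driver using the scheme (slot number and names are hypothetical):

#define MYDRV_BH 12	/* hypothetical free slot in bh_base[] */

static void mydrv_bottom_half(void *data)
{
	/* slow follow-up work, runs with interrupts enabled */
}

static void mydrv_init(void)
{
	bh_base[MYDRV_BH].routine = mydrv_bottom_half;
	bh_base[MYDRV_BH].data = NULL;
}

static void mydrv_interrupt(int irq)	/* fast handler, interrupts off */
{
	bh_active |= 1 << MYDRV_BH;	/* defer the rest to the bottom half */
}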
+
+/*
+ * This builds up the IRQ handler stubs using some ugly macros in irq.h
+ *
+ * These macros create the low-level assembly IRQ routines that do all
+ * the operations that are needed to keep the AT interrupt-controller
+ * happy. They are also written to be fast - and to disable interrupts
+ * as little as humanly possible.
+ *
+ * NOTE! These macros expand to three different handlers for each line: one
+ * complete handler that does all the fancy stuff (including signal handling),
+ * and one fast handler that is meant for simple IRQ's that want to be
+ * atomic. The specific handler is chosen depending on the SA_INTERRUPT
+ * flag when installing a handler. Finally, one "bad interrupt" handler, that
+ * is used when no handler is present.
+ */
+BUILD_IRQ(FIRST,0,0x01)
+BUILD_IRQ(FIRST,1,0x02)
+BUILD_IRQ(FIRST,2,0x04)
+BUILD_IRQ(FIRST,3,0x08)
+BUILD_IRQ(FIRST,4,0x10)
+BUILD_IRQ(FIRST,5,0x20)
+BUILD_IRQ(FIRST,6,0x40)
+BUILD_IRQ(FIRST,7,0x80)
+BUILD_IRQ(SECOND,8,0x01)
+BUILD_IRQ(SECOND,9,0x02)
+BUILD_IRQ(SECOND,10,0x04)
+BUILD_IRQ(SECOND,11,0x08)
+BUILD_IRQ(SECOND,12,0x10)
+BUILD_IRQ(SECOND,13,0x20)
+BUILD_IRQ(SECOND,14,0x40)
+BUILD_IRQ(SECOND,15,0x80)
+
+/*
+ * Pointers to the low-level handlers: first the general ones, then the
+ * fast ones, then the bad ones.
+ */
+static void (*interrupt[16])(void) = {
+ IRQ0_interrupt, IRQ1_interrupt, IRQ2_interrupt, IRQ3_interrupt,
+ IRQ4_interrupt, IRQ5_interrupt, IRQ6_interrupt, IRQ7_interrupt,
+ IRQ8_interrupt, IRQ9_interrupt, IRQ10_interrupt, IRQ11_interrupt,
+ IRQ12_interrupt, IRQ13_interrupt, IRQ14_interrupt, IRQ15_interrupt
+};
+
+static void (*fast_interrupt[16])(void) = {
+ fast_IRQ0_interrupt, fast_IRQ1_interrupt,
+ fast_IRQ2_interrupt, fast_IRQ3_interrupt,
+ fast_IRQ4_interrupt, fast_IRQ5_interrupt,
+ fast_IRQ6_interrupt, fast_IRQ7_interrupt,
+ fast_IRQ8_interrupt, fast_IRQ9_interrupt,
+ fast_IRQ10_interrupt, fast_IRQ11_interrupt,
+ fast_IRQ12_interrupt, fast_IRQ13_interrupt,
+ fast_IRQ14_interrupt, fast_IRQ15_interrupt
+};
+
+static void (*bad_interrupt[16])(void) = {
+ bad_IRQ0_interrupt, bad_IRQ1_interrupt,
+ bad_IRQ2_interrupt, bad_IRQ3_interrupt,
+ bad_IRQ4_interrupt, bad_IRQ5_interrupt,
+ bad_IRQ6_interrupt, bad_IRQ7_interrupt,
+ bad_IRQ8_interrupt, bad_IRQ9_interrupt,
+ bad_IRQ10_interrupt, bad_IRQ11_interrupt,
+ bad_IRQ12_interrupt, bad_IRQ13_interrupt,
+ bad_IRQ14_interrupt, bad_IRQ15_interrupt
+};
+
+/*
+ * Initial irq handlers.
+ */
+static struct sigaction irq_sigaction[16] = {
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL }
+};
+
+int get_irq_list(char *buf)
+{
+ int i, len = 0;
+ struct sigaction * sa = irq_sigaction;
+
+ for (i = 0 ; i < 16 ; i++, sa++) {
+ if (!sa->sa_handler)
+ continue;
+ len += sprintf(buf+len, "%2d: %8d %c %s\n",
+ i, kstat.interrupts[i],
+ (sa->sa_flags & SA_INTERRUPT) ? '+' : ' ',
+ (char *) sa->sa_mask);
+ }
+ return len;
+}
+
+/*
+ * do_IRQ handles IRQ's that have been installed without the
+ * SA_INTERRUPT flag: it uses the full signal-handling return
+ * and runs with other interrupts enabled. All relatively slow
+ * IRQ's should use this format: notably the keyboard/timer
+ * routines.
+ */
+asmlinkage void do_IRQ(int irq, struct pt_regs * regs)
+{
+ struct sigaction * sa = irq + irq_sigaction;
+
+ kstat.interrupts[irq]++;
+ sa->sa_handler((int) regs);
+}
+
+/*
+ * do_fast_IRQ handles IRQ's that don't need the fancy interrupt return
+ * stuff - the handler is also running with interrupts disabled unless
+ * it explicitly enables them later.
+ */
+asmlinkage void do_fast_IRQ(int irq)
+{
+ struct sigaction * sa = irq + irq_sigaction;
+
+ kstat.interrupts[irq]++;
+ sa->sa_handler(irq);
+}
+
+/*
+ * Using "struct sigaction" is slightly silly, but there
+ * are historical reasons and it works well, so..
+ */
+static int irqaction(unsigned int irq, struct sigaction * new_sa)
+{
+ struct sigaction * sa;
+ unsigned long flags;
+
+ if (irq > 15)
+ return -EINVAL;
+ sa = irq + irq_sigaction;
+ if (sa->sa_handler)
+ return -EBUSY;
+ if (!new_sa->sa_handler)
+ return -EINVAL;
+ save_flags(flags);
+ cli();
+ *sa = *new_sa;
+ if (sa->sa_flags & SA_INTERRUPT)
+ set_intr_gate(0x20+irq,fast_interrupt[irq]);
+ else
+ set_intr_gate(0x20+irq,interrupt[irq]);
+ if (irq < 8) {
+ cache_21 &= ~(1<<irq);
+ outb(cache_21,0x21);
+ } else {
+ cache_21 &= ~(1<<2);
+ cache_A1 &= ~(1<<(irq-8));
+ outb(cache_21,0x21);
+ outb(cache_A1,0xA1);
+ }
+ restore_flags(flags);
+ return 0;
+}
+
+int request_irq(unsigned int irq, void (*handler)(int),
+ unsigned long flags, const char * devname)
+{
+ struct sigaction sa;
+
+ sa.sa_handler = handler;
+ sa.sa_flags = flags;
+ sa.sa_mask = (unsigned long) devname;
+ sa.sa_restorer = NULL;
+ return irqaction(irq,&sa);
+}
+
+void free_irq(unsigned int irq)
+{
+ struct sigaction * sa = irq + irq_sigaction;
+ unsigned long flags;
+
+ if (irq > 15) {
+ printk("Trying to free IRQ%d\n",irq);
+ return;
+ }
+ if (!sa->sa_handler) {
+ printk("Trying to free free IRQ%d\n",irq);
+ return;
+ }
+ save_flags(flags);
+ cli();
+ if (irq < 8) {
+ cache_21 |= 1 << irq;
+ outb(cache_21,0x21);
+ } else {
+ cache_A1 |= 1 << (irq-8);
+ outb(cache_A1,0xA1);
+ }
+ set_intr_gate(0x20+irq,bad_interrupt[irq]);
+ sa->sa_handler = NULL;
+ sa->sa_flags = 0;
+ sa->sa_mask = 0;
+ sa->sa_restorer = NULL;
+ restore_flags(flags);
+}
+
+/*
+ * Note that on a 486, we don't want to do a SIGFPE on a irq13
+ * as the irq is unreliable, and exception 16 works correctly
+ * (ie as explained in the intel literature). On a 386, you
+ * can't use exception 16 due to bad IBM design, so we have to
+ * rely on the less exact irq13.
+ *
+ * Careful.. Not only is IRQ13 unreliable, but it also
+ * leads to races. IBM designers who came up with it should
+ * be shot.
+ */
+static void math_error_irq(int cpl)
+{
+ outb(0,0xF0);
+ if (ignore_irq13 || !hard_math)
+ return;
+ math_error();
+}
+
+static void no_action(int cpl) { }
+
+void init_IRQ(void)
+{
+ int i;
+
+ for (i = 0; i < 16 ; i++)
+ set_intr_gate(0x20+i,bad_interrupt[i]);
+ if (request_irq(2, no_action, SA_INTERRUPT, "cascade"))
+ printk("Unable to get IRQ2 for cascade\n");
+ if (request_irq(13,math_error_irq, 0, "math error"))
+ printk("Unable to get IRQ13 for math-error handler\n");
+
+ /* initialize the bottom half routines. */
+ for (i = 0; i < 32; i++) {
+ bh_base[i].routine = NULL;
+ bh_base[i].data = NULL;
+ }
+ bh_active = 0;
+ intr_count = 0;
+}
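Seen from a driver, the interface above reduces to request_irq at open time and free_irq at close; SA_INTERRUPT selects the fast stub, and the devname string is stashed in sa_mask so get_irq_list can print it. A usage sketch (device and IRQ line are hypothetical):

static void mydev_interrupt(int irq)
{
	/* service the (hypothetical) device */
}

static int mydev_open(void)
{
	if (request_irq(5, mydev_interrupt, SA_INTERRUPT, "mydev"))
		return -EBUSY;	/* IRQ 5 already taken */
	return 0;
}

static void mydev_release(void)
{
	free_irq(5);
}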
diff --git a/arch/i386/ldt.c b/arch/i386/ldt.c
new file mode 100644
index 000000000..dd0e477d4
--- /dev/null
+++ b/arch/i386/ldt.c
@@ -0,0 +1,103 @@
+/*
+ * linux/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <linux/ldt.h>
+
+static int read_ldt(void * ptr, unsigned long bytecount)
+{
+ int error;
+ void * address = current->ldt;
+ unsigned long size;
+
+ if (!ptr)
+ return -EINVAL;
+ size = LDT_ENTRIES*LDT_ENTRY_SIZE;
+ if (!address) {
+ address = &default_ldt;
+ size = sizeof(default_ldt);
+ }
+ if (size > bytecount)
+ size = bytecount;
+ error = verify_area(VERIFY_WRITE, ptr, size);
+ if (error)
+ return error;
+ memcpy_tofs(ptr, address, size);
+ return size;
+}
+
+static int write_ldt(void * ptr, unsigned long bytecount)
+{
+ struct modify_ldt_ldt_s ldt_info;
+ unsigned long *lp;
+ unsigned long base, limit;
+ int error, i;
+
+ if (bytecount != sizeof(ldt_info))
+ return -EINVAL;
+ error = verify_area(VERIFY_READ, ptr, sizeof(ldt_info));
+ if (error)
+ return error;
+
+ memcpy_fromfs(&ldt_info, ptr, sizeof(ldt_info));
+
+ if (ldt_info.contents == 3 || ldt_info.entry_number >= LDT_ENTRIES)
+ return -EINVAL;
+
+ limit = ldt_info.limit;
+ base = ldt_info.base_addr;
+ if (ldt_info.limit_in_pages)
+ limit *= PAGE_SIZE;
+
+ limit += base;
+ if (limit < base || limit >= 0xC0000000)
+ return -EINVAL;
+
+ if (!current->ldt) {
+ for (i=1 ; i<NR_TASKS ; i++) {
+ if (task[i] == current) {
+ if (!(current->ldt = (struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE)))
+ return -ENOMEM;
+ set_ldt_desc(gdt+(i<<1)+FIRST_LDT_ENTRY, current->ldt, LDT_ENTRIES);
+ load_ldt(i);
+ }
+ }
+ }
+
+ lp = (unsigned long *) &current->ldt[ldt_info.entry_number];
+ /* Allow LDTs to be cleared by the user. */
+ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+ *lp = 0;
+ *(lp+1) = 0;
+ return 0;
+ }
+ *lp = ((ldt_info.base_addr & 0x0000ffff) << 16) |
+ (ldt_info.limit & 0x0ffff);
+ *(lp+1) = (ldt_info.base_addr & 0xff000000) |
+ ((ldt_info.base_addr & 0x00ff0000)>>16) |
+ (ldt_info.limit & 0xf0000) |
+ (ldt_info.contents << 10) |
+ ((ldt_info.read_exec_only ^ 1) << 9) |
+ (ldt_info.seg_32bit << 22) |
+ (ldt_info.limit_in_pages << 23) |
+ ((ldt_info.seg_not_present ^1) << 15) |
+ 0x7000;
+ return 0;
+}
+
+asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount)
+{
+ if (func == 0)
+ return read_ldt(ptr, bytecount);
+ if (func == 1)
+ return write_ldt(ptr, bytecount);
+ return -ENOSYS;
+}
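User space reaches this through the modify_ldt system call: func 0 reads the current table, func 1 installs one descriptor described by a struct modify_ldt_ldt_s. A sketch of installing a small 32-bit data segment, following the checks in write_ldt (field values are illustrative, and a syscall wrapper for modify_ldt is assumed):

struct modify_ldt_ldt_s ldt;

memset(&ldt, 0, sizeof(ldt));
ldt.entry_number    = 0;
ldt.base_addr       = (unsigned long) buffer;	/* hypothetical buffer */
ldt.limit           = 4095;	/* byte granular; base+limit < 0xC0000000 */
ldt.seg_32bit       = 1;
ldt.contents        = 0;	/* data segment; 3 is rejected */
ldt.read_exec_only  = 0;
ldt.seg_not_present = 0;

if (modify_ldt(1, &ldt, sizeof(ldt)) < 0)
	perror("modify_ldt");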
diff --git a/arch/i386/main.c b/arch/i386/main.c
new file mode 100644
index 000000000..49606d4de
--- /dev/null
+++ b/arch/i386/main.c
@@ -0,0 +1,481 @@
+/*
+ * linux/init/main.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <stdarg.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/head.h>
+#include <linux/unistd.h>
+#include <linux/string.h>
+#include <linux/timer.h>
+#include <linux/fs.h>
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/utsname.h>
+#include <linux/ioport.h>
+
+extern unsigned long * prof_buffer;
+extern unsigned long prof_len;
+extern char edata, end;
+extern char *linux_banner;
+asmlinkage void lcall7(void);
+struct desc_struct default_ldt;
+
+/*
+ * we need this inline - forking from kernel space will result
+ * in NO COPY ON WRITE (!!!), until an execve is executed. This
+ * is no problem, but for the stack. This is handled by not letting
+ * main() use the stack at all after fork(). Thus, no function
+ * calls - which means inline code for fork too, as otherwise we
+ * would use the stack upon exit from 'fork()'.
+ *
+ * Actually only pause and fork are needed inline, so that there
+ * won't be any messing with the stack from main(), but we define
+ * some others too.
+ */
+#define __NR__exit __NR_exit
+static inline _syscall0(int,idle)
+static inline _syscall0(int,fork)
+static inline _syscall0(int,pause)
+static inline _syscall0(int,setup)
+static inline _syscall0(int,sync)
+static inline _syscall0(pid_t,setsid)
+static inline _syscall3(int,write,int,fd,const char *,buf,off_t,count)
+static inline _syscall1(int,dup,int,fd)
+static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
+static inline _syscall3(int,open,const char *,file,int,flag,int,mode)
+static inline _syscall1(int,close,int,fd)
+static inline _syscall1(int,_exit,int,exitcode)
+static inline _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
+
+static inline pid_t wait(int * wait_stat)
+{
+ return waitpid(-1,wait_stat,0);
+}
+
+static char printbuf[1024];
+
+extern int console_loglevel;
+
+extern char empty_zero_page[PAGE_SIZE];
+extern void init(void);
+extern void init_IRQ(void);
+extern void init_modules(void);
+extern long console_init(long, long);
+extern long kmalloc_init(long,long);
+extern long blk_dev_init(long,long);
+extern long chr_dev_init(long,long);
+extern void sock_init(void);
+extern long rd_init(long mem_start, int length);
+unsigned long net_dev_init(unsigned long, unsigned long);
+extern long bios32_init(long, long);
+
+extern void hd_setup(char *str, int *ints);
+extern void bmouse_setup(char *str, int *ints);
+extern void eth_setup(char *str, int *ints);
+extern void xd_setup(char *str, int *ints);
+extern void mcd_setup(char *str, int *ints);
+extern void st_setup(char *str, int *ints);
+extern void st0x_setup(char *str, int *ints);
+extern void tmc8xx_setup(char *str, int *ints);
+extern void t128_setup(char *str, int *ints);
+extern void pas16_setup(char *str, int *ints);
+extern void generic_NCR5380_setup(char *str, int *intr);
+extern void aha152x_setup(char *str, int *ints);
+extern void aha1542_setup(char *str, int *ints);
+extern void aha274x_setup(char *str, int *ints);
+extern void scsi_luns_setup(char *str, int *ints);
+extern void sound_setup(char *str, int *ints);
+#ifdef CONFIG_SBPCD
+extern void sbpcd_setup(char *str, int *ints);
+#endif /* CONFIG_SBPCD */
+#ifdef CONFIG_CDU31A
+extern void cdu31a_setup(char *str, int *ints);
+#endif /* CONFIG_CDU31A */
+void ramdisk_setup(char *str, int *ints);
+
+#ifdef CONFIG_SYSVIPC
+extern void ipc_init(void);
+#endif
+#ifdef CONFIG_SCSI
+extern unsigned long scsi_dev_init(unsigned long, unsigned long);
+#endif
+
+/*
+ * This is set up by the setup-routine at boot-time
+ */
+#define PARAM empty_zero_page
+#define EXT_MEM_K (*(unsigned short *) (PARAM+2))
+#define DRIVE_INFO (*(struct drive_info_struct *) (PARAM+0x80))
+#define SCREEN_INFO (*(struct screen_info *) (PARAM+0))
+#define MOUNT_ROOT_RDONLY (*(unsigned short *) (PARAM+0x1F2))
+#define RAMDISK_SIZE (*(unsigned short *) (PARAM+0x1F8))
+#define ORIG_ROOT_DEV (*(unsigned short *) (PARAM+0x1FC))
+#define AUX_DEVICE_INFO (*(unsigned char *) (PARAM+0x1FF))
+
+/*
+ * Boot command-line arguments
+ */
+void copy_options(char * to, char * from);
+void parse_options(char *line);
+#define MAX_INIT_ARGS 8
+#define MAX_INIT_ENVS 8
+#define COMMAND_LINE ((char *) (PARAM+2048))
+#define COMMAND_LINE_SIZE 256
+
+extern void time_init(void);
+
+static unsigned long memory_start = 0; /* After mem_init, stores the */
+ /* amount of free user memory */
+static unsigned long memory_end = 0;
+static unsigned long low_memory_start = 0;
+
+static char term[21];
+int rows, cols;
+
+static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
+static char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", term, NULL, };
+
+static char * argv_rc[] = { "/bin/sh", NULL };
+static char * envp_rc[] = { "HOME=/", term, NULL };
+
+static char * argv[] = { "-/bin/sh",NULL };
+static char * envp[] = { "HOME=/usr/root", term, NULL };
+
+struct drive_info_struct { char dummy[32]; } drive_info;
+struct screen_info screen_info;
+
+unsigned char aux_device_present;
+int ramdisk_size;
+int root_mountflags = 0;
+
+static char fpu_error = 0;
+
+static char command_line[COMMAND_LINE_SIZE] = { 0, };
+
+char *get_options(char *str, int *ints)
+{
+ char *cur = str;
+ int i=1;
+
+ while (cur && isdigit(*cur) && i <= 10) {
+ ints[i++] = simple_strtoul(cur,NULL,0);
+ if ((cur = strchr(cur,',')) != NULL)
+ cur++;
+ }
+ ints[0] = i-1;
+ return(cur);
+}
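get_options fills ints[1..10] with up to ten comma-separated numbers, stores the count in ints[0], and returns a pointer to the first unparsed character; each setup function then consumes that array. A worked example (illustrative values, not from this file):

int ints[11];
char *rest;

/* checksetup() has already stripped the "ramdisk=" prefix: */
rest = get_options("1440", ints);
/* now ints[0] == 1 and ints[1] == 1440; ramdisk_setup() will
 * copy ints[1] into ramdisk_size. */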
+
+struct {
+ char *str;
+ void (*setup_func)(char *, int *);
+} bootsetups[] = {
+ { "reserve=", reserve_setup },
+ { "ramdisk=", ramdisk_setup },
+#ifdef CONFIG_INET
+ { "ether=", eth_setup },
+#endif
+#ifdef CONFIG_SCSI
+ { "max_scsi_luns=", scsi_luns_setup },
+#endif
+#ifdef CONFIG_BLK_DEV_HD
+ { "hd=", hd_setup },
+#endif
+#ifdef CONFIG_CHR_DEV_ST
+ { "st=", st_setup },
+#endif
+#ifdef CONFIG_BUSMOUSE
+ { "bmouse=", bmouse_setup },
+#endif
+#ifdef CONFIG_SCSI_SEAGATE
+ { "st0x=", st0x_setup },
+ { "tmc8xx=", tmc8xx_setup },
+#endif
+#ifdef CONFIG_SCSI_T128
+ { "t128=", t128_setup },
+#endif
+#ifdef CONFIG_SCSI_PAS16
+ { "pas16=", pas16_setup },
+#endif
+#ifdef CONFIG_SCSI_GENERIC_NCR5380
+ { "ncr5380=", generic_NCR5380_setup },
+#endif
+#ifdef CONFIG_SCSI_AHA152X
+ { "aha152x=", aha152x_setup},
+#endif
+#ifdef CONFIG_SCSI_AHA1542
+ { "aha1542=", aha1542_setup},
+#endif
+#ifdef CONFIG_SCSI_AHA274X
+ { "aha274x=", aha274x_setup},
+#endif
+#ifdef CONFIG_BLK_DEV_XD
+ { "xd=", xd_setup },
+#endif
+#ifdef CONFIG_MCD
+ { "mcd=", mcd_setup },
+#endif
+#ifdef CONFIG_SOUND
+ { "sound=", sound_setup },
+#endif
+#ifdef CONFIG_SBPCD
+ { "sbpcd=", sbpcd_setup },
+#endif /* CONFIG_SBPCD */
+#ifdef CONFIG_CDU31A
+ { "cdu31a=", cdu31a_setup },
+#endif /* CONFIG_CDU31A */
+ { 0, 0 }
+};
+
+void ramdisk_setup(char *str, int *ints)
+{
+ if (ints[0] > 0 && ints[1] >= 0)
+ ramdisk_size = ints[1];
+}
+
+static int checksetup(char *line)
+{
+ int i = 0;
+ int ints[11];
+
+ while (bootsetups[i].str) {
+ int n = strlen(bootsetups[i].str);
+ if (!strncmp(line,bootsetups[i].str,n)) {
+ bootsetups[i].setup_func(get_options(line+n,ints), ints);
+ return 1;
+ }
+ i++;
+ }
+ return 0;
+}
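+
+/*
+ * Example of the dispatch (illustrative): a command line containing
+ * "ramdisk=1440" matches the "ramdisk=" entry above, get_options()
+ * parses the argument into ints[] = { 1, 1440 }, and ramdisk_setup()
+ * then sets ramdisk_size to 1440.
+ */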
+
+unsigned long loops_per_sec = 1;
+
+static void calibrate_delay(void)
+{
+ int ticks;
+
+ printk("Calibrating delay loop.. ");
+ while (loops_per_sec <<= 1) {
+ ticks = jiffies;
+ __delay(loops_per_sec);
+ ticks = jiffies - ticks;
+ if (ticks >= HZ) {
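+			/*
+			 * Scale the estimate: the asm below computes
+			 * loops_per_sec * HZ / ticks with a 32x32->64 bit
+			 * multiply and a 64/32 bit divide, so the
+			 * intermediate product cannot overflow.
+			 */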
+ __asm__("mull %1 ; divl %2"
+ :"=a" (loops_per_sec)
+ :"d" (HZ),
+ "r" (ticks),
+ "0" (loops_per_sec)
+ :"dx");
+ printk("ok - %lu.%02lu BogoMips\n",
+ loops_per_sec/500000,
+ (loops_per_sec/5000) % 100);
+ return;
+ }
+ }
+ printk("failed\n");
+}
+
+/*
+ * parse machine-dependent options
+ */
+int parse_machine_options(char *line)
+{
+ if (!strcmp(line,"no-hlt")) {
+ hlt_works_ok = 0;
+ return 1;
+ }
+ if (!strcmp(line,"no387")) {
+ hard_math = 0;
+ __asm__("movl %%cr0,%%eax\n\t"
+ "orl $0xE,%%eax\n\t"
+ "movl %%eax,%%cr0\n\t" : : : "ax");
+ return 1;
+ }
+	return 0;
+}
+
+static void copro_timeout(void)
+{
+ fpu_error = 1;
+ timer_table[COPRO_TIMER].expires = jiffies+100;
+ timer_active |= 1<<COPRO_TIMER;
+ printk("387 failed: trying to reset\n");
+ send_sig(SIGFPE, last_task_used_math, 1);
+ outb_p(0,0xf1);
+ outb_p(0,0xf0);
+}
+
+static void check_fpu(void)
+{
+ static double x = 4195835.0;
+ static double y = 3145727.0;
+ unsigned short control_word;
+ int i;
+
+ if (!hard_math) {
+#ifndef CONFIG_MATH_EMULATION
+ printk("No coprocessor found and no math emulation present.\n");
+ printk("Giving up.\n");
+ for (;;) ;
+#endif
+ return;
+ }
+ /*
+ * check if exception 16 works correctly.. This is truly evil
+ * code: it disables the high 8 interrupts to make sure that
+ * the irq13 doesn't happen. But as this will lead to a lockup
+ * if no exception 16 arrives, it depends on the fact that the
+ * high 8 interrupts will be re-enabled by the next timer tick.
+ * So the irq13 will happen eventually, but the exception 16
+ * should get there first..
+ */
+ printk("Checking 386/387 coupling... ");
+ timer_table[COPRO_TIMER].expires = jiffies+50;
+ timer_table[COPRO_TIMER].fn = copro_timeout;
+ timer_active |= 1<<COPRO_TIMER;
+ __asm__("clts ; fninit ; fnstcw %0 ; fwait":"=m" (*&control_word));
+ control_word &= 0xffc0;
+ __asm__("fldcw %0 ; fwait": :"m" (*&control_word));
+ outb_p(inb_p(0x21) | (1 << 2), 0x21);
+ __asm__("fldz ; fld1 ; fdiv %st,%st(1) ; fwait");
+ timer_active &= ~(1<<COPRO_TIMER);
+ if (fpu_error)
+ return;
+ if (!ignore_irq13) {
+ printk("Ok, fpu using old IRQ13 error reporting\n");
+ return;
+ }
+ __asm__("fninit\n\t"
+ "fldl %1\n\t"
+ "fdivl %2\n\t"
+ "fmull %2\n\t"
+ "fldl %1\n\t"
+ "fsubp %%st,%%st(1)\n\t"
+ "fistpl %0\n\t"
+ "fwait\n\t"
+ "fninit"
+ : "=m" (*&i)
+ : "m" (*&x), "m" (*&y));
+ if (!i) {
+ printk("Ok, fpu using exception 16 error reporting.\n");
+ return;
+	}
+ printk("Ok, FDIV bug i%c86 system\n", '0'+x86);
+}
+
+static void check_hlt(void)
+{
+ printk("Checking 'hlt' instruction... ");
+ if (!hlt_works_ok) {
+ printk("disabled\n");
+ return;
+ }
+ __asm__ __volatile__("hlt ; hlt ; hlt ; hlt");
+ printk("Ok.\n");
+}
+
+static void check_bugs(void)
+{
+ check_fpu();
+ check_hlt();
+}
+
+asmlinkage void start_kernel(void)
+{
+/*
+ * Interrupts are still disabled. Do necessary setups, then
+ * enable them
+ */
+ set_call_gate(&default_ldt,lcall7);
+ ROOT_DEV = ORIG_ROOT_DEV;
+ drive_info = DRIVE_INFO;
+ screen_info = SCREEN_INFO;
+ aux_device_present = AUX_DEVICE_INFO;
+ memory_end = (1<<20) + (EXT_MEM_K<<10);
+ memory_end &= PAGE_MASK;
+ ramdisk_size = RAMDISK_SIZE;
+ copy_options(command_line,COMMAND_LINE);
+#ifdef CONFIG_MAX_16M
+ if (memory_end > 16*1024*1024)
+ memory_end = 16*1024*1024;
+#endif
+ if (MOUNT_ROOT_RDONLY)
+ root_mountflags |= MS_RDONLY;
+ if ((unsigned long)&end >= (1024*1024)) {
+ memory_start = (unsigned long) &end;
+ low_memory_start = PAGE_SIZE;
+ } else {
+ memory_start = 1024*1024;
+ low_memory_start = (unsigned long) &end;
+ }
+ low_memory_start = PAGE_ALIGN(low_memory_start);
+ memory_start = paging_init(memory_start,memory_end);
+ if (strncmp((char*)0x0FFFD9, "EISA", 4) == 0)
+ EISA_bus = 1;
+ trap_init();
+ init_IRQ();
+ sched_init();
+ parse_options(command_line);
+ init_modules();
+#ifdef CONFIG_PROFILE
+ prof_buffer = (unsigned long *) memory_start;
+ prof_len = (unsigned long) &end;
+ prof_len >>= 2;
+ memory_start += prof_len * sizeof(unsigned long);
+#endif
+ memory_start = console_init(memory_start,memory_end);
+ memory_start = bios32_init(memory_start,memory_end);
+ memory_start = kmalloc_init(memory_start,memory_end);
+ memory_start = chr_dev_init(memory_start,memory_end);
+ memory_start = blk_dev_init(memory_start,memory_end);
+ sti();
+ calibrate_delay();
+#ifdef CONFIG_SCSI
+ memory_start = scsi_dev_init(memory_start,memory_end);
+#endif
+#ifdef CONFIG_INET
+ memory_start = net_dev_init(memory_start,memory_end);
+#endif
+ memory_start = inode_init(memory_start,memory_end);
+ memory_start = file_table_init(memory_start,memory_end);
+ memory_start = name_cache_init(memory_start,memory_end);
+ mem_init(low_memory_start,memory_start,memory_end);
+ buffer_init();
+ time_init();
+ sock_init();
+#ifdef CONFIG_SYSVIPC
+ ipc_init();
+#endif
+ sti();
+ check_bugs();
+
+ system_utsname.machine[1] = '0' + x86;
+ printk(linux_banner);
+
+ move_to_user_mode();
+ if (!fork()) /* we count on this going ok */
+ init();
+/*
+ * task[0] is meant to be used as an "idle" task: it may not sleep, but
+ * it might do some general things like count free pages or it could be
+ * used to implement a reasonable LRU algorithm for the paging routines:
+ * anything that can be useful, but shouldn't take time from the real
+ * processes.
+ *
+ * Right now task[0] just does an infinite idle loop.
+ */
+ for(;;)
+ idle();
+}
diff --git a/arch/i386/mm/Makefile b/arch/i386/mm/Makefile
new file mode 100644
index 000000000..5063d60c2
--- /dev/null
+++ b/arch/i386/mm/Makefile
@@ -0,0 +1,30 @@
+#
+# Makefile for the linux memory manager.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definition is now in the main makefile...
+
+.c.o:
+ $(CC) $(CFLAGS) -c $<
+.s.o:
+ $(AS) -o $*.o $<
+.c.s:
+ $(CC) $(CFLAGS) -S $<
+
+OBJS = memory.o swap.o mmap.o mprotect.o kmalloc.o vmalloc.o
+
+mm.o: $(OBJS)
+ $(LD) -r -o mm.o $(OBJS)
+
+dep:
+ $(CPP) -M *.c > .depend
+
+#
+# include a dependency file if one exists
+#
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff --git a/arch/i386/mm/kmalloc.c b/arch/i386/mm/kmalloc.c
new file mode 100644
index 000000000..018f8db8f
--- /dev/null
+++ b/arch/i386/mm/kmalloc.c
@@ -0,0 +1,362 @@
+/*
+ * linux/mm/kmalloc.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds & Roger Wolff.
+ *
+ * Written by R.E. Wolff Sept/Oct '93.
+ *
+ */
+
+/*
+ * Modified by Alex Bligh (alex@cconcepts.co.uk) 4 Apr 1994 to use multiple
+ * pages. So for 'page' throughout, read 'area'.
+ */
+
+#include <linux/mm.h>
+#include <asm/system.h>
+#include <linux/delay.h>
+
+#define GFP_LEVEL_MASK 0xf
+
+/* I want this low enough for a while to catch errors.
+ I want this number to be increased in the near future:
+ loadable device drivers should use this function to get memory */
+
+#define MAX_KMALLOC_K ((PAGE_SIZE<<(NUM_AREA_ORDERS-1))>>10)
+
+
+/* This defines how many times we should try to allocate a free page before
+ giving up. Normally this shouldn't happen at all. */
+#define MAX_GET_FREE_PAGE_TRIES 4
+
+
+/* Private flags. */
+
+#define MF_USED 0xffaa0055
+#define MF_FREE 0x0055ffaa
+
+
+/*
+ * Much care has gone into making these routines in this file reentrant.
+ *
+ * The fancy bookkeeping of nbytesmalloced and the like are only used to
+ * report them to the user (oooohhhhh, aaaaahhhhh....) are not
+ * protected by cli(). (If that goes wrong. So what?)
+ *
+ * These routines restore the interrupt status to allow calling with ints
+ * off.
+ */
+
+/*
+ * A block header. This is in front of every malloc-block, whether free or not.
+ */
+struct block_header {
+ unsigned long bh_flags;
+ union {
+ unsigned long ubh_length;
+ struct block_header *fbh_next;
+ } vp;
+};
+
+
+#define bh_length vp.ubh_length
+#define bh_next vp.fbh_next
+#define BH(p) ((struct block_header *)(p))
+
+
+/*
+ * The page descriptor is at the front of every page that malloc has in use.
+ */
+struct page_descriptor {
+ struct page_descriptor *next;
+ struct block_header *firstfree;
+ int order;
+ int nfree;
+};
+
+
+#define PAGE_DESC(p) ((struct page_descriptor *)(((unsigned long)(p)) & PAGE_MASK))
+
+
+/*
+ * A size descriptor describes a specific class of malloc sizes.
+ * Each class of sizes has its own freelist.
+ */
+struct size_descriptor {
+ struct page_descriptor *firstfree;
+ int size;
+ int nblocks;
+
+ int nmallocs;
+ int nfrees;
+ int nbytesmalloced;
+ int npages;
+ unsigned long gfporder; /* number of pages in the area required */
+};
+
+/*
+ * For now it is unsafe to allocate bucket sizes between n-16 & n where n is
+ * 4096 * any power of two
+ */
+
+struct size_descriptor sizes[] = {
+ { NULL, 32,127, 0,0,0,0, 0},
+ { NULL, 64, 63, 0,0,0,0, 0 },
+ { NULL, 128, 31, 0,0,0,0, 0 },
+ { NULL, 252, 16, 0,0,0,0, 0 },
+ { NULL, 508, 8, 0,0,0,0, 0 },
+ { NULL,1020, 4, 0,0,0,0, 0 },
+ { NULL,2040, 2, 0,0,0,0, 0 },
+ { NULL,4096-16, 1, 0,0,0,0, 0 },
+ { NULL,8192-16, 1, 0,0,0,0, 1 },
+ { NULL,16384-16, 1, 0,0,0,0, 2 },
+ { NULL,32768-16, 1, 0,0,0,0, 3 },
+ { NULL,65536-16, 1, 0,0,0,0, 4 },
+ { NULL,131072-16, 1, 0,0,0,0, 5 },
+ { NULL, 0, 0, 0,0,0,0, 0 }
+};
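+
+/*
+ * Illustration: the 32-byte class packs 127 blocks (header included)
+ * into one page: 127*32 = 4064 bytes plus the 16-byte page_descriptor
+ * is 4080 <= 4096, which is exactly the invariant kmalloc_init()
+ * checks below.
+ */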
+
+
+#define NBLOCKS(order) (sizes[order].nblocks)
+#define BLOCKSIZE(order) (sizes[order].size)
+#define AREASIZE(order) (PAGE_SIZE<<(sizes[order].gfporder))
+
+
+long kmalloc_init (long start_mem,long end_mem)
+{
+ int order;
+
+/*
+ * Check the static info array. Things will blow up terribly if it's
+ * incorrect. This is a late "compile time" check.....
+ */
+for (order = 0;BLOCKSIZE(order);order++)
+ {
+ if ((NBLOCKS (order)*BLOCKSIZE(order) + sizeof (struct page_descriptor)) >
+ AREASIZE(order))
+ {
+ printk ("Cannot use %d bytes out of %d in order = %d block mallocs\n",
+ NBLOCKS (order) * BLOCKSIZE(order) +
+ sizeof (struct page_descriptor),
+ (int) AREASIZE(order),
+ BLOCKSIZE (order));
+ panic ("This only happens if someone messes with kmalloc");
+ }
+ }
+return start_mem;
+}
+
+
+
+int get_order (int size)
+{
+ int order;
+
+ /* Add the size of the header */
+ size += sizeof (struct block_header);
+ for (order = 0;BLOCKSIZE(order);order++)
+ if (size <= BLOCKSIZE (order))
+ return order;
+ return -1;
+}
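+
+/*
+ * Illustration: the block_header is 8 bytes on i386, so get_order(100)
+ * searches for the first class that holds 108 bytes and returns 2,
+ * the 128-byte bucket.
+ */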
+
+void * kmalloc (size_t size, int priority)
+{
+ unsigned long flags;
+ int order,tries,i,sz;
+ struct block_header *p;
+ struct page_descriptor *page;
+
+/* Sanity check... */
+ if (intr_count && priority != GFP_ATOMIC) {
+ static int count = 0;
+ if (++count < 5) {
+ printk("kmalloc called nonatomically from interrupt %p\n",
+ __builtin_return_address(0));
+ priority = GFP_ATOMIC;
+ }
+ }
+
+order = get_order (size);
+if (order < 0)
+ {
+ printk ("kmalloc of too large a block (%d bytes).\n",size);
+ return (NULL);
+ }
+
+save_flags(flags);
+
+/* It seems VERY unlikely to me that it would be possible that this
+ loop will get executed more than once. */
+tries = MAX_GET_FREE_PAGE_TRIES;
+while (tries --)
+ {
+ /* Try to allocate a "recently" freed memory block */
+ cli ();
+ if ((page = sizes[order].firstfree) &&
+ (p = page->firstfree))
+ {
+ if (p->bh_flags == MF_FREE)
+ {
+ page->firstfree = p->bh_next;
+ page->nfree--;
+ if (!page->nfree)
+ {
+ sizes[order].firstfree = page->next;
+ page->next = NULL;
+ }
+ restore_flags(flags);
+
+ sizes [order].nmallocs++;
+ sizes [order].nbytesmalloced += size;
+ p->bh_flags = MF_USED; /* As of now this block is officially in use */
+ p->bh_length = size;
+ return p+1; /* Pointer arithmetic: increments past header */
+ }
+ printk ("Problem: block on freelist at %08lx isn't free.\n",(long)p);
+ return (NULL);
+ }
+ restore_flags(flags);
+
+
+ /* Now we're in trouble: We need to get a new free page..... */
+
+ sz = BLOCKSIZE(order); /* sz is the size of the blocks we're dealing with */
+
+ /* This can be done with ints on: This is private to this invocation */
+ page = (struct page_descriptor *) __get_free_pages (priority & GFP_LEVEL_MASK, sizes[order].gfporder);
+ if (!page) {
+ static unsigned long last = 0;
+ if (last + 10*HZ < jiffies) {
+ last = jiffies;
+ printk ("Couldn't get a free page.....\n");
+ }
+ return NULL;
+ }
+#if 0
+ printk ("Got page %08x to use for %d byte mallocs....",(long)page,sz);
+#endif
+ sizes[order].npages++;
+
+ /* Loop for all but last block: */
+ for (i=NBLOCKS(order),p=BH (page+1);i > 1;i--,p=p->bh_next)
+ {
+ p->bh_flags = MF_FREE;
+ p->bh_next = BH ( ((long)p)+sz);
+ }
+ /* Last block: */
+ p->bh_flags = MF_FREE;
+ p->bh_next = NULL;
+
+ page->order = order;
+ page->nfree = NBLOCKS(order);
+ page->firstfree = BH(page+1);
+#if 0
+ printk ("%d blocks per page\n",page->nfree);
+#endif
+ /* Now we're going to muck with the "global" freelist for this size:
+ this should be uninterruptible */
+ cli ();
+ /*
+ * sizes[order].firstfree used to be NULL, otherwise we wouldn't be
+ * here, but you never know....
+ */
+ page->next = sizes[order].firstfree;
+ sizes[order].firstfree = page;
+ restore_flags(flags);
+ }
+
+/* Pray that printk won't cause this to happen again :-) */
+
+printk ("Hey. This is very funny. I tried %d times to allocate a whole\n"
+ "new page for an object only %d bytes long, but some other process\n"
+ "beat me to actually allocating it. Also note that this 'error'\n"
+ "message is soooo very long to catch your attention. I'd appreciate\n"
+ "it if you'd be so kind as to report what conditions caused this to\n"
+ "the author of this kmalloc: wolff@dutecai.et.tudelft.nl.\n"
+ "(Executive summary: This can't happen)\n",
+ MAX_GET_FREE_PAGE_TRIES,
+ size);
+return NULL;
+}
+
+
+void kfree_s (void *ptr,int size)
+{
+unsigned long flags;
+int order;
+register struct block_header *p=((struct block_header *)ptr) -1;
+struct page_descriptor *page,*pg2;
+
+page = PAGE_DESC (p);
+order = page->order;
+if ((order < 0) ||
+ (order > sizeof (sizes)/sizeof (sizes[0])) ||
+ (((long)(page->next)) & ~PAGE_MASK) ||
+ (p->bh_flags != MF_USED))
+ {
+ printk ("kfree of non-kmalloced memory: %p, next= %p, order=%d\n",
+ p, page->next, page->order);
+ return;
+ }
+if (size &&
+ size != p->bh_length)
+ {
+ printk ("Trying to free pointer at %p with wrong size: %d instead of %lu.\n",
+ p,size,p->bh_length);
+ return;
+ }
+size = p->bh_length;
+p->bh_flags = MF_FREE; /* As of now this block is officially free */
+save_flags(flags);
+cli ();
+p->bh_next = page->firstfree;
+page->firstfree = p;
+page->nfree ++;
+
+if (page->nfree == 1)
+ { /* Page went from full to one free block: put it on the freelist */
+ if (page->next)
+ {
+ printk ("Page %p already on freelist dazed and confused....\n", page);
+ }
+ else
+ {
+ page->next = sizes[order].firstfree;
+ sizes[order].firstfree = page;
+ }
+ }
+
+/* If page is completely free, free it */
+if (page->nfree == NBLOCKS (page->order))
+ {
+#if 0
+ printk ("Freeing page %08x.\n", (long)page);
+#endif
+ if (sizes[order].firstfree == page)
+ {
+ sizes[order].firstfree = page->next;
+ }
+ else
+ {
+ for (pg2=sizes[order].firstfree;
+ (pg2 != NULL) && (pg2->next != page);
+ pg2=pg2->next)
+ /* Nothing */;
+ if (pg2 != NULL)
+ pg2->next = page->next;
+ else
+ printk ("Ooops. page %p doesn't show on freelist.\n", page);
+ }
+/* FIXME: I'm sure we should do something with npages here (like npages--) */
+ free_pages ((long)page, sizes[order].gfporder);
+ }
+restore_flags(flags);
+
+/* FIXME: ?? Are these increment & decrement operations guaranteed to be
+ * atomic? Could an IRQ not occur between the read & the write?
+ * Maybe yes on a x86 with GCC...??
+ */
+sizes[order].nfrees++; /* Noncritical (monitoring) admin stuff */
+sizes[order].nbytesmalloced -= size;
+}
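+
+/*
+ * Typical usage, as a sketch only ('struct foo' is hypothetical):
+ *
+ *	struct foo *p = kmalloc(sizeof(*p), GFP_KERNEL);
+ *	if (!p)
+ *		return -ENOMEM;
+ *	...
+ *	kfree_s(p, sizeof(*p));
+ *
+ * Passing a size of 0 to kfree_s() skips the length cross-check.
+ */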
diff --git a/arch/i386/mm/memory.c b/arch/i386/mm/memory.c
new file mode 100644
index 000000000..3e5a67041
--- /dev/null
+++ b/arch/i386/mm/memory.c
@@ -0,0 +1,1320 @@
+/*
+ * linux/mm/memory.c
+ *
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ */
+
+/*
+ * demand-loading started 01.12.91 - seems it is high on the list of
+ * things wanted, and it should be easy to implement. - Linus
+ */
+
+/*
+ * Ok, demand-loading was easy, shared pages a little bit trickier. Shared
+ * pages started 02.12.91, seems to work. - Linus.
+ *
+ * Tested sharing by executing about 30 /bin/sh: under the old kernel it
+ * would have taken more than the 6M I have free, but it worked well as
+ * far as I could see.
+ *
+ * Also corrected some "invalidate()"s - I wasn't doing enough of them.
+ */
+
+/*
+ * Real VM (paging to/from disk) started 18.12.91. Much more work and
+ * thought has to go into this. Oh, well..
+ * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why.
+ * Found it. Everything seems to work now.
+ * 20.12.91 - Ok, making the swap-device changeable like the root.
+ */
+
+/*
+ * 05.04.94 - Multi-page memory management added for v1.1.
+ * Idea by Alex Bligh (alex@cconcepts.co.uk)
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/head.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+
+#include <asm/system.h>
+#include <asm/segment.h>
+
+/*
+ * Define this if things work differently on a i386 and a i486:
+ * it will (on a i486) warn about kernel memory accesses that are
+ * done without a 'verify_area(VERIFY_WRITE,..)'
+ */
+#undef CONFIG_TEST_VERIFY_AREA
+
+unsigned long high_memory = 0;
+
+extern unsigned long pg0[1024]; /* page table for 0-4MB for everybody */
+
+extern void sound_mem_init(void);
+extern void die_if_kernel(char *,struct pt_regs *,long);
+extern void show_net_buffers(void);
+
+/*
+ * The free_area_list arrays point to the queue heads of the free areas
+ * of different sizes
+ */
+int nr_swap_pages = 0;
+int nr_free_pages = 0;
+struct mem_list free_area_list[NR_MEM_LISTS];
+unsigned char * free_area_map[NR_MEM_LISTS];
+
+#define copy_page(from,to) \
+__asm__("cld ; rep ; movsl": :"S" (from),"D" (to),"c" (1024):"cx","di","si")
+
+unsigned short * mem_map = NULL;
+
+#define CODE_SPACE(addr,p) ((addr) < (p)->end_code)
+
+/*
+ * oom() prints a message (so that the user knows why the process died),
+ * and gives the process an untrappable SIGKILL.
+ */
+void oom(struct task_struct * task)
+{
+ printk("\nOut of memory.\n");
+ task->sigaction[SIGKILL-1].sa_handler = NULL;
+ task->blocked &= ~(1<<(SIGKILL-1));
+ send_sig(SIGKILL,task,1);
+}
+
+static void free_one_table(unsigned long * page_dir)
+{
+ int j;
+ unsigned long pg_table = *page_dir;
+ unsigned long * page_table;
+
+ if (!pg_table)
+ return;
+ *page_dir = 0;
+ if (pg_table >= high_memory || !(pg_table & PAGE_PRESENT)) {
+ printk("Bad page table: [%p]=%08lx\n",page_dir,pg_table);
+ return;
+ }
+ if (mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)
+ return;
+ page_table = (unsigned long *) (pg_table & PAGE_MASK);
+ for (j = 0 ; j < PTRS_PER_PAGE ; j++,page_table++) {
+ unsigned long pg = *page_table;
+
+ if (!pg)
+ continue;
+ *page_table = 0;
+ if (pg & PAGE_PRESENT)
+ free_page(PAGE_MASK & pg);
+ else
+ swap_free(pg);
+ }
+ free_page(PAGE_MASK & pg_table);
+}
+
+/*
+ * This function clears all user-level page tables of a process - this
+ * is needed by execve(), so that old pages aren't in the way. Note that
+ * unlike 'free_page_tables()', this function still leaves a valid
+ * page-table-tree in memory: it just removes the user pages. The two
+ * functions are similar, but there is a fundamental difference.
+ */
+void clear_page_tables(struct task_struct * tsk)
+{
+ int i;
+ unsigned long pg_dir;
+ unsigned long * page_dir;
+
+ if (!tsk)
+ return;
+ if (tsk == task[0])
+ panic("task[0] (swapper) doesn't support exec()\n");
+ pg_dir = tsk->tss.cr3;
+ page_dir = (unsigned long *) pg_dir;
+ if (!page_dir || page_dir == swapper_pg_dir) {
+ printk("Trying to clear kernel page-directory: not good\n");
+ return;
+ }
+ if (mem_map[MAP_NR(pg_dir)] > 1) {
+ unsigned long * new_pg;
+
+ if (!(new_pg = (unsigned long*) get_free_page(GFP_KERNEL))) {
+ oom(tsk);
+ return;
+ }
+ for (i = 768 ; i < 1024 ; i++)
+ new_pg[i] = page_dir[i];
+ free_page(pg_dir);
+ tsk->tss.cr3 = (unsigned long) new_pg;
+ return;
+ }
+ for (i = 0 ; i < 768 ; i++,page_dir++)
+ free_one_table(page_dir);
+ invalidate();
+ return;
+}
+
+/*
+ * This function frees up all page tables of a process when it exits.
+ */
+void free_page_tables(struct task_struct * tsk)
+{
+ int i;
+ unsigned long pg_dir;
+ unsigned long * page_dir;
+
+ if (!tsk)
+ return;
+ if (tsk == task[0]) {
+ printk("task[0] (swapper) killed: unable to recover\n");
+ panic("Trying to free up swapper memory space");
+ }
+ pg_dir = tsk->tss.cr3;
+ if (!pg_dir || pg_dir == (unsigned long) swapper_pg_dir) {
+ printk("Trying to free kernel page-directory: not good\n");
+ return;
+ }
+ tsk->tss.cr3 = (unsigned long) swapper_pg_dir;
+ if (tsk == current)
+ __asm__ __volatile__("movl %0,%%cr3": :"a" (tsk->tss.cr3));
+ if (mem_map[MAP_NR(pg_dir)] > 1) {
+ free_page(pg_dir);
+ return;
+ }
+ page_dir = (unsigned long *) pg_dir;
+ for (i = 0 ; i < PTRS_PER_PAGE ; i++,page_dir++)
+ free_one_table(page_dir);
+ free_page(pg_dir);
+ invalidate();
+}
+
+/*
+ * clone_page_tables() clones the page table for a process - both
+ * processes will have the exact same pages in memory. There are
+ * probably races in the memory management with cloning, but we'll
+ * see..
+ */
+int clone_page_tables(struct task_struct * tsk)
+{
+ unsigned long pg_dir;
+
+ pg_dir = current->tss.cr3;
+ mem_map[MAP_NR(pg_dir)]++;
+ tsk->tss.cr3 = pg_dir;
+ return 0;
+}
+
+/*
+ * copy_page_tables() just copies the whole process memory range:
+ * note the special handling of RESERVED (ie kernel) pages, which
+ * means that they are always shared by all processes.
+ */
+int copy_page_tables(struct task_struct * tsk)
+{
+ int i;
+ unsigned long old_pg_dir, *old_page_dir;
+ unsigned long new_pg_dir, *new_page_dir;
+
+ if (!(new_pg_dir = get_free_page(GFP_KERNEL)))
+ return -ENOMEM;
+ old_pg_dir = current->tss.cr3;
+ tsk->tss.cr3 = new_pg_dir;
+ old_page_dir = (unsigned long *) old_pg_dir;
+ new_page_dir = (unsigned long *) new_pg_dir;
+ for (i = 0 ; i < PTRS_PER_PAGE ; i++,old_page_dir++,new_page_dir++) {
+ int j;
+ unsigned long old_pg_table, *old_page_table;
+ unsigned long new_pg_table, *new_page_table;
+
+ old_pg_table = *old_page_dir;
+ if (!old_pg_table)
+ continue;
+ if (old_pg_table >= high_memory || !(old_pg_table & PAGE_PRESENT)) {
+ printk("copy_page_tables: bad page table: "
+ "probable memory corruption\n");
+ *old_page_dir = 0;
+ continue;
+ }
+ if (mem_map[MAP_NR(old_pg_table)] & MAP_PAGE_RESERVED) {
+ *new_page_dir = old_pg_table;
+ continue;
+ }
+ if (!(new_pg_table = get_free_page(GFP_KERNEL))) {
+ free_page_tables(tsk);
+ return -ENOMEM;
+ }
+ old_page_table = (unsigned long *) (PAGE_MASK & old_pg_table);
+ new_page_table = (unsigned long *) (PAGE_MASK & new_pg_table);
+ for (j = 0 ; j < PTRS_PER_PAGE ; j++,old_page_table++,new_page_table++) {
+ unsigned long pg;
+ pg = *old_page_table;
+ if (!pg)
+ continue;
+ if (!(pg & PAGE_PRESENT)) {
+ *new_page_table = swap_duplicate(pg);
+ continue;
+ }
+ if (pg > high_memory || (mem_map[MAP_NR(pg)] & MAP_PAGE_RESERVED)) {
+ *new_page_table = pg;
+ continue;
+ }
+ if (pg & PAGE_COW)
+ pg &= ~PAGE_RW;
+ if (delete_from_swap_cache(pg))
+ pg |= PAGE_DIRTY;
+ *new_page_table = pg;
+ *old_page_table = pg;
+ mem_map[MAP_NR(pg)]++;
+ }
+ *new_page_dir = new_pg_table | PAGE_TABLE;
+ }
+ invalidate();
+ return 0;
+}
+
+/*
+ * a more complete version of free_page_tables which performs with page
+ * granularity.
+ */
+int unmap_page_range(unsigned long from, unsigned long size)
+{
+ unsigned long page, page_dir;
+ unsigned long *page_table, *dir;
+ unsigned long poff, pcnt, pc;
+
+ if (from & ~PAGE_MASK) {
+ printk("unmap_page_range called with wrong alignment\n");
+ return -EINVAL;
+ }
+ size = (size + ~PAGE_MASK) >> PAGE_SHIFT;
+ dir = PAGE_DIR_OFFSET(current->tss.cr3,from);
+ poff = (from >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
+ if ((pcnt = PTRS_PER_PAGE - poff) > size)
+ pcnt = size;
+
+ for ( ; size > 0; ++dir, size -= pcnt,
+ pcnt = (size > PTRS_PER_PAGE ? PTRS_PER_PAGE : size)) {
+ if (!(page_dir = *dir)) {
+ poff = 0;
+ continue;
+ }
+ if (!(page_dir & PAGE_PRESENT)) {
+ printk("unmap_page_range: bad page directory.");
+ continue;
+ }
+ page_table = (unsigned long *)(PAGE_MASK & page_dir);
+ if (poff) {
+ page_table += poff;
+ poff = 0;
+ }
+ for (pc = pcnt; pc--; page_table++) {
+ if ((page = *page_table) != 0) {
+ *page_table = 0;
+ if (PAGE_PRESENT & page) {
+ if (!(mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED))
+ if (current->mm->rss > 0)
+ --current->mm->rss;
+ free_page(PAGE_MASK & page);
+ } else
+ swap_free(page);
+ }
+ }
+ if (pcnt == PTRS_PER_PAGE) {
+ *dir = 0;
+ free_page(PAGE_MASK & page_dir);
+ }
+ }
+ invalidate();
+ return 0;
+}
+
+int zeromap_page_range(unsigned long from, unsigned long size, int mask)
+{
+ unsigned long *page_table, *dir;
+ unsigned long poff, pcnt;
+ unsigned long page;
+
+ if (mask) {
+ if ((mask & (PAGE_MASK|PAGE_PRESENT)) != PAGE_PRESENT) {
+ printk("zeromap_page_range: mask = %08x\n",mask);
+ return -EINVAL;
+ }
+ mask |= ZERO_PAGE;
+ }
+ if (from & ~PAGE_MASK) {
+ printk("zeromap_page_range: from = %08lx\n",from);
+ return -EINVAL;
+ }
+ dir = PAGE_DIR_OFFSET(current->tss.cr3,from);
+ size = (size + ~PAGE_MASK) >> PAGE_SHIFT;
+ poff = (from >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
+ if ((pcnt = PTRS_PER_PAGE - poff) > size)
+ pcnt = size;
+
+ while (size > 0) {
+ if (!(PAGE_PRESENT & *dir)) {
+ /* clear page needed here? SRB. */
+ if (!(page_table = (unsigned long*) get_free_page(GFP_KERNEL))) {
+ invalidate();
+ return -ENOMEM;
+ }
+ if (PAGE_PRESENT & *dir) {
+ free_page((unsigned long) page_table);
+ page_table = (unsigned long *)(PAGE_MASK & *dir++);
+ } else
+ *dir++ = ((unsigned long) page_table) | PAGE_TABLE;
+ } else
+ page_table = (unsigned long *)(PAGE_MASK & *dir++);
+ page_table += poff;
+ poff = 0;
+ for (size -= pcnt; pcnt-- ;) {
+ if ((page = *page_table) != 0) {
+ *page_table = 0;
+ if (page & PAGE_PRESENT) {
+ if (!(mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED))
+ if (current->mm->rss > 0)
+ --current->mm->rss;
+ free_page(PAGE_MASK & page);
+ } else
+ swap_free(page);
+ }
+ *page_table++ = mask;
+ }
+ pcnt = (size > PTRS_PER_PAGE ? PTRS_PER_PAGE : size);
+ }
+ invalidate();
+ return 0;
+}
+
+/*
+ * maps a range of physical memory into the requested pages. the old
+ * mappings are removed. any references to nonexistent pages result
+ * in null mappings (currently treated as "copy-on-access")
+ */
+int remap_page_range(unsigned long from, unsigned long to, unsigned long size, int mask)
+{
+ unsigned long *page_table, *dir;
+ unsigned long poff, pcnt;
+ unsigned long page;
+
+ if (mask) {
+ if ((mask & (PAGE_MASK|PAGE_PRESENT)) != PAGE_PRESENT) {
+ printk("remap_page_range: mask = %08x\n",mask);
+ return -EINVAL;
+ }
+ }
+ if ((from & ~PAGE_MASK) || (to & ~PAGE_MASK)) {
+ printk("remap_page_range: from = %08lx, to=%08lx\n",from,to);
+ return -EINVAL;
+ }
+ dir = PAGE_DIR_OFFSET(current->tss.cr3,from);
+ size = (size + ~PAGE_MASK) >> PAGE_SHIFT;
+ poff = (from >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
+ if ((pcnt = PTRS_PER_PAGE - poff) > size)
+ pcnt = size;
+
+ while (size > 0) {
+ if (!(PAGE_PRESENT & *dir)) {
+ /* clearing page here, needed? SRB. */
+ if (!(page_table = (unsigned long*) get_free_page(GFP_KERNEL))) {
+ invalidate();
+ return -1;
+ }
+ *dir++ = ((unsigned long) page_table) | PAGE_TABLE;
+ }
+ else
+ page_table = (unsigned long *)(PAGE_MASK & *dir++);
+ if (poff) {
+ page_table += poff;
+ poff = 0;
+ }
+
+ for (size -= pcnt; pcnt-- ;) {
+ if ((page = *page_table) != 0) {
+ *page_table = 0;
+ if (PAGE_PRESENT & page) {
+ if (!(mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED))
+ if (current->mm->rss > 0)
+ --current->mm->rss;
+ free_page(PAGE_MASK & page);
+ } else
+ swap_free(page);
+ }
+
+ /*
+ * the first condition should return an invalid access
+ * when the page is referenced. current assumptions
+ * cause it to be treated as demand allocation in some
+ * cases.
+ */
+ if (!mask)
+ *page_table++ = 0; /* not present */
+ else if (to >= high_memory)
+ *page_table++ = (to | mask);
+ else if (!mem_map[MAP_NR(to)])
+ *page_table++ = 0; /* not present */
+ else {
+ *page_table++ = (to | mask);
+ if (!(mem_map[MAP_NR(to)] & MAP_PAGE_RESERVED)) {
+ ++current->mm->rss;
+ mem_map[MAP_NR(to)]++;
+ }
+ }
+ to += PAGE_SIZE;
+ }
+ pcnt = (size > PTRS_PER_PAGE ? PTRS_PER_PAGE : size);
+ }
+ invalidate();
+ return 0;
+}
+
+/*
+ * This function puts a page in memory at the wanted address.
+ * It returns the physical address of the page gotten, 0 if
+ * out of memory (either when trying to access page-table or
+ * page.)
+ */
+unsigned long put_page(struct task_struct * tsk,unsigned long page,
+ unsigned long address,int prot)
+{
+ unsigned long *page_table;
+
+ if ((prot & (PAGE_MASK|PAGE_PRESENT)) != PAGE_PRESENT)
+ printk("put_page: prot = %08x\n",prot);
+ if (page >= high_memory) {
+ printk("put_page: trying to put page %08lx at %08lx\n",page,address);
+ return 0;
+ }
+ page_table = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
+ if ((*page_table) & PAGE_PRESENT)
+ page_table = (unsigned long *) (PAGE_MASK & *page_table);
+ else {
+ printk("put_page: bad page directory entry\n");
+ oom(tsk);
+ *page_table = BAD_PAGETABLE | PAGE_TABLE;
+ return 0;
+ }
+ page_table += (address >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
+ if (*page_table) {
+ printk("put_page: page already exists\n");
+ *page_table = 0;
+ invalidate();
+ }
+ *page_table = page | prot;
+/* no need for invalidate */
+ return page;
+}
+
+/*
+ * The previous function doesn't work very well if you also want to mark
+ * the page dirty: exec.c wants this, as it has earlier changed the page,
+ * and we want the dirty-status to be correct (for VM). Thus the same
+ * routine, but this time we mark it dirty too.
+ */
+unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
+{
+ unsigned long tmp, *page_table;
+
+ if (page >= high_memory)
+ printk("put_dirty_page: trying to put page %08lx at %08lx\n",page,address);
+ if (mem_map[MAP_NR(page)] != 1)
+ printk("mem_map disagrees with %08lx at %08lx\n",page,address);
+ page_table = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
+ if (PAGE_PRESENT & *page_table)
+ page_table = (unsigned long *) (PAGE_MASK & *page_table);
+ else {
+ if (!(tmp = get_free_page(GFP_KERNEL)))
+ return 0;
+ if (PAGE_PRESENT & *page_table) {
+ free_page(tmp);
+ page_table = (unsigned long *) (PAGE_MASK & *page_table);
+ } else {
+ *page_table = tmp | PAGE_TABLE;
+ page_table = (unsigned long *) tmp;
+ }
+ }
+ page_table += (address >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
+ if (*page_table) {
+ printk("put_dirty_page: page already exists\n");
+ *page_table = 0;
+ invalidate();
+ }
+ *page_table = page | (PAGE_DIRTY | PAGE_PRIVATE);
+/* no need for invalidate */
+ return page;
+}
+
+/*
+ * This routine handles present pages, when users try to write
+ * to a shared page. It is done by copying the page to a new address
+ * and decrementing the shared-page counter for the old page.
+ *
+ * Goto-purists beware: the only reason for goto's here is that it results
+ * in better assembly code.. The "default" path will see no jumps at all.
+ */
+void do_wp_page(struct vm_area_struct * vma, unsigned long address,
+ unsigned long error_code)
+{
+ unsigned long *pde, pte, old_page, prot;
+ unsigned long new_page;
+
+ new_page = __get_free_page(GFP_KERNEL);
+ pde = PAGE_DIR_OFFSET(vma->vm_task->tss.cr3,address);
+ pte = *pde;
+ if (!(pte & PAGE_PRESENT))
+ goto end_wp_page;
+ if ((pte & PAGE_TABLE) != PAGE_TABLE || pte >= high_memory)
+ goto bad_wp_pagetable;
+ pte &= PAGE_MASK;
+ pte += PAGE_PTR(address);
+ old_page = *(unsigned long *) pte;
+ if (!(old_page & PAGE_PRESENT))
+ goto end_wp_page;
+ if (old_page >= high_memory)
+ goto bad_wp_page;
+ if (old_page & PAGE_RW)
+ goto end_wp_page;
+ vma->vm_task->mm->min_flt++;
+ prot = (old_page & ~PAGE_MASK) | PAGE_RW | PAGE_DIRTY;
+ old_page &= PAGE_MASK;
+ if (mem_map[MAP_NR(old_page)] != 1) {
+ if (new_page) {
+ if (mem_map[MAP_NR(old_page)] & MAP_PAGE_RESERVED)
+ ++vma->vm_task->mm->rss;
+ copy_page(old_page,new_page);
+ *(unsigned long *) pte = new_page | prot;
+ free_page(old_page);
+ invalidate();
+ return;
+ }
+ free_page(old_page);
+ oom(vma->vm_task);
+ *(unsigned long *) pte = BAD_PAGE | prot;
+ invalidate();
+ return;
+ }
+ *(unsigned long *) pte |= PAGE_RW | PAGE_DIRTY;
+ invalidate();
+ if (new_page)
+ free_page(new_page);
+ return;
+bad_wp_page:
+ printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
+ *(unsigned long *) pte = BAD_PAGE | PAGE_SHARED;
+ send_sig(SIGKILL, vma->vm_task, 1);
+ goto end_wp_page;
+bad_wp_pagetable:
+ printk("do_wp_page: bogus page-table at address %08lx (%08lx)\n",address,pte);
+ *pde = BAD_PAGETABLE | PAGE_TABLE;
+ send_sig(SIGKILL, vma->vm_task, 1);
+end_wp_page:
+ if (new_page)
+ free_page(new_page);
+ return;
+}
+
+/*
+ * Ugly, ugly, but the goto's result in better assembly..
+ */
+int verify_area(int type, const void * addr, unsigned long size)
+{
+ struct vm_area_struct * vma;
+ unsigned long start = (unsigned long) addr;
+
+ /* If the current user space is mapped to kernel space (for the
+ * case where we use a fake user buffer with get_fs/set_fs()) we
+ * don't expect to find the address in the user vm map.
+ */
+ if (get_fs() == get_ds())
+ return 0;
+
+ for (vma = current->mm->mmap ; ; vma = vma->vm_next) {
+ if (!vma)
+ goto bad_area;
+ if (vma->vm_end > start)
+ break;
+ }
+ if (vma->vm_start <= start)
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+ if (vma->vm_end - start > current->rlim[RLIMIT_STACK].rlim_cur)
+ goto bad_area;
+
+good_area:
+ if (!wp_works_ok && type == VERIFY_WRITE)
+ goto check_wp_fault_by_hand;
+ for (;;) {
+ struct vm_area_struct * next;
+ if (!(vma->vm_page_prot & PAGE_USER))
+ goto bad_area;
+ if (type != VERIFY_READ && !(vma->vm_page_prot & (PAGE_COW | PAGE_RW)))
+ goto bad_area;
+ if (vma->vm_end - start >= size)
+ return 0;
+ next = vma->vm_next;
+ if (!next || vma->vm_end != next->vm_start)
+ goto bad_area;
+ vma = next;
+ }
+
+check_wp_fault_by_hand:
+ size--;
+ size += start & ~PAGE_MASK;
+ size >>= PAGE_SHIFT;
+ start &= PAGE_MASK;
+
+ for (;;) {
+ if (!(vma->vm_page_prot & (PAGE_COW | PAGE_RW)))
+ goto bad_area;
+ do_wp_page(vma, start, PAGE_PRESENT);
+ if (!size)
+ return 0;
+ size--;
+ start += PAGE_SIZE;
+ if (start < vma->vm_end)
+ continue;
+ vma = vma->vm_next;
+ if (!vma || vma->vm_start != start)
+ break;
+ }
+
+bad_area:
+ return -EFAULT;
+}
+
+static inline void get_empty_page(struct task_struct * tsk, unsigned long address)
+{
+ unsigned long tmp;
+
+ if (!(tmp = get_free_page(GFP_KERNEL))) {
+ oom(tsk);
+ tmp = BAD_PAGE;
+ }
+ if (!put_page(tsk,tmp,address,PAGE_PRIVATE))
+ free_page(tmp);
+}
+
+/*
+ * try_to_share() checks the page at address "address" in the task "p",
+ * to see if it exists, and if it is clean. If so, share it with the current
+ * task.
+ *
+ * NOTE! This assumes we have checked that p != current, and that they
+ * share the same inode and can generally otherwise be shared.
+ */
+static int try_to_share(unsigned long to_address, struct vm_area_struct * to_area,
+ unsigned long from_address, struct vm_area_struct * from_area,
+ unsigned long newpage)
+{
+ unsigned long from;
+ unsigned long to;
+ unsigned long from_page;
+ unsigned long to_page;
+
+ from_page = (unsigned long)PAGE_DIR_OFFSET(from_area->vm_task->tss.cr3,from_address);
+ to_page = (unsigned long)PAGE_DIR_OFFSET(to_area->vm_task->tss.cr3,to_address);
+/* is there a page-directory at from? */
+ from = *(unsigned long *) from_page;
+ if (!(from & PAGE_PRESENT))
+ return 0;
+ from &= PAGE_MASK;
+ from_page = from + PAGE_PTR(from_address);
+ from = *(unsigned long *) from_page;
+/* is the page present? */
+ if (!(from & PAGE_PRESENT))
+ return 0;
+/* if it is private, it must be clean to be shared */
+ if (from & PAGE_DIRTY) {
+ if (from_area->vm_page_prot & PAGE_COW)
+ return 0;
+ if (!(from_area->vm_page_prot & PAGE_RW))
+ return 0;
+ }
+/* is the page reasonable at all? */
+ if (from >= high_memory)
+ return 0;
+ if (mem_map[MAP_NR(from)] & MAP_PAGE_RESERVED)
+ return 0;
+/* is the destination ok? */
+ to = *(unsigned long *) to_page;
+ if (!(to & PAGE_PRESENT))
+ return 0;
+ to &= PAGE_MASK;
+ to_page = to + PAGE_PTR(to_address);
+ if (*(unsigned long *) to_page)
+ return 0;
+/* do we copy? */
+ if (newpage) {
+ if (in_swap_cache(from)) { /* implies PAGE_DIRTY */
+ if (from_area->vm_page_prot & PAGE_COW)
+ return 0;
+ if (!(from_area->vm_page_prot & PAGE_RW))
+ return 0;
+ }
+ copy_page((from & PAGE_MASK), newpage);
+ *(unsigned long *) to_page = newpage | to_area->vm_page_prot;
+ return 1;
+ }
+/* do a final swap-cache test before sharing them.. */
+ if (in_swap_cache(from)) {
+ if (from_area->vm_page_prot & PAGE_COW)
+ return 0;
+ if (!(from_area->vm_page_prot & PAGE_RW))
+ return 0;
+ from |= PAGE_DIRTY;
+ *(unsigned long *) from_page = from;
+ delete_from_swap_cache(from);
+ invalidate();
+ }
+ mem_map[MAP_NR(from)]++;
+/* fill in the 'to' field, checking for COW-stuff */
+ to = (from & (PAGE_MASK | PAGE_DIRTY)) | to_area->vm_page_prot;
+ if (to & PAGE_COW)
+ to &= ~PAGE_RW;
+ *(unsigned long *) to_page = to;
+/* Check if we need to do anything at all to the 'from' field */
+ if (!(from & PAGE_RW))
+ return 1;
+ if (!(from_area->vm_page_prot & PAGE_COW))
+ return 1;
+/* ok, need to mark it read-only, so invalidate any possible old TB entry */
+ from &= ~PAGE_RW;
+ *(unsigned long *) from_page = from;
+ invalidate();
+ return 1;
+}
+
+/*
+ * share_page() tries to find a process that could share a page with
+ * the current one.
+ *
+ * We first check if it is at all feasible by checking inode->i_count.
+ * It should be >1 if there are other tasks sharing this inode.
+ */
+static int share_page(struct vm_area_struct * area, unsigned long address,
+ unsigned long error_code, unsigned long newpage)
+{
+ struct inode * inode;
+ struct task_struct ** p;
+ unsigned long offset;
+ unsigned long from_address;
+ unsigned long give_page;
+
+ if (!area || !(inode = area->vm_inode) || inode->i_count < 2)
+ return 0;
+ /* do we need to copy or can we just share? */
+ give_page = 0;
+ if ((area->vm_page_prot & PAGE_COW) && (error_code & PAGE_RW)) {
+ if (!newpage)
+ return 0;
+ give_page = newpage;
+ }
+ offset = address - area->vm_start + area->vm_offset;
+ for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
+ struct vm_area_struct * mpnt;
+ if (!*p)
+ continue;
+ if (area->vm_task == *p)
+ continue;
+ /* Now see if there is something in the VMM that
+ we can share pages with */
+ for (mpnt = (*p)->mm->mmap; mpnt; mpnt = mpnt->vm_next) {
+ /* must be same inode */
+ if (mpnt->vm_inode != inode)
+ continue;
+ /* offsets must be mutually page-aligned */
+ if ((mpnt->vm_offset ^ area->vm_offset) & ~PAGE_MASK)
+ continue;
+ /* the other area must actually cover the wanted page.. */
+ from_address = offset + mpnt->vm_start - mpnt->vm_offset;
+ if (from_address < mpnt->vm_start || from_address >= mpnt->vm_end)
+ continue;
+ /* .. NOW we can actually try to use the same physical page */
+ if (!try_to_share(address, area, from_address, mpnt, give_page))
+ continue;
+ /* free newpage if we never used it.. */
+ if (give_page || !newpage)
+ return 1;
+ free_page(newpage);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * fill in an empty page-table if none exists.
+ */
+static inline unsigned long get_empty_pgtable(struct task_struct * tsk,unsigned long address)
+{
+ unsigned long page;
+ unsigned long *p;
+
+ p = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
+ if (PAGE_PRESENT & *p)
+ return *p;
+ if (*p) {
+ printk("get_empty_pgtable: bad page-directory entry \n");
+ *p = 0;
+ }
+ page = get_free_page(GFP_KERNEL);
+ p = PAGE_DIR_OFFSET(tsk->tss.cr3,address);
+ if (PAGE_PRESENT & *p) {
+ free_page(page);
+ return *p;
+ }
+ if (*p) {
+ printk("get_empty_pgtable: bad page-directory entry \n");
+ *p = 0;
+ }
+ if (page) {
+ *p = page | PAGE_TABLE;
+ return *p;
+ }
+ oom(current);
+ *p = BAD_PAGETABLE | PAGE_TABLE;
+ return 0;
+}
+
+static inline void do_swap_page(struct vm_area_struct * vma,
+ unsigned long address, unsigned long * pge, unsigned long entry)
+{
+ unsigned long page;
+
+ if (vma->vm_ops && vma->vm_ops->swapin)
+ page = vma->vm_ops->swapin(vma, entry);
+ else
+ page = swap_in(entry);
+ if (*pge != entry) {
+ free_page(page);
+ return;
+ }
+ page = page | vma->vm_page_prot;
+ if (mem_map[MAP_NR(page)] > 1 && (page & PAGE_COW))
+ page &= ~PAGE_RW;
+ ++vma->vm_task->mm->rss;
+ ++vma->vm_task->mm->maj_flt;
+ *pge = page;
+ return;
+}
+
+void do_no_page(struct vm_area_struct * vma, unsigned long address,
+ unsigned long error_code)
+{
+ unsigned long page, entry, prot;
+
+ page = get_empty_pgtable(vma->vm_task,address);
+ if (!page)
+ return;
+ page &= PAGE_MASK;
+ page += PAGE_PTR(address);
+ entry = *(unsigned long *) page;
+ if (entry & PAGE_PRESENT)
+ return;
+ if (entry) {
+ do_swap_page(vma, address, (unsigned long *) page, entry);
+ return;
+ }
+ address &= PAGE_MASK;
+
+ if (!vma->vm_ops || !vma->vm_ops->nopage) {
+ ++vma->vm_task->mm->rss;
+ ++vma->vm_task->mm->min_flt;
+ get_empty_page(vma->vm_task,address);
+ return;
+ }
+ page = get_free_page(GFP_KERNEL);
+ if (share_page(vma, address, error_code, page)) {
+ ++vma->vm_task->mm->min_flt;
+ ++vma->vm_task->mm->rss;
+ return;
+ }
+ if (!page) {
+ oom(current);
+ put_page(vma->vm_task, BAD_PAGE, address, PAGE_PRIVATE);
+ return;
+ }
+ ++vma->vm_task->mm->maj_flt;
+ ++vma->vm_task->mm->rss;
+ prot = vma->vm_page_prot;
+ /*
+ * The fourth argument is "no_share", which tells the low-level code
+ * to copy, not share the page even if sharing is possible. It's
+ * essentially an early COW detection ("moo at 5 AM").
+ */
+ page = vma->vm_ops->nopage(vma, address, page, (error_code & PAGE_RW) && (prot & PAGE_COW));
+ if (share_page(vma, address, error_code, 0)) {
+ free_page(page);
+ return;
+ }
+ /*
+ * This silly early PAGE_DIRTY setting removes a race
+ * due to the bad i386 page protection.
+ */
+ if (error_code & PAGE_RW) {
+ prot |= PAGE_DIRTY; /* can't be COW-shared: see "no_share" above */
+ } else if ((prot & PAGE_COW) && mem_map[MAP_NR(page)] > 1)
+ prot &= ~PAGE_RW;
+ if (put_page(vma->vm_task, page, address, prot))
+ return;
+ free_page(page);
+ oom(current);
+}
+
+/*
+ * This routine handles page faults. It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+ struct vm_area_struct * vma;
+ unsigned long address;
+ unsigned long page;
+
+ /* get the address */
+ __asm__("movl %%cr2,%0":"=r" (address));
+ for (vma = current->mm->mmap ; ; vma = vma->vm_next) {
+ if (!vma)
+ goto bad_area;
+ if (vma->vm_end > address)
+ break;
+ }
+ if (vma->vm_start <= address)
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+ if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur)
+ goto bad_area;
+ vma->vm_offset -= vma->vm_start - (address & PAGE_MASK);
+ vma->vm_start = (address & PAGE_MASK);
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+ if (regs->eflags & VM_MASK) {
+ unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
+ if (bit < 32)
+ current->screen_bitmap |= 1 << bit;
+ }
+ if (!(vma->vm_page_prot & PAGE_USER))
+ goto bad_area;
+ if (error_code & PAGE_PRESENT) {
+ if (!(vma->vm_page_prot & (PAGE_RW | PAGE_COW)))
+ goto bad_area;
+#ifdef CONFIG_TEST_VERIFY_AREA
+ if (regs->cs == KERNEL_CS)
+ printk("WP fault at %08x\n", regs->eip);
+#endif
+ do_wp_page(vma, address, error_code);
+ return;
+ }
+ do_no_page(vma, address, error_code);
+ return;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+ if (error_code & PAGE_USER) {
+ current->tss.cr2 = address;
+ current->tss.error_code = error_code;
+ current->tss.trap_no = 14;
+ send_sig(SIGSEGV, current, 1);
+ return;
+ }
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+ if (wp_works_ok < 0 && address == TASK_SIZE && (error_code & PAGE_PRESENT)) {
+ wp_works_ok = 1;
+ pg0[0] = PAGE_SHARED;
+ invalidate();
+ printk("This processor honours the WP bit even when in supervisor mode. Good.\n");
+ return;
+ }
+ if ((unsigned long) (address-TASK_SIZE) < PAGE_SIZE) {
+ printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+ pg0[0] = PAGE_SHARED;
+ } else
+ printk(KERN_ALERT "Unable to handle kernel paging request");
+ printk(" at virtual address %08lx\n",address);
+ __asm__("movl %%cr3,%0" : "=r" (page));
+ printk(KERN_ALERT "current->tss.cr3 = %08lx, %%cr3 = %08lx\n",
+ current->tss.cr3, page);
+ page = ((unsigned long *) page)[address >> 22];
+ printk(KERN_ALERT "*pde = %08lx\n", page);
+ if (page & PAGE_PRESENT) {
+ page &= PAGE_MASK;
+ address &= 0x003ff000;
+ page = ((unsigned long *) page)[address >> PAGE_SHIFT];
+ printk(KERN_ALERT "*pte = %08lx\n", page);
+ }
+ die_if_kernel("Oops", regs, error_code);
+ do_exit(SIGKILL);
+}
+
+/*
+ * BAD_PAGE is the page that is used for page faults when linux
+ * is out-of-memory. Older versions of linux just did a
+ * do_exit(), but using this instead means there is less risk
+ * for a process dying in kernel mode, possibly leaving an inode
+ * unused etc..
+ *
+ * BAD_PAGETABLE is the accompanying page-table: it is initialized
+ * to point to BAD_PAGE entries.
+ *
+ * ZERO_PAGE is a special page that is used for zero-initialized
+ * data and COW.
+ */
+unsigned long __bad_pagetable(void)
+{
+ extern char empty_bad_page_table[PAGE_SIZE];
+
+ __asm__ __volatile__("cld ; rep ; stosl":
+ :"a" (BAD_PAGE + PAGE_TABLE),
+ "D" ((long) empty_bad_page_table),
+ "c" (PTRS_PER_PAGE)
+ :"di","cx");
+ return (unsigned long) empty_bad_page_table;
+}
+
+unsigned long __bad_page(void)
+{
+ extern char empty_bad_page[PAGE_SIZE];
+
+ __asm__ __volatile__("cld ; rep ; stosl":
+ :"a" (0),
+ "D" ((long) empty_bad_page),
+ "c" (PTRS_PER_PAGE)
+ :"di","cx");
+ return (unsigned long) empty_bad_page;
+}
+
+unsigned long __zero_page(void)
+{
+ extern char empty_zero_page[PAGE_SIZE];
+
+ __asm__ __volatile__("cld ; rep ; stosl":
+ :"a" (0),
+ "D" ((long) empty_zero_page),
+ "c" (PTRS_PER_PAGE)
+ :"di","cx");
+ return (unsigned long) empty_zero_page;
+}
+
+void show_mem(void)
+{
+ int i,free = 0,total = 0,reserved = 0;
+ int shared = 0;
+
+ printk("Mem-info:\n");
+ show_free_areas();
+ printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
+ i = high_memory >> PAGE_SHIFT;
+ while (i-- > 0) {
+ total++;
+ if (mem_map[i] & MAP_PAGE_RESERVED)
+ reserved++;
+ else if (!mem_map[i])
+ free++;
+ else
+ shared += mem_map[i]-1;
+ }
+ printk("%d pages of RAM\n",total);
+ printk("%d free pages\n",free);
+ printk("%d reserved pages\n",reserved);
+ printk("%d pages shared\n",shared);
+ show_buffers();
+#ifdef CONFIG_NET
+ show_net_buffers();
+#endif
+}
+
+extern unsigned long free_area_init(unsigned long, unsigned long);
+
+/*
+ * paging_init() sets up the page tables - note that the first 4MB are
+ * already mapped by head.S.
+ *
+ * This routine also unmaps the page at virtual kernel address 0, so
+ * that we can trap those pesky NULL-reference errors in the kernel.
+ */
+unsigned long paging_init(unsigned long start_mem, unsigned long end_mem)
+{
+ unsigned long * pg_dir;
+ unsigned long * pg_table;
+ unsigned long tmp;
+ unsigned long address;
+
+/*
+ * Physical page 0 is special; it's not touched by Linux since BIOS
+ * and SMM (for laptops with [34]86/SL chips) may need it. It is read
+ * and write protected to detect null pointer references in the
+ * kernel.
+ */
+#if 0
+ memset((void *) 0, 0, PAGE_SIZE);
+#endif
+ start_mem = PAGE_ALIGN(start_mem);
+ address = 0;
+ pg_dir = swapper_pg_dir;
+ while (address < end_mem) {
+ tmp = *(pg_dir + 768); /* at virtual addr 0xC0000000 */
+ if (!tmp) {
+ tmp = start_mem | PAGE_TABLE;
+ *(pg_dir + 768) = tmp;
+ start_mem += PAGE_SIZE;
+ }
+ *pg_dir = tmp; /* also map it in at 0x0000000 for init */
+ pg_dir++;
+ pg_table = (unsigned long *) (tmp & PAGE_MASK);
+ for (tmp = 0 ; tmp < PTRS_PER_PAGE ; tmp++,pg_table++) {
+ if (address < end_mem)
+ *pg_table = address | PAGE_SHARED;
+ else
+ *pg_table = 0;
+ address += PAGE_SIZE;
+ }
+ }
+ invalidate();
+ return free_area_init(start_mem, end_mem);
+}
+
+void mem_init(unsigned long start_low_mem,
+ unsigned long start_mem, unsigned long end_mem)
+{
+ int codepages = 0;
+ int reservedpages = 0;
+ int datapages = 0;
+ unsigned long tmp;
+ extern int etext;
+
+ cli();
+ end_mem &= PAGE_MASK;
+ high_memory = end_mem;
+
+ /* mark usable pages in the mem_map[] */
+ start_low_mem = PAGE_ALIGN(start_low_mem);
+ start_mem = PAGE_ALIGN(start_mem);
+
+ /*
+ * IBM messed up *AGAIN* in their thinkpad: 0xA0000 -> 0x9F000.
+ * They seem to have done something stupid with the floppy
+ * controller as well..
+ */
+ while (start_low_mem < 0x9f000) {
+ mem_map[MAP_NR(start_low_mem)] = 0;
+ start_low_mem += PAGE_SIZE;
+ }
+
+ while (start_mem < high_memory) {
+ mem_map[MAP_NR(start_mem)] = 0;
+ start_mem += PAGE_SIZE;
+ }
+#ifdef CONFIG_SOUND
+ sound_mem_init();
+#endif
+ for (tmp = 0 ; tmp < high_memory ; tmp += PAGE_SIZE) {
+ if (mem_map[MAP_NR(tmp)]) {
+ if (tmp >= 0xA0000 && tmp < 0x100000)
+ reservedpages++;
+ else if (tmp < (unsigned long) &etext)
+ codepages++;
+ else
+ datapages++;
+ continue;
+ }
+ mem_map[MAP_NR(tmp)] = 1;
+ free_page(tmp);
+ }
+ tmp = nr_free_pages << PAGE_SHIFT;
+ printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data)\n",
+ tmp >> 10,
+ high_memory >> 10,
+ codepages << (PAGE_SHIFT-10),
+ reservedpages << (PAGE_SHIFT-10),
+ datapages << (PAGE_SHIFT-10));
+/* test if the WP bit is honoured in supervisor mode */
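+/*
+ * The kernel segments are based at TASK_SIZE, so the byte write to
+ * offset 0 below goes through the now read-only pg0[0]. If the CPU
+ * honours WP in supervisor mode this faults, and do_page_fault()
+ * recognises the address (== TASK_SIZE) and sets wp_works_ok to 1.
+ */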
+ wp_works_ok = -1;
+ pg0[0] = PAGE_READONLY;
+ invalidate();
+ __asm__ __volatile__("movb 0,%%al ; movb %%al,0": : :"ax", "memory");
+ pg0[0] = 0;
+ invalidate();
+ if (wp_works_ok < 0)
+ wp_works_ok = 0;
+#ifdef CONFIG_TEST_VERIFY_AREA
+ wp_works_ok = 0;
+#endif
+ return;
+}
+
+void si_meminfo(struct sysinfo *val)
+{
+ int i;
+
+ i = high_memory >> PAGE_SHIFT;
+ val->totalram = 0;
+ val->sharedram = 0;
+ val->freeram = nr_free_pages << PAGE_SHIFT;
+ val->bufferram = buffermem;
+ while (i-- > 0) {
+ if (mem_map[i] & MAP_PAGE_RESERVED)
+ continue;
+ val->totalram++;
+ if (!mem_map[i])
+ continue;
+ val->sharedram += mem_map[i]-1;
+ }
+ val->totalram <<= PAGE_SHIFT;
+ val->sharedram <<= PAGE_SHIFT;
+ return;
+}
+
+
+/*
+ * This handles a generic mmap of a disk file.
+ */
+static unsigned long file_mmap_nopage(struct vm_area_struct * area, unsigned long address,
+ unsigned long page, int no_share)
+{
+ struct inode * inode = area->vm_inode;
+ unsigned int block;
+ int nr[8];
+ int i, *p;
+
+ address &= PAGE_MASK;
+ block = address - area->vm_start + area->vm_offset;
+ block >>= inode->i_sb->s_blocksize_bits;
+ i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
+ p = nr;
+ do {
+ *p = bmap(inode,block);
+ i--;
+ block++;
+ p++;
+ } while (i > 0);
+ return bread_page(page, inode->i_dev, nr, inode->i_sb->s_blocksize, no_share);
+}
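+
+/*
+ * (With a 1kB filesystem blocksize the loop above resolves four block
+ * numbers per 4kB page; nr[8] is sized for the 512-byte worst case.)
+ */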
+
+struct vm_operations_struct file_mmap = {
+ NULL, /* open */
+ NULL, /* close */
+ file_mmap_nopage, /* nopage */
+ NULL, /* wppage */
+ NULL, /* share */
+ NULL, /* unmap */
+};
diff --git a/arch/i386/mm/mmap.c b/arch/i386/mm/mmap.c
new file mode 100644
index 000000000..fbbea985c
--- /dev/null
+++ b/arch/i386/mm/mmap.c
@@ -0,0 +1,470 @@
+/*
+ * linux/mm/mmap.c
+ *
+ * Written by obz.
+ */
+#include <linux/stat.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/shm.h>
+#include <linux/errno.h>
+#include <linux/mman.h>
+#include <linux/string.h>
+#include <linux/malloc.h>
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+static int anon_map(struct inode *, struct file *, struct vm_area_struct *);
+
+/*
+ * description of effects of mapping type and prot in current implementation.
+ * this is due to the limited x86 page protection hardware. The expected
+ * behavior is in parens:
+ *
+ * map_type prot
+ * PROT_NONE PROT_READ PROT_WRITE PROT_EXEC
+ * MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes
+ * w: (no) no w: (no) no w: (yes) yes w: (no) no
+ * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
+ *
+ * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes
+ * w: (no) no w: (no) no w: (copy) copy w: (no) no
+ * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
+ *
+ */
+
+int do_mmap(struct file * file, unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags, unsigned long off)
+{
+ int mask, error;
+ struct vm_area_struct * vma;
+
+ if ((len = PAGE_ALIGN(len)) == 0)
+ return addr;
+
+ if (addr > TASK_SIZE || len > TASK_SIZE || addr > TASK_SIZE-len)
+ return -EINVAL;
+
+ /* offset overflow? */
+ if (off + len < off)
+ return -EINVAL;
+
+ /*
+ * do simple checking here so the lower-level routines won't have
+ * to. we assume access permissions have been handled by the open
+ * of the memory object, so we don't do any here.
+ */
+
+ if (file != NULL) {
+ switch (flags & MAP_TYPE) {
+ case MAP_SHARED:
+ if ((prot & PROT_WRITE) && !(file->f_mode & 2))
+ return -EACCES;
+ /* fall through */
+ case MAP_PRIVATE:
+ if (!(file->f_mode & 1))
+ return -EACCES;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ if ((flags & MAP_DENYWRITE) && (file->f_inode->i_wcount > 0))
+ return -ETXTBSY;
+ } else if ((flags & MAP_TYPE) == MAP_SHARED)
+ return -EINVAL;
+
+ /*
+ * obtain the address to map to. we verify (or select) it and ensure
+ * that it represents a valid section of the address space.
+ */
+
+ if (flags & MAP_FIXED) {
+ if (addr & ~PAGE_MASK)
+ return -EINVAL;
+ if (len > TASK_SIZE || addr > TASK_SIZE - len)
+ return -EINVAL;
+ } else {
+ addr = get_unmapped_area(len);
+ if (!addr)
+ return -ENOMEM;
+ }
+
+ /*
+ * determine the object being mapped and call the appropriate
+ * specific mapper. the address has already been validated, but
+ * not yet unmapped; the old maps are removed from the list further down.
+ */
+ if (file && (!file->f_op || !file->f_op->mmap))
+ return -ENODEV;
+ mask = PAGE_PRESENT;
+ if (prot & (PROT_READ | PROT_EXEC))
+ mask |= PAGE_READONLY;
+ if (prot & PROT_WRITE)
+ if ((flags & MAP_TYPE) == MAP_PRIVATE)
+ mask |= PAGE_COPY;
+ else
+ mask |= PAGE_SHARED;
+
+ vma = (struct vm_area_struct *)kmalloc(sizeof(struct vm_area_struct),
+ GFP_KERNEL);
+ if (!vma)
+ return -ENOMEM;
+
+ vma->vm_task = current;
+ vma->vm_start = addr;
+ vma->vm_end = addr + len;
+ vma->vm_page_prot = mask;
+ vma->vm_flags = prot & (VM_READ | VM_WRITE | VM_EXEC);
+ vma->vm_flags |= flags & (VM_GROWSDOWN | VM_DENYWRITE | VM_EXECUTABLE);
+
+ if (file) {
+ if (file->f_mode & 1)
+ vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+ if (flags & MAP_SHARED) {
+ vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
+ if (!(file->f_mode & 2))
+ vma->vm_flags &= ~VM_MAYWRITE;
+ }
+ } else
+ vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+ vma->vm_ops = NULL;
+ vma->vm_offset = off;
+ vma->vm_inode = NULL;
+ vma->vm_pte = 0;
+
+ do_munmap(addr, len); /* Clear old maps */
+
+ if (file)
+ error = file->f_op->mmap(file->f_inode, file, vma);
+ else
+ error = anon_map(NULL, NULL, vma);
+
+ if (error) {
+ kfree(vma);
+ return error;
+ }
+ insert_vm_struct(current, vma);
+ merge_segments(current->mm->mmap);
+ return addr;
+}
+
+/*
+ * Get an address range which is currently unmapped.
+ * For mmap() without MAP_FIXED and shmat() with addr=0.
+ * Return value 0 means ENOMEM.
+ */
+unsigned long get_unmapped_area(unsigned long len)
+{
+ struct vm_area_struct * vmm;
+ unsigned long gap_start = 0, gap_end;
+
+ for (vmm = current->mm->mmap; ; vmm = vmm->vm_next) {
+ if (gap_start < SHM_RANGE_START)
+ gap_start = SHM_RANGE_START;
+ if (!vmm || ((gap_end = vmm->vm_start) > SHM_RANGE_END))
+ gap_end = SHM_RANGE_END;
+ gap_start = PAGE_ALIGN(gap_start);
+ gap_end &= PAGE_MASK;
+ if ((gap_start <= gap_end) && (gap_end - gap_start >= len))
+ return gap_start;
+ if (!vmm)
+ return 0;
+ gap_start = vmm->vm_end;
+ }
+}
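+
+/*
+ * Usage note: do_mmap() falls back on this first-fit walk when
+ * MAP_FIXED is not given -- the first page-aligned gap of at least
+ * len bytes inside [SHM_RANGE_START, SHM_RANGE_END) wins.
+ */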
+
+asmlinkage int sys_mmap(unsigned long *buffer)
+{
+ int error;
+ unsigned long flags;
+ struct file * file = NULL;
+
+ error = verify_area(VERIFY_READ, buffer, 6*sizeof(long));
+ if (error)
+ return error;
+ flags = get_fs_long(buffer+3);
+ if (!(flags & MAP_ANONYMOUS)) {
+ unsigned long fd = get_fs_long(buffer+4);
+ if (fd >= NR_OPEN || !(file = current->files->fd[fd]))
+ return -EBADF;
+ }
+ return do_mmap(file, get_fs_long(buffer), get_fs_long(buffer+1),
+ get_fs_long(buffer+2), flags, get_fs_long(buffer+5));
+}
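+
+/*
+ * Note that old-style mmap() passes a single pointer to a block of six
+ * longwords (addr, len, prot, flags, fd, offset) instead of six separate
+ * arguments. A user-side stub would pack them roughly like this (an
+ * illustrative sketch, not the actual library source):
+ *
+ *	unsigned long args[6] = { addr, len, prot, flags, fd, off };
+ *	result = mmap_syscall(args);
+ */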
+
+/*
+ * Normal function to fix up a mapping
+ * This function is the default for when an area has no specific
+ * function. This may be used as part of a more specific routine.
+ * This function works out what part of an area is affected and
+ * adjusts the mapping information. Since the actual page
+ * manipulation is done in do_mmap(), none need be done here,
+ * though doing it here would arguably be more appropriate.
+ *
+ * By the time this function is called, the area struct has been
+ * removed from the process mapping list, so it needs to be
+ * reinserted if necessary.
+ *
+ * The 4 main cases are:
+ * Unmapping the whole area
+ * Unmapping from the start of the segment to a point in it
+ * Unmapping from an intermediate point to the end
+ *	Unmapping between two intermediate points, making a hole.
+ *
+ * Case 4 involves the creation of 2 new areas, for each side of
+ * the hole.
+ */
+void unmap_fixup(struct vm_area_struct *area,
+ unsigned long addr, size_t len)
+{
+ struct vm_area_struct *mpnt;
+ unsigned long end = addr + len;
+
+ if (addr < area->vm_start || addr >= area->vm_end ||
+ end <= area->vm_start || end > area->vm_end ||
+ end < addr)
+ {
+ printk("unmap_fixup: area=%lx-%lx, unmap %lx-%lx!!\n",
+ area->vm_start, area->vm_end, addr, end);
+ return;
+ }
+
+ /* Unmapping the whole area */
+ if (addr == area->vm_start && end == area->vm_end) {
+ if (area->vm_ops && area->vm_ops->close)
+ area->vm_ops->close(area);
+ if (area->vm_inode)
+ iput(area->vm_inode);
+ return;
+ }
+
+	/* Unmapping reaches one of the ends: just shrink the area */
+ if (addr >= area->vm_start && end == area->vm_end)
+ area->vm_end = addr;
+ if (addr == area->vm_start && end <= area->vm_end) {
+ area->vm_offset += (end - area->vm_start);
+ area->vm_start = end;
+ }
+
+ /* Unmapping a hole */
+ if (addr > area->vm_start && end < area->vm_end)
+ {
+ /* Add end mapping -- leave beginning for below */
+ mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL);
+
+ if (!mpnt)
+ return;
+ *mpnt = *area;
+ mpnt->vm_offset += (end - area->vm_start);
+ mpnt->vm_start = end;
+ if (mpnt->vm_inode)
+ mpnt->vm_inode->i_count++;
+ if (mpnt->vm_ops && mpnt->vm_ops->open)
+ mpnt->vm_ops->open(mpnt);
+ area->vm_end = addr; /* Truncate area */
+ insert_vm_struct(current, mpnt);
+ }
+
+ /* construct whatever mapping is needed */
+ mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL);
+ if (!mpnt)
+ return;
+ *mpnt = *area;
+ if (mpnt->vm_ops && mpnt->vm_ops->open)
+ mpnt->vm_ops->open(mpnt);
+ if (area->vm_ops && area->vm_ops->close) {
+ area->vm_end = area->vm_start;
+ area->vm_ops->close(area);
+ }
+ insert_vm_struct(current, mpnt);
+}
+
+asmlinkage int sys_munmap(unsigned long addr, size_t len)
+{
+ return do_munmap(addr, len);
+}
+
+/*
+ * Munmap is split into 2 main parts -- this part which finds
+ * what needs doing, and the areas themselves, which do the
+ * work. This now handles partial unmappings.
+ * Jeremy Fitzhardine <jeremy@sw.oz.au>
+ */
+int do_munmap(unsigned long addr, size_t len)
+{
+ struct vm_area_struct *mpnt, **npp, *free;
+
+ if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
+ return -EINVAL;
+
+ if ((len = PAGE_ALIGN(len)) == 0)
+ return 0;
+
+ /*
+ * Check if this memory area is ok - put it on the temporary
+ * list if so.. The checks here are pretty simple --
+ * every area affected in some way (by any overlap) is put
+ * on the list. If nothing is put on, nothing is affected.
+ */
+ npp = &current->mm->mmap;
+ free = NULL;
+ for (mpnt = *npp; mpnt != NULL; mpnt = *npp) {
+ unsigned long end = addr+len;
+
+ if ((addr < mpnt->vm_start && end <= mpnt->vm_start) ||
+ (addr >= mpnt->vm_end && end > mpnt->vm_end))
+ {
+ npp = &mpnt->vm_next;
+ continue;
+ }
+
+ *npp = mpnt->vm_next;
+ mpnt->vm_next = free;
+ free = mpnt;
+ }
+
+ if (free == NULL)
+ return 0;
+
+ /*
+ * Ok - we have the memory areas we should free on the 'free' list,
+ * so release them, and unmap the page range..
+ * If one of the segments is only being partially unmapped,
+ * it will put new vm_area_struct(s) into the address space.
+ */
+ while (free) {
+ unsigned long st, end;
+
+ mpnt = free;
+ free = free->vm_next;
+
+ st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
+ end = addr+len;
+ end = end > mpnt->vm_end ? mpnt->vm_end : end;
+
+ if (mpnt->vm_ops && mpnt->vm_ops->unmap)
+ mpnt->vm_ops->unmap(mpnt, st, end-st);
+ else
+ unmap_fixup(mpnt, st, end-st);
+
+ kfree(mpnt);
+ }
+
+ unmap_page_range(addr, len);
+ return 0;
+}
+
+/* This is used for a general mmap of a disk file */
+int generic_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
+{
+ extern struct vm_operations_struct file_mmap;
+
+ if (vma->vm_page_prot & PAGE_RW) /* only PAGE_COW or read-only supported right now */
+ return -EINVAL;
+	if (!inode->i_sb || !S_ISREG(inode->i_mode))
+		return -EACCES;
+	if (vma->vm_offset & (inode->i_sb->s_blocksize - 1))
+		return -EINVAL;
+ if (!inode->i_op || !inode->i_op->bmap)
+ return -ENOEXEC;
+ if (!IS_RDONLY(inode)) {
+ inode->i_atime = CURRENT_TIME;
+ inode->i_dirt = 1;
+ }
+ vma->vm_inode = inode;
+ inode->i_count++;
+ vma->vm_ops = &file_mmap;
+ return 0;
+}
+
+/*
+ * Insert vm structure into process list sorted by address.
+ */
+void insert_vm_struct(struct task_struct *t, struct vm_area_struct *vmp)
+{
+ struct vm_area_struct **p, *mpnt;
+
+ p = &t->mm->mmap;
+ while ((mpnt = *p) != NULL) {
+ if (mpnt->vm_start > vmp->vm_start)
+ break;
+ if (mpnt->vm_end > vmp->vm_start)
+ printk("insert_vm_struct: overlapping memory areas\n");
+ p = &mpnt->vm_next;
+ }
+ vmp->vm_next = mpnt;
+ *p = vmp;
+}
+
+/*
+ * Merge a list of memory segments if possible.
+ * Redundant vm_area_structs are freed.
+ * This assumes that the list is ordered by address.
+ */
+void merge_segments(struct vm_area_struct *mpnt)
+{
+ struct vm_area_struct *prev, *next;
+
+ if (mpnt == NULL)
+ return;
+
+ for(prev = mpnt, mpnt = mpnt->vm_next;
+ mpnt != NULL;
+ prev = mpnt, mpnt = next)
+ {
+ next = mpnt->vm_next;
+
+ /*
+ * To share, we must have the same inode, operations..
+ */
+ if (mpnt->vm_inode != prev->vm_inode)
+ continue;
+ if (mpnt->vm_pte != prev->vm_pte)
+ continue;
+ if (mpnt->vm_ops != prev->vm_ops)
+ continue;
+ if (mpnt->vm_page_prot != prev->vm_page_prot ||
+ mpnt->vm_flags != prev->vm_flags)
+ continue;
+ if (prev->vm_end != mpnt->vm_start)
+ continue;
+ /*
+ * and if we have an inode, the offsets must be contiguous..
+ */
+ if ((mpnt->vm_inode != NULL) || (mpnt->vm_flags & VM_SHM)) {
+ if (prev->vm_offset + prev->vm_end - prev->vm_start != mpnt->vm_offset)
+ continue;
+ }
+
+ /*
+ * merge prev with mpnt and set up pointers so the new
+ * big segment can possibly merge with the next one.
+ * The old unused mpnt is freed.
+ */
+ prev->vm_end = mpnt->vm_end;
+ prev->vm_next = mpnt->vm_next;
+ if (mpnt->vm_ops && mpnt->vm_ops->close) {
+ mpnt->vm_offset += mpnt->vm_end - mpnt->vm_start;
+ mpnt->vm_start = mpnt->vm_end;
+ mpnt->vm_ops->close(mpnt);
+ }
+ if (mpnt->vm_inode)
+ mpnt->vm_inode->i_count--;
+ kfree_s(mpnt, sizeof(*mpnt));
+ mpnt = prev;
+ }
+}
+
+/*
+ * Map memory not associated with any file into a process
+ * address space. Adjacent memory is merged.
+ */
+static int anon_map(struct inode *ino, struct file * file, struct vm_area_struct * vma)
+{
+ if (zeromap_page_range(vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot))
+ return -ENOMEM;
+ return 0;
+}
diff --git a/arch/i386/mm/mprotect.c b/arch/i386/mm/mprotect.c
new file mode 100644
index 000000000..99252183b
--- /dev/null
+++ b/arch/i386/mm/mprotect.c
@@ -0,0 +1,230 @@
+/*
+ * linux/mm/mprotect.c
+ *
+ * (C) Copyright 1994 Linus Torvalds
+ */
+#include <linux/stat.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/shm.h>
+#include <linux/errno.h>
+#include <linux/mman.h>
+#include <linux/string.h>
+#include <linux/malloc.h>
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#define CHG_MASK (PAGE_MASK | PAGE_ACCESSED | PAGE_DIRTY | PAGE_PWT | PAGE_PCD)
+
+static void change_protection(unsigned long start, unsigned long end, int prot)
+{
+ unsigned long *page_table, *dir;
+ unsigned long page, offset;
+ int nr;
+
+ dir = PAGE_DIR_OFFSET(current->tss.cr3, start);
+ offset = (start >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
+ nr = (end - start) >> PAGE_SHIFT;
+ while (nr > 0) {
+ page = *dir;
+ dir++;
+ if (!(page & PAGE_PRESENT)) {
+ nr = nr - PTRS_PER_PAGE + offset;
+ offset = 0;
+ continue;
+ }
+ page_table = offset + (unsigned long *) (page & PAGE_MASK);
+ offset = PTRS_PER_PAGE - offset;
+ if (offset > nr)
+ offset = nr;
+ nr = nr - offset;
+ do {
+ page = *page_table;
+ if (page & PAGE_PRESENT)
+ *page_table = (page & CHG_MASK) | prot;
+ ++page_table;
+ } while (--offset);
+ }
+ return;
+}
+
+static inline int mprotect_fixup_all(struct vm_area_struct * vma,
+ int newflags, int prot)
+{
+ vma->vm_flags = newflags;
+ vma->vm_page_prot = prot;
+ return 0;
+}
+
+static inline int mprotect_fixup_start(struct vm_area_struct * vma,
+ unsigned long end,
+ int newflags, int prot)
+{
+ struct vm_area_struct * n;
+
+ n = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
+ if (!n)
+ return -ENOMEM;
+ *n = *vma;
+ vma->vm_start = end;
+ n->vm_end = end;
+ vma->vm_offset += vma->vm_start - n->vm_start;
+ n->vm_flags = newflags;
+ n->vm_page_prot = prot;
+ if (n->vm_inode)
+ n->vm_inode->i_count++;
+ if (n->vm_ops && n->vm_ops->open)
+ n->vm_ops->open(n);
+ insert_vm_struct(current, n);
+ return 0;
+}
+
+static inline int mprotect_fixup_end(struct vm_area_struct * vma,
+ unsigned long start,
+ int newflags, int prot)
+{
+ struct vm_area_struct * n;
+
+ n = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
+ if (!n)
+ return -ENOMEM;
+ *n = *vma;
+ vma->vm_end = start;
+ n->vm_start = start;
+ n->vm_offset += n->vm_start - vma->vm_start;
+ n->vm_flags = newflags;
+ n->vm_page_prot = prot;
+ if (n->vm_inode)
+ n->vm_inode->i_count++;
+ if (n->vm_ops && n->vm_ops->open)
+ n->vm_ops->open(n);
+ insert_vm_struct(current, n);
+ return 0;
+}
+
+static inline int mprotect_fixup_middle(struct vm_area_struct * vma,
+ unsigned long start, unsigned long end,
+ int newflags, int prot)
+{
+ struct vm_area_struct * left, * right;
+
+ left = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
+ if (!left)
+ return -ENOMEM;
+ right = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
+ if (!right) {
+ kfree(left);
+ return -ENOMEM;
+ }
+ *left = *vma;
+ *right = *vma;
+ left->vm_end = start;
+ vma->vm_start = start;
+ vma->vm_end = end;
+ right->vm_start = end;
+ vma->vm_offset += vma->vm_start - left->vm_start;
+ right->vm_offset += right->vm_start - left->vm_start;
+ vma->vm_flags = newflags;
+ vma->vm_page_prot = prot;
+ if (vma->vm_inode)
+ vma->vm_inode->i_count += 2;
+ if (vma->vm_ops && vma->vm_ops->open) {
+ vma->vm_ops->open(left);
+ vma->vm_ops->open(right);
+ }
+ insert_vm_struct(current, left);
+ insert_vm_struct(current, right);
+ return 0;
+}
+
+static int mprotect_fixup(struct vm_area_struct * vma,
+ unsigned long start, unsigned long end, unsigned int newflags)
+{
+ int prot, error;
+
+ if (newflags == vma->vm_flags)
+ return 0;
+ prot = PAGE_PRESENT;
+ if (newflags & (VM_READ | VM_EXEC))
+ prot |= PAGE_READONLY;
+ if (newflags & VM_WRITE)
+ if (newflags & VM_SHARED)
+ prot |= PAGE_SHARED;
+ else
+ prot |= PAGE_COPY;
+
+ if (start == vma->vm_start)
+ if (end == vma->vm_end)
+ error = mprotect_fixup_all(vma, newflags, prot);
+ else
+ error = mprotect_fixup_start(vma, end, newflags, prot);
+ else if (end == vma->vm_end)
+ error = mprotect_fixup_end(vma, start, newflags, prot);
+ else
+ error = mprotect_fixup_middle(vma, start, end, newflags, prot);
+
+ if (error)
+ return error;
+
+ change_protection(start, end, prot);
+ return 0;
+}
+
+asmlinkage int sys_mprotect(unsigned long start, size_t len, unsigned long prot)
+{
+ unsigned long end, tmp;
+ struct vm_area_struct * vma, * next;
+ int error;
+
+ if (start & ~PAGE_MASK)
+ return -EINVAL;
+ len = (len + ~PAGE_MASK) & PAGE_MASK;
+ end = start + len;
+ if (end < start)
+ return -EINVAL;
+ if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC))
+ return -EINVAL;
+ if (end == start)
+ return 0;
+ for (vma = current->mm->mmap ; ; vma = vma->vm_next) {
+ if (!vma)
+ return -EFAULT;
+ if (vma->vm_end > start)
+ break;
+ }
+ if (vma->vm_start > start)
+ return -EFAULT;
+
+ for ( ; ; ) {
+ unsigned int newflags;
+
+ /* Here we know that vma->vm_start <= start < vma->vm_end. */
+
+ newflags = prot | (vma->vm_flags & ~(PROT_READ | PROT_WRITE | PROT_EXEC));
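+		/*
+		 * Each PROT_* bit may only be set if the corresponding
+		 * VM_MAY* bit (four bits higher) allows it.
+		 */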
+ if ((newflags & ~(newflags >> 4)) & 0xf) {
+ error = -EACCES;
+ break;
+ }
+
+ if (vma->vm_end >= end) {
+ error = mprotect_fixup(vma, start, end, newflags);
+ break;
+ }
+
+ tmp = vma->vm_end;
+ next = vma->vm_next;
+ error = mprotect_fixup(vma, start, tmp, newflags);
+ if (error)
+ break;
+ start = tmp;
+ vma = next;
+ if (!vma || vma->vm_start != start) {
+ error = -EFAULT;
+ break;
+ }
+ }
+ merge_segments(current->mm->mmap);
+ return error;
+}
diff --git a/arch/i386/mm/swap.c b/arch/i386/mm/swap.c
new file mode 100644
index 000000000..f7a1f54b3
--- /dev/null
+++ b/arch/i386/mm/swap.c
@@ -0,0 +1,1017 @@
+/*
+ * linux/mm/swap.c
+ *
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ */
+
+/*
+ * This file should contain most things doing the swapping from/to disk.
+ * Started 18.12.91
+ */
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/head.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/fs.h>
+
+#include <asm/system.h> /* for cli()/sti() */
+#include <asm/bitops.h>
+
+#define MAX_SWAPFILES 8
+
+#define SWP_USED 1
+#define SWP_WRITEOK 3
+
+#define SWP_TYPE(entry) (((entry) & 0xfe) >> 1)
+#define SWP_OFFSET(entry) ((entry) >> PAGE_SHIFT)
+#define SWP_ENTRY(type,offset) (((type) << 1) | ((offset) << PAGE_SHIFT))
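+
+/*
+ * A swap entry packs the swap-area index into bits 1-7 and the page
+ * offset into the bits above PAGE_SHIFT; bit 0 stays clear, so an entry
+ * can never look like a present pte. For example, with PAGE_SHIFT == 12,
+ * SWP_ENTRY(2,5) == 0x5004, and SWP_TYPE/SWP_OFFSET recover 2 and 5.
+ */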
+
+int min_free_pages = 20;
+
+static int nr_swapfiles = 0;
+static struct wait_queue * lock_queue = NULL;
+
+static struct swap_info_struct {
+ unsigned long flags;
+ struct inode * swap_file;
+ unsigned int swap_device;
+ unsigned char * swap_map;
+ unsigned char * swap_lockmap;
+ int pages;
+ int lowest_bit;
+ int highest_bit;
+ unsigned long max;
+} swap_info[MAX_SWAPFILES];
+
+extern int shm_swap (int);
+
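+/*
+ * One word per physical page, indexed by page number: zero means the
+ * page is not in the swap cache, otherwise it holds the swap entry of
+ * which the page is a clean copy.
+ */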
+unsigned long *swap_cache;
+
+#ifdef SWAP_CACHE_INFO
+unsigned long swap_cache_add_total = 0;
+unsigned long swap_cache_add_success = 0;
+unsigned long swap_cache_del_total = 0;
+unsigned long swap_cache_del_success = 0;
+unsigned long swap_cache_find_total = 0;
+unsigned long swap_cache_find_success = 0;
+
+extern inline void show_swap_cache_info(void)
+{
+ printk("Swap cache: add %ld/%ld, delete %ld/%ld, find %ld/%ld\n",
+ swap_cache_add_total, swap_cache_add_success,
+ swap_cache_del_total, swap_cache_del_success,
+ swap_cache_find_total, swap_cache_find_success);
+}
+#endif
+
+extern inline int add_to_swap_cache(unsigned long addr, unsigned long entry)
+{
+ struct swap_info_struct * p = &swap_info[SWP_TYPE(entry)];
+
+#ifdef SWAP_CACHE_INFO
+ swap_cache_add_total++;
+#endif
+ if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
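+		/*
+		 * Atomically exchange the new entry into the cache slot;
+		 * xchgl hands back the old contents in "entry", so a
+		 * collision can be detected without racing an interrupt.
+		 */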
+ __asm__ __volatile__ (
+ "xchgl %0,%1\n"
+ : "=m" (swap_cache[addr >> PAGE_SHIFT]),
+ "=r" (entry)
+ : "0" (swap_cache[addr >> PAGE_SHIFT]),
+ "1" (entry));
+ if (entry) {
+ printk("swap_cache: replacing non-NULL entry\n");
+ }
+#ifdef SWAP_CACHE_INFO
+ swap_cache_add_success++;
+#endif
+ return 1;
+ }
+ return 0;
+}
+
+static unsigned long init_swap_cache(unsigned long mem_start,
+ unsigned long mem_end)
+{
+ unsigned long swap_cache_size;
+
+ mem_start = (mem_start + 15) & ~15;
+ swap_cache = (unsigned long *) mem_start;
+ swap_cache_size = mem_end >> PAGE_SHIFT;
+ memset(swap_cache, 0, swap_cache_size * sizeof (unsigned long));
+ return (unsigned long) (swap_cache + swap_cache_size);
+}
+
+void rw_swap_page(int rw, unsigned long entry, char * buf)
+{
+ unsigned long type, offset;
+ struct swap_info_struct * p;
+
+ type = SWP_TYPE(entry);
+ if (type >= nr_swapfiles) {
+ printk("Internal error: bad swap-device\n");
+ return;
+ }
+ p = &swap_info[type];
+ offset = SWP_OFFSET(entry);
+ if (offset >= p->max) {
+ printk("rw_swap_page: weirdness\n");
+ return;
+ }
+ if (!(p->flags & SWP_USED)) {
+ printk("Trying to swap to unused swap-device\n");
+ return;
+ }
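+	/*
+	 * One lock bit per swap page: set_bit() returns the previous
+	 * value, so we sleep until we are the ones who set it.
+	 */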
+ while (set_bit(offset,p->swap_lockmap))
+ sleep_on(&lock_queue);
+ if (rw == READ)
+ kstat.pswpin++;
+ else
+ kstat.pswpout++;
+ if (p->swap_device) {
+ ll_rw_page(rw,p->swap_device,offset,buf);
+ } else if (p->swap_file) {
+ struct inode *swapf = p->swap_file;
+ unsigned int zones[8];
+ int i;
+ if (swapf->i_op->bmap == NULL
+ && swapf->i_op->smap != NULL){
+			/*
+			 * With MS-DOS, we use msdos_smap(), which returns
+			 * a sector number (not a cluster or block number).
+			 * This is a patch to enable the UMSDOS project;
+			 * other people are working on a better solution.
+			 *
+			 * ll_rw_swap_file() derives its operation size
+			 * (the sector size) from PAGE_SIZE and the number
+			 * of blocks to read, so using bmap or smap should
+			 * work even though smap needs more blocks.
+			 */
+ int j;
+ unsigned int block = offset << 3;
+
+ for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
+ if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
+ printk("rw_swap_page: bad swap file\n");
+ return;
+ }
+ }
+ }else{
+ int j;
+ unsigned int block = offset
+ << (12 - swapf->i_sb->s_blocksize_bits);
+
+ for (i=0, j=0; j< PAGE_SIZE ; i++, j +=swapf->i_sb->s_blocksize)
+ if (!(zones[i] = bmap(swapf,block++))) {
+ printk("rw_swap_page: bad swap file\n");
+ return;
+ }
+ }
+ ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf);
+ } else
+		printk("rw_swap_page: no swap file or device\n");
+ if (offset && !clear_bit(offset,p->swap_lockmap))
+ printk("rw_swap_page: lock already cleared\n");
+ wake_up(&lock_queue);
+}
+
+unsigned int get_swap_page(void)
+{
+ struct swap_info_struct * p;
+ unsigned int offset, type;
+
+ p = swap_info;
+ for (type = 0 ; type < nr_swapfiles ; type++,p++) {
+ if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK)
+ continue;
+ for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
+ if (p->swap_map[offset])
+ continue;
+ p->swap_map[offset] = 1;
+ nr_swap_pages--;
+ if (offset == p->highest_bit)
+ p->highest_bit--;
+ p->lowest_bit = offset;
+ return SWP_ENTRY(type,offset);
+ }
+ }
+ return 0;
+}
+
+unsigned long swap_duplicate(unsigned long entry)
+{
+ struct swap_info_struct * p;
+ unsigned long offset, type;
+
+ if (!entry)
+ return 0;
+ offset = SWP_OFFSET(entry);
+ type = SWP_TYPE(entry);
+ if (type == SHM_SWP_TYPE)
+ return entry;
+ if (type >= nr_swapfiles) {
+ printk("Trying to duplicate nonexistent swap-page\n");
+ return 0;
+ }
+ p = type + swap_info;
+ if (offset >= p->max) {
+ printk("swap_duplicate: weirdness\n");
+ return 0;
+ }
+ if (!p->swap_map[offset]) {
+ printk("swap_duplicate: trying to duplicate unused page\n");
+ return 0;
+ }
+ p->swap_map[offset]++;
+ return entry;
+}
+
+void swap_free(unsigned long entry)
+{
+ struct swap_info_struct * p;
+ unsigned long offset, type;
+
+ if (!entry)
+ return;
+ type = SWP_TYPE(entry);
+ if (type == SHM_SWP_TYPE)
+ return;
+ if (type >= nr_swapfiles) {
+ printk("Trying to free nonexistent swap-page\n");
+ return;
+ }
+ p = & swap_info[type];
+ offset = SWP_OFFSET(entry);
+ if (offset >= p->max) {
+ printk("swap_free: weirdness\n");
+ return;
+ }
+ if (!(p->flags & SWP_USED)) {
+ printk("Trying to free swap from unused swap-device\n");
+ return;
+ }
+ while (set_bit(offset,p->swap_lockmap))
+ sleep_on(&lock_queue);
+ if (offset < p->lowest_bit)
+ p->lowest_bit = offset;
+ if (offset > p->highest_bit)
+ p->highest_bit = offset;
+ if (!p->swap_map[offset])
+ printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
+ else
+ if (!--p->swap_map[offset])
+ nr_swap_pages++;
+ if (!clear_bit(offset,p->swap_lockmap))
+ printk("swap_free: lock already cleared\n");
+ wake_up(&lock_queue);
+}
+
+unsigned long swap_in(unsigned long entry)
+{
+ unsigned long page;
+
+ if (!(page = get_free_page(GFP_KERNEL))) {
+ oom(current);
+ return BAD_PAGE;
+ }
+ read_swap_page(entry, (char *) page);
+ if (add_to_swap_cache(page, entry))
+ return page | PAGE_PRESENT;
+ swap_free(entry);
+ return page | PAGE_DIRTY | PAGE_PRESENT;
+}
+
+static inline int try_to_swap_out(unsigned long * table_ptr)
+{
+ unsigned long page, entry;
+
+ page = *table_ptr;
+ if (!(PAGE_PRESENT & page))
+ return 0;
+ if (page >= high_memory)
+ return 0;
+ if (mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED)
+ return 0;
+
+ if ((PAGE_DIRTY & page) && delete_from_swap_cache(page)) {
+ *table_ptr &= ~PAGE_ACCESSED;
+ return 0;
+ }
+ if (PAGE_ACCESSED & page) {
+ *table_ptr &= ~PAGE_ACCESSED;
+ return 0;
+ }
+ if (PAGE_DIRTY & page) {
+ page &= PAGE_MASK;
+ if (mem_map[MAP_NR(page)] != 1)
+ return 0;
+ if (!(entry = get_swap_page()))
+ return 0;
+ *table_ptr = entry;
+ invalidate();
+ write_swap_page(entry, (char *) page);
+ free_page(page);
+ return 1;
+ }
+ if ((entry = find_in_swap_cache(page))) {
+ if (mem_map[MAP_NR(page)] != 1) {
+ *table_ptr |= PAGE_DIRTY;
+ printk("Aiee.. duplicated cached swap-cache entry\n");
+ return 0;
+ }
+ *table_ptr = entry;
+ invalidate();
+ free_page(page & PAGE_MASK);
+ return 1;
+ }
+ page &= PAGE_MASK;
+ *table_ptr = 0;
+ invalidate();
+ free_page(page);
+ return 1 + mem_map[MAP_NR(page)];
+}
+
+/*
+ * A new implementation of swap_out(). We do not swap complete processes,
+ * but only a small number of blocks, before we continue with the next
+ * process. The number of blocks actually swapped is based on the
+ * number of page faults the process has taken recently, so we won't
+ * swap heavily used processes all the time ...
+ *
+ * Note: the priority argument is a hint on how much CPU to spend on
+ * the swap-block search, not a hint on how many blocks to swap from
+ * each process.
+ *
+ * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
+ */
+
+/*
+ * These are the minimum and maximum number of pages to swap from one process,
+ * before proceeding to the next:
+ */
+#define SWAP_MIN 4
+#define SWAP_MAX 32
+
+/*
+ * The actual number of pages to swap is determined as:
+ * SWAP_RATIO / (number of recent major page faults)
+ */
+#define SWAP_RATIO 128
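+
+/*
+ * For example, a process with 8 recent major faults gets up to
+ * SWAP_RATIO / 8 = 16 pages swapped before we move on, clamped to the
+ * [SWAP_MIN, SWAP_MAX] range in swap_out() below.
+ */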
+
+static int swap_out_process(struct task_struct * p)
+{
+ unsigned long address;
+ unsigned long offset;
+ unsigned long *pgdir;
+ unsigned long pg_table;
+
+ /*
+ * Go through process' page directory.
+ */
+ address = p->mm->swap_address;
+ pgdir = (address >> PGDIR_SHIFT) + (unsigned long *) p->tss.cr3;
+ offset = address & ~PGDIR_MASK;
+ address &= PGDIR_MASK;
+ for ( ; address < TASK_SIZE ;
+ pgdir++, address = address + PGDIR_SIZE, offset = 0) {
+ pg_table = *pgdir;
+ if (pg_table >= high_memory)
+ continue;
+ if (mem_map[MAP_NR(pg_table)] & MAP_PAGE_RESERVED)
+ continue;
+ if (!(PAGE_PRESENT & pg_table)) {
+ printk("swap_out_process (%s): bad page-table at vm %08lx: %08lx\n",
+ p->comm, address + offset, pg_table);
+ *pgdir = 0;
+ continue;
+ }
+ pg_table &= 0xfffff000;
+
+ /*
+ * Go through this page table.
+ */
+ for( ; offset < ~PGDIR_MASK ; offset += PAGE_SIZE) {
+ switch(try_to_swap_out((unsigned long *) (pg_table + (offset >> 10)))) {
+ case 0:
+ break;
+
+ case 1:
+ p->mm->rss--;
+ /* continue with the following page the next time */
+ p->mm->swap_address = address + offset + PAGE_SIZE;
+ return 1;
+
+ default:
+ p->mm->rss--;
+ break;
+ }
+ }
+ }
+ /*
+ * Finish work with this process, if we reached the end of the page
+ * directory. Mark restart from the beginning the next time.
+ */
+ p->mm->swap_address = 0;
+ return 0;
+}
+
+static int swap_out(unsigned int priority)
+{
+ static int swap_task;
+ int loop;
+ int counter = NR_TASKS * 2 >> priority;
+ struct task_struct *p;
+
+ for(; counter >= 0; counter--, swap_task++) {
+ /*
+ * Check that swap_task is suitable for swapping. If not, look for
+ * the next suitable process.
+ */
+ loop = 0;
+ while(1) {
+ if (swap_task >= NR_TASKS) {
+ swap_task = 1;
+ if (loop)
+ /* all processes are unswappable or already swapped out */
+ return 0;
+ loop = 1;
+ }
+
+ p = task[swap_task];
+ if (p && p->mm->swappable && p->mm->rss)
+ break;
+
+ swap_task++;
+ }
+
+ /*
+ * Determine the number of pages to swap from this process.
+ */
+ if (!p->mm->swap_cnt) {
+ p->mm->dec_flt = (p->mm->dec_flt * 3) / 4 + p->mm->maj_flt - p->mm->old_maj_flt;
+ p->mm->old_maj_flt = p->mm->maj_flt;
+
+ if (p->mm->dec_flt >= SWAP_RATIO / SWAP_MIN) {
+ p->mm->dec_flt = SWAP_RATIO / SWAP_MIN;
+ p->mm->swap_cnt = SWAP_MIN;
+ } else if (p->mm->dec_flt <= SWAP_RATIO / SWAP_MAX)
+ p->mm->swap_cnt = SWAP_MAX;
+ else
+ p->mm->swap_cnt = SWAP_RATIO / p->mm->dec_flt;
+ }
+ if (swap_out_process(p)) {
+ if ((--p->mm->swap_cnt) == 0)
+ swap_task++;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static int try_to_free_page(int priority)
+{
+ int i=6;
+
+ while (i--) {
+ if (priority != GFP_NOBUFFER && shrink_buffers(i))
+ return 1;
+ if (shm_swap(i))
+ return 1;
+ if (swap_out(i))
+ return 1;
+ }
+ return 0;
+}
+
+static inline void add_mem_queue(struct mem_list * head, struct mem_list * entry)
+{
+ entry->prev = head;
+ entry->next = head->next;
+ entry->next->prev = entry;
+ head->next = entry;
+}
+
+static inline void remove_mem_queue(struct mem_list * head, struct mem_list * entry)
+{
+ entry->next->prev = entry->prev;
+ entry->prev->next = entry->next;
+}
+
+/*
+ * Free_page() adds the page to the free lists. This is optimized for
+ * fast normal cases (no error jumps taken normally).
+ *
+ * The way to optimize jumps for gcc-2.2.2 is to:
+ * - select the "normal" case and put it inside the if () { XXX }
+ * - no else-statements if you can avoid them
+ *
+ * With the above two rules, you get a straight-line execution path
+ * for the normal case, giving better asm-code.
+ */
+
+/*
+ * Buddy system. Hairy. You really aren't expected to understand this
+ */
+static inline void free_pages_ok(unsigned long addr, unsigned long order)
+{
+ unsigned long index = addr >> (PAGE_SHIFT + 1 + order);
+ unsigned long mask = PAGE_MASK << order;
+
+ addr &= mask;
+ nr_free_pages += 1 << order;
+ while (order < NR_MEM_LISTS-1) {
+ if (!change_bit(index, free_area_map[order]))
+ break;
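+		/*
+		 * The buddy differs from this block in exactly one address
+		 * bit: 1+~mask is the block size, so the xor below flips
+		 * that bit (e.g. addr ^ PAGE_SIZE at order 0).
+		 */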
+ remove_mem_queue(free_area_list+order, (struct mem_list *) (addr ^ (1+~mask)));
+ order++;
+ index >>= 1;
+ mask <<= 1;
+ addr &= mask;
+ }
+ add_mem_queue(free_area_list+order, (struct mem_list *) addr);
+}
+
+static inline void check_free_buffers(unsigned long addr)
+{
+ struct buffer_head * bh;
+
+ bh = buffer_pages[MAP_NR(addr)];
+ if (bh) {
+ struct buffer_head *tmp = bh;
+ do {
+ if (tmp->b_list == BUF_SHARED && tmp->b_dev != 0xffff)
+ refile_buffer(tmp);
+ tmp = tmp->b_this_page;
+ } while (tmp != bh);
+ }
+}
+
+void free_pages(unsigned long addr, unsigned long order)
+{
+ if (addr < high_memory) {
+ unsigned long flag;
+ unsigned short * map = mem_map + MAP_NR(addr);
+ if (*map) {
+ if (!(*map & MAP_PAGE_RESERVED)) {
+ save_flags(flag);
+ cli();
+ if (!--*map) {
+ free_pages_ok(addr, order);
+ delete_from_swap_cache(addr);
+ }
+ restore_flags(flag);
+ if (*map == 1)
+ check_free_buffers(addr);
+ }
+ return;
+ }
+ printk("Trying to free free memory (%08lx): memory probably corrupted\n",addr);
+ printk("PC = %08lx\n",*(((unsigned long *)&addr)-1));
+ return;
+ }
+}
+
+/*
+ * Some ugly macros to speed up __get_free_pages()..
+ */
+#define RMQUEUE(order) \
+do { struct mem_list * queue = free_area_list+order; \
+ unsigned long new_order = order; \
+ do { struct mem_list *next = queue->next; \
+ if (queue != next) { \
+ (queue->next = next->next)->prev = queue; \
+ mark_used((unsigned long) next, new_order); \
+ nr_free_pages -= 1 << order; \
+ restore_flags(flags); \
+ EXPAND(next, order, new_order); \
+ return (unsigned long) next; \
+ } new_order++; queue++; \
+ } while (new_order < NR_MEM_LISTS); \
+} while (0)
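+
+/*
+ * Note that RMQUEUE returns straight out of __get_free_pages() when it
+ * finds a block; control only falls through the macro when every free
+ * list from "order" upwards is empty.
+ */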
+
+static inline int mark_used(unsigned long addr, unsigned long order)
+{
+ return change_bit(addr >> (PAGE_SHIFT+1+order), free_area_map[order]);
+}
+
+#define EXPAND(addr,low,high) \
+do { unsigned long size = PAGE_SIZE << high; \
+ while (high > low) { \
+ high--; size >>= 1; cli(); \
+ add_mem_queue(free_area_list+high, addr); \
+ mark_used((unsigned long) addr, high); \
+ restore_flags(flags); \
+ addr = (struct mem_list *) (size + (unsigned long) addr); \
+ } mem_map[MAP_NR((unsigned long) addr)] = 1; \
+} while (0)
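+
+/*
+ * EXPAND splits a block of order "high" down to order "low", putting the
+ * unused half back on a free list at each step: e.g. a 16kB block
+ * serving a 4kB request gives back one 8kB and one 4kB buddy on the
+ * way down.
+ */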
+
+unsigned long __get_free_pages(int priority, unsigned long order)
+{
+ unsigned long flags;
+ int reserved_pages;
+
+ if (intr_count && priority != GFP_ATOMIC) {
+ static int count = 0;
+ if (++count < 5) {
+ printk("gfp called nonatomically from interrupt %p\n",
+ __builtin_return_address(0));
+ priority = GFP_ATOMIC;
+ }
+ }
+ reserved_pages = 5;
+ if (priority != GFP_NFS)
+ reserved_pages = min_free_pages;
+ save_flags(flags);
+repeat:
+ cli();
+ if ((priority==GFP_ATOMIC) || nr_free_pages > reserved_pages) {
+ RMQUEUE(order);
+ restore_flags(flags);
+ return 0;
+ }
+ restore_flags(flags);
+ if (priority != GFP_BUFFER && try_to_free_page(priority))
+ goto repeat;
+ return 0;
+}
+
+/*
+ * Yes, I know this is ugly. Don't tell me.
+ */
+unsigned long __get_dma_pages(int priority, unsigned long order)
+{
+ unsigned long list = 0;
+ unsigned long result;
+ unsigned long limit = 16*1024*1024;
+
+ /* if (EISA_bus) limit = ~0UL; */
+ if (priority != GFP_ATOMIC)
+ priority = GFP_BUFFER;
+ for (;;) {
+ result = __get_free_pages(priority, order);
+ if (result < limit) /* covers failure as well */
+ break;
+ *(unsigned long *) result = list;
+ list = result;
+ }
+ while (list) {
+ unsigned long tmp = list;
+ list = *(unsigned long *) list;
+ free_pages(tmp, order);
+ }
+ return result;
+}
+
+/*
+ * Show free area list (used inside shift_scroll-lock stuff)
+ * We also calculate the percentage fragmentation. We do this by counting the
+ * memory on each free list with the exception of the first item on the list.
+ */
+void show_free_areas(void)
+{
+ unsigned long order, flags;
+ unsigned long total = 0;
+
+ printk("Free pages: %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
+ save_flags(flags);
+ cli();
+ for (order=0 ; order < NR_MEM_LISTS; order++) {
+ struct mem_list * tmp;
+ unsigned long nr = 0;
+ for (tmp = free_area_list[order].next ; tmp != free_area_list + order ; tmp = tmp->next) {
+ nr ++;
+ }
+ total += nr * (4 << order);
+ printk("%lu*%ukB ", nr, 4 << order);
+ }
+ restore_flags(flags);
+ printk("= %lukB)\n", total);
+#ifdef SWAP_CACHE_INFO
+ show_swap_cache_info();
+#endif
+}
+
+/*
+ * Trying to stop swapping from a file is fraught with races, so
+ * we repeat quite a bit here when we have to pause. swapoff()
+ * isn't exactly timing-critical, so who cares?
+ */
+static int try_to_unuse(unsigned int type)
+{
+ int nr, pgt, pg;
+ unsigned long page, *ppage;
+ unsigned long tmp = 0;
+ struct task_struct *p;
+
+ nr = 0;
+
+/*
+ * When we have to sleep, we restart the whole algorithm from the same
+ * task we stopped in. That at least rids us of all races.
+ */
+repeat:
+ for (; nr < NR_TASKS ; nr++) {
+ p = task[nr];
+ if (!p)
+ continue;
+ for (pgt = 0 ; pgt < PTRS_PER_PAGE ; pgt++) {
+ ppage = pgt + ((unsigned long *) p->tss.cr3);
+ page = *ppage;
+ if (!page)
+ continue;
+ if (!(page & PAGE_PRESENT) || (page >= high_memory))
+ continue;
+ if (mem_map[MAP_NR(page)] & MAP_PAGE_RESERVED)
+ continue;
+ ppage = (unsigned long *) (page & PAGE_MASK);
+ for (pg = 0 ; pg < PTRS_PER_PAGE ; pg++,ppage++) {
+ page = *ppage;
+ if (!page)
+ continue;
+ if (page & PAGE_PRESENT) {
+ if (!(page = in_swap_cache(page)))
+ continue;
+ if (SWP_TYPE(page) != type)
+ continue;
+ *ppage |= PAGE_DIRTY;
+ delete_from_swap_cache(*ppage);
+ continue;
+ }
+ if (SWP_TYPE(page) != type)
+ continue;
+ if (!tmp) {
+ if (!(tmp = __get_free_page(GFP_KERNEL)))
+ return -ENOMEM;
+ goto repeat;
+ }
+ read_swap_page(page, (char *) tmp);
+ if (*ppage == page) {
+ *ppage = tmp | (PAGE_DIRTY | PAGE_PRIVATE);
+ ++p->mm->rss;
+ swap_free(page);
+ tmp = 0;
+ }
+ goto repeat;
+ }
+ }
+ }
+ free_page(tmp);
+ return 0;
+}
+
+asmlinkage int sys_swapoff(const char * specialfile)
+{
+ struct swap_info_struct * p;
+ struct inode * inode;
+ unsigned int type;
+ int i;
+
+ if (!suser())
+ return -EPERM;
+ i = namei(specialfile,&inode);
+ if (i)
+ return i;
+ p = swap_info;
+ for (type = 0 ; type < nr_swapfiles ; type++,p++) {
+ if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK)
+ continue;
+ if (p->swap_file) {
+ if (p->swap_file == inode)
+ break;
+ } else {
+ if (!S_ISBLK(inode->i_mode))
+ continue;
+ if (p->swap_device == inode->i_rdev)
+ break;
+ }
+ }
+ iput(inode);
+ if (type >= nr_swapfiles)
+ return -EINVAL;
+ p->flags = SWP_USED;
+ i = try_to_unuse(type);
+ if (i) {
+ p->flags = SWP_WRITEOK;
+ return i;
+ }
+ nr_swap_pages -= p->pages;
+ iput(p->swap_file);
+ p->swap_file = NULL;
+ p->swap_device = 0;
+ vfree(p->swap_map);
+ p->swap_map = NULL;
+ free_page((long) p->swap_lockmap);
+ p->swap_lockmap = NULL;
+ p->flags = 0;
+ return 0;
+}
+
+/*
+ * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
+ *
+ * The swapon system call
+ */
+asmlinkage int sys_swapon(const char * specialfile)
+{
+ struct swap_info_struct * p;
+ struct inode * swap_inode;
+ unsigned int type;
+ int i,j;
+ int error;
+
+ if (!suser())
+ return -EPERM;
+ p = swap_info;
+ for (type = 0 ; type < nr_swapfiles ; type++,p++)
+ if (!(p->flags & SWP_USED))
+ break;
+ if (type >= MAX_SWAPFILES)
+ return -EPERM;
+ if (type >= nr_swapfiles)
+ nr_swapfiles = type+1;
+ p->flags = SWP_USED;
+ p->swap_file = NULL;
+ p->swap_device = 0;
+ p->swap_map = NULL;
+ p->swap_lockmap = NULL;
+ p->lowest_bit = 0;
+ p->highest_bit = 0;
+ p->max = 1;
+ error = namei(specialfile,&swap_inode);
+ if (error)
+ goto bad_swap;
+ p->swap_file = swap_inode;
+ error = -EBUSY;
+ if (swap_inode->i_count != 1)
+ goto bad_swap;
+ error = -EINVAL;
+ if (S_ISBLK(swap_inode->i_mode)) {
+ p->swap_device = swap_inode->i_rdev;
+ p->swap_file = NULL;
+ iput(swap_inode);
+ error = -ENODEV;
+ if (!p->swap_device)
+ goto bad_swap;
+ error = -EBUSY;
+ for (i = 0 ; i < nr_swapfiles ; i++) {
+ if (i == type)
+ continue;
+ if (p->swap_device == swap_info[i].swap_device)
+ goto bad_swap;
+ }
+ } else if (!S_ISREG(swap_inode->i_mode))
+ goto bad_swap;
+ p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
+ if (!p->swap_lockmap) {
+ printk("Unable to start swapping: out of memory :-)\n");
+ error = -ENOMEM;
+ goto bad_swap;
+ }
+ read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
+ if (memcmp("SWAP-SPACE",p->swap_lockmap+4086,10)) {
+ printk("Unable to find swap-space signature\n");
+ error = -EINVAL;
+ goto bad_swap;
+ }
+ memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
+ j = 0;
+ p->lowest_bit = 0;
+ p->highest_bit = 0;
+ for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
+ if (test_bit(i,p->swap_lockmap)) {
+ if (!p->lowest_bit)
+ p->lowest_bit = i;
+ p->highest_bit = i;
+ p->max = i+1;
+ j++;
+ }
+ }
+ if (!j) {
+ printk("Empty swap-file\n");
+ error = -EINVAL;
+ goto bad_swap;
+ }
+ p->swap_map = (unsigned char *) vmalloc(p->max);
+ if (!p->swap_map) {
+ error = -ENOMEM;
+ goto bad_swap;
+ }
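+	/*
+	 * Build the use-count map from the signature page bitmap: pages
+	 * marked usable start at count 0, everything else (including the
+	 * header page itself) is pinned at 0x80 so it is never allocated.
+	 */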
+ for (i = 1 ; i < p->max ; i++) {
+ if (test_bit(i,p->swap_lockmap))
+ p->swap_map[i] = 0;
+ else
+ p->swap_map[i] = 0x80;
+ }
+ p->swap_map[0] = 0x80;
+ memset(p->swap_lockmap,0,PAGE_SIZE);
+ p->flags = SWP_WRITEOK;
+ p->pages = j;
+ nr_swap_pages += j;
+ printk("Adding Swap: %dk swap-space\n",j<<2);
+ return 0;
+bad_swap:
+ free_page((long) p->swap_lockmap);
+ vfree(p->swap_map);
+ iput(p->swap_file);
+ p->swap_device = 0;
+ p->swap_file = NULL;
+ p->swap_map = NULL;
+ p->swap_lockmap = NULL;
+ p->flags = 0;
+ return error;
+}
+
+void si_swapinfo(struct sysinfo *val)
+{
+ unsigned int i, j;
+
+ val->freeswap = val->totalswap = 0;
+ for (i = 0; i < nr_swapfiles; i++) {
+ if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
+ continue;
+ for (j = 0; j < swap_info[i].max; ++j)
+ switch (swap_info[i].swap_map[j]) {
+ case 128:
+ continue;
+ case 0:
+ ++val->freeswap;
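+					/* fall through: free pages also count toward the total */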
+ default:
+ ++val->totalswap;
+ }
+ }
+ val->freeswap <<= PAGE_SHIFT;
+ val->totalswap <<= PAGE_SHIFT;
+ return;
+}
+
+/*
+ * set up the free-area data structures:
+ * - mark all pages MAP_PAGE_RESERVED
+ * - mark all memory queues empty
+ * - clear the memory bitmaps
+ */
+unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem)
+{
+ unsigned short * p;
+ unsigned long mask = PAGE_MASK;
+ int i;
+
+ /*
+ * select nr of pages we try to keep free for important stuff
+ * with a minimum of 16 pages. This is totally arbitrary
+ */
+ i = end_mem >> (PAGE_SHIFT+6);
+ if (i < 16)
+ i = 16;
+ min_free_pages = i;
+ start_mem = init_swap_cache(start_mem, end_mem);
+ mem_map = (unsigned short *) start_mem;
+ p = mem_map + MAP_NR(end_mem);
+ start_mem = (unsigned long) p;
+ while (p > mem_map)
+ *--p = MAP_PAGE_RESERVED;
+
+ for (i = 0 ; i < NR_MEM_LISTS ; i++, mask <<= 1) {
+ unsigned long bitmap_size;
+ free_area_list[i].prev = free_area_list[i].next = &free_area_list[i];
+ end_mem = (end_mem + ~mask) & mask;
+ bitmap_size = end_mem >> (PAGE_SHIFT + i);
+ bitmap_size = (bitmap_size + 7) >> 3;
+ free_area_map[i] = (unsigned char *) start_mem;
+ memset((void *) start_mem, 0, bitmap_size);
+ start_mem += bitmap_size;
+ }
+ return start_mem;
+}
diff --git a/arch/i386/mm/vmalloc.c b/arch/i386/mm/vmalloc.c
new file mode 100644
index 000000000..0dbd16d54
--- /dev/null
+++ b/arch/i386/mm/vmalloc.c
@@ -0,0 +1,202 @@
+/*
+ * linux/mm/vmalloc.c
+ *
+ * Copyright (C) 1993 Linus Torvalds
+ */
+
+#include <asm/system.h>
+#include <linux/config.h>
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/head.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/malloc.h>
+#include <asm/segment.h>
+
+struct vm_struct {
+ unsigned long flags;
+ void * addr;
+ unsigned long size;
+ struct vm_struct * next;
+};
+
+static struct vm_struct * vmlist = NULL;
+
+/* Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be an 8MB "hole" after the
+ * physical memory until the kernel virtual memory starts. That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leave a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ */
+#define VMALLOC_OFFSET (8*1024*1024)
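+
+/*
+ * With, say, 16MB of physical memory the vmalloc arena therefore starts
+ * at the 24MB mark, and vmalloc() below pads every area by one extra
+ * page ("size + PAGE_SIZE") to keep a 4kB guard hole after it.
+ */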
+
+static inline void set_pgdir(unsigned long dindex, unsigned long value)
+{
+ struct task_struct * p;
+
+ p = &init_task;
+ do {
+ ((unsigned long *) p->tss.cr3)[dindex] = value;
+ p = p->next_task;
+ } while (p != &init_task);
+}
+
+static int free_area_pages(unsigned long dindex, unsigned long index, unsigned long nr)
+{
+ unsigned long page, *pte;
+
+ if (!(PAGE_PRESENT & (page = swapper_pg_dir[dindex])))
+ return 0;
+ page &= PAGE_MASK;
+ pte = index + (unsigned long *) page;
+ do {
+ unsigned long pg = *pte;
+ *pte = 0;
+ if (pg & PAGE_PRESENT)
+ free_page(pg);
+ pte++;
+ } while (--nr);
+ pte = (unsigned long *) page;
+ for (nr = 0 ; nr < 1024 ; nr++, pte++)
+ if (*pte)
+ return 0;
+ set_pgdir(dindex,0);
+ mem_map[MAP_NR(page)] = 1;
+ free_page(page);
+ invalidate();
+ return 0;
+}
+
+static int alloc_area_pages(unsigned long dindex, unsigned long index, unsigned long nr)
+{
+ unsigned long page, *pte;
+
+ page = swapper_pg_dir[dindex];
+ if (!page) {
+ page = get_free_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+ if (swapper_pg_dir[dindex]) {
+ free_page(page);
+ page = swapper_pg_dir[dindex];
+ } else {
+ mem_map[MAP_NR(page)] = MAP_PAGE_RESERVED;
+ set_pgdir(dindex, page | PAGE_SHARED);
+ }
+ }
+ page &= PAGE_MASK;
+ pte = index + (unsigned long *) page;
+ *pte = PAGE_SHARED; /* remove a race with vfree() */
+ do {
+ unsigned long pg = get_free_page(GFP_KERNEL);
+
+ if (!pg)
+ return -ENOMEM;
+ *pte = pg | PAGE_SHARED;
+ pte++;
+ } while (--nr);
+ invalidate();
+ return 0;
+}
+
+static int do_area(void * addr, unsigned long size,
+ int (*area_fn)(unsigned long,unsigned long,unsigned long))
+{
+ unsigned long nr, dindex, index;
+
+ nr = size >> PAGE_SHIFT;
+ dindex = (TASK_SIZE + (unsigned long) addr) >> 22;
+ index = (((unsigned long) addr) >> PAGE_SHIFT) & (PTRS_PER_PAGE-1);
+ while (nr > 0) {
+ unsigned long i = PTRS_PER_PAGE - index;
+
+ if (i > nr)
+ i = nr;
+ nr -= i;
+ if (area_fn(dindex, index, i))
+ return -1;
+ index = 0;
+ dindex++;
+ }
+ return 0;
+}
+
+void vfree(void * addr)
+{
+ struct vm_struct **p, *tmp;
+
+ if (!addr)
+ return;
+ if ((PAGE_SIZE-1) & (unsigned long) addr) {
+ printk("Trying to vfree() bad address (%p)\n", addr);
+ return;
+ }
+ for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) {
+ if (tmp->addr == addr) {
+ *p = tmp->next;
+ do_area(tmp->addr, tmp->size, free_area_pages);
+ kfree(tmp);
+ return;
+ }
+ }
+ printk("Trying to vfree() nonexistent vm area (%p)\n", addr);
+}
+
+void * vmalloc(unsigned long size)
+{
+ void * addr;
+ struct vm_struct **p, *tmp, *area;
+
+ size = PAGE_ALIGN(size);
+ if (!size || size > high_memory)
+ return NULL;
+ area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
+ if (!area)
+ return NULL;
+ addr = (void *) ((high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1));
+ area->size = size + PAGE_SIZE;
+ area->next = NULL;
+ for (p = &vmlist; (tmp = *p) ; p = &tmp->next) {
+ if (size + (unsigned long) addr < (unsigned long) tmp->addr)
+ break;
+ addr = (void *) (tmp->size + (unsigned long) tmp->addr);
+ }
+ area->addr = addr;
+ area->next = *p;
+ *p = area;
+ if (do_area(addr, size, alloc_area_pages)) {
+ vfree(addr);
+ return NULL;
+ }
+ return addr;
+}
+
+int vread(char *buf, char *addr, int count)
+{
+ struct vm_struct **p, *tmp;
+ char *vaddr, *buf_start = buf;
+ int n;
+
+ for (p = &vmlist; (tmp = *p) ; p = &tmp->next) {
+ vaddr = (char *) tmp->addr;
+ while (addr < vaddr) {
+ if (count == 0)
+ goto finished;
+ put_fs_byte('\0', buf++), addr++, count--;
+ }
+ n = tmp->size - PAGE_SIZE;
+ if (addr > vaddr)
+ n -= addr - vaddr;
+ while (--n >= 0) {
+ if (count == 0)
+ goto finished;
+ put_fs_byte(*addr++, buf++), count--;
+ }
+ }
+finished:
+ return buf - buf_start;
+}
diff --git a/arch/i386/ptrace.c b/arch/i386/ptrace.c
new file mode 100644
index 000000000..cade04750
--- /dev/null
+++ b/arch/i386/ptrace.c
@@ -0,0 +1,517 @@
+/* ptrace.c */
+/* By Ross Biro 1/23/92 */
+/* edited by Linus Torvalds */
+
+#include <linux/head.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <linux/debugreg.h>
+
+/*
+ * This does not yet catch signals sent when the child dies;
+ * that needs to be handled in exit.c or in signal.c.
+ */
+
+/* determines which flags the user has access to. */
+/* 1 = access 0 = no access */
+#define FLAG_MASK 0x00044dd5
+
+/* sets the trap flag. */
+#define TRAP_FLAG 0x100
+
+/*
+ * this is the number to subtract from the top of the stack to find
+ * the local frame.
+ */
+#define MAGICNUMBER 68
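+/* 68 == 17 saved registers * sizeof(long); cf. the "addr < 17" checks below. */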
+
+/* change a pid into a task struct. */
+static inline struct task_struct * get_task(int pid)
+{
+ int i;
+
+ for (i = 1; i < NR_TASKS; i++) {
+ if (task[i] != NULL && (task[i]->pid == pid))
+ return task[i];
+ }
+ return NULL;
+}
+
+/*
+ * this routine will get a word off the process's privileged stack.
+ * the offset is how far from the base addr as stored in the TSS.
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+static inline int get_stack_long(struct task_struct *task, int offset)
+{
+ unsigned char *stack;
+
+ stack = (unsigned char *)task->tss.esp0;
+ stack += offset;
+ return (*((int *)stack));
+}
+
+/*
+ * this routine will put a word on the process's privileged stack.
+ * the offset is how far from the base addr as stored in the TSS.
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+static inline int put_stack_long(struct task_struct *task, int offset,
+ unsigned long data)
+{
+ unsigned char * stack;
+
+ stack = (unsigned char *) task->tss.esp0;
+ stack += offset;
+ *(unsigned long *) stack = data;
+ return 0;
+}
+
+/*
+ * This routine gets a long from any process space by following the page
+ * tables. NOTE! You should check that the long isn't on a page boundary,
+ * and that it is in the task area before calling this: this routine does
+ * no checking.
+ */
+static unsigned long get_long(struct vm_area_struct * vma, unsigned long addr)
+{
+ unsigned long page;
+
+repeat:
+ page = *PAGE_DIR_OFFSET(vma->vm_task->tss.cr3, addr);
+ if (page & PAGE_PRESENT) {
+ page &= PAGE_MASK;
+ page += PAGE_PTR(addr);
+ page = *((unsigned long *) page);
+ }
+ if (!(page & PAGE_PRESENT)) {
+ do_no_page(vma, addr, 0);
+ goto repeat;
+ }
+/* this is a hack for non-kernel-mapped video buffers and similar */
+ if (page >= high_memory)
+ return 0;
+ page &= PAGE_MASK;
+ page += addr & ~PAGE_MASK;
+ return *(unsigned long *) page;
+}
+
+/*
+ * This routine puts a long into any process space by following the page
+ * tables. NOTE! You should check that the long isn't on a page boundary,
+ * and that it is in the task area before calling this: this routine does
+ * no checking.
+ *
+ * Now keeps R/W state of page so that a text page stays readonly
+ * even if a debugger scribbles breakpoints into it. -M.U-
+ */
+static void put_long(struct vm_area_struct * vma, unsigned long addr,
+ unsigned long data)
+{
+ unsigned long page, pte = 0;
+ int readonly = 0;
+
+repeat:
+ page = *PAGE_DIR_OFFSET(vma->vm_task->tss.cr3, addr);
+ if (page & PAGE_PRESENT) {
+ page &= PAGE_MASK;
+ page += PAGE_PTR(addr);
+ pte = page;
+ page = *((unsigned long *) page);
+ }
+ if (!(page & PAGE_PRESENT)) {
+ do_no_page(vma, addr, 0 /* PAGE_RW */);
+ goto repeat;
+ }
+ if (!(page & PAGE_RW)) {
+ if (!(page & PAGE_COW))
+ readonly = 1;
+ do_wp_page(vma, addr, PAGE_RW | PAGE_PRESENT);
+ goto repeat;
+ }
+/* this is a hack for non-kernel-mapped video buffers and similar */
+ if (page >= high_memory)
+ return;
+/* we're bypassing pagetables, so we have to set the dirty bit ourselves */
+ *(unsigned long *) pte |= (PAGE_DIRTY|PAGE_COW);
+ page &= PAGE_MASK;
+ page += addr & ~PAGE_MASK;
+ *(unsigned long *) page = data;
+ if (readonly) {
+ *(unsigned long *) pte &=~ (PAGE_RW|PAGE_COW);
+ invalidate();
+ }
+}
+
+static struct vm_area_struct * find_vma(struct task_struct * tsk, unsigned long addr)
+{
+ struct vm_area_struct * vma;
+
+ addr &= PAGE_MASK;
+ for (vma = tsk->mm->mmap ; ; vma = vma->vm_next) {
+ if (!vma)
+ return NULL;
+ if (vma->vm_end > addr)
+ break;
+ }
+ if (vma->vm_start <= addr)
+ return vma;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ return NULL;
+ if (vma->vm_end - addr > tsk->rlim[RLIMIT_STACK].rlim_cur)
+ return NULL;
+ vma->vm_offset -= vma->vm_start - addr;
+ vma->vm_start = addr;
+ return vma;
+}
+
+/*
+ * This routine checks the page boundaries, and that the offset is
+ * within the task area. It then calls get_long() to read a long.
+ */
+static int read_long(struct task_struct * tsk, unsigned long addr,
+ unsigned long * result)
+{
+ struct vm_area_struct * vma = find_vma(tsk, addr);
+
+ if (!vma)
+ return -EIO;
+ if ((addr & ~PAGE_MASK) > PAGE_SIZE-sizeof(long)) {
+ unsigned long low,high;
+ struct vm_area_struct * vma_high = vma;
+
+ if (addr + sizeof(long) >= vma->vm_end) {
+ vma_high = vma->vm_next;
+ if (!vma_high || vma_high->vm_start != vma->vm_end)
+ return -EIO;
+ }
+ low = get_long(vma, addr & ~(sizeof(long)-1));
+ high = get_long(vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
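+		/* reassemble the unaligned word from two aligned reads */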
+ switch (addr & (sizeof(long)-1)) {
+ case 1:
+ low >>= 8;
+ low |= high << 24;
+ break;
+ case 2:
+ low >>= 16;
+ low |= high << 16;
+ break;
+ case 3:
+ low >>= 24;
+ low |= high << 8;
+ break;
+ }
+ *result = low;
+ } else
+ *result = get_long(vma, addr);
+ return 0;
+}
+
+/*
+ * This routine checks the page boundaries, and that the offset is
+ * within the task area. It then calls put_long() to write a long.
+ */
+static int write_long(struct task_struct * tsk, unsigned long addr,
+ unsigned long data)
+{
+ struct vm_area_struct * vma = find_vma(tsk, addr);
+
+ if (!vma)
+ return -EIO;
+ if ((addr & ~PAGE_MASK) > PAGE_SIZE-sizeof(long)) {
+ unsigned long low,high;
+ struct vm_area_struct * vma_high = vma;
+
+ if (addr + sizeof(long) >= vma->vm_end) {
+ vma_high = vma->vm_next;
+ if (!vma_high || vma_high->vm_start != vma->vm_end)
+ return -EIO;
+ }
+ low = get_long(vma, addr & ~(sizeof(long)-1));
+ high = get_long(vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
+ switch (addr & (sizeof(long)-1)) {
+ case 0: /* shouldn't happen, but safety first */
+ low = data;
+ break;
+ case 1:
+ low &= 0x000000ff;
+ low |= data << 8;
+ high &= ~0xff;
+ high |= data >> 24;
+ break;
+ case 2:
+ low &= 0x0000ffff;
+ low |= data << 16;
+ high &= ~0xffff;
+ high |= data >> 16;
+ break;
+ case 3:
+ low &= 0x00ffffff;
+ low |= data << 24;
+ high &= ~0xffffff;
+ high |= data >> 8;
+ break;
+ }
+ put_long(vma, addr & ~(sizeof(long)-1),low);
+ put_long(vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1),high);
+ } else
+ put_long(vma, addr, data);
+ return 0;
+}
+
+asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+{
+ struct task_struct *child;
+ struct user * dummy;
+ int i;
+
+ dummy = NULL;
+
+ if (request == PTRACE_TRACEME) {
+ /* are we already being traced? */
+ if (current->flags & PF_PTRACED)
+ return -EPERM;
+ /* set the ptrace bit in the process flags. */
+ current->flags |= PF_PTRACED;
+ return 0;
+ }
+ if (pid == 1) /* you may not mess with init */
+ return -EPERM;
+ if (!(child = get_task(pid)))
+ return -ESRCH;
+ if (request == PTRACE_ATTACH) {
+ if (child == current)
+ return -EPERM;
+ if ((!child->dumpable ||
+ (current->uid != child->euid) ||
+ (current->uid != child->uid) ||
+ (current->gid != child->egid) ||
+ (current->gid != child->gid)) && !suser())
+ return -EPERM;
+ /* the same process cannot be attached many times */
+ if (child->flags & PF_PTRACED)
+ return -EPERM;
+ child->flags |= PF_PTRACED;
+ if (child->p_pptr != current) {
+ REMOVE_LINKS(child);
+ child->p_pptr = current;
+ SET_LINKS(child);
+ }
+ send_sig(SIGSTOP, child, 1);
+ return 0;
+ }
+ if (!(child->flags & PF_PTRACED))
+ return -ESRCH;
+ if (child->state != TASK_STOPPED) {
+ if (request != PTRACE_KILL)
+ return -ESRCH;
+ }
+ if (child->p_pptr != current)
+ return -ESRCH;
+
+ switch (request) {
+ /* when I and D space are separate, these will need to be fixed. */
+ case PTRACE_PEEKTEXT: /* read word at location addr. */
+ case PTRACE_PEEKDATA: {
+ unsigned long tmp;
+ int res;
+
+ res = read_long(child, addr, &tmp);
+ if (res < 0)
+ return res;
+ res = verify_area(VERIFY_WRITE, (void *) data, sizeof(long));
+ if (!res)
+ put_fs_long(tmp,(unsigned long *) data);
+ return res;
+ }
+
+ /* read the word at location addr in the USER area. */
+ case PTRACE_PEEKUSR: {
+ unsigned long tmp;
+ int res;
+
+ if ((addr & 3) || addr < 0 ||
+ addr > sizeof(struct user) - 3)
+ return -EIO;
+
+ res = verify_area(VERIFY_WRITE, (void *) data, sizeof(long));
+ if (res)
+ return res;
+ tmp = 0; /* Default return condition */
+ if(addr < 17*sizeof(long)) {
+ addr = addr >> 2; /* temporary hack. */
+
+ tmp = get_stack_long(child, sizeof(long)*addr - MAGICNUMBER);
+ if (addr == DS || addr == ES ||
+ addr == FS || addr == GS ||
+ addr == CS || addr == SS)
+ tmp &= 0xffff;
+ };
+ if(addr >= (long) &dummy->u_debugreg[0] &&
+ addr <= (long) &dummy->u_debugreg[7]){
+ addr -= (long) &dummy->u_debugreg[0];
+ addr = addr >> 2;
+ tmp = child->debugreg[addr];
+ };
+ put_fs_long(tmp,(unsigned long *) data);
+ return 0;
+ }
+
+ /* when I and D space are separate, this will have to be fixed. */
+ case PTRACE_POKETEXT: /* write the word at location addr. */
+ case PTRACE_POKEDATA:
+ return write_long(child,addr,data);
+
+ case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
+ if ((addr & 3) || addr < 0 ||
+ addr > sizeof(struct user) - 3)
+ return -EIO;
+
+ addr = addr >> 2; /* temporary hack. */
+
+ if (addr == ORIG_EAX)
+ return -EIO;
+ if (addr == DS || addr == ES ||
+ addr == FS || addr == GS ||
+ addr == CS || addr == SS) {
+ data &= 0xffff;
+ if (data && (data & 3) != 3)
+ return -EIO;
+ }
+ if (addr == EFL) { /* flags. */
+ data &= FLAG_MASK;
+ data |= get_stack_long(child, EFL*sizeof(long)-MAGICNUMBER) & ~FLAG_MASK;
+ }
+ /* Do not allow the user to set the debug register for kernel
+ address space */
+ if(addr < 17){
+ if (put_stack_long(child, sizeof(long)*addr-MAGICNUMBER, data))
+ return -EIO;
+ return 0;
+ };
+
+ /* We need to be very careful here. We implicitly
+ want to modify a portion of the task_struct, and we
+ have to be selective about what portions we allow someone
+ to modify. */
+
+ addr = addr << 2; /* Convert back again */
+ if(addr >= (long) &dummy->u_debugreg[0] &&
+ addr <= (long) &dummy->u_debugreg[7]){
+
+ if(addr == (long) &dummy->u_debugreg[4]) return -EIO;
+ if(addr == (long) &dummy->u_debugreg[5]) return -EIO;
+ if(addr < (long) &dummy->u_debugreg[4] &&
+ ((unsigned long) data) >= 0xbffffffd) return -EIO;
+
+ if(addr == (long) &dummy->u_debugreg[7]) {
+ data &= ~DR_CONTROL_RESERVED;
+ for(i=0; i<4; i++)
+ if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
+ return -EIO;
+ };
+
+ addr -= (long) &dummy->u_debugreg;
+ addr = addr >> 2;
+ child->debugreg[addr] = data;
+ return 0;
+ };
+ return -EIO;
+
+ case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
+ case PTRACE_CONT: { /* restart after signal. */
+ long tmp;
+
+ if ((unsigned long) data > NSIG)
+ return -EIO;
+ if (request == PTRACE_SYSCALL)
+ child->flags |= PF_TRACESYS;
+ else
+ child->flags &= ~PF_TRACESYS;
+ child->exit_code = data;
+ child->state = TASK_RUNNING;
+ /* make sure the single step bit is not set. */
+ tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG;
+ put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
+ return 0;
+ }
+
+/*
+ * make the child exit. Best I can do is send it a sigkill.
+ * perhaps it should be put in the status that it wants to
+ * exit.
+ */
+ case PTRACE_KILL: {
+ long tmp;
+
+ child->state = TASK_RUNNING;
+ child->exit_code = SIGKILL;
+ /* make sure the single step bit is not set. */
+ tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG;
+ put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
+ return 0;
+ }
+
+ case PTRACE_SINGLESTEP: { /* set the trap flag. */
+ long tmp;
+
+ if ((unsigned long) data > NSIG)
+ return -EIO;
+ child->flags &= ~PF_TRACESYS;
+ tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) | TRAP_FLAG;
+ put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
+ child->state = TASK_RUNNING;
+ child->exit_code = data;
+ /* give it a chance to run. */
+ return 0;
+ }
+
+ case PTRACE_DETACH: { /* detach a process that was attached. */
+ long tmp;
+
+ if ((unsigned long) data > NSIG)
+ return -EIO;
+ child->flags &= ~(PF_PTRACED|PF_TRACESYS);
+ child->state = TASK_RUNNING;
+ child->exit_code = data;
+ REMOVE_LINKS(child);
+ child->p_pptr = child->p_opptr;
+ SET_LINKS(child);
+ /* make sure the single step bit is not set. */
+ tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG;
+ put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
+ return 0;
+ }
+
+ default:
+ return -EIO;
+ }
+}
+
+asmlinkage void syscall_trace(void)
+{
+ if ((current->flags & (PF_PTRACED|PF_TRACESYS))
+ != (PF_PTRACED|PF_TRACESYS))
+ return;
+ current->exit_code = SIGTRAP;
+ current->state = TASK_STOPPED;
+ notify_parent(current);
+ schedule();
+ /*
+ * this isn't the same as continuing with a signal, but it will do
+ * for normal use. strace only continues with a signal if the
+ * stopping signal is not SIGTRAP. -brl
+ */
+ if (current->exit_code)
+ current->signal |= (1 << (current->exit_code - 1));
+ current->exit_code = 0;
+}
diff --git a/arch/i386/sched.c b/arch/i386/sched.c
new file mode 100644
index 000000000..6eed6e8f5
--- /dev/null
+++ b/arch/i386/sched.c
@@ -0,0 +1,861 @@
+/*
+ * linux/kernel/sched.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * 'sched.c' is the main kernel file. It contains scheduling primitives
+ * (sleep_on, wakeup, schedule etc) as well as a number of simple system
+ * call functions (like getpid(), which just extracts a field from
+ * current-task).
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/fdreg.h>
+#include <linux/errno.h>
+#include <linux/time.h>
+#include <linux/ptrace.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/tqueue.h>
+#include <linux/resource.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/segment.h>
+
+#define TIMER_IRQ 0
+
+#include <linux/timex.h>
+
+/*
+ * kernel variables
+ */
+long tick = 1000000 / HZ; /* timer interrupt period */
+volatile struct timeval xtime; /* The current time */
+int tickadj = 500/HZ; /* microsecs */
+
+DECLARE_TASK_QUEUE(tq_timer);
+DECLARE_TASK_QUEUE(tq_immediate);
+
+/*
+ * phase-lock loop variables
+ */
+int time_status = TIME_BAD; /* clock synchronization status */
+long time_offset = 0; /* time adjustment (us) */
+long time_constant = 0; /* pll time constant */
+long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
+long time_precision = 1; /* clock precision (us) */
+long time_maxerror = 0x70000000;/* maximum error */
+long time_esterror = 0x70000000;/* estimated error */
+long time_phase = 0; /* phase offset (scaled us) */
+long time_freq = 0; /* frequency offset (scaled ppm) */
+long time_adj = 0; /* tick adjust (scaled 1 / HZ) */
+long time_reftime = 0; /* time at last adjustment (s) */
+
+long time_adjust = 0;
+long time_adjust_step = 0;
+
+int need_resched = 0;
+unsigned long event = 0;
+
+/*
+ * Tell us the machine setup..
+ */
+int hard_math = 0; /* set by boot/head.S */
+int x86 = 0; /* set by boot/head.S to 3 or 4 */
+int ignore_irq13 = 0; /* set if exception 16 works */
+int wp_works_ok = 0; /* set if paging hardware honours WP */
+int hlt_works_ok = 1; /* set if the "hlt" instruction works */
+
+/*
+ * Bus types ..
+ */
+int EISA_bus = 0;
+
+extern int _setitimer(int, struct itimerval *, struct itimerval *);
+unsigned long * prof_buffer = NULL;
+unsigned long prof_len = 0;
+
+#define _S(nr) (1<<((nr)-1))
+
+extern void mem_use(void);
+
+extern int timer_interrupt(void);
+asmlinkage int system_call(void);
+
+static unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
+static struct vm_area_struct init_mmap = INIT_MMAP;
+struct task_struct init_task = INIT_TASK;
+
+unsigned long volatile jiffies=0;
+
+struct task_struct *current = &init_task;
+struct task_struct *last_task_used_math = NULL;
+
+struct task_struct * task[NR_TASKS] = {&init_task, };
+
+long user_stack [ PAGE_SIZE>>2 ] = { STACK_MAGIC, };
+
+struct {
+ long * a;
+ short b;
+ } stack_start = { & user_stack [PAGE_SIZE>>2] , KERNEL_DS };
+
+struct kernel_stat kstat = { 0 };
+
+/*
+ * 'math_state_restore()' saves the current math information in the
+ * old math state array, and gets the new ones from the current task
+ *
+ * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+ * Don't touch unless you *really* know how it works.
+ */
+asmlinkage void math_state_restore(void)
+{
+ __asm__ __volatile__("clts");
+ if (last_task_used_math == current)
+ return;
+ timer_table[COPRO_TIMER].expires = jiffies+50;
+ timer_active |= 1<<COPRO_TIMER;
+ if (last_task_used_math)
+ __asm__("fnsave %0":"=m" (last_task_used_math->tss.i387));
+ else
+ __asm__("fnclex");
+ last_task_used_math = current;
+ if (current->used_math) {
+ __asm__("frstor %0": :"m" (current->tss.i387));
+ } else {
+ __asm__("fninit");
+ current->used_math=1;
+ }
+ timer_active &= ~(1<<COPRO_TIMER);
+}
+
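math_state_restore() is the heart of lazy FPU switching: switch_to() only sets the TS flag, and the save/restore happens when the new task first touches the coprocessor and traps with "device not available". A user-space model of the policy follows; struct task, fpu_reg and the plain assignments standing in for fnsave/frstor/fninit are inventions of the sketch.

#include <stdio.h>

struct task {
	const char *name;
	int used_math;		/* has this task ever used the FPU? */
	double fpu_state;	/* stand-in for tss.i387 */
};

static struct task *owner;	/* stand-in for last_task_used_math */
static double fpu_reg;		/* stand-in for the coprocessor itself */

/* Called on the first FPU use after a context switch (trap 7). */
static void lazy_fpu_switch(struct task *current)
{
	if (owner == current)
		return;				/* still ours: nothing to do */
	if (owner)
		owner->fpu_state = fpu_reg;	/* "fnsave" the old owner */
	owner = current;
	if (current->used_math)
		fpu_reg = current->fpu_state;	/* "frstor" */
	else {
		fpu_reg = 0.0;			/* "fninit" on first use */
		current->used_math = 1;
	}
}

int main(void)
{
	struct task a = { "A", 0, 0 }, b = { "B", 0, 0 };

	lazy_fpu_switch(&a); fpu_reg = 1.5;	/* A computes something */
	lazy_fpu_switch(&b); fpu_reg = 2.5;	/* B faults, steals the FPU */
	lazy_fpu_switch(&a);			/* A faults, gets 1.5 back */
	printf("A sees %g, B's state saved as %g\n", fpu_reg, b.fpu_state);
	return 0;
}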
+#ifndef CONFIG_MATH_EMULATION
+
+asmlinkage void math_emulate(long arg)
+{
+ printk("math-emulation not enabled and no coprocessor found.\n");
+ printk("killing %s.\n",current->comm);
+ send_sig(SIGFPE,current,1);
+ schedule();
+}
+
+#endif /* CONFIG_MATH_EMULATION */
+
+unsigned long itimer_ticks = 0;
+unsigned long itimer_next = ~0;
+
+/*
+ * 'schedule()' is the scheduler function. It's a very simple and nice
+ * scheduler: it's not perfect, but certainly works for most things.
+ * The one thing you might take a look at is the signal-handler code here.
+ *
+ * NOTE!! Task 0 is the 'idle' task, which gets called when no other
+ * tasks can run. It cannot be killed, and it cannot sleep. The 'state'
+ * information in task[0] is never used.
+ *
+ * The "confuse_gcc" goto is used only to get better assembly code..
+ * Dijkstra probably hates me.
+ */
+asmlinkage void schedule(void)
+{
+ int c;
+ struct task_struct * p;
+ struct task_struct * next;
+ unsigned long ticks;
+
+/* check alarm, wake up any interruptible tasks that have got a signal */
+
+ if (intr_count) {
+ printk("Aiee: scheduling in interrupt\n");
+ intr_count = 0;
+ }
+ cli();
+ ticks = itimer_ticks;
+ itimer_ticks = 0;
+ itimer_next = ~0;
+ sti();
+ need_resched = 0;
+ p = &init_task;
+ for (;;) {
+ if ((p = p->next_task) == &init_task)
+ goto confuse_gcc1;
+ if (ticks && p->it_real_value) {
+ if (p->it_real_value <= ticks) {
+ send_sig(SIGALRM, p, 1);
+ if (!p->it_real_incr) {
+ p->it_real_value = 0;
+ goto end_itimer;
+ }
+ do {
+ p->it_real_value += p->it_real_incr;
+ } while (p->it_real_value <= ticks);
+ }
+ p->it_real_value -= ticks;
+ if (p->it_real_value < itimer_next)
+ itimer_next = p->it_real_value;
+ }
+end_itimer:
+ if (p->state != TASK_INTERRUPTIBLE)
+ continue;
+ if (p->signal & ~p->blocked) {
+ p->state = TASK_RUNNING;
+ continue;
+ }
+ if (p->timeout && p->timeout <= jiffies) {
+ p->timeout = 0;
+ p->state = TASK_RUNNING;
+ }
+ }
+confuse_gcc1:
+
+/* this is the scheduler proper: */
+#if 0
+ /* give processes that go to sleep a bit higher priority.. */
+ /* This depends on the values for TASK_XXX */
+ /* This gives smoother scheduling for some things, but */
+ /* can be very unfair under some circumstances, so.. */
+ if (TASK_UNINTERRUPTIBLE >= (unsigned) current->state &&
+ current->counter < current->priority*2) {
+ ++current->counter;
+ }
+#endif
+ c = -1000;
+ next = p = &init_task;
+ for (;;) {
+ if ((p = p->next_task) == &init_task)
+ goto confuse_gcc2;
+ if (p->state == TASK_RUNNING && p->counter > c)
+ c = p->counter, next = p;
+ }
+confuse_gcc2:
+ if (!c) {
+ for_each_task(p)
+ p->counter = (p->counter >> 1) + p->priority;
+ }
+ if (current == next)
+ return;
+ kstat.context_swtch++;
+ switch_to(next);
+ /* Now maybe reload the debug registers */
+ if(current->debugreg[7]){
+ loaddebug(0);
+ loaddebug(1);
+ loaddebug(2);
+ loaddebug(3);
+ loaddebug(6);
+ };
+}
+
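The selection loop above boils down to: run the runnable task with the largest counter, and once every runnable counter has reached zero, recharge all tasks with counter = counter/2 + priority (a sleeping task therefore converges towards twice its priority). A standalone simulation of just that rule, with an invented three-task workload and one tick consumed per pass:

#include <stdio.h>

#define NTASK 3

struct task { const char *name; int counter, priority; };

int main(void)
{
	struct task t[NTASK] = {	/* priorities from the 1..35 range */
		{ "editor", 0, 15 }, { "compile", 0, 15 }, { "niced", 0, 5 }
	};
	int tick, i;

	for (tick = 0; tick < 24; tick++) {
		int c = -1000, next = 0;

		for (i = 0; i < NTASK; i++)	/* schedule(): max counter */
			if (t[i].counter > c) {
				c = t[i].counter;
				next = i;
			}
		if (!c) {			/* all zero: recharge */
			for (i = 0; i < NTASK; i++)
				t[i].counter = (t[i].counter >> 1)
					     + t[i].priority;
			continue;
		}
		t[next].counter--;		/* do_timer() burns a tick */
		printf("tick %2d -> %s\n", tick, t[next].name);
	}
	return 0;
}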
+asmlinkage int sys_pause(void)
+{
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ return -ERESTARTNOHAND;
+}
+
+/*
+ * wake_up doesn't wake up stopped processes - they have to be awakened
+ * with signals or similar.
+ *
+ * Note that this doesn't need cli-sti pairs: interrupts may not change
+ * the wait-queue structures directly, but only call wake_up() to wake
+ * a process. The process itself must remove itself from the queue once it has woken.
+ */
+void wake_up(struct wait_queue **q)
+{
+ struct wait_queue *tmp;
+ struct task_struct * p;
+
+ if (!q || !(tmp = *q))
+ return;
+ do {
+ if ((p = tmp->task) != NULL) {
+ if ((p->state == TASK_UNINTERRUPTIBLE) ||
+ (p->state == TASK_INTERRUPTIBLE)) {
+ p->state = TASK_RUNNING;
+ if (p->counter > current->counter + 3)
+ need_resched = 1;
+ }
+ }
+ if (!tmp->next) {
+ printk("wait_queue is bad (eip = %p)\n",
+ __builtin_return_address(0));
+ printk(" q = %p\n",q);
+ printk(" *q = %p\n",*q);
+ printk(" tmp = %p\n",tmp);
+ break;
+ }
+ tmp = tmp->next;
+ } while (tmp != *q);
+}
+
+void wake_up_interruptible(struct wait_queue **q)
+{
+ struct wait_queue *tmp;
+ struct task_struct * p;
+
+ if (!q || !(tmp = *q))
+ return;
+ do {
+ if ((p = tmp->task) != NULL) {
+ if (p->state == TASK_INTERRUPTIBLE) {
+ p->state = TASK_RUNNING;
+ if (p->counter > current->counter + 3)
+ need_resched = 1;
+ }
+ }
+ if (!tmp->next) {
+ printk("wait_queue is bad (eip = %p)\n",
+ __builtin_return_address(0));
+ printk(" q = %p\n",q);
+ printk(" *q = %p\n",*q);
+ printk(" tmp = %p\n",tmp);
+ break;
+ }
+ tmp = tmp->next;
+ } while (tmp != *q);
+}
+
+void __down(struct semaphore * sem)
+{
+ struct wait_queue wait = { current, NULL };
+ add_wait_queue(&sem->wait, &wait);
+ current->state = TASK_UNINTERRUPTIBLE;
+ while (sem->count <= 0) {
+ schedule();
+ current->state = TASK_UNINTERRUPTIBLE;
+ }
+ current->state = TASK_RUNNING;
+ remove_wait_queue(&sem->wait, &wait);
+}
+
+static inline void __sleep_on(struct wait_queue **p, int state)
+{
+ unsigned long flags;
+ struct wait_queue wait = { current, NULL };
+
+ if (!p)
+ return;
+ if (current == task[0])
+ panic("task[0] trying to sleep");
+ current->state = state;
+ add_wait_queue(p, &wait);
+ save_flags(flags);
+ sti();
+ schedule();
+ remove_wait_queue(p, &wait);
+ restore_flags(flags);
+}
+
+void interruptible_sleep_on(struct wait_queue **p)
+{
+ __sleep_on(p,TASK_INTERRUPTIBLE);
+}
+
+void sleep_on(struct wait_queue **p)
+{
+ __sleep_on(p,TASK_UNINTERRUPTIBLE);
+}
+
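The wake_up() walk above assumes a circular, singly linked ring: *q points at one entry and following next eventually returns to it; the add_wait_queue()/remove_wait_queue() helpers that maintain the ring live in linux/sched.h. A user-space model of the ring and the walk (without the real code's cli/sti protection or corruption check), with an int flag standing in for the task state:

#include <stdio.h>
#include <stddef.h>

struct waiter { const char *name; int running; struct waiter *next; };

/* insert into the circular ring, the way add_wait_queue() does */
static void ring_add(struct waiter **q, struct waiter *w)
{
	if (!*q) {
		w->next = w;		/* first entry points at itself */
		*q = w;
	} else {
		w->next = (*q)->next;
		(*q)->next = w;
	}
}

/* the wake_up() walk: visit every entry exactly once, then stop */
static void ring_wake(struct waiter **q)
{
	struct waiter *tmp;

	if (!q || !(tmp = *q))
		return;
	do {
		tmp->running = 1;	/* "p->state = TASK_RUNNING" */
		tmp = tmp->next;
	} while (tmp != *q);
}

int main(void)
{
	struct waiter *q = NULL;
	struct waiter a = { "a", 0, NULL }, b = { "b", 0, NULL };

	ring_add(&q, &a);
	ring_add(&q, &b);
	ring_wake(&q);
	printf("a woken: %d, b woken: %d\n", a.running, b.running);
	return 0;
}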
+/*
+ * The head for the timer-list has an "expires" field of MAX_UINT,
+ * and the sorting routine counts on this..
+ */
+static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL };
+#define SLOW_BUT_DEBUGGING_TIMERS 1
+
+void add_timer(struct timer_list * timer)
+{
+ unsigned long flags;
+ struct timer_list *p;
+
+#if SLOW_BUT_DEBUGGING_TIMERS
+ if (timer->next || timer->prev) {
+ printk("add_timer() called with non-zero list from %p\n",
+ __builtin_return_address(0));
+ return;
+ }
+#endif
+ p = &timer_head;
+ timer->expires += jiffies;
+ save_flags(flags);
+ cli();
+ do {
+ p = p->next;
+ } while (timer->expires > p->expires);
+ timer->next = p;
+ timer->prev = p->prev;
+ p->prev = timer;
+ timer->prev->next = timer;
+ restore_flags(flags);
+}
+
+int del_timer(struct timer_list * timer)
+{
+ unsigned long flags;
+#if SLOW_BUT_DEBUGGING_TIMERS
+ struct timer_list * p;
+
+ p = &timer_head;
+ save_flags(flags);
+ cli();
+ while ((p = p->next) != &timer_head) {
+ if (p == timer) {
+ timer->next->prev = timer->prev;
+ timer->prev->next = timer->next;
+ timer->next = timer->prev = NULL;
+ restore_flags(flags);
+ timer->expires -= jiffies;
+ return 1;
+ }
+ }
+ if (timer->next || timer->prev)
+ printk("del_timer() called from %p with timer not initialized\n",
+ __builtin_return_address(0));
+ restore_flags(flags);
+ return 0;
+#else
+ save_flags(flags);
+ cli();
+ if (timer->next) {
+ timer->next->prev = timer->prev;
+ timer->prev->next = timer->next;
+ timer->next = timer->prev = NULL;
+ restore_flags(flags);
+ timer->expires -= jiffies;
+ return 1;
+ }
+ restore_flags(flags);
+ return 0;
+#endif
+}
+
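add_timer() keeps the list sorted by scanning forward from the sentinel head, whose ~0 expiry guarantees the scan terminates, and timer_bh() then only ever pops expired entries off the front. The same sorted insert and expiry sweep as a self-contained demo (jiffies is an ordinary variable here, and the data/function fields are dropped):

#include <stdio.h>

struct tlist { struct tlist *next, *prev; unsigned long expires; };

static unsigned long jiffies;
/* sentinel head: expires = ~0 so the insertion scan always stops */
static struct tlist head = { &head, &head, ~0UL };

static void add_timer_demo(struct tlist *t, unsigned long delta)
{
	struct tlist *p = &head;

	t->expires = jiffies + delta;
	do {
		p = p->next;
	} while (t->expires > p->expires);	/* first later entry */
	t->next = p;				/* link in before it */
	t->prev = p->prev;
	p->prev = t;
	t->prev->next = t;
}

int main(void)
{
	struct tlist a, b, c;

	add_timer_demo(&a, 30);
	add_timer_demo(&b, 10);
	add_timer_demo(&c, 20);
	for (jiffies = 0; jiffies < 40; jiffies++)
		while (head.next != &head && head.next->expires < jiffies) {
			struct tlist *t = head.next;	/* pop the front */
			t->next->prev = t->prev;
			t->prev->next = t->next;
			printf("timer for %lu ran at %lu\n",
			       t->expires, jiffies);
		}
	return 0;
}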
+unsigned long timer_active = 0;
+struct timer_struct timer_table[32];
+
+/*
+ * Hmm.. Changed this, as the GNU make sources (load.c) seem to
+ * imply that avenrun[] is the standard name for this kind of thing.
+ * Nothing else seems to be standardized: the fractional size etc
+ * all seem to differ on different machines.
+ */
+unsigned long avenrun[3] = { 0,0,0 };
+
+/*
+ * Nr of active tasks - counted in fixed-point numbers
+ */
+static unsigned long count_active_tasks(void)
+{
+ struct task_struct **p;
+ unsigned long nr = 0;
+
+ for(p = &LAST_TASK; p > &FIRST_TASK; --p)
+ if (*p && ((*p)->state == TASK_RUNNING ||
+ (*p)->state == TASK_UNINTERRUPTIBLE ||
+ (*p)->state == TASK_SWAPPING))
+ nr += FIXED_1;
+ return nr;
+}
+
+static inline void calc_load(void)
+{
+ unsigned long active_tasks; /* fixed-point */
+ static int count = LOAD_FREQ;
+
+ if (count-- > 0)
+ return;
+ count = LOAD_FREQ;
+ active_tasks = count_active_tasks();
+ CALC_LOAD(avenrun[0], EXP_1, active_tasks);
+ CALC_LOAD(avenrun[1], EXP_5, active_tasks);
+ CALC_LOAD(avenrun[2], EXP_15, active_tasks);
+}
+
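CALC_LOAD is fixed-point exponential decay, load = (load*exp + n*(FIXED_1-exp)) >> FSHIFT, sampled every LOAD_FREQ (five seconds' worth of ticks). The sketch below replays an hour of samples with two tasks permanently runnable; the constants are the ones defined in linux/sched.h of this era, and LOAD_INT/LOAD_FRAC is the formatting /proc-style readers use:

#include <stdio.h>

#define FSHIFT	11			/* bits of fractional precision */
#define FIXED_1	(1 << FSHIFT)
#define EXP_1	1884			/* exp(-5s/1min) in fixed point */
#define EXP_5	2014			/* exp(-5s/5min) */
#define EXP_15	2037			/* exp(-5s/15min) */
#define CALC_LOAD(load, exp, n) \
	(load) = ((load) * (exp) + (n) * (FIXED_1 - (exp))) >> FSHIFT;

#define LOAD_INT(x)  ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

int main(void)
{
	unsigned long avenrun[3] = { 0, 0, 0 };
	unsigned long n = 2 * FIXED_1;	/* two runnable tasks, fixed-point */
	int i;

	for (i = 0; i < 12 * 60; i++) {	/* one hour of 5-second samples */
		CALC_LOAD(avenrun[0], EXP_1, n);
		CALC_LOAD(avenrun[1], EXP_5, n);
		CALC_LOAD(avenrun[2], EXP_15, n);
	}
	printf("%lu.%02lu %lu.%02lu %lu.%02lu\n",	/* -> near 2.00 */
	       LOAD_INT(avenrun[0]), LOAD_FRAC(avenrun[0]),
	       LOAD_INT(avenrun[1]), LOAD_FRAC(avenrun[1]),
	       LOAD_INT(avenrun[2]), LOAD_FRAC(avenrun[2]));
	return 0;
}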
+/*
+ * This routine handles the overflow of the microsecond field.
+ *
+ * The tricky bits of code to handle the accurate clock support
+ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
+ * They were originally developed for SUN and DEC kernels.
+ * All the kudos should go to Dave for this stuff.
+ *
+ * These were ported to Linux by Philip Gladstone.
+ */
+static void second_overflow(void)
+{
+ long ltemp;
+ /* last time the cmos clock got updated */
+ static long last_rtc_update=0;
+ extern int set_rtc_mmss(unsigned long);
+
+ /* Bump the maxerror field */
+ time_maxerror = (0x70000000-time_maxerror < time_tolerance) ?
+ 0x70000000 : (time_maxerror + time_tolerance);
+
+ /* Run the PLL */
+ if (time_offset < 0) {
+ ltemp = (-(time_offset+1) >> (SHIFT_KG + time_constant)) + 1;
+ time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ time_offset += (time_adj * HZ) >> (SHIFT_SCALE - SHIFT_UPDATE);
+ time_adj = - time_adj;
+ } else if (time_offset > 0) {
+ ltemp = ((time_offset-1) >> (SHIFT_KG + time_constant)) + 1;
+ time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ time_offset -= (time_adj * HZ) >> (SHIFT_SCALE - SHIFT_UPDATE);
+ } else {
+ time_adj = 0;
+ }
+
+ time_adj += (time_freq >> (SHIFT_KF + SHIFT_HZ - SHIFT_SCALE))
+ + FINETUNE;
+
+ /* Handle the leap second stuff */
+ switch (time_status) {
+ case TIME_INS:
+ /* ugly divide should be replaced */
+ if (xtime.tv_sec % 86400 == 0) {
+ xtime.tv_sec--; /* !! */
+ time_status = TIME_OOP;
+ printk("Clock: inserting leap second 23:59:60 GMT\n");
+ }
+ break;
+
+ case TIME_DEL:
+ /* ugly divide should be replaced */
+ if (xtime.tv_sec % 86400 == 86399) {
+ xtime.tv_sec++;
+ time_status = TIME_OK;
+ printk("Clock: deleting leap second 23:59:59 GMT\n");
+ }
+ break;
+
+ case TIME_OOP:
+ time_status = TIME_OK;
+ break;
+ }
+ if (xtime.tv_sec > last_rtc_update + 660)
+ if (set_rtc_mmss(xtime.tv_sec) == 0)
+ last_rtc_update = xtime.tv_sec;
+ else
+ last_rtc_update = xtime.tv_sec - 600; /* do it again in one min */
+}
+
+/*
+ * disregard lost ticks for now.. We don't care enough.
+ */
+static void timer_bh(void * unused)
+{
+ unsigned long mask;
+ struct timer_struct *tp;
+ struct timer_list * timer;
+
+ cli();
+ while ((timer = timer_head.next) != &timer_head && timer->expires < jiffies) {
+ void (*fn)(unsigned long) = timer->function;
+ unsigned long data = timer->data;
+ timer->next->prev = timer->prev;
+ timer->prev->next = timer->next;
+ timer->next = timer->prev = NULL;
+ sti();
+ fn(data);
+ cli();
+ }
+ sti();
+
+ for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
+ if (mask > timer_active)
+ break;
+ if (!(mask & timer_active))
+ continue;
+ if (tp->expires > jiffies)
+ continue;
+ timer_active &= ~mask;
+ tp->fn();
+ sti();
+ }
+}
+
+void tqueue_bh(void * unused)
+{
+ run_task_queue(&tq_timer);
+}
+
+void immediate_bh(void * unused)
+{
+ run_task_queue(&tq_immediate);
+}
+
+/*
+ * The int argument is really a (struct pt_regs *), in case the
+ * interrupt wants to know from where it was called. The timer
+ * irq uses this to decide if it should update the user or system
+ * times.
+ */
+static void do_timer(struct pt_regs * regs)
+{
+ unsigned long mask;
+ struct timer_struct *tp;
+
+ long ltemp, psecs;
+
+	/* Advance the phase; once it accumulates to a whole microsecond,
+	 * fold that microsecond into the tick as well.
+	 */
+ time_phase += time_adj;
+ if (time_phase < -FINEUSEC) {
+ ltemp = -time_phase >> SHIFT_SCALE;
+ time_phase += ltemp << SHIFT_SCALE;
+ xtime.tv_usec += tick + time_adjust_step - ltemp;
+ }
+ else if (time_phase > FINEUSEC) {
+ ltemp = time_phase >> SHIFT_SCALE;
+ time_phase -= ltemp << SHIFT_SCALE;
+ xtime.tv_usec += tick + time_adjust_step + ltemp;
+ } else
+ xtime.tv_usec += tick + time_adjust_step;
+
+ if (time_adjust)
+ {
+ /* We are doing an adjtime thing.
+ *
+ * Modify the value of the tick for next time.
+ * Note that a positive delta means we want the clock
+		 * to run fast. This means that the tick should be bigger.
+ *
+ * Limit the amount of the step for *next* tick to be
+ * in the range -tickadj .. +tickadj
+ */
+ if (time_adjust > tickadj)
+ time_adjust_step = tickadj;
+ else if (time_adjust < -tickadj)
+ time_adjust_step = -tickadj;
+ else
+ time_adjust_step = time_adjust;
+
+ /* Reduce by this step the amount of time left */
+ time_adjust -= time_adjust_step;
+ }
+ else
+ time_adjust_step = 0;
+
+ if (xtime.tv_usec >= 1000000) {
+ xtime.tv_usec -= 1000000;
+ xtime.tv_sec++;
+ second_overflow();
+ }
+
+ jiffies++;
+ calc_load();
+ if ((VM_MASK & regs->eflags) || (3 & regs->cs)) {
+ current->utime++;
+ if (current != task[0]) {
+ if (current->priority < 15)
+ kstat.cpu_nice++;
+ else
+ kstat.cpu_user++;
+ }
+ /* Update ITIMER_VIRT for current task if not in a system call */
+ if (current->it_virt_value && !(--current->it_virt_value)) {
+ current->it_virt_value = current->it_virt_incr;
+ send_sig(SIGVTALRM,current,1);
+ }
+ } else {
+ current->stime++;
+ if(current != task[0])
+ kstat.cpu_system++;
+#ifdef CONFIG_PROFILE
+ if (prof_buffer && current != task[0]) {
+ unsigned long eip = regs->eip;
+ eip >>= 2;
+ if (eip < prof_len)
+ prof_buffer[eip]++;
+ }
+#endif
+ }
+ /*
+ * check the cpu time limit on the process.
+ */
+ if ((current->rlim[RLIMIT_CPU].rlim_max != RLIM_INFINITY) &&
+ (((current->stime + current->utime) / HZ) >= current->rlim[RLIMIT_CPU].rlim_max))
+ send_sig(SIGKILL, current, 1);
+ if ((current->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) &&
+ (((current->stime + current->utime) % HZ) == 0)) {
+ psecs = (current->stime + current->utime) / HZ;
+ /* send when equal */
+ if (psecs == current->rlim[RLIMIT_CPU].rlim_cur)
+ send_sig(SIGXCPU, current, 1);
+ /* and every five seconds thereafter. */
+ else if ((psecs > current->rlim[RLIMIT_CPU].rlim_cur) &&
+ ((psecs - current->rlim[RLIMIT_CPU].rlim_cur) % 5) == 0)
+ send_sig(SIGXCPU, current, 1);
+ }
+
+ if (current != task[0] && 0 > --current->counter) {
+ current->counter = 0;
+ need_resched = 1;
+ }
+ /* Update ITIMER_PROF for the current task */
+ if (current->it_prof_value && !(--current->it_prof_value)) {
+ current->it_prof_value = current->it_prof_incr;
+ send_sig(SIGPROF,current,1);
+ }
+ for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
+ if (mask > timer_active)
+ break;
+ if (!(mask & timer_active))
+ continue;
+ if (tp->expires > jiffies)
+ continue;
+ mark_bh(TIMER_BH);
+ }
+ cli();
+ itimer_ticks++;
+ if (itimer_ticks > itimer_next)
+ need_resched = 1;
+ if (timer_head.next->expires < jiffies)
+ mark_bh(TIMER_BH);
+ if (tq_timer != &tq_last)
+ mark_bh(TQUEUE_BH);
+ sti();
+}
+
+asmlinkage int sys_alarm(long seconds)
+{
+ struct itimerval it_new, it_old;
+
+ it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
+ it_new.it_value.tv_sec = seconds;
+ it_new.it_value.tv_usec = 0;
+ _setitimer(ITIMER_REAL, &it_new, &it_old);
+ return(it_old.it_value.tv_sec + (it_old.it_value.tv_usec / 1000000));
+}
+
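So alarm() is nothing but a wrapper around the ITIMER_REAL timer, and the return value drops any sub-second remainder of the previous timer (tv_usec / 1000000 is zero whenever tv_usec < 1000000). The same construction rebuilt on the user-level setitimer(2), as a sketch:

#include <stdio.h>
#include <unistd.h>
#include <sys/time.h>

/* alarm() rebuilt on setitimer(), mirroring sys_alarm() above,
 * truncation of the old timer's partial second included. */
static unsigned int my_alarm(unsigned int seconds)
{
	struct itimerval it_new, it_old;

	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
	it_new.it_value.tv_sec = seconds;
	it_new.it_value.tv_usec = 0;
	setitimer(ITIMER_REAL, &it_new, &it_old);
	return it_old.it_value.tv_sec + (it_old.it_value.tv_usec / 1000000);
}

int main(void)
{
	my_alarm(10);
	sleep(3);
	/* ~6.99 s remain; the division drops the fraction, so: 6 */
	printf("%u s were left\n", my_alarm(0));
	return 0;
}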
+asmlinkage int sys_getpid(void)
+{
+ return current->pid;
+}
+
+asmlinkage int sys_getppid(void)
+{
+ return current->p_opptr->pid;
+}
+
+asmlinkage int sys_getuid(void)
+{
+ return current->uid;
+}
+
+asmlinkage int sys_geteuid(void)
+{
+ return current->euid;
+}
+
+asmlinkage int sys_getgid(void)
+{
+ return current->gid;
+}
+
+asmlinkage int sys_getegid(void)
+{
+ return current->egid;
+}
+
+asmlinkage int sys_nice(long increment)
+{
+ int newprio;
+
+ if (increment < 0 && !suser())
+ return -EPERM;
+ newprio = current->priority - increment;
+ if (newprio < 1)
+ newprio = 1;
+ if (newprio > 35)
+ newprio = 35;
+ current->priority = newprio;
+ return 0;
+}
+
+static void show_task(int nr,struct task_struct * p)
+{
+ unsigned long free;
+ static char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
+
+ printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
+ if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
+ printk(stat_nam[p->state]);
+ else
+ printk(" ");
+ if (p == current)
+ printk(" current ");
+ else
+ printk(" %08lX ", ((unsigned long *)p->tss.esp)[3]);
+ for (free = 1; free < 1024 ; free++) {
+ if (((unsigned long *)p->kernel_stack_page)[free])
+ break;
+ }
+ printk("%5lu %5d %6d ", free << 2, p->pid, p->p_pptr->pid);
+ if (p->p_cptr)
+ printk("%5d ", p->p_cptr->pid);
+ else
+ printk(" ");
+ if (p->p_ysptr)
+ printk("%7d", p->p_ysptr->pid);
+ else
+ printk(" ");
+ if (p->p_osptr)
+ printk(" %5d\n", p->p_osptr->pid);
+ else
+ printk("\n");
+}
+
+void show_state(void)
+{
+ int i;
+
+ printk(" free sibling\n");
+ printk(" task PC stack pid father child younger older\n");
+ for (i=0 ; i<NR_TASKS ; i++)
+ if (task[i])
+ show_task(i,task[i]);
+}
+
+void sched_init(void)
+{
+ int i;
+ struct desc_struct * p;
+
+ bh_base[TIMER_BH].routine = timer_bh;
+ bh_base[TQUEUE_BH].routine = tqueue_bh;
+ bh_base[IMMEDIATE_BH].routine = immediate_bh;
+ if (sizeof(struct sigaction) != 16)
+ panic("Struct sigaction MUST be 16 bytes");
+ set_tss_desc(gdt+FIRST_TSS_ENTRY,&init_task.tss);
+ set_ldt_desc(gdt+FIRST_LDT_ENTRY,&default_ldt,1);
+ set_system_gate(0x80,&system_call);
+ p = gdt+2+FIRST_TSS_ENTRY;
+ for(i=1 ; i<NR_TASKS ; i++) {
+ task[i] = NULL;
+ p->a=p->b=0;
+ p++;
+ p->a=p->b=0;
+ p++;
+ }
+/* Clear NT, so that we won't have troubles with that later on */
+ __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
+ load_TR(0);
+ load_ldt(0);
+ outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(LATCH & 0xff , 0x40); /* LSB */
+ outb(LATCH >> 8 , 0x40); /* MSB */
+ if (request_irq(TIMER_IRQ,(void (*)(int)) do_timer, 0, "timer") != 0)
+ panic("Could not allocate timer IRQ!");
+}
diff --git a/arch/i386/signal.c b/arch/i386/signal.c
new file mode 100644
index 000000000..df7324294
--- /dev/null
+++ b/arch/i386/signal.c
@@ -0,0 +1,407 @@
+/*
+ * linux/kernel/signal.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+
+#include <asm/segment.h>
+
+#define _S(nr) (1<<((nr)-1))
+
+#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP)))
+
+asmlinkage int do_signal(unsigned long oldmask, struct pt_regs * regs);
+
+asmlinkage int sys_sigprocmask(int how, sigset_t *set, sigset_t *oset)
+{
+ sigset_t new_set, old_set = current->blocked;
+ int error;
+
+ if (set) {
+ error = verify_area(VERIFY_READ, set, sizeof(sigset_t));
+ if (error)
+ return error;
+ new_set = get_fs_long((unsigned long *) set) & _BLOCKABLE;
+ switch (how) {
+ case SIG_BLOCK:
+ current->blocked |= new_set;
+ break;
+ case SIG_UNBLOCK:
+ current->blocked &= ~new_set;
+ break;
+ case SIG_SETMASK:
+ current->blocked = new_set;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ if (oset) {
+ error = verify_area(VERIFY_WRITE, oset, sizeof(sigset_t));
+ if (error)
+ return error;
+ put_fs_long(old_set, (unsigned long *) oset);
+ }
+ return 0;
+}
+
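With every signal owning one bit of an unsigned long, the three 'how' operations are plain bit arithmetic, and masking with _BLOCKABLE is what keeps SIGKILL and SIGSTOP permanently unblockable. The core logic extracted into host-runnable form (a local variable stands in for current->blocked, and error codes are simplified):

#include <stdio.h>
#include <signal.h>	/* SIG_BLOCK/SIG_UNBLOCK/SIG_SETMASK, SIGINT... */

#define _S(nr)	   (1UL << ((nr) - 1))
#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP)))

static unsigned long blocked;	/* stand-in for current->blocked */

static int mask_op(int how, unsigned long new_set)
{
	new_set &= _BLOCKABLE;	/* SIGKILL/SIGSTOP silently dropped */
	switch (how) {
	case SIG_BLOCK:
		blocked |= new_set;
		break;
	case SIG_UNBLOCK:
		blocked &= ~new_set;
		break;
	case SIG_SETMASK:
		blocked = new_set;
		break;
	default:
		return -1;	/* the kernel returns -EINVAL */
	}
	return 0;
}

int main(void)
{
	mask_op(SIG_BLOCK, _S(SIGINT) | _S(SIGKILL));
	printf("blocked = %#lx (the SIGKILL bit stays clear)\n", blocked);
	mask_op(SIG_UNBLOCK, _S(SIGINT));
	printf("blocked = %#lx\n", blocked);
	return 0;
}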
+asmlinkage int sys_sgetmask(void)
+{
+ return current->blocked;
+}
+
+asmlinkage int sys_ssetmask(int newmask)
+{
+ int old=current->blocked;
+
+ current->blocked = newmask & _BLOCKABLE;
+ return old;
+}
+
+asmlinkage int sys_sigpending(sigset_t *set)
+{
+ int error;
+ /* fill in "set" with signals pending but blocked. */
+ error = verify_area(VERIFY_WRITE, set, 4);
+ if (!error)
+ put_fs_long(current->blocked & current->signal, (unsigned long *)set);
+ return error;
+}
+
+/*
+ * atomically swap in the new signal mask, and wait for a signal.
+ */
+asmlinkage int sys_sigsuspend(int restart, unsigned long oldmask, unsigned long set)
+{
+ unsigned long mask;
+ struct pt_regs * regs = (struct pt_regs *) &restart;
+
+ mask = current->blocked;
+ current->blocked = set & _BLOCKABLE;
+ regs->eax = -EINTR;
+ while (1) {
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ if (do_signal(mask,regs))
+ return -EINTR;
+ }
+}
+
+/*
+ * POSIX 3.3.1.3:
+ * "Setting a signal action to SIG_IGN for a signal that is pending
+ * shall cause the pending signal to be discarded, whether or not
+ * it is blocked" (but SIGCHLD is unspecified: linux leaves it alone).
+ *
+ * "Setting a signal action to SIG_DFL for a signal that is pending
+ * and whose default action is to ignore the signal (for example,
+ * SIGCHLD), shall cause the pending signal to be discarded, whether
+ * or not it is blocked"
+ *
+ * Note the silly behaviour of SIGCHLD: SIG_IGN means that the signal
+ * isn't actually ignored, but does automatic child reaping, while
+ * SIG_DFL is explicitly said by POSIX to force the signal to be ignored..
+ */
+static void check_pending(int signum)
+{
+ struct sigaction *p;
+
+ p = signum - 1 + current->sigaction;
+ if (p->sa_handler == SIG_IGN) {
+ if (signum == SIGCHLD)
+ return;
+ current->signal &= ~_S(signum);
+ return;
+ }
+ if (p->sa_handler == SIG_DFL) {
+ if (signum != SIGCONT && signum != SIGCHLD && signum != SIGWINCH)
+ return;
+ current->signal &= ~_S(signum);
+ return;
+ }
+}
+
+asmlinkage int sys_signal(int signum, unsigned long handler)
+{
+ struct sigaction tmp;
+
+ if (signum<1 || signum>32)
+ return -EINVAL;
+ if (signum==SIGKILL || signum==SIGSTOP)
+ return -EINVAL;
+ if (handler >= TASK_SIZE)
+ return -EFAULT;
+ tmp.sa_handler = (void (*)(int)) handler;
+ tmp.sa_mask = 0;
+ tmp.sa_flags = SA_ONESHOT | SA_NOMASK;
+ tmp.sa_restorer = NULL;
+ handler = (long) current->sigaction[signum-1].sa_handler;
+ current->sigaction[signum-1] = tmp;
+ check_pending(signum);
+ return handler;
+}
+
+asmlinkage int sys_sigaction(int signum, const struct sigaction * action,
+ struct sigaction * oldaction)
+{
+ struct sigaction new_sa, *p;
+
+ if (signum<1 || signum>32)
+ return -EINVAL;
+ if (signum==SIGKILL || signum==SIGSTOP)
+ return -EINVAL;
+ p = signum - 1 + current->sigaction;
+ if (action) {
+ int err = verify_area(VERIFY_READ, action, sizeof(*action));
+ if (err)
+ return err;
+ memcpy_fromfs(&new_sa, action, sizeof(struct sigaction));
+ if (new_sa.sa_flags & SA_NOMASK)
+ new_sa.sa_mask = 0;
+ else {
+ new_sa.sa_mask |= _S(signum);
+ new_sa.sa_mask &= _BLOCKABLE;
+ }
+ if (TASK_SIZE <= (unsigned long) new_sa.sa_handler)
+ return -EFAULT;
+ }
+ if (oldaction) {
+ int err = verify_area(VERIFY_WRITE, oldaction, sizeof(*oldaction));
+ if (err)
+ return err;
+ memcpy_tofs(oldaction, p, sizeof(struct sigaction));
+ }
+ if (action) {
+ *p = new_sa;
+ check_pending(signum);
+ }
+ return 0;
+}
+
+asmlinkage int sys_waitpid(pid_t pid,unsigned long * stat_addr, int options);
+
+/*
+ * This sets regs->esp even though we don't actually use sigstacks yet..
+ */
+asmlinkage int sys_sigreturn(unsigned long __unused)
+{
+#define COPY(x) regs->x = context.x
+#define COPY_SEG(x) \
+if ((context.x & 0xfffc) && (context.x & 3) != 3) goto badframe; COPY(x);
+#define COPY_SEG_STRICT(x) \
+if (!(context.x & 0xfffc) || (context.x & 3) != 3) goto badframe; COPY(x);
+ struct sigcontext_struct context;
+ struct pt_regs * regs;
+
+ regs = (struct pt_regs *) &__unused;
+ if (verify_area(VERIFY_READ, (void *) regs->esp, sizeof(context)))
+ goto badframe;
+ memcpy_fromfs(&context,(void *) regs->esp, sizeof(context));
+ current->blocked = context.oldmask & _BLOCKABLE;
+ COPY_SEG(ds);
+ COPY_SEG(es);
+ COPY_SEG(fs);
+ COPY_SEG(gs);
+ COPY_SEG_STRICT(ss);
+ COPY_SEG_STRICT(cs);
+ COPY(eip);
+ COPY(ecx); COPY(edx);
+ COPY(ebx);
+ COPY(esp); COPY(ebp);
+ COPY(edi); COPY(esi);
+ regs->eflags &= ~0x40DD5;
+ regs->eflags |= context.eflags & 0x40DD5;
+ regs->orig_eax = -1; /* disable syscall checks */
+ return context.eax;
+badframe:
+ do_exit(SIGSEGV);
+}
+
+/*
+ * Set up a signal frame... Make the stack look the way iBCS2 expects
+ * it to look.
+ */
+static void setup_frame(struct sigaction * sa, unsigned long ** fp, unsigned long eip,
+ struct pt_regs * regs, int signr, unsigned long oldmask)
+{
+ unsigned long * frame;
+
+#define __CODE ((unsigned long)(frame+24))
+#define CODE(x) ((unsigned long *) ((x)+__CODE))
+ frame = *fp;
+ if (regs->ss != USER_DS)
+ frame = (unsigned long *) sa->sa_restorer;
+ frame -= 32;
+ if (verify_area(VERIFY_WRITE,frame,32*4))
+ do_exit(SIGSEGV);
+/* set up the "normal" stack seen by the signal handler (iBCS2) */
+ put_fs_long(__CODE,frame);
+ if (current->exec_domain && current->exec_domain->signal_invmap)
+ put_fs_long(current->exec_domain->signal_invmap[signr], frame+1);
+ else
+ put_fs_long(signr, frame+1);
+ put_fs_long(regs->gs, frame+2);
+ put_fs_long(regs->fs, frame+3);
+ put_fs_long(regs->es, frame+4);
+ put_fs_long(regs->ds, frame+5);
+ put_fs_long(regs->edi, frame+6);
+ put_fs_long(regs->esi, frame+7);
+ put_fs_long(regs->ebp, frame+8);
+ put_fs_long((long)*fp, frame+9);
+ put_fs_long(regs->ebx, frame+10);
+ put_fs_long(regs->edx, frame+11);
+ put_fs_long(regs->ecx, frame+12);
+ put_fs_long(regs->eax, frame+13);
+ put_fs_long(current->tss.trap_no, frame+14);
+ put_fs_long(current->tss.error_code, frame+15);
+ put_fs_long(eip, frame+16);
+ put_fs_long(regs->cs, frame+17);
+ put_fs_long(regs->eflags, frame+18);
+ put_fs_long(regs->esp, frame+19);
+ put_fs_long(regs->ss, frame+20);
+ put_fs_long(0,frame+21); /* 387 state pointer - not implemented*/
+/* non-iBCS2 extensions.. */
+ put_fs_long(oldmask, frame+22);
+ put_fs_long(current->tss.cr2, frame+23);
+/* set up the return code... */
+ put_fs_long(0x0000b858, CODE(0)); /* popl %eax ; movl $,%eax */
+ put_fs_long(0x80cd0000, CODE(4)); /* int $0x80 */
+ put_fs_long(__NR_sigreturn, CODE(2));
+ *fp = frame;
+#undef __CODE
+#undef CODE
+}
+
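The three put_fs_long() calls at the end lay down an eight-byte trampoline at frame+24; the CODE(2) write lands the syscall number inside the immediate of the movl, so the bytes come out as 58 b8 <nr> cd 80, i.e. popl %eax; movl $__NR_sigreturn,%eax; int $0x80. A little-endian host can replay the overlapping writes (119 is __NR_sigreturn on i386):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
	unsigned char code[8];
	uint32_t w;
	int i;

	w = 0x0000b858; memcpy(code + 0, &w, 4);  /* popl %eax; movl $..,%eax */
	w = 0x80cd0000; memcpy(code + 4, &w, 4);  /* ..and int $0x80 */
	w = 119;        memcpy(code + 2, &w, 4);  /* __NR_sigreturn fills the
						     four zero bytes */
	for (i = 0; i < 8; i++)
		printf("%02x ", code[i]);	/* 58 b8 77 00 00 00 cd 80 */
	printf("\n");
	return 0;
}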
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init with a SIGKILL, even by
+ * mistake.
+ *
+ * Note that we go through the signals twice: once to check the signals that
+ * the kernel can handle, and then we build all the user-level signal handling
+ * stack-frames in one go after that.
+ */
+asmlinkage int do_signal(unsigned long oldmask, struct pt_regs * regs)
+{
+ unsigned long mask = ~current->blocked;
+ unsigned long handler_signal = 0;
+ unsigned long *frame = NULL;
+ unsigned long eip = 0;
+ unsigned long signr;
+ struct sigaction * sa;
+
+ while ((signr = current->signal & mask)) {
+ __asm__("bsf %2,%1\n\t"
+ "btrl %1,%0"
+ :"=m" (current->signal),"=r" (signr)
+ :"1" (signr));
+ sa = current->sigaction + signr;
+ signr++;
+ if ((current->flags & PF_PTRACED) && signr != SIGKILL) {
+ current->exit_code = signr;
+ current->state = TASK_STOPPED;
+ notify_parent(current);
+ schedule();
+ if (!(signr = current->exit_code))
+ continue;
+ current->exit_code = 0;
+ if (signr == SIGSTOP)
+ continue;
+ if (_S(signr) & current->blocked) {
+ current->signal |= _S(signr);
+ continue;
+ }
+ sa = current->sigaction + signr - 1;
+ }
+ if (sa->sa_handler == SIG_IGN) {
+ if (signr != SIGCHLD)
+ continue;
+ /* check for SIGCHLD: it's special */
+ while (sys_waitpid(-1,NULL,WNOHANG) > 0)
+ /* nothing */;
+ continue;
+ }
+ if (sa->sa_handler == SIG_DFL) {
+ if (current->pid == 1)
+ continue;
+ switch (signr) {
+ case SIGCONT: case SIGCHLD: case SIGWINCH:
+ continue;
+
+ case SIGSTOP: case SIGTSTP: case SIGTTIN: case SIGTTOU:
+ if (current->flags & PF_PTRACED)
+ continue;
+ current->state = TASK_STOPPED;
+ current->exit_code = signr;
+ if (!(current->p_pptr->sigaction[SIGCHLD-1].sa_flags &
+ SA_NOCLDSTOP))
+ notify_parent(current);
+ schedule();
+ continue;
+
+ case SIGQUIT: case SIGILL: case SIGTRAP:
+ case SIGIOT: case SIGFPE: case SIGSEGV:
+ if (current->binfmt && current->binfmt->core_dump) {
+ if (current->binfmt->core_dump(signr, regs))
+ signr |= 0x80;
+ }
+ /* fall through */
+ default:
+ current->signal |= _S(signr & 0x7f);
+ do_exit(signr);
+ }
+ }
+ /*
+ * OK, we're invoking a handler
+ */
+ if (regs->orig_eax >= 0) {
+ if (regs->eax == -ERESTARTNOHAND ||
+ (regs->eax == -ERESTARTSYS && !(sa->sa_flags & SA_RESTART)))
+ regs->eax = -EINTR;
+ }
+ handler_signal |= 1 << (signr-1);
+ mask &= ~sa->sa_mask;
+ }
+ if (regs->orig_eax >= 0 &&
+ (regs->eax == -ERESTARTNOHAND ||
+ regs->eax == -ERESTARTSYS ||
+ regs->eax == -ERESTARTNOINTR)) {
+ regs->eax = regs->orig_eax;
+ regs->eip -= 2;
+ }
+ if (!handler_signal) /* no handler will be called - return 0 */
+ return 0;
+ eip = regs->eip;
+ frame = (unsigned long *) regs->esp;
+ signr = 1;
+ sa = current->sigaction;
+ for (mask = 1 ; mask ; sa++,signr++,mask += mask) {
+ if (mask > handler_signal)
+ break;
+ if (!(mask & handler_signal))
+ continue;
+ setup_frame(sa,&frame,eip,regs,signr,oldmask);
+ eip = (unsigned long) sa->sa_handler;
+ if (sa->sa_flags & SA_ONESHOT)
+ sa->sa_handler = NULL;
+/* force a supervisor-mode page-in of the signal handler to reduce races */
+ __asm__("testb $0,%%fs:%0": :"m" (*(char *) eip));
+ regs->cs = USER_CS; regs->ss = USER_DS;
+ regs->ds = USER_DS; regs->es = USER_DS;
+ regs->gs = USER_DS; regs->fs = USER_DS;
+ current->blocked |= sa->sa_mask;
+ oldmask |= sa->sa_mask;
+ }
+ regs->esp = (unsigned long) frame;
+ regs->eip = eip; /* "return" to the first handler */
+ current->tss.trap_no = current->tss.error_code = 0;
+ return 1;
+}
diff --git a/arch/i386/traps.c b/arch/i386/traps.c
new file mode 100644
index 000000000..150b702b3
--- /dev/null
+++ b/arch/i386/traps.c
@@ -0,0 +1,245 @@
+/*
+ * linux/kernel/traps.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * 'Traps.c' handles hardware traps and faults after we have saved some
+ * state in 'asm.s'. It is currently mostly a debugging aid and will be
+ * extended to mainly kill the offending process (probably by giving it
+ * a signal, but possibly by killing it outright if necessary).
+ */
+#include <linux/head.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+
+#include <asm/system.h>
+#include <asm/segment.h>
+#include <asm/io.h>
+
+static inline void console_verbose(void)
+{
+ extern int console_loglevel;
+ console_loglevel = 15;
+}
+
+#define DO_ERROR(trapnr, signr, str, name, tsk) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+ tsk->tss.error_code = error_code; \
+ tsk->tss.trap_no = trapnr; \
+ if (signr == SIGTRAP && current->flags & PF_PTRACED) \
+ current->blocked &= ~(1 << (SIGTRAP-1)); \
+ send_sig(signr, tsk, 1); \
+ die_if_kernel(str,regs,error_code); \
+}
+
+#define get_seg_byte(seg,addr) ({ \
+register unsigned char __res; \
+__asm__("push %%fs;mov %%ax,%%fs;movb %%fs:%2,%%al;pop %%fs" \
+ :"=a" (__res):"0" (seg),"m" (*(addr))); \
+__res;})
+
+#define get_seg_long(seg,addr) ({ \
+register unsigned long __res; \
+__asm__("push %%fs;mov %%ax,%%fs;movl %%fs:%2,%%eax;pop %%fs" \
+ :"=a" (__res):"0" (seg),"m" (*(addr))); \
+__res;})
+
+#define _fs() ({ \
+register unsigned short __res; \
+__asm__("mov %%fs,%%ax":"=a" (__res):); \
+__res;})
+
+void page_exception(void);
+
+asmlinkage void divide_error(void);
+asmlinkage void debug(void);
+asmlinkage void nmi(void);
+asmlinkage void int3(void);
+asmlinkage void overflow(void);
+asmlinkage void bounds(void);
+asmlinkage void invalid_op(void);
+asmlinkage void device_not_available(void);
+asmlinkage void double_fault(void);
+asmlinkage void coprocessor_segment_overrun(void);
+asmlinkage void invalid_TSS(void);
+asmlinkage void segment_not_present(void);
+asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void);
+asmlinkage void page_fault(void);
+asmlinkage void coprocessor_error(void);
+asmlinkage void reserved(void);
+asmlinkage void alignment_check(void);
+
+/*static*/ void die_if_kernel(char * str, struct pt_regs * regs, long err)
+{
+ int i;
+ unsigned long esp;
+ unsigned short ss;
+
+ esp = (unsigned long) &regs->esp;
+ ss = KERNEL_DS;
+ if ((regs->eflags & VM_MASK) || (3 & regs->cs) == 3)
+ return;
+ if (regs->cs & 3) {
+ esp = regs->esp;
+ ss = regs->ss;
+ }
+ console_verbose();
+ printk("%s: %04lx\n", str, err & 0xffff);
+ printk("EIP: %04x:%08lx\nEFLAGS: %08lx\n", 0xffff & regs->cs,regs->eip,regs->eflags);
+ printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
+ regs->eax, regs->ebx, regs->ecx, regs->edx);
+ printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
+ regs->esi, regs->edi, regs->ebp, esp);
+ printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
+ regs->ds, regs->es, regs->fs, regs->gs, ss);
+ store_TR(i);
+ if (STACK_MAGIC != *(unsigned long *)current->kernel_stack_page)
+ printk("Corrupted stack page\n");
+ printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)\nStack: ",
+ current->comm, current->pid, 0xffff & i, current->kernel_stack_page);
+ for(i=0;i<5;i++)
+ printk("%08lx ", get_seg_long(ss,(i+(unsigned long *)esp)));
+ printk("\nCode: ");
+ for(i=0;i<20;i++)
+ printk("%02x ",0xff & get_seg_byte(regs->cs,(i+(char *)regs->eip)));
+ printk("\n");
+ do_exit(SIGSEGV);
+}
+
+DO_ERROR( 0, SIGFPE, "divide error", divide_error, current)
+DO_ERROR( 3, SIGTRAP, "int3", int3, current)
+DO_ERROR( 4, SIGSEGV, "overflow", overflow, current)
+DO_ERROR( 5, SIGSEGV, "bounds", bounds, current)
+DO_ERROR( 6, SIGILL, "invalid operand", invalid_op, current)
+DO_ERROR( 7, SIGSEGV, "device not available", device_not_available, current)
+DO_ERROR( 8, SIGSEGV, "double fault", double_fault, current)
+DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun, last_task_used_math)
+DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS, current)
+DO_ERROR(11, SIGBUS, "segment not present", segment_not_present, current)
+DO_ERROR(12, SIGBUS, "stack segment", stack_segment, current)
+DO_ERROR(15, SIGSEGV, "reserved", reserved, current)
+DO_ERROR(17, SIGSEGV, "alignment check", alignment_check, current)
+
+asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
+{
+ int signr = SIGSEGV;
+
+ if (regs->eflags & VM_MASK) {
+ handle_vm86_fault((struct vm86_regs *) regs, error_code);
+ return;
+ }
+ die_if_kernel("general protection",regs,error_code);
+ switch (get_seg_byte(regs->cs, (char *)regs->eip)) {
+ case 0xCD: /* INT */
+ case 0xF4: /* HLT */
+ case 0xFA: /* CLI */
+ case 0xFB: /* STI */
+ signr = SIGILL;
+ }
+ current->tss.error_code = error_code;
+ current->tss.trap_no = 13;
+ send_sig(signr, current, 1);
+}
+
+asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
+{
+ printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
+ printk("You probably have a hardware problem with your RAM chips\n");
+}
+
+asmlinkage void do_debug(struct pt_regs * regs, long error_code)
+{
+ if (regs->eflags & VM_MASK) {
+ handle_vm86_debug((struct vm86_regs *) regs, error_code);
+ return;
+ }
+ if (current->flags & PF_PTRACED)
+ current->blocked &= ~(1 << (SIGTRAP-1));
+ send_sig(SIGTRAP, current, 1);
+ current->tss.trap_no = 1;
+ current->tss.error_code = error_code;
+ if ((regs->cs & 3) == 0) {
+ /* If this is a kernel mode trap, then reset db7 and allow us to continue */
+ __asm__("movl %0,%%db7"
+ : /* no output */
+ : "r" (0));
+ return;
+ }
+ die_if_kernel("debug",regs,error_code);
+}
+
+/*
+ * Allow the process which triggered the interrupt to recover the error
+ * condition.
+ * - the status word is saved in the cs selector.
+ * - the tag word is saved in the operand selector.
+ * - the status word is then cleared and the tags all set to Empty.
+ *
+ * This will give sufficient information for complete recovery provided that
+ * the affected process knows or can deduce the code and data segments
+ * which were in force when the exception condition arose.
+ *
+ * Note that we play around with the 'TS' bit in the hope of getting
+ * the correct behaviour even in the presence of asynchronous
+ * IRQ13 delivery.
+ */
+void math_error(void)
+{
+ struct i387_hard_struct * env;
+
+ clts();
+ if (!last_task_used_math) {
+ __asm__("fnclex");
+ return;
+ }
+ env = &last_task_used_math->tss.i387.hard;
+ send_sig(SIGFPE, last_task_used_math, 1);
+ last_task_used_math->tss.trap_no = 16;
+ last_task_used_math->tss.error_code = 0;
+ __asm__ __volatile__("fnsave %0":"=m" (*env));
+ last_task_used_math = NULL;
+ stts();
+ env->fcs = (env->swd & 0x0000ffff) | (env->fcs & 0xffff0000);
+ env->fos = env->twd;
+ env->swd &= 0xffff3800;
+ env->twd = 0xffffffff;
+}
+
+asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code)
+{
+ ignore_irq13 = 1;
+ math_error();
+}
+
+void trap_init(void)
+{
+ int i;
+
+ set_trap_gate(0,&divide_error);
+ set_trap_gate(1,&debug);
+ set_trap_gate(2,&nmi);
+ set_system_gate(3,&int3); /* int3-5 can be called from all */
+ set_system_gate(4,&overflow);
+ set_system_gate(5,&bounds);
+ set_trap_gate(6,&invalid_op);
+ set_trap_gate(7,&device_not_available);
+ set_trap_gate(8,&double_fault);
+ set_trap_gate(9,&coprocessor_segment_overrun);
+ set_trap_gate(10,&invalid_TSS);
+ set_trap_gate(11,&segment_not_present);
+ set_trap_gate(12,&stack_segment);
+ set_trap_gate(13,&general_protection);
+ set_trap_gate(14,&page_fault);
+ set_trap_gate(15,&reserved);
+ set_trap_gate(16,&coprocessor_error);
+ set_trap_gate(17,&alignment_check);
+ for (i=18;i<48;i++)
+ set_trap_gate(i,&reserved);
+}
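set_trap_gate() and set_system_gate() differ only in the descriptor's DPL: 0 for exceptions only the CPU may raise, 3 for vectors 3-5 so that user-mode int3/into/bound don't fault with a general protection error. A sketch of how an i386 gate descriptor packs these fields; make_gate and the 0x10 kernel-code selector are assumptions of the sketch, reflecting the era's conventional layout rather than anything in this patch:

#include <stdio.h>
#include <stdint.h>

/* One 8-byte IDT entry; a 386 trap gate has type 15 (0xF). */
static uint64_t make_gate(uint16_t selector, uint32_t offset,
			  unsigned dpl, unsigned type)
{
	uint32_t lo = ((uint32_t) selector << 16) | (offset & 0xffff);
	uint32_t hi = (offset & 0xffff0000)	/* offset bits 31..16 */
		    | 0x8000			/* present bit */
		    | (dpl << 13)		/* privilege needed for "int n" */
		    | (type << 8);
	return ((uint64_t) hi << 32) | lo;
}

int main(void)
{
	uint32_t handler = 0x00101234;	/* made-up handler address */

	printf("trap gate (dpl 0):   %016llx\n",
	       (unsigned long long) make_gate(0x10, handler, 0, 15));
	printf("system gate (dpl 3): %016llx\n",
	       (unsigned long long) make_gate(0x10, handler, 3, 15));
	return 0;
}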
diff --git a/arch/i386/vm86.c b/arch/i386/vm86.c
new file mode 100644
index 000000000..144d93a02
--- /dev/null
+++ b/arch/i386/vm86.c
@@ -0,0 +1,404 @@
+/*
+ * linux/kernel/vm86.c
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ */
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/string.h>
+#include <linux/ptrace.h>
+
+#include <asm/segment.h>
+#include <asm/io.h>
+
+/*
+ * Known problems:
+ *
+ * Interrupt handling is not guaranteed:
+ * - a real x86 will disable all interrupts for one instruction
+ * after a "mov ss,xx" to make stack handling atomic even without
+ * the 'lss' instruction. We can't guarantee this in v86 mode,
+ * as the next instruction might result in a page fault or similar.
+ * - a real x86 will have interrupts disabled for one instruction
+ * past the 'sti' that enables them. We don't bother with all the
+ * details yet..
+ *
+ * Hopefully these problems do not actually matter for anything.
+ */
+
+/*
+ * 8- and 16-bit register defines..
+ */
+#define AL(regs) (((unsigned char *)&((regs)->eax))[0])
+#define AH(regs) (((unsigned char *)&((regs)->eax))[1])
+#define IP(regs) (*(unsigned short *)&((regs)->eip))
+#define SP(regs) (*(unsigned short *)&((regs)->esp))
+
+/*
+ * virtual flags (16 and 32-bit versions)
+ */
+#define VFLAGS (*(unsigned short *)&(current->v86flags))
+#define VEFLAGS (current->v86flags)
+
+#define set_flags(X,new,mask) \
+((X) = ((X) & ~(mask)) | ((new) & (mask)))
+
+#define SAFE_MASK (0xDD5)
+#define RETURN_MASK (0xDFF)
+
+asmlinkage struct pt_regs * save_v86_state(struct vm86_regs * regs)
+{
+ unsigned long tmp;
+
+ if (!current->vm86_info) {
+ printk("no vm86_info: BAD\n");
+ do_exit(SIGSEGV);
+ }
+ set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->v86mask);
+ memcpy_tofs(&current->vm86_info->regs,regs,sizeof(*regs));
+ put_fs_long(current->screen_bitmap,&current->vm86_info->screen_bitmap);
+ tmp = current->tss.esp0;
+ current->tss.esp0 = current->saved_kernel_stack;
+ current->saved_kernel_stack = 0;
+ return (struct pt_regs *) tmp;
+}
+
+static void mark_screen_rdonly(struct task_struct * tsk)
+{
+ unsigned long tmp;
+ unsigned long *pg_table;
+
+ if ((tmp = tsk->tss.cr3) != 0) {
+ tmp = *(unsigned long *) tmp;
+ if (tmp & PAGE_PRESENT) {
+ tmp &= PAGE_MASK;
+ pg_table = (0xA0000 >> PAGE_SHIFT) + (unsigned long *) tmp;
+ tmp = 32;
+ while (tmp--) {
+ if (PAGE_PRESENT & *pg_table)
+ *pg_table &= ~PAGE_RW;
+ pg_table++;
+ }
+ }
+ }
+}
+
+asmlinkage int sys_vm86(struct vm86_struct * v86)
+{
+ struct vm86_struct info;
+ struct pt_regs * pt_regs = (struct pt_regs *) &v86;
+ int error;
+
+ if (current->saved_kernel_stack)
+ return -EPERM;
+ /* v86 must be readable (now) and writable (for save_v86_state) */
+ error = verify_area(VERIFY_WRITE,v86,sizeof(*v86));
+ if (error)
+ return error;
+ memcpy_fromfs(&info,v86,sizeof(info));
+/*
+ * make sure the vm86() system call doesn't try to do anything silly
+ */
+ info.regs.__null_ds = 0;
+ info.regs.__null_es = 0;
+ info.regs.__null_fs = 0;
+ info.regs.__null_gs = 0;
+/*
+ * The eflags register is also special: we cannot trust that the user
+ * has set it up safely, so this makes sure interrupt etc flags are
+ * inherited from protected mode.
+ */
+ VEFLAGS = info.regs.eflags;
+ info.regs.eflags &= SAFE_MASK;
+ info.regs.eflags |= pt_regs->eflags & ~SAFE_MASK;
+ info.regs.eflags |= VM_MASK;
+
+ switch (info.cpu_type) {
+ case CPU_286:
+ current->v86mask = 0;
+ break;
+ case CPU_386:
+ current->v86mask = NT_MASK | IOPL_MASK;
+ break;
+ case CPU_486:
+ current->v86mask = AC_MASK | NT_MASK | IOPL_MASK;
+ break;
+ default:
+ current->v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK;
+ break;
+ }
+
+/*
+ * Save old state, set default return value (%eax) to 0
+ */
+ pt_regs->eax = 0;
+ current->saved_kernel_stack = current->tss.esp0;
+ current->tss.esp0 = (unsigned long) pt_regs;
+ current->vm86_info = v86;
+
+ current->screen_bitmap = info.screen_bitmap;
+ if (info.flags & VM86_SCREEN_BITMAP)
+ mark_screen_rdonly(current);
+ __asm__ __volatile__("movl %0,%%esp\n\t"
+ "jmp ret_from_sys_call"
+ : /* no outputs */
+ :"r" (&info.regs));
+ return 0;
+}
+
+static inline void return_to_32bit(struct vm86_regs * regs16, int retval)
+{
+ struct pt_regs * regs32;
+
+ regs32 = save_v86_state(regs16);
+ regs32->eax = retval;
+ __asm__ __volatile__("movl %0,%%esp\n\t"
+ "jmp ret_from_sys_call"
+ : : "r" (regs32));
+}
+
+static inline void set_IF(struct vm86_regs * regs)
+{
+ VEFLAGS |= VIF_MASK;
+ if (VEFLAGS & VIP_MASK)
+ return_to_32bit(regs, VM86_STI);
+}
+
+static inline void clear_IF(struct vm86_regs * regs)
+{
+ VEFLAGS &= ~VIF_MASK;
+}
+
+static inline void clear_TF(struct vm86_regs * regs)
+{
+ regs->eflags &= ~TF_MASK;
+}
+
+static inline void set_vflags_long(unsigned long eflags, struct vm86_regs * regs)
+{
+ set_flags(VEFLAGS, eflags, current->v86mask);
+ set_flags(regs->eflags, eflags, SAFE_MASK);
+ if (eflags & IF_MASK)
+ set_IF(regs);
+}
+
+static inline void set_vflags_short(unsigned short flags, struct vm86_regs * regs)
+{
+ set_flags(VFLAGS, flags, current->v86mask);
+ set_flags(regs->eflags, flags, SAFE_MASK);
+ if (flags & IF_MASK)
+ set_IF(regs);
+}
+
+static inline unsigned long get_vflags(struct vm86_regs * regs)
+{
+ unsigned long flags = regs->eflags & RETURN_MASK;
+
+ if (VEFLAGS & VIF_MASK)
+ flags |= IF_MASK;
+ return flags | (VEFLAGS & current->v86mask);
+}
+
+static inline int is_revectored(int nr, struct revectored_struct * bitmap)
+{
+ __asm__ __volatile__("btl %2,%%fs:%1\n\tsbbl %0,%0"
+ :"=r" (nr)
+ :"m" (*bitmap),"r" (nr));
+ return nr;
+}
+
+/*
+ * Boy are these ugly, but we need to do the correct 16-bit arithmetic.
+ * Gcc makes a mess of it, so we do it inline and use non-obvious calling
+ * conventions..
+ */
+#define pushb(base, ptr, val) \
+__asm__ __volatile__( \
+ "decw %w0\n\t" \
+ "movb %2,%%fs:0(%1,%0)" \
+ : "=r" (ptr) \
+ : "r" (base), "q" (val), "0" (ptr))
+
+#define pushw(base, ptr, val) \
+__asm__ __volatile__( \
+ "decw %w0\n\t" \
+ "movb %h2,%%fs:0(%1,%0)\n\t" \
+ "decw %w0\n\t" \
+ "movb %b2,%%fs:0(%1,%0)" \
+ : "=r" (ptr) \
+ : "r" (base), "q" (val), "0" (ptr))
+
+#define pushl(base, ptr, val) \
+__asm__ __volatile__( \
+ "decw %w0\n\t" \
+ "rorl $16,%2\n\t" \
+ "movb %h2,%%fs:0(%1,%0)\n\t" \
+ "decw %w0\n\t" \
+ "movb %b2,%%fs:0(%1,%0)\n\t" \
+ "decw %w0\n\t" \
+ "rorl $16,%2\n\t" \
+ "movb %h2,%%fs:0(%1,%0)\n\t" \
+ "decw %w0\n\t" \
+ "movb %b2,%%fs:0(%1,%0)" \
+ : "=r" (ptr) \
+ : "r" (base), "q" (val), "0" (ptr))
+
+#define popb(base, ptr) \
+({ unsigned long __res; \
+__asm__ __volatile__( \
+ "movb %%fs:0(%1,%0),%b2\n\t" \
+ "incw %w0" \
+ : "=r" (ptr), "=r" (base), "=q" (__res) \
+ : "0" (ptr), "1" (base), "2" (0)); \
+__res; })
+
+#define popw(base, ptr) \
+({ unsigned long __res; \
+__asm__ __volatile__( \
+ "movb %%fs:0(%1,%0),%b2\n\t" \
+ "incw %w0\n\t" \
+ "movb %%fs:0(%1,%0),%h2\n\t" \
+ "incw %w0" \
+ : "=r" (ptr), "=r" (base), "=q" (__res) \
+ : "0" (ptr), "1" (base), "2" (0)); \
+__res; })
+
+#define popl(base, ptr) \
+({ unsigned long __res; \
+__asm__ __volatile__( \
+ "movb %%fs:0(%1,%0),%b2\n\t" \
+ "incw %w0\n\t" \
+ "movb %%fs:0(%1,%0),%h2\n\t" \
+ "incw %w0\n\t" \
+ "rorl $16,%2\n\t" \
+ "movb %%fs:0(%1,%0),%b2\n\t" \
+ "incw %w0\n\t" \
+ "movb %%fs:0(%1,%0),%h2\n\t" \
+ "incw %w0\n\t" \
+ "rorl $16,%2" \
+ : "=r" (ptr), "=r" (base), "=q" (__res) \
+ : "0" (ptr), "1" (base)); \
+__res; })
+
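The reason these macros insist on decw/incw of the offset while the segment base rides in a separate register is that the offset must wrap modulo 64K exactly like a real-mode sp. The same arithmetic in plain C, where an unsigned short does the wrapping and a flat array stands in for the %fs-mapped segment:

#include <stdio.h>

static unsigned char stackseg[0x10000];	/* one 64K "segment" */

/* pushw: high byte first, one "decw" per byte, as in the macro above */
static void pushw16(unsigned short *sp, unsigned short val)
{
	stackseg[(unsigned short) (*sp - 1)] = val >> 8;
	stackseg[(unsigned short) (*sp - 2)] = val & 0xff;
	*sp -= 2;
}

static unsigned short popw16(unsigned short *sp)
{
	unsigned short lo = stackseg[*sp];
	unsigned short hi = stackseg[(unsigned short) (*sp + 1)];

	*sp += 2;
	return (hi << 8) | lo;
}

int main(void)
{
	unsigned short sp = 1;	/* pushing a word must wrap below 0 */
	unsigned short v;

	pushw16(&sp, 0xbeef);
	printf("after push: sp = %#x (wrapped within the segment)\n", sp);
	v = popw16(&sp);
	printf("popped %#x, sp back to %#x\n", v, sp);
	return 0;
}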
+static void do_int(struct vm86_regs *regs, int i, unsigned char * ssp, unsigned long sp)
+{
+ unsigned short seg = get_fs_word((void *) ((i<<2)+2));
+
+ if (seg == BIOSSEG || regs->cs == BIOSSEG ||
+ is_revectored(i, &current->vm86_info->int_revectored))
+ return_to_32bit(regs, VM86_INTx + (i << 8));
+ if (i==0x21 && is_revectored(AH(regs),&current->vm86_info->int21_revectored))
+ return_to_32bit(regs, VM86_INTx + (i << 8));
+ pushw(ssp, sp, get_vflags(regs));
+ pushw(ssp, sp, regs->cs);
+ pushw(ssp, sp, IP(regs));
+ regs->cs = seg;
+ SP(regs) -= 6;
+ IP(regs) = get_fs_word((void *) (i<<2));
+ clear_TF(regs);
+ clear_IF(regs);
+ return;
+}
+
+void handle_vm86_debug(struct vm86_regs * regs, long error_code)
+{
+#if 0
+ do_int(regs, 1, (unsigned char *) (regs->ss << 4), SP(regs));
+#else
+ if (current->flags & PF_PTRACED)
+ current->blocked &= ~(1 << (SIGTRAP-1));
+ send_sig(SIGTRAP, current, 1);
+ current->tss.trap_no = 1;
+ current->tss.error_code = error_code;
+#endif
+}
+
+void handle_vm86_fault(struct vm86_regs * regs, long error_code)
+{
+ unsigned char *csp, *ssp;
+ unsigned long ip, sp;
+
+ csp = (unsigned char *) (regs->cs << 4);
+ ssp = (unsigned char *) (regs->ss << 4);
+ sp = SP(regs);
+ ip = IP(regs);
+
+ switch (popb(csp, ip)) {
+
+ /* operand size override */
+ case 0x66:
+ switch (popb(csp, ip)) {
+
+ /* pushfd */
+ case 0x9c:
+ SP(regs) -= 4;
+ IP(regs) += 2;
+ pushl(ssp, sp, get_vflags(regs));
+ return;
+
+ /* popfd */
+ case 0x9d:
+ SP(regs) += 4;
+ IP(regs) += 2;
+ set_vflags_long(popl(ssp, sp), regs);
+ return;
+ }
+
+ /* pushf */
+ case 0x9c:
+ SP(regs) -= 2;
+ IP(regs)++;
+ pushw(ssp, sp, get_vflags(regs));
+ return;
+
+ /* popf */
+ case 0x9d:
+ SP(regs) += 2;
+ IP(regs)++;
+ set_vflags_short(popw(ssp, sp), regs);
+ return;
+
+ /* int 3 */
+ case 0xcc:
+ IP(regs)++;
+ do_int(regs, 3, ssp, sp);
+ return;
+
+ /* int xx */
+ case 0xcd:
+ IP(regs) += 2;
+ do_int(regs, popb(csp, ip), ssp, sp);
+ return;
+
+ /* iret */
+ case 0xcf:
+ SP(regs) += 6;
+ IP(regs) = popw(ssp, sp);
+ regs->cs = popw(ssp, sp);
+ set_vflags_short(popw(ssp, sp), regs);
+ return;
+
+ /* cli */
+ case 0xfa:
+ IP(regs)++;
+ clear_IF(regs);
+ return;
+
+ /* sti */
+ /*
+ * Damn. This is incorrect: the 'sti' instruction should actually
+ * enable interrupts after the /next/ instruction. Not good.
+ *
+ * Probably needs some horsing around with the TF flag. Aiee..
+ */
+ case 0xfb:
+ IP(regs)++;
+ set_IF(regs);
+ return;
+
+ default:
+ return_to_32bit(regs, VM86_UNKNOWN);
+ }
+}