summaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel
diff options
context:
space:
mode:
authorRalf Baechle <ralf@linux-mips.org>1995-11-14 08:00:00 +0000
committer <ralf@linux-mips.org>1995-11-14 08:00:00 +0000
commite7c2a72e2680827d6a733931273a93461c0d8d1b (patch)
treec9abeda78ef7504062bb2e816bcf3e3c9d680112 /arch/i386/kernel
parentec6044459060a8c9ce7f64405c465d141898548c (diff)
Import of Linux/MIPS 1.3.0
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--arch/i386/kernel/Makefile48
-rw-r--r--arch/i386/kernel/bios32.c457
-rw-r--r--arch/i386/kernel/entry.S547
-rw-r--r--arch/i386/kernel/head.S364
-rw-r--r--arch/i386/kernel/ioport.c89
-rw-r--r--arch/i386/kernel/irq.c366
-rw-r--r--arch/i386/kernel/ldt.c104
-rw-r--r--arch/i386/kernel/process.c288
-rw-r--r--arch/i386/kernel/ptrace.c545
-rw-r--r--arch/i386/kernel/setup.c181
-rw-r--r--arch/i386/kernel/signal.c260
-rw-r--r--arch/i386/kernel/traps.c349
-rw-r--r--arch/i386/kernel/vm86.c420
13 files changed, 4018 insertions, 0 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
new file mode 100644
index 000000000..701926d26
--- /dev/null
+++ b/arch/i386/kernel/Makefile
@@ -0,0 +1,48 @@
+#
+# Makefile for the linux kernel.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+.c.s:
+ $(CC) $(CFLAGS) -S $<
+.s.o:
+ $(AS) -o $*.o $<
+.c.o:
+ $(CC) $(CFLAGS) -c $<
+.S.s:
+ $(CPP) -D__ASSEMBLY__ -traditional $< -o $*.s
+.S.o:
+ $(CC) -D__ASSEMBLY__ -traditional -c $< -o $*.o
+
+OBJS = process.o signal.o entry.o traps.o irq.o vm86.o bios32.o ptrace.o ioport.o ldt.o setup.o
+
+all: kernel.o head.o
+
+head.o: head.s
+
+head.s: head.S $(TOPDIR)/include/linux/tasks.h
+ $(CPP) -traditional -o $*.s $<
+
+kernel.o: $(OBJS)
+ $(LD) -r -o kernel.o $(OBJS)
+ sync
+
+dep:
+ $(CPP) -M *.c > .depend
+
+modules:
+
+dummy:
+
+#
+# include a dependency file if one exists
+#
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
+
+
diff --git a/arch/i386/kernel/bios32.c b/arch/i386/kernel/bios32.c
new file mode 100644
index 000000000..8cc3d76fa
--- /dev/null
+++ b/arch/i386/kernel/bios32.c
@@ -0,0 +1,457 @@
+/*
+ * bios32.c - BIOS32, PCI BIOS functions.
+ *
+ * Sponsored by
+ * iX Multiuser Multitasking Magazine
+ * Hannover, Germany
+ * hm@ix.de
+ *
+ * Copyright 1993, 1994 Drew Eckhardt
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * Drew@Colorado.EDU
+ * +1 (303) 786-7975
+ *
+ * For more information, please consult
+ *
+ * PCI BIOS Specification Revision
+ * PCI Local Bus Specification
+ * PCI System Design Guide
+ *
+ * PCI Special Interest Group
+ * M/S HF3-15A
+ * 5200 N.E. Elam Young Parkway
+ * Hillsboro, Oregon 97124-6497
+ * +1 (503) 696-2000
+ * +1 (800) 433-5177
+ *
+ * Manuals are $25 each or $50 for all three, plus $7 shipping
+ * within the United States, $35 abroad.
+ *
+ *
+ * CHANGELOG :
+ * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION
+ * Revision 2.0 present on <thys@dennis.ee.up.ac.za>'s ASUS mainboard.
+ *
+ * Jan 5, 1995 : Modified to probe PCI hardware at boot time by Frederic
+ * Potter, potter@cao-vlsi.ibp.fr
+ *
+ * Jan 10, 1995 : Modified to store the information about configured pci
+ * devices into a list, which can be accessed via /proc/pci by
+ * Curtis Varner, cvarner@cs.ucr.edu
+ *
+ * Jan 12, 1995 : CPU-PCI bridge optimization support by Frederic Potter.
+ * Alpha version. Intel & UMC chipset support only.
+ *
+ * Apr 16, 1995 : Source merge with the DEC Alpha PCI support. Most of the code
+ * moved to drivers/pci/pci.c.
+ *
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/bios32.h>
+#include <linux/pci.h>
+
+#include <asm/segment.h>
+
+#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX
+#define PCIBIOS_PCI_BIOS_PRESENT 0xb101
+#define PCIBIOS_FIND_PCI_DEVICE 0xb102
+#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103
+#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106
+#define PCIBIOS_READ_CONFIG_BYTE 0xb108
+#define PCIBIOS_READ_CONFIG_WORD 0xb109
+#define PCIBIOS_READ_CONFIG_DWORD 0xb10a
+#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b
+#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c
+#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d
+
+
+/* BIOS32 signature: "_32_" */
+#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
+
+/* PCI signature: "PCI " */
+#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24))
+
+/* PCI service signature: "$PCI" */
+#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24))
+
+/*
+ * This is the standard structure used to identify the entry point
+ * to the BIOS32 Service Directory, as documented in
+ * Standard BIOS 32-bit Service Directory Proposal
+ * Revision 0.4 May 24, 1993
+ * Phoenix Technologies Ltd.
+ * Norwood, MA
+ * and the PCI BIOS specification.
+ */
+
+union bios32 {
+ struct {
+ unsigned long signature; /* _32_ */
+ unsigned long entry; /* 32 bit physical address */
+ unsigned char revision; /* Revision level, 0 */
+ unsigned char length; /* Length in paragraphs should be 01 */
+ unsigned char checksum; /* All bytes must add up to zero */
+ unsigned char reserved[5]; /* Must be zero */
+ } fields;
+ char chars[16];
+};
+
+/*
+ * Physical address of the service directory. I don't know if we're
+ * allowed to have more than one of these or not, so just in case
+ * we'll make pcibios_present() take a memory start parameter and store
+ * the array there.
+ */
+
+static unsigned long bios32_entry = 0;
+static struct {
+ unsigned long address;
+ unsigned short segment;
+} bios32_indirect = { 0, KERNEL_CS };
+
+#ifdef CONFIG_PCI
+/*
+ * Returns the entry point for the given service, NULL on error
+ */
+
+static unsigned long bios32_service(unsigned long service)
+{
+ unsigned char return_code; /* %al */
+ unsigned long address; /* %ebx */
+ unsigned long length; /* %ecx */
+ unsigned long entry; /* %edx */
+
+ __asm__("lcall (%%edi)"
+ : "=a" (return_code),
+ "=b" (address),
+ "=c" (length),
+ "=d" (entry)
+ : "0" (service),
+ "1" (0),
+ "D" (&bios32_indirect));
+
+ switch (return_code) {
+ case 0:
+ return address + entry;
+ case 0x80: /* Not present */
+ printk("bios32_service(%ld) : not present\n", service);
+ return 0;
+ default: /* Shouldn't happen */
+ printk("bios32_service(%ld) : returned 0x%x, mail drew@colorado.edu\n",
+ service, return_code);
+ return 0;
+ }
+}
+
+static long pcibios_entry = 0;
+static struct {
+ unsigned long address;
+ unsigned short segment;
+} pci_indirect = { 0, KERNEL_CS };
+
+
+extern unsigned long check_pcibios(unsigned long memory_start, unsigned long memory_end)
+{
+ unsigned long signature;
+ unsigned char present_status;
+ unsigned char major_revision;
+ unsigned char minor_revision;
+ int pack;
+
+ if ((pcibios_entry = bios32_service(PCI_SERVICE))) {
+ pci_indirect.address = pcibios_entry;
+
+ __asm__("lcall (%%edi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:\tshl $8, %%eax\n\t"
+ "movw %%bx, %%ax"
+ : "=d" (signature),
+ "=a" (pack)
+ : "1" (PCIBIOS_PCI_BIOS_PRESENT),
+ "D" (&pci_indirect)
+ : "bx", "cx");
+
+ present_status = (pack >> 16) & 0xff;
+ major_revision = (pack >> 8) & 0xff;
+ minor_revision = pack & 0xff;
+ if (present_status || (signature != PCI_SIGNATURE)) {
+ printk ("pcibios_init : %s : BIOS32 Service Directory says PCI BIOS is present,\n"
+ " but PCI_BIOS_PRESENT subfunction fails with present status of 0x%x\n"
+ " and signature of 0x%08lx (%c%c%c%c). mail drew@Colorado.EDU\n",
+ (signature == PCI_SIGNATURE) ? "WARNING" : "ERROR",
+ present_status, signature,
+ (char) (signature >> 0), (char) (signature >> 8),
+ (char) (signature >> 16), (char) (signature >> 24));
+
+ if (signature != PCI_SIGNATURE)
+ pcibios_entry = 0;
+ }
+ if (pcibios_entry) {
+ printk ("pcibios_init : PCI BIOS revision %x.%02x entry at 0x%lx\n",
+ major_revision, minor_revision, pcibios_entry);
+ }
+ }
+ return memory_start;
+}
+
+int pcibios_present(void)
+{
+ return pcibios_entry ? 1 : 0;
+}
+
+int pcibios_find_class (unsigned int class_code, unsigned short index,
+ unsigned char *bus, unsigned char *device_fn)
+{
+ unsigned long bx;
+ unsigned long ret;
+
+ __asm__ ("lcall (%%edi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=b" (bx),
+ "=a" (ret)
+ : "1" (PCIBIOS_FIND_PCI_CLASS_CODE),
+ "c" (class_code),
+ "S" ((int) index),
+ "D" (&pci_indirect));
+ *bus = (bx >> 8) & 0xff;
+ *device_fn = bx & 0xff;
+ return (int) (ret & 0xff00) >> 8;
+}
+
+
+int pcibios_find_device (unsigned short vendor, unsigned short device_id,
+ unsigned short index, unsigned char *bus, unsigned char *device_fn)
+{
+ unsigned short bx;
+ unsigned short ret;
+
+ __asm__("lcall (%%edi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=b" (bx),
+ "=a" (ret)
+ : "1" (PCIBIOS_FIND_PCI_DEVICE),
+ "c" (device_id),
+ "d" (vendor),
+ "S" ((int) index),
+ "D" (&pci_indirect));
+ *bus = (bx >> 8) & 0xff;
+ *device_fn = bx & 0xff;
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_read_config_byte(unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned char *value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (ret)
+ : "1" (PCIBIOS_READ_CONFIG_BYTE),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_read_config_word (unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned short *value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (ret)
+ : "1" (PCIBIOS_READ_CONFIG_WORD),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_read_config_dword (unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned int *value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (ret)
+ : "1" (PCIBIOS_READ_CONFIG_DWORD),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_write_config_byte (unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned char value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (ret)
+ : "0" (PCIBIOS_WRITE_CONFIG_BYTE),
+ "c" (value),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_write_config_word (unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned short value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (ret)
+ : "0" (PCIBIOS_WRITE_CONFIG_WORD),
+ "c" (value),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_write_config_dword (unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned int value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (ret)
+ : "0" (PCIBIOS_WRITE_CONFIG_DWORD),
+ "c" (value),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+char *pcibios_strerror (int error)
+{
+ static char buf[80];
+
+ switch (error) {
+ case PCIBIOS_SUCCESSFUL:
+ return "SUCCESSFUL";
+
+ case PCIBIOS_FUNC_NOT_SUPPORTED:
+ return "FUNC_NOT_SUPPORTED";
+
+ case PCIBIOS_BAD_VENDOR_ID:
+			return "BAD_VENDOR_ID";
+
+ case PCIBIOS_DEVICE_NOT_FOUND:
+ return "DEVICE_NOT_FOUND";
+
+ case PCIBIOS_BAD_REGISTER_NUMBER:
+ return "BAD_REGISTER_NUMBER";
+
+ default:
+ sprintf (buf, "UNKNOWN RETURN 0x%x", error);
+ return buf;
+ }
+}
+
+
+unsigned long pcibios_fixup(unsigned long mem_start, unsigned long mem_end)
+{
+	return mem_start;
+}
+
+
+#endif
+
+unsigned long pcibios_init(unsigned long memory_start, unsigned long memory_end)
+{
+ union bios32 *check;
+ unsigned char sum;
+ int i, length;
+
+ /*
+ * Follow the standard procedure for locating the BIOS32 Service
+ * directory by scanning the permissible address range from
+ * 0xe0000 through 0xfffff for a valid BIOS32 structure.
+ *
+ */
+
+ for (check = (union bios32 *) 0xe0000; check <= (union bios32 *) 0xffff0; ++check) {
+ if (check->fields.signature != BIOS32_SIGNATURE)
+ continue;
+ length = check->fields.length * 16;
+ if (!length)
+ continue;
+ sum = 0;
+ for (i = 0; i < length ; ++i)
+ sum += check->chars[i];
+ if (sum != 0)
+ continue;
+ if (check->fields.revision != 0) {
+ printk("pcibios_init : unsupported revision %d at 0x%p, mail drew@colorado.edu\n",
+ check->fields.revision, check);
+ continue;
+ }
+ printk ("pcibios_init : BIOS32 Service Directory structure at 0x%p\n", check);
+ if (!bios32_entry) {
+ if (check->fields.entry >= 0x100000) {
+ printk("pcibios_init: entry in high memory, unable to access\n");
+ } else {
+ bios32_indirect.address = bios32_entry = check->fields.entry;
+ printk ("pcibios_init : BIOS32 Service Directory entry at 0x%lx\n", bios32_entry);
+ }
+ } else {
+ printk ("pcibios_init : multiple entries, mail drew@colorado.edu\n");
+ /*
+ * Jeremy Fitzhardinge reports at least one PCI BIOS
+ * with two different service directories, and as both
+ * worked for him, we'll just mention the fact, and
+ * not actually disallow it..
+ */
+ }
+ }
+#ifdef CONFIG_PCI
+ if (bios32_entry) {
+ memory_start = check_pcibios (memory_start, memory_end);
+ }
+#endif
+ return memory_start;
+}
+
+
+
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
new file mode 100644
index 000000000..1bdf062c8
--- /dev/null
+++ b/arch/i386/kernel/entry.S
@@ -0,0 +1,547 @@
+/*
+ * linux/arch/i386/entry.S
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * entry.S contains the system-call and fault low-level handling routines.
+ * This also contains the timer-interrupt handler, as well as all interrupts
+ * and faults that can result in a task-switch.
+ *
+ * NOTE: This code handles signal-recognition, which happens every time
+ * after a timer-interrupt and after each system call.
+ *
+ * I changed all the .align's to 4 (16 byte alignment), as that's faster
+ * on a 486.
+ *
+ * Stack layout in 'ret_from_system_call':
+ * ptrace needs to have all regs on the stack.
+ * if the order here is changed, it needs to be
+ * updated in fork.c:copy_process, signal.c:do_signal,
+ * ptrace.c and ptrace.h
+ *
+ * 0(%esp) - %ebx
+ * 4(%esp) - %ecx
+ * 8(%esp) - %edx
+ * C(%esp) - %esi
+ * 10(%esp) - %edi
+ * 14(%esp) - %ebp
+ * 18(%esp) - %eax
+ * 1C(%esp) - %ds
+ * 20(%esp) - %es
+ * 24(%esp) - %fs
+ * 28(%esp) - %gs
+ * 2C(%esp) - orig_eax
+ * 30(%esp) - %eip
+ * 34(%esp) - %cs
+ * 38(%esp) - %eflags
+ * 3C(%esp) - %oldesp
+ * 40(%esp) - %oldss
+ */
+
+#include <linux/sys.h>
+#include <asm/segment.h>
+
+EBX = 0x00
+ECX = 0x04
+EDX = 0x08
+ESI = 0x0C
+EDI = 0x10
+EBP = 0x14
+EAX = 0x18
+DS = 0x1C
+ES = 0x20
+FS = 0x24
+GS = 0x28
+ORIG_EAX = 0x2C
+EIP = 0x30
+CS = 0x34
+EFLAGS = 0x38
+OLDESP = 0x3C
+OLDSS = 0x40
+
+CF_MASK = 0x00000001
+IF_MASK = 0x00000200
+NT_MASK = 0x00004000
+VM_MASK = 0x00020000
+
+/*
+ * these are offsets into the task-struct.
+ */
+state = 0
+counter = 4
+priority = 8
+signal = 12
+blocked = 16
+flags = 20
+errno = 24
+dbgreg6 = 52
+dbgreg7 = 56
+exec_domain = 60
+
+ENOSYS = 38
+
+.globl _system_call,_lcall7
+.globl _device_not_available, _coprocessor_error
+.globl _divide_error,_debug,_nmi,_int3,_overflow,_bounds,_invalid_op
+.globl _double_fault,_coprocessor_segment_overrun
+.globl _invalid_TSS,_segment_not_present,_stack_segment
+.globl _general_protection,_reserved
+.globl _alignment_check,_page_fault
+.globl ret_from_sys_call, _sys_call_table
+
+#define SAVE_ALL \
+ cld; \
+ push %gs; \
+ push %fs; \
+ push %es; \
+ push %ds; \
+ pushl %eax; \
+ pushl %ebp; \
+ pushl %edi; \
+ pushl %esi; \
+ pushl %edx; \
+ pushl %ecx; \
+ pushl %ebx; \
+ movl $(KERNEL_DS),%edx; \
+ mov %dx,%ds; \
+ mov %dx,%es; \
+ movl $(USER_DS),%edx; \
+ mov %dx,%fs;
+
+#define RESTORE_ALL \
+ cmpw $(KERNEL_CS),CS(%esp); \
+ je 1f; \
+ movl _current,%eax; \
+ movl dbgreg7(%eax),%ebx; \
+ movl %ebx,%db7; \
+1: popl %ebx; \
+ popl %ecx; \
+ popl %edx; \
+ popl %esi; \
+ popl %edi; \
+ popl %ebp; \
+ popl %eax; \
+ pop %ds; \
+ pop %es; \
+ pop %fs; \
+ pop %gs; \
+ addl $4,%esp; \
+ iret
+
+.align 4
+_lcall7:
+ pushfl # We get a different stack layout with call gates,
+ pushl %eax # which has to be cleaned up later..
+ SAVE_ALL
+ movl EIP(%esp),%eax # due to call gates, this is eflags, not eip..
+ movl CS(%esp),%edx # this is eip..
+ movl EFLAGS(%esp),%ecx # and this is cs..
+ movl %eax,EFLAGS(%esp) #
+ movl %edx,EIP(%esp) # Now we move them to their "normal" places
+ movl %ecx,CS(%esp) #
+ movl %esp,%eax
+ movl _current,%edx
+ pushl %eax
+ movl exec_domain(%edx),%edx # Get the execution domain
+ movl 4(%edx),%edx # Get the lcall7 handler for the domain
+ call *%edx
+ popl %eax
+ jmp ret_from_sys_call
+
+.align 4
+handle_bottom_half:
+ pushfl
+ incl _intr_count
+ sti
+ call _do_bottom_half
+ popfl
+ decl _intr_count
+ jmp 9f
+.align 4
+reschedule:
+ pushl $ret_from_sys_call
+ jmp _schedule
+.align 4
+_system_call:
+ pushl %eax # save orig_eax
+ SAVE_ALL
+ movl $-ENOSYS,EAX(%esp)
+ cmpl $(NR_syscalls),%eax
+ jae ret_from_sys_call
+ movl _sys_call_table(,%eax,4),%eax
+ testl %eax,%eax
+ je ret_from_sys_call
+ movl _current,%ebx
+ andl $~CF_MASK,EFLAGS(%esp) # clear carry - assume no errors
+ movl $0,errno(%ebx)
+ movl %db6,%edx
+ movl %edx,dbgreg6(%ebx) # save current hardware debugging status
+ testb $0x20,flags(%ebx) # PF_TRACESYS
+ jne 1f
+ call *%eax
+ movl %eax,EAX(%esp) # save the return value
+ movl errno(%ebx),%edx
+ negl %edx
+ je ret_from_sys_call
+ movl %edx,EAX(%esp)
+ orl $(CF_MASK),EFLAGS(%esp) # set carry to indicate error
+ jmp ret_from_sys_call
+.align 4
+1: call _syscall_trace
+ movl ORIG_EAX(%esp),%eax
+ call _sys_call_table(,%eax,4)
+ movl %eax,EAX(%esp) # save the return value
+ movl _current,%eax
+ movl errno(%eax),%edx
+ negl %edx
+ je 1f
+ movl %edx,EAX(%esp)
+ orl $(CF_MASK),EFLAGS(%esp) # set carry to indicate error
+1: call _syscall_trace
+
+ .align 4,0x90
+ret_from_sys_call:
+ cmpl $0,_intr_count
+ jne 2f
+9: movl _bh_mask,%eax
+ andl _bh_active,%eax
+ jne handle_bottom_half
+ movl EFLAGS(%esp),%eax # check VM86 flag: CS/SS are
+ testl $(VM_MASK),%eax # different then
+ jne 1f
+ cmpw $(KERNEL_CS),CS(%esp) # was old code segment supervisor ?
+ je 2f
+1: sti
+ orl $(IF_MASK),%eax # these just try to make sure
+ andl $~NT_MASK,%eax # the program doesn't do anything
+ movl %eax,EFLAGS(%esp) # stupid
+ cmpl $0,_need_resched
+ jne reschedule
+ movl _current,%eax
+ cmpl _task,%eax # task[0] cannot have signals
+ je 2f
+ cmpl $0,state(%eax) # state
+ jne reschedule
+ cmpl $0,counter(%eax) # counter
+ je reschedule
+ movl blocked(%eax),%ecx
+ movl %ecx,%ebx # save blocked in %ebx for signal handling
+ notl %ecx
+ andl signal(%eax),%ecx
+ jne signal_return
+2: RESTORE_ALL
+.align 4
+signal_return:
+ movl %esp,%ecx
+ pushl %ecx
+ testl $(VM_MASK),EFLAGS(%ecx)
+ jne v86_signal_return
+ pushl %ebx
+ call _do_signal
+ popl %ebx
+ popl %ebx
+ RESTORE_ALL
+.align 4
+v86_signal_return:
+ call _save_v86_state
+ movl %eax,%esp
+ pushl %eax
+ pushl %ebx
+ call _do_signal
+ popl %ebx
+ popl %ebx
+ RESTORE_ALL
+
+.align 4
+_divide_error:
+ pushl $0 # no error code
+ pushl $_do_divide_error
+.align 4,0x90
+error_code:
+ push %fs
+ push %es
+ push %ds
+ pushl %eax
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %edx
+ pushl %ecx
+ pushl %ebx
+ movl $0,%eax
+ movl %eax,%db7 # disable hardware debugging...
+ cld
+ movl $-1, %eax
+ xchgl %eax, ORIG_EAX(%esp) # orig_eax (get the error code. )
+ xorl %ebx,%ebx # zero ebx
+ mov %gs,%bx # get the lower order bits of gs
+ xchgl %ebx, GS(%esp) # get the address and save gs.
+ pushl %eax # push the error code
+ lea 4(%esp),%edx
+ pushl %edx
+ movl $(KERNEL_DS),%edx
+ mov %dx,%ds
+ mov %dx,%es
+ movl $(USER_DS),%edx
+ mov %dx,%fs
+ pushl %eax
+ movl _current,%eax
+ movl %db6,%edx
+ movl %edx,dbgreg6(%eax) # save current hardware debugging status
+ popl %eax
+ call *%ebx
+ addl $8,%esp
+ jmp ret_from_sys_call
+
+.align 4
+_coprocessor_error:
+ pushl $0
+ pushl $_do_coprocessor_error
+ jmp error_code
+
+.align 4
+_device_not_available:
+ pushl $-1 # mark this as an int
+ SAVE_ALL
+ pushl $ret_from_sys_call
+ movl %cr0,%eax
+ testl $0x4,%eax # EM (math emulation bit)
+ je _math_state_restore
+ pushl $0 # temporary storage for ORIG_EIP
+ call _math_emulate
+ addl $4,%esp
+ ret
+
+.align 4
+_debug:
+ pushl $0
+ pushl $_do_debug
+ jmp error_code
+
+.align 4
+_nmi:
+ pushl $0
+ pushl $_do_nmi
+ jmp error_code
+
+.align 4
+_int3:
+ pushl $0
+ pushl $_do_int3
+ jmp error_code
+
+.align 4
+_overflow:
+ pushl $0
+ pushl $_do_overflow
+ jmp error_code
+
+.align 4
+_bounds:
+ pushl $0
+ pushl $_do_bounds
+ jmp error_code
+
+.align 4
+_invalid_op:
+ pushl $0
+ pushl $_do_invalid_op
+ jmp error_code
+
+.align 4
+_coprocessor_segment_overrun:
+ pushl $0
+ pushl $_do_coprocessor_segment_overrun
+ jmp error_code
+
+.align 4
+_reserved:
+ pushl $0
+ pushl $_do_reserved
+ jmp error_code
+
+.align 4
+_double_fault:
+ pushl $_do_double_fault
+ jmp error_code
+
+.align 4
+_invalid_TSS:
+ pushl $_do_invalid_TSS
+ jmp error_code
+
+.align 4
+_segment_not_present:
+ pushl $_do_segment_not_present
+ jmp error_code
+
+.align 4
+_stack_segment:
+ pushl $_do_stack_segment
+ jmp error_code
+
+.align 4
+_general_protection:
+ pushl $_do_general_protection
+ jmp error_code
+
+.align 4
+_alignment_check:
+ pushl $_do_alignment_check
+ jmp error_code
+
+.align 4
+_page_fault:
+ pushl $_do_page_fault
+ jmp error_code
+
+.data
+.align 4
+_sys_call_table:
+ .long _sys_setup /* 0 */
+ .long _sys_exit
+ .long _sys_fork
+ .long _sys_read
+ .long _sys_write
+ .long _sys_open /* 5 */
+ .long _sys_close
+ .long _sys_waitpid
+ .long _sys_creat
+ .long _sys_link
+ .long _sys_unlink /* 10 */
+ .long _sys_execve
+ .long _sys_chdir
+ .long _sys_time
+ .long _sys_mknod
+ .long _sys_chmod /* 15 */
+ .long _sys_chown
+ .long _sys_break
+ .long _sys_stat
+ .long _sys_lseek
+ .long _sys_getpid /* 20 */
+ .long _sys_mount
+ .long _sys_umount
+ .long _sys_setuid
+ .long _sys_getuid
+ .long _sys_stime /* 25 */
+ .long _sys_ptrace
+ .long _sys_alarm
+ .long _sys_fstat
+ .long _sys_pause
+ .long _sys_utime /* 30 */
+ .long _sys_stty
+ .long _sys_gtty
+ .long _sys_access
+ .long _sys_nice
+ .long _sys_ftime /* 35 */
+ .long _sys_sync
+ .long _sys_kill
+ .long _sys_rename
+ .long _sys_mkdir
+ .long _sys_rmdir /* 40 */
+ .long _sys_dup
+ .long _sys_pipe
+ .long _sys_times
+ .long _sys_prof
+ .long _sys_brk /* 45 */
+ .long _sys_setgid
+ .long _sys_getgid
+ .long _sys_signal
+ .long _sys_geteuid
+ .long _sys_getegid /* 50 */
+ .long _sys_acct
+ .long _sys_phys
+ .long _sys_lock
+ .long _sys_ioctl
+ .long _sys_fcntl /* 55 */
+ .long _sys_mpx
+ .long _sys_setpgid
+ .long _sys_ulimit
+ .long _sys_olduname
+ .long _sys_umask /* 60 */
+ .long _sys_chroot
+ .long _sys_ustat
+ .long _sys_dup2
+ .long _sys_getppid
+ .long _sys_getpgrp /* 65 */
+ .long _sys_setsid
+ .long _sys_sigaction
+ .long _sys_sgetmask
+ .long _sys_ssetmask
+ .long _sys_setreuid /* 70 */
+ .long _sys_setregid
+ .long _sys_sigsuspend
+ .long _sys_sigpending
+ .long _sys_sethostname
+ .long _sys_setrlimit /* 75 */
+ .long _sys_getrlimit
+ .long _sys_getrusage
+ .long _sys_gettimeofday
+ .long _sys_settimeofday
+ .long _sys_getgroups /* 80 */
+ .long _sys_setgroups
+ .long _old_select
+ .long _sys_symlink
+ .long _sys_lstat
+ .long _sys_readlink /* 85 */
+ .long _sys_uselib
+ .long _sys_swapon
+ .long _sys_reboot
+ .long _old_readdir
+ .long _sys_mmap /* 90 */
+ .long _sys_munmap
+ .long _sys_truncate
+ .long _sys_ftruncate
+ .long _sys_fchmod
+ .long _sys_fchown /* 95 */
+ .long _sys_getpriority
+ .long _sys_setpriority
+ .long _sys_profil
+ .long _sys_statfs
+ .long _sys_fstatfs /* 100 */
+ .long _sys_ioperm
+ .long _sys_socketcall
+ .long _sys_syslog
+ .long _sys_setitimer
+ .long _sys_getitimer /* 105 */
+ .long _sys_newstat
+ .long _sys_newlstat
+ .long _sys_newfstat
+ .long _sys_uname
+ .long _sys_iopl /* 110 */
+ .long _sys_vhangup
+ .long _sys_idle
+ .long _sys_vm86
+ .long _sys_wait4
+ .long _sys_swapoff /* 115 */
+ .long _sys_sysinfo
+ .long _sys_ipc
+ .long _sys_fsync
+ .long _sys_sigreturn
+ .long _sys_clone /* 120 */
+ .long _sys_setdomainname
+ .long _sys_newuname
+ .long _sys_modify_ldt
+ .long _sys_adjtimex
+ .long _sys_mprotect /* 125 */
+ .long _sys_sigprocmask
+ .long _sys_create_module
+ .long _sys_init_module
+ .long _sys_delete_module
+ .long _sys_get_kernel_syms /* 130 */
+ .long _sys_quotactl
+ .long _sys_getpgid
+ .long _sys_fchdir
+ .long _sys_bdflush
+ .long _sys_sysfs /* 135 */
+ .long _sys_personality
+ .long 0 /* for afs_syscall */
+ .long _sys_setfsuid
+ .long _sys_setfsgid
+ .long _sys_llseek /* 140 */
+ .long _sys_getdents
+ .long _sys_select
+ .long _sys_flock
+ .space (NR_syscalls-140)*4
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
new file mode 100644
index 000000000..a3e0df213
--- /dev/null
+++ b/arch/i386/kernel/head.S
@@ -0,0 +1,364 @@
+/*
+ * linux/arch/i386/head.S
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * head.S contains the 32-bit startup code.
+ */
+
+.text
+.globl _idt,_gdt,_stext,__stext
+.globl _swapper_pg_dir,_pg0
+.globl _empty_bad_page
+.globl _empty_bad_page_table
+.globl _empty_zero_page
+.globl _floppy_track_buffer
+
+#define __ASSEMBLY__
+#include <linux/tasks.h>
+#include <linux/fd.h>
+#include <asm/segment.h>
+
+#define CL_MAGIC_ADDR 0x90020
+#define CL_MAGIC 0xA33F
+#define CL_BASE_ADDR 0x90000
+#define CL_OFFSET 0x90022
+
+/*
+ * swapper_pg_dir is the main page directory, address 0x00001000 (or at
+ * address 0x00101000 for a compressed boot).
+ */
+_stext:
+__stext:
+startup_32:
+ cld
+ movl $(KERNEL_DS),%eax
+ mov %ax,%ds
+ mov %ax,%es
+ mov %ax,%fs
+ mov %ax,%gs
+ lss stack_start,%esp
+/*
+ * Clear BSS first so that there are no surprises...
+ */
+ xorl %eax,%eax
+ movl $__edata,%edi
+ movl $__end,%ecx
+ subl %edi,%ecx
+ cld
+ rep
+ stosb
+/*
+ * start system 32-bit setup. We need to re-do some of the things done
+ * in 16-bit mode for the "real" operations.
+ */
+ call setup_idt
+ xorl %eax,%eax
+1: incl %eax # check that A20 really IS enabled
+ movl %eax,0x000000 # loop forever if it isn't
+ cmpl %eax,0x100000
+ je 1b
+/*
+ * Initialize eflags. Some BIOS's leave bits like NT set. This would
+ * confuse the debugger if this code is traced.
+ * XXX - best to initialize before switching to protected mode.
+ */
+ pushl $0
+ popfl
+/*
+ * Copy bootup parameters out of the way. First 2kB of
+ * _empty_zero_page is for boot parameters, second 2kB
+ * is for the command line.
+ */
+ movl $0x90000,%esi
+ movl $_empty_zero_page,%edi
+ movl $512,%ecx
+ cld
+ rep
+ movsl
+ xorl %eax,%eax
+ movl $512,%ecx
+ rep
+ stosl
+ cmpw $(CL_MAGIC),CL_MAGIC_ADDR
+ jne 1f
+ movl $_empty_zero_page+2048,%edi
+ movzwl CL_OFFSET,%esi
+ addl $(CL_BASE_ADDR),%esi
+ movl $2048,%ecx
+ rep
+ movsb
+1:
+/* check if it is 486 or 386. */
+/*
+ * XXX - this does a lot of unnecessary setup. Alignment checks don't
+ * apply at our cpl of 0 and the stack ought to be aligned already, and
+ * we don't need to preserve eflags.
+ */
+ movl $3,_x86
+ pushfl # push EFLAGS
+ popl %eax # get EFLAGS
+ movl %eax,%ecx # save original EFLAGS
+ xorl $0x40000,%eax # flip AC bit in EFLAGS
+ pushl %eax # copy to EFLAGS
+ popfl # set EFLAGS
+ pushfl # get new EFLAGS
+ popl %eax # put it in eax
+ xorl %ecx,%eax # change in flags
+ andl $0x40000,%eax # check if AC bit changed
+ je is386
+ movl $4,_x86
+ movl %ecx,%eax
+ xorl $0x200000,%eax # check ID flag
+ pushl %eax
+ popfl # if we are on a straight 486DX, SX, or
+ pushfl # 487SX we can't change it
+ popl %eax
+ xorl %ecx,%eax
+ andl $0x200000,%eax
+ je is486
+isnew: pushl %ecx # restore original EFLAGS
+ popfl
+ /* get processor type */
+ movl $1, %eax # Use the CPUID instruction to
+ .byte 0x0f, 0xa2 # check the processor type
+ movb %al, %cl # save reg for future use
+ andb $0x0f,%ah # mask processor family
+ movb %ah, _x86
+	andb $0xf0, %al		# mask model
+ shrb $4, %al
+ movb %al, _x86_model
+ andb $0x0f, %cl # mask mask revision
+ movb %cl, _x86_mask
+ movl %edx, _x86_capability
+ /* get vendor info */
+ xorl %eax, %eax # call CPUID with 0 -> return vendor ID
+ .byte 0x0f, 0xa2 # CPUID
+ movl %ebx, _x86_vendor_id # lo 4 chars
+ movl %edx, _x86_vendor_id+4 # next 4 chars
+ movl %ecx, _x86_vendor_id+8 # last 4 chars
+
+ movl %cr0,%eax # 486+
+ andl $0x80000011,%eax # Save PG,PE,ET
+ orl $0x50022,%eax # set AM, WP, NE and MP
+ jmp 2f
+is486: pushl %ecx # restore original EFLAGS
+ popfl
+ movl %cr0,%eax # 486
+ andl $0x80000011,%eax # Save PG,PE,ET
+ orl $0x50022,%eax # set AM, WP, NE and MP
+ jmp 2f
+is386: pushl %ecx # restore original EFLAGS
+ popfl
+ movl %cr0,%eax # 386
+ andl $0x80000011,%eax # Save PG,PE,ET
+ orl $2,%eax # set MP
+2: movl %eax,%cr0
+ call check_x87
+ call setup_paging
+ lgdt gdt_descr
+ lidt idt_descr
+ ljmp $(KERNEL_CS),$1f
+1: movl $(KERNEL_DS),%eax # reload all the segment registers
+ mov %ax,%ds # after changing gdt.
+ mov %ax,%es
+ mov %ax,%fs
+ mov %ax,%gs
+ lss stack_start,%esp
+ xorl %eax,%eax
+ lldt %ax
+ pushl %eax # These are the parameters to main :-)
+ pushl %eax
+ pushl %eax
+ cld # gcc2 wants the direction flag cleared at all times
+ call _start_kernel
+L6:
+ jmp L6 # main should never return here, but
+ # just in case, we know what happens.
+
+/*
+ * We depend on ET to be correct. This checks for 287/387.
+ */
+check_x87:
+ movb $0,_hard_math
+ clts
+ fninit
+ fstsw %ax
+ cmpb $0,%al
+ je 1f
+ movl %cr0,%eax /* no coprocessor: have to set bits */
+ xorl $4,%eax /* set EM */
+ movl %eax,%cr0
+ ret
+.align 2
+1: movb $1,_hard_math
+ .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
+ ret
+
+/*
+ * setup_idt
+ *
+ * sets up a idt with 256 entries pointing to
+ * ignore_int, interrupt gates. It doesn't actually load
+ * idt - that can be done only after paging has been enabled
+ * and the kernel moved to 0xC0000000. Interrupts
+ * are enabled elsewhere, when we can be relatively
+ * sure everything is ok.
+ */
+setup_idt:
+ lea ignore_int,%edx
+ movl $(KERNEL_CS << 16),%eax
+ movw %dx,%ax /* selector = 0x0010 = cs */
+ movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
+
+ lea _idt,%edi
+ mov $256,%ecx
+rp_sidt:
+ movl %eax,(%edi)
+ movl %edx,4(%edi)
+ addl $8,%edi
+ dec %ecx
+ jne rp_sidt
+ ret
+
+
+/*
+ * Setup_paging
+ *
+ * This routine sets up paging by setting the page bit
+ * in cr0. The page tables are set up, identity-mapping
+ * the first 4MB. The rest are initialized later.
+ *
+ * (ref: added support for up to 32mb, 17Apr92) -- Rik Faith
+ * (ref: update, 25Sept92) -- croutons@crunchy.uucp
+ * (ref: 92.10.11 - Linus Torvalds. Corrected 16M limit - no upper memory limit)
+ */
+.align 2
+setup_paging:
+ movl $1024*2,%ecx /* 2 pages - swapper_pg_dir+1 page table */
+ xorl %eax,%eax
+ movl $_swapper_pg_dir,%edi /* swapper_pg_dir is at 0x1000 */
+ cld;rep;stosl
+/* Identity-map the kernel in low 4MB memory for ease of transition */
+ movl $_pg0+7,_swapper_pg_dir /* set present bit/user r/w */
+/* But the real place is at 0xC0000000 */
+ movl $_pg0+7,_swapper_pg_dir+3072 /* set present bit/user r/w */
+ movl $_pg0+4092,%edi
+ movl $0x03ff007,%eax /* 4Mb - 4096 + 7 (r/w user,p) */
+ std
+1: stosl /* fill the page backwards - more efficient :-) */
+ subl $0x1000,%eax
+ jge 1b
+ cld
+ movl $_swapper_pg_dir,%eax
+ movl %eax,%cr3 /* cr3 - page directory start */
+ movl %cr0,%eax
+ orl $0x80000000,%eax
+ movl %eax,%cr0 /* set paging (PG) bit */
+ ret /* this also flushes the prefetch-queue */
+
+/*
+ * page 0 is made non-existent, so that kernel NULL pointer references get
+ * caught. Thus the swapper page directory has been moved to 0x1000
+ *
+ * XXX Actually, the swapper page directory is at 0x1000 plus 1 megabyte,
+ * with the introduction of the compressed boot code. Theoretically,
+ * the original design of overlaying the startup code with the swapper
+ * page directory is still possible --- it would reduce the size of the kernel
+ * by 2-3k. This would be a good thing to do at some point.....
+ */
+.org 0x1000
+_swapper_pg_dir:
+/*
+ * The page tables are initialized to only 4MB here - the final page
+ * tables are set up later depending on memory size.
+ */
+.org 0x2000
+_pg0:
+
+.org 0x3000
+_empty_bad_page:
+
+.org 0x4000
+_empty_bad_page_table:
+
+.org 0x5000
+_empty_zero_page:
+
+.org 0x6000
+/*
+ * floppy_track_buffer is used to buffer one track of floppy data: it
+ * has to be separate from the tmp_floppy area, as otherwise a single-
+ * sector read/write can mess it up. It can contain one full cylinder (sic) of
+ * data (36*2*512 bytes).
+ */
+_floppy_track_buffer:
+ .fill 512*2*MAX_BUFFER_SECTORS,1,0
+
+stack_start:
+ .long _init_user_stack+4096
+ .long KERNEL_DS
+
+/* This is the default interrupt "handler" :-) */
+int_msg:
+ .asciz "Unknown interrupt\n"
+.align 2
+ignore_int:
+ cld
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ push %ds
+ push %es
+ push %fs
+ movl $(KERNEL_DS),%eax
+ mov %ax,%ds
+ mov %ax,%es
+ mov %ax,%fs
+ pushl $int_msg
+ call _printk
+ popl %eax
+ pop %fs
+ pop %es
+ pop %ds
+ popl %edx
+ popl %ecx
+ popl %eax
+ iret
+
+/*
+ * The interrupt descriptor table has room for 256 idt's
+ */
+.align 4
+.word 0
+idt_descr:
+ .word 256*8-1 # idt contains 256 entries
+ .long 0xc0000000+_idt
+
+.align 4
+_idt:
+ .fill 256,8,0 # idt is uninitialized
+
+.align 4
+.word 0
+gdt_descr:
+ .word (8+2*NR_TASKS)*8-1
+ .long 0xc0000000+_gdt
+
+/*
+ * This gdt setup gives the kernel a 1GB address space at virtual
+ * address 0xC0000000 - space enough for expansion, I hope.
+ */
+.align 4
+_gdt:
+ .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x0000000000000000 /* not used */
+ .quad 0xc0c39a000000ffff /* 0x10 kernel 1GB code at 0xC0000000 */
+ .quad 0xc0c392000000ffff /* 0x18 kernel 1GB data at 0xC0000000 */
+ .quad 0x00cbfa000000ffff /* 0x23 user 3GB code at 0x00000000 */
+ .quad 0x00cbf2000000ffff /* 0x2b user 3GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* not used */
+ .quad 0x0000000000000000 /* not used */
+ .fill 2*NR_TASKS,8,0 /* space for LDT's and TSS's etc */
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
new file mode 100644
index 000000000..c949f70cd
--- /dev/null
+++ b/arch/i386/kernel/ioport.c
@@ -0,0 +1,89 @@
+/*
+ * linux/arch/i386/kernel/ioport.c
+ *
+ * This contains the io-permission bitmap code - written by obz, with changes
+ * by Linus.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+
+/*
+ * Set EXTENT bits starting at bit BASE in BITMAP to NEW_VALUE (0 or
+ * non-zero).  Works a 32-bit word at a time: an unaligned leading
+ * partial word, then whole words, then a trailing partial word.
+ * NOTE(review): the `~0 << n` idiom left-shifts a negative int, which
+ * is implementation-defined - fine for the compilers this targets.
+ */
+static void set_bitmap(unsigned long *bitmap, short base, short extent, int new_value)
+{
+	int mask;
+	unsigned long *bitmap_base = bitmap + (base >> 5);	/* first word touched */
+	unsigned short low_index = base & 0x1f;			/* bit offset within it */
+	int length = low_index + extent;	/* bits remaining, counted from word start */
+
+	if (low_index != 0) {
+		/* leading partial word: bits low_index .. min(length,32)-1 */
+		mask = (~0 << low_index);
+		if (length < 32)
+			mask &= ~(~0 << length);
+		if (new_value)
+			*bitmap_base++ |= mask;
+		else
+			*bitmap_base++ &= ~mask;
+		length -= 32;
+	}
+
+	/* whole 32-bit words */
+	mask = (new_value ? ~0 : 0);
+	while (length >= 32) {
+		*bitmap_base++ = mask;
+		length -= 32;
+	}
+
+	/* trailing partial word: low `length` bits */
+	if (length > 0) {
+		mask = ~(~0 << length);
+		if (new_value)
+			*bitmap_base++ |= mask;
+		else
+			*bitmap_base++ &= ~mask;
+	}
+}
+
+/*
+ * this changes the io permissions bitmap in the current task.
+ */
+asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+ if (from + num <= from)
+ return -EINVAL;
+ if (from + num > IO_BITMAP_SIZE*32)
+ return -EINVAL;
+ if (!suser())
+ return -EPERM;
+
+ set_bitmap((unsigned long *)current->tss.io_bitmap, from, num, !turn_on);
+ return 0;
+}
+
+unsigned int *stack;	/* NOTE(review): not referenced in this file - check other users */
+
+/*
+ * sys_iopl has to be used when you want to access the IO ports
+ * beyond the 0x3ff range: to get the full 65536 ports bitmapped
+ * you'd need 8kB of bitmaps/process, which is a bit excessive.
+ *
+ * Here we just change the eflags value on the stack: we allow
+ * only the super-user to do it. This depends on the stack-layout
+ * on system-call entry - see also fork() and the signal handling
+ * code.
+ *
+ * The parameter list mirrors the saved-register layout on the kernel
+ * stack, so writing through &eflags patches the saved EFLAGS image
+ * (IOPL is bits 12-13, mask 0x3000) that iret reloads on return.
+ */
+asmlinkage int sys_iopl(long ebx,long ecx,long edx,
+	long esi, long edi, long ebp, long eax, long ds,
+	long es, long fs, long gs, long orig_eax,
+	long eip,long cs,long eflags,long esp,long ss)
+{
+	unsigned int level = ebx;	/* requested I/O privilege level, 0-3 */
+
+	if (level > 3)
+		return -EINVAL;
+	if (!suser())
+		return -EPERM;
+	*(&eflags) = (eflags & 0xffffcfff) | (level << 12);
+	return 0;
+}
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
new file mode 100644
index 000000000..23085dc6c
--- /dev/null
+++ b/arch/i386/kernel/irq.c
@@ -0,0 +1,366 @@
+/*
+ * linux/arch/i386/kernel/irq.c
+ *
+ * Copyright (C) 1992 Linus Torvalds
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ */
+
+/*
+ * IRQ's are in fact implemented a bit like signal handlers for the kernel.
+ * Naturally it's not a 1:1 relation, but there are similarities.
+ */
+
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/bitops.h>
+
+#define CR0_NE 32	/* CR0 numeric-error bit; NOTE(review): unused in this hunk */
+
+/*
+ * Software copies of the two 8259A interrupt mask registers (master
+ * PIC at port 0x21, slave at 0xA1).  A set bit masks the IRQ; both
+ * start out fully masked until request_irq() unmasks lines.
+ */
+static unsigned char cache_21 = 0xff;
+static unsigned char cache_A1 = 0xff;
+
+/*
+ * Mask one IRQ line at the PIC.  Interrupts are off around the
+ * cached-mask update and the outb so the cache cannot go stale.
+ */
+void disable_irq(unsigned int irq_nr)
+{
+	unsigned long flags;
+	unsigned char mask;
+
+	mask = 1 << (irq_nr & 7);
+	save_flags(flags);
+	if (irq_nr < 8) {
+		cli();
+		cache_21 |= mask;
+		outb(cache_21,0x21);
+		restore_flags(flags);
+		return;
+	}
+	cli();
+	cache_A1 |= mask;
+	outb(cache_A1,0xA1);
+	restore_flags(flags);
+}
+
+/*
+ * Unmask one IRQ line at the PIC - the mirror image of disable_irq().
+ */
+void enable_irq(unsigned int irq_nr)
+{
+	unsigned long flags;
+	unsigned char mask;
+
+	mask = ~(1 << (irq_nr & 7));
+	save_flags(flags);
+	if (irq_nr < 8) {
+		cli();
+		cache_21 &= mask;
+		outb(cache_21,0x21);
+		restore_flags(flags);
+		return;
+	}
+	cli();
+	cache_A1 &= mask;
+	outb(cache_A1,0xA1);
+	restore_flags(flags);
+}
+
+/*
+ * This builds up the IRQ handler stubs using some ugly macros in irq.h
+ *
+ * These macros create the low-level assembly IRQ routines that do all
+ * the operations that are needed to keep the AT interrupt-controller
+ * happy. They are also written to be fast - and to disable interrupts
+ * as little as humanly possible.
+ *
+ * NOTE! These macros expand to three different handlers for each line: one
+ * complete handler that does all the fancy stuff (including signal handling),
+ * and one fast handler that is meant for simple IRQ's that want to be
+ * atomic. The specific handler is chosen depending on the SA_INTERRUPT
+ * flag when installing a handler. Finally, one "bad interrupt" handler, that
+ * is used when no handler is present.
+ */
+BUILD_IRQ(FIRST,0,0x01)
+BUILD_IRQ(FIRST,1,0x02)
+BUILD_IRQ(FIRST,2,0x04)
+BUILD_IRQ(FIRST,3,0x08)
+BUILD_IRQ(FIRST,4,0x10)
+BUILD_IRQ(FIRST,5,0x20)
+BUILD_IRQ(FIRST,6,0x40)
+BUILD_IRQ(FIRST,7,0x80)
+BUILD_IRQ(SECOND,8,0x01)
+BUILD_IRQ(SECOND,9,0x02)
+BUILD_IRQ(SECOND,10,0x04)
+BUILD_IRQ(SECOND,11,0x08)
+BUILD_IRQ(SECOND,12,0x10)
+BUILD_IRQ(SECOND,13,0x20)
+BUILD_IRQ(SECOND,14,0x40)
+BUILD_IRQ(SECOND,15,0x80)
+
+/*
+ * Pointers to the low-level handlers: first the general ones, then the
+ * fast ones, then the bad ones.
+ */
+static void (*interrupt[16])(void) = {
+ IRQ0_interrupt, IRQ1_interrupt, IRQ2_interrupt, IRQ3_interrupt,
+ IRQ4_interrupt, IRQ5_interrupt, IRQ6_interrupt, IRQ7_interrupt,
+ IRQ8_interrupt, IRQ9_interrupt, IRQ10_interrupt, IRQ11_interrupt,
+ IRQ12_interrupt, IRQ13_interrupt, IRQ14_interrupt, IRQ15_interrupt
+};
+
+static void (*fast_interrupt[16])(void) = {
+ fast_IRQ0_interrupt, fast_IRQ1_interrupt,
+ fast_IRQ2_interrupt, fast_IRQ3_interrupt,
+ fast_IRQ4_interrupt, fast_IRQ5_interrupt,
+ fast_IRQ6_interrupt, fast_IRQ7_interrupt,
+ fast_IRQ8_interrupt, fast_IRQ9_interrupt,
+ fast_IRQ10_interrupt, fast_IRQ11_interrupt,
+ fast_IRQ12_interrupt, fast_IRQ13_interrupt,
+ fast_IRQ14_interrupt, fast_IRQ15_interrupt
+};
+
+static void (*bad_interrupt[16])(void) = {
+ bad_IRQ0_interrupt, bad_IRQ1_interrupt,
+ bad_IRQ2_interrupt, bad_IRQ3_interrupt,
+ bad_IRQ4_interrupt, bad_IRQ5_interrupt,
+ bad_IRQ6_interrupt, bad_IRQ7_interrupt,
+ bad_IRQ8_interrupt, bad_IRQ9_interrupt,
+ bad_IRQ10_interrupt, bad_IRQ11_interrupt,
+ bad_IRQ12_interrupt, bad_IRQ13_interrupt,
+ bad_IRQ14_interrupt, bad_IRQ15_interrupt
+};
+
+/*
+ * Initial irq handlers.
+ */
+struct irqaction {
+ void (*handler)(int, struct pt_regs *);
+ unsigned long flags;
+ unsigned long mask;
+ const char *name;
+};
+
+static struct irqaction irq_action[16] = {
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL }
+};
+
+/*
+ * Format a human-readable list of the installed IRQ handlers into
+ * buf (one line per claimed IRQ: number, interrupt count, '+' when
+ * the handler is a fast SA_INTERRUPT one, owner name).  Returns the
+ * number of characters written.  NOTE(review): presumably backs
+ * /proc/interrupts - verify the caller.
+ */
+int get_irq_list(char *buf)
+{
+	int i, len = 0;
+	struct irqaction * action = irq_action;
+
+	for (i = 0 ; i < 16 ; i++, action++) {
+		if (!action->handler)
+			continue;
+		len += sprintf(buf+len, "%2d: %8d %c %s\n",
+			i, kstat.interrupts[i],
+			(action->flags & SA_INTERRUPT) ? '+' : ' ',
+			action->name);
+	}
+	return len;
+}
+
+/*
+ * do_IRQ handles IRQ's that have been installed without the
+ * SA_INTERRUPT flag: it uses the full signal-handling return
+ * and runs with other interrupts enabled. All relatively slow
+ * IRQ's should use this format: notably the keyboard/timer
+ * routines.
+ */
+asmlinkage void do_IRQ(int irq, struct pt_regs * regs)
+{
+ struct irqaction * action = irq + irq_action;
+
+ kstat.interrupts[irq]++;
+ action->handler(irq, regs);
+}
+
+/*
+ * do_fast_IRQ handles IRQ's that don't need the fancy interrupt return
+ * stuff - the handler is also running with interrupts disabled unless
+ * it explicitly enables them later.
+ */
+asmlinkage void do_fast_IRQ(int irq)
+{
+ struct irqaction * action = irq + irq_action;
+
+ kstat.interrupts[irq]++;
+ action->handler(irq, NULL);
+}
+
+#define SA_PROBE SA_ONESHOT	/* probing reuses the SA_ONESHOT bit */
+
+/*
+ * Claim an IRQ line and install a handler.
+ *
+ * Picks the fast or slow IDT stub according to SA_INTERRUPT (probe
+ * requests install no gate at all), then unmasks the line at the PIC
+ * - also unmasking IRQ2, the slave cascade, for lines 8-15.
+ * Returns 0, or -EINVAL (bad irq/handler) / -EBUSY (line taken).
+ */
+int request_irq(unsigned int irq, void (*handler)(int, struct pt_regs *),
+	unsigned long irqflags, const char * devname)
+{
+	struct irqaction * action;
+	unsigned long flags;
+
+	if (irq > 15)
+		return -EINVAL;
+	action = irq + irq_action;
+	if (action->handler)
+		return -EBUSY;
+	if (!handler)
+		return -EINVAL;
+	save_flags(flags);
+	cli();
+	action->handler = handler;
+	action->flags = irqflags;
+	action->mask = 0;
+	action->name = devname;
+	if (!(action->flags & SA_PROBE)) { /* SA_ONESHOT is used by probing */
+		if (action->flags & SA_INTERRUPT)
+			set_intr_gate(0x20+irq,fast_interrupt[irq]);
+		else
+			set_intr_gate(0x20+irq,interrupt[irq]);
+	}
+	if (irq < 8) {
+		cache_21 &= ~(1<<irq);
+		outb(cache_21,0x21);
+	} else {
+		cache_21 &= ~(1<<2);	/* unmask the cascade too */
+		cache_A1 &= ~(1<<(irq-8));
+		outb(cache_21,0x21);
+		outb(cache_A1,0xA1);
+	}
+	restore_flags(flags);
+	return 0;
+}
+
+/*
+ * Release an IRQ line: mask it at the PIC, point its IDT gate back at
+ * the "bad interrupt" stub and clear the irqaction record.  Complains
+ * via printk (and does nothing) for an out-of-range or already-free
+ * line.
+ */
+void free_irq(unsigned int irq)
+{
+	struct irqaction * action = irq + irq_action;
+	unsigned long flags;
+
+	if (irq > 15) {
+		printk("Trying to free IRQ%d\n",irq);
+		return;
+	}
+	if (!action->handler) {
+		printk("Trying to free free IRQ%d\n",irq);
+		return;
+	}
+	save_flags(flags);
+	cli();
+	if (irq < 8) {
+		cache_21 |= 1 << irq;
+		outb(cache_21,0x21);
+	} else {
+		cache_A1 |= 1 << (irq-8);
+		outb(cache_A1,0xA1);
+	}
+	set_intr_gate(0x20+irq,bad_interrupt[irq]);
+	action->handler = NULL;
+	action->flags = 0;
+	action->mask = 0;
+	action->name = NULL;
+	restore_flags(flags);
+}
+
+/*
+ * Note that on a 486, we don't want to do a SIGFPE on a irq13
+ * as the irq is unreliable, and exception 16 works correctly
+ * (ie as explained in the intel literature). On a 386, you
+ * can't use exception 16 due to bad IBM design, so we have to
+ * rely on the less exact irq13.
+ *
+ * Careful.. Not only is IRQ13 unreliable, but it is also
+ * leads to races. IBM designers who came up with it should
+ * be shot.
+ */
+static void math_error_irq(int cpl, struct pt_regs *regs)
+{
+ outb(0,0xF0);
+ if (ignore_irq13 || !hard_math)
+ return;
+ math_error();
+}
+
+static void no_action(int cpl, struct pt_regs * regs) { }
+
+/*
+ * Start autoprobing: grab every free IRQ line (15 down to 1; IRQ0 is
+ * the timer and never probed) with a dummy SA_PROBE handler, enable
+ * them, wait ~2 jiffies for spurious interrupts to fire, then drop
+ * any line that ended up masked again.  Returns the bitmask of lines
+ * still armed; pass it to probe_irq_off() after triggering the device.
+ */
+unsigned int probe_irq_on (void)
+{
+	unsigned int i, irqs = 0, irqmask;
+	unsigned long delay;
+
+	/* first, snaffle up any unassigned irqs */
+	for (i = 15; i > 0; i--) {
+		if (!request_irq(i, no_action, SA_PROBE, "probe")) {
+			enable_irq(i);
+			irqs |= (1 << i);
+		}
+	}
+
+	/* wait for spurious interrupts to mask themselves out again */
+	for (delay = jiffies + 2; delay > jiffies; );	/* min 10ms delay */
+
+	/* now filter out any obviously spurious interrupts */
+	irqmask = (((unsigned int)cache_A1)<<8) | (unsigned int)cache_21;
+	for (i = 15; i > 0; i--) {
+		if (irqs & (1 << i) & irqmask) {
+			irqs ^= (1 << i);
+			free_irq(i);
+		}
+	}
+#ifdef DEBUG
+	printk("probe_irq_on: irqs=0x%04x irqmask=0x%04x\n", irqs, irqmask);
+#endif
+	return irqs;
+}
+
+/*
+ * Finish autoprobing: free the probe handlers, then intersect the
+ * armed lines with the PIC masks snapshotted *before* freeing (a line
+ * that fired was re-masked by its stub).  Returns 0 if nothing fired,
+ * the IRQ number if exactly one did, or minus the lowest candidate if
+ * several lines fired (ambiguous probe).
+ */
+int probe_irq_off (unsigned int irqs)
+{
+	unsigned int i, irqmask;
+
+	irqmask = (((unsigned int)cache_A1)<<8) | (unsigned int)cache_21;
+	for (i = 15; i > 0; i--) {
+		if (irqs & (1 << i)) {
+			free_irq(i);
+		}
+	}
+#ifdef DEBUG
+	printk("probe_irq_off: irqs=0x%04x irqmask=0x%04x\n", irqs, irqmask);
+#endif
+	irqs &= irqmask;
+	if (!irqs)
+		return 0;
+	i = ffz(~irqs);		/* lowest set bit = lowest IRQ that fired */
+	if (irqs != (irqs & (1 << i)))
+		i = -i;		/* more than one fired: report as negative */
+	return i;
+}
+
+/*
+ * Boot-time interrupt setup: program PIT channel 0 for the 100 Hz
+ * tick (LATCH counts), point all 16 IRQ gates at the "bad interrupt"
+ * stubs, claim IRQ2 (slave PIC cascade) and IRQ13 (FPU error), and
+ * reserve the two PIC I/O port ranges.
+ */
+void init_IRQ(void)
+{
+	int i;
+
+	/* set the clock to 100 Hz */
+	outb_p(0x34,0x43);		/* binary, mode 2, LSB/MSB, ch 0 */
+	outb_p(LATCH & 0xff , 0x40);	/* LSB */
+	outb(LATCH >> 8 , 0x40);	/* MSB */
+	for (i = 0; i < 16 ; i++)
+		set_intr_gate(0x20+i,bad_interrupt[i]);
+	if (request_irq(2, no_action, SA_INTERRUPT, "cascade"))
+		printk("Unable to get IRQ2 for cascade\n");
+	if (request_irq(13,math_error_irq, 0, "math error"))
+		printk("Unable to get IRQ13 for math-error handler\n");
+	request_region(0x20,0x20,"pic1");
+	request_region(0xa0,0x20,"pic2");
+}
diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c
new file mode 100644
index 000000000..bace95f4e
--- /dev/null
+++ b/arch/i386/kernel/ldt.c
@@ -0,0 +1,104 @@
+/*
+ * linux/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <linux/ldt.h>
+
+/*
+ * Copy the current task's LDT (or the shared default_ldt if it has
+ * none) out to user space, truncated to bytecount.  Returns the
+ * number of bytes copied, or -EINVAL / a verify_area() error.
+ */
+static int read_ldt(void * ptr, unsigned long bytecount)
+{
+	int error;
+	void * address = current->ldt;
+	unsigned long size;
+
+	if (!ptr)
+		return -EINVAL;
+	size = LDT_ENTRIES*LDT_ENTRY_SIZE;
+	if (!address) {
+		address = &default_ldt;
+		size = sizeof(default_ldt);
+	}
+	if (size > bytecount)
+		size = bytecount;
+	error = verify_area(VERIFY_WRITE, ptr, size);
+	if (error)
+		return error;
+	memcpy_tofs(ptr, address, size);
+	return size;
+}
+
+/*
+ * Install one LDT entry for the current task from a user-supplied
+ * modify_ldt_ldt_s record.  Allocates (vmalloc) and activates a
+ * private 8kB LDT on first use.  The segment must lie entirely below
+ * the 3GB kernel boundary.  Returns 0, -EINVAL, -ENOMEM or a
+ * verify_area() error.
+ */
+static int write_ldt(void * ptr, unsigned long bytecount)
+{
+	struct modify_ldt_ldt_s ldt_info;
+	unsigned long *lp;
+	unsigned long base, limit;
+	int error, i;
+
+	if (bytecount != sizeof(ldt_info))
+		return -EINVAL;
+	error = verify_area(VERIFY_READ, ptr, sizeof(ldt_info));
+	if (error)
+		return error;
+
+	memcpy_fromfs(&ldt_info, ptr, sizeof(ldt_info));
+
+	if (ldt_info.contents == 3 || ldt_info.entry_number >= LDT_ENTRIES)
+		return -EINVAL;
+
+	/* reject segments that reach into kernel space (>= 0xC0000000) */
+	limit = ldt_info.limit;
+	base = ldt_info.base_addr;
+	if (ldt_info.limit_in_pages)
+		limit *= PAGE_SIZE;
+
+	limit += base;
+	if (limit < base || limit >= 0xC0000000)
+		return -EINVAL;
+
+	/* first write: allocate a private LDT and hook it into the GDT */
+	if (!current->ldt) {
+		for (i=1 ; i<NR_TASKS ; i++) {
+			if (task[i] == current) {
+				if (!(current->ldt = (struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE)))
+					return -ENOMEM;
+				memset(current->ldt, 0, LDT_ENTRIES*LDT_ENTRY_SIZE);
+				set_ldt_desc(gdt+(i<<1)+FIRST_LDT_ENTRY, current->ldt, LDT_ENTRIES);
+				load_ldt(i);
+			}
+		}
+	}
+
+	lp = (unsigned long *) &current->ldt[ldt_info.entry_number];
+	/* Allow LDTs to be cleared by the user. */
+	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+		*lp = 0;
+		*(lp+1) = 0;
+		return 0;
+	}
+	/*
+	 * Assemble the two descriptor words (i386 format): low word =
+	 * base 15:0 / limit 15:0; high word = scattered base/limit bits
+	 * plus type/flag bits.  0x7000 sets DPL=3 and the S (non-system)
+	 * bit; seg_not_present is inverted into the P bit.
+	 */
+	*lp = ((ldt_info.base_addr & 0x0000ffff) << 16) |
+		  (ldt_info.limit & 0x0ffff);
+	*(lp+1) = (ldt_info.base_addr & 0xff000000) |
+		  ((ldt_info.base_addr & 0x00ff0000)>>16) |
+		  (ldt_info.limit & 0xf0000) |
+		  (ldt_info.contents << 10) |
+		  ((ldt_info.read_exec_only ^ 1) << 9) |
+		  (ldt_info.seg_32bit << 22) |
+		  (ldt_info.limit_in_pages << 23) |
+		  ((ldt_info.seg_not_present ^1) << 15) |
+		  0x7000;
+	return 0;
+}
+
+/*
+ * modify_ldt system call: func 0 reads the LDT, func 1 writes one
+ * entry; anything else is -ENOSYS.
+ */
+asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount)
+{
+	if (func == 0)
+		return read_ldt(ptr, bytecount);
+	if (func == 1)
+		return write_ldt(ptr, bytecount);
+	return -ENOSYS;
+}
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
new file mode 100644
index 000000000..a5e8777bf
--- /dev/null
+++ b/arch/i386/kernel/process.c
@@ -0,0 +1,288 @@
+/*
+ * linux/arch/i386/kernel/process.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/malloc.h>
+#include <linux/ldt.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+
+#include <asm/segment.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+/* entry.S label: used as the initial %eip of a new task in copy_thread() */
+asmlinkage void ret_from_sys_call(void) __asm__("ret_from_sys_call");
+
+/*
+ * Nesting counter: while non-zero, sys_idle() must not execute hlt
+ * (some hardware, e.g. the floppy driver during transfers, can't
+ * tolerate it).
+ */
+static int hlt_counter=0;
+
+void disable_hlt(void)
+{
+	hlt_counter++;
+}
+
+void enable_hlt(void)
+{
+	hlt_counter--;
+}
+
+/*
+ * pipe() system call: create a pipe and store the two descriptors
+ * into the user-space array fildes (8 bytes, verified writable
+ * first).  Returns 0 or a negative error from verify_area()/do_pipe().
+ */
+asmlinkage int sys_pipe(unsigned long * fildes)
+{
+	int fd[2];
+	int error;
+
+	error = verify_area(VERIFY_WRITE,fildes,8);
+	if (error)
+		return error;
+	error = do_pipe(fd);
+	if (error)
+		return error;
+	put_fs_long(fd[0],0+fildes);
+	put_fs_long(fd[1],1+fildes);
+	return 0;
+}
+
+/*
+ * The idle loop on a i386..
+ */
+asmlinkage int sys_idle(void)
+{
+ int i;
+ pmd_t * pmd;
+
+ if (current->pid != 0)
+ return -EPERM;
+
+ /* Map out the low memory: it's no longer needed */
+ pmd = pmd_offset(swapper_pg_dir, 0);
+ for (i = 0 ; i < 768 ; i++)
+ pmd_clear(pmd++);
+
+ /* endless idle loop with no priority at all */
+ current->counter = -100;
+ for (;;) {
+ if (hlt_works_ok && !hlt_counter && !need_resched)
+ __asm__("hlt");
+ schedule();
+ }
+}
+
+/*
+ * This routine reboots the machine by asking the keyboard
+ * controller to pulse the reset-line low. We try that for a while,
+ * and if it doesn't work, we do some other stupid things.
+ */
+static long no_idt[2] = {0, 0};
+
+static inline void kb_wait(void)
+{
+ int i;
+
+ for (i=0; i<0x10000; i++)
+ if ((inb_p(0x64) & 0x02) == 0)
+ break;
+}
+
+/*
+ * Reboot: mark a warm boot in the BIOS data area (0x472 = 0x1234),
+ * then repeatedly pulse the keyboard controller's reset line (command
+ * 0xFE to port 0x64); if that fails, load an empty IDT so the next
+ * interrupt triple-faults the CPU and forces a reset.  Never returns.
+ */
+void hard_reset_now(void)
+{
+	int i, j;
+
+	sti();
+/* rebooting needs to touch the page at absolute addr 0 */
+	pg0[0] = 7;
+	*((unsigned short *)0x472) = 0x1234;
+	for (;;) {
+		for (i=0; i<100; i++) {
+			kb_wait();
+			for(j = 0; j < 100000 ; j++)
+				/* nothing */;
+			outb(0xfe,0x64);	 /* pulse reset low */
+		}
+		__asm__ __volatile__("\tlidt %0": "=m" (no_idt));
+	}
+}
+
+/*
+ * Print a register dump (oops-style).  ESP:SS are only meaningful -
+ * and only printed - when the saved CS has a non-zero RPL, i.e. the
+ * trap came from user mode.
+ */
+void show_regs(struct pt_regs * regs)
+{
+	printk("\n");
+	printk("EIP: %04x:%08lx",0xffff & regs->cs,regs->eip);
+	if (regs->cs & 3)
+		printk(" ESP: %04x:%08lx",0xffff & regs->ss,regs->esp);
+	printk(" EFLAGS: %08lx\n",regs->eflags);
+	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+		regs->eax,regs->ebx,regs->ecx,regs->edx);
+	printk("ESI: %08lx EDI: %08lx EBP: %08lx",
+		regs->esi, regs->edi, regs->ebp);
+	printk(" DS: %04x ES: %04x FS: %04x GS: %04x\n",
+		0xffff & regs->ds,0xffff & regs->es,
+		0xffff & regs->fs,0xffff & regs->gs);
+}
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+ /* forget local segments */
+ __asm__ __volatile__("mov %w0,%%fs ; mov %w0,%%gs ; lldt %w0"
+ : /* no outputs */
+ : "r" (0));
+ current->tss.ldt = 0;
+ if (current->ldt) {
+ void * ldt = current->ldt;
+ current->ldt = NULL;
+ vfree(ldt);
+ }
+}
+
+/*
+ * Flush per-thread state on exec: drop any private LDT (switching the
+ * task's GDT descriptor back to the shared default_ldt) and clear the
+ * debug registers.
+ *
+ * The private LDT is allocated with vmalloc() - see write_ldt() and
+ * copy_thread() - and exit_thread() releases it with vfree(); the
+ * free_page() previously used here mismatched the allocator.  The
+ * pointer is cleared and the descriptor reloaded *before* the memory
+ * is released, so nothing can be left pointing at freed storage.
+ */
+void flush_thread(void)
+{
+	int i;
+
+	if (current->ldt) {
+		void * ldt = current->ldt;
+		current->ldt = NULL;
+		for (i=1 ; i<NR_TASKS ; i++) {
+			if (task[i] == current) {
+				set_ldt_desc(gdt+(i<<1)+
+					FIRST_LDT_ENTRY,&default_ldt, 1);
+				load_ldt(i);
+			}
+		}
+		vfree(ldt);
+	}
+
+	for (i=0 ; i<8 ; i++)
+		current->debugreg[i] = 0;
+}
+
+/*
+ * Set up the TSS and kernel stack of a freshly forked task p (GDT
+ * slot nr): the child starts executing at ret_from_sys_call with a
+ * copy of the parent's register frame, eax forced to 0 (fork returns
+ * 0 in the child) and esp set to the requested user stack.  IOPL is
+ * cleared, the I/O bitmap denies all ports, the parent's LDT is
+ * duplicated if present, and the FPU state is saved into the child if
+ * the parent currently owns the FPU.
+ */
+void copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
+	struct task_struct * p, struct pt_regs * regs)
+{
+	int i;
+	struct pt_regs * childregs;
+
+	p->tss.es = KERNEL_DS;
+	p->tss.cs = KERNEL_CS;
+	p->tss.ss = KERNEL_DS;
+	p->tss.ds = KERNEL_DS;
+	p->tss.fs = USER_DS;
+	p->tss.gs = KERNEL_DS;
+	p->tss.ss0 = KERNEL_DS;
+	p->tss.esp0 = p->kernel_stack_page + PAGE_SIZE;
+	p->tss.tr = _TSS(nr);
+	/* register frame at the top of the child's kernel stack */
+	childregs = ((struct pt_regs *) (p->kernel_stack_page + PAGE_SIZE)) - 1;
+	p->tss.esp = (unsigned long) childregs;
+	p->tss.eip = (unsigned long) ret_from_sys_call;
+	*childregs = *regs;
+	childregs->eax = 0;		/* child sees fork() == 0 */
+	childregs->esp = esp;
+	p->tss.back_link = 0;
+	p->tss.eflags = regs->eflags & 0xffffcfff;	/* iopl is always 0 for a new process */
+	p->tss.ldt = _LDT(nr);
+	if (p->ldt) {
+		p->ldt = (struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
+		if (p->ldt != NULL)
+			memcpy(p->ldt, current->ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
+	}
+	set_tss_desc(gdt+(nr<<1)+FIRST_TSS_ENTRY,&(p->tss));
+	if (p->ldt)
+		set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,p->ldt, 512);
+	else
+		set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,&default_ldt, 1);
+	p->tss.bitmap = offsetof(struct thread_struct,io_bitmap);
+	/* all bits set: every port trap-protected */
+	for (i = 0; i < IO_BITMAP_SIZE+1 ; i++) /* IO bitmap is actually SIZE+1 */
+		p->tss.io_bitmap[i] = ~0;
+	if (last_task_used_math == current)
+		__asm__("clts ; fnsave %0 ; frstor %0":"=m" (p->tss.i387));
+}
+
+/*
+ * fill in the user structure for a core dump..
+ */
+void dump_thread(struct pt_regs * regs, struct user * dump)
+{
+ int i;
+
+/* changed the size calculations - should hopefully work better. lbt */
+ dump->magic = CMAGIC;
+ dump->start_code = 0;
+ dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
+ dump->u_tsize = ((unsigned long) current->mm->end_code) >> 12;
+ dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> 12;
+ dump->u_dsize -= dump->u_tsize;
+ dump->u_ssize = 0;
+ for (i = 0; i < 8; i++)
+ dump->u_debugreg[i] = current->debugreg[i];
+
+ if (dump->start_stack < TASK_SIZE)
+ dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> 12;
+
+ dump->regs = *regs;
+
+/* Flag indicating the math stuff is valid. We don't support this for the
+ soft-float routines yet */
+ if (hard_math) {
+ if ((dump->u_fpvalid = current->used_math) != 0) {
+ if (last_task_used_math == current)
+ __asm__("clts ; fnsave %0": :"m" (dump->i387));
+ else
+ memcpy(&dump->i387,&current->tss.i387.hard,sizeof(dump->i387));
+ }
+ } else {
+ /* we should dump the emulator state here, but we need to
+ convert it into standard 387 format first.. */
+ dump->u_fpvalid = 0;
+ }
+}
+
+/*
+ * fork(): full copy-on-fork (COPYVM) with SIGCHLD on exit; the
+ * child's stack starts at the parent's current esp.
+ */
+asmlinkage int sys_fork(struct pt_regs regs)
+{
+	return do_fork(COPYVM | SIGCHLD, regs.esp, &regs);
+}
+
+/*
+ * clone(): ebx = new stack pointer (0 means share the parent's, which
+ * forces COPYVM), ecx = clone flags.  Compiled out - the whole body is
+ * behind CLONE_ACTUALLY_WORKS_OK, so this currently returns -ENOSYS.
+ */
+asmlinkage int sys_clone(struct pt_regs regs)
+{
+#ifdef CLONE_ACTUALLY_WORKS_OK
+	unsigned long clone_flags;
+	unsigned long newsp;
+
+	newsp = regs.ebx;
+	clone_flags = regs.ecx;
+	if (!newsp)
+		newsp = regs.esp;
+	if (newsp == regs.esp)
+		clone_flags |= COPYVM;
+	return do_fork(clone_flags, newsp, &regs);
+#else
+	return -ENOSYS;
+#endif
+}
+
+/*
+ * sys_execve() executes a new program.
+ * ebx = path, ecx = argv, edx = envp (all user pointers).
+ */
+asmlinkage int sys_execve(struct pt_regs regs)
+{
+	int error;
+	char * filename;
+
+	error = getname((char *) regs.ebx, &filename);
+	if (error)
+		return error;
+	error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, &regs);
+	putname(filename);
+	return error;
+}
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
new file mode 100644
index 000000000..f32035edc
--- /dev/null
+++ b/arch/i386/kernel/ptrace.c
@@ -0,0 +1,545 @@
+/* ptrace.c */
+/* By Ross Biro 1/23/92 */
+/* edited by Linus Torvalds */
+
+#include <linux/head.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/debugreg.h>
+
+#include <asm/segment.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+
+/*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
+/* determines which flags the user has access to. */
+/* 1 = access 0 = no access */
+#define FLAG_MASK 0x00044dd5
+
+/* set's the trap flag. */
+#define TRAP_FLAG 0x100
+
+/*
+ * this is the number to subtract from the top of the stack. To find
+ * the local frame.
+ */
+#define MAGICNUMBER 68
+
+/* change a pid into a task struct. */
+/* Linear scan of task[] (slot 0, the idle task, is skipped); returns
+ * NULL when no task has this pid. */
+static inline struct task_struct * get_task(int pid)
+{
+	int i;
+
+	for (i = 1; i < NR_TASKS; i++) {
+		if (task[i] != NULL && (task[i]->pid == pid))
+			return task[i];
+	}
+	return NULL;
+}
+
+/*
+ * this routine will get a word off of the processes privileged stack.
+ * the offset is how far from the base addr as stored in the TSS.
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+static inline int get_stack_long(struct task_struct *task, int offset)
+{
+ unsigned char *stack;
+
+ stack = (unsigned char *)task->tss.esp0;
+ stack += offset;
+ return (*((int *)stack));
+}
+
+/*
+ * this routine will put a word on the processes privileged stack.
+ * the offset is how far from the base addr as stored in the TSS.
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+static inline int put_stack_long(struct task_struct *task, int offset,
+ unsigned long data)
+{
+ unsigned char * stack;
+
+ stack = (unsigned char *) task->tss.esp0;
+ stack += offset;
+ *(unsigned long *) stack = data;
+ return 0;
+}
+
+/*
+ * This routine gets a long from any process space by following the page
+ * tables. NOTE! You should check that the long isn't on a page boundary,
+ * and that it is in the task area before calling this: this routine does
+ * no checking.
+ */
+static unsigned long get_long(struct vm_area_struct * vma, unsigned long addr)
+{
+ pgd_t * pgdir;
+ pmd_t * pgmiddle;
+ pte_t * pgtable;
+ unsigned long page;
+
+repeat:
+ pgdir = pgd_offset(vma->vm_task, addr);
+ if (pgd_none(*pgdir)) {
+ do_no_page(vma, addr, 0);
+ goto repeat;
+ }
+ if (pgd_bad(*pgdir)) {
+ printk("ptrace: bad page directory %08lx\n", pgd_val(*pgdir));
+ pgd_clear(pgdir);
+ return 0;
+ }
+ pgmiddle = pmd_offset(pgdir, addr);
+ if (pmd_none(*pgmiddle)) {
+ do_no_page(vma, addr, 0);
+ goto repeat;
+ }
+ if (pmd_bad(*pgmiddle)) {
+ printk("ptrace: bad page middle %08lx\n", pmd_val(*pgmiddle));
+ pmd_clear(pgmiddle);
+ return 0;
+ }
+ pgtable = pte_offset(pgmiddle, addr);
+ if (!pte_present(*pgtable)) {
+ do_no_page(vma, addr, 0);
+ goto repeat;
+ }
+ page = pte_page(*pgtable);
+/* this is a hack for non-kernel-mapped video buffers and similar */
+ if (page >= high_memory)
+ return 0;
+ page += addr & ~PAGE_MASK;
+ return *(unsigned long *) page;
+}
+
+/*
+ * This routine puts a long into any process space by following the page
+ * tables. NOTE! You should check that the long isn't on a page boundary,
+ * and that it is in the task area before calling this: this routine does
+ * no checking.
+ *
+ * Now keeps R/W state of page so that a text page stays readonly
+ * even if a debugger scribbles breakpoints into it. -M.U-
+ */
+static void put_long(struct vm_area_struct * vma, unsigned long addr,
+ unsigned long data)
+{
+ pgd_t *pgdir;
+ pmd_t *pgmiddle;
+ pte_t *pgtable;
+ unsigned long page;
+
+repeat:
+ pgdir = pgd_offset(vma->vm_task, addr);
+ if (!pgd_present(*pgdir)) {
+ do_no_page(vma, addr, 1);
+ goto repeat;
+ }
+ if (pgd_bad(*pgdir)) {
+ printk("ptrace: bad page directory %08lx\n", pgd_val(*pgdir));
+ pgd_clear(pgdir);
+ return;
+ }
+ pgmiddle = pmd_offset(pgdir, addr);
+ if (pmd_none(*pgmiddle)) {
+ do_no_page(vma, addr, 1);
+ goto repeat;
+ }
+ if (pmd_bad(*pgmiddle)) {
+ printk("ptrace: bad page middle %08lx\n", pmd_val(*pgmiddle));
+ pmd_clear(pgmiddle);
+ return;
+ }
+ pgtable = pte_offset(pgmiddle, addr);
+ if (!pte_present(*pgtable)) {
+ do_no_page(vma, addr, 1);
+ goto repeat;
+ }
+ page = pte_page(*pgtable);
+ if (!pte_write(*pgtable)) {
+ do_wp_page(vma, addr, 1);
+ goto repeat;
+ }
+/* this is a hack for non-kernel-mapped video buffers and similar */
+ if (page < high_memory) {
+ page += addr & ~PAGE_MASK;
+ *(unsigned long *) page = data;
+ }
+/* we're bypassing pagetables, so we have to set the dirty bit ourselves */
+/* this should also re-instate whatever read-only mode there was before */
+ *pgtable = pte_mkdirty(mk_pte(page, vma->vm_page_prot));
+ invalidate();
+}
+
+/*
+ * Find the vma covering addr in task tsk, growing a VM_GROWSDOWN
+ * (stack) vma downwards to include it when addr lies just below one
+ * and the stack rlimit allows.  Returns NULL if addr is in no
+ * (extendable) mapping.
+ */
+static struct vm_area_struct * find_extend_vma(struct task_struct * tsk, unsigned long addr)
+{
+	struct vm_area_struct * vma;
+
+	addr &= PAGE_MASK;
+	vma = find_vma(tsk,addr);
+	if (!vma)
+		return NULL;
+	if (vma->vm_start <= addr)
+		return vma;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		return NULL;
+	if (vma->vm_end - addr > tsk->rlim[RLIMIT_STACK].rlim_cur)
+		return NULL;
+	/* extend the stack vma down to cover addr */
+	vma->vm_offset -= vma->vm_start - addr;
+	vma->vm_start = addr;
+	return vma;
+}
+
+/*
+ * This routine checks the page boundaries, and that the offset is
+ * within the task area. It then calls get_long() to read a long.
+ */
+static int read_long(struct task_struct * tsk, unsigned long addr,
+ unsigned long * result)
+{
+ struct vm_area_struct * vma = find_extend_vma(tsk, addr);
+
+ if (!vma)
+ return -EIO;
+ if ((addr & ~PAGE_MASK) > PAGE_SIZE-sizeof(long)) {
+ unsigned long low,high;
+ struct vm_area_struct * vma_high = vma;
+
+ if (addr + sizeof(long) >= vma->vm_end) {
+ vma_high = vma->vm_next;
+ if (!vma_high || vma_high->vm_start != vma->vm_end)
+ return -EIO;
+ }
+ low = get_long(vma, addr & ~(sizeof(long)-1));
+ high = get_long(vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
+ switch (addr & (sizeof(long)-1)) {
+ case 1:
+ low >>= 8;
+ low |= high << 24;
+ break;
+ case 2:
+ low >>= 16;
+ low |= high << 16;
+ break;
+ case 3:
+ low >>= 24;
+ low |= high << 8;
+ break;
+ }
+ *result = low;
+ } else
+ *result = get_long(vma, addr);
+ return 0;
+}
+
+/*
+ * This routine checks the page boundaries, and that the offset is
+ * within the task area. It then calls put_long() to write a long.
+ */
+static int write_long(struct task_struct * tsk, unsigned long addr,
+ unsigned long data)
+{
+ struct vm_area_struct * vma = find_extend_vma(tsk, addr);
+
+ if (!vma)
+ return -EIO;
+ if ((addr & ~PAGE_MASK) > PAGE_SIZE-sizeof(long)) {
+ unsigned long low,high;
+ struct vm_area_struct * vma_high = vma;
+
+ if (addr + sizeof(long) >= vma->vm_end) {
+ vma_high = vma->vm_next;
+ if (!vma_high || vma_high->vm_start != vma->vm_end)
+ return -EIO;
+ }
+ low = get_long(vma, addr & ~(sizeof(long)-1));
+ high = get_long(vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
+ switch (addr & (sizeof(long)-1)) {
+ case 0: /* shouldn't happen, but safety first */
+ low = data;
+ break;
+ case 1:
+ low &= 0x000000ff;
+ low |= data << 8;
+ high &= ~0xff;
+ high |= data >> 24;
+ break;
+ case 2:
+ low &= 0x0000ffff;
+ low |= data << 16;
+ high &= ~0xffff;
+ high |= data >> 16;
+ break;
+ case 3:
+ low &= 0x00ffffff;
+ low |= data << 24;
+ high &= ~0xffffff;
+ high |= data >> 8;
+ break;
+ }
+ put_long(vma, addr & ~(sizeof(long)-1),low);
+ put_long(vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1),high);
+ } else
+ put_long(vma, addr, data);
+ return 0;
+}
+
+asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+{
+ struct task_struct *child;
+ struct user * dummy;
+ int i;
+
+ dummy = NULL;
+
+ if (request == PTRACE_TRACEME) {
+ /* are we already being traced? */
+ if (current->flags & PF_PTRACED)
+ return -EPERM;
+ /* set the ptrace bit in the process flags. */
+ current->flags |= PF_PTRACED;
+ return 0;
+ }
+ if (pid == 1) /* you may not mess with init */
+ return -EPERM;
+ if (!(child = get_task(pid)))
+ return -ESRCH;
+ if (request == PTRACE_ATTACH) {
+ if (child == current)
+ return -EPERM;
+ if ((!child->dumpable ||
+ (current->uid != child->euid) ||
+ (current->uid != child->uid) ||
+ (current->gid != child->egid) ||
+ (current->gid != child->gid)) && !suser())
+ return -EPERM;
+ /* the same process cannot be attached many times */
+ if (child->flags & PF_PTRACED)
+ return -EPERM;
+ child->flags |= PF_PTRACED;
+ if (child->p_pptr != current) {
+ REMOVE_LINKS(child);
+ child->p_pptr = current;
+ SET_LINKS(child);
+ }
+ send_sig(SIGSTOP, child, 1);
+ return 0;
+ }
+ if (!(child->flags & PF_PTRACED))
+ return -ESRCH;
+ if (child->state != TASK_STOPPED) {
+ if (request != PTRACE_KILL)
+ return -ESRCH;
+ }
+ if (child->p_pptr != current)
+ return -ESRCH;
+
+ switch (request) {
+ /* when I and D space are separate, these will need to be fixed. */
+ case PTRACE_PEEKTEXT: /* read word at location addr. */
+ case PTRACE_PEEKDATA: {
+ unsigned long tmp;
+ int res;
+
+ res = read_long(child, addr, &tmp);
+ if (res < 0)
+ return res;
+ res = verify_area(VERIFY_WRITE, (void *) data, sizeof(long));
+ if (!res)
+ put_fs_long(tmp,(unsigned long *) data);
+ return res;
+ }
+
+ /* read the word at location addr in the USER area. */
+ case PTRACE_PEEKUSR: {
+ unsigned long tmp;
+ int res;
+
+ if ((addr & 3) || addr < 0 ||
+ addr > sizeof(struct user) - 3)
+ return -EIO;
+
+ res = verify_area(VERIFY_WRITE, (void *) data, sizeof(long));
+ if (res)
+ return res;
+ tmp = 0; /* Default return condition */
+ if(addr < 17*sizeof(long)) {
+ addr = addr >> 2; /* temporary hack. */
+
+ tmp = get_stack_long(child, sizeof(long)*addr - MAGICNUMBER);
+ if (addr == DS || addr == ES ||
+ addr == FS || addr == GS ||
+ addr == CS || addr == SS)
+ tmp &= 0xffff;
+ };
+ if(addr >= (long) &dummy->u_debugreg[0] &&
+ addr <= (long) &dummy->u_debugreg[7]){
+ addr -= (long) &dummy->u_debugreg[0];
+ addr = addr >> 2;
+ tmp = child->debugreg[addr];
+ };
+ put_fs_long(tmp,(unsigned long *) data);
+ return 0;
+ }
+
+ /* when I and D space are separate, this will have to be fixed. */
+ case PTRACE_POKETEXT: /* write the word at location addr. */
+ case PTRACE_POKEDATA:
+ return write_long(child,addr,data);
+
+ case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
+ if ((addr & 3) || addr < 0 ||
+ addr > sizeof(struct user) - 3)
+ return -EIO;
+
+ addr = addr >> 2; /* temporary hack. */
+
+ if (addr == ORIG_EAX)
+ return -EIO;
+ if (addr == DS || addr == ES ||
+ addr == FS || addr == GS ||
+ addr == CS || addr == SS) {
+ data &= 0xffff;
+ if (data && (data & 3) != 3)
+ return -EIO;
+ }
+ if (addr == EFL) { /* flags. */
+ data &= FLAG_MASK;
+ data |= get_stack_long(child, EFL*sizeof(long)-MAGICNUMBER) & ~FLAG_MASK;
+ }
+ /* Do not allow the user to set the debug register for kernel
+ address space */
+ if(addr < 17){
+ if (put_stack_long(child, sizeof(long)*addr-MAGICNUMBER, data))
+ return -EIO;
+ return 0;
+ };
+
+ /* We need to be very careful here. We implicitly
+ want to modify a portion of the task_struct, and we
+ have to be selective about what portions we allow someone
+ to modify. */
+
+ addr = addr << 2; /* Convert back again */
+ if(addr >= (long) &dummy->u_debugreg[0] &&
+ addr <= (long) &dummy->u_debugreg[7]){
+
+ if(addr == (long) &dummy->u_debugreg[4]) return -EIO;
+ if(addr == (long) &dummy->u_debugreg[5]) return -EIO;
+ if(addr < (long) &dummy->u_debugreg[4] &&
+ ((unsigned long) data) >= 0xbffffffd) return -EIO;
+
+ if(addr == (long) &dummy->u_debugreg[7]) {
+ data &= ~DR_CONTROL_RESERVED;
+ for(i=0; i<4; i++)
+ if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
+ return -EIO;
+ };
+
+ addr -= (long) &dummy->u_debugreg;
+ addr = addr >> 2;
+ child->debugreg[addr] = data;
+ return 0;
+ };
+ return -EIO;
+
+ case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
+ case PTRACE_CONT: { /* restart after signal. */
+ long tmp;
+
+ if ((unsigned long) data > NSIG)
+ return -EIO;
+ if (request == PTRACE_SYSCALL)
+ child->flags |= PF_TRACESYS;
+ else
+ child->flags &= ~PF_TRACESYS;
+ child->exit_code = data;
+ child->state = TASK_RUNNING;
+ /* make sure the single step bit is not set. */
+ tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG;
+ put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
+ return 0;
+ }
+
+/*
+ * make the child exit. Best I can do is send it a sigkill.
+ * perhaps it should be put in the status that it wants to
+ * exit.
+ */
+ case PTRACE_KILL: {
+ long tmp;
+
+ child->state = TASK_RUNNING;
+ child->exit_code = SIGKILL;
+ /* make sure the single step bit is not set. */
+ tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG;
+ put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
+ return 0;
+ }
+
+ case PTRACE_SINGLESTEP: { /* set the trap flag. */
+ long tmp;
+
+ if ((unsigned long) data > NSIG)
+ return -EIO;
+ child->flags &= ~PF_TRACESYS;
+ tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) | TRAP_FLAG;
+ put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
+ child->state = TASK_RUNNING;
+ child->exit_code = data;
+ /* give it a chance to run. */
+ return 0;
+ }
+
+ case PTRACE_DETACH: { /* detach a process that was attached. */
+ long tmp;
+
+ if ((unsigned long) data > NSIG)
+ return -EIO;
+ child->flags &= ~(PF_PTRACED|PF_TRACESYS);
+ child->state = TASK_RUNNING;
+ child->exit_code = data;
+ REMOVE_LINKS(child);
+ child->p_pptr = child->p_opptr;
+ SET_LINKS(child);
+ /* make sure the single step bit is not set. */
+ tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG;
+ put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
+ return 0;
+ }
+
+ default:
+ return -EIO;
+ }
+}
+
+/*
+ * Called on syscall entry/exit for a traced task: stop the task with
+ * SIGTRAP so the tracer can inspect it, then deliver whatever signal
+ * the tracer asked for when it resumed us.
+ */
+asmlinkage void syscall_trace(void)
+{
+	/* only stop when both PF_PTRACED and PF_TRACESYS are set,
+	   i.e. the tracer used PTRACE_SYSCALL */
+	if ((current->flags & (PF_PTRACED|PF_TRACESYS))
+			!= (PF_PTRACED|PF_TRACESYS))
+		return;
+	current->exit_code = SIGTRAP;
+	current->state = TASK_STOPPED;
+	notify_parent(current);
+	schedule();
+	/*
+	 * this isn't the same as continuing with a signal, but it will do
+	 * for normal use.  strace only continues with a signal if the
+	 * stopping signal is not SIGTRAP.  -brl
+	 */
+	/* the tracer may have stored a signal number in exit_code
+	   via PTRACE_SYSCALL/PTRACE_CONT; queue it for delivery */
+	if (current->exit_code)
+		current->signal |= (1 << (current->exit_code - 1));
+	current->exit_code = 0;
+}
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
new file mode 100644
index 000000000..5cc4c5d7d
--- /dev/null
+++ b/arch/i386/kernel/setup.c
@@ -0,0 +1,181 @@
+/*
+ * linux/arch/i386/kernel/setup.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/malloc.h>
+#include <linux/ldt.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+/*
+ * Tell us the machine setup..
+ */
+char hard_math = 0; /* set by boot/head.S */
+char x86 = 0; /* set by boot/head.S to 3 or 4 */
+char x86_model = 0; /* set by boot/head.S */
+char x86_mask = 0; /* set by boot/head.S */
+int x86_capability = 0; /* set by boot/head.S */
+int fdiv_bug = 0; /* set if Pentium(TM) with FP bug */
+
+char x86_vendor_id[13] = "Unknown";
+
+char ignore_irq13 = 0; /* set if exception 16 works */
+char wp_works_ok = 0; /* set if paging hardware honours WP */
+char hlt_works_ok = 1; /* set if the "hlt" instruction works */
+
+/*
+ * Bus types ..
+ */
+int EISA_bus = 0;
+
+/*
+ * Setup options
+ */
+struct drive_info_struct { char dummy[32]; } drive_info;
+struct screen_info screen_info;
+
+unsigned char aux_device_present;
+extern int ramdisk_size;
+extern int root_mountflags;
+extern int etext, edata, end;
+
+extern char empty_zero_page[PAGE_SIZE];
+
+/*
+ * This is set up by the setup-routine at boot-time
+ */
+#define PARAM empty_zero_page
+#define EXT_MEM_K (*(unsigned short *) (PARAM+2))
+#define DRIVE_INFO (*(struct drive_info_struct *) (PARAM+0x80))
+#define SCREEN_INFO (*(struct screen_info *) (PARAM+0))
+#define MOUNT_ROOT_RDONLY (*(unsigned short *) (PARAM+0x1F2))
+#define RAMDISK_SIZE (*(unsigned short *) (PARAM+0x1F8))
+#define ORIG_ROOT_DEV (*(unsigned short *) (PARAM+0x1FC))
+#define AUX_DEVICE_INFO (*(unsigned char *) (PARAM+0x1FF))
+#define COMMAND_LINE ((char *) (PARAM+2048))
+#define COMMAND_LINE_SIZE 256
+
+static char command_line[COMMAND_LINE_SIZE] = { 0, };
+
+/*
+ * Architecture-specific boot-time setup: copy the parameters left at
+ * empty_zero_page by the real-mode setup code, size memory, parse the
+ * "mem=" command-line override, and reserve the legacy PC I/O ports.
+ *
+ * On return *cmdline_p points at the saved command line and
+ * [*memory_start_p, *memory_end_p) is the usable physical memory range.
+ */
+void setup_arch(char **cmdline_p,
+	unsigned long * memory_start_p, unsigned long * memory_end_p)
+{
+	unsigned long memory_start, memory_end;
+	char c = ' ', *to = command_line, *from = COMMAND_LINE;
+	int len = 0;
+
+	/* pick up the values the boot sector / setup.S stored for us */
+	ROOT_DEV = ORIG_ROOT_DEV;
+	drive_info = DRIVE_INFO;
+	screen_info = SCREEN_INFO;
+	aux_device_present = AUX_DEVICE_INFO;
+	/* 1MB base memory plus extended memory reported by the BIOS (in KB) */
+	memory_end = (1<<20) + (EXT_MEM_K<<10);
+	memory_end &= PAGE_MASK;
+	ramdisk_size = RAMDISK_SIZE;
+#ifdef CONFIG_MAX_16M
+	/* clamp for machines/buses that cannot address more than 16MB */
+	if (memory_end > 16*1024*1024)
+		memory_end = 16*1024*1024;
+#endif
+	if (MOUNT_ROOT_RDONLY)
+		root_mountflags |= MS_RDONLY;
+	/* free memory starts right after the kernel image (&end from the linker) */
+	memory_start = (unsigned long) &end;
+	init_task.mm->start_code = TASK_SIZE;
+	init_task.mm->end_code = TASK_SIZE + (unsigned long) &etext;
+	init_task.mm->end_data = TASK_SIZE + (unsigned long) &edata;
+	init_task.mm->brk = TASK_SIZE + (unsigned long) &end;
+
+	/* scan the command line for "mem=", copying it into command_line
+	   as we go (truncated at COMMAND_LINE_SIZE-1 characters) */
+	for (;;) {
+		/* c == ' ' ensures "mem=" is at the start of a word;
+		   the 4-byte compare matches the literal "mem=" */
+		if (c == ' ' && *(unsigned long *)from == *(unsigned long *)"mem=") {
+			memory_end = simple_strtoul(from+4, &from, 0);
+			if ( *from == 'K' || *from == 'k' ) {
+				memory_end = memory_end << 10;
+				from++;
+			} else if ( *from == 'M' || *from == 'm' ) {
+				memory_end = memory_end << 20;
+				from++;
+			}
+		}
+		c = *(from++);
+		if (!c)
+			break;
+		if (COMMAND_LINE_SIZE <= ++len)
+			break;
+		*(to++) = c;
+	}
+	*to = '\0';
+	*cmdline_p = command_line;
+	*memory_start_p = memory_start;
+	*memory_end_p = memory_end;
+	/* request io space for devices used on all i[345]86 PC'S */
+	request_region(0x00,0x20,"dma1");
+	request_region(0x40,0x20,"timer");
+	request_region(0x70,0x10,"rtc");
+	request_region(0x80,0x20,"dma page reg");
+	request_region(0xc0,0x20,"dma2");
+	request_region(0xf0,0x2,"npu");
+	request_region(0xf8,0x8,"npu");
+}
+
+/*
+ * Format the /proc/cpuinfo text into 'buffer'; returns the number of
+ * characters written (sprintf's return value).
+ */
+int get_cpuinfo(char * buffer)
+{
+	/* model name tables: row 0 is the 486 family, row 1 the Pentium
+	   family, indexed by x86_model-1.
+	   NOTE(review): indexing below is model[x86-4][...], so this is
+	   only valid for x86 == 4 or 5; for other values when x86_model
+	   is nonzero the access would be out of bounds -- presumably
+	   x86_model is 0 on those CPUs, verify against head.S. */
+	char *model[2][9]={{"DX","SX","DX/2","4","SX/2","6",
+				"DX/2-WB","DX/4"},
+			{"Pentium 60/66","Pentium 90/100","3",
+				"4","5","6","7","8"}};
+	char mask[2];
+	/* stepping mask is reported as a letter: 1 -> 'A', 2 -> 'B', ... */
+	mask[0] = x86_mask+'@';
+	mask[1] = '\0';
+	return sprintf(buffer,"cpu\t\t: %c86\n"
+			       "model\t\t: %s\n"
+			       "mask\t\t: %s\n"
+			       "vid\t\t: %s\n"
+			       "fdiv_bug\t: %s\n"
+			       "math\t\t: %s\n"
+			       "hlt\t\t: %s\n"
+			       "wp\t\t: %s\n"
+			       "Integrated NPU\t: %s\n"
+			       "Enhanced VM86\t: %s\n"
+			       "IO Breakpoints\t: %s\n"
+			       "4MB Pages\t: %s\n"
+			       "TS Counters\t: %s\n"
+			       "Pentium MSR\t: %s\n"
+			       "Mach. Ch. Exep.\t: %s\n"
+			       "CMPXCHGB8B\t: %s\n"
+			       "BogoMips\t: %lu.%02lu\n",
+			       x86+'0',
+			       x86_model ? model[x86-4][x86_model-1] : "Unknown",
+			       x86_mask ? mask : "Unknown",
+			       x86_vendor_id,
+			       fdiv_bug ? "yes" : "no",
+			       hard_math ? "yes" : "no",
+			       hlt_works_ok ? "yes" : "no",
+			       wp_works_ok ? "yes" : "no",
+			       x86_capability & 1 ? "yes" : "no",
+			       x86_capability & 2 ? "yes" : "no",
+			       x86_capability & 4 ? "yes" : "no",
+			       x86_capability & 8 ? "yes" : "no",
+			       x86_capability & 16 ? "yes" : "no",
+			       x86_capability & 32 ? "yes" : "no",
+			       x86_capability & 128 ? "yes" : "no",
+			       x86_capability & 256 ? "yes" : "no",
+			       loops_per_sec/500000, (loops_per_sec/5000) % 100
+			       );
+}
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
new file mode 100644
index 000000000..3db1a6985
--- /dev/null
+++ b/arch/i386/kernel/signal.c
@@ -0,0 +1,260 @@
+/*
+ * linux/arch/i386/kernel/signal.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+
+#include <asm/segment.h>
+
+/* Bit in a signal mask for signal number nr (signal numbers are 1-based). */
+#define _S(nr) (1<<((nr)-1))
+
+/* Every signal may be blocked except SIGKILL and SIGSTOP. */
+#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP)))
+
+asmlinkage int sys_waitpid(pid_t pid,unsigned long * stat_addr, int options);
+
+/*
+ * atomically swap in the new signal mask, and wait for a signal.
+ */
+/*
+ * Install 'set' as the blocked-signal mask and sleep until a signal
+ * with a handler is delivered; returns -EINTR via the saved eax.
+ */
+asmlinkage int sys_sigsuspend(int restart, unsigned long oldmask, unsigned long set)
+{
+	unsigned long mask;
+	/* the syscall arguments live on the kernel stack right where the
+	   saved registers are, so &restart is the pt_regs frame */
+	struct pt_regs * regs = (struct pt_regs *) &restart;
+
+	mask = current->blocked;
+	current->blocked = set & _BLOCKABLE;
+	/* pre-set the user-visible return value for when a handler runs */
+	regs->eax = -EINTR;
+	while (1) {
+		current->state = TASK_INTERRUPTIBLE;
+		schedule();
+		/* do_signal gets the OLD mask so it is restored on return */
+		if (do_signal(mask,regs))
+			return -EINTR;
+	}
+}
+
+/*
+ * This sets regs->esp even though we don't actually use sigstacks yet..
+ */
+/*
+ * Return from a signal handler: restore the user register state that
+ * setup_frame() saved as a sigcontext on the user stack (at regs->esp).
+ * Segment selectors are validated so a user cannot smuggle in a
+ * kernel-privilege selector; a bad frame kills the process.
+ */
+asmlinkage int sys_sigreturn(unsigned long __unused)
+{
+#define COPY(x) regs->x = context.x
+/* data segments: may be null, otherwise must have RPL 3 */
+#define COPY_SEG(x) \
+if ((context.x & 0xfffc) && (context.x & 3) != 3) goto badframe; COPY(x);
+/* cs/ss: must be non-null AND have RPL 3 */
+#define COPY_SEG_STRICT(x) \
+if (!(context.x & 0xfffc) || (context.x & 3) != 3) goto badframe; COPY(x);
+	struct sigcontext_struct context;
+	struct pt_regs * regs;
+
+	/* &__unused is where the saved registers sit on the kernel stack */
+	regs = (struct pt_regs *) &__unused;
+	if (verify_area(VERIFY_READ, (void *) regs->esp, sizeof(context)))
+		goto badframe;
+	memcpy_fromfs(&context,(void *) regs->esp, sizeof(context));
+	current->blocked = context.oldmask & _BLOCKABLE;
+	COPY_SEG(ds);
+	COPY_SEG(es);
+	COPY_SEG(fs);
+	COPY_SEG(gs);
+	COPY_SEG_STRICT(ss);
+	COPY_SEG_STRICT(cs);
+	COPY(eip);
+	COPY(ecx); COPY(edx);
+	COPY(ebx);
+	COPY(esp); COPY(ebp);
+	COPY(edi); COPY(esi);
+	/* only let the user restore the safe eflags bits (mask 0x40DD5 --
+	   presumably the arithmetic flags plus DF/TF/AC; kernel-controlled
+	   bits like IOPL/IF are preserved from the current value) */
+	regs->eflags &= ~0x40DD5;
+	regs->eflags |= context.eflags & 0x40DD5;
+	regs->orig_eax = -1;		/* disable syscall checks */
+	return context.eax;
+badframe:
+	do_exit(SIGSEGV);
+}
+
+/*
+ * Set up a signal frame... Make the stack look the way iBCS2 expects
+ * it to look.
+ */
+/*
+ * Build a signal frame on the user stack (*fp) in the layout iBCS2
+ * expects: return address, signal number, the full saved register
+ * state, then a small trampoline that pops the signal number and
+ * issues the sigreturn syscall.  On return *fp points at the new
+ * frame (the handler's stack pointer).
+ *
+ * sa      - the sigaction being delivered
+ * fp      - in/out: user stack pointer
+ * eip     - address the interrupted code will resume at (saved in frame)
+ * regs    - saved user registers
+ * signr   - signal number (translated via the exec domain if needed)
+ * oldmask - blocked-signal mask to restore at sigreturn time
+ */
+void setup_frame(struct sigaction * sa, unsigned long ** fp, unsigned long eip,
+	struct pt_regs * regs, int signr, unsigned long oldmask)
+{
+	unsigned long * frame;
+
+/* __CODE is the user address of the trampoline (words 24.. of the frame) */
+#define __CODE ((unsigned long)(frame+24))
+#define CODE(x) ((unsigned long *) ((x)+__CODE))
+	frame = *fp;
+	/* if the signal arrived while not on the normal user stack,
+	   build the frame at the caller-provided sa_restorer address */
+	if (regs->ss != USER_DS)
+		frame = (unsigned long *) sa->sa_restorer;
+	frame -= 32;
+	if (verify_area(VERIFY_WRITE,frame,32*4))
+		do_exit(SIGSEGV);
+/* set up the "normal" stack seen by the signal handler (iBCS2) */
+	/* word 0: "return address" -> the trampoline below */
+	put_fs_long(__CODE,frame);
+	if (current->exec_domain && current->exec_domain->signal_invmap)
+		put_fs_long(current->exec_domain->signal_invmap[signr], frame+1);
+	else
+		put_fs_long(signr, frame+1);
+	/* words 2..21: the sigcontext that sys_sigreturn will restore */
+	put_fs_long(regs->gs, frame+2);
+	put_fs_long(regs->fs, frame+3);
+	put_fs_long(regs->es, frame+4);
+	put_fs_long(regs->ds, frame+5);
+	put_fs_long(regs->edi, frame+6);
+	put_fs_long(regs->esi, frame+7);
+	put_fs_long(regs->ebp, frame+8);
+	put_fs_long((long)*fp, frame+9);
+	put_fs_long(regs->ebx, frame+10);
+	put_fs_long(regs->edx, frame+11);
+	put_fs_long(regs->ecx, frame+12);
+	put_fs_long(regs->eax, frame+13);
+	put_fs_long(current->tss.trap_no, frame+14);
+	put_fs_long(current->tss.error_code, frame+15);
+	put_fs_long(eip, frame+16);
+	put_fs_long(regs->cs, frame+17);
+	put_fs_long(regs->eflags, frame+18);
+	put_fs_long(regs->esp, frame+19);
+	put_fs_long(regs->ss, frame+20);
+	put_fs_long(0,frame+21);		/* 387 state pointer - not implemented*/
+/* non-iBCS2 extensions.. */
+	put_fs_long(oldmask, frame+22);
+	put_fs_long(current->tss.cr2, frame+23);
+/* set up the return code... */
+	/* trampoline: popl %eax; movl $__NR_sigreturn,%eax; int $0x80
+	   (the immediate is patched in by the CODE(2) store) */
+	put_fs_long(0x0000b858, CODE(0));	/* popl %eax ; movl $,%eax */
+	put_fs_long(0x80cd0000, CODE(4));	/* int $0x80 */
+	put_fs_long(__NR_sigreturn, CODE(2));
+	*fp = frame;
+#undef __CODE
+#undef CODE
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ *
+ * Note that we go through the signals twice: once to check the signals that
+ * the kernel can handle, and then we build all the user-level signal handling
+ * stack-frames in one go after that.
+ */
+/*
+ * Deliver pending unblocked signals to the current process.
+ *
+ * First pass: dequeue each pending signal and handle the kernel-side
+ * cases (ptrace stops, SIG_IGN, SIG_DFL stop/dump/exit); signals with
+ * user handlers are only collected into 'handler_signal'.  Second
+ * pass: build one stacked signal frame per collected handler, so the
+ * handlers run one after another when we return to user mode.
+ *
+ * Returns 1 if at least one user handler will run, 0 otherwise.
+ */
+asmlinkage int do_signal(unsigned long oldmask, struct pt_regs * regs)
+{
+	unsigned long mask = ~current->blocked;
+	unsigned long handler_signal = 0;
+	unsigned long *frame = NULL;
+	unsigned long eip = 0;
+	unsigned long signr;
+	struct sigaction * sa;
+
+	while ((signr = current->signal & mask)) {
+		/* find lowest pending unblocked signal (bsf) and clear
+		   its bit in current->signal (btrl), atomically enough */
+		__asm__("bsf %3,%1\n\t"
+			"btrl %1,%0"
+			:"=m" (current->signal),"=r" (signr)
+			:"0" (current->signal), "1" (signr));
+		sa = current->sigaction + signr;
+		signr++;
+		/* ptraced: stop and let the tracer decide which signal
+		   (if any) actually gets delivered */
+		if ((current->flags & PF_PTRACED) && signr != SIGKILL) {
+			current->exit_code = signr;
+			current->state = TASK_STOPPED;
+			notify_parent(current);
+			schedule();
+			if (!(signr = current->exit_code))
+				continue;
+			current->exit_code = 0;
+			if (signr == SIGSTOP)
+				continue;
+			/* tracer substituted a blocked signal: re-queue it */
+			if (_S(signr) & current->blocked) {
+				current->signal |= _S(signr);
+				continue;
+			}
+			sa = current->sigaction + signr - 1;
+		}
+		if (sa->sa_handler == SIG_IGN) {
+			if (signr != SIGCHLD)
+				continue;
+			/* check for SIGCHLD: it's special */
+			while (sys_waitpid(-1,NULL,WNOHANG) > 0)
+				/* nothing */;
+			continue;
+		}
+		if (sa->sa_handler == SIG_DFL) {
+			/* init (pid 1) never dies from a default action */
+			if (current->pid == 1)
+				continue;
+			switch (signr) {
+			case SIGCONT: case SIGCHLD: case SIGWINCH:
+				continue;
+
+			case SIGSTOP: case SIGTSTP: case SIGTTIN: case SIGTTOU:
+				if (current->flags & PF_PTRACED)
+					continue;
+				current->state = TASK_STOPPED;
+				current->exit_code = signr;
+				if (!(current->p_pptr->sigaction[SIGCHLD-1].sa_flags & 
+						SA_NOCLDSTOP))
+					notify_parent(current);
+				schedule();
+				continue;
+
+			case SIGQUIT: case SIGILL: case SIGTRAP:
+			case SIGABRT: case SIGFPE: case SIGSEGV:
+				if (current->binfmt && current->binfmt->core_dump) {
+					/* 0x80 in the exit code flags "core dumped" */
+					if (current->binfmt->core_dump(signr, regs))
+						signr |= 0x80;
+				}
+				/* fall through */
+			default:
+				current->signal |= _S(signr & 0x7f);
+				do_exit(signr);
+			}
+		}
+		/*
+		 * OK, we're invoking a handler
+		 */
+		/* orig_eax >= 0 means we interrupted a syscall: decide
+		   whether it is restarted or returns -EINTR */
+		if (regs->orig_eax >= 0) {
+			if (regs->eax == -ERESTARTNOHAND ||
+			   (regs->eax == -ERESTARTSYS && !(sa->sa_flags & SA_RESTART)))
+				regs->eax = -EINTR;
+		}
+		handler_signal |= 1 << (signr-1);
+		mask &= ~sa->sa_mask;
+	}
+	/* no handler intercepted: restart the interrupted syscall by
+	   backing eip up over the 2-byte "int $0x80" instruction */
+	if (regs->orig_eax >= 0 &&
+	    (regs->eax == -ERESTARTNOHAND ||
+	     regs->eax == -ERESTARTSYS ||
+	     regs->eax == -ERESTARTNOINTR)) {
+		regs->eax = regs->orig_eax;
+		regs->eip -= 2;
+	}
+	if (!handler_signal)		/* no handler will be called - return 0 */
+		return 0;
+	eip = regs->eip;
+	frame = (unsigned long *) regs->esp;
+	signr = 1;
+	sa = current->sigaction;
+	/* second pass: stack one frame per handler; each frame's saved
+	   eip is the previous frame's handler, chaining the handlers */
+	for (mask = 1 ; mask ; sa++,signr++,mask += mask) {
+		if (mask > handler_signal)
+			break;
+		if (!(mask & handler_signal))
+			continue;
+		setup_frame(sa,&frame,eip,regs,signr,oldmask);
+		eip = (unsigned long) sa->sa_handler;
+		if (sa->sa_flags & SA_ONESHOT)
+			sa->sa_handler = NULL;
+/* force a supervisor-mode page-in of the signal handler to reduce races */
+		__asm__("testb $0,%%fs:%0": :"m" (*(char *) eip));
+		regs->cs = USER_CS; regs->ss = USER_DS;
+		regs->ds = USER_DS; regs->es = USER_DS;
+		regs->gs = USER_DS; regs->fs = USER_DS;
+		current->blocked |= sa->sa_mask;
+		oldmask |= sa->sa_mask;
+	}
+	regs->esp = (unsigned long) frame;
+	regs->eip = eip;		/* "return" to the first handler */
+	/* clear single-step so the handler itself isn't traced */
+	regs->eflags &= ~TF_MASK;
+	current->tss.trap_no = current->tss.error_code = 0;
+	return 1;
+}
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
new file mode 100644
index 000000000..6dd7fc65b
--- /dev/null
+++ b/arch/i386/kernel/traps.c
@@ -0,0 +1,349 @@
+/*
+ * linux/arch/i386/traps.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * 'Traps.c' handles hardware traps and faults after we have saved some
+ * state in 'asm.s'. Currently mostly a debugging-aid, will be extended
+ * to mainly kill the offending process (probably by giving it a signal,
+ * but possibly by killing it outright if necessary).
+ */
+#include <linux/head.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/config.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+
+#include <asm/system.h>
+#include <asm/segment.h>
+#include <asm/io.h>
+
+asmlinkage int system_call(void);
+asmlinkage void lcall7(void);
+struct desc_struct default_ldt;
+
+/* Raise the console log level to maximum so a following oops dump
+   is not filtered out. */
+static inline void console_verbose(void)
+{
+	extern int console_loglevel;
+	console_loglevel = 15;
+}
+
+/*
+ * Template for the simple exception handlers: record the trap number
+ * and error code in the task's tss, send the signal (unblocking
+ * SIGTRAP first if the task is being ptraced, so the stop is visible
+ * to the tracer), and die if the fault happened in kernel mode.
+ */
+#define DO_ERROR(trapnr, signr, str, name, tsk) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+	tsk->tss.error_code = error_code; \
+	tsk->tss.trap_no = trapnr; \
+	if (signr == SIGTRAP && current->flags & PF_PTRACED) \
+		current->blocked &= ~(1 << (SIGTRAP-1)); \
+	send_sig(signr, tsk, 1); \
+	die_if_kernel(str,regs,error_code); \
+}
+
+/*
+ * Read a byte from address 'addr' in segment 'seg' by temporarily
+ * loading the selector into %fs.  Used by die_if_kernel() to dump the
+ * stack and code bytes of the faulting context, whose segments may
+ * differ from the kernel's.
+ */
+#define get_seg_byte(seg,addr) ({ \
+register unsigned char __res; \
+__asm__("push %%fs;mov %%ax,%%fs;movb %%fs:%2,%%al;pop %%fs" \
+	:"=a" (__res):"0" (seg),"m" (*(addr))); \
+__res;})
+
+/* Same as get_seg_byte() but reads a 32-bit word. */
+#define get_seg_long(seg,addr) ({ \
+register unsigned long __res; \
+__asm__("push %%fs;mov %%ax,%%fs;movl %%fs:%2,%%eax;pop %%fs" \
+	:"=a" (__res):"0" (seg),"m" (*(addr))); \
+__res;})
+
+/* Return the current %fs selector value. */
+#define _fs() ({ \
+register unsigned short __res; \
+__asm__("mov %%fs,%%ax":"=a" (__res):); \
+__res;})
+
+void page_exception(void);
+
+asmlinkage void divide_error(void);
+asmlinkage void debug(void);
+asmlinkage void nmi(void);
+asmlinkage void int3(void);
+asmlinkage void overflow(void);
+asmlinkage void bounds(void);
+asmlinkage void invalid_op(void);
+asmlinkage void device_not_available(void);
+asmlinkage void double_fault(void);
+asmlinkage void coprocessor_segment_overrun(void);
+asmlinkage void invalid_TSS(void);
+asmlinkage void segment_not_present(void);
+asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void);
+asmlinkage void page_fault(void);
+asmlinkage void coprocessor_error(void);
+asmlinkage void reserved(void);
+asmlinkage void alignment_check(void);
+
+int kstack_depth_to_print = 24;
+
+/*
+ * These constants are for searching for possible module text
+ * segments. VMALLOC_OFFSET comes from mm/vmalloc.c; MODULE_RANGE is
+ * a guess of how much space is likely to be vmalloced.
+ */
+#define VMALLOC_OFFSET (8*1024*1024)
+#define MODULE_RANGE (8*1024*1024)
+
+/*
+ * Kernel-mode oops: if the trap came from kernel context, dump the
+ * registers, a window of the kernel stack, a heuristic call trace and
+ * the faulting code bytes, then kill the current process.  Returns
+ * without doing anything for v86 or CPL3 (user-mode) faults.
+ */
+/*static*/ void die_if_kernel(char * str, struct pt_regs * regs, long err)
+{
+	int i;
+	unsigned long esp;
+	unsigned short ss;
+	unsigned long *stack, addr, module_start, module_end;
+	extern char start_kernel, etext;
+
+	/* kernel-mode trap: esp/ss were not pushed by the CPU, so the
+	   stack pointer is where the saved frame ends */
+	esp = (unsigned long) &regs->esp;
+	ss = KERNEL_DS;
+	if ((regs->eflags & VM_MASK) || (3 & regs->cs) == 3)
+		return;
+	/* NOTE(review): after the early return above, cs & 3 can only be
+	   nonzero here for RPL 1/2 selectors -- presumably dead in
+	   practice; confirm before relying on this branch */
+	if (regs->cs & 3) {
+		esp = regs->esp;
+		ss = regs->ss;
+	}
+	console_verbose();
+	printk("%s: %04lx\n", str, err & 0xffff);
+	printk("EIP:    %04x:%08lx\nEFLAGS: %08lx\n", 0xffff & regs->cs,regs->eip,regs->eflags);
+	printk("eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
+		regs->eax, regs->ebx, regs->ecx, regs->edx);
+	printk("esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
+		regs->esi, regs->edi, regs->ebp, esp);
+	printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   ss: %04x\n",
+		regs->ds, regs->es, regs->fs, regs->gs, ss);
+	store_TR(i);
+	if (STACK_MAGIC != *(unsigned long *)current->kernel_stack_page)
+		printk("Corrupted stack page\n");
+	printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)\nStack: ",
+		current->comm, current->pid, 0xffff & i, current->kernel_stack_page);
+	/* dump the raw stack, stopping at a page boundary */
+	stack = (unsigned long *) esp;
+	for(i=0; i < kstack_depth_to_print; i++) {
+		if (((long) stack & 4095) == 0)
+			break;
+		if (i && ((i % 8) == 0))
+			printk("\n       ");
+		printk("%08lx ", get_seg_long(ss,stack++));
+	}
+	printk("\nCall Trace: ");
+	stack = (unsigned long *) esp;
+	i = 1;
+	module_start = ((high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1));
+	module_end = module_start + MODULE_RANGE;
+	while (((long) stack & 4095) != 0) {
+		addr = get_seg_long(ss, stack++);
+		/*
+		 * If the address is either in the text segment of the
+		 * kernel, or in the region which contains vmalloc'ed
+		 * memory, it *may* be the address of a calling
+		 * routine; if so, print it so that someone tracing
+		 * down the cause of the crash will be able to figure
+		 * out the call path that was taken.
+		 */
+		if (((addr >= (unsigned long) &start_kernel) &&
+		     (addr <= (unsigned long) &etext)) ||
+		    ((addr >= module_start) && (addr <= module_end))) {
+			if (i && ((i % 8) == 0))
+				printk("\n       ");
+			printk("%08lx ", addr);
+			i++;
+		}
+	}
+	/* dump 20 code bytes starting at the faulting eip */
+	printk("\nCode: ");
+	for(i=0;i<20;i++)
+		printk("%02x ",0xff & get_seg_byte(regs->cs,(i+(char *)regs->eip)));
+	printk("\n");
+	do_exit(SIGSEGV);
+}
+
+/* The straightforward exception handlers, generated from the DO_ERROR
+   template above: set trap info, signal the task, oops if in kernel.
+   Trap 9 is attributed to last_task_used_math since the coprocessor
+   state belongs to that task, not necessarily to current. */
+DO_ERROR( 0, SIGFPE,  "divide error", divide_error, current)
+DO_ERROR( 3, SIGTRAP, "int3", int3, current)
+DO_ERROR( 4, SIGSEGV, "overflow", overflow, current)
+DO_ERROR( 5, SIGSEGV, "bounds", bounds, current)
+DO_ERROR( 6, SIGILL,  "invalid operand", invalid_op, current)
+DO_ERROR( 7, SIGSEGV, "device not available", device_not_available, current)
+DO_ERROR( 8, SIGSEGV, "double fault", double_fault, current)
+DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun, last_task_used_math)
+DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS, current)
+DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present, current)
+DO_ERROR(12, SIGBUS,  "stack segment", stack_segment, current)
+DO_ERROR(15, SIGSEGV, "reserved", reserved, current)
+DO_ERROR(17, SIGSEGV, "alignment check", alignment_check, current)
+
+/*
+ * General protection fault (trap 13): vm86-mode faults are handed to
+ * the vm86 emulation; kernel-mode faults oops; user-mode faults get
+ * SIGSEGV.
+ */
+asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
+{
+	if (regs->eflags & VM_MASK) {
+		handle_vm86_fault((struct vm86_regs *) regs, error_code);
+		return;
+	}
+	/* does not return if the fault was in kernel mode */
+	die_if_kernel("general protection",regs,error_code);
+	current->tss.error_code = error_code;
+	current->tss.trap_no = 13;
+	send_sig(SIGSEGV, current, 1);
+}
+
+/*
+ * Non-maskable interrupt (trap 2): usually a memory parity error or
+ * power-management quirk.  We can only warn and carry on.
+ */
+asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
+{
+#ifndef CONFIG_IGNORE_NMI
+	printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
+	printk("You probably have a hardware problem with your RAM chips or a\n");
+	printk("power saving mode enabled.\n");
+#endif
+}
+
+/*
+ * Debug exception (trap 1): single-step or hardware breakpoint.
+ * vm86-mode traps go to the vm86 emulation; otherwise SIGTRAP is sent
+ * to the current task (unblocked first if it is being ptraced so the
+ * tracer always sees the stop).
+ */
+asmlinkage void do_debug(struct pt_regs * regs, long error_code)
+{
+	if (regs->eflags & VM_MASK) {
+		handle_vm86_debug((struct vm86_regs *) regs, error_code);
+		return;
+	}
+	if (current->flags & PF_PTRACED)
+		current->blocked &= ~(1 << (SIGTRAP-1));
+	send_sig(SIGTRAP, current, 1);
+	current->tss.trap_no = 1;
+	current->tss.error_code = error_code;
+	if ((regs->cs & 3) == 0) {
+		/* If this is a kernel mode trap, then reset db7 and allow us to continue */
+		__asm__("movl %0,%%db7"
+			: /* no output */
+			: "r" (0));
+		return;
+	}
+	die_if_kernel("debug",regs,error_code);
+}
+
+/*
+ * Allow the process which triggered the interrupt to recover the error
+ * condition.
+ * - the status word is saved in the cs selector.
+ * - the tag word is saved in the operand selector.
+ * - the status word is then cleared and the tags all set to Empty.
+ *
+ * This will give sufficient information for complete recovery provided that
+ * the affected process knows or can deduce the code and data segments
+ * which were in force when the exception condition arose.
+ *
+ * Note that we play around with the 'TS' bit to hopefully get
+ * the correct behaviour even in the presence of the asynchronous
+ * IRQ13 behaviour
+ */
+/*
+ * Handle an FPU exception on behalf of last_task_used_math (the FPU
+ * state is lazily switched, so the error belongs to that task, not
+ * necessarily to current): signal it SIGFPE, save the FPU state into
+ * its tss, and rewrite the saved state so the task can recover (see
+ * the block comment above for the fcs/fos encoding).
+ */
+void math_error(void)
+{
+	struct i387_hard_struct * env;
+
+	/* clear TS so we may touch the FPU */
+	clts();
+	if (!last_task_used_math) {
+		/* nobody owns the FPU state: just clear the exception */
+		__asm__("fnclex");
+		return;
+	}
+	env = &last_task_used_math->tss.i387.hard;
+	send_sig(SIGFPE, last_task_used_math, 1);
+	last_task_used_math->tss.trap_no = 16;
+	last_task_used_math->tss.error_code = 0;
+	__asm__ __volatile__("fnsave %0":"=m" (*env));
+	last_task_used_math = NULL;
+	stts();
+	/* stash the status word in fcs and the tag word in fos, then
+	   clear the status and mark all FPU registers Empty, so the
+	   task's handler can recover the error condition */
+	env->fcs = (env->swd & 0x0000ffff) | (env->fcs & 0xffff0000);
+	env->fos = env->twd;
+	env->swd &= 0xffff3800;
+	env->twd = 0xffffffff;
+}
+
+/*
+ * Coprocessor error trap (16): exception 16 works, so the redundant
+ * IRQ13 reports can be ignored from now on.
+ */
+asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code)
+{
+	ignore_irq13 = 1;
+	math_error();
+}
+
+/*
+ * 'math_state_restore()' saves the current math information in the
+ * old math state array, and gets the new ones from the current task
+ *
+ * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+ * Don't touch unless you *really* know how it works.
+ */
+asmlinkage void math_state_restore(void)
+{
+	/* clear TS: we are about to use the FPU legitimately */
+	__asm__ __volatile__("clts");
+	if (last_task_used_math == current)
+		return;
+	/* arm a watchdog timer around the fnsave/frstor sequence --
+	   part of the IBM IRQ13 workaround mentioned above */
+	timer_table[COPRO_TIMER].expires = jiffies+50;
+	timer_active |= 1<<COPRO_TIMER;
+	if (last_task_used_math)
+		/* save the previous owner's FPU state into its tss */
+		__asm__("fnsave %0":"=m" (last_task_used_math->tss.i387));
+	else
+		__asm__("fnclex");
+	last_task_used_math = current;
+	if (current->used_math) {
+		__asm__("frstor %0": :"m" (current->tss.i387));
+	} else {
+		/* first FPU use by this task: start from a clean state */
+		__asm__("fninit");
+		current->used_math=1;
+	}
+	timer_active &= ~(1<<COPRO_TIMER);
+}
+
+#ifndef CONFIG_MATH_EMULATION
+
+/*
+ * Stub used when the kernel is built without the FPU emulator: a task
+ * that executes an FPU instruction on a machine with no coprocessor
+ * is killed with SIGFPE.
+ */
+asmlinkage void math_emulate(long arg)
+{
+	printk("math-emulation not enabled and no coprocessor found.\n");
+	printk("killing %s.\n",current->comm);
+	send_sig(SIGFPE,current,1);
+	schedule();
+}
+
+#endif /* CONFIG_MATH_EMULATION */
+
+/*
+ * Boot-time trap setup: detect an EISA bus, install the IDT gates for
+ * CPU exceptions 0-17 (plus the 0x80 system-call gate), and set up
+ * the GDT TSS/LDT descriptors for all tasks.
+ */
+void trap_init(void)
+{
+	int i;
+	struct desc_struct * p;
+
+	/* the EISA spec puts the "EISA" signature at 0xFFFD9 */
+	if (strncmp((char*)0x0FFFD9, "EISA", 4) == 0)
+		EISA_bus = 1;
+	set_call_gate(&default_ldt,lcall7);
+	set_trap_gate(0,&divide_error);
+	set_trap_gate(1,&debug);
+	set_trap_gate(2,&nmi);
+	set_system_gate(3,&int3);	/* int3-5 can be called from all */
+	set_system_gate(4,&overflow);
+	set_system_gate(5,&bounds);
+	set_trap_gate(6,&invalid_op);
+	set_trap_gate(7,&device_not_available);
+	set_trap_gate(8,&double_fault);
+	set_trap_gate(9,&coprocessor_segment_overrun);
+	set_trap_gate(10,&invalid_TSS);
+	set_trap_gate(11,&segment_not_present);
+	set_trap_gate(12,&stack_segment);
+	set_trap_gate(13,&general_protection);
+	set_trap_gate(14,&page_fault);
+	set_trap_gate(15,&reserved);
+	set_trap_gate(16,&coprocessor_error);
+	set_trap_gate(17,&alignment_check);
+	for (i=18;i<48;i++)
+		set_trap_gate(i,&reserved);
+	set_system_gate(0x80,&system_call);
+/* set up GDT task & ldt entries */
+	p = gdt+FIRST_TSS_ENTRY;
+	set_tss_desc(p, &init_task.tss);
+	p++;
+	set_ldt_desc(p, &default_ldt, 1);
+	p++;
+	/* clear the TSS/LDT descriptor pairs for all other task slots */
+	for(i=1 ; i<NR_TASKS ; i++) {
+		p->a=p->b=0;
+		p++;
+		p->a=p->b=0;
+		p++;
+	}
+/* Clear NT, so that we won't have troubles with that later on */
+	__asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
+	load_TR(0);
+	load_ldt(0);
+}
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
new file mode 100644
index 000000000..d55f8248f
--- /dev/null
+++ b/arch/i386/kernel/vm86.c
@@ -0,0 +1,420 @@
+/*
+ * linux/kernel/vm86.c
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ */
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/string.h>
+#include <linux/ptrace.h>
+#include <linux/mm.h>
+
+#include <asm/segment.h>
+#include <asm/pgtable.h>
+#include <asm/io.h>
+
+/*
+ * Known problems:
+ *
+ * Interrupt handling is not guaranteed:
+ * - a real x86 will disable all interrupts for one instruction
+ * after a "mov ss,xx" to make stack handling atomic even without
+ * the 'lss' instruction. We can't guarantee this in v86 mode,
+ * as the next instruction might result in a page fault or similar.
+ * - a real x86 will have interrupts disabled for one instruction
+ * past the 'sti' that enables them. We don't bother with all the
+ * details yet..
+ *
+ * Hopefully these problems do not actually matter for anything.
+ */
+
+/*
+ * 8- and 16-bit register defines..
+ */
/*
 * 8- and 16-bit register defines: access the low byte (AL), the second
 * byte (AH) and the low 16 bits (IP, SP) of registers in a vm86
 * register image (little-endian layout).
 */
#define AL(regs)	(((unsigned char *)&((regs)->eax))[0])
#define AH(regs)	(((unsigned char *)&((regs)->eax))[1])
#define IP(regs)	(*(unsigned short *)&((regs)->eip))
#define SP(regs)	(*(unsigned short *)&((regs)->esp))

/*
 * virtual flags (16 and 32-bit versions), kept in the TSS while the
 * task runs in vm86 mode
 */
#define VFLAGS	(*(unsigned short *)&(current->tss.v86flags))
#define VEFLAGS	(current->tss.v86flags)

/* replace the bits of X selected by 'mask' with the same bits of 'new' */
#define set_flags(X,new,mask) \
((X) = ((X) & ~(mask)) | ((new) & (mask)))

/* eflags bits the vm86 task may change directly: 0xDD5 covers the
   arithmetic flags plus TF and DF; IF, IOPL and NT stay under kernel
   control */
#define SAFE_MASK	(0xDD5)
/* bits reported back unmodified by get_vflags(): 0xDFF is everything
   below IOPL except IF, which is synthesized from the virtual
   interrupt flag */
#define RETURN_MASK	(0xDFF)
+
/*
 * Leave vm86 mode: copy the vm86 register image and the screen bitmap
 * back into the user-space vm86_struct that was passed to sys_vm86(),
 * restore the kernel stack pointer saved on entry, and return the
 * 32-bit pt_regs frame that was active before vm86 mode was entered.
 * A task with no vm86_info here is fatally confused and is killed.
 */
asmlinkage struct pt_regs * save_v86_state(struct vm86_regs * regs)
{
	unsigned long tmp;

	if (!current->tss.vm86_info) {
		printk("no vm86_info: BAD\n");
		do_exit(SIGSEGV);
	}
	/* merge the virtual IF and the CPU-dependent flag bits back into
	   the eflags image handed to user space */
	set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->tss.v86mask);
	memcpy_tofs(&current->tss.vm86_info->regs,regs,sizeof(*regs));
	put_fs_long(current->tss.screen_bitmap,&current->tss.vm86_info->screen_bitmap);
	/* restore the pre-vm86 kernel stack pointer in the TSS */
	tmp = current->tss.esp0;
	current->tss.esp0 = current->saved_kernel_stack;
	current->saved_kernel_stack = 0;
	return (struct pt_regs *) tmp;
}
+
/*
 * Write-protect the 32 pages mapping linear 0xA0000-0xBFFFF (the
 * VGA/EGA video memory window) in 'tsk's page tables, so that screen
 * writes fault and can be handled according to the screen bitmap
 * requested via VM86_SCREEN_BITMAP.  Bad page-table entries are
 * cleared with a warning.  Assumes all 32 ptes live in one page table
 * (true here: the range does not cross a 4MB boundary).
 */
static void mark_screen_rdonly(struct task_struct * tsk)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	int i;

	pgd = pgd_offset(tsk, 0xA0000);
	if (pgd_none(*pgd))
		return;
	if (pgd_bad(*pgd)) {
		printk("vm86: bad pgd entry [%p]:%08lx\n", pgd, pgd_val(*pgd));
		pgd_clear(pgd);
		return;
	}
	pmd = pmd_offset(pgd, 0xA0000);
	if (pmd_none(*pmd))
		return;
	if (pmd_bad(*pmd)) {
		printk("vm86: bad pmd entry [%p]:%08lx\n", pmd, pmd_val(*pmd));
		pmd_clear(pmd);
		return;
	}
	pte = pte_offset(pmd, 0xA0000);
	/* 32 pages x 4K = the full 128K video window */
	for (i = 0; i < 32; i++) {
		if (pte_present(*pte))
			*pte = pte_wrprotect(*pte);
		pte++;
	}
	/* flush the TLB so the protection change takes effect */
	invalidate();
}
+
/*
 * Enter vm86 (virtual-8086) mode.  Copies in the user's vm86_struct,
 * sanitizes the segment and eflags images, remembers the 32-bit
 * register frame and kernel stack pointer, and then jumps straight to
 * the syscall exit path with the vm86 register image as the stack
 * frame - the resulting iret drops the task into vm86 mode.  Control
 * comes back to 32-bit mode later through save_v86_state(), which is
 * when the saved frame's %eax (set to 0 below, or to a VM86_* code by
 * return_to_32bit()) becomes the visible return value.
 */
asmlinkage int sys_vm86(struct vm86_struct * v86)
{
	struct vm86_struct info;
	/* the address of the first syscall argument is where the entry
	   code saved the caller's 32-bit pt_regs on the kernel stack */
	struct pt_regs * pt_regs = (struct pt_regs *) &v86;
	int error;

	/* vm86 mode cannot be nested */
	if (current->saved_kernel_stack)
		return -EPERM;
	/* v86 must be readable (now) and writable (for save_v86_state) */
	error = verify_area(VERIFY_WRITE,v86,sizeof(*v86));
	if (error)
		return error;
	memcpy_fromfs(&info,v86,sizeof(info));
/*
 * make sure the vm86() system call doesn't try to do anything silly
 */
	info.regs.__null_ds = 0;
	info.regs.__null_es = 0;
	info.regs.__null_fs = 0;
	info.regs.__null_gs = 0;
/*
 * The eflags register is also special: we cannot trust that the user
 * has set it up safely, so this makes sure interrupt etc flags are
 * inherited from protected mode.
 */
	VEFLAGS = info.regs.eflags;
	info.regs.eflags &= SAFE_MASK;
	info.regs.eflags |= pt_regs->eflags & ~SAFE_MASK;
	info.regs.eflags |= VM_MASK;

	/* select which eflags bits the vm86 task may play with, per the
	   CPU generation it wants to see emulated */
	switch (info.cpu_type) {
		case CPU_286:
			current->tss.v86mask = 0;
			break;
		case CPU_386:
			current->tss.v86mask = NT_MASK | IOPL_MASK;
			break;
		case CPU_486:
			current->tss.v86mask = AC_MASK | NT_MASK | IOPL_MASK;
			break;
		default:
			current->tss.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK;
			break;
	}

/*
 * Save old state, set default return value (%eax) to 0
 */
	pt_regs->eax = 0;
	current->saved_kernel_stack = current->tss.esp0;
	current->tss.esp0 = (unsigned long) pt_regs;
	current->tss.vm86_info = v86;

	current->tss.screen_bitmap = info.screen_bitmap;
	if (info.flags & VM86_SCREEN_BITMAP)
		mark_screen_rdonly(current);
	/* switch the stack to the vm86 register image and leave through
	   the common syscall exit path; does not return here */
	__asm__ __volatile__("movl %0,%%esp\n\t"
		"jmp ret_from_sys_call"
		: /* no outputs */
		:"r" (&info.regs));
	return 0;	/* not reached - keeps the compiler happy */
}
+
/*
 * Abandon vm86 mode and resume the 32-bit caller of sys_vm86() with
 * 'retval' as its %eax (the sys_vm86 return value).  Saves the vm86
 * state back to user space, switches to the saved 32-bit register
 * frame and jumps to the syscall exit path - never returns to its
 * caller.
 */
static inline void return_to_32bit(struct vm86_regs * regs16, int retval)
{
	struct pt_regs * regs32;

	regs32 = save_v86_state(regs16);
	regs32->eax = retval;
	__asm__ __volatile__("movl %0,%%esp\n\t"
		"jmp ret_from_sys_call"
		: : "r" (regs32));
}
+
+static inline void set_IF(struct vm86_regs * regs)
+{
+ VEFLAGS |= VIF_MASK;
+ if (VEFLAGS & VIP_MASK)
+ return_to_32bit(regs, VM86_STI);
+}
+
+static inline void clear_IF(struct vm86_regs * regs)
+{
+ VEFLAGS &= ~VIF_MASK;
+}
+
+static inline void clear_TF(struct vm86_regs * regs)
+{
+ regs->eflags &= ~TF_MASK;
+}
+
+static inline void set_vflags_long(unsigned long eflags, struct vm86_regs * regs)
+{
+ set_flags(VEFLAGS, eflags, current->tss.v86mask);
+ set_flags(regs->eflags, eflags, SAFE_MASK);
+ if (eflags & IF_MASK)
+ set_IF(regs);
+}
+
+static inline void set_vflags_short(unsigned short flags, struct vm86_regs * regs)
+{
+ set_flags(VFLAGS, flags, current->tss.v86mask);
+ set_flags(regs->eflags, flags, SAFE_MASK);
+ if (flags & IF_MASK)
+ set_IF(regs);
+}
+
+static inline unsigned long get_vflags(struct vm86_regs * regs)
+{
+ unsigned long flags = regs->eflags & RETURN_MASK;
+
+ if (VEFLAGS & VIF_MASK)
+ flags |= IF_MASK;
+ return flags | (VEFLAGS & current->tss.v86mask);
+}
+
/*
 * Test bit 'nr' of a revectoring bitmap that lives in user space
 * (accessed through the %fs segment override).  btl sets the carry
 * flag to the bit value and "sbbl %0,%0" turns that into -1 (bit set)
 * or 0 (bit clear), which is the return value.
 */
static inline int is_revectored(int nr, struct revectored_struct * bitmap)
{
	__asm__ __volatile__("btl %2,%%fs:%1\n\tsbbl %0,%0"
		:"=r" (nr)
		:"m" (*bitmap),"r" (nr));
	return nr;
}
+
+/*
+ * Boy are these ugly, but we need to do the correct 16-bit arithmetic.
+ * Gcc makes a mess of it, so we do it inline and use non-obvious calling
+ * conventions..
+ */
/*
 * Push a byte onto the vm86 stack.  'base' is the linear base address
 * of the stack segment, 'ptr' the 16-bit stack offset (updated in
 * place); the 16-bit "decw" keeps the offset wrapping inside the 64K
 * segment.  The stack itself is user memory, reached with a %fs
 * segment override.
 */
#define pushb(base, ptr, val) \
__asm__ __volatile__( \
	"decw %w0\n\t" \
	"movb %2,%%fs:0(%1,%0)" \
	: "=r" (ptr) \
	: "r" (base), "q" (val), "0" (ptr))

/*
 * Push a 16-bit word: high byte first, so the value ends up
 * little-endian in memory.  Same calling convention as pushb().
 */
#define pushw(base, ptr, val) \
__asm__ __volatile__( \
	"decw %w0\n\t" \
	"movb %h2,%%fs:0(%1,%0)\n\t" \
	"decw %w0\n\t" \
	"movb %b2,%%fs:0(%1,%0)" \
	: "=r" (ptr) \
	: "r" (base), "q" (val), "0" (ptr))

/*
 * Push a 32-bit value byte by byte, rotating the value to reach the
 * upper half; note the second rorl restores 'val' to its original
 * rotation.  Same calling convention as pushb().
 */
#define pushl(base, ptr, val) \
__asm__ __volatile__( \
	"decw %w0\n\t" \
	"rorl $16,%2\n\t" \
	"movb %h2,%%fs:0(%1,%0)\n\t" \
	"decw %w0\n\t" \
	"movb %b2,%%fs:0(%1,%0)\n\t" \
	"decw %w0\n\t" \
	"rorl $16,%2\n\t" \
	"movb %h2,%%fs:0(%1,%0)\n\t" \
	"decw %w0\n\t" \
	"movb %b2,%%fs:0(%1,%0)" \
	: "=r" (ptr) \
	: "r" (base), "q" (val), "0" (ptr))

/*
 * Pop a byte from the vm86 stack: yields the value as the expression
 * result and advances the 16-bit offset 'ptr' in place (with 16-bit
 * wraparound, as for the push macros).
 */
#define popb(base, ptr) \
({ unsigned long __res; \
__asm__ __volatile__( \
	"movb %%fs:0(%1,%0),%b2\n\t" \
	"incw %w0" \
	: "=r" (ptr), "=r" (base), "=q" (__res) \
	: "0" (ptr), "1" (base), "2" (0)); \
__res; })

/*
 * Pop a 16-bit little-endian word; same conventions as popb().
 */
#define popw(base, ptr) \
({ unsigned long __res; \
__asm__ __volatile__( \
	"movb %%fs:0(%1,%0),%b2\n\t" \
	"incw %w0\n\t" \
	"movb %%fs:0(%1,%0),%h2\n\t" \
	"incw %w0" \
	: "=r" (ptr), "=r" (base), "=q" (__res) \
	: "0" (ptr), "1" (base), "2" (0)); \
__res; })

/*
 * Pop a 32-bit value, assembling it with rotates; same conventions as
 * popb().
 */
#define popl(base, ptr) \
({ unsigned long __res; \
__asm__ __volatile__( \
	"movb %%fs:0(%1,%0),%b2\n\t" \
	"incw %w0\n\t" \
	"movb %%fs:0(%1,%0),%h2\n\t" \
	"incw %w0\n\t" \
	"rorl $16,%2\n\t" \
	"movb %%fs:0(%1,%0),%b2\n\t" \
	"incw %w0\n\t" \
	"movb %%fs:0(%1,%0),%h2\n\t" \
	"incw %w0\n\t" \
	"rorl $16,%2" \
	: "=r" (ptr), "=r" (base), "=q" (__res) \
	: "0" (ptr), "1" (base)); \
__res; })
+
/*
 * Reflect software/hardware interrupt 'i' into the vm86 task: push
 * flags, cs and ip on the vm86 stack and vector through the real-mode
 * interrupt table at linear address 0 (read via %fs).  Interrupts
 * whose current vector points into the BIOS segment, or that the
 * 32-bit monitor asked to have revectored, instead return to 32-bit
 * mode with a VM86_INTx code (interrupt number in bits 8-15).
 */
static void do_int(struct vm86_regs *regs, int i, unsigned char * ssp, unsigned long sp)
{
	/* real-mode vector i: word at i*4 is the offset, word at i*4+2
	   the segment */
	unsigned short seg = get_fs_word((void *) ((i<<2)+2));

	if (seg == BIOSSEG || regs->cs == BIOSSEG ||
		is_revectored(i, &current->tss.vm86_info->int_revectored))
		return_to_32bit(regs, VM86_INTx + (i << 8));
	/* DOS int 0x21 can additionally be revectored per AH function code */
	if (i==0x21 && is_revectored(AH(regs),&current->tss.vm86_info->int21_revectored))
		return_to_32bit(regs, VM86_INTx + (i << 8));
	pushw(ssp, sp, get_vflags(regs));
	pushw(ssp, sp, regs->cs);
	pushw(ssp, sp, IP(regs));
	regs->cs = seg;
	/* three 16-bit words were pushed above */
	SP(regs) -= 6;
	IP(regs) = get_fs_word((void *) (i<<2));
	/* real-mode interrupt entry clears TF and IF */
	clear_TF(regs);
	clear_IF(regs);
	return;
}
+
/*
 * Debug exception (int 1) taken while in vm86 mode: deliver SIGTRAP
 * to the task rather than reflecting the interrupt into the vm86
 * code (the reflection alternative is kept under "#if 0").  If the
 * task is being ptraced, SIGTRAP is unblocked first so the debugger
 * is guaranteed to see it.
 */
void handle_vm86_debug(struct vm86_regs * regs, long error_code)
{
#if 0
	do_int(regs, 1, (unsigned char *) (regs->ss << 4), SP(regs));
#else
	if (current->flags & PF_PTRACED)
		current->blocked &= ~(1 << (SIGTRAP-1));
	send_sig(SIGTRAP, current, 1);
	/* record trap details for the signal/ptrace machinery */
	current->tss.trap_no = 1;
	current->tss.error_code = error_code;
#endif
}
+
+void handle_vm86_fault(struct vm86_regs * regs, long error_code)
+{
+ unsigned char *csp, *ssp;
+ unsigned long ip, sp;
+
+ csp = (unsigned char *) (regs->cs << 4);
+ ssp = (unsigned char *) (regs->ss << 4);
+ sp = SP(regs);
+ ip = IP(regs);
+
+ switch (popb(csp, ip)) {
+
+ /* operand size override */
+ case 0x66:
+ switch (popb(csp, ip)) {
+
+ /* pushfd */
+ case 0x9c:
+ SP(regs) -= 4;
+ IP(regs) += 2;
+ pushl(ssp, sp, get_vflags(regs));
+ return;
+
+ /* popfd */
+ case 0x9d:
+ SP(regs) += 4;
+ IP(regs) += 2;
+ set_vflags_long(popl(ssp, sp), regs);
+ return;
+
+ /* iretd */
+ case 0xcf:
+ SP(regs) += 12;
+ IP(regs) = (unsigned short)popl(ssp, sp);
+ regs->cs = (unsigned short)popl(ssp, sp);
+ set_vflags_long(popl(ssp, sp), regs);
+ return;
+ }
+
+ /* pushf */
+ case 0x9c:
+ SP(regs) -= 2;
+ IP(regs)++;
+ pushw(ssp, sp, get_vflags(regs));
+ return;
+
+ /* popf */
+ case 0x9d:
+ SP(regs) += 2;
+ IP(regs)++;
+ set_vflags_short(popw(ssp, sp), regs);
+ return;
+
+ /* int xx */
+ case 0xcd:
+ IP(regs) += 2;
+ do_int(regs, popb(csp, ip), ssp, sp);
+ return;
+
+ /* iret */
+ case 0xcf:
+ SP(regs) += 6;
+ IP(regs) = popw(ssp, sp);
+ regs->cs = popw(ssp, sp);
+ set_vflags_short(popw(ssp, sp), regs);
+ return;
+
+ /* cli */
+ case 0xfa:
+ IP(regs)++;
+ clear_IF(regs);
+ return;
+
+ /* sti */
+ /*
+ * Damn. This is incorrect: the 'sti' instruction should actually
+ * enable interrupts after the /next/ instruction. Not good.
+ *
+ * Probably needs some horsing around with the TF flag. Aiee..
+ */
+ case 0xfb:
+ IP(regs)++;
+ set_IF(regs);
+ return;
+
+ default:
+ return_to_32bit(regs, VM86_UNKNOWN);
+ }
+}