Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile       |  48
-rw-r--r--  kernel/bios32.c       | 476
-rw-r--r--  kernel/dma.c          | 113
-rw-r--r--  kernel/exec_domain.c  | 102
-rw-r--r--  kernel/exit.c         | 603
-rw-r--r--  kernel/fork.c         | 265
-rw-r--r--  kernel/info.c         |  42
-rw-r--r--  kernel/ioport.c       | 194
-rw-r--r--  kernel/irq.c          | 354
-rw-r--r--  kernel/itimer.c       | 135
-rw-r--r--  kernel/ksyms.c        | 263
-rw-r--r--  kernel/ldt.c          | 103
-rw-r--r--  kernel/module.c       | 584
-rw-r--r--  kernel/panic.c        |  32
-rw-r--r--  kernel/printk.c       | 229
-rw-r--r--  kernel/ptrace.c       | 517
-rw-r--r--  kernel/sched.c        | 861
-rw-r--r--  kernel/signal.c       | 407
-rw-r--r--  kernel/splx.c         |  27
-rw-r--r--  kernel/sys.c          | 787
-rw-r--r--  kernel/time.c         | 487
-rw-r--r--  kernel/tqueue.c       |  10
-rw-r--r--  kernel/traps.c        | 245
-rw-r--r--  kernel/vm86.c         | 404
-rw-r--r--  kernel/vsprintf.c     | 309
25 files changed, 7597 insertions(+), 0 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
new file mode 100644
index 000000000..6de499ca7
--- /dev/null
+++ b/kernel/Makefile
@@ -0,0 +1,48 @@
+#
+# Makefile for the linux kernel.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+.S.s:
+ $(CPP) -traditional $< -o $*.s
+.c.s:
+ $(CC) $(CFLAGS) -S $<
+.s.o:
+ $(AS) -o $*.o $<
+.c.o:
+ $(CC) $(CFLAGS) -c $<
+
+OBJS = sched.o entry.o traps.o irq.o dma.o fork.o exec_domain.o \
+ panic.o printk.o vsprintf.o sys.o module.o ksyms.o exit.o \
+ signal.o ptrace.o ioport.o itimer.o \
+ info.o ldt.o time.o tqueue.o vm86.o bios32.o splx.o
+
+all: kernel.o
+
+kernel.o: $(OBJS)
+ $(LD) -r -o kernel.o $(OBJS)
+ sync
+
+entry.s: entry.S
+
+entry.o: entry.s
+
+sched.o: sched.c
+ $(CC) $(CFLAGS) $(PROFILING) -fno-omit-frame-pointer -c $<
+
+dep:
+ $(CPP) -M *.c > .depend
+
+dummy:
+
+#
+# include a dependency file if one exists
+#
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
+
diff --git a/kernel/bios32.c b/kernel/bios32.c
new file mode 100644
index 000000000..311dd111e
--- /dev/null
+++ b/kernel/bios32.c
@@ -0,0 +1,476 @@
+/*
+ * bios32.c - BIOS32, PCI BIOS functions.
+ *
+ * Sponsored by
+ * iX Multiuser Multitasking Magazine
+ * Hannover, Germany
+ * hm@ix.de
+ *
+ * Copyright 1993, 1994 Drew Eckhardt
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * Drew@Colorado.EDU
+ * +1 (303) 786-7975
+ *
+ * For more information, please consult
+ *
+ * PCI BIOS Specification Revision
+ * PCI Local Bus Specification
+ * PCI System Design Guide
+ *
+ * PCI Special Interest Group
+ * M/S HF3-15A
+ * 5200 N.E. Elam Young Parkway
+ * Hillsboro, Oregon 97124-6497
+ * +1 (503) 696-2000
+ * +1 (800) 433-5177
+ *
+ * Manuals are $25 each or $50 for all three, plus $7 shipping
+ * within the United States, $35 abroad.
+ *
+ *
+ * CHANGELOG :
+ * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION
+ * Revision 2.0 present on <thys@dennis.ee.up.ac.za>'s ASUS mainboard.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/bios32.h>
+#include <linux/pci.h>
+
+#include <asm/segment.h>
+
+/*
+ * It would seem some PCI bioses are buggy, so we don't actually use these
+ * routines unless we need to..
+ */
+#ifdef CONFIG_SCSI_NCR53C7xx
+ #define CONFIG_PCI
+#else
+ #undef CONFIG_PCI
+#endif
+
+#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX
+#define PCIBIOS_PCI_BIOS_PRESENT 0xb101
+#define PCIBIOS_FIND_PCI_DEVICE 0xb102
+#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103
+#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106
+#define PCIBIOS_READ_CONFIG_BYTE 0xb108
+#define PCIBIOS_READ_CONFIG_WORD 0xb109
+#define PCIBIOS_READ_CONFIG_DWORD 0xb10a
+#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b
+#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c
+#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d
+
+/* BIOS32 signature: "_32_" */
+#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
+
+/* PCI signature: "PCI " */
+#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24))
+
+/* PCI service signature: "$PCI" */
+#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24))
+
+/*
+ * This is the standard structure used to identify the entry point
+ * to the BIOS32 Service Directory, as documented in
+ * Standard BIOS 32-bit Service Directory Proposal
+ * Revision 0.4 May 24, 1993
+ * Phoenix Technologies Ltd.
+ * Norwood, MA
+ * and the PCI BIOS specification.
+ */
+
+union bios32 {
+ struct {
+ unsigned long signature; /* _32_ */
+ unsigned long entry; /* 32 bit physical address */
+ unsigned char revision; /* Revision level, 0 */
+ unsigned char length; /* Length in paragraphs should be 01 */
+ unsigned char checksum; /* All bytes must add up to zero */
+ unsigned char reserved[5]; /* Must be zero */
+ } fields;
+ char chars[16];
+};
+
+/*
+ * Physical address of the service directory. I don't know if we're
+ * allowed to have more than one of these or not, so just in case
+ * we'll make bios32_init() take a memory start parameter and store
+ * the array there.
+ */
+
+static unsigned long bios32_entry = 0;
+static struct {
+ unsigned long address;
+ unsigned short segment;
+} bios32_indirect = { 0, KERNEL_CS };
+
+#ifdef CONFIG_PCI
+/*
+ * Returns the entry point for the given service, NULL on error
+ */
+
+static unsigned long bios32_service(unsigned long service)
+{
+ unsigned char return_code; /* %al */
+ unsigned long address; /* %ebx */
+ unsigned long length; /* %ecx */
+ unsigned long entry; /* %edx */
+
+ __asm__("lcall (%%edi)"
+ : "=a" (return_code),
+ "=b" (address),
+ "=c" (length),
+ "=d" (entry)
+ : "0" (service),
+ "1" (0),
+ "D" (&bios32_indirect));
+
+ switch (return_code) {
+ case 0:
+ return address + entry;
+ case 0x80: /* Not present */
+ printk("bios32_service(%ld) : not present\n", service);
+ return 0;
+ default: /* Shouldn't happen */
+ printk("bios32_service(%ld) : returned 0x%x, mail drew@colorado.edu\n",
+ service, return_code);
+ return 0;
+ }
+}
+
+static long pcibios_entry = 0;
+static struct {
+ unsigned long address;
+ unsigned short segment;
+} pci_indirect = { 0, KERNEL_CS };
+
+void NCR53c810_test(void);
+
+static unsigned long pcibios_init(unsigned long memory_start, unsigned long memory_end)
+{
+ unsigned long signature;
+ unsigned char present_status;
+ unsigned char major_revision;
+ unsigned char minor_revision;
+ int pack;
+
+ if ((pcibios_entry = bios32_service(PCI_SERVICE))) {
+ pci_indirect.address = pcibios_entry;
+
+ __asm__("lcall (%%edi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:\tshl $8, %%eax\n\t"
+ "movw %%bx, %%ax"
+ : "=d" (signature),
+ "=a" (pack)
+ : "1" (PCIBIOS_PCI_BIOS_PRESENT),
+ "D" (&pci_indirect)
+ : "bx", "cx");
+
+ present_status = (pack >> 16) & 0xff;
+ major_revision = (pack >> 8) & 0xff;
+ minor_revision = pack & 0xff;
+ if (present_status || (signature != PCI_SIGNATURE)) {
+ printk ("pcibios_init : %s : BIOS32 Service Directory says PCI BIOS is present,\n"
+ " but PCI_BIOS_PRESENT subfunction fails with present status of 0x%x\n"
+ " and signature of 0x%08lx (%c%c%c%c). mail drew@Colorado.EDU\n",
+ (signature == PCI_SIGNATURE) ? "WARNING" : "ERROR",
+ present_status, signature,
+ (char) (signature >> 0), (char) (signature >> 8),
+ (char) (signature >> 16), (char) (signature >> 24));
+
+ if (signature != PCI_SIGNATURE)
+ pcibios_entry = 0;
+ }
+ if (pcibios_entry) {
+ printk ("pcibios_init : PCI BIOS revision %x.%02x entry at 0x%lx\n",
+ major_revision, minor_revision, pcibios_entry);
+ }
+ }
+
+#if 0
+ NCR53c810_test();
+#endif
+ return memory_start;
+}
+
+int pcibios_present(void)
+{
+ return pcibios_entry ? 1 : 0;
+}
+
+int pcibios_find_class_code (unsigned long class_code, unsigned short index,
+ unsigned char *bus, unsigned char *device_fn)
+{
+ unsigned long bx;
+ unsigned long ret;
+
+ __asm__ ("lcall (%%edi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=b" (bx),
+ "=a" (ret)
+ : "1" (PCIBIOS_FIND_PCI_CLASS_CODE),
+ "c" (class_code),
+ "S" ((int) index),
+ "D" (&pci_indirect));
+ *bus = (bx >> 8) & 0xff;
+ *device_fn = bx & 0xff;
+ return (int) (ret & 0xff00) >> 8;
+}
+
+
+int pcibios_find_device (unsigned short vendor, unsigned short device_id,
+ unsigned short index, unsigned char *bus, unsigned char *device_fn)
+{
+ unsigned short bx;
+ unsigned short ret;
+
+ __asm__("lcall (%%edi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=b" (bx),
+ "=a" (ret)
+ : "1" (PCIBIOS_FIND_PCI_DEVICE),
+ "c" (device_id),
+ "d" (vendor),
+ "S" ((int) index),
+ "D" (&pci_indirect));
+ *bus = (bx >> 8) & 0xff;
+ *device_fn = bx & 0xff;
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_read_config_byte(unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned char *value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (ret)
+ : "1" (PCIBIOS_READ_CONFIG_BYTE),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_read_config_word (unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned short *value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (ret)
+ : "1" (PCIBIOS_READ_CONFIG_WORD),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_read_config_dword (unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned long *value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=c" (*value),
+ "=a" (ret)
+ : "1" (PCIBIOS_READ_CONFIG_DWORD),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_write_config_byte (unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned char value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (ret)
+ : "0" (PCIBIOS_WRITE_CONFIG_BYTE),
+ "c" (value),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_write_config_word (unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned short value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (ret)
+ : "0" (PCIBIOS_WRITE_CONFIG_WORD),
+ "c" (value),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+int pcibios_write_config_dword (unsigned char bus,
+ unsigned char device_fn, unsigned char where, unsigned long value)
+{
+ unsigned long ret;
+ unsigned long bx = (bus << 8) | device_fn;
+
+ __asm__("lcall (%%esi)\n\t"
+ "jc 1f\n\t"
+ "xor %%ah, %%ah\n"
+ "1:"
+ : "=a" (ret)
+ : "0" (PCIBIOS_WRITE_CONFIG_DWORD),
+ "c" (value),
+ "b" (bx),
+ "D" ((long) where),
+ "S" (&pci_indirect));
+ return (int) (ret & 0xff00) >> 8;
+}
+
+void NCR53c810_test(void)
+{
+ unsigned char bus, device_fn;
+ unsigned short index;
+ int ret;
+ unsigned char row, col;
+ unsigned long val;
+
+ for (index = 0; index < 4; ++index) {
+ ret = pcibios_find_device (
+ (unsigned short) PCI_VENDOR_ID_NCR,
+ (unsigned short) PCI_DEVICE_ID_NCR_53C810,
+ index, &bus, &device_fn);
+ if (ret)
+ break;
+ printk ("ncr53c810 : at PCI bus %d, device %d, function %d.",
+ bus, ((device_fn & 0xf8) >> 3), (device_fn & 7));
+ for (row = 0; row < 0x3c; row += 0x10) {
+ printk ("\n reg 0x%02x ", row);
+ for (col = 0; col < 0x10; col += 4) {
+ if (!(ret = pcibios_read_config_dword (bus, device_fn, row+col, &val)))
+ printk ("0x%08lx ", val);
+ else
+ printk ("error 0x%02x ", ret);
+ }
+ }
+ printk ("\n");
+ }
+}
+
+char *pcibios_strerror (int error)
+{
+ static char buf[80];
+
+ switch (error) {
+ case PCIBIOS_SUCCESSFUL:
+ return "SUCCESSFUL";
+
+ case PCIBIOS_FUNC_NOT_SUPPORTED:
+ return "FUNC_NOT_SUPPORTED";
+
+ case PCIBIOS_BAD_VENDOR_ID:
+ return "SUCCESSFUL";
+
+ case PCIBIOS_DEVICE_NOT_FOUND:
+ return "DEVICE_NOT_FOUND";
+
+ case PCIBIOS_BAD_REGISTER_NUMBER:
+ return "BAD_REGISTER_NUMBER";
+
+ default:
+ sprintf (buf, "UNKNOWN RETURN 0x%x", error);
+ return buf;
+ }
+}
+
+#endif
+
+unsigned long bios32_init(unsigned long memory_start, unsigned long memory_end)
+{
+ union bios32 *check;
+ unsigned char sum;
+ int i, length;
+
+ /*
+ * Follow the standard procedure for locating the BIOS32 Service
+ * directory by scanning the permissible address range from
+ * 0xe0000 through 0xfffff for a valid BIOS32 structure.
+ *
+ * The PCI BIOS doesn't seem to work too well on many machines,
+ * so we disable this unless it's really needed (NCR SCSI driver)
+ */
+
+ for (check = (union bios32 *) 0xe0000; check <= (union bios32 *) 0xffff0; ++check) {
+ if (check->fields.signature != BIOS32_SIGNATURE)
+ continue;
+ length = check->fields.length * 16;
+ if (!length)
+ continue;
+ sum = 0;
+ for (i = 0; i < length ; ++i)
+ sum += check->chars[i];
+ if (sum != 0)
+ continue;
+ if (check->fields.revision != 0) {
+ printk("bios32_init : unsupported revision %d at 0x%p, mail drew@colorado.edu\n",
+ check->fields.revision, check);
+ continue;
+ }
+ printk ("bios32_init : BIOS32 Service Directory structure at 0x%p\n", check);
+ if (!bios32_entry) {
+ bios32_indirect.address = bios32_entry = check->fields.entry;
+ printk ("bios32_init : BIOS32 Service Directory entry at 0x%lx\n", bios32_entry);
+ } else {
+ printk ("bios32_init : multiple entries, mail drew@colorado.edu\n");
+ /*
+ * Jeremy Fitzhardinge reports at least one PCI BIOS
+ * with two different service directories, and as both
+ * worked for him, we'll just mention the fact, and
+ * not actually disallow it..
+ */
+#if 0
+ return memory_start;
+#endif
+ }
+ }
+#ifdef CONFIG_PCI
+ if (bios32_entry) {
+ memory_start = pcibios_init (memory_start, memory_end);
+ }
+#endif
+ return memory_start;
+}
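
The loop in bios32_init() above identifies a BIOS32 Service Directory by its "_32_" signature, a non-zero paragraph length, a zero 8-bit checksum over the whole structure, and revision 0. A minimal user-space sketch of the same validation follows, run against a fabricated in-memory header instead of the physical 0xe0000-0xfffff range (the entry-point value is made up):

    /* Hedged sketch: validates a BIOS32 service-directory header the same way
     * bios32_init() does, but against an in-memory test buffer rather than the
     * physical address range. Compile as an ordinary user-space program. */
    #include <stdio.h>
    #include <string.h>

    union bios32 {
        struct {
            unsigned int signature;     /* "_32_" */
            unsigned int entry;         /* 32-bit physical entry point */
            unsigned char revision;     /* must be 0 */
            unsigned char length;       /* length in 16-byte paragraphs */
            unsigned char checksum;     /* all bytes must sum to zero */
            unsigned char reserved[5];
        } fields;
        char chars[16];
    };

    #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))

    static int bios32_valid(const union bios32 *check)
    {
        unsigned char sum = 0;
        int i, length;

        if (check->fields.signature != BIOS32_SIGNATURE)
            return 0;
        length = check->fields.length * 16;
        if (!length)
            return 0;
        for (i = 0; i < length; i++)
            sum += check->chars[i];
        return sum == 0 && check->fields.revision == 0;
    }

    int main(void)
    {
        union bios32 hdr;
        unsigned char sum = 0;
        int i;

        memset(&hdr, 0, sizeof(hdr));
        hdr.fields.signature = BIOS32_SIGNATURE;
        hdr.fields.entry = 0x000fd000;       /* made-up entry point */
        hdr.fields.length = 1;               /* one 16-byte paragraph */
        for (i = 0; i < 16; i++)             /* fix up the checksum byte */
            sum += hdr.chars[i];
        hdr.fields.checksum = (unsigned char) -sum;

        printf("header %s\n", bios32_valid(&hdr) ? "accepted" : "rejected");
        return 0;
    }
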
diff --git a/kernel/dma.c b/kernel/dma.c
new file mode 100644
index 000000000..ce80c2fa6
--- /dev/null
+++ b/kernel/dma.c
@@ -0,0 +1,113 @@
+/* $Id: dma.c,v 1.5 1992/11/18 02:49:05 root Exp root $
+ * linux/kernel/dma.c: A DMA channel allocator. Inspired by linux/kernel/irq.c.
+ * Written by Hennus Bergman, 1992.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <asm/dma.h>
+
+
+/* A note on resource allocation:
+ *
+ * All drivers needing DMA channels, should allocate and release them
+ * through the public routines `request_dma()' and `free_dma()'.
+ *
+ * In order to avoid problems, all processes should allocate resources in
+ * the same sequence and release them in the reverse order.
+ *
+ * So, when allocating DMAs and IRQs, first allocate the IRQ, then the DMA.
+ * When releasing them, first release the DMA, then release the IRQ.
+ * If you don't, you may cause allocation requests to fail unnecessarily.
+ * This doesn't really matter now, but it will once we get real semaphores
+ * in the kernel.
+ */
+
+
+
+/* Channel n is busy iff dma_chan_busy[n] != 0.
+ * DMA0 used to be reserved for DRAM refresh, but apparently not any more...
+ * DMA4 is reserved for cascading.
+ */
+/*
+static volatile unsigned int dma_chan_busy[MAX_DMA_CHANNELS] = {
+ 0, 0, 0, 0, 1, 0, 0, 0
+};
+*/
+static volatile char * dma_chan_busy[MAX_DMA_CHANNELS] = {
+ 0,
+ 0,
+ 0,
+ 0,
+ "cascade",
+ 0,
+ 0,
+ 0
+};
+
+/* Atomically swap memory location [32 bits] with `newval'.
+ * This avoids the cli()/sti() junk and related problems.
+ * [And it's faster too :-)]
+ * Maybe this should be in include/asm/mutex.h and be used for
+ * implementing kernel-semaphores as well.
+ */
+static __inline__ unsigned int mutex_atomic_swap(volatile unsigned int * p, unsigned int newval)
+{
+ unsigned int semval = newval;
+
+ /* If one of the operands for the XCHG instructions is a memory ref,
+ * it makes the swap an uninterruptible RMW cycle.
+ *
+ * One operand must be in memory, the other in a register, otherwise
+ * the swap may not be atomic.
+ */
+
+ asm __volatile__ ("xchgl %2, %0\n"
+ : /* outputs: semval */ "=r" (semval)
+ : /* inputs: newval, p */ "0" (semval), "m" (*p)
+ ); /* p is a var, containing an address */
+ return semval;
+} /* mutex_atomic_swap */
+
+
+int get_dma_list(char *buf)
+{
+ int i, len = 0;
+
+ for (i = 0 ; i < MAX_DMA_CHANNELS ; i++) {
+ if (dma_chan_busy[i]) {
+ len += sprintf(buf+len, "%2d: %s\n",
+ i,
+ dma_chan_busy[i]);
+ }
+ }
+ return len;
+}
+
+int request_dma(unsigned int dmanr, char * deviceID)
+{
+ if (dmanr >= MAX_DMA_CHANNELS)
+ return -EINVAL;
+
+ if (mutex_atomic_swap((unsigned int *) &dma_chan_busy[dmanr], (unsigned int) deviceID) != 0)
+ return -EBUSY;
+
+ /* old flag was 0, now contains 1 to indicate busy */
+ return 0;
+} /* request_dma */
+
+
+void free_dma(unsigned int dmanr)
+{
+ if (dmanr >= MAX_DMA_CHANNELS) {
+ printk("Trying to free DMA%d\n", dmanr);
+ return;
+ }
+
+ if (mutex_atomic_swap((unsigned int *) &dma_chan_busy[dmanr], 0) == 0) {
+ printk("Trying to free free DMA%d\n", dmanr);
+ return;
+ }
+
+} /* free_dma */
+
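
The resource-ordering rule spelled out at the top of dma.c (allocate the IRQ before the DMA channel, release in the reverse order) can be illustrated with the following user-space sketch; request_irq()/request_dma() and their release counterparts are replaced by printing stubs, and the device name, IRQ and channel numbers are invented for the example:

    /* Hedged sketch: stand-ins for the kernel allocation routines, used only
     * to demonstrate the ordering convention documented above. */
    #include <stdio.h>

    static int request_irq_stub(unsigned int irq, const char *dev)
    {
        printf("request_irq(%u, \"%s\")\n", irq, dev);
        return 0;                        /* pretend it succeeded */
    }

    static int request_dma_stub(unsigned int dmanr, const char *dev)
    {
        printf("request_dma(%u, \"%s\")\n", dmanr, dev);
        return 0;
    }

    static void free_dma_stub(unsigned int dmanr) { printf("free_dma(%u)\n", dmanr); }
    static void free_irq_stub(unsigned int irq)   { printf("free_irq(%u)\n", irq); }

    static int probe_example(void)
    {
        const char *dev = "exampledev";  /* hypothetical device name */

        if (request_irq_stub(5, dev))    /* 1. grab the IRQ first */
            return -1;
        if (request_dma_stub(1, dev)) {  /* 2. then the DMA channel */
            free_irq_stub(5);            /* unwind in reverse on failure */
            return -1;
        }
        return 0;
    }

    static void remove_example(void)
    {
        free_dma_stub(1);                /* release the DMA channel first... */
        free_irq_stub(5);                /* ...then the IRQ */
    }

    int main(void)
    {
        if (probe_example() == 0)
            remove_example();
        return 0;
    }
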
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
new file mode 100644
index 000000000..c80423314
--- /dev/null
+++ b/kernel/exec_domain.c
@@ -0,0 +1,102 @@
+#include <linux/personality.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+
+
+static asmlinkage void no_lcall7(struct pt_regs * regs);
+
+
+static unsigned long ident_map[32] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31
+};
+
+struct exec_domain default_exec_domain = {
+ "Linux", /* name */
+ no_lcall7, /* lcall7 causes a seg fault. */
+ 0, 0xff, /* All personalities. */
+ ident_map, /* Identity map signals. */
+ ident_map, /* - both ways. */
+ NULL, /* No usage counter. */
+ NULL /* Nothing after this in the list. */
+};
+
+static struct exec_domain *exec_domains = &default_exec_domain;
+
+
+static asmlinkage void no_lcall7(struct pt_regs * regs)
+{
+ send_sig(SIGSEGV, current, 1);
+}
+
+struct exec_domain *lookup_exec_domain(unsigned long personality)
+{
+ unsigned long pers = personality & PER_MASK;
+ struct exec_domain *it;
+
+ for (it=exec_domains; it; it=it->next)
+ if (pers >= it->pers_low
+ && pers <= it->pers_high)
+ return it;
+
+ /* Should never get this far. */
+ printk(KERN_ERR "No execution domain for personality 0x%02lx\n", pers);
+ return NULL;
+}
+
+int register_exec_domain(struct exec_domain *it)
+{
+ struct exec_domain *tmp;
+
+ if (!it)
+ return -EINVAL;
+ if (it->next)
+ return -EBUSY;
+ for (tmp=exec_domains; tmp; tmp=tmp->next)
+ if (tmp == it)
+ return -EBUSY;
+ it->next = exec_domains;
+ exec_domains = it;
+ return 0;
+}
+
+int unregister_exec_domain(struct exec_domain *it)
+{
+ struct exec_domain ** tmp;
+
+ tmp = &exec_domains;
+ while (*tmp) {
+ if (it == *tmp) {
+ *tmp = it->next;
+ it->next = NULL;
+ return 0;
+ }
+ tmp = &(*tmp)->next;
+ }
+ return -EINVAL;
+}
+
+asmlinkage int sys_personality(unsigned long personality)
+{
+ struct exec_domain *it;
+ unsigned long old_personality;
+
+ if (personality == 0xffffffff)
+ return current->personality;
+
+ it = lookup_exec_domain(personality);
+ if (!it)
+ return -EINVAL;
+
+ old_personality = current->personality;
+ if (current->exec_domain && current->exec_domain->use_count)
+ (*current->exec_domain->use_count)--;
+ current->personality = personality;
+ current->exec_domain = it;
+ if (current->exec_domain->use_count)
+ (*current->exec_domain->use_count)++;
+
+ return old_personality;
+}
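
register_exec_domain() above pushes a new domain onto a singly linked list headed by the built-in "Linux" domain, and lookup_exec_domain() returns the first domain whose personality range covers the request. The sketch below models that list handling in user space; the structure layout mirrors the initializer of default_exec_domain, the exact kernel field names are assumptions, and the "iBCS2" domain is purely illustrative:

    /* Hedged sketch: a user-space model of the exec-domain registration and
     * lookup logic. Field names beyond what the initializer above implies are
     * assumptions, not the kernel's declarations. */
    #include <stdio.h>
    #include <stddef.h>

    struct exec_domain {
        const char *name;
        void (*handler)(void);           /* stand-in for the lcall7 handler */
        unsigned char pers_low, pers_high;
        unsigned long *signal_map;
        unsigned long *signal_invmap;
        long *use_count;
        struct exec_domain *next;
    };

    static unsigned long ident_map[32];  /* identity-map placeholder */

    static struct exec_domain default_domain = {
        "Linux", NULL, 0, 0xff, ident_map, ident_map, NULL, NULL
    };
    static struct exec_domain *domains = &default_domain;

    static int register_domain(struct exec_domain *it)
    {
        struct exec_domain *tmp;

        if (!it || it->next)
            return -1;
        for (tmp = domains; tmp; tmp = tmp->next)
            if (tmp == it)
                return -1;               /* already registered */
        it->next = domains;              /* push on the front of the list */
        domains = it;
        return 0;
    }

    static struct exec_domain *lookup(unsigned long pers)
    {
        struct exec_domain *it;

        for (it = domains; it; it = it->next)
            if (pers >= it->pers_low && pers <= it->pers_high)
                return it;
        return NULL;
    }

    int main(void)
    {
        /* Hypothetical domain claiming personality 1 only. */
        static struct exec_domain ibcs2 = {
            "iBCS2", NULL, 1, 1, ident_map, ident_map, NULL, NULL
        };

        register_domain(&ibcs2);
        printf("personality 1 -> %s\n", lookup(1)->name);
        printf("personality 0 -> %s\n", lookup(0)->name);
        return 0;
    }
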
diff --git a/kernel/exit.c b/kernel/exit.c
new file mode 100644
index 000000000..b2a8c4fb0
--- /dev/null
+++ b/kernel/exit.c
@@ -0,0 +1,603 @@
+/*
+ * linux/kernel/exit.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#define DEBUG_PROC_TREE
+
+#include <linux/wait.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/resource.h>
+#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/malloc.h>
+
+#include <asm/segment.h>
+extern void sem_exit (void);
+
+int getrusage(struct task_struct *, int, struct rusage *);
+
+static int generate(unsigned long sig, struct task_struct * p)
+{
+ unsigned long mask = 1 << (sig-1);
+ struct sigaction * sa = sig + p->sigaction - 1;
+
+ /* always generate signals for traced processes ??? */
+ if (p->flags & PF_PTRACED) {
+ p->signal |= mask;
+ return 1;
+ }
+ /* don't bother with ignored signals (but SIGCHLD is special) */
+ if (sa->sa_handler == SIG_IGN && sig != SIGCHLD)
+ return 0;
+ /* some signals are ignored by default.. (but SIGCONT already did its deed) */
+ if ((sa->sa_handler == SIG_DFL) &&
+ (sig == SIGCONT || sig == SIGCHLD || sig == SIGWINCH))
+ return 0;
+ p->signal |= mask;
+ return 1;
+}
+
+int send_sig(unsigned long sig,struct task_struct * p,int priv)
+{
+ if (!p || sig > 32)
+ return -EINVAL;
+ if (!priv && ((sig != SIGCONT) || (current->session != p->session)) &&
+ (current->euid != p->euid) && (current->uid != p->uid) && !suser())
+ return -EPERM;
+ if (!sig)
+ return 0;
+ /*
+ * Forget it if the process is already zombie'd.
+ */
+ if (p->state == TASK_ZOMBIE)
+ return 0;
+ if ((sig == SIGKILL) || (sig == SIGCONT)) {
+ if (p->state == TASK_STOPPED)
+ p->state = TASK_RUNNING;
+ p->exit_code = 0;
+ p->signal &= ~( (1<<(SIGSTOP-1)) | (1<<(SIGTSTP-1)) |
+ (1<<(SIGTTIN-1)) | (1<<(SIGTTOU-1)) );
+ }
+ /* Depends on order SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU */
+ if ((sig >= SIGSTOP) && (sig <= SIGTTOU))
+ p->signal &= ~(1<<(SIGCONT-1));
+ /* Actually generate the signal */
+ generate(sig,p);
+ return 0;
+}
+
+void notify_parent(struct task_struct * tsk)
+{
+ if (tsk->p_pptr == task[1])
+ tsk->exit_signal = SIGCHLD;
+ send_sig(tsk->exit_signal, tsk->p_pptr, 1);
+ wake_up_interruptible(&tsk->p_pptr->wait_chldexit);
+}
+
+void release(struct task_struct * p)
+{
+ int i;
+
+ if (!p)
+ return;
+ if (p == current) {
+ printk("task releasing itself\n");
+ return;
+ }
+ for (i=1 ; i<NR_TASKS ; i++)
+ if (task[i] == p) {
+ task[i] = NULL;
+ REMOVE_LINKS(p);
+ if (STACK_MAGIC != *(unsigned long *)p->kernel_stack_page)
+ printk(KERN_ALERT "release: %s kernel stack corruption. Aiee\n", p->comm);
+ free_page(p->kernel_stack_page);
+ free_page((long) p);
+ return;
+ }
+ panic("trying to release non-existent task");
+}
+
+#ifdef DEBUG_PROC_TREE
+/*
+ * Check to see if a task_struct pointer is present in the task[] array
+ * Return 0 if found, and 1 if not found.
+ */
+int bad_task_ptr(struct task_struct *p)
+{
+ int i;
+
+ if (!p)
+ return 0;
+ for (i=0 ; i<NR_TASKS ; i++)
+ if (task[i] == p)
+ return 0;
+ return 1;
+}
+
+/*
+ * This routine scans the pid tree and makes sure the rep invariant still
+ * holds. Used for debugging only, since it's very slow....
+ *
+ * It looks a lot scarier than it really is.... we're doing nothing more
+ * than verifying the doubly-linked list found in p_ysptr and p_osptr,
+ * and checking it corresponds with the process tree defined by p_cptr and
+ * p_pptr.
+ */
+void audit_ptree(void)
+{
+ int i;
+
+ for (i=1 ; i<NR_TASKS ; i++) {
+ if (!task[i])
+ continue;
+ if (bad_task_ptr(task[i]->p_pptr))
+ printk("Warning, pid %d's parent link is bad\n",
+ task[i]->pid);
+ if (bad_task_ptr(task[i]->p_cptr))
+ printk("Warning, pid %d's child link is bad\n",
+ task[i]->pid);
+ if (bad_task_ptr(task[i]->p_ysptr))
+ printk("Warning, pid %d's ys link is bad\n",
+ task[i]->pid);
+ if (bad_task_ptr(task[i]->p_osptr))
+ printk("Warning, pid %d's os link is bad\n",
+ task[i]->pid);
+ if (task[i]->p_pptr == task[i])
+ printk("Warning, pid %d parent link points to self\n",
+ task[i]->pid);
+ if (task[i]->p_cptr == task[i])
+ printk("Warning, pid %d child link points to self\n",
+ task[i]->pid);
+ if (task[i]->p_ysptr == task[i])
+ printk("Warning, pid %d ys link points to self\n",
+ task[i]->pid);
+ if (task[i]->p_osptr == task[i])
+ printk("Warning, pid %d os link points to self\n",
+ task[i]->pid);
+ if (task[i]->p_osptr) {
+ if (task[i]->p_pptr != task[i]->p_osptr->p_pptr)
+ printk(
+ "Warning, pid %d older sibling %d parent is %d\n",
+ task[i]->pid, task[i]->p_osptr->pid,
+ task[i]->p_osptr->p_pptr->pid);
+ if (task[i]->p_osptr->p_ysptr != task[i])
+ printk(
+ "Warning, pid %d older sibling %d has mismatched ys link\n",
+ task[i]->pid, task[i]->p_osptr->pid);
+ }
+ if (task[i]->p_ysptr) {
+ if (task[i]->p_pptr != task[i]->p_ysptr->p_pptr)
+ printk(
+ "Warning, pid %d younger sibling %d parent is %d\n",
+ task[i]->pid, task[i]->p_ysptr->pid,
+ task[i]->p_ysptr->p_pptr->pid);
+ if (task[i]->p_ysptr->p_osptr != task[i])
+ printk(
+ "Warning, pid %d younger sibling %d has mismatched os link\n",
+ task[i]->pid, task[i]->p_ysptr->pid);
+ }
+ if (task[i]->p_cptr) {
+ if (task[i]->p_cptr->p_pptr != task[i])
+ printk(
+ "Warning, pid %d youngest child %d has mismatched parent link\n",
+ task[i]->pid, task[i]->p_cptr->pid);
+ if (task[i]->p_cptr->p_ysptr)
+ printk(
+ "Warning, pid %d youngest child %d has non-NULL ys link\n",
+ task[i]->pid, task[i]->p_cptr->pid);
+ }
+ }
+}
+#endif /* DEBUG_PROC_TREE */
+
+/*
+ * This checks not only the pgrp, but falls back on the pid if no
+ * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
+ * without this...
+ */
+int session_of_pgrp(int pgrp)
+{
+ struct task_struct *p;
+ int fallback;
+
+ fallback = -1;
+ for_each_task(p) {
+ if (p->session <= 0)
+ continue;
+ if (p->pgrp == pgrp)
+ return p->session;
+ if (p->pid == pgrp)
+ fallback = p->session;
+ }
+ return fallback;
+}
+
+/*
+ * kill_pg() sends a signal to a process group: this is what the tty
+ * control characters do (^C, ^Z etc)
+ */
+int kill_pg(int pgrp, int sig, int priv)
+{
+ struct task_struct *p;
+ int err,retval = -ESRCH;
+ int found = 0;
+
+ if (sig<0 || sig>32 || pgrp<=0)
+ return -EINVAL;
+ for_each_task(p) {
+ if (p->pgrp == pgrp) {
+ if ((err = send_sig(sig,p,priv)) != 0)
+ retval = err;
+ else
+ found++;
+ }
+ }
+ return(found ? 0 : retval);
+}
+
+/*
+ * kill_sl() sends a signal to the session leader: this is used
+ * to send SIGHUP to the controlling process of a terminal when
+ * the connection is lost.
+ */
+int kill_sl(int sess, int sig, int priv)
+{
+ struct task_struct *p;
+ int err,retval = -ESRCH;
+ int found = 0;
+
+ if (sig<0 || sig>32 || sess<=0)
+ return -EINVAL;
+ for_each_task(p) {
+ if (p->session == sess && p->leader) {
+ if ((err = send_sig(sig,p,priv)) != 0)
+ retval = err;
+ else
+ found++;
+ }
+ }
+ return(found ? 0 : retval);
+}
+
+int kill_proc(int pid, int sig, int priv)
+{
+ struct task_struct *p;
+
+ if (sig<0 || sig>32)
+ return -EINVAL;
+ for_each_task(p) {
+ if (p && p->pid == pid)
+ return send_sig(sig,p,priv);
+ }
+ return(-ESRCH);
+}
+
+/*
+ * POSIX specifies that kill(-1,sig) is unspecified, but what we have
+ * is probably wrong. Should make it like BSD or SYSV.
+ */
+asmlinkage int sys_kill(int pid,int sig)
+{
+ int err, retval = 0, count = 0;
+
+ if (!pid)
+ return(kill_pg(current->pgrp,sig,0));
+ if (pid == -1) {
+ struct task_struct * p;
+ for_each_task(p) {
+ if (p->pid > 1 && p != current) {
+ ++count;
+ if ((err = send_sig(sig,p,0)) != -EPERM)
+ retval = err;
+ }
+ }
+ return(count ? retval : -ESRCH);
+ }
+ if (pid < 0)
+ return(kill_pg(-pid,sig,0));
+ /* Normal kill */
+ return(kill_proc(pid,sig,0));
+}
+
+/*
+ * Determine if a process group is "orphaned", according to the POSIX
+ * definition in 2.2.2.52. Orphaned process groups are not to be affected
+ * by terminal-generated stop signals. Newly orphaned process groups are
+ * to receive a SIGHUP and a SIGCONT.
+ *
+ * "I ask you, have you ever known what it is to be an orphan?"
+ */
+int is_orphaned_pgrp(int pgrp)
+{
+ struct task_struct *p;
+
+ for_each_task(p) {
+ if ((p->pgrp != pgrp) ||
+ (p->state == TASK_ZOMBIE) ||
+ (p->p_pptr->pid == 1))
+ continue;
+ if ((p->p_pptr->pgrp != pgrp) &&
+ (p->p_pptr->session == p->session))
+ return 0;
+ }
+ return(1); /* (sighing) "Often!" */
+}
+
+static int has_stopped_jobs(int pgrp)
+{
+ struct task_struct * p;
+
+ for_each_task(p) {
+ if (p->pgrp != pgrp)
+ continue;
+ if (p->state == TASK_STOPPED)
+ return(1);
+ }
+ return(0);
+}
+
+static void forget_original_parent(struct task_struct * father)
+{
+ struct task_struct * p;
+
+ for_each_task(p) {
+ if (p->p_opptr == father)
+ if (task[1])
+ p->p_opptr = task[1];
+ else
+ p->p_opptr = task[0];
+ }
+}
+
+static void exit_mm(void)
+{
+ struct vm_area_struct * mpnt;
+
+ mpnt = current->mm->mmap;
+ current->mm->mmap = NULL;
+ while (mpnt) {
+ struct vm_area_struct * next = mpnt->vm_next;
+ if (mpnt->vm_ops && mpnt->vm_ops->close)
+ mpnt->vm_ops->close(mpnt);
+ if (mpnt->vm_inode)
+ iput(mpnt->vm_inode);
+ kfree(mpnt);
+ mpnt = next;
+ }
+
+ /* forget local segments */
+ __asm__ __volatile__("mov %w0,%%fs ; mov %w0,%%gs ; lldt %w0"
+ : /* no outputs */
+ : "r" (0));
+ current->tss.ldt = 0;
+ if (current->ldt) {
+ void * ldt = current->ldt;
+ current->ldt = NULL;
+ vfree(ldt);
+ }
+
+ free_page_tables(current);
+}
+
+static void exit_files(void)
+{
+ int i;
+
+ for (i=0 ; i<NR_OPEN ; i++)
+ if (current->files->fd[i])
+ sys_close(i);
+}
+
+static void exit_fs(void)
+{
+ iput(current->fs->pwd);
+ current->fs->pwd = NULL;
+ iput(current->fs->root);
+ current->fs->root = NULL;
+}
+
+NORET_TYPE void do_exit(long code)
+{
+ struct task_struct *p;
+
+ if (intr_count) {
+ printk("Aiee, killing interrupt handler\n");
+ intr_count = 0;
+ }
+fake_volatile:
+ if (current->semundo)
+ sem_exit();
+ exit_mm();
+ exit_files();
+ exit_fs();
+ forget_original_parent(current);
+ /*
+ * Check to see if any process groups have become orphaned
+ * as a result of our exiting, and if they have any stopped
+ * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
+ *
+ * Case i: Our father is in a different pgrp than we are
+ * and we were the only connection outside, so our pgrp
+ * is about to become orphaned.
+ */
+ if ((current->p_pptr->pgrp != current->pgrp) &&
+ (current->p_pptr->session == current->session) &&
+ is_orphaned_pgrp(current->pgrp) &&
+ has_stopped_jobs(current->pgrp)) {
+ kill_pg(current->pgrp,SIGHUP,1);
+ kill_pg(current->pgrp,SIGCONT,1);
+ }
+ /* Let father know we died */
+ notify_parent(current);
+
+ /*
+ * This loop does two things:
+ *
+ * A. Make init inherit all the child processes
+ * B. Check to see if any process groups have become orphaned
+ * as a result of our exiting, and if they have any stopped
+ * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
+ */
+ while ((p = current->p_cptr) != NULL) {
+ current->p_cptr = p->p_osptr;
+ p->p_ysptr = NULL;
+ p->flags &= ~(PF_PTRACED|PF_TRACESYS);
+ if (task[1] && task[1] != current)
+ p->p_pptr = task[1];
+ else
+ p->p_pptr = task[0];
+ p->p_osptr = p->p_pptr->p_cptr;
+ p->p_osptr->p_ysptr = p;
+ p->p_pptr->p_cptr = p;
+ if (p->state == TASK_ZOMBIE)
+ notify_parent(p);
+ /*
+ * process group orphan check
+ * Case ii: Our child is in a different pgrp
+ * than we are, and it was the only connection
+ * outside, so the child pgrp is now orphaned.
+ */
+ if ((p->pgrp != current->pgrp) &&
+ (p->session == current->session) &&
+ is_orphaned_pgrp(p->pgrp) &&
+ has_stopped_jobs(p->pgrp)) {
+ kill_pg(p->pgrp,SIGHUP,1);
+ kill_pg(p->pgrp,SIGCONT,1);
+ }
+ }
+ if (current->leader)
+ disassociate_ctty(1);
+ if (last_task_used_math == current)
+ last_task_used_math = NULL;
+ current->state = TASK_ZOMBIE;
+ current->exit_code = code;
+ current->mm->rss = 0;
+#ifdef DEBUG_PROC_TREE
+ audit_ptree();
+#endif
+ if (current->exec_domain && current->exec_domain->use_count)
+ (*current->exec_domain->use_count)--;
+ if (current->binfmt && current->binfmt->use_count)
+ (*current->binfmt->use_count)--;
+ schedule();
+/*
+ * In order to get rid of the "volatile function does return" message
+ * I did this little loop that confuses gcc to think do_exit really
+ * is volatile. In fact it's schedule() that is volatile in some
+ * circumstances: when current->state = ZOMBIE, schedule() never
+ * returns.
+ *
+ * In fact the natural way to do all this is to have the label and the
+ * goto right after each other, but I put the fake_volatile label at
+ * the start of the function just in case something /really/ bad
+ * happens, and the schedule returns. This way we can try again. I'm
+ * not paranoid: it's just that everybody is out to get me.
+ */
+ goto fake_volatile;
+}
+
+asmlinkage int sys_exit(int error_code)
+{
+ do_exit((error_code&0xff)<<8);
+}
+
+asmlinkage int sys_wait4(pid_t pid,unsigned long * stat_addr, int options, struct rusage * ru)
+{
+ int flag, retval;
+ struct wait_queue wait = { current, NULL };
+ struct task_struct *p;
+
+ if (stat_addr) {
+ flag = verify_area(VERIFY_WRITE, stat_addr, 4);
+ if (flag)
+ return flag;
+ }
+ add_wait_queue(&current->wait_chldexit,&wait);
+repeat:
+ flag=0;
+ for (p = current->p_cptr ; p ; p = p->p_osptr) {
+ if (pid>0) {
+ if (p->pid != pid)
+ continue;
+ } else if (!pid) {
+ if (p->pgrp != current->pgrp)
+ continue;
+ } else if (pid != -1) {
+ if (p->pgrp != -pid)
+ continue;
+ }
+ /* wait for cloned processes iff the __WCLONE flag is set */
+ if ((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
+ continue;
+ flag = 1;
+ switch (p->state) {
+ case TASK_STOPPED:
+ if (!p->exit_code)
+ continue;
+ if (!(options & WUNTRACED) && !(p->flags & PF_PTRACED))
+ continue;
+ if (stat_addr)
+ put_fs_long((p->exit_code << 8) | 0x7f,
+ stat_addr);
+ p->exit_code = 0;
+ if (ru != NULL)
+ getrusage(p, RUSAGE_BOTH, ru);
+ retval = p->pid;
+ goto end_wait4;
+ case TASK_ZOMBIE:
+ current->cutime += p->utime + p->cutime;
+ current->cstime += p->stime + p->cstime;
+ current->mm->cmin_flt += p->mm->min_flt + p->mm->cmin_flt;
+ current->mm->cmaj_flt += p->mm->maj_flt + p->mm->cmaj_flt;
+ if (ru != NULL)
+ getrusage(p, RUSAGE_BOTH, ru);
+ flag = p->pid;
+ if (stat_addr)
+ put_fs_long(p->exit_code, stat_addr);
+ if (p->p_opptr != p->p_pptr) {
+ REMOVE_LINKS(p);
+ p->p_pptr = p->p_opptr;
+ SET_LINKS(p);
+ notify_parent(p);
+ } else
+ release(p);
+#ifdef DEBUG_PROC_TREE
+ audit_ptree();
+#endif
+ retval = flag;
+ goto end_wait4;
+ default:
+ continue;
+ }
+ }
+ if (flag) {
+ retval = 0;
+ if (options & WNOHANG)
+ goto end_wait4;
+ current->state=TASK_INTERRUPTIBLE;
+ schedule();
+ current->signal &= ~(1<<(SIGCHLD-1));
+ retval = -ERESTARTSYS;
+ if (current->signal & ~current->blocked)
+ goto end_wait4;
+ goto repeat;
+ }
+ retval = -ECHILD;
+end_wait4:
+ remove_wait_queue(&current->wait_chldexit,&wait);
+ return retval;
+}
+
+/*
+ * sys_waitpid() remains for compatibility. waitpid() should be
+ * implemented by calling sys_wait4() from libc.a.
+ */
+asmlinkage int sys_waitpid(pid_t pid,unsigned long * stat_addr, int options)
+{
+ return sys_wait4(pid, stat_addr, options, NULL);
+}
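
sys_wait4() above reports a stopped child as (exit_code << 8) | 0x7f and a zombie's exit_code verbatim, with sys_exit() having placed the exit value in bits 8-15 and a fatal signal leaving its number in the low bits. A small sketch of taking that status word apart (an illustration of the encoding, not the libc wait macros):

    /* Hedged sketch: decoding the status word written to *stat_addr by
     * sys_wait4() above. The signal numbers in main() are the usual i386
     * values and are given only as examples. */
    #include <stdio.h>

    static void decode_status(unsigned int status)
    {
        if ((status & 0xff) == 0x7f)
            printf("stopped by signal %u\n", (status >> 8) & 0xff);
        else if ((status & 0x7f) == 0)
            printf("exited with code %u\n", (status >> 8) & 0xff);
        else
            printf("killed by signal %u\n", status & 0x7f);
    }

    int main(void)
    {
        decode_status((19U << 8) | 0x7f);  /* e.g. stopped by SIGSTOP (19 on i386) */
        decode_status(1U << 8);            /* exited with code 1 */
        decode_status(9U);                 /* killed by SIGKILL */
        return 0;
    }
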
diff --git a/kernel/fork.c b/kernel/fork.c
new file mode 100644
index 000000000..63a54e999
--- /dev/null
+++ b/kernel/fork.c
@@ -0,0 +1,265 @@
+/*
+ * linux/kernel/fork.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * 'fork.c' contains the help-routines for the 'fork' system call
+ * (see also system_call.s).
+ * Fork is rather simple, once you get the hang of it, but the memory
+ * management can be a bitch. See 'mm/mm.c': 'copy_page_tables()'
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/malloc.h>
+#include <linux/ldt.h>
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+asmlinkage void ret_from_sys_call(void) __asm__("ret_from_sys_call");
+
+/* These should maybe be in <linux/tasks.h> */
+
+#define MAX_TASKS_PER_USER (NR_TASKS/2)
+#define MIN_TASKS_LEFT_FOR_ROOT 4
+
+long last_pid=0;
+
+static int find_empty_process(void)
+{
+ int free_task;
+ int i, tasks_free;
+ int this_user_tasks;
+
+repeat:
+ if ((++last_pid) & 0xffff8000)
+ last_pid=1;
+ this_user_tasks = 0;
+ tasks_free = 0;
+ free_task = -EAGAIN;
+ i = NR_TASKS;
+ while (--i > 0) {
+ if (!task[i]) {
+ free_task = i;
+ tasks_free++;
+ continue;
+ }
+ if (task[i]->uid == current->uid)
+ this_user_tasks++;
+ if (task[i]->pid == last_pid || task[i]->pgrp == last_pid ||
+ task[i]->session == last_pid)
+ goto repeat;
+ }
+ if (tasks_free <= MIN_TASKS_LEFT_FOR_ROOT ||
+ this_user_tasks > MAX_TASKS_PER_USER)
+ if (current->uid)
+ return -EAGAIN;
+ return free_task;
+}
+
+static struct file * copy_fd(struct file * old_file)
+{
+ struct file * new_file = get_empty_filp();
+ int error;
+
+ if (new_file) {
+ memcpy(new_file,old_file,sizeof(struct file));
+ new_file->f_count = 1;
+ if (new_file->f_inode)
+ new_file->f_inode->i_count++;
+ if (new_file->f_op && new_file->f_op->open) {
+ error = new_file->f_op->open(new_file->f_inode,new_file);
+ if (error) {
+ iput(new_file->f_inode);
+ new_file->f_count = 0;
+ new_file = NULL;
+ }
+ }
+ }
+ return new_file;
+}
+
+static int dup_mmap(struct task_struct * tsk)
+{
+ struct vm_area_struct * mpnt, **p, *tmp;
+
+ tsk->mm->mmap = NULL;
+ p = &tsk->mm->mmap;
+ for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
+ tmp = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+ *tmp = *mpnt;
+ tmp->vm_task = tsk;
+ tmp->vm_next = NULL;
+ if (tmp->vm_inode)
+ tmp->vm_inode->i_count++;
+ if (tmp->vm_ops && tmp->vm_ops->open)
+ tmp->vm_ops->open(tmp);
+ *p = tmp;
+ p = &tmp->vm_next;
+ }
+ return 0;
+}
+
+/*
+ * SHAREFD not yet implemented..
+ */
+static void copy_files(unsigned long clone_flags, struct task_struct * p)
+{
+ int i;
+ struct file * f;
+
+ if (clone_flags & COPYFD) {
+ for (i=0; i<NR_OPEN;i++)
+ if ((f = p->files->fd[i]) != NULL)
+ p->files->fd[i] = copy_fd(f);
+ } else {
+ for (i=0; i<NR_OPEN;i++)
+ if ((f = p->files->fd[i]) != NULL)
+ f->f_count++;
+ }
+}
+
+/*
+ * CLONEVM not yet correctly implemented: needs to clone the mmap
+ * instead of duplicating it..
+ */
+static int copy_mm(unsigned long clone_flags, struct task_struct * p)
+{
+ if (clone_flags & COPYVM) {
+ p->mm->swappable = 1;
+ p->mm->min_flt = p->mm->maj_flt = 0;
+ p->mm->cmin_flt = p->mm->cmaj_flt = 0;
+ if (copy_page_tables(p))
+ return 1;
+ return dup_mmap(p);
+ } else {
+ if (clone_page_tables(p))
+ return 1;
+ return dup_mmap(p); /* wrong.. */
+ }
+}
+
+static void copy_fs(unsigned long clone_flags, struct task_struct * p)
+{
+ if (current->fs->pwd)
+ current->fs->pwd->i_count++;
+ if (current->fs->root)
+ current->fs->root->i_count++;
+}
+
+#define IS_CLONE (regs.orig_eax == __NR_clone)
+
+/*
+ * Ok, this is the main fork-routine. It copies the system process
+ * information (task[nr]) and sets up the necessary registers. It
+ * also copies the data segment in its entirety.
+ */
+asmlinkage int sys_fork(struct pt_regs regs)
+{
+ struct pt_regs * childregs;
+ struct task_struct *p;
+ int i,nr;
+ unsigned long clone_flags = COPYVM | SIGCHLD;
+
+ if(!(p = (struct task_struct*)__get_free_page(GFP_KERNEL)))
+ goto bad_fork;
+ nr = find_empty_process();
+ if (nr < 0)
+ goto bad_fork_free;
+ task[nr] = p;
+ *p = *current;
+
+ if (p->exec_domain && p->exec_domain->use_count)
+ (*p->exec_domain->use_count)++;
+ if (p->binfmt && p->binfmt->use_count)
+ (*p->binfmt->use_count)++;
+
+ p->did_exec = 0;
+ p->kernel_stack_page = 0;
+ p->state = TASK_UNINTERRUPTIBLE;
+ p->flags &= ~(PF_PTRACED|PF_TRACESYS);
+ p->pid = last_pid;
+ p->p_pptr = p->p_opptr = current;
+ p->p_cptr = NULL;
+ SET_LINKS(p);
+ p->signal = 0;
+ p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
+ p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
+ p->leader = 0; /* process leadership doesn't inherit */
+ p->utime = p->stime = 0;
+ p->cutime = p->cstime = 0;
+ p->start_time = jiffies;
+/*
+ * set up new TSS and kernel stack
+ */
+ if (!(p->kernel_stack_page = get_free_page(GFP_KERNEL)))
+ goto bad_fork_cleanup;
+ *(unsigned long *)p->kernel_stack_page = STACK_MAGIC;
+ p->tss.es = KERNEL_DS;
+ p->tss.cs = KERNEL_CS;
+ p->tss.ss = KERNEL_DS;
+ p->tss.ds = KERNEL_DS;
+ p->tss.fs = USER_DS;
+ p->tss.gs = KERNEL_DS;
+ p->tss.ss0 = KERNEL_DS;
+ p->tss.esp0 = p->kernel_stack_page + PAGE_SIZE;
+ p->tss.tr = _TSS(nr);
+ childregs = ((struct pt_regs *) (p->kernel_stack_page + PAGE_SIZE)) - 1;
+ p->tss.esp = (unsigned long) childregs;
+ p->tss.eip = (unsigned long) ret_from_sys_call;
+ *childregs = regs;
+ childregs->eax = 0;
+ p->tss.back_link = 0;
+ p->tss.eflags = regs.eflags & 0xffffcfff; /* iopl is always 0 for a new process */
+ if (IS_CLONE) {
+ if (regs.ebx)
+ childregs->esp = regs.ebx;
+ clone_flags = regs.ecx;
+ if (childregs->esp == regs.esp)
+ clone_flags |= COPYVM;
+ }
+ p->exit_signal = clone_flags & CSIGNAL;
+ p->tss.ldt = _LDT(nr);
+ if (p->ldt) {
+ p->ldt = (struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
+ if (p->ldt != NULL)
+ memcpy(p->ldt, current->ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
+ }
+ p->tss.bitmap = offsetof(struct tss_struct,io_bitmap);
+ for (i = 0; i < IO_BITMAP_SIZE+1 ; i++) /* IO bitmap is actually SIZE+1 */
+ p->tss.io_bitmap[i] = ~0;
+ if (last_task_used_math == current)
+ __asm__("clts ; fnsave %0 ; frstor %0":"=m" (p->tss.i387));
+ if (copy_mm(clone_flags, p))
+ goto bad_fork_cleanup;
+ p->semundo = NULL;
+ copy_files(clone_flags, p);
+ copy_fs(clone_flags, p);
+ set_tss_desc(gdt+(nr<<1)+FIRST_TSS_ENTRY,&(p->tss));
+ if (p->ldt)
+ set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,p->ldt, 512);
+ else
+ set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,&default_ldt, 1);
+
+ p->counter = current->counter >> 1;
+ p->state = TASK_RUNNING; /* do this last, just in case */
+ return p->pid;
+bad_fork_cleanup:
+ task[nr] = NULL;
+ REMOVE_LINKS(p);
+ free_page(p->kernel_stack_page);
+bad_fork_free:
+ free_page((long) p);
+bad_fork:
+ return -EAGAIN;
+}
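
sys_fork() above writes STACK_MAGIC into the first word of the new kernel stack page, and release() in exit.c later warns if that word has been overwritten. The canary idea in isolation, with a made-up magic value and stack size (the real STACK_MAGIC constant lives in the kernel headers):

    /* Hedged sketch: the stack-canary check behind STACK_MAGIC. A magic word
     * is planted at the far end of the stack area when it is set up; if the
     * stack ever grows over it, the later check fails. Values are invented. */
    #include <stdio.h>
    #include <string.h>

    #define FAKE_STACK_MAGIC 0x57ac6c9dUL
    #define FAKE_STACK_WORDS 1024

    static unsigned long stack_area[FAKE_STACK_WORDS];

    static void setup_stack(void)
    {
        /* The lowest address gets the canary; the stack itself would grow
         * downward from the other end of the area. */
        stack_area[0] = FAKE_STACK_MAGIC;
    }

    static int stack_ok(void)
    {
        return stack_area[0] == FAKE_STACK_MAGIC;
    }

    int main(void)
    {
        setup_stack();
        printf("before overflow: %s\n", stack_ok() ? "ok" : "corrupted");

        /* Simulate a stack that grew past its limit and clobbered the canary. */
        memset(stack_area, 0, 16);
        printf("after overflow:  %s\n", stack_ok() ? "ok" : "corrupted");
        return 0;
    }
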
diff --git a/kernel/info.c b/kernel/info.c
new file mode 100644
index 000000000..c7b2b9a8c
--- /dev/null
+++ b/kernel/info.c
@@ -0,0 +1,42 @@
+/*
+ * linux/kernel/info.c
+ *
+ * Copyright (C) 1992 Darren Senn
+ */
+
+/* This implements the sysinfo() system call */
+
+#include <asm/segment.h>
+
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/unistd.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+
+asmlinkage int sys_sysinfo(struct sysinfo *info)
+{
+ int error;
+ struct sysinfo val;
+ struct task_struct **p;
+
+ error = verify_area(VERIFY_WRITE, info, sizeof(struct sysinfo));
+ if (error)
+ return error;
+ memset((char *)&val, 0, sizeof(struct sysinfo));
+
+ val.uptime = jiffies / HZ;
+
+ val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
+ val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
+ val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+
+ for (p = &LAST_TASK; p > &FIRST_TASK; p--)
+ if (*p) val.procs++;
+
+ si_meminfo(&val);
+ si_swapinfo(&val);
+
+ memcpy_tofs(info, &val, sizeof(struct sysinfo));
+ return 0;
+}
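
sys_sysinfo() above returns the uptime, the load averages rescaled to SI_LOAD_SHIFT fixed point, a process count and the memory/swap figures. A user-space caller might present them as below; the glibc sysinfo() wrapper and an SI_LOAD_SHIFT of 16 are assumed here, since neither is defined in this file:

    /* Hedged sketch: calling the sysinfo() wrapper and printing the fields
     * that sys_sysinfo() fills in. Assumes <sys/sysinfo.h> and 16-bit
     * fixed-point load averages (1 << SI_LOAD_SHIFT == 65536). */
    #include <stdio.h>
    #include <sys/sysinfo.h>

    int main(void)
    {
        struct sysinfo si;

        if (sysinfo(&si) != 0) {
            perror("sysinfo");
            return 1;
        }
        printf("uptime: %ld s\n", si.uptime);
        printf("load:   %.2f %.2f %.2f\n",
               si.loads[0] / 65536.0,
               si.loads[1] / 65536.0,
               si.loads[2] / 65536.0);
        printf("procs:  %u\n", si.procs);
        printf("ram:    %lu total, %lu free\n", si.totalram, si.freeram);
        return 0;
    }
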
diff --git a/kernel/ioport.c b/kernel/ioport.c
new file mode 100644
index 000000000..c61690e3c
--- /dev/null
+++ b/kernel/ioport.c
@@ -0,0 +1,194 @@
+/*
+ * linux/kernel/ioport.c
+ *
+ * This contains the io-permission bitmap code - written by obz, with changes
+ * by Linus.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+
+static unsigned long ioport_registrar[IO_BITMAP_SIZE] = {0, /* ... */};
+
+#define _IODEBUG
+
+#ifdef IODEBUG
+static char * ios(unsigned long l)
+{
+ static char str[33] = { '\0' };
+ int i;
+ unsigned long mask;
+
+ for (i = 0, mask = 0x80000000; i < 32; ++i, mask >>= 1)
+ str[i] = (l & mask) ? '1' : '0';
+ return str;
+}
+
+static void dump_io_bitmap(void)
+{
+ int i, j;
+ int numl = sizeof(current->tss.io_bitmap) >> 2;
+
+ for (i = j = 0; j < numl; ++i)
+ {
+ printk("%4d [%3x]: ", 64*i, 64*i);
+ printk("%s ", ios(current->tss.io_bitmap[j++]));
+ if (j < numl)
+ printk("%s", ios(current->tss.io_bitmap[j++]));
+ printk("\n");
+ }
+}
+#endif
+
+/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
+asmlinkage void set_bitmap(unsigned long *bitmap,
+ short base, short extent, int new_value)
+{
+ int mask;
+ unsigned long *bitmap_base = bitmap + (base >> 5);
+ unsigned short low_index = base & 0x1f;
+ int length = low_index + extent;
+
+ if (low_index != 0) {
+ mask = (~0 << low_index);
+ if (length < 32)
+ mask &= ~(~0 << length);
+ if (new_value)
+ *bitmap_base++ |= mask;
+ else
+ *bitmap_base++ &= ~mask;
+ length -= 32;
+ }
+
+ mask = (new_value ? ~0 : 0);
+ while (length >= 32) {
+ *bitmap_base++ = mask;
+ length -= 32;
+ }
+
+ if (length > 0) {
+ mask = ~(~0 << length);
+ if (new_value)
+ *bitmap_base++ |= mask;
+ else
+ *bitmap_base++ &= ~mask;
+ }
+}
+
+/* Check for set bits in BITMAP starting at BASE, going to EXTENT. */
+asmlinkage int check_bitmap(unsigned long *bitmap, short base, short extent)
+{
+ int mask;
+ unsigned long *bitmap_base = bitmap + (base >> 5);
+ unsigned short low_index = base & 0x1f;
+ int length = low_index + extent;
+
+ if (low_index != 0) {
+ mask = (~0 << low_index);
+ if (length < 32)
+ mask &= ~(~0 << length);
+ if (*bitmap_base++ & mask)
+ return 1;
+ length -= 32;
+ }
+ while (length >= 32) {
+ if (*bitmap_base++ != 0)
+ return 1;
+ length -= 32;
+ }
+
+ if (length > 0) {
+ mask = ~(~0 << length);
+ if (*bitmap_base++ & mask)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * this changes the io permissions bitmap in the current task.
+ */
+asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+ if (from + num <= from)
+ return -EINVAL;
+ if (from + num > IO_BITMAP_SIZE*32)
+ return -EINVAL;
+ if (!suser())
+ return -EPERM;
+
+#ifdef IODEBUG
+ printk("io: from=%d num=%d %s\n", from, num, (turn_on ? "on" : "off"));
+#endif
+ set_bitmap((unsigned long *)current->tss.io_bitmap, from, num, !turn_on);
+ return 0;
+}
+
+unsigned int *stack;
+
+/*
+ * sys_iopl has to be used when you want to access the IO ports
+ * beyond the 0x3ff range: to get the full 65536 ports bitmapped
+ * you'd need 8kB of bitmaps/process, which is a bit excessive.
+ *
+ * Here we just change the eflags value on the stack: we allow
+ * only the super-user to do it. This depends on the stack-layout
+ * on system-call entry - see also fork() and the signal handling
+ * code.
+ */
+asmlinkage int sys_iopl(long ebx,long ecx,long edx,
+ long esi, long edi, long ebp, long eax, long ds,
+ long es, long fs, long gs, long orig_eax,
+ long eip,long cs,long eflags,long esp,long ss)
+{
+ unsigned int level = ebx;
+
+ if (level > 3)
+ return -EINVAL;
+ if (!suser())
+ return -EPERM;
+ *(&eflags) = (eflags & 0xffffcfff) | (level << 12);
+ return 0;
+}
+
+
+void snarf_region(unsigned int from, unsigned int num)
+{
+ if (from > IO_BITMAP_SIZE*32)
+ return;
+ if (from + num > IO_BITMAP_SIZE*32)
+ num = IO_BITMAP_SIZE*32 - from;
+ set_bitmap(ioport_registrar, from, num, 1);
+ return;
+}
+
+void release_region(unsigned int from, unsigned int num)
+{
+ if (from > IO_BITMAP_SIZE*32)
+ return;
+ if (from + num > IO_BITMAP_SIZE*32)
+ num = IO_BITMAP_SIZE*32 - from;
+ set_bitmap(ioport_registrar, from, num, 0);
+ return;
+}
+
+int check_region(unsigned int from, unsigned int num)
+{
+ if (from > IO_BITMAP_SIZE*32)
+ return 0;
+ if (from + num > IO_BITMAP_SIZE*32)
+ num = IO_BITMAP_SIZE*32 - from;
+ return check_bitmap(ioport_registrar, from, num);
+}
+
+/* Called from init/main.c to reserve IO ports. */
+void reserve_setup(char *str, int *ints)
+{
+ int i;
+
+ for (i = 1; i < ints[0]; i += 2)
+ snarf_region(ints[i], ints[i+1]);
+}
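
set_bitmap() and check_bitmap() above handle a partial leading 32-bit word, any number of full words, and a partial trailing word. The harness below carries user-space copies of both routines (asmlinkage dropped, and unsigned int used for the words so the 32-bit assumption of the original holds on 64-bit hosts) and exercises a range that straddles a word boundary:

    /* Hedged sketch: user-space copies of the two bitmap routines plus a
     * small test that sets and checks bits 30..37 across a word boundary.
     * For experimenting with the boundary arithmetic, not for kernel use. */
    #include <stdio.h>

    static void set_bitmap(unsigned int *bitmap, short base, short extent, int new_value)
    {
        int mask;
        unsigned int *bitmap_base = bitmap + (base >> 5);
        unsigned short low_index = base & 0x1f;
        int length = low_index + extent;

        if (low_index != 0) {
            mask = (~0 << low_index);
            if (length < 32)
                mask &= ~(~0 << length);
            if (new_value)
                *bitmap_base++ |= mask;
            else
                *bitmap_base++ &= ~mask;
            length -= 32;
        }
        mask = (new_value ? ~0 : 0);
        while (length >= 32) {          /* full 32-bit words in the middle */
            *bitmap_base++ = mask;
            length -= 32;
        }
        if (length > 0) {               /* partial trailing word */
            mask = ~(~0 << length);
            if (new_value)
                *bitmap_base++ |= mask;
            else
                *bitmap_base++ &= ~mask;
        }
    }

    static int check_bitmap(unsigned int *bitmap, short base, short extent)
    {
        int mask;
        unsigned int *bitmap_base = bitmap + (base >> 5);
        unsigned short low_index = base & 0x1f;
        int length = low_index + extent;

        if (low_index != 0) {
            mask = (~0 << low_index);
            if (length < 32)
                mask &= ~(~0 << length);
            if (*bitmap_base++ & mask)
                return 1;
            length -= 32;
        }
        while (length >= 32) {
            if (*bitmap_base++ != 0)
                return 1;
            length -= 32;
        }
        if (length > 0) {
            mask = ~(~0 << length);
            if (*bitmap_base++ & mask)
                return 1;
        }
        return 0;
    }

    int main(void)
    {
        unsigned int map[4] = { 0, 0, 0, 0 };

        set_bitmap(map, 30, 8, 1);       /* bits 30..37: spans words 0 and 1 */
        printf("30..37 busy: %d\n", check_bitmap(map, 30, 8));  /* expect 1 */
        printf("0..29  busy: %d\n", check_bitmap(map, 0, 30));  /* expect 0 */
        set_bitmap(map, 30, 8, 0);       /* release the range again */
        printf("30..37 busy: %d\n", check_bitmap(map, 30, 8));  /* expect 0 */
        return 0;
    }
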
diff --git a/kernel/irq.c b/kernel/irq.c
new file mode 100644
index 000000000..2de16db53
--- /dev/null
+++ b/kernel/irq.c
@@ -0,0 +1,354 @@
+/*
+ * linux/kernel/irq.c
+ *
+ * Copyright (C) 1992 Linus Torvalds
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ */
+
+/*
+ * IRQ's are in fact implemented a bit like signal handlers for the kernel.
+ * The same sigaction struct is used, and with similar semantics (ie there
+ * is a SA_INTERRUPT flag etc). Naturally it's not a 1:1 relation, but there
+ * are similarities.
+ *
+ * sa_handler(int irq_NR) is the default function called (0 if none).
+ * sa_mask is horribly ugly (I won't even mention it)
+ * sa_flags contains various info: SA_INTERRUPT etc
+ * sa_restorer is unused
+ */
+
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+#define CR0_NE 32
+
+static unsigned char cache_21 = 0xff;
+static unsigned char cache_A1 = 0xff;
+
+unsigned long intr_count = 0;
+unsigned long bh_active = 0;
+unsigned long bh_mask = 0xFFFFFFFF;
+struct bh_struct bh_base[32];
+
+void disable_irq(unsigned int irq_nr)
+{
+ unsigned long flags;
+ unsigned char mask;
+
+ mask = 1 << (irq_nr & 7);
+ save_flags(flags);
+ if (irq_nr < 8) {
+ cli();
+ cache_21 |= mask;
+ outb(cache_21,0x21);
+ restore_flags(flags);
+ return;
+ }
+ cli();
+ cache_A1 |= mask;
+ outb(cache_A1,0xA1);
+ restore_flags(flags);
+}
+
+void enable_irq(unsigned int irq_nr)
+{
+ unsigned long flags;
+ unsigned char mask;
+
+ mask = ~(1 << (irq_nr & 7));
+ save_flags(flags);
+ if (irq_nr < 8) {
+ cli();
+ cache_21 &= mask;
+ outb(cache_21,0x21);
+ restore_flags(flags);
+ return;
+ }
+ cli();
+ cache_A1 &= mask;
+ outb(cache_A1,0xA1);
+ restore_flags(flags);
+}
+
+/*
+ * do_bottom_half() runs at normal kernel priority: all interrupts
+ * enabled. do_bottom_half() is atomic with respect to itself: a
+ * bottom_half handler need not be re-entrant.
+ */
+asmlinkage void do_bottom_half(void)
+{
+ unsigned long active;
+ unsigned long mask, left;
+ struct bh_struct *bh;
+
+ bh = bh_base;
+ active = bh_active & bh_mask;
+ for (mask = 1, left = ~0 ; left & active ; bh++,mask += mask,left += left) {
+ if (mask & active) {
+ void (*fn)(void *);
+ bh_active &= ~mask;
+ fn = bh->routine;
+ if (!fn)
+ goto bad_bh;
+ fn(bh->data);
+ }
+ }
+ return;
+bad_bh:
+ printk ("irq.c:bad bottom half entry\n");
+}
+
+/*
+ * This builds up the IRQ handler stubs using some ugly macros in irq.h
+ *
+ * These macros create the low-level assembly IRQ routines that do all
+ * the operations that are needed to keep the AT interrupt-controller
+ * happy. They are also written to be fast - and to disable interrupts
+ * as little as humanly possible.
+ *
+ * NOTE! These macros expand to three different handlers for each line: one
+ * complete handler that does all the fancy stuff (including signal handling),
+ * and one fast handler that is meant for simple IRQ's that want to be
+ * atomic. The specific handler is chosen depending on the SA_INTERRUPT
+ * flag when installing a handler. Finally, one "bad interrupt" handler, that
+ * is used when no handler is present.
+ */
+BUILD_IRQ(FIRST,0,0x01)
+BUILD_IRQ(FIRST,1,0x02)
+BUILD_IRQ(FIRST,2,0x04)
+BUILD_IRQ(FIRST,3,0x08)
+BUILD_IRQ(FIRST,4,0x10)
+BUILD_IRQ(FIRST,5,0x20)
+BUILD_IRQ(FIRST,6,0x40)
+BUILD_IRQ(FIRST,7,0x80)
+BUILD_IRQ(SECOND,8,0x01)
+BUILD_IRQ(SECOND,9,0x02)
+BUILD_IRQ(SECOND,10,0x04)
+BUILD_IRQ(SECOND,11,0x08)
+BUILD_IRQ(SECOND,12,0x10)
+BUILD_IRQ(SECOND,13,0x20)
+BUILD_IRQ(SECOND,14,0x40)
+BUILD_IRQ(SECOND,15,0x80)
+
+/*
+ * Pointers to the low-level handlers: first the general ones, then the
+ * fast ones, then the bad ones.
+ */
+static void (*interrupt[16])(void) = {
+ IRQ0_interrupt, IRQ1_interrupt, IRQ2_interrupt, IRQ3_interrupt,
+ IRQ4_interrupt, IRQ5_interrupt, IRQ6_interrupt, IRQ7_interrupt,
+ IRQ8_interrupt, IRQ9_interrupt, IRQ10_interrupt, IRQ11_interrupt,
+ IRQ12_interrupt, IRQ13_interrupt, IRQ14_interrupt, IRQ15_interrupt
+};
+
+static void (*fast_interrupt[16])(void) = {
+ fast_IRQ0_interrupt, fast_IRQ1_interrupt,
+ fast_IRQ2_interrupt, fast_IRQ3_interrupt,
+ fast_IRQ4_interrupt, fast_IRQ5_interrupt,
+ fast_IRQ6_interrupt, fast_IRQ7_interrupt,
+ fast_IRQ8_interrupt, fast_IRQ9_interrupt,
+ fast_IRQ10_interrupt, fast_IRQ11_interrupt,
+ fast_IRQ12_interrupt, fast_IRQ13_interrupt,
+ fast_IRQ14_interrupt, fast_IRQ15_interrupt
+};
+
+static void (*bad_interrupt[16])(void) = {
+ bad_IRQ0_interrupt, bad_IRQ1_interrupt,
+ bad_IRQ2_interrupt, bad_IRQ3_interrupt,
+ bad_IRQ4_interrupt, bad_IRQ5_interrupt,
+ bad_IRQ6_interrupt, bad_IRQ7_interrupt,
+ bad_IRQ8_interrupt, bad_IRQ9_interrupt,
+ bad_IRQ10_interrupt, bad_IRQ11_interrupt,
+ bad_IRQ12_interrupt, bad_IRQ13_interrupt,
+ bad_IRQ14_interrupt, bad_IRQ15_interrupt
+};
+
+/*
+ * Initial irq handlers.
+ */
+static struct sigaction irq_sigaction[16] = {
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL },
+ { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL }
+};
+
+int get_irq_list(char *buf)
+{
+ int i, len = 0;
+ struct sigaction * sa = irq_sigaction;
+
+ for (i = 0 ; i < 16 ; i++, sa++) {
+ if (!sa->sa_handler)
+ continue;
+ len += sprintf(buf+len, "%2d: %8d %c %s\n",
+ i, kstat.interrupts[i],
+ (sa->sa_flags & SA_INTERRUPT) ? '+' : ' ',
+ (char *) sa->sa_mask);
+ }
+ return len;
+}
+
+/*
+ * do_IRQ handles IRQ's that have been installed without the
+ * SA_INTERRUPT flag: it uses the full signal-handling return
+ * and runs with other interrupts enabled. All relatively slow
+ * IRQ's should use this format: notably the keyboard/timer
+ * routines.
+ */
+asmlinkage void do_IRQ(int irq, struct pt_regs * regs)
+{
+ struct sigaction * sa = irq + irq_sigaction;
+
+ kstat.interrupts[irq]++;
+ sa->sa_handler((int) regs);
+}
+
+/*
+ * do_fast_IRQ handles IRQ's that don't need the fancy interrupt return
+ * stuff - the handler is also running with interrupts disabled unless
+ * it explicitly enables them later.
+ */
+asmlinkage void do_fast_IRQ(int irq)
+{
+ struct sigaction * sa = irq + irq_sigaction;
+
+ kstat.interrupts[irq]++;
+ sa->sa_handler(irq);
+}
+
+/*
+ * Using "struct sigaction" is slightly silly, but there
+ * are historical reasons and it works well, so..
+ */
+static int irqaction(unsigned int irq, struct sigaction * new_sa)
+{
+ struct sigaction * sa;
+ unsigned long flags;
+
+ if (irq > 15)
+ return -EINVAL;
+ sa = irq + irq_sigaction;
+ if (sa->sa_handler)
+ return -EBUSY;
+ if (!new_sa->sa_handler)
+ return -EINVAL;
+ save_flags(flags);
+ cli();
+ *sa = *new_sa;
+ if (sa->sa_flags & SA_INTERRUPT)
+ set_intr_gate(0x20+irq,fast_interrupt[irq]);
+ else
+ set_intr_gate(0x20+irq,interrupt[irq]);
+ if (irq < 8) {
+ cache_21 &= ~(1<<irq);
+ outb(cache_21,0x21);
+ } else {
+ cache_21 &= ~(1<<2);
+ cache_A1 &= ~(1<<(irq-8));
+ outb(cache_21,0x21);
+ outb(cache_A1,0xA1);
+ }
+ restore_flags(flags);
+ return 0;
+}
+
+int request_irq(unsigned int irq, void (*handler)(int),
+ unsigned long flags, const char * devname)
+{
+ struct sigaction sa;
+
+ sa.sa_handler = handler;
+ sa.sa_flags = flags;
+ sa.sa_mask = (unsigned long) devname;
+ sa.sa_restorer = NULL;
+ return irqaction(irq,&sa);
+}
+
+void free_irq(unsigned int irq)
+{
+ struct sigaction * sa = irq + irq_sigaction;
+ unsigned long flags;
+
+ if (irq > 15) {
+ printk("Trying to free IRQ%d\n",irq);
+ return;
+ }
+ if (!sa->sa_handler) {
+ printk("Trying to free free IRQ%d\n",irq);
+ return;
+ }
+ save_flags(flags);
+ cli();
+ if (irq < 8) {
+ cache_21 |= 1 << irq;
+ outb(cache_21,0x21);
+ } else {
+ cache_A1 |= 1 << (irq-8);
+ outb(cache_A1,0xA1);
+ }
+ set_intr_gate(0x20+irq,bad_interrupt[irq]);
+ sa->sa_handler = NULL;
+ sa->sa_flags = 0;
+ sa->sa_mask = 0;
+ sa->sa_restorer = NULL;
+ restore_flags(flags);
+}
+
+/*
+ * Note that on a 486, we don't want to do a SIGFPE on an irq13
+ * as the irq is unreliable, and exception 16 works correctly
+ * (ie as explained in the intel literature). On a 386, you
+ * can't use exception 16 due to bad IBM design, so we have to
+ * rely on the less exact irq13.
+ *
+ * Careful.. Not only is IRQ13 unreliable, but it also
+ * leads to races. IBM designers who came up with it should
+ * be shot.
+ */
+static void math_error_irq(int cpl)
+{
+ outb(0,0xF0);
+ if (ignore_irq13 || !hard_math)
+ return;
+ math_error();
+}
+
+static void no_action(int cpl) { }
+
+void init_IRQ(void)
+{
+ int i;
+
+ for (i = 0; i < 16 ; i++)
+ set_intr_gate(0x20+i,bad_interrupt[i]);
+ if (request_irq(2, no_action, SA_INTERRUPT, "cascade"))
+ printk("Unable to get IRQ2 for cascade\n");
+ if (request_irq(13,math_error_irq, 0, "math error"))
+ printk("Unable to get IRQ13 for math-error handler\n");
+
+ /* initialize the bottom half routines. */
+ for (i = 0; i < 32; i++) {
+ bh_base[i].routine = NULL;
+ bh_base[i].data = NULL;
+ }
+ bh_active = 0;
+ intr_count = 0;
+}
diff --git a/kernel/itimer.c b/kernel/itimer.c
new file mode 100644
index 000000000..4d5fa0f67
--- /dev/null
+++ b/kernel/itimer.c
@@ -0,0 +1,135 @@
+/*
+ * linux/kernel/itimer.c
+ *
+ * Copyright (C) 1992 Darren Senn
+ */
+
+/* These are all the functions necessary to implement itimers */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/time.h>
+
+#include <asm/segment.h>
+
+static unsigned long tvtojiffies(struct timeval *value)
+{
+ return((unsigned long )value->tv_sec * HZ +
+ (unsigned long )(value->tv_usec + (1000000 / HZ - 1)) /
+ (1000000 / HZ));
+}
+
+static void jiffiestotv(unsigned long jiffies, struct timeval *value)
+{
+ value->tv_usec = (jiffies % HZ) * (1000000 / HZ);
+ value->tv_sec = jiffies / HZ;
+ return;
+}
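
tvtojiffies() rounds the microsecond part up to the next clock tick so that a nonzero interval can never collapse to zero jiffies, while jiffiestotv() simply loses any sub-tick precision on the way back. A standalone sketch of that round trip, assuming HZ is 100 purely for illustration:

/* Round-trip between a timeval and jiffies, as in the two helpers above.
 * HZ is assumed to be 100 here just to make the numbers concrete. */
#include <stdio.h>

#define HZ 100

struct timeval { long tv_sec; long tv_usec; };

static unsigned long tvtojiffies(const struct timeval *value)
{
	return (unsigned long)value->tv_sec * HZ +
	       ((unsigned long)value->tv_usec + (1000000 / HZ - 1)) / (1000000 / HZ);
}

static void jiffiestotv(unsigned long jiffies, struct timeval *value)
{
	value->tv_usec = (jiffies % HZ) * (1000000 / HZ);
	value->tv_sec = jiffies / HZ;
}

int main(void)
{
	struct timeval in = { 1, 5000 };	/* 1.005 s */
	struct timeval out;
	unsigned long j = tvtojiffies(&in);

	jiffiestotv(j, &out);
	/* 1.005 s rounds up to 101 jiffies and comes back as 1.010000 s */
	printf("%ld.%06ld s -> %lu jiffies -> %ld.%06ld s\n",
	       in.tv_sec, in.tv_usec, j, out.tv_sec, out.tv_usec);
	return 0;
}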
+
+int _getitimer(int which, struct itimerval *value)
+{
+ register unsigned long val, interval;
+
+ switch (which) {
+ case ITIMER_REAL:
+ val = current->it_real_value;
+ interval = current->it_real_incr;
+ break;
+ case ITIMER_VIRTUAL:
+ val = current->it_virt_value;
+ interval = current->it_virt_incr;
+ break;
+ case ITIMER_PROF:
+ val = current->it_prof_value;
+ interval = current->it_prof_incr;
+ break;
+ default:
+ return(-EINVAL);
+ }
+ jiffiestotv(val, &value->it_value);
+ jiffiestotv(interval, &value->it_interval);
+ return(0);
+}
+
+asmlinkage int sys_getitimer(int which, struct itimerval *value)
+{
+ int error;
+ struct itimerval get_buffer;
+
+ if (!value)
+ return -EFAULT;
+ error = _getitimer(which, &get_buffer);
+ if (error)
+ return error;
+ error = verify_area(VERIFY_WRITE, value, sizeof(struct itimerval));
+ if (error)
+ return error;
+ memcpy_tofs(value, &get_buffer, sizeof(get_buffer));
+ return 0;
+}
+
+int _setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
+{
+ register unsigned long i, j;
+ int k;
+
+ i = tvtojiffies(&value->it_interval);
+ j = tvtojiffies(&value->it_value);
+ if (ovalue && (k = _getitimer(which, ovalue)) < 0)
+ return k;
+ switch (which) {
+ case ITIMER_REAL:
+ if (j) {
+ j += 1+itimer_ticks;
+ if (j < itimer_next)
+ itimer_next = j;
+ }
+ current->it_real_value = j;
+ current->it_real_incr = i;
+ break;
+ case ITIMER_VIRTUAL:
+ if (j)
+ j++;
+ current->it_virt_value = j;
+ current->it_virt_incr = i;
+ break;
+ case ITIMER_PROF:
+ if (j)
+ j++;
+ current->it_prof_value = j;
+ current->it_prof_incr = i;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+asmlinkage int sys_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
+{
+ int error;
+ struct itimerval set_buffer, get_buffer;
+
+ if (value) {
+ error = verify_area(VERIFY_READ, value, sizeof(*value));
+ if (error)
+ return error;
+ memcpy_fromfs(&set_buffer, value, sizeof(set_buffer));
+ } else
+ memset((char *) &set_buffer, 0, sizeof(set_buffer));
+
+ if (ovalue) {
+ error = verify_area(VERIFY_WRITE, ovalue, sizeof(struct itimerval));
+ if (error)
+ return error;
+ }
+
+ error = _setitimer(which, &set_buffer, ovalue ? &get_buffer : 0);
+ if (error || !ovalue)
+ return error;
+
+ memcpy_tofs(ovalue, &get_buffer, sizeof(get_buffer));
+ return error;
+}
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
new file mode 100644
index 000000000..62bca052c
--- /dev/null
+++ b/kernel/ksyms.c
@@ -0,0 +1,263 @@
+/*
+ * Herein lie all the functions/variables that are "exported" for linkage
+ * with dynamically loaded kernel modules.
+ * Jon.
+ *
+ * Stacked module support and unified symbol table added by
+ * Bjorn Ekwall <bj0rn@blox.se>
+ */
+
+#include <linux/autoconf.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/malloc.h>
+#include <linux/ptrace.h>
+#include <linux/sys.h>
+#include <linux/utsname.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/timer.h>
+#include <linux/binfmts.h>
+#include <linux/personality.h>
+#include <linux/module.h>
+#include <linux/termios.h>
+#include <linux/tqueue.h>
+#include <linux/tty.h>
+#include <linux/serial.h>
+#include <linux/locks.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#ifdef CONFIG_INET
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#endif
+
+#include <asm/irq.h>
+extern char floppy_track_buffer[];
+extern void set_device_ro(int dev,int flag);
+#include <linux/delay.h>
+#include <linux/locks.h>
+
+extern void *sys_call_table;
+
+/* must match struct internal_symbol !!! */
+#define X(name) { (void *) &name, "_" #name }
+
+#ifdef CONFIG_FTAPE
+extern char * ftape_big_buffer;
+extern void (*do_floppy)(void);
+#endif
+
+extern int sys_tz;
+extern int request_dma(unsigned int dmanr, char * deviceID);
+extern void free_dma(unsigned int dmanr);
+
+extern int do_execve(char * filename, char ** argv, char ** envp,
+ struct pt_regs * regs);
+extern int do_signal(unsigned long oldmask, struct pt_regs * regs);
+
+extern void (* iABI_hook)(struct pt_regs * regs);
+
+struct symbol_table symbol_table = { 0, 0, 0, /* for stacked module support */
+ {
+ /* stackable module support */
+ X(rename_module_symbol),
+
+ /* system info variables */
+ X(EISA_bus),
+ X(wp_works_ok),
+
+ /* process memory management */
+ X(verify_area),
+ X(do_mmap),
+ X(do_munmap),
+ X(zeromap_page_range),
+ X(unmap_page_range),
+ X(insert_vm_struct),
+ X(merge_segments),
+
+ /* internal kernel memory management */
+ X(__get_free_pages),
+ X(free_pages),
+ X(kmalloc),
+ X(kfree_s),
+ X(vmalloc),
+ X(vfree),
+
+ /* filesystem internal functions */
+ X(getname),
+ X(putname),
+ X(__iget),
+ X(iput),
+ X(namei),
+ X(lnamei),
+ X(open_namei),
+ X(check_disk_change),
+ X(invalidate_buffers),
+ X(fsync_dev),
+ X(permission),
+ X(inode_setattr),
+ X(inode_change_ok),
+ X(generic_mmap),
+ X(set_blocksize),
+ X(getblk),
+ X(bread),
+ X(brelse),
+ X(ll_rw_block),
+ X(__wait_on_buffer),
+
+ /* device registration */
+ X(register_chrdev),
+ X(unregister_chrdev),
+ X(register_blkdev),
+ X(unregister_blkdev),
+ X(tty_register_driver),
+ X(tty_unregister_driver),
+ X(tty_std_termios),
+
+ /* block device driver support */
+ X(block_read),
+ X(block_write),
+ X(block_fsync),
+ X(wait_for_request),
+ X(blksize_size),
+ X(blk_size),
+ X(blk_dev),
+ X(is_read_only),
+ X(set_device_ro),
+ X(bmap),
+ X(sync_dev),
+
+ /* Module creation of serial units */
+ X(register_serial),
+ X(unregister_serial),
+
+ /* filesystem registration */
+ X(register_filesystem),
+ X(unregister_filesystem),
+
+ /* executable format registration */
+ X(register_binfmt),
+ X(unregister_binfmt),
+
+ /* execution environment registration */
+ X(lookup_exec_domain),
+ X(register_exec_domain),
+ X(unregister_exec_domain),
+
+ /* interrupt handling */
+ X(request_irq),
+ X(free_irq),
+ X(enable_irq),
+ X(disable_irq),
+ X(bh_active),
+ X(bh_mask),
+ X(add_timer),
+ X(del_timer),
+ X(tq_timer),
+ X(tq_immediate),
+ X(tq_last),
+ X(timer_active),
+ X(timer_table),
+
+ /* dma handling */
+ X(request_dma),
+ X(free_dma),
+
+ /* process management */
+ X(wake_up),
+ X(wake_up_interruptible),
+ X(sleep_on),
+ X(interruptible_sleep_on),
+ X(schedule),
+ X(current),
+ X(jiffies),
+ X(xtime),
+ X(loops_per_sec),
+ X(need_resched),
+ X(kill_proc),
+ X(kill_pg),
+ X(kill_sl),
+
+ /* misc */
+ X(panic),
+ X(printk),
+ X(sprintf),
+ X(vsprintf),
+ X(simple_strtoul),
+ X(system_utsname),
+ X(sys_call_table),
+
+ /* Signal interfaces */
+ X(do_signal),
+ X(send_sig),
+
+ /* Program loader interfaces */
+ X(change_ldt),
+ X(copy_strings),
+ X(create_tables),
+ X(do_execve),
+ X(flush_old_exec),
+ X(open_inode),
+ X(read_exec),
+
+ /* Miscellaneous access points */
+ X(si_meminfo),
+
+ /* socket layer registration */
+ X(sock_register),
+ X(sock_unregister),
+
+#ifdef CONFIG_FTAPE
+ /* The next labels are needed for ftape driver. */
+ X(ftape_big_buffer),
+ X(do_floppy),
+#endif
+#ifdef CONFIG_INET
+ /* support for loadable net drivers */
+ X(register_netdev),
+ X(unregister_netdev),
+ X(ether_setup),
+ X(alloc_skb),
+ X(kfree_skb),
+ X(dev_kfree_skb),
+ X(snarf_region),
+ X(netif_rx),
+ X(dev_rint),
+ X(dev_tint),
+ X(irq2dev_map),
+ X(dev_add_pack),
+ X(dev_remove_pack),
+ X(dev_get),
+ X(dev_ioctl),
+ X(dev_queue_xmit),
+ X(dev_base),
+#endif
+ /* Added to make file system as module */
+ X(set_writetime),
+ X(sys_tz),
+ X(__wait_on_super),
+ X(file_fsync),
+ X(clear_inode),
+ X(refile_buffer),
+ X(___strtok),
+ X(init_fifo),
+ X(super_blocks),
+ X(chrdev_inode_operations),
+ X(blkdev_inode_operations),
+ X(read_ahead),
+ /********************************************************
+ * Do not add anything below this line,
+ * as the stacked modules depend on this!
+ */
+ { NULL, NULL } /* mark end of table */
+ },
+ { { NULL, NULL } /* no module refs */ }
+};
+
+/*
+int symbol_table_size = sizeof (symbol_table) / sizeof (symbol_table[0]);
+*/
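
The X() macro used to build the table above relies on the preprocessor's stringizing operator to produce each { address, "_name" } pair, the leading underscore matching the a.out symbol naming of the era. A minimal userspace illustration of the same trick, with made-up stand-in symbols:

/* How the X() macro builds { address, "_name" } pairs.  The two stub
 * symbols here are invented for the example. */
#include <stdio.h>

struct internal_symbol {		/* same shape as the kernel's entry */
	void *addr;
	const char *name;
};

#define X(name) { (void *) &name, "_" #name }

int printk_stub(const char *fmt) { return printf("%s", fmt); }
unsigned long jiffies_stub = 0;

static struct internal_symbol table[] = {
	X(printk_stub),
	X(jiffies_stub),
	{ NULL, NULL }			/* end marker, as in symbol_table */
};

int main(void)
{
	struct internal_symbol *s;

	for (s = table; s->name; s++)
		printf("%p  %s\n", s->addr, s->name);
	return 0;
}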
diff --git a/kernel/ldt.c b/kernel/ldt.c
new file mode 100644
index 000000000..dd0e477d4
--- /dev/null
+++ b/kernel/ldt.c
@@ -0,0 +1,103 @@
+/*
+ * linux/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <linux/ldt.h>
+
+static int read_ldt(void * ptr, unsigned long bytecount)
+{
+ int error;
+ void * address = current->ldt;
+ unsigned long size;
+
+ if (!ptr)
+ return -EINVAL;
+ size = LDT_ENTRIES*LDT_ENTRY_SIZE;
+ if (!address) {
+ address = &default_ldt;
+ size = sizeof(default_ldt);
+ }
+ if (size > bytecount)
+ size = bytecount;
+ error = verify_area(VERIFY_WRITE, ptr, size);
+ if (error)
+ return error;
+ memcpy_tofs(ptr, address, size);
+ return size;
+}
+
+static int write_ldt(void * ptr, unsigned long bytecount)
+{
+ struct modify_ldt_ldt_s ldt_info;
+ unsigned long *lp;
+ unsigned long base, limit;
+ int error, i;
+
+ if (bytecount != sizeof(ldt_info))
+ return -EINVAL;
+ error = verify_area(VERIFY_READ, ptr, sizeof(ldt_info));
+ if (error)
+ return error;
+
+ memcpy_fromfs(&ldt_info, ptr, sizeof(ldt_info));
+
+ if (ldt_info.contents == 3 || ldt_info.entry_number >= LDT_ENTRIES)
+ return -EINVAL;
+
+ limit = ldt_info.limit;
+ base = ldt_info.base_addr;
+ if (ldt_info.limit_in_pages)
+ limit *= PAGE_SIZE;
+
+ limit += base;
+ if (limit < base || limit >= 0xC0000000)
+ return -EINVAL;
+
+ if (!current->ldt) {
+ for (i=1 ; i<NR_TASKS ; i++) {
+ if (task[i] == current) {
+ if (!(current->ldt = (struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE)))
+ return -ENOMEM;
+ set_ldt_desc(gdt+(i<<1)+FIRST_LDT_ENTRY, current->ldt, LDT_ENTRIES);
+ load_ldt(i);
+ }
+ }
+ }
+
+ lp = (unsigned long *) &current->ldt[ldt_info.entry_number];
+ /* Allow LDTs to be cleared by the user. */
+ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+ *lp = 0;
+ *(lp+1) = 0;
+ return 0;
+ }
+ *lp = ((ldt_info.base_addr & 0x0000ffff) << 16) |
+ (ldt_info.limit & 0x0ffff);
+ *(lp+1) = (ldt_info.base_addr & 0xff000000) |
+ ((ldt_info.base_addr & 0x00ff0000)>>16) |
+ (ldt_info.limit & 0xf0000) |
+ (ldt_info.contents << 10) |
+ ((ldt_info.read_exec_only ^ 1) << 9) |
+ (ldt_info.seg_32bit << 22) |
+ (ldt_info.limit_in_pages << 23) |
+ ((ldt_info.seg_not_present ^1) << 15) |
+ 0x7000;
+ return 0;
+}
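
The two words stored into the LDT slot follow the i386 descriptor layout: the first holds limit bits 15..0 and base bits 15..0, the second scatters the remaining base and limit bits around the type and flag fields, with the 0x7000 constant supplying the descriptor-type (S) bit and DPL 3. Below is a userspace sketch that packs a descriptor with exactly the shifts used above and then decodes the base address back out; the simplified struct is only a stand-in for struct modify_ldt_ldt_s.

/* Pack an i386 LDT descriptor the way write_ldt() does, then decode the
 * base address back out of it. */
#include <stdio.h>

struct ldt_entry {			/* stand-in for the fields used above */
	unsigned long base_addr;
	unsigned long limit;
	unsigned int contents;		/* 0 = data, 1 = stack, 2 = code */
	unsigned int read_exec_only;
	unsigned int seg_32bit;
	unsigned int limit_in_pages;
	unsigned int seg_not_present;
};

static void pack(const struct ldt_entry *e, unsigned long desc[2])
{
	desc[0] = ((e->base_addr & 0x0000ffff) << 16) | (e->limit & 0x0ffff);
	desc[1] = (e->base_addr & 0xff000000) |
		  ((e->base_addr & 0x00ff0000) >> 16) |
		  (e->limit & 0xf0000) |
		  (e->contents << 10) |
		  ((e->read_exec_only ^ 1) << 9) |
		  (e->seg_32bit << 22) |
		  (e->limit_in_pages << 23) |
		  ((e->seg_not_present ^ 1) << 15) |
		  0x7000;		/* S bit + DPL 3 */
}

static unsigned long unpack_base(const unsigned long desc[2])
{
	return (desc[0] >> 16) |		/* base 15..0  */
	       ((desc[1] & 0xff) << 16) |	/* base 23..16 */
	       (desc[1] & 0xff000000);		/* base 31..24 */
}

int main(void)
{
	struct ldt_entry e = { 0x12345678, 0xfffff, 2, 0, 1, 1, 0 };
	unsigned long desc[2];

	pack(&e, desc);
	printf("descriptor = %08lx %08lx, base back = %08lx\n",
	       desc[0], desc[1], unpack_base(desc));
	return 0;
}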
+
+asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount)
+{
+ if (func == 0)
+ return read_ldt(ptr, bytecount);
+ if (func == 1)
+ return write_ldt(ptr, bytecount);
+ return -ENOSYS;
+}
diff --git a/kernel/module.c b/kernel/module.c
new file mode 100644
index 000000000..eb3ca2417
--- /dev/null
+++ b/kernel/module.c
@@ -0,0 +1,584 @@
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <asm/segment.h>
+#include <linux/mm.h> /* defines GFP_KERNEL */
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/malloc.h>
+/*
+ * Originally by Anonymous (as far as I know...)
+ * Linux version by Bas Laarhoven <bas@vimec.nl>
+ * 0.99.14 version by Jon Tombs <jon@gtex02.us.es>,
+ *
+ * Heavily modified by Bjorn Ekwall <bj0rn@blox.se> May 1994 (C)
+ * This source is covered by the GNU GPL, the same as all kernel sources.
+ *
+ * Features:
+ * - Supports stacked modules (removable only if there are no dependents).
+ * - Supports table of symbols defined by the modules.
+ * - Supports /proc/ksyms, showing value, name and owner of all
+ * the symbols defined by all modules (in stack order).
+ * - Added module dependencies information into /proc/modules
+ * - Supports redefines of all symbols, for streams-like behaviour.
+ * - Compatible with older versions of insmod.
+ *
+ */
+
+#ifdef DEBUG_MODULE
+#define PRINTK(a) printk a
+#else
+#define PRINTK(a) /* */
+#endif
+
+static struct module kernel_module;
+static struct module *module_list = &kernel_module;
+
+static int freeing_modules; /* true if some modules are marked for deletion */
+
+static struct module *find_module( const char *name);
+static int get_mod_name( char *user_name, char *buf);
+static int free_modules( void);
+
+
+/*
+ * Called at boot time
+ */
+void init_modules(void) {
+ extern struct symbol_table symbol_table; /* in kernel/ksyms.c */
+ struct internal_symbol *sym;
+ int i;
+
+ for (i = 0, sym = symbol_table.symbol; sym->name; ++sym, ++i)
+ ;
+ symbol_table.n_symbols = i;
+
+ kernel_module.symtab = &symbol_table;
+ kernel_module.state = MOD_RUNNING; /* Hah! */
+ kernel_module.name = "";
+}
+
+int
+rename_module_symbol(char *old_name, char *new_name)
+{
+ struct internal_symbol *sym;
+ int i = 0; /* keep gcc silent */
+
+ if (module_list->symtab) {
+ sym = module_list->symtab->symbol;
+ for (i = module_list->symtab->n_symbols; i > 0; ++sym, --i) {
+ if (strcmp(sym->name, old_name) == 0) { /* found it! */
+ sym->name = new_name; /* done! */
+ PRINTK(("renamed %s to %s\n", old_name, new_name));
+ return 1; /* it worked! */
+ }
+ }
+ }
+ printk("rename %s to %s failed!\n", old_name, new_name);
+ return 0; /* not there... */
+
+ /*
+ * This one will change the name of the first matching symbol!
+ *
+ * With this function, you can replace the name of a symbol defined
+ * in the current module with a new name, e.g. when you want to insert
+ * your own function instead of a previously defined function
+ * with the same name.
+ *
+ * "Normal" usage:
+ *
+ * bogus_function(int params)
+ * {
+ * do something "smart";
+ * return real_function(params);
+ * }
+ *
+ * ...
+ *
+ * init_module()
+ * {
+ * if (rename_module_symbol("_bogus_function", "_real_function"))
+ * printk("yep!\n");
+ * else
+ * printk("no way!\n");
+ * ...
+ * }
+ *
+ * When loading this module, real_function will be resolved
+ * to the real function address.
+ * All later loaded modules that refer to "real_function()" will
+ * then really call "bogus_function()" instead!!!
+ *
+ * This feature will give you ample opportunities to get to know
+ * the taste of your foot when you stuff it into your mouth!!!
+ */
+}
+
+/*
+ * Allocate space for a module.
+ */
+asmlinkage int
+sys_create_module(char *module_name, unsigned long size)
+{
+ struct module *mp;
+ void* addr;
+ int error;
+ int npages;
+ int sspace = sizeof(struct module) + MOD_MAX_NAME;
+ char name[MOD_MAX_NAME];
+
+ if (!suser())
+ return -EPERM;
+ if (module_name == NULL || size == 0)
+ return -EINVAL;
+ if ((error = get_mod_name(module_name, name)) != 0)
+ return error;
+ if (find_module(name) != NULL) {
+ return -EEXIST;
+ }
+
+ if ((mp = (struct module*) kmalloc(sspace, GFP_KERNEL)) == NULL) {
+ return -ENOMEM;
+ }
+ strcpy((char *)(mp + 1), name); /* why not? */
+
+ npages = (size + sizeof (int) + 4095) / 4096;
+ if ((addr = vmalloc(npages * 4096)) == 0) {
+ kfree_s(mp, sspace);
+ return -ENOMEM;
+ }
+
+ mp->next = module_list;
+ mp->ref = NULL;
+ mp->symtab = NULL;
+ mp->name = (char *)(mp + 1);
+ mp->size = npages;
+ mp->addr = addr;
+ mp->state = MOD_UNINITIALIZED;
+ mp->cleanup = NULL;
+
+ * (int *) addr = 0; /* set use count to zero */
+ module_list = mp; /* link it in */
+
+ PRINTK(("module `%s' (%lu pages @ 0x%08lx) created\n",
+ mp->name, (unsigned long) mp->size, (unsigned long) mp->addr));
+ return (int) addr;
+}
+
+/*
+ * Initialize a module.
+ */
+asmlinkage int
+sys_init_module(char *module_name, char *code, unsigned codesize,
+ struct mod_routines *routines,
+ struct symbol_table *symtab)
+{
+ struct module *mp;
+ struct symbol_table *newtab;
+ char name[MOD_MAX_NAME];
+ int error;
+ struct mod_routines rt;
+
+ if (!suser())
+ return -EPERM;
+
+ /* A little bit of protection... we "know" where the user stack is... */
+ if (symtab && ((unsigned long)symtab > 0xb0000000)) {
+ printk("warning: you are using an old insmod, no symbols will be inserted!\n");
+ symtab = NULL;
+ }
+
+ /*
+	 * First reclaim any memory from dead modules that were not
+	 * freed when they were deleted. This should, I think, be done by
+	 * timers when the module is deleted - Jon.
+ */
+ free_modules();
+
+ if ((error = get_mod_name(module_name, name)) != 0)
+ return error;
+ PRINTK(("initializing module `%s', %d (0x%x) bytes\n",
+ name, codesize, codesize));
+ memcpy_fromfs(&rt, routines, sizeof rt);
+ if ((mp = find_module(name)) == NULL)
+ return -ENOENT;
+ if ((codesize + sizeof (int) + 4095) / 4096 > mp->size)
+ return -EINVAL;
+ memcpy_fromfs((char *)mp->addr + sizeof (int), code, codesize);
+ memset((char *)mp->addr + sizeof (int) + codesize, 0,
+ mp->size * 4096 - (codesize + sizeof (int)));
+ PRINTK(( "module init entry = 0x%08lx, cleanup entry = 0x%08lx\n",
+ (unsigned long) rt.init, (unsigned long) rt.cleanup));
+ mp->cleanup = rt.cleanup;
+
+ /* update kernel symbol table */
+ if (symtab) { /* symtab == NULL means no new entries to handle */
+ struct internal_symbol *sym;
+ struct module_ref *ref;
+ int size;
+ int i;
+ int legal_start;
+
+ if ((error = verify_area(VERIFY_READ, symtab, sizeof(int))))
+ return error;
+ memcpy_fromfs((char *)(&(size)), symtab, sizeof(int));
+
+ if ((newtab = (struct symbol_table*) kmalloc(size, GFP_KERNEL)) == NULL) {
+ return -ENOMEM;
+ }
+
+ if ((error = verify_area(VERIFY_READ, symtab, size))) {
+ kfree_s(newtab, size);
+ return error;
+ }
+ memcpy_fromfs((char *)(newtab), symtab, size);
+
+ /* sanity check */
+ legal_start = sizeof(struct symbol_table) +
+ newtab->n_symbols * sizeof(struct internal_symbol) +
+ newtab->n_refs * sizeof(struct module_ref);
+
+ if ((newtab->n_symbols < 0) || (newtab->n_refs < 0) ||
+ (legal_start > size)) {
+ printk("Illegal symbol table! Rejected!\n");
+ kfree_s(newtab, size);
+ return -EINVAL;
+ }
+
+	/* relocate name pointers: they are stored as offsets from the start of the table */
+ for (sym = &(newtab->symbol[0]), i = 0;
+ i < newtab->n_symbols; ++sym, ++i) {
+ if ((int)sym->name < legal_start || size <= (int)sym->name) {
+ printk("Illegal symbol table! Rejected!\n");
+ kfree_s(newtab, size);
+ return -EINVAL;
+ }
+ /* else */
+ sym->name += (long)newtab;
+ }
+ mp->symtab = newtab;
+
+ /* Update module references.
+ * On entry, from "insmod", ref->module points to
+ * the referenced module!
+ * Now it will point to the current module instead!
+ * The ref structure becomes the first link in the linked
+ * list of references to the referenced module.
+ * Also, "sym" from above, points to the first ref entry!!!
+ */
+ for (ref = (struct module_ref *)sym, i = 0;
+ i < newtab->n_refs; ++ref, ++i) {
+
+ /* Check for valid reference */
+ struct module *link = module_list;
+ while (link && (ref->module != link))
+ link = link->next;
+
+ if (link == (struct module *)0) {
+ printk("Non-module reference! Rejected!\n");
+ return -EINVAL;
+ }
+
+ ref->next = ref->module->ref;
+ ref->module->ref = ref;
+ ref->module = mp;
+ }
+ }
+
+ if ((*rt.init)() != 0)
+ return -EBUSY;
+ mp->state = MOD_RUNNING;
+
+ return 0;
+}
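
A detail worth spelling out in the symtab handling above: in the table copied from user space every symbol name is stored as an offset from the start of the table, and `sym->name += (long)newtab` turns that offset into a real pointer once the table sits at its final kernel address. A tiny userspace sketch of the same relocation, with an invented one-symbol blob:

/* Offset-to-pointer relocation, as done for sym->name above.  The blob
 * layout (one struct followed by its string) is invented for the example. */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct sym {			/* stand-in for struct internal_symbol */
	void *addr;
	char *name;
};

int main(void)
{
	size_t size = sizeof(struct sym) + 16;
	char *blob = malloc(size);
	struct sym *s = (struct sym *) blob;

	if (!blob)
		return 1;
	s->addr = (void *) 0xc0101234;				/* some address */
	s->name = (char *) (uintptr_t) sizeof(struct sym);	/* offset for now */
	strcpy(blob + sizeof(struct sym), "_printk");

	/* relocation: offset from table start -> pointer into the new copy */
	s->name = blob + (uintptr_t) s->name;

	printf("%p %s\n", s->addr, s->name);
	free(blob);
	return 0;
}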
+
+asmlinkage int
+sys_delete_module(char *module_name)
+{
+ struct module *mp;
+ char name[MOD_MAX_NAME];
+ int error;
+
+ if (!suser())
+ return -EPERM;
+ /* else */
+ if (module_name != NULL) {
+ if ((error = get_mod_name(module_name, name)) != 0)
+ return error;
+ if ((mp = find_module(name)) == NULL)
+ return -ENOENT;
+ if ((mp->ref != NULL) || (GET_USE_COUNT(mp) != 0))
+ return -EBUSY;
+ if (mp->state == MOD_RUNNING)
+ (*mp->cleanup)();
+ mp->state = MOD_DELETED;
+ }
+ free_modules();
+ return 0;
+}
+
+
+/*
+ * Copy the kernel symbol table to user space. If the argument is null,
+ * just return the size of the table.
+ *
+ * Note that the transient module symbols are copied _first_,
+ * in lifo order!!!
+ *
+ * The symbols passed to "insmod" use the "old" format, struct kernel_sym,
+ * which is actually quite handy for this purpose.
+ * Note that insmod inserts a struct symbol_table later on...
+ * (as that format is quite handy for the kernel...)
+ *
+ * For every module, the first (pseudo)symbol copied is the module name
+ * and the address of the module struct.
+ * This lets "insmod" keep track of references, and build the array of
+ * struct module_refs in the symbol table.
+ * The format of the module name is "#module", so that "insmod" can easily
+ * notice when a module name comes along. Also, this will make it possible
+ * to use old versions of "insmod", albeit with reduced functionality...
+ * The "kernel" module has an empty name.
+ */
+asmlinkage int
+sys_get_kernel_syms(struct kernel_sym *table)
+{
+ struct internal_symbol *from;
+ struct kernel_sym isym;
+ struct kernel_sym *to;
+ struct module *mp = module_list;
+ int i;
+ int nmodsyms = 0;
+
+ for (mp = module_list; mp; mp = mp->next) {
+ if (mp->symtab && mp->symtab->n_symbols) {
+ /* include the count for the module name! */
+ nmodsyms += mp->symtab->n_symbols + 1;
+ }
+ }
+
+ if (table != NULL) {
+ to = table;
+
+ if ((i = verify_area(VERIFY_WRITE, to, nmodsyms * sizeof(*table))))
+ return i;
+
+ /* copy all module symbols first (always LIFO order) */
+ for (mp = module_list; mp; mp = mp->next) {
+ if ((mp->state == MOD_RUNNING) &&
+ (mp->symtab != NULL) && (mp->symtab->n_symbols > 0)) {
+ /* magic: write module info as a pseudo symbol */
+ isym.value = (unsigned long)mp;
+ sprintf(isym.name, "#%s", mp->name);
+ memcpy_tofs(to, &isym, sizeof isym);
+ ++to;
+
+ for (i = mp->symtab->n_symbols,
+ from = mp->symtab->symbol;
+ i > 0; --i, ++from, ++to) {
+
+ isym.value = (unsigned long)from->addr;
+ strncpy(isym.name, from->name, sizeof isym.name);
+ memcpy_tofs(to, &isym, sizeof isym);
+ }
+ }
+ }
+ }
+
+ return nmodsyms;
+}
+
+
+/*
+ * Copy the name of a module from user space.
+ */
+int
+get_mod_name(char *user_name, char *buf)
+{
+ int i;
+
+	for (i = 0 ; (buf[i] = get_fs_byte(user_name + i)) != '\0' ; ) {
+ if (++i >= MOD_MAX_NAME)
+ return -E2BIG;
+ }
+ return 0;
+}
+
+
+/*
+ * Look for a module by name, ignoring modules marked for deletion.
+ */
+struct module *
+find_module( const char *name)
+{
+ struct module *mp;
+
+ for (mp = module_list ; mp ; mp = mp->next) {
+ if (mp->state == MOD_DELETED)
+ continue;
+ if (!strcmp(mp->name, name))
+ break;
+ }
+ return mp;
+}
+
+static void
+drop_refs(struct module *mp)
+{
+ struct module *step;
+ struct module_ref *prev;
+ struct module_ref *ref;
+
+ for (step = module_list; step; step = step->next) {
+ for (prev = ref = step->ref; ref; ref = prev->next) {
+ if (ref->module == mp) {
+ if (ref == step->ref)
+ step->ref = ref->next;
+ else
+ prev->next = ref->next;
+ break; /* every module only references once! */
+ }
+ else
+ prev = ref;
+ }
+ }
+}
+
+/*
+ * Try to free modules which have been marked for deletion. Returns nonzero
+ * if a module was actually freed.
+ */
+int
+free_modules( void)
+{
+ struct module *mp;
+ struct module **mpp;
+ int did_deletion;
+
+ did_deletion = 0;
+ freeing_modules = 0;
+ mpp = &module_list;
+ while ((mp = *mpp) != NULL) {
+ if (mp->state != MOD_DELETED) {
+ mpp = &mp->next;
+ } else {
+ if (GET_USE_COUNT(mp) != 0) {
+ freeing_modules = 1;
+ mpp = &mp->next;
+ } else { /* delete it */
+ *mpp = mp->next;
+ if (mp->symtab) {
+ if (mp->symtab->n_refs)
+ drop_refs(mp);
+ if (mp->symtab->size)
+ kfree_s(mp->symtab, mp->symtab->size);
+ }
+ vfree(mp->addr);
+ kfree_s(mp, sizeof(struct module) + MOD_MAX_NAME);
+ did_deletion = 1;
+ }
+ }
+ }
+ return did_deletion;
+}
+
+
+/*
+ * Called by the /proc file system to return a current list of modules.
+ */
+int get_module_list(char *buf)
+{
+ char *p;
+ char *q;
+ int i;
+ struct module *mp;
+ struct module_ref *ref;
+ char size[32];
+
+ p = buf;
+ /* Do not show the kernel pseudo module */
+ for (mp = module_list ; mp && mp->next; mp = mp->next) {
+ if (p - buf > 4096 - 100)
+ break; /* avoid overflowing buffer */
+ q = mp->name;
+ i = 20;
+ while (*q) {
+ *p++ = *q++;
+ i--;
+ }
+ sprintf(size, "%d", mp->size);
+ i -= strlen(size);
+ if (i <= 0)
+ i = 1;
+ while (--i >= 0)
+ *p++ = ' ';
+ q = size;
+ while (*q)
+ *p++ = *q++;
+ if (mp->state == MOD_UNINITIALIZED)
+ q = " (uninitialized)";
+ else if (mp->state == MOD_RUNNING)
+ q = "";
+ else if (mp->state == MOD_DELETED)
+ q = " (deleted)";
+ else
+ q = " (bad state)";
+ while (*q)
+ *p++ = *q++;
+
+ if ((ref = mp->ref) != NULL) {
+ *p++ = '\t';
+ *p++ = '[';
+ for (; ref; ref = ref->next) {
+ q = ref->module->name;
+ while (*q)
+ *p++ = *q++;
+ if (ref->next)
+ *p++ = ' ';
+ }
+ *p++ = ']';
+ }
+ *p++ = '\n';
+ }
+ return p - buf;
+}
+
+
+/*
+ * Called by the /proc file system to return a current list of ksyms.
+ */
+int get_ksyms_list(char *buf)
+{
+ struct module *mp;
+ struct internal_symbol *sym;
+ int i;
+ char *p = buf;
+
+ for (mp = module_list; mp; mp = mp->next) {
+ if ((mp->state == MOD_RUNNING) &&
+ (mp->symtab != NULL) && (mp->symtab->n_symbols > 0)) {
+ for (i = mp->symtab->n_symbols,
+ sym = mp->symtab->symbol;
+ i > 0; --i, ++sym) {
+
+ if (p - buf > 4096 - 100) {
+ strcat(p, "...\n");
+ p += strlen(p);
+ return p - buf; /* avoid overflowing buffer */
+ }
+
+ if (mp->name[0]) {
+ sprintf(p, "%08lx %s\t[%s]\n",
+ (long)sym->addr, sym->name, mp->name);
+ }
+ else {
+ sprintf(p, "%08lx %s\n",
+ (long)sym->addr, sym->name);
+ }
+ p += strlen(p);
+ }
+ }
+ }
+
+ return p - buf;
+}
diff --git a/kernel/panic.c b/kernel/panic.c
new file mode 100644
index 000000000..300fcbbf3
--- /dev/null
+++ b/kernel/panic.c
@@ -0,0 +1,32 @@
+/*
+ * linux/kernel/panic.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * This function is used throughout the kernel (including mm and fs)
+ * to indicate a major problem.
+ */
+#include <stdarg.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+asmlinkage void sys_sync(void); /* it's really int */
+
+NORET_TYPE void panic(const char * fmt, ...)
+{
+ static char buf[1024];
+ va_list args;
+
+ va_start(args, fmt);
+ vsprintf(buf, fmt, args);
+ va_end(args);
+ printk(KERN_EMERG "Kernel panic: %s\n",buf);
+ if (current == task[0])
+ printk(KERN_EMERG "In swapper task - not syncing\n");
+ else
+ sys_sync();
+ for(;;);
+}
diff --git a/kernel/printk.c b/kernel/printk.c
new file mode 100644
index 000000000..d92269b30
--- /dev/null
+++ b/kernel/printk.c
@@ -0,0 +1,229 @@
+/*
+ * linux/kernel/printk.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * Modified to make sys_syslog() more flexible: added commands to
+ * return the last 4k of kernel messages, regardless of whether
+ * they've been read or not. Added option to suppress kernel printk's
+ * to the console. Added hook for sending the console messages
+ * elsewhere, in preparation for a serial line console (someday).
+ * Ted Ts'o, 2/11/93.
+ */
+
+#include <stdarg.h>
+
+#include <asm/segment.h>
+#include <asm/system.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+
+#define LOG_BUF_LEN 4096
+
+static char buf[1024];
+
+extern void console_print(const char *);
+
+#define DEFAULT_MESSAGE_LOGLEVEL 7 /* KERN_DEBUG */
+#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything more serious than KERN_DEBUG */
+
+unsigned long log_size = 0;
+struct wait_queue * log_wait = NULL;
+int console_loglevel = DEFAULT_CONSOLE_LOGLEVEL;
+
+static void (*console_print_proc)(const char *) = 0;
+static char log_buf[LOG_BUF_LEN];
+static unsigned long log_start = 0;
+static unsigned long logged_chars = 0;
+
+/*
+ * Commands to sys_syslog:
+ *
+ * 0 -- Close the log. Currently a NOP.
+ * 1 -- Open the log. Currently a NOP.
+ * 2 -- Read from the log.
+ * 3 -- Read up to the last 4k of messages in the ring buffer.
+ * 4 -- Read and clear last 4k of messages in the ring buffer
+ * 5 -- Clear ring buffer.
+ * 6 -- Disable printk's to console
+ * 7 -- Enable printk's to console
+ * 8 -- Set level of messages printed to console
+ */
+asmlinkage int sys_syslog(int type, char * buf, int len)
+{
+ unsigned long i, j, count;
+ int do_clear = 0;
+ char c;
+ int error;
+
+ if ((type != 3) && !suser())
+ return -EPERM;
+ switch (type) {
+ case 0: /* Close log */
+ return 0;
+ case 1: /* Open log */
+ return 0;
+ case 2: /* Read from log */
+ if (!buf || len < 0)
+ return -EINVAL;
+ if (!len)
+ return 0;
+ error = verify_area(VERIFY_WRITE,buf,len);
+ if (error)
+ return error;
+ cli();
+ while (!log_size) {
+ if (current->signal & ~current->blocked) {
+ sti();
+ return -ERESTARTSYS;
+ }
+ interruptible_sleep_on(&log_wait);
+ }
+ i = 0;
+ while (log_size && i < len) {
+ c = *((char *) log_buf+log_start);
+ log_start++;
+ log_size--;
+ log_start &= LOG_BUF_LEN-1;
+ sti();
+ put_fs_byte(c,buf);
+ buf++;
+ i++;
+ cli();
+ }
+ sti();
+ return i;
+ case 4: /* Read/clear last kernel messages */
+ do_clear = 1;
+ /* FALL THRU */
+ case 3: /* Read last kernel messages */
+ if (!buf || len < 0)
+ return -EINVAL;
+ if (!len)
+ return 0;
+ error = verify_area(VERIFY_WRITE,buf,len);
+ if (error)
+ return error;
+ count = len;
+ if (count > LOG_BUF_LEN)
+ count = LOG_BUF_LEN;
+ if (count > logged_chars)
+ count = logged_chars;
+ j = log_start + log_size - count;
+ for (i = 0; i < count; i++) {
+ c = *((char *) log_buf+(j++ & (LOG_BUF_LEN-1)));
+ put_fs_byte(c, buf++);
+ }
+ if (do_clear)
+ logged_chars = 0;
+ return i;
+ case 5: /* Clear ring buffer */
+ logged_chars = 0;
+ return 0;
+ case 6: /* Disable logging to console */
+ console_loglevel = 1; /* only panic messages shown */
+ return 0;
+ case 7: /* Enable logging to console */
+ console_loglevel = DEFAULT_CONSOLE_LOGLEVEL;
+ return 0;
+ case 8:
+ if (len < 0 || len > 8)
+ return -EINVAL;
+ console_loglevel = len;
+ return 0;
+ }
+ return -EINVAL;
+}
+
+
+asmlinkage int printk(const char *fmt, ...)
+{
+ va_list args;
+ int i;
+ char *msg, *p, *buf_end;
+ static char msg_level = -1;
+ long flags;
+
+ save_flags(flags);
+ cli();
+ va_start(args, fmt);
+ i = vsprintf(buf + 3, fmt, args); /* hopefully i < sizeof(buf)-4 */
+ buf_end = buf + 3 + i;
+ va_end(args);
+ for (p = buf + 3; p < buf_end; p++) {
+ msg = p;
+ if (msg_level < 0) {
+ if (
+ p[0] != '<' ||
+ p[1] < '0' ||
+ p[1] > '7' ||
+ p[2] != '>'
+ ) {
+ p -= 3;
+ p[0] = '<';
+ p[1] = DEFAULT_MESSAGE_LOGLEVEL - 1 + '0';
+ p[2] = '>';
+ } else
+ msg += 3;
+ msg_level = p[1] - '0';
+ }
+ for (; p < buf_end; p++) {
+ log_buf[(log_start+log_size) & (LOG_BUF_LEN-1)] = *p;
+ if (log_size < LOG_BUF_LEN)
+ log_size++;
+ else
+ log_start++;
+ logged_chars++;
+ if (*p == '\n')
+ break;
+ }
+ if (msg_level < console_loglevel && console_print_proc) {
+ char tmp = p[1];
+ p[1] = '\0';
+ (*console_print_proc)(msg);
+ p[1] = tmp;
+ }
+ if (*p == '\n')
+ msg_level = -1;
+ }
+ restore_flags(flags);
+ wake_up_interruptible(&log_wait);
+ return i;
+}
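
printk() appends every character to a 4 kB ring buffer; because LOG_BUF_LEN is a power of two the wrap-around is a single AND with LOG_BUF_LEN-1, and once the buffer is full log_start advances so the oldest characters get overwritten. The sketch below reproduces just that indexing in userspace, with an 8-byte buffer so the wrap is easy to see:

/* Power-of-two ring buffer indexing as used by the printk log above. */
#include <stdio.h>

#define LOG_BUF_LEN 8			/* tiny, just to show the wrap-around */

static char log_buf[LOG_BUF_LEN];
static unsigned long log_start = 0;	/* index of the oldest stored char */
static unsigned long log_size = 0;	/* how many chars are currently stored */

static void log_char(char c)
{
	log_buf[(log_start + log_size) & (LOG_BUF_LEN - 1)] = c;
	if (log_size < LOG_BUF_LEN)
		log_size++;		/* still filling up */
	else
		log_start++;		/* full: drop the oldest char instead */
}

int main(void)
{
	const char *msg = "hello, world";	/* longer than the buffer */
	unsigned long i;

	while (*msg)
		log_char(*msg++);
	for (i = 0; i < log_size; i++)		/* prints "o, world" */
		putchar(log_buf[(log_start + i) & (LOG_BUF_LEN - 1)]);
	putchar('\n');
	return 0;
}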
+
+/*
+ * The console driver calls this routine during kernel initialization
+ * to register the console printing procedure with printk() and to
+ * print any messages that were printed by the kernel before the
+ * console driver was initialized.
+ */
+void register_console(void (*proc)(const char *))
+{
+ int i,j;
+ int p = log_start;
+ char buf[16];
+ char msg_level = -1;
+ char *q;
+
+ console_print_proc = proc;
+
+ for (i=0,j=0; i < log_size; i++) {
+ buf[j++] = log_buf[p];
+ p++; p &= LOG_BUF_LEN-1;
+ if (buf[j-1] != '\n' && i < log_size - 1 && j < sizeof(buf)-1)
+ continue;
+ buf[j] = 0;
+ q = buf;
+ if (msg_level < 0) {
+ msg_level = buf[1] - '0';
+ q = buf + 3;
+ }
+ if (msg_level < console_loglevel)
+ (*proc)(q);
+ if (buf[j-1] == '\n')
+ msg_level = -1;
+ j = 0;
+ }
+}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
new file mode 100644
index 000000000..cade04750
--- /dev/null
+++ b/kernel/ptrace.c
@@ -0,0 +1,517 @@
+/* ptrace.c */
+/* By Ross Biro 1/23/92 */
+/* edited by Linus Torvalds */
+
+#include <linux/head.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <linux/debugreg.h>
+
+/*
+ * This does not yet catch the signals that are sent when the child
+ * dies in exit.c or in signal.c.
+ */
+
+/* determines which flags the user has access to. */
+/* 1 = access 0 = no access */
+#define FLAG_MASK 0x00044dd5
+
+/* sets the trap flag. */
+#define TRAP_FLAG 0x100
+
+/*
+ * this is the number to subtract from the top of the stack. To find
+ * the local frame.
+ */
+#define MAGICNUMBER 68
+
+/* change a pid into a task struct. */
+static inline struct task_struct * get_task(int pid)
+{
+ int i;
+
+ for (i = 1; i < NR_TASKS; i++) {
+ if (task[i] != NULL && (task[i]->pid == pid))
+ return task[i];
+ }
+ return NULL;
+}
+
+/*
+ * this routine will get a word off of the process's privileged stack.
+ * the offset is how far from the base addr as stored in the TSS.
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+static inline int get_stack_long(struct task_struct *task, int offset)
+{
+ unsigned char *stack;
+
+ stack = (unsigned char *)task->tss.esp0;
+ stack += offset;
+ return (*((int *)stack));
+}
+
+/*
+ * this routine will put a word on the process's privileged stack.
+ * the offset is how far from the base addr as stored in the TSS.
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+static inline int put_stack_long(struct task_struct *task, int offset,
+ unsigned long data)
+{
+ unsigned char * stack;
+
+ stack = (unsigned char *) task->tss.esp0;
+ stack += offset;
+ *(unsigned long *) stack = data;
+ return 0;
+}
+
+/*
+ * This routine gets a long from any process space by following the page
+ * tables. NOTE! You should check that the long isn't on a page boundary,
+ * and that it is in the task area before calling this: this routine does
+ * no checking.
+ */
+static unsigned long get_long(struct vm_area_struct * vma, unsigned long addr)
+{
+ unsigned long page;
+
+repeat:
+ page = *PAGE_DIR_OFFSET(vma->vm_task->tss.cr3, addr);
+ if (page & PAGE_PRESENT) {
+ page &= PAGE_MASK;
+ page += PAGE_PTR(addr);
+ page = *((unsigned long *) page);
+ }
+ if (!(page & PAGE_PRESENT)) {
+ do_no_page(vma, addr, 0);
+ goto repeat;
+ }
+/* this is a hack for non-kernel-mapped video buffers and similar */
+ if (page >= high_memory)
+ return 0;
+ page &= PAGE_MASK;
+ page += addr & ~PAGE_MASK;
+ return *(unsigned long *) page;
+}
+
+/*
+ * This routine puts a long into any process space by following the page
+ * tables. NOTE! You should check that the long isn't on a page boundary,
+ * and that it is in the task area before calling this: this routine does
+ * no checking.
+ *
+ * Now keeps R/W state of page so that a text page stays readonly
+ * even if a debugger scribbles breakpoints into it. -M.U-
+ */
+static void put_long(struct vm_area_struct * vma, unsigned long addr,
+ unsigned long data)
+{
+ unsigned long page, pte = 0;
+ int readonly = 0;
+
+repeat:
+ page = *PAGE_DIR_OFFSET(vma->vm_task->tss.cr3, addr);
+ if (page & PAGE_PRESENT) {
+ page &= PAGE_MASK;
+ page += PAGE_PTR(addr);
+ pte = page;
+ page = *((unsigned long *) page);
+ }
+ if (!(page & PAGE_PRESENT)) {
+ do_no_page(vma, addr, 0 /* PAGE_RW */);
+ goto repeat;
+ }
+ if (!(page & PAGE_RW)) {
+ if (!(page & PAGE_COW))
+ readonly = 1;
+ do_wp_page(vma, addr, PAGE_RW | PAGE_PRESENT);
+ goto repeat;
+ }
+/* this is a hack for non-kernel-mapped video buffers and similar */
+ if (page >= high_memory)
+ return;
+/* we're bypassing pagetables, so we have to set the dirty bit ourselves */
+ *(unsigned long *) pte |= (PAGE_DIRTY|PAGE_COW);
+ page &= PAGE_MASK;
+ page += addr & ~PAGE_MASK;
+ *(unsigned long *) page = data;
+ if (readonly) {
+ *(unsigned long *) pte &=~ (PAGE_RW|PAGE_COW);
+ invalidate();
+ }
+}
+
+static struct vm_area_struct * find_vma(struct task_struct * tsk, unsigned long addr)
+{
+ struct vm_area_struct * vma;
+
+ addr &= PAGE_MASK;
+ for (vma = tsk->mm->mmap ; ; vma = vma->vm_next) {
+ if (!vma)
+ return NULL;
+ if (vma->vm_end > addr)
+ break;
+ }
+ if (vma->vm_start <= addr)
+ return vma;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ return NULL;
+ if (vma->vm_end - addr > tsk->rlim[RLIMIT_STACK].rlim_cur)
+ return NULL;
+ vma->vm_offset -= vma->vm_start - addr;
+ vma->vm_start = addr;
+ return vma;
+}
+
+/*
+ * This routine checks the page boundaries, and that the offset is
+ * within the task area. It then calls get_long() to read a long.
+ */
+static int read_long(struct task_struct * tsk, unsigned long addr,
+ unsigned long * result)
+{
+ struct vm_area_struct * vma = find_vma(tsk, addr);
+
+ if (!vma)
+ return -EIO;
+ if ((addr & ~PAGE_MASK) > PAGE_SIZE-sizeof(long)) {
+ unsigned long low,high;
+ struct vm_area_struct * vma_high = vma;
+
+ if (addr + sizeof(long) >= vma->vm_end) {
+ vma_high = vma->vm_next;
+ if (!vma_high || vma_high->vm_start != vma->vm_end)
+ return -EIO;
+ }
+ low = get_long(vma, addr & ~(sizeof(long)-1));
+ high = get_long(vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
+ switch (addr & (sizeof(long)-1)) {
+ case 1:
+ low >>= 8;
+ low |= high << 24;
+ break;
+ case 2:
+ low >>= 16;
+ low |= high << 16;
+ break;
+ case 3:
+ low >>= 24;
+ low |= high << 8;
+ break;
+ }
+ *result = low;
+ } else
+ *result = get_long(vma, addr);
+ return 0;
+}
+
+/*
+ * This routine checks the page boundaries, and that the offset is
+ * within the task area. It then calls put_long() to write a long.
+ */
+static int write_long(struct task_struct * tsk, unsigned long addr,
+ unsigned long data)
+{
+ struct vm_area_struct * vma = find_vma(tsk, addr);
+
+ if (!vma)
+ return -EIO;
+ if ((addr & ~PAGE_MASK) > PAGE_SIZE-sizeof(long)) {
+ unsigned long low,high;
+ struct vm_area_struct * vma_high = vma;
+
+ if (addr + sizeof(long) >= vma->vm_end) {
+ vma_high = vma->vm_next;
+ if (!vma_high || vma_high->vm_start != vma->vm_end)
+ return -EIO;
+ }
+ low = get_long(vma, addr & ~(sizeof(long)-1));
+ high = get_long(vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
+ switch (addr & (sizeof(long)-1)) {
+ case 0: /* shouldn't happen, but safety first */
+ low = data;
+ break;
+ case 1:
+ low &= 0x000000ff;
+ low |= data << 8;
+ high &= ~0xff;
+ high |= data >> 24;
+ break;
+ case 2:
+ low &= 0x0000ffff;
+ low |= data << 16;
+ high &= ~0xffff;
+ high |= data >> 16;
+ break;
+ case 3:
+ low &= 0x00ffffff;
+ low |= data << 24;
+ high &= ~0xffffff;
+ high |= data >> 8;
+ break;
+ }
+ put_long(vma, addr & ~(sizeof(long)-1),low);
+ put_long(vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1),high);
+ } else
+ put_long(vma, addr, data);
+ return 0;
+}
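
When the address straddles a page (or vma) boundary, read_long() and write_long() fetch the two aligned longs on either side and splice the unaligned value together with shifts; the case label is the byte offset inside the first long. The same merge for the worst case, as a standalone little-endian sketch (unsigned int stands in for the 32-bit long of the i386 this code targets):

/* Reassemble an unaligned 32-bit value from two aligned words, the way
 * read_long() does when the address crosses a page boundary.
 * Assumes a little-endian layout, as on the i386. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char bytes[8] = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88 };
	unsigned int low, high, result;
	unsigned int offset = 3;		/* unaligned by 3 bytes, worst case */

	memcpy(&low,  bytes,     sizeof(low));	/* the two aligned "longs" */
	memcpy(&high, bytes + 4, sizeof(high));

	/* the "case 3" branch above */
	result = (low >> 24) | (high << 8);

	printf("expected 0x77665544, got 0x%08x (offset %u)\n", result, offset);
	return 0;
}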
+
+asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+{
+ struct task_struct *child;
+ struct user * dummy;
+ int i;
+
+ dummy = NULL;
+
+ if (request == PTRACE_TRACEME) {
+ /* are we already being traced? */
+ if (current->flags & PF_PTRACED)
+ return -EPERM;
+ /* set the ptrace bit in the process flags. */
+ current->flags |= PF_PTRACED;
+ return 0;
+ }
+ if (pid == 1) /* you may not mess with init */
+ return -EPERM;
+ if (!(child = get_task(pid)))
+ return -ESRCH;
+ if (request == PTRACE_ATTACH) {
+ if (child == current)
+ return -EPERM;
+ if ((!child->dumpable ||
+ (current->uid != child->euid) ||
+ (current->uid != child->uid) ||
+ (current->gid != child->egid) ||
+ (current->gid != child->gid)) && !suser())
+ return -EPERM;
+ /* the same process cannot be attached many times */
+ if (child->flags & PF_PTRACED)
+ return -EPERM;
+ child->flags |= PF_PTRACED;
+ if (child->p_pptr != current) {
+ REMOVE_LINKS(child);
+ child->p_pptr = current;
+ SET_LINKS(child);
+ }
+ send_sig(SIGSTOP, child, 1);
+ return 0;
+ }
+ if (!(child->flags & PF_PTRACED))
+ return -ESRCH;
+ if (child->state != TASK_STOPPED) {
+ if (request != PTRACE_KILL)
+ return -ESRCH;
+ }
+ if (child->p_pptr != current)
+ return -ESRCH;
+
+ switch (request) {
+ /* when I and D space are separate, these will need to be fixed. */
+ case PTRACE_PEEKTEXT: /* read word at location addr. */
+ case PTRACE_PEEKDATA: {
+ unsigned long tmp;
+ int res;
+
+ res = read_long(child, addr, &tmp);
+ if (res < 0)
+ return res;
+ res = verify_area(VERIFY_WRITE, (void *) data, sizeof(long));
+ if (!res)
+ put_fs_long(tmp,(unsigned long *) data);
+ return res;
+ }
+
+ /* read the word at location addr in the USER area. */
+ case PTRACE_PEEKUSR: {
+ unsigned long tmp;
+ int res;
+
+ if ((addr & 3) || addr < 0 ||
+ addr > sizeof(struct user) - 3)
+ return -EIO;
+
+ res = verify_area(VERIFY_WRITE, (void *) data, sizeof(long));
+ if (res)
+ return res;
+ tmp = 0; /* Default return condition */
+ if(addr < 17*sizeof(long)) {
+ addr = addr >> 2; /* temporary hack. */
+
+ tmp = get_stack_long(child, sizeof(long)*addr - MAGICNUMBER);
+ if (addr == DS || addr == ES ||
+ addr == FS || addr == GS ||
+ addr == CS || addr == SS)
+ tmp &= 0xffff;
+ };
+ if(addr >= (long) &dummy->u_debugreg[0] &&
+ addr <= (long) &dummy->u_debugreg[7]){
+ addr -= (long) &dummy->u_debugreg[0];
+ addr = addr >> 2;
+ tmp = child->debugreg[addr];
+ };
+ put_fs_long(tmp,(unsigned long *) data);
+ return 0;
+ }
+
+ /* when I and D space are separate, this will have to be fixed. */
+ case PTRACE_POKETEXT: /* write the word at location addr. */
+ case PTRACE_POKEDATA:
+ return write_long(child,addr,data);
+
+ case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
+ if ((addr & 3) || addr < 0 ||
+ addr > sizeof(struct user) - 3)
+ return -EIO;
+
+ addr = addr >> 2; /* temporary hack. */
+
+ if (addr == ORIG_EAX)
+ return -EIO;
+ if (addr == DS || addr == ES ||
+ addr == FS || addr == GS ||
+ addr == CS || addr == SS) {
+ data &= 0xffff;
+ if (data && (data & 3) != 3)
+ return -EIO;
+ }
+ if (addr == EFL) { /* flags. */
+ data &= FLAG_MASK;
+ data |= get_stack_long(child, EFL*sizeof(long)-MAGICNUMBER) & ~FLAG_MASK;
+ }
+ /* Do not allow the user to set the debug register for kernel
+ address space */
+ if(addr < 17){
+ if (put_stack_long(child, sizeof(long)*addr-MAGICNUMBER, data))
+ return -EIO;
+ return 0;
+ };
+
+ /* We need to be very careful here. We implicitly
+ want to modify a portion of the task_struct, and we
+ have to be selective about what portions we allow someone
+ to modify. */
+
+ addr = addr << 2; /* Convert back again */
+ if(addr >= (long) &dummy->u_debugreg[0] &&
+ addr <= (long) &dummy->u_debugreg[7]){
+
+ if(addr == (long) &dummy->u_debugreg[4]) return -EIO;
+ if(addr == (long) &dummy->u_debugreg[5]) return -EIO;
+ if(addr < (long) &dummy->u_debugreg[4] &&
+ ((unsigned long) data) >= 0xbffffffd) return -EIO;
+
+ if(addr == (long) &dummy->u_debugreg[7]) {
+ data &= ~DR_CONTROL_RESERVED;
+ for(i=0; i<4; i++)
+ if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
+ return -EIO;
+ };
+
+ addr -= (long) &dummy->u_debugreg;
+ addr = addr >> 2;
+ child->debugreg[addr] = data;
+ return 0;
+ };
+ return -EIO;
+
+ case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
+ case PTRACE_CONT: { /* restart after signal. */
+ long tmp;
+
+ if ((unsigned long) data > NSIG)
+ return -EIO;
+ if (request == PTRACE_SYSCALL)
+ child->flags |= PF_TRACESYS;
+ else
+ child->flags &= ~PF_TRACESYS;
+ child->exit_code = data;
+ child->state = TASK_RUNNING;
+ /* make sure the single step bit is not set. */
+ tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG;
+ put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
+ return 0;
+ }
+
+/*
+ * Make the child exit. The best I can do is send it a SIGKILL.
+ * Perhaps it should be put in the status that it wants to
+ * exit.
+ */
+ case PTRACE_KILL: {
+ long tmp;
+
+ child->state = TASK_RUNNING;
+ child->exit_code = SIGKILL;
+ /* make sure the single step bit is not set. */
+ tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG;
+ put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
+ return 0;
+ }
+
+ case PTRACE_SINGLESTEP: { /* set the trap flag. */
+ long tmp;
+
+ if ((unsigned long) data > NSIG)
+ return -EIO;
+ child->flags &= ~PF_TRACESYS;
+ tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) | TRAP_FLAG;
+ put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
+ child->state = TASK_RUNNING;
+ child->exit_code = data;
+ /* give it a chance to run. */
+ return 0;
+ }
+
+ case PTRACE_DETACH: { /* detach a process that was attached. */
+ long tmp;
+
+ if ((unsigned long) data > NSIG)
+ return -EIO;
+ child->flags &= ~(PF_PTRACED|PF_TRACESYS);
+ child->state = TASK_RUNNING;
+ child->exit_code = data;
+ REMOVE_LINKS(child);
+ child->p_pptr = child->p_opptr;
+ SET_LINKS(child);
+ /* make sure the single step bit is not set. */
+ tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG;
+ put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp);
+ return 0;
+ }
+
+ default:
+ return -EIO;
+ }
+}
+
+asmlinkage void syscall_trace(void)
+{
+ if ((current->flags & (PF_PTRACED|PF_TRACESYS))
+ != (PF_PTRACED|PF_TRACESYS))
+ return;
+ current->exit_code = SIGTRAP;
+ current->state = TASK_STOPPED;
+ notify_parent(current);
+ schedule();
+ /*
+ * this isn't the same as continuing with a signal, but it will do
+ * for normal use. strace only continues with a signal if the
+ * stopping signal is not SIGTRAP. -brl
+ */
+ if (current->exit_code)
+ current->signal |= (1 << (current->exit_code - 1));
+ current->exit_code = 0;
+}
diff --git a/kernel/sched.c b/kernel/sched.c
new file mode 100644
index 000000000..6eed6e8f5
--- /dev/null
+++ b/kernel/sched.c
@@ -0,0 +1,861 @@
+/*
+ * linux/kernel/sched.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * 'sched.c' is the main kernel file. It contains scheduling primitives
+ * (sleep_on, wakeup, schedule etc) as well as a number of simple system
+ * call functions (of the type getpid(), which just extracts a field from
+ * the current task).
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/fdreg.h>
+#include <linux/errno.h>
+#include <linux/time.h>
+#include <linux/ptrace.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/tqueue.h>
+#include <linux/resource.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/segment.h>
+
+#define TIMER_IRQ 0
+
+#include <linux/timex.h>
+
+/*
+ * kernel variables
+ */
+long tick = 1000000 / HZ; /* timer interrupt period */
+volatile struct timeval xtime; /* The current time */
+int tickadj = 500/HZ; /* microsecs */
+
+DECLARE_TASK_QUEUE(tq_timer);
+DECLARE_TASK_QUEUE(tq_immediate);
+
+/*
+ * phase-lock loop variables
+ */
+int time_status = TIME_BAD; /* clock synchronization status */
+long time_offset = 0; /* time adjustment (us) */
+long time_constant = 0; /* pll time constant */
+long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
+long time_precision = 1; /* clock precision (us) */
+long time_maxerror = 0x70000000;/* maximum error */
+long time_esterror = 0x70000000;/* estimated error */
+long time_phase = 0; /* phase offset (scaled us) */
+long time_freq = 0; /* frequency offset (scaled ppm) */
+long time_adj = 0; /* tick adjust (scaled 1 / HZ) */
+long time_reftime = 0; /* time at last adjustment (s) */
+
+long time_adjust = 0;
+long time_adjust_step = 0;
+
+int need_resched = 0;
+unsigned long event = 0;
+
+/*
+ * Tell us the machine setup..
+ */
+int hard_math = 0; /* set by boot/head.S */
+int x86 = 0; /* set by boot/head.S to 3 or 4 */
+int ignore_irq13 = 0; /* set if exception 16 works */
+int wp_works_ok = 0; /* set if paging hardware honours WP */
+int hlt_works_ok = 1; /* set if the "hlt" instruction works */
+
+/*
+ * Bus types ..
+ */
+int EISA_bus = 0;
+
+extern int _setitimer(int, struct itimerval *, struct itimerval *);
+unsigned long * prof_buffer = NULL;
+unsigned long prof_len = 0;
+
+#define _S(nr) (1<<((nr)-1))
+
+extern void mem_use(void);
+
+extern int timer_interrupt(void);
+asmlinkage int system_call(void);
+
+static unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
+static struct vm_area_struct init_mmap = INIT_MMAP;
+struct task_struct init_task = INIT_TASK;
+
+unsigned long volatile jiffies=0;
+
+struct task_struct *current = &init_task;
+struct task_struct *last_task_used_math = NULL;
+
+struct task_struct * task[NR_TASKS] = {&init_task, };
+
+long user_stack [ PAGE_SIZE>>2 ] = { STACK_MAGIC, };
+
+struct {
+ long * a;
+ short b;
+ } stack_start = { & user_stack [PAGE_SIZE>>2] , KERNEL_DS };
+
+struct kernel_stat kstat = { 0 };
+
+/*
+ * 'math_state_restore()' saves the current math information in the
+ * old math state array, and gets the new ones from the current task
+ *
+ * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+ * Don't touch unless you *really* know how it works.
+ */
+asmlinkage void math_state_restore(void)
+{
+ __asm__ __volatile__("clts");
+ if (last_task_used_math == current)
+ return;
+ timer_table[COPRO_TIMER].expires = jiffies+50;
+ timer_active |= 1<<COPRO_TIMER;
+ if (last_task_used_math)
+ __asm__("fnsave %0":"=m" (last_task_used_math->tss.i387));
+ else
+ __asm__("fnclex");
+ last_task_used_math = current;
+ if (current->used_math) {
+ __asm__("frstor %0": :"m" (current->tss.i387));
+ } else {
+ __asm__("fninit");
+ current->used_math=1;
+ }
+ timer_active &= ~(1<<COPRO_TIMER);
+}
+
+#ifndef CONFIG_MATH_EMULATION
+
+asmlinkage void math_emulate(long arg)
+{
+ printk("math-emulation not enabled and no coprocessor found.\n");
+ printk("killing %s.\n",current->comm);
+ send_sig(SIGFPE,current,1);
+ schedule();
+}
+
+#endif /* CONFIG_MATH_EMULATION */
+
+unsigned long itimer_ticks = 0;
+unsigned long itimer_next = ~0;
+
+/*
+ * 'schedule()' is the scheduler function. It's a very simple and nice
+ * scheduler: it's not perfect, but certainly works for most things.
+ * The one thing you might take a look at is the signal-handler code here.
+ *
+ * NOTE!! Task 0 is the 'idle' task, which gets called when no other
+ * tasks can run. It cannot be killed, and it cannot sleep. The 'state'
+ * information in task[0] is never used.
+ *
+ * The "confuse_gcc" goto is used only to get better assembly code..
+ * Dijkstra probably hates me.
+ */
+asmlinkage void schedule(void)
+{
+ int c;
+ struct task_struct * p;
+ struct task_struct * next;
+ unsigned long ticks;
+
+/* check alarm, wake up any interruptible tasks that have got a signal */
+
+ if (intr_count) {
+ printk("Aiee: scheduling in interrupt\n");
+ intr_count = 0;
+ }
+ cli();
+ ticks = itimer_ticks;
+ itimer_ticks = 0;
+ itimer_next = ~0;
+ sti();
+ need_resched = 0;
+ p = &init_task;
+ for (;;) {
+ if ((p = p->next_task) == &init_task)
+ goto confuse_gcc1;
+ if (ticks && p->it_real_value) {
+ if (p->it_real_value <= ticks) {
+ send_sig(SIGALRM, p, 1);
+ if (!p->it_real_incr) {
+ p->it_real_value = 0;
+ goto end_itimer;
+ }
+ do {
+ p->it_real_value += p->it_real_incr;
+ } while (p->it_real_value <= ticks);
+ }
+ p->it_real_value -= ticks;
+ if (p->it_real_value < itimer_next)
+ itimer_next = p->it_real_value;
+ }
+end_itimer:
+ if (p->state != TASK_INTERRUPTIBLE)
+ continue;
+ if (p->signal & ~p->blocked) {
+ p->state = TASK_RUNNING;
+ continue;
+ }
+ if (p->timeout && p->timeout <= jiffies) {
+ p->timeout = 0;
+ p->state = TASK_RUNNING;
+ }
+ }
+confuse_gcc1:
+
+/* this is the scheduler proper: */
+#if 0
+ /* give processes that go to sleep a bit higher priority.. */
+ /* This depends on the values for TASK_XXX */
+ /* This gives smoother scheduling for some things, but */
+ /* can be very unfair under some circumstances, so.. */
+ if (TASK_UNINTERRUPTIBLE >= (unsigned) current->state &&
+ current->counter < current->priority*2) {
+ ++current->counter;
+ }
+#endif
+ c = -1000;
+ next = p = &init_task;
+ for (;;) {
+ if ((p = p->next_task) == &init_task)
+ goto confuse_gcc2;
+ if (p->state == TASK_RUNNING && p->counter > c)
+ c = p->counter, next = p;
+ }
+confuse_gcc2:
+ if (!c) {
+ for_each_task(p)
+ p->counter = (p->counter >> 1) + p->priority;
+ }
+ if (current == next)
+ return;
+ kstat.context_swtch++;
+ switch_to(next);
+ /* Now maybe reload the debug registers */
+ if(current->debugreg[7]){
+ loaddebug(0);
+ loaddebug(1);
+ loaddebug(2);
+ loaddebug(3);
+ loaddebug(6);
+ };
+}
+
+asmlinkage int sys_pause(void)
+{
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ return -ERESTARTNOHAND;
+}
+
+/*
+ * wake_up doesn't wake up stopped processes - they have to be awakened
+ * with signals or similar.
+ *
+ * Note that this doesn't need cli-sti pairs: interrupts may not change
+ * the wait-queue structures directly, but only call wake_up() to wake
+ * a process. The process itself must remove the queue once it has woken.
+ */
+void wake_up(struct wait_queue **q)
+{
+ struct wait_queue *tmp;
+ struct task_struct * p;
+
+ if (!q || !(tmp = *q))
+ return;
+ do {
+ if ((p = tmp->task) != NULL) {
+ if ((p->state == TASK_UNINTERRUPTIBLE) ||
+ (p->state == TASK_INTERRUPTIBLE)) {
+ p->state = TASK_RUNNING;
+ if (p->counter > current->counter + 3)
+ need_resched = 1;
+ }
+ }
+ if (!tmp->next) {
+ printk("wait_queue is bad (eip = %p)\n",
+ __builtin_return_address(0));
+ printk(" q = %p\n",q);
+ printk(" *q = %p\n",*q);
+ printk(" tmp = %p\n",tmp);
+ break;
+ }
+ tmp = tmp->next;
+ } while (tmp != *q);
+}
+
+void wake_up_interruptible(struct wait_queue **q)
+{
+ struct wait_queue *tmp;
+ struct task_struct * p;
+
+ if (!q || !(tmp = *q))
+ return;
+ do {
+ if ((p = tmp->task) != NULL) {
+ if (p->state == TASK_INTERRUPTIBLE) {
+ p->state = TASK_RUNNING;
+ if (p->counter > current->counter + 3)
+ need_resched = 1;
+ }
+ }
+ if (!tmp->next) {
+ printk("wait_queue is bad (eip = %p)\n",
+ __builtin_return_address(0));
+ printk(" q = %p\n",q);
+ printk(" *q = %p\n",*q);
+ printk(" tmp = %p\n",tmp);
+ break;
+ }
+ tmp = tmp->next;
+ } while (tmp != *q);
+}
+
+void __down(struct semaphore * sem)
+{
+ struct wait_queue wait = { current, NULL };
+ add_wait_queue(&sem->wait, &wait);
+ current->state = TASK_UNINTERRUPTIBLE;
+ while (sem->count <= 0) {
+ schedule();
+ current->state = TASK_UNINTERRUPTIBLE;
+ }
+ current->state = TASK_RUNNING;
+ remove_wait_queue(&sem->wait, &wait);
+}
+
+static inline void __sleep_on(struct wait_queue **p, int state)
+{
+ unsigned long flags;
+ struct wait_queue wait = { current, NULL };
+
+ if (!p)
+ return;
+ if (current == task[0])
+ panic("task[0] trying to sleep");
+ current->state = state;
+ add_wait_queue(p, &wait);
+ save_flags(flags);
+ sti();
+ schedule();
+ remove_wait_queue(p, &wait);
+ restore_flags(flags);
+}
+
+void interruptible_sleep_on(struct wait_queue **p)
+{
+ __sleep_on(p,TASK_INTERRUPTIBLE);
+}
+
+void sleep_on(struct wait_queue **p)
+{
+ __sleep_on(p,TASK_UNINTERRUPTIBLE);
+}
+
+/*
+ * The head for the timer-list has an "expires" field of MAX_UINT,
+ * and the sorting routine counts on this..
+ */
+static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL };
+#define SLOW_BUT_DEBUGGING_TIMERS 1
+
+void add_timer(struct timer_list * timer)
+{
+ unsigned long flags;
+ struct timer_list *p;
+
+#if SLOW_BUT_DEBUGGING_TIMERS
+ if (timer->next || timer->prev) {
+ printk("add_timer() called with non-zero list from %p\n",
+ __builtin_return_address(0));
+ return;
+ }
+#endif
+ p = &timer_head;
+ timer->expires += jiffies;
+ save_flags(flags);
+ cli();
+ do {
+ p = p->next;
+ } while (timer->expires > p->expires);
+ timer->next = p;
+ timer->prev = p->prev;
+ p->prev = timer;
+ timer->prev->next = timer;
+ restore_flags(flags);
+}
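
add_timer() keeps pending timers on a circular doubly-linked list sorted by expiry, and the list head acts as a sentinel whose expires value of ~0 terminates the insertion scan without an explicit end-of-list check. A self-contained sketch of that insertion:

/* Sorted insertion into a circular doubly-linked list with a sentinel
 * head whose key is the maximum value, as add_timer() does above. */
#include <stdio.h>

struct timer {
	struct timer *next, *prev;
	unsigned long expires;
};

static struct timer head = { &head, &head, ~0UL };

static void insert_timer(struct timer *t)
{
	struct timer *p = &head;

	do {
		p = p->next;
	} while (t->expires > p->expires);	/* sentinel's ~0 stops the walk */
	t->next = p;
	t->prev = p->prev;
	p->prev = t;
	t->prev->next = t;
}

int main(void)
{
	struct timer a = { 0, 0, 30 }, b = { 0, 0, 10 }, c = { 0, 0, 20 };
	struct timer *p;

	insert_timer(&a);
	insert_timer(&b);
	insert_timer(&c);
	for (p = head.next; p != &head; p = p->next)
		printf("%lu\n", p->expires);	/* prints 10 20 30 */
	return 0;
}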
+
+int del_timer(struct timer_list * timer)
+{
+ unsigned long flags;
+#if SLOW_BUT_DEBUGGING_TIMERS
+ struct timer_list * p;
+
+ p = &timer_head;
+ save_flags(flags);
+ cli();
+ while ((p = p->next) != &timer_head) {
+ if (p == timer) {
+ timer->next->prev = timer->prev;
+ timer->prev->next = timer->next;
+ timer->next = timer->prev = NULL;
+ restore_flags(flags);
+ timer->expires -= jiffies;
+ return 1;
+ }
+ }
+ if (timer->next || timer->prev)
+ printk("del_timer() called from %p with timer not initialized\n",
+ __builtin_return_address(0));
+ restore_flags(flags);
+ return 0;
+#else
+ save_flags(flags);
+ cli();
+ if (timer->next) {
+ timer->next->prev = timer->prev;
+ timer->prev->next = timer->next;
+ timer->next = timer->prev = NULL;
+ restore_flags(flags);
+ timer->expires -= jiffies;
+ return 1;
+ }
+ restore_flags(flags);
+ return 0;
+#endif
+}
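+
+/*
+ * Illustrative sketch, not part of this file: how a piece of driver
+ * code would typically use the add_timer()/del_timer() pair above.
+ * The callback name, the data value and the two-second delay are made
+ * up for the example; note that "expires" is given relative to the
+ * current time and add_timer() converts it to an absolute jiffies
+ * value, while the next/prev pointers must start out NULL.
+ *
+ *	static void my_timeout(unsigned long data)
+ *	{
+ *		printk("example timer fired, data=%lu\n", data);
+ *	}
+ *
+ *	static struct timer_list my_timer = { NULL, NULL, 0, 0, my_timeout };
+ *
+ *	void arm_example(void)
+ *	{
+ *		my_timer.expires = 2*HZ;
+ *		my_timer.data = 42;
+ *		add_timer(&my_timer);
+ *	}
+ *
+ * del_timer(&my_timer) would take it back out, returning 1 if it had
+ * not fired yet.
+ */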
+
+unsigned long timer_active = 0;
+struct timer_struct timer_table[32];
+
+/*
+ * Hmm.. Changed this, as the GNU make sources (load.c) seem to
+ * imply that avenrun[] is the standard name for this kind of thing.
+ * Nothing else seems to be standardized: the fractional size etc
+ * all seem to differ on different machines.
+ */
+unsigned long avenrun[3] = { 0,0,0 };
+
+/*
+ * Nr of active tasks - counted in fixed-point numbers
+ */
+static unsigned long count_active_tasks(void)
+{
+ struct task_struct **p;
+ unsigned long nr = 0;
+
+ for(p = &LAST_TASK; p > &FIRST_TASK; --p)
+ if (*p && ((*p)->state == TASK_RUNNING ||
+ (*p)->state == TASK_UNINTERRUPTIBLE ||
+ (*p)->state == TASK_SWAPPING))
+ nr += FIXED_1;
+ return nr;
+}
+
+static inline void calc_load(void)
+{
+ unsigned long active_tasks; /* fixed-point */
+ static int count = LOAD_FREQ;
+
+ if (count-- > 0)
+ return;
+ count = LOAD_FREQ;
+ active_tasks = count_active_tasks();
+ CALC_LOAD(avenrun[0], EXP_1, active_tasks);
+ CALC_LOAD(avenrun[1], EXP_5, active_tasks);
+ CALC_LOAD(avenrun[2], EXP_15, active_tasks);
+}
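+
+/*
+ * Illustrative note, not part of this file: calc_load() runs every
+ * LOAD_FREQ ticks (nominally five seconds) and CALC_LOAD() -- defined
+ * in <linux/sched.h>; the constants below are quoted from memory --
+ * keeps an exponentially decaying average in 11-bit fixed point
+ * (FIXED_1 == 1 << 11 == 2048):
+ *
+ *	load = (load * EXP_N + active * (FIXED_1 - EXP_N)) >> 11
+ *
+ * With EXP_1 ~= 2048 * exp(-5s/1min) ~= 1884, one runnable task
+ * (active == 2048) lifts an idle avenrun[0] from 0 to
+ * (0*1884 + 2048*164) >> 11 == 164 on the first sample, i.e. a
+ * reported 1-minute load of 164/2048 ~= 0.08.
+ */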
+
+/*
+ * this routine handles the overflow of the microsecond field
+ *
+ * The tricky bits of code to handle the accurate clock support
+ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
+ * They were originally developed for SUN and DEC kernels.
+ * All the kudos should go to Dave for this stuff.
+ *
+ * These were ported to Linux by Philip Gladstone.
+ */
+static void second_overflow(void)
+{
+ long ltemp;
+ /* last time the cmos clock got updated */
+ static long last_rtc_update=0;
+ extern int set_rtc_mmss(unsigned long);
+
+ /* Bump the maxerror field */
+ time_maxerror = (0x70000000-time_maxerror < time_tolerance) ?
+ 0x70000000 : (time_maxerror + time_tolerance);
+
+ /* Run the PLL */
+ if (time_offset < 0) {
+ ltemp = (-(time_offset+1) >> (SHIFT_KG + time_constant)) + 1;
+ time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ time_offset += (time_adj * HZ) >> (SHIFT_SCALE - SHIFT_UPDATE);
+ time_adj = - time_adj;
+ } else if (time_offset > 0) {
+ ltemp = ((time_offset-1) >> (SHIFT_KG + time_constant)) + 1;
+ time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ time_offset -= (time_adj * HZ) >> (SHIFT_SCALE - SHIFT_UPDATE);
+ } else {
+ time_adj = 0;
+ }
+
+ time_adj += (time_freq >> (SHIFT_KF + SHIFT_HZ - SHIFT_SCALE))
+ + FINETUNE;
+
+ /* Handle the leap second stuff */
+ switch (time_status) {
+ case TIME_INS:
+ /* ugly divide should be replaced */
+ if (xtime.tv_sec % 86400 == 0) {
+ xtime.tv_sec--; /* !! */
+ time_status = TIME_OOP;
+ printk("Clock: inserting leap second 23:59:60 GMT\n");
+ }
+ break;
+
+ case TIME_DEL:
+ /* ugly divide should be replaced */
+ if (xtime.tv_sec % 86400 == 86399) {
+ xtime.tv_sec++;
+ time_status = TIME_OK;
+ printk("Clock: deleting leap second 23:59:59 GMT\n");
+ }
+ break;
+
+ case TIME_OOP:
+ time_status = TIME_OK;
+ break;
+ }
+ if (xtime.tv_sec > last_rtc_update + 660)
+ if (set_rtc_mmss(xtime.tv_sec) == 0)
+ last_rtc_update = xtime.tv_sec;
+ else
+ last_rtc_update = xtime.tv_sec - 600; /* do it again in one min */
+}
+
+/*
+ * disregard lost ticks for now.. We don't care enough.
+ */
+static void timer_bh(void * unused)
+{
+ unsigned long mask;
+ struct timer_struct *tp;
+ struct timer_list * timer;
+
+ cli();
+ while ((timer = timer_head.next) != &timer_head && timer->expires < jiffies) {
+ void (*fn)(unsigned long) = timer->function;
+ unsigned long data = timer->data;
+ timer->next->prev = timer->prev;
+ timer->prev->next = timer->next;
+ timer->next = timer->prev = NULL;
+ sti();
+ fn(data);
+ cli();
+ }
+ sti();
+
+ for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
+ if (mask > timer_active)
+ break;
+ if (!(mask & timer_active))
+ continue;
+ if (tp->expires > jiffies)
+ continue;
+ timer_active &= ~mask;
+ tp->fn();
+ sti();
+ }
+}
+
+void tqueue_bh(void * unused)
+{
+ run_task_queue(&tq_timer);
+}
+
+void immediate_bh(void * unused)
+{
+ run_task_queue(&tq_immediate);
+}
+
+/*
+ * The int argument is really a (struct pt_regs *), in case the
+ * interrupt wants to know from where it was called. The timer
+ * irq uses this to decide if it should update the user or system
+ * times.
+ */
+static void do_timer(struct pt_regs * regs)
+{
+ unsigned long mask;
+ struct timer_struct *tp;
+
+ long ltemp, psecs;
+
+	/* Advance the phase; once it accumulates a whole microsecond,
+	 * move that microsecond over into this tick's advance as well.
+	 */
+ time_phase += time_adj;
+ if (time_phase < -FINEUSEC) {
+ ltemp = -time_phase >> SHIFT_SCALE;
+ time_phase += ltemp << SHIFT_SCALE;
+ xtime.tv_usec += tick + time_adjust_step - ltemp;
+ }
+ else if (time_phase > FINEUSEC) {
+ ltemp = time_phase >> SHIFT_SCALE;
+ time_phase -= ltemp << SHIFT_SCALE;
+ xtime.tv_usec += tick + time_adjust_step + ltemp;
+ } else
+ xtime.tv_usec += tick + time_adjust_step;
+
+ if (time_adjust)
+ {
+ /* We are doing an adjtime thing.
+ *
+ * Modify the value of the tick for next time.
+ * Note that a positive delta means we want the clock
+ * to run fast. This means that the tick should be bigger
+ *
+ * Limit the amount of the step for *next* tick to be
+ * in the range -tickadj .. +tickadj
+ */
+ if (time_adjust > tickadj)
+ time_adjust_step = tickadj;
+ else if (time_adjust < -tickadj)
+ time_adjust_step = -tickadj;
+ else
+ time_adjust_step = time_adjust;
+
+ /* Reduce by this step the amount of time left */
+ time_adjust -= time_adjust_step;
+ }
+ else
+ time_adjust_step = 0;
+
+ if (xtime.tv_usec >= 1000000) {
+ xtime.tv_usec -= 1000000;
+ xtime.tv_sec++;
+ second_overflow();
+ }
+
+ jiffies++;
+ calc_load();
+ if ((VM_MASK & regs->eflags) || (3 & regs->cs)) {
+ current->utime++;
+ if (current != task[0]) {
+ if (current->priority < 15)
+ kstat.cpu_nice++;
+ else
+ kstat.cpu_user++;
+ }
+ /* Update ITIMER_VIRT for current task if not in a system call */
+ if (current->it_virt_value && !(--current->it_virt_value)) {
+ current->it_virt_value = current->it_virt_incr;
+ send_sig(SIGVTALRM,current,1);
+ }
+ } else {
+ current->stime++;
+ if(current != task[0])
+ kstat.cpu_system++;
+#ifdef CONFIG_PROFILE
+ if (prof_buffer && current != task[0]) {
+ unsigned long eip = regs->eip;
+ eip >>= 2;
+ if (eip < prof_len)
+ prof_buffer[eip]++;
+ }
+#endif
+ }
+ /*
+ * check the cpu time limit on the process.
+ */
+ if ((current->rlim[RLIMIT_CPU].rlim_max != RLIM_INFINITY) &&
+ (((current->stime + current->utime) / HZ) >= current->rlim[RLIMIT_CPU].rlim_max))
+ send_sig(SIGKILL, current, 1);
+ if ((current->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) &&
+ (((current->stime + current->utime) % HZ) == 0)) {
+ psecs = (current->stime + current->utime) / HZ;
+ /* send when equal */
+ if (psecs == current->rlim[RLIMIT_CPU].rlim_cur)
+ send_sig(SIGXCPU, current, 1);
+ /* and every five seconds thereafter. */
+ else if ((psecs > current->rlim[RLIMIT_CPU].rlim_cur) &&
+ ((psecs - current->rlim[RLIMIT_CPU].rlim_cur) % 5) == 0)
+ send_sig(SIGXCPU, current, 1);
+ }
+
+ if (current != task[0] && 0 > --current->counter) {
+ current->counter = 0;
+ need_resched = 1;
+ }
+ /* Update ITIMER_PROF for the current task */
+ if (current->it_prof_value && !(--current->it_prof_value)) {
+ current->it_prof_value = current->it_prof_incr;
+ send_sig(SIGPROF,current,1);
+ }
+ for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
+ if (mask > timer_active)
+ break;
+ if (!(mask & timer_active))
+ continue;
+ if (tp->expires > jiffies)
+ continue;
+ mark_bh(TIMER_BH);
+ }
+ cli();
+ itimer_ticks++;
+ if (itimer_ticks > itimer_next)
+ need_resched = 1;
+ if (timer_head.next->expires < jiffies)
+ mark_bh(TIMER_BH);
+ if (tq_timer != &tq_last)
+ mark_bh(TQUEUE_BH);
+ sti();
+}
+
+asmlinkage int sys_alarm(long seconds)
+{
+ struct itimerval it_new, it_old;
+
+ it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
+ it_new.it_value.tv_sec = seconds;
+ it_new.it_value.tv_usec = 0;
+ _setitimer(ITIMER_REAL, &it_new, &it_old);
+ return(it_old.it_value.tv_sec + (it_old.it_value.tv_usec / 1000000));
+}
+
+asmlinkage int sys_getpid(void)
+{
+ return current->pid;
+}
+
+asmlinkage int sys_getppid(void)
+{
+ return current->p_opptr->pid;
+}
+
+asmlinkage int sys_getuid(void)
+{
+ return current->uid;
+}
+
+asmlinkage int sys_geteuid(void)
+{
+ return current->euid;
+}
+
+asmlinkage int sys_getgid(void)
+{
+ return current->gid;
+}
+
+asmlinkage int sys_getegid(void)
+{
+ return current->egid;
+}
+
+asmlinkage int sys_nice(long increment)
+{
+ int newprio;
+
+ if (increment < 0 && !suser())
+ return -EPERM;
+ newprio = current->priority - increment;
+ if (newprio < 1)
+ newprio = 1;
+ if (newprio > 35)
+ newprio = 35;
+ current->priority = newprio;
+ return 0;
+}
+
+static void show_task(int nr,struct task_struct * p)
+{
+ unsigned long free;
+ static char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
+
+ printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
+ if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
+ printk(stat_nam[p->state]);
+ else
+ printk(" ");
+ if (p == current)
+ printk(" current ");
+ else
+ printk(" %08lX ", ((unsigned long *)p->tss.esp)[3]);
+ for (free = 1; free < 1024 ; free++) {
+ if (((unsigned long *)p->kernel_stack_page)[free])
+ break;
+ }
+ printk("%5lu %5d %6d ", free << 2, p->pid, p->p_pptr->pid);
+ if (p->p_cptr)
+ printk("%5d ", p->p_cptr->pid);
+ else
+ printk(" ");
+ if (p->p_ysptr)
+ printk("%7d", p->p_ysptr->pid);
+ else
+ printk(" ");
+ if (p->p_osptr)
+ printk(" %5d\n", p->p_osptr->pid);
+ else
+ printk("\n");
+}
+
+void show_state(void)
+{
+ int i;
+
+ printk(" free sibling\n");
+ printk(" task PC stack pid father child younger older\n");
+ for (i=0 ; i<NR_TASKS ; i++)
+ if (task[i])
+ show_task(i,task[i]);
+}
+
+void sched_init(void)
+{
+ int i;
+ struct desc_struct * p;
+
+ bh_base[TIMER_BH].routine = timer_bh;
+ bh_base[TQUEUE_BH].routine = tqueue_bh;
+ bh_base[IMMEDIATE_BH].routine = immediate_bh;
+ if (sizeof(struct sigaction) != 16)
+ panic("Struct sigaction MUST be 16 bytes");
+ set_tss_desc(gdt+FIRST_TSS_ENTRY,&init_task.tss);
+ set_ldt_desc(gdt+FIRST_LDT_ENTRY,&default_ldt,1);
+ set_system_gate(0x80,&system_call);
+ p = gdt+2+FIRST_TSS_ENTRY;
+ for(i=1 ; i<NR_TASKS ; i++) {
+ task[i] = NULL;
+ p->a=p->b=0;
+ p++;
+ p->a=p->b=0;
+ p++;
+ }
+/* Clear NT, so that we won't have troubles with that later on */
+ __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl");
+ load_TR(0);
+ load_ldt(0);
+ outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(LATCH & 0xff , 0x40); /* LSB */
+ outb(LATCH >> 8 , 0x40); /* MSB */
+ if (request_irq(TIMER_IRQ,(void (*)(int)) do_timer, 0, "timer") != 0)
+ panic("Could not allocate timer IRQ!");
+}
diff --git a/kernel/signal.c b/kernel/signal.c
new file mode 100644
index 000000000..df7324294
--- /dev/null
+++ b/kernel/signal.c
@@ -0,0 +1,407 @@
+/*
+ * linux/kernel/signal.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+
+#include <asm/segment.h>
+
+#define _S(nr) (1<<((nr)-1))
+
+#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP)))
+
+asmlinkage int do_signal(unsigned long oldmask, struct pt_regs * regs);
+
+asmlinkage int sys_sigprocmask(int how, sigset_t *set, sigset_t *oset)
+{
+ sigset_t new_set, old_set = current->blocked;
+ int error;
+
+ if (set) {
+ error = verify_area(VERIFY_READ, set, sizeof(sigset_t));
+ if (error)
+ return error;
+ new_set = get_fs_long((unsigned long *) set) & _BLOCKABLE;
+ switch (how) {
+ case SIG_BLOCK:
+ current->blocked |= new_set;
+ break;
+ case SIG_UNBLOCK:
+ current->blocked &= ~new_set;
+ break;
+ case SIG_SETMASK:
+ current->blocked = new_set;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ if (oset) {
+ error = verify_area(VERIFY_WRITE, oset, sizeof(sigset_t));
+ if (error)
+ return error;
+ put_fs_long(old_set, (unsigned long *) oset);
+ }
+ return 0;
+}
+
+asmlinkage int sys_sgetmask(void)
+{
+ return current->blocked;
+}
+
+asmlinkage int sys_ssetmask(int newmask)
+{
+ int old=current->blocked;
+
+ current->blocked = newmask & _BLOCKABLE;
+ return old;
+}
+
+asmlinkage int sys_sigpending(sigset_t *set)
+{
+ int error;
+ /* fill in "set" with signals pending but blocked. */
+ error = verify_area(VERIFY_WRITE, set, 4);
+ if (!error)
+ put_fs_long(current->blocked & current->signal, (unsigned long *)set);
+ return error;
+}
+
+/*
+ * atomically swap in the new signal mask, and wait for a signal.
+ */
+asmlinkage int sys_sigsuspend(int restart, unsigned long oldmask, unsigned long set)
+{
+ unsigned long mask;
+ struct pt_regs * regs = (struct pt_regs *) &restart;
+
+ mask = current->blocked;
+ current->blocked = set & _BLOCKABLE;
+ regs->eax = -EINTR;
+ while (1) {
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ if (do_signal(mask,regs))
+ return -EINTR;
+ }
+}
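+
+/*
+ * Illustrative sketch, not part of this file: the reason sigsuspend()
+ * swaps the mask and sleeps atomically is the classic race in a
+ * sigprocmask()/pause() pair, where the signal can arrive after the
+ * mask is restored but before pause() runs.  A user-space caller
+ * (hypothetical names, and assuming a handler has been installed for
+ * SIGUSR1) would do something like:
+ *
+ *	sigset_t block, old;
+ *
+ *	sigemptyset(&block);
+ *	sigaddset(&block, SIGUSR1);
+ *	sigprocmask(SIG_BLOCK, &block, &old);
+ *	... examine state shared with the handler; it cannot run here ...
+ *	sigsuspend(&old);	restores "old" and waits in one step
+ */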
+
+/*
+ * POSIX 3.3.1.3:
+ * "Setting a signal action to SIG_IGN for a signal that is pending
+ * shall cause the pending signal to be discarded, whether or not
+ * it is blocked" (but SIGCHLD is unspecified: linux leaves it alone).
+ *
+ * "Setting a signal action to SIG_DFL for a signal that is pending
+ * and whose default action is to ignore the signal (for example,
+ * SIGCHLD), shall cause the pending signal to be discarded, whether
+ * or not it is blocked"
+ *
+ * Note the silly behaviour of SIGCHLD: SIG_IGN means that the signal
+ * isn't actually ignored, but does automatic child reaping, while
+ * SIG_DFL is explicitly said by POSIX to force the signal to be ignored..
+ */
+static void check_pending(int signum)
+{
+ struct sigaction *p;
+
+ p = signum - 1 + current->sigaction;
+ if (p->sa_handler == SIG_IGN) {
+ if (signum == SIGCHLD)
+ return;
+ current->signal &= ~_S(signum);
+ return;
+ }
+ if (p->sa_handler == SIG_DFL) {
+ if (signum != SIGCONT && signum != SIGCHLD && signum != SIGWINCH)
+ return;
+ current->signal &= ~_S(signum);
+ return;
+ }
+}
+
+asmlinkage int sys_signal(int signum, unsigned long handler)
+{
+ struct sigaction tmp;
+
+ if (signum<1 || signum>32)
+ return -EINVAL;
+ if (signum==SIGKILL || signum==SIGSTOP)
+ return -EINVAL;
+ if (handler >= TASK_SIZE)
+ return -EFAULT;
+ tmp.sa_handler = (void (*)(int)) handler;
+ tmp.sa_mask = 0;
+ tmp.sa_flags = SA_ONESHOT | SA_NOMASK;
+ tmp.sa_restorer = NULL;
+ handler = (long) current->sigaction[signum-1].sa_handler;
+ current->sigaction[signum-1] = tmp;
+ check_pending(signum);
+ return handler;
+}
+
+asmlinkage int sys_sigaction(int signum, const struct sigaction * action,
+ struct sigaction * oldaction)
+{
+ struct sigaction new_sa, *p;
+
+ if (signum<1 || signum>32)
+ return -EINVAL;
+ if (signum==SIGKILL || signum==SIGSTOP)
+ return -EINVAL;
+ p = signum - 1 + current->sigaction;
+ if (action) {
+ int err = verify_area(VERIFY_READ, action, sizeof(*action));
+ if (err)
+ return err;
+ memcpy_fromfs(&new_sa, action, sizeof(struct sigaction));
+ if (new_sa.sa_flags & SA_NOMASK)
+ new_sa.sa_mask = 0;
+ else {
+ new_sa.sa_mask |= _S(signum);
+ new_sa.sa_mask &= _BLOCKABLE;
+ }
+ if (TASK_SIZE <= (unsigned long) new_sa.sa_handler)
+ return -EFAULT;
+ }
+ if (oldaction) {
+ int err = verify_area(VERIFY_WRITE, oldaction, sizeof(*oldaction));
+ if (err)
+ return err;
+ memcpy_tofs(oldaction, p, sizeof(struct sigaction));
+ }
+ if (action) {
+ *p = new_sa;
+ check_pending(signum);
+ }
+ return 0;
+}
+
+asmlinkage int sys_waitpid(pid_t pid,unsigned long * stat_addr, int options);
+
+/*
+ * This sets regs->esp even though we don't actually use sigstacks yet..
+ */
+asmlinkage int sys_sigreturn(unsigned long __unused)
+{
+#define COPY(x) regs->x = context.x
+#define COPY_SEG(x) \
+if ((context.x & 0xfffc) && (context.x & 3) != 3) goto badframe; COPY(x);
+#define COPY_SEG_STRICT(x) \
+if (!(context.x & 0xfffc) || (context.x & 3) != 3) goto badframe; COPY(x);
+ struct sigcontext_struct context;
+ struct pt_regs * regs;
+
+ regs = (struct pt_regs *) &__unused;
+ if (verify_area(VERIFY_READ, (void *) regs->esp, sizeof(context)))
+ goto badframe;
+ memcpy_fromfs(&context,(void *) regs->esp, sizeof(context));
+ current->blocked = context.oldmask & _BLOCKABLE;
+ COPY_SEG(ds);
+ COPY_SEG(es);
+ COPY_SEG(fs);
+ COPY_SEG(gs);
+ COPY_SEG_STRICT(ss);
+ COPY_SEG_STRICT(cs);
+ COPY(eip);
+ COPY(ecx); COPY(edx);
+ COPY(ebx);
+ COPY(esp); COPY(ebp);
+ COPY(edi); COPY(esi);
+ regs->eflags &= ~0x40DD5;
+ regs->eflags |= context.eflags & 0x40DD5;
+ regs->orig_eax = -1; /* disable syscall checks */
+ return context.eax;
+badframe:
+ do_exit(SIGSEGV);
+}
+
+/*
+ * Set up a signal frame... Make the stack look the way iBCS2 expects
+ * it to look.
+ */
+static void setup_frame(struct sigaction * sa, unsigned long ** fp, unsigned long eip,
+ struct pt_regs * regs, int signr, unsigned long oldmask)
+{
+ unsigned long * frame;
+
+#define __CODE ((unsigned long)(frame+24))
+#define CODE(x) ((unsigned long *) ((x)+__CODE))
+ frame = *fp;
+ if (regs->ss != USER_DS)
+ frame = (unsigned long *) sa->sa_restorer;
+ frame -= 32;
+ if (verify_area(VERIFY_WRITE,frame,32*4))
+ do_exit(SIGSEGV);
+/* set up the "normal" stack seen by the signal handler (iBCS2) */
+ put_fs_long(__CODE,frame);
+ if (current->exec_domain && current->exec_domain->signal_invmap)
+ put_fs_long(current->exec_domain->signal_invmap[signr], frame+1);
+ else
+ put_fs_long(signr, frame+1);
+ put_fs_long(regs->gs, frame+2);
+ put_fs_long(regs->fs, frame+3);
+ put_fs_long(regs->es, frame+4);
+ put_fs_long(regs->ds, frame+5);
+ put_fs_long(regs->edi, frame+6);
+ put_fs_long(regs->esi, frame+7);
+ put_fs_long(regs->ebp, frame+8);
+ put_fs_long((long)*fp, frame+9);
+ put_fs_long(regs->ebx, frame+10);
+ put_fs_long(regs->edx, frame+11);
+ put_fs_long(regs->ecx, frame+12);
+ put_fs_long(regs->eax, frame+13);
+ put_fs_long(current->tss.trap_no, frame+14);
+ put_fs_long(current->tss.error_code, frame+15);
+ put_fs_long(eip, frame+16);
+ put_fs_long(regs->cs, frame+17);
+ put_fs_long(regs->eflags, frame+18);
+ put_fs_long(regs->esp, frame+19);
+ put_fs_long(regs->ss, frame+20);
+ put_fs_long(0,frame+21); /* 387 state pointer - not implemented*/
+/* non-iBCS2 extensions.. */
+ put_fs_long(oldmask, frame+22);
+ put_fs_long(current->tss.cr2, frame+23);
+/* set up the return code... */
+ put_fs_long(0x0000b858, CODE(0)); /* popl %eax ; movl $,%eax */
+ put_fs_long(0x80cd0000, CODE(4)); /* int $0x80 */
+ put_fs_long(__NR_sigreturn, CODE(2));
+ *fp = frame;
+#undef __CODE
+#undef CODE
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL, not
+ * even by mistake.
+ *
+ * Note that we go through the signals twice: once to check the signals that
+ * the kernel can handle, and then we build all the user-level signal handling
+ * stack-frames in one go after that.
+ */
+asmlinkage int do_signal(unsigned long oldmask, struct pt_regs * regs)
+{
+ unsigned long mask = ~current->blocked;
+ unsigned long handler_signal = 0;
+ unsigned long *frame = NULL;
+ unsigned long eip = 0;
+ unsigned long signr;
+ struct sigaction * sa;
+
+ while ((signr = current->signal & mask)) {
+ __asm__("bsf %2,%1\n\t"
+ "btrl %1,%0"
+ :"=m" (current->signal),"=r" (signr)
+ :"1" (signr));
+ sa = current->sigaction + signr;
+ signr++;
+ if ((current->flags & PF_PTRACED) && signr != SIGKILL) {
+ current->exit_code = signr;
+ current->state = TASK_STOPPED;
+ notify_parent(current);
+ schedule();
+ if (!(signr = current->exit_code))
+ continue;
+ current->exit_code = 0;
+ if (signr == SIGSTOP)
+ continue;
+ if (_S(signr) & current->blocked) {
+ current->signal |= _S(signr);
+ continue;
+ }
+ sa = current->sigaction + signr - 1;
+ }
+ if (sa->sa_handler == SIG_IGN) {
+ if (signr != SIGCHLD)
+ continue;
+ /* check for SIGCHLD: it's special */
+ while (sys_waitpid(-1,NULL,WNOHANG) > 0)
+ /* nothing */;
+ continue;
+ }
+ if (sa->sa_handler == SIG_DFL) {
+ if (current->pid == 1)
+ continue;
+ switch (signr) {
+ case SIGCONT: case SIGCHLD: case SIGWINCH:
+ continue;
+
+ case SIGSTOP: case SIGTSTP: case SIGTTIN: case SIGTTOU:
+ if (current->flags & PF_PTRACED)
+ continue;
+ current->state = TASK_STOPPED;
+ current->exit_code = signr;
+ if (!(current->p_pptr->sigaction[SIGCHLD-1].sa_flags &
+ SA_NOCLDSTOP))
+ notify_parent(current);
+ schedule();
+ continue;
+
+ case SIGQUIT: case SIGILL: case SIGTRAP:
+ case SIGIOT: case SIGFPE: case SIGSEGV:
+ if (current->binfmt && current->binfmt->core_dump) {
+ if (current->binfmt->core_dump(signr, regs))
+ signr |= 0x80;
+ }
+ /* fall through */
+ default:
+ current->signal |= _S(signr & 0x7f);
+ do_exit(signr);
+ }
+ }
+ /*
+ * OK, we're invoking a handler
+ */
+ if (regs->orig_eax >= 0) {
+ if (regs->eax == -ERESTARTNOHAND ||
+ (regs->eax == -ERESTARTSYS && !(sa->sa_flags & SA_RESTART)))
+ regs->eax = -EINTR;
+ }
+ handler_signal |= 1 << (signr-1);
+ mask &= ~sa->sa_mask;
+ }
+ if (regs->orig_eax >= 0 &&
+ (regs->eax == -ERESTARTNOHAND ||
+ regs->eax == -ERESTARTSYS ||
+ regs->eax == -ERESTARTNOINTR)) {
+ regs->eax = regs->orig_eax;
+ regs->eip -= 2;
+ }
+ if (!handler_signal) /* no handler will be called - return 0 */
+ return 0;
+ eip = regs->eip;
+ frame = (unsigned long *) regs->esp;
+ signr = 1;
+ sa = current->sigaction;
+ for (mask = 1 ; mask ; sa++,signr++,mask += mask) {
+ if (mask > handler_signal)
+ break;
+ if (!(mask & handler_signal))
+ continue;
+ setup_frame(sa,&frame,eip,regs,signr,oldmask);
+ eip = (unsigned long) sa->sa_handler;
+ if (sa->sa_flags & SA_ONESHOT)
+ sa->sa_handler = NULL;
+/* force a supervisor-mode page-in of the signal handler to reduce races */
+ __asm__("testb $0,%%fs:%0": :"m" (*(char *) eip));
+ regs->cs = USER_CS; regs->ss = USER_DS;
+ regs->ds = USER_DS; regs->es = USER_DS;
+ regs->gs = USER_DS; regs->fs = USER_DS;
+ current->blocked |= sa->sa_mask;
+ oldmask |= sa->sa_mask;
+ }
+ regs->esp = (unsigned long) frame;
+ regs->eip = eip; /* "return" to the first handler */
+ current->tss.trap_no = current->tss.error_code = 0;
+ return 1;
+}
diff --git a/kernel/splx.c b/kernel/splx.c
new file mode 100644
index 000000000..c1b292ec9
--- /dev/null
+++ b/kernel/splx.c
@@ -0,0 +1,27 @@
+/*
+ * splx.c - SYSV DDI/DKI ipl manipulation functions
+ *
+ * Internally, many unices use a range of different interrupt
+ * privilege levels, i.e. from "allow all interrupts" (7) to
+ * "allow no interrupts" (0) under SYSV.
+ *
+ * This simple splx() function behaves as the SYSV DDI/DKI function does,
+ * although since Linux only implements the equivalent of level 0 (cli) and
+ * level 7 (sti), this implementation only supports those two levels.
+ *
+ * Unlike the current Linux routines, splx() also returns the
+ * old privilege level so that it can be restored.
+ */
+
+#include <asm/system.h>
+
+int splx (int new_level) {
+ register int old_level, tmp;
+ save_flags(tmp);
+ old_level = (tmp & 0x200) ? 7 : 0;
+ if (new_level)
+ sti();
+ else
+ cli();
+ return old_level;
+}
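+
+/*
+ * Illustrative sketch, not part of this file: code ported from a SYSV
+ * driver would use the returned level to nest correctly, e.g.
+ *
+ *	int s;
+ *
+ *	s = splx(0);	block interrupts, remember the old level
+ *	... touch data shared with an interrupt handler ...
+ *	splx(s);	put the previous level back
+ *
+ * Under the 0/7 convention used above this boils down to cli() followed
+ * by a conditional sti(), but it lets the SYSV call sites stay unchanged.
+ */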
diff --git a/kernel/sys.c b/kernel/sys.c
new file mode 100644
index 000000000..1ce3ee387
--- /dev/null
+++ b/kernel/sys.c
@@ -0,0 +1,787 @@
+/*
+ * linux/kernel/sys.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/times.h>
+#include <linux/utsname.h>
+#include <linux/param.h>
+#include <linux/resource.h>
+#include <linux/signal.h>
+#include <linux/string.h>
+#include <linux/ptrace.h>
+#include <linux/stat.h>
+#include <linux/mman.h>
+
+#include <asm/segment.h>
+#include <asm/io.h>
+
+/*
+ * this indicates whether you can reboot with ctrl-alt-del: the default is yes
+ */
+static int C_A_D = 1;
+
+extern void adjust_clock(void);
+
+#define PZERO 15
+
+asmlinkage int sys_ni_syscall(void)
+{
+ return -EINVAL;
+}
+
+asmlinkage int sys_idle(void)
+{
+ int i;
+
+ if (current->pid != 0)
+ return -EPERM;
+
+ /* Map out the low memory: it's no longer needed */
+ for (i = 0 ; i < 768 ; i++)
+ swapper_pg_dir[i] = 0;
+
+ /* endless idle loop with no priority at all */
+ current->counter = -100;
+ for (;;) {
+ if (hlt_works_ok && !need_resched)
+ __asm__("hlt");
+ schedule();
+ }
+}
+
+static int proc_sel(struct task_struct *p, int which, int who)
+{
+ switch (which) {
+ case PRIO_PROCESS:
+ if (!who && p == current)
+ return 1;
+ return(p->pid == who);
+ case PRIO_PGRP:
+ if (!who)
+ who = current->pgrp;
+ return(p->pgrp == who);
+ case PRIO_USER:
+ if (!who)
+ who = current->uid;
+ return(p->uid == who);
+ }
+ return 0;
+}
+
+asmlinkage int sys_setpriority(int which, int who, int niceval)
+{
+ struct task_struct **p;
+ int error = ESRCH;
+ int priority;
+
+ if (which > 2 || which < 0)
+ return -EINVAL;
+
+ if ((priority = PZERO - niceval) <= 0)
+ priority = 1;
+
+ for(p = &LAST_TASK; p > &FIRST_TASK; --p) {
+ if (!*p || !proc_sel(*p, which, who))
+ continue;
+ if ((*p)->uid != current->euid &&
+ (*p)->uid != current->uid && !suser()) {
+ error = EPERM;
+ continue;
+ }
+ if (error == ESRCH)
+ error = 0;
+ if (priority > (*p)->priority && !suser())
+ error = EACCES;
+ else
+ (*p)->priority = priority;
+ }
+ return -error;
+}
+
+asmlinkage int sys_getpriority(int which, int who)
+{
+ struct task_struct **p;
+ int max_prio = 0;
+
+ if (which > 2 || which < 0)
+ return -EINVAL;
+
+ for(p = &LAST_TASK; p > &FIRST_TASK; --p) {
+ if (!*p || !proc_sel(*p, which, who))
+ continue;
+ if ((*p)->priority > max_prio)
+ max_prio = (*p)->priority;
+ }
+ return(max_prio ? max_prio : -ESRCH);
+}
+
+asmlinkage int sys_profil(void)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_ftime(void)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_break(void)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_stty(void)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_gtty(void)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_prof(void)
+{
+ return -ENOSYS;
+}
+
+extern void hard_reset_now(void);
+
+/*
+ * Reboot system call: for obvious reasons only root may call it,
+ * and even root needs to set up some magic numbers in the registers
+ * so that some mistake won't make this reboot the whole machine.
+ * You can also set the meaning of the ctrl-alt-del-key here.
+ *
+ * reboot doesn't sync: do that yourself before calling this.
+ */
+asmlinkage int sys_reboot(int magic, int magic_too, int flag)
+{
+ if (!suser())
+ return -EPERM;
+ if (magic != 0xfee1dead || magic_too != 672274793)
+ return -EINVAL;
+ if (flag == 0x01234567)
+ hard_reset_now();
+ else if (flag == 0x89ABCDEF)
+ C_A_D = 1;
+ else if (!flag)
+ C_A_D = 0;
+ else
+ return -EINVAL;
+ return (0);
+}
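+
+/*
+ * Illustrative sketch (user space), not part of this file: all three
+ * arguments have to line up before anything happens.  0xfee1dead reads
+ * as "feel dead" and 672274793 is 0x28121969 in hex.
+ *
+ *	reboot(0xfee1dead, 672274793, 0x01234567);	reboot immediately
+ *	reboot(0xfee1dead, 672274793, 0x89ABCDEF);	ctrl-alt-del reboots
+ *	reboot(0xfee1dead, 672274793, 0);		ctrl-alt-del only
+ *							sends SIGINT to init
+ *
+ * Remember the comment above: the caller is expected to sync() first.
+ */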
+
+/*
+ * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
+ * As it's called within an interrupt, it may NOT sync: the only choice
+ * is whether to reboot at once, or just ignore the ctrl-alt-del.
+ */
+void ctrl_alt_del(void)
+{
+ if (C_A_D)
+ hard_reset_now();
+ else
+ send_sig(SIGINT,task[1],1);
+}
+
+
+/*
+ * Unprivileged users may change the real gid to the effective gid
+ * or vice versa. (BSD-style)
+ *
+ * If you set the real gid at all, or set the effective gid to a value not
+ * equal to the real gid, then the saved gid is set to the new effective gid.
+ *
+ * This makes it possible for a setgid program to completely drop its
+ * privileges, which is often a useful assertion to make when you are doing
+ * a security audit over a program.
+ *
+ * The general idea is that a program which uses just setregid() will be
+ * 100% compatible with BSD. A program which uses just setgid() will be
+ * 100% compatible with POSIX w/ Saved ID's.
+ */
+asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
+{
+ int old_rgid = current->gid;
+
+ if (rgid != (gid_t) -1) {
+ if ((old_rgid == rgid) ||
+ (current->egid==rgid) ||
+ suser())
+ current->gid = rgid;
+ else
+ return(-EPERM);
+ }
+ if (egid != (gid_t) -1) {
+ if ((old_rgid == egid) ||
+ (current->egid == egid) ||
+ (current->sgid == egid) ||
+ suser())
+ current->egid = egid;
+ else {
+ current->gid = old_rgid;
+ return(-EPERM);
+ }
+ }
+ if (rgid != (gid_t) -1 ||
+ (egid != (gid_t) -1 && egid != old_rgid))
+ current->sgid = current->egid;
+ current->fsgid = current->egid;
+ return 0;
+}
+
+/*
+ * setgid() is implemented like SysV w/ SAVED_IDS
+ */
+asmlinkage int sys_setgid(gid_t gid)
+{
+ if (suser())
+ current->gid = current->egid = current->sgid = current->fsgid = gid;
+ else if ((gid == current->gid) || (gid == current->sgid))
+ current->egid = current->fsgid = gid;
+ else
+ return -EPERM;
+ return 0;
+}
+
+asmlinkage int sys_acct(void)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_phys(void)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_lock(void)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_mpx(void)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_ulimit(void)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int sys_old_syscall(void)
+{
+ return -ENOSYS;
+}
+
+/*
+ * Unprivileged users may change the real uid to the effective uid
+ * or vice versa. (BSD-style)
+ *
+ * If you set the real uid at all, or set the effective uid to a value not
+ * equal to the real uid, then the saved uid is set to the new effective uid.
+ *
+ * This makes it possible for a setuid program to completely drop its
+ * privileges, which is often a useful assertion to make when you are doing
+ * a security audit over a program.
+ *
+ * The general idea is that a program which uses just setreuid() will be
+ * 100% compatible with BSD. A program which uses just setuid() will be
+ * 100% compatible with POSIX w/ Saved ID's.
+ */
+asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
+{
+ int old_ruid = current->uid;
+
+ if (ruid != (uid_t) -1) {
+ if ((old_ruid == ruid) ||
+ (current->euid==ruid) ||
+ suser())
+ current->uid = ruid;
+ else
+ return(-EPERM);
+ }
+ if (euid != (uid_t) -1) {
+ if ((old_ruid == euid) ||
+ (current->euid == euid) ||
+ (current->suid == euid) ||
+ suser())
+ current->euid = euid;
+ else {
+ current->uid = old_ruid;
+ return(-EPERM);
+ }
+ }
+ if (ruid != (uid_t) -1 ||
+ (euid != (uid_t) -1 && euid != old_ruid))
+ current->suid = current->euid;
+ current->fsuid = current->euid;
+ return 0;
+}
+
+/*
+ * setuid() is implemented like SysV w/ SAVED_IDS
+ *
+ * Note that SAVED_ID's is deficient in that a setuid root program
+ * like sendmail, for example, cannot set its uid to be a normal
+ * user and then switch back, because if you're root, setuid() sets
+ * the saved uid too. If you don't like this, blame the bright people
+ * in the POSIX committee and/or USG. Note that the BSD-style setreuid()
+ * will allow a root program to temporarily drop privileges and be able to
+ * regain them by swapping the real and effective uid.
+ */
+asmlinkage int sys_setuid(uid_t uid)
+{
+ if (suser())
+ current->uid = current->euid = current->suid = current->fsuid = uid;
+ else if ((uid == current->uid) || (uid == current->suid))
+ current->fsuid = current->euid = uid;
+ else
+ return -EPERM;
+ return(0);
+}
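+
+/*
+ * Illustrative sketch (user space), not part of this file, of the
+ * BSD-style swap described in the comment above.  A setuid-root
+ * program starts with real uid == invoking user, effective uid == 0;
+ * setreuid() lets it park root in the real uid while doing untrusted
+ * work, then swap back -- something plain setuid() cannot undo:
+ *
+ *	uid_t user = getuid();
+ *
+ *	setreuid(0, user);	effective uid is now the user,
+ *				root kept as the real uid
+ *	... handle untrusted input ...
+ *	setreuid(user, 0);	swap again, effective uid is root
+ *
+ * The same pattern works for the group ids via setregid().
+ */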
+
+/*
+ * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
+ * is used for "access()" and for the NFS daemon (letting nfsd stay at
+ * whatever uid it wants to). It normally shadows "euid", except when
+ * explicitly set by setfsuid() or for access..
+ */
+asmlinkage int sys_setfsuid(uid_t uid)
+{
+ int old_fsuid = current->fsuid;
+
+ if (uid == current->uid || uid == current->euid ||
+ uid == current->suid || uid == current->fsuid || suser())
+ current->fsuid = uid;
+ return old_fsuid;
+}
+
+/*
+ * Samma på svenska..
+ */
+asmlinkage int sys_setfsgid(gid_t gid)
+{
+ int old_fsgid = current->fsgid;
+
+ if (gid == current->gid || gid == current->egid ||
+ gid == current->sgid || gid == current->fsgid || suser())
+ current->fsgid = gid;
+ return old_fsgid;
+}
+
+asmlinkage int sys_times(struct tms * tbuf)
+{
+ if (tbuf) {
+ int error = verify_area(VERIFY_WRITE,tbuf,sizeof *tbuf);
+ if (error)
+ return error;
+ put_fs_long(current->utime,(unsigned long *)&tbuf->tms_utime);
+ put_fs_long(current->stime,(unsigned long *)&tbuf->tms_stime);
+ put_fs_long(current->cutime,(unsigned long *)&tbuf->tms_cutime);
+ put_fs_long(current->cstime,(unsigned long *)&tbuf->tms_cstime);
+ }
+ return jiffies;
+}
+
+asmlinkage int sys_brk(unsigned long brk)
+{
+ int freepages;
+ unsigned long rlim;
+ unsigned long newbrk, oldbrk;
+ struct vm_area_struct * vma;
+
+ if (brk < current->mm->end_code)
+ return current->mm->brk;
+ newbrk = PAGE_ALIGN(brk);
+ oldbrk = PAGE_ALIGN(current->mm->brk);
+ if (oldbrk == newbrk)
+ return current->mm->brk = brk;
+
+ /*
+ * Always allow shrinking brk
+ */
+ if (brk <= current->mm->brk) {
+ current->mm->brk = brk;
+ do_munmap(newbrk, oldbrk-newbrk);
+ return brk;
+ }
+ /*
+ * Check against rlimit and stack..
+ */
+ rlim = current->rlim[RLIMIT_DATA].rlim_cur;
+ if (rlim >= RLIM_INFINITY)
+ rlim = ~0;
+ if (brk - current->mm->end_code > rlim ||
+ brk >= current->mm->start_stack - 16384)
+ return current->mm->brk;
+ /*
+ * Check against existing mmap mappings.
+ */
+ for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
+ if (newbrk <= vma->vm_start)
+ break;
+ if (oldbrk < vma->vm_end)
+ return current->mm->brk;
+ }
+ /*
+ * stupid algorithm to decide if we have enough memory: while
+ * simple, it hopefully works in most obvious cases.. Easy to
+ * fool it, but this should catch most mistakes.
+ */
+ freepages = buffermem >> 12;
+ freepages += nr_free_pages;
+ freepages += nr_swap_pages;
+ freepages -= (high_memory - 0x100000) >> 16;
+ freepages -= (newbrk-oldbrk) >> 12;
+ if (freepages < 0)
+ return current->mm->brk;
+#if 0
+ freepages += current->mm->rss;
+ freepages -= oldbrk >> 12;
+ if (freepages < 0)
+ return current->mm->brk;
+#endif
+ /*
+ * Ok, we have probably got enough memory - let it rip.
+ */
+ current->mm->brk = brk;
+ do_mmap(NULL, oldbrk, newbrk-oldbrk,
+ PROT_READ|PROT_WRITE|PROT_EXEC,
+ MAP_FIXED|MAP_PRIVATE, 0);
+ return brk;
+}
+
+/*
+ * This needs some heavy checking ...
+ * I just haven't got the stomach for it. I also don't fully
+ * understand sessions/pgrp etc. Let somebody who does explain it.
+ *
+ * OK, I think I have the protection semantics right.... this is really
+ * only important on a multi-user system anyway, to make sure one user
+ * can't send a signal to a process owned by another. -TYT, 12/12/91
+ *
+ * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
+ * LBT 04.03.94
+ */
+asmlinkage int sys_setpgid(pid_t pid, pid_t pgid)
+{
+ struct task_struct * p;
+
+ if (!pid)
+ pid = current->pid;
+ if (!pgid)
+ pgid = pid;
+ if (pgid < 0)
+ return -EINVAL;
+ for_each_task(p) {
+ if (p->pid == pid)
+ goto found_task;
+ }
+ return -ESRCH;
+
+found_task:
+ if (p->p_pptr == current || p->p_opptr == current) {
+ if (p->session != current->session)
+ return -EPERM;
+ if (p->did_exec)
+ return -EACCES;
+ } else if (p != current)
+ return -ESRCH;
+ if (p->leader)
+ return -EPERM;
+ if (pgid != pid) {
+ struct task_struct * tmp;
+ for_each_task (tmp) {
+ if (tmp->pgrp == pgid &&
+ tmp->session == current->session)
+ goto ok_pgid;
+ }
+ return -EPERM;
+ }
+
+ok_pgid:
+ p->pgrp = pgid;
+ return 0;
+}
+
+asmlinkage int sys_getpgid(pid_t pid)
+{
+ struct task_struct * p;
+
+ if (!pid)
+ return current->pgrp;
+ for_each_task(p) {
+ if (p->pid == pid)
+ return p->pgrp;
+ }
+ return -ESRCH;
+}
+
+asmlinkage int sys_getpgrp(void)
+{
+ return current->pgrp;
+}
+
+asmlinkage int sys_setsid(void)
+{
+ if (current->leader)
+ return -EPERM;
+ current->leader = 1;
+ current->session = current->pgrp = current->pid;
+ current->tty = NULL;
+ return current->pgrp;
+}
+
+/*
+ * Supplementary group ID's
+ */
+asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist)
+{
+ int i;
+
+ if (gidsetsize) {
+ i = verify_area(VERIFY_WRITE, grouplist, sizeof(gid_t) * gidsetsize);
+ if (i)
+ return i;
+ }
+ for (i = 0 ; (i < NGROUPS) && (current->groups[i] != NOGROUP) ; i++) {
+ if (!gidsetsize)
+ continue;
+ if (i >= gidsetsize)
+ break;
+ put_fs_word(current->groups[i], (short *) grouplist);
+ grouplist++;
+ }
+ return(i);
+}
+
+asmlinkage int sys_setgroups(int gidsetsize, gid_t *grouplist)
+{
+ int i;
+
+ if (!suser())
+ return -EPERM;
+ if (gidsetsize > NGROUPS)
+ return -EINVAL;
+ for (i = 0; i < gidsetsize; i++, grouplist++) {
+ current->groups[i] = get_fs_word((unsigned short *) grouplist);
+ }
+ if (i < NGROUPS)
+ current->groups[i] = NOGROUP;
+ return 0;
+}
+
+int in_group_p(gid_t grp)
+{
+ int i;
+
+ if (grp == current->fsgid)
+ return 1;
+
+ for (i = 0; i < NGROUPS; i++) {
+ if (current->groups[i] == NOGROUP)
+ break;
+ if (current->groups[i] == grp)
+ return 1;
+ }
+ return 0;
+}
+
+asmlinkage int sys_newuname(struct new_utsname * name)
+{
+ int error;
+
+ if (!name)
+ return -EFAULT;
+ error = verify_area(VERIFY_WRITE, name, sizeof *name);
+ if (!error)
+ memcpy_tofs(name,&system_utsname,sizeof *name);
+ return error;
+}
+
+asmlinkage int sys_uname(struct old_utsname * name)
+{
+ int error;
+ if (!name)
+ return -EFAULT;
+ error = verify_area(VERIFY_WRITE, name,sizeof *name);
+ if (error)
+ return error;
+ memcpy_tofs(&name->sysname,&system_utsname.sysname,
+ sizeof (system_utsname.sysname));
+ memcpy_tofs(&name->nodename,&system_utsname.nodename,
+ sizeof (system_utsname.nodename));
+ memcpy_tofs(&name->release,&system_utsname.release,
+ sizeof (system_utsname.release));
+ memcpy_tofs(&name->version,&system_utsname.version,
+ sizeof (system_utsname.version));
+ memcpy_tofs(&name->machine,&system_utsname.machine,
+ sizeof (system_utsname.machine));
+ return 0;
+}
+
+asmlinkage int sys_olduname(struct oldold_utsname * name)
+{
+ int error;
+ if (!name)
+ return -EFAULT;
+ error = verify_area(VERIFY_WRITE, name,sizeof *name);
+ if (error)
+ return error;
+ memcpy_tofs(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+ put_fs_byte(0,name->sysname+__OLD_UTS_LEN);
+ memcpy_tofs(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+ put_fs_byte(0,name->nodename+__OLD_UTS_LEN);
+ memcpy_tofs(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+ put_fs_byte(0,name->release+__OLD_UTS_LEN);
+ memcpy_tofs(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+ put_fs_byte(0,name->version+__OLD_UTS_LEN);
+ memcpy_tofs(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+ put_fs_byte(0,name->machine+__OLD_UTS_LEN);
+ return 0;
+}
+
+/*
+ * Only sethostname; gethostname can be implemented by calling uname()
+ */
+asmlinkage int sys_sethostname(char *name, int len)
+{
+ int i;
+
+ if (!suser())
+ return -EPERM;
+ if (len > __NEW_UTS_LEN)
+ return -EINVAL;
+ for (i=0; i < len; i++) {
+ if ((system_utsname.nodename[i] = get_fs_byte(name+i)) == 0)
+ return 0;
+ }
+ system_utsname.nodename[i] = 0;
+ return 0;
+}
+
+/*
+ * Only setdomainname; getdomainname can be implemented by calling
+ * uname()
+ */
+asmlinkage int sys_setdomainname(char *name, int len)
+{
+ int i;
+
+ if (!suser())
+ return -EPERM;
+ if (len > __NEW_UTS_LEN)
+ return -EINVAL;
+ for (i=0; i < len; i++) {
+ if ((system_utsname.domainname[i] = get_fs_byte(name+i)) == 0)
+ return 0;
+ }
+ system_utsname.domainname[i] = 0;
+ return 0;
+}
+
+asmlinkage int sys_getrlimit(unsigned int resource, struct rlimit *rlim)
+{
+ int error;
+
+ if (resource >= RLIM_NLIMITS)
+ return -EINVAL;
+ error = verify_area(VERIFY_WRITE,rlim,sizeof *rlim);
+ if (error)
+ return error;
+ put_fs_long(current->rlim[resource].rlim_cur,
+ (unsigned long *) rlim);
+ put_fs_long(current->rlim[resource].rlim_max,
+ ((unsigned long *) rlim)+1);
+ return 0;
+}
+
+asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim)
+{
+ struct rlimit new_rlim, *old_rlim;
+ int err;
+
+ if (resource >= RLIM_NLIMITS)
+ return -EINVAL;
+ err = verify_area(VERIFY_READ, rlim, sizeof(*rlim));
+ if (err)
+ return err;
+ memcpy_fromfs(&new_rlim, rlim, sizeof(*rlim));
+ old_rlim = current->rlim + resource;
+ if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
+ (new_rlim.rlim_max > old_rlim->rlim_max)) &&
+ !suser())
+ return -EPERM;
+ *old_rlim = new_rlim;
+ return 0;
+}
+
+/*
+ * It would make sense to put struct rusage in the task_struct,
+ * except that would make the task_struct be *really big*. After
+ * task_struct gets moved into malloc'ed memory, it would
+ * make sense to do this. It will make moving the rest of the information
+ * a lot simpler! (Which we're not doing right now because we're not
+ * measuring them yet).
+ */
+int getrusage(struct task_struct *p, int who, struct rusage *ru)
+{
+ int error;
+ struct rusage r;
+
+ error = verify_area(VERIFY_WRITE, ru, sizeof *ru);
+ if (error)
+ return error;
+ memset((char *) &r, 0, sizeof(r));
+ switch (who) {
+ case RUSAGE_SELF:
+ r.ru_utime.tv_sec = CT_TO_SECS(p->utime);
+ r.ru_utime.tv_usec = CT_TO_USECS(p->utime);
+ r.ru_stime.tv_sec = CT_TO_SECS(p->stime);
+ r.ru_stime.tv_usec = CT_TO_USECS(p->stime);
+ r.ru_minflt = p->mm->min_flt;
+ r.ru_majflt = p->mm->maj_flt;
+ break;
+ case RUSAGE_CHILDREN:
+ r.ru_utime.tv_sec = CT_TO_SECS(p->cutime);
+ r.ru_utime.tv_usec = CT_TO_USECS(p->cutime);
+ r.ru_stime.tv_sec = CT_TO_SECS(p->cstime);
+ r.ru_stime.tv_usec = CT_TO_USECS(p->cstime);
+ r.ru_minflt = p->mm->cmin_flt;
+ r.ru_majflt = p->mm->cmaj_flt;
+ break;
+ default:
+ r.ru_utime.tv_sec = CT_TO_SECS(p->utime + p->cutime);
+ r.ru_utime.tv_usec = CT_TO_USECS(p->utime + p->cutime);
+ r.ru_stime.tv_sec = CT_TO_SECS(p->stime + p->cstime);
+ r.ru_stime.tv_usec = CT_TO_USECS(p->stime + p->cstime);
+ r.ru_minflt = p->mm->min_flt + p->mm->cmin_flt;
+ r.ru_majflt = p->mm->maj_flt + p->mm->cmaj_flt;
+ break;
+ }
+ memcpy_tofs(ru, &r, sizeof(r));
+ return 0;
+}
+
+asmlinkage int sys_getrusage(int who, struct rusage *ru)
+{
+ if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
+ return -EINVAL;
+ return getrusage(current, who, ru);
+}
+
+asmlinkage int sys_umask(int mask)
+{
+ int old = current->fs->umask;
+
+ current->fs->umask = mask & S_IRWXUGO;
+ return (old);
+}
diff --git a/kernel/time.c b/kernel/time.c
new file mode 100644
index 000000000..e290a3654
--- /dev/null
+++ b/kernel/time.c
@@ -0,0 +1,487 @@
+/*
+ * linux/kernel/time.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * This file contains the interface functions for the various
+ * time related system calls: time, stime, gettimeofday, settimeofday,
+ * adjtime
+ */
+/*
+ * Modification history kernel/time.c
+ *
+ * 02 Sep 93 Philip Gladstone
+ * Created file with time related functions from sched.c and adjtimex()
+ * 08 Oct 93 Torsten Duwe
+ * adjtime interface update and CMOS clock write code
+ * 02 Jul 94 Alan Modra
+ * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+
+#include <asm/segment.h>
+#include <asm/io.h>
+
+#include <linux/mc146818rtc.h>
+#define RTC_ALWAYS_BCD 1
+
+#include <linux/timex.h>
+
+/* converts date to seconds since 1/1/1970
+ * assumes year,mon,day in normal date format
+ * ie. 1/1/1970 => year=1970, mon=1, day=1
+ *
+ * For the Julian calendar (which was used in Russia before 1917,
+ * Britain & colonies before 1752, anywhere else before 1582,
+ * and is still in use by some communities) leave out the
+ * -year/100+year/400 terms, and add 10.
+ *
+ * This algorithm was first published by Gauss (I think).
+ */
+static inline unsigned long mktime(unsigned int year, unsigned int mon,
+ unsigned int day, unsigned int hour,
+ unsigned int min, unsigned int sec)
+{
+ if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */
+ mon += 12; /* Puts Feb last since it has leap day */
+ year -= 1;
+ }
+ return (((
+ (unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day) +
+ year*365 - 719499
+ )*24 + hour /* now have hours */
+ )*60 + min /* now have minutes */
+ )*60 + sec; /* finally seconds */
+}
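+
+/*
+ * Worked check, not part of this file: the inner expression is a day
+ * count, and 719499 is exactly what it yields for 1 Jan 1970.  That
+ * date takes the "mon <= 2" branch, turning (1970,1) into (1969,11):
+ *
+ *	1969/4 - 1969/100 + 1969/400 + 367*11/12 + 1 + 1969*365
+ *	  = 492 - 19 + 4 + 336 + 1 + 718685 = 719499
+ *
+ * so mktime(1970,1,1,0,0,0) == 0.  For 1 Jan 2000 the same expression
+ * gives 719499 + 10957, i.e. 10957 days or 946684800 seconds.
+ */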
+
+void time_init(void)
+{
+ unsigned int year, mon, day, hour, min, sec;
+ int i;
+
+ /* checking for Update-In-Progress could be done more elegantly
+ * (using the "update finished"-interrupt for example), but that
+	 * would require excessive testing. I promise I'll do that when I find
+ * the time. - Torsten
+ */
+ /* read RTC exactly on falling edge of update flag */
+ for (i = 0 ; i < 1000000 ; i++) /* may take up to 1 second... */
+ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
+ break;
+ for (i = 0 ; i < 1000000 ; i++) /* must try at least 2.228 ms*/
+ if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
+ break;
+ do { /* Isn't this overkill ? UIP above should guarantee consistency */
+ sec = CMOS_READ(RTC_SECONDS);
+ min = CMOS_READ(RTC_MINUTES);
+ hour = CMOS_READ(RTC_HOURS);
+ day = CMOS_READ(RTC_DAY_OF_MONTH);
+ mon = CMOS_READ(RTC_MONTH);
+ year = CMOS_READ(RTC_YEAR);
+ } while (sec != CMOS_READ(RTC_SECONDS));
+ if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+ {
+ BCD_TO_BIN(sec);
+ BCD_TO_BIN(min);
+ BCD_TO_BIN(hour);
+ BCD_TO_BIN(day);
+ BCD_TO_BIN(mon);
+ BCD_TO_BIN(year);
+ }
+ if ((year += 1900) < 1970)
+ year += 100;
+ xtime.tv_sec = mktime(year, mon, day, hour, min, sec);
+ xtime.tv_usec = 0;
+}
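+
+/*
+ * Worked example, not part of this file: unless the RTC runs in binary
+ * mode, the MC146818 hands back each field as packed BCD, one decimal
+ * digit per nibble.  BCD_TO_BIN() (from <linux/mc146818rtc.h>,
+ * essentially val = (val & 15) + (val >> 4) * 10) therefore turns a
+ * raw seconds register of 0x59 into 9 + 5*10 == 59.
+ */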
+/*
+ * The timezone where the local system is located. Used as a default by some
+ * programs who obtain this value by using gettimeofday.
+ */
+struct timezone sys_tz = { 0, 0};
+
+asmlinkage int sys_time(long * tloc)
+{
+ int i, error;
+
+ i = CURRENT_TIME;
+ if (tloc) {
+ error = verify_area(VERIFY_WRITE, tloc, 4);
+ if (error)
+ return error;
+ put_fs_long(i,(unsigned long *)tloc);
+ }
+ return i;
+}
+
+asmlinkage int sys_stime(unsigned long * tptr)
+{
+ int error;
+ unsigned long value;
+
+ if (!suser())
+ return -EPERM;
+ error = verify_area(VERIFY_READ, tptr, sizeof(*tptr));
+ if (error)
+ return error;
+ value = get_fs_long(tptr);
+ cli();
+ xtime.tv_sec = value;
+ xtime.tv_usec = 0;
+ time_status = TIME_BAD;
+ time_maxerror = 0x70000000;
+ time_esterror = 0x70000000;
+ sti();
+ return 0;
+}
+
+/* This function must be called with interrupts disabled.
+ * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs
+ *
+ * However, the pc-audio speaker driver changes the divisor so that
+ * it gets interrupted rather more often - it loads 64 into the
+ * counter rather than 11932! This has an adverse impact on
+ * do_gettimeoffset() -- it stops working! What is also not
+ * good is that the interval at which our timer function gets called
+ * is no longer 10.0002 msec, but 9.9767 msec. To get around this
+ * would require using a different timing source. Maybe someone
+ * could use the RTC - I know that this can interrupt at frequencies
+ * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix
+ * it so that at startup, the timer code in sched.c would select
+ * using either the RTC or the 8253 timer. The decision would be
+ * based on whether there was any other device around that needed
+ * to trample on the 8253. I'd set up the RTC to interrupt at 1024Hz,
+ * and then do some jiggery to have a version of do_timer that
+ * advanced the clock by 1/1024 sec. Every time that reached over 1/100
+ * of a second, then do all the old code. If the time was kept correct
+ * then do_gettimeoffset could just return 0 - there is no low order
+ * divider that can be accessed.
+ *
+ * Ideally, you would be able to use the RTC for the speaker driver,
+ * but it appears that the speaker driver really needs interrupts more
+ * often than every 120us or so.
+ *
+ * Anyway, this needs more thought.... pjsg (28 Aug 93)
+ *
+ * If you are really that interested, you should be reading
+ * comp.protocols.time.ntp!
+ */
+
+#define TICK_SIZE tick
+
+static inline unsigned long do_gettimeoffset(void)
+{
+ int count;
+ unsigned long offset = 0;
+
+ /* timer count may underflow right here */
+ outb_p(0x00, 0x43); /* latch the count ASAP */
+ count = inb_p(0x40); /* read the latched count */
+ count |= inb(0x40) << 8;
+ /* we know probability of underflow is always MUCH less than 1% */
+ if (count > (LATCH - LATCH/100)) {
+ /* check for pending timer interrupt */
+ outb_p(0x0a, 0x20);
+ if (inb(0x20) & 1)
+ offset = TICK_SIZE;
+ }
+ count = ((LATCH-1) - count) * TICK_SIZE;
+ count = (count + LATCH/2) / LATCH;
+ return offset + count;
+}
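+
+/*
+ * Worked example, not part of this file, assuming HZ == 100 so that
+ * LATCH is the 11932 mentioned above and tick == 10000 usec: if the
+ * latched PIT count comes back as 5966 -- halfway from LATCH-1 down
+ * to 0 -- then
+ *
+ *	((LATCH-1) - 5966) * tick      = 5965 * 10000 = 59650000
+ *	(59650000 + LATCH/2) / LATCH  ~= 4999
+ *
+ * so the caller gets roughly 5 ms of offset into the current 10 ms
+ * tick, which is what do_gettimeofday() adds to xtime.
+ */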
+
+/*
+ * This version of gettimeofday has near microsecond resolution.
+ */
+static inline void do_gettimeofday(struct timeval *tv)
+{
+#ifdef __i386__
+ cli();
+ *tv = xtime;
+ tv->tv_usec += do_gettimeoffset();
+ if (tv->tv_usec >= 1000000) {
+ tv->tv_usec -= 1000000;
+ tv->tv_sec++;
+ }
+ sti();
+#else /* not __i386__ */
+ cli();
+ *tv = xtime;
+ sti();
+#endif /* not __i386__ */
+}
+
+asmlinkage int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ int error;
+
+ if (tv) {
+ struct timeval ktv;
+ error = verify_area(VERIFY_WRITE, tv, sizeof *tv);
+ if (error)
+ return error;
+ do_gettimeofday(&ktv);
+ put_fs_long(ktv.tv_sec, (unsigned long *) &tv->tv_sec);
+ put_fs_long(ktv.tv_usec, (unsigned long *) &tv->tv_usec);
+ }
+ if (tz) {
+ error = verify_area(VERIFY_WRITE, tz, sizeof *tz);
+ if (error)
+ return error;
+ put_fs_long(sys_tz.tz_minuteswest, (unsigned long *) tz);
+ put_fs_long(sys_tz.tz_dsttime, ((unsigned long *) tz)+1);
+ }
+ return 0;
+}
+
+/*
+ * Adjust the time obtained from the CMOS to be GMT time instead of
+ * local time.
+ *
+ * This is ugly, but preferable to the alternatives. Otherwise we
+ * would either need to write a program to do it in /etc/rc (and risk
+ * confusion if the program gets run more than once; it would also be
+ * hard to make the program warp the clock precisely n hours) or
+ * compile in the timezone information into the kernel. Bad, bad....
+ *
+ * XXX Currently does not adjust for daylight savings time. May not
+ * need to do anything, depending on how smart (dumb?) the BIOS
+ * is. Blast it all.... the best thing is to not depend on the CMOS
+ * clock at all, but get the time via NTP or timed if you're on a
+ * network.... - TYT, 1/1/92
+ */
+static inline void warp_clock(void)
+{
+ cli();
+ xtime.tv_sec += sys_tz.tz_minuteswest * 60;
+ sti();
+}
+
+/*
+ * The first time we set the timezone, we will warp the clock so that
+ * it is ticking GMT time instead of local time. Presumably,
+ * if someone is setting the timezone then we are running in an
+ * environment where the programs understand about timezones.
+ * This should be done at boot time in the /etc/rc script, as
+ * soon as possible, so that the clock can be set right. Otherwise,
+ * various programs will get confused when the clock gets warped.
+ */
+asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz)
+{
+ static int firsttime = 1;
+ struct timeval new_tv;
+ struct timezone new_tz;
+
+ if (!suser())
+ return -EPERM;
+ if (tv) {
+ int error = verify_area(VERIFY_READ, tv, sizeof(*tv));
+ if (error)
+ return error;
+ memcpy_fromfs(&new_tv, tv, sizeof(*tv));
+ }
+ if (tz) {
+ int error = verify_area(VERIFY_READ, tz, sizeof(*tz));
+ if (error)
+ return error;
+ memcpy_fromfs(&new_tz, tz, sizeof(*tz));
+ }
+ if (tz) {
+ sys_tz = new_tz;
+ if (firsttime) {
+ firsttime = 0;
+ if (!tv)
+ warp_clock();
+ }
+ }
+ if (tv) {
+ cli();
+ /* This is revolting. We need to set the xtime.tv_usec
+		 * correctly. However, the value in this location is
+		 * the value at the last tick.
+ * Discover what correction gettimeofday
+ * would have done, and then undo it!
+ */
+ new_tv.tv_usec -= do_gettimeoffset();
+
+ if (new_tv.tv_usec < 0) {
+ new_tv.tv_usec += 1000000;
+ new_tv.tv_sec--;
+ }
+
+ xtime = new_tv;
+ time_status = TIME_BAD;
+ time_maxerror = 0x70000000;
+ time_esterror = 0x70000000;
+ sti();
+ }
+ return 0;
+}
+
+/* adjtimex mainly allows reading (and writing, if superuser) of
+ * kernel time-keeping variables. Used by xntpd.
+ */
+asmlinkage int sys_adjtimex(struct timex *txc_p)
+{
+ long ltemp, mtemp, save_adjust;
+ int error;
+
+ /* Local copy of parameter */
+ struct timex txc;
+
+ error = verify_area(VERIFY_WRITE, txc_p, sizeof(struct timex));
+ if (error)
+ return error;
+
+ /* Copy the user data space into the kernel copy
+ * structure. But bear in mind that the structures
+ * may change
+ */
+ memcpy_fromfs(&txc, txc_p, sizeof(struct timex));
+
+ /* In order to modify anything, you gotta be super-user! */
+ if (txc.mode && !suser())
+ return -EPERM;
+
+ /* Now we validate the data before disabling interrupts
+ */
+
+ if (txc.mode != ADJ_OFFSET_SINGLESHOT && (txc.mode & ADJ_OFFSET))
+ /* Microsec field limited to -131000 .. 131000 usecs */
+ if (txc.offset <= -(1 << (31 - SHIFT_UPDATE))
+ || txc.offset >= (1 << (31 - SHIFT_UPDATE)))
+ return -EINVAL;
+
+ /* time_status must be in a fairly small range */
+ if (txc.mode & ADJ_STATUS)
+ if (txc.status < TIME_OK || txc.status > TIME_BAD)
+ return -EINVAL;
+
+ /* if the quartz is off by more than 10% something is VERY wrong ! */
+ if (txc.mode & ADJ_TICK)
+ if (txc.tick < 900000/HZ || txc.tick > 1100000/HZ)
+ return -EINVAL;
+
+ cli();
+
+ /* Save for later - semantics of adjtime is to return old value */
+ save_adjust = time_adjust;
+
+ /* If there are input parameters, then process them */
+ if (txc.mode)
+ {
+ if (time_status == TIME_BAD)
+ time_status = TIME_OK;
+
+ if (txc.mode & ADJ_STATUS)
+ time_status = txc.status;
+
+ if (txc.mode & ADJ_FREQUENCY)
+ time_freq = txc.frequency << (SHIFT_KF - 16);
+
+ if (txc.mode & ADJ_MAXERROR)
+ time_maxerror = txc.maxerror;
+
+ if (txc.mode & ADJ_ESTERROR)
+ time_esterror = txc.esterror;
+
+ if (txc.mode & ADJ_TIMECONST)
+ time_constant = txc.time_constant;
+
+ if (txc.mode & ADJ_OFFSET)
+ if (txc.mode == ADJ_OFFSET_SINGLESHOT)
+ {
+ time_adjust = txc.offset;
+ }
+ else /* XXX should give an error if other bits set */
+ {
+ time_offset = txc.offset << SHIFT_UPDATE;
+ mtemp = xtime.tv_sec - time_reftime;
+ time_reftime = xtime.tv_sec;
+ if (mtemp > (MAXSEC+2) || mtemp < 0)
+ mtemp = 0;
+
+ if (txc.offset < 0)
+ time_freq -= (-txc.offset * mtemp) >>
+ (time_constant + time_constant);
+ else
+ time_freq += (txc.offset * mtemp) >>
+ (time_constant + time_constant);
+
+ ltemp = time_tolerance << SHIFT_KF;
+
+ if (time_freq > ltemp)
+ time_freq = ltemp;
+ else if (time_freq < -ltemp)
+ time_freq = -ltemp;
+ }
+ if (txc.mode & ADJ_TICK)
+ tick = txc.tick;
+
+ }
+ txc.offset = save_adjust;
+ txc.frequency = ((time_freq+1) >> (SHIFT_KF - 16));
+ txc.maxerror = time_maxerror;
+ txc.esterror = time_esterror;
+ txc.status = time_status;
+ txc.time_constant = time_constant;
+ txc.precision = time_precision;
+ txc.tolerance = time_tolerance;
+ txc.time = xtime;
+ txc.tick = tick;
+
+ sti();
+
+ memcpy_tofs(txc_p, &txc, sizeof(struct timex));
+ return time_status;
+}
+
+int set_rtc_mmss(unsigned long nowtime)
+{
+ int retval = 0;
+ int real_seconds, real_minutes, cmos_minutes;
+ unsigned char save_control, save_freq_select;
+
+ save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being set */
+ CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
+
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset prescaler */
+ CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
+
+ cmos_minutes = CMOS_READ(RTC_MINUTES);
+ if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+ BCD_TO_BIN(cmos_minutes);
+
+ /* since we're only adjusting minutes and seconds,
+ * don't interfere with hour overflow. This avoids
+ * messing with unknown time zones but requires your
+ * RTC not to be off by more than 15 minutes
+ */
+ real_seconds = nowtime % 60;
+ real_minutes = nowtime / 60;
+ if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1)
+ real_minutes += 30; /* correct for half hour time zone */
+ real_minutes %= 60;
+
+ if (abs(real_minutes - cmos_minutes) < 30)
+ {
+ if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+ {
+ BIN_TO_BCD(real_seconds);
+ BIN_TO_BCD(real_minutes);
+ }
+ CMOS_WRITE(real_seconds,RTC_SECONDS);
+ CMOS_WRITE(real_minutes,RTC_MINUTES);
+ }
+ else
+ retval = -1;
+
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ CMOS_WRITE(save_control, RTC_CONTROL);
+ return retval;
+}
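+
+/*
+ * Worked example, not part of this file: say the kernel's minute value
+ * is 5 but the CMOS clock reads 35 because it is kept in a half-hour
+ * offset zone.  abs(5 - 35) == 30, (30 + 15) / 30 == 1 is odd, so 30
+ * is added and real_minutes becomes 35: the RTC keeps its half-hour
+ * offset and only the drift in seconds/minutes is corrected.  If the
+ * difference is still 30 minutes or more after this adjustment, the
+ * RTC is left alone and -1 is returned, so second_overflow() will try
+ * again in about a minute.
+ */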
diff --git a/kernel/tqueue.c b/kernel/tqueue.c
new file mode 100644
index 000000000..440709611
--- /dev/null
+++ b/kernel/tqueue.c
@@ -0,0 +1,10 @@
+/*
+ * tqueue.c --- task queue handling for Linux.
+ *
+ * This routine merely draws in the static portion of the task queue
+ * inline functions. Look in tqueue.h for the relevant functions.
+ */
+
+#define INCLUDE_INLINE_FUNCS
+
+#include <linux/tqueue.h>
diff --git a/kernel/traps.c b/kernel/traps.c
new file mode 100644
index 000000000..150b702b3
--- /dev/null
+++ b/kernel/traps.c
@@ -0,0 +1,245 @@
+/*
+ * linux/kernel/traps.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * 'Traps.c' handles hardware traps and faults after we have saved some
+ * state in 'asm.s'. Currently mostly a debugging-aid, will be extended
+ * to mainly kill the offending process (probably by giving it a signal,
+ * but possibly by killing it outright if necessary).
+ */
+#include <linux/head.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+
+#include <asm/system.h>
+#include <asm/segment.h>
+#include <asm/io.h>
+
+static inline void console_verbose(void)
+{
+ extern int console_loglevel;
+ console_loglevel = 15;
+}
+
+#define DO_ERROR(trapnr, signr, str, name, tsk) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+ tsk->tss.error_code = error_code; \
+ tsk->tss.trap_no = trapnr; \
+ if (signr == SIGTRAP && current->flags & PF_PTRACED) \
+ current->blocked &= ~(1 << (SIGTRAP-1)); \
+ send_sig(signr, tsk, 1); \
+ die_if_kernel(str,regs,error_code); \
+}
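+
+/*
+ * For reference, the first instantiation below,
+ * DO_ERROR( 0, SIGFPE, "divide error", divide_error, current),
+ * expands (roughly) to:
+ *
+ *	asmlinkage void do_divide_error(struct pt_regs * regs, long error_code)
+ *	{
+ *		current->tss.error_code = error_code;
+ *		current->tss.trap_no = 0;
+ *		send_sig(SIGFPE, current, 1);
+ *		die_if_kernel("divide error", regs, error_code);
+ *	}
+ *
+ * (the SIGTRAP/PF_PTRACED check only matters for the int3 instantiation
+ * and is omitted here.)
+ */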
+
+#define get_seg_byte(seg,addr) ({ \
+register unsigned char __res; \
+__asm__("push %%fs;mov %%ax,%%fs;movb %%fs:%2,%%al;pop %%fs" \
+ :"=a" (__res):"0" (seg),"m" (*(addr))); \
+__res;})
+
+#define get_seg_long(seg,addr) ({ \
+register unsigned long __res; \
+__asm__("push %%fs;mov %%ax,%%fs;movl %%fs:%2,%%eax;pop %%fs" \
+ :"=a" (__res):"0" (seg),"m" (*(addr))); \
+__res;})
+
+#define _fs() ({ \
+register unsigned short __res; \
+__asm__("mov %%fs,%%ax":"=a" (__res):); \
+__res;})
+
+void page_exception(void);
+
+asmlinkage void divide_error(void);
+asmlinkage void debug(void);
+asmlinkage void nmi(void);
+asmlinkage void int3(void);
+asmlinkage void overflow(void);
+asmlinkage void bounds(void);
+asmlinkage void invalid_op(void);
+asmlinkage void device_not_available(void);
+asmlinkage void double_fault(void);
+asmlinkage void coprocessor_segment_overrun(void);
+asmlinkage void invalid_TSS(void);
+asmlinkage void segment_not_present(void);
+asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void);
+asmlinkage void page_fault(void);
+asmlinkage void coprocessor_error(void);
+asmlinkage void reserved(void);
+asmlinkage void alignment_check(void);
+
+/*static*/ void die_if_kernel(char * str, struct pt_regs * regs, long err)
+{
+ int i;
+ unsigned long esp;
+ unsigned short ss;
+
+ esp = (unsigned long) &regs->esp;
+ ss = KERNEL_DS;
+ if ((regs->eflags & VM_MASK) || (3 & regs->cs) == 3)
+ return;
+ if (regs->cs & 3) {
+ esp = regs->esp;
+ ss = regs->ss;
+ }
+ console_verbose();
+ printk("%s: %04lx\n", str, err & 0xffff);
+ printk("EIP: %04x:%08lx\nEFLAGS: %08lx\n", 0xffff & regs->cs,regs->eip,regs->eflags);
+ printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
+ regs->eax, regs->ebx, regs->ecx, regs->edx);
+ printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
+ regs->esi, regs->edi, regs->ebp, esp);
+ printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
+ regs->ds, regs->es, regs->fs, regs->gs, ss);
+ store_TR(i);
+ if (STACK_MAGIC != *(unsigned long *)current->kernel_stack_page)
+ printk("Corrupted stack page\n");
+ printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)\nStack: ",
+ current->comm, current->pid, 0xffff & i, current->kernel_stack_page);
+ for(i=0;i<5;i++)
+ printk("%08lx ", get_seg_long(ss,(i+(unsigned long *)esp)));
+ printk("\nCode: ");
+ for(i=0;i<20;i++)
+ printk("%02x ",0xff & get_seg_byte(regs->cs,(i+(char *)regs->eip)));
+ printk("\n");
+ do_exit(SIGSEGV);
+}
+
+DO_ERROR( 0, SIGFPE, "divide error", divide_error, current)
+DO_ERROR( 3, SIGTRAP, "int3", int3, current)
+DO_ERROR( 4, SIGSEGV, "overflow", overflow, current)
+DO_ERROR( 5, SIGSEGV, "bounds", bounds, current)
+DO_ERROR( 6, SIGILL, "invalid operand", invalid_op, current)
+DO_ERROR( 7, SIGSEGV, "device not available", device_not_available, current)
+DO_ERROR( 8, SIGSEGV, "double fault", double_fault, current)
+DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun, last_task_used_math)
+DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS, current)
+DO_ERROR(11, SIGBUS, "segment not present", segment_not_present, current)
+DO_ERROR(12, SIGBUS, "stack segment", stack_segment, current)
+DO_ERROR(15, SIGSEGV, "reserved", reserved, current)
+DO_ERROR(17, SIGSEGV, "alignment check", alignment_check, current)
+
+asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
+{
+ int signr = SIGSEGV;
+
+ if (regs->eflags & VM_MASK) {
+ handle_vm86_fault((struct vm86_regs *) regs, error_code);
+ return;
+ }
+ die_if_kernel("general protection",regs,error_code);
+ switch (get_seg_byte(regs->cs, (char *)regs->eip)) {
+ case 0xCD: /* INT */
+ case 0xF4: /* HLT */
+ case 0xFA: /* CLI */
+ case 0xFB: /* STI */
+ signr = SIGILL;
+ }
+ current->tss.error_code = error_code;
+ current->tss.trap_no = 13;
+ send_sig(signr, current, 1);
+}
+
+asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
+{
+ printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
+ printk("You probably have a hardware problem with your RAM chips\n");
+}
+
+asmlinkage void do_debug(struct pt_regs * regs, long error_code)
+{
+ if (regs->eflags & VM_MASK) {
+ handle_vm86_debug((struct vm86_regs *) regs, error_code);
+ return;
+ }
+ if (current->flags & PF_PTRACED)
+ current->blocked &= ~(1 << (SIGTRAP-1));
+ send_sig(SIGTRAP, current, 1);
+ current->tss.trap_no = 1;
+ current->tss.error_code = error_code;
+ if ((regs->cs & 3) == 0) {
+ /* If this is a kernel mode trap, then reset db7 and allow us to continue */
+ __asm__("movl %0,%%db7"
+ : /* no output */
+ : "r" (0));
+ return;
+ }
+ die_if_kernel("debug",regs,error_code);
+}
+
+/*
+ * Allow the process which triggered the interrupt to recover the error
+ * condition.
+ * - the status word is saved in the cs selector.
+ * - the tag word is saved in the operand selector.
+ * - the status word is then cleared and the tags all set to Empty.
+ *
+ * This will give sufficient information for complete recovery provided that
+ * the affected process knows or can deduce the code and data segments
+ * which were in force when the exception condition arose.
+ *
+ * Note that we play around with the 'TS' bit to hopefully get
+ * the correct behaviour even in the presence of the asynchronous
+ * IRQ13 (the external FPU error interrupt).
+ */
+void math_error(void)
+{
+ struct i387_hard_struct * env;
+
+ clts();
+ if (!last_task_used_math) {
+ __asm__("fnclex");
+ return;
+ }
+ env = &last_task_used_math->tss.i387.hard;
+ send_sig(SIGFPE, last_task_used_math, 1);
+ last_task_used_math->tss.trap_no = 16;
+ last_task_used_math->tss.error_code = 0;
+ __asm__ __volatile__("fnsave %0":"=m" (*env));
+ last_task_used_math = NULL;
+ stts();
+ env->fcs = (env->swd & 0x0000ffff) | (env->fcs & 0xffff0000);
+ env->fos = env->twd;
+ env->swd &= 0xffff3800;
+ env->twd = 0xffffffff;
+}
+
+asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code)
+{
+ ignore_irq13 = 1;
+ math_error();
+}
+
+void trap_init(void)
+{
+ int i;
+
+ set_trap_gate(0,&divide_error);
+ set_trap_gate(1,&debug);
+ set_trap_gate(2,&nmi);
+ set_system_gate(3,&int3); /* int3-5 can be called from all */
+ set_system_gate(4,&overflow);
+ set_system_gate(5,&bounds);
+ set_trap_gate(6,&invalid_op);
+ set_trap_gate(7,&device_not_available);
+ set_trap_gate(8,&double_fault);
+ set_trap_gate(9,&coprocessor_segment_overrun);
+ set_trap_gate(10,&invalid_TSS);
+ set_trap_gate(11,&segment_not_present);
+ set_trap_gate(12,&stack_segment);
+ set_trap_gate(13,&general_protection);
+ set_trap_gate(14,&page_fault);
+ set_trap_gate(15,&reserved);
+ set_trap_gate(16,&coprocessor_error);
+ set_trap_gate(17,&alignment_check);
+ for (i=18;i<48;i++)
+ set_trap_gate(i,&reserved);
+}
diff --git a/kernel/vm86.c b/kernel/vm86.c
new file mode 100644
index 000000000..144d93a02
--- /dev/null
+++ b/kernel/vm86.c
@@ -0,0 +1,404 @@
+/*
+ * linux/kernel/vm86.c
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ */
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/string.h>
+#include <linux/ptrace.h>
+
+#include <asm/segment.h>
+#include <asm/io.h>
+
+/*
+ * Known problems:
+ *
+ * Interrupt handling is not guaranteed:
+ * - a real x86 will disable all interrupts for one instruction
+ * after a "mov ss,xx" to make stack handling atomic even without
+ * the 'lss' instruction. We can't guarantee this in v86 mode,
+ * as the next instruction might result in a page fault or similar.
+ * - a real x86 will have interrupts disabled for one instruction
+ * past the 'sti' that enables them. We don't bother with all the
+ * details yet..
+ *
+ * Hopefully these problems do not actually matter for anything.
+ */
+
+/*
+ * 8- and 16-bit register defines..
+ */
+#define AL(regs) (((unsigned char *)&((regs)->eax))[0])
+#define AH(regs) (((unsigned char *)&((regs)->eax))[1])
+#define IP(regs) (*(unsigned short *)&((regs)->eip))
+#define SP(regs) (*(unsigned short *)&((regs)->esp))
+
+/*
+ * virtual flags (16 and 32-bit versions)
+ */
+#define VFLAGS (*(unsigned short *)&(current->v86flags))
+#define VEFLAGS (current->v86flags)
+
+#define set_flags(X,new,mask) \
+((X) = ((X) & ~(mask)) | ((new) & (mask)))
+
+#define SAFE_MASK (0xDD5)
+#define RETURN_MASK (0xDFF)
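+
+/*
+ * SAFE_MASK covers the flags a vm86 task may change directly: CF, PF, AF,
+ * ZF, SF, TF, DF and OF (0xDD5).  RETURN_MASK additionally keeps the fixed
+ * low reserved bits (0xDFF).  IF is virtualised through v86flags, and the
+ * CPU-dependent flags (IOPL, NT, AC, ID) through v86mask.
+ */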
+
+asmlinkage struct pt_regs * save_v86_state(struct vm86_regs * regs)
+{
+ unsigned long tmp;
+
+ if (!current->vm86_info) {
+ printk("no vm86_info: BAD\n");
+ do_exit(SIGSEGV);
+ }
+ set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->v86mask);
+ memcpy_tofs(&current->vm86_info->regs,regs,sizeof(*regs));
+ put_fs_long(current->screen_bitmap,&current->vm86_info->screen_bitmap);
+ tmp = current->tss.esp0;
+ current->tss.esp0 = current->saved_kernel_stack;
+ current->saved_kernel_stack = 0;
+ return (struct pt_regs *) tmp;
+}
+
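+/*
+ * Write-protect the 32 page table entries mapping 0xA0000-0xBFFFF (the
+ * VGA window) in the task's first 4MB, so that stores to the screen fault
+ * and can presumably be accounted against screen_bitmap by the page fault
+ * path (the bitmap is set up from info.screen_bitmap in sys_vm86() below).
+ */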
+static void mark_screen_rdonly(struct task_struct * tsk)
+{
+ unsigned long tmp;
+ unsigned long *pg_table;
+
+ if ((tmp = tsk->tss.cr3) != 0) {
+ tmp = *(unsigned long *) tmp;
+ if (tmp & PAGE_PRESENT) {
+ tmp &= PAGE_MASK;
+ pg_table = (0xA0000 >> PAGE_SHIFT) + (unsigned long *) tmp;
+ tmp = 32;
+ while (tmp--) {
+ if (PAGE_PRESENT & *pg_table)
+ *pg_table &= ~PAGE_RW;
+ pg_table++;
+ }
+ }
+ }
+}
+
+asmlinkage int sys_vm86(struct vm86_struct * v86)
+{
+ struct vm86_struct info;
+ struct pt_regs * pt_regs = (struct pt_regs *) &v86;
+ int error;
+
+ if (current->saved_kernel_stack)
+ return -EPERM;
+ /* v86 must be readable (now) and writable (for save_v86_state) */
+ error = verify_area(VERIFY_WRITE,v86,sizeof(*v86));
+ if (error)
+ return error;
+ memcpy_fromfs(&info,v86,sizeof(info));
+/*
+ * make sure the vm86() system call doesn't try to do anything silly
+ */
+ info.regs.__null_ds = 0;
+ info.regs.__null_es = 0;
+ info.regs.__null_fs = 0;
+ info.regs.__null_gs = 0;
+/*
+ * The eflags register is also special: we cannot trust that the user
+ * has set it up safely, so this makes sure the interrupt flag and other
+ * sensitive flags are inherited from protected mode.
+ */
+ VEFLAGS = info.regs.eflags;
+ info.regs.eflags &= SAFE_MASK;
+ info.regs.eflags |= pt_regs->eflags & ~SAFE_MASK;
+ info.regs.eflags |= VM_MASK;
+
+ switch (info.cpu_type) {
+ case CPU_286:
+ current->v86mask = 0;
+ break;
+ case CPU_386:
+ current->v86mask = NT_MASK | IOPL_MASK;
+ break;
+ case CPU_486:
+ current->v86mask = AC_MASK | NT_MASK | IOPL_MASK;
+ break;
+ default:
+ current->v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK;
+ break;
+ }
+
+/*
+ * Save old state, set default return value (%eax) to 0
+ */
+ pt_regs->eax = 0;
+ current->saved_kernel_stack = current->tss.esp0;
+ current->tss.esp0 = (unsigned long) pt_regs;
+ current->vm86_info = v86;
+
+ current->screen_bitmap = info.screen_bitmap;
+ if (info.flags & VM86_SCREEN_BITMAP)
+ mark_screen_rdonly(current);
+ __asm__ __volatile__("movl %0,%%esp\n\t"
+ "jmp ret_from_sys_call"
+ : /* no outputs */
+ :"r" (&info.regs));
+ return 0;
+}
+
+static inline void return_to_32bit(struct vm86_regs * regs16, int retval)
+{
+ struct pt_regs * regs32;
+
+ regs32 = save_v86_state(regs16);
+ regs32->eax = retval;
+ __asm__ __volatile__("movl %0,%%esp\n\t"
+ "jmp ret_from_sys_call"
+ : : "r" (regs32));
+}
+
+static inline void set_IF(struct vm86_regs * regs)
+{
+ VEFLAGS |= VIF_MASK;
+ if (VEFLAGS & VIP_MASK)
+ return_to_32bit(regs, VM86_STI);
+}
+
+static inline void clear_IF(struct vm86_regs * regs)
+{
+ VEFLAGS &= ~VIF_MASK;
+}
+
+static inline void clear_TF(struct vm86_regs * regs)
+{
+ regs->eflags &= ~TF_MASK;
+}
+
+static inline void set_vflags_long(unsigned long eflags, struct vm86_regs * regs)
+{
+ set_flags(VEFLAGS, eflags, current->v86mask);
+ set_flags(regs->eflags, eflags, SAFE_MASK);
+ if (eflags & IF_MASK)
+ set_IF(regs);
+}
+
+static inline void set_vflags_short(unsigned short flags, struct vm86_regs * regs)
+{
+ set_flags(VFLAGS, flags, current->v86mask);
+ set_flags(regs->eflags, flags, SAFE_MASK);
+ if (flags & IF_MASK)
+ set_IF(regs);
+}
+
+static inline unsigned long get_vflags(struct vm86_regs * regs)
+{
+ unsigned long flags = regs->eflags & RETURN_MASK;
+
+ if (VEFLAGS & VIF_MASK)
+ flags |= IF_MASK;
+ return flags | (VEFLAGS & current->v86mask);
+}
+
+static inline int is_revectored(int nr, struct revectored_struct * bitmap)
+{
+ __asm__ __volatile__("btl %2,%%fs:%1\n\tsbbl %0,%0"
+ :"=r" (nr)
+ :"m" (*bitmap),"r" (nr));
+ return nr;
+}
+
+/*
+ * Boy are these ugly, but we need to do the correct 16-bit arithmetic.
+ * Gcc makes a mess of it, so we do it inline and use non-obvious calling
+ * conventions..
+ */
+#define pushb(base, ptr, val) \
+__asm__ __volatile__( \
+ "decw %w0\n\t" \
+ "movb %2,%%fs:0(%1,%0)" \
+ : "=r" (ptr) \
+ : "r" (base), "q" (val), "0" (ptr))
+
+#define pushw(base, ptr, val) \
+__asm__ __volatile__( \
+ "decw %w0\n\t" \
+ "movb %h2,%%fs:0(%1,%0)\n\t" \
+ "decw %w0\n\t" \
+ "movb %b2,%%fs:0(%1,%0)" \
+ : "=r" (ptr) \
+ : "r" (base), "q" (val), "0" (ptr))
+
+#define pushl(base, ptr, val) \
+__asm__ __volatile__( \
+ "decw %w0\n\t" \
+ "rorl $16,%2\n\t" \
+ "movb %h2,%%fs:0(%1,%0)\n\t" \
+ "decw %w0\n\t" \
+ "movb %b2,%%fs:0(%1,%0)\n\t" \
+ "decw %w0\n\t" \
+ "rorl $16,%2\n\t" \
+ "movb %h2,%%fs:0(%1,%0)\n\t" \
+ "decw %w0\n\t" \
+ "movb %b2,%%fs:0(%1,%0)" \
+ : "=r" (ptr) \
+ : "r" (base), "q" (val), "0" (ptr))
+
+#define popb(base, ptr) \
+({ unsigned long __res; \
+__asm__ __volatile__( \
+ "movb %%fs:0(%1,%0),%b2\n\t" \
+ "incw %w0" \
+ : "=r" (ptr), "=r" (base), "=q" (__res) \
+ : "0" (ptr), "1" (base), "2" (0)); \
+__res; })
+
+#define popw(base, ptr) \
+({ unsigned long __res; \
+__asm__ __volatile__( \
+ "movb %%fs:0(%1,%0),%b2\n\t" \
+ "incw %w0\n\t" \
+ "movb %%fs:0(%1,%0),%h2\n\t" \
+ "incw %w0" \
+ : "=r" (ptr), "=r" (base), "=q" (__res) \
+ : "0" (ptr), "1" (base), "2" (0)); \
+__res; })
+
+#define popl(base, ptr) \
+({ unsigned long __res; \
+__asm__ __volatile__( \
+ "movb %%fs:0(%1,%0),%b2\n\t" \
+ "incw %w0\n\t" \
+ "movb %%fs:0(%1,%0),%h2\n\t" \
+ "incw %w0\n\t" \
+ "rorl $16,%2\n\t" \
+ "movb %%fs:0(%1,%0),%b2\n\t" \
+ "incw %w0\n\t" \
+ "movb %%fs:0(%1,%0),%h2\n\t" \
+ "incw %w0\n\t" \
+ "rorl $16,%2" \
+ : "=r" (ptr), "=r" (base), "=q" (__res) \
+ : "0" (ptr), "1" (base)); \
+__res; })
+
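+/*
+ * Emulate a real-mode software interrupt: unless the vector is revectored
+ * back to 32-bit mode, push the virtual flags, CS and IP onto the vm86
+ * stack with pushw() and vector through the real-mode interrupt table at
+ * linear address 0 (entry i occupies the 4 bytes at i*4).
+ */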
+static void do_int(struct vm86_regs *regs, int i, unsigned char * ssp, unsigned long sp)
+{
+ unsigned short seg = get_fs_word((void *) ((i<<2)+2));
+
+ if (seg == BIOSSEG || regs->cs == BIOSSEG ||
+ is_revectored(i, &current->vm86_info->int_revectored))
+ return_to_32bit(regs, VM86_INTx + (i << 8));
+ if (i==0x21 && is_revectored(AH(regs),&current->vm86_info->int21_revectored))
+ return_to_32bit(regs, VM86_INTx + (i << 8));
+ pushw(ssp, sp, get_vflags(regs));
+ pushw(ssp, sp, regs->cs);
+ pushw(ssp, sp, IP(regs));
+ regs->cs = seg;
+ SP(regs) -= 6;
+ IP(regs) = get_fs_word((void *) (i<<2));
+ clear_TF(regs);
+ clear_IF(regs);
+ return;
+}
+
+void handle_vm86_debug(struct vm86_regs * regs, long error_code)
+{
+#if 0
+ do_int(regs, 1, (unsigned char *) (regs->ss << 4), SP(regs));
+#else
+ if (current->flags & PF_PTRACED)
+ current->blocked &= ~(1 << (SIGTRAP-1));
+ send_sig(SIGTRAP, current, 1);
+ current->tss.trap_no = 1;
+ current->tss.error_code = error_code;
+#endif
+}
+
+void handle_vm86_fault(struct vm86_regs * regs, long error_code)
+{
+ unsigned char *csp, *ssp;
+ unsigned long ip, sp;
+
+ csp = (unsigned char *) (regs->cs << 4);
+ ssp = (unsigned char *) (regs->ss << 4);
+ sp = SP(regs);
+ ip = IP(regs);
+
+ switch (popb(csp, ip)) {
+
+ /* operand size override */
+ case 0x66:
+ switch (popb(csp, ip)) {
+
+ /* pushfd */
+ case 0x9c:
+ SP(regs) -= 4;
+ IP(regs) += 2;
+ pushl(ssp, sp, get_vflags(regs));
+ return;
+
+ /* popfd */
+ case 0x9d:
+ SP(regs) += 4;
+ IP(regs) += 2;
+ set_vflags_long(popl(ssp, sp), regs);
+ return;
+ }
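+	/*
+	 * Any other opcode after the 0x66 prefix falls through into the
+	 * 16-bit pushf case below.
+	 */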
+
+ /* pushf */
+ case 0x9c:
+ SP(regs) -= 2;
+ IP(regs)++;
+ pushw(ssp, sp, get_vflags(regs));
+ return;
+
+ /* popf */
+ case 0x9d:
+ SP(regs) += 2;
+ IP(regs)++;
+ set_vflags_short(popw(ssp, sp), regs);
+ return;
+
+ /* int 3 */
+ case 0xcc:
+ IP(regs)++;
+ do_int(regs, 3, ssp, sp);
+ return;
+
+ /* int xx */
+ case 0xcd:
+ IP(regs) += 2;
+ do_int(regs, popb(csp, ip), ssp, sp);
+ return;
+
+ /* iret */
+ case 0xcf:
+ SP(regs) += 6;
+ IP(regs) = popw(ssp, sp);
+ regs->cs = popw(ssp, sp);
+ set_vflags_short(popw(ssp, sp), regs);
+ return;
+
+ /* cli */
+ case 0xfa:
+ IP(regs)++;
+ clear_IF(regs);
+ return;
+
+ /* sti */
+ /*
+ * Damn. This is incorrect: the 'sti' instruction should actually
+ * enable interrupts after the /next/ instruction. Not good.
+ *
+ * Probably needs some horsing around with the TF flag. Aiee..
+ */
+ case 0xfb:
+ IP(regs)++;
+ set_IF(regs);
+ return;
+
+ default:
+ return_to_32bit(regs, VM86_UNKNOWN);
+ }
+}
diff --git a/kernel/vsprintf.c b/kernel/vsprintf.c
new file mode 100644
index 000000000..b85f78420
--- /dev/null
+++ b/kernel/vsprintf.c
@@ -0,0 +1,309 @@
+/*
+ * linux/kernel/vsprintf.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/* vsprintf.c -- Lars Wirzenius & Linus Torvalds. */
+/*
+ * Wirzenius wrote this portably, Torvalds fucked it up :-)
+ */
+
+#include <stdarg.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+
+unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
+{
+ unsigned long result = 0,value;
+
+ if (!base) {
+ base = 10;
+ if (*cp == '0') {
+ base = 8;
+ cp++;
+ if ((*cp == 'x') && isxdigit(cp[1])) {
+ cp++;
+ base = 16;
+ }
+ }
+ }
+ while (isxdigit(*cp) && (value = isdigit(*cp) ? *cp-'0' : (islower(*cp)
+ ? toupper(*cp) : *cp)-'A'+10) < base) {
+ result = result*base + value;
+ cp++;
+ }
+ if (endp)
+ *endp = (char *)cp;
+ return result;
+}
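+
+/*
+ * Usage sketch (illustrative): with base == 0 the base is guessed from the
+ * prefix, so
+ *
+ *	simple_strtoul("0x1a", NULL, 0) == 26	(leading "0x" => base 16)
+ *	simple_strtoul("0755", NULL, 0) == 493	(leading "0"  => base 8)
+ *	simple_strtoul("42",   NULL, 0) == 42	(base 10)
+ */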
+
+/* we use this so that we can do without the ctype library */
+#define is_digit(c) ((c) >= '0' && (c) <= '9')
+
+static int skip_atoi(const char **s)
+{
+ int i=0;
+
+ while (is_digit(**s))
+ i = i*10 + *((*s)++) - '0';
+ return i;
+}
+
+#define ZEROPAD 1 /* pad with zero */
+#define SIGN 2 /* unsigned/signed long */
+#define PLUS 4 /* show plus */
+#define SPACE 8 /* space if plus */
+#define LEFT 16 /* left justified */
+#define SPECIAL 32 /* 0x */
+#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
+
+#define do_div(n,base) ({ \
+int __res; \
+__res = ((unsigned long) n) % (unsigned) base; \
+n = ((unsigned long) n) / (unsigned) base; \
+__res; })
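+
+/*
+ * Example (illustrative): with n == 1994, do_div(n, 10) evaluates to 4 and
+ * leaves n == 199, i.e. it divides n in place and yields the remainder.
+ */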
+
+static char * number(char * str, long num, int base, int size, int precision,
+	int type)
+{
+ char c,sign,tmp[36];
+ const char *digits="0123456789abcdefghijklmnopqrstuvwxyz";
+ int i;
+
+ if (type & LARGE)
+ digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ if (type & LEFT)
+ type &= ~ZEROPAD;
+ if (base < 2 || base > 36)
+ return 0;
+ c = (type & ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & SIGN) {
+ if (num < 0) {
+ sign = '-';
+ num = -num;
+ size--;
+ } else if (type & PLUS) {
+ sign = '+';
+ size--;
+ } else if (type & SPACE) {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & SPECIAL) {
+ if (base == 16)
+ size -= 2;
+ else if (base == 8)
+ size--;
+ }
+ i = 0;
+ if (num == 0)
+ tmp[i++]='0';
+ else while (num != 0)
+ tmp[i++] = digits[do_div(num,base)];
+ if (i > precision)
+ precision = i;
+ size -= precision;
+ if (!(type&(ZEROPAD+LEFT)))
+ while(size-->0)
+ *str++ = ' ';
+ if (sign)
+ *str++ = sign;
+ if (type & SPECIAL)
+ if (base==8)
+ *str++ = '0';
+ else if (base==16) {
+ *str++ = '0';
+ *str++ = digits[33];
+ }
+ if (!(type & LEFT))
+ while (size-- > 0)
+ *str++ = c;
+ while (i < precision--)
+ *str++ = '0';
+ while (i-- > 0)
+ *str++ = tmp[i];
+ while (size-- > 0)
+ *str++ = ' ';
+ return str;
+}
+
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+ int len;
+ unsigned long num;
+ int i, base;
+ char * str;
+ char *s;
+
+ int flags; /* flags to number() */
+
+ int field_width; /* width of output field */
+ int precision; /* min. # of digits for integers; max
+				   number of chars for a string */
+ int qualifier; /* 'h', 'l', or 'L' for integer fields */
+
+ for (str=buf ; *fmt ; ++fmt) {
+ if (*fmt != '%') {
+ *str++ = *fmt;
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+ repeat:
+ ++fmt; /* this also skips first '%' */
+ switch (*fmt) {
+ case '-': flags |= LEFT; goto repeat;
+ case '+': flags |= PLUS; goto repeat;
+ case ' ': flags |= SPACE; goto repeat;
+ case '#': flags |= SPECIAL; goto repeat;
+ case '0': flags |= ZEROPAD; goto repeat;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (is_digit(*fmt))
+ field_width = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ field_width = va_arg(args, int);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt == '.') {
+ ++fmt;
+ if (is_digit(*fmt))
+ precision = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ precision = va_arg(args, int);
+ }
+ if (precision < 0)
+ precision = 0;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') {
+ qualifier = *fmt;
+ ++fmt;
+ }
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt) {
+ case 'c':
+ if (!(flags & LEFT))
+ while (--field_width > 0)
+ *str++ = ' ';
+ *str++ = (unsigned char) va_arg(args, int);
+ while (--field_width > 0)
+ *str++ = ' ';
+ continue;
+
+ case 's':
+ s = va_arg(args, char *);
+ if (!s)
+ s = "<NULL>";
+ len = strlen(s);
+ if (precision < 0)
+ precision = len;
+ else if (len > precision)
+ len = precision;
+
+ if (!(flags & LEFT))
+ while (len < field_width--)
+ *str++ = ' ';
+ for (i = 0; i < len; ++i)
+ *str++ = *s++;
+ while (len < field_width--)
+ *str++ = ' ';
+ continue;
+
+ case 'p':
+ if (field_width == -1) {
+ field_width = 2*sizeof(void *);
+ flags |= ZEROPAD;
+ }
+ str = number(str,
+ (unsigned long) va_arg(args, void *), 16,
+ field_width, precision, flags);
+ continue;
+
+ case 'n':
+ if (qualifier == 'l') {
+ long * ip = va_arg(args, long *);
+ *ip = (str - buf);
+ } else {
+ int * ip = va_arg(args, int *);
+ *ip = (str - buf);
+ }
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'X':
+ flags |= LARGE;
+ case 'x':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= SIGN;
+ case 'u':
+ break;
+
+ default:
+ if (*fmt != '%')
+ *str++ = '%';
+ if (*fmt)
+ *str++ = *fmt;
+ else
+ --fmt;
+ continue;
+ }
+ if (qualifier == 'l')
+ num = va_arg(args, unsigned long);
+ else if (qualifier == 'h')
+ if (flags & SIGN)
+ num = va_arg(args, short);
+ else
+ num = va_arg(args, unsigned short);
+ else if (flags & SIGN)
+ num = va_arg(args, int);
+ else
+ num = va_arg(args, unsigned int);
+ str = number(str, num, base, field_width, precision, flags);
+ }
+ *str = '\0';
+ return str-buf;
+}
+
+int sprintf(char * buf, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i=vsprintf(buf,fmt,args);
+ va_end(args);
+ return i;
+}
+
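+
+/*
+ * Usage sketch (illustrative) of the flag, width and precision handling
+ * above:
+ *
+ *	char buf[64];
+ *
+ *	sprintf(buf, "%#08lx", 0xdeadL);	buf == "0x00dead"
+ *	sprintf(buf, "%-6d|", -42);		buf == "-42   |"
+ *	sprintf(buf, "%5.3s|", "linux");	buf == "  lin|"
+ */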