diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 48 | ||||
-rw-r--r-- | kernel/bios32.c | 476 | ||||
-rw-r--r-- | kernel/dma.c | 113 | ||||
-rw-r--r-- | kernel/exec_domain.c | 102 | ||||
-rw-r--r-- | kernel/exit.c | 603 | ||||
-rw-r--r-- | kernel/fork.c | 265 | ||||
-rw-r--r-- | kernel/info.c | 42 | ||||
-rw-r--r-- | kernel/ioport.c | 194 | ||||
-rw-r--r-- | kernel/irq.c | 354 | ||||
-rw-r--r-- | kernel/itimer.c | 135 | ||||
-rw-r--r-- | kernel/ksyms.c | 263 | ||||
-rw-r--r-- | kernel/ldt.c | 103 | ||||
-rw-r--r-- | kernel/module.c | 584 | ||||
-rw-r--r-- | kernel/panic.c | 32 | ||||
-rw-r--r-- | kernel/printk.c | 229 | ||||
-rw-r--r-- | kernel/ptrace.c | 517 | ||||
-rw-r--r-- | kernel/sched.c | 861 | ||||
-rw-r--r-- | kernel/signal.c | 407 | ||||
-rw-r--r-- | kernel/splx.c | 27 | ||||
-rw-r--r-- | kernel/sys.c | 787 | ||||
-rw-r--r-- | kernel/time.c | 487 | ||||
-rw-r--r-- | kernel/tqueue.c | 10 | ||||
-rw-r--r-- | kernel/traps.c | 245 | ||||
-rw-r--r-- | kernel/vm86.c | 404 | ||||
-rw-r--r-- | kernel/vsprintf.c | 309 |
25 files changed, 7597 insertions, 0 deletions
diff --git a/kernel/Makefile b/kernel/Makefile new file mode 100644 index 000000000..6de499ca7 --- /dev/null +++ b/kernel/Makefile @@ -0,0 +1,48 @@ +# +# Makefile for the linux kernel. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... + +.S.s: + $(CPP) -traditional $< -o $*.s +.c.s: + $(CC) $(CFLAGS) -S $< +.s.o: + $(AS) -o $*.o $< +.c.o: + $(CC) $(CFLAGS) -c $< + +OBJS = sched.o entry.o traps.o irq.o dma.o fork.o exec_domain.o \ + panic.o printk.o vsprintf.o sys.o module.o ksyms.o exit.o \ + signal.o ptrace.o ioport.o itimer.o \ + info.o ldt.o time.o tqueue.o vm86.o bios32.o splx.o + +all: kernel.o + +kernel.o: $(OBJS) + $(LD) -r -o kernel.o $(OBJS) + sync + +entry.s: entry.S + +entry.o: entry.s + +sched.o: sched.c + $(CC) $(CFLAGS) $(PROFILING) -fno-omit-frame-pointer -c $< + +dep: + $(CPP) -M *.c > .depend + +dummy: + +# +# include a dependency file if one exists +# +ifeq (.depend,$(wildcard .depend)) +include .depend +endif + diff --git a/kernel/bios32.c b/kernel/bios32.c new file mode 100644 index 000000000..311dd111e --- /dev/null +++ b/kernel/bios32.c @@ -0,0 +1,476 @@ +/* + * bios32.c - BIOS32, PCI BIOS functions. + * + * Sponsored by + * iX Multiuser Multitasking Magazine + * Hannover, Germany + * hm@ix.de + * + * Copyright 1993, 1994 Drew Eckhardt + * Visionary Computing + * (Unix and Linux consulting and custom programming) + * Drew@Colorado.EDU + * +1 (303) 786-7975 + * + * For more information, please consult + * + * PCI BIOS Specification Revision + * PCI Local Bus Specification + * PCI System Design Guide + * + * PCI Special Interest Group + * M/S HF3-15A + * 5200 N.E. 
Elam Young Parkway + * Hillsboro, Oregon 97124-6497 + * +1 (503) 696-2000 + * +1 (800) 433-5177 + * + * Manuals are $25 each or $50 for all three, plus $7 shipping + * within the United States, $35 abroad. + * + * + * CHANGELOG : + * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION + * Revision 2.0 present on <thys@dennis.ee.up.ac.za>'s ASUS mainboard. + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/bios32.h> +#include <linux/pci.h> + +#include <asm/segment.h> + +/* + * It would seem some PCI bioses are buggy, so we don't actually use these + * routines unless we need to.. + */ +#ifdef CONFIG_SCSI_NCR53C7xx + #define CONFIG_PCI +#else + #undef CONFIG_PCI +#endif + +#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX +#define PCIBIOS_PCI_BIOS_PRESENT 0xb101 +#define PCIBIOS_FIND_PCI_DEVICE 0xb102 +#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103 +#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106 +#define PCIBIOS_READ_CONFIG_BYTE 0xb108 +#define PCIBIOS_READ_CONFIG_WORD 0xb109 +#define PCIBIOS_READ_CONFIG_DWORD 0xb10a +#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b +#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c +#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d + +/* BIOS32 signature: "_32_" */ +#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) + +/* PCI signature: "PCI " */ +#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24)) + +/* PCI service signature: "$PCI" */ +#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24)) + +/* + * This is the standard structure used to identify the entry point + * to the BIOS32 Service Directory, as documented in + * Standard BIOS 32-bit Service Directory Proposal + * Revision 0.4 May 24, 1993 + * Phoenix Technologies Ltd. + * Norwood, MA + * and the PCI BIOS specification. 
+ */ + +union bios32 { + struct { + unsigned long signature; /* _32_ */ + unsigned long entry; /* 32 bit physical address */ + unsigned char revision; /* Revision level, 0 */ + unsigned char length; /* Length in paragraphs should be 01 */ + unsigned char checksum; /* All bytes must add up to zero */ + unsigned char reserved[5]; /* Must be zero */ + } fields; + char chars[16]; +}; + +/* + * Physical address of the service directory. I don't know if we're + * allowed to have more than one of these or not, so just in case + * we'll make bios32_init() take a memory start parameter and store + * the array there. + */ + +static unsigned long bios32_entry = 0; +static struct { + unsigned long address; + unsigned short segment; +} bios32_indirect = { 0, KERNEL_CS }; + +#ifdef CONFIG_PCI +/* + * Returns the entry point for the given service, NULL on error + */ + +static unsigned long bios32_service(unsigned long service) +{ + unsigned char return_code; /* %al */ + unsigned long address; /* %ebx */ + unsigned long length; /* %ecx */ + unsigned long entry; /* %edx */ + + __asm__("lcall (%%edi)" + : "=a" (return_code), + "=b" (address), + "=c" (length), + "=d" (entry) + : "0" (service), + "1" (0), + "D" (&bios32_indirect)); + + switch (return_code) { + case 0: + return address + entry; + case 0x80: /* Not present */ + printk("bios32_service(%ld) : not present\n", service); + return 0; + default: /* Shouldn't happen */ + printk("bios32_service(%ld) : returned 0x%x, mail drew@colorado.edu\n", + service, return_code); + return 0; + } +} + +static long pcibios_entry = 0; +static struct { + unsigned long address; + unsigned short segment; +} pci_indirect = { 0, KERNEL_CS }; + +void NCR53c810_test(void); + +static unsigned long pcibios_init(unsigned long memory_start, unsigned long memory_end) +{ + unsigned long signature; + unsigned char present_status; + unsigned char major_revision; + unsigned char minor_revision; + int pack; + + if ((pcibios_entry = bios32_service(PCI_SERVICE))) 
{ + pci_indirect.address = pcibios_entry; + + __asm__("lcall (%%edi)\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:\tshl $8, %%eax\n\t" + "movw %%bx, %%ax" + : "=d" (signature), + "=a" (pack) + : "1" (PCIBIOS_PCI_BIOS_PRESENT), + "D" (&pci_indirect) + : "bx", "cx"); + + present_status = (pack >> 16) & 0xff; + major_revision = (pack >> 8) & 0xff; + minor_revision = pack & 0xff; + if (present_status || (signature != PCI_SIGNATURE)) { + printk ("pcibios_init : %s : BIOS32 Service Directory says PCI BIOS is present,\n" + " but PCI_BIOS_PRESENT subfunction fails with present status of 0x%x\n" + " and signature of 0x%08lx (%c%c%c%c). mail drew@Colorado.EDU\n", + (signature == PCI_SIGNATURE) ? "WARNING" : "ERROR", + present_status, signature, + (char) (signature >> 0), (char) (signature >> 8), + (char) (signature >> 16), (char) (signature >> 24)); + + if (signature != PCI_SIGNATURE) + pcibios_entry = 0; + } + if (pcibios_entry) { + printk ("pcibios_init : PCI BIOS revision %x.%02x entry at 0x%lx\n", + major_revision, minor_revision, pcibios_entry); + } + } + +#if 0 + NCR53c810_test(); +#endif + return memory_start; +} + +int pcibios_present(void) +{ + return pcibios_entry ? 
1 : 0; +} + +int pcibios_find_class_code (unsigned long class_code, unsigned short index, + unsigned char *bus, unsigned char *device_fn) +{ + unsigned long bx; + unsigned long ret; + + __asm__ ("lcall (%%edi)\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=b" (bx), + "=a" (ret) + : "1" (PCIBIOS_FIND_PCI_CLASS_CODE), + "c" (class_code), + "S" ((int) index), + "D" (&pci_indirect)); + *bus = (bx >> 8) & 0xff; + *device_fn = bx & 0xff; + return (int) (ret & 0xff00) >> 8; +} + + +int pcibios_find_device (unsigned short vendor, unsigned short device_id, + unsigned short index, unsigned char *bus, unsigned char *device_fn) +{ + unsigned short bx; + unsigned short ret; + + __asm__("lcall (%%edi)\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=b" (bx), + "=a" (ret) + : "1" (PCIBIOS_FIND_PCI_DEVICE), + "c" (device_id), + "d" (vendor), + "S" ((int) index), + "D" (&pci_indirect)); + *bus = (bx >> 8) & 0xff; + *device_fn = bx & 0xff; + return (int) (ret & 0xff00) >> 8; +} + +int pcibios_read_config_byte(unsigned char bus, + unsigned char device_fn, unsigned char where, unsigned char *value) +{ + unsigned long ret; + unsigned long bx = (bus << 8) | device_fn; + + __asm__("lcall (%%esi)\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=c" (*value), + "=a" (ret) + : "1" (PCIBIOS_READ_CONFIG_BYTE), + "b" (bx), + "D" ((long) where), + "S" (&pci_indirect)); + return (int) (ret & 0xff00) >> 8; +} + +int pcibios_read_config_word (unsigned char bus, + unsigned char device_fn, unsigned char where, unsigned short *value) +{ + unsigned long ret; + unsigned long bx = (bus << 8) | device_fn; + + __asm__("lcall (%%esi)\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=c" (*value), + "=a" (ret) + : "1" (PCIBIOS_READ_CONFIG_WORD), + "b" (bx), + "D" ((long) where), + "S" (&pci_indirect)); + return (int) (ret & 0xff00) >> 8; +} + +int pcibios_read_config_dword (unsigned char bus, + unsigned char device_fn, unsigned char where, unsigned long *value) +{ + unsigned long ret; + 
unsigned long bx = (bus << 8) | device_fn; + + __asm__("lcall (%%esi)\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=c" (*value), + "=a" (ret) + : "1" (PCIBIOS_READ_CONFIG_DWORD), + "b" (bx), + "D" ((long) where), + "S" (&pci_indirect)); + return (int) (ret & 0xff00) >> 8; +} + +int pcibios_write_config_byte (unsigned char bus, + unsigned char device_fn, unsigned char where, unsigned char value) +{ + unsigned long ret; + unsigned long bx = (bus << 8) | device_fn; + + __asm__("lcall (%%esi)\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (ret) + : "0" (PCIBIOS_WRITE_CONFIG_BYTE), + "c" (value), + "b" (bx), + "D" ((long) where), + "S" (&pci_indirect)); + return (int) (ret & 0xff00) >> 8; +} + +int pcibios_write_config_word (unsigned char bus, + unsigned char device_fn, unsigned char where, unsigned short value) +{ + unsigned long ret; + unsigned long bx = (bus << 8) | device_fn; + + __asm__("lcall (%%esi)\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (ret) + : "0" (PCIBIOS_WRITE_CONFIG_WORD), + "c" (value), + "b" (bx), + "D" ((long) where), + "S" (&pci_indirect)); + return (int) (ret & 0xff00) >> 8; +} + +int pcibios_write_config_dword (unsigned char bus, + unsigned char device_fn, unsigned char where, unsigned long value) +{ + unsigned long ret; + unsigned long bx = (bus << 8) | device_fn; + + __asm__("lcall (%%esi)\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (ret) + : "0" (PCIBIOS_WRITE_CONFIG_DWORD), + "c" (value), + "b" (bx), + "D" ((long) where), + "S" (&pci_indirect)); + return (int) (ret & 0xff00) >> 8; +} + +void NCR53c810_test(void) +{ + unsigned char bus, device_fn; + unsigned short index; + int ret; + unsigned char row, col; + unsigned long val; + + for (index = 0; index < 4; ++index) { + ret = pcibios_find_device ( + (unsigned short) PCI_VENDOR_ID_NCR, + (unsigned short) PCI_DEVICE_ID_NCR_53C810, + index, &bus, &device_fn); + if (ret) + break; + printk ("ncr53c810 : at PCI bus %d, device %d, function %d.", + bus, 
((device_fn & 0xf8) >> 3), (device_fn & 7)); + for (row = 0; row < 0x3c; row += 0x10) { + printk ("\n reg 0x%02x ", row); + for (col = 0; col < 0x10; col += 4) { + if (!(ret = pcibios_read_config_dword (bus, device_fn, row+col, &val))) + printk ("0x%08lx ", val); + else + printk ("error 0x%02x ", ret); + } + } + printk ("\n"); + } +} + +char *pcibios_strerror (int error) +{ + static char buf[80]; + + switch (error) { + case PCIBIOS_SUCCESSFUL: + return "SUCCESSFUL"; + + case PCIBIOS_FUNC_NOT_SUPPORTED: + return "FUNC_NOT_SUPPORTED"; + + case PCIBIOS_BAD_VENDOR_ID: + return "SUCCESSFUL"; + + case PCIBIOS_DEVICE_NOT_FOUND: + return "DEVICE_NOT_FOUND"; + + case PCIBIOS_BAD_REGISTER_NUMBER: + return "BAD_REGISTER_NUMBER"; + + default: + sprintf (buf, "UNKNOWN RETURN 0x%x", error); + return buf; + } +} + +#endif + +unsigned long bios32_init(unsigned long memory_start, unsigned long memory_end) +{ + union bios32 *check; + unsigned char sum; + int i, length; + + /* + * Follow the standard procedure for locating the BIOS32 Service + * directory by scanning the permissible address range from + * 0xe0000 through 0xfffff for a valid BIOS32 structure. 
+ * + * The PCI BIOS doesn't seem to work too well on many machines, + * so we disable this unless it's really needed (NCR SCSI driver) + */ + + for (check = (union bios32 *) 0xe0000; check <= (union bios32 *) 0xffff0; ++check) { + if (check->fields.signature != BIOS32_SIGNATURE) + continue; + length = check->fields.length * 16; + if (!length) + continue; + sum = 0; + for (i = 0; i < length ; ++i) + sum += check->chars[i]; + if (sum != 0) + continue; + if (check->fields.revision != 0) { + printk("bios32_init : unsupported revision %d at 0x%p, mail drew@colorado.edu\n", + check->fields.revision, check); + continue; + } + printk ("bios32_init : BIOS32 Service Directory structure at 0x%p\n", check); + if (!bios32_entry) { + bios32_indirect.address = bios32_entry = check->fields.entry; + printk ("bios32_init : BIOS32 Service Directory entry at 0x%lx\n", bios32_entry); + } else { + printk ("bios32_init : multiple entries, mail drew@colorado.edu\n"); + /* + * Jeremy Fitzhardinge reports at least one PCI BIOS + * with two different service directories, and as both + * worked for him, we'll just mention the fact, and + * not actually disallow it.. + */ +#if 0 + return memory_start; +#endif + } + } +#ifdef CONFIG_PCI + if (bios32_entry) { + memory_start = pcibios_init (memory_start, memory_end); + } +#endif + return memory_start; +} diff --git a/kernel/dma.c b/kernel/dma.c new file mode 100644 index 000000000..ce80c2fa6 --- /dev/null +++ b/kernel/dma.c @@ -0,0 +1,113 @@ +/* $Id: dma.c,v 1.5 1992/11/18 02:49:05 root Exp root $ + * linux/kernel/dma.c: A DMA channel allocator. Inspired by linux/kernel/irq.c. + * Written by Hennus Bergman, 1992. + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <asm/dma.h> + + +/* A note on resource allocation: + * + * All drivers needing DMA channels, should allocate and release them + * through the public routines `request_dma()' and `free_dma()'. 
+ * + * In order to avoid problems, all processes should allocate resources in + * the same sequence and release them in the reverse order. + * + * So, when allocating DMAs and IRQs, first allocate the IRQ, then the DMA. + * When releasing them, first release the DMA, then release the IRQ. + * If you don't, you may cause allocation requests to fail unnecessarily. + * This doesn't really matter now, but it will once we get real semaphores + * in the kernel. + */ + + + +/* Channel n is busy iff dma_chan_busy[n] != 0. + * DMA0 used to be reserved for DRAM refresh, but apparently not any more... + * DMA4 is reserved for cascading. + */ +/* +static volatile unsigned int dma_chan_busy[MAX_DMA_CHANNELS] = { + 0, 0, 0, 0, 1, 0, 0, 0 +}; +*/ +static volatile char * dma_chan_busy[MAX_DMA_CHANNELS] = { + 0, + 0, + 0, + 0, + "cascade", + 0, + 0, + 0 +}; + +/* Atomically swap memory location [32 bits] with `newval'. + * This avoid the cli()/sti() junk and related problems. + * [And it's faster too :-)] + * Maybe this should be in include/asm/mutex.h and be used for + * implementing kernel-semaphores as well. + */ +static __inline__ unsigned int mutex_atomic_swap(volatile unsigned int * p, unsigned int newval) +{ + unsigned int semval = newval; + + /* If one of the operands for the XCHG instructions is a memory ref, + * it makes the swap an uninterruptible RMW cycle. + * + * One operand must be in memory, the other in a register, otherwise + * the swap may not be atomic. 
+ */ + + asm __volatile__ ("xchgl %2, %0\n" + : /* outputs: semval */ "=r" (semval) + : /* inputs: newval, p */ "0" (semval), "m" (*p) + ); /* p is a var, containing an address */ + return semval; +} /* mutex_atomic_swap */ + + +int get_dma_list(char *buf) +{ + int i, len = 0; + + for (i = 0 ; i < MAX_DMA_CHANNELS ; i++) { + if (dma_chan_busy[i]) { + len += sprintf(buf+len, "%2d: %s\n", + i, + dma_chan_busy[i]); + } + } + return len; +} + +int request_dma(unsigned int dmanr, char * deviceID) +{ + if (dmanr >= MAX_DMA_CHANNELS) + return -EINVAL; + + if (mutex_atomic_swap((unsigned int *) &dma_chan_busy[dmanr], (unsigned int) deviceID) != 0) + return -EBUSY; + + /* old flag was 0, now contains 1 to indicate busy */ + return 0; +} /* request_dma */ + + +void free_dma(unsigned int dmanr) +{ + if (dmanr >= MAX_DMA_CHANNELS) { + printk("Trying to free DMA%d\n", dmanr); + return; + } + + if (mutex_atomic_swap((unsigned int *) &dma_chan_busy[dmanr], 0) == 0) { + printk("Trying to free free DMA%d\n", dmanr); + return; + } + +} /* free_dma */ + diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c new file mode 100644 index 000000000..c80423314 --- /dev/null +++ b/kernel/exec_domain.c @@ -0,0 +1,102 @@ +#include <linux/personality.h> +#include <linux/ptrace.h> +#include <linux/sched.h> + + +static asmlinkage void no_lcall7(struct pt_regs * regs); + + +static unsigned long ident_map[32] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 +}; + +struct exec_domain default_exec_domain = { + "Linux", /* name */ + no_lcall7, /* lcall7 causes a seg fault. */ + 0, 0xff, /* All personalities. */ + ident_map, /* Identity map signals. */ + ident_map, /* - both ways. */ + NULL, /* No usage counter. */ + NULL /* Nothing after this in the list. 
*/ +}; + +static struct exec_domain *exec_domains = &default_exec_domain; + + +static asmlinkage void no_lcall7(struct pt_regs * regs) +{ + send_sig(SIGSEGV, current, 1); +} + +struct exec_domain *lookup_exec_domain(unsigned long personality) +{ + unsigned long pers = personality & PER_MASK; + struct exec_domain *it; + + for (it=exec_domains; it; it=it->next) + if (pers >= it->pers_low + && pers <= it->pers_high) + return it; + + /* Should never get this far. */ + printk(KERN_ERR "No execution domain for personality 0x%02lx\n", pers); + return NULL; +} + +int register_exec_domain(struct exec_domain *it) +{ + struct exec_domain *tmp; + + if (!it) + return -EINVAL; + if (it->next) + return -EBUSY; + for (tmp=exec_domains; tmp; tmp=tmp->next) + if (tmp == it) + return -EBUSY; + it->next = exec_domains; + exec_domains = it; + return 0; +} + +int unregister_exec_domain(struct exec_domain *it) +{ + struct exec_domain ** tmp; + + tmp = &exec_domains; + while (*tmp) { + if (it == *tmp) { + *tmp = it->next; + it->next = NULL; + return 0; + } + tmp = &(*tmp)->next; + } + return -EINVAL; +} + +asmlinkage int sys_personality(unsigned long personality) +{ + struct exec_domain *it; + unsigned long old_personality; + + if (personality == 0xffffffff) + return current->personality; + + it = lookup_exec_domain(personality); + if (!it) + return -EINVAL; + + old_personality = current->personality; + if (current->exec_domain && current->exec_domain->use_count) + (*current->exec_domain->use_count)--; + current->personality = personality; + current->exec_domain = it; + if (current->exec_domain->use_count) + (*current->exec_domain->use_count)++; + + return old_personality; +} diff --git a/kernel/exit.c b/kernel/exit.c new file mode 100644 index 000000000..b2a8c4fb0 --- /dev/null +++ b/kernel/exit.c @@ -0,0 +1,603 @@ +/* + * linux/kernel/exit.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#define DEBUG_PROC_TREE + +#include <linux/wait.h> +#include <linux/errno.h> +#include 
<linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/resource.h> +#include <linux/mm.h> +#include <linux/tty.h> +#include <linux/malloc.h> + +#include <asm/segment.h> +extern void sem_exit (void); + +int getrusage(struct task_struct *, int, struct rusage *); + +static int generate(unsigned long sig, struct task_struct * p) +{ + unsigned long mask = 1 << (sig-1); + struct sigaction * sa = sig + p->sigaction - 1; + + /* always generate signals for traced processes ??? */ + if (p->flags & PF_PTRACED) { + p->signal |= mask; + return 1; + } + /* don't bother with ignored signals (but SIGCHLD is special) */ + if (sa->sa_handler == SIG_IGN && sig != SIGCHLD) + return 0; + /* some signals are ignored by default.. (but SIGCONT already did its deed) */ + if ((sa->sa_handler == SIG_DFL) && + (sig == SIGCONT || sig == SIGCHLD || sig == SIGWINCH)) + return 0; + p->signal |= mask; + return 1; +} + +int send_sig(unsigned long sig,struct task_struct * p,int priv) +{ + if (!p || sig > 32) + return -EINVAL; + if (!priv && ((sig != SIGCONT) || (current->session != p->session)) && + (current->euid != p->euid) && (current->uid != p->uid) && !suser()) + return -EPERM; + if (!sig) + return 0; + /* + * Forget it if the process is already zombie'd. 
+ */ + if (p->state == TASK_ZOMBIE) + return 0; + if ((sig == SIGKILL) || (sig == SIGCONT)) { + if (p->state == TASK_STOPPED) + p->state = TASK_RUNNING; + p->exit_code = 0; + p->signal &= ~( (1<<(SIGSTOP-1)) | (1<<(SIGTSTP-1)) | + (1<<(SIGTTIN-1)) | (1<<(SIGTTOU-1)) ); + } + /* Depends on order SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU */ + if ((sig >= SIGSTOP) && (sig <= SIGTTOU)) + p->signal &= ~(1<<(SIGCONT-1)); + /* Actually generate the signal */ + generate(sig,p); + return 0; +} + +void notify_parent(struct task_struct * tsk) +{ + if (tsk->p_pptr == task[1]) + tsk->exit_signal = SIGCHLD; + send_sig(tsk->exit_signal, tsk->p_pptr, 1); + wake_up_interruptible(&tsk->p_pptr->wait_chldexit); +} + +void release(struct task_struct * p) +{ + int i; + + if (!p) + return; + if (p == current) { + printk("task releasing itself\n"); + return; + } + for (i=1 ; i<NR_TASKS ; i++) + if (task[i] == p) { + task[i] = NULL; + REMOVE_LINKS(p); + if (STACK_MAGIC != *(unsigned long *)p->kernel_stack_page) + printk(KERN_ALERT "release: %s kernel stack corruption. Aiee\n", p->comm); + free_page(p->kernel_stack_page); + free_page((long) p); + return; + } + panic("trying to release non-existent task"); +} + +#ifdef DEBUG_PROC_TREE +/* + * Check to see if a task_struct pointer is present in the task[] array + * Return 0 if found, and 1 if not found. + */ +int bad_task_ptr(struct task_struct *p) +{ + int i; + + if (!p) + return 0; + for (i=0 ; i<NR_TASKS ; i++) + if (task[i] == p) + return 0; + return 1; +} + +/* + * This routine scans the pid tree and makes sure the rep invariant still + * holds. Used for debugging only, since it's very slow.... + * + * It looks a lot scarier than it really is.... 
we're doing nothing more + * than verifying the doubly-linked list found in p_ysptr and p_osptr, + * and checking it corresponds with the process tree defined by p_cptr and + * p_pptr; + */ +void audit_ptree(void) +{ + int i; + + for (i=1 ; i<NR_TASKS ; i++) { + if (!task[i]) + continue; + if (bad_task_ptr(task[i]->p_pptr)) + printk("Warning, pid %d's parent link is bad\n", + task[i]->pid); + if (bad_task_ptr(task[i]->p_cptr)) + printk("Warning, pid %d's child link is bad\n", + task[i]->pid); + if (bad_task_ptr(task[i]->p_ysptr)) + printk("Warning, pid %d's ys link is bad\n", + task[i]->pid); + if (bad_task_ptr(task[i]->p_osptr)) + printk("Warning, pid %d's os link is bad\n", + task[i]->pid); + if (task[i]->p_pptr == task[i]) + printk("Warning, pid %d parent link points to self\n", + task[i]->pid); + if (task[i]->p_cptr == task[i]) + printk("Warning, pid %d child link points to self\n", + task[i]->pid); + if (task[i]->p_ysptr == task[i]) + printk("Warning, pid %d ys link points to self\n", + task[i]->pid); + if (task[i]->p_osptr == task[i]) + printk("Warning, pid %d os link points to self\n", + task[i]->pid); + if (task[i]->p_osptr) { + if (task[i]->p_pptr != task[i]->p_osptr->p_pptr) + printk( + "Warning, pid %d older sibling %d parent is %d\n", + task[i]->pid, task[i]->p_osptr->pid, + task[i]->p_osptr->p_pptr->pid); + if (task[i]->p_osptr->p_ysptr != task[i]) + printk( + "Warning, pid %d older sibling %d has mismatched ys link\n", + task[i]->pid, task[i]->p_osptr->pid); + } + if (task[i]->p_ysptr) { + if (task[i]->p_pptr != task[i]->p_ysptr->p_pptr) + /* report the younger sibling; p_osptr may be NULL here */ + printk( + "Warning, pid %d younger sibling %d parent is %d\n", + task[i]->pid, task[i]->p_ysptr->pid, + task[i]->p_ysptr->p_pptr->pid); + if (task[i]->p_ysptr->p_osptr != task[i]) + printk( + "Warning, pid %d younger sibling %d has mismatched os link\n", + task[i]->pid, task[i]->p_ysptr->pid); + } + if (task[i]->p_cptr) { + if (task[i]->p_cptr->p_pptr != task[i]) + printk( + "Warning, pid %d youngest child %d has 
mismatched parent link\n", + task[i]->pid, task[i]->p_cptr->pid); + if (task[i]->p_cptr->p_ysptr) + printk( + "Warning, pid %d youngest child %d has non-NULL ys link\n", + task[i]->pid, task[i]->p_cptr->pid); + } + } +} +#endif /* DEBUG_PROC_TREE */ + +/* + * This checks not only the pgrp, but falls back on the pid if no + * satisfactory pgrp is found. I dunno - gdb doesn't work correctly + * without this... + */ +int session_of_pgrp(int pgrp) +{ + struct task_struct *p; + int fallback; + + fallback = -1; + for_each_task(p) { + if (p->session <= 0) + continue; + if (p->pgrp == pgrp) + return p->session; + if (p->pid == pgrp) + fallback = p->session; + } + return fallback; +} + +/* + * kill_pg() sends a signal to a process group: this is what the tty + * control characters do (^C, ^Z etc) + */ +int kill_pg(int pgrp, int sig, int priv) +{ + struct task_struct *p; + int err,retval = -ESRCH; + int found = 0; + + if (sig<0 || sig>32 || pgrp<=0) + return -EINVAL; + for_each_task(p) { + if (p->pgrp == pgrp) { + if ((err = send_sig(sig,p,priv)) != 0) + retval = err; + else + found++; + } + } + return(found ? 0 : retval); +} + +/* + * kill_sl() sends a signal to the session leader: this is used + * to send SIGHUP to the controlling process of a terminal when + * the connection is lost. + */ +int kill_sl(int sess, int sig, int priv) +{ + struct task_struct *p; + int err,retval = -ESRCH; + int found = 0; + + if (sig<0 || sig>32 || sess<=0) + return -EINVAL; + for_each_task(p) { + if (p->session == sess && p->leader) { + if ((err = send_sig(sig,p,priv)) != 0) + retval = err; + else + found++; + } + } + return(found ? 0 : retval); +} + +int kill_proc(int pid, int sig, int priv) +{ + struct task_struct *p; + + if (sig<0 || sig>32) + return -EINVAL; + for_each_task(p) { + if (p && p->pid == pid) + return send_sig(sig,p,priv); + } + return(-ESRCH); +} + +/* + * POSIX specifies that kill(-1,sig) is unspecified, but what we have + * is probably wrong. 
Should make it like BSD or SYSV. + */ +asmlinkage int sys_kill(int pid,int sig) +{ + int err, retval = 0, count = 0; + + if (!pid) + return(kill_pg(current->pgrp,sig,0)); + if (pid == -1) { + struct task_struct * p; + for_each_task(p) { + if (p->pid > 1 && p != current) { + ++count; + if ((err = send_sig(sig,p,0)) != -EPERM) + retval = err; + } + } + return(count ? retval : -ESRCH); + } + if (pid < 0) + return(kill_pg(-pid,sig,0)); + /* Normal kill */ + return(kill_proc(pid,sig,0)); +} + +/* + * Determine if a process group is "orphaned", according to the POSIX + * definition in 2.2.2.52. Orphaned process groups are not to be affected + * by terminal-generated stop signals. Newly orphaned process groups are + * to receive a SIGHUP and a SIGCONT. + * + * "I ask you, have you ever known what it is to be an orphan?" + */ +int is_orphaned_pgrp(int pgrp) +{ + struct task_struct *p; + + for_each_task(p) { + if ((p->pgrp != pgrp) || + (p->state == TASK_ZOMBIE) || + (p->p_pptr->pid == 1)) + continue; + if ((p->p_pptr->pgrp != pgrp) && + (p->p_pptr->session == p->session)) + return 0; + } + return(1); /* (sighing) "Often!" 
*/ +} + +static int has_stopped_jobs(int pgrp) +{ + struct task_struct * p; + + for_each_task(p) { + if (p->pgrp != pgrp) + continue; + if (p->state == TASK_STOPPED) + return(1); + } + return(0); +} + +static void forget_original_parent(struct task_struct * father) +{ + struct task_struct * p; + + for_each_task(p) { + if (p->p_opptr == father) + if (task[1]) + p->p_opptr = task[1]; + else + p->p_opptr = task[0]; + } +} + +static void exit_mm(void) +{ + struct vm_area_struct * mpnt; + + mpnt = current->mm->mmap; + current->mm->mmap = NULL; + while (mpnt) { + struct vm_area_struct * next = mpnt->vm_next; + if (mpnt->vm_ops && mpnt->vm_ops->close) + mpnt->vm_ops->close(mpnt); + if (mpnt->vm_inode) + iput(mpnt->vm_inode); + kfree(mpnt); + mpnt = next; + } + + /* forget local segments */ + __asm__ __volatile__("mov %w0,%%fs ; mov %w0,%%gs ; lldt %w0" + : /* no outputs */ + : "r" (0)); + current->tss.ldt = 0; + if (current->ldt) { + void * ldt = current->ldt; + current->ldt = NULL; + vfree(ldt); + } + + free_page_tables(current); +} + +static void exit_files(void) +{ + int i; + + for (i=0 ; i<NR_OPEN ; i++) + if (current->files->fd[i]) + sys_close(i); +} + +static void exit_fs(void) +{ + iput(current->fs->pwd); + current->fs->pwd = NULL; + iput(current->fs->root); + current->fs->root = NULL; +} + +NORET_TYPE void do_exit(long code) +{ + struct task_struct *p; + + if (intr_count) { + printk("Aiee, killing interrupt handler\n"); + intr_count = 0; + } +fake_volatile: + if (current->semundo) + sem_exit(); + exit_mm(); + exit_files(); + exit_fs(); + forget_original_parent(current); + /* + * Check to see if any process groups have become orphaned + * as a result of our exiting, and if they have any stopped + * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) + * + * Case i: Our father is in a different pgrp than we are + * and we were the only connection outside, so our pgrp + * is about to become orphaned. 
+ */ + if ((current->p_pptr->pgrp != current->pgrp) && + (current->p_pptr->session == current->session) && + is_orphaned_pgrp(current->pgrp) && + has_stopped_jobs(current->pgrp)) { + kill_pg(current->pgrp,SIGHUP,1); + kill_pg(current->pgrp,SIGCONT,1); + } + /* Let father know we died */ + notify_parent(current); + + /* + * This loop does two things: + * + * A. Make init inherit all the child processes + * B. Check to see if any process groups have become orphaned + * as a result of our exiting, and if they have any stopped + * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) + */ + while ((p = current->p_cptr) != NULL) { + current->p_cptr = p->p_osptr; + p->p_ysptr = NULL; + p->flags &= ~(PF_PTRACED|PF_TRACESYS); + if (task[1] && task[1] != current) + p->p_pptr = task[1]; + else + p->p_pptr = task[0]; + p->p_osptr = p->p_pptr->p_cptr; + p->p_osptr->p_ysptr = p; + p->p_pptr->p_cptr = p; + if (p->state == TASK_ZOMBIE) + notify_parent(p); + /* + * process group orphan check + * Case ii: Our child is in a different pgrp + * than we are, and it was the only connection + * outside, so the child pgrp is now orphaned. + */ + if ((p->pgrp != current->pgrp) && + (p->session == current->session) && + is_orphaned_pgrp(p->pgrp) && + has_stopped_jobs(p->pgrp)) { + kill_pg(p->pgrp,SIGHUP,1); + kill_pg(p->pgrp,SIGCONT,1); + } + } + if (current->leader) + disassociate_ctty(1); + if (last_task_used_math == current) + last_task_used_math = NULL; + current->state = TASK_ZOMBIE; + current->exit_code = code; + current->mm->rss = 0; +#ifdef DEBUG_PROC_TREE + audit_ptree(); +#endif + if (current->exec_domain && current->exec_domain->use_count) + (*current->exec_domain->use_count)--; + if (current->binfmt && current->binfmt->use_count) + (*current->binfmt->use_count)--; + schedule(); +/* + * In order to get rid of the "volatile function does return" message + * I did this little loop that confuses gcc to think do_exit really + * is volatile. 
In fact it's schedule() that is volatile in some + * circumstances: when current->state = ZOMBIE, schedule() never + * returns. + * + * In fact the natural way to do all this is to have the label and the + * goto right after each other, but I put the fake_volatile label at + * the start of the function just in case something /really/ bad + * happens, and the schedule returns. This way we can try again. I'm + * not paranoid: it's just that everybody is out to get me. + */ + goto fake_volatile; +} + +asmlinkage int sys_exit(int error_code) +{ + do_exit((error_code&0xff)<<8); +} + +asmlinkage int sys_wait4(pid_t pid,unsigned long * stat_addr, int options, struct rusage * ru) +{ + int flag, retval; + struct wait_queue wait = { current, NULL }; + struct task_struct *p; + + if (stat_addr) { + flag = verify_area(VERIFY_WRITE, stat_addr, 4); + if (flag) + return flag; + } + add_wait_queue(¤t->wait_chldexit,&wait); +repeat: + flag=0; + for (p = current->p_cptr ; p ; p = p->p_osptr) { + if (pid>0) { + if (p->pid != pid) + continue; + } else if (!pid) { + if (p->pgrp != current->pgrp) + continue; + } else if (pid != -1) { + if (p->pgrp != -pid) + continue; + } + /* wait for cloned processes iff the __WCLONE flag is set */ + if ((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) + continue; + flag = 1; + switch (p->state) { + case TASK_STOPPED: + if (!p->exit_code) + continue; + if (!(options & WUNTRACED) && !(p->flags & PF_PTRACED)) + continue; + if (stat_addr) + put_fs_long((p->exit_code << 8) | 0x7f, + stat_addr); + p->exit_code = 0; + if (ru != NULL) + getrusage(p, RUSAGE_BOTH, ru); + retval = p->pid; + goto end_wait4; + case TASK_ZOMBIE: + current->cutime += p->utime + p->cutime; + current->cstime += p->stime + p->cstime; + current->mm->cmin_flt += p->mm->min_flt + p->mm->cmin_flt; + current->mm->cmaj_flt += p->mm->maj_flt + p->mm->cmaj_flt; + if (ru != NULL) + getrusage(p, RUSAGE_BOTH, ru); + flag = p->pid; + if (stat_addr) + put_fs_long(p->exit_code, stat_addr); 
+ if (p->p_opptr != p->p_pptr) { + REMOVE_LINKS(p); + p->p_pptr = p->p_opptr; + SET_LINKS(p); + notify_parent(p); + } else + release(p); +#ifdef DEBUG_PROC_TREE + audit_ptree(); +#endif + retval = flag; + goto end_wait4; + default: + continue; + } + } + if (flag) { + retval = 0; + if (options & WNOHANG) + goto end_wait4; + current->state=TASK_INTERRUPTIBLE; + schedule(); + current->signal &= ~(1<<(SIGCHLD-1)); + retval = -ERESTARTSYS; + if (current->signal & ~current->blocked) + goto end_wait4; + goto repeat; + } + retval = -ECHILD; +end_wait4: + remove_wait_queue(¤t->wait_chldexit,&wait); + return retval; +} + +/* + * sys_waitpid() remains for compatibility. waitpid() should be + * implemented by calling sys_wait4() from libc.a. + */ +asmlinkage int sys_waitpid(pid_t pid,unsigned long * stat_addr, int options) +{ + return sys_wait4(pid, stat_addr, options, NULL); +} diff --git a/kernel/fork.c b/kernel/fork.c new file mode 100644 index 000000000..63a54e999 --- /dev/null +++ b/kernel/fork.c @@ -0,0 +1,265 @@ +/* + * linux/kernel/fork.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * 'fork.c' contains the help-routines for the 'fork' system call + * (see also system_call.s). + * Fork is rather simple, once you get the hang of it, but the memory + * management can be a bitch. 
See 'mm/mm.c': 'copy_page_tables()' + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/malloc.h> +#include <linux/ldt.h> + +#include <asm/segment.h> +#include <asm/system.h> + +asmlinkage void ret_from_sys_call(void) __asm__("ret_from_sys_call"); + +/* These should maybe be in <linux/tasks.h> */ + +#define MAX_TASKS_PER_USER (NR_TASKS/2) +#define MIN_TASKS_LEFT_FOR_ROOT 4 + +long last_pid=0; + +static int find_empty_process(void) +{ + int free_task; + int i, tasks_free; + int this_user_tasks; + +repeat: + if ((++last_pid) & 0xffff8000) + last_pid=1; + this_user_tasks = 0; + tasks_free = 0; + free_task = -EAGAIN; + i = NR_TASKS; + while (--i > 0) { + if (!task[i]) { + free_task = i; + tasks_free++; + continue; + } + if (task[i]->uid == current->uid) + this_user_tasks++; + if (task[i]->pid == last_pid || task[i]->pgrp == last_pid || + task[i]->session == last_pid) + goto repeat; + } + if (tasks_free <= MIN_TASKS_LEFT_FOR_ROOT || + this_user_tasks > MAX_TASKS_PER_USER) + if (current->uid) + return -EAGAIN; + return free_task; +} + +static struct file * copy_fd(struct file * old_file) +{ + struct file * new_file = get_empty_filp(); + int error; + + if (new_file) { + memcpy(new_file,old_file,sizeof(struct file)); + new_file->f_count = 1; + if (new_file->f_inode) + new_file->f_inode->i_count++; + if (new_file->f_op && new_file->f_op->open) { + error = new_file->f_op->open(new_file->f_inode,new_file); + if (error) { + iput(new_file->f_inode); + new_file->f_count = 0; + new_file = NULL; + } + } + } + return new_file; +} + +static int dup_mmap(struct task_struct * tsk) +{ + struct vm_area_struct * mpnt, **p, *tmp; + + tsk->mm->mmap = NULL; + p = &tsk->mm->mmap; + for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) { + tmp = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL); + if (!tmp) 
+ return -ENOMEM; + *tmp = *mpnt; + tmp->vm_task = tsk; + tmp->vm_next = NULL; + if (tmp->vm_inode) + tmp->vm_inode->i_count++; + if (tmp->vm_ops && tmp->vm_ops->open) + tmp->vm_ops->open(tmp); + *p = tmp; + p = &tmp->vm_next; + } + return 0; +} + +/* + * SHAREFD not yet implemented.. + */ +static void copy_files(unsigned long clone_flags, struct task_struct * p) +{ + int i; + struct file * f; + + if (clone_flags & COPYFD) { + for (i=0; i<NR_OPEN;i++) + if ((f = p->files->fd[i]) != NULL) + p->files->fd[i] = copy_fd(f); + } else { + for (i=0; i<NR_OPEN;i++) + if ((f = p->files->fd[i]) != NULL) + f->f_count++; + } +} + +/* + * CLONEVM not yet correctly implemented: needs to clone the mmap + * instead of duplicating it.. + */ +static int copy_mm(unsigned long clone_flags, struct task_struct * p) +{ + if (clone_flags & COPYVM) { + p->mm->swappable = 1; + p->mm->min_flt = p->mm->maj_flt = 0; + p->mm->cmin_flt = p->mm->cmaj_flt = 0; + if (copy_page_tables(p)) + return 1; + return dup_mmap(p); + } else { + if (clone_page_tables(p)) + return 1; + return dup_mmap(p); /* wrong.. */ + } +} + +static void copy_fs(unsigned long clone_flags, struct task_struct * p) +{ + if (current->fs->pwd) + current->fs->pwd->i_count++; + if (current->fs->root) + current->fs->root->i_count++; +} + +#define IS_CLONE (regs.orig_eax == __NR_clone) + +/* + * Ok, this is the main fork-routine. It copies the system process + * information (task[nr]) and sets up the necessary registers. It + * also copies the data segment in its entirety. 
+ */ +asmlinkage int sys_fork(struct pt_regs regs) +{ + struct pt_regs * childregs; + struct task_struct *p; + int i,nr; + unsigned long clone_flags = COPYVM | SIGCHLD; + + if(!(p = (struct task_struct*)__get_free_page(GFP_KERNEL))) + goto bad_fork; + nr = find_empty_process(); + if (nr < 0) + goto bad_fork_free; + task[nr] = p; + *p = *current; + + if (p->exec_domain && p->exec_domain->use_count) + (*p->exec_domain->use_count)++; + if (p->binfmt && p->binfmt->use_count) + (*p->binfmt->use_count)++; + + p->did_exec = 0; + p->kernel_stack_page = 0; + p->state = TASK_UNINTERRUPTIBLE; + p->flags &= ~(PF_PTRACED|PF_TRACESYS); + p->pid = last_pid; + p->p_pptr = p->p_opptr = current; + p->p_cptr = NULL; + SET_LINKS(p); + p->signal = 0; + p->it_real_value = p->it_virt_value = p->it_prof_value = 0; + p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0; + p->leader = 0; /* process leadership doesn't inherit */ + p->utime = p->stime = 0; + p->cutime = p->cstime = 0; + p->start_time = jiffies; +/* + * set up new TSS and kernel stack + */ + if (!(p->kernel_stack_page = get_free_page(GFP_KERNEL))) + goto bad_fork_cleanup; + *(unsigned long *)p->kernel_stack_page = STACK_MAGIC; + p->tss.es = KERNEL_DS; + p->tss.cs = KERNEL_CS; + p->tss.ss = KERNEL_DS; + p->tss.ds = KERNEL_DS; + p->tss.fs = USER_DS; + p->tss.gs = KERNEL_DS; + p->tss.ss0 = KERNEL_DS; + p->tss.esp0 = p->kernel_stack_page + PAGE_SIZE; + p->tss.tr = _TSS(nr); + childregs = ((struct pt_regs *) (p->kernel_stack_page + PAGE_SIZE)) - 1; + p->tss.esp = (unsigned long) childregs; + p->tss.eip = (unsigned long) ret_from_sys_call; + *childregs = regs; + childregs->eax = 0; + p->tss.back_link = 0; + p->tss.eflags = regs.eflags & 0xffffcfff; /* iopl is always 0 for a new process */ + if (IS_CLONE) { + if (regs.ebx) + childregs->esp = regs.ebx; + clone_flags = regs.ecx; + if (childregs->esp == regs.esp) + clone_flags |= COPYVM; + } + p->exit_signal = clone_flags & CSIGNAL; + p->tss.ldt = _LDT(nr); + if (p->ldt) { + p->ldt = 
(struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE); + if (p->ldt != NULL) + memcpy(p->ldt, current->ldt, LDT_ENTRIES*LDT_ENTRY_SIZE); + } + p->tss.bitmap = offsetof(struct tss_struct,io_bitmap); + for (i = 0; i < IO_BITMAP_SIZE+1 ; i++) /* IO bitmap is actually SIZE+1 */ + p->tss.io_bitmap[i] = ~0; + if (last_task_used_math == current) + __asm__("clts ; fnsave %0 ; frstor %0":"=m" (p->tss.i387)); + if (copy_mm(clone_flags, p)) + goto bad_fork_cleanup; + p->semundo = NULL; + copy_files(clone_flags, p); + copy_fs(clone_flags, p); + set_tss_desc(gdt+(nr<<1)+FIRST_TSS_ENTRY,&(p->tss)); + if (p->ldt) + set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,p->ldt, 512); + else + set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,&default_ldt, 1); + + p->counter = current->counter >> 1; + p->state = TASK_RUNNING; /* do this last, just in case */ + return p->pid; +bad_fork_cleanup: + task[nr] = NULL; + REMOVE_LINKS(p); + free_page(p->kernel_stack_page); +bad_fork_free: + free_page((long) p); +bad_fork: + return -EAGAIN; +} diff --git a/kernel/info.c b/kernel/info.c new file mode 100644 index 000000000..c7b2b9a8c --- /dev/null +++ b/kernel/info.c @@ -0,0 +1,42 @@ +/* + * linux/kernel/info.c + * + * Copyright (C) 1992 Darren Senn + */ + +/* This implements the sysinfo() system call */ + +#include <asm/segment.h> + +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/unistd.h> +#include <linux/types.h> +#include <linux/mm.h> + +asmlinkage int sys_sysinfo(struct sysinfo *info) +{ + int error; + struct sysinfo val; + struct task_struct **p; + + error = verify_area(VERIFY_WRITE, info, sizeof(struct sysinfo)); + if (error) + return error; + memset((char *)&val, 0, sizeof(struct sysinfo)); + + val.uptime = jiffies / HZ; + + val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); + val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); + val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); + + for (p = &LAST_TASK; p > &FIRST_TASK; p--) + if (*p) val.procs++; + + si_meminfo(&val); 
+ si_swapinfo(&val); + + memcpy_tofs(info, &val, sizeof(struct sysinfo)); + return 0; +} diff --git a/kernel/ioport.c b/kernel/ioport.c new file mode 100644 index 000000000..c61690e3c --- /dev/null +++ b/kernel/ioport.c @@ -0,0 +1,194 @@ +/* + * linux/kernel/ioport.c + * + * This contains the io-permission bitmap code - written by obz, with changes + * by Linus. + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/ioport.h> + +static unsigned long ioport_registrar[IO_BITMAP_SIZE] = {0, /* ... */}; + +#define _IODEBUG + +#ifdef IODEBUG +static char * ios(unsigned long l) +{ + static char str[33] = { '\0' }; + int i; + unsigned long mask; + + for (i = 0, mask = 0x80000000; i < 32; ++i, mask >>= 1) + str[i] = (l & mask) ? '1' : '0'; + return str; +} + +static void dump_io_bitmap(void) +{ + int i, j; + int numl = sizeof(current->tss.io_bitmap) >> 2; + + for (i = j = 0; j < numl; ++i) + { + printk("%4d [%3x]: ", 64*i, 64*i); + printk("%s ", ios(current->tss.io_bitmap[j++])); + if (j < numl) + printk("%s", ios(current->tss.io_bitmap[j++])); + printk("\n"); + } +} +#endif + +/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ +asmlinkage void set_bitmap(unsigned long *bitmap, + short base, short extent, int new_value) +{ + int mask; + unsigned long *bitmap_base = bitmap + (base >> 5); + unsigned short low_index = base & 0x1f; + int length = low_index + extent; + + if (low_index != 0) { + mask = (~0 << low_index); + if (length < 32) + mask &= ~(~0 << length); + if (new_value) + *bitmap_base++ |= mask; + else + *bitmap_base++ &= ~mask; + length -= 32; + } + + mask = (new_value ? ~0 : 0); + while (length >= 32) { + *bitmap_base++ = mask; + length -= 32; + } + + if (length > 0) { + mask = ~(~0 << length); + if (new_value) + *bitmap_base++ |= mask; + else + *bitmap_base++ &= ~mask; + } +} + +/* Check for set bits in BITMAP starting at BASE, going to EXTENT. 
*/ +asmlinkage int check_bitmap(unsigned long *bitmap, short base, short extent) +{ + int mask; + unsigned long *bitmap_base = bitmap + (base >> 5); + unsigned short low_index = base & 0x1f; + int length = low_index + extent; + + if (low_index != 0) { + mask = (~0 << low_index); + if (length < 32) + mask &= ~(~0 << length); + if (*bitmap_base++ & mask) + return 1; + length -= 32; + } + while (length >= 32) { + if (*bitmap_base++ != 0) + return 1; + length -= 32; + } + + if (length > 0) { + mask = ~(~0 << length); + if (*bitmap_base++ & mask) + return 1; + } + return 0; +} + +/* + * this changes the io permissions bitmap in the current task. + */ +asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on) +{ + if (from + num <= from) + return -EINVAL; + if (from + num > IO_BITMAP_SIZE*32) + return -EINVAL; + if (!suser()) + return -EPERM; + +#ifdef IODEBUG + printk("io: from=%d num=%d %s\n", from, num, (turn_on ? "on" : "off")); +#endif + set_bitmap((unsigned long *)current->tss.io_bitmap, from, num, !turn_on); + return 0; +} + +unsigned int *stack; + +/* + * sys_iopl has to be used when you want to access the IO ports + * beyond the 0x3ff range: to get the full 65536 ports bitmapped + * you'd need 8kB of bitmaps/process, which is a bit excessive. + * + * Here we just change the eflags value on the stack: we allow + * only the super-user to do it. This depends on the stack-layout + * on system-call entry - see also fork() and the signal handling + * code. 
+ */ +asmlinkage int sys_iopl(long ebx,long ecx,long edx, + long esi, long edi, long ebp, long eax, long ds, + long es, long fs, long gs, long orig_eax, + long eip,long cs,long eflags,long esp,long ss) +{ + unsigned int level = ebx; + + if (level > 3) + return -EINVAL; + if (!suser()) + return -EPERM; + *(&eflags) = (eflags & 0xffffcfff) | (level << 12); + return 0; +} + + +void snarf_region(unsigned int from, unsigned int num) +{ + if (from > IO_BITMAP_SIZE*32) + return; + if (from + num > IO_BITMAP_SIZE*32) + num = IO_BITMAP_SIZE*32 - from; + set_bitmap(ioport_registrar, from, num, 1); + return; +} + +void release_region(unsigned int from, unsigned int num) +{ + if (from > IO_BITMAP_SIZE*32) + return; + if (from + num > IO_BITMAP_SIZE*32) + num = IO_BITMAP_SIZE*32 - from; + set_bitmap(ioport_registrar, from, num, 0); + return; +} + +int check_region(unsigned int from, unsigned int num) +{ + if (from > IO_BITMAP_SIZE*32) + return 0; + if (from + num > IO_BITMAP_SIZE*32) + num = IO_BITMAP_SIZE*32 - from; + return check_bitmap(ioport_registrar, from, num); +} + +/* Called from init/main.c to reserve IO ports. */ +void reserve_setup(char *str, int *ints) +{ + int i; + + for (i = 1; i < ints[0]; i += 2) + snarf_region(ints[i], ints[i+1]); +} diff --git a/kernel/irq.c b/kernel/irq.c new file mode 100644 index 000000000..2de16db53 --- /dev/null +++ b/kernel/irq.c @@ -0,0 +1,354 @@ +/* + * linux/kernel/irq.c + * + * Copyright (C) 1992 Linus Torvalds + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + */ + +/* + * IRQ's are in fact implemented a bit like signal handlers for the kernel. + * The same sigaction struct is used, and with similar semantics (ie there + * is a SA_INTERRUPT flag etc). 
Naturally it's not a 1:1 relation, but there + * are similarities. + * + * sa_handler(int irq_NR) is the default function called (0 if no). + * sa_mask is horribly ugly (I won't even mention it) + * sa_flags contains various info: SA_INTERRUPT etc + * sa_restorer is the unused + */ + +#include <linux/ptrace.h> +#include <linux/errno.h> +#include <linux/kernel_stat.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/interrupt.h> + +#include <asm/system.h> +#include <asm/io.h> +#include <asm/irq.h> + +#define CR0_NE 32 + +static unsigned char cache_21 = 0xff; +static unsigned char cache_A1 = 0xff; + +unsigned long intr_count = 0; +unsigned long bh_active = 0; +unsigned long bh_mask = 0xFFFFFFFF; +struct bh_struct bh_base[32]; + +void disable_irq(unsigned int irq_nr) +{ + unsigned long flags; + unsigned char mask; + + mask = 1 << (irq_nr & 7); + save_flags(flags); + if (irq_nr < 8) { + cli(); + cache_21 |= mask; + outb(cache_21,0x21); + restore_flags(flags); + return; + } + cli(); + cache_A1 |= mask; + outb(cache_A1,0xA1); + restore_flags(flags); +} + +void enable_irq(unsigned int irq_nr) +{ + unsigned long flags; + unsigned char mask; + + mask = ~(1 << (irq_nr & 7)); + save_flags(flags); + if (irq_nr < 8) { + cli(); + cache_21 &= mask; + outb(cache_21,0x21); + restore_flags(flags); + return; + } + cli(); + cache_A1 &= mask; + outb(cache_A1,0xA1); + restore_flags(flags); +} + +/* + * do_bottom_half() runs at normal kernel priority: all interrupts + * enabled. do_bottom_half() is atomic with respect to itself: a + * bottom_half handler need not be re-entrant. 
+ */ +asmlinkage void do_bottom_half(void) +{ + unsigned long active; + unsigned long mask, left; + struct bh_struct *bh; + + bh = bh_base; + active = bh_active & bh_mask; + for (mask = 1, left = ~0 ; left & active ; bh++,mask += mask,left += left) { + if (mask & active) { + void (*fn)(void *); + bh_active &= ~mask; + fn = bh->routine; + if (!fn) + goto bad_bh; + fn(bh->data); + } + } + return; +bad_bh: + printk ("irq.c:bad bottom half entry\n"); +} + +/* + * This builds up the IRQ handler stubs using some ugly macros in irq.h + * + * These macros create the low-level assembly IRQ routines that do all + * the operations that are needed to keep the AT interrupt-controller + * happy. They are also written to be fast - and to disable interrupts + * as little as humanly possible. + * + * NOTE! These macros expand to three different handlers for each line: one + * complete handler that does all the fancy stuff (including signal handling), + * and one fast handler that is meant for simple IRQ's that want to be + * atomic. The specific handler is chosen depending on the SA_INTERRUPT + * flag when installing a handler. Finally, one "bad interrupt" handler, that + * is used when no handler is present. + */ +BUILD_IRQ(FIRST,0,0x01) +BUILD_IRQ(FIRST,1,0x02) +BUILD_IRQ(FIRST,2,0x04) +BUILD_IRQ(FIRST,3,0x08) +BUILD_IRQ(FIRST,4,0x10) +BUILD_IRQ(FIRST,5,0x20) +BUILD_IRQ(FIRST,6,0x40) +BUILD_IRQ(FIRST,7,0x80) +BUILD_IRQ(SECOND,8,0x01) +BUILD_IRQ(SECOND,9,0x02) +BUILD_IRQ(SECOND,10,0x04) +BUILD_IRQ(SECOND,11,0x08) +BUILD_IRQ(SECOND,12,0x10) +BUILD_IRQ(SECOND,13,0x20) +BUILD_IRQ(SECOND,14,0x40) +BUILD_IRQ(SECOND,15,0x80) + +/* + * Pointers to the low-level handlers: first the general ones, then the + * fast ones, then the bad ones. 
+ */ +static void (*interrupt[16])(void) = { + IRQ0_interrupt, IRQ1_interrupt, IRQ2_interrupt, IRQ3_interrupt, + IRQ4_interrupt, IRQ5_interrupt, IRQ6_interrupt, IRQ7_interrupt, + IRQ8_interrupt, IRQ9_interrupt, IRQ10_interrupt, IRQ11_interrupt, + IRQ12_interrupt, IRQ13_interrupt, IRQ14_interrupt, IRQ15_interrupt +}; + +static void (*fast_interrupt[16])(void) = { + fast_IRQ0_interrupt, fast_IRQ1_interrupt, + fast_IRQ2_interrupt, fast_IRQ3_interrupt, + fast_IRQ4_interrupt, fast_IRQ5_interrupt, + fast_IRQ6_interrupt, fast_IRQ7_interrupt, + fast_IRQ8_interrupt, fast_IRQ9_interrupt, + fast_IRQ10_interrupt, fast_IRQ11_interrupt, + fast_IRQ12_interrupt, fast_IRQ13_interrupt, + fast_IRQ14_interrupt, fast_IRQ15_interrupt +}; + +static void (*bad_interrupt[16])(void) = { + bad_IRQ0_interrupt, bad_IRQ1_interrupt, + bad_IRQ2_interrupt, bad_IRQ3_interrupt, + bad_IRQ4_interrupt, bad_IRQ5_interrupt, + bad_IRQ6_interrupt, bad_IRQ7_interrupt, + bad_IRQ8_interrupt, bad_IRQ9_interrupt, + bad_IRQ10_interrupt, bad_IRQ11_interrupt, + bad_IRQ12_interrupt, bad_IRQ13_interrupt, + bad_IRQ14_interrupt, bad_IRQ15_interrupt +}; + +/* + * Initial irq handlers. + */ +static struct sigaction irq_sigaction[16] = { + { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL }, + { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL }, + { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL }, + { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL }, + { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL }, + { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL }, + { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL }, + { NULL, 0, 0, NULL }, { NULL, 0, 0, NULL } +}; + +int get_irq_list(char *buf) +{ + int i, len = 0; + struct sigaction * sa = irq_sigaction; + + for (i = 0 ; i < 16 ; i++, sa++) { + if (!sa->sa_handler) + continue; + len += sprintf(buf+len, "%2d: %8d %c %s\n", + i, kstat.interrupts[i], + (sa->sa_flags & SA_INTERRUPT) ? 
'+' : ' ', + (char *) sa->sa_mask); + } + return len; +} + +/* + * do_IRQ handles IRQ's that have been installed without the + * SA_INTERRUPT flag: it uses the full signal-handling return + * and runs with other interrupts enabled. All relatively slow + * IRQ's should use this format: notably the keyboard/timer + * routines. + */ +asmlinkage void do_IRQ(int irq, struct pt_regs * regs) +{ + struct sigaction * sa = irq + irq_sigaction; + + kstat.interrupts[irq]++; + sa->sa_handler((int) regs); +} + +/* + * do_fast_IRQ handles IRQ's that don't need the fancy interrupt return + * stuff - the handler is also running with interrupts disabled unless + * it explicitly enables them later. + */ +asmlinkage void do_fast_IRQ(int irq) +{ + struct sigaction * sa = irq + irq_sigaction; + + kstat.interrupts[irq]++; + sa->sa_handler(irq); +} + +/* + * Using "struct sigaction" is slightly silly, but there + * are historical reasons and it works well, so.. + */ +static int irqaction(unsigned int irq, struct sigaction * new_sa) +{ + struct sigaction * sa; + unsigned long flags; + + if (irq > 15) + return -EINVAL; + sa = irq + irq_sigaction; + if (sa->sa_handler) + return -EBUSY; + if (!new_sa->sa_handler) + return -EINVAL; + save_flags(flags); + cli(); + *sa = *new_sa; + if (sa->sa_flags & SA_INTERRUPT) + set_intr_gate(0x20+irq,fast_interrupt[irq]); + else + set_intr_gate(0x20+irq,interrupt[irq]); + if (irq < 8) { + cache_21 &= ~(1<<irq); + outb(cache_21,0x21); + } else { + cache_21 &= ~(1<<2); + cache_A1 &= ~(1<<(irq-8)); + outb(cache_21,0x21); + outb(cache_A1,0xA1); + } + restore_flags(flags); + return 0; +} + +int request_irq(unsigned int irq, void (*handler)(int), + unsigned long flags, const char * devname) +{ + struct sigaction sa; + + sa.sa_handler = handler; + sa.sa_flags = flags; + sa.sa_mask = (unsigned long) devname; + sa.sa_restorer = NULL; + return irqaction(irq,&sa); +} + +void free_irq(unsigned int irq) +{ + struct sigaction * sa = irq + irq_sigaction; + unsigned long 
flags; + + if (irq > 15) { + printk("Trying to free IRQ%d\n",irq); + return; + } + if (!sa->sa_handler) { + printk("Trying to free free IRQ%d\n",irq); + return; + } + save_flags(flags); + cli(); + if (irq < 8) { + cache_21 |= 1 << irq; + outb(cache_21,0x21); + } else { + cache_A1 |= 1 << (irq-8); + outb(cache_A1,0xA1); + } + set_intr_gate(0x20+irq,bad_interrupt[irq]); + sa->sa_handler = NULL; + sa->sa_flags = 0; + sa->sa_mask = 0; + sa->sa_restorer = NULL; + restore_flags(flags); +} + +/* + * Note that on a 486, we don't want to do a SIGFPE on a irq13 + * as the irq is unreliable, and exception 16 works correctly + * (ie as explained in the intel literature). On a 386, you + * can't use exception 16 due to bad IBM design, so we have to + * rely on the less exact irq13. + * + * Careful.. Not only is IRQ13 unreliable, but it is also + * leads to races. IBM designers who came up with it should + * be shot. + */ +static void math_error_irq(int cpl) +{ + outb(0,0xF0); + if (ignore_irq13 || !hard_math) + return; + math_error(); +} + +static void no_action(int cpl) { } + +void init_IRQ(void) +{ + int i; + + for (i = 0; i < 16 ; i++) + set_intr_gate(0x20+i,bad_interrupt[i]); + if (request_irq(2, no_action, SA_INTERRUPT, "cascade")) + printk("Unable to get IRQ2 for cascade\n"); + if (request_irq(13,math_error_irq, 0, "math error")) + printk("Unable to get IRQ13 for math-error handler\n"); + + /* initialize the bottom half routines. 
*/ + for (i = 0; i < 32; i++) { + bh_base[i].routine = NULL; + bh_base[i].data = NULL; + } + bh_active = 0; + intr_count = 0; +} diff --git a/kernel/itimer.c b/kernel/itimer.c new file mode 100644 index 000000000..4d5fa0f67 --- /dev/null +++ b/kernel/itimer.c @@ -0,0 +1,135 @@ +/* + * linux/kernel/itimer.c + * + * Copyright (C) 1992 Darren Senn + */ + +/* These are all the functions necessary to implement itimers */ + +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/time.h> + +#include <asm/segment.h> + +static unsigned long tvtojiffies(struct timeval *value) +{ + return((unsigned long )value->tv_sec * HZ + + (unsigned long )(value->tv_usec + (1000000 / HZ - 1)) / + (1000000 / HZ)); +} + +static void jiffiestotv(unsigned long jiffies, struct timeval *value) +{ + value->tv_usec = (jiffies % HZ) * (1000000 / HZ); + value->tv_sec = jiffies / HZ; + return; +} + +int _getitimer(int which, struct itimerval *value) +{ + register unsigned long val, interval; + + switch (which) { + case ITIMER_REAL: + val = current->it_real_value; + interval = current->it_real_incr; + break; + case ITIMER_VIRTUAL: + val = current->it_virt_value; + interval = current->it_virt_incr; + break; + case ITIMER_PROF: + val = current->it_prof_value; + interval = current->it_prof_incr; + break; + default: + return(-EINVAL); + } + jiffiestotv(val, &value->it_value); + jiffiestotv(interval, &value->it_interval); + return(0); +} + +asmlinkage int sys_getitimer(int which, struct itimerval *value) +{ + int error; + struct itimerval get_buffer; + + if (!value) + return -EFAULT; + error = _getitimer(which, &get_buffer); + if (error) + return error; + error = verify_area(VERIFY_WRITE, value, sizeof(struct itimerval)); + if (error) + return error; + memcpy_tofs(value, &get_buffer, sizeof(get_buffer)); + return 0; +} + +int _setitimer(int which, struct itimerval *value, struct itimerval *ovalue) +{ + register unsigned long i, j; + int k; 
+ + i = tvtojiffies(&value->it_interval); + j = tvtojiffies(&value->it_value); + if (ovalue && (k = _getitimer(which, ovalue)) < 0) + return k; + switch (which) { + case ITIMER_REAL: + if (j) { + j += 1+itimer_ticks; + if (j < itimer_next) + itimer_next = j; + } + current->it_real_value = j; + current->it_real_incr = i; + break; + case ITIMER_VIRTUAL: + if (j) + j++; + current->it_virt_value = j; + current->it_virt_incr = i; + break; + case ITIMER_PROF: + if (j) + j++; + current->it_prof_value = j; + current->it_prof_incr = i; + break; + default: + return -EINVAL; + } + return 0; +} + +asmlinkage int sys_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) +{ + int error; + struct itimerval set_buffer, get_buffer; + + if (value) { + error = verify_area(VERIFY_READ, value, sizeof(*value)); + if (error) + return error; + memcpy_fromfs(&set_buffer, value, sizeof(set_buffer)); + } else + memset((char *) &set_buffer, 0, sizeof(set_buffer)); + + if (ovalue) { + error = verify_area(VERIFY_WRITE, ovalue, sizeof(struct itimerval)); + if (error) + return error; + } + + error = _setitimer(which, &set_buffer, ovalue ? &get_buffer : 0); + if (error || !ovalue) + return error; + + memcpy_tofs(ovalue, &get_buffer, sizeof(get_buffer)); + return error; +} diff --git a/kernel/ksyms.c b/kernel/ksyms.c new file mode 100644 index 000000000..62bca052c --- /dev/null +++ b/kernel/ksyms.c @@ -0,0 +1,263 @@ +/* + * Herein lies all the functions/variables that are "exported" for linkage + * with dynamically loaded kernel modules. + * Jon. 
+ * + * Stacked module support and unified symbol table added by + * Bjorn Ekwall <bj0rn@blox.se> + */ + +#include <linux/autoconf.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/blkdev.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/malloc.h> +#include <linux/ptrace.h> +#include <linux/sys.h> +#include <linux/utsname.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/timer.h> +#include <linux/binfmts.h> +#include <linux/personality.h> +#include <linux/module.h> +#include <linux/termios.h> +#include <linux/tqueue.h> +#include <linux/tty.h> +#include <linux/serial.h> +#include <linux/locks.h> +#include <linux/string.h> +#include <linux/delay.h> +#ifdef CONFIG_INET +#include <linux/net.h> +#include <linux/netdevice.h> +#endif + +#include <asm/irq.h> +extern char floppy_track_buffer[]; +extern void set_device_ro(int dev,int flag); +#include <linux/delay.h> +#include <linux/locks.h> + +extern void *sys_call_table; + +/* must match struct internal_symbol !!! 
*/ +#define X(name) { (void *) &name, "_" #name } + +#ifdef CONFIG_FTAPE +extern char * ftape_big_buffer; +extern void (*do_floppy)(void); +#endif + +extern int sys_tz; +extern int request_dma(unsigned int dmanr, char * deviceID); +extern void free_dma(unsigned int dmanr); + +extern int do_execve(char * filename, char ** argv, char ** envp, + struct pt_regs * regs); +extern int do_signal(unsigned long oldmask, struct pt_regs * regs); + +extern void (* iABI_hook)(struct pt_regs * regs); + +struct symbol_table symbol_table = { 0, 0, 0, /* for stacked module support */ + { + /* stackable module support */ + X(rename_module_symbol), + + /* system info variables */ + X(EISA_bus), + X(wp_works_ok), + + /* process memory management */ + X(verify_area), + X(do_mmap), + X(do_munmap), + X(zeromap_page_range), + X(unmap_page_range), + X(insert_vm_struct), + X(merge_segments), + + /* internal kernel memory management */ + X(__get_free_pages), + X(free_pages), + X(kmalloc), + X(kfree_s), + X(vmalloc), + X(vfree), + + /* filesystem internal functions */ + X(getname), + X(putname), + X(__iget), + X(iput), + X(namei), + X(lnamei), + X(open_namei), + X(check_disk_change), + X(invalidate_buffers), + X(fsync_dev), + X(permission), + X(inode_setattr), + X(inode_change_ok), + X(generic_mmap), + X(set_blocksize), + X(getblk), + X(bread), + X(brelse), + X(ll_rw_block), + X(__wait_on_buffer), + + /* device registration */ + X(register_chrdev), + X(unregister_chrdev), + X(register_blkdev), + X(unregister_blkdev), + X(tty_register_driver), + X(tty_unregister_driver), + X(tty_std_termios), + + /* block device driver support */ + X(block_read), + X(block_write), + X(block_fsync), + X(wait_for_request), + X(blksize_size), + X(blk_size), + X(blk_dev), + X(is_read_only), + X(set_device_ro), + X(bmap), + X(sync_dev), + + /* Module creation of serial units */ + X(register_serial), + X(unregister_serial), + + /* filesystem registration */ + X(register_filesystem), + X(unregister_filesystem), + + /* 
executable format registration */ + X(register_binfmt), + X(unregister_binfmt), + + /* execution environment registration */ + X(lookup_exec_domain), + X(register_exec_domain), + X(unregister_exec_domain), + + /* interrupt handling */ + X(request_irq), + X(free_irq), + X(enable_irq), + X(disable_irq), + X(bh_active), + X(bh_mask), + X(add_timer), + X(del_timer), + X(tq_timer), + X(tq_immediate), + X(tq_last), + X(timer_active), + X(timer_table), + + /* dma handling */ + X(request_dma), + X(free_dma), + + /* process management */ + X(wake_up), + X(wake_up_interruptible), + X(sleep_on), + X(interruptible_sleep_on), + X(schedule), + X(current), + X(jiffies), + X(xtime), + X(loops_per_sec), + X(need_resched), + X(kill_proc), + X(kill_pg), + X(kill_sl), + + /* misc */ + X(panic), + X(printk), + X(sprintf), + X(vsprintf), + X(simple_strtoul), + X(system_utsname), + X(sys_call_table), + + /* Signal interfaces */ + X(do_signal), + X(send_sig), + + /* Program loader interfaces */ + X(change_ldt), + X(copy_strings), + X(create_tables), + X(do_execve), + X(flush_old_exec), + X(open_inode), + X(read_exec), + + /* Miscellaneous access points */ + X(si_meminfo), + + /* socket layer registration */ + X(sock_register), + X(sock_unregister), + +#ifdef CONFIG_FTAPE + /* The next labels are needed for ftape driver. 
*/ + X(ftape_big_buffer), + X(do_floppy), +#endif +#ifdef CONFIG_INET + /* support for loadable net drivers */ + X(register_netdev), + X(unregister_netdev), + X(ether_setup), + X(alloc_skb), + X(kfree_skb), + X(dev_kfree_skb), + X(snarf_region), + X(netif_rx), + X(dev_rint), + X(dev_tint), + X(irq2dev_map), + X(dev_add_pack), + X(dev_remove_pack), + X(dev_get), + X(dev_ioctl), + X(dev_queue_xmit), + X(dev_base), +#endif + /* Added to make file system as module */ + X(set_writetime), + X(sys_tz), + X(__wait_on_super), + X(file_fsync), + X(clear_inode), + X(refile_buffer), + X(___strtok), + X(init_fifo), + X(super_blocks), + X(chrdev_inode_operations), + X(blkdev_inode_operations), + X(read_ahead), + /******************************************************** + * Do not add anything below this line, + * as the stacked modules depend on this! + */ + { NULL, NULL } /* mark end of table */ + }, + { { NULL, NULL } /* no module refs */ } +}; + +/* +int symbol_table_size = sizeof (symbol_table) / sizeof (symbol_table[0]); +*/ diff --git a/kernel/ldt.c b/kernel/ldt.c new file mode 100644 index 000000000..dd0e477d4 --- /dev/null +++ b/kernel/ldt.c @@ -0,0 +1,103 @@ +/* + * linux/kernel/ldt.c + * + * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/ldt.h> + +static int read_ldt(void * ptr, unsigned long bytecount) +{ + int error; + void * address = current->ldt; + unsigned long size; + + if (!ptr) + return -EINVAL; + size = LDT_ENTRIES*LDT_ENTRY_SIZE; + if (!address) { + address = &default_ldt; + size = sizeof(default_ldt); + } + if (size > bytecount) + size = bytecount; + error = verify_area(VERIFY_WRITE, ptr, size); + if (error) + return error; + memcpy_tofs(ptr, address, size); + return size; +} + +static int write_ldt(void * ptr, unsigned long bytecount) +{ + struct modify_ldt_ldt_s 
ldt_info; + unsigned long *lp; + unsigned long base, limit; + int error, i; + + if (bytecount != sizeof(ldt_info)) + return -EINVAL; + error = verify_area(VERIFY_READ, ptr, sizeof(ldt_info)); + if (error) + return error; + + memcpy_fromfs(&ldt_info, ptr, sizeof(ldt_info)); + + if (ldt_info.contents == 3 || ldt_info.entry_number >= LDT_ENTRIES) + return -EINVAL; + + limit = ldt_info.limit; + base = ldt_info.base_addr; + if (ldt_info.limit_in_pages) + limit *= PAGE_SIZE; + + limit += base; + if (limit < base || limit >= 0xC0000000) + return -EINVAL; + + if (!current->ldt) { + for (i=1 ; i<NR_TASKS ; i++) { + if (task[i] == current) { + if (!(current->ldt = (struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE))) + return -ENOMEM; + set_ldt_desc(gdt+(i<<1)+FIRST_LDT_ENTRY, current->ldt, LDT_ENTRIES); + load_ldt(i); + } + } + } + + lp = (unsigned long *) &current->ldt[ldt_info.entry_number]; + /* Allow LDTs to be cleared by the user. */ + if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { + *lp = 0; + *(lp+1) = 0; + return 0; + } + *lp = ((ldt_info.base_addr & 0x0000ffff) << 16) | + (ldt_info.limit & 0x0ffff); + *(lp+1) = (ldt_info.base_addr & 0xff000000) | + ((ldt_info.base_addr & 0x00ff0000)>>16) | + (ldt_info.limit & 0xf0000) | + (ldt_info.contents << 10) | + ((ldt_info.read_exec_only ^ 1) << 9) | + (ldt_info.seg_32bit << 22) | + (ldt_info.limit_in_pages << 23) | + ((ldt_info.seg_not_present ^1) << 15) | + 0x7000; + return 0; +} + +asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount) +{ + if (func == 0) + return read_ldt(ptr, bytecount); + if (func == 1) + return write_ldt(ptr, bytecount); + return -ENOSYS; +} diff --git a/kernel/module.c b/kernel/module.c new file mode 100644 index 000000000..eb3ca2417 --- /dev/null +++ b/kernel/module.c @@ -0,0 +1,584 @@ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <asm/segment.h> +#include <linux/mm.h> /* defines GFP_KERNEL */ +#include <linux/string.h> +#include <linux/module.h> +#include 
<linux/sched.h> +#include <linux/malloc.h> +/* + * Originally by Anonymous (as far as I know...) + * Linux version by Bas Laarhoven <bas@vimec.nl> + * 0.99.14 version by Jon Tombs <jon@gtex02.us.es>, + * + * Heavily modified by Bjorn Ekwall <bj0rn@blox.se> May 1994 (C) + * This source is covered by the GNU GPL, the same as all kernel sources. + * + * Features: + * - Supports stacked modules (removable only of there are no dependents). + * - Supports table of symbols defined by the modules. + * - Supports /proc/ksyms, showing value, name and owner of all + * the symbols defined by all modules (in stack order). + * - Added module dependencies information into /proc/modules + * - Supports redefines of all symbols, for streams-like behaviour. + * - Compatible with older versions of insmod. + * + */ + +#ifdef DEBUG_MODULE +#define PRINTK(a) printk a +#else +#define PRINTK(a) /* */ +#endif + +static struct module kernel_module; +static struct module *module_list = &kernel_module; + +static int freeing_modules; /* true if some modules are marked for deletion */ + +static struct module *find_module( const char *name); +static int get_mod_name( char *user_name, char *buf); +static int free_modules( void); + + +/* + * Called at boot time + */ +void init_modules(void) { + extern struct symbol_table symbol_table; /* in kernel/ksyms.c */ + struct internal_symbol *sym; + int i; + + for (i = 0, sym = symbol_table.symbol; sym->name; ++sym, ++i) + ; + symbol_table.n_symbols = i; + + kernel_module.symtab = &symbol_table; + kernel_module.state = MOD_RUNNING; /* Hah! */ + kernel_module.name = ""; +} + +int +rename_module_symbol(char *old_name, char *new_name) +{ + struct internal_symbol *sym; + int i = 0; /* keep gcc silent */ + + if (module_list->symtab) { + sym = module_list->symtab->symbol; + for (i = module_list->symtab->n_symbols; i > 0; ++sym, --i) { + if (strcmp(sym->name, old_name) == 0) { /* found it! */ + sym->name = new_name; /* done! 
*/ + PRINTK(("renamed %s to %s\n", old_name, new_name)); + return 1; /* it worked! */ + } + } + } + printk("rename %s to %s failed!\n", old_name, new_name); + return 0; /* not there... */ + + /* + * This one will change the name of the first matching symbol! + * + * With this function, you can replace the name of a symbol defined + * in the current module with a new name, e.g. when you want to insert + * your own function instead of a previously defined function + * with the same name. + * + * "Normal" usage: + * + * bogus_function(int params) + * { + * do something "smart"; + * return real_function(params); + * } + * + * ... + * + * init_module() + * { + * if (rename_module_symbol("_bogus_function", "_real_function")) + * printk("yep!\n"); + * else + * printk("no way!\n"); + * ... + * } + * + * When loading this module, real_function will be resolved + * to the real function address. + * All later loaded modules that refer to "real_function()" will + * then really call "bogus_function()" instead!!! + * + * This feature will give you ample opportunities to get to know + * the taste of your foot when you stuff it into your mouth!!! + */ +} + +/* + * Allocate space for a module. + */ +asmlinkage int +sys_create_module(char *module_name, unsigned long size) +{ + struct module *mp; + void* addr; + int error; + int npages; + int sspace = sizeof(struct module) + MOD_MAX_NAME; + char name[MOD_MAX_NAME]; + + if (!suser()) + return -EPERM; + if (module_name == NULL || size == 0) + return -EINVAL; + if ((error = get_mod_name(module_name, name)) != 0) + return error; + if (find_module(name) != NULL) { + return -EEXIST; + } + + if ((mp = (struct module*) kmalloc(sspace, GFP_KERNEL)) == NULL) { + return -ENOMEM; + } + strcpy((char *)(mp + 1), name); /* why not? 
*/ + + npages = (size + sizeof (int) + 4095) / 4096; + if ((addr = vmalloc(npages * 4096)) == 0) { + kfree_s(mp, sspace); + return -ENOMEM; + } + + mp->next = module_list; + mp->ref = NULL; + mp->symtab = NULL; + mp->name = (char *)(mp + 1); + mp->size = npages; + mp->addr = addr; + mp->state = MOD_UNINITIALIZED; + mp->cleanup = NULL; + + * (int *) addr = 0; /* set use count to zero */ + module_list = mp; /* link it in */ + + PRINTK(("module `%s' (%lu pages @ 0x%08lx) created\n", + mp->name, (unsigned long) mp->size, (unsigned long) mp->addr)); + return (int) addr; +} + +/* + * Initialize a module. + */ +asmlinkage int +sys_init_module(char *module_name, char *code, unsigned codesize, + struct mod_routines *routines, + struct symbol_table *symtab) +{ + struct module *mp; + struct symbol_table *newtab; + char name[MOD_MAX_NAME]; + int error; + struct mod_routines rt; + + if (!suser()) + return -EPERM; + + /* A little bit of protection... we "know" where the user stack is... */ + if (symtab && ((unsigned long)symtab > 0xb0000000)) { + printk("warning: you are using an old insmod, no symbols will be inserted!\n"); + symtab = NULL; + } + + /* + * First reclaim any memory from dead modules that where not + * freed when deleted. Should I think be done by timers when + * the module was deleted - Jon. 
+ */ + free_modules(); + + if ((error = get_mod_name(module_name, name)) != 0) + return error; + PRINTK(("initializing module `%s', %d (0x%x) bytes\n", + name, codesize, codesize)); + memcpy_fromfs(&rt, routines, sizeof rt); + if ((mp = find_module(name)) == NULL) + return -ENOENT; + if ((codesize + sizeof (int) + 4095) / 4096 > mp->size) + return -EINVAL; + memcpy_fromfs((char *)mp->addr + sizeof (int), code, codesize); + memset((char *)mp->addr + sizeof (int) + codesize, 0, + mp->size * 4096 - (codesize + sizeof (int))); + PRINTK(( "module init entry = 0x%08lx, cleanup entry = 0x%08lx\n", + (unsigned long) rt.init, (unsigned long) rt.cleanup)); + mp->cleanup = rt.cleanup; + + /* update kernel symbol table */ + if (symtab) { /* symtab == NULL means no new entries to handle */ + struct internal_symbol *sym; + struct module_ref *ref; + int size; + int i; + int legal_start; + + if ((error = verify_area(VERIFY_READ, symtab, sizeof(int)))) + return error; + memcpy_fromfs((char *)(&(size)), symtab, sizeof(int)); + + if ((newtab = (struct symbol_table*) kmalloc(size, GFP_KERNEL)) == NULL) { + return -ENOMEM; + } + + if ((error = verify_area(VERIFY_READ, symtab, size))) { + kfree_s(newtab, size); + return error; + } + memcpy_fromfs((char *)(newtab), symtab, size); + + /* sanity check */ + legal_start = sizeof(struct symbol_table) + + newtab->n_symbols * sizeof(struct internal_symbol) + + newtab->n_refs * sizeof(struct module_ref); + + if ((newtab->n_symbols < 0) || (newtab->n_refs < 0) || + (legal_start > size)) { + printk("Illegal symbol table! Rejected!\n"); + kfree_s(newtab, size); + return -EINVAL; + } + + /* relocate name pointers, index referred from start of table */ + for (sym = &(newtab->symbol[0]), i = 0; + i < newtab->n_symbols; ++sym, ++i) { + if ((int)sym->name < legal_start || size <= (int)sym->name) { + printk("Illegal symbol table! 
Rejected!\n"); + kfree_s(newtab, size); + return -EINVAL; + } + /* else */ + sym->name += (long)newtab; + } + mp->symtab = newtab; + + /* Update module references. + * On entry, from "insmod", ref->module points to + * the referenced module! + * Now it will point to the current module instead! + * The ref structure becomes the first link in the linked + * list of references to the referenced module. + * Also, "sym" from above, points to the first ref entry!!! + */ + for (ref = (struct module_ref *)sym, i = 0; + i < newtab->n_refs; ++ref, ++i) { + + /* Check for valid reference */ + struct module *link = module_list; + while (link && (ref->module != link)) + link = link->next; + + if (link == (struct module *)0) { + printk("Non-module reference! Rejected!\n"); + return -EINVAL; + } + + ref->next = ref->module->ref; + ref->module->ref = ref; + ref->module = mp; + } + } + + if ((*rt.init)() != 0) + return -EBUSY; + mp->state = MOD_RUNNING; + + return 0; +} + +asmlinkage int +sys_delete_module(char *module_name) +{ + struct module *mp; + char name[MOD_MAX_NAME]; + int error; + + if (!suser()) + return -EPERM; + /* else */ + if (module_name != NULL) { + if ((error = get_mod_name(module_name, name)) != 0) + return error; + if ((mp = find_module(name)) == NULL) + return -ENOENT; + if ((mp->ref != NULL) || (GET_USE_COUNT(mp) != 0)) + return -EBUSY; + if (mp->state == MOD_RUNNING) + (*mp->cleanup)(); + mp->state = MOD_DELETED; + } + free_modules(); + return 0; +} + + +/* + * Copy the kernel symbol table to user space. If the argument is null, + * just return the size of the table. + * + * Note that the transient module symbols are copied _first_, + * in lifo order!!! + * + * The symbols to "insmod" are according to the "old" format: struct kernel_sym, + * which is actually quite handy for this purpose. + * Note that insmod inserts a struct symbol_table later on... + * (as that format is quite handy for the kernel...) 
+ * + * For every module, the first (pseudo)symbol copied is the module name + * and the address of the module struct. + * This lets "insmod" keep track of references, and build the array of + * struct module_refs in the symbol table. + * The format of the module name is "#module", so that "insmod" can easily + * notice when a module name comes along. Also, this will make it possible + * to use old versions of "insmod", albeit with reduced functionality... + * The "kernel" module has an empty name. + */ +asmlinkage int +sys_get_kernel_syms(struct kernel_sym *table) +{ + struct internal_symbol *from; + struct kernel_sym isym; + struct kernel_sym *to; + struct module *mp = module_list; + int i; + int nmodsyms = 0; + + for (mp = module_list; mp; mp = mp->next) { + if (mp->symtab && mp->symtab->n_symbols) { + /* include the count for the module name! */ + nmodsyms += mp->symtab->n_symbols + 1; + } + } + + if (table != NULL) { + to = table; + + if ((i = verify_area(VERIFY_WRITE, to, nmodsyms * sizeof(*table)))) + return i; + + /* copy all module symbols first (always LIFO order) */ + for (mp = module_list; mp; mp = mp->next) { + if ((mp->state == MOD_RUNNING) && + (mp->symtab != NULL) && (mp->symtab->n_symbols > 0)) { + /* magic: write module info as a pseudo symbol */ + isym.value = (unsigned long)mp; + sprintf(isym.name, "#%s", mp->name); + memcpy_tofs(to, &isym, sizeof isym); + ++to; + + for (i = mp->symtab->n_symbols, + from = mp->symtab->symbol; + i > 0; --i, ++from, ++to) { + + isym.value = (unsigned long)from->addr; + strncpy(isym.name, from->name, sizeof isym.name); + memcpy_tofs(to, &isym, sizeof isym); + } + } + } + } + + return nmodsyms; +} + + +/* + * Copy the name of a module from user space. 
+ */ +int +get_mod_name(char *user_name, char *buf) +{ + int i; + + i = 0; + for (i = 0 ; (buf[i] = get_fs_byte(user_name + i)) != '\0' ; ) { + if (++i >= MOD_MAX_NAME) + return -E2BIG; + } + return 0; +} + + +/* + * Look for a module by name, ignoring modules marked for deletion. + */ +struct module * +find_module( const char *name) +{ + struct module *mp; + + for (mp = module_list ; mp ; mp = mp->next) { + if (mp->state == MOD_DELETED) + continue; + if (!strcmp(mp->name, name)) + break; + } + return mp; +} + +static void +drop_refs(struct module *mp) +{ + struct module *step; + struct module_ref *prev; + struct module_ref *ref; + + for (step = module_list; step; step = step->next) { + for (prev = ref = step->ref; ref; ref = prev->next) { + if (ref->module == mp) { + if (ref == step->ref) + step->ref = ref->next; + else + prev->next = ref->next; + break; /* every module only references once! */ + } + else + prev = ref; + } + } +} + +/* + * Try to free modules which have been marked for deletion. Returns nonzero + * if a module was actually freed. + */ +int +free_modules( void) +{ + struct module *mp; + struct module **mpp; + int did_deletion; + + did_deletion = 0; + freeing_modules = 0; + mpp = &module_list; + while ((mp = *mpp) != NULL) { + if (mp->state != MOD_DELETED) { + mpp = &mp->next; + } else { + if (GET_USE_COUNT(mp) != 0) { + freeing_modules = 1; + mpp = &mp->next; + } else { /* delete it */ + *mpp = mp->next; + if (mp->symtab) { + if (mp->symtab->n_refs) + drop_refs(mp); + if (mp->symtab->size) + kfree_s(mp->symtab, mp->symtab->size); + } + vfree(mp->addr); + kfree_s(mp, sizeof(struct module) + MOD_MAX_NAME); + did_deletion = 1; + } + } + } + return did_deletion; +} + + +/* + * Called by the /proc file system to return a current list of modules. 
+ */ +int get_module_list(char *buf) +{ + char *p; + char *q; + int i; + struct module *mp; + struct module_ref *ref; + char size[32]; + + p = buf; + /* Do not show the kernel pseudo module */ + for (mp = module_list ; mp && mp->next; mp = mp->next) { + if (p - buf > 4096 - 100) + break; /* avoid overflowing buffer */ + q = mp->name; + i = 20; + while (*q) { + *p++ = *q++; + i--; + } + sprintf(size, "%d", mp->size); + i -= strlen(size); + if (i <= 0) + i = 1; + while (--i >= 0) + *p++ = ' '; + q = size; + while (*q) + *p++ = *q++; + if (mp->state == MOD_UNINITIALIZED) + q = " (uninitialized)"; + else if (mp->state == MOD_RUNNING) + q = ""; + else if (mp->state == MOD_DELETED) + q = " (deleted)"; + else + q = " (bad state)"; + while (*q) + *p++ = *q++; + + if ((ref = mp->ref) != NULL) { + *p++ = '\t'; + *p++ = '['; + for (; ref; ref = ref->next) { + q = ref->module->name; + while (*q) + *p++ = *q++; + if (ref->next) + *p++ = ' '; + } + *p++ = ']'; + } + *p++ = '\n'; + } + return p - buf; +} + + +/* + * Called by the /proc file system to return a current list of ksyms. 
+ */ +int get_ksyms_list(char *buf) +{ + struct module *mp; + struct internal_symbol *sym; + int i; + char *p = buf; + + for (mp = module_list; mp; mp = mp->next) { + if ((mp->state == MOD_RUNNING) && + (mp->symtab != NULL) && (mp->symtab->n_symbols > 0)) { + for (i = mp->symtab->n_symbols, + sym = mp->symtab->symbol; + i > 0; --i, ++sym) { + + if (p - buf > 4096 - 100) { + strcat(p, "...\n"); + p += strlen(p); + return p - buf; /* avoid overflowing buffer */ + } + + if (mp->name[0]) { + sprintf(p, "%08lx %s\t[%s]\n", + (long)sym->addr, sym->name, mp->name); + } + else { + sprintf(p, "%08lx %s\n", + (long)sym->addr, sym->name); + } + p += strlen(p); + } + } + } + + return p - buf; +} diff --git a/kernel/panic.c b/kernel/panic.c new file mode 100644 index 000000000..300fcbbf3 --- /dev/null +++ b/kernel/panic.c @@ -0,0 +1,32 @@ +/* + * linux/kernel/panic.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * This function is used through-out the kernel (including mm and fs) + * to indicate a major problem. + */ +#include <stdarg.h> + +#include <linux/kernel.h> +#include <linux/sched.h> + +asmlinkage void sys_sync(void); /* it's really int */ + +NORET_TYPE void panic(const char * fmt, ...) +{ + static char buf[1024]; + va_list args; + + va_start(args, fmt); + vsprintf(buf, fmt, args); + va_end(args); + printk(KERN_EMERG "Kernel panic: %s\n",buf); + if (current == task[0]) + printk(KERN_EMERG "In swapper task - not syncing\n"); + else + sys_sync(); + for(;;); +} diff --git a/kernel/printk.c b/kernel/printk.c new file mode 100644 index 000000000..d92269b30 --- /dev/null +++ b/kernel/printk.c @@ -0,0 +1,229 @@ +/* + * linux/kernel/printk.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Modified to make sys_syslog() more flexible: added commands to + * return the last 4k of kernel messages, regardless of whether + * they've been read or not. Added option to suppress kernel printk's + * to the console. 
Added hook for sending the console messages + * elsewhere, in preparation for a serial line console (someday). + * Ted Ts'o, 2/11/93. + */ + +#include <stdarg.h> + +#include <asm/segment.h> +#include <asm/system.h> + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> + +#define LOG_BUF_LEN 4096 + +static char buf[1024]; + +extern void console_print(const char *); + +#define DEFAULT_MESSAGE_LOGLEVEL 7 /* KERN_DEBUG */ +#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything more serious than KERN_DEBUG */ + +unsigned long log_size = 0; +struct wait_queue * log_wait = NULL; +int console_loglevel = DEFAULT_CONSOLE_LOGLEVEL; + +static void (*console_print_proc)(const char *) = 0; +static char log_buf[LOG_BUF_LEN]; +static unsigned long log_start = 0; +static unsigned long logged_chars = 0; + +/* + * Commands to sys_syslog: + * + * 0 -- Close the log. Currently a NOP. + * 1 -- Open the log. Currently a NOP. + * 2 -- Read from the log. + * 3 -- Read up to the last 4k of messages in the ring buffer. + * 4 -- Read and clear last 4k of messages in the ring buffer + * 5 -- Clear ring buffer. 
+ * 6 -- Disable printk's to console + * 7 -- Enable printk's to console + * 8 -- Set level of messages printed to console + */ +asmlinkage int sys_syslog(int type, char * buf, int len) +{ + unsigned long i, j, count; + int do_clear = 0; + char c; + int error; + + if ((type != 3) && !suser()) + return -EPERM; + switch (type) { + case 0: /* Close log */ + return 0; + case 1: /* Open log */ + return 0; + case 2: /* Read from log */ + if (!buf || len < 0) + return -EINVAL; + if (!len) + return 0; + error = verify_area(VERIFY_WRITE,buf,len); + if (error) + return error; + cli(); + while (!log_size) { + if (current->signal & ~current->blocked) { + sti(); + return -ERESTARTSYS; + } + interruptible_sleep_on(&log_wait); + } + i = 0; + while (log_size && i < len) { + c = *((char *) log_buf+log_start); + log_start++; + log_size--; + log_start &= LOG_BUF_LEN-1; + sti(); + put_fs_byte(c,buf); + buf++; + i++; + cli(); + } + sti(); + return i; + case 4: /* Read/clear last kernel messages */ + do_clear = 1; + /* FALL THRU */ + case 3: /* Read last kernel messages */ + if (!buf || len < 0) + return -EINVAL; + if (!len) + return 0; + error = verify_area(VERIFY_WRITE,buf,len); + if (error) + return error; + count = len; + if (count > LOG_BUF_LEN) + count = LOG_BUF_LEN; + if (count > logged_chars) + count = logged_chars; + j = log_start + log_size - count; + for (i = 0; i < count; i++) { + c = *((char *) log_buf+(j++ & (LOG_BUF_LEN-1))); + put_fs_byte(c, buf++); + } + if (do_clear) + logged_chars = 0; + return i; + case 5: /* Clear ring buffer */ + logged_chars = 0; + return 0; + case 6: /* Disable logging to console */ + console_loglevel = 1; /* only panic messages shown */ + return 0; + case 7: /* Enable logging to console */ + console_loglevel = DEFAULT_CONSOLE_LOGLEVEL; + return 0; + case 8: + if (len < 0 || len > 8) + return -EINVAL; + console_loglevel = len; + return 0; + } + return -EINVAL; +} + + +asmlinkage int printk(const char *fmt, ...) 
+{ + va_list args; + int i; + char *msg, *p, *buf_end; + static char msg_level = -1; + long flags; + + save_flags(flags); + cli(); + va_start(args, fmt); + i = vsprintf(buf + 3, fmt, args); /* hopefully i < sizeof(buf)-4 */ + buf_end = buf + 3 + i; + va_end(args); + for (p = buf + 3; p < buf_end; p++) { + msg = p; + if (msg_level < 0) { + if ( + p[0] != '<' || + p[1] < '0' || + p[1] > '7' || + p[2] != '>' + ) { + p -= 3; + p[0] = '<'; + p[1] = DEFAULT_MESSAGE_LOGLEVEL - 1 + '0'; + p[2] = '>'; + } else + msg += 3; + msg_level = p[1] - '0'; + } + for (; p < buf_end; p++) { + log_buf[(log_start+log_size) & (LOG_BUF_LEN-1)] = *p; + if (log_size < LOG_BUF_LEN) + log_size++; + else + log_start++; + logged_chars++; + if (*p == '\n') + break; + } + if (msg_level < console_loglevel && console_print_proc) { + char tmp = p[1]; + p[1] = '\0'; + (*console_print_proc)(msg); + p[1] = tmp; + } + if (*p == '\n') + msg_level = -1; + } + restore_flags(flags); + wake_up_interruptible(&log_wait); + return i; +} + +/* + * The console driver calls this routine during kernel initialization + * to register the console printing procedure with printk() and to + * print any messages that were printed by the kernel before the + * console driver was initialized. 
+ */ +void register_console(void (*proc)(const char *)) +{ + int i,j; + int p = log_start; + char buf[16]; + char msg_level = -1; + char *q; + + console_print_proc = proc; + + for (i=0,j=0; i < log_size; i++) { + buf[j++] = log_buf[p]; + p++; p &= LOG_BUF_LEN-1; + if (buf[j-1] != '\n' && i < log_size - 1 && j < sizeof(buf)-1) + continue; + buf[j] = 0; + q = buf; + if (msg_level < 0) { + msg_level = buf[1] - '0'; + q = buf + 3; + } + if (msg_level < console_loglevel) + (*proc)(q); + if (buf[j-1] == '\n') + msg_level = -1; + j = 0; + } +} diff --git a/kernel/ptrace.c b/kernel/ptrace.c new file mode 100644 index 000000000..cade04750 --- /dev/null +++ b/kernel/ptrace.c @@ -0,0 +1,517 @@ +/* ptrace.c */ +/* By Ross Biro 1/23/92 */ +/* edited by Linus Torvalds */ + +#include <linux/head.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/user.h> + +#include <asm/segment.h> +#include <asm/system.h> +#include <linux/debugreg.h> + +/* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* determines which flags the user has access to. */ +/* 1 = access 0 = no access */ +#define FLAG_MASK 0x00044dd5 + +/* set's the trap flag. */ +#define TRAP_FLAG 0x100 + +/* + * this is the number to subtract from the top of the stack. To find + * the local frame. + */ +#define MAGICNUMBER 68 + +/* change a pid into a task struct. */ +static inline struct task_struct * get_task(int pid) +{ + int i; + + for (i = 1; i < NR_TASKS; i++) { + if (task[i] != NULL && (task[i]->pid == pid)) + return task[i]; + } + return NULL; +} + +/* + * this routine will get a word off of the processes privileged stack. + * the offset is how far from the base addr as stored in the TSS. + * this routine assumes that all the privileged stacks are in our + * data space. 
+ */ +static inline int get_stack_long(struct task_struct *task, int offset) +{ + unsigned char *stack; + + stack = (unsigned char *)task->tss.esp0; + stack += offset; + return (*((int *)stack)); +} + +/* + * this routine will put a word on the processes privileged stack. + * the offset is how far from the base addr as stored in the TSS. + * this routine assumes that all the privileged stacks are in our + * data space. + */ +static inline int put_stack_long(struct task_struct *task, int offset, + unsigned long data) +{ + unsigned char * stack; + + stack = (unsigned char *) task->tss.esp0; + stack += offset; + *(unsigned long *) stack = data; + return 0; +} + +/* + * This routine gets a long from any process space by following the page + * tables. NOTE! You should check that the long isn't on a page boundary, + * and that it is in the task area before calling this: this routine does + * no checking. + */ +static unsigned long get_long(struct vm_area_struct * vma, unsigned long addr) +{ + unsigned long page; + +repeat: + page = *PAGE_DIR_OFFSET(vma->vm_task->tss.cr3, addr); + if (page & PAGE_PRESENT) { + page &= PAGE_MASK; + page += PAGE_PTR(addr); + page = *((unsigned long *) page); + } + if (!(page & PAGE_PRESENT)) { + do_no_page(vma, addr, 0); + goto repeat; + } +/* this is a hack for non-kernel-mapped video buffers and similar */ + if (page >= high_memory) + return 0; + page &= PAGE_MASK; + page += addr & ~PAGE_MASK; + return *(unsigned long *) page; +} + +/* + * This routine puts a long into any process space by following the page + * tables. NOTE! You should check that the long isn't on a page boundary, + * and that it is in the task area before calling this: this routine does + * no checking. + * + * Now keeps R/W state of page so that a text page stays readonly + * even if a debugger scribbles breakpoints into it. 
-M.U- + */ +static void put_long(struct vm_area_struct * vma, unsigned long addr, + unsigned long data) +{ + unsigned long page, pte = 0; + int readonly = 0; + +repeat: + page = *PAGE_DIR_OFFSET(vma->vm_task->tss.cr3, addr); + if (page & PAGE_PRESENT) { + page &= PAGE_MASK; + page += PAGE_PTR(addr); + pte = page; + page = *((unsigned long *) page); + } + if (!(page & PAGE_PRESENT)) { + do_no_page(vma, addr, 0 /* PAGE_RW */); + goto repeat; + } + if (!(page & PAGE_RW)) { + if (!(page & PAGE_COW)) + readonly = 1; + do_wp_page(vma, addr, PAGE_RW | PAGE_PRESENT); + goto repeat; + } +/* this is a hack for non-kernel-mapped video buffers and similar */ + if (page >= high_memory) + return; +/* we're bypassing pagetables, so we have to set the dirty bit ourselves */ + *(unsigned long *) pte |= (PAGE_DIRTY|PAGE_COW); + page &= PAGE_MASK; + page += addr & ~PAGE_MASK; + *(unsigned long *) page = data; + if (readonly) { + *(unsigned long *) pte &=~ (PAGE_RW|PAGE_COW); + invalidate(); + } +} + +static struct vm_area_struct * find_vma(struct task_struct * tsk, unsigned long addr) +{ + struct vm_area_struct * vma; + + addr &= PAGE_MASK; + for (vma = tsk->mm->mmap ; ; vma = vma->vm_next) { + if (!vma) + return NULL; + if (vma->vm_end > addr) + break; + } + if (vma->vm_start <= addr) + return vma; + if (!(vma->vm_flags & VM_GROWSDOWN)) + return NULL; + if (vma->vm_end - addr > tsk->rlim[RLIMIT_STACK].rlim_cur) + return NULL; + vma->vm_offset -= vma->vm_start - addr; + vma->vm_start = addr; + return vma; +} + +/* + * This routine checks the page boundaries, and that the offset is + * within the task area. It then calls get_long() to read a long. 
+ */ +static int read_long(struct task_struct * tsk, unsigned long addr, + unsigned long * result) +{ + struct vm_area_struct * vma = find_vma(tsk, addr); + + if (!vma) + return -EIO; + if ((addr & ~PAGE_MASK) > PAGE_SIZE-sizeof(long)) { + unsigned long low,high; + struct vm_area_struct * vma_high = vma; + + if (addr + sizeof(long) >= vma->vm_end) { + vma_high = vma->vm_next; + if (!vma_high || vma_high->vm_start != vma->vm_end) + return -EIO; + } + low = get_long(vma, addr & ~(sizeof(long)-1)); + high = get_long(vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1)); + switch (addr & (sizeof(long)-1)) { + case 1: + low >>= 8; + low |= high << 24; + break; + case 2: + low >>= 16; + low |= high << 16; + break; + case 3: + low >>= 24; + low |= high << 8; + break; + } + *result = low; + } else + *result = get_long(vma, addr); + return 0; +} + +/* + * This routine checks the page boundaries, and that the offset is + * within the task area. It then calls put_long() to write a long. + */ +static int write_long(struct task_struct * tsk, unsigned long addr, + unsigned long data) +{ + struct vm_area_struct * vma = find_vma(tsk, addr); + + if (!vma) + return -EIO; + if ((addr & ~PAGE_MASK) > PAGE_SIZE-sizeof(long)) { + unsigned long low,high; + struct vm_area_struct * vma_high = vma; + + if (addr + sizeof(long) >= vma->vm_end) { + vma_high = vma->vm_next; + if (!vma_high || vma_high->vm_start != vma->vm_end) + return -EIO; + } + low = get_long(vma, addr & ~(sizeof(long)-1)); + high = get_long(vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1)); + switch (addr & (sizeof(long)-1)) { + case 0: /* shouldn't happen, but safety first */ + low = data; + break; + case 1: + low &= 0x000000ff; + low |= data << 8; + high &= ~0xff; + high |= data >> 24; + break; + case 2: + low &= 0x0000ffff; + low |= data << 16; + high &= ~0xffff; + high |= data >> 16; + break; + case 3: + low &= 0x00ffffff; + low |= data << 24; + high &= ~0xffffff; + high |= data >> 8; + break; + } + put_long(vma, addr & 
~(sizeof(long)-1),low); + put_long(vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1),high); + } else + put_long(vma, addr, data); + return 0; +} + +asmlinkage int sys_ptrace(long request, long pid, long addr, long data) +{ + struct task_struct *child; + struct user * dummy; + int i; + + dummy = NULL; + + if (request == PTRACE_TRACEME) { + /* are we already being traced? */ + if (current->flags & PF_PTRACED) + return -EPERM; + /* set the ptrace bit in the process flags. */ + current->flags |= PF_PTRACED; + return 0; + } + if (pid == 1) /* you may not mess with init */ + return -EPERM; + if (!(child = get_task(pid))) + return -ESRCH; + if (request == PTRACE_ATTACH) { + if (child == current) + return -EPERM; + if ((!child->dumpable || + (current->uid != child->euid) || + (current->uid != child->uid) || + (current->gid != child->egid) || + (current->gid != child->gid)) && !suser()) + return -EPERM; + /* the same process cannot be attached many times */ + if (child->flags & PF_PTRACED) + return -EPERM; + child->flags |= PF_PTRACED; + if (child->p_pptr != current) { + REMOVE_LINKS(child); + child->p_pptr = current; + SET_LINKS(child); + } + send_sig(SIGSTOP, child, 1); + return 0; + } + if (!(child->flags & PF_PTRACED)) + return -ESRCH; + if (child->state != TASK_STOPPED) { + if (request != PTRACE_KILL) + return -ESRCH; + } + if (child->p_pptr != current) + return -ESRCH; + + switch (request) { + /* when I and D space are separate, these will need to be fixed. */ + case PTRACE_PEEKTEXT: /* read word at location addr. */ + case PTRACE_PEEKDATA: { + unsigned long tmp; + int res; + + res = read_long(child, addr, &tmp); + if (res < 0) + return res; + res = verify_area(VERIFY_WRITE, (void *) data, sizeof(long)); + if (!res) + put_fs_long(tmp,(unsigned long *) data); + return res; + } + + /* read the word at location addr in the USER area. 
*/ + case PTRACE_PEEKUSR: { + unsigned long tmp; + int res; + + if ((addr & 3) || addr < 0 || + addr > sizeof(struct user) - 3) + return -EIO; + + res = verify_area(VERIFY_WRITE, (void *) data, sizeof(long)); + if (res) + return res; + tmp = 0; /* Default return condition */ + if(addr < 17*sizeof(long)) { + addr = addr >> 2; /* temporary hack. */ + + tmp = get_stack_long(child, sizeof(long)*addr - MAGICNUMBER); + if (addr == DS || addr == ES || + addr == FS || addr == GS || + addr == CS || addr == SS) + tmp &= 0xffff; + }; + if(addr >= (long) &dummy->u_debugreg[0] && + addr <= (long) &dummy->u_debugreg[7]){ + addr -= (long) &dummy->u_debugreg[0]; + addr = addr >> 2; + tmp = child->debugreg[addr]; + }; + put_fs_long(tmp,(unsigned long *) data); + return 0; + } + + /* when I and D space are separate, this will have to be fixed. */ + case PTRACE_POKETEXT: /* write the word at location addr. */ + case PTRACE_POKEDATA: + return write_long(child,addr,data); + + case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ + if ((addr & 3) || addr < 0 || + addr > sizeof(struct user) - 3) + return -EIO; + + addr = addr >> 2; /* temporary hack. */ + + if (addr == ORIG_EAX) + return -EIO; + if (addr == DS || addr == ES || + addr == FS || addr == GS || + addr == CS || addr == SS) { + data &= 0xffff; + if (data && (data & 3) != 3) + return -EIO; + } + if (addr == EFL) { /* flags. */ + data &= FLAG_MASK; + data |= get_stack_long(child, EFL*sizeof(long)-MAGICNUMBER) & ~FLAG_MASK; + } + /* Do not allow the user to set the debug register for kernel + address space */ + if(addr < 17){ + if (put_stack_long(child, sizeof(long)*addr-MAGICNUMBER, data)) + return -EIO; + return 0; + }; + + /* We need to be very careful here. We implicitly + want to modify a portion of the task_struct, and we + have to be selective about what portions we allow someone + to modify. 
*/ + + addr = addr << 2; /* Convert back again */ + if(addr >= (long) &dummy->u_debugreg[0] && + addr <= (long) &dummy->u_debugreg[7]){ + + if(addr == (long) &dummy->u_debugreg[4]) return -EIO; + if(addr == (long) &dummy->u_debugreg[5]) return -EIO; + if(addr < (long) &dummy->u_debugreg[4] && + ((unsigned long) data) >= 0xbffffffd) return -EIO; + + if(addr == (long) &dummy->u_debugreg[7]) { + data &= ~DR_CONTROL_RESERVED; + for(i=0; i<4; i++) + if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1) + return -EIO; + }; + + addr -= (long) &dummy->u_debugreg; + addr = addr >> 2; + child->debugreg[addr] = data; + return 0; + }; + return -EIO; + + case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ + case PTRACE_CONT: { /* restart after signal. */ + long tmp; + + if ((unsigned long) data > NSIG) + return -EIO; + if (request == PTRACE_SYSCALL) + child->flags |= PF_TRACESYS; + else + child->flags &= ~PF_TRACESYS; + child->exit_code = data; + child->state = TASK_RUNNING; + /* make sure the single step bit is not set. */ + tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG; + put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp); + return 0; + } + +/* + * make the child exit. Best I can do is send it a sigkill. + * perhaps it should be put in the status that it wants to + * exit. + */ + case PTRACE_KILL: { + long tmp; + + child->state = TASK_RUNNING; + child->exit_code = SIGKILL; + /* make sure the single step bit is not set. */ + tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG; + put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp); + return 0; + } + + case PTRACE_SINGLESTEP: { /* set the trap flag. 
*/ + long tmp; + + if ((unsigned long) data > NSIG) + return -EIO; + child->flags &= ~PF_TRACESYS; + tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) | TRAP_FLAG; + put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp); + child->state = TASK_RUNNING; + child->exit_code = data; + /* give it a chance to run. */ + return 0; + } + + case PTRACE_DETACH: { /* detach a process that was attached. */ + long tmp; + + if ((unsigned long) data > NSIG) + return -EIO; + child->flags &= ~(PF_PTRACED|PF_TRACESYS); + child->state = TASK_RUNNING; + child->exit_code = data; + REMOVE_LINKS(child); + child->p_pptr = child->p_opptr; + SET_LINKS(child); + /* make sure the single step bit is not set. */ + tmp = get_stack_long(child, sizeof(long)*EFL-MAGICNUMBER) & ~TRAP_FLAG; + put_stack_long(child, sizeof(long)*EFL-MAGICNUMBER,tmp); + return 0; + } + + default: + return -EIO; + } +} + +asmlinkage void syscall_trace(void) +{ + if ((current->flags & (PF_PTRACED|PF_TRACESYS)) + != (PF_PTRACED|PF_TRACESYS)) + return; + current->exit_code = SIGTRAP; + current->state = TASK_STOPPED; + notify_parent(current); + schedule(); + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) + current->signal |= (1 << (current->exit_code - 1)); + current->exit_code = 0; +} diff --git a/kernel/sched.c b/kernel/sched.c new file mode 100644 index 000000000..6eed6e8f5 --- /dev/null +++ b/kernel/sched.c @@ -0,0 +1,861 @@ +/* + * linux/kernel/sched.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * 'sched.c' is the main kernel file. 
It contains scheduling primitives + * (sleep_on, wakeup, schedule etc) as well as a number of simple system + * call functions (type getpid(), which just extracts a field from + * current-task + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/timer.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/fdreg.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/ptrace.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/tqueue.h> +#include <linux/resource.h> + +#include <asm/system.h> +#include <asm/io.h> +#include <asm/segment.h> + +#define TIMER_IRQ 0 + +#include <linux/timex.h> + +/* + * kernel variables + */ +long tick = 1000000 / HZ; /* timer interrupt period */ +volatile struct timeval xtime; /* The current time */ +int tickadj = 500/HZ; /* microsecs */ + +DECLARE_TASK_QUEUE(tq_timer); +DECLARE_TASK_QUEUE(tq_immediate); + +/* + * phase-lock loop variables + */ +int time_status = TIME_BAD; /* clock synchronization status */ +long time_offset = 0; /* time adjustment (us) */ +long time_constant = 0; /* pll time constant */ +long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */ +long time_precision = 1; /* clock precision (us) */ +long time_maxerror = 0x70000000;/* maximum error */ +long time_esterror = 0x70000000;/* estimated error */ +long time_phase = 0; /* phase offset (scaled us) */ +long time_freq = 0; /* frequency offset (scaled ppm) */ +long time_adj = 0; /* tick adjust (scaled 1 / HZ) */ +long time_reftime = 0; /* time at last adjustment (s) */ + +long time_adjust = 0; +long time_adjust_step = 0; + +int need_resched = 0; +unsigned long event = 0; + +/* + * Tell us the machine setup.. 
+ */ +int hard_math = 0; /* set by boot/head.S */ +int x86 = 0; /* set by boot/head.S to 3 or 4 */ +int ignore_irq13 = 0; /* set if exception 16 works */ +int wp_works_ok = 0; /* set if paging hardware honours WP */ +int hlt_works_ok = 1; /* set if the "hlt" instruction works */ + +/* + * Bus types .. + */ +int EISA_bus = 0; + +extern int _setitimer(int, struct itimerval *, struct itimerval *); +unsigned long * prof_buffer = NULL; +unsigned long prof_len = 0; + +#define _S(nr) (1<<((nr)-1)) + +extern void mem_use(void); + +extern int timer_interrupt(void); +asmlinkage int system_call(void); + +static unsigned long init_kernel_stack[1024] = { STACK_MAGIC, }; +static struct vm_area_struct init_mmap = INIT_MMAP; +struct task_struct init_task = INIT_TASK; + +unsigned long volatile jiffies=0; + +struct task_struct *current = &init_task; +struct task_struct *last_task_used_math = NULL; + +struct task_struct * task[NR_TASKS] = {&init_task, }; + +long user_stack [ PAGE_SIZE>>2 ] = { STACK_MAGIC, }; + +struct { + long * a; + short b; + } stack_start = { & user_stack [PAGE_SIZE>>2] , KERNEL_DS }; + +struct kernel_stat kstat = { 0 }; + +/* + * 'math_state_restore()' saves the current math information in the + * old math state array, and gets the new ones from the current task + * + * Careful.. There are problems with IBM-designed IRQ13 behaviour. + * Don't touch unless you *really* know how it works. 
+ */ +asmlinkage void math_state_restore(void) +{ + __asm__ __volatile__("clts"); + if (last_task_used_math == current) + return; + timer_table[COPRO_TIMER].expires = jiffies+50; + timer_active |= 1<<COPRO_TIMER; + if (last_task_used_math) + __asm__("fnsave %0":"=m" (last_task_used_math->tss.i387)); + else + __asm__("fnclex"); + last_task_used_math = current; + if (current->used_math) { + __asm__("frstor %0": :"m" (current->tss.i387)); + } else { + __asm__("fninit"); + current->used_math=1; + } + timer_active &= ~(1<<COPRO_TIMER); +} + +#ifndef CONFIG_MATH_EMULATION + +asmlinkage void math_emulate(long arg) +{ + printk("math-emulation not enabled and no coprocessor found.\n"); + printk("killing %s.\n",current->comm); + send_sig(SIGFPE,current,1); + schedule(); +} + +#endif /* CONFIG_MATH_EMULATION */ + +unsigned long itimer_ticks = 0; +unsigned long itimer_next = ~0; + +/* + * 'schedule()' is the scheduler function. It's a very simple and nice + * scheduler: it's not perfect, but certainly works for most things. + * The one thing you might take a look at is the signal-handler code here. + * + * NOTE!! Task 0 is the 'idle' task, which gets called when no other + * tasks can run. It can not be killed, and it cannot sleep. The 'state' + * information in task[0] is never used. + * + * The "confuse_gcc" goto is used only to get better assembly code.. + * Dijkstra probably hates me. 
+ */ +asmlinkage void schedule(void) +{ + int c; + struct task_struct * p; + struct task_struct * next; + unsigned long ticks; + +/* check alarm, wake up any interruptible tasks that have got a signal */ + + if (intr_count) { + printk("Aiee: scheduling in interrupt\n"); + intr_count = 0; + } + cli(); + ticks = itimer_ticks; + itimer_ticks = 0; + itimer_next = ~0; + sti(); + need_resched = 0; + p = &init_task; + for (;;) { + if ((p = p->next_task) == &init_task) + goto confuse_gcc1; + if (ticks && p->it_real_value) { + if (p->it_real_value <= ticks) { + send_sig(SIGALRM, p, 1); + if (!p->it_real_incr) { + p->it_real_value = 0; + goto end_itimer; + } + do { + p->it_real_value += p->it_real_incr; + } while (p->it_real_value <= ticks); + } + p->it_real_value -= ticks; + if (p->it_real_value < itimer_next) + itimer_next = p->it_real_value; + } +end_itimer: + if (p->state != TASK_INTERRUPTIBLE) + continue; + if (p->signal & ~p->blocked) { + p->state = TASK_RUNNING; + continue; + } + if (p->timeout && p->timeout <= jiffies) { + p->timeout = 0; + p->state = TASK_RUNNING; + } + } +confuse_gcc1: + +/* this is the scheduler proper: */ +#if 0 + /* give processes that go to sleep a bit higher priority.. */ + /* This depends on the values for TASK_XXX */ + /* This gives smoother scheduling for some things, but */ + /* can be very unfair under some circumstances, so.. 
*/ + if (TASK_UNINTERRUPTIBLE >= (unsigned) current->state && + current->counter < current->priority*2) { + ++current->counter; + } +#endif + c = -1000; + next = p = &init_task; + for (;;) { + if ((p = p->next_task) == &init_task) + goto confuse_gcc2; + if (p->state == TASK_RUNNING && p->counter > c) + c = p->counter, next = p; + } +confuse_gcc2: + if (!c) { + for_each_task(p) + p->counter = (p->counter >> 1) + p->priority; + } + if (current == next) + return; + kstat.context_swtch++; + switch_to(next); + /* Now maybe reload the debug registers */ + if(current->debugreg[7]){ + loaddebug(0); + loaddebug(1); + loaddebug(2); + loaddebug(3); + loaddebug(6); + }; +} + +asmlinkage int sys_pause(void) +{ + current->state = TASK_INTERRUPTIBLE; + schedule(); + return -ERESTARTNOHAND; +} + +/* + * wake_up doesn't wake up stopped processes - they have to be awakened + * with signals or similar. + * + * Note that this doesn't need cli-sti pairs: interrupts may not change + * the wait-queue structures directly, but only call wake_up() to wake + * a process. The process itself must remove the queue once it has woken. 
+ */ +void wake_up(struct wait_queue **q) +{ + struct wait_queue *tmp; + struct task_struct * p; + + if (!q || !(tmp = *q)) + return; + do { + if ((p = tmp->task) != NULL) { + if ((p->state == TASK_UNINTERRUPTIBLE) || + (p->state == TASK_INTERRUPTIBLE)) { + p->state = TASK_RUNNING; + if (p->counter > current->counter + 3) + need_resched = 1; + } + } + if (!tmp->next) { + printk("wait_queue is bad (eip = %p)\n", + __builtin_return_address(0)); + printk(" q = %p\n",q); + printk(" *q = %p\n",*q); + printk(" tmp = %p\n",tmp); + break; + } + tmp = tmp->next; + } while (tmp != *q); +} + +void wake_up_interruptible(struct wait_queue **q) +{ + struct wait_queue *tmp; + struct task_struct * p; + + if (!q || !(tmp = *q)) + return; + do { + if ((p = tmp->task) != NULL) { + if (p->state == TASK_INTERRUPTIBLE) { + p->state = TASK_RUNNING; + if (p->counter > current->counter + 3) + need_resched = 1; + } + } + if (!tmp->next) { + printk("wait_queue is bad (eip = %p)\n", + __builtin_return_address(0)); + printk(" q = %p\n",q); + printk(" *q = %p\n",*q); + printk(" tmp = %p\n",tmp); + break; + } + tmp = tmp->next; + } while (tmp != *q); +} + +void __down(struct semaphore * sem) +{ + struct wait_queue wait = { current, NULL }; + add_wait_queue(&sem->wait, &wait); + current->state = TASK_UNINTERRUPTIBLE; + while (sem->count <= 0) { + schedule(); + current->state = TASK_UNINTERRUPTIBLE; + } + current->state = TASK_RUNNING; + remove_wait_queue(&sem->wait, &wait); +} + +static inline void __sleep_on(struct wait_queue **p, int state) +{ + unsigned long flags; + struct wait_queue wait = { current, NULL }; + + if (!p) + return; + if (current == task[0]) + panic("task[0] trying to sleep"); + current->state = state; + add_wait_queue(p, &wait); + save_flags(flags); + sti(); + schedule(); + remove_wait_queue(p, &wait); + restore_flags(flags); +} + +void interruptible_sleep_on(struct wait_queue **p) +{ + __sleep_on(p,TASK_INTERRUPTIBLE); +} + +void sleep_on(struct wait_queue **p) +{ + 
__sleep_on(p,TASK_UNINTERRUPTIBLE); +} + +/* + * The head for the timer-list has a "expires" field of MAX_UINT, + * and the sorting routine counts on this.. + */ +static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL }; +#define SLOW_BUT_DEBUGGING_TIMERS 1 + +void add_timer(struct timer_list * timer) +{ + unsigned long flags; + struct timer_list *p; + +#if SLOW_BUT_DEBUGGING_TIMERS + if (timer->next || timer->prev) { + printk("add_timer() called with non-zero list from %p\n", + __builtin_return_address(0)); + return; + } +#endif + p = &timer_head; + timer->expires += jiffies; + save_flags(flags); + cli(); + do { + p = p->next; + } while (timer->expires > p->expires); + timer->next = p; + timer->prev = p->prev; + p->prev = timer; + timer->prev->next = timer; + restore_flags(flags); +} + +int del_timer(struct timer_list * timer) +{ + unsigned long flags; +#if SLOW_BUT_DEBUGGING_TIMERS + struct timer_list * p; + + p = &timer_head; + save_flags(flags); + cli(); + while ((p = p->next) != &timer_head) { + if (p == timer) { + timer->next->prev = timer->prev; + timer->prev->next = timer->next; + timer->next = timer->prev = NULL; + restore_flags(flags); + timer->expires -= jiffies; + return 1; + } + } + if (timer->next || timer->prev) + printk("del_timer() called from %p with timer not initialized\n", + __builtin_return_address(0)); + restore_flags(flags); + return 0; +#else + save_flags(flags); + cli(); + if (timer->next) { + timer->next->prev = timer->prev; + timer->prev->next = timer->next; + timer->next = timer->prev = NULL; + restore_flags(flags); + timer->expires -= jiffies; + return 1; + } + restore_flags(flags); + return 0; +#endif +} + +unsigned long timer_active = 0; +struct timer_struct timer_table[32]; + +/* + * Hmm.. Changed this, as the GNU make sources (load.c) seems to + * imply that avenrun[] is the standard name for this kind of thing. 
+ * Nothing else seems to be standardized: the fractional size etc + * all seem to differ on different machines. + */ +unsigned long avenrun[3] = { 0,0,0 }; + +/* + * Nr of active tasks - counted in fixed-point numbers + */ +static unsigned long count_active_tasks(void) +{ + struct task_struct **p; + unsigned long nr = 0; + + for(p = &LAST_TASK; p > &FIRST_TASK; --p) + if (*p && ((*p)->state == TASK_RUNNING || + (*p)->state == TASK_UNINTERRUPTIBLE || + (*p)->state == TASK_SWAPPING)) + nr += FIXED_1; + return nr; +} + +static inline void calc_load(void) +{ + unsigned long active_tasks; /* fixed-point */ + static int count = LOAD_FREQ; + + if (count-- > 0) + return; + count = LOAD_FREQ; + active_tasks = count_active_tasks(); + CALC_LOAD(avenrun[0], EXP_1, active_tasks); + CALC_LOAD(avenrun[1], EXP_5, active_tasks); + CALC_LOAD(avenrun[2], EXP_15, active_tasks); +} + +/* + * this routine handles the overflow of the microsecond field + * + * The tricky bits of code to handle the accurate clock support + * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. + * They were originally developed for SUN and DEC kernels. + * All the kudos should go to Dave for this stuff. + * + * These were ported to Linux by Philip Gladstone. + */ +static void second_overflow(void) +{ + long ltemp; + /* last time the cmos clock got updated */ + static long last_rtc_update=0; + extern int set_rtc_mmss(unsigned long); + + /* Bump the maxerror field */ + time_maxerror = (0x70000000-time_maxerror < time_tolerance) ? 
+ 0x70000000 : (time_maxerror + time_tolerance); + + /* Run the PLL */ + if (time_offset < 0) { + ltemp = (-(time_offset+1) >> (SHIFT_KG + time_constant)) + 1; + time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); + time_offset += (time_adj * HZ) >> (SHIFT_SCALE - SHIFT_UPDATE); + time_adj = - time_adj; + } else if (time_offset > 0) { + ltemp = ((time_offset-1) >> (SHIFT_KG + time_constant)) + 1; + time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); + time_offset -= (time_adj * HZ) >> (SHIFT_SCALE - SHIFT_UPDATE); + } else { + time_adj = 0; + } + + time_adj += (time_freq >> (SHIFT_KF + SHIFT_HZ - SHIFT_SCALE)) + + FINETUNE; + + /* Handle the leap second stuff */ + switch (time_status) { + case TIME_INS: + /* ugly divide should be replaced */ + if (xtime.tv_sec % 86400 == 0) { + xtime.tv_sec--; /* !! */ + time_status = TIME_OOP; + printk("Clock: inserting leap second 23:59:60 GMT\n"); + } + break; + + case TIME_DEL: + /* ugly divide should be replaced */ + if (xtime.tv_sec % 86400 == 86399) { + xtime.tv_sec++; + time_status = TIME_OK; + printk("Clock: deleting leap second 23:59:59 GMT\n"); + } + break; + + case TIME_OOP: + time_status = TIME_OK; + break; + } + if (xtime.tv_sec > last_rtc_update + 660) + if (set_rtc_mmss(xtime.tv_sec) == 0) + last_rtc_update = xtime.tv_sec; + else + last_rtc_update = xtime.tv_sec - 600; /* do it again in one min */ +} + +/* + * disregard lost ticks for now.. We don't care enough. 
+ */ +static void timer_bh(void * unused) +{ + unsigned long mask; + struct timer_struct *tp; + struct timer_list * timer; + + cli(); + while ((timer = timer_head.next) != &timer_head && timer->expires < jiffies) { + void (*fn)(unsigned long) = timer->function; + unsigned long data = timer->data; + timer->next->prev = timer->prev; + timer->prev->next = timer->next; + timer->next = timer->prev = NULL; + sti(); + fn(data); + cli(); + } + sti(); + + for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) { + if (mask > timer_active) + break; + if (!(mask & timer_active)) + continue; + if (tp->expires > jiffies) + continue; + timer_active &= ~mask; + tp->fn(); + sti(); + } +} + +void tqueue_bh(void * unused) +{ + run_task_queue(&tq_timer); +} + +void immediate_bh(void * unused) +{ + run_task_queue(&tq_immediate); +} + +/* + * The int argument is really a (struct pt_regs *), in case the + * interrupt wants to know from where it was called. The timer + * irq uses this to decide if it should update the user or system + * times. + */ +static void do_timer(struct pt_regs * regs) +{ + unsigned long mask; + struct timer_struct *tp; + + long ltemp, psecs; + + /* Advance the phase, once it gets to one microsecond, then + * advance the tick more. + */ + time_phase += time_adj; + if (time_phase < -FINEUSEC) { + ltemp = -time_phase >> SHIFT_SCALE; + time_phase += ltemp << SHIFT_SCALE; + xtime.tv_usec += tick + time_adjust_step - ltemp; + } + else if (time_phase > FINEUSEC) { + ltemp = time_phase >> SHIFT_SCALE; + time_phase -= ltemp << SHIFT_SCALE; + xtime.tv_usec += tick + time_adjust_step + ltemp; + } else + xtime.tv_usec += tick + time_adjust_step; + + if (time_adjust) + { + /* We are doing an adjtime thing. + * + * Modify the value of the tick for next time. + * Note that a positive delta means we want the clock + * to run fast. This means that the tick should be bigger + * + * Limit the amount of the step for *next* tick to be + * in the range -tickadj .. 
+tickadj + */ + if (time_adjust > tickadj) + time_adjust_step = tickadj; + else if (time_adjust < -tickadj) + time_adjust_step = -tickadj; + else + time_adjust_step = time_adjust; + + /* Reduce by this step the amount of time left */ + time_adjust -= time_adjust_step; + } + else + time_adjust_step = 0; + + if (xtime.tv_usec >= 1000000) { + xtime.tv_usec -= 1000000; + xtime.tv_sec++; + second_overflow(); + } + + jiffies++; + calc_load(); + if ((VM_MASK & regs->eflags) || (3 & regs->cs)) { + current->utime++; + if (current != task[0]) { + if (current->priority < 15) + kstat.cpu_nice++; + else + kstat.cpu_user++; + } + /* Update ITIMER_VIRT for current task if not in a system call */ + if (current->it_virt_value && !(--current->it_virt_value)) { + current->it_virt_value = current->it_virt_incr; + send_sig(SIGVTALRM,current,1); + } + } else { + current->stime++; + if(current != task[0]) + kstat.cpu_system++; +#ifdef CONFIG_PROFILE + if (prof_buffer && current != task[0]) { + unsigned long eip = regs->eip; + eip >>= 2; + if (eip < prof_len) + prof_buffer[eip]++; + } +#endif + } + /* + * check the cpu time limit on the process. + */ + if ((current->rlim[RLIMIT_CPU].rlim_max != RLIM_INFINITY) && + (((current->stime + current->utime) / HZ) >= current->rlim[RLIMIT_CPU].rlim_max)) + send_sig(SIGKILL, current, 1); + if ((current->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) && + (((current->stime + current->utime) % HZ) == 0)) { + psecs = (current->stime + current->utime) / HZ; + /* send when equal */ + if (psecs == current->rlim[RLIMIT_CPU].rlim_cur) + send_sig(SIGXCPU, current, 1); + /* and every five seconds thereafter. 
*/ + else if ((psecs > current->rlim[RLIMIT_CPU].rlim_cur) && + ((psecs - current->rlim[RLIMIT_CPU].rlim_cur) % 5) == 0) + send_sig(SIGXCPU, current, 1); + } + + if (current != task[0] && 0 > --current->counter) { + current->counter = 0; + need_resched = 1; + } + /* Update ITIMER_PROF for the current task */ + if (current->it_prof_value && !(--current->it_prof_value)) { + current->it_prof_value = current->it_prof_incr; + send_sig(SIGPROF,current,1); + } + for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) { + if (mask > timer_active) + break; + if (!(mask & timer_active)) + continue; + if (tp->expires > jiffies) + continue; + mark_bh(TIMER_BH); + } + cli(); + itimer_ticks++; + if (itimer_ticks > itimer_next) + need_resched = 1; + if (timer_head.next->expires < jiffies) + mark_bh(TIMER_BH); + if (tq_timer != &tq_last) + mark_bh(TQUEUE_BH); + sti(); +} + +asmlinkage int sys_alarm(long seconds) +{ + struct itimerval it_new, it_old; + + it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; + it_new.it_value.tv_sec = seconds; + it_new.it_value.tv_usec = 0; + _setitimer(ITIMER_REAL, &it_new, &it_old); + return(it_old.it_value.tv_sec + (it_old.it_value.tv_usec / 1000000)); +} + +asmlinkage int sys_getpid(void) +{ + return current->pid; +} + +asmlinkage int sys_getppid(void) +{ + return current->p_opptr->pid; +} + +asmlinkage int sys_getuid(void) +{ + return current->uid; +} + +asmlinkage int sys_geteuid(void) +{ + return current->euid; +} + +asmlinkage int sys_getgid(void) +{ + return current->gid; +} + +asmlinkage int sys_getegid(void) +{ + return current->egid; +} + +asmlinkage int sys_nice(long increment) +{ + int newprio; + + if (increment < 0 && !suser()) + return -EPERM; + newprio = current->priority - increment; + if (newprio < 1) + newprio = 1; + if (newprio > 35) + newprio = 35; + current->priority = newprio; + return 0; +} + +static void show_task(int nr,struct task_struct * p) +{ + unsigned long free; + static char * stat_nam[] = { "R", "S", 
"D", "Z", "T", "W" }; + + printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr); + if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *)) + printk(stat_nam[p->state]); + else + printk(" "); + if (p == current) + printk(" current "); + else + printk(" %08lX ", ((unsigned long *)p->tss.esp)[3]); + for (free = 1; free < 1024 ; free++) { + if (((unsigned long *)p->kernel_stack_page)[free]) + break; + } + printk("%5lu %5d %6d ", free << 2, p->pid, p->p_pptr->pid); + if (p->p_cptr) + printk("%5d ", p->p_cptr->pid); + else + printk(" "); + if (p->p_ysptr) + printk("%7d", p->p_ysptr->pid); + else + printk(" "); + if (p->p_osptr) + printk(" %5d\n", p->p_osptr->pid); + else + printk("\n"); +} + +void show_state(void) +{ + int i; + + printk(" free sibling\n"); + printk(" task PC stack pid father child younger older\n"); + for (i=0 ; i<NR_TASKS ; i++) + if (task[i]) + show_task(i,task[i]); +} + +void sched_init(void) +{ + int i; + struct desc_struct * p; + + bh_base[TIMER_BH].routine = timer_bh; + bh_base[TQUEUE_BH].routine = tqueue_bh; + bh_base[IMMEDIATE_BH].routine = immediate_bh; + if (sizeof(struct sigaction) != 16) + panic("Struct sigaction MUST be 16 bytes"); + set_tss_desc(gdt+FIRST_TSS_ENTRY,&init_task.tss); + set_ldt_desc(gdt+FIRST_LDT_ENTRY,&default_ldt,1); + set_system_gate(0x80,&system_call); + p = gdt+2+FIRST_TSS_ENTRY; + for(i=1 ; i<NR_TASKS ; i++) { + task[i] = NULL; + p->a=p->b=0; + p++; + p->a=p->b=0; + p++; + } +/* Clear NT, so that we won't have troubles with that later on */ + __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); + load_TR(0); + load_ldt(0); + outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */ + outb_p(LATCH & 0xff , 0x40); /* LSB */ + outb(LATCH >> 8 , 0x40); /* MSB */ + if (request_irq(TIMER_IRQ,(void (*)(int)) do_timer, 0, "timer") != 0) + panic("Could not allocate timer IRQ!"); +} diff --git a/kernel/signal.c b/kernel/signal.c new file mode 100644 index 000000000..df7324294 --- /dev/null +++ b/kernel/signal.c @@ -0,0 +1,407 
@@ +/* + * linux/kernel/signal.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/ptrace.h> +#include <linux/unistd.h> + +#include <asm/segment.h> + +#define _S(nr) (1<<((nr)-1)) + +#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP))) + +asmlinkage int do_signal(unsigned long oldmask, struct pt_regs * regs); + +asmlinkage int sys_sigprocmask(int how, sigset_t *set, sigset_t *oset) +{ + sigset_t new_set, old_set = current->blocked; + int error; + + if (set) { + error = verify_area(VERIFY_READ, set, sizeof(sigset_t)); + if (error) + return error; + new_set = get_fs_long((unsigned long *) set) & _BLOCKABLE; + switch (how) { + case SIG_BLOCK: + current->blocked |= new_set; + break; + case SIG_UNBLOCK: + current->blocked &= ~new_set; + break; + case SIG_SETMASK: + current->blocked = new_set; + break; + default: + return -EINVAL; + } + } + if (oset) { + error = verify_area(VERIFY_WRITE, oset, sizeof(sigset_t)); + if (error) + return error; + put_fs_long(old_set, (unsigned long *) oset); + } + return 0; +} + +asmlinkage int sys_sgetmask(void) +{ + return current->blocked; +} + +asmlinkage int sys_ssetmask(int newmask) +{ + int old=current->blocked; + + current->blocked = newmask & _BLOCKABLE; + return old; +} + +asmlinkage int sys_sigpending(sigset_t *set) +{ + int error; + /* fill in "set" with signals pending but blocked. */ + error = verify_area(VERIFY_WRITE, set, 4); + if (!error) + put_fs_long(current->blocked & current->signal, (unsigned long *)set); + return error; +} + +/* + * atomically swap in the new signal mask, and wait for a signal. 
+ */ +asmlinkage int sys_sigsuspend(int restart, unsigned long oldmask, unsigned long set) +{ + unsigned long mask; + struct pt_regs * regs = (struct pt_regs *) &restart; + + mask = current->blocked; + current->blocked = set & _BLOCKABLE; + regs->eax = -EINTR; + while (1) { + current->state = TASK_INTERRUPTIBLE; + schedule(); + if (do_signal(mask,regs)) + return -EINTR; + } +} + +/* + * POSIX 3.3.1.3: + * "Setting a signal action to SIG_IGN for a signal that is pending + * shall cause the pending signal to be discarded, whether or not + * it is blocked" (but SIGCHLD is unspecified: linux leaves it alone). + * + * "Setting a signal action to SIG_DFL for a signal that is pending + * and whose default action is to ignore the signal (for example, + * SIGCHLD), shall cause the pending signal to be discarded, whether + * or not it is blocked" + * + * Note the silly behaviour of SIGCHLD: SIG_IGN means that the signal + * isn't actually ignored, but does automatic child reaping, while + * SIG_DFL is explicitly said by POSIX to force the signal to be ignored.. 
+ */ +static void check_pending(int signum) +{ + struct sigaction *p; + + p = signum - 1 + current->sigaction; + if (p->sa_handler == SIG_IGN) { + if (signum == SIGCHLD) + return; + current->signal &= ~_S(signum); + return; + } + if (p->sa_handler == SIG_DFL) { + if (signum != SIGCONT && signum != SIGCHLD && signum != SIGWINCH) + return; + current->signal &= ~_S(signum); + return; + } +} + +asmlinkage int sys_signal(int signum, unsigned long handler) +{ + struct sigaction tmp; + + if (signum<1 || signum>32) + return -EINVAL; + if (signum==SIGKILL || signum==SIGSTOP) + return -EINVAL; + if (handler >= TASK_SIZE) + return -EFAULT; + tmp.sa_handler = (void (*)(int)) handler; + tmp.sa_mask = 0; + tmp.sa_flags = SA_ONESHOT | SA_NOMASK; + tmp.sa_restorer = NULL; + handler = (long) current->sigaction[signum-1].sa_handler; + current->sigaction[signum-1] = tmp; + check_pending(signum); + return handler; +} + +asmlinkage int sys_sigaction(int signum, const struct sigaction * action, + struct sigaction * oldaction) +{ + struct sigaction new_sa, *p; + + if (signum<1 || signum>32) + return -EINVAL; + if (signum==SIGKILL || signum==SIGSTOP) + return -EINVAL; + p = signum - 1 + current->sigaction; + if (action) { + int err = verify_area(VERIFY_READ, action, sizeof(*action)); + if (err) + return err; + memcpy_fromfs(&new_sa, action, sizeof(struct sigaction)); + if (new_sa.sa_flags & SA_NOMASK) + new_sa.sa_mask = 0; + else { + new_sa.sa_mask |= _S(signum); + new_sa.sa_mask &= _BLOCKABLE; + } + if (TASK_SIZE <= (unsigned long) new_sa.sa_handler) + return -EFAULT; + } + if (oldaction) { + int err = verify_area(VERIFY_WRITE, oldaction, sizeof(*oldaction)); + if (err) + return err; + memcpy_tofs(oldaction, p, sizeof(struct sigaction)); + } + if (action) { + *p = new_sa; + check_pending(signum); + } + return 0; +} + +asmlinkage int sys_waitpid(pid_t pid,unsigned long * stat_addr, int options); + +/* + * This sets regs->esp even though we don't actually use sigstacks yet.. 
+ */ +asmlinkage int sys_sigreturn(unsigned long __unused) +{ +#define COPY(x) regs->x = context.x +#define COPY_SEG(x) \ +if ((context.x & 0xfffc) && (context.x & 3) != 3) goto badframe; COPY(x); +#define COPY_SEG_STRICT(x) \ +if (!(context.x & 0xfffc) || (context.x & 3) != 3) goto badframe; COPY(x); + struct sigcontext_struct context; + struct pt_regs * regs; + + regs = (struct pt_regs *) &__unused; + if (verify_area(VERIFY_READ, (void *) regs->esp, sizeof(context))) + goto badframe; + memcpy_fromfs(&context,(void *) regs->esp, sizeof(context)); + current->blocked = context.oldmask & _BLOCKABLE; + COPY_SEG(ds); + COPY_SEG(es); + COPY_SEG(fs); + COPY_SEG(gs); + COPY_SEG_STRICT(ss); + COPY_SEG_STRICT(cs); + COPY(eip); + COPY(ecx); COPY(edx); + COPY(ebx); + COPY(esp); COPY(ebp); + COPY(edi); COPY(esi); + regs->eflags &= ~0x40DD5; + regs->eflags |= context.eflags & 0x40DD5; + regs->orig_eax = -1; /* disable syscall checks */ + return context.eax; +badframe: + do_exit(SIGSEGV); +} + +/* + * Set up a signal frame... Make the stack look the way iBCS2 expects + * it to look. 
+ */ +static void setup_frame(struct sigaction * sa, unsigned long ** fp, unsigned long eip, + struct pt_regs * regs, int signr, unsigned long oldmask) +{ + unsigned long * frame; + +#define __CODE ((unsigned long)(frame+24)) +#define CODE(x) ((unsigned long *) ((x)+__CODE)) + frame = *fp; + if (regs->ss != USER_DS) + frame = (unsigned long *) sa->sa_restorer; + frame -= 32; + if (verify_area(VERIFY_WRITE,frame,32*4)) + do_exit(SIGSEGV); +/* set up the "normal" stack seen by the signal handler (iBCS2) */ + put_fs_long(__CODE,frame); + if (current->exec_domain && current->exec_domain->signal_invmap) + put_fs_long(current->exec_domain->signal_invmap[signr], frame+1); + else + put_fs_long(signr, frame+1); + put_fs_long(regs->gs, frame+2); + put_fs_long(regs->fs, frame+3); + put_fs_long(regs->es, frame+4); + put_fs_long(regs->ds, frame+5); + put_fs_long(regs->edi, frame+6); + put_fs_long(regs->esi, frame+7); + put_fs_long(regs->ebp, frame+8); + put_fs_long((long)*fp, frame+9); + put_fs_long(regs->ebx, frame+10); + put_fs_long(regs->edx, frame+11); + put_fs_long(regs->ecx, frame+12); + put_fs_long(regs->eax, frame+13); + put_fs_long(current->tss.trap_no, frame+14); + put_fs_long(current->tss.error_code, frame+15); + put_fs_long(eip, frame+16); + put_fs_long(regs->cs, frame+17); + put_fs_long(regs->eflags, frame+18); + put_fs_long(regs->esp, frame+19); + put_fs_long(regs->ss, frame+20); + put_fs_long(0,frame+21); /* 387 state pointer - not implemented*/ +/* non-iBCS2 extensions.. */ + put_fs_long(oldmask, frame+22); + put_fs_long(current->tss.cr2, frame+23); +/* set up the return code... */ + put_fs_long(0x0000b858, CODE(0)); /* popl %eax ; movl $,%eax */ + put_fs_long(0x80cd0000, CODE(4)); /* int $0x80 */ + put_fs_long(__NR_sigreturn, CODE(2)); + *fp = frame; +#undef __CODE +#undef CODE +} + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. 
+ * + * Note that we go through the signals twice: once to check the signals that + * the kernel can handle, and then we build all the user-level signal handling + * stack-frames in one go after that. + */ +asmlinkage int do_signal(unsigned long oldmask, struct pt_regs * regs) +{ + unsigned long mask = ~current->blocked; + unsigned long handler_signal = 0; + unsigned long *frame = NULL; + unsigned long eip = 0; + unsigned long signr; + struct sigaction * sa; + + while ((signr = current->signal & mask)) { + __asm__("bsf %2,%1\n\t" + "btrl %1,%0" + :"=m" (current->signal),"=r" (signr) + :"1" (signr)); + sa = current->sigaction + signr; + signr++; + if ((current->flags & PF_PTRACED) && signr != SIGKILL) { + current->exit_code = signr; + current->state = TASK_STOPPED; + notify_parent(current); + schedule(); + if (!(signr = current->exit_code)) + continue; + current->exit_code = 0; + if (signr == SIGSTOP) + continue; + if (_S(signr) & current->blocked) { + current->signal |= _S(signr); + continue; + } + sa = current->sigaction + signr - 1; + } + if (sa->sa_handler == SIG_IGN) { + if (signr != SIGCHLD) + continue; + /* check for SIGCHLD: it's special */ + while (sys_waitpid(-1,NULL,WNOHANG) > 0) + /* nothing */; + continue; + } + if (sa->sa_handler == SIG_DFL) { + if (current->pid == 1) + continue; + switch (signr) { + case SIGCONT: case SIGCHLD: case SIGWINCH: + continue; + + case SIGSTOP: case SIGTSTP: case SIGTTIN: case SIGTTOU: + if (current->flags & PF_PTRACED) + continue; + current->state = TASK_STOPPED; + current->exit_code = signr; + if (!(current->p_pptr->sigaction[SIGCHLD-1].sa_flags & + SA_NOCLDSTOP)) + notify_parent(current); + schedule(); + continue; + + case SIGQUIT: case SIGILL: case SIGTRAP: + case SIGIOT: case SIGFPE: case SIGSEGV: + if (current->binfmt && current->binfmt->core_dump) { + if (current->binfmt->core_dump(signr, regs)) + signr |= 0x80; + } + /* fall through */ + default: + current->signal |= _S(signr & 0x7f); + do_exit(signr); + } + } + /* + 
* OK, we're invoking a handler + */ + if (regs->orig_eax >= 0) { + if (regs->eax == -ERESTARTNOHAND || + (regs->eax == -ERESTARTSYS && !(sa->sa_flags & SA_RESTART))) + regs->eax = -EINTR; + } + handler_signal |= 1 << (signr-1); + mask &= ~sa->sa_mask; + } + if (regs->orig_eax >= 0 && + (regs->eax == -ERESTARTNOHAND || + regs->eax == -ERESTARTSYS || + regs->eax == -ERESTARTNOINTR)) { + regs->eax = regs->orig_eax; + regs->eip -= 2; + } + if (!handler_signal) /* no handler will be called - return 0 */ + return 0; + eip = regs->eip; + frame = (unsigned long *) regs->esp; + signr = 1; + sa = current->sigaction; + for (mask = 1 ; mask ; sa++,signr++,mask += mask) { + if (mask > handler_signal) + break; + if (!(mask & handler_signal)) + continue; + setup_frame(sa,&frame,eip,regs,signr,oldmask); + eip = (unsigned long) sa->sa_handler; + if (sa->sa_flags & SA_ONESHOT) + sa->sa_handler = NULL; +/* force a supervisor-mode page-in of the signal handler to reduce races */ + __asm__("testb $0,%%fs:%0": :"m" (*(char *) eip)); + regs->cs = USER_CS; regs->ss = USER_DS; + regs->ds = USER_DS; regs->es = USER_DS; + regs->gs = USER_DS; regs->fs = USER_DS; + current->blocked |= sa->sa_mask; + oldmask |= sa->sa_mask; + } + regs->esp = (unsigned long) frame; + regs->eip = eip; /* "return" to the first handler */ + current->tss.trap_no = current->tss.error_code = 0; + return 1; +} diff --git a/kernel/splx.c b/kernel/splx.c new file mode 100644 index 000000000..c1b292ec9 --- /dev/null +++ b/kernel/splx.c @@ -0,0 +1,27 @@ +/* + * splx.c - SYSV DDI/DKI ipl manipulation functions + * + * Internally, many unices use a range of different interrupt + * privilege levels, ie from "allow all interrupts" (7) to + * "allow no interrupts." (0) under SYSV. + * + * This a simple splx() function behaves as the SYSV DDI/DKI function does, + * although since Linux only implements the equivalent of level 0 (cli) and + * level 7 (sti), this implementation only implements those levels. 
+ * + * Also, unlike the current Linux routines, splx() also returns the + * old privilege level so that it can be restored. + */ + +#include <asm/system.h> + +int splx (int new_level) { + register int old_level, tmp; + save_flags(tmp); + old_level = (tmp & 0x200) ? 7 : 0; + if (new_level) + sti(); + else + cli(); + return old_level; +} diff --git a/kernel/sys.c b/kernel/sys.c new file mode 100644 index 000000000..1ce3ee387 --- /dev/null +++ b/kernel/sys.c @@ -0,0 +1,787 @@ +/* + * linux/kernel/sys.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/times.h> +#include <linux/utsname.h> +#include <linux/param.h> +#include <linux/resource.h> +#include <linux/signal.h> +#include <linux/string.h> +#include <linux/ptrace.h> +#include <linux/stat.h> +#include <linux/mman.h> + +#include <asm/segment.h> +#include <asm/io.h> + +/* + * this indicates whether you can reboot with ctrl-alt-del: the default is yes + */ +static int C_A_D = 1; + +extern void adjust_clock(void); + +#define PZERO 15 + +asmlinkage int sys_ni_syscall(void) +{ + return -EINVAL; +} + +asmlinkage int sys_idle(void) +{ + int i; + + if (current->pid != 0) + return -EPERM; + + /* Map out the low memory: it's no longer needed */ + for (i = 0 ; i < 768 ; i++) + swapper_pg_dir[i] = 0; + + /* endless idle loop with no priority at all */ + current->counter = -100; + for (;;) { + if (hlt_works_ok && !need_resched) + __asm__("hlt"); + schedule(); + } +} + +static int proc_sel(struct task_struct *p, int which, int who) +{ + switch (which) { + case PRIO_PROCESS: + if (!who && p == current) + return 1; + return(p->pid == who); + case PRIO_PGRP: + if (!who) + who = current->pgrp; + return(p->pgrp == who); + case PRIO_USER: + if (!who) + who = current->uid; + return(p->uid == who); + } + return 0; +} + +asmlinkage int sys_setpriority(int which, int who, int niceval) +{ + struct task_struct 
**p; + int error = ESRCH; + int priority; + + if (which > 2 || which < 0) + return -EINVAL; + + if ((priority = PZERO - niceval) <= 0) + priority = 1; + + for(p = &LAST_TASK; p > &FIRST_TASK; --p) { + if (!*p || !proc_sel(*p, which, who)) + continue; + if ((*p)->uid != current->euid && + (*p)->uid != current->uid && !suser()) { + error = EPERM; + continue; + } + if (error == ESRCH) + error = 0; + if (priority > (*p)->priority && !suser()) + error = EACCES; + else + (*p)->priority = priority; + } + return -error; +} + +asmlinkage int sys_getpriority(int which, int who) +{ + struct task_struct **p; + int max_prio = 0; + + if (which > 2 || which < 0) + return -EINVAL; + + for(p = &LAST_TASK; p > &FIRST_TASK; --p) { + if (!*p || !proc_sel(*p, which, who)) + continue; + if ((*p)->priority > max_prio) + max_prio = (*p)->priority; + } + return(max_prio ? max_prio : -ESRCH); +} + +asmlinkage int sys_profil(void) +{ + return -ENOSYS; +} + +asmlinkage int sys_ftime(void) +{ + return -ENOSYS; +} + +asmlinkage int sys_break(void) +{ + return -ENOSYS; +} + +asmlinkage int sys_stty(void) +{ + return -ENOSYS; +} + +asmlinkage int sys_gtty(void) +{ + return -ENOSYS; +} + +asmlinkage int sys_prof(void) +{ + return -ENOSYS; +} + +extern void hard_reset_now(void); + +/* + * Reboot system call: for obvious reasons only root may call it, + * and even root needs to set up some magic numbers in the registers + * so that some mistake won't make this reboot the whole machine. + * You can also set the meaning of the ctrl-alt-del-key here. + * + * reboot doesn't sync: do that yourself before calling this. 
+ */ +asmlinkage int sys_reboot(int magic, int magic_too, int flag) +{ + if (!suser()) + return -EPERM; + if (magic != 0xfee1dead || magic_too != 672274793) + return -EINVAL; + if (flag == 0x01234567) + hard_reset_now(); + else if (flag == 0x89ABCDEF) + C_A_D = 1; + else if (!flag) + C_A_D = 0; + else + return -EINVAL; + return (0); +} + +/* + * This function gets called by ctrl-alt-del - ie the keyboard interrupt. + * As it's called within an interrupt, it may NOT sync: the only choice + * is whether to reboot at once, or just ignore the ctrl-alt-del. + */ +void ctrl_alt_del(void) +{ + if (C_A_D) + hard_reset_now(); + else + send_sig(SIGINT,task[1],1); +} + + +/* + * Unprivileged users may change the real gid to the effective gid + * or vice versa. (BSD-style) + * + * If you set the real gid at all, or set the effective gid to a value not + * equal to the real gid, then the saved gid is set to the new effective gid. + * + * This makes it possible for a setgid program to completely drop its + * privileges, which is often a useful assertion to make when you are doing + * a security audit over a program. + * + * The general idea is that a program which uses just setregid() will be + * 100% compatible with BSD. A program which uses just setgid() will be + * 100% compatible with POSIX w/ Saved ID's. 
+ */ +asmlinkage int sys_setregid(gid_t rgid, gid_t egid) +{ + int old_rgid = current->gid; + + if (rgid != (gid_t) -1) { + if ((old_rgid == rgid) || + (current->egid==rgid) || + suser()) + current->gid = rgid; + else + return(-EPERM); + } + if (egid != (gid_t) -1) { + if ((old_rgid == egid) || + (current->egid == egid) || + (current->sgid == egid) || + suser()) + current->egid = egid; + else { + current->gid = old_rgid; + return(-EPERM); + } + } + if (rgid != (gid_t) -1 || + (egid != (gid_t) -1 && egid != old_rgid)) + current->sgid = current->egid; + current->fsgid = current->egid; + return 0; +} + +/* + * setgid() is implemented like SysV w/ SAVED_IDS + */ +asmlinkage int sys_setgid(gid_t gid) +{ + if (suser()) + current->gid = current->egid = current->sgid = current->fsgid = gid; + else if ((gid == current->gid) || (gid == current->sgid)) + current->egid = current->fsgid = gid; + else + return -EPERM; + return 0; +} + +asmlinkage int sys_acct(void) +{ + return -ENOSYS; +} + +asmlinkage int sys_phys(void) +{ + return -ENOSYS; +} + +asmlinkage int sys_lock(void) +{ + return -ENOSYS; +} + +asmlinkage int sys_mpx(void) +{ + return -ENOSYS; +} + +asmlinkage int sys_ulimit(void) +{ + return -ENOSYS; +} + +asmlinkage int sys_old_syscall(void) +{ + return -ENOSYS; +} + +/* + * Unprivileged users may change the real uid to the effective uid + * or vice versa. (BSD-style) + * + * If you set the real uid at all, or set the effective uid to a value not + * equal to the real uid, then the saved uid is set to the new effective uid. + * + * This makes it possible for a setuid program to completely drop its + * privileges, which is often a useful assertion to make when you are doing + * a security audit over a program. + * + * The general idea is that a program which uses just setreuid() will be + * 100% compatible with BSD. A program which uses just setuid() will be + * 100% compatible with POSIX w/ Saved ID's. 
+ */ +asmlinkage int sys_setreuid(uid_t ruid, uid_t euid) +{ + int old_ruid = current->uid; + + if (ruid != (uid_t) -1) { + if ((old_ruid == ruid) || + (current->euid==ruid) || + suser()) + current->uid = ruid; + else + return(-EPERM); + } + if (euid != (uid_t) -1) { + if ((old_ruid == euid) || + (current->euid == euid) || + (current->suid == euid) || + suser()) + current->euid = euid; + else { + current->uid = old_ruid; + return(-EPERM); + } + } + if (ruid != (uid_t) -1 || + (euid != (uid_t) -1 && euid != old_ruid)) + current->suid = current->euid; + current->fsuid = current->euid; + return 0; +} + +/* + * setuid() is implemented like SysV w/ SAVED_IDS + * + * Note that SAVED_ID's is deficient in that a setuid root program + * like sendmail, for example, cannot set its uid to be a normal + * user and then switch back, because if you're root, setuid() sets + * the saved uid too. If you don't like this, blame the bright people + * in the POSIX committee and/or USG. Note that the BSD-style setreuid() + * will allow a root program to temporarily drop privileges and be able to + * regain them by swapping the real and effective uid. + */ +asmlinkage int sys_setuid(uid_t uid) +{ + if (suser()) + current->uid = current->euid = current->suid = current->fsuid = uid; + else if ((uid == current->uid) || (uid == current->suid)) + current->fsuid = current->euid = uid; + else + return -EPERM; + return(0); +} + +/* + * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This + * is used for "access()" and for the NFS daemon (letting nfsd stay at + * whatever uid it wants to). It normally shadows "euid", except when + * explicitly set by setfsuid() or for access.. + */ +asmlinkage int sys_setfsuid(uid_t uid) +{ + int old_fsuid = current->fsuid; + + if (uid == current->uid || uid == current->euid || + uid == current->suid || uid == current->fsuid || suser()) + current->fsuid = uid; + return old_fsuid; +} + +/* + * Samma på svenska.. 
+ */ +asmlinkage int sys_setfsgid(gid_t gid) +{ + int old_fsgid = current->fsgid; + + if (gid == current->gid || gid == current->egid || + gid == current->sgid || gid == current->fsgid || suser()) + current->fsgid = gid; + return old_fsgid; +} + +asmlinkage int sys_times(struct tms * tbuf) +{ + if (tbuf) { + int error = verify_area(VERIFY_WRITE,tbuf,sizeof *tbuf); + if (error) + return error; + put_fs_long(current->utime,(unsigned long *)&tbuf->tms_utime); + put_fs_long(current->stime,(unsigned long *)&tbuf->tms_stime); + put_fs_long(current->cutime,(unsigned long *)&tbuf->tms_cutime); + put_fs_long(current->cstime,(unsigned long *)&tbuf->tms_cstime); + } + return jiffies; +} + +asmlinkage int sys_brk(unsigned long brk) +{ + int freepages; + unsigned long rlim; + unsigned long newbrk, oldbrk; + struct vm_area_struct * vma; + + if (brk < current->mm->end_code) + return current->mm->brk; + newbrk = PAGE_ALIGN(brk); + oldbrk = PAGE_ALIGN(current->mm->brk); + if (oldbrk == newbrk) + return current->mm->brk = brk; + + /* + * Always allow shrinking brk + */ + if (brk <= current->mm->brk) { + current->mm->brk = brk; + do_munmap(newbrk, oldbrk-newbrk); + return brk; + } + /* + * Check against rlimit and stack.. + */ + rlim = current->rlim[RLIMIT_DATA].rlim_cur; + if (rlim >= RLIM_INFINITY) + rlim = ~0; + if (brk - current->mm->end_code > rlim || + brk >= current->mm->start_stack - 16384) + return current->mm->brk; + /* + * Check against existing mmap mappings. + */ + for (vma = current->mm->mmap; vma; vma = vma->vm_next) { + if (newbrk <= vma->vm_start) + break; + if (oldbrk < vma->vm_end) + return current->mm->brk; + } + /* + * stupid algorithm to decide if we have enough memory: while + * simple, it hopefully works in most obvious cases.. Easy to + * fool it, but this should catch most mistakes. 
+ */ + freepages = buffermem >> 12; + freepages += nr_free_pages; + freepages += nr_swap_pages; + freepages -= (high_memory - 0x100000) >> 16; + freepages -= (newbrk-oldbrk) >> 12; + if (freepages < 0) + return current->mm->brk; +#if 0 + freepages += current->mm->rss; + freepages -= oldbrk >> 12; + if (freepages < 0) + return current->mm->brk; +#endif + /* + * Ok, we have probably got enough memory - let it rip. + */ + current->mm->brk = brk; + do_mmap(NULL, oldbrk, newbrk-oldbrk, + PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_FIXED|MAP_PRIVATE, 0); + return brk; +} + +/* + * This needs some heave checking ... + * I just haven't get the stomach for it. I also don't fully + * understand sessions/pgrp etc. Let somebody who does explain it. + * + * OK, I think I have the protection semantics right.... this is really + * only important on a multi-user system anyway, to make sure one user + * can't send a signal to a process owned by another. -TYT, 12/12/91 + * + * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. 
+ * LBT 04.03.94 + */ +asmlinkage int sys_setpgid(pid_t pid, pid_t pgid) +{ + struct task_struct * p; + + if (!pid) + pid = current->pid; + if (!pgid) + pgid = pid; + if (pgid < 0) + return -EINVAL; + for_each_task(p) { + if (p->pid == pid) + goto found_task; + } + return -ESRCH; + +found_task: + if (p->p_pptr == current || p->p_opptr == current) { + if (p->session != current->session) + return -EPERM; + if (p->did_exec) + return -EACCES; + } else if (p != current) + return -ESRCH; + if (p->leader) + return -EPERM; + if (pgid != pid) { + struct task_struct * tmp; + for_each_task (tmp) { + if (tmp->pgrp == pgid && + tmp->session == current->session) + goto ok_pgid; + } + return -EPERM; + } + +ok_pgid: + p->pgrp = pgid; + return 0; +} + +asmlinkage int sys_getpgid(pid_t pid) +{ + struct task_struct * p; + + if (!pid) + return current->pgrp; + for_each_task(p) { + if (p->pid == pid) + return p->pgrp; + } + return -ESRCH; +} + +asmlinkage int sys_getpgrp(void) +{ + return current->pgrp; +} + +asmlinkage int sys_setsid(void) +{ + if (current->leader) + return -EPERM; + current->leader = 1; + current->session = current->pgrp = current->pid; + current->tty = NULL; + return current->pgrp; +} + +/* + * Supplementary group ID's + */ +asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist) +{ + int i; + + if (gidsetsize) { + i = verify_area(VERIFY_WRITE, grouplist, sizeof(gid_t) * gidsetsize); + if (i) + return i; + } + for (i = 0 ; (i < NGROUPS) && (current->groups[i] != NOGROUP) ; i++) { + if (!gidsetsize) + continue; + if (i >= gidsetsize) + break; + put_fs_word(current->groups[i], (short *) grouplist); + grouplist++; + } + return(i); +} + +asmlinkage int sys_setgroups(int gidsetsize, gid_t *grouplist) +{ + int i; + + if (!suser()) + return -EPERM; + if (gidsetsize > NGROUPS) + return -EINVAL; + for (i = 0; i < gidsetsize; i++, grouplist++) { + current->groups[i] = get_fs_word((unsigned short *) grouplist); + } + if (i < NGROUPS) + current->groups[i] = NOGROUP; + 
return 0; +} + +int in_group_p(gid_t grp) +{ + int i; + + if (grp == current->fsgid) + return 1; + + for (i = 0; i < NGROUPS; i++) { + if (current->groups[i] == NOGROUP) + break; + if (current->groups[i] == grp) + return 1; + } + return 0; +} + +asmlinkage int sys_newuname(struct new_utsname * name) +{ + int error; + + if (!name) + return -EFAULT; + error = verify_area(VERIFY_WRITE, name, sizeof *name); + if (!error) + memcpy_tofs(name,&system_utsname,sizeof *name); + return error; +} + +asmlinkage int sys_uname(struct old_utsname * name) +{ + int error; + if (!name) + return -EFAULT; + error = verify_area(VERIFY_WRITE, name,sizeof *name); + if (error) + return error; + memcpy_tofs(&name->sysname,&system_utsname.sysname, + sizeof (system_utsname.sysname)); + memcpy_tofs(&name->nodename,&system_utsname.nodename, + sizeof (system_utsname.nodename)); + memcpy_tofs(&name->release,&system_utsname.release, + sizeof (system_utsname.release)); + memcpy_tofs(&name->version,&system_utsname.version, + sizeof (system_utsname.version)); + memcpy_tofs(&name->machine,&system_utsname.machine, + sizeof (system_utsname.machine)); + return 0; +} + +asmlinkage int sys_olduname(struct oldold_utsname * name) +{ + int error; + if (!name) + return -EFAULT; + error = verify_area(VERIFY_WRITE, name,sizeof *name); + if (error) + return error; + memcpy_tofs(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + put_fs_byte(0,name->sysname+__OLD_UTS_LEN); + memcpy_tofs(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + put_fs_byte(0,name->nodename+__OLD_UTS_LEN); + memcpy_tofs(&name->release,&system_utsname.release,__OLD_UTS_LEN); + put_fs_byte(0,name->release+__OLD_UTS_LEN); + memcpy_tofs(&name->version,&system_utsname.version,__OLD_UTS_LEN); + put_fs_byte(0,name->version+__OLD_UTS_LEN); + memcpy_tofs(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + put_fs_byte(0,name->machine+__OLD_UTS_LEN); + return 0; +} + +/* + * Only sethostname; gethostname can be implemented by 
calling uname() + */ +asmlinkage int sys_sethostname(char *name, int len) +{ + int i; + + if (!suser()) + return -EPERM; + if (len > __NEW_UTS_LEN) + return -EINVAL; + for (i=0; i < len; i++) { + if ((system_utsname.nodename[i] = get_fs_byte(name+i)) == 0) + return 0; + } + system_utsname.nodename[i] = 0; + return 0; +} + +/* + * Only setdomainname; getdomainname can be implemented by calling + * uname() + */ +asmlinkage int sys_setdomainname(char *name, int len) +{ + int i; + + if (!suser()) + return -EPERM; + if (len > __NEW_UTS_LEN) + return -EINVAL; + for (i=0; i < len; i++) { + if ((system_utsname.domainname[i] = get_fs_byte(name+i)) == 0) + return 0; + } + system_utsname.domainname[i] = 0; + return 0; +} + +asmlinkage int sys_getrlimit(unsigned int resource, struct rlimit *rlim) +{ + int error; + + if (resource >= RLIM_NLIMITS) + return -EINVAL; + error = verify_area(VERIFY_WRITE,rlim,sizeof *rlim); + if (error) + return error; + put_fs_long(current->rlim[resource].rlim_cur, + (unsigned long *) rlim); + put_fs_long(current->rlim[resource].rlim_max, + ((unsigned long *) rlim)+1); + return 0; +} + +asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim) +{ + struct rlimit new_rlim, *old_rlim; + int err; + + if (resource >= RLIM_NLIMITS) + return -EINVAL; + err = verify_area(VERIFY_READ, rlim, sizeof(*rlim)); + if (err) + return err; + memcpy_fromfs(&new_rlim, rlim, sizeof(*rlim)); + old_rlim = current->rlim + resource; + if (((new_rlim.rlim_cur > old_rlim->rlim_max) || + (new_rlim.rlim_max > old_rlim->rlim_max)) && + !suser()) + return -EPERM; + *old_rlim = new_rlim; + return 0; +} + +/* + * It would make sense to put struct rusage in the task_struct, + * except that would make the task_struct be *really big*. After + * task_struct gets moved into malloc'ed memory, it would + * make sense to do this. It will make moving the rest of the information + * a lot simpler! (Which we're not doing right now because we're not + * measuring them yet). 
+ */ +int getrusage(struct task_struct *p, int who, struct rusage *ru) +{ + int error; + struct rusage r; + + error = verify_area(VERIFY_WRITE, ru, sizeof *ru); + if (error) + return error; + memset((char *) &r, 0, sizeof(r)); + switch (who) { + case RUSAGE_SELF: + r.ru_utime.tv_sec = CT_TO_SECS(p->utime); + r.ru_utime.tv_usec = CT_TO_USECS(p->utime); + r.ru_stime.tv_sec = CT_TO_SECS(p->stime); + r.ru_stime.tv_usec = CT_TO_USECS(p->stime); + r.ru_minflt = p->mm->min_flt; + r.ru_majflt = p->mm->maj_flt; + break; + case RUSAGE_CHILDREN: + r.ru_utime.tv_sec = CT_TO_SECS(p->cutime); + r.ru_utime.tv_usec = CT_TO_USECS(p->cutime); + r.ru_stime.tv_sec = CT_TO_SECS(p->cstime); + r.ru_stime.tv_usec = CT_TO_USECS(p->cstime); + r.ru_minflt = p->mm->cmin_flt; + r.ru_majflt = p->mm->cmaj_flt; + break; + default: + r.ru_utime.tv_sec = CT_TO_SECS(p->utime + p->cutime); + r.ru_utime.tv_usec = CT_TO_USECS(p->utime + p->cutime); + r.ru_stime.tv_sec = CT_TO_SECS(p->stime + p->cstime); + r.ru_stime.tv_usec = CT_TO_USECS(p->stime + p->cstime); + r.ru_minflt = p->mm->min_flt + p->mm->cmin_flt; + r.ru_majflt = p->mm->maj_flt + p->mm->cmaj_flt; + break; + } + memcpy_tofs(ru, &r, sizeof(r)); + return 0; +} + +asmlinkage int sys_getrusage(int who, struct rusage *ru) +{ + if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) + return -EINVAL; + return getrusage(current, who, ru); +} + +asmlinkage int sys_umask(int mask) +{ + int old = current->fs->umask; + + current->fs->umask = mask & S_IRWXUGO; + return (old); +} diff --git a/kernel/time.c b/kernel/time.c new file mode 100644 index 000000000..e290a3654 --- /dev/null +++ b/kernel/time.c @@ -0,0 +1,487 @@ +/* + * linux/kernel/time.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * This file contains the interface functions for the various + * time related system calls: time, stime, gettimeofday, settimeofday, + * adjtime + */ +/* + * Modification history kernel/time.c + * + * 02 Sep 93 Philip Gladstone + * Created file with time related 
functions from sched.c and adjtimex() + * 08 Oct 93 Torsten Duwe + * adjtime interface update and CMOS clock write code + * 02 Jul 94 Alan Modra + * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> + +#include <asm/segment.h> +#include <asm/io.h> + +#include <linux/mc146818rtc.h> +#define RTC_ALWAYS_BCD 1 + +#include <linux/timex.h> + +/* converts date to days since 1/1/1970 + * assumes year,mon,day in normal date format + * ie. 1/1/1970 => year=1970, mon=1, day=1 + * + * For the Julian calendar (which was used in Russia before 1917, + * Britain & colonies before 1752, anywhere else before 1582, + * and is still in use by some communities) leave out the + * -year/100+year/400 terms, and add 10. + * + * This algorithm was first published by Gauss (I think). + */ +static inline unsigned long mktime(unsigned int year, unsigned int mon, + unsigned int day, unsigned int hour, + unsigned int min, unsigned int sec) +{ + if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */ + mon += 12; /* Puts Feb last since it has leap day */ + year -= 1; + } + return ((( + (unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day) + + year*365 - 719499 + )*24 + hour /* now have hours */ + )*60 + min /* now have minutes */ + )*60 + sec; /* finally seconds */ +} + +void time_init(void) +{ + unsigned int year, mon, day, hour, min, sec; + int i; + + /* checking for Update-In-Progress could be done more elegantly + * (using the "update finished"-interrupt for example), but that + * would require excessive testing. promise I'll do that when I find + * the time. - Torsten + */ + /* read RTC exactly on falling edge of update flag */ + for (i = 0 ; i < 1000000 ; i++) /* may take up to 1 second... 
*/ + if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) + break; + for (i = 0 ; i < 1000000 ; i++) /* must try at least 2.228 ms*/ + if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)) + break; + do { /* Isn't this overkill ? UIP above should guarantee consistency */ + sec = CMOS_READ(RTC_SECONDS); + min = CMOS_READ(RTC_MINUTES); + hour = CMOS_READ(RTC_HOURS); + day = CMOS_READ(RTC_DAY_OF_MONTH); + mon = CMOS_READ(RTC_MONTH); + year = CMOS_READ(RTC_YEAR); + } while (sec != CMOS_READ(RTC_SECONDS)); + if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + { + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year); + } + if ((year += 1900) < 1970) + year += 100; + xtime.tv_sec = mktime(year, mon, day, hour, min, sec); + xtime.tv_usec = 0; +} +/* + * The timezone where the local system is located. Used as a default by some + * programs who obtain this value by using gettimeofday. + */ +struct timezone sys_tz = { 0, 0}; + +asmlinkage int sys_time(long * tloc) +{ + int i, error; + + i = CURRENT_TIME; + if (tloc) { + error = verify_area(VERIFY_WRITE, tloc, 4); + if (error) + return error; + put_fs_long(i,(unsigned long *)tloc); + } + return i; +} + +asmlinkage int sys_stime(unsigned long * tptr) +{ + int error; + unsigned long value; + + if (!suser()) + return -EPERM; + error = verify_area(VERIFY_READ, tptr, sizeof(*tptr)); + if (error) + return error; + value = get_fs_long(tptr); + cli(); + xtime.tv_sec = value; + xtime.tv_usec = 0; + time_status = TIME_BAD; + time_maxerror = 0x70000000; + time_esterror = 0x70000000; + sti(); + return 0; +} + +/* This function must be called with interrupts disabled + * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs + * + * However, the pc-audio speaker driver changes the divisor so that + * it gets interrupted rather more often - it loads 64 into the + * counter rather than 11932! This has an adverse impact on + * do_gettimeoffset() -- it stops working! 
What is also not + * good is that the interval that our timer function gets called + * is no longer 10.0002 msecs, but 9.9767 msec. To get around this + * would require using a different timing source. Maybe someone + * could use the RTC - I know that this can interrupt at frequencies + * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix + * it so that at startup, the timer code in sched.c would select + * using either the RTC or the 8253 timer. The decision would be + * based on whether there was any other device around that needed + * to trample on the 8253. I'd set up the RTC to interrupt at 1024Hz, + * and then do some jiggery to have a version of do_timer that + * advanced the clock by 1/1024 sec. Every time that reached over 1/100 + * of a second, then do all the old code. If the time was kept correct + * then do_gettimeoffset could just return 0 - there is no low order + * divider that can be accessed. + * + * Ideally, you would be able to use the RTC for the speaker driver, + * but it appears that the speaker driver really needs interrupt more + * often than every 120us or so. + * + * Anyway, this needs more thought.... pjsg (28 Aug 93) + * + * If you are really that interested, you should be reading + * comp.protocols.time.ntp! + */ + +#define TICK_SIZE tick + +static inline unsigned long do_gettimeoffset(void) +{ + int count; + unsigned long offset = 0; + + /* timer count may underflow right here */ + outb_p(0x00, 0x43); /* latch the count ASAP */ + count = inb_p(0x40); /* read the latched count */ + count |= inb(0x40) << 8; + /* we know probability of underflow is always MUCH less than 1% */ + if (count > (LATCH - LATCH/100)) { + /* check for pending timer interrupt */ + outb_p(0x0a, 0x20); + if (inb(0x20) & 1) + offset = TICK_SIZE; + } + count = ((LATCH-1) - count) * TICK_SIZE; + count = (count + LATCH/2) / LATCH; + return offset + count; +} + +/* + * This version of gettimeofday has near microsecond resolution. 
+ */ +static inline void do_gettimeofday(struct timeval *tv) +{ +#ifdef __i386__ + cli(); + *tv = xtime; + tv->tv_usec += do_gettimeoffset(); + if (tv->tv_usec >= 1000000) { + tv->tv_usec -= 1000000; + tv->tv_sec++; + } + sti(); +#else /* not __i386__ */ + cli(); + *tv = xtime; + sti(); +#endif /* not __i386__ */ +} + +asmlinkage int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + int error; + + if (tv) { + struct timeval ktv; + error = verify_area(VERIFY_WRITE, tv, sizeof *tv); + if (error) + return error; + do_gettimeofday(&ktv); + put_fs_long(ktv.tv_sec, (unsigned long *) &tv->tv_sec); + put_fs_long(ktv.tv_usec, (unsigned long *) &tv->tv_usec); + } + if (tz) { + error = verify_area(VERIFY_WRITE, tz, sizeof *tz); + if (error) + return error; + put_fs_long(sys_tz.tz_minuteswest, (unsigned long *) tz); + put_fs_long(sys_tz.tz_dsttime, ((unsigned long *) tz)+1); + } + return 0; +} + +/* + * Adjust the time obtained from the CMOS to be GMT time instead of + * local time. + * + * This is ugly, but preferable to the alternatives. Otherwise we + * would either need to write a program to do it in /etc/rc (and risk + * confusion if the program gets run more than once; it would also be + * hard to make the program warp the clock precisely n hours) or + * compile in the timezone information into the kernel. Bad, bad.... + * + * XXX Currently does not adjust for daylight savings time. May not + * need to do anything, depending on how smart (dumb?) the BIOS + * is. Blast it all.... the best thing to do not depend on the CMOS + * clock at all, but get the time via NTP or timed if you're on a + * network.... - TYT, 1/1/92 + */ +inline static void warp_clock(void) +{ + cli(); + xtime.tv_sec += sys_tz.tz_minuteswest * 60; + sti(); +} + +/* + * The first time we set the timezone, we will warp the clock so that + * it is ticking GMT time instead of local time. 
Presumably, + * if someone is setting the timezone then we are running in an + * environment where the programs understand about timezones. + * This should be done at boot time in the /etc/rc script, as + * soon as possible, so that the clock can be set right. Otherwise, + * various programs will get confused when the clock gets warped. + */ +asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz) +{ + static int firsttime = 1; + struct timeval new_tv; + struct timezone new_tz; + + if (!suser()) + return -EPERM; + if (tv) { + int error = verify_area(VERIFY_READ, tv, sizeof(*tv)); + if (error) + return error; + memcpy_fromfs(&new_tv, tv, sizeof(*tv)); + } + if (tz) { + int error = verify_area(VERIFY_READ, tz, sizeof(*tz)); + if (error) + return error; + memcpy_fromfs(&new_tz, tz, sizeof(*tz)); + } + if (tz) { + sys_tz = new_tz; + if (firsttime) { + firsttime = 0; + if (!tv) + warp_clock(); + } + } + if (tv) { + cli(); + /* This is revolting. We need to set the xtime.tv_usec + * correctly. However, the value in this location is + * is value at the last tick. + * Discover what correction gettimeofday + * would have done, and then undo it! + */ + new_tv.tv_usec -= do_gettimeoffset(); + + if (new_tv.tv_usec < 0) { + new_tv.tv_usec += 1000000; + new_tv.tv_sec--; + } + + xtime = new_tv; + time_status = TIME_BAD; + time_maxerror = 0x70000000; + time_esterror = 0x70000000; + sti(); + } + return 0; +} + +/* adjtimex mainly allows reading (and writing, if superuser) of + * kernel time-keeping variables. used by xntpd. + */ +asmlinkage int sys_adjtimex(struct timex *txc_p) +{ + long ltemp, mtemp, save_adjust; + int error; + + /* Local copy of parameter */ + struct timex txc; + + error = verify_area(VERIFY_WRITE, txc_p, sizeof(struct timex)); + if (error) + return error; + + /* Copy the user data space into the kernel copy + * structure. 
But bear in mind that the structures + * may change + */ + memcpy_fromfs(&txc, txc_p, sizeof(struct timex)); + + /* In order to modify anything, you gotta be super-user! */ + if (txc.mode && !suser()) + return -EPERM; + + /* Now we validate the data before disabling interrupts + */ + + if (txc.mode != ADJ_OFFSET_SINGLESHOT && (txc.mode & ADJ_OFFSET)) + /* Microsec field limited to -131000 .. 131000 usecs */ + if (txc.offset <= -(1 << (31 - SHIFT_UPDATE)) + || txc.offset >= (1 << (31 - SHIFT_UPDATE))) + return -EINVAL; + + /* time_status must be in a fairly small range */ + if (txc.mode & ADJ_STATUS) + if (txc.status < TIME_OK || txc.status > TIME_BAD) + return -EINVAL; + + /* if the quartz is off by more than 10% something is VERY wrong ! */ + if (txc.mode & ADJ_TICK) + if (txc.tick < 900000/HZ || txc.tick > 1100000/HZ) + return -EINVAL; + + cli(); + + /* Save for later - semantics of adjtime is to return old value */ + save_adjust = time_adjust; + + /* If there are input parameters, then process them */ + if (txc.mode) + { + if (time_status == TIME_BAD) + time_status = TIME_OK; + + if (txc.mode & ADJ_STATUS) + time_status = txc.status; + + if (txc.mode & ADJ_FREQUENCY) + time_freq = txc.frequency << (SHIFT_KF - 16); + + if (txc.mode & ADJ_MAXERROR) + time_maxerror = txc.maxerror; + + if (txc.mode & ADJ_ESTERROR) + time_esterror = txc.esterror; + + if (txc.mode & ADJ_TIMECONST) + time_constant = txc.time_constant; + + if (txc.mode & ADJ_OFFSET) + if (txc.mode == ADJ_OFFSET_SINGLESHOT) + { + time_adjust = txc.offset; + } + else /* XXX should give an error if other bits set */ + { + time_offset = txc.offset << SHIFT_UPDATE; + mtemp = xtime.tv_sec - time_reftime; + time_reftime = xtime.tv_sec; + if (mtemp > (MAXSEC+2) || mtemp < 0) + mtemp = 0; + + if (txc.offset < 0) + time_freq -= (-txc.offset * mtemp) >> + (time_constant + time_constant); + else + time_freq += (txc.offset * mtemp) >> + (time_constant + time_constant); + + ltemp = time_tolerance << SHIFT_KF; + + if 
(time_freq > ltemp) + time_freq = ltemp; + else if (time_freq < -ltemp) + time_freq = -ltemp; + } + if (txc.mode & ADJ_TICK) + tick = txc.tick; + + } + txc.offset = save_adjust; + txc.frequency = ((time_freq+1) >> (SHIFT_KF - 16)); + txc.maxerror = time_maxerror; + txc.esterror = time_esterror; + txc.status = time_status; + txc.time_constant = time_constant; + txc.precision = time_precision; + txc.tolerance = time_tolerance; + txc.time = xtime; + txc.tick = tick; + + sti(); + + memcpy_tofs(txc_p, &txc, sizeof(struct timex)); + return time_status; +} + +int set_rtc_mmss(unsigned long nowtime) +{ + int retval = 0; + int real_seconds, real_minutes, cmos_minutes; + unsigned char save_control, save_freq_select; + + save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being set */ + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); + + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset prescaler */ + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + cmos_minutes = CMOS_READ(RTC_MINUTES); + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + BCD_TO_BIN(cmos_minutes); + + /* since we're only adjusting minutes and seconds, + * don't interfere with hour overflow. 
This avoids + * messing with unknown time zones but requires your + * RTC not to be off by more than 15 minutes + */ + real_seconds = nowtime % 60; + real_minutes = nowtime / 60; + if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) + real_minutes += 30; /* correct for half hour time zone */ + real_minutes %= 60; + + if (abs(real_minutes - cmos_minutes) < 30) + { + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + { + BIN_TO_BCD(real_seconds); + BIN_TO_BCD(real_minutes); + } + CMOS_WRITE(real_seconds,RTC_SECONDS); + CMOS_WRITE(real_minutes,RTC_MINUTES); + } + else + retval = -1; + + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + CMOS_WRITE(save_control, RTC_CONTROL); + return retval; +} diff --git a/kernel/tqueue.c b/kernel/tqueue.c new file mode 100644 index 000000000..440709611 --- /dev/null +++ b/kernel/tqueue.c @@ -0,0 +1,10 @@ +/* + * tqueue.c --- task queue handling for Linux. + * + * This routine merely draws in the static portion of the task queue + * inline functions. Look in tqueue.h for the relevant functions. + */ + +#define INCLUDE_INLINE_FUNCS + +#include <linux/tqueue.h> diff --git a/kernel/traps.c b/kernel/traps.c new file mode 100644 index 000000000..150b702b3 --- /dev/null +++ b/kernel/traps.c @@ -0,0 +1,245 @@ +/* + * linux/kernel/traps.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * 'Traps.c' handles hardware traps and faults after we have saved some + * state in 'asm.s'. Currently mostly a debugging-aid, will be extended + * to mainly kill the offending process (probably by giving it a signal, + * but possibly by killing it outright if necessary). 
+ */ +#include <linux/head.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/ptrace.h> + +#include <asm/system.h> +#include <asm/segment.h> +#include <asm/io.h> + +static inline void console_verbose(void) +{ + extern int console_loglevel; + console_loglevel = 15; +} + +#define DO_ERROR(trapnr, signr, str, name, tsk) \ +asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +{ \ + tsk->tss.error_code = error_code; \ + tsk->tss.trap_no = trapnr; \ + if (signr == SIGTRAP && current->flags & PF_PTRACED) \ + current->blocked &= ~(1 << (SIGTRAP-1)); \ + send_sig(signr, tsk, 1); \ + die_if_kernel(str,regs,error_code); \ +} + +#define get_seg_byte(seg,addr) ({ \ +register unsigned char __res; \ +__asm__("push %%fs;mov %%ax,%%fs;movb %%fs:%2,%%al;pop %%fs" \ + :"=a" (__res):"0" (seg),"m" (*(addr))); \ +__res;}) + +#define get_seg_long(seg,addr) ({ \ +register unsigned long __res; \ +__asm__("push %%fs;mov %%ax,%%fs;movl %%fs:%2,%%eax;pop %%fs" \ + :"=a" (__res):"0" (seg),"m" (*(addr))); \ +__res;}) + +#define _fs() ({ \ +register unsigned short __res; \ +__asm__("mov %%fs,%%ax":"=a" (__res):); \ +__res;}) + +void page_exception(void); + +asmlinkage void divide_error(void); +asmlinkage void debug(void); +asmlinkage void nmi(void); +asmlinkage void int3(void); +asmlinkage void overflow(void); +asmlinkage void bounds(void); +asmlinkage void invalid_op(void); +asmlinkage void device_not_available(void); +asmlinkage void double_fault(void); +asmlinkage void coprocessor_segment_overrun(void); +asmlinkage void invalid_TSS(void); +asmlinkage void segment_not_present(void); +asmlinkage void stack_segment(void); +asmlinkage void general_protection(void); +asmlinkage void page_fault(void); +asmlinkage void coprocessor_error(void); +asmlinkage void reserved(void); +asmlinkage void alignment_check(void); + +/*static*/ void die_if_kernel(char * str, struct pt_regs * regs, long err) +{ + int i; + 
unsigned long esp; + unsigned short ss; + + esp = (unsigned long) &regs->esp; + ss = KERNEL_DS; + if ((regs->eflags & VM_MASK) || (3 & regs->cs) == 3) + return; + if (regs->cs & 3) { + esp = regs->esp; + ss = regs->ss; + } + console_verbose(); + printk("%s: %04lx\n", str, err & 0xffff); + printk("EIP: %04x:%08lx\nEFLAGS: %08lx\n", 0xffff & regs->cs,regs->eip,regs->eflags); + printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + regs->esi, regs->edi, regs->ebp, esp); + printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", + regs->ds, regs->es, regs->fs, regs->gs, ss); + store_TR(i); + if (STACK_MAGIC != *(unsigned long *)current->kernel_stack_page) + printk("Corrupted stack page\n"); + printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)\nStack: ", + current->comm, current->pid, 0xffff & i, current->kernel_stack_page); + for(i=0;i<5;i++) + printk("%08lx ", get_seg_long(ss,(i+(unsigned long *)esp))); + printk("\nCode: "); + for(i=0;i<20;i++) + printk("%02x ",0xff & get_seg_byte(regs->cs,(i+(char *)regs->eip))); + printk("\n"); + do_exit(SIGSEGV); +} + +DO_ERROR( 0, SIGFPE, "divide error", divide_error, current) +DO_ERROR( 3, SIGTRAP, "int3", int3, current) +DO_ERROR( 4, SIGSEGV, "overflow", overflow, current) +DO_ERROR( 5, SIGSEGV, "bounds", bounds, current) +DO_ERROR( 6, SIGILL, "invalid operand", invalid_op, current) +DO_ERROR( 7, SIGSEGV, "device not available", device_not_available, current) +DO_ERROR( 8, SIGSEGV, "double fault", double_fault, current) +DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun, last_task_used_math) +DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS, current) +DO_ERROR(11, SIGBUS, "segment not present", segment_not_present, current) +DO_ERROR(12, SIGBUS, "stack segment", stack_segment, current) +DO_ERROR(15, SIGSEGV, "reserved", reserved, current) +DO_ERROR(17, SIGSEGV, "alignment check", 
alignment_check, current) + +asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) +{ + int signr = SIGSEGV; + + if (regs->eflags & VM_MASK) { + handle_vm86_fault((struct vm86_regs *) regs, error_code); + return; + } + die_if_kernel("general protection",regs,error_code); + switch (get_seg_byte(regs->cs, (char *)regs->eip)) { + case 0xCD: /* INT */ + case 0xF4: /* HLT */ + case 0xFA: /* CLI */ + case 0xFB: /* STI */ + signr = SIGILL; + } + current->tss.error_code = error_code; + current->tss.trap_no = 13; + send_sig(signr, current, 1); +} + +asmlinkage void do_nmi(struct pt_regs * regs, long error_code) +{ + printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); + printk("You probably have a hardware problem with your RAM chips\n"); +} + +asmlinkage void do_debug(struct pt_regs * regs, long error_code) +{ + if (regs->eflags & VM_MASK) { + handle_vm86_debug((struct vm86_regs *) regs, error_code); + return; + } + if (current->flags & PF_PTRACED) + current->blocked &= ~(1 << (SIGTRAP-1)); + send_sig(SIGTRAP, current, 1); + current->tss.trap_no = 1; + current->tss.error_code = error_code; + if ((regs->cs & 3) == 0) { + /* If this is a kernel mode trap, then reset db7 and allow us to continue */ + __asm__("movl %0,%%db7" + : /* no output */ + : "r" (0)); + return; + } + die_if_kernel("debug",regs,error_code); +} + +/* + * Allow the process which triggered the interrupt to recover the error + * condition. + * - the status word is saved in the cs selector. + * - the tag word is saved in the operand selector. + * - the status word is then cleared and the tags all set to Empty. + * + * This will give sufficient information for complete recovery provided that + * the affected process knows or can deduce the code and data segments + * which were in force when the exception condition arose. 
+ * + * Note that we play around with the 'TS' bit to hopefully get + * the correct behaviour even in the presence of the asynchronous + * IRQ13 behaviour + */ +void math_error(void) +{ + struct i387_hard_struct * env; + + clts(); + if (!last_task_used_math) { + __asm__("fnclex"); + return; + } + env = &last_task_used_math->tss.i387.hard; + send_sig(SIGFPE, last_task_used_math, 1); + last_task_used_math->tss.trap_no = 16; + last_task_used_math->tss.error_code = 0; + __asm__ __volatile__("fnsave %0":"=m" (*env)); + last_task_used_math = NULL; + stts(); + env->fcs = (env->swd & 0x0000ffff) | (env->fcs & 0xffff0000); + env->fos = env->twd; + env->swd &= 0xffff3800; + env->twd = 0xffffffff; +} + +asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code) +{ + ignore_irq13 = 1; + math_error(); +} + +void trap_init(void) +{ + int i; + + set_trap_gate(0,&divide_error); + set_trap_gate(1,&debug); + set_trap_gate(2,&nmi); + set_system_gate(3,&int3); /* int3-5 can be called from all */ + set_system_gate(4,&overflow); + set_system_gate(5,&bounds); + set_trap_gate(6,&invalid_op); + set_trap_gate(7,&device_not_available); + set_trap_gate(8,&double_fault); + set_trap_gate(9,&coprocessor_segment_overrun); + set_trap_gate(10,&invalid_TSS); + set_trap_gate(11,&segment_not_present); + set_trap_gate(12,&stack_segment); + set_trap_gate(13,&general_protection); + set_trap_gate(14,&page_fault); + set_trap_gate(15,&reserved); + set_trap_gate(16,&coprocessor_error); + set_trap_gate(17,&alignment_check); + for (i=18;i<48;i++) + set_trap_gate(i,&reserved); +} diff --git a/kernel/vm86.c b/kernel/vm86.c new file mode 100644 index 000000000..144d93a02 --- /dev/null +++ b/kernel/vm86.c @@ -0,0 +1,404 @@ +/* + * linux/kernel/vm86.c + * + * Copyright (C) 1994 Linus Torvalds + */ +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/string.h> +#include <linux/ptrace.h> + +#include <asm/segment.h> +#include <asm/io.h> 
+ +/* + * Known problems: + * + * Interrupt handling is not guaranteed: + * - a real x86 will disable all interrupts for one instruction + * after a "mov ss,xx" to make stack handling atomic even without + * the 'lss' instruction. We can't guarantee this in v86 mode, + * as the next instruction might result in a page fault or similar. + * - a real x86 will have interrupts disabled for one instruction + * past the 'sti' that enables them. We don't bother with all the + * details yet.. + * + * Hopefully these problems do not actually matter for anything. + */ + +/* + * 8- and 16-bit register defines.. + */ +#define AL(regs) (((unsigned char *)&((regs)->eax))[0]) +#define AH(regs) (((unsigned char *)&((regs)->eax))[1]) +#define IP(regs) (*(unsigned short *)&((regs)->eip)) +#define SP(regs) (*(unsigned short *)&((regs)->esp)) + +/* + * virtual flags (16 and 32-bit versions) + */ +#define VFLAGS (*(unsigned short *)&(current->v86flags)) +#define VEFLAGS (current->v86flags) + +#define set_flags(X,new,mask) \ +((X) = ((X) & ~(mask)) | ((new) & (mask))) + +#define SAFE_MASK (0xDD5) +#define RETURN_MASK (0xDFF) + +asmlinkage struct pt_regs * save_v86_state(struct vm86_regs * regs) +{ + unsigned long tmp; + + if (!current->vm86_info) { + printk("no vm86_info: BAD\n"); + do_exit(SIGSEGV); + } + set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->v86mask); + memcpy_tofs(&current->vm86_info->regs,regs,sizeof(*regs)); + put_fs_long(current->screen_bitmap,&current->vm86_info->screen_bitmap); + tmp = current->tss.esp0; + current->tss.esp0 = current->saved_kernel_stack; + current->saved_kernel_stack = 0; + return (struct pt_regs *) tmp; +} + +static void mark_screen_rdonly(struct task_struct * tsk) +{ + unsigned long tmp; + unsigned long *pg_table; + + if ((tmp = tsk->tss.cr3) != 0) { + tmp = *(unsigned long *) tmp; + if (tmp & PAGE_PRESENT) { + tmp &= PAGE_MASK; + pg_table = (0xA0000 >> PAGE_SHIFT) + (unsigned long *) tmp; + tmp = 32; + while (tmp--) { + if (PAGE_PRESENT & *pg_table) + 
*pg_table &= ~PAGE_RW; + pg_table++; + } + } + } +} + +asmlinkage int sys_vm86(struct vm86_struct * v86) +{ + struct vm86_struct info; + struct pt_regs * pt_regs = (struct pt_regs *) &v86; + int error; + + if (current->saved_kernel_stack) + return -EPERM; + /* v86 must be readable (now) and writable (for save_v86_state) */ + error = verify_area(VERIFY_WRITE,v86,sizeof(*v86)); + if (error) + return error; + memcpy_fromfs(&info,v86,sizeof(info)); +/* + * make sure the vm86() system call doesn't try to do anything silly + */ + info.regs.__null_ds = 0; + info.regs.__null_es = 0; + info.regs.__null_fs = 0; + info.regs.__null_gs = 0; +/* + * The eflags register is also special: we cannot trust that the user + * has set it up safely, so this makes sure interrupt etc flags are + * inherited from protected mode. + */ + VEFLAGS = info.regs.eflags; + info.regs.eflags &= SAFE_MASK; + info.regs.eflags |= pt_regs->eflags & ~SAFE_MASK; + info.regs.eflags |= VM_MASK; + + switch (info.cpu_type) { + case CPU_286: + current->v86mask = 0; + break; + case CPU_386: + current->v86mask = NT_MASK | IOPL_MASK; + break; + case CPU_486: + current->v86mask = AC_MASK | NT_MASK | IOPL_MASK; + break; + default: + current->v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK; + break; + } + +/* + * Save old state, set default return value (%eax) to 0 + */ + pt_regs->eax = 0; + current->saved_kernel_stack = current->tss.esp0; + current->tss.esp0 = (unsigned long) pt_regs; + current->vm86_info = v86; + + current->screen_bitmap = info.screen_bitmap; + if (info.flags & VM86_SCREEN_BITMAP) + mark_screen_rdonly(current); + __asm__ __volatile__("movl %0,%%esp\n\t" + "jmp ret_from_sys_call" + : /* no outputs */ + :"r" (&info.regs)); + return 0; +} + +static inline void return_to_32bit(struct vm86_regs * regs16, int retval) +{ + struct pt_regs * regs32; + + regs32 = save_v86_state(regs16); + regs32->eax = retval; + __asm__ __volatile__("movl %0,%%esp\n\t" + "jmp ret_from_sys_call" + : : "r" (regs32)); +} + 
+static inline void set_IF(struct vm86_regs * regs) +{ + VEFLAGS |= VIF_MASK; + if (VEFLAGS & VIP_MASK) + return_to_32bit(regs, VM86_STI); +} + +static inline void clear_IF(struct vm86_regs * regs) +{ + VEFLAGS &= ~VIF_MASK; +} + +static inline void clear_TF(struct vm86_regs * regs) +{ + regs->eflags &= ~TF_MASK; +} + +static inline void set_vflags_long(unsigned long eflags, struct vm86_regs * regs) +{ + set_flags(VEFLAGS, eflags, current->v86mask); + set_flags(regs->eflags, eflags, SAFE_MASK); + if (eflags & IF_MASK) + set_IF(regs); +} + +static inline void set_vflags_short(unsigned short flags, struct vm86_regs * regs) +{ + set_flags(VFLAGS, flags, current->v86mask); + set_flags(regs->eflags, flags, SAFE_MASK); + if (flags & IF_MASK) + set_IF(regs); +} + +static inline unsigned long get_vflags(struct vm86_regs * regs) +{ + unsigned long flags = regs->eflags & RETURN_MASK; + + if (VEFLAGS & VIF_MASK) + flags |= IF_MASK; + return flags | (VEFLAGS & current->v86mask); +} + +static inline int is_revectored(int nr, struct revectored_struct * bitmap) +{ + __asm__ __volatile__("btl %2,%%fs:%1\n\tsbbl %0,%0" + :"=r" (nr) + :"m" (*bitmap),"r" (nr)); + return nr; +} + +/* + * Boy are these ugly, but we need to do the correct 16-bit arithmetic. + * Gcc makes a mess of it, so we do it inline and use non-obvious calling + * conventions.. 
+ */ +#define pushb(base, ptr, val) \ +__asm__ __volatile__( \ + "decw %w0\n\t" \ + "movb %2,%%fs:0(%1,%0)" \ + : "=r" (ptr) \ + : "r" (base), "q" (val), "0" (ptr)) + +#define pushw(base, ptr, val) \ +__asm__ __volatile__( \ + "decw %w0\n\t" \ + "movb %h2,%%fs:0(%1,%0)\n\t" \ + "decw %w0\n\t" \ + "movb %b2,%%fs:0(%1,%0)" \ + : "=r" (ptr) \ + : "r" (base), "q" (val), "0" (ptr)) + +#define pushl(base, ptr, val) \ +__asm__ __volatile__( \ + "decw %w0\n\t" \ + "rorl $16,%2\n\t" \ + "movb %h2,%%fs:0(%1,%0)\n\t" \ + "decw %w0\n\t" \ + "movb %b2,%%fs:0(%1,%0)\n\t" \ + "decw %w0\n\t" \ + "rorl $16,%2\n\t" \ + "movb %h2,%%fs:0(%1,%0)\n\t" \ + "decw %w0\n\t" \ + "movb %b2,%%fs:0(%1,%0)" \ + : "=r" (ptr) \ + : "r" (base), "q" (val), "0" (ptr)) + +#define popb(base, ptr) \ +({ unsigned long __res; \ +__asm__ __volatile__( \ + "movb %%fs:0(%1,%0),%b2\n\t" \ + "incw %w0" \ + : "=r" (ptr), "=r" (base), "=q" (__res) \ + : "0" (ptr), "1" (base), "2" (0)); \ +__res; }) + +#define popw(base, ptr) \ +({ unsigned long __res; \ +__asm__ __volatile__( \ + "movb %%fs:0(%1,%0),%b2\n\t" \ + "incw %w0\n\t" \ + "movb %%fs:0(%1,%0),%h2\n\t" \ + "incw %w0" \ + : "=r" (ptr), "=r" (base), "=q" (__res) \ + : "0" (ptr), "1" (base), "2" (0)); \ +__res; }) + +#define popl(base, ptr) \ +({ unsigned long __res; \ +__asm__ __volatile__( \ + "movb %%fs:0(%1,%0),%b2\n\t" \ + "incw %w0\n\t" \ + "movb %%fs:0(%1,%0),%h2\n\t" \ + "incw %w0\n\t" \ + "rorl $16,%2\n\t" \ + "movb %%fs:0(%1,%0),%b2\n\t" \ + "incw %w0\n\t" \ + "movb %%fs:0(%1,%0),%h2\n\t" \ + "incw %w0\n\t" \ + "rorl $16,%2" \ + : "=r" (ptr), "=r" (base), "=q" (__res) \ + : "0" (ptr), "1" (base)); \ +__res; }) + +static void do_int(struct vm86_regs *regs, int i, unsigned char * ssp, unsigned long sp) +{ + unsigned short seg = get_fs_word((void *) ((i<<2)+2)); + + if (seg == BIOSSEG || regs->cs == BIOSSEG || + is_revectored(i, &current->vm86_info->int_revectored)) + return_to_32bit(regs, VM86_INTx + (i << 8)); + if (i==0x21 && 
is_revectored(AH(regs),&current->vm86_info->int21_revectored)) + return_to_32bit(regs, VM86_INTx + (i << 8)); + pushw(ssp, sp, get_vflags(regs)); + pushw(ssp, sp, regs->cs); + pushw(ssp, sp, IP(regs)); + regs->cs = seg; + SP(regs) -= 6; + IP(regs) = get_fs_word((void *) (i<<2)); + clear_TF(regs); + clear_IF(regs); + return; +} + +void handle_vm86_debug(struct vm86_regs * regs, long error_code) +{ +#if 0 + do_int(regs, 1, (unsigned char *) (regs->ss << 4), SP(regs)); +#else + if (current->flags & PF_PTRACED) + current->blocked &= ~(1 << (SIGTRAP-1)); + send_sig(SIGTRAP, current, 1); + current->tss.trap_no = 1; + current->tss.error_code = error_code; +#endif +} + +void handle_vm86_fault(struct vm86_regs * regs, long error_code) +{ + unsigned char *csp, *ssp; + unsigned long ip, sp; + + csp = (unsigned char *) (regs->cs << 4); + ssp = (unsigned char *) (regs->ss << 4); + sp = SP(regs); + ip = IP(regs); + + switch (popb(csp, ip)) { + + /* operand size override */ + case 0x66: + switch (popb(csp, ip)) { + + /* pushfd */ + case 0x9c: + SP(regs) -= 4; + IP(regs) += 2; + pushl(ssp, sp, get_vflags(regs)); + return; + + /* popfd */ + case 0x9d: + SP(regs) += 4; + IP(regs) += 2; + set_vflags_long(popl(ssp, sp), regs); + return; + } + + /* pushf */ + case 0x9c: + SP(regs) -= 2; + IP(regs)++; + pushw(ssp, sp, get_vflags(regs)); + return; + + /* popf */ + case 0x9d: + SP(regs) += 2; + IP(regs)++; + set_vflags_short(popw(ssp, sp), regs); + return; + + /* int 3 */ + case 0xcc: + IP(regs)++; + do_int(regs, 3, ssp, sp); + return; + + /* int xx */ + case 0xcd: + IP(regs) += 2; + do_int(regs, popb(csp, ip), ssp, sp); + return; + + /* iret */ + case 0xcf: + SP(regs) += 6; + IP(regs) = popw(ssp, sp); + regs->cs = popw(ssp, sp); + set_vflags_short(popw(ssp, sp), regs); + return; + + /* cli */ + case 0xfa: + IP(regs)++; + clear_IF(regs); + return; + + /* sti */ + /* + * Damn. This is incorrect: the 'sti' instruction should actually + * enable interrupts after the /next/ instruction. Not good. 
/*
 * simple_strtoul - convert the leading digits of a string to an unsigned long.
 * @cp:   string to parse; no leading whitespace or sign is skipped
 * @endp: if non-NULL, receives a pointer to the first character not consumed
 * @base: number base; 0 selects it from the prefix ("0x"/"0X" followed by a
 *        hex digit -> 16, other leading "0" -> 8, anything else -> 10)
 *
 * Parsing stops at the first character that is not a digit of the chosen
 * base.  Overflow is not detected.  Returns the accumulated value, which is
 * 0 when no digit was consumed.
 */
unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
{
	unsigned long result = 0,value;

	if (!base) {
		base = 10;
		if (*cp == '0') {
			base = 8;
			cp++;
			/*
			 * Accept the upper-case prefix too, so that the
			 * "0X.." text produced by "%#X" below parses back.
			 */
			if ((*cp == 'x' || *cp == 'X') && isxdigit(cp[1])) {
				cp++;
				base = 16;
			}
		}
	}
	while (isxdigit(*cp)) {
		if (isdigit(*cp))
			value = *cp - '0';
		else
			value = (islower(*cp) ? toupper(*cp) : *cp) - 'A' + 10;
		if (value >= base)
			break;		/* a hex digit, but not valid in this base */
		result = result*base + value;
		cp++;
	}
	if (endp)
		*endp = (char *)cp;
	return result;
}

/* we use this so that we can do without the ctype library */
#define is_digit(c)	((c) >= '0' && (c) <= '9')

/*
 * Parse a run of decimal digits at *s, advance *s past them and return
 * their value.  Overflow is not detected.
 */
static int skip_atoi(const char **s)
{
	int i=0;

	while (is_digit(**s))
		i = i*10 + *((*s)++) - '0';
	return i;
}

#define ZEROPAD	1		/* pad with zero */
#define SIGN	2		/* unsigned/signed long */
#define PLUS	4		/* show plus */
#define SPACE	8		/* space if plus */
#define LEFT	16		/* left justified */
#define SPECIAL	32		/* 0x */
#define LARGE	64		/* use 'ABCDEF' instead of 'abcdef' */

/*
 * Divide the lvalue n by base in place (both treated as unsigned) and
 * yield the remainder.  n is evaluated more than once.
 */
#define do_div(n,base) ({ \
int __res; \
__res = ((unsigned long) (n)) % (unsigned) (base); \
(n) = ((unsigned long) (n)) / (unsigned) (base); \
__res; })

/*
 * number - format one integer at str and return the advanced cursor.
 * @str:       where to write; nothing is NUL-terminated here
 * @num:       value; taken as signed only when SIGN is set in @type,
 *             otherwise its bits are printed as an unsigned long
 * @base:      2..36; any other base writes nothing and returns @str
 * @size:      minimum field width (<= 0 means no padding)
 * @precision: minimum number of digits (zero-extended on the left)
 * @type:      OR of ZEROPAD, SIGN, PLUS, SPACE, LEFT, SPECIAL, LARGE
 */
static char * number(char * str, long num, int base, int size, int precision
	,int type)
{
	/* room for every bit of a long printed in base 2, plus slack */
	char c,sign,tmp[8 * sizeof(long) + 2];
	const char *digits="0123456789abcdefghijklmnopqrstuvwxyz";
	unsigned long val;
	int i;

	if (type & LARGE)
		digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
	if (type & LEFT)
		type &= ~ZEROPAD;
	if (base < 2 || base > 36)
		return str;	/* callers write through the result: never hand back NULL */
	c = (type & ZEROPAD) ? '0' : ' ';
	sign = 0;
	val = (unsigned long) num;
	if (type & SIGN) {
		if (num < 0) {
			sign = '-';
			/* negate as unsigned: -num would overflow for the most negative long */
			val = 0UL - val;
			size--;
		} else if (type & PLUS) {
			sign = '+';
			size--;
		} else if (type & SPACE) {
			sign = ' ';
			size--;
		}
	}
	if (type & SPECIAL) {
		if (base == 16)
			size -= 2;
		else if (base == 8)
			size--;
	}
	/* generate the digits least-significant first */
	i = 0;
	if (val == 0)
		tmp[i++]='0';
	else
		while (val != 0)
			tmp[i++] = digits[do_div(val,base)];
	if (i > precision)
		precision = i;
	size -= precision;
	if (!(type&(ZEROPAD+LEFT)))
		while(size-->0)
			*str++ = ' ';
	if (sign)
		*str++ = sign;
	if (type & SPECIAL) {
		if (base==8)
			*str++ = '0';
		else if (base==16) {
			*str++ = '0';
			*str++ = digits[33];	/* 'x' or 'X', matching the digit case */
		}
	}
	if (!(type & LEFT))
		while (size-- > 0)
			*str++ = c;
	while (i < precision--)
		*str++ = '0';
	while (i-- > 0)
		*str++ = tmp[i];
	while (size-- > 0)
		*str++ = ' ';
	return str;
}

/*
 * vsprintf - format a string into buf according to fmt.
 *
 * Handles the flags '-', '+', ' ', '#', '0', a field width and a precision
 * (each a number or '*'), the qualifiers 'h', 'l', 'L', and the conversions
 * c, s, p, n, o, x, X, d, i, u.  "%%" produces a single '%'; any other
 * unknown conversion is copied to the output unchanged.  A NULL argument to
 * %s prints "<NULL>".
 *
 * No bound is applied to the output: buf must be large enough for the
 * result and its terminating NUL.  Returns the number of characters
 * written, not counting the NUL.
 */
int vsprintf(char *buf, const char *fmt, va_list args)
{
	int len;
	unsigned long num;
	int i, base;
	char * str;
	const char *s;

	int flags;		/* flags to number() */

	int field_width;	/* width of output field */
	int precision;		/* min. # of digits for integers; max
				   number of chars taken from a string */
	int qualifier;		/* 'h', 'l', or 'L' for integer fields */

	for (str=buf ; *fmt ; ++fmt) {
		if (*fmt != '%') {
			*str++ = *fmt;
			continue;
		}

		/* process flags */
		flags = 0;
		repeat:
			++fmt;		/* this also skips first '%' */
			switch (*fmt) {
				case '-': flags |= LEFT; goto repeat;
				case '+': flags |= PLUS; goto repeat;
				case ' ': flags |= SPACE; goto repeat;
				case '#': flags |= SPECIAL; goto repeat;
				case '0': flags |= ZEROPAD; goto repeat;
				}

		/* get field width */
		field_width = -1;
		if (is_digit(*fmt))
			field_width = skip_atoi(&fmt);
		else if (*fmt == '*') {
			++fmt;
			/* it's the next argument */
			field_width = va_arg(args, int);
			if (field_width < 0) {
				field_width = -field_width;
				flags |= LEFT;
			}
		}

		/* get the precision */
		precision = -1;
		if (*fmt == '.') {
			++fmt;
			if (is_digit(*fmt))
				precision = skip_atoi(&fmt);
			else if (*fmt == '*') {
				++fmt;
				/* it's the next argument */
				precision = va_arg(args, int);
			}
			if (precision < 0)
				precision = 0;
		}

		/* get the conversion qualifier */
		qualifier = -1;
		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') {
			qualifier = *fmt;
			++fmt;
		}

		/* default base */
		base = 10;

		switch (*fmt) {
		case 'c':
			if (!(flags & LEFT))
				while (--field_width > 0)
					*str++ = ' ';
			*str++ = (unsigned char) va_arg(args, int);
			while (--field_width > 0)
				*str++ = ' ';
			continue;

		case 's':
			s = va_arg(args, char *);
			if (!s)
				s = "<NULL>";
			len = strlen(s);
			if (precision < 0)
				precision = len;
			else if (len > precision)
				len = precision;

			if (!(flags & LEFT))
				while (len < field_width--)
					*str++ = ' ';
			for (i = 0; i < len; ++i)
				*str++ = *s++;
			while (len < field_width--)
				*str++ = ' ';
			continue;

		case 'p':
			if (field_width == -1) {
				field_width = 2*sizeof(void *);
				flags |= ZEROPAD;
			}
			str = number(str,
				(unsigned long) va_arg(args, void *), 16,
				field_width, precision, flags);
			continue;


		case 'n':
			if (qualifier == 'l') {
				long * ip = va_arg(args, long *);
				*ip = (str - buf);
			} else {
				int * ip = va_arg(args, int *);
				*ip = (str - buf);
			}
			continue;

		/* integer number formats - set up the flags and "break" */
		case 'o':
			base = 8;
			break;

		case 'X':
			flags |= LARGE;
			/* fall through */
		case 'x':
			base = 16;
			break;

		case 'd':
		case 'i':
			flags |= SIGN;
			/* fall through */
		case 'u':
			break;

		default:
			if (*fmt != '%')
				*str++ = '%';
			if (*fmt)
				*str++ = *fmt;
			else
				--fmt;	/* format ended after '%': let the loop see the NUL */
			continue;
		}
		if (qualifier == 'l')
			num = va_arg(args, unsigned long);
		else if (qualifier == 'h') {
			/*
			 * A short is passed through "..." promoted to int, so
			 * it must be fetched as int and narrowed afterwards.
			 */
			if (flags & SIGN)
				num = (short) va_arg(args, int);
			else
				num = (unsigned short) va_arg(args, int);
		} else if (flags & SIGN)
			num = va_arg(args, int);
		else
			num = va_arg(args, unsigned int);
		str = number(str, num, base, field_width, precision, flags);
	}
	*str = '\0';
	return str-buf;
}

/*
 * sprintf - variadic front end to vsprintf().
 *
 * Formats into buf with no bound on the output length and returns what
 * vsprintf() returns: the number of characters written, excluding the NUL.
 */
int sprintf(char * buf, const char *fmt, ...)
{
	va_list args;
	int i;

	va_start(args, fmt);
	i=vsprintf(buf,fmt,args);
	va_end(args);
	return i;
}