author    | Ralf Baechle <ralf@linux-mips.org> | 1999-10-09 00:00:47 +0000
committer | Ralf Baechle <ralf@linux-mips.org> | 1999-10-09 00:00:47 +0000
commit    | d6434e1042f3b0a6dfe1b1f615af369486f9b1fa (patch)
tree      | e2be02f33984c48ec019c654051d27964e42c441 /kernel
parent    | 609d1e803baf519487233b765eb487f9ec227a18 (diff)
Merge with 2.3.19.
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/acct.c        |   4
-rw-r--r-- | kernel/capability.c  |   6
-rw-r--r-- | kernel/dma.c         |   2
-rw-r--r-- | kernel/exec_domain.c |   9
-rw-r--r-- | kernel/exit.c        |  91
-rw-r--r-- | kernel/fork.c        | 301
-rw-r--r-- | kernel/info.c        |   4
-rw-r--r-- | kernel/itimer.c      |   8
-rw-r--r-- | kernel/ksyms.c       |  26
-rw-r--r-- | kernel/module.c      |  22
-rw-r--r-- | kernel/panic.c       |  10
-rw-r--r-- | kernel/printk.c      | 156
-rw-r--r-- | kernel/ptrace.c      |   7
-rw-r--r-- | kernel/resource.c    | 405
-rw-r--r-- | kernel/sched.c       | 471
-rw-r--r-- | kernel/signal.c      |  26
-rw-r--r-- | kernel/sys.c         |  69
-rw-r--r-- | kernel/sysctl.c      | 237
-rw-r--r-- | kernel/time.c        |  10
19 files changed, 1032 insertions, 832 deletions
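The largest single change below is the rewrite of kernel/resource.c, which replaces the old occupy/vacate/check calls with a hierarchical resource tree rooted at ioport_resource and iomem_resource. As an illustration of the new interface (a sketch only: the driver name, port range, and helper functions are made up; the struct resource field order of name, start, end, flags follows the initializers in the diff):

	#include <linux/ioport.h>	/* struct resource, ioport_resource */
	#include <linux/errno.h>

	/* Hypothetical device claiming I/O ports 0x300-0x31f. */
	static struct resource mydev_ports = {
		"mydev",	/* name, as listed in /proc/ioports */
		0x300, 0x31f,	/* start and end, inclusive */
		IORESOURCE_IO,	/* flags */
	};

	static int mydev_claim_ports(void)
	{
		/* Link the node into the I/O port tree; request_resource()
		 * fails if the range conflicts with an existing sibling. */
		if (request_resource(&ioport_resource, &mydev_ports))
			return -EBUSY;
		return 0;
	}

	static void mydev_release_ports(void)
	{
		/* Unlink the node from its parent again. */
		release_resource(&mydev_ports);
	}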
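The merge also switches boot-option parsing from the old ints[]-array setup functions to __setup() registration; panic_setup(), console_setup() and reserve_setup() in the diff all follow this pattern. A minimal sketch with a made-up "foo=" option:

	static int foo_value;

	/* Parse "foo=<number>" from the kernel command line. */
	static int __init foo_setup(char *str)
	{
		foo_value = simple_strtoul(str, NULL, 0);
		return 1;	/* non-zero: option handled */
	}

	__setup("foo=", foo_setup);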
diff --git a/kernel/acct.c b/kernel/acct.c index c6142afc7..7e64105a8 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -146,7 +146,7 @@ static int check_free_space(struct file *file) * should be written. If the filename is NULL, accounting will be * shutdown. */ -asmlinkage int sys_acct(const char *name) +asmlinkage long sys_acct(const char *name) { struct file *file = NULL, *old_acct = NULL; char *tmp; @@ -354,7 +354,7 @@ int acct_process(long exitcode) * into the kernel. */ -asmlinkage int sys_acct(const char * filename) +asmlinkage long sys_acct(const char * filename) { return -ENOSYS; } diff --git a/kernel/capability.c b/kernel/capability.c index a4a1a3d03..2dbfe83f7 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -17,7 +17,9 @@ spinlock_t task_capability_lock; * uninteresting and/or not to be changed. */ -asmlinkage int sys_capget(cap_user_header_t header, cap_user_data_t dataptr) +kernel_cap_t cap_bset = CAP_FULL_SET; + +asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) { int error, pid; __u32 version; @@ -124,7 +126,7 @@ static void cap_set_all(kernel_cap_t *effective, * E: must be set to a subset of (new target) Permitted */ -asmlinkage int sys_capset(cap_user_header_t header, const cap_user_data_t data) +asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) { kernel_cap_t inheritable, permitted, effective; __u32 version; diff --git a/kernel/dma.c b/kernel/dma.c index 4ae38f4e5..e9f0f7a52 100644 --- a/kernel/dma.c +++ b/kernel/dma.c @@ -12,9 +12,9 @@ #include <linux/kernel.h> #include <linux/errno.h> +#include <linux/spinlock.h> #include <asm/dma.h> #include <asm/system.h> -#include <asm/spinlock.h> /* A note on resource allocation: diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index 3c5881ee2..4aa968ee1 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -2,7 +2,7 @@ #include <linux/smp_lock.h> #include <linux/module.h> -static asmlinkage void no_lcall7(struct pt_regs * regs); +static asmlinkage void no_lcall7(int segment, struct pt_regs * regs); static unsigned long ident_map[32] = { @@ -25,9 +25,8 @@ struct exec_domain default_exec_domain = { static struct exec_domain *exec_domains = &default_exec_domain; -static asmlinkage void no_lcall7(struct pt_regs * regs) +static asmlinkage void no_lcall7(int segment, struct pt_regs * regs) { - /* * This may have been a static linked SVr4 binary, so we would have the * personality set incorrectly. 
Check to see whether SVr4 is available, @@ -44,7 +43,7 @@ static asmlinkage void no_lcall7(struct pt_regs * regs) if (current->exec_domain && current->exec_domain->handler && current->exec_domain->handler != no_lcall7) { - current->exec_domain->handler(regs); + current->exec_domain->handler(segment, regs); return; } @@ -98,7 +97,7 @@ int unregister_exec_domain(struct exec_domain *it) return -EINVAL; } -asmlinkage int sys_personality(unsigned long personality) +asmlinkage long sys_personality(unsigned long personality) { struct exec_domain *it; unsigned long old_personality; diff --git a/kernel/exit.c b/kernel/exit.c index a3d8a7547..39103a683 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -26,30 +26,20 @@ void release(struct task_struct * p) { if (p != current) { #ifdef __SMP__ + int has_cpu; + /* - * Wait to make sure the process isn't active on any - * other CPU + * Wait to make sure the process isn't on the + * runqueue (active on some other CPU still) */ - for (;;) { - int has_cpu; + do { spin_lock_irq(&runqueue_lock); has_cpu = p->has_cpu; spin_unlock_irq(&runqueue_lock); - if (!has_cpu) - break; - do { - barrier(); - } while (p->has_cpu); - } + } while (has_cpu); #endif free_uid(p); - nr_tasks--; - add_free_taskslot(p->tarray_ptr); - - write_lock_irq(&tasklist_lock); - unhash_pid(p); - REMOVE_LINKS(p); - write_unlock_irq(&tasklist_lock); + unhash_process(p); release_thread(p); current->cmin_flt += p->min_flt + p->cmin_flt; @@ -159,11 +149,11 @@ static inline void close_files(struct files_struct * files) j = 0; for (;;) { - unsigned long set = files->open_fds.fds_bits[j]; + unsigned long set; i = j * __NFDBITS; - j++; - if (i >= files->max_fds) + if (i >= files->max_fdset || i >= files->max_fds) break; + set = files->open_fds->fds_bits[j++]; while (set) { if (set & 1) { struct file * file = xchg(&files->fd[i], NULL); @@ -186,12 +176,14 @@ static inline void __exit_files(struct task_struct *tsk) if (atomic_dec_and_test(&files->count)) { close_files(files); /* - * Free the fd array as appropriate ... + * Free the fd and fdset arrays if we expanded them. */ - if (NR_OPEN * sizeof(struct file *) == PAGE_SIZE) - free_page((unsigned long) files->fd); - else - kfree(files->fd); + if (files->fd != &files->fd_array[0]) + free_fd_array(files->fd, files->max_fds); + if (files->max_fdset > __FD_SETSIZE) { + free_fdset(files->open_fds, files->max_fdset); + free_fdset(files->close_on_exec, files->max_fdset); + } kmem_cache_free(files_cachep, files); } } @@ -243,19 +235,44 @@ void exit_sighand(struct task_struct *tsk) __exit_sighand(tsk); } +/* + * We can use these to temporarily drop into + * "lazy TLB" mode and back. + */ +struct mm_struct * start_lazy_tlb(void) +{ + struct mm_struct *mm = current->mm; + current->mm = NULL; + /* active_mm is still 'mm' */ + atomic_inc(&mm->mm_count); + return mm; +} + +void end_lazy_tlb(struct mm_struct *mm) +{ + struct mm_struct *active_mm = current->active_mm; + + current->mm = mm; + if (mm != active_mm) { + current->active_mm = mm; + activate_mm(active_mm, mm); + } + mmdrop(active_mm); +} + +/* + * Turn us into a lazy TLB process if we + * aren't already.. 
+ */ static inline void __exit_mm(struct task_struct * tsk) { struct mm_struct * mm = tsk->mm; - /* Set us up to use the kernel mm state */ - if (mm != &init_mm) { - flush_cache_mm(mm); - flush_tlb_mm(mm); - destroy_context(mm); - tsk->mm = &init_mm; - tsk->swappable = 0; - SET_PAGE_DIR(tsk, swapper_pg_dir); + if (mm) { + atomic_inc(&mm->mm_count); mm_release(); + if (mm != tsk->active_mm) BUG(); + tsk->mm = NULL; mmput(mm); } } @@ -395,12 +412,12 @@ fake_volatile: goto fake_volatile; } -asmlinkage int sys_exit(int error_code) +asmlinkage long sys_exit(int error_code) { do_exit((error_code&0xff)<<8); } -asmlinkage int sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru) +asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru) { int flag, retval; DECLARE_WAITQUEUE(wait, current); @@ -488,13 +505,13 @@ end_wait4: return retval; } -#ifndef __alpha__ +#if !defined(__alpha__) && !defined(__ia64__) /* * sys_waitpid() remains for compatibility. waitpid() should be * implemented by calling sys_wait4() from libc.a. */ -asmlinkage int sys_waitpid(pid_t pid,unsigned int * stat_addr, int options) +asmlinkage long sys_waitpid(pid_t pid,unsigned int * stat_addr, int options) { return sys_wait4(pid, stat_addr, options, NULL); } diff --git a/kernel/fork.c b/kernel/fork.c index 12c580852..6520e1843 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -22,11 +22,12 @@ #include <asm/mmu_context.h> #include <asm/uaccess.h> -/* The idle tasks do not count.. */ -int nr_tasks=0; +/* The idle threads do not count.. */ +int nr_threads=0; int nr_running=0; -unsigned long int total_forks=0; /* Handle normal Linux uptimes. */ +int max_threads; +unsigned long total_forks = 0; /* Handle normal Linux uptimes. */ int last_pid=0; /* SLAB cache for mm_struct's. */ @@ -37,9 +38,6 @@ kmem_cache_t *files_cachep; struct task_struct *pidhash[PIDHASH_SZ]; -struct task_struct **tarray_freelist = NULL; -spinlock_t taskslot_lock = SPIN_LOCK_UNLOCKED; - /* UID task count cache, to prevent walking entire process list every * single fork() operation. */ @@ -159,7 +157,7 @@ int alloc_uid(struct task_struct *p) return 0; } -void __init uidcache_init(void) +void __init fork_init(unsigned long memsize) { int i; @@ -171,15 +169,16 @@ void __init uidcache_init(void) for(i = 0; i < UIDHASH_SZ; i++) uidhash[i] = 0; -} -static inline struct task_struct ** find_empty_process(void) -{ - struct task_struct **tslot = NULL; + /* + * The default maximum number of threads is set to a safe + * value: the thread structures can take up at most half + * of memory. + */ + max_threads = memsize / THREAD_SIZE / 2; - if ((nr_tasks < NR_TASKS - MIN_TASKS_LEFT_FOR_ROOT) || !current->uid) - tslot = get_free_taskslot(); - return tslot; + init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; + init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2; } /* Protects next_safe and last_pid. */ @@ -233,6 +232,9 @@ static inline int dup_mmap(struct mm_struct * mm) struct vm_area_struct * mpnt, *tmp, **pprev; int retval; + /* Kill me slowly. UGLY! FIXME! */ + memcpy(&mm->start_code, &current->mm->start_code, 15*sizeof(unsigned long)); + flush_cache_mm(current->mm); pprev = &mm->mmap; for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) { @@ -290,9 +292,6 @@ fail_nomem: /* * Allocate and initialize an mm_struct. - * - * NOTE! The mm mutex will be locked until the - * caller decides that all systems are go.. 
*/ struct mm_struct * mm_alloc(void) { @@ -300,25 +299,41 @@ struct mm_struct * mm_alloc(void) mm = kmem_cache_alloc(mm_cachep, SLAB_KERNEL); if (mm) { - *mm = *current->mm; - init_new_context(mm); - atomic_set(&mm->count, 1); - mm->map_count = 0; - mm->def_flags = 0; - init_MUTEX_LOCKED(&mm->mmap_sem); + memset(mm, 0, sizeof(*mm)); + atomic_set(&mm->mm_users, 1); + atomic_set(&mm->mm_count, 1); + init_MUTEX(&mm->mmap_sem); mm->page_table_lock = SPIN_LOCK_UNLOCKED; - /* - * Leave mm->pgd set to the parent's pgd - * so that pgd_offset() is always valid. - */ - mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL; + mm->pgd = pgd_alloc(); + if (mm->pgd) + return mm; + kmem_cache_free(mm_cachep, mm); + } + return NULL; +} - /* It has not run yet, so cannot be present in anyone's - * cache or tlb. - */ - mm->cpu_vm_mask = 0; +/* + * Called when the last reference to the mm + * is dropped: either by a lazy thread or by + * mmput. Free the page directory and the mm. + */ +inline void __mmdrop(struct mm_struct *mm) +{ + if (mm == &init_mm) BUG(); + pgd_free(mm->pgd); + destroy_context(mm); + kmem_cache_free(mm_cachep, mm); +} + +/* + * Decrement the use count and release all resources for an mm. + */ +void mmput(struct mm_struct *mm) +{ + if (atomic_dec_and_test(&mm->mm_users)) { + exit_mmap(mm); + mmdrop(mm); } - return mm; } /* Please note the differences between mmput and mm_release. @@ -345,20 +360,7 @@ void mm_release(void) } } -/* - * Decrement the use count and release all resources for an mm. - */ -void mmput(struct mm_struct *mm) -{ - if (atomic_dec_and_test(&mm->count)) { - release_segments(mm); - exit_mmap(mm); - free_page_tables(mm); - kmem_cache_free(mm_cachep, mm); - } -} - -static inline int copy_mm(int nr, unsigned long clone_flags, struct task_struct * tsk) +static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk) { struct mm_struct * mm; int retval; @@ -367,14 +369,21 @@ static inline int copy_mm(int nr, unsigned long clone_flags, struct task_struct tsk->cmin_flt = tsk->cmaj_flt = 0; tsk->nswap = tsk->cnswap = 0; - if (clone_flags & CLONE_VM) { - mmget(current->mm); - /* - * Set up the LDT descriptor for the clone task. - */ - copy_segments(nr, tsk, NULL); - SET_PAGE_DIR(tsk, current->mm->pgd); + tsk->mm = NULL; + tsk->active_mm = NULL; + + /* + * Are we cloning a kernel thread? + * + * We need to steal a active VM for that.. + */ + mm = current->mm; + if (!mm) return 0; + + if (clone_flags & CLONE_VM) { + atomic_inc(&mm->mm_users); + goto good_mm; } retval = -ENOMEM; @@ -383,23 +392,26 @@ static inline int copy_mm(int nr, unsigned long clone_flags, struct task_struct goto fail_nomem; tsk->mm = mm; - copy_segments(nr, tsk, mm); - retval = new_page_tables(tsk); - if (retval) - goto free_mm; + tsk->active_mm = mm; + + /* + * child gets a private LDT (if there was an LDT in the parent) + */ + copy_segments(tsk, mm); + + down(&current->mm->mmap_sem); retval = dup_mmap(mm); + up(&current->mm->mmap_sem); if (retval) goto free_pt; - up(&mm->mmap_sem); + +good_mm: + tsk->mm = mm; + tsk->active_mm = mm; + init_new_context(tsk,mm); return 0; -free_mm: - tsk->mm = NULL; - release_segments(mm); - kmem_cache_free(mm_cachep, mm); - return retval; free_pt: - tsk->mm = NULL; mmput(mm); fail_nomem: return retval; @@ -421,32 +433,24 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) return 0; } -/* - * Copy a fd_set and compute the maximum fd it contains. 
- */ -static inline int __copy_fdset(unsigned long *d, unsigned long *src) +static int count_open_files(struct files_struct *files, int size) { - int i; - unsigned long *p = src; - unsigned long *max = src; - - for (i = __FDSET_LONGS; i; --i) { - if ((*d++ = *p++) != 0) - max = p; + int i; + + /* Find the last open fd */ + for (i = size/(8*sizeof(long)); i > 0; ) { + if (files->open_fds->fds_bits[--i]) + break; } - return (max - src)*sizeof(long)*8; -} - -static inline int copy_fdset(fd_set *dst, fd_set *src) -{ - return __copy_fdset(dst->fds_bits, src->fds_bits); + i = (i+1) * 8 * sizeof(long); + return i; } static int copy_files(unsigned long clone_flags, struct task_struct * tsk) { struct files_struct *oldf, *newf; struct file **old_fds, **new_fds; - int size, i, error = 0; + int open_files, nfds, size, i, error = 0; /* * A background process may not have any files ... @@ -466,43 +470,85 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) if (!newf) goto out; - /* - * Allocate the fd array, using get_free_page() if possible. - * Eventually we want to make the array size variable ... - */ - size = NR_OPEN * sizeof(struct file *); - if (size == PAGE_SIZE) - new_fds = (struct file **) __get_free_page(GFP_KERNEL); - else - new_fds = (struct file **) kmalloc(size, GFP_KERNEL); - if (!new_fds) - goto out_release; - - newf->file_lock = RW_LOCK_UNLOCKED; atomic_set(&newf->count, 1); - newf->max_fds = NR_OPEN; - newf->fd = new_fds; + + newf->file_lock = RW_LOCK_UNLOCKED; + newf->next_fd = 0; + newf->max_fds = NR_OPEN_DEFAULT; + newf->max_fdset = __FD_SETSIZE; + newf->close_on_exec = &newf->close_on_exec_init; + newf->open_fds = &newf->open_fds_init; + newf->fd = &newf->fd_array[0]; + + /* We don't yet have the oldf readlock, but even if the old + fdset gets grown now, we'll only copy up to "size" fds */ + size = oldf->max_fdset; + if (size > __FD_SETSIZE) { + newf->max_fdset = 0; + write_lock(&newf->file_lock); + error = expand_fdset(newf, size); + write_unlock(&newf->file_lock); + if (error) + goto out_release; + } read_lock(&oldf->file_lock); - newf->close_on_exec = oldf->close_on_exec; - i = copy_fdset(&newf->open_fds, &oldf->open_fds); + + open_files = count_open_files(oldf, size); + + /* + * Check whether we need to allocate a larger fd array. + * Note: we're not a clone task, so the open count won't + * change. 
+ */ + nfds = NR_OPEN_DEFAULT; + if (open_files > nfds) { + read_unlock(&oldf->file_lock); + newf->max_fds = 0; + write_lock(&newf->file_lock); + error = expand_fd_array(newf, open_files); + write_unlock(&newf->file_lock); + if (error) + goto out_release; + nfds = newf->max_fds; + read_lock(&oldf->file_lock); + } old_fds = oldf->fd; - for (; i != 0; i--) { + new_fds = newf->fd; + + memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8); + memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8); + + for (i = open_files; i != 0; i--) { struct file *f = *old_fds++; if (f) get_file(f); *new_fds++ = f; } read_unlock(&oldf->file_lock); + + /* compute the remainder to be cleared */ + size = (newf->max_fds - open_files) * sizeof(struct file *); + /* This is long word aligned thus could use a optimized version */ - memset(new_fds, 0, (char *)newf->fd + size - (char *)new_fds); - + memset(new_fds, 0, size); + + if (newf->max_fdset > open_files) { + int left = (newf->max_fdset-open_files)/8; + int start = open_files / (8 * sizeof(unsigned long)); + + memset(&newf->open_fds->fds_bits[start], 0, left); + memset(&newf->close_on_exec->fds_bits[start], 0, left); + } + tsk->files = newf; error = 0; out: return error; out_release: + free_fdset (newf->close_on_exec, newf->max_fdset); + free_fdset (newf->open_fds, newf->max_fdset); kmem_cache_free(files_cachep, newf); goto out; } @@ -542,7 +588,6 @@ static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) */ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) { - int nr; int retval = -ENOMEM; struct task_struct *p; DECLARE_MUTEX_LOCKED(sem); @@ -555,7 +600,6 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) *p = *current; - down(&current->mm->mmap_sem); lock_kernel(); retval = -EAGAIN; @@ -565,15 +609,12 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) atomic_inc(&p->user->count); } - { - struct task_struct **tslot; - tslot = find_empty_process(); - if (!tslot) - goto bad_fork_cleanup_count; - p->tarray_ptr = tslot; - *tslot = p; - nr = tslot - &task[0]; - } + /* + * Counter atomicity is protected by + * the kernel lock + */ + if (nr_threads >= max_threads) + goto bad_fork_cleanup_count; if (p->exec_domain && p->exec_domain->module) __MOD_INC_USE_COUNT(p->exec_domain->module); @@ -594,10 +635,11 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) * very end). */ p->state = TASK_RUNNING; - p->next_run = p; - p->prev_run = p; + p->run_list.next = NULL; + p->run_list.prev = NULL; - p->p_pptr = p->p_opptr = current; + if ((clone_flags & CLONE_VFORK) || !(clone_flags & CLONE_PARENT)) + p->p_pptr = p->p_opptr = current; p->p_cptr = NULL; init_waitqueue_head(&p->wait_chldexit); p->vfork_sem = NULL; @@ -638,9 +680,9 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) goto bad_fork_cleanup_files; if (copy_sighand(clone_flags, p)) goto bad_fork_cleanup_fs; - if (copy_mm(nr, clone_flags, p)) + if (copy_mm(clone_flags, p)) goto bad_fork_cleanup_sighand; - retval = copy_thread(nr, clone_flags, usp, p, regs); + retval = copy_thread(0, clone_flags, usp, p, regs); if (retval) goto bad_fork_cleanup_sighand; p->semundo = NULL; @@ -666,22 +708,17 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) * Let it rip! 
*/ retval = p->pid; - if (retval) { - write_lock_irq(&tasklist_lock); - SET_LINKS(p); - hash_pid(p); - write_unlock_irq(&tasklist_lock); - - nr_tasks++; + write_lock_irq(&tasklist_lock); + SET_LINKS(p); + hash_pid(p); + write_unlock_irq(&tasklist_lock); - p->next_run = NULL; - p->prev_run = NULL; - wake_up_process(p); /* do this last */ - } + nr_threads++; + wake_up_process(p); /* do this last */ ++total_forks; + bad_fork: unlock_kernel(); - up(&current->mm->mmap_sem); fork_out: if ((clone_flags & CLONE_VFORK) && (retval > 0)) down(&sem); @@ -699,7 +736,7 @@ bad_fork_cleanup: if (p->binfmt && p->binfmt->module) __MOD_DEC_USE_COUNT(p->binfmt->module); - add_free_taskslot(p->tarray_ptr); + nr_threads--; bad_fork_cleanup_count: if (p->user) free_uid(p); diff --git a/kernel/info.c b/kernel/info.c index 1dffddc7b..3ee347444 100644 --- a/kernel/info.c +++ b/kernel/info.c @@ -13,7 +13,7 @@ #include <asm/uaccess.h> -asmlinkage int sys_sysinfo(struct sysinfo *info) +asmlinkage long sys_sysinfo(struct sysinfo *info) { struct sysinfo val; @@ -26,7 +26,7 @@ asmlinkage int sys_sysinfo(struct sysinfo *info) val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); - val.procs = nr_tasks-1; + val.procs = nr_threads-1; sti(); si_meminfo(&val); diff --git a/kernel/itimer.c b/kernel/itimer.c index 1b4661c39..7d38ac1ac 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -75,7 +75,7 @@ int do_getitimer(int which, struct itimerval *value) } /* SMP: Only we modify our itimer values. */ -asmlinkage int sys_getitimer(int which, struct itimerval *value) +asmlinkage long sys_getitimer(int which, struct itimerval *value) { int error = -EFAULT; struct itimerval get_buffer; @@ -149,15 +149,13 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) /* SMP: Again, only we play with our itimers, and signals are SMP safe * now so that is not an issue at all anymore. 
*/ -asmlinkage int sys_setitimer(int which, struct itimerval *value, - struct itimerval *ovalue) +asmlinkage long sys_setitimer(int which, struct itimerval *value, + struct itimerval *ovalue) { struct itimerval set_buffer, get_buffer; int error; if (value) { - if(verify_area(VERIFY_READ, value, sizeof(*value))) - return -EFAULT; if(copy_from_user(&set_buffer, value, sizeof(set_buffer))) return -EFAULT; } else diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 798015eaa..71761a3b1 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -39,6 +39,7 @@ #include <linux/console.h> #include <linux/poll.h> #include <linux/mm.h> +#include <linux/capability.h> #if defined(CONFIG_PROC_FS) #include <linux/proc_fs.h> @@ -47,7 +48,6 @@ #include <linux/kmod.h> #endif -extern char *get_options(char *str, int *ints); extern void set_device_ro(kdev_t dev,int flag); extern struct file_operations * get_blkfops(unsigned int); extern int blkdev_release(struct inode * inode); @@ -77,6 +77,7 @@ EXPORT_SYMBOL(request_module); #ifdef CONFIG_MODULES EXPORT_SYMBOL(get_module_symbol); #endif +EXPORT_SYMBOL(get_option); EXPORT_SYMBOL(get_options); /* process memory management */ @@ -94,6 +95,7 @@ EXPORT_SYMBOL(free_pages); EXPORT_SYMBOL(__free_page); EXPORT_SYMBOL(kmem_find_general_cachep); EXPORT_SYMBOL(kmem_cache_create); +EXPORT_SYMBOL(kmem_cache_destroy); EXPORT_SYMBOL(kmem_cache_shrink); EXPORT_SYMBOL(kmem_cache_alloc); EXPORT_SYMBOL(kmem_cache_free); @@ -109,6 +111,7 @@ EXPORT_SYMBOL(high_memory); EXPORT_SYMBOL(vmtruncate); EXPORT_SYMBOL(find_vma); EXPORT_SYMBOL(get_unmapped_area); +EXPORT_SYMBOL(init_mm); /* filesystem internal functions */ EXPORT_SYMBOL(in_group_p); @@ -116,7 +119,6 @@ EXPORT_SYMBOL(update_atime); EXPORT_SYMBOL(get_super); EXPORT_SYMBOL(get_fs_type); EXPORT_SYMBOL(getname); -EXPORT_SYMBOL(__fput); /* goner? 
*/ EXPORT_SYMBOL(_fput); EXPORT_SYMBOL(igrab); EXPORT_SYMBOL(iunique); @@ -168,10 +170,13 @@ EXPORT_SYMBOL(add_blkdev_randomness); EXPORT_SYMBOL(block_read_full_page); EXPORT_SYMBOL(block_write_full_page); EXPORT_SYMBOL(block_write_partial_page); +EXPORT_SYMBOL(block_write_cont_page); EXPORT_SYMBOL(block_flushpage); EXPORT_SYMBOL(generic_file_read); +EXPORT_SYMBOL(do_generic_file_read); EXPORT_SYMBOL(generic_file_write); EXPORT_SYMBOL(generic_file_mmap); +EXPORT_SYMBOL(generic_buffer_fdatasync); EXPORT_SYMBOL(page_hash_bits); EXPORT_SYMBOL(page_hash_table); EXPORT_SYMBOL(file_lock_table); @@ -193,7 +198,10 @@ EXPORT_SYMBOL(vfs_unlink); EXPORT_SYMBOL(vfs_rename); EXPORT_SYMBOL(__pollwait); EXPORT_SYMBOL(ROOT_DEV); - +EXPORT_SYMBOL(add_to_page_cache_unique); +EXPORT_SYMBOL(__find_get_page); +EXPORT_SYMBOL(__find_lock_page); + #if !defined(CONFIG_NFSD) && defined(CONFIG_NFSD_MODULE) EXPORT_SYMBOL(do_nfsservctl); #endif @@ -270,6 +278,8 @@ EXPORT_SYMBOL(proc_dostring); EXPORT_SYMBOL(proc_dointvec); EXPORT_SYMBOL(proc_dointvec_jiffies); EXPORT_SYMBOL(proc_dointvec_minmax); +EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); +EXPORT_SYMBOL(proc_doulongvec_minmax); /* interrupt handling */ EXPORT_SYMBOL(request_irq); @@ -308,11 +318,14 @@ EXPORT_SYMBOL(enable_hlt); #endif /* resource handling */ -EXPORT_SYMBOL(check_resource); EXPORT_SYMBOL(request_resource); EXPORT_SYMBOL(release_resource); -EXPORT_SYMBOL(occupy_resource); -EXPORT_SYMBOL(vacate_resource); +EXPORT_SYMBOL(allocate_resource); +EXPORT_SYMBOL(__request_region); +EXPORT_SYMBOL(__check_region); +EXPORT_SYMBOL(__release_region); +EXPORT_SYMBOL(ioport_resource); +EXPORT_SYMBOL(iomem_resource); /* process management */ EXPORT_SYMBOL(__wake_up); @@ -350,6 +363,7 @@ EXPORT_SYMBOL(_ctype); EXPORT_SYMBOL(secure_tcp_sequence_number); EXPORT_SYMBOL(get_random_bytes); EXPORT_SYMBOL(securebits); +EXPORT_SYMBOL(cap_bset); /* Program loader interfaces */ EXPORT_SYMBOL(setup_arg_pages); diff --git a/kernel/module.c b/kernel/module.c index c5591db7f..6f4ad977d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -57,7 +57,7 @@ static void free_module(struct module *, int tag_freed); * Called at boot time */ -__initfunc(void init_modules(void)) +void __init init_modules(void) { kernel_module.nsyms = __stop___ksymtab - __start___ksymtab; @@ -76,10 +76,6 @@ get_mod_name(const char *user_name, char **buf) unsigned long page; long retval; - if ((unsigned long)user_name >= TASK_SIZE - && !segment_eq(get_fs (), KERNEL_DS)) - return -EFAULT; - page = __get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; @@ -161,7 +157,7 @@ err0: * Initialize a module. */ -asmlinkage int +asmlinkage long sys_init_module(const char *name_user, struct module *mod_user) { struct module mod_tmp, *mod; @@ -353,7 +349,7 @@ err0: return error; } -asmlinkage int +asmlinkage long sys_delete_module(const char *name_user) { struct module *mod, *next; @@ -628,7 +624,7 @@ qm_info(struct module *mod, char *buf, size_t bufsize, size_t *ret) return error; } -asmlinkage int +asmlinkage long sys_query_module(const char *name_user, int which, char *buf, size_t bufsize, size_t *ret) { @@ -693,7 +689,7 @@ out: * which does not arbitrarily limit the length of symbols. 
*/ -asmlinkage int +asmlinkage long sys_get_kernel_syms(struct kernel_sym *table) { struct module *mod; @@ -981,19 +977,19 @@ sys_create_module(const char *name_user, size_t size) return -ENOSYS; } -asmlinkage int +asmlinkage long sys_init_module(const char *name_user, struct module *mod_user) { return -ENOSYS; } -asmlinkage int +asmlinkage long sys_delete_module(const char *name_user) { return -ENOSYS; } -asmlinkage int +asmlinkage long sys_query_module(const char *name_user, int which, char *buf, size_t bufsize, size_t *ret) { @@ -1005,7 +1001,7 @@ sys_query_module(const char *name_user, int which, char *buf, size_t bufsize, return -ENOSYS; } -asmlinkage int +asmlinkage long sys_get_kernel_syms(struct kernel_sym *table) { return -ENOSYS; diff --git a/kernel/panic.c b/kernel/panic.c index 51ee692b5..48168d864 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -28,12 +28,14 @@ int panic_timeout = 0; struct notifier_block *panic_notifier_list = NULL; -void __init panic_setup(char *str, int *ints) +static int __init panic_setup(char *str) { - if (ints[0] == 1) - panic_timeout = ints[1]; + panic_timeout = simple_strtoul(str, NULL, 0); + return 1; } +__setup("panic=", panic_setup); + NORET_TYPE void panic(const char * fmt, ...) { static char buf[1024]; @@ -43,7 +45,7 @@ NORET_TYPE void panic(const char * fmt, ...) vsprintf(buf, fmt, args); va_end(args); printk(KERN_EMERG "Kernel panic: %s\n",buf); - if (current == task[0]) + if (current == init_tasks[0]) printk(KERN_EMERG "In swapper task - not syncing\n"); else if (in_interrupt()) printk(KERN_EMERG "In interrupt handler - not syncing\n"); diff --git a/kernel/printk.c b/kernel/printk.c index c2cbd4a78..330ce3efe 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -10,6 +10,8 @@ * elsewhere, in preparation for a serial line console (someday). * Ted Ts'o, 2/11/93. * Modified for sysctl support, 1/8/97, Chris Horn. + * Fixed SMP synchronization, 08/08/99, Manfred Spraul + * manfreds@colorfullife.com */ #include <linux/mm.h> @@ -21,6 +23,7 @@ #include <asm/uaccess.h> #define LOG_BUF_LEN (16384) +#define LOG_BUF_MASK (LOG_BUF_LEN-1) static char buf[1024]; @@ -40,6 +43,8 @@ int default_message_loglevel = DEFAULT_MESSAGE_LOGLEVEL; int minimum_console_loglevel = MINIMUM_CONSOLE_LOGLEVEL; int default_console_loglevel = DEFAULT_CONSOLE_LOGLEVEL; +spinlock_t console_lock = SPIN_LOCK_UNLOCKED; + struct console *console_drivers = NULL; static char log_buf[LOG_BUF_LEN]; static unsigned long log_start = 0; @@ -50,7 +55,7 @@ static int preferred_console = -1; /* * Setup a list of consoles. 
Called from init/main.c */ -void __init console_setup(char *str, int *ints) +static int __init console_setup(char *str) { struct console_cmdline *c; char name[sizeof(c->name)]; @@ -88,17 +93,19 @@ void __init console_setup(char *str, int *ints) if (strcmp(console_cmdline[i].name, name) == 0 && console_cmdline[i].index == idx) { preferred_console = i; - return; + return 1; } if (i == MAX_CMDLINECONSOLES) - return; + return 1; preferred_console = i; c = &console_cmdline[i]; memcpy(c->name, name, sizeof(c->name)); c->options = options; c->index = idx; + return 1; } +__setup("console=", console_setup); /* * Commands to do_syslog: @@ -115,12 +122,11 @@ void __init console_setup(char *str, int *ints) */ int do_syslog(int type, char * buf, int len) { - unsigned long i, j, count, flags; + unsigned long i, j, limit, count; int do_clear = 0; char c; int error = -EPERM; - lock_kernel(); error = 0; switch (type) { case 0: /* Close log */ @@ -141,18 +147,18 @@ int do_syslog(int type, char * buf, int len) if (error) goto out; i = 0; + spin_lock_irq(&console_lock); while (log_size && i < len) { - c = *((char *) log_buf+log_start); + c = log_buf[log_start & LOG_BUF_MASK]; log_start++; log_size--; - log_start &= LOG_BUF_LEN-1; - sti(); + spin_unlock_irq(&console_lock); __put_user(c,buf); buf++; i++; - cli(); + spin_lock_irq(&console_lock); } - sti(); + spin_unlock_irq(&console_lock); error = i; break; case 4: /* Read/clear last kernel messages */ @@ -168,35 +174,56 @@ int do_syslog(int type, char * buf, int len) error = verify_area(VERIFY_WRITE,buf,len); if (error) goto out; - /* - * The logged_chars, log_start, and log_size values may - * change from an interrupt, so we disable interrupts. - */ - __save_flags(flags); - __cli(); count = len; if (count > LOG_BUF_LEN) count = LOG_BUF_LEN; + spin_lock_irq(&console_lock); if (count > logged_chars) count = logged_chars; - j = log_start + log_size - count; - __restore_flags(flags); - for (i = 0; i < count; i++) { - c = *((char *) log_buf+(j++ & (LOG_BUF_LEN-1))); - __put_user(c, buf++); - } if (do_clear) logged_chars = 0; + limit = log_start + log_size; + /* + * __put_user() could sleep, and while we sleep + * printk() could overwrite the messages + * we try to copy to user space. Therefore + * the messages are copied in reverse. <manfreds> + */ + for(i=0;i < count;i++) { + j = limit-1-i; + if (j+LOG_BUF_LEN < log_start+log_size) + break; + c = log_buf[ j & LOG_BUF_MASK ]; + spin_unlock_irq(&console_lock); + __put_user(c,&buf[count-1-i]); + spin_lock_irq(&console_lock); + } + spin_unlock_irq(&console_lock); error = i; + if(i != count) { + int offset = count-error; + /* buffer overflow during copy, correct user buffer. 
*/ + for(i=0;i<error;i++) { + __get_user(c,&buf[i+offset]); + __put_user(c,&buf[i]); + } + } + break; case 5: /* Clear ring buffer */ + spin_lock_irq(&console_lock); logged_chars = 0; + spin_unlock_irq(&console_lock); break; case 6: /* Disable logging to console */ + spin_lock_irq(&console_lock); console_loglevel = minimum_console_loglevel; + spin_unlock_irq(&console_lock); break; case 7: /* Enable logging to console */ + spin_lock_irq(&console_lock); console_loglevel = default_console_loglevel; + spin_unlock_irq(&console_lock); break; case 8: error = -EINVAL; @@ -204,7 +231,9 @@ int do_syslog(int type, char * buf, int len) goto out; if (len < minimum_console_loglevel) len = minimum_console_loglevel; + spin_lock_irq(&console_lock); console_loglevel = len; + spin_unlock_irq(&console_lock); error = 0; break; default: @@ -212,20 +241,16 @@ int do_syslog(int type, char * buf, int len) break; } out: - unlock_kernel(); return error; } -asmlinkage int sys_syslog(int type, char * buf, int len) +asmlinkage long sys_syslog(int type, char * buf, int len) { if ((type != 3) && !capable(CAP_SYS_ADMIN)) return -EPERM; return do_syslog(type, buf, len); } - -spinlock_t console_lock; - asmlinkage int printk(const char *fmt, ...) { va_list args; @@ -259,13 +284,12 @@ asmlinkage int printk(const char *fmt, ...) } line_feed = 0; for (; p < buf_end; p++) { - log_buf[(log_start+log_size) & (LOG_BUF_LEN-1)] = *p; + log_buf[(log_start+log_size) & LOG_BUF_MASK] = *p; if (log_size < LOG_BUF_LEN) log_size++; - else { + else log_start++; - log_start &= LOG_BUF_LEN-1; - } + logged_chars++; if (*p == '\n') { line_feed = 1; @@ -290,24 +314,33 @@ asmlinkage int printk(const char *fmt, ...) void console_print(const char *s) { - struct console *c = console_drivers; + struct console *c; + unsigned long flags; int len = strlen(s); + spin_lock_irqsave(&console_lock,flags); + c = console_drivers; while(c) { if ((c->flags & CON_ENABLED) && c->write) c->write(c, s, len); c = c->next; } + spin_unlock_irqrestore(&console_lock,flags); } void unblank_console(void) { - struct console *c = console_drivers; + struct console *c; + unsigned long flags; + + spin_lock_irqsave(&console_lock,flags); + c = console_drivers; while(c) { if ((c->flags & CON_ENABLED) && c->unblank) c->unblank(); c = c->next; } + spin_unlock_irqrestore(&console_lock,flags); } /* @@ -318,11 +351,12 @@ void unblank_console(void) */ void register_console(struct console * console) { - int i,j,len; - int p = log_start; + int i, j,len; + int p; char buf[16]; signed char msg_level = -1; char *q; + unsigned long flags; /* * See if we want to use this console driver. If we @@ -368,6 +402,7 @@ void register_console(struct console * console) * Put this console in the list - keep the * preferred driver at the head of the list. */ + spin_lock_irqsave(&console_lock,flags); if ((console->flags & CON_CONSDEV) || console_drivers == NULL) { console->next = console_drivers; console_drivers = console; @@ -375,23 +410,33 @@ void register_console(struct console * console) console->next = console_drivers->next; console_drivers->next = console; } - if ((console->flags & CON_PRINTBUFFER) == 0) return; - + if ((console->flags & CON_PRINTBUFFER) == 0) + goto done; /* * Print out buffered log messages. 
*/ + p = log_start & LOG_BUF_MASK; + for (i=0,j=0; i < log_size; i++) { buf[j++] = log_buf[p]; - p++; p &= LOG_BUF_LEN-1; + p = (p+1) & LOG_BUF_MASK; if (buf[j-1] != '\n' && i < log_size - 1 && j < sizeof(buf)-1) continue; buf[j] = 0; q = buf; len = j; if (msg_level < 0) { - msg_level = buf[1] - '0'; - q = buf + 3; - len -= 3; + if(buf[0] == '<' && + buf[1] >= '0' && + buf[1] <= '7' && + buf[2] == '>') { + msg_level = buf[1] - '0'; + q = buf + 3; + len -= 3; + } else + { + msg_level = default_message_loglevel; + } } if (msg_level < console_loglevel) console->write(console, q, len); @@ -399,26 +444,35 @@ void register_console(struct console * console) msg_level = -1; j = 0; } +done: + spin_unlock_irqrestore(&console_lock,flags); } int unregister_console(struct console * console) { struct console *a,*b; - + unsigned long flags; + int res = 1; + + spin_lock_irqsave(&console_lock,flags); if (console_drivers == console) { console_drivers=console->next; - return (0); - } - for (a=console_drivers->next, b=console_drivers ; - a; b=a, a=b->next) { - if (a == console) { - b->next = a->next; - return 0; - } + res = 0; + } else + { + for (a=console_drivers->next, b=console_drivers ; + a; b=a, a=b->next) { + if (a == console) { + b->next = a->next; + res = 0; + break; + } + } } - return (1); + spin_unlock_irqrestore(&console_lock,flags); + return res; } /* diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 0d007d492..35fa9768d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -10,6 +10,7 @@ #include <linux/sched.h> #include <linux/errno.h> #include <linux/mm.h> +#include <linux/bigmem.h> #include <asm/pgtable.h> #include <asm/uaccess.h> @@ -39,6 +40,8 @@ repeat: if (!pte_present(*pgtable)) goto fault_in_page; page = pte_page(*pgtable); + if (write && (!pte_write(*pgtable) || !pte_dirty(*pgtable))) + goto fault_in_page; if (MAP_NR(page) >= max_mapnr) return 0; flush_cache_page(vma, addr); @@ -50,7 +53,11 @@ repeat: dst = src; src = buf; } + src = (void *) kmap((unsigned long) src, KM_READ); + dst = (void *) kmap((unsigned long) dst, KM_WRITE); memcpy(dst, src, len); + kunmap((unsigned long) src, KM_READ); + kunmap((unsigned long) dst, KM_WRITE); } flush_page_to_ram(page); return len; diff --git a/kernel/resource.c b/kernel/resource.c index 4c672c6b0..26ee5e29d 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1,232 +1,293 @@ /* * linux/kernel/resource.c * - * Copyright (C) 1995, 1999 Linus Torvalds - * David Hinds + * Copyright (C) 1999 Linus Torvalds + * Copyright (C) 1999 Martin Mares <mj@ucw.cz> * - * Kernel resource management - * - * We now distinguish between claiming space for devices (using the - * 'occupy' and 'vacate' calls), and associating a resource with a - * device driver (with the 'request', 'release', and 'check' calls). - * A resource can be claimed even if there is no associated driver - * (by occupying with name=NULL). Vacating a resource makes it - * available for other dynamically configured devices. + * Arbitrary resource management. 
*/ #include <linux/sched.h> #include <linux/errno.h> #include <linux/ioport.h> #include <linux/init.h> +#include <linux/malloc.h> +#include <linux/spinlock.h> -#define RSRC_TABLE_SIZE 128 - -struct resource_entry { - u_long from, num; - const char *name; - struct resource_entry *next; -}; +struct resource ioport_resource = { "PCI IO", 0x0000, 0xFFFF, IORESOURCE_IO }; +struct resource iomem_resource = { "PCI mem", 0x00000000, 0xFFFFFFFF, IORESOURCE_MEM }; -struct resource_entry res_list[] = { - { 0, 0, NULL, NULL }, /* IO */ - { 0, 0, NULL, NULL } /* mem */ -}; - -static struct resource_entry rsrc_table[RSRC_TABLE_SIZE]; +static rwlock_t resource_lock = RW_LOCK_UNLOCKED; /* * This generates reports for /proc/ioports and /proc/memory */ -int get_resource_list(int class, char *buf) +static char * do_resource_list(struct resource *entry, const char *fmt, int offset, char *buf, char *end) { - struct resource_entry *root = &res_list[class]; - struct resource_entry *p; - int len = 0; - char *fmt = (class == RES_IO) ? - "%04lx-%04lx : %s\n" : "%08lx-%08lx : %s\n"; - - for (p = root->next; (p) && (len < 4000); p = p->next) - len += sprintf(buf+len, fmt, p->from, p->from+p->num-1, - (p->name ? p->name : "occupied")); - if (p) - len += sprintf(buf+len, "4K limit reached!\n"); - return len; + if (offset < 0) + offset = 0; + + while (entry) { + const char *name = entry->name; + unsigned long from, to; + + if ((int) (end-buf) < 80) + return buf; + + from = entry->start; + to = entry->end; + if (!name) + name = "<BAD>"; + + buf += sprintf(buf, fmt + offset, from, to, name); + if (entry->child) + buf = do_resource_list(entry->child, fmt, offset-2, buf, end); + entry = entry->sibling; + } + + return buf; } -/* - * Basics: find a matching resource entry, or find an insertion point - */ -static struct resource_entry * -find_match(struct resource_entry *root, u_long from, u_long num) +int get_resource_list(struct resource *root, char *buf, int size) { - struct resource_entry *p; - for (p = root; p; p = p->next) - if ((p->from == from) && (p->num == num)) - return p; - return NULL; -} + char *fmt; + int retval; -static struct resource_entry * -find_gap(struct resource_entry *root, u_long from, u_long num) + fmt = " %08lx-%08lx : %s\n"; + if (root == &ioport_resource) + fmt = " %04lx-%04lx : %s\n"; + read_lock(&resource_lock); + retval = do_resource_list(root->child, fmt, 8, buf, buf + size) - buf; + read_unlock(&resource_lock); + return retval; +} + +/* Return the conflict entry if you can't request it */ +static struct resource * __request_resource(struct resource *root, struct resource *new) { - struct resource_entry *p; - if (from > from+num-1) - return NULL; - for (p = root; ; p = p->next) { - if ((p != root) && (p->from+p->num-1 >= from)) { - p = NULL; - break; + unsigned long start = new->start; + unsigned long end = new->end; + struct resource *tmp, **p; + + if (end < start) + return root; + if (start < root->start) + return root; + if (end > root->end) + return root; + p = &root->child; + for (;;) { + tmp = *p; + if (!tmp || tmp->start > end) { + new->sibling = tmp; + *p = new; + new->parent = root; + return NULL; } - if ((p->next == NULL) || (p->next->from > from+num-1)) - break; + p = &tmp->sibling; + if (tmp->end < start) + continue; + return tmp; } - return p; } -/* - * Call this from a driver to assert ownership of a resource - */ -void request_resource(int class, unsigned long from, - unsigned long num, const char *name) +int request_resource(struct resource *root, struct resource *new) { - 
struct resource_entry *root = &res_list[class]; - struct resource_entry *p; - long flags; - int i; - - p = find_match(root, from, num); - if (p) { - p->name = name; - return; - } + struct resource *conflict; + + write_lock(&resource_lock); + conflict = __request_resource(root, new); + write_unlock(&resource_lock); + return conflict ? -EBUSY : 0; +} + +int release_resource(struct resource *old) +{ + struct resource *tmp, **p; - save_flags(flags); - cli(); - for (i = 0; i < RSRC_TABLE_SIZE; i++) - if (rsrc_table[i].num == 0) + p = &old->parent->child; + for (;;) { + tmp = *p; + if (!tmp) break; - if (i == RSRC_TABLE_SIZE) - printk("warning: resource table is full\n"); - else { - p = find_gap(root, from, num); - if (p == NULL) { - restore_flags(flags); - return; + if (tmp == old) { + *p = tmp->sibling; + old->parent = NULL; + return 0; } - rsrc_table[i].name = name; - rsrc_table[i].from = from; - rsrc_table[i].num = num; - rsrc_table[i].next = p->next; - p->next = &rsrc_table[i]; + p = &tmp->sibling; } - restore_flags(flags); + return -EINVAL; } -/* - * Call these when a driver is unloaded but the device remains +/* + * Find empty slot in the resource tree given range and alignment. */ -void release_resource(int class, unsigned long from, unsigned long num) +static int find_resource(struct resource *root, struct resource *new, + unsigned long size, + unsigned long min, unsigned long max, + unsigned long align) { - struct resource_entry *root = &res_list[class]; - struct resource_entry *p; - p = find_match(root, from, num); - if (p) p->name = NULL; + struct resource *this = root->child; + unsigned long start, end; + + start = root->start; + for(;;) { + if (this) + end = this->start; + else + end = root->end; + if (start < min) + start = min; + if (end > max) + end = max; + start = (start + align - 1) & ~(align - 1); + if (start < end && end - start + 1 >= size) { + new->start = start; + new->end = start + size - 1; + return 0; + } + if (!this) + break; + start = this->end + 1; + this = this->sibling; + } + return -EBUSY; } /* - * Call these to check a region for conflicts before probing + * Allocate empty slot in the resource tree given range and alignment. */ -int check_resource(int class, unsigned long from, unsigned long num) +int allocate_resource(struct resource *root, struct resource *new, + unsigned long size, + unsigned long min, unsigned long max, + unsigned long align) { - struct resource_entry *root = &res_list[class]; - struct resource_entry *p; - p = find_match(root, from, num); - if (p != NULL) - return (p->name != NULL) ? -EBUSY : 0; - return (find_gap(root, from, num) == NULL) ? -EBUSY : 0; + int err; + + write_lock(&resource_lock); + err = find_resource(root, new, size, min, max, align); + if (err >= 0 && __request_resource(root, new)) + err = -EBUSY; + write_unlock(&resource_lock); + return err; } /* - * Call this to claim a resource for a piece of hardware + * This is compatibility stuff for IO resources. + * + * Note how this, unlike the above, knows about + * the IO flag meanings (busy etc). + * + * Request-region creates a new busy region. + * + * Check-region returns non-zero if the area is already busy + * + * Release-region releases a matching busy region. 
*/ -unsigned long occupy_resource(int class, unsigned long base, - unsigned long end, unsigned long num, - unsigned long align, const char *name) +struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name) { - struct resource_entry *root = &res_list[class]; - unsigned long from = 0, till; - unsigned long flags; - int i; - struct resource_entry *p, *q; + struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL); - if ((base > end-1) || (num > end - base)) - return 0; + if (res) { + memset(res, 0, sizeof(*res)); + res->name = name; + res->start = start; + res->end = start + n - 1; + res->flags = IORESOURCE_BUSY; - for (i = 0; i < RSRC_TABLE_SIZE; i++) - if (rsrc_table[i].num == 0) - break; - if (i == RSRC_TABLE_SIZE) - return 0; - - save_flags(flags); - cli(); - /* printk("occupy: search in %08lx[%08lx] ", base, end - base); */ - for (p = root; p != NULL; p = q) { - q = p->next; - /* Find window in list */ - from = (p->from+p->num + align-1) & ~(align-1); - till = (q == NULL) ? (0 - align) : q->from; - /* printk(" %08lx:%08lx", from, till); */ - /* Clip window with base and end */ - if (from < base) from = base; - if (till > end) till = end; - /* See if result is large enougth */ - if ((from < till) && (from + num < till)) + write_lock(&resource_lock); + + for (;;) { + struct resource *conflict; + + conflict = __request_resource(parent, res); + if (!conflict) + break; + if (conflict != parent) { + parent = conflict; + if (!(conflict->flags & IORESOURCE_BUSY)) + continue; + } + + /* Uhhuh, that didn't work out.. */ + kfree(res); + res = NULL; break; + } + write_unlock(&resource_lock); } - /* printk("\r\n"); */ - restore_flags(flags); - - if (p == NULL) - return 0; - - rsrc_table[i].name = name; - rsrc_table[i].from = from; - rsrc_table[i].num = num; - rsrc_table[i].next = p->next; - p->next = &rsrc_table[i]; - return from; + return res; } -/* - * Call this when a resource becomes available for other hardware - */ -void vacate_resource(int class, unsigned long from, unsigned long num) +int __check_region(struct resource *parent, unsigned long start, unsigned long n) { - struct resource_entry *root = &res_list[class]; - struct resource_entry *p, *q; - long flags; - - save_flags(flags); - cli(); - for (p = root; ; p = q) { - q = p->next; - if (q == NULL) - break; - if ((q->from == from) && (q->num == num)) { - q->num = 0; - p->next = q->next; + struct resource * res; + + res = __request_region(parent, start, n, "check-region"); + if (!res) + return -EBUSY; + + release_resource(res); + kfree(res); + return 0; +} + +void __release_region(struct resource *parent, unsigned long start, unsigned long n) +{ + struct resource **p; + unsigned long end; + + p = &parent->child; + end = start + n - 1; + + for (;;) { + struct resource *res = *p; + + if (!res) break; + if (res->start <= start && res->end >= end) { + if (!(res->flags & IORESOURCE_BUSY)) { + p = &res->child; + continue; + } + if (res->start != start || res->end != end) + break; + *p = res->sibling; + kfree(res); + return; } + p = &res->sibling; } - restore_flags(flags); + printk("Trying to free nonexistent resource <%04lx-%04lx>\n", start, end); } -/* Called from init/main.c to reserve IO ports. */ -void __init reserve_setup(char *str, int *ints) +/* + * Called from init/main.c to reserve IO ports. 
+ */ +#define MAXRESERVE 4 +static int __init reserve_setup(char *str) { - int i; + int opt = 2, io_start, io_num; + static int reserved = 0; + static struct resource reserve[MAXRESERVE]; + + while (opt==2) { + int x = reserved; - for (i = 1; i < ints[0]; i += 2) - request_region(ints[i], ints[i+1], "reserved"); + if (get_option (&str, &io_start) != 2) break; + if (get_option (&str, &io_num) == 0) break; + if (x < MAXRESERVE) { + struct resource *res = reserve + x; + res->name = "reserved"; + res->start = io_start; + res->end = io_start + io_num - 1; + res->child = NULL; + if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0) + reserved = x+1; + } + } + return 1; } + +__setup("reserve=", reserve_setup); diff --git a/kernel/sched.c b/kernel/sched.c index 95b9b823c..431d5c719 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -36,7 +36,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/mmu_context.h> -#include <asm/semaphore-helper.h> #include <linux/timex.h> @@ -94,7 +93,23 @@ unsigned long volatile jiffies=0; * via the SMP irq return path. */ -struct task_struct * task[NR_TASKS] = {&init_task, }; +struct task_struct * init_tasks[NR_CPUS] = {&init_task, }; + +/* + * The tasklist_lock protects the linked list of processes. + * + * The scheduler lock is protecting against multiple entry + * into the scheduling code, and doesn't need to worry + * about interrupts (because interrupts cannot call the + * scheduler). + * + * The run-queue lock locks the parts that actually access + * and change the run-queues, and have to be interrupt-safe. + */ +spinlock_t runqueue_lock = SPIN_LOCK_UNLOCKED; /* second */ +rwlock_t tasklist_lock = RW_LOCK_UNLOCKED; /* third */ + +static LIST_HEAD(runqueue_head); /* * We align per-CPU scheduling data on cacheline boundaries, @@ -114,7 +129,7 @@ struct kernel_stat kstat = { 0 }; #ifdef __SMP__ -#define idle_task(cpu) (task[cpu_number_map[(cpu)]]) +#define idle_task(cpu) (init_tasks[cpu_number_map[(cpu)]]) #define can_schedule(p) (!(p)->has_cpu) #else @@ -140,8 +155,7 @@ void scheduling_functions_start_here(void) { } * +1000: realtime process, select this. */ -static inline int goodness (struct task_struct * prev, - struct task_struct * p, int this_cpu) +static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm) { int weight; @@ -174,7 +188,7 @@ static inline int goodness (struct task_struct * prev, #endif /* .. and a slight advantage to the current MM */ - if (p->mm == prev->mm) + if (p->mm == this_mm) weight += 1; weight += p->priority; @@ -191,89 +205,32 @@ out: * to care about SCHED_YIELD is when we calculate the previous process' * goodness ... */ -static inline int prev_goodness (struct task_struct * prev, - struct task_struct * p, int this_cpu) +static inline int prev_goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm) { if (p->policy & SCHED_YIELD) { p->policy &= ~SCHED_YIELD; return 0; } - return goodness(prev, p, this_cpu); + return goodness(p, this_cpu, this_mm); } /* * the 'goodness value' of replacing a process on a given CPU. * positive value means 'replace', zero or negative means 'dont'. 
*/ -static inline int preemption_goodness (struct task_struct * prev, - struct task_struct * p, int cpu) +static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu) { - return goodness(prev, p, cpu) - goodness(prev, prev, cpu); + return goodness(p, cpu, prev->mm) - goodness(prev, cpu, prev->mm); } -/* - * If there is a dependency between p1 and p2, - * don't be too eager to go into the slow schedule. - * In particular, if p1 and p2 both want the kernel - * lock, there is no point in trying to make them - * extremely parallel.. - * - * (No lock - lock_depth < 0) - * - * There are two additional metrics here: - * - * first, a 'cutoff' interval, currently 0-200 usecs on - * x86 CPUs, depending on the size of the 'SMP-local cache'. - * If the current process has longer average timeslices than - * this, then we utilize the idle CPU. - * - * second, if the wakeup comes from a process context, - * then the two processes are 'related'. (they form a - * 'gang') - * - * An idle CPU is almost always a bad thing, thus we skip - * the idle-CPU utilization only if both these conditions - * are true. (ie. a 'process-gang' rescheduling with rather - * high frequency should stay on the same CPU). - * - * [We can switch to something more finegrained in 2.3.] - * - * do not 'guess' if the to-be-scheduled task is RT. - */ -#define related(p1,p2) (((p1)->lock_depth >= 0) && (p2)->lock_depth >= 0) && \ - (((p2)->policy == SCHED_OTHER) && ((p1)->avg_slice < cacheflush_time)) - -static inline void reschedule_idle_slow(struct task_struct * p) +static void reschedule_idle(struct task_struct * p) { #ifdef __SMP__ -/* - * (see reschedule_idle() for an explanation first ...) - * - * Pass #2 - * - * We try to find another (idle) CPU for this woken-up process. - * - * On SMP, we mostly try to see if the CPU the task used - * to run on is idle.. but we will use another idle CPU too, - * at this point we already know that this CPU is not - * willing to reschedule in the near future. - * - * An idle CPU is definitely wasted, especially if this CPU is - * running long-timeslice processes. The following algorithm is - * pretty good at finding the best idle CPU to send this process - * to. - * - * [We can try to preempt low-priority processes on other CPUs in - * 2.3. Also we can try to use the avg_slice value to predict - * 'likely reschedule' events even on other CPUs.] - */ int this_cpu = smp_processor_id(), target_cpu; struct task_struct *tsk, *target_tsk; - int cpu, best_cpu, weight, best_weight, i; + int cpu, best_cpu, i; unsigned long flags; - best_weight = 0; /* prevents negative weight */ - spin_lock_irqsave(&runqueue_lock, flags); /* @@ -289,15 +246,17 @@ static inline void reschedule_idle_slow(struct task_struct * p) for (i = 0; i < smp_num_cpus; i++) { cpu = cpu_logical_map(i); tsk = cpu_curr(cpu); - if (related(tsk, p)) - goto out_no_target; - weight = preemption_goodness(tsk, p, cpu); - if (weight > best_weight) { - best_weight = weight; + if (tsk == idle_task(cpu)) target_tsk = tsk; - } } + if (target_tsk && p->avg_slice > cacheflush_time) + goto send_now; + + tsk = cpu_curr(best_cpu); + if (preemption_goodness(tsk, p, best_cpu) > 0) + target_tsk = tsk; + /* * found any suitable CPU? */ @@ -328,35 +287,6 @@ out_no_target: #endif } -static void reschedule_idle(struct task_struct * p) -{ -#ifdef __SMP__ - int cpu = smp_processor_id(); - /* - * ("wakeup()" should not be called before we've initialized - * SMP completely. 
- * Basically a not-yet initialized SMP subsystem can be - * considered as a not-yet working scheduler, simply dont use - * it before it's up and running ...) - * - * SMP rescheduling is done in 2 passes: - * - pass #1: faster: 'quick decisions' - * - pass #2: slower: 'lets try and find a suitable CPU' - */ - - /* - * Pass #1. (subtle. We might be in the middle of __switch_to, so - * to preserve scheduling atomicity we have to use cpu_curr) - */ - if ((p->processor == cpu) && related(cpu_curr(cpu), p)) - return; -#endif /* __SMP__ */ - /* - * Pass #2 - */ - reschedule_idle_slow(p); -} - /* * Careful! * @@ -366,74 +296,23 @@ static void reschedule_idle(struct task_struct * p) */ static inline void add_to_runqueue(struct task_struct * p) { - struct task_struct *next = init_task.next_run; - - p->prev_run = &init_task; - init_task.next_run = p; - p->next_run = next; - next->prev_run = p; + list_add(&p->run_list, &runqueue_head); nr_running++; } -static inline void del_from_runqueue(struct task_struct * p) -{ - struct task_struct *next = p->next_run; - struct task_struct *prev = p->prev_run; - - nr_running--; - next->prev_run = prev; - prev->next_run = next; - p->next_run = NULL; - p->prev_run = NULL; -} - static inline void move_last_runqueue(struct task_struct * p) { - struct task_struct *next = p->next_run; - struct task_struct *prev = p->prev_run; - - /* remove from list */ - next->prev_run = prev; - prev->next_run = next; - /* add back to list */ - p->next_run = &init_task; - prev = init_task.prev_run; - init_task.prev_run = p; - p->prev_run = prev; - prev->next_run = p; + list_del(&p->run_list); + list_add_tail(&p->run_list, &runqueue_head); } static inline void move_first_runqueue(struct task_struct * p) { - struct task_struct *next = p->next_run; - struct task_struct *prev = p->prev_run; - - /* remove from list */ - next->prev_run = prev; - prev->next_run = next; - /* add back to list */ - p->prev_run = &init_task; - next = init_task.next_run; - init_task.next_run = p; - p->next_run = next; - next->prev_run = p; + list_del(&p->run_list); + list_add(&p->run_list, &runqueue_head); } /* - * The tasklist_lock protects the linked list of processes. - * - * The scheduler lock is protecting against multiple entry - * into the scheduling code, and doesn't need to worry - * about interrupts (because interrupts cannot call the - * scheduler). - * - * The run-queue lock locks the parts that actually access - * and change the run-queues, and have to be interrupt-safe. - */ -spinlock_t runqueue_lock = SPIN_LOCK_UNLOCKED; /* second */ -rwlock_t tasklist_lock = RW_LOCK_UNLOCKED; /* third */ - -/* * Wake up a process. Put it on the run-queue if it's not * already there. The "current" process is always on the * run-queue (except when the actual re-schedule is in @@ -450,7 +329,7 @@ void wake_up_process(struct task_struct * p) */ spin_lock_irqsave(&runqueue_lock, flags); p->state = TASK_RUNNING; - if (p->next_run) + if (task_on_runqueue(p)) goto out; add_to_runqueue(p); spin_unlock_irqrestore(&runqueue_lock, flags); @@ -657,7 +536,7 @@ signed long schedule_timeout(signed long timeout) * cleans up all remaining scheduler things, without impacting the * common case. 
*/ -static inline void __schedule_tail (struct task_struct *prev) +static inline void __schedule_tail(struct task_struct *prev) { #ifdef __SMP__ if ((prev->state == TASK_RUNNING) && @@ -668,7 +547,7 @@ static inline void __schedule_tail (struct task_struct *prev) #endif /* __SMP__ */ } -void schedule_tail (struct task_struct *prev) +void schedule_tail(struct task_struct *prev) { __schedule_tail(prev); } @@ -687,8 +566,10 @@ asmlinkage void schedule(void) { struct schedule_data * sched_data; struct task_struct *prev, *next, *p; + struct list_head *tmp; int this_cpu, c; + if (!current->active_mm) BUG(); if (tq_scheduler) goto handle_tq_scheduler; tq_scheduler_back: @@ -731,42 +612,29 @@ move_rr_back: } prev->need_resched = 0; -repeat_schedule: - /* * this is the scheduler proper: */ - p = init_task.next_run; - /* Default process to select.. */ +repeat_schedule: + /* + * Default process to select.. + */ next = idle_task(this_cpu); c = -1000; if (prev->state == TASK_RUNNING) goto still_running; still_running_back: - /* - * This is subtle. - * Note how we can enable interrupts here, even - * though interrupts can add processes to the run- - * queue. This is because any new processes will - * be added to the front of the queue, so "p" above - * is a safe starting point. - * run-queue deletion and re-ordering is protected by - * the scheduler lock - */ -/* - * Note! there may appear new tasks on the run-queue during this, as - * interrupts are enabled. However, they will be put on front of the - * list, so our list starting at "p" is essentially fixed. - */ - while (p != &init_task) { + tmp = runqueue_head.next; + while (tmp != &runqueue_head) { + p = list_entry(tmp, struct task_struct, run_list); if (can_schedule(p)) { - int weight = goodness(prev, p, this_cpu); + int weight = goodness(p, this_cpu, prev->active_mm); if (weight > c) c = weight, next = p; } - p = p->next_run; + tmp = tmp->next; } /* Do we need to re-calculate counters? */ @@ -819,12 +687,42 @@ still_running_back: #endif /* __SMP__ */ kstat.context_swtch++; - get_mmu_context(next); + /* + * there are 3 processes which are affected by a context switch: + * + * prev == .... ==> (last => next) + * + * It's the 'much more previous' 'prev' that is on next's stack, + * but prev is set to (the just run) 'last' process by switch_to(). + * This might sound slightly confusing but makes tons of sense. + */ + prepare_to_switch(); + { + struct mm_struct *mm = next->mm; + struct mm_struct *oldmm = prev->active_mm; + if (!mm) { + if (next->active_mm) BUG(); + next->active_mm = oldmm; + atomic_inc(&oldmm->mm_count); + } else { + if (next->active_mm != mm) BUG(); + switch_mm(oldmm, mm, next, this_cpu); + } + + if (!prev->mm) { + prev->active_mm = NULL; + mmdrop(oldmm); + } + } + + /* + * This just switches the register state and the + * stack. 
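[The mm/active_mm split in this schedule() hunk is the heart of the new lazy-TLB scheme: a kernel thread has mm == NULL, borrows the previous task's address space through active_mm, and pins it with mm_count so it cannot be freed out from under the MMU; the borrow is returned with mmdrop() when the lazy task is switched away from. Below is a standalone model of just that bookkeeping, with the BUG() checks and the real switch_mm() call left out; illustrative C, not kernel source.]

#include <stdio.h>

struct mm_struct { int mm_count; const char *name; };

struct task_struct {
	struct mm_struct *mm;        /* NULL for kernel threads */
	struct mm_struct *active_mm; /* what the MMU currently holds */
	const char *comm;
};

static void mmdrop(struct mm_struct *mm)
{
	if (--mm->mm_count == 0)
		printf("  %s: last reference gone, would be freed\n", mm->name);
}

/* the mm hand-over from the hunk above, minus switch_mm() and BUG() */
static void hand_over_mm(struct task_struct *prev, struct task_struct *next)
{
	struct mm_struct *mm = next->mm;
	struct mm_struct *oldmm = prev->active_mm;

	if (!mm) {                      /* kernel thread: borrow oldmm lazily */
		next->active_mm = oldmm;
		oldmm->mm_count++;      /* atomic_inc(&oldmm->mm_count) */
	}
	/* else: user task; the kernel calls switch_mm(oldmm, mm, next, cpu) */

	if (!prev->mm) {                /* prev was itself a borrower */
		prev->active_mm = NULL;
		mmdrop(oldmm);          /* give the borrowed reference back */
	}
}

int main(void)
{
	struct mm_struct user_mm = { 1, "user_mm" };
	struct task_struct shell  = { &user_mm, &user_mm, "shell" };
	struct task_struct kswapd = { NULL, NULL, "kswapd" };

	hand_over_mm(&shell, &kswapd);  /* shell -> kswapd: borrow */
	printf("kswapd runs on %s, mm_count = %d\n",
	       kswapd.active_mm->name, user_mm.mm_count);

	hand_over_mm(&kswapd, &shell);  /* kswapd -> shell: borrow returned */
	printf("back to shell, mm_count = %d\n", user_mm.mm_count);
	return 0;
}

[This is also why sched_init() below now does atomic_inc(&init_mm.mm_count): the boot idle thread is the first such lazy borrower.]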
+ */ switch_to(prev, next, prev); __schedule_tail(prev); same_process: - reacquire_kernel_lock(current); return; @@ -837,11 +735,11 @@ recalculate: p->counter = (p->counter >> 1) + p->priority; read_unlock(&tasklist_lock); spin_lock_irq(&runqueue_lock); - goto repeat_schedule; } + goto repeat_schedule; still_running: - c = prev_goodness(prev, prev, this_cpu); + c = prev_goodness(prev, this_cpu, prev->active_mm); next = prev; goto still_running_back; @@ -912,128 +810,6 @@ out: return; } -/* - * Semaphores are implemented using a two-way counter: - * The "count" variable is decremented for each process - * that tries to sleep, while the "waking" variable is - * incremented when the "up()" code goes to wake up waiting - * processes. - * - * Notably, the inline "up()" and "down()" functions can - * efficiently test if they need to do any extra work (up - * needs to do something only if count was negative before - * the increment operation. - * - * waking_non_zero() (from asm/semaphore.h) must execute - * atomically. - * - * When __up() is called, the count was negative before - * incrementing it, and we need to wake up somebody. - * - * This routine adds one to the count of processes that need to - * wake up and exit. ALL waiting processes actually wake up but - * only the one that gets to the "waking" field first will gate - * through and acquire the semaphore. The others will go back - * to sleep. - * - * Note that these functions are only called when there is - * contention on the lock, and as such all this is the - * "non-critical" part of the whole semaphore business. The - * critical part is the inline stuff in <asm/semaphore.h> - * where we want to avoid any extra jumps and calls. - */ -void __up(struct semaphore *sem) -{ - wake_one_more(sem); - wake_up(&sem->wait); -} - -/* - * Perform the "down" function. Return zero for semaphore acquired, - * return negative for signalled out of the function. - * - * If called from __down, the return is ignored and the wait loop is - * not interruptible. This means that a task waiting on a semaphore - * using "down()" cannot be killed until someone does an "up()" on - * the semaphore. - * - * If called from __down_interruptible, the return value gets checked - * upon return. If the return value is negative then the task continues - * with the negative value in the return register (it can be tested by - * the caller). - * - * Either form may be used in conjunction with "up()". - * - */ - -#define DOWN_VAR \ - struct task_struct *tsk = current; \ - wait_queue_t wait; \ - init_waitqueue_entry(&wait, tsk); - -#define DOWN_HEAD(task_state) \ - \ - \ - tsk->state = (task_state); \ - add_wait_queue(&sem->wait, &wait); \ - \ - /* \ - * Ok, we're set up. sem->count is known to be less than zero \ - * so we must wait. \ - * \ - * We can let go the lock for purposes of waiting. \ - * We re-acquire it after awaking so as to protect \ - * all semaphore operations. \ - * \ - * If "up()" is called before we call waking_non_zero() then \ - * we will catch it right away. If it is called later then \ - * we will have to go through a wakeup cycle to catch it. \ - * \ - * Multiple waiters contend for the semaphore lock to see \ - * who gets to gate through and who has to wait some more. 
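[The semaphore code removed from sched.c in the hunks below is the two-way counter its comments describe: count goes negative as sleepers queue up, __up() banks one wakeup in waking, every sleeper is woken, and only the first to consume waking gates through; the rest go back to sleep. Here is a userspace pthread model of that protocol, for illustration only (compile with -lpthread; the kernel version does this with waitqueues and waking_non_zero()).]

#include <pthread.h>
#include <stdio.h>

struct sem {
	int count;   /* free slots; negative => |count| sleepers */
	int waking;  /* wakeups banked by up(), not yet consumed */
	pthread_mutex_t lock;
	pthread_cond_t wait;
};

static void down(struct sem *s)
{
	pthread_mutex_lock(&s->lock);
	if (--s->count < 0) {
		while (s->waking == 0)           /* woken? race for the baton */
			pthread_cond_wait(&s->wait, &s->lock);
		s->waking--;                     /* won: gate through */
	}
	pthread_mutex_unlock(&s->lock);
}

static void up(struct sem *s)
{
	pthread_mutex_lock(&s->lock);
	if (++s->count <= 0) {                   /* somebody is asleep */
		s->waking++;
		pthread_cond_broadcast(&s->wait); /* wake all, one wins */
	}
	pthread_mutex_unlock(&s->lock);
}

static struct sem sem = { 1, 0, PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER };

static void *worker(void *arg)
{
	down(&sem);
	printf("worker %ld in critical section\n", (long)arg);
	up(&sem);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;
	pthread_create(&t1, NULL, worker, (void *)1L);
	pthread_create(&t2, NULL, worker, (void *)2L);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}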
\ - */ \ - for (;;) { - -#define DOWN_TAIL(task_state) \ - tsk->state = (task_state); \ - } \ - tsk->state = TASK_RUNNING; \ - remove_wait_queue(&sem->wait, &wait); - -void __down(struct semaphore * sem) -{ - DOWN_VAR - DOWN_HEAD(TASK_UNINTERRUPTIBLE) - if (waking_non_zero(sem)) - break; - schedule(); - DOWN_TAIL(TASK_UNINTERRUPTIBLE) -} - -int __down_interruptible(struct semaphore * sem) -{ - int ret = 0; - DOWN_VAR - DOWN_HEAD(TASK_INTERRUPTIBLE) - - ret = waking_non_zero_interruptible(sem, tsk); - if (ret) - { - if (ret == 1) - /* ret != 0 only if we get interrupted -arca */ - ret = 0; - break; - } - schedule(); - DOWN_TAIL(TASK_INTERRUPTIBLE) - return ret; -} - -int __down_trylock(struct semaphore * sem) -{ - return waking_non_zero_trylock(sem); -} - #define SLEEP_ON_VAR \ unsigned long flags; \ wait_queue_t wait; \ @@ -1533,13 +1309,13 @@ void do_timer(struct pt_regs * regs) mark_bh(TQUEUE_BH); } -#ifndef __alpha__ +#if !defined(__alpha__) && !defined(__ia64__) /* * For backwards compatibility? This can be done in libc so Alpha * and all newer ports shouldn't need it. */ -asmlinkage unsigned int sys_alarm(unsigned int seconds) +asmlinkage unsigned long sys_alarm(unsigned int seconds) { struct itimerval it_new, it_old; unsigned int oldalarm; @@ -1556,12 +1332,16 @@ asmlinkage unsigned int sys_alarm(unsigned int seconds) return oldalarm; } +#endif + +#ifndef __alpha__ + /* * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this * should be moved into arch/i386 instead? */ -asmlinkage int sys_getpid(void) +asmlinkage long sys_getpid(void) { /* This is SMP safe - current->pid doesn't change */ return current->pid; @@ -1590,7 +1370,7 @@ asmlinkage int sys_getpid(void) * a small window for a race, using the old pointer is * harmless for a while). */ -asmlinkage int sys_getppid(void) +asmlinkage long sys_getppid(void) { int pid; struct task_struct * me = current; @@ -1613,25 +1393,25 @@ asmlinkage int sys_getppid(void) return pid; } -asmlinkage int sys_getuid(void) +asmlinkage long sys_getuid(void) { /* Only we change this so SMP safe */ return current->uid; } -asmlinkage int sys_geteuid(void) +asmlinkage long sys_geteuid(void) { /* Only we change this so SMP safe */ return current->euid; } -asmlinkage int sys_getgid(void) +asmlinkage long sys_getgid(void) { /* Only we change this so SMP safe */ return current->gid; } -asmlinkage int sys_getegid(void) +asmlinkage long sys_getegid(void) { /* Only we change this so SMP safe */ return current->egid; @@ -1643,7 +1423,7 @@ asmlinkage int sys_getegid(void) * it for backward compatibility? 
*/ -asmlinkage int sys_nice(int increment) +asmlinkage long sys_nice(int increment) { unsigned long newprio; int increase = 0; @@ -1760,7 +1540,7 @@ static int setscheduler(pid_t pid, int policy, retval = 0; p->policy = policy; p->rt_priority = lp.sched_priority; - if (p->next_run) + if (task_on_runqueue(p)) move_first_runqueue(p); current->need_resched = 1; @@ -1773,18 +1553,18 @@ out_nounlock: return retval; } -asmlinkage int sys_sched_setscheduler(pid_t pid, int policy, +asmlinkage long sys_sched_setscheduler(pid_t pid, int policy, struct sched_param *param) { return setscheduler(pid, policy, param); } -asmlinkage int sys_sched_setparam(pid_t pid, struct sched_param *param) +asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param *param) { return setscheduler(pid, -1, param); } -asmlinkage int sys_sched_getscheduler(pid_t pid) +asmlinkage long sys_sched_getscheduler(pid_t pid) { struct task_struct *p; int retval; @@ -1809,7 +1589,7 @@ out_nounlock: return retval; } -asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param) +asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param) { struct task_struct *p; struct sched_param lp; @@ -1840,7 +1620,7 @@ out_unlock: return retval; } -asmlinkage int sys_sched_yield(void) +asmlinkage long sys_sched_yield(void) { spin_lock_irq(&runqueue_lock); if (current->policy == SCHED_OTHER) @@ -1851,7 +1631,7 @@ asmlinkage int sys_sched_yield(void) return 0; } -asmlinkage int sys_sched_get_priority_max(int policy) +asmlinkage long sys_sched_get_priority_max(int policy) { int ret = -EINVAL; @@ -1867,7 +1647,7 @@ asmlinkage int sys_sched_get_priority_max(int policy) return ret; } -asmlinkage int sys_sched_get_priority_min(int policy) +asmlinkage long sys_sched_get_priority_min(int policy) { int ret = -EINVAL; @@ -1882,7 +1662,7 @@ asmlinkage int sys_sched_get_priority_min(int policy) return ret; } -asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval) +asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval) { struct timespec t; @@ -1893,7 +1673,7 @@ asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval) return 0; } -asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp) +asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp) { struct timespec t; unsigned long expire; @@ -1934,13 +1714,13 @@ asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp) return 0; } -static void show_task(int nr,struct task_struct * p) +static void show_task(struct task_struct * p) { unsigned long free = 0; int state; static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" }; - printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr); + printk("%-8s ", p->comm); state = p->state ? 
ffz(~p->state) + 1 : 0; if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *)) printk(stat_nam[state]); @@ -1950,12 +1730,12 @@ static void show_task(int nr,struct task_struct * p) if (p == current) printk(" current "); else - printk(" %08lX ", thread_saved_pc(&p->tss)); + printk(" %08lX ", thread_saved_pc(&p->thread)); #else if (p == current) printk(" current task "); else - printk(" %016lx ", thread_saved_pc(&p->tss)); + printk(" %016lx ", thread_saved_pc(&p->thread)); #endif { unsigned long * n = (unsigned long *) (p+1); @@ -1968,6 +1748,10 @@ static void show_task(int nr,struct task_struct * p) printk("%5d ", p->p_cptr->pid); else printk(" "); + if (!p->mm) + printk(" (L-TLB) "); + else + printk(" (NOTLB) "); if (p->p_ysptr) printk("%7d", p->p_ysptr->pid); else @@ -2020,7 +1804,7 @@ void show_state(void) #endif read_lock(&tasklist_lock); for_each_task(p) - show_task((p->tarray_ptr - &task[0]),p); + show_task(p); read_unlock(&tasklist_lock); } @@ -2030,6 +1814,11 @@ void __init init_idle(void) struct schedule_data * sched_data; sched_data = &aligned_data[smp_processor_id()].schedule_data; + if (current != &init_task && task_on_runqueue(current)) { + printk("UGH! (%d:%d) was on the runqueue, removing.\n", + smp_processor_id(), current->pid); + del_from_runqueue(current); + } t = get_cycles(); sched_data->curr = current; sched_data->last_schedule = t; @@ -2042,18 +1831,20 @@ void __init sched_init(void) * process right in SMP mode. */ int cpu=hard_smp_processor_id(); - int nr = NR_TASKS; + int nr; init_task.processor=cpu; - /* Init task array free list and pidhash table. */ - while(--nr > 0) - add_free_taskslot(&task[nr]); - for(nr = 0; nr < PIDHASH_SZ; nr++) pidhash[nr] = NULL; init_bh(TIMER_BH, timer_bh); init_bh(TQUEUE_BH, tqueue_bh); init_bh(IMMEDIATE_BH, immediate_bh); + + /* + * The boot idle thread does lazy MMU switching as well: + */ + atomic_inc(&init_mm.mm_count); } + diff --git a/kernel/signal.c b/kernel/signal.c index 5077115ce..047ee4395 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -641,7 +641,7 @@ EXPORT_SYMBOL(send_sig_info); * used by various programs) */ -asmlinkage int +asmlinkage long sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, size_t sigsetsize) { int error = -EINVAL; @@ -697,7 +697,7 @@ out: return error; } -asmlinkage int +asmlinkage long sys_rt_sigpending(sigset_t *set, size_t sigsetsize) { int error = -EINVAL; @@ -718,7 +718,7 @@ out: return error; } -asmlinkage int +asmlinkage long sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo, const struct timespec *uts, size_t sigsetsize) { @@ -788,7 +788,7 @@ sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo, return ret; } -asmlinkage int +asmlinkage long sys_kill(int pid, int sig) { struct siginfo info; @@ -802,7 +802,7 @@ sys_kill(int pid, int sig) return kill_something_info(sig, &info, pid); } -asmlinkage int +asmlinkage long sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo) { siginfo_t info; @@ -948,7 +948,7 @@ out: #if !defined(__alpha__) /* Alpha has its own versions with special arguments. 
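[The rt_ signal syscalls above, and the old sigprocmask() just below, change only their kernel-side return width from int to long; the userspace contract is untouched. For reference, the block, inspect, unblock pattern these calls serve (standalone C):]

#include <signal.h>
#include <stdio.h>

int main(void)
{
	sigset_t block, pending;

	sigemptyset(&block);
	sigaddset(&block, SIGINT);

	/* while blocked, SIGINT is held pending rather than delivered */
	sigprocmask(SIG_BLOCK, &block, NULL);
	raise(SIGINT);

	sigpending(&pending);
	if (sigismember(&pending, SIGINT))
		printf("SIGINT is pending while blocked\n");

	/* default disposition would kill us on unblock, so ignore it first */
	signal(SIGINT, SIG_IGN);
	sigprocmask(SIG_UNBLOCK, &block, NULL);
	return 0;
}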
*/ -asmlinkage int +asmlinkage long sys_sigprocmask(int how, old_sigset_t *set, old_sigset_t *oset) { int error; @@ -997,7 +997,7 @@ out: return error; } -asmlinkage int +asmlinkage long sys_sigpending(old_sigset_t *set) { int error; @@ -1014,7 +1014,7 @@ sys_sigpending(old_sigset_t *set) } #ifndef __sparc__ -asmlinkage int +asmlinkage long sys_rt_sigaction(int sig, const struct sigaction *act, struct sigaction *oact, size_t sigsetsize) { @@ -1046,14 +1046,14 @@ out: /* * For backwards compatibility. Functionality superseded by sigprocmask. */ -asmlinkage int +asmlinkage long sys_sgetmask(void) { /* SMP safe */ return current->blocked.sig[0]; } -asmlinkage int +asmlinkage long sys_ssetmask(int newmask) { int old; @@ -1068,9 +1068,9 @@ sys_ssetmask(int newmask) return old; } -#endif /* !defined(__alpha__) && !defined(__ia64__) */ +#endif /* !defined(__alpha__) */ -#if !defined(__alpha__) && !defined(__mips__) && !defined(__ia64__) +#if !defined(__alpha__) && !defined(__mips__) /* * For backwards compatibility. Functionality superseded by sigaction. */ @@ -1087,4 +1087,4 @@ sys_signal(int sig, __sighandler_t handler) return ret ? ret : (unsigned long)old_sa.sa.sa_handler; } -#endif /* !defined(__alpha__) && !defined(__mips__) && !defined(__ia64__) */ +#endif /* !alpha && !__ia64__ && !defined(__mips__) */ diff --git a/kernel/sys.c b/kernel/sys.c index 665c44e30..619d78391 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -41,11 +41,7 @@ int unregister_reboot_notifier(struct notifier_block * nb) return notifier_chain_unregister(&reboot_notifier_list, nb); } - - -extern void adjust_clock(void); - -asmlinkage int sys_ni_syscall(void) +asmlinkage long sys_ni_syscall(void) { return -ENOSYS; } @@ -72,7 +68,7 @@ static int proc_sel(struct task_struct *p, int which, int who) return 0; } -asmlinkage int sys_setpriority(int which, int who, int niceval) +asmlinkage long sys_setpriority(int which, int who, int niceval) { struct task_struct *p; unsigned int priority; @@ -122,7 +118,7 @@ asmlinkage int sys_setpriority(int which, int who, int niceval) * not return the normal nice-value, but a value that has been * offset by 20 (ie it returns 0..40 instead of -20..20) */ -asmlinkage int sys_getpriority(int which, int who) +asmlinkage long sys_getpriority(int which, int who) { struct task_struct *p; long max_prio = -ESRCH; @@ -154,7 +150,7 @@ asmlinkage int sys_getpriority(int which, int who) * * reboot doesn't sync: do that yourself before calling this. */ -asmlinkage int sys_reboot(int magic1, int magic2, int cmd, void * arg) +asmlinkage long sys_reboot(int magic1, int magic2, int cmd, void * arg) { char buffer[256]; @@ -252,7 +248,7 @@ void ctrl_alt_del(void) * SMP: There are not races, the GIDs are checked only by filesystem * operations (as far as semantic preservation is concerned). */ -asmlinkage int sys_setregid(gid_t rgid, gid_t egid) +asmlinkage long sys_setregid(gid_t rgid, gid_t egid) { int old_rgid = current->gid; int old_egid = current->egid; @@ -290,7 +286,7 @@ asmlinkage int sys_setregid(gid_t rgid, gid_t egid) * * SMP: Same implicit races as above. */ -asmlinkage int sys_setgid(gid_t gid) +asmlinkage long sys_setgid(gid_t gid) { int old_egid = current->egid; @@ -356,7 +352,7 @@ extern inline void cap_emulate_setxuid(int old_ruid, int old_euid, * 100% compatible with BSD. A program which uses just setuid() will be * 100% compatible with POSIX with saved IDs. 
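[The saved-ID semantics this comment leans on are what make the usual drop-and-regain idiom work: after exec of a set-uid binary, the saved set-user-ID still holds the privileged euid, so an unprivileged seteuid() can toggle between it and the real uid. A sketch, which only shows a visible change when the binary is actually installed set-uid:]

#include <unistd.h>
#include <stdio.h>

int main(void)
{
	uid_t ruid = getuid(), euid = geteuid();

	/* drop privileges for the risky part; the saved set-user-ID
	   (the euid at exec time) is what lets us come back later */
	if (seteuid(ruid) == -1)
		perror("seteuid(drop)");
	printf("working as euid %d\n", (int)geteuid());

	/* regain the original effective uid from the saved set */
	if (seteuid(euid) == -1)
		perror("seteuid(regain)");
	printf("restored euid %d\n", (int)geteuid());
	return 0;
}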
*/ -asmlinkage int sys_setreuid(uid_t ruid, uid_t euid) +asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) { int old_ruid, old_euid, old_suid, new_ruid; @@ -418,7 +414,7 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid) * will allow a root program to temporarily drop privileges and be able to * regain them by swapping the real and effective uid. */ -asmlinkage int sys_setuid(uid_t uid) +asmlinkage long sys_setuid(uid_t uid) { int old_euid = current->euid; int old_ruid, old_suid, new_ruid; @@ -454,7 +450,7 @@ asmlinkage int sys_setuid(uid_t uid) * This function implements a generic ability to update ruid, euid, * and suid. This allows you to implement the 4.4 compatible seteuid(). */ -asmlinkage int sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) +asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) { int old_ruid = current->uid; int old_euid = current->euid; @@ -493,7 +489,7 @@ asmlinkage int sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) return 0; } -asmlinkage int sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid) +asmlinkage long sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid) { int retval; @@ -507,7 +503,7 @@ asmlinkage int sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid) /* * Same as above, but for rgid, egid, sgid. */ -asmlinkage int sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) +asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) { if (!capable(CAP_SETGID)) { if ((rgid != (gid_t) -1) && (rgid != current->gid) && @@ -533,7 +529,7 @@ asmlinkage int sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) return 0; } -asmlinkage int sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid) +asmlinkage long sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid) { int retval; @@ -551,7 +547,7 @@ asmlinkage int sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid) * whatever uid it wants to). It normally shadows "euid", except when * explicitly set by setfsuid() or for access.. */ -asmlinkage int sys_setfsuid(uid_t uid) +asmlinkage long sys_setfsuid(uid_t uid) { int old_fsuid; @@ -588,7 +584,7 @@ asmlinkage int sys_setfsuid(uid_t uid) /* * Samma på svenska.. */ -asmlinkage int sys_setfsgid(gid_t gid) +asmlinkage long sys_setfsgid(gid_t gid) { int old_fsgid; @@ -637,7 +633,7 @@ asmlinkage long sys_times(struct tms * tbuf) * LBT 04.03.94 */ -asmlinkage int sys_setpgid(pid_t pid, pid_t pgid) +asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) { struct task_struct * p; int err = -EINVAL; @@ -690,7 +686,7 @@ out: return err; } -asmlinkage int sys_getpgid(pid_t pid) +asmlinkage long sys_getpgid(pid_t pid) { if (!pid) { return current->pgrp; @@ -709,13 +705,13 @@ asmlinkage int sys_getpgid(pid_t pid) } } -asmlinkage int sys_getpgrp(void) +asmlinkage long sys_getpgrp(void) { /* SMP - assuming writes are word atomic this is fine */ return current->pgrp; } -asmlinkage int sys_getsid(pid_t pid) +asmlinkage long sys_getsid(pid_t pid) { if (!pid) { return current->session; @@ -734,7 +730,7 @@ asmlinkage int sys_getsid(pid_t pid) } } -asmlinkage int sys_setsid(void) +asmlinkage long sys_setsid(void) { struct task_struct * p; int err = -EPERM; @@ -758,7 +754,7 @@ out: /* * Supplementary group IDs */ -asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist) +asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist) { int i; @@ -784,7 +780,7 @@ asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist) * without another task interfering. 
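[sys_getgroups()/sys_setgroups() below keep their userspace shape as well; reading back the supplementary set is just the following (the 64-entry buffer is an arbitrary cap for the sketch; real code should size it from getgroups(0, NULL)):]

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	gid_t groups[64];   /* arbitrary cap, see note above */
	int i, n = getgroups(64, groups);

	if (n < 0) {
		perror("getgroups");
		return 1;
	}
	for (i = 0; i < n; i++)
		printf("supplementary gid %d\n", (int)groups[i]);
	return 0;
}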
*/
-asmlinkage int sys_setgroups(int gidsetsize, gid_t *grouplist)
+asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist)
 {
 if (!capable(CAP_SETGID))
 return -EPERM;
@@ -822,7 +818,7 @@ out:
 */
 DECLARE_MUTEX(uts_sem);
-asmlinkage int sys_newuname(struct new_utsname * name)
+asmlinkage long sys_newuname(struct new_utsname * name)
 {
 int errno = 0;
@@ -833,7 +829,7 @@ asmlinkage int sys_newuname(struct new_utsname * name)
 return errno;
 }
-asmlinkage int sys_sethostname(char *name, int len)
+asmlinkage long sys_sethostname(char *name, int len)
 {
 int errno;
@@ -851,7 +847,7 @@ asmlinkage int sys_sethostname(char *name, int len)
 return errno;
 }
-asmlinkage int sys_gethostname(char *name, int len)
+asmlinkage long sys_gethostname(char *name, int len)
 {
 int i, errno;
@@ -872,7 +868,7 @@
 * Only setdomainname; getdomainname can be implemented by calling
 * uname()
 */
-asmlinkage int sys_setdomainname(char *name, int len)
+asmlinkage long sys_setdomainname(char *name, int len)
 {
 int errno;
@@ -891,7 +887,7 @@
 return errno;
 }
-asmlinkage int sys_getrlimit(unsigned int resource, struct rlimit *rlim)
+asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim)
 {
 if (resource >= RLIM_NLIMITS)
 return -EINVAL;
@@ -900,7 +896,7 @@
 ? -EFAULT : 0;
 }
-asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim)
+asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim)
 {
 struct rlimit new_rlim, *old_rlim;
@@ -977,21 +973,21 @@ int getrusage(struct task_struct *p, int who, struct rusage *ru)
 return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
 }
-asmlinkage int sys_getrusage(int who, struct rusage *ru)
+asmlinkage long sys_getrusage(int who, struct rusage *ru)
 {
 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
 return -EINVAL;
 return getrusage(current, who, ru);
 }
-asmlinkage int sys_umask(int mask)
+asmlinkage long sys_umask(int mask)
 {
 mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
 return mask;
 }
-asmlinkage int sys_prctl(int option, unsigned long arg2, unsigned long arg3,
- unsigned long arg4, unsigned long arg5)
+asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5)
 {
 int error = 0;
 int sig;
@@ -1005,6 +1001,9 @@ asmlinkage int sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 }
 current->pdeath_signal = sig;
 break;
+ case PR_GET_PDEATHSIG:
+ error = put_user(current->pdeath_signal, (int *)arg2);
+ break;
 default:
 error = -EINVAL;
 break;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 195c2cb5b..48320b0db 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -8,7 +8,10 @@
 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
 * Dynamic registration fixes, Stephen Tweedie.
 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
- * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris Horn.
+ * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
+ * Horn.
+ * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
+ * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
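[The new PR_GET_PDEATHSIG case in sys_prctl() above is the read side of PR_SET_PDEATHSIG: arg2 is treated as an int pointer and the current pdeath_signal is copied out through it. From userspace it looks like this, assuming a libc that exposes the PR_* constants; at this point in history they are simply 1 and 2 in <linux/prctl.h>:]

#include <sys/prctl.h>
#include <signal.h>
#include <stdio.h>

int main(void)
{
	int sig = 0;

	/* ask for SIGHUP when our parent dies ... */
	if (prctl(PR_SET_PDEATHSIG, SIGHUP) == -1)
		perror("PR_SET_PDEATHSIG");

	/* ... and read it back through the new PR_GET_PDEATHSIG */
	if (prctl(PR_GET_PDEATHSIG, &sig) == -1)
		perror("PR_GET_PDEATHSIG");

	printf("pdeath_signal = %d\n", sig);
	return 0;
}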
*/ #include <linux/config.h> @@ -18,8 +21,10 @@ #include <linux/proc_fs.h> #include <linux/ctype.h> #include <linux/utsname.h> +#include <linux/capability.h> #include <linux/smp_lock.h> #include <linux/init.h> +#include <linux/sysrq.h> #include <asm/uaccess.h> @@ -34,6 +39,7 @@ extern int panic_timeout; extern int console_loglevel, C_A_D; extern int bdf_prm[], bdflush_min[], bdflush_max[]; extern int sysctl_overcommit_memory; +extern int max_threads; extern int nr_queued_signals, max_queued_signals; #ifdef CONFIG_KMOD @@ -80,6 +86,7 @@ static ctl_table proc_table[]; static ctl_table fs_table[]; static ctl_table debug_table[]; static ctl_table dev_table[]; +extern ctl_table random_table[]; /* /proc declarations: */ @@ -165,6 +172,8 @@ static ctl_table kern_table[] = { 0644, NULL, &proc_doutsstring, &sysctl_string}, {KERN_PANIC, "panic", &panic_timeout, sizeof(int), 0644, NULL, &proc_dointvec}, + {KERN_CAP_BSET, "cap-bound", &cap_bset, sizeof(kernel_cap_t), + 0600, NULL, &proc_dointvec_bset}, #ifdef CONFIG_BLK_DEV_INITRD {KERN_REALROOTDEV, "real-root-dev", &real_root_dev, sizeof(int), 0644, NULL, &proc_dointvec}, @@ -207,6 +216,13 @@ static ctl_table kern_table[] = { {KERN_SHMMAX, "shmmax", &shmmax, sizeof (int), 0644, NULL, &proc_dointvec}, #endif +#ifdef CONFIG_MAGIC_SYSRQ + {KERN_SYSRQ, "sysrq", &sysrq_enabled, sizeof (int), + 0644, NULL, &proc_dointvec}, +#endif + {KERN_MAX_THREADS, "threads-max", &max_threads, sizeof(int), + 0644, NULL, &proc_dointvec}, + {KERN_RANDOM, "random", NULL, 0, 0555, random_table}, {0} }; @@ -309,7 +325,7 @@ int do_sysctl (int *name, int nlen, return -ENOTDIR; } -extern asmlinkage int sys_sysctl(struct __sysctl_args *args) +extern asmlinkage long sys_sysctl(struct __sysctl_args *args) { struct __sysctl_args tmp; int error; @@ -679,8 +695,13 @@ static int proc_doutsstring(ctl_table *table, int write, struct file *filp, return r; } +#define OP_SET 0 +#define OP_AND 1 +#define OP_OR 2 +#define OP_MAX 3 +#define OP_MIN 4 static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, - void *buffer, size_t *lenp, int conv) + void *buffer, size_t *lenp, int conv, int op) { int *i, vleft, first=1, len, left, neg, val; #define TMPBUFLEN 20 @@ -731,7 +752,17 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, val = -val; buffer += len; left -= len; - *i = val; + switch(op) { + case OP_SET: *i = val; break; + case OP_AND: *i &= val; break; + case OP_OR: *i |= val; break; + case OP_MAX: if(*i < val) + *i = val; + break; + case OP_MIN: if(*i > val) + *i = val; + break; + } } else { p = buf; if (!first) @@ -773,9 +804,21 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, int proc_dointvec(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp) { - return do_proc_dointvec(table,write,filp,buffer,lenp,1); + return do_proc_dointvec(table,write,filp,buffer,lenp,1,OP_SET); +} + +/* + * init may raise the set. + */ + +int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return do_proc_dointvec(table,write,filp,buffer,lenp,1, + (current->pid == 1) ? 
OP_SET : OP_AND); } + int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp) { @@ -874,11 +917,136 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, return 0; } + +/* + * an unsigned long function version + */ + +static int do_proc_doulongvec_minmax(ctl_table *table, int write, + struct file *filp, + void *buffer, size_t *lenp, + unsigned long convmul, + unsigned long convdiv) +{ +#define TMPBUFLEN 20 + unsigned long *i, *min, *max, val; + int vleft, first=1, len, left, neg; + char buf[TMPBUFLEN], *p; + + if (!table->data || !table->maxlen || !*lenp || + (filp->f_pos && !write)) { + *lenp = 0; + return 0; + } + + i = (unsigned long *) table->data; + min = (unsigned long *) table->extra1; + max = (unsigned long *) table->extra2; + vleft = table->maxlen / sizeof(unsigned long); + left = *lenp; + + for (; left && vleft--; i++, first=0) { + if (write) { + while (left) { + char c; + if(get_user(c, (char *) buffer)) + return -EFAULT; + if (!isspace(c)) + break; + left--; + ((char *) buffer)++; + } + if (!left) + break; + neg = 0; + len = left; + if (len > TMPBUFLEN-1) + len = TMPBUFLEN-1; + if(copy_from_user(buf, buffer, len)) + return -EFAULT; + buf[len] = 0; + p = buf; + if (*p == '-' && left > 1) { + neg = 1; + left--, p++; + } + if (*p < '0' || *p > '9') + break; + val = simple_strtoul(p, &p, 0) * convmul / convdiv ; + len = p-buf; + if ((len < left) && *p && !isspace(*p)) + break; + if (neg) + val = -val; + buffer += len; + left -= len; + + if(neg) + continue; + if (min && val < *min++) + continue; + if (max && val > *max++) + continue; + *i = val; + } else { + p = buf; + if (!first) + *p++ = '\t'; + sprintf(p, "%lu", convdiv * (*i) / convmul); + len = strlen(buf); + if (len > left) + len = left; + if(copy_to_user(buffer, buf, len)) + return -EFAULT; + left -= len; + buffer += len; + } + } + + if (!write && !first && left) { + if(put_user('\n', (char *) buffer)) + return -EFAULT; + left--, buffer++; + } + if (write) { + p = (char *) buffer; + while (left) { + char c; + if(get_user(c, p++)) + return -EFAULT; + if (!isspace(c)) + break; + left--; + } + } + if (write && first) + return -EINVAL; + *lenp -= left; + filp->f_pos += *lenp; + return 0; +#undef TMPBUFLEN +} + +int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, 1l, 1l); +} + +int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, + struct file *filp, + void *buffer, size_t *lenp) +{ + return do_proc_doulongvec_minmax(table, write, filp, buffer, + lenp, HZ, 1000l); +} + + /* Like proc_dointvec, but converts seconds to jiffies */ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp) { - return do_proc_dointvec(table,write,filp,buffer,lenp,HZ); + return do_proc_dointvec(table,write,filp,buffer,lenp,HZ,OP_SET); } #else /* CONFIG_PROC_FS */ @@ -913,6 +1081,20 @@ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, return -ENOSYS; } +int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, + struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + + #endif /* CONFIG_PROC_FS */ @@ -997,6 +1179,34 @@ int sysctl_intvec(ctl_table *table, int *name, int nlen, return 0; } +/* Strategy function to convert jiffies to 
seconds */
+int sysctl_jiffies(ctl_table *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ if (oldval) {
+ size_t olen;
+ if (oldlenp) {
+ if (get_user(olen, oldlenp))
+ return -EFAULT;
+ if (olen!=sizeof(int))
+ return -EINVAL;
+ }
+ if (put_user(*(int *)(table->data) / HZ, (int *)oldval) ||
+ (oldlenp && put_user(sizeof(int),oldlenp)))
+ return -EFAULT;
+ }
+ if (newval && newlen) {
+ int new;
+ if (newlen != sizeof(int))
+ return -EINVAL;
+ if (get_user(new, (int *)newval))
+ return -EFAULT;
+ *(int *)(table->data) = new*HZ;
+ }
+ return 1;
+}
+
 int do_string (
 void *oldval, size_t *oldlenp,
 void *newval, size_t newlen, int rdwr,
 char *data, size_t max)
@@ -1073,7 +1283,7 @@ int do_struct (
 #else /* CONFIG_SYSCTL */
-extern asmlinkage int sys_sysctl(struct __sysctl_args *args)
+extern asmlinkage long sys_sysctl(struct __sysctl_args *args)
 {
 return -ENOSYS;
 }
@@ -1111,11 +1321,24 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
 }
 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
 void *buffer, size_t *lenp)
 {
 return -ENOSYS;
 }
+int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
+ struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
 struct ctl_table_header *
 register_sysctl_table(ctl_table * table, int insert_at_head)
 {
diff --git a/kernel/time.c b/kernel/time.c
index 911442dad..1517d6d9d 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -64,7 +64,7 @@ void get_fast_time(struct timeval * t)
 *
 * XXX This function is NOT 64-bit clean!
 */
-asmlinkage int sys_time(int * tloc)
+asmlinkage long sys_time(int * tloc)
 {
 int i;
@@ -85,7 +85,7 @@ asmlinkage int sys_time(int * tloc)
 * architectures that need it).
 */
-asmlinkage int sys_stime(int * tptr)
+asmlinkage long sys_stime(int * tptr)
 {
 int value;
@@ -106,7 +106,7 @@ asmlinkage int sys_stime(int * tptr)
 #endif
-asmlinkage int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
 if (tv) {
 struct timeval ktv;
@@ -181,7 +181,7 @@ int do_sys_settimeofday(struct timeval *tv, struct timezone *tz)
 return 0;
 }
-asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz)
+asmlinkage long sys_settimeofday(struct timeval *tv, struct timezone *tz)
 {
 struct timeval new_tv;
 struct timezone new_tz;
@@ -400,7 +400,7 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
 return(result);
 }
-asmlinkage int sys_adjtimex(struct timex *txc_p)
+asmlinkage long sys_adjtimex(struct timex *txc_p)
 {
 struct timex txc; /* Local copy of parameter */
 int ret;
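[Both new sysctl converters are scaled integer round-trips: do_proc_doulongvec_minmax with convmul = HZ and convdiv = 1000 stores val * HZ / 1000 on write and prints val * 1000 / HZ on read, while sysctl_jiffies() above does the same with HZ alone for whole seconds. The sub-tick truncation that implies is easy to see in isolation; HZ = 100 here is an assumption, the usual x86 value of the era.]

#include <stdio.h>

#define HZ 100 /* assumed tick rate */

int main(void)
{
	unsigned long ms;

	for (ms = 250; ms <= 255; ms++) {
		unsigned long jiff = ms * HZ / 1000;   /* write path */
		unsigned long back = jiff * 1000 / HZ; /* read path */
		printf("%lums -> %lu jiffies -> %lums\n", ms, jiff, back);
	}
	return 0;
}

[Everything below 1000/HZ milliseconds is silently rounded down to the previous tick, which is why the minmax clamping in these handlers operates on the converted value, not the user-supplied one.]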