author     Ralf Baechle <ralf@linux-mips.org>   1997-06-01 03:16:17 +0000
committer  Ralf Baechle <ralf@linux-mips.org>   1997-06-01 03:16:17 +0000
commit     d8d9b8f76f22b7a16a83e261e64f89ee611f49df
tree       3067bc130b80d52808e6390c9fc7fc087ec1e33c /kernel
parent     19c9bba94152148523ba0f7ef7cffe3d45656b11
Initial revision
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/exit.c       161
-rw-r--r--   kernel/fork.c       242
-rw-r--r--   kernel/ksyms.c       20
-rw-r--r--   kernel/panic.c        3
-rw-r--r--   kernel/printk.c       3
-rw-r--r--   kernel/resource.c     3
-rw-r--r--   kernel/sched.c      459
-rw-r--r--   kernel/softirq.c     11
-rw-r--r--   kernel/sys.c        135
-rw-r--r--   kernel/sysctl.c       3
10 files changed, 489 insertions, 551 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 0d03916e8..3fce2a2d7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -4,8 +4,6 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*/
-#undef DEBUG_PROC_TREE
-
#include <linux/config.h>
#include <linux/wait.h>
#include <linux/errno.h>
@@ -16,6 +14,7 @@
#include <linux/mm.h>
#include <linux/tty.h>
#include <linux/malloc.h>
+#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
@@ -127,130 +126,27 @@ void notify_parent(struct task_struct * tsk)
void release(struct task_struct * p)
{
- int i;
-
- if (!p)
- return;
- if (p == current) {
- printk("task releasing itself\n");
- return;
- }
- for (i=1 ; i<NR_TASKS ; i++)
- if (task[i] == p) {
+ if (p != current) {
#ifdef __SMP__
- /* FIXME! Cheesy, but kills the window... -DaveM */
- while(p->processor != NO_PROC_ID)
- barrier();
- spin_unlock_wait(&scheduler_lock);
+ /* FIXME! Cheesy, but kills the window... -DaveM */
+ while (p->has_cpu)
+ barrier();
+ spin_unlock_wait(&scheduler_lock);
#endif
- nr_tasks--;
- task[i] = NULL;
- REMOVE_LINKS(p);
- release_thread(p);
- if (STACK_MAGIC != *(unsigned long *)p->kernel_stack_page)
- printk(KERN_ALERT "release: %s kernel stack corruption. Aiee\n", p->comm);
- free_kernel_stack(p->kernel_stack_page);
- current->cmin_flt += p->min_flt + p->cmin_flt;
- current->cmaj_flt += p->maj_flt + p->cmaj_flt;
- current->cnswap += p->nswap + p->cnswap;
- free_task_struct(p);
- return;
- }
- panic("trying to release non-existent task");
-}
-
-#ifdef DEBUG_PROC_TREE
-/*
- * Check to see if a task_struct pointer is present in the task[] array
- * Return 0 if found, and 1 if not found.
- */
-int bad_task_ptr(struct task_struct *p)
-{
- int i;
-
- if (!p)
- return 0;
- for (i=0 ; i<NR_TASKS ; i++)
- if (task[i] == p)
- return 0;
- return 1;
-}
-
-/*
- * This routine scans the pid tree and makes sure the rep invariant still
- * holds. Used for debugging only, since it's very slow....
- *
- * It looks a lot scarier than it really is.... we're doing nothing more
- * than verifying the doubly-linked list found in p_ysptr and p_osptr,
- * and checking it corresponds with the process tree defined by p_cptr and
- * p_pptr;
- */
-void audit_ptree(void)
-{
- int i;
-
- for (i=1 ; i<NR_TASKS ; i++) {
- if (!task[i])
- continue;
- if (bad_task_ptr(task[i]->p_pptr))
- printk("Warning, pid %d's parent link is bad\n",
- task[i]->pid);
- if (bad_task_ptr(task[i]->p_cptr))
- printk("Warning, pid %d's child link is bad\n",
- task[i]->pid);
- if (bad_task_ptr(task[i]->p_ysptr))
- printk("Warning, pid %d's ys link is bad\n",
- task[i]->pid);
- if (bad_task_ptr(task[i]->p_osptr))
- printk("Warning, pid %d's os link is bad\n",
- task[i]->pid);
- if (task[i]->p_pptr == task[i])
- printk("Warning, pid %d parent link points to self\n",
- task[i]->pid);
- if (task[i]->p_cptr == task[i])
- printk("Warning, pid %d child link points to self\n",
- task[i]->pid);
- if (task[i]->p_ysptr == task[i])
- printk("Warning, pid %d ys link points to self\n",
- task[i]->pid);
- if (task[i]->p_osptr == task[i])
- printk("Warning, pid %d os link points to self\n",
- task[i]->pid);
- if (task[i]->p_osptr) {
- if (task[i]->p_pptr != task[i]->p_osptr->p_pptr)
- printk(
- "Warning, pid %d older sibling %d parent is %d\n",
- task[i]->pid, task[i]->p_osptr->pid,
- task[i]->p_osptr->p_pptr->pid);
- if (task[i]->p_osptr->p_ysptr != task[i])
- printk(
- "Warning, pid %d older sibling %d has mismatched ys link\n",
- task[i]->pid, task[i]->p_osptr->pid);
- }
- if (task[i]->p_ysptr) {
- if (task[i]->p_pptr != task[i]->p_ysptr->p_pptr)
- printk(
- "Warning, pid %d younger sibling %d parent is %d\n",
- task[i]->pid, task[i]->p_osptr->pid,
- task[i]->p_osptr->p_pptr->pid);
- if (task[i]->p_ysptr->p_osptr != task[i])
- printk(
- "Warning, pid %d younger sibling %d has mismatched os link\n",
- task[i]->pid, task[i]->p_ysptr->pid);
- }
- if (task[i]->p_cptr) {
- if (task[i]->p_cptr->p_pptr != task[i])
- printk(
- "Warning, pid %d youngest child %d has mismatched parent link\n",
- task[i]->pid, task[i]->p_cptr->pid);
- if (task[i]->p_cptr->p_ysptr)
- printk(
- "Warning, pid %d youngest child %d has non-NULL ys link\n",
- task[i]->pid, task[i]->p_cptr->pid);
- }
+ charge_uid(p, -1);
+ nr_tasks--;
+ add_free_taskslot(p->tarray_ptr);
+ unhash_pid(p);
+ REMOVE_LINKS(p);
+ release_thread(p);
+ current->cmin_flt += p->min_flt + p->cmin_flt;
+ current->cmaj_flt += p->maj_flt + p->cmaj_flt;
+ current->cnswap += p->nswap + p->cnswap;
+ free_task_struct(p);
+ } else {
+ printk("task releasing itself\n");
}
}
-#endif /* DEBUG_PROC_TREE */
/*
* This checks not only the pgrp, but falls back on the pid if no
@@ -348,17 +244,12 @@ int kill_proc(int pid, int sig, int priv)
retval = -EINVAL;
if (sig >= 0 && sig <= 32) {
- struct task_struct *p;
+ struct task_struct *p = find_task_by_pid(pid);
- retval = -ESRCH;
- read_lock(&tasklist_lock);
- for_each_task(p) {
- if (p->pid != pid)
- continue;
- retval = send_sig(sig,p,priv);
- break;
- }
- read_unlock(&tasklist_lock);
+ if(p)
+ retval = send_sig(sig, p, priv);
+ else
+ retval = -ESRCH;
}
return retval;
}
@@ -551,7 +442,7 @@ static inline void __exit_mm(struct task_struct * tsk)
if (!--mm->count) {
exit_mmap(mm);
free_page_tables(mm);
- kfree(mm);
+ kmem_cache_free(mm_cachep, mm);
}
}
}
@@ -735,8 +626,8 @@ repeat:
retval = p->pid;
goto end_wait4;
case TASK_ZOMBIE:
- current->cutime += p->utime + p->cutime;
- current->cstime += p->stime + p->cstime;
+ current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
+ current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
read_unlock(&tasklist_lock);
if (ru != NULL)
getrusage(p, RUSAGE_BOTH, ru);
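
Note: release() and kill_proc() above now resolve tasks through the pid hash this revision introduces (hash_pid(), unhash_pid(), find_task_by_pid()); the hash itself lives in include/linux/sched.h and is not part of this diff. A minimal userspace sketch of the idea follows -- the hash function, chain field names and table size are assumptions, not code from this patch:

#include <stdio.h>

#define PIDHASH_SZ 512				/* assumed table size */
#define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))

struct task {					/* stand-in for task_struct */
	int pid;
	struct task *pidhash_next;		/* assumed chain fields */
	struct task **pidhash_pprev;
};

static struct task *pidhash[PIDHASH_SZ];

static void hash_pid(struct task *p)
{
	struct task **htable = &pidhash[pid_hashfn(p->pid)];

	if ((p->pidhash_next = *htable) != NULL)
		(*htable)->pidhash_pprev = &p->pidhash_next;
	*htable = p;
	p->pidhash_pprev = htable;
}

static void unhash_pid(struct task *p)
{
	if (p->pidhash_next)
		p->pidhash_next->pidhash_pprev = p->pidhash_pprev;
	*p->pidhash_pprev = p->pidhash_next;
}

static struct task *find_task_by_pid(int pid)
{
	struct task *p = pidhash[pid_hashfn(pid)];

	while (p && p->pid != pid)		/* expected O(1) chain walk */
		p = p->pidhash_next;
	return p;
}

int main(void)
{
	struct task a = { 1, NULL, NULL }, b = { 42, NULL, NULL };

	hash_pid(&a);
	hash_pid(&b);
	printf("42 -> %p, 7 -> %p\n",
	       (void *)find_task_by_pid(42), (void *)find_task_by_pid(7));
	unhash_pid(&b);
	return 0;
}

The pprev back-pointer is the same removal trick the patch itself uses for uid_taskcount and vm_next_share: deletion never has to special-case the head of a chain.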
diff --git a/kernel/fork.c b/kernel/fork.c
index 6204ffeaf..804e37bd5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -33,68 +33,177 @@ int nr_running=1;
unsigned long int total_forks=0; /* Handle normal Linux uptimes. */
int last_pid=0;
-static inline int find_empty_process(void)
+/* SLAB cache for mm_struct's. */
+kmem_cache_t *mm_cachep;
+
+struct task_struct *pidhash[PIDHASH_SZ];
+spinlock_t pidhash_lock = SPIN_LOCK_UNLOCKED;
+
+struct task_struct **tarray_freelist = NULL;
+spinlock_t taskslot_lock = SPIN_LOCK_UNLOCKED;
+
+/* UID task count cache, to prevent walking entire process list every
+ * single fork() operation.
+ */
+#define UIDHASH_SZ (PIDHASH_SZ >> 2)
+
+static struct uid_taskcount {
+ struct uid_taskcount *next, **pprev;
+ unsigned short uid;
+ int task_count;
+} *uidhash[UIDHASH_SZ];
+static spinlock_t uidhash_lock = SPIN_LOCK_UNLOCKED;
+
+kmem_cache_t *uid_cachep;
+
+#define uidhashfn(uid) (((uid >> 8) ^ uid) & (UIDHASH_SZ - 1))
+
+static inline void uid_hash_insert(struct uid_taskcount *up, unsigned int hashent)
{
- int i;
+ spin_lock(&uidhash_lock);
+ if((up->next = uidhash[hashent]) != NULL)
+ uidhash[hashent]->pprev = &up->next;
+ up->pprev = &uidhash[hashent];
+ uidhash[hashent] = up;
+ spin_unlock(&uidhash_lock);
+}
- if (nr_tasks >= NR_TASKS - MIN_TASKS_LEFT_FOR_ROOT) {
- if (current->uid)
+static inline void uid_hash_remove(struct uid_taskcount *up)
+{
+ spin_lock(&uidhash_lock);
+ if(up->next)
+ up->next->pprev = up->pprev;
+ *up->pprev = up->next;
+ spin_unlock(&uidhash_lock);
+}
+
+static inline struct uid_taskcount *uid_find(unsigned short uid, unsigned int hashent)
+{
+ struct uid_taskcount *up;
+
+ spin_lock(&uidhash_lock);
+ for(up = uidhash[hashent]; (up && up->uid != uid); up = up->next)
+ ;
+ spin_unlock(&uidhash_lock);
+ return up;
+}
+
+int charge_uid(struct task_struct *p, int count)
+{
+ unsigned int hashent = uidhashfn(p->uid);
+ struct uid_taskcount *up = uid_find(p->uid, hashent);
+
+ if(up) {
+ int limit = p->rlim[RLIMIT_NPROC].rlim_cur;
+ int newcnt = up->task_count + count;
+
+ if(newcnt > limit)
return -EAGAIN;
- }
- if (current->uid) {
- long max_tasks = current->rlim[RLIMIT_NPROC].rlim_cur;
-
- max_tasks--; /* count the new process.. */
- if (max_tasks < nr_tasks) {
- struct task_struct *p;
- read_lock(&tasklist_lock);
- for_each_task (p) {
- if (p->uid == current->uid)
- if (--max_tasks < 0) {
- read_unlock(&tasklist_lock);
- return -EAGAIN;
- }
- }
- read_unlock(&tasklist_lock);
+ else if(newcnt == 0) {
+ uid_hash_remove(up);
+ kmem_cache_free(uid_cachep, up);
+ return 0;
}
+ } else {
+ up = kmem_cache_alloc(uid_cachep, SLAB_KERNEL);
+ if(!up)
+ return -EAGAIN;
+ up->uid = p->uid;
+ up->task_count = 0;
+ uid_hash_insert(up, hashent);
}
- for (i = 0 ; i < NR_TASKS ; i++) {
- if (!task[i])
- return i;
+ up->task_count += count;
+ return 0;
+}
+
+void uidcache_init(void)
+{
+ int i;
+
+ uid_cachep = kmem_cache_create("uid_cache", sizeof(struct uid_taskcount),
+ 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if(!uid_cachep)
+ panic("Cannot create uid taskcount SLAB cache\n");
+
+ for(i = 0; i < UIDHASH_SZ; i++)
+ uidhash[i] = 0;
+}
+
+static inline int find_empty_process(void)
+{
+ struct task_struct **tslot;
+
+ if(current->uid) {
+ int error;
+
+ if(nr_tasks >= NR_TASKS - MIN_TASKS_LEFT_FOR_ROOT)
+ return -EAGAIN;
+ if((error = charge_uid(current, 1)) < 0)
+ return error;
}
+ tslot = get_free_taskslot();
+ if(tslot)
+ return tslot - &task[0];
return -EAGAIN;
}
+/* Protects next_safe and last_pid. */
+static spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED;
+
static int get_pid(unsigned long flags)
{
+ static int next_safe = PID_MAX;
struct task_struct *p;
if (flags & CLONE_PID)
return current->pid;
- read_lock(&tasklist_lock);
-repeat:
- if ((++last_pid) & 0xffff8000)
- last_pid=1;
- for_each_task (p) {
- if (p->pid == last_pid ||
- p->pgrp == last_pid ||
- p->session == last_pid)
- goto repeat;
+ spin_lock(&lastpid_lock);
+ if((++last_pid) & 0xffff8000) {
+ last_pid = 300; /* Skip daemons etc. */
+ goto inside;
+ }
+ if(last_pid >= next_safe) {
+inside:
+ next_safe = PID_MAX;
+ read_lock(&tasklist_lock);
+ repeat:
+ for_each_task(p) {
+ if(p->pid == last_pid ||
+ p->pgrp == last_pid ||
+ p->session == last_pid) {
+ if(++last_pid >= next_safe) {
+ if(last_pid & 0xffff8000)
+ last_pid = 300;
+ next_safe = PID_MAX;
+ goto repeat;
+ }
+ }
+ if(p->pid > last_pid && next_safe > p->pid)
+ next_safe = p->pid;
+ if(p->pgrp > last_pid && next_safe > p->pgrp)
+ next_safe = p->pgrp;
+ if(p->session > last_pid && next_safe > p->session)
+ next_safe = p->session;
+ }
+ read_unlock(&tasklist_lock);
}
- read_unlock(&tasklist_lock);
+ spin_unlock(&lastpid_lock);
return last_pid;
}
static inline int dup_mmap(struct mm_struct * mm)
{
- struct vm_area_struct * mpnt, **p, *tmp;
+ struct vm_area_struct * mpnt, *tmp, **pprev;
- mm->mmap = NULL;
- p = &mm->mmap;
+ mm->mmap = mm->mmap_cache = NULL;
flush_cache_mm(current->mm);
+ pprev = &mm->mmap;
for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
+ struct inode *inode;
+
tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (!tmp) {
exit_mmap(mm);
@@ -105,12 +214,18 @@ static inline int dup_mmap(struct mm_struct * mm)
tmp->vm_flags &= ~VM_LOCKED;
tmp->vm_mm = mm;
tmp->vm_next = NULL;
- if (tmp->vm_inode) {
- tmp->vm_inode->i_count++;
+ inode = tmp->vm_inode;
+ if (inode) {
+ inode->i_count++;
+ if (tmp->vm_flags & VM_DENYWRITE)
+ inode->i_writecount--;
+
/* insert tmp into the share list, just after mpnt */
- tmp->vm_next_share->vm_prev_share = tmp;
+ if((tmp->vm_next_share = mpnt->vm_next_share) != NULL)
+ mpnt->vm_next_share->vm_pprev_share =
+ &tmp->vm_next_share;
mpnt->vm_next_share = tmp;
- tmp->vm_prev_share = mpnt;
+ tmp->vm_pprev_share = &mpnt->vm_next_share;
}
if (copy_page_range(mm, current->mm, tmp)) {
exit_mmap(mm);
@@ -119,24 +234,35 @@ static inline int dup_mmap(struct mm_struct * mm)
}
if (tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);
- *p = tmp;
- p = &tmp->vm_next;
+
+ /* Ok, finally safe to link it in. */
+ if((tmp->vm_next = *pprev) != NULL)
+ (*pprev)->vm_pprev = &tmp->vm_next;
+ *pprev = tmp;
+ tmp->vm_pprev = pprev;
+
+ pprev = &tmp->vm_next;
}
flush_tlb_mm(current->mm);
- build_mmap_avl(mm);
return 0;
}
static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
if (!(clone_flags & CLONE_VM)) {
- struct mm_struct * mm = kmalloc(sizeof(*tsk->mm), GFP_KERNEL);
+ struct mm_struct * mm = kmem_cache_alloc(mm_cachep, SLAB_KERNEL);
if (!mm)
return -1;
*mm = *current->mm;
init_new_context(mm);
mm->count = 1;
mm->def_flags = 0;
+
+ /* It has not run yet, so cannot be present in anyone's
+ * cache or tlb.
+ */
+ mm->cpu_vm_mask = 0;
+
tsk->mm = mm;
tsk->min_flt = tsk->maj_flt = 0;
tsk->cmin_flt = tsk->cmaj_flt = 0;
@@ -146,7 +272,7 @@ static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
if (dup_mmap(mm)) {
free_page_tables(mm);
free_mm:
- kfree(mm);
+ kmem_cache_free(mm_cachep, mm);
return -1;
}
return 0;
@@ -232,20 +358,17 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
{
int nr;
int error = -ENOMEM;
- unsigned long new_stack;
struct task_struct *p;
lock_kernel();
p = alloc_task_struct();
if (!p)
goto bad_fork;
- new_stack = alloc_kernel_stack(p);
- if (!new_stack)
- goto bad_fork_free_p;
+
error = -EAGAIN;
nr = find_empty_process();
if (nr < 0)
- goto bad_fork_free_stack;
+ goto bad_fork_free;
*p = *current;
@@ -256,8 +379,6 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
p->did_exec = 0;
p->swappable = 0;
- p->kernel_stack_page = new_stack;
- *(unsigned long *) p->kernel_stack_page = STACK_MAGIC;
p->state = TASK_UNINTERRUPTIBLE;
p->flags &= ~(PF_PTRACED|PF_TRACESYS|PF_SUPERPRIV);
p->flags |= PF_FORKNOEXEC;
@@ -274,15 +395,18 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
p->real_timer.data = (unsigned long) p;
p->leader = 0; /* session leadership doesn't inherit */
p->tty_old_pgrp = 0;
- p->utime = p->stime = 0;
- p->cutime = p->cstime = 0;
+ p->times.tms_utime = p->times.tms_stime = 0;
+ p->times.tms_cutime = p->times.tms_cstime = 0;
#ifdef __SMP__
+ p->has_cpu = 0;
p->processor = NO_PROC_ID;
#endif
p->lock_depth = 0;
p->start_time = jiffies;
- task[nr] = p;
+ p->tarray_ptr = &task[nr];
+ *p->tarray_ptr = p;
SET_LINKS(p);
+ hash_pid(p);
nr_tasks++;
error = -ENOMEM;
@@ -330,16 +454,16 @@ bad_fork_cleanup_fs:
bad_fork_cleanup_files:
exit_files(p);
bad_fork_cleanup:
+ charge_uid(current, -1);
if (p->exec_domain && p->exec_domain->module)
__MOD_DEC_USE_COUNT(p->exec_domain->module);
if (p->binfmt && p->binfmt->module)
__MOD_DEC_USE_COUNT(p->binfmt->module);
- task[nr] = NULL;
+ add_free_taskslot(p->tarray_ptr);
+ unhash_pid(p);
REMOVE_LINKS(p);
nr_tasks--;
-bad_fork_free_stack:
- free_kernel_stack(new_stack);
-bad_fork_free_p:
+bad_fork_free:
free_task_struct(p);
bad_fork:
fork_out:
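
The interesting part of the new get_pid() above is the next_safe watermark: the task list is rescanned only when last_pid catches up to the smallest pid, pgrp or session id above it, so most forks allocate a pid without taking tasklist_lock at all. A toy userspace model of just that logic -- a sorted id array stands in for the task list, and PID_MAX is assumed to be 0x8000, matching the 0xffff8000 wrap mask:

#include <stdio.h>

#define PID_MAX 0x8000
static int used[PID_MAX];		/* toy stand-in for the task list */
static int last_pid = 0, next_safe = PID_MAX;

static int get_pid(void)
{
	if ((++last_pid) & 0xffff8000)
		last_pid = 300;		/* skip daemons etc., as in the patch */
	if (last_pid >= next_safe) {
		int pid;

		next_safe = PID_MAX;
	repeat:
		for (pid = 0; pid < PID_MAX; pid++) {	/* the for_each_task() scan */
			if (!used[pid])
				continue;
			if (pid == last_pid) {
				if (++last_pid >= next_safe) {
					if (last_pid & 0xffff8000)
						last_pid = 300;
					next_safe = PID_MAX;
					goto repeat;
				}
			}
			if (pid > last_pid && next_safe > pid)
				next_safe = pid;  /* nearest used id above us */
		}
	}
	return last_pid;
}

int main(void)
{
	used[301] = used[302] = 1;
	last_pid = 300;
	next_safe = 0;				/* force a scan on the first call */
	printf("%d\n", get_pid());		/* 303: 301 and 302 are taken */
	printf("%d\n", get_pid());		/* 304: no rescan, still below next_safe */
	return 0;
}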
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index f5f202c8e..ec0be876f 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -52,10 +52,6 @@
#include <linux/ctype.h>
#include <linux/file.h>
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
-extern struct drive_info_struct drive_info;
-#endif
-
extern unsigned char aux_device_present, kbd_read_mask;
#ifdef CONFIG_PCI
@@ -124,8 +120,14 @@ EXPORT_SYMBOL(exit_files);
/* internal kernel memory management */
EXPORT_SYMBOL(__get_free_pages);
EXPORT_SYMBOL(free_pages);
+EXPORT_SYMBOL(kmem_find_general_cachep);
+EXPORT_SYMBOL(kmem_cache_create);
+EXPORT_SYMBOL(kmem_cache_shrink);
+EXPORT_SYMBOL(kmem_cache_alloc);
+EXPORT_SYMBOL(kmem_cache_free);
EXPORT_SYMBOL(kmalloc);
EXPORT_SYMBOL(kfree);
+EXPORT_SYMBOL(kfree_s);
EXPORT_SYMBOL(vmalloc);
EXPORT_SYMBOL(vfree);
EXPORT_SYMBOL(mem_map);
@@ -134,10 +136,6 @@ EXPORT_SYMBOL(max_mapnr);
EXPORT_SYMBOL(num_physpages);
EXPORT_SYMBOL(high_memory);
EXPORT_SYMBOL(update_vm_cache);
-EXPORT_SYMBOL(kmem_cache_create);
-EXPORT_SYMBOL(kmem_cache_destroy);
-EXPORT_SYMBOL(kmem_cache_alloc);
-EXPORT_SYMBOL(kmem_cache_free);
/* filesystem internal functions */
EXPORT_SYMBOL(getname);
@@ -150,6 +148,7 @@ EXPORT_SYMBOL(lnamei);
EXPORT_SYMBOL(open_namei);
EXPORT_SYMBOL(sys_close);
EXPORT_SYMBOL(close_fp);
+EXPORT_SYMBOL(insert_file_free);
EXPORT_SYMBOL(check_disk_change);
EXPORT_SYMBOL(invalidate_buffers);
EXPORT_SYMBOL(invalidate_inodes);
@@ -215,10 +214,6 @@ EXPORT_SYMBOL(gendisk_head);
EXPORT_SYMBOL(resetup_one_dev);
EXPORT_SYMBOL(unplug_device);
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
-EXPORT_SYMBOL(drive_info);
-#endif
-
/* tty routines */
EXPORT_SYMBOL(tty_hangup);
EXPORT_SYMBOL(tty_wait_until_sent);
@@ -300,7 +295,6 @@ EXPORT_SYMBOL(wake_up_interruptible);
EXPORT_SYMBOL(sleep_on);
EXPORT_SYMBOL(interruptible_sleep_on);
EXPORT_SYMBOL(schedule);
-EXPORT_SYMBOL(current_set);
EXPORT_SYMBOL(jiffies);
EXPORT_SYMBOL(xtime);
EXPORT_SYMBOL(do_gettimeofday);
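
The export shuffle above makes the slab allocator the public allocation interface for modules: kmem_cache_create() and friends are now exported, while the old kmem_cache_destroy export is gone and kmem_cache_shrink() takes its place. A hedged sketch of how a module of this era might use it -- struct foo and foo_cachep are hypothetical, and the create() call uses the same six-argument signature as the uid_cachep setup in fork.c above:

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/errno.h>

struct foo { int x; };			/* hypothetical object type */
static kmem_cache_t *foo_cachep;	/* hypothetical cache */

int init_module(void)
{
	foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
				       0, SLAB_HWCACHE_ALIGN, NULL, NULL);
	return foo_cachep ? 0 : -ENOMEM;
}

void cleanup_module(void)
{
	/* kmem_cache_destroy() is no longer exported, so a module can
	 * only shrink its cache on unload in this revision. */
	kmem_cache_shrink(foo_cachep);
}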
diff --git a/kernel/panic.c b/kernel/panic.c
index deaa2f339..c5482bffe 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -15,6 +15,7 @@
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/reboot.h>
+#include <linux/init.h>
#include <asm/sgialib.h>
@@ -24,7 +25,7 @@ extern int C_A_D;
int panic_timeout = 0;
-void panic_setup(char *str, int *ints)
+__initfunc(void panic_setup(char *str, int *ints))
{
if (ints[0] == 1)
panic_timeout = ints[1];
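
panic.c, printk.c, resource.c, sched.c and sysctl.c in this commit all gain <linux/init.h> and wrap their boot-time setup functions in __initfunc(). Roughly what that macro does in this era's include/linux/init.h -- reconstructed from memory, not part of this diff -- is declare the function with a section attribute and then open the real definition, so the code lands in .text.init and can be discarded once boot completes:

#define __init __attribute__ ((__section__ (".text.init")))
#define __initfunc(__arginit) \
	__arginit __init; \
	__arginit

/*
 * __initfunc(void panic_setup(char *str, int *ints)) { ... }
 * thus expands to:
 *
 *	void panic_setup(char *str, int *ints) __init;
 *	void panic_setup(char *str, int *ints) { ... }
 */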
diff --git a/kernel/printk.c b/kernel/printk.c
index 0d5d619b0..3d409f2d5 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -25,6 +25,7 @@
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/console.h>
+#include <linux/init.h>
#include <asm/uaccess.h>
@@ -261,7 +262,7 @@ void unblank_console(void)
* print any messages that were printed by the kernel before the
* console driver was initialized.
*/
-void register_console(struct console * console)
+__initfunc(void register_console(struct console * console))
{
int i,j,len;
int p = log_start;
diff --git a/kernel/resource.c b/kernel/resource.c
index 27abcf4dc..ff7c7492a 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -12,6 +12,7 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/ioport.h>
+#include <linux/init.h>
#define IOTABLE_SIZE 128
@@ -181,7 +182,7 @@ unsigned long occupy_region(unsigned long base, unsigned long end,
#endif
/* Called from init/main.c to reserve IO ports. */
-void reserve_setup(char *str, int *ints)
+__initfunc(void reserve_setup(char *str, int *ints))
{
int i;
diff --git a/kernel/sched.c b/kernel/sched.c
index bc256d029..9f32305ee 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -32,6 +32,7 @@
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
+#include <linux/init.h>
#include <asm/system.h>
#include <asm/io.h>
@@ -88,21 +89,6 @@ unsigned long prof_shift = 0;
extern void mem_use(void);
-#ifdef __mips__
-unsigned long init_kernel_stack[2048] = { STACK_MAGIC, };
-unsigned long init_user_stack[2048] = { STACK_MAGIC, };
-#else
-unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
-unsigned long init_user_stack[1024] = { STACK_MAGIC, };
-#endif
-static struct vm_area_struct init_mmap = INIT_MMAP;
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
-static struct signal_struct init_signals = INIT_SIGNALS;
-
-struct mm_struct init_mm = INIT_MM;
-struct task_struct init_task = INIT_TASK;
-
unsigned long volatile jiffies=0;
/*
@@ -110,7 +96,6 @@ unsigned long volatile jiffies=0;
* via the SMP irq return path.
*/
-struct task_struct *current_set[NR_CPUS] = {&init_task, };
struct task_struct *last_task_used_math = NULL;
struct task_struct * task[NR_TASKS] = {&init_task, };
@@ -119,12 +104,6 @@ struct kernel_stat kstat = { 0 };
static inline void add_to_runqueue(struct task_struct * p)
{
-#if 1 /* sanity tests */
- if (p->next_run || p->prev_run) {
- printk("task already on run-queue\n");
- return;
- }
-#endif
if (p->counter > current->counter + 3)
need_resched = 1;
nr_running++;
@@ -138,20 +117,6 @@ static inline void del_from_runqueue(struct task_struct * p)
struct task_struct *next = p->next_run;
struct task_struct *prev = p->prev_run;
-#if 1 /* sanity tests */
- if (!next || !prev) {
- printk("task not on run-queue\n");
- return;
- }
-#endif
- if (!p->pid) {
- static int nr = 0;
- if (nr < 5) {
- nr++;
- printk("idle task may not sleep\n");
- }
- return;
- }
nr_running--;
next->prev_run = prev;
prev->next_run = next;
@@ -255,7 +220,7 @@ static inline int goodness(struct task_struct * p, struct task_struct * prev, in
#ifdef __SMP__
/* Give a largish advantage to the same processor... */
/* (this is equivalent to penalizing other processors) */
- if (p->last_processor == this_cpu)
+ if (p->processor == this_cpu)
weight += PROC_CHANGE_PENALTY;
#endif
@@ -267,10 +232,127 @@ static inline int goodness(struct task_struct * p, struct task_struct * prev, in
return weight;
}
+/*
+ * Event timer code
+ */
+#define TVN_BITS 6
+#define TVR_BITS 8
+#define TVN_SIZE (1 << TVN_BITS)
+#define TVR_SIZE (1 << TVR_BITS)
+#define TVN_MASK (TVN_SIZE - 1)
+#define TVR_MASK (TVR_SIZE - 1)
+
+struct timer_vec {
+ int index;
+ struct timer_list *vec[TVN_SIZE];
+};
+
+struct timer_vec_root {
+ int index;
+ struct timer_list *vec[TVR_SIZE];
+};
+
+static struct timer_vec tv5 = { 0 };
+static struct timer_vec tv4 = { 0 };
+static struct timer_vec tv3 = { 0 };
+static struct timer_vec tv2 = { 0 };
+static struct timer_vec_root tv1 = { 0 };
+
+static struct timer_vec * const tvecs[] = {
+ (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
+};
+
+#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
+
+static unsigned long timer_jiffies = 0;
+
+static inline void insert_timer(struct timer_list *timer,
+ struct timer_list **vec, int idx)
+{
+ if ((timer->next = vec[idx]))
+ vec[idx]->prev = timer;
+ vec[idx] = timer;
+ timer->prev = (struct timer_list *)&vec[idx];
+}
+
+static inline void internal_add_timer(struct timer_list *timer)
+{
+ /*
+ * must be cli-ed when calling this
+ */
+ unsigned long expires = timer->expires;
+ unsigned long idx = expires - timer_jiffies;
+
+ if (idx < TVR_SIZE) {
+ int i = expires & TVR_MASK;
+ insert_timer(timer, tv1.vec, i);
+ } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
+ int i = (expires >> TVR_BITS) & TVN_MASK;
+ insert_timer(timer, tv2.vec, i);
+ } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
+ insert_timer(timer, tv3.vec, i);
+ } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
+ insert_timer(timer, tv4.vec, i);
+ } else if (expires < timer_jiffies) {
+ /* can happen if you add a timer with expires == jiffies,
+ * or you set a timer to go off in the past
+ */
+ insert_timer(timer, tv1.vec, tv1.index);
+ } else if (idx < 0xffffffffUL) {
+ int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
+ insert_timer(timer, tv5.vec, i);
+ } else {
+ /* Can only get here on architectures with 64-bit jiffies */
+ timer->next = timer->prev = timer;
+ }
+}
+
+static spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
+
+void add_timer(struct timer_list *timer)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ internal_add_timer(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+}
+
+static inline int detach_timer(struct timer_list *timer)
+{
+ int ret = 0;
+ struct timer_list *next, *prev;
+ next = timer->next;
+ prev = timer->prev;
+ if (next) {
+ next->prev = prev;
+ }
+ if (prev) {
+ ret = 1;
+ prev->next = next;
+ }
+ return ret;
+}
+
+
+int del_timer(struct timer_list * timer)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ ret = detach_timer(timer);
+ timer->next = timer->prev = 0;
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return ret;
+}
+
#ifdef __SMP__
#define idle_task (task[cpu_number_map[this_cpu]])
-#define can_schedule(p) ((p)->processor == NO_PROC_ID)
+#define can_schedule(p) (!(p)->has_cpu)
#else
@@ -297,12 +379,10 @@ asmlinkage void schedule(void)
int this_cpu;
need_resched = 0;
- this_cpu = smp_processor_id();
- if (local_irq_count[this_cpu]) {
- printk("Scheduling in interrupt\n");
- *(char *)0 = 0;
- }
prev = current;
+ this_cpu = smp_processor_id();
+ if (local_irq_count[this_cpu])
+ goto scheduling_in_interrupt;
release_kernel_lock(prev, this_cpu, lock_depth);
if (bh_active & bh_mask)
do_bottom_half();
@@ -312,16 +392,8 @@ asmlinkage void schedule(void)
/* move an exhausted RR process to be last.. */
if (!prev->counter && prev->policy == SCHED_RR) {
- if (prev->pid) {
- prev->counter = prev->priority;
- move_last_runqueue(prev);
- } else {
- static int count = 5;
- if (count) {
- count--;
- printk("Moving pid 0 last\n");
- }
- }
+ prev->counter = prev->priority;
+ move_last_runqueue(prev);
}
timeout = 0;
switch (prev->state) {
@@ -354,7 +426,7 @@ asmlinkage void schedule(void)
*/
spin_unlock_irq(&runqueue_lock);
#ifdef __SMP__
- prev->processor = NO_PROC_ID;
+ prev->has_cpu = 0;
#endif
/*
@@ -386,8 +458,10 @@ asmlinkage void schedule(void)
}
}
+#ifdef __SMP__
+ next->has_cpu = 1;
next->processor = this_cpu;
- next->last_processor = this_cpu;
+#endif
if (prev != next) {
struct timer_list timer;
@@ -410,6 +484,11 @@ asmlinkage void schedule(void)
spin_unlock(&scheduler_lock);
reacquire_kernel_lock(prev, smp_processor_id(), lock_depth);
+ return;
+
+scheduling_in_interrupt:
+ printk("Scheduling in interrupt\n");
+ *(int *)0 = 0;
}
#ifndef __alpha__
@@ -427,67 +506,53 @@ asmlinkage int sys_pause(void)
#endif
-spinlock_t waitqueue_lock;
+rwlock_t waitqueue_lock = RW_LOCK_UNLOCKED;
/*
* wake_up doesn't wake up stopped processes - they have to be awakened
* with signals or similar.
+ *
+ * Note that we only need a read lock for the wait queue (and thus do not
+ * have to protect against interrupts), as the actual removal from the
+ * queue is handled by the process itself.
*/
void wake_up(struct wait_queue **q)
{
- unsigned long flags;
struct wait_queue *next;
- struct wait_queue *head;
- spin_lock_irqsave(&waitqueue_lock, flags);
+ read_lock(&waitqueue_lock);
if (q && (next = *q)) {
+ struct wait_queue *head;
+
head = WAIT_QUEUE_HEAD(q);
while (next != head) {
struct task_struct *p = next->task;
next = next->next;
- if (p != NULL) {
- if ((p->state == TASK_UNINTERRUPTIBLE) ||
- (p->state == TASK_INTERRUPTIBLE))
- wake_up_process(p);
- }
- if (next)
- continue;
- printk("wait_queue is bad (eip = %p)\n",
- __builtin_return_address(0));
- printk(" q = %p\n",q);
- printk(" *q = %p\n",*q);
- break;
+ if ((p->state == TASK_UNINTERRUPTIBLE) ||
+ (p->state == TASK_INTERRUPTIBLE))
+ wake_up_process(p);
}
}
- spin_unlock_irqrestore(&waitqueue_lock, flags);
+ read_unlock(&waitqueue_lock);
}
void wake_up_interruptible(struct wait_queue **q)
{
- unsigned long flags;
struct wait_queue *next;
- struct wait_queue *head;
- spin_lock_irqsave(&waitqueue_lock, flags);
+ read_lock(&waitqueue_lock);
if (q && (next = *q)) {
+ struct wait_queue *head;
+
head = WAIT_QUEUE_HEAD(q);
while (next != head) {
struct task_struct *p = next->task;
next = next->next;
- if (p != NULL) {
- if (p->state == TASK_INTERRUPTIBLE)
- wake_up_process(p);
- }
- if (next)
- continue;
- printk("wait_queue is bad (eip = %p)\n",
- __builtin_return_address(0));
- printk(" q = %p\n",q);
- printk(" *q = %p\n",*q);
- break;
+ if (p->state == TASK_INTERRUPTIBLE)
+ wake_up_process(p);
}
}
- spin_unlock_irqrestore(&waitqueue_lock, flags);
+ read_unlock(&waitqueue_lock);
}
/*
@@ -606,17 +671,14 @@ static inline void __sleep_on(struct wait_queue **p, int state)
if (!p)
return;
- if (current == task[0])
- panic("task[0] trying to sleep");
current->state = state;
- spin_lock_irqsave(&waitqueue_lock, flags);
+ write_lock_irqsave(&waitqueue_lock, flags);
__add_wait_queue(p, &wait);
- spin_unlock(&waitqueue_lock);
- sti();
+ write_unlock(&waitqueue_lock);
schedule();
- spin_lock_irq(&waitqueue_lock);
+ write_lock_irq(&waitqueue_lock);
__remove_wait_queue(p, &wait);
- spin_unlock_irqrestore(&waitqueue_lock, flags);
+ write_unlock_irqrestore(&waitqueue_lock, flags);
}
void interruptible_sleep_on(struct wait_queue **p)
@@ -629,133 +691,6 @@ void sleep_on(struct wait_queue **p)
__sleep_on(p,TASK_UNINTERRUPTIBLE);
}
-
-#define TVN_BITS 6
-#define TVR_BITS 8
-#define TVN_SIZE (1 << TVN_BITS)
-#define TVR_SIZE (1 << TVR_BITS)
-#define TVN_MASK (TVN_SIZE - 1)
-#define TVR_MASK (TVR_SIZE - 1)
-
-#define SLOW_BUT_DEBUGGING_TIMERS 0
-
-struct timer_vec {
- int index;
- struct timer_list *vec[TVN_SIZE];
-};
-
-struct timer_vec_root {
- int index;
- struct timer_list *vec[TVR_SIZE];
-};
-
-static struct timer_vec tv5 = { 0 };
-static struct timer_vec tv4 = { 0 };
-static struct timer_vec tv3 = { 0 };
-static struct timer_vec tv2 = { 0 };
-static struct timer_vec_root tv1 = { 0 };
-
-static struct timer_vec * const tvecs[] = {
- (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
-};
-
-#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
-
-static unsigned long timer_jiffies = 0;
-
-static inline void insert_timer(struct timer_list *timer,
- struct timer_list **vec, int idx)
-{
- if ((timer->next = vec[idx]))
- vec[idx]->prev = timer;
- vec[idx] = timer;
- timer->prev = (struct timer_list *)&vec[idx];
-}
-
-static inline void internal_add_timer(struct timer_list *timer)
-{
- /*
- * must be cli-ed when calling this
- */
- unsigned long expires = timer->expires;
- unsigned long idx = expires - timer_jiffies;
-
- if (idx < TVR_SIZE) {
- int i = expires & TVR_MASK;
- insert_timer(timer, tv1.vec, i);
- } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
- int i = (expires >> TVR_BITS) & TVN_MASK;
- insert_timer(timer, tv2.vec, i);
- } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
- int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
- insert_timer(timer, tv3.vec, i);
- } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
- int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
- insert_timer(timer, tv4.vec, i);
- } else if (expires < timer_jiffies) {
- /* can happen if you add a timer with expires == jiffies,
- * or you set a timer to go off in the past
- */
- insert_timer(timer, tv1.vec, tv1.index);
- } else if (idx < 0xffffffffUL) {
- int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
- insert_timer(timer, tv5.vec, i);
- } else {
- /* Can only get here on architectures with 64-bit jiffies */
- timer->next = timer->prev = timer;
- }
-}
-
-static spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
-
-void add_timer(struct timer_list *timer)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&timerlist_lock, flags);
-#if SLOW_BUT_DEBUGGING_TIMERS
- if (timer->next || timer->prev) {
- printk("add_timer() called with non-zero list from %p\n",
- __builtin_return_address(0));
- goto out;
- }
-#endif
- internal_add_timer(timer);
-#if SLOW_BUT_DEBUGGING_TIMERS
-out:
-#endif
- spin_unlock_irqrestore(&timerlist_lock, flags);
-}
-
-static inline int detach_timer(struct timer_list *timer)
-{
- int ret = 0;
- struct timer_list *next, *prev;
- next = timer->next;
- prev = timer->prev;
- if (next) {
- next->prev = prev;
- }
- if (prev) {
- ret = 1;
- prev->next = next;
- }
- return ret;
-}
-
-
-int del_timer(struct timer_list * timer)
-{
- int ret;
- unsigned long flags;
-
- spin_lock_irqsave(&timerlist_lock, flags);
- ret = detach_timer(timer);
- timer->next = timer->prev = 0;
- spin_unlock_irqrestore(&timerlist_lock, flags);
- return ret;
-}
-
static inline void cascade_timers(struct timer_vec *tv)
{
/* cascade all the timers from tv up one level */
@@ -847,17 +782,18 @@ unsigned long avenrun[3] = { 0,0,0 };
*/
static unsigned long count_active_tasks(void)
{
- struct task_struct **p;
+ struct task_struct *p;
unsigned long nr = 0;
- for(p = &LAST_TASK; p > &FIRST_TASK; --p)
- if (*p && ((*p)->state == TASK_RUNNING ||
- (*p)->state == TASK_UNINTERRUPTIBLE ||
- (*p)->state == TASK_SWAPPING))
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->pid &&
+ (p->state == TASK_RUNNING ||
+ p->state == TASK_UNINTERRUPTIBLE ||
+ p->state == TASK_SWAPPING))
nr += FIXED_1;
-#ifdef __SMP__
- nr-=(smp_num_cpus-1)*FIXED_1;
-#endif
+ }
+ read_unlock(&tasklist_lock);
return nr;
}
@@ -1065,16 +1001,14 @@ static inline void do_process_times(struct task_struct *p,
{
long psecs;
- p->utime += user;
- p->stime += system;
-
- psecs = (p->stime + p->utime) / HZ;
- if (psecs > p->rlim[RLIMIT_CPU].rlim_cur) {
+ psecs = (p->times.tms_utime += user);
+ psecs += (p->times.tms_stime += system);
+ if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
/* Send SIGXCPU every second.. */
- if (psecs * HZ == p->stime + p->utime)
+ if (!(psecs % HZ))
send_sig(SIGXCPU, p, 1);
/* and SIGKILL when we go over max.. */
- if (psecs > p->rlim[RLIMIT_CPU].rlim_max)
+ if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
send_sig(SIGKILL, p, 1);
}
}
@@ -1344,22 +1278,12 @@ asmlinkage int sys_nice(int increment)
#endif
-static struct task_struct *find_process_by_pid(pid_t pid)
+static inline struct task_struct *find_process_by_pid(pid_t pid)
{
- struct task_struct *p;
-
- p = current;
- if (pid) {
- read_lock(&tasklist_lock);
- for_each_task(p) {
- if (p->pid == pid)
- goto found;
- }
- p = NULL;
-found:
- read_unlock(&tasklist_lock);
- }
- return p;
+ if (pid)
+ return find_task_by_pid(pid);
+ else
+ return current;
}
static int setscheduler(pid_t pid, int policy,
@@ -1572,7 +1496,7 @@ asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
static void show_task(int nr,struct task_struct * p)
{
- unsigned long free;
+ unsigned long free = 0;
static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
@@ -1591,10 +1515,12 @@ static void show_task(int nr,struct task_struct * p)
else
printk(" %016lx ", thread_saved_pc(&p->tss));
#endif
+#if 0
for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
if (((unsigned long *)p->kernel_stack_page)[free])
break;
}
+#endif
printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
if (p->p_cptr)
printk("%5d ", p->p_cptr->pid);
@@ -1612,7 +1538,7 @@ static void show_task(int nr,struct task_struct * p)
void show_state(void)
{
- int i;
+ struct task_struct *p;
#if ((~0UL) == 0xffffffff)
printk("\n"
@@ -1623,25 +1549,30 @@ void show_state(void)
" free sibling\n");
printk(" task PC stack pid father child younger older\n");
#endif
- for (i=0 ; i<NR_TASKS ; i++)
- if (task[i])
- show_task(i,task[i]);
+ read_lock(&tasklist_lock);
+ for_each_task(p)
+ show_task((p->tarray_ptr - &task[0]),p);
+ read_unlock(&tasklist_lock);
}
-void sched_init(void)
+__initfunc(void sched_init(void))
{
/*
* We have to do a little magic to get the first
* process right in SMP mode.
*/
- int cpu=smp_processor_id();
-#ifndef __SMP__
- current_set[cpu]=&init_task;
-#else
+ int cpu=hard_smp_processor_id();
+ int nr = NR_TASKS;
+
init_task.processor=cpu;
- for(cpu = 0; cpu < NR_CPUS; cpu++)
- current_set[cpu] = &init_task;
-#endif
+
+ /* Init task array free list and pidhash table. */
+ while(--nr > 0)
+ add_free_taskslot(&task[nr]);
+
+ for(nr = 0; nr < PIDHASH_SZ; nr++)
+ pidhash[nr] = NULL;
+
init_bh(TIMER_BH, timer_bh);
init_bh(TQUEUE_BH, tqueue_bh);
init_bh(IMMEDIATE_BH, immediate_bh);
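
The timer code that moved up in this file is a hierarchical timer wheel: tv1 resolves the next 256 jiffies exactly, and each of tv2..tv5 holds 64 slots that are each 64 times coarser than the level below, with run_timer_list cascading expired slots downward. The slot arithmetic, extracted into a standalone userspace program (the constants are the patch's; the list plumbing and the past-expiry special case are omitted):

#include <stdio.h>

#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

/* Which vector/slot would internal_add_timer() pick? */
static void where(unsigned long expires, unsigned long timer_jiffies)
{
	unsigned long idx = expires - timer_jiffies;

	if (idx < TVR_SIZE)
		printf("tv1[%lu]\n", expires & TVR_MASK);
	else if (idx < 1UL << (TVR_BITS + TVN_BITS))
		printf("tv2[%lu]\n", (expires >> TVR_BITS) & TVN_MASK);
	else if (idx < 1UL << (TVR_BITS + 2 * TVN_BITS))
		printf("tv3[%lu]\n", (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK);
	else if (idx < 1UL << (TVR_BITS + 3 * TVN_BITS))
		printf("tv4[%lu]\n", (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK);
	else
		printf("tv5 (or the past-expiry slot in tv1)\n");
}

int main(void)
{
	where(100, 0);		/* tv1[100]: within the next 256 ticks */
	where(1000, 0);		/* tv2[3]:   1000 >> 8 == 3 */
	where(100000, 0);	/* tv3[6]:   100000 >> 14 == 6 */
	return 0;
}

The levels therefore span 2^8, 2^14, 2^20, 2^26 and 2^32 jiffies respectively; a timer is filed once and only touched again when its level cascades.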
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 6b9b41aa5..de398bcff 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -62,13 +62,14 @@ static inline void run_bottom_halves(void)
asmlinkage void do_bottom_half(void)
{
- int cpu = smp_processor_id();
+ if (softirq_trylock()) {
+ int cpu = smp_processor_id();
- if (hardirq_trylock(cpu)) {
- if (softirq_trylock()) {
+ if (hardirq_trylock(cpu)) {
+ __sti();
run_bottom_halves();
- softirq_endlock();
+ hardirq_endlock(cpu);
}
- hardirq_endlock(cpu);
+ softirq_endlock();
}
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 934108fa8..311527865 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -370,8 +370,8 @@ int acct_process(long exitcode)
if (acct_active) {
strncpy(ac.ac_comm, current->comm, ACCT_COMM);
ac.ac_comm[ACCT_COMM-1] = '\0';
- ac.ac_utime = current->utime;
- ac.ac_stime = current->stime;
+ ac.ac_utime = current->times.tms_utime;
+ ac.ac_stime = current->times.tms_stime;
ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ));
ac.ac_etime = CURRENT_TIME - ac.ac_btime;
ac.ac_uid = current->uid;
@@ -523,16 +523,15 @@ asmlinkage int sys_old_syscall(void)
*/
asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
{
- int old_ruid;
- int old_euid;
+ int old_ruid, old_euid, new_ruid;
- old_ruid = current->uid;
+ new_ruid = old_ruid = current->uid;
old_euid = current->euid;
if (ruid != (uid_t) -1) {
if ((old_ruid == ruid) ||
(current->euid==ruid) ||
suser())
- current->uid = ruid;
+ new_ruid = ruid;
else
return -EPERM;
}
@@ -542,10 +541,8 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
(current->suid == euid) ||
suser())
current->fsuid = current->euid = euid;
- else {
- current->uid = old_ruid;
+ else
return -EPERM;
- }
}
if (ruid != (uid_t) -1 ||
(euid != (uid_t) -1 && euid != old_ruid))
@@ -553,6 +550,18 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
current->fsuid = current->euid;
if (current->euid != old_euid)
current->dumpable = 0;
+
+ if(new_ruid != old_ruid) {
+ /* What if a process setreuid()'s and this brings the
+ * new uid over his NPROC rlimit? We can check this now
+ * cheaply with the new uid cache, so if it matters
+ * we should be checking for it. -DaveM
+ */
+ charge_uid(current, -1);
+ current->uid = new_ruid;
+ if(new_ruid)
+ charge_uid(current, 1);
+ }
return 0;
}
@@ -570,9 +579,11 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
asmlinkage int sys_setuid(uid_t uid)
{
int old_euid = current->euid;
+ int old_ruid, new_ruid;
+ old_ruid = new_ruid = current->uid;
if (suser())
- current->uid = current->euid = current->suid = current->fsuid = uid;
+ new_ruid = current->euid = current->suid = current->fsuid = uid;
else if ((uid == current->uid) || (uid == current->suid))
current->fsuid = current->euid = uid;
else
@@ -580,6 +591,14 @@ asmlinkage int sys_setuid(uid_t uid)
if (current->euid != old_euid)
current->dumpable = 0;
+
+ if(new_ruid != old_ruid) {
+ /* See comment above about NPROC rlimit issues... */
+ charge_uid(current, -1);
+ current->uid = new_ruid;
+ if(new_ruid)
+ charge_uid(current, 1);
+ }
return 0;
}
@@ -605,8 +624,13 @@ asmlinkage int sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
if ((suid != (uid_t) -1) && (suid != current->uid) &&
(suid != current->euid) && (suid != current->suid))
return -EPERM;
- if (ruid != (uid_t) -1)
+ if (ruid != (uid_t) -1) {
+ /* See above commentary about NPROC rlimit issues here. */
+ charge_uid(current, -1);
current->uid = ruid;
+ if(ruid)
+ charge_uid(current, 1);
+ }
if (euid != (uid_t) -1)
current->euid = euid;
if (suid != (uid_t) -1)
@@ -671,16 +695,9 @@ asmlinkage long sys_times(struct tms * tbuf)
* atomically safe type this is just fine. Conceptually its
* as if the syscall took an instant longer to occur.
*/
- if (tbuf)
- {
- /* ?? use copy_to_user() */
- if(!access_ok(VERIFY_READ, tbuf, sizeof(struct tms)) ||
- __put_user(current->utime,&tbuf->tms_utime)||
- __put_user(current->stime,&tbuf->tms_stime) ||
- __put_user(current->cutime,&tbuf->tms_cutime) ||
- __put_user(current->cstime,&tbuf->tms_cstime))
+ if (tbuf)
+ if (copy_to_user(tbuf, &current->times, sizeof(struct tms)))
return -EFAULT;
- }
return jiffies;
}
@@ -709,22 +726,13 @@ asmlinkage int sys_setpgid(pid_t pid, pid_t pgid)
if (pgid < 0)
return -EINVAL;
- read_lock(&tasklist_lock);
- for_each_task(p) {
- if (p->pid == pid) {
- /* NOTE: I haven't dropped tasklist_lock, this is
- * on purpose. -DaveM
- */
- goto found_task;
- }
- }
- read_unlock(&tasklist_lock);
- return -ESRCH;
+ if((p = find_task_by_pid(pid)) == NULL)
+ return -ESRCH;
-found_task:
/* From this point forward we keep holding onto the tasklist lock
* so that our parent does not change from under us. -DaveM
*/
+ read_lock(&tasklist_lock);
err = -ESRCH;
if (p->p_pptr == current || p->p_opptr == current) {
err = -EPERM;
@@ -762,18 +770,12 @@ asmlinkage int sys_getpgid(pid_t pid)
if (!pid) {
return current->pgrp;
} else {
- struct task_struct *p;
- int ret = -ESRCH;
-
- read_lock(&tasklist_lock);
- for_each_task(p) {
- if (p->pid == pid) {
- ret = p->pgrp;
- break;
- }
- }
- read_unlock(&tasklist_lock);
- return ret;
+ struct task_struct *p = find_task_by_pid(pid);
+
+ if(p)
+ return p->pgrp;
+ else
+ return -ESRCH;
}
}
@@ -785,25 +787,16 @@ asmlinkage int sys_getpgrp(void)
asmlinkage int sys_getsid(pid_t pid)
{
- struct task_struct * p;
- int ret;
-
- /* SMP: The 'self' case requires no lock */
if (!pid) {
- ret = current->session;
+ return current->session;
} else {
- ret = -ESRCH;
+ struct task_struct *p = find_task_by_pid(pid);
- read_lock(&tasklist_lock);
- for_each_task(p) {
- if (p->pid == pid) {
- ret = p->session;
- break;
- }
- }
- read_unlock(&tasklist_lock);
+ if(p)
+ return p->session;
+ else
+ return -ESRCH;
}
- return ret;
}
asmlinkage int sys_setsid(void)
@@ -1030,28 +1023,28 @@ int getrusage(struct task_struct *p, int who, struct rusage *ru)
memset((char *) &r, 0, sizeof(r));
switch (who) {
case RUSAGE_SELF:
- r.ru_utime.tv_sec = CT_TO_SECS(p->utime);
- r.ru_utime.tv_usec = CT_TO_USECS(p->utime);
- r.ru_stime.tv_sec = CT_TO_SECS(p->stime);
- r.ru_stime.tv_usec = CT_TO_USECS(p->stime);
+ r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime);
+ r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime);
+ r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime);
+ r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime);
r.ru_minflt = p->min_flt;
r.ru_majflt = p->maj_flt;
r.ru_nswap = p->nswap;
break;
case RUSAGE_CHILDREN:
- r.ru_utime.tv_sec = CT_TO_SECS(p->cutime);
- r.ru_utime.tv_usec = CT_TO_USECS(p->cutime);
- r.ru_stime.tv_sec = CT_TO_SECS(p->cstime);
- r.ru_stime.tv_usec = CT_TO_USECS(p->cstime);
+ r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_cutime);
+ r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_cutime);
+ r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_cstime);
+ r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_cstime);
r.ru_minflt = p->cmin_flt;
r.ru_majflt = p->cmaj_flt;
r.ru_nswap = p->cnswap;
break;
default:
- r.ru_utime.tv_sec = CT_TO_SECS(p->utime + p->cutime);
- r.ru_utime.tv_usec = CT_TO_USECS(p->utime + p->cutime);
- r.ru_stime.tv_sec = CT_TO_SECS(p->stime + p->cstime);
- r.ru_stime.tv_usec = CT_TO_USECS(p->stime + p->cstime);
+ r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime + p->times.tms_cutime);
+ r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime + p->times.tms_cutime);
+ r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime + p->times.tms_cstime);
+ r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime + p->times.tms_cstime);
r.ru_minflt = p->min_flt + p->cmin_flt;
r.ru_majflt = p->maj_flt + p->cmaj_flt;
r.ru_nswap = p->nswap + p->cnswap;
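
The sys_times() simplification earlier in this file works because utime/stime/cutime/cstime now live in task_struct as an embedded struct tms, byte-for-byte the layout userland expects, so the four __put_user() calls collapse into one copy_to_user(). The userland half of that contract, as a runnable check:

#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

/* Userland view of the structure sys_times() now copies out in one
 * go; the kernel keeps an identical struct tms as of this revision. */
int main(void)
{
	struct tms t;
	clock_t now = times(&t);

	printf("ret=%ld utime=%ld stime=%ld cutime=%ld cstime=%ld\n",
	       (long)now, (long)t.tms_utime, (long)t.tms_stime,
	       (long)t.tms_cutime, (long)t.tms_cstime);
	return 0;
}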
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9e0bb0fd8..3f2e86a6b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -24,6 +24,7 @@
#include <linux/swapctl.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
+#include <linux/init.h>
#include <asm/bitops.h>
#include <asm/uaccess.h>
@@ -210,7 +211,7 @@ static ctl_table dev_table[] = {
};
-void sysctl_init(void)
+__initfunc(void sysctl_init(void))
{
#ifdef CONFIG_PROC_FS
register_proc_table(root_table, &proc_sys_root);