summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorRalf Baechle <ralf@linux-mips.org>1997-04-29 21:13:14 +0000
committer <ralf@linux-mips.org>1997-04-29 21:13:14 +0000
commit19c9bba94152148523ba0f7ef7cffe3d45656b11 (patch)
tree40b1cb534496a7f1ca0f5c314a523c69f1fee464 /kernel
parent7206675c40394c78a90e74812bbdbf8cf3cca1be (diff)
Import of Linux/MIPS 2.1.36
Diffstat (limited to 'kernel')
-rw-r--r--kernel/exec_domain.c33
-rw-r--r--kernel/exit.c276
-rw-r--r--kernel/fork.c89
-rw-r--r--kernel/info.c4
-rw-r--r--kernel/itimer.c36
-rw-r--r--kernel/ksyms.c564
-rw-r--r--kernel/module.c1404
-rw-r--r--kernel/panic.c29
-rw-r--r--kernel/printk.c239
-rw-r--r--kernel/resource.c69
-rw-r--r--kernel/sched.c869
-rw-r--r--kernel/signal.c128
-rw-r--r--kernel/softirq.c54
-rw-r--r--kernel/sys.c491
-rw-r--r--kernel/sysctl.c342
-rw-r--r--kernel/time.c38
16 files changed, 2774 insertions, 1891 deletions
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 9a202359a..5d7e2f056 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -2,6 +2,9 @@
#include <linux/ptrace.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/module.h>
static asmlinkage void no_lcall7(struct pt_regs * regs);
@@ -34,14 +37,14 @@ static asmlinkage void no_lcall7(struct pt_regs * regs)
* personality set incorrectly. Check to see whether SVr4 is available,
* and use it, otherwise give the user a SEGV.
*/
- if (current->exec_domain && current->exec_domain->use_count)
- (*current->exec_domain->use_count)--;
+ if (current->exec_domain && current->exec_domain->module)
+ __MOD_DEC_USE_COUNT(current->exec_domain->module);
current->personality = PER_SVR4;
current->exec_domain = lookup_exec_domain(current->personality);
- if (current->exec_domain && current->exec_domain->use_count)
- (*current->exec_domain->use_count)++;
+ if (current->exec_domain && current->exec_domain->module)
+ __MOD_INC_USE_COUNT(current->exec_domain->module);
if (current->exec_domain && current->exec_domain->handler
&& current->exec_domain->handler != no_lcall7) {
@@ -103,21 +106,27 @@ asmlinkage int sys_personality(unsigned long personality)
{
struct exec_domain *it;
unsigned long old_personality;
+ int ret;
+ lock_kernel();
+ ret = current->personality;
if (personality == 0xffffffff)
- return current->personality;
+ goto out;
+ ret = -EINVAL;
it = lookup_exec_domain(personality);
if (!it)
- return -EINVAL;
+ goto out;
old_personality = current->personality;
- if (current->exec_domain && current->exec_domain->use_count)
- (*current->exec_domain->use_count)--;
+ if (current->exec_domain && current->exec_domain->module)
+ __MOD_DEC_USE_COUNT(current->exec_domain->module);
current->personality = personality;
current->exec_domain = it;
- if (current->exec_domain->use_count)
- (*current->exec_domain->use_count)++;
-
- return old_personality;
+ if (current->exec_domain->module)
+ __MOD_INC_USE_COUNT(current->exec_domain->module);
+ ret = old_personality;
+out:
+ unlock_kernel();
+ return ret;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index d4c54209d..0d03916e8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -6,6 +6,7 @@
#undef DEBUG_PROC_TREE
+#include <linux/config.h>
#include <linux/wait.h>
#include <linux/errno.h>
#include <linux/signal.h>
@@ -16,9 +17,14 @@
#include <linux/tty.h>
#include <linux/malloc.h>
#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/module.h>
+#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
+#include <asm/mmu_context.h>
extern void sem_exit (void);
extern void acct_process (long exitcode);
@@ -36,18 +42,23 @@ static inline void generate(unsigned long sig, struct task_struct * p)
* be handled immediately (ie non-blocked and untraced)
* and that is ignored (either explicitly or by default)
*/
+ spin_lock_irq(&p->sig->siglock);
if (!(mask & p->blocked) && !(p->flags & PF_PTRACED)) {
/* don't bother with ignored signals (but SIGCHLD is special) */
if (sa->sa_handler == SIG_IGN && sig != SIGCHLD)
- return;
+ goto out;
/* some signals are ignored by default.. (but SIGCONT already did its deed) */
if ((sa->sa_handler == SIG_DFL) &&
(sig == SIGCONT || sig == SIGCHLD || sig == SIGWINCH || sig == SIGURG))
- return;
+ goto out;
}
+ spin_lock(&p->sigmask_lock);
p->signal |= mask;
+ spin_unlock(&p->sigmask_lock);
if (p->state == TASK_INTERRUPTIBLE && (p->signal & ~p->blocked))
wake_up_process(p);
+out:
+ spin_unlock_irq(&p->sig->siglock);
}
/*
@@ -60,15 +71,22 @@ void force_sig(unsigned long sig, struct task_struct * p)
if (p->sig) {
unsigned long mask = 1UL << sig;
struct sigaction *sa = p->sig->action + sig;
+
+ spin_lock_irq(&p->sig->siglock);
+
+ spin_lock(&p->sigmask_lock);
p->signal |= mask;
p->blocked &= ~mask;
+ spin_unlock(&p->sigmask_lock);
+
if (sa->sa_handler == SIG_IGN)
sa->sa_handler = SIG_DFL;
if (p->state == TASK_INTERRUPTIBLE)
wake_up_process(p);
+
+ spin_unlock_irq(&p->sig->siglock);
}
}
-
int send_sig(unsigned long sig,struct task_struct * p,int priv)
{
@@ -79,24 +97,23 @@ int send_sig(unsigned long sig,struct task_struct * p,int priv)
(current->uid ^ p->suid) && (current->uid ^ p->uid) &&
!suser())
return -EPERM;
- if (!sig)
- return 0;
- /*
- * Forget it if the process is already zombie'd.
- */
- if (!p->sig)
- return 0;
- if ((sig == SIGKILL) || (sig == SIGCONT)) {
- if (p->state == TASK_STOPPED)
- wake_up_process(p);
- p->exit_code = 0;
- p->signal &= ~( (1<<(SIGSTOP-1)) | (1<<(SIGTSTP-1)) |
- (1<<(SIGTTIN-1)) | (1<<(SIGTTOU-1)) );
+
+ if (sig && p->sig) {
+ spin_lock_irq(&p->sigmask_lock);
+ if ((sig == SIGKILL) || (sig == SIGCONT)) {
+ if (p->state == TASK_STOPPED)
+ wake_up_process(p);
+ p->exit_code = 0;
+ p->signal &= ~( (1<<(SIGSTOP-1)) | (1<<(SIGTSTP-1)) |
+ (1<<(SIGTTIN-1)) | (1<<(SIGTTOU-1)) );
+ }
+ if (sig == SIGSTOP || sig == SIGTSTP || sig == SIGTTIN || sig == SIGTTOU)
+ p->signal &= ~(1<<(SIGCONT-1));
+ spin_unlock_irq(&p->sigmask_lock);
+
+ /* Actually generate the signal */
+ generate(sig,p);
}
- if (sig == SIGSTOP || sig == SIGTSTP || sig == SIGTTIN || sig == SIGTTOU)
- p->signal &= ~(1<<(SIGCONT-1));
- /* Actually generate the signal */
- generate(sig,p);
return 0;
}
@@ -120,6 +137,12 @@ void release(struct task_struct * p)
}
for (i=1 ; i<NR_TASKS ; i++)
if (task[i] == p) {
+#ifdef __SMP__
+ /* FIXME! Cheesy, but kills the window... -DaveM */
+ while(p->processor != NO_PROC_ID)
+ barrier();
+ spin_unlock_wait(&scheduler_lock);
+#endif
nr_tasks--;
task[i] = NULL;
REMOVE_LINKS(p);
@@ -130,7 +153,7 @@ void release(struct task_struct * p)
current->cmin_flt += p->min_flt + p->cmin_flt;
current->cmaj_flt += p->maj_flt + p->cmaj_flt;
current->cnswap += p->nswap + p->cnswap;
- kfree(p);
+ free_task_struct(p);
return;
}
panic("trying to release non-existent task");
@@ -152,14 +175,14 @@ int bad_task_ptr(struct task_struct *p)
return 0;
return 1;
}
-
+
/*
* This routine scans the pid tree and makes sure the rep invariant still
* holds. Used for debugging only, since it's very slow....
*
* It looks a lot scarier than it really is.... we're doing nothing more
- * than verifying the doubly-linked list found in p_ysptr and p_osptr,
- * and checking it corresponds with the process tree defined by p_cptr and
+ * than verifying the doubly-linked list found in p_ysptr and p_osptr,
+ * and checking it corresponds with the process tree defined by p_cptr and
* p_pptr;
*/
void audit_ptree(void)
@@ -240,14 +263,18 @@ int session_of_pgrp(int pgrp)
int fallback;
fallback = -1;
+ read_lock(&tasklist_lock);
for_each_task(p) {
if (p->session <= 0)
continue;
- if (p->pgrp == pgrp)
- return p->session;
+ if (p->pgrp == pgrp) {
+ fallback = p->session;
+ break;
+ }
if (p->pid == pgrp)
fallback = p->session;
}
+ read_unlock(&tasklist_lock);
return fallback;
}
@@ -257,21 +284,29 @@ int session_of_pgrp(int pgrp)
*/
int kill_pg(int pgrp, int sig, int priv)
{
- struct task_struct *p;
- int err,retval = -ESRCH;
- int found = 0;
+ int retval;
- if (sig<0 || sig>32 || pgrp<=0)
- return -EINVAL;
- for_each_task(p) {
- if (p->pgrp == pgrp) {
- if ((err = send_sig(sig,p,priv)) != 0)
- retval = err;
- else
- found++;
+ retval = -EINVAL;
+ if (sig >= 0 && sig <= 32 && pgrp > 0) {
+ struct task_struct *p;
+ int found = 0;
+
+ retval = -ESRCH;
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->pgrp == pgrp) {
+ int err = send_sig(sig,p,priv);
+ if (err != 0)
+ retval = err;
+ else
+ found++;
+ }
}
+ read_unlock(&tasklist_lock);
+ if (found)
+ retval = 0;
}
- return(found ? 0 : retval);
+ return retval;
}
/*
@@ -281,34 +316,51 @@ int kill_pg(int pgrp, int sig, int priv)
*/
int kill_sl(int sess, int sig, int priv)
{
- struct task_struct *p;
- int err,retval = -ESRCH;
- int found = 0;
+ int retval;
- if (sig<0 || sig>32 || sess<=0)
- return -EINVAL;
- for_each_task(p) {
- if (p->session == sess && p->leader) {
- if ((err = send_sig(sig,p,priv)) != 0)
- retval = err;
- else
- found++;
+ retval = -EINVAL;
+ if (sig >= 0 && sig <= 32 && sess > 0) {
+ struct task_struct *p;
+ int found = 0;
+
+ retval = -ESRCH;
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->leader && p->session == sess) {
+ int err = send_sig(sig,p,priv);
+
+ if (err)
+ retval = err;
+ else
+ found++;
+ }
}
+ read_unlock(&tasklist_lock);
+ if (found)
+ retval = 0;
}
- return(found ? 0 : retval);
+ return retval;
}
int kill_proc(int pid, int sig, int priv)
{
- struct task_struct *p;
+ int retval;
- if (sig<0 || sig>32)
- return -EINVAL;
- for_each_task(p) {
- if (p && p->pid == pid)
- return send_sig(sig,p,priv);
+ retval = -EINVAL;
+ if (sig >= 0 && sig <= 32) {
+ struct task_struct *p;
+
+ retval = -ESRCH;
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->pid != pid)
+ continue;
+ retval = send_sig(sig,p,priv);
+ break;
+ }
+ read_unlock(&tasklist_lock);
}
- return(-ESRCH);
+ return retval;
}
/*
@@ -317,49 +369,58 @@ int kill_proc(int pid, int sig, int priv)
*/
asmlinkage int sys_kill(int pid,int sig)
{
- int err, retval = 0, count = 0;
-
if (!pid)
- return(kill_pg(current->pgrp,sig,0));
+ return kill_pg(current->pgrp,sig,0);
+
if (pid == -1) {
+ int retval = 0, count = 0;
struct task_struct * p;
+
+ read_lock(&tasklist_lock);
for_each_task(p) {
if (p->pid > 1 && p != current) {
+ int err;
++count;
if ((err = send_sig(sig,p,0)) != -EPERM)
retval = err;
}
}
- return(count ? retval : -ESRCH);
+ read_unlock(&tasklist_lock);
+ return count ? retval : -ESRCH;
}
- if (pid < 0)
- return(kill_pg(-pid,sig,0));
+ if (pid < 0)
+ return kill_pg(-pid,sig,0);
+
/* Normal kill */
- return(kill_proc(pid,sig,0));
+ return kill_proc(pid,sig,0);
}
/*
* Determine if a process group is "orphaned", according to the POSIX
* definition in 2.2.2.52. Orphaned process groups are not to be affected
- * by terminal-generated stop signals. Newly orphaned process groups are
+ * by terminal-generated stop signals. Newly orphaned process groups are
* to receive a SIGHUP and a SIGCONT.
- *
+ *
* "I ask you, have you ever known what it is to be an orphan?"
*/
static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
{
struct task_struct *p;
+ read_lock(&tasklist_lock);
for_each_task(p) {
- if ((p == ignored_task) || (p->pgrp != pgrp) ||
+ if ((p == ignored_task) || (p->pgrp != pgrp) ||
(p->state == TASK_ZOMBIE) ||
(p->p_pptr->pid == 1))
continue;
if ((p->p_pptr->pgrp != pgrp) &&
- (p->p_pptr->session == p->session))
- return 0;
+ (p->p_pptr->session == p->session)) {
+ read_unlock(&tasklist_lock);
+ return 0;
+ }
}
- return(1); /* (sighing) "Often!" */
+ read_unlock(&tasklist_lock);
+ return 1; /* (sighing) "Often!" */
}
int is_orphaned_pgrp(int pgrp)
@@ -369,21 +430,27 @@ int is_orphaned_pgrp(int pgrp)
static inline int has_stopped_jobs(int pgrp)
{
+ int retval = 0;
struct task_struct * p;
+ read_lock(&tasklist_lock);
for_each_task(p) {
if (p->pgrp != pgrp)
continue;
- if (p->state == TASK_STOPPED)
- return(1);
+ if (p->state != TASK_STOPPED)
+ continue;
+ retval = 1;
+ break;
}
- return(0);
+ read_unlock(&tasklist_lock);
+ return retval;
}
static inline void forget_original_parent(struct task_struct * father)
{
struct task_struct * p;
+ read_lock(&tasklist_lock);
for_each_task(p) {
if (p->p_opptr == father)
if (task[smp_num_cpus]) /* init */
@@ -391,6 +458,7 @@ static inline void forget_original_parent(struct task_struct * father)
else
p->p_opptr = task[0];
}
+ read_unlock(&tasklist_lock);
}
static inline void close_files(struct files_struct * files)
@@ -456,9 +524,8 @@ static inline void __exit_sighand(struct task_struct *tsk)
if (sig) {
tsk->sig = NULL;
- if (!--sig->count) {
+ if (atomic_dec_and_test(&sig->count))
kfree(sig);
- }
}
}
@@ -475,9 +542,7 @@ static inline void __exit_mm(struct task_struct * tsk)
if (mm != &init_mm) {
flush_cache_mm(mm);
flush_tlb_mm(mm);
-#ifdef __mips__
- mm->context = 0;
-#endif
+ destroy_context(mm);
tsk->mm = &init_mm;
tsk->swappable = 0;
SET_PAGE_DIR(tsk, swapper_pg_dir);
@@ -496,7 +561,7 @@ void exit_mm(struct task_struct *tsk)
__exit_mm(tsk);
}
-/*
+/*
* Send signals to all our closest relatives so that they know
* to properly mourn us..
*/
@@ -505,7 +570,7 @@ static void exit_notify(void)
struct task_struct * p;
forget_original_parent(current);
- /*
+ /*
* Check to see if any process groups have become orphaned
* as a result of our exiting, and if they have any stopped
* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
@@ -523,10 +588,10 @@ static void exit_notify(void)
}
/* Let father know we died */
notify_parent(current);
-
+
/*
* This loop does two things:
- *
+ *
* A. Make init inherit all the child processes
* B. Check to see if any process groups have become orphaned
* as a result of our exiting, and if they have any stopped
@@ -547,7 +612,7 @@ static void exit_notify(void)
notify_parent(p);
/*
* process group orphan check
- * Case ii: Our child is in a different pgrp
+ * Case ii: Our child is in a different pgrp
* than we are, and it was the only connection
* outside, so the child pgrp is now orphaned.
*/
@@ -565,9 +630,9 @@ static void exit_notify(void)
NORET_TYPE void do_exit(long code)
{
- if (intr_count) {
+ if (in_interrupt()) {
+ local_irq_count[smp_processor_id()] = 0; /* Not really correct */
printk("Aiee, killing interrupt handler\n");
- intr_count = 0;
}
fake_volatile:
acct_process(code);
@@ -576,6 +641,9 @@ fake_volatile:
sem_exit();
kerneld_exit();
__exit_mm(current);
+#if CONFIG_AP1000
+ exit_msc(current);
+#endif
__exit_files(current);
__exit_fs(current);
__exit_sighand(current);
@@ -586,10 +654,10 @@ fake_volatile:
#ifdef DEBUG_PROC_TREE
audit_ptree();
#endif
- if (current->exec_domain && current->exec_domain->use_count)
- (*current->exec_domain->use_count)--;
- if (current->binfmt && current->binfmt->use_count)
- (*current->binfmt->use_count)--;
+ if (current->exec_domain && current->exec_domain->module)
+ __MOD_DEC_USE_COUNT(current->exec_domain->module);
+ if (current->binfmt && current->binfmt->module)
+ __MOD_DEC_USE_COUNT(current->binfmt->module);
schedule();
/*
* In order to get rid of the "volatile function does return" message
@@ -609,7 +677,9 @@ fake_volatile:
asmlinkage int sys_exit(int error_code)
{
+ lock_kernel();
do_exit((error_code&0xff)<<8);
+ unlock_kernel();
}
asmlinkage int sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
@@ -619,21 +689,21 @@ asmlinkage int sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct
struct task_struct *p;
if (stat_addr) {
- flag = verify_area(VERIFY_WRITE, stat_addr, sizeof(*stat_addr));
- if (flag)
- return flag;
+ if(verify_area(VERIFY_WRITE, stat_addr, sizeof(*stat_addr)))
+ return -EFAULT;
}
if (ru) {
- flag = verify_area(VERIFY_WRITE, ru, sizeof(*ru));
- if (flag)
- return flag;
+ if(verify_area(VERIFY_WRITE, ru, sizeof(*ru)))
+ return -EFAULT;
}
+
if (options & ~(WNOHANG|WUNTRACED|__WCLONE))
- return -EINVAL;
+ return -EINVAL;
add_wait_queue(&current->wait_chldexit,&wait);
repeat:
- flag=0;
+ flag = 0;
+ read_lock(&tasklist_lock);
for (p = current->p_cptr ; p ; p = p->p_osptr) {
if (pid>0) {
if (p->pid != pid)
@@ -655,23 +725,28 @@ repeat:
continue;
if (!(options & WUNTRACED) && !(p->flags & PF_PTRACED))
continue;
+ read_unlock(&tasklist_lock);
if (ru != NULL)
getrusage(p, RUSAGE_BOTH, ru);
if (stat_addr)
- put_user((p->exit_code << 8) | 0x7f,
- stat_addr);
+ __put_user((p->exit_code << 8) | 0x7f,
+ stat_addr);
p->exit_code = 0;
retval = p->pid;
goto end_wait4;
case TASK_ZOMBIE:
current->cutime += p->utime + p->cutime;
current->cstime += p->stime + p->cstime;
+ read_unlock(&tasklist_lock);
if (ru != NULL)
getrusage(p, RUSAGE_BOTH, ru);
if (stat_addr)
- put_user(p->exit_code, stat_addr);
+ __put_user(p->exit_code, stat_addr);
retval = p->pid;
if (p->p_opptr != p->p_pptr) {
+ /* Note this grabs tasklist_lock
+ * as a writer... (twice!)
+ */
REMOVE_LINKS(p);
p->p_pptr = p->p_opptr;
SET_LINKS(p);
@@ -686,6 +761,7 @@ repeat:
continue;
}
}
+ read_unlock(&tasklist_lock);
if (flag) {
retval = 0;
if (options & WNOHANG)
diff --git a/kernel/fork.c b/kernel/fork.c
index b81d98e77..6204ffeaf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -15,13 +15,17 @@
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/slab.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/malloc.h>
#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/module.h>
#include <asm/system.h>
#include <asm/pgtable.h>
+#include <asm/mmu_context.h>
#include <asm/uaccess.h>
int nr_tasks=1;
@@ -43,11 +47,15 @@ static inline int find_empty_process(void)
max_tasks--; /* count the new process.. */
if (max_tasks < nr_tasks) {
struct task_struct *p;
+ read_lock(&tasklist_lock);
for_each_task (p) {
if (p->uid == current->uid)
- if (--max_tasks < 0)
+ if (--max_tasks < 0) {
+ read_unlock(&tasklist_lock);
return -EAGAIN;
+ }
}
+ read_unlock(&tasklist_lock);
}
}
for (i = 0 ; i < NR_TASKS ; i++) {
@@ -63,6 +71,8 @@ static int get_pid(unsigned long flags)
if (flags & CLONE_PID)
return current->pid;
+
+ read_lock(&tasklist_lock);
repeat:
if ((++last_pid) & 0xffff8000)
last_pid=1;
@@ -72,6 +82,8 @@ repeat:
p->session == last_pid)
goto repeat;
}
+ read_unlock(&tasklist_lock);
+
return last_pid;
}
@@ -81,10 +93,12 @@ static inline int dup_mmap(struct mm_struct * mm)
mm->mmap = NULL;
p = &mm->mmap;
+ flush_cache_mm(current->mm);
for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
- tmp = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
+ tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (!tmp) {
exit_mmap(mm);
+ flush_tlb_mm(current->mm);
return -ENOMEM;
}
*tmp = *mpnt;
@@ -100,6 +114,7 @@ static inline int dup_mmap(struct mm_struct * mm)
}
if (copy_page_range(mm, current->mm, tmp)) {
exit_mmap(mm);
+ flush_tlb_mm(current->mm);
return -ENOMEM;
}
if (tmp->vm_ops && tmp->vm_ops->open)
@@ -107,6 +122,7 @@ static inline int dup_mmap(struct mm_struct * mm)
*p = tmp;
p = &tmp->vm_next;
}
+ flush_tlb_mm(current->mm);
build_mmap_avl(mm);
return 0;
}
@@ -118,9 +134,7 @@ static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
if (!mm)
return -1;
*mm = *current->mm;
-#ifdef __mips__
- mm->context = 0;
-#endif
+ init_new_context(mm);
mm->count = 1;
mm->def_flags = 0;
tsk->mm = mm;
@@ -128,15 +142,17 @@ static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
tsk->cmin_flt = tsk->cmaj_flt = 0;
tsk->nswap = tsk->cnswap = 0;
if (new_page_tables(tsk))
- return -1;
+ goto free_mm;
if (dup_mmap(mm)) {
free_page_tables(mm);
+free_mm:
+ kfree(mm);
return -1;
}
return 0;
}
- SET_PAGE_DIR(tsk, current->mm->pgd);
current->mm->count++;
+ SET_PAGE_DIR(tsk, current->mm->pgd);
return 0;
}
@@ -174,7 +190,7 @@ static inline int copy_files(unsigned long clone_flags, struct task_struct * tsk
tsk->files = newf;
if (!newf)
return -1;
-
+
newf->count = 1;
newf->close_on_exec = oldf->close_on_exec;
newf->open_fds = oldf->open_fds;
@@ -195,13 +211,14 @@ static inline int copy_files(unsigned long clone_flags, struct task_struct * tsk
static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
{
if (clone_flags & CLONE_SIGHAND) {
- current->sig->count++;
+ atomic_inc(&current->sig->count);
return 0;
}
tsk->sig = kmalloc(sizeof(*tsk->sig), GFP_KERNEL);
if (!tsk->sig)
return -1;
- tsk->sig->count = 1;
+ spin_lock_init(&tsk->sig->siglock);
+ atomic_set(&tsk->sig->count, 1);
memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
return 0;
}
@@ -218,10 +235,11 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
unsigned long new_stack;
struct task_struct *p;
- p = (struct task_struct *) kmalloc(sizeof(*p), GFP_KERNEL);
+ lock_kernel();
+ p = alloc_task_struct();
if (!p)
goto bad_fork;
- new_stack = alloc_kernel_stack();
+ new_stack = alloc_kernel_stack(p);
if (!new_stack)
goto bad_fork_free_p;
error = -EAGAIN;
@@ -231,10 +249,10 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
*p = *current;
- if (p->exec_domain && p->exec_domain->use_count)
- (*p->exec_domain->use_count)++;
- if (p->binfmt && p->binfmt->use_count)
- (*p->binfmt->use_count)++;
+ if (p->exec_domain && p->exec_domain->module)
+ __MOD_INC_USE_COUNT(p->exec_domain->module);
+ if (p->binfmt && p->binfmt->module)
+ __MOD_INC_USE_COUNT(p->binfmt->module);
p->did_exec = 0;
p->swappable = 0;
@@ -260,8 +278,8 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
p->cutime = p->cstime = 0;
#ifdef __SMP__
p->processor = NO_PROC_ID;
- p->lock_depth = 1;
#endif
+ p->lock_depth = 0;
p->start_time = jiffies;
task[nr] = p;
SET_LINKS(p);
@@ -277,16 +295,33 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
goto bad_fork_cleanup_fs;
if (copy_mm(clone_flags, p))
goto bad_fork_cleanup_sighand;
- copy_thread(nr, clone_flags, usp, p, regs);
+ error = copy_thread(nr, clone_flags, usp, p, regs);
+ if (error)
+ goto bad_fork_cleanup_sighand;
p->semundo = NULL;
/* ok, now we should be set up.. */
p->swappable = 1;
p->exit_signal = clone_flags & CSIGNAL;
- p->counter = current->counter >> 1;
- wake_up_process(p); /* do this last, just in case */
+
+ /*
+ * "share" dynamic priority between parent and child, thus the
+ * total amount of dynamic priorities in the system doesnt change,
+ * more scheduling fairness. This is only important in the first
+ * timeslice, on the long run the scheduling behaviour is unchanged.
+ */
+ current->counter >>= 1;
+ p->counter = current->counter;
+
+ if(p->pid) {
+ wake_up_process(p); /* do this last, just in case */
+ } else {
+ p->state = TASK_RUNNING;
+ p->next_run = p->prev_run = p;
+ }
++total_forks;
- return p->pid;
+ error = p->pid;
+ goto fork_out;
bad_fork_cleanup_sighand:
exit_sighand(p);
@@ -295,17 +330,19 @@ bad_fork_cleanup_fs:
bad_fork_cleanup_files:
exit_files(p);
bad_fork_cleanup:
- if (p->exec_domain && p->exec_domain->use_count)
- (*p->exec_domain->use_count)--;
- if (p->binfmt && p->binfmt->use_count)
- (*p->binfmt->use_count)--;
+ if (p->exec_domain && p->exec_domain->module)
+ __MOD_DEC_USE_COUNT(p->exec_domain->module);
+ if (p->binfmt && p->binfmt->module)
+ __MOD_DEC_USE_COUNT(p->binfmt->module);
task[nr] = NULL;
REMOVE_LINKS(p);
nr_tasks--;
bad_fork_free_stack:
free_kernel_stack(new_stack);
bad_fork_free_p:
- kfree(p);
+ free_task_struct(p);
bad_fork:
+fork_out:
+ unlock_kernel();
return error;
}
diff --git a/kernel/info.c b/kernel/info.c
index 20b6ad6ae..ffaec7140 100644
--- a/kernel/info.c
+++ b/kernel/info.c
@@ -12,6 +12,8 @@
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <asm/uaccess.h>
@@ -21,6 +23,7 @@ asmlinkage int sys_sysinfo(struct sysinfo *info)
memset((char *)&val, 0, sizeof(struct sysinfo));
+ cli();
val.uptime = jiffies / HZ;
val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
@@ -28,6 +31,7 @@ asmlinkage int sys_sysinfo(struct sysinfo *info)
val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
val.procs = nr_tasks-1;
+ sti();
si_meminfo(&val);
si_swapinfo(&val);
diff --git a/kernel/itimer.c b/kernel/itimer.c
index efcc8351b..479f660a0 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -12,6 +12,8 @@
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <asm/uaccess.h>
@@ -41,7 +43,6 @@ static void jiffiestotv(unsigned long jiffies, struct timeval *value)
{
value->tv_usec = (jiffies % HZ) * (1000000 / HZ);
value->tv_sec = jiffies / HZ;
- return;
}
static int _getitimer(int which, struct itimerval *value)
@@ -78,17 +79,19 @@ static int _getitimer(int which, struct itimerval *value)
return 0;
}
+/* SMP: Only we modify our itimer values. */
asmlinkage int sys_getitimer(int which, struct itimerval *value)
{
- int error;
+ int error = -EFAULT;
struct itimerval get_buffer;
- if (!value)
- return -EFAULT;
- error = _getitimer(which, &get_buffer);
- if (error)
- return error;
- return copy_to_user(value, &get_buffer, sizeof(get_buffer)) ? -EFAULT : 0;
+ if (value) {
+ error = _getitimer(which, &get_buffer);
+ if (!error)
+ error = copy_to_user(value, &get_buffer, sizeof(get_buffer))
+ ? -EFAULT : 0;
+ }
+ return error;
}
void it_real_fn(unsigned long __data)
@@ -149,17 +152,18 @@ int _setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
return 0;
}
+/* SMP: Again, only we play with our itimers, and signals are SMP safe
+ * now so that is not an issue at all anymore.
+ */
asmlinkage int sys_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
{
- int error;
struct itimerval set_buffer, get_buffer;
+ int error;
if (value) {
- error = verify_area(VERIFY_READ, value, sizeof(*value));
- if (error)
- return error;
- error = copy_from_user(&set_buffer, value, sizeof(set_buffer));
- if (error)
+ if(verify_area(VERIFY_READ, value, sizeof(*value)))
+ return -EFAULT;
+ if(copy_from_user(&set_buffer, value, sizeof(set_buffer)))
return -EFAULT;
} else
memset((char *) &set_buffer, 0, sizeof(set_buffer));
@@ -169,6 +173,6 @@ asmlinkage int sys_setitimer(int which, struct itimerval *value, struct itimerva
return error;
if (copy_to_user(ovalue, &get_buffer, sizeof(get_buffer)))
- error = -EFAULT;
- return error;
+ return -EFAULT;
+ return 0;
}
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index b694cd6d2..f5f202c8e 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -1,4 +1,4 @@
-/*
+/*
* Herein lies all the functions/variables that are "exported" for linkage
* with dynamically loaded kernel modules.
* Jon.
@@ -6,13 +6,12 @@
* - Stacked module support and unified symbol table added (June 1994)
* - External symbol table support added (December 1994)
* - Versions on symbols added (December 1994)
- * by Bjorn Ekwall <bj0rn@blox.se>
+ * by Bjorn Ekwall <bj0rn@blox.se>
*/
-#include <linux/module.h>
#include <linux/config.h>
+#include <linux/module.h>
#include <linux/kernel.h>
-#include <linux/smp.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/cdrom.h>
@@ -21,6 +20,7 @@
#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/malloc.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/ptrace.h>
#include <linux/sys.h>
@@ -41,6 +41,7 @@
#include <linux/minix_fs.h>
#include <linux/ext2_fs.h>
#include <linux/random.h>
+#include <linux/reboot.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/sysctl.h>
@@ -49,12 +50,14 @@
#include <linux/genhd.h>
#include <linux/swap.h>
#include <linux/ctype.h>
+#include <linux/file.h>
-extern unsigned char aux_device_present, kbd_read_mask;
-#ifdef __i386__
- extern struct drive_info_struct drive_info;
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
+extern struct drive_info_struct drive_info;
#endif
+extern unsigned char aux_device_present, kbd_read_mask;
+
#ifdef CONFIG_PCI
#include <linux/bios32.h>
#include <linux/pci.h>
@@ -73,7 +76,10 @@ extern unsigned char aux_device_present, kbd_read_mask;
extern char *get_options(char *str, int *ints);
extern void set_device_ro(int dev,int flag);
extern struct file_operations * get_blkfops(unsigned int);
-extern void blkdev_release(struct inode * inode);
+extern int blkdev_release(struct inode * inode);
+#if !defined(CONFIG_NFSD) && defined(CONFIG_NFSD_MODULE)
+extern int (*do_nfsservctl)(int, void *, void *);
+#endif
extern void *sys_call_table;
@@ -81,293 +87,299 @@ extern int sys_tz;
extern int request_dma(unsigned int dmanr, char * deviceID);
extern void free_dma(unsigned int dmanr);
-extern void hard_reset_now(void);
-
-struct symbol_table symbol_table = {
-#include <linux/symtab_begin.h>
#ifdef MODVERSIONS
- { (void *)1 /* Version version :-) */,
- SYMBOL_NAME_STR (Using_Versions) },
+const struct module_symbol __export_Using_Versions
+__attribute__((section("__ksymtab"))) = {
+ 1 /* Version version */, "Using_Versions"
+};
#endif
- /* stackable module support */
- X(register_symtab_from),
#ifdef CONFIG_KERNELD
- X(kerneld_send),
+EXPORT_SYMBOL(kerneld_send);
#endif
- X(get_options),
+EXPORT_SYMBOL(get_options);
#ifdef CONFIG_PCI
- /* PCI BIOS support */
- X(pcibios_present),
- X(pcibios_find_class),
- X(pcibios_find_device),
- X(pcibios_read_config_byte),
- X(pcibios_read_config_word),
- X(pcibios_read_config_dword),
- X(pcibios_strerror),
- X(pcibios_write_config_byte),
- X(pcibios_write_config_word),
- X(pcibios_write_config_dword),
+/* PCI BIOS support */
+EXPORT_SYMBOL(pcibios_present);
+EXPORT_SYMBOL(pcibios_find_class);
+EXPORT_SYMBOL(pcibios_find_device);
+EXPORT_SYMBOL(pcibios_read_config_byte);
+EXPORT_SYMBOL(pcibios_read_config_word);
+EXPORT_SYMBOL(pcibios_read_config_dword);
+EXPORT_SYMBOL(pcibios_write_config_byte);
+EXPORT_SYMBOL(pcibios_write_config_word);
+EXPORT_SYMBOL(pcibios_write_config_dword);
+EXPORT_SYMBOL(pcibios_strerror);
+EXPORT_SYMBOL(pci_strvendor);
+EXPORT_SYMBOL(pci_strdev);
#endif
- /* process memory management */
- X(do_mmap),
- X(do_munmap),
- X(exit_mm),
-
- /* internal kernel memory management */
- X(__get_free_pages),
- X(free_pages),
- X(kmalloc),
- X(kfree),
- X(vmalloc),
- X(vfree),
- X(mem_map),
- X(remap_page_range),
- X(max_mapnr),
- X(high_memory),
- X(update_vm_cache),
-
- /* filesystem internal functions */
- X(getname),
- X(putname),
- X(__iget),
- X(iput),
- X(namei),
- X(lnamei),
- X(open_namei),
- X(sys_close),
- X(close_fp),
- X(check_disk_change),
- X(invalidate_buffers),
- X(invalidate_inodes),
- X(invalidate_inode_pages),
- X(fsync_dev),
- X(permission),
- X(inode_setattr),
- X(inode_change_ok),
- X(set_blocksize),
- X(getblk),
- X(bread),
- X(breada),
- X(__brelse),
- X(__bforget),
- X(ll_rw_block),
- X(__wait_on_buffer),
- X(mark_buffer_uptodate),
- X(unlock_buffer),
- X(dcache_lookup),
- X(dcache_add),
- X(add_blkdev_randomness),
- X(generic_file_read),
- X(generic_file_mmap),
- X(generic_readpage),
-
- /* device registration */
- X(register_chrdev),
- X(unregister_chrdev),
- X(register_blkdev),
- X(unregister_blkdev),
- X(tty_register_driver),
- X(tty_unregister_driver),
- X(tty_std_termios),
-
-#if defined(CONFIG_BLK_DEV_IDECD) || \
- defined(CONFIG_BLK_DEV_SR) || \
- defined(CONFIG_CM206)
- X(register_cdrom),
- X(unregister_cdrom),
- X(cdrom_fops),
+/* process memory management */
+EXPORT_SYMBOL(do_mmap);
+EXPORT_SYMBOL(do_munmap);
+EXPORT_SYMBOL(exit_mm);
+EXPORT_SYMBOL(exit_files);
+
+/* internal kernel memory management */
+EXPORT_SYMBOL(__get_free_pages);
+EXPORT_SYMBOL(free_pages);
+EXPORT_SYMBOL(kmalloc);
+EXPORT_SYMBOL(kfree);
+EXPORT_SYMBOL(vmalloc);
+EXPORT_SYMBOL(vfree);
+EXPORT_SYMBOL(mem_map);
+EXPORT_SYMBOL(remap_page_range);
+EXPORT_SYMBOL(max_mapnr);
+EXPORT_SYMBOL(num_physpages);
+EXPORT_SYMBOL(high_memory);
+EXPORT_SYMBOL(update_vm_cache);
+EXPORT_SYMBOL(kmem_cache_create);
+EXPORT_SYMBOL(kmem_cache_destroy);
+EXPORT_SYMBOL(kmem_cache_alloc);
+EXPORT_SYMBOL(kmem_cache_free);
+
+/* filesystem internal functions */
+EXPORT_SYMBOL(getname);
+EXPORT_SYMBOL(putname);
+EXPORT_SYMBOL(__fput);
+EXPORT_SYMBOL(__iget);
+EXPORT_SYMBOL(iput);
+EXPORT_SYMBOL(namei);
+EXPORT_SYMBOL(lnamei);
+EXPORT_SYMBOL(open_namei);
+EXPORT_SYMBOL(sys_close);
+EXPORT_SYMBOL(close_fp);
+EXPORT_SYMBOL(check_disk_change);
+EXPORT_SYMBOL(invalidate_buffers);
+EXPORT_SYMBOL(invalidate_inodes);
+EXPORT_SYMBOL(invalidate_inode_pages);
+EXPORT_SYMBOL(fsync_dev);
+EXPORT_SYMBOL(permission);
+EXPORT_SYMBOL(inode_setattr);
+EXPORT_SYMBOL(inode_change_ok);
+EXPORT_SYMBOL(get_hardblocksize);
+EXPORT_SYMBOL(set_blocksize);
+EXPORT_SYMBOL(getblk);
+EXPORT_SYMBOL(bread);
+EXPORT_SYMBOL(breada);
+EXPORT_SYMBOL(__brelse);
+EXPORT_SYMBOL(__bforget);
+EXPORT_SYMBOL(ll_rw_block);
+EXPORT_SYMBOL(__wait_on_buffer);
+EXPORT_SYMBOL(mark_buffer_uptodate);
+EXPORT_SYMBOL(unlock_buffer);
+EXPORT_SYMBOL(dcache_lookup);
+EXPORT_SYMBOL(dcache_add);
+EXPORT_SYMBOL(add_blkdev_randomness);
+EXPORT_SYMBOL(generic_file_read);
+EXPORT_SYMBOL(generic_file_write);
+EXPORT_SYMBOL(generic_file_mmap);
+EXPORT_SYMBOL(generic_readpage);
+EXPORT_SYMBOL(file_lock_table);
+EXPORT_SYMBOL(posix_lock_file);
+EXPORT_SYMBOL(posix_test_lock);
+EXPORT_SYMBOL(posix_block_lock);
+EXPORT_SYMBOL(posix_unblock_lock);
+
+#if !defined(CONFIG_NFSD) && defined(CONFIG_NFSD_MODULE)
+EXPORT_SYMBOL(do_nfsservctl);
#endif
-
- /* block device driver support */
- X(block_read),
- X(block_write),
- X(block_fsync),
- X(wait_for_request),
- X(blksize_size),
- X(hardsect_size),
- X(blk_size),
- X(blk_dev),
- X(is_read_only),
- X(set_device_ro),
- X(bmap),
- X(sync_dev),
- X(get_blkfops),
- X(blkdev_open),
- X(blkdev_release),
- X(gendisk_head),
- X(resetup_one_dev),
- X(unplug_device),
-#ifdef __i386__
- X(drive_info),
+
+/* device registration */
+EXPORT_SYMBOL(register_chrdev);
+EXPORT_SYMBOL(unregister_chrdev);
+EXPORT_SYMBOL(register_blkdev);
+EXPORT_SYMBOL(unregister_blkdev);
+EXPORT_SYMBOL(tty_register_driver);
+EXPORT_SYMBOL(tty_unregister_driver);
+EXPORT_SYMBOL(tty_std_termios);
+
+/* block device driver support */
+EXPORT_SYMBOL(block_read);
+EXPORT_SYMBOL(block_write);
+EXPORT_SYMBOL(block_fsync);
+EXPORT_SYMBOL(wait_for_request);
+EXPORT_SYMBOL(blksize_size);
+EXPORT_SYMBOL(hardsect_size);
+EXPORT_SYMBOL(blk_size);
+EXPORT_SYMBOL(blk_dev);
+EXPORT_SYMBOL(is_read_only);
+EXPORT_SYMBOL(set_device_ro);
+EXPORT_SYMBOL(bmap);
+EXPORT_SYMBOL(sync_dev);
+EXPORT_SYMBOL(get_blkfops);
+EXPORT_SYMBOL(blkdev_open);
+EXPORT_SYMBOL(blkdev_release);
+EXPORT_SYMBOL(gendisk_head);
+EXPORT_SYMBOL(resetup_one_dev);
+EXPORT_SYMBOL(unplug_device);
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
+EXPORT_SYMBOL(drive_info);
#endif
-#ifdef CONFIG_SERIAL
- /* Module creation of serial units */
- X(register_serial),
- X(unregister_serial),
+/* tty routines */
+EXPORT_SYMBOL(tty_hangup);
+EXPORT_SYMBOL(tty_wait_until_sent);
+EXPORT_SYMBOL(tty_check_change);
+EXPORT_SYMBOL(tty_hung_up_p);
+EXPORT_SYMBOL(do_SAK);
+EXPORT_SYMBOL(console_print);
+
+/* filesystem registration */
+EXPORT_SYMBOL(register_filesystem);
+EXPORT_SYMBOL(unregister_filesystem);
+
+/* executable format registration */
+EXPORT_SYMBOL(register_binfmt);
+EXPORT_SYMBOL(unregister_binfmt);
+EXPORT_SYMBOL(search_binary_handler);
+EXPORT_SYMBOL(prepare_binprm);
+EXPORT_SYMBOL(remove_arg_zero);
+
+/* execution environment registration */
+EXPORT_SYMBOL(lookup_exec_domain);
+EXPORT_SYMBOL(register_exec_domain);
+EXPORT_SYMBOL(unregister_exec_domain);
+
+/* sysctl table registration */
+EXPORT_SYMBOL(register_sysctl_table);
+EXPORT_SYMBOL(unregister_sysctl_table);
+EXPORT_SYMBOL(sysctl_string);
+EXPORT_SYMBOL(sysctl_intvec);
+EXPORT_SYMBOL(proc_dostring);
+EXPORT_SYMBOL(proc_dointvec);
+EXPORT_SYMBOL(proc_dointvec_minmax);
+
+/* interrupt handling */
+EXPORT_SYMBOL(request_irq);
+EXPORT_SYMBOL(free_irq);
+EXPORT_SYMBOL(enable_irq);
+EXPORT_SYMBOL(disable_irq);
+EXPORT_SYMBOL(probe_irq_on);
+EXPORT_SYMBOL(probe_irq_off);
+EXPORT_SYMBOL(bh_active);
+EXPORT_SYMBOL(bh_mask);
+EXPORT_SYMBOL(bh_mask_count);
+EXPORT_SYMBOL(bh_base);
+EXPORT_SYMBOL(add_timer);
+EXPORT_SYMBOL(del_timer);
+EXPORT_SYMBOL(tq_timer);
+EXPORT_SYMBOL(tq_immediate);
+EXPORT_SYMBOL(tq_scheduler);
+EXPORT_SYMBOL(timer_active);
+EXPORT_SYMBOL(timer_table);
+
+#ifdef __SMP__
+/* Various random spinlocks we want to export */
+EXPORT_SYMBOL(tqueue_lock);
+EXPORT_SYMBOL(waitqueue_lock);
#endif
- /* tty routines */
- X(tty_hangup),
- X(tty_wait_until_sent),
- X(tty_check_change),
- X(tty_hung_up_p),
- X(do_SAK),
- X(console_print),
-
- /* filesystem registration */
- X(register_filesystem),
- X(unregister_filesystem),
-
- /* executable format registration */
- X(register_binfmt),
- X(unregister_binfmt),
- X(search_binary_handler),
- X(prepare_binprm),
- X(remove_arg_zero),
-
- /* execution environment registration */
- X(lookup_exec_domain),
- X(register_exec_domain),
- X(unregister_exec_domain),
-
- /* sysctl table registration */
- X(register_sysctl_table),
- X(unregister_sysctl_table),
- X(sysctl_string),
- X(sysctl_intvec),
- X(proc_dostring),
- X(proc_dointvec),
- X(proc_dointvec_minmax),
-
- /* interrupt handling */
- X(request_irq),
- X(free_irq),
- X(enable_irq),
- X(disable_irq),
- X(probe_irq_on),
- X(probe_irq_off),
- X(bh_active),
- X(bh_mask),
- X(bh_mask_count),
- X(bh_base),
- X(add_timer),
- X(del_timer),
- X(tq_timer),
- X(tq_immediate),
- X(tq_scheduler),
- X(timer_active),
- X(timer_table),
- X(intr_count),
-
- /* autoirq from drivers/net/auto_irq.c */
- X(autoirq_setup),
- X(autoirq_report),
-
- /* dma handling */
- X(request_dma),
- X(free_dma),
+
+/* autoirq from drivers/net/auto_irq.c */
+EXPORT_SYMBOL(autoirq_setup);
+EXPORT_SYMBOL(autoirq_report);
+
+/* dma handling */
+EXPORT_SYMBOL(request_dma);
+EXPORT_SYMBOL(free_dma);
#ifdef HAVE_DISABLE_HLT
- X(disable_hlt),
- X(enable_hlt),
+EXPORT_SYMBOL(disable_hlt);
+EXPORT_SYMBOL(enable_hlt);
#endif
- /* IO port handling */
- X(check_region),
- X(request_region),
- X(release_region),
-
- /* process management */
- X(wake_up),
- X(wake_up_interruptible),
- X(sleep_on),
- X(interruptible_sleep_on),
- X(schedule),
- X(current_set),
- X(jiffies),
- X(xtime),
- X(do_gettimeofday),
- X(loops_per_sec),
- X(need_resched),
- X(kstat),
- X(kill_proc),
- X(kill_pg),
- X(kill_sl),
-
- /* misc */
- X(panic),
- X(printk),
- X(sprintf),
- X(vsprintf),
- X(kdevname),
- X(simple_strtoul),
- X(system_utsname),
- X(sys_call_table),
- X(hard_reset_now),
- X(_ctype),
- X(secure_tcp_sequence_number),
-
- /* Signal interfaces */
- X(send_sig),
-
- /* Program loader interfaces */
- X(setup_arg_pages),
- X(copy_strings),
- X(do_execve),
- X(flush_old_exec),
- X(open_inode),
- X(read_exec),
-
- /* Miscellaneous access points */
- X(si_meminfo),
-
- /* Added to make file system as module */
- X(set_writetime),
- X(sys_tz),
- X(__wait_on_super),
- X(file_fsync),
- X(clear_inode),
- X(refile_buffer),
- X(nr_async_pages),
- X(___strtok),
- X(init_fifo),
- X(super_blocks),
- X(reuse_list),
- X(fifo_inode_operations),
- X(chrdev_inode_operations),
- X(blkdev_inode_operations),
- X(read_ahead),
- X(get_hash_table),
- X(get_empty_inode),
- X(insert_inode_hash),
- X(event),
- X(__down),
- X(__up),
- X(securelevel),
+/* IO port handling */
+EXPORT_SYMBOL(check_region);
+EXPORT_SYMBOL(request_region);
+EXPORT_SYMBOL(release_region);
+
+/* process management */
+EXPORT_SYMBOL(wake_up);
+EXPORT_SYMBOL(wake_up_interruptible);
+EXPORT_SYMBOL(sleep_on);
+EXPORT_SYMBOL(interruptible_sleep_on);
+EXPORT_SYMBOL(schedule);
+EXPORT_SYMBOL(current_set);
+EXPORT_SYMBOL(jiffies);
+EXPORT_SYMBOL(xtime);
+EXPORT_SYMBOL(do_gettimeofday);
+EXPORT_SYMBOL(loops_per_sec);
+EXPORT_SYMBOL(need_resched);
+EXPORT_SYMBOL(kstat);
+EXPORT_SYMBOL(kill_proc);
+EXPORT_SYMBOL(kill_pg);
+EXPORT_SYMBOL(kill_sl);
+
+/* misc */
+EXPORT_SYMBOL(panic);
+EXPORT_SYMBOL(printk);
+EXPORT_SYMBOL(sprintf);
+EXPORT_SYMBOL(vsprintf);
+EXPORT_SYMBOL(kdevname);
+EXPORT_SYMBOL(simple_strtoul);
+EXPORT_SYMBOL(system_utsname);
+EXPORT_SYMBOL(sys_call_table);
+EXPORT_SYMBOL(machine_restart);
+EXPORT_SYMBOL(machine_halt);
+EXPORT_SYMBOL(machine_power_off);
+EXPORT_SYMBOL(register_reboot_notifier);
+EXPORT_SYMBOL(unregister_reboot_notifier);
+EXPORT_SYMBOL(_ctype);
+EXPORT_SYMBOL(secure_tcp_sequence_number);
+EXPORT_SYMBOL(get_random_bytes);
+
+/* Signal interfaces */
+EXPORT_SYMBOL(send_sig);
+
+/* Program loader interfaces */
+EXPORT_SYMBOL(setup_arg_pages);
+EXPORT_SYMBOL(copy_strings);
+EXPORT_SYMBOL(do_execve);
+EXPORT_SYMBOL(flush_old_exec);
+EXPORT_SYMBOL(open_inode);
+EXPORT_SYMBOL(read_exec);
+
+/* Miscellaneous access points */
+EXPORT_SYMBOL(si_meminfo);
+
+/* Added to make file system as module */
+EXPORT_SYMBOL(set_writetime);
+EXPORT_SYMBOL(sys_tz);
+EXPORT_SYMBOL(__wait_on_super);
+EXPORT_SYMBOL(file_fsync);
+EXPORT_SYMBOL(clear_inode);
+EXPORT_SYMBOL(refile_buffer);
+EXPORT_SYMBOL(nr_async_pages);
+EXPORT_SYMBOL(___strtok);
+EXPORT_SYMBOL(init_fifo);
+EXPORT_SYMBOL(super_blocks);
+EXPORT_SYMBOL(fifo_inode_operations);
+EXPORT_SYMBOL(chrdev_inode_operations);
+EXPORT_SYMBOL(blkdev_inode_operations);
+EXPORT_SYMBOL(read_ahead);
+EXPORT_SYMBOL(get_hash_table);
+EXPORT_SYMBOL(get_empty_inode);
+EXPORT_SYMBOL(insert_inode_hash);
+EXPORT_SYMBOL(event);
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(securelevel);
+
/* all busmice */
- X(add_mouse_randomness),
- X(fasync_helper),
+EXPORT_SYMBOL(add_mouse_randomness);
+EXPORT_SYMBOL(fasync_helper);
+
/* psaux mouse */
- X(aux_device_present),
- X(kbd_read_mask),
+EXPORT_SYMBOL(aux_device_present);
+#ifdef CONFIG_VT
+EXPORT_SYMBOL(kbd_read_mask);
+#endif
#ifdef CONFIG_BLK_DEV_MD
- X(disk_name), /* for md.c */
+EXPORT_SYMBOL(disk_name); /* for md.c */
#endif
- /* binfmt_aout */
- X(get_write_access),
- X(put_write_access),
-
- /********************************************************
- * Do not add anything below this line,
- * as the stacked modules depend on this!
- */
-#include <linux/symtab_end.h>
-};
-
-/*
-int symbol_table_size = sizeof (symbol_table) / sizeof (symbol_table[0]);
-*/
+/* binfmt_aout */
+EXPORT_SYMBOL(get_write_access);
+EXPORT_SYMBOL(put_write_access);
diff --git a/kernel/module.c b/kernel/module.c
index 09cee93b7..885539b5c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -4,789 +4,963 @@
#include <linux/string.h>
#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/malloc.h>
-#include <linux/vmalloc.h>
#include <linux/config.h>
-
#include <asm/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+
/*
* Originally by Anonymous (as far as I know...)
* Linux version by Bas Laarhoven <bas@vimec.nl>
* 0.99.14 version by Jon Tombs <jon@gtex02.us.es>,
- *
* Heavily modified by Bjorn Ekwall <bj0rn@blox.se> May 1994 (C)
- * This source is covered by the GNU GPL, the same as all kernel sources.
- *
- * Features:
- * - Supports stacked modules (removable only of there are no dependents).
- * - Supports table of symbols defined by the modules.
- * - Supports /proc/ksyms, showing value, name and owner of all
- * the symbols defined by all modules (in stack order).
- * - Added module dependencies information into /proc/modules
- * - Supports redefines of all symbols, for streams-like behaviour.
- * - Compatible with older versions of insmod.
- *
- * New addition in December 1994: (Bjorn Ekwall, idea from Jacques Gelinas)
- * - Externally callable function:
- *
- * "int register_symtab(struct symbol_table *)"
+ * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996
*
- * This function can be called from within the kernel,
- * and ALSO from loadable modules.
- * The goal is to assist in modularizing the kernel even more,
- * and finally: reducing the number of entries in ksyms.c
- * since every subsystem should now be able to decide and
- * control exactly what symbols it wants to export, locally!
- *
- * On 1-Aug-95: <Matti.Aarnio@utu.fi> altered code to use same style as
- * do /proc/net/XXX "files". Namely allow more than 4kB
- * (or what the block size is) output.
- *
- * - Use dummy syscall functions for users who disable all
- * module support. Similar to kernel/sys.c (Paul Gortmaker)
+ * This source is covered by the GNU GPL, the same as all kernel sources.
*/
#ifdef CONFIG_MODULES /* a *big* #ifdef block... */
-static struct module kernel_module;
-struct module *module_list = &kernel_module;
+extern struct module_symbol __start___ksymtab[];
+extern struct module_symbol __stop___ksymtab[];
-static int freeing_modules; /* true if some modules are marked for deletion */
+extern const struct exception_table_entry __start___ex_table[];
+extern const struct exception_table_entry __stop___ex_table[];
-static struct module *find_module( const char *name);
-static int free_modules( void);
+static struct module kernel_module =
+{
+ sizeof(struct module), /* size_of_struct */
+ NULL, /* next */
+ "", /* name */
+ 0, /* size */
+ 1, /* usecount */
+ MOD_RUNNING, /* flags */
+	0,				/* nsyms -- to be filled in in init_modules */
+ 0, /* ndeps */
+ __start___ksymtab, /* syms */
+ NULL, /* deps */
+ NULL, /* refs */
+ NULL, /* init */
+ NULL, /* cleanup */
+ __start___ex_table, /* ex_table_start */
+ __stop___ex_table, /* ex_table_end */
+ /* Rest are NULL */
+};
+
+struct module *module_list = &kernel_module;
+
+static long get_mod_name(const char *user_name, char **buf);
+static void put_mod_name(char *buf);
+static struct module *find_module(const char *name);
+static void free_module(struct module *);
-extern struct symbol_table symbol_table; /* in kernel/ksyms.c */
/*
* Called at boot time
*/
-void init_modules(void) {
- struct internal_symbol *sym;
- int i;
- for (i = 0, sym = symbol_table.symbol; sym->name; ++sym, ++i)
- ;
- symbol_table.n_symbols = i;
+void init_modules(void)
+{
+ kernel_module.nsyms = __stop___ksymtab - __start___ksymtab;
- kernel_module.symtab = &symbol_table;
- kernel_module.state = MOD_RUNNING; /* Hah! */
- kernel_module.name = "";
+#ifdef __alpha__
+ __asm__("stq $29,%0" : "=m"(kernel_module.gp));
+#endif
}
-
/*
* Copy the name of a module from user space.
*/
-inline int
-get_mod_name(char *user_name, char *buf)
+
+static inline long
+get_mod_name(const char *user_name, char **buf)
{
- /* Should return -EBIG instead of -EFAULT when the name
- is too long, but that we couldn't detect real faults then.
- Maybe strncpy_from_user() should return -EBIG, when
- the source string is too long. */
- return strncpy_from_user(buf, user_name, MOD_MAX_NAME);
+ unsigned long page;
+ long retval;
+
+ if ((unsigned long)user_name >= TASK_SIZE)
+ return -EFAULT;
+
+ page = __get_free_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ retval = strncpy_from_user((char *)page, user_name, PAGE_SIZE);
+ if (retval > 0) {
+ if (retval < PAGE_SIZE) {
+ *buf = (char *)page;
+ return retval;
+ }
+ retval = -ENAMETOOLONG;
+ } else if (!retval)
+ retval = -EINVAL;
+
+ free_page(page);
+ return retval;
}
+static inline void
+put_mod_name(char *buf)
+{
+ free_page((unsigned long)buf);
+}
/*
* Allocate space for a module.
*/
+
asmlinkage unsigned long
-sys_create_module(char *module_name, unsigned long size)
+sys_create_module(const char *name_user, size_t size)
{
- struct module *mp;
- void* addr;
- int error;
- int npages;
- int sspace = sizeof(struct module) + MOD_MAX_NAME;
- char name[MOD_MAX_NAME];
-
- if (!suser())
- return -EPERM;
- if (module_name == NULL || size == 0)
- return -EINVAL;
- if ((error = get_mod_name(module_name, name)) < 0)
- return error;
+ char *name;
+ long namelen, error;
+ struct module *mod;
+
+ lock_kernel();
+ if (!suser()) {
+ error = -EPERM;
+ goto err0;
+ }
+ if ((namelen = get_mod_name(name_user, &name)) < 0) {
+ error = namelen;
+ goto err0;
+ }
+ if (size < sizeof(struct module)+namelen) {
+ error = -EINVAL;
+ goto err1;
+ }
if (find_module(name) != NULL) {
- return -EEXIST;
+ error = -EEXIST;
+ goto err1;
}
-
- if ((mp = (struct module*) kmalloc(sspace, GFP_KERNEL)) == NULL) {
- return -ENOMEM;
+ if ((mod = (struct module *)vmalloc(size)) == NULL) {
+ error = -ENOMEM;
+ goto err1;
}
- strcpy((char *)(mp + 1), name); /* why not? */
- npages = (size + sizeof (long) + PAGE_SIZE - 1) / PAGE_SIZE;
- if ((addr = vmalloc(npages * PAGE_SIZE)) == 0) {
- kfree_s(mp, sspace);
- return -ENOMEM;
- }
+ memset(mod, 0, sizeof(*mod));
+ mod->size_of_struct = sizeof(*mod);
+ mod->next = module_list;
+ mod->name = (char *)(mod + 1);
+ mod->size = size;
+ memcpy((char*)(mod+1), name, namelen+1);
- mp->next = module_list;
- mp->ref = NULL;
- mp->symtab = NULL;
- mp->name = (char *)(mp + 1);
- mp->size = npages;
- mp->addr = addr;
- mp->state = MOD_UNINITIALIZED;
- mp->cleanup = NULL;
- mp->exceptinfo.start = NULL;
- mp->exceptinfo.stop = NULL;
+ put_mod_name(name);
- * (long *) addr = 0; /* set use count to zero */
- module_list = mp; /* link it in */
+ module_list = mod; /* link it in */
- pr_debug("module `%s' (%lu pages @ 0x%08lx) created\n",
- mp->name, (unsigned long) mp->size, (unsigned long) mp->addr);
- return (unsigned long) addr;
+ error = (long) mod;
+ goto err0;
+err1:
+ put_mod_name(name);
+err0:
+ unlock_kernel();
+ return error;
}
-
/*
* Initialize a module.
*/
+
asmlinkage int
-sys_init_module(char *module_name, char *code, unsigned codesize,
- struct mod_routines *routines,
- struct symbol_table *symtab)
+sys_init_module(const char *name_user, struct module *mod_user)
{
- struct module *mp;
- struct symbol_table *newtab;
- char name[MOD_MAX_NAME];
- int error;
- struct mod_routines rt;
+ struct module mod_tmp, *mod;
+ char *name, *n_name;
+ long namelen, n_namelen, i, error = -EPERM;
+ unsigned long mod_user_size;
+ struct module_ref *dep;
+ lock_kernel();
if (!suser())
- return -EPERM;
+ goto err0;
+ if ((namelen = get_mod_name(name_user, &name)) < 0) {
+ error = namelen;
+ goto err0;
+ }
+ if ((mod = find_module(name)) == NULL) {
+ error = -ENOENT;
+ goto err1;
+ }
+
+ /* Check module header size. We allow a bit of slop over the
+ size we are familiar with to cope with a version of insmod
+	   for a newer kernel.  But don't overdo it. */
+ if ((error = get_user(mod_user_size, &mod_user->size_of_struct)) != 0)
+ goto err1;
+ if (mod_user_size < (unsigned long)&((struct module *)0L)->persist_start
+ || mod_user_size > sizeof(struct module) + 16*sizeof(void*)) {
+ printk(KERN_ERR "init_module: Invalid module header size.\n"
+ KERN_ERR "A new version of the modutils is likely "
+ "needed.\n");
+ error = -EINVAL;
+ goto err1;
+ }
+
+ /* Hold the current contents while we play with the user's idea
+ of righteousness. */
+ mod_tmp = *mod;
+
+ error = copy_from_user(mod, mod_user, sizeof(struct module));
+ if (error) {
+ error = -EFAULT;
+ goto err2;
+ }
+
+ /* Sanity check the size of the module. */
+ error = -EINVAL;
+
+ if (mod->size > mod_tmp.size) {
+ printk(KERN_ERR "init_module: Size of initialized module "
+ "exceeds size of created module.\n");
+ goto err2;
+ }
+
+ /* Make sure all interesting pointers are sane. */
-#ifdef __i386__
- /* A little bit of protection... we "know" where the user stack is... */
+#define bound(p, n, m) ((unsigned long)(p) >= (unsigned long)(m+1) && \
+ (unsigned long)((p)+(n)) <= (unsigned long)(m) + (m)->size)
- if (symtab && ((unsigned long)symtab > 0xb0000000)) {
- printk(KERN_WARNING "warning: you are using an old insmod, no symbols will be inserted!\n");
- symtab = NULL;
+ if (!bound(mod->name, namelen, mod)) {
+ printk(KERN_ERR "init_module: mod->name out of bounds.\n");
+ goto err2;
+ }
+ if (mod->nsyms && !bound(mod->syms, mod->nsyms, mod)) {
+ printk(KERN_ERR "init_module: mod->syms out of bounds.\n");
+ goto err2;
+ }
+ if (mod->ndeps && !bound(mod->deps, mod->ndeps, mod)) {
+ printk(KERN_ERR "init_module: mod->deps out of bounds.\n");
+ goto err2;
+ }
+ if (mod->init && !bound(mod->init, 0, mod)) {
+ printk(KERN_ERR "init_module: mod->init out of bounds.\n");
+ goto err2;
+ }
+ if (mod->cleanup && !bound(mod->cleanup, 0, mod)) {
+ printk(KERN_ERR "init_module: mod->cleanup out of bounds.\n");
+ goto err2;
+ }
+ if (mod->ex_table_start > mod->ex_table_end
+ || (mod->ex_table_start &&
+ !((unsigned long)mod->ex_table_start >= (unsigned long)(mod+1)
+ && ((unsigned long)mod->ex_table_end
+ < (unsigned long)mod + mod->size)))
+ || (((unsigned long)mod->ex_table_start
+ - (unsigned long)mod->ex_table_end)
+ % sizeof(struct exception_table_entry))) {
+ printk(KERN_ERR "init_module: mod->ex_table_* invalid.\n");
+ goto err2;
+ }
+ if (mod->flags & ~MOD_AUTOCLEAN) {
+ printk(KERN_ERR "init_module: mod->flags invalid.\n");
+ goto err2;
+ }
+#ifdef __alpha__
+ if (!bound(mod->gp - 0x8000, 0, mod)) {
+ printk(KERN_ERR "init_module: mod->gp out of bounds.\n");
+ goto err2;
}
#endif
- if ((error = get_mod_name(module_name, name)) < 0)
- return error;
- pr_debug("initializing module `%s', %d (0x%x) bytes\n",
- name, codesize, codesize);
- if (copy_from_user(&rt, routines, sizeof rt))
- return -EFAULT;
- if ((mp = find_module(name)) == NULL)
- return -ENOENT;
- if (codesize & MOD_AUTOCLEAN) {
- /*
- * set autoclean marker from codesize...
- * set usage count to "zero"
- */
- codesize &= ~MOD_AUTOCLEAN;
- GET_USE_COUNT(mp) = MOD_AUTOCLEAN;
- }
- if ((codesize + sizeof (long) + PAGE_SIZE - 1) / PAGE_SIZE > mp->size)
- return -EINVAL;
- if (copy_from_user((char *)mp->addr + sizeof (long), code, codesize))
- return -EFAULT;
- memset((char *)mp->addr + sizeof (long) + codesize, 0,
- mp->size * PAGE_SIZE - (codesize + sizeof (long)));
- pr_debug("module init entry = 0x%08lx, cleanup entry = 0x%08lx\n",
- (unsigned long) rt.init, (unsigned long) rt.cleanup);
- if (rt.signature != MODULE_2_1_7_SIG){
- printk ("Older insmod used with kernel 2.1.7 +\n");
- return -EINVAL;
+ if (mod_member_present(mod, can_unload)
+ && mod->can_unload && !bound(mod->can_unload, 0, mod)) {
+ printk(KERN_ERR "init_module: mod->can_unload out of bounds.\n");
+ goto err2;
}
- mp->cleanup = rt.cleanup;
- mp->exceptinfo = rt.exceptinfo;
-
- /* update kernel symbol table */
- if (symtab) { /* symtab == NULL means no new entries to handle */
- struct internal_symbol *sym;
- struct module_ref *ref;
- int size;
- int i;
- int legal_start;
-
- error = get_user(size, &symtab->size);
- if (error)
- return error;
- if ((newtab = (struct symbol_table*) kmalloc(size, GFP_KERNEL)) == NULL) {
- return -ENOMEM;
- }
- if (copy_from_user((char *)(newtab), symtab, size)) {
- kfree_s(newtab, size);
- return -EFAULT;
- }
+#undef bound
- /* sanity check */
- legal_start = sizeof(struct symbol_table) +
- newtab->n_symbols * sizeof(struct internal_symbol) +
- newtab->n_refs * sizeof(struct module_ref);
+ /* Check that the user isn't doing something silly with the name. */
- if ((newtab->n_symbols < 0) || (newtab->n_refs < 0) || (legal_start > size)) {
- printk(KERN_WARNING "Rejecting illegal symbol table (n_symbols=%d,n_refs=%d)\n",
- newtab->n_symbols, newtab->n_refs);
- kfree_s(newtab, size);
- return -EINVAL;
- }
+ if ((n_namelen = get_mod_name(mod->name - (unsigned long)mod
+ + (unsigned long)mod_user,
+ &n_name)) < 0) {
+ error = n_namelen;
+ goto err2;
+ }
+ if (namelen != n_namelen || strcmp(n_name, mod_tmp.name) != 0) {
+ printk(KERN_ERR "init_module: changed module name to "
+ "`%s' from `%s'\n",
+ n_name, mod_tmp.name);
+ goto err3;
+ }
- /* relocate name pointers, index referred from start of table */
- for (sym = &(newtab->symbol[0]), i = 0; i < newtab->n_symbols; ++sym, ++i) {
- if ((unsigned long)sym->name < legal_start || size <= (unsigned long)sym->name) {
- printk(KERN_WARNING "Rejecting illegal symbol table\n");
- kfree_s(newtab, size);
- return -EINVAL;
- }
- /* else */
- sym->name += (long)newtab;
- }
- mp->symtab = newtab;
-
- /* Update module references.
- * On entry, from "insmod", ref->module points to
- * the referenced module!
- * Now it will point to the current module instead!
- * The ref structure becomes the first link in the linked
- * list of references to the referenced module.
- * Also, "sym" from above, points to the first ref entry!!!
- */
- for (ref = (struct module_ref *)sym, i = 0;
- i < newtab->n_refs; ++ref, ++i) {
-
- /* Check for valid reference */
- struct module *link = module_list;
- while (link && (ref->module != link))
- link = link->next;
-
- if (link == (struct module *)0) {
- printk(KERN_WARNING "Non-module reference! Rejected!\n");
- return -EINVAL;
- }
+ /* Ok, that's about all the sanity we can stomach; copy the rest. */
- ref->next = ref->module->ref;
- ref->module->ref = ref;
- ref->module = mp;
- }
+ if (copy_from_user(mod+1, mod_user+1, mod->size-sizeof(*mod))) {
+ error = -EFAULT;
+ goto err3;
}
- GET_USE_COUNT(mp) += 1;
- if ((*rt.init)() != 0) {
- GET_USE_COUNT(mp) = 0;
- return -EBUSY;
+ /* Update module references. */
+ mod->next = mod_tmp.next;
+ mod->refs = NULL;
+ for (i = 0, dep = mod->deps; i < mod->ndeps; ++i, ++dep) {
+ struct module *o, *d = dep->dep;
+
+		/* Make sure the indicated dependencies are really modules. */
+ if (d == mod) {
+ printk(KERN_ERR "init_module: self-referential "
+ "dependancy in mod->deps.\n");
+ goto err3;
+ }
+
+ for (o = module_list; o != &kernel_module; o = o->next)
+ if (o == d) goto found_dep;
+
+ printk(KERN_ERR "init_module: found dependancy that is "
+ "(no longer?) a module.\n");
+ goto err3;
+
+ found_dep:
+ dep->ref = mod;
+ dep->next_ref = d->refs;
+ d->refs = dep;
+		/* Being referenced by a dependent module counts as a
+		   use as far as kerneld is concerned. */
+ d->flags |= MOD_USED_ONCE;
}
- GET_USE_COUNT(mp) -= 1;
- mp->state = MOD_RUNNING;
- return 0;
+ /* Free our temporary memory. */
+ put_mod_name(n_name);
+ put_mod_name(name);
+
+ /* Initialize the module. */
+ mod->usecount = 1;
+ if (mod->init && mod->init() != 0) {
+ mod->usecount = 0;
+ error = -EBUSY;
+ goto err0;
+ }
+ mod->usecount--;
+
+ /* And set it running. */
+ mod->flags |= MOD_RUNNING;
+ error = 0;
+ goto err0;
+
+err3:
+ put_mod_name(n_name);
+err2:
+ *mod = mod_tmp;
+err1:
+ put_mod_name(name);
+err0:
+ unlock_kernel();
+ return error;
}
asmlinkage int
-sys_delete_module(char *module_name)
+sys_delete_module(const char *name_user)
{
- struct module *mp;
- char name[MOD_MAX_NAME];
- int error;
+ struct module *mod, *next;
+ char *name;
+ long error = -EPERM;
+ lock_kernel();
if (!suser())
- return -EPERM;
- /* else */
- if (module_name != NULL) {
- if ((error = get_mod_name(module_name, name)) < 0)
- return error;
- if ((mp = find_module(name)) == NULL)
- return -ENOENT;
- if ((mp->ref != NULL) ||
- ((GET_USE_COUNT(mp) & ~(MOD_AUTOCLEAN | MOD_VISITED)) != 0))
- return -EBUSY;
- GET_USE_COUNT(mp) &= ~(MOD_AUTOCLEAN | MOD_VISITED);
- if (mp->state == MOD_RUNNING)
- (*mp->cleanup)();
- mp->state = MOD_DELETED;
- free_modules();
- }
- /* for automatic reaping */
- else {
- struct module *mp_next;
- for (mp = module_list; mp != &kernel_module; mp = mp_next) {
- mp_next = mp->next;
- if ((mp->ref == NULL) && (mp->state == MOD_RUNNING) &&
- ((GET_USE_COUNT(mp) & ~MOD_VISITED) == MOD_AUTOCLEAN)) {
- if ((GET_USE_COUNT(mp) & MOD_VISITED)) {
- /* Don't reap until one "cycle" after last _use_ */
- GET_USE_COUNT(mp) &= ~MOD_VISITED;
- }
- else {
- GET_USE_COUNT(mp) &= ~(MOD_AUTOCLEAN | MOD_VISITED);
- (*mp->cleanup)();
- mp->state = MOD_DELETED;
- free_modules();
- }
- }
+ goto out;
+
+ if (name_user) {
+ if ((error = get_mod_name(name_user, &name)) < 0)
+ goto out;
+ if (error == 0) {
+ error = -EINVAL;
+ put_mod_name(name);
+ goto out;
+ }
+ error = -ENOENT;
+ if ((mod = find_module(name)) == NULL) {
+ put_mod_name(name);
+ goto out;
+ }
+ put_mod_name(name);
+ error = -EBUSY;
+ if (mod->refs != NULL || __MOD_IN_USE(mod))
+ goto out;
+
+ free_module(mod);
+ error = 0;
+ goto out;
+ }
+
+ /* Do automatic reaping */
+ for (mod = module_list; mod != &kernel_module; mod = next) {
+ next = mod->next;
+ if (mod->refs == NULL &&
+ ((mod->flags
+ & (MOD_AUTOCLEAN|MOD_RUNNING|MOD_DELETED|MOD_USED_ONCE))
+ == (MOD_AUTOCLEAN|MOD_RUNNING|MOD_USED_ONCE)) &&
+ !__MOD_IN_USE(mod)) {
+ if (mod->flags & MOD_VISITED)
+ mod->flags &= ~MOD_VISITED;
+ else
+ free_module(mod);
}
}
- return 0;
+ error = 0;
+out:
+ unlock_kernel();
+ return error;
}
+/* Query various bits about modules. */
-/*
- * Copy the kernel symbol table to user space. If the argument is null,
- * just return the size of the table.
- *
- * Note that the transient module symbols are copied _first_,
- * in lifo order!!!
- *
- * The symbols to "insmod" are according to the "old" format: struct kernel_sym,
- * which is actually quite handy for this purpose.
- * Note that insmod inserts a struct symbol_table later on...
- * (as that format is quite handy for the kernel...)
- *
- * For every module, the first (pseudo)symbol copied is the module name
- * and the address of the module struct.
- * This lets "insmod" keep track of references, and build the array of
- * struct module_refs in the symbol table.
- * The format of the module name is "#module", so that "insmod" can easily
- * notice when a module name comes along. Also, this will make it possible
- * to use old versions of "insmod", albeit with reduced functionality...
- * The "kernel" module has an empty name.
- */
-asmlinkage int
-sys_get_kernel_syms(struct kernel_sym *table)
+static int
+qm_modules(char *buf, size_t bufsize, size_t *ret)
{
- struct internal_symbol *from;
- struct kernel_sym isym;
- struct kernel_sym *to;
- struct module *mp = module_list;
- int i;
- int nmodsyms = 0;
- int err;
+ struct module *mod;
+ size_t nmod, space, len;
- for (mp = module_list; mp; mp = mp->next) {
- if (mp->symtab && mp->symtab->n_symbols) {
- /* include the count for the module name! */
- nmodsyms += mp->symtab->n_symbols + 1;
- }
- else
- /* include the count for the module name! */
- nmodsyms += 1; /* return modules without symbols too */
- }
-
- if (table != NULL) {
- to = table;
-
- /* copy all module symbols first (always LIFO order) */
- for (mp = module_list; mp; mp = mp->next) {
- if (mp->state == MOD_RUNNING) {
- /* magic: write module info as a pseudo symbol */
- isym.value = (unsigned long)mp;
- sprintf(isym.name, "#%s", mp->name);
- err = copy_to_user(to, &isym, sizeof isym);
- if (err)
- return -EFAULT;
- ++to;
-
- if (mp->symtab != NULL) {
- for (i = mp->symtab->n_symbols,
- from = mp->symtab->symbol;
- i > 0; --i, ++from, ++to) {
-
- isym.value = (unsigned long)from->addr;
- strncpy(isym.name, from->name, sizeof isym.name);
- err = copy_to_user(to, &isym, sizeof isym);
- if (err)
- return -EFAULT;
- }
- }
- }
- }
+ nmod = space = 0;
+
+ for (mod=module_list; mod != &kernel_module; mod=mod->next, ++nmod) {
+ len = strlen(mod->name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+ if (copy_to_user(buf, mod->name, len))
+ return -EFAULT;
+ buf += len;
+ bufsize -= len;
+ space += len;
}
- return nmodsyms;
+ if (put_user(nmod, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ space += len;
+ while ((mod = mod->next) != &kernel_module)
+ space += strlen(mod->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
}
-/*
- * Look for a module by name, ignoring modules marked for deletion.
- */
-struct module *
-find_module( const char *name)
+static int
+qm_deps(struct module *mod, char *buf, size_t bufsize, size_t *ret)
{
- struct module *mp;
+ size_t i, space, len;
- for (mp = module_list ; mp ; mp = mp->next) {
- if (mp->state == MOD_DELETED)
- continue;
- if (!strcmp(mp->name, name))
- break;
+ if (mod == &kernel_module)
+ return -EINVAL;
+ if ((mod->flags & (MOD_RUNNING | MOD_DELETED)) != MOD_RUNNING)
+ if (put_user(0, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+ space = 0;
+ for (i = 0; i < mod->ndeps; ++i) {
+ const char *dep_name = mod->deps[i].dep->name;
+
+ len = strlen(dep_name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+ if (copy_to_user(buf, dep_name, len))
+ return -EFAULT;
+ buf += len;
+ bufsize -= len;
+ space += len;
}
- return mp;
+
+ if (put_user(i, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ space += len;
+ while (++i < mod->ndeps)
+ space += strlen(mod->deps[i].dep->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
}
-static void
-drop_refs(struct module *mp)
+static int
+qm_refs(struct module *mod, char *buf, size_t bufsize, size_t *ret)
{
- struct module *step;
- struct module_ref *prev;
+ size_t nrefs, space, len;
struct module_ref *ref;
- for (step = module_list; step; step = step->next) {
- for (prev = ref = step->ref; ref; ref = prev->next) {
- if (ref->module == mp) {
- if (ref == step->ref)
- step->ref = ref->next;
- else
- prev->next = ref->next;
- break; /* every module only references once! */
- }
- else
- prev = ref;
- }
+ if (mod == &kernel_module)
+ return -EINVAL;
+ if ((mod->flags & (MOD_RUNNING | MOD_DELETED)) != MOD_RUNNING)
+ if (put_user(0, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+ space = 0;
+ for (nrefs = 0, ref = mod->refs; ref ; ++nrefs, ref = ref->next_ref) {
+ const char *ref_name = ref->ref->name;
+
+ len = strlen(ref_name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+ if (copy_to_user(buf, ref_name, len))
+ return -EFAULT;
+ buf += len;
+ bufsize -= len;
+ space += len;
}
+
+ if (put_user(nrefs, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ space += len;
+ while ((ref = ref->next_ref) != NULL)
+ space += strlen(ref->ref->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
}
-/*
- * Try to free modules which have been marked for deletion. Returns nonzero
- * if a module was actually freed.
- */
-int
-free_modules( void)
+static int
+qm_symbols(struct module *mod, char *buf, size_t bufsize, size_t *ret)
{
- struct module *mp;
- struct module **mpp;
- int did_deletion;
-
- did_deletion = 0;
- freeing_modules = 0;
- mpp = &module_list;
- while ((mp = *mpp) != NULL) {
- if (mp->state != MOD_DELETED) {
- mpp = &mp->next;
- } else {
- if ((GET_USE_COUNT(mp) != 0) || (mp->ref != NULL)) {
- freeing_modules = 1;
- mpp = &mp->next;
- } else { /* delete it */
- *mpp = mp->next;
- if (mp->symtab) {
- if (mp->symtab->n_refs)
- drop_refs(mp);
- if (mp->symtab->size)
- kfree_s(mp->symtab, mp->symtab->size);
- }
- vfree(mp->addr);
- kfree_s(mp, sizeof(struct module) + MOD_MAX_NAME);
- did_deletion = 1;
- }
- }
+ size_t i, space, len;
+ struct module_symbol *s;
+ char *strings;
+ unsigned long *vals;
+
+ if ((mod->flags & (MOD_RUNNING | MOD_DELETED)) != MOD_RUNNING)
+ if (put_user(0, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+ space = mod->nsyms * 2*sizeof(void *);
+
+ i = len = 0;
+ s = mod->syms;
+
+ if (space > bufsize)
+ goto calc_space_needed;
+
+ if (!access_ok(VERIFY_WRITE, buf, space))
+ return -EFAULT;
+
+ bufsize -= space;
+ vals = (unsigned long *)buf;
+ strings = buf+space;
+
+ for (; i < mod->nsyms ; ++i, ++s, vals += 2) {
+ len = strlen(s->name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+
+ if (copy_to_user(strings, s->name, len)
+ || __put_user(s->value, vals+0)
+ || __put_user(space, vals+1))
+ return -EFAULT;
+
+ strings += len;
+ bufsize -= len;
+ space += len;
}
- return did_deletion;
+
+ if (put_user(i, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ for (; i < mod->nsyms; ++i, ++s)
+ space += strlen(s->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
}
+static int
+qm_info(struct module *mod, char *buf, size_t bufsize, size_t *ret)
+{
+ int error = 0;
-/*
- * Called by the /proc file system to return a current list of modules.
- */
-int get_module_list(char *buf)
+ if (mod == &kernel_module)
+ return -EINVAL;
+
+ if (sizeof(struct module_info) <= bufsize) {
+ struct module_info info;
+ info.addr = (unsigned long)mod;
+ info.size = mod->size;
+ info.flags = mod->flags;
+ info.usecount = (mod_member_present(mod, can_unload)
+ && mod->can_unload ? -1 : mod->usecount);
+
+ if (copy_to_user(buf, &info, sizeof(struct module_info)))
+ return -EFAULT;
+ } else
+ error = -ENOSPC;
+
+ if (put_user(sizeof(struct module_info), ret))
+ return -EFAULT;
+
+ return error;
+}
+
+asmlinkage int
+sys_query_module(const char *name_user, int which, char *buf, size_t bufsize,
+ size_t *ret)
{
- char *p;
- const char *q;
- int i;
- struct module *mp;
- struct module_ref *ref;
- char size[32];
-
- p = buf;
- /* Do not show the kernel pseudo module */
- for (mp = module_list ; mp && mp->next; mp = mp->next) {
- if (p - buf > 4096 - 100)
- break; /* avoid overflowing buffer */
- q = mp->name;
- if (*q == '\0' && mp->size == 0 && mp->ref == NULL)
- continue; /* don't list modules for kernel syms */
- i = 20;
- while (*q) {
- *p++ = *q++;
- i--;
- }
- sprintf(size, "%d", mp->size);
- i -= strlen(size);
- if (i <= 0)
- i = 1;
- while (--i >= 0)
- *p++ = ' ';
- q = size;
- while (*q)
- *p++ = *q++;
- if (mp->state == MOD_UNINITIALIZED)
- q = " (uninitialized)";
- else if (mp->state == MOD_RUNNING)
- q = "";
- else if (mp->state == MOD_DELETED)
- q = " (deleted)";
- else
- q = " (bad state)";
- while (*q)
- *p++ = *q++;
-
- *p++ = '\t';
- if ((ref = mp->ref) != NULL) {
- *p++ = '[';
- for (; ref; ref = ref->next) {
- q = ref->module->name;
- while (*q)
- *p++ = *q++;
- if (ref->next)
- *p++ = ' ';
- }
- *p++ = ']';
+ struct module *mod;
+ int err;
+
+ lock_kernel();
+ if (name_user == NULL)
+ mod = &kernel_module;
+ else {
+ long namelen;
+ char *name;
+
+ if ((namelen = get_mod_name(name_user, &name)) < 0) {
+ err = namelen;
+ goto out;
}
- if (mp->state == MOD_RUNNING) {
- sprintf(size,"\t%ld%s",
- GET_USE_COUNT(mp) & ~(MOD_AUTOCLEAN | MOD_VISITED),
- ((GET_USE_COUNT(mp) & MOD_AUTOCLEAN)?
- " (autoclean)":""));
- q = size;
- while (*q)
- *p++ = *q++;
+ err = -ENOENT;
+ if (namelen == 0)
+ mod = &kernel_module;
+ else if ((mod = find_module(name)) == NULL) {
+ put_mod_name(name);
+ goto out;
}
- *p++ = '\n';
+ put_mod_name(name);
}
- return p - buf;
-}
+ switch (which)
+ {
+ case 0:
+ err = 0;
+ break;
+ case QM_MODULES:
+ err = qm_modules(buf, bufsize, ret);
+ break;
+ case QM_DEPS:
+ err = qm_deps(mod, buf, bufsize, ret);
+ break;
+ case QM_REFS:
+ err = qm_refs(mod, buf, bufsize, ret);
+ break;
+ case QM_SYMBOLS:
+ err = qm_symbols(mod, buf, bufsize, ret);
+ break;
+ case QM_INFO:
+ err = qm_info(mod, buf, bufsize, ret);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+out:
+ unlock_kernel();
+ return err;
+}
/*
- * Called by the /proc file system to return a current list of ksyms.
+ * Copy the kernel symbol table to user space. If the argument is
+ * NULL, just return the size of the table.
+ *
+ * This call is obsolete. New programs should use query_module+QM_SYMBOLS
+ * which does not arbitrarily limit the length of symbols.
*/
-int get_ksyms_list(char *buf, char **start, off_t offset, int length)
+
+asmlinkage int
+sys_get_kernel_syms(struct kernel_sym *table)
{
- struct module *mp;
- struct internal_symbol *sym;
+ struct module *mod;
int i;
- char *p = buf;
- int len = 0; /* code from net/ipv4/proc.c */
- off_t pos = 0;
- off_t begin = 0;
- for (mp = module_list; mp; mp = mp->next) {
- if ((mp->state == MOD_RUNNING) &&
- (mp->symtab != NULL) &&
- (mp->symtab->n_symbols > 0)) {
- for (i = mp->symtab->n_symbols,
- sym = mp->symtab->symbol;
- i > 0; --i, ++sym) {
-
- p = buf + len;
- if (mp->name[0]) {
- len += sprintf(p, "%08lx %s\t[%s]\n",
- (long)sym->addr,
- sym->name, mp->name);
- } else {
- len += sprintf(p, "%08lx %s\n",
- (long)sym->addr,
- sym->name);
- }
- pos = begin + len;
- if (pos < offset) {
- len = 0;
- begin = pos;
- }
- pos = begin + len;
- if (pos > offset+length)
- goto leave_the_loop;
- }
+ lock_kernel();
+ for (mod = module_list, i = 0; mod; mod = mod->next) {
+ /* include the count for the module name! */
+ i += mod->nsyms + 1;
+ }
+
+ if (table == NULL)
+ goto out;
+
+ for (mod = module_list, i = 0; mod; mod = mod->next) {
+ struct kernel_sym ksym;
+ struct module_symbol *msym;
+ unsigned int j;
+
+ if ((mod->flags & (MOD_RUNNING|MOD_DELETED)) != MOD_RUNNING)
+ continue;
+
+ /* magic: write module info as a pseudo symbol */
+ ksym.value = (unsigned long)mod;
+ ksym.name[0] = '#';
+ strncpy(ksym.name+1, mod->name, sizeof(ksym.name)-1);
+ ksym.name[sizeof(ksym.name)-1] = '\0';
+
+ if (copy_to_user(table, &ksym, sizeof(ksym)) != 0)
+ goto out;
+ ++i, ++table;
+
+ if (mod->nsyms == 0)
+ continue;
+
+ for (j = 0, msym = mod->syms; j < mod->nsyms; ++j, ++msym) {
+ ksym.value = msym->value;
+ strncpy(ksym.name, msym->name, sizeof(ksym.name));
+ ksym.name[sizeof(ksym.name)-1] = '\0';
+
+ if (copy_to_user(table, &ksym, sizeof(ksym)) != 0)
+ goto out;
+ ++i, ++table;
}
}
- leave_the_loop:
- *start = buf + (offset - begin);
- len -= (offset - begin);
- if (len > length)
- len = length;
- return len;
+out:
+ unlock_kernel();
+ return i;
}
/*
- * Rules:
- * - The new symbol table should be statically allocated, or else you _have_
- * to set the "size" field of the struct to the number of bytes allocated.
- *
- * - The strings that name the symbols will not be copied, maybe the pointers
- *
- * - For a loadable module, the function should only be called in the
- * context of init_module
- *
- * Those are the only restrictions! (apart from not being reentrant...)
- *
- * If you want to remove a symbol table for a loadable module,
- * the call looks like: "register_symtab(0)".
- *
- * The look of the code is mostly dictated by the format of
- * the frozen struct symbol_table, due to compatibility demands.
+ * Look for a module by name, ignoring modules marked for deletion.
*/
-#define INTSIZ sizeof(struct internal_symbol)
-#define REFSIZ sizeof(struct module_ref)
-#define SYMSIZ sizeof(struct symbol_table)
-#define MODSIZ sizeof(struct module)
-static struct symbol_table nulltab;
-int
-register_symtab_from(struct symbol_table *intab, long *from)
+static struct module *
+find_module(const char *name)
{
- struct module *mp;
- struct module *link;
- struct symbol_table *oldtab;
- struct symbol_table *newtab;
- struct module_ref *newref;
- int size;
-
- if (intab && (intab->n_symbols == 0)) {
- struct internal_symbol *sym;
- /* How many symbols, really? */
-
- for (sym = intab->symbol; sym->name; ++sym)
- intab->n_symbols +=1;
- }
-
- for (mp = module_list; mp != &kernel_module; mp = mp->next) {
- /*
- * "from" points to "mod_use_count_" (== start of module)
- * or is == 0 if called from a non-module
- */
- if ((unsigned long)(mp->addr) == (unsigned long)from)
+ struct module *mod;
+
+ for (mod = module_list; mod ; mod = mod->next) {
+ if (mod->flags & MOD_DELETED)
+ continue;
+ if (!strcmp(mod->name, name))
break;
}
- if (mp == &kernel_module) {
- /* Aha! Called from an "internal" module */
- if (!intab)
- return 0; /* or -ESILLY_PROGRAMMER :-) */
+ return mod;
+}
- /* create a pseudo module! */
- if (!(mp = (struct module*) kmalloc(MODSIZ, GFP_KERNEL))) {
- /* panic time! */
- printk(KERN_ERR "Out of memory for new symbol table!\n");
- return -ENOMEM;
- }
- /* else OK */
- memset(mp, 0, MODSIZ);
- mp->state = MOD_RUNNING; /* Since it is resident... */
- mp->name = ""; /* This is still the "kernel" symbol table! */
- mp->symtab = intab;
+/*
+ * Free the given module.
+ */
- /* link it in _after_ the resident symbol table */
- mp->next = kernel_module.next;
- kernel_module.next = mp;
+static void
+free_module(struct module *mod)
+{
+ struct module_ref *dep;
+ unsigned i;
- return 0;
- }
+ /* Let the module clean up. */
- /* else ******** Called from a loadable module **********/
+ mod->flags |= MOD_DELETED;
+ if (mod->flags & MOD_RUNNING) {
+ mod->cleanup();
+ mod->flags &= ~MOD_RUNNING;
+ }
- /*
- * This call should _only_ be done in the context of the
- * call to init_module i.e. when loading the module!!
- * Or else...
- */
+ /* Remove the module from the dependancy lists. */
- /* Any table there before? */
- if ((oldtab = mp->symtab) == (struct symbol_table*)0) {
- /* No, just insert it! */
- mp->symtab = intab;
- return 0;
+ for (i = 0, dep = mod->deps; i < mod->ndeps; ++i, ++dep) {
+ struct module_ref **pp;
+ for (pp = &dep->dep->refs; *pp != dep; pp = &(*pp)->next_ref)
+ continue;
+ *pp = dep->next_ref;
}
- /* else ****** we have to replace the module symbol table ******/
+ /* And from the main module list. */
- if (oldtab->n_refs == 0) { /* no problems! */
- mp->symtab = intab;
- /* if the old table was kmalloc-ed, drop it */
- if (oldtab->size > 0)
- kfree_s(oldtab, oldtab->size);
-
- return 0;
+ if (mod == module_list) {
+ module_list = mod->next;
+ } else {
+ struct module *p;
+ for (p = module_list; p->next != mod; p = p->next)
+ continue;
+ p->next = mod->next;
}
- /* else */
- /***** The module references other modules... insmod said so! *****/
- /* We have to allocate a new symbol table, or we lose them! */
- if (intab == (struct symbol_table*)0)
- intab = &nulltab; /* easier code with zeroes in place */
+ /* And free the memory. */
- /* the input symbol table space does not include the string table */
- /* (it does for symbol tables that insmod creates) */
+ vfree(mod);
+}
- if (!(newtab = (struct symbol_table*)kmalloc(
- size = SYMSIZ + intab->n_symbols * INTSIZ +
- oldtab->n_refs * REFSIZ,
- GFP_KERNEL))) {
- /* panic time! */
- printk(KERN_ERR "Out of memory for new symbol table!\n");
- return -ENOMEM;
- }
+/*
+ * Called by the /proc file system to return a current list of modules.
+ */
- /* copy up to, and including, the new symbols */
- memcpy(newtab, intab, SYMSIZ + intab->n_symbols * INTSIZ);
+int get_module_list(char *p)
+{
+ size_t left = PAGE_SIZE;
+ struct module *mod;
+ char tmpstr[64];
+ struct module_ref *ref;
- newtab->size = size;
- newtab->n_refs = oldtab->n_refs;
+ for (mod = module_list; mod != &kernel_module; mod = mod->next) {
+ long len;
+ const char *q;
+
+#define safe_copy_str(str, len) \
+ do { \
+ if (left < len) \
+ goto fini; \
+ memcpy(p, str, len); p += len, left -= len; \
+ } while (0)
+#define safe_copy_cstr(str) safe_copy_str(str, sizeof(str)-1)
+
+ len = strlen(mod->name);
+ safe_copy_str(mod->name, len);
+
+ if ((len = 20 - len) > 0) {
+ if (left < len)
+ goto fini;
+ memset(p, ' ', len);
+ p += len;
+ left -= len;
+ }
- /* copy references */
- memcpy( ((char *)newtab) + SYMSIZ + intab->n_symbols * INTSIZ,
- ((char *)oldtab) + SYMSIZ + oldtab->n_symbols * INTSIZ,
- oldtab->n_refs * REFSIZ);
+ len = sprintf(tmpstr, "%8lu", mod->size);
+ safe_copy_str(tmpstr, len);
- /* relink references from the old table to the new one */
+ if (mod->flags & MOD_RUNNING) {
+ len = sprintf(tmpstr, "%4ld",
+ (mod_member_present(mod, can_unload)
+ && mod->can_unload
+ ? -1 : mod->usecount));
+ safe_copy_str(tmpstr, len);
+ }
- /* pointer to the first reference entry in newtab! Really! */
- newref = (struct module_ref*) &(newtab->symbol[newtab->n_symbols]);
+ if (mod->flags & MOD_DELETED)
+ safe_copy_cstr(" (deleted)");
+ else if (mod->flags & MOD_RUNNING) {
+ if (mod->flags & MOD_AUTOCLEAN)
+ safe_copy_cstr(" (autoclean)");
+ if (!(mod->flags & MOD_USED_ONCE))
+ safe_copy_cstr(" (unused)");
+ } else
+ safe_copy_cstr(" (uninitialized)");
+
+ if ((ref = mod->refs) != NULL) {
+ safe_copy_cstr(" [");
+ while (1) {
+ q = ref->ref->name;
+ len = strlen(q);
+ safe_copy_str(q, len);
+
+ if ((ref = ref->next_ref) != NULL)
+ safe_copy_cstr(" ");
+ else
+ break;
+ }
+ safe_copy_cstr("]");
+ }
- /* check for reference links from previous modules */
- for ( link = module_list;
- link && (link != &kernel_module);
- link = link->next) {
+ safe_copy_cstr("\n");
- if (link->ref && (link->ref->module == mp))
- link->ref = newref++;
+#undef safe_copy_str
+#undef safe_copy_cstr
}
- mp->symtab = newtab;
+fini:
+ return PAGE_SIZE - left;
+}
- /* all references (if any) have been handled */
+/*
+ * Called by the /proc file system to return a current list of ksyms.
+ */
- /* if the old table was kmalloc-ed, drop it */
- if (oldtab->size > 0)
- kfree_s(oldtab, oldtab->size);
+int
+get_ksyms_list(char *buf, char **start, off_t offset, int length)
+{
+ struct module *mod;
+ char *p = buf;
+ int len = 0; /* code from net/ipv4/proc.c */
+ off_t pos = 0;
+ off_t begin = 0;
+
+ for (mod = module_list; mod; mod = mod->next) {
+ unsigned i;
+ struct module_symbol *sym;
- return 0;
+ if (!(mod->flags & MOD_RUNNING) || (mod->flags & MOD_DELETED))
+ continue;
+
+ for (i = mod->nsyms, sym = mod->syms; i > 0; --i, ++sym) {
+ p = buf + len;
+ if (*mod->name) {
+ len += sprintf(p, "%0*lx %s\t[%s]\n",
+ (int)(2*sizeof(void*)),
+ sym->value, sym->name,
+ mod->name);
+ } else {
+ len += sprintf(p, "%0*lx %s\n",
+ (int)(2*sizeof(void*)),
+ sym->value, sym->name);
+ }
+ pos = begin + len;
+ if (pos < offset) {
+ len = 0;
+ begin = pos;
+ }
+ pos = begin + len;
+ if (pos > offset+length)
+ goto leave_the_loop;
+ }
+ }
+leave_the_loop:
+ *start = buf + (offset - begin);
+ len -= (offset - begin);
+ if (len > length)
+ len = length;
+ return len;
}
#else /* CONFIG_MODULES */
/* Dummy syscalls for people who don't want modules */
-asmlinkage unsigned long sys_create_module(void)
+asmlinkage unsigned long
+sys_create_module(const char *name_user, size_t size)
{
return -ENOSYS;
}
-asmlinkage int sys_init_module(void)
+asmlinkage int
+sys_init_module(const char *name_user, struct module *mod_user)
{
return -ENOSYS;
}
-asmlinkage int sys_delete_module(void)
+asmlinkage int
+sys_delete_module(const char *name_user)
{
return -ENOSYS;
}
-asmlinkage int sys_get_kernel_syms(void)
+asmlinkage int
+sys_query_module(const char *name_user, int which, char *buf, size_t bufsize,
+ size_t *ret)
{
+ /* Let the program know about the new interface. Not that
+ it'll do them much good. */
+ if (which == 0)
+ return 0;
+
return -ENOSYS;
}
-int register_symtab_from(struct symbol_table *intab, long *from)
+asmlinkage int
+sys_get_kernel_syms(struct kernel_sym *table)
{
- return 0;
+ return -ENOSYS;
}
#endif /* CONFIG_MODULES */
-
diff --git a/kernel/panic.c b/kernel/panic.c
index d42541e9f..deaa2f339 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -10,15 +10,16 @@
*/
#include <stdarg.h>
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
#include <asm/sgialib.h>
asmlinkage void sys_sync(void); /* it's really int */
-extern void do_unblank_screen(void);
+extern void unblank_console(void);
extern int C_A_D;
int panic_timeout = 0;
@@ -43,9 +44,12 @@ NORET_TYPE void panic(const char * fmt, ...)
else
sys_sync();
- do_unblank_screen();
+#ifdef __SMP__
+ smp_message_pass(MSG_ALL_BUT_SELF, MSG_STOP_CPU, 0, 0);
+#endif
+
+ unblank_console();
-#ifdef CONFIG_SGI
if (panic_timeout > 0)
{
int i;
@@ -54,17 +58,20 @@ NORET_TYPE void panic(const char * fmt, ...)
* Delay timeout seconds before rebooting the machine.
* We can't use the "normal" timers since we just panicked..
*/
- prom_printf(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
+ printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
for(i = 0; i < (panic_timeout*1000); i++)
udelay(1000);
- hard_reset_now();
+ /*
+ * Should we run the reboot notifier. For the moment Im
+ * choosing not too. It might crash, be corrupt or do
+ * more harm than good for other reasons.
+ */
+ machine_restart(NULL);
}
-#if 0
- printk("Hit a key\n");
- prom_getchar();
- romvec->imode();
-#endif
+#ifdef __sparc__
+ printk("Press L1-A to return to the boot prom\n");
#endif
+ sti();
for(;;);
}
diff --git a/kernel/printk.c b/kernel/printk.c
index ed39d4fab..0d5d619b0 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -9,6 +9,7 @@
* to the console. Added hook for sending the console messages
* elsewhere, in preparation for a serial line console (someday).
* Ted Ts'o, 2/11/93.
+ * Modified for sysctl support, 1/8/97, Chris Horn.
*/
#include <stdarg.h>
@@ -21,6 +22,9 @@
#include <linux/mm.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/console.h>
#include <asm/uaccess.h>
@@ -28,8 +32,6 @@
static char buf[1024];
-extern void console_print(const char *);
-
/* printk's without a loglevel use this.. */
#define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */
@@ -39,9 +41,14 @@ extern void console_print(const char *);
unsigned long log_size = 0;
struct wait_queue * log_wait = NULL;
+
+/* Keep together for sysctl support */
int console_loglevel = DEFAULT_CONSOLE_LOGLEVEL;
+int default_message_loglevel = DEFAULT_MESSAGE_LOGLEVEL;
+int minimum_console_loglevel = MINIMUM_CONSOLE_LOGLEVEL;
+int default_console_loglevel = DEFAULT_CONSOLE_LOGLEVEL;
-static void (*console_print_proc)(const char *) = 0;
+struct console *console_drivers = NULL;
static char log_buf[LOG_BUF_LEN];
static unsigned long log_start = 0;
static unsigned long logged_chars = 0;
@@ -64,87 +71,103 @@ asmlinkage int sys_syslog(int type, char * buf, int len)
unsigned long i, j, count;
int do_clear = 0;
char c;
- int error;
+ int error = -EPERM;
+ lock_kernel();
if ((type != 3) && !suser())
- return -EPERM;
+ goto out;
+ error = 0;
switch (type) {
- case 0: /* Close log */
- return 0;
- case 1: /* Open log */
- return 0;
- case 2: /* Read from log */
- if (!buf || len < 0)
- return -EINVAL;
- if (!len)
- return 0;
- error = verify_area(VERIFY_WRITE,buf,len);
- if (error)
- return error;
- cli();
- while (!log_size) {
- if (current->signal & ~current->blocked) {
- sti();
- return -ERESTARTSYS;
- }
- interruptible_sleep_on(&log_wait);
- }
- i = 0;
- while (log_size && i < len) {
- c = *((char *) log_buf+log_start);
- log_start++;
- log_size--;
- log_start &= LOG_BUF_LEN-1;
+ case 0: /* Close log */
+ break;
+ case 1: /* Open log */
+ break;
+ case 2: /* Read from log */
+ error = -EINVAL;
+ if (!buf || len < 0)
+ goto out;
+ error = 0;
+ if (!len)
+ goto out;
+ error = verify_area(VERIFY_WRITE,buf,len);
+ if (error)
+ goto out;
+ cli();
+ error = -ERESTARTSYS;
+ while (!log_size) {
+ if (current->signal & ~current->blocked) {
sti();
- put_user(c,buf);
- buf++;
- i++;
- cli();
+ goto out;
}
+ interruptible_sleep_on(&log_wait);
+ }
+ i = 0;
+ while (log_size && i < len) {
+ c = *((char *) log_buf+log_start);
+ log_start++;
+ log_size--;
+ log_start &= LOG_BUF_LEN-1;
sti();
- return i;
- case 4: /* Read/clear last kernel messages */
- do_clear = 1;
- /* FALL THRU */
- case 3: /* Read last kernel messages */
- if (!buf || len < 0)
- return -EINVAL;
- if (!len)
- return 0;
- error = verify_area(VERIFY_WRITE,buf,len);
- if (error)
- return error;
- count = len;
- if (count > LOG_BUF_LEN)
- count = LOG_BUF_LEN;
- if (count > logged_chars)
- count = logged_chars;
- j = log_start + log_size - count;
- for (i = 0; i < count; i++) {
- c = *((char *) log_buf+(j++ & (LOG_BUF_LEN-1)));
- put_user(c, buf++);
- }
- if (do_clear)
- logged_chars = 0;
- return i;
- case 5: /* Clear ring buffer */
+ put_user(c,buf);
+ buf++;
+ i++;
+ cli();
+ }
+ sti();
+ error = i;
+ break;
+ case 4: /* Read/clear last kernel messages */
+ do_clear = 1;
+ /* FALL THRU */
+ case 3: /* Read last kernel messages */
+ error = -EINVAL;
+ if (!buf || len < 0)
+ goto out;
+ error = 0;
+ if (!len)
+ goto out;
+ error = verify_area(VERIFY_WRITE,buf,len);
+ if (error)
+ goto out;
+ count = len;
+ if (count > LOG_BUF_LEN)
+ count = LOG_BUF_LEN;
+ if (count > logged_chars)
+ count = logged_chars;
+ j = log_start + log_size - count;
+ for (i = 0; i < count; i++) {
+ c = *((char *) log_buf+(j++ & (LOG_BUF_LEN-1)));
+ put_user(c, buf++);
+ }
+ if (do_clear)
logged_chars = 0;
- return 0;
- case 6: /* Disable logging to console */
- console_loglevel = MINIMUM_CONSOLE_LOGLEVEL;
- return 0;
- case 7: /* Enable logging to console */
- console_loglevel = DEFAULT_CONSOLE_LOGLEVEL;
- return 0;
- case 8:
- if (len < 1 || len > 8)
- return -EINVAL;
- if (len < MINIMUM_CONSOLE_LOGLEVEL)
- len = MINIMUM_CONSOLE_LOGLEVEL;
- console_loglevel = len;
- return 0;
+ error = i;
+ break;
+ case 5: /* Clear ring buffer */
+ logged_chars = 0;
+ break;
+ case 6: /* Disable logging to console */
+ console_loglevel = minimum_console_loglevel;
+ break;
+ case 7: /* Enable logging to console */
+ console_loglevel = default_console_loglevel;
+ break;
+ case 8:
+ error = -EINVAL;
+ if (len < 1 || len > 8)
+ goto out;
+ if (len < minimum_console_loglevel)
+ len = minimum_console_loglevel;
+ console_loglevel = len;
+ error = 0;
+ break;
+ default:
+ error = -EINVAL;
+ break;
}
- return -EINVAL;
+out:
+ unlock_kernel();
+ return error;
}
@@ -153,11 +176,12 @@ asmlinkage int printk(const char *fmt, ...)
va_list args;
int i;
char *msg, *p, *buf_end;
- static char msg_level = -1;
+ int line_feed;
+ static signed char msg_level = -1;
long flags;
- save_flags(flags);
- cli();
+ __save_flags(flags);
+ __cli();
va_start(args, fmt);
i = vsprintf(buf + 3, fmt, args); /* hopefully i < sizeof(buf)-4 */
buf_end = buf + 3 + i;
@@ -173,12 +197,13 @@ asmlinkage int printk(const char *fmt, ...)
) {
p -= 3;
p[0] = '<';
- p[1] = DEFAULT_MESSAGE_LOGLEVEL + '0';
+ p[1] = default_message_loglevel + '0';
p[2] = '>';
} else
msg += 3;
msg_level = p[1] - '0';
}
+ line_feed = 0;
for (; p < buf_end; p++) {
log_buf[(log_start+log_size) & (LOG_BUF_LEN-1)] = *p;
if (log_size < LOG_BUF_LEN)
@@ -188,38 +213,64 @@ asmlinkage int printk(const char *fmt, ...)
log_start &= LOG_BUF_LEN-1;
}
logged_chars++;
- if (*p == '\n')
+ if (*p == '\n') {
+ line_feed = 1;
break;
+ }
}
- if (msg_level < console_loglevel && console_print_proc) {
- char tmp = p[1];
- p[1] = '\0';
- (*console_print_proc)(msg);
- p[1] = tmp;
+ if (msg_level < console_loglevel && console_drivers) {
+ struct console *c = console_drivers;
+ while(c) {
+ if (c->write)
+ c->write(msg, p - msg + line_feed);
+ c = c->next;
+ }
}
- if (*p == '\n')
+ if (line_feed)
msg_level = -1;
}
- restore_flags(flags);
+ __restore_flags(flags);
wake_up_interruptible(&log_wait);
return i;
}
+void console_print(const char *s)
+{
+ struct console *c = console_drivers;
+ int len = strlen(s);
+ while(c) {
+ if (c->write)
+ c->write(s, len);
+ c = c->next;
+ }
+}
+
+void unblank_console(void)
+{
+ struct console *c = console_drivers;
+ while(c) {
+ if (c->unblank)
+ c->unblank();
+ c = c->next;
+ }
+}
+
/*
* The console driver calls this routine during kernel initialization
* to register the console printing procedure with printk() and to
* print any messages that were printed by the kernel before the
* console driver was initialized.
*/
-void register_console(void (*proc)(const char *))
+void register_console(struct console * console)
{
- int i,j;
+ int i,j,len;
int p = log_start;
char buf[16];
- char msg_level = -1;
+ signed char msg_level = -1;
char *q;
- console_print_proc = proc;
+ console->next = console_drivers;
+ console_drivers = console;
for (i=0,j=0; i < log_size; i++) {
buf[j++] = log_buf[p];
@@ -228,12 +279,14 @@ void register_console(void (*proc)(const char *))
continue;
buf[j] = 0;
q = buf;
+ len = j;
if (msg_level < 0) {
msg_level = buf[1] - '0';
q = buf + 3;
+ len -= 3;
}
if (msg_level < console_loglevel)
- (*proc)(q);
+ console->write(q, len);
if (buf[j-1] == '\n')
msg_level = -1;
j = 0;
diff --git a/kernel/resource.c b/kernel/resource.c
index 48184bfcf..27abcf4dc 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -13,7 +13,7 @@
#include <linux/types.h>
#include <linux/ioport.h>
-#define IOTABLE_SIZE 64
+#define IOTABLE_SIZE 128
typedef struct resource_entry_t {
u_long from, num;
@@ -69,7 +69,7 @@ static resource_entry_t *find_gap(resource_entry_t *root,
/*
* Call this from the device driver to register the ioport region.
*/
-void request_region(unsigned int from, unsigned int num, const char *name)
+void request_region(unsigned long from, unsigned long num, const char *name)
{
resource_entry_t *p;
int i;
@@ -95,7 +95,7 @@ void request_region(unsigned int from, unsigned int num, const char *name)
/*
* Call this when the device driver is unloaded
*/
-void release_region(unsigned int from, unsigned int num)
+void release_region(unsigned long from, unsigned long num)
{
resource_entry_t *p, *q;
@@ -114,11 +114,72 @@ void release_region(unsigned int from, unsigned int num)
/*
* Call this to check the ioport region before probing
*/
-int check_region(unsigned int from, unsigned int num)
+int check_region(unsigned long from, unsigned long num)
{
return (find_gap(&iolist, from, num) == NULL) ? -EBUSY : 0;
}
+#ifdef __sparc__ /* Why to carry unused code on other architectures? */
+/*
+ * This is for architectures with MMU-managed ports (sparc).
+ */
+unsigned long occupy_region(unsigned long base, unsigned long end,
+ unsigned long num, unsigned int align, const char *name)
+{
+ unsigned long from = 0, till;
+ unsigned long flags;
+ int i;
+ resource_entry_t *p; /* Scanning ptr */
+ resource_entry_t *p1; /* === p->next */
+ resource_entry_t *s; /* Found slot */
+
+ if (base > end-1)
+ return 0;
+ if (num > end - base)
+ return 0;
+
+ for (i = 0; i < IOTABLE_SIZE; i++)
+ if (iotable[i].num == 0)
+ break;
+ if (i == IOTABLE_SIZE) {
+ /* Driver prints a warning typicaly. */
+ return 0;
+ }
+
+ save_flags(flags);
+ cli();
+ /* printk("occupy: search in %08lx[%08lx] ", base, end - base); */
+ s = NULL;
+ for (p = &iolist; p != NULL; p = p1) {
+ p1 = p->next;
+ /* Find window in list */
+ from = (p->from+p->num + align-1) & ~((unsigned long)align-1);
+ till = (p1 == NULL)? (unsigned long) (0 - (unsigned long)align): p1->from;
+ /* printk(" %08lx:%08lx", from, till); */
+ /* Clip window with base and end */
+ if (from < base) from = base;
+ if (till > end) till = end;
+ /* See if result is large enougth */
+ if (from < till && from + num < till) {
+ s = p;
+ break;
+ }
+ }
+ /* printk("\r\n"); */
+ restore_flags(flags);
+
+ if (s == NULL)
+ return 0;
+
+ iotable[i].name = name;
+ iotable[i].from = from;
+ iotable[i].num = num;
+ iotable[i].next = s->next;
+ s->next = &iotable[i];
+ return from;
+}
+#endif
+
/* Called from init/main.c to reserve IO ports. */
void reserve_setup(char *str, int *ints)
{
diff --git a/kernel/sched.c b/kernel/sched.c
index 98502b3fc..bc256d029 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4,6 +4,9 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*
* 1996-04-21 Modified by Ulrich Windl to make NTP work
+ * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and
+ * make semaphores SMP safe
+ * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
*/
/*
@@ -28,12 +31,14 @@
#include <linux/resource.h>
#include <linux/mm.h>
#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
+#include <asm/spinlock.h>
#include <linux/timex.h>
@@ -44,7 +49,7 @@
int securelevel = 0; /* system security level */
long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */
-volatile struct timeval xtime; /* The current time */
+volatile struct timeval xtime __attribute__ ((aligned (8))); /* The current time */
int tickadj = 500/HZ; /* microsecs */
DECLARE_TASK_QUEUE(tq_timer);
@@ -100,7 +105,12 @@ struct task_struct init_task = INIT_TASK;
unsigned long volatile jiffies=0;
-struct task_struct *current_set[NR_CPUS];
+/*
+ * Init task must be ok at boot for the ix86 as we will check its signals
+ * via the SMP irq return path.
+ */
+
+struct task_struct *current_set[NR_CPUS] = {&init_task, };
struct task_struct *last_task_used_math = NULL;
struct task_struct * task[NR_TASKS] = {&init_task, };
@@ -109,9 +119,6 @@ struct kernel_stat kstat = { 0 };
static inline void add_to_runqueue(struct task_struct * p)
{
-#ifdef __SMP__
- int cpu=smp_processor_id();
-#endif
#if 1 /* sanity tests */
if (p->next_run || p->prev_run) {
printk("task already on run-queue\n");
@@ -124,36 +131,6 @@ static inline void add_to_runqueue(struct task_struct * p)
(p->prev_run = init_task.prev_run)->next_run = p;
p->next_run = &init_task;
init_task.prev_run = p;
-#ifdef __SMP__
- /* this is safe only if called with cli()*/
- while(set_bit(31,&smp_process_available));
-#if 0
- {
- while(test_bit(31,&smp_process_available))
- {
- if(clear_bit(cpu,&smp_invalidate_needed))
- {
- local_flush_tlb();
- set_bit(cpu,&cpu_callin_map[0]);
- }
- }
- }
-#endif
- smp_process_available++;
- clear_bit(31,&smp_process_available);
- if ((0!=p->pid) && smp_threads_ready)
- {
- int i;
- for (i=0;i<smp_num_cpus;i++)
- {
- if (0==current_set[cpu_logical_map[i]]->pid)
- {
- smp_message_pass(cpu_logical_map[i], MSG_RESCHEDULE, 0L, 0);
- break;
- }
- }
- }
-#endif
}
static inline void del_from_runqueue(struct task_struct * p)
@@ -167,7 +144,7 @@ static inline void del_from_runqueue(struct task_struct * p)
return;
}
#endif
- if (p == &init_task) {
+ if (!p->pid) {
static int nr = 0;
if (nr < 5) {
nr++;
@@ -199,6 +176,21 @@ static inline void move_last_runqueue(struct task_struct * p)
}
/*
+ * The tasklist_lock protects the linked list of processes.
+ *
+ * The scheduler lock is protecting against multiple entry
+ * into the scheduling code, and doesn't need to worry
+ * about interrupts (because interrupts cannot call the
+ * scheduler).
+ *
+ * The run-queue lock locks the parts that actually access
+ * and change the run-queues, and have to be interrupt-safe.
+ */
+rwlock_t tasklist_lock = RW_LOCK_UNLOCKED;
+spinlock_t scheduler_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t runqueue_lock = SPIN_LOCK_UNLOCKED;
+
+/*
* Wake up a process. Put it on the run-queue if it's not
* already there. The "current" process is always on the
* run-queue (except when the actual re-schedule is in
@@ -210,12 +202,11 @@ inline void wake_up_process(struct task_struct * p)
{
unsigned long flags;
- save_flags(flags);
- cli();
+ spin_lock_irqsave(&runqueue_lock, flags);
p->state = TASK_RUNNING;
if (!p->next_run)
add_to_runqueue(p);
- restore_flags(flags);
+ spin_unlock_irqrestore(&runqueue_lock, flags);
}
static void process_timeout(unsigned long __data)
@@ -243,17 +234,6 @@ static inline int goodness(struct task_struct * p, struct task_struct * prev, in
{
int weight;
-#ifdef __SMP__
- /* We are not permitted to run a task someone else is running */
- if (p->processor != NO_PROC_ID)
- return -1000;
-#ifdef PAST_2_0
- /* This process is locked to a processor group */
- if (p->processor_mask && !(p->processor_mask & (1<<this_cpu))
- return -1000;
-#endif
-#endif
-
/*
* Realtime process, select the first one on the
* runqueue (taking priorities within processes
@@ -287,6 +267,18 @@ static inline int goodness(struct task_struct * p, struct task_struct * prev, in
return weight;
}
+#ifdef __SMP__
+
+#define idle_task (task[cpu_number_map[this_cpu]])
+#define can_schedule(p) ((p)->processor == NO_PROC_ID)
+
+#else
+
+#define idle_task (&init_task)
+#define can_schedule(p) (1)
+
+#endif
+
/*
* 'schedule()' is the scheduler function. It's a very simple and nice
* scheduler: it's not perfect, but certainly works for most things.
@@ -299,33 +291,39 @@ static inline int goodness(struct task_struct * p, struct task_struct * prev, in
*/
asmlinkage void schedule(void)
{
- int c;
- struct task_struct * p;
+ int lock_depth;
struct task_struct * prev, * next;
- unsigned long timeout = 0;
- int this_cpu=smp_processor_id();
+ unsigned long timeout;
+ int this_cpu;
-/* check alarm, wake up any interruptible tasks that have got a signal */
-
- if (intr_count)
- goto scheduling_in_interrupt;
-
- if (bh_active & bh_mask) {
- intr_count = 1;
- do_bottom_half();
- intr_count = 0;
+ need_resched = 0;
+ this_cpu = smp_processor_id();
+ if (local_irq_count[this_cpu]) {
+ printk("Scheduling in interrupt\n");
+ *(char *)0 = 0;
}
+ prev = current;
+ release_kernel_lock(prev, this_cpu, lock_depth);
+ if (bh_active & bh_mask)
+ do_bottom_half();
- run_task_queue(&tq_scheduler);
+ spin_lock(&scheduler_lock);
+ spin_lock_irq(&runqueue_lock);
- need_resched = 0;
- prev = current;
- cli();
/* move an exhausted RR process to be last.. */
if (!prev->counter && prev->policy == SCHED_RR) {
- prev->counter = prev->priority;
- move_last_runqueue(prev);
+ if (prev->pid) {
+ prev->counter = prev->priority;
+ move_last_runqueue(prev);
+ } else {
+ static int count = 5;
+ if (count) {
+ count--;
+ printk("Moving pid 0 last\n");
+ }
+ }
}
+ timeout = 0;
switch (prev->state) {
case TASK_INTERRUPTIBLE:
if (prev->signal & ~prev->blocked)
@@ -342,54 +340,55 @@ asmlinkage void schedule(void)
del_from_runqueue(prev);
case TASK_RUNNING:
}
- p = init_task.next_run;
- sti();
-
+ {
+ struct task_struct * p = init_task.next_run;
+ /*
+ * This is subtle.
+ * Note how we can enable interrupts here, even
+ * though interrupts can add processes to the run-
+ * queue. This is because any new processes will
+ * be added to the front of the queue, so "p" above
+ * is a safe starting point.
+ * run-queue deletion and re-ordering is protected by
+ * the scheduler lock
+ */
+ spin_unlock_irq(&runqueue_lock);
#ifdef __SMP__
- /*
- * This is safe as we do not permit re-entry of schedule()
- */
- prev->processor = NO_PROC_ID;
-#define idle_task (task[cpu_number_map[this_cpu]])
-#else
-#define idle_task (&init_task)
-#endif
-
+ prev->processor = NO_PROC_ID;
+#endif
+
/*
* Note! there may appear new tasks on the run-queue during this, as
* interrupts are enabled. However, they will be put on front of the
* list, so our list starting at "p" is essentially fixed.
*/
/* this is the scheduler proper: */
- c = -1000;
- next = idle_task;
- while (p != &init_task) {
- int weight = goodness(p, prev, this_cpu);
- if (weight > c)
- c = weight, next = p;
- p = p->next_run;
- }
+ {
+ int c = -1000;
+ next = idle_task;
+ while (p != &init_task) {
+ if (can_schedule(p)) {
+ int weight = goodness(p, prev, this_cpu);
+ if (weight > c)
+ c = weight, next = p;
+ }
+ p = p->next_run;
+ }
- /* if all runnable processes have "counter == 0", re-calculate counters */
- if (!c) {
- for_each_task(p)
- p->counter = (p->counter >> 1) + p->priority;
+ /* Do we need to re-calculate counters? */
+ if (!c) {
+ struct task_struct *p;
+ read_lock(&tasklist_lock);
+ for_each_task(p)
+ p->counter = (p->counter >> 1) + p->priority;
+ read_unlock(&tasklist_lock);
+ }
+ }
}
-#ifdef __SMP__
- /*
- * Allocate process to CPU
- */
-
- next->processor = this_cpu;
- next->last_processor = this_cpu;
-#endif
-#ifdef __SMP_PROF__
- /* mark processor running an idle thread */
- if (0==next->pid)
- set_bit(this_cpu,&smp_idle_map);
- else
- clear_bit(this_cpu,&smp_idle_map);
-#endif
+
+ next->processor = this_cpu;
+ next->last_processor = this_cpu;
+
if (prev != next) {
struct timer_list timer;
@@ -404,14 +403,13 @@ asmlinkage void schedule(void)
get_mmu_context(next);
switch_to(prev,next);
+
if (timeout)
del_timer(&timer);
}
- return;
+ spin_unlock(&scheduler_lock);
-scheduling_in_interrupt:
- printk("Aiee: scheduling in interrupt %p\n",
- return_address());
+ reacquire_kernel_lock(prev, smp_processor_id(), lock_depth);
}
#ifndef __alpha__
@@ -429,93 +427,92 @@ asmlinkage int sys_pause(void)
#endif
+spinlock_t waitqueue_lock;
+
/*
* wake_up doesn't wake up stopped processes - they have to be awakened
* with signals or similar.
- *
- * Note that this doesn't need cli-sti pairs: interrupts may not change
- * the wait-queue structures directly, but only call wake_up() to wake
- * a process. The process itself must remove the queue once it has woken.
*/
void wake_up(struct wait_queue **q)
{
+ unsigned long flags;
struct wait_queue *next;
struct wait_queue *head;
- if (!q || !(next = *q))
- return;
- head = WAIT_QUEUE_HEAD(q);
- while (next != head) {
- struct task_struct *p = next->task;
- next = next->next;
- if (p != NULL) {
- if ((p->state == TASK_UNINTERRUPTIBLE) ||
- (p->state == TASK_INTERRUPTIBLE))
- wake_up_process(p);
+ spin_lock_irqsave(&waitqueue_lock, flags);
+ if (q && (next = *q)) {
+ head = WAIT_QUEUE_HEAD(q);
+ while (next != head) {
+ struct task_struct *p = next->task;
+ next = next->next;
+ if (p != NULL) {
+ if ((p->state == TASK_UNINTERRUPTIBLE) ||
+ (p->state == TASK_INTERRUPTIBLE))
+ wake_up_process(p);
+ }
+ if (next)
+ continue;
+ printk("wait_queue is bad (eip = %p)\n",
+ __builtin_return_address(0));
+ printk(" q = %p\n",q);
+ printk(" *q = %p\n",*q);
+ break;
}
- if (!next)
- goto bad;
}
- return;
-bad:
- printk("wait_queue is bad (eip = %p)\n",
- __builtin_return_address(0));
- printk(" q = %p\n",q);
- printk(" *q = %p\n",*q);
+ spin_unlock_irqrestore(&waitqueue_lock, flags);
}
void wake_up_interruptible(struct wait_queue **q)
{
+ unsigned long flags;
struct wait_queue *next;
struct wait_queue *head;
- if (!q || !(next = *q))
- return;
- head = WAIT_QUEUE_HEAD(q);
- while (next != head) {
- struct task_struct *p = next->task;
- next = next->next;
- if (p != NULL) {
- if (p->state == TASK_INTERRUPTIBLE)
- wake_up_process(p);
+ spin_lock_irqsave(&waitqueue_lock, flags);
+ if (q && (next = *q)) {
+ head = WAIT_QUEUE_HEAD(q);
+ while (next != head) {
+ struct task_struct *p = next->task;
+ next = next->next;
+ if (p != NULL) {
+ if (p->state == TASK_INTERRUPTIBLE)
+ wake_up_process(p);
+ }
+ if (next)
+ continue;
+ printk("wait_queue is bad (eip = %p)\n",
+ __builtin_return_address(0));
+ printk(" q = %p\n",q);
+ printk(" *q = %p\n",*q);
+ break;
}
- if (!next)
- goto bad;
}
- return;
-bad:
- printk("wait_queue is bad (eip = %p)\n",
- return_address());
- printk(" q = %p\n",q);
- printk(" *q = %p\n",*q);
+ spin_unlock_irqrestore(&waitqueue_lock, flags);
}
/*
* Semaphores are implemented using a two-way counter:
* The "count" variable is decremented for each process
- * that tries to sleep, while the "waiting" variable is
- * incremented _while_ the process is sleeping on that
- * semaphore.
+ * that tries to sleep, while the "waking" variable is
+ * incremented when the "up()" code goes to wake up waiting
+ * processes.
*
* Notably, the inline "up()" and "down()" functions can
* efficiently test if they need to do any extra work (up
* needs to do something only if count was negative before
* the increment operation.
- */
-static inline void normalize_semaphore(struct semaphore *sem)
-{
- atomic_add(xchg(&sem->waiting,0), &sem->count);
-}
-
-/*
+ *
+ * waking_non_zero() (from asm/semaphore.h) must execute
+ * atomically.
+ *
* When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody. In
- * most cases "waiting" will be positive, and the normalization
- * will allow things to continue. However, if somebody has
- * /just/ done a down(), it may be that count was negative
- * without waiting being positive (or in the generic case
- * "count is more negative than waiting is positive"), and
- * the waiter needs to check this itself (see __down).
+ * incrementing it, and we need to wake up somebody.
+ *
+ * This routine adds one to the count of processes that need to
+ * wake up and exit. ALL waiting processes actually wake up but
+ * only the one that gets to the "waking" field first will gate
+ * through and acquire the semaphore. The others will go back
+ * to sleep.
*
* Note that these functions are only called when there is
* contention on the lock, and as such all this is the
@@ -525,55 +522,83 @@ static inline void normalize_semaphore(struct semaphore *sem)
*/
void __up(struct semaphore *sem)
{
- normalize_semaphore(sem);
+ wake_one_more(sem);
wake_up(&sem->wait);
}
-void __down(struct semaphore * sem)
+/*
+ * Perform the "down" function. Return zero for semaphore acquired,
+ * return negative for signalled out of the function.
+ *
+ * If called from __down, the return is ignored and the wait loop is
+ * not interruptible. This means that a task waiting on a semaphore
+ * using "down()" cannot be killed until someone does an "up()" on
+ * the semaphore.
+ *
+ * If called from __down_interruptible, the return value gets checked
+ * upon return. If the return value is negative then the task continues
+ * with the negative value in the return register (it can be tested by
+ * the caller).
+ *
+ * Either form may be used in conjunction with "up()".
+ *
+ */
+static inline int __do_down(struct semaphore * sem, int task_state)
{
struct task_struct *tsk = current;
struct wait_queue wait = { tsk, NULL };
+ int ret = 0;
- /*
- * The order here is important. We add ourselves to the
- * wait queues and mark ourselves sleeping _first_. That
- * way, if a "up()" comes in here, we'll either get
- * woken up (up happens after the wait queues are set up)
- * OR we'll have "waiting > 0".
- */
- tsk->state = TASK_UNINTERRUPTIBLE;
+ tsk->state = task_state;
add_wait_queue(&sem->wait, &wait);
- atomic_inc(&sem->waiting);
/*
- * Ok, we're set up. The only race here is really that
- * an "up()" might have incremented count before we got
- * here, so we check "count+waiting". If that is larger
- * than zero, we shouldn't sleep, but re-try the lock.
+ * Ok, we're set up. sem->count is known to be less than zero
+ * so we must wait.
+ *
+ * We can let go the lock for purposes of waiting.
+ * We re-acquire it after awaking so as to protect
+ * all semaphore operations.
+ *
+ * If "up()" is called before we call waking_non_zero() then
+ * we will catch it right away. If it is called later then
+ * we will have to go through a wakeup cycle to catch it.
+ *
+ * Multiple waiters contend for the semaphore lock to see
+ * who gets to gate through and who has to wait some more.
*/
- if (sem->count+sem->waiting <= 0) {
- /*
- * If "count+waiting" <= 0, we have to wait
- * for a up(), which will normalize the count.
- * Remember, at this point we have decremented
- * count, and incremented up, so if count is
- * zero or positive we need to return to re-try
- * the lock. It _may_ be that both count and
- * waiting is zero and that it is still locked,
- * but we still want to re-try the lock in that
- * case to make count go negative again so that
- * the optimized "up()" wake_up sequence works.
- */
- do {
- schedule();
- tsk->state = TASK_UNINTERRUPTIBLE;
- } while (sem->count < 0);
+ for (;;) {
+ if (waking_non_zero(sem)) /* are we waking up? */
+ break; /* yes, exit loop */
+
+ if ( task_state == TASK_INTERRUPTIBLE
+ && (tsk->signal & ~tsk->blocked) /* signalled */
+ ) {
+ ret = -EINTR; /* interrupted */
+ atomic_inc(&sem->count); /* give up on down operation */
+ break;
+ }
+
+ schedule();
+ tsk->state = task_state;
}
+
tsk->state = TASK_RUNNING;
remove_wait_queue(&sem->wait, &wait);
- normalize_semaphore(sem);
+ return ret;
+}
+
+void __down(struct semaphore * sem)
+{
+ __do_down(sem,TASK_UNINTERRUPTIBLE);
}
+int __down_interruptible(struct semaphore * sem)
+{
+ return __do_down(sem,TASK_INTERRUPTIBLE);
+}
+
+
static inline void __sleep_on(struct wait_queue **p, int state)
{
unsigned long flags;
@@ -584,14 +609,14 @@ static inline void __sleep_on(struct wait_queue **p, int state)
if (current == task[0])
panic("task[0] trying to sleep");
current->state = state;
- save_flags(flags);
- cli();
+ spin_lock_irqsave(&waitqueue_lock, flags);
__add_wait_queue(p, &wait);
+ spin_unlock(&waitqueue_lock);
sti();
schedule();
- cli();
+ spin_lock_irq(&waitqueue_lock);
__remove_wait_queue(p, &wait);
- restore_flags(flags);
+ spin_unlock_irqrestore(&waitqueue_lock, flags);
}
void interruptible_sleep_on(struct wait_queue **p)
@@ -604,74 +629,178 @@ void sleep_on(struct wait_queue **p)
__sleep_on(p,TASK_UNINTERRUPTIBLE);
}
-/*
- * The head for the timer-list has a "expires" field of MAX_UINT,
- * and the sorting routine counts on this..
- */
-static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL };
+
+#define TVN_BITS 6
+#define TVR_BITS 8
+#define TVN_SIZE (1 << TVN_BITS)
+#define TVR_SIZE (1 << TVR_BITS)
+#define TVN_MASK (TVN_SIZE - 1)
+#define TVR_MASK (TVR_SIZE - 1)
+
#define SLOW_BUT_DEBUGGING_TIMERS 0
-void add_timer(struct timer_list * timer)
+struct timer_vec {
+ int index;
+ struct timer_list *vec[TVN_SIZE];
+};
+
+struct timer_vec_root {
+ int index;
+ struct timer_list *vec[TVR_SIZE];
+};
+
+static struct timer_vec tv5 = { 0 };
+static struct timer_vec tv4 = { 0 };
+static struct timer_vec tv3 = { 0 };
+static struct timer_vec tv2 = { 0 };
+static struct timer_vec_root tv1 = { 0 };
+
+static struct timer_vec * const tvecs[] = {
+ (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
+};
+
+#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
+
+static unsigned long timer_jiffies = 0;
+
+static inline void insert_timer(struct timer_list *timer,
+ struct timer_list **vec, int idx)
+{
+ if ((timer->next = vec[idx]))
+ vec[idx]->prev = timer;
+ vec[idx] = timer;
+ timer->prev = (struct timer_list *)&vec[idx];
+}
+
+static inline void internal_add_timer(struct timer_list *timer)
+{
+ /*
+ * must be cli-ed when calling this
+ */
+ unsigned long expires = timer->expires;
+ unsigned long idx = expires - timer_jiffies;
+
+ if (idx < TVR_SIZE) {
+ int i = expires & TVR_MASK;
+ insert_timer(timer, tv1.vec, i);
+ } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
+ int i = (expires >> TVR_BITS) & TVN_MASK;
+ insert_timer(timer, tv2.vec, i);
+ } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
+ insert_timer(timer, tv3.vec, i);
+ } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
+ insert_timer(timer, tv4.vec, i);
+ } else if (expires < timer_jiffies) {
+ /* can happen if you add a timer with expires == jiffies,
+ * or you set a timer to go off in the past
+ */
+ insert_timer(timer, tv1.vec, tv1.index);
+ } else if (idx < 0xffffffffUL) {
+ int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
+ insert_timer(timer, tv5.vec, i);
+ } else {
+ /* Can only get here on architectures with 64-bit jiffies */
+ timer->next = timer->prev = timer;
+ }
+}
+
+static spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
+
+void add_timer(struct timer_list *timer)
{
unsigned long flags;
- struct timer_list *p;
+ spin_lock_irqsave(&timerlist_lock, flags);
#if SLOW_BUT_DEBUGGING_TIMERS
- if (timer->next || timer->prev) {
- printk("add_timer() called with non-zero list from %p\n",
- __builtin_return_address(0));
- return;
- }
+ if (timer->next || timer->prev) {
+ printk("add_timer() called with non-zero list from %p\n",
+ __builtin_return_address(0));
+ goto out;
+ }
#endif
- p = &timer_head;
- save_flags(flags);
- cli();
- do {
- p = p->next;
- } while (timer->expires > p->expires);
- timer->next = p;
- timer->prev = p->prev;
- p->prev = timer;
- timer->prev->next = timer;
- restore_flags(flags);
+ internal_add_timer(timer);
+#if SLOW_BUT_DEBUGGING_TIMERS
+out:
+#endif
+ spin_unlock_irqrestore(&timerlist_lock, flags);
}
-int del_timer(struct timer_list * timer)
+static inline int detach_timer(struct timer_list *timer)
{
int ret = 0;
- if (timer->next) {
- unsigned long flags;
- struct timer_list * next;
- save_flags(flags);
- cli();
- if ((next = timer->next) != NULL) {
- (next->prev = timer->prev)->next = next;
- timer->next = timer->prev = NULL;
- ret = 1;
- }
- restore_flags(flags);
+ struct timer_list *next, *prev;
+ next = timer->next;
+ prev = timer->prev;
+ if (next) {
+ next->prev = prev;
+ }
+ if (prev) {
+ ret = 1;
+ prev->next = next;
}
return ret;
}
-static inline void run_timer_list(void)
+
+int del_timer(struct timer_list * timer)
{
- struct timer_list * timer;
+ int ret;
+ unsigned long flags;
- cli();
- while ((timer = timer_head.next) != &timer_head && timer->expires <= jiffies) {
- void (*fn)(unsigned long) = timer->function;
- unsigned long data = timer->data;
- timer->next->prev = timer->prev;
- timer->prev->next = timer->next;
- timer->next = timer->prev = NULL;
- sti();
- fn(data);
- cli();
+ spin_lock_irqsave(&timerlist_lock, flags);
+ ret = detach_timer(timer);
+ timer->next = timer->prev = 0;
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return ret;
+}
+
+static inline void cascade_timers(struct timer_vec *tv)
+{
+ /* cascade all the timers from tv up one level */
+ struct timer_list *timer;
+ timer = tv->vec[tv->index];
+ /*
+ * We are removing _all_ timers from the list, so we don't have to
+ * detach them individually, just clear the list afterwards.
+ */
+ while (timer) {
+ struct timer_list *tmp = timer;
+ timer = timer->next;
+ internal_add_timer(tmp);
+ }
+ tv->vec[tv->index] = NULL;
+ tv->index = (tv->index + 1) & TVN_MASK;
+}
+
+static inline void run_timer_list(void)
+{
+ spin_lock_irq(&timerlist_lock);
+ while ((long)(jiffies - timer_jiffies) >= 0) {
+ struct timer_list *timer;
+ if (!tv1.index) {
+ int n = 1;
+ do {
+ cascade_timers(tvecs[n]);
+ } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
+ }
+ while ((timer = tv1.vec[tv1.index])) {
+ void (*fn)(unsigned long) = timer->function;
+ unsigned long data = timer->data;
+ detach_timer(timer);
+ timer->next = timer->prev = NULL;
+ spin_unlock_irq(&timerlist_lock);
+ fn(data);
+ spin_lock_irq(&timerlist_lock);
+ }
+ ++timer_jiffies;
+ tv1.index = (tv1.index + 1) & TVR_MASK;
}
- sti();
+ spin_unlock_irq(&timerlist_lock);
}
+
static inline void run_old_timers(void)
{
struct timer_struct *tp;
@@ -690,6 +819,8 @@ static inline void run_old_timers(void)
}
}
+spinlock_t tqueue_lock;
+
void tqueue_bh(void)
{
run_task_queue(&tq_timer);
@@ -974,7 +1105,7 @@ static inline void do_it_prof(struct task_struct * p, unsigned long ticks)
}
}
-static __inline__ void update_one_process(struct task_struct *p,
+void update_one_process(struct task_struct *p,
unsigned long ticks, unsigned long user, unsigned long system)
{
do_process_times(p, user, system);
@@ -984,6 +1115,9 @@ static __inline__ void update_one_process(struct task_struct *p,
static void update_process_times(unsigned long ticks, unsigned long system)
{
+/*
+ * SMP does this on a per-CPU basis elsewhere
+ */
#ifndef __SMP__
struct task_struct * p = current;
unsigned long user = ticks - system;
@@ -1000,79 +1134,35 @@ static void update_process_times(unsigned long ticks, unsigned long system)
kstat.cpu_system += system;
}
update_one_process(p, ticks, user, system);
-#else
- int cpu,j;
- cpu = smp_processor_id();
- for (j=0;j<smp_num_cpus;j++)
- {
- int i = cpu_logical_map[j];
- struct task_struct *p;
-
-#ifdef __SMP_PROF__
- if (test_bit(i,&smp_idle_map))
- smp_idle_count[i]++;
-#endif
- p = current_set[i];
- /*
- * Do we have a real process?
- */
- if (p->pid) {
- /* assume user-mode process */
- unsigned long utime = ticks;
- unsigned long stime = 0;
- if (cpu == i) {
- utime = ticks-system;
- stime = system;
- } else if (smp_proc_in_lock[j]) {
- utime = 0;
- stime = ticks;
- }
- update_one_process(p, ticks, utime, stime);
-
- if (p->priority < DEF_PRIORITY)
- kstat.cpu_nice += utime;
- else
- kstat.cpu_user += utime;
- kstat.cpu_system += stime;
-
- p->counter -= ticks;
- if (p->counter >= 0)
- continue;
- p->counter = 0;
- } else {
- /*
- * Idle processor found, do we have anything
- * we could run?
- */
- if (!(0x7fffffff & smp_process_available))
- continue;
- }
- /* Ok, we should reschedule, do the magic */
- if (i==cpu)
- need_resched = 1;
- else
- smp_message_pass(i, MSG_RESCHEDULE, 0L, 0);
- }
#endif
}
-static unsigned long lost_ticks = 0;
+volatile unsigned long lost_ticks = 0;
static unsigned long lost_ticks_system = 0;
static inline void update_times(void)
{
unsigned long ticks;
+ unsigned long flags;
+
+ save_flags(flags);
+ cli();
- ticks = xchg(&lost_ticks, 0);
+ ticks = lost_ticks;
+ lost_ticks = 0;
if (ticks) {
unsigned long system;
-
system = xchg(&lost_ticks_system, 0);
+
calc_load(ticks);
update_wall_time(ticks);
+ restore_flags(flags);
+
update_process_times(ticks, system);
- }
+
+ } else
+ restore_flags(flags);
}
static void timer_bh(void)
@@ -1087,17 +1177,8 @@ void do_timer(struct pt_regs * regs)
(*(unsigned long *)&jiffies)++;
lost_ticks++;
mark_bh(TIMER_BH);
- if (!user_mode(regs)) {
+ if (!user_mode(regs))
lost_ticks_system++;
- if (prof_buffer && current->pid) {
- extern int _stext;
- unsigned long ip = instruction_pointer(regs);
- ip -= (unsigned long) &_stext;
- ip >>= prof_shift;
- if (ip < prof_len)
- prof_buffer[ip]++;
- }
- }
if (tq_timer)
mark_bh(TQUEUE_BH);
}
@@ -1129,34 +1210,81 @@ asmlinkage unsigned int sys_alarm(unsigned int seconds)
* The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
* should be moved into arch/i386 instead?
*/
+
asmlinkage int sys_getpid(void)
{
+	/* This is SMP safe - current->pid doesn't change */
return current->pid;
}
+/*
+ * This is not strictly SMP safe: p_opptr could change
+ * from under us. However, rather than getting any lock
+ * we can use an optimistic algorithm: get the parent
+ * pid, and go back and check that the parent is still
+ * the same. If it has changed (which is extremely unlikely
+ * indeed), we just try again..
+ *
+ * NOTE! This depends on the fact that even if we _do_
+ * get an old value of "parent", we can happily dereference
+ * the pointer: we just can't necessarily trust the result
+ * until we know that the parent pointer is valid.
+ *
+ * The "mb()" macro is a memory barrier - a synchronizing
+ * event. It also makes sure that gcc doesn't optimize
+ * away the necessary memory references.. The barrier doesn't
+ * have to have all that strong semantics: on x86 we don't
+ * really require a synchronizing instruction, for example.
+ * The barrier is more important for code generation than
+ * for any real memory ordering semantics (even if there is
+ * a small window for a race, using the old pointer is
+ * harmless for a while).
+ */
asmlinkage int sys_getppid(void)
{
- return current->p_opptr->pid;
+ int pid;
+ struct task_struct * me = current;
+ struct task_struct * parent;
+
+ parent = me->p_opptr;
+ for (;;) {
+ pid = parent->pid;
+#if __SMP__
+{
+ struct task_struct *old = parent;
+ mb();
+ parent = me->p_opptr;
+ if (old != parent)
+ continue;
+}
+#endif
+ break;
+ }
+ return pid;
}
asmlinkage int sys_getuid(void)
{
+ /* Only we change this so SMP safe */
return current->uid;
}
asmlinkage int sys_geteuid(void)
{
+ /* Only we change this so SMP safe */
return current->euid;
}
asmlinkage int sys_getgid(void)
{
+ /* Only we change this so SMP safe */
return current->gid;
}
asmlinkage int sys_getegid(void)
{
- return current->egid;
+ /* Only we change this so SMP safe */
+ return current->egid;
}
/*
@@ -1164,11 +1292,18 @@ asmlinkage int sys_getegid(void)
* moved into the arch dependent tree for those ports that require
* it for backward compatibility?
*/
+
asmlinkage int sys_nice(int increment)
{
unsigned long newprio;
int increase = 0;
+ /*
+ * Setpriority might change our priority at the same moment.
+ * We don't have to worry. Conceptually one call occurs first
+ * and we have a single winner.
+ */
+
newprio = increment;
if (increment < 0) {
if (!suser())
@@ -1176,6 +1311,7 @@ asmlinkage int sys_nice(int increment)
newprio = -increment;
increase = 1;
}
+
if (newprio > 40)
newprio = 40;
/*
@@ -1189,6 +1325,14 @@ asmlinkage int sys_nice(int increment)
increment = newprio;
if (increase)
increment = -increment;
+ /*
+ * Current->priority can change between this point
+ * and the assignment. We are assigning not doing add/subs
+	 * so that's ok. Conceptually a process might just instantaneously
+ * read the value we stomp over. I don't think that is an issue
+ * unless posix makes it one. If so we can loop on changes
+ * to current->priority.
+ */
newprio = current->priority - increment;
if ((signed) newprio < 1)
newprio = 1;
@@ -1206,13 +1350,15 @@ static struct task_struct *find_process_by_pid(pid_t pid)
p = current;
if (pid) {
+ read_lock(&tasklist_lock);
for_each_task(p) {
if (p->pid == pid)
goto found;
}
p = NULL;
- }
found:
+ read_unlock(&tasklist_lock);
+ }
return p;
}
@@ -1255,12 +1401,13 @@ static int setscheduler(pid_t pid, int policy,
p->policy = policy;
p->rt_priority = lp.sched_priority;
- cli();
+ spin_lock(&scheduler_lock);
+ spin_lock_irq(&runqueue_lock);
if (p->next_run)
move_last_runqueue(p);
- sti();
- schedule();
-
+ spin_unlock_irq(&runqueue_lock);
+ spin_unlock(&scheduler_lock);
+ need_resched = 1;
return 0;
}
@@ -1307,36 +1454,44 @@ asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param)
asmlinkage int sys_sched_yield(void)
{
- cli();
+ spin_lock(&scheduler_lock);
+ spin_lock_irq(&runqueue_lock);
move_last_runqueue(current);
- sti();
+ spin_unlock_irq(&runqueue_lock);
+ spin_unlock(&scheduler_lock);
+ need_resched = 1;
return 0;
}
asmlinkage int sys_sched_get_priority_max(int policy)
{
+ int ret = -EINVAL;
+
switch (policy) {
- case SCHED_FIFO:
- case SCHED_RR:
- return 99;
- case SCHED_OTHER:
- return 0;
+ case SCHED_FIFO:
+ case SCHED_RR:
+ ret = 99;
+ break;
+ case SCHED_OTHER:
+ ret = 0;
+ break;
}
-
- return -EINVAL;
+ return ret;
}
asmlinkage int sys_sched_get_priority_min(int policy)
{
+ int ret = -EINVAL;
+
switch (policy) {
- case SCHED_FIFO:
- case SCHED_RR:
- return 1;
- case SCHED_OTHER:
- return 0;
+ case SCHED_FIFO:
+ case SCHED_RR:
+ ret = 1;
+ break;
+ case SCHED_OTHER:
+ ret = 0;
}
-
- return -EINVAL;
+ return ret;
}
asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
@@ -1344,9 +1499,10 @@ asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
struct timespec t;
t.tv_sec = 0;
- t.tv_nsec = 0; /* <-- Linus, please fill correct value in here */
- return -ENOSYS; /* and then delete this line. Thanks! */
- return copy_to_user(interval, &t, sizeof(struct timespec)) ? -EFAULT : 0;
+ t.tv_nsec = 150000;
+ if (copy_to_user(interval, &t, sizeof(struct timespec)))
+ return -EFAULT;
+ return 0;
}
/*
@@ -1369,33 +1525,35 @@ static void jiffiestotimespec(unsigned long jiffies, struct timespec *value)
{
value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ);
value->tv_sec = jiffies / HZ;
- return;
}
asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
{
- int error;
struct timespec t;
unsigned long expire;
- error = copy_from_user(&t, rqtp, sizeof(struct timespec));
- if (error)
- return -EFAULT;
+ if(copy_from_user(&t, rqtp, sizeof(struct timespec)))
+ return -EFAULT;
if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
return -EINVAL;
+
if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
- current->policy != SCHED_OTHER) {
+ current->policy != SCHED_OTHER)
+ {
/*
* Short delay requests up to 2 ms will be handled with
* high precision by a busy wait for all real-time processes.
+ *
+ * Its important on SMP not to do this holding locks.
*/
udelay((t.tv_nsec + 999) / 1000);
return 0;
}
expire = timespectojiffies(&t) + (t.tv_sec || t.tv_nsec) + jiffies;
+
current->timeout = expire;
current->state = TASK_INTERRUPTIBLE;
schedule();
@@ -1405,11 +1563,10 @@ asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
jiffiestotimespec(expire - jiffies -
(expire > jiffies + 1), &t);
if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
- return -EFAULT;
+ return -EFAULT;
}
return -EINTR;
}
-
return 0;
}
@@ -1478,7 +1635,7 @@ void sched_init(void)
* process right in SMP mode.
*/
int cpu=smp_processor_id();
-#ifndef __SMP__
+#ifndef __SMP__
current_set[cpu]=&init_task;
#else
init_task.processor=cpu;
diff --git a/kernel/signal.c b/kernel/signal.c
index 325663bed..3203ad39c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -12,6 +12,8 @@
#include <linux/ptrace.h>
#include <linux/unistd.h>
#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <asm/uaccess.h>
@@ -19,61 +21,70 @@
#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP)))
-#if !defined(__alpha__) && !defined(__mips__)
+#ifndef __alpha__
/*
* This call isn't used by all ports, in particular, the Alpha
* uses osf_sigprocmask instead. Maybe it should be moved into
* arch-dependent dir?
+ *
+ * We don't need to get the kernel lock - this is all local to this
+ * particular thread.. (and that's good, because this is _heavily_
+ * used by various programs)
+ *
+ * No SMP locking would prevent the inherent races present in this
+ * routine, thus we do not perform any locking at all.
*/
asmlinkage int sys_sigprocmask(int how, sigset_t *set, sigset_t *oset)
{
- sigset_t new_set, old_set = current->blocked;
- int error;
+ sigset_t old_set = current->blocked;
if (set) {
- error = get_user(new_set, set);
- if (error)
- return error;
+ sigset_t new_set;
+
+ if(get_user(new_set, set))
+ return -EFAULT;
+
new_set &= _BLOCKABLE;
switch (how) {
+ default:
+ return -EINVAL;
case SIG_BLOCK:
- current->blocked |= new_set;
+ new_set |= old_set;
break;
case SIG_UNBLOCK:
- current->blocked &= ~new_set;
+ new_set = old_set & ~new_set;
break;
case SIG_SETMASK:
- current->blocked = new_set;
break;
- default:
- return -EINVAL;
}
+ current->blocked = new_set;
}
if (oset) {
- error = put_user(old_set, oset);
- if (error)
- return error;
+ if(put_user(old_set, oset))
+ return -EFAULT;
}
return 0;
}
-#endif
-
-#ifndef __alpha__
/*
* For backwards compatibility? Functionality superseded by sigprocmask.
*/
asmlinkage int sys_sgetmask(void)
{
+ /* SMP safe */
return current->blocked;
}
asmlinkage int sys_ssetmask(int newmask)
{
- int old=current->blocked;
+ int old;
+ spin_lock_irq(&current->sigmask_lock);
+ old = current->blocked;
current->blocked = newmask & _BLOCKABLE;
+ spin_unlock_irq(&current->sigmask_lock);
+
return old;
}
@@ -81,8 +92,13 @@ asmlinkage int sys_ssetmask(int newmask)
asmlinkage int sys_sigpending(sigset_t *set)
{
- return put_user(current->blocked & current->signal,
- /* Hack */(unsigned long *)set);
+ int ret;
+
+ /* fill in "set" with signals pending but blocked. */
+ spin_lock_irq(&current->sigmask_lock);
+ ret = put_user(current->blocked & current->signal, set);
+ spin_unlock_irq(&current->sigmask_lock);
+ return ret;
}
/*
@@ -99,22 +115,24 @@ asmlinkage int sys_sigpending(sigset_t *set)
* Note the silly behaviour of SIGCHLD: SIG_IGN means that the signal
* isn't actually ignored, but does automatic child reaping, while
* SIG_DFL is explicitly said by POSIX to force the signal to be ignored..
+ *
+ * All callers of check_pending must be holding current->sig->siglock.
*/
-static inline void check_pending(int signum)
+inline void check_pending(int signum)
{
struct sigaction *p;
p = signum - 1 + current->sig->action;
+ spin_lock(&current->sigmask_lock);
if (p->sa_handler == SIG_IGN) {
- k_sigdelset(&current->signal, signum);
- return;
- }
- if (p->sa_handler == SIG_DFL) {
- if (signum != SIGCONT && signum != SIGCHLD && signum != SIGWINCH)
- return;
- k_sigdelset(&current->signal, signum);
- return;
+ current->signal &= ~_S(signum);
+ } else if (p->sa_handler == SIG_DFL) {
+ if (signum == SIGCONT ||
+ signum == SIGCHLD ||
+ signum != SIGWINCH)
+ current->signal &= ~_S(signum);
}
+ spin_unlock(&current->sigmask_lock);
}
#if !defined(__alpha__) && !defined(__mips__)
@@ -123,69 +141,65 @@ static inline void check_pending(int signum)
*/
asmlinkage unsigned long sys_signal(int signum, __sighandler_t handler)
{
- int err;
struct sigaction tmp;
- /*
- * HACK: We still cannot handle signals > 32 due to the limited
- * size of ksigset_t (which will go away).
- */
- if (signum > 32)
- return -EINVAL;
- if (signum<1 || signum>_NSIG)
+ if (signum<1 || signum>32)
return -EINVAL;
if (signum==SIGKILL || signum==SIGSTOP)
return -EINVAL;
if (handler != SIG_DFL && handler != SIG_IGN) {
- err = verify_area(VERIFY_READ, handler, 1);
- if (err)
- return err;
+ if(verify_area(VERIFY_READ, handler, 1))
+ return -EFAULT;
}
+
memset(&tmp, 0, sizeof(tmp));
tmp.sa_handler = handler;
tmp.sa_flags = SA_ONESHOT | SA_NOMASK;
+
+ spin_lock_irq(&current->sig->siglock);
handler = current->sig->action[signum-1].sa_handler;
current->sig->action[signum-1] = tmp;
check_pending(signum);
+ spin_unlock_irq(&current->sig->siglock);
+
return (unsigned long) handler;
}
#endif /* !defined(__alpha__) && !defined(__mips__) */
+#ifndef __sparc__
asmlinkage int sys_sigaction(int signum, const struct sigaction * action,
struct sigaction * oldaction)
{
struct sigaction new_sa, *p;
- /*
- * HACK: We still cannot handle signals > 32 due to the limited
- * size of ksigset_t (which will go away).
- */
- if (signum > 32)
- return -EINVAL;
- if (signum<1 || signum>_NSIG)
+ if (signum < 1 || signum > 32)
return -EINVAL;
+
p = signum - 1 + current->sig->action;
+
if (action) {
- int err = verify_area(VERIFY_READ, action, sizeof(*action));
- if (err)
- return err;
+ if (copy_from_user(&new_sa, action, sizeof(struct sigaction)))
+ return -EFAULT;
if (signum==SIGKILL || signum==SIGSTOP)
return -EINVAL;
- if (copy_from_user(&new_sa, action, sizeof(struct sigaction)))
- return -EFAULT;
- if (new_sa.sa_handler != SIG_DFL && new_sa.sa_handler != SIG_IGN) {
- err = verify_area(VERIFY_READ, new_sa.sa_handler, 1);
- if (err)
- return err;
- }
}
+
if (oldaction) {
+ /* In the clone() case we could copy half consistent
+ * state to the user, however this could sleep and
+ * deadlock us if we held the signal lock on SMP. So for
+ * now I take the easy way out and do no locking.
+ */
if (copy_to_user(oldaction, p, sizeof(struct sigaction)))
return -EFAULT;
}
+
if (action) {
+ spin_lock_irq(&current->sig->siglock);
*p = new_sa;
check_pending(signum);
+ spin_unlock_irq(&current->sig->siglock);
}
return 0;
}
+#endif
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 022b55355..6b9b41aa5 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -15,40 +15,60 @@
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/bitops.h>
+#include <asm/atomic.h>
-unsigned long intr_count = 0;
+/* intr_count died a painless death... -DaveM */
int bh_mask_count[32];
unsigned long bh_active = 0;
unsigned long bh_mask = 0;
void (*bh_base[32])(void);
-
-asmlinkage void do_bottom_half(void)
+/*
+ * This needs to make sure that only one bottom half handler
+ * is ever active at a time. We do this without locking by
+ * doing an atomic increment on the intr_count, and checking
+ * (nonatomically) against 1. Only if it's 1 do we schedule
+ * the bottom half.
+ *
+ * Note that the non-atomicity of the test (as opposed to the
+ * actual update) means that the test may fail, and _nobody_
+ * runs the handlers if there is a race that makes multiple
+ * CPU's get here at the same time. That's ok, we'll run them
+ * next time around.
+ */
+static inline void run_bottom_halves(void)
{
unsigned long active;
- unsigned long mask, left;
void (**bh)(void);
- sti();
+ active = get_active_bhs();
+ clear_active_bhs(active);
bh = bh_base;
- active = bh_active & bh_mask;
- for (mask = 1, left = ~0 ; left & active ; bh++,mask += mask,left += left) {
- if (mask & active) {
- void (*fn)(void);
- bh_active &= ~mask;
- fn = *bh;
- if (!fn)
- goto bad_bh;
- fn();
+ do {
+ if (active & 1)
+ (*bh)();
+ bh++;
+ active >>= 1;
+ } while (active);
+}
+
+asmlinkage void do_bottom_half(void)
+{
+ int cpu = smp_processor_id();
+
+ if (hardirq_trylock(cpu)) {
+ if (softirq_trylock()) {
+ run_bottom_halves();
+ softirq_endlock();
}
+ hardirq_endlock(cpu);
}
- return;
-bad_bh:
- printk ("irq.c:bad bottom half entry %08lx\n", mask);
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 8fcaba2de..934108fa8 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -4,7 +4,6 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -21,9 +20,10 @@
#include <linux/fcntl.h>
#include <linux/acct.h>
#include <linux/tty.h>
-#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF)
-#include <linux/apm_bios.h>
-#endif
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -31,8 +31,30 @@
/*
* this indicates whether you can reboot with ctrl-alt-del: the default is yes
*/
+
int C_A_D = 1;
+
+/*
+ * Notifier list for kernel code which wants to be called
+ * at shutdown. This is used to stop any idling DMA operations
+ * and the like.
+ */
+
+struct notifier_block *reboot_notifier_list = NULL;
+
+int register_reboot_notifier(struct notifier_block * nb)
+{
+ return notifier_chain_register(&reboot_notifier_list, nb);
+}
+
+int unregister_reboot_notifier(struct notifier_block * nb)
+{
+ return notifier_chain_unregister(&reboot_notifier_list, nb);
+}
+
+
+
extern void adjust_clock(void);
asmlinkage int sys_ni_syscall(void)
@@ -65,13 +87,14 @@ static int proc_sel(struct task_struct *p, int which, int who)
asmlinkage int sys_setpriority(int which, int who, int niceval)
{
struct task_struct *p;
- int error = ESRCH;
unsigned int priority;
+ int error;
if (which > 2 || which < 0)
return -EINVAL;
/* normalize: avoid signed division (rounding problems) */
+ error = ESRCH;
priority = niceval;
if (niceval < 0)
priority = -niceval;
@@ -85,6 +108,7 @@ asmlinkage int sys_setpriority(int which, int who, int niceval)
priority = 1;
}
+ read_lock(&tasklist_lock);
for_each_task(p) {
if (!proc_sel(p, which, who))
continue;
@@ -100,6 +124,8 @@ asmlinkage int sys_setpriority(int which, int who, int niceval)
else
p->priority = priority;
}
+ read_unlock(&tasklist_lock);
+
return -error;
}
@@ -116,12 +142,14 @@ asmlinkage int sys_getpriority(int which, int who)
if (which > 2 || which < 0)
return -EINVAL;
+ read_lock(&tasklist_lock);
for_each_task (p) {
if (!proc_sel(p, which, who))
continue;
if (p->priority > max_prio)
max_prio = p->priority;
}
+ read_unlock(&tasklist_lock);
/* scale the priority from timeslice to 0..40 */
if (max_prio > 0)
@@ -169,7 +197,7 @@ asmlinkage int sys_prof(void)
#endif
-extern asmlinkage sys_kill(int, int);
+extern asmlinkage int sys_kill(int, int);
/*
* Reboot system call: for obvious reasons only root may call it,
@@ -178,29 +206,70 @@ extern asmlinkage sys_kill(int, int);
* You can also set the meaning of the ctrl-alt-del-key here.
*
* reboot doesn't sync: do that yourself before calling this.
+ *
*/
-asmlinkage int sys_reboot(int magic, int magic_too, int flag)
+asmlinkage int sys_reboot(int magic1, int magic2, int cmd, void * arg)
{
+ char buffer[256];
+
+ /* We only trust the superuser with rebooting the system. */
if (!suser())
return -EPERM;
- if (magic != 0xfee1dead || magic_too != 672274793)
+
+ /* For safety, we require "magic" arguments. */
+ if (magic1 != LINUX_REBOOT_MAGIC1 ||
+ (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A))
return -EINVAL;
- if (flag == 0x01234567)
- hard_reset_now();
- else if (flag == 0x89ABCDEF)
+
+ lock_kernel();
+ switch (cmd) {
+ case LINUX_REBOOT_CMD_RESTART:
+ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
+ printk(KERN_EMERG "Restarting system.\n");
+ machine_restart(NULL);
+ break;
+
+ case LINUX_REBOOT_CMD_CAD_ON:
C_A_D = 1;
- else if (!flag)
+ break;
+
+ case LINUX_REBOOT_CMD_CAD_OFF:
C_A_D = 0;
- else if (flag == 0xCDEF0123) {
- printk(KERN_EMERG "System halted\n");
- sys_kill(-1, SIGKILL);
-#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF)
- apm_set_power_state(APM_STATE_OFF);
-#endif
+ break;
+
+ case LINUX_REBOOT_CMD_HALT:
+ notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
+ printk(KERN_EMERG "System halted.\n");
+ machine_halt();
do_exit(0);
- } else
+ break;
+
+ case LINUX_REBOOT_CMD_POWER_OFF:
+ notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
+ printk(KERN_EMERG "Power down.\n");
+ machine_power_off();
+ do_exit(0);
+ break;
+
+ case LINUX_REBOOT_CMD_RESTART2:
+ if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) {
+ unlock_kernel();
+ return -EFAULT;
+ }
+ buffer[sizeof(buffer) - 1] = '\0';
+
+ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer);
+ printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
+ machine_restart(buffer);
+ break;
+
+ default:
+ unlock_kernel();
return -EINVAL;
- return (0);
+ break;
+ };
+ unlock_kernel();
+ return 0;
}
/*
@@ -210,9 +279,10 @@ asmlinkage int sys_reboot(int magic, int magic_too, int flag)
*/
void ctrl_alt_del(void)
{
- if (C_A_D)
- hard_reset_now();
- else
+ if (C_A_D) {
+ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
+ machine_restart(NULL);
+ } else
kill_proc(1, SIGINT, 1);
}
@@ -231,6 +301,9 @@ void ctrl_alt_del(void)
* The general idea is that a program which uses just setregid() will be
* 100% compatible with BSD. A program which uses just setgid() will be
* 100% compatible with POSIX w/ Saved ID's.
+ *
+ * SMP: There are no races, the gid's are checked only by filesystem
+ * operations (as far as semantic preservation is concerned).
*/
asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
{
@@ -243,7 +316,7 @@ asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
suser())
current->gid = rgid;
else
- return(-EPERM);
+ return -EPERM;
}
if (egid != (gid_t) -1) {
if ((old_rgid == egid) ||
@@ -253,7 +326,7 @@ asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
current->fsgid = current->egid = egid;
else {
current->gid = old_rgid;
- return(-EPERM);
+ return -EPERM;
}
}
if (rgid != (gid_t) -1 ||
@@ -267,6 +340,8 @@ asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
/*
* setgid() is implemented like SysV w/ SAVED_IDS
+ *
+ * SMP: Same implicit races as above.
*/
asmlinkage int sys_setgid(gid_t gid)
{
@@ -278,6 +353,7 @@ asmlinkage int sys_setgid(gid_t gid)
current->egid = current->fsgid = gid;
else
return -EPERM;
+
if (current->egid != old_egid)
current->dumpable = 0;
return 0;
@@ -329,66 +405,70 @@ int acct_process(long exitcode)
asmlinkage int sys_acct(const char *name)
{
- struct inode *inode = (struct inode *)0;
- char *tmp;
- int error;
-
- if (!suser())
- return -EPERM;
-
- if (name == (char *)0) {
- if (acct_active) {
- if (acct_file.f_op->release)
- acct_file.f_op->release(acct_file.f_inode, &acct_file);
-
- if (acct_file.f_inode != (struct inode *) 0)
- iput(acct_file.f_inode);
-
- acct_active = 0;
- }
- return 0;
- } else {
- if (!acct_active) {
-
- if ((error = getname(name, &tmp)) != 0)
- return (error);
-
- error = open_namei(tmp, O_RDWR, 0600, &inode, 0);
- putname(tmp);
-
- if (error)
- return (error);
-
- if (!S_ISREG(inode->i_mode)) {
- iput(inode);
- return -EACCES;
- }
-
- if (!inode->i_op || !inode->i_op->default_file_ops ||
- !inode->i_op->default_file_ops->write) {
- iput(inode);
- return -EIO;
- }
-
- acct_file.f_mode = 3;
- acct_file.f_flags = 0;
- acct_file.f_count = 1;
- acct_file.f_inode = inode;
- acct_file.f_pos = inode->i_size;
- acct_file.f_reada = 0;
- acct_file.f_op = inode->i_op->default_file_ops;
-
- if (acct_file.f_op->open)
- if (acct_file.f_op->open(acct_file.f_inode, &acct_file)) {
- iput(inode);
- return -EIO;
- }
-
- acct_active = 1;
- return 0;
- } else
- return -EBUSY;
- }
+ struct inode *inode = (struct inode *)0;
+ char *tmp;
+ int error = -EPERM;
+
+ lock_kernel();
+ if (!suser())
+ goto out;
+
+ if (name == (char *)0) {
+ if (acct_active) {
+ if (acct_file.f_op->release)
+ acct_file.f_op->release(acct_file.f_inode, &acct_file);
+
+ if (acct_file.f_inode != (struct inode *) 0)
+ iput(acct_file.f_inode);
+
+ acct_active = 0;
+ }
+ error = 0;
+ } else {
+ error = -EBUSY;
+ if (!acct_active) {
+ if ((error = getname(name, &tmp)) != 0)
+ goto out;
+
+ error = open_namei(tmp, O_RDWR, 0600, &inode, 0);
+ putname(tmp);
+ if (error)
+ goto out;
+
+ error = -EACCES;
+ if (!S_ISREG(inode->i_mode)) {
+ iput(inode);
+ goto out;
+ }
+
+ error = -EIO;
+ if (!inode->i_op || !inode->i_op->default_file_ops ||
+ !inode->i_op->default_file_ops->write) {
+ iput(inode);
+ goto out;
+ }
+
+ acct_file.f_mode = 3;
+ acct_file.f_flags = 0;
+ acct_file.f_count = 1;
+ acct_file.f_inode = inode;
+ acct_file.f_pos = inode->i_size;
+ acct_file.f_reada = 0;
+ acct_file.f_op = inode->i_op->default_file_ops;
+
+ if(acct_file.f_op->open)
+ if(acct_file.f_op->open(acct_file.f_inode, &acct_file)) {
+ iput(inode);
+ goto out;
+ }
+
+ acct_active = 1;
+ error = 0;
+ }
+ }
+out:
+ unlock_kernel();
+ return error;
}
#ifndef __alpha__
@@ -443,16 +523,18 @@ asmlinkage int sys_old_syscall(void)
*/
asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
{
- int old_ruid = current->uid;
- int old_euid = current->euid;
+ int old_ruid;
+ int old_euid;
+ old_ruid = current->uid;
+ old_euid = current->euid;
if (ruid != (uid_t) -1) {
if ((old_ruid == ruid) ||
(current->euid==ruid) ||
suser())
current->uid = ruid;
else
- return(-EPERM);
+ return -EPERM;
}
if (euid != (uid_t) -1) {
if ((old_ruid == euid) ||
@@ -462,7 +544,7 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
current->fsuid = current->euid = euid;
else {
current->uid = old_ruid;
- return(-EPERM);
+ return -EPERM;
}
}
if (ruid != (uid_t) -1 ||
@@ -495,9 +577,10 @@ asmlinkage int sys_setuid(uid_t uid)
current->fsuid = current->euid = uid;
else
return -EPERM;
+
if (current->euid != old_euid)
current->dumpable = 0;
- return(0);
+ return 0;
}
@@ -538,6 +621,7 @@ asmlinkage int sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
if (!(retval = put_user(current->uid, ruid)) &&
!(retval = put_user(current->euid, euid)))
retval = put_user(current->suid, suid);
+
return retval;
}
@@ -550,13 +634,15 @@ asmlinkage int sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
*/
asmlinkage int sys_setfsuid(uid_t uid)
{
- int old_fsuid = current->fsuid;
+ int old_fsuid;
+ old_fsuid = current->fsuid;
if (uid == current->uid || uid == current->euid ||
uid == current->suid || uid == current->fsuid || suser())
current->fsuid = uid;
if (current->fsuid != old_fsuid)
current->dumpable = 0;
+
return old_fsuid;
}
@@ -565,29 +651,35 @@ asmlinkage int sys_setfsuid(uid_t uid)
*/
asmlinkage int sys_setfsgid(gid_t gid)
{
- int old_fsgid = current->fsgid;
+ int old_fsgid;
+ old_fsgid = current->fsgid;
if (gid == current->gid || gid == current->egid ||
gid == current->sgid || gid == current->fsgid || suser())
current->fsgid = gid;
if (current->fsgid != old_fsgid)
current->dumpable = 0;
+
return old_fsgid;
}
asmlinkage long sys_times(struct tms * tbuf)
{
- int error;
- if (tbuf) {
- error = put_user(current->utime,&tbuf->tms_utime);
- if (!error)
- error = put_user(current->stime,&tbuf->tms_stime);
- if (!error)
- error = put_user(current->cutime,&tbuf->tms_cutime);
- if (!error)
- error = put_user(current->cstime,&tbuf->tms_cstime);
- if (error)
- return error;
+ /*
+ * In the SMP world we might just be unlucky and have one of
+ * the times increment as we use it. Since the value is an
+ * atomically safe type this is just fine. Conceptually it's
+ * as if the syscall took an instant longer to occur.
+ */
+ if (tbuf)
+ {
+ /* ?? use copy_to_user() */
+ if(!access_ok(VERIFY_READ, tbuf, sizeof(struct tms)) ||
+ __put_user(current->utime,&tbuf->tms_utime)||
+ __put_user(current->stime,&tbuf->tms_stime) ||
+ __put_user(current->cutime,&tbuf->tms_cutime) ||
+ __put_user(current->cstime,&tbuf->tms_cstime))
+ return -EFAULT;
}
return jiffies;
}
@@ -604,9 +696,11 @@ asmlinkage long sys_times(struct tms * tbuf)
* Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
* LBT 04.03.94
*/
+
asmlinkage int sys_setpgid(pid_t pid, pid_t pgid)
{
struct task_struct * p;
+ int err = -EINVAL;
if (!pid)
pid = current->pid;
@@ -614,82 +708,123 @@ asmlinkage int sys_setpgid(pid_t pid, pid_t pgid)
pgid = pid;
if (pgid < 0)
return -EINVAL;
+
+ read_lock(&tasklist_lock);
for_each_task(p) {
- if (p->pid == pid)
+ if (p->pid == pid) {
+ /* NOTE: I haven't dropped tasklist_lock, this is
+ * on purpose. -DaveM
+ */
goto found_task;
+ }
}
+ read_unlock(&tasklist_lock);
return -ESRCH;
found_task:
+ /* From this point forward we keep holding onto the tasklist lock
+ * so that our parent does not change from under us. -DaveM
+ */
+ err = -ESRCH;
if (p->p_pptr == current || p->p_opptr == current) {
+ err = -EPERM;
if (p->session != current->session)
- return -EPERM;
+ goto out;
+ err = -EACCES;
if (p->did_exec)
- return -EACCES;
+ goto out;
} else if (p != current)
- return -ESRCH;
+ goto out;
+ err = -EPERM;
if (p->leader)
- return -EPERM;
+ goto out;
if (pgid != pid) {
struct task_struct * tmp;
for_each_task (tmp) {
if (tmp->pgrp == pgid &&
- tmp->session == current->session)
+ tmp->session == current->session)
goto ok_pgid;
}
- return -EPERM;
+ goto out;
}
ok_pgid:
p->pgrp = pgid;
- return 0;
+ err = 0;
+out:
+ /* All paths lead to here, thus we are safe. -DaveM */
+ read_unlock(&tasklist_lock);
+ return err;
}
asmlinkage int sys_getpgid(pid_t pid)
{
- struct task_struct * p;
-
- if (!pid)
+ if (!pid) {
return current->pgrp;
- for_each_task(p) {
- if (p->pid == pid)
- return p->pgrp;
+ } else {
+ struct task_struct *p;
+ int ret = -ESRCH;
+
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->pid == pid) {
+ ret = p->pgrp;
+ break;
+ }
+ }
+ read_unlock(&tasklist_lock);
+ return ret;
}
- return -ESRCH;
}
asmlinkage int sys_getpgrp(void)
{
+ /* SMP - assuming writes are word atomic this is fine */
return current->pgrp;
}
asmlinkage int sys_getsid(pid_t pid)
{
struct task_struct * p;
-
- if (!pid)
- return current->session;
- for_each_task(p) {
- if (p->pid == pid)
- return p->session;
+ int ret;
+
+ /* SMP: The 'self' case requires no lock */
+ if (!pid) {
+ ret = current->session;
+ } else {
+ ret = -ESRCH;
+
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->pid == pid) {
+ ret = p->session;
+ break;
+ }
+ }
+ read_unlock(&tasklist_lock);
}
- return -ESRCH;
+ return ret;
}
asmlinkage int sys_setsid(void)
{
struct task_struct * p;
+ int err = -EPERM;
+ read_lock(&tasklist_lock);
for_each_task(p) {
if (p->pgrp == current->pid)
- return -EPERM;
+ goto out;
}
current->leader = 1;
current->session = current->pgrp = current->pid;
current->tty = NULL;
current->tty_old_pgrp = 0;
- return current->pgrp;
+ err = current->pgrp;
+out:
+ read_unlock(&tasklist_lock);
+ return err;
}
/*
@@ -698,6 +833,11 @@ asmlinkage int sys_setsid(void)
asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist)
{
int i;
+
+ /*
+ * SMP: Nobody else can change our grouplist. Thus we are
+ * safe.
+ */
if (gidsetsize < 0)
return -EINVAL;
@@ -711,21 +851,21 @@ asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist)
return i;
}
+/*
+ * SMP: Our groups are not shared. We can copy to/from them safely
+ * without another task interfering.
+ */
+
asmlinkage int sys_setgroups(int gidsetsize, gid_t *grouplist)
{
- int err;
-
if (!suser())
return -EPERM;
if ((unsigned) gidsetsize > NGROUPS)
return -EINVAL;
- err = copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t));
- if (err) {
- gidsetsize = 0;
- err = -EFAULT;
- }
+ if(copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t)))
+ return -EFAULT;
current->ngroups = gidsetsize;
- return err;
+ return 0;
}
int in_group_p(gid_t grp)
@@ -762,53 +902,49 @@ asmlinkage int sys_newuname(struct new_utsname * name)
* Move these to arch dependent dir since they are for
* backward compatibility only?
*/
+
+#ifndef __sparc__
asmlinkage int sys_uname(struct old_utsname * name)
{
- int error = -EFAULT;;
if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
- error = 0;
- return error;
+ return 0;
+ return -EFAULT;
}
+#endif
asmlinkage int sys_olduname(struct oldold_utsname * name)
{
int error;
+
if (!name)
return -EFAULT;
- error = copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
- if (!error)
- error = put_user(0,name->sysname+__OLD_UTS_LEN);
- if (!error)
- error = copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
- if (!error)
- error = put_user(0,name->nodename+__OLD_UTS_LEN);
- if (!error)
- error = copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
- if (!error)
- error = put_user(0,name->release+__OLD_UTS_LEN);
- if (!error)
- error = copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
- if (!error)
- error = put_user(0,name->version+__OLD_UTS_LEN);
- if (!error)
- error = copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
- if (!error)
- error = put_user(0,name->machine+__OLD_UTS_LEN);
- return error ? -EFAULT : 0;
+ if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
+ return -EFAULT;
+
+ error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+ error -= __put_user(0,name->sysname+__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+ error -= __put_user(0,name->nodename+__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+ error -= __put_user(0,name->release+__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+ error -= __put_user(0,name->version+__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+ error = __put_user(0,name->machine+__OLD_UTS_LEN);
+ error = error ? -EFAULT : 0;
+
+ return error;
}
#endif
asmlinkage int sys_sethostname(char *name, int len)
{
- int error;
-
if (!suser())
return -EPERM;
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
- error = copy_from_user(system_utsname.nodename, name, len);
- if (error)
+ if(copy_from_user(system_utsname.nodename, name, len))
return -EFAULT;
system_utsname.nodename[len] = 0;
return 0;
@@ -820,7 +956,7 @@ asmlinkage int sys_gethostname(char *name, int len)
if (len < 0)
return -EINVAL;
- i = 1+strlen(system_utsname.nodename);
+ i = 1 + strlen(system_utsname.nodename);
if (i > len)
i = len;
return copy_to_user(name, system_utsname.nodename, i) ? -EFAULT : 0;
@@ -832,14 +968,11 @@ asmlinkage int sys_gethostname(char *name, int len)
*/
asmlinkage int sys_setdomainname(char *name, int len)
{
- int error;
-
if (!suser())
return -EPERM;
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
- error = copy_from_user(system_utsname.domainname, name, len);
- if (error)
+ if(copy_from_user(system_utsname.domainname, name, len))
return -EFAULT;
system_utsname.domainname[len] = 0;
return 0;
@@ -849,20 +982,19 @@ asmlinkage int sys_getrlimit(unsigned int resource, struct rlimit *rlim)
{
if (resource >= RLIM_NLIMITS)
return -EINVAL;
- return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim))
- ? -EFAULT : 0 ;
+ else
+ return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim))
+ ? -EFAULT : 0;
}
asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim)
{
struct rlimit new_rlim, *old_rlim;
- int err;
if (resource >= RLIM_NLIMITS)
return -EINVAL;
- err = copy_from_user(&new_rlim, rlim, sizeof(*rlim));
- if (err)
- return -EFAULT;
+ if(copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+ return -EFAULT;
old_rlim = current->rlim + resource;
if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
(new_rlim.rlim_max > old_rlim->rlim_max)) &&
@@ -883,6 +1015,13 @@ asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim)
* make sense to do this. It will make moving the rest of the information
* a lot simpler! (Which we're not doing right now because we're not
* measuring them yet).
+ *
+ * This is SMP safe. Either we are called from sys_getrusage on ourselves
+ * below (we know we aren't going to exit/disappear and only we change our
+ * rusage counters), or we are called from wait4() on a process which is
+ * either stopped or zombied. In the zombied case the task won't get
+ * reaped till shortly after the call to getrusage(), in both cases the
+ * task being examined is in a frozen state so the counters won't change.
*/
int getrusage(struct task_struct *p, int who, struct rusage *ru)
{
@@ -930,8 +1069,6 @@ asmlinkage int sys_getrusage(int who, struct rusage *ru)
asmlinkage int sys_umask(int mask)
{
- int old = current->fs->umask;
-
- current->fs->umask = mask & S_IRWXUGO;
- return (old);
+ mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
+ return mask;
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3d0fbf49b..9e0bb0fd8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -7,6 +7,8 @@
* Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
* Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
* Dynamic registration fixes, Stephen Tweedie.
+ * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
+ * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris Horn.
*/
#include <linux/config.h>
@@ -20,28 +22,49 @@
#include <linux/ctype.h>
#include <linux/utsname.h>
#include <linux/swapctl.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <asm/bitops.h>
#include <asm/uaccess.h>
+#ifdef CONFIG_ROOT_NFS
+#include <linux/nfs_fs.h>
+#endif
+
+#ifdef CONFIG_SYSCTL
+
/* External variables not in a header file. */
extern int panic_timeout;
+extern int console_loglevel, default_message_loglevel;
+extern int minimum_console_loglevel, default_console_loglevel;
+extern int C_A_D, swapout_interval;
+extern int bdf_prm[], bdflush_min[], bdflush_max[];
+extern char binfmt_java_interpreter[], binfmt_java_appletviewer[];
+extern int sysctl_overcommit_memory;
-
-#ifdef CONFIG_ROOT_NFS
-#include <linux/nfs_fs.h>
+#ifdef __sparc__
+extern char reboot_command [];
#endif
+static int parse_table(int *, int, void *, size_t *, void *, size_t,
+ ctl_table *, void **);
+static int do_securelevel_strategy (ctl_table *, int *, int, void *, size_t *,
+ void *, size_t, void **);
+
+
static ctl_table root_table[];
static struct ctl_table_header root_table_header =
{root_table, DNODE_SINGLE(&root_table_header)};
-static int parse_table(int *, int, void *, size_t *, void *, size_t,
- ctl_table *, void **);
-
static ctl_table kern_table[];
static ctl_table vm_table[];
extern ctl_table net_table[];
+static ctl_table proc_table[];
+static ctl_table fs_table[];
+static ctl_table debug_table[];
+static ctl_table dev_table[];
+
/* /proc declarations: */
@@ -59,7 +82,7 @@ struct file_operations proc_sys_file_operations =
proc_readsys, /* read */
proc_writesys, /* write */
NULL, /* readdir */
- NULL, /* select */
+ NULL, /* poll */
NULL, /* ioctl */
NULL, /* mmap */
NULL, /* no special open code */
@@ -94,19 +117,16 @@ static void register_proc_table(ctl_table *, struct proc_dir_entry *);
static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
#endif
-extern int bdf_prm[], bdflush_min[], bdflush_max[];
-
-static int do_securelevel_strategy (ctl_table *, int *, int, void *, size_t *,
- void *, size_t, void **);
-
-extern char binfmt_java_interpreter[], binfmt_java_appletviewer[];
-
/* The default sysctl tables: */
static ctl_table root_table[] = {
{CTL_KERN, "kernel", NULL, 0, 0555, kern_table},
{CTL_VM, "vm", NULL, 0, 0555, vm_table},
{CTL_NET, "net", NULL, 0, 0555, net_table},
+ {CTL_PROC, "proc", NULL, 0, 0555, proc_table},
+ {CTL_FS, "fs", NULL, 0, 0555, fs_table},
+ {CTL_DEBUG, "debug", NULL, 0, 0555, debug_table},
+ {CTL_DEV, "dev", NULL, 0, 0555, dev_table},
{0}
};
@@ -140,7 +160,7 @@ static ctl_table kern_table[] = {
#ifdef CONFIG_ROOT_NFS
{KERN_NFSRNAME, "nfs-root-name", nfs_root_name, NFS_ROOT_NAME_LEN,
0644, NULL, &proc_dostring, &sysctl_string },
- {KERN_NFSRNAME, "nfs-root-addrs", nfs_root_addrs, NFS_ROOT_ADDRS_LEN,
+ {KERN_NFSRADDRS, "nfs-root-addrs", nfs_root_addrs, NFS_ROOT_ADDRS_LEN,
0644, NULL, &proc_dostring, &sysctl_string },
#endif
#ifdef CONFIG_BINFMT_JAVA
@@ -149,22 +169,47 @@ static ctl_table kern_table[] = {
{KERN_JAVA_APPLETVIEWER, "java-appletviewer", binfmt_java_appletviewer,
64, 0644, NULL, &proc_dostring, &sysctl_string },
#endif
+#ifdef __sparc__
+ {KERN_SPARC_REBOOT, "reboot-cmd", reboot_command,
+ 256, 0644, NULL, &proc_dostring, &sysctl_string },
+#endif
+ {KERN_CTLALTDEL, "ctrl-alt-del", &C_A_D, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {KERN_PRINTK, "printk", &console_loglevel, 4*sizeof(int),
+ 0644, NULL, &proc_dointvec},
{0}
};
static ctl_table vm_table[] = {
{VM_SWAPCTL, "swapctl",
&swap_control, sizeof(swap_control_t), 0600, NULL, &proc_dointvec},
- {VM_KSWAPD, "kswapd",
- &kswapd_ctl, sizeof(kswapd_ctl), 0600, NULL, &proc_dointvec},
{VM_FREEPG, "freepages",
&min_free_pages, 3*sizeof(int), 0600, NULL, &proc_dointvec},
{VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0600, NULL,
&proc_dointvec_minmax, &sysctl_intvec, NULL,
&bdflush_min, &bdflush_max},
+ {VM_OVERCOMMIT_MEMORY, "overcommit_memory", &sysctl_overcommit_memory,
+ sizeof(sysctl_overcommit_memory), 0644, NULL, &proc_dointvec},
+ {0}
+};
+
+static ctl_table proc_table[] = {
+ {0}
+};
+
+static ctl_table fs_table[] = {
+ {0}
+};
+
+static ctl_table debug_table[] = {
{0}
};
+static ctl_table dev_table[] = {
+ {0}
+};
+
+
void sysctl_init(void)
{
#ifdef CONFIG_PROC_FS
@@ -184,25 +229,17 @@ int do_sysctl (int *name, int nlen,
if (nlen == 0 || nlen >= CTL_MAXNAME)
return -ENOTDIR;
- error = verify_area(VERIFY_READ,name,nlen*sizeof(int));
- if (error) return error;
- if (oldval) {
+ if (oldval)
+ {
int old_len;
if (!oldlenp)
return -EFAULT;
- error = verify_area(VERIFY_WRITE,oldlenp,sizeof(size_t));
- if (error) return error;
- get_user(old_len, oldlenp);
- error = verify_area(VERIFY_WRITE,oldval,old_len);
- if (error) return error;
- }
- if (newval) {
- error = verify_area(VERIFY_READ,newval,newlen);
- if (error) return error;
+ if(get_user(old_len, oldlenp))
+ return -EFAULT;
}
tmp = &root_table_header;
do {
- context = 0;
+ context = NULL;
error = parse_table(name, nlen, oldval, oldlenp,
newval, newlen, tmp->ctl_table, &context);
if (context)
@@ -218,12 +255,15 @@ extern asmlinkage int sys_sysctl(struct __sysctl_args *args)
{
struct __sysctl_args tmp;
int error;
- error = verify_area(VERIFY_READ, args, sizeof(*args));
- if (error)
- return error;
- copy_from_user(&tmp, args, sizeof(tmp));
- return do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
- tmp.newval, tmp.newlen);
+
+ if(copy_from_user(&tmp, args, sizeof(tmp)))
+ return -EFAULT;
+
+ lock_kernel();
+ error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
+ tmp.newval, tmp.newlen);
+ unlock_kernel();
+ return error;
}
/* Like in_group_p, but testing against egid, not fsgid */
@@ -248,6 +288,7 @@ out:
/* ctl_perm does NOT grant the superuser all rights automatically, because
some sysctl variables are readonly even to root. */
+
static int test_perm(int mode, int op)
{
if (!current->euid)
@@ -258,6 +299,7 @@ static int test_perm(int mode, int op)
return 0;
return -EACCES;
}
+
static inline int ctl_perm(ctl_table *table, int op)
{
return test_perm(table->mode, op);
@@ -275,7 +317,8 @@ repeat:
for ( ; table->ctl_name; table++) {
int n;
- get_user(n,name);
+ if(get_user(n,name))
+ return -EFAULT;
if (n == table->ctl_name ||
table->ctl_name == CTL_ANY) {
if (table->child) {
@@ -335,15 +378,18 @@ int do_sysctl_strategy (ctl_table *table,
if (len) {
if (len > table->maxlen)
len = table->maxlen;
- copy_to_user(oldval, table->data, len);
- put_user(len, oldlenp);
+ if(copy_to_user(oldval, table->data, len))
+ return -EFAULT;
+ if(put_user(len, oldlenp))
+ return -EFAULT;
}
}
if (newval && newlen) {
len = newlen;
if (len > table->maxlen)
len = table->maxlen;
- copy_from_user(table->data, newval, len);
+ if(copy_from_user(table->data, newval, len))
+ return -EFAULT;
}
}
return 0;
@@ -364,7 +410,8 @@ static int do_securelevel_strategy (ctl_table *table,
if (newval && newlen) {
if (newlen != sizeof (int))
return -EINVAL;
- copy_from_user (&level, newval, newlen);
+ if(copy_from_user (&level, newval, newlen))
+ return -EFAULT;
if (level < securelevel && current->pid != 1)
return -EPERM;
}
@@ -406,11 +453,12 @@ void unregister_sysctl_table(struct ctl_table_header * table)
/* Scan the sysctl entries in table and add them all into /proc */
static void register_proc_table(ctl_table * table, struct proc_dir_entry *root)
{
- struct proc_dir_entry *de, *tmp;
- int exists;
+ struct proc_dir_entry *de;
+ int len;
+ mode_t mode;
for (; table->ctl_name; table++) {
- exists = 0;
+ de = 0;
/* Can't do anything without a proc name. */
if (!table->procname)
continue;
@@ -418,46 +466,32 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root)
if (!table->proc_handler &&
!table->child)
continue;
-
- de = kmalloc(sizeof(*de), GFP_KERNEL);
- if (!de) continue;
- de->namelen = strlen(table->procname);
- de->name = table->procname;
- de->mode = table->mode;
- de->nlink = 1;
- de->uid = 0;
- de->gid = 0;
- de->size = 0;
- de->get_info = 0; /* For internal use if we want it */
- de->fill_inode = 0; /* To override struct inode fields */
- de->next = de->subdir = 0;
- de->data = (void *) table;
- /* Is it a file? */
- if (table->proc_handler) {
- de->ops = &proc_sys_inode_operations;
- de->mode |= S_IFREG;
- }
- /* Otherwise it's a subdir */
- else {
- /* First check to see if it already exists */
- for (tmp = root->subdir; tmp; tmp = tmp->next) {
- if (tmp->namelen == de->namelen &&
- !memcmp(tmp->name,de->name,de->namelen)) {
- exists = 1;
- kfree (de);
- de = tmp;
- }
- }
- if (!exists) {
- de->ops = &proc_dir_inode_operations;
- de->nlink++;
- de->mode |= S_IFDIR;
+
+ len = strlen(table->procname);
+ mode = table->mode;
+
+ if (table->proc_handler)
+ mode |= S_IFREG;
+ else {
+ mode |= S_IFDIR;
+ for (de = root->subdir; de; de = de->next) {
+ if (proc_match(len, table->procname, de))
+ break;
}
+ /* If the subdir exists already, de is non-NULL */
+ }
+
+ if (!de) {
+ de = create_proc_entry(table->procname, mode, root);
+ if (!de)
+ continue;
+ de->data = (void *) table;
+ if (table->proc_handler)
+ de->ops = &proc_sys_inode_operations;
+
}
table->de = de;
- if (!exists)
- proc_register_dynamic(root, de);
- if (de->mode & S_IFDIR )
+ if (de->mode & S_IFDIR)
register_proc_table(table->child, de);
}
}
@@ -494,10 +528,6 @@ static long do_rw_proc(int write, struct inode * inode, struct file * file,
size_t res;
long error;
- error = verify_area(write ? VERIFY_READ : VERIFY_WRITE, buf, count);
- if (error)
- return error;
-
de = (struct proc_dir_entry*) inode->u.generic_ip;
if (!de || !de->data)
return -ENOTDIR;
@@ -548,14 +578,16 @@ int proc_dostring(ctl_table *table, int write, struct file *filp,
len = 0;
p = buffer;
while (len < *lenp) {
- get_user(c, p++);
+ if(get_user(c, p++))
+ return -EFAULT;
if (c == 0 || c == '\n')
break;
len++;
}
if (len >= table->maxlen)
len = table->maxlen-1;
- copy_from_user(table->data, buffer, len);
+ if(copy_from_user(table->data, buffer, len))
+ return -EFAULT;
((char *) table->data)[len] = 0;
filp->f_pos += *lenp;
} else {
@@ -565,9 +597,11 @@ int proc_dostring(ctl_table *table, int write, struct file *filp,
if (len > *lenp)
len = *lenp;
if (len)
- copy_to_user(buffer, table->data, len);
+ if(copy_to_user(buffer, table->data, len))
+ return -EFAULT;
if (len < *lenp) {
- put_user('\n', ((char *) buffer) + len);
+ if(put_user('\n', ((char *) buffer) + len))
+ return -EFAULT;
len++;
}
*lenp = len;
@@ -597,7 +631,8 @@ int proc_dointvec(ctl_table *table, int write, struct file *filp,
if (write) {
while (left) {
char c;
- get_user(c,(char *) buffer);
+ if(get_user(c,(char *) buffer))
+ return -EFAULT;
if (!isspace(c))
break;
left--;
@@ -609,7 +644,8 @@ int proc_dointvec(ctl_table *table, int write, struct file *filp,
len = left;
if (len > TMPBUFLEN-1)
len = TMPBUFLEN-1;
- copy_from_user(buf, buffer, len);
+ if(copy_from_user(buf, buffer, len))
+ return -EFAULT;
buf[len] = 0;
p = buf;
if (*p == '-' && left > 1) {
@@ -635,21 +671,24 @@ int proc_dointvec(ctl_table *table, int write, struct file *filp,
len = strlen(buf);
if (len > left)
len = left;
- copy_to_user(buffer, buf, len);
+ if(copy_to_user(buffer, buf, len))
+ return -EFAULT;
left -= len;
buffer += len;
}
}
if (!write && !first && left) {
- put_user('\n', (char *) buffer);
+ if(put_user('\n', (char *) buffer))
+ return -EFAULT;
left--, buffer++;
}
if (write) {
p = (char *) buffer;
while (left) {
char c;
- get_user(c, p++);
+ if(get_user(c, p++))
+ return -EFAULT;
if (!isspace(c))
break;
left--;
@@ -685,7 +724,8 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
if (write) {
while (left) {
char c;
- get_user(c, (char *) buffer);
+ if(get_user(c, (char *) buffer))
+ return -EFAULT;
if (!isspace(c))
break;
left--;
@@ -697,7 +737,8 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
len = left;
if (len > TMPBUFLEN-1)
len = TMPBUFLEN-1;
- copy_from_user(buf, buffer, len);
+ if(copy_from_user(buf, buffer, len))
+ return -EFAULT;
buf[len] = 0;
p = buf;
if (*p == '-' && left > 1) {
@@ -728,21 +769,24 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
len = strlen(buf);
if (len > left)
len = left;
- copy_to_user(buffer, buf, len);
+ if(copy_to_user(buffer, buf, len))
+ return -EFAULT;
left -= len;
buffer += len;
}
}
if (!write && !first && left) {
- put_user('\n', (char *) buffer);
+ if(put_user('\n', (char *) buffer))
+ return -EFAULT;
left--, buffer++;
}
if (write) {
p = (char *) buffer;
while (left) {
char c;
- get_user(c, p++);
+ if(get_user(c, p++))
+ return -EFAULT;
if (!isspace(c))
break;
left--;
@@ -793,22 +837,27 @@ int sysctl_string(ctl_table *table, int *name, int nlen,
return -ENOTDIR;
if (oldval && oldlenp) {
- get_user(len, oldlenp);
+ if(get_user(len, oldlenp))
+ return -EFAULT;
if (len) {
l = strlen(table->data);
if (len > l) len = l;
if (len >= table->maxlen)
len = table->maxlen;
- copy_to_user(oldval, table->data, len);
- put_user(0, ((char *) oldval) + len);
- put_user(len, oldlenp);
+ if(copy_to_user(oldval, table->data, len))
+ return -EFAULT;
+ if(put_user(0, ((char *) oldval) + len))
+ return -EFAULT;
+ if(put_user(len, oldlenp))
+ return -EFAULT;
}
}
if (newval && newlen) {
len = newlen;
if (len > table->maxlen)
len = table->maxlen;
- copy_from_user(table->data, newval, len);
+ if(copy_from_user(table->data, newval, len))
+ return -EFAULT;
if (len == table->maxlen)
len--;
((char *) table->data)[len] = 0;
@@ -865,14 +914,16 @@ int do_string (
return -EINVAL;
if (oldval) {
int old_l;
- get_user(old_l, oldlenp);
+ if(get_user(old_l, oldlenp))
+ return -EFAULT;
if (l > old_l)
return -ENOMEM;
- put_user(l, oldlenp);
- copy_to_user(oldval, data, l);
+ if(put_user(l, oldlenp) || copy_to_user(oldval, data, l))
+ return -EFAULT;
}
if (newval) {
- copy_from_user(data, newval, newlen);
+ if(copy_from_user(data, newval, newlen))
+ return -EFAULT;
data[newlen] = 0;
}
return 0;
@@ -888,14 +939,16 @@ int do_int (
return -EINVAL;
if (oldval) {
int old_l;
- get_user(old_l, oldlenp);
+ if(get_user(old_l, oldlenp))
+ return -EFAULT;
if (old_l < sizeof(int))
return -ENOMEM;
- put_user(sizeof(int), oldlenp);
- copy_to_user(oldval, data, sizeof(int));
+ if(put_user(sizeof(int), oldlenp)||copy_to_user(oldval, data, sizeof(int)))
+ return -EFAULT;
}
if (newval)
- copy_from_user(data, newval, sizeof(int));
+ if(copy_from_user(data, newval, sizeof(int)))
+ return -EFAULT;
return 0;
}
@@ -909,14 +962,71 @@ int do_struct (
return -EINVAL;
if (oldval) {
int old_l;
- get_user(old_l, oldlenp);
+ if(get_user(old_l, oldlenp))
+ return -EFAULT;
if (old_l < len)
return -ENOMEM;
- put_user(len, oldlenp);
- copy_to_user(oldval, data, len);
+ if(put_user(len, oldlenp) || copy_to_user(oldval, data, len))
+ return -EFAULT;
}
if (newval)
- copy_from_user(data, newval, len);
+ if(copy_from_user(data, newval, len))
+ return -EFAULT;
return 0;
}
+
+#else /* CONFIG_SYSCTL */
+
+
+extern asmlinkage int sys_sysctl(struct __sysctl_args *args)
+{
+ return -ENOSYS;
+}
+
+int sysctl_string(ctl_table *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ return -ENOSYS;
+}
+
+int sysctl_intvec(ctl_table *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ return -ENOSYS;
+}
+
+int proc_dostring(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+struct ctl_table_header * register_sysctl_table(ctl_table * table,
+ int insert_at_head)
+{
+ return 0;
+}
+
+void unregister_sysctl_table(struct ctl_table_header * table)
+{
+}
+
+#endif /* CONFIG_SYSCTL */
+
+
+
diff --git a/kernel/time.c b/kernel/time.c
index c2090a583..d20fdbd98 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -25,6 +25,8 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/timex.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <asm/uaccess.h>
@@ -64,6 +66,8 @@ asmlinkage int sys_time(int * tloc)
{
int i;
+ /* SMP: This is fairly trivial. We grab CURRENT_TIME and
+ stuff it to user space. No side effects */
i = CURRENT_TIME;
if (tloc) {
if (put_user(i,tloc))
@@ -78,6 +82,7 @@ asmlinkage int sys_time(int * tloc)
* why not move it into the appropriate arch directory (for those
* architectures that need it).
*/
+
asmlinkage int sys_stime(int * tptr)
{
int value;
@@ -154,6 +159,7 @@ asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz)
if (!suser())
return -EPERM;
+
if (tv) {
if (copy_from_user(&new_tv, tv, sizeof(*tv)))
return -EFAULT;
@@ -161,6 +167,8 @@ asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz)
if (tz) {
if (copy_from_user(&new_tz, tz, sizeof(*tz)))
return -EFAULT;
+
+ /* SMP safe, global irq locking makes it work. */
sys_tz = new_tz;
if (firsttime) {
firsttime = 0;
@@ -169,7 +177,12 @@ asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz)
}
}
if (tv)
+ {
+ /* SMP safe, again the code in arch/foo/time.c should
+ * globally block out interrupts when it runs.
+ */
do_settimeofday(&new_tv);
+ }
return 0;
}
@@ -197,37 +210,32 @@ void (*hardpps_ptr)(struct timeval *) = (void (*)(struct timeval *))0;
asmlinkage int sys_adjtimex(struct timex *txc_p)
{
long ltemp, mtemp, save_adjust;
- int error;
-
- /* Local copy of parameter */
- struct timex txc;
+ struct timex txc; /* Local copy of parameter */
/* Copy the user data space into the kernel copy
* structure. But bear in mind that the structures
* may change
*/
- error = copy_from_user(&txc, txc_p, sizeof(struct timex));
- if (error)
- return -EFAULT;
+ if(copy_from_user(&txc, txc_p, sizeof(struct timex)))
+ return -EFAULT;
/* In order to modify anything, you gotta be super-user! */
if (txc.modes && !suser())
return -EPERM;
-
- /* Now we validate the data before disabling interrupts
- */
+
+ /* Now we validate the data before disabling interrupts */
if (txc.modes != ADJ_OFFSET_SINGLESHOT && (txc.modes & ADJ_OFFSET))
/* adjustment Offset limited to +- .512 seconds */
- if (txc.offset <= - MAXPHASE || txc.offset >= MAXPHASE )
- return -EINVAL;
+ if (txc.offset <= - MAXPHASE || txc.offset >= MAXPHASE )
+ return -EINVAL;
/* if the quartz is off by more than 10% something is VERY wrong ! */
if (txc.modes & ADJ_TICK)
- if (txc.tick < 900000/HZ || txc.tick > 1100000/HZ)
- return -EINVAL;
+ if (txc.tick < 900000/HZ || txc.tick > 1100000/HZ)
+ return -EINVAL;
- cli();
+ cli(); /* SMP: global cli() is enough protection. */
/* Save for later - semantics of adjtime is to return old value */
save_adjust = time_adjust;