summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorRalf Baechle <ralf@linux-mips.org>1997-04-29 21:13:14 +0000
committer <ralf@linux-mips.org>1997-04-29 21:13:14 +0000
commit19c9bba94152148523ba0f7ef7cffe3d45656b11 (patch)
tree40b1cb534496a7f1ca0f5c314a523c69f1fee464 /kernel
parent7206675c40394c78a90e74812bbdbf8cf3cca1be (diff)
Import of Linux/MIPS 2.1.36
Diffstat (limited to 'kernel')
-rw-r--r--kernel/exec_domain.c33
-rw-r--r--kernel/exit.c276
-rw-r--r--kernel/fork.c89
-rw-r--r--kernel/info.c4
-rw-r--r--kernel/itimer.c36
-rw-r--r--kernel/ksyms.c564
-rw-r--r--kernel/module.c1404
-rw-r--r--kernel/panic.c29
-rw-r--r--kernel/printk.c239
-rw-r--r--kernel/resource.c69
-rw-r--r--kernel/sched.c869
-rw-r--r--kernel/signal.c128
-rw-r--r--kernel/softirq.c54
-rw-r--r--kernel/sys.c491
-rw-r--r--kernel/sysctl.c342
-rw-r--r--kernel/time.c38
16 files changed, 2774 insertions, 1891 deletions
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 9a202359a..5d7e2f056 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -2,6 +2,9 @@
#include <linux/ptrace.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/module.h>
static asmlinkage void no_lcall7(struct pt_regs * regs);
@@ -34,14 +37,14 @@ static asmlinkage void no_lcall7(struct pt_regs * regs)
* personality set incorrectly. Check to see whether SVr4 is available,
* and use it, otherwise give the user a SEGV.
*/
- if (current->exec_domain && current->exec_domain->use_count)
- (*current->exec_domain->use_count)--;
+ if (current->exec_domain && current->exec_domain->module)
+ __MOD_DEC_USE_COUNT(current->exec_domain->module);
current->personality = PER_SVR4;
current->exec_domain = lookup_exec_domain(current->personality);
- if (current->exec_domain && current->exec_domain->use_count)
- (*current->exec_domain->use_count)++;
+ if (current->exec_domain && current->exec_domain->module)
+ __MOD_INC_USE_COUNT(current->exec_domain->module);
if (current->exec_domain && current->exec_domain->handler
&& current->exec_domain->handler != no_lcall7) {
@@ -103,21 +106,27 @@ asmlinkage int sys_personality(unsigned long personality)
{
struct exec_domain *it;
unsigned long old_personality;
+ int ret;
+ lock_kernel();
+ ret = current->personality;
if (personality == 0xffffffff)
- return current->personality;
+ goto out;
+ ret = -EINVAL;
it = lookup_exec_domain(personality);
if (!it)
- return -EINVAL;
+ goto out;
old_personality = current->personality;
- if (current->exec_domain && current->exec_domain->use_count)
- (*current->exec_domain->use_count)--;
+ if (current->exec_domain && current->exec_domain->module)
+ __MOD_DEC_USE_COUNT(current->exec_domain->module);
current->personality = personality;
current->exec_domain = it;
- if (current->exec_domain->use_count)
- (*current->exec_domain->use_count)++;
-
- return old_personality;
+ if (current->exec_domain->module)
+ __MOD_INC_USE_COUNT(current->exec_domain->module);
+ ret = old_personality;
+out:
+ unlock_kernel();
+ return ret;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index d4c54209d..0d03916e8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -6,6 +6,7 @@
#undef DEBUG_PROC_TREE
+#include <linux/config.h>
#include <linux/wait.h>
#include <linux/errno.h>
#include <linux/signal.h>
@@ -16,9 +17,14 @@
#include <linux/tty.h>
#include <linux/malloc.h>
#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/module.h>
+#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
+#include <asm/mmu_context.h>
extern void sem_exit (void);
extern void acct_process (long exitcode);
@@ -36,18 +42,23 @@ static inline void generate(unsigned long sig, struct task_struct * p)
* be handled immediately (ie non-blocked and untraced)
* and that is ignored (either explicitly or by default)
*/
+ spin_lock_irq(&p->sig->siglock);
if (!(mask & p->blocked) && !(p->flags & PF_PTRACED)) {
/* don't bother with ignored signals (but SIGCHLD is special) */
if (sa->sa_handler == SIG_IGN && sig != SIGCHLD)
- return;
+ goto out;
/* some signals are ignored by default.. (but SIGCONT already did its deed) */
if ((sa->sa_handler == SIG_DFL) &&
(sig == SIGCONT || sig == SIGCHLD || sig == SIGWINCH || sig == SIGURG))
- return;
+ goto out;
}
+ spin_lock(&p->sigmask_lock);
p->signal |= mask;
+ spin_unlock(&p->sigmask_lock);
if (p->state == TASK_INTERRUPTIBLE && (p->signal & ~p->blocked))
wake_up_process(p);
+out:
+ spin_unlock_irq(&p->sig->siglock);
}
/*
@@ -60,15 +71,22 @@ void force_sig(unsigned long sig, struct task_struct * p)
if (p->sig) {
unsigned long mask = 1UL << sig;
struct sigaction *sa = p->sig->action + sig;
+
+ spin_lock_irq(&p->sig->siglock);
+
+ spin_lock(&p->sigmask_lock);
p->signal |= mask;
p->blocked &= ~mask;
+ spin_unlock(&p->sigmask_lock);
+
if (sa->sa_handler == SIG_IGN)
sa->sa_handler = SIG_DFL;
if (p->state == TASK_INTERRUPTIBLE)
wake_up_process(p);
+
+ spin_unlock_irq(&p->sig->siglock);
}
}
-
int send_sig(unsigned long sig,struct task_struct * p,int priv)
{
@@ -79,24 +97,23 @@ int send_sig(unsigned long sig,struct task_struct * p,int priv)
(current->uid ^ p->suid) && (current->uid ^ p->uid) &&
!suser())
return -EPERM;
- if (!sig)
- return 0;
- /*
- * Forget it if the process is already zombie'd.
- */
- if (!p->sig)
- return 0;
- if ((sig == SIGKILL) || (sig == SIGCONT)) {
- if (p->state == TASK_STOPPED)
- wake_up_process(p);
- p->exit_code = 0;
- p->signal &= ~( (1<<(SIGSTOP-1)) | (1<<(SIGTSTP-1)) |
- (1<<(SIGTTIN-1)) | (1<<(SIGTTOU-1)) );
+
+ if (sig && p->sig) {
+ spin_lock_irq(&p->sigmask_lock);
+ if ((sig == SIGKILL) || (sig == SIGCONT)) {
+ if (p->state == TASK_STOPPED)
+ wake_up_process(p);
+ p->exit_code = 0;
+ p->signal &= ~( (1<<(SIGSTOP-1)) | (1<<(SIGTSTP-1)) |
+ (1<<(SIGTTIN-1)) | (1<<(SIGTTOU-1)) );
+ }
+ if (sig == SIGSTOP || sig == SIGTSTP || sig == SIGTTIN || sig == SIGTTOU)
+ p->signal &= ~(1<<(SIGCONT-1));
+ spin_unlock_irq(&p->sigmask_lock);
+
+ /* Actually generate the signal */
+ generate(sig,p);
}
- if (sig == SIGSTOP || sig == SIGTSTP || sig == SIGTTIN || sig == SIGTTOU)
- p->signal &= ~(1<<(SIGCONT-1));
- /* Actually generate the signal */
- generate(sig,p);
return 0;
}
@@ -120,6 +137,12 @@ void release(struct task_struct * p)
}
for (i=1 ; i<NR_TASKS ; i++)
if (task[i] == p) {
+#ifdef __SMP__
+ /* FIXME! Cheesy, but kills the window... -DaveM */
+ while(p->processor != NO_PROC_ID)
+ barrier();
+ spin_unlock_wait(&scheduler_lock);
+#endif
nr_tasks--;
task[i] = NULL;
REMOVE_LINKS(p);
@@ -130,7 +153,7 @@ void release(struct task_struct * p)
current->cmin_flt += p->min_flt + p->cmin_flt;
current->cmaj_flt += p->maj_flt + p->cmaj_flt;
current->cnswap += p->nswap + p->cnswap;
- kfree(p);
+ free_task_struct(p);
return;
}
panic("trying to release non-existent task");
@@ -152,14 +175,14 @@ int bad_task_ptr(struct task_struct *p)
return 0;
return 1;
}
-
+
/*
* This routine scans the pid tree and makes sure the rep invariant still
* holds. Used for debugging only, since it's very slow....
*
* It looks a lot scarier than it really is.... we're doing nothing more
- * than verifying the doubly-linked list found in p_ysptr and p_osptr,
- * and checking it corresponds with the process tree defined by p_cptr and
+ * than verifying the doubly-linked list found in p_ysptr and p_osptr,
+ * and checking it corresponds with the process tree defined by p_cptr and
* p_pptr;
*/
void audit_ptree(void)
@@ -240,14 +263,18 @@ int session_of_pgrp(int pgrp)
int fallback;
fallback = -1;
+ read_lock(&tasklist_lock);
for_each_task(p) {
if (p->session <= 0)
continue;
- if (p->pgrp == pgrp)
- return p->session;
+ if (p->pgrp == pgrp) {
+ fallback = p->session;
+ break;
+ }
if (p->pid == pgrp)
fallback = p->session;
}
+ read_unlock(&tasklist_lock);
return fallback;
}
@@ -257,21 +284,29 @@ int session_of_pgrp(int pgrp)
*/
int kill_pg(int pgrp, int sig, int priv)
{
- struct task_struct *p;
- int err,retval = -ESRCH;
- int found = 0;
+ int retval;
- if (sig<0 || sig>32 || pgrp<=0)
- return -EINVAL;
- for_each_task(p) {
- if (p->pgrp == pgrp) {
- if ((err = send_sig(sig,p,priv)) != 0)
- retval = err;
- else
- found++;
+ retval = -EINVAL;
+ if (sig >= 0 && sig <= 32 && pgrp > 0) {
+ struct task_struct *p;
+ int found = 0;
+
+ retval = -ESRCH;
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->pgrp == pgrp) {
+ int err = send_sig(sig,p,priv);
+ if (err != 0)
+ retval = err;
+ else
+ found++;
+ }
}
+ read_unlock(&tasklist_lock);
+ if (found)
+ retval = 0;
}
- return(found ? 0 : retval);
+ return retval;
}
/*
@@ -281,34 +316,51 @@ int kill_pg(int pgrp, int sig, int priv)
*/
int kill_sl(int sess, int sig, int priv)
{
- struct task_struct *p;
- int err,retval = -ESRCH;
- int found = 0;
+ int retval;
- if (sig<0 || sig>32 || sess<=0)
- return -EINVAL;
- for_each_task(p) {
- if (p->session == sess && p->leader) {
- if ((err = send_sig(sig,p,priv)) != 0)
- retval = err;
- else
- found++;
+ retval = -EINVAL;
+ if (sig >= 0 && sig <= 32 && sess > 0) {
+ struct task_struct *p;
+ int found = 0;
+
+ retval = -ESRCH;
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->leader && p->session == sess) {
+ int err = send_sig(sig,p,priv);
+
+ if (err)
+ retval = err;
+ else
+ found++;
+ }
}
+ read_unlock(&tasklist_lock);
+ if (found)
+ retval = 0;
}
- return(found ? 0 : retval);
+ return retval;
}
int kill_proc(int pid, int sig, int priv)
{
- struct task_struct *p;
+ int retval;
- if (sig<0 || sig>32)
- return -EINVAL;
- for_each_task(p) {
- if (p && p->pid == pid)
- return send_sig(sig,p,priv);
+ retval = -EINVAL;
+ if (sig >= 0 && sig <= 32) {
+ struct task_struct *p;
+
+ retval = -ESRCH;
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->pid != pid)
+ continue;
+ retval = send_sig(sig,p,priv);
+ break;
+ }
+ read_unlock(&tasklist_lock);
}
- return(-ESRCH);
+ return retval;
}
/*
@@ -317,49 +369,58 @@ int kill_proc(int pid, int sig, int priv)
*/
asmlinkage int sys_kill(int pid,int sig)
{
- int err, retval = 0, count = 0;
-
if (!pid)
- return(kill_pg(current->pgrp,sig,0));
+ return kill_pg(current->pgrp,sig,0);
+
if (pid == -1) {
+ int retval = 0, count = 0;
struct task_struct * p;
+
+ read_lock(&tasklist_lock);
for_each_task(p) {
if (p->pid > 1 && p != current) {
+ int err;
++count;
if ((err = send_sig(sig,p,0)) != -EPERM)
retval = err;
}
}
- return(count ? retval : -ESRCH);
+ read_unlock(&tasklist_lock);
+ return count ? retval : -ESRCH;
}
- if (pid < 0)
- return(kill_pg(-pid,sig,0));
+ if (pid < 0)
+ return kill_pg(-pid,sig,0);
+
/* Normal kill */
- return(kill_proc(pid,sig,0));
+ return kill_proc(pid,sig,0);
}
/*
* Determine if a process group is "orphaned", according to the POSIX
* definition in 2.2.2.52. Orphaned process groups are not to be affected
- * by terminal-generated stop signals. Newly orphaned process groups are
+ * by terminal-generated stop signals. Newly orphaned process groups are
* to receive a SIGHUP and a SIGCONT.
- *
+ *
* "I ask you, have you ever known what it is to be an orphan?"
*/
static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
{
struct task_struct *p;
+ read_lock(&tasklist_lock);
for_each_task(p) {
- if ((p == ignored_task) || (p->pgrp != pgrp) ||
+ if ((p == ignored_task) || (p->pgrp != pgrp) ||
(p->state == TASK_ZOMBIE) ||
(p->p_pptr->pid == 1))
continue;
if ((p->p_pptr->pgrp != pgrp) &&
- (p->p_pptr->session == p->session))
- return 0;
+ (p->p_pptr->session == p->session)) {
+ read_unlock(&tasklist_lock);
+ return 0;
+ }
}
- return(1); /* (sighing) "Often!" */
+ read_unlock(&tasklist_lock);
+ return 1; /* (sighing) "Often!" */
}
int is_orphaned_pgrp(int pgrp)
@@ -369,21 +430,27 @@ int is_orphaned_pgrp(int pgrp)
static inline int has_stopped_jobs(int pgrp)
{
+ int retval = 0;
struct task_struct * p;
+ read_lock(&tasklist_lock);
for_each_task(p) {
if (p->pgrp != pgrp)
continue;
- if (p->state == TASK_STOPPED)
- return(1);
+ if (p->state != TASK_STOPPED)
+ continue;
+ retval = 1;
+ break;
}
- return(0);
+ read_unlock(&tasklist_lock);
+ return retval;
}
static inline void forget_original_parent(struct task_struct * father)
{
struct task_struct * p;
+ read_lock(&tasklist_lock);
for_each_task(p) {
if (p->p_opptr == father)
if (task[smp_num_cpus]) /* init */
@@ -391,6 +458,7 @@ static inline void forget_original_parent(struct task_struct * father)
else
p->p_opptr = task[0];
}
+ read_unlock(&tasklist_lock);
}
static inline void close_files(struct files_struct * files)
@@ -456,9 +524,8 @@ static inline void __exit_sighand(struct task_struct *tsk)
if (sig) {
tsk->sig = NULL;
- if (!--sig->count) {
+ if (atomic_dec_and_test(&sig->count))
kfree(sig);
- }
}
}
@@ -475,9 +542,7 @@ static inline void __exit_mm(struct task_struct * tsk)
if (mm != &init_mm) {
flush_cache_mm(mm);
flush_tlb_mm(mm);
-#ifdef __mips__
- mm->context = 0;
-#endif
+ destroy_context(mm);
tsk->mm = &init_mm;
tsk->swappable = 0;
SET_PAGE_DIR(tsk, swapper_pg_dir);
@@ -496,7 +561,7 @@ void exit_mm(struct task_struct *tsk)
__exit_mm(tsk);
}
-/*
+/*
* Send signals to all our closest relatives so that they know
* to properly mourn us..
*/
@@ -505,7 +570,7 @@ static void exit_notify(void)
struct task_struct * p;
forget_original_parent(current);
- /*
+ /*
* Check to see if any process groups have become orphaned
* as a result of our exiting, and if they have any stopped
* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
@@ -523,10 +588,10 @@ static void exit_notify(void)
}
/* Let father know we died */
notify_parent(current);
-
+
/*
* This loop does two things:
- *
+ *
* A. Make init inherit all the child processes
* B. Check to see if any process groups have become orphaned
* as a result of our exiting, and if they have any stopped
@@ -547,7 +612,7 @@ static void exit_notify(void)
notify_parent(p);
/*
* process group orphan check
- * Case ii: Our child is in a different pgrp
+ * Case ii: Our child is in a different pgrp
* than we are, and it was the only connection
* outside, so the child pgrp is now orphaned.
*/
@@ -565,9 +630,9 @@ static void exit_notify(void)
NORET_TYPE void do_exit(long code)
{
- if (intr_count) {
+ if (in_interrupt()) {
+ local_irq_count[smp_processor_id()] = 0; /* Not really correct */
printk("Aiee, killing interrupt handler\n");
- intr_count = 0;
}
fake_volatile:
acct_process(code);
@@ -576,6 +641,9 @@ fake_volatile:
sem_exit();
kerneld_exit();
__exit_mm(current);
+#if CONFIG_AP1000
+ exit_msc(current);
+#endif
__exit_files(current);
__exit_fs(current);
__exit_sighand(current);
@@ -586,10 +654,10 @@ fake_volatile:
#ifdef DEBUG_PROC_TREE
audit_ptree();
#endif
- if (current->exec_domain && current->exec_domain->use_count)
- (*current->exec_domain->use_count)--;
- if (current->binfmt && current->binfmt->use_count)
- (*current->binfmt->use_count)--;
+ if (current->exec_domain && current->exec_domain->module)
+ __MOD_DEC_USE_COUNT(current->exec_domain->module);
+ if (current->binfmt && current->binfmt->module)
+ __MOD_DEC_USE_COUNT(current->binfmt->module);
schedule();
/*
* In order to get rid of the "volatile function does return" message
@@ -609,7 +677,9 @@ fake_volatile:
asmlinkage int sys_exit(int error_code)
{
+ lock_kernel();
do_exit((error_code&0xff)<<8);
+ unlock_kernel();
}
asmlinkage int sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
@@ -619,21 +689,21 @@ asmlinkage int sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct
struct task_struct *p;
if (stat_addr) {
- flag = verify_area(VERIFY_WRITE, stat_addr, sizeof(*stat_addr));
- if (flag)
- return flag;
+ if(verify_area(VERIFY_WRITE, stat_addr, sizeof(*stat_addr)))
+ return -EFAULT;
}
if (ru) {
- flag = verify_area(VERIFY_WRITE, ru, sizeof(*ru));
- if (flag)
- return flag;
+ if(verify_area(VERIFY_WRITE, ru, sizeof(*ru)))
+ return -EFAULT;
}
+
if (options & ~(WNOHANG|WUNTRACED|__WCLONE))
- return -EINVAL;
+ return -EINVAL;
add_wait_queue(&current->wait_chldexit,&wait);
repeat:
- flag=0;
+ flag = 0;
+ read_lock(&tasklist_lock);
for (p = current->p_cptr ; p ; p = p->p_osptr) {
if (pid>0) {
if (p->pid != pid)
@@ -655,23 +725,28 @@ repeat:
continue;
if (!(options & WUNTRACED) && !(p->flags & PF_PTRACED))
continue;
+ read_unlock(&tasklist_lock);
if (ru != NULL)
getrusage(p, RUSAGE_BOTH, ru);
if (stat_addr)
- put_user((p->exit_code << 8) | 0x7f,
- stat_addr);
+ __put_user((p->exit_code << 8) | 0x7f,
+ stat_addr);
p->exit_code = 0;
retval = p->pid;
goto end_wait4;
case TASK_ZOMBIE:
current->cutime += p->utime + p->cutime;
current->cstime += p->stime + p->cstime;
+ read_unlock(&tasklist_lock);
if (ru != NULL)
getrusage(p, RUSAGE_BOTH, ru);
if (stat_addr)
- put_user(p->exit_code, stat_addr);
+ __put_user(p->exit_code, stat_addr);
retval = p->pid;
if (p->p_opptr != p->p_pptr) {
+ /* Note this grabs tasklist_lock
+ * as a writer... (twice!)
+ */
REMOVE_LINKS(p);
p->p_pptr = p->p_opptr;
SET_LINKS(p);
@@ -686,6 +761,7 @@ repeat:
continue;
}
}
+ read_unlock(&tasklist_lock);
if (flag) {
retval = 0;
if (options & WNOHANG)
diff --git a/kernel/fork.c b/kernel/fork.c
index b81d98e77..6204ffeaf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -15,13 +15,17 @@
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/slab.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/malloc.h>
#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/module.h>
#include <asm/system.h>
#include <asm/pgtable.h>
+#include <asm/mmu_context.h>
#include <asm/uaccess.h>
int nr_tasks=1;
@@ -43,11 +47,15 @@ static inline int find_empty_process(void)
max_tasks--; /* count the new process.. */
if (max_tasks < nr_tasks) {
struct task_struct *p;
+ read_lock(&tasklist_lock);
for_each_task (p) {
if (p->uid == current->uid)
- if (--max_tasks < 0)
+ if (--max_tasks < 0) {
+ read_unlock(&tasklist_lock);
return -EAGAIN;
+ }
}
+ read_unlock(&tasklist_lock);
}
}
for (i = 0 ; i < NR_TASKS ; i++) {
@@ -63,6 +71,8 @@ static int get_pid(unsigned long flags)
if (flags & CLONE_PID)
return current->pid;
+
+ read_lock(&tasklist_lock);
repeat:
if ((++last_pid) & 0xffff8000)
last_pid=1;
@@ -72,6 +82,8 @@ repeat:
p->session == last_pid)
goto repeat;
}
+ read_unlock(&tasklist_lock);
+
return last_pid;
}
@@ -81,10 +93,12 @@ static inline int dup_mmap(struct mm_struct * mm)
mm->mmap = NULL;
p = &mm->mmap;
+ flush_cache_mm(current->mm);
for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
- tmp = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
+ tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (!tmp) {
exit_mmap(mm);
+ flush_tlb_mm(current->mm);
return -ENOMEM;
}
*tmp = *mpnt;
@@ -100,6 +114,7 @@ static inline int dup_mmap(struct mm_struct * mm)
}
if (copy_page_range(mm, current->mm, tmp)) {
exit_mmap(mm);
+ flush_tlb_mm(current->mm);
return -ENOMEM;
}
if (tmp->vm_ops && tmp->vm_ops->open)
@@ -107,6 +122,7 @@ static inline int dup_mmap(struct mm_struct * mm)
*p = tmp;
p = &tmp->vm_next;
}
+ flush_tlb_mm(current->mm);
build_mmap_avl(mm);
return 0;
}
@@ -118,9 +134,7 @@ static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
if (!mm)
return -1;
*mm = *current->mm;
-#ifdef __mips__
- mm->context = 0;
-#endif
+ init_new_context(mm);
mm->count = 1;
mm->def_flags = 0;
tsk->mm = mm;
@@ -128,15 +142,17 @@ static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
tsk->cmin_flt = tsk->cmaj_flt = 0;
tsk->nswap = tsk->cnswap = 0;
if (new_page_tables(tsk))
- return -1;
+ goto free_mm;
if (dup_mmap(mm)) {
free_page_tables(mm);
+free_mm:
+ kfree(mm);
return -1;
}
return 0;
}
- SET_PAGE_DIR(tsk, current->mm->pgd);
current->mm->count++;
+ SET_PAGE_DIR(tsk, current->mm->pgd);
return 0;
}
@@ -174,7 +190,7 @@ static inline int copy_files(unsigned long clone_flags, struct task_struct * tsk
tsk->files = newf;
if (!newf)
return -1;
-
+
newf->count = 1;
newf->close_on_exec = oldf->close_on_exec;
newf->open_fds = oldf->open_fds;
@@ -195,13 +211,14 @@ static inline int copy_files(unsigned long clone_flags, struct task_struct * tsk
static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
{
if (clone_flags & CLONE_SIGHAND) {
- current->sig->count++;
+ atomic_inc(&current->sig->count);
return 0;
}
tsk->sig = kmalloc(sizeof(*tsk->sig), GFP_KERNEL);
if (!tsk->sig)
return -1;
- tsk->sig->count = 1;
+ spin_lock_init(&tsk->sig->siglock);
+ atomic_set(&tsk->sig->count, 1);
memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
return 0;
}
@@ -218,10 +235,11 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
unsigned long new_stack;
struct task_struct *p;
- p = (struct task_struct *) kmalloc(sizeof(*p), GFP_KERNEL);
+ lock_kernel();
+ p = alloc_task_struct();
if (!p)
goto bad_fork;
- new_stack = alloc_kernel_stack();
+ new_stack = alloc_kernel_stack(p);
if (!new_stack)
goto bad_fork_free_p;
error = -EAGAIN;
@@ -231,10 +249,10 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
*p = *current;
- if (p->exec_domain && p->exec_domain->use_count)
- (*p->exec_domain->use_count)++;
- if (p->binfmt && p->binfmt->use_count)
- (*p->binfmt->use_count)++;
+ if (p->exec_domain && p->exec_domain->module)
+ __MOD_INC_USE_COUNT(p->exec_domain->module);
+ if (p->binfmt && p->binfmt->module)
+ __MOD_INC_USE_COUNT(p->binfmt->module);
p->did_exec = 0;
p->swappable = 0;
@@ -260,8 +278,8 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
p->cutime = p->cstime = 0;
#ifdef __SMP__
p->processor = NO_PROC_ID;
- p->lock_depth = 1;
#endif
+ p->lock_depth = 0;
p->start_time = jiffies;
task[nr] = p;
SET_LINKS(p);
@@ -277,16 +295,33 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
goto bad_fork_cleanup_fs;
if (copy_mm(clone_flags, p))
goto bad_fork_cleanup_sighand;
- copy_thread(nr, clone_flags, usp, p, regs);
+ error = copy_thread(nr, clone_flags, usp, p, regs);
+ if (error)
+ goto bad_fork_cleanup_sighand;
p->semundo = NULL;
/* ok, now we should be set up.. */
p->swappable = 1;
p->exit_signal = clone_flags & CSIGNAL;
- p->counter = current->counter >> 1;
- wake_up_process(p); /* do this last, just in case */
+
+ /*
+ * "share" dynamic priority between parent and child, thus the
+ * total amount of dynamic priorities in the system doesnt change,
+ * more scheduling fairness. This is only important in the first
+ * timeslice, on the long run the scheduling behaviour is unchanged.
+ */
+ current->counter >>= 1;
+ p->counter = current->counter;
+
+ if(p->pid) {
+ wake_up_process(p); /* do this last, just in case */
+ } else {
+ p->state = TASK_RUNNING;
+ p->next_run = p->prev_run = p;
+ }
++total_forks;
- return p->pid;
+ error = p->pid;
+ goto fork_out;
bad_fork_cleanup_sighand:
exit_sighand(p);
@@ -295,17 +330,19 @@ bad_fork_cleanup_fs:
bad_fork_cleanup_files:
exit_files(p);
bad_fork_cleanup:
- if (p->exec_domain && p->exec_domain->use_count)
- (*p->exec_domain->use_count)--;
- if (p->binfmt && p->binfmt->use_count)
- (*p->binfmt->use_count)--;
+ if (p->exec_domain && p->exec_domain->module)
+ __MOD_DEC_USE_COUNT(p->exec_domain->module);
+ if (p->binfmt && p->binfmt->module)
+ __MOD_DEC_USE_COUNT(p->binfmt->module);
task[nr] = NULL;
REMOVE_LINKS(p);
nr_tasks--;
bad_fork_free_stack:
free_kernel_stack(new_stack);
bad_fork_free_p:
- kfree(p);
+ free_task_struct(p);
bad_fork:
+fork_out:
+ unlock_kernel();
return error;
}
diff --git a/kernel/info.c b/kernel/info.c
index 20b6ad6ae..ffaec7140 100644
--- a/kernel/info.c
+++ b/kernel/info.c
@@ -12,6 +12,8 @@
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <asm/uaccess.h>
@@ -21,6 +23,7 @@ asmlinkage int sys_sysinfo(struct sysinfo *info)
memset((char *)&val, 0, sizeof(struct sysinfo));
+ cli();
val.uptime = jiffies / HZ;
val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
@@ -28,6 +31,7 @@ asmlinkage int sys_sysinfo(struct sysinfo *info)
val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
val.procs = nr_tasks-1;
+ sti();
si_meminfo(&val);
si_swapinfo(&val);
diff --git a/kernel/itimer.c b/kernel/itimer.c
index efcc8351b..479f660a0 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -12,6 +12,8 @@
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <asm/uaccess.h>
@@ -41,7 +43,6 @@ static void jiffiestotv(unsigned long jiffies, struct timeval *value)
{
value->tv_usec = (jiffies % HZ) * (1000000 / HZ);
value->tv_sec = jiffies / HZ;
- return;
}
static int _getitimer(int which, struct itimerval *value)
@@ -78,17 +79,19 @@ static int _getitimer(int which, struct itimerval *value)
return 0;
}
+/* SMP: Only we modify our itimer values. */
asmlinkage int sys_getitimer(int which, struct itimerval *value)
{
- int error;
+ int error = -EFAULT;
struct itimerval get_buffer;
- if (!value)
- return -EFAULT;
- error = _getitimer(which, &get_buffer);
- if (error)
- return error;
- return copy_to_user(value, &get_buffer, sizeof(get_buffer)) ? -EFAULT : 0;
+ if (value) {
+ error = _getitimer(which, &get_buffer);
+ if (!error)
+ error = copy_to_user(value, &get_buffer, sizeof(get_buffer))
+ ? -EFAULT : 0;
+ }
+ return error;
}
void it_real_fn(unsigned long __data)
@@ -149,17 +152,18 @@ int _setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
return 0;
}
+/* SMP: Again, only we play with our itimers, and signals are SMP safe
+ * now so that is not an issue at all anymore.
+ */
asmlinkage int sys_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
{
- int error;
struct itimerval set_buffer, get_buffer;
+ int error;
if (value) {
- error = verify_area(VERIFY_READ, value, sizeof(*value));
- if (error)
- return error;
- error = copy_from_user(&set_buffer, value, sizeof(set_buffer));
- if (error)
+ if(verify_area(VERIFY_READ, value, sizeof(*value)))
+ return -EFAULT;
+ if(copy_from_user(&set_buffer, value, sizeof(set_buffer)))
return -EFAULT;
} else
memset((char *) &set_buffer, 0, sizeof(set_buffer));
@@ -169,6 +173,6 @@ asmlinkage int sys_setitimer(int which, struct itimerval *value, struct itimerva
return error;
if (copy_to_user(ovalue, &get_buffer, sizeof(get_buffer)))
- error = -EFAULT;
- return error;
+ return -EFAULT;
+ return 0;
}
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index b694cd6d2..f5f202c8e 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -1,4 +1,4 @@
-/*
+/*
* Herein lies all the functions/variables that are "exported" for linkage
* with dynamically loaded kernel modules.
* Jon.
@@ -6,13 +6,12 @@
* - Stacked module support and unified symbol table added (June 1994)
* - External symbol table support added (December 1994)
* - Versions on symbols added (December 1994)
- * by Bjorn Ekwall <bj0rn@blox.se>
+ * by Bjorn Ekwall <bj0rn@blox.se>
*/
-#include <linux/module.h>
#include <linux/config.h>
+#include <linux/module.h>
#include <linux/kernel.h>
-#include <linux/smp.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/cdrom.h>
@@ -21,6 +20,7 @@
#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/malloc.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/ptrace.h>
#include <linux/sys.h>
@@ -41,6 +41,7 @@
#include <linux/minix_fs.h>
#include <linux/ext2_fs.h>
#include <linux/random.h>
+#include <linux/reboot.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/sysctl.h>
@@ -49,12 +50,14 @@
#include <linux/genhd.h>
#include <linux/swap.h>
#include <linux/ctype.h>
+#include <linux/file.h>
-extern unsigned char aux_device_present, kbd_read_mask;
-#ifdef __i386__
- extern struct drive_info_struct drive_info;
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
+extern struct drive_info_struct drive_info;
#endif
+extern unsigned char aux_device_present, kbd_read_mask;
+
#ifdef CONFIG_PCI
#include <linux/bios32.h>
#include <linux/pci.h>
@@ -73,7 +76,10 @@ extern unsigned char aux_device_present, kbd_read_mask;
extern char *get_options(char *str, int *ints);
extern void set_device_ro(int dev,int flag);
extern struct file_operations * get_blkfops(unsigned int);
-extern void blkdev_release(struct inode * inode);
+extern int blkdev_release(struct inode * inode);
+#if !defined(CONFIG_NFSD) && defined(CONFIG_NFSD_MODULE)
+extern int (*do_nfsservctl)(int, void *, void *);
+#endif
extern void *sys_call_table;
@@ -81,293 +87,299 @@ extern int sys_tz;
extern int request_dma(unsigned int dmanr, char * deviceID);
extern void free_dma(unsigned int dmanr);
-extern void hard_reset_now(void);
-
-struct symbol_table symbol_table = {
-#include <linux/symtab_begin.h>
#ifdef MODVERSIONS
- { (void *)1 /* Version version :-) */,
- SYMBOL_NAME_STR (Using_Versions) },
+const struct module_symbol __export_Using_Versions
+__attribute__((section("__ksymtab"))) = {
+ 1 /* Version version */, "Using_Versions"
+};
#endif
- /* stackable module support */
- X(register_symtab_from),
#ifdef CONFIG_KERNELD
- X(kerneld_send),
+EXPORT_SYMBOL(kerneld_send);
#endif
- X(get_options),
+EXPORT_SYMBOL(get_options);
#ifdef CONFIG_PCI
- /* PCI BIOS support */
- X(pcibios_present),
- X(pcibios_find_class),
- X(pcibios_find_device),
- X(pcibios_read_config_byte),
- X(pcibios_read_config_word),
- X(pcibios_read_config_dword),
- X(pcibios_strerror),
- X(pcibios_write_config_byte),
- X(pcibios_write_config_word),
- X(pcibios_write_config_dword),
+/* PCI BIOS support */
+EXPORT_SYMBOL(pcibios_present);
+EXPORT_SYMBOL(pcibios_find_class);
+EXPORT_SYMBOL(pcibios_find_device);
+EXPORT_SYMBOL(pcibios_read_config_byte);
+EXPORT_SYMBOL(pcibios_read_config_word);
+EXPORT_SYMBOL(pcibios_read_config_dword);
+EXPORT_SYMBOL(pcibios_write_config_byte);
+EXPORT_SYMBOL(pcibios_write_config_word);
+EXPORT_SYMBOL(pcibios_write_config_dword);
+EXPORT_SYMBOL(pcibios_strerror);
+EXPORT_SYMBOL(pci_strvendor);
+EXPORT_SYMBOL(pci_strdev);
#endif
- /* process memory management */
- X(do_mmap),
- X(do_munmap),
- X(exit_mm),
-
- /* internal kernel memory management */
- X(__get_free_pages),
- X(free_pages),
- X(kmalloc),
- X(kfree),
- X(vmalloc),
- X(vfree),
- X(mem_map),
- X(remap_page_range),
- X(max_mapnr),
- X(high_memory),
- X(update_vm_cache),
-
- /* filesystem internal functions */
- X(getname),
- X(putname),
- X(__iget),
- X(iput),
- X(namei),
- X(lnamei),
- X(open_namei),
- X(sys_close),
- X(close_fp),
- X(check_disk_change),
- X(invalidate_buffers),
- X(invalidate_inodes),
- X(invalidate_inode_pages),
- X(fsync_dev),
- X(permission),
- X(inode_setattr),
- X(inode_change_ok),
- X(set_blocksize),
- X(getblk),
- X(bread),
- X(breada),
- X(__brelse),
- X(__bforget),
- X(ll_rw_block),
- X(__wait_on_buffer),
- X(mark_buffer_uptodate),
- X(unlock_buffer),
- X(dcache_lookup),
- X(dcache_add),
- X(add_blkdev_randomness),
- X(generic_file_read),
- X(generic_file_mmap),
- X(generic_readpage),
-
- /* device registration */
- X(register_chrdev),
- X(unregister_chrdev),
- X(register_blkdev),
- X(unregister_blkdev),
- X(tty_register_driver),
- X(tty_unregister_driver),
- X(tty_std_termios),
-
-#if defined(CONFIG_BLK_DEV_IDECD) || \
- defined(CONFIG_BLK_DEV_SR) || \
- defined(CONFIG_CM206)
- X(register_cdrom),
- X(unregister_cdrom),
- X(cdrom_fops),
+/* process memory management */
+EXPORT_SYMBOL(do_mmap);
+EXPORT_SYMBOL(do_munmap);
+EXPORT_SYMBOL(exit_mm);
+EXPORT_SYMBOL(exit_files);
+
+/* internal kernel memory management */
+EXPORT_SYMBOL(__get_free_pages);
+EXPORT_SYMBOL(free_pages);
+EXPORT_SYMBOL(kmalloc);
+EXPORT_SYMBOL(kfree);
+EXPORT_SYMBOL(vmalloc);
+EXPORT_SYMBOL(vfree);
+EXPORT_SYMBOL(mem_map);
+EXPORT_SYMBOL(remap_page_range);
+EXPORT_SYMBOL(max_mapnr);
+EXPORT_SYMBOL(num_physpages);
+EXPORT_SYMBOL(high_memory);
+EXPORT_SYMBOL(update_vm_cache);
+EXPORT_SYMBOL(kmem_cache_create);
+EXPORT_SYMBOL(kmem_cache_destroy);
+EXPORT_SYMBOL(kmem_cache_alloc);
+EXPORT_SYMBOL(kmem_cache_free);
+
+/* filesystem internal functions */
+EXPORT_SYMBOL(getname);
+EXPORT_SYMBOL(putname);
+EXPORT_SYMBOL(__fput);
+EXPORT_SYMBOL(__iget);
+EXPORT_SYMBOL(iput);
+EXPORT_SYMBOL(namei);
+EXPORT_SYMBOL(lnamei);
+EXPORT_SYMBOL(open_namei);
+EXPORT_SYMBOL(sys_close);
+EXPORT_SYMBOL(close_fp);
+EXPORT_SYMBOL(check_disk_change);
+EXPORT_SYMBOL(invalidate_buffers);
+EXPORT_SYMBOL(invalidate_inodes);
+EXPORT_SYMBOL(invalidate_inode_pages);
+EXPORT_SYMBOL(fsync_dev);
+EXPORT_SYMBOL(permission);
+EXPORT_SYMBOL(inode_setattr);
+EXPORT_SYMBOL(inode_change_ok);
+EXPORT_SYMBOL(get_hardblocksize);
+EXPORT_SYMBOL(set_blocksize);
+EXPORT_SYMBOL(getblk);
+EXPORT_SYMBOL(bread);
+EXPORT_SYMBOL(breada);
+EXPORT_SYMBOL(__brelse);
+EXPORT_SYMBOL(__bforget);
+EXPORT_SYMBOL(ll_rw_block);
+EXPORT_SYMBOL(__wait_on_buffer);
+EXPORT_SYMBOL(mark_buffer_uptodate);
+EXPORT_SYMBOL(unlock_buffer);
+EXPORT_SYMBOL(dcache_lookup);
+EXPORT_SYMBOL(dcache_add);
+EXPORT_SYMBOL(add_blkdev_randomness);
+EXPORT_SYMBOL(generic_file_read);
+EXPORT_SYMBOL(generic_file_write);
+EXPORT_SYMBOL(generic_file_mmap);
+EXPORT_SYMBOL(generic_readpage);
+EXPORT_SYMBOL(file_lock_table);
+EXPORT_SYMBOL(posix_lock_file);
+EXPORT_SYMBOL(posix_test_lock);
+EXPORT_SYMBOL(posix_block_lock);
+EXPORT_SYMBOL(posix_unblock_lock);
+
+#if !defined(CONFIG_NFSD) && defined(CONFIG_NFSD_MODULE)
+EXPORT_SYMBOL(do_nfsservctl);
#endif
-
- /* block device driver support */
- X(block_read),
- X(block_write),
- X(block_fsync),
- X(wait_for_request),
- X(blksize_size),
- X(hardsect_size),
- X(blk_size),
- X(blk_dev),
- X(is_read_only),
- X(set_device_ro),
- X(bmap),
- X(sync_dev),
- X(get_blkfops),
- X(blkdev_open),
- X(blkdev_release),
- X(gendisk_head),
- X(resetup_one_dev),
- X(unplug_device),
-#ifdef __i386__
- X(drive_info),
+
+/* device registration */
+EXPORT_SYMBOL(register_chrdev);
+EXPORT_SYMBOL(unregister_chrdev);
+EXPORT_SYMBOL(register_blkdev);
+EXPORT_SYMBOL(unregister_blkdev);
+EXPORT_SYMBOL(tty_register_driver);
+EXPORT_SYMBOL(tty_unregister_driver);
+EXPORT_SYMBOL(tty_std_termios);
+
+/* block device driver support */
+EXPORT_SYMBOL(block_read);
+EXPORT_SYMBOL(block_write);
+EXPORT_SYMBOL(block_fsync);
+EXPORT_SYMBOL(wait_for_request);
+EXPORT_SYMBOL(blksize_size);
+EXPORT_SYMBOL(hardsect_size);
+EXPORT_SYMBOL(blk_size);
+EXPORT_SYMBOL(blk_dev);
+EXPORT_SYMBOL(is_read_only);
+EXPORT_SYMBOL(set_device_ro);
+EXPORT_SYMBOL(bmap);
+EXPORT_SYMBOL(sync_dev);
+EXPORT_SYMBOL(get_blkfops);
+EXPORT_SYMBOL(blkdev_open);
+EXPORT_SYMBOL(blkdev_release);
+EXPORT_SYMBOL(gendisk_head);
+EXPORT_SYMBOL(resetup_one_dev);
+EXPORT_SYMBOL(unplug_device);
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
+EXPORT_SYMBOL(drive_info);
#endif
-#ifdef CONFIG_SERIAL
- /* Module creation of serial units */
- X(register_serial),
- X(unregister_serial),
+/* tty routines */
+EXPORT_SYMBOL(tty_hangup);
+EXPORT_SYMBOL(tty_wait_until_sent);
+EXPORT_SYMBOL(tty_check_change);
+EXPORT_SYMBOL(tty_hung_up_p);
+EXPORT_SYMBOL(do_SAK);
+EXPORT_SYMBOL(console_print);
+
+/* filesystem registration */
+EXPORT_SYMBOL(register_filesystem);
+EXPORT_SYMBOL(unregister_filesystem);
+
+/* executable format registration */
+EXPORT_SYMBOL(register_binfmt);
+EXPORT_SYMBOL(unregister_binfmt);
+EXPORT_SYMBOL(search_binary_handler);
+EXPORT_SYMBOL(prepare_binprm);
+EXPORT_SYMBOL(remove_arg_zero);
+
+/* execution environment registration */
+EXPORT_SYMBOL(lookup_exec_domain);
+EXPORT_SYMBOL(register_exec_domain);
+EXPORT_SYMBOL(unregister_exec_domain);
+
+/* sysctl table registration */
+EXPORT_SYMBOL(register_sysctl_table);
+EXPORT_SYMBOL(unregister_sysctl_table);
+EXPORT_SYMBOL(sysctl_string);
+EXPORT_SYMBOL(sysctl_intvec);
+EXPORT_SYMBOL(proc_dostring);
+EXPORT_SYMBOL(proc_dointvec);
+EXPORT_SYMBOL(proc_dointvec_minmax);
+
+/* interrupt handling */
+EXPORT_SYMBOL(request_irq);
+EXPORT_SYMBOL(free_irq);
+EXPORT_SYMBOL(enable_irq);
+EXPORT_SYMBOL(disable_irq);
+EXPORT_SYMBOL(probe_irq_on);
+EXPORT_SYMBOL(probe_irq_off);
+EXPORT_SYMBOL(bh_active);
+EXPORT_SYMBOL(bh_mask);
+EXPORT_SYMBOL(bh_mask_count);
+EXPORT_SYMBOL(bh_base);
+EXPORT_SYMBOL(add_timer);
+EXPORT_SYMBOL(del_timer);
+EXPORT_SYMBOL(tq_timer);
+EXPORT_SYMBOL(tq_immediate);
+EXPORT_SYMBOL(tq_scheduler);
+EXPORT_SYMBOL(timer_active);
+EXPORT_SYMBOL(timer_table);
+
+#ifdef __SMP__
+/* Various random spinlocks we want to export */
+EXPORT_SYMBOL(tqueue_lock);
+EXPORT_SYMBOL(waitqueue_lock);
#endif
- /* tty routines */
- X(tty_hangup),
- X(tty_wait_until_sent),
- X(tty_check_change),
- X(tty_hung_up_p),
- X(do_SAK),
- X(console_print),
-
- /* filesystem registration */
- X(register_filesystem),
- X(unregister_filesystem),
-
- /* executable format registration */
- X(register_binfmt),
- X(unregister_binfmt),
- X(search_binary_handler),
- X(prepare_binprm),
- X(remove_arg_zero),
-
- /* execution environment registration */
- X(lookup_exec_domain),
- X(register_exec_domain),
- X(unregister_exec_domain),
-
- /* sysctl table registration */
- X(register_sysctl_table),
- X(unregister_sysctl_table),
- X(sysctl_string),
- X(sysctl_intvec),
- X(proc_dostring),
- X(proc_dointvec),
- X(proc_dointvec_minmax),
-
- /* interrupt handling */
- X(request_irq),
- X(free_irq),
- X(enable_irq),
- X(disable_irq),
- X(probe_irq_on),
- X(probe_irq_off),
- X(bh_active),
- X(bh_mask),
- X(bh_mask_count),
- X(bh_base),
- X(add_timer),
- X(del_timer),
- X(tq_timer),
- X(tq_immediate),
- X(tq_scheduler),
- X(timer_active),
- X(timer_table),
- X(intr_count),
-
- /* autoirq from drivers/net/auto_irq.c */
- X(autoirq_setup),
- X(autoirq_report),
-
- /* dma handling */
- X(request_dma),
- X(free_dma),
+
+/* autoirq from drivers/net/auto_irq.c */
+EXPORT_SYMBOL(autoirq_setup);
+EXPORT_SYMBOL(autoirq_report);
+
+/* dma handling */
+EXPORT_SYMBOL(request_dma);
+EXPORT_SYMBOL(free_dma);
#ifdef HAVE_DISABLE_HLT
- X(disable_hlt),
- X(enable_hlt),
+EXPORT_SYMBOL(disable_hlt);
+EXPORT_SYMBOL(enable_hlt);
#endif
- /* IO port handling */
- X(check_region),
- X(request_region),
- X(release_region),
-
- /* process management */
- X(wake_up),
- X(wake_up_interruptible),
- X(sleep_on),
- X(interruptible_sleep_on),
- X(schedule),
- X(current_set),
- X(jiffies),
- X(xtime),
- X(do_gettimeofday),
- X(loops_per_sec),
- X(need_resched),
- X(kstat),
- X(kill_proc),
- X(kill_pg),
- X(kill_sl),
-
- /* misc */
- X(panic),
- X(printk),
- X(sprintf),
- X(vsprintf),
- X(kdevname),
- X(simple_strtoul),
- X(system_utsname),
- X(sys_call_table),
- X(hard_reset_now),
- X(_ctype),
- X(secure_tcp_sequence_number),
-
- /* Signal interfaces */
- X(send_sig),
-
- /* Program loader interfaces */
- X(setup_arg_pages),
- X(copy_strings),
- X(do_execve),
- X(flush_old_exec),
- X(open_inode),
- X(read_exec),
-
- /* Miscellaneous access points */
- X(si_meminfo),
-
- /* Added to make file system as module */
- X(set_writetime),
- X(sys_tz),
- X(__wait_on_super),
- X(file_fsync),
- X(clear_inode),
- X(refile_buffer),
- X(nr_async_pages),
- X(___strtok),
- X(init_fifo),
- X(super_blocks),
- X(reuse_list),
- X(fifo_inode_operations),
- X(chrdev_inode_operations),
- X(blkdev_inode_operations),
- X(read_ahead),
- X(get_hash_table),
- X(get_empty_inode),
- X(insert_inode_hash),
- X(event),
- X(__down),
- X(__up),
- X(securelevel),
+/* IO port handling */
+EXPORT_SYMBOL(check_region);
+EXPORT_SYMBOL(request_region);
+EXPORT_SYMBOL(release_region);
+
+/* process management */
+EXPORT_SYMBOL(wake_up);
+EXPORT_SYMBOL(wake_up_interruptible);
+EXPORT_SYMBOL(sleep_on);
+EXPORT_SYMBOL(interruptible_sleep_on);
+EXPORT_SYMBOL(schedule);
+EXPORT_SYMBOL(current_set);
+EXPORT_SYMBOL(jiffies);
+EXPORT_SYMBOL(xtime);
+EXPORT_SYMBOL(do_gettimeofday);
+EXPORT_SYMBOL(loops_per_sec);
+EXPORT_SYMBOL(need_resched);
+EXPORT_SYMBOL(kstat);
+EXPORT_SYMBOL(kill_proc);
+EXPORT_SYMBOL(kill_pg);
+EXPORT_SYMBOL(kill_sl);
+
+/* misc */
+EXPORT_SYMBOL(panic);
+EXPORT_SYMBOL(printk);
+EXPORT_SYMBOL(sprintf);
+EXPORT_SYMBOL(vsprintf);
+EXPORT_SYMBOL(kdevname);
+EXPORT_SYMBOL(simple_strtoul);
+EXPORT_SYMBOL(system_utsname);
+EXPORT_SYMBOL(sys_call_table);
+EXPORT_SYMBOL(machine_restart);
+EXPORT_SYMBOL(machine_halt);
+EXPORT_SYMBOL(machine_power_off);
+EXPORT_SYMBOL(register_reboot_notifier);
+EXPORT_SYMBOL(unregister_reboot_notifier);
+EXPORT_SYMBOL(_ctype);
+EXPORT_SYMBOL(secure_tcp_sequence_number);
+EXPORT_SYMBOL(get_random_bytes);
+
+/* Signal interfaces */
+EXPORT_SYMBOL(send_sig);
+
+/* Program loader interfaces */
+EXPORT_SYMBOL(setup_arg_pages);
+EXPORT_SYMBOL(copy_strings);
+EXPORT_SYMBOL(do_execve);
+EXPORT_SYMBOL(flush_old_exec);
+EXPORT_SYMBOL(open_inode);
+EXPORT_SYMBOL(read_exec);
+
+/* Miscellaneous access points */
+EXPORT_SYMBOL(si_meminfo);
+
+/* Added to make file system as module */
+EXPORT_SYMBOL(set_writetime);
+EXPORT_SYMBOL(sys_tz);
+EXPORT_SYMBOL(__wait_on_super);
+EXPORT_SYMBOL(file_fsync);
+EXPORT_SYMBOL(clear_inode);
+EXPORT_SYMBOL(refile_buffer);
+EXPORT_SYMBOL(nr_async_pages);
+EXPORT_SYMBOL(___strtok);
+EXPORT_SYMBOL(init_fifo);
+EXPORT_SYMBOL(super_blocks);
+EXPORT_SYMBOL(fifo_inode_operations);
+EXPORT_SYMBOL(chrdev_inode_operations);
+EXPORT_SYMBOL(blkdev_inode_operations);
+EXPORT_SYMBOL(read_ahead);
+EXPORT_SYMBOL(get_hash_table);
+EXPORT_SYMBOL(get_empty_inode);
+EXPORT_SYMBOL(insert_inode_hash);
+EXPORT_SYMBOL(event);
+EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(securelevel);
+
/* all busmice */
- X(add_mouse_randomness),
- X(fasync_helper),
+EXPORT_SYMBOL(add_mouse_randomness);
+EXPORT_SYMBOL(fasync_helper);
+
/* psaux mouse */
- X(aux_device_present),
- X(kbd_read_mask),
+EXPORT_SYMBOL(aux_device_present);
+#ifdef CONFIG_VT
+EXPORT_SYMBOL(kbd_read_mask);
+#endif
#ifdef CONFIG_BLK_DEV_MD
- X(disk_name), /* for md.c */
+EXPORT_SYMBOL(disk_name); /* for md.c */
#endif
- /* binfmt_aout */
- X(get_write_access),
- X(put_write_access),
-
- /********************************************************
- * Do not add anything below this line,
- * as the stacked modules depend on this!
- */
-#include <linux/symtab_end.h>
-};
-
-/*
-int symbol_table_size = sizeof (symbol_table) / sizeof (symbol_table[0]);
-*/
+/* binfmt_aout */
+EXPORT_SYMBOL(get_write_access);
+EXPORT_SYMBOL(put_write_access);
diff --git a/kernel/module.c b/kernel/module.c
index 09cee93b7..885539b5c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -4,789 +4,963 @@
#include <linux/string.h>
#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/malloc.h>
-#include <linux/vmalloc.h>
#include <linux/config.h>
-
#include <asm/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+
/*
* Originally by Anonymous (as far as I know...)
* Linux version by Bas Laarhoven <bas@vimec.nl>
* 0.99.14 version by Jon Tombs <jon@gtex02.us.es>,
- *
* Heavily modified by Bjorn Ekwall <bj0rn@blox.se> May 1994 (C)
- * This source is covered by the GNU GPL, the same as all kernel sources.
- *
- * Features:
- * - Supports stacked modules (removable only of there are no dependents).
- * - Supports table of symbols defined by the modules.
- * - Supports /proc/ksyms, showing value, name and owner of all
- * the symbols defined by all modules (in stack order).
- * - Added module dependencies information into /proc/modules
- * - Supports redefines of all symbols, for streams-like behaviour.
- * - Compatible with older versions of insmod.
- *
- * New addition in December 1994: (Bjorn Ekwall, idea from Jacques Gelinas)
- * - Externally callable function:
- *
- * "int register_symtab(struct symbol_table *)"
+ * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996
*
- * This function can be called from within the kernel,
- * and ALSO from loadable modules.
- * The goal is to assist in modularizing the kernel even more,
- * and finally: reducing the number of entries in ksyms.c
- * since every subsystem should now be able to decide and
- * control exactly what symbols it wants to export, locally!
- *
- * On 1-Aug-95: <Matti.Aarnio@utu.fi> altered code to use same style as
- * do /proc/net/XXX "files". Namely allow more than 4kB
- * (or what the block size is) output.
- *
- * - Use dummy syscall functions for users who disable all
- * module support. Similar to kernel/sys.c (Paul Gortmaker)
+ * This source is covered by the GNU GPL, the same as all kernel sources.
*/
#ifdef CONFIG_MODULES /* a *big* #ifdef block... */
-static struct module kernel_module;
-struct module *module_list = &kernel_module;
+extern struct module_symbol __start___ksymtab[];
+extern struct module_symbol __stop___ksymtab[];
-static int freeing_modules; /* true if some modules are marked for deletion */
+extern const struct exception_table_entry __start___ex_table[];
+extern const struct exception_table_entry __stop___ex_table[];
-static struct module *find_module( const char *name);
-static int free_modules( void);
+static struct module kernel_module =
+{
+ sizeof(struct module), /* size_of_struct */
+ NULL, /* next */
+ "", /* name */
+ 0, /* size */
+ 1, /* usecount */
+ MOD_RUNNING, /* flags */
+	0,				/* nsyms -- to be filled in in init_modules */
+ 0, /* ndeps */
+ __start___ksymtab, /* syms */
+ NULL, /* deps */
+ NULL, /* refs */
+ NULL, /* init */
+ NULL, /* cleanup */
+ __start___ex_table, /* ex_table_start */
+ __stop___ex_table, /* ex_table_end */
+ /* Rest are NULL */
+};
+
+struct module *module_list = &kernel_module;
+
+static long get_mod_name(const char *user_name, char **buf);
+static void put_mod_name(char *buf);
+static struct module *find_module(const char *name);
+static void free_module(struct module *);
-extern struct symbol_table symbol_table; /* in kernel/ksyms.c */
/*
* Called at boot time
*/
-void init_modules(void) {
- struct internal_symbol *sym;
- int i;
- for (i = 0, sym = symbol_table.symbol; sym->name; ++sym, ++i)
- ;
- symbol_table.n_symbols = i;
+void init_modules(void)
+{
+ kernel_module.nsyms = __stop___ksymtab - __start___ksymtab;
- kernel_module.symtab = &symbol_table;
- kernel_module.state = MOD_RUNNING; /* Hah! */
- kernel_module.name = "";
+#ifdef __alpha__
+ __asm__("stq $29,%0" : "=m"(kernel_module.gp));
+#endif
}
-
/*
* Copy the name of a module from user space.
*/
-inline int
-get_mod_name(char *user_name, char *buf)
+
+static inline long
+get_mod_name(const char *user_name, char **buf)
{
- /* Should return -EBIG instead of -EFAULT when the name
- is too long, but that we couldn't detect real faults then.
- Maybe strncpy_from_user() should return -EBIG, when
- the source string is too long. */
- return strncpy_from_user(buf, user_name, MOD_MAX_NAME);
+ unsigned long page;
+ long retval;
+
+ if ((unsigned long)user_name >= TASK_SIZE)
+ return -EFAULT;
+
+ page = __get_free_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ retval = strncpy_from_user((char *)page, user_name, PAGE_SIZE);
+ if (retval > 0) {
+ if (retval < PAGE_SIZE) {
+ *buf = (char *)page;
+ return retval;
+ }
+ retval = -ENAMETOOLONG;
+ } else if (!retval)
+ retval = -EINVAL;
+
+ free_page(page);
+ return retval;
}
+static inline void
+put_mod_name(char *buf)
+{
+ free_page((unsigned long)buf);
+}
/*
* Allocate space for a module.
*/
+
asmlinkage unsigned long
-sys_create_module(char *module_name, unsigned long size)
+sys_create_module(const char *name_user, size_t size)
{
- struct module *mp;
- void* addr;
- int error;
- int npages;
- int sspace = sizeof(struct module) + MOD_MAX_NAME;
- char name[MOD_MAX_NAME];
-
- if (!suser())
- return -EPERM;
- if (module_name == NULL || size == 0)
- return -EINVAL;
- if ((error = get_mod_name(module_name, name)) < 0)
- return error;
+ char *name;
+ long namelen, error;
+ struct module *mod;
+
+ lock_kernel();
+ if (!suser()) {
+ error = -EPERM;
+ goto err0;
+ }
+ if ((namelen = get_mod_name(name_user, &name)) < 0) {
+ error = namelen;
+ goto err0;
+ }
+ if (size < sizeof(struct module)+namelen) {
+ error = -EINVAL;
+ goto err1;
+ }
if (find_module(name) != NULL) {
- return -EEXIST;
+ error = -EEXIST;
+ goto err1;
}
-
- if ((mp = (struct module*) kmalloc(sspace, GFP_KERNEL)) == NULL) {
- return -ENOMEM;
+ if ((mod = (struct module *)vmalloc(size)) == NULL) {
+ error = -ENOMEM;
+ goto err1;
}
- strcpy((char *)(mp + 1), name); /* why not? */
- npages = (size + sizeof (long) + PAGE_SIZE - 1) / PAGE_SIZE;
- if ((addr = vmalloc(npages * PAGE_SIZE)) == 0) {
- kfree_s(mp, sspace);
- return -ENOMEM;
- }
+ memset(mod, 0, sizeof(*mod));
+ mod->size_of_struct = sizeof(*mod);
+ mod->next = module_list;
+ mod->name = (char *)(mod + 1);
+ mod->size = size;
+ memcpy((char*)(mod+1), name, namelen+1);
- mp->next = module_list;
- mp->ref = NULL;
- mp->symtab = NULL;
- mp->name = (char *)(mp + 1);
- mp->size = npages;
- mp->addr = addr;
- mp->state = MOD_UNINITIALIZED;
- mp->cleanup = NULL;
- mp->exceptinfo.start = NULL;
- mp->exceptinfo.stop = NULL;
+ put_mod_name(name);
- * (long *) addr = 0; /* set use count to zero */
- module_list = mp; /* link it in */
+ module_list = mod; /* link it in */
- pr_debug("module `%s' (%lu pages @ 0x%08lx) created\n",
- mp->name, (unsigned long) mp->size, (unsigned long) mp->addr);
- return (unsigned long) addr;
+ error = (long) mod;
+ goto err0;
+err1:
+ put_mod_name(name);
+err0:
+ unlock_kernel();
+ return error;
}
-
/*
* Initialize a module.
*/
+
asmlinkage int
-sys_init_module(char *module_name, char *code, unsigned codesize,
- struct mod_routines *routines,
- struct symbol_table *symtab)
+sys_init_module(const char *name_user, struct module *mod_user)
{
- struct module *mp;
- struct symbol_table *newtab;
- char name[MOD_MAX_NAME];
- int error;
- struct mod_routines rt;
+ struct module mod_tmp, *mod;
+ char *name, *n_name;
+ long namelen, n_namelen, i, error = -EPERM;
+ unsigned long mod_user_size;
+ struct module_ref *dep;
+ lock_kernel();
if (!suser())
- return -EPERM;
+ goto err0;
+ if ((namelen = get_mod_name(name_user, &name)) < 0) {
+ error = namelen;
+ goto err0;
+ }
+ if ((mod = find_module(name)) == NULL) {
+ error = -ENOENT;
+ goto err1;
+ }
+
+ /* Check module header size. We allow a bit of slop over the
+ size we are familiar with to cope with a version of insmod
+	   for a newer kernel.  But don't overdo it. */
+ if ((error = get_user(mod_user_size, &mod_user->size_of_struct)) != 0)
+ goto err1;
+ if (mod_user_size < (unsigned long)&((struct module *)0L)->persist_start
+ || mod_user_size > sizeof(struct module) + 16*sizeof(void*)) {
+ printk(KERN_ERR "init_module: Invalid module header size.\n"
+ KERN_ERR "A new version of the modutils is likely "
+ "needed.\n");
+ error = -EINVAL;
+ goto err1;
+ }
+
+ /* Hold the current contents while we play with the user's idea
+ of righteousness. */
+ mod_tmp = *mod;
+
+ error = copy_from_user(mod, mod_user, sizeof(struct module));
+ if (error) {
+ error = -EFAULT;
+ goto err2;
+ }
+
+ /* Sanity check the size of the module. */
+ error = -EINVAL;
+
+ if (mod->size > mod_tmp.size) {
+ printk(KERN_ERR "init_module: Size of initialized module "
+ "exceeds size of created module.\n");
+ goto err2;
+ }
+
+ /* Make sure all interesting pointers are sane. */
-#ifdef __i386__
- /* A little bit of protection... we "know" where the user stack is... */
+#define bound(p, n, m) ((unsigned long)(p) >= (unsigned long)(m+1) && \
+ (unsigned long)((p)+(n)) <= (unsigned long)(m) + (m)->size)
- if (symtab && ((unsigned long)symtab > 0xb0000000)) {
- printk(KERN_WARNING "warning: you are using an old insmod, no symbols will be inserted!\n");
- symtab = NULL;
+ if (!bound(mod->name, namelen, mod)) {
+ printk(KERN_ERR "init_module: mod->name out of bounds.\n");
+ goto err2;
+ }
+ if (mod->nsyms && !bound(mod->syms, mod->nsyms, mod)) {
+ printk(KERN_ERR "init_module: mod->syms out of bounds.\n");
+ goto err2;
+ }
+ if (mod->ndeps && !bound(mod->deps, mod->ndeps, mod)) {
+ printk(KERN_ERR "init_module: mod->deps out of bounds.\n");
+ goto err2;
+ }
+ if (mod->init && !bound(mod->init, 0, mod)) {
+ printk(KERN_ERR "init_module: mod->init out of bounds.\n");
+ goto err2;
+ }
+ if (mod->cleanup && !bound(mod->cleanup, 0, mod)) {
+ printk(KERN_ERR "init_module: mod->cleanup out of bounds.\n");
+ goto err2;
+ }
+ if (mod->ex_table_start > mod->ex_table_end
+ || (mod->ex_table_start &&
+ !((unsigned long)mod->ex_table_start >= (unsigned long)(mod+1)
+ && ((unsigned long)mod->ex_table_end
+ < (unsigned long)mod + mod->size)))
+ || (((unsigned long)mod->ex_table_start
+ - (unsigned long)mod->ex_table_end)
+ % sizeof(struct exception_table_entry))) {
+ printk(KERN_ERR "init_module: mod->ex_table_* invalid.\n");
+ goto err2;
+ }
+ if (mod->flags & ~MOD_AUTOCLEAN) {
+ printk(KERN_ERR "init_module: mod->flags invalid.\n");
+ goto err2;
+ }
+#ifdef __alpha__
+ if (!bound(mod->gp - 0x8000, 0, mod)) {
+ printk(KERN_ERR "init_module: mod->gp out of bounds.\n");
+ goto err2;
}
#endif
- if ((error = get_mod_name(module_name, name)) < 0)
- return error;
- pr_debug("initializing module `%s', %d (0x%x) bytes\n",
- name, codesize, codesize);
- if (copy_from_user(&rt, routines, sizeof rt))
- return -EFAULT;
- if ((mp = find_module(name)) == NULL)
- return -ENOENT;
- if (codesize & MOD_AUTOCLEAN) {
- /*
- * set autoclean marker from codesize...
- * set usage count to "zero"
- */
- codesize &= ~MOD_AUTOCLEAN;
- GET_USE_COUNT(mp) = MOD_AUTOCLEAN;
- }
- if ((codesize + sizeof (long) + PAGE_SIZE - 1) / PAGE_SIZE > mp->size)
- return -EINVAL;
- if (copy_from_user((char *)mp->addr + sizeof (long), code, codesize))
- return -EFAULT;
- memset((char *)mp->addr + sizeof (long) + codesize, 0,
- mp->size * PAGE_SIZE - (codesize + sizeof (long)));
- pr_debug("module init entry = 0x%08lx, cleanup entry = 0x%08lx\n",
- (unsigned long) rt.init, (unsigned long) rt.cleanup);
- if (rt.signature != MODULE_2_1_7_SIG){
- printk ("Older insmod used with kernel 2.1.7 +\n");
- return -EINVAL;
+ if (mod_member_present(mod, can_unload)
+ && mod->can_unload && !bound(mod->can_unload, 0, mod)) {
+ printk(KERN_ERR "init_module: mod->can_unload out of bounds.\n");
+ goto err2;
}
- mp->cleanup = rt.cleanup;
- mp->exceptinfo = rt.exceptinfo;
-
- /* update kernel symbol table */
- if (symtab) { /* symtab == NULL means no new entries to handle */
- struct internal_symbol *sym;
- struct module_ref *ref;
- int size;
- int i;
- int legal_start;
-
- error = get_user(size, &symtab->size);
- if (error)
- return error;
- if ((newtab = (struct symbol_table*) kmalloc(size, GFP_KERNEL)) == NULL) {
- return -ENOMEM;
- }
- if (copy_from_user((char *)(newtab), symtab, size)) {
- kfree_s(newtab, size);
- return -EFAULT;
- }
+#undef bound
- /* sanity check */
- legal_start = sizeof(struct symbol_table) +
- newtab->n_symbols * sizeof(struct internal_symbol) +
- newtab->n_refs * sizeof(struct module_ref);
+ /* Check that the user isn't doing something silly with the name. */
- if ((newtab->n_symbols < 0) || (newtab->n_refs < 0) || (legal_start > size)) {
- printk(KERN_WARNING "Rejecting illegal symbol table (n_symbols=%d,n_refs=%d)\n",
- newtab->n_symbols, newtab->n_refs);
- kfree_s(newtab, size);
- return -EINVAL;
- }
+ if ((n_namelen = get_mod_name(mod->name - (unsigned long)mod
+ + (unsigned long)mod_user,
+ &n_name)) < 0) {
+ error = n_namelen;
+ goto err2;
+ }
+ if (namelen != n_namelen || strcmp(n_name, mod_tmp.name) != 0) {
+ printk(KERN_ERR "init_module: changed module name to "
+ "`%s' from `%s'\n",
+ n_name, mod_tmp.name);
+ goto err3;
+ }
- /* relocate name pointers, index referred from start of table */
- for (sym = &(newtab->symbol[0]), i = 0; i < newtab->n_symbols; ++sym, ++i) {
- if ((unsigned long)sym->name < legal_start || size <= (unsigned long)sym->name) {
- printk(KERN_WARNING "Rejecting illegal symbol table\n");
- kfree_s(newtab, size);
- return -EINVAL;
- }
- /* else */
- sym->name += (long)newtab;
- }
- mp->symtab = newtab;
-
- /* Update module references.
- * On entry, from "insmod", ref->module points to
- * the referenced module!
- * Now it will point to the current module instead!
- * The ref structure becomes the first link in the linked
- * list of references to the referenced module.
- * Also, "sym" from above, points to the first ref entry!!!
- */
- for (ref = (struct module_ref *)sym, i = 0;
- i < newtab->n_refs; ++ref, ++i) {
-
- /* Check for valid reference */
- struct module *link = module_list;
- while (link && (ref->module != link))
- link = link->next;
-
- if (link == (struct module *)0) {
- printk(KERN_WARNING "Non-module reference! Rejected!\n");
- return -EINVAL;
- }
+ /* Ok, that's about all the sanity we can stomach; copy the rest. */
- ref->next = ref->module->ref;
- ref->module->ref = ref;
- ref->module = mp;
- }
+ if (copy_from_user(mod+1, mod_user+1, mod->size-sizeof(*mod))) {
+ error = -EFAULT;
+ goto err3;
}
- GET_USE_COUNT(mp) += 1;
- if ((*rt.init)() != 0) {
- GET_USE_COUNT(mp) = 0;
- return -EBUSY;
+ /* Update module references. */
+ mod->next = mod_tmp.next;
+ mod->refs = NULL;
+ for (i = 0, dep = mod->deps; i < mod->ndeps; ++i, ++dep) {
+ struct module *o, *d = dep->dep;
+
+		/* Make sure the indicated dependencies are really modules. */
+ if (d == mod) {
+ printk(KERN_ERR "init_module: self-referential "
+ "dependancy in mod->deps.\n");
+ goto err3;
+ }
+
+ for (o = module_list; o != &kernel_module; o = o->next)
+ if (o == d) goto found_dep;
+
+ printk(KERN_ERR "init_module: found dependancy that is "
+ "(no longer?) a module.\n");
+ goto err3;
+
+ found_dep:
+ dep->ref = mod;
+ dep->next_ref = d->refs;
+ d->refs = dep;
+		/* Being referenced by a dependent module counts as a
+		   use as far as kerneld is concerned. */
+ d->flags |= MOD_USED_ONCE;
}
- GET_USE_COUNT(mp) -= 1;
- mp->state = MOD_RUNNING;
- return 0;
+ /* Free our temporary memory. */
+ put_mod_name(n_name);
+ put_mod_name(name);
+
+ /* Initialize the module. */
+ mod->usecount = 1;
+ if (mod->init && mod->init() != 0) {
+ mod->usecount = 0;
+ error = -EBUSY;
+ goto err0;
+ }
+ mod->usecount--;
+
+ /* And set it running. */
+ mod->flags |= MOD_RUNNING;
+ error = 0;
+ goto err0;
+
+err3:
+ put_mod_name(n_name);
+err2:
+ *mod = mod_tmp;
+err1:
+ put_mod_name(name);
+err0:
+ unlock_kernel();
+ return error;
}
asmlinkage int
-sys_delete_module(char *module_name)
+sys_delete_module(const char *name_user)
{
- struct module *mp;
- char name[MOD_MAX_NAME];
- int error;
+ struct module *mod, *next;
+ char *name;
+ long error = -EPERM;
+ lock_kernel();
if (!suser())
- return -EPERM;
- /* else */
- if (module_name != NULL) {
- if ((error = get_mod_name(module_name, name)) < 0)
- return error;
- if ((mp = find_module(name)) == NULL)
- return -ENOENT;
- if ((mp->ref != NULL) ||
- ((GET_USE_COUNT(mp) & ~(MOD_AUTOCLEAN | MOD_VISITED)) != 0))
- return -EBUSY;
- GET_USE_COUNT(mp) &= ~(MOD_AUTOCLEAN | MOD_VISITED);
- if (mp->state == MOD_RUNNING)
- (*mp->cleanup)();
- mp->state = MOD_DELETED;
- free_modules();
- }
- /* for automatic reaping */
- else {
- struct module *mp_next;
- for (mp = module_list; mp != &kernel_module; mp = mp_next) {
- mp_next = mp->next;
- if ((mp->ref == NULL) && (mp->state == MOD_RUNNING) &&
- ((GET_USE_COUNT(mp) & ~MOD_VISITED) == MOD_AUTOCLEAN)) {
- if ((GET_USE_COUNT(mp) & MOD_VISITED)) {
- /* Don't reap until one "cycle" after last _use_ */
- GET_USE_COUNT(mp) &= ~MOD_VISITED;
- }
- else {
- GET_USE_COUNT(mp) &= ~(MOD_AUTOCLEAN | MOD_VISITED);
- (*mp->cleanup)();
- mp->state = MOD_DELETED;
- free_modules();
- }
- }
+ goto out;
+
+ if (name_user) {
+ if ((error = get_mod_name(name_user, &name)) < 0)
+ goto out;
+ if (error == 0) {
+ error = -EINVAL;
+ put_mod_name(name);
+ goto out;
+ }
+ error = -ENOENT;
+ if ((mod = find_module(name)) == NULL) {
+ put_mod_name(name);
+ goto out;
+ }
+ put_mod_name(name);
+ error = -EBUSY;
+ if (mod->refs != NULL || __MOD_IN_USE(mod))
+ goto out;
+
+ free_module(mod);
+ error = 0;
+ goto out;
+ }
+
+ /* Do automatic reaping */
+ for (mod = module_list; mod != &kernel_module; mod = next) {
+ next = mod->next;
+ if (mod->refs == NULL &&
+ ((mod->flags
+ & (MOD_AUTOCLEAN|MOD_RUNNING|MOD_DELETED|MOD_USED_ONCE))
+ == (MOD_AUTOCLEAN|MOD_RUNNING|MOD_USED_ONCE)) &&
+ !__MOD_IN_USE(mod)) {
+ if (mod->flags & MOD_VISITED)
+ mod->flags &= ~MOD_VISITED;
+ else
+ free_module(mod);
}
}
- return 0;
+ error = 0;
+out:
+ unlock_kernel();
+ return error;
}
+/* Query various bits about modules. */
-/*
- * Copy the kernel symbol table to user space. If the argument is null,
- * just return the size of the table.
- *
- * Note that the transient module symbols are copied _first_,
- * in lifo order!!!
- *
- * The symbols to "insmod" are according to the "old" format: struct kernel_sym,
- * which is actually quite handy for this purpose.
- * Note that insmod inserts a struct symbol_table later on...
- * (as that format is quite handy for the kernel...)
- *
- * For every module, the first (pseudo)symbol copied is the module name
- * and the address of the module struct.
- * This lets "insmod" keep track of references, and build the array of
- * struct module_refs in the symbol table.
- * The format of the module name is "#module", so that "insmod" can easily
- * notice when a module name comes along. Also, this will make it possible
- * to use old versions of "insmod", albeit with reduced functionality...
- * The "kernel" module has an empty name.
- */
-asmlinkage int
-sys_get_kernel_syms(struct kernel_sym *table)
+static int
+qm_modules(char *buf, size_t bufsize, size_t *ret)
{
- struct internal_symbol *from;
- struct kernel_sym isym;
- struct kernel_sym *to;
- struct module *mp = module_list;
- int i;
- int nmodsyms = 0;
- int err;
+ struct module *mod;
+ size_t nmod, space, len;
- for (mp = module_list; mp; mp = mp->next) {
- if (mp->symtab && mp->symtab->n_symbols) {
- /* include the count for the module name! */
- nmodsyms += mp->symtab->n_symbols + 1;
- }
- else
- /* include the count for the module name! */
- nmodsyms += 1; /* return modules without symbols too */
- }
-
- if (table != NULL) {
- to = table;
-
- /* copy all module symbols first (always LIFO order) */
- for (mp = module_list; mp; mp = mp->next) {
- if (mp->state == MOD_RUNNING) {
- /* magic: write module info as a pseudo symbol */
- isym.value = (unsigned long)mp;
- sprintf(isym.name, "#%s", mp->name);
- err = copy_to_user(to, &isym, sizeof isym);
- if (err)
- return -EFAULT;
- ++to;
-
- if (mp->symtab != NULL) {
- for (i = mp->symtab->n_symbols,
- from = mp->symtab->symbol;
- i > 0; --i, ++from, ++to) {
-
- isym.value = (unsigned long)from->addr;
- strncpy(isym.name, from->name, sizeof isym.name);
- err = copy_to_user(to, &isym, sizeof isym);
- if (err)
- return -EFAULT;
- }
- }
- }
- }
+ nmod = space = 0;
+
+ for (mod=module_list; mod != &kernel_module; mod=mod->next, ++nmod) {
+ len = strlen(mod->name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+ if (copy_to_user(buf, mod->name, len))
+ return -EFAULT;
+ buf += len;
+ bufsize -= len;
+ space += len;
}
- return nmodsyms;
+ if (put_user(nmod, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ space += len;
+ while ((mod = mod->next) != &kernel_module)
+ space += strlen(mod->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
}
-/*
- * Look for a module by name, ignoring modules marked for deletion.
- */
-struct module *
-find_module( const char *name)
+static int
+qm_deps(struct module *mod, char *buf, size_t bufsize, size_t *ret)
{
- struct module *mp;
+ size_t i, space, len;
- for (mp = module_list ; mp ; mp = mp->next) {
- if (mp->state == MOD_DELETED)
- continue;
- if (!strcmp(mp->name, name))
- break;
+ if (mod == &kernel_module)
+ return -EINVAL;
+ if ((mod->flags & (MOD_RUNNING | MOD_DELETED)) != MOD_RUNNING)
+ if (put_user(0, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+ space = 0;
+ for (i = 0; i < mod->ndeps; ++i) {
+ const char *dep_name = mod->deps[i].dep->name;
+
+ len = strlen(dep_name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+ if (copy_to_user(buf, dep_name, len))
+ return -EFAULT;
+ buf += len;
+ bufsize -= len;
+ space += len;
}
- return mp;
+
+ if (put_user(i, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ space += len;
+ while (++i < mod->ndeps)
+ space += strlen(mod->deps[i].dep->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
}
-static void
-drop_refs(struct module *mp)
+static int
+qm_refs(struct module *mod, char *buf, size_t bufsize, size_t *ret)
{
- struct module *step;
- struct module_ref *prev;
+ size_t nrefs, space, len;
struct module_ref *ref;
- for (step = module_list; step; step = step->next) {
- for (prev = ref = step->ref; ref; ref = prev->next) {
- if (ref->module == mp) {
- if (ref == step->ref)
- step->ref = ref->next;
- else
- prev->next = ref->next;
- break; /* every module only references once! */
- }
- else
- prev = ref;
- }
+ if (mod == &kernel_module)
+ return -EINVAL;
+ if ((mod->flags & (MOD_RUNNING | MOD_DELETED)) != MOD_RUNNING)
+ if (put_user(0, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+ space = 0;
+ for (nrefs = 0, ref = mod->refs; ref ; ++nrefs, ref = ref->next_ref) {
+ const char *ref_name = ref->ref->name;
+
+ len = strlen(ref_name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+ if (copy_to_user(buf, ref_name, len))
+ return -EFAULT;
+ buf += len;
+ bufsize -= len;
+ space += len;
}
+
+ if (put_user(nrefs, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ space += len;
+ while ((ref = ref->next_ref) != NULL)
+ space += strlen(ref->ref->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
}
-/*
- * Try to free modules which have been marked for deletion. Returns nonzero
- * if a module was actually freed.
- */
-int
-free_modules( void)
+static int
+qm_symbols(struct module *mod, char *buf, size_t bufsize, size_t *ret)
{
- struct module *mp;
- struct module **mpp;
- int did_deletion;
-
- did_deletion = 0;
- freeing_modules = 0;
- mpp = &module_list;
- while ((mp = *mpp) != NULL) {
- if (mp->state != MOD_DELETED) {
- mpp = &mp->next;
- } else {
- if ((GET_USE_COUNT(mp) != 0) || (mp->ref != NULL)) {
- freeing_modules = 1;
- mpp = &mp->next;
- } else { /* delete it */
- *mpp = mp->next;
- if (mp->symtab) {
- if (mp->symtab->n_refs)
- drop_refs(mp);
- if (mp->symtab->size)
- kfree_s(mp->symtab, mp->symtab->size);
- }
- vfree(mp->addr);
- kfree_s(mp, sizeof(struct module) + MOD_MAX_NAME);
- did_deletion = 1;
- }
- }
+ size_t i, space, len;
+ struct module_symbol *s;
+ char *strings;
+ unsigned long *vals;
+
+ if ((mod->flags & (MOD_RUNNING | MOD_DELETED)) != MOD_RUNNING)
+ if (put_user(0, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+ space = mod->nsyms * 2*sizeof(void *);
+
+ i = len = 0;
+ s = mod->syms;
+
+ if (space > bufsize)
+ goto calc_space_needed;
+
+ if (!access_ok(VERIFY_WRITE, buf, space))
+ return -EFAULT;
+
+ bufsize -= space;
+ vals = (unsigned long *)buf;
+ strings = buf+space;
+
+ for (; i < mod->nsyms ; ++i, ++s, vals += 2) {
+ len = strlen(s->name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+
+ if (copy_to_user(strings, s->name, len)
+ || __put_user(s->value, vals+0)
+ || __put_user(space, vals+1))
+ return -EFAULT;
+
+ strings += len;
+ bufsize -= len;
+ space += len;
}
- return did_deletion;
+
+ if (put_user(i, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ for (; i < mod->nsyms; ++i, ++s)
+ space += strlen(s->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
}
+static int
+qm_info(struct module *mod, char *buf, size_t bufsize, size_t *ret)
+{
+ int error = 0;
-/*
- * Called by the /proc file system to return a current list of modules.
- */
-int get_module_list(char *buf)
+ if (mod == &kernel_module)
+ return -EINVAL;
+
+ if (sizeof(struct module_info) <= bufsize) {
+ struct module_info info;
+ info.addr = (unsigned long)mod;
+ info.size = mod->size;
+ info.flags = mod->flags;
+ info.usecount = (mod_member_present(mod, can_unload)
+ && mod->can_unload ? -1 : mod->usecount);
+
+ if (copy_to_user(buf, &info, sizeof(struct module_info)))
+ return -EFAULT;
+ } else
+ error = -ENOSPC;
+
+ if (put_user(sizeof(struct module_info), ret))
+ return -EFAULT;
+
+ return error;
+}
+
+asmlinkage int
+sys_query_module(const char *name_user, int which, char *buf, size_t bufsize,
+ size_t *ret)
{
- char *p;
- const char *q;
- int i;
- struct module *mp;
- struct module_ref *ref;
- char size[32];
-
- p = buf;
- /* Do not show the kernel pseudo module */
- for (mp = module_list ; mp && mp->next; mp = mp->next) {
- if (p - buf > 4096 - 100)
- break; /* avoid overflowing buffer */
- q = mp->name;
- if (*q == '\0' && mp->size == 0 && mp->ref == NULL)
- continue; /* don't list modules for kernel syms */
- i = 20;
- while (*q) {
- *p++ = *q++;
- i--;
- }
- sprintf(size, "%d", mp->size);
- i -= strlen(size);
- if (i <= 0)
- i = 1;
- while (--i >= 0)
- *p++ = ' ';
- q = size;
- while (*q)
- *p++ = *q++;
- if (mp->state == MOD_UNINITIALIZED)
- q = " (uninitialized)";
- else if (mp->state == MOD_RUNNING)
- q = "";
- else if (mp->state == MOD_DELETED)
- q = " (deleted)";
- else
- q = " (bad state)";
- while (*q)
- *p++ = *q++;
-
- *p++ = '\t';
- if ((ref = mp->ref) != NULL) {
- *p++ = '[';
- for (; ref; ref = ref->next) {
- q = ref->module->name;
- while (*q)
- *p++ = *q++;
- if (ref->next)
- *p++ = ' ';
- }
- *p++ = ']';
+ struct module *mod;
+ int err;
+
+ lock_kernel();
+ if (name_user == NULL)
+ mod = &kernel_module;
+ else {
+ long namelen;
+ char *name;
+
+ if ((namelen = get_mod_name(name_user, &name)) < 0) {
+ err = namelen;
+ goto out;
}
- if (mp->state == MOD_RUNNING) {
- sprintf(size,"\t%ld%s",
- GET_USE_COUNT(mp) & ~(MOD_AUTOCLEAN | MOD_VISITED),
- ((GET_USE_COUNT(mp) & MOD_AUTOCLEAN)?
- " (autoclean)":""));
- q = size;
- while (*q)
- *p++ = *q++;
+ err = -ENOENT;
+ if (namelen == 0)
+ mod = &kernel_module;
+ else if ((mod = find_module(name)) == NULL) {
+ put_mod_name(name);
+ goto out;
}
- *p++ = '\n';
+ put_mod_name(name);
}
- return p - buf;
-}
+ switch (which)
+ {
+ case 0:
+ err = 0;
+ break;
+ case QM_MODULES:
+ err = qm_modules(buf, bufsize, ret);
+ break;
+ case QM_DEPS:
+ err = qm_deps(mod, buf, bufsize, ret);
+ break;
+ case QM_REFS:
+ err = qm_refs(mod, buf, bufsize, ret);
+ break;
+ case QM_SYMBOLS:
+ err = qm_symbols(mod, buf, bufsize, ret);
+ break;
+ case QM_INFO:
+ err = qm_info(mod, buf, bufsize, ret);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+out:
+ unlock_kernel();
+ return err;
+}
/*
- * Called by the /proc file system to return a current list of ksyms.
+ * Copy the kernel symbol table to user space. If the argument is
+ * NULL, just return the size of the table.
+ *
+ * This call is obsolete. New programs should use query_module+QM_SYMBOLS
+ * which does not arbitrarily limit the length of symbols.
*/
-int get_ksyms_list(char *buf, char **start, off_t offset, int length)
+
+asmlinkage int
+sys_get_kernel_syms(struct kernel_sym *table)
{
- struct module *mp;
- struct internal_symbol *sym;
+ struct module *mod;
int i;
- char *p = buf;
- int len = 0; /* code from net/ipv4/proc.c */
- off_t pos = 0;
- off_t begin = 0;
- for (mp = module_list; mp; mp = mp->next) {
- if ((mp->state == MOD_RUNNING) &&
- (mp->symtab != NULL) &&
- (mp->symtab->n_symbols > 0)) {
- for (i = mp->symtab->n_symbols,
- sym = mp->symtab->symbol;
- i > 0; --i, ++sym) {
-
- p = buf + len;
- if (mp->name[0]) {
- len += sprintf(p, "%08lx %s\t[%s]\n",
- (long)sym->addr,
- sym->name, mp->name);
- } else {
- len += sprintf(p, "%08lx %s\n",
- (long)sym->addr,
- sym->name);
- }
- pos = begin + len;
- if (pos < offset) {
- len = 0;
- begin = pos;
- }
- pos = begin + len;
- if (pos > offset+length)
- goto leave_the_loop;
- }
+ lock_kernel();
+ for (mod = module_list, i = 0; mod; mod = mod->next) {
+ /* include the count for the module name! */
+ i += mod->nsyms + 1;
+ }
+
+ if (table == NULL)
+ goto out;
+
+ for (mod = module_list, i = 0; mod; mod = mod->next) {
+ struct kernel_sym ksym;
+ struct module_symbol *msym;
+ unsigned int j;
+
+ if ((mod->flags & (MOD_RUNNING|MOD_DELETED)) != MOD_RUNNING)
+ continue;
+
+ /* magic: write module info as a pseudo symbol */
+ ksym.value = (unsigned long)mod;
+ ksym.name[0] = '#';
+ strncpy(ksym.name+1, mod->name, sizeof(ksym.name)-1);
+ ksym.name[sizeof(ksym.name)-1] = '\0';
+
+ if (copy_to_user(table, &ksym, sizeof(ksym)) != 0)
+ goto out;
+ ++i, ++table;
+
+ if (mod->nsyms == 0)
+ continue;
+
+ for (j = 0, msym = mod->syms; j < mod->nsyms; ++j, ++msym) {
+ ksym.value = msym->value;
+ strncpy(ksym.name, msym->name, sizeof(ksym.name));
+ ksym.name[sizeof(ksym.name)-1] = '\0';
+
+ if (copy_to_user(table, &ksym, sizeof(ksym)) != 0)
+ goto out;
+ ++i, ++table;
}
}
- leave_the_loop:
- *start = buf + (offset - begin);
- len -= (offset - begin);
- if (len > length)
- len = length;
- return len;
+out:
+ unlock_kernel();
+ return i;
}
/*
- * Rules:
- * - The new symbol table should be statically allocated, or else you _have_
- * to set the "size" field of the struct to the number of bytes allocated.
- *
- * - The strings that name the symbols will not be copied, maybe the pointers
- *
- * - For a loadable module, the function should only be called in the
- * context of init_module
- *
- * Those are the only restrictions! (apart from not being reentrant...)
- *
- * If you want to remove a symbol table for a loadable module,
- * the call looks like: "register_symtab(0)".
- *
- * The look of the code is mostly dictated by the format of
- * the frozen struct symbol_table, due to compatibility demands.
+ * Look for a module by name, ignoring modules marked for deletion.
*/
-#define INTSIZ sizeof(struct internal_symbol)
-#define REFSIZ sizeof(struct module_ref)
-#define SYMSIZ sizeof(struct symbol_table)
-#define MODSIZ sizeof(struct module)
-static struct symbol_table nulltab;
-int
-register_symtab_from(struct symbol_table *intab, long *from)
+static struct module *
+find_module(const char *name)
{
- struct module *mp;
- struct module *link;
- struct symbol_table *oldtab;
- struct symbol_table *newtab;
- struct module_ref *newref;
- int size;
-
- if (intab && (intab->n_symbols == 0)) {
- struct internal_symbol *sym;
- /* How many symbols, really? */
-
- for (sym = intab->symbol; sym->name; ++sym)
- intab->n_symbols +=1;
- }
-
- for (mp = module_list; mp != &kernel_module; mp = mp->next) {
- /*
- * "from" points to "mod_use_count_" (== start of module)
- * or is == 0 if called from a non-module
- */
- if ((unsigned long)(mp->addr) == (unsigned long)from)
+ struct module *mod;
+
+ for (mod = module_list; mod ; mod = mod->next) {
+ if (mod->flags & MOD_DELETED)
+ continue;
+ if (!strcmp(mod->name, name))
break;
}
- if (mp == &kernel_module) {
- /* Aha! Called from an "internal" module */
- if (!intab)
- return 0; /* or -ESILLY_PROGRAMMER :-) */
+ return mod;
+}
- /* create a pseudo module! */
- if (!(mp = (struct module*) kmalloc(MODSIZ, GFP_KERNEL))) {
- /* panic time! */
- printk(KERN_ERR "Out of memory for new symbol table!\n");
- return -ENOMEM;
- }
- /* else OK */
- memset(mp, 0, MODSIZ);
- mp->state = MOD_RUNNING; /* Since it is resident... */
- mp->name = ""; /* This is still the "kernel" symbol table! */
- mp->symtab = intab;
+/*
+ * Free the given module.
+ */
- /* link it in _after_ the resident symbol table */
- mp->next = kernel_module.next;
- kernel_module.next = mp;
+static void
+free_module(struct module *mod)
+{
+ struct module_ref *dep;
+ unsigned i;
- return 0;
- }
+ /* Let the module clean up. */
- /* else ******** Called from a loadable module **********/
+ mod->flags |= MOD_DELETED;
+ if (mod->flags & MOD_RUNNING) {
+ mod->cleanup();
+ mod->flags &= ~MOD_RUNNING;
+ }
- /*
- * This call should _only_ be done in the context of the
- * call to init_module i.e. when loading the module!!
- * Or else...
- */
+ /* Remove the module from the dependancy lists. */
- /* Any table there before? */
- if ((oldtab = mp->symtab) == (struct symbol_table*)0) {
- /* No, just insert it! */
- mp->symtab = intab;
- return 0;
+ for (i = 0, dep = mod->deps; i < mod->ndeps; ++i, ++dep) {
+ struct module_ref **pp;
+ for (pp = &dep->dep->refs; *pp != dep; pp = &(*pp)->next_ref)
+ continue;
+ *pp = dep->next_ref;
}
- /* else ****** we have to replace the module symbol table ******/
+ /* And from the main module list. */
- if (oldtab->n_refs == 0) { /* no problems! */
- mp->symtab = intab;
- /* if the old table was kmalloc-ed, drop it */
- if (oldtab->size > 0)
- kfree_s(oldtab, oldtab->size);
-
- return 0;
+ if (mod == module_list) {
+ module_list = mod->next;
+ } else {
+ struct module *p;
+ for (p = module_list; p->next != mod; p = p->next)
+ continue;
+ p->next = mod->next;
}
- /* else */
- /***** The module references other modules... insmod said so! *****/
- /* We have to allocate a new symbol table, or we lose them! */
- if (intab == (struct symbol_table*)0)
- intab = &nulltab; /* easier code with zeroes in place */
+ /* And free the memory. */
- /* the input symbol table space does not include the string table */
- /* (it does for symbol tables that insmod creates) */
+ vfree(mod);
+}
- if (!(newtab = (struct symbol_table*)kmalloc(
- size = SYMSIZ + intab->n_symbols * INTSIZ +
- oldtab->n_refs * REFSIZ,
- GFP_KERNEL))) {
- /* panic time! */
- printk(KERN_ERR "Out of memory for new symbol table!\n");
- return -ENOMEM;
- }
+/*
+ * Called by the /proc file system to return a current list of modules.
+ */
- /* copy up to, and including, the new symbols */
- memcpy(newtab, intab, SYMSIZ + intab->n_symbols * INTSIZ);
+int get_module_list(char *p)
+{
+ size_t left = PAGE_SIZE;
+ struct module *mod;
+ char tmpstr[64];
+ struct module_ref *ref;
- newtab->size = size;
- newtab->n_refs = oldtab->n_refs;
+ for (mod = module_list; mod != &kernel_module; mod = mod->next) {
+ long len;
+ const char *q;
+
+#define safe_copy_str(str, len) \
+ do { \
+ if (left < len) \
+ goto fini; \
+ memcpy(p, str, len); p += len, left -= len; \
+ } while (0)
+#define safe_copy_cstr(str) safe_copy_str(str, sizeof(str)-1)
+
+ len = strlen(mod->name);
+ safe_copy_str(mod->name, len);
+
+ if ((len = 20 - len) > 0) {
+ if (left < len)
+ goto fini;
+ memset(p, ' ', len);
+ p += len;
+ left -= len;
+ }
- /* copy references */
- memcpy( ((char *)newtab) + SYMSIZ + intab->n_symbols * INTSIZ,
- ((char *)oldtab) + SYMSIZ + oldtab->n_symbols * INTSIZ,
- oldtab->n_refs * REFSIZ);
+ len = sprintf(tmpstr, "%8lu", mod->size);
+ safe_copy_str(tmpstr, len);
- /* relink references from the old table to the new one */
+ if (mod->flags & MOD_RUNNING) {
+ len = sprintf(tmpstr, "%4ld",
+ (mod_member_present(mod, can_unload)
+ && mod->can_unload
+ ? -1 : mod->usecount));
+ safe_copy_str(tmpstr, len);
+ }
- /* pointer to the first reference entry in newtab! Really! */
- newref = (struct module_ref*) &(newtab->symbol[newtab->n_symbols]);
+ if (mod->flags & MOD_DELETED)
+ safe_copy_cstr(" (deleted)");
+ else if (mod->flags & MOD_RUNNING) {
+ if (mod->flags & MOD_AUTOCLEAN)
+ safe_copy_cstr(" (autoclean)");
+ if (!(mod->flags & MOD_USED_ONCE))
+ safe_copy_cstr(" (unused)");
+ } else
+ safe_copy_cstr(" (uninitialized)");
+
+ if ((ref = mod->refs) != NULL) {
+ safe_copy_cstr(" [");
+ while (1) {
+ q = ref->ref->name;
+ len = strlen(q);
+ safe_copy_str(q, len);
+
+ if ((ref = ref->next_ref) != NULL)
+ safe_copy_cstr(" ");
+ else
+ break;
+ }
+ safe_copy_cstr("]");
+ }
- /* check for reference links from previous modules */
- for ( link = module_list;
- link && (link != &kernel_module);
- link = link->next) {
+ safe_copy_cstr("\n");
- if (link->ref && (link->ref->module == mp))
- link->ref = newref++;
+#undef safe_copy_str
+#undef safe_copy_cstr
}
- mp->symtab = newtab;
+fini:
+ return PAGE_SIZE - left;
+}
- /* all references (if any) have been handled */
+/*
+ * Called by the /proc file system to return a current list of ksyms.
+ */
- /* if the old table was kmalloc-ed, drop it */
- if (oldtab->size > 0)
- kfree_s(oldtab, oldtab->size);
+int
+get_ksyms_list(char *buf, char **start, off_t offset, int length)
+{
+ struct module *mod;
+ char *p = buf;
+ int len = 0; /* code from net/ipv4/proc.c */
+ off_t pos = 0;
+ off_t begin = 0;
+
+ for (mod = module_list; mod; mod = mod->next) {
+ unsigned i;
+ struct module_symbol *sym;
- return 0;
+ if (!(mod->flags & MOD_RUNNING) || (mod->flags & MOD_DELETED))
+ continue;
+
+ for (i = mod->nsyms, sym = mod->syms; i > 0; --i, ++sym) {
+ p = buf + len;
+ if (*mod->name) {
+ len += sprintf(p, "%0*lx %s\t[%s]\n",
+ (int)(2*sizeof(void*)),
+ sym->value, sym->name,
+ mod->name);
+ } else {
+ len += sprintf(p, "%0*lx %s\n",
+ (int)(2*sizeof(void*)),
+ sym->value, sym->name);
+ }
+ pos = begin + len;
+ if (pos < offset) {
+ len = 0;
+ begin = pos;
+ }
+ pos = begin + len;
+ if (pos > offset+length)
+ goto leave_the_loop;
+ }
+ }
+leave_the_loop:
+ *start = buf + (offset - begin);
+ len -= (offset - begin);
+ if (len > length)
+ len = length;
+ return len;
}
#else /* CONFIG_MODULES */
/* Dummy syscalls for people who don't want modules */
-asmlinkage unsigned long sys_create_module(void)
+asmlinkage unsigned long
+sys_create_module(const char *name_user, size_t size)
{
return -ENOSYS;
}
-asmlinkage int sys_init_module(void)
+asmlinkage int
+sys_init_module(const char *name_user, struct module *mod_user)
{
return -ENOSYS;
}
-asmlinkage int sys_delete_module(void)
+asmlinkage int
+sys_delete_module(const char *name_user)
{
return -ENOSYS;
}
-asmlinkage int sys_get_kernel_syms(void)
+asmlinkage int
+sys_query_module(const char *name_user, int which, char *buf, size_t bufsize,
+ size_t *ret)
{
+ /* Let the program know about the new interface. Not that
+ it'll do them much good. */
+ if (which == 0)
+ return 0;
+
return -ENOSYS;
}
-int register_symtab_from(struct symbol_table *intab, long *from)
+asmlinkage int
+sys_get_kernel_syms(struct kernel_sym *table)
{
- return 0;
+ return -ENOSYS;
}
#endif /* CONFIG_MODULES */
-
diff --git a/kernel/panic.c b/kernel/panic.c
index d42541e9f..deaa2f339 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -10,15 +10,16 @@
*/
#include <stdarg.h>
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
#include <asm/sgialib.h>
asmlinkage void sys_sync(void); /* it's really int */
-extern void do_unblank_screen(void);
+extern void unblank_console(void);
extern int C_A_D;
int panic_timeout = 0;
@@ -43,9 +44,12 @@ NORET_TYPE void panic(const char * fmt, ...)
else
sys_sync();
- do_unblank_screen();
+#ifdef __SMP__
+ smp_message_pass(MSG_ALL_BUT_SELF, MSG_STOP_CPU, 0, 0);
+#endif
+
+ unblank_console();
-#ifdef CONFIG_SGI
if (panic_timeout > 0)
{
int i;
@@ -54,17 +58,20 @@ NORET_TYPE void panic(const char * fmt, ...)
* Delay timeout seconds before rebooting the machine.
* We can't use the "normal" timers since we just panicked..
*/
- prom_printf(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
+ printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
for(i = 0; i < (panic_timeout*1000); i++)
udelay(1000);
- hard_reset_now();
+ /*
+ * Should we run the reboot notifier. For the moment Im
+ * choosing not too. It might crash, be corrupt or do
+ * more harm than good for other reasons.
+ */
+ machine_restart(NULL);
}
-#if 0
- printk("Hit a key\n");
- prom_getchar();
- romvec->imode();
-#endif
+#ifdef __sparc__
+ printk("Press L1-A to return to the boot prom\n");
#endif
+ sti();
for(;;);
}
diff --git a/kernel/printk.c b/kernel/printk.c
index ed39d4fab..0d5d619b0 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -9,6 +9,7 @@
* to the console. Added hook for sending the console messages
* elsewhere, in preparation for a serial line console (someday).
* Ted Ts'o, 2/11/93.
+ * Modified for sysctl support, 1/8/97, Chris Horn.
*/
#include <stdarg.h>
@@ -21,6 +22,9 @@
#include <linux/mm.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/console.h>
#include <asm/uaccess.h>
@@ -28,8 +32,6 @@
static char buf[1024];
-extern void console_print(const char *);
-
/* printk's without a loglevel use this.. */
#define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */
@@ -39,9 +41,14 @@ extern void console_print(const char *);
unsigned long log_size = 0;
struct wait_queue * log_wait = NULL;
+
+/* Keep together for sysctl support */
int console_loglevel = DEFAULT_CONSOLE_LOGLEVEL;
+int default_message_loglevel = DEFAULT_MESSAGE_LOGLEVEL;
+int minimum_console_loglevel = MINIMUM_CONSOLE_LOGLEVEL;
+int default_console_loglevel = DEFAULT_CONSOLE_LOGLEVEL;
-static void (*console_print_proc)(const char *) = 0;
+struct console *console_drivers = NULL;
static char log_buf[LOG_BUF_LEN];
static unsigned long log_start = 0;
static unsigned long logged_chars = 0;
@@ -64,87 +71,103 @@ asmlinkage int sys_syslog(int type, char * buf, int len)
unsigned long i, j, count;
int do_clear = 0;
char c;
- int error;
+ int error = -EPERM;
+ lock_kernel();
if ((type != 3) && !suser())
- return -EPERM;
+ goto out;
+ error = 0;
switch (type) {
- case 0: /* Close log */
- return 0;
- case 1: /* Open log */
- return 0;
- case 2: /* Read from log */
- if (!buf || len < 0)
- return -EINVAL;
- if (!len)
- return 0;
- error = verify_area(VERIFY_WRITE,buf,len);
- if (error)
- return error;
- cli();
- while (!log_size) {
- if (current->signal & ~current->blocked) {
- sti();
- return -ERESTARTSYS;
- }
- interruptible_sleep_on(&log_wait);
- }
- i = 0;
- while (log_size && i < len) {
- c = *((char *) log_buf+log_start);
- log_start++;
- log_size--;
- log_start &= LOG_BUF_LEN-1;
+ case 0: /* Close log */
+ break;
+ case 1: /* Open log */
+ break;
+ case 2: /* Read from log */
+ error = -EINVAL;
+ if (!buf || len < 0)
+ goto out;
+ error = 0;
+ if (!len)
+ goto out;
+ error = verify_area(VERIFY_WRITE,buf,len);
+ if (error)
+ goto out;
+ cli();
+ error = -ERESTARTSYS;
+ while (!log_size) {
+ if (current->signal & ~current->blocked) {
sti();
- put_user(c,buf);
- buf++;
- i++;
- cli();
+ goto out;
}
+ interruptible_sleep_on(&log_wait);
+ }
+ i = 0;
+ while (log_size && i < len) {
+ c = *((char *) log_buf+log_start);
+ log_start++;
+ log_size--;
+ log_start &= LOG_BUF_LEN-1;
sti();
- return i;
- case 4: /* Read/clear last kernel messages */
- do_clear = 1;
- /* FALL THRU */
- case 3: /* Read last kernel messages */
- if (!buf || len < 0)
- return -EINVAL;
- if (!len)
- return 0;
- error = verify_area(VERIFY_WRITE,buf,len);
- if (error)
- return error;
- count = len;
- if (count > LOG_BUF_LEN)
- count = LOG_BUF_LEN;
- if (count > logged_chars)
- count = logged_chars;
- j = log_start + log_size - count;
- for (i = 0; i < count; i++) {
- c = *((char *) log_buf+(j++ & (LOG_BUF_LEN-1)));
- put_user(c, buf++);
- }
- if (do_clear)
- logged_chars = 0;
- return i;
- case 5: /* Clear ring buffer */
+ put_user(c,buf);
+ buf++;
+ i++;
+ cli();
+ }
+ sti();
+ error = i;
+ break;
+ case 4: /* Read/clear last kernel messages */
+ do_clear = 1;
+ /* FALL THRU */
+ case 3: /* Read last kernel messages */
+ error = -EINVAL;
+ if (!buf || len < 0)
+ goto out;
+ error = 0;
+ if (!len)
+ goto out;
+ error = verify_area(VERIFY_WRITE,buf,len);
+ if (error)
+ goto out;
+ count = len;
+ if (count > LOG_BUF_LEN)
+ count = LOG_BUF_LEN;
+ if (count > logged_chars)
+ count = logged_chars;
+ j = log_start + log_size - count;
+ for (i = 0; i < count; i++) {
+ c = *((char *) log_buf+(j++ & (LOG_BUF_LEN-1)));
+ put_user(c, buf++);
+ }
+ if (do_clear)
logged_chars = 0;
- return 0;
- case 6: /* Disable logging to console */
- console_loglevel = MINIMUM_CONSOLE_LOGLEVEL;
- return 0;
- case 7: /* Enable logging to console */
- console_loglevel = DEFAULT_CONSOLE_LOGLEVEL;
- return 0;
- case 8:
- if (len < 1 || len > 8)
- return -EINVAL;
- if (len < MINIMUM_CONSOLE_LOGLEVEL)
- len = MINIMUM_CONSOLE_LOGLEVEL;
- console_loglevel = len;
- return 0;
+ error = i;
+ break;
+ case 5: /* Clear ring buffer */
+ logged_chars = 0;
+ break;
+ case 6: /* Disable logging to console */
+ console_loglevel = minimum_console_loglevel;
+ break;
+ case 7: /* Enable logging to console */
+ console_loglevel = default_console_loglevel;
+ break;
+ case 8:
+ error = -EINVAL;
+ if (len < 1 || len > 8)
+ goto out;
+ if (len < minimum_console_loglevel)
+ len = minimum_console_loglevel;
+ console_loglevel = len;
+ error = 0;
+ break;
+ default:
+ error = -EINVAL;
+ break;
}
- return -EINVAL;
+out:
+ unlock_kernel();
+ return error;
}
@@ -153,11 +176,12 @@ asmlinkage int printk(const char *fmt, ...)
va_list args;
int i;
char *msg, *p, *buf_end;
- static char msg_level = -1;
+ int line_feed;
+ static signed char msg_level = -1;
long flags;
- save_flags(flags);
- cli();
+ __save_flags(flags);
+ __cli();
va_start(args, fmt);
i = vsprintf(buf + 3, fmt, args); /* hopefully i < sizeof(buf)-4 */
buf_end = buf + 3 + i;
@@ -173,12 +197,13 @@ asmlinkage int printk(const char *fmt, ...)
) {
p -= 3;
p[0] = '<';
- p[1] = DEFAULT_MESSAGE_LOGLEVEL + '0';
+ p[1] = default_message_loglevel + '0';
p[2] = '>';
} else
msg += 3;
msg_level = p[1] - '0';
}
+ line_feed = 0;
for (; p < buf_end; p++) {
log_buf[(log_start+log_size) & (LOG_BUF_LEN-1)] = *p;
if (log_size < LOG_BUF_LEN)
@@ -188,38 +213,64 @@ asmlinkage int printk(const char *fmt, ...)
log_start &= LOG_BUF_LEN-1;
}
logged_chars++;
- if (*p == '\n')
+ if (*p == '\n') {
+ line_feed = 1;
break;
+ }
}
- if (msg_level < console_loglevel && console_print_proc) {
- char tmp = p[1];
- p[1] = '\0';
- (*console_print_proc)(msg);
- p[1] = tmp;
+ if (msg_level < console_loglevel && console_drivers) {
+ struct console *c = console_drivers;
+ while(c) {
+ if (c->write)
+ c->write(msg, p - msg + line_feed);
+ c = c->next;
+ }
}
- if (*p == '\n')
+ if (line_feed)
msg_level = -1;
}
- restore_flags(flags);
+ __restore_flags(flags);
wake_up_interruptible(&log_wait);
return i;
}
+void console_print(const char *s)
+{
+ struct console *c = console_drivers;
+ int len = strlen(s);
+ while(c) {
+ if (c->write)
+ c->write(s, len);
+ c = c->next;
+ }
+}
+
+void unblank_console(void)
+{
+ struct console *c = console_drivers;
+ while(c) {
+ if (c->unblank)
+ c->unblank();
+ c = c->next;
+ }
+}
+
/*
* The console driver calls this routine during kernel initialization
* to register the console printing procedure with printk() and to
* print any messages that were printed by the kernel before the
* console driver was initialized.
*/
-void register_console(void (*proc)(const char *))
+void register_console(struct console * console)
{
- int i,j;
+ int i,j,len;
int p = log_start;
char buf[16];
- char msg_level = -1;
+ signed char msg_level = -1;
char *q;
- console_print_proc = proc;
+ console->next = console_drivers;
+ console_drivers = console;
for (i=0,j=0; i < log_size; i++) {
buf[j++] = log_buf[p];
@@ -228,12 +279,14 @@ void register_console(void (*proc)(const char *))
continue;
buf[j] = 0;
q = buf;
+ len = j;
if (msg_level < 0) {
msg_level = buf[1] - '0';
q = buf + 3;
+ len -= 3;
}
if (msg_level < console_loglevel)
- (*proc)(q);
+ console->write(q, len);
if (buf[j-1] == '\n')
msg_level = -1;
j = 0;
diff --git a/kernel/resource.c b/kernel/resource.c
index 48184bfcf..27abcf4dc 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -13,7 +13,7 @@
#include <linux/types.h>
#include <linux/ioport.h>
-#define IOTABLE_SIZE 64
+#define IOTABLE_SIZE 128
typedef struct resource_entry_t {
u_long from, num;
@@ -69,7 +69,7 @@ static resource_entry_t *find_gap(resource_entry_t *root,
/*
* Call this from the device driver to register the ioport region.
*/
-void request_region(unsigned int from, unsigned int num, const char *name)
+void request_region(unsigned long from, unsigned long num, const char *name)
{
resource_entry_t *p;
int i;
@@ -95,7 +95,7 @@ void request_region(unsigned int from, unsigned int num, const char *name)
/*
* Call this when the device driver is unloaded
*/
-void release_region(unsigned int from, unsigned int num)
+void release_region(unsigned long from, unsigned long num)
{
resource_entry_t *p, *q;
@@ -114,11 +114,72 @@ void release_region(unsigned int from, unsigned int num)
/*
* Call this to check the ioport region before probing
*/
-int check_region(unsigned int from, unsigned int num)
+int check_region(unsigned long from, unsigned long num)
{
return (find_gap(&iolist, from, num) == NULL) ? -EBUSY : 0;
}
+#ifdef __sparc__ /* Why to carry unused code on other architectures? */
+/*
+ * This is for architectures with MMU-managed ports (sparc).
+ */
+unsigned long occupy_region(unsigned long base, unsigned long end,
+ unsigned long num, unsigned int align, const char *name)
+{
+ unsigned long from = 0, till;
+ unsigned long flags;
+ int i;
+ resource_entry_t *p; /* Scanning ptr */
+ resource_entry_t *p1; /* === p->next */
+ resource_entry_t *s; /* Found slot */
+
+ if (base > end-1)
+ return 0;
+ if (num > end - base)
+ return 0;
+
+ for (i = 0; i < IOTABLE_SIZE; i++)
+ if (iotable[i].num == 0)
+ break;
+ if (i == IOTABLE_SIZE) {
+ /* Driver prints a warning typicaly. */
+ return 0;
+ }
+
+ save_flags(flags);
+ cli();
+ /* printk("occupy: search in %08lx[%08lx] ", base, end - base); */
+ s = NULL;
+ for (p = &iolist; p != NULL; p = p1) {
+ p1 = p->next;
+ /* Find window in list */
+ from = (p->from+p->num + align-1) & ~((unsigned long)align-1);
+ till = (p1 == NULL)? (unsigned long) (0 - (unsigned long)align): p1->from;
+ /* printk(" %08lx:%08lx", from, till); */
+ /* Clip window with base and end */
+ if (from < base) from = base;
+ if (till > end) till = end;
+ /* See if result is large enougth */
+ if (from < till && from + num < till) {
+ s = p;
+ break;
+ }
+ }
+ /* printk("\r\n"); */
+ restore_flags(flags);
+
+ if (s == NULL)
+ return 0;
+
+ iotable[i].name = name;
+ iotable[i].from = from;
+ iotable[i].num = num;
+ iotable[i].next = s->next;
+ s->next = &iotable[i];
+ return from;
+}
+#endif
+
/* Called from init/main.c to reserve IO ports. */
void reserve_setup(char *str, int *ints)
{
diff --git a/kernel/sched.c b/kernel/sched.c
index 98502b3fc..bc256d029 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4,6 +4,9 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*
* 1996-04-21 Modified by Ulrich Windl to make NTP work
+ * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and
+ * make semaphores SMP safe
+ * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
*/
/*
@@ -28,12 +31,14 @@
#include <linux/resource.h>
#include <linux/mm.h>
#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
+#include <asm/spinlock.h>
#include <linux/timex.h>
@@ -44,7 +49,7 @@
int securelevel = 0; /* system security level */
long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */
-volatile struct timeval xtime; /* The current time */
+volatile struct timeval xtime __attribute__ ((aligned (8))); /* The current time */
int tickadj = 500/HZ; /* microsecs */
DECLARE_TASK_QUEUE(tq_timer);
@@ -100,7 +105,12 @@ struct task_struct init_task = INIT_TASK;
unsigned long volatile jiffies=0;
-struct task_struct *current_set[NR_CPUS];
+/*
+ * Init task must be ok at boot for the ix86 as we will check its signals
+ * via the SMP irq return path.
+ */
+
+struct task_struct *current_set[NR_CPUS] = {&init_task, };
struct task_struct *last_task_used_math = NULL;
struct task_struct * task[NR_TASKS] = {&init_task, };
@@ -109,9 +119,6 @@ struct kernel_stat kstat = { 0 };
static inline void add_to_runqueue(struct task_struct * p)
{
-#ifdef __SMP__
- int cpu=smp_processor_id();
-#endif
#if 1 /* sanity tests */
if (p->next_run || p->prev_run) {
printk("task already on run-queue\n");
@@ -124,36 +131,6 @@ static inline void add_to_runqueue(struct task_struct * p)
(p->prev_run = init_task.prev_run)->next_run = p;
p->next_run = &init_task;
init_task.prev_run = p;
-#ifdef __SMP__
- /* this is safe only if called with cli()*/
- while(set_bit(31,&smp_process_available));
-#if 0
- {
- while(test_bit(31,&smp_process_available))
- {
- if(clear_bit(cpu,&smp_invalidate_needed))
- {
- local_flush_tlb();
- set_bit(cpu,&cpu_callin_map[0]);
- }
- }
- }
-#endif
- smp_process_available++;
- clear_bit(31,&smp_process_available);
- if ((0!=p->pid) && smp_threads_ready)
- {
- int i;
- for (i=0;i<smp_num_cpus;i++)
- {
- if (0==current_set[cpu_logical_map[i]]->pid)
- {
- smp_message_pass(cpu_logical_map[i], MSG_RESCHEDULE, 0L, 0);
- break;
- }
- }
- }
-#endif
}
static inline void del_from_runqueue(struct task_struct * p)
@@ -167,7 +144,7 @@ static inline void del_from_runqueue(struct task_struct * p)
return;
}
#endif
- if (p == &init_task) {
+ if (!p->pid) {
static int nr = 0;
if (nr < 5) {
nr++;
@@ -199,6 +176,21 @@ static inline void move_last_runqueue(struct task_struct * p)
}
/*
+ * The tasklist_lock protects the linked list of processes.
+ *
+ * The scheduler lock is protecting against multiple entry
+ * into the scheduling code, and doesn't need to worry
+ * about interrupts (because interrupts cannot call the
+ * scheduler).
+ *
+ * The run-queue lock locks the parts that actually access
+ * and change the run-queues, and have to be interrupt-safe.
+ */
+rwlock_t tasklist_lock = RW_LOCK_UNLOCKED;
+spinlock_t scheduler_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t runqueue_lock = SPIN_LOCK_UNLOCKED;
+
+/*
* Wake up a process. Put it on the run-queue if it's not
* already there. The "current" process is always on the
* run-queue (except when the actual re-schedule is in
@@ -210,12 +202,11 @@ inline void wake_up_process(struct task_struct * p)
{
unsigned long flags;
- save_flags(flags);
- cli();
+ spin_lock_irqsave(&runqueue_lock, flags);
p->state = TASK_RUNNING;
if (!p->next_run)
add_to_runqueue(p);
- restore_flags(flags);
+ spin_unlock_irqrestore(&runqueue_lock, flags);
}
static void process_timeout(unsigned long __data)
@@ -243,17 +234,6 @@ static inline int goodness(struct task_struct * p, struct task_struct * prev, in
{
int weight;
-#ifdef __SMP__
- /* We are not permitted to run a task someone else is running */
- if (p->processor != NO_PROC_ID)
- return -1000;
-#ifdef PAST_2_0
- /* This process is locked to a processor group */
- if (p->processor_mask && !(p->processor_mask & (1<<this_cpu))
- return -1000;
-#endif
-#endif
-
/*
* Realtime process, select the first one on the
* runqueue (taking priorities within processes
@@ -287,6 +267,18 @@ static inline int goodness(struct task_struct * p, struct task_struct * prev, in
return weight;
}
+#ifdef __SMP__
+
+#define idle_task (task[cpu_number_map[this_cpu]])
+#define can_schedule(p) ((p)->processor == NO_PROC_ID)
+
+#else
+
+#define idle_task (&init_task)
+#define can_schedule(p) (1)
+
+#endif
+
/*
* 'schedule()' is the scheduler function. It's a very simple and nice
* scheduler: it's not perfect, but certainly works for most things.
@@ -299,33 +291,39 @@ static inline int goodness(struct task_struct * p, struct task_struct * prev, in
*/
asmlinkage void schedule(void)
{
- int c;
- struct task_struct * p;
+ int lock_depth;
struct task_struct * prev, * next;
- unsigned long timeout = 0;
- int this_cpu=smp_processor_id();
+ unsigned long timeout;
+ int this_cpu;
-/* check alarm, wake up any interruptible tasks that have got a signal */
-
- if (intr_count)
- goto scheduling_in_interrupt;
-
- if (bh_active & bh_mask) {
- intr_count = 1;
- do_bottom_half();
- intr_count = 0;
+ need_resched = 0;
+ this_cpu = smp_processor_id();
+ if (local_irq_count[this_cpu]) {
+ printk("Scheduling in interrupt\n");
+ *(char *)0 = 0;
}
+ prev = current;
+ release_kernel_lock(prev, this_cpu, lock_depth);
+ if (bh_active & bh_mask)
+ do_bottom_half();
- run_task_queue(&tq_scheduler);
+ spin_lock(&scheduler_lock);
+ spin_lock_irq(&runqueue_lock);
- need_resched = 0;
- prev = current;
- cli();
/* move an exhausted RR process to be last.. */
if (!prev->counter && prev->policy == SCHED_RR) {
- prev->counter = prev->priority;
- move_last_runqueue(prev);
+ if (prev->pid) {
+ prev->counter = prev->priority;
+ move_last_runqueue(prev);
+ } else {
+ static int count = 5;
+ if (count) {
+ count--;
+ printk("Moving pid 0 last\n");
+ }
+ }
}
+ timeout = 0;
switch (prev->state) {
case TASK_INTERRUPTIBLE:
if (prev->signal & ~prev->blocked)
@@ -342,54 +340,55 @@ asmlinkage void schedule(void)
del_from_runqueue(prev);
case TASK_RUNNING:
}
- p = init_task.next_run;
- sti();
-
+ {
+ struct task_struct * p = init_task.next_run;
+ /*
+ * This is subtle.
+ * Note how we can enable interrupts here, even
+ * though interrupts can add processes to the run-
+ * queue. This is because any new processes will
+ * be added to the front of the queue, so "p" above
+ * is a safe starting point.
+ * run-queue deletion and re-ordering is protected by
+ * the scheduler lock
+ */
+ spin_unlock_irq(&runqueue_lock);
#ifdef __SMP__
- /*
- * This is safe as we do not permit re-entry of schedule()
- */
- prev->processor = NO_PROC_ID;
-#define idle_task (task[cpu_number_map[this_cpu]])
-#else
-#define idle_task (&init_task)
-#endif
-
+ prev->processor = NO_PROC_ID;
+#endif
+
/*
* Note! there may appear new tasks on the run-queue during this, as
* interrupts are enabled. However, they will be put on front of the
* list, so our list starting at "p" is essentially fixed.
*/
/* this is the scheduler proper: */
- c = -1000;
- next = idle_task;
- while (p != &init_task) {
- int weight = goodness(p, prev, this_cpu);
- if (weight > c)
- c = weight, next = p;
- p = p->next_run;
- }
+ {
+ int c = -1000;
+ next = idle_task;
+ while (p != &init_task) {
+ if (can_schedule(p)) {
+ int weight = goodness(p, prev, this_cpu);
+ if (weight > c)
+ c = weight, next = p;
+ }
+ p = p->next_run;
+ }
- /* if all runnable processes have "counter == 0", re-calculate counters */
- if (!c) {
- for_each_task(p)
- p->counter = (p->counter >> 1) + p->priority;
+ /* Do we need to re-calculate counters? */
+ if (!c) {
+ struct task_struct *p;
+ read_lock(&tasklist_lock);
+ for_each_task(p)
+ p->counter = (p->counter >> 1) + p->priority;
+ read_unlock(&tasklist_lock);
+ }
+ }
}
-#ifdef __SMP__
- /*
- * Allocate process to CPU
- */
-
- next->processor = this_cpu;
- next->last_processor = this_cpu;
-#endif
-#ifdef __SMP_PROF__
- /* mark processor running an idle thread */
- if (0==next->pid)
- set_bit(this_cpu,&smp_idle_map);
- else
- clear_bit(this_cpu,&smp_idle_map);
-#endif
+
+ next->processor = this_cpu;
+ next->last_processor = this_cpu;
+
if (prev != next) {
struct timer_list timer;
@@ -404,14 +403,13 @@ asmlinkage void schedule(void)
get_mmu_context(next);
switch_to(prev,next);
+
if (timeout)
del_timer(&timer);
}
- return;
+ spin_unlock(&scheduler_lock);
-scheduling_in_interrupt:
- printk("Aiee: scheduling in interrupt %p\n",
- return_address());
+ reacquire_kernel_lock(prev, smp_processor_id(), lock_depth);
}
#ifndef __alpha__
@@ -429,93 +427,92 @@ asmlinkage int sys_pause(void)
#endif
+spinlock_t waitqueue_lock;
+
/*
* wake_up doesn't wake up stopped processes - they have to be awakened
* with signals or similar.
- *
- * Note that this doesn't need cli-sti pairs: interrupts may not change
- * the wait-queue structures directly, but only call wake_up() to wake
- * a process. The process itself must remove the queue once it has woken.
*/
void wake_up(struct wait_queue **q)
{
+ unsigned long flags;
struct wait_queue *next;
struct wait_queue *head;
- if (!q || !(next = *q))
- return;
- head = WAIT_QUEUE_HEAD(q);
- while (next != head) {
- struct task_struct *p = next->task;
- next = next->next;
- if (p != NULL) {
- if ((p->state == TASK_UNINTERRUPTIBLE) ||
- (p->state == TASK_INTERRUPTIBLE))
- wake_up_process(p);
+ spin_lock_irqsave(&waitqueue_lock, flags);
+ if (q && (next = *q)) {
+ head = WAIT_QUEUE_HEAD(q);
+ while (next != head) {
+ struct task_struct *p = next->task;
+ next = next->next;
+ if (p != NULL) {
+ if ((p->state == TASK_UNINTERRUPTIBLE) ||
+ (p->state == TASK_INTERRUPTIBLE))
+ wake_up_process(p);
+ }
+ if (next)
+ continue;
+ printk("wait_queue is bad (eip = %p)\n",
+ __builtin_return_address(0));
+ printk(" q = %p\n",q);
+ printk(" *q = %p\n",*q);
+ break;
}
- if (!next)
- goto bad;
}
- return;
-bad:
- printk("wait_queue is bad (eip = %p)\n",
- __builtin_return_address(0));
- printk(" q = %p\n",q);
- printk(" *q = %p\n",*q);
+ spin_unlock_irqrestore(&waitqueue_lock, flags);
}
void wake_up_interruptible(struct wait_queue **q)
{
+ unsigned long flags;
struct wait_queue *next;
struct wait_queue *head;
- if (!q || !(next = *q))
- return;
- head = WAIT_QUEUE_HEAD(q);
- while (next != head) {
- struct task_struct *p = next->task;
- next = next->next;
- if (p != NULL) {
- if (p->state == TASK_INTERRUPTIBLE)
- wake_up_process(p);
+ spin_lock_irqsave(&waitqueue_lock, flags);
+ if (q && (next = *q)) {
+ head = WAIT_QUEUE_HEAD(q);
+ while (next != head) {
+ struct task_struct *p = next->task;
+ next = next->next;
+ if (p != NULL) {
+ if (p->state == TASK_INTERRUPTIBLE)
+ wake_up_process(p);
+ }
+ if (next)
+ continue;
+ printk("wait_queue is bad (eip = %p)\n",
+ __builtin_return_address(0));
+ printk(" q = %p\n",q);
+ printk(" *q = %p\n",*q);
+ break;
}
- if (!next)
- goto bad;
}
- return;
-bad:
- printk("wait_queue is bad (eip = %p)\n",
- return_address());
- printk(" q = %p\n",q);
- printk(" *q = %p\n",*q);
+ spin_unlock_irqrestore(&waitqueue_lock, flags);
}
/*
* Semaphores are implemented using a two-way counter:
* The "count" variable is decremented for each process
- * that tries to sleep, while the "waiting" variable is
- * incremented _while_ the process is sleeping on that
- * semaphore.
+ * that tries to sleep, while the "waking" variable is
+ * incremented when the "up()" code goes to wake up waiting
+ * processes.
*
* Notably, the inline "up()" and "down()" functions can
* efficiently test if they need to do any extra work (up
* needs to do something only if count was negative before
* the increment operation.
- */
-static inline void normalize_semaphore(struct semaphore *sem)
-{
- atomic_add(xchg(&sem->waiting,0), &sem->count);
-}
-
-/*
+ *
+ * waking_non_zero() (from asm/semaphore.h) must execute
+ * atomically.
+ *
* When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody. In
- * most cases "waiting" will be positive, and the normalization
- * will allow things to continue. However, if somebody has
- * /just/ done a down(), it may be that count was negative
- * without waiting being positive (or in the generic case
- * "count is more negative than waiting is positive"), and
- * the waiter needs to check this itself (see __down).
+ * incrementing it, and we need to wake up somebody.
+ *
+ * This routine adds one to the count of processes that need to
+ * wake up and exit. ALL waiting processes actually wake up but
+ * only the one that gets to the "waking" field first will gate
+ * through and acquire the semaphore. The others will go back
+ * to sleep.
*
* Note that these functions are only called when there is
* contention on the lock, and as such all this is the
@@ -525,55 +522,83 @@ static inline void normalize_semaphore(struct semaphore *sem)
*/
void __up(struct semaphore *sem)
{
- normalize_semaphore(sem);
+ wake_one_more(sem);
wake_up(&sem->wait);
}
-void __down(struct semaphore * sem)
+/*
+ * Perform the "down" function. Return zero for semaphore acquired,
+ * return negative for signalled out of the function.
+ *
+ * If called from __down, the return is ignored and the wait loop is
+ * not interruptible. This means that a task waiting on a semaphore
+ * using "down()" cannot be killed until someone does an "up()" on
+ * the semaphore.
+ *
+ * If called from __down_interruptible, the return value gets checked
+ * upon return. If the return value is negative then the task continues
+ * with the negative value in the return register (it can be tested by
+ * the caller).
+ *
+ * Either form may be used in conjunction with "up()".
+ *
+ */
+static inline int __do_down(struct semaphore * sem, int task_state)
{
struct task_struct *tsk = current;
struct wait_queue wait = { tsk, NULL };
+ int ret = 0;
- /*
- * The order here is important. We add ourselves to the
- * wait queues and mark ourselves sleeping _first_. That
- * way, if a "up()" comes in here, we'll either get
- * woken up (up happens after the wait queues are set up)
- * OR we'll have "waiting > 0".
- */
- tsk->state = TASK_UNINTERRUPTIBLE;
+ tsk->state = task_state;
add_wait_queue(&sem->wait, &wait);
- atomic_inc(&sem->waiting);
/*
- * Ok, we're set up. The only race here is really that
- * an "up()" might have incremented count before we got
- * here, so we check "count+waiting". If that is larger
- * than zero, we shouldn't sleep, but re-try the lock.
+ * Ok, we're set up. sem->count is known to be less than zero
+ * so we must wait.
+ *
+ * We can let go the lock for purposes of waiting.
+ * We re-acquire it after awaking so as to protect
+ * all semaphore operations.
+ *
+ * If "up()" is called before we call waking_non_zero() then
+ * we will catch it right away. If it is called later then
+ * we will have to go through a wakeup cycle to catch it.
+ *
+ * Multiple waiters contend for the semaphore lock to see
+ * who gets to gate through and who has to wait some more.
*/
- if (sem->count+sem->waiting <= 0) {
- /*
- * If "count+waiting" <= 0, we have to wait
- * for a up(), which will normalize the count.
- * Remember, at this point we have decremented
- * count, and incremented up, so if count is
- * zero or positive we need to return to re-try
- * the lock. It _may_ be that both count and
- * waiting is zero and that it is still locked,
- * but we still want to re-try the lock in that
- * case to make count go negative again so that
- * the optimized "up()" wake_up sequence works.
- */
- do {
- schedule();
- tsk->state = TASK_UNINTERRUPTIBLE;
- } while (sem->count < 0);
+ for (;;) {
+ if (waking_non_zero(sem)) /* are we waking up? */
+ break; /* yes, exit loop */
+
+ if ( task_state == TASK_INTERRUPTIBLE
+ && (tsk->signal & ~tsk->blocked) /* signalled */
+ ) {
+ ret = -EINTR; /* interrupted */
+ atomic_inc(&sem->count); /* give up on down operation */
+ break;
+ }
+
+ schedule();
+ tsk->state = task_state;
}
+
tsk->state = TASK_RUNNING;
remove_wait_queue(&sem->wait, &wait);
- normalize_semaphore(sem);
+ return ret;
+}
+
+void __down(struct semaphore * sem)
+{
+ __do_down(sem,TASK_UNINTERRUPTIBLE);
}
+int __down_interruptible(struct semaphore * sem)
+{
+ return __do_down(sem,TASK_INTERRUPTIBLE);
+}
+
+
static inline void __sleep_on(struct wait_queue **p, int state)
{
unsigned long flags;
@@ -584,14 +609,14 @@ static inline void __sleep_on(struct wait_queue **p, int state)
if (current == task[0])
panic("task[0] trying to sleep");
current->state = state;
- save_flags(flags);
- cli();
+ spin_lock_irqsave(&waitqueue_lock, flags);
__add_wait_queue(p, &wait);
+ spin_unlock(&waitqueue_lock);
sti();
schedule();
- cli();
+ spin_lock_irq(&waitqueue_lock);
__remove_wait_queue(p, &wait);
- restore_flags(flags);
+ spin_unlock_irqrestore(&waitqueue_lock, flags);
}
void interruptible_sleep_on(struct wait_queue **p)
@@ -604,74 +629,178 @@ void sleep_on(struct wait_queue **p)
__sleep_on(p,TASK_UNINTERRUPTIBLE);
}
-/*
- * The head for the timer-list has a "expires" field of MAX_UINT,
- * and the sorting routine counts on this..
- */
-static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL };
+
+#define TVN_BITS 6
+#define TVR_BITS 8
+#define TVN_SIZE (1 << TVN_BITS)
+#define TVR_SIZE (1 << TVR_BITS)
+#define TVN_MASK (TVN_SIZE - 1)
+#define TVR_MASK (TVR_SIZE - 1)
+
#define SLOW_BUT_DEBUGGING_TIMERS 0
-void add_timer(struct timer_list * timer)
+struct timer_vec {
+ int index;
+ struct timer_list *vec[TVN_SIZE];
+};
+
+struct timer_vec_root {
+ int index;
+ struct timer_list *vec[TVR_SIZE];
+};
+
+static struct timer_vec tv5 = { 0 };
+static struct timer_vec tv4 = { 0 };
+static struct timer_vec tv3 = { 0 };
+static struct timer_vec tv2 = { 0 };
+static struct timer_vec_root tv1 = { 0 };
+
+static struct timer_vec * const tvecs[] = {
+ (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
+};
+
+#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
+
+static unsigned long timer_jiffies = 0;
+
+static inline void insert_timer(struct timer_list *timer,
+ struct timer_list **vec, int idx)
+{
+ if ((timer->next = vec[idx]))
+ vec[idx]->prev = timer;
+ vec[idx] = timer;
+ timer->prev = (struct timer_list *)&vec[idx];
+}
+
+static inline void internal_add_timer(struct timer_list *timer)
+{
+ /*
+ * must be cli-ed when calling this
+ */
+ unsigned long expires = timer->expires;
+ unsigned long idx = expires - timer_jiffies;
+
+ if (idx < TVR_SIZE) {
+ int i = expires & TVR_MASK;
+ insert_timer(timer, tv1.vec, i);
+ } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
+ int i = (expires >> TVR_BITS) & TVN_MASK;
+ insert_timer(timer, tv2.vec, i);
+ } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
+ insert_timer(timer, tv3.vec, i);
+ } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
+ insert_timer(timer, tv4.vec, i);
+ } else if (expires < timer_jiffies) {
+ /* can happen if you add a timer with expires == jiffies,
+ * or you set a timer to go off in the past
+ */
+ insert_timer(timer, tv1.vec, tv1.index);
+ } else if (idx < 0xffffffffUL) {
+ int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
+ insert_timer(timer, tv5.vec, i);
+ } else {
+ /* Can only get here on architectures with 64-bit jiffies */
+ timer->next = timer->prev = timer;
+ }
+}
+
+static spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
+
+void add_timer(struct timer_list *timer)
{
unsigned long flags;
- struct timer_list *p;
+ spin_lock_irqsave(&timerlist_lock, flags);
#if SLOW_BUT_DEBUGGING_TIMERS
- if (timer->next || timer->prev) {
- printk("add_timer() called with non-zero list from %p\n",
- __builtin_return_address(0));
- return;
- }
+ if (timer->next || timer->prev) {
+ printk("add_timer() called with non-zero list from %p\n",
+ __builtin_return_address(0));
+ goto out;
+ }
#endif
- p = &timer_head;
- save_flags(flags);
- cli();
- do {
- p = p->next;
- } while (timer->expires > p->expires);
- timer->next = p;
- timer->prev = p->prev;
- p->prev = timer;
- timer->prev->next = timer;
- restore_flags(flags);
+ internal_add_timer(timer);
+#if SLOW_BUT_DEBUGGING_TIMERS
+out:
+#endif
+ spin_unlock_irqrestore(&timerlist_lock, flags);
}
-int del_timer(struct timer_list * timer)
+static inline int detach_timer(struct timer_list *timer)
{
int ret = 0;
- if (timer->next) {
- unsigned long flags;
- struct timer_list * next;
- save_flags(flags);
- cli();
- if ((next = timer->next) != NULL) {
- (next->prev = timer->prev)->next = next;
- timer->next = timer->prev = NULL;
- ret = 1;
- }
- restore_flags(flags);
+ struct timer_list *next, *prev;
+ next = timer->next;
+ prev = timer->prev;
+ if (next) {
+ next->prev = prev;
+ }
+ if (prev) {
+ ret = 1;
+ prev->next = next;
}
return ret;
}
-static inline void run_timer_list(void)
+
+int del_timer(struct timer_list * timer)
{
- struct timer_list * timer;
+ int ret;
+ unsigned long flags;
- cli();
- while ((timer = timer_head.next) != &timer_head && timer->expires <= jiffies) {
- void (*fn)(unsigned long) = timer->function;
- unsigned long data = timer->data;
- timer->next->prev = timer->prev;
- timer->prev->next = timer->next;
- timer->next = timer->prev = NULL;
- sti();
- fn(data);
- cli();
+ spin_lock_irqsave(&timerlist_lock, flags);
+ ret = detach_timer(timer);
+ timer->next = timer->prev = 0;
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return ret;
+}
+
+static inline void cascade_timers(struct timer_vec *tv)
+{
+ /* cascade all the timers from tv up one level */
+ struct timer_list *timer;
+ timer = tv->vec[tv->index];
+ /*
+ * We are removing _all_ timers from the list, so we don't have to
+ * detach them individually, just clear the list afterwards.
+ */
+ while (timer) {
+ struct timer_list *tmp = timer;
+ timer = timer->next;
+ internal_add_timer(tmp);
+ }
+ tv->vec[tv->index] = NULL;
+ tv->index = (tv->index + 1) & TVN_MASK;
+}
+
+static inline void run_timer_list(void)
+{
+ spin_lock_irq(&timerlist_lock);
+ while ((long)(jiffies - timer_jiffies) >= 0) {
+ struct timer_list *timer;
+ if (!tv1.index) {
+ int n = 1;
+ do {
+ cascade_timers(tvecs[n]);
+ } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
+ }
+ while ((timer = tv1.vec[tv1.index])) {
+ void (*fn)(unsigned long) = timer->function;
+ unsigned long data = timer->data;
+ detach_timer(timer);
+ timer->next = timer->prev = NULL;
+ spin_unlock_irq(&timerlist_lock);
+ fn(data);
+ spin_lock_irq(&timerlist_lock);
+ }
+ ++timer_jiffies;
+ tv1.index = (tv1.index + 1) & TVR_MASK;
}
- sti();
+ spin_unlock_irq(&timerlist_lock);
}
+
static inline void run_old_timers(void)
{
struct timer_struct *tp;
@@ -690,6 +819,8 @@ static inline void run_old_timers(void)
}
}
+spinlock_t tqueue_lock;
+
void tqueue_bh(void)
{
run_task_queue(&tq_timer);
@@ -974,7 +1105,7 @@ static inline void do_it_prof(struct task_struct * p, unsigned long ticks)
}
}
-static __inline__ void update_one_process(struct task_struct *p,
+void update_one_process(struct task_struct *p,
unsigned long ticks, unsigned long user, unsigned long system)
{
do_process_times(p, user, system);
@@ -984,6 +1115,9 @@ static __inline__ void update_one_process(struct task_struct *p,
static void update_process_times(unsigned long ticks, unsigned long system)
{
+/*
+ * SMP does this on a per-CPU basis elsewhere
+ */
#ifndef __SMP__
struct task_struct * p = current;
unsigned long user = ticks - system;
@@ -1000,79 +1134,35 @@ static void update_process_times(unsigned long ticks, unsigned long system)
kstat.cpu_system += system;
}
update_one_process(p, ticks, user, system);
-#else
- int cpu,j;
- cpu = smp_processor_id();
- for (j=0;j<smp_num_cpus;j++)
- {
- int i = cpu_logical_map[j];
- struct task_struct *p;
-
-#ifdef __SMP_PROF__
- if (test_bit(i,&smp_idle_map))
- smp_idle_count[i]++;
-#endif
- p = current_set[i];
- /*
- * Do we have a real process?
- */
- if (p->pid) {
- /* assume user-mode process */
- unsigned long utime = ticks;
- unsigned long stime = 0;
- if (cpu == i) {
- utime = ticks-system;
- stime = system;
- } else if (smp_proc_in_lock[j]) {
- utime = 0;
- stime = ticks;
- }
- update_one_process(p, ticks, utime, stime);
-
- if (p->priority < DEF_PRIORITY)
- kstat.cpu_nice += utime;
- else
- kstat.cpu_user += utime;
- kstat.cpu_system += stime;
-
- p->counter -= ticks;
- if (p->counter >= 0)
- continue;
- p->counter = 0;
- } else {
- /*
- * Idle processor found, do we have anything
- * we could run?
- */
- if (!(0x7fffffff & smp_process_available))
- continue;
- }
- /* Ok, we should reschedule, do the magic */
- if (i==cpu)
- need_resched = 1;
- else
- smp_message_pass(i, MSG_RESCHEDULE, 0L, 0);
- }
#endif
}
-static unsigned long lost_ticks = 0;
+volatile unsigned long lost_ticks = 0;
static unsigned long lost_ticks_system = 0;
static inline void update_times(void)
{
unsigned long ticks;
+ unsigned long flags;
+
+ save_flags(flags);
+ cli();
- ticks = xchg(&lost_ticks, 0);
+ ticks = lost_ticks;
+ lost_ticks = 0;
if (ticks) {
unsigned long system;
-
system = xchg(&lost_ticks_system, 0);
+
calc_load(ticks);
update_wall_time(ticks);
+ restore_flags(flags);
+
update_process_times(ticks, system);
- }
+
+ } else
+ restore_flags(flags);
}
static void timer_bh(void)
@@ -1087,17 +1177,8 @@ void do_timer(struct pt_regs * regs)
(*(unsigned long *)&jiffies)++;
lost_ticks++;
mark_bh(TIMER_BH);
- if (!user_mode(regs)) {
+ if (!user_mode(regs))
lost_ticks_system++;
- if (prof_buffer && current->pid) {
- extern int _stext;
- unsigned long ip = instruction_pointer(regs);
- ip -= (unsigned long) &_stext;
- ip >>= prof_shift;
- if (ip < prof_len)
- prof_buffer[ip]++;
- }
- }
if (tq_timer)
mark_bh(TQUEUE_BH);
}
@@ -1129,34 +1210,81 @@ asmlinkage unsigned int sys_alarm(unsigned int seconds)
* The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
* should be moved into arch/i386 instead?
*/
+
asmlinkage int sys_getpid(void)
{
+	/* This is SMP safe - current->pid doesn't change */
return current->pid;
}
+/*
+ * This is not strictly SMP safe: p_opptr could change
+ * from under us. However, rather than getting any lock
+ * we can use an optimistic algorithm: get the parent
+ * pid, and go back and check that the parent is still
+ * the same. If it has changed (which is extremely unlikely
+ * indeed), we just try again..
+ *
+ * NOTE! This depends on the fact that even if we _do_
+ * get an old value of "parent", we can happily dereference
+ * the pointer: we just can't necessarily trust the result
+ * until we know that the parent pointer is valid.
+ *
+ * The "mb()" macro is a memory barrier - a synchronizing
+ * event. It also makes sure that gcc doesn't optimize
+ * away the necessary memory references.. The barrier doesn't
+ * have to have all that strong semantics: on x86 we don't
+ * really require a synchronizing instruction, for example.
+ * The barrier is more important for code generation than
+ * for any real memory ordering semantics (even if there is
+ * a small window for a race, using the old pointer is
+ * harmless for a while).
+ */
asmlinkage int sys_getppid(void)
{
- return current->p_opptr->pid;
+ int pid;
+ struct task_struct * me = current;
+ struct task_struct * parent;
+
+ parent = me->p_opptr;
+ for (;;) {
+ pid = parent->pid;
+#if __SMP__
+{
+ struct task_struct *old = parent;
+ mb();
+ parent = me->p_opptr;
+ if (old != parent)
+ continue;
+}
+#endif
+ break;
+ }
+ return pid;
}
asmlinkage int sys_getuid(void)
{
+ /* Only we change this so SMP safe */
return current->uid;
}
asmlinkage int sys_geteuid(void)
{
+ /* Only we change this so SMP safe */
return current->euid;
}
asmlinkage int sys_getgid(void)
{
+ /* Only we change this so SMP safe */
return current->gid;
}
asmlinkage int sys_getegid(void)
{
- return current->egid;
+ /* Only we change this so SMP safe */
+ return current->egid;
}
/*
@@ -1164,11 +1292,18 @@ asmlinkage int sys_getegid(void)
* moved into the arch dependent tree for those ports that require
* it for backward compatibility?
*/
+
asmlinkage int sys_nice(int increment)
{
unsigned long newprio;
int increase = 0;
+ /*
+ * Setpriority might change our priority at the same moment.
+ * We don't have to worry. Conceptually one call occurs first
+ * and we have a single winner.
+ */
+
newprio = increment;
if (increment < 0) {
if (!suser())
@@ -1176,6 +1311,7 @@ asmlinkage int sys_nice(int increment)
newprio = -increment;
increase = 1;
}
+
if (newprio > 40)
newprio = 40;
/*
@@ -1189,6 +1325,14 @@ asmlinkage int sys_nice(int increment)
increment = newprio;
if (increase)
increment = -increment;
+ /*
+ * Current->priority can change between this point
+ * and the assignment. We are assigning not doing add/subs
+	 * so that's ok. Conceptually a process might just instantaneously
+ * read the value we stomp over. I don't think that is an issue
+ * unless posix makes it one. If so we can loop on changes
+ * to current->priority.
+ */
newprio = current->priority - increment;
if ((signed) newprio < 1)
newprio = 1;
@@ -1206,13 +1350,15 @@ static struct task_struct *find_process_by_pid(pid_t pid)
p = current;
if (pid) {
+ read_lock(&tasklist_lock);
for_each_task(p) {
if (p->pid == pid)
goto found;
}
p = NULL;
- }
found:
+ read_unlock(&tasklist_lock);
+ }
return p;
}
@@ -1255,12 +1401,13 @@ static int setscheduler(pid_t pid, int policy,
p->policy = policy;
p->rt_priority = lp.sched_priority;
- cli();
+ spin_lock(&scheduler_lock);
+ spin_lock_irq(&runqueue_lock);
if (p->next_run)
move_last_runqueue(p);
- sti();
- schedule();
-
+ spin_unlock_irq(&runqueue_lock);
+ spin_unlock(&scheduler_lock);
+ need_resched = 1;
return 0;
}
@@ -1307,36 +1454,44 @@ asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param)
asmlinkage int sys_sched_yield(void)
{
- cli();
+ spin_lock(&scheduler_lock);
+ spin_lock_irq(&runqueue_lock);
move_last_runqueue(current);
- sti();
+ spin_unlock_irq(&runqueue_lock);
+ spin_unlock(&scheduler_lock);
+ need_resched = 1;
return 0;
}
asmlinkage int sys_sched_get_priority_max(int policy)
{
+ int ret = -EINVAL;
+
switch (policy) {
- case SCHED_FIFO:
- case SCHED_RR:
- return 99;
- case SCHED_OTHER:
- return 0;
+ case SCHED_FIFO:
+ case SCHED_RR:
+ ret = 99;
+ break;
+ case SCHED_OTHER:
+ ret = 0;
+ break;
}
-
- return -EINVAL;
+ return ret;
}
asmlinkage int sys_sched_get_priority_min(int policy)
{
+ int ret = -EINVAL;
+
switch (policy) {
- case SCHED_FIFO:
- case SCHED_RR:
- return 1;
- case SCHED_OTHER:
- return 0;
+ case SCHED_FIFO:
+ case SCHED_RR:
+ ret = 1;
+ break;
+ case SCHED_OTHER:
+ ret = 0;
}
-
- return -EINVAL;
+ return ret;
}
asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
@@ -1344,9 +1499,10 @@ asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
struct timespec t;
t.tv_sec = 0;
- t.tv_nsec = 0; /* <-- Linus, please fill correct value in here */
- return -ENOSYS; /* and then delete this line. Thanks! */
- return copy_to_user(interval, &t, sizeof(struct timespec)) ? -EFAULT : 0;
+ t.tv_nsec = 150000;
+ if (copy_to_user(interval, &t, sizeof(struct timespec)))
+ return -EFAULT;
+ return 0;
}
/*
@@ -1369,33 +1525,35 @@ static void jiffiestotimespec(unsigned long jiffies, struct timespec *value)
{
value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ);
value->tv_sec = jiffies / HZ;
- return;
}
asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
{
- int error;
struct timespec t;
unsigned long expire;
- error = copy_from_user(&t, rqtp, sizeof(struct timespec));
- if (error)
- return -EFAULT;
+ if(copy_from_user(&t, rqtp, sizeof(struct timespec)))
+ return -EFAULT;
if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
return -EINVAL;
+
if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
- current->policy != SCHED_OTHER) {
+ current->policy != SCHED_OTHER)
+ {
/*
* Short delay requests up to 2 ms will be handled with
* high precision by a busy wait for all real-time processes.
+ *
+ * Its important on SMP not to do this holding locks.
*/
udelay((t.tv_nsec + 999) / 1000);
return 0;
}
expire = timespectojiffies(&t) + (t.tv_sec || t.tv_nsec) + jiffies;
+
current->timeout = expire;
current->state = TASK_INTERRUPTIBLE;
schedule();
@@ -1405,11 +1563,10 @@ asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
jiffiestotimespec(expire - jiffies -
(expire > jiffies + 1), &t);
if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
- return -EFAULT;
+ return -EFAULT;
}
return -EINTR;
}
-
return 0;
}
@@ -1478,7 +1635,7 @@ void sched_init(void)
* process right in SMP mode.
*/
int cpu=smp_processor_id();
-#ifndef __SMP__
+#ifndef __SMP__
current_set[cpu]=&init_task;
#else
init_task.processor=cpu;
diff --git a/kernel/signal.c b/kernel/signal.c
index 325663bed..3203ad39c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -12,6 +12,8 @@
#include <linux/ptrace.h>
#include <linux/unistd.h>
#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <asm/uaccess.h>
@@ -19,61 +21,70 @@
#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP)))
-#if !defined(__alpha__) && !defined(__mips__)
+#ifndef __alpha__
/*
* This call isn't used by all ports, in particular, the Alpha
* uses osf_sigprocmask instead. Maybe it should be moved into
* arch-dependent dir?
+ *
+ * We don't need to get the kernel lock - this is all local to this
+ * particular thread.. (and that's good, because this is _heavily_
+ * used by various programs)
+ *
+ * No SMP locking would prevent the inherent races present in this
+ * routine, thus we do not perform any locking at all.
*/
asmlinkage int sys_sigprocmask(int how, sigset_t *set, sigset_t *oset)
{
- sigset_t new_set, old_set = current->blocked;
- int error;
+ sigset_t old_set = current->blocked;
if (set) {
- error = get_user(new_set, set);
- if (error)
- return error;
+ sigset_t new_set;
+
+ if(get_user(new_set, set))
+ return -EFAULT;
+
new_set &= _BLOCKABLE;
switch (how) {
+ default:
+ return -EINVAL;
case SIG_BLOCK:
- current->blocked |= new_set;
+ new_set |= old_set;
break;
case SIG_UNBLOCK:
- current->blocked &= ~new_set;
+ new_set = old_set & ~new_set;
break;
case SIG_SETMASK:
- current->blocked = new_set;
break;
- default:
- return -EINVAL;
}
+ current->blocked = new_set;
}
if (oset) {
- error = put_user(old_set, oset);
- if (error)
- return error;
+ if(put_user(old_set, oset))
+ return -EFAULT;
}
return 0;
}
-#endif
-
-#ifndef __alpha__
/*
* For backwards compatibility? Functionality superseded by sigprocmask.
*/
asmlinkage int sys_sgetmask(void)
{
+ /* SMP safe */
return current->blocked;
}
asmlinkage int sys_ssetmask(int newmask)
{
- int old=current->blocked;
+ int old;
+ spin_lock_irq(&current->sigmask_lock);
+ old = current->blocked;
current->blocked = newmask & _BLOCKABLE;
+ spin_unlock_irq(&current->sigmask_lock);
+
return old;
}
@@ -81,8 +92,13 @@ asmlinkage int sys_ssetmask(int newmask)
asmlinkage int sys_sigpending(sigset_t *set)
{
- return put_user(current->blocked & current->signal,
- /* Hack */(unsigned long *)set);
+ int ret;
+
+ /* fill in "set" with signals pending but blocked. */
+ spin_lock_irq(&current->sigmask_lock);
+ ret = put_user(current->blocked & current->signal, set);
+ spin_unlock_irq(&current->sigmask_lock);
+ return ret;
}
/*
@@ -99,22 +115,24 @@ asmlinkage int sys_sigpending(sigset_t *set)
* Note the silly behaviour of SIGCHLD: SIG_IGN means that the signal
* isn't actually ignored, but does automatic child reaping, while
* SIG_DFL is explicitly said by POSIX to force the signal to be ignored..
+ *
+ * All callers of check_pending must be holding current->sig->siglock.
*/
-static inline void check_pending(int signum)
+inline void check_pending(int signum)
{
struct sigaction *p;
p = signum - 1 + current->sig->action;
+ spin_lock(&current->sigmask_lock);
if (p->sa_handler == SIG_IGN) {
- k_sigdelset(&current->signal, signum);
- return;
- }
- if (p->sa_handler == SIG_DFL) {
- if (signum != SIGCONT && signum != SIGCHLD && signum != SIGWINCH)
- return;
- k_sigdelset(&current->signal, signum);
- return;
+ current->signal &= ~_S(signum);
+ } else if (p->sa_handler == SIG_DFL) {
+ if (signum == SIGCONT ||
+ signum == SIGCHLD ||
+ signum != SIGWINCH)
+ current->signal &= ~_S(signum);
}
+ spin_unlock(&current->sigmask_lock);
}
#if !defined(__alpha__) && !defined(__mips__)
@@ -123,69 +141,65 @@ static inline void check_pending(int signum)
*/
asmlinkage unsigned long sys_signal(int signum, __sighandler_t handler)
{
- int err;
struct sigaction tmp;
- /*
- * HACK: We still cannot handle signals > 32 due to the limited
- * size of ksigset_t (which will go away).
- */
- if (signum > 32)
- return -EINVAL;
- if (signum<1 || signum>_NSIG)
+ if (signum<1 || signum>32)
return -EINVAL;
if (signum==SIGKILL || signum==SIGSTOP)
return -EINVAL;
if (handler != SIG_DFL && handler != SIG_IGN) {
- err = verify_area(VERIFY_READ, handler, 1);
- if (err)
- return err;
+ if(verify_area(VERIFY_READ, handler, 1))
+ return -EFAULT;
}
+
memset(&tmp, 0, sizeof(tmp));
tmp.sa_handler = handler;
tmp.sa_flags = SA_ONESHOT | SA_NOMASK;
+
+ spin_lock_irq(&current->sig->siglock);
handler = current->sig->action[signum-1].sa_handler;
current->sig->action[signum-1] = tmp;
check_pending(signum);
+ spin_unlock_irq(&current->sig->siglock);
+
return (unsigned long) handler;
}
#endif /* !defined(__alpha__) && !defined(__mips__) */
+#ifndef __sparc__
asmlinkage int sys_sigaction(int signum, const struct sigaction * action,
struct sigaction * oldaction)
{
struct sigaction new_sa, *p;
- /*
- * HACK: We still cannot handle signals > 32 due to the limited
- * size of ksigset_t (which will go away).
- */
- if (signum > 32)
- return -EINVAL;
- if (signum<1 || signum>_NSIG)
+ if (signum < 1 || signum > 32)
return -EINVAL;
+
p = signum - 1 + current->sig->action;
+
if (action) {
- int err = verify_area(VERIFY_READ, action, sizeof(*action));
- if (err)
- return err;
+ if (copy_from_user(&new_sa, action, sizeof(struct sigaction)))
+ return -EFAULT;
if (signum==SIGKILL || signum==SIGSTOP)
return -EINVAL;
- if (copy_from_user(&new_sa, action, sizeof(struct sigaction)))
- return -EFAULT;
- if (new_sa.sa_handler != SIG_DFL && new_sa.sa_handler != SIG_IGN) {
- err = verify_area(VERIFY_READ, new_sa.sa_handler, 1);
- if (err)
- return err;
- }
}
+
if (oldaction) {
+ /* In the clone() case we could copy half consistent
+ * state to the user, however this could sleep and
+ * deadlock us if we held the signal lock on SMP. So for
+ * now I take the easy way out and do no locking.
+ */
if (copy_to_user(oldaction, p, sizeof(struct sigaction)))
return -EFAULT;
}
+
if (action) {
+ spin_lock_irq(&current->sig->siglock);
*p = new_sa;
check_pending(signum);
+ spin_unlock_irq(&current->sig->siglock);
}
return 0;
}
+#endif
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 022b55355..6b9b41aa5 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -15,40 +15,60 @@
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/bitops.h>
+#include <asm/atomic.h>
-unsigned long intr_count = 0;
+/* intr_count died a painless death... -DaveM */
int bh_mask_count[32];
unsigned long bh_active = 0;
unsigned long bh_mask = 0;
void (*bh_base[32])(void);
-
-asmlinkage void do_bottom_half(void)
+/*
+ * This needs to make sure that only one bottom half handler
+ * is ever active at a time. We do this without locking by
+ * doing an atomic increment on the intr_count, and checking
+ * (nonatomically) against 1. Only if it's 1 do we schedule
+ * the bottom half.
+ *
+ * Note that the non-atomicity of the test (as opposed to the
+ * actual update) means that the test may fail, and _nobody_
+ * runs the handlers if there is a race that makes multiple
+ * CPU's get here at the same time. That's ok, we'll run them
+ * next time around.
+ */
+static inline void run_bottom_halves(void)
{
unsigned long active;
- unsigned long mask, left;
void (**bh)(void);
- sti();
+ active = get_active_bhs();
+ clear_active_bhs(active);
bh = bh_base;
- active = bh_active & bh_mask;
- for (mask = 1, left = ~0 ; left & active ; bh++,mask += mask,left += left) {
- if (mask & active) {
- void (*fn)(void);
- bh_active &= ~mask;
- fn = *bh;
- if (!fn)
- goto bad_bh;
- fn();
+ do {
+ if (active & 1)
+ (*bh)();
+ bh++;
+ active >>= 1;
+ } while (active);
+}
+
+asmlinkage void do_bottom_half(void)
+{
+ int cpu = smp_processor_id();
+
+ if (hardirq_trylock(cpu)) {
+ if (softirq_trylock()) {
+ run_bottom_halves();
+ softirq_endlock();
}
+ hardirq_endlock(cpu);
}
- return;
-bad_bh:
- printk ("irq.c:bad bottom half entry %08lx\n", mask);
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 8fcaba2de..934108fa8 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -4,7 +4,6 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -21,9 +20,10 @@
#include <linux/fcntl.h>
#include <linux/acct.h>
#include <linux/tty.h>
-#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF)
-#include <linux/apm_bios.h>
-#endif
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -31,8 +31,30 @@
/*
* this indicates whether you can reboot with ctrl-alt-del: the default is yes
*/
+
int C_A_D = 1;
+
+/*
+ * Notifier list for kernel code which wants to be called
+ * at shutdown. This is used to stop any idling DMA operations
+ * and the like.
+ */
+
+struct notifier_block *reboot_notifier_list = NULL;
+
+int register_reboot_notifier(struct notifier_block * nb)
+{
+ return notifier_chain_register(&reboot_notifier_list, nb);
+}
+
+int unregister_reboot_notifier(struct notifier_block * nb)
+{
+ return notifier_chain_unregister(&reboot_notifier_list, nb);
+}
+
+
+
extern void adjust_clock(void);
asmlinkage int sys_ni_syscall(void)
@@ -65,13 +87,14 @@ static int proc_sel(struct task_struct *p, int which, int who)
asmlinkage int sys_setpriority(int which, int who, int niceval)
{
struct task_struct *p;
- int error = ESRCH;
unsigned int priority;
+ int error;
if (which > 2 || which < 0)
return -EINVAL;
/* normalize: avoid signed division (rounding problems) */
+ error = ESRCH;
priority = niceval;
if (niceval < 0)
priority = -niceval;
@@ -85,6 +108,7 @@ asmlinkage int sys_setpriority(int which, int who, int niceval)
priority = 1;
}
+ read_lock(&tasklist_lock);
for_each_task(p) {
if (!proc_sel(p, which, who))
continue;
@@ -100,6 +124,8 @@ asmlinkage int sys_setpriority(int which, int who, int niceval)
else
p->priority = priority;
}
+ read_unlock(&tasklist_lock);
+
return -error;
}
@@ -116,12 +142,14 @@ asmlinkage int sys_getpriority(int which, int who)
if (which > 2 || which < 0)
return -EINVAL;
+ read_lock(&tasklist_lock);
for_each_task (p) {
if (!proc_sel(p, which, who))
continue;
if (p->priority > max_prio)
max_prio = p->priority;
}
+ read_unlock(&tasklist_lock);
/* scale the priority from timeslice to 0..40 */
if (max_prio > 0)
@@ -169,7 +197,7 @@ asmlinkage int sys_prof(void)
#endif
-extern asmlinkage sys_kill(int, int);
+extern asmlinkage int sys_kill(int, int);
/*
* Reboot system call: for obvious reasons only root may call it,
@@ -178,29 +206,70 @@ extern asmlinkage sys_kill(int, int);
* You can also set the meaning of the ctrl-alt-del-key here.
*
* reboot doesn't sync: do that yourself before calling this.
+ *
*/
-asmlinkage int sys_reboot(int magic, int magic_too, int flag)
+asmlinkage int sys_reboot(int magic1, int magic2, int cmd, void * arg)
{
+ char buffer[256];
+
+ /* We only trust the superuser with rebooting the system. */
if (!suser())
return -EPERM;
- if (magic != 0xfee1dead || magic_too != 672274793)
+
+ /* For safety, we require "magic" arguments. */
+ if (magic1 != LINUX_REBOOT_MAGIC1 ||
+ (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A))
return -EINVAL;
- if (flag == 0x01234567)
- hard_reset_now();
- else if (flag == 0x89ABCDEF)
+
+ lock_kernel();
+ switch (cmd) {
+ case LINUX_REBOOT_CMD_RESTART:
+ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
+ printk(KERN_EMERG "Restarting system.\n");
+ machine_restart(NULL);
+ break;
+
+ case LINUX_REBOOT_CMD_CAD_ON:
C_A_D = 1;
- else if (!flag)
+ break;
+
+ case LINUX_REBOOT_CMD_CAD_OFF:
C_A_D = 0;
- else if (flag == 0xCDEF0123) {
- printk(KERN_EMERG "System halted\n");
- sys_kill(-1, SIGKILL);
-#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF)
- apm_set_power_state(APM_STATE_OFF);
-#endif
+ break;
+
+ case LINUX_REBOOT_CMD_HALT:
+ notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
+ printk(KERN_EMERG "System halted.\n");
+ machine_halt();
do_exit(0);
- } else
+ break;
+
+ case LINUX_REBOOT_CMD_POWER_OFF:
+ notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
+ printk(KERN_EMERG "Power down.\n");
+ machine_power_off();
+ do_exit(0);
+ break;
+
+ case LINUX_REBOOT_CMD_RESTART2:
+ if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) {
+ unlock_kernel();
+ return -EFAULT;
+ }
+ buffer[sizeof(buffer) - 1] = '\0';
+
+ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer);
+ printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
+ machine_restart(buffer);
+ break;
+
+ default:
+ unlock_kernel();
return -EINVAL;
- return (0);
+ break;
+ };
+ unlock_kernel();
+ return 0;
}
/*
@@ -210,9 +279,10 @@ asmlinkage int sys_reboot(int magic, int magic_too, int flag)
*/
void ctrl_alt_del(void)
{
- if (C_A_D)
- hard_reset_now();
- else
+ if (C_A_D) {
+ notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
+ machine_restart(NULL);
+ } else
kill_proc(1, SIGINT, 1);
}
@@ -231,6 +301,9 @@ void ctrl_alt_del(void)
* The general idea is that a program which uses just setregid() will be
* 100% compatible with BSD. A program which uses just setgid() will be
* 100% compatible with POSIX w/ Saved ID's.
+ *
+ * SMP: There are no races, the gid's are checked only by filesystem
+ * operations (as far as semantic preservation is concerned).
*/
asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
{
@@ -243,7 +316,7 @@ asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
suser())
current->gid = rgid;
else
- return(-EPERM);
+ return -EPERM;
}
if (egid != (gid_t) -1) {
if ((old_rgid == egid) ||
@@ -253,7 +326,7 @@ asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
current->fsgid = current->egid = egid;
else {
current->gid = old_rgid;
- return(-EPERM);
+ return -EPERM;
}
}
if (rgid != (gid_t) -1 ||
@@ -267,6 +340,8 @@ asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
/*
* setgid() is implemented like SysV w/ SAVED_IDS
+ *
+ * SMP: Same implicit races as above.
*/
asmlinkage int sys_setgid(gid_t gid)
{
@@ -278,6 +353,7 @@ asmlinkage int sys_setgid(gid_t gid)
current->egid = current->fsgid = gid;
else
return -EPERM;
+
if (current->egid != old_egid)
current->dumpable = 0;
return 0;
@@ -329,66 +405,70 @@ int acct_process(long exitcode)
asmlinkage int sys_acct(const char *name)
{
- struct inode *inode = (struct inode *)0;
- char *tmp;
- int error;
-
- if (!suser())
- return -EPERM;
-
- if (name == (char *)0) {
- if (acct_active) {
- if (acct_file.f_op->release)
- acct_file.f_op->release(acct_file.f_inode, &acct_file);
-
- if (acct_file.f_inode != (struct inode *) 0)
- iput(acct_file.f_inode);
-
- acct_active = 0;
- }
- return 0;
- } else {
- if (!acct_active) {
-
- if ((error = getname(name, &tmp)) != 0)
- return (error);
-
- error = open_namei(tmp, O_RDWR, 0600, &inode, 0);
- putname(tmp);
-
- if (error)
- return (error);
-
- if (!S_ISREG(inode->i_mode)) {
- iput(inode);
- return -EACCES;
- }
-
- if (!inode->i_op || !inode->i_op->default_file_ops ||
- !inode->i_op->default_file_ops->write) {
- iput(inode);
- return -EIO;
- }
-
- acct_file.f_mode = 3;
- acct_file.f_flags = 0;
- acct_file.f_count = 1;
- acct_file.f_inode = inode;
- acct_file.f_pos = inode->i_size;
- acct_file.f_reada = 0;
- acct_file.f_op = inode->i_op->default_file_ops;
-
- if (acct_file.f_op->open)
- if (acct_file.f_op->open(acct_file.f_inode, &acct_file)) {
- iput(inode);
- return -EIO;
- }
-
- acct_active = 1;
- return 0;
- } else
- return -EBUSY;
- }
+ struct inode *inode = (struct inode *)0;
+ char *tmp;
+ int error = -EPERM;
+
+ lock_kernel();
+ if (!suser())
+ goto out;
+
+ if (name == (char *)0) {
+ if (acct_active) {
+ if (acct_file.f_op->release)
+ acct_file.f_op->release(acct_file.f_inode, &acct_file);
+
+ if (acct_file.f_inode != (struct inode *) 0)
+ iput(acct_file.f_inode);
+
+ acct_active = 0;
+ }
+ error = 0;
+ } else {
+ error = -EBUSY;
+ if (!acct_active) {
+ if ((error = getname(name, &tmp)) != 0)
+ goto out;
+
+ error = open_namei(tmp, O_RDWR, 0600, &inode, 0);
+ putname(tmp);
+ if (error)
+ goto out;
+
+ error = -EACCES;
+ if (!S_ISREG(inode->i_mode)) {
+ iput(inode);
+ goto out;
+ }
+
+ error = -EIO;
+ if (!inode->i_op || !inode->i_op->default_file_ops ||
+ !inode->i_op->default_file_ops->write) {
+ iput(inode);
+ goto out;
+ }
+
+ acct_file.f_mode = 3;
+ acct_file.f_flags = 0;
+ acct_file.f_count = 1;
+ acct_file.f_inode = inode;
+ acct_file.f_pos = inode->i_size;
+ acct_file.f_reada = 0;
+ acct_file.f_op = inode->i_op->default_file_ops;
+
+ if(acct_file.f_op->open)
+ if(acct_file.f_op->open(acct_file.f_inode, &acct_file)) {
+ iput(inode);
+ goto out;
+ }
+
+ acct_active = 1;
+ error = 0;
+ }
+ }
+out:
+ unlock_kernel();
+ return error;
}
#ifndef __alpha__
@@ -443,16 +523,18 @@ asmlinkage int sys_old_syscall(void)
*/
asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
{
- int old_ruid = current->uid;
- int old_euid = current->euid;
+ int old_ruid;
+ int old_euid;
+ old_ruid = current->uid;
+ old_euid = current->euid;
if (ruid != (uid_t) -1) {
if ((old_ruid == ruid) ||
(current->euid==ruid) ||
suser())
current->uid = ruid;
else
- return(-EPERM);
+ return -EPERM;
}
if (euid != (uid_t) -1) {
if ((old_ruid == euid) ||
@@ -462,7 +544,7 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
current->fsuid = current->euid = euid;
else {
current->uid = old_ruid;
- return(-EPERM);
+ return -EPERM;
}
}
if (ruid != (uid_t) -1 ||
@@ -495,9 +577,10 @@ asmlinkage int sys_setuid(uid_t uid)
current->fsuid = current->euid = uid;
else
return -EPERM;
+
if (current->euid != old_euid)
current->dumpable = 0;
- return(0);
+ return 0;
}
@@ -538,6 +621,7 @@ asmlinkage int sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
if (!(retval = put_user(current->uid, ruid)) &&
!(retval = put_user(current->euid, euid)))
retval = put_user(current->suid, suid);
+
return retval;
}
@@ -550,13 +634,15 @@ asmlinkage int sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
*/
asmlinkage int sys_setfsuid(uid_t uid)
{
- int old_fsuid = current->fsuid;
+ int old_fsuid;
+ old_fsuid = current->fsuid;
if (uid == current->uid || uid == current->euid ||
uid == current->suid || uid == current->fsuid || suser())
current->fsuid = uid;
if (current->fsuid != old_fsuid)
current->dumpable = 0;
+
return old_fsuid;
}
@@ -565,29 +651,35 @@ asmlinkage int sys_setfsuid(uid_t uid)
*/
asmlinkage int sys_setfsgid(gid_t gid)
{
- int old_fsgid = current->fsgid;
+ int old_fsgid;
+ old_fsgid = current->fsgid;
if (gid == current->gid || gid == current->egid ||
gid == current->sgid || gid == current->fsgid || suser())
current->fsgid = gid;
if (current->fsgid != old_fsgid)
current->dumpable = 0;
+
return old_fsgid;
}
asmlinkage long sys_times(struct tms * tbuf)
{
- int error;
- if (tbuf) {
- error = put_user(current->utime,&tbuf->tms_utime);
- if (!error)
- error = put_user(current->stime,&tbuf->tms_stime);
- if (!error)
- error = put_user(current->cutime,&tbuf->tms_cutime);
- if (!error)
- error = put_user(current->cstime,&tbuf->tms_cstime);
- if (error)
- return error;
+ /*
+ * In the SMP world we might just be unlucky and have one of
+ * the times increment as we use it. Since the value is an
+ * atomically safe type this is just fine. Conceptually it's
+ * as if the syscall took an instant longer to occur.
+ */
+ if (tbuf)
+ {
+ /* ?? use copy_to_user() */
+ if(!access_ok(VERIFY_READ, tbuf, sizeof(struct tms)) ||
+ __put_user(current->utime,&tbuf->tms_utime)||
+ __put_user(current->stime,&tbuf->tms_stime) ||
+ __put_user(current->cutime,&tbuf->tms_cutime) ||
+ __put_user(current->cstime,&tbuf->tms_cstime))
+ return -EFAULT;
}
return jiffies;
}
@@ -604,9 +696,11 @@ asmlinkage long sys_times(struct tms * tbuf)
* Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
* LBT 04.03.94
*/
+
asmlinkage int sys_setpgid(pid_t pid, pid_t pgid)
{
struct task_struct * p;
+ int err = -EINVAL;
if (!pid)
pid = current->pid;
@@ -614,82 +708,123 @@ asmlinkage int sys_setpgid(pid_t pid, pid_t pgid)
pgid = pid;
if (pgid < 0)
return -EINVAL;
+
+ read_lock(&tasklist_lock);
for_each_task(p) {
- if (p->pid == pid)
+ if (p->pid == pid) {
+ /* NOTE: I haven't dropped tasklist_lock, this is
+ * on purpose. -DaveM
+ */
goto found_task;
+ }
}
+ read_unlock(&tasklist_lock);
return -ESRCH;
found_task:
+ /* From this point forward we keep holding onto the tasklist lock
+ * so that our parent does not change from under us. -DaveM
+ */
+ err = -ESRCH;
if (p->p_pptr == current || p->p_opptr == current) {
+ err = -EPERM;
if (p->session != current->session)
- return -EPERM;
+ goto out;
+ err = -EACCES;
if (p->did_exec)
- return -EACCES;
+ goto out;
} else if (p != current)
- return -ESRCH;
+ goto out;
+ err = -EPERM;
if (p->leader)
- return -EPERM;
+ goto out;
if (pgid != pid) {
struct task_struct * tmp;
for_each_task (tmp) {
if (tmp->pgrp == pgid &&
- tmp->session == current->session)
+ tmp->session == current->session)
goto ok_pgid;
}
- return -EPERM;
+ goto out;
}
ok_pgid:
p->pgrp = pgid;
- return 0;
+ err = 0;
+out:
+ /* All paths lead to here, thus we are safe. -DaveM */
+ read_unlock(&tasklist_lock);
+ return err;
}
asmlinkage int sys_getpgid(pid_t pid)
{
- struct task_struct * p;
-
- if (!pid)
+ if (!pid) {
return current->pgrp;
- for_each_task(p) {
- if (p->pid == pid)
- return p->pgrp;
+ } else {
+ struct task_struct *p;
+ int ret = -ESRCH;
+
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->pid == pid) {
+ ret = p->pgrp;
+ break;
+ }
+ }
+ read_unlock(&tasklist_lock);
+ return ret;
}
- return -ESRCH;
}
asmlinkage int sys_getpgrp(void)
{
+ /* SMP - assuming writes are word atomic this is fine */
return current->pgrp;
}
asmlinkage int sys_getsid(pid_t pid)
{
struct task_struct * p;
-
- if (!pid)
- return current->session;
- for_each_task(p) {
- if (p->pid == pid)
- return p->session;
+ int ret;
+
+ /* SMP: The 'self' case requires no lock */
+ if (!pid) {
+ ret = current->session;
+ } else {
+ ret = -ESRCH;
+
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->pid == pid) {
+ ret = p->session;
+ break;
+ }
+ }
+ read_unlock(&tasklist_lock);
}
- return -ESRCH;
+ return ret;
}
asmlinkage int sys_setsid(void)
{
struct task_struct * p;
+ int err = -EPERM;
+ read_lock(&tasklist_lock);
for_each_task(p) {
if (p->pgrp == current->pid)
- return -EPERM;
+ goto out;
}
current->leader = 1;
current->session = current->pgrp = current->pid;
current->tty = NULL;
current->tty_old_pgrp = 0;
- return current->pgrp;
+ err = current->pgrp;
+out:
+ read_unlock(&tasklist_lock);
+ return err;
}
/*
@@ -698,6 +833,11 @@ asmlinkage int sys_setsid(void)
asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist)
{
int i;
+
+ /*
+ * SMP: Nobody else can change our grouplist. Thus we are
+ * safe.
+ */
if (gidsetsize < 0)
return -EINVAL;
@@ -711,21 +851,21 @@ asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist)
return i;
}
+/*
+ * SMP: Our groups are not shared. We can copy to/from them safely
+ * without another task interfering.
+ */
+
asmlinkage int sys_setgroups(int gidsetsize, gid_t *grouplist)
{
- int err;
-
if (!suser())
return -EPERM;
if ((unsigned) gidsetsize > NGROUPS)
return -EINVAL;
- err = copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t));
- if (err) {
- gidsetsize = 0;
- err = -EFAULT;
- }
+ if(copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t)))
+ return -EFAULT;
current->ngroups = gidsetsize;
- return err;
+ return 0;
}
int in_group_p(gid_t grp)
@@ -762,53 +902,49 @@ asmlinkage int sys_newuname(struct new_utsname * name)
* Move these to arch dependent dir since they are for
* backward compatibility only?
*/
+
+#ifndef __sparc__
asmlinkage int sys_uname(struct old_utsname * name)
{
- int error = -EFAULT;;
if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
- error = 0;
- return error;
+ return 0;
+ return -EFAULT;
}
+#endif
asmlinkage int sys_olduname(struct oldold_utsname * name)
{
int error;
+
if (!name)
return -EFAULT;
- error = copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
- if (!error)
- error = put_user(0,name->sysname+__OLD_UTS_LEN);
- if (!error)
- error = copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
- if (!error)
- error = put_user(0,name->nodename+__OLD_UTS_LEN);
- if (!error)
- error = copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
- if (!error)
- error = put_user(0,name->release+__OLD_UTS_LEN);
- if (!error)
- error = copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
- if (!error)
- error = put_user(0,name->version+__OLD_UTS_LEN);
- if (!error)
- error = copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
- if (!error)
- error = put_user(0,name->machine+__OLD_UTS_LEN);
- return error ? -EFAULT : 0;
+ if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
+ return -EFAULT;
+
+ error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+ error -= __put_user(0,name->sysname+__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+ error -= __put_user(0,name->nodename+__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+ error -= __put_user(0,name->release+__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+ error -= __put_user(0,name->version+__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+ error = __put_user(0,name->machine+__OLD_UTS_LEN);
+ error = error ? -EFAULT : 0;
+
+ return error;
}
#endif
asmlinkage int sys_sethostname(char *name, int len)
{
- int error;
-
if (!suser())
return -EPERM;
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
- error = copy_from_user(system_utsname.nodename, name, len);
- if (error)
+ if(copy_from_user(system_utsname.nodename, name, len))
return -EFAULT;
system_utsname.nodename[len] = 0;
return 0;
@@ -820,7 +956,7 @@ asmlinkage int sys_gethostname(char *name, int len)
if (len < 0)
return -EINVAL;
- i = 1+strlen(system_utsname.nodename);
+ i = 1 + strlen(system_utsname.nodename);
if (i > len)
i = len;
return copy_to_user(name, system_utsname.nodename, i) ? -EFAULT : 0;
@@ -832,14 +968,11 @@ asmlinkage int sys_gethostname(char *name, int len)
*/
asmlinkage int sys_setdomainname(char *name, int len)
{
- int error;
-
if (!suser())
return -EPERM;
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
- error = copy_from_user(system_utsname.domainname, name, len);
- if (error)
+ if(copy_from_user(system_utsname.domainname, name, len))
return -EFAULT;
system_utsname.domainname[len] = 0;
return 0;
@@ -849,20 +982,19 @@ asmlinkage int sys_getrlimit(unsigned int resource, struct rlimit *rlim)
{
if (resource >= RLIM_NLIMITS)
return -EINVAL;
- return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim))
- ? -EFAULT : 0 ;
+ else
+ return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim))
+ ? -EFAULT : 0;
}
asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim)
{
struct rlimit new_rlim, *old_rlim;
- int err;
if (resource >= RLIM_NLIMITS)
return -EINVAL;
- err = copy_from_user(&new_rlim, rlim, sizeof(*rlim));
- if (err)
- return -EFAULT;
+ if(copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
+ return -EFAULT;
old_rlim = current->rlim + resource;
if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
(new_rlim.rlim_max > old_rlim->rlim_max)) &&
@@ -883,6 +1015,13 @@ asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim)
* make sense to do this. It will make moving the rest of the information
* a lot simpler! (Which we're not doing right now because we're not
* measuring them yet).
+ *
+ * This is SMP safe. Either we are called from sys_getrusage on ourselves
+ * below (we know we aren't going to exit/disappear and only we change our
+ * rusage counters), or we are called from wait4() on a process which is
+ * either stopped or zombied. In the zombied case the task won't get
+ * reaped till shortly after the call to getrusage(), in both cases the
+ * task being examined is in a frozen state so the counters won't change.
*/
int getrusage(struct task_struct *p, int who, struct rusage *ru)
{
@@ -930,8 +1069,6 @@ asmlinkage int sys_getrusage(int who, struct rusage *ru)
asmlinkage int sys_umask(int mask)
{
- int old = current->fs->umask;
-
- current->fs->umask = mask & S_IRWXUGO;
- return (old);
+ mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
+ return mask;
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3d0fbf49b..9e0bb0fd8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -7,6 +7,8 @@
* Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
* Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
* Dynamic registration fixes, Stephen Tweedie.
+ * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
+ * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris Horn.
*/
#include <linux/config.h>
@@ -20,28 +22,49 @@
#include <linux/ctype.h>
#include <linux/utsname.h>
#include <linux/swapctl.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <asm/bitops.h>
#include <asm/uaccess.h>
+#ifdef CONFIG_ROOT_NFS
+#include <linux/nfs_fs.h>
+#endif
+
+#ifdef CONFIG_SYSCTL
+
/* External variables not in a header file. */
extern int panic_timeout;
+extern int console_loglevel, default_message_loglevel;
+extern int minimum_console_loglevel, default_console_loglevel;
+extern int C_A_D, swapout_interval;
+extern int bdf_prm[], bdflush_min[], bdflush_max[];
+extern char binfmt_java_interpreter[], binfmt_java_appletviewer[];
+extern int sysctl_overcommit_memory;
-
-#ifdef CONFIG_ROOT_NFS
-#include <linux/nfs_fs.h>
+#ifdef __sparc__
+extern char reboot_command [];
#endif
+static int parse_table(int *, int, void *, size_t *, void *, size_t,
+ ctl_table *, void **);
+static int do_securelevel_strategy (ctl_table *, int *, int, void *, size_t *,
+ void *, size_t, void **);
+
+
static ctl_table root_table[];
static struct ctl_table_header root_table_header =
{root_table, DNODE_SINGLE(&root_table_header)};
-static int parse_table(int *, int, void *, size_t *, void *, size_t,
- ctl_table *, void **);
-
static ctl_table kern_table[];
static ctl_table vm_table[];
extern ctl_table net_table[];
+static ctl_table proc_table[];
+static ctl_table fs_table[];
+static ctl_table debug_table[];
+static ctl_table dev_table[];
+
/* /proc declarations: */
@@ -59,7 +82,7 @@ struct file_operations proc_sys_file_operations =
proc_readsys, /* read */
proc_writesys, /* write */
NULL, /* readdir */
- NULL, /* select */
+ NULL, /* poll */
NULL, /* ioctl */
NULL, /* mmap */
NULL, /* no special open code */
@@ -94,19 +117,16 @@ static void register_proc_table(ctl_table *, struct proc_dir_entry *);
static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
#endif
-extern int bdf_prm[], bdflush_min[], bdflush_max[];
-
-static int do_securelevel_strategy (ctl_table *, int *, int, void *, size_t *,
- void *, size_t, void **);
-
-extern char binfmt_java_interpreter[], binfmt_java_appletviewer[];
-
/* The default sysctl tables: */
static ctl_table root_table[] = {
{CTL_KERN, "kernel", NULL, 0, 0555, kern_table},
{CTL_VM, "vm", NULL, 0, 0555, vm_table},
{CTL_NET, "net", NULL, 0, 0555, net_table},
+ {CTL_PROC, "proc", NULL, 0, 0555, proc_table},
+ {CTL_FS, "fs", NULL, 0, 0555, fs_table},
+ {CTL_DEBUG, "debug", NULL, 0, 0555, debug_table},
+ {CTL_DEV, "dev", NULL, 0, 0555, dev_table},
{0}
};
@@ -140,7 +160,7 @@ static ctl_table kern_table[] = {
#ifdef CONFIG_ROOT_NFS
{KERN_NFSRNAME, "nfs-root-name", nfs_root_name, NFS_ROOT_NAME_LEN,
0644, NULL, &proc_dostring, &sysctl_string },
- {KERN_NFSRNAME, "nfs-root-addrs", nfs_root_addrs, NFS_ROOT_ADDRS_LEN,
+ {KERN_NFSRADDRS, "nfs-root-addrs", nfs_root_addrs, NFS_ROOT_ADDRS_LEN,
0644, NULL, &proc_dostring, &sysctl_string },
#endif
#ifdef CONFIG_BINFMT_JAVA
@@ -149,22 +169,47 @@ static ctl_table kern_table[] = {
{KERN_JAVA_APPLETVIEWER, "java-appletviewer", binfmt_java_appletviewer,
64, 0644, NULL, &proc_dostring, &sysctl_string },
#endif
+#ifdef __sparc__
+ {KERN_SPARC_REBOOT, "reboot-cmd", reboot_command,
+ 256, 0644, NULL, &proc_dostring, &sysctl_string },
+#endif
+ {KERN_CTLALTDEL, "ctrl-alt-del", &C_A_D, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {KERN_PRINTK, "printk", &console_loglevel, 4*sizeof(int),
+ 0644, NULL, &proc_dointvec},
{0}
};
static ctl_table vm_table[] = {
{VM_SWAPCTL, "swapctl",
&swap_control, sizeof(swap_control_t), 0600, NULL, &proc_dointvec},
- {VM_KSWAPD, "kswapd",
- &kswapd_ctl, sizeof(kswapd_ctl), 0600, NULL, &proc_dointvec},
{VM_FREEPG, "freepages",
&min_free_pages, 3*sizeof(int), 0600, NULL, &proc_dointvec},
{VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0600, NULL,
&proc_dointvec_minmax, &sysctl_intvec, NULL,
&bdflush_min, &bdflush_max},
+ {VM_OVERCOMMIT_MEMORY, "overcommit_memory", &sysctl_overcommit_memory,
+ sizeof(sysctl_overcommit_memory), 0644, NULL, &proc_dointvec},
+ {0}
+};
+
+static ctl_table proc_table[] = {
+ {0}
+};
+
+static ctl_table fs_table[] = {
+ {0}
+};
+
+static ctl_table debug_table[] = {
{0}
};
+static ctl_table dev_table[] = {
+ {0}
+};
+
+
void sysctl_init(void)
{
#ifdef CONFIG_PROC_FS
@@ -184,25 +229,17 @@ int do_sysctl (int *name, int nlen,
if (nlen == 0 || nlen >= CTL_MAXNAME)
return -ENOTDIR;
- error = verify_area(VERIFY_READ,name,nlen*sizeof(int));
- if (error) return error;
- if (oldval) {
+ if (oldval)
+ {
int old_len;
if (!oldlenp)
return -EFAULT;
- error = verify_area(VERIFY_WRITE,oldlenp,sizeof(size_t));
- if (error) return error;
- get_user(old_len, oldlenp);
- error = verify_area(VERIFY_WRITE,oldval,old_len);
- if (error) return error;
- }
- if (newval) {
- error = verify_area(VERIFY_READ,newval,newlen);
- if (error) return error;
+ if(get_user(old_len, oldlenp))
+ return -EFAULT;
}
tmp = &root_table_header;
do {
- context = 0;
+ context = NULL;
error = parse_table(name, nlen, oldval, oldlenp,
newval, newlen, tmp->ctl_table, &context);
if (context)
@@ -218,12 +255,15 @@ extern asmlinkage int sys_sysctl(struct __sysctl_args *args)
{
struct __sysctl_args tmp;
int error;
- error = verify_area(VERIFY_READ, args, sizeof(*args));
- if (error)
- return error;
- copy_from_user(&tmp, args, sizeof(tmp));
- return do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
- tmp.newval, tmp.newlen);
+
+ if(copy_from_user(&tmp, args, sizeof(tmp)))
+ return -EFAULT;
+
+ lock_kernel();
+ error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
+ tmp.newval, tmp.newlen);
+ unlock_kernel();
+ return error;
}
/* Like in_group_p, but testing against egid, not fsgid */
@@ -248,6 +288,7 @@ out:
/* ctl_perm does NOT grant the superuser all rights automatically, because
some sysctl variables are readonly even to root. */
+
static int test_perm(int mode, int op)
{
if (!current->euid)
@@ -258,6 +299,7 @@ static int test_perm(int mode, int op)
return 0;
return -EACCES;
}
+
static inline int ctl_perm(ctl_table *table, int op)
{
return test_perm(table->mode, op);
@@ -275,7 +317,8 @@ repeat:
for ( ; table->ctl_name; table++) {
int n;
- get_user(n,name);
+ if(get_user(n,name))
+ return -EFAULT;
if (n == table->ctl_name ||
table->ctl_name == CTL_ANY) {
if (table->child) {
@@ -335,15 +378,18 @@ int do_sysctl_strategy (ctl_table *table,
if (len) {
if (len > table->maxlen)
len = table->maxlen;
- copy_to_user(oldval, table->data, len);
- put_user(len, oldlenp);
+ if(copy_to_user(oldval, table->data, len))
+ return -EFAULT;
+ if(put_user(len, oldlenp))
+ return -EFAULT;
}
}
if (newval && newlen) {
len = newlen;
if (len > table->maxlen)
len = table->maxlen;
- copy_from_user(table->data, newval, len);
+ if(copy_from_user(table->data, newval, len))
+ return -EFAULT;
}
}
return 0;
@@ -364,7 +410,8 @@ static int do_securelevel_strategy (ctl_table *table,
if (newval && newlen) {
if (newlen != sizeof (int))
return -EINVAL;
- copy_from_user (&level, newval, newlen);
+ if(copy_from_user (&level, newval, newlen))
+ return -EFAULT;
if (level < securelevel && current->pid != 1)
return -EPERM;
}
@@ -406,11 +453,12 @@ void unregister_sysctl_table(struct ctl_table_header * table)
/* Scan the sysctl entries in table and add them all into /proc */
static void register_proc_table(ctl_table * table, struct proc_dir_entry *root)
{
- struct proc_dir_entry *de, *tmp;
- int exists;
+ struct proc_dir_entry *de;
+ int len;
+ mode_t mode;
for (; table->ctl_name; table++) {
- exists = 0;
+ de = 0;
/* Can't do anything without a proc name. */
if (!table->procname)
continue;
@@ -418,46 +466,32 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root)
if (!table->proc_handler &&
!table->child)
continue;
-
- de = kmalloc(sizeof(*de), GFP_KERNEL);
- if (!de) continue;
- de->namelen = strlen(table->procname);
- de->name = table->procname;
- de->mode = table->mode;
- de->nlink = 1;
- de->uid = 0;
- de->gid = 0;
- de->size = 0;
- de->get_info = 0; /* For internal use if we want it */
- de->fill_inode = 0; /* To override struct inode fields */
- de->next = de->subdir = 0;
- de->data = (void *) table;
- /* Is it a file? */
- if (table->proc_handler) {
- de->ops = &proc_sys_inode_operations;
- de->mode |= S_IFREG;
- }
- /* Otherwise it's a subdir */
- else {
- /* First check to see if it already exists */
- for (tmp = root->subdir; tmp; tmp = tmp->next) {
- if (tmp->namelen == de->namelen &&
- !memcmp(tmp->name,de->name,de->namelen)) {
- exists = 1;
- kfree (de);
- de = tmp;
- }
- }
- if (!exists) {
- de->ops = &proc_dir_inode_operations;
- de->nlink++;
- de->mode |= S_IFDIR;
+
+ len = strlen(table->procname);
+ mode = table->mode;
+
+ if (table->proc_handler)
+ mode |= S_IFREG;
+ else {
+ mode |= S_IFDIR;
+ for (de = root->subdir; de; de = de->next) {
+ if (proc_match(len, table->procname, de))
+ break;
}
+ /* If the subdir exists already, de is non-NULL */
+ }
+
+ if (!de) {
+ de = create_proc_entry(table->procname, mode, root);
+ if (!de)
+ continue;
+ de->data = (void *) table;
+ if (table->proc_handler)
+ de->ops = &proc_sys_inode_operations;
+
}
table->de = de;
- if (!exists)
- proc_register_dynamic(root, de);
- if (de->mode & S_IFDIR )
+ if (de->mode & S_IFDIR)
register_proc_table(table->child, de);
}
}
@@ -494,10 +528,6 @@ static long do_rw_proc(int write, struct inode * inode, struct file * file,
size_t res;
long error;
- error = verify_area(write ? VERIFY_READ : VERIFY_WRITE, buf, count);
- if (error)
- return error;
-
de = (struct proc_dir_entry*) inode->u.generic_ip;
if (!de || !de->data)
return -ENOTDIR;
@@ -548,14 +578,16 @@ int proc_dostring(ctl_table *table, int write, struct file *filp,
len = 0;
p = buffer;
while (len < *lenp) {
- get_user(c, p++);
+ if(get_user(c, p++))
+ return -EFAULT;
if (c == 0 || c == '\n')
break;
len++;
}
if (len >= table->maxlen)
len = table->maxlen-1;
- copy_from_user(table->data, buffer, len);
+ if(copy_from_user(table->data, buffer, len))
+ return -EFAULT;
((char *) table->data)[len] = 0;
filp->f_pos += *lenp;
} else {
@@ -565,9 +597,11 @@ int proc_dostring(ctl_table *table, int write, struct file *filp,
if (len > *lenp)
len = *lenp;
if (len)
- copy_to_user(buffer, table->data, len);
+ if(copy_to_user(buffer, table->data, len))
+ return -EFAULT;
if (len < *lenp) {
- put_user('\n', ((char *) buffer) + len);
+ if(put_user('\n', ((char *) buffer) + len))
+ return -EFAULT;
len++;
}
*lenp = len;
@@ -597,7 +631,8 @@ int proc_dointvec(ctl_table *table, int write, struct file *filp,
if (write) {
while (left) {
char c;
- get_user(c,(char *) buffer);
+ if(get_user(c,(char *) buffer))
+ return -EFAULT;
if (!isspace(c))
break;
left--;
@@ -609,7 +644,8 @@ int proc_dointvec(ctl_table *table, int write, struct file *filp,
len = left;
if (len > TMPBUFLEN-1)
len = TMPBUFLEN-1;
- copy_from_user(buf, buffer, len);
+ if(copy_from_user(buf, buffer, len))
+ return -EFAULT;
buf[len] = 0;
p = buf;
if (*p == '-' && left > 1) {
@@ -635,21 +671,24 @@ int proc_dointvec(ctl_table *table, int write, struct file *filp,
len = strlen(buf);
if (len > left)
len = left;
- copy_to_user(buffer, buf, len);
+ if(copy_to_user(buffer, buf, len))
+ return -EFAULT;
left -= len;
buffer += len;
}
}
if (!write && !first && left) {
- put_user('\n', (char *) buffer);
+ if(put_user('\n', (char *) buffer))
+ return -EFAULT;
left--, buffer++;
}
if (write) {
p = (char *) buffer;
while (left) {
char c;
- get_user(c, p++);
+ if(get_user(c, p++))
+ return -EFAULT;
if (!isspace(c))
break;
left--;
@@ -685,7 +724,8 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
if (write) {
while (left) {
char c;
- get_user(c, (char *) buffer);
+ if(get_user(c, (char *) buffer))
+ return -EFAULT;
if (!isspace(c))
break;
left--;
@@ -697,7 +737,8 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
len = left;
if (len > TMPBUFLEN-1)
len = TMPBUFLEN-1;
- copy_from_user(buf, buffer, len);
+ if(copy_from_user(buf, buffer, len))
+ return -EFAULT;
buf[len] = 0;
p = buf;
if (*p == '-' && left > 1) {
@@ -728,21 +769,24 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
len = strlen(buf);
if (len > left)
len = left;
- copy_to_user(buffer, buf, len);
+ if(copy_to_user(buffer, buf, len))
+ return -EFAULT;
left -= len;
buffer += len;
}
}
if (!write && !first && left) {
- put_user('\n', (char *) buffer);
+ if(put_user('\n', (char *) buffer))
+ return -EFAULT;
left--, buffer++;
}
if (write) {
p = (char *) buffer;
while (left) {
char c;
- get_user(c, p++);
+ if(get_user(c, p++))
+ return -EFAULT;
if (!isspace(c))
break;
left--;
@@ -793,22 +837,27 @@ int sysctl_string(ctl_table *table, int *name, int nlen,
return -ENOTDIR;
if (oldval && oldlenp) {
- get_user(len, oldlenp);
+ if(get_user(len, oldlenp))
+ return -EFAULT;
if (len) {
l = strlen(table->data);
if (len > l) len = l;
if (len >= table->maxlen)
len = table->maxlen;
- copy_to_user(oldval, table->data, len);
- put_user(0, ((char *) oldval) + len);
- put_user(len, oldlenp);
+ if(copy_to_user(oldval, table->data, len))
+ return -EFAULT;
+ if(put_user(0, ((char *) oldval) + len))
+ return -EFAULT;
+ if(put_user(len, oldlenp))
+ return -EFAULT;
}
}
if (newval && newlen) {
len = newlen;
if (len > table->maxlen)
len = table->maxlen;
- copy_from_user(table->data, newval, len);
+ if(copy_from_user(table->data, newval, len))
+ return -EFAULT;
if (len == table->maxlen)
len--;
((char *) table->data)[len] = 0;
@@ -865,14 +914,16 @@ int do_string (
return -EINVAL;
if (oldval) {
int old_l;
- get_user(old_l, oldlenp);
+ if(get_user(old_l, oldlenp))
+ return -EFAULT;
if (l > old_l)
return -ENOMEM;
- put_user(l, oldlenp);
- copy_to_user(oldval, data, l);
+ if(put_user(l, oldlenp) || copy_to_user(oldval, data, l))
+ return -EFAULT;
}
if (newval) {
- copy_from_user(data, newval, newlen);
+ if(copy_from_user(data, newval, newlen))
+ return -EFAULT;
data[newlen] = 0;
}
return 0;
@@ -888,14 +939,16 @@ int do_int (
return -EINVAL;
if (oldval) {
int old_l;
- get_user(old_l, oldlenp);
+ if(get_user(old_l, oldlenp))
+ return -EFAULT;
if (old_l < sizeof(int))
return -ENOMEM;
- put_user(sizeof(int), oldlenp);
- copy_to_user(oldval, data, sizeof(int));
+ if(put_user(sizeof(int), oldlenp)||copy_to_user(oldval, data, sizeof(int)))
+ return -EFAULT;
}
if (newval)
- copy_from_user(data, newval, sizeof(int));
+ if(copy_from_user(data, newval, sizeof(int)))
+ return -EFAULT;
return 0;
}
@@ -909,14 +962,71 @@ int do_struct (
return -EINVAL;
if (oldval) {
int old_l;
- get_user(old_l, oldlenp);
+ if(get_user(old_l, oldlenp))
+ return -EFAULT;
if (old_l < len)
return -ENOMEM;
- put_user(len, oldlenp);
- copy_to_user(oldval, data, len);
+ if(put_user(len, oldlenp) || copy_to_user(oldval, data, len))
+ return -EFAULT;
}
if (newval)
- copy_from_user(data, newval, len);
+ if(copy_from_user(data, newval, len))
+ return -EFAULT;
return 0;
}
+
+#else /* CONFIG_SYSCTL */
+
+
+extern asmlinkage int sys_sysctl(struct __sysctl_args *args)
+{
+ return -ENOSYS;
+}
+
+int sysctl_string(ctl_table *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ return -ENOSYS;
+}
+
+int sysctl_intvec(ctl_table *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ return -ENOSYS;
+}
+
+int proc_dostring(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+struct ctl_table_header * register_sysctl_table(ctl_table * table,
+ int insert_at_head)
+{
+ return 0;
+}
+
+void unregister_sysctl_table(struct ctl_table_header * table)
+{
+}
+
+#endif /* CONFIG_SYSCTL */
+
+
+
diff --git a/kernel/time.c b/kernel/time.c
index c2090a583..d20fdbd98 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -25,6 +25,8 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/timex.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <asm/uaccess.h>
@@ -64,6 +66,8 @@ asmlinkage int sys_time(int * tloc)
{
int i;
+ /* SMP: This is fairly trivial. We grab CURRENT_TIME and
+ stuff it to user space. No side effects */
i = CURRENT_TIME;
if (tloc) {
if (put_user(i,tloc))
@@ -78,6 +82,7 @@ asmlinkage int sys_time(int * tloc)
* why not move it into the appropriate arch directory (for those
* architectures that need it).
*/
+
asmlinkage int sys_stime(int * tptr)
{
int value;
@@ -154,6 +159,7 @@ asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz)
if (!suser())
return -EPERM;
+
if (tv) {
if (copy_from_user(&new_tv, tv, sizeof(*tv)))
return -EFAULT;
@@ -161,6 +167,8 @@ asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz)
if (tz) {
if (copy_from_user(&new_tz, tz, sizeof(*tz)))
return -EFAULT;
+
+ /* SMP safe, global irq locking makes it work. */
sys_tz = new_tz;
if (firsttime) {
firsttime = 0;
@@ -169,7 +177,12 @@ asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz)
}
}
if (tv)
+ {
+ /* SMP safe, again the code in arch/foo/time.c should
+ * globally block out interrupts when it runs.
+ */
do_settimeofday(&new_tv);
+ }
return 0;
}
@@ -197,37 +210,32 @@ void (*hardpps_ptr)(struct timeval *) = (void (*)(struct timeval *))0;
asmlinkage int sys_adjtimex(struct timex *txc_p)
{
long ltemp, mtemp, save_adjust;
- int error;
-
- /* Local copy of parameter */
- struct timex txc;
+ struct timex txc; /* Local copy of parameter */
/* Copy the user data space into the kernel copy
* structure. But bear in mind that the structures
* may change
*/
- error = copy_from_user(&txc, txc_p, sizeof(struct timex));
- if (error)
- return -EFAULT;
+ if(copy_from_user(&txc, txc_p, sizeof(struct timex)))
+ return -EFAULT;
/* In order to modify anything, you gotta be super-user! */
if (txc.modes && !suser())
return -EPERM;
-
- /* Now we validate the data before disabling interrupts
- */
+
+ /* Now we validate the data before disabling interrupts */
if (txc.modes != ADJ_OFFSET_SINGLESHOT && (txc.modes & ADJ_OFFSET))
/* adjustment Offset limited to +- .512 seconds */
- if (txc.offset <= - MAXPHASE || txc.offset >= MAXPHASE )
- return -EINVAL;
+ if (txc.offset <= - MAXPHASE || txc.offset >= MAXPHASE )
+ return -EINVAL;
/* if the quartz is off by more than 10% something is VERY wrong ! */
if (txc.modes & ADJ_TICK)
- if (txc.tick < 900000/HZ || txc.tick > 1100000/HZ)
- return -EINVAL;
+ if (txc.tick < 900000/HZ || txc.tick > 1100000/HZ)
+ return -EINVAL;
- cli();
+ cli(); /* SMP: global cli() is enough protection. */
/* Save for later - semantics of adjtime is to return old value */
save_adjust = time_adjust;