diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1997-04-29 21:13:14 +0000 |
---|---|---|
committer | <ralf@linux-mips.org> | 1997-04-29 21:13:14 +0000 |
commit | 19c9bba94152148523ba0f7ef7cffe3d45656b11 (patch) | |
tree | 40b1cb534496a7f1ca0f5c314a523c69f1fee464 /kernel | |
parent | 7206675c40394c78a90e74812bbdbf8cf3cca1be (diff) |
Import of Linux/MIPS 2.1.36
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/exec_domain.c | 33 | ||||
-rw-r--r-- | kernel/exit.c | 276 | ||||
-rw-r--r-- | kernel/fork.c | 89 | ||||
-rw-r--r-- | kernel/info.c | 4 | ||||
-rw-r--r-- | kernel/itimer.c | 36 | ||||
-rw-r--r-- | kernel/ksyms.c | 564 | ||||
-rw-r--r-- | kernel/module.c | 1404 | ||||
-rw-r--r-- | kernel/panic.c | 29 | ||||
-rw-r--r-- | kernel/printk.c | 239 | ||||
-rw-r--r-- | kernel/resource.c | 69 | ||||
-rw-r--r-- | kernel/sched.c | 869 | ||||
-rw-r--r-- | kernel/signal.c | 128 | ||||
-rw-r--r-- | kernel/softirq.c | 54 | ||||
-rw-r--r-- | kernel/sys.c | 491 | ||||
-rw-r--r-- | kernel/sysctl.c | 342 | ||||
-rw-r--r-- | kernel/time.c | 38 |
16 files changed, 2774 insertions, 1891 deletions
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index 9a202359a..5d7e2f056 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -2,6 +2,9 @@ #include <linux/ptrace.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/module.h> static asmlinkage void no_lcall7(struct pt_regs * regs); @@ -34,14 +37,14 @@ static asmlinkage void no_lcall7(struct pt_regs * regs) * personality set incorrectly. Check to see whether SVr4 is available, * and use it, otherwise give the user a SEGV. */ - if (current->exec_domain && current->exec_domain->use_count) - (*current->exec_domain->use_count)--; + if (current->exec_domain && current->exec_domain->module) + __MOD_DEC_USE_COUNT(current->exec_domain->module); current->personality = PER_SVR4; current->exec_domain = lookup_exec_domain(current->personality); - if (current->exec_domain && current->exec_domain->use_count) - (*current->exec_domain->use_count)++; + if (current->exec_domain && current->exec_domain->module) + __MOD_INC_USE_COUNT(current->exec_domain->module); if (current->exec_domain && current->exec_domain->handler && current->exec_domain->handler != no_lcall7) { @@ -103,21 +106,27 @@ asmlinkage int sys_personality(unsigned long personality) { struct exec_domain *it; unsigned long old_personality; + int ret; + lock_kernel(); + ret = current->personality; if (personality == 0xffffffff) - return current->personality; + goto out; + ret = -EINVAL; it = lookup_exec_domain(personality); if (!it) - return -EINVAL; + goto out; old_personality = current->personality; - if (current->exec_domain && current->exec_domain->use_count) - (*current->exec_domain->use_count)--; + if (current->exec_domain && current->exec_domain->module) + __MOD_DEC_USE_COUNT(current->exec_domain->module); current->personality = personality; current->exec_domain = it; - if (current->exec_domain->use_count) - (*current->exec_domain->use_count)++; - - return old_personality; + if 
(current->exec_domain->module) + __MOD_INC_USE_COUNT(current->exec_domain->module); + ret = old_personality; +out: + unlock_kernel(); + return ret; } diff --git a/kernel/exit.c b/kernel/exit.c index d4c54209d..0d03916e8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -6,6 +6,7 @@ #undef DEBUG_PROC_TREE +#include <linux/config.h> #include <linux/wait.h> #include <linux/errno.h> #include <linux/signal.h> @@ -16,9 +17,14 @@ #include <linux/tty.h> #include <linux/malloc.h> #include <linux/interrupt.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/module.h> +#include <asm/system.h> #include <asm/uaccess.h> #include <asm/pgtable.h> +#include <asm/mmu_context.h> extern void sem_exit (void); extern void acct_process (long exitcode); @@ -36,18 +42,23 @@ static inline void generate(unsigned long sig, struct task_struct * p) * be handled immediately (ie non-blocked and untraced) * and that is ignored (either explicitly or by default) */ + spin_lock_irq(&p->sig->siglock); if (!(mask & p->blocked) && !(p->flags & PF_PTRACED)) { /* don't bother with ignored signals (but SIGCHLD is special) */ if (sa->sa_handler == SIG_IGN && sig != SIGCHLD) - return; + goto out; /* some signals are ignored by default.. 
(but SIGCONT already did its deed) */ if ((sa->sa_handler == SIG_DFL) && (sig == SIGCONT || sig == SIGCHLD || sig == SIGWINCH || sig == SIGURG)) - return; + goto out; } + spin_lock(&p->sigmask_lock); p->signal |= mask; + spin_unlock(&p->sigmask_lock); if (p->state == TASK_INTERRUPTIBLE && (p->signal & ~p->blocked)) wake_up_process(p); +out: + spin_unlock_irq(&p->sig->siglock); } /* @@ -60,15 +71,22 @@ void force_sig(unsigned long sig, struct task_struct * p) if (p->sig) { unsigned long mask = 1UL << sig; struct sigaction *sa = p->sig->action + sig; + + spin_lock_irq(&p->sig->siglock); + + spin_lock(&p->sigmask_lock); p->signal |= mask; p->blocked &= ~mask; + spin_unlock(&p->sigmask_lock); + if (sa->sa_handler == SIG_IGN) sa->sa_handler = SIG_DFL; if (p->state == TASK_INTERRUPTIBLE) wake_up_process(p); + + spin_unlock_irq(&p->sig->siglock); } } - int send_sig(unsigned long sig,struct task_struct * p,int priv) { @@ -79,24 +97,23 @@ int send_sig(unsigned long sig,struct task_struct * p,int priv) (current->uid ^ p->suid) && (current->uid ^ p->uid) && !suser()) return -EPERM; - if (!sig) - return 0; - /* - * Forget it if the process is already zombie'd. 
- */ - if (!p->sig) - return 0; - if ((sig == SIGKILL) || (sig == SIGCONT)) { - if (p->state == TASK_STOPPED) - wake_up_process(p); - p->exit_code = 0; - p->signal &= ~( (1<<(SIGSTOP-1)) | (1<<(SIGTSTP-1)) | - (1<<(SIGTTIN-1)) | (1<<(SIGTTOU-1)) ); + + if (sig && p->sig) { + spin_lock_irq(&p->sigmask_lock); + if ((sig == SIGKILL) || (sig == SIGCONT)) { + if (p->state == TASK_STOPPED) + wake_up_process(p); + p->exit_code = 0; + p->signal &= ~( (1<<(SIGSTOP-1)) | (1<<(SIGTSTP-1)) | + (1<<(SIGTTIN-1)) | (1<<(SIGTTOU-1)) ); + } + if (sig == SIGSTOP || sig == SIGTSTP || sig == SIGTTIN || sig == SIGTTOU) + p->signal &= ~(1<<(SIGCONT-1)); + spin_unlock_irq(&p->sigmask_lock); + + /* Actually generate the signal */ + generate(sig,p); } - if (sig == SIGSTOP || sig == SIGTSTP || sig == SIGTTIN || sig == SIGTTOU) - p->signal &= ~(1<<(SIGCONT-1)); - /* Actually generate the signal */ - generate(sig,p); return 0; } @@ -120,6 +137,12 @@ void release(struct task_struct * p) } for (i=1 ; i<NR_TASKS ; i++) if (task[i] == p) { +#ifdef __SMP__ + /* FIXME! Cheesy, but kills the window... -DaveM */ + while(p->processor != NO_PROC_ID) + barrier(); + spin_unlock_wait(&scheduler_lock); +#endif nr_tasks--; task[i] = NULL; REMOVE_LINKS(p); @@ -130,7 +153,7 @@ void release(struct task_struct * p) current->cmin_flt += p->min_flt + p->cmin_flt; current->cmaj_flt += p->maj_flt + p->cmaj_flt; current->cnswap += p->nswap + p->cnswap; - kfree(p); + free_task_struct(p); return; } panic("trying to release non-existent task"); @@ -152,14 +175,14 @@ int bad_task_ptr(struct task_struct *p) return 0; return 1; } - + /* * This routine scans the pid tree and makes sure the rep invariant still * holds. Used for debugging only, since it's very slow.... * * It looks a lot scarier than it really is.... 
we're doing nothing more - * than verifying the doubly-linked list found in p_ysptr and p_osptr, - * and checking it corresponds with the process tree defined by p_cptr and + * than verifying the doubly-linked list found in p_ysptr and p_osptr, + * and checking it corresponds with the process tree defined by p_cptr and * p_pptr; */ void audit_ptree(void) @@ -240,14 +263,18 @@ int session_of_pgrp(int pgrp) int fallback; fallback = -1; + read_lock(&tasklist_lock); for_each_task(p) { if (p->session <= 0) continue; - if (p->pgrp == pgrp) - return p->session; + if (p->pgrp == pgrp) { + fallback = p->session; + break; + } if (p->pid == pgrp) fallback = p->session; } + read_unlock(&tasklist_lock); return fallback; } @@ -257,21 +284,29 @@ int session_of_pgrp(int pgrp) */ int kill_pg(int pgrp, int sig, int priv) { - struct task_struct *p; - int err,retval = -ESRCH; - int found = 0; + int retval; - if (sig<0 || sig>32 || pgrp<=0) - return -EINVAL; - for_each_task(p) { - if (p->pgrp == pgrp) { - if ((err = send_sig(sig,p,priv)) != 0) - retval = err; - else - found++; + retval = -EINVAL; + if (sig >= 0 && sig <= 32 && pgrp > 0) { + struct task_struct *p; + int found = 0; + + retval = -ESRCH; + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->pgrp == pgrp) { + int err = send_sig(sig,p,priv); + if (err != 0) + retval = err; + else + found++; + } } + read_unlock(&tasklist_lock); + if (found) + retval = 0; } - return(found ? 
0 : retval); + return retval; } /* @@ -281,34 +316,51 @@ int kill_pg(int pgrp, int sig, int priv) */ int kill_sl(int sess, int sig, int priv) { - struct task_struct *p; - int err,retval = -ESRCH; - int found = 0; + int retval; - if (sig<0 || sig>32 || sess<=0) - return -EINVAL; - for_each_task(p) { - if (p->session == sess && p->leader) { - if ((err = send_sig(sig,p,priv)) != 0) - retval = err; - else - found++; + retval = -EINVAL; + if (sig >= 0 && sig <= 32 && sess > 0) { + struct task_struct *p; + int found = 0; + + retval = -ESRCH; + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->leader && p->session == sess) { + int err = send_sig(sig,p,priv); + + if (err) + retval = err; + else + found++; + } } + read_unlock(&tasklist_lock); + if (found) + retval = 0; } - return(found ? 0 : retval); + return retval; } int kill_proc(int pid, int sig, int priv) { - struct task_struct *p; + int retval; - if (sig<0 || sig>32) - return -EINVAL; - for_each_task(p) { - if (p && p->pid == pid) - return send_sig(sig,p,priv); + retval = -EINVAL; + if (sig >= 0 && sig <= 32) { + struct task_struct *p; + + retval = -ESRCH; + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->pid != pid) + continue; + retval = send_sig(sig,p,priv); + break; + } + read_unlock(&tasklist_lock); } - return(-ESRCH); + return retval; } /* @@ -317,49 +369,58 @@ int kill_proc(int pid, int sig, int priv) */ asmlinkage int sys_kill(int pid,int sig) { - int err, retval = 0, count = 0; - if (!pid) - return(kill_pg(current->pgrp,sig,0)); + return kill_pg(current->pgrp,sig,0); + if (pid == -1) { + int retval = 0, count = 0; struct task_struct * p; + + read_lock(&tasklist_lock); for_each_task(p) { if (p->pid > 1 && p != current) { + int err; ++count; if ((err = send_sig(sig,p,0)) != -EPERM) retval = err; } } - return(count ? retval : -ESRCH); + read_unlock(&tasklist_lock); + return count ? 
retval : -ESRCH; } - if (pid < 0) - return(kill_pg(-pid,sig,0)); + if (pid < 0) + return kill_pg(-pid,sig,0); + /* Normal kill */ - return(kill_proc(pid,sig,0)); + return kill_proc(pid,sig,0); } /* * Determine if a process group is "orphaned", according to the POSIX * definition in 2.2.2.52. Orphaned process groups are not to be affected - * by terminal-generated stop signals. Newly orphaned process groups are + * by terminal-generated stop signals. Newly orphaned process groups are * to receive a SIGHUP and a SIGCONT. - * + * * "I ask you, have you ever known what it is to be an orphan?" */ static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task) { struct task_struct *p; + read_lock(&tasklist_lock); for_each_task(p) { - if ((p == ignored_task) || (p->pgrp != pgrp) || + if ((p == ignored_task) || (p->pgrp != pgrp) || (p->state == TASK_ZOMBIE) || (p->p_pptr->pid == 1)) continue; if ((p->p_pptr->pgrp != pgrp) && - (p->p_pptr->session == p->session)) - return 0; + (p->p_pptr->session == p->session)) { + read_unlock(&tasklist_lock); + return 0; + } } - return(1); /* (sighing) "Often!" */ + read_unlock(&tasklist_lock); + return 1; /* (sighing) "Often!" 
*/ } int is_orphaned_pgrp(int pgrp) @@ -369,21 +430,27 @@ int is_orphaned_pgrp(int pgrp) static inline int has_stopped_jobs(int pgrp) { + int retval = 0; struct task_struct * p; + read_lock(&tasklist_lock); for_each_task(p) { if (p->pgrp != pgrp) continue; - if (p->state == TASK_STOPPED) - return(1); + if (p->state != TASK_STOPPED) + continue; + retval = 1; + break; } - return(0); + read_unlock(&tasklist_lock); + return retval; } static inline void forget_original_parent(struct task_struct * father) { struct task_struct * p; + read_lock(&tasklist_lock); for_each_task(p) { if (p->p_opptr == father) if (task[smp_num_cpus]) /* init */ @@ -391,6 +458,7 @@ static inline void forget_original_parent(struct task_struct * father) else p->p_opptr = task[0]; } + read_unlock(&tasklist_lock); } static inline void close_files(struct files_struct * files) @@ -456,9 +524,8 @@ static inline void __exit_sighand(struct task_struct *tsk) if (sig) { tsk->sig = NULL; - if (!--sig->count) { + if (atomic_dec_and_test(&sig->count)) kfree(sig); - } } } @@ -475,9 +542,7 @@ static inline void __exit_mm(struct task_struct * tsk) if (mm != &init_mm) { flush_cache_mm(mm); flush_tlb_mm(mm); -#ifdef __mips__ - mm->context = 0; -#endif + destroy_context(mm); tsk->mm = &init_mm; tsk->swappable = 0; SET_PAGE_DIR(tsk, swapper_pg_dir); @@ -496,7 +561,7 @@ void exit_mm(struct task_struct *tsk) __exit_mm(tsk); } -/* +/* * Send signals to all our closest relatives so that they know * to properly mourn us.. */ @@ -505,7 +570,7 @@ static void exit_notify(void) struct task_struct * p; forget_original_parent(current); - /* + /* * Check to see if any process groups have become orphaned * as a result of our exiting, and if they have any stopped * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) @@ -523,10 +588,10 @@ static void exit_notify(void) } /* Let father know we died */ notify_parent(current); - + /* * This loop does two things: - * + * * A. Make init inherit all the child processes * B. 
Check to see if any process groups have become orphaned * as a result of our exiting, and if they have any stopped @@ -547,7 +612,7 @@ static void exit_notify(void) notify_parent(p); /* * process group orphan check - * Case ii: Our child is in a different pgrp + * Case ii: Our child is in a different pgrp * than we are, and it was the only connection * outside, so the child pgrp is now orphaned. */ @@ -565,9 +630,9 @@ static void exit_notify(void) NORET_TYPE void do_exit(long code) { - if (intr_count) { + if (in_interrupt()) { + local_irq_count[smp_processor_id()] = 0; /* Not really correct */ printk("Aiee, killing interrupt handler\n"); - intr_count = 0; } fake_volatile: acct_process(code); @@ -576,6 +641,9 @@ fake_volatile: sem_exit(); kerneld_exit(); __exit_mm(current); +#if CONFIG_AP1000 + exit_msc(current); +#endif __exit_files(current); __exit_fs(current); __exit_sighand(current); @@ -586,10 +654,10 @@ fake_volatile: #ifdef DEBUG_PROC_TREE audit_ptree(); #endif - if (current->exec_domain && current->exec_domain->use_count) - (*current->exec_domain->use_count)--; - if (current->binfmt && current->binfmt->use_count) - (*current->binfmt->use_count)--; + if (current->exec_domain && current->exec_domain->module) + __MOD_DEC_USE_COUNT(current->exec_domain->module); + if (current->binfmt && current->binfmt->module) + __MOD_DEC_USE_COUNT(current->binfmt->module); schedule(); /* * In order to get rid of the "volatile function does return" message @@ -609,7 +677,9 @@ fake_volatile: asmlinkage int sys_exit(int error_code) { + lock_kernel(); do_exit((error_code&0xff)<<8); + unlock_kernel(); } asmlinkage int sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru) @@ -619,21 +689,21 @@ asmlinkage int sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct struct task_struct *p; if (stat_addr) { - flag = verify_area(VERIFY_WRITE, stat_addr, sizeof(*stat_addr)); - if (flag) - return flag; + if(verify_area(VERIFY_WRITE, stat_addr, 
sizeof(*stat_addr))) + return -EFAULT; } if (ru) { - flag = verify_area(VERIFY_WRITE, ru, sizeof(*ru)); - if (flag) - return flag; + if(verify_area(VERIFY_WRITE, ru, sizeof(*ru))) + return -EFAULT; } + if (options & ~(WNOHANG|WUNTRACED|__WCLONE)) - return -EINVAL; + return -EINVAL; add_wait_queue(&current->wait_chldexit,&wait); repeat: - flag=0; + flag = 0; + read_lock(&tasklist_lock); for (p = current->p_cptr ; p ; p = p->p_osptr) { if (pid>0) { if (p->pid != pid) @@ -655,23 +725,28 @@ repeat: continue; if (!(options & WUNTRACED) && !(p->flags & PF_PTRACED)) continue; + read_unlock(&tasklist_lock); if (ru != NULL) getrusage(p, RUSAGE_BOTH, ru); if (stat_addr) - put_user((p->exit_code << 8) | 0x7f, - stat_addr); + __put_user((p->exit_code << 8) | 0x7f, + stat_addr); p->exit_code = 0; retval = p->pid; goto end_wait4; case TASK_ZOMBIE: current->cutime += p->utime + p->cutime; current->cstime += p->stime + p->cstime; + read_unlock(&tasklist_lock); if (ru != NULL) getrusage(p, RUSAGE_BOTH, ru); if (stat_addr) - put_user(p->exit_code, stat_addr); + __put_user(p->exit_code, stat_addr); retval = p->pid; if (p->p_opptr != p->p_pptr) { + /* Note this grabs tasklist_lock + * as a writer... (twice!) + */ REMOVE_LINKS(p); p->p_pptr = p->p_opptr; SET_LINKS(p); @@ -686,6 +761,7 @@ repeat: continue; } } + read_unlock(&tasklist_lock); if (flag) { retval = 0; if (options & WNOHANG) diff --git a/kernel/fork.c b/kernel/fork.c index b81d98e77..6204ffeaf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -15,13 +15,17 @@ #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/slab.h> #include <linux/unistd.h> #include <linux/ptrace.h> #include <linux/malloc.h> #include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/module.h> #include <asm/system.h> #include <asm/pgtable.h> +#include <asm/mmu_context.h> #include <asm/uaccess.h> int nr_tasks=1; @@ -43,11 +47,15 @@ static inline int find_empty_process(void) max_tasks--; /* count the new process.. 
*/ if (max_tasks < nr_tasks) { struct task_struct *p; + read_lock(&tasklist_lock); for_each_task (p) { if (p->uid == current->uid) - if (--max_tasks < 0) + if (--max_tasks < 0) { + read_unlock(&tasklist_lock); return -EAGAIN; + } } + read_unlock(&tasklist_lock); } } for (i = 0 ; i < NR_TASKS ; i++) { @@ -63,6 +71,8 @@ static int get_pid(unsigned long flags) if (flags & CLONE_PID) return current->pid; + + read_lock(&tasklist_lock); repeat: if ((++last_pid) & 0xffff8000) last_pid=1; @@ -72,6 +82,8 @@ repeat: p->session == last_pid) goto repeat; } + read_unlock(&tasklist_lock); + return last_pid; } @@ -81,10 +93,12 @@ static inline int dup_mmap(struct mm_struct * mm) mm->mmap = NULL; p = &mm->mmap; + flush_cache_mm(current->mm); for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) { - tmp = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL); + tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); if (!tmp) { exit_mmap(mm); + flush_tlb_mm(current->mm); return -ENOMEM; } *tmp = *mpnt; @@ -100,6 +114,7 @@ static inline int dup_mmap(struct mm_struct * mm) } if (copy_page_range(mm, current->mm, tmp)) { exit_mmap(mm); + flush_tlb_mm(current->mm); return -ENOMEM; } if (tmp->vm_ops && tmp->vm_ops->open) @@ -107,6 +122,7 @@ static inline int dup_mmap(struct mm_struct * mm) *p = tmp; p = &tmp->vm_next; } + flush_tlb_mm(current->mm); build_mmap_avl(mm); return 0; } @@ -118,9 +134,7 @@ static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk) if (!mm) return -1; *mm = *current->mm; -#ifdef __mips__ - mm->context = 0; -#endif + init_new_context(mm); mm->count = 1; mm->def_flags = 0; tsk->mm = mm; @@ -128,15 +142,17 @@ static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk) tsk->cmin_flt = tsk->cmaj_flt = 0; tsk->nswap = tsk->cnswap = 0; if (new_page_tables(tsk)) - return -1; + goto free_mm; if (dup_mmap(mm)) { free_page_tables(mm); +free_mm: + kfree(mm); return -1; } return 0; } - SET_PAGE_DIR(tsk, 
current->mm->pgd); current->mm->count++; + SET_PAGE_DIR(tsk, current->mm->pgd); return 0; } @@ -174,7 +190,7 @@ static inline int copy_files(unsigned long clone_flags, struct task_struct * tsk tsk->files = newf; if (!newf) return -1; - + newf->count = 1; newf->close_on_exec = oldf->close_on_exec; newf->open_fds = oldf->open_fds; @@ -195,13 +211,14 @@ static inline int copy_files(unsigned long clone_flags, struct task_struct * tsk static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) { if (clone_flags & CLONE_SIGHAND) { - current->sig->count++; + atomic_inc(&current->sig->count); return 0; } tsk->sig = kmalloc(sizeof(*tsk->sig), GFP_KERNEL); if (!tsk->sig) return -1; - tsk->sig->count = 1; + spin_lock_init(&tsk->sig->siglock); + atomic_set(&tsk->sig->count, 1); memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action)); return 0; } @@ -218,10 +235,11 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) unsigned long new_stack; struct task_struct *p; - p = (struct task_struct *) kmalloc(sizeof(*p), GFP_KERNEL); + lock_kernel(); + p = alloc_task_struct(); if (!p) goto bad_fork; - new_stack = alloc_kernel_stack(); + new_stack = alloc_kernel_stack(p); if (!new_stack) goto bad_fork_free_p; error = -EAGAIN; @@ -231,10 +249,10 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) *p = *current; - if (p->exec_domain && p->exec_domain->use_count) - (*p->exec_domain->use_count)++; - if (p->binfmt && p->binfmt->use_count) - (*p->binfmt->use_count)++; + if (p->exec_domain && p->exec_domain->module) + __MOD_INC_USE_COUNT(p->exec_domain->module); + if (p->binfmt && p->binfmt->module) + __MOD_INC_USE_COUNT(p->binfmt->module); p->did_exec = 0; p->swappable = 0; @@ -260,8 +278,8 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) p->cutime = p->cstime = 0; #ifdef __SMP__ p->processor = NO_PROC_ID; - p->lock_depth = 1; #endif + p->lock_depth = 0; 
p->start_time = jiffies; task[nr] = p; SET_LINKS(p); @@ -277,16 +295,33 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) goto bad_fork_cleanup_fs; if (copy_mm(clone_flags, p)) goto bad_fork_cleanup_sighand; - copy_thread(nr, clone_flags, usp, p, regs); + error = copy_thread(nr, clone_flags, usp, p, regs); + if (error) + goto bad_fork_cleanup_sighand; p->semundo = NULL; /* ok, now we should be set up.. */ p->swappable = 1; p->exit_signal = clone_flags & CSIGNAL; - p->counter = current->counter >> 1; - wake_up_process(p); /* do this last, just in case */ + + /* + * "share" dynamic priority between parent and child, thus the + * total amount of dynamic priorities in the system doesnt change, + * more scheduling fairness. This is only important in the first + * timeslice, on the long run the scheduling behaviour is unchanged. + */ + current->counter >>= 1; + p->counter = current->counter; + + if(p->pid) { + wake_up_process(p); /* do this last, just in case */ + } else { + p->state = TASK_RUNNING; + p->next_run = p->prev_run = p; + } ++total_forks; - return p->pid; + error = p->pid; + goto fork_out; bad_fork_cleanup_sighand: exit_sighand(p); @@ -295,17 +330,19 @@ bad_fork_cleanup_fs: bad_fork_cleanup_files: exit_files(p); bad_fork_cleanup: - if (p->exec_domain && p->exec_domain->use_count) - (*p->exec_domain->use_count)--; - if (p->binfmt && p->binfmt->use_count) - (*p->binfmt->use_count)--; + if (p->exec_domain && p->exec_domain->module) + __MOD_DEC_USE_COUNT(p->exec_domain->module); + if (p->binfmt && p->binfmt->module) + __MOD_DEC_USE_COUNT(p->binfmt->module); task[nr] = NULL; REMOVE_LINKS(p); nr_tasks--; bad_fork_free_stack: free_kernel_stack(new_stack); bad_fork_free_p: - kfree(p); + free_task_struct(p); bad_fork: +fork_out: + unlock_kernel(); return error; } diff --git a/kernel/info.c b/kernel/info.c index 20b6ad6ae..ffaec7140 100644 --- a/kernel/info.c +++ b/kernel/info.c @@ -12,6 +12,8 @@ #include <linux/types.h> #include 
<linux/mm.h> #include <linux/swap.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/uaccess.h> @@ -21,6 +23,7 @@ asmlinkage int sys_sysinfo(struct sysinfo *info) memset((char *)&val, 0, sizeof(struct sysinfo)); + cli(); val.uptime = jiffies / HZ; val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); @@ -28,6 +31,7 @@ asmlinkage int sys_sysinfo(struct sysinfo *info) val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); val.procs = nr_tasks-1; + sti(); si_meminfo(&val); si_swapinfo(&val); diff --git a/kernel/itimer.c b/kernel/itimer.c index efcc8351b..479f660a0 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -12,6 +12,8 @@ #include <linux/errno.h> #include <linux/time.h> #include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/uaccess.h> @@ -41,7 +43,6 @@ static void jiffiestotv(unsigned long jiffies, struct timeval *value) { value->tv_usec = (jiffies % HZ) * (1000000 / HZ); value->tv_sec = jiffies / HZ; - return; } static int _getitimer(int which, struct itimerval *value) @@ -78,17 +79,19 @@ static int _getitimer(int which, struct itimerval *value) return 0; } +/* SMP: Only we modify our itimer values. */ asmlinkage int sys_getitimer(int which, struct itimerval *value) { - int error; + int error = -EFAULT; struct itimerval get_buffer; - if (!value) - return -EFAULT; - error = _getitimer(which, &get_buffer); - if (error) - return error; - return copy_to_user(value, &get_buffer, sizeof(get_buffer)) ? -EFAULT : 0; + if (value) { + error = _getitimer(which, &get_buffer); + if (!error) + error = copy_to_user(value, &get_buffer, sizeof(get_buffer)) + ? -EFAULT : 0; + } + return error; } void it_real_fn(unsigned long __data) @@ -149,17 +152,18 @@ int _setitimer(int which, struct itimerval *value, struct itimerval *ovalue) return 0; } +/* SMP: Again, only we play with our itimers, and signals are SMP safe + * now so that is not an issue at all anymore. 
+ */ asmlinkage int sys_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) { - int error; struct itimerval set_buffer, get_buffer; + int error; if (value) { - error = verify_area(VERIFY_READ, value, sizeof(*value)); - if (error) - return error; - error = copy_from_user(&set_buffer, value, sizeof(set_buffer)); - if (error) + if(verify_area(VERIFY_READ, value, sizeof(*value))) + return -EFAULT; + if(copy_from_user(&set_buffer, value, sizeof(set_buffer))) return -EFAULT; } else memset((char *) &set_buffer, 0, sizeof(set_buffer)); @@ -169,6 +173,6 @@ asmlinkage int sys_setitimer(int which, struct itimerval *value, struct itimerva return error; if (copy_to_user(ovalue, &get_buffer, sizeof(get_buffer))) - error = -EFAULT; - return error; + return -EFAULT; + return 0; } diff --git a/kernel/ksyms.c b/kernel/ksyms.c index b694cd6d2..f5f202c8e 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -1,4 +1,4 @@ -/* +/* * Herein lies all the functions/variables that are "exported" for linkage * with dynamically loaded kernel modules. * Jon. 
@@ -6,13 +6,12 @@ * - Stacked module support and unified symbol table added (June 1994) * - External symbol table support added (December 1994) * - Versions on symbols added (December 1994) - * by Bjorn Ekwall <bj0rn@blox.se> + * by Bjorn Ekwall <bj0rn@blox.se> */ -#include <linux/module.h> #include <linux/config.h> +#include <linux/module.h> #include <linux/kernel.h> -#include <linux/smp.h> #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/cdrom.h> @@ -21,6 +20,7 @@ #include <linux/kernel_stat.h> #include <linux/mm.h> #include <linux/malloc.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/ptrace.h> #include <linux/sys.h> @@ -41,6 +41,7 @@ #include <linux/minix_fs.h> #include <linux/ext2_fs.h> #include <linux/random.h> +#include <linux/reboot.h> #include <linux/mount.h> #include <linux/pagemap.h> #include <linux/sysctl.h> @@ -49,12 +50,14 @@ #include <linux/genhd.h> #include <linux/swap.h> #include <linux/ctype.h> +#include <linux/file.h> -extern unsigned char aux_device_present, kbd_read_mask; -#ifdef __i386__ - extern struct drive_info_struct drive_info; +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) +extern struct drive_info_struct drive_info; #endif +extern unsigned char aux_device_present, kbd_read_mask; + #ifdef CONFIG_PCI #include <linux/bios32.h> #include <linux/pci.h> @@ -73,7 +76,10 @@ extern unsigned char aux_device_present, kbd_read_mask; extern char *get_options(char *str, int *ints); extern void set_device_ro(int dev,int flag); extern struct file_operations * get_blkfops(unsigned int); -extern void blkdev_release(struct inode * inode); +extern int blkdev_release(struct inode * inode); +#if !defined(CONFIG_NFSD) && defined(CONFIG_NFSD_MODULE) +extern int (*do_nfsservctl)(int, void *, void *); +#endif extern void *sys_call_table; @@ -81,293 +87,299 @@ extern int sys_tz; extern int request_dma(unsigned int dmanr, char * 
deviceID); extern void free_dma(unsigned int dmanr); -extern void hard_reset_now(void); - -struct symbol_table symbol_table = { -#include <linux/symtab_begin.h> #ifdef MODVERSIONS - { (void *)1 /* Version version :-) */, - SYMBOL_NAME_STR (Using_Versions) }, +const struct module_symbol __export_Using_Versions +__attribute__((section("__ksymtab"))) = { + 1 /* Version version */, "Using_Versions" +}; #endif - /* stackable module support */ - X(register_symtab_from), #ifdef CONFIG_KERNELD - X(kerneld_send), +EXPORT_SYMBOL(kerneld_send); #endif - X(get_options), +EXPORT_SYMBOL(get_options); #ifdef CONFIG_PCI - /* PCI BIOS support */ - X(pcibios_present), - X(pcibios_find_class), - X(pcibios_find_device), - X(pcibios_read_config_byte), - X(pcibios_read_config_word), - X(pcibios_read_config_dword), - X(pcibios_strerror), - X(pcibios_write_config_byte), - X(pcibios_write_config_word), - X(pcibios_write_config_dword), +/* PCI BIOS support */ +EXPORT_SYMBOL(pcibios_present); +EXPORT_SYMBOL(pcibios_find_class); +EXPORT_SYMBOL(pcibios_find_device); +EXPORT_SYMBOL(pcibios_read_config_byte); +EXPORT_SYMBOL(pcibios_read_config_word); +EXPORT_SYMBOL(pcibios_read_config_dword); +EXPORT_SYMBOL(pcibios_write_config_byte); +EXPORT_SYMBOL(pcibios_write_config_word); +EXPORT_SYMBOL(pcibios_write_config_dword); +EXPORT_SYMBOL(pcibios_strerror); +EXPORT_SYMBOL(pci_strvendor); +EXPORT_SYMBOL(pci_strdev); #endif - /* process memory management */ - X(do_mmap), - X(do_munmap), - X(exit_mm), - - /* internal kernel memory management */ - X(__get_free_pages), - X(free_pages), - X(kmalloc), - X(kfree), - X(vmalloc), - X(vfree), - X(mem_map), - X(remap_page_range), - X(max_mapnr), - X(high_memory), - X(update_vm_cache), - - /* filesystem internal functions */ - X(getname), - X(putname), - X(__iget), - X(iput), - X(namei), - X(lnamei), - X(open_namei), - X(sys_close), - X(close_fp), - X(check_disk_change), - X(invalidate_buffers), - X(invalidate_inodes), - X(invalidate_inode_pages), - 
X(fsync_dev), - X(permission), - X(inode_setattr), - X(inode_change_ok), - X(set_blocksize), - X(getblk), - X(bread), - X(breada), - X(__brelse), - X(__bforget), - X(ll_rw_block), - X(__wait_on_buffer), - X(mark_buffer_uptodate), - X(unlock_buffer), - X(dcache_lookup), - X(dcache_add), - X(add_blkdev_randomness), - X(generic_file_read), - X(generic_file_mmap), - X(generic_readpage), - - /* device registration */ - X(register_chrdev), - X(unregister_chrdev), - X(register_blkdev), - X(unregister_blkdev), - X(tty_register_driver), - X(tty_unregister_driver), - X(tty_std_termios), - -#if defined(CONFIG_BLK_DEV_IDECD) || \ - defined(CONFIG_BLK_DEV_SR) || \ - defined(CONFIG_CM206) - X(register_cdrom), - X(unregister_cdrom), - X(cdrom_fops), +/* process memory management */ +EXPORT_SYMBOL(do_mmap); +EXPORT_SYMBOL(do_munmap); +EXPORT_SYMBOL(exit_mm); +EXPORT_SYMBOL(exit_files); + +/* internal kernel memory management */ +EXPORT_SYMBOL(__get_free_pages); +EXPORT_SYMBOL(free_pages); +EXPORT_SYMBOL(kmalloc); +EXPORT_SYMBOL(kfree); +EXPORT_SYMBOL(vmalloc); +EXPORT_SYMBOL(vfree); +EXPORT_SYMBOL(mem_map); +EXPORT_SYMBOL(remap_page_range); +EXPORT_SYMBOL(max_mapnr); +EXPORT_SYMBOL(num_physpages); +EXPORT_SYMBOL(high_memory); +EXPORT_SYMBOL(update_vm_cache); +EXPORT_SYMBOL(kmem_cache_create); +EXPORT_SYMBOL(kmem_cache_destroy); +EXPORT_SYMBOL(kmem_cache_alloc); +EXPORT_SYMBOL(kmem_cache_free); + +/* filesystem internal functions */ +EXPORT_SYMBOL(getname); +EXPORT_SYMBOL(putname); +EXPORT_SYMBOL(__fput); +EXPORT_SYMBOL(__iget); +EXPORT_SYMBOL(iput); +EXPORT_SYMBOL(namei); +EXPORT_SYMBOL(lnamei); +EXPORT_SYMBOL(open_namei); +EXPORT_SYMBOL(sys_close); +EXPORT_SYMBOL(close_fp); +EXPORT_SYMBOL(check_disk_change); +EXPORT_SYMBOL(invalidate_buffers); +EXPORT_SYMBOL(invalidate_inodes); +EXPORT_SYMBOL(invalidate_inode_pages); +EXPORT_SYMBOL(fsync_dev); +EXPORT_SYMBOL(permission); +EXPORT_SYMBOL(inode_setattr); +EXPORT_SYMBOL(inode_change_ok); +EXPORT_SYMBOL(get_hardblocksize); 
+EXPORT_SYMBOL(set_blocksize); +EXPORT_SYMBOL(getblk); +EXPORT_SYMBOL(bread); +EXPORT_SYMBOL(breada); +EXPORT_SYMBOL(__brelse); +EXPORT_SYMBOL(__bforget); +EXPORT_SYMBOL(ll_rw_block); +EXPORT_SYMBOL(__wait_on_buffer); +EXPORT_SYMBOL(mark_buffer_uptodate); +EXPORT_SYMBOL(unlock_buffer); +EXPORT_SYMBOL(dcache_lookup); +EXPORT_SYMBOL(dcache_add); +EXPORT_SYMBOL(add_blkdev_randomness); +EXPORT_SYMBOL(generic_file_read); +EXPORT_SYMBOL(generic_file_write); +EXPORT_SYMBOL(generic_file_mmap); +EXPORT_SYMBOL(generic_readpage); +EXPORT_SYMBOL(file_lock_table); +EXPORT_SYMBOL(posix_lock_file); +EXPORT_SYMBOL(posix_test_lock); +EXPORT_SYMBOL(posix_block_lock); +EXPORT_SYMBOL(posix_unblock_lock); + +#if !defined(CONFIG_NFSD) && defined(CONFIG_NFSD_MODULE) +EXPORT_SYMBOL(do_nfsservctl); #endif - - /* block device driver support */ - X(block_read), - X(block_write), - X(block_fsync), - X(wait_for_request), - X(blksize_size), - X(hardsect_size), - X(blk_size), - X(blk_dev), - X(is_read_only), - X(set_device_ro), - X(bmap), - X(sync_dev), - X(get_blkfops), - X(blkdev_open), - X(blkdev_release), - X(gendisk_head), - X(resetup_one_dev), - X(unplug_device), -#ifdef __i386__ - X(drive_info), + +/* device registration */ +EXPORT_SYMBOL(register_chrdev); +EXPORT_SYMBOL(unregister_chrdev); +EXPORT_SYMBOL(register_blkdev); +EXPORT_SYMBOL(unregister_blkdev); +EXPORT_SYMBOL(tty_register_driver); +EXPORT_SYMBOL(tty_unregister_driver); +EXPORT_SYMBOL(tty_std_termios); + +/* block device driver support */ +EXPORT_SYMBOL(block_read); +EXPORT_SYMBOL(block_write); +EXPORT_SYMBOL(block_fsync); +EXPORT_SYMBOL(wait_for_request); +EXPORT_SYMBOL(blksize_size); +EXPORT_SYMBOL(hardsect_size); +EXPORT_SYMBOL(blk_size); +EXPORT_SYMBOL(blk_dev); +EXPORT_SYMBOL(is_read_only); +EXPORT_SYMBOL(set_device_ro); +EXPORT_SYMBOL(bmap); +EXPORT_SYMBOL(sync_dev); +EXPORT_SYMBOL(get_blkfops); +EXPORT_SYMBOL(blkdev_open); +EXPORT_SYMBOL(blkdev_release); +EXPORT_SYMBOL(gendisk_head); +EXPORT_SYMBOL(resetup_one_dev); 
+EXPORT_SYMBOL(unplug_device); + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) +EXPORT_SYMBOL(drive_info); #endif -#ifdef CONFIG_SERIAL - /* Module creation of serial units */ - X(register_serial), - X(unregister_serial), +/* tty routines */ +EXPORT_SYMBOL(tty_hangup); +EXPORT_SYMBOL(tty_wait_until_sent); +EXPORT_SYMBOL(tty_check_change); +EXPORT_SYMBOL(tty_hung_up_p); +EXPORT_SYMBOL(do_SAK); +EXPORT_SYMBOL(console_print); + +/* filesystem registration */ +EXPORT_SYMBOL(register_filesystem); +EXPORT_SYMBOL(unregister_filesystem); + +/* executable format registration */ +EXPORT_SYMBOL(register_binfmt); +EXPORT_SYMBOL(unregister_binfmt); +EXPORT_SYMBOL(search_binary_handler); +EXPORT_SYMBOL(prepare_binprm); +EXPORT_SYMBOL(remove_arg_zero); + +/* execution environment registration */ +EXPORT_SYMBOL(lookup_exec_domain); +EXPORT_SYMBOL(register_exec_domain); +EXPORT_SYMBOL(unregister_exec_domain); + +/* sysctl table registration */ +EXPORT_SYMBOL(register_sysctl_table); +EXPORT_SYMBOL(unregister_sysctl_table); +EXPORT_SYMBOL(sysctl_string); +EXPORT_SYMBOL(sysctl_intvec); +EXPORT_SYMBOL(proc_dostring); +EXPORT_SYMBOL(proc_dointvec); +EXPORT_SYMBOL(proc_dointvec_minmax); + +/* interrupt handling */ +EXPORT_SYMBOL(request_irq); +EXPORT_SYMBOL(free_irq); +EXPORT_SYMBOL(enable_irq); +EXPORT_SYMBOL(disable_irq); +EXPORT_SYMBOL(probe_irq_on); +EXPORT_SYMBOL(probe_irq_off); +EXPORT_SYMBOL(bh_active); +EXPORT_SYMBOL(bh_mask); +EXPORT_SYMBOL(bh_mask_count); +EXPORT_SYMBOL(bh_base); +EXPORT_SYMBOL(add_timer); +EXPORT_SYMBOL(del_timer); +EXPORT_SYMBOL(tq_timer); +EXPORT_SYMBOL(tq_immediate); +EXPORT_SYMBOL(tq_scheduler); +EXPORT_SYMBOL(timer_active); +EXPORT_SYMBOL(timer_table); + +#ifdef __SMP__ +/* Various random spinlocks we want to export */ +EXPORT_SYMBOL(tqueue_lock); +EXPORT_SYMBOL(waitqueue_lock); #endif - /* tty routines */ - X(tty_hangup), - X(tty_wait_until_sent), - 
X(tty_check_change), - X(tty_hung_up_p), - X(do_SAK), - X(console_print), - - /* filesystem registration */ - X(register_filesystem), - X(unregister_filesystem), - - /* executable format registration */ - X(register_binfmt), - X(unregister_binfmt), - X(search_binary_handler), - X(prepare_binprm), - X(remove_arg_zero), - - /* execution environment registration */ - X(lookup_exec_domain), - X(register_exec_domain), - X(unregister_exec_domain), - - /* sysctl table registration */ - X(register_sysctl_table), - X(unregister_sysctl_table), - X(sysctl_string), - X(sysctl_intvec), - X(proc_dostring), - X(proc_dointvec), - X(proc_dointvec_minmax), - - /* interrupt handling */ - X(request_irq), - X(free_irq), - X(enable_irq), - X(disable_irq), - X(probe_irq_on), - X(probe_irq_off), - X(bh_active), - X(bh_mask), - X(bh_mask_count), - X(bh_base), - X(add_timer), - X(del_timer), - X(tq_timer), - X(tq_immediate), - X(tq_scheduler), - X(timer_active), - X(timer_table), - X(intr_count), - - /* autoirq from drivers/net/auto_irq.c */ - X(autoirq_setup), - X(autoirq_report), - - /* dma handling */ - X(request_dma), - X(free_dma), + +/* autoirq from drivers/net/auto_irq.c */ +EXPORT_SYMBOL(autoirq_setup); +EXPORT_SYMBOL(autoirq_report); + +/* dma handling */ +EXPORT_SYMBOL(request_dma); +EXPORT_SYMBOL(free_dma); #ifdef HAVE_DISABLE_HLT - X(disable_hlt), - X(enable_hlt), +EXPORT_SYMBOL(disable_hlt); +EXPORT_SYMBOL(enable_hlt); #endif - /* IO port handling */ - X(check_region), - X(request_region), - X(release_region), - - /* process management */ - X(wake_up), - X(wake_up_interruptible), - X(sleep_on), - X(interruptible_sleep_on), - X(schedule), - X(current_set), - X(jiffies), - X(xtime), - X(do_gettimeofday), - X(loops_per_sec), - X(need_resched), - X(kstat), - X(kill_proc), - X(kill_pg), - X(kill_sl), - - /* misc */ - X(panic), - X(printk), - X(sprintf), - X(vsprintf), - X(kdevname), - X(simple_strtoul), - X(system_utsname), - X(sys_call_table), - X(hard_reset_now), - X(_ctype), - 
X(secure_tcp_sequence_number), - - /* Signal interfaces */ - X(send_sig), - - /* Program loader interfaces */ - X(setup_arg_pages), - X(copy_strings), - X(do_execve), - X(flush_old_exec), - X(open_inode), - X(read_exec), - - /* Miscellaneous access points */ - X(si_meminfo), - - /* Added to make file system as module */ - X(set_writetime), - X(sys_tz), - X(__wait_on_super), - X(file_fsync), - X(clear_inode), - X(refile_buffer), - X(nr_async_pages), - X(___strtok), - X(init_fifo), - X(super_blocks), - X(reuse_list), - X(fifo_inode_operations), - X(chrdev_inode_operations), - X(blkdev_inode_operations), - X(read_ahead), - X(get_hash_table), - X(get_empty_inode), - X(insert_inode_hash), - X(event), - X(__down), - X(__up), - X(securelevel), +/* IO port handling */ +EXPORT_SYMBOL(check_region); +EXPORT_SYMBOL(request_region); +EXPORT_SYMBOL(release_region); + +/* process management */ +EXPORT_SYMBOL(wake_up); +EXPORT_SYMBOL(wake_up_interruptible); +EXPORT_SYMBOL(sleep_on); +EXPORT_SYMBOL(interruptible_sleep_on); +EXPORT_SYMBOL(schedule); +EXPORT_SYMBOL(current_set); +EXPORT_SYMBOL(jiffies); +EXPORT_SYMBOL(xtime); +EXPORT_SYMBOL(do_gettimeofday); +EXPORT_SYMBOL(loops_per_sec); +EXPORT_SYMBOL(need_resched); +EXPORT_SYMBOL(kstat); +EXPORT_SYMBOL(kill_proc); +EXPORT_SYMBOL(kill_pg); +EXPORT_SYMBOL(kill_sl); + +/* misc */ +EXPORT_SYMBOL(panic); +EXPORT_SYMBOL(printk); +EXPORT_SYMBOL(sprintf); +EXPORT_SYMBOL(vsprintf); +EXPORT_SYMBOL(kdevname); +EXPORT_SYMBOL(simple_strtoul); +EXPORT_SYMBOL(system_utsname); +EXPORT_SYMBOL(sys_call_table); +EXPORT_SYMBOL(machine_restart); +EXPORT_SYMBOL(machine_halt); +EXPORT_SYMBOL(machine_power_off); +EXPORT_SYMBOL(register_reboot_notifier); +EXPORT_SYMBOL(unregister_reboot_notifier); +EXPORT_SYMBOL(_ctype); +EXPORT_SYMBOL(secure_tcp_sequence_number); +EXPORT_SYMBOL(get_random_bytes); + +/* Signal interfaces */ +EXPORT_SYMBOL(send_sig); + +/* Program loader interfaces */ +EXPORT_SYMBOL(setup_arg_pages); +EXPORT_SYMBOL(copy_strings); 
+EXPORT_SYMBOL(do_execve); +EXPORT_SYMBOL(flush_old_exec); +EXPORT_SYMBOL(open_inode); +EXPORT_SYMBOL(read_exec); + +/* Miscellaneous access points */ +EXPORT_SYMBOL(si_meminfo); + +/* Added to make file system as module */ +EXPORT_SYMBOL(set_writetime); +EXPORT_SYMBOL(sys_tz); +EXPORT_SYMBOL(__wait_on_super); +EXPORT_SYMBOL(file_fsync); +EXPORT_SYMBOL(clear_inode); +EXPORT_SYMBOL(refile_buffer); +EXPORT_SYMBOL(nr_async_pages); +EXPORT_SYMBOL(___strtok); +EXPORT_SYMBOL(init_fifo); +EXPORT_SYMBOL(super_blocks); +EXPORT_SYMBOL(fifo_inode_operations); +EXPORT_SYMBOL(chrdev_inode_operations); +EXPORT_SYMBOL(blkdev_inode_operations); +EXPORT_SYMBOL(read_ahead); +EXPORT_SYMBOL(get_hash_table); +EXPORT_SYMBOL(get_empty_inode); +EXPORT_SYMBOL(insert_inode_hash); +EXPORT_SYMBOL(event); +EXPORT_SYMBOL(__down); +EXPORT_SYMBOL(__up); +EXPORT_SYMBOL(securelevel); + /* all busmice */ - X(add_mouse_randomness), - X(fasync_helper), +EXPORT_SYMBOL(add_mouse_randomness); +EXPORT_SYMBOL(fasync_helper); + /* psaux mouse */ - X(aux_device_present), - X(kbd_read_mask), +EXPORT_SYMBOL(aux_device_present); +#ifdef CONFIG_VT +EXPORT_SYMBOL(kbd_read_mask); +#endif #ifdef CONFIG_BLK_DEV_MD - X(disk_name), /* for md.c */ +EXPORT_SYMBOL(disk_name); /* for md.c */ #endif - /* binfmt_aout */ - X(get_write_access), - X(put_write_access), - - /******************************************************** - * Do not add anything below this line, - * as the stacked modules depend on this! 
- */ -#include <linux/symtab_end.h> -}; - -/* -int symbol_table_size = sizeof (symbol_table) / sizeof (symbol_table[0]); -*/ +/* binfmt_aout */ +EXPORT_SYMBOL(get_write_access); +EXPORT_SYMBOL(put_write_access); diff --git a/kernel/module.c b/kernel/module.c index 09cee93b7..885539b5c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -4,789 +4,963 @@ #include <linux/string.h> #include <linux/module.h> #include <linux/sched.h> -#include <linux/malloc.h> -#include <linux/vmalloc.h> #include <linux/config.h> - #include <asm/uaccess.h> +#include <linux/vmalloc.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> + /* * Originally by Anonymous (as far as I know...) * Linux version by Bas Laarhoven <bas@vimec.nl> * 0.99.14 version by Jon Tombs <jon@gtex02.us.es>, - * * Heavily modified by Bjorn Ekwall <bj0rn@blox.se> May 1994 (C) - * This source is covered by the GNU GPL, the same as all kernel sources. - * - * Features: - * - Supports stacked modules (removable only of there are no dependents). - * - Supports table of symbols defined by the modules. - * - Supports /proc/ksyms, showing value, name and owner of all - * the symbols defined by all modules (in stack order). - * - Added module dependencies information into /proc/modules - * - Supports redefines of all symbols, for streams-like behaviour. - * - Compatible with older versions of insmod. - * - * New addition in December 1994: (Bjorn Ekwall, idea from Jacques Gelinas) - * - Externally callable function: - * - * "int register_symtab(struct symbol_table *)" + * Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996 * - * This function can be called from within the kernel, - * and ALSO from loadable modules. - * The goal is to assist in modularizing the kernel even more, - * and finally: reducing the number of entries in ksyms.c - * since every subsystem should now be able to decide and - * control exactly what symbols it wants to export, locally! 
- * - * On 1-Aug-95: <Matti.Aarnio@utu.fi> altered code to use same style as - * do /proc/net/XXX "files". Namely allow more than 4kB - * (or what the block size is) output. - * - * - Use dummy syscall functions for users who disable all - * module support. Similar to kernel/sys.c (Paul Gortmaker) + * This source is covered by the GNU GPL, the same as all kernel sources. */ #ifdef CONFIG_MODULES /* a *big* #ifdef block... */ -static struct module kernel_module; -struct module *module_list = &kernel_module; +extern struct module_symbol __start___ksymtab[]; +extern struct module_symbol __stop___ksymtab[]; -static int freeing_modules; /* true if some modules are marked for deletion */ +extern const struct exception_table_entry __start___ex_table[]; +extern const struct exception_table_entry __stop___ex_table[]; -static struct module *find_module( const char *name); -static int free_modules( void); +static struct module kernel_module = +{ + sizeof(struct module), /* size_of_struct */ + NULL, /* next */ + "", /* name */ + 0, /* size */ + 1, /* usecount */ + MOD_RUNNING, /* flags */ + 0, /* nsyms -- to filled in in init_modules */ + 0, /* ndeps */ + __start___ksymtab, /* syms */ + NULL, /* deps */ + NULL, /* refs */ + NULL, /* init */ + NULL, /* cleanup */ + __start___ex_table, /* ex_table_start */ + __stop___ex_table, /* ex_table_end */ + /* Rest are NULL */ +}; + +struct module *module_list = &kernel_module; + +static long get_mod_name(const char *user_name, char **buf); +static void put_mod_name(char *buf); +static struct module *find_module(const char *name); +static void free_module(struct module *); -extern struct symbol_table symbol_table; /* in kernel/ksyms.c */ /* * Called at boot time */ -void init_modules(void) { - struct internal_symbol *sym; - int i; - for (i = 0, sym = symbol_table.symbol; sym->name; ++sym, ++i) - ; - symbol_table.n_symbols = i; +void init_modules(void) +{ + kernel_module.nsyms = __stop___ksymtab - __start___ksymtab; - kernel_module.symtab 
= &symbol_table; - kernel_module.state = MOD_RUNNING; /* Hah! */ - kernel_module.name = ""; +#ifdef __alpha__ + __asm__("stq $29,%0" : "=m"(kernel_module.gp)); +#endif } - /* * Copy the name of a module from user space. */ -inline int -get_mod_name(char *user_name, char *buf) + +static inline long +get_mod_name(const char *user_name, char **buf) { - /* Should return -EBIG instead of -EFAULT when the name - is too long, but that we couldn't detect real faults then. - Maybe strncpy_from_user() should return -EBIG, when - the source string is too long. */ - return strncpy_from_user(buf, user_name, MOD_MAX_NAME); + unsigned long page; + long retval; + + if ((unsigned long)user_name >= TASK_SIZE) + return -EFAULT; + + page = __get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + retval = strncpy_from_user((char *)page, user_name, PAGE_SIZE); + if (retval > 0) { + if (retval < PAGE_SIZE) { + *buf = (char *)page; + return retval; + } + retval = -ENAMETOOLONG; + } else if (!retval) + retval = -EINVAL; + + free_page(page); + return retval; } +static inline void +put_mod_name(char *buf) +{ + free_page((unsigned long)buf); +} /* * Allocate space for a module. 
*/ + asmlinkage unsigned long -sys_create_module(char *module_name, unsigned long size) +sys_create_module(const char *name_user, size_t size) { - struct module *mp; - void* addr; - int error; - int npages; - int sspace = sizeof(struct module) + MOD_MAX_NAME; - char name[MOD_MAX_NAME]; - - if (!suser()) - return -EPERM; - if (module_name == NULL || size == 0) - return -EINVAL; - if ((error = get_mod_name(module_name, name)) < 0) - return error; + char *name; + long namelen, error; + struct module *mod; + + lock_kernel(); + if (!suser()) { + error = -EPERM; + goto err0; + } + if ((namelen = get_mod_name(name_user, &name)) < 0) { + error = namelen; + goto err0; + } + if (size < sizeof(struct module)+namelen) { + error = -EINVAL; + goto err1; + } if (find_module(name) != NULL) { - return -EEXIST; + error = -EEXIST; + goto err1; } - - if ((mp = (struct module*) kmalloc(sspace, GFP_KERNEL)) == NULL) { - return -ENOMEM; + if ((mod = (struct module *)vmalloc(size)) == NULL) { + error = -ENOMEM; + goto err1; } - strcpy((char *)(mp + 1), name); /* why not? 
*/ - npages = (size + sizeof (long) + PAGE_SIZE - 1) / PAGE_SIZE; - if ((addr = vmalloc(npages * PAGE_SIZE)) == 0) { - kfree_s(mp, sspace); - return -ENOMEM; - } + memset(mod, 0, sizeof(*mod)); + mod->size_of_struct = sizeof(*mod); + mod->next = module_list; + mod->name = (char *)(mod + 1); + mod->size = size; + memcpy((char*)(mod+1), name, namelen+1); - mp->next = module_list; - mp->ref = NULL; - mp->symtab = NULL; - mp->name = (char *)(mp + 1); - mp->size = npages; - mp->addr = addr; - mp->state = MOD_UNINITIALIZED; - mp->cleanup = NULL; - mp->exceptinfo.start = NULL; - mp->exceptinfo.stop = NULL; + put_mod_name(name); - * (long *) addr = 0; /* set use count to zero */ - module_list = mp; /* link it in */ + module_list = mod; /* link it in */ - pr_debug("module `%s' (%lu pages @ 0x%08lx) created\n", - mp->name, (unsigned long) mp->size, (unsigned long) mp->addr); - return (unsigned long) addr; + error = (long) mod; + goto err0; +err1: + put_mod_name(name); +err0: + unlock_kernel(); + return error; } - /* * Initialize a module. */ + asmlinkage int -sys_init_module(char *module_name, char *code, unsigned codesize, - struct mod_routines *routines, - struct symbol_table *symtab) +sys_init_module(const char *name_user, struct module *mod_user) { - struct module *mp; - struct symbol_table *newtab; - char name[MOD_MAX_NAME]; - int error; - struct mod_routines rt; + struct module mod_tmp, *mod; + char *name, *n_name; + long namelen, n_namelen, i, error = -EPERM; + unsigned long mod_user_size; + struct module_ref *dep; + lock_kernel(); if (!suser()) - return -EPERM; + goto err0; + if ((namelen = get_mod_name(name_user, &name)) < 0) { + error = namelen; + goto err0; + } + if ((mod = find_module(name)) == NULL) { + error = -ENOENT; + goto err1; + } + + /* Check module header size. We allow a bit of slop over the + size we are familiar with to cope with a version of insmod + for a newer kernel. But don't over do it. 
*/ + if ((error = get_user(mod_user_size, &mod_user->size_of_struct)) != 0) + goto err1; + if (mod_user_size < (unsigned long)&((struct module *)0L)->persist_start + || mod_user_size > sizeof(struct module) + 16*sizeof(void*)) { + printk(KERN_ERR "init_module: Invalid module header size.\n" + KERN_ERR "A new version of the modutils is likely " + "needed.\n"); + error = -EINVAL; + goto err1; + } + + /* Hold the current contents while we play with the user's idea + of righteousness. */ + mod_tmp = *mod; + + error = copy_from_user(mod, mod_user, sizeof(struct module)); + if (error) { + error = -EFAULT; + goto err2; + } + + /* Sanity check the size of the module. */ + error = -EINVAL; + + if (mod->size > mod_tmp.size) { + printk(KERN_ERR "init_module: Size of initialized module " + "exceeds size of created module.\n"); + goto err2; + } + + /* Make sure all interesting pointers are sane. */ -#ifdef __i386__ - /* A little bit of protection... we "know" where the user stack is... */ +#define bound(p, n, m) ((unsigned long)(p) >= (unsigned long)(m+1) && \ + (unsigned long)((p)+(n)) <= (unsigned long)(m) + (m)->size) - if (symtab && ((unsigned long)symtab > 0xb0000000)) { - printk(KERN_WARNING "warning: you are using an old insmod, no symbols will be inserted!\n"); - symtab = NULL; + if (!bound(mod->name, namelen, mod)) { + printk(KERN_ERR "init_module: mod->name out of bounds.\n"); + goto err2; + } + if (mod->nsyms && !bound(mod->syms, mod->nsyms, mod)) { + printk(KERN_ERR "init_module: mod->syms out of bounds.\n"); + goto err2; + } + if (mod->ndeps && !bound(mod->deps, mod->ndeps, mod)) { + printk(KERN_ERR "init_module: mod->deps out of bounds.\n"); + goto err2; + } + if (mod->init && !bound(mod->init, 0, mod)) { + printk(KERN_ERR "init_module: mod->init out of bounds.\n"); + goto err2; + } + if (mod->cleanup && !bound(mod->cleanup, 0, mod)) { + printk(KERN_ERR "init_module: mod->cleanup out of bounds.\n"); + goto err2; + } + if (mod->ex_table_start > mod->ex_table_end + 
|| (mod->ex_table_start && + !((unsigned long)mod->ex_table_start >= (unsigned long)(mod+1) + && ((unsigned long)mod->ex_table_end + < (unsigned long)mod + mod->size))) + || (((unsigned long)mod->ex_table_start + - (unsigned long)mod->ex_table_end) + % sizeof(struct exception_table_entry))) { + printk(KERN_ERR "init_module: mod->ex_table_* invalid.\n"); + goto err2; + } + if (mod->flags & ~MOD_AUTOCLEAN) { + printk(KERN_ERR "init_module: mod->flags invalid.\n"); + goto err2; + } +#ifdef __alpha__ + if (!bound(mod->gp - 0x8000, 0, mod)) { + printk(KERN_ERR "init_module: mod->gp out of bounds.\n"); + goto err2; } #endif - if ((error = get_mod_name(module_name, name)) < 0) - return error; - pr_debug("initializing module `%s', %d (0x%x) bytes\n", - name, codesize, codesize); - if (copy_from_user(&rt, routines, sizeof rt)) - return -EFAULT; - if ((mp = find_module(name)) == NULL) - return -ENOENT; - if (codesize & MOD_AUTOCLEAN) { - /* - * set autoclean marker from codesize... - * set usage count to "zero" - */ - codesize &= ~MOD_AUTOCLEAN; - GET_USE_COUNT(mp) = MOD_AUTOCLEAN; - } - if ((codesize + sizeof (long) + PAGE_SIZE - 1) / PAGE_SIZE > mp->size) - return -EINVAL; - if (copy_from_user((char *)mp->addr + sizeof (long), code, codesize)) - return -EFAULT; - memset((char *)mp->addr + sizeof (long) + codesize, 0, - mp->size * PAGE_SIZE - (codesize + sizeof (long))); - pr_debug("module init entry = 0x%08lx, cleanup entry = 0x%08lx\n", - (unsigned long) rt.init, (unsigned long) rt.cleanup); - if (rt.signature != MODULE_2_1_7_SIG){ - printk ("Older insmod used with kernel 2.1.7 +\n"); - return -EINVAL; + if (mod_member_present(mod, can_unload) + && mod->can_unload && !bound(mod->can_unload, 0, mod)) { + printk(KERN_ERR "init_module: mod->can_unload out of bounds.\n"); + goto err2; } - mp->cleanup = rt.cleanup; - mp->exceptinfo = rt.exceptinfo; - - /* update kernel symbol table */ - if (symtab) { /* symtab == NULL means no new entries to handle */ - struct internal_symbol 
*sym; - struct module_ref *ref; - int size; - int i; - int legal_start; - - error = get_user(size, &symtab->size); - if (error) - return error; - if ((newtab = (struct symbol_table*) kmalloc(size, GFP_KERNEL)) == NULL) { - return -ENOMEM; - } - if (copy_from_user((char *)(newtab), symtab, size)) { - kfree_s(newtab, size); - return -EFAULT; - } +#undef bound - /* sanity check */ - legal_start = sizeof(struct symbol_table) + - newtab->n_symbols * sizeof(struct internal_symbol) + - newtab->n_refs * sizeof(struct module_ref); + /* Check that the user isn't doing something silly with the name. */ - if ((newtab->n_symbols < 0) || (newtab->n_refs < 0) || (legal_start > size)) { - printk(KERN_WARNING "Rejecting illegal symbol table (n_symbols=%d,n_refs=%d)\n", - newtab->n_symbols, newtab->n_refs); - kfree_s(newtab, size); - return -EINVAL; - } + if ((n_namelen = get_mod_name(mod->name - (unsigned long)mod + + (unsigned long)mod_user, + &n_name)) < 0) { + error = n_namelen; + goto err2; + } + if (namelen != n_namelen || strcmp(n_name, mod_tmp.name) != 0) { + printk(KERN_ERR "init_module: changed module name to " + "`%s' from `%s'\n", + n_name, mod_tmp.name); + goto err3; + } - /* relocate name pointers, index referred from start of table */ - for (sym = &(newtab->symbol[0]), i = 0; i < newtab->n_symbols; ++sym, ++i) { - if ((unsigned long)sym->name < legal_start || size <= (unsigned long)sym->name) { - printk(KERN_WARNING "Rejecting illegal symbol table\n"); - kfree_s(newtab, size); - return -EINVAL; - } - /* else */ - sym->name += (long)newtab; - } - mp->symtab = newtab; - - /* Update module references. - * On entry, from "insmod", ref->module points to - * the referenced module! - * Now it will point to the current module instead! - * The ref structure becomes the first link in the linked - * list of references to the referenced module. - * Also, "sym" from above, points to the first ref entry!!! 
- */ - for (ref = (struct module_ref *)sym, i = 0; - i < newtab->n_refs; ++ref, ++i) { - - /* Check for valid reference */ - struct module *link = module_list; - while (link && (ref->module != link)) - link = link->next; - - if (link == (struct module *)0) { - printk(KERN_WARNING "Non-module reference! Rejected!\n"); - return -EINVAL; - } + /* Ok, that's about all the sanity we can stomach; copy the rest. */ - ref->next = ref->module->ref; - ref->module->ref = ref; - ref->module = mp; - } + if (copy_from_user(mod+1, mod_user+1, mod->size-sizeof(*mod))) { + error = -EFAULT; + goto err3; } - GET_USE_COUNT(mp) += 1; - if ((*rt.init)() != 0) { - GET_USE_COUNT(mp) = 0; - return -EBUSY; + /* Update module references. */ + mod->next = mod_tmp.next; + mod->refs = NULL; + for (i = 0, dep = mod->deps; i < mod->ndeps; ++i, ++dep) { + struct module *o, *d = dep->dep; + + /* Make sure the indicated dependancies are really modules. */ + if (d == mod) { + printk(KERN_ERR "init_module: self-referential " + "dependancy in mod->deps.\n"); + goto err3; + } + + for (o = module_list; o != &kernel_module; o = o->next) + if (o == d) goto found_dep; + + printk(KERN_ERR "init_module: found dependancy that is " + "(no longer?) a module.\n"); + goto err3; + + found_dep: + dep->ref = mod; + dep->next_ref = d->refs; + d->refs = dep; + /* Being referenced by a dependant module counts as a + use as far as kerneld is concerned. */ + d->flags |= MOD_USED_ONCE; } - GET_USE_COUNT(mp) -= 1; - mp->state = MOD_RUNNING; - return 0; + /* Free our temporary memory. */ + put_mod_name(n_name); + put_mod_name(name); + + /* Initialize the module. */ + mod->usecount = 1; + if (mod->init && mod->init() != 0) { + mod->usecount = 0; + error = -EBUSY; + goto err0; + } + mod->usecount--; + + /* And set it running. 
*/ + mod->flags |= MOD_RUNNING; + error = 0; + goto err0; + +err3: + put_mod_name(n_name); +err2: + *mod = mod_tmp; +err1: + put_mod_name(name); +err0: + unlock_kernel(); + return error; } asmlinkage int -sys_delete_module(char *module_name) +sys_delete_module(const char *name_user) { - struct module *mp; - char name[MOD_MAX_NAME]; - int error; + struct module *mod, *next; + char *name; + long error = -EPERM; + lock_kernel(); if (!suser()) - return -EPERM; - /* else */ - if (module_name != NULL) { - if ((error = get_mod_name(module_name, name)) < 0) - return error; - if ((mp = find_module(name)) == NULL) - return -ENOENT; - if ((mp->ref != NULL) || - ((GET_USE_COUNT(mp) & ~(MOD_AUTOCLEAN | MOD_VISITED)) != 0)) - return -EBUSY; - GET_USE_COUNT(mp) &= ~(MOD_AUTOCLEAN | MOD_VISITED); - if (mp->state == MOD_RUNNING) - (*mp->cleanup)(); - mp->state = MOD_DELETED; - free_modules(); - } - /* for automatic reaping */ - else { - struct module *mp_next; - for (mp = module_list; mp != &kernel_module; mp = mp_next) { - mp_next = mp->next; - if ((mp->ref == NULL) && (mp->state == MOD_RUNNING) && - ((GET_USE_COUNT(mp) & ~MOD_VISITED) == MOD_AUTOCLEAN)) { - if ((GET_USE_COUNT(mp) & MOD_VISITED)) { - /* Don't reap until one "cycle" after last _use_ */ - GET_USE_COUNT(mp) &= ~MOD_VISITED; - } - else { - GET_USE_COUNT(mp) &= ~(MOD_AUTOCLEAN | MOD_VISITED); - (*mp->cleanup)(); - mp->state = MOD_DELETED; - free_modules(); - } - } + goto out; + + if (name_user) { + if ((error = get_mod_name(name_user, &name)) < 0) + goto out; + if (error == 0) { + error = -EINVAL; + put_mod_name(name); + goto out; + } + error = -ENOENT; + if ((mod = find_module(name)) == NULL) { + put_mod_name(name); + goto out; + } + put_mod_name(name); + error = -EBUSY; + if (mod->refs != NULL || __MOD_IN_USE(mod)) + goto out; + + free_module(mod); + error = 0; + goto out; + } + + /* Do automatic reaping */ + for (mod = module_list; mod != &kernel_module; mod = next) { + next = mod->next; + if (mod->refs == NULL && + 
((mod->flags + & (MOD_AUTOCLEAN|MOD_RUNNING|MOD_DELETED|MOD_USED_ONCE)) + == (MOD_AUTOCLEAN|MOD_RUNNING|MOD_USED_ONCE)) && + !__MOD_IN_USE(mod)) { + if (mod->flags & MOD_VISITED) + mod->flags &= ~MOD_VISITED; + else + free_module(mod); } } - return 0; + error = 0; +out: + unlock_kernel(); + return error; } +/* Query various bits about modules. */ -/* - * Copy the kernel symbol table to user space. If the argument is null, - * just return the size of the table. - * - * Note that the transient module symbols are copied _first_, - * in lifo order!!! - * - * The symbols to "insmod" are according to the "old" format: struct kernel_sym, - * which is actually quite handy for this purpose. - * Note that insmod inserts a struct symbol_table later on... - * (as that format is quite handy for the kernel...) - * - * For every module, the first (pseudo)symbol copied is the module name - * and the address of the module struct. - * This lets "insmod" keep track of references, and build the array of - * struct module_refs in the symbol table. - * The format of the module name is "#module", so that "insmod" can easily - * notice when a module name comes along. Also, this will make it possible - * to use old versions of "insmod", albeit with reduced functionality... - * The "kernel" module has an empty name. - */ -asmlinkage int -sys_get_kernel_syms(struct kernel_sym *table) +static int +qm_modules(char *buf, size_t bufsize, size_t *ret) { - struct internal_symbol *from; - struct kernel_sym isym; - struct kernel_sym *to; - struct module *mp = module_list; - int i; - int nmodsyms = 0; - int err; + struct module *mod; + size_t nmod, space, len; - for (mp = module_list; mp; mp = mp->next) { - if (mp->symtab && mp->symtab->n_symbols) { - /* include the count for the module name! */ - nmodsyms += mp->symtab->n_symbols + 1; - } - else - /* include the count for the module name! 
*/ - nmodsyms += 1; /* return modules without symbols too */ - } - - if (table != NULL) { - to = table; - - /* copy all module symbols first (always LIFO order) */ - for (mp = module_list; mp; mp = mp->next) { - if (mp->state == MOD_RUNNING) { - /* magic: write module info as a pseudo symbol */ - isym.value = (unsigned long)mp; - sprintf(isym.name, "#%s", mp->name); - err = copy_to_user(to, &isym, sizeof isym); - if (err) - return -EFAULT; - ++to; - - if (mp->symtab != NULL) { - for (i = mp->symtab->n_symbols, - from = mp->symtab->symbol; - i > 0; --i, ++from, ++to) { - - isym.value = (unsigned long)from->addr; - strncpy(isym.name, from->name, sizeof isym.name); - err = copy_to_user(to, &isym, sizeof isym); - if (err) - return -EFAULT; - } - } - } - } + nmod = space = 0; + + for (mod=module_list; mod != &kernel_module; mod=mod->next, ++nmod) { + len = strlen(mod->name)+1; + if (len > bufsize) + goto calc_space_needed; + if (copy_to_user(buf, mod->name, len)) + return -EFAULT; + buf += len; + bufsize -= len; + space += len; } - return nmodsyms; + if (put_user(nmod, ret)) + return -EFAULT; + else + return 0; + +calc_space_needed: + space += len; + while ((mod = mod->next) != &kernel_module) + space += strlen(mod->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; } -/* - * Look for a module by name, ignoring modules marked for deletion. 
- */ -struct module * -find_module( const char *name) +static int +qm_deps(struct module *mod, char *buf, size_t bufsize, size_t *ret) { - struct module *mp; + size_t i, space, len; - for (mp = module_list ; mp ; mp = mp->next) { - if (mp->state == MOD_DELETED) - continue; - if (!strcmp(mp->name, name)) - break; + if (mod == &kernel_module) + return -EINVAL; + if ((mod->flags & (MOD_RUNNING | MOD_DELETED)) != MOD_RUNNING) + if (put_user(0, ret)) + return -EFAULT; + else + return 0; + + space = 0; + for (i = 0; i < mod->ndeps; ++i) { + const char *dep_name = mod->deps[i].dep->name; + + len = strlen(dep_name)+1; + if (len > bufsize) + goto calc_space_needed; + if (copy_to_user(buf, dep_name, len)) + return -EFAULT; + buf += len; + bufsize -= len; + space += len; } - return mp; + + if (put_user(i, ret)) + return -EFAULT; + else + return 0; + +calc_space_needed: + space += len; + while (++i < mod->ndeps) + space += strlen(mod->deps[i].dep->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; } -static void -drop_refs(struct module *mp) +static int +qm_refs(struct module *mod, char *buf, size_t bufsize, size_t *ret) { - struct module *step; - struct module_ref *prev; + size_t nrefs, space, len; struct module_ref *ref; - for (step = module_list; step; step = step->next) { - for (prev = ref = step->ref; ref; ref = prev->next) { - if (ref->module == mp) { - if (ref == step->ref) - step->ref = ref->next; - else - prev->next = ref->next; - break; /* every module only references once! 
*/ - } - else - prev = ref; - } + if (mod == &kernel_module) + return -EINVAL; + if ((mod->flags & (MOD_RUNNING | MOD_DELETED)) != MOD_RUNNING) + if (put_user(0, ret)) + return -EFAULT; + else + return 0; + + space = 0; + for (nrefs = 0, ref = mod->refs; ref ; ++nrefs, ref = ref->next_ref) { + const char *ref_name = ref->ref->name; + + len = strlen(ref_name)+1; + if (len > bufsize) + goto calc_space_needed; + if (copy_to_user(buf, ref_name, len)) + return -EFAULT; + buf += len; + bufsize -= len; + space += len; } + + if (put_user(nrefs, ret)) + return -EFAULT; + else + return 0; + +calc_space_needed: + space += len; + while ((ref = ref->next_ref) != NULL) + space += strlen(ref->ref->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; } -/* - * Try to free modules which have been marked for deletion. Returns nonzero - * if a module was actually freed. - */ -int -free_modules( void) +static int +qm_symbols(struct module *mod, char *buf, size_t bufsize, size_t *ret) { - struct module *mp; - struct module **mpp; - int did_deletion; - - did_deletion = 0; - freeing_modules = 0; - mpp = &module_list; - while ((mp = *mpp) != NULL) { - if (mp->state != MOD_DELETED) { - mpp = &mp->next; - } else { - if ((GET_USE_COUNT(mp) != 0) || (mp->ref != NULL)) { - freeing_modules = 1; - mpp = &mp->next; - } else { /* delete it */ - *mpp = mp->next; - if (mp->symtab) { - if (mp->symtab->n_refs) - drop_refs(mp); - if (mp->symtab->size) - kfree_s(mp->symtab, mp->symtab->size); - } - vfree(mp->addr); - kfree_s(mp, sizeof(struct module) + MOD_MAX_NAME); - did_deletion = 1; - } - } + size_t i, space, len; + struct module_symbol *s; + char *strings; + unsigned long *vals; + + if ((mod->flags & (MOD_RUNNING | MOD_DELETED)) != MOD_RUNNING) + if (put_user(0, ret)) + return -EFAULT; + else + return 0; + + space = mod->nsyms * 2*sizeof(void *); + + i = len = 0; + s = mod->syms; + + if (space > bufsize) + goto calc_space_needed; + + if (!access_ok(VERIFY_WRITE, buf, 
space)) + return -EFAULT; + + bufsize -= space; + vals = (unsigned long *)buf; + strings = buf+space; + + for (; i < mod->nsyms ; ++i, ++s, vals += 2) { + len = strlen(s->name)+1; + if (len > bufsize) + goto calc_space_needed; + + if (copy_to_user(strings, s->name, len) + || __put_user(s->value, vals+0) + || __put_user(space, vals+1)) + return -EFAULT; + + strings += len; + bufsize -= len; + space += len; } - return did_deletion; + + if (put_user(i, ret)) + return -EFAULT; + else + return 0; + +calc_space_needed: + for (; i < mod->nsyms; ++i, ++s) + space += strlen(s->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; } +static int +qm_info(struct module *mod, char *buf, size_t bufsize, size_t *ret) +{ + int error = 0; -/* - * Called by the /proc file system to return a current list of modules. - */ -int get_module_list(char *buf) + if (mod == &kernel_module) + return -EINVAL; + + if (sizeof(struct module_info) <= bufsize) { + struct module_info info; + info.addr = (unsigned long)mod; + info.size = mod->size; + info.flags = mod->flags; + info.usecount = (mod_member_present(mod, can_unload) + && mod->can_unload ? 
-1 : mod->usecount); + + if (copy_to_user(buf, &info, sizeof(struct module_info))) + return -EFAULT; + } else + error = -ENOSPC; + + if (put_user(sizeof(struct module_info), ret)) + return -EFAULT; + + return error; +} + +asmlinkage int +sys_query_module(const char *name_user, int which, char *buf, size_t bufsize, + size_t *ret) { - char *p; - const char *q; - int i; - struct module *mp; - struct module_ref *ref; - char size[32]; - - p = buf; - /* Do not show the kernel pseudo module */ - for (mp = module_list ; mp && mp->next; mp = mp->next) { - if (p - buf > 4096 - 100) - break; /* avoid overflowing buffer */ - q = mp->name; - if (*q == '\0' && mp->size == 0 && mp->ref == NULL) - continue; /* don't list modules for kernel syms */ - i = 20; - while (*q) { - *p++ = *q++; - i--; - } - sprintf(size, "%d", mp->size); - i -= strlen(size); - if (i <= 0) - i = 1; - while (--i >= 0) - *p++ = ' '; - q = size; - while (*q) - *p++ = *q++; - if (mp->state == MOD_UNINITIALIZED) - q = " (uninitialized)"; - else if (mp->state == MOD_RUNNING) - q = ""; - else if (mp->state == MOD_DELETED) - q = " (deleted)"; - else - q = " (bad state)"; - while (*q) - *p++ = *q++; - - *p++ = '\t'; - if ((ref = mp->ref) != NULL) { - *p++ = '['; - for (; ref; ref = ref->next) { - q = ref->module->name; - while (*q) - *p++ = *q++; - if (ref->next) - *p++ = ' '; - } - *p++ = ']'; + struct module *mod; + int err; + + lock_kernel(); + if (name_user == NULL) + mod = &kernel_module; + else { + long namelen; + char *name; + + if ((namelen = get_mod_name(name_user, &name)) < 0) { + err = namelen; + goto out; } - if (mp->state == MOD_RUNNING) { - sprintf(size,"\t%ld%s", - GET_USE_COUNT(mp) & ~(MOD_AUTOCLEAN | MOD_VISITED), - ((GET_USE_COUNT(mp) & MOD_AUTOCLEAN)? 
- " (autoclean)":"")); - q = size; - while (*q) - *p++ = *q++; + err = -ENOENT; + if (namelen == 0) + mod = &kernel_module; + else if ((mod = find_module(name)) == NULL) { + put_mod_name(name); + goto out; } - *p++ = '\n'; + put_mod_name(name); } - return p - buf; -} + switch (which) + { + case 0: + err = 0; + break; + case QM_MODULES: + err = qm_modules(buf, bufsize, ret); + break; + case QM_DEPS: + err = qm_deps(mod, buf, bufsize, ret); + break; + case QM_REFS: + err = qm_refs(mod, buf, bufsize, ret); + break; + case QM_SYMBOLS: + err = qm_symbols(mod, buf, bufsize, ret); + break; + case QM_INFO: + err = qm_info(mod, buf, bufsize, ret); + break; + default: + err = -EINVAL; + break; + } +out: + unlock_kernel(); + return err; +} /* - * Called by the /proc file system to return a current list of ksyms. + * Copy the kernel symbol table to user space. If the argument is + * NULL, just return the size of the table. + * + * This call is obsolete. New programs should use query_module+QM_SYMBOLS + * which does not arbitrarily limit the length of symbols. 
*/ -int get_ksyms_list(char *buf, char **start, off_t offset, int length) + +asmlinkage int +sys_get_kernel_syms(struct kernel_sym *table) { - struct module *mp; - struct internal_symbol *sym; + struct module *mod; int i; - char *p = buf; - int len = 0; /* code from net/ipv4/proc.c */ - off_t pos = 0; - off_t begin = 0; - for (mp = module_list; mp; mp = mp->next) { - if ((mp->state == MOD_RUNNING) && - (mp->symtab != NULL) && - (mp->symtab->n_symbols > 0)) { - for (i = mp->symtab->n_symbols, - sym = mp->symtab->symbol; - i > 0; --i, ++sym) { - - p = buf + len; - if (mp->name[0]) { - len += sprintf(p, "%08lx %s\t[%s]\n", - (long)sym->addr, - sym->name, mp->name); - } else { - len += sprintf(p, "%08lx %s\n", - (long)sym->addr, - sym->name); - } - pos = begin + len; - if (pos < offset) { - len = 0; - begin = pos; - } - pos = begin + len; - if (pos > offset+length) - goto leave_the_loop; - } + lock_kernel(); + for (mod = module_list, i = 0; mod; mod = mod->next) { + /* include the count for the module name! 
*/ + i += mod->nsyms + 1; + } + + if (table == NULL) + goto out; + + for (mod = module_list, i = 0; mod; mod = mod->next) { + struct kernel_sym ksym; + struct module_symbol *msym; + unsigned int j; + + if ((mod->flags & (MOD_RUNNING|MOD_DELETED)) != MOD_RUNNING) + continue; + + /* magic: write module info as a pseudo symbol */ + ksym.value = (unsigned long)mod; + ksym.name[0] = '#'; + strncpy(ksym.name+1, mod->name, sizeof(ksym.name)-1); + ksym.name[sizeof(ksym.name)-1] = '\0'; + + if (copy_to_user(table, &ksym, sizeof(ksym)) != 0) + goto out; + ++i, ++table; + + if (mod->nsyms == 0) + continue; + + for (j = 0, msym = mod->syms; j < mod->nsyms; ++j, ++msym) { + ksym.value = msym->value; + strncpy(ksym.name, msym->name, sizeof(ksym.name)); + ksym.name[sizeof(ksym.name)-1] = '\0'; + + if (copy_to_user(table, &ksym, sizeof(ksym)) != 0) + goto out; + ++i, ++table; } } - leave_the_loop: - *start = buf + (offset - begin); - len -= (offset - begin); - if (len > length) - len = length; - return len; +out: + unlock_kernel(); + return i; } /* - * Rules: - * - The new symbol table should be statically allocated, or else you _have_ - * to set the "size" field of the struct to the number of bytes allocated. - * - * - The strings that name the symbols will not be copied, maybe the pointers - * - * - For a loadable module, the function should only be called in the - * context of init_module - * - * Those are the only restrictions! (apart from not being reentrant...) - * - * If you want to remove a symbol table for a loadable module, - * the call looks like: "register_symtab(0)". - * - * The look of the code is mostly dictated by the format of - * the frozen struct symbol_table, due to compatibility demands. + * Look for a module by name, ignoring modules marked for deletion. 
*/ -#define INTSIZ sizeof(struct internal_symbol) -#define REFSIZ sizeof(struct module_ref) -#define SYMSIZ sizeof(struct symbol_table) -#define MODSIZ sizeof(struct module) -static struct symbol_table nulltab; -int -register_symtab_from(struct symbol_table *intab, long *from) +static struct module * +find_module(const char *name) { - struct module *mp; - struct module *link; - struct symbol_table *oldtab; - struct symbol_table *newtab; - struct module_ref *newref; - int size; - - if (intab && (intab->n_symbols == 0)) { - struct internal_symbol *sym; - /* How many symbols, really? */ - - for (sym = intab->symbol; sym->name; ++sym) - intab->n_symbols +=1; - } - - for (mp = module_list; mp != &kernel_module; mp = mp->next) { - /* - * "from" points to "mod_use_count_" (== start of module) - * or is == 0 if called from a non-module - */ - if ((unsigned long)(mp->addr) == (unsigned long)from) + struct module *mod; + + for (mod = module_list; mod ; mod = mod->next) { + if (mod->flags & MOD_DELETED) + continue; + if (!strcmp(mod->name, name)) break; } - if (mp == &kernel_module) { - /* Aha! Called from an "internal" module */ - if (!intab) - return 0; /* or -ESILLY_PROGRAMMER :-) */ + return mod; +} - /* create a pseudo module! */ - if (!(mp = (struct module*) kmalloc(MODSIZ, GFP_KERNEL))) { - /* panic time! */ - printk(KERN_ERR "Out of memory for new symbol table!\n"); - return -ENOMEM; - } - /* else OK */ - memset(mp, 0, MODSIZ); - mp->state = MOD_RUNNING; /* Since it is resident... */ - mp->name = ""; /* This is still the "kernel" symbol table! */ - mp->symtab = intab; +/* + * Free the given module. + */ - /* link it in _after_ the resident symbol table */ - mp->next = kernel_module.next; - kernel_module.next = mp; +static void +free_module(struct module *mod) +{ + struct module_ref *dep; + unsigned i; - return 0; - } + /* Let the module clean up. 
*/ - /* else ******** Called from a loadable module **********/ + mod->flags |= MOD_DELETED; + if (mod->flags & MOD_RUNNING) { + mod->cleanup(); + mod->flags &= ~MOD_RUNNING; + } - /* - * This call should _only_ be done in the context of the - * call to init_module i.e. when loading the module!! - * Or else... - */ + /* Remove the module from the dependancy lists. */ - /* Any table there before? */ - if ((oldtab = mp->symtab) == (struct symbol_table*)0) { - /* No, just insert it! */ - mp->symtab = intab; - return 0; + for (i = 0, dep = mod->deps; i < mod->ndeps; ++i, ++dep) { + struct module_ref **pp; + for (pp = &dep->dep->refs; *pp != dep; pp = &(*pp)->next_ref) + continue; + *pp = dep->next_ref; } - /* else ****** we have to replace the module symbol table ******/ + /* And from the main module list. */ - if (oldtab->n_refs == 0) { /* no problems! */ - mp->symtab = intab; - /* if the old table was kmalloc-ed, drop it */ - if (oldtab->size > 0) - kfree_s(oldtab, oldtab->size); - - return 0; + if (mod == module_list) { + module_list = mod->next; + } else { + struct module *p; + for (p = module_list; p->next != mod; p = p->next) + continue; + p->next = mod->next; } - /* else */ - /***** The module references other modules... insmod said so! *****/ - /* We have to allocate a new symbol table, or we lose them! */ - if (intab == (struct symbol_table*)0) - intab = &nulltab; /* easier code with zeroes in place */ + /* And free the memory. */ - /* the input symbol table space does not include the string table */ - /* (it does for symbol tables that insmod creates) */ + vfree(mod); +} - if (!(newtab = (struct symbol_table*)kmalloc( - size = SYMSIZ + intab->n_symbols * INTSIZ + - oldtab->n_refs * REFSIZ, - GFP_KERNEL))) { - /* panic time! */ - printk(KERN_ERR "Out of memory for new symbol table!\n"); - return -ENOMEM; - } +/* + * Called by the /proc file system to return a current list of modules. 
+ */ - /* copy up to, and including, the new symbols */ - memcpy(newtab, intab, SYMSIZ + intab->n_symbols * INTSIZ); +int get_module_list(char *p) +{ + size_t left = PAGE_SIZE; + struct module *mod; + char tmpstr[64]; + struct module_ref *ref; - newtab->size = size; - newtab->n_refs = oldtab->n_refs; + for (mod = module_list; mod != &kernel_module; mod = mod->next) { + long len; + const char *q; + +#define safe_copy_str(str, len) \ + do { \ + if (left < len) \ + goto fini; \ + memcpy(p, str, len); p += len, left -= len; \ + } while (0) +#define safe_copy_cstr(str) safe_copy_str(str, sizeof(str)-1) + + len = strlen(mod->name); + safe_copy_str(mod->name, len); + + if ((len = 20 - len) > 0) { + if (left < len) + goto fini; + memset(p, ' ', len); + p += len; + left -= len; + } - /* copy references */ - memcpy( ((char *)newtab) + SYMSIZ + intab->n_symbols * INTSIZ, - ((char *)oldtab) + SYMSIZ + oldtab->n_symbols * INTSIZ, - oldtab->n_refs * REFSIZ); + len = sprintf(tmpstr, "%8lu", mod->size); + safe_copy_str(tmpstr, len); - /* relink references from the old table to the new one */ + if (mod->flags & MOD_RUNNING) { + len = sprintf(tmpstr, "%4ld", + (mod_member_present(mod, can_unload) + && mod->can_unload + ? -1 : mod->usecount)); + safe_copy_str(tmpstr, len); + } - /* pointer to the first reference entry in newtab! Really! 
*/ - newref = (struct module_ref*) &(newtab->symbol[newtab->n_symbols]); + if (mod->flags & MOD_DELETED) + safe_copy_cstr(" (deleted)"); + else if (mod->flags & MOD_RUNNING) { + if (mod->flags & MOD_AUTOCLEAN) + safe_copy_cstr(" (autoclean)"); + if (!(mod->flags & MOD_USED_ONCE)) + safe_copy_cstr(" (unused)"); + } else + safe_copy_cstr(" (uninitialized)"); + + if ((ref = mod->refs) != NULL) { + safe_copy_cstr(" ["); + while (1) { + q = ref->ref->name; + len = strlen(q); + safe_copy_str(q, len); + + if ((ref = ref->next_ref) != NULL) + safe_copy_cstr(" "); + else + break; + } + safe_copy_cstr("]"); + } - /* check for reference links from previous modules */ - for ( link = module_list; - link && (link != &kernel_module); - link = link->next) { + safe_copy_cstr("\n"); - if (link->ref && (link->ref->module == mp)) - link->ref = newref++; +#undef safe_copy_str +#undef safe_copy_cstr } - mp->symtab = newtab; +fini: + return PAGE_SIZE - left; +} - /* all references (if any) have been handled */ +/* + * Called by the /proc file system to return a current list of ksyms. 
+ */ - /* if the old table was kmalloc-ed, drop it */ - if (oldtab->size > 0) - kfree_s(oldtab, oldtab->size); +int +get_ksyms_list(char *buf, char **start, off_t offset, int length) +{ + struct module *mod; + char *p = buf; + int len = 0; /* code from net/ipv4/proc.c */ + off_t pos = 0; + off_t begin = 0; + + for (mod = module_list; mod; mod = mod->next) { + unsigned i; + struct module_symbol *sym; - return 0; + if (!(mod->flags & MOD_RUNNING) || (mod->flags & MOD_DELETED)) + continue; + + for (i = mod->nsyms, sym = mod->syms; i > 0; --i, ++sym) { + p = buf + len; + if (*mod->name) { + len += sprintf(p, "%0*lx %s\t[%s]\n", + (int)(2*sizeof(void*)), + sym->value, sym->name, + mod->name); + } else { + len += sprintf(p, "%0*lx %s\n", + (int)(2*sizeof(void*)), + sym->value, sym->name); + } + pos = begin + len; + if (pos < offset) { + len = 0; + begin = pos; + } + pos = begin + len; + if (pos > offset+length) + goto leave_the_loop; + } + } +leave_the_loop: + *start = buf + (offset - begin); + len -= (offset - begin); + if (len > length) + len = length; + return len; } #else /* CONFIG_MODULES */ /* Dummy syscalls for people who don't want modules */ -asmlinkage unsigned long sys_create_module(void) +asmlinkage unsigned long +sys_create_module(const char *name_user, size_t size) { return -ENOSYS; } -asmlinkage int sys_init_module(void) +asmlinkage int +sys_init_module(const char *name_user, struct module *mod_user) { return -ENOSYS; } -asmlinkage int sys_delete_module(void) +asmlinkage int +sys_delete_module(const char *name_user) { return -ENOSYS; } -asmlinkage int sys_get_kernel_syms(void) +asmlinkage int +sys_query_module(const char *name_user, int which, char *buf, size_t bufsize, + size_t *ret) { + /* Let the program know about the new interface. Not that + it'll do them much good. 
*/ + if (which == 0) + return 0; + return -ENOSYS; } -int register_symtab_from(struct symbol_table *intab, long *from) +asmlinkage int +sys_get_kernel_syms(struct kernel_sym *table) { - return 0; + return -ENOSYS; } #endif /* CONFIG_MODULES */ - diff --git a/kernel/panic.c b/kernel/panic.c index d42541e9f..deaa2f339 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -10,15 +10,16 @@ */ #include <stdarg.h> -#include <linux/config.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/delay.h> +#include <linux/smp.h> +#include <linux/reboot.h> #include <asm/sgialib.h> asmlinkage void sys_sync(void); /* it's really int */ -extern void do_unblank_screen(void); +extern void unblank_console(void); extern int C_A_D; int panic_timeout = 0; @@ -43,9 +44,12 @@ NORET_TYPE void panic(const char * fmt, ...) else sys_sync(); - do_unblank_screen(); +#ifdef __SMP__ + smp_message_pass(MSG_ALL_BUT_SELF, MSG_STOP_CPU, 0, 0); +#endif + + unblank_console(); -#ifdef CONFIG_SGI if (panic_timeout > 0) { int i; @@ -54,17 +58,20 @@ NORET_TYPE void panic(const char * fmt, ...) * Delay timeout seconds before rebooting the machine. * We can't use the "normal" timers since we just panicked.. */ - prom_printf(KERN_EMERG "Rebooting in %d seconds..",panic_timeout); + printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout); for(i = 0; i < (panic_timeout*1000); i++) udelay(1000); - hard_reset_now(); + /* + * Should we run the reboot notifier. For the moment Im + * choosing not too. It might crash, be corrupt or do + * more harm than good for other reasons. + */ + machine_restart(NULL); } -#if 0 - printk("Hit a key\n"); - prom_getchar(); - romvec->imode(); -#endif +#ifdef __sparc__ + printk("Press L1-A to return to the boot prom\n"); #endif + sti(); for(;;); } diff --git a/kernel/printk.c b/kernel/printk.c index ed39d4fab..0d5d619b0 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -9,6 +9,7 @@ * to the console. 
Added hook for sending the console messages * elsewhere, in preparation for a serial line console (someday). * Ted Ts'o, 2/11/93. + * Modified for sysctl support, 1/8/97, Chris Horn. */ #include <stdarg.h> @@ -21,6 +22,9 @@ #include <linux/mm.h> #include <linux/tty.h> #include <linux/tty_driver.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/console.h> #include <asm/uaccess.h> @@ -28,8 +32,6 @@ static char buf[1024]; -extern void console_print(const char *); - /* printk's without a loglevel use this.. */ #define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */ @@ -39,9 +41,14 @@ extern void console_print(const char *); unsigned long log_size = 0; struct wait_queue * log_wait = NULL; + +/* Keep together for sysctl support */ int console_loglevel = DEFAULT_CONSOLE_LOGLEVEL; +int default_message_loglevel = DEFAULT_MESSAGE_LOGLEVEL; +int minimum_console_loglevel = MINIMUM_CONSOLE_LOGLEVEL; +int default_console_loglevel = DEFAULT_CONSOLE_LOGLEVEL; -static void (*console_print_proc)(const char *) = 0; +struct console *console_drivers = NULL; static char log_buf[LOG_BUF_LEN]; static unsigned long log_start = 0; static unsigned long logged_chars = 0; @@ -64,87 +71,103 @@ asmlinkage int sys_syslog(int type, char * buf, int len) unsigned long i, j, count; int do_clear = 0; char c; - int error; + int error = -EPERM; + lock_kernel(); if ((type != 3) && !suser()) - return -EPERM; + goto out; + error = 0; switch (type) { - case 0: /* Close log */ - return 0; - case 1: /* Open log */ - return 0; - case 2: /* Read from log */ - if (!buf || len < 0) - return -EINVAL; - if (!len) - return 0; - error = verify_area(VERIFY_WRITE,buf,len); - if (error) - return error; - cli(); - while (!log_size) { - if (current->signal & ~current->blocked) { - sti(); - return -ERESTARTSYS; - } - interruptible_sleep_on(&log_wait); - } - i = 0; - while (log_size && i < len) { - c = *((char *) log_buf+log_start); - log_start++; - log_size--; - log_start &= LOG_BUF_LEN-1; + case 0: /* 
Close log */ + break; + case 1: /* Open log */ + break; + case 2: /* Read from log */ + error = -EINVAL; + if (!buf || len < 0) + goto out; + error = 0; + if (!len) + goto out; + error = verify_area(VERIFY_WRITE,buf,len); + if (error) + goto out; + cli(); + error = -ERESTARTSYS; + while (!log_size) { + if (current->signal & ~current->blocked) { sti(); - put_user(c,buf); - buf++; - i++; - cli(); + goto out; } + interruptible_sleep_on(&log_wait); + } + i = 0; + while (log_size && i < len) { + c = *((char *) log_buf+log_start); + log_start++; + log_size--; + log_start &= LOG_BUF_LEN-1; sti(); - return i; - case 4: /* Read/clear last kernel messages */ - do_clear = 1; - /* FALL THRU */ - case 3: /* Read last kernel messages */ - if (!buf || len < 0) - return -EINVAL; - if (!len) - return 0; - error = verify_area(VERIFY_WRITE,buf,len); - if (error) - return error; - count = len; - if (count > LOG_BUF_LEN) - count = LOG_BUF_LEN; - if (count > logged_chars) - count = logged_chars; - j = log_start + log_size - count; - for (i = 0; i < count; i++) { - c = *((char *) log_buf+(j++ & (LOG_BUF_LEN-1))); - put_user(c, buf++); - } - if (do_clear) - logged_chars = 0; - return i; - case 5: /* Clear ring buffer */ + put_user(c,buf); + buf++; + i++; + cli(); + } + sti(); + error = i; + break; + case 4: /* Read/clear last kernel messages */ + do_clear = 1; + /* FALL THRU */ + case 3: /* Read last kernel messages */ + error = -EINVAL; + if (!buf || len < 0) + goto out; + error = 0; + if (!len) + goto out; + error = verify_area(VERIFY_WRITE,buf,len); + if (error) + goto out; + count = len; + if (count > LOG_BUF_LEN) + count = LOG_BUF_LEN; + if (count > logged_chars) + count = logged_chars; + j = log_start + log_size - count; + for (i = 0; i < count; i++) { + c = *((char *) log_buf+(j++ & (LOG_BUF_LEN-1))); + put_user(c, buf++); + } + if (do_clear) logged_chars = 0; - return 0; - case 6: /* Disable logging to console */ - console_loglevel = MINIMUM_CONSOLE_LOGLEVEL; - return 0; - case 7: 
/* Enable logging to console */ - console_loglevel = DEFAULT_CONSOLE_LOGLEVEL; - return 0; - case 8: - if (len < 1 || len > 8) - return -EINVAL; - if (len < MINIMUM_CONSOLE_LOGLEVEL) - len = MINIMUM_CONSOLE_LOGLEVEL; - console_loglevel = len; - return 0; + error = i; + break; + case 5: /* Clear ring buffer */ + logged_chars = 0; + break; + case 6: /* Disable logging to console */ + console_loglevel = minimum_console_loglevel; + break; + case 7: /* Enable logging to console */ + console_loglevel = default_console_loglevel; + break; + case 8: + error = -EINVAL; + if (len < 1 || len > 8) + goto out; + if (len < minimum_console_loglevel) + len = minimum_console_loglevel; + console_loglevel = len; + error = 0; + break; + default: + error = -EINVAL; + break; } - return -EINVAL; +out: + unlock_kernel(); + return error; } @@ -153,11 +176,12 @@ asmlinkage int printk(const char *fmt, ...) va_list args; int i; char *msg, *p, *buf_end; - static char msg_level = -1; + int line_feed; + static signed char msg_level = -1; long flags; - save_flags(flags); - cli(); + __save_flags(flags); + __cli(); va_start(args, fmt); i = vsprintf(buf + 3, fmt, args); /* hopefully i < sizeof(buf)-4 */ buf_end = buf + 3 + i; @@ -173,12 +197,13 @@ asmlinkage int printk(const char *fmt, ...) ) { p -= 3; p[0] = '<'; - p[1] = DEFAULT_MESSAGE_LOGLEVEL + '0'; + p[1] = default_message_loglevel + '0'; p[2] = '>'; } else msg += 3; msg_level = p[1] - '0'; } + line_feed = 0; for (; p < buf_end; p++) { log_buf[(log_start+log_size) & (LOG_BUF_LEN-1)] = *p; if (log_size < LOG_BUF_LEN) @@ -188,38 +213,64 @@ asmlinkage int printk(const char *fmt, ...) 
log_start &= LOG_BUF_LEN-1; } logged_chars++; - if (*p == '\n') + if (*p == '\n') { + line_feed = 1; break; + } } - if (msg_level < console_loglevel && console_print_proc) { - char tmp = p[1]; - p[1] = '\0'; - (*console_print_proc)(msg); - p[1] = tmp; + if (msg_level < console_loglevel && console_drivers) { + struct console *c = console_drivers; + while(c) { + if (c->write) + c->write(msg, p - msg + line_feed); + c = c->next; + } } - if (*p == '\n') + if (line_feed) msg_level = -1; } - restore_flags(flags); + __restore_flags(flags); wake_up_interruptible(&log_wait); return i; } +void console_print(const char *s) +{ + struct console *c = console_drivers; + int len = strlen(s); + while(c) { + if (c->write) + c->write(s, len); + c = c->next; + } +} + +void unblank_console(void) +{ + struct console *c = console_drivers; + while(c) { + if (c->unblank) + c->unblank(); + c = c->next; + } +} + /* * The console driver calls this routine during kernel initialization * to register the console printing procedure with printk() and to * print any messages that were printed by the kernel before the * console driver was initialized. 
*/ -void register_console(void (*proc)(const char *)) +void register_console(struct console * console) { - int i,j; + int i,j,len; int p = log_start; char buf[16]; - char msg_level = -1; + signed char msg_level = -1; char *q; - console_print_proc = proc; + console->next = console_drivers; + console_drivers = console; for (i=0,j=0; i < log_size; i++) { buf[j++] = log_buf[p]; @@ -228,12 +279,14 @@ void register_console(void (*proc)(const char *)) continue; buf[j] = 0; q = buf; + len = j; if (msg_level < 0) { msg_level = buf[1] - '0'; q = buf + 3; + len -= 3; } if (msg_level < console_loglevel) - (*proc)(q); + console->write(q, len); if (buf[j-1] == '\n') msg_level = -1; j = 0; diff --git a/kernel/resource.c b/kernel/resource.c index 48184bfcf..27abcf4dc 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -13,7 +13,7 @@ #include <linux/types.h> #include <linux/ioport.h> -#define IOTABLE_SIZE 64 +#define IOTABLE_SIZE 128 typedef struct resource_entry_t { u_long from, num; @@ -69,7 +69,7 @@ static resource_entry_t *find_gap(resource_entry_t *root, /* * Call this from the device driver to register the ioport region. */ -void request_region(unsigned int from, unsigned int num, const char *name) +void request_region(unsigned long from, unsigned long num, const char *name) { resource_entry_t *p; int i; @@ -95,7 +95,7 @@ void request_region(unsigned int from, unsigned int num, const char *name) /* * Call this when the device driver is unloaded */ -void release_region(unsigned int from, unsigned int num) +void release_region(unsigned long from, unsigned long num) { resource_entry_t *p, *q; @@ -114,11 +114,72 @@ void release_region(unsigned int from, unsigned int num) /* * Call this to check the ioport region before probing */ -int check_region(unsigned int from, unsigned int num) +int check_region(unsigned long from, unsigned long num) { return (find_gap(&iolist, from, num) == NULL) ? -EBUSY : 0; } +#ifdef __sparc__ /* Why to carry unused code on other architectures? 
*/ +/* + * This is for architectures with MMU-managed ports (sparc). + */ +unsigned long occupy_region(unsigned long base, unsigned long end, + unsigned long num, unsigned int align, const char *name) +{ + unsigned long from = 0, till; + unsigned long flags; + int i; + resource_entry_t *p; /* Scanning ptr */ + resource_entry_t *p1; /* === p->next */ + resource_entry_t *s; /* Found slot */ + + if (base > end-1) + return 0; + if (num > end - base) + return 0; + + for (i = 0; i < IOTABLE_SIZE; i++) + if (iotable[i].num == 0) + break; + if (i == IOTABLE_SIZE) { + /* Driver prints a warning typically. */ + return 0; + } + + save_flags(flags); + cli(); + /* printk("occupy: search in %08lx[%08lx] ", base, end - base); */ + s = NULL; + for (p = &iolist; p != NULL; p = p1) { + p1 = p->next; + /* Find window in list */ + from = (p->from+p->num + align-1) & ~((unsigned long)align-1); + till = (p1 == NULL)? (unsigned long) (0 - (unsigned long)align): p1->from; + /* printk(" %08lx:%08lx", from, till); */ + /* Clip window with base and end */ + if (from < base) from = base; + if (till > end) till = end; + /* See if result is large enough */ + if (from < till && from + num < till) { + s = p; + break; + } + } + /* printk("\r\n"); */ + restore_flags(flags); + + if (s == NULL) + return 0; + + iotable[i].name = name; + iotable[i].from = from; + iotable[i].num = num; + iotable[i].next = s->next; + s->next = &iotable[i]; + return from; +} +#endif + /* Called from init/main.c to reserve IO ports. */ void reserve_setup(char *str, int *ints) { diff --git a/kernel/sched.c b/kernel/sched.c index 98502b3fc..bc256d029 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4,6 +4,9 @@ * Copyright (C) 1991, 1992 Linus Torvalds * * 1996-04-21 Modified by Ulrich Windl to make NTP work + * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and + * make semaphores SMP safe + * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
*/ /* @@ -28,12 +31,14 @@ #include <linux/resource.h> #include <linux/mm.h> #include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/system.h> #include <asm/io.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/mmu_context.h> +#include <asm/spinlock.h> #include <linux/timex.h> @@ -44,7 +49,7 @@ int securelevel = 0; /* system security level */ long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */ -volatile struct timeval xtime; /* The current time */ +volatile struct timeval xtime __attribute__ ((aligned (8))); /* The current time */ int tickadj = 500/HZ; /* microsecs */ DECLARE_TASK_QUEUE(tq_timer); @@ -100,7 +105,12 @@ struct task_struct init_task = INIT_TASK; unsigned long volatile jiffies=0; -struct task_struct *current_set[NR_CPUS]; +/* + * Init task must be ok at boot for the ix86 as we will check its signals + * via the SMP irq return path. + */ + +struct task_struct *current_set[NR_CPUS] = {&init_task, }; struct task_struct *last_task_used_math = NULL; struct task_struct * task[NR_TASKS] = {&init_task, }; @@ -109,9 +119,6 @@ struct kernel_stat kstat = { 0 }; static inline void add_to_runqueue(struct task_struct * p) { -#ifdef __SMP__ - int cpu=smp_processor_id(); -#endif #if 1 /* sanity tests */ if (p->next_run || p->prev_run) { printk("task already on run-queue\n"); @@ -124,36 +131,6 @@ static inline void add_to_runqueue(struct task_struct * p) (p->prev_run = init_task.prev_run)->next_run = p; p->next_run = &init_task; init_task.prev_run = p; -#ifdef __SMP__ - /* this is safe only if called with cli()*/ - while(set_bit(31,&smp_process_available)); -#if 0 - { - while(test_bit(31,&smp_process_available)) - { - if(clear_bit(cpu,&smp_invalidate_needed)) - { - local_flush_tlb(); - set_bit(cpu,&cpu_callin_map[0]); - } - } - } -#endif - smp_process_available++; - clear_bit(31,&smp_process_available); - if ((0!=p->pid) && smp_threads_ready) - { - int i; - for (i=0;i<smp_num_cpus;i++) - { - if 
(0==current_set[cpu_logical_map[i]]->pid) - { - smp_message_pass(cpu_logical_map[i], MSG_RESCHEDULE, 0L, 0); - break; - } - } - } -#endif } static inline void del_from_runqueue(struct task_struct * p) @@ -167,7 +144,7 @@ static inline void del_from_runqueue(struct task_struct * p) return; } #endif - if (p == &init_task) { + if (!p->pid) { static int nr = 0; if (nr < 5) { nr++; @@ -199,6 +176,21 @@ static inline void move_last_runqueue(struct task_struct * p) } /* + * The tasklist_lock protects the linked list of processes. + * + * The scheduler lock is protecting against multiple entry + * into the scheduling code, and doesn't need to worry + * about interrupts (because interrupts cannot call the + * scheduler). + * + * The run-queue lock locks the parts that actually access + * and change the run-queues, and have to be interrupt-safe. + */ +rwlock_t tasklist_lock = RW_LOCK_UNLOCKED; +spinlock_t scheduler_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t runqueue_lock = SPIN_LOCK_UNLOCKED; + +/* * Wake up a process. Put it on the run-queue if it's not * already there. 
The "current" process is always on the * run-queue (except when the actual re-schedule is in @@ -210,12 +202,11 @@ inline void wake_up_process(struct task_struct * p) { unsigned long flags; - save_flags(flags); - cli(); + spin_lock_irqsave(&runqueue_lock, flags); p->state = TASK_RUNNING; if (!p->next_run) add_to_runqueue(p); - restore_flags(flags); + spin_unlock_irqrestore(&runqueue_lock, flags); } static void process_timeout(unsigned long __data) @@ -243,17 +234,6 @@ static inline int goodness(struct task_struct * p, struct task_struct * prev, in { int weight; -#ifdef __SMP__ - /* We are not permitted to run a task someone else is running */ - if (p->processor != NO_PROC_ID) - return -1000; -#ifdef PAST_2_0 - /* This process is locked to a processor group */ - if (p->processor_mask && !(p->processor_mask & (1<<this_cpu)) - return -1000; -#endif -#endif - /* * Realtime process, select the first one on the * runqueue (taking priorities within processes @@ -287,6 +267,18 @@ static inline int goodness(struct task_struct * p, struct task_struct * prev, in return weight; } +#ifdef __SMP__ + +#define idle_task (task[cpu_number_map[this_cpu]]) +#define can_schedule(p) ((p)->processor == NO_PROC_ID) + +#else + +#define idle_task (&init_task) +#define can_schedule(p) (1) + +#endif + /* * 'schedule()' is the scheduler function. It's a very simple and nice * scheduler: it's not perfect, but certainly works for most things. 
@@ -299,33 +291,39 @@ static inline int goodness(struct task_struct * p, struct task_struct * prev, in */ asmlinkage void schedule(void) { - int c; - struct task_struct * p; + int lock_depth; struct task_struct * prev, * next; - unsigned long timeout = 0; - int this_cpu=smp_processor_id(); + unsigned long timeout; + int this_cpu; -/* check alarm, wake up any interruptible tasks that have got a signal */ - - if (intr_count) - goto scheduling_in_interrupt; - - if (bh_active & bh_mask) { - intr_count = 1; - do_bottom_half(); - intr_count = 0; + need_resched = 0; + this_cpu = smp_processor_id(); + if (local_irq_count[this_cpu]) { + printk("Scheduling in interrupt\n"); + *(char *)0 = 0; } + prev = current; + release_kernel_lock(prev, this_cpu, lock_depth); + if (bh_active & bh_mask) + do_bottom_half(); - run_task_queue(&tq_scheduler); + spin_lock(&scheduler_lock); + spin_lock_irq(&runqueue_lock); - need_resched = 0; - prev = current; - cli(); /* move an exhausted RR process to be last.. */ if (!prev->counter && prev->policy == SCHED_RR) { - prev->counter = prev->priority; - move_last_runqueue(prev); + if (prev->pid) { + prev->counter = prev->priority; + move_last_runqueue(prev); + } else { + static int count = 5; + if (count) { + count--; + printk("Moving pid 0 last\n"); + } + } } + timeout = 0; switch (prev->state) { case TASK_INTERRUPTIBLE: if (prev->signal & ~prev->blocked) @@ -342,54 +340,55 @@ asmlinkage void schedule(void) del_from_runqueue(prev); case TASK_RUNNING: } - p = init_task.next_run; - sti(); - + { + struct task_struct * p = init_task.next_run; + /* + * This is subtle. + * Note how we can enable interrupts here, even + * though interrupts can add processes to the run- + * queue. This is because any new processes will + * be added to the front of the queue, so "p" above + * is a safe starting point. 
+ * run-queue deletion and re-ordering is protected by + * the scheduler lock + */ + spin_unlock_irq(&runqueue_lock); #ifdef __SMP__ - /* - * This is safe as we do not permit re-entry of schedule() - */ - prev->processor = NO_PROC_ID; -#define idle_task (task[cpu_number_map[this_cpu]]) -#else -#define idle_task (&init_task) -#endif - + prev->processor = NO_PROC_ID; +#endif + /* * Note! there may appear new tasks on the run-queue during this, as * interrupts are enabled. However, they will be put on front of the * list, so our list starting at "p" is essentially fixed. */ /* this is the scheduler proper: */ - c = -1000; - next = idle_task; - while (p != &init_task) { - int weight = goodness(p, prev, this_cpu); - if (weight > c) - c = weight, next = p; - p = p->next_run; - } + { + int c = -1000; + next = idle_task; + while (p != &init_task) { + if (can_schedule(p)) { + int weight = goodness(p, prev, this_cpu); + if (weight > c) + c = weight, next = p; + } + p = p->next_run; + } - /* if all runnable processes have "counter == 0", re-calculate counters */ - if (!c) { - for_each_task(p) - p->counter = (p->counter >> 1) + p->priority; + /* Do we need to re-calculate counters? 
*/ + if (!c) { + struct task_struct *p; + read_lock(&tasklist_lock); + for_each_task(p) + p->counter = (p->counter >> 1) + p->priority; + read_unlock(&tasklist_lock); + } + } } -#ifdef __SMP__ - /* - * Allocate process to CPU - */ - - next->processor = this_cpu; - next->last_processor = this_cpu; -#endif -#ifdef __SMP_PROF__ - /* mark processor running an idle thread */ - if (0==next->pid) - set_bit(this_cpu,&smp_idle_map); - else - clear_bit(this_cpu,&smp_idle_map); -#endif + + next->processor = this_cpu; + next->last_processor = this_cpu; + if (prev != next) { struct timer_list timer; @@ -404,14 +403,13 @@ asmlinkage void schedule(void) get_mmu_context(next); switch_to(prev,next); + if (timeout) del_timer(&timer); } - return; + spin_unlock(&scheduler_lock); -scheduling_in_interrupt: - printk("Aiee: scheduling in interrupt %p\n", - return_address()); + reacquire_kernel_lock(prev, smp_processor_id(), lock_depth); } #ifndef __alpha__ @@ -429,93 +427,92 @@ asmlinkage int sys_pause(void) #endif +spinlock_t waitqueue_lock; + /* * wake_up doesn't wake up stopped processes - they have to be awakened * with signals or similar. - * - * Note that this doesn't need cli-sti pairs: interrupts may not change - * the wait-queue structures directly, but only call wake_up() to wake - * a process. The process itself must remove the queue once it has woken. 
*/ void wake_up(struct wait_queue **q) { + unsigned long flags; struct wait_queue *next; struct wait_queue *head; - if (!q || !(next = *q)) - return; - head = WAIT_QUEUE_HEAD(q); - while (next != head) { - struct task_struct *p = next->task; - next = next->next; - if (p != NULL) { - if ((p->state == TASK_UNINTERRUPTIBLE) || - (p->state == TASK_INTERRUPTIBLE)) - wake_up_process(p); + spin_lock_irqsave(&waitqueue_lock, flags); + if (q && (next = *q)) { + head = WAIT_QUEUE_HEAD(q); + while (next != head) { + struct task_struct *p = next->task; + next = next->next; + if (p != NULL) { + if ((p->state == TASK_UNINTERRUPTIBLE) || + (p->state == TASK_INTERRUPTIBLE)) + wake_up_process(p); + } + if (next) + continue; + printk("wait_queue is bad (eip = %p)\n", + __builtin_return_address(0)); + printk(" q = %p\n",q); + printk(" *q = %p\n",*q); + break; } - if (!next) - goto bad; } - return; -bad: - printk("wait_queue is bad (eip = %p)\n", - __builtin_return_address(0)); - printk(" q = %p\n",q); - printk(" *q = %p\n",*q); + spin_unlock_irqrestore(&waitqueue_lock, flags); } void wake_up_interruptible(struct wait_queue **q) { + unsigned long flags; struct wait_queue *next; struct wait_queue *head; - if (!q || !(next = *q)) - return; - head = WAIT_QUEUE_HEAD(q); - while (next != head) { - struct task_struct *p = next->task; - next = next->next; - if (p != NULL) { - if (p->state == TASK_INTERRUPTIBLE) - wake_up_process(p); + spin_lock_irqsave(&waitqueue_lock, flags); + if (q && (next = *q)) { + head = WAIT_QUEUE_HEAD(q); + while (next != head) { + struct task_struct *p = next->task; + next = next->next; + if (p != NULL) { + if (p->state == TASK_INTERRUPTIBLE) + wake_up_process(p); + } + if (next) + continue; + printk("wait_queue is bad (eip = %p)\n", + __builtin_return_address(0)); + printk(" q = %p\n",q); + printk(" *q = %p\n",*q); + break; } - if (!next) - goto bad; } - return; -bad: - printk("wait_queue is bad (eip = %p)\n", - return_address()); - printk(" q = %p\n",q); - 
printk(" *q = %p\n",*q); + spin_unlock_irqrestore(&waitqueue_lock, flags); } /* * Semaphores are implemented using a two-way counter: * The "count" variable is decremented for each process - * that tries to sleep, while the "waiting" variable is - * incremented _while_ the process is sleeping on that - * semaphore. + * that tries to sleep, while the "waking" variable is + * incremented when the "up()" code goes to wake up waiting + * processes. * * Notably, the inline "up()" and "down()" functions can * efficiently test if they need to do any extra work (up * needs to do something only if count was negative before * the increment operation. - */ -static inline void normalize_semaphore(struct semaphore *sem) -{ - atomic_add(xchg(&sem->waiting,0), &sem->count); -} - -/* + * + * waking_non_zero() (from asm/semaphore.h) must execute + * atomically. + * * When __up() is called, the count was negative before - * incrementing it, and we need to wake up somebody. In - * most cases "waiting" will be positive, and the normalization - * will allow things to continue. However, if somebody has - * /just/ done a down(), it may be that count was negative - * without waiting being positive (or in the generic case - * "count is more negative than waiting is positive"), and - * the waiter needs to check this itself (see __down). + * incrementing it, and we need to wake up somebody. + * + * This routine adds one to the count of processes that need to + * wake up and exit. ALL waiting processes actually wake up but + * only the one that gets to the "waking" field first will gate + * through and acquire the semaphore. The others will go back + * to sleep. 
* * Note that these functions are only called when there is * contention on the lock, and as such all this is the @@ -525,55 +522,83 @@ static inline void normalize_semaphore(struct semaphore *sem) */ void __up(struct semaphore *sem) { - normalize_semaphore(sem); + wake_one_more(sem); wake_up(&sem->wait); } -void __down(struct semaphore * sem) +/* + * Perform the "down" function. Return zero for semaphore acquired, + * return negative for signalled out of the function. + * + * If called from __down, the return is ignored and the wait loop is + * not interruptible. This means that a task waiting on a semaphore + * using "down()" cannot be killed until someone does an "up()" on + * the semaphore. + * + * If called from __down_interruptible, the return value gets checked + * upon return. If the return value is negative then the task continues + * with the negative value in the return register (it can be tested by + * the caller). + * + * Either form may be used in conjunction with "up()". + * + */ +static inline int __do_down(struct semaphore * sem, int task_state) { struct task_struct *tsk = current; struct wait_queue wait = { tsk, NULL }; + int ret = 0; - /* - * The order here is important. We add ourselves to the - * wait queues and mark ourselves sleeping _first_. That - * way, if a "up()" comes in here, we'll either get - * woken up (up happens after the wait queues are set up) - * OR we'll have "waiting > 0". - */ - tsk->state = TASK_UNINTERRUPTIBLE; + tsk->state = task_state; add_wait_queue(&sem->wait, &wait); - atomic_inc(&sem->waiting); /* - * Ok, we're set up. The only race here is really that - * an "up()" might have incremented count before we got - * here, so we check "count+waiting". If that is larger - * than zero, we shouldn't sleep, but re-try the lock. + * Ok, we're set up. sem->count is known to be less than zero + * so we must wait. + * + * We can let go the lock for purposes of waiting. 
+ * We re-acquire it after awaking so as to protect + * all semaphore operations. + * + * If "up()" is called before we call waking_non_zero() then + * we will catch it right away. If it is called later then + * we will have to go through a wakeup cycle to catch it. + * + * Multiple waiters contend for the semaphore lock to see + * who gets to gate through and who has to wait some more. */ - if (sem->count+sem->waiting <= 0) { - /* - * If "count+waiting" <= 0, we have to wait - * for a up(), which will normalize the count. - * Remember, at this point we have decremented - * count, and incremented up, so if count is - * zero or positive we need to return to re-try - * the lock. It _may_ be that both count and - * waiting is zero and that it is still locked, - * but we still want to re-try the lock in that - * case to make count go negative again so that - * the optimized "up()" wake_up sequence works. - */ - do { - schedule(); - tsk->state = TASK_UNINTERRUPTIBLE; - } while (sem->count < 0); + for (;;) { + if (waking_non_zero(sem)) /* are we waking up? 
*/ + break; /* yes, exit loop */ + + if ( task_state == TASK_INTERRUPTIBLE + && (tsk->signal & ~tsk->blocked) /* signalled */ + ) { + ret = -EINTR; /* interrupted */ + atomic_inc(&sem->count); /* give up on down operation */ + break; + } + + schedule(); + tsk->state = task_state; } + tsk->state = TASK_RUNNING; remove_wait_queue(&sem->wait, &wait); - normalize_semaphore(sem); + return ret; +} + +void __down(struct semaphore * sem) +{ + __do_down(sem,TASK_UNINTERRUPTIBLE); } +int __down_interruptible(struct semaphore * sem) +{ + return __do_down(sem,TASK_INTERRUPTIBLE); +} + + static inline void __sleep_on(struct wait_queue **p, int state) { unsigned long flags; @@ -584,14 +609,14 @@ static inline void __sleep_on(struct wait_queue **p, int state) if (current == task[0]) panic("task[0] trying to sleep"); current->state = state; - save_flags(flags); - cli(); + spin_lock_irqsave(&waitqueue_lock, flags); __add_wait_queue(p, &wait); + spin_unlock(&waitqueue_lock); sti(); schedule(); - cli(); + spin_lock_irq(&waitqueue_lock); __remove_wait_queue(p, &wait); - restore_flags(flags); + spin_unlock_irqrestore(&waitqueue_lock, flags); } void interruptible_sleep_on(struct wait_queue **p) @@ -604,74 +629,178 @@ void sleep_on(struct wait_queue **p) __sleep_on(p,TASK_UNINTERRUPTIBLE); } -/* - * The head for the timer-list has a "expires" field of MAX_UINT, - * and the sorting routine counts on this.. 
- */ -static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL }; + +#define TVN_BITS 6 +#define TVR_BITS 8 +#define TVN_SIZE (1 << TVN_BITS) +#define TVR_SIZE (1 << TVR_BITS) +#define TVN_MASK (TVN_SIZE - 1) +#define TVR_MASK (TVR_SIZE - 1) + #define SLOW_BUT_DEBUGGING_TIMERS 0 -void add_timer(struct timer_list * timer) +struct timer_vec { + int index; + struct timer_list *vec[TVN_SIZE]; +}; + +struct timer_vec_root { + int index; + struct timer_list *vec[TVR_SIZE]; +}; + +static struct timer_vec tv5 = { 0 }; +static struct timer_vec tv4 = { 0 }; +static struct timer_vec tv3 = { 0 }; +static struct timer_vec tv2 = { 0 }; +static struct timer_vec_root tv1 = { 0 }; + +static struct timer_vec * const tvecs[] = { + (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5 +}; + +#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0])) + +static unsigned long timer_jiffies = 0; + +static inline void insert_timer(struct timer_list *timer, + struct timer_list **vec, int idx) +{ + if ((timer->next = vec[idx])) + vec[idx]->prev = timer; + vec[idx] = timer; + timer->prev = (struct timer_list *)&vec[idx]; +} + +static inline void internal_add_timer(struct timer_list *timer) +{ + /* + * must be cli-ed when calling this + */ + unsigned long expires = timer->expires; + unsigned long idx = expires - timer_jiffies; + + if (idx < TVR_SIZE) { + int i = expires & TVR_MASK; + insert_timer(timer, tv1.vec, i); + } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { + int i = (expires >> TVR_BITS) & TVN_MASK; + insert_timer(timer, tv2.vec, i); + } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { + int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; + insert_timer(timer, tv3.vec, i); + } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { + int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; + insert_timer(timer, tv4.vec, i); + } else if (expires < timer_jiffies) { + /* can happen if you add a timer with expires == jiffies, + * or you set a timer to go off in the past + */ + 
insert_timer(timer, tv1.vec, tv1.index); + } else if (idx < 0xffffffffUL) { + int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; + insert_timer(timer, tv5.vec, i); + } else { + /* Can only get here on architectures with 64-bit jiffies */ + timer->next = timer->prev = timer; + } +} + +static spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED; + +void add_timer(struct timer_list *timer) { unsigned long flags; - struct timer_list *p; + spin_lock_irqsave(&timerlist_lock, flags); #if SLOW_BUT_DEBUGGING_TIMERS - if (timer->next || timer->prev) { - printk("add_timer() called with non-zero list from %p\n", - __builtin_return_address(0)); - return; - } + if (timer->next || timer->prev) { + printk("add_timer() called with non-zero list from %p\n", + __builtin_return_address(0)); + goto out; + } #endif - p = &timer_head; - save_flags(flags); - cli(); - do { - p = p->next; - } while (timer->expires > p->expires); - timer->next = p; - timer->prev = p->prev; - p->prev = timer; - timer->prev->next = timer; - restore_flags(flags); + internal_add_timer(timer); +#if SLOW_BUT_DEBUGGING_TIMERS +out: +#endif + spin_unlock_irqrestore(&timerlist_lock, flags); } -int del_timer(struct timer_list * timer) +static inline int detach_timer(struct timer_list *timer) { int ret = 0; - if (timer->next) { - unsigned long flags; - struct timer_list * next; - save_flags(flags); - cli(); - if ((next = timer->next) != NULL) { - (next->prev = timer->prev)->next = next; - timer->next = timer->prev = NULL; - ret = 1; - } - restore_flags(flags); + struct timer_list *next, *prev; + next = timer->next; + prev = timer->prev; + if (next) { + next->prev = prev; + } + if (prev) { + ret = 1; + prev->next = next; } return ret; } -static inline void run_timer_list(void) + +int del_timer(struct timer_list * timer) { - struct timer_list * timer; + int ret; + unsigned long flags; - cli(); - while ((timer = timer_head.next) != &timer_head && timer->expires <= jiffies) { - void (*fn)(unsigned long) = 
timer->function; - unsigned long data = timer->data; - timer->next->prev = timer->prev; - timer->prev->next = timer->next; - timer->next = timer->prev = NULL; - sti(); - fn(data); - cli(); + spin_lock_irqsave(&timerlist_lock, flags); + ret = detach_timer(timer); + timer->next = timer->prev = 0; + spin_unlock_irqrestore(&timerlist_lock, flags); + return ret; +} + +static inline void cascade_timers(struct timer_vec *tv) +{ + /* cascade all the timers from tv up one level */ + struct timer_list *timer; + timer = tv->vec[tv->index]; + /* + * We are removing _all_ timers from the list, so we don't have to + * detach them individually, just clear the list afterwards. + */ + while (timer) { + struct timer_list *tmp = timer; + timer = timer->next; + internal_add_timer(tmp); + } + tv->vec[tv->index] = NULL; + tv->index = (tv->index + 1) & TVN_MASK; +} + +static inline void run_timer_list(void) +{ + spin_lock_irq(&timerlist_lock); + while ((long)(jiffies - timer_jiffies) >= 0) { + struct timer_list *timer; + if (!tv1.index) { + int n = 1; + do { + cascade_timers(tvecs[n]); + } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS); + } + while ((timer = tv1.vec[tv1.index])) { + void (*fn)(unsigned long) = timer->function; + unsigned long data = timer->data; + detach_timer(timer); + timer->next = timer->prev = NULL; + spin_unlock_irq(&timerlist_lock); + fn(data); + spin_lock_irq(&timerlist_lock); + } + ++timer_jiffies; + tv1.index = (tv1.index + 1) & TVR_MASK; } - sti(); + spin_unlock_irq(&timerlist_lock); } + static inline void run_old_timers(void) { struct timer_struct *tp; @@ -690,6 +819,8 @@ static inline void run_old_timers(void) } } +spinlock_t tqueue_lock; + void tqueue_bh(void) { run_task_queue(&tq_timer); @@ -974,7 +1105,7 @@ static inline void do_it_prof(struct task_struct * p, unsigned long ticks) } } -static __inline__ void update_one_process(struct task_struct *p, +void update_one_process(struct task_struct *p, unsigned long ticks, unsigned long user, unsigned long 
system) { do_process_times(p, user, system); @@ -984,6 +1115,9 @@ static __inline__ void update_one_process(struct task_struct *p, static void update_process_times(unsigned long ticks, unsigned long system) { +/* + * SMP does this on a per-CPU basis elsewhere + */ #ifndef __SMP__ struct task_struct * p = current; unsigned long user = ticks - system; @@ -1000,79 +1134,35 @@ static void update_process_times(unsigned long ticks, unsigned long system) kstat.cpu_system += system; } update_one_process(p, ticks, user, system); -#else - int cpu,j; - cpu = smp_processor_id(); - for (j=0;j<smp_num_cpus;j++) - { - int i = cpu_logical_map[j]; - struct task_struct *p; - -#ifdef __SMP_PROF__ - if (test_bit(i,&smp_idle_map)) - smp_idle_count[i]++; -#endif - p = current_set[i]; - /* - * Do we have a real process? - */ - if (p->pid) { - /* assume user-mode process */ - unsigned long utime = ticks; - unsigned long stime = 0; - if (cpu == i) { - utime = ticks-system; - stime = system; - } else if (smp_proc_in_lock[j]) { - utime = 0; - stime = ticks; - } - update_one_process(p, ticks, utime, stime); - - if (p->priority < DEF_PRIORITY) - kstat.cpu_nice += utime; - else - kstat.cpu_user += utime; - kstat.cpu_system += stime; - - p->counter -= ticks; - if (p->counter >= 0) - continue; - p->counter = 0; - } else { - /* - * Idle processor found, do we have anything - * we could run? 
- */ - if (!(0x7fffffff & smp_process_available)) - continue; - } - /* Ok, we should reschedule, do the magic */ - if (i==cpu) - need_resched = 1; - else - smp_message_pass(i, MSG_RESCHEDULE, 0L, 0); - } #endif } -static unsigned long lost_ticks = 0; +volatile unsigned long lost_ticks = 0; static unsigned long lost_ticks_system = 0; static inline void update_times(void) { unsigned long ticks; + unsigned long flags; + + save_flags(flags); + cli(); - ticks = xchg(&lost_ticks, 0); + ticks = lost_ticks; + lost_ticks = 0; if (ticks) { unsigned long system; - system = xchg(&lost_ticks_system, 0); + calc_load(ticks); update_wall_time(ticks); + restore_flags(flags); + update_process_times(ticks, system); - } + + } else + restore_flags(flags); } static void timer_bh(void) @@ -1087,17 +1177,8 @@ void do_timer(struct pt_regs * regs) (*(unsigned long *)&jiffies)++; lost_ticks++; mark_bh(TIMER_BH); - if (!user_mode(regs)) { + if (!user_mode(regs)) lost_ticks_system++; - if (prof_buffer && current->pid) { - extern int _stext; - unsigned long ip = instruction_pointer(regs); - ip -= (unsigned long) &_stext; - ip >>= prof_shift; - if (ip < prof_len) - prof_buffer[ip]++; - } - } if (tq_timer) mark_bh(TQUEUE_BH); } @@ -1129,34 +1210,81 @@ asmlinkage unsigned int sys_alarm(unsigned int seconds) * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this * should be moved into arch/i386 instead? */ + asmlinkage int sys_getpid(void) { + /* This is SMP safe - current->pid doesn't change */ return current->pid; } +/* + * This is not strictly SMP safe: p_opptr could change + * from under us. However, rather than getting any lock + * we can use an optimistic algorithm: get the parent + * pid, and go back and check that the parent is still + * the same. If it has changed (which is extremely unlikely + * indeed), we just try again.. + * + * NOTE!
This depends on the fact that even if we _do_ + * get an old value of "parent", we can happily dereference + * the pointer: we just can't necessarily trust the result + * until we know that the parent pointer is valid. + * + * The "mb()" macro is a memory barrier - a synchronizing + * event. It also makes sure that gcc doesn't optimize + * away the necessary memory references.. The barrier doesn't + * have to have all that strong semantics: on x86 we don't + * really require a synchronizing instruction, for example. + * The barrier is more important for code generation than + * for any real memory ordering semantics (even if there is + * a small window for a race, using the old pointer is + * harmless for a while). + */ asmlinkage int sys_getppid(void) { - return current->p_opptr->pid; + int pid; + struct task_struct * me = current; + struct task_struct * parent; + + parent = me->p_opptr; + for (;;) { + pid = parent->pid; +#if __SMP__ +{ + struct task_struct *old = parent; + mb(); + parent = me->p_opptr; + if (old != parent) + continue; +} +#endif + break; + } + return pid; } asmlinkage int sys_getuid(void) { + /* Only we change this so SMP safe */ return current->uid; } asmlinkage int sys_geteuid(void) { + /* Only we change this so SMP safe */ return current->euid; } asmlinkage int sys_getgid(void) { + /* Only we change this so SMP safe */ return current->gid; } asmlinkage int sys_getegid(void) { - return current->egid; + /* Only we change this so SMP safe */ + return current->egid; } /* @@ -1164,11 +1292,18 @@ asmlinkage int sys_getegid(void) * moved into the arch dependent tree for those ports that require * it for backward compatibility? */ + asmlinkage int sys_nice(int increment) { unsigned long newprio; int increase = 0; + /* + * Setpriority might change our priority at the same moment. + * We don't have to worry. Conceptually one call occurs first + * and we have a single winner. 
+ */ + newprio = increment; if (increment < 0) { if (!suser()) @@ -1176,6 +1311,7 @@ asmlinkage int sys_nice(int increment) newprio = -increment; increase = 1; } + if (newprio > 40) newprio = 40; /* @@ -1189,6 +1325,14 @@ asmlinkage int sys_nice(int increment) increment = newprio; if (increase) increment = -increment; + /* + * Current->priority can change between this point + * and the assignment. We are assigning not doing add/subs + * so that's ok. Conceptually a process might just instantaneously + * read the value we stomp over. I don't think that is an issue + * unless posix makes it one. If so we can loop on changes + * to current->priority. + */ newprio = current->priority - increment; if ((signed) newprio < 1) newprio = 1; @@ -1206,13 +1350,15 @@ static struct task_struct *find_process_by_pid(pid_t pid) p = current; if (pid) { + read_lock(&tasklist_lock); for_each_task(p) { if (p->pid == pid) goto found; } p = NULL; - } found: + read_unlock(&tasklist_lock); + } return p; } @@ -1255,12 +1401,13 @@ static int setscheduler(pid_t pid, int policy, p->policy = policy; p->rt_priority = lp.sched_priority; - cli(); + spin_lock(&scheduler_lock); + spin_lock_irq(&runqueue_lock); if (p->next_run) move_last_runqueue(p); - sti(); - schedule(); - + spin_unlock_irq(&runqueue_lock); + spin_unlock(&scheduler_lock); + need_resched = 1; return 0; } @@ -1307,36 +1454,44 @@ asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param) asmlinkage int sys_sched_yield(void) { - cli(); + spin_lock(&scheduler_lock); + spin_lock_irq(&runqueue_lock); move_last_runqueue(current); - sti(); + spin_unlock_irq(&runqueue_lock); + spin_unlock(&scheduler_lock); + need_resched = 1; return 0; } asmlinkage int sys_sched_get_priority_max(int policy) { + int ret = -EINVAL; + switch (policy) { - case SCHED_FIFO: - case SCHED_RR: - return 99; - case SCHED_OTHER: - return 0; + case SCHED_FIFO: + case SCHED_RR: + ret = 99; + break; + case SCHED_OTHER: + ret = 0; + break; } - - return -EINVAL;
+ return ret; } asmlinkage int sys_sched_get_priority_min(int policy) { + int ret = -EINVAL; + switch (policy) { - case SCHED_FIFO: - case SCHED_RR: - return 1; - case SCHED_OTHER: - return 0; + case SCHED_FIFO: + case SCHED_RR: + ret = 1; + break; + case SCHED_OTHER: + ret = 0; } - - return -EINVAL; + return ret; } asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval) @@ -1344,9 +1499,10 @@ asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval) struct timespec t; t.tv_sec = 0; - t.tv_nsec = 0; /* <-- Linus, please fill correct value in here */ - return -ENOSYS; /* and then delete this line. Thanks! */ - return copy_to_user(interval, &t, sizeof(struct timespec)) ? -EFAULT : 0; + t.tv_nsec = 150000; + if (copy_to_user(interval, &t, sizeof(struct timespec))) + return -EFAULT; + return 0; } /* @@ -1369,33 +1525,35 @@ static void jiffiestotimespec(unsigned long jiffies, struct timespec *value) { value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ); value->tv_sec = jiffies / HZ; - return; } asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp) { - int error; struct timespec t; unsigned long expire; - error = copy_from_user(&t, rqtp, sizeof(struct timespec)); - if (error) - return -EFAULT; + if(copy_from_user(&t, rqtp, sizeof(struct timespec))) + return -EFAULT; if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0) return -EINVAL; + if (t.tv_sec == 0 && t.tv_nsec <= 2000000L && - current->policy != SCHED_OTHER) { + current->policy != SCHED_OTHER) + { /* * Short delay requests up to 2 ms will be handled with * high precision by a busy wait for all real-time processes. + * + * Its important on SMP not to do this holding locks. 
*/ udelay((t.tv_nsec + 999) / 1000); return 0; } expire = timespectojiffies(&t) + (t.tv_sec || t.tv_nsec) + jiffies; + current->timeout = expire; current->state = TASK_INTERRUPTIBLE; schedule(); @@ -1405,11 +1563,10 @@ asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp) jiffiestotimespec(expire - jiffies - (expire > jiffies + 1), &t); if (copy_to_user(rmtp, &t, sizeof(struct timespec))) - return -EFAULT; + return -EFAULT; } return -EINTR; } - return 0; } @@ -1478,7 +1635,7 @@ void sched_init(void) * process right in SMP mode. */ int cpu=smp_processor_id(); -#ifndef __SMP__ +#ifndef __SMP__ current_set[cpu]=&init_task; #else init_task.processor=cpu; diff --git a/kernel/signal.c b/kernel/signal.c index 325663bed..3203ad39c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -12,6 +12,8 @@ #include <linux/ptrace.h> #include <linux/unistd.h> #include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/uaccess.h> @@ -19,61 +21,70 @@ #define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP))) -#if !defined(__alpha__) && !defined(__mips__) +#ifndef __alpha__ /* * This call isn't used by all ports, in particular, the Alpha * uses osf_sigprocmask instead. Maybe it should be moved into * arch-dependent dir? + * + * We don't need to get the kernel lock - this is all local to this + * particular thread.. (and that's good, because this is _heavily_ + * used by various programs) + * + * No SMP locking would prevent the inherent races present in this + * routine, thus we do not perform any locking at all. 
*/ asmlinkage int sys_sigprocmask(int how, sigset_t *set, sigset_t *oset) { - sigset_t new_set, old_set = current->blocked; - int error; + sigset_t old_set = current->blocked; if (set) { - error = get_user(new_set, set); - if (error) - return error; + sigset_t new_set; + + if(get_user(new_set, set)) + return -EFAULT; + new_set &= _BLOCKABLE; switch (how) { + default: + return -EINVAL; case SIG_BLOCK: - current->blocked |= new_set; + new_set |= old_set; break; case SIG_UNBLOCK: - current->blocked &= ~new_set; + new_set = old_set & ~new_set; break; case SIG_SETMASK: - current->blocked = new_set; break; - default: - return -EINVAL; } + current->blocked = new_set; } if (oset) { - error = put_user(old_set, oset); - if (error) - return error; + if(put_user(old_set, oset)) + return -EFAULT; } return 0; } -#endif - -#ifndef __alpha__ /* * For backwards compatibility? Functionality superseded by sigprocmask. */ asmlinkage int sys_sgetmask(void) { + /* SMP safe */ return current->blocked; } asmlinkage int sys_ssetmask(int newmask) { - int old=current->blocked; + int old; + spin_lock_irq(&current->sigmask_lock); + old = current->blocked; current->blocked = newmask & _BLOCKABLE; + spin_unlock_irq(&current->sigmask_lock); + return old; } @@ -81,8 +92,13 @@ asmlinkage int sys_ssetmask(int newmask) asmlinkage int sys_sigpending(sigset_t *set) { - return put_user(current->blocked & current->signal, - /* Hack */(unsigned long *)set); + int ret; + + /* fill in "set" with signals pending but blocked. */ + spin_lock_irq(&current->sigmask_lock); + ret = put_user(current->blocked & current->signal, set); + spin_unlock_irq(&current->sigmask_lock); + return ret; } /* @@ -99,22 +115,24 @@ asmlinkage int sys_sigpending(sigset_t *set) * Note the silly behaviour of SIGCHLD: SIG_IGN means that the signal * isn't actually ignored, but does automatic child reaping, while * SIG_DFL is explicitly said by POSIX to force the signal to be ignored.. + * + * All callers of check_pending must be holding current->sig->siglock. 
*/ -static inline void check_pending(int signum) +inline void check_pending(int signum) { struct sigaction *p; p = signum - 1 + current->sig->action; + spin_lock(&current->sigmask_lock); if (p->sa_handler == SIG_IGN) { - k_sigdelset(&current->signal, signum); - return; - } - if (p->sa_handler == SIG_DFL) { - if (signum != SIGCONT && signum != SIGCHLD && signum != SIGWINCH) - return; - k_sigdelset(&current->signal, signum); - return; + current->signal &= ~_S(signum); + } else if (p->sa_handler == SIG_DFL) { + if (signum == SIGCONT || + signum == SIGCHLD || + signum != SIGWINCH) + current->signal &= ~_S(signum); } + spin_unlock(&current->sigmask_lock); } #if !defined(__alpha__) && !defined(__mips__) @@ -123,69 +141,65 @@ static inline void check_pending(int signum) */ asmlinkage unsigned long sys_signal(int signum, __sighandler_t handler) { - int err; struct sigaction tmp; - /* - * HACK: We still cannot handle signals > 32 due to the limited - * size of ksigset_t (which will go away). - */ - if (signum > 32) - return -EINVAL; - if (signum<1 || signum>_NSIG) + if (signum<1 || signum>32) return -EINVAL; if (signum==SIGKILL || signum==SIGSTOP) return -EINVAL; if (handler != SIG_DFL && handler != SIG_IGN) { - err = verify_area(VERIFY_READ, handler, 1); - if (err) - return err; + if(verify_area(VERIFY_READ, handler, 1)) + return -EFAULT; } + memset(&tmp, 0, sizeof(tmp)); tmp.sa_handler = handler; tmp.sa_flags = SA_ONESHOT | SA_NOMASK; + + spin_lock_irq(&current->sig->siglock); handler = current->sig->action[signum-1].sa_handler; current->sig->action[signum-1] = tmp; check_pending(signum); + spin_unlock_irq(&current->sig->siglock); + return (unsigned long) handler; } #endif /* !defined(__alpha__) && !defined(__mips__) */ +#ifndef __sparc__ asmlinkage int sys_sigaction(int signum, const struct sigaction * action, struct sigaction * oldaction) { struct sigaction new_sa, *p; - /* - * HACK: We still cannot handle signals > 32 due to the limited - * size of ksigset_t (which will go away). 
- */ - if (signum > 32) - return -EINVAL; - if (signum<1 || signum>_NSIG) + if (signum < 1 || signum > 32) return -EINVAL; + p = signum - 1 + current->sig->action; + if (action) { - int err = verify_area(VERIFY_READ, action, sizeof(*action)); - if (err) - return err; + if (copy_from_user(&new_sa, action, sizeof(struct sigaction))) + return -EFAULT; if (signum==SIGKILL || signum==SIGSTOP) return -EINVAL; - if (copy_from_user(&new_sa, action, sizeof(struct sigaction))) - return -EFAULT; - if (new_sa.sa_handler != SIG_DFL && new_sa.sa_handler != SIG_IGN) { - err = verify_area(VERIFY_READ, new_sa.sa_handler, 1); - if (err) - return err; - } } + if (oldaction) { + /* In the clone() case we could copy half consistant + * state to the user, however this could sleep and + * deadlock us if we held the signal lock on SMP. So for + * now I take the easy way out and do no locking. + */ if (copy_to_user(oldaction, p, sizeof(struct sigaction))) return -EFAULT; } + if (action) { + spin_lock_irq(&current->sig->siglock); *p = new_sa; check_pending(signum); + spin_unlock_irq(&current->sig->siglock); } return 0; } +#endif diff --git a/kernel/softirq.c b/kernel/softirq.c index 022b55355..6b9b41aa5 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -15,40 +15,60 @@ #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/system.h> #include <asm/io.h> #include <asm/irq.h> #include <asm/bitops.h> +#include <asm/atomic.h> -unsigned long intr_count = 0; +/* intr_count died a painless death... -DaveM */ int bh_mask_count[32]; unsigned long bh_active = 0; unsigned long bh_mask = 0; void (*bh_base[32])(void); - -asmlinkage void do_bottom_half(void) +/* + * This needs to make sure that only one bottom half handler + * is ever active at a time. We do this without locking by + * doing an atomic increment on the intr_count, and checking + * (nonatomically) against 1. Only if it's 1 do we schedule + * the bottom half. 
+ * + * Note that the non-atomicity of the test (as opposed to the + * actual update) means that the test may fail, and _nobody_ + * runs the handlers if there is a race that makes multiple + * CPU's get here at the same time. That's ok, we'll run them + * next time around. + */ +static inline void run_bottom_halves(void) { unsigned long active; - unsigned long mask, left; void (**bh)(void); - sti(); + active = get_active_bhs(); + clear_active_bhs(active); bh = bh_base; - active = bh_active & bh_mask; - for (mask = 1, left = ~0 ; left & active ; bh++,mask += mask,left += left) { - if (mask & active) { - void (*fn)(void); - bh_active &= ~mask; - fn = *bh; - if (!fn) - goto bad_bh; - fn(); + do { + if (active & 1) + (*bh)(); + bh++; + active >>= 1; + } while (active); +} + +asmlinkage void do_bottom_half(void) +{ + int cpu = smp_processor_id(); + + if (hardirq_trylock(cpu)) { + if (softirq_trylock()) { + run_bottom_halves(); + softirq_endlock(); } + hardirq_endlock(cpu); } - return; -bad_bh: - printk ("irq.c:bad bottom half entry %08lx\n", mask); } diff --git a/kernel/sys.c b/kernel/sys.c index 8fcaba2de..934108fa8 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -4,7 +4,6 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -21,9 +20,10 @@ #include <linux/fcntl.h> #include <linux/acct.h> #include <linux/tty.h> -#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF) -#include <linux/apm_bios.h> -#endif +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/notifier.h> +#include <linux/reboot.h> #include <asm/uaccess.h> #include <asm/io.h> @@ -31,8 +31,30 @@ /* * this indicates whether you can reboot with ctrl-alt-del: the default is yes */ + int C_A_D = 1; + +/* + * Notifier list for kernel code which wants to be called + * at shutdown. This is used to stop any idling DMA operations + * and the like. 
+ */ + +struct notifier_block *reboot_notifier_list = NULL; + +int register_reboot_notifier(struct notifier_block * nb) +{ + return notifier_chain_register(&reboot_notifier_list, nb); +} + +int unregister_reboot_notifier(struct notifier_block * nb) +{ + return notifier_chain_unregister(&reboot_notifier_list, nb); +} + + + extern void adjust_clock(void); asmlinkage int sys_ni_syscall(void) @@ -65,13 +87,14 @@ static int proc_sel(struct task_struct *p, int which, int who) asmlinkage int sys_setpriority(int which, int who, int niceval) { struct task_struct *p; - int error = ESRCH; unsigned int priority; + int error; if (which > 2 || which < 0) return -EINVAL; /* normalize: avoid signed division (rounding problems) */ + error = ESRCH; priority = niceval; if (niceval < 0) priority = -niceval; @@ -85,6 +108,7 @@ asmlinkage int sys_setpriority(int which, int who, int niceval) priority = 1; } + read_lock(&tasklist_lock); for_each_task(p) { if (!proc_sel(p, which, who)) continue; @@ -100,6 +124,8 @@ asmlinkage int sys_setpriority(int which, int who, int niceval) else p->priority = priority; } + read_unlock(&tasklist_lock); + return -error; } @@ -116,12 +142,14 @@ asmlinkage int sys_getpriority(int which, int who) if (which > 2 || which < 0) return -EINVAL; + read_lock(&tasklist_lock); for_each_task (p) { if (!proc_sel(p, which, who)) continue; if (p->priority > max_prio) max_prio = p->priority; } + read_unlock(&tasklist_lock); /* scale the priority from timeslice to 0..40 */ if (max_prio > 0) @@ -169,7 +197,7 @@ asmlinkage int sys_prof(void) #endif -extern asmlinkage sys_kill(int, int); +extern asmlinkage int sys_kill(int, int); /* * Reboot system call: for obvious reasons only root may call it, @@ -178,29 +206,70 @@ extern asmlinkage sys_kill(int, int); * You can also set the meaning of the ctrl-alt-del-key here. * * reboot doesn't sync: do that yourself before calling this. 
+ * */ -asmlinkage int sys_reboot(int magic, int magic_too, int flag) +asmlinkage int sys_reboot(int magic1, int magic2, int cmd, void * arg) { + char buffer[256]; + + /* We only trust the superuser with rebooting the system. */ if (!suser()) return -EPERM; - if (magic != 0xfee1dead || magic_too != 672274793) + + /* For safety, we require "magic" arguments. */ + if (magic1 != LINUX_REBOOT_MAGIC1 || + (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A)) return -EINVAL; - if (flag == 0x01234567) - hard_reset_now(); - else if (flag == 0x89ABCDEF) + + lock_kernel(); + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: + notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); + printk(KERN_EMERG "Restarting system.\n"); + machine_restart(NULL); + break; + + case LINUX_REBOOT_CMD_CAD_ON: C_A_D = 1; - else if (!flag) + break; + + case LINUX_REBOOT_CMD_CAD_OFF: C_A_D = 0; - else if (flag == 0xCDEF0123) { - printk(KERN_EMERG "System halted\n"); - sys_kill(-1, SIGKILL); -#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF) - apm_set_power_state(APM_STATE_OFF); -#endif + break; + + case LINUX_REBOOT_CMD_HALT: + notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL); + printk(KERN_EMERG "System halted.\n"); + machine_halt(); do_exit(0); - } else + break; + + case LINUX_REBOOT_CMD_POWER_OFF: + notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL); + printk(KERN_EMERG "Power down.\n"); + machine_power_off(); + do_exit(0); + break; + + case LINUX_REBOOT_CMD_RESTART2: + if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) { + unlock_kernel(); + return -EFAULT; + } + buffer[sizeof(buffer) - 1] = '\0'; + + notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer); + printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer); + machine_restart(buffer); + break; + + default: + unlock_kernel(); return -EINVAL; - return (0); + break; + }; + unlock_kernel(); + return 0; } /* @@ -210,9 +279,10 @@ asmlinkage int 
sys_reboot(int magic, int magic_too, int flag) */ void ctrl_alt_del(void) { - if (C_A_D) - hard_reset_now(); - else + if (C_A_D) { + notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); + machine_restart(NULL); + } else kill_proc(1, SIGINT, 1); } @@ -231,6 +301,9 @@ void ctrl_alt_del(void) * The general idea is that a program which uses just setregid() will be * 100% compatible with BSD. A program which uses just setgid() will be * 100% compatible with POSIX w/ Saved ID's. + * + * SMP: There are not races, the gid's are checked only by filesystem + * operations (as far as semantic preservation is concerned). */ asmlinkage int sys_setregid(gid_t rgid, gid_t egid) { @@ -243,7 +316,7 @@ asmlinkage int sys_setregid(gid_t rgid, gid_t egid) suser()) current->gid = rgid; else - return(-EPERM); + return -EPERM; } if (egid != (gid_t) -1) { if ((old_rgid == egid) || @@ -253,7 +326,7 @@ asmlinkage int sys_setregid(gid_t rgid, gid_t egid) current->fsgid = current->egid = egid; else { current->gid = old_rgid; - return(-EPERM); + return -EPERM; } } if (rgid != (gid_t) -1 || @@ -267,6 +340,8 @@ asmlinkage int sys_setregid(gid_t rgid, gid_t egid) /* * setgid() is implemented like SysV w/ SAVED_IDS + * + * SMP: Same implicit races as above. 
*/ asmlinkage int sys_setgid(gid_t gid) { @@ -278,6 +353,7 @@ asmlinkage int sys_setgid(gid_t gid) current->egid = current->fsgid = gid; else return -EPERM; + if (current->egid != old_egid) current->dumpable = 0; return 0; @@ -329,66 +405,70 @@ int acct_process(long exitcode) asmlinkage int sys_acct(const char *name) { - struct inode *inode = (struct inode *)0; - char *tmp; - int error; - - if (!suser()) - return -EPERM; - - if (name == (char *)0) { - if (acct_active) { - if (acct_file.f_op->release) - acct_file.f_op->release(acct_file.f_inode, &acct_file); - - if (acct_file.f_inode != (struct inode *) 0) - iput(acct_file.f_inode); - - acct_active = 0; - } - return 0; - } else { - if (!acct_active) { - - if ((error = getname(name, &tmp)) != 0) - return (error); - - error = open_namei(tmp, O_RDWR, 0600, &inode, 0); - putname(tmp); - - if (error) - return (error); - - if (!S_ISREG(inode->i_mode)) { - iput(inode); - return -EACCES; - } - - if (!inode->i_op || !inode->i_op->default_file_ops || - !inode->i_op->default_file_ops->write) { - iput(inode); - return -EIO; - } - - acct_file.f_mode = 3; - acct_file.f_flags = 0; - acct_file.f_count = 1; - acct_file.f_inode = inode; - acct_file.f_pos = inode->i_size; - acct_file.f_reada = 0; - acct_file.f_op = inode->i_op->default_file_ops; - - if (acct_file.f_op->open) - if (acct_file.f_op->open(acct_file.f_inode, &acct_file)) { - iput(inode); - return -EIO; - } - - acct_active = 1; - return 0; - } else - return -EBUSY; - } + struct inode *inode = (struct inode *)0; + char *tmp; + int error = -EPERM; + + lock_kernel(); + if (!suser()) + goto out; + + if (name == (char *)0) { + if (acct_active) { + if (acct_file.f_op->release) + acct_file.f_op->release(acct_file.f_inode, &acct_file); + + if (acct_file.f_inode != (struct inode *) 0) + iput(acct_file.f_inode); + + acct_active = 0; + } + error = 0; + } else { + error = -EBUSY; + if (!acct_active) { + if ((error = getname(name, &tmp)) != 0) + goto out; + + error = open_namei(tmp, 
O_RDWR, 0600, &inode, 0); + putname(tmp); + if (error) + goto out; + + error = -EACCES; + if (!S_ISREG(inode->i_mode)) { + iput(inode); + goto out; + } + + error = -EIO; + if (!inode->i_op || !inode->i_op->default_file_ops || + !inode->i_op->default_file_ops->write) { + iput(inode); + goto out; + } + + acct_file.f_mode = 3; + acct_file.f_flags = 0; + acct_file.f_count = 1; + acct_file.f_inode = inode; + acct_file.f_pos = inode->i_size; + acct_file.f_reada = 0; + acct_file.f_op = inode->i_op->default_file_ops; + + if(acct_file.f_op->open) + if(acct_file.f_op->open(acct_file.f_inode, &acct_file)) { + iput(inode); + goto out; + } + + acct_active = 1; + error = 0; + } + } +out: + unlock_kernel(); + return error; } #ifndef __alpha__ @@ -443,16 +523,18 @@ asmlinkage int sys_old_syscall(void) */ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid) { - int old_ruid = current->uid; - int old_euid = current->euid; + int old_ruid; + int old_euid; + old_ruid = current->uid; + old_euid = current->euid; if (ruid != (uid_t) -1) { if ((old_ruid == ruid) || (current->euid==ruid) || suser()) current->uid = ruid; else - return(-EPERM); + return -EPERM; } if (euid != (uid_t) -1) { if ((old_ruid == euid) || @@ -462,7 +544,7 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid) current->fsuid = current->euid = euid; else { current->uid = old_ruid; - return(-EPERM); + return -EPERM; } } if (ruid != (uid_t) -1 || @@ -495,9 +577,10 @@ asmlinkage int sys_setuid(uid_t uid) current->fsuid = current->euid = uid; else return -EPERM; + if (current->euid != old_euid) current->dumpable = 0; - return(0); + return 0; } @@ -538,6 +621,7 @@ asmlinkage int sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid) if (!(retval = put_user(current->uid, ruid)) && !(retval = put_user(current->euid, euid))) retval = put_user(current->suid, suid); + return retval; } @@ -550,13 +634,15 @@ asmlinkage int sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid) */ asmlinkage int sys_setfsuid(uid_t uid) { - int old_fsuid 
= current->fsuid; + int old_fsuid; + old_fsuid = current->fsuid; if (uid == current->uid || uid == current->euid || uid == current->suid || uid == current->fsuid || suser()) current->fsuid = uid; if (current->fsuid != old_fsuid) current->dumpable = 0; + return old_fsuid; } @@ -565,29 +651,35 @@ asmlinkage int sys_setfsuid(uid_t uid) */ asmlinkage int sys_setfsgid(gid_t gid) { - int old_fsgid = current->fsgid; + int old_fsgid; + old_fsgid = current->fsgid; if (gid == current->gid || gid == current->egid || gid == current->sgid || gid == current->fsgid || suser()) current->fsgid = gid; if (current->fsgid != old_fsgid) current->dumpable = 0; + return old_fsgid; } asmlinkage long sys_times(struct tms * tbuf) { - int error; - if (tbuf) { - error = put_user(current->utime,&tbuf->tms_utime); - if (!error) - error = put_user(current->stime,&tbuf->tms_stime); - if (!error) - error = put_user(current->cutime,&tbuf->tms_cutime); - if (!error) - error = put_user(current->cstime,&tbuf->tms_cstime); - if (error) - return error; + /* + * In the SMP world we might just be unlucky and have one of + * the times increment as we use it. Since the value is an + * atomically safe type this is just fine. Conceptually its + * as if the syscall took an instant longer to occur. + */ + if (tbuf) + { + /* ?? use copy_to_user() */ + if(!access_ok(VERIFY_READ, tbuf, sizeof(struct tms)) || + __put_user(current->utime,&tbuf->tms_utime)|| + __put_user(current->stime,&tbuf->tms_stime) || + __put_user(current->cutime,&tbuf->tms_cutime) || + __put_user(current->cstime,&tbuf->tms_cstime)) + return -EFAULT; } return jiffies; } @@ -604,9 +696,11 @@ asmlinkage long sys_times(struct tms * tbuf) * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. 
* LBT 04.03.94 */ + asmlinkage int sys_setpgid(pid_t pid, pid_t pgid) { struct task_struct * p; + int err = -EINVAL; if (!pid) pid = current->pid; @@ -614,82 +708,123 @@ asmlinkage int sys_setpgid(pid_t pid, pid_t pgid) pgid = pid; if (pgid < 0) return -EINVAL; + + read_lock(&tasklist_lock); for_each_task(p) { - if (p->pid == pid) + if (p->pid == pid) { + /* NOTE: I haven't dropped tasklist_lock, this is + * on purpose. -DaveM + */ goto found_task; + } } + read_unlock(&tasklist_lock); return -ESRCH; found_task: + /* From this point forward we keep holding onto the tasklist lock + * so that our parent does not change from under us. -DaveM + */ + err = -ESRCH; if (p->p_pptr == current || p->p_opptr == current) { + err = -EPERM; if (p->session != current->session) - return -EPERM; + goto out; + err = -EACCES; if (p->did_exec) - return -EACCES; + goto out; } else if (p != current) - return -ESRCH; + goto out; + err = -EPERM; if (p->leader) - return -EPERM; + goto out; if (pgid != pid) { struct task_struct * tmp; for_each_task (tmp) { if (tmp->pgrp == pgid && - tmp->session == current->session) + tmp->session == current->session) goto ok_pgid; } - return -EPERM; + goto out; } ok_pgid: p->pgrp = pgid; - return 0; + err = 0; +out: + /* All paths lead to here, thus we are safe. 
-DaveM */ + read_unlock(&tasklist_lock); + return err; } asmlinkage int sys_getpgid(pid_t pid) { - struct task_struct * p; - - if (!pid) + if (!pid) { return current->pgrp; - for_each_task(p) { - if (p->pid == pid) - return p->pgrp; + } else { + struct task_struct *p; + int ret = -ESRCH; + + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->pid == pid) { + ret = p->pgrp; + break; + } + } + read_unlock(&tasklist_lock); + return ret; } - return -ESRCH; } asmlinkage int sys_getpgrp(void) { + /* SMP - assuming writes are word atomic this is fine */ return current->pgrp; } asmlinkage int sys_getsid(pid_t pid) { struct task_struct * p; - - if (!pid) - return current->session; - for_each_task(p) { - if (p->pid == pid) - return p->session; + int ret; + + /* SMP: The 'self' case requires no lock */ + if (!pid) { + ret = current->session; + } else { + ret = -ESRCH; + + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->pid == pid) { + ret = p->session; + break; + } + } + read_unlock(&tasklist_lock); } - return -ESRCH; + return ret; } asmlinkage int sys_setsid(void) { struct task_struct * p; + int err = -EPERM; + read_lock(&tasklist_lock); for_each_task(p) { if (p->pgrp == current->pid) - return -EPERM; + goto out; } current->leader = 1; current->session = current->pgrp = current->pid; current->tty = NULL; current->tty_old_pgrp = 0; - return current->pgrp; + err = current->pgrp; +out: + read_unlock(&tasklist_lock); + return err; } /* @@ -698,6 +833,11 @@ asmlinkage int sys_setsid(void) asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist) { int i; + + /* + * SMP: Nobody else can change our grouplist. Thus we are + * safe. + */ if (gidsetsize < 0) return -EINVAL; @@ -711,21 +851,21 @@ asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist) return i; } +/* + * SMP: Our groups are not shared. We can copy to/from them safely + * without another task interfering. 
+ */ + asmlinkage int sys_setgroups(int gidsetsize, gid_t *grouplist) { - int err; - if (!suser()) return -EPERM; if ((unsigned) gidsetsize > NGROUPS) return -EINVAL; - err = copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t)); - if (err) { - gidsetsize = 0; - err = -EFAULT; - } + if(copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t))) + return -EFAULT; current->ngroups = gidsetsize; - return err; + return 0; } int in_group_p(gid_t grp) @@ -762,53 +902,49 @@ asmlinkage int sys_newuname(struct new_utsname * name) * Move these to arch dependent dir since they are for * backward compatibility only? */ + +#ifndef __sparc__ asmlinkage int sys_uname(struct old_utsname * name) { - int error = -EFAULT;; if (name && !copy_to_user(name, &system_utsname, sizeof (*name))) - error = 0; - return error; + return 0; + return -EFAULT; } +#endif asmlinkage int sys_olduname(struct oldold_utsname * name) { int error; + if (!name) return -EFAULT; - error = copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); - if (!error) - error = put_user(0,name->sysname+__OLD_UTS_LEN); - if (!error) - error = copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); - if (!error) - error = put_user(0,name->nodename+__OLD_UTS_LEN); - if (!error) - error = copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); - if (!error) - error = put_user(0,name->release+__OLD_UTS_LEN); - if (!error) - error = copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); - if (!error) - error = put_user(0,name->version+__OLD_UTS_LEN); - if (!error) - error = copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); - if (!error) - error = put_user(0,name->machine+__OLD_UTS_LEN); - return error ? 
-EFAULT : 0; + if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) + return -EFAULT; + + error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + error -= __put_user(0,name->sysname+__OLD_UTS_LEN); + error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error -= __put_user(0,name->nodename+__OLD_UTS_LEN); + error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error -= __put_user(0,name->release+__OLD_UTS_LEN); + error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error -= __put_user(0,name->version+__OLD_UTS_LEN); + error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error = __put_user(0,name->machine+__OLD_UTS_LEN); + error = error ? -EFAULT : 0; + + return error; } #endif asmlinkage int sys_sethostname(char *name, int len) { - int error; - if (!suser()) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; - error = copy_from_user(system_utsname.nodename, name, len); - if (error) + if(copy_from_user(system_utsname.nodename, name, len)) return -EFAULT; system_utsname.nodename[len] = 0; return 0; @@ -820,7 +956,7 @@ asmlinkage int sys_gethostname(char *name, int len) if (len < 0) return -EINVAL; - i = 1+strlen(system_utsname.nodename); + i = 1 + strlen(system_utsname.nodename); if (i > len) i = len; return copy_to_user(name, system_utsname.nodename, i) ? 
-EFAULT : 0; @@ -832,14 +968,11 @@ asmlinkage int sys_gethostname(char *name, int len) */ asmlinkage int sys_setdomainname(char *name, int len) { - int error; - if (!suser()) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; - error = copy_from_user(system_utsname.domainname, name, len); - if (error) + if(copy_from_user(system_utsname.domainname, name, len)) return -EFAULT; system_utsname.domainname[len] = 0; return 0; @@ -849,20 +982,19 @@ asmlinkage int sys_getrlimit(unsigned int resource, struct rlimit *rlim) { if (resource >= RLIM_NLIMITS) return -EINVAL; - return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim)) - ? -EFAULT : 0 ; + else + return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim)) + ? -EFAULT : 0; } asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim) { struct rlimit new_rlim, *old_rlim; - int err; if (resource >= RLIM_NLIMITS) return -EINVAL; - err = copy_from_user(&new_rlim, rlim, sizeof(*rlim)); - if (err) - return -EFAULT; + if(copy_from_user(&new_rlim, rlim, sizeof(*rlim))) + return -EFAULT; old_rlim = current->rlim + resource; if (((new_rlim.rlim_cur > old_rlim->rlim_max) || (new_rlim.rlim_max > old_rlim->rlim_max)) && @@ -883,6 +1015,13 @@ asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim) * make sense to do this. It will make moving the rest of the information * a lot simpler! (Which we're not doing right now because we're not * measuring them yet). + * + * This is SMP safe. Either we are called from sys_getrusage on ourselves + * below (we know we aren't going to exit/disappear and only we change our + * rusage counters), or we are called from wait4() on a process which is + * either stopped or zombied. In the zombied case the task won't get + * reaped till shortly after the call to getrusage(), in both cases the + * task being examined is in a frozen state so the counters won't change. 
*/ int getrusage(struct task_struct *p, int who, struct rusage *ru) { @@ -930,8 +1069,6 @@ asmlinkage int sys_getrusage(int who, struct rusage *ru) asmlinkage int sys_umask(int mask) { - int old = current->fs->umask; - - current->fs->umask = mask & S_IRWXUGO; - return (old); + mask = xchg(&current->fs->umask, mask & S_IRWXUGO); + return mask; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3d0fbf49b..9e0bb0fd8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -7,6 +7,8 @@ * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver. * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver. * Dynamic registration fixes, Stephen Tweedie. + * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn. + * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris Horn. */ #include <linux/config.h> @@ -20,28 +22,49 @@ #include <linux/ctype.h> #include <linux/utsname.h> #include <linux/swapctl.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/bitops.h> #include <asm/uaccess.h> +#ifdef CONFIG_ROOT_NFS +#include <linux/nfs_fs.h> +#endif + +#ifdef CONFIG_SYSCTL + /* External variables not in a header file. 
*/ extern int panic_timeout; +extern int console_loglevel, default_message_loglevel; +extern int minimum_console_loglevel, default_console_loglevel; +extern int C_A_D, swapout_interval; +extern int bdf_prm[], bdflush_min[], bdflush_max[]; +extern char binfmt_java_interpreter[], binfmt_java_appletviewer[]; +extern int sysctl_overcommit_memory; - -#ifdef CONFIG_ROOT_NFS -#include <linux/nfs_fs.h> +#ifdef __sparc__ +extern char reboot_command []; #endif +static int parse_table(int *, int, void *, size_t *, void *, size_t, + ctl_table *, void **); +static int do_securelevel_strategy (ctl_table *, int *, int, void *, size_t *, + void *, size_t, void **); + + static ctl_table root_table[]; static struct ctl_table_header root_table_header = {root_table, DNODE_SINGLE(&root_table_header)}; -static int parse_table(int *, int, void *, size_t *, void *, size_t, - ctl_table *, void **); - static ctl_table kern_table[]; static ctl_table vm_table[]; extern ctl_table net_table[]; +static ctl_table proc_table[]; +static ctl_table fs_table[]; +static ctl_table debug_table[]; +static ctl_table dev_table[]; + /* /proc declarations: */ @@ -59,7 +82,7 @@ struct file_operations proc_sys_file_operations = proc_readsys, /* read */ proc_writesys, /* write */ NULL, /* readdir */ - NULL, /* select */ + NULL, /* poll */ NULL, /* ioctl */ NULL, /* mmap */ NULL, /* no special open code */ @@ -94,19 +117,16 @@ static void register_proc_table(ctl_table *, struct proc_dir_entry *); static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); #endif -extern int bdf_prm[], bdflush_min[], bdflush_max[]; - -static int do_securelevel_strategy (ctl_table *, int *, int, void *, size_t *, - void *, size_t, void **); - -extern char binfmt_java_interpreter[], binfmt_java_appletviewer[]; - /* The default sysctl tables: */ static ctl_table root_table[] = { {CTL_KERN, "kernel", NULL, 0, 0555, kern_table}, {CTL_VM, "vm", NULL, 0, 0555, vm_table}, {CTL_NET, "net", NULL, 0, 0555, net_table}, + 
{CTL_PROC, "proc", NULL, 0, 0555, proc_table}, + {CTL_FS, "fs", NULL, 0, 0555, fs_table}, + {CTL_DEBUG, "debug", NULL, 0, 0555, debug_table}, + {CTL_DEV, "dev", NULL, 0, 0555, dev_table}, {0} }; @@ -140,7 +160,7 @@ static ctl_table kern_table[] = { #ifdef CONFIG_ROOT_NFS {KERN_NFSRNAME, "nfs-root-name", nfs_root_name, NFS_ROOT_NAME_LEN, 0644, NULL, &proc_dostring, &sysctl_string }, - {KERN_NFSRNAME, "nfs-root-addrs", nfs_root_addrs, NFS_ROOT_ADDRS_LEN, + {KERN_NFSRADDRS, "nfs-root-addrs", nfs_root_addrs, NFS_ROOT_ADDRS_LEN, 0644, NULL, &proc_dostring, &sysctl_string }, #endif #ifdef CONFIG_BINFMT_JAVA @@ -149,22 +169,47 @@ static ctl_table kern_table[] = { {KERN_JAVA_APPLETVIEWER, "java-appletviewer", binfmt_java_appletviewer, 64, 0644, NULL, &proc_dostring, &sysctl_string }, #endif +#ifdef __sparc__ + {KERN_SPARC_REBOOT, "reboot-cmd", reboot_command, + 256, 0644, NULL, &proc_dostring, &sysctl_string }, +#endif + {KERN_CTLALTDEL, "ctrl-alt-del", &C_A_D, sizeof(int), + 0644, NULL, &proc_dointvec}, + {KERN_PRINTK, "printk", &console_loglevel, 4*sizeof(int), + 0644, NULL, &proc_dointvec}, {0} }; static ctl_table vm_table[] = { {VM_SWAPCTL, "swapctl", &swap_control, sizeof(swap_control_t), 0600, NULL, &proc_dointvec}, - {VM_KSWAPD, "kswapd", - &kswapd_ctl, sizeof(kswapd_ctl), 0600, NULL, &proc_dointvec}, {VM_FREEPG, "freepages", &min_free_pages, 3*sizeof(int), 0600, NULL, &proc_dointvec}, {VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0600, NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL, &bdflush_min, &bdflush_max}, + {VM_OVERCOMMIT_MEMORY, "overcommit_memory", &sysctl_overcommit_memory, + sizeof(sysctl_overcommit_memory), 0644, NULL, &proc_dointvec}, + {0} +}; + +static ctl_table proc_table[] = { + {0} +}; + +static ctl_table fs_table[] = { + {0} +}; + +static ctl_table debug_table[] = { {0} }; +static ctl_table dev_table[] = { + {0} +}; + + void sysctl_init(void) { #ifdef CONFIG_PROC_FS @@ -184,25 +229,17 @@ int do_sysctl (int *name, int nlen, if (nlen == 0 || 
nlen >= CTL_MAXNAME) return -ENOTDIR; - error = verify_area(VERIFY_READ,name,nlen*sizeof(int)); - if (error) return error; - if (oldval) { + if (oldval) + { int old_len; if (!oldlenp) return -EFAULT; - error = verify_area(VERIFY_WRITE,oldlenp,sizeof(size_t)); - if (error) return error; - get_user(old_len, oldlenp); - error = verify_area(VERIFY_WRITE,oldval,old_len); - if (error) return error; - } - if (newval) { - error = verify_area(VERIFY_READ,newval,newlen); - if (error) return error; + if(get_user(old_len, oldlenp)) + return -EFAULT; } tmp = &root_table_header; do { - context = 0; + context = NULL; error = parse_table(name, nlen, oldval, oldlenp, newval, newlen, tmp->ctl_table, &context); if (context) @@ -218,12 +255,15 @@ extern asmlinkage int sys_sysctl(struct __sysctl_args *args) { struct __sysctl_args tmp; int error; - error = verify_area(VERIFY_READ, args, sizeof(*args)); - if (error) - return error; - copy_from_user(&tmp, args, sizeof(tmp)); - return do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp, - tmp.newval, tmp.newlen); + + if(copy_from_user(&tmp, args, sizeof(tmp))) + return -EFAULT; + + lock_kernel(); + error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp, + tmp.newval, tmp.newlen); + unlock_kernel(); + return error; } /* Like in_group_p, but testing against egid, not fsgid */ @@ -248,6 +288,7 @@ out: /* ctl_perm does NOT grant the superuser all rights automatically, because some sysctl variables are readonly even to root. 
*/ + static int test_perm(int mode, int op) { if (!current->euid) @@ -258,6 +299,7 @@ static int test_perm(int mode, int op) return 0; return -EACCES; } + static inline int ctl_perm(ctl_table *table, int op) { return test_perm(table->mode, op); @@ -275,7 +317,8 @@ repeat: for ( ; table->ctl_name; table++) { int n; - get_user(n,name); + if(get_user(n,name)) + return -EFAULT; if (n == table->ctl_name || table->ctl_name == CTL_ANY) { if (table->child) { @@ -335,15 +378,18 @@ int do_sysctl_strategy (ctl_table *table, if (len) { if (len > table->maxlen) len = table->maxlen; - copy_to_user(oldval, table->data, len); - put_user(len, oldlenp); + if(copy_to_user(oldval, table->data, len)) + return -EFAULT; + if(put_user(len, oldlenp)) + return -EFAULT; } } if (newval && newlen) { len = newlen; if (len > table->maxlen) len = table->maxlen; - copy_from_user(table->data, newval, len); + if(copy_from_user(table->data, newval, len)) + return -EFAULT; } } return 0; @@ -364,7 +410,8 @@ static int do_securelevel_strategy (ctl_table *table, if (newval && newlen) { if (newlen != sizeof (int)) return -EINVAL; - copy_from_user (&level, newval, newlen); + if(copy_from_user (&level, newval, newlen)) + return -EFAULT; if (level < securelevel && current->pid != 1) return -EPERM; } @@ -406,11 +453,12 @@ void unregister_sysctl_table(struct ctl_table_header * table) /* Scan the sysctl entries in table and add them all into /proc */ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) { - struct proc_dir_entry *de, *tmp; - int exists; + struct proc_dir_entry *de; + int len; + mode_t mode; for (; table->ctl_name; table++) { - exists = 0; + de = 0; /* Can't do anything without a proc name. 
*/ if (!table->procname) continue; @@ -418,46 +466,32 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) if (!table->proc_handler && !table->child) continue; - - de = kmalloc(sizeof(*de), GFP_KERNEL); - if (!de) continue; - de->namelen = strlen(table->procname); - de->name = table->procname; - de->mode = table->mode; - de->nlink = 1; - de->uid = 0; - de->gid = 0; - de->size = 0; - de->get_info = 0; /* For internal use if we want it */ - de->fill_inode = 0; /* To override struct inode fields */ - de->next = de->subdir = 0; - de->data = (void *) table; - /* Is it a file? */ - if (table->proc_handler) { - de->ops = &proc_sys_inode_operations; - de->mode |= S_IFREG; - } - /* Otherwise it's a subdir */ - else { - /* First check to see if it already exists */ - for (tmp = root->subdir; tmp; tmp = tmp->next) { - if (tmp->namelen == de->namelen && - !memcmp(tmp->name,de->name,de->namelen)) { - exists = 1; - kfree (de); - de = tmp; - } - } - if (!exists) { - de->ops = &proc_dir_inode_operations; - de->nlink++; - de->mode |= S_IFDIR; + + len = strlen(table->procname); + mode = table->mode; + + if (table->proc_handler) + mode |= S_IFREG; + else { + mode |= S_IFDIR; + for (de = root->subdir; de; de = de->next) { + if (proc_match(len, table->procname, de)) + break; } + /* If the subdir exists already, de is non-NULL */ + } + + if (!de) { + de = create_proc_entry(table->procname, mode, root); + if (!de) + continue; + de->data = (void *) table; + if (table->proc_handler) + de->ops = &proc_sys_inode_operations; + } table->de = de; - if (!exists) - proc_register_dynamic(root, de); - if (de->mode & S_IFDIR ) + if (de->mode & S_IFDIR) register_proc_table(table->child, de); } } @@ -494,10 +528,6 @@ static long do_rw_proc(int write, struct inode * inode, struct file * file, size_t res; long error; - error = verify_area(write ? 
VERIFY_READ : VERIFY_WRITE, buf, count); - if (error) - return error; - de = (struct proc_dir_entry*) inode->u.generic_ip; if (!de || !de->data) return -ENOTDIR; @@ -548,14 +578,16 @@ int proc_dostring(ctl_table *table, int write, struct file *filp, len = 0; p = buffer; while (len < *lenp) { - get_user(c, p++); + if(get_user(c, p++)) + return -EFAULT; if (c == 0 || c == '\n') break; len++; } if (len >= table->maxlen) len = table->maxlen-1; - copy_from_user(table->data, buffer, len); + if(copy_from_user(table->data, buffer, len)) + return -EFAULT; ((char *) table->data)[len] = 0; filp->f_pos += *lenp; } else { @@ -565,9 +597,11 @@ int proc_dostring(ctl_table *table, int write, struct file *filp, if (len > *lenp) len = *lenp; if (len) - copy_to_user(buffer, table->data, len); + if(copy_to_user(buffer, table->data, len)) + return -EFAULT; if (len < *lenp) { - put_user('\n', ((char *) buffer) + len); + if(put_user('\n', ((char *) buffer) + len)) + return -EFAULT; len++; } *lenp = len; @@ -597,7 +631,8 @@ int proc_dointvec(ctl_table *table, int write, struct file *filp, if (write) { while (left) { char c; - get_user(c,(char *) buffer); + if(get_user(c,(char *) buffer)) + return -EFAULT; if (!isspace(c)) break; left--; @@ -609,7 +644,8 @@ int proc_dointvec(ctl_table *table, int write, struct file *filp, len = left; if (len > TMPBUFLEN-1) len = TMPBUFLEN-1; - copy_from_user(buf, buffer, len); + if(copy_from_user(buf, buffer, len)) + return -EFAULT; buf[len] = 0; p = buf; if (*p == '-' && left > 1) { @@ -635,21 +671,24 @@ int proc_dointvec(ctl_table *table, int write, struct file *filp, len = strlen(buf); if (len > left) len = left; - copy_to_user(buffer, buf, len); + if(copy_to_user(buffer, buf, len)) + return -EFAULT; left -= len; buffer += len; } } if (!write && !first && left) { - put_user('\n', (char *) buffer); + if(put_user('\n', (char *) buffer)) + return -EFAULT; left--, buffer++; } if (write) { p = (char *) buffer; while (left) { char c; - get_user(c, p++); + 
if(get_user(c, p++)) + return -EFAULT; if (!isspace(c)) break; left--; @@ -685,7 +724,8 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, if (write) { while (left) { char c; - get_user(c, (char *) buffer); + if(get_user(c, (char *) buffer)) + return -EFAULT; if (!isspace(c)) break; left--; @@ -697,7 +737,8 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, len = left; if (len > TMPBUFLEN-1) len = TMPBUFLEN-1; - copy_from_user(buf, buffer, len); + if(copy_from_user(buf, buffer, len)) + return -EFAULT; buf[len] = 0; p = buf; if (*p == '-' && left > 1) { @@ -728,21 +769,24 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, len = strlen(buf); if (len > left) len = left; - copy_to_user(buffer, buf, len); + if(copy_to_user(buffer, buf, len)) + return -EFAULT; left -= len; buffer += len; } } if (!write && !first && left) { - put_user('\n', (char *) buffer); + if(put_user('\n', (char *) buffer)) + return -EFAULT; left--, buffer++; } if (write) { p = (char *) buffer; while (left) { char c; - get_user(c, p++); + if(get_user(c, p++)) + return -EFAULT; if (!isspace(c)) break; left--; @@ -793,22 +837,27 @@ int sysctl_string(ctl_table *table, int *name, int nlen, return -ENOTDIR; if (oldval && oldlenp) { - get_user(len, oldlenp); + if(get_user(len, oldlenp)) + return -EFAULT; if (len) { l = strlen(table->data); if (len > l) len = l; if (len >= table->maxlen) len = table->maxlen; - copy_to_user(oldval, table->data, len); - put_user(0, ((char *) oldval) + len); - put_user(len, oldlenp); + if(copy_to_user(oldval, table->data, len)) + return -EFAULT; + if(put_user(0, ((char *) oldval) + len)) + return -EFAULT; + if(put_user(len, oldlenp)) + return -EFAULT; } } if (newval && newlen) { len = newlen; if (len > table->maxlen) len = table->maxlen; - copy_from_user(table->data, newval, len); + if(copy_from_user(table->data, newval, len)) + return -EFAULT; if (len == table->maxlen) len--; ((char *) 
table->data)[len] = 0; @@ -865,14 +914,16 @@ int do_string ( return -EINVAL; if (oldval) { int old_l; - get_user(old_l, oldlenp); + if(get_user(old_l, oldlenp)) + return -EFAULT; if (l > old_l) return -ENOMEM; - put_user(l, oldlenp); - copy_to_user(oldval, data, l); + if(put_user(l, oldlenp) || copy_to_user(oldval, data, l)) + return -EFAULT; } if (newval) { - copy_from_user(data, newval, newlen); + if(copy_from_user(data, newval, newlen)) + return -EFAULT; data[newlen] = 0; } return 0; @@ -888,14 +939,16 @@ int do_int ( return -EINVAL; if (oldval) { int old_l; - get_user(old_l, oldlenp); + if(get_user(old_l, oldlenp)) + return -EFAULT; if (old_l < sizeof(int)) return -ENOMEM; - put_user(sizeof(int), oldlenp); - copy_to_user(oldval, data, sizeof(int)); + if(put_user(sizeof(int), oldlenp)||copy_to_user(oldval, data, sizeof(int))) + return -EFAULT; } if (newval) - copy_from_user(data, newval, sizeof(int)); + if(copy_from_user(data, newval, sizeof(int))) + return -EFAULT; return 0; } @@ -909,14 +962,71 @@ int do_struct ( return -EINVAL; if (oldval) { int old_l; - get_user(old_l, oldlenp); + if(get_user(old_l, oldlenp)) + return -EFAULT; if (old_l < len) return -ENOMEM; - put_user(len, oldlenp); - copy_to_user(oldval, data, len); + if(put_user(len, oldlenp) || copy_to_user(oldval, data, len)) + return -EFAULT; } if (newval) - copy_from_user(data, newval, len); + if(copy_from_user(data, newval, len)) + return -EFAULT; return 0; } + +#else /* CONFIG_SYSCTL */ + + +extern asmlinkage int sys_sysctl(struct __sysctl_args *args) +{ + return -ENOSYS; +} + +int sysctl_string(ctl_table *table, int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, void **context) +{ + return -ENOSYS; +} + +int sysctl_intvec(ctl_table *table, int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, void **context) +{ + return -ENOSYS; +} + +int proc_dostring(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + 
return -ENOSYS; +} + +int proc_dointvec(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return -ENOSYS; +} + +struct ctl_table_header * register_sysctl_table(ctl_table * table, + int insert_at_head) +{ + return 0; +} + +void unregister_sysctl_table(struct ctl_table_header * table) +{ +} + +#endif /* CONFIG_SYSCTL */ + + + diff --git a/kernel/time.c b/kernel/time.c index c2090a583..d20fdbd98 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -25,6 +25,8 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/timex.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> #include <asm/uaccess.h> @@ -64,6 +66,8 @@ asmlinkage int sys_time(int * tloc) { int i; + /* SMP: This is fairly trivial. We grab CURRENT_TIME and + stuff it to user space. No side effects */ i = CURRENT_TIME; if (tloc) { if (put_user(i,tloc)) @@ -78,6 +82,7 @@ asmlinkage int sys_time(int * tloc) * why not move it into the appropriate arch directory (for those * architectures that need it). */ + asmlinkage int sys_stime(int * tptr) { int value; @@ -154,6 +159,7 @@ asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz) if (!suser()) return -EPERM; + if (tv) { if (copy_from_user(&new_tv, tv, sizeof(*tv))) return -EFAULT; @@ -161,6 +167,8 @@ asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz) if (tz) { if (copy_from_user(&new_tz, tz, sizeof(*tz))) return -EFAULT; + + /* SMP safe, global irq locking makes it work. */ sys_tz = new_tz; if (firsttime) { firsttime = 0; @@ -169,7 +177,12 @@ asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz) } } if (tv) + { + /* SMP safe, again the code in arch/foo/time.c should + * globally block out interrupts when it runs. 
+ */ do_settimeofday(&new_tv); + } return 0; } @@ -197,37 +210,32 @@ void (*hardpps_ptr)(struct timeval *) = (void (*)(struct timeval *))0; asmlinkage int sys_adjtimex(struct timex *txc_p) { long ltemp, mtemp, save_adjust; - int error; - - /* Local copy of parameter */ - struct timex txc; + struct timex txc; /* Local copy of parameter */ /* Copy the user data space into the kernel copy * structure. But bear in mind that the structures * may change */ - error = copy_from_user(&txc, txc_p, sizeof(struct timex)); - if (error) - return -EFAULT; + if(copy_from_user(&txc, txc_p, sizeof(struct timex))) + return -EFAULT; /* In order to modify anything, you gotta be super-user! */ if (txc.modes && !suser()) return -EPERM; - - /* Now we validate the data before disabling interrupts - */ + + /* Now we validate the data before disabling interrupts */ if (txc.modes != ADJ_OFFSET_SINGLESHOT && (txc.modes & ADJ_OFFSET)) /* adjustment Offset limited to +- .512 seconds */ - if (txc.offset <= - MAXPHASE || txc.offset >= MAXPHASE ) - return -EINVAL; + if (txc.offset <= - MAXPHASE || txc.offset >= MAXPHASE ) + return -EINVAL; /* if the quartz is off by more than 10% something is VERY wrong ! */ if (txc.modes & ADJ_TICK) - if (txc.tick < 900000/HZ || txc.tick > 1100000/HZ) - return -EINVAL; + if (txc.tick < 900000/HZ || txc.tick > 1100000/HZ) + return -EINVAL; - cli(); + cli(); /* SMP: global cli() is enough protection. */ /* Save for later - semantics of adjtime is to return old value */ save_adjust = time_adjust; |