diff options
author | Ralf Baechle <ralf@linux-mips.org> | 1998-08-25 09:12:35 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 1998-08-25 09:12:35 +0000 |
commit | c7fc24dc4420057f103afe8fc64524ebc25c5d37 (patch) | |
tree | 3682407a599b8f9f03fc096298134cafba1c9b2f /kernel | |
parent | 1d793fade8b063fde3cf275bf1a5c2d381292cd9 (diff) |
o Merge with Linux 2.1.116.
o New Newport console code.
o New G364 console code.
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/acct.c | 2 | ||||
-rw-r--r-- | kernel/capability.c | 129 | ||||
-rw-r--r-- | kernel/exit.c | 81 | ||||
-rw-r--r-- | kernel/fork.c | 186 | ||||
-rw-r--r-- | kernel/kmod.c | 57 | ||||
-rw-r--r-- | kernel/ksyms.c | 21 | ||||
-rw-r--r-- | kernel/module.c | 5 | ||||
-rw-r--r-- | kernel/panic.c | 5 | ||||
-rw-r--r-- | kernel/printk.c | 5 | ||||
-rw-r--r-- | kernel/resource.c | 2 | ||||
-rw-r--r-- | kernel/sched.c | 167 | ||||
-rw-r--r-- | kernel/signal.c | 64 | ||||
-rw-r--r-- | kernel/sys.c | 130 | ||||
-rw-r--r-- | kernel/sysctl.c | 75 | ||||
-rw-r--r-- | kernel/time.c | 3 |
15 files changed, 594 insertions, 338 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index 6a00f3571..c5b161ae0 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -257,7 +257,7 @@ int acct_process(long exitcode) /* * First check to see if there is enough free_space to continue the process - * accounting system. Check_free_space toggle's the acct_active flag so we + * accounting system. Check_free_space toggles the acct_active flag so we * need to check that after check_free_space. */ check_free_space(); diff --git a/kernel/capability.c b/kernel/capability.c index ddbfaa87b..60d4ed6b5 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -5,7 +5,6 @@ * Integrated into 2.1.97+, Andrew G. Morgan <morgan@transmeta.com> */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/capability.h> @@ -15,20 +14,8 @@ #include <asm/uaccess.h> -static inline void cap_fromuser(kernel_cap_t *k, __u32 *u) -{ - copy_from_user(k, u, sizeof(*k)); -} - - -static inline void cap_touser(__u32 *u, const kernel_cap_t *k) -{ - copy_to_user(u, k, sizeof(*k)); -} - -#ifdef __SMP__ -static spinlock_t task_capability_lock; -#endif +/* Note: never hold tasklist_lock while spinning for this one */ +spinlock_t task_capability_lock; /* * For sys_getproccap() and sys_setproccap(), any of the three @@ -36,65 +23,58 @@ static spinlock_t task_capability_lock; * uninteresting and/or not to be changed. 
*/ -asmlinkage int sys_capget(cap_user_header_t header, cap_user_data_t data) +asmlinkage int sys_capget(cap_user_header_t header, cap_user_data_t dataptr) { - int error = -EINVAL, pid; + int error, pid; __u32 version; struct task_struct *target; + struct __user_cap_data_struct data; - if (!access_ok(VERIFY_WRITE, &header->version, sizeof(*header))) { - /* not large enough for current header so indicate error */ - if (access_ok(VERIFY_WRITE, &header->version, - sizeof(header->version))) { - return error; - } - goto all_done; - } - - copy_from_user(&version, &header->version, sizeof(header->version)); + if (get_user(version, &header->version)) + return -EFAULT; + + error = -EINVAL; if (version != _LINUX_CAPABILITY_VERSION) { - /* if enough space for kernel version, write that */ - - all_done: version = _LINUX_CAPABILITY_VERSION; - copy_to_user(&header->version, &version, - sizeof(header->version)); + if (put_user(version, &header->version)) + error = -EFAULT; return error; } - if (!access_ok(VERIFY_WRITE, data, sizeof(*data))) { - return error; - } + if (get_user(pid, &header->pid)) + return -EFAULT; - copy_from_user(&pid, &header->pid, sizeof(header->pid)); - if (pid < 0) { - return error; - } + if (pid < 0) + return -EINVAL; + + error = 0; spin_lock(&task_capability_lock); if (pid && pid != current->pid) { - read_lock(&tasklist_lock); + read_lock(&tasklist_lock); target = find_task_by_pid(pid); /* identify target of query */ - if (!target) { + if (!target) error = -ESRCH; - goto out; - } } else { target = current; } - cap_touser(&data->permitted, &target->cap_permitted); - cap_touser(&data->inheritable, &target->cap_inheritable); - cap_touser(&data->effective, &target->cap_effective); + if (!error) { + data.permitted = target->cap_permitted.cap; + data.inheritable = target->cap_inheritable.cap; + data.effective = target->cap_effective.cap; + } - error = 0; + if (target != current) + read_unlock(&tasklist_lock); + spin_unlock(&task_capability_lock); -out: - if 
(target != current) { - read_unlock(&tasklist_lock); + if (!error) { + if (copy_to_user(dataptr, &data, sizeof data)) + return -EFAULT; } - spin_unlock(&task_capability_lock); + return error; } @@ -155,38 +135,31 @@ asmlinkage int sys_capset(cap_user_header_t header, const cap_user_data_t data) kernel_cap_t inheritable, permitted, effective; __u32 version; struct task_struct *target; - int error = -EINVAL, pid; - - if (!access_ok(VERIFY_WRITE, &header->version, sizeof(*header))) { - /* not large enough for current header so indicate error */ - if (!access_ok(VERIFY_WRITE, &header->version, - sizeof(header->version))) { - return error; - } - goto all_done; - } + int error, pid; - copy_from_user(&version, &header->version, sizeof(header->version)); - if (version != _LINUX_CAPABILITY_VERSION) { + if (get_user(version, &header->version)) + return -EFAULT; - all_done: + if (version != _LINUX_CAPABILITY_VERSION) { version = _LINUX_CAPABILITY_VERSION; - copy_to_user(&header->version, &version, - sizeof(header->version)); - return error; - } - - if (!access_ok(VERIFY_READ, data, sizeof(*data))) { - return error; + if (put_user(version, &header->version)) + return -EFAULT; + return -EINVAL; } /* may want to set other processes at some point -- for now demand 0 */ - copy_from_user(&pid, &header->pid, sizeof(pid)); + if (get_user(pid, &header->pid)) + return -EFAULT; - error = -EPERM; if (pid && !capable(CAP_SETPCAP)) - return error; + return -EPERM; + if (copy_from_user(&effective, &data->effective, sizeof(effective)) || + copy_from_user(&inheritable, &data->inheritable, sizeof(inheritable)) || + copy_from_user(&permitted, &data->permitted, sizeof(permitted))) + return -EFAULT; + + error = -EPERM; spin_lock(&task_capability_lock); if (pid > 0 && pid != current->pid) { @@ -194,16 +167,12 @@ asmlinkage int sys_capset(cap_user_header_t header, const cap_user_data_t data) target = find_task_by_pid(pid); /* identify target of query */ if (!target) { error = -ESRCH; - goto out; - 
} + goto out; + } } else { target = current; } - /* copy from userspace */ - cap_fromuser(&effective, &data->effective); - cap_fromuser(&inheritable, &data->inheritable); - cap_fromuser(&permitted, &data->permitted); /* verify restrictions on target's new Inheritable set */ if (!cap_issubset(inheritable, diff --git a/kernel/exit.c b/kernel/exit.c index ae2a34f44..49e0294db 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -20,7 +20,9 @@ #include <linux/smp_lock.h> #include <linux/module.h> #include <linux/slab.h> +#ifdef CONFIG_BSD_PROCESS_ACCT #include <linux/acct.h> +#endif #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -34,23 +36,31 @@ void release(struct task_struct * p) { if (p != current) { #ifdef __SMP__ - /* FIXME! Cheesy, but kills the window... -DaveM */ - do { - barrier(); - } while (p->has_cpu); - spin_unlock_wait(&scheduler_lock); + /* + * Wait to make sure the process isn't active on any + * other CPU + */ + for (;;) { + int has_cpu; + spin_lock(&scheduler_lock); + has_cpu = p->has_cpu; + spin_unlock(&scheduler_lock); + if (!has_cpu) + break; + do { + barrier(); + } while (p->has_cpu); + } #endif - charge_uid(p, -1); + free_uid(p); nr_tasks--; add_free_taskslot(p->tarray_ptr); - { - unsigned long flags; - write_lock_irqsave(&tasklist_lock, flags); - unhash_pid(p); - REMOVE_LINKS(p); - write_unlock_irqrestore(&tasklist_lock, flags); - } + write_lock_irq(&tasklist_lock); + unhash_pid(p); + REMOVE_LINKS(p); + write_unlock_irq(&tasklist_lock); + release_thread(p); current->cmin_flt += p->min_flt + p->cmin_flt; current->cmaj_flt += p->maj_flt + p->cmaj_flt; @@ -186,7 +196,7 @@ static inline void __exit_files(struct task_struct *tsk) if (files) { tsk->files = NULL; - if (!--files->count) { + if (atomic_dec_and_test(&files->count)) { close_files(files); /* * Free the fd array as appropriate ... 
@@ -211,7 +221,7 @@ static inline void __exit_fs(struct task_struct *tsk) if (fs) { tsk->fs = NULL; - if (!--fs->count) { + if (atomic_dec_and_test(&fs->count)) { dput(fs->root); dput(fs->pwd); kfree(fs); @@ -340,33 +350,39 @@ static void exit_notify(void) NORET_TYPE void do_exit(long code) { + struct task_struct *tsk = current; + if (in_interrupt()) printk("Aiee, killing interrupt handler\n"); - if (current == task[0]) + if (!tsk->pid) panic("Attempted to kill the idle task!"); + tsk->flags |= PF_EXITING; + del_timer(&tsk->real_timer); + + lock_kernel(); fake_volatile: - current->flags |= PF_EXITING; +#ifdef CONFIG_BSD_PROCESS_ACCT acct_process(code); - del_timer(&current->real_timer); +#endif sem_exit(); - __exit_mm(current); + __exit_mm(tsk); #if CONFIG_AP1000 - exit_msc(current); + exit_msc(tsk); #endif - __exit_files(current); - __exit_fs(current); - __exit_sighand(current); + __exit_files(tsk); + __exit_fs(tsk); + __exit_sighand(tsk); exit_thread(); - current->state = TASK_ZOMBIE; - current->exit_code = code; + tsk->state = TASK_ZOMBIE; + tsk->exit_code = code; exit_notify(); #ifdef DEBUG_PROC_TREE audit_ptree(); #endif - if (current->exec_domain && current->exec_domain->module) - __MOD_DEC_USE_COUNT(current->exec_domain->module); - if (current->binfmt && current->binfmt->module) - __MOD_DEC_USE_COUNT(current->binfmt->module); + if (tsk->exec_domain && tsk->exec_domain->module) + __MOD_DEC_USE_COUNT(tsk->exec_domain->module); + if (tsk->binfmt && tsk->binfmt->module) + __MOD_DEC_USE_COUNT(tsk->binfmt->module); schedule(); /* * In order to get rid of the "volatile function does return" message @@ -386,9 +402,7 @@ fake_volatile: asmlinkage int sys_exit(int error_code) { - lock_kernel(); do_exit((error_code&0xff)<<8); - unlock_kernel(); } asmlinkage int sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru) @@ -452,12 +466,11 @@ repeat: __put_user(p->exit_code, stat_addr); retval = p->pid; if (p->p_opptr != p->p_pptr) { - /* Note this grabs 
tasklist_lock - * as a writer... (twice!) - */ + write_lock_irq(&tasklist_lock); REMOVE_LINKS(p); p->p_pptr = p->p_opptr; SET_LINKS(p); + write_unlock_irq(&tasklist_lock); notify_parent(p, SIGCHLD); } else release(p); diff --git a/kernel/fork.c b/kernel/fork.c index c29c52ece..5a577abe3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -50,10 +50,10 @@ spinlock_t taskslot_lock = SPIN_LOCK_UNLOCKED; */ #define UIDHASH_SZ (PIDHASH_SZ >> 2) -static struct uid_taskcount { - struct uid_taskcount *next, **pprev; - unsigned short uid; - int task_count; +static struct user_struct { + atomic_t count; + struct user_struct *next, **pprev; + unsigned int uid; } *uidhash[UIDHASH_SZ]; spinlock_t uidhash_lock = SPIN_LOCK_UNLOCKED; @@ -62,7 +62,7 @@ kmem_cache_t *uid_cachep; #define uidhashfn(uid) (((uid >> 8) ^ uid) & (UIDHASH_SZ - 1)) -static inline void uid_hash_insert(struct uid_taskcount *up, unsigned int hashent) +static inline void uid_hash_insert(struct user_struct *up, unsigned int hashent) { spin_lock(&uidhash_lock); if((up->next = uidhash[hashent]) != NULL) @@ -72,7 +72,7 @@ static inline void uid_hash_insert(struct uid_taskcount *up, unsigned int hashen spin_unlock(&uidhash_lock); } -static inline void uid_hash_remove(struct uid_taskcount *up) +static inline void uid_hash_remove(struct user_struct *up) { spin_lock(&uidhash_lock); if(up->next) @@ -81,9 +81,9 @@ static inline void uid_hash_remove(struct uid_taskcount *up) spin_unlock(&uidhash_lock); } -static inline struct uid_taskcount *uid_find(unsigned short uid, unsigned int hashent) +static inline struct user_struct *uid_find(unsigned short uid, unsigned int hashent) { - struct uid_taskcount *up; + struct user_struct *up; spin_lock(&uidhash_lock); for(up = uidhash[hashent]; (up && up->uid != uid); up = up->next) @@ -92,31 +92,36 @@ static inline struct uid_taskcount *uid_find(unsigned short uid, unsigned int ha return up; } -int charge_uid(struct task_struct *p, int count) +void free_uid(struct task_struct *p) { - 
unsigned int hashent = uidhashfn(p->uid); - struct uid_taskcount *up = uid_find(p->uid, hashent); - - if(up) { - int limit = p->rlim[RLIMIT_NPROC].rlim_cur; - int newcnt = up->task_count + count; + struct user_struct *up = p->user; - if(newcnt > limit) - return -EAGAIN; - else if(newcnt == 0) { + if (up) { + p->user = NULL; + if (atomic_dec_and_test(&up->count)) { uid_hash_remove(up); kmem_cache_free(uid_cachep, up); - return 0; } - } else { + } +} + +int alloc_uid(struct task_struct *p) +{ + unsigned int hashent = uidhashfn(p->uid); + struct user_struct *up = uid_find(p->uid, hashent); + + p->user = up; + if (!up) { up = kmem_cache_alloc(uid_cachep, SLAB_KERNEL); - if(!up) + if (!up) return -EAGAIN; + p->user = up; up->uid = p->uid; - up->task_count = 0; + atomic_set(&up->count, 0); uid_hash_insert(up, hashent); } - up->task_count += count; + + atomic_inc(&up->count); return 0; } @@ -124,7 +129,7 @@ __initfunc(void uidcache_init(void)) { int i; - uid_cachep = kmem_cache_create("uid_cache", sizeof(struct uid_taskcount), + uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if(!uid_cachep) @@ -134,22 +139,13 @@ __initfunc(void uidcache_init(void)) uidhash[i] = 0; } -static inline int find_empty_process(void) +static inline struct task_struct ** find_empty_process(void) { - struct task_struct **tslot; + struct task_struct **tslot = NULL; - if(current->uid) { - int error; - - if(nr_tasks >= NR_TASKS - MIN_TASKS_LEFT_FOR_ROOT) - return -EAGAIN; - if((error = charge_uid(current, 1)) < 0) - return error; - } - tslot = get_free_taskslot(); - if(tslot) - return tslot - &task[0]; - return -EAGAIN; + if (!current->uid || (nr_tasks < NR_TASKS - MIN_TASKS_LEFT_FOR_ROOT)) + tslot = get_free_taskslot(); + return tslot; } /* Protects next_safe and last_pid. 
*/ @@ -204,7 +200,6 @@ static inline int dup_mmap(struct mm_struct * mm) int retval; flush_cache_mm(current->mm); - down(&current->mm->mmap_sem); pprev = &mm->mmap; for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) { struct file *file; @@ -254,7 +249,6 @@ static inline int dup_mmap(struct mm_struct * mm) fail_nomem: flush_tlb_mm(current->mm); - up(&current->mm->mmap_sem); return retval; } @@ -272,7 +266,7 @@ struct mm_struct * mm_alloc(void) if (mm) { *mm = *current->mm; init_new_context(mm); - mm->count = 1; + atomic_set(&mm->count, 1); mm->map_count = 0; mm->def_flags = 0; mm->mmap_sem = MUTEX_LOCKED; @@ -295,7 +289,7 @@ struct mm_struct * mm_alloc(void) */ void mmput(struct mm_struct *mm) { - if (!--mm->count) { + if (atomic_dec_and_test(&mm->count)) { release_segments(mm); exit_mmap(mm); free_page_tables(mm); @@ -310,6 +304,10 @@ static inline int copy_mm(int nr, unsigned long clone_flags, struct task_struct if (clone_flags & CLONE_VM) { mmget(current->mm); + /* + * Set up the LDT descriptor for the clone task. 
+ */ + copy_segments(nr, tsk, NULL); SET_PAGE_DIR(tsk, current->mm->pgd); return 0; } @@ -345,13 +343,13 @@ fail_nomem: static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) { if (clone_flags & CLONE_FS) { - current->fs->count++; + atomic_inc(&current->fs->count); return 0; } tsk->fs = kmalloc(sizeof(*tsk->fs), GFP_KERNEL); if (!tsk->fs) return -1; - tsk->fs->count = 1; + atomic_set(&tsk->fs->count, 1); tsk->fs->umask = current->fs->umask; tsk->fs->root = dget(current->fs->root); tsk->fs->pwd = dget(current->fs->pwd); @@ -392,7 +390,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) goto out; if (clone_flags & CLONE_FILES) { - oldf->count++; + atomic_inc(&oldf->count); goto out; } @@ -415,7 +413,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) goto out_release; memset((void *) new_fds, 0, size); - newf->count = 1; + atomic_set(&newf->count, 1); newf->max_fds = NR_OPEN; newf->fd = new_fds; newf->close_on_exec = oldf->close_on_exec; @@ -455,6 +453,17 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t return 0; } +static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) +{ + unsigned long new_flags = p->flags; + + new_flags &= ~PF_SUPERPRIV; + new_flags |= PF_FORKNOEXEC; + if (!(clone_flags & CLONE_PTRACE)) + new_flags &= ~(PF_PTRACED|PF_TRACESYS); + p->flags = new_flags; +} + /* * Ok, this is the main fork-routine. It copies the system process * information (task[nr]) and sets up the necessary registers. 
It @@ -463,21 +472,34 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) { int nr; - int error = -ENOMEM; + int retval = -ENOMEM; struct task_struct *p; - lock_kernel(); p = alloc_task_struct(); if (!p) - goto bad_fork; - - error = -EAGAIN; - nr = find_empty_process(); - if (nr < 0) - goto bad_fork_free; + goto fork_out; *p = *current; + down(&current->mm->mmap_sem); + lock_kernel(); + + if (p->user) { + if (atomic_read(&p->user->count) >= p->rlim[RLIMIT_NPROC].rlim_cur) + goto bad_fork_free; + } + + { + struct task_struct **tslot; + tslot = find_empty_process(); + retval = -EAGAIN; + if (!tslot) + goto bad_fork_free; + p->tarray_ptr = tslot; + *tslot = p; + nr = tslot - &task[0]; + } + if (p->exec_domain && p->exec_domain->module) __MOD_INC_USE_COUNT(p->exec_domain->module); if (p->binfmt && p->binfmt->module) @@ -486,22 +508,34 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) p->did_exec = 0; p->swappable = 0; p->state = TASK_UNINTERRUPTIBLE; - p->flags &= ~(PF_PTRACED|PF_TRACESYS|PF_SUPERPRIV); - p->sigpending = 0; - p->flags |= PF_FORKNOEXEC; + + copy_flags(clone_flags, p); p->pid = get_pid(clone_flags); - p->next_run = NULL; - p->prev_run = NULL; + + /* + * This is a "shadow run" state. The process + * is marked runnable, but isn't actually on + * any run queue yet.. (that happens at the + * very end). 
+ */ + p->state = TASK_RUNNING; + p->next_run = p; + p->prev_run = p; + p->p_pptr = p->p_opptr = current; p->p_cptr = NULL; init_waitqueue(&p->wait_chldexit); + + p->sigpending = 0; sigemptyset(&p->signal); p->sigqueue = NULL; p->sigqueue_tail = &p->sigqueue; + p->it_real_value = p->it_virt_value = p->it_prof_value = 0; p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0; init_timer(&p->real_timer); p->real_timer.data = (unsigned long) p; + p->leader = 0; /* session leadership doesn't inherit */ p->tty_old_pgrp = 0; p->times.tms_utime = p->times.tms_stime = 0; @@ -517,12 +551,11 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) spin_lock_init(&p->sigmask_lock); } #endif - p->lock_depth = 0; + p->lock_depth = -1; /* -1 = no lock */ p->start_time = jiffies; - p->tarray_ptr = &task[nr]; - *p->tarray_ptr = p; { + /* This makes it visible to the rest of the system */ unsigned long flags; write_lock_irqsave(&tasklist_lock, flags); SET_LINKS(p); @@ -531,8 +564,10 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) } nr_tasks++; + if (p->user) + atomic_inc(&p->user->count); - error = -ENOMEM; + retval = -ENOMEM; /* copy all the process information */ if (copy_files(clone_flags, p)) goto bad_fork_cleanup; @@ -542,8 +577,8 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) goto bad_fork_cleanup_fs; if (copy_mm(nr, clone_flags, p)) goto bad_fork_cleanup_sighand; - error = copy_thread(nr, clone_flags, usp, p, regs); - if (error) + retval = copy_thread(nr, clone_flags, usp, p, regs); + if (retval) goto bad_fork_cleanup_sighand; p->semundo = NULL; @@ -561,17 +596,19 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) current->counter >>= 1; p->counter = current->counter; - if(p->pid) { - wake_up_process(p); /* do this last, just in case */ - } else { - p->state = TASK_RUNNING; - p->next_run = p->prev_run = p; + /* Ok, add it to the run-queues, let it 
rip! */ + retval = p->pid; + if (retval) { + p->next_run = NULL; + p->prev_run = NULL; + wake_up_process(p); /* do this last */ } ++total_forks; - error = p->pid; bad_fork: + up(&current->mm->mmap_sem); unlock_kernel(); - return error; +fork_out: + return retval; bad_fork_cleanup_sighand: exit_sighand(p); @@ -580,12 +617,10 @@ bad_fork_cleanup_fs: bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup: - charge_uid(current, -1); if (p->exec_domain && p->exec_domain->module) __MOD_DEC_USE_COUNT(p->exec_domain->module); if (p->binfmt && p->binfmt->module) __MOD_DEC_USE_COUNT(p->binfmt->module); - add_free_taskslot(p->tarray_ptr); { unsigned long flags; @@ -595,7 +630,10 @@ bad_fork_cleanup: write_unlock_irqrestore(&tasklist_lock, flags); } + if (p->user) + atomic_dec(&p->user->count); nr_tasks--; + add_free_taskslot(p->tarray_ptr); bad_fork_free: free_task_struct(p); goto bad_fork; diff --git a/kernel/kmod.c b/kernel/kmod.c index 7468e4382..ec0d85d32 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -14,14 +14,15 @@ #include <linux/sched.h> #include <linux/types.h> #include <linux/unistd.h> -#include <asm/smp_lock.h> +#include <linux/smp_lock.h> +#include <linux/signal.h> + #include <asm/uaccess.h> /* modprobe_path is set via /proc/sys. 
*/ char modprobe_path[256] = "/sbin/modprobe"; -static char * envp[] = { "HOME=/", "TERM=linux", "PATH=/usr/bin:/bin", NULL }; /* exec_modprobe is spawned from a kernel-mode user process, @@ -33,20 +34,22 @@ static char * envp[] = { "HOME=/", "TERM=linux", "PATH=/usr/bin:/bin", NULL }; #define task_init task[smp_num_cpus] static inline void -use_init_file_context(void) { +use_init_file_context(void) +{ lock_kernel(); /* don't use the user's root, use init's root instead */ exit_fs(current); /* current->fs->count--; */ current->fs = task_init->fs; - current->fs->count++; + atomic_inc(&current->fs->count); unlock_kernel(); } static int exec_modprobe(void * module_name) { - char *argv[] = { modprobe_path, "-s", "-k", (char*)module_name, NULL}; + static char * envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; + char *argv[] = { modprobe_path, "-s", "-k", (char*)module_name, NULL }; int i; use_init_file_context(); @@ -63,11 +66,21 @@ static int exec_modprobe(void * module_name) spin_unlock_irq(&current->sigmask_lock); for (i = 0; i < current->files->max_fds; i++ ) { - if (current->files->fd[i]) close(i); + if (current->files->fd[i]) close(i); } - set_fs(KERNEL_DS); /* Allow execve args to be in kernel space. */ + /* Drop the "current user" thing */ + free_uid(current); + + /* Give kmod all privileges.. */ current->uid = current->euid = current->fsuid = 0; + cap_set_full(current->cap_inheritable); + cap_set_full(current->cap_effective); + + /* Allow execve args to be in kernel space. */ + set_fs(KERNEL_DS); + + /* Go, go, go... */ if (execve(modprobe_path, argv, envp) < 0) { printk(KERN_ERR "kmod: failed to exec %s -s -k %s, errno = %d\n", @@ -85,14 +98,36 @@ int request_module(const char * module_name) { int pid; int waitpid_result; + sigset_t tmpsig; + + /* Don't allow request_module() before the root fs is mounted! */ + if ( ! 
current->fs->root ) { + printk(KERN_ERR "request_module[%s]: Root fs not mounted\n", + module_name); + return -EPERM; + } - pid = kernel_thread(exec_modprobe, (void*) module_name, - CLONE_FS | SIGCHLD); + pid = kernel_thread(exec_modprobe, (void*) module_name, CLONE_FS); if (pid < 0) { - printk(KERN_ERR "kmod: fork failed, errno %d\n", -pid); + printk(KERN_ERR "request_module[%s]: fork failed, errno %d\n", module_name, -pid); return pid; } - waitpid_result = waitpid(pid, NULL, 0); + + /* Block everything but SIGKILL/SIGSTOP */ + spin_lock_irq(&current->sigmask_lock); + tmpsig = current->blocked; + siginitset(&current->blocked, ~(sigmask(SIGKILL)|sigmask(SIGSTOP))); + recalc_sigpending(current); + spin_unlock_irq(&current->sigmask_lock); + + waitpid_result = waitpid(pid, NULL, __WCLONE); + + /* Allow signals again.. */ + spin_lock_irq(&current->sigmask_lock); + current->blocked = tmpsig; + recalc_sigpending(current); + spin_unlock_irq(&current->sigmask_lock); + if (waitpid_result != pid) { printk (KERN_ERR "kmod: waitpid(%d,NULL,0) failed, returning %d.\n", pid, waitpid_result); diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 61201da16..e9cb2ea70 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -51,8 +51,9 @@ #include <linux/ctype.h> #include <linux/file.h> #include <linux/console.h> +#include <linux/time.h> -extern unsigned char aux_device_present, kbd_read_mask; +extern unsigned char aux_device_present, pckbd_read_mask; #if defined(CONFIG_PROC_FS) #include <linux/proc_fs.h> @@ -121,7 +122,6 @@ EXPORT_SYMBOL(vfree); EXPORT_SYMBOL(mem_map); EXPORT_SYMBOL(remap_page_range); EXPORT_SYMBOL(max_mapnr); -EXPORT_SYMBOL(num_physpages); EXPORT_SYMBOL(high_memory); EXPORT_SYMBOL(update_vm_cache); EXPORT_SYMBOL(vmtruncate); @@ -147,6 +147,7 @@ EXPORT_SYMBOL(d_instantiate); EXPORT_SYMBOL(d_alloc); EXPORT_SYMBOL(d_lookup); EXPORT_SYMBOL(__mark_inode_dirty); +EXPORT_SYMBOL(get_empty_filp); EXPORT_SYMBOL(init_private_file); EXPORT_SYMBOL(insert_file_free); EXPORT_SYMBOL(check_disk_change); @@ -184,6 
+185,7 @@ EXPORT_SYMBOL(locks_remove_flock); EXPORT_SYMBOL(dput); EXPORT_SYMBOL(get_cached_page); EXPORT_SYMBOL(put_cached_page); +EXPORT_SYMBOL(is_root_busy); EXPORT_SYMBOL(prune_dcache); EXPORT_SYMBOL(shrink_dcache_sb); EXPORT_SYMBOL(shrink_dcache_parent); @@ -269,8 +271,6 @@ EXPORT_SYMBOL(proc_dointvec_minmax); /* interrupt handling */ EXPORT_SYMBOL(request_irq); EXPORT_SYMBOL(free_irq); -EXPORT_SYMBOL(enable_irq); -EXPORT_SYMBOL(disable_irq); EXPORT_SYMBOL(probe_irq_on); EXPORT_SYMBOL(probe_irq_off); EXPORT_SYMBOL(bh_active); @@ -318,7 +318,6 @@ EXPORT_SYMBOL(jiffies); EXPORT_SYMBOL(xtime); EXPORT_SYMBOL(do_gettimeofday); EXPORT_SYMBOL(loops_per_sec); -EXPORT_SYMBOL(need_resched); EXPORT_SYMBOL(kstat); /* misc */ @@ -328,7 +327,8 @@ EXPORT_SYMBOL(sprintf); EXPORT_SYMBOL(vsprintf); EXPORT_SYMBOL(kdevname); EXPORT_SYMBOL(simple_strtoul); -EXPORT_SYMBOL(system_utsname); +EXPORT_SYMBOL(system_utsname); /* UTS data */ +EXPORT_SYMBOL(uts_sem); /* UTS semaphore */ EXPORT_SYMBOL(sys_call_table); EXPORT_SYMBOL(machine_restart); EXPORT_SYMBOL(machine_halt); @@ -379,16 +379,18 @@ EXPORT_SYMBOL(__up); EXPORT_SYMBOL(add_mouse_randomness); EXPORT_SYMBOL(fasync_helper); +#ifdef CONFIG_PSMOUSE_MODULE /* psaux mouse */ EXPORT_SYMBOL(aux_device_present); #ifdef CONFIG_VT -EXPORT_SYMBOL(kbd_read_mask); +EXPORT_SYMBOL(pckbd_read_mask); +#endif #endif #ifdef CONFIG_BLK_DEV_MD EXPORT_SYMBOL(disk_name); /* for md.c */ #endif - + /* binfmt_aout */ EXPORT_SYMBOL(get_write_access); EXPORT_SYMBOL(put_write_access); @@ -396,3 +398,6 @@ EXPORT_SYMBOL(put_write_access); /* dynamic registering of consoles */ EXPORT_SYMBOL(register_console); EXPORT_SYMBOL(unregister_console); + +/* time */ +EXPORT_SYMBOL(get_fast_time); diff --git a/kernel/module.c b/kernel/module.c index 2d0665246..5225c6709 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -323,7 +323,7 @@ sys_init_module(const char *name_user, struct module *mod_user) dep->ref = mod; dep->next_ref = d->refs; d->refs = dep; - /* Being 
referenced by a dependant module counts as a + /* Being referenced by a dependent module counts as a use as far as kmod is concerned. */ d->flags |= MOD_USED_ONCE; } @@ -961,7 +961,8 @@ get_module_symbol(char *modname, char *symname) for (mp = module_list; mp; mp = mp->next) { if (((modname == NULL) || (strcmp(mp->name, modname) == 0)) && - (mp->flags == MOD_RUNNING) && (mp->nsyms > 0)) { + (mp->flags & (MOD_RUNNING | MOD_DELETED)) == MOD_RUNNING && + (mp->nsyms > 0)) { for (i = mp->nsyms, sym = mp->syms; i > 0; --i, ++sym) { diff --git a/kernel/panic.c b/kernel/panic.c index c72d257a7..4ba287e05 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -60,15 +60,12 @@ NORET_TYPE void panic(const char * fmt, ...) if (panic_timeout > 0) { - int i; - /* * Delay timeout seconds before rebooting the machine. * We can't use the "normal" timers since we just panicked.. */ printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout); - for(i = 0; i < (panic_timeout*1000); i++) - udelay(1000); + mdelay(panic_timeout*1000); /* * Should we run the reboot notifier. For the moment Im * choosing not too. It might crash, be corrupt or do diff --git a/kernel/printk.c b/kernel/printk.c index 9060da3ef..2998dbede 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -14,8 +14,6 @@ #include <stdarg.h> -#include <asm/system.h> - #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -27,9 +25,10 @@ #include <linux/console.h> #include <linux/init.h> +#include <asm/system.h> #include <asm/uaccess.h> -#define LOG_BUF_LEN 8192 +#define LOG_BUF_LEN (16384) static char buf[1024]; diff --git a/kernel/resource.c b/kernel/resource.c index ff7c7492a..dc23b159b 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -143,7 +143,7 @@ unsigned long occupy_region(unsigned long base, unsigned long end, if (iotable[i].num == 0) break; if (i == IOTABLE_SIZE) { - /* Driver prints a warning typicaly. */ + /* Driver prints a warning typically. 
*/ return 0; } diff --git a/kernel/sched.c b/kernel/sched.c index af68649ef..1b76fee50 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -81,7 +81,6 @@ long time_reftime = 0; /* time at last adjustment (s) */ long time_adjust = 0; long time_adjust_step = 0; -int need_resched = 0; unsigned long event = 0; extern int do_setitimer(int, struct itimerval *, struct itimerval *); @@ -98,22 +97,70 @@ unsigned long volatile jiffies=0; * via the SMP irq return path. */ -struct task_struct *last_task_used_math = NULL; - struct task_struct * task[NR_TASKS] = {&init_task, }; struct kernel_stat kstat = { 0 }; void scheduling_functions_start_here(void) { } -static inline void add_to_runqueue(struct task_struct * p) +static inline void reschedule_idle(struct task_struct * p) { + + /* + * For SMP, we try to see if the CPU the task used + * to run on is idle.. + */ +#if 0 + /* + * Disable this for now. Ingo has some interesting + * code that looks too complex, and I have some ideas, + * but in the meantime.. One problem is that "wakeup()" + * can be (and is) called before we've even initialized + * SMP completely, so.. + */ +#ifdef __SMP__ + int want_cpu = p->processor; + + /* + * Don't even try to find another CPU for us if the task + * ran on this one before.. + */ + if (want_cpu != smp_processor_id()) { + struct task_struct **idle = task; + int i = smp_num_cpus; + + do { + struct task_struct *tsk = *idle; + idle++; + /* Something like this.. */ + if (tsk->has_cpu && tsk->processor == want_cpu) { + tsk->need_resched = 1; + smp_send_reschedule(want_cpu); + return; + } + } while (--i > 0); + } +#endif +#endif if (p->policy != SCHED_OTHER || p->counter > current->counter + 3) - need_resched = 1; - nr_running++; - (p->prev_run = init_task.prev_run)->next_run = p; - p->next_run = &init_task; - init_task.prev_run = p; + current->need_resched = 1; +} + +/* + * Careful! + * + * This has to add the process to the _beginning_ of the + * run-queue, not the end. 
See the comment about "This is + * subtle" in the scheduler proper.. + */ +static inline void add_to_runqueue(struct task_struct * p) +{ + struct task_struct *next = init_task.next_run; + + p->prev_run = &init_task; + init_task.next_run = p; + p->next_run = next; + next->prev_run = p; } static inline void del_from_runqueue(struct task_struct * p) @@ -144,6 +191,22 @@ static inline void move_last_runqueue(struct task_struct * p) prev->next_run = p; } +static inline void move_first_runqueue(struct task_struct * p) +{ + struct task_struct *next = p->next_run; + struct task_struct *prev = p->prev_run; + + /* remove from list */ + next->prev_run = prev; + prev->next_run = next; + /* add back to list */ + p->prev_run = &init_task; + next = init_task.next_run; + init_task.next_run = p; + p->next_run = next; + next->prev_run = p; +} + /* * The tasklist_lock protects the linked list of processes. * @@ -155,7 +218,7 @@ static inline void move_last_runqueue(struct task_struct * p) * The run-queue lock locks the parts that actually access * and change the run-queues, and have to be interrupt-safe. */ -spinlock_t scheduler_lock = SPIN_LOCK_UNLOCKED; /* should be aquired first */ +spinlock_t scheduler_lock = SPIN_LOCK_UNLOCKED; /* should be acquired first */ spinlock_t runqueue_lock = SPIN_LOCK_UNLOCKED; /* second */ rwlock_t tasklist_lock = RW_LOCK_UNLOCKED; /* third */ @@ -173,8 +236,11 @@ inline void wake_up_process(struct task_struct * p) spin_lock_irqsave(&runqueue_lock, flags); p->state = TASK_RUNNING; - if (!p->next_run) + if (!p->next_run) { add_to_runqueue(p); + reschedule_idle(p); + nr_running++; + } spin_unlock_irqrestore(&runqueue_lock, flags); } @@ -364,6 +430,9 @@ int del_timer(struct timer_list * timer) ret = detach_timer(timer); timer->next = timer->prev = 0; spin_unlock_irqrestore(&timerlist_lock, flags); + + /* Make sure the timer isn't running in parallell.. 
*/ + synchronize_bh(); return ret; } @@ -391,26 +460,26 @@ int del_timer(struct timer_list * timer) */ asmlinkage void schedule(void) { - int lock_depth; struct task_struct * prev, * next; unsigned long timeout; int this_cpu; - need_resched = 0; prev = current; - this_cpu = smp_processor_id(); - if (local_irq_count[this_cpu]) - goto scheduling_in_interrupt; - if (local_bh_count[this_cpu]) + this_cpu = prev->processor; + if (in_interrupt()) goto scheduling_in_interrupt; - release_kernel_lock(prev, this_cpu, lock_depth); + release_kernel_lock(prev, this_cpu); + + /* Do "administrative" work here while we don't hold any locks */ if (bh_active & bh_mask) do_bottom_half(); + run_task_queue(&tq_scheduler); spin_lock(&scheduler_lock); spin_lock_irq(&runqueue_lock); /* move an exhausted RR process to be last.. */ + prev->need_resched = 0; if (!prev->counter && prev->policy == SCHED_RR) { prev->counter = prev->priority; move_last_runqueue(prev); @@ -500,9 +569,15 @@ asmlinkage void schedule(void) if (timeout) del_timer(&timer); } + spin_unlock(&scheduler_lock); - reacquire_kernel_lock(prev, smp_processor_id(), lock_depth); + /* + * At this point "prev" is "current", as we just + * switched into it (from an even more "previous" + * prev) + */ + reacquire_kernel_lock(prev); return; scheduling_in_interrupt: @@ -1046,7 +1121,7 @@ static void update_process_times(unsigned long ticks, unsigned long system) p->counter -= ticks; if (p->counter < 0) { p->counter = 0; - need_resched = 1; + p->need_resched = 1; } if (p->priority < DEF_PRIORITY) kstat.cpu_nice += user; @@ -1134,7 +1209,7 @@ asmlinkage unsigned int sys_alarm(unsigned int seconds) asmlinkage int sys_getpid(void) { - /* This is SMP safe - current->pid doesnt change */ + /* This is SMP safe - current->pid doesn't change */ return current->pid; } @@ -1237,9 +1312,9 @@ asmlinkage int sys_nice(int increment) newprio = 40; /* * do a "normalization" of the priority (traditionally - * unix nice values are -20..20, linux doesn't 
really + * Unix nice values are -20 to 20; Linux doesn't really * use that kind of thing, but uses the length of the - * timeslice instead (default 150 msec). The rounding is + * timeslice instead (default 150 ms). The rounding is * why we want to avoid negative values. */ newprio = (newprio * DEF_PRIORITY + 10) / 20; @@ -1292,8 +1367,8 @@ static int setscheduler(pid_t pid, int policy, /* * We play safe to avoid deadlocks. */ - spin_lock_irq(&scheduler_lock); - spin_lock(&runqueue_lock); + spin_lock(&scheduler_lock); + spin_lock_irq(&runqueue_lock); read_lock(&tasklist_lock); p = find_process_by_pid(pid); @@ -1333,14 +1408,14 @@ static int setscheduler(pid_t pid, int policy, p->policy = policy; p->rt_priority = lp.sched_priority; if (p->next_run) - move_last_runqueue(p); + move_first_runqueue(p); - need_resched = 1; + current->need_resched = 1; out_unlock: read_unlock(&tasklist_lock); - spin_unlock(&runqueue_lock); - spin_unlock_irq(&scheduler_lock); + spin_unlock_irq(&runqueue_lock); + spin_unlock(&scheduler_lock); out_nounlock: return retval; @@ -1418,10 +1493,10 @@ asmlinkage int sys_sched_yield(void) spin_lock(&scheduler_lock); spin_lock_irq(&runqueue_lock); current->policy |= SCHED_YIELD; + current->need_resched = 1; move_last_runqueue(current); spin_unlock_irq(&runqueue_lock); spin_unlock(&scheduler_lock); - need_resched = 1; return 0; } @@ -1520,7 +1595,7 @@ static void show_task(int nr,struct task_struct * p) printk(stat_nam[p->state]); else printk(" "); -#if ((~0UL) == 0xffffffff) +#if (BITS_PER_LONG == 32) if (p == current) printk(" current "); else @@ -1531,13 +1606,13 @@ static void show_task(int nr,struct task_struct * p) else printk(" %016lx ", thread_saved_pc(&p->tss)); #endif -#if 0 - for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) { - if (((unsigned long *)p->kernel_stack_page)[free]) - break; + { + unsigned long * n = (unsigned long *) (p+1); + while (!*n) + n++; + free = (unsigned long) n - (unsigned long)(p+1); } -#endif - printk("%5lu 
%5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid); + printk("%5lu %5d %6d ", free, p->pid, p->p_pptr->pid); if (p->p_cptr) printk("%5d ", p->p_cptr->pid); else @@ -1552,7 +1627,6 @@ static void show_task(int nr,struct task_struct * p) printk("\n"); { - extern char * render_sigset_t(sigset_t *set, char *buffer); struct signal_queue *q; char s[sizeof(sigset_t)*2+1], b[sizeof(sigset_t)*2+1]; @@ -1565,11 +1639,26 @@ static void show_task(int nr,struct task_struct * p) } } +char * render_sigset_t(sigset_t *set, char *buffer) +{ + int i = _NSIG, x; + do { + i -= 4, x = 0; + if (sigismember(set, i+1)) x |= 1; + if (sigismember(set, i+2)) x |= 2; + if (sigismember(set, i+3)) x |= 4; + if (sigismember(set, i+4)) x |= 8; + *buffer++ = (x < 10 ? '0' : 'a' - 10) + x; + } while (i >= 4); + *buffer = 0; + return buffer; +} + void show_state(void) { struct task_struct *p; -#if ((~0UL) == 0xffffffff) +#if (BITS_PER_LONG == 32) printk("\n" " free sibling\n"); printk(" task PC stack pid father child younger older\n"); diff --git a/kernel/signal.c b/kernel/signal.c index c6a512c19..ff5ec0a8d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -255,7 +255,7 @@ printk("SIG queue (%s:%d): %d ", t->comm, t->pid, sig); t->exit_code = 0; sigdelsetmask(&t->signal, (sigmask(SIGSTOP)|sigmask(SIGTSTP)| sigmask(SIGTTOU)|sigmask(SIGTTIN))); - /* Inflict this corner case with recalculaions, not mainline */ + /* Inflict this corner case with recalculations, not mainline */ recalc_sigpending(t); break; @@ -263,7 +263,7 @@ printk("SIG queue (%s:%d): %d ", t->comm, t->pid, sig); case SIGTTIN: case SIGTTOU: /* If we're stopping again, cancel SIGCONT */ sigdelset(&t->signal, SIGCONT); - /* Inflict this corner case with recalculaions, not mainline */ + /* Inflict this corner case with recalculations, not mainline */ recalc_sigpending(t); break; } @@ -811,7 +811,7 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) || (k->sa.sa_handler == SIG_DFL && (sig == SIGCONT || 
sig == SIGCHLD || - sig != SIGWINCH))) { + sig == SIGWINCH))) { /* So dequeue any that might be pending. XXX: process-wide signals? */ if (sig >= SIGRTMIN && @@ -840,6 +840,62 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) return 0; } +int +do_sigaltstack (const stack_t *uss, stack_t *uoss, unsigned long sp) +{ + stack_t oss; + int error; + + if (uoss) { + oss.ss_sp = (void *) current->sas_ss_sp; + oss.ss_size = current->sas_ss_size; + oss.ss_flags = sas_ss_flags(sp); + } + + if (uss) { + void *ss_sp; + size_t ss_size; + int ss_flags; + + error = -EFAULT; + if (verify_area(VERIFY_READ, uss, sizeof(*uss)) + || __get_user(ss_sp, &uss->ss_sp) + || __get_user(ss_flags, &uss->ss_flags) + || __get_user(ss_size, &uss->ss_size)) + goto out; + + error = -EPERM; + if (on_sig_stack (sp)) + goto out; + + error = -EINVAL; + if (ss_flags & ~SS_DISABLE) + goto out; + + if (ss_flags & SS_DISABLE) { + ss_size = 0; + ss_sp = NULL; + } else { + error = -ENOMEM; + if (ss_size < MINSIGSTKSZ) + goto out; + } + + current->sas_ss_sp = (unsigned long) ss_sp; + current->sas_ss_size = ss_size; + } + + if (uoss) { + error = -EFAULT; + if (copy_to_user(uoss, &oss, sizeof(oss))) + goto out; + } + + error = 0; +out: + return error; +} + #if !defined(__alpha__) /* Alpha has its own versions with special arguments. 
*/ @@ -908,6 +964,7 @@ sys_sigpending(old_sigset_t *set) return error; } +#ifndef __sparc__ asmlinkage int sys_rt_sigaction(int sig, const struct sigaction *act, struct sigaction *oact, size_t sigsetsize) @@ -933,6 +990,7 @@ sys_rt_sigaction(int sig, const struct sigaction *act, struct sigaction *oact, out: return ret; } +#endif /* __sparc__ */ #endif #if !defined(__alpha__) diff --git a/kernel/sys.c b/kernel/sys.c index 1de75c366..1b4c6df40 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -4,7 +4,6 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#include <linux/config.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -260,9 +259,9 @@ void ctrl_alt_del(void) * * The general idea is that a program which uses just setregid() will be * 100% compatible with BSD. A program which uses just setgid() will be - * 100% compatible with POSIX w/ Saved ID's. + * 100% compatible with POSIX with saved IDs. * - * SMP: There are not races, the gid's are checked only by filesystem + * SMP: There are not races, the GIDs are checked only by filesystem * operations (as far as semantic preservation is concerned). */ asmlinkage int sys_setregid(gid_t rgid, gid_t egid) @@ -367,7 +366,7 @@ extern inline void cap_emulate_setxuid(int old_ruid, int old_euid, * * The general idea is that a program which uses just setreuid() will be * 100% compatible with BSD. A program which uses just setuid() will be - * 100% compatible with POSIX w/ Saved ID's. + * 100% compatible with POSIX with saved IDs. */ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid) { @@ -406,10 +405,9 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid) * cheaply with the new uid cache, so if it matters * we should be checking for it. 
-DaveM */ - charge_uid(current, -1); + free_uid(current); current->uid = new_ruid; - if(new_ruid) - charge_uid(current, 1); + alloc_uid(current); } if (!issecure(SECURE_NO_SETUID_FIXUP)) { @@ -422,7 +420,7 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid) /* - * setuid() is implemented like SysV w/ SAVED_IDS + * setuid() is implemented like SysV with SAVED_IDS * * Note that SAVED_ID's is deficient in that a setuid root program * like sendmail, for example, cannot set its uid to be a normal @@ -449,12 +447,11 @@ asmlinkage int sys_setuid(uid_t uid) if (current->euid != old_euid) current->dumpable = 0; - if(new_ruid != old_ruid) { + if (new_ruid != old_ruid) { /* See comment above about NPROC rlimit issues... */ - charge_uid(current, -1); + free_uid(current); current->uid = new_ruid; - if(new_ruid) - charge_uid(current, 1); + alloc_uid(current); } if (!issecure(SECURE_NO_SETUID_FIXUP)) { @@ -466,7 +463,7 @@ asmlinkage int sys_setuid(uid_t uid) /* - * This function implementes a generic ability to update ruid, euid, + * This function implements a generic ability to update ruid, euid, * and suid. This allows you to implement the 4.4 compatible seteuid(). */ asmlinkage int sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) @@ -474,7 +471,8 @@ asmlinkage int sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) int old_ruid = current->uid; int old_euid = current->euid; int old_suid = current->suid; - if (current->uid != 0 && current->euid != 0 && current->suid != 0) { + + if (!capable(CAP_SETUID)) { if ((ruid != (uid_t) -1) && (ruid != current->uid) && (ruid != current->euid) && (ruid != current->suid)) return -EPERM; @@ -487,10 +485,9 @@ asmlinkage int sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) } if (ruid != (uid_t) -1) { /* See above commentary about NPROC rlimit issues here. 
*/ - charge_uid(current, -1); + free_uid(current); current->uid = ruid; - if(ruid) - charge_uid(current, 1); + alloc_uid(current); } if (euid != (uid_t) -1) { if (euid != current->euid) @@ -524,7 +521,7 @@ asmlinkage int sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid) */ asmlinkage int sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) { - if (current->uid != 0 && current->euid != 0 && current->suid != 0) { + if (!capable(CAP_SETGID)) { if ((rgid != (gid_t) -1) && (rgid != current->gid) && (rgid != current->egid) && (rgid != current->sgid)) return -EPERM; @@ -579,13 +576,13 @@ asmlinkage int sys_setfsuid(uid_t uid) current->dumpable = 0; /* We emulate fsuid by essentially doing a scaled-down version - * of what we did in setresuid and friends. However, we only - * operate on the fs-specific bits of the process' effective - * capabilities - * - * FIXME - is fsuser used for all CAP_FS_MASK capabilities? - * if not, we might be a bit too harsh here. - */ + * of what we did in setresuid and friends. However, we only + * operate on the fs-specific bits of the process' effective + * capabilities + * + * FIXME - is fsuser used for all CAP_FS_MASK capabilities? + * if not, we might be a bit too harsh here. 
+ */ if (!issecure(SECURE_NO_SETUID_FIXUP)) { if (old_fsuid == 0 && current->fsuid != 0) { @@ -750,7 +747,7 @@ asmlinkage int sys_setsid(void) read_lock(&tasklist_lock); for_each_task(p) { if (p->pgrp == current->pid) - goto out; + goto out; } current->leader = 1; @@ -764,7 +761,7 @@ out: } /* - * Supplementary group ID's + * Supplementary group IDs */ asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist) { @@ -780,7 +777,7 @@ asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist) i = current->ngroups; if (gidsetsize) { if (i > gidsetsize) - return -EINVAL; + return -EINVAL; if (copy_to_user(grouplist, current->groups, sizeof(gid_t)*i)) return -EFAULT; } @@ -823,40 +820,57 @@ out: return 1; } +/* + * This should really be a blocking read-write lock + * rather than a semaphore. Anybody want to implement + * one? + */ +struct semaphore uts_sem = MUTEX; + asmlinkage int sys_newuname(struct new_utsname * name) { - if (!name) - return -EFAULT; + int errno = 0; + + down(&uts_sem); if (copy_to_user(name,&system_utsname,sizeof *name)) - return -EFAULT; - return 0; + errno = -EFAULT; + up(&uts_sem); + return errno; } asmlinkage int sys_sethostname(char *name, int len) { + int errno; + if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; - if(copy_from_user(system_utsname.nodename, name, len)) - return -EFAULT; - system_utsname.nodename[len] = 0; -#ifdef CONFIG_TRANS_NAMES - translations_dirty = 1; -#endif - return 0; + down(&uts_sem); + errno = -EFAULT; + if (!copy_from_user(system_utsname.nodename, name, len)) { + system_utsname.nodename[len] = 0; + errno = 0; + } + up(&uts_sem); + return errno; } asmlinkage int sys_gethostname(char *name, int len) { - int i; + int i, errno; if (len < 0) return -EINVAL; + down(&uts_sem); i = 1 + strlen(system_utsname.nodename); if (i > len) i = len; - return copy_to_user(name, system_utsname.nodename, i) ? 
-EFAULT : 0; + errno = 0; + if (copy_to_user(name, system_utsname.nodename, i)) + errno = -EFAULT; + up(&uts_sem); + return errno; } /* @@ -865,17 +879,21 @@ asmlinkage int sys_gethostname(char *name, int len) */ asmlinkage int sys_setdomainname(char *name, int len) { + int errno; + if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; - if(copy_from_user(system_utsname.domainname, name, len)) - return -EFAULT; - system_utsname.domainname[len] = 0; -#ifdef CONFIG_TRANS_NAMES - translations_dirty = 1; -#endif - return 0; + + down(&uts_sem); + errno = -EFAULT; + if (!copy_from_user(system_utsname.domainname, name, len)) { + errno = 0; + system_utsname.domainname[len] = 0; + } + up(&uts_sem); + return errno; } asmlinkage int sys_getrlimit(unsigned int resource, struct rlimit *rlim) @@ -974,7 +992,7 @@ asmlinkage int sys_umask(int mask) } asmlinkage int sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) + unsigned long arg4, unsigned long arg5) { int error = 0; int sig; @@ -983,15 +1001,15 @@ asmlinkage int sys_prctl(int option, unsigned long arg2, unsigned long arg3, case PR_SET_PDEATHSIG: sig = arg2; if (sig > _NSIG) { - error = -EINVAL; - break; - } - current->pdeath_signal = sig; - break; + error = -EINVAL; + break; + } + current->pdeath_signal = sig; + break; default: error = -EINVAL; break; - } - return error; + } + return error; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0865db07c..95db16576 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -51,9 +51,16 @@ extern int sg_big_buff; #ifdef __sparc__ extern char reboot_command []; #endif +#ifdef __powerpc__ +extern unsigned long htab_reclaim_on, zero_paged_on, powersave_nap; +#endif + +extern int pgt_cache_water[]; static int parse_table(int *, int, void *, size_t *, void *, size_t, ctl_table *, void **); +static int proc_doutsstring(ctl_table *table, int write, struct file *filp, + void *buffer, size_t 
*lenp); static ctl_table root_table[]; @@ -135,27 +142,15 @@ static ctl_table root_table[] = { static ctl_table kern_table[] = { {KERN_OSTYPE, "ostype", system_utsname.sysname, 64, - 0444, NULL, &proc_dostring, &sysctl_string}, + 0444, NULL, &proc_doutsstring, &sysctl_string}, {KERN_OSRELEASE, "osrelease", system_utsname.release, 64, - 0444, NULL, &proc_dostring, &sysctl_string}, + 0444, NULL, &proc_doutsstring, &sysctl_string}, {KERN_VERSION, "version", system_utsname.version, 64, - 0444, NULL, &proc_dostring, &sysctl_string}, + 0444, NULL, &proc_doutsstring, &sysctl_string}, {KERN_NODENAME, "hostname", system_utsname.nodename, 64, - 0644, NULL, &proc_dostring, &sysctl_string}, + 0644, NULL, &proc_doutsstring, &sysctl_string}, {KERN_DOMAINNAME, "domainname", system_utsname.domainname, 64, - 0644, NULL, &proc_dostring, &sysctl_string}, - {KERN_NRINODE, "inode-nr", &inodes_stat, 2*sizeof(int), - 0444, NULL, &proc_dointvec}, - {KERN_STATINODE, "inode-state", &inodes_stat, 7*sizeof(int), - 0444, NULL, &proc_dointvec}, - {KERN_MAXINODE, "inode-max", &max_inodes, sizeof(int), - 0644, NULL, &proc_dointvec}, - {KERN_NRFILE, "file-nr", &nr_files, 3*sizeof(int), - 0444, NULL, &proc_dointvec}, - {KERN_MAXFILE, "file-max", &max_files, sizeof(int), - 0644, NULL, &proc_dointvec}, - {KERN_DENTRY, "dentry-state", &dentry_stat, 6*sizeof(int), - 0444, NULL, &proc_dointvec}, + 0644, NULL, &proc_doutsstring, &sysctl_string}, {KERN_PANIC, "panic", &panic_timeout, sizeof(int), 0644, NULL, &proc_dointvec}, #ifdef CONFIG_BLK_DEV_INITRD @@ -172,6 +167,14 @@ static ctl_table kern_table[] = { {KERN_SPARC_REBOOT, "reboot-cmd", reboot_command, 256, 0644, NULL, &proc_dostring, &sysctl_string }, #endif +#ifdef __powerpc__ + {KERN_PPC_HTABRECLAIM, "htab-reclaim", &htab_reclaim_on, sizeof(int), + 0644, NULL, &proc_dointvec}, + {KERN_PPC_ZEROPAGED, "zero-paged", &zero_paged_on, sizeof(int), + 0644, NULL, &proc_dointvec}, + {KERN_PPC_POWERSAVE_NAP, "powersave-nap", &powersave_nap, sizeof(int), + 
0644, NULL, &proc_dointvec}, +#endif {KERN_CTLALTDEL, "ctrl-alt-del", &C_A_D, sizeof(int), 0644, NULL, &proc_dointvec}, {KERN_PRINTK, "printk", &console_loglevel, 4*sizeof(int), @@ -181,7 +184,7 @@ static ctl_table kern_table[] = { 0644, NULL, &proc_dostring, &sysctl_string }, #endif #ifdef CONFIG_CHR_DEV_SG - {KERN_NRFILE, "sg-big-buff", &sg_big_buff, sizeof (int), + {KERN_SG_BIG_BUFF, "sg-big-buff", &sg_big_buff, sizeof (int), 0444, NULL, &proc_dointvec}, #endif {0} @@ -205,6 +208,8 @@ static ctl_table vm_table[] = { &page_cache, sizeof(buffer_mem_t), 0644, NULL, &proc_dointvec}, {VM_PAGERDAEMON, "kswapd", &pager_daemon, sizeof(pager_daemon_t), 0644, NULL, &proc_dointvec}, + {VM_PGT_CACHE, "pagetable_cache", + &pgt_cache_water, 2*sizeof(int), 0600, NULL, &proc_dointvec}, {0} }; @@ -213,6 +218,22 @@ static ctl_table proc_table[] = { }; static ctl_table fs_table[] = { + {FS_NRINODE, "inode-nr", &inodes_stat, 2*sizeof(int), + 0444, NULL, &proc_dointvec}, + {FS_STATINODE, "inode-state", &inodes_stat, 7*sizeof(int), + 0444, NULL, &proc_dointvec}, + {FS_MAXINODE, "inode-max", &max_inodes, sizeof(int), + 0644, NULL, &proc_dointvec}, + {FS_NRFILE, "file-nr", &nr_files, 3*sizeof(int), + 0444, NULL, &proc_dointvec}, + {FS_MAXFILE, "file-max", &max_files, sizeof(int), + 0644, NULL, &proc_dointvec}, + {FS_NRDQUOT, "dquot-nr", &nr_dquots, 2*sizeof(int), + 0444, NULL, &proc_dointvec}, + {FS_MAXDQUOT, "dquot-max", &max_dquots, sizeof(int), + 0644, NULL, &proc_dointvec}, + {FS_DENTRY, "dentry-state", &dentry_stat, 6*sizeof(int), + 0444, NULL, &proc_dointvec}, {0} }; @@ -616,6 +637,21 @@ int proc_dostring(ctl_table *table, int write, struct file *filp, return 0; } +/* + * Special case of dostring for the UTS structure. This has locks + * to observe. Should this be in kernel/sys.c ???? 
+ */ + +static int proc_doutsstring(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + int r; + down(&uts_sem); + r=proc_dostring(table,write,filp,buffer,lenp); + up(&uts_sem); + return r; +} + static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp, int conv) { @@ -892,9 +928,6 @@ int sysctl_string(ctl_table *table, int *name, int nlen, if (len == table->maxlen) len--; ((char *) table->data)[len] = 0; -#ifdef CONFIG_TRANS_NAMES - translations_dirty = 1; -#endif } return 0; } diff --git a/kernel/time.c b/kernel/time.c index ff3a5d684..0f1094655 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -262,7 +262,7 @@ int do_adjtimex(struct timex *txc) if (txc->modes & ADJ_TIMECONST) time_constant = txc->constant; - if (txc->modes & ADJ_OFFSET) + if (txc->modes & ADJ_OFFSET) { if ((txc->modes == ADJ_OFFSET_SINGLESHOT) || !(time_status & STA_PLL)) { @@ -327,6 +327,7 @@ int do_adjtimex(struct timex *txc) else if (time_freq < -time_tolerance) time_freq = -time_tolerance; } /* STA_PLL || STA_PPSTIME */ + } if (txc->modes & ADJ_TICK) tick = txc->tick; |