author     Ralf Baechle <ralf@linux-mips.org>   1997-06-01 03:16:17 +0000
committer  Ralf Baechle <ralf@linux-mips.org>   1997-06-01 03:16:17 +0000
commit     d8d9b8f76f22b7a16a83e261e64f89ee611f49df
tree       3067bc130b80d52808e6390c9fc7fc087ec1e33c /kernel
parent     19c9bba94152148523ba0f7ef7cffe3d45656b11
Initial revision
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/exit.c       161
-rw-r--r--   kernel/fork.c       242
-rw-r--r--   kernel/ksyms.c       20
-rw-r--r--   kernel/panic.c        3
-rw-r--r--   kernel/printk.c       3
-rw-r--r--   kernel/resource.c     3
-rw-r--r--   kernel/sched.c      459
-rw-r--r--   kernel/softirq.c     11
-rw-r--r--   kernel/sys.c        135
-rw-r--r--   kernel/sysctl.c       3
10 files changed, 489 insertions, 551 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 0d03916e8..3fce2a2d7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -4,8 +4,6 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*/
-#undef DEBUG_PROC_TREE
-
#include <linux/config.h>
#include <linux/wait.h>
#include <linux/errno.h>
@@ -16,6 +14,7 @@
#include <linux/mm.h>
#include <linux/tty.h>
#include <linux/malloc.h>
+#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
@@ -127,130 +126,27 @@ void notify_parent(struct task_struct * tsk)
void release(struct task_struct * p)
{
- int i;
-
- if (!p)
- return;
- if (p == current) {
- printk("task releasing itself\n");
- return;
- }
- for (i=1 ; i<NR_TASKS ; i++)
- if (task[i] == p) {
+ if (p != current) {
#ifdef __SMP__
- /* FIXME! Cheesy, but kills the window... -DaveM */
- while(p->processor != NO_PROC_ID)
- barrier();
- spin_unlock_wait(&scheduler_lock);
+ /* FIXME! Cheesy, but kills the window... -DaveM */
+ while (p->has_cpu)
+ barrier();
+ spin_unlock_wait(&scheduler_lock);
#endif
- nr_tasks--;
- task[i] = NULL;
- REMOVE_LINKS(p);
- release_thread(p);
- if (STACK_MAGIC != *(unsigned long *)p->kernel_stack_page)
- printk(KERN_ALERT "release: %s kernel stack corruption. Aiee\n", p->comm);
- free_kernel_stack(p->kernel_stack_page);
- current->cmin_flt += p->min_flt + p->cmin_flt;
- current->cmaj_flt += p->maj_flt + p->cmaj_flt;
- current->cnswap += p->nswap + p->cnswap;
- free_task_struct(p);
- return;
- }
- panic("trying to release non-existent task");
-}
-
-#ifdef DEBUG_PROC_TREE
-/*
- * Check to see if a task_struct pointer is present in the task[] array
- * Return 0 if found, and 1 if not found.
- */
-int bad_task_ptr(struct task_struct *p)
-{
- int i;
-
- if (!p)
- return 0;
- for (i=0 ; i<NR_TASKS ; i++)
- if (task[i] == p)
- return 0;
- return 1;
-}
-
-/*
- * This routine scans the pid tree and makes sure the rep invariant still
- * holds. Used for debugging only, since it's very slow....
- *
- * It looks a lot scarier than it really is.... we're doing nothing more
- * than verifying the doubly-linked list found in p_ysptr and p_osptr,
- * and checking it corresponds with the process tree defined by p_cptr and
- * p_pptr;
- */
-void audit_ptree(void)
-{
- int i;
-
- for (i=1 ; i<NR_TASKS ; i++) {
- if (!task[i])
- continue;
- if (bad_task_ptr(task[i]->p_pptr))
- printk("Warning, pid %d's parent link is bad\n",
- task[i]->pid);
- if (bad_task_ptr(task[i]->p_cptr))
- printk("Warning, pid %d's child link is bad\n",
- task[i]->pid);
- if (bad_task_ptr(task[i]->p_ysptr))
- printk("Warning, pid %d's ys link is bad\n",
- task[i]->pid);
- if (bad_task_ptr(task[i]->p_osptr))
- printk("Warning, pid %d's os link is bad\n",
- task[i]->pid);
- if (task[i]->p_pptr == task[i])
- printk("Warning, pid %d parent link points to self\n",
- task[i]->pid);
- if (task[i]->p_cptr == task[i])
- printk("Warning, pid %d child link points to self\n",
- task[i]->pid);
- if (task[i]->p_ysptr == task[i])
- printk("Warning, pid %d ys link points to self\n",
- task[i]->pid);
- if (task[i]->p_osptr == task[i])
- printk("Warning, pid %d os link points to self\n",
- task[i]->pid);
- if (task[i]->p_osptr) {
- if (task[i]->p_pptr != task[i]->p_osptr->p_pptr)
- printk(
- "Warning, pid %d older sibling %d parent is %d\n",
- task[i]->pid, task[i]->p_osptr->pid,
- task[i]->p_osptr->p_pptr->pid);
- if (task[i]->p_osptr->p_ysptr != task[i])
- printk(
- "Warning, pid %d older sibling %d has mismatched ys link\n",
- task[i]->pid, task[i]->p_osptr->pid);
- }
- if (task[i]->p_ysptr) {
- if (task[i]->p_pptr != task[i]->p_ysptr->p_pptr)
- printk(
- "Warning, pid %d younger sibling %d parent is %d\n",
- task[i]->pid, task[i]->p_osptr->pid,
- task[i]->p_osptr->p_pptr->pid);
- if (task[i]->p_ysptr->p_osptr != task[i])
- printk(
- "Warning, pid %d younger sibling %d has mismatched os link\n",
- task[i]->pid, task[i]->p_ysptr->pid);
- }
- if (task[i]->p_cptr) {
- if (task[i]->p_cptr->p_pptr != task[i])
- printk(
- "Warning, pid %d youngest child %d has mismatched parent link\n",
- task[i]->pid, task[i]->p_cptr->pid);
- if (task[i]->p_cptr->p_ysptr)
- printk(
- "Warning, pid %d youngest child %d has non-NULL ys link\n",
- task[i]->pid, task[i]->p_cptr->pid);
- }
+ charge_uid(p, -1);
+ nr_tasks--;
+ add_free_taskslot(p->tarray_ptr);
+ unhash_pid(p);
+ REMOVE_LINKS(p);
+ release_thread(p);
+ current->cmin_flt += p->min_flt + p->cmin_flt;
+ current->cmaj_flt += p->maj_flt + p->cmaj_flt;
+ current->cnswap += p->nswap + p->cnswap;
+ free_task_struct(p);
+ } else {
+ printk("task releasing itself\n");
}
}
-#endif /* DEBUG_PROC_TREE */
/*
* This checks not only the pgrp, but falls back on the pid if no
@@ -348,17 +244,12 @@ int kill_proc(int pid, int sig, int priv)
retval = -EINVAL;
if (sig >= 0 && sig <= 32) {
- struct task_struct *p;
+ struct task_struct *p = find_task_by_pid(pid);
- retval = -ESRCH;
- read_lock(&tasklist_lock);
- for_each_task(p) {
- if (p->pid != pid)
- continue;
- retval = send_sig(sig,p,priv);
- break;
- }
- read_unlock(&tasklist_lock);
+ if(p)
+ retval = send_sig(sig, p, priv);
+ else
+ retval = -ESRCH;
}
return retval;
}
@@ -551,7 +442,7 @@ static inline void __exit_mm(struct task_struct * tsk)
if (!--mm->count) {
exit_mmap(mm);
free_page_tables(mm);
- kfree(mm);
+ kmem_cache_free(mm_cachep, mm);
}
}
}
@@ -735,8 +626,8 @@ repeat:
retval = p->pid;
goto end_wait4;
case TASK_ZOMBIE:
- current->cutime += p->utime + p->cutime;
- current->cstime += p->stime + p->cstime;
+ current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
+ current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
read_unlock(&tasklist_lock);
if (ru != NULL)
getrusage(p, RUSAGE_BOTH, ru);
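
Note: release() and kill_proc() above now resolve tasks through the pid hash this revision introduces (hash_pid(), unhash_pid(), find_task_by_pid()); the hash itself lives in include/linux/sched.h and is not part of this diff. A minimal userspace sketch of the idea follows -- the hash function, chain field names and table size are assumptions, not code from this patch:

#include <stdio.h>

#define PIDHASH_SZ 512				/* assumed table size */
#define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))

struct task {					/* stand-in for task_struct */
	int pid;
	struct task *pidhash_next;		/* assumed chain fields */
	struct task **pidhash_pprev;
};

static struct task *pidhash[PIDHASH_SZ];

static void hash_pid(struct task *p)
{
	struct task **htable = &pidhash[pid_hashfn(p->pid)];

	if ((p->pidhash_next = *htable) != NULL)
		(*htable)->pidhash_pprev = &p->pidhash_next;
	*htable = p;
	p->pidhash_pprev = htable;
}

static void unhash_pid(struct task *p)
{
	if (p->pidhash_next)
		p->pidhash_next->pidhash_pprev = p->pidhash_pprev;
	*p->pidhash_pprev = p->pidhash_next;
}

static struct task *find_task_by_pid(int pid)
{
	struct task *p = pidhash[pid_hashfn(pid)];

	while (p && p->pid != pid)		/* expected O(1) chain walk */
		p = p->pidhash_next;
	return p;
}

int main(void)
{
	struct task a = { 1, NULL, NULL }, b = { 42, NULL, NULL };

	hash_pid(&a);
	hash_pid(&b);
	printf("42 -> %p, 7 -> %p\n",
	       (void *)find_task_by_pid(42), (void *)find_task_by_pid(7));
	unhash_pid(&b);
	return 0;
}

The pprev back-pointer is the same removal trick the patch itself uses for uid_taskcount and vm_next_share: deletion never has to special-case the head of a chain.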
diff --git a/kernel/fork.c b/kernel/fork.c
index 6204ffeaf..804e37bd5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -33,68 +33,177 @@ int nr_running=1;
unsigned long int total_forks=0; /* Handle normal Linux uptimes. */
int last_pid=0;
-static inline int find_empty_process(void)
+/* SLAB cache for mm_struct's. */
+kmem_cache_t *mm_cachep;
+
+struct task_struct *pidhash[PIDHASH_SZ];
+spinlock_t pidhash_lock = SPIN_LOCK_UNLOCKED;
+
+struct task_struct **tarray_freelist = NULL;
+spinlock_t taskslot_lock = SPIN_LOCK_UNLOCKED;
+
+/* UID task count cache, to prevent walking entire process list every
+ * single fork() operation.
+ */
+#define UIDHASH_SZ (PIDHASH_SZ >> 2)
+
+static struct uid_taskcount {
+ struct uid_taskcount *next, **pprev;
+ unsigned short uid;
+ int task_count;
+} *uidhash[UIDHASH_SZ];
+static spinlock_t uidhash_lock = SPIN_LOCK_UNLOCKED;
+
+kmem_cache_t *uid_cachep;
+
+#define uidhashfn(uid) (((uid >> 8) ^ uid) & (UIDHASH_SZ - 1))
+
+static inline void uid_hash_insert(struct uid_taskcount *up, unsigned int hashent)
{
- int i;
+ spin_lock(&uidhash_lock);
+ if((up->next = uidhash[hashent]) != NULL)
+ uidhash[hashent]->pprev = &up->next;
+ up->pprev = &uidhash[hashent];
+ uidhash[hashent] = up;
+ spin_unlock(&uidhash_lock);
+}
- if (nr_tasks >= NR_TASKS - MIN_TASKS_LEFT_FOR_ROOT) {
- if (current->uid)
+static inline void uid_hash_remove(struct uid_taskcount *up)
+{
+ spin_lock(&uidhash_lock);
+ if(up->next)
+ up->next->pprev = up->pprev;
+ *up->pprev = up->next;
+ spin_unlock(&uidhash_lock);
+}
+
+static inline struct uid_taskcount *uid_find(unsigned short uid, unsigned int hashent)
+{
+ struct uid_taskcount *up;
+
+ spin_lock(&uidhash_lock);
+ for(up = uidhash[hashent]; (up && up->uid != uid); up = up->next)
+ ;
+ spin_unlock(&uidhash_lock);
+ return up;
+}
+
+int charge_uid(struct task_struct *p, int count)
+{
+ unsigned int hashent = uidhashfn(p->uid);
+ struct uid_taskcount *up = uid_find(p->uid, hashent);
+
+ if(up) {
+ int limit = p->rlim[RLIMIT_NPROC].rlim_cur;
+ int newcnt = up->task_count + count;
+
+ if(newcnt > limit)
return -EAGAIN;
- }
- if (current->uid) {
- long max_tasks = current->rlim[RLIMIT_NPROC].rlim_cur;
-
- max_tasks--; /* count the new process.. */
- if (max_tasks < nr_tasks) {
- struct task_struct *p;
- read_lock(&tasklist_lock);
- for_each_task (p) {
- if (p->uid == current->uid)
- if (--max_tasks < 0) {
- read_unlock(&tasklist_lock);
- return -EAGAIN;
- }
- }
- read_unlock(&tasklist_lock);
+ else if(newcnt == 0) {
+ uid_hash_remove(up);
+ kmem_cache_free(uid_cachep, up);
+ return 0;
}
+ } else {
+ up = kmem_cache_alloc(uid_cachep, SLAB_KERNEL);
+ if(!up)
+ return -EAGAIN;
+ up->uid = p->uid;
+ up->task_count = 0;
+ uid_hash_insert(up, hashent);
}
- for (i = 0 ; i < NR_TASKS ; i++) {
- if (!task[i])
- return i;
+ up->task_count += count;
+ return 0;
+}
+
+void uidcache_init(void)
+{
+ int i;
+
+ uid_cachep = kmem_cache_create("uid_cache", sizeof(struct uid_taskcount),
+ 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if(!uid_cachep)
+ panic("Cannot create uid taskcount SLAB cache\n");
+
+ for(i = 0; i < UIDHASH_SZ; i++)
+ uidhash[i] = 0;
+}
+
+static inline int find_empty_process(void)
+{
+ struct task_struct **tslot;
+
+ if(current->uid) {
+ int error;
+
+ if(nr_tasks >= NR_TASKS - MIN_TASKS_LEFT_FOR_ROOT)
+ return -EAGAIN;
+ if((error = charge_uid(current, 1)) < 0)
+ return error;
}
+ tslot = get_free_taskslot();
+ if(tslot)
+ return tslot - &task[0];
return -EAGAIN;
}
+/* Protects next_safe and last_pid. */
+static spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED;
+
static int get_pid(unsigned long flags)
{
+ static int next_safe = PID_MAX;
struct task_struct *p;
if (flags & CLONE_PID)
return current->pid;
- read_lock(&tasklist_lock);
-repeat:
- if ((++last_pid) & 0xffff8000)
- last_pid=1;
- for_each_task (p) {
- if (p->pid == last_pid ||
- p->pgrp == last_pid ||
- p->session == last_pid)
- goto repeat;
+ spin_lock(&lastpid_lock);
+ if((++last_pid) & 0xffff8000) {
+ last_pid = 300; /* Skip daemons etc. */
+ goto inside;
+ }
+ if(last_pid >= next_safe) {
+inside:
+ next_safe = PID_MAX;
+ read_lock(&tasklist_lock);
+ repeat:
+ for_each_task(p) {
+ if(p->pid == last_pid ||
+ p->pgrp == last_pid ||
+ p->session == last_pid) {
+ if(++last_pid >= next_safe) {
+ if(last_pid & 0xffff8000)
+ last_pid = 300;
+ next_safe = PID_MAX;
+ goto repeat;
+ }
+ }
+ if(p->pid > last_pid && next_safe > p->pid)
+ next_safe = p->pid;
+ if(p->pgrp > last_pid && next_safe > p->pgrp)
+ next_safe = p->pgrp;
+ if(p->session > last_pid && next_safe > p->session)
+ next_safe = p->session;
+ }
+ read_unlock(&tasklist_lock);
}
- read_unlock(&tasklist_lock);
+ spin_unlock(&lastpid_lock);
return last_pid;
}
static inline int dup_mmap(struct mm_struct * mm)
{
- struct vm_area_struct * mpnt, **p, *tmp;
+ struct vm_area_struct * mpnt, *tmp, **pprev;
- mm->mmap = NULL;
- p = &mm->mmap;
+ mm->mmap = mm->mmap_cache = NULL;
flush_cache_mm(current->mm);
+ pprev = &mm->mmap;
for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
+ struct inode *inode;
+
tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (!tmp) {
exit_mmap(mm);
@@ -105,12 +214,18 @@ static inline int dup_mmap(struct mm_struct * mm)
tmp->vm_flags &= ~VM_LOCKED;
tmp->vm_mm = mm;
tmp->vm_next = NULL;
- if (tmp->vm_inode) {
- tmp->vm_inode->i_count++;
+ inode = tmp->vm_inode;
+ if (inode) {
+ inode->i_count++;
+ if (tmp->vm_flags & VM_DENYWRITE)
+ inode->i_writecount--;
+
/* insert tmp into the share list, just after mpnt */
- tmp->vm_next_share->vm_prev_share = tmp;
+ if((tmp->vm_next_share = mpnt->vm_next_share) != NULL)
+ mpnt->vm_next_share->vm_pprev_share =
+ &tmp->vm_next_share;
mpnt->vm_next_share = tmp;
- tmp->vm_prev_share = mpnt;
+ tmp->vm_pprev_share = &mpnt->vm_next_share;
}
if (copy_page_range(mm, current->mm, tmp)) {
exit_mmap(mm);
@@ -119,24 +234,35 @@ static inline int dup_mmap(struct mm_struct * mm)
}
if (tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);
- *p = tmp;
- p = &tmp->vm_next;
+
+ /* Ok, finally safe to link it in. */
+ if((tmp->vm_next = *pprev) != NULL)
+ (*pprev)->vm_pprev = &tmp->vm_next;
+ *pprev = tmp;
+ tmp->vm_pprev = pprev;
+
+ pprev = &tmp->vm_next;
}
flush_tlb_mm(current->mm);
- build_mmap_avl(mm);
return 0;
}
static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
if (!(clone_flags & CLONE_VM)) {
- struct mm_struct * mm = kmalloc(sizeof(*tsk->mm), GFP_KERNEL);
+ struct mm_struct * mm = kmem_cache_alloc(mm_cachep, SLAB_KERNEL);
if (!mm)
return -1;
*mm = *current->mm;
init_new_context(mm);
mm->count = 1;
mm->def_flags = 0;
+
+ /* It has not run yet, so cannot be present in anyone's
+ * cache or tlb.
+ */
+ mm->cpu_vm_mask = 0;
+
tsk->mm = mm;
tsk->min_flt = tsk->maj_flt = 0;
tsk->cmin_flt = tsk->cmaj_flt = 0;
@@ -146,7 +272,7 @@ static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
if (dup_mmap(mm)) {
free_page_tables(mm);
free_mm:
- kfree(mm);
+ kmem_cache_free(mm_cachep, mm);
return -1;
}
return 0;
@@ -232,20 +358,17 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
{
int nr;
int error = -ENOMEM;
- unsigned long new_stack;
struct task_struct *p;
lock_kernel();
p = alloc_task_struct();
if (!p)
goto bad_fork;
- new_stack = alloc_kernel_stack(p);
- if (!new_stack)
- goto bad_fork_free_p;
+
error = -EAGAIN;
nr = find_empty_process();
if (nr < 0)
- goto bad_fork_free_stack;
+ goto bad_fork_free;
*p = *current;
@@ -256,8 +379,6 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
p->did_exec = 0;
p->swappable = 0;
- p->kernel_stack_page = new_stack;
- *(unsigned long *) p->kernel_stack_page = STACK_MAGIC;
p->state = TASK_UNINTERRUPTIBLE;
p->flags &= ~(PF_PTRACED|PF_TRACESYS|PF_SUPERPRIV);
p->flags |= PF_FORKNOEXEC;
@@ -274,15 +395,18 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
p->real_timer.data = (unsigned long) p;
p->leader = 0; /* session leadership doesn't inherit */
p->tty_old_pgrp = 0;
- p->utime = p->stime = 0;
- p->cutime = p->cstime = 0;
+ p->times.tms_utime = p->times.tms_stime = 0;
+ p->times.tms_cutime = p->times.tms_cstime = 0;
#ifdef __SMP__
+ p->has_cpu = 0;
p->processor = NO_PROC_ID;
#endif
p->lock_depth = 0;
p->start_time = jiffies;
- task[nr] = p;
+ p->tarray_ptr = &task[nr];
+ *p->tarray_ptr = p;
SET_LINKS(p);
+ hash_pid(p);
nr_tasks++;
error = -ENOMEM;
@@ -330,16 +454,16 @@ bad_fork_cleanup_fs:
bad_fork_cleanup_files:
exit_files(p);
bad_fork_cleanup:
+ charge_uid(current, -1);
if (p->exec_domain && p->exec_domain->module)
__MOD_DEC_USE_COUNT(p->exec_domain->module);
if (p->binfmt && p->binfmt->module)
__MOD_DEC_USE_COUNT(p->binfmt->module);
- task[nr] = NULL;
+ add_free_taskslot(p->tarray_ptr);
+ unhash_pid(p);
REMOVE_LINKS(p);
nr_tasks--;
-bad_fork_free_stack:
- free_kernel_stack(new_stack);
-bad_fork_free_p:
+bad_fork_free:
free_task_struct(p);
bad_fork:
fork_out:
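
The interesting part of the new get_pid() above is the next_safe watermark: the task list is rescanned only when last_pid catches up to the smallest pid, pgrp or session id above it, so most forks allocate a pid without taking tasklist_lock at all. A toy userspace model of just that logic -- a sorted id array stands in for the task list, and PID_MAX is assumed to be 0x8000, matching the 0xffff8000 wrap mask:

#include <stdio.h>

#define PID_MAX 0x8000
static int used[PID_MAX];		/* toy stand-in for the task list */
static int last_pid = 0, next_safe = PID_MAX;

static int get_pid(void)
{
	if ((++last_pid) & 0xffff8000)
		last_pid = 300;		/* skip daemons etc., as in the patch */
	if (last_pid >= next_safe) {
		int pid;

		next_safe = PID_MAX;
	repeat:
		for (pid = 0; pid < PID_MAX; pid++) {	/* the for_each_task() scan */
			if (!used[pid])
				continue;
			if (pid == last_pid) {
				if (++last_pid >= next_safe) {
					if (last_pid & 0xffff8000)
						last_pid = 300;
					next_safe = PID_MAX;
					goto repeat;
				}
			}
			if (pid > last_pid && next_safe > pid)
				next_safe = pid;  /* nearest used id above us */
		}
	}
	return last_pid;
}

int main(void)
{
	used[301] = used[302] = 1;
	last_pid = 300;
	next_safe = 0;				/* force a scan on the first call */
	printf("%d\n", get_pid());		/* 303: 301 and 302 are taken */
	printf("%d\n", get_pid());		/* 304: no rescan, still below next_safe */
	return 0;
}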
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index f5f202c8e..ec0be876f 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -52,10 +52,6 @@
#include <linux/ctype.h>
#include <linux/file.h>
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
-extern struct drive_info_struct drive_info;
-#endif
-
extern unsigned char aux_device_present, kbd_read_mask;
#ifdef CONFIG_PCI
@@ -124,8 +120,14 @@ EXPORT_SYMBOL(exit_files);
/* internal kernel memory management */
EXPORT_SYMBOL(__get_free_pages);
EXPORT_SYMBOL(free_pages);
+EXPORT_SYMBOL(kmem_find_general_cachep);
+EXPORT_SYMBOL(kmem_cache_create);
+EXPORT_SYMBOL(kmem_cache_shrink);
+EXPORT_SYMBOL(kmem_cache_alloc);
+EXPORT_SYMBOL(kmem_cache_free);
EXPORT_SYMBOL(kmalloc);
EXPORT_SYMBOL(kfree);
+EXPORT_SYMBOL(kfree_s);
EXPORT_SYMBOL(vmalloc);
EXPORT_SYMBOL(vfree);
EXPORT_SYMBOL(mem_map);
@@ -134,10 +136,6 @@ EXPORT_SYMBOL(max_mapnr);
EXPORT_SYMBOL(num_physpages);
EXPORT_SYMBOL(high_memory);
EXPORT_SYMBOL(update_vm_cache);
-EXPORT_SYMBOL(kmem_cache_create);
-EXPORT_SYMBOL(kmem_cache_destroy);
-EXPORT_SYMBOL(kmem_cache_alloc);
-EXPORT_SYMBOL(kmem_cache_free);
/* filesystem internal functions */
EXPORT_SYMBOL(getname);
@@ -150,6 +148,7 @@ EXPORT_SYMBOL(lnamei);
EXPORT_SYMBOL(open_namei);
EXPORT_SYMBOL(sys_close);
EXPORT_SYMBOL(close_fp);
+EXPORT_SYMBOL(insert_file_free);
EXPORT_SYMBOL(check_disk_change);
EXPORT_SYMBOL(invalidate_buffers);
EXPORT_SYMBOL(invalidate_inodes);
@@ -215,10 +214,6 @@ EXPORT_SYMBOL(gendisk_head);
EXPORT_SYMBOL(resetup_one_dev);
EXPORT_SYMBOL(unplug_device);
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
-EXPORT_SYMBOL(drive_info);
-#endif
-
/* tty routines */
EXPORT_SYMBOL(tty_hangup);
EXPORT_SYMBOL(tty_wait_until_sent);
@@ -300,7 +295,6 @@ EXPORT_SYMBOL(wake_up_interruptible);
EXPORT_SYMBOL(sleep_on);
EXPORT_SYMBOL(interruptible_sleep_on);
EXPORT_SYMBOL(schedule);
-EXPORT_SYMBOL(current_set);
EXPORT_SYMBOL(jiffies);
EXPORT_SYMBOL(xtime);
EXPORT_SYMBOL(do_gettimeofday);
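
The export shuffle above makes the slab allocator the public allocation interface for modules: kmem_cache_create() and friends are now exported, while the old kmem_cache_destroy export is gone and kmem_cache_shrink() takes its place. A hedged sketch of how a module of this era might use it -- struct foo and foo_cachep are hypothetical, and the create() call uses the same six-argument signature as the uid_cachep setup in fork.c above:

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/errno.h>

struct foo { int x; };			/* hypothetical object type */
static kmem_cache_t *foo_cachep;	/* hypothetical cache */

int init_module(void)
{
	foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
				       0, SLAB_HWCACHE_ALIGN, NULL, NULL);
	return foo_cachep ? 0 : -ENOMEM;
}

void cleanup_module(void)
{
	/* kmem_cache_destroy() is no longer exported, so a module can
	 * only shrink its cache on unload in this revision. */
	kmem_cache_shrink(foo_cachep);
}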
diff --git a/kernel/panic.c b/kernel/panic.c
index deaa2f339..c5482bffe 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -15,6 +15,7 @@
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/reboot.h>
+#include <linux/init.h>
#include <asm/sgialib.h>
@@ -24,7 +25,7 @@ extern int C_A_D;
int panic_timeout = 0;
-void panic_setup(char *str, int *ints)
+__initfunc(void panic_setup(char *str, int *ints))
{
if (ints[0] == 1)
panic_timeout = ints[1];
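
panic.c, printk.c, resource.c, sched.c and sysctl.c in this commit all gain <linux/init.h> and wrap their boot-time setup functions in __initfunc(). Roughly what that macro does in this era's include/linux/init.h -- reconstructed from memory, not part of this diff -- is declare the function with a section attribute and then open the real definition, so the code lands in .text.init and can be discarded once boot completes:

#define __init __attribute__ ((__section__ (".text.init")))
#define __initfunc(__arginit) \
	__arginit __init; \
	__arginit

/*
 * __initfunc(void panic_setup(char *str, int *ints)) { ... }
 * thus expands to:
 *
 *	void panic_setup(char *str, int *ints) __init;
 *	void panic_setup(char *str, int *ints) { ... }
 */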
diff --git a/kernel/printk.c b/kernel/printk.c
index 0d5d619b0..3d409f2d5 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -25,6 +25,7 @@
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/console.h>
+#include <linux/init.h>
#include <asm/uaccess.h>
@@ -261,7 +262,7 @@ void unblank_console(void)
* print any messages that were printed by the kernel before the
* console driver was initialized.
*/
-void register_console(struct console * console)
+__initfunc(void register_console(struct console * console))
{
int i,j,len;
int p = log_start;
diff --git a/kernel/resource.c b/kernel/resource.c
index 27abcf4dc..ff7c7492a 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -12,6 +12,7 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/ioport.h>
+#include <linux/init.h>
#define IOTABLE_SIZE 128
@@ -181,7 +182,7 @@ unsigned long occupy_region(unsigned long base, unsigned long end,
#endif
/* Called from init/main.c to reserve IO ports. */
-void reserve_setup(char *str, int *ints)
+__initfunc(void reserve_setup(char *str, int *ints))
{
int i;
diff --git a/kernel/sched.c b/kernel/sched.c
index bc256d029..9f32305ee 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -32,6 +32,7 @@
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
+#include <linux/init.h>
#include <asm/system.h>
#include <asm/io.h>
@@ -88,21 +89,6 @@ unsigned long prof_shift = 0;
extern void mem_use(void);
-#ifdef __mips__
-unsigned long init_kernel_stack[2048] = { STACK_MAGIC, };
-unsigned long init_user_stack[2048] = { STACK_MAGIC, };
-#else
-unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
-unsigned long init_user_stack[1024] = { STACK_MAGIC, };
-#endif
-static struct vm_area_struct init_mmap = INIT_MMAP;
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
-static struct signal_struct init_signals = INIT_SIGNALS;
-
-struct mm_struct init_mm = INIT_MM;
-struct task_struct init_task = INIT_TASK;
-
unsigned long volatile jiffies=0;
/*
@@ -110,7 +96,6 @@ unsigned long volatile jiffies=0;
* via the SMP irq return path.
*/
-struct task_struct *current_set[NR_CPUS] = {&init_task, };
struct task_struct *last_task_used_math = NULL;
struct task_struct * task[NR_TASKS] = {&init_task, };
@@ -119,12 +104,6 @@ struct kernel_stat kstat = { 0 };
static inline void add_to_runqueue(struct task_struct * p)
{
-#if 1 /* sanity tests */
- if (p->next_run || p->prev_run) {
- printk("task already on run-queue\n");
- return;
- }
-#endif
if (p->counter > current->counter + 3)
need_resched = 1;
nr_running++;
@@ -138,20 +117,6 @@ static inline void del_from_runqueue(struct task_struct * p)
struct task_struct *next = p->next_run;
struct task_struct *prev = p->prev_run;
-#if 1 /* sanity tests */
- if (!next || !prev) {
- printk("task not on run-queue\n");
- return;
- }
-#endif
- if (!p->pid) {
- static int nr = 0;
- if (nr < 5) {
- nr++;
- printk("idle task may not sleep\n");
- }
- return;
- }
nr_running--;
next->prev_run = prev;
prev->next_run = next;
@@ -255,7 +220,7 @@ static inline int goodness(struct task_struct * p, struct task_struct * prev, in
#ifdef __SMP__
/* Give a largish advantage to the same processor... */
/* (this is equivalent to penalizing other processors) */
- if (p->last_processor == this_cpu)
+ if (p->processor == this_cpu)
weight += PROC_CHANGE_PENALTY;
#endif
@@ -267,10 +232,127 @@ static inline int goodness(struct task_struct * p, struct task_struct * prev, in
return weight;
}
+/*
+ * Event timer code
+ */
+#define TVN_BITS 6
+#define TVR_BITS 8
+#define TVN_SIZE (1 << TVN_BITS)
+#define TVR_SIZE (1 << TVR_BITS)
+#define TVN_MASK (TVN_SIZE - 1)
+#define TVR_MASK (TVR_SIZE - 1)
+
+struct timer_vec {
+ int index;
+ struct timer_list *vec[TVN_SIZE];
+};
+
+struct timer_vec_root {
+ int index;
+ struct timer_list *vec[TVR_SIZE];
+};
+
+static struct timer_vec tv5 = { 0 };
+static struct timer_vec tv4 = { 0 };
+static struct timer_vec tv3 = { 0 };
+static struct timer_vec tv2 = { 0 };
+static struct timer_vec_root tv1 = { 0 };
+
+static struct timer_vec * const tvecs[] = {
+ (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
+};
+
+#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
+
+static unsigned long timer_jiffies = 0;
+
+static inline void insert_timer(struct timer_list *timer,
+ struct timer_list **vec, int idx)
+{
+ if ((timer->next = vec[idx]))
+ vec[idx]->prev = timer;
+ vec[idx] = timer;
+ timer->prev = (struct timer_list *)&vec[idx];
+}
+
+static inline void internal_add_timer(struct timer_list *timer)
+{
+ /*
+ * must be cli-ed when calling this
+ */
+ unsigned long expires = timer->expires;
+ unsigned long idx = expires - timer_jiffies;
+
+ if (idx < TVR_SIZE) {
+ int i = expires & TVR_MASK;
+ insert_timer(timer, tv1.vec, i);
+ } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
+ int i = (expires >> TVR_BITS) & TVN_MASK;
+ insert_timer(timer, tv2.vec, i);
+ } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
+ insert_timer(timer, tv3.vec, i);
+ } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
+ int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
+ insert_timer(timer, tv4.vec, i);
+ } else if (expires < timer_jiffies) {
+ /* can happen if you add a timer with expires == jiffies,
+ * or you set a timer to go off in the past
+ */
+ insert_timer(timer, tv1.vec, tv1.index);
+ } else if (idx < 0xffffffffUL) {
+ int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
+ insert_timer(timer, tv5.vec, i);
+ } else {
+ /* Can only get here on architectures with 64-bit jiffies */
+ timer->next = timer->prev = timer;
+ }
+}
+
+static spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
+
+void add_timer(struct timer_list *timer)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ internal_add_timer(timer);
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+}
+
+static inline int detach_timer(struct timer_list *timer)
+{
+ int ret = 0;
+ struct timer_list *next, *prev;
+ next = timer->next;
+ prev = timer->prev;
+ if (next) {
+ next->prev = prev;
+ }
+ if (prev) {
+ ret = 1;
+ prev->next = next;
+ }
+ return ret;
+}
+
+
+int del_timer(struct timer_list * timer)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ ret = detach_timer(timer);
+ timer->next = timer->prev = 0;
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+ return ret;
+}
+
#ifdef __SMP__
#define idle_task (task[cpu_number_map[this_cpu]])
-#define can_schedule(p) ((p)->processor == NO_PROC_ID)
+#define can_schedule(p) (!(p)->has_cpu)
#else
@@ -297,12 +379,10 @@ asmlinkage void schedule(void)
int this_cpu;
need_resched = 0;
- this_cpu = smp_processor_id();
- if (local_irq_count[this_cpu]) {
- printk("Scheduling in interrupt\n");
- *(char *)0 = 0;
- }
prev = current;
+ this_cpu = smp_processor_id();
+ if (local_irq_count[this_cpu])
+ goto scheduling_in_interrupt;
release_kernel_lock(prev, this_cpu, lock_depth);
if (bh_active & bh_mask)
do_bottom_half();
@@ -312,16 +392,8 @@ asmlinkage void schedule(void)
/* move an exhausted RR process to be last.. */
if (!prev->counter && prev->policy == SCHED_RR) {
- if (prev->pid) {
- prev->counter = prev->priority;
- move_last_runqueue(prev);
- } else {
- static int count = 5;
- if (count) {
- count--;
- printk("Moving pid 0 last\n");
- }
- }
+ prev->counter = prev->priority;
+ move_last_runqueue(prev);
}
timeout = 0;
switch (prev->state) {
@@ -354,7 +426,7 @@ asmlinkage void schedule(void)
*/
spin_unlock_irq(&runqueue_lock);
#ifdef __SMP__
- prev->processor = NO_PROC_ID;
+ prev->has_cpu = 0;
#endif
/*
@@ -386,8 +458,10 @@ asmlinkage void schedule(void)
}
}
+#ifdef __SMP__
+ next->has_cpu = 1;
next->processor = this_cpu;
- next->last_processor = this_cpu;
+#endif
if (prev != next) {
struct timer_list timer;
@@ -410,6 +484,11 @@ asmlinkage void schedule(void)
spin_unlock(&scheduler_lock);
reacquire_kernel_lock(prev, smp_processor_id(), lock_depth);
+ return;
+
+scheduling_in_interrupt:
+ printk("Scheduling in interrupt\n");
+ *(int *)0 = 0;
}
#ifndef __alpha__
@@ -427,67 +506,53 @@ asmlinkage int sys_pause(void)
#endif
-spinlock_t waitqueue_lock;
+rwlock_t waitqueue_lock = RW_LOCK_UNLOCKED;
/*
* wake_up doesn't wake up stopped processes - they have to be awakened
* with signals or similar.
+ *
+ * Note that we only need a read lock for the wait queue (and thus do not
+ * have to protect against interrupts), as the actual removal from the
+ * queue is handled by the process itself.
*/
void wake_up(struct wait_queue **q)
{
- unsigned long flags;
struct wait_queue *next;
- struct wait_queue *head;
- spin_lock_irqsave(&waitqueue_lock, flags);
+ read_lock(&waitqueue_lock);
if (q && (next = *q)) {
+ struct wait_queue *head;
+
head = WAIT_QUEUE_HEAD(q);
while (next != head) {
struct task_struct *p = next->task;
next = next->next;
- if (p != NULL) {
- if ((p->state == TASK_UNINTERRUPTIBLE) ||
- (p->state == TASK_INTERRUPTIBLE))
- wake_up_process(p);
- }
- if (next)
- continue;
- printk("wait_queue is bad (eip = %p)\n",
- __builtin_return_address(0));
- printk(" q = %p\n",q);
- printk(" *q = %p\n",*q);
- break;
+ if ((p->state == TASK_UNINTERRUPTIBLE) ||
+ (p->state == TASK_INTERRUPTIBLE))
+ wake_up_process(p);
}
}
- spin_unlock_irqrestore(&waitqueue_lock, flags);
+ read_unlock(&waitqueue_lock);
}
void wake_up_interruptible(struct wait_queue **q)
{
- unsigned long flags;
struct wait_queue *next;
- struct wait_queue *head;
- spin_lock_irqsave(&waitqueue_lock, flags);
+ read_lock(&waitqueue_lock);
if (q && (next = *q)) {
+ struct wait_queue *head;
+
head = WAIT_QUEUE_HEAD(q);
while (next != head) {
struct task_struct *p = next->task;
next = next->next;
- if (p != NULL) {
- if (p->state == TASK_INTERRUPTIBLE)
- wake_up_process(p);
- }
- if (next)
- continue;
- printk("wait_queue is bad (eip = %p)\n",
- __builtin_return_address(0));
- printk(" q = %p\n",q);
- printk(" *q = %p\n",*q);
- break;
+ if (p->state == TASK_INTERRUPTIBLE)
+ wake_up_process(p);
}
}
- spin_unlock_irqrestore(&waitqueue_lock, flags);
+ read_unlock(&waitqueue_lock);
}
/*
@@ -606,17 +671,14 @@ static inline void __sleep_on(struct wait_queue **p, int state)
if (!p)
return;
- if (current == task[0])
- panic("task[0] trying to sleep");
current->state = state;
- spin_lock_irqsave(&waitqueue_lock, flags);
+ write_lock_irqsave(&waitqueue_lock, flags);
__add_wait_queue(p, &wait);
- spin_unlock(&waitqueue_lock);
- sti();
+ write_unlock(&waitqueue_lock);
schedule();
- spin_lock_irq(&waitqueue_lock);
+ write_lock_irq(&waitqueue_lock);
__remove_wait_queue(p, &wait);
- spin_unlock_irqrestore(&waitqueue_lock, flags);
+ write_unlock_irqrestore(&waitqueue_lock, flags);
}
void interruptible_sleep_on(struct wait_queue **p)
@@ -629,133 +691,6 @@ void sleep_on(struct wait_queue **p)
__sleep_on(p,TASK_UNINTERRUPTIBLE);
}
-
-#define TVN_BITS 6
-#define TVR_BITS 8
-#define TVN_SIZE (1 << TVN_BITS)
-#define TVR_SIZE (1 << TVR_BITS)
-#define TVN_MASK (TVN_SIZE - 1)
-#define TVR_MASK (TVR_SIZE - 1)
-
-#define SLOW_BUT_DEBUGGING_TIMERS 0
-
-struct timer_vec {
- int index;
- struct timer_list *vec[TVN_SIZE];
-};
-
-struct timer_vec_root {
- int index;
- struct timer_list *vec[TVR_SIZE];
-};
-
-static struct timer_vec tv5 = { 0 };
-static struct timer_vec tv4 = { 0 };
-static struct timer_vec tv3 = { 0 };
-static struct timer_vec tv2 = { 0 };
-static struct timer_vec_root tv1 = { 0 };
-
-static struct timer_vec * const tvecs[] = {
- (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
-};
-
-#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
-
-static unsigned long timer_jiffies = 0;
-
-static inline void insert_timer(struct timer_list *timer,
- struct timer_list **vec, int idx)
-{
- if ((timer->next = vec[idx]))
- vec[idx]->prev = timer;
- vec[idx] = timer;
- timer->prev = (struct timer_list *)&vec[idx];
-}
-
-static inline void internal_add_timer(struct timer_list *timer)
-{
- /*
- * must be cli-ed when calling this
- */
- unsigned long expires = timer->expires;
- unsigned long idx = expires - timer_jiffies;
-
- if (idx < TVR_SIZE) {
- int i = expires & TVR_MASK;
- insert_timer(timer, tv1.vec, i);
- } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
- int i = (expires >> TVR_BITS) & TVN_MASK;
- insert_timer(timer, tv2.vec, i);
- } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
- int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
- insert_timer(timer, tv3.vec, i);
- } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
- int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
- insert_timer(timer, tv4.vec, i);
- } else if (expires < timer_jiffies) {
- /* can happen if you add a timer with expires == jiffies,
- * or you set a timer to go off in the past
- */
- insert_timer(timer, tv1.vec, tv1.index);
- } else if (idx < 0xffffffffUL) {
- int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
- insert_timer(timer, tv5.vec, i);
- } else {
- /* Can only get here on architectures with 64-bit jiffies */
- timer->next = timer->prev = timer;
- }
-}
-
-static spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
-
-void add_timer(struct timer_list *timer)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&timerlist_lock, flags);
-#if SLOW_BUT_DEBUGGING_TIMERS
- if (timer->next || timer->prev) {
- printk("add_timer() called with non-zero list from %p\n",
- __builtin_return_address(0));
- goto out;
- }
-#endif
- internal_add_timer(timer);
-#if SLOW_BUT_DEBUGGING_TIMERS
-out:
-#endif
- spin_unlock_irqrestore(&timerlist_lock, flags);
-}
-
-static inline int detach_timer(struct timer_list *timer)
-{
- int ret = 0;
- struct timer_list *next, *prev;
- next = timer->next;
- prev = timer->prev;
- if (next) {
- next->prev = prev;
- }
- if (prev) {
- ret = 1;
- prev->next = next;
- }
- return ret;
-}
-
-
-int del_timer(struct timer_list * timer)
-{
- int ret;
- unsigned long flags;
-
- spin_lock_irqsave(&timerlist_lock, flags);
- ret = detach_timer(timer);
- timer->next = timer->prev = 0;
- spin_unlock_irqrestore(&timerlist_lock, flags);
- return ret;
-}
-
static inline void cascade_timers(struct timer_vec *tv)
{
/* cascade all the timers from tv up one level */
@@ -847,17 +782,18 @@ unsigned long avenrun[3] = { 0,0,0 };
*/
static unsigned long count_active_tasks(void)
{
- struct task_struct **p;
+ struct task_struct *p;
unsigned long nr = 0;
- for(p = &LAST_TASK; p > &FIRST_TASK; --p)
- if (*p && ((*p)->state == TASK_RUNNING ||
- (*p)->state == TASK_UNINTERRUPTIBLE ||
- (*p)->state == TASK_SWAPPING))
+ read_lock(&tasklist_lock);
+ for_each_task(p) {
+ if (p->pid &&
+ (p->state == TASK_RUNNING ||
+ p->state == TASK_UNINTERRUPTIBLE ||
+ p->state == TASK_SWAPPING))
nr += FIXED_1;
-#ifdef __SMP__
- nr-=(smp_num_cpus-1)*FIXED_1;
-#endif
+ }
+ read_unlock(&tasklist_lock);
return nr;
}
@@ -1065,16 +1001,14 @@ static inline void do_process_times(struct task_struct *p,
{
long psecs;
- p->utime += user;
- p->stime += system;
-
- psecs = (p->stime + p->utime) / HZ;
- if (psecs > p->rlim[RLIMIT_CPU].rlim_cur) {
+ psecs = (p->times.tms_utime += user);
+ psecs += (p->times.tms_stime += system);
+ if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
/* Send SIGXCPU every second.. */
- if (psecs * HZ == p->stime + p->utime)
+ if (!(psecs % HZ))
send_sig(SIGXCPU, p, 1);
/* and SIGKILL when we go over max.. */
- if (psecs > p->rlim[RLIMIT_CPU].rlim_max)
+ if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
send_sig(SIGKILL, p, 1);
}
}
@@ -1344,22 +1278,12 @@ asmlinkage int sys_nice(int increment)
#endif
-static struct task_struct *find_process_by_pid(pid_t pid)
+static inline struct task_struct *find_process_by_pid(pid_t pid)
{
- struct task_struct *p;
-
- p = current;
- if (pid) {
- read_lock(&tasklist_lock);
- for_each_task(p) {
- if (p->pid == pid)
- goto found;
- }
- p = NULL;
-found:
- read_unlock(&tasklist_lock);
- }
- return p;
+ if (pid)
+ return find_task_by_pid(pid);
+ else
+ return current;
}
static int setscheduler(pid_t pid, int policy,
@@ -1572,7 +1496,7 @@ asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
static void show_task(int nr,struct task_struct * p)
{
- unsigned long free;
+ unsigned long free = 0;
static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
@@ -1591,10 +1515,12 @@ static void show_task(int nr,struct task_struct * p)
else
printk(" %016lx ", thread_saved_pc(&p->tss));
#endif
+#if 0
for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
if (((unsigned long *)p->kernel_stack_page)[free])
break;
}
+#endif
printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
if (p->p_cptr)
printk("%5d ", p->p_cptr->pid);
@@ -1612,7 +1538,7 @@ static void show_task(int nr,struct task_struct * p)
void show_state(void)
{
- int i;
+ struct task_struct *p;
#if ((~0UL) == 0xffffffff)
printk("\n"
@@ -1623,25 +1549,30 @@ void show_state(void)
" free sibling\n");
printk(" task PC stack pid father child younger older\n");
#endif
- for (i=0 ; i<NR_TASKS ; i++)
- if (task[i])
- show_task(i,task[i]);
+ read_lock(&tasklist_lock);
+ for_each_task(p)
+ show_task((p->tarray_ptr - &task[0]),p);
+ read_unlock(&tasklist_lock);
}
-void sched_init(void)
+__initfunc(void sched_init(void))
{
/*
* We have to do a little magic to get the first
* process right in SMP mode.
*/
- int cpu=smp_processor_id();
-#ifndef __SMP__
- current_set[cpu]=&init_task;
-#else
+ int cpu=hard_smp_processor_id();
+ int nr = NR_TASKS;
+
init_task.processor=cpu;
- for(cpu = 0; cpu < NR_CPUS; cpu++)
- current_set[cpu] = &init_task;
-#endif
+
+ /* Init task array free list and pidhash table. */
+ while(--nr > 0)
+ add_free_taskslot(&task[nr]);
+
+ for(nr = 0; nr < PIDHASH_SZ; nr++)
+ pidhash[nr] = NULL;
+
init_bh(TIMER_BH, timer_bh);
init_bh(TQUEUE_BH, tqueue_bh);
init_bh(IMMEDIATE_BH, immediate_bh);
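
The timer code that moved up in this file is a hierarchical timer wheel: tv1 resolves the next 256 jiffies exactly, and each of tv2..tv5 holds 64 slots that are each 64 times coarser than the level below, with run_timer_list cascading expired slots downward. The slot arithmetic, extracted into a standalone userspace program (the constants are the patch's; the list plumbing and the past-expiry special case are omitted):

#include <stdio.h>

#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

/* Which vector/slot would internal_add_timer() pick? */
static void where(unsigned long expires, unsigned long timer_jiffies)
{
	unsigned long idx = expires - timer_jiffies;

	if (idx < TVR_SIZE)
		printf("tv1[%lu]\n", expires & TVR_MASK);
	else if (idx < 1UL << (TVR_BITS + TVN_BITS))
		printf("tv2[%lu]\n", (expires >> TVR_BITS) & TVN_MASK);
	else if (idx < 1UL << (TVR_BITS + 2 * TVN_BITS))
		printf("tv3[%lu]\n", (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK);
	else if (idx < 1UL << (TVR_BITS + 3 * TVN_BITS))
		printf("tv4[%lu]\n", (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK);
	else
		printf("tv5 (or the past-expiry slot in tv1)\n");
}

int main(void)
{
	where(100, 0);		/* tv1[100]: within the next 256 ticks */
	where(1000, 0);		/* tv2[3]:   1000 >> 8 == 3 */
	where(100000, 0);	/* tv3[6]:   100000 >> 14 == 6 */
	return 0;
}

The levels therefore span 2^8, 2^14, 2^20, 2^26 and 2^32 jiffies respectively; a timer is filed once and only touched again when its level cascades.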
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 6b9b41aa5..de398bcff 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -62,13 +62,14 @@ static inline void run_bottom_halves(void)
asmlinkage void do_bottom_half(void)
{
- int cpu = smp_processor_id();
+ if (softirq_trylock()) {
+ int cpu = smp_processor_id();
- if (hardirq_trylock(cpu)) {
- if (softirq_trylock()) {
+ if (hardirq_trylock(cpu)) {
+ __sti();
run_bottom_halves();
- softirq_endlock();
+ hardirq_endlock(cpu);
}
- hardirq_endlock(cpu);
+ softirq_endlock();
}
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 934108fa8..311527865 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -370,8 +370,8 @@ int acct_process(long exitcode)
if (acct_active) {
strncpy(ac.ac_comm, current->comm, ACCT_COMM);
ac.ac_comm[ACCT_COMM-1] = '\0';
- ac.ac_utime = current->utime;
- ac.ac_stime = current->stime;
+ ac.ac_utime = current->times.tms_utime;
+ ac.ac_stime = current->times.tms_stime;
ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ));
ac.ac_etime = CURRENT_TIME - ac.ac_btime;
ac.ac_uid = current->uid;
@@ -523,16 +523,15 @@ asmlinkage int sys_old_syscall(void)
*/
asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
{
- int old_ruid;
- int old_euid;
+ int old_ruid, old_euid, new_ruid;
- old_ruid = current->uid;
+ new_ruid = old_ruid = current->uid;
old_euid = current->euid;
if (ruid != (uid_t) -1) {
if ((old_ruid == ruid) ||
(current->euid==ruid) ||
suser())
- current->uid = ruid;
+ new_ruid = ruid;
else
return -EPERM;
}
@@ -542,10 +541,8 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
(current->suid == euid) ||
suser())
current->fsuid = current->euid = euid;
- else {
- current->uid = old_ruid;
+ else
return -EPERM;
- }
}
if (ruid != (uid_t) -1 ||
(euid != (uid_t) -1 && euid != old_ruid))
@@ -553,6 +550,18 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
current->fsuid = current->euid;
if (current->euid != old_euid)
current->dumpable = 0;
+
+ if(new_ruid != old_ruid) {
+ /* What if a process setreuid()'s and this brings the
+ * new uid over his NPROC rlimit? We can check this now
+ * cheaply with the new uid cache, so if it matters
+ * we should be checking for it. -DaveM
+ */
+ charge_uid(current, -1);
+ current->uid = new_ruid;
+ if(new_ruid)
+ charge_uid(current, 1);
+ }
return 0;
}
@@ -570,9 +579,11 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
asmlinkage int sys_setuid(uid_t uid)
{
int old_euid = current->euid;
+ int old_ruid, new_ruid;
+ old_ruid = new_ruid = current->uid;
if (suser())
- current->uid = current->euid = current->suid = current->fsuid = uid;
+ new_ruid = current->euid = current->suid = current->fsuid = uid;
else if ((uid == current->uid) || (uid == current->suid))
current->fsuid = current->euid = uid;
else
@@ -580,6 +591,14 @@ asmlinkage int sys_setuid(uid_t uid)
if (current->euid != old_euid)
current->dumpable = 0;
+
+ if(new_ruid != old_ruid) {
+ /* See comment above about NPROC rlimit issues... */
+ charge_uid(current, -1);
+ current->uid = new_ruid;
+ if(new_ruid)
+ charge_uid(current, 1);
+ }
return 0;
}
@@ -605,8 +624,13 @@ asmlinkage int sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
if ((suid != (uid_t) -1) && (suid != current->uid) &&
(suid != current->euid) && (suid != current->suid))
return -EPERM;
- if (ruid != (uid_t) -1)
+ if (ruid != (uid_t) -1) {
+ /* See above commentary about NPROC rlimit issues here. */
+ charge_uid(current, -1);
current->uid = ruid;
+ if(ruid)
+ charge_uid(current, 1);
+ }
if (euid != (uid_t) -1)
current->euid = euid;
if (suid != (uid_t) -1)
@@ -671,16 +695,9 @@ asmlinkage long sys_times(struct tms * tbuf)
* atomically safe type this is just fine. Conceptually its
* as if the syscall took an instant longer to occur.
*/
- if (tbuf)
- {
- /* ?? use copy_to_user() */
- if(!access_ok(VERIFY_READ, tbuf, sizeof(struct tms)) ||
- __put_user(current->utime,&tbuf->tms_utime)||
- __put_user(current->stime,&tbuf->tms_stime) ||
- __put_user(current->cutime,&tbuf->tms_cutime) ||
- __put_user(current->cstime,&tbuf->tms_cstime))
+ if (tbuf)
+ if (copy_to_user(tbuf, &current->times, sizeof(struct tms)))
return -EFAULT;
- }
return jiffies;
}
@@ -709,22 +726,13 @@ asmlinkage int sys_setpgid(pid_t pid, pid_t pgid)
if (pgid < 0)
return -EINVAL;
- read_lock(&tasklist_lock);
- for_each_task(p) {
- if (p->pid == pid) {
- /* NOTE: I haven't dropped tasklist_lock, this is
- * on purpose. -DaveM
- */
- goto found_task;
- }
- }
- read_unlock(&tasklist_lock);
- return -ESRCH;
+ if((p = find_task_by_pid(pid)) == NULL)
+ return -ESRCH;
-found_task:
/* From this point forward we keep holding onto the tasklist lock
* so that our parent does not change from under us. -DaveM
*/
+ read_lock(&tasklist_lock);
err = -ESRCH;
if (p->p_pptr == current || p->p_opptr == current) {
err = -EPERM;
@@ -762,18 +770,12 @@ asmlinkage int sys_getpgid(pid_t pid)
if (!pid) {
return current->pgrp;
} else {
- struct task_struct *p;
- int ret = -ESRCH;
-
- read_lock(&tasklist_lock);
- for_each_task(p) {
- if (p->pid == pid) {
- ret = p->pgrp;
- break;
- }
- }
- read_unlock(&tasklist_lock);
- return ret;
+ struct task_struct *p = find_task_by_pid(pid);
+
+ if(p)
+ return p->pgrp;
+ else
+ return -ESRCH;
}
}
@@ -785,25 +787,16 @@ asmlinkage int sys_getpgrp(void)
asmlinkage int sys_getsid(pid_t pid)
{
- struct task_struct * p;
- int ret;
-
- /* SMP: The 'self' case requires no lock */
if (!pid) {
- ret = current->session;
+ return current->session;
} else {
- ret = -ESRCH;
+ struct task_struct *p = find_task_by_pid(pid);
- read_lock(&tasklist_lock);
- for_each_task(p) {
- if (p->pid == pid) {
- ret = p->session;
- break;
- }
- }
- read_unlock(&tasklist_lock);
+ if(p)
+ return p->session;
+ else
+ return -ESRCH;
}
- return ret;
}
asmlinkage int sys_setsid(void)
@@ -1030,28 +1023,28 @@ int getrusage(struct task_struct *p, int who, struct rusage *ru)
memset((char *) &r, 0, sizeof(r));
switch (who) {
case RUSAGE_SELF:
- r.ru_utime.tv_sec = CT_TO_SECS(p->utime);
- r.ru_utime.tv_usec = CT_TO_USECS(p->utime);
- r.ru_stime.tv_sec = CT_TO_SECS(p->stime);
- r.ru_stime.tv_usec = CT_TO_USECS(p->stime);
+ r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime);
+ r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime);
+ r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime);
+ r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime);
r.ru_minflt = p->min_flt;
r.ru_majflt = p->maj_flt;
r.ru_nswap = p->nswap;
break;
case RUSAGE_CHILDREN:
- r.ru_utime.tv_sec = CT_TO_SECS(p->cutime);
- r.ru_utime.tv_usec = CT_TO_USECS(p->cutime);
- r.ru_stime.tv_sec = CT_TO_SECS(p->cstime);
- r.ru_stime.tv_usec = CT_TO_USECS(p->cstime);
+ r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_cutime);
+ r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_cutime);
+ r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_cstime);
+ r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_cstime);
r.ru_minflt = p->cmin_flt;
r.ru_majflt = p->cmaj_flt;
r.ru_nswap = p->cnswap;
break;
default:
- r.ru_utime.tv_sec = CT_TO_SECS(p->utime + p->cutime);
- r.ru_utime.tv_usec = CT_TO_USECS(p->utime + p->cutime);
- r.ru_stime.tv_sec = CT_TO_SECS(p->stime + p->cstime);
- r.ru_stime.tv_usec = CT_TO_USECS(p->stime + p->cstime);
+ r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime + p->times.tms_cutime);
+ r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime + p->times.tms_cutime);
+ r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime + p->times.tms_cstime);
+ r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime + p->times.tms_cstime);
r.ru_minflt = p->min_flt + p->cmin_flt;
r.ru_majflt = p->maj_flt + p->cmaj_flt;
r.ru_nswap = p->nswap + p->cnswap;
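
The sys_times() simplification earlier in this file works because utime/stime/cutime/cstime now live in task_struct as an embedded struct tms, byte-for-byte the layout userland expects, so the four __put_user() calls collapse into one copy_to_user(). The userland half of that contract, as a runnable check:

#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

/* Userland view of the structure sys_times() now copies out in one
 * go; the kernel keeps an identical struct tms as of this revision. */
int main(void)
{
	struct tms t;
	clock_t now = times(&t);

	printf("ret=%ld utime=%ld stime=%ld cutime=%ld cstime=%ld\n",
	       (long)now, (long)t.tms_utime, (long)t.tms_stime,
	       (long)t.tms_cutime, (long)t.tms_cstime);
	return 0;
}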
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9e0bb0fd8..3f2e86a6b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -24,6 +24,7 @@
#include <linux/swapctl.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
+#include <linux/init.h>
#include <asm/bitops.h>
#include <asm/uaccess.h>
@@ -210,7 +211,7 @@ static ctl_table dev_table[] = {
};
-void sysctl_init(void)
+__initfunc(void sysctl_init(void))
{
#ifdef CONFIG_PROC_FS
register_proc_table(root_table, &proc_sys_root);