author     Ralf Baechle <ralf@linux-mips.org>  1999-10-09 00:00:47 +0000
committer  Ralf Baechle <ralf@linux-mips.org>  1999-10-09 00:00:47 +0000
commit     d6434e1042f3b0a6dfe1b1f615af369486f9b1fa
tree       e2be02f33984c48ec019c654051d27964e42c441 /kernel
parent     609d1e803baf519487233b765eb487f9ec227a18
Merge with 2.3.19.
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/acct.c          4
-rw-r--r--  kernel/capability.c    6
-rw-r--r--  kernel/dma.c           2
-rw-r--r--  kernel/exec_domain.c   9
-rw-r--r--  kernel/exit.c         91
-rw-r--r--  kernel/fork.c        301
-rw-r--r--  kernel/info.c          4
-rw-r--r--  kernel/itimer.c        8
-rw-r--r--  kernel/ksyms.c        26
-rw-r--r--  kernel/module.c       22
-rw-r--r--  kernel/panic.c        10
-rw-r--r--  kernel/printk.c      156
-rw-r--r--  kernel/ptrace.c        7
-rw-r--r--  kernel/resource.c    405
-rw-r--r--  kernel/sched.c       471
-rw-r--r--  kernel/signal.c       26
-rw-r--r--  kernel/sys.c          69
-rw-r--r--  kernel/sysctl.c      237
-rw-r--r--  kernel/time.c         10
19 files changed, 1032 insertions, 832 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index c6142afc7..7e64105a8 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -146,7 +146,7 @@ static int check_free_space(struct file *file)
* should be written. If the filename is NULL, accounting will be
* shutdown.
*/
-asmlinkage int sys_acct(const char *name)
+asmlinkage long sys_acct(const char *name)
{
struct file *file = NULL, *old_acct = NULL;
char *tmp;
@@ -354,7 +354,7 @@ int acct_process(long exitcode)
* into the kernel.
*/
-asmlinkage int sys_acct(const char * filename)
+asmlinkage long sys_acct(const char * filename)
{
return -ENOSYS;
}
diff --git a/kernel/capability.c b/kernel/capability.c
index a4a1a3d03..2dbfe83f7 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -17,7 +17,9 @@ spinlock_t task_capability_lock;
* uninteresting and/or not to be changed.
*/
-asmlinkage int sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
+kernel_cap_t cap_bset = CAP_FULL_SET;
+
+asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
{
int error, pid;
__u32 version;
@@ -124,7 +126,7 @@ static void cap_set_all(kernel_cap_t *effective,
* E: must be set to a subset of (new target) Permitted
*/
-asmlinkage int sys_capset(cap_user_header_t header, const cap_user_data_t data)
+asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
{
kernel_cap_t inheritable, permitted, effective;
__u32 version;
diff --git a/kernel/dma.c b/kernel/dma.c
index 4ae38f4e5..e9f0f7a52 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -12,9 +12,9 @@
#include <linux/kernel.h>
#include <linux/errno.h>
+#include <linux/spinlock.h>
#include <asm/dma.h>
#include <asm/system.h>
-#include <asm/spinlock.h>
/* A note on resource allocation:
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 3c5881ee2..4aa968ee1 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -2,7 +2,7 @@
#include <linux/smp_lock.h>
#include <linux/module.h>
-static asmlinkage void no_lcall7(struct pt_regs * regs);
+static asmlinkage void no_lcall7(int segment, struct pt_regs * regs);
static unsigned long ident_map[32] = {
@@ -25,9 +25,8 @@ struct exec_domain default_exec_domain = {
static struct exec_domain *exec_domains = &default_exec_domain;
-static asmlinkage void no_lcall7(struct pt_regs * regs)
+static asmlinkage void no_lcall7(int segment, struct pt_regs * regs)
{
-
/*
* This may have been a static linked SVr4 binary, so we would have the
* personality set incorrectly. Check to see whether SVr4 is available,
@@ -44,7 +43,7 @@ static asmlinkage void no_lcall7(struct pt_regs * regs)
if (current->exec_domain && current->exec_domain->handler
&& current->exec_domain->handler != no_lcall7) {
- current->exec_domain->handler(regs);
+ current->exec_domain->handler(segment, regs);
return;
}
@@ -98,7 +97,7 @@ int unregister_exec_domain(struct exec_domain *it)
return -EINVAL;
}
-asmlinkage int sys_personality(unsigned long personality)
+asmlinkage long sys_personality(unsigned long personality)
{
struct exec_domain *it;
unsigned long old_personality;
diff --git a/kernel/exit.c b/kernel/exit.c
index a3d8a7547..39103a683 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -26,30 +26,20 @@ void release(struct task_struct * p)
{
if (p != current) {
#ifdef __SMP__
+ int has_cpu;
+
/*
- * Wait to make sure the process isn't active on any
- * other CPU
+ * Wait to make sure the process isn't on the
+ * runqueue (active on some other CPU still)
*/
- for (;;) {
- int has_cpu;
+ do {
spin_lock_irq(&runqueue_lock);
has_cpu = p->has_cpu;
spin_unlock_irq(&runqueue_lock);
- if (!has_cpu)
- break;
- do {
- barrier();
- } while (p->has_cpu);
- }
+ } while (has_cpu);
#endif
free_uid(p);
- nr_tasks--;
- add_free_taskslot(p->tarray_ptr);
-
- write_lock_irq(&tasklist_lock);
- unhash_pid(p);
- REMOVE_LINKS(p);
- write_unlock_irq(&tasklist_lock);
+ unhash_process(p);
release_thread(p);
current->cmin_flt += p->min_flt + p->cmin_flt;
@@ -159,11 +149,11 @@ static inline void close_files(struct files_struct * files)
j = 0;
for (;;) {
- unsigned long set = files->open_fds.fds_bits[j];
+ unsigned long set;
i = j * __NFDBITS;
- j++;
- if (i >= files->max_fds)
+ if (i >= files->max_fdset || i >= files->max_fds)
break;
+ set = files->open_fds->fds_bits[j++];
while (set) {
if (set & 1) {
struct file * file = xchg(&files->fd[i], NULL);
@@ -186,12 +176,14 @@ static inline void __exit_files(struct task_struct *tsk)
if (atomic_dec_and_test(&files->count)) {
close_files(files);
/*
- * Free the fd array as appropriate ...
+ * Free the fd and fdset arrays if we expanded them.
*/
- if (NR_OPEN * sizeof(struct file *) == PAGE_SIZE)
- free_page((unsigned long) files->fd);
- else
- kfree(files->fd);
+ if (files->fd != &files->fd_array[0])
+ free_fd_array(files->fd, files->max_fds);
+ if (files->max_fdset > __FD_SETSIZE) {
+ free_fdset(files->open_fds, files->max_fdset);
+ free_fdset(files->close_on_exec, files->max_fdset);
+ }
kmem_cache_free(files_cachep, files);
}
}
@@ -243,19 +235,44 @@ void exit_sighand(struct task_struct *tsk)
__exit_sighand(tsk);
}
+/*
+ * We can use these to temporarily drop into
+ * "lazy TLB" mode and back.
+ */
+struct mm_struct * start_lazy_tlb(void)
+{
+ struct mm_struct *mm = current->mm;
+ current->mm = NULL;
+ /* active_mm is still 'mm' */
+ atomic_inc(&mm->mm_count);
+ return mm;
+}
+
+void end_lazy_tlb(struct mm_struct *mm)
+{
+ struct mm_struct *active_mm = current->active_mm;
+
+ current->mm = mm;
+ if (mm != active_mm) {
+ current->active_mm = mm;
+ activate_mm(active_mm, mm);
+ }
+ mmdrop(active_mm);
+}
+
+/*
+ * Turn us into a lazy TLB process if we
+ * aren't already..
+ */
static inline void __exit_mm(struct task_struct * tsk)
{
struct mm_struct * mm = tsk->mm;
- /* Set us up to use the kernel mm state */
- if (mm != &init_mm) {
- flush_cache_mm(mm);
- flush_tlb_mm(mm);
- destroy_context(mm);
- tsk->mm = &init_mm;
- tsk->swappable = 0;
- SET_PAGE_DIR(tsk, swapper_pg_dir);
+ if (mm) {
+ atomic_inc(&mm->mm_count);
mm_release();
+ if (mm != tsk->active_mm) BUG();
+ tsk->mm = NULL;
mmput(mm);
}
}
@@ -395,12 +412,12 @@ fake_volatile:
goto fake_volatile;
}
-asmlinkage int sys_exit(int error_code)
+asmlinkage long sys_exit(int error_code)
{
do_exit((error_code&0xff)<<8);
}
-asmlinkage int sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
+asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
{
int flag, retval;
DECLARE_WAITQUEUE(wait, current);
@@ -488,13 +505,13 @@ end_wait4:
return retval;
}
-#ifndef __alpha__
+#if !defined(__alpha__) && !defined(__ia64__)
/*
* sys_waitpid() remains for compatibility. waitpid() should be
* implemented by calling sys_wait4() from libc.a.
*/
-asmlinkage int sys_waitpid(pid_t pid,unsigned int * stat_addr, int options)
+asmlinkage long sys_waitpid(pid_t pid,unsigned int * stat_addr, int options)
{
return sys_wait4(pid, stat_addr, options, NULL);
}
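
For illustration only (not part of the patch): a minimal sketch of how a kernel thread might use the lazy-TLB helpers added in the exit.c hunk above, assuming the 2.3-era declarations of start_lazy_tlb() and end_lazy_tlb() are visible via <linux/sched.h>.

#include <linux/sched.h>

/* Hypothetical kernel-only work section: drop our mm while we make no
 * user-space accesses, then restore it and release the extra reference. */
static void example_lazy_section(void)
{
	struct mm_struct *mm;

	mm = start_lazy_tlb();	/* current->mm becomes NULL, mm_count is pinned */
	/* ... long-running kernel work, no user-space accesses ... */
	end_lazy_tlb(mm);	/* restore current->mm and mmdrop() the old active_mm */
}
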
diff --git a/kernel/fork.c b/kernel/fork.c
index 12c580852..6520e1843 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -22,11 +22,12 @@
#include <asm/mmu_context.h>
#include <asm/uaccess.h>
-/* The idle tasks do not count.. */
-int nr_tasks=0;
+/* The idle threads do not count.. */
+int nr_threads=0;
int nr_running=0;
-unsigned long int total_forks=0; /* Handle normal Linux uptimes. */
+int max_threads;
+unsigned long total_forks = 0; /* Handle normal Linux uptimes. */
int last_pid=0;
/* SLAB cache for mm_struct's. */
@@ -37,9 +38,6 @@ kmem_cache_t *files_cachep;
struct task_struct *pidhash[PIDHASH_SZ];
-struct task_struct **tarray_freelist = NULL;
-spinlock_t taskslot_lock = SPIN_LOCK_UNLOCKED;
-
/* UID task count cache, to prevent walking entire process list every
* single fork() operation.
*/
@@ -159,7 +157,7 @@ int alloc_uid(struct task_struct *p)
return 0;
}
-void __init uidcache_init(void)
+void __init fork_init(unsigned long memsize)
{
int i;
@@ -171,15 +169,16 @@ void __init uidcache_init(void)
for(i = 0; i < UIDHASH_SZ; i++)
uidhash[i] = 0;
-}
-static inline struct task_struct ** find_empty_process(void)
-{
- struct task_struct **tslot = NULL;
+ /*
+ * The default maximum number of threads is set to a safe
+ * value: the thread structures can take up at most half
+ * of memory.
+ */
+ max_threads = memsize / THREAD_SIZE / 2;
- if ((nr_tasks < NR_TASKS - MIN_TASKS_LEFT_FOR_ROOT) || !current->uid)
- tslot = get_free_taskslot();
- return tslot;
+ init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
+ init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
}
/* Protects next_safe and last_pid. */
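
A worked example of the sizing rule above (an editorial illustration, assuming an i386-style THREAD_SIZE of 8 KB, i.e. two pages per task structure): with memsize = 64 MB, max_threads = 64*1024*1024 / 8192 / 2 = 4096, so task structures can never occupy more than half of memory, and init_task's RLIMIT_NPROC defaults to max_threads/2 = 2048.
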
@@ -233,6 +232,9 @@ static inline int dup_mmap(struct mm_struct * mm)
struct vm_area_struct * mpnt, *tmp, **pprev;
int retval;
+ /* Kill me slowly. UGLY! FIXME! */
+ memcpy(&mm->start_code, &current->mm->start_code, 15*sizeof(unsigned long));
+
flush_cache_mm(current->mm);
pprev = &mm->mmap;
for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
@@ -290,9 +292,6 @@ fail_nomem:
/*
* Allocate and initialize an mm_struct.
- *
- * NOTE! The mm mutex will be locked until the
- * caller decides that all systems are go..
*/
struct mm_struct * mm_alloc(void)
{
@@ -300,25 +299,41 @@ struct mm_struct * mm_alloc(void)
mm = kmem_cache_alloc(mm_cachep, SLAB_KERNEL);
if (mm) {
- *mm = *current->mm;
- init_new_context(mm);
- atomic_set(&mm->count, 1);
- mm->map_count = 0;
- mm->def_flags = 0;
- init_MUTEX_LOCKED(&mm->mmap_sem);
+ memset(mm, 0, sizeof(*mm));
+ atomic_set(&mm->mm_users, 1);
+ atomic_set(&mm->mm_count, 1);
+ init_MUTEX(&mm->mmap_sem);
mm->page_table_lock = SPIN_LOCK_UNLOCKED;
- /*
- * Leave mm->pgd set to the parent's pgd
- * so that pgd_offset() is always valid.
- */
- mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL;
+ mm->pgd = pgd_alloc();
+ if (mm->pgd)
+ return mm;
+ kmem_cache_free(mm_cachep, mm);
+ }
+ return NULL;
+}
- /* It has not run yet, so cannot be present in anyone's
- * cache or tlb.
- */
- mm->cpu_vm_mask = 0;
+/*
+ * Called when the last reference to the mm
+ * is dropped: either by a lazy thread or by
+ * mmput. Free the page directory and the mm.
+ */
+inline void __mmdrop(struct mm_struct *mm)
+{
+ if (mm == &init_mm) BUG();
+ pgd_free(mm->pgd);
+ destroy_context(mm);
+ kmem_cache_free(mm_cachep, mm);
+}
+
+/*
+ * Decrement the use count and release all resources for an mm.
+ */
+void mmput(struct mm_struct *mm)
+{
+ if (atomic_dec_and_test(&mm->mm_users)) {
+ exit_mmap(mm);
+ mmdrop(mm);
}
- return mm;
}
/* Please note the differences between mmput and mm_release.
@@ -345,20 +360,7 @@ void mm_release(void)
}
}
-/*
- * Decrement the use count and release all resources for an mm.
- */
-void mmput(struct mm_struct *mm)
-{
- if (atomic_dec_and_test(&mm->count)) {
- release_segments(mm);
- exit_mmap(mm);
- free_page_tables(mm);
- kmem_cache_free(mm_cachep, mm);
- }
-}
-
-static inline int copy_mm(int nr, unsigned long clone_flags, struct task_struct * tsk)
+static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
struct mm_struct * mm;
int retval;
@@ -367,14 +369,21 @@ static inline int copy_mm(int nr, unsigned long clone_flags, struct task_struct
tsk->cmin_flt = tsk->cmaj_flt = 0;
tsk->nswap = tsk->cnswap = 0;
- if (clone_flags & CLONE_VM) {
- mmget(current->mm);
- /*
- * Set up the LDT descriptor for the clone task.
- */
- copy_segments(nr, tsk, NULL);
- SET_PAGE_DIR(tsk, current->mm->pgd);
+ tsk->mm = NULL;
+ tsk->active_mm = NULL;
+
+ /*
+ * Are we cloning a kernel thread?
+ *
+ * We need to steal a active VM for that..
+ */
+ mm = current->mm;
+ if (!mm)
return 0;
+
+ if (clone_flags & CLONE_VM) {
+ atomic_inc(&mm->mm_users);
+ goto good_mm;
}
retval = -ENOMEM;
@@ -383,23 +392,26 @@ static inline int copy_mm(int nr, unsigned long clone_flags, struct task_struct
goto fail_nomem;
tsk->mm = mm;
- copy_segments(nr, tsk, mm);
- retval = new_page_tables(tsk);
- if (retval)
- goto free_mm;
+ tsk->active_mm = mm;
+
+ /*
+ * child gets a private LDT (if there was an LDT in the parent)
+ */
+ copy_segments(tsk, mm);
+
+ down(&current->mm->mmap_sem);
retval = dup_mmap(mm);
+ up(&current->mm->mmap_sem);
if (retval)
goto free_pt;
- up(&mm->mmap_sem);
+
+good_mm:
+ tsk->mm = mm;
+ tsk->active_mm = mm;
+ init_new_context(tsk,mm);
return 0;
-free_mm:
- tsk->mm = NULL;
- release_segments(mm);
- kmem_cache_free(mm_cachep, mm);
- return retval;
free_pt:
- tsk->mm = NULL;
mmput(mm);
fail_nomem:
return retval;
@@ -421,32 +433,24 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
return 0;
}
-/*
- * Copy a fd_set and compute the maximum fd it contains.
- */
-static inline int __copy_fdset(unsigned long *d, unsigned long *src)
+static int count_open_files(struct files_struct *files, int size)
{
- int i;
- unsigned long *p = src;
- unsigned long *max = src;
-
- for (i = __FDSET_LONGS; i; --i) {
- if ((*d++ = *p++) != 0)
- max = p;
+ int i;
+
+ /* Find the last open fd */
+ for (i = size/(8*sizeof(long)); i > 0; ) {
+ if (files->open_fds->fds_bits[--i])
+ break;
}
- return (max - src)*sizeof(long)*8;
-}
-
-static inline int copy_fdset(fd_set *dst, fd_set *src)
-{
- return __copy_fdset(dst->fds_bits, src->fds_bits);
+ i = (i+1) * 8 * sizeof(long);
+ return i;
}
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
struct files_struct *oldf, *newf;
struct file **old_fds, **new_fds;
- int size, i, error = 0;
+ int open_files, nfds, size, i, error = 0;
/*
* A background process may not have any files ...
@@ -466,43 +470,85 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
if (!newf)
goto out;
- /*
- * Allocate the fd array, using get_free_page() if possible.
- * Eventually we want to make the array size variable ...
- */
- size = NR_OPEN * sizeof(struct file *);
- if (size == PAGE_SIZE)
- new_fds = (struct file **) __get_free_page(GFP_KERNEL);
- else
- new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
- if (!new_fds)
- goto out_release;
-
- newf->file_lock = RW_LOCK_UNLOCKED;
atomic_set(&newf->count, 1);
- newf->max_fds = NR_OPEN;
- newf->fd = new_fds;
+
+ newf->file_lock = RW_LOCK_UNLOCKED;
+ newf->next_fd = 0;
+ newf->max_fds = NR_OPEN_DEFAULT;
+ newf->max_fdset = __FD_SETSIZE;
+ newf->close_on_exec = &newf->close_on_exec_init;
+ newf->open_fds = &newf->open_fds_init;
+ newf->fd = &newf->fd_array[0];
+
+ /* We don't yet have the oldf readlock, but even if the old
+ fdset gets grown now, we'll only copy up to "size" fds */
+ size = oldf->max_fdset;
+ if (size > __FD_SETSIZE) {
+ newf->max_fdset = 0;
+ write_lock(&newf->file_lock);
+ error = expand_fdset(newf, size);
+ write_unlock(&newf->file_lock);
+ if (error)
+ goto out_release;
+ }
read_lock(&oldf->file_lock);
- newf->close_on_exec = oldf->close_on_exec;
- i = copy_fdset(&newf->open_fds, &oldf->open_fds);
+
+ open_files = count_open_files(oldf, size);
+
+ /*
+ * Check whether we need to allocate a larger fd array.
+ * Note: we're not a clone task, so the open count won't
+ * change.
+ */
+ nfds = NR_OPEN_DEFAULT;
+ if (open_files > nfds) {
+ read_unlock(&oldf->file_lock);
+ newf->max_fds = 0;
+ write_lock(&newf->file_lock);
+ error = expand_fd_array(newf, open_files);
+ write_unlock(&newf->file_lock);
+ if (error)
+ goto out_release;
+ nfds = newf->max_fds;
+ read_lock(&oldf->file_lock);
+ }
old_fds = oldf->fd;
- for (; i != 0; i--) {
+ new_fds = newf->fd;
+
+ memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
+ memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);
+
+ for (i = open_files; i != 0; i--) {
struct file *f = *old_fds++;
if (f)
get_file(f);
*new_fds++ = f;
}
read_unlock(&oldf->file_lock);
+
+ /* compute the remainder to be cleared */
+ size = (newf->max_fds - open_files) * sizeof(struct file *);
+
/* This is long word aligned thus could use a optimized version */
- memset(new_fds, 0, (char *)newf->fd + size - (char *)new_fds);
-
+ memset(new_fds, 0, size);
+
+ if (newf->max_fdset > open_files) {
+ int left = (newf->max_fdset-open_files)/8;
+ int start = open_files / (8 * sizeof(unsigned long));
+
+ memset(&newf->open_fds->fds_bits[start], 0, left);
+ memset(&newf->close_on_exec->fds_bits[start], 0, left);
+ }
+
tsk->files = newf;
error = 0;
out:
return error;
out_release:
+ free_fdset (newf->close_on_exec, newf->max_fdset);
+ free_fdset (newf->open_fds, newf->max_fdset);
kmem_cache_free(files_cachep, newf);
goto out;
}
@@ -542,7 +588,6 @@ static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
*/
int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
{
- int nr;
int retval = -ENOMEM;
struct task_struct *p;
DECLARE_MUTEX_LOCKED(sem);
@@ -555,7 +600,6 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
*p = *current;
- down(&current->mm->mmap_sem);
lock_kernel();
retval = -EAGAIN;
@@ -565,15 +609,12 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
atomic_inc(&p->user->count);
}
- {
- struct task_struct **tslot;
- tslot = find_empty_process();
- if (!tslot)
- goto bad_fork_cleanup_count;
- p->tarray_ptr = tslot;
- *tslot = p;
- nr = tslot - &task[0];
- }
+ /*
+ * Counter atomicity is protected by
+ * the kernel lock
+ */
+ if (nr_threads >= max_threads)
+ goto bad_fork_cleanup_count;
if (p->exec_domain && p->exec_domain->module)
__MOD_INC_USE_COUNT(p->exec_domain->module);
@@ -594,10 +635,11 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
* very end).
*/
p->state = TASK_RUNNING;
- p->next_run = p;
- p->prev_run = p;
+ p->run_list.next = NULL;
+ p->run_list.prev = NULL;
- p->p_pptr = p->p_opptr = current;
+ if ((clone_flags & CLONE_VFORK) || !(clone_flags & CLONE_PARENT))
+ p->p_pptr = p->p_opptr = current;
p->p_cptr = NULL;
init_waitqueue_head(&p->wait_chldexit);
p->vfork_sem = NULL;
@@ -638,9 +680,9 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
goto bad_fork_cleanup_files;
if (copy_sighand(clone_flags, p))
goto bad_fork_cleanup_fs;
- if (copy_mm(nr, clone_flags, p))
+ if (copy_mm(clone_flags, p))
goto bad_fork_cleanup_sighand;
- retval = copy_thread(nr, clone_flags, usp, p, regs);
+ retval = copy_thread(0, clone_flags, usp, p, regs);
if (retval)
goto bad_fork_cleanup_sighand;
p->semundo = NULL;
@@ -666,22 +708,17 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
* Let it rip!
*/
retval = p->pid;
- if (retval) {
- write_lock_irq(&tasklist_lock);
- SET_LINKS(p);
- hash_pid(p);
- write_unlock_irq(&tasklist_lock);
-
- nr_tasks++;
+ write_lock_irq(&tasklist_lock);
+ SET_LINKS(p);
+ hash_pid(p);
+ write_unlock_irq(&tasklist_lock);
- p->next_run = NULL;
- p->prev_run = NULL;
- wake_up_process(p); /* do this last */
- }
+ nr_threads++;
+ wake_up_process(p); /* do this last */
++total_forks;
+
bad_fork:
unlock_kernel();
- up(&current->mm->mmap_sem);
fork_out:
if ((clone_flags & CLONE_VFORK) && (retval > 0))
down(&sem);
@@ -699,7 +736,7 @@ bad_fork_cleanup:
if (p->binfmt && p->binfmt->module)
__MOD_DEC_USE_COUNT(p->binfmt->module);
- add_free_taskslot(p->tarray_ptr);
+ nr_threads--;
bad_fork_cleanup_count:
if (p->user)
free_uid(p);
diff --git a/kernel/info.c b/kernel/info.c
index 1dffddc7b..3ee347444 100644
--- a/kernel/info.c
+++ b/kernel/info.c
@@ -13,7 +13,7 @@
#include <asm/uaccess.h>
-asmlinkage int sys_sysinfo(struct sysinfo *info)
+asmlinkage long sys_sysinfo(struct sysinfo *info)
{
struct sysinfo val;
@@ -26,7 +26,7 @@ asmlinkage int sys_sysinfo(struct sysinfo *info)
val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
- val.procs = nr_tasks-1;
+ val.procs = nr_threads-1;
sti();
si_meminfo(&val);
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 1b4661c39..7d38ac1ac 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -75,7 +75,7 @@ int do_getitimer(int which, struct itimerval *value)
}
/* SMP: Only we modify our itimer values. */
-asmlinkage int sys_getitimer(int which, struct itimerval *value)
+asmlinkage long sys_getitimer(int which, struct itimerval *value)
{
int error = -EFAULT;
struct itimerval get_buffer;
@@ -149,15 +149,13 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
/* SMP: Again, only we play with our itimers, and signals are SMP safe
* now so that is not an issue at all anymore.
*/
-asmlinkage int sys_setitimer(int which, struct itimerval *value,
- struct itimerval *ovalue)
+asmlinkage long sys_setitimer(int which, struct itimerval *value,
+ struct itimerval *ovalue)
{
struct itimerval set_buffer, get_buffer;
int error;
if (value) {
- if(verify_area(VERIFY_READ, value, sizeof(*value)))
- return -EFAULT;
if(copy_from_user(&set_buffer, value, sizeof(set_buffer)))
return -EFAULT;
} else
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index 798015eaa..71761a3b1 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -39,6 +39,7 @@
#include <linux/console.h>
#include <linux/poll.h>
#include <linux/mm.h>
+#include <linux/capability.h>
#if defined(CONFIG_PROC_FS)
#include <linux/proc_fs.h>
@@ -47,7 +48,6 @@
#include <linux/kmod.h>
#endif
-extern char *get_options(char *str, int *ints);
extern void set_device_ro(kdev_t dev,int flag);
extern struct file_operations * get_blkfops(unsigned int);
extern int blkdev_release(struct inode * inode);
@@ -77,6 +77,7 @@ EXPORT_SYMBOL(request_module);
#ifdef CONFIG_MODULES
EXPORT_SYMBOL(get_module_symbol);
#endif
+EXPORT_SYMBOL(get_option);
EXPORT_SYMBOL(get_options);
/* process memory management */
@@ -94,6 +95,7 @@ EXPORT_SYMBOL(free_pages);
EXPORT_SYMBOL(__free_page);
EXPORT_SYMBOL(kmem_find_general_cachep);
EXPORT_SYMBOL(kmem_cache_create);
+EXPORT_SYMBOL(kmem_cache_destroy);
EXPORT_SYMBOL(kmem_cache_shrink);
EXPORT_SYMBOL(kmem_cache_alloc);
EXPORT_SYMBOL(kmem_cache_free);
@@ -109,6 +111,7 @@ EXPORT_SYMBOL(high_memory);
EXPORT_SYMBOL(vmtruncate);
EXPORT_SYMBOL(find_vma);
EXPORT_SYMBOL(get_unmapped_area);
+EXPORT_SYMBOL(init_mm);
/* filesystem internal functions */
EXPORT_SYMBOL(in_group_p);
@@ -116,7 +119,6 @@ EXPORT_SYMBOL(update_atime);
EXPORT_SYMBOL(get_super);
EXPORT_SYMBOL(get_fs_type);
EXPORT_SYMBOL(getname);
-EXPORT_SYMBOL(__fput); /* goner? */
EXPORT_SYMBOL(_fput);
EXPORT_SYMBOL(igrab);
EXPORT_SYMBOL(iunique);
@@ -168,10 +170,13 @@ EXPORT_SYMBOL(add_blkdev_randomness);
EXPORT_SYMBOL(block_read_full_page);
EXPORT_SYMBOL(block_write_full_page);
EXPORT_SYMBOL(block_write_partial_page);
+EXPORT_SYMBOL(block_write_cont_page);
EXPORT_SYMBOL(block_flushpage);
EXPORT_SYMBOL(generic_file_read);
+EXPORT_SYMBOL(do_generic_file_read);
EXPORT_SYMBOL(generic_file_write);
EXPORT_SYMBOL(generic_file_mmap);
+EXPORT_SYMBOL(generic_buffer_fdatasync);
EXPORT_SYMBOL(page_hash_bits);
EXPORT_SYMBOL(page_hash_table);
EXPORT_SYMBOL(file_lock_table);
@@ -193,7 +198,10 @@ EXPORT_SYMBOL(vfs_unlink);
EXPORT_SYMBOL(vfs_rename);
EXPORT_SYMBOL(__pollwait);
EXPORT_SYMBOL(ROOT_DEV);
-
+EXPORT_SYMBOL(add_to_page_cache_unique);
+EXPORT_SYMBOL(__find_get_page);
+EXPORT_SYMBOL(__find_lock_page);
+
#if !defined(CONFIG_NFSD) && defined(CONFIG_NFSD_MODULE)
EXPORT_SYMBOL(do_nfsservctl);
#endif
@@ -270,6 +278,8 @@ EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
+EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
+EXPORT_SYMBOL(proc_doulongvec_minmax);
/* interrupt handling */
EXPORT_SYMBOL(request_irq);
@@ -308,11 +318,14 @@ EXPORT_SYMBOL(enable_hlt);
#endif
/* resource handling */
-EXPORT_SYMBOL(check_resource);
EXPORT_SYMBOL(request_resource);
EXPORT_SYMBOL(release_resource);
-EXPORT_SYMBOL(occupy_resource);
-EXPORT_SYMBOL(vacate_resource);
+EXPORT_SYMBOL(allocate_resource);
+EXPORT_SYMBOL(__request_region);
+EXPORT_SYMBOL(__check_region);
+EXPORT_SYMBOL(__release_region);
+EXPORT_SYMBOL(ioport_resource);
+EXPORT_SYMBOL(iomem_resource);
/* process management */
EXPORT_SYMBOL(__wake_up);
@@ -350,6 +363,7 @@ EXPORT_SYMBOL(_ctype);
EXPORT_SYMBOL(secure_tcp_sequence_number);
EXPORT_SYMBOL(get_random_bytes);
EXPORT_SYMBOL(securebits);
+EXPORT_SYMBOL(cap_bset);
/* Program loader interfaces */
EXPORT_SYMBOL(setup_arg_pages);
diff --git a/kernel/module.c b/kernel/module.c
index c5591db7f..6f4ad977d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -57,7 +57,7 @@ static void free_module(struct module *, int tag_freed);
* Called at boot time
*/
-__initfunc(void init_modules(void))
+void __init init_modules(void)
{
kernel_module.nsyms = __stop___ksymtab - __start___ksymtab;
@@ -76,10 +76,6 @@ get_mod_name(const char *user_name, char **buf)
unsigned long page;
long retval;
- if ((unsigned long)user_name >= TASK_SIZE
- && !segment_eq(get_fs (), KERNEL_DS))
- return -EFAULT;
-
page = __get_free_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
@@ -161,7 +157,7 @@ err0:
* Initialize a module.
*/
-asmlinkage int
+asmlinkage long
sys_init_module(const char *name_user, struct module *mod_user)
{
struct module mod_tmp, *mod;
@@ -353,7 +349,7 @@ err0:
return error;
}
-asmlinkage int
+asmlinkage long
sys_delete_module(const char *name_user)
{
struct module *mod, *next;
@@ -628,7 +624,7 @@ qm_info(struct module *mod, char *buf, size_t bufsize, size_t *ret)
return error;
}
-asmlinkage int
+asmlinkage long
sys_query_module(const char *name_user, int which, char *buf, size_t bufsize,
size_t *ret)
{
@@ -693,7 +689,7 @@ out:
* which does not arbitrarily limit the length of symbols.
*/
-asmlinkage int
+asmlinkage long
sys_get_kernel_syms(struct kernel_sym *table)
{
struct module *mod;
@@ -981,19 +977,19 @@ sys_create_module(const char *name_user, size_t size)
return -ENOSYS;
}
-asmlinkage int
+asmlinkage long
sys_init_module(const char *name_user, struct module *mod_user)
{
return -ENOSYS;
}
-asmlinkage int
+asmlinkage long
sys_delete_module(const char *name_user)
{
return -ENOSYS;
}
-asmlinkage int
+asmlinkage long
sys_query_module(const char *name_user, int which, char *buf, size_t bufsize,
size_t *ret)
{
@@ -1005,7 +1001,7 @@ sys_query_module(const char *name_user, int which, char *buf, size_t bufsize,
return -ENOSYS;
}
-asmlinkage int
+asmlinkage long
sys_get_kernel_syms(struct kernel_sym *table)
{
return -ENOSYS;
diff --git a/kernel/panic.c b/kernel/panic.c
index 51ee692b5..48168d864 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -28,12 +28,14 @@ int panic_timeout = 0;
struct notifier_block *panic_notifier_list = NULL;
-void __init panic_setup(char *str, int *ints)
+static int __init panic_setup(char *str)
{
- if (ints[0] == 1)
- panic_timeout = ints[1];
+ panic_timeout = simple_strtoul(str, NULL, 0);
+ return 1;
}
+__setup("panic=", panic_setup);
+
NORET_TYPE void panic(const char * fmt, ...)
{
static char buf[1024];
@@ -43,7 +45,7 @@ NORET_TYPE void panic(const char * fmt, ...)
vsprintf(buf, fmt, args);
va_end(args);
printk(KERN_EMERG "Kernel panic: %s\n",buf);
- if (current == task[0])
+ if (current == init_tasks[0])
printk(KERN_EMERG "In swapper task - not syncing\n");
else if (in_interrupt())
printk(KERN_EMERG "In interrupt handler - not syncing\n");
diff --git a/kernel/printk.c b/kernel/printk.c
index c2cbd4a78..330ce3efe 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -10,6 +10,8 @@
* elsewhere, in preparation for a serial line console (someday).
* Ted Ts'o, 2/11/93.
* Modified for sysctl support, 1/8/97, Chris Horn.
+ * Fixed SMP synchronization, 08/08/99, Manfred Spraul
+ * manfreds@colorfullife.com
*/
#include <linux/mm.h>
@@ -21,6 +23,7 @@
#include <asm/uaccess.h>
#define LOG_BUF_LEN (16384)
+#define LOG_BUF_MASK (LOG_BUF_LEN-1)
static char buf[1024];
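
A note on the masking idiom introduced here: LOG_BUF_LEN is 16384 = 2^14, a power of two, so indexing with i & LOG_BUF_MASK (i.e. i & 0x3fff) is equivalent to i % LOG_BUF_LEN. The later hunks rely on this to let log_start grow freely and apply the mask only where the buffer is indexed, instead of wrapping log_start in place as the old code did.
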
@@ -40,6 +43,8 @@ int default_message_loglevel = DEFAULT_MESSAGE_LOGLEVEL;
int minimum_console_loglevel = MINIMUM_CONSOLE_LOGLEVEL;
int default_console_loglevel = DEFAULT_CONSOLE_LOGLEVEL;
+spinlock_t console_lock = SPIN_LOCK_UNLOCKED;
+
struct console *console_drivers = NULL;
static char log_buf[LOG_BUF_LEN];
static unsigned long log_start = 0;
@@ -50,7 +55,7 @@ static int preferred_console = -1;
/*
* Setup a list of consoles. Called from init/main.c
*/
-void __init console_setup(char *str, int *ints)
+static int __init console_setup(char *str)
{
struct console_cmdline *c;
char name[sizeof(c->name)];
@@ -88,17 +93,19 @@ void __init console_setup(char *str, int *ints)
if (strcmp(console_cmdline[i].name, name) == 0 &&
console_cmdline[i].index == idx) {
preferred_console = i;
- return;
+ return 1;
}
if (i == MAX_CMDLINECONSOLES)
- return;
+ return 1;
preferred_console = i;
c = &console_cmdline[i];
memcpy(c->name, name, sizeof(c->name));
c->options = options;
c->index = idx;
+ return 1;
}
+__setup("console=", console_setup);
/*
* Commands to do_syslog:
@@ -115,12 +122,11 @@ void __init console_setup(char *str, int *ints)
*/
int do_syslog(int type, char * buf, int len)
{
- unsigned long i, j, count, flags;
+ unsigned long i, j, limit, count;
int do_clear = 0;
char c;
int error = -EPERM;
- lock_kernel();
error = 0;
switch (type) {
case 0: /* Close log */
@@ -141,18 +147,18 @@ int do_syslog(int type, char * buf, int len)
if (error)
goto out;
i = 0;
+ spin_lock_irq(&console_lock);
while (log_size && i < len) {
- c = *((char *) log_buf+log_start);
+ c = log_buf[log_start & LOG_BUF_MASK];
log_start++;
log_size--;
- log_start &= LOG_BUF_LEN-1;
- sti();
+ spin_unlock_irq(&console_lock);
__put_user(c,buf);
buf++;
i++;
- cli();
+ spin_lock_irq(&console_lock);
}
- sti();
+ spin_unlock_irq(&console_lock);
error = i;
break;
case 4: /* Read/clear last kernel messages */
@@ -168,35 +174,56 @@ int do_syslog(int type, char * buf, int len)
error = verify_area(VERIFY_WRITE,buf,len);
if (error)
goto out;
- /*
- * The logged_chars, log_start, and log_size values may
- * change from an interrupt, so we disable interrupts.
- */
- __save_flags(flags);
- __cli();
count = len;
if (count > LOG_BUF_LEN)
count = LOG_BUF_LEN;
+ spin_lock_irq(&console_lock);
if (count > logged_chars)
count = logged_chars;
- j = log_start + log_size - count;
- __restore_flags(flags);
- for (i = 0; i < count; i++) {
- c = *((char *) log_buf+(j++ & (LOG_BUF_LEN-1)));
- __put_user(c, buf++);
- }
if (do_clear)
logged_chars = 0;
+ limit = log_start + log_size;
+ /*
+ * __put_user() could sleep, and while we sleep
+ * printk() could overwrite the messages
+ * we try to copy to user space. Therefore
+ * the messages are copied in reverse. <manfreds>
+ */
+ for(i=0;i < count;i++) {
+ j = limit-1-i;
+ if (j+LOG_BUF_LEN < log_start+log_size)
+ break;
+ c = log_buf[ j & LOG_BUF_MASK ];
+ spin_unlock_irq(&console_lock);
+ __put_user(c,&buf[count-1-i]);
+ spin_lock_irq(&console_lock);
+ }
+ spin_unlock_irq(&console_lock);
error = i;
+ if(i != count) {
+ int offset = count-error;
+ /* buffer overflow during copy, correct user buffer. */
+ for(i=0;i<error;i++) {
+ __get_user(c,&buf[i+offset]);
+ __put_user(c,&buf[i]);
+ }
+ }
+
break;
case 5: /* Clear ring buffer */
+ spin_lock_irq(&console_lock);
logged_chars = 0;
+ spin_unlock_irq(&console_lock);
break;
case 6: /* Disable logging to console */
+ spin_lock_irq(&console_lock);
console_loglevel = minimum_console_loglevel;
+ spin_unlock_irq(&console_lock);
break;
case 7: /* Enable logging to console */
+ spin_lock_irq(&console_lock);
console_loglevel = default_console_loglevel;
+ spin_unlock_irq(&console_lock);
break;
case 8:
error = -EINVAL;
@@ -204,7 +231,9 @@ int do_syslog(int type, char * buf, int len)
goto out;
if (len < minimum_console_loglevel)
len = minimum_console_loglevel;
+ spin_lock_irq(&console_lock);
console_loglevel = len;
+ spin_unlock_irq(&console_lock);
error = 0;
break;
default:
@@ -212,20 +241,16 @@ int do_syslog(int type, char * buf, int len)
break;
}
out:
- unlock_kernel();
return error;
}
-asmlinkage int sys_syslog(int type, char * buf, int len)
+asmlinkage long sys_syslog(int type, char * buf, int len)
{
if ((type != 3) && !capable(CAP_SYS_ADMIN))
return -EPERM;
return do_syslog(type, buf, len);
}
-
-spinlock_t console_lock;
-
asmlinkage int printk(const char *fmt, ...)
{
va_list args;
@@ -259,13 +284,12 @@ asmlinkage int printk(const char *fmt, ...)
}
line_feed = 0;
for (; p < buf_end; p++) {
- log_buf[(log_start+log_size) & (LOG_BUF_LEN-1)] = *p;
+ log_buf[(log_start+log_size) & LOG_BUF_MASK] = *p;
if (log_size < LOG_BUF_LEN)
log_size++;
- else {
+ else
log_start++;
- log_start &= LOG_BUF_LEN-1;
- }
+
logged_chars++;
if (*p == '\n') {
line_feed = 1;
@@ -290,24 +314,33 @@ asmlinkage int printk(const char *fmt, ...)
void console_print(const char *s)
{
- struct console *c = console_drivers;
+ struct console *c;
+ unsigned long flags;
int len = strlen(s);
+ spin_lock_irqsave(&console_lock,flags);
+ c = console_drivers;
while(c) {
if ((c->flags & CON_ENABLED) && c->write)
c->write(c, s, len);
c = c->next;
}
+ spin_unlock_irqrestore(&console_lock,flags);
}
void unblank_console(void)
{
- struct console *c = console_drivers;
+ struct console *c;
+ unsigned long flags;
+
+ spin_lock_irqsave(&console_lock,flags);
+ c = console_drivers;
while(c) {
if ((c->flags & CON_ENABLED) && c->unblank)
c->unblank();
c = c->next;
}
+ spin_unlock_irqrestore(&console_lock,flags);
}
/*
@@ -318,11 +351,12 @@ void unblank_console(void)
*/
void register_console(struct console * console)
{
- int i,j,len;
- int p = log_start;
+ int i, j,len;
+ int p;
char buf[16];
signed char msg_level = -1;
char *q;
+ unsigned long flags;
/*
* See if we want to use this console driver. If we
@@ -368,6 +402,7 @@ void register_console(struct console * console)
* Put this console in the list - keep the
* preferred driver at the head of the list.
*/
+ spin_lock_irqsave(&console_lock,flags);
if ((console->flags & CON_CONSDEV) || console_drivers == NULL) {
console->next = console_drivers;
console_drivers = console;
@@ -375,23 +410,33 @@ void register_console(struct console * console)
console->next = console_drivers->next;
console_drivers->next = console;
}
- if ((console->flags & CON_PRINTBUFFER) == 0) return;
-
+ if ((console->flags & CON_PRINTBUFFER) == 0)
+ goto done;
/*
* Print out buffered log messages.
*/
+ p = log_start & LOG_BUF_MASK;
+
for (i=0,j=0; i < log_size; i++) {
buf[j++] = log_buf[p];
- p++; p &= LOG_BUF_LEN-1;
+ p = (p+1) & LOG_BUF_MASK;
if (buf[j-1] != '\n' && i < log_size - 1 && j < sizeof(buf)-1)
continue;
buf[j] = 0;
q = buf;
len = j;
if (msg_level < 0) {
- msg_level = buf[1] - '0';
- q = buf + 3;
- len -= 3;
+ if(buf[0] == '<' &&
+ buf[1] >= '0' &&
+ buf[1] <= '7' &&
+ buf[2] == '>') {
+ msg_level = buf[1] - '0';
+ q = buf + 3;
+ len -= 3;
+ } else
+ {
+ msg_level = default_message_loglevel;
+ }
}
if (msg_level < console_loglevel)
console->write(console, q, len);
@@ -399,26 +444,35 @@ void register_console(struct console * console)
msg_level = -1;
j = 0;
}
+done:
+ spin_unlock_irqrestore(&console_lock,flags);
}
int unregister_console(struct console * console)
{
struct console *a,*b;
-
+ unsigned long flags;
+ int res = 1;
+
+ spin_lock_irqsave(&console_lock,flags);
if (console_drivers == console) {
console_drivers=console->next;
- return (0);
- }
- for (a=console_drivers->next, b=console_drivers ;
- a; b=a, a=b->next) {
- if (a == console) {
- b->next = a->next;
- return 0;
- }
+ res = 0;
+ } else
+ {
+ for (a=console_drivers->next, b=console_drivers ;
+ a; b=a, a=b->next) {
+ if (a == console) {
+ b->next = a->next;
+ res = 0;
+ break;
+ }
+ }
}
- return (1);
+ spin_unlock_irqrestore(&console_lock,flags);
+ return res;
}
/*
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 0d007d492..35fa9768d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -10,6 +10,7 @@
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
+#include <linux/bigmem.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
@@ -39,6 +40,8 @@ repeat:
if (!pte_present(*pgtable))
goto fault_in_page;
page = pte_page(*pgtable);
+ if (write && (!pte_write(*pgtable) || !pte_dirty(*pgtable)))
+ goto fault_in_page;
if (MAP_NR(page) >= max_mapnr)
return 0;
flush_cache_page(vma, addr);
@@ -50,7 +53,11 @@ repeat:
dst = src;
src = buf;
}
+ src = (void *) kmap((unsigned long) src, KM_READ);
+ dst = (void *) kmap((unsigned long) dst, KM_WRITE);
memcpy(dst, src, len);
+ kunmap((unsigned long) src, KM_READ);
+ kunmap((unsigned long) dst, KM_WRITE);
}
flush_page_to_ram(page);
return len;
diff --git a/kernel/resource.c b/kernel/resource.c
index 4c672c6b0..26ee5e29d 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1,232 +1,293 @@
/*
* linux/kernel/resource.c
*
- * Copyright (C) 1995, 1999 Linus Torvalds
- * David Hinds
+ * Copyright (C) 1999 Linus Torvalds
+ * Copyright (C) 1999 Martin Mares <mj@ucw.cz>
*
- * Kernel resource management
- *
- * We now distinguish between claiming space for devices (using the
- * 'occupy' and 'vacate' calls), and associating a resource with a
- * device driver (with the 'request', 'release', and 'check' calls).
- * A resource can be claimed even if there is no associated driver
- * (by occupying with name=NULL). Vacating a resource makes it
- * available for other dynamically configured devices.
+ * Arbitrary resource management.
*/
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/init.h>
+#include <linux/malloc.h>
+#include <linux/spinlock.h>
-#define RSRC_TABLE_SIZE 128
-
-struct resource_entry {
- u_long from, num;
- const char *name;
- struct resource_entry *next;
-};
+struct resource ioport_resource = { "PCI IO", 0x0000, 0xFFFF, IORESOURCE_IO };
+struct resource iomem_resource = { "PCI mem", 0x00000000, 0xFFFFFFFF, IORESOURCE_MEM };
-struct resource_entry res_list[] = {
- { 0, 0, NULL, NULL }, /* IO */
- { 0, 0, NULL, NULL } /* mem */
-};
-
-static struct resource_entry rsrc_table[RSRC_TABLE_SIZE];
+static rwlock_t resource_lock = RW_LOCK_UNLOCKED;
/*
* This generates reports for /proc/ioports and /proc/memory
*/
-int get_resource_list(int class, char *buf)
+static char * do_resource_list(struct resource *entry, const char *fmt, int offset, char *buf, char *end)
{
- struct resource_entry *root = &res_list[class];
- struct resource_entry *p;
- int len = 0;
- char *fmt = (class == RES_IO) ?
- "%04lx-%04lx : %s\n" : "%08lx-%08lx : %s\n";
-
- for (p = root->next; (p) && (len < 4000); p = p->next)
- len += sprintf(buf+len, fmt, p->from, p->from+p->num-1,
- (p->name ? p->name : "occupied"));
- if (p)
- len += sprintf(buf+len, "4K limit reached!\n");
- return len;
+ if (offset < 0)
+ offset = 0;
+
+ while (entry) {
+ const char *name = entry->name;
+ unsigned long from, to;
+
+ if ((int) (end-buf) < 80)
+ return buf;
+
+ from = entry->start;
+ to = entry->end;
+ if (!name)
+ name = "<BAD>";
+
+ buf += sprintf(buf, fmt + offset, from, to, name);
+ if (entry->child)
+ buf = do_resource_list(entry->child, fmt, offset-2, buf, end);
+ entry = entry->sibling;
+ }
+
+ return buf;
}
-/*
- * Basics: find a matching resource entry, or find an insertion point
- */
-static struct resource_entry *
-find_match(struct resource_entry *root, u_long from, u_long num)
+int get_resource_list(struct resource *root, char *buf, int size)
{
- struct resource_entry *p;
- for (p = root; p; p = p->next)
- if ((p->from == from) && (p->num == num))
- return p;
- return NULL;
-}
+ char *fmt;
+ int retval;
-static struct resource_entry *
-find_gap(struct resource_entry *root, u_long from, u_long num)
+ fmt = " %08lx-%08lx : %s\n";
+ if (root == &ioport_resource)
+ fmt = " %04lx-%04lx : %s\n";
+ read_lock(&resource_lock);
+ retval = do_resource_list(root->child, fmt, 8, buf, buf + size) - buf;
+ read_unlock(&resource_lock);
+ return retval;
+}
+
+/* Return the conflict entry if you can't request it */
+static struct resource * __request_resource(struct resource *root, struct resource *new)
{
- struct resource_entry *p;
- if (from > from+num-1)
- return NULL;
- for (p = root; ; p = p->next) {
- if ((p != root) && (p->from+p->num-1 >= from)) {
- p = NULL;
- break;
+ unsigned long start = new->start;
+ unsigned long end = new->end;
+ struct resource *tmp, **p;
+
+ if (end < start)
+ return root;
+ if (start < root->start)
+ return root;
+ if (end > root->end)
+ return root;
+ p = &root->child;
+ for (;;) {
+ tmp = *p;
+ if (!tmp || tmp->start > end) {
+ new->sibling = tmp;
+ *p = new;
+ new->parent = root;
+ return NULL;
}
- if ((p->next == NULL) || (p->next->from > from+num-1))
- break;
+ p = &tmp->sibling;
+ if (tmp->end < start)
+ continue;
+ return tmp;
}
- return p;
}
-/*
- * Call this from a driver to assert ownership of a resource
- */
-void request_resource(int class, unsigned long from,
- unsigned long num, const char *name)
+int request_resource(struct resource *root, struct resource *new)
{
- struct resource_entry *root = &res_list[class];
- struct resource_entry *p;
- long flags;
- int i;
-
- p = find_match(root, from, num);
- if (p) {
- p->name = name;
- return;
- }
+ struct resource *conflict;
+
+ write_lock(&resource_lock);
+ conflict = __request_resource(root, new);
+ write_unlock(&resource_lock);
+ return conflict ? -EBUSY : 0;
+}
+
+int release_resource(struct resource *old)
+{
+ struct resource *tmp, **p;
- save_flags(flags);
- cli();
- for (i = 0; i < RSRC_TABLE_SIZE; i++)
- if (rsrc_table[i].num == 0)
+ p = &old->parent->child;
+ for (;;) {
+ tmp = *p;
+ if (!tmp)
break;
- if (i == RSRC_TABLE_SIZE)
- printk("warning: resource table is full\n");
- else {
- p = find_gap(root, from, num);
- if (p == NULL) {
- restore_flags(flags);
- return;
+ if (tmp == old) {
+ *p = tmp->sibling;
+ old->parent = NULL;
+ return 0;
}
- rsrc_table[i].name = name;
- rsrc_table[i].from = from;
- rsrc_table[i].num = num;
- rsrc_table[i].next = p->next;
- p->next = &rsrc_table[i];
+ p = &tmp->sibling;
}
- restore_flags(flags);
+ return -EINVAL;
}
-/*
- * Call these when a driver is unloaded but the device remains
+/*
+ * Find empty slot in the resource tree given range and alignment.
*/
-void release_resource(int class, unsigned long from, unsigned long num)
+static int find_resource(struct resource *root, struct resource *new,
+ unsigned long size,
+ unsigned long min, unsigned long max,
+ unsigned long align)
{
- struct resource_entry *root = &res_list[class];
- struct resource_entry *p;
- p = find_match(root, from, num);
- if (p) p->name = NULL;
+ struct resource *this = root->child;
+ unsigned long start, end;
+
+ start = root->start;
+ for(;;) {
+ if (this)
+ end = this->start;
+ else
+ end = root->end;
+ if (start < min)
+ start = min;
+ if (end > max)
+ end = max;
+ start = (start + align - 1) & ~(align - 1);
+ if (start < end && end - start + 1 >= size) {
+ new->start = start;
+ new->end = start + size - 1;
+ return 0;
+ }
+ if (!this)
+ break;
+ start = this->end + 1;
+ this = this->sibling;
+ }
+ return -EBUSY;
}
/*
- * Call these to check a region for conflicts before probing
+ * Allocate empty slot in the resource tree given range and alignment.
*/
-int check_resource(int class, unsigned long from, unsigned long num)
+int allocate_resource(struct resource *root, struct resource *new,
+ unsigned long size,
+ unsigned long min, unsigned long max,
+ unsigned long align)
{
- struct resource_entry *root = &res_list[class];
- struct resource_entry *p;
- p = find_match(root, from, num);
- if (p != NULL)
- return (p->name != NULL) ? -EBUSY : 0;
- return (find_gap(root, from, num) == NULL) ? -EBUSY : 0;
+ int err;
+
+ write_lock(&resource_lock);
+ err = find_resource(root, new, size, min, max, align);
+ if (err >= 0 && __request_resource(root, new))
+ err = -EBUSY;
+ write_unlock(&resource_lock);
+ return err;
}
/*
- * Call this to claim a resource for a piece of hardware
+ * This is compatibility stuff for IO resources.
+ *
+ * Note how this, unlike the above, knows about
+ * the IO flag meanings (busy etc).
+ *
+ * Request-region creates a new busy region.
+ *
+ * Check-region returns non-zero if the area is already busy
+ *
+ * Release-region releases a matching busy region.
*/
-unsigned long occupy_resource(int class, unsigned long base,
- unsigned long end, unsigned long num,
- unsigned long align, const char *name)
+struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name)
{
- struct resource_entry *root = &res_list[class];
- unsigned long from = 0, till;
- unsigned long flags;
- int i;
- struct resource_entry *p, *q;
+ struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL);
- if ((base > end-1) || (num > end - base))
- return 0;
+ if (res) {
+ memset(res, 0, sizeof(*res));
+ res->name = name;
+ res->start = start;
+ res->end = start + n - 1;
+ res->flags = IORESOURCE_BUSY;
- for (i = 0; i < RSRC_TABLE_SIZE; i++)
- if (rsrc_table[i].num == 0)
- break;
- if (i == RSRC_TABLE_SIZE)
- return 0;
-
- save_flags(flags);
- cli();
- /* printk("occupy: search in %08lx[%08lx] ", base, end - base); */
- for (p = root; p != NULL; p = q) {
- q = p->next;
- /* Find window in list */
- from = (p->from+p->num + align-1) & ~(align-1);
- till = (q == NULL) ? (0 - align) : q->from;
- /* printk(" %08lx:%08lx", from, till); */
- /* Clip window with base and end */
- if (from < base) from = base;
- if (till > end) till = end;
- /* See if result is large enougth */
- if ((from < till) && (from + num < till))
+ write_lock(&resource_lock);
+
+ for (;;) {
+ struct resource *conflict;
+
+ conflict = __request_resource(parent, res);
+ if (!conflict)
+ break;
+ if (conflict != parent) {
+ parent = conflict;
+ if (!(conflict->flags & IORESOURCE_BUSY))
+ continue;
+ }
+
+ /* Uhhuh, that didn't work out.. */
+ kfree(res);
+ res = NULL;
break;
+ }
+ write_unlock(&resource_lock);
}
- /* printk("\r\n"); */
- restore_flags(flags);
-
- if (p == NULL)
- return 0;
-
- rsrc_table[i].name = name;
- rsrc_table[i].from = from;
- rsrc_table[i].num = num;
- rsrc_table[i].next = p->next;
- p->next = &rsrc_table[i];
- return from;
+ return res;
}
-/*
- * Call this when a resource becomes available for other hardware
- */
-void vacate_resource(int class, unsigned long from, unsigned long num)
+int __check_region(struct resource *parent, unsigned long start, unsigned long n)
{
- struct resource_entry *root = &res_list[class];
- struct resource_entry *p, *q;
- long flags;
-
- save_flags(flags);
- cli();
- for (p = root; ; p = q) {
- q = p->next;
- if (q == NULL)
- break;
- if ((q->from == from) && (q->num == num)) {
- q->num = 0;
- p->next = q->next;
+ struct resource * res;
+
+ res = __request_region(parent, start, n, "check-region");
+ if (!res)
+ return -EBUSY;
+
+ release_resource(res);
+ kfree(res);
+ return 0;
+}
+
+void __release_region(struct resource *parent, unsigned long start, unsigned long n)
+{
+ struct resource **p;
+ unsigned long end;
+
+ p = &parent->child;
+ end = start + n - 1;
+
+ for (;;) {
+ struct resource *res = *p;
+
+ if (!res)
break;
+ if (res->start <= start && res->end >= end) {
+ if (!(res->flags & IORESOURCE_BUSY)) {
+ p = &res->child;
+ continue;
+ }
+ if (res->start != start || res->end != end)
+ break;
+ *p = res->sibling;
+ kfree(res);
+ return;
}
+ p = &res->sibling;
}
- restore_flags(flags);
+ printk("Trying to free nonexistent resource <%04lx-%04lx>\n", start, end);
}
-/* Called from init/main.c to reserve IO ports. */
-void __init reserve_setup(char *str, int *ints)
+/*
+ * Called from init/main.c to reserve IO ports.
+ */
+#define MAXRESERVE 4
+static int __init reserve_setup(char *str)
{
- int i;
+ int opt = 2, io_start, io_num;
+ static int reserved = 0;
+ static struct resource reserve[MAXRESERVE];
+
+ while (opt==2) {
+ int x = reserved;
- for (i = 1; i < ints[0]; i += 2)
- request_region(ints[i], ints[i+1], "reserved");
+ if (get_option (&str, &io_start) != 2) break;
+ if (get_option (&str, &io_num) == 0) break;
+ if (x < MAXRESERVE) {
+ struct resource *res = reserve + x;
+ res->name = "reserved";
+ res->start = io_start;
+ res->end = io_start + io_num - 1;
+ res->child = NULL;
+ if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0)
+ reserved = x+1;
+ }
+ }
+ return 1;
}
+
+__setup("reserve=", reserve_setup);
diff --git a/kernel/sched.c b/kernel/sched.c
index 95b9b823c..431d5c719 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -36,7 +36,6 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
-#include <asm/semaphore-helper.h>
#include <linux/timex.h>
@@ -94,7 +93,23 @@ unsigned long volatile jiffies=0;
* via the SMP irq return path.
*/
-struct task_struct * task[NR_TASKS] = {&init_task, };
+struct task_struct * init_tasks[NR_CPUS] = {&init_task, };
+
+/*
+ * The tasklist_lock protects the linked list of processes.
+ *
+ * The scheduler lock is protecting against multiple entry
+ * into the scheduling code, and doesn't need to worry
+ * about interrupts (because interrupts cannot call the
+ * scheduler).
+ *
+ * The run-queue lock locks the parts that actually access
+ * and change the run-queues, and have to be interrupt-safe.
+ */
+spinlock_t runqueue_lock = SPIN_LOCK_UNLOCKED; /* second */
+rwlock_t tasklist_lock = RW_LOCK_UNLOCKED; /* third */
+
+static LIST_HEAD(runqueue_head);
/*
* We align per-CPU scheduling data on cacheline boundaries,
@@ -114,7 +129,7 @@ struct kernel_stat kstat = { 0 };
#ifdef __SMP__
-#define idle_task(cpu) (task[cpu_number_map[(cpu)]])
+#define idle_task(cpu) (init_tasks[cpu_number_map[(cpu)]])
#define can_schedule(p) (!(p)->has_cpu)
#else
@@ -140,8 +155,7 @@ void scheduling_functions_start_here(void) { }
* +1000: realtime process, select this.
*/
-static inline int goodness (struct task_struct * prev,
- struct task_struct * p, int this_cpu)
+static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
{
int weight;
@@ -174,7 +188,7 @@ static inline int goodness (struct task_struct * prev,
#endif
/* .. and a slight advantage to the current MM */
- if (p->mm == prev->mm)
+ if (p->mm == this_mm)
weight += 1;
weight += p->priority;
@@ -191,89 +205,32 @@ out:
* to care about SCHED_YIELD is when we calculate the previous process'
* goodness ...
*/
-static inline int prev_goodness (struct task_struct * prev,
- struct task_struct * p, int this_cpu)
+static inline int prev_goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
{
if (p->policy & SCHED_YIELD) {
p->policy &= ~SCHED_YIELD;
return 0;
}
- return goodness(prev, p, this_cpu);
+ return goodness(p, this_cpu, this_mm);
}
/*
* the 'goodness value' of replacing a process on a given CPU.
* positive value means 'replace', zero or negative means 'dont'.
*/
-static inline int preemption_goodness (struct task_struct * prev,
- struct task_struct * p, int cpu)
+static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu)
{
- return goodness(prev, p, cpu) - goodness(prev, prev, cpu);
+ return goodness(p, cpu, prev->mm) - goodness(prev, cpu, prev->mm);
}
-/*
- * If there is a dependency between p1 and p2,
- * don't be too eager to go into the slow schedule.
- * In particular, if p1 and p2 both want the kernel
- * lock, there is no point in trying to make them
- * extremely parallel..
- *
- * (No lock - lock_depth < 0)
- *
- * There are two additional metrics here:
- *
- * first, a 'cutoff' interval, currently 0-200 usecs on
- * x86 CPUs, depending on the size of the 'SMP-local cache'.
- * If the current process has longer average timeslices than
- * this, then we utilize the idle CPU.
- *
- * second, if the wakeup comes from a process context,
- * then the two processes are 'related'. (they form a
- * 'gang')
- *
- * An idle CPU is almost always a bad thing, thus we skip
- * the idle-CPU utilization only if both these conditions
- * are true. (ie. a 'process-gang' rescheduling with rather
- * high frequency should stay on the same CPU).
- *
- * [We can switch to something more finegrained in 2.3.]
- *
- * do not 'guess' if the to-be-scheduled task is RT.
- */
-#define related(p1,p2) (((p1)->lock_depth >= 0) && (p2)->lock_depth >= 0) && \
- (((p2)->policy == SCHED_OTHER) && ((p1)->avg_slice < cacheflush_time))
-
-static inline void reschedule_idle_slow(struct task_struct * p)
+static void reschedule_idle(struct task_struct * p)
{
#ifdef __SMP__
-/*
- * (see reschedule_idle() for an explanation first ...)
- *
- * Pass #2
- *
- * We try to find another (idle) CPU for this woken-up process.
- *
- * On SMP, we mostly try to see if the CPU the task used
- * to run on is idle.. but we will use another idle CPU too,
- * at this point we already know that this CPU is not
- * willing to reschedule in the near future.
- *
- * An idle CPU is definitely wasted, especially if this CPU is
- * running long-timeslice processes. The following algorithm is
- * pretty good at finding the best idle CPU to send this process
- * to.
- *
- * [We can try to preempt low-priority processes on other CPUs in
- * 2.3. Also we can try to use the avg_slice value to predict
- * 'likely reschedule' events even on other CPUs.]
- */
int this_cpu = smp_processor_id(), target_cpu;
struct task_struct *tsk, *target_tsk;
- int cpu, best_cpu, weight, best_weight, i;
+ int cpu, best_cpu, i;
unsigned long flags;
- best_weight = 0; /* prevents negative weight */
-
spin_lock_irqsave(&runqueue_lock, flags);
/*
@@ -289,15 +246,17 @@ static inline void reschedule_idle_slow(struct task_struct * p)
for (i = 0; i < smp_num_cpus; i++) {
cpu = cpu_logical_map(i);
tsk = cpu_curr(cpu);
- if (related(tsk, p))
- goto out_no_target;
- weight = preemption_goodness(tsk, p, cpu);
- if (weight > best_weight) {
- best_weight = weight;
+ if (tsk == idle_task(cpu))
target_tsk = tsk;
- }
}
+ if (target_tsk && p->avg_slice > cacheflush_time)
+ goto send_now;
+
+ tsk = cpu_curr(best_cpu);
+ if (preemption_goodness(tsk, p, best_cpu) > 0)
+ target_tsk = tsk;
+
/*
* found any suitable CPU?
*/
@@ -328,35 +287,6 @@ out_no_target:
#endif
}
-static void reschedule_idle(struct task_struct * p)
-{
-#ifdef __SMP__
- int cpu = smp_processor_id();
- /*
- * ("wakeup()" should not be called before we've initialized
- * SMP completely.
- * Basically a not-yet initialized SMP subsystem can be
- * considered as a not-yet working scheduler, simply dont use
- * it before it's up and running ...)
- *
- * SMP rescheduling is done in 2 passes:
- * - pass #1: faster: 'quick decisions'
- * - pass #2: slower: 'lets try and find a suitable CPU'
- */
-
- /*
- * Pass #1. (subtle. We might be in the middle of __switch_to, so
- * to preserve scheduling atomicity we have to use cpu_curr)
- */
- if ((p->processor == cpu) && related(cpu_curr(cpu), p))
- return;
-#endif /* __SMP__ */
- /*
- * Pass #2
- */
- reschedule_idle_slow(p);
-}
-
/*
* Careful!
*
@@ -366,74 +296,23 @@ static void reschedule_idle(struct task_struct * p)
*/
static inline void add_to_runqueue(struct task_struct * p)
{
- struct task_struct *next = init_task.next_run;
-
- p->prev_run = &init_task;
- init_task.next_run = p;
- p->next_run = next;
- next->prev_run = p;
+ list_add(&p->run_list, &runqueue_head);
nr_running++;
}
-static inline void del_from_runqueue(struct task_struct * p)
-{
- struct task_struct *next = p->next_run;
- struct task_struct *prev = p->prev_run;
-
- nr_running--;
- next->prev_run = prev;
- prev->next_run = next;
- p->next_run = NULL;
- p->prev_run = NULL;
-}
-
static inline void move_last_runqueue(struct task_struct * p)
{
- struct task_struct *next = p->next_run;
- struct task_struct *prev = p->prev_run;
-
- /* remove from list */
- next->prev_run = prev;
- prev->next_run = next;
- /* add back to list */
- p->next_run = &init_task;
- prev = init_task.prev_run;
- init_task.prev_run = p;
- p->prev_run = prev;
- prev->next_run = p;
+ list_del(&p->run_list);
+ list_add_tail(&p->run_list, &runqueue_head);
}
static inline void move_first_runqueue(struct task_struct * p)
{
- struct task_struct *next = p->next_run;
- struct task_struct *prev = p->prev_run;
-
- /* remove from list */
- next->prev_run = prev;
- prev->next_run = next;
- /* add back to list */
- p->prev_run = &init_task;
- next = init_task.next_run;
- init_task.next_run = p;
- p->next_run = next;
- next->prev_run = p;
+ list_del(&p->run_list);
+ list_add(&p->run_list, &runqueue_head);
}
/*
- * The tasklist_lock protects the linked list of processes.
- *
- * The scheduler lock is protecting against multiple entry
- * into the scheduling code, and doesn't need to worry
- * about interrupts (because interrupts cannot call the
- * scheduler).
- *
- * The run-queue lock locks the parts that actually access
- * and change the run-queues, and have to be interrupt-safe.
- */
-spinlock_t runqueue_lock = SPIN_LOCK_UNLOCKED; /* second */
-rwlock_t tasklist_lock = RW_LOCK_UNLOCKED; /* third */
-
-/*
* Wake up a process. Put it on the run-queue if it's not
* already there. The "current" process is always on the
* run-queue (except when the actual re-schedule is in
@@ -450,7 +329,7 @@ void wake_up_process(struct task_struct * p)
*/
spin_lock_irqsave(&runqueue_lock, flags);
p->state = TASK_RUNNING;
- if (p->next_run)
+ if (task_on_runqueue(p))
goto out;
add_to_runqueue(p);
spin_unlock_irqrestore(&runqueue_lock, flags);
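The hunks above replace the hand-rolled next_run/prev_run pointers with a generic struct list_head called run_list. The helpers used but not defined here, task_on_runqueue() and del_from_runqueue(), live in the headers; under the new scheme they would presumably look like this sketch (an assumption, not part of the patch):

	/* assumed counterparts in <linux/sched.h> */
	static inline void del_from_runqueue(struct task_struct * p)
	{
		nr_running--;
		list_del(&p->run_list);
		p->run_list.next = NULL;	/* marks the task as not queued */
	}

	static inline int task_on_runqueue(struct task_struct * p)
	{
		return (p->run_list.next != NULL);
	}

With run_list.next cleared on removal, the task_on_runqueue() test in wake_up_process() above stays a single pointer check.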
@@ -657,7 +536,7 @@ signed long schedule_timeout(signed long timeout)
* cleans up all remaining scheduler things, without impacting the
* common case.
*/
-static inline void __schedule_tail (struct task_struct *prev)
+static inline void __schedule_tail(struct task_struct *prev)
{
#ifdef __SMP__
if ((prev->state == TASK_RUNNING) &&
@@ -668,7 +547,7 @@ static inline void __schedule_tail (struct task_struct *prev)
#endif /* __SMP__ */
}
-void schedule_tail (struct task_struct *prev)
+void schedule_tail(struct task_struct *prev)
{
__schedule_tail(prev);
}
@@ -687,8 +566,10 @@ asmlinkage void schedule(void)
{
struct schedule_data * sched_data;
struct task_struct *prev, *next, *p;
+ struct list_head *tmp;
int this_cpu, c;
+ if (!current->active_mm) BUG();
if (tq_scheduler)
goto handle_tq_scheduler;
tq_scheduler_back:
@@ -731,42 +612,29 @@ move_rr_back:
}
prev->need_resched = 0;
-repeat_schedule:
-
/*
* this is the scheduler proper:
*/
- p = init_task.next_run;
- /* Default process to select.. */
+repeat_schedule:
+ /*
+ * Default process to select..
+ */
next = idle_task(this_cpu);
c = -1000;
if (prev->state == TASK_RUNNING)
goto still_running;
still_running_back:
- /*
- * This is subtle.
- * Note how we can enable interrupts here, even
- * though interrupts can add processes to the run-
- * queue. This is because any new processes will
- * be added to the front of the queue, so "p" above
- * is a safe starting point.
- * run-queue deletion and re-ordering is protected by
- * the scheduler lock
- */
-/*
- * Note! there may appear new tasks on the run-queue during this, as
- * interrupts are enabled. However, they will be put on front of the
- * list, so our list starting at "p" is essentially fixed.
- */
- while (p != &init_task) {
+ tmp = runqueue_head.next;
+ while (tmp != &runqueue_head) {
+ p = list_entry(tmp, struct task_struct, run_list);
if (can_schedule(p)) {
- int weight = goodness(prev, p, this_cpu);
+ int weight = goodness(p, this_cpu, prev->active_mm);
if (weight > c)
c = weight, next = p;
}
- p = p->next_run;
+ tmp = tmp->next;
}
/* Do we need to re-calculate counters? */
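goodness() now receives the previous task's active_mm instead of the previous task itself. The intent is a small address-space affinity bonus: a candidate that already shares the current mm, or has none and can borrow it lazily, avoids an MMU switch. As an assumption about code outside this hunk, the relevant fragment of goodness() would be along these lines:

	/* sketch of the mm-affinity bonus only; the full goodness() also
	 * weighs p->counter, p->priority and CPU affinity */
	if (p->mm == this_mm || !p->mm)
		weight += 1;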
@@ -819,12 +687,42 @@ still_running_back:
#endif /* __SMP__ */
kstat.context_swtch++;
- get_mmu_context(next);
+ /*
+ * there are 3 processes which are affected by a context switch:
+ *
+ * prev == .... ==> (last => next)
+ *
+ * It's the 'much more previous' 'prev' that is on next's stack,
+ * but prev is set to (the just run) 'last' process by switch_to().
+ * This might sound slightly confusing but makes tons of sense.
+ */
+ prepare_to_switch();
+ {
+ struct mm_struct *mm = next->mm;
+ struct mm_struct *oldmm = prev->active_mm;
+ if (!mm) {
+ if (next->active_mm) BUG();
+ next->active_mm = oldmm;
+ atomic_inc(&oldmm->mm_count);
+ } else {
+ if (next->active_mm != mm) BUG();
+ switch_mm(oldmm, mm, next, this_cpu);
+ }
+
+ if (!prev->mm) {
+ prev->active_mm = NULL;
+ mmdrop(oldmm);
+ }
+ }
+
+ /*
+ * This just switches the register state and the
+ * stack.
+ */
switch_to(prev, next, prev);
__schedule_tail(prev);
same_process:
-
reacquire_kernel_lock(current);
return;
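The lazy-TLB bookkeeping above distinguishes two reference counts on an mm_struct: mm_users for real users of the address space and mm_count for lazy references such as a kernel thread's borrowed active_mm. mmdrop() itself is not part of this hunk; as an assumption about the matching header, it amounts to:

	/* presumed helper in <linux/sched.h>: drop one lazy reference and
	 * free the mm_struct once the last one is gone */
	static inline void mmdrop(struct mm_struct * mm)
	{
		if (atomic_dec_and_test(&mm->mm_count))
			__mmdrop(mm);
	}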
@@ -837,11 +735,11 @@ recalculate:
p->counter = (p->counter >> 1) + p->priority;
read_unlock(&tasklist_lock);
spin_lock_irq(&runqueue_lock);
- goto repeat_schedule;
}
+ goto repeat_schedule;
still_running:
- c = prev_goodness(prev, prev, this_cpu);
+ c = prev_goodness(prev, this_cpu, prev->active_mm);
next = prev;
goto still_running_back;
@@ -912,128 +810,6 @@ out:
return;
}
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit. ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore. The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
- wake_one_more(sem);
- wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function. Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible. This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return. If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-#define DOWN_VAR \
- struct task_struct *tsk = current; \
- wait_queue_t wait; \
- init_waitqueue_entry(&wait, tsk);
-
-#define DOWN_HEAD(task_state) \
- \
- \
- tsk->state = (task_state); \
- add_wait_queue(&sem->wait, &wait); \
- \
- /* \
- * Ok, we're set up. sem->count is known to be less than zero \
- * so we must wait. \
- * \
- * We can let go the lock for purposes of waiting. \
- * We re-acquire it after awaking so as to protect \
- * all semaphore operations. \
- * \
- * If "up()" is called before we call waking_non_zero() then \
- * we will catch it right away. If it is called later then \
- * we will have to go through a wakeup cycle to catch it. \
- * \
- * Multiple waiters contend for the semaphore lock to see \
- * who gets to gate through and who has to wait some more. \
- */ \
- for (;;) {
-
-#define DOWN_TAIL(task_state) \
- tsk->state = (task_state); \
- } \
- tsk->state = TASK_RUNNING; \
- remove_wait_queue(&sem->wait, &wait);
-
-void __down(struct semaphore * sem)
-{
- DOWN_VAR
- DOWN_HEAD(TASK_UNINTERRUPTIBLE)
- if (waking_non_zero(sem))
- break;
- schedule();
- DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __down_interruptible(struct semaphore * sem)
-{
- int ret = 0;
- DOWN_VAR
- DOWN_HEAD(TASK_INTERRUPTIBLE)
-
- ret = waking_non_zero_interruptible(sem, tsk);
- if (ret)
- {
- if (ret == 1)
- /* ret != 0 only if we get interrupted -arca */
- ret = 0;
- break;
- }
- schedule();
- DOWN_TAIL(TASK_INTERRUPTIBLE)
- return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
- return waking_non_zero_trylock(sem);
-}
-
#define SLEEP_ON_VAR \
unsigned long flags; \
wait_queue_t wait; \
@@ -1533,13 +1309,13 @@ void do_timer(struct pt_regs * regs)
mark_bh(TQUEUE_BH);
}
-#ifndef __alpha__
+#if !defined(__alpha__) && !defined(__ia64__)
/*
* For backwards compatibility? This can be done in libc so Alpha
* and all newer ports shouldn't need it.
*/
-asmlinkage unsigned int sys_alarm(unsigned int seconds)
+asmlinkage unsigned long sys_alarm(unsigned int seconds)
{
struct itimerval it_new, it_old;
unsigned int oldalarm;
@@ -1556,12 +1332,16 @@ asmlinkage unsigned int sys_alarm(unsigned int seconds)
return oldalarm;
}
+#endif
+
+#ifndef __alpha__
+
/*
* The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
* should be moved into arch/i386 instead?
*/
-asmlinkage int sys_getpid(void)
+asmlinkage long sys_getpid(void)
{
/* This is SMP safe - current->pid doesn't change */
return current->pid;
@@ -1590,7 +1370,7 @@ asmlinkage int sys_getpid(void)
* a small window for a race, using the old pointer is
* harmless for a while).
*/
-asmlinkage int sys_getppid(void)
+asmlinkage long sys_getppid(void)
{
int pid;
struct task_struct * me = current;
@@ -1613,25 +1393,25 @@ asmlinkage int sys_getppid(void)
return pid;
}
-asmlinkage int sys_getuid(void)
+asmlinkage long sys_getuid(void)
{
/* Only we change this so SMP safe */
return current->uid;
}
-asmlinkage int sys_geteuid(void)
+asmlinkage long sys_geteuid(void)
{
/* Only we change this so SMP safe */
return current->euid;
}
-asmlinkage int sys_getgid(void)
+asmlinkage long sys_getgid(void)
{
/* Only we change this so SMP safe */
return current->gid;
}
-asmlinkage int sys_getegid(void)
+asmlinkage long sys_getegid(void)
{
/* Only we change this so SMP safe */
return current->egid;
@@ -1643,7 +1423,7 @@ asmlinkage int sys_getegid(void)
* it for backward compatibility?
*/
-asmlinkage int sys_nice(int increment)
+asmlinkage long sys_nice(int increment)
{
unsigned long newprio;
int increase = 0;
@@ -1760,7 +1540,7 @@ static int setscheduler(pid_t pid, int policy,
retval = 0;
p->policy = policy;
p->rt_priority = lp.sched_priority;
- if (p->next_run)
+ if (task_on_runqueue(p))
move_first_runqueue(p);
current->need_resched = 1;
@@ -1773,18 +1553,18 @@ out_nounlock:
return retval;
}
-asmlinkage int sys_sched_setscheduler(pid_t pid, int policy,
+asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
struct sched_param *param)
{
return setscheduler(pid, policy, param);
}
-asmlinkage int sys_sched_setparam(pid_t pid, struct sched_param *param)
+asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param *param)
{
return setscheduler(pid, -1, param);
}
-asmlinkage int sys_sched_getscheduler(pid_t pid)
+asmlinkage long sys_sched_getscheduler(pid_t pid)
{
struct task_struct *p;
int retval;
@@ -1809,7 +1589,7 @@ out_nounlock:
return retval;
}
-asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param)
+asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param *param)
{
struct task_struct *p;
struct sched_param lp;
@@ -1840,7 +1620,7 @@ out_unlock:
return retval;
}
-asmlinkage int sys_sched_yield(void)
+asmlinkage long sys_sched_yield(void)
{
spin_lock_irq(&runqueue_lock);
if (current->policy == SCHED_OTHER)
@@ -1851,7 +1631,7 @@ asmlinkage int sys_sched_yield(void)
return 0;
}
-asmlinkage int sys_sched_get_priority_max(int policy)
+asmlinkage long sys_sched_get_priority_max(int policy)
{
int ret = -EINVAL;
@@ -1867,7 +1647,7 @@ asmlinkage int sys_sched_get_priority_max(int policy)
return ret;
}
-asmlinkage int sys_sched_get_priority_min(int policy)
+asmlinkage long sys_sched_get_priority_min(int policy)
{
int ret = -EINVAL;
@@ -1882,7 +1662,7 @@ asmlinkage int sys_sched_get_priority_min(int policy)
return ret;
}
-asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
+asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
{
struct timespec t;
@@ -1893,7 +1673,7 @@ asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
return 0;
}
-asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
+asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
{
struct timespec t;
unsigned long expire;
@@ -1934,13 +1714,13 @@ asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
return 0;
}
-static void show_task(int nr,struct task_struct * p)
+static void show_task(struct task_struct * p)
{
unsigned long free = 0;
int state;
static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
- printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
+ printk("%-8s ", p->comm);
state = p->state ? ffz(~p->state) + 1 : 0;
if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *))
printk(stat_nam[state]);
@@ -1950,12 +1730,12 @@ static void show_task(int nr,struct task_struct * p)
if (p == current)
printk(" current ");
else
- printk(" %08lX ", thread_saved_pc(&p->tss));
+ printk(" %08lX ", thread_saved_pc(&p->thread));
#else
if (p == current)
printk(" current task ");
else
- printk(" %016lx ", thread_saved_pc(&p->tss));
+ printk(" %016lx ", thread_saved_pc(&p->thread));
#endif
{
unsigned long * n = (unsigned long *) (p+1);
@@ -1968,6 +1748,10 @@ static void show_task(int nr,struct task_struct * p)
printk("%5d ", p->p_cptr->pid);
else
printk(" ");
+ if (!p->mm)
+ printk(" (L-TLB) ");
+ else
+ printk(" (NOTLB) ");
if (p->p_ysptr)
printk("%7d", p->p_ysptr->pid);
else
@@ -2020,7 +1804,7 @@ void show_state(void)
#endif
read_lock(&tasklist_lock);
for_each_task(p)
- show_task((p->tarray_ptr - &task[0]),p);
+ show_task(p);
read_unlock(&tasklist_lock);
}
@@ -2030,6 +1814,11 @@ void __init init_idle(void)
struct schedule_data * sched_data;
sched_data = &aligned_data[smp_processor_id()].schedule_data;
+ if (current != &init_task && task_on_runqueue(current)) {
+ printk("UGH! (%d:%d) was on the runqueue, removing.\n",
+ smp_processor_id(), current->pid);
+ del_from_runqueue(current);
+ }
t = get_cycles();
sched_data->curr = current;
sched_data->last_schedule = t;
@@ -2042,18 +1831,20 @@ void __init sched_init(void)
* process right in SMP mode.
*/
int cpu=hard_smp_processor_id();
- int nr = NR_TASKS;
+ int nr;
init_task.processor=cpu;
- /* Init task array free list and pidhash table. */
- while(--nr > 0)
- add_free_taskslot(&task[nr]);
-
for(nr = 0; nr < PIDHASH_SZ; nr++)
pidhash[nr] = NULL;
init_bh(TIMER_BH, timer_bh);
init_bh(TQUEUE_BH, tqueue_bh);
init_bh(IMMEDIATE_BH, immediate_bh);
+
+ /*
+ * The boot idle thread does lazy MMU switching as well:
+ */
+ atomic_inc(&init_mm.mm_count);
}
+
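The atomic_inc(&init_mm.mm_count) added to sched_init() pairs with the mmdrop() path in schedule(): the boot idle thread never owns an mm, so it runs on a borrowed active_mm (init_mm) whose lazy reference has to be pinned before the first context switch. As an illustration only (not in the patch), the state schedule() maintains for the task currently running on a CPU is:

	static inline int running_mm_state_ok(struct task_struct * p)
	{
		if (p->mm)			/* ordinary process */
			return p->active_mm == p->mm;
		/* kernel thread: borrowed (lazy TLB) address space, pinned via
		 * mm_count and released with mmdrop() when it is switched out */
		return p->active_mm != NULL;
	}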
diff --git a/kernel/signal.c b/kernel/signal.c
index 5077115ce..047ee4395 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -641,7 +641,7 @@ EXPORT_SYMBOL(send_sig_info);
* used by various programs)
*/
-asmlinkage int
+asmlinkage long
sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, size_t sigsetsize)
{
int error = -EINVAL;
@@ -697,7 +697,7 @@ out:
return error;
}
-asmlinkage int
+asmlinkage long
sys_rt_sigpending(sigset_t *set, size_t sigsetsize)
{
int error = -EINVAL;
@@ -718,7 +718,7 @@ out:
return error;
}
-asmlinkage int
+asmlinkage long
sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo,
const struct timespec *uts, size_t sigsetsize)
{
@@ -788,7 +788,7 @@ sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo,
return ret;
}
-asmlinkage int
+asmlinkage long
sys_kill(int pid, int sig)
{
struct siginfo info;
@@ -802,7 +802,7 @@ sys_kill(int pid, int sig)
return kill_something_info(sig, &info, pid);
}
-asmlinkage int
+asmlinkage long
sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo)
{
siginfo_t info;
@@ -948,7 +948,7 @@ out:
#if !defined(__alpha__)
/* Alpha has its own versions with special arguments. */
-asmlinkage int
+asmlinkage long
sys_sigprocmask(int how, old_sigset_t *set, old_sigset_t *oset)
{
int error;
@@ -997,7 +997,7 @@ out:
return error;
}
-asmlinkage int
+asmlinkage long
sys_sigpending(old_sigset_t *set)
{
int error;
@@ -1014,7 +1014,7 @@ sys_sigpending(old_sigset_t *set)
}
#ifndef __sparc__
-asmlinkage int
+asmlinkage long
sys_rt_sigaction(int sig, const struct sigaction *act, struct sigaction *oact,
size_t sigsetsize)
{
@@ -1046,14 +1046,14 @@ out:
/*
* For backwards compatibility. Functionality superseded by sigprocmask.
*/
-asmlinkage int
+asmlinkage long
sys_sgetmask(void)
{
/* SMP safe */
return current->blocked.sig[0];
}
-asmlinkage int
+asmlinkage long
sys_ssetmask(int newmask)
{
int old;
@@ -1068,9 +1068,9 @@ sys_ssetmask(int newmask)
return old;
}
-#endif /* !defined(__alpha__) && !defined(__ia64__) */
+#endif /* !defined(__alpha__) */
-#if !defined(__alpha__) && !defined(__mips__) && !defined(__ia64__)
+#if !defined(__alpha__) && !defined(__mips__)
/*
* For backwards compatibility. Functionality superseded by sigaction.
*/
@@ -1087,4 +1087,4 @@ sys_signal(int sig, __sighandler_t handler)
return ret ? ret : (unsigned long)old_sa.sa.sa_handler;
}
-#endif /* !defined(__alpha__) && !defined(__mips__) && !defined(__ia64__) */
+#endif /* !defined(__alpha__) && !defined(__mips__) */
diff --git a/kernel/sys.c b/kernel/sys.c
index 665c44e30..619d78391 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -41,11 +41,7 @@ int unregister_reboot_notifier(struct notifier_block * nb)
return notifier_chain_unregister(&reboot_notifier_list, nb);
}
-
-
-extern void adjust_clock(void);
-
-asmlinkage int sys_ni_syscall(void)
+asmlinkage long sys_ni_syscall(void)
{
return -ENOSYS;
}
@@ -72,7 +68,7 @@ static int proc_sel(struct task_struct *p, int which, int who)
return 0;
}
-asmlinkage int sys_setpriority(int which, int who, int niceval)
+asmlinkage long sys_setpriority(int which, int who, int niceval)
{
struct task_struct *p;
unsigned int priority;
@@ -122,7 +118,7 @@ asmlinkage int sys_setpriority(int which, int who, int niceval)
* not return the normal nice-value, but a value that has been
* offset by 20 (ie it returns 0..40 instead of -20..20)
*/
-asmlinkage int sys_getpriority(int which, int who)
+asmlinkage long sys_getpriority(int which, int who)
{
struct task_struct *p;
long max_prio = -ESRCH;
@@ -154,7 +150,7 @@ asmlinkage int sys_getpriority(int which, int who)
*
* reboot doesn't sync: do that yourself before calling this.
*/
-asmlinkage int sys_reboot(int magic1, int magic2, int cmd, void * arg)
+asmlinkage long sys_reboot(int magic1, int magic2, int cmd, void * arg)
{
char buffer[256];
@@ -252,7 +248,7 @@ void ctrl_alt_del(void)
* SMP: There are not races, the GIDs are checked only by filesystem
* operations (as far as semantic preservation is concerned).
*/
-asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
+asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
{
int old_rgid = current->gid;
int old_egid = current->egid;
@@ -290,7 +286,7 @@ asmlinkage int sys_setregid(gid_t rgid, gid_t egid)
*
* SMP: Same implicit races as above.
*/
-asmlinkage int sys_setgid(gid_t gid)
+asmlinkage long sys_setgid(gid_t gid)
{
int old_egid = current->egid;
@@ -356,7 +352,7 @@ extern inline void cap_emulate_setxuid(int old_ruid, int old_euid,
* 100% compatible with BSD. A program which uses just setuid() will be
* 100% compatible with POSIX with saved IDs.
*/
-asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
+asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
{
int old_ruid, old_euid, old_suid, new_ruid;
@@ -418,7 +414,7 @@ asmlinkage int sys_setreuid(uid_t ruid, uid_t euid)
* will allow a root program to temporarily drop privileges and be able to
* regain them by swapping the real and effective uid.
*/
-asmlinkage int sys_setuid(uid_t uid)
+asmlinkage long sys_setuid(uid_t uid)
{
int old_euid = current->euid;
int old_ruid, old_suid, new_ruid;
@@ -454,7 +450,7 @@ asmlinkage int sys_setuid(uid_t uid)
* This function implements a generic ability to update ruid, euid,
* and suid. This allows you to implement the 4.4 compatible seteuid().
*/
-asmlinkage int sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
+asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
{
int old_ruid = current->uid;
int old_euid = current->euid;
@@ -493,7 +489,7 @@ asmlinkage int sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
return 0;
}
-asmlinkage int sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
+asmlinkage long sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
{
int retval;
@@ -507,7 +503,7 @@ asmlinkage int sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
/*
* Same as above, but for rgid, egid, sgid.
*/
-asmlinkage int sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
+asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
{
if (!capable(CAP_SETGID)) {
if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
@@ -533,7 +529,7 @@ asmlinkage int sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
return 0;
}
-asmlinkage int sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid)
+asmlinkage long sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid)
{
int retval;
@@ -551,7 +547,7 @@ asmlinkage int sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid)
* whatever uid it wants to). It normally shadows "euid", except when
* explicitly set by setfsuid() or for access..
*/
-asmlinkage int sys_setfsuid(uid_t uid)
+asmlinkage long sys_setfsuid(uid_t uid)
{
int old_fsuid;
@@ -588,7 +584,7 @@ asmlinkage int sys_setfsuid(uid_t uid)
/*
* Samma på svenska..
*/
-asmlinkage int sys_setfsgid(gid_t gid)
+asmlinkage long sys_setfsgid(gid_t gid)
{
int old_fsgid;
@@ -637,7 +633,7 @@ asmlinkage long sys_times(struct tms * tbuf)
* LBT 04.03.94
*/
-asmlinkage int sys_setpgid(pid_t pid, pid_t pgid)
+asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
{
struct task_struct * p;
int err = -EINVAL;
@@ -690,7 +686,7 @@ out:
return err;
}
-asmlinkage int sys_getpgid(pid_t pid)
+asmlinkage long sys_getpgid(pid_t pid)
{
if (!pid) {
return current->pgrp;
@@ -709,13 +705,13 @@ asmlinkage int sys_getpgid(pid_t pid)
}
}
-asmlinkage int sys_getpgrp(void)
+asmlinkage long sys_getpgrp(void)
{
/* SMP - assuming writes are word atomic this is fine */
return current->pgrp;
}
-asmlinkage int sys_getsid(pid_t pid)
+asmlinkage long sys_getsid(pid_t pid)
{
if (!pid) {
return current->session;
@@ -734,7 +730,7 @@ asmlinkage int sys_getsid(pid_t pid)
}
}
-asmlinkage int sys_setsid(void)
+asmlinkage long sys_setsid(void)
{
struct task_struct * p;
int err = -EPERM;
@@ -758,7 +754,7 @@ out:
/*
* Supplementary group IDs
*/
-asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist)
+asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist)
{
int i;
@@ -784,7 +780,7 @@ asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist)
* without another task interfering.
*/
-asmlinkage int sys_setgroups(int gidsetsize, gid_t *grouplist)
+asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist)
{
if (!capable(CAP_SETGID))
return -EPERM;
@@ -822,7 +818,7 @@ out:
*/
DECLARE_MUTEX(uts_sem);
-asmlinkage int sys_newuname(struct new_utsname * name)
+asmlinkage long sys_newuname(struct new_utsname * name)
{
int errno = 0;
@@ -833,7 +829,7 @@ asmlinkage int sys_newuname(struct new_utsname * name)
return errno;
}
-asmlinkage int sys_sethostname(char *name, int len)
+asmlinkage long sys_sethostname(char *name, int len)
{
int errno;
@@ -851,7 +847,7 @@ asmlinkage int sys_sethostname(char *name, int len)
return errno;
}
-asmlinkage int sys_gethostname(char *name, int len)
+asmlinkage long sys_gethostname(char *name, int len)
{
int i, errno;
@@ -872,7 +868,7 @@ asmlinkage int sys_gethostname(char *name, int len)
* Only setdomainname; getdomainname can be implemented by calling
* uname()
*/
-asmlinkage int sys_setdomainname(char *name, int len)
+asmlinkage long sys_setdomainname(char *name, int len)
{
int errno;
@@ -891,7 +887,7 @@ asmlinkage int sys_setdomainname(char *name, int len)
return errno;
}
-asmlinkage int sys_getrlimit(unsigned int resource, struct rlimit *rlim)
+asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim)
{
if (resource >= RLIM_NLIMITS)
return -EINVAL;
@@ -900,7 +896,7 @@ asmlinkage int sys_getrlimit(unsigned int resource, struct rlimit *rlim)
? -EFAULT : 0;
}
-asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim)
+asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim)
{
struct rlimit new_rlim, *old_rlim;
@@ -977,21 +973,21 @@ int getrusage(struct task_struct *p, int who, struct rusage *ru)
return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
}
-asmlinkage int sys_getrusage(int who, struct rusage *ru)
+asmlinkage long sys_getrusage(int who, struct rusage *ru)
{
if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
return -EINVAL;
return getrusage(current, who, ru);
}
-asmlinkage int sys_umask(int mask)
+asmlinkage long sys_umask(int mask)
{
mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
return mask;
}
-asmlinkage int sys_prctl(int option, unsigned long arg2, unsigned long arg3,
- unsigned long arg4, unsigned long arg5)
+asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5)
{
int error = 0;
int sig;
@@ -1005,6 +1001,9 @@ asmlinkage int sys_prctl(int option, unsigned long arg2, unsigned long arg3,
}
current->pdeath_signal = sig;
break;
+ case PR_GET_PDEATHSIG:
+ error = put_user(current->pdeath_signal, (int *)arg2);
+ break;
default:
error = -EINVAL;
break;
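PR_GET_PDEATHSIG is the read-back counterpart of PR_SET_PDEATHSIG: arg2 is treated as an int * and receives the current parent-death signal. From userspace, assuming the usual variadic prctl() wrapper in <sys/prctl.h>, that looks like:

	#include <stdio.h>
	#include <signal.h>
	#include <sys/prctl.h>

	int main(void)
	{
		int sig = 0;

		/* request SIGHUP on parent death, then read the setting back */
		if (prctl(PR_SET_PDEATHSIG, SIGHUP) < 0)
			perror("PR_SET_PDEATHSIG");
		if (prctl(PR_GET_PDEATHSIG, &sig) < 0)
			perror("PR_GET_PDEATHSIG");
		printf("pdeath_signal = %d\n", sig);
		return 0;
	}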
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 195c2cb5b..48320b0db 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -8,7 +8,10 @@
* Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
* Dynamic registration fixes, Stephen Tweedie.
* Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
- * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris Horn.
+ * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
+ * Horn.
+ * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
+ * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
*/
#include <linux/config.h>
@@ -18,8 +21,10 @@
#include <linux/proc_fs.h>
#include <linux/ctype.h>
#include <linux/utsname.h>
+#include <linux/capability.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
+#include <linux/sysrq.h>
#include <asm/uaccess.h>
@@ -34,6 +39,7 @@ extern int panic_timeout;
extern int console_loglevel, C_A_D;
extern int bdf_prm[], bdflush_min[], bdflush_max[];
extern int sysctl_overcommit_memory;
+extern int max_threads;
extern int nr_queued_signals, max_queued_signals;
#ifdef CONFIG_KMOD
@@ -80,6 +86,7 @@ static ctl_table proc_table[];
static ctl_table fs_table[];
static ctl_table debug_table[];
static ctl_table dev_table[];
+extern ctl_table random_table[];
/* /proc declarations: */
@@ -165,6 +172,8 @@ static ctl_table kern_table[] = {
0644, NULL, &proc_doutsstring, &sysctl_string},
{KERN_PANIC, "panic", &panic_timeout, sizeof(int),
0644, NULL, &proc_dointvec},
+ {KERN_CAP_BSET, "cap-bound", &cap_bset, sizeof(kernel_cap_t),
+ 0600, NULL, &proc_dointvec_bset},
#ifdef CONFIG_BLK_DEV_INITRD
{KERN_REALROOTDEV, "real-root-dev", &real_root_dev, sizeof(int),
0644, NULL, &proc_dointvec},
@@ -207,6 +216,13 @@ static ctl_table kern_table[] = {
{KERN_SHMMAX, "shmmax", &shmmax, sizeof (int),
0644, NULL, &proc_dointvec},
#endif
+#ifdef CONFIG_MAGIC_SYSRQ
+ {KERN_SYSRQ, "sysrq", &sysrq_enabled, sizeof (int),
+ 0644, NULL, &proc_dointvec},
+#endif
+ {KERN_MAX_THREADS, "threads-max", &max_threads, sizeof(int),
+ 0644, NULL, &proc_dointvec},
+ {KERN_RANDOM, "random", NULL, 0, 0555, random_table},
{0}
};
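The new threads-max entry (KERN_MAX_THREADS) exposes kernel/fork.c's max_threads limit both as /proc/sys/kernel/threads-max and through the binary sysctl(2) interface. A minimal userspace read of the /proc path, for illustration:

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/kernel/threads-max", "r");
		int max_threads;

		if (!f) {
			perror("fopen");
			return 1;
		}
		if (fscanf(f, "%d", &max_threads) != 1) {
			fclose(f);
			return 1;
		}
		fclose(f);
		printf("threads-max = %d\n", max_threads);
		return 0;
	}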
@@ -309,7 +325,7 @@ int do_sysctl (int *name, int nlen,
return -ENOTDIR;
}
-extern asmlinkage int sys_sysctl(struct __sysctl_args *args)
+extern asmlinkage long sys_sysctl(struct __sysctl_args *args)
{
struct __sysctl_args tmp;
int error;
@@ -679,8 +695,13 @@ static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
return r;
}
+#define OP_SET 0
+#define OP_AND 1
+#define OP_OR 2
+#define OP_MAX 3
+#define OP_MIN 4
static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
- void *buffer, size_t *lenp, int conv)
+ void *buffer, size_t *lenp, int conv, int op)
{
int *i, vleft, first=1, len, left, neg, val;
#define TMPBUFLEN 20
@@ -731,7 +752,17 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
val = -val;
buffer += len;
left -= len;
- *i = val;
+ switch(op) {
+ case OP_SET: *i = val; break;
+ case OP_AND: *i &= val; break;
+ case OP_OR: *i |= val; break;
+ case OP_MAX: if(*i < val)
+ *i = val;
+ break;
+ case OP_MIN: if(*i > val)
+ *i = val;
+ break;
+ }
} else {
p = buf;
if (!first)
@@ -773,9 +804,21 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
int proc_dointvec(ctl_table *table, int write, struct file *filp,
void *buffer, size_t *lenp)
{
- return do_proc_dointvec(table,write,filp,buffer,lenp,1);
+ return do_proc_dointvec(table,write,filp,buffer,lenp,1,OP_SET);
+}
+
+/*
+ * Only init (pid 1) may raise bits in the bounding set; other writers can only clear bits.
+ */
+
+int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return do_proc_dointvec(table,write,filp,buffer,lenp,1,
+ (current->pid == 1) ? OP_SET : OP_AND);
}
+
int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
void *buffer, size_t *lenp)
{
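For the cap-bound entry registered earlier, the OP_SET/OP_AND distinction in do_proc_dointvec() means a write from anything but init can only narrow the capability bounding set. In effect, using the hunk's own *i for the stored kernel_cap_t word (illustration only):

	if (current->pid == 1)
		*i = val;	/* init may set the bounding set outright */
	else
		*i &= val;	/* everyone else can only clear capability bits */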
@@ -874,11 +917,136 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
return 0;
}
+
+/*
+ * an unsigned long function version
+ */
+
+static int do_proc_doulongvec_minmax(ctl_table *table, int write,
+ struct file *filp,
+ void *buffer, size_t *lenp,
+ unsigned long convmul,
+ unsigned long convdiv)
+{
+#define TMPBUFLEN 20
+ unsigned long *i, *min, *max, val;
+ int vleft, first=1, len, left, neg;
+ char buf[TMPBUFLEN], *p;
+
+ if (!table->data || !table->maxlen || !*lenp ||
+ (filp->f_pos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ i = (unsigned long *) table->data;
+ min = (unsigned long *) table->extra1;
+ max = (unsigned long *) table->extra2;
+ vleft = table->maxlen / sizeof(unsigned long);
+ left = *lenp;
+
+ for (; left && vleft--; i++, first=0) {
+ if (write) {
+ while (left) {
+ char c;
+ if(get_user(c, (char *) buffer))
+ return -EFAULT;
+ if (!isspace(c))
+ break;
+ left--;
+ ((char *) buffer)++;
+ }
+ if (!left)
+ break;
+ neg = 0;
+ len = left;
+ if (len > TMPBUFLEN-1)
+ len = TMPBUFLEN-1;
+ if(copy_from_user(buf, buffer, len))
+ return -EFAULT;
+ buf[len] = 0;
+ p = buf;
+ if (*p == '-' && left > 1) {
+ neg = 1;
+ left--, p++;
+ }
+ if (*p < '0' || *p > '9')
+ break;
+ val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
+ len = p-buf;
+ if ((len < left) && *p && !isspace(*p))
+ break;
+ if (neg)
+ val = -val;
+ buffer += len;
+ left -= len;
+
+ if(neg)
+ continue;
+ if (min && val < *min++)
+ continue;
+ if (max && val > *max++)
+ continue;
+ *i = val;
+ } else {
+ p = buf;
+ if (!first)
+ *p++ = '\t';
+ sprintf(p, "%lu", convdiv * (*i) / convmul);
+ len = strlen(buf);
+ if (len > left)
+ len = left;
+ if(copy_to_user(buffer, buf, len))
+ return -EFAULT;
+ left -= len;
+ buffer += len;
+ }
+ }
+
+ if (!write && !first && left) {
+ if(put_user('\n', (char *) buffer))
+ return -EFAULT;
+ left--, buffer++;
+ }
+ if (write) {
+ p = (char *) buffer;
+ while (left) {
+ char c;
+ if(get_user(c, p++))
+ return -EFAULT;
+ if (!isspace(c))
+ break;
+ left--;
+ }
+ }
+ if (write && first)
+ return -EINVAL;
+ *lenp -= left;
+ filp->f_pos += *lenp;
+ return 0;
+#undef TMPBUFLEN
+}
+
+int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, 1l, 1l);
+}
+
+int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
+ struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return do_proc_doulongvec_minmax(table, write, filp, buffer,
+ lenp, HZ, 1000l);
+}
+
+
/* Like proc_dointvec, but converts seconds to jiffies */
int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
void *buffer, size_t *lenp)
{
- return do_proc_dointvec(table,write,filp,buffer,lenp,HZ);
+ return do_proc_dointvec(table,write,filp,buffer,lenp,HZ,OP_SET);
}
#else /* CONFIG_PROC_FS */
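proc_doulongvec_minmax() mirrors proc_dointvec_minmax() for vectors of unsigned long, with extra1/extra2 pointing at per-element minimum and maximum arrays, while the _ms_jiffies variant stores jiffies but presents milliseconds (the convmul/convdiv pair HZ and 1000). A hypothetical table entry could look like the sketch below; the trailing field order (strategy, de, extra1, extra2 after the handler) is assumed from this kernel's ctl_table layout, and MY_CTL_VALUE, my_value, my_min and my_max are made-up names:

	static unsigned long my_value = 100;
	static unsigned long my_min = 10, my_max = 1000;

	static ctl_table my_table[] = {
		{MY_CTL_VALUE, "my-value", &my_value, sizeof(unsigned long),
		 0644, NULL, &proc_doulongvec_minmax,
		 NULL, NULL, &my_min, &my_max},
		{0}
	};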
@@ -913,6 +1081,20 @@ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
return -ENOSYS;
}
+int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
+ struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+
#endif /* CONFIG_PROC_FS */
@@ -997,6 +1179,34 @@ int sysctl_intvec(ctl_table *table, int *name, int nlen,
return 0;
}
+/* Strategy function to convert jiffies to seconds */
+int sysctl_jiffies(ctl_table *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ if (oldval) {
+ size_t olen;
+ if (oldlenp) {
+ if (get_user(olen, oldlenp))
+ return -EFAULT;
+ if (olen!=sizeof(int))
+ return -EINVAL;
+ }
+ if (put_user(*(int *)(table->data) / HZ, (int *)oldval) ||
+ (oldlenp && put_user(sizeof(int),oldlenp)))
+ return -EFAULT;
+ }
+ if (newval && newlen) {
+ int new;
+ if (newlen != sizeof(int))
+ return -EINVAL;
+ if (get_user(new, (int *)newval))
+ return -EFAULT;
+ *(int *)(table->data) = new*HZ;
+ }
+ return 1;
+}
+
int do_string (
void *oldval, size_t *oldlenp, void *newval, size_t newlen,
int rdwr, char *data, size_t max)
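sysctl_jiffies() is the strategy-function counterpart of proc_dointvec_jiffies(): the variable is stored in jiffies, both interfaces exchange seconds (divide by HZ on read, multiply by HZ on write), and the return value 1 tells the sysctl core that the value has already been handled. A table entry would presumably name both handlers; again a hypothetical fragment, with the field order assumed as above:

	{MY_CTL_TIMEOUT, "my-timeout", &my_timeout_jiffies, sizeof(int),
	 0644, NULL, &proc_dointvec_jiffies, &sysctl_jiffies},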
@@ -1073,7 +1283,7 @@ int do_struct (
#else /* CONFIG_SYSCTL */
-extern asmlinkage int sys_sysctl(struct __sysctl_args *args)
+extern asmlinkage long sys_sysctl(struct __sysctl_args *args)
{
return -ENOSYS;
}
@@ -1111,11 +1321,24 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
}
int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
+int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
void *buffer, size_t *lenp)
{
return -ENOSYS;
}
+int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
+ struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return -ENOSYS;
+}
+
struct ctl_table_header * register_sysctl_table(ctl_table * table,
int insert_at_head)
{
diff --git a/kernel/time.c b/kernel/time.c
index 911442dad..1517d6d9d 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -64,7 +64,7 @@ void get_fast_time(struct timeval * t)
*
* XXX This function is NOT 64-bit clean!
*/
-asmlinkage int sys_time(int * tloc)
+asmlinkage long sys_time(int * tloc)
{
int i;
@@ -85,7 +85,7 @@ asmlinkage int sys_time(int * tloc)
* architectures that need it).
*/
-asmlinkage int sys_stime(int * tptr)
+asmlinkage long sys_stime(int * tptr)
{
int value;
@@ -106,7 +106,7 @@ asmlinkage int sys_stime(int * tptr)
#endif
-asmlinkage int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz)
{
if (tv) {
struct timeval ktv;
@@ -181,7 +181,7 @@ int do_sys_settimeofday(struct timeval *tv, struct timezone *tz)
return 0;
}
-asmlinkage int sys_settimeofday(struct timeval *tv, struct timezone *tz)
+asmlinkage long sys_settimeofday(struct timeval *tv, struct timezone *tz)
{
struct timeval new_tv;
struct timezone new_tz;
@@ -400,7 +400,7 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
return(result);
}
-asmlinkage int sys_adjtimex(struct timex *txc_p)
+asmlinkage long sys_adjtimex(struct timex *txc_p)
{
struct timex txc; /* Local copy of parameter */
int ret;