diff options
author | Ralf Baechle <ralf@linux-mips.org> | 2001-03-09 20:33:35 +0000 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2001-03-09 20:33:35 +0000 |
commit | 116674acc97ba75a720329996877077d988443a2 (patch) | |
tree | 6a3f2ff0b612ae2ee8a3f3509370c9e6333a53b3 /kernel | |
parent | 71118c319fcae4a138f16e35b4f7e0a6d53ce2ca (diff) |
Merge with Linux 2.4.2.
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/acct.c | 1 | ||||
-rw-r--r-- | kernel/dma.c | 3 | ||||
-rw-r--r-- | kernel/exit.c | 3 | ||||
-rw-r--r-- | kernel/fork.c | 2 | ||||
-rw-r--r-- | kernel/ksyms.c | 3 | ||||
-rw-r--r-- | kernel/module.c | 24 | ||||
-rw-r--r-- | kernel/printk.c | 4 | ||||
-rw-r--r-- | kernel/resource.c | 2 | ||||
-rw-r--r-- | kernel/sched.c | 84 | ||||
-rw-r--r-- | kernel/sysctl.c | 221 |
10 files changed, 270 insertions, 77 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index e2e8826fa..a836dd1a6 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -53,6 +53,7 @@ #include <linux/acct.h> #include <linux/smp_lock.h> #include <linux/file.h> +#include <linux/tty.h> #include <asm/uaccess.h> diff --git a/kernel/dma.c b/kernel/dma.c index 3ee09759f..8f8586f42 100644 --- a/kernel/dma.c +++ b/kernel/dma.c @@ -116,9 +116,8 @@ int request_dma(unsigned int dmanr, const char *device_id) return -EINVAL; } -int free_dma(unsigned int dmanr) +void free_dma(unsigned int dmanr) { - return -EINVAL; } int get_dma_list(char *buf) diff --git a/kernel/exit.c b/kernel/exit.c index cd642927b..9fd9162e2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -5,10 +5,11 @@ */ #include <linux/config.h> -#include <linux/malloc.h> +#include <linux/slab.h> #include <linux/interrupt.h> #include <linux/smp_lock.h> #include <linux/module.h> +#include <linux/tty.h> #ifdef CONFIG_BSD_PROCESS_ACCT #include <linux/acct.h> #endif diff --git a/kernel/fork.c b/kernel/fork.c index e578a9644..6f0582cbf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -12,7 +12,7 @@ */ #include <linux/config.h> -#include <linux/malloc.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/unistd.h> #include <linux/smp_lock.h> diff --git a/kernel/ksyms.c b/kernel/ksyms.c index f36261500..ca9001dbe 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -10,7 +10,7 @@ */ #include <linux/config.h> -#include <linux/malloc.h> +#include <linux/slab.h> #include <linux/module.h> #include <linux/blkdev.h> #include <linux/cdrom.h> @@ -45,6 +45,7 @@ #include <linux/highuid.h> #include <linux/brlock.h> #include <linux/fs.h> +#include <linux/tty.h> #if defined(CONFIG_PROC_FS) #include <linux/proc_fs.h> diff --git a/kernel/module.c b/kernel/module.c index dd02b40cd..36023bce0 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -23,6 +23,7 @@ * Fix sys_init_module race, Andrew Morton <andrewm@uow.edu.au> Oct 2000 * 
http://www.uwsg.iu.edu/hypermail/linux/kernel/0008.3/0379.html * Replace xxx_module_symbol with inter_module_xxx. Keith Owens <kaos@ocs.com.au> Oct 2000 + * Add a module list lock for kernel fault race fixing. Alan Cox <alan@redhat.com> * * This source is covered by the GNU GPL, the same as all kernel sources. */ @@ -65,6 +66,17 @@ static struct list_head ime_list = LIST_HEAD_INIT(ime_list); static spinlock_t ime_lock = SPIN_LOCK_UNLOCKED; static int kmalloc_failed; +/* + * This lock prevents modifications that might race the kernel fault + * fixups. It does not prevent reader walks that the modules code + * does. The kernel lock does that. + * + * Since vmalloc fault fixups occur in any context this lock is taken + * irqsave at all times. + */ + +spinlock_t modlist_lock = SPIN_LOCK_UNLOCKED; + /** * inter_module_register - register a new set of inter module data. * @im_name: an arbitrary string to identify the data, must be unique @@ -283,6 +295,7 @@ sys_create_module(const char *name_user, size_t size) char *name; long namelen, error; struct module *mod; + unsigned long flags; if (!capable(CAP_SYS_MODULE)) return -EPERM; @@ -306,14 +319,16 @@ sys_create_module(const char *name_user, size_t size) memset(mod, 0, sizeof(*mod)); mod->size_of_struct = sizeof(*mod); - mod->next = module_list; mod->name = (char *)(mod + 1); mod->size = size; memcpy((char*)(mod+1), name, namelen+1); put_mod_name(name); + spin_lock_irqsave(&modlist_lock, flags); + mod->next = module_list; module_list = mod; /* link it in */ + spin_unlock_irqrestore(&modlist_lock, flags); error = (long) mod; goto err0; @@ -628,6 +643,7 @@ sys_delete_module(const char *name_user) /* Do automatic reaping */ restart: something_changed = 0; + for (mod = module_list; mod != &kernel_module; mod = next) { next = mod->next; spin_lock(&unload_lock); @@ -651,10 +667,13 @@ restart: spin_unlock(&unload_lock); } } + if (something_changed) goto restart; + for (mod = module_list; mod != &kernel_module; mod = mod->next) 
mod->flags &= ~MOD_JUST_FREED; + error = 0; out: unlock_kernel(); @@ -1018,6 +1037,7 @@ free_module(struct module *mod, int tag_freed) { struct module_ref *dep; unsigned i; + unsigned long flags; /* Let the module clean up. */ @@ -1041,6 +1061,7 @@ free_module(struct module *mod, int tag_freed) /* And from the main module list. */ + spin_lock_irqsave(&modlist_lock, flags); if (mod == module_list) { module_list = mod->next; } else { @@ -1049,6 +1070,7 @@ free_module(struct module *mod, int tag_freed) continue; p->next = mod->next; } + spin_unlock_irqrestore(&modlist_lock, flags); /* And free the memory. */ diff --git a/kernel/printk.c b/kernel/printk.c index 0c6299b0d..168d47422 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -15,6 +15,7 @@ */ #include <linux/mm.h> +#include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/smp_lock.h> #include <linux/console.h> @@ -125,9 +126,8 @@ int do_syslog(int type, char * buf, int len) unsigned long i, j, limit, count; int do_clear = 0; char c; - int error = -EPERM; + int error = 0; - error = 0; switch (type) { case 0: /* Close log */ break; diff --git a/kernel/resource.c b/kernel/resource.c index b553eb0ff..117368874 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -11,7 +11,7 @@ #include <linux/errno.h> #include <linux/ioport.h> #include <linux/init.h> -#include <linux/malloc.h> +#include <linux/slab.h> #include <linux/spinlock.h> #include <asm/io.h> diff --git a/kernel/sched.c b/kernel/sched.c index ec1f463d0..b6f6c879a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -326,9 +326,10 @@ static inline void move_first_runqueue(struct task_struct * p) * "current->state = TASK_RUNNING" to mark yourself runnable * without the overhead of this. */ -inline void wake_up_process(struct task_struct * p) +static inline int try_to_wake_up(struct task_struct * p, int synchronous) { unsigned long flags; + int success = 0; /* * We want the common case fall through straight, thus the goto. 
@@ -338,25 +339,17 @@ inline void wake_up_process(struct task_struct * p) if (task_on_runqueue(p)) goto out; add_to_runqueue(p); - reschedule_idle(p); + if (!synchronous) + reschedule_idle(p); + success = 1; out: spin_unlock_irqrestore(&runqueue_lock, flags); + return success; } -static inline void wake_up_process_synchronous(struct task_struct * p) +inline int wake_up_process(struct task_struct * p) { - unsigned long flags; - - /* - * We want the common case fall through straight, thus the goto. - */ - spin_lock_irqsave(&runqueue_lock, flags); - p->state = TASK_RUNNING; - if (task_on_runqueue(p)) - goto out; - add_to_runqueue(p); -out: - spin_unlock_irqrestore(&runqueue_lock, flags); + return try_to_wake_up(p, 0); } static void process_timeout(unsigned long __data) @@ -689,64 +682,59 @@ scheduling_in_interrupt: return; } +/* + * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just wake everything + * up. If it's an exclusive wakeup (nr_exclusive == small +ve number) then we wake all the + * non-exclusive tasks and one exclusive task. + * + * There are circumstances in which we can try to wake a task which has already + * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns zero + * in this (rare) case, and we handle it by continuing to scan the queue. 
+ */ static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode, int nr_exclusive, const int sync) { struct list_head *tmp, *head; struct task_struct *p; - unsigned long flags; - if (!q) - goto out; - - wq_write_lock_irqsave(&q->lock, flags); - -#if WAITQUEUE_DEBUG CHECK_MAGIC_WQHEAD(q); -#endif - head = &q->task_list; -#if WAITQUEUE_DEBUG - if (!head->next || !head->prev) - WQ_BUG(); -#endif + WQ_CHECK_LIST_HEAD(head); tmp = head->next; while (tmp != head) { unsigned int state; wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); tmp = tmp->next; - -#if WAITQUEUE_DEBUG CHECK_MAGIC(curr->__magic); -#endif p = curr->task; state = p->state; if (state & mode) { -#if WAITQUEUE_DEBUG - curr->__waker = (long)__builtin_return_address(0); -#endif - if (sync) - wake_up_process_synchronous(p); - else - wake_up_process(p); - if ((curr->flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) + WQ_NOTE_WAKER(curr); + if (try_to_wake_up(p, sync) && curr->flags && !--nr_exclusive) break; } } - wq_write_unlock_irqrestore(&q->lock, flags); -out: - return; } void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr) { - __wake_up_common(q, mode, nr, 0); + if (q) { + unsigned long flags; + wq_read_lock_irqsave(&q->lock, flags); + __wake_up_common(q, mode, nr, 0); + wq_read_unlock_irqrestore(&q->lock, flags); + } } void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr) { - __wake_up_common(q, mode, nr, 1); + if (q) { + unsigned long flags; + wq_read_lock_irqsave(&q->lock, flags); + __wake_up_common(q, mode, nr, 1); + wq_read_unlock_irqrestore(&q->lock, flags); + } } #define SLEEP_ON_VAR \ @@ -1127,11 +1115,11 @@ static void show_task(struct task_struct * p) else printk("\n"); -#ifdef CONFIG_X86 -/* This is very useful, but only works on x86 right now */ +#if defined(CONFIG_X86) || defined(CONFIG_SPARC64) +/* This is very useful, but only works on x86 and sparc64 right now */ { - extern void show_trace(unsigned long); - show_trace(p->thread.esp); + 
extern void show_trace_task(struct task_struct *tsk); + show_trace_task(p); } #endif } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1c22d7838..5484da89b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -19,7 +19,7 @@ */ #include <linux/config.h> -#include <linux/malloc.h> +#include <linux/slab.h> #include <linux/sysctl.h> #include <linux/swapctl.h> #include <linux/proc_fs.h> @@ -75,6 +75,14 @@ extern int sem_ctls[]; extern char reboot_command []; extern int stop_a_enabled; #endif + +#ifdef CONFIG_ARCH_S390 +#ifdef CONFIG_IEEEFPU_EMULATION +extern int sysctl_ieee_emulation_warnings; +#endif +extern int sysctl_userprocess_debug; +#endif + #ifdef __powerpc__ extern unsigned long htab_reclaim_on, zero_paged_on, powersave_nap; int proc_dol2crvec(ctl_table *table, int write, struct file *filp, @@ -130,9 +138,6 @@ static void register_proc_table(ctl_table *, struct proc_dir_entry *); static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); #endif -extern int inodes_stat[]; -extern int dentry_stat[]; - /* The default sysctl tables: */ static ctl_table root_table[] = { @@ -236,6 +241,14 @@ static ctl_table kern_table[] = { {KERN_OVERFLOWGID, "overflowgid", &overflowgid, sizeof(int), 0644, NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL, &minolduid, &maxolduid}, +#ifdef CONFIG_ARCH_S390 +#ifdef CONFIG_IEEEFPU_EMULATION + {KERN_IEEE_EMULATION_WARNINGS,"ieee_emulation_warnings", + &sysctl_ieee_emulation_warnings,sizeof(int),0644,NULL,&proc_dointvec}, +#endif + {KERN_S390_USER_DEBUG_LOGGING,"userprocess_debug", + &sysctl_userprocess_debug,sizeof(int),0644,NULL,&proc_dointvec}, +#endif {0} }; @@ -321,7 +334,7 @@ int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp, { struct list_head *tmp; - if (nlen == 0 || nlen >= CTL_MAXNAME) + if (nlen <= 0 || nlen >= CTL_MAXNAME) return -ENOTDIR; if (oldval) { int old_len; @@ -426,7 +439,8 @@ int do_sysctl_strategy (ctl_table *table, void *oldval, size_t *oldlenp, void *newval, size_t newlen, void 
**context) { - int op = 0, rc, len; + int op = 0, rc; + size_t len; if (oldval) op |= 004; @@ -469,13 +483,82 @@ int do_sysctl_strategy (ctl_table *table, return 0; } +/** + * register_sysctl_table - register a sysctl hierarchy + * @table: the top-level table structure + * @insert_at_head: whether the entry should be inserted in front or at the end + * + * Register a sysctl table hierarchy. @table should be a filled in ctl_table + * array. An entry with a ctl_name of 0 terminates the table. + * + * The members of the &ctl_table structure are used as follows: + * + * ctl_name - This is the numeric sysctl value used by sysctl(2). The number + * must be unique within that level of sysctl + * + * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not + * enter a sysctl file + * + * data - a pointer to data for use by proc_handler + * + * maxlen - the maximum size in bytes of the data + * + * mode - the file permissions for the /proc/sys file, and for sysctl(2) + * + * child - a pointer to the child sysctl table if this entry is a directory, or + * %NULL. + * + * proc_handler - the text handler routine (described below) + * + * strategy - the strategy routine (described below) + * + * de - for internal use by the sysctl routines + * + * extra1, extra2 - extra pointers usable by the proc handler routines + * + * Leaf nodes in the sysctl tree will be represented by a single file + * under /proc; non-leaf nodes will be represented by directories. + * + * sysctl(2) can automatically manage read and write requests through + * the sysctl table. The data and maxlen fields of the ctl_table + * struct enable minimal validation of the values being written to be + * performed, and the mode field allows minimal authentication. + * + * More sophisticated management can be enabled by the provision of a + * strategy routine with the table entry. This will be called before + * any automatic read or write of the data is performed. 
+ * + * The strategy routine may return + * + * < 0 - Error occurred (error is passed to user process) + * + * 0 - OK - proceed with automatic read or write. + * + * > 0 - OK - read or write has been done by the strategy routine, so + * return immediately. + * + * There must be a proc_handler routine for any terminal nodes + * mirrored under /proc/sys (non-terminals are handled by a built-in + * directory handler). Several default handlers are available to + * cover common cases - + * + * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), + * proc_dointvec_minmax(), proc_doulongvec_ms_jiffies_minmax(), + * proc_doulongvec_minmax() + * + * It is the handler's job to read the input buffer from user memory + * and process it. The handler should return 0 on success. + * + * This routine returns %NULL on a failure to register, and a pointer + * to the table header on success. + */ struct ctl_table_header *register_sysctl_table(ctl_table * table, int insert_at_head) { struct ctl_table_header *tmp; tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL); if (!tmp) - return 0; + return NULL; tmp->ctl_table = table; INIT_LIST_HEAD(&tmp->ctl_entry); if (insert_at_head) @@ -488,8 +571,12 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, return tmp; } -/* - * Unlink and free a ctl_table. +/** + * unregister_sysctl_table - unregister a sysctl table hierarchy + * @header: the header returned from register_sysctl_table + * + * Unregisters the sysctl table and all children. proc entries may not + * actually be removed until they are no longer used by anyone. 
*/ void unregister_sysctl_table(struct ctl_table_header * header) { @@ -633,10 +720,27 @@ static int proc_sys_permission(struct inode *inode, int op) return test_perm(inode->i_mode, op); } +/** + * proc_dostring - read a string sysctl + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes a string from/to the user buffer. If the kernel + * buffer provided is not large enough to hold the string, the + * string is truncated. The copied string is %NULL-terminated. + * If the string is being read by the user process, it is copied + * and a newline '\n' is added. It is truncated if the buffer is + * not large enough. + * + * Returns 0 on success. + */ int proc_dostring(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp) { - int len; + size_t len; char *p, c; if (!table->data || !table->maxlen || !*lenp || @@ -712,7 +816,9 @@ static int proc_doutsstring(ctl_table *table, int write, struct file *filp, static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp, int conv, int op) { - int *i, vleft, first=1, len, left, neg, val; + int *i, vleft, first=1, neg, val; + size_t left, len; + #define TMPBUFLEN 20 char buf[TMPBUFLEN], *p; @@ -810,6 +916,19 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, return 0; } +/** + * proc_dointvec - read a vector of integers + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned int) integer + * values from/to the user buffer, treated as an ASCII string. + * + * Returns 0 on success. 
+ */ int proc_dointvec(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp) { @@ -830,10 +949,27 @@ int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, (current->pid == 1) ? OP_SET : OP_AND); } +/** + * proc_dointvec_minmax - read a vector of integers with min/max values + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned int) integer + * values from/to the user buffer, treated as an ASCII string. + * + * This routine will ensure the values are within the range specified by + * table->extra1 (min) and table->extra2 (max). + * + * Returns 0 on success. + */ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp) { - int *i, *min, *max, vleft, first=1, len, left, neg, val; + int *i, *min, *max, vleft, first=1, neg, val; + size_t len, left; #define TMPBUFLEN 20 char buf[TMPBUFLEN], *p; @@ -928,10 +1064,6 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, return 0; } -/* - * an unsigned long function version - */ - static int do_proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp, @@ -940,7 +1072,8 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write, { #define TMPBUFLEN 20 unsigned long *i, *min, *max, val; - int vleft, first=1, len, left, neg; + int vleft, first=1, neg; + size_t len, left; char buf[TMPBUFLEN], *p; if (!table->data || !table->maxlen || !*lenp || @@ -1037,12 +1170,45 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write, #undef TMPBUFLEN } +/** + * proc_doulongvec_minmax - read a vector of long integers with min/max values + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size 
of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long + * values from/to the user buffer, treated as an ASCII string. + * + * This routine will ensure the values are within the range specified by + * table->extra1 (min) and table->extra2 (max). + * + * Returns 0 on success. + */ int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp) { return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, 1l, 1l); } +/** + * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long + * values from/to the user buffer, treated as an ASCII string. The values + * are treated as milliseconds, and converted to jiffies when they are stored. + * + * This routine will ensure the values are within the range specified by + * table->extra1 (min) and table->extra2 (max). + * + * Returns 0 on success. + */ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp) @@ -1052,7 +1218,21 @@ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, } -/* Like proc_dointvec, but converts seconds to jiffies */ +/** + * proc_dointvec_jiffies - read a vector of integers as seconds + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned int) integer + * values from/to the user buffer, treated as an ASCII string. + * The values read are assumed to be in seconds, and are converted into + * jiffies. + * + * Returns 0 on success. 
+ */ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp) { @@ -1123,7 +1303,7 @@ int sysctl_string(ctl_table *table, int *name, int nlen, void *oldval, size_t *oldlenp, void *newval, size_t newlen, void **context) { - int l, len; + size_t l, len; if (!table->data || !table->maxlen) return -ENOTDIR; @@ -1166,7 +1346,8 @@ int sysctl_intvec(ctl_table *table, int *name, int nlen, void *oldval, size_t *oldlenp, void *newval, size_t newlen, void **context) { - int i, length, *vec, *min, *max; + int i, *vec, *min, *max; + size_t length; if (newval && newlen) { if (newlen % sizeof(int) != 0) |