author      Ralf Baechle <ralf@linux-mips.org>      2000-02-23 00:40:54 +0000
committer   Ralf Baechle <ralf@linux-mips.org>      2000-02-23 00:40:54 +0000
commit      529c593ece216e4aaffd36bd940cb94f1fa63129 (patch)
tree        78f1c0b805f5656aa7b0417a043c5346f700a2cf /kernel
parent      0bd079751d25808d1972baee5c4eaa1db2227257 (diff)
Merge with 2.3.43. I ignored all modifications to the qlogicisp.c
driver due to the Origin A64 hacks.
Diffstat (limited to 'kernel')

-rw-r--r--   kernel/Makefile       |    8
-rw-r--r--   kernel/exec_domain.c  |    5
-rw-r--r--   kernel/exit.c         |    6
-rw-r--r--   kernel/itimer.c       |   10
-rw-r--r--   kernel/ksyms.c        |   32
-rw-r--r--   kernel/panic.c        |    6
-rw-r--r--   kernel/pm.c           |  170
-rw-r--r--   kernel/sched.c        |   69
-rw-r--r--   kernel/softirq.c      |  291
-rw-r--r--   kernel/sys.c          |    3
-rw-r--r--   kernel/sysctl.c       |   16
-rw-r--r--   kernel/timer.c        |   72

12 files changed, 551 insertions, 137 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 270692eb2..cce15a524 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -29,6 +29,14 @@ ifeq ($(CONFIG_MODULES),y) OX_OBJS += ksyms.o endif +ifdef CONFIG_ACPI +OX_OBJS += pm.o +else + ifdef CONFIG_APM + OX_OBJS += pm.o + endif +endif + CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer include $(TOPDIR)/Rules.make diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index c31678a4e..1ee1eee4d 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -103,12 +103,11 @@ asmlinkage long sys_personality(unsigned long personality) unsigned long old_personality; int ret; - lock_kernel(); - ret = current->personality; if (personality == 0xffffffff) - goto out; + return current->personality; ret = -EINVAL; + lock_kernel(); it = lookup_exec_domain(personality); if (!it) goto out; diff --git a/kernel/exit.c b/kernel/exit.c index 80280e821..65d72df43 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -247,6 +247,7 @@ struct mm_struct * start_lazy_tlb(void) current->mm = NULL; /* active_mm is still 'mm' */ atomic_inc(&mm->mm_count); + enter_lazy_tlb(mm, current, smp_processor_id()); return mm; } @@ -275,6 +276,7 @@ static inline void __exit_mm(struct task_struct * tsk) mm_release(); if (mm != tsk->active_mm) BUG(); tsk->mm = NULL; + enter_lazy_tlb(mm, current, smp_processor_id()); mmput(mm); } } @@ -393,9 +395,7 @@ NORET_TYPE void do_exit(long code) if (!tsk->pid) panic("Attempted to kill the idle task!"); tsk->flags |= PF_EXITING; - start_bh_atomic(); - del_timer(&tsk->real_timer); - end_bh_atomic(); + del_timer_sync(&tsk->real_timer); lock_kernel(); fake_volatile: diff --git a/kernel/itimer.c b/kernel/itimer.c index 7d38ac1ac..6c38477be 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -48,7 +48,9 @@ int do_getitimer(int which, struct itimerval *value) case ITIMER_REAL: interval = current->it_real_incr; val = 0; - start_bh_atomic(); + /* + * FIXME! This needs to be atomic, in case the kernel timer happens! 
+ */ if (timer_pending(¤t->real_timer)) { val = current->real_timer.expires - jiffies; @@ -56,7 +58,6 @@ int do_getitimer(int which, struct itimerval *value) if ((long) val <= 0) val = 1; } - end_bh_atomic(); break; case ITIMER_VIRTUAL: val = current->it_virt_value; @@ -102,6 +103,7 @@ void it_real_fn(unsigned long __data) p->real_timer.expires = jiffies + interval; add_timer(&p->real_timer); } + timer_exit(&p->real_timer); } int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) @@ -115,9 +117,7 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) return k; switch (which) { case ITIMER_REAL: - start_bh_atomic(); - del_timer(¤t->real_timer); - end_bh_atomic(); + del_timer_sync(¤t->real_timer); current->it_real_value = j; current->it_real_incr = i; if (!j) diff --git a/kernel/ksyms.c b/kernel/ksyms.c index dea3c38a5..bd74f5413 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -151,6 +151,7 @@ EXPORT_SYMBOL(d_instantiate); EXPORT_SYMBOL(d_alloc); EXPORT_SYMBOL(d_lookup); EXPORT_SYMBOL(d_path); +EXPORT_SYMBOL(mark_buffer_dirty); EXPORT_SYMBOL(__mark_buffer_dirty); EXPORT_SYMBOL(__mark_inode_dirty); EXPORT_SYMBOL(free_kiovec); @@ -163,7 +164,7 @@ EXPORT_SYMBOL(filp_close); EXPORT_SYMBOL(put_filp); EXPORT_SYMBOL(files_lock); EXPORT_SYMBOL(check_disk_change); -EXPORT_SYMBOL(invalidate_buffers); +EXPORT_SYMBOL(__invalidate_buffers); EXPORT_SYMBOL(invalidate_inodes); EXPORT_SYMBOL(invalidate_inode_pages); EXPORT_SYMBOL(truncate_inode_pages); @@ -183,11 +184,12 @@ EXPORT_SYMBOL(__bforget); EXPORT_SYMBOL(ll_rw_block); EXPORT_SYMBOL(__wait_on_buffer); EXPORT_SYMBOL(___wait_on_page); -EXPORT_SYMBOL(block_read_full_page); EXPORT_SYMBOL(block_write_full_page); -EXPORT_SYMBOL(block_write_partial_page); -EXPORT_SYMBOL(block_write_cont_page); -EXPORT_SYMBOL(block_write_zero_range); +EXPORT_SYMBOL(block_read_full_page); +EXPORT_SYMBOL(block_prepare_write); +EXPORT_SYMBOL(cont_prepare_write); +EXPORT_SYMBOL(generic_commit_write); +EXPORT_SYMBOL(generic_block_bmap); EXPORT_SYMBOL(generic_file_read); EXPORT_SYMBOL(do_generic_file_read); EXPORT_SYMBOL(generic_file_write); @@ -222,6 +224,7 @@ EXPORT_SYMBOL(vfs_readlink); EXPORT_SYMBOL(vfs_follow_link); EXPORT_SYMBOL(page_readlink); EXPORT_SYMBOL(page_follow_link); +EXPORT_SYMBOL(page_symlink_inode_operations); EXPORT_SYMBOL(block_symlink); /* for stackable file systems (lofs, wrapfs, etc.) 
*/ @@ -263,8 +266,6 @@ EXPORT_SYMBOL(ioctl_by_bdev); EXPORT_SYMBOL(gendisk_head); EXPORT_SYMBOL(grok_partitions); EXPORT_SYMBOL(register_disk); -EXPORT_SYMBOL(unplug_device); -EXPORT_SYMBOL(make_request); EXPORT_SYMBOL(tq_disk); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL(refile_buffer); @@ -317,12 +318,11 @@ EXPORT_SYMBOL(request_irq); EXPORT_SYMBOL(free_irq); EXPORT_SYMBOL(probe_irq_on); EXPORT_SYMBOL(probe_irq_off); -EXPORT_SYMBOL(bh_active); -EXPORT_SYMBOL(bh_mask); -EXPORT_SYMBOL(bh_mask_count); -EXPORT_SYMBOL(bh_base); EXPORT_SYMBOL(add_timer); EXPORT_SYMBOL(del_timer); +#ifdef __SMP__ +EXPORT_SYMBOL(del_timer_sync); +#endif EXPORT_SYMBOL(mod_timer); EXPORT_SYMBOL(tq_timer); EXPORT_SYMBOL(tq_immediate); @@ -369,7 +369,9 @@ EXPORT_SYMBOL(schedule_timeout); EXPORT_SYMBOL(jiffies); EXPORT_SYMBOL(xtime); EXPORT_SYMBOL(do_gettimeofday); +#ifndef __ia64__ EXPORT_SYMBOL(loops_per_sec); +#endif EXPORT_SYMBOL(kstat); /* misc */ @@ -380,7 +382,6 @@ EXPORT_SYMBOL(vsprintf); EXPORT_SYMBOL(kdevname); EXPORT_SYMBOL(bdevname); EXPORT_SYMBOL(cdevname); -EXPORT_SYMBOL(partition_name); /* md.c only */ EXPORT_SYMBOL(simple_strtoul); EXPORT_SYMBOL(system_utsname); /* UTS data */ EXPORT_SYMBOL(uts_sem); /* UTS semaphore */ @@ -460,5 +461,12 @@ EXPORT_SYMBOL(get_fast_time); /* library functions */ EXPORT_SYMBOL(strnicmp); +/* software interrupts */ +EXPORT_SYMBOL(tasklet_hi_vec); +EXPORT_SYMBOL(bh_task_vec); +EXPORT_SYMBOL(init_bh); +EXPORT_SYMBOL(remove_bh); + /* init task, for moving kthread roots - ought to export a function ?? */ + EXPORT_SYMBOL(init_task_union); diff --git a/kernel/panic.c b/kernel/panic.c index 8a68b3ad5..e040ee454 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -41,10 +41,10 @@ NORET_TYPE void panic(const char * fmt, ...) vsprintf(buf, fmt, args); va_end(args); printk(KERN_EMERG "Kernel panic: %s\n",buf); - if (current == init_tasks[0]) - printk(KERN_EMERG "In swapper task - not syncing\n"); - else if (in_interrupt()) + if (in_interrupt()) printk(KERN_EMERG "In interrupt handler - not syncing\n"); + else if (!current->pid) + printk(KERN_EMERG "In idle task - not syncing\n"); else sys_sync(); diff --git a/kernel/pm.c b/kernel/pm.c new file mode 100644 index 000000000..26811bff2 --- /dev/null +++ b/kernel/pm.c @@ -0,0 +1,170 @@ +/* + * pm.c - Power management interface + * + * Copyright (C) 2000 Andrew Henroid + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/pm.h> + +int pm_active = 0; + +static spinlock_t pm_devs_lock = SPIN_LOCK_UNLOCKED; +static LIST_HEAD(pm_devs); + +/* + * Register a device with power management + */ +struct pm_dev *pm_register(pm_dev_t type, + unsigned long id, + pm_callback callback) +{ + struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL); + if (dev) { + unsigned long flags; + + memset(dev, 0, sizeof(*dev)); + dev->type = type; + dev->id = id; + dev->callback = callback; + + spin_lock_irqsave(&pm_devs_lock, flags); + list_add(&dev->entry, &pm_devs); + spin_unlock_irqrestore(&pm_devs_lock, flags); + } + return dev; +} + +/* + * Unregister a device with power management + */ +void pm_unregister(struct pm_dev *dev) +{ + if (dev) { + unsigned long flags; + + spin_lock_irqsave(&pm_devs_lock, flags); + list_del(&dev->entry); + spin_unlock_irqrestore(&pm_devs_lock, flags); + + kfree(dev); + } +} + +/* + * Unregister all devices with matching callback + */ +void pm_unregister_all(pm_callback callback) +{ + struct list_head *entry; + + if (!callback) + return; + + entry = pm_devs.next; + while (entry != &pm_devs) { + struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); + entry = entry->next; + if (dev->callback == callback) + pm_unregister(dev); + } +} + +/* + * Send request to an individual device + */ +static int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data) +{ + int status = 0; + int next_state; + switch (rqst) { + case PM_SUSPEND: + case PM_RESUME: + next_state = (int) data; + if (dev->state != next_state) { + if (dev->callback) + status = (*dev->callback)(dev, rqst, data); + if (!status) + dev->state = next_state; + } + break; + default: + if (dev->callback) + status = (*dev->callback)(dev, rqst, data); + break; + } + return status; +} + +/* + * Undo incomplete request + */ +static void pm_undo_request(struct pm_dev *last, pm_request_t undo, void *data) +{ + struct list_head *entry = last->entry.prev; + while (entry != &pm_devs) { + struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); + if (dev->callback) + pm_send(dev, undo, data); + entry = entry->prev; + } +} + +/* + * Send a request to all devices + */ +int pm_send_request(pm_request_t rqst, void *data) +{ + struct list_head *entry = pm_devs.next; + while (entry != &pm_devs) { + struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); + if (dev->callback) { + int status = pm_send(dev, rqst, data); + if (status) { + /* resume devices on failed suspend request */ + if (rqst == PM_SUSPEND) + pm_undo_request(dev, PM_RESUME, 0); + return status; + } + } + entry = entry->next; + } + return 0; +} + +/* + * Find a device + */ +struct pm_dev *pm_find(pm_dev_t type, struct pm_dev *from) +{ + struct list_head *entry = from ? 
from->entry.next:pm_devs.next; + while (entry != &pm_devs) { + struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); + if (type == PM_UNKNOWN_DEV || dev->type == type) + return dev; + entry = entry->next; + } + return 0; +} + +EXPORT_SYMBOL(pm_register); +EXPORT_SYMBOL(pm_unregister); +EXPORT_SYMBOL(pm_unregister_all); +EXPORT_SYMBOL(pm_send_request); +EXPORT_SYMBOL(pm_find); diff --git a/kernel/sched.c b/kernel/sched.c index ce72ecc7b..03c05e7c3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -199,30 +199,17 @@ static inline void reschedule_idle(struct task_struct * p, unsigned long flags) goto send_now; /* - * The only heuristics - we use the tsk->avg_slice value - * to detect 'frequent reschedulers'. - * - * If both the woken-up process and the preferred CPU is - * is a frequent rescheduler, then skip the asynchronous - * wakeup, the frequent rescheduler will likely chose this - * task during it's next schedule(): - */ - if (p->policy == SCHED_OTHER) { - tsk = cpu_curr(best_cpu); - if (p->avg_slice + tsk->avg_slice < cacheflush_time) - goto out_no_target; - } - - /* * We know that the preferred CPU has a cache-affine current * process, lets try to find a new idle CPU for the woken-up * process: */ - for (i = 0; i < smp_num_cpus; i++) { + for (i = smp_num_cpus - 1; i >= 0; i--) { cpu = cpu_logical_map(i); + if (cpu == best_cpu) + continue; tsk = cpu_curr(cpu); /* - * We use the first available idle CPU. This creates + * We use the last available idle CPU. This creates * a priority list between idle CPUs, but this is not * a problem. */ @@ -232,26 +219,32 @@ static inline void reschedule_idle(struct task_struct * p, unsigned long flags) /* * No CPU is idle, but maybe this process has enough priority - * to preempt it's preferred CPU. (this is a shortcut): + * to preempt it's preferred CPU. */ tsk = cpu_curr(best_cpu); if (preemption_goodness(tsk, p, best_cpu) > 0) goto send_now; /* - * We should get here rarely - or in the high CPU contention + * We will get here often - or in the high CPU contention * case. No CPU is idle and this process is either lowprio or - * the preferred CPU is highprio. Maybe some other CPU can/must - * be preempted: + * the preferred CPU is highprio. Try to preemt some other CPU + * only if it's RT or if it's iteractive and the preferred + * cpu won't reschedule shortly. */ - for (i = 0; i < smp_num_cpus; i++) { - cpu = cpu_logical_map(i); - tsk = cpu_curr(cpu); - if (preemption_goodness(tsk, p, cpu) > 0) - goto send_now; + if ((p->avg_slice < cacheflush_time && cpu_curr(best_cpu)->avg_slice > cacheflush_time) || + p->policy != SCHED_OTHER) + { + for (i = smp_num_cpus - 1; i >= 0; i--) { + cpu = cpu_logical_map(i); + if (cpu == best_cpu) + continue; + tsk = cpu_curr(cpu); + if (preemption_goodness(tsk, p, cpu) > 0) + goto send_now; + } } -out_no_target: spin_unlock_irqrestore(&runqueue_lock, flags); return; @@ -397,6 +390,9 @@ signed long schedule_timeout(signed long timeout) add_timer(&timer); schedule(); del_timer(&timer); + /* RED-PEN. Timer may be running now on another cpu. + * Pray that process will not exit enough fastly. 
+ */ timeout = expire - jiffies; @@ -460,9 +456,9 @@ tq_scheduler_back: release_kernel_lock(prev, this_cpu); /* Do "administrative" work here while we don't hold any locks */ - if (bh_mask & bh_active) - goto handle_bh; -handle_bh_back: + if (softirq_state[this_cpu].active & softirq_state[this_cpu].mask) + goto handle_softirq; +handle_softirq_back: /* * 'sched_data' is protected by the fact that we can run @@ -581,6 +577,7 @@ still_running_back: if (next->active_mm) BUG(); next->active_mm = oldmm; atomic_inc(&oldmm->mm_count); + enter_lazy_tlb(oldmm, next, this_cpu); } else { if (next->active_mm != mm) BUG(); switch_mm(oldmm, mm, next, this_cpu); @@ -620,9 +617,9 @@ still_running: next = prev; goto still_running_back; -handle_bh: - do_bottom_half(); - goto handle_bh_back; +handle_softirq: + do_softirq(); + goto handle_softirq_back; handle_tq_scheduler: run_task_queue(&tq_scheduler); @@ -1148,7 +1145,6 @@ void daemonize(void) void __init init_idle(void) { - cycles_t t; struct schedule_data * sched_data; sched_data = &aligned_data[smp_processor_id()].schedule_data; @@ -1157,9 +1153,8 @@ void __init init_idle(void) smp_processor_id(), current->pid); del_from_runqueue(current); } - t = get_cycles(); sched_data->curr = current; - sched_data->last_schedule = t; + sched_data->last_schedule = get_cycles(); } void __init sched_init(void) @@ -1184,5 +1179,5 @@ void __init sched_init(void) * The boot idle thread does lazy MMU switching as well: */ atomic_inc(&init_mm.mm_count); + enter_lazy_tlb(&init_mm, current, cpu); } - diff --git a/kernel/softirq.c b/kernel/softirq.c index d184c944e..0f3c23ee9 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -3,68 +3,271 @@ * * Copyright (C) 1992 Linus Torvalds * - * do_bottom_half() runs at normal kernel priority: all interrupts - * enabled. do_bottom_half() is atomic with respect to itself: a - * bottom_half handler need not be re-entrant. - * * Fixed a disable_bh()/enable_bh() race (was causing a console lockup) * due bh_mask_count not atomic handling. Copyright (C) 1998 Andrea Arcangeli + * + * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) */ #include <linux/mm.h> #include <linux/kernel_stat.h> #include <linux/interrupt.h> #include <linux/smp_lock.h> +#include <linux/init.h> -#include <asm/io.h> +/* + - No shared variables, all the data are CPU local. + - If a softirq needs serialization, let it serialize itself + by its own spinlocks. + - Even if softirq is serialized, only local cpu is marked for + execution. Hence, we get something sort of weak cpu binding. + Though it is still not clear, will it result in better locality + or will not. + - These softirqs are not masked by global cli() and start_bh_atomic() + (by clear reasons). Hence, old parts of code still using global locks + MUST NOT use softirqs, but insert interfacing routines acquiring + global locks. F.e. look at BHs implementation. -/* intr_count died a painless death... -DaveM */ + Examples: + - NET RX softirq. It is multithreaded and does not require + any global serialization. + - NET TX softirq. It kicks software netdevice queues, hence + it is logically serialized per device, but this serialization + is invisible to common code. + - Tasklets: serialized wrt itself. + - Bottom halves: globally serialized, grr... + */ -atomic_t bh_mask_count[32]; -unsigned long bh_active = 0; -unsigned long bh_mask = 0; -void (*bh_base[32])(void); -/* - * This needs to make sure that only one bottom half handler - * is ever active at a time. 
We do this without locking by - * doing an atomic increment on the intr_count, and checking - * (nonatomically) against 1. Only if it's 1 do we schedule - * the bottom half. - * - * Note that the non-atomicity of the test (as opposed to the - * actual update) means that the test may fail, and _nobody_ - * runs the handlers if there is a race that makes multiple - * CPU's get here at the same time. That's ok, we'll run them - * next time around. - */ -static inline void run_bottom_halves(void) +struct softirq_state softirq_state[NR_CPUS]; +static struct softirq_action softirq_vec[32]; + +asmlinkage void do_softirq() { - unsigned long active; - void (**bh)(void); - - active = get_active_bhs(); - clear_active_bhs(active); - bh = bh_base; - do { - if (active & 1) - (*bh)(); - bh++; - active >>= 1; - } while (active); + int cpu = smp_processor_id(); + __u32 active, mask; + + if (in_interrupt()) + return; + + local_bh_disable(); + + local_irq_disable(); + mask = softirq_state[cpu].mask; + active = softirq_state[cpu].active & mask; + + if (active) { + struct softirq_action *h; + +restart: + /* Reset active bitmask before enabling irqs */ + softirq_state[cpu].active &= ~active; + + local_irq_enable(); + + h = softirq_vec; + mask &= ~active; + + do { + if (active & 1) + h->action(h); + h++; + active >>= 1; + } while (active); + + local_irq_disable(); + + active = softirq_state[cpu].active; + if ((active &= mask) != 0) + goto retry; + } + + local_bh_enable(); + + /* Leave with locally disabled hard irqs. It is critical to close + * window for infinite recursion, while we help local bh count, + * it protected us. Now we are defenceless. + */ + return; + +retry: + goto restart; } -asmlinkage void do_bottom_half(void) + +static spinlock_t softirq_mask_lock = SPIN_LOCK_UNLOCKED; + +void open_softirq(int nr, void (*action)(struct softirq_action*), void *data) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&softirq_mask_lock, flags); + softirq_vec[nr].data = data; + softirq_vec[nr].action = action; + + for (i=0; i<NR_CPUS; i++) + softirq_state[i].mask |= (1<<nr); + spin_unlock_irqrestore(&softirq_mask_lock, flags); +} + + +/* Tasklets */ + +struct tasklet_head tasklet_vec[NR_CPUS] __cacheline_aligned; + +static void tasklet_action(struct softirq_action *a) { int cpu = smp_processor_id(); + struct tasklet_struct *list; + + local_irq_disable(); + list = tasklet_vec[cpu].list; + tasklet_vec[cpu].list = NULL; + local_irq_enable(); - if (softirq_trylock(cpu)) { - if (hardirq_trylock(cpu)) { - __sti(); - run_bottom_halves(); - __cli(); - hardirq_endlock(cpu); + while (list != NULL) { + struct tasklet_struct *t = list; + + list = list->next; + + if (tasklet_trylock(t)) { + if (atomic_read(&t->count) == 0) { + clear_bit(TASKLET_STATE_SCHED, &t->state); + + t->func(t->data); + tasklet_unlock(t); + continue; + } + tasklet_unlock(t); } - softirq_endlock(cpu); + local_irq_disable(); + t->next = tasklet_vec[cpu].list; + tasklet_vec[cpu].list = t; + __cpu_raise_softirq(cpu, TASKLET_SOFTIRQ); + local_irq_enable(); } } + + + +struct tasklet_head tasklet_hi_vec[NR_CPUS] __cacheline_aligned; + +static void tasklet_hi_action(struct softirq_action *a) +{ + int cpu = smp_processor_id(); + struct tasklet_struct *list; + + local_irq_disable(); + list = tasklet_hi_vec[cpu].list; + tasklet_hi_vec[cpu].list = NULL; + local_irq_enable(); + + while (list != NULL) { + struct tasklet_struct *t = list; + + list = list->next; + + if (tasklet_trylock(t)) { + if (atomic_read(&t->count) == 0) { + clear_bit(TASKLET_STATE_SCHED, 
&t->state); + + t->func(t->data); + tasklet_unlock(t); + continue; + } + tasklet_unlock(t); + } + local_irq_disable(); + t->next = tasklet_hi_vec[cpu].list; + tasklet_hi_vec[cpu].list = t; + __cpu_raise_softirq(cpu, HI_SOFTIRQ); + local_irq_enable(); + } +} + + +void tasklet_init(struct tasklet_struct *t, + void (*func)(unsigned long), unsigned long data) +{ + t->func = func; + t->data = data; + t->state = 0; + atomic_set(&t->count, 0); +} + +void tasklet_kill(struct tasklet_struct *t) +{ + while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { + if (in_interrupt()) + panic("Attempt to kill tasklet from interrupt\n"); + schedule(); + } + tasklet_unlock_wait(t); +} + + + +/* Old style BHs */ + +static void (*bh_base[32])(void); +struct tasklet_struct bh_task_vec[32]; + +/* BHs are serialized by spinlock global_bh_lock. + + It is still possible to make synchronize_bh() as + spin_unlock_wait(&global_bh_lock). This operation is not used + by kernel now, so that this lock is not made private only + due to wait_on_irq(). + + It can be removed only after auditing all the BHs. + */ +spinlock_t global_bh_lock = SPIN_LOCK_UNLOCKED; + +static void bh_action(unsigned long nr) +{ + int cpu = smp_processor_id(); + + if (!spin_trylock(&global_bh_lock)) + goto resched; + + if (!hardirq_trylock(cpu)) + goto resched_unlock; + + if (bh_base[nr]) + bh_base[nr](); + + hardirq_endlock(cpu); + spin_unlock(&global_bh_lock); + return; + +resched_unlock: + spin_unlock(&global_bh_lock); +resched: + mark_bh(nr); +} + +void init_bh(int nr, void (*routine)(void)) +{ + bh_base[nr] = routine; + mb(); +} + +void remove_bh(int nr) +{ + tasklet_kill(bh_task_vec+nr); + bh_base[nr] = NULL; +} + +void __init softirq_init() +{ + int i; + + for (i=0; i<32; i++) + tasklet_init(bh_task_vec+i, bh_action, i); + + open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL); + open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL); +} + + diff --git a/kernel/sys.c b/kernel/sys.c index 57940edea..e3f7c5e2b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -908,6 +908,8 @@ asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim) ? -EFAULT : 0; } +#if !defined(__ia64__) + /* * Back compatibility for getrlimit. Needed for some apps. 
*/ @@ -926,6 +928,7 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit *rlim) return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; } +#endif asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 50ba37060..9a6a7a74d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -111,17 +111,8 @@ static int proc_sys_permission(struct inode *, int); struct file_operations proc_sys_file_operations = { - NULL, /* lseek */ - proc_readsys, /* read */ - proc_writesys, /* write */ - NULL, /* readdir */ - NULL, /* poll */ - NULL, /* ioctl */ - NULL, /* mmap */ - NULL, /* no special open code */ - NULL, /* no special flush code */ - NULL, /* no special release code */ - NULL /* can't fsync */ + read: proc_readsys, + write: proc_writesys, }; struct inode_operations proc_sys_inode_operations = @@ -138,9 +129,6 @@ struct inode_operations proc_sys_inode_operations = NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ NULL, /* truncate */ proc_sys_permission, /* permission */ NULL /* revalidate */ diff --git a/kernel/timer.c b/kernel/timer.c index fccf7faa7..f087d239f 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -105,13 +105,15 @@ static struct timer_vec * const tvecs[] = { static unsigned long timer_jiffies = 0; -static inline void insert_timer(struct timer_list *timer, - struct timer_list **vec, int idx) +static inline void insert_timer(struct timer_list *timer, struct timer_list **vec) { - if ((timer->next = vec[idx])) - vec[idx]->prev = timer; - vec[idx] = timer; - timer->prev = (struct timer_list *)&vec[idx]; + struct timer_list *next = *vec; + + timer->next = next; + if (next) + next->prev = timer; + *vec = timer; + timer->prev = (struct timer_list *)vec; } static inline void internal_add_timer(struct timer_list *timer) @@ -121,31 +123,34 @@ static inline void internal_add_timer(struct timer_list *timer) */ unsigned long expires = timer->expires; unsigned long idx = expires - timer_jiffies; + struct timer_list ** vec; if (idx < TVR_SIZE) { int i = expires & TVR_MASK; - insert_timer(timer, tv1.vec, i); + vec = tv1.vec + i; } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { int i = (expires >> TVR_BITS) & TVN_MASK; - insert_timer(timer, tv2.vec, i); + vec = tv2.vec + i; } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; - insert_timer(timer, tv3.vec, i); + vec = tv3.vec + i; } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; - insert_timer(timer, tv4.vec, i); + vec = tv4.vec + i; } else if ((signed long) idx < 0) { /* can happen if you add a timer with expires == jiffies, * or you set a timer to go off in the past */ - insert_timer(timer, tv1.vec, tv1.index); + vec = tv1.vec + tv1.index; } else if (idx <= 0xffffffffUL) { int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; - insert_timer(timer, tv5.vec, i); + vec = tv5.vec + i; } else { /* Can only get here on architectures with 64-bit jiffies */ timer->next = timer->prev = timer; + return; } + insert_timer(timer, vec); } spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED; @@ -181,15 +186,17 @@ static inline int detach_timer(struct timer_list *timer) return 0; } -void mod_timer(struct timer_list *timer, unsigned long expires) +int mod_timer(struct timer_list *timer, unsigned long expires) { + int ret; unsigned long flags; spin_lock_irqsave(&timerlist_lock, flags); 
timer->expires = expires; - detach_timer(timer); + ret = detach_timer(timer); internal_add_timer(timer); spin_unlock_irqrestore(&timerlist_lock, flags); + return ret; } int del_timer(struct timer_list * timer) @@ -204,6 +211,39 @@ int del_timer(struct timer_list * timer) return ret; } +#ifdef __SMP__ +/* + * SMP specific function to delete periodic timer. + * Caller must disable by some means restarting the timer + * for new. Upon exit the timer is not queued and handler is not running + * on any CPU. It returns number of times, which timer was deleted + * (for reference counting). + */ + +int del_timer_sync(struct timer_list * timer) +{ + int ret = 0; + + for (;;) { + unsigned long flags; + int running; + + spin_lock_irqsave(&timerlist_lock, flags); + ret += detach_timer(timer); + timer->next = timer->prev = 0; + running = timer->running; + spin_unlock_irqrestore(&timerlist_lock, flags); + + if (!running) + return ret; + timer_synchronize(timer); + } + + return ret; +} +#endif + + static inline void cascade_timers(struct timer_vec *tv) { /* cascade all the timers from tv up one level */ @@ -238,6 +278,7 @@ static inline void run_timer_list(void) unsigned long data = timer->data; detach_timer(timer); timer->next = timer->prev = NULL; + timer_set_running(timer); spin_unlock_irq(&timerlist_lock); fn(data); spin_lock_irq(&timerlist_lock); @@ -553,8 +594,7 @@ static unsigned long count_active_tasks(void) read_lock(&tasklist_lock); for_each_task(p) { if ((p->state == TASK_RUNNING || - (p->state & TASK_UNINTERRUPTIBLE) || - (p->state & TASK_SWAPPING))) + (p->state & TASK_UNINTERRUPTIBLE))) nr += FIXED_1; } read_unlock(&tasklist_lock); |
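
The largest addition in this merge is the new kernel/pm.c power-management core: a spinlock-protected global list of devices plus pm_send_request(), which walks the list and rolls back an incomplete suspend. Below is a rough sketch of how a driver might hook into it, using only the entry points visible in the patch (pm_register(), pm_unregister(), the int-returning callback, PM_SUSPEND/PM_RESUME); the mydev_* helpers are hypothetical, and PM_UNKNOWN_DEV is borrowed from pm_find() purely for illustration.

```c
/*
 * Illustrative driver glue for the new kernel/pm.c interface.
 * Only pm_register()/pm_unregister() and the callback signature are
 * taken from the patch; mydev_stop()/mydev_start() stand in for real
 * hardware handling.
 */
#include <linux/pm.h>

static struct pm_dev *mydev_pm;

static void mydev_stop(void)  { /* quiesce the hardware (stub) */ }
static void mydev_start(void) { /* re-enable the hardware (stub) */ }

static int mydev_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *data)
{
	switch (rqst) {
	case PM_SUSPEND:
		/* Returning non-zero here makes pm_send_request() resume
		 * the devices it had already suspended. */
		mydev_stop();
		break;
	case PM_RESUME:
		mydev_start();
		break;
	default:
		break;
	}
	return 0;
}

int mydev_init(void)
{
	/* PM_UNKNOWN_DEV is the wildcard type used by pm_find(); a real
	 * driver would pass its bus-specific type and id. */
	mydev_pm = pm_register(PM_UNKNOWN_DEV, 0, mydev_pm_callback);
	return 0;
}

void mydev_cleanup(void)
{
	pm_unregister(mydev_pm);	/* NULL-safe per the patch */
}
```

The policy side (APM or ACPI) would then call pm_send_request(PM_SUSPEND, data) to notify every registered device; the (int) data cast in pm_send() suggests the target state is passed through the data pointer.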
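kernel/softirq.c is rewritten around per-CPU softirqs: bh_base/bh_active/bh_mask disappear, tasklets become the normal way to defer work out of interrupt context, and the 32 legacy bottom halves are reimplemented as tasklets serialized by global_bh_lock. A minimal sketch of the new-style usage follows; tasklet_init() and tasklet_kill() come from this patch, while tasklet_schedule() and the 2.3 interrupt-handler prototype are assumed to be provided by <linux/interrupt.h>, and the mydev_* names are placeholders.

```c
/*
 * Sketch of deferring interrupt work with the new tasklet API.
 * tasklet_init()/tasklet_kill() are defined in the softirq.c rewrite;
 * tasklet_schedule() is assumed to come from <linux/interrupt.h>.
 */
#include <linux/interrupt.h>

static struct tasklet_struct mydev_tasklet;

/* Runs in softirq context; a given tasklet never runs on two CPUs at once. */
static void mydev_do_rx(unsigned long data)
{
	/* drain the device's receive ring here */
}

static void mydev_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
	/* acknowledge the hardware, then push the heavy work out of
	 * hard-irq context */
	tasklet_schedule(&mydev_tasklet);
}

static void mydev_open(void)
{
	tasklet_init(&mydev_tasklet, mydev_do_rx, 0);
}

static void mydev_close(void)
{
	/* waits until any scheduled run has finished */
	tasklet_kill(&mydev_tasklet);
}
```

Old-style code keeps working through init_bh()/remove_bh(), but every legacy bottom half now funnels through bh_action(), which takes global_bh_lock, so BHs stay globally serialized while tasklets and raw softirqs are not.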
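The kernel/timer.c changes explain why several hunks above drop the start_bh_atomic()/del_timer()/end_bh_atomic() dance: del_timer_sync() detaches the timer and, on SMP, loops via timer_synchronize() until the handler has finished on whatever CPU was running it (the patch compiles it only under __SMP__; a UP fallback to plain del_timer() is assumed to live in the header). mod_timer() now also reports whether it found the timer pending. Below is a sketch of the resulting teardown pattern for a self-rearming timer, with hypothetical mydev_* names.

```c
/*
 * Tearing down a self-rearming timer with del_timer_sync().
 * The caller must first stop the handler from re-arming the timer,
 * exactly as the comment above del_timer_sync() in the patch demands.
 */
#include <linux/timer.h>
#include <linux/sched.h>	/* jiffies, HZ */

static struct timer_list mydev_timer;
static int mydev_stopping;

static void mydev_poll(unsigned long data)
{
	/* ... periodic work ... */
	if (!mydev_stopping)
		mod_timer(&mydev_timer, jiffies + HZ);	/* re-arm in one second */
}

static void mydev_start_polling(void)
{
	init_timer(&mydev_timer);
	mydev_timer.function = mydev_poll;
	mydev_timer.data = 0;
	mod_timer(&mydev_timer, jiffies + HZ);
}

static void mydev_stop_polling(void)
{
	mydev_stopping = 1;		/* keep mydev_poll() from re-arming */
	del_timer_sync(&mydev_timer);	/* handler not running anywhere on return */
}
```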