author    Ralf Baechle <ralf@linux-mips.org>  2000-02-23 00:40:54 +0000
committer Ralf Baechle <ralf@linux-mips.org>  2000-02-23 00:40:54 +0000
commit    529c593ece216e4aaffd36bd940cb94f1fa63129
tree      78f1c0b805f5656aa7b0417a043c5346f700a2cf /kernel
parent    0bd079751d25808d1972baee5c4eaa1db2227257
Merge with 2.3.43. I did ignore all modifications to the qlogicisp.c
driver due to the Origin A64 hacks.
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile      |    8
-rw-r--r--  kernel/exec_domain.c |    5
-rw-r--r--  kernel/exit.c        |    6
-rw-r--r--  kernel/itimer.c      |   10
-rw-r--r--  kernel/ksyms.c       |   32
-rw-r--r--  kernel/panic.c       |    6
-rw-r--r--  kernel/pm.c          |  170
-rw-r--r--  kernel/sched.c       |   69
-rw-r--r--  kernel/softirq.c     |  291
-rw-r--r--  kernel/sys.c         |    3
-rw-r--r--  kernel/sysctl.c      |   16
-rw-r--r--  kernel/timer.c       |   72
12 files changed, 551 insertions(+), 137 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 270692eb2..cce15a524 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -29,6 +29,14 @@ ifeq ($(CONFIG_MODULES),y)
OX_OBJS += ksyms.o
endif
+ifdef CONFIG_ACPI
+OX_OBJS += pm.o
+else
+ ifdef CONFIG_APM
+ OX_OBJS += pm.o
+ endif
+endif
+
CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer
include $(TOPDIR)/Rules.make
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index c31678a4e..1ee1eee4d 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -103,12 +103,11 @@ asmlinkage long sys_personality(unsigned long personality)
unsigned long old_personality;
int ret;
- lock_kernel();
- ret = current->personality;
if (personality == 0xffffffff)
- goto out;
+ return current->personality;
ret = -EINVAL;
+ lock_kernel();
it = lookup_exec_domain(personality);
if (!it)
goto out;
diff --git a/kernel/exit.c b/kernel/exit.c
index 80280e821..65d72df43 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -247,6 +247,7 @@ struct mm_struct * start_lazy_tlb(void)
current->mm = NULL;
/* active_mm is still 'mm' */
atomic_inc(&mm->mm_count);
+ enter_lazy_tlb(mm, current, smp_processor_id());
return mm;
}
@@ -275,6 +276,7 @@ static inline void __exit_mm(struct task_struct * tsk)
mm_release();
if (mm != tsk->active_mm) BUG();
tsk->mm = NULL;
+ enter_lazy_tlb(mm, current, smp_processor_id());
mmput(mm);
}
}
@@ -393,9 +395,7 @@ NORET_TYPE void do_exit(long code)
if (!tsk->pid)
panic("Attempted to kill the idle task!");
tsk->flags |= PF_EXITING;
- start_bh_atomic();
- del_timer(&tsk->real_timer);
- end_bh_atomic();
+ del_timer_sync(&tsk->real_timer);
lock_kernel();
fake_volatile:
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 7d38ac1ac..6c38477be 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -48,7 +48,9 @@ int do_getitimer(int which, struct itimerval *value)
case ITIMER_REAL:
interval = current->it_real_incr;
val = 0;
- start_bh_atomic();
+ /*
+ * FIXME! This needs to be atomic, in case the timer goes off meanwhile!
+ */
if (timer_pending(&current->real_timer)) {
val = current->real_timer.expires - jiffies;
@@ -56,7 +58,6 @@ int do_getitimer(int which, struct itimerval *value)
if ((long) val <= 0)
val = 1;
}
- end_bh_atomic();
break;
case ITIMER_VIRTUAL:
val = current->it_virt_value;
@@ -102,6 +103,7 @@ void it_real_fn(unsigned long __data)
p->real_timer.expires = jiffies + interval;
add_timer(&p->real_timer);
}
+ timer_exit(&p->real_timer);
}
int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
@@ -115,9 +117,7 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
return k;
switch (which) {
case ITIMER_REAL:
- start_bh_atomic();
- del_timer(&current->real_timer);
- end_bh_atomic();
+ del_timer_sync(&current->real_timer);
current->it_real_value = j;
current->it_real_incr = i;
if (!j)
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index dea3c38a5..bd74f5413 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -151,6 +151,7 @@ EXPORT_SYMBOL(d_instantiate);
EXPORT_SYMBOL(d_alloc);
EXPORT_SYMBOL(d_lookup);
EXPORT_SYMBOL(d_path);
+EXPORT_SYMBOL(mark_buffer_dirty);
EXPORT_SYMBOL(__mark_buffer_dirty);
EXPORT_SYMBOL(__mark_inode_dirty);
EXPORT_SYMBOL(free_kiovec);
@@ -163,7 +164,7 @@ EXPORT_SYMBOL(filp_close);
EXPORT_SYMBOL(put_filp);
EXPORT_SYMBOL(files_lock);
EXPORT_SYMBOL(check_disk_change);
-EXPORT_SYMBOL(invalidate_buffers);
+EXPORT_SYMBOL(__invalidate_buffers);
EXPORT_SYMBOL(invalidate_inodes);
EXPORT_SYMBOL(invalidate_inode_pages);
EXPORT_SYMBOL(truncate_inode_pages);
@@ -183,11 +184,12 @@ EXPORT_SYMBOL(__bforget);
EXPORT_SYMBOL(ll_rw_block);
EXPORT_SYMBOL(__wait_on_buffer);
EXPORT_SYMBOL(___wait_on_page);
-EXPORT_SYMBOL(block_read_full_page);
EXPORT_SYMBOL(block_write_full_page);
-EXPORT_SYMBOL(block_write_partial_page);
-EXPORT_SYMBOL(block_write_cont_page);
-EXPORT_SYMBOL(block_write_zero_range);
+EXPORT_SYMBOL(block_read_full_page);
+EXPORT_SYMBOL(block_prepare_write);
+EXPORT_SYMBOL(cont_prepare_write);
+EXPORT_SYMBOL(generic_commit_write);
+EXPORT_SYMBOL(generic_block_bmap);
EXPORT_SYMBOL(generic_file_read);
EXPORT_SYMBOL(do_generic_file_read);
EXPORT_SYMBOL(generic_file_write);
@@ -222,6 +224,7 @@ EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(vfs_follow_link);
EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(page_follow_link);
+EXPORT_SYMBOL(page_symlink_inode_operations);
EXPORT_SYMBOL(block_symlink);
/* for stackable file systems (lofs, wrapfs, etc.) */
@@ -263,8 +266,6 @@ EXPORT_SYMBOL(ioctl_by_bdev);
EXPORT_SYMBOL(gendisk_head);
EXPORT_SYMBOL(grok_partitions);
EXPORT_SYMBOL(register_disk);
-EXPORT_SYMBOL(unplug_device);
-EXPORT_SYMBOL(make_request);
EXPORT_SYMBOL(tq_disk);
EXPORT_SYMBOL(init_buffer);
EXPORT_SYMBOL(refile_buffer);
@@ -317,12 +318,11 @@ EXPORT_SYMBOL(request_irq);
EXPORT_SYMBOL(free_irq);
EXPORT_SYMBOL(probe_irq_on);
EXPORT_SYMBOL(probe_irq_off);
-EXPORT_SYMBOL(bh_active);
-EXPORT_SYMBOL(bh_mask);
-EXPORT_SYMBOL(bh_mask_count);
-EXPORT_SYMBOL(bh_base);
EXPORT_SYMBOL(add_timer);
EXPORT_SYMBOL(del_timer);
+#ifdef __SMP__
+EXPORT_SYMBOL(del_timer_sync);
+#endif
EXPORT_SYMBOL(mod_timer);
EXPORT_SYMBOL(tq_timer);
EXPORT_SYMBOL(tq_immediate);
@@ -369,7 +369,9 @@ EXPORT_SYMBOL(schedule_timeout);
EXPORT_SYMBOL(jiffies);
EXPORT_SYMBOL(xtime);
EXPORT_SYMBOL(do_gettimeofday);
+#ifndef __ia64__
EXPORT_SYMBOL(loops_per_sec);
+#endif
EXPORT_SYMBOL(kstat);
/* misc */
@@ -380,7 +382,6 @@ EXPORT_SYMBOL(vsprintf);
EXPORT_SYMBOL(kdevname);
EXPORT_SYMBOL(bdevname);
EXPORT_SYMBOL(cdevname);
-EXPORT_SYMBOL(partition_name); /* md.c only */
EXPORT_SYMBOL(simple_strtoul);
EXPORT_SYMBOL(system_utsname); /* UTS data */
EXPORT_SYMBOL(uts_sem); /* UTS semaphore */
@@ -460,5 +461,12 @@ EXPORT_SYMBOL(get_fast_time);
/* library functions */
EXPORT_SYMBOL(strnicmp);
+/* software interrupts */
+EXPORT_SYMBOL(tasklet_hi_vec);
+EXPORT_SYMBOL(bh_task_vec);
+EXPORT_SYMBOL(init_bh);
+EXPORT_SYMBOL(remove_bh);
+
/* init task, for moving kthread roots - ought to export a function ?? */
+
EXPORT_SYMBOL(init_task_union);
diff --git a/kernel/panic.c b/kernel/panic.c
index 8a68b3ad5..e040ee454 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -41,10 +41,10 @@ NORET_TYPE void panic(const char * fmt, ...)
vsprintf(buf, fmt, args);
va_end(args);
printk(KERN_EMERG "Kernel panic: %s\n",buf);
- if (current == init_tasks[0])
- printk(KERN_EMERG "In swapper task - not syncing\n");
- else if (in_interrupt())
+ if (in_interrupt())
printk(KERN_EMERG "In interrupt handler - not syncing\n");
+ else if (!current->pid)
+ printk(KERN_EMERG "In idle task - not syncing\n");
else
sys_sync();
diff --git a/kernel/pm.c b/kernel/pm.c
new file mode 100644
index 000000000..26811bff2
--- /dev/null
+++ b/kernel/pm.c
@@ -0,0 +1,170 @@
+/*
+ * pm.c - Power management interface
+ *
+ * Copyright (C) 2000 Andrew Henroid
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/pm.h>
+
+int pm_active = 0;
+
+static spinlock_t pm_devs_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(pm_devs);
+
+/*
+ * Register a device with power management
+ */
+struct pm_dev *pm_register(pm_dev_t type,
+ unsigned long id,
+ pm_callback callback)
+{
+ struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL);
+ if (dev) {
+ unsigned long flags;
+
+ memset(dev, 0, sizeof(*dev));
+ dev->type = type;
+ dev->id = id;
+ dev->callback = callback;
+
+ spin_lock_irqsave(&pm_devs_lock, flags);
+ list_add(&dev->entry, &pm_devs);
+ spin_unlock_irqrestore(&pm_devs_lock, flags);
+ }
+ return dev;
+}
+
+/*
+ * Unregister a device with power management
+ */
+void pm_unregister(struct pm_dev *dev)
+{
+ if (dev) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&pm_devs_lock, flags);
+ list_del(&dev->entry);
+ spin_unlock_irqrestore(&pm_devs_lock, flags);
+
+ kfree(dev);
+ }
+}
+
+/*
+ * Unregister all devices with matching callback
+ */
+void pm_unregister_all(pm_callback callback)
+{
+ struct list_head *entry;
+
+ if (!callback)
+ return;
+
+ entry = pm_devs.next;
+ while (entry != &pm_devs) {
+ struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
+ entry = entry->next;
+ if (dev->callback == callback)
+ pm_unregister(dev);
+ }
+}
+
+/*
+ * Send request to an individual device
+ */
+static int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data)
+{
+ int status = 0;
+ int next_state;
+ switch (rqst) {
+ case PM_SUSPEND:
+ case PM_RESUME:
+ next_state = (int) data;
+ if (dev->state != next_state) {
+ if (dev->callback)
+ status = (*dev->callback)(dev, rqst, data);
+ if (!status)
+ dev->state = next_state;
+ }
+ break;
+ default:
+ if (dev->callback)
+ status = (*dev->callback)(dev, rqst, data);
+ break;
+ }
+ return status;
+}
+
+/*
+ * Undo incomplete request
+ */
+static void pm_undo_request(struct pm_dev *last, pm_request_t undo, void *data)
+{
+ struct list_head *entry = last->entry.prev;
+ while (entry != &pm_devs) {
+ struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
+ if (dev->callback)
+ pm_send(dev, undo, data);
+ entry = entry->prev;
+ }
+}
+
+/*
+ * Send a request to all devices
+ */
+int pm_send_request(pm_request_t rqst, void *data)
+{
+ struct list_head *entry = pm_devs.next;
+ while (entry != &pm_devs) {
+ struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
+ if (dev->callback) {
+ int status = pm_send(dev, rqst, data);
+ if (status) {
+ /* resume devices on failed suspend request */
+ if (rqst == PM_SUSPEND)
+ pm_undo_request(dev, PM_RESUME, 0);
+ return status;
+ }
+ }
+ entry = entry->next;
+ }
+ return 0;
+}
+
+/*
+ * Find a device
+ */
+struct pm_dev *pm_find(pm_dev_t type, struct pm_dev *from)
+{
+ struct list_head *entry = from ? from->entry.next:pm_devs.next;
+ while (entry != &pm_devs) {
+ struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
+ if (type == PM_UNKNOWN_DEV || dev->type == type)
+ return dev;
+ entry = entry->next;
+ }
+ return 0;
+}
+
+EXPORT_SYMBOL(pm_register);
+EXPORT_SYMBOL(pm_unregister);
+EXPORT_SYMBOL(pm_unregister_all);
+EXPORT_SYMBOL(pm_send_request);
+EXPORT_SYMBOL(pm_find);
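
The new kernel/pm.c above exposes a small registration interface: pm_register(), pm_unregister(), pm_unregister_all(), pm_send_request() and pm_find(). A minimal sketch of how a driver might hook into it follows. The callback prototype is the one implied by pm_send(); PM_UNKNOWN_DEV, PM_SUSPEND and PM_RESUME are assumed to come from <linux/pm.h>, and all my_* names are hypothetical, so treat this as an illustration rather than code from this merge.

#include <linux/pm.h>

static struct pm_dev *my_pm_dev;	/* hypothetical driver state */

/* Callback invoked via pm_send()/pm_send_request(); returning non-zero
 * from a PM_SUSPEND request makes pm_send_request() resume the devices
 * that were already suspended. */
static int my_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *data)
{
	switch (rqst) {
	case PM_SUSPEND:
		/* quiesce the hardware before the system sleeps */
		break;
	case PM_RESUME:
		/* reprogram the hardware on wakeup */
		break;
	default:
		break;
	}
	return 0;
}

static void my_driver_setup(void)
{
	/* PM_UNKNOWN_DEV is the catch-all type that pm_find() also accepts;
	 * pm_register() may return NULL if the allocation fails. */
	my_pm_dev = pm_register(PM_UNKNOWN_DEV, 0, my_pm_callback);
}

static void my_driver_teardown(void)
{
	pm_unregister(my_pm_dev);	/* or pm_unregister_all(my_pm_callback) */
}
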
diff --git a/kernel/sched.c b/kernel/sched.c
index ce72ecc7b..03c05e7c3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -199,30 +199,17 @@ static inline void reschedule_idle(struct task_struct * p, unsigned long flags)
goto send_now;
/*
- * The only heuristics - we use the tsk->avg_slice value
- * to detect 'frequent reschedulers'.
- *
- * If both the woken-up process and the preferred CPU is
- * is a frequent rescheduler, then skip the asynchronous
- * wakeup, the frequent rescheduler will likely chose this
- * task during it's next schedule():
- */
- if (p->policy == SCHED_OTHER) {
- tsk = cpu_curr(best_cpu);
- if (p->avg_slice + tsk->avg_slice < cacheflush_time)
- goto out_no_target;
- }
-
- /*
* We know that the preferred CPU has a cache-affine current
* process, lets try to find a new idle CPU for the woken-up
* process:
*/
- for (i = 0; i < smp_num_cpus; i++) {
+ for (i = smp_num_cpus - 1; i >= 0; i--) {
cpu = cpu_logical_map(i);
+ if (cpu == best_cpu)
+ continue;
tsk = cpu_curr(cpu);
/*
- * We use the first available idle CPU. This creates
+ * We use the last available idle CPU. This creates
* a priority list between idle CPUs, but this is not
* a problem.
*/
@@ -232,26 +219,32 @@ static inline void reschedule_idle(struct task_struct * p, unsigned long flags)
/*
* No CPU is idle, but maybe this process has enough priority
- * to preempt it's preferred CPU. (this is a shortcut):
+ * to preempt its preferred CPU.
*/
tsk = cpu_curr(best_cpu);
if (preemption_goodness(tsk, p, best_cpu) > 0)
goto send_now;
/*
- * We should get here rarely - or in the high CPU contention
+ * We will get here often - or in the high CPU contention
* case. No CPU is idle and this process is either lowprio or
- * the preferred CPU is highprio. Maybe some other CPU can/must
- * be preempted:
+ * the preferred CPU is highprio. Try to preempt some other CPU
+ * only if it's RT or if it's interactive and the preferred
+ * CPU won't reschedule shortly.
*/
- for (i = 0; i < smp_num_cpus; i++) {
- cpu = cpu_logical_map(i);
- tsk = cpu_curr(cpu);
- if (preemption_goodness(tsk, p, cpu) > 0)
- goto send_now;
+ if ((p->avg_slice < cacheflush_time && cpu_curr(best_cpu)->avg_slice > cacheflush_time) ||
+ p->policy != SCHED_OTHER)
+ {
+ for (i = smp_num_cpus - 1; i >= 0; i--) {
+ cpu = cpu_logical_map(i);
+ if (cpu == best_cpu)
+ continue;
+ tsk = cpu_curr(cpu);
+ if (preemption_goodness(tsk, p, cpu) > 0)
+ goto send_now;
+ }
}
-out_no_target:
spin_unlock_irqrestore(&runqueue_lock, flags);
return;
@@ -397,6 +390,9 @@ signed long schedule_timeout(signed long timeout)
add_timer(&timer);
schedule();
del_timer(&timer);
+ /* RED-PEN. The timer may still be running on another CPU.
+ * Pray that the process does not exit too quickly.
+ */
timeout = expire - jiffies;
@@ -460,9 +456,9 @@ tq_scheduler_back:
release_kernel_lock(prev, this_cpu);
/* Do "administrative" work here while we don't hold any locks */
- if (bh_mask & bh_active)
- goto handle_bh;
-handle_bh_back:
+ if (softirq_state[this_cpu].active & softirq_state[this_cpu].mask)
+ goto handle_softirq;
+handle_softirq_back:
/*
* 'sched_data' is protected by the fact that we can run
@@ -581,6 +577,7 @@ still_running_back:
if (next->active_mm) BUG();
next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count);
+ enter_lazy_tlb(oldmm, next, this_cpu);
} else {
if (next->active_mm != mm) BUG();
switch_mm(oldmm, mm, next, this_cpu);
@@ -620,9 +617,9 @@ still_running:
next = prev;
goto still_running_back;
-handle_bh:
- do_bottom_half();
- goto handle_bh_back;
+handle_softirq:
+ do_softirq();
+ goto handle_softirq_back;
handle_tq_scheduler:
run_task_queue(&tq_scheduler);
@@ -1148,7 +1145,6 @@ void daemonize(void)
void __init init_idle(void)
{
- cycles_t t;
struct schedule_data * sched_data;
sched_data = &aligned_data[smp_processor_id()].schedule_data;
@@ -1157,9 +1153,8 @@ void __init init_idle(void)
smp_processor_id(), current->pid);
del_from_runqueue(current);
}
- t = get_cycles();
sched_data->curr = current;
- sched_data->last_schedule = t;
+ sched_data->last_schedule = get_cycles();
}
void __init sched_init(void)
@@ -1184,5 +1179,5 @@ void __init sched_init(void)
* The boot idle thread does lazy MMU switching as well:
*/
atomic_inc(&init_mm.mm_count);
+ enter_lazy_tlb(&init_mm, current, cpu);
}
-
diff --git a/kernel/softirq.c b/kernel/softirq.c
index d184c944e..0f3c23ee9 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -3,68 +3,271 @@
*
* Copyright (C) 1992 Linus Torvalds
*
- * do_bottom_half() runs at normal kernel priority: all interrupts
- * enabled. do_bottom_half() is atomic with respect to itself: a
- * bottom_half handler need not be re-entrant.
- *
* Fixed a disable_bh()/enable_bh() race (was causing a console lockup)
* due bh_mask_count not atomic handling. Copyright (C) 1998 Andrea Arcangeli
+ *
+ * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
*/
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
+#include <linux/init.h>
-#include <asm/io.h>
+/*
+ - No shared variables, all the data are CPU local.
+ - If a softirq needs serialization, let it serialize itself
+ by its own spinlocks.
+ - Even if a softirq is serialized, only the local CPU is marked for
+ execution. Hence, we get a sort of weak CPU binding. It is still
+ not clear whether this will result in better locality or not.
+ - These softirqs are not masked by global cli() and start_bh_atomic()
+ (for obvious reasons). Hence, old parts of the code that still use global
+ locks MUST NOT use softirqs, but must insert interfacing routines that
+ acquire the global locks. E.g. look at the BH implementation.
-/* intr_count died a painless death... -DaveM */
+ Examples:
+ - NET RX softirq. It is multithreaded and does not require
+ any global serialization.
+ - NET TX softirq. It kicks software netdevice queues, hence
+ it is logically serialized per device, but this serialization
+ is invisible to common code.
+ - Tasklets: serialized wrt itself.
+ - Bottom halves: globally serialized, grr...
+ */
-atomic_t bh_mask_count[32];
-unsigned long bh_active = 0;
-unsigned long bh_mask = 0;
-void (*bh_base[32])(void);
-/*
- * This needs to make sure that only one bottom half handler
- * is ever active at a time. We do this without locking by
- * doing an atomic increment on the intr_count, and checking
- * (nonatomically) against 1. Only if it's 1 do we schedule
- * the bottom half.
- *
- * Note that the non-atomicity of the test (as opposed to the
- * actual update) means that the test may fail, and _nobody_
- * runs the handlers if there is a race that makes multiple
- * CPU's get here at the same time. That's ok, we'll run them
- * next time around.
- */
-static inline void run_bottom_halves(void)
+struct softirq_state softirq_state[NR_CPUS];
+static struct softirq_action softirq_vec[32];
+
+asmlinkage void do_softirq()
{
- unsigned long active;
- void (**bh)(void);
-
- active = get_active_bhs();
- clear_active_bhs(active);
- bh = bh_base;
- do {
- if (active & 1)
- (*bh)();
- bh++;
- active >>= 1;
- } while (active);
+ int cpu = smp_processor_id();
+ __u32 active, mask;
+
+ if (in_interrupt())
+ return;
+
+ local_bh_disable();
+
+ local_irq_disable();
+ mask = softirq_state[cpu].mask;
+ active = softirq_state[cpu].active & mask;
+
+ if (active) {
+ struct softirq_action *h;
+
+restart:
+ /* Reset active bitmask before enabling irqs */
+ softirq_state[cpu].active &= ~active;
+
+ local_irq_enable();
+
+ h = softirq_vec;
+ mask &= ~active;
+
+ do {
+ if (active & 1)
+ h->action(h);
+ h++;
+ active >>= 1;
+ } while (active);
+
+ local_irq_disable();
+
+ active = softirq_state[cpu].active;
+ if ((active &= mask) != 0)
+ goto retry;
+ }
+
+ local_bh_enable();
+
+ /* Leave with hard irqs locally disabled. It is critical to close
+ * the window for infinite recursion: while we held the local bh
+ * count it protected us. Now we are defenceless.
+ */
+ return;
+
+retry:
+ goto restart;
}
-asmlinkage void do_bottom_half(void)
+
+static spinlock_t softirq_mask_lock = SPIN_LOCK_UNLOCKED;
+
+void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
+{
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&softirq_mask_lock, flags);
+ softirq_vec[nr].data = data;
+ softirq_vec[nr].action = action;
+
+ for (i=0; i<NR_CPUS; i++)
+ softirq_state[i].mask |= (1<<nr);
+ spin_unlock_irqrestore(&softirq_mask_lock, flags);
+}
+
+
+/* Tasklets */
+
+struct tasklet_head tasklet_vec[NR_CPUS] __cacheline_aligned;
+
+static void tasklet_action(struct softirq_action *a)
{
int cpu = smp_processor_id();
+ struct tasklet_struct *list;
+
+ local_irq_disable();
+ list = tasklet_vec[cpu].list;
+ tasklet_vec[cpu].list = NULL;
+ local_irq_enable();
- if (softirq_trylock(cpu)) {
- if (hardirq_trylock(cpu)) {
- __sti();
- run_bottom_halves();
- __cli();
- hardirq_endlock(cpu);
+ while (list != NULL) {
+ struct tasklet_struct *t = list;
+
+ list = list->next;
+
+ if (tasklet_trylock(t)) {
+ if (atomic_read(&t->count) == 0) {
+ clear_bit(TASKLET_STATE_SCHED, &t->state);
+
+ t->func(t->data);
+ tasklet_unlock(t);
+ continue;
+ }
+ tasklet_unlock(t);
}
- softirq_endlock(cpu);
+ local_irq_disable();
+ t->next = tasklet_vec[cpu].list;
+ tasklet_vec[cpu].list = t;
+ __cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+ local_irq_enable();
}
}
+
+
+
+struct tasklet_head tasklet_hi_vec[NR_CPUS] __cacheline_aligned;
+
+static void tasklet_hi_action(struct softirq_action *a)
+{
+ int cpu = smp_processor_id();
+ struct tasklet_struct *list;
+
+ local_irq_disable();
+ list = tasklet_hi_vec[cpu].list;
+ tasklet_hi_vec[cpu].list = NULL;
+ local_irq_enable();
+
+ while (list != NULL) {
+ struct tasklet_struct *t = list;
+
+ list = list->next;
+
+ if (tasklet_trylock(t)) {
+ if (atomic_read(&t->count) == 0) {
+ clear_bit(TASKLET_STATE_SCHED, &t->state);
+
+ t->func(t->data);
+ tasklet_unlock(t);
+ continue;
+ }
+ tasklet_unlock(t);
+ }
+ local_irq_disable();
+ t->next = tasklet_hi_vec[cpu].list;
+ tasklet_hi_vec[cpu].list = t;
+ __cpu_raise_softirq(cpu, HI_SOFTIRQ);
+ local_irq_enable();
+ }
+}
+
+
+void tasklet_init(struct tasklet_struct *t,
+ void (*func)(unsigned long), unsigned long data)
+{
+ t->func = func;
+ t->data = data;
+ t->state = 0;
+ atomic_set(&t->count, 0);
+}
+
+void tasklet_kill(struct tasklet_struct *t)
+{
+ while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
+ if (in_interrupt())
+ panic("Attempt to kill tasklet from interrupt\n");
+ schedule();
+ }
+ tasklet_unlock_wait(t);
+}
+
+
+
+/* Old style BHs */
+
+static void (*bh_base[32])(void);
+struct tasklet_struct bh_task_vec[32];
+
+/* BHs are serialized by spinlock global_bh_lock.
+
+ It is still possible to make synchronize_bh() as
+ spin_unlock_wait(&global_bh_lock). This operation is not used
+ by kernel now, so that this lock is not made private only
+ due to wait_on_irq().
+
+ It can be removed only after auditing all the BHs.
+ */
+spinlock_t global_bh_lock = SPIN_LOCK_UNLOCKED;
+
+static void bh_action(unsigned long nr)
+{
+ int cpu = smp_processor_id();
+
+ if (!spin_trylock(&global_bh_lock))
+ goto resched;
+
+ if (!hardirq_trylock(cpu))
+ goto resched_unlock;
+
+ if (bh_base[nr])
+ bh_base[nr]();
+
+ hardirq_endlock(cpu);
+ spin_unlock(&global_bh_lock);
+ return;
+
+resched_unlock:
+ spin_unlock(&global_bh_lock);
+resched:
+ mark_bh(nr);
+}
+
+void init_bh(int nr, void (*routine)(void))
+{
+ bh_base[nr] = routine;
+ mb();
+}
+
+void remove_bh(int nr)
+{
+ tasklet_kill(bh_task_vec+nr);
+ bh_base[nr] = NULL;
+}
+
+void __init softirq_init()
+{
+ int i;
+
+ for (i=0; i<32; i++)
+ tasklet_init(bh_task_vec+i, bh_action, i);
+
+ open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
+ open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
+}
+
+
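
The rewrite above replaces the single do_bottom_half() path with per-CPU softirqs, tasklets scheduled on the local CPU, and a compatibility layer that runs the 32 old-style BHs as tasklets serialized by global_bh_lock. A minimal sketch of the new tasklet usage follows; tasklet_init() and tasklet_kill() are defined above, while tasklet_schedule() is assumed to be provided by <linux/interrupt.h> in this kernel series, and the my_* names are made up for illustration.

#include <linux/interrupt.h>

static struct tasklet_struct my_tasklet;

/* Runs in softirq context on the CPU that scheduled it; a given tasklet
 * never runs on two CPUs at once (tasklet_trylock() in tasklet_action()). */
static void my_tasklet_fn(unsigned long data)
{
	/* deferred work goes here */
}

static void my_setup(void)
{
	tasklet_init(&my_tasklet, my_tasklet_fn, 0);
}

/* Typically called from an interrupt handler: marks the tasklet as
 * scheduled and raises TASKLET_SOFTIRQ on the local CPU. */
static void my_irq_bottom_half(void)
{
	tasklet_schedule(&my_tasklet);
}

static void my_teardown(void)
{
	tasklet_kill(&my_tasklet);	/* must not be called from interrupt context */
}
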
diff --git a/kernel/sys.c b/kernel/sys.c
index 57940edea..e3f7c5e2b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -908,6 +908,8 @@ asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim)
? -EFAULT : 0;
}
+#if !defined(__ia64__)
+
/*
* Back compatibility for getrlimit. Needed for some apps.
*/
@@ -926,6 +928,7 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit *rlim)
return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
}
+#endif
asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim)
{
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 50ba37060..9a6a7a74d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -111,17 +111,8 @@ static int proc_sys_permission(struct inode *, int);
struct file_operations proc_sys_file_operations =
{
- NULL, /* lseek */
- proc_readsys, /* read */
- proc_writesys, /* write */
- NULL, /* readdir */
- NULL, /* poll */
- NULL, /* ioctl */
- NULL, /* mmap */
- NULL, /* no special open code */
- NULL, /* no special flush code */
- NULL, /* no special release code */
- NULL /* can't fsync */
+ read: proc_readsys,
+ write: proc_writesys,
};
struct inode_operations proc_sys_inode_operations =
@@ -138,9 +129,6 @@ struct inode_operations proc_sys_inode_operations =
NULL, /* rename */
NULL, /* readlink */
NULL, /* follow_link */
- NULL, /* get_block */
- NULL, /* readpage */
- NULL, /* writepage */
NULL, /* truncate */
proc_sys_permission, /* permission */
NULL /* revalidate */
diff --git a/kernel/timer.c b/kernel/timer.c
index fccf7faa7..f087d239f 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -105,13 +105,15 @@ static struct timer_vec * const tvecs[] = {
static unsigned long timer_jiffies = 0;
-static inline void insert_timer(struct timer_list *timer,
- struct timer_list **vec, int idx)
+static inline void insert_timer(struct timer_list *timer, struct timer_list **vec)
{
- if ((timer->next = vec[idx]))
- vec[idx]->prev = timer;
- vec[idx] = timer;
- timer->prev = (struct timer_list *)&vec[idx];
+ struct timer_list *next = *vec;
+
+ timer->next = next;
+ if (next)
+ next->prev = timer;
+ *vec = timer;
+ timer->prev = (struct timer_list *)vec;
}
static inline void internal_add_timer(struct timer_list *timer)
@@ -121,31 +123,34 @@ static inline void internal_add_timer(struct timer_list *timer)
*/
unsigned long expires = timer->expires;
unsigned long idx = expires - timer_jiffies;
+ struct timer_list ** vec;
if (idx < TVR_SIZE) {
int i = expires & TVR_MASK;
- insert_timer(timer, tv1.vec, i);
+ vec = tv1.vec + i;
} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
int i = (expires >> TVR_BITS) & TVN_MASK;
- insert_timer(timer, tv2.vec, i);
+ vec = tv2.vec + i;
} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
- insert_timer(timer, tv3.vec, i);
+ vec = tv3.vec + i;
} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
- insert_timer(timer, tv4.vec, i);
+ vec = tv4.vec + i;
} else if ((signed long) idx < 0) {
/* can happen if you add a timer with expires == jiffies,
* or you set a timer to go off in the past
*/
- insert_timer(timer, tv1.vec, tv1.index);
+ vec = tv1.vec + tv1.index;
} else if (idx <= 0xffffffffUL) {
int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
- insert_timer(timer, tv5.vec, i);
+ vec = tv5.vec + i;
} else {
/* Can only get here on architectures with 64-bit jiffies */
timer->next = timer->prev = timer;
+ return;
}
+ insert_timer(timer, vec);
}
spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;
@@ -181,15 +186,17 @@ static inline int detach_timer(struct timer_list *timer)
return 0;
}
-void mod_timer(struct timer_list *timer, unsigned long expires)
+int mod_timer(struct timer_list *timer, unsigned long expires)
{
+ int ret;
unsigned long flags;
spin_lock_irqsave(&timerlist_lock, flags);
timer->expires = expires;
- detach_timer(timer);
+ ret = detach_timer(timer);
internal_add_timer(timer);
spin_unlock_irqrestore(&timerlist_lock, flags);
+ return ret;
}
int del_timer(struct timer_list * timer)
@@ -204,6 +211,39 @@ int del_timer(struct timer_list * timer)
return ret;
}
+#ifdef __SMP__
+/*
+ * SMP-specific function to delete a periodic timer.
+ * The caller must prevent, by some means, the timer from being
+ * restarted. Upon exit the timer is not queued and the handler is
+ * not running on any CPU. It returns the number of times the timer
+ * was deleted (for reference counting).
+ */
+
+int del_timer_sync(struct timer_list * timer)
+{
+ int ret = 0;
+
+ for (;;) {
+ unsigned long flags;
+ int running;
+
+ spin_lock_irqsave(&timerlist_lock, flags);
+ ret += detach_timer(timer);
+ timer->next = timer->prev = 0;
+ running = timer->running;
+ spin_unlock_irqrestore(&timerlist_lock, flags);
+
+ if (!running)
+ return ret;
+ timer_synchronize(timer);
+ }
+
+ return ret;
+}
+#endif
+
+
static inline void cascade_timers(struct timer_vec *tv)
{
/* cascade all the timers from tv up one level */
@@ -238,6 +278,7 @@ static inline void run_timer_list(void)
unsigned long data = timer->data;
detach_timer(timer);
timer->next = timer->prev = NULL;
+ timer_set_running(timer);
spin_unlock_irq(&timerlist_lock);
fn(data);
spin_lock_irq(&timerlist_lock);
@@ -553,8 +594,7 @@ static unsigned long count_active_tasks(void)
read_lock(&tasklist_lock);
for_each_task(p) {
if ((p->state == TASK_RUNNING ||
- (p->state & TASK_UNINTERRUPTIBLE) ||
- (p->state & TASK_SWAPPING)))
+ (p->state & TASK_UNINTERRUPTIBLE)))
nr += FIXED_1;
}
read_unlock(&tasklist_lock);
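
The exit.c and itimer.c hunks earlier in this patch show the intended use of the new del_timer_sync(): the start_bh_atomic()/del_timer()/end_bh_atomic() bracket becomes a single call that also waits for the handler to finish on other CPUs. Because run_timer_list() only marks the timer as running (timer_set_running()) before calling the handler, a handler used with del_timer_sync() is expected to call timer_exit() itself, as it_real_fn() now does. A hedged sketch of that pattern, assuming timer_exit() and a UP fallback for del_timer_sync() live in <linux/timer.h>, with hypothetical my_* names:

#include <linux/timer.h>
#include <linux/sched.h>	/* jiffies, HZ */

static struct timer_list my_timer;
static int my_timer_stopped;

static void my_timer_fn(unsigned long data)
{
	if (!my_timer_stopped)
		mod_timer(&my_timer, jiffies + HZ);	/* re-arm once per second */
	timer_exit(&my_timer);	/* clear the running flag for del_timer_sync() */
}

static void my_start(void)
{
	init_timer(&my_timer);
	my_timer.function = my_timer_fn;
	my_timer.expires = jiffies + HZ;
	add_timer(&my_timer);
}

static void my_stop(void)
{
	my_timer_stopped = 1;		/* prevent the handler from re-arming ... */
	del_timer_sync(&my_timer);	/* ... then wait until it is off every CPU */
}
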