author      Ralf Baechle <ralf@linux-mips.org>      2000-02-23 00:40:54 +0000
committer   Ralf Baechle <ralf@linux-mips.org>      2000-02-23 00:40:54 +0000
commit      529c593ece216e4aaffd36bd940cb94f1fa63129 (patch)
tree        78f1c0b805f5656aa7b0417a043c5346f700a2cf /kernel
parent      0bd079751d25808d1972baee5c4eaa1db2227257 (diff)
Merge with 2.3.43. I ignored all modifications to the qlogicisp.c
driver due to the Origin A64 hacks.
Diffstat (limited to 'kernel')

-rw-r--r--   kernel/Makefile       |    8
-rw-r--r--   kernel/exec_domain.c  |    5
-rw-r--r--   kernel/exit.c         |    6
-rw-r--r--   kernel/itimer.c       |   10
-rw-r--r--   kernel/ksyms.c        |   32
-rw-r--r--   kernel/panic.c        |    6
-rw-r--r--   kernel/pm.c           |  170
-rw-r--r--   kernel/sched.c        |   69
-rw-r--r--   kernel/softirq.c      |  291
-rw-r--r--   kernel/sys.c          |    3
-rw-r--r--   kernel/sysctl.c       |   16
-rw-r--r--   kernel/timer.c        |   72

12 files changed, 551 insertions, 137 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 270692eb2..cce15a524 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -29,6 +29,14 @@ ifeq ($(CONFIG_MODULES),y) OX_OBJS += ksyms.o endif +ifdef CONFIG_ACPI +OX_OBJS += pm.o +else + ifdef CONFIG_APM + OX_OBJS += pm.o + endif +endif + CFLAGS_sched.o := $(PROFILING) -fno-omit-frame-pointer include $(TOPDIR)/Rules.make diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index c31678a4e..1ee1eee4d 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -103,12 +103,11 @@ asmlinkage long sys_personality(unsigned long personality) unsigned long old_personality; int ret; - lock_kernel(); - ret = current->personality; if (personality == 0xffffffff) - goto out; + return current->personality; ret = -EINVAL; + lock_kernel(); it = lookup_exec_domain(personality); if (!it) goto out; diff --git a/kernel/exit.c b/kernel/exit.c index 80280e821..65d72df43 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -247,6 +247,7 @@ struct mm_struct * start_lazy_tlb(void) current->mm = NULL; /* active_mm is still 'mm' */ atomic_inc(&mm->mm_count); + enter_lazy_tlb(mm, current, smp_processor_id()); return mm; } @@ -275,6 +276,7 @@ static inline void __exit_mm(struct task_struct * tsk) mm_release(); if (mm != tsk->active_mm) BUG(); tsk->mm = NULL; + enter_lazy_tlb(mm, current, smp_processor_id()); mmput(mm); } } @@ -393,9 +395,7 @@ NORET_TYPE void do_exit(long code) if (!tsk->pid) panic("Attempted to kill the idle task!"); tsk->flags |= PF_EXITING; - start_bh_atomic(); - del_timer(&tsk->real_timer); - end_bh_atomic(); + del_timer_sync(&tsk->real_timer); lock_kernel(); fake_volatile: diff --git a/kernel/itimer.c b/kernel/itimer.c index 7d38ac1ac..6c38477be 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -48,7 +48,9 @@ int do_getitimer(int which, struct itimerval *value) case ITIMER_REAL: interval = current->it_real_incr; val = 0; - start_bh_atomic(); + /* + * FIXME! This needs to be atomic, in case the kernel timer happens! 
+ */ if (timer_pending(¤t->real_timer)) { val = current->real_timer.expires - jiffies; @@ -56,7 +58,6 @@ int do_getitimer(int which, struct itimerval *value) if ((long) val <= 0) val = 1; } - end_bh_atomic(); break; case ITIMER_VIRTUAL: val = current->it_virt_value; @@ -102,6 +103,7 @@ void it_real_fn(unsigned long __data) p->real_timer.expires = jiffies + interval; add_timer(&p->real_timer); } + timer_exit(&p->real_timer); } int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) @@ -115,9 +117,7 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) return k; switch (which) { case ITIMER_REAL: - start_bh_atomic(); - del_timer(¤t->real_timer); - end_bh_atomic(); + del_timer_sync(¤t->real_timer); current->it_real_value = j; current->it_real_incr = i; if (!j) diff --git a/kernel/ksyms.c b/kernel/ksyms.c index dea3c38a5..bd74f5413 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -151,6 +151,7 @@ EXPORT_SYMBOL(d_instantiate); EXPORT_SYMBOL(d_alloc); EXPORT_SYMBOL(d_lookup); EXPORT_SYMBOL(d_path); +EXPORT_SYMBOL(mark_buffer_dirty); EXPORT_SYMBOL(__mark_buffer_dirty); EXPORT_SYMBOL(__mark_inode_dirty); EXPORT_SYMBOL(free_kiovec); @@ -163,7 +164,7 @@ EXPORT_SYMBOL(filp_close); EXPORT_SYMBOL(put_filp); EXPORT_SYMBOL(files_lock); EXPORT_SYMBOL(check_disk_change); -EXPORT_SYMBOL(invalidate_buffers); +EXPORT_SYMBOL(__invalidate_buffers); EXPORT_SYMBOL(invalidate_inodes); EXPORT_SYMBOL(invalidate_inode_pages); EXPORT_SYMBOL(truncate_inode_pages); @@ -183,11 +184,12 @@ EXPORT_SYMBOL(__bforget); EXPORT_SYMBOL(ll_rw_block); EXPORT_SYMBOL(__wait_on_buffer); EXPORT_SYMBOL(___wait_on_page); -EXPORT_SYMBOL(block_read_full_page); EXPORT_SYMBOL(block_write_full_page); -EXPORT_SYMBOL(block_write_partial_page); -EXPORT_SYMBOL(block_write_cont_page); -EXPORT_SYMBOL(block_write_zero_range); +EXPORT_SYMBOL(block_read_full_page); +EXPORT_SYMBOL(block_prepare_write); +EXPORT_SYMBOL(cont_prepare_write); +EXPORT_SYMBOL(generic_commit_write); +EXPORT_SYMBOL(generic_block_bmap); EXPORT_SYMBOL(generic_file_read); EXPORT_SYMBOL(do_generic_file_read); EXPORT_SYMBOL(generic_file_write); @@ -222,6 +224,7 @@ EXPORT_SYMBOL(vfs_readlink); EXPORT_SYMBOL(vfs_follow_link); EXPORT_SYMBOL(page_readlink); EXPORT_SYMBOL(page_follow_link); +EXPORT_SYMBOL(page_symlink_inode_operations); EXPORT_SYMBOL(block_symlink); /* for stackable file systems (lofs, wrapfs, etc.) 
*/ @@ -263,8 +266,6 @@ EXPORT_SYMBOL(ioctl_by_bdev); EXPORT_SYMBOL(gendisk_head); EXPORT_SYMBOL(grok_partitions); EXPORT_SYMBOL(register_disk); -EXPORT_SYMBOL(unplug_device); -EXPORT_SYMBOL(make_request); EXPORT_SYMBOL(tq_disk); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL(refile_buffer); @@ -317,12 +318,11 @@ EXPORT_SYMBOL(request_irq); EXPORT_SYMBOL(free_irq); EXPORT_SYMBOL(probe_irq_on); EXPORT_SYMBOL(probe_irq_off); -EXPORT_SYMBOL(bh_active); -EXPORT_SYMBOL(bh_mask); -EXPORT_SYMBOL(bh_mask_count); -EXPORT_SYMBOL(bh_base); EXPORT_SYMBOL(add_timer); EXPORT_SYMBOL(del_timer); +#ifdef __SMP__ +EXPORT_SYMBOL(del_timer_sync); +#endif EXPORT_SYMBOL(mod_timer); EXPORT_SYMBOL(tq_timer); EXPORT_SYMBOL(tq_immediate); @@ -369,7 +369,9 @@ EXPORT_SYMBOL(schedule_timeout); EXPORT_SYMBOL(jiffies); EXPORT_SYMBOL(xtime); EXPORT_SYMBOL(do_gettimeofday); +#ifndef __ia64__ EXPORT_SYMBOL(loops_per_sec); +#endif EXPORT_SYMBOL(kstat); /* misc */ @@ -380,7 +382,6 @@ EXPORT_SYMBOL(vsprintf); EXPORT_SYMBOL(kdevname); EXPORT_SYMBOL(bdevname); EXPORT_SYMBOL(cdevname); -EXPORT_SYMBOL(partition_name); /* md.c only */ EXPORT_SYMBOL(simple_strtoul); EXPORT_SYMBOL(system_utsname); /* UTS data */ EXPORT_SYMBOL(uts_sem); /* UTS semaphore */ @@ -460,5 +461,12 @@ EXPORT_SYMBOL(get_fast_time); /* library functions */ EXPORT_SYMBOL(strnicmp); +/* software interrupts */ +EXPORT_SYMBOL(tasklet_hi_vec); +EXPORT_SYMBOL(bh_task_vec); +EXPORT_SYMBOL(init_bh); +EXPORT_SYMBOL(remove_bh); + /* init task, for moving kthread roots - ought to export a function ?? */ + EXPORT_SYMBOL(init_task_union); diff --git a/kernel/panic.c b/kernel/panic.c index 8a68b3ad5..e040ee454 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -41,10 +41,10 @@ NORET_TYPE void panic(const char * fmt, ...) vsprintf(buf, fmt, args); va_end(args); printk(KERN_EMERG "Kernel panic: %s\n",buf); - if (current == init_tasks[0]) - printk(KERN_EMERG "In swapper task - not syncing\n"); - else if (in_interrupt()) + if (in_interrupt()) printk(KERN_EMERG "In interrupt handler - not syncing\n"); + else if (!current->pid) + printk(KERN_EMERG "In idle task - not syncing\n"); else sys_sync(); diff --git a/kernel/pm.c b/kernel/pm.c new file mode 100644 index 000000000..26811bff2 --- /dev/null +++ b/kernel/pm.c @@ -0,0 +1,170 @@ +/* + * pm.c - Power management interface + * + * Copyright (C) 2000 Andrew Henroid + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/pm.h> + +int pm_active = 0; + +static spinlock_t pm_devs_lock = SPIN_LOCK_UNLOCKED; +static LIST_HEAD(pm_devs); + +/* + * Register a device with power management + */ +struct pm_dev *pm_register(pm_dev_t type, + unsigned long id, + pm_callback callback) +{ + struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL); + if (dev) { + unsigned long flags; + + memset(dev, 0, sizeof(*dev)); + dev->type = type; + dev->id = id; + dev->callback = callback; + + spin_lock_irqsave(&pm_devs_lock, flags); + list_add(&dev->entry, &pm_devs); + spin_unlock_irqrestore(&pm_devs_lock, flags); + } + return dev; +} + +/* + * Unregister a device with power management + */ +void pm_unregister(struct pm_dev *dev) +{ + if (dev) { + unsigned long flags; + + spin_lock_irqsave(&pm_devs_lock, flags); + list_del(&dev->entry); + spin_unlock_irqrestore(&pm_devs_lock, flags); + + kfree(dev); + } +} + +/* + * Unregister all devices with matching callback + */ +void pm_unregister_all(pm_callback callback) +{ + struct list_head *entry; + + if (!callback) + return; + + entry = pm_devs.next; + while (entry != &pm_devs) { + struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); + entry = entry->next; + if (dev->callback == callback) + pm_unregister(dev); + } +} + +/* + * Send request to an individual device + */ +static int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data) +{ + int status = 0; + int next_state; + switch (rqst) { + case PM_SUSPEND: + case PM_RESUME: + next_state = (int) data; + if (dev->state != next_state) { + if (dev->callback) + status = (*dev->callback)(dev, rqst, data); + if (!status) + dev->state = next_state; + } + break; + default: + if (dev->callback) + status = (*dev->callback)(dev, rqst, data); + break; + } + return status; +} + +/* + * Undo incomplete request + */ +static void pm_undo_request(struct pm_dev *last, pm_request_t undo, void *data) +{ + struct list_head *entry = last->entry.prev; + while (entry != &pm_devs) { + struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); + if (dev->callback) + pm_send(dev, undo, data); + entry = entry->prev; + } +} + +/* + * Send a request to all devices + */ +int pm_send_request(pm_request_t rqst, void *data) +{ + struct list_head *entry = pm_devs.next; + while (entry != &pm_devs) { + struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); + if (dev->callback) { + int status = pm_send(dev, rqst, data); + if (status) { + /* resume devices on failed suspend request */ + if (rqst == PM_SUSPEND) + pm_undo_request(dev, PM_RESUME, 0); + return status; + } + } + entry = entry->next; + } + return 0; +} + +/* + * Find a device + */ +struct pm_dev *pm_find(pm_dev_t type, struct pm_dev *from) +{ + struct list_head *entry = from ? 
from->entry.next:pm_devs.next; + while (entry != &pm_devs) { + struct pm_dev *dev = list_entry(entry, struct pm_dev, entry); + if (type == PM_UNKNOWN_DEV || dev->type == type) + return dev; + entry = entry->next; + } + return 0; +} + +EXPORT_SYMBOL(pm_register); +EXPORT_SYMBOL(pm_unregister); +EXPORT_SYMBOL(pm_unregister_all); +EXPORT_SYMBOL(pm_send_request); +EXPORT_SYMBOL(pm_find); diff --git a/kernel/sched.c b/kernel/sched.c index ce72ecc7b..03c05e7c3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -199,30 +199,17 @@ static inline void reschedule_idle(struct task_struct * p, unsigned long flags) goto send_now; /* - * The only heuristics - we use the tsk->avg_slice value - * to detect 'frequent reschedulers'. - * - * If both the woken-up process and the preferred CPU is - * is a frequent rescheduler, then skip the asynchronous - * wakeup, the frequent rescheduler will likely chose this - * task during it's next schedule(): - */ - if (p->policy == SCHED_OTHER) { - tsk = cpu_curr(best_cpu); - if (p->avg_slice + tsk->avg_slice < cacheflush_time) - goto out_no_target; - } - - /* * We know that the preferred CPU has a cache-affine current * process, lets try to find a new idle CPU for the woken-up * process: */ - for (i = 0; i < smp_num_cpus; i++) { + for (i = smp_num_cpus - 1; i >= 0; i--) { cpu = cpu_logical_map(i); + if (cpu == best_cpu) + continue; tsk = cpu_curr(cpu); /* - * We use the first available idle CPU. This creates + * We use the last available idle CPU. This creates * a priority list between idle CPUs, but this is not * a problem. */ @@ -232,26 +219,32 @@ static inline void reschedule_idle(struct task_struct * p, unsigned long flags) /* * No CPU is idle, but maybe this process has enough priority - * to preempt it's preferred CPU. (this is a shortcut): + * to preempt it's preferred CPU. */ tsk = cpu_curr(best_cpu); if (preemption_goodness(tsk, p, best_cpu) > 0) goto send_now; /* - * We should get here rarely - or in the high CPU contention + * We will get here often - or in the high CPU contention * case. No CPU is idle and this process is either lowprio or - * the preferred CPU is highprio. Maybe some other CPU can/must - * be preempted: + * the preferred CPU is highprio. Try to preemt some other CPU + * only if it's RT or if it's iteractive and the preferred + * cpu won't reschedule shortly. */ - for (i = 0; i < smp_num_cpus; i++) { - cpu = cpu_logical_map(i); - tsk = cpu_curr(cpu); - if (preemption_goodness(tsk, p, cpu) > 0) - goto send_now; + if ((p->avg_slice < cacheflush_time && cpu_curr(best_cpu)->avg_slice > cacheflush_time) || + p->policy != SCHED_OTHER) + { + for (i = smp_num_cpus - 1; i >= 0; i--) { + cpu = cpu_logical_map(i); + if (cpu == best_cpu) + continue; + tsk = cpu_curr(cpu); + if (preemption_goodness(tsk, p, cpu) > 0) + goto send_now; + } } -out_no_target: spin_unlock_irqrestore(&runqueue_lock, flags); return; @@ -397,6 +390,9 @@ signed long schedule_timeout(signed long timeout) add_timer(&timer); schedule(); del_timer(&timer); + /* RED-PEN. Timer may be running now on another cpu. + * Pray that process will not exit enough fastly. 
+ */ timeout = expire - jiffies; @@ -460,9 +456,9 @@ tq_scheduler_back: release_kernel_lock(prev, this_cpu); /* Do "administrative" work here while we don't hold any locks */ - if (bh_mask & bh_active) - goto handle_bh; -handle_bh_back: + if (softirq_state[this_cpu].active & softirq_state[this_cpu].mask) + goto handle_softirq; +handle_softirq_back: /* * 'sched_data' is protected by the fact that we can run @@ -581,6 +577,7 @@ still_running_back: if (next->active_mm) BUG(); next->active_mm = oldmm; atomic_inc(&oldmm->mm_count); + enter_lazy_tlb(oldmm, next, this_cpu); } else { if (next->active_mm != mm) BUG(); switch_mm(oldmm, mm, next, this_cpu); @@ -620,9 +617,9 @@ still_running: next = prev; goto still_running_back; -handle_bh: - do_bottom_half(); - goto handle_bh_back; +handle_softirq: + do_softirq(); + goto handle_softirq_back; handle_tq_scheduler: run_task_queue(&tq_scheduler); @@ -1148,7 +1145,6 @@ void daemonize(void) void __init init_idle(void) { - cycles_t t; struct schedule_data * sched_data; sched_data = &aligned_data[smp_processor_id()].schedule_data; @@ -1157,9 +1153,8 @@ void __init init_idle(void) smp_processor_id(), current->pid); del_from_runqueue(current); } - t = get_cycles(); sched_data->curr = current; - sched_data->last_schedule = t; + sched_data->last_schedule = get_cycles(); } void __init sched_init(void) @@ -1184,5 +1179,5 @@ void __init sched_init(void) * The boot idle thread does lazy MMU switching as well: */ atomic_inc(&init_mm.mm_count); + enter_lazy_tlb(&init_mm, current, cpu); } - diff --git a/kernel/softirq.c b/kernel/softirq.c index d184c944e..0f3c23ee9 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -3,68 +3,271 @@ * * Copyright (C) 1992 Linus Torvalds * - * do_bottom_half() runs at normal kernel priority: all interrupts - * enabled. do_bottom_half() is atomic with respect to itself: a - * bottom_half handler need not be re-entrant. - * * Fixed a disable_bh()/enable_bh() race (was causing a console lockup) * due bh_mask_count not atomic handling. Copyright (C) 1998 Andrea Arcangeli + * + * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) */ #include <linux/mm.h> #include <linux/kernel_stat.h> #include <linux/interrupt.h> #include <linux/smp_lock.h> +#include <linux/init.h> -#include <asm/io.h> +/* + - No shared variables, all the data are CPU local. + - If a softirq needs serialization, let it serialize itself + by its own spinlocks. + - Even if softirq is serialized, only local cpu is marked for + execution. Hence, we get something sort of weak cpu binding. + Though it is still not clear, will it result in better locality + or will not. + - These softirqs are not masked by global cli() and start_bh_atomic() + (by clear reasons). Hence, old parts of code still using global locks + MUST NOT use softirqs, but insert interfacing routines acquiring + global locks. F.e. look at BHs implementation. -/* intr_count died a painless death... -DaveM */ + Examples: + - NET RX softirq. It is multithreaded and does not require + any global serialization. + - NET TX softirq. It kicks software netdevice queues, hence + it is logically serialized per device, but this serialization + is invisible to common code. + - Tasklets: serialized wrt itself. + - Bottom halves: globally serialized, grr... + */ -atomic_t bh_mask_count[32]; -unsigned long bh_active = 0; -unsigned long bh_mask = 0; -void (*bh_base[32])(void); -/* - * This needs to make sure that only one bottom half handler - * is ever active at a time. 
We do this without locking by - * doing an atomic increment on the intr_count, and checking - * (nonatomically) against 1. Only if it's 1 do we schedule - * the bottom half. - * - * Note that the non-atomicity of the test (as opposed to the - * actual update) means that the test may fail, and _nobody_ - * runs the handlers if there is a race that makes multiple - * CPU's get here at the same time. That's ok, we'll run them - * next time around. - */ -static inline void run_bottom_halves(void) +struct softirq_state softirq_state[NR_CPUS]; +static struct softirq_action softirq_vec[32]; + +asmlinkage void do_softirq() { - unsigned long active; - void (**bh)(void); - - active = get_active_bhs(); - clear_active_bhs(active); - bh = bh_base; - do { - if (active & 1) - (*bh)(); - bh++; - active >>= 1; - } while (active); + int cpu = smp_processor_id(); + __u32 active, mask; + + if (in_interrupt()) + return; + + local_bh_disable(); + + local_irq_disable(); + mask = softirq_state[cpu].mask; + active = softirq_state[cpu].active & mask; + + if (active) { + struct softirq_action *h; + +restart: + /* Reset active bitmask before enabling irqs */ + softirq_state[cpu].active &= ~active; + + local_irq_enable(); + + h = softirq_vec; + mask &= ~active; + + do { + if (active & 1) + h->action(h); + h++; + active >>= 1; + } while (active); + + local_irq_disable(); + + active = softirq_state[cpu].active; + if ((active &= mask) != 0) + goto retry; + } + + local_bh_enable(); + + /* Leave with locally disabled hard irqs. It is critical to close + * window for infinite recursion, while we help local bh count, + * it protected us. Now we are defenceless. + */ + return; + +retry: + goto restart; } -asmlinkage void do_bottom_half(void) + +static spinlock_t softirq_mask_lock = SPIN_LOCK_UNLOCKED; + +void open_softirq(int nr, void (*action)(struct softirq_action*), void *data) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&softirq_mask_lock, flags); + softirq_vec[nr].data = data; + softirq_vec[nr].action = action; + + for (i=0; i<NR_CPUS; i++) + softirq_state[i].mask |= (1<<nr); + spin_unlock_irqrestore(&softirq_mask_lock, flags); +} + + +/* Tasklets */ + +struct tasklet_head tasklet_vec[NR_CPUS] __cacheline_aligned; + +static void tasklet_action(struct softirq_action *a) { int cpu = smp_processor_id(); + struct tasklet_struct *list; + + local_irq_disable(); + list = tasklet_vec[cpu].list; + tasklet_vec[cpu].list = NULL; + local_irq_enable(); - if (softirq_trylock(cpu)) { - if (hardirq_trylock(cpu)) { - __sti(); - run_bottom_halves(); - __cli(); - hardirq_endlock(cpu); + while (list != NULL) { + struct tasklet_struct *t = list; + + list = list->next; + + if (tasklet_trylock(t)) { + if (atomic_read(&t->count) == 0) { + clear_bit(TASKLET_STATE_SCHED, &t->state); + + t->func(t->data); + tasklet_unlock(t); + continue; + } + tasklet_unlock(t); } - softirq_endlock(cpu); + local_irq_disable(); + t->next = tasklet_vec[cpu].list; + tasklet_vec[cpu].list = t; + __cpu_raise_softirq(cpu, TASKLET_SOFTIRQ); + local_irq_enable(); } } + + + +struct tasklet_head tasklet_hi_vec[NR_CPUS] __cacheline_aligned; + +static void tasklet_hi_action(struct softirq_action *a) +{ + int cpu = smp_processor_id(); + struct tasklet_struct *list; + + local_irq_disable(); + list = tasklet_hi_vec[cpu].list; + tasklet_hi_vec[cpu].list = NULL; + local_irq_enable(); + + while (list != NULL) { + struct tasklet_struct *t = list; + + list = list->next; + + if (tasklet_trylock(t)) { + if (atomic_read(&t->count) == 0) { + clear_bit(TASKLET_STATE_SCHED, 
&t->state); + + t->func(t->data); + tasklet_unlock(t); + continue; + } + tasklet_unlock(t); + } + local_irq_disable(); + t->next = tasklet_hi_vec[cpu].list; + tasklet_hi_vec[cpu].list = t; + __cpu_raise_softirq(cpu, HI_SOFTIRQ); + local_irq_enable(); + } +} + + +void tasklet_init(struct tasklet_struct *t, + void (*func)(unsigned long), unsigned long data) +{ + t->func = func; + t->data = data; + t->state = 0; + atomic_set(&t->count, 0); +} + +void tasklet_kill(struct tasklet_struct *t) +{ + while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { + if (in_interrupt()) + panic("Attempt to kill tasklet from interrupt\n"); + schedule(); + } + tasklet_unlock_wait(t); +} + + + +/* Old style BHs */ + +static void (*bh_base[32])(void); +struct tasklet_struct bh_task_vec[32]; + +/* BHs are serialized by spinlock global_bh_lock. + + It is still possible to make synchronize_bh() as + spin_unlock_wait(&global_bh_lock). This operation is not used + by kernel now, so that this lock is not made private only + due to wait_on_irq(). + + It can be removed only after auditing all the BHs. + */ +spinlock_t global_bh_lock = SPIN_LOCK_UNLOCKED; + +static void bh_action(unsigned long nr) +{ + int cpu = smp_processor_id(); + + if (!spin_trylock(&global_bh_lock)) + goto resched; + + if (!hardirq_trylock(cpu)) + goto resched_unlock; + + if (bh_base[nr]) + bh_base[nr](); + + hardirq_endlock(cpu); + spin_unlock(&global_bh_lock); + return; + +resched_unlock: + spin_unlock(&global_bh_lock); +resched: + mark_bh(nr); +} + +void init_bh(int nr, void (*routine)(void)) +{ + bh_base[nr] = routine; + mb(); +} + +void remove_bh(int nr) +{ + tasklet_kill(bh_task_vec+nr); + bh_base[nr] = NULL; +} + +void __init softirq_init() +{ + int i; + + for (i=0; i<32; i++) + tasklet_init(bh_task_vec+i, bh_action, i); + + open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL); + open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL); +} + + diff --git a/kernel/sys.c b/kernel/sys.c index 57940edea..e3f7c5e2b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -908,6 +908,8 @@ asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim) ? -EFAULT : 0; } +#if !defined(__ia64__) + /* * Back compatibility for getrlimit. Needed for some apps. 
*/ @@ -926,6 +928,7 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit *rlim) return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; } +#endif asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 50ba37060..9a6a7a74d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -111,17 +111,8 @@ static int proc_sys_permission(struct inode *, int); struct file_operations proc_sys_file_operations = { - NULL, /* lseek */ - proc_readsys, /* read */ - proc_writesys, /* write */ - NULL, /* readdir */ - NULL, /* poll */ - NULL, /* ioctl */ - NULL, /* mmap */ - NULL, /* no special open code */ - NULL, /* no special flush code */ - NULL, /* no special release code */ - NULL /* can't fsync */ + read: proc_readsys, + write: proc_writesys, }; struct inode_operations proc_sys_inode_operations = @@ -138,9 +129,6 @@ struct inode_operations proc_sys_inode_operations = NULL, /* rename */ NULL, /* readlink */ NULL, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ NULL, /* truncate */ proc_sys_permission, /* permission */ NULL /* revalidate */ diff --git a/kernel/timer.c b/kernel/timer.c index fccf7faa7..f087d239f 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -105,13 +105,15 @@ static struct timer_vec * const tvecs[] = { static unsigned long timer_jiffies = 0; -static inline void insert_timer(struct timer_list *timer, - struct timer_list **vec, int idx) +static inline void insert_timer(struct timer_list *timer, struct timer_list **vec) { - if ((timer->next = vec[idx])) - vec[idx]->prev = timer; - vec[idx] = timer; - timer->prev = (struct timer_list *)&vec[idx]; + struct timer_list *next = *vec; + + timer->next = next; + if (next) + next->prev = timer; + *vec = timer; + timer->prev = (struct timer_list *)vec; } static inline void internal_add_timer(struct timer_list *timer) @@ -121,31 +123,34 @@ static inline void internal_add_timer(struct timer_list *timer) */ unsigned long expires = timer->expires; unsigned long idx = expires - timer_jiffies; + struct timer_list ** vec; if (idx < TVR_SIZE) { int i = expires & TVR_MASK; - insert_timer(timer, tv1.vec, i); + vec = tv1.vec + i; } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { int i = (expires >> TVR_BITS) & TVN_MASK; - insert_timer(timer, tv2.vec, i); + vec = tv2.vec + i; } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; - insert_timer(timer, tv3.vec, i); + vec = tv3.vec + i; } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; - insert_timer(timer, tv4.vec, i); + vec = tv4.vec + i; } else if ((signed long) idx < 0) { /* can happen if you add a timer with expires == jiffies, * or you set a timer to go off in the past */ - insert_timer(timer, tv1.vec, tv1.index); + vec = tv1.vec + tv1.index; } else if (idx <= 0xffffffffUL) { int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; - insert_timer(timer, tv5.vec, i); + vec = tv5.vec + i; } else { /* Can only get here on architectures with 64-bit jiffies */ timer->next = timer->prev = timer; + return; } + insert_timer(timer, vec); } spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED; @@ -181,15 +186,17 @@ static inline int detach_timer(struct timer_list *timer) return 0; } -void mod_timer(struct timer_list *timer, unsigned long expires) +int mod_timer(struct timer_list *timer, unsigned long expires) { + int ret; unsigned long flags; spin_lock_irqsave(&timerlist_lock, flags); 
timer->expires = expires; - detach_timer(timer); + ret = detach_timer(timer); internal_add_timer(timer); spin_unlock_irqrestore(&timerlist_lock, flags); + return ret; } int del_timer(struct timer_list * timer) @@ -204,6 +211,39 @@ int del_timer(struct timer_list * timer) return ret; } +#ifdef __SMP__ +/* + * SMP specific function to delete periodic timer. + * Caller must disable by some means restarting the timer + * for new. Upon exit the timer is not queued and handler is not running + * on any CPU. It returns number of times, which timer was deleted + * (for reference counting). + */ + +int del_timer_sync(struct timer_list * timer) +{ + int ret = 0; + + for (;;) { + unsigned long flags; + int running; + + spin_lock_irqsave(&timerlist_lock, flags); + ret += detach_timer(timer); + timer->next = timer->prev = 0; + running = timer->running; + spin_unlock_irqrestore(&timerlist_lock, flags); + + if (!running) + return ret; + timer_synchronize(timer); + } + + return ret; +} +#endif + + static inline void cascade_timers(struct timer_vec *tv) { /* cascade all the timers from tv up one level */ @@ -238,6 +278,7 @@ static inline void run_timer_list(void) unsigned long data = timer->data; detach_timer(timer); timer->next = timer->prev = NULL; + timer_set_running(timer); spin_unlock_irq(&timerlist_lock); fn(data); spin_lock_irq(&timerlist_lock); @@ -553,8 +594,7 @@ static unsigned long count_active_tasks(void) read_lock(&tasklist_lock); for_each_task(p) { if ((p->state == TASK_RUNNING || - (p->state & TASK_UNINTERRUPTIBLE) || - (p->state & TASK_SWAPPING))) + (p->state & TASK_UNINTERRUPTIBLE))) nr += FIXED_1; } read_unlock(&tasklist_lock); |
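
The largest addition in this merge is the new kernel/pm.c power-management core: a spinlock-protected global list of devices plus pm_send_request(), which walks the list and rolls back an incomplete suspend. Below is a rough sketch of how a driver might hook into it, using only the entry points visible in the patch (pm_register(), pm_unregister(), the int-returning callback, PM_SUSPEND/PM_RESUME); the mydev_* helpers are hypothetical, and PM_UNKNOWN_DEV is borrowed from pm_find() purely for illustration.

```c
/*
 * Illustrative driver glue for the new kernel/pm.c interface.
 * Only pm_register()/pm_unregister() and the callback signature are
 * taken from the patch; mydev_stop()/mydev_start() stand in for real
 * hardware handling.
 */
#include <linux/pm.h>

static struct pm_dev *mydev_pm;

static void mydev_stop(void)  { /* quiesce the hardware (stub) */ }
static void mydev_start(void) { /* re-enable the hardware (stub) */ }

static int mydev_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *data)
{
	switch (rqst) {
	case PM_SUSPEND:
		/* Returning non-zero here makes pm_send_request() resume
		 * the devices it had already suspended. */
		mydev_stop();
		break;
	case PM_RESUME:
		mydev_start();
		break;
	default:
		break;
	}
	return 0;
}

int mydev_init(void)
{
	/* PM_UNKNOWN_DEV is the wildcard type used by pm_find(); a real
	 * driver would pass its bus-specific type and id. */
	mydev_pm = pm_register(PM_UNKNOWN_DEV, 0, mydev_pm_callback);
	return 0;
}

void mydev_cleanup(void)
{
	pm_unregister(mydev_pm);	/* NULL-safe per the patch */
}
```

The policy side (APM or ACPI) would then call pm_send_request(PM_SUSPEND, data) to notify every registered device; the (int) data cast in pm_send() suggests the target state is passed through the data pointer.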
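kernel/softirq.c is rewritten around per-CPU softirqs: bh_base/bh_active/bh_mask disappear, tasklets become the normal way to defer work out of interrupt context, and the 32 legacy bottom halves are reimplemented as tasklets serialized by global_bh_lock. A minimal sketch of the new-style usage follows; tasklet_init() and tasklet_kill() come from this patch, while tasklet_schedule() and the 2.3 interrupt-handler prototype are assumed to be provided by <linux/interrupt.h>, and the mydev_* names are placeholders.

```c
/*
 * Sketch of deferring interrupt work with the new tasklet API.
 * tasklet_init()/tasklet_kill() are defined in the softirq.c rewrite;
 * tasklet_schedule() is assumed to come from <linux/interrupt.h>.
 */
#include <linux/interrupt.h>

static struct tasklet_struct mydev_tasklet;

/* Runs in softirq context; a given tasklet never runs on two CPUs at once. */
static void mydev_do_rx(unsigned long data)
{
	/* drain the device's receive ring here */
}

static void mydev_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
	/* acknowledge the hardware, then push the heavy work out of
	 * hard-irq context */
	tasklet_schedule(&mydev_tasklet);
}

static void mydev_open(void)
{
	tasklet_init(&mydev_tasklet, mydev_do_rx, 0);
}

static void mydev_close(void)
{
	/* waits until any scheduled run has finished */
	tasklet_kill(&mydev_tasklet);
}
```

Old-style code keeps working through init_bh()/remove_bh(), but every legacy bottom half now funnels through bh_action(), which takes global_bh_lock, so BHs stay globally serialized while tasklets and raw softirqs are not.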
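The kernel/timer.c changes explain why several hunks above drop the start_bh_atomic()/del_timer()/end_bh_atomic() dance: del_timer_sync() detaches the timer and, on SMP, loops via timer_synchronize() until the handler has finished on whatever CPU was running it (the patch compiles it only under __SMP__; a UP fallback to plain del_timer() is assumed to live in the header). mod_timer() now also reports whether it found the timer pending. Below is a sketch of the resulting teardown pattern for a self-rearming timer, with hypothetical mydev_* names.

```c
/*
 * Tearing down a self-rearming timer with del_timer_sync().
 * The caller must first stop the handler from re-arming the timer,
 * exactly as the comment above del_timer_sync() in the patch demands.
 */
#include <linux/timer.h>
#include <linux/sched.h>	/* jiffies, HZ */

static struct timer_list mydev_timer;
static int mydev_stopping;

static void mydev_poll(unsigned long data)
{
	/* ... periodic work ... */
	if (!mydev_stopping)
		mod_timer(&mydev_timer, jiffies + HZ);	/* re-arm in one second */
}

static void mydev_start_polling(void)
{
	init_timer(&mydev_timer);
	mydev_timer.function = mydev_poll;
	mydev_timer.data = 0;
	mod_timer(&mydev_timer, jiffies + HZ);
}

static void mydev_stop_polling(void)
{
	mydev_stopping = 1;		/* keep mydev_poll() from re-arming */
	del_timer_sync(&mydev_timer);	/* handler not running anywhere on return */
}
```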