Import of Linux/MIPS 2.1.36

author: Ralf Baechle <ralf@linux-mips.org> 1997-04-29 21:13:14 +0000
committer: <ralf@linux-mips.org> 1997-04-29 21:13:14 +0000
commit: 19c9bba94152148523ba0f7ef7cffe3d45656b11 (patch)
tree: 40b1cb534496a7f1ca0f5c314a523c69f1fee464 /arch/sparc/mm
parent: 7206675c40394c78a90e74812bbdbf8cf3cca1be (diff)
11 files changed, 2900 insertions, 1657 deletions
diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile
index 13652e467..4ae57f18f 100644
--- a/arch/sparc/mm/Makefile
+++ b/arch/sparc/mm/Makefile
@@ -1,4 +1,4 @@
-# $Id: Makefile,v 1.21 1996/04/26 10:45:53 tridge Exp $
+# $Id: Makefile,v 1.24 1997/04/20 14:11:49 ecd Exp $
 # Makefile for the linux Sparc-specific parts of the memory manager.
 #
 # Note! Dependencies are done automagically by 'make dep', which also
@@ -8,6 +8,13 @@
 # Note 2! The CFLAGS definition is now in the main makefile...
 
 O_TARGET := mm.o
-O_OBJS   := fault.o init.o sun4c.o srmmu.o loadmmu.o generic.o asyncd.o
+O_OBJS   := fault.o init.o sun4c.o srmmu.o hypersparc.o viking.o \
+	    loadmmu.o generic.o asyncd.o extable.o
 
 include $(TOPDIR)/Rules.make
+
+hypersparc.o: hypersparc.S
+	$(CC) -D__ASSEMBLY__ -ansi -c -o hypersparc.o hypersparc.S
+
+viking.o: viking.S
+	$(CC) -D__ASSEMBLY__ -ansi -c -o viking.o viking.S
diff --git a/arch/sparc/mm/asyncd.c b/arch/sparc/mm/asyncd.c
index d6ed42252..5d9d476a5 100644
--- a/arch/sparc/mm/asyncd.c
+++ b/arch/sparc/mm/asyncd.c
@@ -1,4 +1,4 @@
-/*  $Id: asyncd.c,v 1.8 1996/09/21 04:30:12 davem Exp $
+/*  $Id: asyncd.c,v 1.9 1996/12/18 06:43:22 tridge Exp $
  *  The asyncd kernel daemon. This handles paging on behalf of 
  *  processes that receive page faults due to remote (async) memory
  *  accesses. 
@@ -19,6 +19,8 @@
 #include <linux/stat.h>
 #include <linux/swap.h>
 #include <linux/fs.h>
+#include <linux/config.h>
+#include <linux/interrupt.h>
 
 #include <asm/dma.h>
 #include <asm/system.h> /* for cli()/sti() */
@@ -26,6 +28,15 @@
 #include <asm/bitops.h>
 #include <asm/pgtable.h>
 
+#define DEBUG 0
+
+#define WRITE_LIMIT 100
+#define LOOP_LIMIT 200
+
+static struct {
+	int faults, read, write, success, failure, errors;
+} stats;
+
 /* 
  * The wait queue for waking up the async daemon:
  */
@@ -51,8 +62,16 @@ static void add_to_async_queue(int taskid,
 {
 	struct async_job *a = kmalloc(sizeof(*a),GFP_ATOMIC);
 
-	if (!a)
-		panic("out of memory in asyncd\n");
+	if (!a) {	/* kmalloc failed: nothing can be queued */
+		printk("ERROR: out of memory in asyncd\n");
+		callback(taskid,address,write,1);	/* a is NULL here; report failure (ret=1) via the caller's callback */
+		return;
+	}
+
+	if (write)
+		stats.write++;
+	else
+		stats.read++;
 
 	a->next = NULL;
 	a->taskid = taskid;
@@ -76,17 +95,23 @@ void async_fault(unsigned long address, int write, int taskid,
 	struct task_struct *tsk = task[taskid];
 	struct mm_struct *mm = tsk->mm;
 
+	stats.faults++;
+
 #if 0
 	printk("paging in %x for task=%d\n",address,taskid);
 #endif
+
 	add_to_async_queue(taskid, mm, address, write, callback);
 	wake_up(&asyncd_wait);  
+	mark_bh(TQUEUE_BH);
 }
 
 static int fault_in_page(int taskid,
 			 struct vm_area_struct *vma,
 			 unsigned address,int write)
 {
+	static unsigned last_address;
+	static int last_task, loop_counter;
 	struct task_struct *tsk = task[taskid];
 	pgd_t *pgd;
 	pmd_t *pmd;
@@ -99,7 +124,27 @@ static int fault_in_page(int taskid,
 	  goto bad_area;
 	if (vma->vm_start > address)
 	  goto bad_area;
-	
+
+	if (address == last_address && taskid == last_task) {
+		loop_counter++;
+	} else {
+		loop_counter = 0;
+		last_address = address; 
+		last_task = taskid;
+	}
+
+	if (loop_counter == WRITE_LIMIT && !write) {
+		printk("MSC bug? setting write request\n");
+		stats.errors++;
+		write = 1;
+	}
+
+	if (loop_counter == LOOP_LIMIT) {
+		printk("MSC bug? failing request\n");
+		stats.errors++;
+		return 1;
+	}
+
 	pgd = pgd_offset(vma->vm_mm, address);
 	pmd = pmd_alloc(pgd,address);
 	if(!pmd)
@@ -124,34 +169,51 @@ static int fault_in_page(int taskid,
 
 	/* Fall through for do_wp_page */
 finish_up:
+	stats.success++;
 	update_mmu_cache(vma, address, *pte);
 	return 0;
 
 no_memory:
+	stats.failure++;
 	oom(tsk);
 	return 1;
 	
 bad_area:	  
+	stats.failure++;
 	tsk->tss.sig_address = address;
 	tsk->tss.sig_desc = SUBSIG_NOMAPPING;
 	send_sig(SIGSEGV, tsk, 1);
 	return 1;
 }
 
+
 /* Note the semaphore operations must be done here, and _not_
  * in async_fault().
  */
 static void run_async_queue(void)
 {
 	int ret;
+	unsigned flags;
+
 	while (async_queue) {
-		volatile struct async_job *a = async_queue;
-		struct mm_struct *mm = a->mm;
+		volatile struct async_job *a;
+		struct mm_struct *mm;
 		struct vm_area_struct *vma;
+
+		save_flags(flags); cli();
+		a = async_queue;
 		async_queue = async_queue->next;
+		restore_flags(flags);
+
+		mm = a->mm;
+
 		down(&mm->mmap_sem);
 		vma = find_vma(mm, a->address);
 		ret = fault_in_page(a->taskid,vma,a->address,a->write);
+#if DEBUG
+		printk("fault_in_page(task=%d addr=%x write=%d) = %d\n",
+		       a->taskid,a->address,a->write,ret);
+#endif
 		a->callback(a->taskid,a->address,a->write,ret);
 		up(&mm->mmap_sem);
 		kfree_s((void *)a,sizeof(*a));
@@ -159,6 +221,14 @@ static void run_async_queue(void)
 }
 
 
+#if CONFIG_AP1000
+static void asyncd_info(void)	/* prints the asyncd counters kept in 'stats' */
+{
+	printk("CID(%d) faults: total=%d  read=%d  write=%d  success=%d fail=%d err=%d\n",
+	       mpp_cid(),stats.faults, stats.read, stats.write, stats.success,
+	       stats.failure, stats.errors);
+}
+#endif
 
 
 /*
@@ -172,17 +242,30 @@ int asyncd(void *unused)
 	sprintf(current->comm, "asyncd");
 	current->blocked = ~0UL; /* block all signals */
   
-	/* Give kswapd a realtime priority. */
+	/* Give asyncd a realtime priority. */
 	current->policy = SCHED_FIFO;
 	current->priority = 32;  /* Fixme --- we need to standardise our
 				    namings for POSIX.4 realtime scheduling
 				    priorities.  */
   
 	printk("Started asyncd\n");
-  
+
+#if CONFIG_AP1000
+	bif_add_debug_key('a',asyncd_info,"stats on asyncd");
+#endif
+
 	while (1) {
-		current->signal = 0;
-		interruptible_sleep_on(&asyncd_wait);
+		unsigned flags;
+
+		save_flags(flags); cli();
+
+		while (!async_queue) {
+			current->signal = 0;
+			interruptible_sleep_on(&asyncd_wait);
+		}
+
+		restore_flags(flags);
+
 		run_async_queue();
 	}
 }
diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c
new file mode 100644
index 000000000..7fe26ad96
--- /dev/null
+++ b/arch/sparc/mm/extable.c
@@ -0,0 +1,69 @@
+/*
+ * linux/arch/sparc/mm/extable.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+extern const struct exception_table_entry __start___ex_table[];
+extern const struct exception_table_entry __stop___ex_table[];
+
+static unsigned long	/* returns a fixup value, or 0 when nothing matches */
+search_one_table(const struct exception_table_entry *start,
+		 const struct exception_table_entry *last,	/* inclusive upper bound */
+		 unsigned long value, unsigned long *g2)	/* *g2 is written only on range matches */
+{
+	const struct exception_table_entry *first = start;
+	const struct exception_table_entry *mid;
+	long diff = 0;
+        while (first <= last) {	/* binary search on ->insn; assumes the table is sorted by insn -- TODO confirm */
+		mid = (last - first) / 2 + first;
+		diff = mid->insn - value;
+                if (diff == 0) {
+                	if (!mid->fixup) {	/* zero fixup: the fixup is taken from the following entry */
+                		*g2 = 0;
+                		return (mid + 1)->fixup;
+                	} else
+	                        return mid->fixup;
+                } else if (diff < 0)
+                        first = mid+1;
+                else
+                        last = mid-1;
+        }
+        if (last->insn < value && !last->fixup && last[1].insn > value) {	/* NOTE(review): last may be start-1 here -- confirm */
+        	*g2 = (value - last->insn)/4;	/* distance from the zero-fixup entry, in 4-byte units */
+        	return last[1].fixup;
+        }
+        if (first > start && first[-1].insn < value	/* NOTE(review): first == last+1 after the loop, so this looks */
+	    && !first[-1].fixup && first->insn < value) {	/* like the check above with the last compare inverted -- verify */
+        	*g2 = (value - first[-1].insn)/4;
+        	return first->fixup;
+        }
+        return 0;
+}
+
+unsigned long	/* first non-zero result of search_one_table() for addr, else 0 */
+search_exception_table(unsigned long addr, unsigned long *g2)
+{
+	unsigned long ret;
+
+#ifndef CONFIG_MODULES
+	/* There is only the kernel to search.  */
+	ret = search_one_table(__start___ex_table,
+			       __stop___ex_table-1, addr, g2);	/* -1: the callee takes an inclusive last entry */
+	if (ret) return ret;
+#else
+	/* The kernel is the last "module" -- no need to treat it special.  */
+	struct module *mp;
+	for (mp = module_list; mp != NULL; mp = mp->next) {
+		if (mp->ex_table_start == NULL)	/* entry has no exception table */
+			continue;
+		ret = search_one_table(mp->ex_table_start,
+				       mp->ex_table_end-1, addr, g2);
+		if (ret) return ret;
+	}
+#endif
+
+	return 0;	/* no table produced a fixup */
+}
diff --git a/arch/sparc/mm/fault.c b/arch/sparc/mm/fault.c
index 8c8755ce5..cfac6bcc2 100644
--- a/arch/sparc/mm/fault.c
+++ b/arch/sparc/mm/fault.c
@@ -1,8 +1,9 @@
-/* $Id: fault.c,v 1.77 1996/10/28 00:56:02 davem Exp $
+/* $Id: fault.c,v 1.91 1997/03/18 17:56:00 jj Exp $
  * fault.c:  Page fault handlers for the Sparc.
  *
  * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
  * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
 #include <asm/head.h>
@@ -15,6 +16,8 @@
 #include <linux/smp.h>
 #include <linux/signal.h>
 #include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
 
 #include <asm/system.h>
 #include <asm/segment.h>
@@ -26,14 +29,13 @@
 #include <asm/smp.h>
 #include <asm/traps.h>
 #include <asm/kdebug.h>
+#include <asm/uaccess.h>
 
 #define ELEMENTS(arr) (sizeof (arr)/sizeof (arr[0]))
 
 extern struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS];
 extern int prom_node_root;
 
-extern void die_if_kernel(char *,struct pt_regs *);
-
 struct linux_romvec *romvec;
 
 /* At boot time we determine these two values necessary for setting
@@ -70,8 +72,10 @@ int prom_probe_memory (void)
 		bytes = mlist->num_bytes;
 		tally += bytes;
 		if (i >= SPARC_PHYS_BANKS-1) {
-			printk ("The machine has more banks that this kernel can support\n"
-				"Increase the SPARC_PHYS_BANKS setting (currently %d)\n",
+			printk ("The machine has more banks than "
+				"this kernel can support\n"
+				"Increase the SPARC_PHYS_BANKS "
+				"setting (currently %d)\n",
 				SPARC_PHYS_BANKS);
 			i = SPARC_PHYS_BANKS-1;
 			break;
@@ -128,49 +132,80 @@ asmlinkage void sparc_lvl15_nmi(struct pt_regs *regs, unsigned long serr,
 	prom_halt();
 }
 
+static void unhandled_fault(unsigned long, struct task_struct *,
+		struct pt_regs *) __attribute__ ((noreturn));
+
+static void unhandled_fault(unsigned long address, struct task_struct *tsk,
+                     struct pt_regs *regs)
+{	/* Log the faulting address class and tsk's mm state, then call die_if_kernel(). */
+	if((unsigned long) address < PAGE_SIZE) {
+		printk(KERN_ALERT "Unable to handle kernel NULL "
+		       "pointer dereference\n");	/* newline keeps the next KERN_ALERT line separate */
+	} else {
+		printk(KERN_ALERT "Unable to handle kernel paging request "
+		       "at virtual address %08lx\n", address);
+	}
+	printk(KERN_ALERT "tsk->mm->context = %08lx\n",
+	       (unsigned long) tsk->mm->context);
+	printk(KERN_ALERT "tsk->mm->pgd = %08lx\n",
+	       (unsigned long) tsk->mm->pgd);
+	die_if_kernel("Oops", regs);
+}
+
+asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc, 
+			    unsigned long address)
+{	/* Returns 1, 2 or 3 when the ex_table entry for ret_pc covers the access at pc. */
+	unsigned long g2;
+	int i;
+	unsigned insn;
+	struct pt_regs regs;
+	/* Anything not covered falls through to unhandled_fault() with a synthetic regs. */
+	i = search_exception_table (ret_pc, &g2);
+	switch (i) {
+	/* load & store will be handled by fixup */
+	case 3: return 3;
+	/* store will be handled by fixup, load will bump out */
+	/* for _to_ macros */
+	case 1: insn = *(unsigned *)pc; if ((insn >> 21) & 1) return 1; break;	/* decode the insn word at pc, not the address */
+	/* load will be handled by fixup, store will bump out */
+	/* for _from_ macros */
+	case 2: insn = *(unsigned *)pc;	/* bit 21 set = store; op3 == 15 is accepted as well */
+		if (!((insn >> 21) & 1) || ((insn>>19)&0x3f) == 15) return 2; 
+		break; 
+	default: break;
+	}
+	memset (&regs, 0, sizeof (regs));
+	regs.pc = pc;
+	regs.npc = pc + 4;
+	__asm__ __volatile__ ("
+		rd %%psr, %0
+		nop
+		nop
+		nop" : "=r" (regs.psr));
+	unhandled_fault (address, current, &regs);
+	/* Not reached */
+	return 0;
+}
+
 asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
 			       unsigned long address)
 {
 	struct vm_area_struct *vma;
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
+	unsigned int fixup;
+	unsigned long g2;
 	int from_user = !(regs->psr & PSR_PS);
-#if 0
-	static unsigned long last_one;
-#endif
-
+	lock_kernel();
 	down(&mm->mmap_sem);
 	if(text_fault)
 		address = regs->pc;
 
-#if 0
-	if(current->tss.ex.count) {
-		printk("f<pid=%d,tf=%d,wr=%d,addr=%08lx,pc=%08lx>\n",
-		       tsk->pid, text_fault, write, address, regs->pc);
-		printk("EX: count<%d> pc<%08lx> expc<%08lx> address<%08lx>\n",
-		       (int) current->tss.ex.count, current->tss.ex.pc,
-		       current->tss.ex.expc, current->tss.ex.address);
-#if 0
-		if(last_one == address) {
-			printk("Twice in a row, AIEEE.  Spinning so you can see the dump.\n");
-			show_regs(regs);
-			sti();
-			while(1)
-				barrier();
-		}
-		last_one = address;
-#endif
-	}
-#endif
-	/* Now actually handle the fault.  Do kernel faults special,
-	 * because on the sun4c we could have faulted trying to read
-	 * the vma area of the task and without the following code
-	 * we'd fault recursively until all our stack is gone. ;-(
+	/* The kernel referencing a bad kernel pointer can lock up
+	 * a sun4c machine completely, so we must attempt recovery.
 	 */
-	if(!from_user && address >= PAGE_OFFSET) {
-		quick_kernel_fault(address);
-		return;
-	}
+	if(!from_user && address >= PAGE_OFFSET)
+		goto bad_area;
 
 	vma = find_vma(mm, address);
 	if(!vma)
@@ -196,32 +231,37 @@ good_area:
 	}
 	handle_mm_fault(vma, address, write);
 	up(&mm->mmap_sem);
-	return;
+	goto out;
 	/*
 	 * Something tried to access memory that isn't in our memory map..
 	 * Fix it, but check if it's kernel or user first..
 	 */
 bad_area:
 	up(&mm->mmap_sem);
-	/* Did we have an exception handler installed? */
-	if(current->tss.ex.count == 1) {
-		if(from_user) {
-			printk("Yieee, exception signalled from user mode.\n");
-		} else {
-			/* Set pc to %g1, set %g1 to -EFAULT and %g2 to
-			 * the faulting address so we can cleanup.
-			 */
+	/* Is this in ex_table? */
+	
+	g2 = regs->u_regs[UREG_G2];
+	if (!from_user && (fixup = search_exception_table (regs->pc, &g2))) {
+		if (fixup > 10) { /* Values below are reserved for other things */
+			extern const unsigned __memset_start[];
+			extern const unsigned __memset_end[];
+			extern const unsigned __csum_partial_copy_start[];
+			extern const unsigned __csum_partial_copy_end[];
+
 			printk("Exception: PC<%08lx> faddr<%08lx>\n", regs->pc, address);
-			printk("EX: count<%d> pc<%08lx> expc<%08lx> address<%08lx>\n",
-			       (int) current->tss.ex.count, current->tss.ex.pc,
-			       current->tss.ex.expc, current->tss.ex.address);
-			current->tss.ex.count = 0;
-			regs->pc = current->tss.ex.expc;
+			printk("EX_TABLE: insn<%08lx> fixup<%08x> g2<%08lx>\n",
+				regs->pc, fixup, g2);
+			if ((regs->pc >= (unsigned long)__memset_start &&
+			     regs->pc < (unsigned long)__memset_end) ||
+			    (regs->pc >= (unsigned long)__csum_partial_copy_start &&
+			     regs->pc < (unsigned long)__csum_partial_copy_end)) {
+			        regs->u_regs[UREG_I4] = address;
+				regs->u_regs[UREG_I5] = regs->pc;
+			}
+			regs->u_regs[UREG_G2] = g2;
+			regs->pc = fixup;
 			regs->npc = regs->pc + 4;
-			regs->u_regs[UREG_G1] = -EFAULT;
-			regs->u_regs[UREG_G2] = address - current->tss.ex.address;
-			regs->u_regs[UREG_G3] = current->tss.ex.pc;
-			return;
+			goto out;
 		}
 	}
 	if(from_user) {
@@ -232,44 +272,72 @@ bad_area:
 		tsk->tss.sig_address = address;
 		tsk->tss.sig_desc = SUBSIG_NOMAPPING;
 		send_sig(SIGSEGV, tsk, 1);
-		return;
+		goto out;
 	}
-	if((unsigned long) address < PAGE_SIZE) {
-		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
-	} else
-		printk(KERN_ALERT "Unable to handle kernel paging request");
-	printk(KERN_ALERT " at virtual address %08lx\n",address);
-	printk(KERN_ALERT "tsk->mm->context = %08lx\n",
-	       (unsigned long) tsk->mm->context);
-	printk(KERN_ALERT "tsk->mm->pgd = %08lx\n",
-	       (unsigned long) tsk->mm->pgd);
-	die_if_kernel("Oops", regs);
+	unhandled_fault (address, tsk, regs);
+out:
+	unlock_kernel();
 }
 
 asmlinkage void do_sun4c_fault(struct pt_regs *regs, int text_fault, int write,
 			       unsigned long address)
 {
-	extern void sun4c_update_mmu_cache(struct vm_area_struct *,unsigned long,pte_t);
+	extern void sun4c_update_mmu_cache(struct vm_area_struct *,
+					   unsigned long,pte_t);
 	extern pgd_t *sun4c_pgd_offset(struct mm_struct *,unsigned long);
 	extern pte_t *sun4c_pte_offset(pmd_t *,unsigned long);
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
-	pgd_t *pgd;
-	pte_t *pte;
+	pgd_t *pgdp;
+	pte_t *ptep;
 
-	if(text_fault)
+	if (text_fault)
 		address = regs->pc;
 
-	pgd = sun4c_pgd_offset(mm, address);
-	pte = sun4c_pte_offset((pmd_t *) pgd, address);
+	pgdp = sun4c_pgd_offset(mm, address);
+	ptep = sun4c_pte_offset((pmd_t *) pgdp, address);
+
+	if (pgd_val(*pgdp)) {
+	    if (write) {
+		if ((pte_val(*ptep) & (_SUN4C_PAGE_WRITE|_SUN4C_PAGE_PRESENT))
+				   == (_SUN4C_PAGE_WRITE|_SUN4C_PAGE_PRESENT)) {
+
+			*ptep = __pte(pte_val(*ptep) | _SUN4C_PAGE_ACCESSED |
+				      _SUN4C_PAGE_MODIFIED |
+				      _SUN4C_PAGE_VALID |
+				      _SUN4C_PAGE_DIRTY);
+
+			if (sun4c_get_segmap(address) != invalid_segment) {
+				sun4c_put_pte(address, pte_val(*ptep));
+				return;
+			}
+		}
+	    } else {
+		if ((pte_val(*ptep) & (_SUN4C_PAGE_READ|_SUN4C_PAGE_PRESENT))
+				   == (_SUN4C_PAGE_READ|_SUN4C_PAGE_PRESENT)) {
+
+			*ptep = __pte(pte_val(*ptep) | _SUN4C_PAGE_ACCESSED |
+				      _SUN4C_PAGE_VALID);
+
+			if (sun4c_get_segmap(address) != invalid_segment) {
+				sun4c_put_pte(address, pte_val(*ptep));
+				return;
+			}
+		}
+	    }
+	}
 
 	/* This conditional is 'interesting'. */
-	if(pgd_val(*pgd) && !(write && !(pte_val(*pte) & _SUN4C_PAGE_WRITE))
-	   && (pte_val(*pte) & _SUN4C_PAGE_VALID))
-		/* XXX Very bad, can't do this optimization when VMA arg is actually
-		 * XXX used by update_mmu_cache()!
+	if (pgd_val(*pgdp) && !(write && !(pte_val(*ptep) & _SUN4C_PAGE_WRITE))
+	    && (pte_val(*ptep) & _SUN4C_PAGE_VALID))
+		/* Note: It is safe to not grab the MMAP semaphore here because
+		 *       we know that update_mmu_cache() will not sleep for
+		 *       any reason (at least not in the current implementation)
+		 *       and therefore there is no danger of another thread getting
+		 *       on the CPU and doing a shrink_mmap() on this vma.
 		 */
-		sun4c_update_mmu_cache((struct vm_area_struct *) 0, address, *pte);
+		sun4c_update_mmu_cache (find_vma(current->mm, address), address,
+					*ptep);
 	else
 		do_sparc_fault(regs, text_fault, write, address);
 }
@@ -321,25 +389,31 @@ void window_overflow_fault(void)
 {
 	unsigned long sp;
 
+	lock_kernel();
 	sp = current->tss.rwbuf_stkptrs[0];
 	if(((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK))
 		force_user_fault(sp + 0x38, 1);
 	force_user_fault(sp, 1);
+	unlock_kernel();
 }
 
 void window_underflow_fault(unsigned long sp)
 {
+	lock_kernel();
 	if(((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK))
 		force_user_fault(sp + 0x38, 0);
 	force_user_fault(sp, 0);
+	unlock_kernel();
 }
 
 void window_ret_fault(struct pt_regs *regs)
 {
 	unsigned long sp;
 
+	lock_kernel();
 	sp = regs->u_regs[UREG_FP];
 	if(((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK))
 		force_user_fault(sp + 0x38, 0);
 	force_user_fault(sp, 0);
+	unlock_kernel();
 }
diff --git a/arch/sparc/mm/generic.c b/arch/sparc/mm/generic.c
index 0c202fdeb..4ad1810e3 100644
--- a/arch/sparc/mm/generic.c
+++ b/arch/sparc/mm/generic.c
@@ -1,4 +1,4 @@
-/* $Id: generic.c,v 1.4 1996/10/27 08:36:41 davem Exp $
+/* $Id: generic.c,v 1.5 1996/12/18 06:43:23 tridge Exp $
  * generic.c: Generic Sparc mm routines that are not dependent upon
  *            MMU type but are Sparc specific.
  *
@@ -104,7 +104,7 @@ int io_remap_page_range(unsigned long from, unsigned long offset, unsigned long
 	unsigned long beg = from;
 	unsigned long end = from + size;
 
-	pgprot_val(prot) = pg_iobits;
+	prot = __pgprot(pg_iobits);
 	offset -= from;
 	dir = pgd_offset(current->mm, from);
 	flush_cache_range(current->mm, beg, end);
diff --git a/arch/sparc/mm/hypersparc.S b/arch/sparc/mm/hypersparc.S
new file mode 100644
index 000000000..4e5a19301
--- /dev/null
+++ b/arch/sparc/mm/hypersparc.S
@@ -0,0 +1,336 @@
+/* $Id: hypersparc.S,v 1.4 1997/04/19 04:33:39 davem Exp $
+ * hypersparc.S: High speed Hypersparc mmu/cache operations.
+ *
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <asm/ptrace.h>
+#include <asm/psr.h>
+#include <asm/asi.h>
+#include <asm/page.h>
+#include <asm/pgtsrmmu.h>
+
+#define WINDOW_FLUSH(tmp1, tmp2)					\
+	mov	0, tmp1;						\
+98:	ld	[%g6 + AOFF_task_tss + AOFF_thread_uwinmask], tmp2;	\
+	orcc	%g0, tmp2, %g0;						\
+	add	tmp1, 1, tmp1;						\
+	bne	98b;							\
+	 save	%sp, -64, %sp;						\
+99:	subcc	tmp1, 1, tmp1;						\
+	bne	99b;							\
+	 restore %g0, %g0, %g0;
+
+	.text
+	.align	4
+
+	.globl	hypersparc_flush_cache_all, hypersparc_flush_cache_mm
+	.globl	hypersparc_flush_cache_range, hypersparc_flush_cache_page
+	.globl	hypersparc_flush_page_to_ram, hypersparc_flush_chunk
+	.globl	hypersparc_flush_page_for_dma, hypersparc_flush_sig_insns
+	.globl	hypersparc_flush_tlb_all, hypersparc_flush_tlb_mm
+	.globl	hypersparc_flush_tlb_range, hypersparc_flush_tlb_page
+
+hypersparc_flush_cache_all:
+	WINDOW_FLUSH(%g4, %g5)
+	sethi	%hi(vac_cache_size), %g4
+	ld	[%g4 + %lo(vac_cache_size)], %g5	/* %g5 = vac_cache_size */
+	sethi	%hi(vac_line_size), %g1
+	ld	[%g1 + %lo(vac_line_size)], %g2	/* %g2 = vac_line_size */
+1:	/* walk %g5 down to 0 in line-size steps; the delay-slot store runs every pass */
+	subcc	%g5, %g2, %g5			! hyper_flush_unconditional_combined
+	bne	1b
+	 sta	%g0, [%g5] ASI_M_FLUSH_CTX
+	retl
+	 sta	%g0, [%g0] ASI_M_FLUSH_IWHOLE	! hyper_flush_whole_icache
+
+	/* We expand the window flush to get maximum performance. */
+hypersparc_flush_cache_mm:
+#ifndef __SMP__
+	ld	[%o0 + AOFF_mm_context], %g1	/* %g1 = context field of the mm in %o0 */
+	cmp	%g1, -1				/* context == -1: skip the user flush */
+	be	hypersparc_flush_cache_mm_out
+#endif
+	WINDOW_FLUSH(%g4, %g5)		/* its first insn (mov 0, %g4) sits in the delay slot of the be above */
+
+	sethi	%hi(vac_line_size), %g1
+	ld	[%g1 + %lo(vac_line_size)], %o1	/* %o1 = line size */
+	sethi	%hi(vac_cache_size), %g2
+	ld	[%g2 + %lo(vac_cache_size)], %o0	/* %o0 = cache size, counted down below */
+	add	%o1, %o1, %g1			/* %g1 = 2 lines */
+	add	%o1, %g1, %g2			/* %g2 = 3 lines */
+	add	%o1, %g2, %g3			/* %g3 = 4 lines */
+	add	%o1, %g3, %g4			/* %g4 = 5 lines */
+	add	%o1, %g4, %g5			/* %g5 = 6 lines */
+	add	%o1, %g5, %o4			/* %o4 = 7 lines */
+	add	%o1, %o4, %o5			/* %o5 = 8 lines, the per-pass step */
+
+	/* BLAMMO! */
+1:
+	subcc	%o0, %o5, %o0				! hyper_flush_cache_user
+	sta	%g0, [%o0 + %g0] ASI_M_FLUSH_USER
+	sta	%g0, [%o0 + %o1] ASI_M_FLUSH_USER
+	sta	%g0, [%o0 + %g1] ASI_M_FLUSH_USER
+	sta	%g0, [%o0 + %g2] ASI_M_FLUSH_USER
+	sta	%g0, [%o0 + %g3] ASI_M_FLUSH_USER
+	sta	%g0, [%o0 + %g4] ASI_M_FLUSH_USER
+	sta	%g0, [%o0 + %g5] ASI_M_FLUSH_USER
+	bne	1b				/* condition codes still come from the subcc */
+	 sta	%g0, [%o0 + %o4] ASI_M_FLUSH_USER
+hypersparc_flush_cache_mm_out:
+	retl
+	 sta	%g0, [%g0 + %g0] ASI_M_FLUSH_IWHOLE	! hyper_flush_whole_icache
+
+	/* The things we do for performance... */
+hypersparc_flush_cache_range:
+#ifndef __SMP__
+	ld	[%o0 + AOFF_mm_context], %g1
+	cmp	%g1, -1				/* context == -1: only the icache flush is done */
+	be	hypersparc_flush_cache_range_out
+#endif
+	WINDOW_FLUSH(%g4, %g5)
+
+	sethi	%hi(vac_line_size), %g1
+	ld	[%g1 + %lo(vac_line_size)], %o4	/* %o4 = line size */
+	sethi	%hi(vac_cache_size), %g2
+	ld	[%g2 + %lo(vac_cache_size)], %o3	/* %o3 = cache size */
+
+	/* Here comes the fun part... */
+	add	%o2, (PAGE_SIZE - 1), %o2
+	andn	%o1, (PAGE_SIZE - 1), %o1	/* %o1 = start rounded down to a page */
+	add	%o4, %o4, %o5			/* %o5 = 2 lines */
+	andn	%o2, (PAGE_SIZE - 1), %o2	/* %o2 = end rounded up to a page */
+	add	%o4, %o5, %g1			/* %g1 = 3 lines */
+	sub	%o2, %o1, %g4			/* %g4 = page-rounded length */
+	add	%o4, %g1, %g2			/* %g2 = 4 lines */
+	sll	%o3, 2, %g5			/* %g5 = cache_size << 2, the threshold */
+	add	%o4, %g2, %g3			/* %g3 = 5 lines */
+	cmp	%g4, %g5			/* the adds below leave the condition codes alone */
+	add	%o4, %g3, %g4			/* %g4 = 6 lines */
+	blu	0f				/* length < threshold: per-page path */
+	 add	%o4, %g4, %g5			/* %g5 = 7 lines */
+	add	%o4, %g5, %g7			/* %g7 = 8 lines, the per-pass step */
+
+	/* Flush entire user space, believe it or not this is quicker
+	 * than page at a time flushings for range > (cache_size<<2).
+	 */
+1:
+	subcc	%o3, %g7, %o3
+	sta	%g0, [%o3 + %g0] ASI_M_FLUSH_USER
+	sta	%g0, [%o3 + %o4] ASI_M_FLUSH_USER
+	sta	%g0, [%o3 + %o5] ASI_M_FLUSH_USER
+	sta	%g0, [%o3 + %g1] ASI_M_FLUSH_USER
+	sta	%g0, [%o3 + %g2] ASI_M_FLUSH_USER
+	sta	%g0, [%o3 + %g3] ASI_M_FLUSH_USER
+	sta	%g0, [%o3 + %g4] ASI_M_FLUSH_USER
+	bne	1b
+	 sta	%g0, [%o3 + %g5] ASI_M_FLUSH_USER
+	retl
+	 sta	%g0, [%g0 + %g0] ASI_M_FLUSH_IWHOLE
+
+	/* Below our threshold, flush one page at a time. */
+0:
+	ld	[%o0 + AOFF_mm_context], %o0	/* %o0 = context of the mm */
+	mov	SRMMU_CTX_REG, %g7
+	lda	[%g7] ASI_M_MMUREGS, %o3	/* %o3 = current context register, restored at the end */
+	sta	%o0, [%g7] ASI_M_MMUREGS	/* switch to the context of the mm */
+	sethi	%hi(PAGE_SIZE), %g7		/* XXX ick, stupid stalls... */
+	sub	%o2, %g7, %o0			/* %o0 = base of the last page in the range */
+1:
+	or	%o0, 0x400, %g7
+	lda	[%g7] ASI_M_FLUSH_PROBE, %g7	/* probe the page; a zero result skips it */
+	orcc	%g7, 0, %g0
+	be,a	3f
+	 mov	%o0, %o2			/* annulled unless taken: step %o2 past the skipped page */
+	add	%o4, %g5, %g7			/* %g7 = 8 lines */
+2:
+	sub	%o2, %g7, %o2
+	sta	%g0, [%o2 + %g0] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o2 + %o4] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o2 + %o5] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o2 + %g1] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o2 + %g2] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o2 + %g3] ASI_M_FLUSH_PAGE
+	andcc	%o2, 0xffc, %g0			/* loop until %o2 has no bits set in 0xffc */
+	sta	%g0, [%o2 + %g4] ASI_M_FLUSH_PAGE
+	bne	2b
+	 sta	%g0, [%o2 + %g5] ASI_M_FLUSH_PAGE
+3:
+	sethi	%hi(PAGE_SIZE), %g7
+	cmp	%o2, %o1			/* stop once the rounded start has been reached */
+	bne	1b
+	 sub	%o2, %g7, %o0			/* next (lower) page to probe */
+	mov	SRMMU_FAULT_STATUS, %g5
+	lda	[%g5] ASI_M_MMUREGS, %g0	/* fault status read and discarded; presumably to clear it -- TODO confirm */
+	mov	SRMMU_CTX_REG, %g7
+	sta	%o3, [%g7] ASI_M_MMUREGS	/* restore the saved context register */
+hypersparc_flush_cache_range_out:
+	retl
+	 sta	%g0, [%g0 + %g0] ASI_M_FLUSH_IWHOLE
+
+	/* HyperSparc requires a valid mapping where we are about to flush
+	 * in order to check for a physical tag match during the flush.
+	 */
+	/* Verified, my ass... */
+hypersparc_flush_cache_page:
+	ld	[%o0 + 0x0], %o0		/* XXX vma->vm_mm, GROSS XXX */
+	ld	[%o0 + AOFF_mm_context], %g2	/* %g2 = context of that mm */
+#ifndef __SMP__
+	cmp	%g2, -1				/* context == -1: only the icache flush is done */
+	be	hypersparc_flush_cache_page_out
+#endif
+	WINDOW_FLUSH(%g4, %g5)
+
+	sethi	%hi(vac_line_size), %g1
+	ld	[%g1 + %lo(vac_line_size)], %o4	/* %o4 = line size */
+	mov	SRMMU_CTX_REG, %o3
+	andn	%o1, (PAGE_SIZE - 1), %o1	/* %o1 = page base of the address */
+	lda	[%o3] ASI_M_MMUREGS, %o2	/* %o2 = current context register, restored at 2: */
+	sta	%g2, [%o3] ASI_M_MMUREGS	/* switch to the context of the mm */
+	or	%o1, 0x400, %o5
+	lda	[%o5] ASI_M_FLUSH_PROBE, %g1	/* probe the page; a zero result skips the flush */
+	orcc	%g0, %g1, %g0
+	sethi	%hi(PAGE_SIZE), %g7
+	be	2f
+	 add	%o4, %o4, %o5			/* %o5 = 2 lines */
+	add	%o1, %g7, %o1			/* %o1 = end of page; the loop walks downwards */
+	add	%o4, %o5, %g1			/* %g1 = 3 lines */
+	add	%o4, %g1, %g2			/* %g2 = 4 lines */
+	add	%o4, %g2, %g3			/* %g3 = 5 lines */
+	add	%o4, %g3, %g4			/* %g4 = 6 lines */
+	add	%o4, %g4, %g5			/* %g5 = 7 lines */
+	add	%o4, %g5, %g7			/* %g7 = 8 lines, the per-pass step */
+
+	/* BLAMMO! */
+1:
+	sub	%o1, %g7, %o1
+	sta	%g0, [%o1 + %g0] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %o4] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %o5] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %g1] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %g2] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %g3] ASI_M_FLUSH_PAGE
+	andcc	%o1, 0xffc, %g0			/* loop until %o1 has no bits set in 0xffc */
+	sta	%g0, [%o1 + %g4] ASI_M_FLUSH_PAGE
+	bne	1b
+	 sta	%g0, [%o1 + %g5] ASI_M_FLUSH_PAGE
+2:
+	mov	SRMMU_FAULT_STATUS, %g7
+	mov	SRMMU_CTX_REG, %g4
+	lda	[%g7] ASI_M_MMUREGS, %g0	/* fault status read and discarded; presumably to clear it -- TODO confirm */
+	sta	%o2, [%g4] ASI_M_MMUREGS	/* restore the saved context register */
+hypersparc_flush_cache_page_out:
+	retl
+	 sta	%g0, [%g0 + %g0] ASI_M_FLUSH_IWHOLE
+
+hypersparc_flush_sig_insns:
+	flush	%o2			/* flush the word at the address in %o2 ... */
+	retl
+	 flush	%o2 + 4			/* ... and the following word, in the delay slot */
+
+	/* HyperSparc is copy-back. */
+hypersparc_flush_page_to_ram:
+hypersparc_flush_chunk:			/* both entry points share this body */
+	sethi	%hi(vac_line_size), %g1
+	ld	[%g1 + %lo(vac_line_size)], %o4	/* %o4 = line size */
+	andn	%o0, (PAGE_SIZE - 1), %o0	/* %o0 = page base of the address */
+	add	%o4, %o4, %o5			/* %o5 = 2 lines */
+	or	%o0, 0x400, %g7
+	lda	[%g7] ASI_M_FLUSH_PROBE, %g5	/* probe the page; a zero result skips the flush */
+	add	%o4, %o5, %g1			/* %g1 = 3 lines */
+	orcc	%g5, 0, %g0
+	be	2f
+	 add	%o4, %g1, %g2			/* %g2 = 4 lines */
+	sethi	%hi(PAGE_SIZE), %g5
+	add	%o4, %g2, %g3			/* %g3 = 5 lines */
+	add	%o0, %g5, %o0			/* %o0 = end of page; the loop walks downwards */
+	add	%o4, %g3, %g4			/* %g4 = 6 lines */
+	add	%o4, %g4, %g5			/* %g5 = 7 lines */
+	add	%o4, %g5, %g7			/* %g7 = 8 lines, the per-pass step */
+
+	/* BLAMMO! */
+1:
+	sub	%o0, %g7, %o0
+	sta	%g0, [%o0 + %g0] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o0 + %o4] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o0 + %o5] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o0 + %g1] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o0 + %g2] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o0 + %g3] ASI_M_FLUSH_PAGE
+	andcc	%o0, 0xffc, %g0			/* loop until %o0 has no bits set in 0xffc */
+	sta	%g0, [%o0 + %g4] ASI_M_FLUSH_PAGE
+	bne	1b
+	 sta	%g0, [%o0 + %g5] ASI_M_FLUSH_PAGE
+2:
+	mov	SRMMU_FAULT_STATUS, %g1
+	retl
+	 lda	[%g1] ASI_M_MMUREGS, %g0	/* fault status read and discarded; presumably to clear it -- TODO confirm */
+
+	/* HyperSparc is IO cache coherent. */
+hypersparc_flush_page_for_dma:		/* deliberately a no-op: returns immediately */
+	retl
+	 nop
+
+	/* It was noted that at boot time a TLB flush all in a delay slot
+	 * can deliver an illegal instruction to the processor if the timing
+	 * is just right...
+	 */
+hypersparc_flush_tlb_all:
+	mov	0x400, %g1
+	sta	%g0, [%g1] ASI_M_FLUSH_PROBE	/* kept out of the delay slot, per the note above */
+	retl
+	 nop
+
+hypersparc_flush_tlb_mm:
+	mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + AOFF_mm_context], %o1	/* %o1 = context of the mm in %o0 */
+	lda	[%g1] ASI_M_MMUREGS, %g5	/* %g5 = current context register, restored on exit */
+#ifndef __SMP__
+	cmp	%o1, -1				/* context == -1: skip the flush */
+	be	hypersparc_flush_tlb_mm_out
+#endif
+	mov	0x300, %g2			/* also the delay slot of the be above on non-SMP */
+	sta	%o1, [%g1] ASI_M_MMUREGS	/* switch to the context of the mm */
+	sta	%g0, [%g2] ASI_M_FLUSH_PROBE	/* flush store at address 0x300 */
+hypersparc_flush_tlb_mm_out:
+	retl
+	 sta	%g5, [%g1] ASI_M_MMUREGS	/* write back the saved context register */
+
+hypersparc_flush_tlb_range:
+	mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + AOFF_mm_context], %o3	/* %o3 = context of the mm in %o0 */
+	lda	[%g1] ASI_M_MMUREGS, %g5	/* %g5 = current context register, restored on exit */
+#ifndef __SMP__
+	cmp	%o3, -1				/* context == -1: skip the flush */
+	be	hypersparc_flush_tlb_range_out
+#endif
+	srl	%o1, SRMMU_PGDIR_SHIFT, %o1	/* srl + sll below: round start down to a pgdir boundary */
+	sta	%o3, [%g1] ASI_M_MMUREGS	/* switch to the context of the mm */
+	sll	%o1, SRMMU_PGDIR_SHIFT, %o1
+	sethi	%hi(1 << SRMMU_PGDIR_SHIFT), %o4	/* %o4 = step of one pgdir entry */
+	add	%o1, 0x200, %o1			/* flush address = pgdir-aligned start + 0x200 */
+	sta	%g0, [%o1] ASI_M_FLUSH_PROBE
+1:
+	add	%o1, %o4, %o1
+	cmp	%o1, %o2			/* unsigned compare against the end in %o2 */
+	blu,a	1b
+	 sta	%g0, [%o1] ASI_M_FLUSH_PROBE	/* annulled unless the branch is taken */
+hypersparc_flush_tlb_range_out:
+	retl
+	 sta	%g5, [%g1] ASI_M_MMUREGS	/* write back the saved context register */
+
+hypersparc_flush_tlb_page:
+	ld	[%o0 + 0x00], %o0	/* XXX vma->vm_mm GROSS XXX */
+	mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + AOFF_mm_context], %o3	/* %o3 = context of that mm */
+	andn	%o1, (PAGE_SIZE - 1), %o1	/* %o1 = page base of the address */
+	lda	[%g1] ASI_M_MMUREGS, %g5	/* %g5 = current context register, restored on exit */
+#ifndef __SMP__
+	cmp	%o3, -1				/* context == -1: skip the flush */
+	be	hypersparc_flush_tlb_page_out
+#endif
+	sta	%o3, [%g1] ASI_M_MMUREGS	/* also the delay slot of the be above on non-SMP */
+	sta	%g0, [%o1] ASI_M_FLUSH_PROBE	/* flush store at the page-aligned address */
+hypersparc_flush_tlb_page_out:
+	retl
+	 sta	%g5, [%g1] ASI_M_MMUREGS	/* write back the saved context register */
diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c
index 41ac6c194..3dd0e470f 100644
--- a/arch/sparc/mm/init.c
+++ b/arch/sparc/mm/init.c
@@ -1,4 +1,4 @@
-/*  $Id: init.c,v 1.42 1996/10/27 08:36:44 davem Exp $
+/*  $Id: init.c,v 1.49 1997/04/17 21:49:31 jj Exp $
  *  linux/arch/sparc/mm/init.c
  *
  *  Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
@@ -20,6 +20,7 @@
 #ifdef CONFIG_BLK_DEV_INITRD
 #include <linux/blk.h>
 #endif
+#include <linux/init.h>
 
 #include <asm/system.h>
 #include <asm/segment.h>
@@ -33,6 +34,9 @@ extern void show_net_buffers(void);
 struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS];
 unsigned long sparc_unmapped_base;
 
+/* References to section boundaries */
+extern char __init_begin, __init_end, etext;
+
 /*
  * BAD_PAGE is the page that is used for page faults when linux
  * is out-of-memory. Older versions of linux just did a
@@ -71,10 +75,10 @@ void show_mem(void)
 		total++;
 		if (PageReserved(mem_map + i))
 			reserved++;
-		else if (!mem_map[i].count)
+		else if (!atomic_read(&mem_map[i].count))
 			free++;
 		else
-			shared += mem_map[i].count-1;
+			shared += atomic_read(&mem_map[i].count) - 1;
 	}
 	printk("%d pages of RAM\n",total);
 	printk("%d free pages\n",free);
@@ -88,7 +92,7 @@ void show_mem(void)
 
 extern pgprot_t protection_map[16];
 
-unsigned long sparc_context_init(unsigned long start_mem, int numctx)
+__initfunc(unsigned long sparc_context_init(unsigned long start_mem, int numctx))
 {
 	int ctx;
 
@@ -117,7 +121,8 @@ extern unsigned long sun4c_paging_init(unsigned long, unsigned long);
 extern unsigned long srmmu_paging_init(unsigned long, unsigned long);
 extern unsigned long device_scan(unsigned long);
 
-unsigned long paging_init(unsigned long start_mem, unsigned long end_mem)
+__initfunc(unsigned long 
+paging_init(unsigned long start_mem, unsigned long end_mem))
 {
 	switch(sparc_cpu_model) {
 	case sun4c:
@@ -130,6 +135,14 @@ unsigned long paging_init(unsigned long start_mem, unsigned long end_mem)
 		start_mem = srmmu_paging_init(start_mem, end_mem);
 		sparc_unmapped_base = 0x50000000;
 		break;
+
+	case ap1000:
+#if CONFIG_AP1000
+		start_mem = apmmu_paging_init(start_mem, end_mem);
+		sparc_unmapped_base = 0x50000000;
+		break;
+#endif
+
 	default:
 		prom_printf("paging_init: Cannot init paging on this Sparc\n");
 		prom_printf("paging_init: sparc_cpu_model = %d\n", sparc_cpu_model);
@@ -166,7 +179,7 @@ extern void srmmu_frob_mem_map(unsigned long);
 
 int physmem_mapped_contig = 1;
 
-static void taint_real_pages(unsigned long start_mem, unsigned long end_mem)
+__initfunc(static void taint_real_pages(unsigned long start_mem, unsigned long end_mem))
 {
 	unsigned long addr, tmp2 = 0;
 
@@ -194,12 +207,12 @@ static void taint_real_pages(unsigned long start_mem, unsigned long end_mem)
 	}
 }
 
-void mem_init(unsigned long start_mem, unsigned long end_mem)
+__initfunc(void mem_init(unsigned long start_mem, unsigned long end_mem))
 {
 	int codepages = 0;
 	int datapages = 0;
+	int initpages = 0; 
 	unsigned long tmp2, addr;
-	extern char etext;
 
 	/* Saves us work later. */
 	memset((void *) ZERO_PAGE, 0, PAGE_SIZE);
@@ -209,6 +222,7 @@ void mem_init(unsigned long start_mem, unsigned long end_mem)
 	high_memory = (void *) end_mem;
 
 	start_mem = PAGE_ALIGN(start_mem);
+	num_physpages = (start_mem - KERNBASE) >> PAGE_SHIFT;
 
 	addr = KERNBASE;
 	while(addr < start_mem) {
@@ -226,11 +240,14 @@ void mem_init(unsigned long start_mem, unsigned long end_mem)
 		if(PageReserved(mem_map + MAP_NR(addr))) {
 			if ((addr < (unsigned long) &etext) && (addr >= KERNBASE))
 				codepages++;
-			else if((addr < start_mem) && (addr >= KERNBASE))
+                        else if((addr >= (unsigned long)&__init_begin && addr < (unsigned long)&__init_end))
+                                initpages++;
+                        else if((addr < start_mem) && (addr >= KERNBASE))
 				datapages++;
 			continue;
 		}
-		mem_map[MAP_NR(addr)].count = 1;
+		atomic_set(&mem_map[MAP_NR(addr)].count, 1);
+		num_physpages++;
 #ifdef CONFIG_BLK_DEV_INITRD
 		if (!initrd_start ||
 		    (addr < initrd_start || addr >= initrd_end))
@@ -240,10 +257,12 @@ void mem_init(unsigned long start_mem, unsigned long end_mem)
 
 	tmp2 = nr_free_pages << PAGE_SHIFT;
 
-	printk("Memory: %luk available (%dk kernel code, %dk data) [%08lx,%08lx]\n",
+	printk("Memory: %luk available (%dk kernel code, %dk data, %dk init) [%08lx,%08lx]\n",
 	       tmp2 >> 10,
 	       codepages << (PAGE_SHIFT-10),
-	       datapages << (PAGE_SHIFT-10), PAGE_OFFSET, end_mem);
+	       datapages << (PAGE_SHIFT-10), 
+	       initpages << (PAGE_SHIFT-10),
+	       PAGE_OFFSET, end_mem);
 
 	min_free_pages = nr_free_pages >> 7;
 	if(min_free_pages < 16)
@@ -252,6 +271,18 @@ void mem_init(unsigned long start_mem, unsigned long end_mem)
 	free_pages_high = min_free_pages + min_free_pages;
 }
 
+void free_initmem (void)
+{
+	unsigned long addr;
+	/* Free every page in [__init_begin, __init_end); assumes __init_begin is page-aligned -- TODO confirm in the linker script. */
+	addr = (unsigned long)(&__init_begin);
+	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
+		mem_map[MAP_NR(addr)].flags &= ~(1 << PG_reserved);	/* page is no longer marked reserved */
+		atomic_set(&mem_map[MAP_NR(addr)].count, 1);	/* presumably the single reference that free_page() drops */
+		free_page(addr);
+	}
+}
+
 void si_meminfo(struct sysinfo *val)
 {
 	int i;
@@ -265,9 +296,9 @@ void si_meminfo(struct sysinfo *val)
 		if (PageReserved(mem_map + i))
 			continue;
 		val->totalram++;
-		if (!mem_map[i].count)
+		if (!atomic_read(&mem_map[i].count))
 			continue;
-		val->sharedram += mem_map[i].count-1;
+		val->sharedram += atomic_read(&mem_map[i].count) - 1;
 	}
 	val->totalram <<= PAGE_SHIFT;
 	val->sharedram <<= PAGE_SHIFT;
diff --git a/arch/sparc/mm/loadmmu.c b/arch/sparc/mm/loadmmu.c
index ac1ecd790..10eebecce 100644
--- a/arch/sparc/mm/loadmmu.c
+++ b/arch/sparc/mm/loadmmu.c
@@ -1,4 +1,4 @@
-/* $Id: loadmmu.c,v 1.36 1996/10/27 08:36:46 davem Exp $
+/* $Id: loadmmu.c,v 1.46 1997/04/10 05:12:51 davem Exp $
  * loadmmu.c:  This code loads up all the mm function pointers once the
  *             machine type has been determined.  It also sets the static
  *             mmu values such as PAGE_NONE, etc.
@@ -8,12 +8,17 @@
 
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/config.h>
 
 #include <asm/system.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
+#include <asm/a.out.h>
+#include <asm/mmu_context.h>
 
 unsigned long page_offset = 0xf0000000;
+unsigned long stack_top = 0xf0000000 - PAGE_SIZE;
 
 struct ctx_list *ctx_list_pool;
 struct ctx_list ctx_free;
@@ -26,8 +31,8 @@ void (*free_task_struct)(struct task_struct *tsk);
 
 void (*quick_kernel_fault)(unsigned long);
 
-void (*mmu_exit_hook)(void);
-void (*mmu_flush_hook)(void);
+void (*init_new_context)(struct mm_struct *mm);
+void (*destroy_context)(struct mm_struct *mm);
 
 /* translate between physical and virtual addresses */
 unsigned long (*mmu_v2p)(unsigned long);
@@ -36,9 +41,9 @@ unsigned long (*mmu_p2v)(unsigned long);
 char *(*mmu_lockarea)(char *, unsigned long);
 void  (*mmu_unlockarea)(char *, unsigned long);
 
-char *(*mmu_get_scsi_one)(char *, unsigned long, struct linux_sbus *sbus);
+__u32 (*mmu_get_scsi_one)(char *, unsigned long, struct linux_sbus *sbus);
 void  (*mmu_get_scsi_sgl)(struct mmu_sglist *, int, struct linux_sbus *sbus);
-void  (*mmu_release_scsi_one)(char *, unsigned long, struct linux_sbus *sbus);
+void  (*mmu_release_scsi_one)(__u32, unsigned long, struct linux_sbus *sbus);
 void  (*mmu_release_scsi_sgl)(struct mmu_sglist *, int, struct linux_sbus *sbus);
 
 void  (*mmu_map_dma_area)(unsigned long addr, int len);
@@ -58,6 +63,7 @@ void (*local_flush_tlb_range)(struct mm_struct *, unsigned long start,
 			      unsigned long end);
 void (*local_flush_tlb_page)(struct vm_area_struct *, unsigned long address);
 void (*local_flush_page_to_ram)(unsigned long address);
+void (*local_flush_sig_insns)(struct mm_struct *mm, unsigned long insn_addr);
 #endif
 
 void (*flush_cache_all)(void);
@@ -74,6 +80,8 @@ void (*flush_tlb_page)(struct vm_area_struct *, unsigned long address);
 
 void (*flush_page_to_ram)(unsigned long page);
 
+void (*flush_sig_insns)(struct mm_struct *mm, unsigned long insn_addr);
+
 void (*set_pte)(pte_t *pteptr, pte_t pteval);
 
 unsigned int pmd_shift, pmd_size, pmd_mask;
@@ -145,8 +153,7 @@ char *(*mmu_info)(void);
 extern void ld_mmu_sun4c(void);
 extern void ld_mmu_srmmu(void);
 
-void
-load_mmu(void)
+__initfunc(void load_mmu(void))
 {
 	switch(sparc_cpu_model) {
 	case sun4c:
@@ -156,6 +163,11 @@ load_mmu(void)
 	case sun4d:
 		ld_mmu_srmmu();
 		break;
+	case ap1000:
+#if CONFIG_AP1000
+		ld_mmu_apmmu();
+		break;
+#endif
 	default:
 		printk("load_mmu:MMU support not available for this architecture\n");
 		printk("load_mmu:sparc_cpu_model = %d\n", (int) sparc_cpu_model);
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index 7d9b653df..9d3afdbdf 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -1,4 +1,4 @@
-/* $Id: srmmu.c,v 1.103 1996/10/31 06:28:35 davem Exp $
+/* $Id: srmmu.c,v 1.136 1997/04/20 14:11:51 ecd Exp $
  * srmmu.c:  SRMMU specific routines for memory management.
  *
  * Copyright (C) 1995 David S. Miller  (davem@caip.rutgers.edu)
@@ -10,6 +10,8 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/malloc.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -25,6 +27,8 @@
 #include <asm/iommu.h>
 #include <asm/asi.h>
 #include <asm/msi.h>
+#include <asm/a.out.h>
+#include <asm/mmu_context.h>
 
 /* Now the cpu specific definitions. */
 #include <asm/viking.h>
@@ -42,25 +46,20 @@ int vac_badbits;
 extern unsigned long sparc_iobase_vaddr;
 
 #ifdef __SMP__
-extern void smp_capture(void);
-extern void smp_release(void);
+#define FLUSH_BEGIN(mm)
+#define FLUSH_END
 #else
-#define smp_capture()
-#define smp_release()
-#endif /* !(__SMP__) */
-
-/* #define USE_CHUNK_ALLOC 1 */
+#define FLUSH_BEGIN(mm) if((mm)->context != NO_CONTEXT) {
+#define FLUSH_END	}
+#endif
 
 static void (*ctxd_set)(ctxd_t *ctxp, pgd_t *pgdp);
 static void (*pmd_set)(pmd_t *pmdp, pte_t *ptep);
 
 static void (*flush_page_for_dma)(unsigned long page);
-static void (*flush_cache_page_to_uncache)(unsigned long page);
-static void (*flush_tlb_page_for_cbit)(unsigned long page);
+static void (*flush_chunk)(unsigned long chunk);
 #ifdef __SMP__
 static void (*local_flush_page_for_dma)(unsigned long page);
-static void (*local_flush_cache_page_to_uncache)(unsigned long page);
-static void (*local_flush_tlb_page_for_cbit)(unsigned long page);
 #endif
 
 static struct srmmu_stats {
@@ -75,7 +74,10 @@ static char *srmmu_name;
 ctxd_t *srmmu_ctx_table_phys;
 ctxd_t *srmmu_context_table;
 
-static struct srmmu_trans {
+/* Do not change the layout of this structure without also updating
+ * the code that accesses it in arch/sparc/mm/viking.S
+ */
+struct srmmu_trans {
 	unsigned long vbase;
 	unsigned long pbase;
 	unsigned long size;
@@ -144,22 +146,12 @@ static inline unsigned long srmmu_p2v(unsigned long paddr)
  */
 static inline unsigned long srmmu_swap(unsigned long *addr, unsigned long value)
 {
-#if MEM_BUS_SPACE
-  /* the AP1000 has its memory on bus 8, not 0 like suns do */
-  if (!(value&KERNBASE))
-    value |= MEM_BUS_SPACE<<28;
-  if (value == MEM_BUS_SPACE<<28) value = 0;
-#endif
-	__asm__ __volatile__("swap [%2], %0\n\t" :
-			     "=&r" (value) :
-			     "0" (value), "r" (addr));
+	__asm__ __volatile__("swap [%2], %0" : "=&r" (value) : "0" (value), "r" (addr));
 	return value;
 }
 
 /* Functions really use this, not srmmu_swap directly. */
-#define srmmu_set_entry(ptr, newentry) \
-        srmmu_swap((unsigned long *) (ptr), (newentry))
-
+#define srmmu_set_entry(ptr, newentry) srmmu_swap((unsigned long *) (ptr), (newentry))
 
 /* The very generic SRMMU page table operations. */
 static unsigned int srmmu_pmd_align(unsigned int addr) { return SRMMU_PMD_ALIGN(addr); }
@@ -170,27 +162,29 @@ static unsigned long srmmu_vmalloc_start(void)
 	return SRMMU_VMALLOC_START;
 }
 
+static inline int srmmu_device_memory(unsigned long x) 
+{	/* x is a raw pgd/pmd/pte value; srmmu_mk_pte_io() stores the bus space in bits 31..28. */
+	return ((x & 0xF0000000) != 0);	/* nonzero iff any of bits 31..28 is set */
+}
+
 static unsigned long srmmu_pgd_page(pgd_t pgd)
-{ return srmmu_p2v((pgd_val(pgd) & SRMMU_PTD_PMASK) << 4); }
+{ return srmmu_device_memory(pgd_val(pgd))?~0:srmmu_p2v((pgd_val(pgd) & SRMMU_PTD_PMASK) << 4); }
 
 static unsigned long srmmu_pmd_page(pmd_t pmd)
-{ return srmmu_p2v((pmd_val(pmd) & SRMMU_PTD_PMASK) << 4); }
-
-static inline int srmmu_device_memory(pte_t pte) 
-{
-	return (pte_val(pte)>>28) != MEM_BUS_SPACE;
-}
+{ return srmmu_device_memory(pmd_val(pmd))?~0:srmmu_p2v((pmd_val(pmd) & SRMMU_PTD_PMASK) << 4); }
 
 static unsigned long srmmu_pte_page(pte_t pte)
-{ return srmmu_device_memory(pte)?~0:srmmu_p2v((pte_val(pte) & SRMMU_PTE_PMASK) << 4); }
+{ return srmmu_device_memory(pte_val(pte))?~0:srmmu_p2v((pte_val(pte) & SRMMU_PTE_PMASK) << 4); }
 
-static int srmmu_pte_none(pte_t pte)          { return !pte_val(pte); }
+static int srmmu_pte_none(pte_t pte)          
+{ return !(pte_val(pte) & 0xFFFFFFF); }	/* true when the low 28 bits are all clear; bits 31..28 are ignored */
 static int srmmu_pte_present(pte_t pte)
 { return ((pte_val(pte) & SRMMU_ET_MASK) == SRMMU_ET_PTE); }
 
 static void srmmu_pte_clear(pte_t *ptep)      { set_pte(ptep, __pte(0)); }
 
-static int srmmu_pmd_none(pmd_t pmd)          { return !pmd_val(pmd); }
+static int srmmu_pmd_none(pmd_t pmd)          
+{ return !(pmd_val(pmd) & 0xFFFFFFF); }	/* true when the low 28 bits are all clear; bits 31..28 are ignored */
 static int srmmu_pmd_bad(pmd_t pmd)
 { return (pmd_val(pmd) & SRMMU_ET_MASK) != SRMMU_ET_PTD; }
 
@@ -199,7 +193,9 @@ static int srmmu_pmd_present(pmd_t pmd)
 
 static void srmmu_pmd_clear(pmd_t *pmdp)      { set_pte((pte_t *)pmdp, __pte(0)); }
 
-static int srmmu_pgd_none(pgd_t pgd)          { return !pgd_val(pgd); }
+static int srmmu_pgd_none(pgd_t pgd)          
+{ return !(pgd_val(pgd) & 0xFFFFFFF); }	/* true when the low 28 bits are all clear; bits 31..28 are ignored */
+
 static int srmmu_pgd_bad(pgd_t pgd)
 { return (pgd_val(pgd) & SRMMU_ET_MASK) != SRMMU_ET_PTD; }
 
@@ -212,28 +208,26 @@ static int srmmu_pte_write(pte_t pte)         { return pte_val(pte) & SRMMU_WRIT
 static int srmmu_pte_dirty(pte_t pte)         { return pte_val(pte) & SRMMU_DIRTY; }
 static int srmmu_pte_young(pte_t pte)         { return pte_val(pte) & SRMMU_REF; }
 
-static pte_t srmmu_pte_wrprotect(pte_t pte)   { pte_val(pte) &= ~SRMMU_WRITE; return pte;}
-static pte_t srmmu_pte_mkclean(pte_t pte)     { pte_val(pte) &= ~SRMMU_DIRTY; return pte; }
-static pte_t srmmu_pte_mkold(pte_t pte)       { pte_val(pte) &= ~SRMMU_REF; return pte; }
-static pte_t srmmu_pte_mkwrite(pte_t pte)     { pte_val(pte) |= SRMMU_WRITE; return pte; }
-static pte_t srmmu_pte_mkdirty(pte_t pte)     { pte_val(pte) |= SRMMU_DIRTY; return pte; }
-static pte_t srmmu_pte_mkyoung(pte_t pte)     { pte_val(pte) |= SRMMU_REF; return pte; }
+static pte_t srmmu_pte_wrprotect(pte_t pte)   { return __pte(pte_val(pte) & ~SRMMU_WRITE);}
+static pte_t srmmu_pte_mkclean(pte_t pte)     { return __pte(pte_val(pte) & ~SRMMU_DIRTY);}
+static pte_t srmmu_pte_mkold(pte_t pte)       { return __pte(pte_val(pte) & ~SRMMU_REF);}
+static pte_t srmmu_pte_mkwrite(pte_t pte)     { return __pte(pte_val(pte) | SRMMU_WRITE);}
+static pte_t srmmu_pte_mkdirty(pte_t pte)     { return __pte(pte_val(pte) | SRMMU_DIRTY);}
+static pte_t srmmu_pte_mkyoung(pte_t pte)     { return __pte(pte_val(pte) | SRMMU_REF);}
 
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
  */
 static pte_t srmmu_mk_pte(unsigned long page, pgprot_t pgprot)
-{ pte_t pte; pte_val(pte) = ((srmmu_v2p(page)) >> 4) | pgprot_val(pgprot); return pte; }
+{ return __pte(((srmmu_v2p(page)) >> 4) | pgprot_val(pgprot)); }
 
 static pte_t srmmu_mk_pte_phys(unsigned long page, pgprot_t pgprot)
-{ pte_t pte; pte_val(pte) = ((page) >> 4) | pgprot_val(pgprot); return pte; }
+{ return __pte(((page) >> 4) | pgprot_val(pgprot)); }
 
 static pte_t srmmu_mk_pte_io(unsigned long page, pgprot_t pgprot, int space)
 {
-	pte_t pte;
-	pte_val(pte) = ((page) >> 4) | (space << 28) | pgprot_val(pgprot);
-	return pte;
+	return __pte(((page) >> 4) | (space << 28) | pgprot_val(pgprot));
 }
 
 static void srmmu_ctxd_set(ctxd_t *ctxp, pgd_t *pgdp)
@@ -253,8 +247,7 @@ static void srmmu_pmd_set(pmd_t * pmdp, pte_t * ptep)
 
 static pte_t srmmu_pte_modify(pte_t pte, pgprot_t newprot)
 {
-	pte_val(pte) = (pte_val(pte) & SRMMU_CHG_MASK) | pgprot_val(newprot);
-	return pte;
+	return __pte((pte_val(pte) & SRMMU_CHG_MASK) | pgprot_val(newprot));
 }
 
 /* to find an entry in a top-level page table... */
@@ -279,68 +272,17 @@ static pte_t *srmmu_pte_offset(pmd_t * dir, unsigned long address)
 static void srmmu_update_rootmmu_dir(struct task_struct *tsk, pgd_t *pgdp) 
 {
 	if(tsk->mm->context != NO_CONTEXT) {
-		flush_cache_mm(current->mm);
+		flush_cache_mm(tsk->mm);
 		ctxd_set(&srmmu_context_table[tsk->mm->context], pgdp);
-		flush_tlb_mm(current->mm);
+		flush_tlb_mm(tsk->mm);
 	}
 }
 
-static inline void srmmu_uncache_page(unsigned long addr)
-{
-	pgd_t *pgdp = srmmu_pgd_offset(init_task.mm, addr);
-	pmd_t *pmdp;
-	pte_t *ptep;
-
-	if((pgd_val(*pgdp) & SRMMU_ET_MASK) == SRMMU_ET_PTE) {
-		ptep = (pte_t *) pgdp;
-	} else {
-		pmdp = srmmu_pmd_offset(pgdp, addr);
-		if((pmd_val(*pmdp) & SRMMU_ET_MASK) == SRMMU_ET_PTE) {
-			ptep = (pte_t *) pmdp;
-		} else {
-			ptep = srmmu_pte_offset(pmdp, addr);
-		}
-	}
-
-	flush_cache_page_to_uncache(addr);
-	set_pte(ptep, __pte((pte_val(*ptep) & ~SRMMU_CACHE)));
-	flush_tlb_page_for_cbit(addr);
-}
-
-static inline void srmmu_recache_page(unsigned long addr)
-{
-	pgd_t *pgdp = srmmu_pgd_offset(init_task.mm, addr);
-	pmd_t *pmdp;
-	pte_t *ptep;
-
-	if((pgd_val(*pgdp) & SRMMU_ET_MASK) == SRMMU_ET_PTE) {
-		ptep = (pte_t *) pgdp;
-	} else {
-		pmdp = srmmu_pmd_offset(pgdp, addr);
-		if((pmd_val(*pmdp) & SRMMU_ET_MASK) == SRMMU_ET_PTE) {
-			ptep = (pte_t *) pmdp;
-		} else {
-			ptep = srmmu_pte_offset(pmdp, addr);
-		}
-	}
-	set_pte(ptep, __pte((pte_val(*ptep) | SRMMU_CACHE)));
-	flush_tlb_page_for_cbit(addr);
-}
-
-static inline unsigned long srmmu_getpage(void)
-{
-	unsigned long page = get_free_page(GFP_KERNEL);
-
-	return page;
-}
-
 static inline void srmmu_putpage(unsigned long page)
 {
 	free_page(page);
 }
 
-#ifdef USE_CHUNK_ALLOC
-
 #define LC_HIGH_WATER	128
 #define BC_HIGH_WATER	32
 
@@ -368,7 +310,7 @@ static int garbage_calls = 0;
 
 #define OTHER_PAGE(p,q)	(((unsigned long)(p) ^ (unsigned long)(q)) & PAGE_MASK)
 
-static inline int garbage_collect(unsigned long **cnks, int n, int cpp)
+static int garbage_collect(unsigned long **cnks, int n, int cpp)
 {
 	struct chunk *root = (struct chunk *)*cnks;
 	struct chunk *p, *q, *curr, *next;
@@ -464,8 +406,7 @@ static inline int garbage_collect(unsigned long **cnks, int n, int cpp)
 	return water;
 }
 
-
-static inline unsigned long *get_small_chunk(void)
+static unsigned long *get_small_chunk(void)
 {
 	unsigned long *rval;
 	unsigned long flags;
@@ -507,6 +448,7 @@ static inline unsigned long *get_small_chunk(void)
 	lcjiffies = jiffies;
 	restore_flags(flags);
 	memset(rval, 0, 256);
+	flush_chunk((unsigned long)rval);
 	return rval;
 }
 
@@ -526,7 +468,7 @@ static inline void free_small_chunk(unsigned long *it)
 	restore_flags(flags);
 }
 
-static inline unsigned long *get_big_chunk(void)
+static unsigned long *get_big_chunk(void)
 {
 	unsigned long *rval;
 	unsigned long flags;
@@ -556,6 +498,7 @@ static inline unsigned long *get_big_chunk(void)
 	bcjiffies = jiffies;
 	restore_flags(flags);
 	memset(rval, 0, 1024);
+	flush_chunk((unsigned long)rval);
 	return rval;
 }
 
@@ -582,18 +525,6 @@ static inline void free_big_chunk(unsigned long *it)
 #define FREE_PMD(chunk) free_small_chunk((unsigned long *)(chunk))
 #define FREE_PTE(chunk) free_small_chunk((unsigned long *)(chunk))
 
-#else
-
-/* The easy versions. */
-#define NEW_PGD() (pgd_t *) srmmu_getpage()
-#define NEW_PMD() (pmd_t *) srmmu_getpage()
-#define NEW_PTE() (pte_t *) srmmu_getpage()
-#define FREE_PGD(chunk) srmmu_putpage((unsigned long)(chunk))
-#define FREE_PMD(chunk) srmmu_putpage((unsigned long)(chunk))
-#define FREE_PTE(chunk) srmmu_putpage((unsigned long)(chunk))
-
-#endif
-
 /*
  * Allocate and free page tables. The xxx_kernel() versions are
  * used to allocate a kernel page table - this turns on ASN bits
@@ -730,12 +661,23 @@ static void srmmu_set_pte_cacheable(pte_t *ptep, pte_t pteval)
 
 static void srmmu_set_pte_nocache_hyper(pte_t *ptep, pte_t pteval)
 {
-	unsigned long flags;
+	unsigned long page = ((unsigned long)ptep) & PAGE_MASK;
 
-	save_and_cli(flags);
 	srmmu_set_entry(ptep, pte_val(pteval));
-	hyper_flush_cache_page(((unsigned long)ptep) & PAGE_MASK);
-	restore_flags(flags);
+	__asm__ __volatile__("
+	lda	[%0] %2, %%g4
+	orcc	%%g4, 0x0, %%g0
+	be	2f
+	 sethi	%%hi(%7), %%g5
+1:	subcc	%%g5, %6, %%g5		! hyper_flush_cache_page
+	bne	1b
+	 sta	%%g0, [%1 + %%g5] %3
+	lda	[%4] %5, %%g0
+2:"	: /* no outputs */
+	: "r" (page | 0x400), "r" (page), "i" (ASI_M_FLUSH_PROBE),
+	  "i" (ASI_M_FLUSH_PAGE), "r" (SRMMU_FAULT_STATUS), "i" (ASI_M_MMUREGS),
+	  "r" (vac_line_size), "i" (PAGE_SIZE)
+	: "g4", "g5", "cc");
 }
 
 static void srmmu_set_pte_nocache_cypress(pte_t *ptep, pte_t pteval)
@@ -766,16 +708,15 @@ static void srmmu_set_pte_nocache_cypress(pte_t *ptep, pte_t pteval)
 	} while(line != page);
 }
 
-static void srmmu_set_pte_nocache_nomxccvik(pte_t *ptep, pte_t pteval)
+static void srmmu_set_pte_nocache_viking(pte_t *ptep, pte_t pteval)
 {
-	unsigned long paddr = srmmu_v2p(((unsigned long)ptep));
 	unsigned long vaddr;
 	int set;
 	int i;
 
-	set = (paddr >> 5) & 0x7f;
+	set = ((unsigned long)ptep >> 5) & 0x7f;
 	vaddr = (KERNBASE + PAGE_SIZE) | (set << 5);
-	srmmu_set_entry(ptep, pteval);
+	srmmu_set_entry(ptep, pte_val(pteval));
 	for (i = 0; i < 8; i++) {
 		__asm__ __volatile__ ("ld [%0], %%g0" : : "r" (vaddr));
 		vaddr += PAGE_SIZE;
@@ -795,18 +736,10 @@ static void srmmu_quick_kernel_fault(unsigned long address)
 #endif
 }
 
-static inline void alloc_context(struct task_struct *tsk)
+static inline void alloc_context(struct mm_struct *mm)
 {
-	struct mm_struct *mm = tsk->mm;
 	struct ctx_list *ctxp;
 
-#if CONFIG_AP1000
-        if (tsk->taskid >= MPP_TASK_BASE) {
-		mm->context = MPP_CONTEXT_BASE + (tsk->taskid - MPP_TASK_BASE);
-		return;
-	}
-#endif
-
 	ctxp = ctx_free.next;
 	if(ctxp != &ctx_free) {
 		remove_from_ctx_list(ctxp);
@@ -833,11 +766,6 @@ static inline void free_context(int context)
 {
 	struct ctx_list *ctx_old;
 
-#if CONFIG_AP1000
-	if (context >= MPP_CONTEXT_BASE)
-		return; /* nothing to do! */
-#endif
-	
 	ctx_old = ctx_list_pool + context;
 	remove_from_ctx_list(ctx_old);
 	add_to_free_ctxlist(ctx_old);
@@ -847,14 +775,26 @@ static inline void free_context(int context)
 static void srmmu_switch_to_context(struct task_struct *tsk)
 {
 	if(tsk->mm->context == NO_CONTEXT) {
-		alloc_context(tsk);
-		flush_cache_mm(current->mm);
+		alloc_context(tsk->mm);
+		flush_cache_mm(tsk->mm);
 		ctxd_set(&srmmu_context_table[tsk->mm->context], tsk->mm->pgd);
-		flush_tlb_mm(current->mm);
+		flush_tlb_mm(tsk->mm);
 	}
 	srmmu_set_context(tsk->mm->context);
 }
 
+static void srmmu_init_new_context(struct mm_struct *mm)
+{
+	alloc_context(mm);
+	/* Point mm's context-table slot at its pgd, with a cache flush before and a TLB flush after. */
+	flush_cache_mm(mm);
+	ctxd_set(&srmmu_context_table[mm->context], mm->pgd);
+	flush_tlb_mm(mm);
+	/* Load the new context number only when mm belongs to the current task. */
+	if(mm == current->mm)
+		srmmu_set_context(mm->context);
+}
+
 /* Low level IO area allocation on the SRMMU. */
 void srmmu_mapioaddr(unsigned long physaddr, unsigned long virt_addr, int bus_type, int rdonly)
 {
@@ -879,7 +819,7 @@ void srmmu_mapioaddr(unsigned long physaddr, unsigned long virt_addr, int bus_ty
 	else
 		tmp |= SRMMU_PRIV;
 	flush_page_to_ram(virt_addr);
-	set_pte(ptep, tmp);
+	set_pte(ptep, __pte(tmp));
 	flush_tlb_all();
 }
 
@@ -894,7 +834,7 @@ void srmmu_unmapioaddr(unsigned long virt_addr)
 	ptep = srmmu_pte_offset(pmdp, virt_addr);
 
 	/* No need to flush uncacheable page. */
-	set_pte(ptep, pte_val(srmmu_mk_pte((unsigned long) EMPTY_PGE, PAGE_SHARED)));
+	set_pte(ptep, srmmu_mk_pte((unsigned long) EMPTY_PGE, PAGE_SHARED));
 	flush_tlb_all();
 }
 
@@ -907,6 +847,9 @@ static void srmmu_unlockarea(char *vaddr, unsigned long len)
 {
 }
 
+/* Byte offset of tss.uwinmask within struct task_struct; used in many routines below. */
+#define UWINMASK_OFFSET (const unsigned long)(&(((struct task_struct *)0)->tss.uwinmask))
+
 /* On the SRMMU we do not have the problems with limited tlb entries
  * for mapping kernel pages, so we just take things from the free page
  * pool.  As a side effect we are putting a little too much pressure
@@ -922,7 +865,12 @@ struct task_struct *srmmu_alloc_task_struct(void)
 
 unsigned long srmmu_alloc_kernel_stack(struct task_struct *tsk)
 {
-	return __get_free_pages(GFP_KERNEL, 1, 0);
+	unsigned long kstk = __get_free_pages(GFP_KERNEL, 1, 0);
+
+	if(!kstk)
+		kstk = (unsigned long) vmalloc(PAGE_SIZE << 1);
+
+	return kstk;
 }
 
 static void srmmu_free_task_struct(struct task_struct *tsk)
@@ -932,7 +880,10 @@ static void srmmu_free_task_struct(struct task_struct *tsk)
 
 static void srmmu_free_kernel_stack(unsigned long stack)
 {
-	free_pages(stack, 1);
+	if(stack < VMALLOC_START)
+		free_pages(stack, 1);
+	else
+		vfree((char *)stack);
 }
 
 /* Tsunami flushes.  It's page level tlb invalidation is not very
@@ -948,47 +899,29 @@ static void tsunami_flush_cache_all(void)
 
 static void tsunami_flush_cache_mm(struct mm_struct *mm)
 {
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		flush_user_windows();
-		tsunami_flush_icache();
-		tsunami_flush_dcache();
-#ifndef __SMP__
-	}
-#endif
+	FLUSH_BEGIN(mm)
+	flush_user_windows();
+	tsunami_flush_icache();
+	tsunami_flush_dcache();
+	FLUSH_END
 }
 
 static void tsunami_flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end)
 {
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		flush_user_windows();
-		tsunami_flush_icache();
-		tsunami_flush_dcache();
-#ifndef __SMP__
-	}
-#endif
+	FLUSH_BEGIN(mm)
+	flush_user_windows();
+	tsunami_flush_icache();
+	tsunami_flush_dcache();
+	FLUSH_END
 }
 
 static void tsunami_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
 {
-#ifndef __SMP__
-	struct mm_struct *mm = vma->vm_mm;
-	if(mm->context != NO_CONTEXT) {
-#endif
-		flush_user_windows();
-		tsunami_flush_icache();
-		tsunami_flush_dcache();
-#ifndef __SMP__
-	}
-#endif
-}
-
-static void tsunami_flush_cache_page_to_uncache(unsigned long page)
-{
+	FLUSH_BEGIN(vma->vm_mm)
+	flush_user_windows();
+	tsunami_flush_icache();
 	tsunami_flush_dcache();
+	FLUSH_END
 }
 
 /* Tsunami does not have a Copy-back style virtual cache. */
@@ -1003,62 +936,57 @@ static void tsunami_flush_page_for_dma(unsigned long page)
 	tsunami_flush_dcache();
 }
 
+/* Tsunami has Harvard-style split I/D caches which do not snoop each other,
+ * so on-stack signal insns must be flushed.  The dcache is write-through, so
+ * only the icache is flushed; mm and insn_addr are not used here.
+ */
+static void tsunami_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
+{
+	tsunami_flush_icache();
+}
+
+static void tsunami_flush_chunk(unsigned long chunk)
+{	/* Empty on purpose: no flush is done for a page-table chunk here (presumably installed as the flush_chunk hook -- confirm). */
+}
+
 static void tsunami_flush_tlb_all(void)
 {
-	module_stats.invall++;
 	srmmu_flush_whole_tlb();
+	module_stats.invall++;
 }
 
 static void tsunami_flush_tlb_mm(struct mm_struct *mm)
 {
+	FLUSH_BEGIN(mm)
+	srmmu_flush_whole_tlb();
 	module_stats.invmm++;
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		srmmu_flush_whole_tlb();
-#ifndef __SMP__
-        }
-#endif
+	FLUSH_END
 }
 
 static void tsunami_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
 {
+	FLUSH_BEGIN(mm)
+	srmmu_flush_whole_tlb();
 	module_stats.invrnge++;
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		srmmu_flush_whole_tlb();
-#ifndef __SMP__
-	}
-#endif
+	FLUSH_END
 }
 
 static void tsunami_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 {
-	int octx;
 	struct mm_struct *mm = vma->vm_mm;
 
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		unsigned long flags;
-
-		save_and_cli(flags);
-		octx = srmmu_get_context();
-
-		srmmu_set_context(mm->context);
-		srmmu_flush_tlb_page(page);
-		srmmu_set_context(octx);
-		restore_flags(flags);
-#ifndef __SMP__
-	}
-#endif
+	FLUSH_BEGIN(mm)
+	__asm__ __volatile__("
+	lda	[%0] %3, %%g5
+	sta	%1, [%0] %3
+	sta	%%g0, [%2] %4
+	sta	%%g5, [%0] %3"
+	: /* no outputs */
+	: "r" (SRMMU_CTX_REG), "r" (mm->context), "r" (page & PAGE_MASK),
+	  "i" (ASI_M_MMUREGS), "i" (ASI_M_FLUSH_PROBE)
+	: "g5");
 	module_stats.invpg++;
-}
-
-static void tsunami_flush_tlb_page_for_cbit(unsigned long page)
-{
-	srmmu_flush_tlb_page(page);
+	FLUSH_END
 }
 
 /* Swift flushes.  It has the recommended SRMMU specification flushing
@@ -1074,41 +1002,28 @@ static void swift_flush_cache_all(void)
 
 static void swift_flush_cache_mm(struct mm_struct *mm)
 {
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		flush_user_windows();
-		swift_idflash_clear();
-#ifndef __SMP__
-	}
-#endif
+	FLUSH_BEGIN(mm)
+	flush_user_windows();
+	swift_idflash_clear();
+	FLUSH_END
 }
 
 static void swift_flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end)
 {
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		flush_user_windows();
-		swift_idflash_clear();
-#ifndef __SMP__
-	}
-#endif
+	FLUSH_BEGIN(mm)
+	flush_user_windows();
+	swift_idflash_clear();
+	FLUSH_END
 }
 
 static void swift_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
 {
-#ifndef __SMP__
-	struct mm_struct *mm = vma->vm_mm;
-	if(mm->context != NO_CONTEXT) {
-#endif
-		flush_user_windows();
-		if(vma->vm_flags & VM_EXEC)
-			swift_flush_icache();
-		swift_flush_dcache();
-#ifndef __SMP__
-	}
-#endif
+	FLUSH_BEGIN(vma->vm_mm)
+	flush_user_windows();
+	if(vma->vm_flags & VM_EXEC)
+		swift_flush_icache();
+	swift_flush_dcache();
+	FLUSH_END
 }
 
 /* Not copy-back on swift. */
@@ -1122,48 +1037,47 @@ static void swift_flush_page_for_dma(unsigned long page)
 	swift_flush_dcache();
 }
 
-static void swift_flush_cache_page_to_uncache(unsigned long page)
+/* Like Tsunami, Swift has split I/D caches that do not snoop each other,
+ * so the icache must be flushed for on-stack signal insns.  Only the
+ * icache needs flushing since the dcache is write-through.
+ */
+static void swift_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
+{
+	swift_flush_icache();
+}
+
+static void swift_flush_chunk(unsigned long chunk)
 {
-	swift_flush_dcache();
 }
 
 static void swift_flush_tlb_all(void)
 {
-	module_stats.invall++;
 	srmmu_flush_whole_tlb();
+	module_stats.invall++;
 }
 
 static void swift_flush_tlb_mm(struct mm_struct *mm)
 {
+	FLUSH_BEGIN(mm)
+	srmmu_flush_whole_tlb();
 	module_stats.invmm++;
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT)
-#endif
-		srmmu_flush_whole_tlb();
+	FLUSH_END
 }
 
 static void swift_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
 {
+	FLUSH_BEGIN(mm)
+	srmmu_flush_whole_tlb();
 	module_stats.invrnge++;
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT)
-#endif
-		srmmu_flush_whole_tlb();
+	FLUSH_END
 }
 
 static void swift_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 {
-#ifndef __SMP__
-	struct mm_struct *mm = vma->vm_mm;
-	if(mm->context != NO_CONTEXT)
-#endif
-		srmmu_flush_whole_tlb();
-	module_stats.invpg++;
-}
-
-static void swift_flush_tlb_page_for_cbit(unsigned long page)
-{
+	FLUSH_BEGIN(vma->vm_mm)
 	srmmu_flush_whole_tlb();
+	module_stats.invpg++;
+	FLUSH_END
 }
 
 /* The following are all MBUS based SRMMU modules, and therefore could
@@ -1172,212 +1086,6 @@ static void swift_flush_tlb_page_for_cbit(unsigned long page)
  * with respect to cache coherency.
  */
 
-/* Viking flushes.  For Sun's mainline MBUS processor it is pretty much
- * a crappy mmu.  The on-chip I&D caches only have full flushes, no fine
- * grained cache invalidations.  It only has these "flash clear" things
- * just like the MicroSparcI.  Added to this many revs of the chip are
- * teaming with hardware buggery.  Someday maybe we'll do direct
- * diagnostic tag accesses for page level flushes as those should
- * be painless and will increase performance due to the frequency of
- * page level flushes. This is a must to _really_ flush the caches,
- * crazy hardware ;-)
- */
-
-static void viking_flush_cache_all(void)
-{
-}
-
-static void viking_flush_cache_mm(struct mm_struct *mm)
-{
-}
-
-static void viking_flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-}
-
-static void viking_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
-{
-}
-
-/* Non-mxcc vikings are copy-back but are pure-physical so no flushing. */
-static void viking_flush_page_to_ram(unsigned long page)
-{
-}
-
-static void viking_mxcc_flush_page(unsigned long page)
-{
-	unsigned long ppage = srmmu_v2p(page & PAGE_MASK);
-	unsigned long paddr0, paddr1;
-
-	if (ppage == 0xffffffffUL)
-		return;
-
-	paddr0 = 0x10;			/* Set cacheable bit. */
-	paddr1 = ppage;
-
-	/* Read the page's data through the stream registers,
-	 * and write it back to memory. This will issue
-	 * coherent write invalidates to all other caches, thus
-         * should also be sufficient in an MP system.
-	 */
-	__asm__ __volatile__ ("or %%g0, %0, %%g2\n\t"
-			      "or %%g0, %1, %%g3\n"
-			      "1:\n\t"
-			      "stda %%g2, [%2] %5\n\t"
-			      "stda %%g2, [%3] %5\n\t"
-			      "add %%g3, %4, %%g3\n\t"
-			      "btst 0xfff, %%g3\n\t"
-			      "bne 1b\n\t"
-			      "nop\n\t" : :
-			      "r" (paddr0), "r" (paddr1),
-			      "r" (MXCC_SRCSTREAM),
-			      "r" (MXCC_DESSTREAM),
-			      "r" (MXCC_STREAM_SIZE),
-			      "i" (ASI_M_MXCC) : "g2", "g3");
-
-	/* This was handcoded after a look at the gcc output from
-	 *
-	 *	do {
-	 *		mxcc_set_stream_src(paddr);
-	 *		mxcc_set_stream_dst(paddr);
-	 *		paddr[1] += MXCC_STREAM_SIZE;
-	 *	} while (paddr[1] & ~PAGE_MASK);
-	 */
-}
-
-static void viking_no_mxcc_flush_page(unsigned long page)
-{
-	unsigned long ppage = srmmu_v2p(page & PAGE_MASK);
-	int set, block;
-	unsigned long ptag[2];
-	unsigned long vaddr;
-	int i;
-
-	if (ppage == 0xffffffffUL)
-		return;
-	ppage >>= 12;
-
-	for (set = 0; set < 128; set++) {
-		for (block = 0; block < 4; block++) {
-
-			viking_get_dcache_ptag(set, block, ptag);
-
-			if (ptag[1] != ppage)
-				continue;
-			if (!(ptag[0] & VIKING_PTAG_VALID))
-				continue;
-			if (!(ptag[0] & VIKING_PTAG_DIRTY))
-				continue;
-
-			/* There was a great cache from TI
-			 * with comfort as much as vi,
-			 * 4 pages to flush,
-			 * 4 pages, no rush,
-			 * since anything else makes him die.
-			 */
-			vaddr = (KERNBASE + PAGE_SIZE) | (set << 5);
-			for (i = 0; i < 8; i++) {
-				__asm__ __volatile__ ("ld [%0], %%g2\n\t" : :
-						      "r" (vaddr) : "g2");
-				vaddr += PAGE_SIZE;
-			}
-
-			/* Continue with next set. */
-			break;
-		}
-	}
-}
-
-/* Viking is IO cache coherent, but really only on MXCC. */
-static void viking_flush_page_for_dma(unsigned long page)
-{
-}
-
-static void viking_flush_tlb_all(void)
-{
-	module_stats.invall++;
-	flush_user_windows();
-	srmmu_flush_whole_tlb();
-}
-
-static void viking_flush_tlb_mm(struct mm_struct *mm)
-{
-	int octx;
-	module_stats.invmm++;
-
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		flush_user_windows();
-		octx = srmmu_get_context();
-		srmmu_set_context(mm->context);
-		srmmu_flush_tlb_ctx();
-		srmmu_set_context(octx);
-#ifndef __SMP__
-	}
-#endif
-}
-
-static void viking_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-	int octx;
-	module_stats.invrnge++;
-
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		flush_user_windows();
-		octx = srmmu_get_context();
-		srmmu_set_context(mm->context);
-		if((start - end) < SRMMU_PMD_SIZE) {
-			start &= PAGE_MASK;
-			while(start < end) {
-				srmmu_flush_tlb_page(start);
-				start += PAGE_SIZE;
-			}
-		} else if((start - end) < SRMMU_PGDIR_SIZE) {
-			start &= SRMMU_PMD_MASK;
-			while(start < end) {
-				srmmu_flush_tlb_segment(start);
-				start += SRMMU_PMD_SIZE;
-			}
-		} else {
-			start &= SRMMU_PGDIR_MASK;
-			while(start < end) {
-				srmmu_flush_tlb_region(start);
-				start += SRMMU_PGDIR_SIZE;
-			}
-		}
-		srmmu_set_context(octx);
-#ifndef __SMP__
-	}
-#endif
-}
-
-static void viking_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
-{
-	int octx;
-	struct mm_struct *mm = vma->vm_mm;
-
-	module_stats.invpg++;
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		flush_user_windows();
-		octx = srmmu_get_context();
-		srmmu_set_context(mm->context);
-		srmmu_flush_tlb_page(page);
-		srmmu_set_context(octx);
-#ifndef __SMP__
-	}
-#endif
-}
-
-static void viking_flush_tlb_page_for_cbit(unsigned long page)
-{
-	srmmu_flush_tlb_page(page);
-}
-
 /* Cypress flushes. */
 static void cypress_flush_cache_all(void)
 {
@@ -1399,19 +1107,57 @@ static void cypress_flush_cache_all(void)
 
 static void cypress_flush_cache_mm(struct mm_struct *mm)
 {
+	register unsigned long a, b, c, d, e, f, g;
 	unsigned long flags, faddr;
 	int octx;
 
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		register unsigned long a, b, c, d, e, f, g;
-		flush_user_windows();
-		save_and_cli(flags);
-		octx = srmmu_get_context();
-		srmmu_set_context(mm->context);
-		a = 0x20; b = 0x40; c = 0x60; d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0;
-		faddr = (0x10000 - 0x100);
+	FLUSH_BEGIN(mm)
+	flush_user_windows();
+	save_and_cli(flags);
+	octx = srmmu_get_context();
+	srmmu_set_context(mm->context);
+	a = 0x20; b = 0x40; c = 0x60;
+	d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0;
+
+	faddr = (0x10000 - 0x100);
+	goto inside;
+	do {
+		faddr -= 0x100;
+	inside:
+		__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+				     "sta %%g0, [%0 + %2] %1\n\t"
+				     "sta %%g0, [%0 + %3] %1\n\t"
+				     "sta %%g0, [%0 + %4] %1\n\t"
+				     "sta %%g0, [%0 + %5] %1\n\t"
+				     "sta %%g0, [%0 + %6] %1\n\t"
+				     "sta %%g0, [%0 + %7] %1\n\t"
+				     "sta %%g0, [%0 + %8] %1\n\t" : :
+				     "r" (faddr), "i" (ASI_M_FLUSH_CTX),
+				     "r" (a), "r" (b), "r" (c), "r" (d),
+				     "r" (e), "r" (f), "r" (g));
+	} while(faddr);
+	srmmu_set_context(octx);
+	restore_flags(flags);
+	FLUSH_END
+}
+
+static void cypress_flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+	register unsigned long a, b, c, d, e, f, g;
+	unsigned long flags, faddr;
+	int octx;
+
+	FLUSH_BEGIN(mm)
+	flush_user_windows();
+	save_and_cli(flags);
+	octx = srmmu_get_context();
+	srmmu_set_context(mm->context);
+	a = 0x20; b = 0x40; c = 0x60;
+	d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0;
+
+	start &= SRMMU_PMD_MASK;
+	while(start < end) {
+		faddr = (start + (0x10000 - 0x100));
 		goto inside;
 		do {
 			faddr -= 0x100;
@@ -1424,99 +1170,55 @@ static void cypress_flush_cache_mm(struct mm_struct *mm)
 					     "sta %%g0, [%0 + %6] %1\n\t"
 					     "sta %%g0, [%0 + %7] %1\n\t"
 					     "sta %%g0, [%0 + %8] %1\n\t" : :
-					     "r" (faddr), "i" (ASI_M_FLUSH_CTX),
+					     "r" (faddr),
+					     "i" (ASI_M_FLUSH_SEG),
 					     "r" (a), "r" (b), "r" (c), "r" (d),
 					     "r" (e), "r" (f), "r" (g));
-		} while(faddr);
-		srmmu_set_context(octx);
-		restore_flags(flags);
-#ifndef __SMP__
-	}
-#endif
-}
-
-static void cypress_flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-	unsigned long flags, faddr;
-	int octx;
-
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		register unsigned long a, b, c, d, e, f, g;
-		flush_user_windows();
-		save_and_cli(flags);
-		octx = srmmu_get_context();
-		srmmu_set_context(mm->context);
-		a = 0x20; b = 0x40; c = 0x60; d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0;
-		start &= SRMMU_PMD_MASK;
-		while(start < end) {
-			faddr = (start + (0x10000 - 0x100));
-			goto inside;
-			do {
-				faddr -= 0x100;
-			inside:
-				__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
-						     "sta %%g0, [%0 + %2] %1\n\t"
-						     "sta %%g0, [%0 + %3] %1\n\t"
-						     "sta %%g0, [%0 + %4] %1\n\t"
-						     "sta %%g0, [%0 + %5] %1\n\t"
-						     "sta %%g0, [%0 + %6] %1\n\t"
-						     "sta %%g0, [%0 + %7] %1\n\t"
-						     "sta %%g0, [%0 + %8] %1\n\t" : :
-						     "r" (faddr),
-						     "i" (ASI_M_FLUSH_SEG),
-						     "r" (a), "r" (b), "r" (c), "r" (d),
-						     "r" (e), "r" (f), "r" (g));
-			} while (faddr != start);
-			start += SRMMU_PMD_SIZE;
-		}
-		srmmu_set_context(octx);
-		restore_flags(flags);
-#ifndef __SMP__
+		} while (faddr != start);
+		start += SRMMU_PMD_SIZE;
 	}
-#endif
+	srmmu_set_context(octx);
+	restore_flags(flags);
+	FLUSH_END
 }
 
 static void cypress_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
 {
+	register unsigned long a, b, c, d, e, f, g;
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long flags, line;
 	int octx;
 
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		register unsigned long a, b, c, d, e, f, g;
-		flush_user_windows();
-		save_and_cli(flags);
-		octx = srmmu_get_context();
-		srmmu_set_context(mm->context);
-		a = 0x20; b = 0x40; c = 0x60; d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0;
-		page &= PAGE_MASK;
-		line = (page + PAGE_SIZE) - 0x100;
-		goto inside;
-		do {
-			line -= 0x100;
-		inside:
-				__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
-						     "sta %%g0, [%0 + %2] %1\n\t"
-						     "sta %%g0, [%0 + %3] %1\n\t"
-						     "sta %%g0, [%0 + %4] %1\n\t"
-						     "sta %%g0, [%0 + %5] %1\n\t"
-						     "sta %%g0, [%0 + %6] %1\n\t"
-						     "sta %%g0, [%0 + %7] %1\n\t"
-						     "sta %%g0, [%0 + %8] %1\n\t" : :
-						     "r" (line),
-						     "i" (ASI_M_FLUSH_PAGE),
-						     "r" (a), "r" (b), "r" (c), "r" (d),
-						     "r" (e), "r" (f), "r" (g));
-		} while(line != page);
-		srmmu_set_context(octx);
-		restore_flags(flags);
-#ifndef __SMP__
-	}
-#endif
+	FLUSH_BEGIN(mm)
+	flush_user_windows();
+	save_and_cli(flags);
+	octx = srmmu_get_context();
+	srmmu_set_context(mm->context);
+	a = 0x20; b = 0x40; c = 0x60;
+	d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0;
+
+	page &= PAGE_MASK;
+	line = (page + PAGE_SIZE) - 0x100;
+	goto inside;
+	do {
+		line -= 0x100;
+	inside:
+			__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+					     "sta %%g0, [%0 + %2] %1\n\t"
+					     "sta %%g0, [%0 + %3] %1\n\t"
+					     "sta %%g0, [%0 + %4] %1\n\t"
+					     "sta %%g0, [%0 + %5] %1\n\t"
+					     "sta %%g0, [%0 + %6] %1\n\t"
+					     "sta %%g0, [%0 + %7] %1\n\t"
+					     "sta %%g0, [%0 + %8] %1\n\t" : :
+					     "r" (line),
+					     "i" (ASI_M_FLUSH_PAGE),
+					     "r" (a), "r" (b), "r" (c), "r" (d),
+					     "r" (e), "r" (f), "r" (g));
+	} while(line != page);
+	srmmu_set_context(octx);
+	restore_flags(flags);
+	FLUSH_END
 }
 
 /* Cypress is copy-back, at least that is how we configure it. */
@@ -1547,314 +1249,177 @@ static void cypress_flush_page_to_ram(unsigned long page)
 	} while(line != page);
 }
 
+static void cypress_flush_chunk(unsigned long chunk)
+{
+	cypress_flush_page_to_ram(chunk);	/* chunk is passed through unchanged to the page-to-RAM flush */
+}
+
 /* Cypress is also IO cache coherent. */
 static void cypress_flush_page_for_dma(unsigned long page)
 {
 }
 
-static void cypress_flush_page_to_uncache(unsigned long page)
+/* Cypress has a unified VIPT L2 cache that holds both instructions and
+ * data.  It does not have an onboard icache of any sort, therefore no
+ * separate instruction flush is necessary.
+ */
+static void cypress_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
 {
-	register unsigned long a, b, c, d, e, f, g;
-	unsigned long line;
-
-	a = 0x20; b = 0x40; c = 0x60; d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0;
-	page &= PAGE_MASK;
-	line = (page + PAGE_SIZE) - 0x100;
-	goto inside;
-	do {
-		line -= 0x100;
-	inside:
-		__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
-				     "sta %%g0, [%0 + %2] %1\n\t"
-				     "sta %%g0, [%0 + %3] %1\n\t"
-				     "sta %%g0, [%0 + %4] %1\n\t"
-				     "sta %%g0, [%0 + %5] %1\n\t"
-				     "sta %%g0, [%0 + %6] %1\n\t"
-				     "sta %%g0, [%0 + %7] %1\n\t"
-				     "sta %%g0, [%0 + %8] %1\n\t" : :
-				     "r" (line),
-				     "i" (ASI_M_FLUSH_PAGE),
-				     "r" (a), "r" (b), "r" (c), "r" (d),
-				     "r" (e), "r" (f), "r" (g));
-	} while(line != page);
 }
 
 static void cypress_flush_tlb_all(void)
 {
-	module_stats.invall++;
 	srmmu_flush_whole_tlb();
+	module_stats.invall++;
 }
 
 static void cypress_flush_tlb_mm(struct mm_struct *mm)
 {
-	int octx;
-
+	FLUSH_BEGIN(mm)
+	__asm__ __volatile__("
+	lda	[%0] %3, %%g5
+	sta	%2, [%0] %3
+	sta	%%g0, [%1] %4
+	sta	%%g5, [%0] %3"
+	: /* no outputs */
+	: "r" (SRMMU_CTX_REG), "r" (0x300), "r" (mm->context),
+	  "i" (ASI_M_MMUREGS), "i" (ASI_M_FLUSH_PROBE)
+	: "g5");
 	module_stats.invmm++;
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		octx = srmmu_get_context();
-		srmmu_set_context(mm->context);
-		srmmu_flush_tlb_ctx();
-		srmmu_set_context(octx);
-#ifndef __SMP__
-	}
-#endif
+	FLUSH_END
 }
 
 static void cypress_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
 {
-	int octx;
-	module_stats.invrnge++;
+	unsigned long size;
 
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		flush_user_windows();
-		octx = srmmu_get_context();
-		srmmu_set_context(mm->context);
-		if((start - end) < SRMMU_PMD_SIZE) {
-			start &= PAGE_MASK;
-			while(start < end) {
-				srmmu_flush_tlb_page(start);
-				start += PAGE_SIZE;
-			}
-		} else if((start - end) < SRMMU_PGDIR_SIZE) {
-			start &= SRMMU_PMD_MASK;
-			while(start < end) {
-				srmmu_flush_tlb_segment(start);
-				start += SRMMU_PMD_SIZE;
-			}
-		} else {
-			start &= SRMMU_PGDIR_MASK;
-			while(start < end) {
-				srmmu_flush_tlb_region(start);
-				start += SRMMU_PGDIR_SIZE;
-			}
-		}
-		srmmu_set_context(octx);
-#ifndef __SMP__
-	}
-#endif
+	FLUSH_BEGIN(mm)
+	start &= SRMMU_PGDIR_MASK;
+	size = SRMMU_PGDIR_ALIGN(end) - start;
+	__asm__ __volatile__("
+		lda	[%0] %5, %%g5
+		sta	%1, [%0] %5
+	1:	subcc	%3, %4, %3
+		bne	1b
+		 sta	%%g0, [%2 + %3] %6
+		sta	%%g5, [%0] %5"
+	: /* no outputs */
+	: "r" (SRMMU_CTX_REG), "r" (mm->context), "r" (start | 0x200),
+	  "r" (size), "r" (SRMMU_PGDIR_SIZE), "i" (ASI_M_MMUREGS),
+	  "i" (ASI_M_FLUSH_PROBE)
+	: "g5", "cc");
+	module_stats.invrnge++;
+	FLUSH_END
 }
 
 static void cypress_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 {
-	int octx;
 	struct mm_struct *mm = vma->vm_mm;
 
+	FLUSH_BEGIN(mm)
+	__asm__ __volatile__("
+	lda	[%0] %3, %%g5
+	sta	%1, [%0] %3
+	sta	%%g0, [%2] %4
+	sta	%%g5, [%0] %3"
+	: /* no outputs */
+	: "r" (SRMMU_CTX_REG), "r" (mm->context), "r" (page & PAGE_MASK),
+	  "i" (ASI_M_MMUREGS), "i" (ASI_M_FLUSH_PROBE)
+	: "g5");
 	module_stats.invpg++;
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		flush_user_windows();
-		octx = srmmu_get_context();
-		srmmu_set_context(mm->context);
-		srmmu_flush_tlb_page(page);
-		srmmu_set_context(octx);
-#ifndef __SMP__
-	}
-#endif
-}
+	FLUSH_END
+}
+
+/* Viking cache/TLB flush routines, implemented in assembly in viking.S */
+extern void viking_flush_cache_all(void);
+extern void viking_flush_cache_mm(struct mm_struct *mm);
+extern void viking_flush_cache_range(struct mm_struct *mm, unsigned long start,
+				     unsigned long end);
+extern void viking_flush_cache_page(struct vm_area_struct *vma,
+				    unsigned long page);
+extern void viking_flush_page_to_ram(unsigned long page);
+extern void viking_flush_page_for_dma(unsigned long page);
+extern void viking_flush_sig_insns(struct mm_struct *mm, unsigned long addr);
+extern void viking_flush_page(unsigned long page);
+extern void viking_mxcc_flush_page(unsigned long page);
+extern void viking_flush_chunk(unsigned long chunk);
+extern void viking_mxcc_flush_chunk(unsigned long chunk);
+extern void viking_flush_tlb_all(void);
+extern void viking_flush_tlb_mm(struct mm_struct *mm);
+extern void viking_flush_tlb_range(struct mm_struct *mm, unsigned long start,
+				   unsigned long end);
+extern void viking_flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long page);
+
+/* HyperSparc cache/TLB flush routines, implemented in assembly in hypersparc.S */
+extern void hypersparc_flush_cache_all(void);
+extern void hypersparc_flush_cache_mm(struct mm_struct *mm);
+extern void hypersparc_flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end);
+extern void hypersparc_flush_cache_page(struct vm_area_struct *vma, unsigned long page);
+extern void hypersparc_flush_page_to_ram(unsigned long page);
+extern void hypersparc_flush_chunk(unsigned long chunk);
+extern void hypersparc_flush_page_for_dma(unsigned long page);
+extern void hypersparc_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr);
+extern void hypersparc_flush_tlb_all(void);
+extern void hypersparc_flush_tlb_mm(struct mm_struct *mm);
+extern void hypersparc_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end);
+extern void hypersparc_flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
 
-static void cypress_flush_tlb_page_for_cbit(unsigned long page)
-{
-	srmmu_flush_tlb_page(page);
-}
-
-/* Hypersparc flushes.  Very nice chip... */
-static void hypersparc_flush_cache_all(void)
+static void hypersparc_ctxd_set(ctxd_t *ctxp, pgd_t *pgdp)
 {
-	flush_user_windows();
-	hyper_flush_unconditional_combined();
 	hyper_flush_whole_icache();
+	set_pte((pte_t *)ctxp, __pte((SRMMU_ET_PTD | (srmmu_v2p((unsigned long) pgdp) >> 4))));
 }
 
-static void hypersparc_flush_cache_mm(struct mm_struct *mm)
-{
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		flush_user_windows();
-		hyper_flush_cache_user();
-		hyper_flush_whole_icache();
-#ifndef __SMP__
-	}
-#endif
-}
-
-/* Boy was my older implementation inefficient... */
-static void hypersparc_flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-	volatile unsigned long clear;
-	int octx;
-
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		flush_user_windows();
-		octx = srmmu_get_context();
-		start &= PAGE_MASK;
-		srmmu_set_context(mm->context);
-		while(start < end) {
-			if(srmmu_hwprobe(start))
-				hyper_flush_cache_page(start);
-			start += PAGE_SIZE;
-		}
-		clear = srmmu_get_fstatus();
-		srmmu_set_context(octx);
-		hyper_flush_whole_icache();
-#ifndef __SMP__
-	}
-#endif
-}
-
-/* HyperSparc requires a valid mapping where we are about to flush
- * in order to check for a physical tag match during the flush.
- */
-static void hypersparc_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	volatile unsigned long clear;
-	int octx;
-
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		octx = srmmu_get_context();
-		flush_user_windows();
-		srmmu_set_context(mm->context);
-		hyper_flush_whole_icache();
-		if(!srmmu_hwprobe(page))
-			goto no_mapping;
-		hyper_flush_cache_page(page);
-	no_mapping:
-		clear = srmmu_get_fstatus();
-		srmmu_set_context(octx);
-#ifndef __SMP__
-	}
-#endif
-}
-
-/* HyperSparc is copy-back. */
-static void hypersparc_flush_page_to_ram(unsigned long page)
-{
-	volatile unsigned long clear;
-
-	if(srmmu_hwprobe(page))
-		hyper_flush_cache_page(page);
-	clear = srmmu_get_fstatus();
-}
-
-/* HyperSparc is IO cache coherent. */
-static void hypersparc_flush_page_for_dma(unsigned long page)
-{
-}
-
-static void hypersparc_flush_cache_page_to_uncache(unsigned long page)
-{
-	volatile unsigned long clear;
-
-	if(srmmu_hwprobe(page))
-		hyper_flush_cache_page(page);
-	clear = srmmu_get_fstatus();
-}
-
-static void hypersparc_flush_tlb_all(void)
-{
-	module_stats.invall++;
-	srmmu_flush_whole_tlb();
-}
-
-static void hypersparc_flush_tlb_mm(struct mm_struct *mm)
-{
-	int octx;
-
-	module_stats.invmm++;
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-
-		octx = srmmu_get_context();
-		srmmu_set_context(mm->context);
-		srmmu_flush_tlb_ctx();
-		srmmu_set_context(octx);
-
-#ifndef __SMP__
-	}
-#endif
-}
-
-static void hypersparc_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
+static void hypersparc_update_rootmmu_dir(struct task_struct *tsk, pgd_t *pgdp) 
 {
-	int octx;
-
-	module_stats.invrnge++;
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
+	unsigned long page = ((unsigned long) pgdp) & PAGE_MASK;
 
-		octx = srmmu_get_context();
-		srmmu_set_context(mm->context);
-		if((start - end) < SRMMU_PMD_SIZE) {
-			start &= PAGE_MASK;
-			while(start < end) {
-				srmmu_flush_tlb_page(start);
-				start += PAGE_SIZE;
-			}
-		} else if((start - end) < SRMMU_PGDIR_SIZE) {
-			start &= SRMMU_PMD_MASK;
-			while(start < end) {
-				srmmu_flush_tlb_segment(start);
-				start += SRMMU_PMD_SIZE;
-			}
-		} else {
-			start &= SRMMU_PGDIR_MASK;
-			while(start < end) {
-				srmmu_flush_tlb_region(start);
-				start += SRMMU_PGDIR_SIZE;
-			}
-		}
-		srmmu_set_context(octx);
-
-#ifndef __SMP__
+	hypersparc_flush_page_to_ram(page);
+	if(tsk->mm->context != NO_CONTEXT) {
+		flush_cache_mm(tsk->mm);
+		ctxd_set(&srmmu_context_table[tsk->mm->context], pgdp);
+		flush_tlb_mm(tsk->mm);
 	}
-#endif
 }
 
-static void hypersparc_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+static void viking_update_rootmmu_dir(struct task_struct *tsk, pgd_t *pgdp) 
 {
-	struct mm_struct *mm = vma->vm_mm;
-	int octx;
-
-	module_stats.invpg++;
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-
-		octx = srmmu_get_context();
-		srmmu_set_context(mm->context);
-		srmmu_flush_tlb_page(page);
-		srmmu_set_context(octx);
-
-#ifndef __SMP__
+	viking_flush_page((unsigned long)pgdp);	/* flush the page directory before it is installed */
+	if(tsk->mm->context != NO_CONTEXT) {	/* no context entry to retarget otherwise */
+		flush_cache_mm(tsk->mm);	/* flush the mm whose context entry changes, not current's */
+		ctxd_set(&srmmu_context_table[tsk->mm->context], pgdp);
+		flush_tlb_mm(tsk->mm);	/* same mm as the cache flush above */
 	}
-#endif
 }
 
-static void hypersparc_flush_tlb_page_for_cbit(unsigned long page)
+static void cypress_update_rootmmu_dir(struct task_struct *tsk, pgd_t *pgdp) 
 {
-	srmmu_flush_tlb_page(page);
-}
+	register unsigned long a, b, c, d, e, f, g;
+	unsigned long page = ((unsigned long) pgdp) & PAGE_MASK;
+	unsigned long line;
 
-static void hypersparc_ctxd_set(ctxd_t *ctxp, pgd_t *pgdp)
-{
-	hyper_flush_whole_icache();
-	set_pte((pte_t *)ctxp, (SRMMU_ET_PTD | (srmmu_v2p((unsigned long) pgdp) >> 4)));
-}
+	a = 0x20; b = 0x40; c = 0x60; d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0;
+	page &= PAGE_MASK;
+	line = (page + PAGE_SIZE) - 0x100;
+	goto inside;
+	do {
+		line -= 0x100;
+	inside:
+		__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+				     "sta %%g0, [%0 + %2] %1\n\t"
+				     "sta %%g0, [%0 + %3] %1\n\t"
+				     "sta %%g0, [%0 + %4] %1\n\t"
+				     "sta %%g0, [%0 + %5] %1\n\t"
+				     "sta %%g0, [%0 + %6] %1\n\t"
+				     "sta %%g0, [%0 + %7] %1\n\t"
+				     "sta %%g0, [%0 + %8] %1\n\t" : :
+				     "r" (line),
+				     "i" (ASI_M_FLUSH_PAGE),
+				     "r" (a), "r" (b), "r" (c), "r" (d),
+				     "r" (e), "r" (f), "r" (g));
+	} while(line != page);
 
-static void hypersparc_update_rootmmu_dir(struct task_struct *tsk, pgd_t *pgdp) 
-{
 	if(tsk->mm->context != NO_CONTEXT) {
 		flush_cache_mm(current->mm);
 		ctxd_set(&srmmu_context_table[tsk->mm->context], pgdp);
@@ -1866,14 +1431,28 @@ static void hypersparc_switch_to_context(struct task_struct *tsk)
 {
 	hyper_flush_whole_icache();
 	if(tsk->mm->context == NO_CONTEXT) {
-		alloc_context(tsk);
-		flush_cache_mm(current->mm);
+		alloc_context(tsk->mm);
+		flush_cache_mm(tsk->mm);
 		ctxd_set(&srmmu_context_table[tsk->mm->context], tsk->mm->pgd);
-		flush_tlb_mm(current->mm);
+		flush_tlb_mm(tsk->mm);
 	}
 	srmmu_set_context(tsk->mm->context);
 }
 
+static void hypersparc_init_new_context(struct mm_struct *mm)
+{
+	hyper_flush_whole_icache();
+	/* alloc_context() is expected to assign mm->context, used as the table index below -- TODO confirm. */
+	alloc_context(mm);
+	/* Point the context-table slot for mm->context at mm->pgd: cache flush before, TLB flush after. */
+	flush_cache_mm(mm);
+	ctxd_set(&srmmu_context_table[mm->context], mm->pgd);
+	flush_tlb_mm(mm);
+	/* Only call srmmu_set_context() when mm is the mm of the running task. */
+	if(mm == current->mm)
+		srmmu_set_context(mm->context);
+}
+
 /* IOMMU things go here. */
 
 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
@@ -1890,7 +1469,7 @@ static inline void srmmu_map_dvma_pages_for_iommu(struct iommu_struct *iommu,
 
 	iopte += ((first - iommu->start) >> PAGE_SHIFT);
 	while(first <= last) {
-		iopte_val(*iopte++) = MKIOPTE(srmmu_v2p(first));
+		*iopte++ = __iopte(MKIOPTE(srmmu_v2p(first)));
 		first += PAGE_SIZE;
 	}
 }
@@ -1955,6 +1534,8 @@ unsigned long iommu_init(int iommund, unsigned long memory_start,
 
 	/* Initialize new table. */
 	flush_cache_all();
+	memset(iommu->page_table, 0, ptsize);
+	srmmu_map_dvma_pages_for_iommu(iommu, memory_end);
 	if(viking_mxcc_present) {
 		unsigned long start = (unsigned long) iommu->page_table;
 		unsigned long end = (start + ptsize);
@@ -1962,16 +1543,14 @@ unsigned long iommu_init(int iommund, unsigned long memory_start,
 			viking_mxcc_flush_page(start);
 			start += PAGE_SIZE;
 		}
-	} else if(flush_page_for_dma == viking_no_mxcc_flush_page) {
+	} else if(flush_page_for_dma == viking_flush_page) {
 		unsigned long start = (unsigned long) iommu->page_table;
 		unsigned long end = (start + ptsize);
 		while(start < end) {
-			viking_no_mxcc_flush_page(start);
+			viking_flush_page(start);
 			start += PAGE_SIZE;
 		}
 	}
-	memset(iommu->page_table, 0, ptsize);
-	srmmu_map_dvma_pages_for_iommu(iommu, memory_end);
 	flush_tlb_all();
 	iommu->regs->base = srmmu_v2p((unsigned long) iommu->page_table) >> 4;
 	iommu_invalidate(iommu->regs);
@@ -1997,6 +1576,7 @@ void iommu_sun4d_init(int sbi_node, struct linux_sbus *sbus)
 
 	/* Initialize new table. */
 	flush_cache_all();
+	memset(iommu, 0, 16 * PAGE_SIZE);
 	if(viking_mxcc_present) {
 		unsigned long start = (unsigned long) iommu;
 		unsigned long end = (start + 16 * PAGE_SIZE);
@@ -2004,21 +1584,20 @@ void iommu_sun4d_init(int sbi_node, struct linux_sbus *sbus)
 			viking_mxcc_flush_page(start);
 			start += PAGE_SIZE;
 		}
-	} else if(flush_page_for_dma == viking_no_mxcc_flush_page) {
+	} else if(flush_page_for_dma == viking_flush_page) {
 		unsigned long start = (unsigned long) iommu;
 		unsigned long end = (start + 16 * PAGE_SIZE);
 		while(start < end) {
-			viking_no_mxcc_flush_page(start);
+			viking_flush_page(start);
 			start += PAGE_SIZE;
 		}
 	}
-	memset(iommu, 0, 16 * PAGE_SIZE);
 	flush_tlb_all();
 
 	sbus->iommu = (struct iommu_struct *)iommu;
 }
 
-static char *srmmu_get_scsi_one(char *vaddr, unsigned long len, struct linux_sbus *sbus)
+static __u32 srmmu_get_scsi_one(char *vaddr, unsigned long len, struct linux_sbus *sbus)
 {
 	unsigned long page = ((unsigned long) vaddr) & PAGE_MASK;
 
@@ -2026,7 +1605,7 @@ static char *srmmu_get_scsi_one(char *vaddr, unsigned long len, struct linux_sbu
 		flush_page_for_dma(page);
 		page += PAGE_SIZE;
 	}
-	return vaddr;
+	return (__u32)vaddr;
 }
 
 static void srmmu_get_scsi_sgl(struct mmu_sglist *sg, int sz, struct linux_sbus *sbus)
@@ -2039,12 +1618,12 @@ static void srmmu_get_scsi_sgl(struct mmu_sglist *sg, int sz, struct linux_sbus
 			flush_page_for_dma(page);
 			page += PAGE_SIZE;
 		}
-		sg[sz].dvma_addr = (char *) (sg[sz].addr);
+		sg[sz].dvma_addr = (__u32) (sg[sz].addr);
 		sz--;
 	}
 }
 
-static void srmmu_release_scsi_one(char *vaddr, unsigned long len, struct linux_sbus *sbus)
+static void srmmu_release_scsi_one(__u32 vaddr, unsigned long len, struct linux_sbus *sbus)
 {
 }
 
@@ -2070,12 +1649,12 @@ static inline unsigned long srmmu_early_paddr(unsigned long vaddr)
 
 static inline void srmmu_early_pgd_set(pgd_t *pgdp, pmd_t *pmdp)
 {
-	set_pte((pte_t *)pgdp, (SRMMU_ET_PTD | (srmmu_early_paddr((unsigned long) pmdp) >> 4)));
+	set_pte((pte_t *)pgdp, __pte((SRMMU_ET_PTD | (srmmu_early_paddr((unsigned long) pmdp) >> 4))));
 }
 
 static inline void srmmu_early_pmd_set(pmd_t *pmdp, pte_t *ptep)
 {
-	set_pte((pte_t *)pmdp, (SRMMU_ET_PTD | (srmmu_early_paddr((unsigned long) ptep) >> 4)));
+	set_pte((pte_t *)pmdp, __pte((SRMMU_ET_PTD | (srmmu_early_paddr((unsigned long) ptep) >> 4))));
 }
 
 static inline unsigned long srmmu_early_pgd_page(pgd_t pgd)
@@ -2157,7 +1736,7 @@ void srmmu_inherit_prom_mappings(unsigned long start,unsigned long end)
     
 		pgdp = srmmu_pgd_offset(init_task.mm, start);
 		if(what == 2) {
-			pgd_val(*pgdp) = prompte;
+			*pgdp = __pgd(prompte);
 			start += SRMMU_PGDIR_SIZE;
 			continue;
 		}
@@ -2167,7 +1746,7 @@ void srmmu_inherit_prom_mappings(unsigned long start,unsigned long end)
 		}
 		pmdp = srmmu_early_pmd_offset(pgdp, start);
 		if(what == 1) {
-			pmd_val(*pmdp) = prompte;
+			*pmdp = __pmd(prompte);
 			start += SRMMU_PMD_SIZE;
 			continue;
 		}
@@ -2176,11 +1755,12 @@ void srmmu_inherit_prom_mappings(unsigned long start,unsigned long end)
 			srmmu_early_pmd_set(pmdp, ptep);
 		}
 		ptep = srmmu_early_pte_offset(pmdp, start);
-		pte_val(*ptep) = prompte;
+		*ptep = __pte(prompte);
 		start += PAGE_SIZE;
 	}
 }
 
+#ifdef CONFIG_SBUS
 static void srmmu_map_dma_area(unsigned long addr, int len)
 {
 	unsigned long page, end;
@@ -2224,17 +1804,18 @@ static void srmmu_map_dma_area(unsigned long addr, int len)
 			viking_mxcc_flush_page(start);
 			start += PAGE_SIZE;
 		}
-	} else if(flush_page_for_dma == viking_no_mxcc_flush_page) {
+	} else if(flush_page_for_dma == viking_flush_page) {
 		unsigned long start = ((unsigned long) iopte_first) & PAGE_MASK;
 		unsigned long end = PAGE_ALIGN(((unsigned long) iopte));
 		while(start < end) {
-			viking_no_mxcc_flush_page(start);
+			viking_flush_page(start);
 			start += PAGE_SIZE;
 		}
 	}
 	flush_tlb_all();
 	iommu_invalidate(iommu->regs);
 }
+#endif
 
 /* #define DEBUG_MAP_KERNEL */
 
@@ -2257,7 +1838,7 @@ static inline void do_large_mapping(unsigned long vaddr, unsigned long phys_base
 
 	MKTRACE(("dlm[v<%08lx>-->p<%08lx>]", vaddr, phys_base));
 	big_pte = KERNEL_PTE(phys_base >> 4);
-	pgd_val(*pgdp) = big_pte;
+	*pgdp = __pgd(big_pte);
 }
 
 /* Create second-level SRMMU 256K medium sized page mappings. */
@@ -2273,7 +1854,7 @@ static inline void do_medium_mapping(unsigned long vaddr, unsigned long vend,
 		pgdp = srmmu_pgd_offset(init_task.mm, vaddr);
 		pmdp = srmmu_early_pmd_offset(pgdp, vaddr);
 		medium_pte = KERNEL_PTE(phys_base >> 4);
-		pmd_val(*pmdp) = medium_pte;
+		*pmdp = __pmd(medium_pte);
 		phys_base += SRMMU_PMD_SIZE;
 		vaddr += SRMMU_PMD_SIZE;
 	}
@@ -2295,7 +1876,7 @@ static inline void do_small_mapping(unsigned long start, unsigned long end,
 		pmdp = srmmu_early_pmd_offset(pgdp, start);
 		ptep = srmmu_early_pte_offset(pmdp, start);
 
-		pte_val(*ptep) = KERNEL_PTE(phys_base >> 4);
+		*ptep = __pte(KERNEL_PTE(phys_base >> 4));
 		phys_base += PAGE_SIZE;
 		start += PAGE_SIZE;
 	}
@@ -2436,7 +2017,7 @@ static void map_kernel(void)
 	tally = 0;
 	for(entry = 0; sp_banks[entry].num_bytes; entry++)
 		tally += sp_banks[entry].num_bytes;
-	if(tally >= (0xfd000000 - KERNBASE))
+	if(tally > (0xfd000000 - KERNBASE))
 		lots_of_ram = 1;
 	else
 		lots_of_ram = 0;
@@ -2487,7 +2068,7 @@ static void map_kernel(void)
 		MKTRACE(("<%d> base=%08lx bs=%08lx ", entry, sp_banks[entry].base_addr, bank_size));
 		if(!bank_size)
 			break;
-		if(((vaddr + bank_size) >= 0xfd000000) ||
+		if(((vaddr + bank_size) > 0xfd000000) ||
 		   ((vaddr + bank_size) < KERNBASE)) {
 			unsigned long orig_base = sp_banks[entry].base_addr;
 			unsigned long orig_len = sp_banks[entry].num_bytes;
@@ -2568,6 +2149,7 @@ check_and_return:
 	}
 	MKTRACE(("success\n"));
 	init_task.mm->mmap->vm_start = page_offset = low_base;
+	stack_top = page_offset - PAGE_SIZE;
 	return; /* SUCCESS! */
 }
 
@@ -2603,9 +2185,6 @@ unsigned long srmmu_paging_init(unsigned long start_mem, unsigned long end_mem)
 	sparc_iobase_vaddr = 0xfd000000;    /* 16MB of IOSPACE on all sun4m's. */
 	physmem_mapped_contig = 0;	    /* for init.c:taint_real_pages()   */
 
-#if CONFIG_AP1000
-        num_contexts = AP_NUM_CONTEXTS;
-#else
 	/* Find the number of contexts on the srmmu. */
 	cpunode = prom_getchild(prom_root_node);
 	num_contexts = 0;
@@ -2616,7 +2195,7 @@ unsigned long srmmu_paging_init(unsigned long start_mem, unsigned long end_mem)
 			break;
 		}
 	}
-#endif
+
 	if(!num_contexts) {
 		prom_printf("Something wrong, can't find cpu node in paging_init.\n");
 		prom_halt();
@@ -2635,22 +2214,9 @@ unsigned long srmmu_paging_init(unsigned long start_mem, unsigned long end_mem)
 #endif
 
 	mempool = PAGE_ALIGN(mempool);
-#if CONFIG_AP1000
-        ap_inherit_mappings();
-#else
         srmmu_inherit_prom_mappings(0xfe400000,(LINUX_OPPROM_ENDVM-PAGE_SIZE));
-#endif
 	map_kernel();
-#if CONFIG_AP1000
-	/* the MSC wants this aligned on a 16k boundary */
-	srmmu_context_table = 
-	  sparc_init_alloc(&mempool, 
-			   num_contexts*sizeof(ctxd_t)<0x4000?
-			   0x4000:
-			   num_contexts*sizeof(ctxd_t));
-#else
 	srmmu_context_table = sparc_init_alloc(&mempool, num_contexts*sizeof(ctxd_t));
-#endif
 	srmmu_ctx_table_phys = (ctxd_t *) srmmu_v2p((unsigned long) srmmu_context_table);
 	for(i = 0; i < num_contexts; i++)
 		ctxd_set(&srmmu_context_table[i], swapper_pg_dir);
@@ -2658,12 +2224,12 @@ unsigned long srmmu_paging_init(unsigned long start_mem, unsigned long end_mem)
 	start_mem = PAGE_ALIGN(mempool);
 
 	flush_cache_all();
-	if(flush_page_for_dma == viking_no_mxcc_flush_page) {
+	if(flush_page_for_dma == viking_flush_page) {
 		unsigned long start = ptables_start;
 		unsigned long end = start_mem;
 
 		while(start < end) {
-			viking_no_mxcc_flush_page(start);
+			viking_flush_page(start);
 			start += PAGE_SIZE;
 		}
 	}
@@ -2671,13 +2237,7 @@ unsigned long srmmu_paging_init(unsigned long start_mem, unsigned long end_mem)
 	flush_tlb_all();
 	poke_srmmu();
 
-#if CONFIG_AP1000
-	/* on the AP we don't put the top few contexts into the free
-	   context list as these are reserved for parallel tasks */
-	start_mem = sparc_context_init(start_mem, MPP_CONTEXT_BASE);
-#else
 	start_mem = sparc_context_init(start_mem, num_contexts);
-#endif
 	start_mem = free_area_init(start_mem, end_mem);
 
 	return PAGE_ALIGN(start_mem);
@@ -2720,10 +2280,8 @@ static void srmmu_update_mmu_cache(struct vm_area_struct * vma, unsigned long ad
 {
 }
 
-static void srmmu_exit_hook(void)
+static void srmmu_destroy_context(struct mm_struct *mm)
 {
-	struct mm_struct *mm = current->mm;
-
 	if(mm->context != NO_CONTEXT && mm->count == 1) {
 		flush_cache_mm(mm);
 		ctxd_set(&srmmu_context_table[mm->context], swapper_pg_dir);
@@ -2733,66 +2291,69 @@ static void srmmu_exit_hook(void)
 	}
 }
 
-static void srmmu_flush_hook(void)
-{
-	if(current->tss.flags & SPARC_FLAG_KTHREAD) {
-		alloc_context(current);
-		flush_cache_mm(current->mm);
-		ctxd_set(&srmmu_context_table[current->mm->context], current->mm->pgd);
-		flush_tlb_mm(current->mm);
-		srmmu_set_context(current->mm->context);
-	}
-}
-
 static void srmmu_vac_update_mmu_cache(struct vm_area_struct * vma,
 				       unsigned long address, pte_t pte)
 {
-#if 0
-	struct inode *inode;
-	struct vm_area_struct *vmaring;
-	unsigned long offset, vaddr;
-	unsigned long start;
-	pgd_t *pgdp;
-	pmd_t *pmdp;
-	pte_t *ptep;
-
-	if (!(vma->vm_flags & VM_WRITE) ||
-	    !(vma->vm_flags & VM_SHARED))
-		return;
-
-	inode = vma->vm_inode;
-	if (!inode)
-		return;
-
-	offset = (address & PAGE_MASK) - vma->vm_start;
-	vmaring = inode->i_mmap; 
-	do {
-		vaddr = vmaring->vm_start + offset;
+	if((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED)) {
+		struct vm_area_struct *vmaring;
+		struct inode *inode;
+		unsigned long flags, offset, vaddr, start;
+		int alias_found = 0;
+		pgd_t *pgdp;
+		pmd_t *pmdp;
+		pte_t *ptep;
 
-		if ((vaddr ^ address) & vac_badbits) {
-			start = vma->vm_start;
-			while (start < vma->vm_end) {
-				pgdp = srmmu_pgd_offset(vma->vm_mm, start);
-				pmdp = srmmu_pmd_offset(pgdp, start);
-				ptep = srmmu_pte_offset(pmdp, start);
-
-				flush_cache_page_to_uncache(start);
-				set_pte(ptep, __pte((pte_val(*ptep) &
-					      ~SRMMU_CACHE)));
-				flush_tlb_page_for_cbit(start);
+		save_and_cli(flags);
 
-				start += PAGE_SIZE;
+		inode = vma->vm_inode;
+		if (!inode)
+			goto done;
+		offset = (address & PAGE_MASK) - vma->vm_start;
+		vmaring = inode->i_mmap; 
+		do {
+			vaddr = vmaring->vm_start + offset;
+
+			if ((vaddr ^ address) & vac_badbits) {
+				alias_found++;
+				start = vmaring->vm_start;
+				while (start < vmaring->vm_end) {
+					pgdp = srmmu_pgd_offset(vmaring->vm_mm, start);
+					if(!pgdp) goto next;
+					pmdp = srmmu_pmd_offset(pgdp, start);
+					if(!pmdp) goto next;
+					ptep = srmmu_pte_offset(pmdp, start);
+					if(!ptep) goto next;
+
+					if((pte_val(*ptep) & SRMMU_ET_MASK) == SRMMU_VALID) {
+#if 1
+						printk("Fixing USER/USER alias [%ld:%08lx]\n",
+						       vmaring->vm_mm->context, start);
+#endif
+						flush_cache_page(vmaring, start);
+						set_pte(ptep, __pte((pte_val(*ptep) &
+								     ~SRMMU_CACHE)));
+						flush_tlb_page(vmaring, start);
+					}
+				next:
+					start += PAGE_SIZE;
+				}
 			}
-			return;
+		} while ((vmaring = vmaring->vm_next_share) != inode->i_mmap);
+		/* Aliased: uncache our own mapping too, via SRMMU_CACHE (not the sun4c NOCACHE bit). */
+		if(alias_found && (pte_val(pte) & SRMMU_CACHE)) {
+			pgdp = srmmu_pgd_offset(vma->vm_mm, address);
+			ptep = srmmu_pte_offset((pmd_t *) pgdp, address);
+			flush_cache_page(vma, address);
+			set_pte(ptep, __pte(pte_val(*ptep) & ~SRMMU_CACHE));
+			flush_tlb_page(vma, address);
 		}
-	} while ((vmaring = vmaring->vm_next_share) != inode->i_mmap);
-#endif
+	done:
+		restore_flags(flags);
+	}
 }
 
-static void hypersparc_exit_hook(void)
+static void hypersparc_destroy_context(struct mm_struct *mm)
 {
-	struct mm_struct *mm = current->mm;
-
 	if(mm->context != NO_CONTEXT && mm->count == 1) {
 		/* HyperSparc is copy-back, any data for this
 		 * process in a modified cache line is stale
@@ -2807,52 +2368,65 @@ static void hypersparc_exit_hook(void)
 	}
 }
 
-static void hypersparc_flush_hook(void)
-{
-	if(current->tss.flags & SPARC_FLAG_KTHREAD) {
-		alloc_context(current);
-		flush_cache_mm(current->mm);
-		ctxd_set(&srmmu_context_table[current->mm->context], current->mm->pgd);
-		flush_tlb_mm(current->mm);
-		srmmu_set_context(current->mm->context);
-	}
-}
-
 /* Init various srmmu chip types. */
-static void srmmu_is_bad(void)
+__initfunc(static void srmmu_is_bad(void))
 {
 	prom_printf("Could not determine SRMMU chip type.\n");
 	prom_halt();
 }
 
-static void init_vac_layout(void)
+__initfunc(static void init_vac_layout(void))
 {
 	int nd, cache_lines;
 	char node_str[128];
+#ifdef __SMP__
+	int cpu = 0;
+	unsigned long max_size = 0;
+	unsigned long min_line_size = 0x10000000;
+#endif
 
 	nd = prom_getchild(prom_root_node);
 	while((nd = prom_getsibling(nd)) != 0) {
 		prom_getstring(nd, "device_type", node_str, sizeof(node_str));
-		if(!strcmp(node_str, "cpu"))
+		if(!strcmp(node_str, "cpu")) {
+			vac_line_size = prom_getint(nd, "cache-line-size");
+			if (vac_line_size == -1) {
+				prom_printf("can't determine cache-line-size, "
+					    "halting.\n");
+				prom_halt();
+			}
+			cache_lines = prom_getint(nd, "cache-nlines");
+			if (cache_lines == -1) {
+				prom_printf("can't determine cache-nlines, halting.\n");
+				prom_halt();
+			}
+
+			vac_cache_size = cache_lines * vac_line_size;
+			vac_badbits = (vac_cache_size - 1) & PAGE_MASK;
+#ifdef __SMP__
+			if(vac_cache_size > max_size)
+				max_size = vac_cache_size;
+			if(vac_line_size < min_line_size)
+				min_line_size = vac_line_size;
+			cpu++;
+			if(cpu == smp_num_cpus)
+				break;
+#else
 			break;
+#endif
+		}
 	}
 	if(nd == 0) {
 		prom_printf("No CPU nodes found, halting.\n");
 		prom_halt();
 	}
-
-	vac_line_size = prom_getint(nd, "cache-line-size");
-	if (vac_line_size == -1) {
-		prom_printf("can't determine cache-line-size, halting.\n");
-		prom_halt();
-	}
-	cache_lines = prom_getint(nd, "cache-nlines");
-	if (cache_lines == -1) {
-		prom_printf("can't determine cache-nlines, halting.\n");
-		prom_halt();
-	}
-	vac_cache_size = cache_lines * vac_line_size;
+#ifdef __SMP__
+	vac_cache_size = max_size;
+	vac_line_size = min_line_size;
 	vac_badbits = (vac_cache_size - 1) & PAGE_MASK;
+#endif
+	printk("SRMMU: Using VAC size of %d bytes, line size %d bytes.\n",
+	       (int)vac_cache_size, (int)vac_line_size);
 }
 
 static void poke_hypersparc(void)
@@ -2867,7 +2441,10 @@ static void poke_hypersparc(void)
 	mreg |= (HYPERSPARC_CMODE);
 
 	srmmu_set_mmureg(mreg);
+
+#if 0 /* I think this is bad news... -DaveM */
 	hyper_clear_all_tags();
+#endif
 
 	put_ross_icr(HYPERSPARC_ICCR_FTD | HYPERSPARC_ICCR_ICE);
 	hyper_flush_whole_icache();
@@ -2875,7 +2452,7 @@ static void poke_hypersparc(void)
 	clear = srmmu_get_fstatus();
 }
 
-static void init_hypersparc(void)
+__initfunc(static void init_hypersparc(void))
 {
 	srmmu_name = "ROSS HyperSparc";
 
@@ -2893,14 +2470,15 @@ static void init_hypersparc(void)
 	flush_tlb_page = hypersparc_flush_tlb_page;
 
 	flush_page_to_ram = hypersparc_flush_page_to_ram;
+	flush_sig_insns = hypersparc_flush_sig_insns;
 	flush_page_for_dma = hypersparc_flush_page_for_dma;
-	flush_cache_page_to_uncache = hypersparc_flush_cache_page_to_uncache;
-	flush_tlb_page_for_cbit = hypersparc_flush_tlb_page_for_cbit;
+
+	flush_chunk = hypersparc_flush_chunk; /* local flush _only_ */
 
 	ctxd_set = hypersparc_ctxd_set;
 	switch_to_context = hypersparc_switch_to_context;
-	mmu_exit_hook = hypersparc_exit_hook;
-	mmu_flush_hook = hypersparc_flush_hook;
+	init_new_context = hypersparc_init_new_context;
+	destroy_context = hypersparc_destroy_context;
 	update_mmu_cache = srmmu_vac_update_mmu_cache;
 	sparc_update_rootmmu_dir = hypersparc_update_rootmmu_dir;
 	poke_srmmu = poke_hypersparc;
@@ -2909,17 +2487,32 @@ static void init_hypersparc(void)
 static void poke_cypress(void)
 {
 	unsigned long mreg = srmmu_get_mmureg();
-	unsigned long faddr;
+	unsigned long faddr, tagval;
+	volatile unsigned long cypress_sucks;
 	volatile unsigned long clear;
 
 	clear = srmmu_get_faddr();
 	clear = srmmu_get_fstatus();
 
-	for(faddr = 0x0; faddr < 0x10000; faddr += 20) {
-		__asm__ __volatile__("sta %%g0, [%0 + %1] %2\n\t"
-				     "sta %%g0, [%0] %2\n\t" : :
-				     "r" (faddr), "r" (0x40000),
-				     "i" (ASI_M_DATAC_TAG));
+	if (!(mreg & CYPRESS_CENABLE)) {
+		for(faddr = 0x0; faddr < 0x10000; faddr += 20) {
+			__asm__ __volatile__("sta %%g0, [%0 + %1] %2\n\t"
+					     "sta %%g0, [%0] %2\n\t" : :
+					     "r" (faddr), "r" (0x40000),
+					     "i" (ASI_M_DATAC_TAG));
+		}
+	} else {
+		for(faddr = 0; faddr < 0x10000; faddr += 0x20) {
+			__asm__ __volatile__("lda [%1 + %2] %3, %0\n\t" :
+					     "=r" (tagval) :
+					     "r" (faddr), "r" (0x40000),
+					     "i" (ASI_M_DATAC_TAG));
+
+			/* If modified and valid, kick it. */
+			if((tagval & 0x60) == 0x60)
+				cypress_sucks = *(unsigned long *)
+							(0xf0020000 + faddr);
+		}
 	}
 
 	/* And one more, for our good neighbor, Mr. Broken Cypress. */
@@ -2930,7 +2523,7 @@ static void poke_cypress(void)
 	srmmu_set_mmureg(mreg);
 }
 
-static void init_cypress_common(void)
+__initfunc(static void init_cypress_common(void))
 {
 	init_vac_layout();
 
@@ -2945,23 +2538,25 @@ static void init_cypress_common(void)
 	flush_tlb_page = cypress_flush_tlb_page;
 	flush_tlb_range = cypress_flush_tlb_range;
 
+	flush_chunk = cypress_flush_chunk; /* local flush _only_ */
+
 	flush_page_to_ram = cypress_flush_page_to_ram;
+	flush_sig_insns = cypress_flush_sig_insns;
 	flush_page_for_dma = cypress_flush_page_for_dma;
-	flush_cache_page_to_uncache = cypress_flush_page_to_uncache;
-	flush_tlb_page_for_cbit = cypress_flush_tlb_page_for_cbit;
+	sparc_update_rootmmu_dir = cypress_update_rootmmu_dir;
 
 	update_mmu_cache = srmmu_vac_update_mmu_cache;
 	poke_srmmu = poke_cypress;
 }
 
-static void init_cypress_604(void)
+__initfunc(static void init_cypress_604(void))
 {
 	srmmu_name = "ROSS Cypress-604(UP)";
 	srmmu_modtype = Cypress;
 	init_cypress_common();
 }
 
-static void init_cypress_605(unsigned long mrev)
+__initfunc(static void init_cypress_605(unsigned long mrev))
 {
 	srmmu_name = "ROSS Cypress-605(MP)";
 	if(mrev == 0xe) {
@@ -2999,7 +2594,7 @@ static void poke_swift(void)
 }
 
 #define SWIFT_MASKID_ADDR  0x10003018
-static void init_swift(void)
+__initfunc(static void init_swift(void))
 {
 	unsigned long swift_rev;
 
@@ -3051,15 +2646,16 @@ static void init_swift(void)
 	flush_cache_page = swift_flush_cache_page;
 	flush_cache_range = swift_flush_cache_range;
 
+	flush_chunk = swift_flush_chunk; /* local flush _only_ */
+
 	flush_tlb_all = swift_flush_tlb_all;
 	flush_tlb_mm = swift_flush_tlb_mm;
 	flush_tlb_page = swift_flush_tlb_page;
 	flush_tlb_range = swift_flush_tlb_range;
 
 	flush_page_to_ram = swift_flush_page_to_ram;
+	flush_sig_insns = swift_flush_sig_insns;
 	flush_page_for_dma = swift_flush_page_for_dma;
-	flush_cache_page_to_uncache = swift_flush_cache_page_to_uncache;
-	flush_tlb_page_for_cbit = swift_flush_tlb_page_for_cbit;
 
 	/* Are you now convinced that the Swift is one of the
 	 * biggest VLSI abortions of all time?  Bravo Fujitsu!
@@ -3081,7 +2677,7 @@ static void poke_tsunami(void)
 	srmmu_set_mmureg(mreg);
 }
 
-static void init_tsunami(void)
+__initfunc(static void init_tsunami(void))
 {
 	/* Tsunami's pretty sane, Sun and TI actually got it
 	 * somewhat right this time.  Fujitsu should have
@@ -3096,15 +2692,16 @@ static void init_tsunami(void)
 	flush_cache_page = tsunami_flush_cache_page;
 	flush_cache_range = tsunami_flush_cache_range;
 
+	flush_chunk = tsunami_flush_chunk; /* local flush _only_ */
+
 	flush_tlb_all = tsunami_flush_tlb_all;
 	flush_tlb_mm = tsunami_flush_tlb_mm;
 	flush_tlb_page = tsunami_flush_tlb_page;
 	flush_tlb_range = tsunami_flush_tlb_range;
 
 	flush_page_to_ram = tsunami_flush_page_to_ram;
+	flush_sig_insns = tsunami_flush_sig_insns;
 	flush_page_for_dma = tsunami_flush_page_for_dma;
-	flush_cache_page_to_uncache = tsunami_flush_cache_page_to_uncache;
-	flush_tlb_page_for_cbit = tsunami_flush_tlb_page_for_cbit;
 
 	poke_srmmu = poke_tsunami;
 }
@@ -3149,35 +2746,27 @@ static void poke_viking(void)
 	mreg |= (VIKING_ICENABLE | VIKING_DCENABLE);
 	mreg |= VIKING_SBENABLE;
 	mreg &= ~(VIKING_ACENABLE);
-#if CONFIG_AP1000
-        mreg &= ~(VIKING_SBENABLE);
-#endif
 	srmmu_set_mmureg(mreg);
 
-
 #ifdef __SMP__
 	/* Avoid unnecessary cross calls. */
 	flush_cache_all = local_flush_cache_all;
 	flush_page_to_ram = local_flush_page_to_ram;
+	flush_sig_insns = local_flush_sig_insns;
 	flush_page_for_dma = local_flush_page_for_dma;
-	if (viking_mxcc_present) {
-		flush_cache_page_to_uncache = local_flush_cache_page_to_uncache;
-	}
 #endif
 }
 
-static void init_viking(void)
+__initfunc(static void init_viking(void))
 {
 	unsigned long mreg = srmmu_get_mmureg();
 
 	/* Ahhh, the viking.  SRMMU VLSI abortion number two... */
-
 	if(mreg & VIKING_MMODE) {
 		unsigned long bpreg;
 
 		srmmu_name = "TI Viking";
 		viking_mxcc_present = 0;
-		set_pte = srmmu_set_pte_nocache_nomxccvik;
 
 		bpreg = viking_get_bpreg();
 		bpreg &= ~(VIKING_ACTION_MIX);
@@ -3185,7 +2774,10 @@ static void init_viking(void)
 
 		msi_set_sync();
 
-		flush_cache_page_to_uncache = viking_no_mxcc_flush_page;
+		set_pte = srmmu_set_pte_nocache_viking;
+		sparc_update_rootmmu_dir = viking_update_rootmmu_dir;
+
+		flush_chunk = viking_flush_chunk; /* local flush _only_ */
 
 		/* We need this to make sure old viking takes no hits
 		 * on it's cache for dma snoops to workaround the
@@ -3193,11 +2785,12 @@ static void init_viking(void)
 		 * This is only necessary because of the new way in
 		 * which we use the IOMMU.
 		 */
-		flush_page_for_dma = viking_no_mxcc_flush_page;
+		flush_page_for_dma = viking_flush_page;
 	} else {
 		srmmu_name = "TI Viking/MXCC";
 		viking_mxcc_present = 1;
-		flush_cache_page_to_uncache = viking_mxcc_flush_page;
+
+		flush_chunk = viking_mxcc_flush_chunk; /* local flush _only_ */
 
 		/* MXCC vikings lack the DMA snooping bug. */
 		flush_page_for_dma = viking_flush_page_for_dma;
@@ -3214,13 +2807,13 @@ static void init_viking(void)
 	flush_tlb_range = viking_flush_tlb_range;
 
 	flush_page_to_ram = viking_flush_page_to_ram;
-	flush_tlb_page_for_cbit = viking_flush_tlb_page_for_cbit;
+	flush_sig_insns = viking_flush_sig_insns;
 
 	poke_srmmu = poke_viking;
 }
 
 /* Probe for the srmmu chip version. */
-static void get_srmmu_type(void)
+__initfunc(static void get_srmmu_type(void))
 {
 	unsigned long mreg, psr;
 	unsigned long mod_typ, mod_rev, psr_typ, psr_vers;
@@ -3242,9 +2835,12 @@ static void get_srmmu_type(void)
 			init_hypersparc();
 			break;
 		case 0:
+		case 2:
 			/* Uniprocessor Cypress */
 			init_cypress_604();
 			break;
+		case 10:
+		case 11:
 		case 12:
 			/* _REALLY OLD_ Cypress MP chips... */
 		case 13:
@@ -3254,7 +2850,8 @@ static void get_srmmu_type(void)
 			init_cypress_605(mod_rev);
 			break;
 		default:
-			srmmu_is_bad();
+			/* Some other Cypress revision, assume a 605. */
+			init_cypress_605(mod_rev);
 			break;
 		};
 		return;
@@ -3290,10 +2887,6 @@ extern unsigned long spwin_mmu_patchme, fwin_mmu_patchme,
 extern unsigned long spwin_srmmu_stackchk, srmmu_fwin_stackchk,
 	tsetup_srmmu_stackchk, srmmu_rett_stackchk;
 
-#ifdef __SMP__
-extern unsigned long rirq_mmu_patchme, srmmu_reti_stackchk;
-#endif
-
 extern unsigned long srmmu_fault;
 
 #define PATCH_BRANCH(insn, dest) do { \
@@ -3302,7 +2895,7 @@ extern unsigned long srmmu_fault;
 		*iaddr = SPARC_BRANCH((unsigned long) daddr, (unsigned long) iaddr); \
         } while(0);
 
-static void patch_window_trap_handlers(void)
+__initfunc(static void patch_window_trap_handlers(void))
 {
 	unsigned long *iaddr, *daddr;
 	
@@ -3310,9 +2903,6 @@ static void patch_window_trap_handlers(void)
 	PATCH_BRANCH(fwin_mmu_patchme, srmmu_fwin_stackchk);
 	PATCH_BRANCH(tsetup_mmu_patchme, tsetup_srmmu_stackchk);
 	PATCH_BRANCH(rtrap_mmu_patchme, srmmu_rett_stackchk);
-#ifdef __SMP__
-	PATCH_BRANCH(rirq_mmu_patchme, srmmu_reti_stackchk);
-#endif
 	PATCH_BRANCH(sparc_ttable[SP_TRAP_TFLT].inst_three, srmmu_fault);
 	PATCH_BRANCH(sparc_ttable[SP_TRAP_DFLT].inst_three, srmmu_fault);
 	PATCH_BRANCH(sparc_ttable[SP_TRAP_DACC].inst_three, srmmu_fault);
@@ -3325,19 +2915,10 @@ static void smp_flush_page_for_dma(unsigned long page)
 	xc1((smpfunc_t) local_flush_page_for_dma, page);
 }
 
-static void smp_flush_cache_page_to_uncache(unsigned long page)
-{
-	xc1((smpfunc_t) local_flush_cache_page_to_uncache, page);
-}
-
-static void smp_flush_tlb_page_for_cbit(unsigned long page)
-{
-	xc1((smpfunc_t) local_flush_tlb_page_for_cbit, page);
-}
 #endif
 
 /* Load up routines and constants for sun4m mmu */
-void ld_mmu_srmmu(void)
+__initfunc(void ld_mmu_srmmu(void))
 {
 	/* First the constants */
 	pmd_shift = SRMMU_PMD_SHIFT;
@@ -3360,6 +2941,7 @@ void ld_mmu_srmmu(void)
 	    
 	/* Functions */
 	set_pte = srmmu_set_pte_cacheable;
+	init_new_context = srmmu_init_new_context;
 	switch_to_context = srmmu_switch_to_context;
 	pmd_align = srmmu_pmd_align;
 	pgdir_align = srmmu_pgdir_align;
@@ -3414,8 +2996,7 @@ void ld_mmu_srmmu(void)
 	pte_mkdirty = srmmu_pte_mkdirty;
 	pte_mkyoung = srmmu_pte_mkyoung;
 	update_mmu_cache = srmmu_update_mmu_cache;
-	mmu_exit_hook = srmmu_exit_hook;
-	mmu_flush_hook = srmmu_flush_hook;
+	destroy_context = srmmu_destroy_context;
 	mmu_lockarea = srmmu_lockarea;
 	mmu_unlockarea = srmmu_unlockarea;
 
@@ -3424,7 +3005,9 @@ void ld_mmu_srmmu(void)
 	mmu_release_scsi_one = srmmu_release_scsi_one;
 	mmu_release_scsi_sgl = srmmu_release_scsi_sgl;
 
+#ifdef CONFIG_SBUS
 	mmu_map_dma_area = srmmu_map_dma_area;
+#endif
 
 	mmu_info = srmmu_mmu_info;
         mmu_v2p = srmmu_v2p;
@@ -3457,9 +3040,8 @@ void ld_mmu_srmmu(void)
 	local_flush_tlb_range = flush_tlb_range;
 	local_flush_tlb_page = flush_tlb_page;
 	local_flush_page_to_ram = flush_page_to_ram;
+	local_flush_sig_insns = flush_sig_insns;
 	local_flush_page_for_dma = flush_page_for_dma;
-	local_flush_cache_page_to_uncache = flush_cache_page_to_uncache;
-	local_flush_tlb_page_for_cbit = flush_tlb_page_for_cbit;
 
 	flush_cache_all = smp_flush_cache_all;
 	flush_cache_mm = smp_flush_cache_mm;
@@ -3470,8 +3052,7 @@ void ld_mmu_srmmu(void)
 	flush_tlb_range = smp_flush_tlb_range;
 	flush_tlb_page = smp_flush_tlb_page;
 	flush_page_to_ram = smp_flush_page_to_ram;
+	flush_sig_insns = smp_flush_sig_insns;
 	flush_page_for_dma = smp_flush_page_for_dma;
-	flush_cache_page_to_uncache = smp_flush_cache_page_to_uncache;
-	flush_tlb_page_for_cbit = smp_flush_tlb_page_for_cbit;
 #endif
 }
diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
index fdc74d3d7..ebeada4c7 100644
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -1,4 +1,4 @@
-/* $Id: sun4c.c,v 1.121 1996/11/01 20:36:27 ecd Exp $
+/* $Id: sun4c.c,v 1.143 1997/04/11 00:42:14 davem Exp $
  * sun4c.c: Doing in software what should be done in hardware.
  *
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
@@ -8,6 +8,7 @@
 
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/init.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -20,22 +21,51 @@
 #include <asm/io.h>
 #include <asm/oplib.h>
 #include <asm/openprom.h>
+#include <asm/mmu_context.h>
+
+/* TODO: Make it such that interrupt handlers cannot dick with
+ *       the user segment lists, most of the cli/sti pairs can
+ *       disappear once that is taken care of.
+ */
+
+/* XXX Ok the real performance win, I figure, will be to use a combined hashing
+ * XXX and bitmap scheme to keep track of what we have mapped where.  The whole
+ * XXX incentive is to make it such that the range flushes can be serviced
+ * XXX always in near constant time. --DaveM
+ */
 
 extern int num_segmaps, num_contexts;
 
-/* Small structure for ease of handling in the low level kernel fault
- * handler. This holds all information necessary, like the sun4c_ufree_ring
- * for user segments.
+/* Define this to get extremely anal debugging, undefine for performance. */
+/* #define DEBUG_SUN4C_MM */
+
+#define UWINMASK_OFFSET (const unsigned long)(&(((struct task_struct *)0)->tss.uwinmask))
+
+/* This is used in many routines below. */
+#define FUW_INLINE do {							\
+	register int ctr asm("g5");					\
+	ctr = 0;							\
+	__asm__ __volatile__("\n"					\
+	"1:	ld	[%%g6 + %2], %%g4	! flush user windows\n"	\
+	"	orcc	%%g0, %%g4, %%g0\n"				\
+	"	add	%0, 1, %0\n"					\
+	"	bne	1b\n"						\
+	"	 save	%%sp, -64, %%sp\n"				\
+	"2:	subcc	%0, 1, %0\n"					\
+	"	bne	2b\n"						\
+	"	 restore %%g0, %%g0, %%g0\n"				\
+	: "=&r" (ctr)							\
+	: "0" (ctr), "i" (UWINMASK_OFFSET)				\
+	: "g4", "cc");							\
+} while(0);
+
+/* That's it, we prom_halt() if the cache size is something other than 65536.
+ * So let's save some cycles and just use that everywhere except for that bootup
+ * sanity check.
  */
-struct sun4c_segment_info {
-	unsigned long vaddr;
-	unsigned char pseg;
-};
-struct sun4c_segment_info *sun4c_kernel_next;
+#define SUN4C_VAC_SIZE	65536
 
 #define SUN4C_KERNEL_BUCKETS 32
-#define SUN4C_KERNEL_BSIZE (sizeof(struct sun4c_segment_info) \
-			    * SUN4C_KERNEL_BUCKETS)
 
 #ifndef MAX
 #define MAX(a,b) ((a)<(b)?(b):(a))
@@ -45,7 +75,6 @@ struct sun4c_segment_info *sun4c_kernel_next;
 #endif
 
 
-
 #define KGPROF_PROFILING 0
 #if KGPROF_PROFILING
 #define KGPROF_DEPTH 3 /* this needs to match the code below */
@@ -85,6 +114,7 @@ static inline void kgprof_profile(void)
 /* Flushing the cache. */
 struct sun4c_vac_props sun4c_vacinfo;
 static int ctxflushes, segflushes, pageflushes;
+unsigned long sun4c_kernel_faults;
 
 /* convert a virtual address to a physical address and vice
    versa. Easy on the 4c */
@@ -110,7 +140,7 @@ void sun4c_flush_all(void)
 
 	/* Clear 'valid' bit in all cache line tags */
 	begin = AC_CACHETAGS;
-	end = (AC_CACHETAGS + sun4c_vacinfo.num_bytes);
+	end = (AC_CACHETAGS + SUN4C_VAC_SIZE);
 	while(begin < end) {
 		__asm__ __volatile__("sta %%g0, [%0] %1\n\t" : :
 				     "r" (begin), "i" (ASI_CONTROL));
@@ -118,56 +148,70 @@ void sun4c_flush_all(void)
 	}
 }
 
-/* Blow the entire current context out of the virtual cache. */
-static inline void sun4c_flush_context(void)
+/* Context level flush. */
+static inline void sun4c_flush_context_hw(void)
 {
-	unsigned long vaddr;
+	unsigned long end = SUN4C_VAC_SIZE;
+	unsigned pgsz = PAGE_SIZE;
 
 	ctxflushes++;
-	if(sun4c_vacinfo.do_hwflushes) {
-		for(vaddr=0; vaddr < sun4c_vacinfo.num_bytes; vaddr+=PAGE_SIZE)
-			__asm__ __volatile__("sta %%g0, [%0] %1\n\t" : :
-					     "r" (vaddr), "i" (ASI_HWFLUSHCONTEXT));
-	} else {
-		/* AJT: possibly read the tags and avoid flushing the ones that 
-		   are above 0xf0000000 so the kernel isn't flushed all the time */
-		__asm__ __volatile__("add	%1, %1, %%g1\n\t"
-				     "add	%1, %%g1, %%g2\n\t"
-				     "add	%1, %%g2, %%g3\n\t"
-				     "add	%1, %%g3, %%g4\n\t"
-				     "add	%1, %%g4, %%g5\n\t"
-				     "add	%1, %%g5, %%o4\n\t"
-				     "add	%1, %%o4, %%o5\n"
-				     "1:\n\t"
-				     "subcc	%0, %%o5, %0\n\t"
-				     "sta	%%g0, [%0] %2\n\t"
-				     "sta	%%g0, [%0 + %1] %2\n\t"
-				     "sta	%%g0, [%0 + %%g1] %2\n\t"
-				     "sta	%%g0, [%0 + %%g2] %2\n\t"
-				     "sta	%%g0, [%0 + %%g3] %2\n\t"
-				     "sta	%%g0, [%0 + %%g4] %2\n\t"
-				     "sta	%%g0, [%0 + %%g5] %2\n\t"
-				     "bg	1b\n\t"
-				     " sta	%%g0, [%0 + %%o4] %2\n\t" : :
-				     "r" (sun4c_vacinfo.num_bytes),
-				     "r" (sun4c_vacinfo.linesize),
-				     "i" (ASI_FLUSHCTX) :
-				     "g1", "g2", "g3", "g4", "g5", "o4", "o5");
-	}
+	__asm__ __volatile__("
+1:	subcc	%0, %2, %0
+	bg	1b
+	 sta	%%g0, [%0] %3
+	nop; nop; nop;		! Weitek hwbug
+"	: "=&r" (end)
+	: "0" (end), "r" (pgsz), "i" (ASI_HWFLUSHCONTEXT)
+	: "cc");
+}
+
+/* Don't inline the software version as it eats too many cache lines if expanded. */
+static void sun4c_flush_context_sw(void)
+{
+	unsigned long nbytes = SUN4C_VAC_SIZE;
+	unsigned long lsize = sun4c_vacinfo.linesize;
+
+	ctxflushes++;
+	__asm__ __volatile__("
+	add	%2, %2, %%g1
+	add	%2, %%g1, %%g2
+	add	%2, %%g2, %%g3
+	add	%2, %%g3, %%g4
+	add	%2, %%g4, %%g5
+	add	%2, %%g5, %%o4
+	add	%2, %%o4, %%o5
+1:	subcc	%0, %%o5, %0
+	sta	%%g0, [%0] %3
+	sta	%%g0, [%0 + %2] %3
+	sta	%%g0, [%0 + %%g1] %3
+	sta	%%g0, [%0 + %%g2] %3
+	sta	%%g0, [%0 + %%g3] %3
+	sta	%%g0, [%0 + %%g4] %3
+	sta	%%g0, [%0 + %%g5] %3
+	bg	1b
+	 sta	%%g0, [%1 + %%o4] %3
+"	: "=&r" (nbytes)
+	: "0" (nbytes), "r" (lsize), "i" (ASI_FLUSHCTX)
+	: "g1", "g2", "g3", "g4", "g5", "o4", "o5", "cc");
 }
 
 /* Scrape the segment starting at ADDR from the virtual cache. */
 static inline void sun4c_flush_segment(unsigned long addr)
 {
+	if(sun4c_get_segmap(addr) == invalid_segment)
+		return;
+
 	segflushes++;
-	addr &= SUN4C_REAL_PGDIR_MASK;
 	if(sun4c_vacinfo.do_hwflushes) {
-		unsigned long end = (addr + sun4c_vacinfo.num_bytes);
+		unsigned long end = (addr + SUN4C_VAC_SIZE);
 
 		for( ; addr < end; addr += PAGE_SIZE)
-			__asm__ __volatile__("sta %%g0, [%0] %1\n\t" : :
+			__asm__ __volatile__("sta %%g0, [%0] %1;nop;nop;nop;\n\t" : :
 					     "r" (addr), "i" (ASI_HWFLUSHSEG));
 	} else {
+		unsigned long nbytes = SUN4C_VAC_SIZE;
+		unsigned long lsize = sun4c_vacinfo.linesize;
+
 		__asm__ __volatile__("add	%2, %2, %%g1\n\t"
 				     "add	%2, %%g1, %%g2\n\t"
 				     "add	%2, %%g2, %%g3\n\t"
@@ -177,33 +221,89 @@ static inline void sun4c_flush_segment(unsigned long addr)
 				     "add	%2, %%o4, %%o5\n"
 				     "1:\n\t"
 				     "subcc	%1, %%o5, %1\n\t"
-				     "sta	%%g0, [%0] %3\n\t"
-				     "sta	%%g0, [%0 + %2] %3\n\t"
-				     "sta	%%g0, [%0 + %%g1] %3\n\t"
-				     "sta	%%g0, [%0 + %%g2] %3\n\t"
-				     "sta	%%g0, [%0 + %%g3] %3\n\t"
-				     "sta	%%g0, [%0 + %%g4] %3\n\t"
-				     "sta	%%g0, [%0 + %%g5] %3\n\t"
-				     "sta	%%g0, [%0 + %%o4] %3\n\t"
+				     "sta	%%g0, [%0] %6\n\t"
+				     "sta	%%g0, [%0 + %2] %6\n\t"
+				     "sta	%%g0, [%0 + %%g1] %6\n\t"
+				     "sta	%%g0, [%0 + %%g2] %6\n\t"
+				     "sta	%%g0, [%0 + %%g3] %6\n\t"
+				     "sta	%%g0, [%0 + %%g4] %6\n\t"
+				     "sta	%%g0, [%0 + %%g5] %6\n\t"
+				     "sta	%%g0, [%0 + %%o4] %6\n\t"
 				     "bg	1b\n\t"
-				     " add	%0, %%o5, %0\n\t" : :
-				     "r" (addr), "r" (sun4c_vacinfo.num_bytes),
-				     "r" (sun4c_vacinfo.linesize),
-				     "i" (ASI_FLUSHSEG) :
-				     "g1", "g2", "g3", "g4", "g5", "o4", "o5");
+				     " add	%0, %%o5, %0\n\t"
+				     : "=&r" (addr), "=&r" (nbytes), "=&r" (lsize)
+				     : "0" (addr), "1" (nbytes), "2" (lsize),
+				       "i" (ASI_FLUSHSEG)
+				     : "g1", "g2", "g3", "g4", "g5", "o4", "o5", "cc");
+	}
+}
+
+/* Call this version when you know hardware flushes are available. */
+static inline void sun4c_flush_segment_hw(unsigned long addr)
+{
+	if(sun4c_get_segmap(addr) != invalid_segment) {
+		unsigned long end;
+
+		segflushes++;
+		for(end = addr + SUN4C_VAC_SIZE; addr < end; addr += PAGE_SIZE)
+			__asm__ __volatile__("sta %%g0, [%0] %1"
+					     : : "r" (addr), "i" (ASI_HWFLUSHSEG));
+		/* Weitek POWER-UP hwbug workaround. */
+		__asm__ __volatile__("nop;nop;nop;	! Weitek hwbug");
+	}
+}
+
+/* Don't inline the software version as it eats too many cache lines if expanded. */
+static void sun4c_flush_segment_sw(unsigned long addr)
+{
+	if(sun4c_get_segmap(addr) != invalid_segment) {
+		unsigned long nbytes = SUN4C_VAC_SIZE;
+		unsigned long lsize = sun4c_vacinfo.linesize;
+
+		segflushes++;
+		__asm__ __volatile__("
+		add	%2, %2, %%g1
+		add	%2, %%g1, %%g2
+		add	%2, %%g2, %%g3
+		add	%2, %%g3, %%g4
+		add	%2, %%g4, %%g5
+		add	%2, %%g5, %%o4
+		add	%2, %%o4, %%o5
+1:		subcc	%1, %%o5, %1
+		sta	%%g0, [%0] %6
+		sta	%%g0, [%0 + %2] %6
+		sta	%%g0, [%0 + %%g1] %6
+		sta	%%g0, [%0 + %%g2] %6
+		sta	%%g0, [%0 + %%g3] %6
+		sta	%%g0, [%0 + %%g4] %6
+		sta	%%g0, [%0 + %%g5] %6
+		sta	%%g0, [%0 + %%o4] %6
+		bg	1b
+		 add	%0, %%o5, %0
+"		: "=&r" (addr), "=&r" (nbytes), "=&r" (lsize)
+		: "0" (addr), "1" (nbytes), "2" (lsize),
+		  "i" (ASI_FLUSHSEG)
+		: "g1", "g2", "g3", "g4", "g5", "o4", "o5", "cc");
 	}
 }
 
 /* Bolix one page from the virtual cache. */
-static inline void sun4c_flush_page(unsigned long addr)
+static void sun4c_flush_page(unsigned long addr)
 {
 	addr &= PAGE_MASK;
 
+	if((sun4c_get_pte(addr) & (_SUN4C_PAGE_NOCACHE | _SUN4C_PAGE_VALID)) !=
+	   _SUN4C_PAGE_VALID)
+		return;
+
 	pageflushes++;
 	if(sun4c_vacinfo.do_hwflushes) {
-		__asm__ __volatile__("sta %%g0, [%0] %1\n\t" : :
+		__asm__ __volatile__("sta %%g0, [%0] %1;nop;nop;nop;\n\t" : :
 				     "r" (addr), "i" (ASI_HWFLUSHPAGE));
 	} else {
+		unsigned long left = PAGE_SIZE;
+		unsigned long lsize = sun4c_vacinfo.linesize;
+
 		__asm__ __volatile__("add	%2, %2, %%g1\n\t"
 				     "add	%2, %%g1, %%g2\n\t"
 				     "add	%2, %%g2, %%g3\n\t"
@@ -213,20 +313,70 @@ static inline void sun4c_flush_page(unsigned long addr)
 				     "add	%2, %%o4, %%o5\n"
 				     "1:\n\t"
 				     "subcc	%1, %%o5, %1\n\t"
-				     "sta	%%g0, [%0] %3\n\t"
-				     "sta	%%g0, [%0 + %2] %3\n\t"
-				     "sta	%%g0, [%0 + %%g1] %3\n\t"
-				     "sta	%%g0, [%0 + %%g2] %3\n\t"
-				     "sta	%%g0, [%0 + %%g3] %3\n\t"
-				     "sta	%%g0, [%0 + %%g4] %3\n\t"
-				     "sta	%%g0, [%0 + %%g5] %3\n\t"
-				     "sta	%%g0, [%0 + %%o4] %3\n\t"
+				     "sta	%%g0, [%0] %6\n\t"
+				     "sta	%%g0, [%0 + %2] %6\n\t"
+				     "sta	%%g0, [%0 + %%g1] %6\n\t"
+				     "sta	%%g0, [%0 + %%g2] %6\n\t"
+				     "sta	%%g0, [%0 + %%g3] %6\n\t"
+				     "sta	%%g0, [%0 + %%g4] %6\n\t"
+				     "sta	%%g0, [%0 + %%g5] %6\n\t"
+				     "sta	%%g0, [%0 + %%o4] %6\n\t"
 				     "bg	1b\n\t"
-				     " add	%0, %%o5, %0\n\t" : :
-				     "r" (addr), "r" (PAGE_SIZE),
-				     "r" (sun4c_vacinfo.linesize),
-				     "i" (ASI_FLUSHPG) :
-				     "g1", "g2", "g3", "g4", "g5", "o4", "o5");
+				     " add	%0, %%o5, %0\n\t"
+				     : "=&r" (addr), "=&r" (left), "=&r" (lsize)
+				     : "0" (addr), "1" (left), "2" (lsize),
+				       "i" (ASI_FLUSHPG)
+				     : "g1", "g2", "g3", "g4", "g5", "o4", "o5", "cc");
+	}
+}
+
+/* Again, hw-only and sw-only cache page-level flush variants. */
+static inline void sun4c_flush_page_hw(unsigned long addr)
+{
+	addr &= PAGE_MASK;
+	if((sun4c_get_pte(addr) & (_SUN4C_PAGE_NOCACHE | _SUN4C_PAGE_VALID)) ==
+	   _SUN4C_PAGE_VALID) {
+		pageflushes++;
+		__asm__ __volatile__("sta %%g0, [%0] %1"
+				     : : "r" (addr), "i" (ASI_HWFLUSHPAGE));
+		/* Weitek POWER-UP hwbug workaround. */
+		__asm__ __volatile__("nop;nop;nop;	! Weitek hwbug");
+	}
+}
+
+/* Don't inline the software version as it eats too many cache lines if expanded. */
+static void sun4c_flush_page_sw(unsigned long addr)
+{
+	addr &= PAGE_MASK;
+	if((sun4c_get_pte(addr) & (_SUN4C_PAGE_NOCACHE | _SUN4C_PAGE_VALID)) ==
+	   _SUN4C_PAGE_VALID) {
+		unsigned long left = PAGE_SIZE;
+		unsigned long lsize = sun4c_vacinfo.linesize;
+
+		pageflushes++;
+		__asm__ __volatile__("
+		add	%2, %2, %%g1
+		add	%2, %%g1, %%g2
+		add	%2, %%g2, %%g3
+		add	%2, %%g3, %%g4
+		add	%2, %%g4, %%g5
+		add	%2, %%g5, %%o4
+		add	%2, %%o4, %%o5
+1:		subcc	%1, %%o5, %1
+		sta	%%g0, [%0] %6
+		sta	%%g0, [%0 + %2] %6
+		sta	%%g0, [%0 + %%g1] %6
+		sta	%%g0, [%0 + %%g2] %6
+		sta	%%g0, [%0 + %%g3] %6
+		sta	%%g0, [%0 + %%g4] %6
+		sta	%%g0, [%0 + %%g5] %6
+		sta	%%g0, [%0 + %%o4] %6
+		bg	1b
+		 add	%0, %%o5, %0
+"		: "=&r" (addr), "=&r" (left), "=&r" (lsize)
+		: "0" (addr), "1" (left), "2" (lsize),
+		  "i" (ASI_FLUSHPG)
+		: "g1", "g2", "g3", "g4", "g5", "o4", "o5", "cc");
 	}
 }
 
@@ -280,10 +430,19 @@ static inline void sun4c_init_clean_mmu(unsigned long kernel_end)
 void sun4c_probe_vac(void)
 {
 	sun4c_disable_vac();
-	sun4c_vacinfo.num_bytes = prom_getintdefault(prom_root_node,
-						     "vac-size", 65536);
-	sun4c_vacinfo.linesize = prom_getintdefault(prom_root_node,
-						    "vac-linesize", 16);
+	if((idprom->id_machtype == (SM_SUN4C | SM_4C_SS1)) ||
+	   (idprom->id_machtype == (SM_SUN4C | SM_4C_SS1PLUS))) {
+		/* PROM on SS1 lacks this info, to be super safe we
+		 * hard code it here since this arch is cast in stone.
+		 */
+		sun4c_vacinfo.num_bytes = 65536;
+		sun4c_vacinfo.linesize = 16;
+	} else {
+		sun4c_vacinfo.num_bytes = prom_getintdefault(prom_root_node,
+							     "vac-size", 65536);
+		sun4c_vacinfo.linesize = prom_getintdefault(prom_root_node,
+							    "vac-linesize", 16);
+	}
 	sun4c_vacinfo.num_lines =
 		(sun4c_vacinfo.num_bytes / sun4c_vacinfo.linesize);
 	switch(sun4c_vacinfo.linesize) {
@@ -299,12 +458,11 @@ void sun4c_probe_vac(void)
 		prom_halt();
 	};
 
-	/* Only vac-hwflush (with a dash) is reliable, weitek
-	 * power-up processor claims vac_hwflush (underscore)
-	 * yet crashes if you try to use hardware based flushes.
-	 */
 	sun4c_vacinfo.do_hwflushes = prom_getintdefault(prom_root_node,
 							"vac-hwflush", 0);
+	if(sun4c_vacinfo.do_hwflushes == 0)
+		sun4c_vacinfo.do_hwflushes = prom_getintdefault(prom_root_node,
+								"vac_hwflush", 0);
 
 	if(sun4c_vacinfo.num_bytes != 65536) {
 		prom_printf("WEIRD Sun4C VAC cache size, tell davem");
@@ -320,8 +478,9 @@ extern unsigned long invalid_segment_patch1, invalid_segment_patch1_ff;
 extern unsigned long invalid_segment_patch2, invalid_segment_patch2_ff;
 extern unsigned long num_context_patch1, num_context_patch1_16;
 extern unsigned long num_context_patch2, num_context_patch2_16;
-extern unsigned long sun4c_kernel_buckets_patch;
-extern unsigned long sun4c_kernel_buckets_patch_32;
+extern unsigned long vac_linesize_patch, vac_linesize_patch_32;
+extern unsigned long vac_hwflush_patch1, vac_hwflush_patch1_on;
+extern unsigned long vac_hwflush_patch2, vac_hwflush_patch2_on;
 
 #define PATCH_INSN(src, dst) do {	\
 		daddr = &(dst);		\
@@ -363,25 +522,38 @@ static void patch_kernel_fault_handler(void)
 				    num_contexts);
 			prom_halt();
 	}
-	switch (SUN4C_KERNEL_BUCKETS) {
+	if(sun4c_vacinfo.do_hwflushes != 0) {
+		PATCH_INSN(vac_hwflush_patch1_on, vac_hwflush_patch1);
+		PATCH_INSN(vac_hwflush_patch2_on, vac_hwflush_patch2);
+	} else {
+		switch(sun4c_vacinfo.linesize) {
 		case 16:
 			/* Default, nothing to do. */
 			break;
 		case 32:
-			PATCH_INSN(sun4c_kernel_buckets_patch_32,
-				   sun4c_kernel_buckets_patch);
+			PATCH_INSN(vac_linesize_patch_32, vac_linesize_patch);
 			break;
 		default:
-			prom_printf("Unhandled number of kernel buckets: %d\n",
-				    SUN4C_KERNEL_BUCKETS);
+			prom_printf("Impossible VAC linesize %d, halting...\n",
+				    sun4c_vacinfo.linesize);
 			prom_halt();
+		};
 	}
 }
 
 static void sun4c_probe_mmu(void)
 {
-	num_segmaps = prom_getintdefault(prom_root_node, "mmu-npmg", 128);
-	num_contexts = prom_getintdefault(prom_root_node, "mmu-nctx", 0x8);
+	if((idprom->id_machtype == (SM_SUN4C | SM_4C_SS1)) ||
+	   (idprom->id_machtype == (SM_SUN4C | SM_4C_SS1PLUS))) {
+		/* Hardcode these just to be safe, PROM on SS1 does
+		 * not have this info available in the root node.
+		 */
+		num_segmaps = 128;
+		num_contexts = 8;
+	} else {
+		num_segmaps = prom_getintdefault(prom_root_node, "mmu-npmg", 128);
+		num_contexts = prom_getintdefault(prom_root_node, "mmu-nctx", 0x8);
+	}
 	patch_kernel_fault_handler();
 }
 
@@ -408,7 +580,8 @@ static inline void sun4c_init_ss2_cache_bug(void)
 	extern unsigned long start;
 
 	if((idprom->id_machtype == (SM_SUN4C | SM_4C_SS2)) ||
-	   (idprom->id_machtype == (SM_SUN4C | SM_4C_IPX))) {
+	   (idprom->id_machtype == (SM_SUN4C | SM_4C_IPX)) ||
+	   (idprom->id_machtype == (SM_SUN4C | SM_4C_ELC))) {
 		/* Whee.. */
 		printk("SS2 cache bug detected, uncaching trap table page\n");
 		sun4c_flush_page((unsigned int) &start);
@@ -432,7 +605,8 @@ static void sun4c_map_dma_area(unsigned long addr, int len)
 		sun4c_flush_page(page);
 		page -= PAGE_OFFSET;
 		page >>= PAGE_SHIFT;
-		page |= (_SUN4C_PAGE_VALID | _SUN4C_PAGE_WRITE | _SUN4C_PAGE_NOCACHE);
+		page |= (_SUN4C_PAGE_VALID | _SUN4C_PAGE_DIRTY |
+			 _SUN4C_PAGE_NOCACHE | _SUN4C_PAGE_PRIV);
 		sun4c_put_pte(addr, page);
 		addr += PAGE_SIZE;
 	}
@@ -440,6 +614,11 @@ static void sun4c_map_dma_area(unsigned long addr, int len)
 
 
 /* TLB management. */
+
+/* Don't change this struct without changing entry.S. This is used
+ * in the in-window kernel fault handler, and you don't want to mess
+ * with that. (See sun4c_fault in entry.S).
+ */
 struct sun4c_mmu_entry {
 	struct sun4c_mmu_entry *next;
 	struct sun4c_mmu_entry *prev;
@@ -449,7 +628,7 @@ struct sun4c_mmu_entry {
 };
 static struct sun4c_mmu_entry mmu_entry_pool[256];
 
-static void sun4c_init_mmu_entry_pool(void)
+__initfunc(static void sun4c_init_mmu_entry_pool(void))
 {
 	int i;
 
@@ -500,7 +679,7 @@ static inline void sun4c_init_map_kernelprom(unsigned long kernel_end)
 	}
 }
 
-static void sun4c_init_lock_area(unsigned long start, unsigned long end)
+__initfunc(static void sun4c_init_lock_area(unsigned long start, unsigned long end))
 {
 	int i, ctx;
 
@@ -516,19 +695,18 @@ static void sun4c_init_lock_area(unsigned long start, unsigned long end)
 	}
 }
 
+/* Don't change this struct without changing entry.S. This is used
+ * in the in-window kernel fault handler, and you don't want to mess
+ * with that. (See sun4c_fault in entry.S).
+ */
 struct sun4c_mmu_ring {
 	struct sun4c_mmu_entry ringhd;
 	int num_entries;
 };
 static struct sun4c_mmu_ring sun4c_context_ring[16]; /* used user entries */
 static struct sun4c_mmu_ring sun4c_ufree_ring;       /* free user entries */
-
-static inline void sun4c_next_kernel_bucket(struct sun4c_segment_info **next)
-{
-	(*next)++;
-	*next = (struct sun4c_segment_info *)
-			((unsigned long)*next & ~SUN4C_KERNEL_BSIZE);
-}
+struct sun4c_mmu_ring sun4c_kernel_ring;      /* used kernel entries */
+struct sun4c_mmu_ring sun4c_kfree_ring;       /* free kernel entries */
 
 static inline void sun4c_init_rings(unsigned long *mempool)
 {
@@ -542,11 +720,16 @@ static inline void sun4c_init_rings(unsigned long *mempool)
 	sun4c_ufree_ring.ringhd.next = sun4c_ufree_ring.ringhd.prev =
 		&sun4c_ufree_ring.ringhd;
 	sun4c_ufree_ring.num_entries = 0;
-	/* This needs to be aligned to twice it's size for speed. */
-	sun4c_kernel_next = sparc_init_alloc(mempool, 2 * SUN4C_KERNEL_BSIZE);
+	sun4c_kernel_ring.ringhd.next = sun4c_kernel_ring.ringhd.prev =
+		&sun4c_kernel_ring.ringhd;
+	sun4c_kernel_ring.num_entries = 0;
+	sun4c_kfree_ring.ringhd.next = sun4c_kfree_ring.ringhd.prev =
+		&sun4c_kfree_ring.ringhd;
+	sun4c_kfree_ring.num_entries = 0;
 }
 
-static inline void add_ring(struct sun4c_mmu_ring *ring, struct sun4c_mmu_entry *entry)
+static inline void add_ring(struct sun4c_mmu_ring *ring,
+			    struct sun4c_mmu_entry *entry)
 {
 	struct sun4c_mmu_entry *head = &ring->ringhd;
 
@@ -556,23 +739,33 @@ static inline void add_ring(struct sun4c_mmu_ring *ring, struct sun4c_mmu_entry
 	ring->num_entries++;
 }
 
-static inline void remove_ring(struct sun4c_mmu_ring *ring, struct sun4c_mmu_entry *entry)
+static inline void add_ring_ordered(struct sun4c_mmu_ring *ring,
+				    struct sun4c_mmu_entry *entry)
 {
-	struct sun4c_mmu_entry *next = entry->next;
+	struct sun4c_mmu_entry *head = &ring->ringhd;
+	unsigned long addr = entry->vaddr;
 
-	(next->prev = entry->prev)->next = next;
-	ring->num_entries--;
+	if(head->next != &ring->ringhd) {
+		while((head->next != &ring->ringhd) && (head->next->vaddr < addr))
+			head = head->next;
+	}
+	entry->prev = head;
+	(entry->next = head->next)->prev = entry;
+	head->next = entry;
+	ring->num_entries++;
 }
 
-static inline void recycle_ring(struct sun4c_mmu_ring *ring, struct sun4c_mmu_entry *entry)
+static inline void remove_ring(struct sun4c_mmu_ring *ring,
+			       struct sun4c_mmu_entry *entry)
 {
-	struct sun4c_mmu_entry *head = &ring->ringhd;
 	struct sun4c_mmu_entry *next = entry->next;
 
 	(next->prev = entry->prev)->next = next;
-	entry->prev = head; (entry->next = head->next)->prev = entry;
-	head->next = entry;
-	/* num_entries stays the same */
+	ring->num_entries--;
+#ifdef DEBUG_SUN4C_MM
+	if(ring->num_entries < 0)
+		panic("sun4c: Ring num_entries < 0!");
+#endif
 }
 
 static inline void free_user_entry(int ctx, struct sun4c_mmu_entry *entry)
@@ -584,10 +777,17 @@ static inline void free_user_entry(int ctx, struct sun4c_mmu_entry *entry)
 static inline void assign_user_entry(int ctx, struct sun4c_mmu_entry *entry) 
 {
         remove_ring(&sun4c_ufree_ring, entry);
-        add_ring(sun4c_context_ring+ctx, entry);
+        add_ring_ordered(sun4c_context_ring+ctx, entry);
 }
 
-static void sun4c_init_fill_kernel_ring(int howmany)
+static inline void free_kernel_entry(struct sun4c_mmu_entry *entry,
+				     struct sun4c_mmu_ring *ring)
+{
+        remove_ring(ring, entry);
+        add_ring(&sun4c_kfree_ring, entry);
+}
+
+__initfunc(static void sun4c_init_fill_kernel_ring(int howmany))
 {
 	int i;
 
@@ -597,14 +797,12 @@ static void sun4c_init_fill_kernel_ring(int howmany)
 				break;
 		mmu_entry_pool[i].locked = 1;
 		sun4c_init_clean_segmap(i);
-		sun4c_kernel_next->vaddr = 0;
-		sun4c_kernel_next->pseg = mmu_entry_pool[i].pseg;
-		sun4c_next_kernel_bucket(&sun4c_kernel_next);
+		add_ring(&sun4c_kfree_ring, &mmu_entry_pool[i]);
 		howmany--;
 	}
 }
 
-static void sun4c_init_fill_user_ring(void)
+__initfunc(static void sun4c_init_fill_user_ring(void))
 {
 	int i;
 
@@ -642,7 +840,6 @@ static inline void sun4c_kernel_map(struct sun4c_mmu_entry *kentry)
 
 static inline void sun4c_user_unmap(struct sun4c_mmu_entry *uentry)
 {
-	/* PM: need flush_user_windows() ?? */
 	sun4c_put_segmap(uentry->vaddr, invalid_segment);
 }
 
@@ -658,36 +855,127 @@ static inline void sun4c_user_map(struct sun4c_mmu_entry *uentry)
 	}
 }
 
-static inline void sun4c_demap_context(struct sun4c_mmu_ring *crp, unsigned char ctx)
+static void sun4c_demap_context_hw(struct sun4c_mmu_ring *crp, unsigned char ctx)
 {
-	struct sun4c_mmu_entry *this_entry, *next_entry;
-	int savectx = sun4c_get_context();
+	struct sun4c_mmu_entry *head = &crp->ringhd;
+	unsigned long flags;
 
-	this_entry = crp->ringhd.next;
-	flush_user_windows();
-	sun4c_set_context(ctx);
-	sun4c_flush_context();
-	while(crp->num_entries) {
-		next_entry = this_entry->next;
-		sun4c_user_unmap(this_entry);
-		free_user_entry(ctx, this_entry);
-		this_entry = next_entry;
+	save_and_cli(flags);
+	if(head->next != head) {
+		struct sun4c_mmu_entry *entry = head->next;
+		int savectx = sun4c_get_context();
+
+		FUW_INLINE
+		sun4c_set_context(ctx);
+		sun4c_flush_context_hw();
+		do {
+			struct sun4c_mmu_entry *next = entry->next;
+
+			sun4c_user_unmap(entry);
+			free_user_entry(ctx, entry);
+
+			entry = next;
+		} while(entry != head);
+		sun4c_set_context(savectx);
 	}
-	sun4c_set_context(savectx);
+	restore_flags(flags);
 }
 
-static inline void sun4c_demap_one(struct sun4c_mmu_ring *crp,unsigned char ctx)
+static void sun4c_demap_context_sw(struct sun4c_mmu_ring *crp, unsigned char ctx)
+{
+	struct sun4c_mmu_entry *head = &crp->ringhd;
+	unsigned long flags;
+
+	save_and_cli(flags);
+	if(head->next != head) {
+		struct sun4c_mmu_entry *entry = head->next;
+		int savectx = sun4c_get_context();
+
+		FUW_INLINE
+		sun4c_set_context(ctx);
+		sun4c_flush_context_sw();
+		do {
+			struct sun4c_mmu_entry *next = entry->next;
+
+			sun4c_user_unmap(entry);
+			free_user_entry(ctx, entry);
+
+			entry = next;
+		} while(entry != head);
+		sun4c_set_context(savectx);
+	}
+	restore_flags(flags);
+}
+
+static inline void sun4c_demap_one(struct sun4c_mmu_ring *crp, unsigned char ctx)
 {
 	/* by using .prev we get a kind of "lru" algorithm */
 	struct sun4c_mmu_entry *entry = crp->ringhd.prev;
+	unsigned long flags;
 	int savectx = sun4c_get_context();
 
-	flush_user_windows();
+#ifdef DEBUG_SUN4C_MM
+	if(entry == &crp->ringhd)
+		panic("sun4c_demap_one: Freeing from empty ctx ring.");
+#endif
+	FUW_INLINE
+	save_and_cli(flags);
 	sun4c_set_context(ctx);
 	sun4c_flush_segment(entry->vaddr);
 	sun4c_user_unmap(entry);
 	free_user_entry(ctx, entry);
 	sun4c_set_context(savectx);
+	restore_flags(flags);
+}
+
+static int sun4c_user_taken_entries = 0;  /* This is how much we have.             */
+static int max_user_taken_entries = 0;    /* This limits us and prevents deadlock. */
+
+static inline struct sun4c_mmu_entry *sun4c_kernel_strategy(void)
+{
+	struct sun4c_mmu_entry *this_entry;
+
+	/* If some are free, return first one. */
+	if(sun4c_kfree_ring.num_entries) {
+		this_entry = sun4c_kfree_ring.ringhd.next;
+		return this_entry;
+	}
+
+	/* Else free one up. */
+	this_entry = sun4c_kernel_ring.ringhd.prev;
+	sun4c_flush_segment(this_entry->vaddr);
+	sun4c_kernel_unmap(this_entry);
+	free_kernel_entry(this_entry, &sun4c_kernel_ring);
+	this_entry = sun4c_kfree_ring.ringhd.next;
+
+	return this_entry;
+}
+
+void sun4c_shrink_kernel_ring(void)
+{
+	struct sun4c_mmu_entry *entry;
+	unsigned long flags;
+
+	/* If an interrupt comes in here, we die... */
+	save_and_cli(flags);
+
+	if (sun4c_user_taken_entries) {
+		entry = sun4c_kernel_strategy();
+        	remove_ring(&sun4c_kfree_ring, entry);
+		add_ring(&sun4c_ufree_ring, entry);
+		sun4c_user_taken_entries--;
+#if 0
+		printk("shrink: ufree= %d, kfree= %d, kernel= %d\n",
+			sun4c_ufree_ring.num_entries,
+			sun4c_kfree_ring.num_entries,
+			sun4c_kernel_ring.num_entries);
+#endif
+#ifdef DEBUG_SUN4C_MM
+		if(sun4c_user_taken_entries < 0)
+			panic("sun4c_shrink_kernel_ring: taken < 0.");
+#endif
+	}
+	restore_flags(flags);
 }
 
 /* Using this method to free up mmu entries eliminates a lot of
@@ -699,101 +987,110 @@ static inline struct sun4c_mmu_entry *sun4c_user_strategy(void)
 	struct ctx_list *next_one;
 	struct sun4c_mmu_ring *rp = 0;
 	unsigned char ctx;
+#ifdef DEBUG_SUN4C_MM
+	int lim = num_contexts;
+#endif
 
 	/* If some are free, return first one. */
-	if(sun4c_ufree_ring.num_entries)
+	if(sun4c_ufree_ring.num_entries) {
+#ifdef DEBUG_SUN4C_MM
+		if(sun4c_ufree_ring.ringhd.next == &sun4c_ufree_ring.ringhd)
+			panic("sun4c_user_strategy: num_entries!=0 but ring empty.");
+#endif
+		return sun4c_ufree_ring.ringhd.next;
+	}
+
+	if (sun4c_user_taken_entries) {
+		sun4c_shrink_kernel_ring();
+#ifdef DEBUG_SUN4C_MM
+		if(sun4c_ufree_ring.ringhd.next == &sun4c_ufree_ring.ringhd)
+			panic("sun4c_user_strategy: kernel shrunk but ufree empty.");
+#endif
 		return sun4c_ufree_ring.ringhd.next;
+	}
 
 	/* Grab one from the LRU context. */
 	next_one = ctx_used.next;
-	while (sun4c_context_ring[next_one->ctx_number].num_entries == 0)
+	while ((sun4c_context_ring[next_one->ctx_number].num_entries == 0)
+#ifdef DEBUG_SUN4C_MM
+	       && (--lim >= 0)
+#endif
+	       )
 		next_one = next_one->next;
 
+#ifdef DEBUG_SUN4C_MM
+	if(lim < 0)
+		panic("No user segmaps!");
+#endif
+
 	ctx = next_one->ctx_number;
 	rp = &sun4c_context_ring[ctx];
 
-	sun4c_demap_one(rp,ctx);
+	sun4c_demap_one(rp, ctx);
+#ifdef DEBUG_SUN4C_MM
+	if(sun4c_ufree_ring.ringhd.next == &sun4c_ufree_ring.ringhd)
+		panic("sun4c_user_strategy: demapped one but ufree empty.");
+#endif
 	return sun4c_ufree_ring.ringhd.next;
 }
 
-static inline void alloc_user_segment(unsigned long address, unsigned char ctx)
+void sun4c_grow_kernel_ring(void)
 {
 	struct sun4c_mmu_entry *entry;
 
-	address &= SUN4C_REAL_PGDIR_MASK;
-	entry = sun4c_user_strategy();
-	assign_user_entry(ctx, entry);
-	entry->vaddr = address;
-	sun4c_user_map(entry);
-}
-
-/* XXX Just like kernel tlb replacement we'd like to have a low level
- * XXX equivalent for user faults which need not go through the mm
- * XXX subsystem just to load a mmu entry.  But this might not be as
- * XXX feasible since we need to go through the kernel page tables
- * XXX for this process, which we currently don't lock into the mmu
- * XXX so we would fault with traps off... must think about this...
- */
-void sun4c_update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
-{
-	unsigned long flags;
 #if 0
-	struct inode *inode;
-	struct vm_area_struct *vmaring;
-	unsigned long offset, vaddr;
-	unsigned long start;
-	pgd_t *pgdp;
-	pmd_t *pmdp;
-	pte_t *ptep;
+	printk("grow: ");
 #endif
 
-	save_and_cli(flags);
-	address &= PAGE_MASK;
-	if(sun4c_get_segmap(address) == invalid_segment)
-		alloc_user_segment(address, sun4c_get_context());
-	sun4c_put_pte(address, pte_val(pte));
+	/* Prevent deadlock condition. */
+	if(sun4c_user_taken_entries >= max_user_taken_entries) {
+#if 0
+		printk("deadlock avoidance, taken= %d max= %d\n",
+		       sun4c_user_taken_entries, max_user_taken_entries);
+#endif
+		return;
+	}
 
+	if (sun4c_ufree_ring.num_entries) {
+		entry = sun4c_ufree_ring.ringhd.next;
+#ifdef DEBUG_SUN4C_MM
+		if(entry == &sun4c_ufree_ring.ringhd)
+			panic("\nsun4c_grow_kernel_ring: num_entries!=0, ring empty.");
+#endif
+        	remove_ring(&sun4c_ufree_ring, entry);
+		add_ring(&sun4c_kfree_ring, entry);
+#ifdef DEBUG_SUN4C_MM
+		if(sun4c_user_taken_entries < 0)
+			panic("\nsun4c_grow_kernel_ring: taken < 0.");
+#endif
+		sun4c_user_taken_entries++;
 #if 0
-	if (!(vma->vm_flags & VM_WRITE) ||
-	    !(vma->vm_flags & VM_SHARED))
-		goto done;
+		printk("ufree= %d, kfree= %d, kernel= %d\n",
+			sun4c_ufree_ring.num_entries,
+			sun4c_kfree_ring.num_entries,
+			sun4c_kernel_ring.num_entries);
+#endif
+	}
+}
 
-	inode = vma->vm_inode;
-	if (!inode)
-		goto done;
-
-	offset = (address & PAGE_MASK) - vma->vm_start;
-	vmaring = inode->i_mmap; 
-	do {
-		vaddr = vmaring->vm_start + offset;
-
-		if (S4CVAC_BADALIAS(vaddr, address)) {
-			start = vma->vm_start;
-			while (start < vma->vm_end) {
-				pgdp = pgd_offset(vma->vm_mm, start);
-				pmdp = pmd_offset(pgdp, start);
-				ptep = pte_offset(pmdp, start);
-
-				if (sun4c_get_pte(start) & _SUN4C_PAGE_VALID)
-				sun4c_put_pte(start, sun4c_get_pte(start) |
-						     _SUN4C_PAGE_NOCACHE);
-
-				start += PAGE_SIZE;
-			}
-			goto done;
-		}
-	} while ((vmaring = vmaring->vm_next_share) != inode->i_mmap);
+static inline void alloc_user_segment(unsigned long address, unsigned char ctx)
+{
+	struct sun4c_mmu_entry *entry;
+	unsigned long flags;
 
-done:
-#endif
+	save_and_cli(flags);
+	entry = sun4c_user_strategy();
+	entry->vaddr = (address & SUN4C_REAL_PGDIR_MASK);
+	assign_user_entry(ctx, entry);
+	sun4c_user_map(entry);
 	restore_flags(flags);
 }
 
 /* This is now a fast in-window trap handler to avoid any and all races. */
 static void sun4c_quick_kernel_fault(unsigned long address)
 {
-        printk("Kernel faults at addr=0x%08lx\n", address);
-        panic("sun4c fault handler bolixed...");
+        printk("Kernel faults at addr 0x%08lx\n", address);
+        panic("sun4c kernel fault handler bolixed...");
 }
 
 /*
@@ -821,6 +1118,8 @@ struct task_bucket {
 
 struct task_bucket *sun4c_bucket[NR_TASKS];
 
+static int sun4c_lowbucket_avail;
+
 #define BUCKET_EMPTY     ((struct task_bucket *) 0)
 #define BUCKET_SIZE      (PAGE_SIZE << 2)
 #define BUCKET_SHIFT     14        /* log2(sizeof(struct task_bucket)) */
@@ -840,8 +1139,13 @@ static inline void get_locked_segment(unsigned long addr)
 	addr &= SUN4C_REAL_PGDIR_MASK;
 	stolen = sun4c_user_strategy();
 	remove_ring(&sun4c_ufree_ring, stolen);
+	max_user_taken_entries--;
+#ifdef DEBUG_SUN4C_MM
+	if(max_user_taken_entries < 0)
+		panic("get_locked_segment: max_user_taken < 0.");
+#endif
 	stolen->vaddr = addr;
-	flush_user_windows();
+	FUW_INLINE
 	sun4c_kernel_map(stolen);
 	restore_flags(flags);
 }
@@ -856,10 +1160,16 @@ static inline void free_locked_segment(unsigned long addr)
 	addr &= SUN4C_REAL_PGDIR_MASK;
 	pseg = sun4c_get_segmap(addr);
 	entry = &mmu_entry_pool[pseg];
-	flush_user_windows();
+
+	FUW_INLINE
 	sun4c_flush_segment(addr);
 	sun4c_kernel_unmap(entry);
 	add_ring(&sun4c_ufree_ring, entry);
+#ifdef DEBUG_SUN4C_MM
+	if(max_user_taken_entries < 0)
+		panic("free_locked_segment: max_user_taken < 0.");
+#endif
+	max_user_taken_entries++;
 	restore_flags(flags);
 }
 
@@ -873,6 +1183,7 @@ static inline void garbage_collect(int entry)
 	for(end = (start + 16); start < end; start++)
 		if(sun4c_bucket[start] != BUCKET_EMPTY)
 			return;
+
 	/* Entire segment empty, release it. */
 	free_locked_segment(BUCKET_ADDR(entry));
 }
@@ -885,23 +1196,23 @@ static struct task_struct *sun4c_alloc_task_struct(void)
 	page = get_free_page(GFP_KERNEL);
 	if(!page)
 		return (struct task_struct *) 0;
-	/* XXX Bahh, linear search too slow, use hash
-	 * XXX table in final implementation.  Or
-	 * XXX keep track of first free when we free
-	 * XXX a bucket... anything but this.
-	 */
-	for(entry = 0; entry < NR_TASKS; entry++)
+
+	for(entry = sun4c_lowbucket_avail; entry < NR_TASKS; entry++)
 		if(sun4c_bucket[entry] == BUCKET_EMPTY)
 			break;
 	if(entry == NR_TASKS) {
 		free_page(page);
 		return (struct task_struct *) 0;
 	}
+	if(entry >= sun4c_lowbucket_avail)
+		sun4c_lowbucket_avail = entry + 1;
+
 	addr = BUCKET_ADDR(entry);
 	sun4c_bucket[entry] = (struct task_bucket *) addr;
 	if(sun4c_get_segmap(addr) == invalid_segment)
 		get_locked_segment(addr);
 	sun4c_put_pte(addr, BUCKET_PTE(page));
+
 	return (struct task_struct *) addr;
 }
 
@@ -920,40 +1231,84 @@ static unsigned long sun4c_alloc_kernel_stack(struct task_struct *tsk)
 		free_page(page[0]);
 		return 0;
 	}
-	saddr += (PAGE_SIZE << 1);
+
+	saddr += PAGE_SIZE << 1;
 	sun4c_put_pte(saddr, BUCKET_PTE(page[0]));
 	sun4c_put_pte(saddr + PAGE_SIZE, BUCKET_PTE(page[1]));
 	return saddr;
 }
 
-static void sun4c_free_kernel_stack(unsigned long stack)
+static void sun4c_free_kernel_stack_hw(unsigned long stack)
+{
+	unsigned long page[2];
+
+	page[0] = BUCKET_PTE_PAGE(sun4c_get_pte(stack));
+	page[1] = BUCKET_PTE_PAGE(sun4c_get_pte(stack+PAGE_SIZE));
+
+	/* We are deleting a mapping, so the flushes here are mandatory. */
+	sun4c_flush_page_hw(stack);
+	sun4c_flush_page_hw(stack + PAGE_SIZE);
+
+	sun4c_put_pte(stack, 0);
+	sun4c_put_pte(stack + PAGE_SIZE, 0);
+	free_page(page[0]);
+	free_page(page[1]);
+}
+
+static void sun4c_free_task_struct_hw(struct task_struct *tsk)
+{
+	unsigned long tsaddr = (unsigned long) tsk;
+	unsigned long page = BUCKET_PTE_PAGE(sun4c_get_pte(tsaddr));
+	int entry = BUCKET_NUM(tsaddr);
+
+	/* We are deleting a mapping, so the flush here is mandatory. */
+	sun4c_flush_page_hw(tsaddr);
+
+	sun4c_put_pte(tsaddr, 0);
+	sun4c_bucket[entry] = BUCKET_EMPTY;
+	if(entry < sun4c_lowbucket_avail)
+		sun4c_lowbucket_avail = entry;
+
+	free_page(page);
+	garbage_collect(entry);
+}
+
+static void sun4c_free_kernel_stack_sw(unsigned long stack)
 {
 	unsigned long page[2];
 
 	page[0] = BUCKET_PTE_PAGE(sun4c_get_pte(stack));
 	page[1] = BUCKET_PTE_PAGE(sun4c_get_pte(stack+PAGE_SIZE));
-	sun4c_flush_page(stack);
-	sun4c_flush_page(stack + PAGE_SIZE);
+
+	/* We are deleting a mapping, so the flushes here are mandatory. */
+	sun4c_flush_page_sw(stack);
+	sun4c_flush_page_sw(stack + PAGE_SIZE);
+
 	sun4c_put_pte(stack, 0);
 	sun4c_put_pte(stack + PAGE_SIZE, 0);
 	free_page(page[0]);
 	free_page(page[1]);
 }
 
-static void sun4c_free_task_struct(struct task_struct *tsk)
+static void sun4c_free_task_struct_sw(struct task_struct *tsk)
 {
 	unsigned long tsaddr = (unsigned long) tsk;
 	unsigned long page = BUCKET_PTE_PAGE(sun4c_get_pte(tsaddr));
 	int entry = BUCKET_NUM(tsaddr);
 
-	sun4c_flush_page(tsaddr);
+	/* We are deleting a mapping, so the flush here is mandatory. */
+	sun4c_flush_page_sw(tsaddr);
+
 	sun4c_put_pte(tsaddr, 0);
 	sun4c_bucket[entry] = BUCKET_EMPTY;
+	if(entry < sun4c_lowbucket_avail)
+		sun4c_lowbucket_avail = entry;
+
 	free_page(page);
 	garbage_collect(entry);
 }
 
-static void sun4c_init_buckets(void)
+__initfunc(static void sun4c_init_buckets(void))
 {
 	int entry;
 
@@ -963,6 +1318,7 @@ static void sun4c_init_buckets(void)
 	}
 	for(entry = 0; entry < NR_TASKS; entry++)
 		sun4c_bucket[entry] = BUCKET_EMPTY;
+	sun4c_lowbucket_avail = 0;
 }
 
 static unsigned long sun4c_iobuffer_start;
@@ -1017,7 +1373,10 @@ found:
 		pte |= _SUN4C_PAGE_NOCACHE;
 		set_bit(scan, sun4c_iobuffer_map);
 		apage = (scan << PAGE_SHIFT) + sun4c_iobuffer_start;
+
+		/* Flush original mapping so we see the right things later. */
 		sun4c_flush_page(vpage);
+
 		sun4c_put_pte(apage, pte);
 		vpage += PAGE_SIZE;
 	}
@@ -1041,8 +1400,12 @@ static void sun4c_unlockarea(char *vaddr, unsigned long size)
 	vpage = (unsigned long)vaddr & PAGE_MASK;
 	npages = (((unsigned long)vaddr & ~PAGE_MASK) +
 		  size + (PAGE_SIZE-1)) >> PAGE_SHIFT;
+
+	save_and_cli(flags);
 	while (npages != 0) {
 		--npages;
+
+		/* This mapping is marked non-cacheable, no flush necessary. */
 		sun4c_put_pte(vpage, 0);
 		clear_bit((vpage - sun4c_iobuffer_start) >> PAGE_SHIFT,
 			  sun4c_iobuffer_map);
@@ -1050,7 +1413,6 @@ static void sun4c_unlockarea(char *vaddr, unsigned long size)
 	}
 
 	/* garbage collect */
-	save_and_cli(flags);
 	scan = (sun4c_iobuffer_high - sun4c_iobuffer_start) >> PAGE_SHIFT;
 	while (scan >= 0 && !sun4c_iobuffer_map[scan >> 5])
 		scan -= 32;
@@ -1069,37 +1431,37 @@ static void sun4c_unlockarea(char *vaddr, unsigned long size)
  * by implication and fool the page locking code above
  * if passed to by mistake.
  */
-static char *sun4c_get_scsi_one(char *bufptr, unsigned long len, struct linux_sbus *sbus)
+static __u32 sun4c_get_scsi_one(char *bufptr, unsigned long len, struct linux_sbus *sbus)
 {
 	unsigned long page;
 
-	page = ((unsigned long) bufptr) & PAGE_MASK;
-	if(MAP_NR(page) > max_mapnr)
-		return bufptr; /* already locked */
-	return sun4c_lockarea(bufptr, len);
+	page = ((unsigned long)bufptr) & PAGE_MASK;
+	if(MAP_NR(page) > max_mapnr) {
+		sun4c_flush_page(page);
+		return (__u32)bufptr; /* already locked */
+	}
+	return (__u32)sun4c_lockarea(bufptr, len);
 }
 
 static void sun4c_get_scsi_sgl(struct mmu_sglist *sg, int sz, struct linux_sbus *sbus)
 {
 	while(sz >= 0) {
-		sg[sz].dvma_addr = sun4c_lockarea(sg[sz].addr, sg[sz].len);
+		sg[sz].dvma_addr = (__u32)sun4c_lockarea(sg[sz].addr, sg[sz].len);
 		sz--;
 	}
 }
 
-static void sun4c_release_scsi_one(char *bufptr, unsigned long len, struct linux_sbus *sbus)
+static void sun4c_release_scsi_one(__u32 bufptr, unsigned long len, struct linux_sbus *sbus)
 {
-	unsigned long page = (unsigned long) bufptr;
-
-	if(page < sun4c_iobuffer_start)
+	if(bufptr < sun4c_iobuffer_start)
 		return; /* On kernel stack or similar, see above */
-	sun4c_unlockarea(bufptr, len);
+	sun4c_unlockarea((char *)bufptr, len);
 }
 
 static void sun4c_release_scsi_sgl(struct mmu_sglist *sg, int sz, struct linux_sbus *sbus)
 {
 	while(sz >= 0) {
-		sun4c_unlockarea(sg[sz].dvma_addr, sg[sz].len);
+		sun4c_unlockarea((char *)sg[sz].dvma_addr, sg[sz].len);
 		sz--;
 	}
 }
@@ -1109,7 +1471,7 @@ static void sun4c_release_scsi_sgl(struct mmu_sglist *sg, int sz, struct linux_s
 
 struct vm_area_struct sun4c_kstack_vma;
 
-static unsigned long sun4c_init_lock_areas(unsigned long start_mem)
+__initfunc(static unsigned long sun4c_init_lock_areas(unsigned long start_mem))
 {
 	unsigned long sun4c_taskstack_start;
 	unsigned long sun4c_taskstack_end;
@@ -1135,8 +1497,6 @@ static unsigned long sun4c_init_lock_areas(unsigned long start_mem)
 	memset((void *) start_mem, 0, bitmap_size);
 	start_mem += bitmap_size;
 
-	/* Now get us some mmu entries for I/O maps. */
-	/* sun4c_init_lock_area(sun4c_iobuffer_start, sun4c_iobuffer_end); */
 	sun4c_kstack_vma.vm_mm = init_task.mm;
 	sun4c_kstack_vma.vm_start = sun4c_taskstack_start;
 	sun4c_kstack_vma.vm_end = sun4c_taskstack_end;
@@ -1149,143 +1509,293 @@ static unsigned long sun4c_init_lock_areas(unsigned long start_mem)
 /* Cache flushing on the sun4c. */
 static void sun4c_flush_cache_all(void)
 {
-	/* Clear all tags in the sun4c cache.
-	 * The cache is write through so this is safe.
-	 */
-	flush_user_windows();
-	__asm__ __volatile__("add	%2, %2, %%g1\n\t"
-			     "add	%2, %%g1, %%g2\n\t"
-			     "add	%2, %%g2, %%g3\n\t"
-			     "add	%2, %%g3, %%g4\n\t"
-			     "add	%2, %%g4, %%g5\n\t"
-			     "add	%2, %%g5, %%o4\n\t"
-			     "add	%2, %%o4, %%o5\n"
-			     "1:\n\t"
-			     "subcc	%1, %%o5, %1\n\t"
-			     "sta	%%g0, [%0] %3\n\t"
-			     "sta	%%g0, [%0 + %2] %3\n\t"
-			     "sta	%%g0, [%0 + %%g1] %3\n\t"
-			     "sta	%%g0, [%0 + %%g2] %3\n\t"
-			     "sta	%%g0, [%0 + %%g3] %3\n\t"
-			     "sta	%%g0, [%0 + %%g4] %3\n\t"
-			     "sta	%%g0, [%0 + %%g5] %3\n\t"
-			     "sta	%%g0, [%0 + %%o4] %3\n\t"
-			     "bg	1b\n\t"
-			     " add	%0, %%o5, %0\n\t" : :
-			     "r" (AC_CACHETAGS),
-			     "r" (sun4c_vacinfo.num_bytes),
-			     "r" (sun4c_vacinfo.linesize),
-			     "i" (ASI_CONTROL) :
-			     "g1", "g2", "g3", "g4", "g5", "o4", "o5");
-}
-
-static void sun4c_flush_cache_mm(struct mm_struct *mm)
-{
-	int octx;
-
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		octx = sun4c_get_context();
-		flush_user_windows();
-		sun4c_set_context(mm->context);
-		sun4c_flush_context();
-		sun4c_set_context(octx);
-#ifndef __SMP__
+	unsigned long begin, end;
+
+	FUW_INLINE
+	begin = (KERNBASE + SUN4C_REAL_PGDIR_SIZE);
+	end = (begin + SUN4C_VAC_SIZE);
+
+	if(sun4c_vacinfo.linesize == 32) {
+		while(begin < end) {
+			__asm__ __volatile__("
+			ld	[%0 + 0x00], %%g0
+			ld	[%0 + 0x20], %%g0
+			ld	[%0 + 0x40], %%g0
+			ld	[%0 + 0x60], %%g0
+			ld	[%0 + 0x80], %%g0
+			ld	[%0 + 0xa0], %%g0
+			ld	[%0 + 0xc0], %%g0
+			ld	[%0 + 0xe0], %%g0
+			ld	[%0 + 0x100], %%g0
+			ld	[%0 + 0x120], %%g0
+			ld	[%0 + 0x140], %%g0
+			ld	[%0 + 0x160], %%g0
+			ld	[%0 + 0x180], %%g0
+			ld	[%0 + 0x1a0], %%g0
+			ld	[%0 + 0x1c0], %%g0
+			ld	[%0 + 0x1e0], %%g0
+			" : : "r" (begin));
+			begin += 512;
+		}
+	} else {
+		while(begin < end) {
+			__asm__ __volatile__("
+			ld	[%0 + 0x00], %%g0
+			ld	[%0 + 0x10], %%g0
+			ld	[%0 + 0x20], %%g0
+			ld	[%0 + 0x30], %%g0
+			ld	[%0 + 0x40], %%g0
+			ld	[%0 + 0x50], %%g0
+			ld	[%0 + 0x60], %%g0
+			ld	[%0 + 0x70], %%g0
+			ld	[%0 + 0x80], %%g0
+			ld	[%0 + 0x90], %%g0
+			ld	[%0 + 0xa0], %%g0
+			ld	[%0 + 0xb0], %%g0
+			ld	[%0 + 0xc0], %%g0
+			ld	[%0 + 0xd0], %%g0
+			ld	[%0 + 0xe0], %%g0
+			ld	[%0 + 0xf0], %%g0
+			" : : "r" (begin));
+			begin += 256;
+		}
 	}
-#endif
 }
 
+static void sun4c_flush_cache_mm_hw(struct mm_struct *mm)
+{
+	int new_ctx = mm->context;
+
+	if(new_ctx != NO_CONTEXT && sun4c_context_ring[new_ctx].num_entries) {
+		struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd;
+		unsigned long flags;
+
+		save_and_cli(flags);
+		if(head->next != head) {
+			struct sun4c_mmu_entry *entry = head->next;
+			int savectx = sun4c_get_context();
+
+			FUW_INLINE
+			sun4c_set_context(new_ctx);
+			sun4c_flush_context_hw();
+			do {
+				struct sun4c_mmu_entry *next = entry->next;
+
+				sun4c_user_unmap(entry);
+				free_user_entry(new_ctx, entry);
+
+				entry = next;
+			} while(entry != head);
+			sun4c_set_context(savectx);
+		}
+		restore_flags(flags);
+	}
+}
 
-static void sun4c_flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end)
+static void sun4c_flush_cache_range_hw(struct mm_struct *mm, unsigned long start, unsigned long end)
 {
-	int size, size2, octx, i;
-	unsigned long start2,end2;
-	struct sun4c_mmu_entry *entry,*entry2;
+	int new_ctx = mm->context;
 	
-	/* don't flush kernel memory as its always valid in
-	   all contexts */
-	if (start >= PAGE_OFFSET)
-		return;
-
 #if KGPROF_PROFILING
 	kgprof_profile();
 #endif
+	if(new_ctx != NO_CONTEXT) {
+		struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd;
+		struct sun4c_mmu_entry *entry;
+		unsigned long flags;
+
+		FUW_INLINE
+		save_and_cli(flags);
+
+		/* All user segmap chains are ordered on entry->vaddr. */
+		for(entry = head->next;
+		    (entry != head) && ((entry->vaddr+SUN4C_REAL_PGDIR_SIZE) < start);
+		    entry = entry->next)
+			;
+
+		/* Tracing various job mixtures showed that this conditional
+		 * only passes ~35% of the time for most worst-case situations,
+		 * therefore we avoid all of this gross overhead ~65% of the time.
+		 */
+		if((entry != head) && (entry->vaddr < end)) {
+			int octx = sun4c_get_context();
+			sun4c_set_context(new_ctx);
+
+			/* At this point, always, the segment at entry->vaddr
+			 * ends at or after start and (entry->vaddr < end); once
+			 * the latter condition ceases to hold, or we hit the end
+			 * of the list, we exit the loop.  The ordering of all user
+			 * allocated segmaps makes this all work out so beautifully.
+			 */
+			do {
+				struct sun4c_mmu_entry *next = entry->next;
+				unsigned long realend;
+
+				/* "realstart" is always >= entry->vaddr */
+				realend = entry->vaddr + SUN4C_REAL_PGDIR_SIZE;
+				if(end < realend)
+					realend = end;
+				if((realend - entry->vaddr) <= (PAGE_SIZE << 3)) {
+					unsigned long page = entry->vaddr;
+					while(page < realend) {
+						sun4c_flush_page_hw(page);
+						page += PAGE_SIZE;
+					}
+				} else {
+					sun4c_flush_segment_hw(entry->vaddr);
+					sun4c_user_unmap(entry);
+					free_user_entry(new_ctx, entry);
+				}
+				entry = next;
+			} while((entry != head) && (entry->vaddr < end));
+			sun4c_set_context(octx);
+		}
+		restore_flags(flags);
+	}
+}
 
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		size = end - start;
+/* XXX no save_and_cli/restore_flags needed, but put here if darkside still crashes */
+static void sun4c_flush_cache_page_hw(struct vm_area_struct *vma, unsigned long page)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int new_ctx = mm->context;
 
-		octx = sun4c_get_context();
-		flush_user_windows();
-		sun4c_set_context(mm->context);
+	/* Sun4c has no separate I/D caches so cannot optimize for non
+	 * text page flushes.
+	 */
+	if(new_ctx != NO_CONTEXT) {
+		int octx = sun4c_get_context();
+
+		FUW_INLINE
+		sun4c_set_context(new_ctx);
+		sun4c_flush_page_hw(page);
+		sun4c_set_context(octx);
+	}
+}
+
+static void sun4c_flush_page_to_ram_hw(unsigned long page)
+{
+	sun4c_flush_page_hw(page);
+}
+
+static void sun4c_flush_cache_mm_sw(struct mm_struct *mm)
+{
+	int new_ctx = mm->context;
+
+	if(new_ctx != NO_CONTEXT && sun4c_context_ring[new_ctx].num_entries) {
+		struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd;
+		unsigned long flags;
+
+		save_and_cli(flags);
+		if(head->next != head) {
+			struct sun4c_mmu_entry *entry = head->next;
+			int savectx = sun4c_get_context();
+
+			FUW_INLINE
+			sun4c_set_context(new_ctx);
+			sun4c_flush_context_sw();
+			do {
+				struct sun4c_mmu_entry *next = entry->next;
 
-		entry = sun4c_context_ring[mm->context].ringhd.next;
-		i = sun4c_context_ring[mm->context].num_entries;
-		while (i--) {
-			entry2 = entry->next;
-			if (entry->vaddr < start || entry->vaddr >= end) 
-				goto next_entry;
-
-			start2 = MAX(start,entry->vaddr);
-			end2 = MIN(end,entry->vaddr+SUN4C_REAL_PGDIR_SIZE);
-			size2 = end2 - start2;
-
-			if (size2 <= (PAGE_SIZE << 3)) {
-				start2 &= PAGE_MASK;
-				while(start2 < end2) {
-					sun4c_flush_page(start2);
-					start2 += PAGE_SIZE;
-				}
-			} else {
-				start2 &= SUN4C_REAL_PGDIR_MASK;
-				sun4c_flush_segment(start2);
-				/* we are betting that the entry will not be 
-				   needed for a while */
 				sun4c_user_unmap(entry);
-				free_user_entry(mm->context, entry);
-			}
+				free_user_entry(new_ctx, entry);
 
-		next_entry:
-			entry = entry2;
+				entry = next;
+			} while(entry != head);
+			sun4c_set_context(savectx);
 		}
-		sun4c_set_context(octx);
-#ifndef __SMP__
+		restore_flags(flags);
 	}
+}
+
+static void sun4c_flush_cache_range_sw(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+	int new_ctx = mm->context;
+	
+#if KGPROF_PROFILING
+	kgprof_profile();
 #endif
+	if(new_ctx != NO_CONTEXT) {
+		struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd;
+		struct sun4c_mmu_entry *entry;
+		unsigned long flags;
+
+		FUW_INLINE
+		save_and_cli(flags);
+		/* All user segmap chains are ordered on entry->vaddr. */
+		for(entry = head->next;
+		    (entry != head) && ((entry->vaddr+SUN4C_REAL_PGDIR_SIZE) < start);
+		    entry = entry->next)
+			;
+
+		/* Tracing various job mixtures showed that this conditional
+		 * only passes ~35% of the time for most worst-case situations,
+		 * therefore we avoid all of this gross overhead ~65% of the time.
+		 */
+		if((entry != head) && (entry->vaddr < end)) {
+			int octx = sun4c_get_context();
+			sun4c_set_context(new_ctx);
+
+			/* At this point, always, the segment at entry->vaddr
+			 * ends at or after start and (entry->vaddr < end); once
+			 * the latter condition ceases to hold, or we hit the end
+			 * of the list, we exit the loop.  The ordering of all user
+			 * allocated segmaps makes this all work out so beautifully.
+			 */
+			do {
+				struct sun4c_mmu_entry *next = entry->next;
+				unsigned long realend;
+
+				/* "realstart" is always >= entry->vaddr */
+				realend = entry->vaddr + SUN4C_REAL_PGDIR_SIZE;
+				if(end < realend)
+					realend = end;
+				if((realend - entry->vaddr) <= (PAGE_SIZE << 3)) {
+					unsigned long page = entry->vaddr;
+					while(page < realend) {
+						sun4c_flush_page_sw(page);
+						page += PAGE_SIZE;
+					}
+				} else {
+					sun4c_flush_segment_sw(entry->vaddr);
+					sun4c_user_unmap(entry);
+					free_user_entry(new_ctx, entry);
+				}
+				entry = next;
+			} while((entry != head) && (entry->vaddr < end));
+			sun4c_set_context(octx);
+		}
+		restore_flags(flags);
+	}
 }
 
-static void sun4c_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
+static void sun4c_flush_cache_page_sw(struct vm_area_struct *vma, unsigned long page)
 {
-	int octx;
 	struct mm_struct *mm = vma->vm_mm;
-
-	/* don't flush kernel memory as its always valid in
-	   all contexts */
-	if (page >= PAGE_OFFSET)
-		return;
+	int new_ctx = mm->context;
 
 	/* Sun4c has no separate I/D caches so cannot optimize for non
 	 * text page flushes.
 	 */
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		octx = sun4c_get_context();
-		flush_user_windows();
-		sun4c_set_context(mm->context);
-		sun4c_flush_page(page);
+	if(new_ctx != NO_CONTEXT) {
+		int octx = sun4c_get_context();
+
+		FUW_INLINE
+		sun4c_set_context(new_ctx);
+		sun4c_flush_page_sw(page);
 		sun4c_set_context(octx);
-#ifndef __SMP__
 	}
-#endif
 }
 
-/* Sun4c cache is write-through, so no need to validate main memory
- * during a page copy in kernel space.
+static void sun4c_flush_page_to_ram_sw(unsigned long page)
+{
+	sun4c_flush_page_sw(page);
+}
+
+/* Sun4c cache is unified, both instructions and data live there, so
+ * no need to flush the on-stack instructions for new signal handlers.
  */
-static void sun4c_flush_page_to_ram(unsigned long page)
+static void sun4c_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
 {
 }
 
@@ -1296,117 +1806,205 @@ static void sun4c_flush_page_to_ram(unsigned long page)
 
 static void sun4c_flush_tlb_all(void)
 {
+	struct sun4c_mmu_entry *this_entry, *next_entry;
 	unsigned long flags;
-	int savectx, ctx, entry;
+	int savectx, ctx;
 
 	save_and_cli(flags);
+	this_entry = sun4c_kernel_ring.ringhd.next;	/* first entry on the kernel segment ring */
 	savectx = sun4c_get_context();
-	for (entry = 0; entry < SUN4C_KERNEL_BUCKETS; entry++) {
-		if (sun4c_kernel_next->vaddr) {
-			for(ctx = 0; ctx < num_contexts; ctx++) {
-				sun4c_set_context(ctx);
-				sun4c_put_segmap(sun4c_kernel_next->vaddr,
-						 invalid_segment);
-			}
-			sun4c_kernel_next->vaddr = 0;
+	flush_user_windows();
+	while (sun4c_kernel_ring.num_entries) {	/* presumably decremented by free_kernel_entry() -- confirm */
+		next_entry = this_entry->next;	/* read the successor before this entry is freed */
+		sun4c_flush_segment(this_entry->vaddr);
+		for(ctx = 0; ctx < num_contexts; ctx++) {
+			sun4c_set_context(ctx);
+			sun4c_put_segmap(this_entry->vaddr, invalid_segment);	/* unmap the segment in every context */
 		}
-		sun4c_next_kernel_bucket(&sun4c_kernel_next);
+		free_kernel_entry(this_entry, &sun4c_kernel_ring);
+		this_entry = next_entry;
 	}
 	sun4c_set_context(savectx);
 	restore_flags(flags);
 }
 
-static void sun4c_flush_tlb_mm(struct mm_struct *mm)
+static void sun4c_flush_tlb_mm_hw(struct mm_struct *mm)
 {
-	struct sun4c_mmu_entry *this_entry, *next_entry;
-	struct sun4c_mmu_ring *crp;
-	int savectx, ctx;
+	int new_ctx = mm->context;
+
+	if(new_ctx != NO_CONTEXT) {	/* an mm without a context has no ring to walk */
+		struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd;
+		unsigned long flags;
+
+		save_and_cli(flags);
+		if(head->next != head) {	/* ring is non-empty */
+			struct sun4c_mmu_entry *entry = head->next;
+			int savectx = sun4c_get_context();
+
+			FUW_INLINE
+			sun4c_set_context(new_ctx);
+			sun4c_flush_context_hw();	/* one context-wide flush, then every segment is dropped */
+			do {
+				struct sun4c_mmu_entry *next = entry->next;	/* read before the entry is freed */
+
+				sun4c_user_unmap(entry);
+				free_user_entry(new_ctx, entry);
 
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
+				entry = next;
+			} while(entry != head);	/* stop once the walk wraps back to the ring head */
+			sun4c_set_context(savectx);
+		}
+		restore_flags(flags);
+	}
+}
+
+static void sun4c_flush_tlb_range_hw(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+	int new_ctx = mm->context;
+
+	if(new_ctx != NO_CONTEXT) {
+		struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd;
+		struct sun4c_mmu_entry *entry;
+		unsigned long flags;
+#if KGPROF_PROFILING
+		kgprof_profile();
 #endif
-		crp = &sun4c_context_ring[mm->context];
-		savectx = sun4c_get_context();
-		ctx = mm->context;
-		this_entry = crp->ringhd.next;
-		flush_user_windows();
-		sun4c_set_context(mm->context);
-		sun4c_flush_context();
-		while(crp->num_entries) {
-			next_entry = this_entry->next;
-			sun4c_user_unmap(this_entry);
-			free_user_entry(ctx, this_entry);
-			this_entry = next_entry;
+
+		save_and_cli(flags);
+		/* See commentary in sun4c_flush_cache_range_*(). */
+		for(entry = head->next;
+		    (entry != head) && ((entry->vaddr+SUN4C_REAL_PGDIR_SIZE) < start);
+		    entry = entry->next)
+			;
+		/* NOTE(review): both loops stop at the first entry past the range, so the ring is presumably vaddr-sorted -- confirm. */
+		if((entry != head) && (entry->vaddr < end)) {
+			int octx = sun4c_get_context();
+
+			/* This window flush is paranoid I think... -DaveM */
+			FUW_INLINE
+			sun4c_set_context(new_ctx);
+			do {	/* each segment reached here is flushed and dropped whole */
+				struct sun4c_mmu_entry *next = entry->next;
+
+				sun4c_flush_segment_hw(entry->vaddr);
+				sun4c_user_unmap(entry);
+				free_user_entry(new_ctx, entry);
+
+				entry = next;
+			} while((entry != head) && (entry->vaddr < end));
+			sun4c_set_context(octx);
 		}
+		restore_flags(flags);
+	}
+}
+
+static void sun4c_flush_tlb_page_hw(struct vm_area_struct *vma, unsigned long page)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int new_ctx = mm->context;
+
+	if(new_ctx != NO_CONTEXT) {	/* nothing is done for an mm that holds no context */
+		int savectx = sun4c_get_context();
+
+		FUW_INLINE
+		sun4c_set_context(new_ctx);
+		page &= PAGE_MASK;	/* operate on the page-aligned address */
+		sun4c_flush_page_hw(page);
+		sun4c_put_pte(page, 0);	/* hardware pte is cleared unconditionally */
 		sun4c_set_context(savectx);
-#ifndef __SMP__
 	}
-#endif
 }
 
-static void sun4c_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
+static void sun4c_flush_tlb_mm_sw(struct mm_struct *mm)
 {
-	struct sun4c_mmu_entry *entry,*entry2;
-	unsigned char savectx;
-	int i;
+	int new_ctx = mm->context;	/* context number held by this mm, or NO_CONTEXT */
 
-#ifndef __SMP__
-	if(mm->context == NO_CONTEXT)
-		return;
-#endif
+	if(new_ctx != NO_CONTEXT) {
+		struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd;
+		unsigned long flags;
+
+		save_and_cli(flags);
+		if(head->next != head) {	/* ring is non-empty */
+			struct sun4c_mmu_entry *entry = head->next;
+			int savectx = sun4c_get_context();
+
+			FUW_INLINE
+			sun4c_set_context(new_ctx);
+			sun4c_flush_context_sw();	/* one context-wide flush, then every segment is dropped */
+			do {
+				struct sun4c_mmu_entry *next = entry->next;	/* read before the entry is freed */
+
+				sun4c_user_unmap(entry);
+				free_user_entry(new_ctx, entry);
+
+				entry = next;
+			} while(entry != head);	/* stop once the walk wraps back to the ring head */
+			sun4c_set_context(savectx);
+		}
+		restore_flags(flags);
+	}
+}
+
+static void sun4c_flush_tlb_range_sw(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+	int new_ctx = mm->context;
+
+	if(new_ctx != NO_CONTEXT) {
+		struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd;
+		struct sun4c_mmu_entry *entry;
+		unsigned long flags;
 
 #if KGPROF_PROFILING
-	kgprof_profile();
+		kgprof_profile();
 #endif
 
-	savectx = sun4c_get_context();
-	sun4c_set_context(mm->context);
-	start &= SUN4C_REAL_PGDIR_MASK;
-
-	entry = sun4c_context_ring[mm->context].ringhd.next;
-	i = sun4c_context_ring[mm->context].num_entries;
-	while (i--) {
-		entry2 = entry->next;
-		if (entry->vaddr >= start && entry->vaddr < end) {
-			sun4c_flush_segment(entry->vaddr);
-			sun4c_user_unmap(entry);
-			free_user_entry(mm->context, entry);
+		save_and_cli(flags);
+		/* See commentary in sun4c_flush_cache_range_*(). */
+		for(entry = head->next;
+		    (entry != head) && ((entry->vaddr+SUN4C_REAL_PGDIR_SIZE) < start);
+		    entry = entry->next)
+			;
+		/* NOTE(review): unlike the old full-ring scan, this stops at the first entry past the range; ring presumably vaddr-sorted -- confirm. */
+		if((entry != head) && (entry->vaddr < end)) {
+			int octx = sun4c_get_context();
+
+			/* This window flush is paranoid I think... -DaveM */
+			FUW_INLINE
+			sun4c_set_context(new_ctx);
+			do {	/* each segment reached here is flushed and dropped whole */
+				struct sun4c_mmu_entry *next = entry->next;
+
+				sun4c_flush_segment_sw(entry->vaddr);
+				sun4c_user_unmap(entry);
+				free_user_entry(new_ctx, entry);
+
+				entry = next;
+			} while((entry != head) && (entry->vaddr < end));
+			sun4c_set_context(octx);
 		}
-		entry = entry2;
+		restore_flags(flags);
 	}
-	sun4c_set_context(savectx);
 }
 
-
-static void sun4c_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+static void sun4c_flush_tlb_page_sw(struct vm_area_struct *vma, unsigned long page)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	int savectx;
+	int new_ctx = mm->context;	/* context number held by this mm, or NO_CONTEXT */
 
-#ifndef __SMP__
-	if(mm->context != NO_CONTEXT) {
-#endif
-		savectx = sun4c_get_context();
-		sun4c_set_context(mm->context);
+	if(new_ctx != NO_CONTEXT) {
+		int savectx = sun4c_get_context();	/* restored after the pte is cleared */
+
+		FUW_INLINE
+		sun4c_set_context(new_ctx);
 		page &= PAGE_MASK;
-		if(sun4c_get_pte(page) & _SUN4C_PAGE_VALID)
-			sun4c_put_pte(page, 0);
+		sun4c_flush_page_sw(page);	/* cache lines for the page go first */
+		sun4c_put_pte(page, 0);	/* now unconditional; the old _SUN4C_PAGE_VALID test is gone */
 		sun4c_set_context(savectx);
-#ifndef __SMP__
 	}
-#endif
 }
 
-/* Sun4c mmu hardware doesn't update the dirty bit in the pte's
- * for us, so we do it in software.
- */
 static void sun4c_set_pte(pte_t *ptep, pte_t pte)
 {
-
-	if((pte_val(pte) & (_SUN4C_PAGE_WRITE|_SUN4C_PAGE_DIRTY)) ==
-	   _SUN4C_PAGE_WRITE)
-		pte_val(pte) |= _SUN4C_PAGE_DIRTY;
-
 	*ptep = pte;
 }
 
@@ -1416,21 +2014,18 @@ void sun4c_mapioaddr(unsigned long physaddr, unsigned long virt_addr,
 	unsigned long page_entry;
 
 	page_entry = ((physaddr >> PAGE_SHIFT) & 0xffff);
-	page_entry |= (_SUN4C_PAGE_VALID | _SUN4C_PAGE_WRITE |
-		       _SUN4C_PAGE_NOCACHE | _SUN4C_PAGE_IO);
+	page_entry |= ((pg_iobits | _SUN4C_PAGE_PRIV) & ~(_SUN4C_PAGE_PRESENT));
 	if(rdonly)
-		page_entry &= (~_SUN4C_PAGE_WRITE);
-	sun4c_flush_page(virt_addr);
+		page_entry &= ~_SUN4C_WRITEABLE;
 	sun4c_put_pte(virt_addr, page_entry);
 }
 
 void sun4c_unmapioaddr(unsigned long virt_addr)
 {
-	sun4c_flush_page(virt_addr); /* XXX P3: Is it necessary for I/O page? */
 	sun4c_put_pte(virt_addr, 0);
 }
 
-static inline void sun4c_alloc_context(struct mm_struct *mm)
+static void sun4c_alloc_context_hw(struct mm_struct *mm)
 {
 	struct ctx_list *ctxp;
 
@@ -1445,65 +2040,110 @@ static inline void sun4c_alloc_context(struct mm_struct *mm)
 	ctxp = ctx_used.next;
 	if(ctxp->ctx_mm == current->mm)
 		ctxp = ctxp->next;
+#ifdef DEBUG_SUN4C_MM
 	if(ctxp == &ctx_used)
 		panic("out of mmu contexts");
+#endif
 	remove_from_ctx_list(ctxp);
 	add_to_used_ctxlist(ctxp);
 	ctxp->ctx_mm->context = NO_CONTEXT;
 	ctxp->ctx_mm = mm;
 	mm->context = ctxp->ctx_number;
-	sun4c_demap_context(&sun4c_context_ring[ctxp->ctx_number],
+	sun4c_demap_context_hw(&sun4c_context_ring[ctxp->ctx_number],
 			    ctxp->ctx_number);
 }
 
-#if some_day_soon /* We need some tweaking to start using this */
-extern void force_user_fault(unsigned long, int);
+/* Make tsk's mm the active MMU context, allocating one first if it has none. */
+static void sun4c_switch_to_context_hw(struct task_struct *tsk)
+{
+	struct mm_struct *mm = tsk->mm;
+
+	if(mm->context != NO_CONTEXT) {
+		/* Update the LRU ring of contexts. */
+		struct ctx_list *slot = &ctx_list_pool[mm->context];
+		remove_from_ctx_list(slot);
+		add_to_used_ctxlist(slot);
+	} else
+		sun4c_alloc_context_hw(mm);
+	sun4c_set_context(mm->context);
+}
 
-void sun4c_switch_heuristic(struct pt_regs *regs)
+static void sun4c_init_new_context_hw(struct mm_struct *mm)
 {
-	unsigned long sp = regs->u_regs[UREG_FP];
-	unsigned long sp2 = sp + REGWIN_SZ - 0x8;
+	sun4c_alloc_context_hw(mm);	/* assigns mm->context */
+	if(mm == current->mm)	/* the context register is switched only for the running task's mm */
+		sun4c_set_context(mm->context);
+}
+
+static void sun4c_destroy_context_hw(struct mm_struct *mm)
+{
+	struct ctx_list *ctx_old;
 
-	force_user_fault(regs->pc, 0);
-	force_user_fault(sp, 0);
-	if((sp&PAGE_MASK) != (sp2&PAGE_MASK))
-		force_user_fault(sp2, 0);
+	if(mm->context != NO_CONTEXT && mm->count == 1) {	/* acts only when mm->count is exactly 1 */
+		sun4c_demap_context_hw(&sun4c_context_ring[mm->context], mm->context);
+		ctx_old = ctx_list_pool + mm->context;	/* pool is indexed by context number */
+		remove_from_ctx_list(ctx_old);
+		add_to_free_ctxlist(ctx_old);	/* context goes back on the free list */
+		mm->context = NO_CONTEXT;
+	}
 }
+
+static void sun4c_alloc_context_sw(struct mm_struct *mm)
+{
+	struct ctx_list *ctxp;
+
+	ctxp = ctx_free.next;
+	if(ctxp != &ctx_free) {	/* free list is non-empty: take its first context */
+		remove_from_ctx_list(ctxp);
+		add_to_used_ctxlist(ctxp);
+		mm->context = ctxp->ctx_number;
+		ctxp->ctx_mm = mm;
+		return;
+	}
+	ctxp = ctx_used.next;	/* otherwise take over the first context on the used list */
+	if(ctxp->ctx_mm == current->mm)	/* but never the one owned by the running task's mm */
+		ctxp = ctxp->next;
+#ifdef DEBUG_SUN4C_MM
+	if(ctxp == &ctx_used)	/* NOTE(review): this case is unchecked when DEBUG_SUN4C_MM is off */
+		panic("out of mmu contexts");
 #endif
+	remove_from_ctx_list(ctxp);
+	add_to_used_ctxlist(ctxp);
+	ctxp->ctx_mm->context = NO_CONTEXT;	/* previous owner loses its context */
+	ctxp->ctx_mm = mm;
+	mm->context = ctxp->ctx_number;
+	sun4c_demap_context_sw(&sun4c_context_ring[ctxp->ctx_number],
+			    ctxp->ctx_number);
+}
 
-static void sun4c_switch_to_context(struct task_struct *tsk)
+static void sun4c_switch_to_context_sw(struct task_struct *tsk)
 {
 	struct ctx_list *ctx;
 
 	if(tsk->mm->context == NO_CONTEXT) {
-		sun4c_alloc_context(tsk->mm);
-		goto set_context;
+		sun4c_alloc_context_sw(tsk->mm);	/* no context yet: obtain one */
+	} else {
+		/* Update the LRU ring of contexts. */
+		ctx = ctx_list_pool + tsk->mm->context;	/* pool is indexed by context number */
+		remove_from_ctx_list(ctx);
+		add_to_used_ctxlist(ctx);
 	}
-
-	/* Update the LRU ring of contexts. */
-	ctx = ctx_list_pool + tsk->mm->context;
-	remove_from_ctx_list(ctx);
-	add_to_used_ctxlist(ctx);
-
-set_context:
 	sun4c_set_context(tsk->mm->context);
 }
 
-static void sun4c_flush_hook(void)
+static void sun4c_init_new_context_sw(struct mm_struct *mm)
 {
-	if(current->tss.flags & SPARC_FLAG_KTHREAD) {
-		sun4c_alloc_context(current->mm);
-		sun4c_set_context(current->mm->context);
-	}
+	sun4c_alloc_context_sw(mm);	/* assigns mm->context */
+	if(mm == current->mm)	/* the context register is switched only for the running task's mm */
+		sun4c_set_context(mm->context);
 }
 
-static void sun4c_exit_hook(void)
+static void sun4c_destroy_context_sw(struct mm_struct *mm)
 {
 	struct ctx_list *ctx_old;
-	struct mm_struct *mm = current->mm;
 
 	if(mm->context != NO_CONTEXT && mm->count == 1) {
-		sun4c_demap_context(&sun4c_context_ring[mm->context], mm->context);
+		sun4c_demap_context_sw(&sun4c_context_ring[mm->context], mm->context);
 		ctx_old = ctx_list_pool + mm->context;
 		remove_from_ctx_list(ctx_old);
 		add_to_free_ctxlist(ctx_old);
@@ -1531,8 +2171,11 @@ static char *sun4c_mmu_info(void)
 		"mmuctxs\t\t: %d\n"
 		"mmupsegs\t: %d\n"
 		"kernelpsegs\t: %d\n"
+		"kfreepsegs\t: %d\n"
 		"usedpsegs\t: %d\n"
 		"ufreepsegs\t: %d\n"
+		"user_taken\t: %d\n"
+		"max_taken\t: %d\n"
 		"context\t\t: %d flushes\n"
 		"segment\t\t: %d flushes\n"
 		"page\t\t: %d flushes\n",
@@ -1541,10 +2184,12 @@ static char *sun4c_mmu_info(void)
 		sun4c_vacinfo.linesize,
 		num_contexts,
 		(invalid_segment + 1),
-		invalid_segment - used_user_entries -
-			sun4c_ufree_ring.num_entries + 1,
+		sun4c_kernel_ring.num_entries,
+		sun4c_kfree_ring.num_entries,
 		used_user_entries,
 		sun4c_ufree_ring.num_entries,
+		sun4c_user_taken_entries,
+		max_user_taken_entries,
 		ctxflushes, segflushes, pageflushes);
 
 #if KGPROF_PROFILING
@@ -1590,18 +2235,24 @@ static unsigned long sun4c_vmalloc_start(void)
 }
 
 static int sun4c_pte_none(pte_t pte)		{ return !pte_val(pte); }
-static int sun4c_pte_present(pte_t pte)	        { return pte_val(pte) & _SUN4C_PAGE_VALID; }
-static void sun4c_pte_clear(pte_t *ptep)	{ pte_val(*ptep) = 0; }
+static int sun4c_pte_present(pte_t pte)
+{
+	return ((pte_val(pte) & (_SUN4C_PAGE_PRESENT | _SUN4C_PAGE_PRIV)) != 0);	/* either bit alone counts as present */
+}
+static void sun4c_pte_clear(pte_t *ptep)	{ *ptep = __pte(0); }
 
 static int sun4c_pmd_none(pmd_t pmd)		{ return !pmd_val(pmd); }
 static int sun4c_pmd_bad(pmd_t pmd)
 {
-	return (pmd_val(pmd) & ~PAGE_MASK) != PGD_TABLE ||
-		MAP_NR(pmd_val(pmd)) > max_mapnr;
+	return (((pmd_val(pmd) & ~PAGE_MASK) != PGD_TABLE) ||
+		(MAP_NR(pmd_val(pmd)) > max_mapnr));
 }
 
-static int sun4c_pmd_present(pmd_t pmd)	        { return pmd_val(pmd) & PGD_PRESENT; }
-static void sun4c_pmd_clear(pmd_t *pmdp)	{ pmd_val(*pmdp) = 0; }
+static int sun4c_pmd_present(pmd_t pmd)
+{
+	return ((pmd_val(pmd) & PGD_PRESENT) != 0);
+}
+static void sun4c_pmd_clear(pmd_t *pmdp)	{ *pmdp = __pmd(0); }
 
 static int sun4c_pgd_none(pgd_t pgd)		{ return 0; }
 static int sun4c_pgd_bad(pgd_t pgd)		{ return 0; }
@@ -1612,16 +2263,59 @@ static void sun4c_pgd_clear(pgd_t * pgdp)	{ }
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
  */
-static int sun4c_pte_write(pte_t pte)		{ return pte_val(pte) & _SUN4C_PAGE_WRITE; }
-static int sun4c_pte_dirty(pte_t pte)		{ return pte_val(pte) & _SUN4C_PAGE_DIRTY; }
-static int sun4c_pte_young(pte_t pte)		{ return pte_val(pte) & _SUN4C_PAGE_REF; }
+static int sun4c_pte_write(pte_t pte)
+{
+	return ((pte_val(pte) & _SUN4C_PAGE_WRITE) != 0);	/* 0/1, in the same form as sun4c_pte_present() */
+}
 
-static pte_t sun4c_pte_wrprotect(pte_t pte)	{ pte_val(pte) &= ~_SUN4C_PAGE_WRITE; return pte; }
-static pte_t sun4c_pte_mkclean(pte_t pte)	{ pte_val(pte) &= ~_SUN4C_PAGE_DIRTY; return pte; }
-static pte_t sun4c_pte_mkold(pte_t pte)	        { pte_val(pte) &= ~_SUN4C_PAGE_REF; return pte; }
-static pte_t sun4c_pte_mkwrite(pte_t pte)	{ pte_val(pte) |= _SUN4C_PAGE_WRITE; return pte; }
-static pte_t sun4c_pte_mkdirty(pte_t pte)	{ pte_val(pte) |= _SUN4C_PAGE_DIRTY; return pte; }
-static pte_t sun4c_pte_mkyoung(pte_t pte)	{ pte_val(pte) |= _SUN4C_PAGE_REF; return pte; }
+static int sun4c_pte_dirty(pte_t pte)
+{
+	return ((pte_val(pte) & _SUN4C_PAGE_MODIFIED) != 0);	/* 0/1, in the same form as sun4c_pte_present() */
+}
+
+static int sun4c_pte_young(pte_t pte)
+{
+	return ((pte_val(pte) & _SUN4C_PAGE_ACCESSED) != 0);	/* 0/1, in the same form as sun4c_pte_present() */
+}
+
+static pte_t sun4c_pte_wrprotect(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~(_SUN4C_PAGE_WRITE | _SUN4C_PAGE_SILENT_WRITE));	/* clears WRITE and SILENT_WRITE together */
+}
+
+static pte_t sun4c_pte_mkclean(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~(_SUN4C_PAGE_MODIFIED | _SUN4C_PAGE_SILENT_WRITE));	/* clearing MODIFIED also clears SILENT_WRITE */
+}
+
+static pte_t sun4c_pte_mkold(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~(_SUN4C_PAGE_ACCESSED | _SUN4C_PAGE_SILENT_READ));	/* clearing ACCESSED also clears SILENT_READ */
+}
+
+static pte_t sun4c_pte_mkwrite(pte_t pte)
+{
+	unsigned long bits = pte_val(pte) | _SUN4C_PAGE_WRITE;
+	/* SILENT_WRITE is added only when the pte is already marked MODIFIED. */
+	if (bits & _SUN4C_PAGE_MODIFIED)
+		bits |= _SUN4C_PAGE_SILENT_WRITE;
+	return __pte(bits);
+}
+
+static pte_t sun4c_pte_mkdirty(pte_t pte)
+{
+	unsigned long bits = pte_val(pte) | _SUN4C_PAGE_MODIFIED;
+	/* SILENT_WRITE is added only when the pte is also marked WRITE. */
+	if (bits & _SUN4C_PAGE_WRITE)
+		bits |= _SUN4C_PAGE_SILENT_WRITE;
+	return __pte(bits);
+}
+
+static pte_t sun4c_pte_mkyoung(pte_t pte)
+{
+	unsigned long bits = pte_val(pte) | _SUN4C_PAGE_ACCESSED;
+	/* SILENT_READ is added only when the pte is also marked READ. */
+	if (bits & _SUN4C_PAGE_READ)
+		bits |= _SUN4C_PAGE_SILENT_READ;
+	return __pte(bits);
+}
 
 /*
  * Conversion functions: convert a page and protection to a page entry,
@@ -1644,7 +2338,8 @@ static pte_t sun4c_mk_pte_io(unsigned long page, pgprot_t pgprot, int space)
 
 static pte_t sun4c_pte_modify(pte_t pte, pgprot_t newprot)
 {
-	return __pte((pte_val(pte) & _SUN4C_PAGE_CHG_MASK) | pgprot_val(newprot));
+	return __pte((pte_val(pte) & _SUN4C_PAGE_CHG_MASK) |
+		     pgprot_val(newprot));
 }
 
 static unsigned long sun4c_pte_page(pte_t pte)
@@ -1680,33 +2375,36 @@ static void sun4c_update_rootmmu_dir(struct task_struct *tsk, pgd_t *pgdir)
 {
 }
 
+/* Please take special note of the foo_kernel() routines below: our
+ * fast in-window fault handler wants to get at the pte's for vmalloc
+ * area with traps off, therefore they _MUST_ be locked down to prevent
+ * a watchdog from happening.  It only takes 4 pages of pte's to lock
+ * down the maximum vmalloc space possible on sun4c so we statically
+ * allocate these page table pieces in the kernel image.  Therefore
+ * we should never have to really allocate or free any kernel page
+ * table information.
+ */
+
 /* Allocate and free page tables. The xxx_kernel() versions are
  * used to allocate a kernel page table - this turns on ASN bits
  * if any, and marks the page tables reserved.
  */
 static void sun4c_pte_free_kernel(pte_t *pte)
 {
-	free_page((unsigned long) pte);
+	/* This should never get called. */
+	panic("sun4c_pte_free_kernel called, can't happen...");
 }
 
 static pte_t *sun4c_pte_alloc_kernel(pmd_t *pmd, unsigned long address)
 {
+	if(address >= SUN4C_LOCK_VADDR)
+		return NULL;
 	address = (address >> PAGE_SHIFT) & (SUN4C_PTRS_PER_PTE - 1);
-	if (sun4c_pmd_none(*pmd)) {
-		pte_t *page = (pte_t *) get_free_page(GFP_KERNEL);
-		if (sun4c_pmd_none(*pmd)) {
-			if (page) {
-				pmd_val(*pmd) = PGD_TABLE | (unsigned long) page;
-				return page + address;
-			}
-			pmd_val(*pmd) = PGD_TABLE | (unsigned long) BAD_PAGETABLE;
-			return NULL;
-		}
-		free_page((unsigned long) page);
-	}
+	if (sun4c_pmd_none(*pmd))
+		panic("sun4c_pmd_none for kernel pmd, can't happen...");
 	if (sun4c_pmd_bad(*pmd)) {
 		printk("Bad pmd in pte_alloc_kernel: %08lx\n", pmd_val(*pmd));
-		pmd_val(*pmd) = PGD_TABLE | (unsigned long) BAD_PAGETABLE;
+		*pmd = __pmd(PGD_TABLE | (unsigned long) BAD_PAGETABLE);
 		return NULL;
 	}
 	return (pte_t *) sun4c_pmd_page(*pmd) + address;
@@ -1718,7 +2416,6 @@ static pte_t *sun4c_pte_alloc_kernel(pmd_t *pmd, unsigned long address)
  */
 static void sun4c_pmd_free_kernel(pmd_t *pmd)
 {
-	pmd_val(*pmd) = 0;
 }
 
 static pmd_t *sun4c_pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
@@ -1738,17 +2435,17 @@ static pte_t *sun4c_pte_alloc(pmd_t * pmd, unsigned long address)
 		pte_t *page = (pte_t *) get_free_page(GFP_KERNEL);
 		if (sun4c_pmd_none(*pmd)) {
 			if (page) {
-				pmd_val(*pmd) = PGD_TABLE | (unsigned long) page;
+				*pmd = __pmd(PGD_TABLE | (unsigned long) page);
 				return page + address;
 			}
-			pmd_val(*pmd) = PGD_TABLE | (unsigned long) BAD_PAGETABLE;
+			*pmd = __pmd(PGD_TABLE | (unsigned long) BAD_PAGETABLE);
 			return NULL;
 		}
 		free_page((unsigned long) page);
 	}
 	if (sun4c_pmd_bad(*pmd)) {
 		printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd));
-		pmd_val(*pmd) = PGD_TABLE | (unsigned long) BAD_PAGETABLE;
+		*pmd = __pmd(PGD_TABLE | (unsigned long) BAD_PAGETABLE);
 		return NULL;
 	}
 	return (pte_t *) sun4c_pmd_page(*pmd) + address;
@@ -1760,7 +2457,7 @@ static pte_t *sun4c_pte_alloc(pmd_t * pmd, unsigned long address)
  */
 static void sun4c_pmd_free(pmd_t * pmd)
 {
-	pmd_val(*pmd) = 0;
+	*pmd = __pmd(0);
 }
 
 static pmd_t *sun4c_pmd_alloc(pgd_t * pgd, unsigned long address)
@@ -1778,18 +2475,96 @@ static pgd_t *sun4c_pgd_alloc(void)
 	return (pgd_t *) get_free_page(GFP_KERNEL);
 }
 
+/* There are really two cases of aliases to watch out for, and these
+ * are:
+ *
+ *     1) A user's page which can be aliased with the kernels virtual
+ *        mapping of the physical page.
+ *
+ *     2) Multiple user mappings of the same inode/anonymous object
+ *        such that two copies of the same data for the same phys page
+ *        can live (writable) in the cache at the same time.
+ *
+ * We handle number 1 by flushing the kernel copy of the page always
+ * after COW page operations.
+ *
+ * NOTE: We are a bit slowed down now because the VMA arg is indeed used
+ *       now, so our ref/mod bit tracking quick userfaults eat a few more
+ *       cycles than they used to.
+ */
+/* Returns PTE, with _SUN4C_PAGE_NOCACHE added when a bad VAC alias was found. */
+static pte_t sun4c_vac_alias_fixup(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+{
+	struct inode *inode = vma->vm_inode;
+	pgd_t *pgdp;
+	pte_t *ptep;
+
+	if(inode) {
+		unsigned long offset = (address & PAGE_MASK) - vma->vm_start;
+		struct vm_area_struct *vmaring = inode->i_mmap;
+		int alias_found = 0;
+		do {
+			unsigned long vaddr = vmaring->vm_start + offset;
+			unsigned long start;
+
+			if (S4CVAC_BADALIAS(vaddr, address)) {
+				alias_found++;
+				/* Make every present page of the aliasing vma uncacheable. */
+				start = vmaring->vm_start;
+				while(start < vmaring->vm_end) {
+					pgdp = sun4c_pgd_offset(vmaring->vm_mm, start);
+					if(!pgdp) goto next;
+					ptep = sun4c_pte_offset((pmd_t *) pgdp, start);
+					if(!ptep) goto next;
+					if(pte_val(*ptep) & _SUN4C_PAGE_PRESENT) {
+						flush_cache_page(vmaring, start);
+						*ptep = __pte(pte_val(*ptep) | _SUN4C_PAGE_NOCACHE);
+						flush_tlb_page(vmaring, start);
+					}
+				next:
+					start += PAGE_SIZE;
+				}
+			}
+		} while ((vmaring = vmaring->vm_next_share) != inode->i_mmap);
+
+		if(alias_found && !(pte_val(pte) & _SUN4C_PAGE_NOCACHE)) {
+			pgdp = sun4c_pgd_offset(vma->vm_mm, address);
+			ptep = sun4c_pte_offset((pmd_t *) pgdp, address);
+			*ptep = __pte(pte_val(*ptep) | _SUN4C_PAGE_NOCACHE);
+			pte = *ptep;	/* hand the uncached pte back; "pte" is only a by-value copy */
+		}
+	}
+	return pte;
+}
+
+/* Load PTE for ADDRESS into the current context, allocating a user segment if none is mapped. */
+void sun4c_update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+{
+	unsigned long flags;
+
+	save_and_cli(flags);
+	address &= PAGE_MASK;
+	if(sun4c_get_segmap(address) == invalid_segment)
+		alloc_user_segment(address, sun4c_get_context());
+
+	if((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED))
+		pte = sun4c_vac_alias_fixup(vma, address, pte);	/* may come back with NOCACHE set */
+	sun4c_put_pte(address, pte_val(pte));
+	restore_flags(flags);
+}
+
 extern unsigned long free_area_init(unsigned long, unsigned long);
 extern unsigned long sparc_context_init(unsigned long, int);
 extern unsigned long end;
 
-unsigned long sun4c_paging_init(unsigned long start_mem, unsigned long end_mem)
+__initfunc(unsigned long sun4c_paging_init(unsigned long start_mem, unsigned long end_mem))
 {
 	int i, cnt;
-	unsigned long kernel_end;
+	unsigned long kernel_end, vaddr;
 	extern unsigned long sparc_iobase_vaddr;
 
 	kernel_end = (unsigned long) &end;
-	kernel_end += (SUN4C_REAL_PGDIR_SIZE * 3);
+	kernel_end += (SUN4C_REAL_PGDIR_SIZE * 4);
 	kernel_end = SUN4C_REAL_PGDIR_ALIGN(kernel_end);
 	sun4c_probe_mmu();
 	invalid_segment = (num_segmaps - 1);
@@ -1806,24 +2581,36 @@ unsigned long sun4c_paging_init(unsigned long start_mem, unsigned long end_mem)
 	sun4c_set_context(0);
 	memset(swapper_pg_dir, 0, PAGE_SIZE);
 	memset(pg0, 0, PAGE_SIZE);
+	memset(pg1, 0, PAGE_SIZE);
+	memset(pg2, 0, PAGE_SIZE);
+	memset(pg3, 0, PAGE_SIZE);
+
 	/* Save work later. */
-	pgd_val(swapper_pg_dir[SUN4C_VMALLOC_START>>SUN4C_PGDIR_SHIFT]) =
-		PGD_TABLE | (unsigned long) pg0;
+	vaddr = SUN4C_VMALLOC_START;
+	swapper_pg_dir[vaddr>>SUN4C_PGDIR_SHIFT] = __pgd(PGD_TABLE | (unsigned long) pg0);
+	vaddr += SUN4C_PGDIR_SIZE;
+	swapper_pg_dir[vaddr>>SUN4C_PGDIR_SHIFT] = __pgd(PGD_TABLE | (unsigned long) pg1);
+	vaddr += SUN4C_PGDIR_SIZE;
+	swapper_pg_dir[vaddr>>SUN4C_PGDIR_SHIFT] = __pgd(PGD_TABLE | (unsigned long) pg2);
+	vaddr += SUN4C_PGDIR_SIZE;
+	swapper_pg_dir[vaddr>>SUN4C_PGDIR_SHIFT] = __pgd(PGD_TABLE | (unsigned long) pg3);
 	sun4c_init_ss2_cache_bug();
 	start_mem = PAGE_ALIGN(start_mem);
-	/* start_mem = sun4c_init_alloc_dvma_pages(start_mem); */
 	start_mem = sparc_context_init(start_mem, num_contexts);
 	start_mem = free_area_init(start_mem, end_mem);
 	cnt = 0;
 	for(i = 0; i < num_segmaps; i++)
 		if(mmu_entry_pool[i].locked)
 			cnt++;
+
+	max_user_taken_entries = num_segmaps - cnt - 40 - 1;
+
 	printk("SUN4C: %d mmu entries for the kernel\n", cnt);
 	return start_mem;
 }
 
 /* Load up routines and constants for sun4c mmu */
-void ld_mmu_sun4c(void)
+__initfunc(void ld_mmu_sun4c(void))
 {
 	printk("Loading sun4c MMU routines\n");
 
@@ -1844,46 +2631,45 @@ void ld_mmu_sun4c(void)
 	page_copy = SUN4C_PAGE_COPY;
 	page_readonly = SUN4C_PAGE_READONLY;
 	page_kernel = SUN4C_PAGE_KERNEL;
-	pg_iobits = _SUN4C_PAGE_NOCACHE | _SUN4C_PAGE_IO | _SUN4C_PAGE_VALID
-	    | _SUN4C_PAGE_WRITE | _SUN4C_PAGE_DIRTY;
+	pg_iobits = _SUN4C_PAGE_PRESENT | _SUN4C_READABLE | _SUN4C_WRITEABLE |
+		    _SUN4C_PAGE_IO | _SUN4C_PAGE_NOCACHE;
 	
 	/* Functions */
-#ifndef __SMP__
 	flush_cache_all = sun4c_flush_cache_all;
-	flush_cache_mm = sun4c_flush_cache_mm;
-	flush_cache_range = sun4c_flush_cache_range;
-	flush_cache_page = sun4c_flush_cache_page;
+
+	if(sun4c_vacinfo.do_hwflushes) {
+		flush_cache_mm = sun4c_flush_cache_mm_hw;
+		flush_cache_range = sun4c_flush_cache_range_hw;
+		flush_cache_page = sun4c_flush_cache_page_hw;
+		flush_page_to_ram = sun4c_flush_page_to_ram_hw;
+		flush_tlb_mm = sun4c_flush_tlb_mm_hw;
+		flush_tlb_range = sun4c_flush_tlb_range_hw;
+		flush_tlb_page = sun4c_flush_tlb_page_hw;
+		free_kernel_stack = sun4c_free_kernel_stack_hw;
+		free_task_struct = sun4c_free_task_struct_hw;
+		switch_to_context = sun4c_switch_to_context_hw;
+		destroy_context = sun4c_destroy_context_hw;
+		init_new_context = sun4c_init_new_context_hw;
+	} else {
+		flush_cache_mm = sun4c_flush_cache_mm_sw;
+		flush_cache_range = sun4c_flush_cache_range_sw;
+		flush_cache_page = sun4c_flush_cache_page_sw;
+		flush_page_to_ram = sun4c_flush_page_to_ram_sw;
+		flush_tlb_mm = sun4c_flush_tlb_mm_sw;
+		flush_tlb_range = sun4c_flush_tlb_range_sw;
+		flush_tlb_page = sun4c_flush_tlb_page_sw;
+		free_kernel_stack = sun4c_free_kernel_stack_sw;
+		free_task_struct = sun4c_free_task_struct_sw;
+		switch_to_context = sun4c_switch_to_context_sw;
+		destroy_context = sun4c_destroy_context_sw;
+		init_new_context = sun4c_init_new_context_sw;
+	}
 
 	flush_tlb_all = sun4c_flush_tlb_all;
-	flush_tlb_mm = sun4c_flush_tlb_mm;
-	flush_tlb_range = sun4c_flush_tlb_range;
-	flush_tlb_page = sun4c_flush_tlb_page;
-#else
-	local_flush_cache_all = sun4c_flush_cache_all;
-	local_flush_cache_mm = sun4c_flush_cache_mm;
-	local_flush_cache_range = sun4c_flush_cache_range;
-	local_flush_cache_page = sun4c_flush_cache_page;
-
-	local_flush_tlb_all = sun4c_flush_tlb_all;
-	local_flush_tlb_mm = sun4c_flush_tlb_mm;
-	local_flush_tlb_range = sun4c_flush_tlb_range;
-	local_flush_tlb_page = sun4c_flush_tlb_page;
-
-	flush_cache_all = smp_flush_cache_all;
-	flush_cache_mm = smp_flush_cache_mm;
-	flush_cache_range = smp_flush_cache_range;
-	flush_cache_page = smp_flush_cache_page;
-
-	flush_tlb_all = smp_flush_tlb_all;
-	flush_tlb_mm = smp_flush_tlb_mm;
-	flush_tlb_range = smp_flush_tlb_range;
-	flush_tlb_page = smp_flush_tlb_page;
-#endif
 
-	flush_page_to_ram = sun4c_flush_page_to_ram;
+	flush_sig_insns = sun4c_flush_sig_insns;
 
 	set_pte = sun4c_set_pte;
-	switch_to_context = sun4c_switch_to_context;
 	pmd_align = sun4c_pmd_align;
 	pgdir_align = sun4c_pgdir_align;
 	vmalloc_start = sun4c_vmalloc_start;
@@ -1935,8 +2721,7 @@ void ld_mmu_sun4c(void)
 	pte_mkdirty = sun4c_pte_mkdirty;
 	pte_mkyoung = sun4c_pte_mkyoung;
 	update_mmu_cache = sun4c_update_mmu_cache;
-	mmu_exit_hook = sun4c_exit_hook;
-	mmu_flush_hook = sun4c_flush_hook;
+
 	mmu_lockarea = sun4c_lockarea;
 	mmu_unlockarea = sun4c_unlockarea;
 
@@ -1953,8 +2738,6 @@ void ld_mmu_sun4c(void)
 	/* Task struct and kernel stack allocating/freeing. */
 	alloc_kernel_stack = sun4c_alloc_kernel_stack;
 	alloc_task_struct = sun4c_alloc_task_struct;
-	free_kernel_stack = sun4c_free_kernel_stack;
-	free_task_struct = sun4c_free_task_struct;
 
 	quick_kernel_fault = sun4c_quick_kernel_fault;
 	mmu_info = sun4c_mmu_info;
diff --git a/arch/sparc/mm/viking.S b/arch/sparc/mm/viking.S
new file mode 100644
index 000000000..f61aa4398
--- /dev/null
+++ b/arch/sparc/mm/viking.S
@@ -0,0 +1,267 @@
+/* $Id: viking.S,v 1.2 1997/04/20 21:21:49 ecd Exp $
+ * viking.S: High speed Viking cache/mmu operations
+ *
+ * Copyright (C) 1997  Eddie C. Dost  (ecd@skynet.be)
+ */
+
+#include <asm/ptrace.h>
+#include <asm/psr.h>
+#include <asm/asi.h>
+#include <asm/mxcc.h>
+#include <asm/page.h>
+#include <asm/pgtsrmmu.h>
+#include <asm/viking.h>
+#include <asm/cprefix.h>
+
+#define WINDOW_FLUSH(tmp1, tmp2)	/* save until uwinmask reads zero, then restore as many times; tmp1/tmp2 are scratch */	\
+	mov	0, tmp1;	/* tmp1 counts the saves issued below */	\
+98:	ld	[%g6 + AOFF_task_tss + AOFF_thread_uwinmask], tmp2;	/* reloaded every pass; NOTE(review): assumes %g6 is the current task pointer */	\
+	orcc	%g0, tmp2, %g0;	/* set condition codes from tmp2 */	\
+	add	tmp1, 1, tmp1;						\
+	bne	98b;	/* loop while uwinmask is non-zero; presumably overflow traps clear its bits - confirm */	\
+	 save	%sp, -64, %sp;	/* delay slot: executes on every pass, including the final one */	\
+99:	subcc	tmp1, 1, tmp1;	/* unwind: one restore per save issued above */	\
+	bne	99b;							\
+	 restore %g0, %g0, %g0;	/* delay slot: executes on every pass */
+
+	.text
+	.align	4
+
+	.globl	viking_flush_cache_all, viking_flush_cache_mm
+	.globl	viking_flush_cache_range, viking_flush_cache_page
+	.globl	viking_flush_page, viking_mxcc_flush_page
+	.globl	viking_flush_page_for_dma, viking_flush_page_to_ram
+	.globl	viking_flush_chunk, viking_mxcc_flush_chunk
+	.globl	viking_flush_sig_insns
+	.globl	viking_flush_tlb_all, viking_flush_tlb_mm
+	.globl	viking_flush_tlb_range, viking_flush_tlb_page
+
+viking_flush_page:		! in: %o0 = address; page-aligned, then looked up in srmmu_map
+viking_flush_chunk:		! alias: same entry point as viking_flush_page
+	sethi	%hi(C_LABEL(srmmu_map)), %g2
+	or	%g2, %lo(C_LABEL(srmmu_map)), %g3	! %g3 = address of the first srmmu_map entry
+	ld	[%g3 + 8], %g2		! %g2 = word at +8 (presumably the entry size - confirm)
+	cmp	%g2, 0
+	be	3f			! first entry has zero at +8: nothing to do
+	 and	%o0, PAGE_MASK, %o0	! delay slot: page-align the address
+
+	ld	[%g3], %o1		! %o1 = word at +0 (presumably the virtual base - confirm)
+1:
+	cmp	%o1, %o0
+	bgu,a	2f			! base is above the address: try the next entry
+	 add	%g3, 0xc, %g3		! annulled slot: step 12 bytes only when the branch is taken
+
+	add	%o1, %g2, %g2		! %g2 = base + size
+	cmp	%g2, %o0
+	bleu,a	2f			! address is at or past the end of this entry: try the next
+	 add	%g3, 0xc, %g3		! annulled slot: step 12 bytes only when the branch is taken
+
+	sub	%o0, %o1, %g2		! offset of the address inside the matching entry
+	ld	[%g3 + 4], %o0		! word at +4 (presumably the physical base - confirm)
+	add	%g2, %o0, %g3		! %g3 = offset + word at +4, i.e. the translated address
+	b	4f
+	 srl	%g3, 12, %g1		! ppage >> 12
+
+2:
+	ld	[%g3 + 8], %g2		! +8 word of the next entry
+	cmp	%g2, 0
+	bne,a	1b			! non-zero: examine that entry; zero ends the table walk
+	 ld	[%g3], %o1		! annulled slot: fetch its +0 word only when looping
+3:
+	retl				! no matching entry: return without touching the cache
+	 nop
+
+4:
+	clr	%o1			! set counter, 0 - 127
+	sethi	%hi(KERNBASE + PAGE_SIZE - 0x80000000), %o3	! bias: added to %g4 below it yields KERNBASE + PAGE_SIZE + (set << 5)
+	sethi	%hi(0x80000000), %o4
+	sethi	%hi(VIKING_PTAG_VALID | VIKING_PTAG_DIRTY), %o5	! both of these tag bits must be set for a hit
+	sethi	%hi(PAGE_SIZE), %o0	! stride between the displacement loads below
+	clr	%o2			! block counter, 0 - 3
+5:
+	sll	%o1, 5, %g4
+	or	%g4, %o4, %g4		! 0x80000000 | (set << 5)
+
+	sll	%o2, 26, %g5		! block << 26
+6:
+	or	%g5, %g4, %g5		! tag address: block field combined with set field
+	ldda	[%g5] ASI_M_DATAC_TAG, %g2	! doubleword tag read into the %g2/%g3 pair
+	cmp	%g3, %g1		! ptag == ppage?
+	bne,a	7f
+	 inc	%o2			! annulled slot: advance to the next block only on mismatch
+
+	and	%g2, %o5, %g3		! ptag VALID and DIRTY?
+	cmp	%g3, %o5
+	bne,a	7f
+	 inc	%o2			! annulled slot: advance to the next block only on mismatch
+
+	add	%g4, %o3, %g2		! (KERNBASE + PAGE_SIZE) | (set << 5)
+	ld	[%g2], %g3		! eight loads follow, each one page further on;
+	add	%g2, %o0, %g2		! presumably they displace the dirty line from its set - confirm
+	ld	[%g2], %g3
+	add	%g2, %o0, %g2
+	ld	[%g2], %g3
+	add	%g2, %o0, %g2
+	ld	[%g2], %g3
+	add	%g2, %o0, %g2
+	ld	[%g2], %g3
+	add	%g2, %o0, %g2
+	ld	[%g2], %g3
+	add	%g2, %o0, %g2
+	ld	[%g2], %g3
+	add	%g2, %o0, %g2
+	ld	[%g2], %g3		! loaded values are discarded; only the accesses matter
+
+	b	8f			! this set is done: skip its remaining blocks
+	 inc	%o1			! delay slot: next set
+
+7:
+	cmp	%o2, 3
+	ble	6b			! blocks remain in this set: check the next one
+	 sll	%o2, 26, %g5			! block << 26
+
+	inc	%o1			! all four blocks checked without a hit: next set
+8:
+	cmp	%o1, 0x7f
+	ble	5b			! continue until set 127 has been processed
+	 clr	%o2			! delay slot: restart the block counter
+
+	retl
+	 nop
+
+
+viking_mxcc_flush_page:		! in: %o0 = address; page-aligned, then looked up in srmmu_map
+	sethi	%hi(C_LABEL(srmmu_map)), %g2
+	or	%g2, %lo(C_LABEL(srmmu_map)), %g3	! %g3 = address of the first srmmu_map entry
+	ld	[%g3 + 8], %g2		! %g2 = word at +8 (presumably the entry size - confirm)
+	cmp	%g2, 0
+	be	3f			! first entry has zero at +8: nothing to do
+	 and	%o0, PAGE_MASK, %o0	! delay slot: page-align the address
+
+	ld	[%g3], %o1		! %o1 = word at +0 (presumably the virtual base - confirm)
+1:
+	cmp	%o1, %o0
+	bgu,a	2f			! base is above the address: try the next entry
+	 add	%g3, 0xc, %g3		! annulled slot: step 12 bytes only when the branch is taken
+
+	add	%o1, %g2, %g2		! %g2 = base + size
+	cmp	%g2, %o0
+	bleu,a	2f			! address is at or past the end of this entry: try the next
+	 add	%g3, 0xc, %g3		! annulled slot: step 12 bytes only when the branch is taken
+
+	sub	%o0, %o1, %g2		! offset of the address inside the matching entry
+	ld	[%g3 + 4], %o0		! word at +4 (presumably the physical base - confirm)
+	add	%g2, %o0, %g3		! %g3 = offset + word at +4, i.e. the translated address
+	sethi	%hi(PAGE_SIZE), %g4
+	b	4f
+	 add	%g3, %g4, %g3			! ppage + PAGE_SIZE
+
+2:
+	ld	[%g3 + 8], %g2		! +8 word of the next entry
+	cmp	%g2, 0
+	bne,a	1b			! non-zero: examine that entry; zero ends the table walk
+	 ld	[%g3], %o1		! annulled slot: fetch its +0 word only when looping
+3:
+	retl				! no matching entry: return without flushing
+	 nop
+4:
+	mov	0x10, %g2			! set cacheable bit
+	sethi	%hi(MXCC_SRCSTREAM), %o2
+	or	%o2, %lo(MXCC_SRCSTREAM), %o2	! %o2 = source stream register address
+	sethi	%hi(MXCC_DESSTREAM), %o3
+	or	%o3, %lo(MXCC_DESSTREAM), %o3	! %o3 = destination stream register address
+
+5:					! NOTE(review): no branch in this routine targets label 5
+	sub	%g3, MXCC_STREAM_SIZE, %g3	! back up from the page end to its last chunk
+6:
+	stda	%g2, [%o2] ASI_M_MXCC	! write the %g2/%g3 pair (0x10, address) as the stream source
+	stda	%g2, [%o3] ASI_M_MXCC	! same pair as destination; presumably copies the chunk onto itself - confirm
+	andncc	%g3, PAGE_MASK, %g0	! test the in-page offset bits of %g3
+	bne	6b			! loop until the chunk at page offset 0 has been written
+	 sub	%g3, MXCC_STREAM_SIZE, %g3	! delay slot: always executes, steps one chunk lower
+
+	retl
+	 nop
+
+viking_mxcc_flush_chunk:	! no-op: returns to the caller immediately
+	retl
+	 nop			! delay slot filler
+
+viking_flush_cache_all:		! these four entry points share one no-op body
+viking_flush_cache_mm:
+viking_flush_cache_range:
+viking_flush_cache_page:
+	retl				! return immediately; arguments are ignored
+	 nop				! delay slot filler
+
+viking_flush_tlb_all:		! run the window flush, then issue one flush/probe store at 0x400
+	WINDOW_FLUSH(%g4, %g5)
+	mov	0x400, %g1		! presumably SRMMU flush type 4 (entire) - confirm
+	retl
+	 sta	%g0, [%g1] ASI_M_FLUSH_PROBE	! delay slot: the store itself triggers the flush
+
+viking_flush_tlb_mm:		! in: %o0 = mm pointer (its context field is read below)
+	mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + AOFF_mm_context], %o1	! %o1 = context value stored in the mm
+	lda	[%g1] ASI_M_MMUREGS, %g5	! %g5 = current context register, written back on exit
+#ifndef __SMP__
+	cmp	%o1, -1
+	be	viking_flush_tlb_mm_out	! context is -1: skip the flush; delay slot is the first macro insn (mov 0, %g2)
+#endif
+	WINDOW_FLUSH(%g2, %g3)
+
+	mov	0x300, %g2		! presumably SRMMU flush type 3 (context) - confirm
+	sta	%o1, [%g1] ASI_M_MMUREGS	! switch the MMU to the target context
+	sta	%g0, [%g2] ASI_M_FLUSH_PROBE	! the store itself triggers the flush
+viking_flush_tlb_mm_out:
+	retl
+	 sta	%g5, [%g1] ASI_M_MMUREGS	! delay slot: write the saved context back
+
+viking_flush_tlb_range:		! in: %o0 = mm pointer, %o1 = start address, %o2 = end address (loop bound)
+	mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + AOFF_mm_context], %o3	! %o3 = context value stored in the mm
+	lda	[%g1] ASI_M_MMUREGS, %g5	! %g5 = current context register, written back on exit
+#ifndef __SMP__
+	cmp	%o3, -1
+	be	viking_flush_tlb_range_out	! context is -1: skip the flush; delay slot is the first macro insn (mov 0, %g2)
+#endif
+	WINDOW_FLUSH(%g2, %g3)
+
+	srl	%o1, SRMMU_PGDIR_SHIFT, %o1	! with the sll below: round start down to a pgdir boundary
+	sta	%o3, [%g1] ASI_M_MMUREGS	! switch the MMU to the target context
+	sll	%o1, SRMMU_PGDIR_SHIFT, %o1
+	sethi	%hi(1 << SRMMU_PGDIR_SHIFT), %o4	! %o4 = step of one pgdir span
+	add	%o1, 0x200, %o1		! presumably SRMMU flush type 2 (region) - confirm
+	sta	%g0, [%o1] ASI_M_FLUSH_PROBE	! flush the first span unconditionally
+1:
+	add	%o1, %o4, %o1		! advance to the next span
+	cmp	%o1, %o2
+	blu,a	1b			! keep going while %o1 is below %o2 (unsigned)
+	 sta	%g0, [%o1] ASI_M_FLUSH_PROBE	! annulled slot: flushes only when the loop continues
+viking_flush_tlb_range_out:
+	retl
+	 sta	%g5, [%g1] ASI_M_MMUREGS	! delay slot: write the saved context back
+
+viking_flush_tlb_page:		! in: %o0 = vma pointer, %o1 = address inside the page to flush
+	ld	[%o0 + 0x00], %o0	/* XXX vma->vm_mm GROSS XXX */
+	mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + AOFF_mm_context], %o3	! %o3 = context value stored in the mm
+	and	%o1, PAGE_MASK, %o1	! page-align; low bits zero, presumably SRMMU flush type 0 (page) - confirm
+	lda	[%g1] ASI_M_MMUREGS, %g5	! %g5 = current context register, written back on exit
+#ifndef __SMP__
+	cmp	%o3, -1
+	be	viking_flush_tlb_page_out	! context is -1: skip the flush; delay slot is the first macro insn (mov 0, %g2)
+#endif
+	WINDOW_FLUSH(%g2, %g3)
+
+	sta	%o3, [%g1] ASI_M_MMUREGS	! switch the MMU to the target context
+	sta	%g0, [%o1] ASI_M_FLUSH_PROBE	! the store itself triggers the flush
+viking_flush_tlb_page_out:
+	retl
+	 sta	%g5, [%g1] ASI_M_MMUREGS	! delay slot: write the saved context back
+
+viking_flush_page_to_ram:	! these three entry points share one no-op body
+viking_flush_page_for_dma:
+viking_flush_sig_insns:
+	retl				! return immediately; arguments are ignored
+	 nop				! delay slot filler
author	Ralf Baechle <ralf@linux-mips.org>	1997-04-29 21:13:14 +0000
committer	<ralf@linux-mips.org>	1997-04-29 21:13:14 +0000
commit	19c9bba94152148523ba0f7ef7cffe3d45656b11 (patch)
tree	40b1cb534496a7f1ca0f5c314a523c69f1fee464 /arch/sparc/mm
parent	7206675c40394c78a90e74812bbdbf8cf3cca1be (diff)