Diffstat (limited to 'arch/sparc/mm')
-rw-r--r--  arch/sparc/mm/Makefile      |  22
-rw-r--r--  arch/sparc/mm/asyncd.c      |   7
-rw-r--r--  arch/sparc/mm/fault.c       |   6
-rw-r--r--  arch/sparc/mm/hypersparc.S  |  74
-rw-r--r--  arch/sparc/mm/srmmu.c       | 284
-rw-r--r--  arch/sparc/mm/sun4c.c       | 120
-rw-r--r--  arch/sparc/mm/tsunami.S     |  90
-rw-r--r--  arch/sparc/mm/viking.S      |  85
8 files changed, 347 insertions(+), 341 deletions(-)
diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile
index 4ae57f18f..f7b9b367c 100644
--- a/arch/sparc/mm/Makefile
+++ b/arch/sparc/mm/Makefile
@@ -1,4 +1,4 @@
-# $Id: Makefile,v 1.24 1997/04/20 14:11:49 ecd Exp $
+# $Id: Makefile,v 1.25 1997/05/03 05:09:11 davem Exp $
# Makefile for the linux Sparc-specific parts of the memory manager.
#
# Note! Dependencies are done automagically by 'make dep', which also
@@ -9,12 +9,30 @@
O_TARGET := mm.o
O_OBJS := fault.o init.o sun4c.o srmmu.o hypersparc.o viking.o \
- loadmmu.o generic.o asyncd.o extable.o
+ tsunami.o loadmmu.o generic.o asyncd.o extable.o
include $(TOPDIR)/Rules.make
+ifdef SMP
+
+hypersparc.o: hypersparc.S
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -ansi -c -o hypersparc.o hypersparc.S
+
+viking.o: viking.S
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -ansi -c -o viking.o viking.S
+
+tsunami.o: tsunami.S
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -ansi -c -o tsunami.o tsunami.S
+
+else
+
hypersparc.o: hypersparc.S
$(CC) -D__ASSEMBLY__ -ansi -c -o hypersparc.o hypersparc.S
viking.o: viking.S
$(CC) -D__ASSEMBLY__ -ansi -c -o viking.o viking.S
+
+tsunami.o: tsunami.S
+ $(CC) -D__ASSEMBLY__ -ansi -c -o tsunami.o tsunami.S
+
+endif
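
The SMP/UP split above matters because these .S files guard a uniprocessor
fast path with #ifndef __SMP__: if an mm was never given a hardware context
there is nothing to flush, but on SMP another CPU may still hold that
context, so the shortcut must be compiled out. A minimal C sketch of the
guarded shortcut, assuming the SMP build's $(AFLAGS) defines __SMP__
(function name and flush body are illustrative):

	/* Hedged model of the fast path the assembly flush routines
	 * take on uniprocessor builds. */
	static void sketch_flush_tlb_mm(struct mm_struct *mm)
	{
	#ifndef __SMP__
		if (mm->context == NO_CONTEXT)
			return;	/* mm never loaded into the MMU: no TLB state */
	#endif
		/* ...flush every TLB entry tagged with mm->context... */
	}
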
diff --git a/arch/sparc/mm/asyncd.c b/arch/sparc/mm/asyncd.c
index 5d9d476a5..46635db97 100644
--- a/arch/sparc/mm/asyncd.c
+++ b/arch/sparc/mm/asyncd.c
@@ -1,4 +1,4 @@
-/* $Id: asyncd.c,v 1.9 1996/12/18 06:43:22 tridge Exp $
+/* $Id: asyncd.c,v 1.10 1997/05/15 21:14:24 davem Exp $
* The asyncd kernel daemon. This handles paging on behalf of
* processes that receive page faults due to remote (async) memory
* accesses.
@@ -153,7 +153,7 @@ static int fault_in_page(int taskid,
if(!pte)
goto no_memory;
if(!pte_present(*pte)) {
- do_no_page(tsk, vma, address, write);
+ handle_mm_fault(tsk, vma, address, write);
goto finish_up;
}
set_pte(pte, pte_mkyoung(*pte));
@@ -165,12 +165,11 @@ static int fault_in_page(int taskid,
flush_tlb_page(vma, address);
goto finish_up;
}
- do_wp_page(tsk, vma, address, write);
+ handle_mm_fault(tsk, vma, address, write);
/* Fall through for do_wp_page */
finish_up:
stats.success++;
- update_mmu_cache(vma, address, *pte);
return 0;
no_memory:
diff --git a/arch/sparc/mm/fault.c b/arch/sparc/mm/fault.c
index cfac6bcc2..0d6490860 100644
--- a/arch/sparc/mm/fault.c
+++ b/arch/sparc/mm/fault.c
@@ -1,4 +1,4 @@
-/* $Id: fault.c,v 1.91 1997/03/18 17:56:00 jj Exp $
+/* $Id: fault.c,v 1.92 1997/05/15 21:14:21 davem Exp $
* fault.c: Page fault handlers for the Sparc.
*
* Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
@@ -229,7 +229,7 @@ good_area:
if(!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto bad_area;
}
- handle_mm_fault(vma, address, write);
+ handle_mm_fault(current, vma, address, write);
up(&mm->mmap_sem);
goto out;
/*
@@ -370,7 +370,7 @@ good_area:
else
if(!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto bad_area;
- handle_mm_fault(vma, address, write);
+ handle_mm_fault(current, vma, address, write);
up(&mm->mmap_sem);
return;
bad_area:
diff --git a/arch/sparc/mm/hypersparc.S b/arch/sparc/mm/hypersparc.S
index 4e5a19301..62e2022e0 100644
--- a/arch/sparc/mm/hypersparc.S
+++ b/arch/sparc/mm/hypersparc.S
@@ -1,4 +1,4 @@
-/* $Id: hypersparc.S,v 1.4 1997/04/19 04:33:39 davem Exp $
+/* $Id: hypersparc.S,v 1.7 1997/05/03 05:09:12 davem Exp $
* hypersparc.S: High speed Hypersparc mmu/cache operations.
*
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
@@ -79,7 +79,7 @@ hypersparc_flush_cache_mm:
sta %g0, [%o0 + %o4] ASI_M_FLUSH_USER
hypersparc_flush_cache_mm_out:
retl
- sta %g0, [%g0 + %g0] ASI_M_FLUSH_IWHOLE ! hyper_flush_whole_icache
+ nop
/* The things we do for performance... */
hypersparc_flush_cache_range:
@@ -126,7 +126,7 @@ hypersparc_flush_cache_range:
bne 1b
sta %g0, [%o3 + %g5] ASI_M_FLUSH_USER
retl
- sta %g0, [%g0 + %g0] ASI_M_FLUSH_IWHOLE
+ nop
/* Below our threshold, flush one page at a time. */
0:
@@ -166,7 +166,7 @@ hypersparc_flush_cache_range:
sta %o3, [%g7] ASI_M_MMUREGS
hypersparc_flush_cache_range_out:
retl
- sta %g0, [%g0 + %g0] ASI_M_FLUSH_IWHOLE
+ nop
/* HyperSparc requires a valid mapping where we are about to flush
* in order to check for a physical tag match during the flush.
@@ -221,12 +221,12 @@ hypersparc_flush_cache_page:
sta %o2, [%g4] ASI_M_MMUREGS
hypersparc_flush_cache_page_out:
retl
- sta %g0, [%g0 + %g0] ASI_M_FLUSH_IWHOLE
+ nop
hypersparc_flush_sig_insns:
- flush %o2
+ flush %o1
retl
- flush %o2 + 4
+ flush %o1 + 4
/* HyperSparc is copy-back. */
hypersparc_flush_page_to_ram:
@@ -289,7 +289,7 @@ hypersparc_flush_tlb_mm:
cmp %o1, -1
be hypersparc_flush_tlb_mm_out
#endif
- mov 0x300, %g2
+ mov 0x300, %g2
sta %o1, [%g1] ASI_M_MMUREGS
sta %g0, [%g2] ASI_M_FLUSH_PROBE
hypersparc_flush_tlb_mm_out:
@@ -304,7 +304,7 @@ hypersparc_flush_tlb_range:
cmp %o3, -1
be hypersparc_flush_tlb_range_out
#endif
- srl %o1, SRMMU_PGDIR_SHIFT, %o1
+ srl %o1, SRMMU_PGDIR_SHIFT, %o1
sta %o3, [%g1] ASI_M_MMUREGS
sll %o1, SRMMU_PGDIR_SHIFT, %o1
sethi %hi(1 << SRMMU_PGDIR_SHIFT), %o4
@@ -324,13 +324,67 @@ hypersparc_flush_tlb_page:
mov SRMMU_CTX_REG, %g1
ld [%o0 + AOFF_mm_context], %o3
andn %o1, (PAGE_SIZE - 1), %o1
- lda [%g1] ASI_M_MMUREGS, %g5
#ifndef __SMP__
cmp %o3, -1
be hypersparc_flush_tlb_page_out
#endif
+ lda [%g1] ASI_M_MMUREGS, %g5
sta %o3, [%g1] ASI_M_MMUREGS
sta %g0, [%o1] ASI_M_FLUSH_PROBE
hypersparc_flush_tlb_page_out:
retl
sta %g5, [%g1] ASI_M_MMUREGS
+
+ /* High speed page clear/copy. */
+ .globl hypersparc_bzero_1page, hypersparc_copy_1page
+hypersparc_bzero_1page:
+ clr %g1
+ mov 32, %g2
+ add %g2, %g2, %g3
+ add %g2, %g3, %g4
+ add %g2, %g4, %g5
+ add %g2, %g5, %g7
+ add %g2, %g7, %o2
+ add %g2, %o2, %o3
+ mov 16, %o1
+1:
+ stda %g0, [%o0 + %g0] ASI_M_BFILL
+ stda %g0, [%o0 + %g2] ASI_M_BFILL
+ stda %g0, [%o0 + %g3] ASI_M_BFILL
+ stda %g0, [%o0 + %g4] ASI_M_BFILL
+ stda %g0, [%o0 + %g5] ASI_M_BFILL
+ stda %g0, [%o0 + %g7] ASI_M_BFILL
+ stda %g0, [%o0 + %o2] ASI_M_BFILL
+ stda %g0, [%o0 + %o3] ASI_M_BFILL
+ subcc %o1, 1, %o1
+ bne 1b
+ add %o0, 256, %o0
+
+ retl
+ nop
+
+hypersparc_copy_1page:
+ sub %o1, %o0, %o2 ! difference
+ mov 16, %g1
+1:
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
+ add %o0, 32, %o0
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
+ add %o0, 32, %o0
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
+ add %o0, 32, %o0
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
+ add %o0, 32, %o0
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
+ add %o0, 32, %o0
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
+ add %o0, 32, %o0
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
+ add %o0, 32, %o0
+ sta %o0, [%o0 + %o2] ASI_M_BCOPY
+ subcc %g1, 1, %g1
+ bne 1b
+ add %o0, 32, %o0
+
+ retl
+ nop
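
The two new routines above work a 4K page in 256-byte passes: eight stores,
each touching one 32-byte cache line through the HyperSparc block-fill and
block-copy ASIs, repeated 16 times (16 x 8 x 32 = 4096). A hedged C model
in which the ASI_M_BFILL line-fill stores are replaced by ordinary 32-byte
writes:

	#include <string.h>

	#define HS_LINE		32		/* one cache line per stda */
	#define HS_CHUNK	(8 * HS_LINE)	/* 256 bytes per loop pass */
	#define HS_PAGE		4096

	/* Model of hypersparc_bzero_1page: same addressing pattern,
	 * plain memory stores standing in for ASI_M_BFILL. */
	static void model_bzero_1page(void *page)
	{
		char *p = page;
		int pass, line;

		for (pass = 0; pass < HS_PAGE / HS_CHUNK; pass++, p += HS_CHUNK)
			for (line = 0; line < 8; line++)
				memset(p + line * HS_LINE, 0, HS_LINE);
	}
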
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index 9d3afdbdf..b04064efb 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -1,4 +1,4 @@
-/* $Id: srmmu.c,v 1.136 1997/04/20 14:11:51 ecd Exp $
+/* $Id: srmmu.c,v 1.146 1997/05/18 21:11:09 davem Exp $
* srmmu.c: SRMMU specific routines for memory management.
*
* Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
@@ -77,12 +77,20 @@ ctxd_t *srmmu_context_table;
/* Don't change this without changing access to this
* in arch/sparc/mm/viking.S
*/
-struct srmmu_trans {
+static struct srmmu_trans {
unsigned long vbase;
unsigned long pbase;
unsigned long size;
} srmmu_map[SPARC_PHYS_BANKS];
+#define SRMMU_HASHSZ 256
+
+/* Not static, viking.S uses it. */
+struct srmmu_trans *srmmu_v2p_hash[SRMMU_HASHSZ];
+static struct srmmu_trans *srmmu_p2v_hash[SRMMU_HASHSZ];
+
+#define srmmu_ahashfn(addr) ((addr) >> 24)
+
static int viking_mxcc_present = 0;
void srmmu_frob_mem_map(unsigned long start_mem)
@@ -113,31 +121,26 @@ void srmmu_frob_mem_map(unsigned long start_mem)
/* Physical memory can be _very_ non-contiguous on the sun4m, especially
* the SS10/20 class machines and with the latest openprom revisions.
- * So we have to crunch the free page pool.
+ * So we have to do a quick lookup.
*/
static inline unsigned long srmmu_v2p(unsigned long vaddr)
{
- int i;
+ struct srmmu_trans *tp = srmmu_v2p_hash[srmmu_ahashfn(vaddr)];
- for(i=0; srmmu_map[i].size != 0; i++) {
- if(srmmu_map[i].vbase <= vaddr &&
- (srmmu_map[i].vbase + srmmu_map[i].size > vaddr)) {
- return (vaddr - srmmu_map[i].vbase) + srmmu_map[i].pbase;
- }
- }
- return 0xffffffffUL;
+ if(tp)
+ return (vaddr - tp->vbase + tp->pbase);
+ else
+ return 0xffffffffUL;
}
static inline unsigned long srmmu_p2v(unsigned long paddr)
{
- int i;
+ struct srmmu_trans *tp = srmmu_p2v_hash[srmmu_ahashfn(paddr)];
- for(i=0; srmmu_map[i].size != 0; i++) {
- if(srmmu_map[i].pbase <= paddr &&
- (srmmu_map[i].pbase + srmmu_map[i].size > paddr))
- return (paddr - srmmu_map[i].pbase) + srmmu_map[i].vbase;
- }
- return 0xffffffffUL;
+ if(tp)
+ return (paddr - tp->pbase + tp->vbase);
+ else
+ return 0xffffffffUL;
}
/* In general all page table modifications should use the V8 atomic
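
The rewritten lookups trade the linear bank scan for a direct index: bits
[31:24] of the address select one of the 256 slots, so each slot covers
16MB, and the setup code later in this patch registers the owning bank in
every 16MB slot its virtual and physical extents touch. A sketch of that
invariant, reusing the srmmu_trans and hash declarations above (bank
contents are assumed to come from the boot-time PROM probe):

	/* Register one memory bank in both hashes so that
	 * srmmu_v2p()/srmmu_p2v() above, and the assembly lookup in
	 * viking.S, become a single table index plus an add. */
	static void sketch_hash_one_bank(struct srmmu_trans *bank)
	{
		unsigned long a;

		for (a = bank->vbase; a < bank->vbase + bank->size; a += (1 << 24))
			srmmu_v2p_hash[srmmu_ahashfn(a)] = bank;
		for (a = bank->pbase; a < bank->pbase + bank->size; a += (1 << 24))
			srmmu_p2v_hash[srmmu_ahashfn(a)] = bank;
	}
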
@@ -659,27 +662,6 @@ static void srmmu_set_pte_cacheable(pte_t *ptep, pte_t pteval)
srmmu_set_entry(ptep, pte_val(pteval));
}
-static void srmmu_set_pte_nocache_hyper(pte_t *ptep, pte_t pteval)
-{
- unsigned long page = ((unsigned long)ptep) & PAGE_MASK;
-
- srmmu_set_entry(ptep, pte_val(pteval));
- __asm__ __volatile__("
- lda [%0] %2, %%g4
- orcc %%g4, 0x0, %%g0
- be 2f
- sethi %%hi(%7), %%g5
-1: subcc %%g5, %6, %%g5 ! hyper_flush_cache_page
- bne 1b
- sta %%g0, [%1 + %%g5] %3
- lda [%4] %5, %%g0
-2:" : /* no outputs */
- : "r" (page | 0x400), "r" (page), "i" (ASI_M_FLUSH_PROBE),
- "i" (ASI_M_FLUSH_PAGE), "r" (SRMMU_FAULT_STATUS), "i" (ASI_M_MMUREGS),
- "r" (vac_line_size), "i" (PAGE_SIZE)
- : "g4", "g5", "cc");
-}
-
static void srmmu_set_pte_nocache_cypress(pte_t *ptep, pte_t pteval)
{
register unsigned long a, b, c, d, e, f, g;
@@ -860,134 +842,27 @@ static void srmmu_unlockarea(char *vaddr, unsigned long len)
*/
struct task_struct *srmmu_alloc_task_struct(void)
{
- return (struct task_struct *) kmalloc(sizeof(struct task_struct), GFP_KERNEL);
-}
-
-unsigned long srmmu_alloc_kernel_stack(struct task_struct *tsk)
-{
- unsigned long kstk = __get_free_pages(GFP_KERNEL, 1, 0);
-
- if(!kstk)
- kstk = (unsigned long) vmalloc(PAGE_SIZE << 1);
-
- return kstk;
+ return (struct task_struct *) __get_free_pages(GFP_KERNEL, 1, 0);
}
static void srmmu_free_task_struct(struct task_struct *tsk)
{
- kfree(tsk);
-}
-
-static void srmmu_free_kernel_stack(unsigned long stack)
-{
- if(stack < VMALLOC_START)
- free_pages(stack, 1);
- else
- vfree((char *)stack);
-}
-
-/* Tsunami flushes. It's page level tlb invalidation is not very
- * useful at all, you must be in the context that page exists in to
- * get a match.
- */
-static void tsunami_flush_cache_all(void)
-{
- flush_user_windows();
- tsunami_flush_icache();
- tsunami_flush_dcache();
-}
-
-static void tsunami_flush_cache_mm(struct mm_struct *mm)
-{
- FLUSH_BEGIN(mm)
- flush_user_windows();
- tsunami_flush_icache();
- tsunami_flush_dcache();
- FLUSH_END
-}
-
-static void tsunami_flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
- FLUSH_BEGIN(mm)
- flush_user_windows();
- tsunami_flush_icache();
- tsunami_flush_dcache();
- FLUSH_END
-}
-
-static void tsunami_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
-{
- FLUSH_BEGIN(vma->vm_mm)
- flush_user_windows();
- tsunami_flush_icache();
- tsunami_flush_dcache();
- FLUSH_END
-}
-
-/* Tsunami does not have a Copy-back style virtual cache. */
-static void tsunami_flush_page_to_ram(unsigned long page)
-{
-}
-
-/* However, Tsunami is not IO coherent. */
-static void tsunami_flush_page_for_dma(unsigned long page)
-{
- tsunami_flush_icache();
- tsunami_flush_dcache();
-}
-
-/* Tsunami has harvard style split I/D caches which do not snoop each other,
- * so we have to flush on-stack sig insns. Only the icache need be flushed
- * since the Tsunami has a write-through data cache.
- */
-static void tsunami_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
-{
- tsunami_flush_icache();
-}
-
-static void tsunami_flush_chunk(unsigned long chunk)
-{
+ free_pages((unsigned long)tsk, 1);
}
-static void tsunami_flush_tlb_all(void)
-{
- srmmu_flush_whole_tlb();
- module_stats.invall++;
-}
-
-static void tsunami_flush_tlb_mm(struct mm_struct *mm)
-{
- FLUSH_BEGIN(mm)
- srmmu_flush_whole_tlb();
- module_stats.invmm++;
- FLUSH_END
-}
-
-static void tsunami_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
- FLUSH_BEGIN(mm)
- srmmu_flush_whole_tlb();
- module_stats.invrnge++;
- FLUSH_END
-}
-
-static void tsunami_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
-{
- struct mm_struct *mm = vma->vm_mm;
-
- FLUSH_BEGIN(mm)
- __asm__ __volatile__("
- lda [%0] %3, %%g5
- sta %1, [%0] %3
- sta %%g0, [%2] %4
- sta %%g5, [%0] %3"
- : /* no outputs */
- : "r" (SRMMU_CTX_REG), "r" (mm->context), "r" (page & PAGE_MASK),
- "i" (ASI_M_MMUREGS), "i" (ASI_M_FLUSH_PROBE)
- : "g5");
- module_stats.invpg++;
- FLUSH_END
-}
+/* tsunami.S */
+extern void tsunami_flush_cache_all(void);
+extern void tsunami_flush_cache_mm(struct mm_struct *mm);
+extern void tsunami_flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end);
+extern void tsunami_flush_cache_page(struct vm_area_struct *vma, unsigned long page);
+extern void tsunami_flush_page_to_ram(unsigned long page);
+extern void tsunami_flush_page_for_dma(unsigned long page);
+extern void tsunami_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr);
+extern void tsunami_flush_chunk(unsigned long chunk);
+extern void tsunami_flush_tlb_all(void);
+extern void tsunami_flush_tlb_mm(struct mm_struct *mm);
+extern void tsunami_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end);
+extern void tsunami_flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
/* Swift flushes. It has the recommended SRMMU specification flushing
* facilities, so we can do things in a more fine grained fashion than we
@@ -1364,18 +1239,31 @@ extern void hypersparc_flush_tlb_all(void);
extern void hypersparc_flush_tlb_mm(struct mm_struct *mm);
extern void hypersparc_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end);
extern void hypersparc_flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
+extern void hypersparc_bzero_1page(void *);
+extern void hypersparc_copy_1page(void *, const void *);
+
+static void srmmu_set_pte_nocache_hyper(pte_t *ptep, pte_t pteval)
+{
+ unsigned long page = ((unsigned long)ptep) & PAGE_MASK;
+
+ srmmu_set_entry(ptep, pte_val(pteval));
+ hypersparc_flush_page_to_ram(page);
+}
static void hypersparc_ctxd_set(ctxd_t *ctxp, pgd_t *pgdp)
{
+ srmmu_set_entry((pte_t *)ctxp, __pte((SRMMU_ET_PTD | (srmmu_v2p((unsigned long) pgdp) >> 4))));
+ hypersparc_flush_page_to_ram((unsigned long)ctxp);
hyper_flush_whole_icache();
- set_pte((pte_t *)ctxp, __pte((SRMMU_ET_PTD | (srmmu_v2p((unsigned long) pgdp) >> 4))));
}
static void hypersparc_update_rootmmu_dir(struct task_struct *tsk, pgd_t *pgdp)
{
unsigned long page = ((unsigned long) pgdp) & PAGE_MASK;
- hypersparc_flush_page_to_ram(page);
+ if(pgdp != swapper_pg_dir)
+ hypersparc_flush_page_to_ram(page);
+
if(tsk->mm->context != NO_CONTEXT) {
flush_cache_mm(tsk->mm);
ctxd_set(&srmmu_context_table[tsk->mm->context], pgdp);
@@ -1429,26 +1317,29 @@ static void cypress_update_rootmmu_dir(struct task_struct *tsk, pgd_t *pgdp)
static void hypersparc_switch_to_context(struct task_struct *tsk)
{
- hyper_flush_whole_icache();
if(tsk->mm->context == NO_CONTEXT) {
+ ctxd_t *ctxp;
+
alloc_context(tsk->mm);
- flush_cache_mm(tsk->mm);
- ctxd_set(&srmmu_context_table[tsk->mm->context], tsk->mm->pgd);
- flush_tlb_mm(tsk->mm);
+ ctxp = &srmmu_context_table[tsk->mm->context];
+ srmmu_set_entry((pte_t *)ctxp, __pte((SRMMU_ET_PTD | (srmmu_v2p((unsigned long) tsk->mm->pgd) >> 4))));
+ hypersparc_flush_page_to_ram((unsigned long)ctxp);
}
+ hyper_flush_whole_icache();
srmmu_set_context(tsk->mm->context);
}
static void hypersparc_init_new_context(struct mm_struct *mm)
{
- hyper_flush_whole_icache();
+ ctxd_t *ctxp;
alloc_context(mm);
- flush_cache_mm(mm);
- ctxd_set(&srmmu_context_table[mm->context], mm->pgd);
- flush_tlb_mm(mm);
+ ctxp = &srmmu_context_table[mm->context];
+ srmmu_set_entry((pte_t *)ctxp, __pte((SRMMU_ET_PTD | (srmmu_v2p((unsigned long) mm->pgd) >> 4))));
+ hypersparc_flush_page_to_ram((unsigned long)ctxp);
+ hyper_flush_whole_icache();
if(mm == current->mm)
srmmu_set_context(mm->context);
}
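
Note the ordering in the rewritten context handlers: the SRMMU table-walks
physical memory, and HyperSparc's data cache is copy-back, so a freshly
written context-table entry can sit in a dirty cache line where the walk
will never see it. The entry is therefore installed with srmmu_set_entry()
and pushed to DRAM with hypersparc_flush_page_to_ram() before the context
register is loaded. A condensed C restatement of the sequence above (a
sketch mirroring this patch, not a new interface):

	static void sketch_install_context(struct mm_struct *mm)
	{
		ctxd_t *ctxp = &srmmu_context_table[mm->context];

		srmmu_set_entry((pte_t *)ctxp,
				__pte(SRMMU_ET_PTD |
				      (srmmu_v2p((unsigned long)mm->pgd) >> 4)));
		/* Write the dirty line back: the MMU reads physical DRAM. */
		hypersparc_flush_page_to_ram((unsigned long)ctxp);
		hyper_flush_whole_icache();
		srmmu_set_context(mm->context);	/* now safe to switch */
	}
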
@@ -2150,6 +2041,32 @@ check_and_return:
MKTRACE(("success\n"));
init_task.mm->mmap->vm_start = page_offset = low_base;
stack_top = page_offset - PAGE_SIZE;
+#if 1
+ for(entry = 0; srmmu_map[entry].size; entry++) {
+ printk("[%d]: v[%08lx,%08lx](%lx) p[%08lx]\n", entry,
+ srmmu_map[entry].vbase,
+ srmmu_map[entry].vbase + srmmu_map[entry].size,
+ srmmu_map[entry].size,
+ srmmu_map[entry].pbase);
+ }
+#endif
+
+ /* Now setup the p2v/v2p hash tables. */
+ for(entry = 0; entry < SRMMU_HASHSZ; entry++)
+ srmmu_v2p_hash[entry] = srmmu_p2v_hash[entry] = NULL;
+ for(entry = 0; srmmu_map[entry].size; entry++) {
+ unsigned long addr;
+
+ for(addr = srmmu_map[entry].vbase;
+ addr < (srmmu_map[entry].vbase + srmmu_map[entry].size);
+ addr += (1 << 24))
+ srmmu_v2p_hash[srmmu_ahashfn(addr)] = &srmmu_map[entry];
+ for(addr = srmmu_map[entry].pbase;
+ addr < (srmmu_map[entry].pbase + srmmu_map[entry].size);
+ addr += (1 << 24))
+ srmmu_p2v_hash[srmmu_ahashfn(addr)] = &srmmu_map[entry];
+ }
+
return; /* SUCCESS! */
}
@@ -2338,7 +2255,7 @@ static void srmmu_vac_update_mmu_cache(struct vm_area_struct * vma,
start += PAGE_SIZE;
}
}
- } while ((vmaring = vmaring->vm_next_share) != inode->i_mmap);
+ } while ((vmaring = vmaring->vm_next_share) != NULL);
if(alias_found && !(pte_val(pte) & _SUN4C_PAGE_NOCACHE)) {
pgdp = srmmu_pgd_offset(vma->vm_mm, address);
@@ -2355,13 +2272,19 @@ static void srmmu_vac_update_mmu_cache(struct vm_area_struct * vma,
static void hypersparc_destroy_context(struct mm_struct *mm)
{
if(mm->context != NO_CONTEXT && mm->count == 1) {
+ ctxd_t *ctxp;
+
/* HyperSparc is copy-back, any data for this
* process in a modified cache line is stale
* and must be written back to main memory now
* else we eat shit later big time.
*/
flush_cache_mm(mm);
- ctxd_set(&srmmu_context_table[mm->context], swapper_pg_dir);
+
+ ctxp = &srmmu_context_table[mm->context];
+ srmmu_set_entry((pte_t *)ctxp, __pte((SRMMU_ET_PTD | (srmmu_v2p((unsigned long) swapper_pg_dir) >> 4))));
+ hypersparc_flush_page_to_ram((unsigned long)ctxp);
+
flush_tlb_mm(mm);
free_context(mm->context);
mm->context = NO_CONTEXT;
@@ -2450,6 +2373,11 @@ static void poke_hypersparc(void)
hyper_flush_whole_icache();
clear = srmmu_get_faddr();
clear = srmmu_get_fstatus();
+
+#ifdef __SMP__
+ /* Avoid unnecessary cross calls. */
+ flush_page_for_dma = local_flush_page_for_dma;
+#endif
}
__initfunc(static void init_hypersparc(void))
@@ -2482,6 +2410,14 @@ __initfunc(static void init_hypersparc(void))
update_mmu_cache = srmmu_vac_update_mmu_cache;
sparc_update_rootmmu_dir = hypersparc_update_rootmmu_dir;
poke_srmmu = poke_hypersparc;
+
+ /* High performance page copy/clear. */
+ { extern void (*__copy_1page)(void *, const void *);
+ extern void (*bzero_1page)(void *);
+
+ __copy_1page = hypersparc_copy_1page;
+ bzero_1page = hypersparc_bzero_1page;
+ }
}
static void poke_cypress(void)
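
These per-chip routines reach the rest of the kernel through function
pointers patched at boot, which is how the block above slots the
ASI-accelerated page primitives in for HyperSparc only. A stripped-down
model of the pattern (the generic fallback name is hypothetical):

	extern void hypersparc_bzero_1page(void *);

	static void generic_bzero_1page(void *page)
	{
		/* portable clearing loop */
	}

	void (*bzero_1page)(void *) = generic_bzero_1page;

	static void sketch_init_hypersparc(void)
	{
		/* Boot-time CPU probe found a HyperSparc: switch the
		 * whole kernel to the block-fill implementation. */
		bzero_1page = hypersparc_bzero_1page;
	}
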
@@ -3014,9 +2950,7 @@ __initfunc(void ld_mmu_srmmu(void))
mmu_p2v = srmmu_p2v;
/* Task struct and kernel stack allocating/freeing. */
- alloc_kernel_stack = srmmu_alloc_kernel_stack;
alloc_task_struct = srmmu_alloc_task_struct;
- free_kernel_stack = srmmu_free_kernel_stack;
free_task_struct = srmmu_free_task_struct;
quick_kernel_fault = srmmu_quick_kernel_fault;
diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
index ebeada4c7..a0ffc10ed 100644
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -1,4 +1,4 @@
-/* $Id: sun4c.c,v 1.143 1997/04/11 00:42:14 davem Exp $
+/* $Id: sun4c.c,v 1.148 1997/05/18 21:11:19 davem Exp $
* sun4c.c: Doing in software what should be done in hardware.
*
* Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
@@ -1093,8 +1093,7 @@ static void sun4c_quick_kernel_fault(unsigned long address)
panic("sun4c kernel fault handler bolixed...");
}
-/*
- * 4 page buckets for task struct and kernel stack allocation.
+/* 2 page buckets for task struct and kernel stack allocation.
*
* TASK_STACK_BEGIN
* bucket[0]
@@ -1105,24 +1104,17 @@ static void sun4c_quick_kernel_fault(unsigned long address)
*
* Each slot looks like:
*
- * page 1 -- task struct
- * page 2 -- unmapped, for stack redzone (maybe use for pgd)
- * page 3/4 -- kernel stack
+ * page 1 -- task struct + beginning of kernel stack
+ * page 2 -- rest of kernel stack
*/
-struct task_bucket {
- struct task_struct task;
- char _unused1[PAGE_SIZE - sizeof(struct task_struct)];
- char kstack[(PAGE_SIZE*3)];
-};
-
-struct task_bucket *sun4c_bucket[NR_TASKS];
+union task_union *sun4c_bucket[NR_TASKS];
static int sun4c_lowbucket_avail;
-#define BUCKET_EMPTY ((struct task_bucket *) 0)
-#define BUCKET_SIZE (PAGE_SIZE << 2)
-#define BUCKET_SHIFT 14 /* log2(sizeof(struct task_bucket)) */
+#define BUCKET_EMPTY ((union task_union *) 0)
+#define BUCKET_SHIFT (PAGE_SHIFT + 1) /* log2(sizeof(struct task_bucket)) */
+#define BUCKET_SIZE (1 << BUCKET_SHIFT)
#define BUCKET_NUM(addr) ((((addr) - SUN4C_LOCK_VADDR) >> BUCKET_SHIFT))
#define BUCKET_ADDR(num) (((num) << BUCKET_SHIFT) + SUN4C_LOCK_VADDR)
#define BUCKET_PTE(page) \
@@ -1177,10 +1169,10 @@ static inline void garbage_collect(int entry)
{
int start, end;
- /* 16 buckets per segment... */
- entry &= ~15;
+ /* 32 buckets per segment... */
+ entry &= ~31;
start = entry;
- for(end = (start + 16); start < end; start++)
+ for(end = (start + 32); start < end; start++)
if(sun4c_bucket[start] != BUCKET_EMPTY)
return;
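
The bucket geometry change is pure arithmetic: a bucket shrank from four
pages (16K) to two (8K, task struct plus kernel stack), so BUCKET_SHIFT
drops to PAGE_SHIFT + 1 = 13, and a 256K sun4c MMU segment now holds
(1 << 18) / (1 << 13) = 32 buckets instead of 16, which is exactly the new
~31 mask in garbage_collect(). A small standalone check of the macro math
(the SUN4C_LOCK_VADDR value here is illustrative only):

	#include <stdio.h>

	#define PAGE_SHIFT	 12
	#define BUCKET_SHIFT	 (PAGE_SHIFT + 1)	/* 2-page buckets */
	#define SUN4C_LOCK_VADDR 0xff000000UL		/* assumed base */
	#define BUCKET_NUM(a)	 (((a) - SUN4C_LOCK_VADDR) >> BUCKET_SHIFT)
	#define BUCKET_ADDR(n)	 (((unsigned long)(n) << BUCKET_SHIFT) + \
				  SUN4C_LOCK_VADDR)

	int main(void)
	{
		unsigned long addr = BUCKET_ADDR(37);

		printf("bucket 37 -> %#lx -> bucket %lu\n",
		       addr, BUCKET_NUM(addr));
		printf("buckets per 256K segment: %ld\n",
		       (1L << 18) >> BUCKET_SHIFT);
		return 0;
	}
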
@@ -1190,121 +1182,70 @@ static inline void garbage_collect(int entry)
static struct task_struct *sun4c_alloc_task_struct(void)
{
- unsigned long addr, page;
+ unsigned long addr, pages;
int entry;
- page = get_free_page(GFP_KERNEL);
- if(!page)
+ pages = __get_free_pages(GFP_KERNEL, 1, 0);
+ if(!pages)
return (struct task_struct *) 0;
for(entry = sun4c_lowbucket_avail; entry < NR_TASKS; entry++)
if(sun4c_bucket[entry] == BUCKET_EMPTY)
break;
if(entry == NR_TASKS) {
- free_page(page);
+ free_pages(pages, 1);
return (struct task_struct *) 0;
}
if(entry >= sun4c_lowbucket_avail)
sun4c_lowbucket_avail = entry + 1;
addr = BUCKET_ADDR(entry);
- sun4c_bucket[entry] = (struct task_bucket *) addr;
+ sun4c_bucket[entry] = (union task_union *) addr;
if(sun4c_get_segmap(addr) == invalid_segment)
get_locked_segment(addr);
- sun4c_put_pte(addr, BUCKET_PTE(page));
+ sun4c_put_pte(addr, BUCKET_PTE(pages));
+ sun4c_put_pte(addr + PAGE_SIZE, BUCKET_PTE(pages + PAGE_SIZE));
return (struct task_struct *) addr;
}
-static unsigned long sun4c_alloc_kernel_stack(struct task_struct *tsk)
-{
- unsigned long saddr = (unsigned long) tsk;
- unsigned long page[2];
-
- if(!saddr)
- return 0;
- page[0] = __get_free_page(GFP_KERNEL);
- if(!page[0])
- return 0;
- page[1] = __get_free_page(GFP_KERNEL);
- if(!page[1]) {
- free_page(page[0]);
- return 0;
- }
-
- saddr += PAGE_SIZE << 1;
- sun4c_put_pte(saddr, BUCKET_PTE(page[0]));
- sun4c_put_pte(saddr + PAGE_SIZE, BUCKET_PTE(page[1]));
- return saddr;
-}
-
-static void sun4c_free_kernel_stack_hw(unsigned long stack)
-{
- unsigned long page[2];
-
- page[0] = BUCKET_PTE_PAGE(sun4c_get_pte(stack));
- page[1] = BUCKET_PTE_PAGE(sun4c_get_pte(stack+PAGE_SIZE));
-
- /* We are deleting a mapping, so the flushes here are mandatory. */
- sun4c_flush_page_hw(stack);
- sun4c_flush_page_hw(stack + PAGE_SIZE);
-
- sun4c_put_pte(stack, 0);
- sun4c_put_pte(stack + PAGE_SIZE, 0);
- free_page(page[0]);
- free_page(page[1]);
-}
-
static void sun4c_free_task_struct_hw(struct task_struct *tsk)
{
unsigned long tsaddr = (unsigned long) tsk;
- unsigned long page = BUCKET_PTE_PAGE(sun4c_get_pte(tsaddr));
+ unsigned long pages = BUCKET_PTE_PAGE(sun4c_get_pte(tsaddr));
int entry = BUCKET_NUM(tsaddr);
/* We are deleting a mapping, so the flush here is mandatory. */
sun4c_flush_page_hw(tsaddr);
+ sun4c_flush_page_hw(tsaddr + PAGE_SIZE);
sun4c_put_pte(tsaddr, 0);
+ sun4c_put_pte(tsaddr + PAGE_SIZE, 0);
sun4c_bucket[entry] = BUCKET_EMPTY;
if(entry < sun4c_lowbucket_avail)
sun4c_lowbucket_avail = entry;
- free_page(page);
+ free_pages(pages, 1);
garbage_collect(entry);
}
-static void sun4c_free_kernel_stack_sw(unsigned long stack)
-{
- unsigned long page[2];
-
- page[0] = BUCKET_PTE_PAGE(sun4c_get_pte(stack));
- page[1] = BUCKET_PTE_PAGE(sun4c_get_pte(stack+PAGE_SIZE));
-
- /* We are deleting a mapping, so the flushes here are mandatory. */
- sun4c_flush_page_sw(stack);
- sun4c_flush_page_sw(stack + PAGE_SIZE);
-
- sun4c_put_pte(stack, 0);
- sun4c_put_pte(stack + PAGE_SIZE, 0);
- free_page(page[0]);
- free_page(page[1]);
-}
-
static void sun4c_free_task_struct_sw(struct task_struct *tsk)
{
unsigned long tsaddr = (unsigned long) tsk;
- unsigned long page = BUCKET_PTE_PAGE(sun4c_get_pte(tsaddr));
+ unsigned long pages = BUCKET_PTE_PAGE(sun4c_get_pte(tsaddr));
int entry = BUCKET_NUM(tsaddr);
/* We are deleting a mapping, so the flush here is mandatory. */
sun4c_flush_page_sw(tsaddr);
+ sun4c_flush_page_sw(tsaddr + PAGE_SIZE);
sun4c_put_pte(tsaddr, 0);
+ sun4c_put_pte(tsaddr + PAGE_SIZE, 0);
sun4c_bucket[entry] = BUCKET_EMPTY;
if(entry < sun4c_lowbucket_avail)
sun4c_lowbucket_avail = entry;
- free_page(page);
+ free_pages(pages, 1);
garbage_collect(entry);
}
@@ -1312,8 +1253,8 @@ __initfunc(static void sun4c_init_buckets(void))
{
int entry;
- if(sizeof(struct task_bucket) != (PAGE_SIZE << 2)) {
- prom_printf("task bucket not 4 pages!\n");
+ if(sizeof(union task_union) != (PAGE_SIZE << 1)) {
+ prom_printf("task union not 2 pages!\n");
prom_halt();
}
for(entry = 0; entry < NR_TASKS; entry++)
@@ -2526,7 +2467,7 @@ static void sun4c_vac_alias_fixup(struct vm_area_struct *vma, unsigned long addr
start += PAGE_SIZE;
}
}
- } while ((vmaring = vmaring->vm_next_share) != inode->i_mmap);
+ } while ((vmaring = vmaring->vm_next_share) != NULL);
if(alias_found && !(pte_val(pte) & _SUN4C_PAGE_NOCACHE)) {
pgdp = sun4c_pgd_offset(vma->vm_mm, address);
@@ -2645,7 +2586,6 @@ __initfunc(void ld_mmu_sun4c(void))
flush_tlb_mm = sun4c_flush_tlb_mm_hw;
flush_tlb_range = sun4c_flush_tlb_range_hw;
flush_tlb_page = sun4c_flush_tlb_page_hw;
- free_kernel_stack = sun4c_free_kernel_stack_hw;
free_task_struct = sun4c_free_task_struct_hw;
switch_to_context = sun4c_switch_to_context_hw;
destroy_context = sun4c_destroy_context_hw;
@@ -2658,7 +2598,6 @@ __initfunc(void ld_mmu_sun4c(void))
flush_tlb_mm = sun4c_flush_tlb_mm_sw;
flush_tlb_range = sun4c_flush_tlb_range_sw;
flush_tlb_page = sun4c_flush_tlb_page_sw;
- free_kernel_stack = sun4c_free_kernel_stack_sw;
free_task_struct = sun4c_free_task_struct_sw;
switch_to_context = sun4c_switch_to_context_sw;
destroy_context = sun4c_destroy_context_sw;
@@ -2736,7 +2675,6 @@ __initfunc(void ld_mmu_sun4c(void))
mmu_p2v = sun4c_p2v;
/* Task struct and kernel stack allocating/freeing. */
- alloc_kernel_stack = sun4c_alloc_kernel_stack;
alloc_task_struct = sun4c_alloc_task_struct;
quick_kernel_fault = sun4c_quick_kernel_fault;
diff --git a/arch/sparc/mm/tsunami.S b/arch/sparc/mm/tsunami.S
new file mode 100644
index 000000000..2a598cd7b
--- /dev/null
+++ b/arch/sparc/mm/tsunami.S
@@ -0,0 +1,90 @@
+/* $Id: tsunami.S,v 1.1 1997/05/03 05:09:09 davem Exp $
+ * tsunami.S: High speed MicroSparc-I mmu/cache operations.
+ *
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <asm/ptrace.h>
+#include <asm/psr.h>
+#include <asm/asi.h>
+#include <asm/page.h>
+#include <asm/pgtsrmmu.h>
+
+#define WINDOW_FLUSH(tmp1, tmp2) \
+ mov 0, tmp1; \
+98: ld [%g6 + AOFF_task_tss + AOFF_thread_uwinmask], tmp2; \
+ orcc %g0, tmp2, %g0; \
+ add tmp1, 1, tmp1; \
+ bne 98b; \
+ save %sp, -64, %sp; \
+99: subcc tmp1, 1, tmp1; \
+ bne 99b; \
+ restore %g0, %g0, %g0;
+
+ .text
+ .align 4
+
+ .globl tsunami_flush_cache_all, tsunami_flush_cache_mm
+ .globl tsunami_flush_cache_range, tsunami_flush_cache_page
+ .globl tsunami_flush_page_to_ram, tsunami_flush_page_for_dma
+ .globl tsunami_flush_sig_insns, tsunami_flush_chunk
+ .globl tsunami_flush_tlb_all, tsunami_flush_tlb_mm
+ .globl tsunami_flush_tlb_range, tsunami_flush_tlb_page
+
+ /* Sliiick... */
+tsunami_flush_cache_page:
+ ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */
+tsunami_flush_cache_mm:
+tsunami_flush_cache_range:
+ ld [%o0 + AOFF_mm_context], %g2
+#ifndef __SMP__
+ cmp %g2, -1
+ be tsunami_flush_cache_out
+#endif
+tsunami_flush_cache_all:
+ WINDOW_FLUSH(%g4, %g5)
+tsunami_flush_page_for_dma:
+ sta %g0, [%g0] ASI_M_DC_FLCLEAR
+ sta %g0, [%g0] ASI_M_IC_FLCLEAR
+tsunami_flush_cache_out:
+tsunami_flush_page_to_ram:
+tsunami_flush_chunk:
+ retl
+ nop
+
+tsunami_flush_sig_insns:
+ flush %o1
+ retl
+ flush %o1 + 4
+
+ /* More slick stuff... */
+tsunami_flush_tlb_mm:
+tsunami_flush_tlb_range:
+#ifndef __SMP__
+ ld [%o0 + AOFF_mm_context], %g2
+ cmp %g2, -1
+ be tsunami_flush_tlb_out
+#endif
+tsunami_flush_tlb_all:
+ mov 0x400, %o1
+ sta %g0, [%o1] ASI_M_FLUSH_PROBE
+tsunami_flush_tlb_out:
+ retl
+ nop
+
+ /* This one can be done in a fine grained manner... */
+tsunami_flush_tlb_page:
+ ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */
+ mov SRMMU_CTX_REG, %g1
+ ld [%o0 + AOFF_mm_context], %o3
+ andn %o1, (PAGE_SIZE - 1), %o1
+#ifndef __SMP__
+ cmp %o3, -1
+ be tsunami_flush_tlb_page_out
+#endif
+ lda [%g1] ASI_M_MMUREGS, %g5
+ sta %o3, [%g1] ASI_M_MMUREGS
+ sta %g0, [%o1] ASI_M_FLUSH_PROBE
+tsunami_flush_tlb_page_out:
+ retl
+ sta %g5, [%g1] ASI_M_MMUREGS
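
The bare constants stored through ASI_M_FLUSH_PROBE above are SRMMU
flush-type selectors: the reference MMU decodes bits [11:8] of the probe
address to pick the flush scope, so the 0x400 in tsunami_flush_tlb_all
requests an entire-TLB flush and the 0x300 used by hypersparc_flush_tlb_mm
flushes one context. A sketch naming them (the enum is illustrative; the
values follow the SPARC Reference MMU encoding):

	enum srmmu_flush_scope {
		SRMMU_FLUSH_PAGE	= 0x000,  /* single page             */
		SRMMU_FLUSH_SEGMENT	= 0x100,
		SRMMU_FLUSH_REGION	= 0x200,
		SRMMU_FLUSH_CONTEXT	= 0x300,  /* hypersparc_flush_tlb_mm */
		SRMMU_FLUSH_ENTIRE	= 0x400,  /* tsunami_flush_tlb_all   */
	};
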
diff --git a/arch/sparc/mm/viking.S b/arch/sparc/mm/viking.S
index f61aa4398..19d426ec7 100644
--- a/arch/sparc/mm/viking.S
+++ b/arch/sparc/mm/viking.S
@@ -1,4 +1,4 @@
-/* $Id: viking.S,v 1.2 1997/04/20 21:21:49 ecd Exp $
+/* $Id: viking.S,v 1.3 1997/05/04 10:02:14 ecd Exp $
* viking.S: High speed Viking cache/mmu operations
*
* Copyright (C) 1997 Eddie C. Dost (ecd@skynet.be)
@@ -38,40 +38,26 @@
viking_flush_page:
viking_flush_chunk:
- sethi %hi(C_LABEL(srmmu_map)), %g2
- or %g2, %lo(C_LABEL(srmmu_map)), %g3
- ld [%g3 + 8], %g2
- cmp %g2, 0
- be 3f
+ sethi %hi(C_LABEL(srmmu_v2p_hash)), %g2
+ or %g2, %lo(C_LABEL(srmmu_v2p_hash)), %g2
+ srl %o0, 24, %o1
+ sll %o1, 2, %o1
+
+ ld [%g2 + %o1], %g3
+ cmp %g3, 0
+ bne 1f
and %o0, PAGE_MASK, %o0
- ld [%g3], %o1
-1:
- cmp %o1, %o0
- bgu,a 2f
- add %g3, 0xc, %g3
-
- add %o1, %g2, %g2
- cmp %g2, %o0
- bleu,a 2f
- add %g3, 0xc, %g3
+ retl
+ nop
+1:
+ ld [%g3], %o1
sub %o0, %o1, %g2
ld [%g3 + 4], %o0
add %g2, %o0, %g3
- b 4f
- srl %g3, 12, %g1 ! ppage >> 12
-
-2:
- ld [%g3 + 8], %g2
- cmp %g2, 0
- bne,a 1b
- ld [%g3], %o1
-3:
- retl
- nop
+ srl %g3, 12, %g1 ! ppage >> 12
-4:
clr %o1 ! set counter, 0 - 127
sethi %hi(KERNBASE + PAGE_SIZE - 0x80000000), %o3
sethi %hi(0x80000000), %o4
@@ -131,40 +117,27 @@ viking_flush_chunk:
viking_mxcc_flush_page:
- sethi %hi(C_LABEL(srmmu_map)), %g2
- or %g2, %lo(C_LABEL(srmmu_map)), %g3
- ld [%g3 + 8], %g2
- cmp %g2, 0
- be 3f
+ sethi %hi(C_LABEL(srmmu_v2p_hash)), %g2
+ or %g2, %lo(C_LABEL(srmmu_v2p_hash)), %g2
+ srl %o0, 24, %o1
+ sll %o1, 2, %o1
+
+ ld [%g2 + %o1], %g3
+ cmp %g3, 0
+ bne 1f
and %o0, PAGE_MASK, %o0
- ld [%g3], %o1
-1:
- cmp %o1, %o0
- bgu,a 2f
- add %g3, 0xc, %g3
-
- add %o1, %g2, %g2
- cmp %g2, %o0
- bleu,a 2f
- add %g3, 0xc, %g3
+ retl
+ nop
+1:
+ ld [%g3], %o1
sub %o0, %o1, %g2
ld [%g3 + 4], %o0
+ sethi %hi(PAGE_SIZE), %g4
add %g2, %o0, %g3
- sethi %hi(PAGE_SIZE), %g4
- b 4f
- add %g3, %g4, %g3 ! ppage + PAGE_SIZE
-
-2:
- ld [%g3 + 8], %g2
- cmp %g2, 0
- bne,a 1b
- ld [%g3], %o1
-3:
- retl
- nop
-4:
+ add %g3, %g4, %g3 ! ppage + PAGE_SIZE
+
mov 0x10, %g2 ! set cacheable bit
sethi %hi(MXCC_SRCSTREAM), %o2
or %o2, %lo(MXCC_SRCSTREAM), %o2