| author | Ralf Baechle <ralf@linux-mips.org> | 2001-01-11 04:02:40 +0000 |
|---|---|---|
| committer | Ralf Baechle <ralf@linux-mips.org> | 2001-01-11 04:02:40 +0000 |
| commit | e47f00743fc4776491344f2c618cc8dc2c23bcbc (patch) | |
| tree | 13e03a113a82a184c51c19c209867cfd3a59b3b9 /arch/ia64 | |
| parent | b2ad5f821b1381492d792ca10b1eb7a107b48f14 (diff) | |
Merge with Linux 2.4.0.
Diffstat (limited to 'arch/ia64')
111 files changed, 44044 insertions, 1888 deletions
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 0a1714c35..b7781442b 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -19,22 +19,28 @@ AFLAGS_KERNEL := -mconstant-gp EXTRA = CFLAGS := $(CFLAGS) -pipe $(EXTRA) -Wa,-x -ffixed-r13 -mfixed-range=f10-f15,f32-f127 \ - -funwind-tables + -funwind-tables -falign-functions=32 +# -frename-registers CFLAGS_KERNEL := -mconstant-gp ifeq ($(CONFIG_ITANIUM_ASTEP_SPECIFIC),y) CFLAGS += -ma-step endif +ifeq ($(CONFIG_ITANIUM_BSTEP_SPECIFIC),y) + CFLAGS += -mb-step +endif ifdef CONFIG_IA64_GENERIC CORE_FILES := arch/$(ARCH)/hp/hp.a \ arch/$(ARCH)/sn/sn.a \ arch/$(ARCH)/dig/dig.a \ + arch/$(ARCH)/sn/io/sgiio.o \ $(CORE_FILES) SUBDIRS := arch/$(ARCH)/hp \ arch/$(ARCH)/sn/sn1 \ arch/$(ARCH)/sn \ arch/$(ARCH)/dig \ + arch/$(ARCH)/sn/io \ $(SUBDIRS) else # !GENERIC @@ -47,10 +53,7 @@ ifdef CONFIG_IA64_HP_SIM endif ifdef CONFIG_IA64_SGI_SN1 -CFLAGS := $(CFLAGS) -DSN -I. -DBRINGUP -DDIRECT_L1_CONSOLE \ - -DNUMA_BASE -DSIMULATED_KLGRAPH -DNUMA_MIGR_CONTROL \ - -DLITTLE_ENDIAN -DREAL_HARDWARE -DLANGUAGE_C=1 \ - -D_LANGUAGE_C=1 +CFLAGS += -DBRINGUP SUBDIRS := arch/$(ARCH)/sn/sn1 \ arch/$(ARCH)/sn \ arch/$(ARCH)/sn/io \ @@ -96,7 +99,7 @@ vmlinux: arch/$(ARCH)/vmlinux.lds arch/$(ARCH)/vmlinux.lds: arch/$(ARCH)/vmlinux.lds.S FORCE $(CPP) -D__ASSEMBLY__ -C -P -I$(HPATH) -I$(HPATH)/asm-$(ARCH) \ - arch/$(ARCH)/vmlinux.lds.S > $@ + -traditional arch/$(ARCH)/vmlinux.lds.S > $@ FORCE: ; diff --git a/arch/ia64/boot/Makefile b/arch/ia64/boot/Makefile index bdeef72ff..14eeeadd8 100644 --- a/arch/ia64/boot/Makefile +++ b/arch/ia64/boot/Makefile @@ -16,13 +16,11 @@ LINKFLAGS = -static -T bootloader.lds $(CC) $(AFLAGS) -traditional -c -o $*.o $< OBJECTS = bootloader.o -TARGETS = -ifdef CONFIG_IA64_HP_SIM - TARGETS += bootloader -endif +targets-$(CONFIG_IA64_HP_SIM) += bootloader +targets-$(CONFIG_IA64_GENERIC) += bootloader -all: $(TARGETS) +all: $(targets-y) bootloader: $(OBJECTS) $(LD) $(LINKFLAGS) $(OBJECTS) $(TOPDIR)/lib/lib.a $(TOPDIR)/arch/$(ARCH)/lib/lib.a \ diff --git a/arch/ia64/config.in b/arch/ia64/config.in index 8defec849..ae49891c5 100644 --- a/arch/ia64/config.in +++ b/arch/ia64/config.in @@ -18,7 +18,6 @@ mainmenu_option next_comment comment 'General setup' define_bool CONFIG_IA64 y -define_bool CONFIG_SWIOTLB y # for now... 
define_bool CONFIG_ISA n define_bool CONFIG_EISA n @@ -41,20 +40,22 @@ if [ "$CONFIG_IA64_DIG" = "y" ]; then define_bool CONFIG_ITANIUM y define_bool CONFIG_IA64_BRL_EMU y bool ' Enable Itanium A-step specific code' CONFIG_ITANIUM_ASTEP_SPECIFIC - if [ "$CONFIG_ITANIUM_ASTEP_SPECIFIC" = "y" ]; then - bool ' Enable Itanium A1-step specific code' CONFIG_ITANIUM_A1_SPECIFIC - fi bool ' Enable Itanium B-step specific code' CONFIG_ITANIUM_BSTEP_SPECIFIC if [ "$CONFIG_ITANIUM_BSTEP_SPECIFIC" = "y" ]; then bool ' Enable Itanium B0-step specific code' CONFIG_ITANIUM_B0_SPECIFIC + bool ' Enable Itanium B1-step specific code' CONFIG_ITANIUM_B1_SPECIFIC + bool ' Enable Itanium B2-step specific code' CONFIG_ITANIUM_B2_SPECIFIC + fi + bool ' Enable Itanium C-step specific code' CONFIG_ITANIUM_CSTEP_SPECIFIC + if [ "$CONFIG_ITANIUM_CSTEP_SPECIFIC" = "y" ]; then + bool ' Enable Itanium C0-step specific code' CONFIG_ITANIUM_C0_SPECIFIC fi bool ' Force interrupt redirection' CONFIG_IA64_HAVE_IRQREDIR bool ' Enable use of global TLB purge instruction (ptc.g)' CONFIG_ITANIUM_PTCG bool ' Enable SoftSDV hacks' CONFIG_IA64_SOFTSDV_HACKS bool ' Enable AzusA hacks' CONFIG_IA64_AZUSA_HACKS bool ' Enable IA-64 Machine Check Abort' CONFIG_IA64_MCA - bool ' Force socket buffers below 4GB?' CONFIG_SKB_BELOW_4GB - + bool ' Enable ACPI 2.0 with errata 1.3' CONFIG_ACPI20 bool ' ACPI kernel configuration manager (EXPERIMENTAL)' CONFIG_ACPI_KERNEL_CONFIG if [ "$CONFIG_ACPI_KERNEL_CONFIG" = "y" ]; then define_bool CONFIG_PM y @@ -70,13 +71,16 @@ if [ "$CONFIG_IA64_SGI_SN1" = "y" ]; then bool ' Enable Itanium B0-step specific code' CONFIG_ITANIUM_B0_SPECIFIC fi bool ' Enable SGI Medusa Simulator Support' CONFIG_IA64_SGI_SN1_SIM n - bool ' Enable SGI hack for version 1.0 syngery bugs' CONFIG_IA64_SGI_SYNERGY_1_0_HACKS n define_bool CONFIG_DEVFS_DEBUG y define_bool CONFIG_DEVFS_FS y define_bool CONFIG_IA64_BRL_EMU y define_bool CONFIG_IA64_MCA y - define_bool CONFIG_IA64_SGI_IO y define_bool CONFIG_ITANIUM y + define_bool CONFIG_SGI_IOC3_ETH y + define_bool CONFIG_PERCPU_IRQ y + define_int CONFIG_CACHE_LINE_SHIFT 7 + bool ' Enable DISCONTIGMEM support' CONFIG_DISCONTIGMEM y + bool ' Enable NUMA support' CONFIG_NUMA y fi define_bool CONFIG_KCORE_ELF y # On IA-64, we always want an ELF /proc/kcore. diff --git a/arch/ia64/dig/Makefile b/arch/ia64/dig/Makefile index f067606ee..6ca481897 100644 --- a/arch/ia64/dig/Makefile +++ b/arch/ia64/dig/Makefile @@ -12,12 +12,10 @@ all: dig.a -O_TARGET = dig.a -O_OBJS = iosapic.o setup.o +O_TARGET := dig.a -ifdef CONFIG_IA64_GENERIC -O_OBJS += machvec.o -endif +obj-y := setup.o +obj-$(CONFIG_IA64_GENERIC) += machvec.o clean:: diff --git a/arch/ia64/dig/iosapic.c b/arch/ia64/dig/iosapic.c deleted file mode 100644 index 18c7713bd..000000000 --- a/arch/ia64/dig/iosapic.c +++ /dev/null @@ -1,409 +0,0 @@ -/* - * Streamlined APIC support. - * - * Copyright (C) 1999 Intel Corp. - * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com> - * Copyright (C) 1999-2000 Hewlett-Packard Co. - * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> - * - * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code. - * In particular, we now have separate handlers for edge - * and level triggered interrupts. 
- */ -#include <linux/config.h> - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/pci.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/string.h> -#include <linux/irq.h> - -#include <asm/acpi-ext.h> -#include <asm/delay.h> -#include <asm/io.h> -#include <asm/iosapic.h> -#include <asm/machvec.h> -#include <asm/processor.h> -#include <asm/ptrace.h> -#include <asm/system.h> - -#ifdef CONFIG_ACPI_KERNEL_CONFIG -# include <asm/acpikcfg.h> -#endif - -#undef DEBUG_IRQ_ROUTING - -static spinlock_t iosapic_lock = SPIN_LOCK_UNLOCKED; - -struct iosapic_vector iosapic_vector[NR_IRQS] = { - [0 ... NR_IRQS-1] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } -}; - -/* - * find the IRQ in the IOSAPIC map for the PCI device on bus/slot/pin - */ -int -iosapic_get_PCI_irq_vector (int bus, int slot, int pci_pin) -{ - int i; - - for (i = 0; i < NR_IRQS; i++) { - if ((iosapic_bustype(i) == BUS_PCI) && - (iosapic_bus(i) == bus) && - (iosapic_busdata(i) == ((slot << 16) | pci_pin))) { - return i; - } - } - return -1; -} - -static void -set_rte (unsigned long iosapic_addr, int entry, int pol, int trigger, int delivery, - long dest, int vector) -{ - u32 low32; - u32 high32; - - low32 = ((pol << IO_SAPIC_POLARITY_SHIFT) | - (trigger << IO_SAPIC_TRIGGER_SHIFT) | - (delivery << IO_SAPIC_DELIVERY_SHIFT) | - vector); - -#ifdef CONFIG_IA64_AZUSA_HACKS - /* set Flush Disable bit */ - if (iosapic_addr != 0xc0000000fec00000) - low32 |= (1 << 17); -#endif - - /* dest contains both id and eid */ - high32 = (dest << IO_SAPIC_DEST_SHIFT); - - writel(IO_SAPIC_RTE_HIGH(entry), iosapic_addr + IO_SAPIC_REG_SELECT); - writel(high32, iosapic_addr + IO_SAPIC_WINDOW); - writel(IO_SAPIC_RTE_LOW(entry), iosapic_addr + IO_SAPIC_REG_SELECT); - writel(low32, iosapic_addr + IO_SAPIC_WINDOW); -} - -static void -nop (unsigned int irq) -{ - /* do nothing... */ -} - -static void -mask_irq (unsigned int irq) -{ - unsigned long flags, iosapic_addr = iosapic_addr(irq); - u32 low32; - - spin_lock_irqsave(&iosapic_lock, flags); - { - writel(IO_SAPIC_RTE_LOW(iosapic_pin(irq)), iosapic_addr + IO_SAPIC_REG_SELECT); - low32 = readl(iosapic_addr + IO_SAPIC_WINDOW); - - low32 |= (1 << IO_SAPIC_MASK_SHIFT); /* Zero only the mask bit */ - writel(low32, iosapic_addr + IO_SAPIC_WINDOW); - } - spin_unlock_irqrestore(&iosapic_lock, flags); -} - -static void -unmask_irq (unsigned int irq) -{ - unsigned long flags, iosapic_addr = iosapic_addr(irq); - u32 low32; - - spin_lock_irqsave(&iosapic_lock, flags); - { - writel(IO_SAPIC_RTE_LOW(iosapic_pin(irq)), iosapic_addr + IO_SAPIC_REG_SELECT); - low32 = readl(iosapic_addr + IO_SAPIC_WINDOW); - - low32 &= ~(1 << IO_SAPIC_MASK_SHIFT); /* Zero only the mask bit */ - writel(low32, iosapic_addr + IO_SAPIC_WINDOW); - } - spin_unlock_irqrestore(&iosapic_lock, flags); -} - - -static void -iosapic_set_affinity (unsigned int irq, unsigned long mask) -{ - printk("iosapic_set_affinity: not implemented yet\n"); -} - -/* - * Handlers for level-triggered interrupts. 
- */ - -static unsigned int -iosapic_startup_level_irq (unsigned int irq) -{ - unmask_irq(irq); - return 0; -} - -static void -iosapic_end_level_irq (unsigned int irq) -{ - writel(irq, iosapic_addr(irq) + IO_SAPIC_EOI); -} - -#define iosapic_shutdown_level_irq mask_irq -#define iosapic_enable_level_irq unmask_irq -#define iosapic_disable_level_irq mask_irq -#define iosapic_ack_level_irq nop - -struct hw_interrupt_type irq_type_iosapic_level = { - typename: "IO-SAPIC-level", - startup: iosapic_startup_level_irq, - shutdown: iosapic_shutdown_level_irq, - enable: iosapic_enable_level_irq, - disable: iosapic_disable_level_irq, - ack: iosapic_ack_level_irq, - end: iosapic_end_level_irq, - set_affinity: iosapic_set_affinity -}; - -/* - * Handlers for edge-triggered interrupts. - */ - -static unsigned int -iosapic_startup_edge_irq (unsigned int irq) -{ - unmask_irq(irq); - /* - * IOSAPIC simply drops interrupts pended while the - * corresponding pin was masked, so we can't know if an - * interrupt is pending already. Let's hope not... - */ - return 0; -} - -static void -iosapic_ack_edge_irq (unsigned int irq) -{ - /* - * Once we have recorded IRQ_PENDING already, we can mask the - * interrupt for real. This prevents IRQ storms from unhandled - * devices. - */ - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) - mask_irq(irq); -} - -#define iosapic_enable_edge_irq unmask_irq -#define iosapic_disable_edge_irq nop -#define iosapic_end_edge_irq nop - -struct hw_interrupt_type irq_type_iosapic_edge = { - typename: "IO-SAPIC-edge", - startup: iosapic_startup_edge_irq, - shutdown: iosapic_disable_edge_irq, - enable: iosapic_enable_edge_irq, - disable: iosapic_disable_edge_irq, - ack: iosapic_ack_edge_irq, - end: iosapic_end_edge_irq, - set_affinity: iosapic_set_affinity -}; - -unsigned int -iosapic_version (unsigned long base_addr) -{ - /* - * IOSAPIC Version Register return 32 bit structure like: - * { - * unsigned int version : 8; - * unsigned int reserved1 : 8; - * unsigned int pins : 8; - * unsigned int reserved2 : 8; - * } - */ - writel(IO_SAPIC_VERSION, base_addr + IO_SAPIC_REG_SELECT); - return readl(IO_SAPIC_WINDOW + base_addr); -} - -void -iosapic_init (unsigned long address, int irqbase) -{ - struct hw_interrupt_type *irq_type; - struct pci_vector_struct *vectors; - int i, irq, num_pci_vectors; - - if (irqbase == 0) - /* - * Map the legacy ISA devices into the IOSAPIC data. - * Some of these may get reprogrammed later on with - * data from the ACPI Interrupt Source Override table. - */ - for (i = 0; i < 16; i++) { - irq = isa_irq_to_vector(i); - iosapic_pin(irq) = i; - iosapic_bus(irq) = BUS_ISA; - iosapic_busdata(irq) = 0; - iosapic_dmode(irq) = IO_SAPIC_LOWEST_PRIORITY; - iosapic_trigger(irq) = IO_SAPIC_EDGE; - iosapic_polarity(irq) = IO_SAPIC_POL_HIGH; -#ifdef DEBUG_IRQ_ROUTING - printk("ISA: IRQ %02x -> Vector %02x IOSAPIC Pin %d\n", - i, irq, iosapic_pin(irq)); -#endif - } - -#ifndef CONFIG_IA64_SOFTSDV_HACKS - /* - * Map the PCI Interrupt data into the ACPI IOSAPIC data using - * the info that the bootstrap loader passed to us. 
- */ -# ifdef CONFIG_ACPI_KERNEL_CONFIG - acpi_cf_get_pci_vectors(&vectors, &num_pci_vectors); -# else - ia64_boot_param.pci_vectors = (__u64) __va(ia64_boot_param.pci_vectors); - vectors = (struct pci_vector_struct *) ia64_boot_param.pci_vectors; - num_pci_vectors = ia64_boot_param.num_pci_vectors; -# endif - for (i = 0; i < num_pci_vectors; i++) { - irq = vectors[i].irq; - if (irq < 16) - irq = isa_irq_to_vector(irq); - if (iosapic_baseirq(irq) != irqbase) - continue; - - iosapic_bustype(irq) = BUS_PCI; - iosapic_pin(irq) = irq - iosapic_baseirq(irq); - iosapic_bus(irq) = vectors[i].bus; - /* - * Map the PCI slot and pin data into iosapic_busdata() - */ - iosapic_busdata(irq) = (vectors[i].pci_id & 0xffff0000) | vectors[i].pin; - - /* Default settings for PCI */ - iosapic_dmode(irq) = IO_SAPIC_LOWEST_PRIORITY; - iosapic_trigger(irq) = IO_SAPIC_LEVEL; - iosapic_polarity(irq) = IO_SAPIC_POL_LOW; - -# ifdef DEBUG_IRQ_ROUTING - printk("PCI: BUS %d Slot %x Pin %x IRQ %02x --> Vector %02x IOSAPIC Pin %d\n", - vectors[i].bus, vectors[i].pci_id>>16, vectors[i].pin, vectors[i].irq, - irq, iosapic_pin(irq)); -# endif - } -#endif /* CONFIG_IA64_SOFTSDV_HACKS */ - - for (i = 0; i < NR_IRQS; ++i) { - if (iosapic_baseirq(i) != irqbase) - continue; - - if (iosapic_pin(i) != -1) { - if (iosapic_trigger(i) == IO_SAPIC_LEVEL) - irq_type = &irq_type_iosapic_level; - else - irq_type = &irq_type_iosapic_edge; - if (irq_desc[i].handler != &no_irq_type) - printk("dig_irq_init: warning: changing vector %d from %s to %s\n", - i, irq_desc[i].handler->typename, - irq_type->typename); - irq_desc[i].handler = irq_type; - - /* program the IOSAPIC routing table: */ - set_rte(iosapic_addr(i), iosapic_pin(i), iosapic_polarity(i), - iosapic_trigger(i), iosapic_dmode(i), - (ia64_get_lid() >> 16) & 0xffff, i); - } - } -} - -void -dig_irq_init (void) -{ - /* - * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support - * enabled. - */ - outb(0xff, 0xA1); - outb(0xff, 0x21); -} - -void -dig_pci_fixup (void) -{ - struct pci_dev *dev; - int irq; - unsigned char pin; - - pci_for_each_dev(dev) { - pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); - if (pin) { - pin--; /* interrupt pins are numbered starting from 1 */ - irq = iosapic_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), - pin); - if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ - struct pci_dev * bridge = dev->bus->self; - - /* allow for multiple bridges on an adapter */ - do { - /* do the bridge swizzle... */ - pin = (pin + PCI_SLOT(dev->devfn)) % 4; - irq = iosapic_get_PCI_irq_vector(bridge->bus->number, - PCI_SLOT(bridge->devfn), pin); - } while (irq < 0 && (bridge = bridge->bus->self)); - if (irq >= 0) - printk(KERN_WARNING - "PCI: using PPB(B%d,I%d,P%d) to get irq %02x\n", - bridge->bus->number, PCI_SLOT(bridge->devfn), - pin, irq); - else - printk(KERN_WARNING - "PCI: Couldn't map irq for B%d,I%d,P%d\n", - bridge->bus->number, PCI_SLOT(bridge->devfn), - pin); - } - if (irq >= 0) { - printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %02x\n", - dev->bus->number, PCI_SLOT(dev->devfn), pin, irq); - dev->irq = irq; - } - } - /* - * Nothing to fixup - * Fix out-of-range IRQ numbers - */ - if (dev->irq >= NR_IRQS) - dev->irq = 15; /* Spurious interrupts */ - } -} - -/* - * Register an IOSAPIC discovered via ACPI. 
- */ -void __init -dig_register_iosapic (acpi_entry_iosapic_t *iosapic) -{ - unsigned int ver, v; - int l, max_pin; - - ver = iosapic_version((unsigned long) ioremap(iosapic->address, 0)); - max_pin = (ver >> 16) & 0xff; - - printk("IOSAPIC Version %x.%x: address 0x%lx IRQs 0x%x - 0x%x\n", - (ver & 0xf0) >> 4, (ver & 0x0f), iosapic->address, - iosapic->irq_base, iosapic->irq_base + max_pin); - - for (l = 0; l <= max_pin; l++) { - v = iosapic->irq_base + l; - if (v < 16) - v = isa_irq_to_vector(v); - if (v > IA64_MAX_VECTORED_IRQ) { - printk(" !!! bad IOSAPIC interrupt vector: %u\n", v); - continue; - } - /* XXX Check for IOSAPIC collisions */ - iosapic_addr(v) = (unsigned long) ioremap(iosapic->address, 0); - iosapic_baseirq(v) = iosapic->irq_base; - } - iosapic_init(iosapic->address, iosapic->irq_base); -} diff --git a/arch/ia64/dig/setup.c b/arch/ia64/dig/setup.c index fa48254cc..d6aeaed0d 100644 --- a/arch/ia64/dig/setup.c +++ b/arch/ia64/dig/setup.c @@ -84,3 +84,14 @@ dig_setup (char **cmdline_p) screen_info.orig_video_isVGA = 1; /* XXX fake */ screen_info.orig_video_ega_bx = 3; /* XXX fake */ } + +void +dig_irq_init (void) +{ + /* + * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support + * enabled. + */ + outb(0xff, 0xA1); + outb(0xff, 0x21); +} diff --git a/arch/ia64/hp/Makefile b/arch/ia64/hp/Makefile index 458269e0a..e151251ce 100644 --- a/arch/ia64/hp/Makefile +++ b/arch/ia64/hp/Makefile @@ -7,12 +7,10 @@ all: hp.a -O_TARGET = hp.a -O_OBJS = hpsim_console.o hpsim_irq.o hpsim_setup.o +O_TARGET := hp.a -ifdef CONFIG_IA64_GENERIC -O_OBJS += hpsim_machvec.o -endif +obj-y := hpsim_console.o hpsim_irq.o hpsim_setup.o +obj-$(CONFIG_IA64_GENERIC) += hpsim_machvec.o clean:: diff --git a/arch/ia64/hp/hpsim_setup.c b/arch/ia64/hp/hpsim_setup.c index aaa87c4dc..dfa83e135 100644 --- a/arch/ia64/hp/hpsim_setup.c +++ b/arch/ia64/hp/hpsim_setup.c @@ -63,12 +63,6 @@ ia64_ctl_trace (long on) } void __init -hpsim_pci_fixup (void) -{ -} - - -void __init hpsim_setup (char **cmdline_p) { ROOT_DEV = to_kdev_t(0x0801); /* default to first SCSI drive */ diff --git a/arch/ia64/ia32/Makefile b/arch/ia64/ia32/Makefile index 23ffbd0d9..834e24fff 100644 --- a/arch/ia64/ia32/Makefile +++ b/arch/ia64/ia32/Makefile @@ -10,7 +10,8 @@ all: ia32.o O_TARGET := ia32.o -O_OBJS := ia32_entry.o sys_ia32.o ia32_ioctl.o ia32_signal.o ia32_support.o ia32_traps.o binfmt_elf32.o + +obj-y := ia32_entry.o sys_ia32.o ia32_ioctl.o ia32_signal.o ia32_support.o ia32_traps.o binfmt_elf32.o clean:: diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c index 384747f8b..aab0db860 100644 --- a/arch/ia64/ia32/binfmt_elf32.c +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -9,6 +9,7 @@ #include <linux/types.h> +#include <asm/param.h> #include <asm/signal.h> #include <asm/ia32.h> @@ -31,6 +32,9 @@ # define CONFIG_BINFMT_ELF_MODULE CONFIG_BINFMT_ELF32_MODULE #endif +#undef CLOCKS_PER_SEC +#define CLOCKS_PER_SEC IA32_CLOCKS_PER_SEC + extern void ia64_elf32_init(struct pt_regs *regs); extern void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address); @@ -89,8 +93,8 @@ void ia64_elf32_init(struct pt_regs *regs) /* Do all the IA-32 setup here */ - current->thread.map_base = 0x40000000; - + current->thread.map_base = 0x40000000; + current->thread.task_size = 0xc0000000; /* use what Linux/x86 uses... 
*/ /* setup ia32 state for ia32_load_state */ @@ -239,6 +243,12 @@ elf_map32 (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int p if (eppnt->p_memsz >= (1UL<<32) || addr > (1UL<<32) - eppnt->p_memsz) return -EINVAL; + /* + * Make sure the elf interpreter doesn't get loaded at location 0 + * so that NULL pointers correctly cause segfaults. + */ + if (addr == 0) + addr += PAGE_SIZE; #if 1 set_brk(ia32_mm_addr(addr), addr + eppnt->p_memsz); memset((char *) addr + eppnt->p_filesz, 0, eppnt->p_memsz - eppnt->p_filesz); diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index 5fb064a3b..5b03d528d 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -133,7 +133,7 @@ ia32_syscall_table: data8 sys32_ni_syscall /* sys_stime is not supported on IA64 */ /* 25 */ data8 sys32_ptrace data8 sys32_alarm - data8 sys32_ni_syscall + data8 sys_pause data8 sys32_ni_syscall data8 ia32_utime /* 30 */ data8 sys32_ni_syscall /* old stty syscall holder */ @@ -291,11 +291,43 @@ ia32_syscall_table: data8 sys_getcwd data8 sys_capget data8 sys_capset /* 185 */ - data8 sys_sigaltstack + data8 sys32_sigaltstack data8 sys_sendfile data8 sys32_ni_syscall /* streams1 */ data8 sys32_ni_syscall /* streams2 */ data8 sys32_vfork /* 190 */ + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall /* 195 */ + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall /* 200 */ + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall /* 205 */ + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall /* 210 */ + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall /* 215 */ + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall /* 220 */ + data8 sys_ni_syscall + data8 sys_ni_syscall /* * CAUTION: If any system calls are added beyond this point * then the check in `arch/ia64/kernel/ivt.S' will have diff --git a/arch/ia64/ia32/ia32_ioctl.c b/arch/ia64/ia32/ia32_ioctl.c index a5cd927b3..f9093e952 100644 --- a/arch/ia64/ia32/ia32_ioctl.c +++ b/arch/ia64/ia32/ia32_ioctl.c @@ -22,81 +22,193 @@ #include <linux/if_ppp.h> #include <linux/ixjuser.h> #include <linux/i2o-dev.h> +#include <../drivers/char/drm/drm.h> + +#define IOCTL_NR(a) ((a) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) + +#define DO_IOCTL(fd, cmd, arg) ({ \ + int _ret; \ + mm_segment_t _old_fs = get_fs(); \ + \ + set_fs(KERNEL_DS); \ + _ret = sys_ioctl(fd, cmd, (unsigned long)arg); \ + set_fs(_old_fs); \ + _ret; \ +}) + +#define P(i) ((void *)(long)(i)) + asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); asmlinkage long ia32_ioctl(unsigned int fd, unsigned int cmd, unsigned int arg) { + long ret; + + switch (IOCTL_NR(cmd)) { + + case IOCTL_NR(DRM_IOCTL_VERSION): + { + drm_version_t ver; + struct { + int version_major; + int version_minor; + int version_patchlevel; + unsigned int name_len; + unsigned int name; /* pointer */ + unsigned int date_len; + unsigned int date; /* pointer */ + unsigned int desc_len; + unsigned int desc; /* pointer */ + } ver32; + + if (copy_from_user(&ver32, P(arg), sizeof(ver32))) + return -EFAULT; + ver.name_len = ver32.name_len; + ver.name = P(ver32.name); + ver.date_len = ver32.date_len; + ver.date = P(ver32.date); + 
ver.desc_len = ver32.desc_len; + ver.desc = P(ver32.desc); + ret = DO_IOCTL(fd, cmd, &ver); + if (ret >= 0) { + ver32.version_major = ver.version_major; + ver32.version_minor = ver.version_minor; + ver32.version_patchlevel = ver.version_patchlevel; + ver32.name_len = ver.name_len; + ver32.date_len = ver.date_len; + ver32.desc_len = ver.desc_len; + if (copy_to_user(P(arg), &ver32, sizeof(ver32))) + return -EFAULT; + } + return(ret); + } + + case IOCTL_NR(DRM_IOCTL_GET_UNIQUE): + { + drm_unique_t un; + struct { + unsigned int unique_len; + unsigned int unique; + } un32; + + if (copy_from_user(&un32, P(arg), sizeof(un32))) + return -EFAULT; + un.unique_len = un32.unique_len; + un.unique = P(un32.unique); + ret = DO_IOCTL(fd, cmd, &un); + if (ret >= 0) { + un32.unique_len = un.unique_len; + if (copy_to_user(P(arg), &un32, sizeof(un32))) + return -EFAULT; + } + return(ret); + } + case IOCTL_NR(DRM_IOCTL_SET_UNIQUE): + case IOCTL_NR(DRM_IOCTL_ADD_MAP): + case IOCTL_NR(DRM_IOCTL_ADD_BUFS): + case IOCTL_NR(DRM_IOCTL_MARK_BUFS): + case IOCTL_NR(DRM_IOCTL_INFO_BUFS): + case IOCTL_NR(DRM_IOCTL_MAP_BUFS): + case IOCTL_NR(DRM_IOCTL_FREE_BUFS): + case IOCTL_NR(DRM_IOCTL_ADD_CTX): + case IOCTL_NR(DRM_IOCTL_RM_CTX): + case IOCTL_NR(DRM_IOCTL_MOD_CTX): + case IOCTL_NR(DRM_IOCTL_GET_CTX): + case IOCTL_NR(DRM_IOCTL_SWITCH_CTX): + case IOCTL_NR(DRM_IOCTL_NEW_CTX): + case IOCTL_NR(DRM_IOCTL_RES_CTX): + + case IOCTL_NR(DRM_IOCTL_AGP_ACQUIRE): + case IOCTL_NR(DRM_IOCTL_AGP_RELEASE): + case IOCTL_NR(DRM_IOCTL_AGP_ENABLE): + case IOCTL_NR(DRM_IOCTL_AGP_INFO): + case IOCTL_NR(DRM_IOCTL_AGP_ALLOC): + case IOCTL_NR(DRM_IOCTL_AGP_FREE): + case IOCTL_NR(DRM_IOCTL_AGP_BIND): + case IOCTL_NR(DRM_IOCTL_AGP_UNBIND): + + /* Mga specific ioctls */ + + case IOCTL_NR(DRM_IOCTL_MGA_INIT): + + /* I810 specific ioctls */ + + case IOCTL_NR(DRM_IOCTL_I810_GETBUF): + case IOCTL_NR(DRM_IOCTL_I810_COPY): + + /* Rage 128 specific ioctls */ + + case IOCTL_NR(DRM_IOCTL_R128_PACKET): - switch (cmd) { - - case VFAT_IOCTL_READDIR_BOTH: - case VFAT_IOCTL_READDIR_SHORT: - case MTIOCGET: - case MTIOCPOS: - case MTIOCGETCONFIG: - case MTIOCSETCONFIG: - case PPPIOCSCOMPRESS: - case PPPIOCGIDLE: - case NCP_IOC_GET_FS_INFO_V2: - case NCP_IOC_GETOBJECTNAME: - case NCP_IOC_SETOBJECTNAME: - case NCP_IOC_GETPRIVATEDATA: - case NCP_IOC_SETPRIVATEDATA: - case NCP_IOC_GETMOUNTUID2: - case CAPI_MANUFACTURER_CMD: - case VIDIOCGTUNER: - case VIDIOCSTUNER: - case VIDIOCGWIN: - case VIDIOCSWIN: - case VIDIOCGFBUF: - case VIDIOCSFBUF: - case MGSL_IOCSPARAMS: - case MGSL_IOCGPARAMS: - case ATM_GETNAMES: - case ATM_GETLINKRATE: - case ATM_GETTYPE: - case ATM_GETESI: - case ATM_GETADDR: - case ATM_RSTADDR: - case ATM_ADDADDR: - case ATM_DELADDR: - case ATM_GETCIRANGE: - case ATM_SETCIRANGE: - case ATM_SETESI: - case ATM_SETESIF: - case ATM_GETSTAT: - case ATM_GETSTATZ: - case ATM_GETLOOP: - case ATM_SETLOOP: - case ATM_QUERYLOOP: - case ENI_SETMULT: - case NS_GETPSTAT: - /* case NS_SETBUFLEV: This is a duplicate case with ZATM_GETPOOLZ */ - case ZATM_GETPOOLZ: - case ZATM_GETPOOL: - case ZATM_SETPOOL: - case ZATM_GETTHIST: - case IDT77105_GETSTAT: - case IDT77105_GETSTATZ: - case IXJCTL_TONE_CADENCE: - case IXJCTL_FRAMES_READ: - case IXJCTL_FRAMES_WRITTEN: - case IXJCTL_READ_WAIT: - case IXJCTL_WRITE_WAIT: - case IXJCTL_DRYBUFFER_READ: - case I2OHRTGET: - case I2OLCTGET: - case I2OPARMSET: - case I2OPARMGET: - case I2OSWDL: - case I2OSWUL: - case I2OSWDEL: - case I2OHTML: - printk("%x:unimplemented IA32 ioctl system call\n", cmd); - return(-EINVAL); + case 
IOCTL_NR(VFAT_IOCTL_READDIR_BOTH): + case IOCTL_NR(VFAT_IOCTL_READDIR_SHORT): + case IOCTL_NR(MTIOCGET): + case IOCTL_NR(MTIOCPOS): + case IOCTL_NR(MTIOCGETCONFIG): + case IOCTL_NR(MTIOCSETCONFIG): + case IOCTL_NR(PPPIOCSCOMPRESS): + case IOCTL_NR(PPPIOCGIDLE): + case IOCTL_NR(NCP_IOC_GET_FS_INFO_V2): + case IOCTL_NR(NCP_IOC_GETOBJECTNAME): + case IOCTL_NR(NCP_IOC_SETOBJECTNAME): + case IOCTL_NR(NCP_IOC_GETPRIVATEDATA): + case IOCTL_NR(NCP_IOC_SETPRIVATEDATA): + case IOCTL_NR(NCP_IOC_GETMOUNTUID2): + case IOCTL_NR(CAPI_MANUFACTURER_CMD): + case IOCTL_NR(VIDIOCGTUNER): + case IOCTL_NR(VIDIOCSTUNER): + case IOCTL_NR(VIDIOCGWIN): + case IOCTL_NR(VIDIOCSWIN): + case IOCTL_NR(VIDIOCGFBUF): + case IOCTL_NR(VIDIOCSFBUF): + case IOCTL_NR(MGSL_IOCSPARAMS): + case IOCTL_NR(MGSL_IOCGPARAMS): + case IOCTL_NR(ATM_GETNAMES): + case IOCTL_NR(ATM_GETLINKRATE): + case IOCTL_NR(ATM_GETTYPE): + case IOCTL_NR(ATM_GETESI): + case IOCTL_NR(ATM_GETADDR): + case IOCTL_NR(ATM_RSTADDR): + case IOCTL_NR(ATM_ADDADDR): + case IOCTL_NR(ATM_DELADDR): + case IOCTL_NR(ATM_GETCIRANGE): + case IOCTL_NR(ATM_SETCIRANGE): + case IOCTL_NR(ATM_SETESI): + case IOCTL_NR(ATM_SETESIF): + case IOCTL_NR(ATM_GETSTAT): + case IOCTL_NR(ATM_GETSTATZ): + case IOCTL_NR(ATM_GETLOOP): + case IOCTL_NR(ATM_SETLOOP): + case IOCTL_NR(ATM_QUERYLOOP): + case IOCTL_NR(ENI_SETMULT): + case IOCTL_NR(NS_GETPSTAT): + /* case IOCTL_NR(NS_SETBUFLEV): This is a duplicate case with ZATM_GETPOOLZ */ + case IOCTL_NR(ZATM_GETPOOLZ): + case IOCTL_NR(ZATM_GETPOOL): + case IOCTL_NR(ZATM_SETPOOL): + case IOCTL_NR(ZATM_GETTHIST): + case IOCTL_NR(IDT77105_GETSTAT): + case IOCTL_NR(IDT77105_GETSTATZ): + case IOCTL_NR(IXJCTL_TONE_CADENCE): + case IOCTL_NR(IXJCTL_FRAMES_READ): + case IOCTL_NR(IXJCTL_FRAMES_WRITTEN): + case IOCTL_NR(IXJCTL_READ_WAIT): + case IOCTL_NR(IXJCTL_WRITE_WAIT): + case IOCTL_NR(IXJCTL_DRYBUFFER_READ): + case IOCTL_NR(I2OHRTGET): + case IOCTL_NR(I2OLCTGET): + case IOCTL_NR(I2OPARMSET): + case IOCTL_NR(I2OPARMGET): + case IOCTL_NR(I2OSWDL): + case IOCTL_NR(I2OSWUL): + case IOCTL_NR(I2OSWDEL): + case IOCTL_NR(I2OHTML): + break; default: return(sys_ioctl(fd, cmd, (unsigned long)arg)); } + printk("%x:unimplemented IA32 ioctl system call\n", cmd); + return(-EINVAL); } diff --git a/arch/ia64/ia32/ia32_traps.c b/arch/ia64/ia32/ia32_traps.c index 2cfc9ae02..5c1558fec 100644 --- a/arch/ia64/ia32/ia32_traps.c +++ b/arch/ia64/ia32/ia32_traps.c @@ -119,6 +119,6 @@ ia32_exception (struct pt_regs *regs, unsigned long isr) default: return -1; } - force_sig_info(SIGTRAP, &siginfo, current); + force_sig_info(siginfo.si_signo, &siginfo, current); return 0; } diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 416b23faa..f7b857b4c 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -236,8 +236,6 @@ do_mmap_fake(struct file *file, unsigned long addr, unsigned long len, if (OFFSET4K(addr) || OFFSET4K(off)) return -EINVAL; - if (prot & PROT_WRITE) - prot |= PROT_EXEC; prot |= PROT_WRITE; front = NULL; back = NULL; @@ -287,23 +285,20 @@ ia32_do_mmap (struct file *file, unsigned int addr, unsigned int len, unsigned i unsigned int poff; flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + prot |= PROT_EXEC; if ((flags & MAP_FIXED) && ((addr & ~PAGE_MASK) || (offset & ~PAGE_MASK))) error = do_mmap_fake(file, addr, len, prot, flags, (loff_t)offset); - else if (!addr && (offset & ~PAGE_MASK)) { + else { poff = offset & PAGE_MASK; len += offset - poff; down(¤t->mm->mmap_sem); - error = do_mmap(file, addr, len, prot, flags, poff); + error = 
do_mmap_pgoff(file, addr, len, prot, flags, poff >> PAGE_SHIFT); up(¤t->mm->mmap_sem); if (!IS_ERR((void *) error)) error += offset - poff; - } else { - down(¤t->mm->mmap_sem); - error = do_mmap(file, addr, len, prot, flags, offset); - up(¤t->mm->mmap_sem); } return error; } @@ -2032,14 +2027,14 @@ sys32_times(struct tms32 *tbuf) ret = sys_times(tbuf ? &t : NULL); set_fs (old_fs); if (tbuf) { - err = put_user (t.tms_utime, &tbuf->tms_utime); - err |= __put_user (t.tms_stime, &tbuf->tms_stime); - err |= __put_user (t.tms_cutime, &tbuf->tms_cutime); - err |= __put_user (t.tms_cstime, &tbuf->tms_cstime); + err = put_user (IA32_TICK(t.tms_utime), &tbuf->tms_utime); + err |= __put_user (IA32_TICK(t.tms_stime), &tbuf->tms_stime); + err |= __put_user (IA32_TICK(t.tms_cutime), &tbuf->tms_cutime); + err |= __put_user (IA32_TICK(t.tms_cstime), &tbuf->tms_cstime); if (err) ret = -EFAULT; } - return ret; + return IA32_TICK(ret); } unsigned int @@ -2619,6 +2614,53 @@ sys_ioperm (unsigned long from, unsigned long num, int on) return(sys_iopl(3, 0, 0, 0)); } +typedef struct { + unsigned int ss_sp; + unsigned int ss_flags; + unsigned int ss_size; +} ia32_stack_t; + +asmlinkage long +sys32_sigaltstack (const ia32_stack_t *uss32, ia32_stack_t *uoss32, +long arg2, long arg3, long arg4, +long arg5, long arg6, long arg7, +long stack) +{ + struct pt_regs *pt = (struct pt_regs *) &stack; + stack_t uss, uoss; + ia32_stack_t buf32; + int ret; + mm_segment_t old_fs = get_fs(); + + if (uss32) + if (copy_from_user(&buf32, (void *)A(uss32), sizeof(ia32_stack_t))) + return(-EFAULT); + uss.ss_sp = (void *) (long) buf32.ss_sp; + uss.ss_flags = buf32.ss_flags; + uss.ss_size = buf32.ss_size; + set_fs(KERNEL_DS); + ret = do_sigaltstack(uss32 ? &uss : NULL, &uoss, pt->r12); + set_fs(old_fs); + if (ret < 0) + return(ret); + if (uoss32) { + buf32.ss_sp = (long) uoss.ss_sp; + buf32.ss_flags = uoss.ss_flags; + buf32.ss_size = uoss.ss_size; + if (copy_to_user((void*)A(uoss32), &buf32, sizeof(ia32_stack_t))) + return(-EFAULT); + } + return(ret); +} + +asmlinkage int +sys_pause (void) +{ + current->state = TASK_INTERRUPTIBLE; + schedule(); + return -ERESTARTNOHAND; +} + #ifdef NOTYET /* UNTESTED FOR IA64 FROM HERE DOWN */ /* In order to reduce some races, while at the same time doing additional diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 7a49511d3..e4ffb3ae6 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -9,20 +9,20 @@ all: kernel.o head.o init_task.o -obj-y := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_ia64.o irq_sapic.o ivt.o \ - machvec.o pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \ - signal.o sys_ia64.o traps.o time.o unaligned.o unwind.o +O_TARGET := kernel.o -obj-$(CONFIG_IA64_GENERIC) += machvec.o +obj-y := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_ia64.o irq_sapic.o ivt.o \ + machvec.o pal.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \ + signal.o sys_ia64.o traps.o time.o unaligned.o unwind.o +obj-$(CONFIG_IA64_GENERIC) += machvec.o iosapic.o +obj-$(CONFIG_IA64_DIG) += iosapic.o obj-$(CONFIG_IA64_PALINFO) += palinfo.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_SMP) += smp.o smpboot.o obj-$(CONFIG_IA64_MCA) += mca.o mca_asm.o obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o -O_TARGET := kernel.o -O_OBJS := $(obj-y) -OX_OBJS := ia64_ksyms.o +export-objs := ia64_ksyms.o clean:: diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index a8c1ead1f..35ed564c9 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c 
@@ -6,6 +6,12 @@ * * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> + * Copyright (C) 2000 Hewlett-Packard Co. + * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 2000 Intel Corp. + * Copyright (C) 2000 J.I. Lee <jung-ik.lee@intel.com> + * ACPI based kernel configuration manager. + * ACPI 2.0 & IA64 ext 0.71 */ #include <linux/config.h> @@ -36,29 +42,87 @@ int __initdata total_cpus; void (*pm_idle)(void); +asm (".weak iosapic_register_legacy_irq"); +asm (".weak iosapic_init"); + +const char * +acpi_get_sysname (void) +{ + /* the following should go away once we have an ACPI parser: */ +#ifdef CONFIG_IA64_GENERIC + return "hpsim"; +#else +# if defined (CONFIG_IA64_HP_SIM) + return "hpsim"; +# elif defined (CONFIG_IA64_SGI_SN1) + return "sn1"; +# elif defined (CONFIG_IA64_DIG) + return "dig"; +# else +# error Unknown platform. Fix acpi.c. +# endif +#endif + +} + /* - * Identify usable CPU's and remember them for SMP bringup later. + * Configure legacy IRQ information. */ static void __init -acpi_lsapic(char *p) +acpi_legacy_irq (char *p) { - int add = 1; - - acpi_entry_lsapic_t *lsapic = (acpi_entry_lsapic_t *) p; + acpi_entry_int_override_t *legacy = (acpi_entry_int_override_t *) p; + unsigned long polarity = 0, edge_triggered = 0; - if ((lsapic->flags & LSAPIC_PRESENT) == 0) + /* + * If the platform we're running doesn't define + * iosapic_register_legacy_irq(), we ignore this info... + */ + if (!iosapic_register_legacy_irq) return; + switch (legacy->flags) { + case 0x5: polarity = 1; edge_triggered = 1; break; + case 0x7: polarity = 0; edge_triggered = 1; break; + case 0xd: polarity = 1; edge_triggered = 0; break; + case 0xf: polarity = 0; edge_triggered = 0; break; + default: + printk(" ACPI Legacy IRQ 0x%02x: Unknown flags 0x%x\n", legacy->isa_irq, + legacy->flags); + break; + } + iosapic_register_legacy_irq(legacy->isa_irq, legacy->pin, polarity, edge_triggered); +} + +/* + * ACPI 2.0 tables parsing functions + */ + +static unsigned long +readl_unaligned(void *p) +{ + unsigned long ret; + + memcpy(&ret, p, sizeof(long)); + return ret; +} + +/* + * Identify usable CPU's and remember them for SMP bringup later. + */ +static void __init +acpi20_lsapic (char *p) +{ + int add = 1; + + acpi20_entry_lsapic_t *lsapic = (acpi20_entry_lsapic_t *) p; printk(" CPU %d (%.04x:%.04x): ", total_cpus, lsapic->eid, lsapic->id); if ((lsapic->flags & LSAPIC_ENABLED) == 0) { printk("Disabled.\n"); add = 0; - } else if (lsapic->flags & LSAPIC_PERFORMANCE_RESTRICTED) { - printk("Performance Restricted; ignoring.\n"); - add = 0; } - + #ifdef CONFIG_SMP smp_boot_data.cpu_phys_id[total_cpus] = -1; #endif @@ -73,87 +137,234 @@ acpi_lsapic(char *p) } /* - * Configure legacy IRQ information in iosapic_vector + * Info on platform interrupt sources: NMI. PMI, INIT, etc. */ static void __init -acpi_legacy_irq(char *p) +acpi20_platform (char *p) { - /* - * This is not good. ACPI is not necessarily limited to CONFIG_IA64_DIG, yet - * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be - * a means for platform_setup() to register ACPI handlers? 
- */ -#ifdef CONFIG_IA64_IRQ_ACPI - acpi_entry_int_override_t *legacy = (acpi_entry_int_override_t *) p; - unsigned char vector; - int i; + acpi20_entry_platform_src_t *plat = (acpi20_entry_platform_src_t *) p; + + printk("PLATFORM: IOSAPIC %x -> Vector %x on CPU %.04u:%.04u\n", + plat->iosapic_vector, plat->global_vector, plat->eid, plat->id); +} - vector = isa_irq_to_vector(legacy->isa_irq); +/* + * Override the physical address of the local APIC in the MADT stable header. + */ +static void __init +acpi20_lapic_addr_override (char *p) +{ + acpi20_entry_lapic_addr_override_t * lapic = (acpi20_entry_lapic_addr_override_t *) p; + + if (lapic->lapic_address) { + iounmap((void *)ipi_base_addr); + ipi_base_addr = (unsigned long) ioremap(lapic->lapic_address, 0); + + printk("LOCAL ACPI override to 0x%lx(p=0x%lx)\n", + ipi_base_addr, lapic->lapic_address); + } +} + +/* + * Parse the ACPI Multiple APIC Description Table + */ +static void __init +acpi20_parse_madt (acpi_madt_t *madt) +{ + acpi_entry_iosapic_t *iosapic; + char *p, *end; + + /* Base address of IPI Message Block */ + if (madt->lapic_address) { + ipi_base_addr = (unsigned long) ioremap(madt->lapic_address, 0); + printk("Lapic address set to 0x%lx\n", ipi_base_addr); + } else + printk("Lapic address set to default 0x%lx\n", ipi_base_addr); + + p = (char *) (madt + 1); + end = p + (madt->header.length - sizeof(acpi_madt_t)); /* - * Clobber any old pin mapping. It may be that it gets replaced later on + * Splitted entry parsing to ensure ordering. */ - for (i = 0; i < IA64_MAX_VECTORED_IRQ; i++) { - if (i == vector) - continue; - if (iosapic_pin(i) == iosapic_pin(vector)) - iosapic_pin(i) = 0xff; - } - iosapic_pin(vector) = legacy->pin; - iosapic_bus(vector) = BUS_ISA; /* This table only overrides the ISA devices */ - iosapic_busdata(vector) = 0; - - /* - * External timer tick is special... 
- */ - if (vector != TIMER_IRQ) - iosapic_dmode(vector) = IO_SAPIC_LOWEST_PRIORITY; - else - iosapic_dmode(vector) = IO_SAPIC_FIXED; + while (p < end) { + switch (*p) { + case ACPI20_ENTRY_LOCAL_APIC_ADDR_OVERRIDE: + printk("ACPI 2.0 MADT: LOCAL APIC Override\n"); + acpi20_lapic_addr_override(p); + break; + + case ACPI20_ENTRY_LOCAL_SAPIC: + printk("ACPI 2.0 MADT: LOCAL SAPIC\n"); + acpi20_lsapic(p); + break; - /* See MPS 1.4 section 4.3.4 */ - switch (legacy->flags) { - case 0x5: - iosapic_polarity(vector) = IO_SAPIC_POL_HIGH; - iosapic_trigger(vector) = IO_SAPIC_EDGE; - break; - case 0x8: - iosapic_polarity(vector) = IO_SAPIC_POL_LOW; - iosapic_trigger(vector) = IO_SAPIC_EDGE; - break; - case 0xd: - iosapic_polarity(vector) = IO_SAPIC_POL_HIGH; - iosapic_trigger(vector) = IO_SAPIC_LEVEL; - break; - case 0xf: - iosapic_polarity(vector) = IO_SAPIC_POL_LOW; - iosapic_trigger(vector) = IO_SAPIC_LEVEL; - break; - default: - printk(" ACPI Legacy IRQ 0x%02x: Unknown flags 0x%x\n", legacy->isa_irq, - legacy->flags); - break; + case ACPI20_ENTRY_IO_SAPIC: + iosapic = (acpi_entry_iosapic_t *) p; + if (iosapic_init) + iosapic_init(iosapic->address, iosapic->irq_base); + break; + + case ACPI20_ENTRY_PLATFORM_INT_SOURCE: + printk("ACPI 2.0 MADT: PLATFORM INT SOUCE\n"); + acpi20_platform(p); + break; + + case ACPI20_ENTRY_LOCAL_APIC: + printk("ACPI 2.0 MADT: LOCAL APIC entry\n"); break; + case ACPI20_ENTRY_IO_APIC: + printk("ACPI 2.0 MADT: IO APIC entry\n"); break; + case ACPI20_ENTRY_NMI_SOURCE: + printk("ACPI 2.0 MADT: NMI SOURCE entry\n"); break; + case ACPI20_ENTRY_LOCAL_APIC_NMI: + printk("ACPI 2.0 MADT: LOCAL APIC NMI entry\n"); break; + case ACPI20_ENTRY_INT_SRC_OVERRIDE: + break; + default: + printk("ACPI 2.0 MADT: unknown entry skip\n"); break; + break; + } + + p += p[1]; + } + + p = (char *) (madt + 1); + end = p + (madt->header.length - sizeof(acpi_madt_t)); + + while (p < end) { + + switch (*p) { + case ACPI20_ENTRY_INT_SRC_OVERRIDE: + printk("ACPI 2.0 MADT: INT SOURCE Override\n"); + acpi_legacy_irq(p); + break; + default: + break; + } + + p += p[1]; + } + + /* Make bootup pretty */ + printk(" %d CPUs available, %d CPUs total\n", + available_cpus, total_cpus); +} + +int __init +acpi20_parse (acpi20_rsdp_t *rsdp20) +{ + acpi_xsdt_t *xsdt; + acpi_desc_table_hdr_t *hdrp; + int tables, i; + + if (strncmp(rsdp20->signature, ACPI_RSDP_SIG, ACPI_RSDP_SIG_LEN)) { + printk("ACPI 2.0 RSDP signature incorrect!\n"); + return 0; + } else { + printk("ACPI 2.0 Root System Description Ptr at 0x%lx\n", + (unsigned long)rsdp20); + } + + xsdt = __va(rsdp20->xsdt); + hdrp = &xsdt->header; + if (strncmp(hdrp->signature, + ACPI_XSDT_SIG, ACPI_XSDT_SIG_LEN)) { + printk("ACPI 2.0 XSDT signature incorrect. Trying RSDT\n"); + /* RSDT parsing here */ + return 0; + } else { + printk("ACPI 2.0 XSDT at 0x%lx (p=0x%lx)\n", + (unsigned long)xsdt, (unsigned long)rsdp20->xsdt); + } + + printk("ACPI 2.0: %.6s %.8s %d.%d\n", + hdrp->oem_id, + hdrp->oem_table_id, + hdrp->oem_revision >> 16, + hdrp->oem_revision & 0xffff); + +#ifdef CONFIG_ACPI_KERNEL_CONFIG + acpi_cf_init((void *)rsdp20); +#endif + + tables =(hdrp->length -sizeof(acpi_desc_table_hdr_t))>>3; + + for (i = 0; i < tables; i++) { + hdrp = (acpi_desc_table_hdr_t *) __va(readl_unaligned(&xsdt->entry_ptrs[i])); + printk(" :table %4.4s found\n", hdrp->signature); + + /* Only interested int the MADT table for now ... 
*/ + if (strncmp(hdrp->signature, + ACPI_MADT_SIG, ACPI_MADT_SIG_LEN) != 0) + continue; + + acpi20_parse_madt((acpi_madt_t *) hdrp); + } + +#ifdef CONFIG_ACPI_KERNEL_CONFIG + acpi_cf_terminate(); +#endif + +#ifdef CONFIG_SMP + if (available_cpus == 0) { + printk("ACPI: Found 0 CPUS; assuming 1\n"); + available_cpus = 1; /* We've got at least one of these, no? */ + } + smp_boot_data.cpu_count = available_cpus; +#endif + return 1; +} +/* + * ACPI 1.0b with 0.71 IA64 extensions functions; should be removed once all + * platforms start supporting ACPI 2.0 + */ + +/* + * Identify usable CPU's and remember them for SMP bringup later. + */ +static void __init +acpi_lsapic (char *p) +{ + int add = 1; + + acpi_entry_lsapic_t *lsapic = (acpi_entry_lsapic_t *) p; + + if ((lsapic->flags & LSAPIC_PRESENT) == 0) + return; + + printk(" CPU %d (%.04x:%.04x): ", total_cpus, lsapic->eid, lsapic->id); + + if ((lsapic->flags & LSAPIC_ENABLED) == 0) { + printk("Disabled.\n"); + add = 0; + } else if (lsapic->flags & LSAPIC_PERFORMANCE_RESTRICTED) { + printk("Performance Restricted; ignoring.\n"); + add = 0; } -# ifdef ACPI_DEBUG - printk("Legacy ISA IRQ %x -> IA64 Vector %x IOSAPIC Pin %x Active %s %s Trigger\n", - legacy->isa_irq, vector, iosapic_pin(vector), - ((iosapic_polarity(vector) == IO_SAPIC_POL_LOW) ? "Low" : "High"), - ((iosapic_trigger(vector) == IO_SAPIC_LEVEL) ? "Level" : "Edge")); -# endif /* ACPI_DEBUG */ -#endif /* CONFIG_IA64_IRQ_ACPI */ +#ifdef CONFIG_SMP + smp_boot_data.cpu_phys_id[total_cpus] = -1; +#endif + if (add) { + printk("Available.\n"); + available_cpus++; +#ifdef CONFIG_SMP + smp_boot_data.cpu_phys_id[total_cpus] = (lsapic->id << 8) | lsapic->eid; +#endif /* CONFIG_SMP */ + } + total_cpus++; } /* * Info on platform interrupt sources: NMI. PMI, INIT, etc. */ static void __init -acpi_platform(char *p) +acpi_platform (char *p) { acpi_entry_platform_src_t *plat = (acpi_entry_platform_src_t *) p; - printk("PLATFORM: IOSAPIC %x -> Vector %lx on CPU %.04u:%.04u\n", + printk("PLATFORM: IOSAPIC %x -> Vector %x on CPU %.04u:%.04u\n", plat->iosapic_vector, plat->global_vector, plat->eid, plat->id); } @@ -161,8 +372,9 @@ acpi_platform(char *p) * Parse the ACPI Multiple SAPIC Table */ static void __init -acpi_parse_msapic(acpi_sapic_t *msapic) +acpi_parse_msapic (acpi_sapic_t *msapic) { + acpi_entry_iosapic_t *iosapic; char *p, *end; /* Base address of IPI Message Block */ @@ -172,41 +384,31 @@ acpi_parse_msapic(acpi_sapic_t *msapic) end = p + (msapic->header.length - sizeof(acpi_sapic_t)); while (p < end) { - switch (*p) { - case ACPI_ENTRY_LOCAL_SAPIC: + case ACPI_ENTRY_LOCAL_SAPIC: acpi_lsapic(p); break; - case ACPI_ENTRY_IO_SAPIC: - platform_register_iosapic((acpi_entry_iosapic_t *) p); + case ACPI_ENTRY_IO_SAPIC: + iosapic = (acpi_entry_iosapic_t *) p; + if (iosapic_init) + iosapic_init(iosapic->address, iosapic->irq_base); break; - case ACPI_ENTRY_INT_SRC_OVERRIDE: + case ACPI_ENTRY_INT_SRC_OVERRIDE: acpi_legacy_irq(p); break; - - case ACPI_ENTRY_PLATFORM_INT_SOURCE: + + case ACPI_ENTRY_PLATFORM_INT_SOURCE: acpi_platform(p); break; - - default: + + default: break; } /* Move to next table entry. */ -#define BAD_ACPI_TABLE -#ifdef BAD_ACPI_TABLE - /* - * Some prototype Lion's have a bad ACPI table - * requiring this fix. Without this fix, those - * machines crash during bootup. 
- */ - if (p[1] == 0) - p = end; - else -#endif - p += p[1]; + p += p[1]; } /* Make bootup pretty */ @@ -214,24 +416,18 @@ acpi_parse_msapic(acpi_sapic_t *msapic) } int __init -acpi_parse(acpi_rsdp_t *rsdp) +acpi_parse (acpi_rsdp_t *rsdp) { acpi_rsdt_t *rsdt; acpi_desc_table_hdr_t *hdrp; long tables, i; - if (!rsdp) { - printk("Uh-oh, no ACPI Root System Description Pointer table!\n"); - return 0; - } - if (strncmp(rsdp->signature, ACPI_RSDP_SIG, ACPI_RSDP_SIG_LEN)) { printk("Uh-oh, ACPI RSDP signature incorrect!\n"); return 0; } - rsdp->rsdt = __va(rsdp->rsdt); - rsdt = rsdp->rsdt; + rsdt = __va(rsdp->rsdt); if (strncmp(rsdt->header.signature, ACPI_RSDT_SIG, ACPI_RSDT_SIG_LEN)) { printk("Uh-oh, ACPI RDST signature incorrect!\n"); return 0; @@ -256,7 +452,7 @@ acpi_parse(acpi_rsdp_t *rsdp) } #ifdef CONFIG_ACPI_KERNEL_CONFIG - acpi_cf_terminate(); + acpi_cf_terminate(); #endif #ifdef CONFIG_SMP @@ -268,22 +464,3 @@ acpi_parse(acpi_rsdp_t *rsdp) #endif return 1; } - -const char * -acpi_get_sysname (void) -{ - /* the following should go away once we have an ACPI parser: */ -#ifdef CONFIG_IA64_GENERIC - return "hpsim"; -#else -# if defined (CONFIG_IA64_HP_SIM) - return "hpsim"; -# elif defined (CONFIG_IA64_SGI_SN1) - return "sn1"; -# elif defined (CONFIG_IA64_DIG) - return "dig"; -# else -# error Unknown platform. Fix acpi.c. -# endif -#endif -} diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 759db7f52..1ac4e04f4 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -333,6 +333,9 @@ efi_init (void) if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { efi.mps = __va(config_tables[i].table); printk(" MPS=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { + efi.acpi20 = __va(config_tables[i].table); + printk(" ACPI 2.0=0x%lx", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { efi.acpi = __va(config_tables[i].table); printk(" ACPI=0x%lx", config_tables[i].table); @@ -364,7 +367,7 @@ efi_init (void) #if EFI_DEBUG /* print EFI memory map: */ { - efi_memory_desc_t *md = p; + efi_memory_desc_t *md; void *p; for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) { diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index ffb1760ea..f8c647386 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -11,6 +11,17 @@ * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com> */ /* + * ia64_switch_to now places correct virtual mapping in in TR2 for + * kernel stack. This allows us to handle interrupts without changing + * to physical mode. + * + * ar.k4 is now used to hold last virtual map address + * + * Jonathan Nickin <nicklin@missioncriticallinux.com> + * Patrick O'Rourke <orourke@missioncriticallinux.com> + * 11/07/2000 + / +/* * Global (preserved) predicate usage on syscall entry/exit path: * * pKern: See entry.h. 
@@ -27,7 +38,8 @@ #include <asm/processor.h> #include <asm/unistd.h> #include <asm/asmmacro.h> - +#include <asm/pgtable.h> + #include "entry.h" .text @@ -98,6 +110,8 @@ GLOBAL_ENTRY(sys_clone) br.ret.sptk.many rp END(sys_clone) +#define KSTACK_TR 2 + /* * prev_task <- ia64_switch_to(struct task_struct *next) */ @@ -108,22 +122,55 @@ GLOBAL_ENTRY(ia64_switch_to) UNW(.body) adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 - dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff + mov r27=ar.k4 + dep r20=0,in0,61,3 // physical address of "current" + ;; + st8 [r22]=sp // save kernel stack pointer of old task + shr.u r26=r20,_PAGE_SIZE_256M + ;; + cmp.eq p7,p6=r26,r0 // check < 256M adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 ;; - st8 [r22]=sp // save kernel stack pointer of old task - ld8 sp=[r21] // load kernel stack pointer of new task - and r20=in0,r18 // physical address of "current" + /* + * If we've already mapped this task's page, we can skip doing it + * again. + */ +(p6) cmp.eq p7,p6=r26,r27 +(p6) br.cond.dpnt.few .map + ;; +.done: ld8 sp=[r21] // load kernel stack pointer of new task +(p6) ssm psr.ic // if we we had to map, renable the psr.ic bit FIRST!!! ;; - mov ar.k6=r20 // copy "current" into ar.k6 - mov r8=r13 // return pointer to previously running task - mov r13=in0 // set "current" pointer +(p6) srlz.d + mov ar.k6=r20 // copy "current" into ar.k6 + mov r8=r13 // return pointer to previously running task + mov r13=in0 // set "current" pointer ;; +(p6) ssm psr.i // renable psr.i AFTER the ic bit is serialized DO_LOAD_SWITCH_STACK( ) + #ifdef CONFIG_SMP - sync.i // ensure "fc"s done by this CPU are visible on other CPUs -#endif - br.ret.sptk.few rp + sync.i // ensure "fc"s done by this CPU are visible on other CPUs +#endif + br.ret.sptk.few rp // boogie on out in new context + +.map: + rsm psr.i | psr.ic + movl r25=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX + ;; + srlz.d + or r23=r25,r20 // construct PA | page properties + mov r25=_PAGE_SIZE_256M<<2 + ;; + mov cr.itir=r25 + mov cr.ifa=in0 // VA of next task... + ;; + mov r25=KSTACK_TR // use tr entry #2... + mov ar.k4=r26 // remember last page we mapped... + ;; + itr.d dtr[r25]=r23 // wire in new mapping... + br.cond.sptk.many .done + ;; END(ia64_switch_to) #ifndef CONFIG_IA64_NEW_UNWIND @@ -503,7 +550,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) ;; ld4 r2=[r2] ;; - shl r2=r2,SMP_LOG_CACHE_BYTES // can't use shladd here... + shl r2=r2,SMP_CACHE_SHIFT // can't use shladd here... 
;; add r3=r2,r3 #else @@ -542,7 +589,7 @@ back_from_resched: // check & deliver pending signals: (p2) br.call.spnt.few rp=handle_signal_delivery .ret9: -#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_IA64_SOFTSDV_HACKS) +#ifdef CONFIG_IA64_SOFTSDV_HACKS // Check for lost ticks rsm psr.i mov r2 = ar.itc @@ -611,14 +658,13 @@ restore_all: mov ar.ccv=r1 mov ar.fpsr=r13 mov b0=r14 - // turn off interrupts, interrupt collection, & data translation - rsm psr.i | psr.ic | psr.dt + // turn off interrupts, interrupt collection + rsm psr.i | psr.ic ;; srlz.i // EAS 2.5 mov b7=r15 ;; invala // invalidate ALAT - dep r12=0,r12,61,3 // convert sp to physical address bsw.0;; // switch back to bank 0 (must be last in insn group) ;; #ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC @@ -757,7 +803,7 @@ END(invoke_schedule_tail) #endif /* CONFIG_SMP */ -#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_IA64_SOFTSDV_HACKS) +#ifdef CONFIG_IA64_SOFTSDV_HACKS ENTRY(invoke_ia64_reset_itm) UNW(.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)) @@ -772,7 +818,7 @@ ENTRY(invoke_ia64_reset_itm) br.ret.sptk.many rp END(invoke_ia64_reset_itm) -#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC || CONFIG_IA64_SOFTSDV_HACKS */ +#endif /* CONFIG_IA64_SOFTSDV_HACKS */ /* * Invoke do_softirq() while preserving in0-in7, which may be needed @@ -1091,7 +1137,7 @@ sys_call_table: data8 sys_setpriority data8 sys_statfs data8 sys_fstatfs - data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1105 data8 sys_semget data8 sys_semop data8 sys_semctl diff --git a/arch/ia64/kernel/fw-emu.c b/arch/ia64/kernel/fw-emu.c index 34316fe58..e16f23426 100644 --- a/arch/ia64/kernel/fw-emu.c +++ b/arch/ia64/kernel/fw-emu.c @@ -402,7 +402,6 @@ sys_fw_init (const char *args, int arglen) sal_systab->sal_rev_minor = 1; sal_systab->sal_rev_major = 0; sal_systab->entry_count = 1; - sal_systab->ia32_bios_present = 0; #ifdef CONFIG_IA64_GENERIC strcpy(sal_systab->oem_id, "Generic"); diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index e6298b297..abee408f1 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -74,8 +74,8 @@ GLOBAL_ENTRY(_start) ;; #ifdef CONFIG_IA64_EARLY_PRINTK - mov r2=6 - mov r3=(8<<8) | (28<<2) + mov r3=(6<<8) | (28<<2) + movl r2=6<<61 ;; mov rr[r2]=r3 ;; @@ -168,6 +168,11 @@ GLOBAL_ENTRY(ia64_save_debug_regs) add r19=IA64_NUM_DBG_REGS*8,in0 ;; 1: mov r16=dbr[r18] +#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC) \ + || defined(CONFIG_ITANIUM_C0_SPECIFIC) + ;; + srlz.d +#endif mov r17=ibr[r18] add r18=1,r18 ;; @@ -181,7 +186,8 @@ END(ia64_save_debug_regs) GLOBAL_ENTRY(ia64_load_debug_regs) alloc r16=ar.pfs,1,0,0,0 -#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) +#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) \ + || defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC)) lfetch.nta [in0] #endif mov r20=ar.lc // preserve ar.lc @@ -194,6 +200,11 @@ GLOBAL_ENTRY(ia64_load_debug_regs) add r18=1,r18 ;; mov dbr[r18]=r16 +#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC) \ + || defined(CONFIG_ITANIUM_C0_SPECIFIC) + ;; + srlz.d +#endif mov ibr[r18]=r17 br.cloop.sptk.few 1b ;; @@ -754,7 +765,7 @@ GLOBAL_ENTRY(ia64_spinlock_contention) mov tmp=ar.itc (p15) br.cond.sptk .wait ;; - ld1 tmp=[r31] + ld4 tmp=[r31] ;; cmp.ne p15,p0=tmp,r0 mov tmp=ar.itc @@ -764,7 +775,7 @@ GLOBAL_ENTRY(ia64_spinlock_contention) mov tmp=1 ;; IA64_SEMFIX_INSN - cmpxchg1.acq tmp=[r31],tmp,ar.ccv + 
cmpxchg4.acq tmp=[r31],tmp,ar.ccv ;; cmp.eq p15,p0=tmp,r0 diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index d3d2416cf..f831f86d9 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -24,9 +24,8 @@ EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(strstr); EXPORT_SYMBOL(strtok); -#include <linux/pci.h> -EXPORT_SYMBOL(pci_alloc_consistent); -EXPORT_SYMBOL(pci_free_consistent); +#include <asm/hw_irq.h> +EXPORT_SYMBOL(isa_irq_to_vector_map); #include <linux/in6.h> #include <asm/checksum.h> @@ -49,14 +48,6 @@ EXPORT_SYMBOL(disable_irq_nosync); #include <asm/page.h> EXPORT_SYMBOL(clear_page); -#include <asm/pci.h> -EXPORT_SYMBOL(pci_dma_sync_sg); -EXPORT_SYMBOL(pci_dma_sync_single); -EXPORT_SYMBOL(pci_map_sg); -EXPORT_SYMBOL(pci_map_single); -EXPORT_SYMBOL(pci_unmap_sg); -EXPORT_SYMBOL(pci_unmap_single); - #include <asm/processor.h> EXPORT_SYMBOL(cpu_data); EXPORT_SYMBOL(kernel_thread); @@ -92,6 +83,9 @@ EXPORT_SYMBOL(__global_restore_flags); #include <asm/uaccess.h> EXPORT_SYMBOL(__copy_user); EXPORT_SYMBOL(__do_clear_user); +EXPORT_SYMBOL(__strlen_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__strnlen_user); #include <asm/unistd.h> EXPORT_SYMBOL(__ia64_syscall); diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c new file mode 100644 index 000000000..9d8408c3f --- /dev/null +++ b/arch/ia64/kernel/iosapic.c @@ -0,0 +1,498 @@ +/* + * I/O SAPIC support. + * + * Copyright (C) 1999 Intel Corp. + * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com> + * Copyright (C) 1999-2000 Hewlett-Packard Co. + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> + * + * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code. + * In particular, we now have separate handlers for edge + * and level triggered interrupts. + * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation + * PCI to vector mapping, shared PCI interrupts. + * 00/10/27 D. Mosberger Document things a bit more to make them more understandable. + * Clean up much of the old IOSAPIC cruft. + */ +/* + * Here is what the interrupt logic between a PCI device and the CPU looks like: + * + * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The + * device is uniquely identified by its bus-, device-, and slot-number (the function + * number does not matter here because all functions share the same interrupt + * lines). + * + * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC controller. + * Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level + * triggered and use the same polarity). Each interrupt line has a unique IOSAPIC + * irq number which can be calculated as the sum of the controller's base irq number + * and the IOSAPIC pin number to which the line connects. + * + * (3) The IOSAPIC uses an internal table to map the IOSAPIC pin into the IA-64 interrupt + * vector. This interrupt vector is then sent to the CPU. + * + * In other words, there are two levels of indirections involved: + * + * pci pin -> iosapic irq -> IA-64 vector + * + * Note: outside this module, IA-64 vectors are called "irqs". This is because that's + * the traditional name Linux uses for interrupt vectors. 
+ */ +#include <linux/config.h> + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/string.h> +#include <linux/irq.h> + +#include <asm/acpi-ext.h> +#include <asm/delay.h> +#include <asm/io.h> +#include <asm/iosapic.h> +#include <asm/machvec.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> + +#ifdef CONFIG_ACPI_KERNEL_CONFIG +# include <asm/acpikcfg.h> +#endif + +#undef DEBUG_IRQ_ROUTING + +static spinlock_t iosapic_lock = SPIN_LOCK_UNLOCKED; + +/* PCI pin to IOSAPIC irq routing information. This info typically comes from ACPI. */ + +static struct { + int num_routes; + struct pci_vector_struct *route; +} pci_irq; + +/* This tables maps IA-64 vectors to the IOSAPIC pin that generates this vector. */ + +static struct iosapic_irq { + char *addr; /* base address of IOSAPIC */ + unsigned char base_irq; /* first irq assigned to this IOSAPIC */ + char pin; /* IOSAPIC pin (-1 => not an IOSAPIC irq) */ + unsigned char dmode : 3; /* delivery mode (see iosapic.h) */ + unsigned char polarity : 1; /* interrupt polarity (see iosapic.h) */ + unsigned char trigger : 1; /* trigger mode (see iosapic.h) */ +} iosapic_irq[NR_IRQS]; + +/* + * Translate IOSAPIC irq number to the corresponding IA-64 interrupt vector. If no + * entry exists, return -1. + */ +static int +iosapic_irq_to_vector (int irq) +{ + int vector; + + for (vector = 0; vector < NR_IRQS; ++vector) + if (iosapic_irq[vector].base_irq + iosapic_irq[vector].pin == irq) + return vector; + return -1; +} + +/* + * Map PCI pin to the corresponding IA-64 interrupt vector. If no such mapping exists, + * return -1. + */ +static int +pci_pin_to_vector (int bus, int slot, int pci_pin) +{ + struct pci_vector_struct *r; + + for (r = pci_irq.route; r < pci_irq.route + pci_irq.num_routes; ++r) + if (r->bus == bus && (r->pci_id >> 16) == slot && r->pin == pci_pin) + return iosapic_irq_to_vector(r->irq); + return -1; +} + +static void +set_rte (unsigned int vector, unsigned long dest) +{ + unsigned long pol, trigger, dmode; + u32 low32, high32; + char *addr; + int pin; + + pin = iosapic_irq[vector].pin; + if (pin < 0) + return; /* not an IOSAPIC interrupt */ + + addr = iosapic_irq[vector].addr; + pol = iosapic_irq[vector].polarity; + trigger = iosapic_irq[vector].trigger; + dmode = iosapic_irq[vector].dmode; + + low32 = ((pol << IOSAPIC_POLARITY_SHIFT) | + (trigger << IOSAPIC_TRIGGER_SHIFT) | + (dmode << IOSAPIC_DELIVERY_SHIFT) | + vector); + +#ifdef CONFIG_IA64_AZUSA_HACKS + /* set Flush Disable bit */ + if (addr != (char *) 0xc0000000fec00000) + low32 |= (1 << 17); +#endif + + /* dest contains both id and eid */ + high32 = (dest << IOSAPIC_DEST_SHIFT); + + writel(IOSAPIC_RTE_HIGH(pin), addr + IOSAPIC_REG_SELECT); + writel(high32, addr + IOSAPIC_WINDOW); + writel(IOSAPIC_RTE_LOW(pin), addr + IOSAPIC_REG_SELECT); + writel(low32, addr + IOSAPIC_WINDOW); +} + +static void +nop (unsigned int vector) +{ + /* do nothing... */ +} + +static void +mask_irq (unsigned int vector) +{ + unsigned long flags; + char *addr; + u32 low32; + int pin; + + addr = iosapic_irq[vector].addr; + pin = iosapic_irq[vector].pin; + + if (pin < 0) + return; /* not an IOSAPIC interrupt! 
*/ + + spin_lock_irqsave(&iosapic_lock, flags); + { + writel(IOSAPIC_RTE_LOW(pin), addr + IOSAPIC_REG_SELECT); + low32 = readl(addr + IOSAPIC_WINDOW); + + low32 |= (1 << IOSAPIC_MASK_SHIFT); /* set only the mask bit */ + writel(low32, addr + IOSAPIC_WINDOW); + } + spin_unlock_irqrestore(&iosapic_lock, flags); +} + +static void +unmask_irq (unsigned int vector) +{ + unsigned long flags; + char *addr; + u32 low32; + int pin; + + addr = iosapic_irq[vector].addr; + pin = iosapic_irq[vector].pin; + if (pin < 0) + return; /* not an IOSAPIC interrupt! */ + + spin_lock_irqsave(&iosapic_lock, flags); + { + writel(IOSAPIC_RTE_LOW(pin), addr + IOSAPIC_REG_SELECT); + low32 = readl(addr + IOSAPIC_WINDOW); + + low32 &= ~(1 << IOSAPIC_MASK_SHIFT); /* clear only the mask bit */ + writel(low32, addr + IOSAPIC_WINDOW); + } + spin_unlock_irqrestore(&iosapic_lock, flags); +} + + +static void +iosapic_set_affinity (unsigned int vector, unsigned long mask) +{ + printk("iosapic_set_affinity: not implemented yet\n"); +} + +/* + * Handlers for level-triggered interrupts. + */ + +static unsigned int +iosapic_startup_level_irq (unsigned int vector) +{ + unmask_irq(vector); + return 0; +} + +static void +iosapic_end_level_irq (unsigned int vector) +{ + writel(vector, iosapic_irq[vector].addr + IOSAPIC_EOI); +} + +#define iosapic_shutdown_level_irq mask_irq +#define iosapic_enable_level_irq unmask_irq +#define iosapic_disable_level_irq mask_irq +#define iosapic_ack_level_irq nop + +struct hw_interrupt_type irq_type_iosapic_level = { + typename: "IO-SAPIC-level", + startup: iosapic_startup_level_irq, + shutdown: iosapic_shutdown_level_irq, + enable: iosapic_enable_level_irq, + disable: iosapic_disable_level_irq, + ack: iosapic_ack_level_irq, + end: iosapic_end_level_irq, + set_affinity: iosapic_set_affinity +}; + +/* + * Handlers for edge-triggered interrupts. + */ + +static unsigned int +iosapic_startup_edge_irq (unsigned int vector) +{ + unmask_irq(vector); + /* + * IOSAPIC simply drops interrupts pended while the + * corresponding pin was masked, so we can't know if an + * interrupt is pending already. Let's hope not... + */ + return 0; +} + +static void +iosapic_ack_edge_irq (unsigned int vector) +{ + /* + * Once we have recorded IRQ_PENDING already, we can mask the + * interrupt for real. This prevents IRQ storms from unhandled + * devices. + */ + if ((irq_desc[vector].status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED)) + mask_irq(vector); +} + +#define iosapic_enable_edge_irq unmask_irq +#define iosapic_disable_edge_irq nop +#define iosapic_end_edge_irq nop + +struct hw_interrupt_type irq_type_iosapic_edge = { + typename: "IO-SAPIC-edge", + startup: iosapic_startup_edge_irq, + shutdown: iosapic_disable_edge_irq, + enable: iosapic_enable_edge_irq, + disable: iosapic_disable_edge_irq, + ack: iosapic_ack_edge_irq, + end: iosapic_end_edge_irq, + set_affinity: iosapic_set_affinity +}; + +static unsigned int +iosapic_version (char *addr) +{ + /* + * IOSAPIC Version Register return 32 bit structure like: + * { + * unsigned int version : 8; + * unsigned int reserved1 : 8; + * unsigned int pins : 8; + * unsigned int reserved2 : 8; + * } + */ + writel(IOSAPIC_VERSION, addr + IOSAPIC_REG_SELECT); + return readl(IOSAPIC_WINDOW + addr); +} + +/* + * ACPI calls this when it finds an entry for a legacy ISA interrupt. Note that the + * irq_base and IOSAPIC address must be set in iosapic_init(). 
+ */ +void +iosapic_register_legacy_irq (unsigned long irq, + unsigned long pin, unsigned long polarity, + unsigned long edge_triggered) +{ + unsigned int vector = isa_irq_to_vector(irq); + +#ifdef DEBUG_IRQ_ROUTING + printk("ISA: IRQ %u -> IOSAPIC irq 0x%02x (%s, %s) -> vector %02x\n", + (unsigned) irq, (unsigned) pin, + polarity ? "high" : "low", edge_triggered ? "edge" : "level", + vector); +#endif + + iosapic_irq[vector].pin = pin; + iosapic_irq[vector].dmode = IOSAPIC_LOWEST_PRIORITY; + iosapic_irq[vector].polarity = polarity ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW; + iosapic_irq[vector].trigger = edge_triggered ? IOSAPIC_EDGE : IOSAPIC_LEVEL; +} + +void __init +iosapic_init (unsigned long phys_addr, unsigned int base_irq) +{ + struct hw_interrupt_type *irq_type; + int i, irq, max_pin, vector; + unsigned int ver; + char *addr; + static int first_time = 1; + + if (first_time) { + first_time = 0; + + for (vector = 0; vector < NR_IRQS; ++vector) + iosapic_irq[vector].pin = -1; /* mark as unused */ + + /* + * Fetch the PCI interrupt routing table: + */ +#ifdef CONFIG_ACPI_KERNEL_CONFIG + acpi_cf_get_pci_vectors(&pci_irq.route, &pci_irq.num_routes); +#else + pci_irq.route = + (struct pci_vector_struct *) __va(ia64_boot_param.pci_vectors); + pci_irq.num_routes = ia64_boot_param.num_pci_vectors; +#endif + } + + addr = ioremap(phys_addr, 0); + + ver = iosapic_version(addr); + max_pin = (ver >> 16) & 0xff; + + printk("IOSAPIC: version %x.%x, address 0x%lx, IRQs 0x%02x-0x%02x\n", + (ver & 0xf0) >> 4, (ver & 0x0f), phys_addr, base_irq, base_irq + max_pin); + + if (base_irq == 0) + /* + * Map the legacy ISA devices into the IOSAPIC data. Some of these may + * get reprogrammed later on with data from the ACPI Interrupt Source + * Override table. + */ + for (irq = 0; irq < 16; ++irq) { + vector = isa_irq_to_vector(irq); + iosapic_irq[vector].addr = addr; + iosapic_irq[vector].base_irq = 0; + if (iosapic_irq[vector].pin == -1) + iosapic_irq[vector].pin = irq; + iosapic_irq[vector].dmode = IOSAPIC_LOWEST_PRIORITY; + iosapic_irq[vector].trigger = IOSAPIC_EDGE; + iosapic_irq[vector].polarity = IOSAPIC_POL_HIGH; +#ifdef DEBUG_IRQ_ROUTING + printk("ISA: IRQ %u -> IOSAPIC irq 0x%02x (high, edge) -> vector 0x%02x\n", + irq, iosapic_irq[vector].base_irq + iosapic_irq[vector].pin, + vector); +#endif + irq_type = &irq_type_iosapic_edge; + if (irq_desc[vector].handler != irq_type) { + if (irq_desc[vector].handler != &no_irq_type) + printk("iosapic_init: changing vector 0x%02x from %s to " + "%s\n", irq, irq_desc[vector].handler->typename, + irq_type->typename); + irq_desc[vector].handler = irq_type; + } + + /* program the IOSAPIC routing table: */ + set_rte(vector, (ia64_get_lid() >> 16) & 0xffff); + } + +#ifndef CONFIG_IA64_SOFTSDV_HACKS + for (i = 0; i < pci_irq.num_routes; i++) { + irq = pci_irq.route[i].irq; + + if ((unsigned) (irq - base_irq) > max_pin) + /* the interrupt route is for another controller... 
*/ + continue; + + if (irq < 16) + vector = isa_irq_to_vector(irq); + else { + vector = iosapic_irq_to_vector(irq); + if (vector < 0) + /* new iosapic irq: allocate a vector for it */ + vector = ia64_alloc_irq(); + } + + iosapic_irq[vector].addr = addr; + iosapic_irq[vector].base_irq = base_irq; + iosapic_irq[vector].pin = (irq - base_irq); + iosapic_irq[vector].dmode = IOSAPIC_LOWEST_PRIORITY; + iosapic_irq[vector].trigger = IOSAPIC_LEVEL; + iosapic_irq[vector].polarity = IOSAPIC_POL_LOW; + +# ifdef DEBUG_IRQ_ROUTING + printk("PCI: (B%d,I%d,P%d) -> IOSAPIC irq 0x%02x -> vector 0x%02x\n", + pci_irq.route[i].bus, pci_irq.route[i].pci_id>>16, pci_irq.route[i].pin, + iosapic_irq[vector].base_irq + iosapic_irq[vector].pin, vector); +# endif + irq_type = &irq_type_iosapic_level; + if (irq_desc[vector].handler != irq_type){ + if (irq_desc[vector].handler != &no_irq_type) + printk("iosapic_init: changing vector 0x%02x from %s to %s\n", + vector, irq_desc[vector].handler->typename, + irq_type->typename); + irq_desc[vector].handler = irq_type; + } + + /* program the IOSAPIC routing table: */ + set_rte(vector, (ia64_get_lid() >> 16) & 0xffff); + } +#endif /* !CONFIG_IA64_SOFTSDV_HACKS */ +} + +void +iosapic_pci_fixup (int phase) +{ + struct pci_dev *dev; + unsigned char pin; + int vector; + + if (phase != 1) + return; + + pci_for_each_dev(dev) { + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if (pin) { + pin--; /* interrupt pins are numbered starting from 1 */ + vector = pci_pin_to_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); + if (vector < 0 && dev->bus->parent) { + /* go back to the bridge */ + struct pci_dev *bridge = dev->bus->self; + + if (bridge) { + /* allow for multiple bridges on an adapter */ + do { + /* do the bridge swizzle... */ + pin = (pin + PCI_SLOT(dev->devfn)) % 4; + vector = pci_pin_to_vector(bridge->bus->number, + PCI_SLOT(bridge->devfn), + pin); + } while (vector < 0 && (bridge = bridge->bus->self)); + } + if (vector >= 0) + printk(KERN_WARNING + "PCI: using PPB(B%d,I%d,P%d) to get vector %02x\n", + bridge->bus->number, PCI_SLOT(bridge->devfn), + pin, vector); + else + printk(KERN_WARNING + "PCI: Couldn't map irq for (B%d,I%d,P%d)o\n", + bridge->bus->number, PCI_SLOT(bridge->devfn), + pin); + } + if (vector >= 0) { + printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> 0x%02x\n", + dev->bus->number, PCI_SLOT(dev->devfn), pin, vector); + dev->irq = vector; + } + } + /* + * Nothing to fixup + * Fix out-of-range IRQ numbers + */ + if (dev->irq >= NR_IRQS) + dev->irq = 15; /* Spurious interrupts */ + } +} diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index b3646e275..ab8961a54 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -541,6 +541,18 @@ void enable_irq(unsigned int irq) spin_unlock_irqrestore(&desc->lock, flags); } +void do_IRQ_per_cpu(unsigned long irq, struct pt_regs *regs) +{ + irq_desc_t *desc = irq_desc + irq; + int cpu = smp_processor_id(); + + kstat.irqs[cpu][irq]++; + + desc->handler->ack(irq); + handle_IRQ_event(irq, regs, desc->action); + desc->handler->end(irq); +} + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -581,8 +593,7 @@ unsigned int do_IRQ(unsigned long irq, struct pt_regs *regs) if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { action = desc->action; status &= ~IRQ_PENDING; /* we commit to handling */ - if (!(status & IRQ_PER_CPU)) - status |= IRQ_INPROGRESS; /* we are handling it */ + status |= IRQ_INPROGRESS; /* we are handling it */ } 
desc->status = status; diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 2166e205f..155ee66b7 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -7,6 +7,9 @@ * * 6/10/99: Updated to bring in sync with x86 version to facilitate * support for SMP and different interrupt controllers. + * + * 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector + * PCI to vector allocation routine. */ #include <linux/config.h> @@ -35,38 +38,28 @@ #define IRQ_DEBUG 0 -#ifdef CONFIG_ITANIUM_A1_SPECIFIC -spinlock_t ivr_read_lock; -#endif - /* default base addr of IPI table */ unsigned long ipi_base_addr = (__IA64_UNCACHED_OFFSET | IPI_DEFAULT_BASE_ADDR); /* - * Legacy IRQ to IA-64 vector translation table. Any vector not in - * this table maps to itself (ie: irq 0x30 => IA64 vector 0x30) + * Legacy IRQ to IA-64 vector translation table. */ __u8 isa_irq_to_vector_map[16] = { /* 8259 IRQ translation, first 16 entries */ - 0x60, 0x50, 0x10, 0x51, 0x52, 0x53, 0x43, 0x54, - 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x40, 0x41 + 0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, + 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21 }; -#ifdef CONFIG_ITANIUM_A1_SPECIFIC - -int usbfix; - -static int __init -usbfix_option (char *str) +int +ia64_alloc_irq (void) { - printk("irq: enabling USB workaround\n"); - usbfix = 1; - return 1; -} + static int next_irq = FIRST_DEVICE_IRQ; -__setup("usbfix", usbfix_option); - -#endif /* CONFIG_ITANIUM_A1_SPECIFIC */ + if (next_irq > LAST_DEVICE_IRQ) + /* XXX could look for sharable vectors instead of panic'ing... */ + panic("ia64_alloc_irq: out of interrupt vectors!"); + return next_irq++; +} /* * That's where the IVT branches when we get an external @@ -77,42 +70,6 @@ void ia64_handle_irq (unsigned long vector, struct pt_regs *regs) { unsigned long saved_tpr; -#ifdef CONFIG_ITANIUM_A1_SPECIFIC - unsigned long eoi_ptr; - -# ifdef CONFIG_USB - extern void reenable_usb (void); - extern void disable_usb (void); - - if (usbfix) - disable_usb(); -# endif - /* - * Stop IPIs by getting the ivr_read_lock - */ - spin_lock(&ivr_read_lock); - { - unsigned int tmp; - /* - * Disable PCI writes - */ - outl(0x80ff81c0, 0xcf8); - tmp = inl(0xcfc); - outl(tmp | 0x400, 0xcfc); - eoi_ptr = inl(0xcfc); - vector = ia64_get_ivr(); - /* - * Enable PCI writes - */ - outl(tmp, 0xcfc); - } - spin_unlock(&ivr_read_lock); - -# ifdef CONFIG_USB - if (usbfix) - reenable_usb(); -# endif -#endif /* CONFIG_ITANIUM_A1_SPECIFIC */ #if IRQ_DEBUG { @@ -161,7 +118,10 @@ ia64_handle_irq (unsigned long vector, struct pt_regs *regs) ia64_set_tpr(vector); ia64_srlz_d(); - do_IRQ(vector, regs); + if ((irq_desc[vector].status & IRQ_PER_CPU) != 0) + do_IRQ_per_cpu(vector, regs); + else + do_IRQ(vector, regs); /* * Disable interrupts and send EOI: @@ -169,9 +129,6 @@ ia64_handle_irq (unsigned long vector, struct pt_regs *regs) local_irq_disable(); ia64_set_tpr(saved_tpr); ia64_eoi(); -#ifdef CONFIG_ITANIUM_A1_SPECIFIC - break; -#endif vector = ia64_get_ivr(); } while (vector != IA64_SPURIOUS_INT); } @@ -194,8 +151,8 @@ init_IRQ (void) * Disable all local interrupts */ ia64_set_itv(0, 1); - ia64_set_lrr0(0, 1); - ia64_set_lrr1(0, 1); + ia64_set_lrr0(0, 1); + ia64_set_lrr1(0, 1); irq_desc[IA64_SPURIOUS_INT].handler = &irq_type_ia64_sapic; #ifdef CONFIG_SMP @@ -217,14 +174,11 @@ init_IRQ (void) } void -ipi_send (int cpu, int vector, int delivery_mode, int redirect) +ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect) { unsigned long ipi_addr; unsigned long 
ipi_data; unsigned long phys_cpu_id; -#ifdef CONFIG_ITANIUM_A1_SPECIFIC - unsigned long flags; -#endif #ifdef CONFIG_SMP phys_cpu_id = cpu_physical_id(cpu); @@ -239,13 +193,5 @@ ipi_send (int cpu, int vector, int delivery_mode, int redirect) ipi_data = (delivery_mode << 8) | (vector & 0xff); ipi_addr = ipi_base_addr | (phys_cpu_id << 4) | ((redirect & 1) << 3); -#ifdef CONFIG_ITANIUM_A1_SPECIFIC - spin_lock_irqsave(&ivr_read_lock, flags); -#endif - writeq(ipi_data, ipi_addr); - -#ifdef CONFIG_ITANIUM_A1_SPECIFIC - spin_unlock_irqrestore(&ivr_read_lock, flags); -#endif } diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index fa0ad0993..b75cd9dbc 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -6,6 +6,7 @@ * Copyright (C) 1998-2000 David Mosberger <davidm@hpl.hp.com> * * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling for SMP + * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB handler now uses virtual PT. */ /* * This file defines the interrupt vector table used by the CPU. @@ -44,23 +45,13 @@ #include <asm/system.h> #include <asm/unistd.h> -#define MINSTATE_START_SAVE_MIN /* no special action needed */ -#define MINSTATE_END_SAVE_MIN \ - or r2=r2,r14; /* make first base a kernel virtual address */ \ - or r12=r12,r14; /* make sp a kernel virtual address */ \ - or r13=r13,r14; /* make `current' a kernel virtual address */ \ - bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ - ;; - +#define MINSTATE_VIRT /* needed by minstate.h */ #include "minstate.h" #define FAULT(n) \ - rsm psr.dt; /* avoid nested faults due to TLB misses... */ \ - ;; \ - srlz.d; /* ensure everyone knows psr.dt is off... */ \ mov r31=pr; \ mov r19=n;; /* prepare to save predicates */ \ - br.cond.sptk.many dispatch_to_fault_handler + br.sptk.many dispatch_to_fault_handler /* * As we don't (hopefully) use the space available, we need to fill it with @@ -122,15 +113,14 @@ ia64_ivt: (p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place srlz.d // ensure "rsm psr.dt" has taken effect (p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir -(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 -(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 +(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT +(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 ;; (p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 (p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) cmp.eq p7,p6=0,r21 // unused address bits all zeroes? shr.u r18=r16,PMD_SHIFT // shift L2 index into position ;; -(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones? ld8 r17=[r17] // fetch the L1 entry (may be 0) ;; (p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? @@ -145,7 +135,7 @@ ia64_ivt: (p7) ld8 r18=[r21] // read the L3 PTE mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss ;; -(p7) tbit.z p6,p7=r18,0 // page present bit cleared? +(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? mov r22=cr.iha // get the VHPT address that caused the TLB miss ;; // avoid RAW on p7 (p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? @@ -153,7 +143,7 @@ ia64_ivt: ;; (p10) itc.i r18 // insert the instruction TLB entry (p11) itc.d r18 // insert the data TLB entry -(p6) br.spnt.few page_fault // handle bad address/page not present (page fault) +(p6) br.spnt.many page_fault // handle bad address/page not present (page fault) mov cr.ifa=r22 // Now compute and insert the TLB entry for the virtual page table. 
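[Editorial illustration, not part of the patch.] The ivt.S hunk above replaces the old physical-mode miss handling, but the underlying operation is still an ordinary three-level page-table walk: the faulting address is split into L1/L2/L3 indices (8-byte entries per level), a zero entry at any level means the address is unmapped and the handler branches to page_fault, and the surviving L3 PTE is inserted with itc.i or itc.d. The C sketch below only illustrates that flow under the usual 4KB-page layout; walk_l3_pte and its constants are invented for readability, and the region-5/swapper_pg_dir versus per-task (ar.k7) root selection and the physical-mode addressing of the real handler are deliberately left out.

/*
 * Illustrative only: a C rendering of the page-table walk the ivt.S
 * TLB-miss handlers perform in assembly.  Helper names and the 4KB
 * page size are assumptions, not code from the patch.
 */
#include <stdint.h>

#define PAGE_SHIFT      12
#define PTRS_PER_LEVEL  (1UL << (PAGE_SHIFT - 3))        /* 512 eight-byte entries per page */
#define PMD_SHIFT       (PAGE_SHIFT + (PAGE_SHIFT - 3))  /* L2 index starts at bit 21 */
#define PGDIR_SHIFT     (PMD_SHIFT + (PAGE_SHIFT - 3))   /* L1 index starts at bit 30 */
#define _PAGE_P         (1UL << 0)                       /* "present" bit tested by tbit.z */

/* Return the L3 PTE mapping addr, or 0 if any level is absent (handler -> page_fault). */
static uint64_t walk_l3_pte(const uint64_t *pgd, uint64_t addr)
{
        uint64_t l1 = pgd[(addr >> PGDIR_SHIFT) & (PTRS_PER_LEVEL - 1)];   /* L1 entry (may be 0) */
        if (!l1)
                return 0;
        uint64_t l2 = ((const uint64_t *)(uintptr_t)l1)[(addr >> PMD_SHIFT) & (PTRS_PER_LEVEL - 1)];
        if (!l2)
                return 0;
        uint64_t pte = ((const uint64_t *)(uintptr_t)l2)[(addr >> PAGE_SHIFT) & (PTRS_PER_LEVEL - 1)];
        if (!(pte & _PAGE_P))                                              /* present bit cleared? */
                return 0;
        return pte;     /* ivt.S would itc.i (instruction) or itc.d (data) insert this value */
}

In the fast path the handlers never do this walk at all: they read the L3 PTE through the virtually mapped linear page table (cr.iha/thash), and only fall back to a walk like the one above, in physical mode, when that access itself takes a nested TLB miss.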
@@ -183,212 +173,117 @@ ia64_ivt: mov pr=r31,-1 // restore predicate registers rfi + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x0400 Entry 1 (size 64 bundles) ITLB (21) /* - * The ITLB basically does the same as the VHPT handler except - * that we always insert exactly one instruction TLB entry. - */ - /* - * Attempt to lookup PTE through virtual linear page table. - * The speculative access will fail if there is no TLB entry - * for the L3 page table page we're trying to access. + * The ITLB handler accesses the L3 PTE via the virtually mapped linear + * page table. If a nested TLB miss occurs, we switch into physical + * mode, walk the page table, and then re-execute the L3 PTE read + * and go on normally after that. */ +itlb_fault: mov r16=cr.ifa // get virtual address - mov r19=cr.iha // get virtual address of L3 PTE - ;; - ld8.s r17=[r19] // try to read L3 PTE + mov r29=b0 // save b0 mov r31=pr // save predicates + mov r17=cr.iha // get virtual address of L3 PTE + movl r30=1f // load nested fault continuation point ;; - tnat.nz p6,p0=r17 // did read succeed? -(p6) br.cond.spnt.many 1f +1: ld8 r18=[r17] // read L3 PTE ;; - itc.i r17 + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? +(p6) br.cond.spnt.many page_fault + ;; + itc.i r18 ;; #ifdef CONFIG_SMP - ld8.s r18=[r19] // try to read L3 PTE again and see if same + ld8 r19=[r17] // read L3 PTE again and see if same mov r20=PAGE_SHIFT<<2 // setup page size for purge ;; - cmp.eq p6,p7=r17,r18 + cmp.ne p7,p0=r18,r19 ;; (p7) ptc.l r16,r20 #endif mov pr=r31,-1 rfi - -#ifdef CONFIG_DISABLE_VHPT -itlb_fault: -#endif -1: rsm psr.dt // use physical addressing for data - mov r19=ar.k7 // get page table base address - shl r21=r16,3 // shift bit 60 into sign bit - shr.u r17=r16,61 // get the region number into r17 ;; - cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? - shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address - ;; -(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place - srlz.d // ensure "rsm psr.dt" has taken effect -(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir -(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 -(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 - ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) - cmp.eq p7,p6=0,r21 // unused address bits all zeroes? - shr.u r18=r16,PMD_SHIFT // shift L2 index into position - ;; -(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones? - ld8 r17=[r17] // fetch the L1 entry (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry - ;; -(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) - shr.u r19=r16,PAGE_SHIFT // shift L3 index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? - dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry - ;; -(p7) ld8 r18=[r17] // read the L3 PTE - ;; -(p7) tbit.z p6,p7=r18,0 // page present bit cleared? 
- ;; -(p7) itc.i r18 // insert the instruction TLB entry -(p6) br.spnt.few page_fault // handle bad address/page not present (page fault) - ;; -#ifdef CONFIG_SMP - ld8 r19=[r17] // re-read the PTE and check if same - ;; - cmp.eq p6,p7=r18,r19 - mov r20=PAGE_SHIFT<<2 - ;; -(p7) ptc.l r16,r20 // PTE changed purge translation -#endif - - mov pr=r31,-1 // restore predicate registers - rfi .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) /* - * The DTLB basically does the same as the VHPT handler except - * that we always insert exactly one data TLB entry. - */ - /* - * Attempt to lookup PTE through virtual linear page table. - * The speculative access will fail if there is no TLB entry - * for the L3 page table page we're trying to access. + * The DTLB handler accesses the L3 PTE via the virtually mapped linear + * page table. If a nested TLB miss occurs, we switch into physical + * mode, walk the page table, and then re-execute the L3 PTE read + * and go on normally after that. */ +dtlb_fault: mov r16=cr.ifa // get virtual address - mov r19=cr.iha // get virtual address of L3 PTE - ;; - ld8.s r17=[r19] // try to read L3 PTE + mov r29=b0 // save b0 mov r31=pr // save predicates + mov r17=cr.iha // get virtual address of L3 PTE + movl r30=1f // load nested fault continuation point + ;; +1: ld8 r18=[r17] // read L3 PTE ;; - tnat.nz p6,p0=r17 // did read succeed? -(p6) br.cond.spnt.many 1f + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? +(p6) br.cond.spnt.many page_fault ;; - itc.d r17 + itc.d r18 ;; #ifdef CONFIG_SMP - ld8.s r18=[r19] // try to read L3 PTE again and see if same + ld8 r19=[r17] // read L3 PTE again and see if same mov r20=PAGE_SHIFT<<2 // setup page size for purge ;; - cmp.eq p6,p7=r17,r18 + cmp.ne p7,p0=r18,r19 ;; (p7) ptc.l r16,r20 #endif mov pr=r31,-1 rfi - -#ifdef CONFIG_DISABLE_VHPT -dtlb_fault: -#endif -1: rsm psr.dt // use physical addressing for data - mov r19=ar.k7 // get page table base address - shl r21=r16,3 // shift bit 60 into sign bit - shr.u r17=r16,61 // get the region number into r17 - ;; - cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? - shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address ;; -(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place - srlz.d // ensure "rsm psr.dt" has taken effect -(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir -(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 -(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 - ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) - cmp.eq p7,p6=0,r21 // unused address bits all zeroes? - shr.u r18=r16,PMD_SHIFT // shift L2 index into position - ;; -(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones? - ld8 r17=[r17] // fetch the L1 entry (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry - ;; -(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) - shr.u r19=r16,PAGE_SHIFT // shift L3 index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? - dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry - ;; -(p7) ld8 r18=[r17] // read the L3 PTE - ;; -(p7) tbit.z p6,p7=r18,0 // page present bit cleared? 
- ;; -(p7) itc.d r18 // insert the instruction TLB entry -(p6) br.spnt.few page_fault // handle bad address/page not present (page fault) - ;; -#ifdef CONFIG_SMP - ld8 r19=[r17] // re-read the PTE and check if same - ;; - cmp.eq p6,p7=r18,r19 - mov r20=PAGE_SHIFT<<2 - ;; -(p7) ptc.l r16,r20 // PTE changed purge translation -#endif - mov pr=r31,-1 // restore predicate registers - rfi .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) mov r16=cr.ifa // get address that caused the TLB miss -#ifdef CONFIG_DISABLE_VHPT + movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX + mov r21=cr.ipsr mov r31=pr ;; - shr.u r21=r16,61 // get the region number into r21 +#ifdef CONFIG_DISABLE_VHPT + shr.u r22=r16,61 // get the region number into r21 ;; - cmp.gt p6,p0=6,r21 // user mode -(p6) br.cond.dptk.many itlb_fault + cmp.gt p8,p0=6,r22 // user mode ;; - mov pr=r31,-1 -#endif - movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RX +(p8) thash r17=r16 ;; +(p8) mov cr.iha=r17 +(p8) br.cond.dptk.many itlb_fault +#endif + extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl shr.u r18=r16,57 // move address bit 61 to bit 4 - dep r16=0,r16,IA64_MAX_PHYS_BITS,(64-IA64_MAX_PHYS_BITS) // clear ed & reserved bits + dep r19=0,r16,IA64_MAX_PHYS_BITS,(64-IA64_MAX_PHYS_BITS) // clear ed & reserved bits ;; andcm r18=0x10,r18 // bit 4=~address-bit(61) - dep r16=r17,r16,0,12 // insert PTE control bits into r16 + cmp.ne p8,p0=r0,r23 // psr.cpl != 0? + dep r19=r17,r19,0,12 // insert PTE control bits into r19 ;; - or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6 + or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 +(p8) br.cond.spnt.many page_fault ;; - itc.i r16 // insert the TLB entry + itc.i r19 // insert the TLB entry + mov pr=r31,-1 rfi + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) mov r16=cr.ifa // get address that caused the TLB miss - movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RW + movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX mov r20=cr.isr mov r21=cr.ipsr mov r31=pr @@ -396,29 +291,40 @@ dtlb_fault: #ifdef CONFIG_DISABLE_VHPT shr.u r22=r16,61 // get the region number into r21 ;; - cmp.gt p8,p0=6,r22 // user mode + cmp.gt p8,p0=6,r22 // access to region 0-5 + ;; +(p8) thash r17=r16 + ;; +(p8) mov cr.iha=r17 (p8) br.cond.dptk.many dtlb_fault #endif + extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? 
shr.u r18=r16,57 // move address bit 61 to bit 4 - dep r16=0,r16,IA64_MAX_PHYS_BITS,(64-IA64_MAX_PHYS_BITS) // clear ed & reserved bits + dep r19=0,r16,IA64_MAX_PHYS_BITS,(64-IA64_MAX_PHYS_BITS) // clear ed & reserved bits ;; - dep r21=-1,r21,IA64_PSR_ED_BIT,1 andcm r18=0x10,r18 // bit 4=~address-bit(61) - dep r16=r17,r16,0,12 // insert PTE control bits into r16 + cmp.ne p8,p0=r0,r23 +(p8) br.cond.spnt.many page_fault + + dep r21=-1,r21,IA64_PSR_ED_BIT,1 + dep r19=r17,r19,0,12 // insert PTE control bits into r19 ;; - or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6 + or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 (p6) mov cr.ipsr=r21 ;; -(p7) itc.d r16 // insert the TLB entry +(p7) itc.d r19 // insert the TLB entry mov pr=r31,-1 rfi - ;; //----------------------------------------------------------------------------------- - // call do_page_fault (predicates are in r31, psr.dt is off, r16 is faulting address) + // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) page_fault: + ssm psr.dt + ;; + srlz.i + ;; SAVE_MIN_WITH_COVER // // Copy control registers to temporary registers, then turn on psr bits, @@ -430,7 +336,7 @@ page_fault: mov r9=cr.isr adds r3=8,r2 // set up second base pointer ;; - ssm psr.ic | psr.dt + ssm psr.ic ;; srlz.i // guarantee that interrupt collection is enabled ;; @@ -445,36 +351,37 @@ page_fault: mov rp=r14 ;; adds out2=16,r12 // out2 = pointer to pt_regs - br.call.sptk.few b6=ia64_do_page_fault // ignore return address + br.call.sptk.many b6=ia64_do_page_fault // ignore return address + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) // - // In the absence of kernel bugs, we get here when the Dirty-bit, Instruction - // Access-bit, or Data Access-bit faults cause a nested fault because the - // dTLB entry for the virtual page table isn't present. In such a case, - // we lookup the pte for the faulting address by walking the page table - // and return to the continuation point passed in register r30. - // In accessing the page tables, we don't need to check for NULL entries - // because if the page tables didn't map the faulting address, it would not - // be possible to receive one of the above faults. + // In the absence of kernel bugs, we get here when the virtually mapped linear page + // table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction + // Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page + // table is missing, a nested TLB miss fault is triggered and control is transferred + // to this point. When this happens, we lookup the pte for the faulting address + // by walking the page table in physical mode and return to the continuation point + // passed in register r30 (or call page_fault if the address is not mapped). 
// // Input: r16: faulting address // r29: saved b0 // r30: continuation address + // r31: saved pr // // Output: r17: physical address of L3 PTE of faulting address // r29: saved b0 // r30: continuation address + // r31: saved pr // - // Clobbered: b0, r18, r19, r21, r31, psr.dt (cleared) + // Clobbered: b0, r18, r19, r21, psr.dt (cleared) // rsm psr.dt // switch to using physical data addressing mov r19=ar.k7 // get the page table base address shl r21=r16,3 // shift bit 60 into sign bit ;; - mov r31=pr // save the predicate registers shr.u r17=r16,61 // get the region number into r17 ;; cmp.eq p6,p7=5,r17 // is faulting address in region 5? @@ -482,26 +389,30 @@ page_fault: ;; (p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place srlz.d -(p6) movl r17=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir -(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 -(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 +(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir +(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT +(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 ;; -(p6) dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 (p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? shr.u r18=r16,PMD_SHIFT // shift L2 index into position ;; - ld8 r17=[r17] // fetch the L1 entry + ld8 r17=[r17] // fetch the L1 entry (may be 0) mov b0=r30 ;; +(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry ;; - ld8 r17=[r17] // fetch the L2 entry +(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) shr.u r19=r16,PAGE_SHIFT // shift L3 index into position ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry ;; - mov pr=r31,-1 // restore predicates - br.cond.sptk.few b0 // return to continuation point +(p6) br.cond.spnt.many page_fault + br.sptk.many b0 // return to continuation point + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// @@ -526,33 +437,19 @@ page_fault: // a nested TLB miss hit where we look up the physical address of the L3 PTE // and then continue at label 1 below. 
// -#ifndef CONFIG_SMP mov r16=cr.ifa // get the address that caused the fault movl r30=1f // load continuation point in case of nested fault ;; thash r17=r16 // compute virtual address of L3 PTE mov r29=b0 // save b0 in case of nested fault - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - or r18=_PAGE_D,r18 // set the dirty bit - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - itc.d r18 // install updated PTE -#else - mov r16=cr.ifa // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - ;; - thash r17=r16 // compute virtual address of L3 PTE + mov r31=pr // save pr +#ifdef CONFIG_SMP mov r28=ar.ccv // save ar.ccv - mov r29=b0 // save b0 in case of nested fault - mov r27=pr ;; 1: ld8 r18=[r17] ;; // avoid RAW on r18 mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_D,r18 // set the dirty bit + or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits ;; cmpxchg8.acq r26=[r17],r25,ar.ccv mov r24=PAGE_SHIFT<<2 @@ -568,70 +465,46 @@ page_fault: (p7) ptc.l r16,r24 mov b0=r29 // restore b0 mov ar.ccv=r28 - mov pr=r27,-1 +#else + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + itc.d r18 // install updated PTE #endif + mov pr=r31,-1 // restore pr rfi + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) // Like Entry 8, except for instruction access mov r16=cr.ifa // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + mov r31=pr // save predicates #ifdef CONFIG_ITANIUM /* - * Erratum 10 (IFA may contain incorrect address) now has - * "NoFix" status. There are no plans for fixing this. + * Erratum 10 (IFA may contain incorrect address) has "NoFix" status. */ mov r17=cr.ipsr - mov r31=pr // save predicates ;; mov r18=cr.iip tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set? ;; (p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa - mov pr=r31,-1 #endif /* CONFIG_ITANIUM */ - -#ifndef CONFIG_SMP - movl r30=1f // load continuation point in case of nested fault ;; thash r17=r16 // compute virtual address of L3 PTE mov r29=b0 // save b0 in case of nested fault) - ;; -1: ld8 r18=[r17] -#if defined(CONFIG_IA32_SUPPORT) && \ - (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B0_SPECIFIC)) - // - // Erratum 85 (Access bit fault could be reported before page not present fault) - // If the PTE is indicates the page is not present, then just turn this into a - // page fault. - // - mov r31=pr // save predicates - ;; - tbit.nz p6,p0=r18,0 // page present bit set? 
-(p6) br.cond.sptk 1f - ;; // avoid WAW on p6 - mov pr=r31,-1 - br.cond.sptk page_fault // page wasn't present -1: mov pr=r31,-1 -#else - ;; // avoid RAW on r18 -#endif - or r18=_PAGE_A,r18 // set the accessed bit - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - itc.i r18 // install updated PTE -#else - movl r30=1f // load continuation point in case of nested fault - ;; - thash r17=r16 // compute virtual address of L3 PTE +#ifdef CONFIG_SMP mov r28=ar.ccv // save ar.ccv - mov r29=b0 // save b0 in case of nested fault) - mov r27=pr ;; 1: ld8 r18=[r17] -#if defined(CONFIG_IA32_SUPPORT) && \ +# if defined(CONFIG_IA32_SUPPORT) && \ (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B0_SPECIFIC)) // // Erratum 85 (Access bit fault could be reported before page not present fault) @@ -639,15 +512,9 @@ page_fault: // page fault. // ;; - tbit.nz p6,p0=r18,0 // page present bit set? -(p6) br.cond.sptk 1f - ;; // avoid WAW on p6 - mov pr=r27,-1 - br.cond.sptk page_fault // page wasn't present -1: -#else - ;; // avoid RAW on r18 -#endif + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? +(p6) br.sptk page_fault // page wasn't present +# endif mov ar.ccv=r18 // set compare value for cmpxchg or r25=_PAGE_A,r18 // set the accessed bit ;; @@ -665,36 +532,42 @@ page_fault: (p7) ptc.l r16,r24 mov b0=r29 // restore b0 mov ar.ccv=r28 - mov pr=r27,-1 -#endif +#else /* !CONFIG_SMP */ + ;; +1: ld8 r18=[r17] + ;; +# if defined(CONFIG_IA32_SUPPORT) && \ + (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B0_SPECIFIC)) + // + // Erratum 85 (Access bit fault could be reported before page not present fault) + // If the PTE is indicates the page is not present, then just turn this into a + // page fault. + // + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? 
+(p6) br.sptk page_fault // page wasn't present +# endif + or r18=_PAGE_A,r18 // set the accessed bit + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + itc.i r18 // install updated PTE +#endif /* !CONFIG_SMP */ + mov pr=r31,-1 rfi + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) // Like Entry 8, except for data access -#ifndef CONFIG_SMP mov r16=cr.ifa // get the address that caused the fault movl r30=1f // load continuation point in case of nested fault ;; thash r17=r16 // compute virtual address of L3 PTE + mov r31=pr mov r29=b0 // save b0 in case of nested fault) - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - or r18=_PAGE_A,r18 // set the accessed bit - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - itc.d r18 // install updated PTE -#else - mov r16=cr.ifa // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - ;; - thash r17=r16 // compute virtual address of L3 PTE +#ifdef CONFIG_SMP mov r28=ar.ccv // save ar.ccv - mov r29=b0 // save b0 in case of nested fault - mov r27=pr ;; 1: ld8 r18=[r17] ;; // avoid RAW on r18 @@ -713,11 +586,20 @@ page_fault: cmp.eq p6,p7=r18,r25 // is it same as the newly installed ;; (p7) ptc.l r16,r24 - mov b0=r29 // restore b0 mov ar.ccv=r28 - mov pr=r27,-1 +#else + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_A,r18 // set the accessed bit + ;; + st8 [r17]=r18 // store back updated PTE + itc.d r18 // install updated PTE #endif + mov b0=r29 // restore b0 + mov pr=r31,-1 rfi + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// @@ -725,16 +607,14 @@ page_fault: mov r16=cr.iim mov r17=__IA64_BREAK_SYSCALL mov r31=pr // prepare to save predicates - rsm psr.dt // avoid nested faults due to TLB misses... ;; - srlz.d // ensure everyone knows psr.dt is off... cmp.eq p0,p7=r16,r17 // is this a system call? (p7 <- false, if so) (p7) br.cond.spnt.many non_syscall SAVE_MIN // uses r31; defines r2: - // turn interrupt collection and data translation back on: - ssm psr.ic | psr.dt + // turn interrupt collection back on: + ssm psr.ic ;; srlz.i // guarantee that interrupt collection is enabled cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 @@ -746,14 +626,13 @@ page_fault: adds r3=8,r2 // set up second base pointer for SAVE_REST ;; SAVE_REST - ;; // avoid WAW on r2 & r3 + br.call.sptk rp=demine_args // clear NaT bits in (potential) syscall args mov r3=255 adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024 adds r2=IA64_TASK_PTRACE_OFFSET,r13 // r2 = ¤t->ptrace - ;; - cmp.geu.unc p6,p7=r3,r15 // (syscall > 0 && syscall <= 1024+255) ? + cmp.geu p6,p7=r3,r15 // (syscall > 0 && syscall <= 1024+255) ? 
movl r16=sys_call_table ;; (p6) shladd r16=r15,3,r16 @@ -788,40 +667,61 @@ page_fault: ;; st8 [r16]=r18 // store new value for cr.isr -(p8) br.call.sptk.few b6=b6 // ignore this return addr - br.call.sptk.few rp=ia64_trace_syscall // rp will be overwritten (ignored) +(p8) br.call.sptk.many b6=b6 // ignore this return addr + br.call.sptk.many rp=ia64_trace_syscall // rp will be overwritten (ignored) // NOT REACHED + .proc demine_args +demine_args: + alloc r2=ar.pfs,8,0,0,0 + tnat.nz p8,p0=in0 + tnat.nz p9,p0=in1 + ;; +(p8) mov in0=-1 + tnat.nz p10,p0=in2 + tnat.nz p11,p0=in3 + +(p9) mov in1=-1 + tnat.nz p12,p0=in4 + tnat.nz p13,p0=in5 + ;; +(p10) mov in2=-1 + tnat.nz p14,p0=in6 + tnat.nz p15,p0=in7 + +(p11) mov in3=-1 +(p12) mov in4=-1 +(p13) mov in5=-1 + ;; +(p14) mov in6=-1 +(p15) mov in7=-1 + br.ret.sptk.many rp + .endp demine_args + .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) - rsm psr.dt // avoid nested faults due to TLB misses... - ;; - srlz.d // ensure everyone knows psr.dt is off... mov r31=pr // prepare to save predicates ;; SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 - ssm psr.ic | psr.dt // turn interrupt collection and data translation back on + ssm psr.ic // turn interrupt collection ;; adds r3=8,r2 // set up second base pointer for SAVE_REST - srlz.i // ensure everybody knows psr.ic and psr.dt are back on + srlz.i // ensure everybody knows psr.ic is back on ;; SAVE_REST ;; alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group -#ifdef CONFIG_ITANIUM_A1_SPECIFIC - mov out0=r0 // defer reading of cr.ivr to handle_irq... -#else mov out0=cr.ivr // pass cr.ivr as first arg -#endif add out1=16,sp // pass pointer to pt_regs as second arg ;; srlz.d // make sure we see the effect of cr.ivr movl r14=ia64_leave_kernel ;; mov rp=r14 - br.call.sptk.few b6=ia64_handle_irq + br.call.sptk.many b6=ia64_handle_irq + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// @@ -855,7 +755,7 @@ dispatch_illegal_op_fault: // The "alloc" can cause a mandatory store which could lead to // an "Alt DTLB" fault which we can handle only if psr.ic is on. 
// - ssm psr.ic | psr.dt + ssm psr.ic ;; srlz.i // guarantee that interrupt collection is enabled ;; @@ -867,7 +767,7 @@ dispatch_illegal_op_fault: ;; SAVE_REST ;; - br.call.sptk.few rp=ia64_illegal_op_fault + br.call.sptk.many rp=ia64_illegal_op_fault .ret0: ;; alloc r14=ar.pfs,0,0,3,0 // must be first in insn group mov out0=r9 @@ -881,6 +781,7 @@ dispatch_illegal_op_fault: cmp.ne p6,p0=0,r8 (p6) br.call.dpnt b6=b6 // call returns to ia64_leave_kernel br.sptk ia64_leave_kernel + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// @@ -900,7 +801,7 @@ dispatch_to_ia32_handler: SAVE_MIN ;; mov r14=cr.isr - ssm psr.ic | psr.dt + ssm psr.ic ;; srlz.i // guarantee that interrupt collection is enabled ;; @@ -913,7 +814,7 @@ dispatch_to_ia32_handler: shr r14=r14,16 // Get interrupt number ;; cmp.ne p6,p0=r14,r15 -(p6) br.call.dpnt.few b6=non_ia32_syscall +(p6) br.call.dpnt.many b6=non_ia32_syscall adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp @@ -924,7 +825,7 @@ dispatch_to_ia32_handler: alloc r15=ar.pfs,0,0,6,0 // must first in an insn group ;; ld4 r8=[r14],8 // r8 == EAX (syscall number) - mov r15=190 // sys_vfork - last implemented system call + mov r15=222 // sys_vfork - last implemented system call ;; cmp.leu.unc p6,p7=r8,r15 ld4 out1=[r14],8 // r9 == ecx @@ -961,11 +862,12 @@ non_ia32_syscall: mov out0=r14 // interrupt # add out1=16,sp // pointer to pt_regs ;; // avoid WAW on CFM - br.call.sptk.few rp=ia32_bad_interrupt + br.call.sptk.many rp=ia32_bad_interrupt .ret1: movl r15=ia64_leave_kernel ;; mov rp=r15 br.ret.sptk.many rp + ;; #endif /* CONFIG_IA32_SUPPORT */ @@ -985,8 +887,8 @@ non_syscall: mov r8=cr.iim // get break immediate (must be done while psr.ic is off) adds r3=8,r2 // set up second base pointer for SAVE_REST - // turn interrupt collection and data translation back on: - ssm psr.ic | psr.dt + // turn interrupt collection back on: + ssm psr.ic ;; srlz.i // guarantee that interrupt collection is enabled ;; @@ -1000,7 +902,8 @@ non_syscall: SAVE_REST mov rp=r15 ;; - br.call.sptk.few b6=ia64_bad_break // avoid WAW on CFM and ignore return addr + br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1023,7 +926,7 @@ dispatch_unaligned_handler: // wouldn't get the state to recover. 
// mov r15=cr.ifa - ssm psr.ic | psr.dt + ssm psr.ic ;; srlz.i // guarantee that interrupt collection is enabled ;; @@ -1039,7 +942,8 @@ dispatch_unaligned_handler: adds out1=16,sp // out1 = pointer to pt_regs ;; mov rp=r14 - br.sptk.few ia64_prepare_handle_unaligned + br.sptk.many ia64_prepare_handle_unaligned + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1055,7 +959,6 @@ dispatch_to_fault_handler: // // Input: // psr.ic: off - // psr.dt: off // r19: fault vector number (e.g., 24 for General Exception) // r31: contains saved predicates (pr) // @@ -1071,7 +974,7 @@ dispatch_to_fault_handler: mov r10=cr.iim mov r11=cr.itir ;; - ssm psr.ic | psr.dt + ssm psr.ic ;; srlz.i // guarantee that interrupt collection is enabled ;; @@ -1089,7 +992,9 @@ dispatch_to_fault_handler: movl r14=ia64_leave_kernel ;; mov rp=r14 - br.call.sptk.few b6=ia64_fault + br.call.sptk.many b6=ia64_fault + ;; + // // --- End of long entries, Beginning of short entries // @@ -1099,16 +1004,16 @@ dispatch_to_fault_handler: // 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) mov r16=cr.ifa rsm psr.dt -#if 1 - // If you disable this, you MUST re-enable to update_mmu_cache() code in pgtable.h + // The Linux page fault handler doesn't expect non-present pages to be in + // the TLB. Flush the existing entry now, so we meet that expectation. mov r17=_PAGE_SIZE_4K<<2 ;; ptc.l r16,r17 -#endif ;; mov r31=pr srlz.d - br.cond.sptk.many page_fault + br.sptk.many page_fault + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1118,7 +1023,8 @@ dispatch_to_fault_handler: mov r31=pr ;; srlz.d - br.cond.sptk.many page_fault + br.sptk.many page_fault + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1128,7 +1034,8 @@ dispatch_to_fault_handler: mov r31=pr ;; srlz.d - br.cond.sptk.many page_fault + br.sptk.many page_fault + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1138,31 +1045,32 @@ dispatch_to_fault_handler: mov r31=pr ;; srlz.d - br.cond.sptk.many page_fault + br.sptk.many page_fault + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) mov r16=cr.isr mov r31=pr - rsm psr.dt // avoid nested faults due to TLB misses... ;; - srlz.d // ensure everyone knows psr.dt is off... 
cmp4.eq p6,p0=0,r16 (p6) br.sptk dispatch_illegal_op_fault ;; mov r19=24 // fault number - br.cond.sptk.many dispatch_to_fault_handler + br.sptk.many dispatch_to_fault_handler + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) - rsm psr.dt | psr.dfh // ensure we can access fph + rsm psr.dfh // ensure we can access fph ;; srlz.d mov r31=pr mov r19=25 - br.cond.sptk.many dispatch_to_fault_handler + br.sptk.many dispatch_to_fault_handler + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1204,6 +1112,7 @@ dispatch_to_fault_handler: ;; rfi // and go back + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1218,12 +1127,11 @@ dispatch_to_fault_handler: .align 256 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) - rsm psr.dt // avoid nested faults due to TLB misses... mov r16=cr.ipsr mov r31=pr // prepare to save predicates ;; - srlz.d // ensure everyone knows psr.dt is off - br.cond.sptk.many dispatch_unaligned_handler + br.sptk.many dispatch_unaligned_handler + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1304,9 +1212,6 @@ dispatch_to_fault_handler: ///////////////////////////////////////////////////////////////////////////////////////// // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) #ifdef CONFIG_IA32_SUPPORT - rsm psr.dt - ;; - srlz.d mov r31=pr mov r16=cr.isr ;; @@ -1325,7 +1230,7 @@ dispatch_to_fault_handler: ;; mov pr=r31,-1 // restore predicate registers rfi - + ;; 1: #endif // CONFIG_IA32_SUPPORT FAULT(46) @@ -1334,11 +1239,9 @@ dispatch_to_fault_handler: ///////////////////////////////////////////////////////////////////////////////////////// // 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) #ifdef CONFIG_IA32_SUPPORT - rsm psr.dt - ;; - srlz.d mov r31=pr - br.cond.sptk.many dispatch_to_ia32_handler + br.sptk.many dispatch_to_ia32_handler + ;; #else FAULT(47) #endif diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c index 2afb5613e..df19a8d6f 100644 --- a/arch/ia64/kernel/machvec.c +++ b/arch/ia64/kernel/machvec.c @@ -1,11 +1,13 @@ #include <linux/config.h> + +#ifdef CONFIG_IA64_GENERIC + #include <linux/kernel.h> +#include <linux/string.h> #include <asm/page.h> #include <asm/machvec.h> -#ifdef CONFIG_IA64_GENERIC - struct ia64_machine_vector ia64_mv; /* diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 333258d35..1456b8d96 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -19,6 +19,7 @@ #include <linux/irq.h> #include <linux/smp_lock.h> +#include <asm/machvec.h> #include <asm/page.h> #include <asm/ptrace.h> #include <asm/system.h> @@ -26,7 +27,6 @@ #include <asm/mca.h> #include <asm/irq.h> -#include <asm/machvec.h> typedef struct ia64_fptr { @@ -365,7 +365,7 @@ ia64_mca_wakeup_ipi_wait(void) void ia64_mca_wakeup(int cpu) { - ipi_send(cpu, IA64_MCA_WAKEUP_INT_VECTOR, IA64_IPI_DM_INT, 0); + platform_send_ipi(cpu, IA64_MCA_WAKEUP_INT_VECTOR, IA64_IPI_DM_INT, 0); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; } diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index 15993525d..b148d8b9c 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -3,11 
+3,11 @@ // // Mods by cfleck to integrate into kernel build // 00/03/15 davidm Added various stop bits to get a clean compile -// 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp kstack, -// switch modes, jump to C INIT handler +// +// 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp +// kstack, switch modes, jump to C INIT handler // #include <linux/config.h> - #include <asm/pgtable.h> #include <asm/processor.h> #include <asm/mca_asm.h> @@ -17,14 +17,7 @@ * When we get an machine check, the kernel stack pointer is no longer * valid, so we need to set a new stack pointer. */ -#define MINSTATE_START_SAVE_MIN \ -(pKern) movl sp=ia64_init_stack+IA64_STK_OFFSET-IA64_PT_REGS_SIZE; \ - ;; - -#define MINSTATE_END_SAVE_MIN \ - or r12=r12,r14; /* make sp a kernel virtual address */ \ - or r13=r13,r14; /* make `current' a kernel virtual address */ \ - ;; +#define MINSTATE_PHYS /* Make sure stack access is physical for MINSTATE */ #include "minstate.h" diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h index 8790d49c3..2ea6f1791 100644 --- a/arch/ia64/kernel/minstate.h +++ b/arch/ia64/kernel/minstate.h @@ -20,6 +20,72 @@ #define rR1 r20 /* + * Here start the source dependent macros. + */ + +/* + * For ivt.s we want to access the stack virtually so we dont have to disable translation + * on interrupts. + */ +#define MINSTATE_START_SAVE_MIN_VIRT \ + dep r1=-1,r1,61,3; /* r1 = current (virtual) */ \ +(p7) mov ar.rsc=r0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ + ;; \ +(p7) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \ +(p7) mov rARRNAT=ar.rnat; \ +(pKern) mov r1=sp; /* get sp */ \ + ;; \ +(p7) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ +(p7) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \ + ;; \ +(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ +(p7) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \ + ;; \ +(p7) mov r18=ar.bsp; \ +(p7) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ + +#define MINSTATE_END_SAVE_MIN_VIRT \ + or r13=r13,r14; /* make `current' a kernel virtual address */ \ + bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ + ;; + +/* + * For mca_asm.S we want to access the stack physically since the state is saved before we + * go virtual and dont want to destroy the iip or ipsr. 
+ */ +#define MINSTATE_START_SAVE_MIN_PHYS \ +(pKern) movl sp=ia64_init_stack+IA64_STK_OFFSET-IA64_PT_REGS_SIZE; \ +(p7) mov ar.rsc=r0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ +(p7) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \ + ;; \ +(p7) mov rARRNAT=ar.rnat; \ +(pKern) dep r1=0,sp,61,3; /* compute physical addr of sp */ \ +(p7) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ +(p7) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \ +(p7) dep rKRBS=-1,rKRBS,61,3; /* compute kernel virtual addr of RBS */\ + ;; \ +(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ +(p7) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \ + ;; \ +(p7) mov r18=ar.bsp; \ +(p7) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ + +#define MINSTATE_END_SAVE_MIN_PHYS \ + or r12=r12,r14; /* make sp a kernel virtual address */ \ + or r13=r13,r14; /* make `current' a kernel virtual address */ \ + ;; + +#ifdef MINSTATE_VIRT +# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_VIRT +# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_VIRT +#endif + +#ifdef MINSTATE_PHYS +# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_PHYS +# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_PHYS +#endif + +/* * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves * the minimum state necessary that allows us to turn psr.ic back * on. @@ -31,7 +97,6 @@ * * Upon exit, the state is as follows: * psr.ic: off - * psr.dt: off * r2 = points to &pt_regs.r16 * r12 = kernel sp (kernel virtual address) * r13 = points to current task_struct (kernel virtual address) @@ -50,7 +115,7 @@ mov rCRIPSR=cr.ipsr; \ mov rB6=b6; /* rB6 = branch reg 6 */ \ mov rCRIIP=cr.iip; \ - mov r1=ar.k6; /* r1 = current */ \ + mov r1=ar.k6; /* r1 = current (physical) */ \ ;; \ invala; \ extr.u r16=rCRIPSR,32,2; /* extract psr.cpl */ \ @@ -58,25 +123,11 @@ cmp.eq pKern,p7=r0,r16; /* are we in kernel mode already? (psr.cpl==0) */ \ /* switch from user to kernel RBS: */ \ COVER; \ - ;; \ + ;; \ MINSTATE_START_SAVE_MIN \ -(p7) mov ar.rsc=r0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ -(p7) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \ - ;; \ -(p7) mov rARRNAT=ar.rnat; \ -(pKern) dep r1=0,sp,61,3; /* compute physical addr of sp */ \ -(p7) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ -(p7) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \ -(p7) dep rKRBS=-1,rKRBS,61,3; /* compute kernel virtual addr of RBS */ \ - ;; \ -(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ -(p7) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \ - ;; \ -(p7) mov r18=ar.bsp; \ -(p7) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ - \ - mov r16=r1; /* initialize first base pointer */ \ - adds r17=8,r1; /* initialize second base pointer */ \ + ;; \ + mov r16=r1; /* initialize first base pointer */ \ + adds r17=8,r1; /* initialize second base pointer */ \ ;; \ st8 [r16]=rCRIPSR,16; /* save cr.ipsr */ \ st8 [r17]=rCRIIP,16; /* save cr.iip */ \ diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S index 2e56a428e..fc14cc31c 100644 --- a/arch/ia64/kernel/pal.S +++ b/arch/ia64/kernel/pal.S @@ -52,10 +52,9 @@ END(ia64_pal_default_handler) /* * Make a PAL call using the static calling convention. 
* - * in0 Pointer to struct ia64_pal_retval - * in1 Index of PAL service - * in2 - in4 Remaining PAL arguments - * in5 1 ==> clear psr.ic, 0 ==> don't clear psr.ic + * in0 Index of PAL service + * in1 - in3 Remaining PAL arguments + * in4 1 ==> clear psr.ic, 0 ==> don't clear psr.ic * */ GLOBAL_ENTRY(ia64_pal_call_static) @@ -69,7 +68,7 @@ GLOBAL_ENTRY(ia64_pal_call_static) } ;; ld8 loc2 = [loc2] // loc2 <- entry point - tbit.nz p6,p7 = in5, 0 + tbit.nz p6,p7 = in4, 0 adds r8 = 1f-1b,r8 ;; mov loc3 = psr diff --git a/arch/ia64/kernel/pci.c b/arch/ia64/kernel/pci.c index 2d814b443..37dbf811a 100644 --- a/arch/ia64/kernel/pci.c +++ b/arch/ia64/kernel/pci.c @@ -1,10 +1,8 @@ /* - * pci.c - Low-Level PCI Access in IA64 + * pci.c - Low-Level PCI Access in IA-64 * * Derived from bios32.c of i386 tree. - * */ - #include <linux/config.h> #include <linux/types.h> @@ -44,19 +42,16 @@ * This interrupt-safe spinlock protects all accesses to PCI * configuration space. */ - spinlock_t pci_lock = SPIN_LOCK_UNLOCKED; -struct pci_fixup pcibios_fixups[] = { { 0 } }; - -#define PCI_NO_CHECKS 0x400 -#define PCI_NO_PEER_FIXUP 0x800 - -static unsigned int pci_probe = PCI_NO_CHECKS; +struct pci_fixup pcibios_fixups[] = { + { 0 } +}; /* Macro to build a PCI configuration address to be passed as a parameter to SAL. */ -#define PCI_CONFIG_ADDRESS(dev, where) (((u64) dev->bus->number << 16) | ((u64) (dev->devfn & 0xff) << 8) | (where & 0xff)) +#define PCI_CONFIG_ADDRESS(dev, where) \ + (((u64) dev->bus->number << 16) | ((u64) (dev->devfn & 0xff) << 8) | (where & 0xff)) static int pci_conf_read_config_byte(struct pci_dev *dev, int where, u8 *value) @@ -109,8 +104,7 @@ pci_conf_write_config_dword (struct pci_dev *dev, int where, u32 value) return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 4, value); } - -static struct pci_ops pci_conf = { +struct pci_ops pci_conf = { pci_conf_read_config_byte, pci_conf_read_config_word, pci_conf_read_config_dword, @@ -120,36 +114,21 @@ static struct pci_ops pci_conf = { }; /* - * Try to find PCI BIOS. This will always work for IA64. - */ - -static struct pci_ops * __init -pci_find_bios(void) -{ - return &pci_conf; -} - -/* * Initialization. Uses the SAL interface */ - -#define PCI_BUSES_TO_SCAN 255 - void __init -pcibios_init(void) +pcibios_init (void) { - struct pci_ops *ops = NULL; +# define PCI_BUSES_TO_SCAN 255 int i; - if ((ops = pci_find_bios()) == NULL) { - printk("PCI: No PCI bus detected\n"); - return; - } + platform_pci_fixup(0); /* phase 0 initialization (before PCI bus has been scanned) */ printk("PCI: Probing PCI hardware\n"); for (i = 0; i < PCI_BUSES_TO_SCAN; i++) - pci_scan_bus(i, ops, NULL); - platform_pci_fixup(); + pci_scan_bus(i, &pci_conf, NULL); + + platform_pci_fixup(1); /* phase 1 initialization (after PCI bus has been scanned) */ return; } @@ -157,16 +136,15 @@ pcibios_init(void) * Called after each bus is probed, but before its children * are examined. 
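(Aside on the SAL-based config access above: PCI_CONFIG_ADDRESS simply packs bus, devfn and register offset into the single 64-bit argument that the ia64_sal_pci_config_read/write calls take. A tiny stand-alone illustration of the encoding, in user-space C with arbitrarily chosen values:)

#include <stdio.h>
#include <stdint.h>

/* same packing as the PCI_CONFIG_ADDRESS macro above:
 * bits 16 and up = bus, bits 8-15 = devfn, bits 0-7 = config-space offset */
static uint64_t pci_config_address(uint64_t bus, uint64_t devfn, uint64_t where)
{
    return (bus << 16) | ((devfn & 0xff) << 8) | (where & 0xff);
}

int main(void)
{
    /* bus 2, device 3, function 1 (devfn = (3 << 3) | 1 = 0x19), offset 0x10 (BAR0) */
    printf("0x%llx\n", (unsigned long long) pci_config_address(2, 0x19, 0x10));
    /* prints 0x21910 */
    return 0;
}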
*/ - void __init -pcibios_fixup_bus(struct pci_bus *b) +pcibios_fixup_bus (struct pci_bus *b) { return; } void __init -pcibios_update_resource(struct pci_dev *dev, struct resource *root, - struct resource *res, int resource) +pcibios_update_resource (struct pci_dev *dev, struct resource *root, + struct resource *res, int resource) { unsigned long where, size; u32 reg; @@ -181,7 +159,7 @@ pcibios_update_resource(struct pci_dev *dev, struct resource *root, } void __init -pcibios_update_irq(struct pci_dev *dev, int irq) +pcibios_update_irq (struct pci_dev *dev, int irq) { pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); @@ -204,18 +182,16 @@ pcibios_enable_device (struct pci_dev *dev) return 0; } +void +pcibios_align_resource (void *data, struct resource *res, unsigned long size) +{ +} + /* * PCI BIOS setup, always defaults to SAL interface */ - char * __init -pcibios_setup(char *str) +pcibios_setup (char *str) { - pci_probe = PCI_NO_CHECKS; return NULL; } - -void -pcibios_align_resource (void *data, struct resource *res, unsigned long size) -{ -} diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index e5efbc8b5..4c7ba4295 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -4,18 +4,20 @@ * * Originaly Written by Ganesh Venkitachalam, IBM Corp. * Modifications by David Mosberger-Tang, Hewlett-Packard Co. + * Modifications by Stephane Eranian, Hewlett-Packard Co. * Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com> * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 2000 Stephane Eranian <eranian@hpl.hp.com> */ #include <linux/config.h> + #include <linux/kernel.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/smp_lock.h> #include <linux/proc_fs.h> -#include <linux/ptrace.h> #include <asm/errno.h> #include <asm/hw_irq.h> @@ -58,19 +60,51 @@ #define MAX_PERF_COUNTER 4 /* true for Itanium, at least */ #define PMU_FIRST_COUNTER 4 /* first generic counter */ -#define WRITE_PMCS_AND_START 0xa0 -#define WRITE_PMCS 0xa1 -#define READ_PMDS 0xa2 -#define STOP_PMCS 0xa3 +#define PFM_WRITE_PMCS 0xa0 +#define PFM_WRITE_PMDS 0xa1 +#define PFM_READ_PMDS 0xa2 +#define PFM_STOP 0xa3 +#define PFM_START 0xa4 +#define PFM_ENABLE 0xa5 /* unfreeze only */ +#define PFM_DISABLE 0xa6 /* freeze only */ +/* + * Those 2 are just meant for debugging. I considered using sysctl() for + * that but it is a little bit too pervasive. This solution is at least + * self-contained. 
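(The PFM_* codes above replace the old WRITE_PMCS/READ_PMDS commands and are driven from user space through the reworked sys_perfmonctl(); requests travel as an array of the perfmon_req_t records defined just below. A rough user-space sketch of one possible call sequence; the syscall number, the PMC4 event encoding and the perfmonctl() wrapper are illustrative assumptions, not values taken from this patch:)

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>

/* mirrors the perfmon_req_t introduced below */
typedef struct {
    unsigned long pfr_reg_num;
    unsigned long pfr_reg_value;
    unsigned long pfr_reg_reset;
    void *pfr_smpl_buf;
    unsigned long pfr_smpl_size;
    pid_t pfr_notify_pid;
    int pfr_notify_sig;
} perfmon_req_t;

#define PFM_WRITE_PMCS  0xa0
#define PFM_WRITE_PMDS  0xa1
#define PFM_READ_PMDS   0xa2
#define PFM_STOP        0xa3
#define PFM_START       0xa4

#define __NR_perfmonctl 1175    /* placeholder syscall number, not taken from this patch */

static long perfmonctl(pid_t pid, int cmd, int flags, perfmon_req_t *req, int count)
{
    return syscall(__NR_perfmonctl, pid, cmd, flags, req, count);
}

int main(void)
{
    perfmon_req_t pmc = { 4, 0x12 /* made-up event select for PMC4 */, 0, NULL, 0, 0, 0 };
    perfmon_req_t pmd = { 4, 0, 0, NULL, 0, 0, 0 };

    perfmonctl(getpid(), PFM_WRITE_PMCS, 0, &pmc, 1);   /* program the counter */
    perfmonctl(getpid(), PFM_WRITE_PMDS, 0, &pmd, 1);   /* zero its data register */
    perfmonctl(getpid(), PFM_START, 0, NULL, 0);        /* psr.up is set on return */
    /* ... workload under measurement ... */
    perfmonctl(getpid(), PFM_STOP, 0, NULL, 0);
    perfmonctl(getpid(), PFM_READ_PMDS, 0, &pmd, 1);
    printf("PMD4 = %lu\n", pmd.pfr_reg_value);
    return 0;
}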
+ */ +#define PFM_DEBUG_ON 0xe0 +#define PFM_DEBUG_OFF 0xe1 + +#ifdef CONFIG_SMP +#define cpu_is_online(i) (cpu_online_map & (1UL << i)) +#else +#define cpu_is_online(i) 1 +#endif +#define PMC_IS_IMPL(i) (pmu_conf.impl_regs[i>>6] & (1<< (i&~(64-1)))) +#define PMD_IS_IMPL(i) (pmu_conf.impl_regs[4+(i>>6)] & (1<< (i&~(64-1)))) +#define PMD_IS_COUNTER(i) (i>=PMU_FIRST_COUNTER && i < (PMU_FIRST_COUNTER+pmu_conf.max_counters)) +#define PMC_IS_COUNTER(i) (i>=PMU_FIRST_COUNTER && i < (PMU_FIRST_COUNTER+pmu_conf.max_counters)) /* * this structure needs to be enhanced */ typedef struct { + unsigned long pfr_reg_num; /* which register */ + unsigned long pfr_reg_value; /* configuration (PMC) or initial value (PMD) */ + unsigned long pfr_reg_reset; /* reset value on overflow (PMD) */ + void *pfr_smpl_buf; /* pointer to user buffer for EAR/BTB */ + unsigned long pfr_smpl_size; /* size of user buffer for EAR/BTB */ + pid_t pfr_notify_pid; /* process to notify */ + int pfr_notify_sig; /* signal for notification, 0=no notification */ +} perfmon_req_t; + +#if 0 +typedef struct { unsigned long pmu_reg_data; /* generic PMD register */ unsigned long pmu_reg_num; /* which register number */ } perfmon_reg_t; +#endif /* * This structure is initialize at boot time and contains @@ -78,86 +112,141 @@ typedef struct { * by PAL */ typedef struct { - unsigned long perf_ovfl_val; /* overflow value for generic counters */ - unsigned long max_pmc; /* highest PMC */ - unsigned long max_pmd; /* highest PMD */ - unsigned long max_counters; /* number of generic counter pairs (PMC/PMD) */ + unsigned long perf_ovfl_val; /* overflow value for generic counters */ + unsigned long max_counters; /* upper limit on counter pair (PMC/PMD) */ + unsigned long impl_regs[16]; /* buffer used to hold implememted PMC/PMD mask */ } pmu_config_t; -/* XXX will go static when ptrace() is cleaned */ -unsigned long perf_ovfl_val; /* overflow value for generic counters */ - static pmu_config_t pmu_conf; +/* for debug only */ +static unsigned long pfm_debug=1; /* 0= nodebug, >0= debug output on */ +#define DBprintk(a) {\ + if (pfm_debug >0) { printk a; } \ +} + /* - * could optimize to avoid cache conflicts in SMP + * could optimize to avoid cache line conflicts in SMP */ -unsigned long pmds[NR_CPUS][MAX_PERF_COUNTER]; +static struct task_struct *pmu_owners[NR_CPUS]; -asmlinkage unsigned long -sys_perfmonctl (int cmd, int count, void *ptr, long arg4, long arg5, long arg6, long arg7, long arg8, long stack) +static int +do_perfmonctl (struct task_struct *task, int cmd, int flags, perfmon_req_t *req, int count, struct pt_regs *regs) { - struct pt_regs *regs = (struct pt_regs *) &stack; - perfmon_reg_t tmp, *cptr = ptr; - unsigned long cnum; + perfmon_req_t tmp; int i; switch (cmd) { - case WRITE_PMCS: /* Writes to PMC's and clears PMDs */ - case WRITE_PMCS_AND_START: /* Also starts counting */ + case PFM_WRITE_PMCS: + /* we don't quite support this right now */ + if (task != current) return -EINVAL; + + if (!access_ok(VERIFY_READ, req, sizeof(struct perfmon_req_t)*count)) return -EFAULT; + + for (i = 0; i < count; i++, req++) { + copy_from_user(&tmp, req, sizeof(tmp)); + + /* XXX needs to check validity of the data maybe */ + + if (!PMC_IS_IMPL(tmp.pfr_reg_num)) { + DBprintk((__FUNCTION__ " invalid pmc[%ld]\n", tmp.pfr_reg_num)); + return -EINVAL; + } + + /* XXX: for counters, need to some checks */ + if (PMC_IS_COUNTER(tmp.pfr_reg_num)) { + current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].sig = tmp.pfr_notify_sig; + 
current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].pid = tmp.pfr_notify_pid; + + DBprintk((__FUNCTION__" setting PMC[%ld] send sig %d to %d\n",tmp.pfr_reg_num, tmp.pfr_notify_sig, tmp.pfr_notify_pid)); + } + ia64_set_pmc(tmp.pfr_reg_num, tmp.pfr_reg_value); + + DBprintk((__FUNCTION__" setting PMC[%ld]=0x%lx\n", tmp.pfr_reg_num, tmp.pfr_reg_value)); + } + /* + * we have to set this here event hough we haven't necessarily started monitoring + * because we may be context switched out + */ + current->thread.flags |= IA64_THREAD_PM_VALID; + break; + + case PFM_WRITE_PMDS: + /* we don't quite support this right now */ + if (task != current) return -EINVAL; + + if (!access_ok(VERIFY_READ, req, sizeof(struct perfmon_req_t)*count)) return -EFAULT; + + for (i = 0; i < count; i++, req++) { + copy_from_user(&tmp, req, sizeof(tmp)); + + if (!PMD_IS_IMPL(tmp.pfr_reg_num)) return -EINVAL; + + /* update virtualized (64bits) counter */ + if (PMD_IS_COUNTER(tmp.pfr_reg_num)) { + current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].val = tmp.pfr_reg_value & ~pmu_conf.perf_ovfl_val; + current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].rval = tmp.pfr_reg_reset; + } + /* writes to unimplemented part is ignored, so this is safe */ + ia64_set_pmd(tmp.pfr_reg_num, tmp.pfr_reg_value); + /* to go away */ + ia64_srlz_d(); + DBprintk((__FUNCTION__" setting PMD[%ld]: pmod.val=0x%lx pmd=0x%lx rval=0x%lx\n", tmp.pfr_reg_num, current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].val, ia64_get_pmd(tmp.pfr_reg_num),current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].rval)); + } + /* + * we have to set this here event hough we haven't necessarily started monitoring + * because we may be context switched out + */ + current->thread.flags |= IA64_THREAD_PM_VALID; + break; + + case PFM_START: + /* we don't quite support this right now */ + if (task != current) return -EINVAL; + + pmu_owners[smp_processor_id()] = current; - if (!access_ok(VERIFY_READ, cptr, sizeof(struct perfmon_reg_t)*count)) - return -EFAULT; - - for (i = 0; i < count; i++, cptr++) { - - copy_from_user(&tmp, cptr, sizeof(tmp)); - - /* XXX need to check validity of pmu_reg_num and perhaps data!! */ - - if (tmp.pmu_reg_num > pmu_conf.max_pmc || tmp.pmu_reg_num == 0) return -EFAULT; + /* will start monitoring right after rfi */ + ia64_psr(regs)->up = 1; - ia64_set_pmc(tmp.pmu_reg_num, tmp.pmu_reg_data); + /* + * mark the state as valid. + * this will trigger save/restore at context switch + */ + current->thread.flags |= IA64_THREAD_PM_VALID; - /* to go away */ - if (tmp.pmu_reg_num >= PMU_FIRST_COUNTER && tmp.pmu_reg_num < PMU_FIRST_COUNTER+pmu_conf.max_counters) { - ia64_set_pmd(tmp.pmu_reg_num, 0); - pmds[smp_processor_id()][tmp.pmu_reg_num - PMU_FIRST_COUNTER] = 0; + ia64_set_pmc(0, 0); - printk(__FUNCTION__" setting PMC/PMD[%ld] es=0x%lx pmd[%ld]=%lx\n", tmp.pmu_reg_num, (tmp.pmu_reg_data>>8) & 0x7f, tmp.pmu_reg_num, ia64_get_pmd(tmp.pmu_reg_num)); - } else - printk(__FUNCTION__" setting PMC[%ld]=0x%lx\n", tmp.pmu_reg_num, tmp.pmu_reg_data); - } + break; - if (cmd == WRITE_PMCS_AND_START) { -#if 0 -/* irrelevant with user monitors */ - local_irq_save(flags); + case PFM_ENABLE: + /* we don't quite support this right now */ + if (task != current) return -EINVAL; - dcr = ia64_get_dcr(); - dcr |= IA64_DCR_PP; - ia64_set_dcr(dcr); + pmu_owners[smp_processor_id()] = current; - local_irq_restore(flags); -#endif + /* + * mark the state as valid. 
+ * this will trigger save/restore at context switch + */ + current->thread.flags |= IA64_THREAD_PM_VALID; + /* simply unfreeze */ ia64_set_pmc(0, 0); + break; - /* will start monitoring right after rfi */ - ia64_psr(regs)->up = 1; - } - /* - * mark the state as valid. - * this will trigger save/restore at context switch - */ - current->thread.flags |= IA64_THREAD_PM_VALID; - break; + case PFM_DISABLE: + /* we don't quite support this right now */ + if (task != current) return -EINVAL; + + /* simply unfreeze */ + ia64_set_pmc(0, 1); + ia64_srlz_d(); + break; - case READ_PMDS: - if (count <= 0 || count > MAX_PERF_COUNTER) - return -EINVAL; - if (!access_ok(VERIFY_WRITE, cptr, sizeof(struct perfmon_reg_t)*count)) - return -EFAULT; + case PFM_READ_PMDS: + if (!access_ok(VERIFY_READ, req, sizeof(struct perfmon_req_t)*count)) return -EFAULT; + if (!access_ok(VERIFY_WRITE, req, sizeof(struct perfmon_req_t)*count)) return -EFAULT; /* This looks shady, but IMHO this will work fine. This is * the sequence that I could come up with to avoid races @@ -187,16 +276,31 @@ sys_perfmonctl (int cmd, int count, void *ptr, long arg4, long arg5, long arg6, * is the irq_save/restore needed? */ + for (i = 0; i < count; i++, req++) { + unsigned long val=0; - /* XXX: This needs to change to read more than just the counters */ - for (i = 0, cnum = PMU_FIRST_COUNTER;i < count; i++, cnum++, cptr++) { + copy_from_user(&tmp, req, sizeof(tmp)); - tmp.pmu_reg_data = (pmds[smp_processor_id()][i] - + (ia64_get_pmd(cnum) & pmu_conf.perf_ovfl_val)); + if (!PMD_IS_IMPL(tmp.pfr_reg_num)) return -EINVAL; - tmp.pmu_reg_num = cnum; + if (PMD_IS_COUNTER(tmp.pfr_reg_num)) { + if (task == current){ + val = ia64_get_pmd(tmp.pfr_reg_num) & pmu_conf.perf_ovfl_val; + } else { + val = task->thread.pmd[tmp.pfr_reg_num - PMU_FIRST_COUNTER] & pmu_conf.perf_ovfl_val; + } + val += task->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].val; + } else { + /* for now */ + if (task != current) return -EINVAL; - if (copy_to_user(cptr, &tmp, sizeof(tmp))) return -EFAULT; + val = ia64_get_pmd(tmp.pfr_reg_num); + } + tmp.pfr_reg_value = val; + +DBprintk((__FUNCTION__" reading PMD[%ld]=0x%lx\n", tmp.pfr_reg_num, val)); + + if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT; } #if 0 /* irrelevant with user monitors */ @@ -209,11 +313,18 @@ sys_perfmonctl (int cmd, int count, void *ptr, long arg4, long arg5, long arg6, #endif break; - case STOP_PMCS: + case PFM_STOP: + /* we don't quite support this right now */ + if (task != current) return -EINVAL; + ia64_set_pmc(0, 1); ia64_srlz_d(); - for (i = 0; i < MAX_PERF_COUNTER; ++i) - ia64_set_pmc(4+i, 0); + + ia64_psr(regs)->up = 0; + + current->thread.flags &= ~IA64_THREAD_PM_VALID; + + pmu_owners[smp_processor_id()] = NULL; #if 0 /* irrelevant with user monitors */ @@ -225,48 +336,140 @@ sys_perfmonctl (int cmd, int count, void *ptr, long arg4, long arg5, long arg6, ia64_psr(regs)->up = 0; #endif - current->thread.flags &= ~(IA64_THREAD_PM_VALID); - break; + case PFM_DEBUG_ON: + printk(__FUNCTION__" debuggin on\n"); + pfm_debug = 1; + break; + + case PFM_DEBUG_OFF: + printk(__FUNCTION__" debuggin off\n"); + pfm_debug = 0; + break; + default: + DBprintk((__FUNCTION__" UNknown command 0x%x\n", cmd)); return -EINVAL; break; } return 0; } -static inline void -update_counters (void) +asmlinkage int +sys_perfmonctl (int pid, int cmd, int flags, perfmon_req_t *req, int count, long arg6, long arg7, long arg8, long stack) { - unsigned long mask, i, cnum, val; + struct pt_regs *regs = (struct pt_regs 
*) &stack; + struct task_struct *child = current; + int ret; + + if (pid != current->pid) { + read_lock(&tasklist_lock); + { + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + } + if (!child) { + read_unlock(&tasklist_lock); + return -ESRCH; + } + /* + * XXX: need to do more checking here + */ + if (child->state != TASK_ZOMBIE) { + DBprintk((__FUNCTION__" warning process %d not in stable state %ld\n", pid, child->state)); + } + } + ret = do_perfmonctl(child, cmd, flags, req, count, regs); - mask = ia64_get_pmc(0) >> 4; - for (i = 0, cnum = PMU_FIRST_COUNTER ; i < pmu_conf.max_counters; cnum++, i++, mask >>= 1) { + if (child != current) read_unlock(&tasklist_lock); + return ret; +} - val = mask & 0x1 ? pmu_conf.perf_ovfl_val + 1 : 0; - if (mask & 0x1) - printk(__FUNCTION__ " PMD%ld overflowed pmd=%lx pmod=%lx\n", cnum, ia64_get_pmd(cnum), pmds[smp_processor_id()][i]); +static inline int +update_counters (u64 pmc0) +{ + unsigned long mask, i, cnum; + struct thread_struct *th; + struct task_struct *ta; - /* since we got an interrupt, might as well clear every pmd. */ - val += ia64_get_pmd(cnum) & pmu_conf.perf_ovfl_val; + if (pmu_owners[smp_processor_id()] == NULL) { + DBprintk((__FUNCTION__" Spurious overflow interrupt: PMU not owned\n")); + return 0; + } + + /* + * It is never safe to access the task for which the overflow interrupt is destinated + * using the current variable as the interrupt may occur in the middle of a context switch + * where current does not hold the task that is running yet. + * + * For monitoring, however, we do need to get access to the task which caused the overflow + * to account for overflow on the counters. + * We accomplish this by maintaining a current owner of the PMU per CPU. During context + * switch the ownership is changed in a way such that the reflected owner is always the + * valid one, i.e. the one that caused the interrupt. + */ + ta = pmu_owners[smp_processor_id()]; + th = &pmu_owners[smp_processor_id()]->thread; - printk(__FUNCTION__ " adding val=%lx to pmod[%ld]=%lx \n", val, i, pmds[smp_processor_id()][i]); + /* + * Don't think this could happen given first test. Keep as sanity check + */ + if ((th->flags & IA64_THREAD_PM_VALID) == 0) { + DBprintk((__FUNCTION__" Spurious overflow interrupt: process %d not using perfmon\n", ta->pid)); + return 0; + } + + /* + * if PMU not frozen: spurious from previous context + * if PMC[0] = 0x1 : frozen but no overflow reported: leftover from previous context + * + * in either case we don't touch the state upon return from handler + */ + if ((pmc0 & 0x1) == 0 || pmc0 == 0x1) { + DBprintk((__FUNCTION__" Spurious overflow interrupt: process %d freeze=0\n",ta->pid)); + return 0; + } - pmds[smp_processor_id()][i] += val; + mask = pmc0 >> 4; - ia64_set_pmd(cnum, 0); + for (i = 0, cnum = PMU_FIRST_COUNTER; i < pmu_conf.max_counters; cnum++, i++, mask >>= 1) { + + if (mask & 0x1) { + DBprintk((__FUNCTION__ " PMD[%ld] overflowed pmd=0x%lx pmod.val=0x%lx\n", cnum, ia64_get_pmd(cnum), th->pmu_counters[i].val)); + + /* + * Because we somtimes (EARS/BTB) reset to a specific value, we cannot simply use + * val to count the number of times we overflowed. Otherwise we would loose the value + * current in the PMD (which can be >0). So to make sure we don't loose + * the residual counts we set val to contain full 64bits value of the counter. 
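(The accounting line that follows, val += 1 + perf_ovfl_val + (pmd & perf_ovfl_val), credits one full counter period plus whatever the hardware counter has gathered since it wrapped. A stand-alone version of that arithmetic, assuming 32-bit generic counters and the 64-bit unsigned long of ia64:)

#include <stdio.h>

int main(void)
{
    unsigned long perf_ovfl_val = (1UL << 32) - 1;  /* counters assumed 32 bits wide */
    unsigned long val = 0;                          /* 64-bit software part of the virtual counter */
    unsigned long pmd = 0x123;                      /* what the hardware PMD reads after wrapping */

    val += 1 + perf_ovfl_val + (pmd & perf_ovfl_val);   /* same expression as in update_counters() */
    printf("val = 0x%lx\n", val);                   /* 0x100000123: one wrap (2^32) plus the residue */
    return 0;
}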
+ */ + th->pmu_counters[i].val += 1+pmu_conf.perf_ovfl_val+(ia64_get_pmd(cnum) &pmu_conf.perf_ovfl_val); + + /* writes to upper part are ignored, so this is safe */ + ia64_set_pmd(cnum, th->pmu_counters[i].rval); + + DBprintk((__FUNCTION__ " pmod[%ld].val=0x%lx pmd=0x%lx\n", i, th->pmu_counters[i].val, ia64_get_pmd(cnum)&pmu_conf.perf_ovfl_val)); + + if (th->pmu_counters[i].pid != 0 && th->pmu_counters[i].sig>0) { + DBprintk((__FUNCTION__ " should notify process %d with signal %d\n",th->pmu_counters[i].pid, th->pmu_counters[i].sig)); + } + } } + return 1; } static void perfmon_interrupt (int irq, void *arg, struct pt_regs *regs) { - update_counters(); - ia64_set_pmc(0, 0); - ia64_srlz_d(); + /* unfreeze if not spurious */ + if ( update_counters(ia64_get_pmc(0)) ) { + ia64_set_pmc(0, 0); + ia64_srlz_d(); + } } static struct irqaction perfmon_irqaction = { @@ -280,9 +483,13 @@ perfmon_proc_info(char *page) { char *p = page; u64 pmc0 = ia64_get_pmc(0); + int i; - p += sprintf(p, "PMC[0]=%lx\n", pmc0); - + p += sprintf(p, "PMC[0]=%lx\nPerfmon debug: %s\n", pmc0, pfm_debug ? "On" : "Off"); + for(i=0; i < NR_CPUS; i++) { + if (cpu_is_online(i)) + p += sprintf(p, "CPU%d.PMU %d\n", i, pmu_owners[i] ? pmu_owners[i]->pid: -1); + } return p - page; } @@ -308,7 +515,6 @@ void __init perfmon_init (void) { pal_perf_mon_info_u_t pm_info; - u64 pm_buffer[16]; s64 status; irq_desc[PERFMON_IRQ].status |= IRQ_PER_CPU; @@ -320,15 +526,13 @@ perfmon_init (void) printk("perfmon: Initialized vector to %u\n",PERFMON_IRQ); - if ((status=ia64_pal_perf_mon_info(pm_buffer, &pm_info)) != 0) { + if ((status=ia64_pal_perf_mon_info(pmu_conf.impl_regs, &pm_info)) != 0) { printk(__FUNCTION__ " pal call failed (%ld)\n", status); return; } - pmu_conf.perf_ovfl_val = perf_ovfl_val = (1L << pm_info.pal_perf_mon_info_s.width) - 1; + pmu_conf.perf_ovfl_val = (1L << pm_info.pal_perf_mon_info_s.width) - 1; /* XXX need to use PAL instead */ - pmu_conf.max_pmc = 13; - pmu_conf.max_pmd = 17; pmu_conf.max_counters = pm_info.pal_perf_mon_info_s.generic; printk("perfmon: Counters are %d bits\n", pm_info.pal_perf_mon_info_s.width); @@ -347,36 +551,137 @@ perfmon_init_percpu (void) ia64_srlz_d(); } +/* + * XXX: for system wide this function MUST never be called + */ void -ia64_save_pm_regs (struct thread_struct *t) +ia64_save_pm_regs (struct task_struct *ta) { - int i; + struct thread_struct *t = &ta->thread; + u64 pmc0, psr; + int i,j; + + /* + * We must make sure that we don't lose any potential overflow + * interrupt while saving PMU context. In this code, external + * interrupts are always enabled. + */ + + /* + * save current PSR: needed because we modify it + */ + __asm__ __volatile__ ("mov %0=psr;;": "=r"(psr) :: "memory"); + + /* + * stop monitoring: + * This is the only way to stop monitoring without destroying overflow + * information in PMC[0..3]. + * This is the last instruction which can cause overflow when monitoring + * in kernel. + * By now, we could still have an overflow interrupt in flight. + */ + __asm__ __volatile__ ("rsm psr.up;;"::: "memory"); + + /* + * read current overflow status: + * + * We may be reading stale information at this point, if we got interrupt + * just before the read(pmc0) but that's all right. However, if we did + * not get the interrupt before, this read reflects LAST state. + * + */ + pmc0 = ia64_get_pmc(0); + /* + * freeze PMU: + * + * This destroys the overflow information.
This is required to make sure + * next process does not start with monitoring on if not requested + * (PSR.up may not be enough). + * + * We could still get an overflow interrupt by now. However the handler + * will not do anything if it sees PMC[0].fr=1 but no overflow bits + * are set. So PMU will stay in frozen state. This implies that pmc0 + * will still be holding the correct unprocessed information. + * + */ ia64_set_pmc(0, 1); ia64_srlz_d(); + + /* + * check for overflow bits set: + * + * If pmc0 reports PMU frozen, this means we have a pending overflow, + * therefore we invoke the handler. Handler is reentrant with regards + * to PMC[0] so it is safe to call it twice. + * + * IF pmc0 reports overflow, we need to reread current PMC[0] value + * in case the handler was invoked right after the first pmc0 read. + * if it was not invoked then pmc0==PMC[0], otherwise it's been invoked + * and overflow information has been processed, so we don't need to call. + * + * Test breakdown: + * - pmc0 & ~0x1: test if overflow happened + * - second part: check if current register reflects this as well. + * + * NOTE: testing for pmc0 & 0x1 is not enough as it would trigger a call + * when PM_VALID and PMU.fr which is common when setting up registers + * just before actually starting monitors. + * + */ + if ((pmc0 & ~0x1) && ((pmc0=ia64_get_pmc(0)) &~0x1) ) { + printk(__FUNCTION__" Warning: pmc[0]=0x%lx\n", pmc0); + update_counters(pmc0); + /* + * XXX: not sure that's enough. the next task may still get the + * interrupt. + */ + } + + /* + * restore PSR for context switch to save + */ + __asm__ __volatile__ ("mov psr.l=%0;;"::"r"(psr): "memory"); + /* * XXX: this will need to be extended beyond just counters */ - for (i=0; i< IA64_NUM_PM_REGS; i++) { - t->pmd[i] = ia64_get_pmd(4+i); - t->pmod[i] = pmds[smp_processor_id()][i]; - t->pmc[i] = ia64_get_pmc(4+i); + for (i=0,j=4; i< IA64_NUM_PMD_COUNTERS; i++,j++) { + t->pmd[i] = ia64_get_pmd(j); + t->pmc[i] = ia64_get_pmc(j); } + /* + * PMU is frozen, PMU context is saved: nobody owns the PMU on this CPU + * At this point, we should not receive any pending interrupt from the + * 'switched out' task + */ + pmu_owners[smp_processor_id()] = NULL; } void -ia64_load_pm_regs (struct thread_struct *t) +ia64_load_pm_regs (struct task_struct *ta) { - int i; + struct thread_struct *t = &ta->thread; + int i,j; + + /* + * we first restore ownership of the PMU to the 'soon to be current' + * context.
This way, if, as soon as we unfreeze the PMU at the end + * of this function, we get an interrupt, we attribute it to the correct + * task + */ + pmu_owners[smp_processor_id()] = ta; /* * XXX: this will need to be extended beyong just counters */ - for (i=0; i< IA64_NUM_PM_REGS ; i++) { - ia64_set_pmd(4+i, t->pmd[i]); - pmds[smp_processor_id()][i] = t->pmod[i]; - ia64_set_pmc(4+i, t->pmc[i]); + for (i=0,j=4; i< IA64_NUM_PMD_COUNTERS; i++,j++) { + ia64_set_pmd(j, t->pmd[i]); + ia64_set_pmc(j, t->pmc[i]); } + /* + * unfreeze PMU + */ ia64_set_pmc(0, 0); ia64_srlz_d(); } diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 41db60a0c..e61843db5 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -137,23 +137,6 @@ cpu_idle (void *unused) check_pgt_cache(); if (pm_idle) (*pm_idle)(); -#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC - local_irq_disable(); - { - u64 itc, itm; - - itc = ia64_get_itc(); - itm = ia64_get_itm(); - if (time_after(itc, itm + 1000)) { - extern void ia64_reset_itm (void); - - printk("cpu_idle: ITM in past (itc=%lx,itm=%lx:%lums)\n", - itc, itm, (itc - itm)/500000); - ia64_reset_itm(); - } - } - local_irq_enable(); -#endif } } @@ -164,7 +147,7 @@ ia64_save_extra (struct task_struct *task) ia64_save_debug_regs(&task->thread.dbr[0]); #ifdef CONFIG_PERFMON if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) - ia64_save_pm_regs(&task->thread); + ia64_save_pm_regs(task); #endif if (IS_IA32_PROCESS(ia64_task_regs(task))) ia32_save_state(&task->thread); @@ -177,7 +160,7 @@ ia64_load_extra (struct task_struct *task) ia64_load_debug_regs(&task->thread.dbr[0]); #ifdef CONFIG_PERFMON if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) - ia64_load_pm_regs(&task->thread); + ia64_load_pm_regs(task); #endif if (IS_IA32_PROCESS(ia64_task_regs(task))) ia32_load_state(&task->thread); @@ -299,6 +282,14 @@ copy_thread (int nr, unsigned long clone_flags, # define THREAD_FLAGS_TO_SET 0 p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR) | THREAD_FLAGS_TO_SET); +#ifdef CONFIG_IA32_SUPPORT + /* + * If we're cloning an IA32 task then save the IA32 extra + * state from the current task to the new task + */ + if (IS_IA32_PROCESS(ia64_task_regs(current))) + ia32_save_state(&p->thread); +#endif return 0; } @@ -554,7 +545,7 @@ exit_thread (void) * we garantee no race. 
this call we also stop * monitoring */ - ia64_save_pm_regs(¤t->thread); + ia64_save_pm_regs(current); /* * make sure that switch_to() will not save context again */ diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 820a87854..0b49bdcaa 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -617,7 +617,6 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data struct switch_stack *sw; struct unw_frame_info info; struct pt_regs *pt; - unsigned long pmd_tmp; pt = ia64_task_regs(child); sw = (struct switch_stack *) (child->thread.ksp + 16); @@ -794,11 +793,7 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data addr); return -1; } - } else -#ifdef CONFIG_PERFMON - if (addr < PT_PMD) -#endif - { + } else { /* access debug registers */ if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) { @@ -820,33 +815,14 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data } ptr += regnum; - } -#ifdef CONFIG_PERFMON - else { - /* - * XXX: will eventually move back to perfmonctl() - */ - unsigned long pmd = (addr - PT_PMD) >> 3; - extern unsigned long perf_ovfl_val; - - /* we just use ptrace to read */ - if (write_access) return -1; - - if (pmd > 3) { - printk("ptrace: rejecting access to PMD[%ld] address 0x%lx\n", pmd, addr); - return -1; - } - /* - * We always need to mask upper 32bits of pmd because value is random - */ - pmd_tmp = child->thread.pmod[pmd]+(child->thread.pmd[pmd]& perf_ovfl_val); - - /*printk(__FUNCTION__" child=%d reading pmd[%ld]=%lx\n", child->pid, pmd, pmd_tmp);*/ - - ptr = &pmd_tmp; + if (write_access) + /* don't let the user set kernel-level breakpoints... */ + *ptr = *data & ~(7UL << 56); + else + *data = *ptr; + return 0; } -#endif if (write_access) *ptr = *data; else @@ -861,7 +837,6 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data { unsigned long *ptr = NULL, *rbs, *bspstore, ndirty, regnum; struct switch_stack *sw; - unsigned long pmd_tmp; struct pt_regs *pt; if ((addr & 0x7) != 0) @@ -977,11 +952,7 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data /* disallow accessing anything else... */ return -1; } - } else -#ifdef CONFIG_PERFMON - if (addr < PT_PMD) -#endif - { + } else { /* access debug registers */ @@ -1002,34 +973,14 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data return -1; ptr += regnum; - } -#ifdef CONFIG_PERFMON - else { - /* - * XXX: will eventually move back to perfmonctl() - */ - unsigned long pmd = (addr - PT_PMD) >> 3; - extern unsigned long perf_ovfl_val; - - /* we just use ptrace to read */ - if (write_access) return -1; - - if (pmd > 3) { - printk("ptrace: rejecting access to PMD[%ld] address 0x%lx\n", pmd, addr); - return -1; - } - /* - * We always need to mask upper 32bits of pmd because value is random - */ - pmd_tmp = child->thread.pmod[pmd]+(child->thread.pmd[pmd]& perf_ovfl_val); - - /*printk(__FUNCTION__" child=%d reading pmd[%ld]=%lx\n", child->pid, pmd, pmd_tmp);*/ - - ptr = &pmd_tmp; + if (write_access) + /* don't let the user set kernel-level breakpoints... 
*/ + *ptr = *data & ~(7UL << 56); + else + *data = *ptr; + return 0; } -#endif - if (write_access) *ptr = *data; else @@ -1107,7 +1058,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, goto out_tsk; if (child->state != TASK_STOPPED) { - if (request != PTRACE_KILL && request != PTRACE_PEEKUSR) + if (request != PTRACE_KILL) goto out_tsk; } diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c index 87c7befea..56059a306 100644 --- a/arch/ia64/kernel/sal.c +++ b/arch/ia64/kernel/sal.c @@ -104,9 +104,11 @@ ia64_sal_init (struct ia64_sal_systab *systab) if (strncmp(systab->signature, "SST_", 4) != 0) printk("bad signature in system table!"); - printk("SAL v%u.%02u: ia32bios=%s, oem=%.32s, product=%.32s\n", + /* + * revisions are coded in BCD, so %x does the job for us + */ + printk("SAL v%x.%02x: oem=%.32s, product=%.32s\n", systab->sal_rev_major, systab->sal_rev_minor, - systab->ia32_bios_present ? "present" : "absent", systab->oem_id, systab->product_id); min = ~0UL; diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index ed091d864..83d5643cd 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -235,6 +235,12 @@ setup_arch (char **cmdline_p) machvec_init(acpi_get_sysname()); #endif +#ifdef CONFIG_ACPI20 + if (efi.acpi20) { + /* Parse the ACPI 2.0 tables */ + acpi20_parse(efi.acpi20); + } else +#endif if (efi.acpi) { /* Parse the ACPI tables */ acpi_parse(efi.acpi); @@ -255,13 +261,6 @@ setup_arch (char **cmdline_p) paging_init(); platform_setup(cmdline_p); - -#ifdef CONFIG_SWIOTLB - { - extern void setup_swiotlb (void); - setup_swiotlb(); - } -#endif } /* @@ -271,9 +270,9 @@ int get_cpuinfo (char *buffer) { #ifdef CONFIG_SMP -# define lps c->loops_per_sec +# define lpj c->loops_per_jiffy #else -# define lps loops_per_sec +# define lpj loops_per_jiffy #endif char family[32], model[32], features[128], *cp, *p = buffer; struct cpuinfo_ia64 *c; @@ -325,7 +324,7 @@ get_cpuinfo (char *buffer) features, c->ppn, c->number, c->proc_freq / 1000000, c->proc_freq % 1000000, c->itc_freq / 1000000, c->itc_freq % 1000000, - lps / 500000, (lps / 5000) % 100); + lpj*HZ/500000, (lpj*HZ/5000) % 100); } return p - buffer; } @@ -376,15 +375,7 @@ identify_cpu (struct cpuinfo_ia64 *c) status = ia64_pal_vm_summary(&vm1, &vm2); if (status == PAL_STATUS_SUCCESS) { -#if 1 - /* - * XXX the current PAL code returns IMPL_VA_MSB==60, which is dead-wrong. - * --davidm 00/05/26 - s*/ - impl_va_msb = 50; -#else impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb; -#endif phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size; } printk("CPU %d: %lu virtual and %lu physical address bits\n", @@ -408,6 +399,8 @@ cpu_init (void) { extern void __init ia64_rid_init (void); extern void __init ia64_tlb_init (void); + pal_vm_info_2_u_t vmi; + unsigned int max_ctx; identify_cpu(&my_cpu_data); @@ -415,15 +408,12 @@ cpu_init (void) memset(ia64_task_regs(current), 0, sizeof(struct pt_regs)); /* - * Initialize default control register to defer speculative - * faults. On a speculative load, we want to defer access - * right, key miss, and key permission faults. We currently - * do NOT defer TLB misses, page-not-present, access bit, or - * debug faults but kernel code should not rely on any - * particular setting of these bits. - ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_PP); + * Initialize default control register to defer all speculative faults. 
The + * kernel MUST NOT depend on a particular setting of these bits (in other words, + * the kernel must have recovery code for all speculative accesses). */ - ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX ); + ia64_set_dcr( IA64_DCR_DM | IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR + | IA64_DCR_DA | IA64_DCR_DD); #ifndef CONFIG_SMP ia64_set_fpu_owner(0); /* initialize ar.k5 */ #endif @@ -444,4 +434,17 @@ cpu_init (void) #ifdef CONFIG_SMP normal_xtp(); #endif + + /* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */ + if (ia64_pal_vm_summary(NULL, &vmi) == 0) + max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1; + else { + printk("ia64_rid_init: PAL VM summary failed, assuming 18 RID bits\n"); + max_ctx = (1U << 15) - 1; /* use architected minimum */ + } + while (max_ctx < ia64_ctx.max_ctx) { + unsigned int old = ia64_ctx.max_ctx; + if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old) + break; + } } diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index e0adf1981..3ffa201aa 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -91,7 +91,7 @@ ia64_rt_sigsuspend (sigset_t *uset, size_t sigsetsize, struct sigscratch *scr) scr->pt.r10 = -1; } while (1) { - set_current_state(TASK_INTERRUPTIBLE); + current->state = TASK_INTERRUPTIBLE; schedule(); if (ia64_do_signal(&oldset, scr, 1)) return -EINTR; @@ -499,9 +499,10 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) /* Let the debugger run. */ current->exit_code = signr; current->thread.siginfo = &info; - set_current_state(TASK_STOPPED); + current->state = TASK_STOPPED; notify_parent(current, SIGCHLD); schedule(); + signr = current->exit_code; current->thread.siginfo = 0; @@ -557,7 +558,7 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) /* FALLTHRU */ case SIGSTOP: - set_current_state(TASK_STOPPED); + current->state = TASK_STOPPED; current->exit_code = signr; if (!(current->p_pptr->sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 694711507..5093341a5 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -6,11 +6,13 @@ * * Lots of stuff stolen from arch/alpha/kernel/smp.c * - * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_sec calibration on each CPU. + * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_jiffy calibration on each CPU. * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id * 00/03/31 Rohit Seth <rohit.seth@intel.com> Fixes for Bootstrap Processor & cpu_online_map * now gets done here (instead of setup.c) * 99/10/05 davidm Update to bring it in sync with new command-line processing scheme. 
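(The loop at the end of cpu_init() above is a small lock-free lower-the-shared-maximum pattern: a CPU only ever replaces ia64_ctx.max_ctx with a smaller value and retries if another CPU raced in between, so all CPUs converge on the most restrictive RID width. A user-space sketch of the same idea, with a GCC __sync builtin standing in for the kernel's cmpxchg:)

#include <stdio.h>

static unsigned int shared_max = ~0U;   /* plays the role of ia64_ctx.max_ctx */

static void lower_shared_max(unsigned int my_max)
{
    while (my_max < shared_max) {
        unsigned int old = shared_max;
        if (__sync_val_compare_and_swap(&shared_max, old, my_max) == old)
            break;      /* installed the smaller value */
        /* otherwise someone else changed it; re-test against the new value */
    }
}

int main(void)
{
    lower_shared_max((1U << 18) - 1);   /* CPU reporting 21 RID bits -> 18 context bits */
    lower_shared_max((1U << 15) - 1);   /* CPU reporting 18 RID bits wins */
    printf("max_ctx = 0x%x\n", shared_max);     /* 0x7fff */
    return 0;
}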
+ * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and + * smp_call_function_single to resend IPI on timeouts */ #define __KERNEL_SYSCALLS__ @@ -30,6 +32,7 @@ #include <asm/current.h> #include <asm/delay.h> #include <asm/efi.h> +#include <asm/machvec.h> #include <asm/io.h> #include <asm/irq.h> @@ -78,10 +81,6 @@ struct smp_call_struct { }; static volatile struct smp_call_struct *smp_call_function_data; -#ifdef CONFIG_ITANIUM_A1_SPECIFIC -extern spinlock_t ivr_read_lock; -#endif - #define IPI_RESCHEDULE 0 #define IPI_CALL_FUNC 1 #define IPI_CPU_STOP 2 @@ -269,14 +268,14 @@ handle_IPI(int irq, void *dev_id, struct pt_regs *regs) } static inline void -send_IPI_single(int dest_cpu, int op) +send_IPI_single (int dest_cpu, int op) { if (dest_cpu == -1) return; set_bit(op, &ipi_op[dest_cpu]); - ipi_send(dest_cpu, IPI_IRQ, IA64_IPI_DM_INT, 0); + platform_send_ipi(dest_cpu, IPI_IRQ, IA64_IPI_DM_INT, 0); } static inline void @@ -358,6 +357,7 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int if (pointer_lock(&smp_call_function_data, &data, retry)) return -EBUSY; +resend: /* Send a message to all other CPUs and wait for them to respond */ send_IPI_single(cpuid, IPI_CALL_FUNC); @@ -366,8 +366,12 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int while ((atomic_read(&data.unstarted_count) > 0) && time_before(jiffies, timeout)) barrier(); if (atomic_read(&data.unstarted_count) > 0) { +#if (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) + goto resend; +#else smp_call_function_data = NULL; return -ETIMEDOUT; +#endif } if (wait) while (atomic_read(&data.unfinished_count) > 0) @@ -411,13 +415,23 @@ smp_call_function (void (*func) (void *info), void *info, int retry, int wait) /* Send a message to all other CPUs and wait for them to respond */ send_IPI_allbutself(IPI_CALL_FUNC); +retry: /* Wait for response */ timeout = jiffies + HZ; while ((atomic_read(&data.unstarted_count) > 0) && time_before(jiffies, timeout)) barrier(); if (atomic_read(&data.unstarted_count) > 0) { +#if (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) + int i; + for (i = 0; i < smp_num_cpus; i++) { + if (i != smp_processor_id()) + platform_send_ipi(i, IPI_IRQ, IA64_IPI_DM_INT, 0); + } + goto retry; +#else smp_call_function_data = NULL; return -ETIMEDOUT; +#endif } if (wait) while (atomic_read(&data.unfinished_count) > 0) @@ -430,8 +444,6 @@ smp_call_function (void (*func) (void *info), void *info, int retry, int wait) /* * Flush all other CPU's tlb and then mine. Do this with smp_call_function() as we * want to ensure all TLB's flushed before proceeding. - * - * XXX: Is it OK to use the same ptc.e info on all cpus? 
*/ void smp_flush_tlb_all(void) @@ -502,7 +514,7 @@ smp_callin (void) local_irq_enable(); /* Interrupts have been off until now */ calibrate_delay(); - my_cpu_data.loops_per_sec = loops_per_sec; + my_cpu_data.loops_per_jiffy = loops_per_jiffy; /* allow the master to continue */ set_bit(cpu, &cpu_callin_map); @@ -569,7 +581,7 @@ smp_boot_one_cpu(int cpu) cpu_now_booting = cpu; /* Kick the AP in the butt */ - ipi_send(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0); + platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0); /* wait up to 10s for the AP to start */ for (timeout = 0; timeout < 100000; timeout++) { @@ -603,7 +615,7 @@ smp_boot_cpus(void) __cpu_physical_id[0] = hard_smp_processor_id(); /* on the BP, the kernel already called calibrate_delay_loop() in init/main.c */ - my_cpu_data.loops_per_sec = loops_per_sec; + my_cpu_data.loops_per_jiffy = loops_per_jiffy; #if 0 smp_tune_scheduling(); #endif @@ -653,13 +665,11 @@ smp_boot_cpus(void) bogosum = 0; for (i = 0; i < NR_CPUS; i++) { if (cpu_online_map & (1L << i)) - bogosum += cpu_data[i].loops_per_sec; + bogosum += cpu_data[i].loops_per_jiffy; } - printk(KERN_INFO "SMP: Total of %d processors activated " - "(%lu.%02lu BogoMIPS).\n", - cpu_count, (bogosum + 2500) / 500000, - ((bogosum + 2500) / 5000) % 100); + printk(KERN_INFO "SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + cpu_count, bogosum*HZ/500000, (bogosum*HZ/5000) % 100); smp_num_cpus = cpu_count; } diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index f78512229..2713d7fd9 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -16,8 +16,38 @@ #include <linux/smp_lock.h> #include <linux/highuid.h> +#include <asm/shmparam.h> #include <asm/uaccess.h> +#define COLOR_ALIGN(addr) (((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + +unsigned long +get_unmapped_area (unsigned long addr, unsigned long len) +{ + struct vm_area_struct * vmm; + + if (len > RGN_MAP_LIMIT) + return 0; + if (!addr) + addr = TASK_UNMAPPED_BASE; + + if (current->thread.flags & IA64_THREAD_MAP_SHARED) + addr = COLOR_ALIGN(addr); + else + addr = PAGE_ALIGN(addr); + + for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) { + /* At this point: (!vmm || addr < vmm->vm_end). */ + if (TASK_SIZE - len < addr) + return 0; + if (rgn_offset(addr) + len > RGN_MAP_LIMIT) /* no risk of overflow here... */ + return 0; + if (!vmm || addr + len <= vmm->vm_start) + return addr; + addr = vmm->vm_end; + } +} + asmlinkage long ia64_getpriority (int which, int who, long arg2, long arg3, long arg4, long arg5, long arg6, long arg7, long stack) @@ -34,6 +64,7 @@ ia64_getpriority (int which, int who, long arg2, long arg3, long arg4, long arg5 return prio; } +/* XXX obsolete, but leave it here until the old libc is gone... */ asmlinkage unsigned long sys_getpagesize (void) { @@ -58,16 +89,61 @@ ia64_shmat (int shmid, void *shmaddr, int shmflg, long arg3, long arg4, long arg } asmlinkage unsigned long -ia64_brk (long brk, long arg1, long arg2, long arg3, +ia64_brk (unsigned long brk, long arg1, long arg2, long arg3, long arg4, long arg5, long arg6, long arg7, long stack) { - extern unsigned long sys_brk (unsigned long brk); + extern int vm_enough_memory (long pages); struct pt_regs *regs = (struct pt_regs *) &stack; - unsigned long retval; + unsigned long rlim, retval, newbrk, oldbrk; + struct mm_struct *mm = current->mm; + + /* + * Most of this replicates the code in sys_brk() except for an additional safety + * check and the clearing of r8. 
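(get_unmapped_area() above rounds MAP_SHARED search addresses up to a SHMLBA boundary so that shared mappings of the same object land on the same cache colour; COLOR_ALIGN is ordinary align-up arithmetic. A stand-alone illustration; the 1 MB SHMLBA used here is only an assumed example value, the real one comes from <asm/shmparam.h>:)

#include <stdio.h>

#define SHMLBA (1024UL*1024)    /* assumed for illustration */
#define COLOR_ALIGN(addr) (((addr) + SHMLBA - 1) & ~(SHMLBA - 1))

int main(void)
{
    unsigned long addr = 0x2000000000000123UL;
    printf("0x%lx -> 0x%lx\n", addr, COLOR_ALIGN(addr));
    /* 0x2000000000000123 -> 0x2000000000100000: pushed up to the next SHMLBA multiple */
    return 0;
}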
However, we can't call sys_brk() because we need + * to acquire the mmap_sem before we can do the test... + */ + down(&mm->mmap_sem); - retval = sys_brk(brk); + if (brk < mm->end_code) + goto out; + newbrk = PAGE_ALIGN(brk); + oldbrk = PAGE_ALIGN(mm->brk); + if (oldbrk == newbrk) + goto set_brk; + + /* Always allow shrinking brk. */ + if (brk <= mm->brk) { + if (!do_munmap(mm, newbrk, oldbrk-newbrk)) + goto set_brk; + goto out; + } - regs->r8 = 0; /* ensure large retval isn't mistaken as error code */ + /* Check against unimplemented/unmapped addresses: */ + if ((newbrk - oldbrk) > RGN_MAP_LIMIT || rgn_offset(newbrk) > RGN_MAP_LIMIT) + goto out; + + /* Check against rlimit.. */ + rlim = current->rlim[RLIMIT_DATA].rlim_cur; + if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim) + goto out; + + /* Check against existing mmap mappings. */ + if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE)) + goto out; + + /* Check if we have enough memory.. */ + if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) + goto out; + + /* Ok, looks good - let it rip. */ + if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk) + goto out; +set_brk: + mm->brk = brk; +out: + retval = mm->brk; + up(&mm->mmap_sem); + regs->r8 = 0; /* ensure large retval isn't mistaken as error code */ return retval; } @@ -95,10 +171,8 @@ sys_pipe (long arg0, long arg1, long arg2, long arg3, static inline unsigned long do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff) { - unsigned long loff, hoff; + unsigned long roff; struct file *file = 0; - /* the virtual address space that is mappable in each region: */ -# define OCTANT_SIZE ((PTRS_PER_PGD<<PGDIR_SHIFT)/8) /* * A zero mmap always succeeds in Linux, independent of @@ -107,15 +181,12 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un if (PAGE_ALIGN(len) == 0) return addr; - /* Don't permit mappings into or across the address hole in a region: */ - loff = rgn_offset(addr); - hoff = loff - (RGN_SIZE - OCTANT_SIZE/2); - if ((len | loff | (loff + len)) >= OCTANT_SIZE/2 - && (len | hoff | (hoff + len)) >= OCTANT_SIZE/2) + /* don't permit mappings into unmapped space or the virtual page table of a region: */ + roff = rgn_offset(addr); + if ((len | roff | (roff + len)) >= RGN_MAP_LIMIT) return -EINVAL; - /* Don't permit mappings that would cross a region boundary: */ - + /* don't permit mappings that would cross a region boundary: */ if (rgn_index(addr) != rgn_index(addr + len)) return -EINVAL; @@ -126,10 +197,15 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un return -EBADF; } + if (flags & MAP_SHARED) + current->thread.flags |= IA64_THREAD_MAP_SHARED; + down(¤t->mm->mmap_sem); addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); up(¤t->mm->mmap_sem); + current->thread.flags &= ~IA64_THREAD_MAP_SHARED; + if (file) fput(file); return addr; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 5e54e4f4b..8f65adc2c 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -152,19 +152,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { int cpu = smp_processor_id(); unsigned long new_itm; -#if 0 - static unsigned long last_time; - static unsigned char count; - int printed = 0; -#endif - /* - * Here we are in the timer irq handler. We have irqs locally - * disabled, but we don't know if the timer_bh is running on - * another CPU. We need to avoid to SMP race by acquiring the - * xtime_lock. 
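(The reworked do_mmap2() above rejects any mapping that would run past the mappable part of a region or straddle two regions; on IA-64 the top three virtual-address bits select the region, so both tests reduce to shifts and masks. A hedged sketch of the two checks; RGN_MAP_LIMIT here is a placeholder, the real constant lives in the ia64 headers:)

#include <stdio.h>
#include <stdint.h>

#define RGN_SHIFT       61                  /* address bits 63-61 select one of 8 regions */
#define RGN_INDEX(a)    ((uint64_t)(a) >> RGN_SHIFT)
#define RGN_OFFSET(a)   ((uint64_t)(a) & ((1UL << RGN_SHIFT) - 1))
#define RGN_MAP_LIMIT   (1UL << 60)         /* placeholder for "mappable space per region" */

/* 1 if [addr, addr+len) stays inside the mappable part of a single region */
static int mmap_range_ok(uint64_t addr, uint64_t len)
{
    uint64_t roff = RGN_OFFSET(addr);

    if ((len | roff | (roff + len)) >= RGN_MAP_LIMIT)   /* same OR trick as the patch */
        return 0;
    if (RGN_INDEX(addr) != RGN_INDEX(addr + len))       /* must not cross a region boundary */
        return 0;
    return 1;
}

int main(void)
{
    printf("%d\n", mmap_range_ok(0x2000000000000000UL, 1UL << 20));     /* 1 */
    printf("%d\n", mmap_range_ok(0x3fffffffffff0000UL, 1UL << 20));     /* 0: too close to the top of its region */
    return 0;
}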
- */ - write_lock(&xtime_lock); new_itm = itm.next[cpu].count; if (!time_after(ia64_get_itc(), new_itm)) @@ -173,48 +161,33 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) while (1) { /* - * Do kernel PC profiling here. We multiply the - * instruction number by four so that we can use a - * prof_shift of 2 to get instruction-level instead of - * just bundle-level accuracy. + * Do kernel PC profiling here. We multiply the instruction number by + * four so that we can use a prof_shift of 2 to get instruction-level + * instead of just bundle-level accuracy. */ if (!user_mode(regs)) do_profile(regs->cr_iip + 4*ia64_psr(regs)->ri); #ifdef CONFIG_SMP smp_do_timer(regs); - if (smp_processor_id() == 0) - do_timer(regs); -#else - do_timer(regs); #endif + if (smp_processor_id() == 0) { + /* + * Here we are in the timer irq handler. We have irqs locally + * disabled, but we don't know if the timer_bh is running on + * another CPU. We need to avoid to SMP race by acquiring the + * xtime_lock. + */ + write_lock(&xtime_lock); + do_timer(regs); + write_unlock(&xtime_lock); + } new_itm += itm.delta; itm.next[cpu].count = new_itm; if (time_after(new_itm, ia64_get_itc())) break; - -#if 0 - /* - * SoftSDV in SMP mode is _slow_, so we do "lose" ticks, - * but it's really OK... - */ - if (count > 0 && jiffies - last_time > 5*HZ) - count = 0; - if (count++ == 0) { - last_time = jiffies; - if (!printed) { - printk("Lost clock tick on CPU %d (now=%lx, next=%lx)!!\n", - cpu, ia64_get_itc(), itm.next[cpu].count); - printed = 1; -# ifdef CONFIG_IA64_DEBUG_IRQ - printk("last_cli_ip=%lx\n", last_cli_ip); -# endif - } - } -#endif } - write_unlock(&xtime_lock); /* * If we're too close to the next clock tick for comfort, we @@ -229,7 +202,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) ia64_set_itm(new_itm); } -#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_IA64_SOFTSDV_HACKS) +#ifdef CONFIG_IA64_SOFTSDV_HACKS /* * Interrupts must be disabled before calling this routine. @@ -240,7 +213,7 @@ ia64_reset_itm (void) timer_interrupt(0, 0, ia64_task_regs(current)); } -#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ +#endif /* * Encapsulate access to the itm structure for SMP. diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 43340bf85..fd8369291 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -78,7 +78,7 @@ void die_if_kernel (char *str, struct pt_regs *regs, long err) { if (user_mode(regs)) { -#if 1 +#if 0 /* XXX for debugging only */ printk ("!!die_if_kernel: %s(%d): %s %ld\n", current->comm, current->pid, str, err); @@ -484,6 +484,20 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, sprintf(buf, "Disabled FPL fault---not supposed to happen!"); break; + case 26: /* NaT Consumption */ + case 31: /* Unsupported Data Reference */ + if (user_mode(regs)) { + siginfo.si_signo = SIGILL; + siginfo.si_code = ILL_ILLOPN; + siginfo.si_errno = 0; + siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); + siginfo.si_imm = vector; + force_sig_info(SIGILL, &siginfo, current); + return; + } + sprintf(buf, (vector == 26) ? "NaT consumption" : "Unsupported data reference"); + break; + case 29: /* Debug */ case 35: /* Taken Branch Trap */ case 36: /* Single Step Trap */ @@ -522,10 +536,10 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, case 34: /* Unimplemented Instruction Address Trap */ if (user_mode(regs)) { - printk("Woah! 
Unimplemented Instruction Address Trap!\n"); - siginfo.si_code = ILL_BADIADDR; siginfo.si_signo = SIGILL; + siginfo.si_code = ILL_BADIADDR; siginfo.si_errno = 0; + siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); force_sig_info(SIGILL, &siginfo, current); return; } @@ -544,7 +558,8 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, case 46: printk("Unexpected IA-32 intercept trap (Trap 46)\n"); - printk(" iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", regs->cr_iip, ifa, isr); + printk(" iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n", + regs->cr_iip, ifa, isr, iim); force_sig(SIGSEGV, current); return; diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 7cc238a83..a24121a26 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -572,7 +572,8 @@ getreg(unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs) */ if (regnum == 0) { *val = 0; - *nat = 0; + if (nat) + *nat = 0; return; } @@ -1563,9 +1564,13 @@ ia64_handle_unaligned(unsigned long ifa, struct pt_regs *regs) DPRINT(("ret=%d\n", ret)); if (ret) { - lock_kernel(); - force_sig(SIGSEGV, current); - unlock_kernel(); + struct siginfo si; + + si.si_signo = SIGBUS; + si.si_errno = 0; + si.si_code = BUS_ADRALN; + si.si_addr = (void *) ifa; + force_sig_info(SIGBUS, &si, current); } else { /* * given today's architecture this case is not likely to happen diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index 21a2ead16..f5ae7e497 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -46,16 +46,6 @@ #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define p5 5 -/* - * The unwind tables are supposed to be sorted, but the GNU toolchain - * currently fails to produce a sorted table in the presence of - * functions that go into sections other than .text. For example, the - * kernel likes to put initialization code into .text.init, which - * messes up the sort order. Hopefully, this will get fixed sometime - * soon. --davidm 00/05/23 - */ -#define UNWIND_TABLE_SORT_BUG - #define UNW_LOG_CACHE_SIZE 7 /* each unw_script is ~256 bytes in size */ #define UNW_CACHE_SIZE (1 << UNW_LOG_CACHE_SIZE) @@ -531,6 +521,10 @@ push (struct unw_state_record *sr) struct unw_reg_state *rs; rs = alloc_reg_state(); + if (!rs) { + printk("unwind: cannot stack reg state!\n"); + return; + } memcpy(rs, &sr->curr, sizeof(*rs)); rs->next = sr->stack; sr->stack = rs; @@ -1964,23 +1958,6 @@ init_unwind_table (struct unw_table *table, const char *name, unsigned long segm { struct unw_table_entry *start = table_start, *end = table_end; -#ifdef UNWIND_TABLE_SORT_BUG - { - struct unw_table_entry *e1, *e2, tmp; - - /* stupid bubble sort... 
*/ - - for (e1 = start; e1 < end; ++e1) { - for (e2 = e1 + 1; e2 < end; ++e2) { - if (e2->start_offset < e1->start_offset) { - tmp = *e1; - *e1 = *e2; - *e2 = tmp; - } - } - } - } -#endif table->name = name; table->segment_base = segment_base; table->gp = gp; @@ -2023,8 +2000,8 @@ unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned lon void unw_remove_unwind_table (void *handle) { - struct unw_table *table, *prevt; - struct unw_script *tmp, *prev; + struct unw_table *table, *prev; + struct unw_script *tmp; unsigned long flags; long index; @@ -2043,41 +2020,35 @@ unw_remove_unwind_table (void *handle) { /* first, delete the table: */ - for (prevt = (struct unw_table *) &unw.tables; prevt; prevt = prevt->next) - if (prevt->next == table) + for (prev = (struct unw_table *) &unw.tables; prev; prev = prev->next) + if (prev->next == table) break; - if (!prevt) { + if (!prev) { dprintk("unwind: failed to find unwind table %p\n", (void *) table); spin_unlock_irqrestore(&unw.lock, flags); return; } - prevt->next = table->next; + prev->next = table->next; + } + spin_unlock_irqrestore(&unw.lock, flags); - /* next, remove hash table entries for this table */ + /* next, remove hash table entries for this table */ - for (index = 0; index <= UNW_HASH_SIZE; ++index) { - if (unw.hash[index] >= UNW_CACHE_SIZE) - continue; + for (index = 0; index <= UNW_HASH_SIZE; ++index) { + tmp = unw.cache + unw.hash[index]; + if (unw.hash[index] >= UNW_CACHE_SIZE + || tmp->ip < table->start || tmp->ip >= table->end) + continue; - tmp = unw.cache + unw.hash[index]; - prev = 0; - while (1) { - write_lock(&tmp->lock); - { - if (tmp->ip >= table->start && tmp->ip < table->end) { - if (prev) - prev->coll_chain = tmp->coll_chain; - else - unw.hash[index] = -1; - tmp->ip = 0; - } else - prev = tmp; - } - write_unlock(&tmp->lock); + write_lock(&tmp->lock); + { + if (tmp->ip >= table->start && tmp->ip < table->end) { + unw.hash[index] = tmp->coll_chain; + tmp->ip = 0; } } + write_unlock(&tmp->lock); } - spin_unlock_irqrestore(&unw.lock, flags); kfree(table); } diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile index 90e697179..5759108f8 100644 --- a/arch/ia64/lib/Makefile +++ b/arch/ia64/lib/Makefile @@ -7,22 +7,23 @@ L_TARGET = lib.a -L_OBJS = __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ +obj-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \ checksum.o clear_page.o csum_partial_copy.o copy_page.o \ copy_user.o clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \ - flush.o do_csum.o + flush.o do_csum.o \ + swiotlb.o ifneq ($(CONFIG_ITANIUM_ASTEP_SPECIFIC),y) - L_OBJS += memcpy.o memset.o strlen.o + obj-y += memcpy.o memset.o strlen.o endif -LX_OBJS = io.o +export-objs += io.o IGNORE_FLAGS_OBJS = __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o -$(L_TARGET): +$(L_TARGET): $(obj-y) $(export-objs) __divdi3.o: idiv64.S $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -o $@ $< diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S index e13febb22..cabbf6653 100644 --- a/arch/ia64/lib/copy_user.S +++ b/arch/ia64/lib/copy_user.S @@ -65,6 +65,12 @@ // // local registers // +#define t1 r2 // rshift in bytes +#define t2 r3 // lshift in bytes +#define rshift r14 // right shift in bits +#define lshift r15 // left shift in bits +#define word1 r16 +#define word2 r17 #define cnt r18 #define len2 r19 #define saved_lc r20 @@ -134,6 +140,190 @@ GLOBAL_ENTRY(__copy_user) br.ret.sptk.few rp // end of short 
memcpy // + // Not 8-byte aligned + // +diff_align_copy_user: + // At this point we know we have more than 16 bytes to copy + // and also that src and dest do _not_ have the same alignment. + and src2=0x7,src1 // src offset + and dst2=0x7,dst1 // dst offset + ;; + // The basic idea is that we copy byte-by-byte at the head so + // that we can reach 8-byte alignment for both src1 and dst1. + // Then copy the body using software pipelined 8-byte copy, + // shifting the two back-to-back words right and left, then copy + // the tail by copying byte-by-byte. + // + // Fault handling. If the byte-by-byte at the head fails on the + // load, then restart and finish the pipleline by copying zeros + // to the dst1. Then copy zeros for the rest of dst1. + // If 8-byte software pipeline fails on the load, do the same as + // failure_in3 does. If the byte-by-byte at the tail fails, it is + // handled simply by failure_in_pipe1. + // + // The case p14 represents the source has more bytes in the + // the first word (by the shifted part), whereas the p15 needs to + // copy some bytes from the 2nd word of the source that has the + // tail of the 1st of the destination. + // + + // + // Optimization. If dst1 is 8-byte aligned (not rarely), we don't need + // to copy the head to dst1, to start 8-byte copy software pipleline. + // We know src1 is not 8-byte aligned in this case. + // + cmp.eq p14,p15=r0,dst2 +(p15) br.cond.spnt.few 1f + ;; + sub t1=8,src2 + mov t2=src2 + ;; + shl rshift=t2,3 + sub len1=len,t1 // set len1 + ;; + sub lshift=64,rshift + ;; + br.cond.spnt.few word_copy_user + ;; +1: + cmp.leu p14,p15=src2,dst2 + sub t1=dst2,src2 + ;; + .pred.rel "mutex", p14, p15 +(p14) sub word1=8,src2 // (8 - src offset) +(p15) sub t1=r0,t1 // absolute value +(p15) sub word1=8,dst2 // (8 - dst offset) + ;; + // For the case p14, we don't need to copy the shifted part to + // the 1st word of destination. + sub t2=8,t1 +(p14) sub word1=word1,t1 + ;; + sub len1=len,word1 // resulting len +(p15) shl rshift=t1,3 // in bits +(p14) shl rshift=t2,3 + ;; +(p14) sub len1=len1,t1 + adds cnt=-1,word1 + ;; + sub lshift=64,rshift + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are false + mov ar.lc=cnt + ;; +2: + EX(failure_in_pipe2,(p16) ld1 val1[0]=[src1],1) + ;; + EX(failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) + br.ctop.dptk.few 2b + ;; + clrrrb + ;; +word_copy_user: + cmp.gtu p9,p0=16,len1 +(p9) br.cond.spnt.few 4f // if (16 > len1) skip 8-byte copy + ;; + shr.u cnt=len1,3 // number of 64-bit words + ;; + adds cnt=-1,cnt + ;; + .pred.rel "mutex", p14, p15 +(p14) sub src1=src1,t2 +(p15) sub src1=src1,t1 + // + // Now both src1 and dst1 point to an 8-byte aligned address. And + // we have more than 8 bytes to copy. + // + mov ar.lc=cnt + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are false + ;; +3: + // + // The pipleline consists of 3 stages: + // 1 (p16): Load a word from src1 + // 2 (EPI_1): Shift right pair, saving to tmp + // 3 (EPI): Store tmp to dst1 + // + // To make it simple, use at least 2 (p16) loops to set up val1[n] + // because we need 2 back-to-back val1[] to get tmp. + // Note that this implies EPI_2 must be p18 or greater. 
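(The software-pipelined body described above keeps two consecutive aligned source words in flight and uses shrp to stitch them into each aligned destination word; because shrp needs an immediate shift count, the SWITCH/CASE/BODY macros that follow instantiate seven copies of the loop, one per byte offset. The word-stitching step in plain scalar C, as a simplified sketch of the idea rather than a translation of the assembly; little-endian, with rshift non-zero as in the misaligned path:)

#include <stdio.h>
#include <stdint.h>

/* Build aligned destination words from a source that is misaligned by
 * rshift bits (rshift = 8 * (src & 7), non-zero here): each output word is
 * stitched from two neighbouring aligned source words, which is what the
 * shrp in the pipelined loop above does in one instruction. */
static void copy_words_shifted(uint64_t *dst, const uint64_t *src_aligned,
                               unsigned int rshift, size_t nwords)
{
    unsigned int lshift = 64 - rshift;
    uint64_t w0 = src_aligned[0];
    size_t i;

    for (i = 0; i < nwords; i++) {
        uint64_t w1 = src_aligned[i + 1];
        dst[i] = (w0 >> rshift) | (w1 << lshift);   /* little-endian funnel shift */
        w0 = w1;
    }
}

int main(void)
{
    uint64_t words[4];
    unsigned char *buf = (unsigned char *) words;
    uint64_t out[2];
    int i;

    for (i = 0; i < 32; i++)
        buf[i] = i;
    copy_words_shifted(out, words, 24, 2);  /* as if the source started at byte offset 3 */
    printf("%02x %02x\n", (unsigned) (out[0] & 0xff), (unsigned) (out[1] & 0xff));
    /* prints "03 0b": the stitched words start at buf[3] and buf[11] */
    return 0;
}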
+ // + +#define EPI_1 p[PIPE_DEPTH-2] +#define SWITCH(pred, shift) cmp.eq pred,p0=shift,rshift +#define CASE(pred, shift) \ + (pred) br.cond.spnt.few copy_user_bit##shift +#define BODY(rshift) \ +copy_user_bit##rshift: \ +1: \ + EX(failure_out,(EPI) st8 [dst1]=tmp,8); \ +(EPI_1) shrp tmp=val1[PIPE_DEPTH-3],val1[PIPE_DEPTH-2],rshift; \ + EX(failure_in2,(p16) ld8 val1[0]=[src1],8); \ + br.ctop.dptk.few 1b; \ + ;; \ + br.cond.spnt.few .diff_align_do_tail + + // + // Since the instruction 'shrp' requires a fixed 128-bit value + // specifying the bits to shift, we need to provide 7 cases + // below. + // + SWITCH(p6, 8) + SWITCH(p7, 16) + SWITCH(p8, 24) + SWITCH(p9, 32) + SWITCH(p10, 40) + SWITCH(p11, 48) + SWITCH(p12, 56) + ;; + CASE(p6, 8) + CASE(p7, 16) + CASE(p8, 24) + CASE(p9, 32) + CASE(p10, 40) + CASE(p11, 48) + CASE(p12, 56) + ;; + BODY(8) + BODY(16) + BODY(24) + BODY(32) + BODY(40) + BODY(48) + BODY(56) + ;; +.diff_align_do_tail: + .pred.rel "mutex", p14, p15 +(p14) sub src1=src1,t1 +(p14) adds dst1=-8,dst1 +(p15) sub dst1=dst1,t1 + ;; +4: + // Tail correction. + // + // The problem with this piplelined loop is that the last word is not + // loaded and thus parf of the last word written is not correct. + // To fix that, we simply copy the tail byte by byte. + + sub len1=endsrc,src1,1 + clrrrb + ;; + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are false + mov ar.lc=len1 + ;; +5: + EX(failure_in_pipe1,(p16) ld1 val1[0]=[src1],1) + + EX(failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) + br.ctop.dptk.few 5b + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.pfs=saved_pfs + br.ret.dptk.few rp + + // // Beginning of long mempcy (i.e. > 16 bytes) // long_copy_user: @@ -142,7 +332,7 @@ long_copy_user: ;; cmp.eq p10,p8=r0,tmp mov len1=len // copy because of rotation -(p8) br.cond.dpnt.few 1b // XXX Fixme. memcpy_diff_align +(p8) br.cond.dpnt.few diff_align_copy_user ;; // At this point we know we have more than 16 bytes to copy // and also that both src and dest have the same alignment @@ -267,6 +457,21 @@ failure_in_pipe1: mov ar.pfs=saved_pfs br.ret.dptk.few rp + // + // This is the case where the byte by byte copy fails on the load + // when we copy the head. We need to finish the pipeline and copy + // zeros for the rest of the destination. Since this happens + // at the top we still need to fill the body and tail. +failure_in_pipe2: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied +2: +(p16) mov val1[0]=r0 +(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1 + br.ctop.dptk.few 2b + ;; + sub len=enddst,dst1,1 // precompute len + br.cond.dptk.few failure_in1bis + ;; // // Here we handle the head & tail part when we check for alignment. @@ -395,6 +600,23 @@ failure_in3: mov ar.pfs=saved_pfs br.ret.dptk.few rp +failure_in2: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied + ;; +3: +(p16) mov val1[0]=r0 +(EPI) st8 [dst1]=val1[PIPE_DEPTH-1],8 + br.ctop.dptk.few 3b + ;; + cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ? 
+ sub len=enddst,dst1,1 // precompute len +(p6) br.cond.dptk.few failure_in1bis + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.dptk.few rp + // // handling of failures on stores: that's the easy part // diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S index ba9d59f84..9911b0184 100644 --- a/arch/ia64/lib/flush.S +++ b/arch/ia64/lib/flush.S @@ -12,29 +12,33 @@ .psr lsb .lsb -GLOBAL_ENTRY(ia64_flush_icache_page) + /* + * flush_icache_range(start,end) + * Must flush range from start to end-1 but nothing else (need to + * be careful not to touch addresses that may be unmapped). + */ +GLOBAL_ENTRY(flush_icache_range) UNW(.prologue) - alloc r2=ar.pfs,1,0,0,0 + alloc r2=ar.pfs,2,0,0,0 + sub r8=in1,in0,1 + ;; + shr.u r8=r8,5 // we flush 32 bytes per iteration UNW(.save ar.lc, r3) mov r3=ar.lc // save ar.lc + ;; .body - mov r8=PAGE_SIZE/64-1 // repeat/until loop - ;; mov ar.lc=r8 - add r8=32,in0 ;; -.Loop1: fc in0 // issuable on M0 only - add in0=64,in0 - fc r8 - add r8=64,r8 - br.cloop.sptk.few .Loop1 +.Loop: fc in0 // issuable on M0 only + add in0=32,in0 + br.cloop.sptk.few .Loop ;; sync.i ;; srlz.i ;; mov ar.lc=r3 // restore ar.lc - br.ret.sptk.few rp -END(ia64_flush_icache_page) + br.ret.sptk.many rp +END(flush_icache_range) diff --git a/arch/ia64/lib/io.c b/arch/ia64/lib/io.c index baa408e01..cbc662c93 100644 --- a/arch/ia64/lib/io.c +++ b/arch/ia64/lib/io.c @@ -1,3 +1,4 @@ +#include <linux/config.h> #include <linux/types.h> #include <asm/io.h> @@ -48,3 +49,54 @@ __ia64_memset_c_io (unsigned long dst, unsigned long c, long count) } } +#ifdef CONFIG_IA64_GENERIC + +unsigned int +ia64_inb (unsigned long port) +{ + return __ia64_inb(port); +} + +unsigned int +ia64_inw (unsigned long port) +{ + return __ia64_inw(port); +} + +unsigned int +ia64_inl (unsigned long port) +{ + return __ia64_inl(port); +} + +void +ia64_outb (unsigned char val, unsigned long port) +{ + __ia64_outb(val, port); +} + +void +ia64_outw (unsigned short val, unsigned long port) +{ + __ia64_outw(val, port); +} + +void +ia64_outl (unsigned int val, unsigned long port) +{ + __ia64_outl(val, port); +} + +/* define aliases: */ + +asm (".global __ia64_inb, __ia64_inw, __ia64_inl"); +asm ("__ia64_inb = ia64_inb"); +asm ("__ia64_inw = ia64_inw"); +asm ("__ia64_inl = ia64_inl"); + +asm (".global __ia64_outb, __ia64_outw, __ia64_outl"); +asm ("__ia64_outb = ia64_outb"); +asm ("__ia64_outw = ia64_outw"); +asm ("__ia64_outl = ia64_outl"); + +#endif /* CONFIG_IA64_GENERIC */ diff --git a/arch/ia64/lib/memcpy.S b/arch/ia64/lib/memcpy.S index 350e66256..151302c96 100644 --- a/arch/ia64/lib/memcpy.S +++ b/arch/ia64/lib/memcpy.S @@ -17,17 +17,31 @@ #include <asm/asmmacro.h> +#if defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC) +# define BRP(args...) nop.b 0 +#else +# define BRP(args...) 
brp.loop.imp args +#endif + GLOBAL_ENTRY(bcopy) .regstk 3,0,0,0 mov r8=in0 mov in0=in1 ;; mov in1=r8 + // gas doesn't handle control flow across procedures, so it doesn't + // realize that a stop bit is needed before the "alloc" instruction + // below +{ + nop.m 0 + nop.f 0 + nop.i 0 +} ;; END(bcopy) // FALL THROUGH GLOBAL_ENTRY(memcpy) -# define MEM_LAT 2 /* latency to L1 cache */ +# define MEM_LAT 21 /* latency to memory */ # define dst r2 # define src r3 @@ -57,20 +71,17 @@ GLOBAL_ENTRY(memcpy) UNW(.prologue) UNW(.save ar.pfs, saved_pfs) alloc saved_pfs=ar.pfs,3,Nrot,0,Nrot -#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) - lfetch [in1] -#else - nop.m 0 -#endif + UNW(.save ar.lc, saved_lc) + mov saved_lc=ar.lc or t0=in0,in1 ;; or t0=t0,in2 - UNW(.save ar.lc, saved_lc) - mov saved_lc=ar.lc UNW(.save pr, saved_pr) mov saved_pr=pr + UNW(.body) + cmp.eq p6,p0=in2,r0 // zero length? mov retval=in0 // return dst (p6) br.ret.spnt.many rp // zero length, return immediately @@ -83,7 +94,6 @@ GLOBAL_ENTRY(memcpy) adds cnt=-1,cnt // br.ctop is repeat/until cmp.gtu p7,p0=16,in2 // copying less than 16 bytes? - UNW(.body) mov ar.ec=N ;; @@ -96,12 +106,26 @@ GLOBAL_ENTRY(memcpy) (p7) br.cond.spnt.few memcpy_short (p6) br.cond.spnt.few memcpy_long ;; + nop.m 0 + ;; + nop.m 0 + nop.i 0 + ;; + nop.m 0 + ;; .rotr val[N] .rotp p[N] -1: + .align 32 +1: { .mib (p[0]) ld8 val[0]=[src],8 + nop.i 0 + BRP(1b, 2f) +} +2: { .mfb (p[N-1])st8 [dst]=val[N-1],8 + nop.f 0 br.ctop.dptk.few 1b +} ;; mov ar.lc=saved_lc mov pr=saved_pr,-1 @@ -118,19 +142,34 @@ GLOBAL_ENTRY(memcpy) memcpy_short: adds cnt=-1,in2 // br.ctop is repeat/until mov ar.ec=MEM_LAT + BRP(1f, 2f) ;; mov ar.lc=cnt ;; + nop.m 0 + ;; + nop.m 0 + nop.i 0 + ;; + nop.m 0 + ;; + nop.m 0 + ;; /* * It is faster to put a stop bit in the loop here because it makes * the pipeline shorter (and latency is what matters on short copies). */ -1: + .align 32 +1: { .mib (p[0]) ld1 val[0]=[src],1 - ;; + nop.i 0 + BRP(1b, 2f) +} ;; +2: { .mfb (p[MEM_LAT-1])st1 [dst]=val[MEM_LAT-1],1 + nop.f 0 br.ctop.dptk.few 1b - ;; +} ;; mov ar.lc=saved_lc mov pr=saved_pr,-1 mov ar.pfs=saved_pfs @@ -227,6 +266,13 @@ memcpy_long: mov pr=cnt,0x38 // set (p5,p4,p3) to # of bytes last-word bytes to copy mov ar.lc=t2 ;; + nop.m 0 + ;; + nop.m 0 + nop.i 0 + ;; + nop.m 0 + ;; (p6) ld8 val[1]=[src2],8 // prime the pump... mov b6=t4 br.sptk.few b6 @@ -251,17 +297,16 @@ memcpy_tail: .align 64 #define COPY(shift,index) \ - 1: \ - { .mfi \ + 1: { .mib \ (p[0]) ld8 val[0]=[src2],8; \ - nop.f 0; \ (p[MEM_LAT+3]) shrp w[0]=val[MEM_LAT+3],val[MEM_LAT+4-index],shift; \ - }; \ - { .mbb \ + BRP(1b, 2f) \ + }; \ + 2: { .mfb \ (p[MEM_LAT+4]) st8 [dst]=w[1],8; \ - nop.b 0; \ + nop.f 0; \ br.ctop.dptk.few 1b; \ - }; \ + }; \ ;; \ ld8 val[N-1]=[src_end]; /* load last word (may be same as val[N]) */ \ ;; \ diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/lib/swiotlb.c index 6293cdfa0..534729ccd 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/lib/swiotlb.c @@ -5,10 +5,11 @@ * I/O TLBs (aka DMA address translation hardware). * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com> * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com> + * + * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid + * unnecessary i-cache flushing. 
*/ -#include <linux/config.h> - #include <linux/mm.h> #include <linux/pci.h> #include <linux/spinlock.h> @@ -19,22 +20,20 @@ #include <asm/pci.h> #include <asm/dma.h> -#ifdef CONFIG_SWIOTLB - #include <linux/init.h> #include <linux/bootmem.h> -#define ALIGN(val, align) ((unsigned long) (((unsigned long) (val) + ((align) - 1)) & ~((align) - 1))) +#define ALIGN(val, align) ((unsigned long) \ + (((unsigned long) (val) + ((align) - 1)) & ~((align) - 1))) /* - * log of the size of each IO TLB slab. The number of slabs is command line - * controllable. + * log of the size of each IO TLB slab. The number of slabs is command line controllable. */ #define IO_TLB_SHIFT 11 /* - * Used to do a quick range check in pci_unmap_single and pci_sync_single, to see if the - * memory was in fact allocated by this API. + * Used to do a quick range check in swiotlb_unmap_single and swiotlb_sync_single, to see + * if the memory was in fact allocated by this API. */ static char *io_tlb_start, *io_tlb_end; @@ -42,7 +41,7 @@ static char *io_tlb_start, *io_tlb_end; * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and io_tlb_end. * This is command line adjustable via setup_io_tlb_npages. */ -unsigned long io_tlb_nslabs = 1024; +static unsigned long io_tlb_nslabs = 1024; /* * This is a free list describing the number of free entries available from each index @@ -59,7 +58,7 @@ static unsigned char **io_tlb_orig_addr; /* * Protect the above data structures in the map and unmap calls */ -spinlock_t io_tlb_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t io_tlb_lock = SPIN_LOCK_UNLOCKED; static int __init setup_io_tlb_npages (char *str) @@ -70,11 +69,11 @@ setup_io_tlb_npages (char *str) __setup("swiotlb=", setup_io_tlb_npages); /* - * Statically reserve bounce buffer space and initialize bounce buffer - * data structures for the software IO TLB used to implement the PCI DMA API + * Statically reserve bounce buffer space and initialize bounce buffer data structures for + * the software IO TLB used to implement the PCI DMA API. */ void -setup_swiotlb (void) +swiotlb_init (void) { int i; @@ -105,11 +104,12 @@ setup_swiotlb (void) * Allocates bounce buffer and returns its kernel virtual address. */ static void * -__pci_map_single (struct pci_dev *hwdev, char *buffer, size_t size, int direction) +map_single (struct pci_dev *hwdev, char *buffer, size_t size, int direction) { unsigned long flags; char *dma_addr; - unsigned int i, nslots, stride, index, wrap; + unsigned int nslots, stride, index, wrap; + int i; /* * For mappings greater than a page size, we limit the stride (and hence alignment) @@ -125,27 +125,36 @@ __pci_map_single (struct pci_dev *hwdev, char *buffer, size_t size, int directio BUG(); /* - * Find suitable number of IO TLB entries size that will fit this request and allocate a buffer - * from that IO TLB pool. + * Find suitable number of IO TLB entries size that will fit this request and + * allocate a buffer from that IO TLB pool. */ spin_lock_irqsave(&io_tlb_lock, flags); { wrap = index = ALIGN(io_tlb_index, stride); + + if (index >= io_tlb_nslabs) + wrap = index = 0; + do { /* - * If we find a slot that indicates we have 'nslots' number of - * contiguous buffers, we allocate the buffers from that slot and mark the - * entries as '0' indicating unavailable. + * If we find a slot that indicates we have 'nslots' number of + * contiguous buffers, we allocate the buffers from that slot and + * mark the entries as '0' indicating unavailable. 
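As the comment above describes, io_tlb_list[] is kept so that a single compare answers whether 'nslots' contiguous buffers start at a given index. A minimal sketch of that allocation bookkeeping -- illustration only, with hypothetical names and types, not the code from this patch:

        // Assumed invariant: list[i] is the number of contiguous free slots
        // starting at slot i, and 0 means the slot is in use.
        static int
        alloc_slots(unsigned int *list, int index, int nslots)
        {
                int i, count = 0;

                if (list[index] < nslots)
                        return -1;              // not enough room at this index
                for (i = index; i < index + nslots; i++)
                        list[i] = 0;            // mark the run as in use
                for (i = index - 1; i >= 0 && list[i]; i--)
                        list[i] = ++count;      // re-count the free run that now ends at index
                return index;
        }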
*/ if (io_tlb_list[index] >= nslots) { + int count = 0; + for (i = index; i < index + nslots; i++) io_tlb_list[i] = 0; + for (i = index - 1; (i >= 0) && io_tlb_list[i]; i--) + io_tlb_list[i] = ++count; dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); /* * Update the indices to avoid searching in the next round. */ - io_tlb_index = (index + nslots) < io_tlb_nslabs ? (index + nslots) : 0; + io_tlb_index = ((index + nslots) < io_tlb_nslabs + ? (index + nslots) : 0); goto found; } @@ -158,14 +167,14 @@ __pci_map_single (struct pci_dev *hwdev, char *buffer, size_t size, int directio * XXX What is a suitable recovery mechanism here? We cannot * sleep because we are called from with in interrupts! */ - panic("__pci_map_single: could not allocate software IO TLB (%ld bytes)", size); + panic("map_single: could not allocate software IO TLB (%ld bytes)", size); found: } spin_unlock_irqrestore(&io_tlb_lock, flags); /* - * Save away the mapping from the original address to the DMA address. This is needed - * when we sync the memory. Then we sync the buffer if needed. + * Save away the mapping from the original address to the DMA address. This is + * needed when we sync the memory. Then we sync the buffer if needed. */ io_tlb_orig_addr[index] = buffer; if (direction == PCI_DMA_TODEVICE || direction == PCI_DMA_BIDIRECTIONAL) @@ -178,7 +187,7 @@ found: * dma_addr is the kernel virtual address of the bounce buffer to unmap. */ static void -__pci_unmap_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction) +unmap_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction) { unsigned long flags; int i, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; @@ -190,36 +199,38 @@ __pci_unmap_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int dire */ if ((direction == PCI_DMA_FROMDEVICE) || (direction == PCI_DMA_BIDIRECTIONAL)) /* - * bounce... copy the data back into the original buffer - * and delete the bounce buffer. + * bounce... copy the data back into the original buffer * and delete the + * bounce buffer. */ memcpy(buffer, dma_addr, size); /* - * Return the buffer to the free list by setting the corresponding entries to indicate - * the number of contigous entries available. - * While returning the entries to the free list, we merge the entries with slots below - * and above the pool being returned. + * Return the buffer to the free list by setting the corresponding entries to + * indicate the number of contigous entries available. While returning the + * entries to the free list, we merge the entries with slots below and above the + * pool being returned. */ spin_lock_irqsave(&io_tlb_lock, flags); { int count = ((index + nslots) < io_tlb_nslabs ? 
io_tlb_list[index + nslots] : 0); /* - * Step 1: return the slots to the free list, merging the slots with superceeding slots + * Step 1: return the slots to the free list, merging the slots with + * superceeding slots */ for (i = index + nslots - 1; i >= index; i--) io_tlb_list[i] = ++count; /* - * Step 2: merge the returned slots with the preceeding slots, if available (non zero) + * Step 2: merge the returned slots with the preceeding slots, if + * available (non zero) */ for (i = index - 1; (i >= 0) && io_tlb_list[i]; i--) - io_tlb_list[i] += io_tlb_list[index]; + io_tlb_list[i] = ++count; } spin_unlock_irqrestore(&io_tlb_lock, flags); } static void -__pci_sync_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction) +sync_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction) { int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; char *buffer = io_tlb_orig_addr[index]; @@ -236,15 +247,42 @@ __pci_sync_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direc BUG(); } +void * +swiotlb_alloc_consistent (struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) +{ + unsigned long pci_addr; + int gfp = GFP_ATOMIC; + void *ret; + + if (!hwdev || hwdev->dma_mask <= 0xffffffff) + gfp |= GFP_DMA; /* XXX fix me: should change this to GFP_32BIT or ZONE_32BIT */ + ret = (void *)__get_free_pages(gfp, get_order(size)); + if (!ret) + return NULL; + + memset(ret, 0, size); + pci_addr = virt_to_phys(ret); + if ((pci_addr & ~hwdev->dma_mask) != 0) + panic("swiotlb_alloc_consistent: allocated memory is out of range for PCI device"); + *dma_handle = pci_addr; + return ret; +} + +void +swiotlb_free_consistent (struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) +{ + free_pages((unsigned long) vaddr, get_order(size)); +} + /* - * Map a single buffer of the indicated size for DMA in streaming mode. - * The PCI address to use is returned. + * Map a single buffer of the indicated size for DMA in streaming mode. The PCI address + * to use is returned. * - * Once the device is given the dma address, the device owns this memory - * until either pci_unmap_single or pci_dma_sync_single is performed. + * Once the device is given the dma address, the device owns this memory until either + * swiotlb_unmap_single or swiotlb_dma_sync_single is performed. */ dma_addr_t -pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction) +swiotlb_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction) { unsigned long pci_addr = virt_to_phys(ptr); @@ -255,71 +293,96 @@ pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction) */ if ((pci_addr & ~hwdev->dma_mask) == 0) /* - * Device is bit capable of DMA'ing to the - * buffer... just return the PCI address of ptr + * Device is bit capable of DMA'ing to the buffer... just return the PCI + * address of ptr */ return pci_addr; /* * get a bounce buffer: */ - pci_addr = virt_to_phys(__pci_map_single(hwdev, ptr, size, direction)); + pci_addr = virt_to_phys(map_single(hwdev, ptr, size, direction)); /* * Ensure that the address returned is DMA'ble: */ if ((pci_addr & ~hwdev->dma_mask) != 0) - panic("__pci_map_single: bounce buffer is not DMA'ble"); + panic("map_single: bounce buffer is not DMA'ble"); return pci_addr; } /* - * Unmap a single streaming mode DMA translation. The dma_addr and size - * must match what was provided for in a previous pci_map_single call. All - * other usages are undefined. 
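For the return path shown a little earlier in this file's diff (the "Step 1"/"Step 2" loops in unmap_single), the same invariant is restored by counting upward through the block being freed and then through the free run below it. A sketch, again with hypothetical names rather than the patch's code:

        static void
        free_slots(unsigned int *list, int nslabs, int index, int nslots)
        {
                int i;
                // start from the length of the free run that follows, so the
                // freed block merges with it
                int count = (index + nslots < nslabs) ? list[index + nslots] : 0;

                for (i = index + nslots - 1; i >= index; i--)
                        list[i] = ++count;      // step 1: return the block itself
                for (i = index - 1; i >= 0 && list[i]; i--)
                        list[i] = ++count;      // step 2: extend the free run below across it
        }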
+ * Since DMA is i-cache coherent, any (complete) pages that were written via + * DMA can be marked as "clean" so that update_mmu_cache() doesn't have to + * flush them when they get mapped into an executable vm-area. + */ +static void +mark_clean (void *addr, size_t size) +{ + unsigned long pg_addr, end; + + pg_addr = PAGE_ALIGN((unsigned long) addr); + end = (unsigned long) addr + size; + while (pg_addr + PAGE_SIZE <= end) { +#if 0 + set_bit(PG_arch_1, virt_to_page(pg_addr)); +#else + if (!VALID_PAGE(virt_to_page(pg_addr))) + printk("Invalid addr %lx!!!\n", pg_addr); +#endif + pg_addr += PAGE_SIZE; + } +} + +/* + * Unmap a single streaming mode DMA translation. The dma_addr and size must match what + * was provided for in a previous swiotlb_map_single call. All other usages are + * undefined. * - * After this call, reads by the cpu to the buffer are guarenteed to see - * whatever the device wrote there. + * After this call, reads by the cpu to the buffer are guarenteed to see whatever the + * device wrote there. */ void -pci_unmap_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction) +swiotlb_unmap_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction) { char *dma_addr = phys_to_virt(pci_addr); if (direction == PCI_DMA_NONE) BUG(); if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) - __pci_unmap_single(hwdev, dma_addr, size, direction); + unmap_single(hwdev, dma_addr, size, direction); + else if (direction == PCI_DMA_FROMDEVICE) + mark_clean(dma_addr, size); } /* - * Make physical memory consistent for a single - * streaming mode DMA translation after a transfer. + * Make physical memory consistent for a single streaming mode DMA translation after a + * transfer. * - * If you perform a pci_map_single() but wish to interrogate the - * buffer using the cpu, yet do not wish to teardown the PCI dma - * mapping, you must call this function before doing so. At the - * next point you give the PCI dma address back to the card, the - * device again owns the buffer. + * If you perform a swiotlb_map_single() but wish to interrogate the buffer using the cpu, + * yet do not wish to teardown the PCI dma mapping, you must call this function before + * doing so. At the next point you give the PCI dma address back to the card, the device + * again owns the buffer. */ void -pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction) +swiotlb_sync_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction) { char *dma_addr = phys_to_virt(pci_addr); if (direction == PCI_DMA_NONE) BUG(); if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) - __pci_sync_single(hwdev, dma_addr, size, direction); + sync_single(hwdev, dma_addr, size, direction); + else if (direction == PCI_DMA_FROMDEVICE) + mark_clean(dma_addr, size); } /* - * Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scather-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). + * Map a set of buffers described by scatterlist in streaming mode for DMA. This is the + * scather-gather version of the above swiotlb_map_single interface. Here the scatter + * gather list elements are each tagged with the appropriate dma address and length. They + * are obtained via sg_dma_{address,length}(SG). 
* * NOTE: An implementation may be able to use a smaller number of * DMA address/length pairs than there are SG table elements. @@ -327,11 +390,10 @@ pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, in * The routine returns the number of addr/length pairs actually * used, at most nents. * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. + * Device ownership issues as mentioned above for swiotlb_map_single are the same here. */ int -pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +swiotlb_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) { int i; @@ -341,19 +403,18 @@ pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direc for (i = 0; i < nelems; i++, sg++) { sg->orig_address = sg->address; if ((virt_to_phys(sg->address) & ~hwdev->dma_mask) != 0) { - sg->address = __pci_map_single(hwdev, sg->address, sg->length, direction); + sg->address = map_single(hwdev, sg->address, sg->length, direction); } } return nelems; } /* - * Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above. + * Unmap a set of streaming mode DMA translations. Again, cpu read rules concerning calls + * here are the same as for swiotlb_unmap_single() above. */ void -pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +swiotlb_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) { int i; @@ -362,20 +423,21 @@ pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int dir for (i = 0; i < nelems; i++, sg++) if (sg->orig_address != sg->address) { - __pci_unmap_single(hwdev, sg->address, sg->length, direction); + unmap_single(hwdev, sg->address, sg->length, direction); sg->address = sg->orig_address; - } + } else if (direction == PCI_DMA_FROMDEVICE) + mark_clean(sg->address, sg->length); } /* - * Make physical memory consistent for a set of streaming mode DMA - * translations after a transfer. + * Make physical memory consistent for a set of streaming mode DMA translations after a + * transfer. * - * The same as pci_dma_sync_single but for a scatter-gather list, - * same rules and usage. + * The same as swiotlb_dma_sync_single but for a scatter-gather list, same rules and + * usage. */ void -pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +swiotlb_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) { int i; @@ -384,134 +446,11 @@ pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int for (i = 0; i < nelems; i++, sg++) if (sg->orig_address != sg->address) - __pci_sync_single(hwdev, sg->address, sg->length, direction); -} - -#else -/* - * Map a single buffer of the indicated size for DMA in streaming mode. - * The 32-bit bus address to use is returned. - * - * Once the device is given the dma address, the device owns this memory - * until either pci_unmap_single or pci_dma_sync_single is performed. - */ -dma_addr_t -pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - return virt_to_bus(ptr); -} - -/* - * Unmap a single streaming mode DMA translation. The dma_addr and size - * must match what was provided for in a previous pci_map_single call. All - * other usages are undefined. 
- * - * After this call, reads by the cpu to the buffer are guarenteed to see - * whatever the device wrote there. - */ -void -pci_unmap_single (struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} -/* - * Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scather-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. - */ -int -pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - return nents; + sync_single(hwdev, sg->address, sg->length, direction); } -/* - * Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above. - */ -void -pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} -/* - * Make physical memory consistent for a single - * streaming mode DMA translation after a transfer. - * - * If you perform a pci_map_single() but wish to interrogate the - * buffer using the cpu, yet do not wish to teardown the PCI dma - * mapping, you must call this function before doing so. At the - * next point you give the PCI dma address back to the card, the - * device again owns the buffer. - */ -void -pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction) +unsigned long +swiotlb_dma_address (struct scatterlist *sg) { - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} - -/* - * Make physical memory consistent for a set of streaming mode DMA - * translations after a transfer. - * - * The same as pci_dma_sync_single but for a scatter-gather list, - * same rules and usage. - */ -void -pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} - -#endif /* CONFIG_SWIOTLB */ - -void * -pci_alloc_consistent (struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) -{ - unsigned long pci_addr; - int gfp = GFP_ATOMIC; - void *ret; - - if (!hwdev || hwdev->dma_mask <= 0xffffffff) - gfp |= GFP_DMA; /* XXX fix me: should change this to GFP_32BIT or ZONE_32BIT */ - ret = (void *)__get_free_pages(gfp, get_order(size)); - if (!ret) - return NULL; - - memset(ret, 0, size); - pci_addr = virt_to_phys(ret); - if ((pci_addr & ~hwdev->dma_mask) != 0) - panic("pci_alloc_consistent: allocated memory is out of range for PCI device"); - *dma_handle = pci_addr; - return ret; -} - -void -pci_free_consistent (struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) -{ - free_pages((unsigned long) vaddr, get_order(size)); + return virt_to_phys(sg->address); } diff --git a/arch/ia64/mm/Makefile b/arch/ia64/mm/Makefile index ab2b95cf9..02dee5d57 100644 --- a/arch/ia64/mm/Makefile +++ b/arch/ia64/mm/Makefile @@ -8,7 +8,7 @@ # Note 2! 
The CFLAGS definition is now in the main makefile... O_TARGET := mm.o -#O_OBJS := ioremap.o -O_OBJS := init.o fault.o tlb.o extable.o + +obj-y := init.o fault.o tlb.o extable.o include $(TOPDIR)/Rules.make diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index eaac24372..1a2438917 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -94,7 +94,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re * sure we exit gracefully rather than endlessly redo the * fault. */ - switch (handle_mm_fault(mm, vma, address, (mask & VM_WRITE) != 0)) { + switch (handle_mm_fault(mm, vma, address, mask) != 0) { case 1: ++current->min_flt; break; @@ -119,19 +119,27 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re if (!(prev_vma && (prev_vma->vm_flags & VM_GROWSUP) && (address == prev_vma->vm_end))) { if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; + if (rgn_index(address) != rgn_index(vma->vm_start) + || rgn_offset(address) >= RGN_MAP_LIMIT) + goto bad_area; if (expand_stack(vma, address)) goto bad_area; - } else if (expand_backing_store(prev_vma, address)) - goto bad_area; + } else { + vma = prev_vma; + if (rgn_index(address) != rgn_index(vma->vm_start) + || rgn_offset(address) >= RGN_MAP_LIMIT) + goto bad_area; + if (expand_backing_store(vma, address)) + goto bad_area; + } goto good_area; bad_area: up(&mm->mmap_sem); if (isr & IA64_ISR_SP) { /* - * This fault was due to a speculative load set the - * "ed" bit in the psr to ensure forward progress - * (target register will get a NaT). + * This fault was due to a speculative load set the "ed" bit in the psr to + * ensure forward progress (target register will get a NaT). */ ia64_psr(regs)->ed = 1; return; @@ -146,6 +154,15 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re } no_context: + if (isr & IA64_ISR_SP) { + /* + * This fault was due to a speculative load set the "ed" bit in the psr to + * ensure forward progress (target register will get a NaT). + */ + ia64_psr(regs)->ed = 1; + return; + } + fix = search_exception_table(regs->cr_iip); if (fix) { regs->r8 = -EFAULT; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index bfbb2050e..7615b389f 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -1,8 +1,8 @@ /* * Initialize MMU support. 
* - * Copyright (C) 1998, 1999 Hewlett-Packard Co - * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> */ #include <linux/config.h> #include <linux/kernel.h> @@ -19,6 +19,7 @@ #include <asm/efi.h> #include <asm/ia32.h> #include <asm/io.h> +#include <asm/machvec.h> #include <asm/pgalloc.h> #include <asm/sal.h> #include <asm/system.h> @@ -303,7 +304,7 @@ put_gate_page (struct page *page, unsigned long address) return 0; } flush_page_to_ram(page); - set_pte(pte, page_pte_prot(page, PAGE_GATE)); + set_pte(pte, mk_pte(page, PAGE_GATE)); /* no need for flush_tlb */ return page; } @@ -311,7 +312,12 @@ put_gate_page (struct page *page, unsigned long address) void __init ia64_rid_init (void) { - unsigned long flags, rid, pta, impl_va_msb; + unsigned long flags, rid, pta, impl_va_bits; +#ifdef CONFIG_DISABLE_VHPT +# define VHPT_ENABLE_BIT 0 +#else +# define VHPT_ENABLE_BIT 1 +#endif /* Set up the kernel identity mappings (regions 6 & 7) and the vmalloc area (region 5): */ ia64_clear_ic(flags); @@ -328,44 +334,46 @@ ia64_rid_init (void) __restore_flags(flags); /* - * Check if the virtually mapped linear page table (VMLPT) - * overlaps with a mapped address space. The IA-64 - * architecture guarantees that at least 50 bits of virtual - * address space are implemented but if we pick a large enough - * page size (e.g., 64KB), the VMLPT is big enough that it - * will overlap with the upper half of the kernel mapped - * region. I assume that once we run on machines big enough - * to warrant 64KB pages, IMPL_VA_MSB will be significantly - * bigger, so we can just adjust the number below to get - * things going. Alternatively, we could truncate the upper - * half of each regions address space to not permit mappings - * that would overlap with the VMLPT. --davidm 99/11/13 + * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped + * address space. The IA-64 architecture guarantees that at least 50 bits of + * virtual address space are implemented but if we pick a large enough page size + * (e.g., 64KB), the mapped address space is big enough that it will overlap with + * VMLPT. I assume that once we run on machines big enough to warrant 64KB pages, + * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a + * problem in practice. Alternatively, we could truncate the top of the mapped + * address space to not permit mappings that would overlap with the VMLPT. + * --davidm 00/12/06 + */ +# define pte_bits 3 +# define mapped_space_bits (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT) + /* + * The virtual page table has to cover the entire implemented address space within + * a region even though not all of this space may be mappable. The reason for + * this is that the Access bit and Dirty bit fault handlers perform + * non-speculative accesses to the virtual page table, so the address range of the + * virtual page table itself needs to be covered by virtual page table. 
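As a worked example of the sizing constraint described above, using assumed values that the patch does not fix: 8KB pages (PAGE_SHIFT = 13) and impl_va_bits = 51, the smallest value the new sanity check accepts.

        mapped_space_bits = 3*(13 - 3) + 13 = 43    -> a region can map at most 2^43 bytes (8TB)
        vmlpt_bits        = 51 - 13 + 3     = 41    -> the VMLPT needs 2^41 bytes (2TB)
        pta               = 2^61 - 2^41             -> VMLPT placed at the very top of the region

Since 2^43 is well below 2^61 - 2^41, the page-table-mapped space and the VMLPT do not overlap and the overlap check passes; vmlpt_bits and pta themselves are computed by the lines that follow.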
*/ -# define ld_pte_size 3 -# define ld_max_addr_space_pages 3*(PAGE_SHIFT - ld_pte_size) /* max # of mappable pages */ -# define ld_max_addr_space_size (ld_max_addr_space_pages + PAGE_SHIFT) -# define ld_max_vpt_size (ld_max_addr_space_pages + ld_pte_size) +# define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits) # define POW2(n) (1ULL << (n)) - impl_va_msb = ffz(~my_cpu_data.unimpl_va_mask) - 1; - if (impl_va_msb < 50 || impl_va_msb > 60) - panic("Bogus impl_va_msb value of %lu!\n", impl_va_msb); + impl_va_bits = ffz(~my_cpu_data.unimpl_va_mask); + + if (impl_va_bits < 51 || impl_va_bits > 61) + panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1); + + /* place the VMLPT at the end of each page-table mapped region: */ + pta = POW2(61) - POW2(vmlpt_bits); - if (POW2(ld_max_addr_space_size - 1) + POW2(ld_max_vpt_size) > POW2(impl_va_msb)) + if (POW2(mapped_space_bits) >= pta) panic("mm/init: overlap between virtually mapped linear page table and " "mapped kernel space!"); - pta = POW2(61) - POW2(impl_va_msb); -#ifndef CONFIG_DISABLE_VHPT /* * Set the (virtually mapped linear) page table address. Bit * 8 selects between the short and long format, bits 2-7 the * size of the table, and bit 0 whether the VHPT walker is * enabled. */ - ia64_set_pta(pta | (0<<8) | ((3*(PAGE_SHIFT-3)+3)<<2) | 1); -#else - ia64_set_pta(pta | (0<<8) | ((3*(PAGE_SHIFT-3)+3)<<2) | 0); -#endif + ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT); } /* @@ -421,6 +429,15 @@ mem_init (void) extern char __start_gate_section[]; long reserved_pages, codesize, datasize, initsize; +#ifdef CONFIG_PCI + /* + * This needs to be called _after_ the command line has been parsed but _before_ + * any drivers that may need the PCI DMA interface are initialized or bootmem has + * been freed. + */ + platform_pci_dma_init(); +#endif + if (!mem_map) BUG(); diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 875ce446c..f880c73ee 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -6,6 +6,8 @@ * * 08/02/00 A. Mallick <asit.k.mallick@intel.com> * Modified RID allocation for SMP + * Goutham Rao <goutham.rao@intel.com> + * IPI based ptc implementation and A-step IPI implementation. */ #include <linux/config.h> #include <linux/init.h> @@ -17,6 +19,7 @@ #include <asm/mmu_context.h> #include <asm/pgalloc.h> #include <asm/pal.h> +#include <asm/delay.h> #define SUPPORTED_PGBITS ( \ 1 << _PAGE_SIZE_256M | \ @@ -33,15 +36,10 @@ struct ia64_ctx ia64_ctx = { lock: SPIN_LOCK_UNLOCKED, next: 1, - limit: (1UL << IA64_HW_CONTEXT_BITS) + limit: (1 << 15) - 1, /* start out with the safe (architected) limit */ + max_ctx: ~0U }; - /* - * Put everything in a struct so we avoid the global offset table whenever - * possible. - */ -ia64_ptce_info_t ia64_ptce_info; - /* * Seralize usage of ptc.g */ @@ -99,9 +97,22 @@ flush_tlb_no_ptcg (unsigned long start, unsigned long end, unsigned long nbits) /* * Wait for other CPUs to finish purging entries. 
*/ +#if (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) + { + unsigned long start = ia64_get_itc(); + while (atomic_read(&flush_cpu_count) > 0) { + if ((ia64_get_itc() - start) > 40000UL) { + atomic_set(&flush_cpu_count, smp_num_cpus - 1); + smp_send_flush_tlb(); + start = ia64_get_itc(); + } + } + } +#else while (atomic_read(&flush_cpu_count)) { /* Nothing */ } +#endif if (!(flags & IA64_PSR_I)) { local_irq_disable(); ia64_set_tpr(saved_tpr); @@ -117,12 +128,12 @@ flush_tlb_no_ptcg (unsigned long start, unsigned long end, unsigned long nbits) void wrap_mmu_context (struct mm_struct *mm) { + unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx; struct task_struct *tsk; - unsigned long tsk_context; - if (ia64_ctx.next >= (1UL << IA64_HW_CONTEXT_BITS)) + if (ia64_ctx.next > max_ctx) ia64_ctx.next = 300; /* skip daemons */ - ia64_ctx.limit = (1UL << IA64_HW_CONTEXT_BITS); + ia64_ctx.limit = max_ctx + 1; /* * Scan all the task's mm->context and set proper safe range @@ -137,9 +148,9 @@ wrap_mmu_context (struct mm_struct *mm) if (tsk_context == ia64_ctx.next) { if (++ia64_ctx.next >= ia64_ctx.limit) { /* empty range: reset the range limit and start over */ - if (ia64_ctx.next >= (1UL << IA64_HW_CONTEXT_BITS)) + if (ia64_ctx.next > max_ctx) ia64_ctx.next = 300; - ia64_ctx.limit = (1UL << IA64_HW_CONTEXT_BITS); + ia64_ctx.limit = max_ctx + 1; goto repeat; } } @@ -153,12 +164,13 @@ wrap_mmu_context (struct mm_struct *mm) void __flush_tlb_all (void) { - unsigned long i, j, flags, count0, count1, stride0, stride1, addr = ia64_ptce_info.base; + unsigned long i, j, flags, count0, count1, stride0, stride1, addr; - count0 = ia64_ptce_info.count[0]; - count1 = ia64_ptce_info.count[1]; - stride0 = ia64_ptce_info.stride[0]; - stride1 = ia64_ptce_info.stride[1]; + addr = my_cpu_data.ptce_base; + count0 = my_cpu_data.ptce_count[0]; + count1 = my_cpu_data.ptce_count[1]; + stride0 = my_cpu_data.ptce_stride[0]; + stride1 = my_cpu_data.ptce_stride[1]; local_irq_save(flags); for (i = 0; i < count0; ++i) { @@ -182,7 +194,11 @@ flush_tlb_range (struct mm_struct *mm, unsigned long start, unsigned long end) if (mm != current->active_mm) { /* this does happen, but perhaps it's not worth optimizing for? */ +#ifdef CONFIG_SMP + flush_tlb_all(); +#else mm->context = 0; +#endif return; } @@ -230,6 +246,14 @@ flush_tlb_range (struct mm_struct *mm, unsigned long start, unsigned long end) void __init ia64_tlb_init (void) { - ia64_get_ptce(&ia64_ptce_info); + ia64_ptce_info_t ptce_info; + + ia64_get_ptce(&ptce_info); + my_cpu_data.ptce_base = ptce_info.base; + my_cpu_data.ptce_count[0] = ptce_info.count[0]; + my_cpu_data.ptce_count[1] = ptce_info.count[1]; + my_cpu_data.ptce_stride[0] = ptce_info.stride[0]; + my_cpu_data.ptce_stride[1] = ptce_info.stride[1]; + __flush_tlb_all(); /* nuke left overs from bootstrapping... */ } diff --git a/arch/ia64/sn/Makefile b/arch/ia64/sn/Makefile index b35ce21ff..1575cda30 100644 --- a/arch/ia64/sn/Makefile +++ b/arch/ia64/sn/Makefile @@ -5,15 +5,10 @@ # Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) # -CFLAGS := $(CFLAGS) -DCONFIG_SGI_SN1 -DSN1 -DSN -DSOFTSDV \ - -DLANGUAGE_C=1 -D_LANGUAGE_C=1 -AFLAGS := $(AFLAGS) -DCONFIG_SGI_SN1 -DSN1 -DSOFTSDV - -.S.s: - $(CPP) $(AFLAGS) -o $*.s $< -.S.o: - $(CC) $(AFLAGS) -c -o $*.o $< - +EXTRA_CFLAGS := -DSN -DLANGUAGE_C=1 -D_LANGUAGE_C=1 -I. 
-DBRINGUP \ + -DDIRECT_L1_CONSOLE -DNUMA_BASE -DSIMULATED_KLGRAPH \ + -DNUMA_MIGR_CONTROL -DLITTLE_ENDIAN -DREAL_HARDWARE \ + -DNEW_INTERRUPTS -DCONFIG_IA64_SGI_IO all: sn.a O_TARGET = sn.a diff --git a/arch/ia64/sn/fprom/Makefile b/arch/ia64/sn/fprom/Makefile new file mode 100644 index 000000000..2192f6ea8 --- /dev/null +++ b/arch/ia64/sn/fprom/Makefile @@ -0,0 +1,30 @@ +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2000 Silicon Graphics, Inc. +# Copyright (C) Jack Steiner (steiner@sgi.com) +# + +TOPDIR=../../../.. +HPATH = $(TOPDIR)/include + +LIB = ../../lib/lib.a + +OBJ=fpromasm.o main.o fw-emu.o fpmem.o + +fprom: $(OBJ) + $(LD) -static -Tfprom.lds -o fprom $(OBJ) $(LIB) + +.S.o: + $(CC) -D__ASSEMBLY__ $(AFLAGS) $(AFLAGS_KERNEL) -c -o $*.o $< +.c.o: + $(CC) $(CFLAGS) $(CFLAGS_KERNEL) -c -o $*.o $< + +clean: + rm -f *.o fprom + + +include $(TOPDIR)/Rules.make + diff --git a/arch/ia64/sn/fprom/README b/arch/ia64/sn/fprom/README new file mode 100644 index 000000000..263c2a8b4 --- /dev/null +++ b/arch/ia64/sn/fprom/README @@ -0,0 +1,85 @@ +This directory contains the files required to build +the fake PROM image that is currently being used to +boot IA64 kernels running under the SGI Medusa kernel. + +The FPROM currently provides the following functions: + + - PAL emulation for all PAL calls we've made so far. + - SAL emulation for all SAL calls we've made so far. + - EFI emulation for all EFI calls we've made so far. + - builds the "ia64_bootparam" structure that is + passed to the kernel from SAL. This structure + shows the cpu & memory configurations. + - supports medusa boottime options for changing + the number of cpus present + - supports medusa boottime options for changing + the memory configuration. + + + +At some point, this fake PROM will be replaced by the +real PROM. + + + + +To build a fake PROM, cd to this directory & type: + + make + +This will (or should) build a fake PROM named "fprom". + + + + +Use this fprom image when booting the Medusa simulator. The +control file used to boot Medusa should include the +following lines: + + load fprom + load vmlinux + sr pc 0x100000 + sr g 9 <address of kernel _start function> #(currently 0xe000000000520000) + +NOTE: There is a script "runsim" in this directory that can be used to +simplify setting up an environment for running under Medusa. + + + + +The following parameters may be passed to the fake PROM to +control the PAL/SAL/EFI parameters passed to the kernel: + + GR[8] = # of cpus + GR[9] = address of primary entry point into the kernel + GR[20] = memory configuration for node 0 + GR[21] = memory configuration for node 1 + GR[22] = memory configuration for node 2 + GR[23] = memory configuration for node 3 + + +Registers GR[20] - GR[23] contain information to specify the +amount of memory present on nodes 0-3. + + - if nothing is specified (all registers are 0), the configuration + defaults to 8 MB on node 0. + + - a mem config entry for node N is passed in GR[20+N] + + - a mem config entry consists of 8 hex digits. Each digit gives the + amount of physical memory available on the node starting at + 1GB*<dn>, where dn is the digit number. The amount of memory + is 8MB*2**<d>. (If <d> = 0, the memory size is 0). + + SN1 doesnt support dimms this small but small memory systems + boot faster on Medusa. + + + +An example helps a lot. 
The following specifies that node 0 has +physical memory 0 to 8MB and 1GB to 1GB+32MB, and that node 1 has +64MB starting at address 0 of the node which is 8GB. + + gr[20] = 0x21 # 0 to 8MB, 1GB to 1GB+32MB + gr[21] = 0x4 # 8GB to 8GB+64MB + diff --git a/arch/ia64/sn/fprom/fpmem.c b/arch/ia64/sn/fprom/fpmem.c new file mode 100644 index 000000000..14f62bfc3 --- /dev/null +++ b/arch/ia64/sn/fprom/fpmem.c @@ -0,0 +1,200 @@ +/* + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Jack Steiner (steiner@sgi.com) + */ + + +/* + * FPROM EFI memory descriptor build routines + * + * - Routines to build the EFI memory descriptor map + * - Should also be usable by the SGI SN1 prom to convert + * klconfig to efi_memmap + */ + +#include <asm/efi.h> +#include "fpmem.h" + +/* + * args points to a layout in memory like this + * + * 32 bit 32 bit + * + * numnodes numcpus + * + * 16 bit 16 bit 32 bit + * nasid0 cpuconf membankdesc0 + * nasid1 cpuconf membankdesc1 + * . + * . + * . + * . + * . + */ + +sn_memmap_t *sn_memmap ; +sn_config_t *sn_config ; + +/* + * There is a hole in the node 0 address space. Dont put it + * in the memory map + */ +#define NODE0_HOLE_SIZE (20*MB) +#define NODE0_HOLE_END (4UL*GB) + +#define MB (1024*1024) +#define GB (1024*MB) +#define KERNEL_SIZE (4*MB) +#define PROMRESERVED_SIZE (1*MB) +#define MD_BANK_SHFT 30 + +#define TO_NODE(_n, _x) (((long)_n<<33L) | (long)_x) + +/* + * For SN, this may not take an arg and gets the numnodes from + * the prom variable or by traversing klcfg or promcfg + */ +int +GetNumNodes(void) +{ + return sn_config->nodes; +} + +int +GetNumCpus(void) +{ + return sn_config->cpus; +} + +/* For SN1, get the index th nasid */ + +int +GetNasid(int index) +{ + return sn_memmap[index].nasid ; +} + +node_memmap_t +GetMemBankInfo(int index) +{ + return sn_memmap[index].node_memmap ; +} + +int +IsCpuPresent(int cnode, int cpu) +{ + return sn_memmap[cnode].cpuconfig & (1<<cpu); +} + + +/* + * Made this into an explicit case statement so that + * we can assign specific properties to banks like bank0 + * actually disabled etc. 
+ */ + +int +IsBankPresent(int index, node_memmap_t nmemmap) +{ + switch (index) { + case 0:return nmemmap.b0; + case 1:return nmemmap.b1; + case 2:return nmemmap.b2; + case 3:return nmemmap.b3; + case 4:return nmemmap.b4; + case 5:return nmemmap.b5; + case 6:return nmemmap.b6; + case 7:return nmemmap.b7; + default:return -1 ; + } +} + +int +GetBankSize(int index, node_memmap_t nmemmap) +{ + switch (index) { + case 0: + case 1:return nmemmap.b01size; + case 2: + case 3:return nmemmap.b23size; + case 4: + case 5:return nmemmap.b45size; + case 6: + case 7:return nmemmap.b67size; + default:return -1 ; + } +} + +void +build_mem_desc(efi_memory_desc_t *md, int type, long paddr, long numbytes) +{ + md->type = type; + md->phys_addr = paddr; + md->virt_addr = 0; + md->num_pages = numbytes >> 12; + md->attribute = EFI_MEMORY_WB; +} + +int +build_efi_memmap(void *md, int mdsize) +{ + int numnodes = GetNumNodes() ; + int cnode,bank ; + int nasid ; + node_memmap_t membank_info ; + int bsize; + int count = 0 ; + long paddr, hole, numbytes; + + + for (cnode=0;cnode<numnodes;cnode++) { + nasid = GetNasid(cnode) ; + membank_info = GetMemBankInfo(cnode) ; + for (bank=0;bank<SN1_MAX_BANK_PER_NODE;bank++) { + if (IsBankPresent(bank, membank_info)) { + bsize = GetBankSize(bank, membank_info) ; + paddr = TO_NODE(nasid, (long)bank<<MD_BANK_SHFT); + numbytes = BankSizeBytes(bsize); + + /* + * Check for the node 0 hole. Since banks cant + * span the hole, we only need to check if the end of + * the range is the end of the hole. + */ + if (paddr+numbytes == NODE0_HOLE_END) + numbytes -= NODE0_HOLE_SIZE; + /* + * UGLY hack - we must skip overr the kernel and + * PROM runtime services but we dont exactly where it is. + * So lets just reserve 0-12MB. + */ + if (bank == 0) { + hole = (cnode == 0) ? KERNEL_SIZE : PROMRESERVED_SIZE; + numbytes -= hole; + build_mem_desc(md, EFI_RUNTIME_SERVICES_DATA, paddr, hole); + paddr += hole; + count++ ; + md += mdsize; + } + build_mem_desc(md, EFI_CONVENTIONAL_MEMORY, paddr, numbytes); + + md += mdsize ; + count++ ; + } + } + } + return count ; +} + +void +build_init(unsigned long args) +{ + sn_config = (sn_config_t *) (args); + sn_memmap = (sn_memmap_t *)(args + 8) ; /* SN equiv for this is */ + /* init to klconfig start */ +} diff --git a/arch/ia64/sn/fprom/fpmem.h b/arch/ia64/sn/fprom/fpmem.h new file mode 100644 index 000000000..bbab73638 --- /dev/null +++ b/arch/ia64/sn/fprom/fpmem.h @@ -0,0 +1,35 @@ +/* + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000 Silicon Graphics, Inc. 
+ * Copyright (C) 2000 by Jack Steiner (steiner@sgi.com) + */ + + + +#include <asm/sn/mmzone_sn1.h> + +typedef struct sn_memmap_s +{ + short nasid ; + short cpuconfig; + node_memmap_t node_memmap ; +} sn_memmap_t ; + +typedef struct sn_config_s +{ + int cpus; + int nodes; + sn_memmap_t memmap[1]; /* start of array */ +} sn_config_t; + + +extern void build_init(unsigned long); +extern int build_efi_memmap(void *, int); +extern int GetNumNodes(void); +extern int GetNumCpus(void); +extern int IsCpuPresent(int, int); +extern int GetNasid(int); diff --git a/arch/ia64/sn/fprom/fprom.lds b/arch/ia64/sn/fprom/fprom.lds new file mode 100644 index 000000000..8f416ec83 --- /dev/null +++ b/arch/ia64/sn/fprom/fprom.lds @@ -0,0 +1,96 @@ + +OUTPUT_FORMAT("elf64-ia64-little") +OUTPUT_ARCH(ia64) +ENTRY(_start) +SECTIONS +{ + v = 0x0000000000000000 ; /* this symbol is here to make debugging with kdb easier... */ + + . = (0x000000000000000 + 0x100000) ; + + _text = .; + .text : AT(ADDR(.text) - 0x0000000000000000 ) + { + *(__ivt_section) + /* these are not really text pages, but the zero page needs to be in a fixed location: */ + *(__special_page_section) + __start_gate_section = .; + *(__gate_section) + __stop_gate_section = .; + *(.text) + } + + /* Global data */ + _data = .; + + .rodata : AT(ADDR(.rodata) - 0x0000000000000000 ) + { *(.rodata) } + .opd : AT(ADDR(.opd) - 0x0000000000000000 ) + { *(.opd) } + .data : AT(ADDR(.data) - 0x0000000000000000 ) + { *(.data) *(.gnu.linkonce.d*) CONSTRUCTORS } + + __gp = ALIGN (8) + 0x200000; + + .got : AT(ADDR(.got) - 0x0000000000000000 ) + { *(.got.plt) *(.got) } + /* We want the small data sections together, so single-instruction offsets + can access them all, and initialized data all before uninitialized, so + we can shorten the on-disk segment size. */ + .sdata : AT(ADDR(.sdata) - 0x0000000000000000 ) + { *(.sdata) } + _edata = .; + _bss = .; + .sbss : AT(ADDR(.sbss) - 0x0000000000000000 ) + { *(.sbss) *(.scommon) } + .bss : AT(ADDR(.bss) - 0x0000000000000000 ) + { *(.bss) *(COMMON) } + . = ALIGN(64 / 8); + _end = .; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + } + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* These must appear regardless of . */ + /* Discard them for now since Intel SoftSDV cannot handle them. 
+ .comment 0 : { *(.comment) } + .note 0 : { *(.note) } + */ + /DISCARD/ : { *(.comment) } + /DISCARD/ : { *(.note) } +} diff --git a/arch/ia64/sn/fprom/fpromasm.S b/arch/ia64/sn/fprom/fpromasm.S new file mode 100644 index 000000000..332a9a85c --- /dev/null +++ b/arch/ia64/sn/fprom/fpromasm.S @@ -0,0 +1,314 @@ +/* + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * (Code copied from or=ther files) + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * Copyright (C) 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Jack Steiner (steiner@sgi.com) + */ + + + +#define __ASSEMBLY__ 1 +#include "asm/processor.h" + +/* + * This file contains additional set up code that is needed to get going on + * Medusa. This code should disappear once real hw is available. + * + * On entry to this routine, the following register values are assumed: + * + * gr[8] - BSP cpu + * pr[9] - kernel entry address + * + * NOTE: + * This FPROM may be loaded/executed at an address different from the + * address that it was linked at. The FPROM is linked to run on node 0 + * at address 0x100000. If the code in loaded into another node, it + * must be loaded at offset 0x100000 of the node. In addition, the + * FPROM does the following things: + * - determine the base address of the node it is loaded on + * - add the node base to _gp. + * - add the node base to all addresses derived from "movl" + * instructions. (I couldnt get GPREL addressing to work) + * (maybe newer versions of the tools will support this) + * - scan the .got section and add the node base to all + * pointers in this section. + * - add the node base to all physical addresses in the + * SAL/PAL/EFI table built by the C code. (This is done + * in the C code - not here) + * - add the node base to the TLB entries for vmlinux + */ + +#define KERNEL_BASE 0xe000000000000000 +#define PAGESIZE_256M 28 + +/* + * ar.k0 gets set to IOPB_PA value, on 460gx chipset it should + * be 0x00000ffffc000000, but on snia we use the (inverse swizzled) + * IOSPEC_BASE value + */ +#define IOPB_PA 0x00000a0000000000 /* inv swizzle IOSPEC_BASE */ + +#define RR_RID 8 + + + +// ==================================================================================== + .text + .align 16 + .global _start + .proc _start +_start: + +// Setup psr and rse for system init + mov psr.l = r0;; + srlz.d;; + invala + mov ar.rsc = r0;; + loadrs + ;; + +// Set CALIAS size to zero. We dont use it. + movl r24=0x80000a0001000028;; // BR_PI_CALIAS_SIZE + st8 [r24]=r0 + +// Isolate node number we are running on. + mov r6 = ip;; + shr r5 = r6,33;; // r5 = node number + shl r6 = r5,33 // r6 = base memory address of node + +// Set & relocate gp. + movl r1= __gp;; // Add base memory address + add r1 = r1,r6 // Relocate to boot node + +// Lets figure out who we are & put it in the LID register. +// The BR_PI_SELF_CPU_NUM register gives us a value of 0-3. +// This identifies the cpu on the node. +// Merge the cpu number with the NASID to generate the LID. 
+ movl r24=0x80000a0001000020;; // BR_PI_SELF_CPU_NUM + ld8 r25=[r24] // Fetch PI_SELF + movl r27=0x80000a0001600000;; // Fetch REVID to get local NASID + ld8 r27=[r27];; + extr.u r27=r27,32,8 + shl r26=r25,16;; // Align local cpu# to lid.eid + shl r27=r27,24;; // Align NASID to lid.id + or r26=r26,r27;; // build the LID + mov cr.lid=r26 // Now put in in the LID register + + movl r2=FPSR_DEFAULT;; + mov ar.fpsr=r2 + movl sp = bootstacke-16;; + add sp = sp,r6 // Relocate to boot node + +// Save the NASID that we are loaded on. + movl r2=base_nasid;; // Save base_nasid for C code + add r2 = r2,r6;; // Relocate to boot node + st8 [r2]=r5 // Uncond st8 - same on all cpus + +// Save the kernel entry address. It is passed in r9 on one of +// the cpus. + movl r2=bsp_entry_pc + cmp.ne p6,p0=r9,r0;; + add r2 = r2,r6;; // Relocate to boot node +(p6) st8 [r2]=r9 // Uncond st8 - same on all cpus + + +// The following can ONLY be done by 1 cpu. Lets set a lock - the +// cpu that gets it does the initilization. The rest just spin waiting +// til initilization is complete. + movl r22 = initlock;; + add r22 = r22,r6 // Relocate to boot node + mov r23 = 1;; + xchg8 r23 = [r22],r23;; + cmp.eq p6,p0 = 0,r23 +(p6) br.cond.spnt.few init +1: ld4 r23 = [r22];; + cmp.eq p6,p0 = 1,r23 +(p6) br.cond.sptk 1b + br initx + +// Add base address of node memory to each pointer in the .got section. +init: movl r16 = _GLOBAL_OFFSET_TABLE_;; + add r16 = r16,r6;; // Relocate to boot node +1: ld8 r17 = [r16];; + cmp.eq p6,p7=0,r17 +(p6) br.cond.sptk.few.clr 2f;; + add r17 = r17,r6;; // Relocate to boot node + st8 [r16] = r17,8 + br 1b +2: + mov r23 = 2;; // All done, release the spinning cpus + st4 [r22] = r23 +initx: + +// +// I/O-port space base address: +// + movl r2 = IOPB_PA;; + mov ar.k0 = r2 + + +// Now call main & pass it the current LID value. + alloc r0=ar.pfs,0,0,2,0 + mov r32=r26 + mov r33=r8;; + br.call.sptk.few rp=fmain + +// Initialize Region Registers +// + mov r10 = r0 + mov r2 = (13<<2) + mov r3 = r0;; +1: cmp4.gtu p6,p7 = 7, r3 + dep r10 = r3, r10, 61, 3 + dep r2 = r3, r2, RR_RID, 4;; +(p7) dep r2 = 0, r2, 0, 1;; +(p6) dep r2 = -1, r2, 0, 1;; + mov rr[r10] = r2 + add r3 = 1, r3;; + srlz.d;; + cmp4.gtu p6,p0 = 8, r3 +(p6) br.cond.sptk.few.clr 1b + +// +// Return value indicates if we are the BSP or AP. +// 1 = BSP, 0 = AP + mov cr.tpr=r0;; + cmp.eq p6,p0=r8,r0 +(p6) br.cond.spnt slave + +// +// Initialize the protection key registers with only pkr[0] = valid. +// +// Should be initialized in accordance with the OS. +// + mov r2 = 1 + mov r3 = r0;; + mov pkr[r3] = r2;; + srlz.d;; + mov r2 = r0 + +1: add r3 = r3, r0, 1;; // increment PKR + cmp.gtu p6, p0 = 16, r3;; +(p6) mov pkr[r3] = r2 +(p6) br.cond.sptk.few.clr 1b + + mov ar.rnat = r0 // clear RNAT register + +// +// Setup system address translation for kernel +// +// Note: The setup of Kernel Virtual address space can be done by the +// C code of the boot loader. +// +// + +#define LINUX_PAGE_OFFSET 0xe000000000000000 +#define ITIR(key, ps) ((key<<8) | (ps<<2)) +#define ITRGR(ed,ar,ma) ((ed<<52) | (ar<<9) | (ma<<2) | 0x61) + +#define AR_RX 1 // RX permission +#define AR_RW 4 // RW permission +#define MA_WB 0 // WRITEBACK memory attribute + +#define TLB_PAGESIZE 28 // Use 256MB pages for now. 
+ mov r16=r5 + +// +// text section +// + movl r2 = LINUX_PAGE_OFFSET;; // Set up IFA with VPN of linux + mov cr.ifa = r2 + movl r3 = ITIR(0,TLB_PAGESIZE);; // Set ITIR to default pagesize + mov cr.itir = r3 + + shl r4 = r16,33;; // physical addr of start of node + movl r5 = ITRGR(1,AR_RX,MA_WB);; // TLB attributes + or r10=r4,r5;; + + itr.i itr[r0] = r10;; // Dropin ITR entry + srlz.i;; + +// +// data section +// + movl r2 = LINUX_PAGE_OFFSET;; // Set up IFA with VPN of linux + mov cr.ifa = r2 + movl r3 = ITIR(0,TLB_PAGESIZE);; // Set ITIR to default pagesize + mov cr.itir = r3 + + shl r4 = r16,33;; // physical addr of start of node + movl r5 = ITRGR(1,AR_RW,MA_WB);; // TLB attributes + or r10=r4,r5;; + + itr.d dtr[r0] = r10;; // Dropin DTR entry + srlz.d;; + + + + +// +// Turn on address translation, interrupt collection, psr.ed, protection key. +// Interrupts (PSR.i) are still off here. +// + + movl r3 = ( IA64_PSR_BN | \ + IA64_PSR_AC | \ + IA64_PSR_IT | \ + IA64_PSR_DB | \ + IA64_PSR_DA | \ + IA64_PSR_RT | \ + IA64_PSR_DT | \ + IA64_PSR_IC \ + ) + ;; + mov cr.ipsr = r3 + +// +// Go to kernel C startup routines +// Need to do a "rfi" in order set "it" and "ed" bits in the PSR. +// This is the only way to set them. + + movl r2=bsp_entry_pc;; + add r2 = r2,r6;; // Relocate to boot node + ld8 r2=[r2];; + mov cr.iip = r2 + srlz.d;; + rfi;; + .endp _start + +// Slave processors come here to spin til they get an interrupt. Then they launch themselves to +// the place ap_entry points. No initialization is necessary - the kernel makes no +// assumptions about state on this entry. +// Note: should verify that the interrupt we got was really the ap_wakeup +// interrupt but this should not be an issue on medusa +slave: + nop.i 0x8beef // Medusa - put cpu to sleep til interrupt occurs + mov r8=cr.irr0;; // Check for interrupt pending. + cmp.eq p6,p0=r8,r0 +(p6) br.cond.sptk slave;; + + mov r8=cr.ivr;; // Got one. Must read ivr to accept it + srlz.d;; + mov cr.eoi=r0;; // must write eoi to clear + movl r8=ap_entry;; // now jump to kernel entry + add r8 = r8,r6;; // Relocate to boot node + ld8 r9=[r8],8;; + ld8 r1=[r8] + mov b0=r9;; + br b0 + +// Here is the kernel stack used for the fake PROM + .bss + .align 16384 +bootstack: + .skip 16384 +bootstacke: +initlock: + data4 diff --git a/arch/ia64/sn/fprom/fw-emu.c b/arch/ia64/sn/fprom/fw-emu.c new file mode 100644 index 000000000..2d85befca --- /dev/null +++ b/arch/ia64/sn/fprom/fw-emu.c @@ -0,0 +1,492 @@ +/* + * PAL & SAL emulation. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * + * Copyright (C) 2000 Silicon Graphics, Inc. 
+ * Copyright (C) 2000 by Jack Steiner (steiner@sgi.com) + */ +#include <asm/efi.h> +#include <asm/pal.h> +#include <asm/sal.h> +#include <asm/processor.h> +#include <asm/acpi-ext.h> +#include "fpmem.h" + +#define MB (1024*1024UL) +#define GB (MB*1024UL) + +#define FPROM_BUG() do {while (1);} while (0) +#define MAX_NODES 128 +#define MAX_LSAPICS 512 +#define MAX_CPUS 512 +#define MAX_CPUS_NODE 4 +#define CPUS_PER_NODE 4 +#define CPUS_PER_FSB 2 +#define CPUS_PER_FSB_MASK (CPUS_PER_FSB-1) + +#define NUM_EFI_DESCS 2 + +typedef union ia64_nasid_va { + struct { + unsigned long off : 33; /* intra-region offset */ + unsigned long nasid : 7; /* NASID */ + unsigned long off2 : 21; /* fill */ + unsigned long reg : 3; /* region number */ + } f; + unsigned long l; + void *p; +} ia64_nasid_va; + +typedef struct { + unsigned long pc; + unsigned long gp; +} func_ptr_t; + +#define IS_VIRTUAL_MODE() ({struct ia64_psr psr; asm("mov %0=psr" : "=r"(psr)); psr.dt;}) +#define ADDR_OF(p) (IS_VIRTUAL_MODE() ? ((void*)((long)(p)+PAGE_OFFSET)) : ((void*) (p))) +#define __fwtab_pa(n,x) ({ia64_nasid_va _v; _v.l = (long) (x); _v.f.nasid = (x) ? (n) : 0; _v.f.reg = 0; _v.l;}) + +/* + * The following variables are passed thru registersfrom the configuration file and + * are set via the _start function. + */ +long base_nasid; +long num_cpus; +long bsp_entry_pc=0; +long num_nodes; +long app_entry_pc; +int bsp_lid; +func_ptr_t ap_entry; + + +static char fw_mem[( sizeof(efi_system_table_t) + + sizeof(efi_runtime_services_t) + + NUM_EFI_DESCS*sizeof(efi_config_table_t) + + sizeof(struct ia64_sal_systab) + + sizeof(struct ia64_sal_desc_entry_point) + + sizeof(struct ia64_sal_desc_ap_wakeup) + + sizeof(acpi_rsdp_t) + + sizeof(acpi_rsdt_t) + + sizeof(acpi_sapic_t) + + MAX_LSAPICS*(sizeof(acpi_entry_lsapic_t)) + + (1+8*MAX_NODES)*(sizeof(efi_memory_desc_t)) + + sizeof(ia64_sal_desc_ptc_t) + + + MAX_NODES*sizeof(ia64_sal_ptc_domain_info_t) + + + MAX_CPUS*sizeof(ia64_sal_ptc_domain_proc_entry_t) + + + 1024)] __attribute__ ((aligned (8))); + +/* + * Very ugly, but we need this in the simulator only. Once we run on + * real hw, this can all go away. 
+ */ +extern void pal_emulator_static (void); + +asm (" + .text + .proc pal_emulator_static +pal_emulator_static: + mov r8=-1 + cmp.eq p6,p7=6,r28 /* PAL_PTCE_INFO */ +(p7) br.cond.sptk.few 1f + ;; + mov r8=0 /* status = 0 */ + movl r9=0x500000000 /* tc.base */ + movl r10=0x0000000200000003 /* count[0], count[1] */ + movl r11=0x1000000000002000 /* stride[0], stride[1] */ + br.cond.sptk.few rp + +1: cmp.eq p6,p7=14,r28 /* PAL_FREQ_RATIOS */ +(p7) br.cond.sptk.few 1f + mov r8=0 /* status = 0 */ + movl r9 =0x100000064 /* proc_ratio (1/100) */ + movl r10=0x100000100 /* bus_ratio<<32 (1/256) */ + movl r11=0x10000000a /* itc_ratio<<32 (1/100) */ + +1: cmp.eq p6,p7=22,r28 /* PAL_MC_DRAIN */ +(p7) br.cond.sptk.few 1f + mov r8=0 + br.cond.sptk.few rp + +1: cmp.eq p6,p7=23,r28 /* PAL_MC_EXPECTED */ +(p7) br.cond.sptk.few 1f + mov r8=0 + br.cond.sptk.few rp + +1: br.cond.sptk.few rp + .endp pal_emulator_static\n"); + + +static efi_status_t +efi_get_time (efi_time_t *tm, efi_time_cap_t *tc) +{ + if (tm) { + memset(tm, 0, sizeof(*tm)); + tm->year = 2000; + tm->month = 2; + tm->day = 13; + tm->hour = 10; + tm->minute = 11; + tm->second = 12; + } + + if (tc) { + tc->resolution = 10; + tc->accuracy = 12; + tc->sets_to_zero = 1; + } + + return EFI_SUCCESS; +} + +static void +efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data) +{ + while(1); /* Is there a pseudo-op to stop medusa */ +} + +static efi_status_t +efi_success (void) +{ + return EFI_SUCCESS; +} + +static efi_status_t +efi_unimplemented (void) +{ + return EFI_UNSUPPORTED; +} + +static long +sal_emulator (long index, unsigned long in1, unsigned long in2, + unsigned long in3, unsigned long in4, unsigned long in5, + unsigned long in6, unsigned long in7) +{ + register long r9 asm ("r9") = 0; + register long r10 asm ("r10") = 0; + register long r11 asm ("r11") = 0; + long status; + + /* + * Don't do a "switch" here since that gives us code that + * isn't self-relocatable. + */ + status = 0; + if (index == SAL_FREQ_BASE) { + switch (in1) { + case SAL_FREQ_BASE_PLATFORM: + r9 = 500000000; + break; + + case SAL_FREQ_BASE_INTERVAL_TIMER: + /* + * Is this supposed to be the cr.itc frequency + * or something platform specific? The SAL + * doc ain't exactly clear on this... + */ + r9 = 700000000; + break; + + case SAL_FREQ_BASE_REALTIME_CLOCK: + r9 = 1; + break; + + default: + status = -1; + break; + } + } else if (index == SAL_SET_VECTORS) { + if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) { + func_ptr_t *fp; + fp = ADDR_OF(&ap_entry); + fp->pc = in2; + fp->gp = in3; + } else { + status = -1; + } + ; + } else if (index == SAL_GET_STATE_INFO) { + ; + } else if (index == SAL_GET_STATE_INFO_SIZE) { + ; + } else if (index == SAL_CLEAR_STATE_INFO) { + ; + } else if (index == SAL_MC_RENDEZ) { + ; + } else if (index == SAL_MC_SET_PARAMS) { + ; + } else if (index == SAL_CACHE_FLUSH) { + ; + } else if (index == SAL_CACHE_INIT) { + ; + } else if (index == SAL_UPDATE_PAL) { + ; + } else { + status = -1; + } + asm volatile ("" :: "r"(r9), "r"(r10), "r"(r11)); + return status; +} + + +/* + * This is here to work around a bug in egcs-1.1.1b that causes the + * compiler to crash (seems like a bug in the new alias analysis code. + */ +void * +id (long addr) +{ + return (void *) addr; +} + + +/* + * Fix the addresses in a function pointer by adding base node address + * to pc & gp. 
+ */ +void +fix_function_pointer(void *fp) +{ + func_ptr_t *_fp; + + _fp = fp; + _fp->pc = __fwtab_pa(base_nasid, _fp->pc); + _fp->gp = __fwtab_pa(base_nasid, _fp->gp); +} + + +void +sys_fw_init (const char *args, int arglen, int bsp) +{ + /* + * Use static variables to keep from overflowing the RSE stack + */ + static efi_system_table_t *efi_systab; + static efi_runtime_services_t *efi_runtime; + static efi_config_table_t *efi_tables; + static ia64_sal_desc_ptc_t *sal_ptc; + static ia64_sal_ptc_domain_info_t *sal_ptcdi; + static ia64_sal_ptc_domain_proc_entry_t *sal_ptclid; + static acpi_rsdp_t *acpi_systab; + static acpi_rsdt_t *acpi_rsdt; + static acpi_sapic_t *acpi_sapic; + static acpi_entry_lsapic_t *acpi_lsapic; + static struct ia64_sal_systab *sal_systab; + static efi_memory_desc_t *efi_memmap, *md; + static unsigned long *pal_desc, *sal_desc; + static struct ia64_sal_desc_entry_point *sal_ed; + static struct ia64_boot_param *bp; + static struct ia64_sal_desc_ap_wakeup *sal_apwake; + static unsigned char checksum = 0; + static char *cp, *cmd_line, *vendor; + static int mdsize, domain, last_domain ; + static int cnode, nasid, cpu, num_memmd, cpus_found; + + /* + * Pass the parameter base address to the build_efi_xxx routines. + */ + build_init(8LL*GB*base_nasid); + + num_nodes = GetNumNodes(); + num_cpus = GetNumCpus(); + + + memset(fw_mem, 0, sizeof(fw_mem)); + + pal_desc = (unsigned long *) &pal_emulator_static; + sal_desc = (unsigned long *) &sal_emulator; + fix_function_pointer(&pal_emulator_static); + fix_function_pointer(&sal_emulator); + + /* Align this to 16 bytes, probably EFI does this */ + mdsize = (sizeof(efi_memory_desc_t) + 15) & ~15 ; + + cp = fw_mem; + efi_systab = (void *) cp; cp += sizeof(*efi_systab); + efi_runtime = (void *) cp; cp += sizeof(*efi_runtime); + efi_tables = (void *) cp; cp += NUM_EFI_DESCS*sizeof(*efi_tables); + sal_systab = (void *) cp; cp += sizeof(*sal_systab); + sal_ed = (void *) cp; cp += sizeof(*sal_ed); + sal_ptc = (void *) cp; cp += sizeof(*sal_ptc); + sal_apwake = (void *) cp; cp += sizeof(*sal_apwake); + acpi_systab = (void *) cp; cp += sizeof(*acpi_systab); + acpi_rsdt = (void *) cp; cp += sizeof(*acpi_rsdt); + acpi_sapic = (void *) cp; cp += sizeof(*acpi_sapic); + acpi_lsapic = (void *) cp; cp += num_cpus*sizeof(*acpi_lsapic); + vendor = (char *) cp; cp += 32; + efi_memmap = (void *) cp; cp += 8*32*sizeof(*efi_memmap); + sal_ptcdi = (void *) cp; cp += CPUS_PER_FSB*(1+num_nodes)*sizeof(*sal_ptcdi); + sal_ptclid = (void *) cp; cp += ((3+num_cpus)*sizeof(*sal_ptclid)+7)/8*8; + cmd_line = (void *) cp; + + if (args) { + if (arglen >= 1024) + arglen = 1023; + memcpy(cmd_line, args, arglen); + } else { + arglen = 0; + } + cmd_line[arglen] = '\0'; +#ifdef BRINGUP + /* for now, just bring up bash */ + strcpy(cmd_line, "init=/bin/bash"); +#else + strcpy(cmd_line, ""); +#endif + + memset(efi_systab, 0, sizeof(efi_systab)); + efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE; + efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION; + efi_systab->hdr.headersize = sizeof(efi_systab->hdr); + efi_systab->fw_vendor = __fwtab_pa(base_nasid, vendor); + efi_systab->fw_revision = 1; + efi_systab->runtime = __fwtab_pa(base_nasid, efi_runtime); + efi_systab->nr_tables = 2; + efi_systab->tables = __fwtab_pa(base_nasid, efi_tables); + memcpy(vendor, "S\0i\0l\0i\0c\0o\0n\0-\0G\0r\0a\0p\0h\0i\0c\0s\0\0", 32); + + efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE; + efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION; + 
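/*
 * Illustrative aside (a sketch, not part of this commit): every table
 * pointer in this function is run through __fwtab_pa(), which turns a
 * link-time address into a node-relative physical address by clearing
 * the region bits and planting the NASID in bits 33..39 (see the
 * ia64_nasid_va union near the top of this file).  In isolation that
 * transformation looks roughly like this, with an illustrative name:
 */
static unsigned long sketch_fwtab_pa(unsigned long nasid, unsigned long addr)
{
	if (addr == 0)
		return 0;			/* NULL pointers stay NULL */
	addr &= ~(0x7fUL << 33);		/* drop any old NASID field (bits 33..39) */
	addr &= ~(0x7UL << 61);			/* drop the region bits (bits 63..61)     */
	return addr | ((nasid & 0x7f) << 33);	/* plant the node number                  */
}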
efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr); + efi_runtime->get_time = __fwtab_pa(base_nasid, &efi_get_time); + efi_runtime->set_time = __fwtab_pa(base_nasid, &efi_unimplemented); + efi_runtime->get_wakeup_time = __fwtab_pa(base_nasid, &efi_unimplemented); + efi_runtime->set_wakeup_time = __fwtab_pa(base_nasid, &efi_unimplemented); + efi_runtime->set_virtual_address_map = __fwtab_pa(base_nasid, &efi_success); + efi_runtime->get_variable = __fwtab_pa(base_nasid, &efi_unimplemented); + efi_runtime->get_next_variable = __fwtab_pa(base_nasid, &efi_unimplemented); + efi_runtime->set_variable = __fwtab_pa(base_nasid, &efi_unimplemented); + efi_runtime->get_next_high_mono_count = __fwtab_pa(base_nasid, &efi_unimplemented); + efi_runtime->reset_system = __fwtab_pa(base_nasid, &efi_reset_system); + + efi_tables->guid = SAL_SYSTEM_TABLE_GUID; + efi_tables->table = __fwtab_pa(base_nasid, sal_systab); + efi_tables++; + efi_tables->guid = ACPI_TABLE_GUID; + efi_tables->table = __fwtab_pa(base_nasid, acpi_systab); + fix_function_pointer(&efi_unimplemented); + fix_function_pointer(&efi_get_time); + fix_function_pointer(&efi_success); + fix_function_pointer(&efi_reset_system); + + /* fill in the ACPI system table: */ + memcpy(acpi_systab->signature, "RSD PTR ", 8); + acpi_systab->rsdt = (acpi_rsdt_t*)__fwtab_pa(base_nasid, acpi_rsdt); + + memcpy(acpi_rsdt->header.signature, "RSDT",4); + acpi_rsdt->header.length = sizeof(acpi_rsdt_t); + memcpy(acpi_rsdt->header.oem_id, "SGI", 3); + memcpy(acpi_rsdt->header.oem_table_id, "SN1", 3); + acpi_rsdt->header.oem_revision = 0x00010001; + acpi_rsdt->entry_ptrs[0] = __fwtab_pa(base_nasid, acpi_sapic); + + memcpy(acpi_sapic->header.signature, "SPIC ", 4); + acpi_sapic->header.length = sizeof(acpi_sapic_t)+num_cpus*sizeof(acpi_entry_lsapic_t); + for (cnode=0; cnode<num_nodes; cnode++) { + nasid = GetNasid(cnode); + for(cpu=0; cpu<CPUS_PER_NODE; cpu++) { + if (!IsCpuPresent(cnode, cpu)) + continue; + acpi_lsapic->type = ACPI_ENTRY_LOCAL_SAPIC; + acpi_lsapic->length = sizeof(acpi_entry_lsapic_t); + acpi_lsapic->acpi_processor_id = cnode*4+cpu; + acpi_lsapic->flags = LSAPIC_ENABLED|LSAPIC_PRESENT; + acpi_lsapic->eid = cpu; + acpi_lsapic->id = nasid; + acpi_lsapic++; + } + } + + + /* fill in the SAL system table: */ + memcpy(sal_systab->signature, "SST_", 4); + sal_systab->size = sizeof(*sal_systab); + sal_systab->sal_rev_minor = 1; + sal_systab->sal_rev_major = 0; + sal_systab->entry_count = 3; + + strcpy(sal_systab->oem_id, "SGI"); + strcpy(sal_systab->product_id, "SN1"); + + /* fill in an entry point: */ + sal_ed->type = SAL_DESC_ENTRY_POINT; + sal_ed->pal_proc = __fwtab_pa(base_nasid, pal_desc[0]); + sal_ed->sal_proc = __fwtab_pa(base_nasid, sal_desc[0]); + sal_ed->gp = __fwtab_pa(base_nasid, sal_desc[1]); + + /* kludge the PTC domain info */ + sal_ptc->type = SAL_DESC_PTC; + sal_ptc->num_domains = 0; + sal_ptc->domain_info = __fwtab_pa(base_nasid, sal_ptcdi); + cpus_found = 0; + last_domain = -1; + sal_ptcdi--; + for (cnode=0; cnode<num_nodes; cnode++) { + nasid = GetNasid(cnode); + for(cpu=0; cpu<CPUS_PER_NODE; cpu++) { + if (IsCpuPresent(cnode, cpu)) { + domain = cnode*CPUS_PER_NODE + cpu/CPUS_PER_FSB; + if (domain != last_domain) { + sal_ptc->num_domains++; + sal_ptcdi++; + sal_ptcdi->proc_count = 0; + sal_ptcdi->proc_list = __fwtab_pa(base_nasid, sal_ptclid); + last_domain = domain; + } + sal_ptcdi->proc_count++; + sal_ptclid->id = nasid; + sal_ptclid->eid = cpu; + sal_ptclid++; + cpus_found++; + } + } + } + + if (cpus_found != num_cpus) + FPROM_BUG(); 
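/*
 * Illustrative aside (a sketch, not from the patch itself): the hunk just
 * below finishes the SAL system table with an 8-bit checksum chosen so
 * that the covered bytes sum to zero, the usual SAL/ACPI table convention.
 * In isolation the pattern looks like this:
 */
static unsigned char sketch_table_checksum(const unsigned char *start,
					   const unsigned char *end)
{
	unsigned char sum = 0;

	while (start < end)
		sum += *start++;	/* the checksum byte itself is still 0 here */
	return (unsigned char)-sum;	/* storing this makes the total wrap to 0   */
}

static int sketch_table_checksum_ok(const unsigned char *start, const unsigned char *end)
{
	unsigned char sum = 0;

	while (start < end)
		sum += *start++;
	return sum == 0;		/* valid table: all bytes, checksum included, sum to 0 */
}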
+ + /* Make the AP WAKEUP entry */ + sal_apwake->type = SAL_DESC_AP_WAKEUP; + sal_apwake->mechanism = IA64_SAL_AP_EXTERNAL_INT; + sal_apwake->vector = 18; + + for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp) + checksum += *cp; + + sal_systab->checksum = -checksum; + + md = &efi_memmap[0]; + num_memmd = build_efi_memmap((void *)md, mdsize) ; + + bp = id(ZERO_PAGE_ADDR + (((long)base_nasid)<<33)); + bp->efi_systab = __fwtab_pa(base_nasid, &fw_mem); + bp->efi_memmap = __fwtab_pa(base_nasid, efi_memmap); + bp->efi_memmap_size = num_memmd*mdsize; + bp->efi_memdesc_size = mdsize; + bp->efi_memdesc_version = 0x101; + bp->command_line = __fwtab_pa(base_nasid, cmd_line); + bp->console_info.num_cols = 80; + bp->console_info.num_rows = 25; + bp->console_info.orig_x = 0; + bp->console_info.orig_y = 24; + bp->num_pci_vectors = 0; + bp->fpswa = 0; + + /* + * Now pick the BSP & store it LID value in + * a global variable. Note if BSP is greater than last cpu, + * pick the last cpu. + */ + for (cnode=0; cnode<num_nodes; cnode++) { + for(cpu=0; cpu<CPUS_PER_NODE; cpu++) { + if (!IsCpuPresent(cnode, cpu)) + continue; + bsp_lid = (GetNasid(cnode)<<24) | (cpu<<16); + if (bsp-- > 0) + continue; + return; + } + } +} diff --git a/arch/ia64/sn/fprom/main.c b/arch/ia64/sn/fprom/main.c new file mode 100644 index 000000000..45632c2ce --- /dev/null +++ b/arch/ia64/sn/fprom/main.c @@ -0,0 +1,110 @@ +/* + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Jack Steiner (steiner@sgi.com) + */ + + + +#include <linux/types.h> +#include <asm/bitops.h> + +void bedrock_init(int); +void synergy_init(int, int); +void sys_fw_init (const char *args, int arglen, int bsp); + +volatile int bootmaster=0; /* Used to pick bootmaster */ +volatile int nasidmaster[128]={0}; /* Used to pick node/synergy masters */ +int init_done=0; +extern int bsp_lid; + +#define get_bit(b,p) (((*p)>>(b))&1) + +int +fmain(int lid, int bsp) { + int syn, nasid, cpu; + + /* + * First lets figure out who we are. This is done from the + * LID passed to us. + */ + nasid = (lid>>24); + syn = (lid>>17)&1; + cpu = (lid>>16)&1; + + /* + * Now pick a synergy master to initialize synergy registers. + */ + if (test_and_set_bit(syn, &nasidmaster[nasid]) == 0) { + synergy_init(nasid, syn); + test_and_set_bit(syn+2, &nasidmaster[nasid]); + } else + while (get_bit(syn+2, &nasidmaster[nasid]) == 0); + + /* + * Now pick a nasid master to initialize Bedrock registers. + */ + if (test_and_set_bit(8, &nasidmaster[nasid]) == 0) { + bedrock_init(nasid); + test_and_set_bit(9, &nasidmaster[nasid]); + } else + while (get_bit(9, &nasidmaster[nasid]) == 0); + + + /* + * Now pick a BSP & finish init. + */ + if (test_and_set_bit(0, &bootmaster) == 0) { + sys_fw_init(0, 0, bsp); + test_and_set_bit(1, &bootmaster); + } else + while (get_bit(1, &bootmaster) == 0); + + return (lid == bsp_lid); +} + + +void +bedrock_init(int nasid) +{ + nasid = nasid; /* to quiet gcc */ +} + + +void +synergy_init(int nasid, int syn) +{ + long *base; + long off; + + /* + * Enable all FSB flashed interrupts. + * ZZZ - I'd really like defines for this...... + */ + base = (long*)0x80000e0000000000LL; /* base of synergy regs */ + for (off = 0x2a0; off < 0x2e0; off+=8) /* offset for VEC_MASK_{0-3}_A/B */ + *(base+off/8) = -1LL; + + /* + * Set the NASID in the FSB_CONFIG register. 
+ */ + base = (long*)0x80000e0000000450LL; + *base = (long)((nasid<<16)|(syn<<9)); +} + + +/* Why isnt there a bcopy/memcpy in lib64.a */ + +void* +memcpy(void * dest, const void *src, size_t count) +{ + char *s, *se, *d; + + for(d=dest, s=(char*)src, se=s+count; s<se; s++, d++) + *d = *s; + return dest; +} diff --git a/arch/ia64/sn/fprom/runsim b/arch/ia64/sn/fprom/runsim new file mode 100644 index 000000000..496967e4d --- /dev/null +++ b/arch/ia64/sn/fprom/runsim @@ -0,0 +1,372 @@ +#!/bin/sh + +# Script for running PROMs and LINUX kernwls on medusa. +# Type "sim -H" for instructions. + +MEDUSA=${MEDUSA:-/home/rickc/official_medusa/medusa} + +# ------------------ err ----------------------- +err() { + echo "ERROR - $1" + exit 1 +} + +# ---------------- help ---------------------- +help() { +cat <<END +Script for running a PROM or LINUX kernel under medusa. +This script creates a control file, creates links to the appropriate +linux/prom files, and/or calls medusa to make simulation runs. + +Usage: + Initial setup: + sim [-c <config_file>] <-p> | <-k> [<work_dir>] + -p Create PROM control file & links + -k Create LINUX control file & links + -c<cf> Control file name [Default: cf] + <work_dir> Path to directory that contains the linux or PROM files. + The directory can be any of the following: + (linux simulations) + worktree + worktree/linux + any directory with vmlinux, vmlinux.sym & fprom files + (prom simulations) + worktree + worktree/stand/arcs/IP37prom/dev + any directory with fw.bin & fw.sim files + + Simulations: + sim [-X <n>] [-o <output>] [-M] [<config_file>] + -c<cf> Control file name [Default: cf] + -M Pipe output thru fmtmedusa + -o Output filename (copy of all commands/output) [Default: simout] + -X Specifies number of instructions to execute [Default: 0] + (Used only in auto test mode - not described here) + +Examples: + sim -p <promtree> # create control file (cf) & links for prom simulations + sim -k <linuxtree> # create control file (cf) & links for linux simulations + sim -p -c cfprom # create a prom control file (cfprom) only. No links are made. + + sim # run medusa using previously created links & + # control file (cf). +END +exit 1 +} + +# ----------------------- create control file header -------------------- +create_cf_header() { +cat <<END >>$CF +# +# Template for a control file for running linux kernels under medusa. +# You probably want to make mods here but this is a good starting point. +# + +# Preferences +setenv cpu_stepping A +setenv exceptionPrint off +setenv interrupt_messages off +setenv lastPCsize 100000 +setenv low_power_mode on +setenv partialIntelChipSet on +setenv printIntelMessages off +setenv prom_write_action halt +setenv prom_write_messages on +setenv step_quantum 100 +setenv swizzling on +setenv tsconsole on +setenv uart_echo on +symbols on + +# IDE disk params +setenv diskCylinders 611 +setenv bootDrive C +setenv diskHeads 16 +setenv diskPath idedisk +setenv diskPresent 1 +setenv diskSpt 63 + +# Hardware config +setenv coherency_type nasid +setenv cpu_cache_type default +setenv synergy_cache_type syn_cac_64m_8w + +# Numalink config +setenv route_enable on +setenv network_type xbar # Select [xbar|router] +setenv network_warning 0xff + +END +} + + +# ------------------ create control file entries for linux simulations ------------- +create_cf_linux() { +cat <<END >>$CF +# Kernel specific options +setenv mca_on_memory_failure off +setenv LOADPC 0x00100000 # FPROM load address/entry point (8 digits!) 
+sr g 9 0xe000000000520000 # Kernel entry point +setenv symbol_table vmlinux.sym +load fprom +load vmlinux + +# Useful breakpoints to always have set. Add more if desired. +break 0xe000000000505e00 all # dispatch_to_fault_handler +break panic all # stop on panic +break die_if_kernel all # may as well stop + +END +} + +# ------------------ create control file entries for prom simulations --------------- +create_cf_prom() { + SYM2="" + ADDR="0x80000000ff800000" + [ "$EMBEDDED_LINUX" != "0" ] || SYM2="setenv symbol_table2 vmlinux.sym" + [ "$SIZE" = "8MB" ] || ADDR="0x80000000ffc00000" + cat <<END >>$CF +# PROM specific options +setenv mca_on_memory_failure on +setenv LOADPC 0x80000000ffffffb0 +setenv promFile fw.bin +setenv promAddr $ADDR +setenv symbol_table fw.sym +$SYM2 + +# Useful breakpoints to always have set. Add more if desired. +break Pr_ivt_gexx all +break Pr_ivt_brk all +break Pr_PROM_Panic_Spin all +break Pr_PROM_Panic all +break Pr_PROM_C_Panic all +break Pr_fled_die all +break Pr_ResetNow all +break Pr_zzzbkpt all + +END +} + + +# ------------------ create control file entries for memory configuration ------------- +create_cf_memory() { +cat <<END >>$CF +# CPU/Memory map format: +# setenv nodeN_memory_config 0xBSBSBSBS +# B=banksize (0=unused, 1=64M, 2=128M, .., 5-1G, c=8M, d=16M, e=32M) +# S=bank enable (0=both disable, 3=both enable, 2=bank1 enable, 1=bank0 enable) +# rightmost digits are for bank 0, the lowest address. +# setenv nodeN_nasid <nasid> +# specifies the NASID for the node. This is used ONLY if booting the kernel. +# On PROM configurations, set to 0 - PROM will change it later. +# setenv nodeN_cpu_config <cpu_mask> +# Set bit number N to 1 to enable cpu N. Ex., a value of 5 enables cpu 0 & 2. +# +# Repeat the above 3 commands for each node. +# +# For kernel, default to 32MB. Although this is not a valid hardware configuration, +# it runs faster on medusa. For PROM, 64MB is smallest allowed value. + +setenv node0_cpu_config 0x1 # Enable only cpu 0 on the node +END + +if [ $LINUX -eq 1 ] ; then +cat <<END >>$CF +setenv node0_nasid 0 # cnode 0 has NASID 0 +setenv node0_memory_config 0xe1 # 32MB +END +else +cat <<END >>$CF +setenv node0_memory_config 0x11 # 64MB +END +fi +} + +# -------------------- set links to linux files ------------------------- +set_linux_links() { + if [ -d $D/linux/arch ] ; then + D=$D/linux + elif [ -d $D/arch -o -e vmlinux.sym ] ; then + D=$D + else + err "cant determine directory for linux binaries" + fi + rm -rf vmlinux vmlinux.sym fprom + ln -s $D/vmlinux vmlinux + ln -s $D/vmlinux.sym vmlinux.sym + if [ -d $D/arch ] ; then + ln -s $D/arch/ia64/sn/fprom/fprom fprom + else + ln -s $D/fprom fprom + fi + echo " .. Created links to linux files" +} + +# -------------------- set links to prom files ------------------------- +set_prom_links() { + if [ -d $D/stand ] ; then + D=$D/stand/arcs/IP37prom/dev + elif [ -d $D/sal ] ; then + D=$D + else + err "cant determine directory for PROM binaries" + fi + SETUP="$D/../../../../.setup" + grep -q '^ *setenv *PROMSIZE *8MB' $SETUP + if [ $? -eq 0 ] ; then + SIZE="8MB" + else + SIZE="4MB" + fi + grep -q '^ *setenv *LAUNCH_VMLINUX' $SETUP + EMBEDDED_LINUX=$? + rm -f fw.bin fw.map fw.sym vmlinux vmlinux.sym fprom + SDIR="SN1IA${SIZE}.O" + BIN="SN1IAip37prom${SIZE}" + ln -s $D/$SDIR/$BIN.bin fw.bin + ln -s $D/$SDIR/$BIN.map fw.map + ln -s $D/$SDIR/$BIN.sym fw.sym + echo " .. 
Created links to $SIZE prom files" + if [ $EMBEDDED_LINUX -eq 0 ] ; then + ln -s $D/linux/vmlinux vmlinux + ln -s $D/linux/vmlinux.sym vmlinux.sym + if [ -d linux/arch ] ; then + ln -s $D/linux/arch/ia64/sn/fprom/fprom fprom + else + ln -s $D/linux/fprom fprom + fi + echo " .. Created links to embedded linux files in prom tree" + fi +} + +# --------------- start of shell script -------------------------------- +OUT="simout" +FMTMED=0 +STEPCNT=0 +PROM=0 +LINUX=0 +NCF="cf" +while getopts "HMX:c:o:pk" c ; do + case ${c} in + H) help;; + M) FMTMED=1;; + X) STEPCNT=${OPTARG};; + c) NCF=${OPTARG};; + k) PROM=0;LINUX=1;; + p) PROM=1;LINUX=0;; + o) OUT=${OPTARG};; + \?) exit 1;; + esac +done +shift `expr ${OPTIND} - 1` + +# Check if command is for creating control file and/or links to images. +if [ $PROM -eq 1 -o $LINUX -eq 1 ] ; then + CF=$NCF + [ ! -f $CF ] || err "wont overwrite an existing control file ($CF)" + if [ $# -gt 0 ] ; then + D=$1 + [ -d $D ] || err "cannot find directory $D" + [ $PROM -eq 0 ] || set_prom_links + [ $LINUX -eq 0 ] || set_linux_links + fi + create_cf_header + [ $PROM -eq 0 ] || create_cf_prom + [ $LINUX -eq 0 ] || create_cf_linux + create_cf_memory + echo " .. Basic control file created (in $CF). You might want to edit" + echo " this file (at least, look at it)." + exit 0 +fi + +# Verify that the control file exists +CF=${1:-$NCF} +[ -f $CF ] || err "No control file exists. For help, type: $0 -H" + +# Build the .cf files from the user control file. The .cf file is +# identical except that the actual start & load addresses are inserted +# into the file. In addition, the FPROM commands for configuring memory +# and LIDs are generated. + +rm -f .cf .cf1 .cf2 +awk ' +function strtonum(n) { + if (substr(n,1,2) != "0x") + return int(n) + n = substr(n,3) + r=0 + while (length(n) > 0) { + r = r*16+(index("0123456789abcdef", substr(n,1,1))-1) + n = substr(n,2) + } + return r + } +/^#/ {next} +/^$/ {next} +/^setenv *LOADPC/ {loadpc = $3; next} +/^setenv *node._cpu_config/ {n=int(substr($2,5,1)); cpuconf[n] = strtonum($3); print; next} +/^setenv *node._memory_config/ {n=int(substr($2,5,1)); memconf[n] = strtonum($3); print; next} +/^setenv *node._nasid/ {n=int(substr($2,5,1)); nasid[n] = strtonum($3); print; next} + {print} +END { + # Generate the memmap info that starts at the beginning of + # the node the kernel was loaded on. + loadnasid = nasid[0] + cnode = 0 + for (i=0; i<128; i++) { + if (memconf[i] != "") { + printf "sm 0x%x%08x 0x%x%04x%04x\n", + 2*loadnasid, 8*cnodes+8, memconf[i], cpuconf[i], nasid[i] + cnodes++ + cpus += substr("0112122312232334", cpuconf[i]+1,1) + } + } + printf "sm 0x%x00000000 0x%x%08x\n", 2*loadnasid, cnodes, cpus + printf "setenv number_of_nodes %d\n", cnodes + + # Now set the starting PC for each cpu. + cnode = 0 + lowcpu=-1 + for (i=0; i<128; i++) { + if (memconf[i] != "") { + printf "setnode %d\n", cnode + conf = cpuconf[i] + for (j=0; j<4; j++) { + if (conf != int(conf/2)*2) { + printf "setcpu %d\n", j + if (length(loadpc) == 18) + printf "sr pc %s\n", loadpc + else + printf "sr pc 0x%x%s\n", 2*loadnasid, substr(loadpc,3) + if (lowcpu == -1) + lowcpu = j + } + conf = int(conf/2) + } + cnode++ + } + } + printf "setnode 0\n" + printf "setcpu %d\n", lowcpu + } +' <$CF >.cf + +# Now build the .cf1 & .cf2 control files. 
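The memory layout fed to the awk script comes from the nodeN_memory_config values written by create_cf_memory() above: each byte covers a pair of banks, with the high nibble a bank-size code (1=64M, 2=128M, 5=1G, c=8M, d=16M, e=32M; the in-between codes presumably keep doubling) and the low nibble a two-bit enable mask, bank 0 in the lowest byte. A hedged C sketch of decoding one such word (illustrative only, not part of the script):

/* Sketch: decode a nodeN_memory_config word (0xBSBSBSBS) as documented above. */
#include <stdio.h>

static unsigned int banksize_mb(unsigned int code)
{
	switch (code) {
	case 0x1: return 64;
	case 0x2: return 128;
	case 0x3: return 256;		/* assumed: codes keep doubling up to 5 */
	case 0x4: return 512;
	case 0x5: return 1024;
	case 0xc: return 8;
	case 0xd: return 16;
	case 0xe: return 32;
	default:  return 0;		/* 0 or unknown codes: bank pair unused */
	}
}

static void decode_memory_config(unsigned int config)
{
	int pair;

	for (pair = 0; pair < 4; pair++) {	/* byte 0 = banks 0/1, lowest addresses */
		unsigned int byte   = (config >> (8 * pair)) & 0xff;
		unsigned int size   = banksize_mb(byte >> 4);
		unsigned int enable = byte & 0x3;	/* bit0 = even bank, bit1 = odd bank */

		printf("banks %d/%d: %uMB each, enable mask 0x%x\n",
		       2 * pair, 2 * pair + 1, size, enable);
	}
}

int main(void)
{
	decode_memory_config(0xe1);	/* the 32MB single-bank value used for kernel runs */
	return 0;
}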
+CF2_LINES="^sm |^break |^run |^si |^quit |^symbols " +egrep "$CF2_LINES" .cf >.cf2 +egrep -v "$CF2_LINES" .cf >.cf1 +if [ $STEPCNT -ne 0 ] ; then + echo "s $STEPCNT" >>.cf2 + echo "lastpc 1000" >>.cf2 + echo "q" >>.cf2 +fi +echo "script-on $OUT" >>.cf2 + +# Now start medusa.... +if [ $FMTMED -ne 0 ] ; then + $MEDUSA -system mpsn1 -c .cf1 -i .cf2 | fmtmedusa +elif [ $STEPCNT -eq 0 ] ; then + $MEDUSA -system mpsn1 -c .cf1 -i .cf2 +else + $MEDUSA -system mpsn1 -c .cf1 -i .cf2 2>&1 +fi diff --git a/arch/ia64/sn/io/Makefile b/arch/ia64/sn/io/Makefile new file mode 100644 index 000000000..887896bbd --- /dev/null +++ b/arch/ia64/sn/io/Makefile @@ -0,0 +1,32 @@ +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2000 Silicon Graphics, Inc. +# Copyright (C) Jack Steiner (steiner@sgi.com) +# +# +# Makefile for the linux kernel. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... + +EXTRA_CFLAGS := -DSN -DLANGUAGE_C=1 -D_LANGUAGE_C=1 -I. -DBRINGUP \ + -DDIRECT_L1_CONSOLE -DNUMA_BASE -DSIMULATED_KLGRAPH \ + -DNUMA_MIGR_CONTROL -DLITTLE_ENDIAN -DREAL_HARDWARE \ + -DNEW_INTERRUPTS -DCONFIG_IA64_SGI_IO +O_TARGET := sgiio.o +O_OBJS := stubs.o sgi_if.o pciio.o pcibr.o xtalk.o xbow.o xswitch.o hubspc.o \ + klgraph_hack.o io.o hubdev.o \ + hcl.o labelcl.o invent.o klgraph.o klconflib.o sgi_io_sim.o \ + module.o sgi_io_init.o klgraph_hack.o ml_SN_init.o \ + ml_SN_intr.o ip37.o \ + ml_iograph.o hcl_util.o cdl.o \ + mem_refcnt.o devsupport.o alenlist.o pci_bus_cvlink.o \ + eeprom.o pci.o pci_dma.o l1.o l1_command.o + +include $(TOPDIR)/Rules.make diff --git a/arch/ia64/sn/io/alenlist.c b/arch/ia64/sn/io/alenlist.c new file mode 100644 index 000000000..7d8e0e158 --- /dev/null +++ b/arch/ia64/sn/io/alenlist.c @@ -0,0 +1,900 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +/* Implementation of Address/Length Lists. */ + + +#include <linux/types.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/alenlist.h> +#include <asm/sn/mmzone_sn1.h> + +/* + * Logically, an Address/Length List is a list of Pairs, where each pair + * holds an Address and a Length, all in some Address Space. In this + * context, "Address Space" is a particular Crosstalk Widget address + * space, a PCI device address space, a VME bus address space, a + * physical memory address space, etc. + * + * The main use for these Lists is to provide a single mechanism that + * describes where in an address space a DMA occurs. This allows the + * various I/O Bus support layers to provide a single interface for + * DMA mapping and DMA translation without regard to how the DMA target + * was specified by upper layers. The upper layers commonly specify a + * DMA target via a buf structure page list, a kernel virtual address, + * a user virtual address, a vector of addresses (a la uio and iov), + * or possibly a pfn list. 
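The design notes above and below boil down to lists of (address, length) pairs that describe a DMA target; alenlist_append() later in this file also merges a new pair into the previous one whenever the new address picks up exactly where the last pair ended. A simplified, hedged C sketch of that pair list with compaction (not the kernel interface; names are made up):

/* Sketch: a tiny address/length list with contiguous-range compaction. */
#include <stddef.h>

struct alen_pair_sketch {
	unsigned long addr;
	size_t        len;
};

struct alen_list_sketch {
	struct alen_pair_sketch pair[32];
	int                     count;
	unsigned long           next_addr;	/* address that would extend the last pair */
};

/* Append a range, merging it into the previous pair when contiguous. */
static int alen_sketch_append(struct alen_list_sketch *l, unsigned long addr, size_t len)
{
	if (l->count > 0 && addr == l->next_addr) {
		l->pair[l->count - 1].len += len;	/* compaction, as in alenlist_append() */
	} else {
		if (l->count == 32)
			return -1;			/* a real list would grow another chunk */
		l->pair[l->count].addr = addr;
		l->pair[l->count].len  = len;
		l->count++;
	}
	l->next_addr = addr + len;
	return 0;
}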
+ * + * Address/Length Lists also enable drivers to take advantage of their + * inate scatter/gather capabilities in systems where some address + * translation may be required between bus adapters. The driver forms + * a List that represents physical memory targets. This list is passed + * to the various adapters, which apply various translations. The final + * list that's returned to the driver is in terms of its local address + * address space -- addresses which can be passed off to a scatter/gather + * capable DMA controller. + * + * The current implementation is intended to be useful both in kernels + * that support interrupt threads (INTR_KTHREAD) and in systems that do + * not support interrupt threads. Of course, in the latter case, some + * interfaces can be called only within a suspendable context. + * + * Basic operations on Address/Length Lists include: + * alenlist_create Create a list + * alenlist_clear Clear a list + * alenlist_destroy Destroy a list + * alenlist_append Append a Pair to the end of a list + * alenlist_replace Replace a Pair in the middle of a list + * alenlist_get Get an Address/Length Pair from a list + * alenlist_size Return the number of Pairs in a list + * alenlist_concat Append one list to the end of another + * alenlist_clone Create a new copy of a list + * + * Operations that convert from upper-level specifications to Address/ + * Length Lists currently include: + * kvaddr_to_alenlist Convert from a kernel virtual address + * uvaddr_to_alenlist Convert from a user virtual address + * buf_to_alenlist Convert from a buf structure + * alenlist_done Tell system that we're done with an alenlist + * obtained from a conversion. + * Additional convenience operations: + * alenpair_init Create a list and initialize it with a Pair + * alenpair_get Peek at the first pair on a List + * + * A supporting type for Address/Length Lists is an alenlist_cursor_t. A + * cursor marks a position in a List, and determines which Pair is fetched + * by alenlist_get. + * alenlist_cursor_create Allocate and initialize a cursor + * alenlist_cursor_destroy Free space consumed by a cursor + * alenlist_cursor_init (Re-)Initialize a cursor to point + * to the start of a list + * alenlist_cursor_clone Clone a cursor (at the current offset) + * alenlist_cursor_offset Return the number of bytes into + * a list that this cursor marks + * Multiple cursors can point at various points into a List. Also, each + * list maintains one "internal cursor" which may be updated by alenlist_clear + * and alenlist_get. If calling code simply wishes to scan sequentially + * through a list starting at the beginning, and if it is the only user of + * a list, it can rely on this internal cursor rather than managing a + * separate explicit cursor. + * + * The current implementation allows callers to allocate both cursors and + * the lists as local stack (structure) variables. This allows for some + * extra efficiency at the expense of forward binary compatibility. It + * is recommended that customer drivers refrain from local allocation. + * In fact, we likely will choose to move the structures out of the public + * header file into a private place in order to discourage this usage. + * + * Currently, no locking is provided by the alenlist implementation. + * + * Implementation notes: + * For efficiency, Pairs are grouped into "chunks" of, say, 32 Pairs + * and a List consists of some number of these chunks. Chunks are completely + * invisible to calling code. 
Chunks should be large enough to hold most + * standard-sized DMA's, but not so large that they consume excessive space. + * + * It is generally expected that Lists will be constructed at one time and + * scanned at a later time. It is NOT expected that drivers will scan + * a List while the List is simultaneously extended, although this is + * theoretically possible with sufficient upper-level locking. + * + * In order to support demands of Real-Time drivers and in order to support + * swapping under low-memory conditions, we support the concept of a + * "pre-allocated fixed-sized List". After creating a List with + * alenlist_create, a driver may explicitly grow the list (via "alenlist_grow") + * to a specific number of Address/Length pairs. It is guaranteed that future + * operations involving this list will never automatically grow the list + * (i.e. if growth is ever required, the operation will fail). Additionally, + * operations that use alenlist's (e.g. DMA operations) accept a flag which + * causes processing to take place "in-situ"; that is, the input alenlist + * entries are replaced with output alenlist entries. The combination of + * pre-allocated Lists and in-situ processing allows us to avoid the + * potential deadlock scenario where we sleep (waiting for memory) in the + * swap out path. + * + * For debugging, we track the number of allocated Lists in alenlist_count + * the number of allocated chunks in alenlist_chunk_count, and the number + * of allocate cursors in alenlist_cursor_count. We also provide a debug + * routine, alenlist_show, which dumps the contents of an Address/Length List. + * + * Currently, Lists are formed by drivers on-demand. Eventually, we may + * associate an alenlist with a buf structure and keep it up to date as + * we go along. In that case, buf_to_alenlist simply returns a pointer + * to the existing List, and increments the Lists's reference count. + * alenlist_done would decrement the reference count and destroys the List + * if it was the last reference. + * + * Eventually alenlist's may allow better support for user-level scatter/ + * gather operations (e.g. via readv/writev): With proper support, we + * could potentially handle a vector of reads with a single scatter/gather + * DMA operation. This could be especially useful on NUMA systems where + * there's more of a reason for users to use vector I/O operations. + * + * Eventually, alenlist's may replace kaio lists, vhand page lists, + * buffer cache pfdat lists, DMA page lists, etc. + */ + +/* Opaque data types */ + +/* An Address/Length pair. */ +typedef struct alen_s { + alenaddr_t al_addr; + size_t al_length; +} alen_t; + +/* + * Number of elements in one chunk of an Address/Length List. + * + * This size should be sufficient to hold at least an "average" size + * DMA request. Must be at least 1, and should be a power of 2, + * for efficiency. + */ +#define ALEN_CHUNK_SZ ((512*1024)/NBPP) + +/* + * A fixed-size set of Address/Length Pairs. Chunks of Pairs are strung together + * to form a complete Address/Length List. Chunking is entirely hidden within the + * alenlist implementation, and it simply makes allocation and growth of lists more + * efficient. + */ +typedef struct alenlist_chunk_s { + alen_t alc_pair[ALEN_CHUNK_SZ];/* list of addr/len pairs */ + struct alenlist_chunk_s *alc_next; /* point to next chunk of pairs */ +} *alenlist_chunk_t; + +/* + * An Address/Length List. An Address/Length List is allocated with alenlist_create. 
+ * Alternatively, a list can be allocated on the stack (local variable of type + * alenlist_t) and initialized with alenpair_init or with a combination of + * alenlist_clear and alenlist_append, etc. Code which statically allocates these + * structures loses forward binary compatibility! + * + * A statically allocated List is sufficiently large to hold ALEN_CHUNK_SZ pairs. + */ +struct alenlist_s { + unsigned short al_flags; + unsigned short al_logical_size; /* logical size of list, in pairs */ + unsigned short al_actual_size; /* actual size of list, in pairs */ + struct alenlist_chunk_s *al_last_chunk; /* pointer to last logical chunk */ + struct alenlist_cursor_s al_cursor; /* internal cursor */ + struct alenlist_chunk_s al_chunk; /* initial set of pairs */ + alenaddr_t al_compaction_address; /* used to compact pairs */ +}; + +/* al_flags field */ +#define AL_FIXED_SIZE 0x1 /* List is pre-allocated, and of fixed size */ + + +zone_t *alenlist_zone = NULL; +zone_t *alenlist_chunk_zone = NULL; +zone_t *alenlist_cursor_zone = NULL; + +#if DEBUG +int alenlist_count=0; /* Currently allocated Lists */ +int alenlist_chunk_count = 0; /* Currently allocated chunks */ +int alenlist_cursor_count = 0; /* Currently allocate cursors */ +#define INCR_COUNT(ptr) atomicAddInt((ptr), 1); +#define DECR_COUNT(ptr) atomicAddInt((ptr), -1); +#else +#define INCR_COUNT(ptr) +#define DECR_COUNT(ptr) +#endif /* DEBUG */ + +#if DEBUG +static void alenlist_show(alenlist_t); +#endif /* DEBUG */ + +/* + * Initialize Address/Length List management. One time initialization. + */ +void +alenlist_init(void) +{ + alenlist_zone = kmem_zone_init(sizeof(struct alenlist_s), "alenlist"); + alenlist_chunk_zone = kmem_zone_init(sizeof(struct alenlist_chunk_s), "alchunk"); + alenlist_cursor_zone = kmem_zone_init(sizeof(struct alenlist_cursor_s), "alcursor"); +#if DEBUG + idbg_addfunc("alenshow", alenlist_show); +#endif /* DEBUG */ +} + + +/* + * Initialize an Address/Length List cursor. + */ +static void +do_cursor_init(alenlist_t alenlist, alenlist_cursor_t cursorp) +{ + cursorp->al_alenlist = alenlist; + cursorp->al_offset = 0; + cursorp->al_chunk = &alenlist->al_chunk; + cursorp->al_index = 0; + cursorp->al_bcount = 0; +} + + +/* + * Create an Address/Length List, and clear it. + * Set the cursor to the beginning. + */ +alenlist_t +alenlist_create(unsigned flags) +{ + alenlist_t alenlist; + + alenlist = kmem_zone_alloc(alenlist_zone, flags & AL_NOSLEEP ? VM_NOSLEEP : 0); + if (alenlist) { + INCR_COUNT(&alenlist_count); + + alenlist->al_flags = 0; + alenlist->al_logical_size = 0; + alenlist->al_actual_size = ALEN_CHUNK_SZ; + alenlist->al_last_chunk = &alenlist->al_chunk; + alenlist->al_chunk.alc_next = NULL; + do_cursor_init(alenlist, &alenlist->al_cursor); + } + + return(alenlist); +} + + +/* + * Grow an Address/Length List so that all resources needed to contain + * the specified number of Pairs are pre-allocated. An Address/Length + * List that has been explicitly "grown" will never *automatically* + * grow, shrink, or be destroyed. + * + * Pre-allocation is useful for Real-Time drivers and for drivers that + * may be used along the swap-out path and therefore cannot afford to + * sleep until memory is freed. + * + * The cursor is set to the beginning of the list. + */ +int +alenlist_grow(alenlist_t alenlist, size_t npairs) +{ + /* + * This interface should be used relatively rarely, so + * the implementation is kept simple: We clear the List, + * then append npairs bogus entries. 
Finally, we mark + * the list as FIXED_SIZE and re-initialize the internal + * cursor. + */ + + /* + * Temporarily mark as non-fixed size, since we're about + * to shrink and expand it. + */ + alenlist->al_flags &= ~AL_FIXED_SIZE; + + /* Free whatever was in the alenlist. */ + alenlist_clear(alenlist); + + /* Allocate everything that we need via automatic expansion. */ + while (npairs--) + if (alenlist_append(alenlist, 0, 0, AL_NOCOMPACT) == ALENLIST_FAILURE) + return(ALENLIST_FAILURE); + + /* Now, mark as FIXED_SIZE */ + alenlist->al_flags |= AL_FIXED_SIZE; + + /* Clear out bogus entries */ + alenlist_clear(alenlist); + + /* Initialize internal cursor to the beginning */ + do_cursor_init(alenlist, &alenlist->al_cursor); + + return(ALENLIST_SUCCESS); +} + + +/* + * Clear an Address/Length List so that it holds no pairs. + */ +void +alenlist_clear(alenlist_t alenlist) +{ + alenlist_chunk_t chunk, freechunk; + + /* + * If this List is not FIXED_SIZE, free all the + * extra chunks. + */ + if (!(alenlist->al_flags & AL_FIXED_SIZE)) { + /* First, free any extension alenlist chunks */ + chunk = alenlist->al_chunk.alc_next; + while (chunk) { + freechunk = chunk; + chunk = chunk->alc_next; + kmem_zone_free(alenlist_chunk_zone, freechunk); + DECR_COUNT(&alenlist_chunk_count); + } + alenlist->al_actual_size = ALEN_CHUNK_SZ; + alenlist->al_chunk.alc_next = NULL; + } + + alenlist->al_logical_size = 0; + alenlist->al_last_chunk = &alenlist->al_chunk; + do_cursor_init(alenlist, &alenlist->al_cursor); +} + + +/* + * Create and initialize an Address/Length Pair. + * This is intended for degenerate lists, consisting of a single + * address/length pair. + */ +alenlist_t +alenpair_init( alenaddr_t address, + size_t length) +{ + alenlist_t alenlist; + + alenlist = alenlist_create(0); + + alenlist->al_logical_size = 1; + ASSERT(alenlist->al_last_chunk == &alenlist->al_chunk); + alenlist->al_chunk.alc_pair[0].al_length = length; + alenlist->al_chunk.alc_pair[0].al_addr = address; + + return(alenlist); +} + +/* + * Return address/length from a degenerate (1-pair) List, or + * first pair from a larger list. Does NOT update the internal cursor, + * so this is an easy way to peek at a start address. + */ +int +alenpair_get( alenlist_t alenlist, + alenaddr_t *address, + size_t *length) +{ + if (alenlist->al_logical_size == 0) + return(ALENLIST_FAILURE); + + *length = alenlist->al_chunk.alc_pair[0].al_length; + *address = alenlist->al_chunk.alc_pair[0].al_addr; + return(ALENLIST_SUCCESS); +} + + +/* + * Destroy an Address/Length List. + */ +void +alenlist_destroy(alenlist_t alenlist) +{ + if (alenlist == NULL) + return; + + /* + * Turn off FIXED_SIZE so this List can be + * automatically shrunk. + */ + alenlist->al_flags &= ~AL_FIXED_SIZE; + + /* Free extension chunks first */ + if (alenlist->al_chunk.alc_next) + alenlist_clear(alenlist); + + /* Now, free the alenlist itself */ + kmem_zone_free(alenlist_zone, alenlist); + DECR_COUNT(&alenlist_count); +} + +/* + * Release an Address/Length List. + * This is in preparation for a day when alenlist's may be longer-lived, and + * perhaps associated with a buf structure. We'd add a reference count, and + * this routine would decrement the count. For now, we create alenlist's on + * on demand and free them when done. If the driver is not explicitly managing + * a List for its own use, it should call alenlist_done rather than alenlist_destroy. 
+ */ +void +alenlist_done(alenlist_t alenlist) +{ + alenlist_destroy(alenlist); +} + + +/* + * Append another address/length to the end of an Address/Length List, + * growing the list if permitted and necessary. + * + * Returns: SUCCESS/FAILURE + */ +int +alenlist_append( alenlist_t alenlist, /* append to this list */ + alenaddr_t address, /* address to append */ + size_t length, /* length to append */ + unsigned flags) +{ + alen_t *alenp; + int index, last_index; + + index = alenlist->al_logical_size % ALEN_CHUNK_SZ; + + if ((alenlist->al_logical_size > 0)) { + /* + * See if we can compact this new pair in with the previous entry. + * al_compaction_address holds that value that we'd need to see + * in order to compact. + */ + if (!(flags & AL_NOCOMPACT) && + (alenlist->al_compaction_address == address)) { + last_index = (alenlist->al_logical_size-1) % ALEN_CHUNK_SZ; + alenp = &(alenlist->al_last_chunk->alc_pair[last_index]); + alenp->al_length += length; + alenlist->al_compaction_address += length; + return(ALENLIST_SUCCESS); + } + + /* + * If we're out of room in this chunk, move to a new chunk. + */ + if (index == 0) { + if (alenlist->al_flags & AL_FIXED_SIZE) { + alenlist->al_last_chunk = alenlist->al_last_chunk->alc_next; + + /* If we're out of space in a FIXED_SIZE List, quit. */ + if (alenlist->al_last_chunk == NULL) { + ASSERT(alenlist->al_logical_size == alenlist->al_actual_size); + return(ALENLIST_FAILURE); + } + } else { + alenlist_chunk_t new_chunk; + + new_chunk = kmem_zone_alloc(alenlist_chunk_zone, + flags & AL_NOSLEEP ? VM_NOSLEEP : 0); + + if (new_chunk == NULL) + return(ALENLIST_FAILURE); + + alenlist->al_last_chunk->alc_next = new_chunk; + new_chunk->alc_next = NULL; + alenlist->al_last_chunk = new_chunk; + alenlist->al_actual_size += ALEN_CHUNK_SZ; + INCR_COUNT(&alenlist_chunk_count); + } + } + } + + alenp = &(alenlist->al_last_chunk->alc_pair[index]); + alenp->al_addr = address; + alenp->al_length = length; + + alenlist->al_logical_size++; + alenlist->al_compaction_address = address + length; + + return(ALENLIST_SUCCESS); +} + + +/* + * Replace an item in an Address/Length List. Cursor is updated so + * that alenlist_get will get the next item in the list. This interface + * is not very useful for drivers; but it is useful to bus providers + * that need to translate between address spaced in situ. The old Address + * and Length are returned. + */ +/* ARGSUSED */ +int +alenlist_replace( alenlist_t alenlist, /* in: replace in this list */ + alenlist_cursor_t cursorp, /* inout: which item to replace */ + alenaddr_t *addrp, /* inout: address */ + size_t *lengthp, /* inout: length */ + unsigned flags) +{ + alen_t *alenp; + alenlist_chunk_t chunk; + unsigned int index; + size_t length; + alenaddr_t addr; + + if ((addrp == NULL) || (lengthp == NULL)) + return(ALENLIST_FAILURE); + + if (alenlist->al_logical_size == 0) + return(ALENLIST_FAILURE); + + addr = *addrp; + length = *lengthp; + + /* + * If no cursor explicitly specified, use the Address/Length List's + * internal cursor. 
+ */ + if (cursorp == NULL) + cursorp = &alenlist->al_cursor; + + chunk = cursorp->al_chunk; + index = cursorp->al_index; + + ASSERT(cursorp->al_alenlist == alenlist); + if (cursorp->al_alenlist != alenlist) + return(ALENLIST_FAILURE); + + alenp = &chunk->alc_pair[index]; + + /* Return old values */ + *addrp = alenp->al_length; + *lengthp = alenp->al_addr; + + /* Set up new values */ + alenp->al_length = length; + alenp->al_addr = addr; + + /* Update cursor to point to next item */ + cursorp->al_bcount = length; + + return(ALENLIST_SUCCESS); +} + + +/* + * Initialize a cursor in order to walk an alenlist. + * An alenlist_cursor always points to the last thing that was obtained + * from the list. If al_chunk is NULL, then nothing has yet been obtained. + * + * Note: There is an "internal cursor" associated with every Address/Length List. + * For users that scan sequentially through a List, it is more efficient to + * simply use the internal cursor. The caller must insure that no other users + * will simultaneously scan the List. The caller can reposition the internal + * cursor by calling alenlist_cursor_init with a NULL cursorp. + */ +int +alenlist_cursor_init(alenlist_t alenlist, size_t offset, alenlist_cursor_t cursorp) +{ + size_t byte_count; + + if (cursorp == NULL) + cursorp = &alenlist->al_cursor; + + /* Get internal cursor's byte count for use as a hint. + * + * If the internal cursor points passed the point that we're interested in, + * we need to seek forward from the beginning. Otherwise, we can seek forward + * from the internal cursor. + */ + if ((offset > 0) && + ((byte_count = alenlist_cursor_offset(alenlist, (alenlist_cursor_t)NULL)) <= offset)) { + offset -= byte_count; + alenlist_cursor_clone(alenlist, NULL, cursorp); + } else + do_cursor_init(alenlist, cursorp); + + /* We could easily speed this up, but it shouldn't be used very often. */ + while (offset != 0) { + alenaddr_t addr; + size_t length; + + if (alenlist_get(alenlist, cursorp, offset, &addr, &length, 0) != ALENLIST_SUCCESS) + return(ALENLIST_FAILURE); + offset -= length; + } + return(ALENLIST_SUCCESS); +} + + +/* + * Copy a cursor. The source cursor is either an internal alenlist cursor + * or an explicit cursor. + */ +int +alenlist_cursor_clone( alenlist_t alenlist, + alenlist_cursor_t cursorp_in, + alenlist_cursor_t cursorp_out) +{ + ASSERT(cursorp_out); + + if (alenlist && cursorp_in) + if (alenlist != cursorp_in->al_alenlist) + return(ALENLIST_FAILURE); + + if (alenlist) + *cursorp_out = alenlist->al_cursor; /* small structure copy */ + else if (cursorp_in) + *cursorp_out = *cursorp_in; /* small structure copy */ + else + return(ALENLIST_FAILURE); /* no source */ + + return(ALENLIST_SUCCESS); +} + +/* + * Return the number of bytes passed so far according to the specified cursor. + * If cursorp is NULL, use the alenlist's internal cursor. + */ +size_t +alenlist_cursor_offset(alenlist_t alenlist, alenlist_cursor_t cursorp) +{ + ASSERT(!alenlist || !cursorp || (alenlist == cursorp->al_alenlist)); + + if (cursorp == NULL) { + ASSERT(alenlist); + cursorp = &alenlist->al_cursor; + } + + return(cursorp->al_offset); +} + +/* + * Allocate and initialize an Address/Length List cursor. + */ +alenlist_cursor_t +alenlist_cursor_create(alenlist_t alenlist, unsigned flags) +{ + alenlist_cursor_t cursorp; + + ASSERT(alenlist != NULL); + cursorp = kmem_zone_alloc(alenlist_cursor_zone, flags & AL_NOSLEEP ? 
VM_NOSLEEP : 0); + if (cursorp) { + INCR_COUNT(&alenlist_cursor_count); + alenlist_cursor_init(alenlist, 0, cursorp); + } + return(cursorp); +} + +/* + * Free an Address/Length List cursor. + */ +void +alenlist_cursor_destroy(alenlist_cursor_t cursorp) +{ + DECR_COUNT(&alenlist_cursor_count); + kmem_zone_free(alenlist_cursor_zone, cursorp); +} + + +/* + * Fetch an address/length pair from an Address/Length List. Update + * the "cursor" so that next time this routine is called, we'll get + * the next address range. Never return a length that exceeds maxlength + * (if non-zero). If maxlength is a power of 2, never return a length + * that crosses a maxlength boundary. [This may seem strange at first, + * but it's what many drivers want.] + * + * Returns: SUCCESS/FAILURE + */ +int +alenlist_get( alenlist_t alenlist, /* in: get from this list */ + alenlist_cursor_t cursorp, /* inout: which item to get */ + size_t maxlength, /* in: at most this length */ + alenaddr_t *addrp, /* out: address */ + size_t *lengthp, /* out: length */ + unsigned flags) +{ + alen_t *alenp; + alenlist_chunk_t chunk; + unsigned int index; + size_t bcount; + size_t length; + + /* + * If no cursor explicitly specified, use the Address/Length List's + * internal cursor. + */ + if (cursorp == NULL) { + if (alenlist->al_logical_size == 0) + return(ALENLIST_FAILURE); + cursorp = &alenlist->al_cursor; + } + + chunk = cursorp->al_chunk; + index = cursorp->al_index; + bcount = cursorp->al_bcount; + + ASSERT(cursorp->al_alenlist == alenlist); + if (cursorp->al_alenlist != alenlist) + return(ALENLIST_FAILURE); + + alenp = &chunk->alc_pair[index]; + length = alenp->al_length - bcount; + + /* Bump up to next pair, if we're done with this pair. */ + if (length == 0) { + cursorp->al_bcount = bcount = 0; + cursorp->al_index = index = (index + 1) % ALEN_CHUNK_SZ; + + /* Bump up to next chunk, if we're done with this chunk. */ + if (index == 0) { + if (cursorp->al_chunk == alenlist->al_last_chunk) + return(ALENLIST_FAILURE); + chunk = chunk->alc_next; + ASSERT(chunk != NULL); + } else { + /* If in last chunk, don't go beyond end. */ + if (cursorp->al_chunk == alenlist->al_last_chunk) { + int last_size = alenlist->al_logical_size % ALEN_CHUNK_SZ; + if (last_size && (index >= last_size)) + return(ALENLIST_FAILURE); + } + } + + alenp = &chunk->alc_pair[index]; + length = alenp->al_length; + } + + /* Constrain what we return according to maxlength */ + if (maxlength) { + size_t maxlen1 = maxlength - 1; + + if ((maxlength & maxlen1) == 0) /* power of 2 */ + maxlength -= + ((alenp->al_addr + cursorp->al_bcount) & maxlen1); + + length = MIN(maxlength, length); + } + + /* Update the cursor, if desired. */ + if (!(flags & AL_LEAVE_CURSOR)) { + cursorp->al_bcount += length; + cursorp->al_chunk = chunk; + } + + *lengthp = length; + *addrp = alenp->al_addr + bcount; + + return(ALENLIST_SUCCESS); +} + + +/* + * Return the number of pairs in the specified Address/Length List. + * (For FIXED_SIZE Lists, this returns the logical size of the List, + * not the actual capacity of the List.) + */ +int +alenlist_size(alenlist_t alenlist) +{ + return(alenlist->al_logical_size); +} + + +/* + * Concatenate two Address/Length Lists. 
+ */ +void +alenlist_concat(alenlist_t from, + alenlist_t to) +{ + struct alenlist_cursor_s cursor; + alenaddr_t addr; + size_t length; + + alenlist_cursor_init(from, 0, &cursor); + + while(alenlist_get(from, &cursor, (size_t)0, &addr, &length, 0) == ALENLIST_SUCCESS) + alenlist_append(to, addr, length, 0); +} + +/* + * Create a copy of a list. + * (Not all attributes of the old list are cloned. For instance, if + * a FIXED_SIZE list is cloned, the resulting list is NOT FIXED_SIZE.) + */ +alenlist_t +alenlist_clone(alenlist_t old_list, unsigned flags) +{ + alenlist_t new_list; + + new_list = alenlist_create(flags); + if (new_list != NULL) + alenlist_concat(old_list, new_list); + + return(new_list); +} + + +/* + * Convert a kernel virtual address to a Physical Address/Length List. + */ +alenlist_t +kvaddr_to_alenlist(alenlist_t alenlist, caddr_t kvaddr, size_t length, unsigned flags) +{ + alenaddr_t paddr; + long offset; + size_t piece_length; + int created_alenlist; + + if (length <=0) + return(NULL); + + /* If caller supplied a List, use it. Otherwise, allocate one. */ + if (alenlist == NULL) { + alenlist = alenlist_create(0); + created_alenlist = 1; + } else { + alenlist_clear(alenlist); + created_alenlist = 0; + } + + paddr = kvtophys(kvaddr); + offset = poff(kvaddr); + + /* Handle first page */ + piece_length = MIN(NBPP - offset, length); + if (alenlist_append(alenlist, paddr, piece_length, flags) == ALENLIST_FAILURE) + goto failure; + length -= piece_length; + kvaddr += piece_length; + + /* Handle middle pages */ + while (length >= NBPP) { + paddr = kvtophys(kvaddr); + if (alenlist_append(alenlist, paddr, NBPP, flags) == ALENLIST_FAILURE) + goto failure; + length -= NBPP; + kvaddr += NBPP; + } + + /* Handle last page */ + if (length) { + ASSERT(length < NBPP); + paddr = kvtophys(kvaddr); + if (alenlist_append(alenlist, paddr, length, flags) == ALENLIST_FAILURE) + goto failure; + } + + alenlist_cursor_init(alenlist, 0, NULL); + return(alenlist); + +failure: + if (created_alenlist) + alenlist_destroy(alenlist); + return(NULL); +} + + +#if DEBUG +static void +alenlist_show(alenlist_t alenlist) +{ + struct alenlist_cursor_s cursor; + alenaddr_t addr; + size_t length; + int i = 0; + + alenlist_cursor_init(alenlist, 0, &cursor); + + qprintf("Address/Length List@0x%x:\n", alenlist); + qprintf("logical size=0x%x actual size=0x%x last_chunk at 0x%x\n", + alenlist->al_logical_size, alenlist->al_actual_size, + alenlist->al_last_chunk); + qprintf("cursor: chunk=0x%x index=%d offset=0x%x\n", + alenlist->al_cursor.al_chunk, + alenlist->al_cursor.al_index, + alenlist->al_cursor.al_bcount); + while(alenlist_get(alenlist, &cursor, (size_t)0, &addr, &length, 0) == ALENLIST_SUCCESS) + qprintf("%d:\t0x%lx 0x%lx\n", ++i, addr, length); +} +#endif /* DEBUG */ diff --git a/arch/ia64/sn/io/cdl.c b/arch/ia64/sn/io/cdl.c new file mode 100644 index 000000000..eb854b207 --- /dev/null +++ b/arch/ia64/sn/io/cdl.c @@ -0,0 +1,230 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. 
+ * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <asm/sn/sgi.h> +#include <asm/io.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/pci/bridge.h> +#include "asm/sn/ioerror_handling.h" +#include <asm/sn/xtalk/xbow.h> + +#ifdef BRINGUP +/* these get called directly in cdl_add_connpt in fops bypass hack */ +extern int pcibr_attach(devfs_handle_t); +extern int xbow_attach(devfs_handle_t); +#endif /* BRINGUP */ + +/* + * cdl: Connection and Driver List + * + * We are not porting this to Linux. Devices are registered via + * the normal Linux PCI layer. This is a very simplified version + * of cdl that will allow us to register and call our very own + * IO Infrastructure Drivers e.g. pcibr. + */ + +struct cdl { + int part_num; + int mfg_num; + int (*attach) (devfs_handle_t); +} dummy_reg; + +typedef struct cdl *cdl_p; + +#define MAX_SGI_IO_INFRA_DRVR 4 +struct cdl sgi_infrastructure_drivers[MAX_SGI_IO_INFRA_DRVR] = +{ + { XBRIDGE_WIDGET_PART_NUM, XBRIDGE_WIDGET_MFGR_NUM, pcibr_attach /* &pcibr_fops */}, + { BRIDGE_WIDGET_PART_NUM, BRIDGE_WIDGET_MFGR_NUM, pcibr_attach /* &pcibr_fops */}, + { XXBOW_WIDGET_PART_NUM, XXBOW_WIDGET_MFGR_NUM, xbow_attach /* &xbow_fops */}, + { XBOW_WIDGET_PART_NUM, XBOW_WIDGET_MFGR_NUM, xbow_attach /* &xbow_fops */}, +}; + +/* + * cdl_new: Called by pciio and xtalk. + */ +cdl_p +cdl_new(char *name, char *k1str, char *k2str) +{ + /* + * Just return a dummy pointer. + */ + return((cdl_p)&dummy_reg); +} + +/* + * cdl_del: Do nothing. + */ +void +cdl_del(cdl_p reg) +{ + printk("SGI IO INFRASTRUCTURE - cdl_del not supported.\n"); +} + +/* + * cdl_add_driver: The driver part number and manufacturers number + * are statically initialized above. + * + Do nothing. + */ +int +cdl_add_driver(cdl_p reg, int key1, int key2, char *prefix, int flags) +{ + return 0; +} + +/* + * cdl_del_driver: Not supported. + */ +void +cdl_del_driver(cdl_p reg, + char *prefix) +{ + + printk("SGI IO INFRASTRUCTURE - cdl_del_driver not supported.\n"); +} + +/* + * cdl_add_connpt: We found a device and it's connect point. Call the + * attach routine of that driver. + * + * May need support for pciba registration here ... + * + * This routine use to create /hw/.id/pci/.../.. that links to + * /hw/module/006c06/Pbrick/xtalk/15/pci/<slotnum> .. do we still need + * it? The specified driver attach routine does not reference these + * vertices. + */ +int +cdl_add_connpt(cdl_p reg, int part_num, int mfg_num, + devfs_handle_t connpt) +{ + int i; + + /* + * Find the driver entry point and call the attach routine. + */ + for (i = 0; i < MAX_SGI_IO_INFRA_DRVR; i++) { + + if ( (part_num == sgi_infrastructure_drivers[i].part_num) && + ( mfg_num == sgi_infrastructure_drivers[i].mfg_num) ) { + /* + * Call the device attach routines. + */ + if (sgi_infrastructure_drivers[i].attach) { + return(sgi_infrastructure_drivers[i].attach(connpt)); + } +#ifdef BRINGUP + /* + * XXX HACK ALERT bypassing fops for now.. + */ + else { + printk("cdl_add_connpt: NEED FOPS FOR OUR DRIVERS!!\n"); + printk("cdl_add_connpt: part_num= 0x%x mfg_num= 0x%x\n", + part_num, mfg_num); + return(-1); + } +#endif /* BRINGUP */ + } else { + continue; + } + + printk("**** cdl_add_connpt: driver not found for part_num %d mfg_num %d ****\n", part_num, mfg_num); + + return(-1); + } + if ( (i == MAX_SGI_IO_INFRA_DRVR) ) + printk("**** cdl_add_connpt: Driver not found for part_num 0x%x mfg_num 0x%x ****\n", part_num, mfg_num); + + return (0); +} + +/* + * cdl_del_connpt: Not implemented. 
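/*
 * A minimal sketch of how widget discovery code might feed the static
 * driver table above. The part and manufacturer numbers would normally be
 * read from the widget's WIDGET_ID register; probe_one_widget() and
 * "widget_vhdl" are hypothetical names, not part of this file.
 */
static cdl_p sgi_infra_registry;

static void
probe_one_widget(devfs_handle_t widget_vhdl, int part_num, int mfg_num)
{
	if (sgi_infra_registry == NULL)
		sgi_infra_registry = cdl_new("sgi_infra", NULL, NULL);

	/* Dispatches to pcibr_attach() or xbow_attach() via the table above. */
	if (cdl_add_connpt(sgi_infra_registry, part_num, mfg_num, widget_vhdl))
		printk("probe_one_widget: infrastructure attach failed\n");
}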
+ */ +void +cdl_del_connpt(cdl_p reg, int key1, int key2, devfs_handle_t connpt) +{ + + printk("SGI IO INFRASTRUCTURE - cdl_del_cdl_del_connpt not supported.\n"); +} + +/* + * cdl_iterate: Not Implemented. + */ +void +cdl_iterate(cdl_p reg, + char *prefix, + cdl_iter_f * func) +{ + + printk("SGI IO INFRASTRUCTURE - cdl_iterate not supported.\n"); +} + +async_attach_t +async_attach_new(void) +{ + + printk("SGI IO INFRASTRUCTURE - async_attach_new not supported.\n"); + return(0); +} + +void +async_attach_free(async_attach_t aa) +{ + printk("SGI IO INFRASTRUCTURE - async_attach_free not supported.\n"); +} + +async_attach_t +async_attach_get_info(devfs_handle_t vhdl) +{ + + printk("SGI IO INFRASTRUCTURE - async_attach_get_info not supported.\n"); + return(0); +} + +void +async_attach_add_info(devfs_handle_t vhdl, async_attach_t aa) +{ + printk("SGI IO INFRASTRUCTURE - async_attach_add_info not supported.\n"); + +} + +void +async_attach_del_info(devfs_handle_t vhdl) +{ + + printk("SGI IO INFRASTRUCTURE - async_attach_del_info not supported.\n"); + +} + +void async_attach_signal_start(async_attach_t aa) +{ + + printk("SGI IO INFRASTRUCTURE - async_attach_signal_start not supported.\n"); + +} + +void async_attach_signal_done(async_attach_t aa) +{ + + printk("SGI IO INFRASTRUCTURE - async_attach_signal_done not supported.\n"); + +} + +void async_attach_waitall(async_attach_t aa) +{ + + printk("SGI IO INFRASTRUCTURE - async_attach_waitall not supported.\n"); + +} + diff --git a/arch/ia64/sn/io/devsupport.c b/arch/ia64/sn/io/devsupport.c new file mode 100644 index 000000000..760e596ca --- /dev/null +++ b/arch/ia64/sn/io/devsupport.c @@ -0,0 +1,1291 @@ +#define ilvt_t int + +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/iobus.h> +#include <asm/sn/iograph.h> + +/* + * Interfaces in this file are all platform-independent AND IObus-independent. + * Be aware that there may be macro equivalents to each of these hiding in + * header files which supercede these functions. + */ + +/* =====Generic iobus support===== */ + +/* String table to hold names of interrupts. */ +#ifdef notyet +static struct string_table device_desc_string_table; +#endif + +/* One time initialization for device descriptor support. 
*/ +static void +device_desc_init(void) +{ +#ifdef notyet + string_table_init(&device_desc_string_table); +#endif + FIXME("device_desc_init"); +} + + +/* Drivers use these interfaces to manage device descriptors */ +static device_desc_t +device_desc_alloc(void) +{ +#ifdef notyet + device_desc_t device_desc; + + device_desc = (device_desc_t)kmem_zalloc(sizeof(struct device_desc_s), 0); + device_desc->intr_target = GRAPH_VERTEX_NONE; + + ASSERT(device_desc->intr_policy == 0); + device_desc->intr_swlevel = -1; + ASSERT(device_desc->intr_name == NULL); + ASSERT(device_desc->flags == 0); + + ASSERT(!(device_desc->flags & D_IS_ASSOC)); + return(device_desc); +#else + FIXME("device_desc_alloc"); + return((device_desc_t)0); +#endif +} + +void +device_desc_free(device_desc_t device_desc) +{ +#ifdef notyet + if (!(device_desc->flags & D_IS_ASSOC)) /* sanity */ + kfree(device_desc); +#endif + FIXME("device_desc_free"); +} + +device_desc_t +device_desc_dup(devfs_handle_t dev) +{ +#ifdef notyet + device_desc_t orig_device_desc, new_device_desc; + + + new_device_desc = device_desc_alloc(); + orig_device_desc = device_desc_default_get(dev); + if (orig_device_desc) + *new_device_desc = *orig_device_desc;/* small structure copy */ + else { + device_driver_t driver; + ilvl_t pri; + /* + * Use the driver's thread priority in + * case the device thread priority has not + * been given. + */ + if (driver = device_driver_getbydev(dev)) { + pri = device_driver_thread_pri_get(driver); + device_desc_intr_swlevel_set(new_device_desc,pri); + } + } + new_device_desc->flags &= ~D_IS_ASSOC; + return(new_device_desc); +#else + FIXME("device_desc_dup"); + return((device_desc_t)0); +#endif +} + +device_desc_t +device_desc_default_get(devfs_handle_t dev) +{ +#ifdef notyet + graph_error_t rc; + device_desc_t device_desc; + + rc = hwgraph_info_get_LBL(dev, INFO_LBL_DEVICE_DESC, (arbitrary_info_t *)&device_desc); + + if (rc == GRAPH_SUCCESS) + return(device_desc); + else + return(NULL); +#else + FIXME("device_desc_default_get"); + return((device_desc_t)0); +#endif +} + +void +device_desc_default_set(devfs_handle_t dev, device_desc_t new_device_desc) +{ +#ifdef notyet + graph_error_t rc; + device_desc_t old_device_desc = NULL; + + if (new_device_desc) { + new_device_desc->flags |= D_IS_ASSOC; + rc = hwgraph_info_add_LBL(dev, INFO_LBL_DEVICE_DESC, + (arbitrary_info_t)new_device_desc); + if (rc == GRAPH_DUP) { + rc = hwgraph_info_replace_LBL(dev, INFO_LBL_DEVICE_DESC, + (arbitrary_info_t)new_device_desc, + (arbitrary_info_t *)&old_device_desc); + + ASSERT(rc == GRAPH_SUCCESS); + } + hwgraph_info_export_LBL(dev, INFO_LBL_DEVICE_DESC, + sizeof(struct device_desc_s)); + } else { + rc = hwgraph_info_remove_LBL(dev, INFO_LBL_DEVICE_DESC, + (arbitrary_info_t *)&old_device_desc); + } + + if (old_device_desc) { + ASSERT(old_device_desc->flags & D_IS_ASSOC); + old_device_desc->flags &= ~D_IS_ASSOC; + device_desc_free(old_device_desc); + } +#endif + FIXME("device_desc_default_set"); +} + +devfs_handle_t +device_desc_intr_target_get(device_desc_t device_desc) +{ +#ifdef notyet + return(device_desc->intr_target); +#else + FIXME("device_desc_intr_target_get"); + return((devfs_handle_t)0); +#endif +} + +int +device_desc_intr_policy_get(device_desc_t device_desc) +{ +#ifdef notyet + return(device_desc->intr_policy); +#else + FIXME("device_desc_intr_policy_get"); + return(0); +#endif +} + +ilvl_t +device_desc_intr_swlevel_get(device_desc_t device_desc) +{ +#ifdef notyet + return(device_desc->intr_swlevel); +#else + 
FIXME("device_desc_intr_swlevel_get"); + return((ilvl_t)0); +#endif +} + +char * +device_desc_intr_name_get(device_desc_t device_desc) +{ +#ifdef notyet + return(device_desc->intr_name); +#else + FIXME("device_desc_intr_name_get"); + return(NULL); +#endif +} + +int +device_desc_flags_get(device_desc_t device_desc) +{ +#ifdef notyet + return(device_desc->flags); +#else + FIXME("device_desc_flags_get"); + return(0); +#endif +} + +void +device_desc_intr_target_set(device_desc_t device_desc, devfs_handle_t target) +{ + if ( device_desc != (device_desc_t)0 ) + device_desc->intr_target = target; +} + +void +device_desc_intr_policy_set(device_desc_t device_desc, int policy) +{ + if ( device_desc != (device_desc_t)0 ) + device_desc->intr_policy = policy; +} + +void +device_desc_intr_swlevel_set(device_desc_t device_desc, ilvl_t swlevel) +{ + if ( device_desc != (device_desc_t)0 ) + device_desc->intr_swlevel = swlevel; +} + +void +device_desc_intr_name_set(device_desc_t device_desc, char *name) +{ +#ifdef notyet + if ( device_desc != (device_desc_t)0 ) + device_desc->intr_name = string_table_insert(&device_desc_string_table, name); +#else + FIXME("device_desc_intr_name_set"); +#endif +} + +void +device_desc_flags_set(device_desc_t device_desc, int flags) +{ + if ( device_desc != (device_desc_t)0 ) + device_desc->flags = flags; +} + + + +/*============= device admin registry routines ===================== */ + +/* Linked list of <admin-name,admin-val> pairs */ +typedef struct dev_admin_list_s { + struct dev_admin_list_s *admin_next; /* next entry in the + * list + */ + char *admin_name; /* info label */ + char *admin_val; /* actual info */ +} dev_admin_list_t; + +/* Device/Driver administration registry */ +typedef struct dev_admin_registry_s { + mrlock_t reg_lock; /* To allow + * exclusive + * access + */ + dev_admin_list_t *reg_first; /* first entry in + * the list + */ + dev_admin_list_t **reg_last; /* pointer to the + * next to last entry + * in the last which + * is also the place + * where the new + * entry gets + * inserted + */ +} dev_admin_registry_t; + +/* +** device_driver_s associates a device driver prefix with device switch entries. +*/ +struct device_driver_s { + struct device_driver_s *dd_next; /* next element on hash chain */ + struct device_driver_s *dd_prev; /* previous element on hash chain */ + char *dd_prefix; /* driver prefix string */ + struct bdevsw *dd_bdevsw; /* driver's bdevsw */ + struct cdevsw *dd_cdevsw; /* driver's cdevsw */ + + /* driver administration specific data structures need to + * maintain the list of <driver-paramater,value> pairs + */ + dev_admin_registry_t dd_dev_admin_registry; + ilvl_t dd_thread_pri; /* default thread priority for + * all this driver's + * threads. 
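/*
 * A self-contained miniature of the "reg_last" convention used by
 * dev_admin_registry_t above: reg_last always points at the pointer that
 * currently terminates the list (initially &first, afterwards the last
 * entry's next field), so appends are O(1) with no empty-list special case.
 * The example_* names are illustrative only.
 */
struct example_node {
	struct example_node *next;
	char *val;
};

struct example_list {
	struct example_node *first;
	struct example_node **last;	/* -> pointer that terminates the list */
};

static void
example_list_init(struct example_list *l)
{
	l->first = NULL;
	l->last = &l->first;
}

static void
example_list_append(struct example_list *l, struct example_node *n)
{
	n->next = NULL;
	*l->last = n;		/* hook n onto whatever currently ends the list */
	l->last = &n->next;	/* n's own next field is the new terminator */
}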
+ */ + +}; + +#define NEW(_p) (_p = kmalloc(sizeof(*_p), GFP_KERNEL)) +#define FREE(_p) (kmem_free(_p)) + +/* + * helpful lock macros + */ + +#define DEV_ADMIN_REGISTRY_INITLOCK(lockp,name) mrinit(lockp,name) +#define DEV_ADMIN_REGISTRY_RDLOCK(lockp) mraccess(lockp) +#define DEV_ADMIN_REGISTRY_WRLOCK(lockp) mrupdate(lockp) +#define DEV_ADMIN_REGISTRY_UNLOCK(lockp) mrunlock(lockp) + +/* Initialize the registry + */ +static void +dev_admin_registry_init(dev_admin_registry_t *registry) +{ +#ifdef notyet + if ( registry != (dev_admin_registry_t *)0 ) + DEV_ADMIN_REGISTRY_INITLOCK(®istry->reg_lock, + "dev_admin_registry_lock"); + registry->reg_first = NULL; + registry->reg_last = ®istry->reg_first; + } +#else + FIXME("dev_admin_registry_init"); +#endif +} + +/* + * add an <name , value > entry to the dev admin registry. + * if the name already exists in the registry then change the + * value iff the new value differs from the old value. + * if the name doesn't exist a new list entry is created and put + * at the end. + */ +static void +dev_admin_registry_add(dev_admin_registry_t *registry, + char *name, + char *val) +{ +#ifdef notyet + dev_admin_list_t *reg_entry; + dev_admin_list_t *scan = 0; + + DEV_ADMIN_REGISTRY_WRLOCK(®istry->reg_lock); + + /* check if the name already exists in the registry */ + scan = registry->reg_first; + + while (scan) { + if (strcmp(scan->admin_name,name) == 0) { + /* name is there in the registry */ + if (strcmp(scan->admin_val,val)) { + /* old value != new value + * reallocate memory and copy the new value + */ + FREE(scan->admin_val); + scan->admin_val = + (char *)kern_calloc(1,strlen(val)+1); + strcpy(scan->admin_val,val); + goto out; + } + goto out; /* old value == new value */ + } + scan = scan->admin_next; + } + + /* name is not there in the registry. + * allocate memory for the new registry entry + */ + NEW(reg_entry); + + reg_entry->admin_next = 0; + reg_entry->admin_name = (char *)kern_calloc(1,strlen(name)+1); + strcpy(reg_entry->admin_name,name); + reg_entry->admin_val = (char *)kern_calloc(1,strlen(val)+1); + strcpy(reg_entry->admin_val,val); + + /* add the entry at the end of the registry */ + + *(registry->reg_last) = reg_entry; + registry->reg_last = ®_entry->admin_next; + +out: DEV_ADMIN_REGISTRY_UNLOCK(®istry->reg_lock); +#endif + FIXME("dev_admin_registry_add"); +} +/* + * check if there is an info corr. to a particular + * name starting from the cursor position in the + * registry + */ +static char * +dev_admin_registry_find(dev_admin_registry_t *registry,char *name) +{ +#ifdef notyet + dev_admin_list_t *scan = 0; + + DEV_ADMIN_REGISTRY_RDLOCK(®istry->reg_lock); + scan = registry->reg_first; + + while (scan) { + if (strcmp(scan->admin_name,name) == 0) { + DEV_ADMIN_REGISTRY_UNLOCK(®istry->reg_lock); + return scan->admin_val; + } + scan = scan->admin_next; + } + DEV_ADMIN_REGISTRY_UNLOCK(®istry->reg_lock); + return 0; +#else + FIXME("dev_admin_registry_find"); + return(NULL); +#endif +} +/*============= MAIN DEVICE/ DRIVER ADMINISTRATION INTERFACE================ */ +/* + * return any labelled info associated with a device. + * called by any kernel code including device drivers. 
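/*
 * A minimal sketch of the registry interface above (currently compiled out
 * under "notyet"): stage a <name,value> hint, overwrite it, then read it
 * back. The label and value strings are illustrative only.
 */
static void
dev_admin_registry_example(void)
{
	static dev_admin_registry_t registry;
	char *val;

	dev_admin_registry_init(&registry);

	/* The first add creates the entry; the second only replaces the value. */
	dev_admin_registry_add(&registry, "thread_class", "disk");
	dev_admin_registry_add(&registry, "thread_class", "scsi");

	val = dev_admin_registry_find(&registry, "thread_class");
	if (val != NULL)
		printk("thread_class hint: %s\n", val);	/* prints "scsi" */
}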
+ */ +char * +device_admin_info_get(devfs_handle_t dev_vhdl, + char *info_lbl) +{ +#ifdef notyet + char *info = 0; + + /* return value need not be GRAPH_SUCCESS as the labelled + * info may not be present + */ + (void)hwgraph_info_get_LBL(dev_vhdl,info_lbl, + (arbitrary_info_t *)&info); + + + return info; +#else + FIXME("device_admin_info_get"); + return(NULL); +#endif +} + +/* + * set labelled info associated with a device. + * called by hwgraph infrastructure . may also be called + * by device drivers etc. + */ +int +device_admin_info_set(devfs_handle_t dev_vhdl, + char *dev_info_lbl, + char *dev_info_val) +{ +#ifdef notyet + graph_error_t rv; + arbitrary_info_t old_info; + + /* Handle the labelled info + * intr_target + * sw_level + * in a special way. These are part of device_desc_t + * Right now this is the only case where we have + * a set of related device_admin attributes which + * are grouped together. + * In case there is a need for another set we need to + * take a more generic approach to solving this. + * Basically a registry should be implemented. This + * registry is initialized with the callbacks for the + * attributes which need to handled in a special way + * For example: + * Consider + * device_desc + * intr_target + * intr_swlevel + * register "do_intr_target" for intr_target + * register "do_intr_swlevel" for intr_swlevel. + * When the device_admin interface layer gets an <attr,val> pair + * it looks in the registry to see if there is a function registered to + * handle "attr. If not follow the default path of setting the <attr,val> + * as labelled information hanging off the vertex. + * In the above example: + * "do_intr_target" does what is being done below for the ADMIN_LBL_INTR_TARGET + * case + */ + if (!strcmp(dev_info_lbl,ADMIN_LBL_INTR_TARGET) || + !strcmp(dev_info_lbl,ADMIN_LBL_INTR_SWLEVEL)) { + + device_desc_t device_desc; + + /* Check if there is a default device descriptor + * information for this vertex. If not dup one . 
+ */ + if (!(device_desc = device_desc_default_get(dev_vhdl))) { + device_desc = device_desc_dup(dev_vhdl); + device_desc_default_set(dev_vhdl,device_desc); + + } + if (!strcmp(dev_info_lbl,ADMIN_LBL_INTR_TARGET)) { + /* Check if a target cpu has been specified + * for this device by a device administration + * directive + */ +#ifdef DEBUG + printf(ADMIN_LBL_INTR_TARGET + " dev = 0x%x " + "dev_admin_info = %s" + " target = 0x%x\n", + dev_vhdl, + dev_info_lbl, + hwgraph_path_to_vertex(dev_info_val)); +#endif + + device_desc->intr_target = + hwgraph_path_to_vertex(dev_info_val); + } else if (!strcmp(dev_info_lbl,ADMIN_LBL_INTR_SWLEVEL)) { + /* Check if the ithread priority level has been + * specified for this device by a device administration + * directive + */ +#ifdef DEBUG + printf(ADMIN_LBL_INTR_SWLEVEL + " dev = 0x%x " + "dev_admin_info = %s" + " sw level = 0x%x\n", + dev_vhdl, + dev_info_lbl, + atoi(dev_info_val)); +#endif + device_desc->intr_swlevel = atoi(dev_info_val); + } + + } + if (!dev_info_val) + rv = hwgraph_info_remove_LBL(dev_vhdl, + dev_info_lbl, + &old_info); + else { + + rv = hwgraph_info_add_LBL(dev_vhdl, + dev_info_lbl, + (arbitrary_info_t)dev_info_val); + + if (rv == GRAPH_DUP) { + rv = hwgraph_info_replace_LBL(dev_vhdl, + dev_info_lbl, + (arbitrary_info_t)dev_info_val, + &old_info); + } + } + ASSERT(rv == GRAPH_SUCCESS); +#endif + FIXME("device_admin_info_set"); + return 0; +} + +/* + * return labelled info associated with a device driver + * called by kernel code including device drivers + */ +char * +device_driver_admin_info_get(char *driver_prefix, + char *driver_info_lbl) +{ +#ifdef notyet + device_driver_t driver; + + driver = device_driver_get(driver_prefix); + return (dev_admin_registry_find(&driver->dd_dev_admin_registry, + driver_info_lbl)); +#else + FIXME("device_driver_admin_info_get"); + return(NULL); +#endif +} + +/* + * set labelled info associated with a device driver. + * called by hwgraph infrastructure . may also be called + * from drivers etc. + */ +int +device_driver_admin_info_set(char *driver_prefix, + char *driver_info_lbl, + char *driver_info_val) +{ +#ifdef notyet + device_driver_t driver; + + driver = device_driver_get(driver_prefix); + dev_admin_registry_add(&driver->dd_dev_admin_registry, + driver_info_lbl, + driver_info_val); +#endif + FIXME("device_driver_admin_info_set"); + return 0; +} +/*================== device / driver admin support routines================*/ + +/* static tables created by lboot */ +extern dev_admin_info_t dev_admin_table[]; +extern dev_admin_info_t drv_admin_table[]; +extern int dev_admin_table_size; +extern int drv_admin_table_size; + +/* Extend the device admin table to allow the kernel startup code to + * provide some device specific administrative hints + */ +#define ADMIN_TABLE_CHUNK 100 +static dev_admin_info_t extended_dev_admin_table[ADMIN_TABLE_CHUNK]; +static int extended_dev_admin_table_size = 0; +static mrlock_t extended_dev_admin_table_lock; + +/* Initialize the extended device admin table */ +void +device_admin_table_init(void) +{ +#ifdef notyet + extended_dev_admin_table_size = 0; + mrinit(&extended_dev_admin_table_lock, + "extended_dev_admin_table_lock"); +#endif + FIXME("device_admin_table_init"); +} +/* Add <device-name , parameter-name , parameter-value> triple to + * the extended device administration info table. 
This is helpful + * for kernel startup code to put some hints before the hwgraph + * is setup + */ +void +device_admin_table_update(char *name,char *label,char *value) +{ +#ifdef notyet + dev_admin_info_t *p; + + mrupdate(&extended_dev_admin_table_lock); + + /* Safety check that we haven't exceeded array limits */ + ASSERT(extended_dev_admin_table_size < ADMIN_TABLE_CHUNK); + + if (extended_dev_admin_table_size == ADMIN_TABLE_CHUNK) + goto out; + + /* Get the pointer to the entry in the table where we are + * going to put the new information + */ + p = &extended_dev_admin_table[extended_dev_admin_table_size++]; + + /* Allocate memory for the strings and copy them in */ + p->dai_name = (char *)kern_calloc(1,strlen(name)+1); + strcpy(p->dai_name,name); + p->dai_param_name = (char *)kern_calloc(1,strlen(label)+1); + strcpy(p->dai_param_name,label); + p->dai_param_val = (char *)kern_calloc(1,strlen(value)+1); + strcpy(p->dai_param_val,value); + +out: mrunlock(&extended_dev_admin_table_lock); +#endif + FIXME("device_admin_table_update"); +} +/* Extend the device driver admin table to allow the kernel startup code to + * provide some device driver specific administrative hints + */ + +static dev_admin_info_t extended_drv_admin_table[ADMIN_TABLE_CHUNK]; +static int extended_drv_admin_table_size = 0; +mrlock_t extended_drv_admin_table_lock; + +/* Initialize the extended device driver admin table */ +void +device_driver_admin_table_init(void) +{ +#ifdef notyet + extended_drv_admin_table_size = 0; + mrinit(&extended_drv_admin_table_lock, + "extended_drv_admin_table_lock"); +#endif + FIXME("device_driver_admin_table_init"); +} +/* Add <device-driver prefix , parameter-name , parameter-value> triple to + * the extended device administration info table. This is helpful + * for kernel startup code to put some hints before the hwgraph + * is setup + */ +void +device_driver_admin_table_update(char *name,char *label,char *value) +{ +#ifdef notyet + dev_admin_info_t *p; + + mrupdate(&extended_dev_admin_table_lock); + + /* Safety check that we haven't exceeded array limits */ + ASSERT(extended_drv_admin_table_size < ADMIN_TABLE_CHUNK); + + if (extended_drv_admin_table_size == ADMIN_TABLE_CHUNK) + goto out; + + /* Get the pointer to the entry in the table where we are + * going to put the new information + */ + p = &extended_drv_admin_table[extended_drv_admin_table_size++]; + + /* Allocate memory for the strings and copy them in */ + p->dai_name = (char *)kern_calloc(1,strlen(name)+1); + strcpy(p->dai_name,name); + p->dai_param_name = (char *)kern_calloc(1,strlen(label)+1); + strcpy(p->dai_param_name,label); + p->dai_param_val = (char *)kern_calloc(1,strlen(value)+1); + strcpy(p->dai_param_val,value); + +out: mrunlock(&extended_drv_admin_table_lock); +#endif + FIXME("device_driver_admin_table_update"); +} +/* + * keeps on adding the labelled info for each new (lbl,value) pair + * that it finds in the static dev admin table ( created by lboot) + * and the extended dev admin table ( created if at all by the kernel startup + * code) corresponding to a device in the hardware graph. 
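/*
 * A minimal sketch of how early startup code could stage administration
 * hints with the two *_table_update() helpers above before the hwgraph
 * exists. The device path, driver prefix and values are illustrative only;
 * ADMIN_LBL_INTR_SWLEVEL and ADMIN_LBL_THREAD_CLASS are the labels consumed
 * later by device_admin_info_set() and device_driver_sysgen_thread_pri_get().
 */
static void
stage_admin_hints(void)
{
	/* Per-device hint: run this device's interrupt thread at level 230. */
	device_admin_table_update("/hw/module/001c01/slot/io1/pci/1",
				  ADMIN_LBL_INTR_SWLEVEL, "230");

	/* Per-driver hint: treat all of "mydrv"'s threads as disk class. */
	device_driver_admin_table_update("mydrv",
					 ADMIN_LBL_THREAD_CLASS, "disk");
}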
+ */ +void +device_admin_info_update(devfs_handle_t dev_vhdl) +{ +#ifdef notyet + int i = 0; + dev_admin_info_t *scan; + devfs_handle_t scan_vhdl; + + /* Check the static device administration info table */ + scan = dev_admin_table; + while (i < dev_admin_table_size) { + + scan_vhdl = hwgraph_path_to_dev(scan->dai_name); + if (scan_vhdl == dev_vhdl) { + device_admin_info_set(dev_vhdl, + scan->dai_param_name, + scan->dai_param_val); + } + if (scan_vhdl != NODEV) + hwgraph_vertex_unref(scan_vhdl); + scan++;i++; + + } + i = 0; + /* Check the extended device administration info table */ + scan = extended_dev_admin_table; + while (i < extended_dev_admin_table_size) { + scan_vhdl = hwgraph_path_to_dev(scan->dai_name); + if (scan_vhdl == dev_vhdl) { + device_admin_info_set(dev_vhdl, + scan->dai_param_name, + scan->dai_param_val); + } + if (scan_vhdl != NODEV) + hwgraph_vertex_unref(scan_vhdl); + scan++;i++; + + } + + +#endif + FIXME("device_admin_info_update"); +} + +/* looks up the static drv admin table ( created by the lboot) and the extended + * drv admin table (created if at all by the kernel startup code) + * for this driver specific administration info and adds it to the admin info + * associated with this device driver's object + */ +void +device_driver_admin_info_update(device_driver_t driver) +{ +#ifdef notyet + int i = 0; + dev_admin_info_t *scan; + + /* Check the static device driver administration info table */ + scan = drv_admin_table; + while (i < drv_admin_table_size) { + + if (strcmp(scan->dai_name,driver->dd_prefix) == 0) { + dev_admin_registry_add(&driver->dd_dev_admin_registry, + scan->dai_param_name, + scan->dai_param_val); + } + scan++;i++; + } + i = 0; + /* Check the extended device driver administration info table */ + scan = extended_drv_admin_table; + while (i < extended_drv_admin_table_size) { + + if (strcmp(scan->dai_name,driver->dd_prefix) == 0) { + dev_admin_registry_add(&driver->dd_dev_admin_registry, + scan->dai_param_name, + scan->dai_param_val); + } + scan++;i++; + } +#endif + FIXME("device_driver_admin_info_update"); +} + +/* =====Device Driver Support===== */ + + + +/* +** Generic device driver support routines for use by kernel modules that +** deal with device drivers (but NOT for use by the drivers themselves). +** EVERY registered driver currently in the system -- static or loadable -- +** has an entry in the device_driver_hash table. A pointer to such an entry +** serves as a generic device driver handle. +*/ + +#define DEVICE_DRIVER_HASH_SIZE 32 +#ifdef notyet +lock_t device_driver_lock[DEVICE_DRIVER_HASH_SIZE]; +device_driver_t device_driver_hash[DEVICE_DRIVER_HASH_SIZE]; +static struct string_table driver_prefix_string_table; +#endif + +/* +** Initialize device driver infrastructure. 
+*/ +void +device_driver_init(void) +{ +#ifdef notyet + int i; + extern void alenlist_init(void); + extern void hwgraph_init(void); + extern void device_desc_init(void); + + ASSERT(DEVICE_DRIVER_NONE == NULL); + alenlist_init(); + hwgraph_init(); + device_desc_init(); + + string_table_init(&driver_prefix_string_table); + + for (i=0; i<DEVICE_DRIVER_HASH_SIZE; i++) { + spinlock_init(&device_driver_lock[i], "devdrv"); + device_driver_hash[i] = NULL; + } + + /* Initialize static drivers from master.c table */ + for (i=0; i<static_devsw_count; i++) { + device_driver_t driver; + static_device_driver_desc_t desc; + int pri; + + desc = &static_device_driver_table[i]; + driver = device_driver_get(desc->sdd_prefix); + if (!driver) + driver = device_driver_alloc(desc->sdd_prefix); + pri = device_driver_sysgen_thread_pri_get(desc->sdd_prefix); + device_driver_thread_pri_set(driver, pri); + device_driver_devsw_put(driver, desc->sdd_bdevsw, desc->sdd_cdevsw); + } +#endif + FIXME("device_driver_init"); +} + +/* +** Hash a prefix string into a hash table chain. +*/ +static int +driver_prefix_hash(char *prefix) +{ +#ifdef notyet + int accum = 0; + char nextchar; + + while (nextchar = *prefix++) + accum = accum ^ nextchar; + + return(accum % DEVICE_DRIVER_HASH_SIZE); +#else + FIXME("driver_prefix_hash"); + return(0); +#endif +} + + +/* +** Allocate a driver handle. +** Returns the driver handle, or NULL if the driver prefix +** already has a handle. +** +** Upper layers prevent races among device_driver_alloc, +** device_driver_free, and device_driver_get*. +*/ +device_driver_t +device_driver_alloc(char *prefix) +{ +#ifdef notyet + int which_hash; + device_driver_t new_driver; + int s; + + which_hash = driver_prefix_hash(prefix); + + new_driver = kern_calloc(1, sizeof(*new_driver)); + ASSERT(new_driver != NULL); + new_driver->dd_prev = NULL; + new_driver->dd_prefix = string_table_insert(&driver_prefix_string_table, prefix); + new_driver->dd_bdevsw = NULL; + new_driver->dd_cdevsw = NULL; + + dev_admin_registry_init(&new_driver->dd_dev_admin_registry); + device_driver_admin_info_update(new_driver); + + s = mutex_spinlock(&device_driver_lock[which_hash]); + +#if DEBUG + { + device_driver_t drvscan; + + /* Make sure we haven't already added a driver with this prefix */ + drvscan = device_driver_hash[which_hash]; + while (drvscan && + strcmp(drvscan->dd_prefix, prefix)) { + drvscan = drvscan->dd_next; + } + + ASSERT(!drvscan); + } +#endif /* DEBUG */ + + + /* Add new_driver to front of hash chain. */ + new_driver->dd_next = device_driver_hash[which_hash]; + if (new_driver->dd_next) + new_driver->dd_next->dd_prev = new_driver; + device_driver_hash[which_hash] = new_driver; + + mutex_spinunlock(&device_driver_lock[which_hash], s); + + return(new_driver); +#else + FIXME("device_driver_alloc"); + return((device_driver_t)0); +#endif +} + +/* +** Free a driver handle. +** +** Statically loaded drivers should never device_driver_free. +** Dynamically loaded drivers device_driver_free when either an +** unloaded driver is unregistered, or when an unregistered driver +** is unloaded. 
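/*
 * A minimal sketch of the per-driver registration steps performed inside
 * device_driver_init() above, for a single hypothetical prefix "mydrv" that
 * has no bdevsw/cdevsw entries yet; device_driver_sysgen_thread_pri_get()
 * is defined further down in this file.
 */
static void
mydrv_register(void)
{
	device_driver_t driver;

	driver = device_driver_get("mydrv");
	if (driver == NULL)
		driver = device_driver_alloc("mydrv");

	/* Apply any sysgen/admin thread-priority hints for this prefix. */
	device_driver_thread_pri_set(driver,
			device_driver_sysgen_thread_pri_get("mydrv"));
}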
+*/ +void +device_driver_free(device_driver_t driver) +{ +#ifdef notyet + int which_hash; + int s; + + if (!driver) + return; + + which_hash = driver_prefix_hash(driver->dd_prefix); + + s = mutex_spinlock(&device_driver_lock[which_hash]); + +#if DEBUG + { + device_driver_t drvscan; + + /* Make sure we're dealing with the right list */ + drvscan = device_driver_hash[which_hash]; + while (drvscan && (drvscan != driver)) + drvscan = drvscan->dd_next; + + ASSERT(drvscan); + } +#endif /* DEBUG */ + + if (driver->dd_next) + driver->dd_next->dd_prev = driver->dd_prev; + + if (driver->dd_prev) + driver->dd_prev->dd_next = driver->dd_next; + else + device_driver_hash[which_hash] = driver->dd_next; + + mutex_spinunlock(&device_driver_lock[which_hash], s); + + driver->dd_next = NULL; /* sanity */ + driver->dd_prev = NULL; /* sanity */ + driver->dd_prefix = NULL; /* sanity */ + + if (driver->dd_bdevsw) { + driver->dd_bdevsw->d_driver = NULL; + driver->dd_bdevsw = NULL; + } + + if (driver->dd_cdevsw) { + if (driver->dd_cdevsw->d_str) { + str_free_mux_node(driver); + } + driver->dd_cdevsw->d_driver = NULL; + driver->dd_cdevsw = NULL; + } + + kern_free(driver); +#endif + FIXME("device_driver_free"); +} + + +/* +** Given a device driver prefix, return a handle to the caller. +*/ +device_driver_t +device_driver_get(char *prefix) +{ +#ifdef notyet + int which_hash; + device_driver_t drvscan; + int s; + + if (prefix == NULL) + return(NULL); + + which_hash = driver_prefix_hash(prefix); + + s = mutex_spinlock(&device_driver_lock[which_hash]); + + drvscan = device_driver_hash[which_hash]; + while (drvscan && strcmp(drvscan->dd_prefix, prefix)) + drvscan = drvscan->dd_next; + + mutex_spinunlock(&device_driver_lock[which_hash], s); + + return(drvscan); +#else + FIXME("device_driver_get"); + return((device_driver_t)0); +#endif +} + + +/* +** Given a block or char special file devfs_handle_t, find the +** device driver that controls it. +*/ +device_driver_t +device_driver_getbydev(devfs_handle_t device) +{ +#ifdef notyet + struct bdevsw *my_bdevsw; + struct cdevsw *my_cdevsw; + + my_cdevsw = get_cdevsw(device); + if (my_cdevsw != NULL) + return(my_cdevsw->d_driver); + + my_bdevsw = get_bdevsw(device); + if (my_bdevsw != NULL) + return(my_bdevsw->d_driver); + +#endif + FIXME("device_driver_getbydev"); + return((device_driver_t)0); +} + + +/* +** Associate a driver with bdevsw/cdevsw pointers. +** +** Statically loaded drivers are permanently and automatically associated +** with the proper bdevsw/cdevsw. Dynamically loaded drivers associate +** themselves when the driver is registered, and disassociate when the +** driver unregisters. +** +** Returns 0 on success, -1 on failure (devsw already associated with driver) +*/ +int +device_driver_devsw_put(device_driver_t driver, + struct bdevsw *my_bdevsw, + struct cdevsw *my_cdevsw) +{ +#ifdef notyet + int i; + + if (!driver) + return(-1); + + /* Trying to re-register data? 
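/*
 * A minimal sketch of the lookup helpers above: given a block or character
 * special file handle, find the owning driver and report its prefix.
 * "some_dev" is a hypothetical devfs_handle_t obtained elsewhere.
 */
static void
print_owning_driver(devfs_handle_t some_dev)
{
	device_driver_t driver;
	char prefix[32];

	driver = device_driver_getbydev(some_dev);
	if (driver == NULL)
		return;

	device_driver_name_get(driver, prefix, sizeof(prefix));
	prefix[sizeof(prefix) - 1] = '\0';	/* name_get uses strncpy */
	printk("device is handled by driver '%s'\n", prefix);
}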
*/ + if (((my_bdevsw != NULL) && (driver->dd_bdevsw != NULL)) || + ((my_cdevsw != NULL) && (driver->dd_cdevsw != NULL))) + return(-1); + + if (my_bdevsw != NULL) { + driver->dd_bdevsw = my_bdevsw; + my_bdevsw->d_driver = driver; + for (i = 0; i < bdevmax; i++) { + if (driver->dd_bdevsw->d_flags == bdevsw[i].d_flags) { + bdevsw[i].d_driver = driver; + break; + } + } + } + + if (my_cdevsw != NULL) { + driver->dd_cdevsw = my_cdevsw; + my_cdevsw->d_driver = driver; + for (i = 0; i < cdevmax; i++) { + if (driver->dd_cdevsw->d_flags == cdevsw[i].d_flags) { + cdevsw[i].d_driver = driver; + break; + } + } + } +#endif + FIXME("device_driver_devsw_put"); + return(0); +} + + +/* +** Given a driver, return the corresponding bdevsw and cdevsw pointers. +*/ +void +device_driver_devsw_get( device_driver_t driver, + struct bdevsw **bdevswp, + struct cdevsw **cdevswp) +{ + if (!driver) { + *bdevswp = NULL; + *cdevswp = NULL; + } else { + *bdevswp = driver->dd_bdevsw; + *cdevswp = driver->dd_cdevsw; + } +} + +/* + * device_driver_thread_pri_set + * Given a driver try to set its thread priority. + * Returns 0 on success , -1 on failure. + */ +int +device_driver_thread_pri_set(device_driver_t driver,ilvl_t pri) +{ + if (!driver) + return(-1); + driver->dd_thread_pri = pri; + return(0); +} +/* + * device_driver_thread_pri_get + * Given a driver return the driver thread priority. + * If the driver is NULL return invalid driver thread + * priority. + */ +ilvl_t +device_driver_thread_pri_get(device_driver_t driver) +{ + if (driver) + return(driver->dd_thread_pri); + else + return(DRIVER_THREAD_PRI_INVALID); +} +/* +** Given a device driver, return it's handle (prefix). +*/ +void +device_driver_name_get(device_driver_t driver, char *buffer, int length) +{ + if (driver == NULL) + return; + + strncpy(buffer, driver->dd_prefix, length); +} + + +/* +** Associate a pointer-sized piece of information with a device. +*/ +void +device_info_set(devfs_handle_t device, void *info) +{ +#ifdef notyet + hwgraph_fastinfo_set(device, (arbitrary_info_t)info); +#endif + FIXME("device_info_set"); +} + + +/* +** Retrieve a pointer-sized piece of information associated with a device. +*/ +void * +device_info_get(devfs_handle_t device) +{ +#ifdef notyet + return((void *)hwgraph_fastinfo_get(device)); +#else + FIXME("device_info_get"); + return(NULL); +#endif +} + +/* + * Find the thread priority for a device, from the various + * sysgen files. + */ +int +device_driver_sysgen_thread_pri_get(char *dev_prefix) +{ +#ifdef notyet + int pri; + char *pri_s; + char *class; + + extern default_intr_pri; + extern disk_intr_pri; + extern serial_intr_pri; + extern parallel_intr_pri; + extern tape_intr_pri; + extern graphics_intr_pri; + extern network_intr_pri; + extern scsi_intr_pri; + extern audio_intr_pri; + extern video_intr_pri; + extern external_intr_pri; + extern tserialio_intr_pri; + + /* Check if there is a thread priority specified for + * this driver's thread thru admin hints. If so + * use that value. Otherwise set it to its default + * class value, otherwise set it to the default + * value. 
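/*
 * A minimal sketch of the device_info_set()/device_info_get() pair above:
 * a driver hangs its per-device soft state off the hwgraph vertex at attach
 * time and recovers it later. "mydrv_soft" and both functions are
 * hypothetical, not part of this file.
 */
struct mydrv_soft {
	int unit;
};

static void
mydrv_attach_one(devfs_handle_t vhdl, int unit)
{
	struct mydrv_soft *soft = kmalloc(sizeof(*soft), GFP_KERNEL);

	if (soft == NULL)
		return;
	soft->unit = unit;
	device_info_set(vhdl, (void *)soft);
}

static int
mydrv_unit_of(devfs_handle_t vhdl)
{
	struct mydrv_soft *soft = device_info_get(vhdl);

	return soft ? soft->unit : -1;
}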
+ */ + + if (pri_s = device_driver_admin_info_get(dev_prefix, + ADMIN_LBL_THREAD_PRI)) { + pri = atoi(pri_s); + } else if (class = device_driver_admin_info_get(dev_prefix, + ADMIN_LBL_THREAD_CLASS)) { + if (strcmp(class, "disk") == 0) + pri = disk_intr_pri; + else if (strcmp(class, "serial") == 0) + pri = serial_intr_pri; + else if (strcmp(class, "parallel") == 0) + pri = parallel_intr_pri; + else if (strcmp(class, "tape") == 0) + pri = tape_intr_pri; + else if (strcmp(class, "graphics") == 0) + pri = graphics_intr_pri; + else if (strcmp(class, "network") == 0) + pri = network_intr_pri; + else if (strcmp(class, "scsi") == 0) + pri = scsi_intr_pri; + else if (strcmp(class, "audio") == 0) + pri = audio_intr_pri; + else if (strcmp(class, "video") == 0) + pri = video_intr_pri; + else if (strcmp(class, "external") == 0) + pri = external_intr_pri; + else if (strcmp(class, "tserialio") == 0) + pri = tserialio_intr_pri; + else + pri = default_intr_pri; + } else + pri = default_intr_pri; + + if (pri > 255) + pri = 255; + else if (pri < 0) + pri = 0; + return pri; +#else + FIXME("device_driver_sysgen_thread_pri_get"); + return(-1); +#endif +} diff --git a/arch/ia64/sn/io/eeprom.c b/arch/ia64/sn/io/eeprom.c new file mode 100644 index 000000000..6a22aac93 --- /dev/null +++ b/arch/ia64/sn/io/eeprom.c @@ -0,0 +1,1457 @@ +/* + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Jack Steiner (steiner@sgi.com) + */ + + +/* + * WARNING: There is more than one copy of this file in different isms. + * All copies must be kept exactly in sync. + * Do not modify this file without also updating the following: + * + * irix/kern/io/eeprom.c + * stand/arcs/lib/libsk/ml/eeprom.c + * stand/arcs/lib/libkl/io/eeprom.c + * + * (from time to time they might not be in sync but that's due to bringup + * activity - this comment is to remind us that they eventually have to + * get back together) + * + * eeprom.c + * + * access to board-mounted EEPROMs via the L1 system controllers + * + */ + +/************************************************************************** + * * + * Copyright (C) 1999 Silicon Graphics, Inc. * + * * + * These coded instructions, statements, and computer programs contain * + * unpublished proprietary information of Silicon Graphics, Inc., and * + * are protected by Federal copyright law. They may not be disclosed * + * to third parties or copied or duplicated in any form, in whole or * + * in part, without the prior written consent of Silicon Graphics, Inc. 
* + * * + ************************************************************************** + */ + + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/hcl_util.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/eeprom.h> +#include <asm/sn/ksys/i2c.h> +#include <asm/sn/cmn_err.h> +/* #include <sys/SN/SN1/ip27log.h> */ +#include <asm/sn/router.h> +#include <asm/sn/module.h> +#include <asm/sn/ksys/l1.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/clksupport.h> + +#if defined(EEPROM_DEBUG) +#define db_printf(x) printk x +#else +#define db_printf(x) printk x +#endif + +#define BCOPY(x,y,z) memcpy(y,x,z) + +#define UNDERSCORE 0 /* don't convert underscores to hyphens */ +#define HYPHEN 1 /* convert underscores to hyphens */ + +void copy_ascii_field( char *to, char *from, int length, + int change_underscore ); +uint64_t generate_unique_id( char *sn, int sn_len ); +uchar_t char_to_base36( char c ); +int nicify( char *dst, eeprom_brd_record_t *src ); +static void int64_to_hex_string( char *out, uint64_t val ); + +// extern int router_lock( net_vec_t, int, int ); +// extern int router_unlock( net_vec_t ); +#define ROUTER_LOCK(p) // router_lock(p, 10000, 3000000) +#define ROUTER_UNLOCK(p) // router_unlock(p) + +#define IP27LOG_OVNIC "OverrideNIC" + + +/* the following function converts an EEPROM record to a close facsimile + * of the string returned by reading a Dallas Semiconductor NIC (see + * one of the many incarnations of nic.c for details on that driver) + */ +int nicify( char *dst, eeprom_brd_record_t *src ) +{ + int field_len; + uint64_t unique_id; + char *cur_dst = dst; + eeprom_board_ia_t *board; + + board = src->board_ia; + ASSERT( board ); /* there should always be a board info area */ + + /* copy part number */ + strcpy( cur_dst, "Part:" ); + cur_dst += strlen( cur_dst ); + ASSERT( (board->part_num_tl & FIELD_FORMAT_MASK) + == FIELD_FORMAT_ASCII ); + field_len = board->part_num_tl & FIELD_LENGTH_MASK; + copy_ascii_field( cur_dst, board->part_num, field_len, HYPHEN ); + cur_dst += field_len; + + /* copy product name */ + strcpy( cur_dst, ";Name:" ); + cur_dst += strlen( cur_dst ); + ASSERT( (board->product_tl & FIELD_FORMAT_MASK) == FIELD_FORMAT_ASCII ); + field_len = board->product_tl & FIELD_LENGTH_MASK; + copy_ascii_field( cur_dst, board->product, field_len, UNDERSCORE ); + cur_dst += field_len; + + /* copy serial number */ + strcpy( cur_dst, ";Serial:" ); + cur_dst += strlen( cur_dst ); + ASSERT( (board->serial_num_tl & FIELD_FORMAT_MASK) + == FIELD_FORMAT_ASCII ); + field_len = board->serial_num_tl & FIELD_LENGTH_MASK; + copy_ascii_field( cur_dst, board->serial_num, field_len, + HYPHEN); + + cur_dst += field_len; + + /* copy revision */ + strcpy( cur_dst, ";Revision:"); + cur_dst += strlen( cur_dst ); + ASSERT( (board->board_rev_tl & FIELD_FORMAT_MASK) + == FIELD_FORMAT_ASCII ); + field_len = board->board_rev_tl & FIELD_LENGTH_MASK; + copy_ascii_field( cur_dst, board->board_rev, field_len, HYPHEN ); + cur_dst += field_len; + + /* EEPROMs don't have equivalents for the Group, Capability and + * Variety fields, so we pad these with 0's + */ + strcpy( cur_dst, ";Group:ff;Capability:ffffffff;Variety:ff" ); + cur_dst += strlen( cur_dst ); + + /* use the board serial number to "fake" a laser id */ + strcpy( cur_dst, ";Laser:" ); + cur_dst += strlen( cur_dst ); + unique_id = generate_unique_id( board->serial_num, + board->serial_num_tl & 
FIELD_LENGTH_MASK ); + int64_to_hex_string( cur_dst, unique_id ); + strcat( dst, ";" ); + + return 1; +} + + +/* These functions borrow heavily from chars2* in nic.c + */ +void copy_ascii_field( char *to, char *from, int length, + int change_underscore ) +{ + int i; + for( i = 0; i < length; i++ ) { + + /* change underscores to hyphens if requested */ + if( from[i] == '_' && change_underscore == HYPHEN ) + to[i] = '-'; + + /* ; and ; are separators, so mustn't appear within + * a field */ + else if( from[i] == ':' || from[i] == ';' ) + to[i] = '?'; + + /* I'm not sure why or if ASCII character 0xff would + * show up in an EEPROM field, but the NIC parsing + * routines wouldn't like it if it did... so we + * get rid of it, just in case. */ + else if( (unsigned char)from[i] == (unsigned char)0xff ) + to[i] = ' '; + + /* unprintable characters are replaced with . */ + else if( from[i] < ' ' || from[i] >= 0x7f ) + to[i] = '.'; + + /* otherwise, just copy the character */ + else + to[i] = from[i]; + } + + if( i == 0 ) { + to[i] = ' '; /* return at least a space... */ + i++; + } + to[i] = 0; /* terminating null */ +} + +/* Note that int64_to_hex_string currently only has a big-endian + * implementation. + */ +#ifdef _MIPSEB +static void int64_to_hex_string( char *out, uint64_t val ) +{ + int i; + uchar_t table[] = "0123456789abcdef"; + uchar_t *byte_ptr = (uchar_t *)&val; + for( i = 0; i < sizeof(uint64_t); i++ ) { + out[i*2] = table[ ((*byte_ptr) >> 4) & 0x0f ]; + out[i*2+1] = table[ (*byte_ptr) & 0x0f ]; + byte_ptr++; + } + out[i*2] = '\0'; +} + +#else /* little endian */ + +static void int64_to_hex_string( char *out, uint64_t val ) +{ + + + printk("int64_to_hex_string needs a little-endian implementation.\n"); +} +#endif /* _MIPSEB */ + +/* Convert a standard ASCII serial number to a unique integer + * id number by treating the serial number string as though + * it were a base 36 number + */ +uint64_t generate_unique_id( char *sn, int sn_len ) +{ + int uid = 0; + int i; + + #define VALID_BASE36(c) ((c >= '0' && c <='9') \ + || (c >= 'A' && c <='Z') \ + || (c >= 'a' && c <='z')) + + for( i = 0; i < sn_len; i++ ) { + if( !VALID_BASE36(sn[i]) ) + continue; + uid *= 36; + uid += char_to_base36( sn[i] ); + } + + if( uid == 0 ) + return rtc_time(); + + return uid; +} + +uchar_t char_to_base36( char c ) +{ + uchar_t val; + + if( c >= '0' && c <= '9' ) + val = (c - '0'); + + else if( c >= 'A' && c <= 'Z' ) + val = (c - 'A' + 10); + + else if( c >= 'a' && c <= 'z' ) + val = (c - 'a' + 10); + + else val = 0; + + return val; +} + + +/* given a pointer to the three-byte little-endian EEPROM representation + * of date-of-manufacture, this function translates to a big-endian + * integer format + */ +int eeprom_xlate_board_mfr_date( uchar_t *src ) +{ + int rval = 0; + rval += *src; src++; + rval += ((int)(*src) << 8); src ++; + rval += ((int)(*src) << 16); + return rval; +} + + +int eeprom_str( char *nic_str, nasid_t nasid, int component ) +{ + eeprom_brd_record_t eep; + eeprom_board_ia_t board; + eeprom_chassis_ia_t chassis; + int r; + + if( (component & C_DIMM) == C_DIMM ) { + /* this function isn't applicable to DIMMs */ + return EEP_PARAM; + } + else { + eep.board_ia = &board; + eep.spd = NULL; + if( !(component & SUBORD_MASK) ) + eep.chassis_ia = &chassis; /* only main boards have a chassis + * info area */ + else + eep.chassis_ia = NULL; + } + + switch( component & BRICK_MASK ) { + case C_BRICK: + r = cbrick_eeprom_read( &eep, nasid, component ); + break; + case IO_BRICK: + r = iobrick_eeprom_read( 
&eep, nasid, component ); + break; + default: + return EEP_PARAM; /* must be an invalid component */ + } + if( r ) + return r; + if( !nicify( nic_str, &eep ) ) + return EEP_NICIFY; + + return EEP_OK; +} + +int vector_eeprom_str( char *nic_str, nasid_t nasid, + int component, net_vec_t path ) +{ + eeprom_brd_record_t eep; + eeprom_board_ia_t board; + eeprom_chassis_ia_t chassis; + int r; + + eep.board_ia = &board; + if( !(component & SUBORD_MASK) ) + eep.chassis_ia = &chassis; /* only main boards have a chassis + * info area */ + else + eep.chassis_ia = NULL; + + if( !(component & VECTOR) ) + return EEP_PARAM; + + if( (r = vector_eeprom_read( &eep, nasid, path, component )) ) + return r; + + if( !nicify( nic_str, &eep ) ) + return EEP_NICIFY; + + return EEP_OK; +} + + +int is_iobrick( int nasid, int widget_num ) +{ + uint32_t wid_reg; + int part_num, mfg_num; + + /* Read the widget's WIDGET_ID register to get + * its part number and mfg number + */ + wid_reg = *(volatile int32_t *) + (NODE_SWIN_BASE( nasid, widget_num ) + WIDGET_ID); + + part_num = (wid_reg & WIDGET_PART_NUM) >> WIDGET_PART_NUM_SHFT; + mfg_num = (wid_reg & WIDGET_MFG_NUM) >> WIDGET_MFG_NUM_SHFT; + + /* Is this the "xbow part" of an XBridge? If so, this + * widget is definitely part of an I/O brick. + */ + if( part_num == XXBOW_WIDGET_PART_NUM && + mfg_num == XXBOW_WIDGET_MFGR_NUM ) + + return 1; + + /* Is this a "bridge part" of an XBridge? If so, once + * again, we know this widget is part of an I/O brick. + */ + if( part_num == XBRIDGE_WIDGET_PART_NUM && + mfg_num == XBRIDGE_WIDGET_MFGR_NUM ) + + return 1; + + return 0; +} + + +int cbrick_uid_get( nasid_t nasid, uint64_t *uid ) +{ +#if !defined(CONFIG_SERIAL_SGI_L1_PROTOCOL) + return EEP_L1; +#else + char uid_str[32]; + char msg[BRL1_QSIZE]; + int subch, len; + l1sc_t sc; + l1sc_t *scp; + int local = (nasid == get_nasid()); + + if ( IS_RUNNING_ON_SIMULATOR() ) + return EEP_L1; + + /* If the promlog variable pointed to by IP27LOG_OVNIC is set, + * use that value for the cbrick UID rather than the EEPROM + * serial number. + */ +#ifdef LOG_GETENV + if( ip27log_getenv( nasid, IP27LOG_OVNIC, uid_str, NULL, 0 ) >= 0 ) + { + /* We successfully read IP27LOG_OVNIC, so return it as the UID. */ + db_printf(( "cbrick_uid_get:" + "Overriding UID with environment variable %s\n", + IP27LOG_OVNIC )); + *uid = strtoull( uid_str, NULL, 0 ); + return EEP_OK; + } +#endif + + /* If this brick is retrieving its own uid, use the local l1sc_t to + * arbitrate access to the l1; otherwise, set up a new one. 
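/*
 * A possible little-endian counterpart (a sketch only, untested) for the
 * int64_to_hex_string() stub above, which on little-endian kernels currently
 * just prints a warning. It emits the same 16-digit, most-significant-nibble-
 * first string that the big-endian version produces.
 */
static void int64_to_hex_string_le( char *out, uint64_t val )
{
	int i;
	static const char table[] = "0123456789abcdef";

	for( i = 0; i < 16; i++ )
		out[i] = table[ (val >> (60 - 4 * i)) & 0x0f ];
	out[i] = '\0';
}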
+ */ + if( local ) { + scp = get_l1sc(); + } + else { + scp = ≻ + sc_init( &sc, nasid, BRL1_LOCALUART ); + } + + /* fill in msg with the opcode & params */ + BZERO( msg, BRL1_QSIZE ); + if( (subch = sc_open( scp, L1_ADDR_LOCAL )) < 0 ) + return EEP_L1; + + if( (len = sc_construct_msg( scp, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_SER_NUM, 0 )) < 0 ) + { + sc_close( scp, subch ); + return( EEP_L1 ); + } + + /* send the request to the L1 */ + if( sc_command( scp, subch, msg, msg, &len ) ) { + sc_close( scp, subch ); + return( EEP_L1 ); + } + + /* free up subchannel */ + sc_close(scp, subch); + + /* check response */ + if( sc_interpret_resp( msg, 2, L1_ARG_ASCII, uid_str ) < 0 ) + { + return( EEP_L1 ); + } + + *uid = generate_unique_id( uid_str, strlen( uid_str ) ); + + return EEP_OK; +#endif /* CONFIG_SERIAL_SGI_L1_PROTOCOL */ +} + + +int rbrick_uid_get( nasid_t nasid, net_vec_t path, uint64_t *uid ) +{ +#if !defined(CONFIG_SERIAL_SGI_L1_PROTOCOL) + return EEP_L1; +#else + char uid_str[32]; + char msg[BRL1_QSIZE]; + int subch, len; + l1sc_t sc; + + if ( IS_RUNNING_ON_SIMULATOR() ) + return EEP_L1; + +#ifdef BRINGUP +#define FAIL \ + { \ + *uid = rtc_time(); \ + printk( "rbrick_uid_get failed; using current time as uid\n" ); \ + return EEP_OK; \ + } +#endif /* BRINGUP */ + + ROUTER_LOCK(path); + sc_init( &sc, nasid, path ); + + /* fill in msg with the opcode & params */ + BZERO( msg, BRL1_QSIZE ); + if( (subch = sc_open( &sc, L1_ADDR_LOCAL )) < 0 ) { + ROUTER_UNLOCK(path); + FAIL; + } + + if( (len = sc_construct_msg( &sc, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_SER_NUM, 0 )) < 0 ) + { + ROUTER_UNLOCK(path); + sc_close( &sc, subch ); + FAIL; + } + + /* send the request to the L1 */ + if( sc_command( &sc, subch, msg, msg, &len ) ) { + ROUTER_UNLOCK(path); + sc_close( &sc, subch ); + FAIL; + } + + /* free up subchannel */ + ROUTER_UNLOCK(path); + sc_close(&sc, subch); + + /* check response */ + if( sc_interpret_resp( msg, 2, L1_ARG_ASCII, uid_str ) < 0 ) + { + FAIL; + } + + *uid = generate_unique_id( uid_str, strlen( uid_str ) ); + + return EEP_OK; +#endif /* CONFIG_SERIAL_SGI_L1_PROTOCOL */ +} + +int iobrick_uid_get( nasid_t nasid, uint64_t *uid ) +{ + eeprom_brd_record_t eep; + eeprom_board_ia_t board; + eeprom_chassis_ia_t chassis; + int r; + + eep.board_ia = &board; + eep.chassis_ia = &chassis; + eep.spd = NULL; + + r = iobrick_eeprom_read( &eep, nasid, IO_BRICK ); + if( r != EEP_OK ) { + *uid = rtc_time(); + return r; + } + + *uid = generate_unique_id( board.serial_num, + board.serial_num_tl & FIELD_LENGTH_MASK ); + + return EEP_OK; +} + + +int ibrick_mac_addr_get( nasid_t nasid, char *eaddr ) +{ + eeprom_brd_record_t eep; + eeprom_board_ia_t board; + eeprom_chassis_ia_t chassis; + int r; + char *tmp; + + eep.board_ia = &board; + eep.chassis_ia = &chassis; + eep.spd = NULL; + + r = iobrick_eeprom_read( &eep, nasid, IO_BRICK ); + if( (r != EEP_OK) || (board.mac_addr[0] == '\0') ) { + db_printf(( "ibrick_mac_addr_get: " + "Couldn't read MAC address from EEPROM\n" )); + return EEP_L1; + } + else { + /* successfully read info area */ + int ix; + tmp = board.mac_addr; + for( ix = 0; ix < (board.mac_addr_tl & FIELD_LENGTH_MASK); ix++ ) + { + *eaddr++ = *tmp++; + } + *eaddr = '\0'; + } + + return EEP_OK; +} + + +/* + * eeprom_vertex_info_set + * + * Given a vertex handle, a component designation, a starting nasid + * and (in the case of a router) a vector path to the component, this + * function will read the EEPROM and attach the resulting information + * to the 
vertex in the same string format as that provided by the + * Dallas Semiconductor NIC drivers. If the vertex already has the + * string, this function just returns the string. + */ + +extern char *nic_vertex_info_get( devfs_handle_t ); +extern void nic_vmc_check( devfs_handle_t, char * ); +#ifdef BRINGUP +/* the following were lifted from nic.c - change later? */ +#define MAX_INFO 2048 +#define NEWSZ(ptr,sz) ((ptr) = kern_malloc((sz))) +#define DEL(ptr) (kern_free((ptr))) +#endif /* BRINGUP */ + +char *eeprom_vertex_info_set( int component, int nasid, devfs_handle_t v, + net_vec_t path ) +{ + char *info_tmp; + int info_len; + char *info; + + /* see if this vertex is already marked */ + info_tmp = nic_vertex_info_get(v); + if (info_tmp) return info_tmp; + + /* get a temporary place for the data */ + NEWSZ(info_tmp, MAX_INFO); + if (!info_tmp) return NULL; + + /* read the EEPROM */ + if( component & R_BRICK ) { + if( RBRICK_EEPROM_STR( info_tmp, nasid, path ) != EEP_OK ) + return NULL; + } + else { + if( eeprom_str( info_tmp, nasid, component ) != EEP_OK ) + return NULL; + } + + /* allocate a smaller final place */ + info_len = strlen(info_tmp)+1; + NEWSZ(info, info_len); + if (info) { + strcpy(info, info_tmp); + DEL(info_tmp); + } else { + info = info_tmp; + } + + /* add info to the vertex */ + hwgraph_info_add_LBL(v, INFO_LBL_NIC, + (arbitrary_info_t) info); + + /* see if someone else got there first */ + info_tmp = nic_vertex_info_get(v); + if (info != info_tmp) { + DEL(info); + return info_tmp; + } + + /* export the data */ + hwgraph_info_export_LBL(v, INFO_LBL_NIC, info_len); + + /* trigger all matching callbacks */ + nic_vmc_check(v, info); + + return info; +} + + +/********************************************************************* + * + * stubs for use until the Bedrock/L1 link is available + * + */ + +#include <asm/sn/nic.h> + +/* #define EEPROM_TEST */ + +/* fake eeprom reading functions (replace when the BR/L1 communication + * channel is in working order) + */ + + +/* generate a charater in [0-9A-Z]; if an "extra" character is + * specified (such as '_'), include it as one of the possibilities. + */ +char random_eeprom_ch( char extra ) +{ + char ch; + int modval = 36; + if( extra ) + modval++; + + ch = rtc_time() % modval; + + if( ch < 10 ) + ch += '0'; + else if( ch >= 10 && ch < 36 ) + ch += ('A' - 10); + else + ch = extra; + + return ch; +} + +/* create a part number of the form xxx-xxxx-xxx. + * It may be important later to generate different + * part numbers depending on the component we're + * supposed to be "reading" from, so the component + * paramter is provided. 
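/*
 * A minimal sketch of eeprom_vertex_info_set() above: label a C-brick
 * vertex with its EEPROM-derived, NIC-style info string. "cbrick_vhdl" and
 * "nasid" are hypothetical values obtained during node discovery; C_BRICK
 * components take no vector path, so 0 is passed for it.
 */
static void
label_cbrick_vertex(devfs_handle_t cbrick_vhdl, nasid_t nasid)
{
	char *nic;

	nic = eeprom_vertex_info_set(C_BRICK, nasid, cbrick_vhdl, 0);
	if (nic == NULL)
		printk("label_cbrick_vertex: could not read brick EEPROM\n");
}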
+ */ +void fake_a_part_number( char *buf, int component ) +{ + int i; + switch( component ) { + + /* insert component-specific routines here */ + + case C_BRICK: + strcpy( buf, "030-1266-001" ); + break; + default: + for( i = 0; i < 12; i++ ) { + if( i == 3 || i == 8 ) + buf[i] = '-'; + else + buf[i] = random_eeprom_ch(0); + } + } +} + + +/* create a six-character serial number */ +void fake_a_serial_number( char *buf, uint64_t ser ) +{ + int i; + static const char hexchars[] = "0123456789ABCDEF"; + + if (ser) { + for( i = 5; i >=0; i-- ) { + buf[i] = hexchars[ser & 0xf]; + ser >>= 4; + } + } + else { + for( i = 0; i < 6; i++ ) + buf[i] = random_eeprom_ch(0); + } +} + + +void fake_a_product_name( uchar_t *format, char* buf, int component ) +{ + switch( component & BRICK_MASK ) { + + case C_BRICK: + if( component & SUBORD_MASK ) { + strcpy( buf, "C_BRICK_SUB" ); + *format = 0xCB; + } + else { + strcpy( buf, "IP35" ); + *format = 0xC4; + } + break; + + case R_BRICK: + if( component & SUBORD_MASK ) { + strcpy( buf, "R_BRICK_SUB" ); + *format = 0xCB; + } + else { + strcpy( buf, "R_BRICK" ); + *format = 0xC7; + } + break; + + case IO_BRICK: + if( component & SUBORD_MASK ) { + strcpy( buf, "IO_BRICK_SUB" ); + *format = 0xCC; + } + else { + strcpy( buf, "IO_BRICK" ); + *format = 0xC8; + } + break; + + default: + strcpy( buf, "UNK_DEVICE" ); + *format = 0xCA; + } +} + + + +int fake_an_eeprom_record( eeprom_brd_record_t *buf, int component, + uint64_t ser ) +{ + eeprom_board_ia_t *board; + eeprom_chassis_ia_t *chassis; + int i, cs; + + board = buf->board_ia; + chassis = buf->chassis_ia; + + if( !(component & SUBORD_MASK) ) { + if( !chassis ) + return EEP_PARAM; + chassis->format = 0; + chassis->length = 5; + chassis->type = 0x17; + + chassis->part_num_tl = 0xCC; + fake_a_part_number( chassis->part_num, component ); + chassis->serial_num_tl = 0xC6; + fake_a_serial_number( chassis->serial_num, ser ); + + cs = chassis->format + chassis->length + chassis->type + + chassis->part_num_tl + chassis->serial_num_tl; + for( i = 0; i < (chassis->part_num_tl & FIELD_LENGTH_MASK); i++ ) + cs += chassis->part_num[i]; + for( i = 0; i < (chassis->serial_num_tl & FIELD_LENGTH_MASK); i++ ) + cs += chassis->serial_num[i]; + chassis->checksum = 256 - (cs % 256); + } + + if( !board ) + return EEP_PARAM; + board->format = 0; + board->length = 10; + board->language = 0; + board->mfg_date = 1789200; /* noon, 5/26/99 */ + board->manuf_tl = 0xC3; + strcpy( board->manuf, "SGI" ); + + fake_a_product_name( &(board->product_tl), board->product, component ); + + board->serial_num_tl = 0xC6; + fake_a_serial_number( board->serial_num, ser ); + + board->part_num_tl = 0xCC; + fake_a_part_number( board->part_num, component ); + + board->board_rev_tl = 0xC2; + board->board_rev[0] = '0'; + board->board_rev[1] = '1'; + + board->eeprom_size_tl = 0x01; + board->eeprom_size = 1; + + board->temp_waiver_tl = 0xC2; + board->temp_waiver[0] = '0'; + board->temp_waiver[1] = '1'; + + cs = board->format + board->length + board->language + + (board->mfg_date & 0xFF) + + (board->mfg_date & 0xFF00) + + (board->mfg_date & 0xFF0000) + + board->manuf_tl + board->product_tl + board->serial_num_tl + + board->part_num_tl + board->board_rev_tl + + board->board_rev[0] + board->board_rev[1] + + board->eeprom_size_tl + board->eeprom_size + board->temp_waiver_tl + + board->temp_waiver[0] + board->temp_waiver[1]; + for( i = 0; i < (board->manuf_tl & FIELD_LENGTH_MASK); i++ ) + cs += board->manuf[i]; + for( i = 0; i < (board->product_tl & FIELD_LENGTH_MASK); i++ ) 
+ cs += board->product[i]; + for( i = 0; i < (board->serial_num_tl & FIELD_LENGTH_MASK); i++ ) + cs += board->serial_num[i]; + for( i = 0; i < (board->part_num_tl & FIELD_LENGTH_MASK); i++ ) + cs += board->part_num[i]; + + board->checksum = 256 - (cs % 256); + + return EEP_OK; +} + +#define EEPROM_CHUNKSIZE 64 + +#if defined(EEPROM_DEBUG) +#define RETURN_ERROR \ +{ \ + printk( "read_ia error return, component 0x%x, line %d" \ + ", address 0x%x, ia code 0x%x\n", \ + l1_compt, __LINE__, sc->subch[subch].target, ia_code ); \ + return EEP_L1; \ +} + +#else +#define RETURN_ERROR return(EEP_L1) +#endif + +int read_ia( l1sc_t *sc, int subch, int l1_compt, + int ia_code, char *eep_record ) +{ +#if !defined(CONFIG_SERIAL_SGI_L1_PROTOCOL) + return EEP_L1; +#else + char msg[BRL1_QSIZE]; /* message buffer */ + int len; /* number of bytes used in message buffer */ + int ia_len = EEPROM_CHUNKSIZE; /* remaining bytes in info area */ + int offset = 0; /* current offset into info area */ + + if ( IS_RUNNING_ON_SIMULATOR() ) + return EEP_L1; + + BZERO( msg, BRL1_QSIZE ); + + /* retrieve EEPROM data in 64-byte chunks + */ + + while( ia_len ) + { + /* fill in msg with opcode & params */ + if( (len = sc_construct_msg( sc, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_EEPROM, 8, + L1_ARG_INT, l1_compt, + L1_ARG_INT, ia_code, + L1_ARG_INT, offset, + L1_ARG_INT, ia_len )) < 0 ) + { + RETURN_ERROR; + } + + /* send the request to the L1 */ + + if( sc_command( sc, subch, msg, msg, &len ) ) { + RETURN_ERROR; + } + + /* check response */ + if( sc_interpret_resp( msg, 5, + L1_ARG_INT, &ia_len, + L1_ARG_UNKNOWN, &len, eep_record ) < 0 ) + { + RETURN_ERROR; + } + + if( ia_len > EEPROM_CHUNKSIZE ) + ia_len = EEPROM_CHUNKSIZE; + + eep_record += EEPROM_CHUNKSIZE; + offset += EEPROM_CHUNKSIZE; + } + + return EEP_OK; +#endif /* CONFIG_SERIAL_SGI_L1_PROTOCOL */ +} + + +int read_spd( l1sc_t *sc, int subch, int l1_compt, + eeprom_spd_u *spd ) +{ +#if !defined(CONFIG_SERIAL_SGI_L1_PROTOCOL) + return EEP_L1; +#else + char msg[BRL1_QSIZE]; /* message buffer */ + int len; /* number of bytes used in message buffer */ + int spd_len = EEPROM_CHUNKSIZE; /* remaining bytes in spd record */ + int offset = 0; /* current offset into spd record */ + char *spd_p = spd->bytes; /* "thumb" for writing to spd */ + + if ( IS_RUNNING_ON_SIMULATOR() ) + return EEP_L1; + + BZERO( msg, BRL1_QSIZE ); + + /* retrieve EEPROM data in 64-byte chunks + */ + + while( spd_len ) + { + /* fill in msg with opcode & params */ + if( (len = sc_construct_msg( sc, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_EEPROM, 8, + L1_ARG_INT, l1_compt, + L1_ARG_INT, L1_EEP_SPD, + L1_ARG_INT, offset, + L1_ARG_INT, spd_len )) < 0 ) + { + return( EEP_L1 ); + } + + /* send the request to the L1 */ + if( sc_command( sc, subch, msg, msg, &len ) ) { + return( EEP_L1 ); + } + + /* check response */ + if( sc_interpret_resp( msg, 5, + L1_ARG_INT, &spd_len, + L1_ARG_UNKNOWN, &len, spd_p ) < 0 ) + { + return( EEP_L1 ); + } + + if( spd_len > EEPROM_CHUNKSIZE ) + spd_len = EEPROM_CHUNKSIZE; + + spd_p += EEPROM_CHUNKSIZE; + offset += EEPROM_CHUNKSIZE; + } + return EEP_OK; +#endif /* CONFIG_SERIAL_SGI_L1_PROTOCOL */ +} + + +int read_chassis_ia( l1sc_t *sc, int subch, int l1_compt, + eeprom_chassis_ia_t *ia ) +{ + char eep_record[512]; /* scratch area for building up info area */ + char *eep_rec_p = eep_record; /* thumb for moving through eep_record */ + int checksum = 0; /* use to verify eeprom record checksum */ + int i; + + /* Read in info area record from the L1. 
+ */ + if( read_ia( sc, subch, l1_compt, L1_EEP_CHASSIS, eep_record ) + != EEP_OK ) + { + return EEP_L1; + } + + /* Now we've got the whole info area. Transfer it to the data structure. + */ + + eep_rec_p = eep_record; + ia->format = *eep_rec_p++; + ia->length = *eep_rec_p++; + if( ia->length == 0 ) { + /* since we're using 8*ia->length-1 as an array index later, make + * sure it's sane. + */ + db_printf(( "read_chassis_ia: eeprom length byte of ZERO\n" )); + return EEP_L1; + } + ia->type = *eep_rec_p++; + + ia->part_num_tl = *eep_rec_p++; + + (void)BCOPY( eep_rec_p, ia->part_num, (ia->part_num_tl & FIELD_LENGTH_MASK) ); + eep_rec_p += (ia->part_num_tl & FIELD_LENGTH_MASK); + + ia->serial_num_tl = *eep_rec_p++; + + BCOPY( eep_rec_p, ia->serial_num, + (ia->serial_num_tl & FIELD_LENGTH_MASK) ); + eep_rec_p += (ia->serial_num_tl & FIELD_LENGTH_MASK); + + ia->checksum = eep_record[(8 * ia->length) - 1]; + + /* verify checksum */ + eep_rec_p = eep_record; + checksum = 0; + for( i = 0; i < (8 * ia->length); i++ ) { + checksum += *eep_rec_p++; + } + + if( (checksum & 0xff) != 0 ) + { + db_printf(( "read_chassis_ia: bad checksum\n" )); + db_printf(( "read_chassis_ia: target 0x%x uart 0x%x\n", + sc->subch[subch].target, sc->uart )); + return EEP_BAD_CHECKSUM; + } + + return EEP_OK; +} + + +int read_board_ia( l1sc_t *sc, int subch, int l1_compt, + eeprom_board_ia_t *ia ) +{ + char eep_record[512]; /* scratch area for building up info area */ + char *eep_rec_p = eep_record; /* thumb for moving through eep_record */ + int checksum = 0; /* running checksum total */ + int i; + + BZERO( ia, sizeof( eeprom_board_ia_t ) ); + + /* Read in info area record from the L1. + */ + if( read_ia( sc, subch, l1_compt, L1_EEP_BOARD, eep_record ) + != EEP_OK ) + { + db_printf(( "read_board_ia: error reading info area from L1\n" )); + return EEP_L1; + } + + /* Now we've got the whole info area. Transfer it to the data structure. + */ + + eep_rec_p = eep_record; + ia->format = *eep_rec_p++; + ia->length = *eep_rec_p++; + if( ia->length == 0 ) { + /* since we're using 8*ia->length-1 as an array index later, make + * sure it's sane. 
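The checksum convention implied by read_chassis_ia() above, and by fake_an_eeprom_record() earlier, is that every byte of an info area, including the final checksum byte written as 256 - (sum % 256), adds up to zero modulo 256. A minimal verification sketch follows; it is illustrative only, not part of the driver, and the helper name is invented.

/* Illustrative sketch, not part of the driver above: check that an info
 * area of nbytes (8 * length in the code above) checksums to zero. */
static int eeprom_ia_checksum_ok(const unsigned char *area, int nbytes)
{
        unsigned int sum = 0;
        int i;

        for (i = 0; i < nbytes; i++)
                sum += area[i];

        return (sum & 0xff) == 0;       /* same test as (checksum & 0xff) above */
}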
+ */ + db_printf(( "read_board_ia: eeprom length byte of ZERO\n" )); + return EEP_L1; + } + ia->language = *eep_rec_p++; + + ia->mfg_date = eeprom_xlate_board_mfr_date( (uchar_t *)eep_rec_p ); + eep_rec_p += 3; + + ia->manuf_tl = *eep_rec_p++; + + BCOPY( eep_rec_p, ia->manuf, (ia->manuf_tl & FIELD_LENGTH_MASK) ); + eep_rec_p += (ia->manuf_tl & FIELD_LENGTH_MASK); + + ia->product_tl = *eep_rec_p++; + + BCOPY( eep_rec_p, ia->product, (ia->product_tl & FIELD_LENGTH_MASK) ); + eep_rec_p += (ia->product_tl & FIELD_LENGTH_MASK); + + ia->serial_num_tl = *eep_rec_p++; + + BCOPY(eep_rec_p, ia->serial_num, (ia->serial_num_tl & FIELD_LENGTH_MASK)); + eep_rec_p += (ia->serial_num_tl & FIELD_LENGTH_MASK); + + ia->part_num_tl = *eep_rec_p++; + + BCOPY( eep_rec_p, ia->part_num, (ia->part_num_tl & FIELD_LENGTH_MASK) ); + eep_rec_p += (ia->part_num_tl & FIELD_LENGTH_MASK); + + eep_rec_p++; /* we do not use the FRU file id */ + + ia->board_rev_tl = *eep_rec_p++; + + BCOPY( eep_rec_p, ia->board_rev, (ia->board_rev_tl & FIELD_LENGTH_MASK) ); + eep_rec_p += (ia->board_rev_tl & FIELD_LENGTH_MASK); + + ia->eeprom_size_tl = *eep_rec_p++; + ia->eeprom_size = *eep_rec_p++; + + ia->temp_waiver_tl = *eep_rec_p++; + + BCOPY( eep_rec_p, ia->temp_waiver, + (ia->temp_waiver_tl & FIELD_LENGTH_MASK) ); + eep_rec_p += (ia->temp_waiver_tl & FIELD_LENGTH_MASK); + + /* if there's more, we must be reading a main board; get + * additional fields + */ + if( ((unsigned char)*eep_rec_p != (unsigned char)EEPROM_EOF) ) { + + ia->ekey_G_tl = *eep_rec_p++; + BCOPY( eep_rec_p, (char *)&ia->ekey_G, + ia->ekey_G_tl & FIELD_LENGTH_MASK ); + eep_rec_p += (ia->ekey_G_tl & FIELD_LENGTH_MASK); + + ia->ekey_P_tl = *eep_rec_p++; + BCOPY( eep_rec_p, (char *)&ia->ekey_P, + ia->ekey_P_tl & FIELD_LENGTH_MASK ); + eep_rec_p += (ia->ekey_P_tl & FIELD_LENGTH_MASK); + + ia->ekey_Y_tl = *eep_rec_p++; + BCOPY( eep_rec_p, (char *)&ia->ekey_Y, + ia->ekey_Y_tl & FIELD_LENGTH_MASK ); + eep_rec_p += (ia->ekey_Y_tl & FIELD_LENGTH_MASK); + + /* + * need to get a couple more fields if this is an I brick + */ + if( ((unsigned char)*eep_rec_p != (unsigned char)EEPROM_EOF) ) { + + ia->mac_addr_tl = *eep_rec_p++; + BCOPY( eep_rec_p, ia->mac_addr, + ia->mac_addr_tl & FIELD_LENGTH_MASK ); + eep_rec_p += (ia->mac_addr_tl & FIELD_LENGTH_MASK); + + ia->ieee1394_cfg_tl = *eep_rec_p++; + BCOPY( eep_rec_p, ia->ieee1394_cfg, + ia->ieee1394_cfg_tl & FIELD_LENGTH_MASK ); + + } + } + + ia->checksum = eep_record[(ia->length * 8) - 1]; + + /* verify checksum */ + eep_rec_p = eep_record; + checksum = 0; + for( i = 0; i < (8 * ia->length); i++ ) { + checksum += *eep_rec_p++; + } + + if( (checksum & 0xff) != 0 ) + { + db_printf(( "read_board_ia: bad checksum\n" )); + db_printf(( "read_board_ia: target 0x%x uart 0x%x\n", + sc->subch[subch].target, sc->uart )); + return EEP_BAD_CHECKSUM; + } + + return EEP_OK; +} + + +int _cbrick_eeprom_read( eeprom_brd_record_t *buf, l1sc_t *scp, + int component ) +{ +#if !defined(CONFIG_SERIAL_SGI_L1_PROTOCOL) + return EEP_L1; +#else + int r; + uint64_t uid = 0; + char uid_str[32]; + int l1_compt, subch; + + if ( IS_RUNNING_ON_SIMULATOR() ) + return EEP_L1; + + /* make sure we're targeting a cbrick */ + if( !(component & C_BRICK) ) + return EEP_PARAM; + + /* If the promlog variable pointed to by IP27LOG_OVNIC is set, + * use that value for the cbrick UID rather than the EEPROM + * serial number. 
+ */ +#ifdef LOG_GETENV + if( ip27log_getenv( scp->nasid, IP27LOG_OVNIC, uid_str, "0", 0 ) >= 0 ) + { + db_printf(( "_cbrick_eeprom_read: " + "Overriding UID with environment variable %s\n", + IP27LOG_OVNIC )); + uid = strtoull( uid_str, NULL, 0 ); + } +#endif + + if( (subch = sc_open( scp, L1_ADDR_LOCAL )) < 0 ) + return EEP_L1; + + switch( component ) + { + case C_BRICK: + /* c-brick motherboard */ + l1_compt = L1_EEP_NODE; + r = read_chassis_ia( scp, subch, l1_compt, buf->chassis_ia ); + if( r != EEP_OK ) { + sc_close( scp, subch ); + db_printf(( "_cbrick_eeprom_read: using a fake eeprom record\n" )); + return fake_an_eeprom_record( buf, component, uid ); + } + if( uid ) { + /* If IP27LOG_OVNIC is set, we want to put that value + * in as our UID. */ + fake_a_serial_number( buf->chassis_ia->serial_num, uid ); + buf->chassis_ia->serial_num_tl = 6; + } + break; + + case C_PIMM: + /* one of the PIMM boards */ + l1_compt = L1_EEP_PIMM( component & COMPT_MASK ); + break; + + case C_DIMM: + /* one of the DIMMs */ + l1_compt = L1_EEP_DIMM( component & COMPT_MASK ); + r = read_spd( scp, subch, l1_compt, buf->spd ); + sc_close( scp, subch ); + return r; + + default: + /* unsupported board type */ + sc_close( scp, subch ); + return EEP_PARAM; + } + + r = read_board_ia( scp, subch, l1_compt, buf->board_ia ); + sc_close( scp, subch ); + if( r != EEP_OK ) + { + db_printf(( "_cbrick_eeprom_read: using a fake eeprom record\n" )); + return fake_an_eeprom_record( buf, component, uid ); + } + return EEP_OK; +#endif /* CONFIG_SERIAL_SGI_L1_PROTOCOL */ +} + + +int cbrick_eeprom_read( eeprom_brd_record_t *buf, nasid_t nasid, + int component ) +{ +#if !defined(CONFIG_SERIAL_SGI_L1_PROTOCOL) + return EEP_L1; +#else + l1sc_t *scp; + int local = (nasid == get_nasid()); + + if ( IS_RUNNING_ON_SIMULATOR() ) + return EEP_L1; + + /* If this brick is retrieving its own uid, use the local l1sc_t to + * arbitrate access to the l1; otherwise, set up a new one (prom) or + * use an existing remote l1sc_t (kernel) + */ + if( local ) { + scp = get_l1sc(); + } + else { + elsc_t *get_elsc(void); + scp = get_elsc(); + } + + return _cbrick_eeprom_read( buf, scp, component ); +#endif /* CONFIG_SERIAL_SGI_L1_PROTOCOL */ +} + + +int iobrick_eeprom_read( eeprom_brd_record_t *buf, nasid_t nasid, + int component ) +{ +#if !defined(CONFIG_SERIAL_SGI_L1_PROTOCOL) + return EEP_L1; +#else + int r; + int l1_compt, subch; + l1sc_t *scp; + int local = (nasid == get_nasid()); + + if ( IS_RUNNING_ON_SIMULATOR() ) + return EEP_L1; + + /* make sure we're talking to an applicable brick */ + if( !(component & IO_BRICK) ) { + return EEP_PARAM; + } + + /* If we're talking to this c-brick's attached io brick, use + * the local l1sc_t; otherwise, set up a new one (prom) or + * use an existing remote l1sc_t (kernel) + */ + if( local ) { + scp = get_l1sc(); + } + else { + elsc_t *get_elsc(void); + scp = get_elsc(); + } + + if( (subch = sc_open( scp, L1_ADDR_LOCALIO )) < 0 ) + return EEP_L1; + + + switch( component ) + { + case IO_BRICK: + /* IO brick motherboard */ + l1_compt = L1_EEP_LOGIC; + r = read_chassis_ia( scp, subch, l1_compt, buf->chassis_ia ); + + if( r != EEP_OK ) { + sc_close( scp, subch ); +#ifdef BRINGUP /* Once EEPROMs are universally available, remove this */ + r = fake_an_eeprom_record( buf, component, rtc_time() ); +#endif /* BRINGUP */ + return r; + } + break; + + case IO_POWER: + /* IO brick power board */ + l1_compt = L1_EEP_POWER; + break; + + default: + /* unsupported board type */ + sc_close( scp, subch ); + return EEP_PARAM; + 
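For orientation, here is a hedged sketch of how a caller might drive cbrick_eeprom_read() defined above. The record buffers and variable names are invented for the example, and it assumes a kernel context where the CONFIG_SERIAL_SGI_L1_PROTOCOL path is active.

/* Hypothetical caller, illustration only. */
eeprom_brd_record_t rec;
eeprom_chassis_ia_t chassis;
eeprom_board_ia_t board;

rec.chassis_ia = &chassis;
rec.board_ia = &board;
rec.spd = NULL;                         /* only needed when reading a DIMM */

if (cbrick_eeprom_read(&rec, get_nasid(), C_BRICK) == EEP_OK)
        printk("c-brick EEPROM: board rev %c%c\n",
               board.board_rev[0], board.board_rev[1]);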
} + + r = read_board_ia( scp, subch, l1_compt, buf->board_ia ); + sc_close( scp, subch ); + if( r != EEP_OK ) { + return r; + } + return EEP_OK; +#endif /* CONFIG_SERIAL_SGI_L1_PROTOCOL */ +} + + +int vector_eeprom_read( eeprom_brd_record_t *buf, nasid_t nasid, + net_vec_t path, int component ) +{ +#if !defined(CONFIG_SERIAL_SGI_L1_PROTOCOL) + return EEP_L1; +#else + int r; + uint64_t uid = 0; + int l1_compt, subch; + l1sc_t sc; + + if ( IS_RUNNING_ON_SIMULATOR() ) + return EEP_L1; + + /* make sure we're targeting an applicable brick */ + if( !(component & VECTOR) ) + return EEP_PARAM; + + switch( component & BRICK_MASK ) + { + case R_BRICK: + ROUTER_LOCK( path ); + sc_init( &sc, nasid, path ); + + if( (subch = sc_open( &sc, L1_ADDR_LOCAL )) < 0 ) + { + db_printf(( "vector_eeprom_read: couldn't open subch\n" )); + ROUTER_UNLOCK(path); + return EEP_L1; + } + switch( component ) + { + case R_BRICK: + /* r-brick motherboard */ + l1_compt = L1_EEP_LOGIC; + r = read_chassis_ia( &sc, subch, l1_compt, buf->chassis_ia ); + if( r != EEP_OK ) { + sc_close( &sc, subch ); + ROUTER_UNLOCK( path ); + printk( "vector_eeprom_read: couldn't get rbrick eeprom info;" + " using current time as uid\n" ); + uid = rtc_time(); + db_printf(("vector_eeprom_read: using a fake eeprom record\n")); + return fake_an_eeprom_record( buf, component, uid ); + } + break; + + case R_POWER: + /* r-brick power board */ + l1_compt = L1_EEP_POWER; + break; + + default: + /* unsupported board type */ + sc_close( &sc, subch ); + ROUTER_UNLOCK( path ); + return EEP_PARAM; + } + r = read_board_ia( &sc, subch, l1_compt, buf->board_ia ); + sc_close( &sc, subch ); + ROUTER_UNLOCK( path ); + if( r != EEP_OK ) { + db_printf(( "vector_eeprom_read: using a fake eeprom record\n" )); + return fake_an_eeprom_record( buf, component, uid ); + } + return EEP_OK; + + case C_BRICK: + sc_init( &sc, nasid, path ); + return _cbrick_eeprom_read( buf, &sc, component ); + + default: + /* unsupported brick type */ + return EEP_PARAM; + } +#endif /* CONFIG_SERIAL_SGI_L1_PROTOCOL */ +} diff --git a/arch/ia64/sn/io/hcl.c b/arch/ia64/sn/io/hcl.c new file mode 100644 index 000000000..295456feb --- /dev/null +++ b/arch/ia64/sn/io/hcl.c @@ -0,0 +1,1506 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * hcl - SGI's Hardware Graph compatibility layer. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <linux/ctype.h> +#include <linux/module.h> +#include <linux/init.h> +#include <asm/sn/sgi.h> +#include <linux/devfs_fs.h> +#include <linux/devfs_fs_kernel.h> +#include <asm/io.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> + +#define HCL_NAME "SGI-HWGRAPH COMPATIBILITY DRIVER" +#define HCL_TEMP_NAME "HCL_TEMP_NAME_USED_FOR_HWGRAPH_VERTEX_CREATE" +#define HCL_TEMP_NAME_LEN 44 +#define HCL_VERSION "1.0" +devfs_handle_t hwgraph_root = NULL; + +/* + * Debug flag definition. + */ +#define OPTION_NONE 0x00 +#define HCL_DEBUG_NONE 0x00000 +#define HCL_DEBUG_ALL 0x0ffff +#if defined(CONFIG_HCL_DEBUG) +static unsigned int hcl_debug_init __initdata = HCL_DEBUG_NONE; +#endif +static unsigned int hcl_debug = HCL_DEBUG_NONE; +static unsigned int boot_options = OPTION_NONE; + +/* + * Some Global definitions. 
+ */ +spinlock_t hcl_spinlock; +devfs_handle_t hcl_handle = NULL; + +/* + * HCL device driver. + * The purpose of this device driver is to provide a facility + * for User Level Apps e.g. hinv, ioconfig etc. an ioctl path + * to manipulate label entries without having to implement + * system call interfaces. This methodology will enable us to + * make this feature module loadable. + */ +static int hcl_open(struct inode * inode, struct file * filp) +{ + if (hcl_debug) { + printk("HCL: hcl_open called.\n"); + } + + return(0); + +} + +static int hcl_close(struct inode * inode, struct file * filp) +{ + + if (hcl_debug) { + printk("HCL: hcl_close called.\n"); + } + + return(0); + +} + +static int hcl_ioctl(struct inode * inode, struct file * file, + unsigned int cmd, unsigned long arg) +{ + + if (hcl_debug) { + printk("HCL: hcl_ioctl called.\n"); + } + + switch (cmd) { + default: + if (hcl_debug) { + printk("HCL: hcl_ioctl cmd = 0x%x\n", cmd); + } + } + + return(0); + +} + +struct file_operations hcl_fops = { + NULL, /* lseek - default */ + NULL, /* read - general block-dev read */ + NULL, /* write - general block-dev write */ + NULL, /* readdir - bad */ + NULL, /* poll */ + hcl_ioctl, /* ioctl */ + NULL, /* mmap */ + hcl_open, /* open */ + NULL, /* flush */ + hcl_close, /* release */ + NULL, /* fsync */ + NULL, /* fasync */ + NULL, /* check_media_change */ + NULL, /* revalidate */ + NULL /* lock */ +}; + + +/* + * init_hcl() - Boot time initialization. Ensure that it is called + * after devfs has been initialized. + * + * For now this routine is being called out of devfs/base.c. Actually + * Not a bad place to be .. + * + */ +#ifdef MODULE +int init_module (void) +#else +int __init init_hcl(void) +#endif +{ + extern void string_table_init(struct string_table *); + extern struct string_table label_string_table; + int rv = 0; + + printk ("\n%s: v%s Colin Ngam (cngam@sgi.com)\n", + HCL_NAME, HCL_VERSION); +#if defined(CONFIG_HCL_DEBUG) && !defined(MODULE) + hcl_debug = hcl_debug_init; + printk ("%s: hcl_debug: 0x%0x\n", HCL_NAME, hcl_debug); +#endif + printk ("\n%s: boot_options: 0x%0x\n", HCL_NAME, boot_options); + spin_lock_init(&hcl_spinlock); + + /* + * Create the hwgraph_root on devfs. + */ + rv = hwgraph_path_add(NULL, "hw", &hwgraph_root); + if (rv) + printk ("init_hcl: Failed to create hwgraph_root. Error = %d.\n", rv); + + /* + * Create the hcl driver to support inventory entry manipulations. + * By default, it is expected that devfs is mounted on /dev. + * + */ + hcl_handle = hwgraph_register(hwgraph_root, ".hcl", + 0, DEVFS_FL_AUTO_DEVNUM, + 0, 0, + S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, 0, 0, + &hcl_fops, NULL); + + if (hcl_handle == NULL) { + panic("HCL: Unable to create HCL Driver in init_hcl().\n"); + return(0); + } + + /* + * Initialize the HCL string table. + */ + string_table_init(&label_string_table); + + return(0); + +} + + +/* + * hcl_setup() - Process boot time parameters if given. + * "hcl=" + * This routine gets called only if "hcl=" is given in the + * boot line and before init_hcl(). + * + * We currently do not have any boot options .. when we do, + * functionalities can be added here. 
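The comment above describes hcl_setup(), which the __setup("hcl=", ...) hook just below attaches to the "hcl=" boot option; with CONFIG_HCL_DEBUG enabled, the argument "all" turns on HCL_DEBUG_ALL. A hypothetical boot line (the other parameters are placeholders) might look like:

        root=/dev/sda1 console=ttyS0 hcl=all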
+ * + */ +static int __init hcl_setup(char *str) +{ + while ( (*str != '\0') && !isspace (*str) ) + { + printk("HCL: Boot time parameter %s\n", str); +#ifdef CONFIG_HCL_DEBUG + if (strncmp (str, "all", 3) == 0) { + hcl_debug_init |= HCL_DEBUG_ALL; + str += 3; + } else + return 0; +#endif + if (*str != ',') return 0; + ++str; + } + + return 1; + +} + +__setup("hcl=", hcl_setup); + + +/* + * Set device specific "fast information". + * + */ +void +hwgraph_fastinfo_set(devfs_handle_t de, arbitrary_info_t fastinfo) +{ + + if (hcl_debug) { + printk("HCL: hwgraph_fastinfo_set handle 0x%p fastinfo %ld\n", + de, fastinfo); + } + + labelcl_info_replace_IDX(de, HWGRAPH_FASTINFO, fastinfo, NULL); + +} + + +/* + * Get device specific "fast information". + * + */ +arbitrary_info_t +hwgraph_fastinfo_get(devfs_handle_t de) +{ + arbitrary_info_t fastinfo; + int rv; + + if (!de) { + printk(KERN_WARNING "HCL: hwgraph_fastinfo_get handle given is NULL.\n"); + return(-1); + } + + rv = labelcl_info_get_IDX(de, HWGRAPH_FASTINFO, &fastinfo); + if (rv == 0) + return(fastinfo); + + return(0); +} + + +/* + * hwgraph_connectpt_set - Sets the connect point handle in de to the + * given connect_de handle. By default, the connect point of the + * devfs node is the parent. This effectively changes this assumption. + */ +int +hwgraph_connectpt_set(devfs_handle_t de, devfs_handle_t connect_de) +{ + int rv; + + if (!de) + return(-1); + + rv = labelcl_info_connectpt_set(de, connect_de); + + return(rv); +} + + +/* + * hwgraph_connectpt_get: Returns the entry's connect point in the devfs + * tree. + */ +devfs_handle_t +hwgraph_connectpt_get(devfs_handle_t de) +{ + int rv; + arbitrary_info_t info; + devfs_handle_t connect; + + rv = labelcl_info_get_IDX(de, HWGRAPH_CONNECTPT, &info); + if (rv != 0) { + return(NULL); + } + + connect = (devfs_handle_t)info; + return(connect); + +} + + +/* + * hwgraph_mk_dir - Creates a directory entry with devfs. + * Note that a directory entry in devfs can have children + * but it cannot be a char|block special file. + */ +devfs_handle_t +hwgraph_mk_dir(devfs_handle_t de, const char *name, + unsigned int namelen, void *info) +{ + + int rv; + labelcl_info_t *labelcl_info = NULL; + devfs_handle_t new_devfs_handle = NULL; + devfs_handle_t parent = NULL; + + /* + * Create the device info structure for hwgraph compatiblity support. + */ + labelcl_info = labelcl_info_create(); + if (!labelcl_info) + return(NULL); + + /* + * Create a devfs entry. + */ + new_devfs_handle = devfs_mk_dir(de, name, (void *)labelcl_info); + if (!new_devfs_handle) { + labelcl_info_destroy(labelcl_info); + return(NULL); + } + + /* + * Get the parent handle. + */ + parent = devfs_get_parent (new_devfs_handle); + + /* + * To provide the same semantics as the hwgraph, set the connect point. + */ + rv = hwgraph_connectpt_set(new_devfs_handle, parent); + if (!rv) { + /* + * We need to clean up! + */ + } + + /* + * If the caller provides a private data pointer, save it in the + * labelcl info structure(fastinfo). This can be retrieved via + * hwgraph_fastinfo_get() + */ + if (info) + hwgraph_fastinfo_set(new_devfs_handle, (arbitrary_info_t)info); + + return(new_devfs_handle); + +} + +/* + * hwgraph_vertex_create - Create a vertex by giving it a temp name. + */ + +/* + * hwgraph_path_add - Create a directory node with the given path starting + * from the given devfs_handle_t. 
+ */ +extern char * dev_to_name(devfs_handle_t, char *, uint); +int +hwgraph_path_add(devfs_handle_t fromv, + char *path, + devfs_handle_t *new_de) +{ + + unsigned int namelen = strlen(path); + int rv; + + /* + * We need to handle the case when fromv is NULL .. + * in this case we need to create the path from the + * hwgraph root! + */ + if (fromv == NULL) + fromv = hwgraph_root; + + /* + * check the entry doesn't already exist, if it does + * then we simply want new_de to point to it (otherwise + * we'll overwrite the existing labelcl_info struct) + */ + rv = hwgraph_edge_get(fromv, path, new_de); + if (rv) { /* couldn't find entry so we create it */ + *new_de = hwgraph_mk_dir(fromv, path, namelen, NULL); + if (new_de == NULL) + return(-1); + else + return(0); + } + else + return(0); + +} + +/* + * hwgraph_register - Creates a file entry with devfs. + * Note that a file entry cannot have children .. it is like a + * char|block special vertex in hwgraph. + */ +devfs_handle_t +hwgraph_register(devfs_handle_t de, const char *name, + unsigned int namelen, unsigned int flags, + unsigned int major, unsigned int minor, + umode_t mode, uid_t uid, gid_t gid, + struct file_operations *fops, + void *info) +{ + + int rv; + void *labelcl_info = NULL; + devfs_handle_t new_devfs_handle = NULL; + devfs_handle_t parent = NULL; + + /* + * Create the labelcl info structure for hwgraph compatiblity support. + */ + labelcl_info = labelcl_info_create(); + if (!labelcl_info) + return(NULL); + + /* + * Create a devfs entry. + */ + new_devfs_handle = devfs_register(de, name, flags, major, + minor, mode, fops, labelcl_info); + if (!new_devfs_handle) { + labelcl_info_destroy((labelcl_info_t *)labelcl_info); + return(NULL); + } + + /* + * Get the parent handle. + */ + if (de == NULL) + parent = devfs_get_parent (new_devfs_handle); + else + parent = de; + + /* + * To provide the same semantics as the hwgraph, set the connect point. + */ + rv = hwgraph_connectpt_set(new_devfs_handle, parent); + if (rv) { + /* + * We need to clean up! + */ + printk("HCL: Unable to set the connect point to it's parent 0x%p\n", + new_devfs_handle); + } + + /* + * If the caller provides a private data pointer, save it in the + * labelcl info structure(fastinfo). This can be retrieved via + * hwgraph_fastinfo_get() + */ + if (info) + hwgraph_fastinfo_set(new_devfs_handle, (arbitrary_info_t)info); + + return(new_devfs_handle); + +} + + +/* + * hwgraph_mk_symlink - Create a symbolic link. + */ +int +hwgraph_mk_symlink(devfs_handle_t de, const char *name, unsigned int namelen, + unsigned int flags, const char *link, unsigned int linklen, + devfs_handle_t *handle, void *info) +{ + + void *labelcl_info = NULL; + int status = 0; + devfs_handle_t new_devfs_handle = NULL; + + /* + * Create the labelcl info structure for hwgraph compatiblity support. + */ + labelcl_info = labelcl_info_create(); + if (!labelcl_info) + return(-1); + + /* + * Create a symbolic link devfs entry. + */ + status = devfs_mk_symlink(de, name, flags, link, + &new_devfs_handle, labelcl_info); + if ( (!new_devfs_handle) || (!status) ){ + labelcl_info_destroy((labelcl_info_t *)labelcl_info); + return(-1); + } + + /* + * If the caller provides a private data pointer, save it in the + * labelcl info structure(fastinfo). 
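As a rough usage sketch of hwgraph_path_add() and hwgraph_register() above, a driver could publish a character node under the hwgraph as follows. The "mydrv" path, the fops structure and the init function are invented names, and the flag and mode values simply mirror the init_hcl() call earlier.

/* Hypothetical driver snippet, illustration only. */
extern struct file_operations mydrv_fops;       /* assumed to exist */

static int __init mydrv_hwgraph_init(void)
{
        devfs_handle_t dir, dev;

        if (hwgraph_path_add(NULL, "mydrv", &dir) != 0)
                return -1;                      /* could not create /hw/mydrv */

        dev = hwgraph_register(dir, "ctl", 0, DEVFS_FL_AUTO_DEVNUM,
                               0, 0, S_IFCHR | S_IRUSR | S_IWUSR, 0, 0,
                               &mydrv_fops, NULL);
        return dev ? 0 : -1;
}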
This can be retrieved via + * hwgraph_fastinfo_get() + */ + if (info) + hwgraph_fastinfo_set(new_devfs_handle, (arbitrary_info_t)info); + + *handle = new_devfs_handle; + return(0); + +} + +/* + * hwgraph_vertex_get_next - this routine returns the next sibbling for the + * device entry given in de. If there are no more sibbling, NULL + * is returned in next_sibbling. + * + * Currently we do not have any protection against de being deleted + * while it's handle is being held. + */ +int +hwgraph_vertex_get_next(devfs_handle_t *next_sibbling, devfs_handle_t *de) +{ + *next_sibbling = devfs_get_next_sibling (*de); + + if (*next_sibbling != NULL) + *de = *next_sibbling; + return (0); +} + + +/* + * hwgraph_vertex_destroy - Destroy the devfs entry + */ +int +hwgraph_vertex_destroy(devfs_handle_t de) +{ + + void *labelcl_info = NULL; + + labelcl_info = devfs_get_info(de); + devfs_unregister(de); + + if (labelcl_info) + labelcl_info_destroy((labelcl_info_t *)labelcl_info); + + return(0); +} + +/* +** See if a vertex has an outgoing edge with a specified name. +** Vertices in the hwgraph *implicitly* contain these edges: +** "." refers to "current vertex" +** ".." refers to "connect point vertex" +** "char" refers to current vertex (character device access) +** "block" refers to current vertex (block device access) +*/ + +/* + * hwgraph_edge_add - This routines has changed from the original conext. + * All it does now is to create a symbolic link from "from" to "to". + */ +/* ARGSUSED */ +int +hwgraph_edge_add(devfs_handle_t from, devfs_handle_t to, char *name) +{ + + char *path; + int name_start; + devfs_handle_t handle = NULL; + int rv; + + path = kmalloc(1024, GFP_KERNEL); + name_start = devfs_generate_path (to, path, 1024); + + /* + * Otherwise, just create a symlink to the vertex. + * In this case the vertex was previous created with a REAL pathname. + */ + rv = devfs_mk_symlink (from, (const char *)name, + DEVFS_FL_DEFAULT, (const char *)&path[name_start], + &handle, NULL); + + name_start = devfs_generate_path (handle, path, 1024); + return(rv); + + +} +/* ARGSUSED */ +int +hwgraph_edge_get(devfs_handle_t from, char *name, devfs_handle_t *toptr) +{ + + int namelen = 0; + devfs_handle_t target_handle = NULL; + + if (name == NULL) + return(-1); + + if (toptr == NULL) + return(-1); + + /* + * If the name is "." just return the current devfs entry handle. + */ + if (!strcmp(name, HWGRAPH_EDGELBL_DOT)) { + if (toptr) { + *toptr = from; + } + } else if (!strcmp(name, HWGRAPH_EDGELBL_DOTDOT)) { + /* + * Hmmm .. should we return the connect point or parent .. + * see in hwgraph, the concept of parent is the connectpt! + * + * Maybe we should see whether the connectpt is set .. if + * not just return the parent! + */ + target_handle = hwgraph_connectpt_get(from); + if (target_handle) { + /* + * Just return the connect point. + */ + *toptr = target_handle; + return(0); + } + target_handle = devfs_get_parent(from); + *toptr = target_handle; + + } else { + /* + * Call devfs to get the devfs entry. + */ + namelen = (int) strlen(name); + target_handle = devfs_find_handle (from, name, 0, 0, + 0, 1); /* Yes traverse symbolic links */ + if (target_handle == NULL) + return(-1); + else + *toptr = target_handle; + } + + return(0); +} + + +/* + * hwgraph_edge_get_next - Retrieves the next sibbling given the current + * entry number "placeptr". + * + * Allow the caller to retrieve walk through the sibblings of "source" + * devfs_handle_t. The implicit edges "." and ".." 
is returned first + * followed by each of the real children. + * + * We may end up returning garbage if another thread perform any deletion + * in this directory before "placeptr". + * + */ +/* ARGSUSED */ +int +hwgraph_edge_get_next(devfs_handle_t source, char *name, devfs_handle_t *target, + uint *placeptr) + +{ + + uint which_place; + unsigned int namelen = 0; + const char *tempname = NULL; + + if (placeptr == NULL) + return(-1); + + which_place = *placeptr; + +again: + if (which_place <= HWGRAPH_RESERVED_PLACES) { + if (which_place == EDGE_PLACE_WANT_CURRENT) { + /* + * Looking for "." + * Return the current devfs handle. + */ + if (name != NULL) + strcpy(name, HWGRAPH_EDGELBL_DOT); + + if (target != NULL) { + *target = source; + /* XXX should incr "source" ref count here if we + * ever implement ref counts */ + } + + } else if (which_place == EDGE_PLACE_WANT_CONNECTPT) { + /* + * Looking for the connect point or parent. + * If the connect point is set .. it returns the connect point. + * Otherwise, it returns the parent .. will we support + * connect point? + */ + devfs_handle_t connect_point = hwgraph_connectpt_get(source); + + if (connect_point == NULL) { + /* + * No connectpoint set .. either the User + * explicitly NULL it or this node was not + * created via hcl. + */ + which_place++; + goto again; + } + + if (name != NULL) + strcpy(name, HWGRAPH_EDGELBL_DOTDOT); + + if (target != NULL) + *target = connect_point; + + } else if (which_place == EDGE_PLACE_WANT_REAL_EDGES) { + /* + * return first "real" entry in directory, and increment + * placeptr. Next time around we should have + * which_place > HWGRAPH_RESERVED_EDGES so we'll fall through + * this nested if block. + */ + *target = devfs_get_first_child(source); + if (*target && name) { + tempname = devfs_get_name(*target, &namelen); + if (tempname && namelen) + strcpy(name, tempname); + } + + *placeptr = which_place + 1; + return (0); + } + + *placeptr = which_place+1; + return(0); + } + + /* + * walk linked list, (which_place - HWGRAPH_RESERVED_PLACES) times + */ + { + devfs_handle_t curr; + int i = 0; + + for (curr=devfs_get_first_child(source), i= i+HWGRAPH_RESERVED_PLACES; + curr!=NULL && i<which_place; + curr=devfs_get_next_sibling(curr), i++) + ; + *target = curr; + *placeptr = which_place + 1; + if (curr && name) { + tempname = devfs_get_name(*target, &namelen); + printk("hwgraph_edge_get_next: Component name = %s, length = %d\n", tempname, namelen); + if (tempname && namelen) + strcpy(name, tempname); + } + } + if (target == NULL) + return(-1); + else + return(0); +} + +/* + * hwgraph_info_add_LBL - Adds a new label for the device. Mark the info_desc + * of the label as INFO_DESC_PRIVATE and store the info in the label. + */ +/* ARGSUSED */ +int +hwgraph_info_add_LBL( devfs_handle_t de, + char *name, + arbitrary_info_t info) +{ + return(labelcl_info_add_LBL(de, name, INFO_DESC_PRIVATE, info)); +} + +/* + * hwgraph_info_remove_LBL - Remove the label entry for the device. + */ +/* ARGSUSED */ +int +hwgraph_info_remove_LBL( devfs_handle_t de, + char *name, + arbitrary_info_t *old_info) +{ + return(labelcl_info_remove_LBL(de, name, NULL, old_info)); +} + +/* + * hwgraph_info_replace_LBL - replaces an existing label with + * a new label info value. 
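Given the place-counter protocol of hwgraph_edge_get_next() above (the implicit "." and ".." edges come before the real children), a caller can walk the real edges roughly as below. Here source is assumed to be an existing vertex handle; the buffer size and the NULL-target stop condition are assumptions read off the code above.

/* Hypothetical edge walk, illustration only. */
devfs_handle_t target;
char label[64];                                 /* assumed big enough for edge labels */
uint place = EDGE_PLACE_WANT_REAL_EDGES;        /* skip "." and ".." */

while (hwgraph_edge_get_next(source, label, &target, &place) == 0) {
        if (target == NULL)
                break;                          /* walked past the last child */
        printk("edge: %s\n", label);
}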
+ */ +/* ARGSUSED */ +int +hwgraph_info_replace_LBL( devfs_handle_t de, + char *name, + arbitrary_info_t info, + arbitrary_info_t *old_info) +{ + return(labelcl_info_replace_LBL(de, name, + INFO_DESC_PRIVATE, info, + NULL, old_info)); +} +/* + * hwgraph_info_get_LBL - Get and return the info value in the label of the + * device. + */ +/* ARGSUSED */ +int +hwgraph_info_get_LBL( devfs_handle_t de, + char *name, + arbitrary_info_t *infop) +{ + return(labelcl_info_get_LBL(de, name, NULL, infop)); +} + +/* + * hwgraph_info_get_exported_LBL - Retrieve the info_desc and info pointer + * of the given label for the device. The weird thing is that the label + * that matches the name is return irrespective of the info_desc value! + * Do not understand why the word "exported" is used! + */ +/* ARGSUSED */ +int +hwgraph_info_get_exported_LBL( devfs_handle_t de, + char *name, + int *export_info, + arbitrary_info_t *infop) +{ + int rc; + arb_info_desc_t info_desc; + + rc = labelcl_info_get_LBL(de, name, &info_desc, infop); + if (rc == 0) + *export_info = (int)info_desc; + + return(rc); +} + +/* + * hwgraph_info_get_next_LBL - Returns the next label info given the + * current label entry in place. + * + * Once again this has no locking or reference count for protection. + * + */ +/* ARGSUSED */ +int +hwgraph_info_get_next_LBL( devfs_handle_t de, + char *buf, + arbitrary_info_t *infop, + labelcl_info_place_t *place) +{ + return(labelcl_info_get_next_LBL(de, buf, NULL, infop, place)); +} + +/* + * hwgraph_info_export_LBL - Retrieve the specified label entry and modify + * the info_desc field with the given value in nbytes. + */ +/* ARGSUSED */ +int +hwgraph_info_export_LBL(devfs_handle_t de, char *name, int nbytes) +{ + arbitrary_info_t info; + int rc; + + if (nbytes == 0) + nbytes = INFO_DESC_EXPORT; + + if (nbytes < 0) + return(-1); + + rc = labelcl_info_get_LBL(de, name, NULL, &info); + if (rc != 0) + return(rc); + + rc = labelcl_info_replace_LBL(de, name, + nbytes, info, NULL, NULL); + + return(rc); +} + +/* + * hwgraph_info_unexport_LBL - Retrieve the given label entry and change the + * label info_descr filed to INFO_DESC_PRIVATE. + */ +/* ARGSUSED */ +int +hwgraph_info_unexport_LBL(devfs_handle_t de, char *name) +{ + arbitrary_info_t info; + int rc; + + rc = labelcl_info_get_LBL(de, name, NULL, &info); + if (rc != 0) + return(rc); + + rc = labelcl_info_replace_LBL(de, name, + INFO_DESC_PRIVATE, info, NULL, NULL); + + return(rc); +} + +/* + * hwgraph_path_lookup - return the handle for the given path. + * + */ +int +hwgraph_path_lookup( devfs_handle_t start_vertex_handle, + char *lookup_path, + devfs_handle_t *vertex_handle_ptr, + char **remainder) +{ + *vertex_handle_ptr = devfs_find_handle(start_vertex_handle, /* start dir */ + lookup_path, /* path */ + 0, /* major */ + 0, /* minor */ + 0, /* char | block */ + 1); /* traverse symlinks */ + if (*vertex_handle_ptr == NULL) + return(-1); + else + return(0); +} + +/* + * hwgraph_traverse - Find and return the devfs handle starting from de. + * + */ +graph_error_t +hwgraph_traverse(devfs_handle_t de, char *path, devfs_handle_t *found) +{ + /* + * get the directory entry (path should end in a directory) + */ + + *found = devfs_find_handle(de, /* start dir */ + path, /* path */ + 0, /* major */ + 0, /* minor */ + 0, /* char | block */ + 1); /* traverse symlinks */ + if (*found == NULL) + return(GRAPH_NOT_FOUND); + else + return(GRAPH_SUCCESS); +} + +/* + * hwgraph_path_to_vertex - Return the devfs entry handle for the given + * pathname .. 
assume traverse symlinks too!. + */ +devfs_handle_t +hwgraph_path_to_vertex(char *path) +{ + return(devfs_find_handle(NULL, /* start dir */ + path, /* path */ + 0, /* major */ + 0, /* minor */ + 0, /* char | block */ + 1)); /* traverse symlinks */ +} + +/* + * hwgraph_path_to_dev - Returns the devfs_handle_t of the given path .. + * We only deal with devfs handle and not devfs_handle_t. +*/ +devfs_handle_t +hwgraph_path_to_dev(char *path) +{ + devfs_handle_t de; + + de = hwgraph_path_to_vertex(path); + return(de); +} + +/* + * hwgraph_block_device_get - return the handle of the block device file. + * The assumption here is that de is a directory. +*/ +devfs_handle_t +hwgraph_block_device_get(devfs_handle_t de) +{ + return(devfs_find_handle(de, /* start dir */ + "block", /* path */ + 0, /* major */ + 0, /* minor */ + DEVFS_SPECIAL_BLK, /* char | block */ + 1)); /* traverse symlinks */ +} + +/* + * hwgraph_char_device_get - return the handle of the char device file. + * The assumption here is that de is a directory. +*/ +devfs_handle_t +hwgraph_char_device_get(devfs_handle_t de) +{ + return(devfs_find_handle(de, /* start dir */ + "char", /* path */ + 0, /* major */ + 0, /* minor */ + DEVFS_SPECIAL_CHR, /* char | block */ + 1)); /* traverse symlinks */ +} + +/* + * hwgraph_cdevsw_get - returns the fops of the given devfs entry. + */ +struct file_operations * +hwgraph_cdevsw_get(devfs_handle_t de) +{ + return(devfs_get_ops(de)); +} + +/* + * hwgraph_bdevsw_get - returns the fops of the given devfs entry. +*/ +struct file_operations * +hwgraph_bdevsw_get(devfs_handle_t de) +{ + return(devfs_get_ops(de)); +} + +/* +** Inventory is now associated with a vertex in the graph. For items that +** belong in the inventory but have no vertex +** (e.g. old non-graph-aware drivers), we create a bogus vertex under the +** INFO_LBL_INVENT name. +** +** For historical reasons, we prevent exact duplicate entries from being added +** to a single vertex. +*/ + +/* + * hwgraph_inventory_add - Adds an inventory entry into de. + */ +int +hwgraph_inventory_add( devfs_handle_t de, + int class, + int type, + major_t controller, + minor_t unit, + int state) +{ + inventory_t *pinv = NULL, *old_pinv = NULL, *last_pinv = NULL; + int rv; + + /* + * Add our inventory data to the list of inventory data + * associated with this vertex. + */ +again: + /* GRAPH_LOCK_UPDATE(&invent_lock); */ + rv = labelcl_info_get_LBL(de, + INFO_LBL_INVENT, + NULL, (arbitrary_info_t *)&old_pinv); + if ((rv != LABELCL_SUCCESS) && (rv != LABELCL_NOT_FOUND)) + goto failure; + + /* + * Seek to end of inventory items associated with this + * vertex. Along the way, make sure we're not duplicating + * an inventory item (for compatibility with old add_to_inventory) + */ + for (;old_pinv; last_pinv = old_pinv, old_pinv = old_pinv->inv_next) { + if ((int)class != -1 && old_pinv->inv_class != class) + continue; + if ((int)type != -1 && old_pinv->inv_type != type) + continue; + if ((int)state != -1 && old_pinv->inv_state != state) + continue; + if ((int)controller != -1 + && old_pinv->inv_controller != controller) + continue; + if ((int)unit != -1 && old_pinv->inv_unit != unit) + continue; + + /* exact duplicate of previously-added inventory item */ + rv = LABELCL_DUP; + goto failure; + } + + /* Not a duplicate, so we know that we need to add something. */ + if (pinv == NULL) { + /* Release lock while we wait for memory. 
*/ + /* GRAPH_LOCK_DONE_UPDATE(&invent_lock); */ + pinv = (inventory_t *)kmalloc(sizeof(inventory_t), GFP_KERNEL); + replace_in_inventory(pinv, class, type, controller, unit, state); + goto again; + } + + pinv->inv_next = NULL; + if (last_pinv) { + last_pinv->inv_next = pinv; + } else { + rv = labelcl_info_add_LBL(de, INFO_LBL_INVENT, + sizeof(inventory_t), (arbitrary_info_t)pinv); + + if (!rv) + goto failure; + } + + /* GRAPH_LOCK_DONE_UPDATE(&invent_lock); */ + return(0); + +failure: + /* GRAPH_LOCK_DONE_UPDATE(&invent_lock); */ + if (pinv) + kfree(pinv); + return(rv); +} + + +/* + * hwgraph_inventory_remove - Removes an inventory entry. + * + * Remove an inventory item associated with a vertex. It is the caller's + * responsibility to make sure that there are no races between removing + * inventory from a vertex and simultaneously removing that vertex. +*/ +int +hwgraph_inventory_remove( devfs_handle_t de, + int class, + int type, + major_t controller, + minor_t unit, + int state) +{ + inventory_t *pinv = NULL, *last_pinv = NULL, *next_pinv = NULL; + labelcl_error_t rv; + + /* + * We never remove stuff from ".invent" .. + */ + if (!de) + return (-1); + + /* + * Remove our inventory data to the list of inventory data + * associated with this vertex. + */ + /* GRAPH_LOCK_UPDATE(&invent_lock); */ + rv = labelcl_info_get_LBL(de, + INFO_LBL_INVENT, + NULL, (arbitrary_info_t *)&pinv); + if (rv != LABELCL_SUCCESS) + goto failure; + + /* + * Search through inventory items associated with this + * vertex, looking for a match. + */ + for (;pinv; pinv = next_pinv) { + next_pinv = pinv->inv_next; + + if(((int)class == -1 || pinv->inv_class == class) && + ((int)type == -1 || pinv->inv_type == type) && + ((int)state == -1 || pinv->inv_state == state) && + ((int)controller == -1 || pinv->inv_controller == controller) && + ((int)unit == -1 || pinv->inv_unit == unit)) { + + /* Found a matching inventory item. Remove it. */ + if (last_pinv) { + last_pinv->inv_next = pinv->inv_next; + } else { + rv = hwgraph_info_replace_LBL(de, INFO_LBL_INVENT, (arbitrary_info_t)pinv->inv_next, NULL); + if (rv != LABELCL_SUCCESS) + goto failure; + } + + pinv->inv_next = NULL; /* sanity */ + kfree(pinv); + } else + last_pinv = pinv; + } + + if (last_pinv == NULL) { + rv = hwgraph_info_remove_LBL(de, INFO_LBL_INVENT, NULL); + if (rv != LABELCL_SUCCESS) + goto failure; + } + + rv = LABELCL_SUCCESS; + +failure: + /* GRAPH_LOCK_DONE_UPDATE(&invent_lock); */ + return(rv); +} + +/* + * hwgraph_inventory_get_next - Get next inventory item associated with the + * specified vertex. + * + * No locking is really needed. We don't yet have the ability + * to remove inventory items, and new items are always added to + * the end of a vertex' inventory list. + * + * However, a devfs entry can be removed! 
+*/ +int +hwgraph_inventory_get_next(devfs_handle_t de, invplace_t *place, inventory_t **ppinv) +{ + inventory_t *pinv; + labelcl_error_t rv; + + if (de == NULL) + return(LABELCL_BAD_PARAM); + + if (place->invplace_vhdl == NULL) { + place->invplace_vhdl = de; + place->invplace_inv = NULL; + } + + if (de != place->invplace_vhdl) + return(LABELCL_BAD_PARAM); + + if (place->invplace_inv == NULL) { + /* Just starting on this vertex */ + rv = labelcl_info_get_LBL(de, INFO_LBL_INVENT, + NULL, (arbitrary_info_t *)&pinv); + if (rv != LABELCL_SUCCESS) + return(LABELCL_NOT_FOUND); + + } else { + /* Advance to next item on this vertex */ + pinv = place->invplace_inv->inv_next; + } + place->invplace_inv = pinv; + *ppinv = pinv; + + return(LABELCL_SUCCESS); +} + +/* + * hwgraph_controller_num_get - Returns the controller number in the inventory + * entry. + */ +int +hwgraph_controller_num_get(devfs_handle_t device) +{ + inventory_t *pinv; + invplace_t invplace = { NULL, NULL, NULL }; + int val = -1; + if ((pinv = device_inventory_get_next(device, &invplace)) != NULL) { + val = (pinv->inv_class == INV_NETWORK)? pinv->inv_unit: pinv->inv_controller; + } +#ifdef DEBUG + /* + * It does not make any sense to call this on vertexes with multiple + * inventory structs chained together + */ + if ( device_inventory_get_next(device, &invplace) != NULL ) { + printk("Should panic here ... !\n"); + } +#endif + return (val); +} + +/* + * hwgraph_controller_num_set - Sets the controller number in the inventory + * entry. + */ +void +hwgraph_controller_num_set(devfs_handle_t device, int contr_num) +{ + inventory_t *pinv; + invplace_t invplace = { NULL, NULL, NULL }; + if ((pinv = device_inventory_get_next(device, &invplace)) != NULL) { + if (pinv->inv_class == INV_NETWORK) + pinv->inv_unit = contr_num; + else { + if (pinv->inv_class == INV_FCNODE) + pinv = device_inventory_get_next(device, &invplace); + if (pinv != NULL) + pinv->inv_controller = contr_num; + } + } +#ifdef DEBUG + /* + * It does not make any sense to call this on vertexes with multiple + * inventory structs chained together + */ + if(pinv != NULL) + ASSERT(device_inventory_get_next(device, &invplace) == NULL); +#endif +} + +/* + * Find the canonical name for a given vertex by walking back through + * connectpt's until we hit the hwgraph root vertex (or until we run + * out of buffer space or until something goes wrong). + * + * COMPATIBILITY FUNCTIONALITY + * Walks back through 'parents', not necessarily the same as connectpts. + * + * Need to resolve the fact that devfs does not return the path from + * "/" but rather it just stops right before /dev .. + */ +int +hwgraph_vertex_name_get(devfs_handle_t vhdl, char *buf, uint buflen) +{ + char *locbuf; + int pos; + + if (buflen < 1) + return(-1); /* XXX should be GRAPH_BAD_PARAM ? */ + + locbuf = kmalloc(buflen, GFP_KERNEL); + + pos = devfs_generate_path(vhdl, locbuf, buflen); + if (pos < 0) { + kfree(locbuf); + return pos; + } + + strcpy(buf, &locbuf[pos]); + kfree(locbuf); + return 0; +} + +/* +** vertex_to_name converts a vertex into a canonical name by walking +** back through connect points until we hit the hwgraph root (or until +** we run out of buffer space). +** +** Usually returns a pointer to the original buffer, filled in as +** appropriate. If the buffer is too small to hold the entire name, +** or if anything goes wrong while determining the name, vertex_to_name +** returns "UnknownDevice".
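A hedged sketch of walking a vertex's inventory chain with hwgraph_inventory_get_next() above: the invplace_t initializer mirrors the controller-number helpers, and the walk also stops on a NULL item because the routine reports success even once the chain is exhausted. The vertex handle de is assumed to exist.

/* Hypothetical inventory walk, illustration only. */
invplace_t place = { NULL, NULL, NULL };
inventory_t *pinv;

while (hwgraph_inventory_get_next(de, &place, &pinv) == LABELCL_SUCCESS &&
       pinv != NULL) {
        printk("invent: class %d type %d ctlr %d unit %d state %d\n",
               pinv->inv_class, pinv->inv_type, (int)pinv->inv_controller,
               (int)pinv->inv_unit, pinv->inv_state);
}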
+*/ + +#define DEVNAME_UNKNOWN "UnknownDevice" + +char * +vertex_to_name(devfs_handle_t vhdl, char *buf, uint buflen) +{ + if (hwgraph_vertex_name_get(vhdl, buf, buflen) == GRAPH_SUCCESS) + return(buf); + else + return(DEVNAME_UNKNOWN); +} + +#ifdef IRIX +/* +** Return the compact node id of the node that ultimately "owns" the specified +** vertex. In order to do this, we walk back through masters and connect points +** until we reach a vertex that represents a node. +*/ +cnodeid_t +master_node_get(devfs_handle_t vhdl) +{ + cnodeid_t cnodeid; + devfs_handle_t master; + + for (;;) { + cnodeid = nodevertex_to_cnodeid(vhdl); + if (cnodeid != CNODEID_NONE) + return(cnodeid); + + master = device_master_get(vhdl); + + /* Check for exceptional cases */ + if (master == vhdl) { + /* Since we got a reference to the "master" thru + * device_master_get() we should decrement + * its reference count by 1 + */ + hwgraph_vertex_unref(master); + return(CNODEID_NONE); + } + + if (master == GRAPH_VERTEX_NONE) { + master = hwgraph_connectpt_get(vhdl); + if ((master == GRAPH_VERTEX_NONE) || + (master == vhdl)) { + if (master == vhdl) + /* Since we got a reference to the + * "master" thru + * hwgraph_connectpt_get() we should + * decrement its reference count by 1 + */ + hwgraph_vertex_unref(master); + return(CNODEID_NONE); + } + } + + vhdl = master; + /* Decrement the reference to "master" which was got + * either thru device_master_get() or hwgraph_connectpt_get() + * above. + */ + hwgraph_vertex_unref(master); + } +} + +/* + * Using the canonical path name to get hold of the desired vertex handle will + * not work on multi-hub sn0 nodes. Hence, we use the following (slightly + * convoluted) algorithm. + * + * - Start at the vertex corresponding to the driver (provided as input parameter) + * - Loop till you reach a vertex which has EDGE_LBL_MEMORY + * - If EDGE_LBL_CONN exists, follow that up. + * else if EDGE_LBL_MASTER exists, follow that up. + * else follow EDGE_LBL_DOTDOT up. + * + * * We should be at desired hub/heart vertex now * + * - Follow EDGE_LBL_CONN to the widget vertex. + * + * - return vertex handle of this widget. + */ +devfs_handle_t +mem_vhdl_get(devfs_handle_t drv_vhdl) +{ +devfs_handle_t cur_vhdl, cur_upper_vhdl; +devfs_handle_t tmp_mem_vhdl, mem_vhdl; +graph_error_t loop_rv; + + /* Initializations */ + cur_vhdl = drv_vhdl; + loop_rv = ~GRAPH_SUCCESS; + + /* Loop till current vertex has EDGE_LBL_MEMORY */ + while (loop_rv != GRAPH_SUCCESS) { + + if ((hwgraph_edge_get(cur_vhdl, EDGE_LBL_CONN, &cur_upper_vhdl)) == GRAPH_SUCCESS) { + + } else if ((hwgraph_edge_get(cur_vhdl, EDGE_LBL_MASTER, &cur_upper_vhdl)) == GRAPH_SUCCESS) { + } else { /* Follow HWGRAPH_EDGELBL_DOTDOT up */ + (void) hwgraph_edge_get(cur_vhdl, HWGRAPH_EDGELBL_DOTDOT, &cur_upper_vhdl); + } + + cur_vhdl = cur_upper_vhdl; + +#if DEBUG && HWG_DEBUG + printf("Current vhdl %d \n", cur_vhdl); +#endif /* DEBUG */ + + loop_rv = hwgraph_edge_get(cur_vhdl, EDGE_LBL_MEMORY, &tmp_mem_vhdl); + } + + /* We should be at desired hub/heart vertex now */ + if ((hwgraph_edge_get(cur_vhdl, EDGE_LBL_CONN, &mem_vhdl)) != GRAPH_SUCCESS) + return (GRAPH_VERTEX_NONE); + + return (mem_vhdl); +} +#endif /* IRIX */ + + +/* +** Add a char device -- if the driver supports it -- at a specified vertex. +*/ +graph_error_t +hwgraph_char_device_add( devfs_handle_t from, + char *path, + char *prefix, + devfs_handle_t *devhdl) +{ + devfs_handle_t xx = NULL; + + printk("FIXME: hwgraph_char_device_add() called. 
Use hwgraph_register.\n"); + *devhdl = xx; // Must set devhdl + return(GRAPH_SUCCESS); +} + +graph_error_t +hwgraph_edge_remove(devfs_handle_t from, char *name, devfs_handle_t *toptr) +{ + printk("FIXME: hwgraph_edge_remove\n"); + return(GRAPH_ILLEGAL_REQUEST); +} + +graph_error_t +hwgraph_vertex_unref(devfs_handle_t vhdl) +{ + printk("FIXME: hwgraph_vertex_unref\n"); + return(GRAPH_ILLEGAL_REQUEST); +} + + +EXPORT_SYMBOL(hwgraph_mk_dir); +EXPORT_SYMBOL(hwgraph_path_add); +EXPORT_SYMBOL(hwgraph_char_device_add); +EXPORT_SYMBOL(hwgraph_register); +EXPORT_SYMBOL(hwgraph_vertex_destroy); + +EXPORT_SYMBOL(hwgraph_fastinfo_get); +EXPORT_SYMBOL(hwgraph_edge_get); + +EXPORT_SYMBOL(hwgraph_fastinfo_set); +EXPORT_SYMBOL(hwgraph_connectpt_set); +EXPORT_SYMBOL(hwgraph_connectpt_get); +EXPORT_SYMBOL(hwgraph_edge_get_next); +EXPORT_SYMBOL(hwgraph_info_add_LBL); +EXPORT_SYMBOL(hwgraph_info_remove_LBL); +EXPORT_SYMBOL(hwgraph_info_replace_LBL); +EXPORT_SYMBOL(hwgraph_info_get_LBL); +EXPORT_SYMBOL(hwgraph_info_get_exported_LBL); +EXPORT_SYMBOL(hwgraph_info_get_next_LBL); +EXPORT_SYMBOL(hwgraph_info_export_LBL); +EXPORT_SYMBOL(hwgraph_info_unexport_LBL); +EXPORT_SYMBOL(hwgraph_path_lookup); +EXPORT_SYMBOL(hwgraph_traverse); +EXPORT_SYMBOL(hwgraph_path_to_vertex); +EXPORT_SYMBOL(hwgraph_path_to_dev); +EXPORT_SYMBOL(hwgraph_block_device_get); +EXPORT_SYMBOL(hwgraph_char_device_get); +EXPORT_SYMBOL(hwgraph_cdevsw_get); +EXPORT_SYMBOL(hwgraph_bdevsw_get); +EXPORT_SYMBOL(hwgraph_vertex_name_get); diff --git a/arch/ia64/sn/io/hcl_util.c b/arch/ia64/sn/io/hcl_util.c new file mode 100644 index 000000000..d11f49cdd --- /dev/null +++ b/arch/ia64/sn/io/hcl_util.c @@ -0,0 +1,159 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/devfs_fs.h> +#include <linux/devfs_fs_kernel.h> +#include <asm/sn/sgi.h> +#include <asm/io.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/hcl_util.h> +#include <asm/sn/nodepda.h> + +static devfs_handle_t hwgraph_all_cnodes = GRAPH_VERTEX_NONE; +extern devfs_handle_t hwgraph_root; + + +/* +** Return the "master" for a given vertex. A master vertex is a +** controller or adapter or other piece of hardware that the given +** vertex passes through on the way to the rest of the system. +*/ +devfs_handle_t +device_master_get(devfs_handle_t vhdl) +{ + graph_error_t rc; + devfs_handle_t master; + + rc = hwgraph_edge_get(vhdl, EDGE_LBL_MASTER, &master); + if (rc == GRAPH_SUCCESS) + return(master); + else + return(GRAPH_VERTEX_NONE); +} + +/* +** Set the master for a given vertex. +** Returns 0 on success, non-0 indicates failure +*/ +int +device_master_set(devfs_handle_t vhdl, devfs_handle_t master) +{ + graph_error_t rc; + + rc = hwgraph_edge_add(vhdl, master, EDGE_LBL_MASTER); + return(rc != GRAPH_SUCCESS); +} + + +/* +** Return the compact node id of the node that ultimately "owns" the specified +** vertex. In order to do this, we walk back through masters and connect points +** until we reach a vertex that represents a node. 
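To illustrate the master-edge helpers above: device_master_set() records an EDGE_LBL_MASTER edge and returns non-zero on failure, while device_master_get() hands back GRAPH_VERTEX_NONE when no such edge exists. The widget_vhdl and hub_vhdl handles below are invented for the example.

/* Hypothetical use of the master-edge helpers, illustration only. */
if (device_master_set(widget_vhdl, hub_vhdl) != 0)
        printk("could not record master for widget vertex\n");

if (device_master_get(widget_vhdl) == GRAPH_VERTEX_NONE)
        printk("widget vertex has no master\n");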
+*/ +cnodeid_t +master_node_get(devfs_handle_t vhdl) +{ + cnodeid_t cnodeid; + devfs_handle_t master; + + for (;;) { + cnodeid = nodevertex_to_cnodeid(vhdl); + if (cnodeid != CNODEID_NONE) + return(cnodeid); + + master = device_master_get(vhdl); + + /* Check for exceptional cases */ + if (master == vhdl) { + /* Since we got a reference to the "master" thru + * device_master_get() we should decrement + * its reference count by 1 + */ + return(CNODEID_NONE); + } + + if (master == GRAPH_VERTEX_NONE) { + master = hwgraph_connectpt_get(vhdl); + if ((master == GRAPH_VERTEX_NONE) || + (master == vhdl)) { + return(CNODEID_NONE); + } + } + + vhdl = master; + } +} + +/* +** If the specified device represents a node, return its +** compact node ID; otherwise, return CNODEID_NONE. +*/ +cnodeid_t +nodevertex_to_cnodeid(devfs_handle_t vhdl) +{ + int rv = 0; + arbitrary_info_t cnodeid = CNODEID_NONE; + + rv = labelcl_info_get_LBL(vhdl, INFO_LBL_CNODEID, NULL, &cnodeid); + + return((cnodeid_t)cnodeid); +} + +void +mark_nodevertex_as_node(devfs_handle_t vhdl, cnodeid_t cnodeid) +{ + if (cnodeid == CNODEID_NONE) + return; + + cnodeid_to_vertex(cnodeid) = vhdl; + labelcl_info_add_LBL(vhdl, INFO_LBL_CNODEID, INFO_DESC_EXPORT, + (arbitrary_info_t)cnodeid); + + { + char cnodeid_buffer[10]; + + if (hwgraph_all_cnodes == GRAPH_VERTEX_NONE) { + (void)hwgraph_path_add( hwgraph_root, + EDGE_LBL_NODENUM, + &hwgraph_all_cnodes); + } + + sprintf(cnodeid_buffer, "%d", cnodeid); + (void)hwgraph_edge_add( hwgraph_all_cnodes, + vhdl, + cnodeid_buffer); + } +} + + +/* +** dev_to_name converts a devfs_handle_t into a canonical name. If the devfs_handle_t +** represents a vertex in the hardware graph, it is converted in the +** normal way for vertices. If the devfs_handle_t is an old devfs_handle_t (one which +** does not represent a hwgraph vertex), we synthesize a name based +** on major/minor number. +** +** Usually returns a pointer to the original buffer, filled in as +** appropriate. If the buffer is too small to hold the entire name, +** or if anything goes wrong while determining the name, dev_to_name +** returns "UnknownDevice". +*/ +char * +dev_to_name(devfs_handle_t dev, char *buf, uint buflen) +{ + return(vertex_to_name(dev, buf, buflen)); +} + + diff --git a/arch/ia64/sn/io/hubdev.c b/arch/ia64/sn/io/hubdev.c new file mode 100644 index 000000000..64379f081 --- /dev/null +++ b/arch/ia64/sn/io/hubdev.c @@ -0,0 +1,126 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. 
+ * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/iograph.h> +#include <asm/sn/sn1/hubdev.h> +#include <asm/sn/sn_private.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> + +struct hubdev_callout { + int (*attach_method)(devfs_handle_t); + struct hubdev_callout *fp; +}; + +typedef struct hubdev_callout hubdev_callout_t; + +mutex_t hubdev_callout_mutex; +hubdev_callout_t *hubdev_callout_list = NULL; + +void +hubdev_init(void) +{ + mutex_init(&hubdev_callout_mutex, MUTEX_DEFAULT, "hubdev"); + hubdev_callout_list = NULL; +} + +void +hubdev_register(int (*attach_method)(devfs_handle_t)) +{ + hubdev_callout_t *callout; + + ASSERT(attach_method); + + callout = (hubdev_callout_t *)kmem_zalloc(sizeof(hubdev_callout_t), KM_SLEEP); + ASSERT(callout); + + mutex_lock(&hubdev_callout_mutex, PZERO); + /* + * Insert at the front of the list + */ + callout->fp = hubdev_callout_list; + hubdev_callout_list = callout; + callout->attach_method = attach_method; + mutex_unlock(&hubdev_callout_mutex); +} + +int +hubdev_unregister(int (*attach_method)(devfs_handle_t)) +{ + hubdev_callout_t **p; + + ASSERT(attach_method); + + mutex_lock(&hubdev_callout_mutex, PZERO); + /* + * Remove registry element containing attach_method + */ + for (p = &hubdev_callout_list; *p != NULL; p = &(*p)->fp) { + if ((*p)->attach_method == attach_method) { + hubdev_callout_t* victim = *p; + *p = (*p)->fp; + kfree(victim); + mutex_unlock(&hubdev_callout_mutex); + return (0); + } + } + mutex_unlock(&hubdev_callout_mutex); + return (ENOENT); +} + + +int +hubdev_docallouts(devfs_handle_t hub) +{ + hubdev_callout_t *p; + int errcode; + + mutex_lock(&hubdev_callout_mutex, PZERO); + + for (p = hubdev_callout_list; p != NULL; p = p->fp) { + ASSERT(p->attach_method); + errcode = (*p->attach_method)(hub); + if (errcode != 0) { + mutex_unlock(&hubdev_callout_mutex); + return (errcode); + } + } + mutex_unlock(&hubdev_callout_mutex); + return (0); +} + +/* + * Given a hub vertex, return the base address of the Hspec space + * for that hub. + */ +caddr_t +hubdev_prombase_get(devfs_handle_t hub) +{ + hubinfo_t hinfo = NULL; + + hubinfo_get(hub, &hinfo); + ASSERT(hinfo); + + return ((caddr_t)NODE_RBOOT_BASE(hinfo->h_nasid)); +} + +cnodeid_t +hubdev_cnodeid_get(devfs_handle_t hub) +{ + hubinfo_t hinfo = NULL; + hubinfo_get(hub, &hinfo); + ASSERT(hinfo); + + return hinfo->h_cnodeid; +} diff --git a/arch/ia64/sn/io/hubspc.c b/arch/ia64/sn/io/hubspc.c new file mode 100644 index 000000000..a6a229b96 --- /dev/null +++ b/arch/ia64/sn/io/hubspc.c @@ -0,0 +1,447 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. 
+ * Copyright (C) 2000 by Colin Ngam + */ + +/* + * hubspc.c - Hub Memory Space Management Driver + * This driver implements the managers for the following + * memory resources: + * 1) reference counters + */ + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <linux/devfs_fs.h> +#include <linux/devfs_fs_kernel.h> +#include <asm/io.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/mem_refcnt.h> +#include <asm/sn/agent.h> +#include <asm/sn/addrs.h> + + +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) +#include <asm/sn/sn1/ip27config.h> +#include <asm/sn/sn1/hubdev.h> +#include <asm/sn/ksys/elsc.h> +#endif + +#include <asm/sn/hubspc.h> + + +/* Uncomment the following line for tracing */ +/* #define HUBSPC_DEBUG 1 */ + +int hubspc_devflag = D_MP; + +extern void *device_info_get(devfs_handle_t device); +extern void device_info_set(devfs_handle_t device, void *info); + + + +/***********************************************************************/ +/* CPU Prom Space */ +/***********************************************************************/ + +typedef struct cpuprom_info { + devfs_handle_t prom_dev; + devfs_handle_t nodevrtx; + struct cpuprom_info *next; +}cpuprom_info_t; + +static cpuprom_info_t *cpuprom_head; +lock_t cpuprom_spinlock; +#define PROM_LOCK() mutex_spinlock(&cpuprom_spinlock) +#define PROM_UNLOCK(s) mutex_spinunlock(&cpuprom_spinlock, (s)) + +/* + * Add prominfo to the linked list maintained. + */ +void +prominfo_add(devfs_handle_t hub, devfs_handle_t prom) +{ + cpuprom_info_t *info; + int s; + + info = kmalloc(sizeof(cpuprom_info_t), GFP_KERNEL); + ASSERT(info); + info->prom_dev = prom; + info->nodevrtx = hub; + + + s = PROM_LOCK(); + info->next = cpuprom_head; + cpuprom_head = info; + PROM_UNLOCK(s); +} + +void +prominfo_del(devfs_handle_t prom) +{ + int s; + cpuprom_info_t *info; + cpuprom_info_t **prev; + + s = PROM_LOCK(); + prev = &cpuprom_head; + while ( (info = *prev) ) { + if (info->prom_dev == prom) { + *prev = info->next; + PROM_UNLOCK(s); + return; + } + + prev = &info->next; + } + PROM_UNLOCK(s); + ASSERT(0); +} + +devfs_handle_t +prominfo_nodeget(devfs_handle_t prom) +{ + int s; + cpuprom_info_t *info; + + s = PROM_LOCK(); + info = cpuprom_head; + while (info) { + if(info->prom_dev == prom) { + PROM_UNLOCK(s); + return info->nodevrtx; + } + info = info->next; + } + PROM_UNLOCK(s); + return 0; +} + +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) +#define SN_PROMVERSION INV_IP35PROM +#endif + +/* Add "detailed" labelled inventory information to the + * prom vertex + */ +void +cpuprom_detailed_inventory_info_add(devfs_handle_t prom_dev,devfs_handle_t node) +{ + invent_miscinfo_t *cpuprom_inventory_info; + extern invent_generic_t *klhwg_invent_alloc(cnodeid_t cnode, + int class, int size); + cnodeid_t cnode = hubdev_cnodeid_get(node); + + /* Allocate memory for the extra inventory information + * for the prom + */ + cpuprom_inventory_info = (invent_miscinfo_t *) + klhwg_invent_alloc(cnode, INV_PROM, sizeof(invent_miscinfo_t)); + + ASSERT(cpuprom_inventory_info); + + /* Set the enabled flag so that the hinv interprets this + * information + */ + cpuprom_inventory_info->im_gen.ig_flag = INVENT_ENABLED; + cpuprom_inventory_info->im_type = SN_PROMVERSION; + /* Store prom revision into inventory information */ + cpuprom_inventory_info->im_rev 
= IP27CONFIG.pvers_rev; + cpuprom_inventory_info->im_version = IP27CONFIG.pvers_vers; + + + /* Store this info as labelled information hanging off the + * prom device vertex + */ + hwgraph_info_add_LBL(prom_dev, INFO_LBL_DETAIL_INVENT, + (arbitrary_info_t) cpuprom_inventory_info); + /* Export this information so that user programs can get to + * this by using attr_get() + */ + hwgraph_info_export_LBL(prom_dev, INFO_LBL_DETAIL_INVENT, + sizeof(invent_miscinfo_t)); +} + +int +cpuprom_attach(devfs_handle_t node) +{ + devfs_handle_t prom_dev; + + hwgraph_char_device_add(node, EDGE_LBL_PROM, "hubspc_", &prom_dev); +#ifdef HUBSPC_DEBUG + printf("hubspc: prom_attach hub: 0x%x prom: 0x%x\n", node, prom_dev); +#endif /* HUBSPC_DEBUG */ + device_inventory_add(prom_dev, INV_PROM, SN_PROMVERSION, + (major_t)0, (minor_t)0, 0); + + /* Add additional inventory info about the cpu prom like + * revision & version numbers etc. + */ + cpuprom_detailed_inventory_info_add(prom_dev,node); + device_info_set(prom_dev, (void*)(ulong)HUBSPC_PROM); + prominfo_add(node, prom_dev); + + return (0); +} + +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) +#define FPROM_CONFIG_ADDR MD_JUNK_BUS_TIMING +#define FPROM_ENABLE_MASK MJT_FPROM_ENABLE_MASK +#define FPROM_ENABLE_SHFT MJT_FPROM_ENABLE_SHFT +#define FPROM_SETUP_MASK MJT_FPROM_SETUP_MASK +#define FPROM_SETUP_SHFT MJT_FPROM_SETUP_SHFT +#endif + +/*ARGSUSED*/ +int +cpuprom_map(devfs_handle_t dev, vhandl_t *vt, off_t addr, size_t len) +{ + int errcode; + caddr_t kvaddr; + devfs_handle_t node; + cnodeid_t cnode; + + node = prominfo_nodeget(dev); + + if (!node) + return EIO; + + + kvaddr = hubdev_prombase_get(node); + cnode = hubdev_cnodeid_get(node); +#ifdef HUBSPC_DEBUG + printf("cpuprom_map: hubnode %d kvaddr 0x%x\n", node, kvaddr); +#endif + + if (len > RBOOT_SIZE) + len = RBOOT_SIZE; + /* + * Map in the prom space + */ + errcode = v_mapphys(vt, kvaddr, len); + + if (errcode == 0 ){ + /* + * Set the MD configuration registers suitably. 
+ */ + nasid_t nasid; + uint64_t value; + volatile hubreg_t *regaddr; + + nasid = COMPACT_TO_NASID_NODEID(cnode); + regaddr = REMOTE_HUB_ADDR(nasid, FPROM_CONFIG_ADDR); + value = HUB_L(regaddr); + value &= ~(FPROM_SETUP_MASK | FPROM_ENABLE_MASK); + { + value |= (((long)CONFIG_FPROM_SETUP << FPROM_SETUP_SHFT) | + ((long)CONFIG_FPROM_ENABLE << FPROM_ENABLE_SHFT)); + } + HUB_S(regaddr, value); + + } + return (errcode); +} + +/*ARGSUSED*/ +int +cpuprom_unmap(devfs_handle_t dev, vhandl_t *vt) +{ + return 0; +} + +/***********************************************************************/ +/* Base Hub Space Driver */ +/***********************************************************************/ + +// extern int l1_attach( devfs_handle_t ); + +/* + * hubspc_init + * Registration of the hubspc devices with the hub manager + */ +void +hubspc_init(void) +{ + /* + * Register with the hub manager + */ + + /* The reference counters */ + hubdev_register(mem_refcnt_attach); + + /* Prom space */ + hubdev_register(cpuprom_attach); + +#if defined(CONFIG_SERIAL_SGI_L1_PROTOCOL) + /* L1 system controller link */ + if ( !IS_RUNNING_ON_SIMULATOR() ) { + /* initialize the L1 link */ + void l1_cons_init( l1sc_t *sc ); + elsc_t *get_elsc(void); + + l1_cons_init((l1sc_t *)get_elsc()); + } +#endif + +#ifdef HUBSPC_DEBUG + printf("hubspc_init: Completed\n"); +#endif /* HUBSPC_DEBUG */ + /* Initialize spinlocks */ + spinlock_init(&cpuprom_spinlock, "promlist"); +} + +/* ARGSUSED */ +int +hubspc_open(devfs_handle_t *devp, mode_t oflag, int otyp, cred_t *crp) +{ + int errcode = 0; + + switch ((hubspc_subdevice_t)(ulong)device_info_get(*devp)) { + case HUBSPC_REFCOUNTERS: + errcode = mem_refcnt_open(devp, oflag, otyp, crp); + break; + + case HUBSPC_PROM: + /* Check if the user has proper access rights to + * read/write the prom space. 
+ */ + if (!cap_able(CAP_DEVICE_MGT)) { + errcode = EPERM; + } + break; + + default: + errcode = ENODEV; + } + +#ifdef HUBSPC_DEBUG + printf("hubspc_open: Completed open for type %d\n", + (hubspc_subdevice_t)(ulong)device_info_get(*devp)); +#endif /* HUBSPC_DEBUG */ + + return (errcode); +} + + +/* ARGSUSED */ +int +hubspc_close(devfs_handle_t dev, int oflag, int otyp, cred_t *crp) +{ + int errcode = 0; + + switch ((hubspc_subdevice_t)(ulong)device_info_get(dev)) { + case HUBSPC_REFCOUNTERS: + errcode = mem_refcnt_close(dev, oflag, otyp, crp); + break; + + case HUBSPC_PROM: + break; + default: + errcode = ENODEV; + } + +#ifdef HUBSPC_DEBUG + printf("hubspc_close: Completed close for type %d\n", + (hubspc_subdevice_t)(ulong)device_info_get(dev)); +#endif /* HUBSPC_DEBUG */ + + return (errcode); +} + +/* ARGSUSED */ +int +hubspc_map(devfs_handle_t dev, vhandl_t *vt, off_t off, size_t len, uint prot) +{ + /*REFERENCED*/ + hubspc_subdevice_t subdevice; + int errcode = 0; + + /* check validity of request */ + if( len == 0 ) { + return ENXIO; + } + + subdevice = (hubspc_subdevice_t)(ulong)device_info_get(dev); + +#ifdef HUBSPC_DEBUG + printf("hubspc_map: subdevice: %d vaddr: 0x%x phyaddr: 0x%x len: 0x%x\n", + subdevice, v_getaddr(vt), off, len); +#endif /* HUBSPC_DEBUG */ + + switch ((hubspc_subdevice_t)(ulong)device_info_get(dev)) { + case HUBSPC_REFCOUNTERS: + errcode = mem_refcnt_mmap(dev, vt, off, len, prot); + break; + + case HUBSPC_PROM: + errcode = cpuprom_map(dev, vt, off, len); + break; + default: + errcode = ENODEV; + } + +#ifdef HUBSPC_DEBUG + printf("hubspc_map finished: spctype: %d vaddr: 0x%x len: 0x%x\n", + (hubspc_subdevice_t)(ulong)device_info_get(dev), v_getaddr(vt), len); +#endif /* HUBSPC_DEBUG */ + + return errcode; +} + +/* ARGSUSED */ +int +hubspc_unmap(devfs_handle_t dev, vhandl_t *vt) +{ + int errcode = 0; + + switch ((hubspc_subdevice_t)(ulong)device_info_get(dev)) { + case HUBSPC_REFCOUNTERS: + errcode = mem_refcnt_unmap(dev, vt); + break; + + case HUBSPC_PROM: + errcode = cpuprom_unmap(dev, vt); + break; + + default: + errcode = ENODEV; + } + return errcode; + +} + +/* ARGSUSED */ +int +hubspc_ioctl(devfs_handle_t dev, + int cmd, + void *arg, + int mode, + cred_t *cred_p, + int *rvalp) +{ + int errcode = 0; + + switch ((hubspc_subdevice_t)(ulong)device_info_get(dev)) { + case HUBSPC_REFCOUNTERS: + errcode = mem_refcnt_ioctl(dev, cmd, arg, mode, cred_p, rvalp); + break; + + case HUBSPC_PROM: + break; + + default: + errcode = ENODEV; + } + return errcode; + +} diff --git a/arch/ia64/sn/io/invent.c b/arch/ia64/sn/io/invent.c new file mode 100644 index 000000000..011294d84 --- /dev/null +++ b/arch/ia64/sn/io/invent.c @@ -0,0 +1,197 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +/* + * Hardware Inventory + * + * See sys/sn/invent.h for an explanation of the hardware inventory contents. + * + */ +#include <linux/types.h> +#include <asm/sn/sgi.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> + +void +inventinit(void) +{ +} + +/* + * For initializing/updating an inventory entry. 
+ */ +void +replace_in_inventory( + inventory_t *pinv, int class, int type, + int controller, int unit, int state) +{ + pinv->inv_class = class; + pinv->inv_type = type; + pinv->inv_controller = controller; + pinv->inv_unit = unit; + pinv->inv_state = state; +} + +/* + * Inventory addition + * + * XXX NOTE: Currently must be called after dynamic memory allocator is + * initialized. + * + */ +void +add_to_inventory(int class, int type, int controller, int unit, int state) +{ + (void)device_inventory_add((devfs_handle_t)GRAPH_VERTEX_NONE, class, type, + controller, unit, state); +} + + +/* + * Inventory retrieval + * + * These two routines are intended to prevent the caller from having to know + * the internal structure of the inventory table. + * + */ +inventory_t * +get_next_inventory(invplace_t *place) +{ + inventory_t *pinv; + devfs_handle_t device = place->invplace_vhdl; + int rv; + + while ((pinv = device_inventory_get_next(device, place)) == NULL) { + /* + * We've exhausted inventory items on the last device. + * Advance to next device. + */ + rv = hwgraph_vertex_get_next(&device, &place->invplace_vplace); + if (rv != LABELCL_SUCCESS) + return(NULL); + place->invplace_vhdl = device; + place->invplace_inv = NULL; /* Start from beginning invent on this device */ + } + + return(pinv); +} + +/* ARGSUSED */ +int +get_sizeof_inventory(int abi) +{ + return sizeof(inventory_t); +} + +/* + * Hardware inventory scanner. + * + * Calls fun() for every entry in inventory list unless fun() returns something + * other than 0. + */ +int +scaninvent(int (*fun)(inventory_t *, void *), void *arg) +{ + inventory_t *ie; + invplace_t iplace = { NULL,NULL, NULL }; + int rc; + + ie = 0; + rc = 0; + while ( (ie = (inventory_t *)get_next_inventory(&iplace)) ) { + rc = (*fun)(ie, arg); + if (rc) + break; + } + return rc; +} + +/* + * Find a particular inventory object + * + * pinv can be a pointer to an inventory entry and the search will begin from + * there, or it can be 0 in which case the search starts at the beginning. + * A -1 for any of the other arguments is a wildcard (i.e. it always matches). + */ +inventory_t * +find_inventory(inventory_t *pinv, int class, int type, int controller, + int unit, int state) +{ + invplace_t iplace = { NULL,NULL, NULL }; + + while ((pinv = (inventory_t *)get_next_inventory(&iplace)) != NULL) { + if (class != -1 && pinv->inv_class != class) + continue; + if (type != -1 && pinv->inv_type != type) + continue; + + /* XXXX - perhaps the "state" entry should be ignored so an + * an existing entry can be updated. See vino_init() and + * ml/IP22.c:add_ioboard() for an example. + */ + if (state != -1 && pinv->inv_state != state) + continue; + if (controller != -1 + && pinv->inv_controller != controller) + continue; + if (unit != -1 && pinv->inv_unit != unit) + continue; + break; + } + + return(pinv); +} + + +/* +** Retrieve inventory data associated with a device. +*/ +inventory_t * +device_inventory_get_next( devfs_handle_t device, + invplace_t *invplace) +{ + inventory_t *pinv; + int rv; + + rv = hwgraph_inventory_get_next(device, invplace, &pinv); + if (rv == LABELCL_SUCCESS) + return(pinv); + else + return(NULL); +} + + +/* +** Associate canonical inventory information with a device (and +** add it to the general inventory). 
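A rough sketch of how the retrieval helpers above fit together: scaninvent() drives a caller-supplied function over every inventory entry, and find_inventory() treats -1 as a wildcard for any field it should ignore. The count_entries and inventory_example names are hypothetical; the helpers, INV_PROM, and the inventory_t fields come from the code above.

#include <asm/sn/invent.h>

/* Hypothetical callers of the inventory helpers above. */
static int
count_entries(inventory_t *ie, void *arg)
{
        (*(int *)arg)++;
        return 0;                       /* non-zero would stop scaninvent() */
}

void
inventory_example(void)
{
        int n = 0;
        inventory_t *pinv;

        scaninvent(count_entries, &n);  /* n now holds the entry count */

        /* class INV_PROM, any type/controller/unit/state */
        pinv = find_inventory(NULL, INV_PROM, -1, -1, -1, -1);
        /* pinv points at the first matching entry, or NULL */
}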
+*/ +void +device_inventory_add( devfs_handle_t device, + int class, + int type, + major_t controller, + minor_t unit, + int state) +{ + hwgraph_inventory_add(device, class, type, controller, unit, state); +} + +int +device_controller_num_get(devfs_handle_t device) +{ + return (hwgraph_controller_num_get(device)); +} + +void +device_controller_num_set(devfs_handle_t device, int contr_num) +{ + hwgraph_controller_num_set(device, contr_num); +} diff --git a/arch/ia64/sn/io/io.c b/arch/ia64/sn/io/io.c new file mode 100644 index 000000000..1e436baad --- /dev/null +++ b/arch/ia64/sn/io/io.c @@ -0,0 +1,1311 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <asm/sn/types.h> +#include <asm/sn/sgi.h> +#include <asm/sn/cmn_err.h> +#include <asm/sn/iobus.h> +#include <asm/sn/iograph.h> +#include <asm/param.h> +#include <asm/sn/pio.h> +#include <asm/sn/xtalk/xwidget.h> +#include <asm/sn/sn_private.h> +#include <asm/sn/addrs.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/hcl_util.h> +#include <asm/sn/agent.h> +#include <asm/sn/intr.h> +#include <asm/sn/xtalk/xtalkaddrs.h> +#include <asm/sn/klconfig.h> +#include <asm/sn/io.h> +#include <asm/sn/sn_cpuid.h> + +extern xtalk_provider_t hub_provider; + +#ifndef CONFIG_IA64_SGI_IO +/* Global variables */ +extern pdaindr_t pdaindr[MAXCPUS]; +#endif + +/* + * Perform any initializations needed to support hub-based I/O. + * Called once during startup. + */ +void +hubio_init(void) +{ +#if 0 + /* This isn't needed unless we port the entire sio driver ... */ + extern void early_brl1_port_init( void ); + early_brl1_port_init(); +#endif +} + +/* + * Implementation of hub iobus operations. + * + * Hub provides a crosstalk "iobus" on IP27 systems. These routines + * provide a platform-specific implementation of xtalk used by all xtalk + * cards on IP27 systems. + * + * Called from corresponding xtalk_* routines. + */ + + +/* PIO MANAGEMENT */ +/* For mapping system virtual address space to xtalk space on a specified widget */ + +/* + * Setup pio structures needed for a particular hub. 
+ */ +static void +hub_pio_init(devfs_handle_t hubv) +{ + xwidgetnum_t widget; + hubinfo_t hubinfo; + nasid_t nasid; + int bigwin; + hub_piomap_t hub_piomap; + + hubinfo_get(hubv, &hubinfo); + nasid = hubinfo->h_nasid; + + /* Initialize small window piomaps for this hub */ + for (widget=0; widget <= HUB_WIDGET_ID_MAX; widget++) { + hub_piomap = hubinfo_swin_piomap_get(hubinfo, (int)widget); + hub_piomap->hpio_xtalk_info.xp_target = widget; + hub_piomap->hpio_xtalk_info.xp_xtalk_addr = 0; + hub_piomap->hpio_xtalk_info.xp_mapsz = SWIN_SIZE; + hub_piomap->hpio_xtalk_info.xp_kvaddr = (caddr_t)NODE_SWIN_BASE(nasid, widget); + hub_piomap->hpio_hub = hubv; + hub_piomap->hpio_flags = HUB_PIOMAP_IS_VALID; + } + + /* Initialize big window piomaps for this hub */ + for (bigwin=0; bigwin < HUB_NUM_BIG_WINDOW; bigwin++) { + hub_piomap = hubinfo_bwin_piomap_get(hubinfo, bigwin); + hub_piomap->hpio_xtalk_info.xp_mapsz = BWIN_SIZE; + hub_piomap->hpio_hub = hubv; + hub_piomap->hpio_holdcnt = 0; + hub_piomap->hpio_flags = HUB_PIOMAP_IS_BIGWINDOW; + IIO_ITTE_DISABLE(nasid, bigwin); + } +#ifdef BRINGUP + hub_set_piomode(nasid, HUB_PIO_CONVEYOR); +#else + /* Set all the xwidgets in fire-and-forget mode + * by default + */ + hub_set_piomode(nasid, HUB_PIO_FIRE_N_FORGET); +#endif /* BRINGUP */ + + sv_init(&hubinfo->h_bwwait, SV_FIFO, "bigwin"); + spinlock_init(&hubinfo->h_bwlock, "bigwin"); +} + +/* + * Create a caddr_t-to-xtalk_addr mapping. + * + * Use a small window if possible (that's the usual case), but + * manage big windows if needed. Big window mappings can be + * either FIXED or UNFIXED -- we keep at least 1 big window available + * for UNFIXED mappings. + * + * Returns an opaque pointer-sized type which can be passed to + * other hub_pio_* routines on success, or NULL if the request + * cannot be satisfied. + */ +/* ARGSUSED */ +hub_piomap_t +hub_piomap_alloc(devfs_handle_t dev, /* set up mapping for this device */ + device_desc_t dev_desc, /* device descriptor */ + iopaddr_t xtalk_addr, /* map for this xtalk_addr range */ + size_t byte_count, + size_t byte_count_max, /* maximum size of a mapping */ + unsigned flags) /* defined in sys/pio.h */ +{ + xwidget_info_t widget_info = xwidget_info_get(dev); + xwidgetnum_t widget = xwidget_info_id_get(widget_info); + devfs_handle_t hubv = xwidget_info_master_get(widget_info); + hubinfo_t hubinfo; + hub_piomap_t bw_piomap; + int bigwin, free_bw_index; + nasid_t nasid; + volatile hubreg_t junk; + int s; + + /* sanity check */ + if (byte_count_max > byte_count) + return(NULL); + + hubinfo_get(hubv, &hubinfo); + + /* If xtalk_addr range is mapped by a small window, we don't have + * to do much + */ + if (xtalk_addr + byte_count <= SWIN_SIZE) + return(hubinfo_swin_piomap_get(hubinfo, (int)widget)); + + /* We need to use a big window mapping. */ + + /* + * TBD: Allow requests that would consume multiple big windows -- + * split the request up and use multiple mapping entries. + * For now, reject requests that span big windows. + */ + if ((xtalk_addr % BWIN_SIZE) + byte_count > BWIN_SIZE) + return(NULL); + + + /* Round xtalk address down for big window alignement */ + xtalk_addr = xtalk_addr & ~(BWIN_SIZE-1); + + /* + * Check to see if an existing big window mapping will suffice. 
+ */ +tryagain: + free_bw_index = -1; + s = mutex_spinlock(&hubinfo->h_bwlock); + for (bigwin=0; bigwin < HUB_NUM_BIG_WINDOW; bigwin++) { + bw_piomap = hubinfo_bwin_piomap_get(hubinfo, bigwin); + + /* If mapping is not valid, skip it */ + if (!(bw_piomap->hpio_flags & HUB_PIOMAP_IS_VALID)) { + free_bw_index = bigwin; + continue; + } + + /* + * If mapping is UNFIXED, skip it. We don't allow sharing + * of UNFIXED mappings, because this would allow starvation. + */ + if (!(bw_piomap->hpio_flags & HUB_PIOMAP_IS_FIXED)) + continue; + + if ( xtalk_addr == bw_piomap->hpio_xtalk_info.xp_xtalk_addr && + widget == bw_piomap->hpio_xtalk_info.xp_target) { + bw_piomap->hpio_holdcnt++; + mutex_spinunlock(&hubinfo->h_bwlock, s); + return(bw_piomap); + } + } + + /* + * None of the existing big window mappings will work for us -- + * we need to establish a new mapping. + */ + + /* Insure that we don't consume all big windows with FIXED mappings */ + if (flags & PIOMAP_FIXED) { + if (hubinfo->h_num_big_window_fixed < HUB_NUM_BIG_WINDOW-1) { + ASSERT(free_bw_index >= 0); + hubinfo->h_num_big_window_fixed++; + } else { + bw_piomap = NULL; + goto done; + } + } else /* PIOMAP_UNFIXED */ { + if (free_bw_index < 0) { + if (flags & PIOMAP_NOSLEEP) { + bw_piomap = NULL; + goto done; + } + + sv_wait(&hubinfo->h_bwwait, PZERO, &hubinfo->h_bwlock, s); + goto tryagain; + } + } + + + /* OK! Allocate big window free_bw_index for this mapping. */ + /* + * The code below does a PIO write to setup an ITTE entry. + * We need to prevent other CPUs from seeing our updated memory + * shadow of the ITTE (in the piomap) until the ITTE entry is + * actually set up; otherwise, another CPU might attempt a PIO + * prematurely. + * + * Also, the only way we can know that an entry has been received + * by the hub and can be used by future PIO reads/writes is by + * reading back the ITTE entry after writing it. + * + * For these two reasons, we PIO read back the ITTE entry after + * we write it. + */ + + nasid = hubinfo->h_nasid; + IIO_ITTE_PUT(nasid, free_bw_index, HUB_PIO_MAP_TO_MEM, widget, xtalk_addr); + junk = HUB_L(IIO_ITTE_GET(nasid, free_bw_index)); + + bw_piomap = hubinfo_bwin_piomap_get(hubinfo, free_bw_index); + bw_piomap->hpio_xtalk_info.xp_dev = dev; + bw_piomap->hpio_xtalk_info.xp_target = widget; + bw_piomap->hpio_xtalk_info.xp_xtalk_addr = xtalk_addr; + bw_piomap->hpio_xtalk_info.xp_kvaddr = (caddr_t)NODE_BWIN_BASE(nasid, free_bw_index); + bw_piomap->hpio_holdcnt++; + bw_piomap->hpio_bigwin_num = free_bw_index; + + if (flags & PIOMAP_FIXED) + bw_piomap->hpio_flags |= HUB_PIOMAP_IS_VALID | HUB_PIOMAP_IS_FIXED; + else + bw_piomap->hpio_flags |= HUB_PIOMAP_IS_VALID; + +done: + mutex_spinunlock(&hubinfo->h_bwlock, s); + return(bw_piomap); +} + +/* + * hub_piomap_free destroys a caddr_t-to-xtalk pio mapping and frees + * any associated mapping resources. + * + * If this * piomap was handled with a small window, or if it was handled + * in a big window that's still in use by someone else, then there's + * nothing to do. On the other hand, if this mapping was handled + * with a big window, AND if we were the final user of that mapping, + * then destroy the mapping. + */ +void +hub_piomap_free(hub_piomap_t hub_piomap) +{ + devfs_handle_t hubv; + hubinfo_t hubinfo; + nasid_t nasid; + int s; + + /* + * Small windows are permanently mapped to corresponding widgets, + * so there're no resources to free. 
+ */ + if (!(hub_piomap->hpio_flags & HUB_PIOMAP_IS_BIGWINDOW)) + return; + + ASSERT(hub_piomap->hpio_flags & HUB_PIOMAP_IS_VALID); + ASSERT(hub_piomap->hpio_holdcnt > 0); + + hubv = hub_piomap->hpio_hub; + hubinfo_get(hubv, &hubinfo); + nasid = hubinfo->h_nasid; + + s = mutex_spinlock(&hubinfo->h_bwlock); + + /* + * If this is the last hold on this mapping, free it. + */ + if (--hub_piomap->hpio_holdcnt == 0) { + IIO_ITTE_DISABLE(nasid, hub_piomap->hpio_bigwin_num ); + + if (hub_piomap->hpio_flags & HUB_PIOMAP_IS_FIXED) { + hub_piomap->hpio_flags &= ~(HUB_PIOMAP_IS_VALID | HUB_PIOMAP_IS_FIXED); + hubinfo->h_num_big_window_fixed--; + ASSERT(hubinfo->h_num_big_window_fixed >= 0); + } else + hub_piomap->hpio_flags &= ~HUB_PIOMAP_IS_VALID; + + (void)sv_signal(&hubinfo->h_bwwait); + } + + mutex_spinunlock(&hubinfo->h_bwlock, s); +} + +/* + * Establish a mapping to a given xtalk address range using the resources + * allocated earlier. + */ +caddr_t +hub_piomap_addr(hub_piomap_t hub_piomap, /* mapping resources */ + iopaddr_t xtalk_addr, /* map for this xtalk address */ + size_t byte_count) /* map this many bytes */ +{ + /* Verify that range can be mapped using the specified piomap */ + if (xtalk_addr < hub_piomap->hpio_xtalk_info.xp_xtalk_addr) + return(0); + + if (xtalk_addr + byte_count > + ( hub_piomap->hpio_xtalk_info.xp_xtalk_addr + + hub_piomap->hpio_xtalk_info.xp_mapsz)) + return(0); + + if (hub_piomap->hpio_flags & HUB_PIOMAP_IS_VALID) + return(hub_piomap->hpio_xtalk_info.xp_kvaddr + + (xtalk_addr % hub_piomap->hpio_xtalk_info.xp_mapsz)); + else + return(0); +} + + +/* + * Driver indicates that it's done with PIO's from an earlier piomap_addr. + */ +/* ARGSUSED */ +void +hub_piomap_done(hub_piomap_t hub_piomap) /* done with these mapping resources */ +{ + /* Nothing to do */ +} + + +/* + * For translations that require no mapping resources, supply a kernel virtual + * address that maps to the specified xtalk address range. + */ +/* ARGSUSED */ +caddr_t +hub_piotrans_addr( devfs_handle_t dev, /* translate to this device */ + device_desc_t dev_desc, /* device descriptor */ + iopaddr_t xtalk_addr, /* Crosstalk address */ + size_t byte_count, /* map this many bytes */ + unsigned flags) /* (currently unused) */ +{ + xwidget_info_t widget_info = xwidget_info_get(dev); + xwidgetnum_t widget = xwidget_info_id_get(widget_info); + devfs_handle_t hubv = xwidget_info_master_get(widget_info); + hub_piomap_t hub_piomap; + hubinfo_t hubinfo; + + hubinfo_get(hubv, &hubinfo); + + if (xtalk_addr + byte_count <= SWIN_SIZE) { + hub_piomap = hubinfo_swin_piomap_get(hubinfo, (int)widget); + return(hub_piomap_addr(hub_piomap, xtalk_addr, byte_count)); + } else + return(0); +} + + +/* DMA MANAGEMENT */ +/* Mapping from crosstalk space to system physical space */ + +/* + * There's not really very much to do here, since crosstalk maps + * directly to system physical space. It's quite possible that this + * DMA layer will be bypassed in performance kernels. + */ + + +/* ARGSUSED */ +static void +hub_dma_init(devfs_handle_t hubv) +{ +} + + +/* + * Allocate resources needed to set up DMA mappings up to a specified size + * on a specified adapter. + * + * We don't actually use the adapter ID for anything. It's just the adapter + * that the lower level driver plans to use for DMA. 
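Taken together, the PIO routines above give the usual alloc/addr/done/free sequence. In practice drivers reach them through the generic xtalk_piomap_* wrappers that indirect through the hub_provider table at the end of this file, but the sequence is the same. This is a rough sketch only; widget_dev and the crosstalk offsets are placeholders.

/* Hypothetical PIO mapping sequence using the routines above. */
void
pio_example(devfs_handle_t widget_dev)
{
        hub_piomap_t map;
        caddr_t kv;

        /* requests that fit within SWIN_SIZE reuse the permanent small window */
        map = hub_piomap_alloc(widget_dev, (device_desc_t)0, 0x1000, 0x100, 0x100, 0);
        if (map == NULL)
                return;

        kv = hub_piomap_addr(map, 0x1000, 0x100);  /* kernel virtual address */
        /* ... PIO loads/stores through kv ... */

        hub_piomap_done(map);
        hub_piomap_free(map);   /* no-op for small windows, releases big windows */
}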
+ */ +/* ARGSUSED */ +hub_dmamap_t +hub_dmamap_alloc( devfs_handle_t dev, /* set up mappings for this device */ + device_desc_t dev_desc, /* device descriptor */ + size_t byte_count_max, /* max size of a mapping */ + unsigned flags) /* defined in dma.h */ +{ + hub_dmamap_t dmamap; + xwidget_info_t widget_info = xwidget_info_get(dev); + xwidgetnum_t widget = xwidget_info_id_get(widget_info); + devfs_handle_t hubv = xwidget_info_master_get(widget_info); + + dmamap = kern_malloc(sizeof(struct hub_dmamap_s)); + dmamap->hdma_xtalk_info.xd_dev = dev; + dmamap->hdma_xtalk_info.xd_target = widget; + dmamap->hdma_hub = hubv; + dmamap->hdma_flags = HUB_DMAMAP_IS_VALID; + if (flags & XTALK_FIXED) + dmamap->hdma_flags |= HUB_DMAMAP_IS_FIXED; + + return(dmamap); +} + +/* + * Destroy a DMA mapping from crosstalk space to system address space. + * There is no actual mapping hardware to destroy, but we at least mark + * the dmamap INVALID and free the space that it took. + */ +void +hub_dmamap_free(hub_dmamap_t hub_dmamap) +{ + hub_dmamap->hdma_flags &= ~HUB_DMAMAP_IS_VALID; + kern_free(hub_dmamap); +} + +/* + * Establish a DMA mapping using the resources allocated in a previous dmamap_alloc. + * Return an appropriate crosstalk address range that maps to the specified physical + * address range. + */ +/* ARGSUSED */ +extern iopaddr_t +hub_dmamap_addr( hub_dmamap_t dmamap, /* use these mapping resources */ + paddr_t paddr, /* map for this address */ + size_t byte_count) /* map this many bytes */ +{ + devfs_handle_t vhdl; + + ASSERT(dmamap->hdma_flags & HUB_DMAMAP_IS_VALID); + + if (dmamap->hdma_flags & HUB_DMAMAP_USED) { + /* If the map is FIXED, re-use is OK. */ + if (!(dmamap->hdma_flags & HUB_DMAMAP_IS_FIXED)) { + vhdl = dmamap->hdma_xtalk_info.xd_dev; +#if defined(SUPPORT_PRINTING_V_FORMAT) + cmn_err(CE_WARN, "%v: hub_dmamap_addr re-uses dmamap.\n",vhdl); +#else + cmn_err(CE_WARN, "0x%p: hub_dmamap_addr re-uses dmamap.\n", &vhdl); +#endif + } + } else { + dmamap->hdma_flags |= HUB_DMAMAP_USED; + } + + /* There isn't actually any DMA mapping hardware on the hub. */ + return(paddr); +} + +/* + * Establish a DMA mapping using the resources allocated in a previous dmamap_alloc. + * Return an appropriate crosstalk address list that maps to the specified physical + * address list. + */ +/* ARGSUSED */ +alenlist_t +hub_dmamap_list(hub_dmamap_t hub_dmamap, /* use these mapping resources */ + alenlist_t palenlist, /* map this area of memory */ + unsigned flags) +{ + devfs_handle_t vhdl; + + ASSERT(hub_dmamap->hdma_flags & HUB_DMAMAP_IS_VALID); + + if (hub_dmamap->hdma_flags & HUB_DMAMAP_USED) { + /* If the map is FIXED, re-use is OK. */ + if (!(hub_dmamap->hdma_flags & HUB_DMAMAP_IS_FIXED)) { + vhdl = hub_dmamap->hdma_xtalk_info.xd_dev; +#if defined(SUPPORT_PRINTING_V_FORMAT) + cmn_err(CE_WARN,"%v: hub_dmamap_list re-uses dmamap\n",vhdl); +#else + cmn_err(CE_WARN,"0x%p: hub_dmamap_list re-uses dmamap\n", &vhdl); +#endif + } + } else { + hub_dmamap->hdma_flags |= HUB_DMAMAP_USED; + } + + /* There isn't actually any DMA mapping hardware on the hub. */ + return(palenlist); +} + +/* + * Driver indicates that it has completed whatever DMA it may have started + * after an earlier dmamap_addr or dmamap_list call. + */ +void +hub_dmamap_done(hub_dmamap_t hub_dmamap) /* done with these mapping resources */ +{ + devfs_handle_t vhdl; + + if (hub_dmamap->hdma_flags & HUB_DMAMAP_USED) { + hub_dmamap->hdma_flags &= ~HUB_DMAMAP_USED; + } else { + /* If the map is FIXED, re-done is OK. 
*/ + if (!(hub_dmamap->hdma_flags & HUB_DMAMAP_IS_FIXED)) { + vhdl = hub_dmamap->hdma_xtalk_info.xd_dev; +#if defined(SUPPORT_PRINTING_V_FORMAT) + cmn_err(CE_WARN, "%v: hub_dmamap_done already done with dmamap\n",vhdl); +#else + cmn_err(CE_WARN, "0x%p: hub_dmamap_done already done with dmamap\n", &vhdl); +#endif + } + } +} + +/* + * Translate a single system physical address into a crosstalk address. + */ +/* ARGSUSED */ +iopaddr_t +hub_dmatrans_addr( devfs_handle_t dev, /* translate for this device */ + device_desc_t dev_desc, /* device descriptor */ + paddr_t paddr, /* system physical address */ + size_t byte_count, /* length */ + unsigned flags) /* defined in dma.h */ +{ + /* no translation needed */ + return(paddr); +} + +/* + * Translate a list of IP27 addresses and lengths into a list of crosstalk + * addresses and lengths. No actual hardware mapping takes place; the hub + * has no DMA mapping registers -- crosstalk addresses map directly. + */ +/* ARGSUSED */ +alenlist_t +hub_dmatrans_list( devfs_handle_t dev, /* translate for this device */ + device_desc_t dev_desc, /* device descriptor */ + alenlist_t palenlist, /* system address/length list */ + unsigned flags) /* defined in dma.h */ +{ + /* no translation needed */ + return(palenlist); +} + +/*ARGSUSED*/ +void +hub_dmamap_drain( hub_dmamap_t map) +{ + /* XXX- flush caches, if cache coherency WAR is needed */ +} + +/*ARGSUSED*/ +void +hub_dmaaddr_drain( devfs_handle_t vhdl, + paddr_t addr, + size_t bytes) +{ + /* XXX- flush caches, if cache coherency WAR is needed */ +} + +/*ARGSUSED*/ +void +hub_dmalist_drain( devfs_handle_t vhdl, + alenlist_t list) +{ + /* XXX- flush caches, if cache coherency WAR is needed */ +} + + + +/* INTERRUPT MANAGEMENT */ + +/* ARGSUSED */ +static void +hub_intr_init(devfs_handle_t hubv) +{ +} + +/* + * hub_device_desc_update + * Update the passed in device descriptor with the actual the + * target cpu number and interrupt priority level. + * NOTE : These might be the same as the ones passed in thru + * the descriptor. + */ +static void +hub_device_desc_update(device_desc_t dev_desc, + ilvl_t intr_swlevel, + cpuid_t cpu) +{ + char cpuname[40]; + + /* Store the interrupt priority level in the device descriptor */ + device_desc_intr_swlevel_set(dev_desc, intr_swlevel); + + /* Convert the cpuid to the vertex handle in the hwgraph and + * save it in the device descriptor. + */ + sprintf(cpuname,"/hw/cpunum/%ld",cpu); + device_desc_intr_target_set(dev_desc, + hwgraph_path_to_dev(cpuname)); +} + +int allocate_my_bit = INTRCONNECT_ANYBIT; + +/* + * Allocate resources required for an interrupt as specified in dev_desc. + * Returns a hub interrupt handle on success, or 0 on failure. 
+ */ +hub_intr_t +hub_intr_alloc( devfs_handle_t dev, /* which crosstalk device */ + device_desc_t dev_desc, /* device descriptor */ + devfs_handle_t owner_dev) /* owner of this interrupt, if known */ +{ + cpuid_t cpu; /* cpu to receive interrupt */ + int cpupicked = 0; + int bit; /* interrupt vector */ + /*REFERENCED*/ + int intr_resflags; + hub_intr_t intr_hdl; + cnodeid_t nodeid; /* node to receive interrupt */ + /*REFERENCED*/ + nasid_t nasid; /* nasid to receive interrupt */ + struct xtalk_intr_s *xtalk_info; + iopaddr_t xtalk_addr; /* xtalk addr on hub to set intr */ + xwidget_info_t xwidget_info; /* standard crosstalk widget info handle */ + char *intr_name = NULL; + ilvl_t intr_swlevel; + extern int default_intr_pri; +#ifdef CONFIG_IA64_SGI_SN1 + extern void synergy_intr_alloc(int, int); +#endif + + /* + * If caller didn't explicily specify a device descriptor, see if there's + * a default descriptor associated with the device. + */ + if (!dev_desc) + dev_desc = device_desc_default_get(dev); + + if (dev_desc) { + intr_name = device_desc_intr_name_get(dev_desc); + intr_swlevel = device_desc_intr_swlevel_get(dev_desc); + if (dev_desc->flags & D_INTR_ISERR) { + intr_resflags = II_ERRORINT; + } else if (!(dev_desc->flags & D_INTR_NOTHREAD)) { + intr_resflags = II_THREADED; + } else { + /* Neither an error nor a thread. */ + intr_resflags = 0; + } + } else { + intr_swlevel = default_intr_pri; + intr_resflags = II_THREADED; + } + + /* XXX - Need to determine if the interrupt should be threaded. */ + + /* If the cpu has not been picked already then choose a candidate + * interrupt target and reserve the interrupt bit + */ +#if defined(NEW_INTERRUPTS) + if (!cpupicked) { + cpu = intr_heuristic(dev,dev_desc,allocate_my_bit, + intr_resflags,owner_dev, + intr_name,&bit); + } +#endif + + /* At this point we SHOULD have a valid cpu */ + if (cpu == CPU_NONE) { +#if defined(SUPPORT_PRINTING_V_FORMAT) + cmn_err(CE_WARN, + "%v hub_intr_alloc could not allocate interrupt\n", + owner_dev); +#else + cmn_err(CE_WARN, + "0x%p hub_intr_alloc could not allocate interrupt\n", + &owner_dev); +#endif + return(0); + + } + + /* If the cpu has been picked already (due to the bridge data + * corruption bug) then try to reserve an interrupt bit . + */ +#if defined(NEW_INTERRUPTS) + if (cpupicked) { + bit = intr_reserve_level(cpu, allocate_my_bit, + intr_resflags, + owner_dev, intr_name); + if (bit < 0) { +#if defined(SUPPORT_PRINTING_V_FORMAT) + cmn_err(CE_WARN, + "Could not reserve an interrupt bit for cpu " + " %d and dev %v\n", + cpu,owner_dev); +#else + cmn_err(CE_WARN, + "Could not reserve an interrupt bit for cpu " + " %d and dev 0x%x\n", + cpu, &owner_dev); +#endif + + return(0); + } + } +#endif /* NEW_INTERRUPTS */ + + nodeid = cpuid_to_cnodeid(cpu); + nasid = cpuid_to_nasid(cpu); + xtalk_addr = HUBREG_AS_XTALKADDR(nasid, PIREG(PI_INT_PEND_MOD, cpuid_to_subnode(cpu))); + + /* + * Allocate an interrupt handle, and fill it in. There are two + * pieces to an interrupt handle: the piece needed by generic + * xtalk code which is used by crosstalk device drivers, and + * the piece needed by low-level IP27 hardware code. + */ + intr_hdl = kmem_alloc_node(sizeof(struct hub_intr_s), KM_NOSLEEP, nodeid); + ASSERT_ALWAYS(intr_hdl); + + /* + * Fill in xtalk information for generic xtalk interfaces that + * operate on xtalk_intr_hdl's. 
+ */ + xtalk_info = &intr_hdl->i_xtalk_info; + xtalk_info->xi_dev = dev; + xtalk_info->xi_vector = bit; + xtalk_info->xi_addr = xtalk_addr; + xtalk_info->xi_flags = (intr_resflags == II_THREADED) ? + 0 : XTALK_INTR_NOTHREAD; + + /* + * Regardless of which CPU we ultimately interrupt, a given crosstalk + * widget always handles interrupts (and PIO and DMA) through its + * designated "master" crosstalk provider. + */ + xwidget_info = xwidget_info_get(dev); + if (xwidget_info) + xtalk_info->xi_target = xwidget_info_masterid_get(xwidget_info); + + /* Fill in low level hub information for hub_* interrupt interface */ + intr_hdl->i_swlevel = intr_swlevel; + intr_hdl->i_cpuid = cpu; + intr_hdl->i_bit = bit; + intr_hdl->i_flags = HUB_INTR_IS_ALLOCED; + + /* Store the actual interrupt priority level & interrupt target + * cpu back in the device descriptor. + */ + hub_device_desc_update(dev_desc, intr_swlevel, cpu); +#ifdef CONFIG_IA64_SGI_SN1 + synergy_intr_alloc((int)bit, (int)cpu); +#endif + return(intr_hdl); +} + + +/* + * Free resources consumed by intr_alloc. + */ +void +hub_intr_free(hub_intr_t intr_hdl) +{ + cpuid_t cpu = intr_hdl->i_cpuid; + int bit = intr_hdl->i_bit; + xtalk_intr_t xtalk_info; + + if (intr_hdl->i_flags & HUB_INTR_IS_CONNECTED) { + /* Setting the following fields in the xtalk interrupt info + * clears the interrupt target register in the xtalk user + */ + xtalk_info = &intr_hdl->i_xtalk_info; + xtalk_info->xi_dev = NODEV; + xtalk_info->xi_vector = 0; + xtalk_info->xi_addr = 0; + hub_intr_disconnect(intr_hdl); + } + + if (intr_hdl->i_flags & HUB_INTR_IS_ALLOCED) + kfree(intr_hdl); + +#if defined(NEW_INTERRUPTS) + intr_unreserve_level(cpu, bit); +#endif +} + + +/* + * Associate resources allocated with a previous hub_intr_alloc call with the + * described handler, arg, name, etc. + */ +/*ARGSUSED*/ +int +hub_intr_connect( hub_intr_t intr_hdl, /* xtalk intr resource handle */ + intr_func_t intr_func, /* xtalk intr handler */ + void *intr_arg, /* arg to intr handler */ + xtalk_intr_setfunc_t setfunc, /* func to set intr hw */ + void *setfunc_arg, /* arg to setfunc */ + void *thread) /* intr thread to use */ +{ + int rv; + cpuid_t cpu = intr_hdl->i_cpuid; + int bit = intr_hdl->i_bit; +#ifdef CONFIG_IA64_SGI_SN1 + extern int synergy_intr_connect(int, int); +#endif + + ASSERT(intr_hdl->i_flags & HUB_INTR_IS_ALLOCED); + +#if defined(NEW_INTERRUPTS) + rv = intr_connect_level(cpu, bit, intr_hdl->i_swlevel, + intr_func, intr_arg, NULL); + if (rv < 0) + return(rv); + +#endif + intr_hdl->i_xtalk_info.xi_setfunc = setfunc; + intr_hdl->i_xtalk_info.xi_sfarg = setfunc_arg; + + if (setfunc) (*setfunc)((xtalk_intr_t)intr_hdl); + + intr_hdl->i_flags |= HUB_INTR_IS_CONNECTED; +#ifdef CONFIG_IA64_SGI_SN1 + return(synergy_intr_connect((int)bit, (int)cpu)); +#endif +} + + +/* + * Disassociate handler with the specified interrupt. + */ +void +hub_intr_disconnect(hub_intr_t intr_hdl) +{ + /*REFERENCED*/ + int rv; + cpuid_t cpu = intr_hdl->i_cpuid; + int bit = intr_hdl->i_bit; + xtalk_intr_setfunc_t setfunc; + + setfunc = intr_hdl->i_xtalk_info.xi_setfunc; + + /* TBD: send disconnected interrupts somewhere harmless */ + if (setfunc) (*setfunc)((xtalk_intr_t)intr_hdl); + +#if defined(NEW_INTERRUPTS) + rv = intr_disconnect_level(cpu, bit); + ASSERT(rv == 0); +#endif + + intr_hdl->i_flags &= ~HUB_INTR_IS_CONNECTED; +} + + +/* + * Return a hwgraph vertex that represents the CPU currently + * targeted by an interrupt. 
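The interrupt routines above follow the same alloc/connect/disconnect/free pattern. A rough sketch follows, with the handler and its argument passed in as parameters so that no intr_func_t prototype has to be assumed here; only the hub_intr_* calls come from the code above.

/* Hypothetical interrupt setup using the routines above. */
void
intr_example(devfs_handle_t widget_dev, devfs_handle_t owner,
             intr_func_t my_isr, void *my_arg)
{
        hub_intr_t ih;

        ih = hub_intr_alloc(widget_dev, (device_desc_t)0, owner);
        if (ih == 0)
                return;

        /* no setfunc and no dedicated thread in this simple case */
        hub_intr_connect(ih, my_isr, my_arg, (xtalk_intr_setfunc_t)0, 0, 0);

        /* ... interrupts are now delivered to my_isr ... */

        hub_intr_disconnect(ih);
        hub_intr_free(ih);
}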
+ */ +devfs_handle_t +hub_intr_cpu_get(hub_intr_t intr_hdl) +{ + cpuid_t cpuid = intr_hdl->i_cpuid; + ASSERT(cpuid != CPU_NONE); + + return(cpuid_to_vertex(cpuid)); +} + + + +/* CONFIGURATION MANAGEMENT */ + +/* + * Perform initializations that allow this hub to start crosstalk support. + */ +void +hub_provider_startup(devfs_handle_t hubv) +{ + hub_pio_init(hubv); + hub_dma_init(hubv); + hub_intr_init(hubv); +} + +/* + * Shutdown crosstalk support from a hub. + */ +void +hub_provider_shutdown(devfs_handle_t hub) +{ + /* TBD */ + xtalk_provider_unregister(hub); +} + +/* + * Check that an address is in teh real small window widget 0 space + * or else in the big window we're using to emulate small window 0 + * in the kernel. + */ +int +hub_check_is_widget0(void *addr) +{ + nasid_t nasid = NASID_GET(addr); + + if (((__psunsigned_t)addr >= RAW_NODE_SWIN_BASE(nasid, 0)) && + ((__psunsigned_t)addr < RAW_NODE_SWIN_BASE(nasid, 1))) + return 1; + return 0; +} + + +/* + * Check that two addresses use the same widget + */ +int +hub_check_window_equiv(void *addra, void *addrb) +{ + if (hub_check_is_widget0(addra) && hub_check_is_widget0(addrb)) + return 1; + + /* XXX - Assume this is really a small window address */ + if (WIDGETID_GET((__psunsigned_t)addra) == + WIDGETID_GET((__psunsigned_t)addrb)) + return 1; + + return 0; +} + + +/* + * Determine whether two PCI addresses actually refer to the same device. + * This only works if both addresses are in small windows. It's used to + * determine whether prom addresses refer to particular PCI devices. + */ +/* + * XXX - This won't work as written if we ever have more than two nodes + * on a crossbow. In that case, we'll need an array or partners. + */ +int +hub_check_pci_equiv(void *addra, void *addrb) +{ + nasid_t nasida, nasidb; + + /* + * This is for a permanent workaround that causes us to use a + * big window in place of small window 0. + */ + if (!hub_check_window_equiv(addra, addrb)) + return 0; + + /* If the offsets aren't the same, forget it. */ + if (SWIN_WIDGETADDR((__psunsigned_t)addra) != + (SWIN_WIDGETADDR((__psunsigned_t)addrb))) + return 0; + + /* Now, check the nasids */ + nasida = NASID_GET(addra); + nasidb = NASID_GET(addrb); + + ASSERT(NASID_TO_COMPACT_NODEID(nasida) != INVALID_NASID); + ASSERT(NASID_TO_COMPACT_NODEID(nasidb) != INVALID_NASID); + + /* + * Either the NASIDs must be the same or they must be crossbow + * partners (on the same crossbow). + */ + return (check_nasid_equiv(nasida, nasidb)); +} + +/* + * hub_setup_prb(nasid, prbnum, credits, conveyor) + * + * Put a PRB into fire-and-forget mode if conveyor isn't set. Otehrwise, + * put it into conveyor belt mode with the specified number of credits. + */ +void +hub_setup_prb(nasid_t nasid, int prbnum, int credits, int conveyor) +{ + iprb_t prb; + int prb_offset; +#ifdef IRIX + extern int force_fire_and_forget; + extern volatile int ignore_conveyor_override; + + if (force_fire_and_forget && !ignore_conveyor_override) + if (conveyor == HUB_PIO_CONVEYOR) + conveyor = HUB_PIO_FIRE_N_FORGET; +#endif + + /* + * Get the current register value. + */ + prb_offset = IIO_IOPRB(prbnum); + prb.iprb_regval = REMOTE_HUB_L(nasid, prb_offset); + + /* + * Clear out some fields. + */ + prb.iprb_ovflow = 1; + prb.iprb_bnakctr = 0; + prb.iprb_anakctr = 0; + + /* + * Enable or disable fire-and-forget mode. + */ + prb.iprb_ff = ((conveyor == HUB_PIO_CONVEYOR) ? 0 : 1); + + /* + * Set the appropriate number of PIO cresits for the widget. 
+ */ + prb.iprb_xtalkctr = credits; + + /* + * Store the new value to the register. + */ + REMOTE_HUB_S(nasid, prb_offset, prb.iprb_regval); +} + +/* + * hub_set_piomode() + * + * Put the hub into either "PIO conveyor belt" mode or "fire-and-forget" + * mode. To do this, we have to make absolutely sure that no PIOs + * are in progress so we turn off access to all widgets for the duration + * of the function. + * + * XXX - This code should really check what kind of widget we're talking + * to. Bridges can only handle three requests, but XG will do more. + * How many can crossbow handle to widget 0? We're assuming 1. + * + * XXX - There is a bug in the crossbow that link reset PIOs do not + * return write responses. The easiest solution to this problem is to + * leave widget 0 (xbow) in fire-and-forget mode at all times. This + * only affects pio's to xbow registers, which should be rare. + */ +void +hub_set_piomode(nasid_t nasid, int conveyor) +{ + hubreg_t ii_iowa; + int direct_connect; + hubii_wcr_t ii_wcr; + int prbnum; + int s, cons_lock = 0; + + ASSERT(NASID_TO_COMPACT_NODEID(nasid) != INVALID_CNODEID); + if (nasid == get_console_nasid()) { + PUTBUF_LOCK(s); + cons_lock = 1; + } + + ii_iowa = REMOTE_HUB_L(nasid, IIO_OUTWIDGET_ACCESS); + REMOTE_HUB_S(nasid, IIO_OUTWIDGET_ACCESS, 0); + + ii_wcr.wcr_reg_value = REMOTE_HUB_L(nasid, IIO_WCR); + direct_connect = ii_wcr.iwcr_dir_con; + + if (direct_connect) { + /* + * Assume a bridge here. + */ + hub_setup_prb(nasid, 0, 3, conveyor); + } else { + /* + * Assume a crossbow here. + */ + hub_setup_prb(nasid, 0, 1, conveyor); + } + + for (prbnum = HUB_WIDGET_ID_MIN; prbnum <= HUB_WIDGET_ID_MAX; prbnum++) { + /* + * XXX - Here's where we should take the widget type into + * when account assigning credits. + */ + /* Always set the PRBs in fire-and-forget mode */ + hub_setup_prb(nasid, prbnum, 3, conveyor); + } + +#ifdef IRIX + /* + * In direct connect mode, disable access to all widgets but 0. + * Later, the prom will do this for us. + */ + if (direct_connect) + ii_iowa = 1; +#endif + + REMOTE_HUB_S(nasid, IIO_OUTWIDGET_ACCESS, ii_iowa); + + if (cons_lock) + PUTBUF_UNLOCK(s); +} +/* Interface to allow special drivers to set hub specific + * device flags. + * Return 0 on failure , 1 on success + */ +int +hub_widget_flags_set(nasid_t nasid, + xwidgetnum_t widget_num, + hub_widget_flags_t flags) +{ + + ASSERT((flags & HUB_WIDGET_FLAGS) == flags); + + if (flags & HUB_PIO_CONVEYOR) { + hub_setup_prb(nasid,widget_num, + 3,HUB_PIO_CONVEYOR); /* set the PRB in conveyor + * belt mode with 3 credits + */ + } else if (flags & HUB_PIO_FIRE_N_FORGET) { + hub_setup_prb(nasid,widget_num, + 3,HUB_PIO_FIRE_N_FORGET); /* set the PRB in fire + * and forget mode + */ + } + + return 1; +} +/* Interface to allow special drivers to set hub specific + * device flags. 
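hub_setup_prb() and hub_widget_flags_set() above are the low-level knobs for choosing between conveyor-belt and fire-and-forget PIO on a per-widget basis; the vertex-level wrapper for the same operation continues just below. A rough sketch of requesting conveyor mode for one widget; the nasid and widget number are placeholders.

/* Hypothetical use of the PRB mode helpers above. */
void
prb_example(nasid_t nasid, xwidgetnum_t widget)
{
        int ok;

        /* conveyor-belt mode with 3 credits for this widget's PRB */
        ok = hub_widget_flags_set(nasid, widget, HUB_PIO_CONVEYOR);

        if (!ok) {
                /* 0 means the request was not honoured, per the interface above */
        }
}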
+ * Return 0 on failure , 1 on success + */ +int +hub_device_flags_set(devfs_handle_t widget_vhdl, + hub_widget_flags_t flags) +{ + xwidget_info_t widget_info = xwidget_info_get(widget_vhdl); + xwidgetnum_t widget_num = xwidget_info_id_get(widget_info); + devfs_handle_t hub_vhdl = xwidget_info_master_get(widget_info); + hubinfo_t hub_info = 0; + nasid_t nasid; + int s,rv; + + /* Use the nasid from the hub info hanging off the hub vertex + * and widget number from the widget vertex + */ + hubinfo_get(hub_vhdl, &hub_info); + /* Being over cautious by grabbing a lock */ + s = mutex_spinlock(&hub_info->h_bwlock); + nasid = hub_info->h_nasid; + rv = hub_widget_flags_set(nasid,widget_num,flags); + mutex_spinunlock(&hub_info->h_bwlock, s); + + return rv; +} + +#if ((defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC)) && defined(BRINGUP)) +/* BRINGUP: This ought to be useful for IP27 too but, for now, + * make it SN1 only because `ii_ixtt_u_t' is not in IP27/hubio.h + * (or anywhere else :-). + */ +int +hubii_ixtt_set(devfs_handle_t widget_vhdl, ii_ixtt_u_t *ixtt) +{ + xwidget_info_t widget_info = xwidget_info_get(widget_vhdl); + devfs_handle_t hub_vhdl = xwidget_info_master_get(widget_info); + hubinfo_t hub_info = 0; + nasid_t nasid; + int s; + + /* Use the nasid from the hub info hanging off the hub vertex + * and widget number from the widget vertex + */ + hubinfo_get(hub_vhdl, &hub_info); + /* Being over cautious by grabbing a lock */ + s = mutex_spinlock(&hub_info->h_bwlock); + nasid = hub_info->h_nasid; + + REMOTE_HUB_S(nasid, IIO_IXTT, ixtt->ii_ixtt_regval); + + mutex_spinunlock(&hub_info->h_bwlock, s); + return 0; +} + +int +hubii_ixtt_get(devfs_handle_t widget_vhdl, ii_ixtt_u_t *ixtt) +{ + xwidget_info_t widget_info = xwidget_info_get(widget_vhdl); + devfs_handle_t hub_vhdl = xwidget_info_master_get(widget_info); + hubinfo_t hub_info = 0; + nasid_t nasid; + int s; + + /* Use the nasid from the hub info hanging off the hub vertex + * and widget number from the widget vertex + */ + hubinfo_get(hub_vhdl, &hub_info); + /* Being over cautious by grabbing a lock */ + s = mutex_spinlock(&hub_info->h_bwlock); + nasid = hub_info->h_nasid; + + ixtt->ii_ixtt_regval = REMOTE_HUB_L(nasid, IIO_IXTT); + + mutex_spinunlock(&hub_info->h_bwlock, s); + return 0; +} +#endif /* CONFIG_IA64_SGI_SN1 */ + +/* + * hub_device_inquiry + * Find out the xtalk widget related information stored in this + * hub's II. + */ +void +hub_device_inquiry(devfs_handle_t xbus_vhdl, xwidgetnum_t widget) +{ + devfs_handle_t xconn, hub_vhdl; + char widget_name[8]; + hubreg_t ii_iidem,ii_iiwa, ii_iowa; + hubinfo_t hubinfo; + nasid_t nasid; + int d; + + sprintf(widget_name, "%d", widget); + if (hwgraph_traverse(xbus_vhdl, widget_name, &xconn) + != GRAPH_SUCCESS) + return; + + hub_vhdl = device_master_get(xconn); + if (hub_vhdl == GRAPH_VERTEX_NONE) + return; + + hubinfo_get(hub_vhdl, &hubinfo); + if (!hubinfo) + return; + + nasid = hubinfo->h_nasid; + + ii_iidem = REMOTE_HUB_L(nasid, IIO_IIDEM); + ii_iiwa = REMOTE_HUB_L(nasid, IIO_IIWA); + ii_iowa = REMOTE_HUB_L(nasid, IIO_IOWA); + +#if defined(SUPPORT_PRINTING_V_FORMAT) + cmn_err(CE_CONT, "Inquiry Info for %v\n", xconn); +#else + cmn_err(CE_CONT, "Inquiry Info for 0x%p\n", &xconn); +#endif + + cmn_err(CE_CONT,"\tDevices shutdown [ "); + + for (d = 0 ; d <= 7 ; d++) + if (!(ii_iidem & (IIO_IIDEM_WIDGETDEV_MASK(widget,d)))) + cmn_err(CE_CONT, " %d", d); + + cmn_err(CE_CONT,"]\n"); + + cmn_err(CE_CONT, + "\tInbound access ? %s\n", + ii_iiwa & IIO_IIWA_WIDGET(widget) ? 
"yes" : "no"); + + cmn_err(CE_CONT, + "\tOutbound access ? %s\n", + ii_iowa & IIO_IOWA_WIDGET(widget) ? "yes" : "no"); + +} + +/* + * A pointer to this structure hangs off of every hub hwgraph vertex. + * The generic xtalk layer may indirect through it to get to this specific + * crosstalk bus provider. + */ +xtalk_provider_t hub_provider = { + (xtalk_piomap_alloc_f *) hub_piomap_alloc, + (xtalk_piomap_free_f *) hub_piomap_free, + (xtalk_piomap_addr_f *) hub_piomap_addr, + (xtalk_piomap_done_f *) hub_piomap_done, + (xtalk_piotrans_addr_f *) hub_piotrans_addr, + + (xtalk_dmamap_alloc_f *) hub_dmamap_alloc, + (xtalk_dmamap_free_f *) hub_dmamap_free, + (xtalk_dmamap_addr_f *) hub_dmamap_addr, + (xtalk_dmamap_list_f *) hub_dmamap_list, + (xtalk_dmamap_done_f *) hub_dmamap_done, + (xtalk_dmatrans_addr_f *) hub_dmatrans_addr, + (xtalk_dmatrans_list_f *) hub_dmatrans_list, + (xtalk_dmamap_drain_f *) hub_dmamap_drain, + (xtalk_dmaaddr_drain_f *) hub_dmaaddr_drain, + (xtalk_dmalist_drain_f *) hub_dmalist_drain, + + (xtalk_intr_alloc_f *) hub_intr_alloc, + (xtalk_intr_free_f *) hub_intr_free, + (xtalk_intr_connect_f *) hub_intr_connect, + (xtalk_intr_disconnect_f *) hub_intr_disconnect, + (xtalk_intr_cpu_get_f *) hub_intr_cpu_get, + + (xtalk_provider_startup_f *) hub_provider_startup, + (xtalk_provider_shutdown_f *) hub_provider_shutdown, +}; + diff --git a/arch/ia64/sn/io/ip37.c b/arch/ia64/sn/io/ip37.c new file mode 100644 index 000000000..38ea6993b --- /dev/null +++ b/arch/ia64/sn/io/ip37.c @@ -0,0 +1,127 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +/* + * ip37.c + * Support for IP35/IP37 machines + */ + +#include <linux/types.h> +#include <linux/config.h> + +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) +#include <asm/sn/sgi.h> +#include <asm/sn/klconfig.h> +#include <asm/sn/sn1/hubdev.h> +#include <asm/sn/pci/bridge.h> /* for bridge_t */ + + +xwidgetnum_t +hub_widget_id(nasid_t nasid) +{ + hubii_wcr_t ii_wcr; /* the control status register */ + + ii_wcr.wcr_reg_value = REMOTE_HUB_L(nasid,IIO_WCR); + + printk("hub_widget_id: Found Hub Widget ID 0x%x from Register 0x%p\n", ii_wcr.wcr_fields_s.wcr_widget_id, REMOTE_HUB_ADDR(nasid, IIO_WCR)); + + printk("hub_widget_id: Found Hub Widget 0x%lx wcr_reg_value 0x%lx\n", REMOTE_HUB_L(nasid,IIO_WCR), ii_wcr.wcr_reg_value); + + return ii_wcr.wcr_fields_s.wcr_widget_id; +} + +/* + * get_nasid() returns the physical node id number of the caller. 
+ */ +nasid_t +get_nasid(void) +{ + return (nasid_t)((LOCAL_HUB_L(LB_REV_ID) & LRI_NODEID_MASK) >> LRI_NODEID_SHFT); +} + +int +get_slice(void) +{ + return LOCAL_HUB_L(PI_CPU_NUM); +} + +int +is_fine_dirmode(void) +{ + return (((LOCAL_HUB_L(LB_REV_ID) & LRI_SYSTEM_SIZE_MASK) + >> LRI_SYSTEM_SIZE_SHFT) == SYSTEM_SIZE_SMALL); + +} + +hubreg_t +get_hub_chiprev(nasid_t nasid) +{ + + printk("get_hub_chiprev: Hub Chip Rev 0x%lx\n", + (REMOTE_HUB_L(nasid, LB_REV_ID) & LRI_REV_MASK) >> LRI_REV_SHFT); + return ((REMOTE_HUB_L(nasid, LB_REV_ID) & LRI_REV_MASK) + >> LRI_REV_SHFT); +} + +int +verify_snchip_rev(void) +{ + int hub_chip_rev; + int i; + static int min_hub_rev = 0; + nasid_t nasid; + static int first_time = 1; + extern int maxnodes; + + + if (first_time) { + for (i = 0; i < maxnodes; i++) { + nasid = COMPACT_TO_NASID_NODEID(i); + hub_chip_rev = get_hub_chiprev(nasid); + + if ((hub_chip_rev < min_hub_rev) || (i == 0)) + min_hub_rev = hub_chip_rev; + } + + + first_time = 0; + } + + return min_hub_rev; + +} + +#ifdef SN1_USE_POISON_BITS +int +hub_bte_poison_ok(void) +{ + /* + * For now, assume poisoning is ok. If it turns out there are chip + * bugs that prevent its use in early revs, there is some neat code + * to steal from the IP27 equivalent of this code. + */ + +#ifdef BRINGUP /* temp disable BTE poisoning - might be sw bugs in this area */ + return 0; +#else + return 1; +#endif +} +#endif /* SN1_USE_POISON_BITS */ + + +void +ni_reset_port(void) +{ + LOCAL_HUB_S(NI_RESET_ENABLE, NRE_RESETOK); + LOCAL_HUB_S(NI_PORT_RESET, NPR_PORTRESET | NPR_LOCALRESET); +} + +#endif /* CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 */ diff --git a/arch/ia64/sn/io/klconflib.c b/arch/ia64/sn/io/klconflib.c new file mode 100644 index 000000000..6fd745a0b --- /dev/null +++ b/arch/ia64/sn/io/klconflib.c @@ -0,0 +1,1334 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/ctype.h> +#include <asm/sn/sgi.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> + +#include <asm/sn/agent.h> +#include <asm/sn/klconfig.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/module.h> +#include <asm/sn/router.h> +#include <asm/sn/xtalk/xbow.h> + +#define printf printk +int hasmetarouter; + +#define LDEBUG 0 +#define NIC_UNKNOWN ((nic_t) -1) + +#undef DEBUG_KLGRAPH +#ifdef DEBUG_KLGRAPH +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif /* DEBUG_KLGRAPH */ + +static void sort_nic_names(lboard_t *) ; + +lboard_t * +find_lboard(lboard_t *start, unsigned char brd_type) +{ + /* Search all boards stored on this node. */ + while (start) { + if (start->brd_type == brd_type) + return start; + start = KLCF_NEXT(start); + } + + /* Didn't find it. */ + return (lboard_t *)NULL; +} + +lboard_t * +find_lboard_class(lboard_t *start, unsigned char brd_type) +{ + /* Search all boards stored on this node. */ + while (start) { + if (KLCLASS(start->brd_type) == KLCLASS(brd_type)) + return start; + start = KLCF_NEXT(start); + } + + /* Didn't find it. 
*/ + return (lboard_t *)NULL; +} + +klinfo_t * +find_component(lboard_t *brd, klinfo_t *kli, unsigned char struct_type) +{ + int index, j; + + if (kli == (klinfo_t *)NULL) { + index = 0; + } else { + for (j = 0; j < KLCF_NUM_COMPS(brd); j++) { + if (kli == KLCF_COMP(brd, j)) + break; + } + index = j; + if (index == KLCF_NUM_COMPS(brd)) { + printf("find_component: Bad pointer: 0x%p\n", kli); + return (klinfo_t *)NULL; + } + index++; /* next component */ + } + + for (; index < KLCF_NUM_COMPS(brd); index++) { + kli = KLCF_COMP(brd, index); + DBG("find_component: brd %p kli %p request type = 0x%x kli type 0x%x\n", brd, kli, kli->struct_type, KLCF_COMP_TYPE(kli)); + if (KLCF_COMP_TYPE(kli) == struct_type) + return kli; + } + + /* Didn't find it. */ + return (klinfo_t *)NULL; +} + +klinfo_t * +find_first_component(lboard_t *brd, unsigned char struct_type) +{ + return find_component(brd, (klinfo_t *)NULL, struct_type); +} + +lboard_t * +find_lboard_modslot(lboard_t *start, moduleid_t mod, slotid_t slot) +{ + /* Search all boards stored on this node. */ + while (start) { + if (MODULE_MATCH(start->brd_module, mod) && + (start->brd_slot == slot)) + return start; + start = KLCF_NEXT(start); + } + + /* Didn't find it. */ + return (lboard_t *)NULL; +} + +lboard_t * +find_lboard_module(lboard_t *start, moduleid_t mod) +{ + /* Search all boards stored on this node. */ + while (start) { + if (MODULE_MATCH(start->brd_module, mod)) + return start; + start = KLCF_NEXT(start); + } + + /* Didn't find it. */ + return (lboard_t *)NULL; +} + +lboard_t * +find_lboard_module_class(lboard_t *start, moduleid_t mod, + unsigned char brd_type) +{ + while (start) { + + DBG("find_lboard_module_class: lboard 0x%p, start->brd_module 0x%x, mod 0x%x, start->brd_type 0x%x, brd_type 0x%x\n", start, start->brd_module, mod, start->brd_type, brd_type); + + if (MODULE_MATCH(start->brd_module, mod) && + (KLCLASS(start->brd_type) == KLCLASS(brd_type))) + return start; + start = KLCF_NEXT(start); + } + + /* Didn't find it. */ + return (lboard_t *)NULL; +} + +#ifndef CONFIG_IA64_SGI_IO +#define tolower(c) (isupper(c) ? (c) - 'A' + 'a' : (c)) +#define toupper(c) (islower(c) ? (c) - 'a' + 'A' : (c)) +#endif + + +/* + * Convert a NIC name to a name for use in the hardware graph. + */ +void +nic_name_convert(char *old_name, char *new_name) +{ + int i; + char c; + char *compare_ptr; + + if ((old_name[0] == '\0') || (old_name[1] == '\0')) { + strcpy(new_name, EDGE_LBL_XWIDGET); + } else { + for (i = 0; i < strlen(old_name); i++) { + c = old_name[i]; + + if (isalpha(c)) + new_name[i] = tolower(c); + else if (isdigit(c)) + new_name[i] = c; + else + new_name[i] = '_'; + } + new_name[i] = '\0'; + } + + /* XXX - + * Since a bunch of boards made it out with weird names like + * IO6-fibbbed and IO6P2, we need to look for IO6 in a name and + * replace it with "baseio" to avoid confusion in the field. + * We also have to make sure we don't report media_io instead of + * baseio. + */ + + /* Skip underscores at the beginning of the name */ + for (compare_ptr = new_name; (*compare_ptr) == '_'; compare_ptr++) + ; + + /* + * Check for some names we need to replace. Early boards + * had junk following the name so check only the first + * characters. 
+ */ + if (!strncmp(new_name, "io6", 3) || + !strncmp(new_name, "mio", 3) || + !strncmp(new_name, "media_io", 8)) + strcpy(new_name, "baseio"); +#if !defined(CONFIG_SGI_IP35) && !defined(CONFIG_IA64_SGI_SN1) && !defined(CONFIG_IA64_GENERIC) + else if (!strncmp(new_name, "ip29", 4)) + strcpy(new_name,SN00_MOTHERBOARD); +#endif + else if (!strncmp(new_name, "divo", 4)) + strcpy(new_name, "divo") ; + +} + +/* Check if the given board corresponds to the global + * master io6 + */ +int +is_master_baseio(nasid_t nasid,moduleid_t module,slotid_t slot) +{ + lboard_t *board; + +#if CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 || CONFIG_IA64_GENERIC +/* BRINGUP: If this works then look for callers of is_master_baseio() + * (e.g. iograph.c) and let them pass in a slot if they want + */ + board = find_lboard_module((lboard_t *)KL_CONFIG_INFO(nasid), module); +#else + board = find_lboard_modslot((lboard_t *)KL_CONFIG_INFO(nasid), + module, slot); +#endif + +#ifndef _STANDALONE + { + cnodeid_t cnode = NASID_TO_COMPACT_NODEID(nasid); + + if (!board && (NODEPDA(cnode)->xbow_peer != INVALID_NASID)) +#if CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 || CONFIG_IA64_GENERIC + board = find_lboard_module((lboard_t *) + KL_CONFIG_INFO(NODEPDA(cnode)->xbow_peer), + module); +#else + board = find_lboard_modslot((lboard_t *) + KL_CONFIG_INFO(NODEPDA(cnode)->xbow_peer), + module, slot); +#endif + } +#endif + if (!board) + return(0); + return(board->brd_flags & GLOBAL_MASTER_IO6); +} +/* + * Find the lboard structure and get the board name. + * If we can't find the structure or it's too low a revision, + * use default name. + */ +lboard_t * +get_board_name(nasid_t nasid, moduleid_t mod, slotid_t slot, char *name) +{ + lboard_t *brd; + + brd = find_lboard_modslot((lboard_t *)KL_CONFIG_INFO(nasid), + mod, slot); + +#ifndef _STANDALONE + { + cnodeid_t cnode = NASID_TO_COMPACT_NODEID(nasid); + + if (!brd && (NODEPDA(cnode)->xbow_peer != INVALID_NASID)) + brd = find_lboard_modslot((lboard_t *) + KL_CONFIG_INFO(NODEPDA(cnode)->xbow_peer), + mod, slot); + } +#endif + + if (!brd || (brd->brd_sversion < 2)) { + strcpy(name, EDGE_LBL_XWIDGET); + } else { + nic_name_convert(brd->brd_name, name); + } + + /* + * PV # 540860 + * If the name is not 'baseio' or SN00 MOTHERBOARD + * get the lowest of all the names in the nic string. + * This is needed for boards like divo, which can have + * a bunch of daughter cards, but would like to be called + * divo. We could do this for baseio and SN00 MOTHERBOARD + * but it has some special case names that we would not + * like to disturb at this point. + */ + + /* gfx boards don't need any of this name scrambling */ + if (brd && (KLCLASS(brd->brd_type) == KLCLASS_GFX)) { + return(brd); + } + + if (!(!strcmp(name, "baseio") )) { + if (brd) { + sort_nic_names(brd) ; + /* Convert to small case, '-' to '_' etc */ + nic_name_convert(brd->brd_name, name) ; + } + } + + return(brd); +} + +int +get_cpu_slice(cpuid_t cpu) +{ + klcpu_t *acpu; + if ((acpu = get_cpuinfo(cpu)) == NULL) + return -1; + return acpu->cpu_info.physid; +} + + +/* + * get_actual_nasid + * + * Completely disabled brds have their klconfig on + * some other nasid as they have no memory. But their + * actual nasid is hidden in the klconfig. Use this + * routine to get it. Works for normal boards too. + */ +nasid_t +get_actual_nasid(lboard_t *brd) +{ + klhub_t *hub ; + + if (!brd) + return INVALID_NASID ; + + /* find out if we are a completely disabled brd. 
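+	 * A disabled node board keeps its true nasid in the hub
+	 * component's physid field, so look there instead of at
+	 * brd_nasid when the hub is marked disabled.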
*/ + + hub = (klhub_t *)find_first_component(brd, KLSTRUCT_HUB); + if (!hub) + return INVALID_NASID ; + if (!(hub->hub_info.flags & KLINFO_ENABLE)) /* disabled node brd */ + return hub->hub_info.physid ; + else + return brd->brd_nasid ; +} + +int +xbow_port_io_enabled(nasid_t nasid, int link) +{ + lboard_t *brd; + klxbow_t *xbow_p; + + /* + * look for boards that might contain an xbow or xbridge + */ +#if SN0 + brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_MIDPLANE8); +#else + brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_PBRICK_XBOW); +#endif + if (brd == NULL) return 0; + + if ((xbow_p = (klxbow_t *)find_component(brd, NULL, KLSTRUCT_XBOW)) + == NULL) + return 0; + + if (!XBOW_PORT_TYPE_IO(xbow_p, link) || !XBOW_PORT_IS_ENABLED(xbow_p, link)) + return 0; + + printf("xbow_port_io_enabled: brd 0x%p xbow_p 0x%p \n", brd, xbow_p); + + return 1; +} + +void +board_to_path(lboard_t *brd, char *path) +{ + moduleid_t modnum; + char *board_name; +#if !defined(CONFIG_SGI_IP35) && !defined(CONFIG_IA64_SGI_SN1) && !defined(CONFIG_IA64_GENERIC) + slotid_t slot; + char slot_name[SLOTNUM_MAXLENGTH]; +#endif + + ASSERT(brd); + + switch (KLCLASS(brd->brd_type)) { + + case KLCLASS_NODE: + board_name = EDGE_LBL_NODE; + break; + case KLCLASS_ROUTER: + if (brd->brd_type == KLTYPE_META_ROUTER) { + board_name = EDGE_LBL_META_ROUTER; + hasmetarouter++; + } else + board_name = EDGE_LBL_ROUTER; + break; + case KLCLASS_MIDPLANE: + board_name = EDGE_LBL_MIDPLANE; + break; + case KLCLASS_IO: + board_name = EDGE_LBL_IO; + break; + case KLCLASS_IOBRICK: + if (brd->brd_type == KLTYPE_PBRICK) + board_name = EDGE_LBL_PBRICK; + else if (brd->brd_type == KLTYPE_IBRICK) + board_name = EDGE_LBL_IBRICK; + else if (brd->brd_type == KLTYPE_XBRICK) + board_name = EDGE_LBL_XBRICK; + else + board_name = EDGE_LBL_IOBRICK; + break; + default: + board_name = EDGE_LBL_UNKNOWN; + } + + modnum = brd->brd_module; + +#if defined(SN0) + slot = brd->brd_slot; + get_slotname(slot, slot_name); + + ASSERT(modnum >= 0); + + sprintf(path, "%H/" EDGE_LBL_SLOT "/%s/%s", + modnum, slot_name, board_name); +#else + ASSERT(modnum != MODULE_UNKNOWN && modnum != INVALID_MODULE); +#ifdef BRINGUP /* fix IP35 hwgraph */ + sprintf(path, EDGE_LBL_MODULE "/%x/%s", modnum, board_name); +#else + sprintf(path, "%H/%s", modnum, board_name); +#endif +#endif +} + +/* + * Get the module number for a NASID. + */ +moduleid_t +get_module_id(nasid_t nasid) +{ + lboard_t *brd; + + brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_IP27); + + if (!brd) + return INVALID_MODULE; + else + return brd->brd_module; +} + + +#ifndef CONFIG_IA64_SGI_IO +#if 1 +/* + * find_gfxpipe(#) + * + * XXXmacko + * This is only used by graphics drivers, and should be moved + * over to gfx/kern/graphics/SN0 as soon as it's convenient. 
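+ *
+ * On the first call find_gfxpipe() walks every node's klconfig for
+ * KLCLASS_GFX boards and links them into graphics_pipe_list, kept
+ * sorted by (module, slot); later calls just index into that
+ * cached list.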
+ */ +static klgfx_t *graphics_pipe_list = NULL; +static devfs_handle_t hwgraph_all_gfxids = GRAPH_VERTEX_NONE; + +void +setup_gfxpipe_link(devfs_handle_t vhdl,int pipenum) +{ + char idbuf[8]; + extern graph_hdl_t hwgraph; + + graph_info_add_LBL(hwgraph, vhdl, INFO_LBL_GFXID, INFO_DESC_EXPORT, + (arbitrary_info_t)pipenum); + if (hwgraph_all_gfxids == GRAPH_VERTEX_NONE) + hwgraph_path_add(hwgraph_root, EDGE_LBL_GFX, &hwgraph_all_gfxids); + sprintf(idbuf, "%d", pipenum); + hwgraph_edge_add(hwgraph_all_gfxids, vhdl, idbuf); + +} +#endif + +/* + * find the pipenum'th logical graphics pipe (KLCLASS_GFX) + */ +lboard_t * +find_gfxpipe(int pipenum) +{ + gda_t *gdap; + cnodeid_t cnode; + nasid_t nasid; + lboard_t *lb; + klgfx_t *kg,**pkg; + int i; + + gdap = (gda_t *)GDA_ADDR(get_nasid()); + if (gdap->g_magic != GDA_MAGIC) + return NULL; + + if (!graphics_pipe_list) { + /* for all nodes */ + for (cnode = 0; cnode < MAX_COMPACT_NODES; cnode ++) { + nasid = gdap->g_nasidtable[cnode]; + if (nasid == INVALID_NASID) + continue; + lb = KL_CONFIG_INFO(nasid) ; + while (lb = find_lboard_class(lb, KLCLASS_GFX)) { + moduleid_t kgm, pkgm; + int kgs, pkgs; + +#if defined(DEBUG) && (defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1 || defined(CONFIG_IA64_GENERIC))) && defined(BRINGUP) + printf("find_gfxpipe(): PIPE: %s mod %M slot %d\n",lb?lb->brd_name:"!LBRD", + lb->brd_module,lb->brd_slot); +#endif + /* insert lb into list */ + if (!(kg = (klgfx_t*)find_first_component(lb,KLSTRUCT_GFX))) { + lb = KLCF_NEXT(lb); + continue; + } + /* set moduleslot now that we have brd_module set */ + kg->moduleslot = (lb->brd_module << 8) | SLOTNUM_GETSLOT(lb->brd_slot); + /* make sure board has device flag set */ + kg->gfx_info.flags |= KLINFO_DEVICE; + if (kg->cookie < KLGFX_COOKIE) { + kg->gfx_next_pipe = NULL; + kg->cookie = KLGFX_COOKIE; + } + + kgm = kg->moduleslot>>8; + kgs = kg->moduleslot&0xff; + pkg = &graphics_pipe_list; + while (*pkg) { + pkgm = (*pkg)->moduleslot>>8; + pkgs = (*pkg)->moduleslot&0xff; + + if (!(MODULE_CMP(kgm, pkgm) > 0 || + (MODULE_CMP(kgm, pkgm) == 0 && + kgs > pkgs))) + break; + + pkg = &(*pkg)->gfx_next_pipe; + } + kg->gfx_next_pipe = *pkg; + *pkg = kg; + lb = KLCF_NEXT(lb); + } + } +#ifdef FIND_GFXPIPE_DEBUG + i = 0; + kg = graphics_pipe_list; + while (kg) { + lboard_t *lb; +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) + lb = find_lboard_class(KL_CONFIG_INFO(kg->gfx_info.nasid), KLCLASS_GFX); +#else +#error Need to figure out how to find graphics boards ... +#endif +#if defined(SUPPORT_PRINTING_M_FORMAT) + printf("find_gfxpipe(): %s pipe %d mod %M slot %d\n",lb?lb->brd_name:"!LBRD",i, + (kg->moduleslot>>8),(kg->moduleslot&0xff)); +#else + printf("find_gfxpipe(): %s pipe %d mod 0x%x slot %d\n",lb?lb->brd_name:"!LBRD",i, + (kg->moduleslot>>8),(kg->moduleslot&0xff)); +#endif + kg = kg->gfx_next_pipe; + i++; + } +#endif + } + + i = 0; + kg = graphics_pipe_list; + while (kg && (i < pipenum)) { + kg = kg->gfx_next_pipe; + i++; + } + + if (!kg) return NULL; + +#if defined(SN0) + return find_lboard_modslot(KL_CONFIG_INFO(kg->gfx_info.nasid), + (kg->moduleslot>>8), + SLOTNUM_XTALK_CLASS|(kg->moduleslot&0xff)); +#elif defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) + return find_lboard_class(KL_CONFIG_INFO(kg->gfx_info.nasid), KLCLASS_GFX); +#else +#error Need to figure out how to find graphics boards ... 
+#endif +} +#endif + + +#define MHZ 1000000 + +#ifndef CONFIG_IA64_SGI_IO +uint +cpu_cycles_adjust(uint orig_cycles) +{ + klcpu_t *acpu; + uint speed; + + acpu = nasid_slice_to_cpuinfo(get_nasid(), get_slice()); + + if (acpu == NULL) return orig_cycles; + + /* + * cpu cycles seem to be half of the real value, hack and mult by 2 + * for now. + */ + speed = (orig_cycles * 2) / MHZ; + + /* + * if the cpu thinks its running at some random speed nowhere close + * the programmed speed, do nothing. + */ + if ((speed < (acpu->cpu_speed - 2)) || (speed > (acpu->cpu_speed + 2))) + return orig_cycles; + return (acpu->cpu_speed * MHZ/2); +} +#endif /* CONFIG_IA64_SGI_IO */ + +/* Get the canonical hardware graph name for the given pci component + * on the given io board. + */ +void +device_component_canonical_name_get(lboard_t *brd, + klinfo_t *component, + char *name) +{ + moduleid_t modnum; + slotid_t slot; + char board_name[20]; +#ifdef SN0 + char slot_name[SLOTNUM_MAXLENGTH]; +#endif + + ASSERT(brd); + + /* Get the module number of this board */ + modnum = brd->brd_module; + + /* Convert the [ CLASS | TYPE ] kind of slotid + * into a string + */ + slot = brd->brd_slot; +#ifdef SN0 + get_slotname(slot, slot_name); + + ASSERT(modnum >= 0); +#else + ASSERT(modnum != MODULE_UNKNOWN && modnum != INVALID_MODULE); +#endif + + /* Get the io board name */ + if (!brd || (brd->brd_sversion < 2)) { + strcpy(name, EDGE_LBL_XWIDGET); + } else { + nic_name_convert(brd->brd_name, board_name); + } + + /* Give out the canonical name of the pci device*/ +#ifdef SN0 + sprintf(name, + "/hw/"EDGE_LBL_MODULE "/%M/"EDGE_LBL_SLOT"/%s/%s/" + EDGE_LBL_PCI"/%d", + modnum, slot_name, board_name,KLCF_BRIDGE_W_ID(component)); +#elif defined (CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) + sprintf(name, + "/dev/hw/"EDGE_LBL_MODULE "/%x/"EDGE_LBL_SLOT"/%s/" + EDGE_LBL_PCI"/%d", + modnum, board_name,KLCF_BRIDGE_W_ID(component)); +#endif + +} + +/* + * Get the serial number of the main component of a board + * Returns 0 if a valid serial number is found + * 1 otherwise. + * Assumptions: Nic manufacturing string has the following format + * *Serial:<serial_number>;* + */ +static int +component_serial_number_get(lboard_t *board, + klconf_off_t mfg_nic_offset, + char *serial_number, + char *key_pattern) +{ + + char *mfg_nic_string; + char *serial_string,*str; + int i; + char *serial_pattern = "Serial:"; + + /* We have an error on a null mfg nic offset */ + if (!mfg_nic_offset) + return(1); + /* Get the hub's manufacturing nic information + * which is in the form of a pre-formatted string + */ + mfg_nic_string = + (char *)NODE_OFFSET_TO_K0(NASID_GET(board), + mfg_nic_offset); + /* There is no manufacturing nic info */ + if (!mfg_nic_string) + return(1); + + str = mfg_nic_string; + /* Look for the key pattern first (if it is specified) + * and then print the serial number corresponding to that. + */ + if (strcmp(key_pattern,"") && + !(str = strstr(mfg_nic_string,key_pattern))) + return(1); + + /* There is no serial number info in the manufacturing + * nic info + */ + if (!(serial_string = strstr(str,serial_pattern))) + return(1); + + serial_string = serial_string + strlen(serial_pattern); + /* Copy the serial number information from the klconfig */ + i = 0; + while (serial_string[i] != ';') { + serial_number[i] = serial_string[i]; + i++; + } + serial_number[i] = 0; + + return(0); +} +/* + * Get the serial number of a board + * Returns 0 if a valid serial number is found + * 1 otherwise. 
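+ *
+ * The board class selects which component's manufacturing NIC
+ * string gets parsed: the hub for node boards, the bridge (or the
+ * TPU/GSN part) for IO boards, the router for router boards and
+ * the gfx component for graphics boards.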
+ */ + +int +board_serial_number_get(lboard_t *board,char *serial_number) +{ + ASSERT(board && serial_number); + if (!board || !serial_number) + return(1); + + strcpy(serial_number,""); + switch(KLCLASS(board->brd_type)) { + case KLCLASS_CPU: { /* Node board */ + klhub_t *hub; + + /* Get the hub component information */ + hub = (klhub_t *)find_first_component(board, + KLSTRUCT_HUB); + /* If we don't have a hub component on an IP27 + * then we have a weird klconfig. + */ + if (!hub) + return(1); + /* Get the serial number information from + * the hub's manufacturing nic info + */ + if (component_serial_number_get(board, + hub->hub_mfg_nic, + serial_number, +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) + "IP35")) +#else + "IP27")) + /* Try with IP31 key if IP27 key fails */ + if (component_serial_number_get(board, + hub->hub_mfg_nic, + serial_number, + "IP31")) +#endif /* CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 */ + return(1); + break; + } + case KLCLASS_IO: { /* IO board */ + if (KLTYPE(board->brd_type) == KLTYPE_TPU) { + /* Special case for TPU boards */ + kltpu_t *tpu; + + /* Get the tpu component information */ + tpu = (kltpu_t *)find_first_component(board, + KLSTRUCT_TPU); + /* If we don't have a tpu component on a tpu board + * then we have a weird klconfig. + */ + if (!tpu) + return(1); + /* Get the serial number information from + * the tpu's manufacturing nic info + */ + if (component_serial_number_get(board, + tpu->tpu_mfg_nic, + serial_number, + "")) + return(1); + break; + } else if ((KLTYPE(board->brd_type) == KLTYPE_GSN_A) || + (KLTYPE(board->brd_type) == KLTYPE_GSN_B)) { + /* Special case for GSN boards */ + klgsn_t *gsn; + + /* Get the gsn component information */ + gsn = (klgsn_t *)find_first_component(board, + ((KLTYPE(board->brd_type) == KLTYPE_GSN_A) ? + KLSTRUCT_GSN_A : KLSTRUCT_GSN_B)); + /* If we don't have a gsn component on a gsn board + * then we have a weird klconfig. + */ + if (!gsn) + return(1); + /* Get the serial number information from + * the gsn's manufacturing nic info + */ + if (component_serial_number_get(board, + gsn->gsn_mfg_nic, + serial_number, + "")) + return(1); + break; + } else { + klbri_t *bridge; + + /* Get the bridge component information */ + bridge = (klbri_t *)find_first_component(board, + KLSTRUCT_BRI); + /* If we don't have a bridge component on an IO board + * then we have a weird klconfig. + */ + if (!bridge) + return(1); + /* Get the serial number information from + * the bridge's manufacturing nic info + */ + if (component_serial_number_get(board, + bridge->bri_mfg_nic, + serial_number, + "")) + return(1); + break; + } + } + case KLCLASS_ROUTER: { /* Router board */ + klrou_t *router; + + /* Get the router component information */ + router = (klrou_t *)find_first_component(board, + KLSTRUCT_ROU); + /* If we don't have a router component on a router board + * then we have a weird klconfig. + */ + if (!router) + return(1); + /* Get the serial number information from + * the router's manufacturing nic info + */ + if (component_serial_number_get(board, + router->rou_mfg_nic, + serial_number, + "")) + return(1); + break; + } + case KLCLASS_GFX: { /* Gfx board */ + klgfx_t *graphics; + + /* Get the graphics component information */ + graphics = (klgfx_t *)find_first_component(board, KLSTRUCT_GFX); + /* If we don't have a gfx component on a gfx board + * then we have a weird klconfig. 
+ */ + if (!graphics) + return(1); + /* Get the serial number information from + * the graphics's manufacturing nic info + */ + if (component_serial_number_get(board, + graphics->gfx_mfg_nic, + serial_number, + "")) + return(1); + break; + } + default: + strcpy(serial_number,""); + break; + } + return(0); +} + +#include "asm/sn/sn_private.h" +#ifndef CONFIG_IA64_SGI_IO +/* + * Given a physical address get the name of memory dimm bank + * in a hwgraph name format. + */ +void +membank_pathname_get(paddr_t paddr,char *name) +{ + cnodeid_t cnode; + char slotname[SLOTNUM_MAXLENGTH]; + + cnode = paddr_cnode(paddr); + /* Make sure that we have a valid name buffer */ + if (!name) + return; + + name[0] = 0; + /* Make sure that the cnode is valid */ + if ((cnode == CNODEID_NONE) || (cnode > numnodes)) + return; + /* Given a slotid(class:type) get the slotname */ +#if defined (SN0) + get_slotname(NODE_SLOTID(cnode),slotname); + sprintf(name, + "/hw/"EDGE_LBL_MODULE"/%M/"EDGE_LBL_SLOT"/%s/"EDGE_LBL_NODE + "/"EDGE_LBL_MEMORY"/dimm_bank/%d", + NODE_MODULEID(cnode),slotname,paddr_dimm(paddr)); +#elif defined (CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) + sprintf(name, + "/dev/hw/"EDGE_LBL_MODULE"/%M/"EDGE_LBL_NODE + "/"EDGE_LBL_MEMORY"/dimm_bank/%d", + NODE_MODULEID(cnode),paddr_dimm(paddr)); +#endif +} + + + +int +membank_check_mixed_hidensity(nasid_t nasid) +{ + lboard_t *brd; + klmembnk_t *mem; + int min_size = 1024, max_size = 0; + int bank, mem_size; + + brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_IP27); + ASSERT(brd); + + mem = (klmembnk_t *)find_first_component(brd, KLSTRUCT_MEMBNK); + ASSERT(mem); + + + for (mem_size = 0, bank = 0; bank < MD_MEM_BANKS; bank++) { + mem_size = KLCONFIG_MEMBNK_SIZE(mem, bank); + if (mem_size < min_size) + min_size = mem_size; + if (mem_size > max_size) + max_size = mem_size; + } + + if ((max_size == 512) && (max_size != min_size)) + return 1; + + return 0; +} + + +int +mem_mixed_hidensity_banks(void) +{ + cnodeid_t cnode; + nasid_t nasid; + + for (cnode = 0; cnode < maxnodes; cnode++) { + nasid = COMPACT_TO_NASID_NODEID(cnode); + if (nasid == INVALID_NASID) + continue; + if (membank_check_mixed_hidensity(nasid)) + return 1; + } + return 0; + +} +#endif /* CONFIG_IA64_SGI_IO */ + +xwidgetnum_t +nodevertex_widgetnum_get(devfs_handle_t node_vtx) +{ + hubinfo_t hubinfo_p; + + hwgraph_info_get_LBL(node_vtx, INFO_LBL_NODE_INFO, + (arbitrary_info_t *) &hubinfo_p); + return(hubinfo_p->h_widgetid); +} + +devfs_handle_t +nodevertex_xbow_peer_get(devfs_handle_t node_vtx) +{ + hubinfo_t hubinfo_p; + nasid_t xbow_peer_nasid; + cnodeid_t xbow_peer; + + hwgraph_info_get_LBL(node_vtx, INFO_LBL_NODE_INFO, + (arbitrary_info_t *) &hubinfo_p); + xbow_peer_nasid = hubinfo_p->h_nodepda->xbow_peer; + if(xbow_peer_nasid == INVALID_NASID) + return ( (devfs_handle_t)-1); + xbow_peer = NASID_TO_COMPACT_NODEID(xbow_peer_nasid); + return(NODEPDA(xbow_peer)->node_vertex); +} + +/* NIC Sorting Support */ + +#define MAX_NICS_PER_STRING 32 +#define MAX_NIC_NAME_LEN 32 + +static char * +get_nic_string(lboard_t *lb) +{ + int i; + klinfo_t *k = NULL ; + klconf_off_t mfg_off = 0 ; + char *mfg_nic = NULL ; + + for (i = 0; i < KLCF_NUM_COMPS(lb); i++) { + k = KLCF_COMP(lb, i) ; + switch(k->struct_type) { + case KLSTRUCT_BRI: + mfg_off = ((klbri_t *)k)->bri_mfg_nic ; + break ; + + case KLSTRUCT_HUB: + mfg_off = ((klhub_t *)k)->hub_mfg_nic ; + break ; + + case KLSTRUCT_ROU: + mfg_off = ((klrou_t *)k)->rou_mfg_nic ; + break ; + + case KLSTRUCT_GFX: + 
mfg_off = ((klgfx_t *)k)->gfx_mfg_nic ; + break ; + + case KLSTRUCT_TPU: + mfg_off = ((kltpu_t *)k)->tpu_mfg_nic ; + break ; + + case KLSTRUCT_GSN_A: + case KLSTRUCT_GSN_B: + mfg_off = ((klgsn_t *)k)->gsn_mfg_nic ; + break ; + + case KLSTRUCT_XTHD: + mfg_off = ((klxthd_t *)k)->xthd_mfg_nic ; + break; + + default: + mfg_off = 0 ; + break ; + } + if (mfg_off) + break ; + } + + if ((mfg_off) && (k)) + mfg_nic = (char *)NODE_OFFSET_TO_K0(k->nasid, mfg_off) ; + + return mfg_nic ; +} + +char * +get_first_string(char **ptrs, int n) +{ + int i ; + char *tmpptr ; + + if ((ptrs == NULL) || (n == 0)) + return NULL ; + + tmpptr = ptrs[0] ; + + if (n == 1) + return tmpptr ; + + for (i = 0 ; i < n ; i++) { + if (strcmp(tmpptr, ptrs[i]) > 0) + tmpptr = ptrs[i] ; + } + + return tmpptr ; +} + +int +get_ptrs(char *idata, char **ptrs, int n, char *label) +{ + int i = 0 ; + char *tmp = idata ; + + if ((ptrs == NULL) || (idata == NULL) || (label == NULL) || (n == 0)) + return 0 ; + + while ( (tmp = strstr(tmp, label)) ){ + tmp += strlen(label) ; + /* check for empty name field, and last NULL ptr */ + if ((i < (n-1)) && (*tmp != ';')) { + ptrs[i++] = tmp ; + } + } + + ptrs[i] = NULL ; + + return i ; +} + +/* + * sort_nic_names + * + * Does not really do sorting. Find the alphabetically lowest + * name among all the nic names found in a nic string. + * + * Return: + * Nothing + * + * Side Effects: + * + * lb->brd_name gets the new name found + */ + +static void +sort_nic_names(lboard_t *lb) +{ + char *nic_str ; + char *ptrs[MAX_NICS_PER_STRING] ; + char name[MAX_NIC_NAME_LEN] ; + char *tmp, *tmp1 ; + + *name = 0 ; + + /* Get the nic pointer from the lb */ + + if ((nic_str = get_nic_string(lb)) == NULL) + return ; + + tmp = get_first_string(ptrs, + get_ptrs(nic_str, ptrs, MAX_NICS_PER_STRING, "Name:")) ; + + if (tmp == NULL) + return ; + + if ( (tmp1 = strchr(tmp, ';')) ){ + strncpy(name, tmp, tmp1-tmp) ; + name[tmp1-tmp] = 0 ; + } else { + strncpy(name, tmp, (sizeof(name) -1)) ; + name[sizeof(name)-1] = 0 ; + } + + strcpy(lb->brd_name, name) ; +} + + +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) + +char brick_types[MAX_BRICK_TYPES + 1] = "crikxdp789012345"; + +/* + * Format a module id for printing. + */ +void +format_module_id(char *buffer, moduleid_t m, int fmt) +{ + int rack, position; + char brickchar; + + rack = MODULE_GET_RACK(m); + ASSERT(MODULE_GET_BTYPE(m) < MAX_BRICK_TYPES); + brickchar = MODULE_GET_BTCHAR(m); + position = MODULE_GET_BPOS(m); + + if (fmt == MODULE_FORMAT_BRIEF) { + /* Brief module number format, eg. 002c15 */ + + /* Decompress the rack number */ + *buffer++ = '0' + RACK_GET_CLASS(rack); + *buffer++ = '0' + RACK_GET_GROUP(rack); + *buffer++ = '0' + RACK_GET_NUM(rack); + + /* Add the brick type */ + *buffer++ = brickchar; + } + else if (fmt == MODULE_FORMAT_LONG) { + /* Fuller hwgraph format, eg. rack/002/bay/15 */ + + strcpy(buffer, EDGE_LBL_RACK "/"); buffer += strlen(buffer); + + *buffer++ = '0' + RACK_GET_CLASS(rack); + *buffer++ = '0' + RACK_GET_GROUP(rack); + *buffer++ = '0' + RACK_GET_NUM(rack); + + strcpy(buffer, "/" EDGE_LBL_RPOS "/"); buffer += strlen(buffer); + } + + /* Add the bay position, using at least two digits */ + if (position < 10) + *buffer++ = '0'; + sprintf(buffer, "%d", position); + +} + +/* + * Parse a module id, in either brief or long form. + * Returns < 0 on error. 
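+ * Accepted strings mirror format_module_id() above, e.g. the brief
+ * form "002c15" (rack 002, c-brick, bay 15) or the long form
+ * "rack/002/bay/15".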
+ * The long form does not include a brick type, so it defaults to 0 (CBrick) + */ +int +parse_module_id(char *buffer) +{ + unsigned int v, rack, bay, type, form; + moduleid_t m; + char c; + + if (strstr(buffer, EDGE_LBL_RACK "/") == buffer) { + form = MODULE_FORMAT_LONG; + buffer += strlen(EDGE_LBL_RACK "/"); + + /* A long module ID must be exactly 5 non-template chars. */ + if (strlen(buffer) != strlen("/" EDGE_LBL_RPOS "/") + 5) + return -1; + } + else { + form = MODULE_FORMAT_BRIEF; + + /* A brief module id must be exactly 6 characters */ + if (strlen(buffer) != 6) + return -2; + } + + /* The rack number must be exactly 3 digits */ + if (!(isdigit(buffer[0]) && isdigit(buffer[1]) && isdigit(buffer[2]))) + return -3; + + rack = 0; + v = *buffer++ - '0'; + if (v > RACK_CLASS_MASK(rack) >> RACK_CLASS_SHFT(rack)) + return -4; + RACK_ADD_CLASS(rack, v); + + v = *buffer++ - '0'; + if (v > RACK_GROUP_MASK(rack) >> RACK_GROUP_SHFT(rack)) + return -5; + RACK_ADD_GROUP(rack, v); + + v = *buffer++ - '0'; + /* rack numbers are 1-based */ + if (v-1 > RACK_NUM_MASK(rack) >> RACK_NUM_SHFT(rack)) + return -6; + RACK_ADD_NUM(rack, v); + + if (form == MODULE_FORMAT_BRIEF) { + /* Next should be a module type character. Accept ucase or lcase. */ + c = *buffer++; + if (!isalpha(c)) + return -7; + + /* strchr() returns a pointer into brick_types[], or NULL */ + type = (unsigned int)(strchr(brick_types, tolower(c)) - brick_types); + if (type > MODULE_BTYPE_MASK >> MODULE_BTYPE_SHFT) + return -8; + } + else { + /* Hardcode the module type, and skip over the boilerplate */ + type = MODULE_CBRICK; + + if (strstr(buffer, "/" EDGE_LBL_RPOS "/") != buffer) + return -9; + + buffer += strlen("/" EDGE_LBL_RPOS "/"); + } + + /* The bay number is last. Make sure it's exactly two digits */ + + if (!(isdigit(buffer[0]) && isdigit(buffer[1]) && !buffer[2])) + return -10; + + bay = 10 * (buffer[0] - '0') + (buffer[1] - '0'); + + if (bay > MODULE_BPOS_MASK >> MODULE_BPOS_SHFT) + return -11; + + m = RBT_TO_MODULE(rack, bay, type); + + /* avoid sign extending the moduleid_t */ + return (int)(unsigned short)m; +} + +#else /* CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 */ + +/* + * Format a module id for printing. + */ +void +format_module_id(char *buffer, moduleid_t m, int fmt) +{ + if (fmt == MODULE_FORMAT_BRIEF) { + sprintf(buffer, "%d", m); + } + else if (fmt == MODULE_FORMAT_LONG) { + sprintf(buffer, EDGE_LBL_MODULE "/%d", m); + } +} + +/* + * Parse a module id, in either brief or long form. + * Returns < 0 on error. + */ +int +parse_module_id(char *buffer) +{ + moduleid_t m; + char c; + + if (strstr(buffer, EDGE_LBL_MODULE "/") == buffer) + buffer += strlen(EDGE_LBL_MODULE "/"); + + m = 0; + while(c = *buffer++) { + if (!isdigit(c)) + return -1; + m = 10 * m + (c - '0'); + } + + /* avoid sign extending the moduleid_t */ + return (int)(unsigned short)m; +} + +#endif /* CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 */ + + diff --git a/arch/ia64/sn/io/klgraph.c b/arch/ia64/sn/io/klgraph.c new file mode 100644 index 000000000..dcd7c2316 --- /dev/null +++ b/arch/ia64/sn/io/klgraph.c @@ -0,0 +1,971 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +/* + * klgraph.c- + * This file specifies the interface between the kernel and the PROM's + * configuration data structures. 
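+ *
+ * The routines below walk the per-node klconfig board lists and
+ * build the corresponding hwgraph vertices and edges for nodes,
+ * hubs, crossbows, routers and modules.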
+ */ + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> + +#include <asm/sn/cmn_err.h> +#include <asm/sn/agent.h> +#ifdef CONFIG_IA64_SGI_IO +#include <asm/sn/kldir.h> +#endif +#include <asm/sn/gda.h> +#include <asm/sn/klconfig.h> +#include <asm/sn/router.h> +#include <asm/sn/xtalk/xbow.h> +#include <asm/sn/hcl_util.h> + +#define KLGRAPH_DEBUG 1 +#ifdef KLGRAPH_DEBUG +#define GRPRINTF(x) printk x +#define CE_GRPANIC CE_PANIC +#else +#define GRPRINTF(x) +#define CE_GRPANIC CE_PANIC +#endif + +#include <asm/sn/sn_private.h> + +extern char arg_maxnodes[]; +extern int maxnodes; + +#ifndef BRINGUP +/* + * Gets reason for diagval using table lookup. + */ +static char* +get_diag_string(uint diagcode) +{ + int num_entries; + int i; + num_entries = sizeof(diagval_map) / sizeof(diagval_t); + for (i = 0; i < num_entries; i++){ + if ((unchar)diagval_map[i].dv_code == (unchar)diagcode) + return diagval_map[i].dv_msg; + } + return "Unknown"; +} + +#endif /* ndef BRINGUP */ + + +/* + * Support for verbose inventory via hardware graph. + * klhwg_invent_alloc allocates the necessary size of inventory information + * and fills in the generic information. + */ +invent_generic_t * +klhwg_invent_alloc(cnodeid_t cnode, int class, int size) +{ + invent_generic_t *invent; + + invent = kern_malloc(size); + if (!invent) return NULL; + + invent->ig_module = NODE_MODULEID(cnode); + invent->ig_slot = SLOTNUM_GETSLOT(NODE_SLOTID(cnode)); + invent->ig_invclass = class; + + return invent; +} + +/* + * Add information about the baseio prom version number + * as a part of detailed inventory info in the hwgraph. + */ +void +klhwg_baseio_inventory_add(devfs_handle_t baseio_vhdl,cnodeid_t cnode) +{ + invent_miscinfo_t *baseio_inventory; + unsigned char version = 0,revision = 0; + + /* Allocate memory for the "detailed inventory" info + * for the baseio + */ + baseio_inventory = (invent_miscinfo_t *) + klhwg_invent_alloc(cnode, INV_PROM, sizeof(invent_miscinfo_t)); + baseio_inventory->im_type = INV_IO6PROM; + /* Read the io6prom revision from the nvram */ +#ifndef CONFIG_IA64_SGI_IO + nvram_prom_version_get(&version,&revision); +#endif + /* Store the revision info in the inventory */ + baseio_inventory->im_version = version; + baseio_inventory->im_rev = revision; + /* Put the inventory info in the hardware graph */ + hwgraph_info_add_LBL(baseio_vhdl, INFO_LBL_DETAIL_INVENT, + (arbitrary_info_t) baseio_inventory); + /* Make the information available to the user programs + * thru hwgfs. + */ + hwgraph_info_export_LBL(baseio_vhdl, INFO_LBL_DETAIL_INVENT, + sizeof(invent_miscinfo_t)); +} + +char *hub_rev[] = { + "0.0", + "1.0", + "2.0", + "2.1", + "2.2", + "2.3" +}; + +/* + * Add detailed cpu inventory info to the hardware graph. 
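+ * The hub's revision and speed are recorded and exported through
+ * the INFO_LBL_DETAIL_INVENT label.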
+ */ +void +klhwg_hub_invent_info(devfs_handle_t hubv, + cnodeid_t cnode, + klhub_t *hub) +{ + invent_miscinfo_t *hub_invent; + + hub_invent = (invent_miscinfo_t *) + klhwg_invent_alloc(cnode, INV_MISC, sizeof(invent_miscinfo_t)); + if (!hub_invent) + return; + + if (KLCONFIG_INFO_ENABLED((klinfo_t *)hub)) + hub_invent->im_gen.ig_flag = INVENT_ENABLED; + + hub_invent->im_type = INV_HUB; + hub_invent->im_rev = hub->hub_info.revision; + hub_invent->im_speed = hub->hub_speed; + hwgraph_info_add_LBL(hubv, INFO_LBL_DETAIL_INVENT, + (arbitrary_info_t) hub_invent); + hwgraph_info_export_LBL(hubv, INFO_LBL_DETAIL_INVENT, + sizeof(invent_miscinfo_t)); +} + +/* ARGSUSED */ +void +klhwg_add_hub(devfs_handle_t node_vertex, klhub_t *hub, cnodeid_t cnode) +{ + devfs_handle_t myhubv; + int rc; + + GRPRINTF(("klhwg_add_hub: adding %s\n", EDGE_LBL_HUB)); + + (void) hwgraph_path_add(node_vertex, EDGE_LBL_HUB, &myhubv); + rc = device_master_set(myhubv, node_vertex); + +#ifndef CONFIG_IA64_SGI_IO + /* + * Activate when we support hub stats. + */ + rc = hwgraph_info_add_LBL(myhubv, INFO_LBL_HUB_INFO, + (arbitrary_info_t)(&NODEPDA(cnode)->hubstats)); +#endif + + if (rc != GRAPH_SUCCESS) { + cmn_err(CE_WARN, + "klhwg_add_hub: Can't add hub info label 0x%p, code %d", + myhubv, rc); + } + + klhwg_hub_invent_info(myhubv, cnode, hub); + +#ifndef BRINGUP + init_hub_stats(cnode, NODEPDA(cnode)); +#endif /* ndef BRINGUP */ + +#ifndef CONFIG_IA64_SGI_IO + sndrv_attach(myhubv); +#else + /* + * Need to call our driver to do the attach? + */ + printk("klhwg_add_hub: Need to add code to do the attach.\n"); +#endif +} + +#ifndef BRINGUP + +void +klhwg_add_rps(devfs_handle_t node_vertex, cnodeid_t cnode, int flag) +{ + devfs_handle_t myrpsv; + invent_rpsinfo_t *rps_invent; + int rc; + + if(cnode == CNODEID_NONE) + return; + + GRPRINTF(("klhwg_add_rps: adding %s to vertex 0x%x\n", EDGE_LBL_RPS, + node_vertex)); + + rc = hwgraph_path_add(node_vertex, EDGE_LBL_RPS, &myrpsv); + if (rc != GRAPH_SUCCESS) + return; + + device_master_set(myrpsv, node_vertex); + + rps_invent = (invent_rpsinfo_t *) + klhwg_invent_alloc(cnode, INV_RPS, sizeof(invent_rpsinfo_t)); + + if (!rps_invent) + return; + + rps_invent->ir_xbox = 0; /* not an xbox RPS */ + + if (flag) + rps_invent->ir_gen.ig_flag = INVENT_ENABLED; + else + rps_invent->ir_gen.ig_flag = 0x0; + + hwgraph_info_add_LBL(myrpsv, INFO_LBL_DETAIL_INVENT, + (arbitrary_info_t) rps_invent); + hwgraph_info_export_LBL(myrpsv, INFO_LBL_DETAIL_INVENT, + sizeof(invent_rpsinfo_t)); + +} + +/* + * klhwg_update_rps gets invoked when the system controller sends an + * interrupt indicating the power supply has lost/regained the redundancy. + * It's responsible for updating the Hardware graph information. + * rps_state = 0 -> if the rps lost the redundancy + * = 1 -> If it is redundant. 
+ */ +void +klhwg_update_rps(cnodeid_t cnode, int rps_state) +{ + devfs_handle_t node_vertex; + devfs_handle_t rpsv; + invent_rpsinfo_t *rps_invent; + int rc; + if(cnode == CNODEID_NONE) + return; + + node_vertex = cnodeid_to_vertex(cnode); + rc = hwgraph_edge_get(node_vertex, EDGE_LBL_RPS, &rpsv); + if (rc != GRAPH_SUCCESS) { + return; + } + + rc = hwgraph_info_get_LBL(rpsv, INFO_LBL_DETAIL_INVENT, + (arbitrary_info_t *)&rps_invent); + if (rc != GRAPH_SUCCESS) { + return; + } + + if (rps_state == 0 ) + rps_invent->ir_gen.ig_flag = 0; + else + rps_invent->ir_gen.ig_flag = INVENT_ENABLED; +} + +void +klhwg_add_xbox_rps(devfs_handle_t node_vertex, cnodeid_t cnode, int flag) +{ + devfs_handle_t myrpsv; + invent_rpsinfo_t *rps_invent; + int rc; + + if(cnode == CNODEID_NONE) + return; + + GRPRINTF(("klhwg_add_rps: adding %s to vertex 0x%x\n", + EDGE_LBL_XBOX_RPS, node_vertex)); + + rc = hwgraph_path_add(node_vertex, EDGE_LBL_XBOX_RPS, &myrpsv); + if (rc != GRAPH_SUCCESS) + return; + + device_master_set(myrpsv, node_vertex); + + rps_invent = (invent_rpsinfo_t *) + klhwg_invent_alloc(cnode, INV_RPS, sizeof(invent_rpsinfo_t)); + + if (!rps_invent) + return; + + rps_invent->ir_xbox = 1; /* xbox RPS */ + + if (flag) + rps_invent->ir_gen.ig_flag = INVENT_ENABLED; + else + rps_invent->ir_gen.ig_flag = 0x0; + + hwgraph_info_add_LBL(myrpsv, INFO_LBL_DETAIL_INVENT, + (arbitrary_info_t) rps_invent); + hwgraph_info_export_LBL(myrpsv, INFO_LBL_DETAIL_INVENT, + sizeof(invent_rpsinfo_t)); + +} + +/* + * klhwg_update_xbox_rps gets invoked when the xbox system controller + * polls the status register and discovers that the power supply has + * lost/regained the redundancy. + * It's responsible for updating the Hardware graph information. + * rps_state = 0 -> if the rps lost the redundancy + * = 1 -> If it is redundant. + */ +void +klhwg_update_xbox_rps(cnodeid_t cnode, int rps_state) +{ + devfs_handle_t node_vertex; + devfs_handle_t rpsv; + invent_rpsinfo_t *rps_invent; + int rc; + if(cnode == CNODEID_NONE) + return; + + node_vertex = cnodeid_to_vertex(cnode); + rc = hwgraph_edge_get(node_vertex, EDGE_LBL_XBOX_RPS, &rpsv); + if (rc != GRAPH_SUCCESS) { + return; + } + + rc = hwgraph_info_get_LBL(rpsv, INFO_LBL_DETAIL_INVENT, + (arbitrary_info_t *)&rps_invent); + if (rc != GRAPH_SUCCESS) { + return; + } + + if (rps_state == 0 ) + rps_invent->ir_gen.ig_flag = 0; + else + rps_invent->ir_gen.ig_flag = INVENT_ENABLED; +} + +#endif /* ndef BRINGUP */ + +void +klhwg_add_xbow(cnodeid_t cnode, nasid_t nasid) +{ + lboard_t *brd; + klxbow_t *xbow_p; + nasid_t hub_nasid; + cnodeid_t hub_cnode; + int widgetnum; + devfs_handle_t xbow_v, hubv; + /*REFERENCED*/ + graph_error_t err; + +#if CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 || defined(CONFIG_IA64_GENERIC) + if ((brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), + KLTYPE_PBRICK_XBOW)) == NULL) + return; +#endif + + if (KL_CONFIG_DUPLICATE_BOARD(brd)) + return; + + GRPRINTF(("klhwg_add_xbow: adding cnode %d nasid %d xbow edges\n", + cnode, nasid)); + + if ((xbow_p = (klxbow_t *)find_component(brd, NULL, KLSTRUCT_XBOW)) + == NULL) + return; + +#ifndef CONFIG_IA64_SGI_IO + /* + * We cannot support this function in devfs .. see below where + * we use hwgraph_path_add() to create this vertex with a known + * name. 
+ */ + err = hwgraph_vertex_create(&xbow_v); + ASSERT(err == GRAPH_SUCCESS); + + xswitch_vertex_init(xbow_v); +#endif /* !CONFIG_IA64_SGI_IO */ + + for (widgetnum = HUB_WIDGET_ID_MIN; widgetnum <= HUB_WIDGET_ID_MAX; widgetnum++) { + if (!XBOW_PORT_TYPE_HUB(xbow_p, widgetnum)) + continue; + + hub_nasid = XBOW_PORT_NASID(xbow_p, widgetnum); + printk("klhwg_add_xbow: Found xbow port type hub hub_nasid %d widgetnum %d\n", hub_nasid, widgetnum); + if (hub_nasid == INVALID_NASID) { + cmn_err(CE_WARN, "hub widget %d, skipping xbow graph\n", widgetnum); + continue; + } + + hub_cnode = NASID_TO_COMPACT_NODEID(hub_nasid); + printk("klhwg_add_xbow: cnode %d cnode %d\n", nasid_to_compact_node[0], nasid_to_compact_node[1]); + + if (is_specified(arg_maxnodes) && hub_cnode == INVALID_CNODEID) { + continue; + } + + hubv = cnodeid_to_vertex(hub_cnode); + +#ifdef CONFIG_IA64_SGI_IO + printk("klhwg_add_xbow: Hub Vertex found = %p hub_cnode %d\n", hubv, hub_cnode); + err = hwgraph_path_add(hubv, EDGE_LBL_XTALK, &xbow_v); + if (err != GRAPH_SUCCESS) { + if (err == GRAPH_DUP) + cmn_err(CE_WARN, "klhwg_add_xbow: Check for " + "working routers and router links!"); + + cmn_err(CE_GRPANIC, "klhwg_add_xbow: Failed to add " + "edge: vertex 0x%p (0x%p) to vertex 0x%p (0x%p)," + "error %d\n", + hubv, hubv, xbow_v, xbow_v, err); + } + xswitch_vertex_init(xbow_v); +#endif + + NODEPDA(hub_cnode)->xbow_vhdl = xbow_v; + + /* + * XXX - This won't work is we ever hook up two hubs + * by crosstown through a crossbow. + */ + if (hub_nasid != nasid) { + NODEPDA(hub_cnode)->xbow_peer = nasid; + NODEPDA(NASID_TO_COMPACT_NODEID(nasid))->xbow_peer = + hub_nasid; + } + + GRPRINTF(("klhwg_add_xbow: adding port nasid %d %s to vertex 0x%p\n", + hub_nasid, EDGE_LBL_XTALK, hubv)); + +#ifndef CONFIG_IA64_SGI_IO + err = hwgraph_edge_add(hubv, xbow_v, EDGE_LBL_XTALK); + if (err != GRAPH_SUCCESS) { + if (err == GRAPH_DUP) + cmn_err(CE_WARN, "klhwg_add_xbow: Check for " + "working routers and router links!"); + + cmn_err(CE_GRPANIC, "klhwg_add_xbow: Failed to add " + "edge: vertex 0x%p (0x%p) to vertex 0x%p (0x%p), " + "error %d\n", + hubv, hubv, xbow_v, xbow_v, err); + } +#endif + } +} + + +/* ARGSUSED */ +void +klhwg_add_node(devfs_handle_t hwgraph_root, cnodeid_t cnode, gda_t *gdap) +{ + nasid_t nasid; + lboard_t *brd; + klhub_t *hub; + devfs_handle_t node_vertex = NULL; + char path_buffer[100]; + int rv; + char *s; + int board_disabled = 0; + + nasid = COMPACT_TO_NASID_NODEID(cnode); + brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_IP27); + GRPRINTF(("klhwg_add_node: Adding cnode %d, nasid %d, brd 0x%p\n", + cnode, nasid, brd)); + ASSERT(brd); + + do { + + /* Generate a hardware graph path for this board. */ + board_to_path(brd, path_buffer); + + GRPRINTF(("klhwg_add_node: adding %s to vertex 0x%p\n", + path_buffer, hwgraph_root)); + rv = hwgraph_path_add(hwgraph_root, path_buffer, &node_vertex); + + printk("klhwg_add_node: rv = %d graph success %d node_vertex 0x%p\n", rv, GRAPH_SUCCESS, node_vertex); + if (rv != GRAPH_SUCCESS) + cmn_err(CE_PANIC, "Node vertex creation failed. 
" + "Path == %s", + path_buffer); + + hub = (klhub_t *)find_first_component(brd, KLSTRUCT_HUB); + ASSERT(hub); + if(hub->hub_info.flags & KLINFO_ENABLE) + board_disabled = 0; + else + board_disabled = 1; + + if(!board_disabled) { + mark_nodevertex_as_node(node_vertex, + cnode + board_disabled * numnodes); + printk("klhwg_add_node: node_vertex %p, cnode %d numnodes %d\n", node_vertex, cnode, numnodes); + + s = dev_to_name(node_vertex, path_buffer, sizeof(path_buffer)); + printk("klhwg_add_node: s %s\n", s); + + NODEPDA(cnode)->hwg_node_name = + kmalloc(strlen(s) + 1, + GFP_KERNEL); + ASSERT_ALWAYS(NODEPDA(cnode)->hwg_node_name != NULL); + strcpy(NODEPDA(cnode)->hwg_node_name, s); + + hubinfo_set(node_vertex, NODEPDA(cnode)->pdinfo); + + /* Set up node board's slot */ + NODEPDA(cnode)->slotdesc = brd->brd_slot; + + /* Set up the module we're in */ + NODEPDA(cnode)->module_id = brd->brd_module; + NODEPDA(cnode)->module = module_lookup(brd->brd_module); + } + + if(!board_disabled) + klhwg_add_hub(node_vertex, hub, cnode); + + brd = KLCF_NEXT(brd); + if (brd) + brd = find_lboard(brd, KLTYPE_IP27); + else + break; + } while(brd); +} + + +/* ARGSUSED */ +void +klhwg_add_all_routers(devfs_handle_t hwgraph_root) +{ + nasid_t nasid; + cnodeid_t cnode; + lboard_t *brd; + devfs_handle_t node_vertex; + char path_buffer[100]; + int rv; + + for (cnode = 0; cnode < maxnodes; cnode++) { + nasid = COMPACT_TO_NASID_NODEID(cnode); + + GRPRINTF(("klhwg_add_all_routers: adding router on cnode %d\n", + cnode)); + + brd = find_lboard_class((lboard_t *)KL_CONFIG_INFO(nasid), + KLTYPE_ROUTER); + + if (!brd) + /* No routers stored in this node's memory */ + continue; + + do { + ASSERT(brd); + GRPRINTF(("Router board struct is %p\n", brd)); + + /* Don't add duplicate boards. */ + if (brd->brd_flags & DUPLICATE_BOARD) + continue; + + GRPRINTF(("Router 0x%p module number is %d\n", brd, brd->brd_module)); + /* Generate a hardware graph path for this board. */ + board_to_path(brd, path_buffer); + + GRPRINTF(("Router path is %s\n", path_buffer)); + + /* Add the router */ + GRPRINTF(("klhwg_add_all_routers: adding %s to vertex 0x%p\n", + path_buffer, hwgraph_root)); + rv = hwgraph_path_add(hwgraph_root, path_buffer, &node_vertex); + + if (rv != GRAPH_SUCCESS) + cmn_err(CE_PANIC, "Router vertex creation " + "failed. Path == %s", + path_buffer); + + GRPRINTF(("klhwg_add_all_routers: get next board from 0x%p\n", + brd)); + /* Find the rest of the routers stored on this node. */ + } while ( (brd = find_lboard_class(KLCF_NEXT(brd), + KLTYPE_ROUTER)) ); + + GRPRINTF(("klhwg_add_all_routers: Done.\n")); + } + +} + +/* ARGSUSED */ +void +klhwg_connect_one_router(devfs_handle_t hwgraph_root, lboard_t *brd, + cnodeid_t cnode, nasid_t nasid) +{ + klrou_t *router; + char path_buffer[50]; + char dest_path[50]; + devfs_handle_t router_hndl; + devfs_handle_t dest_hndl; + int rc; + int port; + lboard_t *dest_brd; + + GRPRINTF(("klhwg_connect_one_router: Connecting router on cnode %d\n", + cnode)); + + /* Don't add duplicate boards. */ + if (brd->brd_flags & DUPLICATE_BOARD) { + GRPRINTF(("klhwg_connect_one_router: Duplicate router 0x%p on cnode %d\n", + brd, cnode)); + return; + } + + /* Generate a hardware graph path for this board. 
*/ + board_to_path(brd, path_buffer); + + rc = hwgraph_traverse(hwgraph_root, path_buffer, &router_hndl); + + if (rc != GRAPH_SUCCESS && is_specified(arg_maxnodes)) + return; + + if (rc != GRAPH_SUCCESS) + cmn_err(CE_WARN, "Can't find router: %s", path_buffer); + + /* We don't know what to do with multiple router components */ + if (brd->brd_numcompts != 1) { + cmn_err(CE_PANIC, + "klhwg_connect_one_router: %d cmpts on router\n", + brd->brd_numcompts); + return; + } + + + /* Convert component 0 to klrou_t ptr */ + router = (klrou_t *)NODE_OFFSET_TO_K0(NASID_GET(brd), + brd->brd_compts[0]); + + for (port = 1; port <= MAX_ROUTER_PORTS; port++) { + /* See if the port's active */ + if (router->rou_port[port].port_nasid == INVALID_NASID) { + GRPRINTF(("klhwg_connect_one_router: port %d inactive.\n", + port)); + continue; + } + if (is_specified(arg_maxnodes) && NASID_TO_COMPACT_NODEID(router->rou_port[port].port_nasid) + == INVALID_CNODEID) { + continue; + } + + dest_brd = (lboard_t *)NODE_OFFSET_TO_K0( + router->rou_port[port].port_nasid, + router->rou_port[port].port_offset); + + /* Generate a hardware graph path for this board. */ + board_to_path(dest_brd, dest_path); + + rc = hwgraph_traverse(hwgraph_root, dest_path, &dest_hndl); + + if (rc != GRAPH_SUCCESS) { + if (is_specified(arg_maxnodes) && KL_CONFIG_DUPLICATE_BOARD(dest_brd)) + continue; + cmn_err(CE_PANIC, "Can't find router: %s", dest_path); + } + GRPRINTF(("klhwg_connect_one_router: Link from %s/%d to %s\n", + path_buffer, port, dest_path)); + + sprintf(dest_path, "%d", port); + + rc = hwgraph_edge_add(router_hndl, dest_hndl, dest_path); + + if (rc == GRAPH_DUP) { + GRPRINTF(("Skipping port %d. nasid %d %s/%s\n", + port, router->rou_port[port].port_nasid, + path_buffer, dest_path)); + continue; + } + + if (rc != GRAPH_SUCCESS && !is_specified(arg_maxnodes)) + cmn_err(CE_GRPANIC, "Can't create edge: %s/%s to vertex 0x%p error 0x%x\n", + path_buffer, dest_path, dest_hndl, rc); + + } +} + + +void +klhwg_connect_routers(devfs_handle_t hwgraph_root) +{ + nasid_t nasid; + cnodeid_t cnode; + lboard_t *brd; + + for (cnode = 0; cnode < maxnodes; cnode++) { + nasid = COMPACT_TO_NASID_NODEID(cnode); + + GRPRINTF(("klhwg_connect_routers: Connecting routers on cnode %d\n", + cnode)); + + brd = find_lboard_class((lboard_t *)KL_CONFIG_INFO(nasid), + KLTYPE_ROUTER); + + if (!brd) + continue; + + do { + + nasid = COMPACT_TO_NASID_NODEID(cnode); + + klhwg_connect_one_router(hwgraph_root, brd, + cnode, nasid); + + /* Find the rest of the routers stored on this node. 
*/ + } while ( (brd = find_lboard_class(KLCF_NEXT(brd), KLTYPE_ROUTER)) ); + } +} + + + +void +klhwg_connect_hubs(devfs_handle_t hwgraph_root) +{ + nasid_t nasid; + cnodeid_t cnode; + lboard_t *brd; + klhub_t *hub; + lboard_t *dest_brd; + devfs_handle_t hub_hndl; + devfs_handle_t dest_hndl; + char path_buffer[50]; + char dest_path[50]; + graph_error_t rc; + + for (cnode = 0; cnode < maxnodes; cnode++) { + nasid = COMPACT_TO_NASID_NODEID(cnode); + + GRPRINTF(("klhwg_connect_hubs: Connecting hubs on cnode %d\n", + cnode)); + + brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), + KLTYPE_IP27); + ASSERT(brd); + + hub = (klhub_t *)find_first_component(brd, KLSTRUCT_HUB); + ASSERT(hub); + + /* See if the port's active */ + if (hub->hub_port.port_nasid == INVALID_NASID) { + GRPRINTF(("klhwg_connect_hubs: port inactive.\n")); + continue; + } + + if (is_specified(arg_maxnodes) && NASID_TO_COMPACT_NODEID(hub->hub_port.port_nasid) == INVALID_CNODEID) + continue; + + /* Generate a hardware graph path for this board. */ + board_to_path(brd, path_buffer); + + GRPRINTF(("klhwg_connect_hubs: Hub path is %s.\n", path_buffer)); + rc = hwgraph_traverse(hwgraph_root, path_buffer, &hub_hndl); + + if (rc != GRAPH_SUCCESS) + cmn_err(CE_WARN, "Can't find hub: %s", path_buffer); + + dest_brd = (lboard_t *)NODE_OFFSET_TO_K0( + hub->hub_port.port_nasid, + hub->hub_port.port_offset); + + /* Generate a hardware graph path for this board. */ + board_to_path(dest_brd, dest_path); + + rc = hwgraph_traverse(hwgraph_root, dest_path, &dest_hndl); + + if (rc != GRAPH_SUCCESS) { + if (is_specified(arg_maxnodes) && KL_CONFIG_DUPLICATE_BOARD(dest_brd)) + continue; + cmn_err(CE_PANIC, "Can't find board: %s", dest_path); + } else { + + + GRPRINTF(("klhwg_connect_hubs: Link from %s to %s.\n", + path_buffer, dest_path)); + + rc = hwgraph_edge_add(hub_hndl, dest_hndl, EDGE_LBL_INTERCONNECT); + + if (rc != GRAPH_SUCCESS) + cmn_err(CE_GRPANIC, "Can't create edge: %s/%s to vertex 0x%p, error 0x%x\n", + path_buffer, dest_path, dest_hndl, rc); + + } + } +} + +/* Store the pci/vme disabled board information as extended administrative + * hints which can later be used by the drivers using the device/driver + * admin interface. + */ +void +klhwg_device_disable_hints_add(void) +{ + cnodeid_t cnode; /* node we are looking at */ + nasid_t nasid; /* nasid of the node */ + lboard_t *board; /* board we are looking at */ + int comp_index; /* component index */ + klinfo_t *component; /* component in the board we are + * looking at + */ + char device_name[MAXDEVNAME]; + +#ifndef CONFIG_IA64_SGI_IO + device_admin_table_init(); +#endif + for(cnode = 0; cnode < numnodes; cnode++) { + nasid = COMPACT_TO_NASID_NODEID(cnode); + board = (lboard_t *)KL_CONFIG_INFO(nasid); + /* Check out all the board info stored on a node */ + while(board) { + /* No need to look at duplicate boards or non-io + * boards + */ + if (KL_CONFIG_DUPLICATE_BOARD(board) || + KLCLASS(board->brd_type) != KLCLASS_IO) { + board = KLCF_NEXT(board); + continue; + } + /* Check out all the components of a board */ + for (comp_index = 0; + comp_index < KLCF_NUM_COMPS(board); + comp_index++) { + component = KLCF_COMP(board,comp_index); + /* If the component is enabled move on to + * the next component + */ + if (KLCONFIG_INFO_ENABLED(component)) + continue; + /* NOTE : Since the prom only supports + * the disabling of pci devices the following + * piece of code makes sense. + * Make sure that this assumption is valid + */ + /* This component is disabled. 
Store this + * hint in the extended device admin table + */ + /* Get the canonical name of the pci device */ + device_component_canonical_name_get(board, + component, + device_name); +#ifndef CONFIG_IA64_SGI_IO + device_admin_table_update(device_name, + ADMIN_LBL_DISABLED, + "yes"); +#endif +#ifdef DEBUG + printf("%s DISABLED\n",device_name); +#endif + } + /* go to the next board info stored on this + * node + */ + board = KLCF_NEXT(board); + } + } +} + +void +klhwg_add_all_modules(devfs_handle_t hwgraph_root) +{ + cmoduleid_t cm; + char name[128]; + devfs_handle_t vhdl; + int rc; + + /* Add devices under each module */ + + for (cm = 0; cm < nummodules; cm++) { + /* Use module as module vertex fastinfo */ + + sprintf(name, EDGE_LBL_MODULE "/%x", modules[cm]->id); + + rc = hwgraph_path_add(hwgraph_root, name, &vhdl); + ASSERT(rc == GRAPH_SUCCESS); + rc = rc; + + hwgraph_fastinfo_set(vhdl, (arbitrary_info_t) modules[cm]); + + /* Add system controller */ + + sprintf(name, + EDGE_LBL_MODULE "/%x/" EDGE_LBL_L1, + modules[cm]->id); + + rc = hwgraph_path_add(hwgraph_root, name, &vhdl); + ASSERT_ALWAYS(rc == GRAPH_SUCCESS); + rc = rc; + + hwgraph_info_add_LBL(vhdl, + INFO_LBL_ELSC, + (arbitrary_info_t) (__psint_t) 1); + +#ifndef CONFIG_IA64_SGI_IO + sndrv_attach(vhdl); +#else + /* + * We need to call the drivers attach routine .. + */ + FIXME("klhwg_add_all_modules: Need code to call driver attach.\n"); +#endif + } +} + +void +klhwg_add_all_nodes(devfs_handle_t hwgraph_root) +{ + //gda_t *gdap = GDA; + gda_t *gdap; + cnodeid_t cnode; + +#ifdef SIMULATED_KLGRAPH + //gdap = 0xa800000000011000; + gdap = (gda_t *)0xe000000000011000; + printk("klhwg_add_all_nodes: SIMULATED_KLGRAPH FIXME: gdap= 0x%p\n", gdap); +#else + gdap = GDA; +#endif /* SIMULATED_KLGRAPH */ + for (cnode = 0; cnode < numnodes; cnode++) { + ASSERT(gdap->g_nasidtable[cnode] != INVALID_NASID); + klhwg_add_node(hwgraph_root, cnode, gdap); + } + + for (cnode = 0; cnode < numnodes; cnode++) { + ASSERT(gdap->g_nasidtable[cnode] != INVALID_NASID); + +#ifndef CONFIG_IA64_SGI_IO + klhwg_add_xbow(cnode, gdap->g_nasidtable[cnode]); +#else + printk("klhwg_add_all_nodes: Fix me by getting real nasid\n"); + klhwg_add_xbow(cnode, 0); +#endif + } + + /* + * As for router hardware inventory information, we set this + * up in router.c. + */ + + klhwg_add_all_routers(hwgraph_root); + klhwg_connect_routers(hwgraph_root); + klhwg_connect_hubs(hwgraph_root); + + /* Assign guardian nodes to each of the + * routers in the system. + */ + +#ifndef CONFIG_IA64_SGI_IO + router_guardians_set(hwgraph_root); +#endif + + /* Go through the entire system's klconfig + * to figure out which pci components have been disabled + */ + klhwg_device_disable_hints_add(); + +} diff --git a/arch/ia64/sn/io/klgraph_hack.c b/arch/ia64/sn/io/klgraph_hack.c new file mode 100644 index 000000000..cc9d77871 --- /dev/null +++ b/arch/ia64/sn/io/klgraph_hack.c @@ -0,0 +1,847 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + + +/* + * This is a temporary file that statically initializes the expected + * initial klgraph information that is normally provided by prom. 
+ */ + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <asm/sn/sgi.h> +#include <asm/sn/klconfig.h> + +void * real_port; +void * real_io_base; +void * real_addr; + +char *BW0 = NULL; + +kl_config_hdr_t *linux_klcfg; + +#ifdef BRINGUP +/* forward declarations */ +extern void dump_ii(void), dump_lb(void), dump_crossbow(void); +extern void clear_ii_error(void); +#endif /* BRINGUP */ + +void +simulated_BW0_init(void) +{ + + unsigned long *cnode0_hub; + unsigned long hub_widget = 0x1000000; + unsigned long hub_offset = 0x800000; + unsigned long hub_reg_base = 0; + extern void * vmalloc(unsigned long); + + memset(&nasid_to_compact_node[0], 0, sizeof(cnodeid_t) * MAX_NASIDS); + + BW0 = vmalloc(0x10000000); + if (BW0 == NULL) { + printk("Darn it .. cannot create space for Big Window 0\n"); + } + printk("BW0: Start Address %p\n", BW0); + + memset(BW0+(0x10000000 - 8), 0xf, 0x8); + + printk("BW0: Last WORD address %p has value 0x%lx\n", (char *)(BW0 +(0x10000000 - 8)), *(long *)(BW0 +(0x10000000 - 8))); + + printk("XWIDGET 8 Address = 0x%p\n", (unsigned long *)(NODE_SWIN_BASE(0, 8)) ); + + /* + * Do some HUB Register Hack .. + */ + hub_reg_base = (unsigned long)BW0 + hub_widget + hub_offset; + cnode0_hub = (unsigned long *)(hub_reg_base + IIO_WID); *cnode0_hub = 0x1c110049; + cnode0_hub = (unsigned long *)(hub_reg_base + IIO_WSTAT); *cnode0_hub = 0x0; + cnode0_hub = (unsigned long *)(hub_reg_base + IIO_WCR); *cnode0_hub = 0x401b; + printk("IIO_WCR address = 0x%p\n", cnode0_hub); + + cnode0_hub = (unsigned long *)(hub_reg_base + IIO_ILAPR); *cnode0_hub = 0xffffffffffffffff; + cnode0_hub = (unsigned long *)(hub_reg_base + IIO_ILAPO); *cnode0_hub = 0x0; + cnode0_hub = (unsigned long *)(hub_reg_base + IIO_IOWA); *cnode0_hub = 0xff01; + cnode0_hub = (unsigned long *)(hub_reg_base + IIO_IIWA); *cnode0_hub = 0xff01; + cnode0_hub = (unsigned long *)(hub_reg_base + IIO_IIDEM); *cnode0_hub = 0xffffffffffffffff; + cnode0_hub = (unsigned long *)(hub_reg_base + IIO_ILCSR); *cnode0_hub = 0x3fc03ff640a; + cnode0_hub = (unsigned long *)(hub_reg_base + IIO_ILLR); *cnode0_hub = 0x0; + cnode0_hub = (unsigned long *)(hub_reg_base + IIO_IIDSR); *cnode0_hub = 0x1000040; +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) + cnode0_hub = (unsigned long *)(hub_reg_base + IIO_IGFX0); *cnode0_hub = 0x0; + cnode0_hub = (unsigned long *)(hub_reg_base + IIO_IGFX1); *cnode0_hub = 0x0; + cnode0_hub = (unsigned long *)(hub_reg_base + IIO_ISCR0); *cnode0_hub = 0x23d; + cnode0_hub = (unsigned long *)(hub_reg_base + IIO_ISCR1); *cnode0_hub = 0x0; +#endif /* CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 */ +} + +#define SYNERGY_WIDGET ((char *)0xc0000e0000000000) +#define SYNERGY_SWIZZLE ((char *)0xc0000e0000000400) +#define HUBREG ((char *)0xc0000a0001e00000) +#define WIDGET0 ((char *)0xc0000a0000000000) +#define WIDGET4 ((char *)0xc0000a0000000004) + +#define SYNERGY_WIDGET ((char *)0xc0000e0000000000) +#define SYNERGY_SWIZZLE ((char *)0xc0000e0000000400) +#define HUBREG ((char *)0xc0000a0001e00000) +#define WIDGET0 ((char *)0xc0000a0000000000) + +int test = 0; + +/* + * Hack to loop for test. 
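+ * While the global 'test' flag is set this spins reading the
+ * Synergy widget registers, then dumps a few widget and hub
+ * register values.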
+ */ +void +test_io_regs(void) +{ + + uint32_t reg_32bits; + uint64_t reg_64bits; + + while (test) { + + reg_32bits = (uint32_t)(*(volatile uint32_t *) SYNERGY_WIDGET); + reg_64bits = (uint64_t) (*(volatile uint64_t *) SYNERGY_WIDGET); + + } + + printk("Synergy Widget Address = 0x%p, Value = 0x%lx\n", SYNERGY_WIDGET, (uint64_t)*(SYNERGY_WIDGET)); + + printk("Synergy swizzle Address = 0x%p, Value = 0x%lx\n", SYNERGY_SWIZZLE, (uint64_t)*(SYNERGY_SWIZZLE)); + printk("HUBREG Address = 0x%p, Value = 0x%lx\n", HUBREG, (uint64_t)*(HUBREG)); + printk("WIDGET0 Address = 0x%p, Value = 0x%lx\n", WIDGET0, (uint64_t)*(WIDGET0)); + printk("WIDGET4 Address = 0x%p, Value = 0x%x\n", WIDGET4, (uint32_t)*(WIDGET4)); + +} + +void +klgraph_hack_init(void) +{ + + kl_config_hdr_t *kl_hdr_ptr; + lboard_t *lb_ptr; + lboard_t *temp_ptr; + klhub_t *klhub_ptr; + klioc3_t *klioc3_ptr; + klbri_t *klbri_ptr; + klxbow_t *klxbow_ptr; + klinfo_t *klinfo_ptr; + klcomp_t *klcomp_ptr; + uint64_t *tmp; + volatile u32 *tmp32; + +#ifdef 0 + /* Preset some values */ + /* Write IOERR clear to clear the CRAZY bit in the status */ + tmp = (uint64_t *)0xc0000a0001c001f8; *tmp = (uint64_t)0xffffffff; + /* set widget control register...setting bedrock widget id to b */ + /* tmp = (uint64_t *)0xc0000a0001c00020; *tmp = (uint64_t)0x801b; */ + /* set io outbound widget access...allow all */ + tmp = (uint64_t *)0xc0000a0001c00110; *tmp = (uint64_t)0xff01; + /* set io inbound widget access...allow all */ + tmp = (uint64_t *)0xc0000a0001c00118; *tmp = (uint64_t)0xff01; + /* set io crb timeout to max */ + tmp = (uint64_t *)0xc0000a0001c003c0; *tmp = (uint64_t)0xffffff; + tmp = (uint64_t *)0xc0000a0001c003c0; *tmp = (uint64_t)0xffffff; + + /* set local block io permission...allow all */ + tmp = (uint64_t *)0xc0000a0001e04010; *tmp = (uint64_t)0xfffffffffffffff; + + /* clear any errors */ + clear_ii_error(); + + /* set default read response buffers in bridge */ + tmp32 = (volatile u32 *)0xc0000a000f000280L; + *tmp32 = 0xba98; + tmp32 = (volatile u32 *)0xc0000a000f000288L; + *tmp32 = 0xba98; +#endif + +printk("Widget ID Address 0x%p Value 0x%lx\n", (uint64_t *)0xc0000a0001e00000, *( (volatile uint64_t *)0xc0000a0001e00000) ); + +printk("Widget ID Address 0x%p Value 0x%lx\n", (uint64_t *)0xc0000a0001c00000, *( (volatile uint64_t *)0xc0000a0001c00000) ); + +printk("Widget ID Address 0x%p Value 0x%lx\n", (uint64_t *)0xc000020001e00000, *( (volatile uint64_t *)0xc000020001e00000) ); + + +printk("Widget ID Address 0x%p Value 0x%lx\n", (uint64_t *)0xc000020001c00000, *( (volatile uint64_t *)0xc000020001c00000) ); + +printk("Widget ID Address 0x%p Value 0x%lx\n", (uint64_t *)0xc0000a0001e00000, *( (volatile uint64_t *)0xc0000a0001e00000) ); + +printk("Xbow ID Address 0x%p Value 0x%x\n", (uint64_t *)0xc0000a0000000000, *( (volatile uint32_t *)0xc0000a0000000000) ); + +printk("Xbow ID Address 0x%p Value 0x%x\n", (uint64_t *)0xc000020000000004, *( (volatile uint32_t *)0xc000020000000004) ); + + + if ( test ) + test_io_regs(); + /* + * Klconfig header. 
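+	 * Built by hand with the 0xbeedbabe magic and the console/UART
+	 * information that early boot expects from the PROM.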
+ */ + kl_hdr_ptr = kmalloc(sizeof(kl_config_hdr_t), GFP_KERNEL); + kl_hdr_ptr->ch_magic = 0xbeedbabe; + kl_hdr_ptr->ch_version = 0x0; + kl_hdr_ptr->ch_malloc_hdr_off = 0x48; + kl_hdr_ptr->ch_cons_off = 0x18; + kl_hdr_ptr->ch_board_info = 0x0; + kl_hdr_ptr->ch_cons_info.uart_base = 0x920000000f820178; + kl_hdr_ptr->ch_cons_info.config_base = 0x920000000f024000; + kl_hdr_ptr->ch_cons_info.memory_base = 0x920000000f800000; + kl_hdr_ptr->ch_cons_info.baud = 0x2580; + kl_hdr_ptr->ch_cons_info.flag = 0x1; + kl_hdr_ptr->ch_cons_info.type = 0x300fafa; + kl_hdr_ptr->ch_cons_info.nasid = 0x0; + kl_hdr_ptr->ch_cons_info.wid = 0xf; + kl_hdr_ptr->ch_cons_info.npci = 0x4; + kl_hdr_ptr->ch_cons_info.baseio_nic = 0x0; + + /* + * We need to know whether we are booting from PROM or + * boot from disk. + */ + linux_klcfg = (kl_config_hdr_t *)0xe000000000030000; + if (linux_klcfg->ch_magic == 0xbeedbabe) { + printk("Linux Kernel Booted from Disk\n"); + } else { + printk("Linux Kernel Booted from PROM\n"); + linux_klcfg = kl_hdr_ptr; + } + + /* + * lboard KLTYPE_IP35 + */ + lb_ptr = kmalloc(sizeof(lboard_t), GFP_KERNEL); + kl_hdr_ptr->ch_board_info = (klconf_off_t) lb_ptr; + temp_ptr = lb_ptr; + printk("First Lboard = %p\n", temp_ptr); + + lb_ptr->brd_next = 0; + lb_ptr->struct_type = 0x1; + lb_ptr->brd_type = 0x11; + lb_ptr->brd_sversion = 0x3; + lb_ptr->brd_brevision = 0x1; + lb_ptr->brd_promver = 0x1; + lb_ptr->brd_promver = 0x1; + lb_ptr->brd_slot = 0x0; + lb_ptr->brd_debugsw = 0x0; + lb_ptr->brd_module = 0x145; + lb_ptr->brd_partition = 0x0; + lb_ptr->brd_diagval = 0x0; + lb_ptr->brd_diagparm = 0x0; + lb_ptr->brd_inventory = 0x0; + lb_ptr->brd_numcompts = 0x5; + lb_ptr->brd_nic = 0x2a0aed35; + lb_ptr->brd_nasid = 0x0; + lb_ptr->brd_errinfo = 0x0; + lb_ptr->brd_parent = 0x0; + lb_ptr->brd_graph_link = (devfs_handle_t)0x26; + lb_ptr->brd_owner = 0x0; + lb_ptr->brd_nic_flags = 0x0; + memcpy(&lb_ptr->brd_name[0], "IP35", 4); + + /* + * Hub Component + */ + klcomp_ptr = kmalloc(sizeof(klcomp_t), GFP_KERNEL); + klhub_ptr = (klhub_t *)klcomp_ptr; + klinfo_ptr = (klinfo_t *)klcomp_ptr; + lb_ptr->brd_compts[0] = (klconf_off_t)klcomp_ptr; + printk("hub info = %p lboard = %p\n", klhub_ptr, lb_ptr); + + klinfo_ptr = (klinfo_t *)klhub_ptr; + klinfo_ptr->struct_type = 0x2; + klinfo_ptr->struct_version = 0x1; + klinfo_ptr->flags = 0x1; + klinfo_ptr->revision = 0x1; + klinfo_ptr->diagval = 0x0; + klinfo_ptr->diagparm = 0x0; + klinfo_ptr->inventory = 0x0; + klinfo_ptr->partid = 0x0; + klinfo_ptr->nic = 0x2a0aed35; + klinfo_ptr->physid = 0x0; + klinfo_ptr->virtid = 0x0; + klinfo_ptr->widid = 0x0; + klinfo_ptr->nasid = 0x0; + + klhub_ptr->hub_flags = 0x0; + klhub_ptr->hub_port.port_nasid = (nasid_t)0x0ffffffff; + klhub_ptr->hub_port.port_flag = 0x0; + klhub_ptr->hub_port.port_offset = 0x0; + klhub_ptr->hub_box_nic = 0x0; + klhub_ptr->hub_mfg_nic = 0x3f420; + klhub_ptr->hub_speed = 0xbebc200; + + /* + * Memory Component + */ + klcomp_ptr = kmalloc(sizeof(klcomp_t), GFP_KERNEL); + klinfo_ptr = (klinfo_t *)klcomp_ptr; + lb_ptr->brd_compts[1] = (klconf_off_t)klcomp_ptr; + + klinfo_ptr->struct_type = 0x3; + klinfo_ptr->struct_version = 0x2; + klinfo_ptr->flags = 0x1; + klinfo_ptr->revision = 0xff; + klinfo_ptr->diagval = 0x0; + klinfo_ptr->diagparm = 0x0; + klinfo_ptr->inventory = 0x0; + klinfo_ptr->partid = 0x0; + klinfo_ptr->nic = 0xffffffffffffffff; + klinfo_ptr->physid = 0xff; + klinfo_ptr->virtid = 0xffffffff; + klinfo_ptr->widid = 0x0; + klinfo_ptr->nasid = 0x0; + + /* + * KLSTRUCT_HUB_UART Component + */ + klcomp_ptr = 
kmalloc(sizeof(klcomp_t), GFP_KERNEL); + klinfo_ptr = (klinfo_t *)klcomp_ptr; + lb_ptr->brd_compts[2] = (klconf_off_t)klcomp_ptr; + + klinfo_ptr->struct_type = 0x11; + klinfo_ptr->struct_version = 0x1; + klinfo_ptr->flags = 0x31; + klinfo_ptr->revision = 0xff; + klinfo_ptr->diagval = 0x0; + klinfo_ptr->diagparm = 0x0; + klinfo_ptr->inventory = 0x0; + klinfo_ptr->partid = 0x0; + klinfo_ptr->nic = 0xffffffffffffffff; + klinfo_ptr->physid = 0x0; + klinfo_ptr->virtid = 0x0; + klinfo_ptr->widid = 0x0; + klinfo_ptr->nasid = 0x0; + + /* + * KLSTRUCT_CPU Component + */ + klcomp_ptr = kmalloc(sizeof(klcomp_t), GFP_KERNEL); + klinfo_ptr = (klinfo_t *)klcomp_ptr; + lb_ptr->brd_compts[3] = (klconf_off_t)klcomp_ptr; + + klinfo_ptr->struct_type = 0x1; + klinfo_ptr->struct_version = 0x2; + klinfo_ptr->flags = 0x1; + klinfo_ptr->revision = 0xff; + klinfo_ptr->diagval = 0x0; + klinfo_ptr->diagparm = 0x0; + klinfo_ptr->inventory = 0x0; + klinfo_ptr->partid = 0x0; + klinfo_ptr->nic = 0xffffffffffffffff; + klinfo_ptr->physid = 0x0; + klinfo_ptr->virtid = 0x0; + klinfo_ptr->widid = 0x0; + klinfo_ptr->nasid = 0x0; + + /* + * KLSTRUCT_CPU Component + */ + klcomp_ptr = kmalloc(sizeof(klcomp_t), GFP_KERNEL); + klinfo_ptr = (klinfo_t *)klcomp_ptr; + lb_ptr->brd_compts[4] = (klconf_off_t)klcomp_ptr; + + klinfo_ptr->struct_type = 0x1; + klinfo_ptr->struct_version = 0x2; + klinfo_ptr->flags = 0x1; + klinfo_ptr->revision = 0xff; + klinfo_ptr->diagval = 0x0; + klinfo_ptr->diagparm = 0x0; + klinfo_ptr->inventory = 0x0; + klinfo_ptr->partid = 0x0; + klinfo_ptr->nic = 0xffffffffffffffff; + klinfo_ptr->physid = 0x1; + klinfo_ptr->virtid = 0x1; + klinfo_ptr->widid = 0x0; + klinfo_ptr->nasid = 0x0; + + lb_ptr->brd_compts[5] = 0; /* Set the next one to 0 .. end */ + lb_ptr->brd_numcompts = 5; /* 0 to 4 */ + + /* + * lboard(0x42) KLTYPE_PBRICK_XBOW + */ + lb_ptr = kmalloc(sizeof(lboard_t), GFP_KERNEL); + temp_ptr->brd_next = (klconf_off_t)lb_ptr; /* Let the previous point at the new .. 
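+ * lboard; each fabricated board is chained onto the list through its
+ * brd_next offset, with temp_ptr trailing one element behind so the
+ * link can be patched in as soon as the next lboard_t is allocated.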
*/ + temp_ptr = lb_ptr; + printk("Second Lboard = %p\n", temp_ptr); + + lb_ptr->brd_next = 0; + lb_ptr->struct_type = 0x1; + lb_ptr->brd_type = 0x42; + lb_ptr->brd_sversion = 0x2; + lb_ptr->brd_brevision = 0x0; + lb_ptr->brd_promver = 0x1; + lb_ptr->brd_promver = 0x1; + lb_ptr->brd_slot = 0x0; + lb_ptr->brd_debugsw = 0x0; + lb_ptr->brd_module = 0x145; + lb_ptr->brd_partition = 0x1; + lb_ptr->brd_diagval = 0x0; + lb_ptr->brd_diagparm = 0x0; + lb_ptr->brd_inventory = 0x0; + lb_ptr->brd_numcompts = 0x1; + lb_ptr->brd_nic = 0xffffffffffffffff; + lb_ptr->brd_nasid = 0x0; + lb_ptr->brd_errinfo = 0x0; + lb_ptr->brd_parent = (struct lboard_s *)0x9600000000030070; + lb_ptr->brd_graph_link = (devfs_handle_t)0xffffffff; + lb_ptr->brd_owner = 0x0; + lb_ptr->brd_nic_flags = 0x0; + memcpy(&lb_ptr->brd_name[0], "IOBRICK", 7); + + /* + * KLSTRUCT_XBOW Component + */ + klcomp_ptr = kmalloc(sizeof(klcomp_t), GFP_KERNEL); + memset(klcomp_ptr, 0, sizeof(klcomp_t)); + klxbow_ptr = (klxbow_t *)klcomp_ptr; + klinfo_ptr = (klinfo_t *)klcomp_ptr; + lb_ptr->brd_compts[0] = (klconf_off_t)klcomp_ptr; + printk("xbow_p 0x%p\n", klcomp_ptr); + + klinfo_ptr->struct_type = 0x4; + klinfo_ptr->struct_version = 0x1; + klinfo_ptr->flags = 0x1; + klinfo_ptr->revision = 0x2; + klinfo_ptr->diagval = 0x0; + klinfo_ptr->diagparm = 0x0; + klinfo_ptr->inventory = 0x0; + klinfo_ptr->partid = 0x0; + klinfo_ptr->nic = 0xffffffffffffffff; + klinfo_ptr->physid = 0xff; + klinfo_ptr->virtid = 0x0; + klinfo_ptr->widid = 0x0; + klinfo_ptr->nasid = 0x0; + + klxbow_ptr->xbow_master_hub_link = 0xb; + klxbow_ptr->xbow_port_info[0].port_nasid = 0x0; + klxbow_ptr->xbow_port_info[0].port_flag = 0x0; + klxbow_ptr->xbow_port_info[0].port_offset = 0x0; + + klxbow_ptr->xbow_port_info[1].port_nasid = 0x401; + klxbow_ptr->xbow_port_info[1].port_flag = 0x0; + klxbow_ptr->xbow_port_info[1].port_offset = 0x0; + + klxbow_ptr->xbow_port_info[2].port_nasid = 0x0; + klxbow_ptr->xbow_port_info[2].port_flag = 0x0; + klxbow_ptr->xbow_port_info[2].port_offset = 0x0; + + klxbow_ptr->xbow_port_info[3].port_nasid = 0x0; /* ffffffff */ + klxbow_ptr->xbow_port_info[3].port_flag = 0x6; + klxbow_ptr->xbow_port_info[3].port_offset = 0x30070; + + klxbow_ptr->xbow_port_info[4].port_nasid = 0x0; /* ffffff00; */ + klxbow_ptr->xbow_port_info[4].port_flag = 0x0; + klxbow_ptr->xbow_port_info[4].port_offset = 0x0; + + klxbow_ptr->xbow_port_info[5].port_nasid = 0x0; + klxbow_ptr->xbow_port_info[5].port_flag = 0x0; + klxbow_ptr->xbow_port_info[5].port_offset = 0x0; + klxbow_ptr->xbow_port_info[6].port_nasid = 0x0; + klxbow_ptr->xbow_port_info[6].port_flag = 0x5; + klxbow_ptr->xbow_port_info[6].port_offset = 0x30210; + klxbow_ptr->xbow_port_info[7].port_nasid = 0x3; + klxbow_ptr->xbow_port_info[7].port_flag = 0x5; + klxbow_ptr->xbow_port_info[7].port_offset = 0x302e0; + + lb_ptr->brd_compts[1] = 0; + lb_ptr->brd_numcompts = 1; + + + /* + * lboard KLTYPE_PBRICK + */ + lb_ptr = kmalloc(sizeof(lboard_t), GFP_KERNEL); + temp_ptr->brd_next = (klconf_off_t)lb_ptr; /* Let the previous point at the new .. 
*/ + temp_ptr = lb_ptr; + printk("Third Lboard %p\n", lb_ptr); + + lb_ptr->brd_next = 0; + lb_ptr->struct_type = 0x1; + lb_ptr->brd_type = 0x72; + lb_ptr->brd_sversion = 0x2; + lb_ptr->brd_brevision = 0x0; + lb_ptr->brd_promver = 0x1; + lb_ptr->brd_promver = 0x41; + lb_ptr->brd_slot = 0xe; + lb_ptr->brd_debugsw = 0x0; + lb_ptr->brd_module = 0x145; + lb_ptr->brd_partition = 0x1; + lb_ptr->brd_diagval = 0x0; + lb_ptr->brd_diagparm = 0x0; + lb_ptr->brd_inventory = 0x0; + lb_ptr->brd_numcompts = 0x1; + lb_ptr->brd_nic = 0x30e3fd; + lb_ptr->brd_nasid = 0x0; + lb_ptr->brd_errinfo = 0x0; + lb_ptr->brd_parent = (struct lboard_s *)0x9600000000030140; + lb_ptr->brd_graph_link = (devfs_handle_t)0xffffffff; + lb_ptr->brd_owner = 0x0; + lb_ptr->brd_nic_flags = 0x0; + memcpy(&lb_ptr->brd_name[0], "IP35", 4); + + /* + * KLSTRUCT_BRI Component + */ + klcomp_ptr = kmalloc(sizeof(klcomp_t), GFP_KERNEL); + klbri_ptr = (klbri_t *)klcomp_ptr; + klinfo_ptr = (klinfo_t *)klcomp_ptr; + lb_ptr->brd_compts[0] = (klconf_off_t)klcomp_ptr; + + klinfo_ptr->struct_type = 0x5; + klinfo_ptr->struct_version = 0x2; + klinfo_ptr->flags = 0x1; + klinfo_ptr->revision = 0x2; + klinfo_ptr->diagval = 0x0; + klinfo_ptr->diagparm = 0x0; + klinfo_ptr->inventory = 0x0; + klinfo_ptr->partid = 0xd002; + klinfo_ptr->nic = 0x30e3fd; + klinfo_ptr->physid = 0xe; + klinfo_ptr->virtid = 0xe; + klinfo_ptr->widid = 0xe; + klinfo_ptr->nasid = 0x0; + + klbri_ptr->bri_eprominfo = 0xff; + klbri_ptr->bri_bustype = 0x7; + klbri_ptr->bri_mfg_nic = 0x3f4a8; + + lb_ptr->brd_compts[1] = 0; + lb_ptr->brd_numcompts = 1; + + /* + * lboard KLTYPE_PBRICK + */ + lb_ptr = kmalloc(sizeof(lboard_t), GFP_KERNEL); + temp_ptr->brd_next = (klconf_off_t)lb_ptr; /* Let the previous point at the new .. */ + temp_ptr = lb_ptr; + printk("Fourth Lboard %p\n", lb_ptr); + + lb_ptr->brd_next = 0x0; + lb_ptr->struct_type = 0x1; + lb_ptr->brd_type = 0x72; + lb_ptr->brd_sversion = 0x2; + lb_ptr->brd_brevision = 0x0; + lb_ptr->brd_promver = 0x1; + lb_ptr->brd_promver = 0x31; + lb_ptr->brd_slot = 0xf; + lb_ptr->brd_debugsw = 0x0; + lb_ptr->brd_module = 0x145; + lb_ptr->brd_partition = 0x1; + lb_ptr->brd_diagval = 0x0; + lb_ptr->brd_diagparm = 0x0; + lb_ptr->brd_inventory = 0x0; + lb_ptr->brd_numcompts = 0x6; + lb_ptr->brd_nic = 0x30e3fd; + lb_ptr->brd_nasid = 0x0; + lb_ptr->brd_errinfo = 0x0; + lb_ptr->brd_parent = (struct lboard_s *)0x9600000000030140; + lb_ptr->brd_graph_link = (devfs_handle_t)0xffffffff; + lb_ptr->brd_owner = 0x0; + lb_ptr->brd_nic_flags = 0x0; + memcpy(&lb_ptr->brd_name[0], "IP35", 4); + + + /* + * KLSTRUCT_BRI Component + */ + klcomp_ptr = kmalloc(sizeof(klcomp_t), GFP_KERNEL); + klbri_ptr = (klbri_t *)klcomp_ptr; + klinfo_ptr = (klinfo_t *)klcomp_ptr; + lb_ptr->brd_compts[0] = (klconf_off_t)klcomp_ptr; + + klinfo_ptr->struct_type = 0x5; + klinfo_ptr->struct_version = 0x2; + klinfo_ptr->flags = 0x1; + klinfo_ptr->revision = 0x2; + klinfo_ptr->diagval = 0x0; + klinfo_ptr->diagparm = 0x0; + klinfo_ptr->inventory = 0x0; + klinfo_ptr->partid = 0xd002; + klinfo_ptr->nic = 0x30e3fd; + klinfo_ptr->physid = 0xf; + klinfo_ptr->virtid = 0xf; + klinfo_ptr->widid = 0xf; + klinfo_ptr->nasid = 0x0; + + klbri_ptr->bri_eprominfo = 0xff; + klbri_ptr->bri_bustype = 0x7; + klbri_ptr->bri_mfg_nic = 0x3f528; + + /* + * KLSTRUCT_SCSI component + */ + klcomp_ptr = kmalloc(sizeof(klcomp_t), GFP_KERNEL); + klinfo_ptr = (klinfo_t *)klcomp_ptr; + lb_ptr->brd_compts[1] = (klconf_off_t)klcomp_ptr; + + klinfo_ptr->struct_type = 0xb; + klinfo_ptr->struct_version = 0x1; + klinfo_ptr->flags 
= 0x31; + klinfo_ptr->revision = 0x5; + klinfo_ptr->diagval = 0x0; + klinfo_ptr->diagparm = 0x0; + klinfo_ptr->inventory = 0x0; + klinfo_ptr->partid = 0x0; + klinfo_ptr->nic = 0xffffffffffffffff; + klinfo_ptr->physid = 0x1; + klinfo_ptr->virtid = 0x0; + klinfo_ptr->widid = 0xf; + klinfo_ptr->nasid = 0x0; + + /* + * KLSTRUCT_IOC3 Component + */ + klcomp_ptr = kmalloc(sizeof(klcomp_t), GFP_KERNEL); + klioc3_ptr = (klioc3_t *)klcomp_ptr; + klinfo_ptr = (klinfo_t *)klcomp_ptr; + lb_ptr->brd_compts[2] = (klconf_off_t)klcomp_ptr; + + klinfo_ptr->struct_type = 0x6; + klinfo_ptr->struct_version = 0x1; + klinfo_ptr->flags = 0x31; + klinfo_ptr->revision = 0x1; + klinfo_ptr->diagval = 0x0; + klinfo_ptr->diagparm = 0x0; + klinfo_ptr->inventory = 0x0; + klinfo_ptr->partid = 0x0; + klinfo_ptr->nic = 0xffffffffffffffff; + klinfo_ptr->physid = 0x4; + klinfo_ptr->virtid = 0x0; + klinfo_ptr->widid = 0xf; + klinfo_ptr->nasid = 0x0; + + klioc3_ptr->ioc3_ssram = 0x0; + klioc3_ptr->ioc3_nvram = 0x0; + + /* + * KLSTRUCT_UNKNOWN Component + */ + klcomp_ptr = kmalloc(sizeof(klcomp_t), GFP_KERNEL); + klinfo_ptr = (klinfo_t *)klcomp_ptr; + lb_ptr->brd_compts[3] = (klconf_off_t)klcomp_ptr; + + klinfo_ptr->struct_type = 0x0; + klinfo_ptr->struct_version = 0x1; + klinfo_ptr->flags = 0x31; + klinfo_ptr->revision = 0xff; + klinfo_ptr->diagval = 0x0; + klinfo_ptr->diagparm = 0x0; + klinfo_ptr->inventory = 0x0; + klinfo_ptr->partid = 0x0; + klinfo_ptr->nic = 0xffffffffffffffff; + klinfo_ptr->physid = 0x5; + klinfo_ptr->virtid = 0x0; + klinfo_ptr->widid = 0xf; + klinfo_ptr->nasid = 0x0; + + /* + * KLSTRUCT_SCSI Component + */ + klcomp_ptr = kmalloc(sizeof(klcomp_t), GFP_KERNEL); + klinfo_ptr = (klinfo_t *)klcomp_ptr; + lb_ptr->brd_compts[4] = (klconf_off_t)klcomp_ptr; + + klinfo_ptr->struct_type = 0xb; + klinfo_ptr->struct_version = 0x1; + klinfo_ptr->flags = 0x31; + klinfo_ptr->revision = 0x1; + klinfo_ptr->diagval = 0x0; + klinfo_ptr->diagparm = 0x0; + klinfo_ptr->inventory = 0x0; + klinfo_ptr->partid = 0x0; + klinfo_ptr->nic = 0xffffffffffffffff; + klinfo_ptr->physid = 0x6; + klinfo_ptr->virtid = 0x5; + klinfo_ptr->widid = 0xf; + klinfo_ptr->nasid = 0x0; + + /* + * KLSTRUCT_UNKNOWN + */ + klcomp_ptr = kmalloc(sizeof(klcomp_t), GFP_KERNEL); + klinfo_ptr = (klinfo_t *)klcomp_ptr; + lb_ptr->brd_compts[5] = (klconf_off_t)klcomp_ptr; + + klinfo_ptr->struct_type = 0x0; + klinfo_ptr->struct_version = 0x1; + klinfo_ptr->flags = 0x31; + klinfo_ptr->revision = 0xff; + klinfo_ptr->diagval = 0x0; + klinfo_ptr->diagparm = 0x0; + klinfo_ptr->inventory = 0x0; + klinfo_ptr->partid = 0x0; + klinfo_ptr->nic = 0xffffffffffffffff; + klinfo_ptr->physid = 0x7; + klinfo_ptr->virtid = 0x0; + klinfo_ptr->widid = 0xf; + klinfo_ptr->nasid = 0x0; + + lb_ptr->brd_compts[6] = 0; + lb_ptr->brd_numcompts = 6; + +} + + + + + +#ifdef BRINGUP +/* + * these were useful for printing out registers etc + * during bringup + */ + +void +xdump(long long *addr, int count) +{ + int ii; + volatile long long *xx = addr; + + for ( ii = 0; ii < count; ii++, xx++ ) { + printk("0x%p : 0x%p\n", xx, *xx); + } +} + +void +xdump32(unsigned int *addr, int count) +{ + int ii; + volatile unsigned int *xx = addr; + + for ( ii = 0; ii < count; ii++, xx++ ) { + printk("0x%p : 0x%0x\n", xx, *xx); + } +} + + + +void +clear_ii_error(void) +{ + volatile long long *tmp; + + printk("... WSTAT "); + xdump((long long *)0xc0000a0001c00008, 1); + printk("... WCTRL "); + xdump((long long *)0xc0000a0001c00020, 1); + printk("... 
WLCSR "); + xdump((long long *)0xc0000a0001c00128, 1); + printk("... IIDSR "); + xdump((long long *)0xc0000a0001c00138, 1); + printk("... IOPRBs "); + xdump((long long *)0xc0000a0001c00198, 9); + printk("... IXSS "); + xdump((long long *)0xc0000a0001c00210, 1); + printk("... IBLS0 "); + xdump((long long *)0xc0000a0001c10000, 1); + printk("... IBLS1 "); + xdump((long long *)0xc0000a0001c20000, 1); + + /* Write IOERR clear to clear the CRAZY bit in the status */ + tmp = (long long *)0xc0000a0001c001f8; *tmp = (long long)0xffffffff; + + /* dump out local block error registers */ + printk("... "); + xdump((long long *)0xc0000a0001e04040, 1); /* LB_ERROR_BITS */ + printk("... "); + xdump((long long *)0xc0000a0001e04050, 1); /* LB_ERROR_HDR1 */ + printk("... "); + xdump((long long *)0xc0000a0001e04058, 1); /* LB_ERROR_HDR2 */ + /* and clear the LB_ERROR_BITS */ + tmp = (long long *)0xc0000a0001e04040; *tmp = 0x0; + printk("clr: "); + xdump((long long *)0xc0000a0001e04040, 1); /* LB_ERROR_BITS */ + tmp = (long long *)0xc0000a0001e04050; *tmp = 0x0; + tmp = (long long *)0xc0000a0001e04058; *tmp = 0x0; +} + + +void +dump_ii() +{ + printk("===== Dump the II regs =====\n"); + xdump((long long *)0xc0000a0001c00000, 2); + xdump((long long *)0xc0000a0001c00020, 1); + xdump((long long *)0xc0000a0001c00100, 37); + xdump((long long *)0xc0000a0001c00300, 98); + xdump((long long *)0xc0000a0001c10000, 6); + xdump((long long *)0xc0000a0001c20000, 6); + xdump((long long *)0xc0000a0001c30000, 2); + + xdump((long long *)0xc0000a0000000000, 1); + xdump((long long *)0xc0000a0001000000, 1); + xdump((long long *)0xc0000a0002000000, 1); + xdump((long long *)0xc0000a0003000000, 1); + xdump((long long *)0xc0000a0004000000, 1); + xdump((long long *)0xc0000a0005000000, 1); + xdump((long long *)0xc0000a0006000000, 1); + xdump((long long *)0xc0000a0007000000, 1); + xdump((long long *)0xc0000a0008000000, 1); + xdump((long long *)0xc0000a0009000000, 1); + xdump((long long *)0xc0000a000a000000, 1); + xdump((long long *)0xc0000a000b000000, 1); + xdump((long long *)0xc0000a000c000000, 1); + xdump((long long *)0xc0000a000d000000, 1); + xdump((long long *)0xc0000a000e000000, 1); + xdump((long long *)0xc0000a000f000000, 1); +} + +void +dump_lb() +{ + printk("===== Dump the LB regs =====\n"); + xdump((long long *)0xc0000a0001e00000, 1); + xdump((long long *)0xc0000a0001e04000, 13); + xdump((long long *)0xc0000a0001e04100, 2); + xdump((long long *)0xc0000a0001e04200, 2); + xdump((long long *)0xc0000a0001e08000, 5); + xdump((long long *)0xc0000a0001e08040, 2); + xdump((long long *)0xc0000a0001e08050, 3); + xdump((long long *)0xc0000a0001e0c000, 3); + xdump((long long *)0xc0000a0001e0c020, 4); +} + +void +dump_crossbow() +{ + printk("===== Dump the Crossbow regs =====\n"); + clear_ii_error(); + xdump32((unsigned int *)0xc0000a0000000004, 1); + clear_ii_error(); + xdump32((unsigned int *)0xc0000a0000000000, 1); + printk("and again..\n"); + xdump32((unsigned int *)0xc0000a0000000000, 1); + xdump32((unsigned int *)0xc0000a0000000000, 1); + + + clear_ii_error(); + + xdump32((unsigned int *)0xc000020000000004, 1); + clear_ii_error(); + xdump32((unsigned int *)0xc000020000000000, 1); + clear_ii_error(); + + xdump32((unsigned int *)0xc0000a0000800004, 1); + clear_ii_error(); + xdump32((unsigned int *)0xc0000a0000800000, 1); + clear_ii_error(); + + xdump32((unsigned int *)0xc000020000800004, 1); + clear_ii_error(); + xdump32((unsigned int *)0xc000020000800000, 1); + clear_ii_error(); + + +} +#endif /* BRINGUP */ diff --git 
a/arch/ia64/sn/io/l1.c b/arch/ia64/sn/io/l1.c new file mode 100644 index 000000000..b8c5af674 --- /dev/null +++ b/arch/ia64/sn/io/l1.c @@ -0,0 +1,2974 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +/* In general, this file is organized in a hierarchy from lower-level + * to higher-level layers, as follows: + * + * UART routines + * Bedrock/L1 "PPP-like" protocol implementation + * System controller "message" interface (allows multiplexing + * of various kinds of requests and responses with + * console I/O) + * Console interfaces (there are two): + * (1) "elscuart", used in the IP35prom and (maybe) some + * debugging situations elsewhere, and + * (2) "l1_cons", the glue that allows the L1 to act + * as the system console for the stdio libraries + * + * Routines making use of the system controller "message"-style interface + * can be found in l1_command.c. Their names are leftover from early SN0, + * when the "module system controller" (msc) was known as the "entry level + * system controller" (elsc). The names and signatures of those functions + * remain unchanged in order to keep the SN0 -> SN1 system controller + * changes fairly localized. + */ + + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/hcl_util.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/eeprom.h> +#include <asm/sn/ksys/i2c.h> +#include <asm/sn/cmn_err.h> +#include <asm/sn/router.h> +#include <asm/sn/module.h> +#include <asm/sn/ksys/l1.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/clksupport.h> + +#include <asm/sn/sn1/uart16550.h> + + +#if defined(EEPROM_DEBUG) +#define db_printf(x) printk x +#else +#define db_printf(x) +#endif + +// From irix/kern/sys/SN/SN1/bdrkhspecregs.h +#define HSPEC_UART_0 0x00000080 /* UART Registers */ + +/********************************************************************* + * Hardware-level (UART) driver routines. 
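+ *
+ * The access macros that follow treat the L1 UART as a 16550-style part
+ * on the Bedrock junk bus: ADDR_L1_REG() places register r at an 8-byte
+ * stride (r << 3) above L1_UART_BASE(nasid), and LD()/SD() perform the
+ * 64-bit volatile load/store at that address.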
+ */ + +/* macros for reading/writing registers */ + +#define LD(x) (*(volatile uint64_t *)(x)) +#define SD(x, v) (LD(x) = (uint64_t) (v)) + +/* location of uart receive/xmit data register */ +#define L1_UART_BASE(n) ((ulong)REMOTE_HSPEC_ADDR((n), HSPEC_UART_0)) +#define LOCAL_HUB LOCAL_HUB_ADDR + +#define ADDR_L1_REG(n, r) \ + (L1_UART_BASE(n) | ( (r) << 3 )) + +#define READ_L1_UART_REG(n, r) \ + ( LD(ADDR_L1_REG((n), (r))) ) + +#define WRITE_L1_UART_REG(n, r, v) \ + ( SD(ADDR_L1_REG((n), (r)), (v)) ) + + +/* Avoid conflicts with symmon...*/ +#define CONS_HW_LOCK(x) +#define CONS_HW_UNLOCK(x) + +#define L1_CONS_HW_LOCK(sc) CONS_HW_LOCK(sc->uart == BRL1_LOCALUART) +#define L1_CONS_HW_UNLOCK(sc) CONS_HW_UNLOCK(sc->uart == BRL1_LOCALUART) + +#if DEBUG +static int debuglock_ospl; /* For CONS_HW_LOCK macro */ +#endif + +/* UART-related #defines */ + +#define UART_BAUD_RATE 57600 +#define UART_FIFO_DEPTH 16 +#define UART_DELAY_SPAN 10 +#define UART_PUTC_TIMEOUT 50000 +#define UART_INIT_TIMEOUT 100000 + +/* error codes */ +#define UART_SUCCESS 0 +#define UART_TIMEOUT (-1) +#define UART_LINK (-2) +#define UART_NO_CHAR (-3) +#define UART_VECTOR (-4) + +#ifdef BRINGUP +#define UART_DELAY(x) { int i; i = x * 1000; while (--i); } +#else +#define UART_DELAY(x) us_delay(x) +#endif + +/* + * Some macros for handling Endian-ness + */ + +#ifdef LITTLE_ENDIAN +#define COPY_INT_TO_BUFFER(_b, _i, _n) \ + { \ + _b[_i++] = (_n >> 24) & 0xff; \ + _b[_i++] = (_n >> 16) & 0xff; \ + _b[_i++] = (_n >> 8) & 0xff; \ + _b[_i++] = _n & 0xff; \ + } + +#define COPY_BUFFER_TO_INT(_b, _i, _n) \ + { \ + _n = (_b[_i++] << 24) & 0xff; \ + _n |= (_b[_i++] << 16) & 0xff; \ + _n |= (_b[_i++] << 8) & 0xff; \ + _n |= _b[_i++] & 0xff; \ + } + +#define COPY_BUFFER_TO_BUFFER(_b, _i, _bn) \ + { \ + char *_xyz = (char *)_bn; \ + _xyz[3] = _b[_i++]; \ + _xyz[2] = _b[_i++]; \ + _xyz[1] = _b[_i++]; \ + _xyz[0] = _b[_i++]; \ + } +#else /* BIG_ENDIAN */ +#define COPY_INT_TO_BUFFER(_b, _i, _n) \ + { \ + bcopy((char *)&_n, _b, sizeof(_n)); \ + _i += sizeof(_n); \ + } + +#define COPY_BUFFER_TO_INT(_b, _i, _n) \ + { \ + bcopy(&_b[_i], &_n, sizeof(_n)); \ + _i += sizeof(_n); \ + } + +#define COPY_BUFFER_TO_BUFFER(_b, _i, _bn) \ + { \ + bcopy(&(_b[_i]), _bn, sizeof(int)); \ + _i += sizeof(int); \ + } +#endif /* LITTLE_ENDIAN */ + +int atomicAddInt(int *int_ptr, int value); +int atomicClearInt(int *int_ptr, int value); +void kmem_free(void *where, int size); + +#define BCOPY(x,y,z) memcpy(y,x,z) + +extern char *bcopy(const char * src, char * dest, int count); + + +int +get_L1_baud(void) +{ + return UART_BAUD_RATE; +} + + +/* uart driver functions */ + +static void +uart_delay( rtc_time_t delay_span ) +{ + UART_DELAY( delay_span ); +} + +#define UART_PUTC_READY(n) (READ_L1_UART_REG((n), REG_LSR) & LSR_XHRE) + +static int +uart_putc( l1sc_t *sc ) +{ +#ifdef BRINGUP + /* need a delay to avoid dropping chars */ + UART_DELAY(57); +#endif + WRITE_L1_UART_REG( sc->nasid, REG_DAT, + sc->send[sc->sent] ); + return UART_SUCCESS; +} + + +static int +uart_getc( l1sc_t *sc ) +{ + u_char lsr_reg = 0; + nasid_t nasid = sc->nasid; + + if( (lsr_reg = READ_L1_UART_REG( nasid, REG_LSR )) & + (LSR_RCA | LSR_PARERR | LSR_FRMERR) ) + { + if( lsr_reg & LSR_RCA ) + return( (u_char)READ_L1_UART_REG( nasid, REG_DAT ) ); + else if( lsr_reg & (LSR_PARERR | LSR_FRMERR) ) { + return UART_LINK; + } + } + + return UART_NO_CHAR; +} + + +#define PROM_SER_CLK_SPEED 12000000 +#define PROM_SER_DIVISOR(x) (PROM_SER_CLK_SPEED / ((x) * 16)) + +static void +uart_init( l1sc_t *sc, int 
baud ) +{ + rtc_time_t expire; + int clkdiv; + nasid_t nasid; + + clkdiv = PROM_SER_DIVISOR(baud); + expire = rtc_time() + UART_INIT_TIMEOUT; + nasid = sc->nasid; + + /* make sure the transmit FIFO is empty */ + while( !(READ_L1_UART_REG( nasid, REG_LSR ) & LSR_XSRE) ) { + uart_delay( UART_DELAY_SPAN ); + if( rtc_time() > expire ) { + break; + } + } + + L1_CONS_HW_LOCK( sc ); + + WRITE_L1_UART_REG( nasid, REG_LCR, LCR_DLAB ); + uart_delay( UART_DELAY_SPAN ); + WRITE_L1_UART_REG( nasid, REG_DLH, (clkdiv >> 8) & 0xff ); + uart_delay( UART_DELAY_SPAN ); + WRITE_L1_UART_REG( nasid, REG_DLL, clkdiv & 0xff ); + uart_delay( UART_DELAY_SPAN ); + + /* set operating parameters and set DLAB to 0 */ + WRITE_L1_UART_REG( nasid, REG_LCR, LCR_BITS8 | LCR_STOP1 ); + uart_delay( UART_DELAY_SPAN ); + WRITE_L1_UART_REG( nasid, REG_MCR, MCR_RTS | MCR_AFE ); + uart_delay( UART_DELAY_SPAN ); + + /* disable interrupts */ + WRITE_L1_UART_REG( nasid, REG_ICR, 0x0 ); + uart_delay( UART_DELAY_SPAN ); + + /* enable FIFO mode and reset both FIFOs */ + WRITE_L1_UART_REG( nasid, REG_FCR, FCR_FIFOEN ); + uart_delay( UART_DELAY_SPAN ); + WRITE_L1_UART_REG( nasid, REG_FCR, + FCR_FIFOEN | FCR_RxFIFO | FCR_TxFIFO ); + + L1_CONS_HW_UNLOCK( sc ); +} + +static void +uart_intr_enable( l1sc_t *sc, u_char mask ) +{ + u_char lcr_reg, icr_reg; + nasid_t nasid = sc->nasid; + + L1_CONS_HW_LOCK(sc); + + /* make sure that the DLAB bit in the LCR register is 0 + */ + lcr_reg = READ_L1_UART_REG( nasid, REG_LCR ); + lcr_reg &= ~(LCR_DLAB); + WRITE_L1_UART_REG( nasid, REG_LCR, lcr_reg ); + + /* enable indicated interrupts + */ + icr_reg = READ_L1_UART_REG( nasid, REG_ICR ); + icr_reg |= mask; + WRITE_L1_UART_REG( nasid, REG_ICR, icr_reg /*(ICR_RIEN | ICR_TIEN)*/ ); + + L1_CONS_HW_UNLOCK(sc); +} + +static void +uart_intr_disable( l1sc_t *sc, u_char mask ) +{ + u_char lcr_reg, icr_reg; + nasid_t nasid = sc->nasid; + + L1_CONS_HW_LOCK(sc); + + /* make sure that the DLAB bit in the LCR register is 0 + */ + lcr_reg = READ_L1_UART_REG( nasid, REG_LCR ); + lcr_reg &= ~(LCR_DLAB); + WRITE_L1_UART_REG( nasid, REG_LCR, lcr_reg ); + + /* enable indicated interrupts + */ + icr_reg = READ_L1_UART_REG( nasid, REG_ICR ); + icr_reg &= mask; + WRITE_L1_UART_REG( nasid, REG_ICR, icr_reg /*(ICR_RIEN | ICR_TIEN)*/ ); + + L1_CONS_HW_UNLOCK(sc); +} + +#define uart_enable_xmit_intr(sc) \ + uart_intr_enable((sc), ICR_TIEN) + +#define uart_disable_xmit_intr(sc) \ + uart_intr_disable((sc), ~(ICR_TIEN)) + +#define uart_enable_recv_intr(sc) \ + uart_intr_enable((sc), ICR_RIEN) + +#define uart_disable_recv_intr(sc) \ + uart_intr_disable((sc), ~(ICR_RIEN)) + + +/********************************************************************* + * Routines for accessing a remote (router) UART + */ + +#define READ_RTR_L1_UART_REG(p, n, r, v) \ + { \ + if( vector_read_node( (p), (n), 0, \ + RR_JBUS1(r), (v) ) ) { \ + return UART_VECTOR; \ + } \ + } + +#define WRITE_RTR_L1_UART_REG(p, n, r, v) \ + { \ + if( vector_write_node( (p), (n), 0, \ + RR_JBUS1(r), (v) ) ) { \ + return UART_VECTOR; \ + } \ + } + +#ifdef SABLE +#define RTR_UART_PUTC_TIMEOUT 0 +#define RTR_UART_DELAY_SPAN 0 +#define RTR_UART_INIT_TIMEOUT 0 +#else +#define RTR_UART_PUTC_TIMEOUT UART_PUTC_TIMEOUT*10 +#define RTR_UART_DELAY_SPAN UART_DELAY_SPAN +#define RTR_UART_INIT_TIMEOUT UART_INIT_TIMEOUT*10 +#endif + +static int +rtr_uart_putc( l1sc_t *sc ) +{ + uint64_t regval, c; + nasid_t nasid = sc->nasid; + net_vec_t path = sc->uart; + rtc_time_t expire = rtc_time() + RTR_UART_PUTC_TIMEOUT; + + c = (sc->send[sc->sent] & 
0xffULL); + + while( 1 ) + { + /* Check for "tx hold reg empty" bit. */ + READ_RTR_L1_UART_REG( path, nasid, REG_LSR, ®val ); + if( regval & LSR_XHRE ) + { + WRITE_RTR_L1_UART_REG( path, nasid, REG_DAT, c ); + return UART_SUCCESS; + } + + if( rtc_time() >= expire ) + { + return UART_TIMEOUT; + } + uart_delay( RTR_UART_DELAY_SPAN ); + } +} + + +static int +rtr_uart_getc( l1sc_t *sc ) +{ + uint64_t regval; + nasid_t nasid = sc->nasid; + net_vec_t path = sc->uart; + + READ_RTR_L1_UART_REG( path, nasid, REG_LSR, ®val ); + if( regval & (LSR_RCA | LSR_PARERR | LSR_FRMERR) ) + { + if( regval & LSR_RCA ) + { + READ_RTR_L1_UART_REG( path, nasid, REG_DAT, ®val ); + return( (int)regval ); + } + else + { + return UART_LINK; + } + } + + return UART_NO_CHAR; +} + + +static int +rtr_uart_init( l1sc_t *sc, int baud ) +{ + rtc_time_t expire; + int clkdiv; + nasid_t nasid; + net_vec_t path; + uint64_t regval; + + clkdiv = PROM_SER_DIVISOR(baud); + expire = rtc_time() + RTR_UART_INIT_TIMEOUT; + nasid = sc->nasid; + path = sc->uart; + + /* make sure the transmit FIFO is empty */ + while(1) { + READ_RTR_L1_UART_REG( path, nasid, REG_LSR, ®val ); + if( regval & LSR_XSRE ) { + break; + } + if( rtc_time() > expire ) { + break; + } + uart_delay( RTR_UART_DELAY_SPAN ); + } + + WRITE_RTR_L1_UART_REG( path, nasid, REG_LCR, LCR_DLAB ); + uart_delay( UART_DELAY_SPAN ); + WRITE_RTR_L1_UART_REG( path, nasid, REG_DLH, (clkdiv >> 8) & 0xff ); + uart_delay( UART_DELAY_SPAN ); + WRITE_RTR_L1_UART_REG( path, nasid, REG_DLL, clkdiv & 0xff ); + uart_delay( UART_DELAY_SPAN ); + + /* set operating parameters and set DLAB to 0 */ + WRITE_RTR_L1_UART_REG( path, nasid, REG_LCR, LCR_BITS8 | LCR_STOP1 ); + uart_delay( UART_DELAY_SPAN ); + WRITE_RTR_L1_UART_REG( path, nasid, REG_MCR, MCR_RTS | MCR_AFE ); + uart_delay( UART_DELAY_SPAN ); + + /* disable interrupts */ + WRITE_RTR_L1_UART_REG( path, nasid, REG_ICR, 0x0 ); + uart_delay( UART_DELAY_SPAN ); + + /* enable FIFO mode and reset both FIFOs */ + WRITE_RTR_L1_UART_REG( path, nasid, REG_FCR, FCR_FIFOEN ); + uart_delay( UART_DELAY_SPAN ); + WRITE_RTR_L1_UART_REG( path, nasid, REG_FCR, + FCR_FIFOEN | FCR_RxFIFO | FCR_TxFIFO ); + + return 0; +} + + + +/********************************************************************* + * locking macros + */ + +#define L1SC_SEND_LOCK(l,pl) \ + { if( (l)->uart == BRL1_LOCALUART ) \ + (pl) = mutex_spinlock_spl( &((l)->send_lock), spl7 ); } + +#define L1SC_SEND_UNLOCK(l,pl) \ + { if( (l)->uart == BRL1_LOCALUART ) \ + mutex_spinunlock( &((l)->send_lock), (pl)); } + +#define L1SC_RECV_LOCK(l,pl) \ + { if( (l)->uart == BRL1_LOCALUART ) \ + (pl) = mutex_spinlock_spl( &((l)->recv_lock), spl7 ); } + +#define L1SC_RECV_UNLOCK(l,pl) \ + { if( (l)->uart == BRL1_LOCALUART ) \ + mutex_spinunlock( &((l)->recv_lock), (pl)); } + + +/********************************************************************* + * subchannel manipulation + * + * The SUBCH_[UN]LOCK macros are used to arbitrate subchannel + * allocation. SUBCH_DATA_[UN]LOCK control access to data structures + * associated with particular subchannels (e.g., receive queues). 
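+ * In practice that means SUBCH_LOCK is held only while a subchannel is
+ * being claimed or released (as in sc_open/sc_close below), while
+ * SUBCH_DATA_LOCK guards every touch of a subchannel's input queue,
+ * packet_arrived count and notification callbacks.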
+ * + */ + + +#ifdef SPINLOCKS_WORK +#define SUBCH_LOCK(sc,pl) \ + (pl) = mutex_spinlock_spl( &((sc)->subch_lock), spl7 ) +#define SUBCH_UNLOCK(sc,pl) \ + mutex_spinunlock( &((sc)->subch_lock), (pl) ) + +#define SUBCH_DATA_LOCK(sbch,pl) \ + (pl) = mutex_spinlock_spl( &((sbch)->data_lock), spl7 ) +#define SUBCH_DATA_UNLOCK(sbch,pl) \ + mutex_spinunlock( &((sbch)->data_lock), (pl) ) +#else +#define SUBCH_LOCK(sc,pl) +#define SUBCH_UNLOCK(sc,pl) +#define SUBCH_DATA_LOCK(sbch,pl) +#define SUBCH_DATA_UNLOCK(sbch,pl) +#endif /* SPINLOCKS_WORK */ + +/* + * set a function to be called for subchannel ch in the event of + * a transmission low-water interrupt from the uart + */ +void +subch_set_tx_notify( l1sc_t *sc, int ch, brl1_notif_t func ) +{ + int pl; + L1SC_SEND_LOCK( sc, pl ); + sc->subch[ch].tx_notify = func; + + /* some upper layer is asking to be notified of low-water, but if the + * send buffer isn't already in use, we're going to need to get the + * interrupts going on the uart... + */ + if( func && !sc->send_in_use ) + uart_enable_xmit_intr( sc ); + L1SC_SEND_UNLOCK(sc, pl ); +} + +/* + * set a function to be called for subchannel ch when data is received + */ +void +subch_set_rx_notify( l1sc_t *sc, int ch, brl1_notif_t func ) +{ +#ifdef SPINLOCKS_WORK + int pl; +#endif + brl1_sch_t *subch = &(sc->subch[ch]); + + SUBCH_DATA_LOCK( subch, pl ); + sc->subch[ch].rx_notify = func; + SUBCH_DATA_UNLOCK( subch, pl ); +} + + + +/* get_myid is an internal function that reads the PI_CPU_NUM + * register of the local bedrock to determine which of the + * four possible CPU's "this" one is + */ +static int +get_myid( void ) +{ + return( LD(LOCAL_HUB(PI_CPU_NUM)) ); +} + + + +/********************************************************************* + * Queue manipulation macros + * + * + */ +#define NEXT(p) (((p) + 1) & (BRL1_QSIZE-1)) /* assume power of 2 */ + +#define cq_init(q) bzero((q), sizeof (*(q))) +#define cq_empty(q) ((q)->ipos == (q)->opos) +#define cq_full(q) (NEXT((q)->ipos) == (q)->opos) +#define cq_used(q) ((q)->opos <= (q)->ipos ? \ + (q)->ipos - (q)->opos : \ + BRL1_QSIZE + (q)->ipos - (q)->opos) +#define cq_room(q) ((q)->opos <= (q)->ipos ? \ + BRL1_QSIZE - 1 + (q)->opos - (q)->ipos : \ + (q)->opos - (q)->ipos - 1) +#define cq_add(q, c) ((q)->buf[(q)->ipos] = (u_char) (c), \ + (q)->ipos = NEXT((q)->ipos)) +#define cq_rem(q, c) ((c) = (q)->buf[(q)->opos], \ + (q)->opos = NEXT((q)->opos)) +#define cq_discard(q) ((q)->opos = NEXT((q)->opos)) + +#define cq_tent_full(q) (NEXT((q)->tent_next) == (q)->opos) +#define cq_tent_len(q) ((q)->ipos <= (q)->tent_next ? \ + (q)->tent_next - (q)->ipos : \ + BRL1_QSIZE + (q)->tent_next - (q)->ipos) +#define cq_tent_add(q, c) \ + ((q)->buf[(q)->tent_next] = (u_char) (c), \ + (q)->tent_next = NEXT((q)->tent_next)) +#define cq_commit_tent(q) \ + ((q)->ipos = (q)->tent_next) +#define cq_discard_tent(q) \ + ((q)->tent_next = (q)->ipos) + + + + +/********************************************************************* + * CRC-16 (for checking bedrock/L1 packets). + * + * These are based on RFC 1662 ("PPP in HDLC-like framing"). 
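+ *
+ * Rough usage sketch (names as defined in this file; buf/len stand for
+ * the received header, body and CRC bytes): the sender starts from
+ * INIT_CRC, folds in each byte with crc16_calc(), complements the result
+ * and appends it low byte first; the receiver runs the same update over
+ * everything, including the transmitted CRC, and expects GOOD_CRC:
+ *
+ *     crc = INIT_CRC;
+ *     for( i = 0; i < len; i++ )
+ *         crc = crc16_calc( crc, buf[i] );
+ *     ok = (crc == (unsigned short)GOOD_CRC);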
+ */ + +static unsigned short fcstab[256] = { + 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, + 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7, + 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e, + 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876, + 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd, + 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5, + 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c, + 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974, + 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb, + 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3, + 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a, + 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72, + 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9, + 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1, + 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738, + 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70, + 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7, + 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff, + 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036, + 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e, + 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5, + 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd, + 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134, + 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c, + 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3, + 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb, + 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232, + 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a, + 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1, + 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9, + 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330, + 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78 +}; + +#define INIT_CRC 0xFFFF /* initial CRC value */ +#define GOOD_CRC 0xF0B8 /* "good" final CRC value */ + +static unsigned short crc16_calc( unsigned short crc, u_char c ) +{ + return( (crc >> 8) ^ fcstab[(crc ^ c) & 0xff] ); +} + + +/*********************************************************************** + * The following functions implement the PPP-like bedrock/L1 protocol + * layer. + * + */ + +#define BRL1_FLAG_CH 0x7e +#define BRL1_ESC_CH 0x7d +#define BRL1_XOR_CH 0x20 + +/* L1<->Bedrock packet types */ +#define BRL1_REQUEST 0x00 +#define BRL1_RESPONSE 0x20 +#define BRL1_EVENT 0x40 + +#define BRL1_PKT_TYPE_MASK 0xE0 +#define BRL1_SUBCH_MASK 0x1F + +#define PKT_TYPE(tsb) ((tsb) & BRL1_PKT_TYPE_MASK) +#define SUBCH(tsb) ((tsb) & BRL1_SUBCH_MASK) + +/* timeouts */ +#define BRL1_INIT_TIMEOUT 500000 + +extern l1sc_t * get_elsc( void ); + +/* + * brl1_discard_packet is a dummy "receive callback" used to get rid + * of packets we don't want + */ +void brl1_discard_packet( l1sc_t *sc, int ch ) +{ + int pl; + brl1_sch_t *subch = &sc->subch[ch]; + sc_cq_t *q = subch->iqp; + SUBCH_DATA_LOCK( subch, pl ); + q->opos = q->ipos; + atomicClearInt( &(subch->packet_arrived), ~((unsigned)0) ); + SUBCH_DATA_UNLOCK( subch, pl ); +} + + +/* + * brl1_send_chars sends the send buffer in the l1sc_t structure + * out through the uart. 
Assumes that the caller has locked the + * UART (or send buffer in the kernel). + * + * This routine doesn't block-- if you want it to, call it in + * a loop. + */ +static int +brl1_send_chars( l1sc_t *sc ) +{ + /* In the kernel, we track the depth of the C brick's UART's + * fifo in software, and only check if the UART is accepting + * characters when our count indicates that the fifo should + * be full. + * + * For remote (router) UARTs, and also for the local (C brick) + * UART in the prom, we check with the UART before sending every + * character. + */ + if( sc->uart == BRL1_LOCALUART ) + { + CONS_HW_LOCK(1); + if( !(sc->fifo_space) && UART_PUTC_READY( sc->nasid ) ) +// sc->fifo_space = UART_FIFO_DEPTH; + sc->fifo_space = 1000; + + while( (sc->sent < sc->send_len) && (sc->fifo_space) ) { + uart_putc( sc ); + sc->fifo_space--; + sc->sent++; + } + + CONS_HW_UNLOCK(1); + } + + else + + /* The following applies to all UARTs in the prom, and to remote + * (router) UARTs in the kernel... + */ + +#define TIMEOUT_RETRIES 30 + + { + int result; + int tries = 0; + + while( sc->sent < sc->send_len ) { + result = sc->putc_f( sc ); + if( result >= 0 ) { + (sc->sent)++; + continue; + } + if( result == UART_TIMEOUT ) { + tries++; + /* send this character in TIMEOUT_RETRIES... */ + if( tries < TIMEOUT_RETRIES ) { + continue; + } + /* ...or else... */ + else { + /* ...drop the packet. */ + sc->sent = sc->send_len; + return sc->send_len; + } + } + if( result < 0 ) { + return result; + } + } + } + + return sc->sent; +} + + +/* brl1_send formats up a packet and (at least begins to) send it + * to the uart. If the send buffer is in use when this routine obtains + * the lock, it will behave differently depending on the "wait" parameter. + * For wait == 0 (most I/O), it will return 0 (as in "zero bytes sent"), + * hopefully encouraging the caller to back off (unlock any high-level + * spinlocks) and allow the buffer some time to drain. For wait==1 (high- + * priority I/O along the lines of kernel error messages), we will flush + * the current contents of the send buffer and beat on the uart + * until our message has been completely transmitted. 
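+ *
+ * On the wire a packet built here looks roughly like this (a sketch put
+ * together from the constants above, not a separate spec):
+ *
+ *     0x7e  type|subch  <payload, 0x7e/0x7d escaped as 0x7d, byte^0x20>
+ *     <complemented 16-bit CRC, low byte first, likewise escaped>  0x7e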
+ */ + +int +brl1_send( l1sc_t *sc, char *msg, int len, u_char type_and_subch, int wait ) +{ + int pl; + int index; + int pkt_len = 0; + unsigned short crc = INIT_CRC; + char *send_ptr = sc->send; + + L1SC_SEND_LOCK(sc, pl); + + if( sc->send_in_use ) { + if( !wait ) { + L1SC_SEND_UNLOCK(sc, pl); + return 0; /* couldn't send anything; wait for buffer to drain */ + } + else { + /* buffer's in use, but we're synchronous I/O, so we're going + * to send whatever's in there right now and take the buffer + */ + while( sc->sent < sc->send_len ) + brl1_send_chars( sc ); + } + } + else { + sc->send_in_use = 1; + } + *send_ptr++ = BRL1_FLAG_CH; + *send_ptr++ = type_and_subch; + pkt_len += 2; + crc = crc16_calc( crc, type_and_subch ); + + /* limit number of characters accepted to max payload size */ + if( len > (BRL1_QSIZE - 1) ) + len = (BRL1_QSIZE - 1); + + /* copy in the message buffer (inserting PPP + * framing info where necessary) + */ + for( index = 0; index < len; index++ ) { + + switch( *msg ) { + + case BRL1_FLAG_CH: + *send_ptr++ = BRL1_ESC_CH; + *send_ptr++ = (*msg) ^ BRL1_XOR_CH; + pkt_len += 2; + break; + + case BRL1_ESC_CH: + *send_ptr++ = BRL1_ESC_CH; + *send_ptr++ = (*msg) ^ BRL1_XOR_CH; + pkt_len += 2; + break; + + default: + *send_ptr++ = *msg; + pkt_len++; + } + crc = crc16_calc( crc, *msg ); + msg++; + } + crc ^= 0xffff; + + for( index = 0; index < sizeof(crc); index++ ) { + char crc_char = (char)(crc & 0x00FF); + if( (crc_char == BRL1_ESC_CH) || (crc_char == BRL1_FLAG_CH) ) { + *send_ptr++ = BRL1_ESC_CH; + pkt_len++; + crc_char ^= BRL1_XOR_CH; + } + *send_ptr++ = crc_char; + pkt_len++; + crc >>= 8; + } + + *send_ptr++ = BRL1_FLAG_CH; + pkt_len++; + + sc->send_len = pkt_len; + sc->sent = 0; + + do { + brl1_send_chars( sc ); + } while( (sc->sent < sc->send_len) && wait ); + + if( sc->sent == sc->send_len ) { + /* success! release the send buffer */ + sc->send_in_use = 0; + } + else if( !wait ) { + /* enable low-water interrupts so buffer will be drained */ + uart_enable_xmit_intr(sc); + } + L1SC_SEND_UNLOCK(sc, pl); + return len; +} + + +/* brl1_send_cont is intended to be called as an interrupt service + * routine. It sends until the UART won't accept any more characters, + * or until an error is encountered (in which case we surrender the + * send buffer and give up trying to send the packet). Once the + * last character in the packet has been sent, this routine releases + * the send buffer and calls any previously-registered "low-water" + * output routines. + */ +int +brl1_send_cont( l1sc_t *sc ) +{ + int pl; + int done = 0; + brl1_notif_t callups[BRL1_NUM_SUBCHANS]; + brl1_notif_t *callup; + brl1_sch_t *subch; + int index; + + L1SC_SEND_LOCK(sc, pl); + brl1_send_chars( sc ); + done = (sc->sent == sc->send_len); + if( done ) { + + sc->send_in_use = 0; + uart_disable_xmit_intr(sc); + + /* collect pointers to callups *before* unlocking */ + subch = sc->subch; + callup = callups; + for( index = 0; index < BRL1_NUM_SUBCHANS; index++ ) { + *callup = subch->tx_notify; + subch++; + callup++; + } + } + L1SC_SEND_UNLOCK(sc, pl); + + if( done ) { + /* call any upper layer that's asked for low-water notification */ + callup = callups; + for( index = 0; index < BRL1_NUM_SUBCHANS; index++ ) { + if( *callup ) + (*(*callup))( sc, index ); + callup++; + } + } + return 0; +} + + +/* internal function -- used by brl1_receive to read a character + * from the uart and check whether errors occurred in the process. 
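+ * It returns 1 with *result set to BRL1_VALID when a character has been
+ * placed in *c, and 0 otherwise, with *result distinguishing "nothing
+ * available" (BRL1_NO_MESSAGE) from a UART error (BRL1_LINK).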
+ */ +static int +read_uart( l1sc_t *sc, int *c, int *result ) +{ + *c = sc->getc_f( sc ); + + /* no character is available */ + if( *c == UART_NO_CHAR ) { + *result = BRL1_NO_MESSAGE; + return 0; + } + + /* some error in UART */ + if( *c < 0 ) { + *result = BRL1_LINK; + return 0; + } + + /* everything's fine */ + *result = BRL1_VALID; + return 1; +} + + +/* + * brl1_receive + * + * This function reads a Bedrock-L1 protocol packet into the l1sc_t + * response buffer. + * + * The operation of this function can be expressed as a finite state + * machine: + * + +START STATE INPUT TRANSITION +========================================================== +BRL1_IDLE (reset or error) flag BRL1_FLAG + other BRL1_IDLE@ + +BRL1_FLAG (saw a flag (0x7e)) flag BRL1_FLAG + escape BRL1_IDLE@ + header byte BRL1_HDR + other BRL1_IDLE@ + +BRL1_HDR (saw a type/subch byte)(see below) BRL1_BODY + BRL1_HDR + +BRL1_BODY (reading packet body) flag BRL1_FLAG + escape BRL1_ESC + other BRL1_BODY + +BRL1_ESC (saw an escape (0x7d)) flag BRL1_FLAG@ + escape BRL1_IDLE@ + other BRL1_BODY +========================================================== + +"@" denotes an error transition. + + * The BRL1_HDR state is a transient state which doesn't read input, + * but just provides a way in to code which decides to whom an + * incoming packet should be directed. + * + * brl1_receive can be used to poll for input from the L1, or as + * an interrupt service routine. It reads as much data as is + * ready from the junk bus UART and places into the appropriate + * input queues according to subchannel. The header byte is + * stripped from console-type data, but is retained for message- + * type data (L1 responses). A length byte will also be + * prepended to message-type packets. + * + * This routine is non-blocking; if the caller needs to block + * for input, it must call brl1_receive in a loop. + * + * brl1_receive returns when there is no more input, the queue + * for the current incoming message is full, or there is an + * error (parity error, bad header, bad CRC, etc.). + */ + +#define STATE_SET(l,s) ((l)->brl1_state = (s)) +#define STATE_GET(l) ((l)->brl1_state) + +#define LAST_HDR_SET(l,h) ((l)->brl1_last_hdr = (h)) +#define LAST_HDR_GET(l) ((l)->brl1_last_hdr) + +#define SEQSTAMP_INCR(l) +#define SEQSTAMP_GET(l) + +#define VALID_HDR(c) \ + ( SUBCH((c)) <= SC_CONS_SYSTEM \ + ? PKT_TYPE((c)) == BRL1_REQUEST \ + : ( PKT_TYPE((c)) == BRL1_RESPONSE || \ + PKT_TYPE((c)) == BRL1_EVENT ) ) + +#define IS_TTY_PKT(l) \ + ( SUBCH(LAST_HDR_GET(l)) <= SC_CONS_SYSTEM ? 1 : 0 ) + + +int +brl1_receive( l1sc_t *sc ) +{ + int result; /* value to be returned by brl1_receive */ + int c; /* most-recently-read character */ + int pl; /* priority level for UART receive lock */ + int done; /* set done to break out of recv loop */ + sc_cq_t *q; /* pointer to queue we're working with */ + + result = BRL1_NO_MESSAGE; + + L1SC_RECV_LOCK( sc, pl ); + L1_CONS_HW_LOCK( sc ); + + done = 0; + while( !done ) + { + switch( STATE_GET(sc) ) + { + + case BRL1_IDLE: + /* Initial or error state. Waiting for a flag character + * to resynchronize with the L1. + */ + + if( !read_uart( sc, &c, &result ) ) { + + /* error reading uart */ + done = 1; + continue; + } + + if( c == BRL1_FLAG_CH ) { + /* saw a flag character */ + STATE_SET( sc, BRL1_FLAG ); + continue; + } + break; + + case BRL1_FLAG: + /* One or more flag characters have been read; look for + * the beginning of a packet (header byte). 
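+ * Only bytes that satisfy VALID_HDR() are accepted: subchannels up to
+ * SC_CONS_SYSTEM must carry BRL1_REQUEST packets, while the higher
+ * subchannels must carry BRL1_RESPONSE or BRL1_EVENT packets.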
+ */ + + if( !read_uart( sc, &c, &result ) ) { + + /* error reading uart */ + if( c != UART_NO_CHAR ) + STATE_SET( sc, BRL1_IDLE ); + + done = 1; + continue; + } + + if( c == BRL1_FLAG_CH ) { + /* multiple flags are OK */ + continue; + } + + if( !VALID_HDR( c ) ) { + /* if c isn't a flag it should have been + * a valid header, so we have an error + */ + result = BRL1_PROTOCOL; + STATE_SET( sc, BRL1_IDLE ); + done = 1; + continue; + } + + /* we have a valid header byte */ + LAST_HDR_SET( sc, c ); + STATE_SET( sc, BRL1_HDR ); + + break; + + case BRL1_HDR: + /* A header byte has been read. Do some bookkeeping. */ + q = sc->subch[ SUBCH( LAST_HDR_GET(sc) ) ].iqp; + ASSERT(q); + + if( !IS_TTY_PKT(sc) ) { + /* if this is an event or command response rather + * than console I/O, we need to reserve a couple + * of extra spaces in the queue for the header + * byte and a length byte; if we can't, stay in + * the BRL1_HDR state. + */ + if( cq_room( q ) < 2 ) { + result = BRL1_FULL_Q; + done = 1; + continue; + } + cq_tent_add( q, 0 ); /* reserve length byte */ + cq_tent_add( q, LAST_HDR_GET( sc ) ); /* record header byte */ + } + STATE_SET( sc, BRL1_BODY ); + + break; + + case BRL1_BODY: + /* A header byte has been read. We are now attempting + * to receive the packet body. + */ + + q = sc->subch[ SUBCH( LAST_HDR_GET(sc) ) ].iqp; + ASSERT(q); + + /* if the queue we want to write into is full, don't read from + * the uart (this provides backpressure to the L1 side) + */ + if( cq_tent_full( q ) ) { + result = BRL1_FULL_Q; + done = 1; + continue; + } + + if( !read_uart( sc, &c, &result ) ) { + + /* error reading uart */ + if( c != UART_NO_CHAR ) + STATE_SET( sc, BRL1_IDLE ); + done = 1; + continue; + } + + if( c == BRL1_ESC_CH ) { + /* prepare to unescape the next character */ + STATE_SET( sc, BRL1_ESC ); + continue; + } + + if( c == BRL1_FLAG_CH ) { + /* flag signifies the end of a packet */ + + unsigned short crc; /* holds the crc as we calculate it */ + int i; /* index variable */ + brl1_sch_t *subch; /* subchannel for received packet */ + int sch_pl; /* cookie for subchannel lock */ + brl1_notif_t callup; /* "data ready" callup */ + + /* whatever else may happen, we've seen a flag and we're + * starting a new packet + */ + STATE_SET( sc, BRL1_FLAG ); + SEQSTAMP_INCR(sc); /* bump the packet sequence counter */ + + /* if the packet body has less than 2 characters, + * it can't be a well-formed packet. Discard it. + */ + if( cq_tent_len( q ) < /* 2 + possible length byte */ + (2 + (IS_TTY_PKT(sc) ? 0 : 1)) ) + { + result = BRL1_PROTOCOL; + cq_discard_tent( q ); + STATE_SET( sc, BRL1_FLAG ); + done = 1; + continue; + } + + /* check CRC */ + + /* accumulate CRC, starting with the header byte and + * ending with the transmitted CRC. This should + * result in a known good value. + */ + crc = crc16_calc( INIT_CRC, LAST_HDR_GET(sc) ); + for( i = (q->ipos + (IS_TTY_PKT(sc) ? 0 : 2)) % BRL1_QSIZE; + i != q->tent_next; + i = (i + 1) % BRL1_QSIZE ) + { + crc = crc16_calc( crc, q->buf[i] ); + } + + /* verify the caclulated crc against the "good" crc value; + * if we fail, discard the bad packet and return an error. + */ + if( crc != (unsigned short)GOOD_CRC ) { + result = BRL1_CRC; + cq_discard_tent( q ); + STATE_SET( sc, BRL1_FLAG ); + done = 1; + continue; + } + + /* so the crc check was ok. Now we discard the CRC + * from the end of the received bytes. 
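+ * Adding BRL1_QSIZE-2 and reducing modulo BRL1_QSIZE just backs the
+ * tentative insertion point up by two slots, dropping the two CRC
+ * bytes without moving any data.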
+ */ + q->tent_next += (BRL1_QSIZE - 2); + q->tent_next %= BRL1_QSIZE; + + /* get the subchannel and lock it */ + subch = &(sc->subch[SUBCH( LAST_HDR_GET(sc) )]); + SUBCH_DATA_LOCK( subch, sch_pl ); + + /* if this isn't a console packet, we need to record + * a length byte + */ + if( !IS_TTY_PKT(sc) ) { + q->buf[q->ipos] = cq_tent_len( q ) - 1; + } + + /* record packet for posterity */ + cq_commit_tent( q ); + result = BRL1_VALID; + + /* notify subchannel owner that there's something + * on the queue for them + */ + atomicAddInt( &(subch->packet_arrived), 1); + callup = subch->rx_notify; + SUBCH_DATA_UNLOCK( subch, sch_pl ); + + if( callup ) { + L1_CONS_HW_UNLOCK( sc ); + L1SC_RECV_UNLOCK( sc, pl ); + (*callup)( sc, SUBCH(LAST_HDR_GET(sc)) ); + L1SC_RECV_LOCK( sc, pl ); + L1_CONS_HW_LOCK( sc ); + } + continue; /* go back for more! */ + } + + /* none of the special cases applied; we've got a normal + * body character + */ + cq_tent_add( q, c ); + + break; + + case BRL1_ESC: + /* saw an escape character. The next character will need + * to be unescaped. + */ + + q = sc->subch[ SUBCH( LAST_HDR_GET(sc) ) ].iqp; + ASSERT(q); + + /* if the queue we want to write into is full, don't read from + * the uart (this provides backpressure to the L1 side) + */ + if( cq_tent_full( q ) ) { + result = BRL1_FULL_Q; + done = 1; + continue; + } + + if( !read_uart( sc, &c, &result ) ) { + + /* error reading uart */ + if( c != UART_NO_CHAR ) { + cq_discard_tent( q ); + STATE_SET( sc, BRL1_IDLE ); + } + done = 1; + continue; + } + + if( c == BRL1_FLAG_CH ) { + /* flag after escape is an error */ + STATE_SET( sc, BRL1_FLAG ); + cq_discard_tent( q ); + result = BRL1_PROTOCOL; + done = 1; + continue; + } + + if( c == BRL1_ESC_CH ) { + /* two consecutive escapes is an error */ + STATE_SET( sc, BRL1_IDLE ); + cq_discard_tent( q ); + result = BRL1_PROTOCOL; + done = 1; + continue; + } + + /* otherwise, we've got a character that needs + * to be unescaped + */ + cq_tent_add( q, (c ^ BRL1_XOR_CH) ); + STATE_SET( sc, BRL1_BODY ); + + break; + + } /* end of switch( STATE_GET(sc) ) */ + } /* end of while(!done) */ + + L1_CONS_HW_UNLOCK( sc ); + L1SC_RECV_UNLOCK(sc, pl); + + return result; +} + + +/* brl1_init initializes the Bedrock/L1 protocol layer. This includes + * zeroing out the send and receive state information. + */ + +void +brl1_init( l1sc_t *sc, nasid_t nasid, net_vec_t uart ) +{ + int i; + brl1_sch_t *subch; + + bzero( sc, sizeof( *sc ) ); + sc->nasid = nasid; + sc->uart = uart; + sc->getc_f = (uart == BRL1_LOCALUART ? uart_getc : rtr_uart_getc); + sc->putc_f = (uart == BRL1_LOCALUART ? 
uart_putc : rtr_uart_putc); + sc->sol = 1; + subch = sc->subch; + + /* initialize L1 subchannels + */ + + /* assign processor TTY channels */ + for( i = 0; i < CPUS_PER_NODE; i++, subch++ ) { + subch->use = BRL1_SUBCH_RSVD; + subch->packet_arrived = 0; + spinlock_init( &(subch->data_lock), NULL ); + sv_init( &(subch->arrive_sv), SV_FIFO, NULL ); + subch->tx_notify = NULL; + /* (for now, drop elscuart packets in the kernel) */ + subch->rx_notify = brl1_discard_packet; + subch->iqp = &sc->garbage_q; + } + + /* assign system TTY channel (first free subchannel after each + * processor's individual TTY channel has been assigned) + */ + subch->use = BRL1_SUBCH_RSVD; + subch->packet_arrived = 0; + spinlock_init( &(subch->data_lock), NULL ); + sv_init( &(subch->arrive_sv), SV_FIFO, NULL ); + subch->tx_notify = NULL; + if( sc->uart == BRL1_LOCALUART ) { + subch->iqp = kmem_zalloc_node( sizeof(sc_cq_t), KM_NOSLEEP, + NASID_TO_COMPACT_NODEID(nasid) ); + ASSERT( subch->iqp ); + cq_init( subch->iqp ); + subch->rx_notify = NULL; + } + else { + /* we shouldn't be getting console input from remote UARTs */ + subch->iqp = &sc->garbage_q; + subch->rx_notify = brl1_discard_packet; + } + subch++; i++; + + /* "reserved" subchannels (0x05-0x0F); for now, throw away + * incoming packets + */ + for( ; i < 0x10; i++, subch++ ) { + subch->use = BRL1_SUBCH_FREE; + subch->packet_arrived = 0; + subch->tx_notify = NULL; + subch->rx_notify = brl1_discard_packet; + subch->iqp = &sc->garbage_q; + } + + /* remaining subchannels are free */ + for( ; i < BRL1_NUM_SUBCHANS; i++, subch++ ) { + subch->use = BRL1_SUBCH_FREE; + subch->packet_arrived = 0; + subch->tx_notify = NULL; + subch->rx_notify = brl1_discard_packet; + subch->iqp = &sc->garbage_q; + } + + /* initialize synchronization structures + */ + spinlock_init( &(sc->send_lock), NULL ); + spinlock_init( &(sc->recv_lock), NULL ); + spinlock_init( &(sc->subch_lock), NULL ); + + if( sc->uart == BRL1_LOCALUART ) { + uart_init( sc, UART_BAUD_RATE ); + } + else { + rtr_uart_init( sc, UART_BAUD_RATE ); + } + + /* Set up remaining fields using L1 command functions-- elsc_module_get + * to read the module id, elsc_debug_get to see whether or not we're + * in verbose mode. + */ + { + extern int elsc_module_get(l1sc_t *); + + sc->modid = elsc_module_get( sc ); + sc->modid = + (sc->modid < 0 ? INVALID_MODULE : sc->modid); + + sc->verbose = 1; + } +} + + +/********************************************************************* + * These are interrupt-related functions used in the kernel to service + * the L1. + */ + +/* + * brl1_intrd is the function which is called in a loop by the + * xthread that services L1 interrupts. + */ +#ifdef IRIX +void +brl1_intrd( struct eframe_s *ep ) +{ + u_char isr_reg; + l1sc_t *sc = get_elsc(); + + isr_reg = READ_L1_UART_REG(sc->nasid, REG_ISR); + + while( isr_reg & (ISR_RxRDY | ISR_TxRDY) ) { + + if( isr_reg & ISR_RxRDY ) { + brl1_receive(sc); + } + if( (isr_reg & ISR_TxRDY) || + (sc->send_in_use && UART_PUTC_READY(sc->nasid)) ) + { + brl1_send_cont(sc); + } + isr_reg = READ_L1_UART_REG(sc->nasid, REG_ISR); + } + + /* uart interrupts were blocked at bedrock when the the interrupt + * was initially answered; reenable them now + */ + intr_unblock_bit( sc->intr_cpu, UART_INTR ); + ep = ep; /* placate the compiler */ +} +#endif + + + +/* brl1_intr is called directly from the uart interrupt; after it runs, the + * interrupt "daemon" xthread is signalled to continue. 
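+ * The division of labour is: brl1_intr only blocks the UART interrupt bit
+ * and returns, the xthread then runs brl1_intrd to drain received data
+ * and push out any pending transmit bytes while the ISR still shows
+ * RxRDY/TxRDY work, and brl1_intrd unblocks the interrupt when done.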
+ */ +#ifdef IRIX +void +brl1_intr( struct eframe_s *ep ) +{ + /* Disable the UART interrupt, giving the xthread time to respond. + * When the daemon (xthread) finishes doing its thing, it will + * unblock the interrupt. + */ + intr_block_bit( get_elsc()->intr_cpu, UART_INTR ); + ep = ep; /* placate the compiler */ +} + + +/* set up uart interrupt handling for this node's uart + */ +void +brl1_connect_intr( l1sc_t *sc ) +{ + cpuid_t last_cpu; + + sc->intr_cpu = nodepda->node_first_cpu; + + if( intr_connect_level(sc->intr_cpu, UART_INTR, INTPEND0_MAXMASK, + (intr_func_t)brl1_intrd, 0, + (intr_func_t)brl1_intr) ) + cmn_err(CE_PANIC, "brl1_connect_intr: Can't connect UART interrupt."); + + uart_enable_recv_intr( sc ); +} +#endif /* IRIX */ + +#ifdef SABLE +/* this function is called periodically to generate fake interrupts + * and allow brl1_intrd to send/receive characters + */ +void +hubuart_service( void ) +{ + l1sc_t *sc = get_elsc(); + /* note that we'll lose error state by reading the lsr_reg. + * This is probably ok in the frictionless domain of sable. + */ + int lsr_reg; + nasid_t nasid = sc->nasid; + lsr_reg = READ_L1_UART_REG( nasid, REG_LSR ); + if( lsr_reg & (LSR_RCA | LSR_XSRE) ) { + REMOTE_HUB_PI_SEND_INTR(0, 0, UART_INTR); + } +} +#endif /* SABLE */ + + +/********************************************************************* + * The following function allows the kernel to "go around" the + * uninitialized l1sc structure to allow console output during + * early system startup. + */ + +/* These are functions to use from serial_in/out when in protocol + * mode to send and receive uart control regs. + */ +void +brl1_send_control(int offset, int value) +{ + nasid_t nasid = get_nasid(); + WRITE_L1_UART_REG(nasid, offset, value); +} + +int +brl1_get_control(int offset) +{ + nasid_t nasid = get_nasid(); + return(READ_L1_UART_REG(nasid, offset)); +} + +#define PUTCHAR(ch) \ + { \ + while( !(READ_L1_UART_REG( nasid, REG_LSR ) & LSR_XHRE) ); \ + WRITE_L1_UART_REG( nasid, REG_DAT, (ch) ); \ + } + +int +brl1_send_console_packet( char *str, int len ) +{ + int sent = len; + char crc_char; + unsigned short crc = INIT_CRC; + nasid_t nasid = get_nasid(); + + PUTCHAR( BRL1_FLAG_CH ); + PUTCHAR( BRL1_EVENT | SC_CONS_SYSTEM ); + crc = crc16_calc( crc, (BRL1_EVENT | SC_CONS_SYSTEM) ); + + while( len ) { + + if( (*str == BRL1_FLAG_CH) || (*str == BRL1_ESC_CH) ) { + PUTCHAR( BRL1_ESC_CH ); + PUTCHAR( (*str) ^ BRL1_XOR_CH ); + } + else { + PUTCHAR( *str ); + } + + crc = crc16_calc( crc, *str ); + + str++; len--; + } + + crc ^= 0xffff; + crc_char = crc & 0xff; + if( (crc_char == BRL1_ESC_CH) || (crc_char == BRL1_FLAG_CH) ) { + crc_char ^= BRL1_XOR_CH; + PUTCHAR( BRL1_ESC_CH ); + } + PUTCHAR( crc_char ); + crc_char = (crc >> 8) & 0xff; + if( (crc_char == BRL1_ESC_CH) || (crc_char == BRL1_FLAG_CH) ) { + crc_char ^= BRL1_XOR_CH; + PUTCHAR( BRL1_ESC_CH ); + } + PUTCHAR( crc_char ); + PUTCHAR( BRL1_FLAG_CH ); + + return sent - len; +} + + +/********************************************************************* + * l1_cons functions + * + * These allow the L1 to act as the system console. They're intended + * to abstract away most of the br/l1 internal details from the + * _L1_cons_* functions (in the prom-- see "l1_console.c") and + * l1_* functions (in the kernel-- see "sio_l1.c") that they support. 
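+ *
+ * Console traffic rides on the SC_CONS_SYSTEM subchannel: l1_cons_write()
+ * is a thin wrapper around brl1_send() with the BRL1_EVENT type, while
+ * l1_cons_poll/getc/read pull characters back out of that subchannel's
+ * circular receive queue.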
+ * + */ + +int +l1_cons_poll( l1sc_t *sc ) +{ + /* in case this gets called before the l1sc_t structure for the module_t + * struct for this node is initialized (i.e., if we're called with a + * zero l1sc_t pointer)... + */ + if( !sc ) { + return 0; + } + + if( sc->subch[SC_CONS_SYSTEM].packet_arrived ) { + return 1; + } + + brl1_receive( sc ); + + if( sc->subch[SC_CONS_SYSTEM].packet_arrived ) { + return 1; + } + return 0; +} + + +/* pull a character off of the system console queue (if one is available) + */ +int +l1_cons_getc( l1sc_t *sc ) +{ + int c; +#ifdef SPINLOCKS_WORK + int pl; +#endif + brl1_sch_t *subch = &(sc->subch[SC_CONS_SYSTEM]); + sc_cq_t *q = subch->iqp; + + if( !l1_cons_poll( sc ) ) { + return 0; + } + + SUBCH_DATA_LOCK( subch, pl ); + if( cq_empty( q ) ) { + subch->packet_arrived = 0; + SUBCH_DATA_UNLOCK( subch, pl ); + return 0; + } + cq_rem( q, c ); + if( cq_empty( q ) ) + subch->packet_arrived = 0; + SUBCH_DATA_UNLOCK( subch, pl ); + + return c; +} + + +/* initialize the system console subchannel + */ +void +l1_cons_init( l1sc_t *sc ) +{ +#ifdef SPINLOCKS_WORK + int pl; +#endif + brl1_sch_t *subch = &(sc->subch[SC_CONS_SYSTEM]); + + SUBCH_DATA_LOCK( subch, pl ); + subch->packet_arrived = 0; + cq_init( subch->iqp ); + SUBCH_DATA_UNLOCK( subch, pl ); +} + + +/* + * Write a message to the L1 on the system console subchannel. + * + * Danger: don't use a non-zero value for the wait parameter unless you're + * someone important (like a kernel error message). + */ +int +l1_cons_write( l1sc_t *sc, char *msg, int len, int wait ) +{ + return( brl1_send( sc, msg, len, (SC_CONS_SYSTEM | BRL1_EVENT), wait ) ); +} + + +/* + * Read as many characters from the system console receive queue as are + * available there (up to avail bytes). + */ +int +l1_cons_read( l1sc_t *sc, char *buf, int avail ) +{ + int pl; + int before_wrap, after_wrap; + brl1_sch_t *subch = &(sc->subch[SC_CONS_SYSTEM]); + sc_cq_t *q = subch->iqp; + + if( !(subch->packet_arrived) ) + return 0; + + SUBCH_DATA_LOCK( subch, pl ); + if( q->opos > q->ipos ) { + before_wrap = BRL1_QSIZE - q->opos; + if( before_wrap >= avail ) { + before_wrap = avail; + after_wrap = 0; + } + else { + avail -= before_wrap; + after_wrap = q->ipos; + if( after_wrap > avail ) + after_wrap = avail; + } + } + else { + before_wrap = q->ipos - q->opos; + if( before_wrap > avail ) + before_wrap = avail; + after_wrap = 0; + } + + + BCOPY( q->buf + q->opos, buf, before_wrap ); + if( after_wrap ) + BCOPY( q->buf, buf + before_wrap, after_wrap ); + q->opos = ((q->opos + before_wrap + after_wrap) % BRL1_QSIZE); + + subch->packet_arrived = 0; + SUBCH_DATA_UNLOCK( subch, pl ); + + return( before_wrap + after_wrap ); +} + + +/* + * Install a callback function for the system console subchannel + * to allow an upper layer to be notified when the send buffer + * has been emptied. + */ +void +l1_cons_tx_notif( l1sc_t *sc, brl1_notif_t func ) +{ + subch_set_tx_notify( sc, SC_CONS_SYSTEM, func ); +} + + +/* + * Install a callback function for the system console subchannel + * to allow an upper layer to be notified when a packet has been + * received. + */ +void +l1_cons_rx_notif( l1sc_t *sc, brl1_notif_t func ) +{ + subch_set_rx_notify( sc, SC_CONS_SYSTEM, func ); +} + + + + +/********************************************************************* + * The following functions and definitions implement the "message"- + * style interface to the L1 system controller. 
+ * + * Note that throughout this file, "sc" generally stands for "system + * controller", while "subchannels" tend to be represented by + * variables with names like subch or ch. + * + */ + +#ifdef L1_DEBUG +#define L1_DBG_PRF(x) printf x +#else +#define L1_DBG_PRF(x) +#endif + +/* sc_data_ready is called to signal threads that are blocked on + * l1 input. + */ +void +sc_data_ready( l1sc_t *sc, int ch ) +{ + brl1_sch_t *subch = &(sc->subch[ch]); + sv_signal( &(subch->arrive_sv) ); +} + +/* sc_open reserves a subchannel to send a request to the L1 (the + * L1's response will arrive on the same channel). The number + * returned by sc_open is the system controller subchannel + * acquired. + */ +int +sc_open( l1sc_t *sc, uint target ) +{ + /* The kernel version implements a locking scheme to arbitrate + * subchannel assignment. + */ + int ch; + int pl; + brl1_sch_t *subch; + + SUBCH_LOCK( sc, pl ); + + /* Look for a free subchannel. Subchannels 0-15 are reserved + * for other purposes. + */ + for( subch = &(sc->subch[BRL1_CMD_SUBCH]), ch = BRL1_CMD_SUBCH; + ch < BRL1_NUM_SUBCHANS; subch++, ch++ ) { + if( subch->use == BRL1_SUBCH_FREE ) + break; + } + + if( ch == BRL1_NUM_SUBCHANS ) { + /* there were no subchannels available! */ + SUBCH_UNLOCK( sc, pl ); + return SC_NSUBCH; + } + + subch->use = BRL1_SUBCH_RSVD; + SUBCH_UNLOCK( sc, pl ); + + subch->packet_arrived = 0; + subch->target = target; + sv_init( &(subch->arrive_sv), SV_FIFO, NULL ); + spinlock_init( &(subch->data_lock), NULL ); + subch->tx_notify = NULL; + subch->rx_notify = sc_data_ready; + subch->iqp = kmem_zalloc_node( sizeof(sc_cq_t), KM_NOSLEEP, + NASID_TO_COMPACT_NODEID(sc->nasid) ); + ASSERT( subch->iqp ); + cq_init( subch->iqp ); + + return ch; +} + + +/* sc_close frees a Bedrock<->L1 subchannel. + */ +int +sc_close( l1sc_t *sc, int ch ) +{ + brl1_sch_t *subch; + int pl; + + SUBCH_LOCK( sc, pl ); + subch = &(sc->subch[ch]); + if( subch->use != BRL1_SUBCH_RSVD ) { + /* we're trying to close a subchannel that's not open */ + return SC_NOPEN; + } + + subch->packet_arrived = 0; + subch->use = BRL1_SUBCH_FREE; + + sv_broadcast( &(subch->arrive_sv) ); + sv_destroy( &(subch->arrive_sv) ); + spinlock_destroy( &(subch->data_lock) ); + + ASSERT( subch->iqp && (subch->iqp != &sc->garbage_q) ); + kmem_free( subch->iqp, sizeof(sc_cq_t) ); + subch->iqp = &sc->garbage_q; + + SUBCH_UNLOCK( sc, pl ); + + return SC_SUCCESS; +} + + +/* sc_construct_msg builds a bedrock-to-L1 request in the supplied + * buffer. Returns the length of the message. The + * safest course when passing a buffer to be filled in is to use + * BRL1_QSIZE as the buffer size. + * + * Command arguments are passed as type/argument pairs, i.e., to + * pass the number 5 as an argument to an L1 command, call + * sc_construct_msg as follows: + * + * char msg[BRL1_QSIZE]; + * msg_len = sc_construct_msg( msg, + * BRL1_QSIZE, + * target_component, + * L1_ADDR_TASK_BOGUSTASK, + * L1_BOGUSTASK_REQ_BOGUSREQ, + * 2, + * L1_ARG_INT, 5 ); + * + * To pass an additional ASCII argument, you'd do the following: + * + * char *str; + * ... str points to a null-terminated ascii string ... + * msg_len = sc_construct_msg( msg, + * BRL1_QSIZE, + * target_component, + * L1_ADDR_TASK_BOGUSTASK, + * L1_BOGUSTASK_REQ_BOGUSREQ, + * 4, + * L1_ARG_INT, 5, + * L1_ARG_ASCII, str ); + * + * Finally, arbitrary data of unknown type is passed using the argtype + * code L1_ARG_UNKNOWN, a data length, and a buffer pointer, e.g. 
+ * + * msg_len = sc_construct_msg( msg, + * BRL1_QSIZE, + * target_component, + * L1_ADDR_TASK_BOGUSTASK, + * L1_BOGUSTASK_REQ_BOGUSREQ, + * 3, + * L1_ARG_UNKNOWN, 32, bufptr ); + * + * ...passes 32 bytes of data starting at bufptr. Note that no string or + * "unknown"-type argument should be long enough to overflow the message + * buffer. + * + * To construct a message for an L1 command that requires no arguments, + * you'd use the following: + * + * msg_len = sc_construct_msg( msg, + * BRL1_QSIZE, + * target_component, + * L1_ADDR_TASK_BOGUSTASK, + * L1_BOGUSTASK_REQ_BOGUSREQ, + * 0 ); + * + * The final 0 means "no varargs". Notice that this parameter is used to hold + * the number of additional arguments to sc_construct_msg, _not_ the actual + * number of arguments used by the L1 command (so 2 per L1_ARG_[INT,ASCII] + * type argument, and 3 per L1_ARG_UNKOWN type argument). A call to construct + * an L1 command which required three integer arguments and two arguments of + * some arbitrary (unknown) type would pass 12 as the value for this parameter. + * + * ENDIANNESS WARNING: The following code does a lot of copying back-and-forth + * between byte arrays and four-byte big-endian integers. Depending on the + * system controller connection and endianness of future architectures, some + * rewriting might be necessary. + */ +int +sc_construct_msg( l1sc_t *sc, /* system controller struct */ + int ch, /* subchannel for this message */ + char *msg, /* message buffer */ + int msg_len, /* size of message buffer */ + l1addr_t addr_task, /* target system controller task */ + short req_code, /* 16-bit request code */ + int req_nargs, /* # of arguments (varargs) passed */ + ... ) /* any additional parameters */ +{ + uint32_t buf32; /* 32-bit buffer used to bounce things around */ + void *bufptr; /* used to hold command argument addresses */ + va_list al; /* variable argument list */ + int index; /* current index into msg buffer */ + int argno; /* current position in varargs list */ + int l1_argno; /* running total of arguments to l1 */ + int l1_arg_t; /* argument type/length */ + int l1_argno_byte; /* offset of argument count byte */ + + index = argno = 0; + + /* set up destination address */ + if( (msg_len -= sizeof( buf32 )) < 0 ) + return -1; + L1_ADDRESS_TO_TASK( &buf32, sc->subch[ch].target, addr_task ); + COPY_INT_TO_BUFFER(msg, index, buf32); + + /* copy request code */ + if( (msg_len -= 2) < 0 ) + return( -1 ); + msg[index++] = ((req_code >> 8) & 0xff); + msg[index++] = (req_code & 0xff); + + if( !req_nargs ) { + return index; + } + + /* reserve a byte for the argument count */ + if( (msg_len -= 1) < 0 ) + return( -1 ); + l1_argno_byte = index++; + l1_argno = 0; + + /* copy additional arguments */ + va_start( al, req_nargs ); + while( argno < req_nargs ) { + l1_argno++; + l1_arg_t = va_arg( al, int ); argno++; + switch( l1_arg_t ) + { + case L1_ARG_INT: + if( (msg_len -= (sizeof( buf32 ) + 1)) < 0 ) + return( -1 ); + msg[index++] = L1_ARG_INT; + buf32 = (unsigned)va_arg( al, int ); argno++; + COPY_INT_TO_BUFFER(msg, index, buf32); + break; + + case L1_ARG_ASCII: + bufptr = va_arg( al, char* ); argno++; + if( (msg_len -= (strlen( bufptr ) + 2)) < 0 ) + return( -1 ); + msg[index++] = L1_ARG_ASCII; + strcpy( (char *)&(msg[index]), (char *)bufptr ); + index += (strlen( bufptr ) + 1); /* include terminating null */ + break; + + case L1_ARG_UNKNOWN: + { + int arglen; + + arglen = va_arg( al, int ); argno++; + bufptr = va_arg( al, void* ); argno++; + if( (msg_len -= (arglen + 1)) < 0 ) + return( 
-1 ); + msg[index++] = L1_ARG_UNKNOWN | arglen; + BCOPY( bufptr, &(msg[index]), arglen ); + index += arglen; + break; + } + + default: /* unhandled argument type */ + return -1; + } + } + + va_end( al ); + msg[l1_argno_byte] = l1_argno; + + return index; +} + + + +/* sc_interpret_resp verifies an L1 response to a bedrock request, and + * breaks the response data up into the constituent parts. If the + * response message indicates error, or if a mismatch is found in the + * expected number and type of arguments, an error is returned. The + * arguments to this function work very much like the arguments to + * sc_construct_msg, above, except that L1_ARG_INTs must be followed + * by a _pointer_ to an integer that can be filled in by this function. + */ +int +sc_interpret_resp( char *resp, /* buffer received from L1 */ + int resp_nargs, /* number of _varargs_ passed in */ + ... ) +{ + uint32_t buf32; /* 32-bit buffer used to bounce things around */ + void *bufptr; /* used to hold response field addresses */ + va_list al; /* variable argument list */ + int index; /* current index into response buffer */ + int argno; /* current position in varargs list */ + int l1_fldno; /* number of resp fields received from l1 */ + int l1_fld_t; /* field type/length */ + + index = argno = 0; + +#if defined(L1_DEBUG) +#define DUMP_RESP \ + { \ + int ix; \ + char outbuf[512]; \ + sprintf( outbuf, "sc_interpret_resp error line %d: ", __LINE__ ); \ + for( ix = 0; ix < 16; ix++ ) { \ + sprintf( &outbuf[strlen(outbuf)], "%x ", resp[ix] ); \ + } \ + printk( "%s\n", outbuf ); \ + } +#else +#define DUMP_RESP +#endif /* L1_DEBUG */ + + /* check response code */ + COPY_BUFFER_TO_INT(resp, index, buf32); + if( buf32 != L1_RESP_OK ) { + DUMP_RESP; + return buf32; + } + + /* get number of response fields */ + l1_fldno = resp[index++]; + + va_start( al, resp_nargs ); + + /* copy out response fields */ + while( argno < resp_nargs ) { + l1_fldno--; + l1_fld_t = va_arg( al, int ); argno++; + switch( l1_fld_t ) + { + case L1_ARG_INT: + if( resp[index++] != L1_ARG_INT ) { + /* type mismatch */ + va_end( al ); + DUMP_RESP; + return -1; + } + bufptr = va_arg( al, int* ); argno++; + COPY_BUFFER_TO_BUFFER(resp, index, bufptr); + break; + + case L1_ARG_ASCII: + if( resp[index++] != L1_ARG_ASCII ) { + /* type mismatch */ + va_end( al ); + DUMP_RESP; + return -1; + } + bufptr = va_arg( al, char* ); argno++; + strcpy( (char *)bufptr, (char *)&(resp[index]) ); + /* include terminating null */ + index += (strlen( &(resp[index]) ) + 1); + break; + + default: + if( (l1_fld_t & L1_ARG_UNKNOWN) == L1_ARG_UNKNOWN ) + { + int *arglen; + + arglen = va_arg( al, int* ); argno++; + bufptr = va_arg( al, void* ); argno++; + *arglen = ((resp[index++] & ~L1_ARG_UNKNOWN) & 0xff); + BCOPY( &(resp[index]), bufptr, *arglen ); + index += (*arglen); + } + + else { + /* unhandled type */ + va_end( al ); + DUMP_RESP; + return -1; + } + } + } + va_end( al ); + + if( (l1_fldno != 0) || (argno != resp_nargs) ) { + /* wrong number of arguments */ + DUMP_RESP; + return -1; + } + return 0; +} + + + + +/* sc_send takes as arguments a system controller struct, a + * buffer which contains a Bedrock<->L1 "request" message, + * the message length, and the subchannel (presumably obtained + * from an earlier invocation of sc_open) over which the + * message is to be sent. The final argument ("wait") indicates + * whether the send is to be performed synchronously or not. + * + * sc_send returns either zero or an error value. 
Synchronous sends + * (wait != 0) will not return until the data has actually been sent + * to the UART. Synchronous sends generally receive privileged + * treatment. The intent is that they be used sparingly, for such + * purposes as kernel printf's (the "ducons" routines). Run-of-the-mill + * console output and L1 requests should NOT use a non-zero value + * for wait. + */ +int +sc_send( l1sc_t *sc, int ch, char *msg, int len, int wait ) +{ + char type_and_subch; + int result; + + if( (ch < 0) || ( ch >= BRL1_NUM_SUBCHANS) ) { + return SC_BADSUBCH; + } + + /* Verify that this is an open subchannel + */ + if( sc->subch[ch].use == BRL1_SUBCH_FREE ) + { + return SC_NOPEN; + } + + type_and_subch = (BRL1_REQUEST | ((u_char)ch)); + result = brl1_send( sc, msg, len, type_and_subch, wait ); + + /* If we sent as much as we asked to, return "ok". */ + if( result == len ) + return( SC_SUCCESS ); + + /* Or, if we sent less, than either the UART is busy or + * we're trying to send too large a packet anyway. + */ + else if( result >= 0 && result < len ) + return( SC_BUSY ); + + /* Or, if something else went wrong (result < 0), then + * return that error value. + */ + else + return( result ); +} + + + +/* subch_pull_msg pulls a message off the receive queue for subch + * and places it the buffer pointed to by msg. This routine should only + * be called when the caller already knows a message is available on the + * receive queue (and, in the kernel, only when the subchannel data lock + * is held by the caller). + */ +static void +subch_pull_msg( brl1_sch_t *subch, char *msg, int *len ) +{ + sc_cq_t *q; /* receive queue */ + int before_wrap, /* packet may be split into two different */ + after_wrap; /* pieces to acommodate queue wraparound */ + + /* pull message off the receive queue */ + q = subch->iqp; + + cq_rem( q, *len ); /* remove length byte and store */ + cq_discard( q ); /* remove type/subch byte and discard */ + + if ( *len > 0 ) + (*len)--; /* don't count type/subch byte in length returned */ + + if( (q->opos + (*len)) > BRL1_QSIZE ) { + before_wrap = BRL1_QSIZE - q->opos; + after_wrap = (*len) - before_wrap; + } + else { + before_wrap = (*len); + after_wrap = 0; + } + + BCOPY( q->buf + q->opos, msg, before_wrap ); + if( after_wrap ) { + BCOPY( q->buf, msg + before_wrap, after_wrap ); + q->opos = after_wrap; + } + else { + q->opos = ((q->opos + before_wrap) & (BRL1_QSIZE - 1)); + } + atomicAddInt( &(subch->packet_arrived), -1 ); +} + + +/* sc_recv_poll can be called as a blocking or non-blocking function; + * it attempts to pull a message off of the subchannel specified + * in the argument list (ch). + * + * The "block" argument, if non-zero, is interpreted as a timeout + * delay (to avoid permanent waiting). 
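+ *
+ * A minimal sketch of a polled receive (the timeout value here is
+ * arbitrary and purely illustrative):
+ *
+ *     char resp[BRL1_QSIZE];
+ *     int  len;
+ *
+ *     if( sc_recv_poll( sc, ch, resp, &len, 10000 ) == SC_SUCCESS ) {
+ *         ... len bytes of response are now in resp ...
+ *     }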
+ */ + +int +sc_recv_poll( l1sc_t *sc, int ch, char *msg, int *len, uint64_t block ) +{ + int pl; /* lock cookie */ + int is_msg = 0; + brl1_sch_t *subch = &(sc->subch[ch]); + + rtc_time_t exp_time = rtc_time() + block; + + /* sanity check-- make sure this is an open subchannel */ + if( subch->use == BRL1_SUBCH_FREE ) + return( SC_NOPEN ); + + do { + + /* kick the next lower layer and see if it pulls anything in + */ + brl1_receive( sc ); + is_msg = subch->packet_arrived; + + } while( block && !is_msg && (rtc_time() < exp_time) ); + + if( !is_msg ) { + /* no message and we didn't care to wait for one */ + return( SC_NMSG ); + } + + SUBCH_DATA_LOCK( subch, pl ); + subch_pull_msg( subch, msg, len ); + SUBCH_DATA_UNLOCK( subch, pl ); + + return( SC_SUCCESS ); +} + + +/* Like sc_recv_poll, sc_recv_intr can be called in either a blocking + * or non-blocking mode. Rather than polling until an appointed timeout, + * however, sc_recv_intr sleeps on a syncrhonization variable until a + * signal from the lower layer tells us that a packet has arrived. + * + * sc_recv_intr can't be used with remote (router) L1s. + */ +int +sc_recv_intr( l1sc_t *sc, int ch, char *msg, int *len, uint64_t block ) +{ + int pl; /* lock cookie */ + int is_msg = 0; + brl1_sch_t *subch = &(sc->subch[ch]); + + do { + SUBCH_DATA_LOCK(subch, pl); + is_msg = subch->packet_arrived; + if( !is_msg && block ) { + /* wake me when you've got something */ + subch->rx_notify = sc_data_ready; + sv_wait( &(subch->arrive_sv), 0, &(subch->data_lock), pl ); + if( subch->use == BRL1_SUBCH_FREE ) { + /* oops-- somebody closed our subchannel while we were + * sleeping! + */ + + /* no need to unlock since the channel's closed anyhow */ + return( SC_NOPEN ); + } + } + } while( !is_msg && block ); + + if( !is_msg ) { + /* no message and we didn't care to wait for one */ + SUBCH_DATA_UNLOCK( subch, pl ); + return( SC_NMSG ); + } + + subch_pull_msg( subch, msg, len ); + SUBCH_DATA_UNLOCK( subch, pl ); + + return( SC_SUCCESS ); +} + +/* sc_command implements a (blocking) combination of sc_send and sc_recv. + * It is intended to be the SN1 equivalent of SN0's "elsc_command", which + * issued a system controller command and then waited for a response from + * the system controller before returning. + * + * cmd points to the outgoing command; resp points to the buffer in + * which the response is to be stored. Both buffers are assumed to + * be the same length; if there is any doubt as to whether the + * response buffer is long enough to hold the L1's response, then + * make it BRL1_QSIZE bytes-- no Bedrock<->L1 message can be any + * bigger. + * + * Be careful using the same buffer for both cmd and resp; it could get + * hairy if there were ever an L1 command reqeuest that spanned multiple + * packets. (On the other hand, that would require some additional + * rewriting of the L1 command interface anyway.) 
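+ *
+ * The typical open/construct/command/close sequence (used by, e.g., the
+ * elsc_* wrappers in l1_command.c) looks roughly like the following,
+ * with all error checking omitted:
+ *
+ *     char msg[BRL1_QSIZE];
+ *     int  len, ch, major, minor, bugfix;
+ *
+ *     ch  = sc_open( sc, L1_ADDR_LOCAL );
+ *     len = sc_construct_msg( sc, ch, msg, BRL1_QSIZE,
+ *                             L1_ADDR_TASK_GENERAL, L1_REQ_FW_REV, 0 );
+ *     sc_command( sc, ch, msg, msg, &len );
+ *     sc_close( sc, ch );
+ *     sc_interpret_resp( msg, 6, L1_ARG_INT, &major,
+ *                        L1_ARG_INT, &minor, L1_ARG_INT, &bugfix );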
+ */ +#define __RETRIES 50 +#define __WAIT_SEND ( sc->uart != BRL1_LOCALUART ) +#define __WAIT_RECV 10000000 + + +int +sc_command( l1sc_t *sc, int ch, char *cmd, char *resp, int *len ) +{ +#ifndef CONFIG_SERIAL_SGI_L1_PROTOCOL + return SC_NMSG; +#else + int result; + int retries; + + if ( IS_RUNNING_ON_SIMULATOR() ) + return SC_NMSG; + + retries = __RETRIES; + + while( (result = sc_send( sc, ch, cmd, *len, __WAIT_SEND )) < 0 ) { + if( result == SC_BUSY ) { + retries--; + if( retries <= 0 ) + return result; + uart_delay(500); + } + else { + return result; + } + } + + /* block on sc_recv_* */ +#ifdef notyet + if( sc->uart == BRL1_LOCALUART ) { + return( sc_recv_intr( sc, ch, resp, len, __WAIT_RECV ) ); + } + else +#endif + { + return( sc_recv_poll( sc, ch, resp, len, __WAIT_RECV ) ); + } +#endif /* CONFIG_SERIAL_SGI_L1_PROTOCOL */ +} + +/* sc_command_kern is a knuckle-dragging, no-patience version of sc_command + * used in situations where the kernel has a command that shouldn't be + * delayed until the send buffer clears. sc_command should be used instead + * under most circumstances. + */ +int +sc_command_kern( l1sc_t *sc, int ch, char *cmd, char *resp, int *len ) +{ +#ifndef CONFIG_SERIAL_SGI_L1_PROTOCOL + return SC_NMSG; +#else + int result; + + if ( IS_RUNNING_ON_SIMULATOR() ) + return SC_NMSG; + + if( (result = sc_send( sc, ch, cmd, *len, 1 )) < 0 ) { + return result; + } + + return( sc_recv_poll( sc, ch, resp, len, __WAIT_RECV ) ); +#endif /* CONFIG_SERIAL_SGI_L1_PROTOCOL */ +} + + + +/* sc_poll checks the queue corresponding to the given + * subchannel to see if there's anything available. If + * not, it kicks the brl1 layer and then checks again. + * + * Returns 1 if input is available on the given queue, + * 0 otherwise. + */ +int +sc_poll( l1sc_t *sc, int ch ) +{ + brl1_sch_t *subch = &(sc->subch[ch]); + + if( subch->packet_arrived ) + return 1; + + brl1_receive( sc ); + + if( subch->packet_arrived ) + return 1; + + return 0; +} + +/* for now, sc_init just calls brl1_init + */ +void +sc_init( l1sc_t *sc, nasid_t nasid, net_vec_t uart ) +{ + if ( !IS_RUNNING_ON_SIMULATOR() ) + brl1_init( sc, nasid, uart ); +} + +/* sc_dispatch_env_event handles events sent from the system control + * network's environmental monitor tasks. 
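+ *
+ * Each event's argument buffer uses the same type/value encoding that
+ * sc_construct_msg produces; for the environmental events handled here
+ * that works out to roughly (illustrative layout, not a formal spec):
+ *
+ *     [L1_ARG_INT][32-bit ESP code][L1_ARG_ASCII]["message text"...'\0']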
+ */ +static void +sc_dispatch_env_event( uint code, int argc, char *args, int maxlen ) +{ + int j, i = 0; + uint32_t ESPcode; + + switch( code ) { + /* for now, all codes do the same thing: grab two arguments + * and print a cmn_err_tag message */ + default: + /* check number of arguments */ + if( argc != 2 ) { + L1_DBG_PRF(( "sc_dispatch_env_event: " + "expected 2 arguments, got %d\n", argc )); + return; + } + + /* get ESP code (integer argument) */ + if( args[i++] != L1_ARG_INT ) { + L1_DBG_PRF(( "sc_dispatch_env_event: " + "expected integer argument\n" )); + return; + } + /* WARNING: highly endian */ + COPY_BUFFER_TO_INT(args, i, ESPcode); + + /* verify string argument */ + if( args[i++] != L1_ARG_ASCII ) { + L1_DBG_PRF(( "sc_dispatch_env_event: " + "expected an ASCII string\n" )); + return; + } + for( j = i; j < maxlen; j++ ) { + if( args[j] == '\0' ) break; /* found string termination */ + } + if( j == maxlen ) { + j--; + L1_DBG_PRF(( "sc_dispatch_env_event: " + "message too long-- truncating\n" )); + } + + /* strip out trailing cr/lf */ + for( ; + j > 1 && ((args[j-1] == 0xd) || (args[j-1] == 0xa)); + j-- ); + args[j] = '\0'; + + /* strip out leading cr/lf */ + for( ; + i < j && ((args[i] == 0xd) || (args[i] == 0xa)); + i++ ); + + /* write the event to syslog */ +#ifdef IRIX + cmn_err_tag( ESPcode, CE_WARN, &(args[i]) ); +#endif + } +} + + +/* sc_event waits for events to arrive from the system controller, and + * prints appropriate messages to the syslog. + */ +static void +sc_event( l1sc_t *sc, int ch ) +{ + char event[BRL1_QSIZE]; + int i; + int result; + int event_len; + uint32_t ev_src; + uint32_t ev_code; + int ev_argc; + + while(1) { + + bzero( event, BRL1_QSIZE ); + + /* + * wait for an event + */ + result = sc_recv_intr( sc, ch, event, &event_len, 1 ); + if( result != SC_SUCCESS ) { + cmn_err( CE_WARN, "Error receiving sysctl event on nasid %d\n", + sc->nasid ); + } + else { + /* + * an event arrived; break it down into useful pieces + */ +#if defined(L1_DEBUG) && 0 + int ix; + printf( "Event packet received:\n" ); + for (ix = 0; ix < 64; ix++) { + printf( "%x%x ", ((event[ix] >> 4) & ((uint64_t)0xf)), + (event[ix] & ((uint64_t)0xf)) ); + if( (ix % 16) == 0xf ) printf( "\n" ); + } +#endif /* L1_DEBUG */ + + i = 0; + + /* get event source */ + COPY_BUFFER_TO_INT(event, i, ev_src); + COPY_BUFFER_TO_INT(event, i, ev_code); + + /* get arg count */ + ev_argc = (event[i++] & 0xffUL); + + /* dispatch events by task */ + switch( (ev_src & L1_ADDR_TASK_MASK) >> L1_ADDR_TASK_SHFT ) + { + case L1_ADDR_TASK_ENV: /* environmental monitor event */ + sc_dispatch_env_event( ev_code, ev_argc, &(event[i]), + BRL1_QSIZE - i ); + break; + + default: /* unhandled task type */ + L1_DBG_PRF(( "Unhandled event type received from system " + "controllers: source task %x\n", + (ev_src & L1_ADDR_TASK_MASK) >> L1_ADDR_TASK_SHFT + )); + } + } + + } +} + +/* sc_listen sets up a service thread to listen for incoming events. + */ +void +sc_listen( l1sc_t *sc ) +{ + int pl; + int result; + brl1_sch_t *subch; + + char msg[BRL1_QSIZE]; + int len; /* length of message being sent */ + int ch; /* system controller subchannel used */ + + extern int msc_shutdown_pri; + + /* grab the designated "event subchannel" */ + SUBCH_LOCK( sc, pl ); + subch = &(sc->subch[BRL1_EVENT_SUBCH]); + if( subch->use != BRL1_SUBCH_FREE ) { + SUBCH_UNLOCK( sc, pl ); + cmn_err( CE_WARN, "sysctl event subchannel in use! 
" + "Not monitoring sysctl events.\n" ); + return; + } + subch->use = BRL1_SUBCH_RSVD; + SUBCH_UNLOCK( sc, pl ); + + subch->packet_arrived = 0; + subch->target = BRL1_LOCALUART; + sv_init( &(subch->arrive_sv), SV_FIFO, NULL ); + spinlock_init( &(subch->data_lock), NULL ); + subch->tx_notify = NULL; + subch->rx_notify = sc_data_ready; + subch->iqp = kmem_zalloc_node( sizeof(sc_cq_t), KM_NOSLEEP, + NASID_TO_COMPACT_NODEID(sc->nasid) ); + ASSERT( subch->iqp ); + cq_init( subch->iqp ); + +#ifdef LINUX_KERNEL_THREADS + /* set up a thread to listen for events */ + sthread_create( "sysctl event handler", 0, 0, 0, msc_shutdown_pri, + KT_PS, (st_func_t *) sc_event, + (void *)sc, (void *)(uint64_t)BRL1_EVENT_SUBCH, 0, 0 ); +#endif + + /* signal the L1 to begin sending events */ + bzero( msg, BRL1_QSIZE ); + ch = sc_open( sc, L1_ADDR_LOCAL ); + + if( (len = sc_construct_msg( sc, ch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_EVENT_SUBCH, 2, + L1_ARG_INT, BRL1_EVENT_SUBCH )) < 0 ) + { + sc_close( sc, ch ); + L1_DBG_PRF(( "Failure in sc_construct_msg (%d)\n", len )); + goto err_return; + } + + result = sc_command_kern( sc, ch, msg, msg, &len ); + if( result < 0 ) + { + sc_close( sc, ch ); + L1_DBG_PRF(( "Failure in sc_command_kern (%d)\n", result )); + goto err_return; + } + + sc_close( sc, ch ); + + result = sc_interpret_resp( msg, 0 ); + if( result < 0 ) + { + L1_DBG_PRF(( "Failure in sc_interpret_resp (%d)\n", result )); + goto err_return; + } + + /* everything went fine; just return */ + return; + +err_return: + /* there was a problem; complain */ + cmn_err( CE_WARN, "failed to set sysctl event-monitoring subchannel. " + "Sysctl events will not be monitored.\n" ); +} + + +/********************************************************************* + * elscuart functions. These provide a uart-like interface to the + * bedrock/l1 protocol console channels. They are similar in form + * and intent to the elscuart_* functions defined for SN0 in elsc.c. + * + */ + +int _elscuart_flush( l1sc_t *sc ); + +/* Leave room in queue for CR/LF */ +#define ELSCUART_LINE_MAX (BRL1_QSIZE - 2) + + +/* + * _elscuart_putc provides an entry point to the L1 interface driver; + * writes a single character to the output queue. Flushes at the + * end of each line, and translates newlines into CR/LF. + * + * The kernel should generally use l1_cons_write instead, since it assumes + * buffering, translation, prefixing, etc. are done at a higher + * level. 
+ * + */ +int +_elscuart_putc( l1sc_t *sc, int c ) +{ + sc_cq_t *q; + + q = &(sc->oq[ MAP_OQ(L1_ELSCUART_SUBCH(get_myid())) ]); + + if( c != '\n' && c != '\r' && cq_used(q) >= ELSCUART_LINE_MAX ) { + cq_add( q, '\r' ); + cq_add( q, '\n' ); + _elscuart_flush( sc ); + sc->sol = 1; + } + + if( sc->sol && c != '\r' ) { + char prefix[16], *s; + + if( cq_room( q ) < 8 && _elscuart_flush(sc) < 0 ) + { + return -1; + } + + if( sc->verbose ) + { +#ifdef SUPPORT_PRINTING_M_FORMAT + sprintf( prefix, + "%c %d%d%d %M:", + 'A' + get_myid(), + sc->nasid / 100, + (sc->nasid / 10) % 10, + sc->nasid / 10, + sc->modid ); +#else + sprintf( prefix, + "%c %d%d%d 0x%x:", + 'A' + get_myid(), + sc->nasid / 100, + (sc->nasid / 10) % 10, + sc->nasid / 10, + sc->modid ); +#endif + + for( s = prefix; *s; s++ ) + cq_add( q, *s ); + } + sc->sol = 0; + + } + + if( cq_room( q ) < 2 && _elscuart_flush(sc) < 0 ) + { + return -1; + } + + if( c == '\n' ) { + cq_add( q, '\r' ); + sc->sol = 1; + } + + cq_add( q, (u_char) c ); + + if( c == '\n' ) { + /* flush buffered line */ + if( _elscuart_flush( sc ) < 0 ) + { + return -1; + } + } + + if( c== '\r' ) + { + sc->sol = 1; + } + + return 0; +} + + +/* + * _elscuart_getc reads a character from the input queue. This + * routine blocks. + */ +int +_elscuart_getc( l1sc_t *sc ) +{ + int r; + + while( (r = _elscuart_poll( sc )) == 0 ); + + if( r < 0 ) { + /* some error occured */ + return r; + } + + return _elscuart_readc( sc ); +} + + + +/* + * _elscuart_poll returns 1 if characters are ready for the + * calling processor, 0 if they are not + */ +int +_elscuart_poll( l1sc_t *sc ) +{ + int result; + + if( sc->cons_listen ) { + result = l1_cons_poll( sc ); + if( result ) + return result; + } + + return sc_poll( sc, L1_ELSCUART_SUBCH(get_myid()) ); +} + + + +/* _elscuart_readc is to be used only when _elscuart_poll has + * indicated that a character is waiting. Pulls a character + * of this processor's console queue and returns it. + * + */ +int +_elscuart_readc( l1sc_t *sc ) +{ + int c, pl; + sc_cq_t *q; + brl1_sch_t *subch; + + if( sc->cons_listen ) { + subch = &(sc->subch[ SC_CONS_SYSTEM ]); + q = subch->iqp; + + SUBCH_DATA_LOCK( subch, pl ); + if( !cq_empty( q ) ) { + cq_rem( q, c ); + if( cq_empty( q ) ) { + subch->packet_arrived = 0; + } + SUBCH_DATA_UNLOCK( subch, pl ); + return c; + } + SUBCH_DATA_UNLOCK( subch, pl ); + } + + subch = &(sc->subch[ L1_ELSCUART_SUBCH(get_myid()) ]); + q = subch->iqp; + + SUBCH_DATA_LOCK( subch, pl ); + if( cq_empty( q ) ) { + SUBCH_DATA_UNLOCK( subch, pl ); + return -1; + } + + cq_rem( q, c ); + if( cq_empty ( q ) ) { + subch->packet_arrived = 0; + } + SUBCH_DATA_UNLOCK( subch, pl ); + + return c; +} + + +/* + * _elscuart_flush flushes queued output to the the L1. + * This routine blocks until the queue is flushed. 
+ */ +int +_elscuart_flush( l1sc_t *sc ) +{ + int r, n; + char buf[BRL1_QSIZE]; + sc_cq_t *q = &(sc->oq[ MAP_OQ(L1_ELSCUART_SUBCH(get_myid())) ]); + + while( (n = cq_used(q)) ) { + + /* buffer queue contents */ + r = BRL1_QSIZE - q->opos; + + if( n > r ) { + BCOPY( q->buf + q->opos, buf, r ); + BCOPY( q->buf, buf + r, n - r ); + } else { + BCOPY( q->buf + q->opos, buf, n ); + } + + /* attempt to send buffer contents */ + r = brl1_send( sc, buf, cq_used( q ), + (BRL1_EVENT | L1_ELSCUART_SUBCH(get_myid())), 1 ); + + /* if no error, dequeue the sent characters; otherwise, + * return the error + */ + if( r >= SC_SUCCESS ) { + q->opos = (q->opos + r) % BRL1_QSIZE; + } + else { + return r; + } + } + + return 0; +} + + + +/* _elscuart_probe returns non-zero if the L1 (and + * consequently the elscuart) can be accessed + */ +int +_elscuart_probe( l1sc_t *sc ) +{ +#ifndef CONFIG_SERIAL_SGI_L1_PROTOCOL + return 0; +#else + char ver[BRL1_QSIZE]; + extern int elsc_version( l1sc_t *, char * ); + if ( IS_RUNNING_ON_SIMULATOR() ) + return 0; + return( elsc_version(sc, ver) >= 0 ); +#endif /* CONFIG_SERIAL_SGI_L1_PROTOCOL */ +} + + + +/* _elscuart_init zeroes out the l1sc_t console + * queues for this processor's console subchannel. + */ +void +_elscuart_init( l1sc_t *sc ) +{ + int pl; + brl1_sch_t *subch = &sc->subch[L1_ELSCUART_SUBCH(get_myid())]; + + SUBCH_DATA_LOCK(subch, pl); + + subch->packet_arrived = 0; + cq_init( subch->iqp ); + cq_init( &sc->oq[MAP_OQ(L1_ELSCUART_SUBCH(get_myid()))] ); + + SUBCH_DATA_UNLOCK(subch, pl); +} + + +#ifdef IRIX + +/* elscuart_syscon_listen causes the processor on which it's + * invoked to "listen" to the system console subchannel (that + * is, subchannel 4) for console input. + */ +void +elscuart_syscon_listen( l1sc_t *sc ) +{ + int pl; + brl1_sch_t *subch = &(sc->subch[SC_CONS_SYSTEM]); + + /* if we're already listening, don't bother */ + if( sc->cons_listen ) + return; + + SUBCH_DATA_LOCK( subch, pl ); + + subch->use = BRL1_SUBCH_RSVD; + subch->packet_arrived = 0; + + SUBCH_DATA_UNLOCK( subch, pl ); + + + sc->cons_listen = 1; +} +#endif /* IRIX */ diff --git a/arch/ia64/sn/io/l1_command.c b/arch/ia64/sn/io/l1_command.c new file mode 100644 index 000000000..1cf6a3c87 --- /dev/null +++ b/arch/ia64/sn/io/l1_command.c @@ -0,0 +1,1356 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/hcl_util.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/eeprom.h> +#include <asm/sn/ksys/i2c.h> +#include <asm/sn/cmn_err.h> +#include <asm/sn/router.h> +#include <asm/sn/module.h> +#include <asm/sn/ksys/l1.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/clksupport.h> + +#define ELSC_TIMEOUT 1000000 /* ELSC response timeout (usec) */ +#define LOCK_TIMEOUT 5000000 /* Hub lock timeout (usec) */ + +#define LOCAL_HUB LOCAL_HUB_ADDR +#define LD(x) (*(volatile uint64_t *)(x)) +#define SD(x, v) (LD(x) = (uint64_t) (v)) + +#define hub_cpu_get() 0 + +#define LBYTE(caddr) (*(char *) caddr) + +extern char *bcopy(const char * src, char * dest, int count); + +#define LDEBUG 0 + +/* + * ELSC data is in NVRAM page 7 at the following offsets. 
+ */ + +#define NVRAM_MAGIC_AD 0x700 /* magic number used for init */ +#define NVRAM_PASS_WD 0x701 /* password (4 bytes in length) */ +#define NVRAM_DBG1 0x705 /* virtual XOR debug switches */ +#define NVRAM_DBG2 0x706 /* physical XOR debug switches */ +#define NVRAM_CFG 0x707 /* ELSC Configuration info */ +#define NVRAM_MODULE 0x708 /* system module number */ +#define NVRAM_BIST_FLG 0x709 /* BIST flags (2 bits per nodeboard) */ +#define NVRAM_PARTITION 0x70a /* module's partition id */ +#define NVRAM_DOMAIN 0x70b /* module's domain id */ +#define NVRAM_CLUSTER 0x70c /* module's cluster id */ +#define NVRAM_CELL 0x70d /* module's cellid */ + +#define NVRAM_MAGIC_NO 0x37 /* value of magic number */ +#define NVRAM_SIZE 16 /* 16 bytes in nvram */ + +/* + * Declare a static ELSC NVRAM buffer to hold all data read from + * and written to NVRAM. This nvram "cache" will be used only during the + * IP27prom execution. + */ +static char elsc_nvram_buffer[NVRAM_SIZE]; + +#define SC_COMMAND sc_command + + +/* + * elsc_init + * + * Initialize ELSC structure + */ + +void elsc_init(elsc_t *e, nasid_t nasid) +{ + sc_init((l1sc_t *)e, nasid, BRL1_LOCALUART); +} + + +/* + * elsc_errmsg + * + * Given a negative error code, + * returns a corresponding static error string. + */ + +char *elsc_errmsg(int code) +{ + switch (code) { + case ELSC_ERROR_CMD_SEND: + return "Command send error"; + case ELSC_ERROR_CMD_CHECKSUM: + return "Command packet checksum error"; + case ELSC_ERROR_CMD_UNKNOWN: + return "Unknown command"; + case ELSC_ERROR_CMD_ARGS: + return "Invalid command argument(s)"; + case ELSC_ERROR_CMD_PERM: + return "Permission denied"; + case ELSC_ERROR_RESP_TIMEOUT: + return "System controller response timeout"; + case ELSC_ERROR_RESP_CHECKSUM: + return "Response packet checksum error"; + case ELSC_ERROR_RESP_FORMAT: + return "Response format error"; + case ELSC_ERROR_RESP_DIR: + return "Response direction error"; + case ELSC_ERROR_MSG_LOST: + return "Message lost because queue is full"; + case ELSC_ERROR_LOCK_TIMEOUT: + return "Timed out getting ELSC lock"; + case ELSC_ERROR_DATA_SEND: + return "Error sending data"; + case ELSC_ERROR_NIC: + return "NIC protocol error"; + case ELSC_ERROR_NVMAGIC: + return "Bad magic number in NVRAM"; + case ELSC_ERROR_MODULE: + return "Module location protocol error"; + default: + return "Unknown error"; + } +} + +/* + * elsc_nvram_init + * + * Initializes reads and writes to NVRAM. This will perform a single + * read to NVRAM, getting all data at once. When the PROM tries to + * read NVRAM, it returns the data from the buffer being read. If the + * PROM tries to write out to NVRAM, the write is done, and the internal + * buffer is updated. + */ + +void elsc_nvram_init(nasid_t nasid, uchar_t *elsc_nvram_data) +{ + /* This might require implementation of multiple-packet request/responses + * if it's to provide the same behavior that was available in SN0. + */ + nasid = nasid; + elsc_nvram_data = elsc_nvram_data; +} + +/* + * elsc_nvram_copy + * + * Copies the content of a buffer into the static buffer in this library. + */ + +void elsc_nvram_copy(uchar_t *elsc_nvram_data) +{ + memcpy(elsc_nvram_buffer, elsc_nvram_data, NVRAM_SIZE); +} + +/* + * elsc_nvram_write + * + * Copies bytes from 'buf' into NVRAM, starting at NVRAM address + * 'addr' which must be between 0 and 2047. + * + * If 'len' is non-negative, the routine copies 'len' bytes. 
+ * + * If 'len' is negative, the routine treats the data as a string and + * copies bytes up to and including a NUL-terminating zero, but not + * to exceed '-len' bytes. + */ + +int elsc_nvram_write(elsc_t *e, int addr, char *buf, int len) +{ + /* Here again, we might need to work out the details of a + * multiple-packet protocol. + */ + + /* For now, pretend it worked. */ + e = e; + addr = addr; + buf = buf; + return (len < 0 ? -len : len); +} + +/* + * elsc_nvram_read + * + * Copies bytes from NVRAM into 'buf', starting at NVRAM address + * 'addr' which must be between 0 and 2047. + * + * If 'len' is non-negative, the routine copies 'len' bytes. + * + * If 'len' is negative, the routine treats the data as a string and + * copies bytes up to and including a NUL-terminating zero, but not + * to exceed '-len' bytes. NOTE: This method is no longer supported. + * It was never used in the first place. + */ + +int elsc_nvram_read(elsc_t *e, int addr, char *buf, int len) +{ + /* multiple packets? */ + e = e; + addr = addr; + buf = buf; + len = len; + return -1; +} + +/* + * Command Set + */ + +int elsc_version(elsc_t *e, char *result) +{ + char msg[BRL1_QSIZE]; + int len; /* length of message being sent */ + int subch; /* system controller subchannel used */ + int major, /* major rev number */ + minor, /* minor rev number */ + bugfix; /* bugfix rev number */ + + /* fill in msg with the opcode & params */ + bzero( msg, BRL1_QSIZE ); + subch = sc_open( (l1sc_t *)e, L1_ADDR_LOCAL ); + + if( (len = sc_construct_msg( (l1sc_t *)e, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_FW_REV, 0 )) < 0 ) + { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_ARGS ); + } + + /* send the request to the L1 */ + if( SC_COMMAND( (l1sc_t *)e, subch, msg, msg, &len ) < 0 ) + { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_SEND ); + } + + /* free up subchannel */ + sc_close( (l1sc_t *)e, subch ); + + /* check response */ + if( sc_interpret_resp( msg, 6, L1_ARG_INT, &major, + L1_ARG_INT, &minor, L1_ARG_INT, &bugfix ) + < 0 ) + { + return( ELSC_ERROR_RESP_FORMAT ); + } + + sprintf( result, "%d.%d.%d", major, minor, bugfix ); + + return 0; +} + +int elsc_debug_set(elsc_t *e, u_char byte1, u_char byte2) +{ + /* shush compiler */ + e = e; + byte1 = byte1; + byte2 = byte2; + + /* fill in a buffer with the opcode & params; call sc_command */ + + return 0; +} + +int elsc_debug_get(elsc_t *e, u_char *byte1, u_char *byte2) +{ + char msg[BRL1_QSIZE]; + int subch; /* system controller subchannel used */ + int dbg_sw; /* holds debug switch settings */ + int len; /* number of msg buffer bytes used */ + + /* fill in msg with the opcode & params */ + bzero( msg, BRL1_QSIZE ); + if( (subch = sc_open( (l1sc_t *)e, L1_ADDR_LOCAL )) < 0 ) { + return( ELSC_ERROR_CMD_SEND ); + } + + if( (len = sc_construct_msg( (l1sc_t *)e, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_RDBG, 0 ) ) < 0 ) + { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_ARGS ); + } + + /* send the request to the L1 */ + if( sc_command( (l1sc_t *)e, subch, msg, msg, &len ) < 0 ) + { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_SEND ); + } + + /* free up subchannel */ + sc_close( (l1sc_t *)e, subch ); + + /* check response */ + if( sc_interpret_resp( msg, 2, L1_ARG_INT, &dbg_sw ) < 0 ) + { + return( ELSC_ERROR_RESP_FORMAT ); + } + + /* copy out debug switch settings (last two bytes of the + * integer response) + */ + *byte1 = ((dbg_sw >> 8) & 0xFF); + *byte2 = (dbg_sw & 0xFF); + + return 0; +} + +/* + * elsc_rack_bay_get fills in the two int * 
arguments with the + * rack number and bay number of the L1 being addressed + */ +int elsc_rack_bay_get(elsc_t *e, uint *rack, uint *bay) +{ + char msg[BRL1_QSIZE]; /* L1 request/response info */ + int subch; /* system controller subchannel used */ + int len; /* length of message */ + uint32_t buf32; /* used to copy 32-bit rack/bay out of msg */ + + /* fill in msg with the opcode & params */ + bzero( msg, BRL1_QSIZE ); + if( (subch = sc_open( (l1sc_t *)e, L1_ADDR_LOCAL )) < 0 ) { + return( ELSC_ERROR_CMD_SEND ); + } + + if( (len = sc_construct_msg( (l1sc_t *)e, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_RRACK, 0 )) < 0 ) + { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_ARGS ); + } + + + /* send the request to the L1 */ + if( sc_command( (l1sc_t *)e, subch, msg, msg, &len ) ) { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_SEND ); + } + + /* free up subchannel */ + sc_close(e, subch); + + /* check response */ + if( sc_interpret_resp( msg, 2, L1_ARG_INT, &buf32 ) < 0 ) + { + return( ELSC_ERROR_RESP_FORMAT ); + } + + /* extract rack/bay info + * + * note that the 32-bit value returned by the L1 actually + * only uses the low-order sixteen bits for rack and bay + * information. A "normal" L1 address puts rack and bay + * information in bit positions 12 through 28. So if + * we initially shift the value returned 12 bits to the left, + * we can use the L1 addressing #define's to extract the + * values we need (see ksys/l1.h for a complete list of the + * various fields of an L1 address). + */ + buf32 <<= L1_ADDR_BAY_SHFT; + + *rack = (buf32 & L1_ADDR_RACK_MASK) >> L1_ADDR_RACK_SHFT; + *bay = (buf32 & L1_ADDR_BAY_MASK) >> L1_ADDR_BAY_SHFT; + + return 0; +} + + +/* elsc_rack_bay_type_get fills in the three int * arguments with the + * rack number, bay number and brick type of the L1 being addressed. Note + * that if the L1 operation fails and this function returns an error value, + * garbage may be written to brick_type. + */ +int elsc_rack_bay_type_get( l1sc_t *sc, uint *rack, + uint *bay, uint *brick_type ) +{ + char msg[BRL1_QSIZE]; /* L1 request/response info */ + int subch; /* system controller subchannel used */ + int len; /* length of message */ + uint32_t buf32; /* used to copy 32-bit rack & bay out of msg */ + + /* fill in msg with the opcode & params */ + bzero( msg, BRL1_QSIZE ); + if( (subch = sc_open( sc, L1_ADDR_LOCAL )) < 0 ) { + return ELSC_ERROR_CMD_SEND; + } + + if( (len = sc_construct_msg( sc, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_RRBT, 0 )) < 0 ) + { + sc_close( sc, subch ); + return( ELSC_ERROR_CMD_ARGS ); + } + + /* send the request to the L1 */ + if( SC_COMMAND( sc, subch, msg, msg, &len ) ) { + sc_close( sc, subch ); + return( ELSC_ERROR_CMD_SEND ); + } + + /* free up subchannel */ + sc_close( sc, subch ); + + /* check response */ + if( sc_interpret_resp( msg, 4, L1_ARG_INT, &buf32, + L1_ARG_INT, brick_type ) < 0 ) + { + return( ELSC_ERROR_RESP_FORMAT ); + } + + /* extract rack/bay info + * + * note that the 32-bit value returned by the L1 actually + * only uses the low-order sixteen bits for rack and bay + * information. A "normal" L1 address puts rack and bay + * information in bit positions 12 through 28. So if + * we initially shift the value returned 12 bits to the left, + * we can use the L1 addressing #define's to extract the + * values we need (see ksys/l1.h for a complete list of the + * various fields of an L1 address). 
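+ *
+ * In other words (purely illustrative): the L1 hands the rack/bay pair
+ * back packed into the low sixteen bits of buf32; after the
+ *
+ *     buf32 <<= L1_ADDR_BAY_SHFT;
+ *
+ * below, those fields sit in the same bit positions a full L1 address
+ * would use, so the standard L1_ADDR_RACK_MASK/SHFT and
+ * L1_ADDR_BAY_MASK/SHFT extraction applies unchanged.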
+ */ + buf32 <<= L1_ADDR_BAY_SHFT; + + *rack = (buf32 & L1_ADDR_RACK_MASK) >> L1_ADDR_RACK_SHFT; + *bay = (buf32 & L1_ADDR_BAY_MASK) >> L1_ADDR_BAY_SHFT; + + /* convert brick_type to lower case */ + *brick_type = *brick_type - 'A' + 'a'; + + return 0; +} + + +int elsc_module_get(elsc_t *e) +{ + extern char brick_types[]; + uint rnum, rack, bay, bricktype, t; + int ret; + + /* construct module ID from rack and slot info */ + + if ((ret = elsc_rack_bay_type_get(e, &rnum, &bay, &bricktype)) < 0) + return ret; + + /* report unset location info. with a special, otherwise invalid modid */ + if (rnum == 0 && bay == 0) + return MODULE_NOT_SET; + + if (bay > MODULE_BPOS_MASK >> MODULE_BPOS_SHFT) + return ELSC_ERROR_MODULE; + + /* Build a moduleid_t-compatible rack number */ + + rack = 0; + t = rnum / 100; /* rack class (CPU/IO) */ + if (t > RACK_CLASS_MASK(rack) >> RACK_CLASS_SHFT(rack)) + return ELSC_ERROR_MODULE; + RACK_ADD_CLASS(rack, t); + rnum %= 100; + + t = rnum / 10; /* rack group */ + if (t > RACK_GROUP_MASK(rack) >> RACK_GROUP_SHFT(rack)) + return ELSC_ERROR_MODULE; + RACK_ADD_GROUP(rack, t); + + t = rnum % 10; /* rack number (one-based) */ + if (t-1 > RACK_NUM_MASK(rack) >> RACK_NUM_SHFT(rack)) + return ELSC_ERROR_MODULE; + RACK_ADD_NUM(rack, t); + + for( t = 0; t < MAX_BRICK_TYPES; t++ ) { + if( brick_types[t] == bricktype ) + return RBT_TO_MODULE(rack, bay, t); + } + + return ELSC_ERROR_MODULE; +} + +int elsc_partition_set(elsc_t *e, int partition) +{ + char msg[BRL1_QSIZE]; /* L1 request/response info */ + int subch; /* system controller subchannel used */ + int len; /* length of message */ + + /* fill in msg with the opcode & params */ + bzero( msg, BRL1_QSIZE ); + if( (subch = sc_open( e, L1_ADDR_LOCAL )) < 0 ) { + return ELSC_ERROR_CMD_SEND; + } + + if( (len = sc_construct_msg( e, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_PARTITION_SET, 2, + L1_ARG_INT, partition )) < 0 ) + { + + sc_close( e, subch ); + return( ELSC_ERROR_CMD_ARGS ); + } + + /* send the request to the L1 */ + if( sc_command( e, subch, msg, msg, &len ) ) { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_SEND ); + } + + /* free up subchannel */ + sc_close( e, subch ); + + /* check response */ + if( sc_interpret_resp( msg, 0 ) < 0 ) + { + return( ELSC_ERROR_RESP_FORMAT ); + } + + return( 0 ); +} + +int elsc_partition_get(elsc_t *e) +{ + char msg[BRL1_QSIZE]; /* L1 request/response info */ + int subch; /* system controller subchannel used */ + int len; /* length of message */ + uint32_t partition_id; /* used to copy partition id out of msg */ + + /* fill in msg with the opcode & params */ + bzero( msg, BRL1_QSIZE ); + if( (subch = sc_open( e, L1_ADDR_LOCAL )) < 0 ) { + return ELSC_ERROR_CMD_SEND; + } + + if( (len = sc_construct_msg( e, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_PARTITION_GET, 0 )) < 0 ) + + { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_ARGS ); + } + + /* send the request to the L1 */ + if( sc_command( e, subch, msg, msg, &len ) ) { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_SEND ); + } + + /* free up subchannel */ + sc_close( e, subch ); + + /* check response */ + if( sc_interpret_resp( msg, 2, L1_ARG_INT, &partition_id ) < 0 ) + { + return( ELSC_ERROR_RESP_FORMAT ); + } + + return( partition_id ); +} + + +/* + * elsc_cons_subch selects the "active" console subchannel for this node + * (i.e., the one that will currently receive input) + */ +int elsc_cons_subch(elsc_t *e, uint ch) +{ + char msg[BRL1_QSIZE]; /* L1 request/response info */ + int subch; /* system 
controller subchannel used */ + int len; /* length of message */ + + /* fill in msg with the opcode & params */ + bzero( msg, BRL1_QSIZE ); + subch = sc_open( e, L1_ADDR_LOCAL ); + + if( (len = sc_construct_msg( e, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_CONS_SUBCH, 2, + L1_ARG_INT, ch)) < 0 ) + { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_ARGS ); + } + + /* send the request to the L1 */ + if( SC_COMMAND( e, subch, msg, msg, &len ) ) { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_SEND ); + } + + /* free up subchannel */ + sc_close( e, subch ); + + /* check response */ + if( sc_interpret_resp( msg, 0 ) < 0 ) + { + return( ELSC_ERROR_RESP_FORMAT ); + } + + return 0; +} + + +/* + * elsc_cons_node should only be executed by one node. It declares to + * the system controller that the node from which it is called will be + * the owner of the system console. + */ +int elsc_cons_node(elsc_t *e) +{ + char msg[BRL1_QSIZE]; /* L1 request/response info */ + int subch; /* system controller subchannel used */ + int len; /* length of message */ + + /* fill in msg with the opcode & params */ + bzero( msg, BRL1_QSIZE ); + subch = sc_open( e, L1_ADDR_LOCAL ); + + if( (len = sc_construct_msg( e, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_CONS_NODE, 0 )) < 0 ) + { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_ARGS ); + } + + /* send the request to the L1 */ + if( SC_COMMAND( e, subch, msg, msg, &len ) ) { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_SEND ); + } + + /* free up subchannel */ + sc_close( e, subch ); + + /* check response */ + if( sc_interpret_resp( msg, 0 ) < 0 ) + { + return( ELSC_ERROR_RESP_FORMAT ); + } + + return 0; +} + + +/* elsc_display_line writes up to 12 characters to either the top or bottom + * line of the L1 display. line points to a buffer containing the message + * to be displayed. The zero-based line number is specified by lnum (so + * lnum == 0 specifies the top line and lnum == 1 specifies the bottom). + * Lines longer than 12 characters, or line numbers not less than + * L1_DISPLAY_LINES, cause elsc_display_line to return an error. + */ +int elsc_display_line(elsc_t *e, char *line, int lnum) +{ + char msg[BRL1_QSIZE]; + int subch; /* system controller subchannel used */ + int len; /* number of msg buffer bytes used */ + + /* argument sanity checking */ + if( !(lnum < L1_DISPLAY_LINES) ) + return( ELSC_ERROR_CMD_ARGS ); + if( !(strlen( line ) <= L1_DISPLAY_LINE_LENGTH) ) + return( ELSC_ERROR_CMD_ARGS ); + + /* fill in msg with the opcode & params */ + bzero( msg, BRL1_QSIZE ); + subch = sc_open( (l1sc_t *)e, L1_ADDR_LOCAL ); + + if( (len = sc_construct_msg( (l1sc_t *)e, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + (L1_REQ_DISP1+lnum), 2, + L1_ARG_ASCII, line )) < 0 ) + { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_ARGS ); + } + + /* send the request to the L1 */ + if( SC_COMMAND( (l1sc_t *)e, subch, msg, msg, &len ) < 0 ) + { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_SEND ); + } + + /* free up subchannel */ + sc_close( (l1sc_t *)e, subch ); + + /* check response */ + if( sc_interpret_resp( msg, 0 ) < 0 ) + { + return( ELSC_ERROR_RESP_FORMAT ); + } + + return 0; +} + + +/* elsc_display_mesg silently drops message characters beyond the 12th. 
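+ * For example, a (hypothetical) call such as
+ *
+ *     elsc_display_mesg( e, "Kernel up" );
+ *
+ * puts the text on the L1 display one L1_DISPLAY_LINE_LENGTH-character
+ * line at a time.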
+ */ +int elsc_display_mesg(elsc_t *e, char *chr) +{ + + char line[L1_DISPLAY_LINE_LENGTH+1]; + int numlines, i; + int result; + + numlines = (strlen( chr ) + L1_DISPLAY_LINE_LENGTH - 1) / + L1_DISPLAY_LINE_LENGTH; + + if( numlines > L1_DISPLAY_LINES ) + numlines = L1_DISPLAY_LINES; + + for( i = 0; i < numlines; i++ ) + { + strncpy( line, chr, L1_DISPLAY_LINE_LENGTH ); + line[L1_DISPLAY_LINE_LENGTH] = '\0'; + + /* generally we want to leave the first line of the L1 display + * alone (so the L1 can manipulate it). If you need to be able + * to display to both lines (for debugging purposes), define + * L1_DISP_2LINES in irix/kern/ksys/l1.h, or add -DL1_DISP_2LINES + * to your 'defs file. + */ +#if defined(L1_DISP_2LINES) + if( (result = elsc_display_line( e, line, i )) < 0 ) +#else + if( (result = elsc_display_line( e, line, i+1 )) < 0 ) +#endif + + return result; + + chr += L1_DISPLAY_LINE_LENGTH; + } + + return 0; +} + + +int elsc_password_set(elsc_t *e, char *password) +{ + /* shush compiler */ + e = e; + password = password; + + /* fill in buffer with the opcode & params; call elsc_command */ + + return 0; +} + +int elsc_password_get(elsc_t *e, char *password) +{ + /* shush compiler */ + e = e; + password = password; + + /* fill in buffer with the opcode & params; call elsc_command */ + + return 0; +} + + +/* + * sc_portspeed_get + * + * retrieve the current portspeed setting for the bedrock II + */ +int sc_portspeed_get(l1sc_t *sc) +{ + char msg[BRL1_QSIZE]; + int len; /* length of message being sent */ + int subch; /* system controller subchannel used */ + int portspeed_a, portspeed_b; + /* ioport clock rates */ + + bzero( msg, BRL1_QSIZE ); + subch = sc_open( sc, L1_ADDR_LOCAL ); + + if( (len = sc_construct_msg( sc, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_PORTSPEED, + 0 )) < 0 ) + { + sc_close( sc, subch ); + return( ELSC_ERROR_CMD_ARGS ); + } + + /* send the request to the L1 */ + if( sc_command( sc, subch, msg, msg, &len ) < 0 ) + { + sc_close( sc, subch ); + return( ELSC_ERROR_CMD_SEND ); + } + + /* free up subchannel */ + sc_close( sc, subch ); + + /* check response */ + if( sc_interpret_resp( msg, 4, + L1_ARG_INT, &portspeed_a, + L1_ARG_INT, &portspeed_b ) < 0 ) + { + return( ELSC_ERROR_RESP_FORMAT ); + } + + /* for the c-brick, we ignore the portspeed_b value */ + return (portspeed_a ? 600 : 400); +} + +/* + * elsc_power_query + * + * To be used after system reset, this command returns 1 if the reset + * was the result of a power-on, 0 otherwise. + * + * The power query status is cleared to 0 after it is read. + */ + +int elsc_power_query(elsc_t *e) +{ + e = e; /* shush the compiler */ + + /* fill in buffer with the opcode & params; call elsc_command */ + + return 1; +} + +int elsc_rpwr_query(elsc_t *e, int is_master) +{ + /* shush the compiler */ + e = e; + is_master = is_master; + + /* fill in buffer with the opcode & params; call elsc_command */ + + return 0; +} + +/* + * elsc_power_down + * + * Sets up system to shut down in "sec" seconds (or modifies the + * shutdown time if one is already in effect). Use 0 to power + * down immediately. 
+ */ + +int elsc_power_down(elsc_t *e, int sec) +{ + /* shush compiler */ + e = e; + sec = sec; + + /* fill in buffer with the opcode & params; call elsc_command */ + + return 0; +} + + +int elsc_system_reset(elsc_t *e) +{ + char msg[BRL1_QSIZE]; + int subch; /* system controller subchannel used */ + int len; /* number of msg buffer bytes used */ + int result; + + /* fill in msg with the opcode & params */ + bzero( msg, BRL1_QSIZE ); + if( (subch = sc_open( e, L1_ADDR_LOCAL )) < 0 ) { + return ELSC_ERROR_CMD_SEND; + } + + if( (len = sc_construct_msg( e, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_RESET, 0 )) < 0 ) + { + sc_close( e, subch ); + return( ELSC_ERROR_CMD_ARGS ); + } + + /* send the request to the L1 */ + if( (result = sc_command( e, subch, msg, msg, &len )) ) { + sc_close( e, subch ); + if( result == SC_NMSG ) { + /* timeout is OK. We've sent the reset. Now it's just + * a matter of time... + */ + return( 0 ); + } + return( ELSC_ERROR_CMD_SEND ); + } + + /* free up subchannel */ + sc_close( e, subch ); + + /* check response */ + if( sc_interpret_resp( msg, 0 ) < 0 ) + { + return( ELSC_ERROR_RESP_FORMAT ); + } + + return 0; +} + + +int elsc_power_cycle(elsc_t *e) +{ + /* shush compiler */ + e = e; + + /* fill in buffer with the opcode & params; call sc_command */ + + return 0; +} + + +/* + * L1 Support for reading + * cbrick uid. + */ + +int elsc_nic_get(elsc_t *e, uint64_t *nic, int verbose) +{ + /* this parameter included only for SN0 compatibility */ + verbose = verbose; + + /* We don't go straight to the bedrock/L1 protocol on this one, but let + * the eeprom layer prepare the eeprom data as we would like it to + * appear to the caller + */ + return cbrick_uid_get( e->nasid, nic ); +} + +int _elsc_hbt(elsc_t *e, int ival, int rdly) +{ + e = e; + ival = ival; + rdly = rdly; + + /* fill in buffer with the opcode & params; call elsc_command */ + + return 0; +} + + +/* send a command string to an L1 */ +int sc_command_interp( l1sc_t *sc, l1addr_t compt, l1addr_t rack, l1addr_t bay, + char *cmd ) +{ + char msg[BRL1_QSIZE]; + int len; /* length of message being sent */ + int subch; /* system controller subchannel used */ + l1addr_t target; /* target system controller for command */ + + /* fill in msg with the opcode & params */ + bzero( msg, BRL1_QSIZE ); + subch = sc_open( sc, L1_ADDR_LOCAL ); + + L1_BUILD_ADDR( &target, compt, rack, bay, L1_ADDR_TASK_CMD ); + if( (len = sc_construct_msg( sc, subch, msg, BRL1_QSIZE, + target, L1_REQ_EXEC_CMD, 2, + L1_ARG_ASCII, cmd )) < 0 ) + { + sc_close( sc, subch ); + return( ELSC_ERROR_CMD_ARGS ); + } + + /* send the request to the L1 */ + if( sc_command( sc, subch, msg, msg, &len ) < 0 ) + { + sc_close( sc, subch ); + return( ELSC_ERROR_CMD_SEND ); + } + + /* free up subchannel */ + sc_close( sc, subch ); + + /* check response */ + if( sc_interpret_resp( msg, 0 ) < 0 ) + { + return( ELSC_ERROR_RESP_FORMAT ); + } + + return 0; +} + + +/* + * Routines for reading the R-brick's L1 + */ + +int router_module_get( nasid_t nasid, net_vec_t path ) +{ + uint rnum, rack, bay, t; + int ret; + l1sc_t sc; + + /* prepare l1sc_t struct */ + sc_init( &sc, nasid, path ); + + /* construct module ID from rack and slot info */ + + if ((ret = elsc_rack_bay_get(&sc, &rnum, &bay)) < 0) + return ret; + + /* report unset location info. 
with a special, otherwise invalid modid */ + if (rnum == 0 && bay == 0) + return MODULE_NOT_SET; + + if (bay > MODULE_BPOS_MASK >> MODULE_BPOS_SHFT) + return ELSC_ERROR_MODULE; + + /* Build a moduleid_t-compatible rack number */ + + rack = 0; + t = rnum / 100; /* rack class (CPU/IO) */ + if (t > RACK_CLASS_MASK(rack) >> RACK_CLASS_SHFT(rack)) + return ELSC_ERROR_MODULE; + RACK_ADD_CLASS(rack, t); + rnum %= 100; + + t = rnum / 10; /* rack group */ + if (t > RACK_GROUP_MASK(rack) >> RACK_GROUP_SHFT(rack)) + return ELSC_ERROR_MODULE; + RACK_ADD_GROUP(rack, t); + + t = rnum % 10; /* rack number (one-based) */ + if (t-1 > RACK_NUM_MASK(rack) >> RACK_NUM_SHFT(rack)) + return ELSC_ERROR_MODULE; + RACK_ADD_NUM(rack, t); + + ret = RBT_TO_MODULE(rack, bay, MODULE_RBRICK); + return ret; +} + + +/* + * iobrick routines + */ + +/* iobrick_rack_bay_type_get fills in the three int * arguments with the + * rack number, bay number and brick type of the L1 being addressed. Note + * that if the L1 operation fails and this function returns an error value, + * garbage may be written to brick_type. + */ +int iobrick_rack_bay_type_get( l1sc_t *sc, uint *rack, + uint *bay, uint *brick_type ) +{ + char msg[BRL1_QSIZE]; /* L1 request/response info */ + int subch; /* system controller subchannel used */ + int len; /* length of message */ + uint32_t buf32; /* used to copy 32-bit rack & bay out of msg */ + + /* fill in msg with the opcode & params */ + bzero( msg, BRL1_QSIZE ); + if( (subch = sc_open( sc, L1_ADDR_LOCALIO )) < 0 ) { + return( ELSC_ERROR_CMD_SEND ); + } + + if( (len = sc_construct_msg( sc, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_RRBT, 0 )) < 0 ) + { + sc_close( sc, subch ); + return( ELSC_ERROR_CMD_ARGS ); + } + + /* send the request to the L1 */ + if( sc_command( sc, subch, msg, msg, &len ) ) { + sc_close( sc, subch ); + return( ELSC_ERROR_CMD_SEND ); + } + + /* free up subchannel */ + sc_close( sc, subch ); + + /* check response */ + if( sc_interpret_resp( msg, 4, L1_ARG_INT, &buf32, + L1_ARG_INT, brick_type ) < 0 ) + { + return( ELSC_ERROR_RESP_FORMAT ); + } + + /* extract rack/bay info + * + * note that the 32-bit value returned by the L1 actually + * only uses the low-order sixteen bits for rack and bay + * information. A "normal" L1 address puts rack and bay + * information in bit positions 12 through 28. So if + * we initially shift the value returned 12 bits to the left, + * we can use the L1 addressing #define's to extract the + * values we need (see ksys/l1.h for a complete list of the + * various fields of an L1 address). + */ + buf32 <<= L1_ADDR_BAY_SHFT; + + *rack = (buf32 & L1_ADDR_RACK_MASK) >> L1_ADDR_RACK_SHFT; + *bay = (buf32 & L1_ADDR_BAY_MASK) >> L1_ADDR_BAY_SHFT; + + return 0; +} + + +int iobrick_module_get(l1sc_t *sc) +{ + uint rnum, rack, bay, brick_type, t; + int ret; + + /* construct module ID from rack and slot info */ + + if ((ret = iobrick_rack_bay_type_get(sc, &rnum, &bay, &brick_type)) < 0) + return ret; + + /* report unset location info. 
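Two bits of arithmetic recur in the module routines here: extracting rack and bay from the packed 32-bit L1 response (shift it up to where a normal L1 address keeps those fields, then mask), and decomposing the decimal rack label into class / group / one-based number for the moduleid. A self-contained sketch of both, with made-up values for the L1_ADDR_* shifts and masks (the real ones live in ksys/l1.h) and the RACK_ADD_* packing macros left out:

#include <stdio.h>
#include <stdint.h>

/* Illustrative stand-ins for the L1_ADDR_* constants in ksys/l1.h. */
#define ADDR_BAY_SHFT   12
#define ADDR_BAY_MASK   0x0001f000u     /* bits 12..16: bay  */
#define ADDR_RACK_SHFT  17
#define ADDR_RACK_MASK  0x1ffe0000u     /* bits 17..28: rack */

int main(void)
{
    /* 1. rack/bay extraction as in iobrick_rack_bay_type_get(): the reply
     *    carries both fields in its low sixteen bits, so shift it up to the
     *    normal L1 address layout first, then mask each field out.
     */
    uint32_t resp = 0x00000123u;             /* pretend L1 response word */
    uint32_t addr = resp << ADDR_BAY_SHFT;

    unsigned rack_label = (addr & ADDR_RACK_MASK) >> ADDR_RACK_SHFT;
    unsigned bay        = (addr & ADDR_BAY_MASK)  >> ADDR_BAY_SHFT;

    printf("rack label %u, bay %u\n", rack_label, bay);

    /* 2. moduleid decomposition as in router_module_get()/iobrick_module_get():
     *    hundreds digit = rack class, tens digit = group, units = number.
     */
    unsigned rnum = 237;                     /* example decimal rack label */
    unsigned rack_class = rnum / 100;
    unsigned rack_group = (rnum % 100) / 10;
    unsigned rack_num   = rnum % 10;         /* one-based */

    printf("rack %u -> class %u, group %u, number %u\n",
           rnum, rack_class, rack_group, rack_num);
    return 0;
}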
with a special, otherwise invalid modid */ + if (rnum == 0 && bay == 0) + return MODULE_NOT_SET; + + if (bay > MODULE_BPOS_MASK >> MODULE_BPOS_SHFT) + return ELSC_ERROR_MODULE; + + /* Build a moduleid_t-compatible rack number */ + + rack = 0; + t = rnum / 100; /* rack class (CPU/IO) */ + if (t > RACK_CLASS_MASK(rack) >> RACK_CLASS_SHFT(rack)) + return ELSC_ERROR_MODULE; + RACK_ADD_CLASS(rack, t); + rnum %= 100; + + t = rnum / 10; /* rack group */ + if (t > RACK_GROUP_MASK(rack) >> RACK_GROUP_SHFT(rack)) + return ELSC_ERROR_MODULE; + RACK_ADD_GROUP(rack, t); + + t = rnum % 10; /* rack number (one-based) */ + if (t-1 > RACK_NUM_MASK(rack) >> RACK_NUM_SHFT(rack)) + return ELSC_ERROR_MODULE; + RACK_ADD_NUM(rack, t); + + switch( brick_type ) { + case 'I': + brick_type = MODULE_IBRICK; break; + case 'P': + brick_type = MODULE_PBRICK; break; + case 'X': + brick_type = MODULE_XBRICK; break; + } + + ret = RBT_TO_MODULE(rack, bay, brick_type); + + return ret; +} + +/* iobrick_get_sys_snum asks the attached iobrick for the system + * serial number. This function will only be relevant to the master + * cbrick (the one attached to the bootmaster ibrick); other nodes + * may call the function, but the value returned to the master node + * will be the one used as the system serial number by the kernel. + */ + +int +iobrick_get_sys_snum( l1sc_t *sc, char *snum_str ) +{ + char msg[BRL1_QSIZE]; /* L1 request/response info */ + int subch; /* system controller subchannel used */ + int len; /* length of message */ + + /* fill in msg with the opcode & params */ + bzero( msg, BRL1_QSIZE ); + if( (subch = sc_open( sc, L1_ADDR_LOCALIO )) < 0 ) { + return( ELSC_ERROR_CMD_SEND ); + } + + if( (len = sc_construct_msg( sc, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_SYS_SERIAL, 0 )) < 0 ) + { + sc_close( sc, subch ); + return( ELSC_ERROR_CMD_ARGS ); + } + + /* send the request to the L1 */ + if( sc_command( sc, subch, msg, msg, &len ) ) { + sc_close( sc, subch ); + return( ELSC_ERROR_CMD_SEND ); + } + + /* free up subchannel */ + sc_close( sc, subch ); + + /* check response */ + return( sc_interpret_resp( msg, 2, L1_ARG_ASCII, snum_str ) ); +} + + +/* + * The following functions apply (or cut off) power to the specified + * pci bus or slot. + */ + +int +iobrick_pci_slot_pwr( l1sc_t *sc, int bus, int slot, int up ) +{ + char cmd[BRL1_QSIZE]; + unsigned rack, bay, brick_type; + if( iobrick_rack_bay_type_get( sc, &rack, &bay, &brick_type ) < 0 ) + return( ELSC_ERROR_CMD_SEND ); + sprintf( cmd, "pci %d %d %s", bus, slot, + (up ? "u" : "d") ); + return( sc_command_interp + ( sc, L1_ADDR_TYPE_L1, rack, bay, cmd ) ); +} + +int +iobrick_pci_bus_pwr( l1sc_t *sc, int bus, int up ) +{ + char cmd[BRL1_QSIZE]; + unsigned rack, bay, brick_type; + if( iobrick_rack_bay_type_get( sc, &rack, &bay, &brick_type ) < 0 ) + return( ELSC_ERROR_CMD_SEND ); + sprintf( cmd, "pci %d %s", bus, (up ? 
"u" : "d") ); + return( sc_command_interp + ( sc, L1_ADDR_TYPE_L1, rack, bay, cmd ) ); +} + + +/* get the L1 firmware version for an iobrick */ +int +iobrick_sc_version( l1sc_t *sc, char *result ) +{ + char msg[BRL1_QSIZE]; + int len; /* length of message being sent */ + int subch; /* system controller subchannel used */ + int major, /* major rev number */ + minor, /* minor rev number */ + bugfix; /* bugfix rev number */ + + /* fill in msg with the opcode & params */ + bzero( msg, BRL1_QSIZE ); + subch = sc_open( sc, L1_ADDR_LOCALIO ); + + if( (len = sc_construct_msg( sc, subch, msg, BRL1_QSIZE, + L1_ADDR_TASK_GENERAL, + L1_REQ_FW_REV, 0 )) < 0 ) + { + sc_close( sc, subch ); + return( ELSC_ERROR_CMD_ARGS ); + } + + /* send the request to the L1 */ + if( SC_COMMAND(sc, subch, msg, msg, &len ) < 0 ) + { + sc_close( sc, subch ); + return( ELSC_ERROR_CMD_SEND ); + } + + /* free up subchannel */ + sc_close( sc, subch ); + + /* check response */ + if( sc_interpret_resp( msg, 6, L1_ARG_INT, &major, + L1_ARG_INT, &minor, L1_ARG_INT, &bugfix ) + < 0 ) + { + return( ELSC_ERROR_RESP_FORMAT ); + } + + sprintf( result, "%d.%d.%d", major, minor, bugfix ); + + return 0; +} + + + +/* elscuart routines + * + * Most of the elscuart functionality is implemented in l1.c. The following + * is directly "recycled" from elsc.c. + */ + + +/* + * _elscuart_puts + */ + +int _elscuart_puts(elsc_t *e, char *s) +{ + int c; + + if (s == 0) + s = "<NULL>"; + + while ((c = LBYTE(s)) != 0) { + if (_elscuart_putc(e, c) < 0) + return -1; + s++; + } + + return 0; +} + + +/* + * elscuart wrapper routines + * + * The following routines are similar to their counterparts in l1.c, + * except instead of taking an elsc_t pointer directly, they call + * a global routine "get_elsc" to obtain the pointer. + * This is useful when the elsc is employed for stdio. + */ + +int elscuart_probe(void) +{ + return _elscuart_probe(get_elsc()); +} + +void elscuart_init(void *init_data) +{ + _elscuart_init(get_elsc()); + /* dummy variable included for driver compatability */ + init_data = init_data; +} + +int elscuart_poll(void) +{ + return _elscuart_poll(get_elsc()); +} + +int elscuart_readc(void) +{ + return _elscuart_readc(get_elsc()); +} + +int elscuart_getc(void) +{ + return _elscuart_getc(get_elsc()); +} + +int elscuart_puts(char *s) +{ + return _elscuart_puts(get_elsc(), s); +} + +int elscuart_putc(int c) +{ + return _elscuart_putc(get_elsc(), c); +} + +int elscuart_flush(void) +{ + return _elscuart_flush(get_elsc()); +} diff --git a/arch/ia64/sn/io/labelcl.c b/arch/ia64/sn/io/labelcl.c new file mode 100644 index 000000000..c8b714508 --- /dev/null +++ b/arch/ia64/sn/io/labelcl.c @@ -0,0 +1,665 @@ +/* labelcl - SGI's Hwgraph Compatibility Layer. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ + Colin Ngam may be reached by email at cngam@sgi.com + +*/ + +#include <linux/types.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <linux/devfs_fs.h> +#include <linux/devfs_fs_kernel.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> + +/* +** Very simple and dumb string table that supports only find/insert. +** In practice, if this table gets too large, we may need a more +** efficient data structure. Also note that currently there is no +** way to delete an item once it's added. Therefore, name collision +** will return an error. +*/ + +struct string_table label_string_table; + + + +/* + * string_table_init - Initialize the given string table. + */ +void +string_table_init(struct string_table *string_table) +{ + string_table->string_table_head = NULL; + string_table->string_table_generation = 0; + + /* + * We nedd to initialize locks here! + */ + + return; +} + + +/* + * string_table_destroy - Destroy the given string table. + */ +void +string_table_destroy(struct string_table *string_table) +{ + struct string_table_item *item, *next_item; + + item = string_table->string_table_head; + while (item) { + next_item = item->next; + + STRTBL_FREE(item); + item = next_item; + } + + /* + * We need to destroy whatever lock we have here + */ + + return; +} + + + +/* + * string_table_insert - Insert an entry in the string table .. duplicate + * names are not allowed. + */ +char * +string_table_insert(struct string_table *string_table, char *name) +{ + struct string_table_item *item, *new_item = NULL, *last_item = NULL; + +again: + /* + * Need to lock the table .. + */ + item = string_table->string_table_head; + last_item = NULL; + + while (item) { + if (!strcmp(item->string, name)) { + /* + * If we allocated space for the string and the found that + * someone else already entered it into the string table, + * free the space we just allocated. + */ + if (new_item) + STRTBL_FREE(new_item); + + + /* + * Search optimization: move the found item to the head + * of the list. + */ + if (last_item != NULL) { + last_item->next = item->next; + item->next = string_table->string_table_head; + string_table->string_table_head = item; + } + goto out; + } + last_item = item; + item=item->next; + } + + /* + * name was not found, so add it to the string table. + */ + if (new_item == NULL) { + long old_generation = string_table->string_table_generation; + + new_item = STRTBL_ALLOC(strlen(name)); + + strcpy(new_item->string, name); + + /* + * While we allocated memory for the new string, someone else + * changed the string table. + */ + if (old_generation != string_table->string_table_generation) { + goto again; + } + } else { + /* At this we only have the string table lock in access mode. + * Promote the access lock to an update lock for the string + * table insertion below. + */ + long old_generation = + string_table->string_table_generation; + + /* + * After we did the unlock and wer waiting for update + * lock someone could have potentially updated + * the string table. Check the generation number + * for this case. If it is the case we have to + * try all over again. + */ + if (old_generation != + string_table->string_table_generation) { + goto again; + } + } + + /* + * At this point, we're committed to adding new_item to the string table. + */ + new_item->next = string_table->string_table_head; + item = string_table->string_table_head = new_item; + string_table->string_table_generation++; + +out: + /* + * Need to unlock here. 
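string_table_insert() above is a find-or-insert over a singly linked list, with two wrinkles: a found name is moved to the head of the list so frequently used labels stay cheap to look up, and a generation counter lets the allocation happen outside the lock, retrying the walk if the table changed in the meantime. A minimal single-threaded sketch of the find-or-insert and the move-to-front (the lock and generation check are omitted):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct item {
    struct item *next;
    char string[1];             /* string stored inline, as with STRTBL_ALLOC() */
};

static struct item *head;

/* Return the canonical copy of 'name', inserting it if it is not present. */
static const char *table_insert(const char *name)
{
    struct item *it, *prev = NULL;

    for (it = head; it != NULL; prev = it, it = it->next) {
        if (strcmp(it->string, name) == 0) {
            if (prev) {                 /* move-to-front: hot names stay cheap */
                prev->next = it->next;
                it->next = head;
                head = it;
            }
            return it->string;
        }
    }

    it = malloc(sizeof(*it) + strlen(name));
    if (!it)
        return NULL;
    strcpy(it->string, name);
    it->next = head;                    /* new names go on the head of the list */
    head = it;
    return it->string;
}

int main(void)
{
    printf("%s\n", table_insert("pci"));
    printf("%s\n", table_insert("node"));
    printf("%s\n", table_insert("pci"));   /* second lookup moves "pci" to the head */
    return 0;
}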
+ */ + return(item->string); +} + +/* + * labelcl_info_create - Creates the data structure that will hold the + * device private information asscoiated with a devfs entry. + * The pointer to this structure is what gets stored in the devfs + * (void * info). + */ +labelcl_info_t * +labelcl_info_create() +{ + + labelcl_info_t *new = NULL; + + /* Initial allocation does not include any area for labels */ + if ( ( new = (labelcl_info_t *)kmalloc (sizeof(labelcl_info_t), GFP_KERNEL) ) == NULL ) + return NULL; + + memset (new, 0, sizeof(labelcl_info_t)); + new->hwcl_magic = LABELCL_MAGIC; + return( new); + +} + +/* + * labelcl_info_destroy - Frees the data structure that holds the + * device private information asscoiated with a devfs entry. This + * data structure was created by device_info_create(). + * + * The caller is responsible for nulling the (void *info) in the + * corresponding devfs entry. + */ +int +labelcl_info_destroy(labelcl_info_t *labelcl_info) +{ + + if (labelcl_info == NULL) + return(0); + + /* Free the label list */ + if (labelcl_info->label_list) + kfree(labelcl_info->label_list); + + /* Now free the label info area */ + labelcl_info->hwcl_magic = 0; + kfree(labelcl_info); + + return(0); +} + +/* + * labelcl_info_add_LBL - Adds a new label entry in the labelcl info + * structure. + * + * Error is returned if we find another label with the same name. + */ +int +labelcl_info_add_LBL(devfs_handle_t de, + char *info_name, + arb_info_desc_t info_desc, + arbitrary_info_t info) +{ + labelcl_info_t *labelcl_info = NULL; + int num_labels; + int new_label_list_size; + label_info_t *old_label_list, *new_label_list = NULL; + char *name; + int i; + + if (de == NULL) + return(-1); + + labelcl_info = devfs_get_info(de); + if (labelcl_info == NULL) + return(-1); + + if (labelcl_info->hwcl_magic != LABELCL_MAGIC) + return(-1); + + if (info_name == NULL) + return(-1); + + if (strlen(info_name) >= LABEL_LENGTH_MAX) + return(-1); + + name = string_table_insert(&label_string_table, info_name); + + num_labels = labelcl_info->num_labels; + new_label_list_size = sizeof(label_info_t) * (num_labels+1); + + /* + * Create a new label info area. + */ + if (new_label_list_size != 0) { + new_label_list = (label_info_t *) kmalloc(new_label_list_size, GFP_KERNEL); + + if (new_label_list == NULL) + return(-1); + } + + /* + * At this point, we are committed to adding the labelled info, + * if there isn't already information there with the same name. + */ + old_label_list = labelcl_info->label_list; + + /* + * Look for matching info name. + */ + for (i=0; i<num_labels; i++) { + if (!strcmp(info_name, old_label_list[i].name)) { + /* Not allowed to add duplicate labelled info names. */ + kfree(new_label_list); + printk("labelcl_info_add_LBL: Duplicate label name %s for vertex 0x%p\n", info_name, de); + return(-1); + } + new_label_list[i] = old_label_list[i]; /* structure copy */ + } + + new_label_list[num_labels].name = name; + new_label_list[num_labels].desc = info_desc; + new_label_list[num_labels].info = info; + + labelcl_info->num_labels = num_labels+1; + labelcl_info->label_list = new_label_list; + + if (old_label_list != NULL) + kfree(old_label_list); + + return(0); +} + +/* + * labelcl_info_remove_LBL - Remove a label entry. 
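labelcl_info_add_LBL() grows the per-vertex label array by one slot per insert: allocate num+1 entries, copy the existing ones across (bailing out on a duplicate name), append the new entry, then swap the pointers and free the old array. A small sketch of that copy-append-swap pattern, with a plain int array standing in for label_info_t:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Append 'value' to a heap array of 'count' ints, growing it by one slot,
 * in the same copy-swap-free style as labelcl_info_add_LBL().
 */
static int append_one(int **arrayp, int *countp, int value)
{
    int n = *countp;
    int *grown = malloc((n + 1) * sizeof(*grown));

    if (!grown)
        return -1;
    if (n)
        memcpy(grown, *arrayp, n * sizeof(*grown));   /* copy existing entries */
    grown[n] = value;                                 /* append the new one    */

    free(*arrayp);                                    /* retire the old array  */
    *arrayp = grown;
    *countp = n + 1;
    return 0;
}

int main(void)
{
    int *labels = NULL, count = 0, i;

    append_one(&labels, &count, 10);
    append_one(&labels, &count, 20);
    append_one(&labels, &count, 30);

    for (i = 0; i < count; i++)
        printf("%d ", labels[i]);
    printf("\n");
    free(labels);
    return 0;
}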
+ */ +int +labelcl_info_remove_LBL(devfs_handle_t de, + char *info_name, + arb_info_desc_t *info_desc, + arbitrary_info_t *info) +{ + labelcl_info_t *labelcl_info = NULL; + int num_labels; + int new_label_list_size; + label_info_t *old_label_list, *new_label_list = NULL; + arb_info_desc_t label_desc_found; + arbitrary_info_t label_info_found; + int i; + + if (de == NULL) + return(-1); + + labelcl_info = devfs_get_info(de); + if (labelcl_info == NULL) + return(-1); + + if (labelcl_info->hwcl_magic != LABELCL_MAGIC) + return(-1); + + num_labels = labelcl_info->num_labels; + if (num_labels == 0) { + return(-1); + } + + /* + * Create a new info area. + */ + new_label_list_size = sizeof(label_info_t) * (num_labels-1); + if (new_label_list_size) { + new_label_list = (label_info_t *) kmalloc(new_label_list_size, GFP_KERNEL); + if (new_label_list == NULL) + return(-1); + } + + /* + * At this point, we are committed to removing the labelled info, + * if it still exists. + */ + old_label_list = labelcl_info->label_list; + + /* + * Find matching info name. + */ + for (i=0; i<num_labels; i++) { + if (!strcmp(info_name, old_label_list[i].name)) { + label_desc_found = old_label_list[i].desc; + label_info_found = old_label_list[i].info; + goto found; + } + if (i < num_labels-1) /* avoid walking off the end of the new vertex */ + new_label_list[i] = old_label_list[i]; /* structure copy */ + } + + /* The named info doesn't exist. */ + if (new_label_list) + kfree(new_label_list); + + return(-1); + +found: + /* Finish up rest of labelled info */ + for (i=i+1; i<num_labels; i++) + new_label_list[i-1] = old_label_list[i]; /* structure copy */ + + labelcl_info->num_labels = num_labels+1; + labelcl_info->label_list = new_label_list; + + kfree(old_label_list); + + if (info != NULL) + *info = label_info_found; + + if (info_desc != NULL) + *info_desc = label_desc_found; + + return(0); +} + + +/* + * labelcl_info_replace_LBL - Replace an existing label entry with the + * given new information. + * + * Label entry must exist. + */ +int +labelcl_info_replace_LBL(devfs_handle_t de, + char *info_name, + arb_info_desc_t info_desc, + arbitrary_info_t info, + arb_info_desc_t *old_info_desc, + arbitrary_info_t *old_info) +{ + labelcl_info_t *labelcl_info = NULL; + int num_labels; + label_info_t *label_list; + int i; + + if (de == NULL) + return(-1); + + labelcl_info = devfs_get_info(de); + if (labelcl_info == NULL) + return(-1); + + if (labelcl_info->hwcl_magic != LABELCL_MAGIC) + return(-1); + + num_labels = labelcl_info->num_labels; + if (num_labels == 0) { + return(-1); + } + + if (info_name == NULL) + return(-1); + + label_list = labelcl_info->label_list; + + /* + * Verify that information under info_name already exists. + */ + for (i=0; i<num_labels; i++) + if (!strcmp(info_name, label_list[i].name)) { + if (old_info != NULL) + *old_info = label_list[i].info; + + if (old_info_desc != NULL) + *old_info_desc = label_list[i].desc; + + label_list[i].info = info; + label_list[i].desc = info_desc; + + return(0); + } + + + return(-1); +} + +/* + * labelcl_info_get_LBL - Retrieve and return the information for the + * given label entry. 
+ */ +int +labelcl_info_get_LBL(devfs_handle_t de, + char *info_name, + arb_info_desc_t *info_desc, + arbitrary_info_t *info) +{ + labelcl_info_t *labelcl_info = NULL; + int num_labels; + label_info_t *label_list; + int i; + + if (de == NULL) + return(-1); + + labelcl_info = devfs_get_info(de); + if (labelcl_info == NULL) + return(-1); + + if (labelcl_info->hwcl_magic != LABELCL_MAGIC) + return(-1); + + num_labels = labelcl_info->num_labels; + if (num_labels == 0) { + return(-1); + } + + label_list = labelcl_info->label_list; + + /* + * Find information under info_name. + */ + for (i=0; i<num_labels; i++) + if (!strcmp(info_name, label_list[i].name)) { + if (info != NULL) + *info = label_list[i].info; + if (info_desc != NULL) + *info_desc = label_list[i].desc; + + return(0); + } + + return(-1); +} + +/* + * labelcl_info_get_next_LBL - returns the next label entry on the list. + */ +int +labelcl_info_get_next_LBL(devfs_handle_t de, + char *buffer, + arb_info_desc_t *info_descp, + arbitrary_info_t *infop, + labelcl_info_place_t *placeptr) +{ + labelcl_info_t *labelcl_info = NULL; + uint which_info; + label_info_t *label_list; + + if ((buffer == NULL) && (infop == NULL)) + return(-1); + + if (placeptr == NULL) + return(-1); + + if (de == NULL) + return(-1); + + labelcl_info = devfs_get_info(de); + if (labelcl_info == NULL) + return(-1); + + if (labelcl_info->hwcl_magic != LABELCL_MAGIC) + return(-1); + + which_info = *placeptr; + + if (which_info >= labelcl_info->num_labels) { + return(-1); + } + + label_list = (label_info_t *) labelcl_info->label_list; + + if (buffer != NULL) + strcpy(buffer, label_list[which_info].name); + + if (infop) + *infop = label_list[which_info].info; + + if (info_descp) + *info_descp = label_list[which_info].desc; + + *placeptr = which_info + 1; + + return(0); +} + + +int +labelcl_info_replace_IDX(devfs_handle_t de, + int index, + arbitrary_info_t info, + arbitrary_info_t *old_info) +{ + arbitrary_info_t *info_list_IDX; + labelcl_info_t *labelcl_info = NULL; + + if (de == NULL) { + printk(KERN_ALERT "labelcl: NULL devfs handle given.\n"); + return(-1); + } + + labelcl_info = devfs_get_info(de); + if (labelcl_info == NULL) { + printk(KERN_ALERT "labelcl: Entry does not have info pointer.\n"); + return(-1); + } + + if (labelcl_info->hwcl_magic != LABELCL_MAGIC) + return(-1); + + if ( (index < 0) || (index >= HWGRAPH_NUM_INDEX_INFO) ) + return(-1); + + /* + * Replace information at the appropriate index in this vertex with + * the new info. + */ + info_list_IDX = labelcl_info->IDX_list; + if (old_info != NULL) + *old_info = info_list_IDX[index]; + info_list_IDX[index] = info; + + return(0); + +} + +/* + * labelcl_info_connectpt_set - Sets the connectpt. + */ +int +labelcl_info_connectpt_set(struct devfs_entry *de, + struct devfs_entry *connect_de) +{ + arbitrary_info_t old_info; + int rv; + + rv = labelcl_info_replace_IDX(de, HWGRAPH_CONNECTPT, + (arbitrary_info_t) connect_de, &old_info); + + if (rv) { + return(rv); + } + + return(0); +} + + +/* + * labelcl_info_get_IDX - Returns the information pointed at by index. 
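labelcl_info_get_next_LBL() enumerates the label list through an opaque cursor: the caller hands in a place value, gets back the entry at that index, and the place is advanced for the next call until the list is exhausted. A small standalone sketch of that style of enumeration:

#include <stdio.h>
#include <string.h>

struct label { const char *name; int info; };

static const struct label labels[] = {
    { "connectpt", 1 }, { "master", 2 }, { "pcibr", 3 },
};
static const int num_labels = sizeof(labels) / sizeof(labels[0]);

/* Copy out the entry at *place and advance the cursor.  Returns 0 on
 * success, -1 once the list is exhausted, mirroring the calling
 * convention of labelcl_info_get_next_LBL().
 */
static int get_next_label(char *buffer, int *infop, unsigned *place)
{
    if (*place >= (unsigned)num_labels)
        return -1;

    strcpy(buffer, labels[*place].name);
    *infop = labels[*place].info;
    (*place)++;
    return 0;
}

int main(void)
{
    char name[32];
    int info;
    unsigned place = 0;

    while (get_next_label(name, &info, &place) == 0)
        printf("%s = %d\n", name, info);
    return 0;
}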
+ * + */ +int +labelcl_info_get_IDX(devfs_handle_t de, + int index, + arbitrary_info_t *info) +{ + arbitrary_info_t *info_list_IDX; + labelcl_info_t *labelcl_info = NULL; + + if (de == NULL) + return(-1); + + labelcl_info = devfs_get_info(de); + if (labelcl_info == NULL) + return(-1); + + if (labelcl_info->hwcl_magic != LABELCL_MAGIC) + return(-1); + + if ( (index < 0) || (index >= HWGRAPH_NUM_INDEX_INFO) ) + return(-1); + + /* + * Return information at the appropriate index in this vertex. + */ + info_list_IDX = labelcl_info->IDX_list; + if (info != NULL) + *info = info_list_IDX[index]; + + return(0); +} + +/* + * labelcl_info_connectpt_get - Retrieve the connect point for a device entry. + */ +struct devfs_entry * +labelcl_info_connectpt_get(struct devfs_entry *de) +{ + int rv; + arbitrary_info_t info; + + rv = labelcl_info_get_IDX(de, HWGRAPH_CONNECTPT, &info); + if (rv) + return(NULL); + + return((struct devfs_entry *)info); +} diff --git a/arch/ia64/sn/io/mem_refcnt.c b/arch/ia64/sn/io/mem_refcnt.c new file mode 100644 index 000000000..b2df4279b --- /dev/null +++ b/arch/ia64/sn/io/mem_refcnt.c @@ -0,0 +1,233 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/config.h> +#include <asm/sn/sgi.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/hcl_util.h> +#include <asm/sn/nodepda.h> +#include <asm/sn/hubspc.h> +#include <asm/sn/iograph.h> +#include <asm/sn/mem_refcnt.h> +#include <asm/sn/hwcntrs.h> +// From numa_hw.h + +#define MIGR_COUNTER_MAX_GET(nodeid) \ + (NODEPDA_MCD((nodeid))->migr_system_kparms.migr_threshold_reference) +/* + * Get the Absolute Theshold + */ +#define MIGR_THRESHOLD_ABS_GET(nodeid) ( \ + MD_MIG_VALUE_THRESH_GET(COMPACT_TO_NASID_NODEID(nodeid))) +/* + * Get the current Differential Threshold + */ +#define MIGR_THRESHOLD_DIFF_GET(nodeid) \ + (NODEPDA_MCD(nodeid)->migr_as_kparms.migr_base_threshold) + +#define NUM_OF_HW_PAGES_PER_SW_PAGE() (NBPP / MD_PAGE_SIZE) + +// #include "migr_control.h" + +int +mem_refcnt_attach(devfs_handle_t hub) +{ + devfs_handle_t refcnt_dev; + + hwgraph_char_device_add(hub, + "refcnt", + "hubspc_", + &refcnt_dev); + device_info_set(refcnt_dev, (void*)(ulong)HUBSPC_REFCOUNTERS); + + return (0); +} + + +/*ARGSUSED*/ +int +mem_refcnt_open(devfs_handle_t *devp, mode_t oflag, int otyp, cred_t *crp) +{ + cnodeid_t node; +#ifndef CONFIG_IA64_SGI_SN1 + extern int numnodes; +#endif + + ASSERT( (hubspc_subdevice_t)(ulong)device_info_get(*devp) == HUBSPC_REFCOUNTERS ); + + if (!cap_able(CAP_MEMORY_MGT)) { + return (EPERM); + } + + node = master_node_get(*devp); + + ASSERT( (node >= 0) && (node < numnodes) ); + + if (NODEPDA(node)->migr_refcnt_counterbuffer == NULL) { + return (ENODEV); + } + + ASSERT( NODEPDA(node)->migr_refcnt_counterbase != NULL ); + ASSERT( NODEPDA(node)->migr_refcnt_cbsize != (size_t)0 ); + + return (0); +} + +/*ARGSUSED*/ +int +mem_refcnt_close(devfs_handle_t dev, int oflag, int otyp, cred_t *crp) +{ + return 0; +} + +/*ARGSUSED*/ +int +mem_refcnt_mmap(devfs_handle_t dev, vhandl_t *vt, off_t off, size_t len, uint prot) +{ + cnodeid_t node; + int errcode; + char* buffer; + size_t blen; +#ifndef CONFIG_IA64_SGI_SN1 + extern int numnodes; +#endif + + ASSERT( (hubspc_subdevice_t)(ulong)device_info_get(dev) == 
HUBSPC_REFCOUNTERS ); + + node = master_node_get(dev); + + ASSERT( (node >= 0) && (node < numnodes) ); + + ASSERT( NODEPDA(node)->migr_refcnt_counterbuffer != NULL); + ASSERT( NODEPDA(node)->migr_refcnt_counterbase != NULL ); + ASSERT( NODEPDA(node)->migr_refcnt_cbsize != 0 ); + + /* + * XXXX deal with prot's somewhere around here.... + */ + + buffer = NODEPDA(node)->migr_refcnt_counterbuffer; + blen = NODEPDA(node)->migr_refcnt_cbsize; + + /* + * Force offset to be a multiple of sizeof(refcnt_t) + * We round up. + */ + + off = (((off - 1)/sizeof(refcnt_t)) + 1) * sizeof(refcnt_t); + + if ( ((buffer + blen) - (buffer + off + len)) < 0 ) { + return (EPERM); + } + + errcode = v_mapphys(vt, + buffer + off, + len); + + return errcode; +} + +/*ARGSUSED*/ +int +mem_refcnt_unmap(devfs_handle_t dev, vhandl_t *vt) +{ + return 0; +} + +/* ARGSUSED */ +int +mem_refcnt_ioctl(devfs_handle_t dev, + int cmd, + void *arg, + int mode, + cred_t *cred_p, + int *rvalp) +{ + cnodeid_t node; + int errcode; + extern int numnodes; + + ASSERT( (hubspc_subdevice_t)(ulong)device_info_get(dev) == HUBSPC_REFCOUNTERS ); + + node = master_node_get(dev); + + ASSERT( (node >= 0) && (node < numnodes) ); + + ASSERT( NODEPDA(node)->migr_refcnt_counterbuffer != NULL); + ASSERT( NODEPDA(node)->migr_refcnt_counterbase != NULL ); + ASSERT( NODEPDA(node)->migr_refcnt_cbsize != 0 ); + + errcode = 0; + + switch (cmd) { + case RCB_INFO_GET: + { + rcb_info_t rcb; + + rcb.rcb_len = NODEPDA(node)->migr_refcnt_cbsize; + + rcb.rcb_sw_sets = NODEPDA(node)->migr_refcnt_numsets; + rcb.rcb_sw_counters_per_set = numnodes; + rcb.rcb_sw_counter_size = sizeof(refcnt_t); + + rcb.rcb_base_pages = NODEPDA(node)->migr_refcnt_numsets / + NUM_OF_HW_PAGES_PER_SW_PAGE(); + rcb.rcb_base_page_size = NBPP; + rcb.rcb_base_paddr = ctob(slot_getbasepfn(node, 0)); + + rcb.rcb_cnodeid = node; + rcb.rcb_granularity = MD_PAGE_SIZE; +#ifdef notyet + rcb.rcb_hw_counter_max = MIGR_COUNTER_MAX_GET(node); + rcb.rcb_diff_threshold = MIGR_THRESHOLD_DIFF_GET(node); +#endif + rcb.rcb_abs_threshold = MIGR_THRESHOLD_ABS_GET(node); + rcb.rcb_num_slots = node_getnumslots(node); + + if (COPYOUT(&rcb, arg, sizeof(rcb_info_t))) { + errcode = EFAULT; + } + + break; + } + case RCB_SLOT_GET: + { + rcb_slot_t slot[MAX_MEM_SLOTS]; + int s; + int nslots; + + nslots = node_getnumslots(node); + ASSERT(nslots <= MAX_MEM_SLOTS); + for (s = 0; s < nslots; s++) { + slot[s].base = (uint64_t)ctob(slot_getbasepfn(node, s)); +#ifdef notyet + slot[s].size = (uint64_t)ctob(slot_getsize(node, s)); +#else + slot[s].size = (uint64_t)1; +#endif + } + if (COPYOUT(&slot[0], arg, nslots * sizeof(rcb_slot_t))) { + errcode = EFAULT; + } + + *rvalp = nslots; + break; + } + + default: + errcode = EINVAL; + break; + + } + + return errcode; +} diff --git a/arch/ia64/sn/io/ml_SN_init.c b/arch/ia64/sn/io/ml_SN_init.c new file mode 100644 index 000000000..4dbce801d --- /dev/null +++ b/arch/ia64/sn/io/ml_SN_init.c @@ -0,0 +1,661 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. 
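mem_refcnt_mmap() above forces the requested offset up to the next multiple of sizeof(refcnt_t) before mapping the counter buffer. A quick standalone check of that rounding expression, with a uint64_t standing in for the real refcnt_t (an assumption made only for illustration):

#include <stdio.h>
#include <stdint.h>

typedef uint64_t refcnt_t;   /* stand-in; the real type comes from the asm/sn headers */

int main(void)
{
    long offs[] = { 0, 1, 8, 9, 17 };
    size_t i;

    for (i = 0; i < sizeof(offs) / sizeof(offs[0]); i++) {
        long off = offs[i];
        /* same expression as mem_refcnt_mmap(): round up to a refcnt_t boundary */
        long rounded = (((off - 1) / (long)sizeof(refcnt_t)) + 1) * (long)sizeof(refcnt_t);
        printf("off %2ld -> %2ld\n", off, rounded);
    }
    return 0;
}

Note that with this form an offset of 0 is also pushed up to one full element, which is worth keeping in mind if the expression is reused elsewhere.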
+ * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/nodemask.h> +#include <asm/sn/sn_private.h> +#include <asm/sn/klconfig.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/synergy.h> + + +#if defined (CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) +#include <asm/sn/sn1/ip27config.h> +#include <asm/sn/sn1/hubdev.h> +#include <asm/sn/sn1/sn1.h> +#endif /* CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 */ + + +extern int numcpus; +extern char arg_maxnodes[]; +extern cpuid_t master_procid; +extern void * kmem_alloc_node(register size_t, register int , cnodeid_t); +extern synergy_da_t *Synergy_da_indr[]; + +extern int hasmetarouter; + +int maxcpus; +cpumask_t boot_cpumask; +hubreg_t region_mask = 0; + + +extern xwidgetnum_t hub_widget_id(nasid_t); + +#ifndef CONFIG_IA64_SGI_IO +#if defined (IP27) +short cputype = CPU_IP27; +#elif defined (IP33) +short cputype = CPU_IP33; +#elif defined (IP35) +short cputype = CPU_IP35; +#else +#error <BOMB! define new cputype here > +#endif +#endif /* CONFIG_IA64_SGI_IO */ + +static int fine_mode = 0; + +#ifndef CONFIG_IA64_SGI_IO +/* Global variables */ +pdaindr_t pdaindr[MAXCPUS]; +#endif + +static cnodemask_t hub_init_mask; /* Mask of cpu in a node doing init */ +static volatile cnodemask_t hub_init_done_mask; + /* Node mask where we wait for + * per hub initialization + */ +spinlock_t hub_mask_lock; /* Lock for hub_init_mask above. */ + +extern int valid_icache_reasons; /* Reasons to flush the icache */ +extern int valid_dcache_reasons; /* Reasons to flush the dcache */ +extern int numnodes; +extern u_char miniroot; +extern volatile int need_utlbmiss_patch; +extern void iograph_early_init(void); + +nasid_t master_nasid = INVALID_NASID; + + +/* + * mlreset(int slave) + * very early machine reset - at this point NO interrupts have been + * enabled; nor is memory, tlb, p0, etc setup. + * + * slave is zero when mlreset is called for the master processor and + * is nonzero thereafter. + */ + + +void +mlreset(int slave) +{ + if (!slave) { + /* + * We are the master cpu and node. + */ + master_nasid = get_nasid(); + set_master_bridge_base(); + FIXME("mlreset: Enable when we support ioc3 .."); +#ifndef CONFIG_IA64_SGI_IO + if (get_console_nasid() == master_nasid) + /* Set up the IOC3 */ + ioc3_mlreset((ioc3_cfg_t *)KL_CONFIG_CH_CONS_INFO(master_nasid)->config_base, + (ioc3_mem_t *)KL_CONFIG_CH_CONS_INFO(master_nasid)->memory_base); + + /* + * Initialize Master nvram base. + */ + nvram_baseinit(); + + fine_mode = is_fine_dirmode(); +#endif /* CONFIG_IA64_SGI_IO */ + + /* We're the master processor */ + master_procid = smp_processor_id(); + master_nasid = cpuid_to_nasid(master_procid); + + /* + * master_nasid we get back better be same as one from + * get_nasid() + */ + ASSERT_ALWAYS(master_nasid == get_nasid()); + +#ifndef CONFIG_IA64_SGI_IO + + /* + * Activate when calias is implemented. + */ + /* Set all nodes' calias sizes to 8k */ + for (i = 0; i < maxnodes; i++) { + nasid_t nasid; + int sn; + + nasid = COMPACT_TO_NASID_NODEID(i); + + /* + * Always have node 0 in the region mask, otherwise CALIAS accesses + * get exceptions since the hub thinks it is a node 0 address. 
+ */ + for (sn=0; sn<NUM_SUBNODES; sn++) { + REMOTE_HUB_PI_S(nasid, sn, PI_REGION_PRESENT, (region_mask | 1)); + REMOTE_HUB_PI_S(nasid, sn, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K); + } + + /* + * Set up all hubs to havew a big window pointing at + * widget 0. + * Memory mode, widget 0, offset 0 + */ + REMOTE_HUB_S(nasid, IIO_ITTE(SWIN0_BIGWIN), + ((HUB_PIO_MAP_TO_MEM << IIO_ITTE_IOSP_SHIFT) | + (0 << IIO_ITTE_WIDGET_SHIFT))); + } +#endif /* CONFIG_IA64_SGI_IO */ + + /* Set up the hub initialization mask and init the lock */ + CNODEMASK_CLRALL(hub_init_mask); + CNODEMASK_CLRALL(hub_init_done_mask); + + spin_lock_init(&hub_mask_lock); + + /* early initialization of iograph */ + iograph_early_init(); + + /* Initialize Hub Pseudodriver Management */ + hubdev_init(); + +#ifndef CONFIG_IA64_SGI_IO + /* + * Our IO system doesn't require cache writebacks. Set some + * variables appropriately. + */ + cachewrback = 0; + valid_icache_reasons &= ~(CACH_AVOID_VCES | CACH_IO_COHERENCY); + valid_dcache_reasons &= ~(CACH_AVOID_VCES | CACH_IO_COHERENCY); + + /* + * make sure we are running with the right rev of chips + */ + verify_snchip_rev(); + + /* + * Since we've wiped out memory at this point, we + * need to reset the ARCS vector table so that it + * points to appropriate functions in the kernel + * itself. In this way, we can maintain the ARCS + * vector table conventions without having to actually + * keep redundant PROM code in memory. + */ + he_arcs_set_vectors(); +#endif /* CONFIG_IA64_SGI_IO */ + + } else { /* slave != 0 */ + /* + * This code is performed ONLY by slave processors. + */ + + } +} + + +/* XXX - Move the meat of this to intr.c ? */ +/* + * Set up the platform-dependent fields in the nodepda. + */ +void init_platform_nodepda(nodepda_t *npda, cnodeid_t node) +{ + hubinfo_t hubinfo; + int sn; + cnodeid_t i; + ushort *numcpus_p; + + extern void router_map_init(nodepda_t *); + extern void router_queue_init(nodepda_t *,cnodeid_t); +#if defined(DEBUG) + extern lock_t intr_dev_targ_map_lock; + extern uint64_t intr_dev_targ_map_size; + + /* Initialize the lock to access the device - target cpu mapping + * table. This table is explicitly for debugging purposes only and + * to aid the "intrmap" idbg command + */ + if (node == 0) { + /* Make sure we do this only once . + * There is always a cnode 0 present. + */ + intr_dev_targ_map_size = 0; + init_spinlock(&intr_dev_targ_map_lock,"dtmap_lock",0); + } +#endif /* DEBUG */ + /* Allocate per-node platform-dependent data */ + hubinfo = (hubinfo_t)kmem_alloc_node(sizeof(struct hubinfo_s), GFP_ATOMIC, node); + + ASSERT_ALWAYS(hubinfo); + npda->pdinfo = (void *)hubinfo; + hubinfo->h_nodepda = npda; + hubinfo->h_cnodeid = node; + hubinfo->h_nasid = COMPACT_TO_NASID_NODEID(node); + + printk("init_platform_nodepda: hubinfo 0x%p, &hubinfo->h_crblock 0x%p\n", hubinfo, &hubinfo->h_crblock); + + spin_lock_init(&hubinfo->h_crblock); + + hubinfo->h_widgetid = hub_widget_id(hubinfo->h_nasid); + npda->xbow_peer = INVALID_NASID; + /* Initialize the linked list of + * router info pointers to the dependent routers + */ + npda->npda_rip_first = NULL; + /* npda_rip_last always points to the place + * where the next element is to be inserted + * into the list + */ + npda->npda_rip_last = &npda->npda_rip_first; + npda->dependent_routers = 0; + npda->module_id = INVALID_MODULE; + + /* + * Initialize the subnodePDA. 
+ */ + for (sn=0; sn<NUM_SUBNODES; sn++) { + SNPDA(npda,sn)->prof_count = 0; + SNPDA(npda,sn)->next_prof_timeout = 0; +// ajm +#ifndef CONFIG_IA64_SGI_IO + intr_init_vecblk(npda, node, sn); +#endif + } + + npda->vector_unit_busy = 0; + + spin_lock_init(&npda->vector_lock); + init_MUTEX_LOCKED(&npda->xbow_sema); /* init it locked? */ + spin_lock_init(&npda->fprom_lock); + + spin_lock_init(&npda->node_utlbswitchlock); + npda->ni_error_print = 0; +#ifndef CONFIG_IA64_SGI_IO + if (need_utlbmiss_patch) { + npda->node_need_utlbmiss_patch = 1; + npda->node_utlbmiss_patched = 1; + } +#endif + + /* + * Clear out the nasid mask. + */ + for (i = 0; i < NASID_MASK_BYTES; i++) + npda->nasid_mask[i] = 0; + + for (i = 0; i < numnodes; i++) { + nasid_t nasid = COMPACT_TO_NASID_NODEID(i); + + /* Set my mask bit */ + npda->nasid_mask[nasid / 8] |= (1 << nasid % 8); + } + +#ifndef CONFIG_IA64_SGI_IO + npda->node_first_cpu = get_cnode_cpu(node); +#endif + + if (npda->node_first_cpu != CPU_NONE) { + /* + * Count number of cpus only if first CPU is valid. + */ + numcpus_p = &npda->node_num_cpus; + *numcpus_p = 0; + for (i = npda->node_first_cpu; i < MAXCPUS; i++) { + if (CPUID_TO_COMPACT_NODEID(i) != node) + break; + else + (*numcpus_p)++; + } + } else { + npda->node_num_cpus = 0; + } + + /* Allocate memory for the dump stack on each node + * This is useful during nmi handling since we + * may not be guaranteed shared memory at that time + * which precludes depending on a global dump stack + */ +#ifndef CONFIG_IA64_SGI_IO + npda->dump_stack = (uint64_t *)kmem_zalloc_node(DUMP_STACK_SIZE,VM_NOSLEEP, + node); + ASSERT_ALWAYS(npda->dump_stack); + ASSERT(npda->dump_stack); +#endif + /* Initialize the counter which prevents + * both the cpus on a node to proceed with nmi + * handling. + */ +#ifndef CONFIG_IA64_SGI_IO + npda->dump_count = 0; + + /* Setup the (module,slot) --> nic mapping for all the routers + * in the system. This is useful during error handling when + * there is no shared memory. + */ + router_map_init(npda); + + /* Allocate memory for the per-node router traversal queue */ + router_queue_init(npda,node); + npda->sbe_info = kmem_zalloc_node_hint(sizeof (sbe_info_t), 0, node); + ASSERT(npda->sbe_info); + +#ifdef CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 || CONFIG_IA64_GENERIC + /* + * Initialize bte info pointers to NULL + */ + for (i = 0; i < BTES_PER_NODE; i++) { + npda->node_bte_info[i] = (bteinfo_t *)NULL; + } +#endif +#endif /* CONFIG_IA64_SGI_IO */ +} + +/* XXX - Move the interrupt stuff to intr.c ? */ +/* + * Set up the platform-dependent fields in the processor pda. + * Must be done _after_ init_platform_nodepda(). + * If we need a lock here, something else is wrong! + */ +// void init_platform_pda(pda_t *ppda, cpuid_t cpu) +void init_platform_pda(cpuid_t cpu) +{ + hub_intmasks_t *intmasks; + cpuinfo_t cpuinfo; + int i; + cnodeid_t cnode; + synergy_da_t *sda; + int which_synergy; + +#ifndef CONFIG_IA64_SGI_IO + /* Allocate per-cpu platform-dependent data */ + cpuinfo = (cpuinfo_t)kmem_alloc_node(sizeof(struct cpuinfo_s), GFP_ATOMIC, cputocnode(cpu)); + ASSERT_ALWAYS(cpuinfo); + ppda->pdinfo = (void *)cpuinfo; + cpuinfo->ci_cpupda = ppda; + cpuinfo->ci_cpuid = cpu; +#endif + + cnode = cpuid_to_cnodeid(cpu); + which_synergy = cpuid_to_synergy(cpu); + sda = Synergy_da_indr[(cnode * 2) + which_synergy]; + // intmasks = &ppda->p_intmasks; + intmasks = &sda->s_intmasks; + +#ifndef CONFIG_IA64_SGI_IO + ASSERT_ALWAYS(&ppda->p_nodepda); +#endif + + /* Clear INT_PEND0 masks. 
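The nasid_mask filled in above is an ordinary byte-array bitmap: byte index nasid/8, bit index nasid%8. A self-contained sketch of setting and testing bits that way (the 16-byte size is an arbitrary assumption in place of NASID_MASK_BYTES):

#include <stdio.h>
#include <string.h>

#define MASK_BYTES 16   /* assumed size; the real constant is NASID_MASK_BYTES */

static unsigned char nasid_mask[MASK_BYTES];

static void mask_set(int nasid)
{
    nasid_mask[nasid / 8] |= (unsigned char)(1 << (nasid % 8));
}

static int mask_test(int nasid)
{
    return (nasid_mask[nasid / 8] >> (nasid % 8)) & 1;
}

int main(void)
{
    memset(nasid_mask, 0, sizeof(nasid_mask));
    mask_set(0);
    mask_set(9);
    mask_set(42);

    printf("nasid 9:  %d\n", mask_test(9));    /* 1 */
    printf("nasid 10: %d\n", mask_test(10));   /* 0 */
    return 0;
}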
*/ + for (i = 0; i < N_INTPEND0_MASKS; i++) + intmasks->intpend0_masks[i] = 0; + + /* Set up pointer to the vector block in the nodepda. */ + /* (Cant use SUBNODEPDA - not working yet) */ + intmasks->dispatch0 = &Nodepdaindr[cnode]->snpda[cputosubnode(cpu)].intr_dispatch0; + intmasks->dispatch1 = &Nodepdaindr[cnode]->snpda[cputosubnode(cpu)].intr_dispatch1; + + /* Clear INT_PEND1 masks. */ + for (i = 0; i < N_INTPEND1_MASKS; i++) + intmasks->intpend1_masks[i] = 0; + + +#ifndef CONFIG_IA64_SGI_IO + /* Don't read the routers unless we're the master. */ + ppda->p_routertick = 0; +#endif + +} + +#if (defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC)) && !defined(BRINGUP) /* protect low mem for IP35/7 */ +#error "need protect_hub_calias, protect_nmi_handler_data" +#endif + +#ifndef CONFIG_IA64_SGI_IO +/* + * For now, just protect the first page (exception handlers). We + * may want to protect more stuff later. + */ +void +protect_hub_calias(nasid_t nasid) +{ + paddr_t pa = NODE_OFFSET(nasid) + 0; /* page 0 on node nasid */ + int i; + + for (i = 0; i < MAX_REGIONS; i++) { + if (i == nasid_to_region(nasid)) + continue; +#ifndef BRINGUP + /* Protect the exception handlers. */ + *(__psunsigned_t *)BDPRT_ENTRY(pa, i) = MD_PROT_NO; + + /* Protect the ARCS SPB. */ + *(__psunsigned_t *)BDPRT_ENTRY(pa + 4096, i) = MD_PROT_NO; +#endif + } +} + +/* + * Protect the page of low memory used to communicate with the NMI handler. + */ +void +protect_nmi_handler_data(nasid_t nasid, int slice) +{ + paddr_t pa = NODE_OFFSET(nasid) + NMI_OFFSET(nasid, slice); + int i; + + for (i = 0; i < MAX_REGIONS; i++) { + if (i == nasid_to_region(nasid)) + continue; +#ifndef BRINGUP + *(__psunsigned_t *)BDPRT_ENTRY(pa, i) = MD_PROT_NO; +#endif + } +} +#endif /* CONFIG_IA64_SGI_IO */ + + +#ifdef IRIX +/* + * Protect areas of memory that we access uncached by marking them as + * poisoned so the T5 can't read them speculatively and erroneously + * mark them dirty in its cache only to write them back with old data + * later. + */ +static void +protect_low_memory(nasid_t nasid) +{ + /* Protect low memory directory */ + poison_state_alter_range(KLDIR_ADDR(nasid), KLDIR_SIZE, 1); + + /* Protect klconfig area */ + poison_state_alter_range(KLCONFIG_ADDR(nasid), KLCONFIG_SIZE(nasid), 1); + + /* Protect the PI error spool area. */ + poison_state_alter_range(PI_ERROR_ADDR(nasid), PI_ERROR_SIZE(nasid), 1); + + /* Protect CPU A's cache error eframe area. */ + poison_state_alter_range(TO_NODE_UNCAC(nasid, CACHE_ERR_EFRAME), + CACHE_ERR_AREA_SIZE, 1); + + /* Protect CPU B's area */ + poison_state_alter_range(TO_NODE_UNCAC(nasid, CACHE_ERR_EFRAME) + ^ UALIAS_FLIP_BIT, + CACHE_ERR_AREA_SIZE, 1); +#error "SN1 not handled correctly" +} +#endif /* IRIX */ + +/* + * per_hub_init + * + * This code is executed once for each Hub chip. + */ +void +per_hub_init(cnodeid_t cnode) +{ + uint64_t done; + nasid_t nasid; + nodepda_t *npdap; +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) /* SN1 specific */ + ii_icmr_u_t ii_icmr; + ii_ibcr_u_t ii_ibcr; +#endif +#ifndef CONFIG_IA64_SGI_IO + int i; +#endif + +#ifdef SIMULATED_KLGRAPH + compact_to_nasid_node[0] = 0; + nasid_to_compact_node[0] = 0; + FIXME("per_hub_init: SIMULATED_KLCONFIG: compact_to_nasid_node[0] = 0\n"); +#endif /* SIMULATED_KLGRAPH */ + nasid = COMPACT_TO_NASID_NODEID(cnode); + + ASSERT(nasid != INVALID_NASID); + ASSERT(NASID_TO_COMPACT_NODEID(nasid) == cnode); + + /* Grab the hub_mask lock. 
*/ + spin_lock(&hub_mask_lock); + + /* Test our bit. */ + if (!(done = CNODEMASK_TSTB(hub_init_mask, cnode))) { + + /* Turn our bit on in the mask. */ + CNODEMASK_SETB(hub_init_mask, cnode); + } + +#if defined(SN0_HWDEBUG) + hub_config_setup(); +#endif + /* Release the hub_mask lock. */ + spin_unlock(&hub_mask_lock); + + /* + * Do the actual initialization if it hasn't been done yet. + * We don't need to hold a lock for this work. + */ + if (!done) { + npdap = NODEPDA(cnode); + + npdap->hub_chip_rev = get_hub_chiprev(nasid); + +#ifndef CONFIG_IA64_SGI_IO + for (i = 0; i < CPUS_PER_NODE; i++) { + cpu = cnode_slice_to_cpuid(cnode, i); + if (!cpu_enabled(cpu)) + SET_CPU_LEDS(nasid, i, 0xf); + } +#endif /* CONFIG_IA64_SGI_IO */ + +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) /* SN1 specific */ + + /* + * Set the total number of CRBs that can be used. + */ + ii_icmr.ii_icmr_regval= 0x0; + ii_icmr.ii_icmr_fld_s.i_c_cnt = 0xF; + REMOTE_HUB_S(nasid, IIO_ICMR, ii_icmr.ii_icmr_regval); + + /* + * Set the number of CRBs that both of the BTEs combined + * can use minus 1. + */ + ii_ibcr.ii_ibcr_regval= 0x0; + ii_ibcr.ii_ibcr_fld_s.i_count = 0x8; + REMOTE_HUB_S(nasid, IIO_IBCR, ii_ibcr.ii_ibcr_regval); + + /* + * Set CRB timeout to be 10ms. + */ + REMOTE_HUB_S(nasid, IIO_ICTP, 0x1000 ); + REMOTE_HUB_S(nasid, IIO_ICTO, 0xff); + +#endif /* SN0_HWDEBUG */ + + +#ifndef CONFIG_IA64_SGI_IO + + /* Reserve all of the hardwired interrupt levels. */ + intr_reserve_hardwired(cnode); + + /* Initialize error interrupts for this hub. */ + hub_error_init(cnode); + + /* Set up correctable memory/directory ECC error interrupt. */ + install_eccintr(cnode); + + /* Protect our exception vectors from accidental corruption. */ + protect_hub_calias(nasid); + + /* Enable RT clock interrupts */ + hub_rtc_init(cnode); + hub_migrintr_init(cnode); /* Enable migration interrupt */ +#endif + + spin_lock(&hub_mask_lock); + CNODEMASK_SETB(hub_init_done_mask, cnode); + spin_unlock(&hub_mask_lock); + + } else { + /* + * Wait for the other CPU to complete the initialization. + */ + while (CNODEMASK_TSTB(hub_init_done_mask, cnode) == 0) + /* LOOP */ + ; + } +} + +extern void +update_node_information(cnodeid_t cnodeid) +{ + nodepda_t *npda = NODEPDA(cnodeid); + nodepda_router_info_t *npda_rip; + + /* Go through the list of router info + * structures and copy some frequently + * accessed info from the info hanging + * off the corresponding router vertices + */ + npda_rip = npda->npda_rip_first; + while(npda_rip) { + if (npda_rip->router_infop) { + npda_rip->router_portmask = + npda_rip->router_infop->ri_portmask; + npda_rip->router_slot = + npda_rip->router_infop->ri_slotnum; + } else { + /* No router, no ports. */ + npda_rip->router_portmask = 0; + } + npda_rip = npda_rip->router_next; + } +} + +hubreg_t +get_region(cnodeid_t cnode) +{ + if (fine_mode) + return COMPACT_TO_NASID_NODEID(cnode) >> NASID_TO_FINEREG_SHFT; + else + return COMPACT_TO_NASID_NODEID(cnode) >> NASID_TO_COARSEREG_SHFT; +} + +hubreg_t +nasid_to_region(nasid_t nasid) +{ + if (fine_mode) + return nasid >> NASID_TO_FINEREG_SHFT; + else + return nasid >> NASID_TO_COARSEREG_SHFT; +} + diff --git a/arch/ia64/sn/io/ml_SN_intr.c b/arch/ia64/sn/io/ml_SN_intr.c new file mode 100644 index 000000000..c643b6e8b --- /dev/null +++ b/arch/ia64/sn/io/ml_SN_intr.c @@ -0,0 +1,1730 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. 
See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Alan Mayer + */ + +/* + * intr.c- + * This file contains all of the routines necessary to set up and + * handle interrupts on an IP27 board. + */ + +#ident "$Revision: 1.167 $" + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <asm/smp.h> +#include <asm/sn/sgi.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/nodemask.h> +#include <asm/sn/sn_private.h> +#include <asm/sn/klconfig.h> +#include <asm/sn/synergy.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/pci/pciio.h> +#include <asm/sn/pci/pcibr.h> +#include <asm/sn/xtalk/xtalk.h> +#include <asm/sn/pci/pcibr_private.h> + +#if DEBUG_INTR_TSTAMP_DEBUG +#include <sys/debug.h> +#include <sys/idbg.h> +#include <sys/inst.h> +void do_splx_log(int, int); +void spldebug_log_event(int); +#endif + +// FIXME - BRINGUP +#ifdef CONFIG_SMP +extern unsigned long cpu_online_map; +#endif +#define cpu_allows_intr(cpu) (1) +// If I understand what's going on with this, 32 should work. +// physmem_maxradius seems to be the maximum number of router +// hops to get from one end of the system to the other. With +// a maximally configured machine, with the dumbest possible +// topology, we would make 32 router hops. For what we're using +// it for, the dumbest possible should suffice. +#define physmem_maxradius() 32 + +#define SUBNODE_ANY -1 + +extern int nmied; +extern int hub_intr_wakeup_cnt; +extern synergy_da_t *Synergy_da_indr[]; +extern cpuid_t master_procid; + +extern cnodeid_t master_node_get(devfs_handle_t vhdl); + + +#define INTR_LOCK(vecblk) \ + (s = mutex_spinlock(&(vecblk)->vector_lock)) +#define INTR_UNLOCK(vecblk) \ + mutex_spinunlock(&(vecblk)->vector_lock, s) + +/* + * REACT/Pro + */ + + + +/* + * Find first bit set + * Used outside this file also + */ +int ms1bit(unsigned long x) +{ + int b; + + if (x >> 32) b = 32, x >>= 32; + else b = 0; + if (x >> 16) b += 16, x >>= 16; + if (x >> 8) b += 8, x >>= 8; + if (x >> 4) b += 4, x >>= 4; + if (x >> 2) b += 2, x >>= 2; + + return b + (int) (x >> 1); +} + +/* ARGSUSED */ +void +intr_stray(void *lvl) +{ + printk("Stray Interrupt - level %ld to cpu %d", (long)lvl, cpuid()); +} + +#if defined(DEBUG) + +/* Infrastructure to gather the device - target cpu mapping info */ +#define MAX_DEVICES 1000 /* Reasonable large number . Need not be + * the exact maximum # devices possible. + */ +#define MAX_NAME 100 +typedef struct { + dev_t dev; /* device */ + cpuid_t cpuid; /* target cpu */ + cnodeid_t cnodeid;/* node on which the target cpu is present */ + int bit; /* intr bit reserved */ + char intr_name[MAX_NAME]; /* name of the interrupt */ +} intr_dev_targ_map_t; + +intr_dev_targ_map_t intr_dev_targ_map[MAX_DEVICES]; +uint64_t intr_dev_targ_map_size; +lock_t intr_dev_targ_map_lock; + +/* Print out the device - target cpu mapping. 
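ms1bit() above locates the most significant set bit by successive halving: if anything is set in the upper 32 bits, add 32 and shift down, then repeat for 16, 8, 4 and 2, and let the final term pick between bits 1 and 0. The same routine, written against unsigned long long so the sketch also builds where long is 32 bits, with a few spot checks:

#include <stdio.h>

/* Halving search for the index of the highest set bit, as in ms1bit();
 * the result for x == 0 is 0, exactly like the original.
 */
static int ms1bit(unsigned long long x)
{
    int b = 0;

    if (x >> 32) { b += 32; x >>= 32; }   /* anything in the top half?   */
    if (x >> 16) { b += 16; x >>= 16; }
    if (x >> 8)  { b += 8;  x >>= 8;  }
    if (x >> 4)  { b += 4;  x >>= 4;  }
    if (x >> 2)  { b += 2;  x >>= 2;  }
    return b + (int)(x >> 1);             /* last step: bit 1 vs. bit 0  */
}

int main(void)
{
    printf("%d %d %d %d\n",
           ms1bit(1ULL),            /* 0  */
           ms1bit(0x80ULL),         /* 7  */
           ms1bit(0x12345ULL),      /* 16 */
           ms1bit(1ULL << 63));     /* 63 */
    return 0;
}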
+ * This routine is used only in the idbg command + * "intrmap" + */ +void +intr_dev_targ_map_print(cnodeid_t cnodeid) +{ + int i,j,size = 0; + int print_flag = 0,verbose = 0; + char node_name[10]; + + if (cnodeid != CNODEID_NONE) { + nodepda_t *npda; + + npda = NODEPDA(cnodeid); + for (j=0; j<NUM_SUBNODES; j++) { + qprintf("\n SUBNODE %d\n INT_PEND0: ", j); + for(i = 0 ; i < N_INTPEND_BITS ; i++) + qprintf("%d",SNPDA(npda,j)->intr_dispatch0.info[i].ii_flags); + qprintf("\n INT_PEND1: "); + for(i = 0 ; i < N_INTPEND_BITS ; i++) + qprintf("%d",SNPDA(npda,j)->intr_dispatch1.info[i].ii_flags); + } + verbose = 1; + } + qprintf("\n Device - Target Map [Interrupts: %s Node%s]\n\n", + (verbose ? "All" : "Non-hardwired"), + (cnodeid == CNODEID_NONE) ? "s: All" : node_name); + + qprintf("Device\tCpu\tCnode\tIntr_bit\tIntr_name\n"); + for (i = 0 ; i < intr_dev_targ_map_size ; i++) { + + print_flag = 0; + if (verbose) { + if (cnodeid != CNODEID_NONE) { + if (cnodeid == intr_dev_targ_map[i].cnodeid) + print_flag = 1; + } else { + print_flag = 1; + } + } else { + if (intr_dev_targ_map[i].dev != 0) { + if (cnodeid != CNODEID_NONE) { + if (cnodeid == + intr_dev_targ_map[i].cnodeid) + print_flag = 1; + } else { + print_flag = 1; + } + } + } + if (print_flag) { + size++; + qprintf("%d\t%d\t%d\t%d\t%s\n", + intr_dev_targ_map[i].dev, + intr_dev_targ_map[i].cpuid, + intr_dev_targ_map[i].cnodeid, + intr_dev_targ_map[i].bit, + intr_dev_targ_map[i].intr_name); + } + + } + qprintf("\nTotal : %d\n",size); +} +#endif /* DEBUG */ + +/* + * The spinlocks have already been initialized. Now initialize the interrupt + * vectors. One processor on each hub does the work. + */ +void +intr_init_vecblk(nodepda_t *npda, cnodeid_t node, int sn) +{ + int i, ip=0; + intr_vecblk_t *vecblk; + subnode_pda_t *snpda; + + + snpda = SNPDA(npda,sn); + do { + if (ip == 0) { + vecblk = &snpda->intr_dispatch0; + } else { + vecblk = &snpda->intr_dispatch1; + } + + /* Initialize this vector. */ + for (i = 0; i < N_INTPEND_BITS; i++) { + vecblk->vectors[i].iv_func = intr_stray; + vecblk->vectors[i].iv_prefunc = NULL; + vecblk->vectors[i].iv_arg = (void *)(__psint_t)(ip * N_INTPEND_BITS + i); + + vecblk->info[i].ii_owner_dev = 0; + strcpy(vecblk->info[i].ii_name, "Unused"); + vecblk->info[i].ii_flags = 0; /* No flags */ + vecblk->vectors[i].iv_mustruncpu = -1; /* No CPU yet. */ + + } + + spinlock_init(&vecblk->vector_lock, "ivecb"); + + vecblk->vector_count = 0; + for (i = 0; i < CPUS_PER_SUBNODE; i++) + vecblk->cpu_count[i] = 0; + + vecblk->vector_state = VECTOR_UNINITED; + + } while (++ip < 2); + +} + + +/* + * do_intr_reserve_level(cpuid_t cpu, int bit, int resflags, int reserve, + * devfs_handle_t owner_dev, char *name) + * Internal work routine to reserve or unreserve an interrupt level. + * cpu is the CPU to which the interrupt will be sent. + * bit is the level bit to reserve. -1 means any level + * resflags should include II_ERRORINT if this is an + * error interrupt, II_THREADED if the interrupt handler + * will be threaded, or 0 otherwise. + * reserve should be set to II_RESERVE or II_UNRESERVE + * to get or clear a reservation. 
+ * owner_dev is the device that "owns" this interrupt, if supplied + * name is a human-readable name for this interrupt, if supplied + * intr_reserve_level returns the bit reserved or -1 to indicate an error + */ +static int +do_intr_reserve_level(cpuid_t cpu, int bit, int resflags, int reserve, + devfs_handle_t owner_dev, char *name) +{ + intr_vecblk_t *vecblk; + hub_intmasks_t *hub_intmasks; + int s; + int rv = 0; + int ip; + synergy_da_t *sda; + int which_synergy; + cnodeid_t cnode; + + ASSERT(bit < N_INTPEND_BITS * 2); + + cnode = cpuid_to_cnodeid(cpu); + which_synergy = cpuid_to_synergy(cpu); + sda = Synergy_da_indr[(cnode * 2) + which_synergy]; + hub_intmasks = &sda->s_intmasks; + // hub_intmasks = &pdaindr[cpu].pda->p_intmasks; + + // if (pdaindr[cpu].pda == NULL) return -1; + if ((bit < N_INTPEND_BITS) && !(resflags & II_ERRORINT)) { + vecblk = hub_intmasks->dispatch0; + ip = 0; + } else { + ASSERT((bit >= N_INTPEND_BITS) || (bit == -1)); + bit -= N_INTPEND_BITS; /* Get position relative to INT_PEND1 reg. */ + vecblk = hub_intmasks->dispatch1; + ip = 1; + } + + INTR_LOCK(vecblk); + + if (bit <= -1) { + // bit = 0; + bit = 7; /* First available on SNIA */ + ASSERT(reserve == II_RESERVE); + /* Choose any available level */ + for (; bit < N_INTPEND_BITS; bit++) { + if (!(vecblk->info[bit].ii_flags & II_RESERVE)) { + rv = bit; + break; + } + } + + /* Return -1 if all interrupt levels int this register are taken. */ + if (bit == N_INTPEND_BITS) + rv = -1; + + } else { + /* Reserve a particular level if it's available. */ + if ((vecblk->info[bit].ii_flags & II_RESERVE) == reserve) { + /* Can't (un)reserve a level that's already (un)reserved. */ + rv = -1; + } else { + rv = bit; + } + } + + /* Reserve the level and bump the count. */ + if (rv != -1) { + if (reserve) { + int maxlen = sizeof(vecblk->info[bit].ii_name) - 1; + int namelen; + vecblk->info[bit].ii_flags |= (II_RESERVE | resflags); + vecblk->info[bit].ii_owner_dev = owner_dev; + /* Copy in the name. */ + namelen = name ? strlen(name) : 0; + strncpy(vecblk->info[bit].ii_name, name, MIN(namelen, maxlen)); + vecblk->info[bit].ii_name[maxlen] = '\0'; + vecblk->vector_count++; + } else { + vecblk->info[bit].ii_flags = 0; /* Clear all the flags */ + vecblk->info[bit].ii_owner_dev = 0; + /* Clear the name. */ + vecblk->info[bit].ii_name[0] = '\0'; + vecblk->vector_count--; + } + } + + INTR_UNLOCK(vecblk); + +#if defined(DEBUG) + if (rv >= 0) { + int namelen = name ? strlen(name) : 0; + /* Gather this device - target cpu mapping information + * in a table which can be used later by the idbg "intrmap" + * command + */ + s = mutex_spinlock(&intr_dev_targ_map_lock); + if (intr_dev_targ_map_size < MAX_DEVICES) { + intr_dev_targ_map_t *p; + + p = &intr_dev_targ_map[intr_dev_targ_map_size]; + p->dev = owner_dev; + p->cpuid = cpu; + p->cnodeid = cputocnode(cpu); + p->bit = ip * N_INTPEND_BITS + rv; + strncpy(p->intr_name, + name, + MIN(MAX_NAME,namelen)); + intr_dev_targ_map_size++; + } + mutex_spinunlock(&intr_dev_targ_map_lock,s); + } +#endif /* DEBUG */ + + return (((rv == -1) ? rv : (ip * N_INTPEND_BITS) + rv)) ; +} + + +/* + * WARNING: This routine should only be called from within ml/SN. + * Reserve an interrupt level. + */ +int +intr_reserve_level(cpuid_t cpu, int bit, int resflags, devfs_handle_t owner_dev, char *name) +{ + return(do_intr_reserve_level(cpu, bit, resflags, II_RESERVE, owner_dev, name)); +} + + +/* + * WARNING: This routine should only be called from within ml/SN. + * Unreserve an interrupt level. 
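do_intr_reserve_level() either claims the specific level bit the caller asked for or, when called with bit == -1, scans upward for the first unreserved one (starting at bit 7 on SNIA, the lower bits being kept for hardwired uses), and it refuses to reserve a level twice or unreserve a free one. A simplified single-register sketch of that bookkeeping:

#include <stdio.h>

#define N_BITS        64
#define FIRST_FREE     7      /* first level handed out when the caller says "any" */
#define FLAG_RESERVED  1

static int level_flags[N_BITS];

/* Reserve 'bit', or the first free level if bit == -1.  Returns the bit
 * actually reserved, or -1 on failure, like do_intr_reserve_level().
 */
static int reserve_level(int bit)
{
    if (bit == -1) {
        for (bit = FIRST_FREE; bit < N_BITS; bit++)
            if (!(level_flags[bit] & FLAG_RESERVED))
                break;
        if (bit == N_BITS)
            return -1;                      /* every level is taken */
    } else if (level_flags[bit] & FLAG_RESERVED) {
        return -1;                          /* explicit bit already reserved */
    }

    level_flags[bit] |= FLAG_RESERVED;
    return bit;
}

static void unreserve_level(int bit)
{
    level_flags[bit] = 0;
}

int main(void)
{
    int a = reserve_level(-1);      /* 7  */
    int b = reserve_level(-1);      /* 8  */
    int c = reserve_level(7);       /* -1: already taken */

    printf("%d %d %d\n", a, b, c);
    unreserve_level(a);
    printf("%d\n", reserve_level(-1));  /* 7 again */
    return 0;
}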
+ */ +void +intr_unreserve_level(cpuid_t cpu, int bit) +{ + (void)do_intr_reserve_level(cpu, bit, 0, II_UNRESERVE, 0, NULL); +} + +/* + * Get values that vary depending on which CPU and bit we're operating on + */ +static hub_intmasks_t * +intr_get_ptrs(cpuid_t cpu, int bit, + int *new_bit, /* Bit relative to the register */ + hubreg_t **intpend_masks, /* Masks for this register */ + intr_vecblk_t **vecblk, /* Vecblock for this interrupt */ + int *ip) /* Which intpend register */ +{ + hub_intmasks_t *hub_intmasks; + synergy_da_t *sda; + int which_synergy; + cnodeid_t cnode; + + ASSERT(bit < N_INTPEND_BITS * 2); + + cnode = cpuid_to_cnodeid(cpu); + which_synergy = cpuid_to_synergy(cpu); + sda = Synergy_da_indr[(cnode * 2) + which_synergy]; + hub_intmasks = &sda->s_intmasks; + + // hub_intmasks = &pdaindr[cpu].pda->p_intmasks; + + if (bit < N_INTPEND_BITS) { + *intpend_masks = hub_intmasks->intpend0_masks; + *vecblk = hub_intmasks->dispatch0; + *ip = 0; + *new_bit = bit; + } else { + *intpend_masks = hub_intmasks->intpend1_masks; + *vecblk = hub_intmasks->dispatch1; + *ip = 1; + *new_bit = bit - N_INTPEND_BITS; + } + + return hub_intmasks; +} + + +/* + * intr_connect_level(cpuid_t cpu, int bit, ilvl_t intr_swlevel, + * intr_func_t intr_func, void *intr_arg); + * This is the lowest-level interface to the interrupt code. It shouldn't + * be called from outside the ml/SN directory. + * intr_connect_level hooks up an interrupt to a particular bit in + * the INT_PEND0/1 masks. Returns 0 on success. + * cpu is the CPU to which the interrupt will be sent. + * bit is the level bit to connect to + * intr_swlevel tells which software level to use + * intr_func is the interrupt handler + * intr_arg is an arbitrary argument interpreted by the handler + * intr_prefunc is a prologue function, to be called + * with interrupts disabled, to disable + * the interrupt at source. It is called + * with the same argument. Should be NULL for + * typical interrupts, which can be masked + * by the infrastructure at the level bit. + * intr_connect_level returns 0 on success or nonzero on an error + */ +/* ARGSUSED */ +int +intr_connect_level(cpuid_t cpu, int bit, ilvl_t intr_swlevel, + intr_func_t intr_func, void *intr_arg, + intr_func_t intr_prefunc) +{ + intr_vecblk_t *vecblk; + hubreg_t *intpend_masks; + int s; + int rv = 0; + int ip; + + ASSERT(bit < N_INTPEND_BITS * 2); + + (void)intr_get_ptrs(cpu, bit, &bit, &intpend_masks, + &vecblk, &ip); + + INTR_LOCK(vecblk); + + if ((vecblk->info[bit].ii_flags & II_INUSE) || + (!(vecblk->info[bit].ii_flags & II_RESERVE))) { + /* Can't assign to a level that's in use or isn't reserved. */ + rv = -1; + } else { + /* Stuff parameters into vector and info */ + vecblk->vectors[bit].iv_func = intr_func; + vecblk->vectors[bit].iv_prefunc = intr_prefunc; + vecblk->vectors[bit].iv_arg = intr_arg; + vecblk->info[bit].ii_flags |= II_INUSE; + } + + /* Now stuff the masks if everything's okay. */ + if (!rv) { + int lslice; + volatile hubreg_t *mask_reg; + // nasid_t nasid = COMPACT_TO_NASID_NODEID(cputocnode(cpu)); + nasid_t nasid = cpuid_to_nasid(cpu); + int subnode = cpuid_to_subnode(cpu); + + /* Make sure it's not already pending when we connect it. 
*/ + REMOTE_HUB_PI_CLR_INTR(nasid, subnode, bit + ip * N_INTPEND_BITS); + + intpend_masks[0] |= (1ULL << (uint64_t)bit); + + lslice = cputolocalslice(cpu); + vecblk->cpu_count[lslice]++; +#if SN1 + /* + * On SN1, there are 8 interrupt mask registers per node: + * PI_0 MASK_0 A + * PI_0 MASK_1 A + * PI_0 MASK_0 B + * PI_0 MASK_1 B + * PI_1 MASK_0 A + * PI_1 MASK_1 A + * PI_1 MASK_0 B + * PI_1 MASK_1 B + */ +#endif + if (ip == 0) { + mask_reg = REMOTE_HUB_PI_ADDR(nasid, subnode, + PI_INT_MASK0_A + PI_INT_MASK_OFFSET * lslice); + } else { + mask_reg = REMOTE_HUB_PI_ADDR(nasid, subnode, + PI_INT_MASK1_A + PI_INT_MASK_OFFSET * lslice); + } + + HUB_S(mask_reg, intpend_masks[0]); + } + + INTR_UNLOCK(vecblk); + + return rv; +} + + +/* + * intr_disconnect_level(cpuid_t cpu, int bit) + * + * This is the lowest-level interface to the interrupt code. It should + * not be called from outside the ml/SN directory. + * intr_disconnect_level removes a particular bit from an interrupt in + * the INT_PEND0/1 masks. Returns 0 on success or nonzero on failure. + */ +int +intr_disconnect_level(cpuid_t cpu, int bit) +{ + intr_vecblk_t *vecblk; + hubreg_t *intpend_masks; + int s; + int rv = 0; + int ip; + + (void)intr_get_ptrs(cpu, bit, &bit, &intpend_masks, + &vecblk, &ip); + + INTR_LOCK(vecblk); + + if ((vecblk->info[bit].ii_flags & (II_RESERVE | II_INUSE)) != + ((II_RESERVE | II_INUSE))) { + /* Can't remove a level that's not in use or isn't reserved. */ + rv = -1; + } else { + /* Stuff parameters into vector and info */ + vecblk->vectors[bit].iv_func = (intr_func_t)NULL; + vecblk->vectors[bit].iv_prefunc = (intr_func_t)NULL; + vecblk->vectors[bit].iv_arg = 0; + vecblk->info[bit].ii_flags &= ~II_INUSE; +#ifdef BASE_ITHRTEAD + vecblk->vectors[bit].iv_mustruncpu = -1; /* No mustrun CPU any more. */ +#endif + } + + /* Now clear the masks if everything's okay. */ + if (!rv) { + int lslice; + volatile hubreg_t *mask_reg; + + intpend_masks[0] &= ~(1ULL << (uint64_t)bit); + lslice = cputolocalslice(cpu); + vecblk->cpu_count[lslice]--; + mask_reg = REMOTE_HUB_PI_ADDR(COMPACT_TO_NASID_NODEID(cputocnode(cpu)), + cpuid_to_subnode(cpu), + ip == 0 ? PI_INT_MASK0_A : PI_INT_MASK1_A); + mask_reg = (volatile hubreg_t *)((__psunsigned_t)mask_reg + + (PI_INT_MASK_OFFSET * lslice)); + *mask_reg = intpend_masks[0]; + } + + INTR_UNLOCK(vecblk); + + return rv; +} + +/* + * Actually block or unblock an interrupt + */ +void +do_intr_block_bit(cpuid_t cpu, int bit, int block) +{ + intr_vecblk_t *vecblk; + int s; + int ip; + hubreg_t *intpend_masks; + volatile hubreg_t mask_value; + volatile hubreg_t *mask_reg; + + intr_get_ptrs(cpu, bit, &bit, &intpend_masks, &vecblk, &ip); + + INTR_LOCK(vecblk); + + if (block) + /* Block */ + intpend_masks[0] &= ~(1ULL << (uint64_t)bit); + else + /* Unblock */ + intpend_masks[0] |= (1ULL << (uint64_t)bit); + + if (ip == 0) { + mask_reg = REMOTE_HUB_PI_ADDR(COMPACT_TO_NASID_NODEID(cputocnode(cpu)), + cpuid_to_subnode(cpu), PI_INT_MASK0_A); + } else { + mask_reg = REMOTE_HUB_PI_ADDR(COMPACT_TO_NASID_NODEID(cputocnode(cpu)), + cpuid_to_subnode(cpu), PI_INT_MASK1_A); + } + + HUB_S(mask_reg, intpend_masks[0]); + + /* + * Wait for it to take effect. (One read should suffice.) + * This is only necessary when blocking an interrupt + */ + if (block) + while ((mask_value = HUB_L(mask_reg)) != intpend_masks[0]) + ; + + INTR_UNLOCK(vecblk); +} + + +/* + * Block a particular interrupt (cpu/bit pair). 
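+ *
+ * Illustrative use only: a caller that must quiesce its level while it
+ * reprograms a device could bracket the critical section, e.g.
+ *
+ *	intr_block_bit(cpu, bit);
+ *	...			reprogram the device
+ *	intr_unblock_bit(cpu, bit);
+ *
+ * Blocking spins until the mask write is visible (see do_intr_block_bit
+ * above), so the mask update has taken effect by the time intr_block_bit
+ * returns.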
+ */ +/* ARGSUSED */ +void +intr_block_bit(cpuid_t cpu, int bit) +{ + do_intr_block_bit(cpu, bit, 1); +} + + +/* + * Unblock a particular interrupt (cpu/bit pair). + */ +/* ARGSUSED */ +void +intr_unblock_bit(cpuid_t cpu, int bit) +{ + do_intr_block_bit(cpu, bit, 0); +} + + +/* verifies that the specified CPUID is on the specified SUBNODE (if any) */ +#define cpu_on_subnode(cpuid, which_subnode) \ + (((which_subnode) == SUBNODE_ANY) || (cpuid_to_subnode(cpuid) == (which_subnode))) + + +/* + * Choose one of the CPUs on a specified node or subnode to receive + * interrupts. Don't pick a cpu which has been specified as a NOINTR cpu. + * + * Among all acceptable CPUs, the CPU that has the fewest total number + * of interrupts targetted towards it is chosen. Note that we never + * consider how frequent each of these interrupts might occur, so a rare + * hardware error interrupt is weighted equally with a disk interrupt. + */ +static cpuid_t +do_intr_cpu_choose(cnodeid_t cnode, int which_subnode) +{ + cpuid_t cpu, best_cpu = CPU_NONE; + int slice, min_count=1000; + + min_count = 1000; + for (slice=0; slice < CPUS_PER_NODE; slice++) { + intr_vecblk_t *vecblk0, *vecblk1; + int total_intrs_to_slice; + subnode_pda_t *snpda; + int local_cpu_num; + + cpu = cnode_slice_to_cpuid(cnode, slice); + cpu = cpu_logical_id(cpu); + if (cpu == CPU_NONE) + continue; + + /* If this cpu isn't enabled for interrupts, skip it */ + if (!cpu_enabled(cpu) || !cpu_allows_intr(cpu)) + continue; + + /* If this isn't the right subnode, skip it */ + if (!cpu_on_subnode(cpu, which_subnode)) + continue; + + /* OK, this one's a potential CPU for interrupts */ + snpda = SUBNODEPDA(cnode,SUBNODE(slice)); + vecblk0 = &snpda->intr_dispatch0; + vecblk1 = &snpda->intr_dispatch1; + local_cpu_num = LOCALCPU(slice); + total_intrs_to_slice = vecblk0->cpu_count[local_cpu_num] + + vecblk1->cpu_count[local_cpu_num]; + + if (min_count > total_intrs_to_slice) { + min_count = total_intrs_to_slice; + best_cpu = cpu; + } + } + return best_cpu; +} + +/* + * Choose an appropriate interrupt target CPU on a specified node. + * If which_subnode is SUBNODE_ANY, then subnode is not considered. + * Otherwise, the chosen CPU must be on the specified subnode. + */ +static cpuid_t +intr_cpu_choose_from_node(cnodeid_t cnode, int which_subnode) +{ + return(do_intr_cpu_choose(cnode, which_subnode)); +} + + +#ifndef CONFIG_IA64_SGI_IO +/* + * Convert a subnode vertex into a (cnodeid, which_subnode) pair. + * Return 0 on success, non-zero on failure. + */ +static int +subnodevertex_to_subnode(devfs_handle_t vhdl, cnodeid_t *cnodeidp, int *which_subnodep) +{ + arbitrary_info_t which_subnode; + cnodeid_t cnodeid; + + /* Try to grab subnode information */ + if (hwgraph_info_get_LBL(vhdl, INFO_LBL_CPUBUS, &which_subnode) != GRAPH_SUCCESS) + return(-1); + + /* On which node? 
*/ + cnodeid = master_node_get(vhdl); + if (cnodeid == CNODEID_NONE) + return(-1); + + *which_subnodep = (int)which_subnode; + *cnodeidp = cnodeid; + return(0); /* success */ +} + +#endif /* CONFIG_IA64_SGI_IO */ + +/* Make it easy to identify subnode vertices in the hwgraph */ +void +mark_subnodevertex_as_subnode(devfs_handle_t vhdl, int which_subnode) +{ + graph_error_t rv; + + ASSERT(0 <= which_subnode); + ASSERT(which_subnode < NUM_SUBNODES); + + rv = hwgraph_info_add_LBL(vhdl, INFO_LBL_CPUBUS, (arbitrary_info_t)which_subnode); + ASSERT_ALWAYS(rv == GRAPH_SUCCESS); + + rv = hwgraph_info_export_LBL(vhdl, INFO_LBL_CPUBUS, sizeof(arbitrary_info_t)); + ASSERT_ALWAYS(rv == GRAPH_SUCCESS); +} + + +#ifndef CONFIG_IA64_SGI_IO +/* + * Given a device descriptor, extract interrupt target information and + * choose an appropriate CPU. Return CPU_NONE if we can't make sense + * out of the target information. + * TBD: Should this be considered platform-independent code? + */ +static cpuid_t +intr_target_from_desc(device_desc_t dev_desc, int favor_subnode) +{ + cpuid_t cpuid = CPU_NONE; + cnodeid_t cnodeid; + int which_subnode; + devfs_handle_t intr_target_dev; + + if ((intr_target_dev = device_desc_intr_target_get(dev_desc)) != GRAPH_VERTEX_NONE) { + /* + * A valid device was specified. If it's a particular + * CPU, then use that CPU as target. + */ + cpuid = cpuvertex_to_cpuid(intr_target_dev); + if (cpuid != CPU_NONE) + goto cpuchosen; + + /* If a subnode vertex was specified, pick a CPU on that subnode. */ + if (subnodevertex_to_subnode(intr_target_dev, &cnodeid, &which_subnode) == 0) { + cpuid = intr_cpu_choose_from_node(cnodeid, which_subnode); + goto cpuchosen; + } + + /* + * Otherwise, pick a CPU on the node that owns the + * specified target. Favor "favor_subnode", if specified. + */ + cnodeid = master_node_get(intr_target_dev); + if (cnodeid != CNODEID_NONE) { + cpuid = intr_cpu_choose_from_node(cnodeid, favor_subnode); + goto cpuchosen; + } + } + +cpuchosen: + return(cpuid); +} +#endif /* CONFIG_IA64_SGI_IO */ + + +#ifndef CONFIG_IA64_SGI_IO +/* + * Check if we had already visited this candidate cnode + */ +static void * +intr_cnode_seen(cnodeid_t candidate, + void *arg1, + void *arg2) +{ + int i; + cnodeid_t *visited_cnodes = (cnodeid_t *)arg1; + int *num_visited_cnodes = (int *)arg2; + + ASSERT(visited_cnodes); + ASSERT(*num_visited_cnodes <= numnodes); + for(i = 0 ; i < *num_visited_cnodes; i++) { + if (candidate == visited_cnodes[i]) + return(NULL); + } + return(visited_cnodes); +} + +#endif /* CONFIG_IA64_SGI_IO */ + + + +/* + * intr_bit_reserve_test(cpuid,which_subnode,cnode,req_bit,intr_resflags, + * owner_dev,intr_name,*resp_bit) + * Either cpuid is not CPU_NONE or cnodeid not CNODE_NONE but + * not both. + * 1. If cpuid is specified, this routine tests if this cpu can be a valid + * interrupt target candidate. + * 2. If cnodeid is specified, this routine tests if there is a cpu on + * this node which can be a valid interrupt target candidate. + * 3. If a valid interrupt target cpu candidate is found then an attempt at + * reserving an interrupt bit on the corresponding cnode is made. 
+ * + * If steps 1 & 2 both fail or step 3 fails then we are not able to get a valid + * interrupt target cpu then routine returns CPU_NONE (failure) + * Otherwise routine returns cpuid of interrupt target (success) + */ +static cpuid_t +intr_bit_reserve_test(cpuid_t cpuid, + int favor_subnode, + cnodeid_t cnodeid, + int req_bit, + int intr_resflags, + devfs_handle_t owner_dev, + char *intr_name, + int *resp_bit) +{ + + ASSERT((cpuid==CPU_NONE) || (cnodeid==CNODEID_NONE)); + + if (cnodeid != CNODEID_NONE) { + /* Try to choose a interrupt cpu candidate */ + cpuid = intr_cpu_choose_from_node(cnodeid, favor_subnode); + } + + if (cpuid != CPU_NONE) { + /* Try to reserve an interrupt bit on the hub + * corresponding to the canidate cnode. If we + * are successful then we got a cpu which can + * act as an interrupt target for the io device. + * Otherwise we need to continue the search + * further. + */ + *resp_bit = do_intr_reserve_level(cpuid, + req_bit, + intr_resflags, + II_RESERVE, + owner_dev, + intr_name); + + if (*resp_bit >= 0) + /* The interrupt target specified was fine */ + return(cpuid); + } + return(CPU_NONE); +} +/* + * intr_heuristic(dev_t dev,device_desc_t dev_desc, + * int req_bit,int intr_resflags,dev_t owner_dev, + * char *intr_name,int *resp_bit) + * + * Choose an interrupt destination for an interrupt. + * dev is the device for which the interrupt is being set up + * dev_desc is a description of hardware and policy that could + * help determine where this interrupt should go + * req_bit is the interrupt bit requested + * (can be INTRCONNECT_ANY_BIT in which the first available + * interrupt bit is used) + * intr_resflags indicates whether we want to (un)reserve bit + * owner_dev is the owner device + * intr_name is the readable interrupt name + * resp_bit indicates whether we succeeded in getting the required + * action { (un)reservation} done + * negative value indicates failure + * + */ +/* ARGSUSED */ +cpuid_t +intr_heuristic(devfs_handle_t dev, + device_desc_t dev_desc, + int req_bit, + int intr_resflags, + devfs_handle_t owner_dev, + char *intr_name, + int *resp_bit) +{ + cpuid_t cpuid; /* possible intr targ*/ + cnodeid_t candidate; /* possible canidate */ +#ifndef BRINGUP + cnodeid_t visited_cnodes[MAX_NASIDS], /* nodes seen so far */ + center, /* node we are on */ + candidate; /* possible canidate */ + int num_visited_cnodes = 0; /* # nodes seen */ + + int radius = 1, /* start looking at the + * current node + */ + maxradius = physmem_maxradius(); + void *rv; +#endif /* BRINGUP */ + int which_subnode = SUBNODE_ANY; + +#if CONFIG_IA64_SGI_IO /* SN1 + pcibr Addressing Limitation */ + { + devfs_handle_t pconn_vhdl; + pcibr_soft_t pcibr_soft; + + /* + * This combination of SN1 and Bridge hardware has an odd "limitation". + * Due to the choice of addresses for PI0 and PI1 registers on SN1 + * and historical limitations in Bridge, Bridge is unable to + * send interrupts to both PI0 CPUs and PI1 CPUs -- we have + * to choose one set or the other. That choice is implicitly + * made when Bridge first attaches its error interrupt. After + * that point, all subsequent interrupts are restricted to the + * same PI number (though it's possible to send interrupts to + * the same PI number on a different node). + * + * Since neither SN1 nor Bridge designers are willing to admit a + * bug, we can't really call this a "workaround". It's a permanent + * solution for an SN1-specific and Bridge-specific hardware + * limitation that won't ever be lifted. 
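+	 *
+	 * The practical effect, sketched for illustration: once the Bridge
+	 * error interrupt (bsi_err_intr below) has been targeted at, say, a
+	 * PI0 CPU, which_subnode is pinned to that PI for every later
+	 * interrupt allocated behind the same Bridge, even if a PI1 CPU
+	 * elsewhere currently has fewer interrupts targeted at it.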
+ */ + if ((hwgraph_edge_get(dev, EDGE_LBL_PCI, &pconn_vhdl) == GRAPH_SUCCESS) && + ((pcibr_soft = pcibr_soft_get(pconn_vhdl)) != NULL)) { + /* + * We "know" that the error interrupt is the first + * interrupt set up by pcibr_attach. Send all interrupts + * on this bridge to the same subnode number. + */ + if (pcibr_soft->bsi_err_intr) { + which_subnode = cpuid_to_subnode(((hub_intr_t) pcibr_soft->bsi_err_intr)->i_cpuid); + } + } + } +#endif /* CONFIG_IA64_SGI_IO */ + +#ifndef CONFIG_IA64_SGI_IO + /* + * If an interrupt target was specified for this + * interrupt allocation, try to use it. + */ + if (dev_desc) { + + /* Try to see if the interrupt target specified in the + * device descriptor is a legal candidate. + */ + cpuid = intr_bit_reserve_test(intr_target_from_desc(dev_desc, which_subnode), + which_subnode, + CNODEID_NONE, + req_bit, + intr_resflags, + owner_dev, + intr_name, + resp_bit); + + if (cpuid != CPU_NONE) { + if (cpu_on_subnode(cpuid, which_subnode)) + return(cpuid); /* got a valid interrupt target */ + + printk("Override explicit interrupt targetting: %v (0x%x)\n", + owner_dev, owner_dev); + + intr_unreserve_level(cpuid, *resp_bit); + } + + /* Fall through on to the next step in the search for + * the interrupt candidate. + */ + + } +#endif /* CONFIG_IA64_SGI_IO */ + + /* Check if we can find a valid interrupt target candidate on + * the master node for the device. + */ + cpuid = intr_bit_reserve_test(CPU_NONE, + which_subnode, + master_node_get(dev), + req_bit, + intr_resflags, + owner_dev, + intr_name, + resp_bit); + + if (cpuid != CPU_NONE) { + if (cpu_on_subnode(cpuid, which_subnode)) + return(cpuid); /* got a valid interrupt target */ + else + intr_unreserve_level(cpuid, *resp_bit); + } + + printk("Cannot target interrupts to closest node(%d): %ld (0x%lx)\n", + master_node_get(dev),(long) owner_dev, (unsigned long)owner_dev); + + /* Fall through into the default algorithm + * (exhaustive-search-for-the-nearest-possible-interrupt-target) + * for finding the interrupt target + */ + +#ifndef BRINGUP + // Use of this algorithm is deferred until the supporting + // code has been implemented. + /* + * No valid interrupt specification exists. + * Try to find a node which is closest to the current node + * which can process interrupts from a device + */ + + center = cpuid_to_cnodeid(smp_processor_id()); + while (radius <= maxradius) { + + /* Try to find a node at the given radius and which + * we haven't seen already. + */ + rv = physmem_select_neighbor_node(center,radius,&candidate, + intr_cnode_seen, + (void *)visited_cnodes, + (void *)&num_visited_cnodes); + if (!rv) { + /* We have seen all the nodes at this particular radius + * Go on to the next radius level. + */ + radius++; + continue; + } + /* We are seeing this candidate cnode for the first time + */ + visited_cnodes[num_visited_cnodes++] = candidate; + + cpuid = intr_bit_reserve_test(CPU_NONE, + which_subnode, + candidate, + req_bit, + intr_resflags, + owner_dev, + intr_name, + resp_bit); + + if (cpuid != CPU_NONE) { + if (cpu_on_subnode(cpuid, which_subnode)) + return(cpuid); /* got a valid interrupt target */ + else + intr_unreserve_level(cpuid, *resp_bit); + } + } +#else /* BRINGUP */ + { + // Do a stupid round-robin assignment of the node. 
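+		// last_node is static so successive allocations resume where the
+		// previous call left off, spreading interrupt targets across nodes
+		// instead of always starting the scan at node 0.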
+ static cnodeid_t last_node = 0; + + if (last_node > numnodes) last_node = 0; + for (candidate = last_node; candidate <= numnodes; candidate++) { + cpuid = intr_bit_reserve_test(CPU_NONE, + which_subnode, + candidate, + req_bit, + intr_resflags, + owner_dev, + intr_name, + resp_bit); + + if (cpuid != CPU_NONE) { + if (cpu_on_subnode(cpuid, which_subnode)) { + last_node++; + return(cpuid); /* got a valid interrupt target */ + } + else + intr_unreserve_level(cpuid, *resp_bit); + } + last_node++; + } + } +#endif + + printk("Cannot target interrupts to any close node: %ld (0x%lx)\n", + (long)owner_dev, (unsigned long)owner_dev); + + /* In the worst case try to allocate interrupt bits on the + * master processor's node. We may get here during error interrupt + * allocation phase when the topology matrix is not yet setup + * and hence cannot do an exhaustive search. + */ + ASSERT(cpu_allows_intr(master_procid)); + cpuid = intr_bit_reserve_test(master_procid, + which_subnode, + CNODEID_NONE, + req_bit, + intr_resflags, + owner_dev, + intr_name, + resp_bit); + + if (cpuid != CPU_NONE) { + if (cpu_on_subnode(cpuid, which_subnode)) + return(cpuid); + else + intr_unreserve_level(cpuid, *resp_bit); + } + + printk("Cannot target interrupts: %ld (0x%lx)\n", + (long)owner_dev, (unsigned long)owner_dev); + + return(CPU_NONE); /* Should never get here */ +} + + + + +#ifndef BRINGUP +/* + * Should never receive an exception while running on the idle + * stack. It IS possible to handle *interrupts* while on the + * idle stack, but a non-interrupt *exception* is a problem. + */ +void +idle_err(inst_t *epc, uint cause, void *fep, void *sp) +{ + eframe_t *ep = (eframe_t *)fep; + + if ((cause & CAUSE_EXCMASK) == EXC_IBE || + (cause & CAUSE_EXCMASK) == EXC_DBE) { + (void)dobuserre((eframe_t *)ep, epc, 0); + } + + /* XXX - This will have to change to deal with various SN errors. */ + panic( "exception on IDLE stack " + "ep:0x%x epc:0x%x cause:0x%w32x sp:0x%x badvaddr:0x%x", + ep, epc, cause, sp, getbadvaddr()); + /* NOTREACHED */ +} + + +/* + * earlynofault - handle very early global faults - usually just while + * sizing memory + * Returns: 1 if should do nofault + * 0 if not + */ +/* ARGSUSED */ +int +earlynofault(eframe_t *ep, uint code) +{ + switch(code) { + case EXC_DBE: + return(1); + default: + return(0); + } +} + + + +/* ARGSUSED */ +static void +cpuintr(void *arg1, void *arg2) +{ +#if RTE + static int rte_intrdebug = 1; +#endif + /* + * Frame Scheduler + */ + LOG_TSTAMP_EVENT(RTMON_INTR, TSTAMP_EV_CPUINTR, NULL, NULL, + NULL, NULL); + + /* + * Hardware clears the IO interrupts, but we need to clear software- + * generated interrupts. + */ + LOCAL_HUB_CLR_INTR(CPU_ACTION_A + cputolocalslice(cpuid())); + +#if 0 + /* XXX - Handle error interrupts. 
*/ + if (error_intr_reason) + error_intr(); +#endif /* 0 */ + + /* + * If we're headed for panicspin and it is due to a NMI, save the + * eframe in the NMI area + */ + if (private.p_va_panicspin && nmied) { + caddr_t nmi_save_area; + + nmi_save_area = (caddr_t) (TO_UNCAC(TO_NODE( + cputonasid(cpuid()), IP27_NMI_EFRAME_OFFSET)) + + cputoslice(cpuid()) * IP27_NMI_EFRAME_SIZE); + bcopy((caddr_t) arg2, nmi_save_area, sizeof(eframe_t)); + } + + doacvec(); +#if RTE + if (private.p_flags & PDAF_ISOLATED && !rte_intrdebug) + goto end_cpuintr; +#endif + doactions(); +#if RTE +end_cpuintr: +#endif + LOG_TSTAMP_EVENT(RTMON_INTR, TSTAMP_EV_INTREXIT, TSTAMP_EV_CPUINTR, NULL, NULL, NULL); +} + +void +install_cpuintr(cpuid_t cpu) +{ + int intr_bit = CPU_ACTION_A + cputolocalslice(cpu); + + if (intr_connect_level(cpu, intr_bit, INTPEND0_MAXMASK, + (intr_func_t) cpuintr, NULL, NULL)) + panic("install_cpuintr: Can't connect interrupt."); +} +#endif /* BRINGUP */ + +#ifdef DEBUG_INTR_TSTAMP +/* We allocate an array, but only use element number 64. This guarantees that + * the entry is in a cacheline by itself. + */ +#define DINTR_CNTIDX 32 +#define DINTR_TSTAMP1 48 +#define DINTR_TSTAMP2 64 +volatile long long dintr_tstamp_cnt[128]; +int dintr_debug_output=0; +extern void idbg_tstamp_debug(void); +#ifdef SPLDEBUG +extern void idbg_splx_log(int); +#endif +#if DEBUG_INTR_TSTAMP_DEBUG +int dintr_enter_symmon=1000; /* 1000 microseconds is 1 millisecond */ +#endif + +#ifndef BRINGUP +/* ARGSUSED */ +static void +cpulatintr(void *arg) +{ + /* + * Hardware only clears IO interrupts so we have to clear our level + * here. + */ + LOCAL_HUB_CLR_INTR(CPU_INTRLAT_A + cputolocalslice(cpuid())); + +#if DEBUG_INTR_TSTAMP_DEBUG + dintr_tstamp_cnt[DINTR_TSTAMP2] = GET_LOCAL_RTC; + if ((dintr_tstamp_cnt[DINTR_TSTAMP2] - dintr_tstamp_cnt[DINTR_TSTAMP1]) + > dintr_enter_symmon) { +#ifdef SPLDEBUG + extern int spldebug_log_off; + + spldebug_log_off = 1; +#endif /* SPLDEBUG */ + debug("ring"); +#ifdef SPLDEBUG + spldebug_log_off = 0; +#endif /* SPLDEBUG */ + } +#endif + dintr_tstamp_cnt[DINTR_CNTIDX]++; + + return; +} + +static int install_cpulat_first=0; + +void +install_cpulatintr(cpuid_t cpu) +{ + int intr_bit; + devfs_handle_t cpuv = cpuid_to_vertex(cpu); + + intr_bit = CPU_INTRLAT_A + cputolocalslice(cpu); + if (intr_bit != intr_reserve_level(cpu, intr_bit, II_THREADED, + cpuv, "intrlat")) + panic( "install_cpulatintr: Can't reserve interrupt."); + + if (intr_connect_level(cpu, intr_bit, INTPEND0_MAXMASK, + cpulatintr, NULL, NULL)) + panic( "install_cpulatintr: Can't connect interrupt."); + + if (!install_cpulat_first) { + install_cpulat_first++; + idbg_addfunc("tstamp_debug", (void (*)())idbg_tstamp_debug); +#if defined(SPLDEBUG) || defined(SPLDEBUG_CPU_EVENTS) + idbg_addfunc("splx_log", (void (*)())idbg_splx_log); +#endif /* SPLDEBUG || SPLDEBUG_CPU_EVENTS */ + } +} +#endif /* BRINGUP */ + +#endif /* DEBUG_INTR_TSTAMP */ + +#ifndef BRINGUP +/* ARGSUSED */ +static void +dbgintr(void *arg) +{ + /* + * Hardware only clears IO interrupts so we have to clear our level + * here. + */ + LOCAL_HUB_CLR_INTR(N_INTPEND_BITS + DEBUG_INTR_A + cputolocalslice(cpuid())); + + debug("zing"); + return; +} + + +void +install_dbgintr(cpuid_t cpu) +{ + int intr_bit; + devfs_handle_t cpuv = cpuid_to_vertex(cpu); + + intr_bit = N_INTPEND_BITS + DEBUG_INTR_A + cputolocalslice(cpu); + if (intr_bit != intr_reserve_level(cpu, intr_bit, 1, cpuv, "DEBUG")) + panic("install_dbgintr: Can't reserve interrupt. 
" + " intr_bit %d" ,intr_bit); + + if (intr_connect_level(cpu, intr_bit, INTPEND1_MAXMASK, + dbgintr, NULL, NULL)) + panic("install_dbgintr: Can't connect interrupt."); + +#ifdef DEBUG_INTR_TSTAMP + /* Set up my interrupt latency test interrupt */ + install_cpulatintr(cpu); +#endif +} + +/* ARGSUSED */ +static void +tlbintr(void *arg) +{ + extern void tlbflush_rand(void); + + /* + * Hardware only clears IO interrupts so we have to clear our level + * here. + */ + LOCAL_HUB_CLR_INTR(N_INTPEND_BITS + TLB_INTR_A + cputolocalslice(cpuid())); + + tlbflush_rand(); + return; +} + + +void +install_tlbintr(cpuid_t cpu) +{ + int intr_bit; + devfs_handle_t cpuv = cpuid_to_vertex(cpu); + + intr_bit = N_INTPEND_BITS + TLB_INTR_A + cputolocalslice(cpu); + if (intr_bit != intr_reserve_level(cpu, intr_bit, 1, cpuv, "DEBUG")) + panic("install_tlbintr: Can't reserve interrupt. " + " intr_bit %d" ,intr_bit); + + if (intr_connect_level(cpu, intr_bit, INTPEND1_MAXMASK, + tlbintr, NULL, NULL)) + panic("install_tlbintr: Can't connect interrupt."); + +} + + +/* + * Send an interrupt to all nodes. Don't panic if we get an error. + * Returns 1 if any exceptions occurred. + */ +int +protected_broadcast(hubreg_t intrbit) +{ + nodepda_t *npdap = private.p_nodepda; + int byte, bit, sn; + int error = 0; + + extern int _wbadaddr_val(volatile void *, int, volatile int *); + + /* Send rather than clear an interrupt. */ + intrbit |= 0x100; + + for (byte = 0; byte < NASID_MASK_BYTES; byte++) { + for (bit = 0; bit < 8; bit++) { + if (npdap->nasid_mask[byte] & (1 << bit)) { + nasid_t nasid = byte * 8 + bit; + for (sn=0; sn<NUM_SUBNODES; sn++) { + error += _wbadaddr_val(REMOTE_HUB_PI_ADDR(nasid, + sn, PI_INT_PEND_MOD), + sizeof(hubreg_t), + (volatile int *)&intrbit); + } + } + } + } + + return error; +} + + +/* + * Poll the interrupt register to see if another cpu has asked us + * to drop into the debugger (without lowering spl). + */ +void +chkdebug(void) +{ + if (LOCAL_HUB_L(PI_INT_PEND1) & (1L << (DEBUG_INTR_A + cputolocalslice(cpuid())))) + dbgintr((void *)NULL); +} + + +/* + * Install special graphics interrupt. + */ +void +install_gfxintr(cpuid_t cpu, ilvl_t swlevel, intr_func_t intr_func, void *intr_arg) +{ + int intr_bit = GFX_INTR_A + cputolocalslice(cpu); + + if (intr_connect_level(cpu, intr_bit, swlevel, + intr_func, intr_arg, NULL)) + panic("install_gfxintr: Can't connect interrupt."); +} + + +/* + * Install page migration interrupt handler. + */ +void +hub_migrintr_init(cnodeid_t cnode) +{ + cpuid_t cpu = cnodetocpu(cnode); + int intr_bit = INT_PEND0_BASELVL + PG_MIG_INTR; + + if (numnodes == 1){ + /* + * No migration with just one node.. + */ + return; + } + + if (cpu != -1) { + if (intr_connect_level(cpu, intr_bit, 0, + (intr_func_t) migr_intr_handler, 0, (intr_func_t) migr_intr_prologue_handler)) + panic( "hub_migrintr_init: Can't connect interrupt."); + } +} + + +/* + * Cause all CPUs to stop by sending them each a DEBUG interrupt. + * Parameter is actually a (cpumask_t *). + */ +void +debug_stop_all_cpus(void *stoplist) +{ + int cpu; + ulong level; + + for (cpu=0; cpu<maxcpus; cpu++) { + if (cpu == cpuid()) + continue; + if (!cpu_enabled(cpu)) + continue; + /* "-1" is the old style parameter OR could be the new style + * if no-one is currently stopped. We only stop the + * requested cpus, the others are already stopped (probably + * at a breakpoint). 
+ */ + + if (((cpumask_t *)stoplist != (cpumask_t *)-1LL) && + (!CPUMASK_TSTB(*(cpumask_t*)stoplist, cpu))) + continue; + + /* + * CPU lslice A gets level DEBUG_INTR_A + * CPU lslice B gets level DEBUG_INTR_B + */ + level = DEBUG_INTR_A + LOCALCPU(get_cpu_slice(cpu)); + /* + * Convert the compact hub number to the NASID to get the + * correct part of the address space. Then set the interrupt + * bit associated with the CPU we want to send the interrupt + * to. + */ + REMOTE_CPU_SEND_INTR(cpu, N_INTPEND_BITS + level); + + } +} + + +struct hardwired_intr_s { + signed char level; + int flags; + char *name; +} const hardwired_intr[] = { + { INT_PEND0_BASELVL + RESERVED_INTR, 0, "Reserved" }, + { INT_PEND0_BASELVL + GFX_INTR_A, 0, "Gfx A" }, + { INT_PEND0_BASELVL + GFX_INTR_B, 0, "Gfx B" }, + { INT_PEND0_BASELVL + PG_MIG_INTR, II_THREADED, "Migration" }, +#if defined(SN1) && !defined(DIRECT_L1_CONSOLE) + { INT_PEND0_BASELVL + UART_INTR, II_THREADED, "Bedrock/L1" }, +#else + { INT_PEND0_BASELVL + UART_INTR, 0, "Hub I2C" }, +#endif + { INT_PEND0_BASELVL + CC_PEND_A, 0, "Crosscall A" }, + { INT_PEND0_BASELVL + CC_PEND_B, 0, "Crosscall B" }, + { INT_PEND0_BASELVL + MSC_MESG_INTR, II_THREADED, "MSC Message" }, + { INT_PEND0_BASELVL + CPU_ACTION_A, 0, "CPU Action A" }, + { INT_PEND0_BASELVL + CPU_ACTION_B, 0, "CPU Action B" }, + { INT_PEND1_BASELVL + IO_ERROR_INTR, II_ERRORINT, "IO Error" }, + { INT_PEND1_BASELVL + CLK_ERR_INTR, II_ERRORINT, "Clock Error" }, + { INT_PEND1_BASELVL + COR_ERR_INTR_A, II_ERRORINT, "Correctable Error A" }, + { INT_PEND1_BASELVL + COR_ERR_INTR_B, II_ERRORINT, "Correctable Error B" }, + { INT_PEND1_BASELVL + MD_COR_ERR_INTR, II_ERRORINT, "MD Correct. Error" }, + { INT_PEND1_BASELVL + NI_ERROR_INTR, II_ERRORINT, "NI Error" }, + { INT_PEND1_BASELVL + NI_BRDCAST_ERR_A, II_ERRORINT, "Remote NI Error"}, + { INT_PEND1_BASELVL + NI_BRDCAST_ERR_B, II_ERRORINT, "Remote NI Error"}, + { INT_PEND1_BASELVL + MSC_PANIC_INTR, II_ERRORINT, "MSC Panic" }, + { INT_PEND1_BASELVL + LLP_PFAIL_INTR_A, II_ERRORINT, "LLP Pfail WAR" }, + { INT_PEND1_BASELVL + LLP_PFAIL_INTR_B, II_ERRORINT, "LLP Pfail WAR" }, +#ifdef SN1 + { INT_PEND1_BASELVL + NACK_INT_A, 0, "CPU A Nack count == NACK_CMP" }, + { INT_PEND1_BASELVL + NACK_INT_B, 0, "CPU B Nack count == NACK_CMP" }, + { INT_PEND1_BASELVL + LB_ERROR, 0, "Local Block Error" }, + { INT_PEND1_BASELVL + XB_ERROR, 0, "Local XBar Error" }, +#endif /* SN1 */ + { -1, 0, (char *)NULL} +}; + +/* + * Reserve all of the hardwired interrupt levels so they're not used as + * general purpose bits later. + */ +void +intr_reserve_hardwired(cnodeid_t cnode) +{ + cpuid_t cpu; + int level; + int i; + char subnode_done[NUM_SUBNODES]; + + cpu = cnodetocpu(cnode); + if (cpu == CPU_NONE) { + printk("Node %d has no CPUs", cnode); + return; + } + + for (i=0; i<NUM_SUBNODES; i++) + subnode_done[i] = 0; + + for (; cpu<maxcpus && cpu_enabled(cpu) && cputocnode(cpu) == cnode; cpu++) { + int which_subnode = cpuid_to_subnode(cpu); + if (subnode_done[which_subnode]) + continue; + subnode_done[which_subnode] = 1; + + for (i = 0; hardwired_intr[i].level != -1; i++) { + level = hardwired_intr[i].level; + + if (level != intr_reserve_level(cpu, level, + hardwired_intr[i].flags, + (devfs_handle_t) NULL, + hardwired_intr[i].name)) + panic("intr_reserve_hardwired: Can't reserve level %d.", level); + } + } +} + +#endif /* BRINGUP */ + +/* + * Check and clear interrupts. 
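+ *
+ * Called from intr_clear_all() below for each INT_PEND register of every
+ * subnode; any bit left set (for example by the PROM) is cleared with
+ * LOCAL_HUB_CLR_INTR so the node starts with a clean interrupt state.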
+ */ +/*ARGSUSED*/ +static void +intr_clear_bits(nasid_t nasid, volatile hubreg_t *pend, int base_level, + char *name) +{ + volatile hubreg_t bits; + int i; + + /* Check pending interrupts */ + if ((bits = HUB_L(pend)) != 0) { + for (i = 0; i < N_INTPEND_BITS; i++) { + if (bits & (1 << i)) { +#ifdef INTRDEBUG + printk( "Nasid %d interrupt bit %d set in %s", + nasid, i, name); +#endif + LOCAL_HUB_CLR_INTR(base_level + i); + } + } + } +} + +/* + * Clear out our interrupt registers. + */ +void +intr_clear_all(nasid_t nasid) +{ + int sn; + + for(sn=0; sn<NUM_SUBNODES; sn++) { + REMOTE_HUB_PI_S(nasid, sn, PI_INT_MASK0_A, 0); + REMOTE_HUB_PI_S(nasid, sn, PI_INT_MASK0_B, 0); + REMOTE_HUB_PI_S(nasid, sn, PI_INT_MASK1_A, 0); + REMOTE_HUB_PI_S(nasid, sn, PI_INT_MASK1_B, 0); + + intr_clear_bits(nasid, REMOTE_HUB_PI_ADDR(nasid, sn, PI_INT_PEND0), + INT_PEND0_BASELVL, "INT_PEND0"); + intr_clear_bits(nasid, REMOTE_HUB_PI_ADDR(nasid, sn, PI_INT_PEND1), + INT_PEND1_BASELVL, "INT_PEND1"); + } +} + +/* + * Dump information about a particular interrupt vector. + */ +static void +dump_vector(intr_info_t *info, intr_vector_t *vector, int bit, hubreg_t ip, + hubreg_t ima, hubreg_t imb, void (*pf)(char *, ...)) +{ + hubreg_t value = 1LL << bit; + + pf(" Bit %02d: %s: func 0x%x arg 0x%x prefunc 0x%x\n", + bit, info->ii_name, + vector->iv_func, vector->iv_arg, vector->iv_prefunc); + pf(" vertex 0x%x %s%s", + info->ii_owner_dev, + ((info->ii_flags) & II_RESERVE) ? "R" : "U", + ((info->ii_flags) & II_INUSE) ? "C" : "-"); + pf("%s%s%s%s", + ip & value ? "P" : "-", + ima & value ? "A" : "-", + imb & value ? "B" : "-", + ((info->ii_flags) & II_ERRORINT) ? "E" : "-"); + pf("\n"); +} + + +/* + * Dump information about interrupt vector assignment. + */ +void +intr_dumpvec(cnodeid_t cnode, void (*pf)(char *, ...)) +{ + nodepda_t *npda; + int ip, sn, bit; + intr_vecblk_t *dispatch; + hubreg_t ipr, ima, imb; + nasid_t nasid; + + if ((cnode < 0) || (cnode >= numnodes)) { + pf("intr_dumpvec: cnodeid out of range: %d\n", cnode); + return ; + } + + nasid = COMPACT_TO_NASID_NODEID(cnode); + + if (nasid == INVALID_NASID) { + pf("intr_dumpvec: Bad cnodeid: %d\n", cnode); + return ; + } + + + npda = NODEPDA(cnode); + + for (sn = 0; sn < NUM_SUBNODES; sn++) { + for (ip = 0; ip < 2; ip++) { + dispatch = ip ? &(SNPDA(npda,sn)->intr_dispatch1) : &(SNPDA(npda,sn)->intr_dispatch0); + ipr = REMOTE_HUB_PI_L(nasid, sn, ip ? PI_INT_PEND1 : PI_INT_PEND0); + ima = REMOTE_HUB_PI_L(nasid, sn, ip ? PI_INT_MASK1_A : PI_INT_MASK0_A); + imb = REMOTE_HUB_PI_L(nasid, sn, ip ? PI_INT_MASK1_B : PI_INT_MASK0_B); + + pf("Node %d INT_PEND%d:\n", cnode, ip); + + if (dispatch->ithreads_enabled) + pf(" Ithreads enabled\n"); + else + pf(" Ithreads disabled\n"); + pf(" vector_count = %d, vector_state = %d\n", + dispatch->vector_count, + dispatch->vector_state); + pf(" CPU A count %d, CPU B count %d\n", + dispatch->cpu_count[0], + dispatch->cpu_count[1]); + pf(" &vector_lock = 0x%x\n", + &(dispatch->vector_lock)); + for (bit = 0; bit < N_INTPEND_BITS; bit++) { + if ((dispatch->info[bit].ii_flags & II_RESERVE) || + (ipr & (1L << bit))) { + dump_vector(&(dispatch->info[bit]), + &(dispatch->vectors[bit]), + bit, ipr, ima, imb, pf); + } + } + pf("\n"); + } + } +} + diff --git a/arch/ia64/sn/io/ml_iograph.c b/arch/ia64/sn/io/ml_iograph.c new file mode 100644 index 000000000..a43fe74cc --- /dev/null +++ b/arch/ia64/sn/io/ml_iograph.c @@ -0,0 +1,1582 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. 
See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <linux/ctype.h> +#include <asm/sn/sgi.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/hcl_util.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/xtalk/xbow.h> +#include <asm/sn/pci/bridge.h> +#include <asm/sn/klconfig.h> +#include <asm/sn/eeprom.h> +#include <asm/sn/sn_private.h> +#include <asm/sn/pci/pcibr.h> +#include <asm/sn/xtalk/xtalk.h> +#include <asm/sn/xtalk/xswitch.h> +#include <asm/sn/xtalk/xwidget.h> +#include <asm/sn/xtalk/xtalk_private.h> +#include <asm/sn/xtalk/xtalkaddrs.h> + +extern int maxnodes; + +/* #define PROBE_TEST */ + +/* At most 2 hubs can be connected to an xswitch */ +#define NUM_XSWITCH_VOLUNTEER 2 + +/* + * Track which hubs have volunteered to manage devices hanging off of + * a Crosstalk Switch (e.g. xbow). This structure is allocated, + * initialized, and hung off the xswitch vertex early on when the + * xswitch vertex is created. + */ +typedef struct xswitch_vol_s { + struct semaphore xswitch_volunteer_mutex; + int xswitch_volunteer_count; + devfs_handle_t xswitch_volunteer[NUM_XSWITCH_VOLUNTEER]; +} *xswitch_vol_t; + +void +xswitch_vertex_init(devfs_handle_t xswitch) +{ + xswitch_vol_t xvolinfo; + int rc; + + xvolinfo = kmalloc(sizeof(struct xswitch_vol_s), GFP_KERNEL); + init_MUTEX(&xvolinfo->xswitch_volunteer_mutex); + xvolinfo->xswitch_volunteer_count = 0; + rc = hwgraph_info_add_LBL(xswitch, + INFO_LBL_XSWITCH_VOL, + (arbitrary_info_t)xvolinfo); + ASSERT(rc == GRAPH_SUCCESS); rc = rc; +} + + +/* + * When assignment of hubs to widgets is complete, we no longer need the + * xswitch volunteer structure hanging around. Destroy it. + */ +static void +xswitch_volunteer_delete(devfs_handle_t xswitch) +{ + xswitch_vol_t xvolinfo; + int rc; + + rc = hwgraph_info_remove_LBL(xswitch, + INFO_LBL_XSWITCH_VOL, + (arbitrary_info_t *)&xvolinfo); +#ifndef CONFIG_IA64_SGI_IO + ASSERT(rc == GRAPH_SUCCESS); rc = rc; +#endif + + kfree(xvolinfo); +} +/* + * A Crosstalk master volunteers to manage xwidgets on the specified xswitch. + */ +/* ARGSUSED */ +static void +volunteer_for_widgets(devfs_handle_t xswitch, devfs_handle_t master) +{ + xswitch_vol_t xvolinfo = NULL; + + (void)hwgraph_info_get_LBL(xswitch, + INFO_LBL_XSWITCH_VOL, + (arbitrary_info_t *)&xvolinfo); + if (xvolinfo == NULL) { +#ifndef CONFIG_IA64_SGI_IO + if (!is_headless_node_vertex(master)) + cmn_err(CE_WARN, + "volunteer for widgets: vertex %v has no info label", + xswitch); +#endif + return; + } + +#ifndef CONFIG_IA64_SGI_IO + mutex_lock(&xvolinfo->xswitch_volunteer_mutex, PZERO); +#endif + ASSERT(xvolinfo->xswitch_volunteer_count < NUM_XSWITCH_VOLUNTEER); + xvolinfo->xswitch_volunteer[xvolinfo->xswitch_volunteer_count] = master; + xvolinfo->xswitch_volunteer_count++; +#ifndef CONFIG_IA64_SGI_IO + mutex_unlock(&xvolinfo->xswitch_volunteer_mutex); +#endif +} + +#ifndef BRINGUP +/* + * The "ideal fixed assignment" of 12 IO slots to 4 node slots. + * At index N is the node slot number of the node board that should + * ideally control the widget in IO slot N. Note that if there is + * only one node board on a given xbow, it will control all of the + * devices on that xbow regardless of these defaults. 
+ * + * N1 controls IO slots IO1, IO3, IO5 (upper left) + * N3 controls IO slots IO2, IO4, IO6 (upper right) + * N2 controls IO slots IO7, IO9, IO11 (lower left) + * N4 controls IO slots IO8, IO10, IO12 (lower right) + * + * This makes assignments predictable and easily controllable. + * TBD: Allow administrator to override these defaults. + */ +static slotid_t ideal_assignment[] = { + -1, /* IO0 -->non-existent */ + 1, /* IO1 -->N1 */ + 3, /* IO2 -->N3 */ + 1, /* IO3 -->N1 */ + 3, /* IO4 -->N3 */ + 1, /* IO5 -->N1 */ + 3, /* IO6 -->N3 */ + 2, /* IO7 -->N2 */ + 4, /* IO8 -->N4 */ + 2, /* IO9 -->N2 */ + 4, /* IO10-->N4 */ + 2, /* IO11-->N2 */ + 4 /* IO12-->N4 */ +}; + +static int +is_ideal_assignment(slotid_t hubslot, slotid_t ioslot) +{ + return(ideal_assignment[ioslot] == hubslot); +} +#endif /* ifndef BRINGUP */ + +extern int xbow_port_io_enabled(nasid_t nasid, int widgetnum); + +/* + * Assign all the xwidgets hanging off the specified xswitch to the + * Crosstalk masters that have volunteered for xswitch duty. + */ +/* ARGSUSED */ +static void +assign_widgets_to_volunteers(devfs_handle_t xswitch, devfs_handle_t hubv) +{ + xswitch_info_t xswitch_info; + xswitch_vol_t xvolinfo = NULL; + xwidgetnum_t widgetnum; + int curr_volunteer, num_volunteer; + nasid_t nasid; + hubinfo_t hubinfo; +#ifndef BRINGUP + int xbownum; +#endif + + hubinfo_get(hubv, &hubinfo); + nasid = hubinfo->h_nasid; + + xswitch_info = xswitch_info_get(xswitch); + ASSERT(xswitch_info != NULL); + + (void)hwgraph_info_get_LBL(xswitch, + INFO_LBL_XSWITCH_VOL, + (arbitrary_info_t *)&xvolinfo); + if (xvolinfo == NULL) { +#ifndef CONFIG_IA64_SGI_IO + if (!is_headless_node_vertex(hubv)) + cmn_err(CE_WARN, + "assign_widgets_to_volunteers:vertex %v has " + " no info label", + xswitch); +#endif + return; + } + + num_volunteer = xvolinfo->xswitch_volunteer_count; + ASSERT(num_volunteer > 0); + curr_volunteer = 0; + + /* Assign master hub for xswitch itself. */ + if (HUB_WIDGET_ID_MIN > 0) { + hubv = xvolinfo->xswitch_volunteer[0]; + xswitch_info_master_assignment_set(xswitch_info, (xwidgetnum_t)0, hubv); + } + +#ifndef BRINGUP + xbownum = get_node_crossbow(nasid); +#endif /* ifndef BRINGUP */ + + /* + * TBD: Use administrative information to alter assignment of + * widgets to hubs. + */ + for (widgetnum=HUB_WIDGET_ID_MIN; widgetnum <= HUB_WIDGET_ID_MAX; widgetnum++) { + +#ifndef BRINGUP + int i; +#endif + /* + * Ignore disabled/empty ports. + */ + if (!xbow_port_io_enabled(nasid, widgetnum)) + continue; + + /* + * If this is the master IO board, assign it to the same + * hub that owned it in the prom. + */ + if (is_master_nasid_widget(nasid, widgetnum)) { + int i; + + for (i=0; i<num_volunteer; i++) { + hubv = xvolinfo->xswitch_volunteer[i]; + hubinfo_get(hubv, &hubinfo); + nasid = hubinfo->h_nasid; + if (nasid == get_console_nasid()) + goto do_assignment; + } +#ifndef CONFIG_IA64_SGI_IO + cmn_err(CE_PANIC, + "Nasid == %d, console nasid == %d", + nasid, get_console_nasid()); +#endif + } + +#ifndef BRINGUP + /* + * Try to do the "ideal" assignment if IO slots to nodes. + */ + for (i=0; i<num_volunteer; i++) { + hubv = xvolinfo->xswitch_volunteer[i]; + hubinfo_get(hubv, &hubinfo); + nasid = hubinfo->h_nasid; + if (is_ideal_assignment(SLOTNUM_GETSLOT(get_node_slotid(nasid)), + SLOTNUM_GETSLOT(get_widget_slotnum(xbownum, widgetnum)))) { + + goto do_assignment; + + } + } +#endif /* ifndef BRINGUP */ + + /* + * Do a round-robin assignment among the volunteer nodes. 
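+		 * With the usual two volunteers per xswitch, widgets that were not
+		 * claimed by the console/master checks above simply alternate
+		 * between the two hubs.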
+ */ + hubv = xvolinfo->xswitch_volunteer[curr_volunteer]; + curr_volunteer = (curr_volunteer + 1) % num_volunteer; + /* fall through */ + +do_assignment: + /* + * At this point, we want to make hubv the master of widgetnum. + */ + xswitch_info_master_assignment_set(xswitch_info, widgetnum, hubv); + } + + xswitch_volunteer_delete(xswitch); +} + +/* + * Early iograph initialization. Called by master CPU in mlreset(). + * Useful for including iograph.o in kernel.o. + */ +void +iograph_early_init(void) +{ +/* + * Need new way to get this information .. + */ + cnodeid_t cnode; + nasid_t nasid; + lboard_t *board; + + /* + * Init. the board-to-hwgraph link early, so FRU analyzer + * doesn't trip on leftover values if we panic early on. + */ + for(cnode = 0; cnode < numnodes; cnode++) { + nasid = COMPACT_TO_NASID_NODEID(cnode); + board = (lboard_t *)KL_CONFIG_INFO(nasid); + printk("iograph_early_init: Found board 0x%p\n", board); + + /* Check out all the board info stored on a node */ + while(board) { + board->brd_graph_link = GRAPH_VERTEX_NONE; + board = KLCF_NEXT(board); + printk("iograph_early_init: Found board 0x%p\n", board); + + + } + } + + hubio_init(); +} + +#ifndef CONFIG_IA64_SGI_IO +/* There is an identical definition of this in os/scheduler/runq.c */ +#define INIT_COOKIE(cookie) cookie.must_run = 0; cookie.cpu = PDA_RUNANYWHERE +/* + * These functions absolutely doesn't belong here. It's here, though, + * until the scheduler provides a platform-independent version + * that works the way it should. The interface will definitely change, + * too. Currently used only in this file and by io/cdl.c in order to + * bind various I/O threads to a CPU on the proper node. + */ +cpu_cookie_t +setnoderun(cnodeid_t cnodeid) +{ + int i; + cpuid_t cpunum; + cpu_cookie_t cookie; + + INIT_COOKIE(cookie); + if (cnodeid == CNODEID_NONE) + return(cookie); + + /* + * Do a setmustrun to one of the CPUs on the specified + * node. + */ + if ((cpunum = CNODE_TO_CPU_BASE(cnodeid)) == CPU_NONE) { + return(cookie); + } + + cpunum += CNODE_NUM_CPUS(cnodeid) - 1; + + for (i = 0; i < CNODE_NUM_CPUS(cnodeid); i++, cpunum--) { + + if (cpu_enabled(cpunum)) { + cookie = setmustrun(cpunum); + break; + } + } + + return(cookie); +} + +void +restorenoderun(cpu_cookie_t cookie) +{ + restoremustrun(cookie); +} +static sema_t io_init_sema; + +#endif /* !CONFIG_IA64_SGI_IO */ + +struct semaphore io_init_sema; + + +/* + * Let boot processor know that we're done initializing our node's IO + * and then exit. + */ +/* ARGSUSED */ +static void +io_init_done(cnodeid_t cnodeid,cpu_cookie_t c) +{ +#ifndef CONFIG_IA64_SGI_IO + /* Let boot processor know that we're done. */ + up(&io_init_sema); + /* This is for the setnoderun done when the io_init thread + * started + */ + restorenoderun(c); + sthread_exit(); +#endif +} + +/* + * Probe to see if this hub's xtalk link is active. If so, + * return the Crosstalk Identification of the widget that we talk to. + * This is called before any of the Crosstalk infrastructure for + * this hub is set up. It's usually called on the node that we're + * probing, but not always. + * + * TBD: Prom code should actually do this work, and pass through + * hwid for our use. + */ +static void +early_probe_for_widget(devfs_handle_t hubv, xwidget_hwid_t hwid) +{ + hubreg_t llp_csr_reg; + nasid_t nasid; + hubinfo_t hubinfo; + + hubinfo_get(hubv, &hubinfo); + nasid = hubinfo->h_nasid; + + llp_csr_reg = REMOTE_HUB_L(nasid, IIO_LLP_CSR); + /* + * If link is up, read the widget's part number. 
+ * A direct connect widget must respond to widgetnum=0. + */ + if (llp_csr_reg & IIO_LLP_CSR_IS_UP) { + /* TBD: Put hub into "indirect" mode */ + /* + * We're able to read from a widget because our hub's + * WIDGET_ID was set up earlier. + */ +#ifdef BRINGUP + widgetreg_t widget_id = *(volatile widgetreg_t *) + (RAW_NODE_SWIN_BASE(nasid, 0x0) + WIDGET_ID); + + printk("early_probe_for_widget: Hub Vertex 0x%p is UP widget_id = 0x%x Register 0x%p\n", hubv, widget_id, + (volatile widgetreg_t *)(RAW_NODE_SWIN_BASE(nasid, 0x0) + WIDGET_ID) ); + +#else /* !BRINGUP */ + widgetreg_t widget_id = XWIDGET_ID_READ(nasid, 0); +#endif /* BRINGUP */ + + hwid->part_num = XWIDGET_PART_NUM(widget_id); + hwid->rev_num = XWIDGET_REV_NUM(widget_id); + hwid->mfg_num = XWIDGET_MFG_NUM(widget_id); + + /* TBD: link reset */ + } else { + + panic("\n\n**** early_probe_for_widget: Hub Vertex 0x%p is DOWN llp_csr_reg 0x%x ****\n\n", hubv, llp_csr_reg); + + hwid->part_num = XWIDGET_PART_NUM_NONE; + hwid->rev_num = XWIDGET_REV_NUM_NONE; + hwid->mfg_num = XWIDGET_MFG_NUM_NONE; + } + +} + +/* Add inventory information to the widget vertex + * Right now (module,slot,revision) is being + * added as inventory information. + */ +static void +xwidget_inventory_add(devfs_handle_t widgetv, + lboard_t *board, + struct xwidget_hwid_s hwid) +{ + if (!board) + return; + /* Donot add inventory information for the baseio + * on a speedo with an xbox. It has already been + * taken care of in SN00_vmc. + * Speedo with xbox's baseio comes in at slot io1 (widget 9) + */ + device_inventory_add(widgetv,INV_IOBD,board->brd_type, + board->brd_module, + SLOTNUM_GETSLOT(board->brd_slot), + hwid.rev_num); +} + +/* + * io_xswitch_widget_init + * + */ + +/* defined in include/linux/ctype.h */ +/* #define toupper(c) (islower(c) ? (c) - 'a' + 'A' : (c)) */ + +void +io_xswitch_widget_init(devfs_handle_t xswitchv, + devfs_handle_t hubv, + xwidgetnum_t widgetnum, + async_attach_t aa) +{ + xswitch_info_t xswitch_info; + xwidgetnum_t hub_widgetid; + devfs_handle_t widgetv; + cnodeid_t cnode; + widgetreg_t widget_id; + nasid_t nasid, peer_nasid; + struct xwidget_hwid_s hwid; + hubinfo_t hubinfo; + /*REFERENCED*/ + int rc; + char slotname[SLOTNUM_MAXLENGTH]; + char pathname[128]; + char new_name[64]; + moduleid_t module; + slotid_t slot; + lboard_t *board = NULL; + + printk("\nio_xswitch_widget_init: hubv 0x%p, xswitchv 0x%p, widgetnum 0x%x\n", hubv, xswitchv, widgetnum); + /* + * Verify that xswitchv is indeed an attached xswitch. + */ + xswitch_info = xswitch_info_get(xswitchv); + ASSERT(xswitch_info != NULL); + + hubinfo_get(hubv, &hubinfo); + nasid = hubinfo->h_nasid; + cnode = NASID_TO_COMPACT_NODEID(nasid); + hub_widgetid = hubinfo->h_widgetid; + + + /* Who's the other guy on out crossbow (if anyone) */ + peer_nasid = NODEPDA(cnode)->xbow_peer; + if (peer_nasid == INVALID_NASID) + /* If I don't have a peer, use myself. */ + peer_nasid = nasid; + + + /* Check my xbow structure and my peer's */ + if (!xbow_port_io_enabled(nasid, widgetnum) && + !xbow_port_io_enabled(peer_nasid, widgetnum)) { + return; + } + + if (xswitch_info_link_ok(xswitch_info, widgetnum)) { + char name[4]; + /* + * If the current hub is not supposed to be the master + * for this widgetnum, then skip this widget. 
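+		 * (The assignment being checked here was recorded earlier by
+		 * assign_widgets_to_volunteers(), so on a two-hub crossbow each
+		 * hub initializes only the widgets it was given.)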
+ */ + if (xswitch_info_master_assignment_get(xswitch_info, + widgetnum) != hubv) { + return; + } + + module = NODEPDA(cnode)->module_id; +#ifdef XBRIDGE_REGS_SIM + /* hardwire for now...could do this with something like: + * xbow_soft_t soft = hwgraph_fastinfo_get(vhdl); + * xbow_t xbow = soft->base; + * xbowreg_t xwidget_id = xbow->xb_wid_id; + * but I don't feel like figuring out vhdl right now.. + * and I know for a fact the answer is 0x2d000049 + */ + printk("io_xswitch_widget_init: XBRIDGE_REGS_SIM FIXME: reading xwidget id: hardwired to xbridge (0x2d000049).\n"); + printk("XWIDGET_PART_NUM(0x2d000049)= 0x%x\n", XWIDGET_PART_NUM(0x2d000049)); + if (XWIDGET_PART_NUM(0x2d000049)==XXBOW_WIDGET_PART_NUM) { +#else + if (nasid_has_xbridge(nasid)) { +#endif /* XBRIDGE_REGS_SIM */ + board = find_lboard_module_class( + (lboard_t *)KL_CONFIG_INFO(nasid), + module, + KLTYPE_IOBRICK); + + if (board) + printk("io_xswitch_widget_init: Found KLTYPE_IOBRICK Board 0x%p brd_type 0x%x\n", board, board->brd_type); + + /* + * BRINGUP + * Make sure we really want to say xbrick, pbrick, + * etc. rather than XIO, graphics, etc. + */ + +#ifdef SUPPORT_PRINTING_M_FORMAT + sprintf(pathname, EDGE_LBL_MODULE "/%M/" +#else + sprintf(pathname, EDGE_LBL_MODULE "/%x/" +#endif + "%cbrick" "/%s/%d", + NODEPDA(cnode)->module_id, +#ifdef BRINGUP + + (board->brd_type == KLTYPE_IBRICK) ? 'I' : + (board->brd_type == KLTYPE_PBRICK) ? 'P' : + (board->brd_type == KLTYPE_XBRICK) ? 'X' : '?', +#else + toupper(MODULE_GET_BTCHAR(NODEPDA(cnode)->module_id)), +#endif /* BRINGUP */ + EDGE_LBL_XTALK, widgetnum); + } + + printk("io_xswitch_widget_init: path= %s\n", pathname); + rc = hwgraph_path_add(hwgraph_root, pathname, &widgetv); + + ASSERT(rc == GRAPH_SUCCESS); + + /* This is needed to let the user programs to map the + * module,slot numbers to the corresponding widget numbers + * on the crossbow. + */ + rc = device_master_set(hwgraph_connectpt_get(widgetv), hubv); + + /* If we are looking at the global master io6 + * then add information about the version of + * the io6prom as a part of "detailed inventory" + * information. + */ + if (is_master_baseio(nasid, + NODEPDA(cnode)->module_id, +#ifdef BRINGUP + get_widget_slotnum(0,widgetnum))) { +#else + <<< BOMB! >>> Need a new way to get slot numbers on IP35/IP37 +#endif + extern void klhwg_baseio_inventory_add(devfs_handle_t, + cnodeid_t); + module = NODEPDA(cnode)->module_id; + +#ifdef XBRIDGE_REGS_SIM + printk("io_xswitch_widget_init: XBRIDGE_REGS_SIM FIXME: reading xwidget id: hardwired to xbridge (0x2d000049).\n"); + if (XWIDGET_PART_NUM(0x2d000049)==XXBOW_WIDGET_PART_NUM) { +#else + if (nasid_has_xbridge(nasid)) { +#endif /* XBRIDGE_REGS_SIM */ + board = find_lboard_module( + (lboard_t *)KL_CONFIG_INFO(nasid), + module); + /* + * BRINGUP + * Change iobrick to correct i/o brick + */ +#ifdef SUPPORT_PRINTING_M_FORMAT + sprintf(pathname, EDGE_LBL_MODULE "/%M/" +#else + sprintf(pathname, EDGE_LBL_MODULE "/%x/" +#endif + "iobrick" "/%s/%d", + NODEPDA(cnode)->module_id, + EDGE_LBL_XTALK, widgetnum); + } else { +#ifdef BRINGUP + slot = get_widget_slotnum(0, widgetnum); +#else + <<< BOMB! Need a new way to get slot numbers on IP35/IP37 +#endif + board = get_board_name(nasid, module, slot, + new_name); + /* + * Create the vertex for the widget, + * using the decimal + * widgetnum as the name of the primary edge. 
+ */ +#ifdef SUPPORT_PRINTING_M_FORMAT + sprintf(pathname, EDGE_LBL_MODULE "/%M/" +#else + sprintf(pathname, EDGE_LBL_MODULE "/%x/" +#endif + EDGE_LBL_SLOT "/%s/%s", + NODEPDA(cnode)->module_id, + slotname, new_name); + } + + rc = hwgraph_path_add(hwgraph_root, pathname, &widgetv); + printk("io_xswitch_widget_init: (2) path= %s\n", pathname); + /* + * This is a weird ass code needed for error injection + * purposes. + */ + rc = device_master_set(hwgraph_connectpt_get(widgetv), hubv); + + klhwg_baseio_inventory_add(widgetv,cnode); + } + sprintf(name, "%d", widgetnum); + printk("io_xswitch_widget_init: FIXME hwgraph_edge_add %s xswitchv 0x%p, widgetv 0x%p\n", name, xswitchv, widgetv); + rc = hwgraph_edge_add(xswitchv, widgetv, name); + + /* + * crosstalk switch code tracks which + * widget is attached to each link. + */ + xswitch_info_vhdl_set(xswitch_info, widgetnum, widgetv); + + /* + * Peek at the widget to get its crosstalk part and + * mfgr numbers, then present it to the generic xtalk + * bus provider to have its driver attach routine + * called (or not). + */ +#ifdef XBRIDGE_REGS_SIM + widget_id = 0x2d000049; + printk("io_xswitch_widget_init: XBRIDGE_REGS_SIM FIXME: id hardwired to widget_id\n"); +#else + widget_id = XWIDGET_ID_READ(nasid, widgetnum); +#endif /* XBRIDGE_REGS_SIM */ + hwid.part_num = XWIDGET_PART_NUM(widget_id); + hwid.rev_num = XWIDGET_REV_NUM(widget_id); + hwid.mfg_num = XWIDGET_MFG_NUM(widget_id); + /* Store some inventory information about + * the xwidget in the hardware graph. + */ + xwidget_inventory_add(widgetv,board,hwid); + + (void)xwidget_register(&hwid, widgetv, widgetnum, + hubv, hub_widgetid, + aa); + +#ifdef SN0_USE_BTE + bte_bpush_war(cnode, (void *)board); +#endif + } + +} + + +static void +io_init_xswitch_widgets(devfs_handle_t xswitchv, cnodeid_t cnode) +{ + xwidgetnum_t widgetnum; + async_attach_t aa; + + aa = async_attach_new(); + + printk("io_init_xswitch_widgets: xswitchv 0x%p for cnode %d\n", xswitchv, cnode); + + for (widgetnum = HUB_WIDGET_ID_MIN; widgetnum <= HUB_WIDGET_ID_MAX; + widgetnum++) { +#ifdef BRINGUP + if (widgetnum != 0xe) + io_xswitch_widget_init(xswitchv, + cnodeid_to_vertex(cnode), + widgetnum, aa); + +#else + io_xswitch_widget_init(xswitchv, + cnodeid_to_vertex(cnode), + widgetnum, aa); +#endif /* BRINGUP */ + } + /* + * Wait for parallel attach threads, if any, to complete. + */ + async_attach_waitall(aa); + async_attach_free(aa); +} + +/* + * For each PCI bridge connected to the xswitch, add a link from the + * board's klconfig info to the bridge's hwgraph vertex. This lets + * the FRU analyzer find the bridge without traversing the hardware + * graph and risking hangs. + */ +static void +io_link_xswitch_widgets(devfs_handle_t xswitchv, cnodeid_t cnodeid) +{ + xwidgetnum_t widgetnum; + char pathname[128]; + devfs_handle_t vhdl; + nasid_t nasid, peer_nasid; + lboard_t *board; + + + + /* And its connected hub's nasids */ + nasid = COMPACT_TO_NASID_NODEID(cnodeid); + peer_nasid = NODEPDA(cnodeid)->xbow_peer; + + /* + * Look for paths matching "<widgetnum>/pci" under xswitchv. + * For every widget, init. its lboard's hwgraph link. If the + * board has a PCI bridge, point the link to it. 
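+	 * For widget 8, for instance, the loop below first traverses "8" to
+	 * find the widget vertex and then "8/pci" to find a Bridge hanging
+	 * off it; only the latter is stored in brd_graph_link.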
+ */ + for (widgetnum = HUB_WIDGET_ID_MIN; widgetnum <= HUB_WIDGET_ID_MAX; + widgetnum++) { + sprintf(pathname, "%d", widgetnum); + if (hwgraph_traverse(xswitchv, pathname, &vhdl) != + GRAPH_SUCCESS) + continue; + +#if defined (CONFIG_SGI_IP35) || defined (CONFIG_IA64_SGI_SN1) || defined (CONFIG_IA64_GENERIC) + board = find_lboard_module((lboard_t *)KL_CONFIG_INFO(nasid), + NODEPDA(cnodeid)->module_id); +#else + { + slotid_t slot; + slot = get_widget_slotnum(xbow_num, widgetnum); + board = find_lboard_modslot((lboard_t *)KL_CONFIG_INFO(nasid), + NODEPDA(cnodeid)->module_id, slot); + } +#endif /* CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 */ + if (board == NULL && peer_nasid != INVALID_NASID) { + /* + * Try to find the board on our peer + */ +#if defined (CONFIG_SGI_IP35) || defined (CONFIG_IA64_SGI_SN1) || defined (CONFIG_IA64_GENERIC) + board = find_lboard_module( + (lboard_t *)KL_CONFIG_INFO(peer_nasid), + NODEPDA(cnodeid)->module_id); + +#else + board = find_lboard_modslot((lboard_t *)KL_CONFIG_INFO(peer_nasid), + NODEPDA(cnodeid)->module_id, slot); + +#endif /* CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 */ + } + if (board == NULL) { +#ifndef CONFIG_IA64_SGI_IO + cmn_err(CE_WARN, + "Could not find PROM info for vertex %v, " + "FRU analyzer may fail", + vhdl); +#endif + return; + } + + sprintf(pathname, "%d/"EDGE_LBL_PCI, widgetnum); + if (hwgraph_traverse(xswitchv, pathname, &vhdl) == + GRAPH_SUCCESS) + board->brd_graph_link = vhdl; + else + board->brd_graph_link = GRAPH_VERTEX_NONE; + } +} + +/* + * Initialize all I/O on the specified node. + */ +static void +io_init_node(cnodeid_t cnodeid) +{ + /*REFERENCED*/ + devfs_handle_t hubv, switchv, widgetv; + struct xwidget_hwid_s hwid; + hubinfo_t hubinfo; + int is_xswitch; + nodepda_t *npdap; +#ifndef CONFIG_IA64_SGI_IO + sema_t *peer_sema = 0; +#else + struct semaphore *peer_sema = 0; +#endif + uint32_t widget_partnum; + nodepda_router_info_t *npda_rip; + cpu_cookie_t c = 0; + +#ifndef CONFIG_IA64_SGI_IO + /* Try to execute on the node that we're initializing. */ + c = setnoderun(cnodeid); +#endif + npdap = NODEPDA(cnodeid); + + /* + * Get the "top" vertex for this node's hardware + * graph; it will carry the per-hub hub-specific + * data, and act as the crosstalk provider master. + * It's canonical path is probably something of the + * form /hw/module/%M/slot/%d/node + */ + hubv = cnodeid_to_vertex(cnodeid); + printk("io_init_node: Initialize IO for cnode %d hubv(node) 0x%p npdap 0x%p\n", cnodeid, hubv, npdap); + + ASSERT(hubv != GRAPH_VERTEX_NONE); + +#if CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 || CONFIG_IA64_GENERIC + hubdev_docallouts(hubv); +#endif + + /* + * Set up the dependent routers if we have any. + */ + npda_rip = npdap->npda_rip_first; + + while(npda_rip) { + /* If the router info has not been initialized + * then we need to do the router initialization + */ + if (!npda_rip->router_infop) { + router_init(cnodeid,0,npda_rip); + } + npda_rip = npda_rip->router_next; + } + + /* + * Read mfg info on this hub + */ +#ifndef CONFIG_IA64_SGI_IO + printk("io_init_node: FIXME need to implement HUB_VERTEX_MFG_INFO\n"); + HUB_VERTEX_MFG_INFO(hubv); +#endif /* CONFIG_IA64_SGI_IO */ + + /* + * If nothing connected to this hub's xtalk port, we're done. 
+ */ + early_probe_for_widget(hubv, &hwid); + if (hwid.part_num == XWIDGET_PART_NUM_NONE) { +#ifdef PROBE_TEST + if ((cnodeid == 1) || (cnodeid == 2)) { + int index; + + for (index = 0; index < 600; index++) + printk("Interfering with device probing!!!\n"); + } +#endif + /* io_init_done takes cpu cookie as 2nd argument + * to do a restorenoderun for the setnoderun done + * at the start of this thread + */ + + printk("**** io_init_node: Node's 0x%p hub widget has XWIDGET_PART_NUM_NONE ****\n", hubv); + io_init_done(cnodeid,c); + /* NOTREACHED */ + } + + /* + * attach our hub_provider information to hubv, + * so we can use it as a crosstalk provider "master" + * vertex. + */ + xtalk_provider_register(hubv, &hub_provider); + xtalk_provider_startup(hubv); + + /* + * Create a vertex to represent the crosstalk bus + * attached to this hub, and a vertex to be used + * as the connect point for whatever is out there + * on the other side of our crosstalk connection. + * + * Crosstalk Switch drivers "climb up" from their + * connection point to try and take over the switch + * point. + * + * Of course, the edges and verticies may already + * exist, in which case our net effect is just to + * associate the "xtalk_" driver with the connection + * point for the device. + */ + + (void)hwgraph_path_add(hubv, EDGE_LBL_XTALK, &switchv); + + printk("io_init_node: Created 'xtalk' entry to '../node/' xtalk vertex 0x%p\n", switchv); + + ASSERT(switchv != GRAPH_VERTEX_NONE); + + (void)hwgraph_edge_add(hubv, switchv, EDGE_LBL_IO); + + printk("io_init_node: Created symlink 'io' from ../node/io to ../node/xtalk \n"); + + /* + * We need to find the widget id and update the basew_id field + * accordingly. In particular, SN00 has direct connected bridge, + * and hence widget id is Not 0. + */ + + widget_partnum = (((*(volatile int32_t *)(NODE_SWIN_BASE(COMPACT_TO_NASID_NODEID(cnodeid), 0) + WIDGET_ID))) & WIDGET_PART_NUM) >> WIDGET_PART_NUM_SHFT; + + if (widget_partnum == BRIDGE_WIDGET_PART_NUM || + widget_partnum == XBRIDGE_WIDGET_PART_NUM){ + npdap->basew_id = (((*(volatile int32_t *)(NODE_SWIN_BASE(COMPACT_TO_NASID_NODEID(cnodeid), 0) + BRIDGE_WID_CONTROL))) & WIDGET_WIDGET_ID); + + printk("io_init_node: Found XBRIDGE widget_partnum= 0x%x\n", widget_partnum); + + } else if (widget_partnum == XBOW_WIDGET_PART_NUM || + widget_partnum == XXBOW_WIDGET_PART_NUM) { + /* + * Xbow control register does not have the widget ID field. + * So, hard code the widget ID to be zero. + */ + printk("io_init_node: Found XBOW widget_partnum= 0x%x\n", widget_partnum); + npdap->basew_id = 0; + +#if defined(BRINGUP) + } else if (widget_partnum == XG_WIDGET_PART_NUM) { + /* + * OK, WTF do we do here if we have an XG direct connected to a HUB/Bedrock??? + * So, hard code the widget ID to be zero? 
+ */ + npdap->basew_id = 0; + npdap->basew_id = (((*(volatile int32_t *)(NODE_SWIN_BASE(COMPACT_TO_NASID_NODEID(cnodeid), 0) + BRIDGE_WID_CONTROL))) & WIDGET_WIDGET_ID); +#endif + } else { + npdap->basew_id = (((*(volatile int32_t *)(NODE_SWIN_BASE(COMPACT_TO_NASID_NODEID(cnodeid), 0) + BRIDGE_WID_CONTROL))) & WIDGET_WIDGET_ID); + + panic(" ****io_init_node: Unknown Widget Part Number 0x%x Widgt ID 0x%x attached to Hubv 0x%p ****\n", widget_partnum, npdap->basew_id, hubv); + + /*NOTREACHED*/ + } + { + char widname[10]; + sprintf(widname, "%x", npdap->basew_id); + (void)hwgraph_path_add(switchv, widname, &widgetv); + printk("io_init_node: Created '%s' to '..node/xtalk/' vertex 0x%p\n", widname, widgetv); + ASSERT(widgetv != GRAPH_VERTEX_NONE); + } + + nodepda->basew_xc = widgetv; + + is_xswitch = xwidget_hwid_is_xswitch(&hwid); + + /* + * Try to become the master of the widget. If this is an xswitch + * with multiple hubs connected, only one will succeed. Mastership + * of an xswitch is used only when touching registers on that xswitch. + * The slave xwidgets connected to the xswitch can be owned by various + * masters. + */ + if (device_master_set(widgetv, hubv) == 0) { + + /* Only one hub (thread) per Crosstalk device or switch makes + * it to here. + */ + + /* + * Initialize whatever xwidget is hanging off our hub. + * Whatever it is, it's accessible through widgetnum 0. + */ + hubinfo_get(hubv, &hubinfo); + + (void)xwidget_register(&hwid, widgetv, npdap->basew_id, hubv, hubinfo->h_widgetid, NULL); + + if (!is_xswitch) { + /* io_init_done takes cpu cookie as 2nd argument + * to do a restorenoderun for the setnoderun done + * at the start of this thread + */ + io_init_done(cnodeid,c); + /* NOTREACHED */ + } + + /* + * Special handling for Crosstalk Switches (e.g. xbow). + * We need to do things in roughly the following order: + * 1) Initialize xswitch hardware (done above) + * 2) Determine which hubs are available to be widget masters + * 3) Discover which links are active from the xswitch + * 4) Assign xwidgets hanging off the xswitch to hubs + * 5) Initialize all xwidgets on the xswitch + */ + + volunteer_for_widgets(switchv, hubv); + + /* If there's someone else on this crossbow, recognize him */ + if (npdap->xbow_peer != INVALID_NASID) { + nodepda_t *peer_npdap = NODEPDA(NASID_TO_COMPACT_NODEID(npdap->xbow_peer)); + peer_sema = &peer_npdap->xbow_sema; + volunteer_for_widgets(switchv, peer_npdap->node_vertex); + } + + assign_widgets_to_volunteers(switchv, hubv); + + /* Signal that we're done */ + if (peer_sema) { + up(peer_sema); + } + + } + else { + /* Wait 'til master is done assigning widgets. 
*/ + down(&npdap->xbow_sema); + } + +#ifdef PROBE_TEST + if ((cnodeid == 1) || (cnodeid == 2)) { + int index; + + for (index = 0; index < 500; index++) + printk("Interfering with device probing!!!\n"); + } +#endif + /* Now both nodes can safely inititialize widgets */ + io_init_xswitch_widgets(switchv, cnodeid); + io_link_xswitch_widgets(switchv, cnodeid); + + /* io_init_done takes cpu cookie as 2nd argument + * to do a restorenoderun for the setnoderun done + * at the start of this thread + */ + io_init_done(cnodeid,c); + + printk("\nio_init_node: DONE INITIALIZED ALL I/O FOR CNODEID %d\n\n", cnodeid); +} + + +#define IOINIT_STKSZ (16 * 1024) + +#ifndef CONFIG_IA64_SGI_IO +#include <sys/sn/iograph.h> +#endif +#define __DEVSTR1 "/../.master/" +#define __DEVSTR2 "/target/" +#define __DEVSTR3 "/lun/0/disk/partition/" +#define __DEVSTR4 "/../ef" + +#if CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 || CONFIG_IA64_GENERIC +/* + * Currently, we need to allow for 5 IBrick slots with 1 FC each + * plus an internal 1394. + * + * ioconfig starts numbering SCSI's at NUM_BASE_IO_SCSI_CTLR. + */ +#define NUM_BASE_IO_SCSI_CTLR 6 +#endif +/* + * This tells ioconfig where it can start numbering scsi controllers. + * Below this base number, platform-specific handles the numbering. + * XXX Irix legacy..controller numbering should be part of devfsd's job + */ +int num_base_io_scsi_ctlr = 2; /* used by syssgi */ +devfs_handle_t base_io_scsi_ctlr_vhdl[NUM_BASE_IO_SCSI_CTLR]; +static devfs_handle_t baseio_enet_vhdl,baseio_console_vhdl; + +/* + * Put the logical controller number information in the + * scsi controller vertices for each scsi controller that + * is in a "fixed position". + */ +static void +scsi_ctlr_nums_add(devfs_handle_t pci_vhdl) +{ + { + int i; + + num_base_io_scsi_ctlr = NUM_BASE_IO_SCSI_CTLR; + + /* Initialize base_io_scsi_ctlr_vhdl array */ + for (i=0; i<NUM_BASE_IO_SCSI_CTLR; i++) + base_io_scsi_ctlr_vhdl[i] = GRAPH_VERTEX_NONE; + } +#if CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 || CONFIG_IA64_GENERIC + { + /* + * May want to consider changing the SN0 code, above, to work more like + * the way this works. + */ + devfs_handle_t base_ibrick_xbridge_vhdl; + devfs_handle_t base_ibrick_xtalk_widget_vhdl; + devfs_handle_t scsi_ctlr_vhdl; + int i; + graph_error_t rv; + + /* + * This is a table of "well-known" SCSI controllers and their well-known + * controller numbers. The names in the table start from the base IBrick's + * Xbridge vertex, so the first component is the xtalk widget number. + */ + static struct { + char *base_ibrick_scsi_path; + int controller_number; + } hardwired_scsi_controllers[] = { + {"15/" EDGE_LBL_PCI "/1/" EDGE_LBL_SCSI_CTLR "/0", 0}, + {"15/" EDGE_LBL_PCI "/2/" EDGE_LBL_SCSI_CTLR "/0", 1}, + {"15/" EDGE_LBL_PCI "/3/" EDGE_LBL_SCSI_CTLR "/0", 2}, + {"14/" EDGE_LBL_PCI "/1/" EDGE_LBL_SCSI_CTLR "/0", 3}, + {"14/" EDGE_LBL_PCI "/2/" EDGE_LBL_SCSI_CTLR "/0", 4}, + {NULL, -1} /* must be last */ + }; + + base_ibrick_xtalk_widget_vhdl = hwgraph_connectpt_get(pci_vhdl); + ASSERT_ALWAYS(base_ibrick_xtalk_widget_vhdl != GRAPH_VERTEX_NONE); + + base_ibrick_xbridge_vhdl = hwgraph_connectpt_get(base_ibrick_xtalk_widget_vhdl); + ASSERT_ALWAYS(base_ibrick_xbridge_vhdl != GRAPH_VERTEX_NONE); + hwgraph_vertex_unref(base_ibrick_xtalk_widget_vhdl); + + /* + * Iterate through the list of well-known SCSI controllers. + * For each controller found, set it's controller number according + * to the table. 
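+	 * Note: each path is relative to the base IBrick's Xbridge vertex,
+	 * so the leading "15/" or "14/" component is the xtalk widget
+	 * number and the component after EDGE_LBL_PCI is (presumably) the
+	 * PCI slot number.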
+ */ + for (i=0; hardwired_scsi_controllers[i].base_ibrick_scsi_path != NULL; i++) { + rv = hwgraph_path_lookup(base_ibrick_xbridge_vhdl, + hardwired_scsi_controllers[i].base_ibrick_scsi_path, &scsi_ctlr_vhdl, NULL); + + if (rv != GRAPH_SUCCESS) /* No SCSI at this path */ + continue; + + ASSERT(hardwired_scsi_controllers[i].controller_number < NUM_BASE_IO_SCSI_CTLR); + base_io_scsi_ctlr_vhdl[hardwired_scsi_controllers[i].controller_number] = scsi_ctlr_vhdl; + device_controller_num_set(scsi_ctlr_vhdl, hardwired_scsi_controllers[i].controller_number); + hwgraph_vertex_unref(scsi_ctlr_vhdl); /* (even though we're actually keeping a reference) */ + } + + hwgraph_vertex_unref(base_ibrick_xbridge_vhdl); + } +#else +#pragma error Bomb! +#endif +} + + +#ifndef CONFIG_IA64_SGI_IO +#include <sys/asm/sn/ioerror_handling.h> +#else +#include <asm/sn/ioerror_handling.h> +#endif +extern devfs_handle_t ioc3_console_vhdl_get(void); +devfs_handle_t sys_critical_graph_root = GRAPH_VERTEX_NONE; + +/* Define the system critical vertices and connect them through + * a canonical parent-child relationships for easy traversal + * during io error handling. + */ +static void +sys_critical_graph_init(void) +{ + devfs_handle_t bridge_vhdl,master_node_vhdl; + devfs_handle_t xbow_vhdl = GRAPH_VERTEX_NONE; + extern devfs_handle_t hwgraph_root; + devfs_handle_t pci_slot_conn; + int slot; + devfs_handle_t baseio_console_conn; + + printk("sys_critical_graph_init: FIXME.\n"); + baseio_console_conn = hwgraph_connectpt_get(baseio_console_vhdl); + + if (baseio_console_conn == NULL) { + return; + } + + /* Get the vertex handle for the baseio bridge */ + bridge_vhdl = device_master_get(baseio_console_conn); + + /* Get the master node of the baseio card */ + master_node_vhdl = cnodeid_to_vertex( + master_node_get(baseio_console_vhdl)); + + /* Add the "root->node" part of the system critical graph */ + + sys_critical_graph_vertex_add(hwgraph_root,master_node_vhdl); + + /* Check if we have a crossbow */ + if (hwgraph_traverse(master_node_vhdl, + EDGE_LBL_XTALK"/0", + &xbow_vhdl) == GRAPH_SUCCESS) { + /* We have a crossbow.Add "node->xbow" part of the system + * critical graph. + */ + sys_critical_graph_vertex_add(master_node_vhdl,xbow_vhdl); + + /* Add "xbow->baseio bridge" of the system critical graph */ + sys_critical_graph_vertex_add(xbow_vhdl,bridge_vhdl); + + hwgraph_vertex_unref(xbow_vhdl); + } else + /* We donot have a crossbow. Add "node->baseio_bridge" + * part of the system critical graph. + */ + sys_critical_graph_vertex_add(master_node_vhdl,bridge_vhdl); + + /* Add all the populated PCI slot vertices to the system critical + * graph with the bridge vertex as the parent. 
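+	 * Note: once sys_critical_graph_init() finishes, the graph runs
+	 * root -> master node -> (xbow, if present) -> baseio bridge ->
+	 * populated PCI slots, with the console ioc3, base ethernet and
+	 * base SCSI controllers hung off their PCI connection points.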
+ */ + for (slot = 0 ; slot < 8; slot++) { + char slot_edge[10]; + + sprintf(slot_edge,"%d",slot); + if (hwgraph_traverse(bridge_vhdl,slot_edge, &pci_slot_conn) + != GRAPH_SUCCESS) + continue; + sys_critical_graph_vertex_add(bridge_vhdl,pci_slot_conn); + hwgraph_vertex_unref(pci_slot_conn); + } + + hwgraph_vertex_unref(bridge_vhdl); + + /* Add the "ioc3 pci connection point -> console ioc3" part + * of the system critical graph + */ + + if (hwgraph_traverse(baseio_console_vhdl,"..",&pci_slot_conn) == + GRAPH_SUCCESS) { + sys_critical_graph_vertex_add(pci_slot_conn, + baseio_console_vhdl); + hwgraph_vertex_unref(pci_slot_conn); + } + + /* Add the "ethernet pci connection point -> base ethernet" part of + * the system critical graph + */ + if (hwgraph_traverse(baseio_enet_vhdl,"..",&pci_slot_conn) == + GRAPH_SUCCESS) { + sys_critical_graph_vertex_add(pci_slot_conn, + baseio_enet_vhdl); + hwgraph_vertex_unref(pci_slot_conn); + } + + /* Add the "scsi controller pci connection point -> base scsi + * controller" part of the system critical graph + */ + if (hwgraph_traverse(base_io_scsi_ctlr_vhdl[0], + "../..",&pci_slot_conn) == GRAPH_SUCCESS) { + sys_critical_graph_vertex_add(pci_slot_conn, + base_io_scsi_ctlr_vhdl[0]); + hwgraph_vertex_unref(pci_slot_conn); + } + if (hwgraph_traverse(base_io_scsi_ctlr_vhdl[1], + "../..",&pci_slot_conn) == GRAPH_SUCCESS) { + sys_critical_graph_vertex_add(pci_slot_conn, + base_io_scsi_ctlr_vhdl[1]); + hwgraph_vertex_unref(pci_slot_conn); + } + hwgraph_vertex_unref(baseio_console_conn); + +} + +static void +baseio_ctlr_num_set(void) +{ + char name[MAXDEVNAME]; + devfs_handle_t console_vhdl, pci_vhdl, enet_vhdl; + + + printk("baseio_ctlr_num_set; FIXME\n"); + console_vhdl = ioc3_console_vhdl_get(); + if (console_vhdl == GRAPH_VERTEX_NONE) + return; + /* Useful for setting up the system critical graph */ + baseio_console_vhdl = console_vhdl; + + vertex_to_name(console_vhdl,name,MAXDEVNAME); + + strcat(name,__DEVSTR1); + pci_vhdl = hwgraph_path_to_vertex(name); + scsi_ctlr_nums_add(pci_vhdl); + /* Unref the pci_vhdl due to the reference by hwgraph_path_to_vertex + */ + hwgraph_vertex_unref(pci_vhdl); + + vertex_to_name(console_vhdl, name, MAXDEVNAME); + strcat(name, __DEVSTR4); + enet_vhdl = hwgraph_path_to_vertex(name); + + /* Useful for setting up the system critical graph */ + baseio_enet_vhdl = enet_vhdl; + + device_controller_num_set(enet_vhdl, 0); + /* Unref the enet_vhdl due to the reference by hwgraph_path_to_vertex + */ + hwgraph_vertex_unref(enet_vhdl); +} +/* #endif */ + +void +sn00_rrb_alloc(devfs_handle_t vhdl, int *vendor_list) +{ + /* REFERENCED */ + int rtn_val; + + /* + ** sn00 population: errb orrb + ** 0- ql 3+? + ** 1- ql 2 + ** 2- ioc3 ethernet 2+? 
+	 ** 3- ioc3 secondary 1
+	 ** 4- 0
+	 ** 5- PCI slot
+	 ** 6- PCI slot
+	 ** 7- PCI slot
+	 */
+
+	/* The following code implements this heuristic for getting
+	 * maximum usage out of the rrbs
+	 *
+	 * constraints:
+	 *  8 bit ql1 needs 1+1
+	 *  ql0 or ql5,6,7 wants 1+2
+	 *  ethernet wants 2 or more
+	 *
+	 * rules for even rrbs:
+	 * if nothing in slot 6
+	 *	4 rrbs to 0 and 2 (0xc8889999)
+	 * else
+	 *	3 2 3 to slots 0 2 6 (0xc8899bbb)
+	 *
+	 * rules for odd rrbs
+	 * if nothing in slot 5 or 7 (0xc8889999)
+	 *	4 rrbs to 1 and 3
+	 * else if 1 thing in 5 or 7 (0xc8899aaa) or (0xc8899bbb)
+	 *	3 2 3 to slots 1 3 5|7
+	 * else
+	 *	2 1 3 2 to slots 1 3 5 7 (note: if there's a ql card in 7 this
+	 *	(0xc89aaabb) may short what it wants therefore the
+	 *	rule should be to plug pci slots in order)
+	 */
+
+
+	if (vendor_list[6] != PCIIO_VENDOR_ID_NONE) {
+		/* something in slot 6 */
+		rtn_val = pcibr_alloc_all_rrbs(vhdl, 0, 3,1, 2,0, 0,0, 3,0);
+	}
+	else {
+		rtn_val = pcibr_alloc_all_rrbs(vhdl, 0, 4,1, 4,0, 0,0, 0,0);
+	}
+#ifndef CONFIG_IA64_SGI_IO
+	if (rtn_val)
+		cmn_err(CE_WARN, "sn00_rrb_alloc: pcibr_alloc_all_rrbs failed");
+#endif
+
+	if ((vendor_list[5] != PCIIO_VENDOR_ID_NONE) &&
+	    (vendor_list[7] != PCIIO_VENDOR_ID_NONE)) {
+		/* something in slot 5 and 7 */
+		rtn_val = pcibr_alloc_all_rrbs(vhdl, 1, 2,1, 1,0, 3,0, 2,0);
+	}
+	else if (vendor_list[5] != PCIIO_VENDOR_ID_NONE) {
+		/* something in slot 5 but not 7 */
+		rtn_val = pcibr_alloc_all_rrbs(vhdl, 1, 3,1, 2,0, 3,0, 0,0);
+	}
+	else if (vendor_list[7] != PCIIO_VENDOR_ID_NONE) {
+		/* something in slot 7 but not 5 */
+		rtn_val = pcibr_alloc_all_rrbs(vhdl, 1, 3,1, 2,0, 0,0, 3,0);
+	}
+	else {
+		/* nothing in slot 5 or 7 */
+		rtn_val = pcibr_alloc_all_rrbs(vhdl, 1, 4,1, 4,0, 0,0, 0,0);
+	}
+#ifndef CONFIG_IA64_SGI_IO
+	if (rtn_val)
+		cmn_err(CE_WARN, "sn00_rrb_alloc: pcibr_alloc_all_rrbs failed");
+#endif
+}
+
+
+/*
+ * Initialize all I/O devices. Starting closest to nodes, probe and
+ * initialize outward.
+ */
+void
+init_all_devices(void)
+{
+	/* Governor on init threads..bump up when safe
+	 * (beware many devfs races)
+	 */
+#ifndef CONFIG_IA64_SGI_IO
+	int io_init_node_threads = 2;
+#endif
+	cnodeid_t cnodeid, active;
+
+	init_MUTEX(&io_init_sema);
+
+
+	active = 0;
+	for (cnodeid = 0; cnodeid < maxnodes; cnodeid++) {
+#ifndef CONFIG_IA64_SGI_IO
+		char thread_name[16];
+		extern int io_init_pri;
+
+		/*
+		 * Spawn a service thread for each node to initialize all
+		 * I/O on that node. Each thread attempts to bind itself
+		 * to the node whose I/O it's initializing.
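+		 *
+		 * Note: the sthread_create() call below is compiled out
+		 * under CONFIG_IA64_SGI_IO; the #else branch simply calls
+		 * io_init_node() for each node in turn, and the
+		 * io_init_sema throttling further down only applies when
+		 * LINUX_KERNEL_THREADS is defined.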
+ */ + sprintf(thread_name, "IO_init[%d]", cnodeid); + + (void)sthread_create(thread_name, 0, IOINIT_STKSZ, 0, + io_init_pri, KT_PS, (st_func_t *)io_init_node, + (void *)(long)cnodeid, 0, 0, 0); +#else + printk("init_all_devices: Calling io_init_node() for cnode %d\n", cnodeid); + io_init_node(cnodeid); + + printk("init_all_devices: Done io_init_node() for cnode %d\n", cnodeid); + +#endif /* !CONFIG_IA64_SGI_IO */ + + + /* Limit how many nodes go at once, to not overload hwgraph */ + /* TBD: Should timeout */ +#ifdef AA_DEBUG + printk("started thread for cnode %d\n", cnodeid); +#endif +#ifdef LINUX_KERNEL_THREADS + active++; + if (io_init_node_threads && + active >= io_init_node_threads) { + down(&io_init_sema); + active--; + } +#endif /* LINUX_KERNEL_THREADS */ + } + +#ifdef LINUX_KERNEL_THREADS + /* Wait until all IO_init threads are done */ + + while (active > 0) { +#ifdef AA_DEBUG + printk("waiting, %d still active\n", active); +#endif + sema(&io_init_sema); + active--; + } + +#endif /* LINUX_KERNEL_THREADS */ + + for (cnodeid = 0; cnodeid < maxnodes; cnodeid++) + /* + * Update information generated by IO init. + */ + update_node_information(cnodeid); + + baseio_ctlr_num_set(); + /* Setup the system critical graph (which is a subgraph of the + * main hwgraph). This information is useful during io error + * handling. + */ + sys_critical_graph_init(); + +#if HWG_PRINT + hwgraph_print(); +#endif + +} + +#define toint(x) ((int)(x) - (int)('0')) + +void +devnamefromarcs(char *devnm) +{ + int val; + char tmpnm[MAXDEVNAME]; + char *tmp1, *tmp2; + + val = strncmp(devnm, "dks", 3); + if (val != 0) + return; + tmp1 = devnm + 3; + if (!isdigit(*tmp1)) + return; + + val = 0; + while (isdigit(*tmp1)) { + val = 10*val+toint(*tmp1); + tmp1++; + } + + if(*tmp1 != 'd') + return; + else + tmp1++; + + if ((val < 0) || (val >= NUM_BASE_IO_SCSI_CTLR)) { + int i; + int viable_found = 0; + + printk("Only controller numbers 0..%d are supported for\n", NUM_BASE_IO_SCSI_CTLR-1); + printk("prom \"root\" variables of the form dksXdXsX.\n"); + printk("To use another disk you must use the full hardware graph path\n\n"); + printk("Possible controller numbers for use in 'dksXdXsX' on this system: "); + for (i=0; i<NUM_BASE_IO_SCSI_CTLR; i++) { + if (base_io_scsi_ctlr_vhdl[i] != GRAPH_VERTEX_NONE) { + printk("%d ", i); + viable_found=1; + } + } + if (viable_found) + printk("\n"); + else + printk("none found!\n"); + +#ifndef CONFIG_IA64_SGI_IO + if (kdebug) + debug("ring"); +#endif + DELAY(15000000); + //prom_reboot(); + panic("FIXME: devnamefromarcs: should call prom_reboot here.\n"); + /* NOTREACHED */ + } + + ASSERT(base_io_scsi_ctlr_vhdl[val] != GRAPH_VERTEX_NONE); + vertex_to_name(base_io_scsi_ctlr_vhdl[val], + tmpnm, + MAXDEVNAME); + tmp2 = tmpnm + strlen(tmpnm); + strcpy(tmp2, __DEVSTR2); + tmp2 += strlen(__DEVSTR2); + while (*tmp1 != 's') { + if((*tmp2++ = *tmp1++) == '\0') + return; + } + tmp1++; + strcpy(tmp2, __DEVSTR3); + tmp2 += strlen(__DEVSTR3); + while ( (*tmp2++ = *tmp1++) ) + ; + tmp2--; + *tmp2++ = '/'; + strcpy(tmp2, EDGE_LBL_BLOCK); + strcpy(devnm,tmpnm); +} diff --git a/arch/ia64/sn/io/module.c b/arch/ia64/sn/io/module.c new file mode 100644 index 000000000..7a2d5f0e2 --- /dev/null +++ b/arch/ia64/sn/io/module.c @@ -0,0 +1,311 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. 
+ * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/cmn_err.h> +#include <asm/sn/xtalk/xbow.h> +#include <asm/sn/pci/bridge.h> +#include <asm/sn/klconfig.h> +#include <asm/sn/sn1/hubdev.h> +#include <asm/sn/module.h> +#include <asm/sn/pci/pcibr.h> +#include <asm/sn/xtalk/xswitch.h> +#include <asm/sn/nodepda.h> + + +#define LDEBUG 1 + +#define DPRINTF if (LDEBUG) printk +#define printf printk + +module_t *modules[MODULE_MAX]; +int nummodules; + +#define SN00_SERIAL_FUDGE 0x3b1af409d513c2 +#define SN0_SERIAL_FUDGE 0x6e + +void +encode_int_serial(uint64_t src,uint64_t *dest) +{ + uint64_t val; + int i; + + val = src + SN00_SERIAL_FUDGE; + + + for (i = 0; i < sizeof(long long); i++) { + ((char*)dest)[i] = + ((char*)&val)[sizeof(long long)/2 + + ((i%2) ? ((i/2 * -1) - 1) : (i/2))]; + } +} + + +void +decode_int_serial(uint64_t src, uint64_t *dest) +{ + uint64_t val; + int i; + + for (i = 0; i < sizeof(long long); i++) { + ((char*)&val)[sizeof(long long)/2 + + ((i%2) ? ((i/2 * -1) - 1) : (i/2))] = + ((char*)&src)[i]; + } + + *dest = val - SN00_SERIAL_FUDGE; +} + + +void +encode_str_serial(const char *src, char *dest) +{ + int i; + + for (i = 0; i < MAX_SERIAL_NUM_SIZE; i++) { + + dest[i] = src[MAX_SERIAL_NUM_SIZE/2 + + ((i%2) ? ((i/2 * -1) - 1) : (i/2))] + + SN0_SERIAL_FUDGE; + } +} + +void +decode_str_serial(const char *src, char *dest) +{ + int i; + + for (i = 0; i < MAX_SERIAL_NUM_SIZE; i++) { + dest[MAX_SERIAL_NUM_SIZE/2 + + ((i%2) ? ((i/2 * -1) - 1) : (i/2))] = src[i] - + SN0_SERIAL_FUDGE; + } +} + + +module_t *module_lookup(moduleid_t id) +{ + int i; + + DPRINTF("module_lookup: id=%d\n", id); + + for (i = 0; i < nummodules; i++) + if (modules[i]->id == id) { + DPRINTF("module_lookup: found m=0x%p\n", modules[i]); + return modules[i]; + } + + return NULL; +} + +/* + * module_add_node + * + * The first time a new module number is seen, a module structure is + * inserted into the module list in order sorted by module number + * and the structure is initialized. + * + * The node number is added to the list of nodes in the module. 
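+ *
+ * Editor's note (not part of the original patch): the encode_*_serial()
+ * and decode_*_serial() helpers above scramble the bytes "center-out"
+ * after adding a fudge constant; for an 8-byte value v the output byte
+ * order is v[4] v[3] v[5] v[2] v[6] v[1] v[7] v[0].  The disabled
+ * sketch below is illustrative only: serial_fudge_selftest() is a
+ * hypothetical helper showing that the two routines are exact inverses.
+ */
+
+#if 0	/* illustrative sketch, not part of the original patch */
+static void
+serial_fudge_selftest(void)
+{
+	uint64_t in = 0x0123456789abcdefULL, scrambled, out;
+
+	encode_int_serial(in, &scrambled);	/* permute bytes, add fudge */
+	decode_int_serial(scrambled, &out);	/* exact inverse of encode */
+	ASSERT_ALWAYS(out == in);
+}
+#endif
+
+/*
+ * module_add_node() follows.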
+ */ + +module_t *module_add_node(moduleid_t id, cnodeid_t n) +{ + module_t *m; + int i; + + DPRINTF("module_add_node: id=%x node=%d\n", id, n); + + if ((m = module_lookup(id)) == 0) { +#ifndef CONFIG_IA64_SGI_IO + m = kmem_zalloc_node(sizeof (module_t), KM_NOSLEEP, n); +#else + m = kmalloc(sizeof (module_t), GFP_KERNEL); + memset(m, 0 , sizeof(module_t)); + printk("Module nodecnt = %d\n", m->nodecnt); +#endif + ASSERT_ALWAYS(m); + + DPRINTF("module_add_node: m=0x%p\n", m); + + m->id = id; + spin_lock_init(&m->lock); + + init_MUTEX_LOCKED(&m->thdcnt); + +printk("Set elsc to 0x%p on node %d\n", &m->elsc, get_nasid()); + +set_elsc(&m->elsc); + elsc_init(&m->elsc, COMPACT_TO_NASID_NODEID(n)); + spin_lock_init(&m->elsclock); + + /* Insert in sorted order by module number */ + + for (i = nummodules; i > 0 && modules[i - 1]->id > id; i--) + modules[i] = modules[i - 1]; + + modules[i] = m; + nummodules++; + } + + m->nodes[m->nodecnt++] = n; + +printk("module_add_node: module %x now has %d nodes\n", id, m->nodecnt); + DPRINTF("module_add_node: module %x now has %d nodes\n", id, m->nodecnt); + + return m; +} + +int module_probe_snum(module_t *m, nasid_t nasid) +{ + lboard_t *board; + klmod_serial_num_t *comp; + + board = find_lboard((lboard_t *) KL_CONFIG_INFO(nasid), + KLTYPE_MIDPLANE8); + + if (! board || KL_CONFIG_DUPLICATE_BOARD(board)) + return 0; + + comp = GET_SNUM_COMP(board); + + if (comp) { +#if LDEBUG + int i; + + printf("********found module with id %x and string", m->id); + + for (i = 0; i < MAX_SERIAL_NUM_SIZE; i++) + printf(" %x ", comp->snum.snum_str[i]); + + printf("\n"); /* Fudged string is not ASCII */ +#endif + + if (comp->snum.snum_str[0] != '\0') { + bcopy(comp->snum.snum_str, + m->snum.snum_str, + MAX_SERIAL_NUM_SIZE); + m->snum_valid = 1; + } + } + + if (m->snum_valid) + return 1; + else { +#ifndef CONFIG_IA64_SGI_IO + cmn_err(CE_WARN | CE_MAINTENANCE, + "Invalid serial number for module %d, " + "possible missing or invalid NIC.", m->id); +#else + printk("Invalid serial number for module %d, " + "possible missing or invalid NIC.", m->id); +#endif + return 0; + } +} + +void +io_module_init(void) +{ + cnodeid_t node; + lboard_t *board; + nasid_t nasid; + int nserial; + module_t *m; + + DPRINTF("*******module_init\n"); + + nserial = 0; + + for (node = 0; node < numnodes; node++) { + nasid = COMPACT_TO_NASID_NODEID(node); + + board = find_lboard((lboard_t *) KL_CONFIG_INFO(nasid), + KLTYPE_IP27); + ASSERT(board); + + m = module_add_node(board->brd_module, node); + + if (! m->snum_valid && module_probe_snum(m, nasid)) + nserial++; + } + + DPRINTF("********found total of %d serial numbers in the system\n", + nserial); + + if (nserial == 0) + cmn_err(CE_WARN, "No serial number found."); +} + +#ifdef BRINGUP +elsc_t *Elsc[100]; + +void +set_elsc(elsc_t *p) +{ + Elsc[get_nasid()] = p; +} +#endif + +elsc_t *get_elsc(void) +{ +#ifdef BRINGUP +return(Elsc[get_nasid()]); +#else + if ( NODEPDA(get_nasid())->module == (module_t *)0 ) { + printf("get_elsc() for nasd %d fails\n", get_nasid()); +// return((elsc_t *)0); + } + return &NODEPDA(get_nasid())->module->elsc; + +// return &NODEPDA(NASID_TO_COMPACT_NODEID(0))->module->elsc; +#endif +} + +int +get_kmod_info(cmoduleid_t cmod, module_info_t *mod_info) +{ + int i; + + if (cmod < 0 || cmod >= nummodules) + return EINVAL; + + if (! 
modules[cmod]->snum_valid) + return ENXIO; + + mod_info->mod_num = modules[cmod]->id; + { + char temp[MAX_SERIAL_NUM_SIZE]; + + decode_str_serial(modules[cmod]->snum.snum_str, temp); + + /* if this is an invalid serial number return an error */ + if (temp[0] != 'K') + return ENXIO; + + mod_info->serial_num = 0; + + for (i = 0; i < MAX_SERIAL_NUM_SIZE && temp[i] != '\0'; i++) { + mod_info->serial_num <<= 4; + mod_info->serial_num |= (temp[i] & 0xf); + + mod_info->serial_str[i] = temp[i]; + } + + mod_info->serial_str[i] = '\0'; + } + + return 0; +} diff --git a/arch/ia64/sn/io/pci.c b/arch/ia64/sn/io/pci.c new file mode 100644 index 000000000..e0745c91f --- /dev/null +++ b/arch/ia64/sn/io/pci.c @@ -0,0 +1,306 @@ +/* + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SNI64 specific PCI support for SNI IO. + * + * Copyright (C) 1997, 1998, 2000 Colin Ngam + */ +#include <linux/init.h> +#include <linux/types.h> +#include <linux/config.h> +#include <linux/pci.h> +#include <asm/sn/types.h> +#include <asm/sn/sgi.h> +#include <asm/sn/cmn_err.h> +#include <asm/sn/iobus.h> +#include <asm/sn/iograph.h> +#include <asm/param.h> +#include <asm/sn/pio.h> +#include <asm/sn/xtalk/xwidget.h> +#include <asm/sn/sn_private.h> +#include <asm/sn/addrs.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/hcl_util.h> +#include <asm/sn/pci/pciio.h> +#include <asm/sn/pci/pcibr.h> +#include <asm/sn/pci/pcibr_private.h> +#include <asm/sn/pci/bridge.h> + +#ifdef DEBUG_CONFIG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + + + +#ifdef CONFIG_PCI + +extern devfs_handle_t pci_bus_to_vertex(unsigned char); +extern devfs_handle_t devfn_to_vertex(unsigned char bus, unsigned char devfn); + +/* + * snia64_read_config_byte - Read a byte from the config area of the device. + */ +static int snia64_read_config_byte (struct pci_dev *dev, + int where, unsigned char *val) +{ + unsigned long res = 0; + unsigned size = 1; + devfs_handle_t device_vertex; + + if ( (dev == (struct pci_dev *)0) || (val == (unsigned char *)0) ) { + return PCIBIOS_DEVICE_NOT_FOUND; + } + device_vertex = devfn_to_vertex(dev->bus->number, dev->devfn); + if (!device_vertex) { + DBG("%s : nonexistent device: bus= 0x%x slot= 0x%x func= 0x%x\n", + __FUNCTION__, dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + return(-1); + } + res = pciio_config_get(device_vertex, (unsigned) where, size); + *val = (unsigned char) res; + return PCIBIOS_SUCCESSFUL; +} + +/* + * snia64_read_config_word - Read 2 bytes from the config area of the device. + */ +static int snia64_read_config_word (struct pci_dev *dev, + int where, unsigned short *val) +{ + unsigned long res = 0; + unsigned size = 2; /* 2 bytes */ + devfs_handle_t device_vertex; + + if ( (dev == (struct pci_dev *)0) || (val == (unsigned short *)0) ) { + return PCIBIOS_DEVICE_NOT_FOUND; + } + device_vertex = devfn_to_vertex(dev->bus->number, dev->devfn); + if (!device_vertex) { + DBG("%s : nonexistent device: bus= 0x%x slot= 0x%x func= 0x%x\n", + __FUNCTION__, dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + return(-1); + } + res = pciio_config_get(device_vertex, (unsigned) where, size); + *val = (unsigned short) res; + return PCIBIOS_SUCCESSFUL; +} + +/* + * snia64_read_config_dword - Read 4 bytes from the config area of the device. 
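+ *
+ * Note: unlike the byte and word readers above, this one rejects
+ * offsets that are not 4-byte aligned and returns
+ * PCIBIOS_BAD_REGISTER_NUMBER before touching the device vertex.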
+ */ +static int snia64_read_config_dword (struct pci_dev *dev, + int where, unsigned int *val) +{ + unsigned long res = 0; + unsigned size = 4; /* 4 bytes */ + devfs_handle_t device_vertex; + + if (where & 3) { + return PCIBIOS_BAD_REGISTER_NUMBER; + } + if ( (dev == (struct pci_dev *)0) || (val == (unsigned int *)0) ) { + return PCIBIOS_DEVICE_NOT_FOUND; + } + + device_vertex = devfn_to_vertex(dev->bus->number, dev->devfn); + if (!device_vertex) { + DBG("%s : nonexistent device: bus= 0x%x slot= 0x%x func= 0x%x\n", + __FUNCTION__, dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + return(-1); + } + res = pciio_config_get(device_vertex, (unsigned) where, size); + *val = (unsigned int) res; + return PCIBIOS_SUCCESSFUL; +} + +/* + * snia64_write_config_byte - Writes 1 byte to the config area of the device. + */ +static int snia64_write_config_byte (struct pci_dev *dev, + int where, unsigned char val) +{ + devfs_handle_t device_vertex; + + if ( dev == (struct pci_dev *)0 ) { + return PCIBIOS_DEVICE_NOT_FOUND; + } + /* + * if it's an IOC3 then we bail out, we special + * case them with pci_fixup_ioc3 + */ + if (dev->vendor == PCI_VENDOR_ID_SGI && + dev->device == PCI_DEVICE_ID_SGI_IOC3 ) + return PCIBIOS_SUCCESSFUL; + + device_vertex = devfn_to_vertex(dev->bus->number, dev->devfn); + if (!device_vertex) { + DBG("%s : nonexistent device: bus= 0x%x slot= 0x%x func= 0x%x\n", + __FUNCTION__, dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + return(-1); + } + pciio_config_set( device_vertex, (unsigned)where, 1, (uint64_t) val); + + return PCIBIOS_SUCCESSFUL; +} + +/* + * snia64_write_config_word - Writes 2 bytes to the config area of the device. + */ +static int snia64_write_config_word (struct pci_dev *dev, + int where, unsigned short val) +{ + devfs_handle_t device_vertex = NULL; + + if (where & 1) { + return PCIBIOS_BAD_REGISTER_NUMBER; + } + if ( dev == (struct pci_dev *)0 ) { + return PCIBIOS_DEVICE_NOT_FOUND; + } + /* + * if it's an IOC3 then we bail out, we special + * case them with pci_fixup_ioc3 + */ + if (dev->vendor == PCI_VENDOR_ID_SGI && + dev->device == PCI_DEVICE_ID_SGI_IOC3) + return PCIBIOS_SUCCESSFUL; + + device_vertex = devfn_to_vertex(dev->bus->number, dev->devfn); + if (!device_vertex) { + DBG("%s : nonexistent device: bus= 0x%x slot= 0x%x func= 0x%x\n", + __FUNCTION__, dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + return(-1); + } + pciio_config_set( device_vertex, (unsigned)where, 2, (uint64_t) val); + + return PCIBIOS_SUCCESSFUL; +} + +/* + * snia64_write_config_dword - Writes 4 bytes to the config area of the device. 
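+ *
+ * Note: like the byte and word writers above, this one silently
+ * returns success for the SGI IOC3, which is special-cased in
+ * pci_fixup_ioc3() because writes past offset 0x20 of its config
+ * space can take down the PCI bus.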
+ */ +static int snia64_write_config_dword (struct pci_dev *dev, + int where, unsigned int val) +{ + devfs_handle_t device_vertex; + + if (where & 3) { + return PCIBIOS_BAD_REGISTER_NUMBER; + } + if ( dev == (struct pci_dev *)0 ) { + return PCIBIOS_DEVICE_NOT_FOUND; + } + /* + * if it's an IOC3 then we bail out, we special + * case them with pci_fixup_ioc3 + */ + if (dev->vendor == PCI_VENDOR_ID_SGI && + dev->device == PCI_DEVICE_ID_SGI_IOC3) + return PCIBIOS_SUCCESSFUL; + + device_vertex = devfn_to_vertex(dev->bus->number, dev->devfn); + if (!device_vertex) { + DBG("%s : nonexistent device: bus= 0x%x slot= 0x%x func= 0x%x\n", + __FUNCTION__, dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + return(-1); + } + pciio_config_set( device_vertex, (unsigned)where, 4, (uint64_t) val); + + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops snia64_pci_ops = { + snia64_read_config_byte, + snia64_read_config_word, + snia64_read_config_dword, + snia64_write_config_byte, + snia64_write_config_word, + snia64_write_config_dword +}; + +/* + * snia64_pci_find_bios - SNIA64 pci_find_bios() platform specific code. + */ +void __init +sn1_pci_find_bios(void) +{ + extern struct pci_ops pci_conf; + /* + * Go initialize our IO Infrastructure .. + */ + extern void sgi_master_io_infr_init(void); + + sgi_master_io_infr_init(); + +#ifdef BRINGUP + if ( IS_RUNNING_ON_SIMULATOR() ) + return; +#endif + /* sn1_io_infrastructure_init(); */ + pci_conf = snia64_pci_ops; +} + +void +pci_fixup_ioc3(struct pci_dev *d) +{ + int i; + int slot; + unsigned long res = 0; + unsigned int val, size; + int ret; + u_short command; + + devfs_handle_t device_vertex; + devfs_handle_t bridge_vhdl = pci_bus_to_vertex(d->bus->number); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) hwgraph_fastinfo_get(bridge_vhdl); + devfs_handle_t xconn_vhdl = pcibr_soft->bs_conn; + bridge_t *bridge = pcibr_soft->bs_base; + bridgereg_t devreg; + + /* IOC3 only decodes 0x20 bytes of the config space, reading + * beyond that is relatively benign but writing beyond that + * (especially the base address registers) will shut down the + * pci bus...so avoid doing so. + * NOTE: this means we can't program the intr_pin into the device, + * currently we hack this with special code in + * sgi_pci_intr_support() + */ + printk("pci_fixup_ioc3: Fixing base addresses for ioc3 device %s\n", d->slot_name); + + /* I happen to know from the spec that the ioc3 needs only 0xfffff + * The standard pci trick of writing ~0 to the baddr and seeing + * what comes back doesn't work with the ioc3 + */ + size = 0xfffff; + d->resource[0].end = (unsigned long) d->resource[0].start + (unsigned long) size; + + /* + * Zero out the resource structure .. because we did not go through + * the normal PCI Infrastructure Init, garbbage are left in these + * fileds. + */ + for (i = 1; i <= PCI_ROM_RESOURCE; i++) { + d->resource[i].start = 0UL; + d->resource[i].end = 0UL; + d->resource[i].flags = 0UL; + } + + /* + * Hardcode Device 4 register(IOC3 is in Slot 4) to set the + * DEV_DIRECT bit. This will not work if IOC3 is not on Slot + * 4. + */ + *(volatile u32 *)0xc0000a000f000220 |= 0x90000; + + d->subsystem_vendor = 0; + d->subsystem_device = 0; + +} + +#endif /* CONFIG_PCI */ diff --git a/arch/ia64/sn/io/pci_bus_cvlink.c b/arch/ia64/sn/io/pci_bus_cvlink.c new file mode 100644 index 000000000..a90b6456f --- /dev/null +++ b/arch/ia64/sn/io/pci_bus_cvlink.c @@ -0,0 +1,591 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. 
See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/pci.h> +#include <linux/sched.h> +#include <linux/ioport.h> +#include <asm/sn/types.h> +#include <asm/sn/hack.h> +#include <asm/sn/sgi.h> +#include <asm/sn/cmn_err.h> +#include <asm/sn/iobus.h> +#include <asm/sn/iograph.h> +#include <asm/param.h> +#include <asm/sn/pio.h> +#include <asm/sn/xtalk/xwidget.h> +#include <asm/sn/sn_private.h> +#include <asm/sn/addrs.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/hcl_util.h> +#include <asm/sn/agent.h> +#include <asm/sn/intr.h> +#include <asm/sn/xtalk/xtalkaddrs.h> +#include <asm/sn/klconfig.h> +#include <asm/sn/io.h> +#include <asm/sn/pci/pci_bus_cvlink.h> + +#include <asm/sn/pci/pciio.h> +// #include <sys/ql.h> +#include <asm/sn/pci/pcibr.h> +#include <asm/sn/pci/pcibr_private.h> +extern int bridge_rev_b_data_check_disable; + +#define MAX_PCI_XWIDGET 256 +devfs_handle_t busnum_to_xwidget[MAX_PCI_XWIDGET]; +nasid_t busnum_to_nid[MAX_PCI_XWIDGET]; +unsigned char num_bridges; +static int done_probing = 0; + +static int pci_bus_map_create(devfs_handle_t xtalk); +devfs_handle_t devfn_to_vertex(unsigned char busnum, unsigned int devfn); + +/* + * pci_bus_cvlink_init() - To be called once during initialization before + * SGI IO Infrastructure init is called. + */ +void +pci_bus_cvlink_init(void) +{ + + memset(busnum_to_xwidget, 0x0, sizeof(devfs_handle_t) * MAX_PCI_XWIDGET); + memset(busnum_to_nid, 0x0, sizeof(nasid_t) * MAX_PCI_XWIDGET); + num_bridges = 0; +} + +/* + * pci_bus_to_vertex() - Given a logical Linux Bus Number returns the associated + * pci bus vertex from the SGI IO Infrastructure. + */ +devfs_handle_t +pci_bus_to_vertex(unsigned char busnum) +{ + + devfs_handle_t xwidget; + devfs_handle_t pci_bus = NULL; + + + /* + * First get the xwidget vertex. + */ + xwidget = busnum_to_xwidget[busnum]; + if (!xwidget) + return (NULL); + + /* + * Use devfs to get the pci vertex from xwidget. + */ + if (hwgraph_traverse(xwidget, EDGE_LBL_PCI, &pci_bus) != GRAPH_SUCCESS) { + if (!pci_bus) { + printk("pci_bus_to_vertex: Cannot find pci bus for given bus number %d\n", busnum); + return (NULL); + } + } + + return(pci_bus); +} + +/* + * devfn_to_vertex() - returns the vertex of the device given the bus, slot, + * and function numbers. + */ +devfs_handle_t +devfn_to_vertex(unsigned char busnum, unsigned int devfn) +{ + + int slot = 0; + int func = 0; + char name[16]; + devfs_handle_t pci_bus = NULL; + devfs_handle_t device_vertex = NULL; + + /* + * Go get the pci bus vertex. + */ + pci_bus = pci_bus_to_vertex(busnum); + if (!pci_bus) { + /* + * During probing, the Linux pci code invents non existant + * bus numbers and pci_dev structures and tries to access + * them to determine existance. Don't crib during probing. + */ + if (done_probing) + printk("devfn_to_vertex: Invalid bus number %d given.\n", busnum); + return(NULL); + } + + + /* + * Go get the slot&function vertex. + * Should call pciio_slot_func_to_name() when ready. 
+ */ + slot = PCI_SLOT(devfn); + func = PCI_FUNC(devfn); + + if (func == 0) + sprintf(name, "%d", slot); + else + sprintf(name, "%d%c", slot, 'a'+func); + + if (hwgraph_traverse(pci_bus, name, &device_vertex) != GRAPH_SUCCESS) { + if (!device_vertex) { + printk("devfn_to_vertex: Unable to get slot&func %s from pci vertex 0x%p\n", name, pci_bus); + return(NULL); + } + } + + return(device_vertex); +} + +/* + * Most drivers currently do not properly tell the arch specific pci dma + * interfaces whether they can handle A64. Here is where we privately + * keep track of this. + */ +static void __init +set_sn1_pci64(struct pci_dev *dev) +{ + unsigned short vendor = dev->vendor; + unsigned short device = dev->device; + + if (vendor == PCI_VENDOR_ID_QLOGIC) { + if ((device == PCI_DEVICE_ID_QLOGIC_ISP2100) || + (device == PCI_DEVICE_ID_QLOGIC_ISP2200)) { + SET_PCIA64(dev); + return; + } + } + + if (vendor == PCI_VENDOR_ID_SGI) { + if (device == PCI_DEVICE_ID_SGI_IOC3) { + SET_PCIA64(dev); + return; + } + } + +} + +/* + * sn1_pci_fixup() - This routine is called when platform_pci_fixup() is + * invoked at the end of pcibios_init() to link the Linux pci + * infrastructure to SGI IO Infrasturcture - ia64/kernel/pci.c + * + * Other platform specific fixup can also be done here. + */ +void +sn1_pci_fixup(int arg) +{ + struct list_head *ln; + struct pci_bus *pci_bus = NULL; + struct pci_dev *device_dev = NULL; + struct sn1_widget_sysdata *widget_sysdata; + struct sn1_device_sysdata *device_sysdata; + extern void sn1_pci_find_bios(void); + + +unsigned long res; + + if (arg == 0) { + sn1_pci_find_bios(); + return; + } + +#if 0 +{ + devfs_handle_t bridge_vhdl = pci_bus_to_vertex(0); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) hwgraph_fastinfo_get(bridge_vhdl); + bridge_t *bridge = pcibr_soft->bs_base; +printk("Before Changing PIO Map Address:\n"); + printk("pci_fixup_ioc3: Before devreg fixup\n"); + printk("pci_fixup_ioc3: Devreg 0 0x%x\n", bridge->b_device[0].reg); + printk("pci_fixup_ioc3: Devreg 1 0x%x\n", bridge->b_device[1].reg); + printk("pci_fixup_ioc3: Devreg 2 0x%x\n", bridge->b_device[2].reg); + printk("pci_fixup_ioc3: Devreg 3 0x%x\n", bridge->b_device[3].reg); + printk("pci_fixup_ioc3: Devreg 4 0x%x\n", bridge->b_device[4].reg); + printk("pci_fixup_ioc3: Devreg 5 0x%x\n", bridge->b_device[5].reg); + printk("pci_fixup_ioc3: Devreg 6 0x%x\n", bridge->b_device[6].reg); + printk("pci_fixup_ioc3: Devreg 7 0x%x\n", bridge->b_device[7].reg); +} +#endif + done_probing = 1; + + if ( IS_RUNNING_ON_SIMULATOR() ) { + printk("sn1_pci_fixup not supported on simulator.\n"); + return; + } + +#ifdef REAL_HARDWARE + + /* + * Initialize the pci bus vertex in the pci_bus struct. + */ + for( ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) { + pci_bus = pci_bus_b(ln); + widget_sysdata = kmalloc(sizeof(struct sn1_widget_sysdata), + GFP_KERNEL); + widget_sysdata->vhdl = pci_bus_to_vertex(pci_bus->number); + pci_bus->sysdata = (void *)widget_sysdata; + } + + /* + * set the root start and end so that drivers calling check_region() + * won't see a conflict + */ + ioport_resource.start |= IO_SWIZ_BASE; + ioport_resource.end |= (HSPEC_SWIZ_BASE-1); + /* + * Initialize the device vertex in the pci_dev struct. 
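+	 * Note: for every device the loop below attaches the hwgraph
+	 * vertex via sn1_device_sysdata, remaps each BAR and the ROM
+	 * through pciio_pio_addr(), masks the results into swizzled
+	 * space, updates the command word and finally encodes bus and
+	 * devfn into device_dev->irq.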
+ */ + pci_for_each_dev(device_dev) { + unsigned int irq; + int idx; + u16 cmd; + devfs_handle_t vhdl; + unsigned long size; + + if (device_dev->vendor == PCI_VENDOR_ID_SGI && + device_dev->device == PCI_DEVICE_ID_SGI_IOC3) { + extern void pci_fixup_ioc3(struct pci_dev *d); + pci_fixup_ioc3(device_dev); + } + + /* Set the device vertex */ + + device_sysdata = kmalloc(sizeof(struct sn1_device_sysdata), + GFP_KERNEL); + device_sysdata->vhdl = devfn_to_vertex(device_dev->bus->number, device_dev->devfn); + device_sysdata->isa64 = 0; + device_dev->sysdata = (void *) device_sysdata; + set_sn1_pci64(device_dev); + pci_read_config_word(device_dev, PCI_COMMAND, &cmd); + + /* + * Set the resources address correctly. The assumption here + * is that the addresses in the resource structure has been + * read from the card and it was set in the card by our + * Infrastructure .. + */ + vhdl = device_sysdata->vhdl; + for (idx = 0; idx < PCI_ROM_RESOURCE; idx++) { + size = 0; + size = device_dev->resource[idx].end - + device_dev->resource[idx].start; + if (size) { +res = 0; +res = pciio_config_get(vhdl, (unsigned) PCI_BASE_ADDRESS_0 + idx, 4); +printk("Before pciio_pio_addr Base address %d = 0x%lx\n", idx, res); + + printk(" Changing device %d:%d resource start address from 0x%lx", + PCI_SLOT(device_dev->devfn),PCI_FUNC(device_dev->devfn), + device_dev->resource[idx].start); + device_dev->resource[idx].start = + (unsigned long)pciio_pio_addr(vhdl, 0, + PCIIO_SPACE_WIN(idx), 0, size, 0, PCIIO_BYTE_STREAM); + } + else + continue; + + device_dev->resource[idx].end = + device_dev->resource[idx].start + size; + + /* + * Adjust the addresses to go to the SWIZZLE .. + */ + device_dev->resource[idx].start = + device_dev->resource[idx].start & 0xfffff7ffffffffff; + device_dev->resource[idx].end = + device_dev->resource[idx].end & 0xfffff7ffffffffff; + printk(" to 0x%lx\n", device_dev->resource[idx].start); +res = 0; +res = pciio_config_get(vhdl, (unsigned) PCI_BASE_ADDRESS_0 + idx, 4); +printk("After pciio_pio_addr Base address %d = 0x%lx\n", idx, res); + + if (device_dev->resource[idx].flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + else if (device_dev->resource[idx].flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + /* + * Now handle the ROM resource .. + */ + size = device_dev->resource[PCI_ROM_RESOURCE].end - + device_dev->resource[PCI_ROM_RESOURCE].start; + printk(" Changing device %d:%d ROM resource start address from 0x%lx", + PCI_SLOT(device_dev->devfn),PCI_FUNC(device_dev->devfn), + device_dev->resource[PCI_ROM_RESOURCE].start); + device_dev->resource[PCI_ROM_RESOURCE].start = + (unsigned long) pciio_pio_addr(vhdl, 0, PCIIO_SPACE_ROM, 0, + size, 0, PCIIO_BYTE_STREAM); + device_dev->resource[PCI_ROM_RESOURCE].end = + device_dev->resource[PCI_ROM_RESOURCE].start + size; + + /* + * go through synergy swizzled space + */ + device_dev->resource[PCI_ROM_RESOURCE].start &= 0xfffff7ffffffffffUL; + device_dev->resource[PCI_ROM_RESOURCE].end &= 0xfffff7ffffffffffUL; + + /* + * Update the Command Word on the Card. + */ + cmd |= PCI_COMMAND_MASTER; /* If the device doesn't support */ + /* bit gets dropped .. no harm */ + pci_write_config_word(device_dev, PCI_COMMAND, cmd); + + printk(" to 0x%lx\n", device_dev->resource[PCI_ROM_RESOURCE].start); + + /* + * Set the irq correctly. 
+ * Bits 7:3 = slot + * Bits 2:0 = function + * + * In the IRQ we will have: + * Bits 24:16 = bus number + * Bits 15:8 = slot|func number + */ + irq = 0; + irq = (irq | (device_dev->devfn << 8)); + irq = (irq | ( (device_dev->bus->number & 0xff) << 16) ); + device_dev->irq = irq; +printk("sn1_pci_fixup: slot= %d fn= %d vendor= 0x%x device= 0x%x irq= 0x%x\n", +PCI_SLOT(device_dev->devfn),PCI_FUNC(device_dev->devfn),device_dev->vendor, +device_dev->device, device_dev->irq); + + } +#endif /* REAL_HARDWARE */ +#if 0 + +{ + devfs_handle_t bridge_vhdl = pci_bus_to_vertex(0); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) hwgraph_fastinfo_get(bridge_vhdl); + bridge_t *bridge = pcibr_soft->bs_base; + +printk("After Changing PIO Map Address:\n"); + printk("pci_fixup_ioc3: Before devreg fixup\n"); + printk("pci_fixup_ioc3: Devreg 0 0x%x\n", bridge->b_device[0].reg); + printk("pci_fixup_ioc3: Devreg 1 0x%x\n", bridge->b_device[1].reg); + printk("pci_fixup_ioc3: Devreg 2 0x%x\n", bridge->b_device[2].reg); + printk("pci_fixup_ioc3: Devreg 3 0x%x\n", bridge->b_device[3].reg); + printk("pci_fixup_ioc3: Devreg 4 0x%x\n", bridge->b_device[4].reg); + printk("pci_fixup_ioc3: Devreg 5 0x%x\n", bridge->b_device[5].reg); + printk("pci_fixup_ioc3: Devreg 6 0x%x\n", bridge->b_device[6].reg); + printk("pci_fixup_ioc3: Devreg 7 0x%x\n", bridge->b_device[7].reg); +} +#endif + +} + +/* + * pci_bus_map_create() - Called by pci_bus_to_hcl_cvlink() to finish the job. + */ +static int +pci_bus_map_create(devfs_handle_t xtalk) +{ + + devfs_handle_t master_node_vertex = NULL; + devfs_handle_t xwidget = NULL; + devfs_handle_t pci_bus = NULL; + hubinfo_t hubinfo = NULL; + xwidgetnum_t widgetnum; + char pathname[128]; + graph_error_t rv; + + /* + * Loop throught this vertex and get the Xwidgets .. + */ + for (widgetnum = HUB_WIDGET_ID_MIN; widgetnum <= HUB_WIDGET_ID_MAX; widgetnum++) { + sprintf(pathname, "%d", widgetnum); + xwidget = NULL; + + rv = hwgraph_traverse(xtalk, pathname, &xwidget); + if ( (rv != GRAPH_SUCCESS) ) { + if (!xwidget) + continue; + } + + sprintf(pathname, "%d/"EDGE_LBL_PCI, widgetnum); + pci_bus = NULL; + if (hwgraph_traverse(xtalk, pathname, &pci_bus) != GRAPH_SUCCESS) + if (!pci_bus) + continue; + + /* + * Assign the correct bus number and also the nasid of this + * pci Xwidget. + * + * Should not be any race here ... + */ + num_bridges++; + busnum_to_xwidget[num_bridges - 1] = xwidget; + + /* + * Get the master node and from there get the NASID. + */ + master_node_vertex = device_master_get(xwidget); + if (!master_node_vertex) { + printk(" **** pci_bus_map_create: Unable to get .master for vertex 0x%p **** \n", xwidget); + } + + hubinfo_get(master_node_vertex, &hubinfo); + if (!hubinfo) { + printk(" **** pci_bus_map_create: Unable to get hubinfo for master node vertex 0x%p ****\n", master_node_vertex); + return(1); + } else { + busnum_to_nid[num_bridges - 1] = hubinfo->h_nasid; + } + + printk("pci_bus_map_create: Found Hub nasid %d PCI Xwidget 0x%p widgetnum= %d\n", hubinfo->h_nasid, xwidget, widgetnum); + } + + return(0); +} + +/* + * pci_bus_to_hcl_cvlink() - This routine is called after SGI IO Infrastructure + * initialization has completed to set up the mappings between Xbridge + * and logical pci bus numbers. We also set up the NASID for each of these + * xbridges. + * + * Must be called before pci_init() is invoked. 
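+ *
+ * Note: the loop below walks the "/dev/hw/module" entries with
+ * hwgraph_edge_get_next() until none are left, and pci_bus_map_create()
+ * hands out logical bus numbers in plain discovery order via
+ * num_bridges.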
+ */ +int +pci_bus_to_hcl_cvlink(void) +{ + + devfs_handle_t devfs_hdl = NULL; + devfs_handle_t module_comp = NULL; + devfs_handle_t node = NULL; + devfs_handle_t xtalk = NULL; + graph_vertex_place_t placeptr = EDGE_PLACE_WANT_REAL_EDGES; + int rv = 0; + char name[256]; + + /* + * Iterate throught each xtalk links in the system .. + * /hw/module/001c01/node/xtalk/ 8|9|10|11|12|13|14|15 + * + * /hw/module/001c01/node/xtalk/15 -> /hw/module/001c01/Ibrick/xtalk/15 + * + * What if it is not pci? + */ + devfs_hdl = hwgraph_path_to_vertex("/dev/hw/module"); + + /* + * Loop throught this directory "/devfs/hw/module/" and get each + * of it's entry. + */ + while (1) { + + /* Get vertex of component /dev/hw/<module_number> */ + memset((char *)name, '0', 256); + module_comp = NULL; + rv = hwgraph_edge_get_next(devfs_hdl, (char *)name, &module_comp, (uint *)&placeptr); + if ((rv == 0) && (module_comp)) { + /* Found a valid entry */ + node = NULL; + rv = hwgraph_edge_get(module_comp, "node", &node); + + } else { + printk("pci_bus_to_hcl_cvlink: No more Module Component.\n"); + return(0); + } + + if ( (rv != 0) || (!node) ){ + printk("pci_bus_to_hcl_cvlink: Module Component does not have node vertex.\n"); + continue; + } else { + xtalk = NULL; + rv = hwgraph_edge_get(node, "xtalk", &xtalk); + if ( (rv != 0) || (xtalk == NULL) ){ + printk("pci_bus_to_hcl_cvlink: Node has no xtalk vertex.\n"); + continue; + } + } + + printk("pci_bus_to_hcl_cvlink: Found Module %s node vertex = 0x%p xtalk vertex = 0x%p\n", name, node, xtalk); + /* + * Call routine to get the existing PCI Xwidget and create + * the convenience link from "/devfs/hw/pci_bus/.." + */ + pci_bus_map_create(xtalk); + } + + return(0); +} + +/* + * sgi_pci_intr_support - + */ +int +sgi_pci_intr_support (unsigned int requested_irq, device_desc_t *dev_desc, + devfs_handle_t *bus_vertex, pciio_intr_line_t *lines, + devfs_handle_t *device_vertex) + +{ + + unsigned int bus; + unsigned int devfn; + struct pci_dev *pci_dev; + unsigned char intr_pin = 0; + struct sn1_widget_sysdata *widget_sysdata; + struct sn1_device_sysdata *device_sysdata; + + printk("sgi_pci_intr_support: Called with requested_irq 0x%x\n", requested_irq); + + if (!dev_desc || !bus_vertex || !device_vertex) { + printk("sgi_pci_intr_support: Invalid parameter dev_desc 0x%p, bus_vertex 0x%p, device_vertex 0x%p\n", dev_desc, bus_vertex, device_vertex); + return(-1); + } + + devfn = (requested_irq >> 8) & 0xff; + bus = (requested_irq >> 16) & 0xffff; + pci_dev = pci_find_slot(bus, devfn); + widget_sysdata = (struct sn1_widget_sysdata *)pci_dev->bus->sysdata; + *bus_vertex = widget_sysdata->vhdl; + device_sysdata = (struct sn1_device_sysdata *)pci_dev->sysdata; + *device_vertex = device_sysdata->vhdl; +#if 0 + { + int pos; + char dname[256]; + pos = devfs_generate_path(*device_vertex, dname, 256); + printk("%s : path= %s pos %d\n", __FUNCTION__, &dname[pos], pos); + } +#endif /* BRINGUP */ + + + /* + * Get the Interrupt PIN. 
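+	 * Note: requested_irq, unpacked a few lines above, is the synthetic
+	 * value built in sn1_pci_fixup(), (bus << 16) | (devfn << 8); e.g.
+	 * 0x10a00 decodes to bus 1, devfn 0x0a (slot 1, function 2).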
+ */ + pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &intr_pin); + *lines = (pciio_intr_line_t)intr_pin; + +#ifdef BRINGUP + /* + * ioc3 can't decode the PCI_INTERRUPT_PIN field of its config + * space so we have to set it here + */ + if (pci_dev->vendor == PCI_VENDOR_ID_SGI && + pci_dev->device == PCI_DEVICE_ID_SGI_IOC3 ) { + *lines = 1; + printk("%s : IOC3 HACK: lines= %d\n", __FUNCTION__, *lines); + } +#endif /* BRINGUP */ + + /* Not supported currently */ + *dev_desc = NULL; + + printk("sgi_pci_intr_support: Device Descriptor 0x%p, Bus Vertex 0x%p, Interrupt Pins 0x%x, Device Vertex 0x%p\n", *dev_desc, *bus_vertex, *lines, *device_vertex); + + return(0); + +} diff --git a/arch/ia64/sn/io/pci_dma.c b/arch/ia64/sn/io/pci_dma.c new file mode 100644 index 000000000..cab036127 --- /dev/null +++ b/arch/ia64/sn/io/pci_dma.c @@ -0,0 +1,334 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Leo Dagum + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/devfs_fs_kernel.h> + +#ifndef LANGUAGE_C +#define LANGUAGE_C 99 +#endif +#ifndef _LANGUAGE_C +#define _LANGUAGE_C 99 +#endif +#ifndef CONFIG_IA64_SGI_IO +#define CONFIG_IA64_SGI_IO 99 +#endif + +#include <asm/io.h> +#include <asm/sn/sgi.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/pci/pcibr.h> +#include <asm/sn/pci/pcibr_private.h> +#include <asm/sn/iobus.h> +#include <asm/sn/pci/pci_bus_cvlink.h> +#include <asm/sn/types.h> +#include <asm/sn/alenlist.h> + +/* + * this is REALLY ugly, blame it on gcc's lame inlining that we + * have to put procedures in header files + */ +#if LANGUAGE_C == 99 +#undef LANGUAGE_C +#endif +#if _LANGUAGE_C == 99 +#undef _LANGUAGE_C +#endif +#if CONFIG_IA64_SGI_IO == 99 +#undef CONFIG_IA64_SGI_IO +#endif + +/* + * sn1 platform specific pci_alloc_consistent() + * + * this interface is meant for "command" streams, i.e. called only + * once for initializing a device, so we don't want prefetching or + * write gathering turned on, hence the PCIIO_DMA_CMD flag + */ +void * +sn1_pci_alloc_consistent (struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) +{ + void *ret; + int gfp = GFP_ATOMIC; + devfs_handle_t vhdl; + struct sn1_device_sysdata *device_sysdata; + paddr_t temp_ptr; + + *dma_handle = (dma_addr_t) NULL; + + /* + * get vertex for the device + */ + device_sysdata = (struct sn1_device_sysdata *) hwdev->sysdata; + vhdl = device_sysdata->vhdl; + + if ( ret = (void *)__get_free_pages(gfp, get_order(size)) ) { + memset(ret, 0, size); + } else { + return(NULL); + } + + temp_ptr = (paddr_t) __pa(ret); + if (IS_PCIA64(hwdev)) { + + /* + * This device supports 64bits DMA addresses. + */ + *dma_handle = pciio_dmatrans_addr(vhdl, NULL, temp_ptr, size, + PCIBR_BARRIER | PCIIO_BYTE_STREAM | PCIIO_DMA_CMD + | PCIIO_DMA_A64 ); + return (ret); + } + + /* + * Devices that supports 32 Bits upto 63 Bits DMA Address gets + * 32 Bits DMA addresses. + * + * First try to get 32 Bit Direct Map Support. + */ + if (IS_PCI32G(hwdev)) { + *dma_handle = pciio_dmatrans_addr(vhdl, NULL, temp_ptr, size, + PCIBR_BARRIER | PCIIO_BYTE_STREAM | PCIIO_DMA_CMD); + if (dma_handle) { + return (ret); + } else { + /* + * We need to map this request by using ATEs. 
+ */ + printk("sn1_pci_alloc_consistent: 32Bits DMA Page Map support not available yet!"); + BUG(); + } + } + + if (IS_PCI32L(hwdev)) { + /* + * SNIA64 cannot support DMA Addresses smaller than 32 bits. + */ + return (NULL); + } + + return NULL; +} + +void +sn1_pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) +{ + free_pages((unsigned long) vaddr, get_order(size)); +} + +/* + * On sn1 we use the alt_address entry of the scatterlist to store + * the physical address corresponding to the given virtual address + */ +int +sn1_pci_map_sg (struct pci_dev *hwdev, + struct scatterlist *sg, int nents, int direction) +{ + + int i; + devfs_handle_t vhdl; + dma_addr_t dma_addr; + paddr_t temp_ptr; + struct sn1_device_sysdata *device_sysdata; + + + if (direction == PCI_DMA_NONE) + BUG(); + + /* + * Handle 64 bit cards. + */ + device_sysdata = (struct sn1_device_sysdata *) hwdev->sysdata; + vhdl = device_sysdata->vhdl; + for (i = 0; i < nents; i++, sg++) { + sg->orig_address = sg->address; + dma_addr = 0; + temp_ptr = (paddr_t) __pa(sg->address); + + /* + * Handle the most common case 64Bit cards. + */ + if (IS_PCIA64(hwdev)) { + dma_addr = (dma_addr_t) pciio_dmatrans_addr(vhdl, NULL, + temp_ptr, sg->length, + PCIBR_BARRIER | PCIIO_BYTE_STREAM | + PCIIO_DMA_CMD | PCIIO_DMA_A64 ); + sg->address = (char *)dma_addr; +/* printk("pci_map_sg: 64Bits hwdev %p DMA Address 0x%p alt_address 0x%p orig_address 0x%p length 0x%x\n", hwdev, sg->address, sg->alt_address, sg->orig_address, sg->length); */ + continue; + } + + /* + * Handle 32Bits and greater cards. + */ + if (IS_PCI32G(hwdev)) { + dma_addr = (dma_addr_t) pciio_dmatrans_addr(vhdl, NULL, + temp_ptr, sg->length, + PCIBR_BARRIER | PCIIO_BYTE_STREAM | + PCIIO_DMA_CMD); + if (dma_addr) { + sg->address = (char *)dma_addr; +/* printk("pci_map_single: 32Bit direct pciio_dmatrans_addr pcidev %p returns dma_addr 0x%lx\n", hwdev, dma_addr); */ + continue; + } else { + /* + * We need to map this request by using ATEs. + */ + printk("pci_map_single: 32Bits DMA Page Map support not available yet!"); + BUG(); + + } + } + } + + return nents; + +} + +/* + * Unmap a set of streaming mode DMA translations. + * Again, cpu read rules concerning calls here are the same as for + * pci_unmap_single() above. + */ +void +sn1_pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + int i; + + if (direction == PCI_DMA_NONE) + BUG(); + for (i = 0; i < nelems; i++, sg++) + if (sg->orig_address != sg->address) { + /* phys_to_virt((dma_addr_t)sg->address | ~0x80000000); */ + sg->address = sg->orig_address; + sg->orig_address = 0; + } +} + +/* + * We map this to the one step pciio_dmamap_trans interface rather than + * the two step pciio_dmamap_alloc/pciio_dmamap_addr because we have + * no way of saving the dmamap handle from the alloc to later free + * (which is pretty much unacceptable). + * + * TODO: simplify our interface; + * get rid of dev_desc and vhdl (seems redundant given a pci_dev); + * figure out how to save dmamap handle so can use two step. + */ +dma_addr_t sn1_pci_map_single (struct pci_dev *hwdev, + void *ptr, size_t size, int direction) +{ + devfs_handle_t vhdl; + dma_addr_t dma_addr; + paddr_t temp_ptr; + struct sn1_device_sysdata *device_sysdata; + + + if (direction == PCI_DMA_NONE) + BUG(); + + if (IS_PCI32L(hwdev)) { + /* + * SNIA64 cannot support DMA Addresses smaller than 32 bits. 
+ */ + return ((dma_addr_t) NULL); + } + + /* + * find vertex for the device + */ + device_sysdata = (struct sn1_device_sysdata *)hwdev->sysdata; + vhdl = device_sysdata->vhdl; +/* printk("pci_map_single: Called vhdl = 0x%p ptr = 0x%p size = %d\n", vhdl, ptr, size); */ + /* + * Call our dmamap interface + */ + dma_addr = 0; + temp_ptr = (paddr_t) __pa(ptr); + + if (IS_PCIA64(hwdev)) { + /* + * This device supports 64bits DMA addresses. + */ + dma_addr = (dma_addr_t) pciio_dmatrans_addr(vhdl, NULL, + temp_ptr, size, + PCIBR_BARRIER | PCIIO_BYTE_STREAM | PCIIO_DMA_CMD + | PCIIO_DMA_A64 ); +/* printk("pci_map_single: 64Bit pciio_dmatrans_addr pcidev %p returns dma_addr 0x%lx\n", hwdev, dma_addr); */ + return (dma_addr); + } + + /* + * Devices that supports 32 Bits upto 63 Bits DMA Address gets + * 32 Bits DMA addresses. + * + * First try to get 32 Bit Direct Map Support. + */ + if (IS_PCI32G(hwdev)) { + dma_addr = (dma_addr_t) pciio_dmatrans_addr(vhdl, NULL, + temp_ptr, size, + PCIBR_BARRIER | PCIIO_BYTE_STREAM | PCIIO_DMA_CMD); + if (dma_addr) { +/* printk("pci_map_single: 32Bit direct pciio_dmatrans_addr pcidev %p returns dma_addr 0x%lx\n", hwdev, dma_addr); */ + return (dma_addr); + } else { + /* + * We need to map this request by using ATEs. + */ + printk("pci_map_single: 32Bits DMA Page Map support not available yet!"); + BUG(); + } + } + + if (IS_PCI32L(hwdev)) { + /* + * SNIA64 cannot support DMA Addresses smaller than 32 bits. + */ + return ((dma_addr_t) NULL); + } + + return ((dma_addr_t) NULL); + +} + +void +sn1_pci_unmap_single (struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + +void +sn1_pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + +void +sn1_pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + +unsigned long +sn1_dma_address (struct scatterlist *sg) +{ + return (sg->address); +} diff --git a/arch/ia64/sn/io/pcibr.c b/arch/ia64/sn/io/pcibr.c new file mode 100644 index 000000000..e5279fefd --- /dev/null +++ b/arch/ia64/sn/io/pcibr.c @@ -0,0 +1,9572 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. 
+ * Copyright (C) 2000 by Colin Ngam + */ + +#ifdef BRINGUP +int NeedXbridgeSwap = 0; +#endif + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/addrs.h> +#include <asm/sn/arch.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/cmn_err.h> +#include <asm/sn/xtalk/xwidget.h> +#include <asm/sn/pci/bridge.h> +#include <asm/sn/pci/pciio.h> +#include <asm/sn/pci/pcibr.h> +#include <asm/sn/pci/pcibr_private.h> +#include <asm/sn/pci/pci_defs.h> +#include <asm/sn/prio.h> +#include <asm/sn/ioerror_handling.h> +#include <asm/sn/xtalk/xbow.h> +#include <asm/sn/ioc3.h> +#include <asm/sn/eeprom.h> +#include <asm/sn/sn1/bedrock.h> +#include <asm/sn/sn_private.h> +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) +#include <asm/sn/sn1/hubio.h> +#include <asm/sn/sn1/hubio_next.h> +#endif + +#if defined(BRINGUP) +#if 0 +#define DEBUG 1 /* To avoid lots of bad printk() formats leave off */ +#endif +#define PCI_DEBUG 1 +#define ATTACH_DEBUG 1 +#define PCIBR_SOFT_LIST 1 +#endif + +#ifndef LOCAL +#define LOCAL static +#endif + +#define PCIBR_LLP_CONTROL_WAR +#if defined (PCIBR_LLP_CONTROL_WAR) +int pcibr_llp_control_war_cnt; +#endif /* PCIBR_LLP_CONTROL_WAR */ + +#define NEWAf(ptr,n,f) (ptr = kmem_zalloc((n)*sizeof (*(ptr)), (f&PCIIO_NOSLEEP)?KM_NOSLEEP:KM_SLEEP)) +#define NEWA(ptr,n) (ptr = kmem_zalloc((n)*sizeof (*(ptr)), KM_SLEEP)) +#define DELA(ptr,n) (kfree(ptr)) + +#define NEWf(ptr,f) NEWAf(ptr,1,f) +#define NEW(ptr) NEWA(ptr,1) +#define DEL(ptr) DELA(ptr,1) + +int pcibr_devflag = D_MP; + +#define F(s,n) { 1l<<(s),-(s), n } + +struct reg_desc bridge_int_status_desc[] = +{ + F(31, "MULTI_ERR"), + F(30, "PMU_ESIZE_EFAULT"), + F(29, "UNEXPECTED_RESP"), + F(28, "BAD_XRESP_PACKET"), + F(27, "BAD_XREQ_PACKET"), + F(26, "RESP_XTALK_ERROR"), + F(25, "REQ_XTALK_ERROR"), + F(24, "INVALID_ADDRESS"), + F(23, "UNSUPPORTED_XOP"), + F(22, "XREQ_FIFO_OFLOW"), + F(21, "LLP_REC_SNERROR"), + F(20, "LLP_REC_CBERROR"), + F(19, "LLP_RCTY"), + F(18, "LLP_TX_RETRY"), + F(17, "LLP_TCTY"), + F(16, "SSRAM_PERR"), + F(15, "PCI_ABORT"), + F(14, "PCI_PARITY"), + F(13, "PCI_SERR"), + F(12, "PCI_PERR"), + F(11, "PCI_MASTER_TOUT"), + F(10, "PCI_RETRY_CNT"), + F(9, "XREAD_REQ_TOUT"), + F(8, "GIO_BENABLE_ERR"), + F(7, "INT7"), + F(6, "INT6"), + F(5, "INT5"), + F(4, "INT4"), + F(3, "INT3"), + F(2, "INT2"), + F(1, "INT1"), + F(0, "INT0"), + {0} +}; + +struct reg_values space_v[] = +{ + {PCIIO_SPACE_NONE, "none"}, + {PCIIO_SPACE_ROM, "ROM"}, + {PCIIO_SPACE_IO, "I/O"}, + {PCIIO_SPACE_MEM, "MEM"}, + {PCIIO_SPACE_MEM32, "MEM(32)"}, + {PCIIO_SPACE_MEM64, "MEM(64)"}, + {PCIIO_SPACE_CFG, "CFG"}, + {PCIIO_SPACE_WIN(0), "WIN(0)"}, + {PCIIO_SPACE_WIN(1), "WIN(1)"}, + {PCIIO_SPACE_WIN(2), "WIN(2)"}, + {PCIIO_SPACE_WIN(3), "WIN(3)"}, + {PCIIO_SPACE_WIN(4), "WIN(4)"}, + {PCIIO_SPACE_WIN(5), "WIN(5)"}, + {PCIIO_SPACE_BAD, "BAD"}, + {0} +}; + +struct reg_desc space_desc[] = +{ + {0xFF, 0, "space", 0, space_v}, + {0} +}; + +#if DEBUG +#define device_desc device_bits +LOCAL struct reg_desc device_bits[] = +{ + {BRIDGE_DEV_ERR_LOCK_EN, 0, "ERR_LOCK_EN"}, + {BRIDGE_DEV_PAGE_CHK_DIS, 0, "PAGE_CHK_DIS"}, + {BRIDGE_DEV_FORCE_PCI_PAR, 0, "FORCE_PCI_PAR"}, + {BRIDGE_DEV_VIRTUAL_EN, 0, "VIRTUAL_EN"}, + {BRIDGE_DEV_PMU_WRGA_EN, 0, "PMU_WRGA_EN"}, + {BRIDGE_DEV_DIR_WRGA_EN, 0, "DIR_WRGA_EN"}, + {BRIDGE_DEV_DEV_SIZE, 0, "DEV_SIZE"}, + {BRIDGE_DEV_RT, 0, "RT"}, + 
{BRIDGE_DEV_SWAP_PMU, 0, "SWAP_PMU"}, + {BRIDGE_DEV_SWAP_DIR, 0, "SWAP_DIR"}, + {BRIDGE_DEV_PREF, 0, "PREF"}, + {BRIDGE_DEV_PRECISE, 0, "PRECISE"}, + {BRIDGE_DEV_COH, 0, "COH"}, + {BRIDGE_DEV_BARRIER, 0, "BARRIER"}, + {BRIDGE_DEV_GBR, 0, "GBR"}, + {BRIDGE_DEV_DEV_SWAP, 0, "DEV_SWAP"}, + {BRIDGE_DEV_DEV_IO_MEM, 0, "DEV_IO_MEM"}, + {BRIDGE_DEV_OFF_MASK, BRIDGE_DEV_OFF_ADDR_SHFT, "DEV_OFF", "%x"}, + {0} +}; +#endif /* DEBUG */ + +#ifdef SUPPORT_PRINTING_R_FORMAT +LOCAL struct reg_values xio_cmd_pactyp[] = +{ + {0x0, "RdReq"}, + {0x1, "RdResp"}, + {0x2, "WrReqWithResp"}, + {0x3, "WrResp"}, + {0x4, "WrReqNoResp"}, + {0x5, "Reserved(5)"}, + {0x6, "FetchAndOp"}, + {0x7, "Reserved(7)"}, + {0x8, "StoreAndOp"}, + {0x9, "Reserved(9)"}, + {0xa, "Reserved(a)"}, + {0xb, "Reserved(b)"}, + {0xc, "Reserved(c)"}, + {0xd, "Reserved(d)"}, + {0xe, "SpecialReq"}, + {0xf, "SpecialResp"}, + {0} +}; + +LOCAL struct reg_desc xio_cmd_bits[] = +{ + {WIDGET_DIDN, -28, "DIDN", "%x"}, + {WIDGET_SIDN, -24, "SIDN", "%x"}, + {WIDGET_PACTYP, -20, "PACTYP", 0, xio_cmd_pactyp}, + {WIDGET_TNUM, -15, "TNUM", "%x"}, + {WIDGET_COHERENT, 0, "COHERENT"}, + {WIDGET_DS, 0, "DS"}, + {WIDGET_GBR, 0, "GBR"}, + {WIDGET_VBPM, 0, "VBPM"}, + {WIDGET_ERROR, 0, "ERROR"}, + {WIDGET_BARRIER, 0, "BARRIER"}, + {0} +}; +#endif /* SUPPORT_PRINTING_R_FORMAT */ + +#if PCIBR_FREEZE_TIME || PCIBR_ATE_DEBUG +LOCAL struct reg_desc ate_bits[] = +{ + {0xFFFF000000000000ull, -48, "RMF", "%x"}, + {~(IOPGSIZE - 1) & /* may trim off some low bits */ + 0x0000FFFFFFFFF000ull, 0, "XIO", "%x"}, + {0x0000000000000F00ull, -8, "port", "%x"}, + {0x0000000000000010ull, 0, "Barrier"}, + {0x0000000000000008ull, 0, "Prefetch"}, + {0x0000000000000004ull, 0, "Precise"}, + {0x0000000000000002ull, 0, "Coherent"}, + {0x0000000000000001ull, 0, "Valid"}, + {0} +}; +#endif + +#if PCIBR_ATE_DEBUG +LOCAL struct reg_values ssram_sizes[] = +{ + {BRIDGE_CTRL_SSRAM_512K, "512k"}, + {BRIDGE_CTRL_SSRAM_128K, "128k"}, + {BRIDGE_CTRL_SSRAM_64K, "64k"}, + {BRIDGE_CTRL_SSRAM_1K, "1k"}, + {0} +}; + +LOCAL struct reg_desc control_bits[] = +{ + {BRIDGE_CTRL_FLASH_WR_EN, 0, "FLASH_WR_EN"}, + {BRIDGE_CTRL_EN_CLK50, 0, "EN_CLK50"}, + {BRIDGE_CTRL_EN_CLK40, 0, "EN_CLK40"}, + {BRIDGE_CTRL_EN_CLK33, 0, "EN_CLK33"}, + {BRIDGE_CTRL_RST_MASK, -24, "RST", "%x"}, + {BRIDGE_CTRL_IO_SWAP, 0, "IO_SWAP"}, + {BRIDGE_CTRL_MEM_SWAP, 0, "MEM_SWAP"}, + {BRIDGE_CTRL_PAGE_SIZE, 0, "PAGE_SIZE"}, + {BRIDGE_CTRL_SS_PAR_BAD, 0, "SS_PAR_BAD"}, + {BRIDGE_CTRL_SS_PAR_EN, 0, "SS_PAR_EN"}, + {BRIDGE_CTRL_SSRAM_SIZE_MASK, 0, "SSRAM_SIZE", 0, ssram_sizes}, + {BRIDGE_CTRL_F_BAD_PKT, 0, "F_BAD_PKT"}, + {BRIDGE_CTRL_LLP_XBAR_CRD_MASK, -12, "LLP_XBAR_CRD", "%d"}, + {BRIDGE_CTRL_CLR_RLLP_CNT, 0, "CLR_RLLP_CNT"}, + {BRIDGE_CTRL_CLR_TLLP_CNT, 0, "CLR_TLLP_CNT"}, + {BRIDGE_CTRL_SYS_END, 0, "SYS_END"}, + {BRIDGE_CTRL_MAX_TRANS_MASK, -4, "MAX_TRANS", "%d"}, + {BRIDGE_CTRL_WIDGET_ID_MASK, 0, "WIDGET_ID", "%x"}, + {0} +}; +#endif + +/* kbrick widgetnum-to-bus layout */ +int p_busnum[MAX_PORT_NUM] = { /* widget# */ + 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0 - 0x7 */ + 2, /* 0x8 */ + 1, /* 0x9 */ + 0, 0, /* 0xa - 0xb */ + 5, /* 0xc */ + 6, /* 0xd */ + 4, /* 0xe */ + 3, /* 0xf */ +}; + +/* + * Additional PIO spaces per slot are + * recorded in this structure. 
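+ * Each allocated range is kept in one pciio_piospace_s, chained
+ * through its "next" pointer; the "free" flag marks entries whose
+ * range can be handed out again.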
+ */ +struct pciio_piospace_s { + pciio_piospace_t next; /* another space for this device */ + char free; /* 1 if free, 0 if in use */ + pciio_space_t space; /* Which space is in use */ + iopaddr_t start; /* Starting address of the PIO space */ + size_t count; /* size of PIO space */ +}; + +/* Use io spin locks. This ensures that all the PIO writes from a particular + * CPU to a particular IO device are synched before the start of the next + * set of PIO operations to the same device. + */ +#define pcibr_lock(pcibr_soft) io_splock(pcibr_soft->bs_lock) +#define pcibr_unlock(pcibr_soft, s) io_spunlock(pcibr_soft->bs_lock,s) + +#if PCIBR_SOFT_LIST +typedef struct pcibr_list_s *pcibr_list_p; +struct pcibr_list_s { + pcibr_list_p bl_next; + pcibr_soft_t bl_soft; + devfs_handle_t bl_vhdl; +}; +pcibr_list_p pcibr_list = 0; +#endif + +typedef volatile unsigned *cfg_p; +typedef volatile bridgereg_t *reg_p; + +#define INFO_LBL_PCIBR_ASIC_REV "_pcibr_asic_rev" + +#define PCIBR_D64_BASE_UNSET (0xFFFFFFFFFFFFFFFF) +#define PCIBR_D32_BASE_UNSET (0xFFFFFFFF) + +#define PCIBR_VALID_SLOT(s) (s < 8) + +#ifdef SN_XXX +extern int hub_device_flags_set(devfs_handle_t widget_dev, + hub_widget_flags_t flags); +#endif + +extern devfs_handle_t hwgraph_root; +extern graph_error_t hwgraph_vertex_unref(devfs_handle_t vhdl); +extern int cap_able(uint64_t x); +extern uint64_t rmalloc(struct map *mp, size_t size); +extern void rmfree(struct map *mp, size_t size, uint64_t a); +extern int hwgraph_vertex_name_get(devfs_handle_t vhdl, char *buf, uint buflen); +extern long atoi(register char *p); +extern void *swap_ptr(void **loc, void *new); +extern char *dev_to_name(devfs_handle_t dev, char *buf, uint buflen); +extern cnodeid_t nodevertex_to_cnodeid(devfs_handle_t vhdl); +extern graph_error_t hwgraph_edge_remove(devfs_handle_t from, char *name, devfs_handle_t *toptr); +extern struct map *rmallocmap(uint64_t mapsiz); +extern void rmfreemap(struct map *mp); +extern int compare_and_swap_ptr(void **location, void *old_ptr, void *new_ptr); +extern void cmn_err_tag(int seqnumber, register int level, char *fmt, ...); + + + +/* ===================================================================== + * Function Table of Contents + * + * The order of functions in this file has stopped + * making much sense. We might want to take a look + * at it some time and bring back some sanity, or + * perhaps bust this file into smaller chunks. 
+ */ + +LOCAL void do_pcibr_rrb_clear(bridge_t *, int); +LOCAL void do_pcibr_rrb_flush(bridge_t *, int); +LOCAL int do_pcibr_rrb_count_valid(bridge_t *, pciio_slot_t); +LOCAL int do_pcibr_rrb_count_avail(bridge_t *, pciio_slot_t); +LOCAL int do_pcibr_rrb_alloc(bridge_t *, pciio_slot_t, int); +LOCAL int do_pcibr_rrb_free(bridge_t *, pciio_slot_t, int); + +LOCAL void do_pcibr_rrb_autoalloc(pcibr_soft_t, int, int); + +int pcibr_wrb_flush(devfs_handle_t); +int pcibr_rrb_alloc(devfs_handle_t, int *, int *); +int pcibr_rrb_check(devfs_handle_t, int *, int *, int *, int *); +int pcibr_alloc_all_rrbs(devfs_handle_t, int, int, int, int, int, int, int, int, int); +void pcibr_rrb_flush(devfs_handle_t); + +LOCAL int pcibr_try_set_device(pcibr_soft_t, pciio_slot_t, unsigned, bridgereg_t); +void pcibr_release_device(pcibr_soft_t, pciio_slot_t, bridgereg_t); + +LOCAL void pcibr_clearwidint(bridge_t *); +LOCAL void pcibr_setwidint(xtalk_intr_t); +LOCAL int pcibr_probe_slot(bridge_t *, cfg_p, unsigned *); + +void pcibr_init(void); +int pcibr_attach(devfs_handle_t); +int pcibr_detach(devfs_handle_t); +int pcibr_open(devfs_handle_t *, int, int, cred_t *); +int pcibr_close(devfs_handle_t, int, int, cred_t *); +int pcibr_map(devfs_handle_t, vhandl_t *, off_t, size_t, uint); +int pcibr_unmap(devfs_handle_t, vhandl_t *); +int pcibr_ioctl(devfs_handle_t, int, void *, int, struct cred *, int *); + +void pcibr_freeblock_sub(iopaddr_t *, iopaddr_t *, iopaddr_t, size_t); + +#ifndef BRINGUP +LOCAL int pcibr_init_ext_ate_ram(bridge_t *); +#endif +LOCAL int pcibr_ate_alloc(pcibr_soft_t, int); +LOCAL void pcibr_ate_free(pcibr_soft_t, int, int); + +LOCAL pcibr_info_t pcibr_info_get(devfs_handle_t); +LOCAL pcibr_info_t pcibr_device_info_new(pcibr_soft_t, pciio_slot_t, pciio_function_t, pciio_vendor_id_t, pciio_device_id_t); +LOCAL void pcibr_device_info_free(devfs_handle_t, pciio_slot_t); +LOCAL int pcibr_device_attach(devfs_handle_t,pciio_slot_t); +LOCAL int pcibr_device_detach(devfs_handle_t,pciio_slot_t); +LOCAL iopaddr_t pcibr_addr_pci_to_xio(devfs_handle_t, pciio_slot_t, pciio_space_t, iopaddr_t, size_t, unsigned); + +pcibr_piomap_t pcibr_piomap_alloc(devfs_handle_t, device_desc_t, pciio_space_t, iopaddr_t, size_t, size_t, unsigned); +void pcibr_piomap_free(pcibr_piomap_t); +caddr_t pcibr_piomap_addr(pcibr_piomap_t, iopaddr_t, size_t); +void pcibr_piomap_done(pcibr_piomap_t); +caddr_t pcibr_piotrans_addr(devfs_handle_t, device_desc_t, pciio_space_t, iopaddr_t, size_t, unsigned); +iopaddr_t pcibr_piospace_alloc(devfs_handle_t, device_desc_t, pciio_space_t, size_t, size_t); +void pcibr_piospace_free(devfs_handle_t, pciio_space_t, iopaddr_t, size_t); + +LOCAL iopaddr_t pcibr_flags_to_d64(unsigned, pcibr_soft_t); +LOCAL bridge_ate_t pcibr_flags_to_ate(unsigned); + +pcibr_dmamap_t pcibr_dmamap_alloc(devfs_handle_t, device_desc_t, size_t, unsigned); +void pcibr_dmamap_free(pcibr_dmamap_t); +LOCAL bridge_ate_p pcibr_ate_addr(pcibr_soft_t, int); +LOCAL iopaddr_t pcibr_addr_xio_to_pci(pcibr_soft_t, iopaddr_t, size_t); +iopaddr_t pcibr_dmamap_addr(pcibr_dmamap_t, paddr_t, size_t); +alenlist_t pcibr_dmamap_list(pcibr_dmamap_t, alenlist_t, unsigned); +void pcibr_dmamap_done(pcibr_dmamap_t); +cnodeid_t pcibr_get_dmatrans_node(devfs_handle_t); +iopaddr_t pcibr_dmatrans_addr(devfs_handle_t, device_desc_t, paddr_t, size_t, unsigned); +alenlist_t pcibr_dmatrans_list(devfs_handle_t, device_desc_t, alenlist_t, unsigned); +void pcibr_dmamap_drain(pcibr_dmamap_t); +void pcibr_dmaaddr_drain(devfs_handle_t, paddr_t, size_t); +void 
pcibr_dmalist_drain(devfs_handle_t, alenlist_t); +iopaddr_t pcibr_dmamap_pciaddr_get(pcibr_dmamap_t); + +static unsigned pcibr_intr_bits(pciio_info_t info, pciio_intr_line_t lines); +pcibr_intr_t pcibr_intr_alloc(devfs_handle_t, device_desc_t, pciio_intr_line_t, devfs_handle_t); +void pcibr_intr_free(pcibr_intr_t); +LOCAL void pcibr_setpciint(xtalk_intr_t); +int pcibr_intr_connect(pcibr_intr_t, intr_func_t, intr_arg_t, void *); +void pcibr_intr_disconnect(pcibr_intr_t); + +devfs_handle_t pcibr_intr_cpu_get(pcibr_intr_t); +void pcibr_xintr_preset(void *, int, xwidgetnum_t, iopaddr_t, xtalk_intr_vector_t); +void pcibr_intr_list_func(intr_arg_t); + +LOCAL void print_bridge_errcmd(uint32_t, char *); + +void pcibr_error_dump(pcibr_soft_t); +uint32_t pcibr_errintr_group(uint32_t); +LOCAL void pcibr_pioerr_check(pcibr_soft_t); +LOCAL void pcibr_error_intr_handler(intr_arg_t); + +LOCAL int pcibr_addr_toslot(pcibr_soft_t, iopaddr_t, pciio_space_t *, iopaddr_t *, pciio_function_t *); +LOCAL void pcibr_error_cleanup(pcibr_soft_t, int); +void pcibr_device_disable(pcibr_soft_t, int); +LOCAL int pcibr_pioerror(pcibr_soft_t, int, ioerror_mode_t, ioerror_t *); +int pcibr_dmard_error(pcibr_soft_t, int, ioerror_mode_t, ioerror_t *); +int pcibr_dmawr_error(pcibr_soft_t, int, ioerror_mode_t, ioerror_t *); +LOCAL int pcibr_error_handler(error_handler_arg_t, int, ioerror_mode_t, ioerror_t *); +int pcibr_error_devenable(devfs_handle_t, int); + +void pcibr_provider_startup(devfs_handle_t); +void pcibr_provider_shutdown(devfs_handle_t); + +int pcibr_reset(devfs_handle_t); +pciio_endian_t pcibr_endian_set(devfs_handle_t, pciio_endian_t, pciio_endian_t); +int pcibr_priority_bits_set(pcibr_soft_t, pciio_slot_t, pciio_priority_t); +pciio_priority_t pcibr_priority_set(devfs_handle_t, pciio_priority_t); +int pcibr_device_flags_set(devfs_handle_t, pcibr_device_flags_t); + +LOCAL cfg_p pcibr_config_addr(devfs_handle_t, unsigned); +uint64_t pcibr_config_get(devfs_handle_t, unsigned, unsigned); +LOCAL uint64_t do_pcibr_config_get(cfg_p, unsigned, unsigned); +void pcibr_config_set(devfs_handle_t, unsigned, unsigned, uint64_t); +LOCAL void do_pcibr_config_set(cfg_p, unsigned, unsigned, uint64_t); + +LOCAL pcibr_hints_t pcibr_hints_get(devfs_handle_t, int); +void pcibr_hints_fix_rrbs(devfs_handle_t); +void pcibr_hints_dualslot(devfs_handle_t, pciio_slot_t, pciio_slot_t); +void pcibr_hints_intr_bits(devfs_handle_t, pcibr_intr_bits_f *); +void pcibr_set_rrb_callback(devfs_handle_t, rrb_alloc_funct_t); +void pcibr_hints_handsoff(devfs_handle_t); +void pcibr_hints_subdevs(devfs_handle_t, pciio_slot_t, uint64_t); + +LOCAL int pcibr_slot_reset(devfs_handle_t,pciio_slot_t); +LOCAL int pcibr_slot_info_init(devfs_handle_t,pciio_slot_t); +LOCAL int pcibr_slot_info_free(devfs_handle_t,pciio_slot_t); +LOCAL int pcibr_slot_addr_space_init(devfs_handle_t,pciio_slot_t); +LOCAL int pcibr_slot_device_init(devfs_handle_t, pciio_slot_t); +LOCAL int pcibr_slot_guest_info_init(devfs_handle_t,pciio_slot_t); +LOCAL int pcibr_slot_initial_rrb_alloc(devfs_handle_t,pciio_slot_t); +LOCAL int pcibr_slot_call_device_attach(devfs_handle_t,pciio_slot_t); +LOCAL int pcibr_slot_call_device_detach(devfs_handle_t,pciio_slot_t); + +int pcibr_slot_powerup(devfs_handle_t,pciio_slot_t); +int pcibr_slot_shutdown(devfs_handle_t,pciio_slot_t); +int pcibr_slot_inquiry(devfs_handle_t,pciio_slot_t); + +/* ===================================================================== + * RRB management + */ + +#define LSBIT(word) ((word) &~ ((word)-1)) + +#define 
PCIBR_RRB_SLOT_VIRTUAL 8 + +LOCAL void +do_pcibr_rrb_clear(bridge_t *bridge, int rrb) +{ + bridgereg_t status; + + /* bridge_lock must be held; + * this RRB must be disabled. + */ + + /* wait until RRB has no outstanduing XIO packets. */ + while ((status = bridge->b_resp_status) & BRIDGE_RRB_INUSE(rrb)) { + ; /* XXX- beats on bridge. bad idea? */ + } + + /* if the RRB has data, drain it. */ + if (status & BRIDGE_RRB_VALID(rrb)) { + bridge->b_resp_clear = BRIDGE_RRB_CLEAR(rrb); + + /* wait until RRB is no longer valid. */ + while ((status = bridge->b_resp_status) & BRIDGE_RRB_VALID(rrb)) { + ; /* XXX- beats on bridge. bad idea? */ + } + } +} + +LOCAL void +do_pcibr_rrb_flush(bridge_t *bridge, int rrbn) +{ + reg_p rrbp = &bridge->b_rrb_map[rrbn & 1].reg; + bridgereg_t rrbv; + int shft = 4 * (rrbn >> 1); + unsigned ebit = BRIDGE_RRB_EN << shft; + + rrbv = *rrbp; + if (rrbv & ebit) + *rrbp = rrbv & ~ebit; + + do_pcibr_rrb_clear(bridge, rrbn); + + if (rrbv & ebit) + *rrbp = rrbv; +} + +/* + * pcibr_rrb_count_valid: count how many RRBs are + * marked valid for the specified PCI slot on this + * bridge. + * + * NOTE: The "slot" parameter for all pcibr_rrb + * management routines must include the "virtual" + * bit; when manageing both the normal and the + * virtual channel, separate calls to these + * routines must be made. To denote the virtual + * channel, add PCIBR_RRB_SLOT_VIRTUAL to the slot + * number. + * + * IMPL NOTE: The obvious algorithm is to iterate + * through the RRB fields, incrementing a count if + * the RRB is valid and matches the slot. However, + * it is much simpler to use an algorithm derived + * from the "partitioned add" idea. First, XOR in a + * pattern such that the fields that match this + * slot come up "all ones" and all other fields + * have zeros in the mismatching bits. Then AND + * together the bits in the field, so we end up + * with one bit turned on for each field that + * matched. Now we need to count these bits. This + * can be done either with a series of shift/add + * instructions or by using "tmp % 15"; I expect + * that the cascaded shift/add will be faster. + */ + +LOCAL int +do_pcibr_rrb_count_valid(bridge_t *bridge, + pciio_slot_t slot) +{ + bridgereg_t tmp; + + tmp = bridge->b_rrb_map[slot & 1].reg; + tmp ^= 0x11111111 * (7 - slot / 2); + tmp &= (0xCCCCCCCC & tmp) >> 2; + tmp &= (0x22222222 & tmp) >> 1; + tmp += tmp >> 4; + tmp += tmp >> 8; + tmp += tmp >> 16; + return tmp & 15; +} + +/* + * do_pcibr_rrb_count_avail: count how many RRBs are + * available to be allocated for the specified slot. + * + * IMPL NOTE: similar to the above, except we are + * just counting how many fields have the valid bit + * turned off. + */ +LOCAL int +do_pcibr_rrb_count_avail(bridge_t *bridge, + pciio_slot_t slot) +{ + bridgereg_t tmp; + + tmp = bridge->b_rrb_map[slot & 1].reg; + tmp = (0x88888888 & ~tmp) >> 3; + tmp += tmp >> 4; + tmp += tmp >> 8; + tmp += tmp >> 16; + return tmp & 15; +} + +/* + * do_pcibr_rrb_alloc: allocate some additional RRBs + * for the specified slot. Returns -1 if there were + * insufficient free RRBs to satisfy the request, + * or 0 if the request was fulfilled. + * + * Note that if a request can be partially filled, + * it will be, even if we return failure. + * + * IMPL NOTE: again we avoid iterating across all + * the RRBs; instead, we form up a word containing + * one bit for each free RRB, then peel the bits + * off from the low end. 
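+ * For example (illustrative register value only): if reg were
+ * 0x0000008C, the two low 4-bit fields are already valid and the
+ * other six are free, so (~reg & 0x88888888) >> 3 is 0x11111100;
+ * LSBIT() peels off 0x100, and writing (8 + slot/2) into that field
+ * (0xA for slot 4) hands the corresponding RRB to the requester.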
+ */ +LOCAL int +do_pcibr_rrb_alloc(bridge_t *bridge, + pciio_slot_t slot, + int more) +{ + int rv = 0; + bridgereg_t reg, tmp, bit; + + reg = bridge->b_rrb_map[slot & 1].reg; + tmp = (0x88888888 & ~reg) >> 3; + while (more-- > 0) { + bit = LSBIT(tmp); + if (!bit) { + rv = -1; + break; + } + tmp &= ~bit; + reg = ((reg & ~(bit * 15)) | (bit * (8 + slot / 2))); + } + bridge->b_rrb_map[slot & 1].reg = reg; + return rv; +} + +/* + * do_pcibr_rrb_free: release some of the RRBs that + * have been allocated for the specified + * slot. Returns zero for success, or negative if + * it was unable to free that many RRBs. + * + * IMPL NOTE: We form up a bit for each RRB + * allocated to the slot, aligned with the VALID + * bitfield this time; then we peel bits off one at + * a time, releasing the corresponding RRB. + */ +LOCAL int +do_pcibr_rrb_free(bridge_t *bridge, + pciio_slot_t slot, + int less) +{ + int rv = 0; + bridgereg_t reg, tmp, clr, bit; + int i; + + clr = 0; + reg = bridge->b_rrb_map[slot & 1].reg; + + /* This needs to be done otherwise the rrb's on the virtual channel + * for this slot won't be freed !! + */ + tmp = reg & 0xbbbbbbbb; + + tmp ^= (0x11111111 * (7 - slot / 2)); + tmp &= (0x33333333 & tmp) << 2; + tmp &= (0x44444444 & tmp) << 1; + while (less-- > 0) { + bit = LSBIT(tmp); + if (!bit) { + rv = -1; + break; + } + tmp &= ~bit; + reg &= ~bit; + clr |= bit; + } + bridge->b_rrb_map[slot & 1].reg = reg; + + for (i = 0; i < 8; i++) + if (clr & (8 << (4 * i))) + do_pcibr_rrb_clear(bridge, (2 * i) + (slot & 1)); + + return rv; +} + +LOCAL void +do_pcibr_rrb_autoalloc(pcibr_soft_t pcibr_soft, + int slot, + int more_rrbs) +{ + bridge_t *bridge = pcibr_soft->bs_base; + int got; + + for (got = 0; got < more_rrbs; ++got) { + if (pcibr_soft->bs_rrb_res[slot & 7] > 0) + pcibr_soft->bs_rrb_res[slot & 7]--; + else if (pcibr_soft->bs_rrb_avail[slot & 1] > 0) + pcibr_soft->bs_rrb_avail[slot & 1]--; + else + break; + if (do_pcibr_rrb_alloc(bridge, slot, 1) < 0) + break; +#if PCIBR_RRB_DEBUG + printk( "do_pcibr_rrb_autoalloc: add one to slot %d%s\n", + slot & 7, slot & 8 ? "v" : ""); +#endif + pcibr_soft->bs_rrb_valid[slot]++; + } +#if PCIBR_RRB_DEBUG + printk("%s: %d+%d free RRBs. Allocation list:\n", pcibr_soft->bs_name, + pcibr_soft->bs_rrb_avail[0], + pcibr_soft->bs_rrb_avail[1]); + for (slot = 0; slot < 8; ++slot) + printk("\t%d+%d+%d", + 0xFFF & pcibr_soft->bs_rrb_valid[slot], + 0xFFF & pcibr_soft->bs_rrb_valid[slot + PCIBR_RRB_SLOT_VIRTUAL], + pcibr_soft->bs_rrb_res[slot]); + printk("\n"); +#endif +} + +/* + * Device driver interface to flush the write buffers for a specified + * device hanging off the bridge. + */ +int +pcibr_wrb_flush(devfs_handle_t pconn_vhdl) +{ + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + pciio_slot_t pciio_slot = pciio_info_slot_get(pciio_info); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + bridge_t *bridge = pcibr_soft->bs_base; + volatile bridgereg_t *wrb_flush; + + wrb_flush = &(bridge->b_wr_req_buf[pciio_slot].reg); + while (*wrb_flush); + + return(0); +} +/* + * Device driver interface to request RRBs for a specified device + * hanging off a Bridge. The driver requests the total number of + * RRBs it would like for the normal channel (vchan0) and for the + * "virtual channel" (vchan1). The actual number allocated to each + * channel is returned. + * + * If we cannot allocate at least one RRB to a channel that needs + * at least one, return -1 (failure). Otherwise, satisfy the request + * as best we can and return 0. 
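+ * For example (usage sketch only): a driver wanting three RRBs on
+ * the normal channel and one on the virtual channel passes pointers
+ * to ints holding 3 and 1; on return those ints hold what was
+ * actually granted, and a NULL pointer leaves that channel untouched.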
+ */ +int +pcibr_rrb_alloc(devfs_handle_t pconn_vhdl, + int *count_vchan0, + int *count_vchan1) +{ + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + pciio_slot_t pciio_slot = pciio_info_slot_get(pciio_info); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + bridge_t *bridge = pcibr_soft->bs_base; + int desired_vchan0; + int desired_vchan1; + int orig_vchan0; + int orig_vchan1; + int delta_vchan0; + int delta_vchan1; + int final_vchan0; + int final_vchan1; + int avail_rrbs; + unsigned s; + int error; + + /* + * TBD: temper request with admin info about RRB allocation, + * and according to demand from other devices on this Bridge. + * + * One way of doing this would be to allocate two RRBs + * for each device on the bus, before any drivers start + * asking for extras. This has the weakness that one + * driver might not give back an "extra" RRB until after + * another driver has already failed to get one that + * it wanted. + */ + + s = pcibr_lock(pcibr_soft); + + /* How many RRBs do we own? */ + orig_vchan0 = pcibr_soft->bs_rrb_valid[pciio_slot]; + orig_vchan1 = pcibr_soft->bs_rrb_valid[pciio_slot + PCIBR_RRB_SLOT_VIRTUAL]; + + /* How many RRBs do we want? */ + desired_vchan0 = count_vchan0 ? *count_vchan0 : orig_vchan0; + desired_vchan1 = count_vchan1 ? *count_vchan1 : orig_vchan1; + + /* How many RRBs are free? */ + avail_rrbs = pcibr_soft->bs_rrb_avail[pciio_slot & 1] + + pcibr_soft->bs_rrb_res[pciio_slot]; + + /* Figure desired deltas */ + delta_vchan0 = desired_vchan0 - orig_vchan0; + delta_vchan1 = desired_vchan1 - orig_vchan1; + + /* Trim back deltas to something + * that we can actually meet, by + * decreasing the ending allocation + * for whichever channel wants + * more RRBs. If both want the same + * number, cut the second channel. + * NOTE: do not change the allocation for + * a channel that was passed as NULL. + */ + while ((delta_vchan0 + delta_vchan1) > avail_rrbs) { + if (count_vchan0 && + (!count_vchan1 || + ((orig_vchan0 + delta_vchan0) > + (orig_vchan1 + delta_vchan1)))) + delta_vchan0--; + else + delta_vchan1--; + } + + /* Figure final RRB allocations + */ + final_vchan0 = orig_vchan0 + delta_vchan0; + final_vchan1 = orig_vchan1 + delta_vchan1; + + /* If either channel wants RRBs but our actions + * would leave it with none, declare an error, + * but DO NOT change any RRB allocations. + */ + if ((desired_vchan0 && !final_vchan0) || + (desired_vchan1 && !final_vchan1)) { + + error = -1; + + } else { + + /* Commit the allocations: free, then alloc. + */ + if (delta_vchan0 < 0) + (void) do_pcibr_rrb_free(bridge, pciio_slot, -delta_vchan0); + if (delta_vchan1 < 0) + (void) do_pcibr_rrb_free(bridge, PCIBR_RRB_SLOT_VIRTUAL + pciio_slot, -delta_vchan1); + + if (delta_vchan0 > 0) + (void) do_pcibr_rrb_alloc(bridge, pciio_slot, delta_vchan0); + if (delta_vchan1 > 0) + (void) do_pcibr_rrb_alloc(bridge, PCIBR_RRB_SLOT_VIRTUAL + pciio_slot, delta_vchan1); + + /* Return final values to caller. + */ + if (count_vchan0) + *count_vchan0 = final_vchan0; + if (count_vchan1) + *count_vchan1 = final_vchan1; + + /* prevent automatic changes to this slot's RRBs + */ + pcibr_soft->bs_rrb_fixed |= 1 << pciio_slot; + + /* Track the actual allocations, release + * any further reservations, and update the + * number of available RRBs. 
+ */ + + pcibr_soft->bs_rrb_valid[pciio_slot] = final_vchan0; + pcibr_soft->bs_rrb_valid[pciio_slot + PCIBR_RRB_SLOT_VIRTUAL] = final_vchan1; + pcibr_soft->bs_rrb_avail[pciio_slot & 1] = + pcibr_soft->bs_rrb_avail[pciio_slot & 1] + + pcibr_soft->bs_rrb_res[pciio_slot] + - delta_vchan0 + - delta_vchan1; + pcibr_soft->bs_rrb_res[pciio_slot] = 0; + +#if PCIBR_RRB_DEBUG + printk("pcibr_rrb_alloc: slot %d set to %d+%d; %d+%d free\n", + pciio_slot, final_vchan0, final_vchan1, + pcibr_soft->bs_rrb_avail[0], + pcibr_soft->bs_rrb_avail[1]); + for (pciio_slot = 0; pciio_slot < 8; ++pciio_slot) + printk("\t%d+%d+%d", + 0xFFF & pcibr_soft->bs_rrb_valid[pciio_slot], + 0xFFF & pcibr_soft->bs_rrb_valid[pciio_slot + PCIBR_RRB_SLOT_VIRTUAL], + pcibr_soft->bs_rrb_res[pciio_slot]); + printk("\n"); +#endif + + error = 0; + } + + pcibr_unlock(pcibr_soft, s); + return error; +} + +/* + * Device driver interface to check the current state + * of the RRB allocations. + * + * pconn_vhdl is your PCI connection point (specifies which + * PCI bus and which slot). + * + * count_vchan0 points to where to return the number of RRBs + * assigned to the primary DMA channel, used by all DMA + * that does not explicitly ask for the alternate virtual + * channel. + * + * count_vchan1 points to where to return the number of RRBs + * assigned to the secondary DMA channel, used when + * PCIBR_VCHAN1 and PCIIO_DMA_A64 are specified. + * + * count_reserved points to where to return the number of RRBs + * that have been automatically reserved for your device at + * startup, but which have not been assigned to a + * channel. RRBs must be assigned to a channel to be used; + * this can be done either with an explicit pcibr_rrb_alloc + * call, or automatically by the infrastructure when a DMA + * translation is constructed. Any call to pcibr_rrb_alloc + * will release any unassigned reserved RRBs back to the + * free pool. + * + * count_pool points to where to return the number of RRBs + * that are currently unassigned and unreserved. This + * number can (and will) change as other drivers make calls + * to pcibr_rrb_alloc, or automatically allocate RRBs for + * DMA beyond their initial reservation. + * + * NULL may be passed for any of the return value pointers + * the caller is not interested in. + * + * The return value is "0" if all went well, or "-1" if + * there is a problem. Additionally, if the wrong vertex + * is passed in, one of the subsidiary support functions + * could panic with a "bad pciio fingerprint." + */ + +int +pcibr_rrb_check(devfs_handle_t pconn_vhdl, + int *count_vchan0, + int *count_vchan1, + int *count_reserved, + int *count_pool) +{ + pciio_info_t pciio_info; + pciio_slot_t pciio_slot; + pcibr_soft_t pcibr_soft; + unsigned s; + int error = -1; + + if ((pciio_info = pciio_info_get(pconn_vhdl)) && + (pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info)) && + ((pciio_slot = pciio_info_slot_get(pciio_info)) < 8)) { + + s = pcibr_lock(pcibr_soft); + + if (count_vchan0) + *count_vchan0 = + pcibr_soft->bs_rrb_valid[pciio_slot]; + + if (count_vchan1) + *count_vchan1 = + pcibr_soft->bs_rrb_valid[pciio_slot + PCIBR_RRB_SLOT_VIRTUAL]; + + if (count_reserved) + *count_reserved = + pcibr_soft->bs_rrb_res[pciio_slot]; + + if (count_pool) + *count_pool = + pcibr_soft->bs_rrb_avail[pciio_slot & 1]; + + error = 0; + + pcibr_unlock(pcibr_soft, s); + } + return error; +} + +/* pcibr_alloc_all_rrbs allocates all the rrbs available in the quantities + * requested for each of the devies. 
The evn_odd argument indicates whether + * allcoation for the odd or even rrbs is requested and next group of four pairse + * are the amount to assign to each device (they should sum to <= 8) and + * whether to set the viritual bit for that device (1 indictaes yes, 0 indicates no) + * the devices in order are either 0, 2, 4, 6 or 1, 3, 5, 7 + * if even_odd is even we alloc even rrbs else we allocate odd rrbs + * returns 0 if no errors else returns -1 + */ + +int +pcibr_alloc_all_rrbs(devfs_handle_t vhdl, int even_odd, + int dev_1_rrbs, int virt1, int dev_2_rrbs, int virt2, + int dev_3_rrbs, int virt3, int dev_4_rrbs, int virt4) +{ + devfs_handle_t pcibr_vhdl; +#ifdef colin + pcibr_soft_t pcibr_soft; +#else + pcibr_soft_t pcibr_soft = NULL; +#endif + bridge_t *bridge = NULL; + + uint32_t rrb_setting = 0; + int rrb_shift = 7; + uint32_t cur_rrb; + int dev_rrbs[4]; + int virt[4]; + int i, j; + unsigned s; + + if (GRAPH_SUCCESS == + hwgraph_traverse(vhdl, EDGE_LBL_PCI, &pcibr_vhdl)) { + pcibr_soft = pcibr_soft_get(pcibr_vhdl); + if (pcibr_soft) + bridge = pcibr_soft->bs_base; + hwgraph_vertex_unref(pcibr_vhdl); + } + if (bridge == NULL) + bridge = (bridge_t *) xtalk_piotrans_addr + (vhdl, NULL, 0, sizeof(bridge_t), 0); + + even_odd &= 1; + + dev_rrbs[0] = dev_1_rrbs; + dev_rrbs[1] = dev_2_rrbs; + dev_rrbs[2] = dev_3_rrbs; + dev_rrbs[3] = dev_4_rrbs; + + virt[0] = virt1; + virt[1] = virt2; + virt[2] = virt3; + virt[3] = virt4; + + if ((dev_1_rrbs + dev_2_rrbs + dev_3_rrbs + dev_4_rrbs) > 8) { + return -1; + } + if ((dev_1_rrbs < 0) || (dev_2_rrbs < 0) || (dev_3_rrbs < 0) || (dev_4_rrbs < 0)) { + return -1; + } + /* walk through rrbs */ + for (i = 0; i < 4; i++) { + if (virt[i]) { + cur_rrb = i | 0xc; + cur_rrb = cur_rrb << (rrb_shift * 4); + rrb_shift--; + rrb_setting = rrb_setting | cur_rrb; + dev_rrbs[i] = dev_rrbs[i] - 1; + } + for (j = 0; j < dev_rrbs[i]; j++) { + cur_rrb = i | 0x8; + cur_rrb = cur_rrb << (rrb_shift * 4); + rrb_shift--; + rrb_setting = rrb_setting | cur_rrb; + } + } + + if (pcibr_soft) + s = pcibr_lock(pcibr_soft); + + bridge->b_rrb_map[even_odd].reg = rrb_setting; + + if (pcibr_soft) { + + pcibr_soft->bs_rrb_fixed |= 0x55 << even_odd; + + /* since we've "FIXED" the allocations + * for these slots, we probably can dispense + * with tracking avail/res/valid data, but + * keeping it up to date helps debugging. + */ + + pcibr_soft->bs_rrb_avail[even_odd] = + 8 - (dev_1_rrbs + dev_2_rrbs + dev_3_rrbs + dev_4_rrbs); + + pcibr_soft->bs_rrb_res[even_odd + 0] = 0; + pcibr_soft->bs_rrb_res[even_odd + 2] = 0; + pcibr_soft->bs_rrb_res[even_odd + 4] = 0; + pcibr_soft->bs_rrb_res[even_odd + 6] = 0; + + pcibr_soft->bs_rrb_valid[even_odd + 0] = dev_1_rrbs - virt1; + pcibr_soft->bs_rrb_valid[even_odd + 2] = dev_2_rrbs - virt2; + pcibr_soft->bs_rrb_valid[even_odd + 4] = dev_3_rrbs - virt3; + pcibr_soft->bs_rrb_valid[even_odd + 6] = dev_4_rrbs - virt4; + + pcibr_soft->bs_rrb_valid[even_odd + 0 + PCIBR_RRB_SLOT_VIRTUAL] = virt1; + pcibr_soft->bs_rrb_valid[even_odd + 2 + PCIBR_RRB_SLOT_VIRTUAL] = virt2; + pcibr_soft->bs_rrb_valid[even_odd + 4 + PCIBR_RRB_SLOT_VIRTUAL] = virt3; + pcibr_soft->bs_rrb_valid[even_odd + 6 + PCIBR_RRB_SLOT_VIRTUAL] = virt4; + + pcibr_unlock(pcibr_soft, s); + } + return 0; +} + +/* + * pcibr_rrb_flush: chase down all the RRBs assigned + * to the specified connection point, and flush + * them. 
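+ * Concretely, the even or odd b_rrb_map register is walked one
+ * 4-bit field at a time, and every enabled field whose device-pair
+ * bits match this slot has its RRB flushed via do_pcibr_rrb_flush().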
+ */ +void +pcibr_rrb_flush(devfs_handle_t pconn_vhdl) +{ + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + pciio_slot_t pciio_slot = pciio_info_slot_get(pciio_info); + bridge_t *bridge = pcibr_soft->bs_base; + unsigned s; + reg_p rrbp; + unsigned rrbm; + int i; + int rrbn; + unsigned sval; + unsigned mask; + + sval = BRIDGE_RRB_EN | (pciio_slot >> 1); + mask = BRIDGE_RRB_EN | BRIDGE_RRB_PDEV; + rrbn = pciio_slot & 1; + rrbp = &bridge->b_rrb_map[rrbn].reg; + + s = pcibr_lock(pcibr_soft); + rrbm = *rrbp; + for (i = 0; i < 8; ++i) { + if ((rrbm & mask) == sval) + do_pcibr_rrb_flush(bridge, rrbn); + rrbm >>= 4; + rrbn += 2; + } + pcibr_unlock(pcibr_soft, s); +} + +/* ===================================================================== + * Device(x) register management + */ + +/* pcibr_try_set_device: attempt to modify Device(x) + * for the specified slot on the specified bridge + * as requested in flags, limited to the specified + * bits. Returns which BRIDGE bits were in conflict, + * or ZERO if everything went OK. + * + * Caller MUST hold pcibr_lock when calling this function. + */ +LOCAL int +pcibr_try_set_device(pcibr_soft_t pcibr_soft, + pciio_slot_t slot, + unsigned flags, + bridgereg_t mask) +{ + bridge_t *bridge; + pcibr_soft_slot_t slotp; + bridgereg_t old; + bridgereg_t new; + bridgereg_t chg; + bridgereg_t bad; + bridgereg_t badpmu; + bridgereg_t badd32; + bridgereg_t badd64; + bridgereg_t fix; + unsigned s; + bridgereg_t xmask; + + xmask = mask; + if (pcibr_soft->bs_xbridge) { + if (mask == BRIDGE_DEV_PMU_BITS) + xmask = XBRIDGE_DEV_PMU_BITS; + if (mask == BRIDGE_DEV_D64_BITS) + xmask = XBRIDGE_DEV_D64_BITS; + } + + slotp = &pcibr_soft->bs_slot[slot]; + + s = pcibr_lock(pcibr_soft); + + bridge = pcibr_soft->bs_base; + + old = slotp->bss_device; + + /* figure out what the desired + * Device(x) bits are based on + * the flags specified. + */ + + new = old; + + /* Currently, we inherit anything that + * the new caller has not specified in + * one way or another, unless we take + * action here to not inherit. + * + * This is needed for the "swap" stuff, + * since it could have been set via + * pcibr_endian_set -- altho note that + * any explicit PCIBR_BYTE_STREAM or + * PCIBR_WORD_VALUES will freely override + * the effect of that call (and vice + * versa, no protection either way). + * + * I want to get rid of pcibr_endian_set + * in favor of tracking DMA endianness + * using the flags specified when DMA + * channels are created. + */ + +#define BRIDGE_DEV_WRGA_BITS (BRIDGE_DEV_PMU_WRGA_EN | BRIDGE_DEV_DIR_WRGA_EN) +#define BRIDGE_DEV_SWAP_BITS (BRIDGE_DEV_SWAP_PMU | BRIDGE_DEV_SWAP_DIR) + + /* Do not use Barrier, Write Gather, + * or Prefetch unless asked. + * Leave everything else as it + * was from the last time. 
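+ * (Hence the first step below strips BRIDGE_DEV_BARRIER, the
+ * write-gather bits and BRIDGE_DEV_PREF from the inherited value
+ * before the caller's flags are folded back in.)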
+ */ + new = new + & ~BRIDGE_DEV_BARRIER + & ~BRIDGE_DEV_WRGA_BITS + & ~BRIDGE_DEV_PREF + ; + + /* Generic macro flags + */ + if (flags & PCIIO_DMA_DATA) { +#ifdef colin + new = new + & ~BRIDGE_DEV_BARRIER /* barrier off */ + | BRIDGE_DEV_PREF; /* prefetch on */ +#else + new = (new + & ~BRIDGE_DEV_BARRIER) /* barrier off */ + | BRIDGE_DEV_PREF; /* prefetch on */ +#endif + + } + if (flags & PCIIO_DMA_CMD) { +#ifdef colin + new = new + & ~BRIDGE_DEV_PREF /* prefetch off */ + & ~BRIDGE_DEV_WRGA_BITS /* write gather off */ + | BRIDGE_DEV_BARRIER; /* barrier on */ +#else + new = ((new + & ~BRIDGE_DEV_PREF) /* prefetch off */ + & ~BRIDGE_DEV_WRGA_BITS) /* write gather off */ + | BRIDGE_DEV_BARRIER; /* barrier on */ +#endif + } + /* Generic detail flags + */ + if (flags & PCIIO_WRITE_GATHER) + new |= BRIDGE_DEV_WRGA_BITS; + if (flags & PCIIO_NOWRITE_GATHER) + new &= ~BRIDGE_DEV_WRGA_BITS; + + if (flags & PCIIO_PREFETCH) + new |= BRIDGE_DEV_PREF; + if (flags & PCIIO_NOPREFETCH) + new &= ~BRIDGE_DEV_PREF; + + if (flags & PCIBR_WRITE_GATHER) + new |= BRIDGE_DEV_WRGA_BITS; + if (flags & PCIBR_NOWRITE_GATHER) + new &= ~BRIDGE_DEV_WRGA_BITS; + + if (flags & PCIIO_BYTE_STREAM) + new |= (pcibr_soft->bs_xbridge) ? + BRIDGE_DEV_SWAP_DIR : BRIDGE_DEV_SWAP_BITS; + if (flags & PCIIO_WORD_VALUES) + new &= (pcibr_soft->bs_xbridge) ? + ~BRIDGE_DEV_SWAP_DIR : ~BRIDGE_DEV_SWAP_BITS; + + /* Provider-specific flags + */ + if (flags & PCIBR_PREFETCH) + new |= BRIDGE_DEV_PREF; + if (flags & PCIBR_NOPREFETCH) + new &= ~BRIDGE_DEV_PREF; + + if (flags & PCIBR_PRECISE) + new |= BRIDGE_DEV_PRECISE; + if (flags & PCIBR_NOPRECISE) + new &= ~BRIDGE_DEV_PRECISE; + + if (flags & PCIBR_BARRIER) + new |= BRIDGE_DEV_BARRIER; + if (flags & PCIBR_NOBARRIER) + new &= ~BRIDGE_DEV_BARRIER; + + if (flags & PCIBR_64BIT) + new |= BRIDGE_DEV_DEV_SIZE; + if (flags & PCIBR_NO64BIT) + new &= ~BRIDGE_DEV_DEV_SIZE; + + chg = old ^ new; /* what are we changing, */ + chg &= xmask; /* of the interesting bits */ + + if (chg) { + + badd32 = slotp->bss_d32_uctr ? (BRIDGE_DEV_D32_BITS & chg) : 0; + if (pcibr_soft->bs_xbridge) { + badpmu = slotp->bss_pmu_uctr ? (XBRIDGE_DEV_PMU_BITS & chg) : 0; + badd64 = slotp->bss_d64_uctr ? (XBRIDGE_DEV_D64_BITS & chg) : 0; + } else { + badpmu = slotp->bss_pmu_uctr ? (BRIDGE_DEV_PMU_BITS & chg) : 0; + badd64 = slotp->bss_d64_uctr ? (BRIDGE_DEV_D64_BITS & chg) : 0; + } + bad = badpmu | badd32 | badd64; + + if (bad) { + + /* some conflicts can be resolved by + * forcing the bit on. this may cause + * some performance degredation in + * the stream(s) that want the bit off, + * but the alternative is not allowing + * the new stream at all. + */ +#ifdef colin + if (fix = bad & (BRIDGE_DEV_PRECISE | + BRIDGE_DEV_BARRIER)) { +#else + if ( (fix = bad & (BRIDGE_DEV_PRECISE | + BRIDGE_DEV_BARRIER)) ){ +#endif + bad &= ~fix; + /* don't change these bits if + * they are already set in "old" + */ + chg &= ~(fix & old); + } + /* some conflicts can be resolved by + * forcing the bit off. this may cause + * some performance degredation in + * the stream(s) that want the bit on, + * but the alternative is not allowing + * the new stream at all. + */ +#ifdef colin + if (fix = bad & (BRIDGE_DEV_WRGA_BITS | + BRIDGE_DEV_PREF)) { +#else + if ( (fix = bad & (BRIDGE_DEV_WRGA_BITS | + BRIDGE_DEV_PREF)) ){ +#endif + bad &= ~fix; + /* don't change these bits if + * we wanted to turn them on. 
+ */ + chg &= ~(fix & new); + } + /* conflicts in other bits mean + * we can not establish this DMA + * channel while the other(s) are + * still present. + */ + if (bad) { + pcibr_unlock(pcibr_soft, s); +#if (DEBUG && PCIBR_DEV_DEBUG) + printk("pcibr_try_set_device: mod blocked by %R\n", bad, device_bits); +#endif + return bad; + } + } + } + if (mask == BRIDGE_DEV_PMU_BITS) + slotp->bss_pmu_uctr++; + if (mask == BRIDGE_DEV_D32_BITS) + slotp->bss_d32_uctr++; + if (mask == BRIDGE_DEV_D64_BITS) + slotp->bss_d64_uctr++; + + /* the value we want to write is the + * original value, with the bits for + * our selected changes flipped, and + * with any disabled features turned off. + */ + new = old ^ chg; /* only change what we want to change */ + + if (slotp->bss_device == new) { + pcibr_unlock(pcibr_soft, s); + return 0; + } + bridge->b_device[slot].reg = new; + slotp->bss_device = new; + bridge->b_wid_tflush; /* wait until Bridge PIO complete */ + pcibr_unlock(pcibr_soft, s); +#if DEBUG && PCIBR_DEV_DEBUG + printk("pcibr Device(%d): 0x%p\n", slot, bridge->b_device[slot].reg); +#endif + + return 0; +} + +void +pcibr_release_device(pcibr_soft_t pcibr_soft, + pciio_slot_t slot, + bridgereg_t mask) +{ + pcibr_soft_slot_t slotp; + unsigned s; + + slotp = &pcibr_soft->bs_slot[slot]; + + s = pcibr_lock(pcibr_soft); + + if (mask == BRIDGE_DEV_PMU_BITS) + slotp->bss_pmu_uctr--; + if (mask == BRIDGE_DEV_D32_BITS) + slotp->bss_d32_uctr--; + if (mask == BRIDGE_DEV_D64_BITS) + slotp->bss_d64_uctr--; + + pcibr_unlock(pcibr_soft, s); +} + +/* + * flush write gather buffer for slot + */ +LOCAL void +pcibr_device_write_gather_flush(pcibr_soft_t pcibr_soft, + pciio_slot_t slot) +{ + bridge_t *bridge; + unsigned s; + volatile uint32_t wrf; + s = pcibr_lock(pcibr_soft); + bridge = pcibr_soft->bs_base; + wrf = bridge->b_wr_req_buf[slot].reg; + pcibr_unlock(pcibr_soft, s); +} + +/* ===================================================================== + * Bridge (pcibr) "Device Driver" entry points + */ + +/* + * pcibr_probe_slot: read a config space word + * while trapping any errors; reutrn zero if + * all went OK, or nonzero if there was an error. + * The value read, if any, is passed back + * through the valp parameter. + */ +LOCAL int +pcibr_probe_slot(bridge_t *bridge, + cfg_p cfg, + unsigned *valp) +{ + int rv; + bridgereg_t old_enable, new_enable; + + old_enable = bridge->b_int_enable; + new_enable = old_enable & ~BRIDGE_IMR_PCI_MST_TIMEOUT; + + bridge->b_int_enable = new_enable; + +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) +#if defined(BRINGUP) + /* + * The xbridge doesn't clear b_err_int_view unless + * multi-err is cleared... + */ + if (is_xbridge(bridge)) + if (bridge->b_err_int_view & BRIDGE_ISR_PCI_MST_TIMEOUT) { + bridge->b_int_rst_stat = BRIDGE_IRR_MULTI_CLR; + } +#endif /* BRINGUP */ +#endif /* CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 */ + + if (bridge->b_int_status & BRIDGE_IRR_PCI_GRP) { + bridge->b_int_rst_stat = BRIDGE_IRR_PCI_GRP_CLR; + (void) bridge->b_wid_tflush; /* flushbus */ + } + rv = badaddr_val((void *) cfg, 4, valp); + +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) +#if defined(BRINGUP) + /* + * The xbridge doesn't set master timeout in b_int_status + * here. Fortunately it's in error_interrupt_view. 
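+ * (So when b_err_int_view shows a PCI master timeout on an xbridge,
+ * the multi-error latch is cleared and the probe reports the slot
+ * as unoccupied.)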
+ */ + if (is_xbridge(bridge)) + if (bridge->b_err_int_view & BRIDGE_ISR_PCI_MST_TIMEOUT) { + bridge->b_int_rst_stat = BRIDGE_IRR_MULTI_CLR; + rv = 1; /* unoccupied slot */ + } +#endif /* BRINGUP */ +#endif /* CONFIG_SGI_IP35 */ + + bridge->b_int_enable = old_enable; + bridge->b_wid_tflush; /* wait until Bridge PIO complete */ + + return rv; +} + +/* + * pcibr_init: called once during system startup or + * when a loadable driver is loaded. + * + * The driver_register function should normally + * be in _reg, not _init. But the pcibr driver is + * required by devinit before the _reg routines + * are called, so this is an exception. + */ +void +pcibr_init(void) +{ +#if DEBUG && ATTACH_DEBUG + printk("pcibr_init\n"); +#endif + + xwidget_driver_register(XBRIDGE_WIDGET_PART_NUM, + XBRIDGE_WIDGET_MFGR_NUM, + "pcibr_", + 0); + xwidget_driver_register(BRIDGE_WIDGET_PART_NUM, + BRIDGE_WIDGET_MFGR_NUM, + "pcibr_", + 0); +} + +/* + * open/close mmap/munmap interface would be used by processes + * that plan to map the PCI bridge, and muck around with the + * registers. This is dangerous to do, and will be allowed + * to a select brand of programs. Typically these are + * diagnostics programs, or some user level commands we may + * write to do some weird things. + * To start with expect them to have root priveleges. + * We will ask for more later. + */ +/* ARGSUSED */ +int +pcibr_open(devfs_handle_t *devp, int oflag, int otyp, cred_t *credp) +{ +#ifndef CONFIG_IA64_SGI_IO + if (!_CAP_CRABLE((uint64_t)credp, (uint64_t)CAP_DEVICE_MGT)) + return EPERM; +#endif + return 0; +} + +/*ARGSUSED */ +int +pcibr_close(devfs_handle_t dev, int oflag, int otyp, cred_t *crp) +{ + return 0; +} + +/*ARGSUSED */ +int +pcibr_map(devfs_handle_t dev, vhandl_t *vt, off_t off, size_t len, uint prot) +{ + int error; + devfs_handle_t vhdl = dev_to_vhdl(dev); + devfs_handle_t pcibr_vhdl = hwgraph_connectpt_get(vhdl); + pcibr_soft_t pcibr_soft = pcibr_soft_get(pcibr_vhdl); + bridge_t *bridge = pcibr_soft->bs_base; + + hwgraph_vertex_unref(pcibr_vhdl); + + ASSERT(pcibr_soft); + len = ctob(btoc(len)); /* Make len page aligned */ + error = v_mapphys(vt, (void *) ((__psunsigned_t) bridge + off), len); + + /* + * If the offset being mapped corresponds to the flash prom + * base, and if the mapping succeeds, and if the user + * has requested the protections to be WRITE, enable the + * flash prom to be written. + * + * XXX- deprecate this in favor of using the + * real flash driver ... + */ + if (!error && + ((off == BRIDGE_EXTERNAL_FLASH) || + (len > BRIDGE_EXTERNAL_FLASH))) { + int s; + + /* + * ensure that we write and read without any interruption. + * The read following the write is required for the Bridge war + */ + s = splhi(); + bridge->b_wid_control |= BRIDGE_CTRL_FLASH_WR_EN; + bridge->b_wid_control; /* inval addr bug war */ + splx(s); + } + return error; +} + +/*ARGSUSED */ +int +pcibr_unmap(devfs_handle_t dev, vhandl_t *vt) +{ + devfs_handle_t pcibr_vhdl = hwgraph_connectpt_get((devfs_handle_t) dev); + pcibr_soft_t pcibr_soft = pcibr_soft_get(pcibr_vhdl); + bridge_t *bridge = pcibr_soft->bs_base; + + hwgraph_vertex_unref(pcibr_vhdl); + + /* + * If flashprom write was enabled, disable it, as + * this is the last unmap. + */ + if (bridge->b_wid_control & BRIDGE_CTRL_FLASH_WR_EN) { + int s; + + /* + * ensure that we write and read without any interruption. 
+ * The read following the write is required for the Bridge war + */ + s = splhi(); + bridge->b_wid_control &= ~BRIDGE_CTRL_FLASH_WR_EN; + bridge->b_wid_control; /* inval addr bug war */ + splx(s); + } + return 0; +} + +/* This is special case code used by grio. There are plans to make + * this a bit more general in the future, but till then this should + * be sufficient. + */ +pciio_slot_t +pcibr_device_slot_get(devfs_handle_t dev_vhdl) +{ + char devname[MAXDEVNAME]; + devfs_handle_t tdev; + pciio_info_t pciio_info; + pciio_slot_t slot = PCIIO_SLOT_NONE; + + vertex_to_name(dev_vhdl, devname, MAXDEVNAME); + + /* run back along the canonical path + * until we find a PCI connection point. + */ + tdev = hwgraph_connectpt_get(dev_vhdl); + while (tdev != GRAPH_VERTEX_NONE) { + pciio_info = pciio_info_chk(tdev); + if (pciio_info) { + slot = pciio_info_slot_get(pciio_info); + break; + } + hwgraph_vertex_unref(tdev); + tdev = hwgraph_connectpt_get(tdev); + } + hwgraph_vertex_unref(tdev); + + return slot; +} +/*========================================================================== + * BRIDGE PCI SLOT RELATED IOCTLs + */ +/* + * pcibr_slot_powerup + * Software initialize the pci slot. + */ +int +pcibr_slot_powerup(devfs_handle_t pcibr_vhdl,pciio_slot_t slot) +{ + /* Check for the valid slot */ + if (!PCIBR_VALID_SLOT(slot)) + return(EINVAL); + + if (pcibr_device_attach(pcibr_vhdl,slot)) + return(EINVAL); + + return(0); +} +/* + * pcibr_slot_shutdown + * Software shutdown the pci slot + */ +int +pcibr_slot_shutdown(devfs_handle_t pcibr_vhdl,pciio_slot_t slot) +{ + /* Check for valid slot */ + if (!PCIBR_VALID_SLOT(slot)) + return(EINVAL); + + if (pcibr_device_detach(pcibr_vhdl,slot)) + return(EINVAL); + + return(0); +} + +char *pci_space_name[] = {"NONE", + "ROM", + "IO", + "", + "MEM", + "MEM32", + "MEM64", + "CFG", + "WIN0", + "WIN1", + "WIN2", + "WIN3", + "WIN4", + "WIN5", + "", + "BAD"}; + +void +pcibr_slot_func_info_print(pcibr_info_h pcibr_infoh, int func, int verbose) +{ + pcibr_info_t pcibr_info = pcibr_infoh[func]; + char name[MAXDEVNAME]; + int win; + + if (!pcibr_info) + return; + +#ifdef SUPPORT_PRINTING_V_FORMAT + sprintf(name, "%v", pcibr_info->f_vertex); +#endif + if (!verbose) { + printk("\tSlot Name : %s\n",name); + } else { + printk("\tPER-SLOT FUNCTION INFO\n"); +#ifdef SUPPORT_PRINTING_V_FORMAT + sprintf(name, "%v", pcibr_info->f_vertex); +#endif + printk("\tSlot Name : %s\n",name); + printk("\tPCI Bus : %d ",pcibr_info->f_bus); + printk("Slot : %d ", pcibr_info->f_slot); + printk("Function : %d\n", pcibr_info->f_func); +#ifdef SUPPORT_PRINTING_V_FORMAT + sprintf(name, "%v", pcibr_info->f_master); +#endif + printk("\tBus provider : %s\n",name); + printk("\tProvider Fns : 0x%p ", pcibr_info->f_pops); + printk("Error Handler : 0x%p Arg 0x%p\n", + pcibr_info->f_efunc,pcibr_info->f_einfo); + } + printk("\tVendorId : 0x%x " , pcibr_info->f_vendor); + printk("DeviceId : 0x%x\n", pcibr_info->f_device); + + printk("\n\tBase Register Info\n"); + printk("\t\tReg#\tBase\t\tSize\t\tSpace\n"); + for(win = 0 ; win < 6 ; win++) + printk("\t\t%d\t0x%lx\t%s0x%lx\t%s%s\n", + win, + pcibr_info->f_window[win].w_base, + pcibr_info->f_window[win].w_base >= 0x100000 ? "": "\t", + pcibr_info->f_window[win].w_size, + pcibr_info->f_window[win].w_size >= 0x100000 ? "": "\t", + pci_space_name[pcibr_info->f_window[win].w_space]); + + printk("\t\t7\t0x%x\t%s0x%x\t%sROM\n", + pcibr_info->f_rbase, + pcibr_info->f_rbase > 0x100000 ? "" : "\t", + pcibr_info->f_rsize, + pcibr_info->f_rsize > 0x100000 ? 
"" : "\t"); + + printk("\n\tInterrupt Bit Map\n"); + printk("\t\tPCI Int#\tBridge Pin#\n"); + for (win = 0 ; win < 4; win++) + printk("\t\tINT%c\t\t%d\n",win+'A',pcibr_info->f_ibit[win]); + printk("\n"); +} + + +void +pcibr_slot_info_print(pcibr_soft_t pcibr_soft, + pciio_slot_t slot, + int verbose) +{ + pcibr_soft_slot_t pss; + char slot_conn_name[MAXDEVNAME]; + int func; + bridge_t *bridge = pcibr_soft->bs_base; + bridgereg_t b_resp; + reg_p b_respp; + int dev; + bridgereg_t b_int_device; + bridgereg_t b_int_host; + bridgereg_t b_int_enable; + int pin = 0; + int int_bits = 0; + + pss = &pcibr_soft->bs_slot[slot]; + + printk("\nPCI INFRASTRUCTURAL INFO FOR SLOT %d\n\n", slot); + + if (verbose) { + printk("\tHost Present ? %s ", pss->has_host ? "yes" : "no"); + printk("\tHost Slot : %d\n",pss->host_slot); +#ifdef SUPPORT_PRINTING_V_FORMAT + sprintf(slot_conn_name, "%v", pss->slot_conn); +#endif + printk("\tSlot Conn : %s\n",slot_conn_name); + printk("\t#Functions : %d\n",pss->bss_ninfo); + } + for (func = 0; func < pss->bss_ninfo; func++) + pcibr_slot_func_info_print(pss->bss_infos,func, verbose); + printk("\tDevio[Space:%s,Base:0x%lx,Shadow:0x%x]\n", + pci_space_name[pss->bss_devio.bssd_space], + pss->bss_devio.bssd_base, + pss->bss_device); + + if (verbose) { + printk("\tUsage counts : pmu %d d32 %d d64 %d\n", + pss->bss_pmu_uctr,pss->bss_d32_uctr,pss->bss_d64_uctr); + + printk("\tDirect Trans Info : d64_base 0x%x d64_flags 0x%x" + "d32_base 0x%x d32_flags 0x%x\n", + (unsigned int)pss->bss_d64_base, pss->bss_d64_flags, + (unsigned int)pss->bss_d32_base, pss->bss_d32_flags); + + printk("\tExt ATEs active ? %s", + pss->bss_ext_ates_active ? "yes" : "no"); + printk(" Command register : 0x%p ", pss->bss_cmd_pointer); + printk(" Shadow command val : 0x%x\n", pss->bss_cmd_shadow); + } + + printk("\tSoft RRB Info[Valid %d+%d, Reserved %d]\n", + pcibr_soft->bs_rrb_valid[slot], + pcibr_soft->bs_rrb_valid[slot + PCIBR_RRB_SLOT_VIRTUAL], + pcibr_soft->bs_rrb_res[slot]); + + + if (slot & 1) + b_respp = &bridge->b_odd_resp; + else + b_respp = &bridge->b_even_resp; + + b_resp = *b_respp; + + printk("\n\tBridge RRB Info\n"); + printk("\t\tRRB#\tVirtual\n"); + for (dev = 0; dev < 8; dev++) { + if ((b_resp & BRIDGE_RRB_EN) && + (b_resp & BRIDGE_RRB_PDEV) == (slot >> 1)) + printk( "\t\t%d\t%s\n", + dev, + (b_resp & BRIDGE_RRB_VDEV) ? "yes" : "no"); + b_resp >>= 4; + + } + b_int_device = bridge->b_int_device; + b_int_enable = bridge->b_int_enable; + + printk("\n\tBridge Interrupt Info\n" + "\t\tInt_device 0x%x\n\t\tInt_enable 0x%x " + "\n\t\tEnabled pin#s for this slot: ", + b_int_device, + b_int_enable); + + while (b_int_device) { + if (((b_int_device & 7) == slot) && + (b_int_enable & (1 << pin))) { + int_bits |= (1 << pin); + printk("%d ", pin); + } + pin++; + b_int_device >>= 3; + } + + if (!int_bits) + printk("NONE "); + + b_int_host = bridge->b_int_addr[slot].addr; + + printk("\n\t\tInt_host_addr 0x%x\n", + b_int_host); + +} + +int verbose = 0; +/* + * pcibr_slot_inquiry + * Print information about the pci slot maintained by the infrastructure. 
+ * Current information displayed + * Slot hwgraph name + * Vendor/Device info + * Base register info + * Interrupt mapping from device pins to the bridge pins + * Devio register + * Software RRB info + * RRB register info + * In verbose mode following additional info is displayed + * Host/Gues info + * PCI Bus #,slot #, function # + * Slot provider hwgraph name + * Provider Functions + * Error handler + * DMA mapping usage counters + * DMA direct translation info + * External SSRAM workaround info + */ +int +pcibr_slot_inquiry(devfs_handle_t pcibr_vhdl, pciio_slot_t slot) +{ + pcibr_soft_t pcibr_soft = pcibr_soft_get(pcibr_vhdl); + + /* Make sure that we are dealing with a bridge device vertex */ + if (!pcibr_soft) + return(EINVAL); + + /* Make sure that we have a valid pci slot number or PCIIO_SLOT_NONE */ + if ((!PCIBR_VALID_SLOT(slot)) && (slot != PCIIO_SLOT_NONE)) + return(EINVAL); + + /* Print information for the requested pci slot */ + if (slot != PCIIO_SLOT_NONE) { + pcibr_slot_info_print(pcibr_soft,slot,verbose); + return(0); + } + /* Print information for all the slots */ + for (slot = 0; slot < 8; slot++) + pcibr_slot_info_print(pcibr_soft, slot,verbose); + return(0); +} + +/*ARGSUSED */ +int +pcibr_ioctl(devfs_handle_t dev, + int cmd, + void *arg, + int flag, + struct cred *cr, + int *rvalp) +{ + devfs_handle_t pcibr_vhdl = hwgraph_connectpt_get((devfs_handle_t)dev); +#ifdef colin + pcibr_soft_t pcibr_soft = pcibr_soft_get(pcibr_vhdl); +#endif + int error = 0; + + hwgraph_vertex_unref(pcibr_vhdl); + + switch (cmd) { +#ifdef colin + case GIOCSETBW: + { + grio_ioctl_info_t info; + pciio_slot_t slot = 0; + + if (!cap_able((uint64_t)CAP_DEVICE_MGT)) { + error = EPERM; + break; + } + if (COPYIN(arg, &info, sizeof(grio_ioctl_info_t))) { + error = EFAULT; + break; + } +#ifdef GRIO_DEBUG + printk("pcibr:: prev_vhdl: %d reqbw: %lld\n", + info.prev_vhdl, info.reqbw); +#endif /* GRIO_DEBUG */ + + if ((slot = pcibr_device_slot_get(info.prev_vhdl)) == + PCIIO_SLOT_NONE) { + error = EIO; + break; + } + if (info.reqbw) + pcibr_priority_bits_set(pcibr_soft, slot, PCI_PRIO_HIGH); + break; + } + + case GIOCRELEASEBW: + { + grio_ioctl_info_t info; + pciio_slot_t slot = 0; + + if (!cap_able(CAP_DEVICE_MGT)) { + error = EPERM; + break; + } + if (COPYIN(arg, &info, sizeof(grio_ioctl_info_t))) { + error = EFAULT; + break; + } +#ifdef GRIO_DEBUG + printk("pcibr:: prev_vhdl: %d reqbw: %lld\n", + info.prev_vhdl, info.reqbw); +#endif /* GRIO_DEBUG */ + + if ((slot = pcibr_device_slot_get(info.prev_vhdl)) == + PCIIO_SLOT_NONE) { + error = EIO; + break; + } + if (info.reqbw) + pcibr_priority_bits_set(pcibr_soft, slot, PCI_PRIO_LOW); + break; + } +#endif /* colin */ + + case PCIBR_SLOT_POWERUP: + { + pciio_slot_t slot; + + if (!cap_able(CAP_DEVICE_MGT)) { + error = EPERM; + break; + } + + slot = (pciio_slot_t)(uint64_t)arg; + error = pcibr_slot_powerup(pcibr_vhdl,slot); + break; + } + case PCIBR_SLOT_SHUTDOWN: + { + pciio_slot_t slot; + + if (!cap_able(CAP_DEVICE_MGT)) { + error = EPERM; + break; + } + + slot = (pciio_slot_t)(uint64_t)arg; + error = pcibr_slot_shutdown(pcibr_vhdl,slot); + break; + } + case PCIBR_SLOT_INQUIRY: + { + pciio_slot_t slot; + + if (!cap_able(CAP_DEVICE_MGT)) { + error = EPERM; + break; + } + + slot = (pciio_slot_t)(uint64_t)arg; + error = pcibr_slot_inquiry(pcibr_vhdl,slot); + break; + } + default: + break; + + } + + return error; +} + +void +pcibr_freeblock_sub(iopaddr_t *free_basep, + iopaddr_t *free_lastp, + iopaddr_t base, + size_t size) +{ + iopaddr_t free_base = *free_basep; 
+ iopaddr_t free_last = *free_lastp; + iopaddr_t last = base + size - 1; + + if ((last < free_base) || (base > free_last)); /* free block outside arena */ + + else if ((base <= free_base) && (last >= free_last)) + /* free block contains entire arena */ + *free_basep = *free_lastp = 0; + + else if (base <= free_base) + /* free block is head of arena */ + *free_basep = last + 1; + + else if (last >= free_last) + /* free block is tail of arena */ + *free_lastp = base - 1; + + /* + * We are left with two regions: the free area + * in the arena "below" the block, and the free + * area in the arena "above" the block. Keep + * the one that is bigger. + */ + + else if ((base - free_base) > (free_last - last)) + *free_lastp = base - 1; /* keep lower chunk */ + else + *free_basep = last + 1; /* keep upper chunk */ +} + +#ifdef IRIX +/* Convert from ssram_bits in control register to number of SSRAM entries */ +#define ATE_NUM_ENTRIES(n) _ate_info[n] + +/* Possible choices for number of ATE entries in Bridge's SSRAM */ +LOCAL int _ate_info[] = +{ + 0, /* 0 entries */ + 8 * 1024, /* 8K entries */ + 16 * 1024, /* 16K entries */ + 64 * 1024 /* 64K entries */ +}; + +#define ATE_NUM_SIZES (sizeof(_ate_info) / sizeof(int)) +#define ATE_PROBE_VALUE 0x0123456789abcdefULL +#endif /* IRIX */ + +/* + * Determine the size of this bridge's external mapping SSRAM, and set + * the control register appropriately to reflect this size, and initialize + * the external SSRAM. + */ +#ifndef BRINGUP +LOCAL int +pcibr_init_ext_ate_ram(bridge_t *bridge) +{ + int largest_working_size = 0; + int num_entries, entry; + int i, j; + bridgereg_t old_enable, new_enable; + int s; + + if (is_xbridge(bridge)) + return 0; + + /* Probe SSRAM to determine its size. */ + old_enable = bridge->b_int_enable; + new_enable = old_enable & ~BRIDGE_IMR_PCI_MST_TIMEOUT; + bridge->b_int_enable = new_enable; + + for (i = 1; i < ATE_NUM_SIZES; i++) { + /* Try writing a value */ + bridge->b_ext_ate_ram[ATE_NUM_ENTRIES(i) - 1] = ATE_PROBE_VALUE; + + /* Guard against wrap */ + for (j = 1; j < i; j++) + bridge->b_ext_ate_ram[ATE_NUM_ENTRIES(j) - 1] = 0; + + /* See if value was written */ + if (bridge->b_ext_ate_ram[ATE_NUM_ENTRIES(i) - 1] == ATE_PROBE_VALUE) + largest_working_size = i; + } + bridge->b_int_enable = old_enable; + bridge->b_wid_tflush; /* wait until Bridge PIO complete */ + + /* + * ensure that we write and read without any interruption. + * The read following the write is required for the Bridge war + */ + + s = splhi(); +#ifdef colin + bridge->b_wid_control = (bridge->b_wid_control + & ~BRIDGE_CTRL_SSRAM_SIZE_MASK) + | BRIDGE_CTRL_SSRAM_SIZE(largest_working_size); +#endif + bridge->b_wid_control; /* inval addr bug war */ + splx(s); + + num_entries = ATE_NUM_ENTRIES(largest_working_size); + +#if PCIBR_ATE_DEBUG + if (num_entries) + printk("bridge at 0x%x: clearing %d external ATEs\n", bridge, num_entries); + else + printk("bridge at 0x%x: no externa9422l ATE RAM found\n", bridge); +#endif + + /* Initialize external mapping entries */ + for (entry = 0; entry < num_entries; entry++) + bridge->b_ext_ate_ram[entry] = 0; + + return (num_entries); +} +#endif /* !BRINGUP */ + +/* + * Allocate "count" contiguous Bridge Address Translation Entries + * on the specified bridge to be used for PCI to XTALK mappings. + * Indices in rm map range from 1..num_entries. Indicies returned + * to caller range from 0..num_entries-1. + * + * Return the start index on success, -1 on failure. 
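+ * (Thus an rmalloc() result of 1 comes back to the caller as ATE
+ * index 0, and the rmalloc() failure value of 0 becomes -1.)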
+ */ +LOCAL int +pcibr_ate_alloc(pcibr_soft_t pcibr_soft, int count) +{ + int index = 0; + + index = (int) rmalloc(pcibr_soft->bs_int_ate_map, (size_t) count); + + if (!index && pcibr_soft->bs_ext_ate_map) + index = (int) rmalloc(pcibr_soft->bs_ext_ate_map, (size_t) count); + + /* rmalloc manages resources in the 1..n + * range, with 0 being failure. + * pcibr_ate_alloc manages resources + * in the 0..n-1 range, with -1 being failure. + */ + return index - 1; +} + +LOCAL void +pcibr_ate_free(pcibr_soft_t pcibr_soft, int index, int count) +/* Who says there's no such thing as a free meal? :-) */ +{ + /* note the "+1" since rmalloc handles 1..n but + * we start counting ATEs at zero. + */ + rmfree((index < pcibr_soft->bs_int_ate_size) + ? pcibr_soft->bs_int_ate_map + : pcibr_soft->bs_ext_ate_map, + count, index + 1); +} + +LOCAL pcibr_info_t +pcibr_info_get(devfs_handle_t vhdl) +{ + return (pcibr_info_t) pciio_info_get(vhdl); +} + +pcibr_info_t +pcibr_device_info_new( + pcibr_soft_t pcibr_soft, + pciio_slot_t slot, + pciio_function_t rfunc, + pciio_vendor_id_t vendor, + pciio_device_id_t device) +{ + pcibr_info_t pcibr_info; + pciio_function_t func; + int ibit; + + func = (rfunc == PCIIO_FUNC_NONE) ? 0 : rfunc; + + NEW(pcibr_info); + pciio_device_info_new(&pcibr_info->f_c, + pcibr_soft->bs_vhdl, + slot, rfunc, + vendor, device); + + if (slot != PCIIO_SLOT_NONE) { + + /* + * Currently favored mapping from PCI + * slot number and INTA/B/C/D to Bridge + * PCI Interrupt Bit Number: + * + * SLOT A B C D + * 0 0 4 0 4 + * 1 1 5 1 5 + * 2 2 6 2 6 + * 3 3 7 3 7 + * 4 4 0 4 0 + * 5 5 1 5 1 + * 6 6 2 6 2 + * 7 7 3 7 3 + * + * XXX- allow pcibr_hints to override default + * XXX- allow ADMIN to override pcibr_hints + */ + for (ibit = 0; ibit < 4; ++ibit) + pcibr_info->f_ibit[ibit] = + (slot + 4 * ibit) & 7; + + /* + * Record the info in the sparse func info space. 
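The f_ibit[] assignment above is just the formula (slot + 4*pin) & 7; a small sketch that regenerates the table shown in the comment, with pin 0..3 standing for INTA..INTD:

#include <stdio.h>

/* Default mapping from (PCI slot, interrupt pin) to Bridge PCI interrupt
 * bit, as computed in pcibr_device_info_new(): bit = (slot + 4*pin) & 7.
 */
int main(void)
{
    int slot, pin;

    printf("SLOT  A  B  C  D\n");
    for (slot = 0; slot < 8; slot++) {
        printf("  %d ", slot);
        for (pin = 0; pin < 4; pin++)
            printf("  %d", (slot + 4 * pin) & 7);
        printf("\n");
    }
    return 0;
}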
+ */ +printk("pcibr_device_info_new: slot= %d func= %d bss_ninfo= %d pcibr_info= 0x%p\n", slot, func, pcibr_soft->bs_slot[slot].bss_ninfo, pcibr_info); + + if (func < pcibr_soft->bs_slot[slot].bss_ninfo) + pcibr_soft->bs_slot[slot].bss_infos[func] = pcibr_info; + } + return pcibr_info; +} + +void +pcibr_device_info_free(devfs_handle_t pcibr_vhdl, pciio_slot_t slot) +{ + pcibr_soft_t pcibr_soft = pcibr_soft_get(pcibr_vhdl); + pcibr_info_t pcibr_info; + pciio_function_t func; + pcibr_soft_slot_t slotp = &pcibr_soft->bs_slot[slot]; + int nfunc = slotp->bss_ninfo; + + + for (func = 0; func < nfunc; func++) { + pcibr_info = slotp->bss_infos[func]; + + if (!pcibr_info) + continue; + + slotp->bss_infos[func] = 0; + pciio_device_info_unregister(pcibr_vhdl, &pcibr_info->f_c); + pciio_device_info_free(&pcibr_info->f_c); + DEL(pcibr_info); + } + + /* Clear the DEVIO(x) for this slot */ + slotp->bss_devio.bssd_space = PCIIO_SPACE_NONE; + slotp->bss_devio.bssd_base = PCIBR_D32_BASE_UNSET; + slotp->bss_device = 0; + + + /* Reset the mapping usage counters */ + slotp->bss_pmu_uctr = 0; + slotp->bss_d32_uctr = 0; + slotp->bss_d64_uctr = 0; + + /* Clear the Direct translation info */ + slotp->bss_d64_base = PCIBR_D64_BASE_UNSET; + slotp->bss_d64_flags = 0; + slotp->bss_d32_base = PCIBR_D32_BASE_UNSET; + slotp->bss_d32_flags = 0; + + /* Clear out shadow info necessary for the external SSRAM workaround */ + slotp->bss_ext_ates_active = 0; + slotp->bss_cmd_pointer = 0; + slotp->bss_cmd_shadow = 0; + +} + +/* + * PCI_ADDR_SPACE_LIMITS_LOAD + * Gets the current values of + * pci io base, + * pci io last, + * pci low memory base, + * pci low memory last, + * pci high memory base, + * pci high memory last + */ +#define PCI_ADDR_SPACE_LIMITS_LOAD() \ + pci_io_fb = pcibr_soft->bs_spinfo.pci_io_base; \ + pci_io_fl = pcibr_soft->bs_spinfo.pci_io_last; \ + pci_lo_fb = pcibr_soft->bs_spinfo.pci_swin_base; \ + pci_lo_fl = pcibr_soft->bs_spinfo.pci_swin_last; \ + pci_hi_fb = pcibr_soft->bs_spinfo.pci_mem_base; \ + pci_hi_fl = pcibr_soft->bs_spinfo.pci_mem_last; +/* + * PCI_ADDR_SPACE_LIMITS_STORE + * Sets the current values of + * pci io base, + * pci io last, + * pci low memory base, + * pci low memory last, + * pci high memory base, + * pci high memory last + */ +#define PCI_ADDR_SPACE_LIMITS_STORE() \ + pcibr_soft->bs_spinfo.pci_io_base = pci_io_fb; \ + pcibr_soft->bs_spinfo.pci_io_last = pci_io_fl; \ + pcibr_soft->bs_spinfo.pci_swin_base = pci_lo_fb; \ + pcibr_soft->bs_spinfo.pci_swin_last = pci_lo_fl; \ + pcibr_soft->bs_spinfo.pci_mem_base = pci_hi_fb; \ + pcibr_soft->bs_spinfo.pci_mem_last = pci_hi_fl; + +#define PCI_ADDR_SPACE_LIMITS_PRINT() \ + printf("+++++++++++++++++++++++\n" \ + "IO base 0x%x last 0x%x\n" \ + "SWIN base 0x%x last 0x%x\n" \ + "MEM base 0x%x last 0x%x\n" \ + "+++++++++++++++++++++++\n", \ + pcibr_soft->bs_spinfo.pci_io_base, \ + pcibr_soft->bs_spinfo.pci_io_last, \ + pcibr_soft->bs_spinfo.pci_swin_base, \ + pcibr_soft->bs_spinfo.pci_swin_last, \ + pcibr_soft->bs_spinfo.pci_mem_base, \ + pcibr_soft->bs_spinfo.pci_mem_last); + +/* + * pcibr_slot_reset + * Reset the pci device in the particular slot . + */ +int +pcibr_slot_reset(devfs_handle_t pcibr_vhdl,pciio_slot_t slot) +{ + pcibr_soft_t pcibr_soft = pcibr_soft_get(pcibr_vhdl); + bridge_t *bridge; + bridgereg_t ctrlreg,tmp; + volatile bridgereg_t *wrb_flush; + + if (!PCIBR_VALID_SLOT(slot)) + return(1); + + if (!pcibr_soft) + return(1); + + /* Enable the DMA operations from this device of the xtalk widget + * (PCI host bridge in this case). 
+ */ + xtalk_widgetdev_enable(pcibr_soft->bs_conn, slot); + /* Set the reset slot bit in the bridge's wid control register + * to reset the pci slot + */ + bridge = pcibr_soft->bs_base; + /* Read the bridge widget control and clear out the reset pin + * bit for the corresponding slot. + */ + tmp = ctrlreg = bridge->b_wid_control; + tmp &= ~BRIDGE_CTRL_RST_PIN(slot); + bridge->b_wid_control = tmp; + tmp = bridge->b_wid_control; + /* Restore the old control register back. + * NOTE : pci card gets reset when the reset pin bit + * changes from 0 (set above) to 1 (going to be set now). + */ + bridge->b_wid_control = ctrlreg; + + /* Flush the write buffers if any !! */ + wrb_flush = &(bridge->b_wr_req_buf[slot].reg); + while (*wrb_flush); + + return(0); +} +/* + * pcibr_slot_info_init + * Probe for this slot and see if it is populated. + * If it is populated initialize the generic pci infrastructural + * information associated with this particular pci device. + */ +int +pcibr_slot_info_init(devfs_handle_t pcibr_vhdl, + pciio_slot_t slot) +{ + pcibr_soft_t pcibr_soft; + pcibr_info_h pcibr_infoh; + pcibr_info_t pcibr_info; + bridge_t *bridge; + cfg_p cfgw; + unsigned idword; + unsigned pfail; + unsigned idwords[8]; + pciio_vendor_id_t vendor; + pciio_device_id_t device; + unsigned htype; + cfg_p wptr; + int win; + pciio_space_t space; + iopaddr_t pci_io_fb, pci_io_fl; + iopaddr_t pci_lo_fb, pci_lo_fl; + iopaddr_t pci_hi_fb, pci_hi_fl; + int nfunc; + pciio_function_t rfunc; + int func; + devfs_handle_t conn_vhdl; + pcibr_soft_slot_t slotp; + + /* Get the basic software information required to proceed */ + pcibr_soft = pcibr_soft_get(pcibr_vhdl); + if (!pcibr_soft) + return(1); + + bridge = pcibr_soft->bs_base; + if (!PCIBR_VALID_SLOT(slot)) + return(1); + + slotp = &pcibr_soft->bs_slot[slot]; + + /* Load the current values of allocated pci address spaces */ + PCI_ADDR_SPACE_LIMITS_LOAD(); + + /* If we have a host slot (eg:- IOC3 has 2 pci slots and the initialization + * is done by the host slot then we are done. + */ + if (pcibr_soft->bs_slot[slot].has_host) + return(0); + + /* Try to read the device-id/vendor-id from the config space */ + cfgw = bridge->b_type0_cfg_dev[slot].l; + +#ifdef BRINGUP + if (slot < 3 || slot == 7) + return (0); + else +#endif /* BRINGUP */ + if (pcibr_probe_slot(bridge, cfgw, &idword)) + return(0); + + vendor = 0xFFFF & idword; + /* If the vendor id is not valid then the slot is not populated + * and we are done. + */ + if (vendor == 0xFFFF) + return(0); /* next slot */ + + device = 0xFFFF & (idword >> 16); + htype = do_pcibr_config_get(cfgw, PCI_CFG_HEADER_TYPE, 1); + + nfunc = 1; + rfunc = PCIIO_FUNC_NONE; + pfail = 0; + + /* NOTE: if a card claims to be multifunction + * but only responds to config space 0, treat + * it as a unifunction card. 
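A compact sketch of the ID-word decode used during slot probing: the low half of the first config dword is the vendor ID, the high half the device ID, and an all-ones read means the slot did not respond (empty slot). The sample value below is only an illustration of the bit layout:

#include <stdio.h>
#include <stdint.h>

/* Split a PCI config-space ID dword the way pcibr_slot_info_init() does.
 * Returns 0 for an empty slot (the config read came back all-ones).
 */
static int decode_idword(uint32_t idword, uint16_t *vendor, uint16_t *device)
{
    *vendor = idword & 0xFFFF;
    *device = (idword >> 16) & 0xFFFF;
    return (*vendor != 0xFFFF);
}

int main(void)
{
    uint16_t v, d;

    if (decode_idword(0x000310A9, &v, &d))    /* example: vendor 0x10A9, device 0x0003 */
        printf("vendor 0x%04x device 0x%04x\n", v, d);
    if (!decode_idword(0xFFFFFFFF, &v, &d))
        printf("slot empty\n");
    return 0;
}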
+ */ + + if (htype & 0x80) { /* MULTIFUNCTION */ + for (func = 1; func < 8; ++func) { + cfgw = bridge->b_type0_cfg_dev[slot].f[func].l; + if (pcibr_probe_slot(bridge, cfgw, &idwords[func])) { + pfail |= 1 << func; + continue; + } + vendor = 0xFFFF & idwords[func]; + if (vendor == 0xFFFF) { + pfail |= 1 << func; + continue; + } + nfunc = func + 1; + rfunc = 0; + } + cfgw = bridge->b_type0_cfg_dev[slot].l; + } + NEWA(pcibr_infoh, nfunc); + + pcibr_soft->bs_slot[slot].bss_ninfo = nfunc; + pcibr_soft->bs_slot[slot].bss_infos = pcibr_infoh; + + for (func = 0; func < nfunc; ++func) { + unsigned cmd_reg; + + if (func) { + if (pfail & (1 << func)) + continue; + + idword = idwords[func]; + cfgw = bridge->b_type0_cfg_dev[slot].f[func].l; + + device = 0xFFFF & (idword >> 16); + htype = do_pcibr_config_get(cfgw, PCI_CFG_HEADER_TYPE, 1); + rfunc = func; + } + htype &= 0x7f; + if (htype != 0x00) { + PRINT_WARNING("%s pcibr: pci slot %d func %d has strange header type 0x%x\n", + pcibr_soft->bs_name, slot, func, htype); + continue; + } +#if DEBUG && ATTACH_DEBUG + PRINT_NOTICE( + "%s pcibr: pci slot %d func %d: vendor 0x%x device 0x%x", + pcibr_soft->bs_name, slot, func, vendor, device); +#endif + + pcibr_info = pcibr_device_info_new + (pcibr_soft, slot, rfunc, vendor, device); + conn_vhdl = pciio_device_info_register(pcibr_vhdl, &pcibr_info->f_c); + if (func == 0) + slotp->slot_conn = conn_vhdl; + + cmd_reg = cfgw[PCI_CFG_COMMAND / 4]; + + wptr = cfgw + PCI_CFG_BASE_ADDR_0 / 4; + + + for (win = 0; win < PCI_CFG_BASE_ADDRS; ++win) { + iopaddr_t base, mask, code; + size_t size; + + /* + * GET THE BASE & SIZE OF THIS WINDOW: + * + * The low two or four bits of the BASE register + * determines which address space we are in; the + * rest is a base address. BASE registers + * determine windows that are power-of-two sized + * and naturally aligned, so we can get the size + * of a window by writing all-ones to the + * register, reading it back, and seeing which + * bits are used for decode; the least + * significant nonzero bit is also the size of + * the window. + * + * WARNING: someone may already have allocated + * some PCI space to this window, and in fact + * PIO may be in process at this very moment + * from another processor (or even from this + * one, if we get interrupted)! So, if the BASE + * already has a nonzero address, be generous + * and use the LSBit of that address as the + * size; this could overstate the window size. + * Usually, when one card is set up, all are set + * up; so, since we don't bitch about + * overlapping windows, we are ok. + * + * UNFORTUNATELY, some cards do not clear their + * BASE registers on reset. I have two heuristics + * that can detect such cards: first, if the + * decode enable is turned off for the space + * that the window uses, we can disregard the + * initial value. second, if the address is + * outside the range that we use, we can disregard + * it as well. + * + * This is looking very PCI generic. Except for + * knowing how many slots and where their config + * spaces are, this window loop and the next one + * could probably be shared with other PCI host + * adapters. It would be interesting to see if + * this could be pushed up into pciio, when we + * start supporting more PCI providers. + */ +#ifdef LITTLE_ENDIAN + base = wptr[((win*4)^4)/4]; +#else + base = wptr[win]; +#endif /* LITTLE_ENDIAN */ + + if (base & 1) { + /* BASE is in I/O space. 
*/ + space = PCIIO_SPACE_IO; + mask = -4; + code = base & 3; + base = base & mask; + if (base == 0) { + ; /* not assigned */ + } else if (!(cmd_reg & PCI_CMD_IO_SPACE)) { + base = 0; /* decode not enabled */ + } + } else { + /* BASE is in MEM space. */ + space = PCIIO_SPACE_MEM; + mask = -16; + code = base & 15; + base = base & mask; + if (base == 0) { + ; /* not assigned */ + } else if (!(cmd_reg & PCI_CMD_MEM_SPACE)) { + base = 0; /* decode not enabled */ + } else if (base & 0xC0000000) { + base = 0; /* outside permissable range */ + } else if ((code == PCI_BA_MEM_64BIT) && +#ifdef LITTLE_ENDIAN + (wptr[(((win + 1)*4)^4)/4] != 0)) { +#else + (wptr[win + 1] != 0)) { +#endif /* LITTLE_ENDIAN */ + base = 0; /* outside permissable range */ + } + } + + if (base != 0) { /* estimate size */ + size = base & -base; + } else { /* calculate size */ +#ifdef LITTLE_ENDIAN + wptr[((win*4)^4)/4] = ~0; /* turn on all bits */ + size = wptr[((win*4)^4)/4]; /* get stored bits */ +#else + wptr[win] = ~0; /* turn on all bits */ + size = wptr[win]; /* get stored bits */ +#endif /* LITTLE_ENDIAN */ + size &= mask; /* keep addr */ + size &= -size; /* keep lsbit */ + if (size == 0) + continue; + } + + pcibr_info->f_window[win].w_space = space; + pcibr_info->f_window[win].w_base = base; + pcibr_info->f_window[win].w_size = size; + + /* + * If this window already has PCI space + * allocated for it, "subtract" that space from + * our running freeblocks. Don't worry about + * overlaps in existing allocated windows; we + * may be overstating their sizes anyway. + */ + + if (base && size) { + if (space == PCIIO_SPACE_IO) { + pcibr_freeblock_sub(&pci_io_fb, + &pci_io_fl, + base, size); + } else { + pcibr_freeblock_sub(&pci_lo_fb, + &pci_lo_fl, + base, size); + pcibr_freeblock_sub(&pci_hi_fb, + &pci_hi_fl, + base, size); + } + } +#if defined(IOC3_VENDOR_ID_NUM) && defined(IOC3_DEVICE_ID_NUM) + /* + * IOC3 BASE_ADDR* BUG WORKAROUND + * + + * If we write to BASE1 on the IOC3, the + * data in BASE0 is replaced. The + * original workaround was to remember + * the value of BASE0 and restore it + * when we ran off the end of the BASE + * registers; however, a later + * workaround was added (I think it was + * rev 1.44) to avoid setting up + * anything but BASE0, with the comment + * that writing all ones to BASE1 set + * the enable-parity-error test feature + * in IOC3's SCR bit 14. + * + * So, unless we defer doing any PCI + * space allocation until drivers + * attach, and set up a way for drivers + * (the IOC3 in paricular) to tell us + * generically to keep our hands off + * BASE registers, we gotta "know" about + * the IOC3 here. + * + * Too bad the PCI folks didn't reserve the + * all-zero value for 'no BASE here' (it is a + * valid code for an uninitialized BASE in + * 32-bit PCI memory space). + */ + + if ((vendor == IOC3_VENDOR_ID_NUM) && + (device == IOC3_DEVICE_ID_NUM)) + break; +#endif + if (code == PCI_BA_MEM_64BIT) { + win++; /* skip upper half */ +#ifdef LITTLE_ENDIAN + wptr[((win*4)^4)/4] = 0; /* which must be zero */ +#else + wptr[win] = 0; /* which must be zero */ +#endif /* LITTLE_ENDIAN */ + } + } /* next win */ + } /* next func */ + + /* Store back the values for allocated pci address spaces */ + PCI_ADDR_SPACE_LIMITS_STORE(); + return(0); +} + +/* + * pcibr_slot_info_free + * Remove all the pci infrastructural information associated + * with a particular pci device. 
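The window-sizing trick described in the BASE-register loop above (write all-ones, read back, mask off the flag bits, take the least significant set bit) in isolation; the values here are plain readback patterns rather than live config-space accesses:

#include <stdio.h>
#include <stdint.h>

/* Decode the size of a power-of-two PCI BAR from the value read back after
 * writing all-ones to it, as in pcibr_slot_info_init(). 'io' selects the
 * I/O-space flag mask (-4) versus the memory-space mask (-16).
 */
static uint32_t bar_size(uint32_t readback, int io)
{
    uint32_t mask = io ? ~0x3u : ~0xFu;   /* strip the low flag bits */
    uint32_t val  = readback & mask;

    return val & -val;                    /* least significant set bit = window size */
}

int main(void)
{
    /* a 1 MB memory BAR reads back as 0xFFF00000 after the all-ones write */
    printf("mem size = 0x%x\n", bar_size(0xFFF00000, 0));
    /* a 256-byte I/O BAR reads back as 0xFFFFFF01 */
    printf("io  size = 0x%x\n", bar_size(0xFFFFFF01, 1));
    return 0;
}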
+ */ +int +pcibr_slot_info_free(devfs_handle_t pcibr_vhdl, + pciio_slot_t slot) +{ + pcibr_soft_t pcibr_soft; + pcibr_info_h pcibr_infoh; + int nfunc; +#if defined(PCI_HOTSWAP_DEBUG) + cfg_p cfgw; + bridge_t *bridge; + int win; + cfg_p wptr; +#endif /* PCI_HOTSWAP_DEBUG */ + + + + pcibr_soft = pcibr_soft_get(pcibr_vhdl); + if (!pcibr_soft || !PCIBR_VALID_SLOT(slot)) + return(1); + +#if defined(PCI_HOTSWAP_DEBUG) + /* Clean out all the base registers */ + bridge = pcibr_soft->bs_base; + cfgw = bridge->b_type0_cfg_dev[slot].l; + wptr = cfgw + PCI_CFG_BASE_ADDR_0 / 4; + + for (win = 0; win < PCI_CFG_BASE_ADDRS; ++win) +#ifdef LITTLE_ENDIAN + wptr[((win*4)^4)/4] = 0; +#else + wptr[win] = 0; +#endif /* LITTLE_ENDIAN */ +#endif /* PCI_HOTSWAP_DEBUG */ + + nfunc = pcibr_soft->bs_slot[slot].bss_ninfo; + + pcibr_device_info_free(pcibr_vhdl, slot); + + pcibr_infoh = pcibr_soft->bs_slot[slot].bss_infos; + DELA(pcibr_infoh,nfunc); + pcibr_soft->bs_slot[slot].bss_ninfo = 0; + + return(0); + + +} +int as_debug = 0; +/* + * pcibr_slot_addr_space_init + * Reserve chunks of pci address space as required by + * the base registers in the card. + */ +int +pcibr_slot_addr_space_init(devfs_handle_t pcibr_vhdl, + pciio_slot_t slot) +{ + pcibr_soft_t pcibr_soft; + pcibr_info_h pcibr_infoh; + pcibr_info_t pcibr_info; + bridge_t *bridge; + iopaddr_t pci_io_fb, pci_io_fl; + iopaddr_t pci_lo_fb, pci_lo_fl; + iopaddr_t pci_hi_fb, pci_hi_fl; + size_t align; + iopaddr_t mask; + int nfunc; + int func; + int win; + + pcibr_soft = pcibr_soft_get(pcibr_vhdl); + if (!pcibr_soft || !PCIBR_VALID_SLOT(slot)) + return(1); + + bridge = pcibr_soft->bs_base; + + /* Get the current values for the allocated pci address spaces */ + PCI_ADDR_SPACE_LIMITS_LOAD(); + + if (as_debug) +#ifdef colin + PCI_ADDR_SPACE_LIMITS_PRINT(); +#endif + /* allocate address space, + * for windows that have not been + * previously assigned. + */ + + if (pcibr_soft->bs_slot[slot].has_host) + return(0); + + nfunc = pcibr_soft->bs_slot[slot].bss_ninfo; + if (nfunc < 1) + return(0); + + pcibr_infoh = pcibr_soft->bs_slot[slot].bss_infos; + if (!pcibr_infoh) + return(0); + + /* + * Try to make the DevIO windows not + * overlap by pushing the "io" and "hi" + * allocation areas up to the next one + * or two megabyte bound. This also + * keeps them from being zero. + * + * DO NOT do this with "pci_lo" since + * the entire "lo" area is only a + * megabyte, total ... + */ + align = (slot < 2) ? 0x200000 : 0x100000; + mask = -align; + pci_io_fb = (pci_io_fb + align - 1) & mask; + pci_hi_fb = (pci_hi_fb + align - 1) & mask; + + for (func = 0; func < nfunc; ++func) { + cfg_p cfgw; + cfg_p wptr; + pciio_space_t space; + iopaddr_t base; + size_t size; + cfg_p pci_cfg_cmd_reg_p; + unsigned pci_cfg_cmd_reg; + unsigned pci_cfg_cmd_reg_add = 0; + + pcibr_info = pcibr_infoh[func]; + + if (!pcibr_info) + continue; + + if (pcibr_info->f_vendor == PCIIO_VENDOR_ID_NONE) + continue; + + cfgw = bridge->b_type0_cfg_dev[slot].f[func].l; + wptr = cfgw + PCI_CFG_BASE_ADDR_0 / 4; + + for (win = 0; win < PCI_CFG_BASE_ADDRS; ++win) { + + space = pcibr_info->f_window[win].w_space; + base = pcibr_info->f_window[win].w_base; + size = pcibr_info->f_window[win].w_size; + + if (size < 1) + continue; + + if (base >= size) { +#if DEBUG && PCI_DEBUG + printk("pcibr: slot %d func %d window %d is in %d[0x%x..0x%x], alloc by prom\n", + slot, func, win, space, base, base + size - 1); +#endif + continue; /* already allocated */ + } + align = size; /* ie. 
0x00001000 */ + if (align < _PAGESZ) + align = _PAGESZ; /* ie. 0x00004000 */ + mask = -align; /* ie. 0xFFFFC000 */ + + switch (space) { + case PCIIO_SPACE_IO: + base = (pci_io_fb + align - 1) & mask; + if ((base + size) > pci_io_fl) { + base = 0; + break; + } + pci_io_fb = base + size; + break; + + case PCIIO_SPACE_MEM: +#ifdef LITTLE_ENDIAN + if ((wptr[((win*4)^4)/4] & PCI_BA_MEM_LOCATION) == +#else + if ((wptr[win] & PCI_BA_MEM_LOCATION) == +#endif /* LITTLE_ENDIAN */ + PCI_BA_MEM_1MEG) { + /* allocate from 20-bit PCI space */ + base = (pci_lo_fb + align - 1) & mask; + if ((base + size) > pci_lo_fl) { + base = 0; + break; + } + pci_lo_fb = base + size; + } else { + /* allocate from 32-bit or 64-bit PCI space */ + base = (pci_hi_fb + align - 1) & mask; + if ((base + size) > pci_hi_fl) { + base = 0; + break; + } + pci_hi_fb = base + size; + } + break; + + default: + base = 0; +#if DEBUG && PCI_DEBUG + printk("pcibr: slot %d window %d had bad space code %d\n", + slot, win, space); +#endif + } + pcibr_info->f_window[win].w_base = base; +#ifdef LITTLE_ENDIAN + wptr[((win*4)^4)/4] = base; + printk("Setting base address 0x%p base 0x%x\n", &(wptr[((win*4)^4)/4]), base); +#else + wptr[win] = base; +#endif /* LITTLE_ENDIAN */ + +#if DEBUG && PCI_DEBUG + if (base >= size) + printk("pcibr: slot %d func %d window %d is in %d [0x%x..0x%x], alloc by pcibr\n", + slot, func, win, space, base, base + size - 1); + else + printk("pcibr: slot %d func %d window %d, unable to alloc 0x%x in 0x%p\n", + slot, func, win, size, space); +#endif + } /* next base */ + + /* + * Allocate space for the EXPANSION ROM + * NOTE: DO NOT DO THIS ON AN IOC3, + * as it blows the system away. + */ + base = size = 0; + if ((pcibr_soft->bs_slot[slot].bss_vendor_id != IOC3_VENDOR_ID_NUM) || + (pcibr_soft->bs_slot[slot].bss_device_id != IOC3_DEVICE_ID_NUM)) { + + wptr = cfgw + PCI_EXPANSION_ROM / 4; +#ifdef LITTLE_ENDIAN + wptr[1] = 0xFFFFF000; + mask = wptr[1]; +#else + *wptr = 0xFFFFF000; + mask = *wptr; +#endif /* LITTLE_ENDIAN */ + if (mask & 0xFFFFF000) { + size = mask & -mask; + align = size; + if (align < _PAGESZ) + align = _PAGESZ; + mask = -align; + base = (pci_hi_fb + align - 1) & mask; + if ((base + size) > pci_hi_fl) + base = size = 0; + else { + pci_hi_fb = base + size; +#ifdef LITTLE_ENDIAN + wptr[1] = base; +#else + *wptr = base; +#endif /* LITTLE_ENDIAN */ +#if DEBUG && PCI_DEBUG + printk("%s/%d ROM in 0x%lx..0x%lx (alloc by pcibr)\n", + pcibr_soft->bs_name, slot, + base, base + size - 1); +#endif + } + } + } + pcibr_info->f_rbase = base; + pcibr_info->f_rsize = size; + + /* + * if necessary, update the board's + * command register to enable decoding + * in the windows we added. + * + * There are some bits we always want to + * be sure are set. + */ + pci_cfg_cmd_reg_add |= PCI_CMD_IO_SPACE; + pci_cfg_cmd_reg_add |= PCI_CMD_MEM_SPACE; + pci_cfg_cmd_reg_add |= PCI_CMD_BUS_MASTER; + + pci_cfg_cmd_reg_p = cfgw + PCI_CFG_COMMAND / 4; + pci_cfg_cmd_reg = *pci_cfg_cmd_reg_p; +#if PCI_FBBE /* XXX- check here to see if dev can do fast-back-to-back */ + if (!((pci_cfg_cmd_reg >> 16) & PCI_STAT_F_BK_BK_CAP)) + fast_back_to_back_enable = 0; +#endif + pci_cfg_cmd_reg &= 0xFFFF; + if (pci_cfg_cmd_reg_add & ~pci_cfg_cmd_reg) + *pci_cfg_cmd_reg_p = pci_cfg_cmd_reg | pci_cfg_cmd_reg_add; + + } /* next func */ + + /* Now that we have allocated new chunks of pci address spaces to this + * card we need to update the bookkeeping values which indicate + * the current pci address space allocations. 
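The allocation step used above for each window and for the expansion ROM is a bump allocator with power-of-two alignment. A standalone sketch of that step, with the cursor and limit passed explicitly and the IO page size assumed to be 16 KB:

#include <stdio.h>

#define PAGESZ 0x4000UL               /* assumed IO page size */

/* Carve an aligned block of 'size' bytes out of the free range
 * [*cursor, limit], the way pcibr_slot_addr_space_init() assigns windows.
 * Returns the assigned base, or 0 if the request does not fit.
 */
static unsigned long alloc_window(unsigned long *cursor, unsigned long limit,
                                  unsigned long size)
{
    unsigned long align = (size < PAGESZ) ? PAGESZ : size;
    unsigned long base  = (*cursor + align - 1) & ~(align - 1);

    if (base + size > limit)
        return 0;                     /* out of space in this region */
    *cursor = base + size;
    return base;
}

int main(void)
{
    unsigned long fb = 0x00200000UL, fl = 0x3FFFFFFFUL;

    printf("win0 at 0x%lx\n", alloc_window(&fb, fl, 0x1000));   /* 0x200000 */
    printf("win1 at 0x%lx\n", alloc_window(&fb, fl, 0x100000)); /* 0x300000 */
    return 0;
}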
+ */ + PCI_ADDR_SPACE_LIMITS_STORE(); + return(0); +} +/* + * pcibr_slot_device_init + * Setup the device register in the bridge for this pci slot. + */ +int +pcibr_slot_device_init(devfs_handle_t pcibr_vhdl, + pciio_slot_t slot) +{ + pcibr_soft_t pcibr_soft; + bridge_t *bridge; + bridgereg_t devreg; + + pcibr_soft = pcibr_soft_get(pcibr_vhdl); + if (!pcibr_soft || !PCIBR_VALID_SLOT(slot)) + return(1); + + bridge = pcibr_soft->bs_base; + + /* + * Adjustments to Device(x) + * and init of bss_device shadow + */ + devreg = bridge->b_device[slot].reg; + devreg &= ~BRIDGE_DEV_PAGE_CHK_DIS; + devreg |= BRIDGE_DEV_COH | BRIDGE_DEV_VIRTUAL_EN; +#ifdef LITTLE_ENDIAN + devreg |= BRIDGE_DEV_DEV_SWAP; +#endif + pcibr_soft->bs_slot[slot].bss_device = devreg; + bridge->b_device[slot].reg = devreg; + +#if DEBUG && PCI_DEBUG + printk("pcibr Device(%d): 0x%lx\n", slot, bridge->b_device[slot].reg); +#endif + +#if DEBUG && PCI_DEBUG + printk("pcibr: PCI space allocation done.\n"); +#endif + + return(0); +} + +/* + * pcibr_slot_guest_info_init + * Setup the host/guest relations for a pci slot. + */ +int +pcibr_slot_guest_info_init(devfs_handle_t pcibr_vhdl, + pciio_slot_t slot) +{ + pcibr_soft_t pcibr_soft; + pcibr_info_h pcibr_infoh; + pcibr_info_t pcibr_info; + pcibr_soft_slot_t slotp; + + pcibr_soft = pcibr_soft_get(pcibr_vhdl); + + if (!pcibr_soft || !PCIBR_VALID_SLOT(slot)) + return(1); + + slotp = &pcibr_soft->bs_slot[slot]; + + /* create info and verticies for guest slots; + * for compatibilitiy macros, create info + * for even unpopulated slots (but do not + * build verticies for them). + */ + if (pcibr_soft->bs_slot[slot].bss_ninfo < 1) { + NEWA(pcibr_infoh, 1); + pcibr_soft->bs_slot[slot].bss_ninfo = 1; + pcibr_soft->bs_slot[slot].bss_infos = pcibr_infoh; + + pcibr_info = pcibr_device_info_new + (pcibr_soft, slot, PCIIO_FUNC_NONE, + PCIIO_VENDOR_ID_NONE, PCIIO_DEVICE_ID_NONE); + + if (pcibr_soft->bs_slot[slot].has_host) { + slotp->slot_conn = pciio_device_info_register + (pcibr_vhdl, &pcibr_info->f_c); + } + } + + /* generate host/guest relations + */ + if (pcibr_soft->bs_slot[slot].has_host) { + int host = pcibr_soft->bs_slot[slot].host_slot; + pcibr_soft_slot_t host_slotp = &pcibr_soft->bs_slot[host]; + + hwgraph_edge_add(slotp->slot_conn, + host_slotp->slot_conn, + EDGE_LBL_HOST); + + /* XXX- only gives us one guest edge per + * host. If/when we have a host with more than + * one guest, we will need to figure out how + * the host finds all its guests, and sorts + * out which one is which. + */ + hwgraph_edge_add(host_slotp->slot_conn, + slotp->slot_conn, + EDGE_LBL_GUEST); + } + + return(0); +} +/* + * pcibr_slot_initial_rrb_alloc + * Allocate a default number of rrbs for this slot on + * the two channels. This is dictated by the rrb allocation + * strategy routine defined per platform. + */ + +int +pcibr_slot_initial_rrb_alloc(devfs_handle_t pcibr_vhdl, + pciio_slot_t slot) + +{ + pcibr_soft_t pcibr_soft; + pcibr_info_h pcibr_infoh; + pcibr_info_t pcibr_info; + bridge_t *bridge; + int c0, c1; + int r; + + pcibr_soft = pcibr_soft_get(pcibr_vhdl); + if (!pcibr_soft || !PCIBR_VALID_SLOT(slot)) + return(1); + + bridge = pcibr_soft->bs_base; + + + /* How may RRBs are on this slot? + */ + c0 = do_pcibr_rrb_count_valid(bridge, slot); + c1 = do_pcibr_rrb_count_valid(bridge, slot + PCIBR_RRB_SLOT_VIRTUAL); +#if PCIBR_RRB_DEBUG + printk("pcibr_attach: slot %d started with %d+%d\n", slot, c0, c1); +#endif + + /* Do we really need any? 
+ */ + pcibr_infoh = pcibr_soft->bs_slot[slot].bss_infos; + pcibr_info = pcibr_infoh[0]; + if ((pcibr_info->f_vendor == PCIIO_VENDOR_ID_NONE) && + !pcibr_soft->bs_slot[slot].has_host) { + if (c0 > 0) + do_pcibr_rrb_free(bridge, slot, c0); + if (c1 > 0) + do_pcibr_rrb_free(bridge, slot + PCIBR_RRB_SLOT_VIRTUAL, c1); + pcibr_soft->bs_rrb_valid[slot] = 0x1000; + pcibr_soft->bs_rrb_valid[slot + PCIBR_RRB_SLOT_VIRTUAL] = 0x1000; + return(0); + } + + pcibr_soft->bs_rrb_avail[slot & 1] -= c0 + c1; + pcibr_soft->bs_rrb_valid[slot] = c0; + pcibr_soft->bs_rrb_valid[slot + PCIBR_RRB_SLOT_VIRTUAL] = c1; + + pcibr_soft->bs_rrb_avail[0] = do_pcibr_rrb_count_avail(bridge, 0); + pcibr_soft->bs_rrb_avail[1] = do_pcibr_rrb_count_avail(bridge, 1); + + r = 3 - (c0 + c1); + + if (r > 0) { + pcibr_soft->bs_rrb_res[slot] = r; + pcibr_soft->bs_rrb_avail[slot & 1] -= r; + } + +#if PCIBR_RRB_DEBUG + printk("\t%d+%d+%d", + 0xFFF & pcibr_soft->bs_rrb_valid[slot], + 0xFFF & pcibr_soft->bs_rrb_valid[slot + PCIBR_RRB_SLOT_VIRTUAL], + pcibr_soft->bs_rrb_res[slot]); + printk("\n"); +#endif + return(0); +} + +/* + * pcibr_slot_call_device_attach + * This calls the associated driver attach routine for the pci + * card in this slot. + */ +int +pcibr_slot_call_device_attach(devfs_handle_t pcibr_vhdl, + pciio_slot_t slot) +{ + pcibr_soft_t pcibr_soft; + pcibr_info_h pcibr_infoh; + pcibr_info_t pcibr_info; + async_attach_t aa = NULL; + int func; + devfs_handle_t xconn_vhdl,conn_vhdl; + int nfunc; + + pcibr_soft = pcibr_soft_get(pcibr_vhdl); + if (!pcibr_soft || !PCIBR_VALID_SLOT(slot)) + return(1); + + + if (pcibr_soft->bs_slot[slot].has_host) + return(0); + + xconn_vhdl = pcibr_soft->bs_conn; + aa = async_attach_get_info(xconn_vhdl); + + nfunc = pcibr_soft->bs_slot[slot].bss_ninfo; + pcibr_infoh = pcibr_soft->bs_slot[slot].bss_infos; + + printk("\npcibr_slot_call_device_attach: link 0x%p pci bus 0x%p slot %d\n", xconn_vhdl, pcibr_vhdl, slot); + + for (func = 0; func < nfunc; ++func) { + + pcibr_info = pcibr_infoh[func]; + + if (!pcibr_info) + continue; + + if (pcibr_info->f_vendor == PCIIO_VENDOR_ID_NONE) + continue; + + conn_vhdl = pcibr_info->f_vertex; + + /* If the pci device has been disabled in the prom, + * do not set it up for driver attach. NOTE: usrpci + * and pciba will not "see" this connection point! + */ + if (device_admin_info_get(conn_vhdl, ADMIN_LBL_DISABLED)) { +#ifdef SUPPORT_PRINTING_V_FORMAT + PRINT_WARNING( "pcibr_slot_call_device_attach: %v disabled\n", + conn_vhdl); +#endif + continue; + } + if (aa) + async_attach_add_info(conn_vhdl, aa); + pciio_device_attach(conn_vhdl); + } /* next func */ + + printk("\npcibr_slot_call_device_attach: DONE\n"); + + return(0); +} +/* + * pcibr_slot_call_device_detach + * This calls the associated driver detach routine for the pci + * card in this slot. 
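The initial RRB accounting in pcibr_slot_initial_rrb_alloc() above tops a populated slot up to three read response buffers across its two channels. A distilled sketch of that bookkeeping, with the per-slot counts passed in and the hardware access elided:

#include <stdio.h>

/* A populated slot keeps whatever RRBs the prom left it (c0 on the even
 * channel, c1 on the odd) and reserves enough more to reach a total of
 * three, mirroring the r = 3 - (c0 + c1) step above.
 */
static int rrb_reserve(int c0, int c1, int *reserved)
{
    int r = 3 - (c0 + c1);

    *reserved = (r > 0) ? r : 0;
    return c0 + c1 + *reserved;       /* buffers this slot ends up with */
}

int main(void)
{
    int res;

    printf("started 1+0 -> total %d (reserve %d)\n", rrb_reserve(1, 0, &res), res);
    printf("started 2+2 -> total %d (reserve %d)\n", rrb_reserve(2, 2, &res), res);
    return 0;
}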
+ */ +int +pcibr_slot_call_device_detach(devfs_handle_t pcibr_vhdl, + pciio_slot_t slot) +{ + pcibr_soft_t pcibr_soft; + pcibr_info_h pcibr_infoh; + pcibr_info_t pcibr_info; + int func; + devfs_handle_t conn_vhdl; + int nfunc; + int ndetach = 1; + + pcibr_soft = pcibr_soft_get(pcibr_vhdl); + if (!pcibr_soft || !PCIBR_VALID_SLOT(slot)) + return(1); + + + if (pcibr_soft->bs_slot[slot].has_host) + return(0); + + + nfunc = pcibr_soft->bs_slot[slot].bss_ninfo; + pcibr_infoh = pcibr_soft->bs_slot[slot].bss_infos; + + for (func = 0; func < nfunc; ++func) { + + pcibr_info = pcibr_infoh[func]; + + if (!pcibr_info) + continue; + + if (pcibr_info->f_vendor == PCIIO_VENDOR_ID_NONE) + continue; + + conn_vhdl = pcibr_info->f_vertex; + + /* Make sure that we do not detach a system critical device + * vertex. + */ + if (is_sys_critical_vertex(conn_vhdl)) { +#ifdef SUPPORT_PRINTING_V_FORMAT + PRINT_WARNING( "%v is a system critical device vertex\n", + conn_vhdl); +#endif + continue; + } + + ndetach = 0; + pciio_device_detach(conn_vhdl); + } /* next func */ + + + return(ndetach); +} + +/* + * pcibr_device_attach + * This is a place holder routine to keep track of all the + * slot-specific initialization that needs to be done. + * This is usually called when we want to initialize a new + * pci card on the bus. + */ +int +pcibr_device_attach(devfs_handle_t pcibr_vhdl, + pciio_slot_t slot) +{ + return ( + /* Reset the slot */ + pcibr_slot_reset(pcibr_vhdl,slot) || + /* FInd out what is out there */ + pcibr_slot_info_init(pcibr_vhdl,slot) || + + /* Set up the address space for this slot in the pci land */ + pcibr_slot_addr_space_init(pcibr_vhdl,slot) || + + /* Setup the device register */ + pcibr_slot_device_init(pcibr_vhdl, slot) || + + /* Setup host/guest relations */ + pcibr_slot_guest_info_init(pcibr_vhdl,slot) || + + /* Initial RRB management */ + pcibr_slot_initial_rrb_alloc(pcibr_vhdl,slot) || + + /* Call the device attach */ + pcibr_slot_call_device_attach(pcibr_vhdl,slot) + ); + +} +/* + * pcibr_device_detach + * This is a place holder routine to keep track of all the + * slot-specific freeing that needs to be done. + */ +int +pcibr_device_detach(devfs_handle_t pcibr_vhdl, + pciio_slot_t slot) +{ + + /* Call the device detach */ + return (pcibr_slot_call_device_detach(pcibr_vhdl,slot)); + +} +/* + * pcibr_device_unregister + * This frees up any hardware resources reserved for this pci device + * and removes any pci infrastructural information setup for it. + * This is usually used at the time of shutting down of the pci card. 
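pcibr_device_attach() above relies on C's short-circuit || so that the first initialization step returning non-zero aborts the rest of the chain. The pattern in miniature, with stand-in step functions:

#include <stdio.h>

/* Stand-in init steps: each returns 0 on success, non-zero on failure,
 * matching the convention of the pcibr_slot_*_init() routines.
 */
static int step_reset(int slot)      { printf("reset %d\n", slot); return 0; }
static int step_probe(int slot)      { printf("probe %d\n", slot); return 1; }
static int step_addr_space(int slot) { printf("addr  %d\n", slot); return 0; }

static int device_attach(int slot)
{
    /* Short-circuit evaluation: once step_probe() returns non-zero,
     * step_addr_space() is never called and the chain evaluates to 1.
     */
    return step_reset(slot) ||
           step_probe(slot) ||
           step_addr_space(slot);
}

int main(void)
{
    printf("attach returned %d\n", device_attach(2));
    return 0;
}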
+ */ +void +pcibr_device_unregister(devfs_handle_t pconn_vhdl) +{ + pciio_info_t pciio_info; + devfs_handle_t pcibr_vhdl; + pciio_slot_t slot; + pcibr_soft_t pcibr_soft; + bridge_t *bridge; + + pciio_info = pciio_info_get(pconn_vhdl); + + /* Detach the pciba name space */ + pciio_device_detach(pconn_vhdl); + + pcibr_vhdl = pciio_info_master_get(pciio_info); + slot = pciio_info_slot_get(pciio_info); + + pcibr_soft = pcibr_soft_get(pcibr_vhdl); + bridge = pcibr_soft->bs_base; + + /* Clear all the hardware xtalk resources for this device */ + xtalk_widgetdev_shutdown(pcibr_soft->bs_conn, slot); + + /* Flush all the rrbs */ + pcibr_rrb_flush(pconn_vhdl); + + /* Free the rrbs allocated to this slot */ + do_pcibr_rrb_free(bridge, slot, + pcibr_soft->bs_rrb_valid[slot] + + pcibr_soft->bs_rrb_valid[slot + PCIBR_RRB_SLOT_VIRTUAL]); + + + pcibr_soft->bs_rrb_valid[slot] = 0; + pcibr_soft->bs_rrb_valid[slot + PCIBR_RRB_SLOT_VIRTUAL] = 0; + pcibr_soft->bs_rrb_res[slot] = 0; + + /* Flush the write buffers !! */ + (void)pcibr_wrb_flush(pconn_vhdl); + /* Clear the information specific to the slot */ + (void)pcibr_slot_info_free(pcibr_vhdl, slot); + +} + +/* + * build a convenience link path in the + * form of ".../<iobrick>/bus/<busnum>" + * + * returns 1 on success, 0 otherwise + * + * depends on hwgraph separator == '/' + */ +int +pcibr_bus_cnvlink(devfs_handle_t f_c, int slot) +{ + char dst[MAXDEVNAME]; + char *dp = dst; + char *cp, *xp; + int widgetnum; + char pcibus[8]; + devfs_handle_t nvtx, svtx; + int rv; + +#if DEBUG + printk("pcibr_bus_cnvlink: slot= %d f_c= %p\n", + slot, f_c); + { + int pos; + char dname[256]; + pos = devfs_generate_path(f_c, dname, 256); + printk("%s : path= %s\n", __FUNCTION__, &dname[pos]); + } +#endif + + if (GRAPH_SUCCESS != hwgraph_vertex_name_get(f_c, dst, MAXDEVNAME)) + return 0; + + /* dst example == /hw/module/001c02/Pbrick/xtalk/8/pci/direct */ + + /* find the widget number */ + xp = strstr(dst, "/"EDGE_LBL_XTALK"/"); + if (xp == NULL) + return 0; + widgetnum = atoi(xp+7); + if (widgetnum < XBOW_PORT_8 || widgetnum > XBOW_PORT_F) + return 0; + + /* remove "/pci/direct" from path */ + cp = strstr(dst, "/" EDGE_LBL_PCI "/" "direct"); + if (cp == NULL) + return 0; + *cp = (char)NULL; + + /* get the vertex for the widget */ + if (GRAPH_SUCCESS != hwgraph_traverse(NULL, dp, &svtx)) + return 0; + + *xp = (char)NULL; /* remove "/xtalk/..." from path */ + + /* dst example now == /hw/module/001c02/Pbrick */ + + /* get the bus number */ + strcat(dst, "/bus"); + sprintf(pcibus, "%d", p_busnum[widgetnum]); + + /* link to bus to widget */ + rv = hwgraph_path_add(NULL, dp, &nvtx); + if (GRAPH_SUCCESS == rv) + rv = hwgraph_edge_add(nvtx, svtx, pcibus); + + return (rv == GRAPH_SUCCESS); +} + +/* + * pcibr_attach: called every time the crosstalk + * infrastructure is asked to initialize a widget + * that matches the part number we handed to the + * registration routine above. 
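pcibr_bus_cnvlink() above parses the widget number straight out of the hwgraph path string with strstr() and atoi(). A standalone sketch of that parse, using the example path quoted in the code's own comment:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Pull the xtalk widget number out of a hwgraph path of the form
 * ".../xtalk/<widget>/pci/direct". Returns -1 if the path has no usable
 * widget; valid xbow ports are 8..0xF.
 */
static int path_widget(const char *path)
{
    const char *xp = strstr(path, "/xtalk/");
    int widget;

    if (xp == NULL)
        return -1;
    widget = atoi(xp + strlen("/xtalk/"));
    if (widget < 0x8 || widget > 0xF)
        return -1;
    return widget;
}

int main(void)
{
    printf("widget %d\n",
           path_widget("/hw/module/001c02/Pbrick/xtalk/8/pci/direct"));
    return 0;
}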
+ */ +/*ARGSUSED */ +int +pcibr_attach(devfs_handle_t xconn_vhdl) +{ + /* REFERENCED */ + graph_error_t rc; + devfs_handle_t pcibr_vhdl; + devfs_handle_t ctlr_vhdl; + bridge_t *bridge = NULL; + bridgereg_t id; + int rev; + pcibr_soft_t pcibr_soft; + pcibr_info_t pcibr_info; + xwidget_info_t info; + xtalk_intr_t xtalk_intr; + device_desc_t dev_desc; + int slot; + int ibit; + devfs_handle_t noslot_conn; + char devnm[MAXDEVNAME], *s; + pcibr_hints_t pcibr_hints; + bridgereg_t b_int_enable; + unsigned rrb_fixed = 0; + + iopaddr_t pci_io_fb, pci_io_fl; + iopaddr_t pci_lo_fb, pci_lo_fl; + iopaddr_t pci_hi_fb, pci_hi_fl; + + int spl_level; + char *nicinfo = (char *)0; + +#if PCI_FBBE + int fast_back_to_back_enable; +#endif + + async_attach_t aa = NULL; + + aa = async_attach_get_info(xconn_vhdl); + +#if DEBUG && ATTACH_DEBUG + printk("pcibr_attach: xconn_vhdl= %p\n", xconn_vhdl); + { + int pos; + char dname[256]; + pos = devfs_generate_path(xconn_vhdl, dname, 256); + printk("%s : path= %s \n", __FUNCTION__, &dname[pos]); + } +#endif + + /* Setup the PRB for the bridge in CONVEYOR BELT + * mode. PRBs are setup in default FIRE-AND-FORGET + * mode during the initialization. + */ + hub_device_flags_set(xconn_vhdl, HUB_PIO_CONVEYOR); + + bridge = (bridge_t *) + xtalk_piotrans_addr(xconn_vhdl, NULL, + 0, sizeof(bridge_t), 0); + +#ifndef MEDUSA_HACK + if ((bridge->b_wid_stat & BRIDGE_STAT_PCI_GIO_N) == 0) + return -1; /* someone else handles GIO bridges. */ +#endif + +#ifdef BRINGUP + if (XWIDGET_PART_REV_NUM(bridge->b_wid_id) == XBRIDGE_PART_REV_A) + NeedXbridgeSwap = 1; +#endif + + printk("pcibr_attach: Called with vertex 0x%p, b_wid_stat 0x%x, gio 0x%x\n",xconn_vhdl, bridge->b_wid_stat, BRIDGE_STAT_PCI_GIO_N); + + /* + * Create the vertex for the PCI bus, which we + * will also use to hold the pcibr_soft and + * which will be the "master" vertex for all the + * pciio connection points we will hang off it. + * This needs to happen before we call nic_bridge_vertex_info + * as we are some of the *_vmc functions need access to the edges. + * + * Opening this vertex will provide access to + * the Bridge registers themselves. + */ + rc = hwgraph_path_add(xconn_vhdl, EDGE_LBL_PCI, &pcibr_vhdl); + ASSERT(rc == GRAPH_SUCCESS); + + rc = hwgraph_char_device_add(pcibr_vhdl, EDGE_LBL_CONTROLLER, "pcibr_", &ctlr_vhdl); + ASSERT(rc == GRAPH_SUCCESS); + + /* + * decode the nic, and hang its stuff off our + * connection point where other drivers can get + * at it. + */ +#ifdef LATER + nicinfo = BRIDGE_VERTEX_MFG_INFO(xconn_vhdl, (nic_data_t) & bridge->b_nic); +#endif + + /* + * Get the hint structure; if some NIC callback + * marked this vertex as "hands-off" then we + * just return here, before doing anything else. + */ + pcibr_hints = pcibr_hints_get(xconn_vhdl, 0); + + if (pcibr_hints && pcibr_hints->ph_hands_off) + return -1; /* generic operations disabled */ + + id = bridge->b_wid_id; + rev = XWIDGET_PART_REV_NUM(id); + + hwgraph_info_add_LBL(pcibr_vhdl, INFO_LBL_PCIBR_ASIC_REV, (arbitrary_info_t) rev); + + /* + * allocate soft state structure, fill in some + * fields, and hook it up to our vertex. 
+ */ + NEW(pcibr_soft); + BZERO(pcibr_soft, sizeof *pcibr_soft); + pcibr_soft_set(pcibr_vhdl, pcibr_soft); + + pcibr_soft->bs_conn = xconn_vhdl; + pcibr_soft->bs_vhdl = pcibr_vhdl; + pcibr_soft->bs_base = bridge; + pcibr_soft->bs_rev_num = rev; + pcibr_soft->bs_intr_bits = pcibr_intr_bits; + if (is_xbridge(bridge)) { + pcibr_soft->bs_int_ate_size = XBRIDGE_INTERNAL_ATES; + pcibr_soft->bs_xbridge = 1; + } else { + pcibr_soft->bs_int_ate_size = BRIDGE_INTERNAL_ATES; + pcibr_soft->bs_xbridge = 0; + } + + pcibr_soft->bsi_err_intr = 0; + + /* Bridges up through REV C + * are unable to set the direct + * byteswappers to BYTE_STREAM. + */ + if (pcibr_soft->bs_rev_num <= BRIDGE_PART_REV_C) { + pcibr_soft->bs_pio_end_io = PCIIO_WORD_VALUES; + pcibr_soft->bs_pio_end_mem = PCIIO_WORD_VALUES; + } +#if PCIBR_SOFT_LIST + { + pcibr_list_p self; + + NEW(self); + self->bl_soft = pcibr_soft; + self->bl_vhdl = pcibr_vhdl; + self->bl_next = pcibr_list; + self->bl_next = swap_ptr((void **) &pcibr_list, (void *)self); + } +#endif + + /* + * get the name of this bridge vertex and keep the info. Use this + * only where it is really needed now: like error interrupts. + */ + s = dev_to_name(pcibr_vhdl, devnm, MAXDEVNAME); + pcibr_soft->bs_name = kmalloc(strlen(s) + 1, GFP_KERNEL); + strcpy(pcibr_soft->bs_name, s); + +#if SHOW_REVS || DEBUG +#if !DEBUG + if (kdebug) +#endif + printk("%sBridge ASIC: rev %s (code=0x%x) at %s\n", + is_xbridge(bridge) ? "X" : "", + (rev == BRIDGE_PART_REV_A) ? "A" : + (rev == BRIDGE_PART_REV_B) ? "B" : + (rev == BRIDGE_PART_REV_C) ? "C" : + (rev == BRIDGE_PART_REV_D) ? "D" : + (rev == XBRIDGE_PART_REV_A) ? "A" : + (rev == XBRIDGE_PART_REV_B) ? "B" : + "unknown", + rev, pcibr_soft->bs_name); +#endif + + info = xwidget_info_get(xconn_vhdl); + pcibr_soft->bs_xid = xwidget_info_id_get(info); + pcibr_soft->bs_master = xwidget_info_master_get(info); + pcibr_soft->bs_mxid = xwidget_info_masterid_get(info); + + /* + * Init bridge lock. + */ + spinlock_init(&pcibr_soft->bs_lock, "pcibr_loc"); + + /* + * If we have one, process the hints structure. + */ + if (pcibr_hints) { + rrb_fixed = pcibr_hints->ph_rrb_fixed; + + pcibr_soft->bs_rrb_fixed = rrb_fixed; + + if (pcibr_hints->ph_intr_bits) + pcibr_soft->bs_intr_bits = pcibr_hints->ph_intr_bits; + + for (slot = 0; slot < 8; ++slot) { + int hslot = pcibr_hints->ph_host_slot[slot] - 1; + + if (hslot < 0) { + pcibr_soft->bs_slot[slot].host_slot = slot; + } else { + pcibr_soft->bs_slot[slot].has_host = 1; + pcibr_soft->bs_slot[slot].host_slot = hslot; + } + } + } + /* + * set up initial values for state fields + */ + for (slot = 0; slot < 8; ++slot) { + pcibr_soft->bs_slot[slot].bss_devio.bssd_space = PCIIO_SPACE_NONE; + pcibr_soft->bs_slot[slot].bss_d64_base = PCIBR_D64_BASE_UNSET; + pcibr_soft->bs_slot[slot].bss_d32_base = PCIBR_D32_BASE_UNSET; + pcibr_soft->bs_slot[slot].bss_ext_ates_active = 0; + } + + for (ibit = 0; ibit < 8; ++ibit) { + pcibr_soft->bs_intr[ibit].bsi_xtalk_intr = 0; + pcibr_soft->bs_intr[ibit].bsi_pcibr_intr_list = 0; + } + + /* + * connect up our error handler + */ + xwidget_error_register(xconn_vhdl, pcibr_error_handler, pcibr_soft); + + /* + * Initialize various Bridge registers. + */ + + /* + * On pre-Rev.D bridges, set the PCI_RETRY_CNT + * to zero to avoid dropping stores. (#475347) + */ + if (rev < BRIDGE_PART_REV_D) + bridge->b_bus_timeout &= ~BRIDGE_BUS_PCI_RETRY_MASK; + + /* + * Clear all pending interrupts. 
+ */ + bridge->b_int_rst_stat = (BRIDGE_IRR_ALL_CLR); + + /* + * Until otherwise set up, + * assume all interrupts are + * from slot 7. + */ + bridge->b_int_device = (uint32_t) 0xffffffff; + + { + bridgereg_t dirmap; + paddr_t paddr; + iopaddr_t xbase; + xwidgetnum_t xport; + iopaddr_t offset; + int num_entries; + int entry; + cnodeid_t cnodeid; + nasid_t nasid; + char *node_val; + devfs_handle_t node_vhdl; + char vname[MAXDEVNAME]; + + /* Set the Bridge's 32-bit PCI to XTalk + * Direct Map register to the most useful + * value we can determine. Note that we + * must use a single xid for all of: + * direct-mapped 32-bit DMA accesses + * direct-mapped 64-bit DMA accesses + * DMA accesses through the PMU + * interrupts + * This is the only way to guarantee that + * completion interrupts will reach a CPU + * after all DMA data has reached memory. + * (Of course, there may be a few special + * drivers/controlers that explicitly manage + * this ordering problem.) + */ + + cnodeid = 0; /* default node id */ + /* + * Determine the base address node id to be used for all 32-bit + * Direct Mapping I/O. The default is node 0, but this can be changed + * via a DEVICE_ADMIN directive and the PCIBUS_DMATRANS_NODE + * attribute in the irix.sm config file. A device driver can obtain + * this node value via a call to pcibr_get_dmatrans_node(). + */ + node_val = device_admin_info_get(pcibr_vhdl, ADMIN_LBL_DMATRANS_NODE); + if (node_val != NULL) { + node_vhdl = hwgraph_path_to_vertex(node_val); + if (node_vhdl != GRAPH_VERTEX_NONE) { + cnodeid = nodevertex_to_cnodeid(node_vhdl); + } + if ((node_vhdl == GRAPH_VERTEX_NONE) || (cnodeid == CNODEID_NONE)) { + cnodeid = 0; + vertex_to_name(pcibr_vhdl, vname, sizeof(vname)); + PRINT_WARNING( "Invalid hwgraph node path specified:\n DEVICE_ADMIN: %s %s=%s\n", + vname, ADMIN_LBL_DMATRANS_NODE, node_val); + } + } + nasid = COMPACT_TO_NASID_NODEID(cnodeid); + paddr = NODE_OFFSET(nasid) + 0; + + /* currently, we just assume that if we ask + * for a DMA mapping to "zero" the XIO + * host will transmute this into a request + * for the lowest hunk of memory. + */ + xbase = xtalk_dmatrans_addr(xconn_vhdl, 0, + paddr, _PAGESZ, 0); + + if (xbase != XIO_NOWHERE) { + if (XIO_PACKED(xbase)) { + xport = XIO_PORT(xbase); + xbase = XIO_ADDR(xbase); + } else + xport = pcibr_soft->bs_mxid; + + offset = xbase & ((1ull << BRIDGE_DIRMAP_OFF_ADDRSHFT) - 1ull); + xbase >>= BRIDGE_DIRMAP_OFF_ADDRSHFT; + + dirmap = xport << BRIDGE_DIRMAP_W_ID_SHFT; + +#ifdef IRIX + dirmap |= BRIDGE_DIRMAP_RMF_64; +#endif + + if (xbase) + dirmap |= BRIDGE_DIRMAP_OFF & xbase; + else if (offset >= (512 << 20)) + dirmap |= BRIDGE_DIRMAP_ADD512; + + bridge->b_dir_map = dirmap; + } + /* + * Set bridge's idea of page size according to the system's + * idea of "IO page size". TBD: The idea of IO page size + * should really go away. + */ + /* + * ensure that we write and read without any interruption. + * The read following the write is required for the Bridge war + */ + spl_level = splhi(); +#if IOPGSIZE == 4096 + bridge->b_wid_control &= ~BRIDGE_CTRL_PAGE_SIZE; +#elif IOPGSIZE == 16384 + bridge->b_wid_control |= BRIDGE_CTRL_PAGE_SIZE; +#else + <<<Unable to deal with IOPGSIZE >>>; +#endif + bridge->b_wid_control; /* inval addr bug war */ + splx(spl_level); + + /* Initialize internal mapping entries */ + for (entry = 0; entry < pcibr_soft->bs_int_ate_size; entry++) + bridge->b_int_ate_ram[entry].wr = 0; + + /* + * Determine if there's external mapping SSRAM on this + * bridge. 
Set up Bridge control register appropriately, + * inititlize SSRAM, and set software up to manage RAM + * entries as an allocatable resource. + * + * Currently, we just use the rm* routines to manage ATE + * allocation. We should probably replace this with a + * Best Fit allocator. + * + * For now, if we have external SSRAM, avoid using + * the internal ssram: we can't turn PREFETCH on + * when we use the internal SSRAM; and besides, + * this also guarantees that no allocation will + * straddle the internal/external line, so we + * can increment ATE write addresses rather than + * recomparing against BRIDGE_INTERNAL_ATES every + * time. + */ +#ifdef BRINGUP + /* + * 082799: for some reason pcibr_init_ext_ate_ram is causing + * a Data Bus Error. It should be zero anyway so just force it. + */ + num_entries = 0; +#else + num_entries = pcibr_init_ext_ate_ram(bridge); +#endif + + /* we always have 128 ATEs (512 for Xbridge) inside the chip + * even if disabled for debugging. + */ + pcibr_soft->bs_int_ate_map = rmallocmap(pcibr_soft->bs_int_ate_size); + pcibr_ate_free(pcibr_soft, 0, pcibr_soft->bs_int_ate_size); +#if PCIBR_ATE_DEBUG + printk("pcibr_attach: %d INTERNAL ATEs\n", pcibr_soft->bs_int_ate_size); +#endif + + if (num_entries > pcibr_soft->bs_int_ate_size) { +#if PCIBR_ATE_NOTBOTH /* for debug -- forces us to use external ates */ + printk("pcibr_attach: disabling internal ATEs.\n"); + pcibr_ate_alloc(pcibr_soft, pcibr_soft->bs_int_ate_size); +#endif + pcibr_soft->bs_ext_ate_map = rmallocmap(num_entries); + pcibr_ate_free(pcibr_soft, pcibr_soft->bs_int_ate_size, + num_entries - pcibr_soft->bs_int_ate_size); +#if PCIBR_ATE_DEBUG + printk("pcibr_attach: %d EXTERNAL ATEs\n", + num_entries - pcibr_soft->bs_int_ate_size); +#endif + } + } + + { + bridgereg_t dirmap; + iopaddr_t xbase; + + /* + * now figure the *real* xtalk base address + * that dirmap sends us to. + */ + dirmap = bridge->b_dir_map; + if (dirmap & BRIDGE_DIRMAP_OFF) + xbase = (iopaddr_t)(dirmap & BRIDGE_DIRMAP_OFF) + << BRIDGE_DIRMAP_OFF_ADDRSHFT; + else if (dirmap & BRIDGE_DIRMAP_ADD512) + xbase = 512 << 20; + else + xbase = 0; + + pcibr_soft->bs_dir_xbase = xbase; + + /* it is entirely possible that we may, at this + * point, have our dirmap pointing somewhere + * other than our "master" port. + */ + pcibr_soft->bs_dir_xport = + (dirmap & BRIDGE_DIRMAP_W_ID) >> BRIDGE_DIRMAP_W_ID_SHFT; + } + + /* pcibr sources an error interrupt; + * figure out where to send it. + * + * If any interrupts are enabled in bridge, + * then the prom set us up and our interrupt + * has already been reconnected in mlreset + * above. + * + * Need to set the D_INTR_ISERR flag + * in the dev_desc used for alocating the + * error interrupt, so our interrupt will + * be properly routed and prioritized. + * + * If our crosstalk provider wants to + * fix widget error interrupts to specific + * destinations, D_INTR_ISERR is how it + * knows to do this. + */ + + dev_desc = device_desc_dup(pcibr_vhdl); + device_desc_flags_set(dev_desc, + device_desc_flags_get(dev_desc) | D_INTR_ISERR); + device_desc_intr_name_set(dev_desc, "Bridge error"); + + xtalk_intr = xtalk_intr_alloc(xconn_vhdl, dev_desc, pcibr_vhdl); + ASSERT(xtalk_intr != NULL); + + device_desc_free(dev_desc); + + pcibr_soft->bsi_err_intr = xtalk_intr; + +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) + /* + * On IP35 with XBridge, we do some extra checks in pcibr_setwidint + * in order to work around some addressing limitations. 
In order + * for that fire wall to work properly, we need to make sure we + * start from a known clean state. + */ + pcibr_clearwidint(bridge); +#endif + + printk("pribr_attach: FIXME Error Interrupt not registered\n"); + + xtalk_intr_connect(xtalk_intr, + (intr_func_t) pcibr_error_intr_handler, + (intr_arg_t) pcibr_soft, + (xtalk_intr_setfunc_t) pcibr_setwidint, + (void *) bridge, + (void *) 0); + + /* + * now we can start handling error interrupts; + * enable all of them. + * NOTE: some PCI ints may already be enabled. + */ + b_int_enable = bridge->b_int_enable | BRIDGE_ISR_ERRORS; + + + bridge->b_int_enable = b_int_enable; + bridge->b_int_mode = 0; /* do not send "clear interrupt" packets */ + + bridge->b_wid_tflush; /* wait until Bridge PIO complete */ + + /* + * Depending on the rev of bridge, disable certain features. + * Easiest way seems to be to force the PCIBR_NOwhatever + * flag to be on for all DMA calls, which overrides any + * PCIBR_whatever flag or even the setting of whatever + * from the PCIIO_DMA_class flags (or even from the other + * PCIBR flags, since NO overrides YES). + */ + pcibr_soft->bs_dma_flags = 0; + + /* PREFETCH: + * Always completely disabled for REV.A; + * at "pcibr_prefetch_enable_rev", anyone + * asking for PCIIO_PREFETCH gets it. + * Between these two points, you have to ask + * for PCIBR_PREFETCH, which promises that + * your driver knows about known Bridge WARs. + */ + if (pcibr_soft->bs_rev_num < BRIDGE_PART_REV_B) + pcibr_soft->bs_dma_flags |= PCIBR_NOPREFETCH; + else if (pcibr_soft->bs_rev_num < + (BRIDGE_WIDGET_PART_NUM << 4 | pcibr_prefetch_enable_rev)) + pcibr_soft->bs_dma_flags |= PCIIO_NOPREFETCH; + + /* WRITE_GATHER: + * Disabled up to but not including the + * rev number in pcibr_wg_enable_rev. There + * is no "WAR range" as with prefetch. + */ + if (pcibr_soft->bs_rev_num < + (BRIDGE_WIDGET_PART_NUM << 4 | pcibr_wg_enable_rev)) + pcibr_soft->bs_dma_flags |= PCIBR_NOWRITE_GATHER; + + pciio_provider_register(pcibr_vhdl, &pcibr_provider); + pciio_provider_startup(pcibr_vhdl); + + pci_io_fb = 0x00000004; /* I/O FreeBlock Base */ + pci_io_fl = 0xFFFFFFFF; /* I/O FreeBlock Last */ + + pci_lo_fb = 0x00000010; /* Low Memory FreeBlock Base */ + pci_lo_fl = 0x001FFFFF; /* Low Memory FreeBlock Last */ + + pci_hi_fb = 0x00200000; /* High Memory FreeBlock Base */ + pci_hi_fl = 0x3FFFFFFF; /* High Memory FreeBlock Last */ + + + PCI_ADDR_SPACE_LIMITS_STORE(); + + /* build "no-slot" connection point + */ + pcibr_info = pcibr_device_info_new + (pcibr_soft, PCIIO_SLOT_NONE, PCIIO_FUNC_NONE, + PCIIO_VENDOR_ID_NONE, PCIIO_DEVICE_ID_NONE); + noslot_conn = pciio_device_info_register + (pcibr_vhdl, &pcibr_info->f_c); + + /* Remember the no slot connection point info for tearing it + * down during detach. + */ + pcibr_soft->bs_noslot_conn = noslot_conn; + pcibr_soft->bs_noslot_info = pcibr_info; +#if PCI_FBBE + fast_back_to_back_enable = 1; +#endif + +#if PCI_FBBE + if (fast_back_to_back_enable) { + /* + * All devices on the bus are capable of fast back to back, so + * we need to set the fast back to back bit in all devices on + * the bus that are capable of doing such accesses. + */ + } +#endif + +#ifdef IRIX + /* If the bridge has been reset then there is no need to reset + * the individual PCI slots. 
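The prefetch and write-gather policy set in bs_dma_flags above is a pure function of the ASIC revision. A small sketch of that gating; the flag bits and revision thresholds below are placeholders, not the real BRIDGE_* encodings:

#include <stdio.h>

#define NOPREFETCH      0x1    /* placeholder flag bits */
#define NOWRITE_GATHER  0x2

#define REV_B           2      /* placeholder: first rev with usable prefetch */
#define WG_ENABLE_REV   4      /* placeholder: first rev with usable write gather */

/* Illustrative rev-gated quirk selection in the spirit of pcibr_attach():
 * older bridge revisions force prefetch and write-gather off for all DMA maps.
 */
static unsigned dma_quirk_flags(int rev)
{
    unsigned flags = 0;

    if (rev < REV_B)
        flags |= NOPREFETCH;          /* Rev A: prefetch completely disabled */
    if (rev < WG_ENABLE_REV)
        flags |= NOWRITE_GATHER;      /* no WAR range here, just a cutoff */
    return flags;
}

int main(void)
{
    int rev;

    for (rev = 1; rev <= 4; rev++)
        printf("rev %d -> flags 0x%x\n", rev, dma_quirk_flags(rev));
    return 0;
}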
+ */ + for (slot = 0; slot < 8; ++slot) + /* Reset all the slots */ + (void)pcibr_slot_reset(pcibr_vhdl,slot); +#endif + + for (slot = 0; slot < 8; ++slot) + /* Find out what is out there */ + (void)pcibr_slot_info_init(pcibr_vhdl,slot); + + for (slot = 0; slot < 8; ++slot) + /* Set up the address space for this slot in the pci land */ + (void)pcibr_slot_addr_space_init(pcibr_vhdl,slot); + + for (slot = 0; slot < 8; ++slot) + /* Setup the device register */ + (void)pcibr_slot_device_init(pcibr_vhdl, slot); + +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) + for (slot = 0; slot < 8; ++slot) + /* Set up convenience links */ + if (is_xbridge(bridge)) + if (pcibr_soft->bs_slot[slot].bss_ninfo > 0) /* if occupied */ + pcibr_bus_cnvlink(pcibr_info->f_vertex, slot); +#endif + + for (slot = 0; slot < 8; ++slot) + /* Setup host/guest relations */ + (void)pcibr_slot_guest_info_init(pcibr_vhdl,slot); + + for (slot = 0; slot < 8; ++slot) + /* Initial RRB management */ + (void)pcibr_slot_initial_rrb_alloc(pcibr_vhdl,slot); + +#ifdef dagum + /* driver attach routines should be called out from generic linux code */ + for (slot = 0; slot < 8; ++slot) + /* Call the device attach */ + (void)pcibr_slot_call_device_attach(pcibr_vhdl,slot); +#endif /* dagum */ + +#ifdef LATER + if (strstr(nicinfo, XTALK_PCI_PART_NUM)) { + do_pcibr_rrb_autoalloc(pcibr_soft, 1, 8); +#if PCIBR_RRB_DEBUG + printf("\n\nFound XTALK_PCI (030-1275) at %v\n", xconn_vhdl); + + printf("pcibr_attach: %v Shoebox RRB MANAGEMENT: %d+%d free\n", + pcibr_vhdl, + pcibr_soft->bs_rrb_avail[0], + pcibr_soft->bs_rrb_avail[1]); + + for (slot = 0; slot < 8; ++slot) + printf("\t%d+%d+%d", + 0xFFF & pcibr_soft->bs_rrb_valid[slot], + 0xFFF & pcibr_soft->bs_rrb_valid[slot + PCIBR_RRB_SLOT_VIRTUAL], + pcibr_soft->bs_rrb_res[slot]); + + printf("\n"); +#endif + } +#else + printk("pcibr_attach: FIXME to call do_pcibr_rrb_autoalloc nicinfo 0x%p\n", nicinfo); +#endif + + if (aa) + async_attach_add_info(noslot_conn, aa); + + pciio_device_attach(noslot_conn); + + + /* + * Tear down pointer to async attach info -- async threads for + * bridge's descendants may be running but the bridge's work is done. + */ + if (aa) + async_attach_del_info(xconn_vhdl); + + return 0; +} +/* + * pcibr_detach: + * Detach the bridge device from the hwgraph after cleaning out all the + * underlying vertices. 
+ */ +int +pcibr_detach(devfs_handle_t xconn) +{ + pciio_slot_t slot; + devfs_handle_t pcibr_vhdl; + pcibr_soft_t pcibr_soft; + bridge_t *bridge; + + /* Get the bridge vertex from its xtalk connection point */ + if (hwgraph_traverse(xconn, EDGE_LBL_PCI, &pcibr_vhdl) != GRAPH_SUCCESS) + return(1); + + pcibr_soft = pcibr_soft_get(pcibr_vhdl); + bridge = pcibr_soft->bs_base; + + /* Disable the interrupts from the bridge */ + bridge->b_int_enable = 0; + + /* Detach all the PCI devices talking to this bridge */ + for(slot = 0; slot < 8; slot++) { +#ifdef DEBUG + printk("pcibr_device_detach called for %p/%d\n", + pcibr_vhdl,slot); +#endif + pcibr_device_detach(pcibr_vhdl, slot); + } + + /* Unregister the no-slot connection point */ + pciio_device_info_unregister(pcibr_vhdl, + &(pcibr_soft->bs_noslot_info->f_c)); + + spinlock_destroy(&pcibr_soft->bs_lock); + kfree(pcibr_soft->bs_name); + + /* Error handler gets unregistered when the widget info is + * cleaned + */ + /* Free the soft ATE maps */ + if (pcibr_soft->bs_int_ate_map) + rmfreemap(pcibr_soft->bs_int_ate_map); + if (pcibr_soft->bs_ext_ate_map) + rmfreemap(pcibr_soft->bs_ext_ate_map); + + /* Disconnect the error interrupt and free the xtalk resources + * associated with it. + */ + xtalk_intr_disconnect(pcibr_soft->bsi_err_intr); + xtalk_intr_free(pcibr_soft->bsi_err_intr); + + /* Clear the software state maintained by the bridge driver for this + * bridge. + */ + DEL(pcibr_soft); + /* Remove the Bridge revision labelled info */ + (void)hwgraph_info_remove_LBL(pcibr_vhdl, INFO_LBL_PCIBR_ASIC_REV, NULL); + /* Remove the character device associated with this bridge */ + (void)hwgraph_edge_remove(pcibr_vhdl, EDGE_LBL_CONTROLLER, NULL); + /* Remove the PCI bridge vertex */ + (void)hwgraph_edge_remove(xconn, EDGE_LBL_PCI, NULL); + + return(0); +} + +int +pcibr_asic_rev(devfs_handle_t pconn_vhdl) +{ + devfs_handle_t pcibr_vhdl; + arbitrary_info_t ainfo; + + if (GRAPH_SUCCESS != + hwgraph_traverse(pconn_vhdl, EDGE_LBL_MASTER, &pcibr_vhdl)) + return -1; + + if (GRAPH_SUCCESS != + hwgraph_info_get_LBL(pcibr_vhdl, INFO_LBL_PCIBR_ASIC_REV, &ainfo)) + return -1; + + return (int) ainfo; +} + +int +pcibr_write_gather_flush(devfs_handle_t pconn_vhdl) +{ + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + pciio_slot_t slot; + slot = pciio_info_slot_get(pciio_info); + pcibr_device_write_gather_flush(pcibr_soft, slot); + return 0; +} + +/* ===================================================================== + * PIO MANAGEMENT + */ + +LOCAL iopaddr_t +pcibr_addr_pci_to_xio(devfs_handle_t pconn_vhdl, + pciio_slot_t slot, + pciio_space_t space, + iopaddr_t pci_addr, + size_t req_size, + unsigned flags) +{ + pcibr_info_t pcibr_info = pcibr_info_get(pconn_vhdl); + pciio_info_t pciio_info = &pcibr_info->f_c; + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + bridge_t *bridge = pcibr_soft->bs_base; + + unsigned bar; /* which BASE reg on device is decoding */ + iopaddr_t xio_addr = XIO_NOWHERE; + + pciio_space_t wspace; /* which space device is decoding */ + iopaddr_t wbase; /* base of device decode on PCI */ + size_t wsize; /* size of device decode on PCI */ + + int try; /* DevIO(x) window scanning order control */ + int win; /* which DevIO(x) window is being used */ + pciio_space_t mspace; /* target space for devio(x) register */ + iopaddr_t mbase; /* base of devio(x) mapped area on PCI */ + size_t msize; /* size of devio(x) mapped area on PCI */ + 
size_t mmask; /* addr bits stored in Device(x) */ + + unsigned s; + + s = pcibr_lock(pcibr_soft); + + if (pcibr_soft->bs_slot[slot].has_host) { + slot = pcibr_soft->bs_slot[slot].host_slot; + pcibr_info = pcibr_soft->bs_slot[slot].bss_infos[0]; + } + if (space == PCIIO_SPACE_NONE) + goto done; + + if (space == PCIIO_SPACE_CFG) { + /* + * Usually, the first mapping + * established to a PCI device + * is to its config space. + * + * In any case, we definitely + * do NOT need to worry about + * PCI BASE registers, and + * MUST NOT attempt to point + * the DevIO(x) window at + * this access ... + */ + if (((flags & PCIIO_BYTE_STREAM) == 0) && + ((pci_addr + req_size) <= BRIDGE_TYPE0_CFG_FUNC_OFF)) + xio_addr = pci_addr + BRIDGE_TYPE0_CFG_DEV(slot); + + goto done; + } + if (space == PCIIO_SPACE_ROM) { + /* PIO to the Expansion Rom. + * Driver is responsible for + * enabling and disabling + * decodes properly. + */ + wbase = pcibr_info->f_rbase; + wsize = pcibr_info->f_rsize; + + /* + * While the driver should know better + * than to attempt to map more space + * than the device is decoding, he might + * do it; better to bail out here. + */ + if ((pci_addr + req_size) > wsize) + goto done; + + pci_addr += wbase; + space = PCIIO_SPACE_MEM; + } + /* + * reduce window mappings to raw + * space mappings (maybe allocating + * windows), and try for DevIO(x) + * usage (setting it if it is available). + */ + bar = space - PCIIO_SPACE_WIN0; + if (bar < 6) { + wspace = pcibr_info->f_window[bar].w_space; + if (wspace == PCIIO_SPACE_NONE) + goto done; + + /* get pci base and size */ + wbase = pcibr_info->f_window[bar].w_base; + wsize = pcibr_info->f_window[bar].w_size; + + /* + * While the driver should know better + * than to attempt to map more space + * than the device is decoding, he might + * do it; better to bail out here. + */ + if ((pci_addr + req_size) > wsize) + goto done; + + /* shift from window relative to + * decoded space relative. + */ + pci_addr += wbase; + space = wspace; + } else + bar = -1; + + /* Scan all the DevIO(x) windows twice looking for one + * that can satisfy our request. The first time through, + * only look at assigned windows; the second time, also + * look at PCIIO_SPACE_NONE windows. Arrange the order + * so we always look at our own window first. + * + * We will not attempt to satisfy a single request + * by concatinating multiple windows. + */ + for (try = 0; try < 16; ++try) { + bridgereg_t devreg; + unsigned offset; + + win = (try + slot) % 8; + + /* If this DevIO(x) mapping area can provide + * a mapping to this address, use it. + */ + msize = (win < 2) ? 0x200000 : 0x100000; + mmask = -msize; + if (space != PCIIO_SPACE_IO) + mmask &= 0x3FFFFFFF; + + offset = pci_addr & (msize - 1); + + /* If this window can't possibly handle that request, + * go on to the next window. + */ + if (((pci_addr & (msize - 1)) + req_size) > msize) + continue; + + devreg = pcibr_soft->bs_slot[win].bss_device; + + /* Is this window "nailed down"? + * If not, maybe we can use it. + * (only check this the second time through) + */ + mspace = pcibr_soft->bs_slot[win].bss_devio.bssd_space; + if ((try > 7) && (mspace == PCIIO_SPACE_NONE)) { + + /* If this is the primary DevIO(x) window + * for some other device, skip it. + */ + if ((win != slot) && + (PCIIO_VENDOR_ID_NONE != + pcibr_soft->bs_slot[win].bss_vendor_id)) + continue; + + /* It's a free window, and we fit in it. + * Set up Device(win) to our taste. 
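The sixteen-iteration loop above visits each DevIO(x) register twice, always starting from the requesting slot's own window; the second pass is the one allowed to claim unassigned windows. A sketch that only prints the visit order, with the requesting slot chosen as an example:

#include <stdio.h>

/* Reproduce the DevIO(x) scan order from pcibr_addr_pci_to_xio():
 * win = (try + slot) % 8 over sixteen tries, so each window is seen twice
 * and the slot's own window is always considered first.
 */
int main(void)
{
    int slot = 3;                       /* example requesting slot */
    int try;

    for (try = 0; try < 16; try++) {
        int win = (try + slot) % 8;

        printf("try %2d: DevIO(%d)%s\n", try, win,
               (try > 7) ? " (may claim a free window)" : "");
    }
    return 0;
}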
+ */ + mbase = pci_addr & mmask; + + /* check that we would really get from + * here to there. + */ + if ((mbase | offset) != pci_addr) + continue; + + devreg &= ~BRIDGE_DEV_OFF_MASK; + if (space != PCIIO_SPACE_IO) + devreg |= BRIDGE_DEV_DEV_IO_MEM; + else + devreg &= ~BRIDGE_DEV_DEV_IO_MEM; + devreg |= (mbase >> 20) & BRIDGE_DEV_OFF_MASK; + + /* default is WORD_VALUES. + * if you specify both, + * operation is undefined. + */ + if (flags & PCIIO_BYTE_STREAM) + devreg |= BRIDGE_DEV_DEV_SWAP; + else + devreg &= ~BRIDGE_DEV_DEV_SWAP; + + if (pcibr_soft->bs_slot[win].bss_device != devreg) { + bridge->b_device[win].reg = devreg; + pcibr_soft->bs_slot[win].bss_device = devreg; + bridge->b_wid_tflush; /* wait until Bridge PIO complete */ + +#if DEBUG && PCI_DEBUG + printk("pcibr Device(%d): 0x%lx\n", win, bridge->b_device[win].reg); +#endif + } + pcibr_soft->bs_slot[win].bss_devio.bssd_space = space; + pcibr_soft->bs_slot[win].bss_devio.bssd_base = mbase; + xio_addr = BRIDGE_DEVIO(win) + (pci_addr - mbase); + +#if DEBUG && PCI_DEBUG + printk("%s LINE %d map to space %d space desc 0x%x[%lx..%lx] for slot %d allocates DevIO(%d) devreg 0x%x\n", + __FUNCTION__, __LINE__, space, space_desc, + pci_addr, pci_addr + req_size - 1, + slot, win, devreg); +#endif + + goto done; + } /* endif DevIO(x) not pointed */ + mbase = pcibr_soft->bs_slot[win].bss_devio.bssd_base; + + /* Now check for request incompat with DevIO(x) + */ + if ((mspace != space) || + (pci_addr < mbase) || + ((pci_addr + req_size) > (mbase + msize)) || + ((flags & PCIIO_BYTE_STREAM) && !(devreg & BRIDGE_DEV_DEV_SWAP)) || + (!(flags & PCIIO_BYTE_STREAM) && (devreg & BRIDGE_DEV_DEV_SWAP))) + continue; + + /* DevIO(x) window is pointed at PCI space + * that includes our target. Calculate the + * final XIO address, release the lock and + * return. + */ + xio_addr = BRIDGE_DEVIO(win) + (pci_addr - mbase); + +#if DEBUG && PCI_DEBUG + printk("%s LINE %d map to space %d [0x%p..0x%p] for slot %d uses DevIO(%d)\n", + __FUNCTION__, __LINE__, space, pci_addr, pci_addr + req_size - 1, slot, win); +#endif + goto done; + } + + switch (space) { + /* + * Accesses to device decode + * areas that do a not fit + * within the DevIO(x) space are + * modified to be accesses via + * the direct mapping areas. + * + * If necessary, drivers can + * explicitly ask for mappings + * into these address spaces, + * but this should never be needed. + */ + case PCIIO_SPACE_MEM: /* "mem space" */ + case PCIIO_SPACE_MEM32: /* "mem, use 32-bit-wide bus" */ + if ((pci_addr + BRIDGE_PCI_MEM32_BASE + req_size - 1) <= + BRIDGE_PCI_MEM32_LIMIT) + xio_addr = pci_addr + BRIDGE_PCI_MEM32_BASE; + break; + + case PCIIO_SPACE_MEM64: /* "mem, use 64-bit-wide bus" */ + if ((pci_addr + BRIDGE_PCI_MEM64_BASE + req_size - 1) <= + BRIDGE_PCI_MEM64_LIMIT) + xio_addr = pci_addr + BRIDGE_PCI_MEM64_BASE; + break; + + case PCIIO_SPACE_IO: /* "i/o space" */ + /* Bridge Hardware Bug WAR #482741: + * The 4G area that maps directly from + * XIO space to PCI I/O space is busted + * until Bridge Rev D. + */ + if ((pcibr_soft->bs_rev_num > BRIDGE_PART_REV_C) && + ((pci_addr + BRIDGE_PCI_IO_BASE + req_size - 1) <= + BRIDGE_PCI_IO_LIMIT)) + xio_addr = pci_addr + BRIDGE_PCI_IO_BASE; + break; + } + + /* Check that "Direct PIO" byteswapping matches, + * try to change it if it does not. 
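The window-fit arithmetic used by the scan above can be checked in isolation. A sketch using plain unsigned arithmetic instead of iopaddr_t/size_t: DevIO(0..1) decode 2MB, DevIO(2..7) decode 1MB, the request must not cross the window, and the base/offset split must reproduce the original PCI address (MEM-space bases are limited to 30 bits, as in the mmask computation above).

#include <stdio.h>

static int devio_fits(unsigned long long pci_addr, unsigned long long req_size,
                      int win, int is_io_space)
{
    unsigned long long msize = (win < 2) ? 0x200000ULL : 0x100000ULL;
    unsigned long long mmask = ~(msize - 1);        /* same value as -msize */
    unsigned long long offset, mbase;

    if (!is_io_space)
        mmask &= 0x3FFFFFFFULL;                     /* MEM windows: 30-bit base */

    offset = pci_addr & (msize - 1);
    if (offset + req_size > msize)                  /* would cross the window */
        return 0;

    mbase = pci_addr & mmask;
    return (mbase | offset) == pci_addr;            /* base reachable from Device(win) */
}

int main(void)
{
    /* 0x40000 bytes at PCI MEM 0x1F80000 fits a 1MB window */
    printf("%d\n", devio_fits(0x1F80000ULL, 0x40000ULL, 3, 0));
    /* the same request starting at 0x1FF0000 would cross into the next MB */
    printf("%d\n", devio_fits(0x1FF0000ULL, 0x40000ULL, 3, 0));
    return 0;
}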
+ */ + if (xio_addr != XIO_NOWHERE) { + unsigned bst; /* nonzero to set bytestream */ + unsigned *bfp; /* addr of record of how swapper is set */ + unsigned swb; /* which control bit to mung */ + unsigned bfo; /* current swapper setting */ + unsigned bfn; /* desired swapper setting */ + + bfp = ((space == PCIIO_SPACE_IO) + ? (&pcibr_soft->bs_pio_end_io) + : (&pcibr_soft->bs_pio_end_mem)); + + bfo = *bfp; + + bst = flags & PCIIO_BYTE_STREAM; + + bfn = bst ? PCIIO_BYTE_STREAM : PCIIO_WORD_VALUES; + + if (bfn == bfo) { /* we already match. */ + ; + } else if (bfo != 0) { /* we have a conflict. */ +#if DEBUG && PCI_DEBUG + printk("pcibr_addr_pci_to_xio: swap conflict in space %d , was%s%s, want%s%s\n", + space, + bfo & PCIIO_BYTE_STREAM ? " BYTE_STREAM" : "", + bfo & PCIIO_WORD_VALUES ? " WORD_VALUES" : "", + bfn & PCIIO_BYTE_STREAM ? " BYTE_STREAM" : "", + bfn & PCIIO_WORD_VALUES ? " WORD_VALUES" : ""); +#endif + xio_addr = XIO_NOWHERE; + } else { /* OK to make the change. */ + bridgereg_t octl, nctl; + + swb = (space == PCIIO_SPACE_IO) ? BRIDGE_CTRL_IO_SWAP : BRIDGE_CTRL_MEM_SWAP; + octl = bridge->b_wid_control; + nctl = bst ? octl | swb : octl & ~swb; + + if (octl != nctl) /* make the change if any */ + bridge->b_wid_control = nctl; + + *bfp = bfn; /* record the assignment */ + +#if DEBUG && PCI_DEBUG + printk("pcibr_addr_pci_to_xio: swap for space %d set to%s%s\n", + space, + bfn & PCIIO_BYTE_STREAM ? " BYTE_STREAM" : "", + bfn & PCIIO_WORD_VALUES ? " WORD_VALUES" : ""); +#endif + } + } + done: + pcibr_unlock(pcibr_soft, s); + return xio_addr; +} + +/*ARGSUSED6 */ +pcibr_piomap_t +pcibr_piomap_alloc(devfs_handle_t pconn_vhdl, + device_desc_t dev_desc, + pciio_space_t space, + iopaddr_t pci_addr, + size_t req_size, + size_t req_size_max, + unsigned flags) +{ + pcibr_info_t pcibr_info = pcibr_info_get(pconn_vhdl); + pciio_info_t pciio_info = &pcibr_info->f_c; + pciio_slot_t pciio_slot = pciio_info_slot_get(pciio_info); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + devfs_handle_t xconn_vhdl = pcibr_soft->bs_conn; + + pcibr_piomap_t *mapptr; + pcibr_piomap_t maplist; + pcibr_piomap_t pcibr_piomap; + iopaddr_t xio_addr; + xtalk_piomap_t xtalk_piomap; + unsigned s; + + /* Make sure that the req sizes are non-zero */ + if ((req_size < 1) || (req_size_max < 1)) + return NULL; + + /* + * Code to translate slot/space/addr + * into xio_addr is common between + * this routine and pcibr_piotrans_addr. + */ + xio_addr = pcibr_addr_pci_to_xio(pconn_vhdl, pciio_slot, space, pci_addr, req_size, flags); + + if (xio_addr == XIO_NOWHERE) + return NULL; + + /* Check the piomap list to see if there is already an allocated + * piomap entry but not in use. If so use that one. 
Otherwise + * allocate a new piomap entry and add it to the piomap list + */ + mapptr = &(pcibr_info->f_piomap); + + s = pcibr_lock(pcibr_soft); + for (pcibr_piomap = *mapptr; + pcibr_piomap != NULL; + pcibr_piomap = pcibr_piomap->bp_next) { + if (pcibr_piomap->bp_mapsz == 0) + break; + } + + if (pcibr_piomap) + mapptr = NULL; + else { + pcibr_unlock(pcibr_soft, s); + NEW(pcibr_piomap); + } + + pcibr_piomap->bp_dev = pconn_vhdl; + pcibr_piomap->bp_slot = pciio_slot; + pcibr_piomap->bp_flags = flags; + pcibr_piomap->bp_space = space; + pcibr_piomap->bp_pciaddr = pci_addr; + pcibr_piomap->bp_mapsz = req_size; + pcibr_piomap->bp_soft = pcibr_soft; + pcibr_piomap->bp_toc[0] = 0; + + if (mapptr) { + s = pcibr_lock(pcibr_soft); + maplist = *mapptr; + pcibr_piomap->bp_next = maplist; + *mapptr = pcibr_piomap; + } + pcibr_unlock(pcibr_soft, s); + + + if (pcibr_piomap) { + xtalk_piomap = + xtalk_piomap_alloc(xconn_vhdl, 0, + xio_addr, + req_size, req_size_max, + flags & PIOMAP_FLAGS); + if (xtalk_piomap) { + pcibr_piomap->bp_xtalk_addr = xio_addr; + pcibr_piomap->bp_xtalk_pio = xtalk_piomap; + } else { + pcibr_piomap->bp_mapsz = 0; + pcibr_piomap = 0; + } + } + return pcibr_piomap; +} + +/*ARGSUSED */ +void +pcibr_piomap_free(pcibr_piomap_t pcibr_piomap) +{ + xtalk_piomap_free(pcibr_piomap->bp_xtalk_pio); + pcibr_piomap->bp_xtalk_pio = 0; + pcibr_piomap->bp_mapsz = 0; +} + +/*ARGSUSED */ +caddr_t +pcibr_piomap_addr(pcibr_piomap_t pcibr_piomap, + iopaddr_t pci_addr, + size_t req_size) +{ + return xtalk_piomap_addr(pcibr_piomap->bp_xtalk_pio, + pcibr_piomap->bp_xtalk_addr + + pci_addr - pcibr_piomap->bp_pciaddr, + req_size); +} + +/*ARGSUSED */ +void +pcibr_piomap_done(pcibr_piomap_t pcibr_piomap) +{ + xtalk_piomap_done(pcibr_piomap->bp_xtalk_pio); +} + +/*ARGSUSED */ +caddr_t +pcibr_piotrans_addr(devfs_handle_t pconn_vhdl, + device_desc_t dev_desc, + pciio_space_t space, + iopaddr_t pci_addr, + size_t req_size, + unsigned flags) +{ + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + pciio_slot_t pciio_slot = pciio_info_slot_get(pciio_info); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + devfs_handle_t xconn_vhdl = pcibr_soft->bs_conn; + + iopaddr_t xio_addr; + + xio_addr = pcibr_addr_pci_to_xio(pconn_vhdl, pciio_slot, space, pci_addr, req_size, flags); + + if (xio_addr == XIO_NOWHERE) + return NULL; + + return xtalk_piotrans_addr(xconn_vhdl, 0, xio_addr, req_size, flags & PIOMAP_FLAGS); +} + +/* + * PIO Space allocation and management. + * Allocate and Manage the PCI PIO space (mem and io space) + * This routine is pretty simplistic at this time, and + * does pretty trivial management of allocation and freeing.. + * The current scheme is prone for fragmentation.. + * Change the scheme to use bitmaps. + */ + +/*ARGSUSED */ +iopaddr_t +pcibr_piospace_alloc(devfs_handle_t pconn_vhdl, + device_desc_t dev_desc, + pciio_space_t space, + size_t req_size, + size_t alignment) +{ + pcibr_info_t pcibr_info = pcibr_info_get(pconn_vhdl); + pciio_info_t pciio_info = &pcibr_info->f_c; + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + + pciio_piospace_t piosp; + int s; + + iopaddr_t *pciaddr, *pcilast; + iopaddr_t start_addr; + size_t align_mask; + + /* + * Check for proper alignment + */ + ASSERT(alignment >= NBPP); + ASSERT((alignment & (alignment - 1)) == 0); + + align_mask = alignment - 1; + s = pcibr_lock(pcibr_soft); + + /* + * First look if a previously allocated chunk exists. 
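The allocation path below is a simple bump allocator: round the current free pointer up to the requested power-of-two alignment, check against the end of the region, then advance. A standalone sketch with made-up region bounds:

#include <stdio.h>
#include <assert.h>

static unsigned long pio_free = 0x00100200;   /* hypothetical *pciaddr */
static unsigned long pio_last = 0x00200000;   /* hypothetical *pcilast */

static unsigned long piospace_alloc(unsigned long req_size, unsigned long alignment)
{
    unsigned long align_mask = alignment - 1;
    unsigned long start = pio_free;

    assert((alignment & (alignment - 1)) == 0);    /* power of two, as the driver ASSERTs */

    if (start & align_mask)
        start = (start + align_mask) & ~align_mask;

    if (start + req_size > pio_last)
        return 0;                                  /* no room left */

    pio_free = start + req_size;                   /* bump the free pointer */
    return start;
}

int main(void)
{
    printf("0x%lx\n", piospace_alloc(0x800, 0x1000));  /* -> 0x101000, rounded up */
    printf("0x%lx\n", piospace_alloc(0x800, 0x1000));  /* -> 0x102000 */
    return 0;
}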
+ */ + if ((piosp = pcibr_info->f_piospace) != (pciio_piospace_t)0) { + /* + * Look through the list for a right sized free chunk. + */ + do { + if (piosp->free && + (piosp->space == space) && + (piosp->count >= req_size) && + !(piosp->start & align_mask)) { + piosp->free = 0; + pcibr_unlock(pcibr_soft, s); + return piosp->start; + } + piosp = piosp->next; + } while (piosp); + } + ASSERT(!piosp); + + switch (space) { + case PCIIO_SPACE_IO: + pciaddr = &pcibr_soft->bs_spinfo.pci_io_base; + pcilast = &pcibr_soft->bs_spinfo.pci_io_last; + break; + case PCIIO_SPACE_MEM: + case PCIIO_SPACE_MEM32: + pciaddr = &pcibr_soft->bs_spinfo.pci_mem_base; + pcilast = &pcibr_soft->bs_spinfo.pci_mem_last; + break; + default: + ASSERT(0); + pcibr_unlock(pcibr_soft, s); + return 0; + } + + start_addr = *pciaddr; + + /* + * Align start_addr. + */ + if (start_addr & align_mask) + start_addr = (start_addr + align_mask) & ~align_mask; + + if ((start_addr + req_size) > *pcilast) { + /* + * If too big a request, reject it. + */ + pcibr_unlock(pcibr_soft, s); + return 0; + } + *pciaddr = (start_addr + req_size); + + NEW(piosp); + piosp->free = 0; + piosp->space = space; + piosp->start = start_addr; + piosp->count = req_size; + piosp->next = pcibr_info->f_piospace; + pcibr_info->f_piospace = piosp; + + pcibr_unlock(pcibr_soft, s); + return start_addr; +} + +/*ARGSUSED */ +void +pcibr_piospace_free(devfs_handle_t pconn_vhdl, + pciio_space_t space, + iopaddr_t pciaddr, + size_t req_size) +{ + pcibr_info_t pcibr_info = pcibr_info_get(pconn_vhdl); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pcibr_info->f_mfast; + + pciio_piospace_t piosp; + int s; + char name[1024]; + + /* + * Look through the bridge data structures for the pciio_piospace_t + * structure corresponding to 'pciaddr' + */ + s = pcibr_lock(pcibr_soft); + piosp = pcibr_info->f_piospace; + while (piosp) { + /* + * Piospace free can only be for the complete + * chunk and not parts of it.. + */ + if (piosp->start == pciaddr) { + if (piosp->count == req_size) + break; + /* + * Improper size passed for freeing.. + * Print a message and break; + */ + hwgraph_vertex_name_get(pconn_vhdl, name, 1024); + PRINT_WARNING("pcibr_piospace_free: error"); + PRINT_WARNING("Device %s freeing size (0x%lx) different than allocated (0x%lx)", + name, req_size, piosp->count); + PRINT_WARNING("Freeing 0x%lx instead", piosp->count); + break; + } + piosp = piosp->next; + } + + if (!piosp) { + PRINT_WARNING( + "pcibr_piospace_free: Address 0x%lx size 0x%lx - No match\n", + pciaddr, req_size); + pcibr_unlock(pcibr_soft, s); + return; + } + piosp->free = 1; + pcibr_unlock(pcibr_soft, s); + return; +} + +/* ===================================================================== + * DMA MANAGEMENT + * + * The Bridge ASIC provides three methods of doing + * DMA: via a "direct map" register available in + * 32-bit PCI space (which selects a contiguous 2G + * address space on some other widget), via + * "direct" addressing via 64-bit PCI space (all + * destination information comes from the PCI + * address, including transfer attributes), and via + * a "mapped" region that allows a bunch of + * different small mappings to be established with + * the PMU. + * + * For efficiency, we most prefer to use the 32-bit + * direct mapping facility, since it requires no + * resource allocations. 
The advantage of using the + * PMU over the 64-bit direct is that single-cycle + * PCI addressing can be used; the advantage of + * using 64-bit direct over PMU addressing is that + * we do not have to allocate entries in the PMU. + */ + +/* + * Convert PCI-generic software flags and Bridge-specific software flags + * into Bridge-specific Direct Map attribute bits. + */ +LOCAL iopaddr_t +pcibr_flags_to_d64(unsigned flags, pcibr_soft_t pcibr_soft) +{ + iopaddr_t attributes = 0; + + /* Sanity check: Bridge only allows use of VCHAN1 via 64-bit addrs */ +#ifdef IRIX + ASSERT_ALWAYS(!(flags & PCIBR_VCHAN1) || (flags & PCIIO_DMA_A64)); +#endif + + /* Generic macro flags + */ + if (flags & PCIIO_DMA_DATA) { /* standard data channel */ + attributes &= ~PCI64_ATTR_BAR; /* no barrier bit */ + attributes |= PCI64_ATTR_PREF; /* prefetch on */ + } + if (flags & PCIIO_DMA_CMD) { /* standard command channel */ + attributes |= PCI64_ATTR_BAR; /* barrier bit on */ + attributes &= ~PCI64_ATTR_PREF; /* disable prefetch */ + } + /* Generic detail flags + */ + if (flags & PCIIO_PREFETCH) + attributes |= PCI64_ATTR_PREF; + if (flags & PCIIO_NOPREFETCH) + attributes &= ~PCI64_ATTR_PREF; + + /* the swap bit is in the address attributes for xbridge */ + if (pcibr_soft->bs_xbridge) { + if (flags & PCIIO_BYTE_STREAM) + attributes |= PCI64_ATTR_SWAP; + if (flags & PCIIO_WORD_VALUES) + attributes &= ~PCI64_ATTR_SWAP; + } + + /* Provider-specific flags + */ + if (flags & PCIBR_BARRIER) + attributes |= PCI64_ATTR_BAR; + if (flags & PCIBR_NOBARRIER) + attributes &= ~PCI64_ATTR_BAR; + + if (flags & PCIBR_PREFETCH) + attributes |= PCI64_ATTR_PREF; + if (flags & PCIBR_NOPREFETCH) + attributes &= ~PCI64_ATTR_PREF; + + if (flags & PCIBR_PRECISE) + attributes |= PCI64_ATTR_PREC; + if (flags & PCIBR_NOPRECISE) + attributes &= ~PCI64_ATTR_PREC; + + if (flags & PCIBR_VCHAN1) + attributes |= PCI64_ATTR_VIRTUAL; + if (flags & PCIBR_VCHAN0) + attributes &= ~PCI64_ATTR_VIRTUAL; + + return (attributes); +} + +/* + * Convert PCI-generic software flags and Bridge-specific software flags + * into Bridge-specific Address Translation Entry attribute bits. 
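The layering in pcibr_flags_to_d64() above is easiest to see in isolation: the generic DMA_DATA/DMA_CMD flags set a baseline, the generic detail flags adjust it, and the provider-specific PCIBR_* flags are applied last, so they win on conflict. All flag and attribute bit values in this sketch are placeholders, not the real PCIIO_*/PCI64_ATTR_* encodings.

#include <stdio.h>

#define DMA_DATA    0x01    /* hypothetical flag encodings */
#define DMA_CMD     0x02
#define PREFETCH    0x04
#define NOPREFETCH  0x08
#define BARRIER     0x10
#define NOBARRIER   0x20

#define ATTR_PREF   0x1ULL  /* hypothetical attribute bits */
#define ATTR_BAR    0x2ULL

static unsigned long long flags_to_attr(unsigned flags)
{
    unsigned long long attr = 0;

    if (flags & DMA_DATA)  { attr |=  ATTR_PREF; attr &= ~ATTR_BAR; }
    if (flags & DMA_CMD)   { attr |=  ATTR_BAR;  attr &= ~ATTR_PREF; }
    if (flags & PREFETCH)    attr |=  ATTR_PREF;   /* generic detail flags override */
    if (flags & NOPREFETCH)  attr &= ~ATTR_PREF;
    if (flags & BARRIER)     attr |=  ATTR_BAR;    /* provider flags override last */
    if (flags & NOBARRIER)   attr &= ~ATTR_BAR;
    return attr;
}

int main(void)
{
    /* DMA_DATA alone: prefetch on, no barrier */
    printf("0x%llx\n", flags_to_attr(DMA_DATA));
    /* DMA_DATA, but the provider forces a barrier and no prefetch */
    printf("0x%llx\n", flags_to_attr(DMA_DATA | NOPREFETCH | BARRIER));
    return 0;
}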
+ */ +LOCAL bridge_ate_t +pcibr_flags_to_ate(unsigned flags) +{ + bridge_ate_t attributes; + + /* default if nothing specified: + * NOBARRIER + * NOPREFETCH + * NOPRECISE + * COHERENT + * Plus the valid bit + */ + attributes = ATE_CO | ATE_V; + + /* Generic macro flags + */ + if (flags & PCIIO_DMA_DATA) { /* standard data channel */ + attributes &= ~ATE_BAR; /* no barrier */ + attributes |= ATE_PREF; /* prefetch on */ + } + if (flags & PCIIO_DMA_CMD) { /* standard command channel */ + attributes |= ATE_BAR; /* barrier bit on */ + attributes &= ~ATE_PREF; /* disable prefetch */ + } + /* Generic detail flags + */ + if (flags & PCIIO_PREFETCH) + attributes |= ATE_PREF; + if (flags & PCIIO_NOPREFETCH) + attributes &= ~ATE_PREF; + + /* Provider-specific flags + */ + if (flags & PCIBR_BARRIER) + attributes |= ATE_BAR; + if (flags & PCIBR_NOBARRIER) + attributes &= ~ATE_BAR; + + if (flags & PCIBR_PREFETCH) + attributes |= ATE_PREF; + if (flags & PCIBR_NOPREFETCH) + attributes &= ~ATE_PREF; + + if (flags & PCIBR_PRECISE) + attributes |= ATE_PREC; + if (flags & PCIBR_NOPRECISE) + attributes &= ~ATE_PREC; + + return (attributes); +} + +/*ARGSUSED */ +pcibr_dmamap_t +pcibr_dmamap_alloc(devfs_handle_t pconn_vhdl, + device_desc_t dev_desc, + size_t req_size_max, + unsigned flags) +{ + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + devfs_handle_t xconn_vhdl = pcibr_soft->bs_conn; + pciio_slot_t slot; + xwidgetnum_t xio_port; + + xtalk_dmamap_t xtalk_dmamap; + pcibr_dmamap_t pcibr_dmamap; + int ate_count; + int ate_index; + + /* merge in forced flags */ + flags |= pcibr_soft->bs_dma_flags; + + NEWf(pcibr_dmamap, flags); + if (!pcibr_dmamap) + return 0; + + xtalk_dmamap = xtalk_dmamap_alloc(xconn_vhdl, dev_desc, req_size_max, + flags & DMAMAP_FLAGS); + if (!xtalk_dmamap) { +#if PCIBR_ATE_DEBUG + printk("pcibr_attach: xtalk_dmamap_alloc failed\n"); +#endif + DEL(pcibr_dmamap); + return 0; + } + xio_port = pcibr_soft->bs_mxid; + slot = pciio_info_slot_get(pciio_info); + + pcibr_dmamap->bd_dev = pconn_vhdl; + pcibr_dmamap->bd_slot = slot; + pcibr_dmamap->bd_soft = pcibr_soft; + pcibr_dmamap->bd_xtalk = xtalk_dmamap; + pcibr_dmamap->bd_max_size = req_size_max; + pcibr_dmamap->bd_xio_port = xio_port; + + if (flags & PCIIO_DMA_A64) { + if (!pcibr_try_set_device(pcibr_soft, slot, flags, BRIDGE_DEV_D64_BITS)) { + iopaddr_t pci_addr; + int have_rrbs; + int min_rrbs; + + /* Device is capable of A64 operations, + * and the attributes of the DMA are + * consistant with any previous DMA + * mappings using shared resources. + */ + + pci_addr = pcibr_flags_to_d64(flags, pcibr_soft); + + pcibr_dmamap->bd_flags = flags; + pcibr_dmamap->bd_xio_addr = 0; + pcibr_dmamap->bd_pci_addr = pci_addr; + + /* Make sure we have an RRB (or two). + */ + if (!(pcibr_soft->bs_rrb_fixed & (1 << slot))) { + if (flags & PCIBR_VCHAN1) + slot += PCIBR_RRB_SLOT_VIRTUAL; + have_rrbs = pcibr_soft->bs_rrb_valid[slot]; + if (have_rrbs < 2) { + if (pci_addr & PCI64_ATTR_PREF) + min_rrbs = 2; + else + min_rrbs = 1; + if (have_rrbs < min_rrbs) + do_pcibr_rrb_autoalloc(pcibr_soft, slot, min_rrbs - have_rrbs); + } + } +#if PCIBR_ATE_DEBUG + printk("pcibr_dmamap_alloc: using direct64\n"); +#endif + return pcibr_dmamap; + } +#if PCIBR_ATE_DEBUG + printk("pcibr_dmamap_alloc: unable to use direct64\n"); +#endif + flags &= ~PCIIO_DMA_A64; + } + if (flags & PCIIO_FIXED) { + /* warning: mappings may fail later, + * if direct32 can't get to the address. 
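For the ATE allocation a few lines below, the worst-case rounding is easy to check numerically. Assuming 4KB IO pages purely for illustration (the real IOPGSIZE comes from the platform headers), a request of IOPGSIZE+2 bytes needs three ATEs when round-up is enabled, because the target may start on the last byte of a page:

#include <stdio.h>

#define IOPGSIZE  4096u            /* assumed page size, for illustration only */
#define IOPG(x)   ((x) / IOPGSIZE)

static unsigned ate_count(unsigned req_size_max, int roundup)
{
    if (roundup)   /* worst-case start offset inside a page */
        return IOPG((IOPGSIZE - 1) + req_size_max - 1) + 1;
    else           /* caller promises a page-aligned target */
        return IOPG(req_size_max - 1) + 1;
}

int main(void)
{
    printf("%u\n", ate_count(IOPGSIZE + 2, 1));  /* 3 ATEs */
    printf("%u\n", ate_count(IOPGSIZE + 2, 0));  /* 2 ATEs if page aligned */
    return 0;
}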
+ */ + if (!pcibr_try_set_device(pcibr_soft, slot, flags, BRIDGE_DEV_D32_BITS)) { + /* User desires DIRECT A32 operations, + * and the attributes of the DMA are + * consistant with any previous DMA + * mappings using shared resources. + * Mapping calls may fail if target + * is outside the direct32 range. + */ +#if PCIBR_ATE_DEBUG + printk("pcibr_dmamap_alloc: using direct32\n"); +#endif + pcibr_dmamap->bd_flags = flags; + pcibr_dmamap->bd_xio_addr = pcibr_soft->bs_dir_xbase; + pcibr_dmamap->bd_pci_addr = PCI32_DIRECT_BASE; + return pcibr_dmamap; + } +#if PCIBR_ATE_DEBUG + printk("pcibr_dmamap_alloc: unable to use direct32\n"); +#endif + /* If the user demands FIXED and we can't + * give it to him, fail. + */ + xtalk_dmamap_free(xtalk_dmamap); + DEL(pcibr_dmamap); + return 0; + } + /* + * Allocate Address Translation Entries from the mapping RAM. + * Unless the PCIBR_NO_ATE_ROUNDUP flag is specified, + * the maximum number of ATEs is based on the worst-case + * scenario, where the requested target is in the + * last byte of an ATE; thus, mapping IOPGSIZE+2 + * does end up requiring three ATEs. + */ + if (!(flags & PCIBR_NO_ATE_ROUNDUP)) { + ate_count = IOPG((IOPGSIZE - 1) /* worst case start offset */ + +req_size_max /* max mapping bytes */ + - 1) + 1; /* round UP */ + } else { /* assume requested target is page aligned */ + ate_count = IOPG(req_size_max /* max mapping bytes */ + - 1) + 1; /* round UP */ + } + + ate_index = pcibr_ate_alloc(pcibr_soft, ate_count); + + if (ate_index != -1) { + if (!pcibr_try_set_device(pcibr_soft, slot, flags, BRIDGE_DEV_PMU_BITS)) { + bridge_ate_t ate_proto; + int have_rrbs; + int min_rrbs; + +#if PCIBR_ATE_DEBUG + printk("pcibr_dmamap_alloc: using PMU\n"); +#endif + + ate_proto = pcibr_flags_to_ate(flags); + + pcibr_dmamap->bd_flags = flags; + pcibr_dmamap->bd_pci_addr = + PCI32_MAPPED_BASE + IOPGSIZE * ate_index; + /* + * for xbridge the byte-swap bit == bit 29 of pci address + */ + if (pcibr_soft->bs_xbridge) { + if (flags & PCIIO_BYTE_STREAM) + ATE_SWAP_ON(pcibr_dmamap->bd_pci_addr); + /* + * If swap was set in bss_device in pcibr_endian_set() + * we need to change the address bit. + */ + if (pcibr_soft->bs_slot[slot].bss_device & + BRIDGE_DEV_SWAP_PMU) + ATE_SWAP_ON(pcibr_dmamap->bd_pci_addr); + if (flags & PCIIO_WORD_VALUES) + ATE_SWAP_OFF(pcibr_dmamap->bd_pci_addr); + } + pcibr_dmamap->bd_xio_addr = 0; + pcibr_dmamap->bd_ate_ptr = pcibr_ate_addr(pcibr_soft, ate_index); + pcibr_dmamap->bd_ate_index = ate_index; + pcibr_dmamap->bd_ate_count = ate_count; + pcibr_dmamap->bd_ate_proto = ate_proto; + + /* Make sure we have an RRB (or two). + */ + if (!(pcibr_soft->bs_rrb_fixed & (1 << slot))) { + have_rrbs = pcibr_soft->bs_rrb_valid[slot]; + if (have_rrbs < 2) { + if (ate_proto & ATE_PREF) + min_rrbs = 2; + else + min_rrbs = 1; + if (have_rrbs < min_rrbs) + do_pcibr_rrb_autoalloc(pcibr_soft, slot, min_rrbs - have_rrbs); + } + } + if (ate_index >= pcibr_soft->bs_int_ate_size && + !pcibr_soft->bs_xbridge) { + bridge_t *bridge = pcibr_soft->bs_base; + volatile unsigned *cmd_regp; + unsigned cmd_reg; + unsigned s; + + pcibr_dmamap->bd_flags |= PCIBR_DMAMAP_SSRAM; + + s = pcibr_lock(pcibr_soft); + cmd_regp = &(bridge-> + b_type0_cfg_dev[slot]. 
+ l[PCI_CFG_COMMAND / 4]); + cmd_reg = *cmd_regp; + pcibr_soft->bs_slot[slot].bss_cmd_pointer = cmd_regp; + pcibr_soft->bs_slot[slot].bss_cmd_shadow = cmd_reg; + pcibr_unlock(pcibr_soft, s); + } + return pcibr_dmamap; + } +#if PCIBR_ATE_DEBUG + printk("pcibr_dmamap_alloc: unable to use PMU\n"); +#endif + pcibr_ate_free(pcibr_soft, ate_index, ate_count); + } + /* total failure: sorry, you just can't + * get from here to there that way. + */ +#if PCIBR_ATE_DEBUG + printk("pcibr_dmamap_alloc: complete failure.\n"); +#endif + xtalk_dmamap_free(xtalk_dmamap); + DEL(pcibr_dmamap); + return 0; +} + +/*ARGSUSED */ +void +pcibr_dmamap_free(pcibr_dmamap_t pcibr_dmamap) +{ + pcibr_soft_t pcibr_soft = pcibr_dmamap->bd_soft; + pciio_slot_t slot = pcibr_dmamap->bd_slot; + +#ifdef IRIX + unsigned flags = pcibr_dmamap->bd_flags; +#endif + + /* Make sure that bss_ext_ates_active + * is properly kept up to date. + */ +#ifdef IRIX + if (PCIBR_DMAMAP_BUSY & flags) + if (PCIBR_DMAMAP_SSRAM & flags) + atomicAddInt(&(pcibr_soft-> + bs_slot[slot]. + bss_ext_ates_active), -1); +#endif + + xtalk_dmamap_free(pcibr_dmamap->bd_xtalk); + + if (pcibr_dmamap->bd_flags & PCIIO_DMA_A64) { + pcibr_release_device(pcibr_soft, slot, BRIDGE_DEV_D64_BITS); + } + if (pcibr_dmamap->bd_ate_count) { + pcibr_ate_free(pcibr_dmamap->bd_soft, + pcibr_dmamap->bd_ate_index, + pcibr_dmamap->bd_ate_count); + pcibr_release_device(pcibr_soft, slot, BRIDGE_DEV_PMU_BITS); + } + DEL(pcibr_dmamap); +} + +/* + * Setup an Address Translation Entry as specified. Use either the Bridge + * internal maps or the external map RAM, as appropriate. + */ +LOCAL bridge_ate_p +pcibr_ate_addr(pcibr_soft_t pcibr_soft, + int ate_index) +{ + bridge_t *bridge = pcibr_soft->bs_base; + + return (ate_index < pcibr_soft->bs_int_ate_size) + ? &(bridge->b_int_ate_ram[ate_index].wr) + : &(bridge->b_ext_ate_ram[ate_index]); +} + +/* + * pcibr_addr_xio_to_pci: given a PIO range, hand + * back the corresponding base PCI MEM address; + * this is used to short-circuit DMA requests that + * loop back onto this PCI bus. + */ +LOCAL iopaddr_t +pcibr_addr_xio_to_pci(pcibr_soft_t soft, + iopaddr_t xio_addr, + size_t req_size) +{ + iopaddr_t xio_lim = xio_addr + req_size - 1; + iopaddr_t pci_addr; + pciio_slot_t slot; + + if ((xio_addr >= BRIDGE_PCI_MEM32_BASE) && + (xio_lim <= BRIDGE_PCI_MEM32_LIMIT)) { + pci_addr = xio_addr - BRIDGE_PCI_MEM32_BASE; + return pci_addr; + } + if ((xio_addr >= BRIDGE_PCI_MEM64_BASE) && + (xio_lim <= BRIDGE_PCI_MEM64_LIMIT)) { + pci_addr = xio_addr - BRIDGE_PCI_MEM64_BASE; + return pci_addr; + } + for (slot = 0; slot < 8; ++slot) + if ((xio_addr >= BRIDGE_DEVIO(slot)) && + (xio_lim < BRIDGE_DEVIO(slot + 1))) { + bridgereg_t dev; + + dev = soft->bs_slot[slot].bss_device; + pci_addr = dev & BRIDGE_DEV_OFF_MASK; + pci_addr <<= BRIDGE_DEV_OFF_ADDR_SHFT; + pci_addr += xio_addr - BRIDGE_DEVIO(slot); + return (dev & BRIDGE_DEV_DEV_IO_MEM) ? pci_addr : PCI_NOWHERE; + } + return 0; +} + +/* We are starting to get more complexity + * surrounding writing ATEs, so pull + * the writing code into this new function. + * XXX mail ranga@engr for IP27 prom! 
+ */ + +#if PCIBR_FREEZE_TIME +#define ATE_FREEZE() s = ate_freeze(pcibr_dmamap, &freeze_time, cmd_regs) +#else +#define ATE_FREEZE() s = ate_freeze(pcibr_dmamap, cmd_regs) +#endif + +LOCAL unsigned +ate_freeze(pcibr_dmamap_t pcibr_dmamap, +#if PCIBR_FREEZE_TIME + unsigned *freeze_time_ptr, +#endif + unsigned *cmd_regs) +{ + pcibr_soft_t pcibr_soft = pcibr_dmamap->bd_soft; +#ifdef IRIX + int dma_slot = pcibr_dmamap->bd_slot; +#endif + int ext_ates = pcibr_dmamap->bd_flags & PCIBR_DMAMAP_SSRAM; + int slot; + + unsigned s; + unsigned cmd_reg; + volatile unsigned *cmd_lwa; + unsigned cmd_lwd; + + if (!ext_ates) + return 0; + + /* Bridge Hardware Bug WAR #484930: + * Bridge can't handle updating External ATEs + * while DMA is occuring that uses External ATEs, + * even if the particular ATEs involved are disjoint. + */ + + /* need to prevent anyone else from + * unfreezing the grant while we + * are working; also need to prevent + * this thread from being interrupted + * to keep PCI grant freeze time + * at an absolute minimum. + */ + s = pcibr_lock(pcibr_soft); + +#ifdef IRIX + /* just in case pcibr_dmamap_done was not called */ + if (pcibr_dmamap->bd_flags & PCIBR_DMAMAP_BUSY) { + pcibr_dmamap->bd_flags &= ~PCIBR_DMAMAP_BUSY; + if (pcibr_dmamap->bd_flags & PCIBR_DMAMAP_SSRAM) + atomicAddInt(&(pcibr_soft-> + bs_slot[dma_slot]. + bss_ext_ates_active), -1); + xtalk_dmamap_done(pcibr_dmamap->bd_xtalk); + } +#endif +#if PCIBR_FREEZE_TIME + *freeze_time_ptr = get_timestamp(); +#endif + + cmd_lwa = 0; + for (slot = 0; slot < 8; ++slot) + if (pcibr_soft-> + bs_slot[slot]. + bss_ext_ates_active) { + + cmd_reg = pcibr_soft-> + bs_slot[slot]. + bss_cmd_shadow; + if (cmd_reg & PCI_CMD_BUS_MASTER) { + cmd_lwa = pcibr_soft-> + bs_slot[slot]. + bss_cmd_pointer; + cmd_lwd = cmd_reg ^ PCI_CMD_BUS_MASTER; + cmd_lwa[0] = cmd_lwd; + } + cmd_regs[slot] = cmd_reg; + } else + cmd_regs[slot] = 0; + + if (cmd_lwa) { + bridge_t *bridge = pcibr_soft->bs_base; + + /* Read the last master bit that has been cleared. This PIO read + * on the PCI bus is to ensure the completion of any DMAs that + * are due to bus requests issued by PCI devices before the + * clearing of master bits. + */ + cmd_lwa[0]; + + /* Flush all the write buffers in the bridge */ + for (slot = 0; slot < 8; ++slot) + if (pcibr_soft-> + bs_slot[slot]. + bss_ext_ates_active) { + /* Flush the write buffer associated with this + * PCI device which might be using dma map RAM. 
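The essence of the ate_freeze()/ate_thaw() pair is to park every bus-mastering slot that is using external ATEs, rewrite the ATEs, then restore. A simplified sketch over shadowed command registers; the hardware accesses and the PIO read-back that drains in-flight DMA are omitted, and 0x4 is the standard PCI command-register Bus Master enable bit.

#include <stdio.h>

#define PCI_CMD_BUS_MASTER  0x0004u
#define NSLOTS              8

static unsigned cmd_shadow[NSLOTS];        /* shadow of each slot's command register */
static int      ext_ates_active[NSLOTS];   /* slots currently using external ATEs */

static void freeze(unsigned saved[NSLOTS])
{
    for (int slot = 0; slot < NSLOTS; ++slot) {
        saved[slot] = 0;
        if (!ext_ates_active[slot])
            continue;
        saved[slot] = cmd_shadow[slot];
        if (saved[slot] & PCI_CMD_BUS_MASTER)
            cmd_shadow[slot] = saved[slot] ^ PCI_CMD_BUS_MASTER;   /* bus mastering off */
    }
}

static void thaw(const unsigned saved[NSLOTS])
{
    for (int slot = 0; slot < NSLOTS; ++slot)
        if (saved[slot] & PCI_CMD_BUS_MASTER)
            cmd_shadow[slot] = saved[slot];                        /* bus mastering back on */
}

int main(void)
{
    unsigned saved[NSLOTS];

    cmd_shadow[2] = 0x0007;            /* example slot: I/O, MEM and bus master enabled */
    ext_ates_active[2] = 1;

    freeze(saved);
    printf("slot 2 during freeze: 0x%x\n", cmd_shadow[2]);   /* 0x3 */
    /* ... the ATE writes happen while frozen ... */
    thaw(saved);
    printf("slot 2 after thaw:    0x%x\n", cmd_shadow[2]);   /* 0x7 */
    return 0;
}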
+ */ + bridge->b_wr_req_buf[slot].reg; + } + } + return s; +} + +#define ATE_WRITE() ate_write(ate_ptr, ate_count, ate) + +LOCAL void +ate_write(bridge_ate_p ate_ptr, + int ate_count, + bridge_ate_t ate) +{ + while (ate_count-- > 0) { + *ate_ptr++ = ate; + ate += IOPGSIZE; + } +} + + +#if PCIBR_FREEZE_TIME +#define ATE_THAW() ate_thaw(pcibr_dmamap, ate_index, ate, ate_total, freeze_time, cmd_regs, s) +#else +#define ATE_THAW() ate_thaw(pcibr_dmamap, ate_index, cmd_regs, s) +#endif + +LOCAL void +ate_thaw(pcibr_dmamap_t pcibr_dmamap, + int ate_index, +#if PCIBR_FREEZE_TIME + bridge_ate_t ate, + int ate_total, + unsigned freeze_time_start, +#endif + unsigned *cmd_regs, + unsigned s) +{ + pcibr_soft_t pcibr_soft = pcibr_dmamap->bd_soft; +#ifdef IRIX + int dma_slot = pcibr_dmamap->bd_slot; +#endif + int slot; + bridge_t *bridge = pcibr_soft->bs_base; + int ext_ates = pcibr_dmamap->bd_flags & PCIBR_DMAMAP_SSRAM; + + unsigned cmd_reg; + +#if PCIBR_FREEZE_TIME + unsigned freeze_time; + static unsigned max_freeze_time = 0; + static unsigned max_ate_total; +#endif + + if (!ext_ates) + return; + + /* restore cmd regs */ + for (slot = 0; slot < 8; ++slot) + if ((cmd_reg = cmd_regs[slot]) & PCI_CMD_BUS_MASTER) + bridge->b_type0_cfg_dev[slot].l[PCI_CFG_COMMAND / 4] = cmd_reg; + + pcibr_dmamap->bd_flags |= PCIBR_DMAMAP_BUSY; +#ifdef IRIX + atomicAddInt(&(pcibr_soft-> + bs_slot[dma_slot]. + bss_ext_ates_active), 1); +#endif + +#if PCIBR_FREEZE_TIME + freeze_time = get_timestamp() - freeze_time_start; + + if ((max_freeze_time < freeze_time) || + (max_ate_total < ate_total)) { + if (max_freeze_time < freeze_time) + max_freeze_time = freeze_time; + if (max_ate_total < ate_total) + max_ate_total = ate_total; + pcibr_unlock(pcibr_soft, s); + printk("%s: pci freeze time %d usec for %d ATEs\n" + "\tfirst ate: %R\n", + pcibr_soft->bs_name, + freeze_time * 1000 / 1250, + ate_total, + ate, ate_bits); + } else +#endif + pcibr_unlock(pcibr_soft, s); +} + +/*ARGSUSED */ +iopaddr_t +pcibr_dmamap_addr(pcibr_dmamap_t pcibr_dmamap, + paddr_t paddr, + size_t req_size) +{ + pcibr_soft_t pcibr_soft; + iopaddr_t xio_addr; + xwidgetnum_t xio_port; + iopaddr_t pci_addr; + unsigned flags; + + ASSERT(pcibr_dmamap != NULL); + ASSERT(req_size > 0); + ASSERT(req_size <= pcibr_dmamap->bd_max_size); + + pcibr_soft = pcibr_dmamap->bd_soft; + + flags = pcibr_dmamap->bd_flags; + + xio_addr = xtalk_dmamap_addr(pcibr_dmamap->bd_xtalk, paddr, req_size); + if (XIO_PACKED(xio_addr)) { + xio_port = XIO_PORT(xio_addr); + xio_addr = XIO_ADDR(xio_addr); + } else + xio_port = pcibr_dmamap->bd_xio_port; + + /* If this DMA is to an addres that + * refers back to this Bridge chip, + * reduce it back to the correct + * PCI MEM address. + */ + if (xio_port == pcibr_soft->bs_xid) { + pci_addr = pcibr_addr_xio_to_pci(pcibr_soft, xio_addr, req_size); + } else if (flags & PCIIO_DMA_A64) { + /* A64 DMA: + * always use 64-bit direct mapping, + * which always works. + * Device(x) was set up during + * dmamap allocation. + */ + + /* attributes are already bundled up into bd_pci_addr. + */ + pci_addr = pcibr_dmamap->bd_pci_addr + | ((uint64_t) xio_port << PCI64_ATTR_TARG_SHFT) + | xio_addr; + + /* Bridge Hardware WAR #482836: + * If the transfer is not cache aligned + * and the Bridge Rev is <= B, force + * prefetch to be off. 
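The 64-bit direct DMA address built above is simply an OR of three fields: the attribute bits already stored in bd_pci_addr, the target widget number, and the XIO offset. A sketch with placeholder field positions; the real PCI64_ATTR_TARG_SHFT and attribute bits live in the Bridge headers.

#include <stdio.h>
#include <stdint.h>

#define TARG_SHFT   48            /* hypothetical position of the widget field */
#define ATTR_PREF   (1ULL << 60)  /* hypothetical prefetch attribute bit */

static uint64_t make_d64(uint64_t attr, unsigned xio_port, uint64_t xio_addr)
{
    return attr | ((uint64_t)xio_port << TARG_SHFT) | xio_addr;
}

int main(void)
{
    /* widget 0xb, 1GB XIO offset, prefetch attribute set */
    printf("0x%llx\n",
           (unsigned long long)make_d64(ATTR_PREF, 0xb, 0x40000000ULL));
    return 0;
}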
+ */ + if (flags & PCIBR_NOPREFETCH) + pci_addr &= ~PCI64_ATTR_PREF; + +#if DEBUG && PCIBR_DMA_DEBUG + printk("pcibr_dmamap_addr (direct64):\n" + "\twanted paddr [0x%x..0x%x]\n" + "\tXIO port 0x%x offset 0x%x\n" + "\treturning PCI 0x%x\n", + paddr, paddr + req_size - 1, + xio_port, xio_addr, pci_addr); +#endif + } else if (flags & PCIIO_FIXED) { + /* A32 direct DMA: + * always use 32-bit direct mapping, + * which may fail. + * Device(x) was set up during + * dmamap allocation. + */ + + if (xio_port != pcibr_soft->bs_dir_xport) + pci_addr = 0; /* wrong DIDN */ + else if (xio_addr < pcibr_dmamap->bd_xio_addr) + pci_addr = 0; /* out of range */ + else if ((xio_addr + req_size) > + (pcibr_dmamap->bd_xio_addr + BRIDGE_DMA_DIRECT_SIZE)) + pci_addr = 0; /* out of range */ + else + pci_addr = pcibr_dmamap->bd_pci_addr + + xio_addr - pcibr_dmamap->bd_xio_addr; + +#if DEBUG && PCIBR_DMA_DEBUG + printk("pcibr_dmamap_addr (direct32):\n" + "\twanted paddr [0x%x..0x%x]\n" + "\tXIO port 0x%x offset 0x%x\n" + "\treturning PCI 0x%x\n", + paddr, paddr + req_size - 1, + xio_port, xio_addr, pci_addr); +#endif + } else { + bridge_t *bridge = pcibr_soft->bs_base; + iopaddr_t offset = IOPGOFF(xio_addr); + bridge_ate_t ate_proto = pcibr_dmamap->bd_ate_proto; + int ate_count = IOPG(offset + req_size - 1) + 1; + + int ate_index = pcibr_dmamap->bd_ate_index; + unsigned cmd_regs[8]; + unsigned s; + +#if PCIBR_FREEZE_TIME + int ate_total = ate_count; + unsigned freeze_time; +#endif + +#if PCIBR_ATE_DEBUG + bridge_ate_t ate_cmp; + bridge_ate_p ate_cptr; + unsigned ate_lo, ate_hi; + int ate_bad = 0; + int ate_rbc = 0; +#endif + bridge_ate_p ate_ptr = pcibr_dmamap->bd_ate_ptr; + bridge_ate_t ate; + + /* Bridge Hardware WAR #482836: + * If the transfer is not cache aligned + * and the Bridge Rev is <= B, force + * prefetch to be off. + */ + if (flags & PCIBR_NOPREFETCH) + ate_proto &= ~ATE_PREF; + + ate = ate_proto + | (xio_port << ATE_TIDSHIFT) + | (xio_addr - offset); + + pci_addr = pcibr_dmamap->bd_pci_addr + offset; + + /* Fill in our mapping registers + * with the appropriate xtalk data, + * and hand back the PCI address. + */ + + ASSERT(ate_count > 0); + if (ate_count <= pcibr_dmamap->bd_ate_count) { + ATE_FREEZE(); + ATE_WRITE(); + ATE_THAW(); + bridge->b_wid_tflush; /* wait until Bridge PIO complete */ + } else { + /* The number of ATE's required is greater than the number + * allocated for this map. One way this can happen is if + * pcibr_dmamap_alloc() was called with the PCIBR_NO_ATE_ROUNDUP + * flag, and then when that map is used (right now), the + * target address tells us we really did need to roundup. + * The other possibility is that the map is just plain too + * small to handle the requested target area. + */ +#if PCIBR_ATE_DEBUG + PRINT_WARNING( "pcibr_dmamap_addr :\n" + "\twanted paddr [0x%x..0x%x]\n" + "\tate_count 0x%x bd_ate_count 0x%x\n" + "\tATE's required > number allocated\n", + paddr, paddr + req_size - 1, + ate_count, pcibr_dmamap->bd_ate_count); +#endif + pci_addr = 0; + } + + } + return pci_addr; +} + +/*ARGSUSED */ +alenlist_t +pcibr_dmamap_list(pcibr_dmamap_t pcibr_dmamap, + alenlist_t palenlist, + unsigned flags) +{ + pcibr_soft_t pcibr_soft; +#ifdef IRIX + bridge_t *bridge; +#else + bridge_t *bridge=NULL; +#endif + + unsigned al_flags = (flags & PCIIO_NOSLEEP) ? 
AL_NOSLEEP : 0; + int inplace = flags & PCIIO_INPLACE; + + alenlist_t pciio_alenlist = 0; + alenlist_t xtalk_alenlist; + size_t length; + iopaddr_t offset; + unsigned direct64; +#ifdef IRIX + int ate_index; + int ate_count; + int ate_total = 0; + bridge_ate_p ate_ptr; + bridge_ate_t ate_proto; +#else + int ate_index = 0; + int ate_count = 0; + int ate_total = 0; + bridge_ate_p ate_ptr = (bridge_ate_p)0; + bridge_ate_t ate_proto = (bridge_ate_t)0; +#endif + bridge_ate_t ate_prev; + bridge_ate_t ate; + alenaddr_t xio_addr; + xwidgetnum_t xio_port; + iopaddr_t pci_addr; + alenaddr_t new_addr; + + unsigned cmd_regs[8]; + unsigned s = 0; + +#if PCIBR_FREEZE_TIME + unsigned freeze_time; +#endif + int ate_freeze_done = 0; /* To pair ATE_THAW + * with an ATE_FREEZE + */ + + pcibr_soft = pcibr_dmamap->bd_soft; + + xtalk_alenlist = xtalk_dmamap_list(pcibr_dmamap->bd_xtalk, palenlist, + flags & DMAMAP_FLAGS); + if (!xtalk_alenlist) + goto fail; + + alenlist_cursor_init(xtalk_alenlist, 0, NULL); + + if (inplace) { + pciio_alenlist = xtalk_alenlist; + } else { + pciio_alenlist = alenlist_create(al_flags); + if (!pciio_alenlist) + goto fail; + } + + direct64 = pcibr_dmamap->bd_flags & PCIIO_DMA_A64; + if (!direct64) { + bridge = pcibr_soft->bs_base; + ate_ptr = pcibr_dmamap->bd_ate_ptr; + ate_index = pcibr_dmamap->bd_ate_index; + ate_proto = pcibr_dmamap->bd_ate_proto; + ATE_FREEZE(); + ate_freeze_done = 1; /* Remember that we need to do an ATE_THAW */ + } + pci_addr = pcibr_dmamap->bd_pci_addr; + + ate_prev = 0; /* matches no valid ATEs */ + while (ALENLIST_SUCCESS == + alenlist_get(xtalk_alenlist, NULL, 0, + &xio_addr, &length, al_flags)) { + if (XIO_PACKED(xio_addr)) { + xio_port = XIO_PORT(xio_addr); + xio_addr = XIO_ADDR(xio_addr); + } else + xio_port = pcibr_dmamap->bd_xio_port; + + if (xio_port == pcibr_soft->bs_xid) { + new_addr = pcibr_addr_xio_to_pci(pcibr_soft, xio_addr, length); + if (new_addr == PCI_NOWHERE) + goto fail; + } else if (direct64) { + new_addr = pci_addr | xio_addr + | ((uint64_t) xio_port << PCI64_ATTR_TARG_SHFT); + + /* Bridge Hardware WAR #482836: + * If the transfer is not cache aligned + * and the Bridge Rev is <= B, force + * prefetch to be off. + */ + if (flags & PCIBR_NOPREFETCH) + new_addr &= ~PCI64_ATTR_PREF; + + } else { + /* calculate the ate value for + * the first address. If it + * matches the previous + * ATE written (ie. we had + * multiple blocks in the + * same IOPG), then back up + * and reuse that ATE. + * + * We are NOT going to + * aggressively try to + * reuse any other ATEs. + */ + offset = IOPGOFF(xio_addr); + ate = ate_proto + | (xio_port << ATE_TIDSHIFT) + | (xio_addr - offset); + if (ate == ate_prev) { +#if PCIBR_ATE_DEBUG + printk("pcibr_dmamap_list: ATE share\n"); +#endif + ate_ptr--; + ate_index--; + pci_addr -= IOPGSIZE; + } + new_addr = pci_addr + offset; + + /* Fill in the hardware ATEs + * that contain this block. 
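The ATE-sharing test in the loop above relies on each ATE mapping exactly one IO page: if the next scatter/gather block starts in the same XIO page as the previous one, the ATE just written can be reused instead of consuming another entry. A sketch assuming 4KB IO pages and folding the widget and protection fields into a single placeholder prototype:

#include <stdio.h>
#include <stdint.h>

#define IOPGSIZE   4096u                 /* assumed IO page size */
#define IOPGOFF(x) ((x) & (IOPGSIZE - 1))

/* build the ATE the driver would write for a block starting at xio_addr */
static uint64_t ate_for(uint64_t ate_proto, uint64_t xio_addr)
{
    return ate_proto | (xio_addr - IOPGOFF(xio_addr));  /* page-aligned target */
}

int main(void)
{
    uint64_t proto = 0x3;  /* placeholder for the valid/coherent prototype bits */
    uint64_t prev  = ate_for(proto, 0x12345600);         /* first block */
    uint64_t next  = ate_for(proto, 0x12345A00);         /* same 4KB page */
    uint64_t other = ate_for(proto, 0x12346200);         /* next page */

    printf("reuse previous ATE? %s\n", (next  == prev) ? "yes" : "no");
    printf("reuse previous ATE? %s\n", (other == prev) ? "yes" : "no");
    return 0;
}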
+ */ + ate_count = IOPG(offset + length - 1) + 1; + ate_total += ate_count; + + /* Ensure that this map contains enough ATE's */ + if (ate_total > pcibr_dmamap->bd_ate_count) { +#if PCIBR_ATE_DEBUG + PRINT_WARNING( "pcibr_dmamap_list :\n" + "\twanted xio_addr [0x%x..0x%x]\n" + "\tate_total 0x%x bd_ate_count 0x%x\n" + "\tATE's required > number allocated\n", + xio_addr, xio_addr + length - 1, + ate_total, pcibr_dmamap->bd_ate_count); +#endif + goto fail; + } + + ATE_WRITE(); + + ate_index += ate_count; + ate_ptr += ate_count; + + ate_count <<= IOPFNSHIFT; + ate += ate_count; + pci_addr += ate_count; + } + + /* write the PCI DMA address + * out to the scatter-gather list. + */ + if (inplace) { + if (ALENLIST_SUCCESS != + alenlist_replace(pciio_alenlist, NULL, + &new_addr, &length, al_flags)) + goto fail; + } else { + if (ALENLIST_SUCCESS != + alenlist_append(pciio_alenlist, + new_addr, length, al_flags)) + goto fail; + } + } + if (!inplace) + alenlist_done(xtalk_alenlist); + + /* Reset the internal cursor of the alenlist to be returned back + * to the caller. + */ + alenlist_cursor_init(pciio_alenlist, 0, NULL); + + + /* In case an ATE_FREEZE was done do the ATE_THAW to unroll all the + * changes that ATE_FREEZE has done to implement the external SSRAM + * bug workaround. + */ + if (ate_freeze_done) { + ATE_THAW(); + bridge->b_wid_tflush; /* wait until Bridge PIO complete */ + } + return pciio_alenlist; + + fail: + /* There are various points of failure after doing an ATE_FREEZE + * We need to do an ATE_THAW. Otherwise the ATEs are locked forever. + * The decision to do an ATE_THAW needs to be based on whether a + * an ATE_FREEZE was done before. + */ + if (ate_freeze_done) { + ATE_THAW(); + bridge->b_wid_tflush; + } + if (pciio_alenlist && !inplace) + alenlist_destroy(pciio_alenlist); + return 0; +} + +/*ARGSUSED */ +void +pcibr_dmamap_done(pcibr_dmamap_t pcibr_dmamap) +{ + /* + * We could go through and invalidate ATEs here; + * for performance reasons, we don't. + * We also don't enforce the strict alternation + * between _addr/_list and _done, but Hub does. + */ + +#ifdef IRIX + if (pcibr_dmamap->bd_flags & PCIBR_DMAMAP_BUSY) { + pcibr_dmamap->bd_flags &= ~PCIBR_DMAMAP_BUSY; + + if (pcibr_dmamap->bd_flags & PCIBR_DMAMAP_SSRAM) + atomicAddInt(&(pcibr_dmamap->bd_soft-> + bs_slot[pcibr_dmamap->bd_slot]. + bss_ext_ates_active), -1); + } +#endif + + xtalk_dmamap_done(pcibr_dmamap->bd_xtalk); +} + + +/* + * For each bridge, the DIR_OFF value in the Direct Mapping Register + * determines the PCI to Crosstalk memory mapping to be used for all + * 32-bit Direct Mapping memory accesses. This mapping can be to any + * node in the system. This function will return that compact node id. 
+ */ + +/*ARGSUSED */ +cnodeid_t +pcibr_get_dmatrans_node(devfs_handle_t pconn_vhdl) +{ + + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + + return(NASID_TO_COMPACT_NODEID(NASID_GET(pcibr_soft->bs_dir_xbase))); +} + +/*ARGSUSED */ +iopaddr_t +pcibr_dmatrans_addr(devfs_handle_t pconn_vhdl, + device_desc_t dev_desc, + paddr_t paddr, + size_t req_size, + unsigned flags) +{ + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + devfs_handle_t xconn_vhdl = pcibr_soft->bs_conn; + pciio_slot_t pciio_slot = pciio_info_slot_get(pciio_info); + pcibr_soft_slot_t slotp = &pcibr_soft->bs_slot[pciio_slot]; + + xwidgetnum_t xio_port; + iopaddr_t xio_addr; + iopaddr_t pci_addr; + + int have_rrbs; + int min_rrbs; + + /* merge in forced flags */ + flags |= pcibr_soft->bs_dma_flags; + + xio_addr = xtalk_dmatrans_addr(xconn_vhdl, 0, paddr, req_size, + flags & DMAMAP_FLAGS); + + if (!xio_addr) { +#if PCIBR_DMA_DEBUG + printk("pcibr_dmatrans_addr:\n" + "\tpciio connection point %v\n" + "\txtalk connection point %v\n" + "\twanted paddr [0x%x..0x%x]\n" + "\txtalk_dmatrans_addr returned 0x%x\n", + pconn_vhdl, xconn_vhdl, + paddr, paddr + req_size - 1, + xio_addr); +#endif + return 0; + } + /* + * find which XIO port this goes to. + */ + if (XIO_PACKED(xio_addr)) { + if (xio_addr == XIO_NOWHERE) { +#if PCIBR_DMA_DEBUG + printk("pcibr_dmatrans_addr:\n" + "\tpciio connection point %v\n" + "\txtalk connection point %v\n" + "\twanted paddr [0x%x..0x%x]\n" + "\txtalk_dmatrans_addr returned 0x%x\n", + pconn_vhdl, xconn_vhdl, + paddr, paddr + req_size - 1, + xio_addr); +#endif + return 0; + } + xio_port = XIO_PORT(xio_addr); + xio_addr = XIO_ADDR(xio_addr); + + } else + xio_port = pcibr_soft->bs_mxid; + + /* + * If this DMA comes back to us, + * return the PCI MEM address on + * which it would land, or NULL + * if the target is something + * on bridge other than PCI MEM. + */ + if (xio_port == pcibr_soft->bs_xid) { + pci_addr = pcibr_addr_xio_to_pci(pcibr_soft, xio_addr, req_size); + return pci_addr; + } + /* If the caller can use A64, try to + * satisfy the request with the 64-bit + * direct map. This can fail if the + * configuration bits in Device(x) + * conflict with our flags. + */ + + if (flags & PCIIO_DMA_A64) { + pci_addr = slotp->bss_d64_base; + if (!(flags & PCIBR_VCHAN1)) + flags |= PCIBR_VCHAN0; + if ((pci_addr != PCIBR_D64_BASE_UNSET) && + (flags == slotp->bss_d64_flags)) { + + pci_addr |= xio_addr + | ((uint64_t) xio_port << PCI64_ATTR_TARG_SHFT); + +#if DEBUG && PCIBR_DMA_DEBUG +#if HWG_PERF_CHECK + if (xio_addr != 0x20000000) +#endif + printk("pcibr_dmatrans_addr: [reuse]\n" + "\tpciio connection point %v\n" + "\txtalk connection point %v\n" + "\twanted paddr [0x%x..0x%x]\n" + "\txtalk_dmatrans_addr returned 0x%x\n" + "\tdirect 64bit address is 0x%x\n", + pconn_vhdl, xconn_vhdl, + paddr, paddr + req_size - 1, + xio_addr, pci_addr); +#endif + return (pci_addr); + } + if (!pcibr_try_set_device(pcibr_soft, pciio_slot, flags, BRIDGE_DEV_D64_BITS)) { + pci_addr = pcibr_flags_to_d64(flags, pcibr_soft); + slotp->bss_d64_flags = flags; + slotp->bss_d64_base = pci_addr; + pci_addr |= xio_addr + | ((uint64_t) xio_port << PCI64_ATTR_TARG_SHFT); + + /* Make sure we have an RRB (or two). 
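The RRB top-up rule used in the check that follows (and in the other DMA setup paths earlier in this file) is: a prefetching stream wants at least two read response buffers, a non-prefetching one at least one, and only the shortfall is requested from do_pcibr_rrb_autoalloc(). A sketch of just that computation:

#include <stdio.h>

static int rrbs_to_alloc(int have_rrbs, int prefetching)
{
    int min_rrbs = prefetching ? 2 : 1;

    if (have_rrbs >= 2 || have_rrbs >= min_rrbs)
        return 0;                       /* already enough */
    return min_rrbs - have_rrbs;        /* shortfall to auto-allocate */
}

int main(void)
{
    printf("%d\n", rrbs_to_alloc(0, 1));   /* prefetching, none held  -> 2 */
    printf("%d\n", rrbs_to_alloc(1, 1));   /* prefetching, one held   -> 1 */
    printf("%d\n", rrbs_to_alloc(0, 0));   /* non-prefetching         -> 1 */
    return 0;
}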
+ */ + if (!(pcibr_soft->bs_rrb_fixed & (1 << pciio_slot))) { + if (flags & PCIBR_VCHAN1) + pciio_slot += PCIBR_RRB_SLOT_VIRTUAL; + have_rrbs = pcibr_soft->bs_rrb_valid[pciio_slot]; + if (have_rrbs < 2) { + if (pci_addr & PCI64_ATTR_PREF) + min_rrbs = 2; + else + min_rrbs = 1; + if (have_rrbs < min_rrbs) + do_pcibr_rrb_autoalloc(pcibr_soft, pciio_slot, min_rrbs - have_rrbs); + } + } +#if PCIBR_DMA_DEBUG +#if HWG_PERF_CHECK + if (xio_addr != 0x20000000) +#endif + printk("pcibr_dmatrans_addr:\n" + "\tpciio connection point %v\n" + "\txtalk connection point %v\n" + "\twanted paddr [0x%x..0x%x]\n" + "\txtalk_dmatrans_addr returned 0x%x\n" + "\tdirect 64bit address is 0x%x\n" + "\tnew flags: 0x%x\n", + pconn_vhdl, xconn_vhdl, + paddr, paddr + req_size - 1, + xio_addr, pci_addr, (uint64_t) flags); +#endif + return (pci_addr); + } + /* our flags conflict with Device(x). + */ + flags = flags + & ~PCIIO_DMA_A64 + & ~PCIBR_VCHAN0 + ; + +#if PCIBR_DMA_DEBUG + printk("pcibr_dmatrans_addr:\n" + "\tpciio connection point %v\n" + "\txtalk connection point %v\n" + "\twanted paddr [0x%x..0x%x]\n" + "\txtalk_dmatrans_addr returned 0x%x\n" + "\tUnable to set Device(x) bits for Direct-64\n", + pconn_vhdl, xconn_vhdl, + paddr, paddr + req_size - 1, + xio_addr); +#endif + } + /* Try to satisfy the request with the 32-bit direct + * map. This can fail if the configuration bits in + * Device(x) conflict with our flags, or if the + * target address is outside where DIR_OFF points. + */ + { + size_t map_size = 1ULL << 31; + iopaddr_t xio_base = pcibr_soft->bs_dir_xbase; + iopaddr_t offset = xio_addr - xio_base; + iopaddr_t endoff = req_size + offset; + + if ((req_size > map_size) || + (xio_addr < xio_base) || + (xio_port != pcibr_soft->bs_dir_xport) || + (endoff > map_size)) { +#if PCIBR_DMA_DEBUG + printk("pcibr_dmatrans_addr:\n" + "\tpciio connection point %v\n" + "\txtalk connection point %v\n" + "\twanted paddr [0x%x..0x%x]\n" + "\txtalk_dmatrans_addr returned 0x%x\n" + "\txio region outside direct32 target\n", + pconn_vhdl, xconn_vhdl, + paddr, paddr + req_size - 1, + xio_addr); +#endif + } else { + pci_addr = slotp->bss_d32_base; + if ((pci_addr != PCIBR_D32_BASE_UNSET) && + (flags == slotp->bss_d32_flags)) { + + pci_addr |= offset; + +#if DEBUG && PCIBR_DMA_DEBUG + printk("pcibr_dmatrans_addr: [reuse]\n" + "\tpciio connection point %v\n" + "\txtalk connection point %v\n" + "\twanted paddr [0x%x..0x%x]\n" + "\txtalk_dmatrans_addr returned 0x%x\n" + "\tmapped via direct32 offset 0x%x\n" + "\twill DMA via pci addr 0x%x\n", + pconn_vhdl, xconn_vhdl, + paddr, paddr + req_size - 1, + xio_addr, offset, pci_addr); +#endif + return (pci_addr); + } + if (!pcibr_try_set_device(pcibr_soft, pciio_slot, flags, BRIDGE_DEV_D32_BITS)) { + + pci_addr = PCI32_DIRECT_BASE; + slotp->bss_d32_flags = flags; + slotp->bss_d32_base = pci_addr; + pci_addr |= offset; + + /* Make sure we have an RRB (or two). 
+ */ + if (!(pcibr_soft->bs_rrb_fixed & (1 << pciio_slot))) { + have_rrbs = pcibr_soft->bs_rrb_valid[pciio_slot]; + if (have_rrbs < 2) { + if (slotp->bss_device & BRIDGE_DEV_PREF) + min_rrbs = 2; + else + min_rrbs = 1; + if (have_rrbs < min_rrbs) + do_pcibr_rrb_autoalloc(pcibr_soft, pciio_slot, min_rrbs - have_rrbs); + } + } +#if PCIBR_DMA_DEBUG +#if HWG_PERF_CHECK + if (xio_addr != 0x20000000) +#endif + printk("pcibr_dmatrans_addr:\n" + "\tpciio connection point %v\n" + "\txtalk connection point %v\n" + "\twanted paddr [0x%x..0x%x]\n" + "\txtalk_dmatrans_addr returned 0x%x\n" + "\tmapped via direct32 offset 0x%x\n" + "\twill DMA via pci addr 0x%x\n" + "\tnew flags: 0x%x\n", + pconn_vhdl, xconn_vhdl, + paddr, paddr + req_size - 1, + xio_addr, offset, pci_addr, (uint64_t) flags); +#endif + return (pci_addr); + } + /* our flags conflict with Device(x). + */ +#if PCIBR_DMA_DEBUG + printk("pcibr_dmatrans_addr:\n" + "\tpciio connection point %v\n" + "\txtalk connection point %v\n" + "\twanted paddr [0x%x..0x%x]\n" + "\txtalk_dmatrans_addr returned 0x%x\n" + "\tUnable to set Device(x) bits for Direct-32\n", + pconn_vhdl, xconn_vhdl, + paddr, paddr + req_size - 1, + xio_addr); +#endif + } + } + +#if PCIBR_DMA_DEBUG + printk("pcibr_dmatrans_addr:\n" + "\tpciio connection point %v\n" + "\txtalk connection point %v\n" + "\twanted paddr [0x%x..0x%x]\n" + "\txtalk_dmatrans_addr returned 0x%x\n" + "\tno acceptable PCI address found or constructable\n", + pconn_vhdl, xconn_vhdl, + paddr, paddr + req_size - 1, + xio_addr); +#endif + + return 0; +} + +/*ARGSUSED */ +alenlist_t +pcibr_dmatrans_list(devfs_handle_t pconn_vhdl, + device_desc_t dev_desc, + alenlist_t palenlist, + unsigned flags) +{ + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + devfs_handle_t xconn_vhdl = pcibr_soft->bs_conn; + pciio_slot_t pciio_slot = pciio_info_slot_get(pciio_info); + pcibr_soft_slot_t slotp = &pcibr_soft->bs_slot[pciio_slot]; + xwidgetnum_t xio_port; + + alenlist_t pciio_alenlist = 0; + alenlist_t xtalk_alenlist = 0; + + int inplace; + unsigned direct64; + unsigned al_flags; + + iopaddr_t xio_base; + alenaddr_t xio_addr; + size_t xio_size; + + size_t map_size; + iopaddr_t pci_base; + alenaddr_t pci_addr; + + unsigned relbits = 0; + + /* merge in forced flags */ + flags |= pcibr_soft->bs_dma_flags; + + inplace = flags & PCIIO_INPLACE; + direct64 = flags & PCIIO_DMA_A64; + al_flags = (flags & PCIIO_NOSLEEP) ? 
AL_NOSLEEP : 0; + + if (direct64) { + map_size = 1ull << 48; + xio_base = 0; + pci_base = slotp->bss_d64_base; + if ((pci_base != PCIBR_D64_BASE_UNSET) && + (flags == slotp->bss_d64_flags)) { + /* reuse previous base info */ + } else if (pcibr_try_set_device(pcibr_soft, pciio_slot, flags, BRIDGE_DEV_D64_BITS) < 0) { + /* DMA configuration conflict */ + goto fail; + } else { + relbits = BRIDGE_DEV_D64_BITS; + pci_base = + pcibr_flags_to_d64(flags, pcibr_soft); + } + } else { + xio_base = pcibr_soft->bs_dir_xbase; + map_size = 1ull << 31; + pci_base = slotp->bss_d32_base; + if ((pci_base != PCIBR_D32_BASE_UNSET) && + (flags == slotp->bss_d32_flags)) { + /* reuse previous base info */ + } else if (pcibr_try_set_device(pcibr_soft, pciio_slot, flags, BRIDGE_DEV_D32_BITS) < 0) { + /* DMA configuration conflict */ + goto fail; + } else { + relbits = BRIDGE_DEV_D32_BITS; + pci_base = PCI32_DIRECT_BASE; + } + } + + xtalk_alenlist = xtalk_dmatrans_list(xconn_vhdl, 0, palenlist, + flags & DMAMAP_FLAGS); + if (!xtalk_alenlist) + goto fail; + + alenlist_cursor_init(xtalk_alenlist, 0, NULL); + + if (inplace) { + pciio_alenlist = xtalk_alenlist; + } else { + pciio_alenlist = alenlist_create(al_flags); + if (!pciio_alenlist) + goto fail; + } + + while (ALENLIST_SUCCESS == + alenlist_get(xtalk_alenlist, NULL, 0, + &xio_addr, &xio_size, al_flags)) { + + /* + * find which XIO port this goes to. + */ + if (XIO_PACKED(xio_addr)) { + if (xio_addr == XIO_NOWHERE) { +#if PCIBR_DMA_DEBUG + printk("pcibr_dmatrans_addr:\n" + "\tpciio connection point %v\n" + "\txtalk connection point %v\n" + "\twanted paddr [0x%x..0x%x]\n" + "\txtalk_dmatrans_addr returned 0x%x\n", + pconn_vhdl, xconn_vhdl, + paddr, paddr + req_size - 1, + xio_addr); +#endif + return 0; + } + xio_port = XIO_PORT(xio_addr); + xio_addr = XIO_ADDR(xio_addr); + } else + xio_port = pcibr_soft->bs_mxid; + + /* + * If this DMA comes back to us, + * return the PCI MEM address on + * which it would land, or NULL + * if the target is something + * on bridge other than PCI MEM. + */ + if (xio_port == pcibr_soft->bs_xid) { + pci_addr = pcibr_addr_xio_to_pci(pcibr_soft, xio_addr, xio_size); +#ifdef IRIX + if (pci_addr == NULL) +#else + if ( (pci_addr == (alenaddr_t)NULL) ) +#endif + goto fail; + } else if (direct64) { + ASSERT(xio_port != 0); + pci_addr = pci_base | xio_addr + | ((uint64_t) xio_port << PCI64_ATTR_TARG_SHFT); + } else { + iopaddr_t offset = xio_addr - xio_base; + iopaddr_t endoff = xio_size + offset; + + if ((xio_size > map_size) || + (xio_addr < xio_base) || + (xio_port != pcibr_soft->bs_dir_xport) || + (endoff > map_size)) + goto fail; + + pci_addr = pci_base + (xio_addr - xio_base); + } + + /* write the PCI DMA address + * out to the scatter-gather list. + */ + if (inplace) { + if (ALENLIST_SUCCESS != + alenlist_replace(pciio_alenlist, NULL, + &pci_addr, &xio_size, al_flags)) + goto fail; + } else { + if (ALENLIST_SUCCESS != + alenlist_append(pciio_alenlist, + pci_addr, xio_size, al_flags)) + goto fail; + } + } + +#ifdef IRIX + if (relbits) +#else + if (relbits) { +#endif + if (direct64) { + slotp->bss_d64_flags = flags; + slotp->bss_d64_base = pci_base; + } else { + slotp->bss_d32_flags = flags; + slotp->bss_d32_base = pci_base; + } +#ifndef IRIX + } +#endif + if (!inplace) + alenlist_done(xtalk_alenlist); + + /* Reset the internal cursor of the alenlist to be returned back + * to the caller. 
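The per-block range check performed by pcibr_dmatrans_list() above reduces to: the block must lie inside a 2^48 window starting at XIO offset 0 for 64-bit direct, or inside a 2^31 window starting at the bridge's DIR_OFF base for 32-bit direct (the widget-number check is omitted here). The base used in main() is a made-up example.

#include <stdio.h>
#include <stdint.h>

static int in_direct_window(uint64_t xio_addr, uint64_t xio_size,
                            uint64_t xio_base, uint64_t map_size)
{
    uint64_t offset = xio_addr - xio_base;

    if (xio_size > map_size)          return 0;
    if (xio_addr < xio_base)          return 0;
    if (offset + xio_size > map_size) return 0;
    return 1;
}

int main(void)
{
    uint64_t d64_size  = 1ULL << 48;
    uint64_t d32_size  = 1ULL << 31;
    uint64_t dir_xbase = 0x80000000ULL;   /* hypothetical DIR_OFF base */

    printf("%d\n", in_direct_window(0x123450000ULL, 0x10000, 0, d64_size));          /* 1 */
    printf("%d\n", in_direct_window(0x90000000ULL,  0x10000, dir_xbase, d32_size));  /* 1 */
    printf("%d\n", in_direct_window(0x110000000ULL, 0x10000, dir_xbase, d32_size));  /* 0 */
    return 0;
}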
+ */ + alenlist_cursor_init(pciio_alenlist, 0, NULL); + return pciio_alenlist; + + fail: + if (relbits) + pcibr_release_device(pcibr_soft, pciio_slot, relbits); + if (pciio_alenlist && !inplace) + alenlist_destroy(pciio_alenlist); + return 0; +} + +void +pcibr_dmamap_drain(pcibr_dmamap_t map) +{ + xtalk_dmamap_drain(map->bd_xtalk); +} + +void +pcibr_dmaaddr_drain(devfs_handle_t pconn_vhdl, + paddr_t paddr, + size_t bytes) +{ + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + devfs_handle_t xconn_vhdl = pcibr_soft->bs_conn; + + xtalk_dmaaddr_drain(xconn_vhdl, paddr, bytes); +} + +void +pcibr_dmalist_drain(devfs_handle_t pconn_vhdl, + alenlist_t list) +{ + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + devfs_handle_t xconn_vhdl = pcibr_soft->bs_conn; + + xtalk_dmalist_drain(xconn_vhdl, list); +} + +/* + * Get the starting PCIbus address out of the given DMA map. + * This function is supposed to be used by a close friend of PCI bridge + * since it relies on the fact that the starting address of the map is fixed at + * the allocation time in the current implementation of PCI bridge. + */ +iopaddr_t +pcibr_dmamap_pciaddr_get(pcibr_dmamap_t pcibr_dmamap) +{ + return (pcibr_dmamap->bd_pci_addr); +} + +/* ===================================================================== + * INTERRUPT MANAGEMENT + */ + +static unsigned +pcibr_intr_bits(pciio_info_t info, + pciio_intr_line_t lines) +{ + pciio_slot_t slot = pciio_info_slot_get(info); + unsigned bbits = 0; + + /* + * Currently favored mapping from PCI + * slot number and INTA/B/C/D to Bridge + * PCI Interrupt Bit Number: + * + * SLOT A B C D + * 0 0 4 0 4 + * 1 1 5 1 5 + * 2 2 6 2 6 + * 3 3 7 3 7 + * 4 4 0 4 0 + * 5 5 1 5 1 + * 6 6 2 6 2 + * 7 7 3 7 3 + */ + + if (slot < 8) { + if (lines & (PCIIO_INTR_LINE_A| PCIIO_INTR_LINE_C)) + bbits |= 1 << slot; + if (lines & (PCIIO_INTR_LINE_B| PCIIO_INTR_LINE_D)) + bbits |= 1 << (slot ^ 4); + } + return bbits; +} + +#ifdef IRIX +/* Wrapper for pcibr interrupt threads. 
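The slot/INTx mapping implemented by pcibr_intr_bits() above is worth a worked example: INTA and INTC land on bit number slot, INTB and INTD on bit slot^4, which reproduces the table in the comment. The line encodings below are stand-ins for PCIIO_INTR_LINE_A..D.

#include <stdio.h>

#define LINE_A 0x1
#define LINE_B 0x2
#define LINE_C 0x4
#define LINE_D 0x8

static unsigned intr_bits(unsigned slot, unsigned lines)
{
    unsigned bbits = 0;

    if (slot < 8) {
        if (lines & (LINE_A | LINE_C))
            bbits |= 1u << slot;
        if (lines & (LINE_B | LINE_D))
            bbits |= 1u << (slot ^ 4);
    }
    return bbits;
}

int main(void)
{
    printf("slot 2, INTA      -> 0x%02x\n", intr_bits(2, LINE_A));           /* 0x04 */
    printf("slot 2, INTB      -> 0x%02x\n", intr_bits(2, LINE_B));           /* 0x40 */
    printf("slot 6, INTA|INTB -> 0x%02x\n", intr_bits(6, LINE_A | LINE_B));  /* 0x44 */
    return 0;
}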
*/ +static void +pcibr_intrd(pcibr_intr_t intr) +{ + /* Called on each restart */ + ASSERT(cpuid() == intr->bi_mustruncpu); + +#ifdef ITHREAD_LATENCY + xthread_update_latstats(intr->bi_tinfo->thd_latstats); +#endif /* ITHREAD_LATENCY */ + + ASSERT(intr->bi_func != NULL); + intr->bi_func(intr->bi_arg); /* Invoke the interrupt handler */ + + ipsema(&intr->bi_tinfo.thd_isync); /* Sleep 'till next interrupt */ + /* NOTREACHED */ +} + + +static void +pcibr_intrd_start(pcibr_intr_t intr) +{ + ASSERT(intr->bi_mustruncpu >= 0); + setmustrun(intr->bi_mustruncpu); + + xthread_set_func(KT_TO_XT(curthreadp), (xt_func_t *)pcibr_intrd, (void *)intr); + atomicSetInt(&intr->bi_tinfo.thd_flags, THD_INIT); + ipsema(&intr->bi_tinfo.thd_isync); /* Comes out in pcibr_intrd */ + /* NOTREACHED */ +} + + +static void +pcibr_thread_setup(pcibr_intr_t intr, int bridge_levels, ilvl_t intr_swlevel) +{ + char thread_name[32]; + + sprintf(thread_name, "pcibr_intrd[0x%x]", bridge_levels); + + /* XXX need to adjust priority whenever an interrupt is connected */ + atomicSetInt(&intr->bi_tinfo.thd_flags, THD_ISTHREAD | THD_REG); + xthread_setup(thread_name, intr_swlevel, &intr->bi_tinfo, + (xt_func_t *)pcibr_intrd_start, + (void *)intr); +} +#endif /* IRIX */ + + + +/*ARGSUSED */ +pcibr_intr_t +pcibr_intr_alloc(devfs_handle_t pconn_vhdl, + device_desc_t dev_desc, + pciio_intr_line_t lines, + devfs_handle_t owner_dev) +{ + pcibr_info_t pcibr_info = pcibr_info_get(pconn_vhdl); + pciio_slot_t pciio_slot = pcibr_info->f_slot; + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pcibr_info->f_mfast; + devfs_handle_t xconn_vhdl = pcibr_soft->bs_conn; + bridge_t *bridge = pcibr_soft->bs_base; + int is_threaded; + int thread_swlevel; + + xtalk_intr_t *xtalk_intr_p; + pcibr_intr_t *pcibr_intr_p; + pcibr_intr_list_t *intr_list_p; + pcibr_intr_wrap_t *intr_wrap_p; + + unsigned pcibr_int_bits; + unsigned pcibr_int_bit; + xtalk_intr_t xtalk_intr = (xtalk_intr_t)0; + hub_intr_t hub_intr; + pcibr_intr_t pcibr_intr; + pcibr_intr_list_t intr_entry; + pcibr_intr_list_t intr_list; + pcibr_intr_wrap_t intr_wrap; + bridgereg_t int_dev; + +#if DEBUG && INTR_DEBUG + printk("%v: pcibr_intr_alloc\n" + "%v:%s%s%s%s%s\n", + owner_dev, pconn_vhdl, + !(lines & 15) ? " No INTs?" : "", + lines & 1 ? " INTA" : "", + lines & 2 ? " INTB" : "", + lines & 4 ? " INTC" : "", + lines & 8 ? " INTD" : ""); +#endif + + NEW(pcibr_intr); + if (!pcibr_intr) + return NULL; + + if (dev_desc) { + is_threaded = !(device_desc_flags_get(dev_desc) & D_INTR_NOTHREAD); + if (is_threaded) + thread_swlevel = device_desc_intr_swlevel_get(dev_desc); + } else { + extern int default_intr_pri; + + is_threaded = 1; /* PCI interrupts are threaded, by default */ + thread_swlevel = default_intr_pri; + } + + pcibr_intr->bi_dev = pconn_vhdl; + pcibr_intr->bi_lines = lines; + pcibr_intr->bi_soft = pcibr_soft; + pcibr_intr->bi_ibits = 0; /* bits will be added below */ + pcibr_intr->bi_func = 0; /* unset until connect */ + pcibr_intr->bi_arg = 0; /* unset until connect */ + pcibr_intr->bi_flags = is_threaded ? 0 : PCIIO_INTR_NOTHREAD; + pcibr_intr->bi_mustruncpu = CPU_NONE; + + pcibr_int_bits = pcibr_soft->bs_intr_bits((pciio_info_t)pcibr_info, lines); + + + /* + * For each PCI interrupt line requested, figure + * out which Bridge PCI Interrupt Line it maps + * to, and make sure there are xtalk resources + * allocated for it. 
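The xtalk interrupt setup below uses a lock-free, allocate-once pattern: allocate a candidate, try to install it with compare_and_swap_ptr() against NULL, and on failure free the candidate and adopt whatever the winner installed. A rough user-space sketch of the same pattern, using C11 atomics as a stand-in for the kernel primitive and a malloc'ed int as the stand-in resource:

#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>

static _Atomic(int *) shared_intr = NULL;    /* plays the role of bsi_xtalk_intr */

static int *get_or_install(void)
{
    int *cur = atomic_load(&shared_intr);
    int *mine;

    if (cur != NULL)
        return cur;                          /* already set up by someone */

    mine = malloc(sizeof *mine);             /* our candidate allocation */
    *mine = 42;

    int *expected = NULL;
    if (atomic_compare_exchange_strong(&shared_intr, &expected, mine))
        return mine;                         /* we won: ours is now shared */

    free(mine);                              /* someone beat us: use theirs */
    return atomic_load(&shared_intr);
}

int main(void)
{
    int *a = get_or_install();
    int *b = get_or_install();
    printf("same resource both times: %s\n", (a == b) ? "yes" : "no");
    return 0;
}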
+ */ +#if DEBUG && INTR_DEBUG + printk("pcibr_int_bits: 0x%X\n", pcibr_int_bits); +#endif + for (pcibr_int_bit = 0; pcibr_int_bit < 8; pcibr_int_bit ++) { + if (pcibr_int_bits & (1 << pcibr_int_bit)) { + xtalk_intr_p = &pcibr_soft->bs_intr[pcibr_int_bit].bsi_xtalk_intr; + + xtalk_intr = *xtalk_intr_p; + + if (xtalk_intr == NULL) { + /* + * This xtalk_intr_alloc is constrained for two reasons: + * 1) Normal interrupts and error interrupts need to be delivered + * through a single xtalk target widget so that there aren't any + * ordering problems with DMA, completion interrupts, and error + * interrupts. (Use of xconn_vhdl forces this.) + * + * 2) On IP35, addressing constraints on IP35 and Bridge force + * us to use a single PI number for all interrupts from a + * single Bridge. (IP35-specific code forces this, and we + * verify in pcibr_setwidint.) + */ + xtalk_intr = xtalk_intr_alloc(xconn_vhdl, dev_desc, owner_dev); +#if DEBUG && INTR_DEBUG + printk("%v: xtalk_intr=0x%X\n", xconn_vhdl, xtalk_intr); +#endif + + /* both an assert and a runtime check on this: + * we need to check in non-DEBUG kernels, and + * the ASSERT gets us more information when + * we use DEBUG kernels. + */ + ASSERT(xtalk_intr != NULL); + if (xtalk_intr == NULL) { + /* it is quite possible that our + * xtalk_intr_alloc failed because + * someone else got there first, + * and we can find their results + * in xtalk_intr_p. + */ + if (!*xtalk_intr_p) { +#ifdef SUPPORT_PRINTING_V_FORMAT + PRINT_ALERT( + "pcibr_intr_alloc %v: unable to get xtalk interrupt resources", + xconn_vhdl); +#endif + /* yes, we leak resources here. */ + return 0; + } + } else if (compare_and_swap_ptr((void **) xtalk_intr_p, NULL, xtalk_intr)) { + /* + * now tell the bridge which slot is + * using this interrupt line. + */ + int_dev = bridge->b_int_device; + int_dev &= ~BRIDGE_INT_DEV_MASK(pcibr_int_bit); + int_dev |= pciio_slot << BRIDGE_INT_DEV_SHFT(pcibr_int_bit); + bridge->b_int_device = int_dev; /* XXXMP */ + +#if DEBUG && INTR_DEBUG + printk("%v: bridge intr bit %d clears my wrb\n", + pconn_vhdl, pcibr_int_bit); +#endif + } else { + /* someone else got one allocated first; + * free the one we just created, and + * retrieve the one they allocated. + */ + xtalk_intr_free(xtalk_intr); + xtalk_intr = *xtalk_intr_p; +#if PARANOID + /* once xtalk_intr is set, we never clear it, + * so if the CAS fails above, this condition + * can "never happen" ... + */ + if (!xtalk_intr) { + PRINT_ALERT( + "pcibr_intr_alloc %v: unable to set xtalk interrupt resources", + xconn_vhdl); + /* yes, we leak resources here. */ + return 0; + } +#endif + } + } + + /* + * For threaded drivers, set the interrupt thread to run wherever + * the interrupt is targeted. + */ +#ifdef notyet + if (is_threaded) { + cpuid_t old_mustrun = pcibr_intr->bi_mustruncpu; + pcibr_intr->bi_mustruncpu = cpuvertex_to_cpuid(xtalk_intr_cpu_get(xtalk_intr)); + ASSERT(pcibr_intr->bi_mustruncpu >= 0); + + /* + * This is possible, but very unlikely: It means that 2 (or more) interrupts + * originating on a single Bridge and used by a single device were unable to + * find sufficient xtalk interrupt resources that would allow them all to be + * handled by the same CPU. If someone tries to target lots of interrupts to + * a single CPU, we might hit this case. Things should still operate correctly, + * but it's a sub-optimal configuration. 
+ */ + if ((old_mustrun != CPU_NONE) && (old_mustrun != pcibr_intr->bi_mustruncpu)) { +#ifdef SUPPORT_PRINTING_V_FORMAT + PRINT_WARNING( "Conflict on where to schedule interrupts for %v\n", pconn_vhdl); +#endif + PRINT_WARNING( "(on cpu %d or on cpu %d)\n", old_mustrun, pcibr_intr->bi_mustruncpu); + } + } +#endif + + pcibr_intr->bi_ibits |= 1 << pcibr_int_bit; + + NEW(intr_entry); + intr_entry->il_next = NULL; + intr_entry->il_intr = pcibr_intr; + intr_entry->il_wrbf = &(bridge->b_wr_req_buf[pciio_slot].reg); + + intr_list_p = &pcibr_soft->bs_intr[pcibr_int_bit].bsi_pcibr_intr_list; + if (compare_and_swap_ptr((void **) intr_list_p, NULL, intr_entry)) { + /* we are the first interrupt on this bridge bit. + */ +#if DEBUG && INTR_DEBUG + printk("%v INT 0x%x (bridge bit %d) allocated [FIRST]\n", + pconn_vhdl, pcibr_int_bits, pcibr_int_bit); +#endif + continue; + } + intr_list = *intr_list_p; + pcibr_intr_p = &intr_list->il_intr; + if (compare_and_swap_ptr((void **) pcibr_intr_p, NULL, pcibr_intr)) { + /* first entry on list was erased, + * and we replaced it, so we + * don't need our intr_entry. + */ + DEL(intr_entry); +#if DEBUG && INTR_DEBUG + printk("%v INT 0x%x (bridge bit %d) replaces erased first\n", + pconn_vhdl, pcibr_int_bits, pcibr_int_bit); +#endif + continue; + } + intr_list_p = &intr_list->il_next; + if (compare_and_swap_ptr((void **) intr_list_p, NULL, intr_entry)) { + /* we are the new second interrupt on this bit. + * switch to local wrapper. + */ +#if DEBUG && INTR_DEBUG + printk("%v INT 0x%x (bridge bit %d) is new SECOND\n", + pconn_vhdl, pcibr_int_bits, pcibr_int_bit); +#endif + NEW(intr_wrap); + intr_wrap->iw_soft = pcibr_soft; + intr_wrap->iw_stat = &(bridge->b_int_status); + intr_wrap->iw_intr = 1 << pcibr_int_bit; + intr_wrap->iw_list = intr_list; + intr_wrap_p = &pcibr_soft->bs_intr[pcibr_int_bit].bsi_pcibr_intr_wrap; + if (!compare_and_swap_ptr((void **) intr_wrap_p, NULL, intr_wrap)) { + /* someone else set up the wrapper. + */ + DEL(intr_wrap); + continue; +#if DEBUG && INTR_DEBUG + } else { + printk("%v bridge bit %d wrapper state created\n", + pconn_vhdl, pcibr_int_bit); +#endif + } + continue; + } + while (1) { + pcibr_intr_p = &intr_list->il_intr; + if (compare_and_swap_ptr((void **) pcibr_intr_p, NULL, pcibr_intr)) { + /* an entry on list was erased, + * and we replaced it, so we + * don't need our intr_entry. 
+ */ + DEL(intr_entry); +#if DEBUG && INTR_DEBUG + printk("%v INT 0x%x (bridge bit %d) replaces erased Nth\n", + pconn_vhdl, pcibr_int_bits, pcibr_int_bit); +#endif + break; + } + intr_list_p = &intr_list->il_next; + if (compare_and_swap_ptr((void **) intr_list_p, NULL, intr_entry)) { + /* entry appended to share list + */ +#if DEBUG && INTR_DEBUG + printk("%v INT 0x%x (bridge bit %d) is new Nth\n", + pconn_vhdl, pcibr_int_bits, pcibr_int_bit); +#endif + break; + } + /* step to next record in chain + */ + intr_list = *intr_list_p; + } + } + } + +#ifdef IRIX + if (is_threaded) { + /* Set pcibr_intr->bi_tinfo */ + pcibr_thread_setup(pcibr_intr, pcibr_int_bits, thread_swlevel); + ASSERT(!(pcibr_intr->bi_flags & PCIIO_INTR_CONNECTED)); + } +#endif + +#if DEBUG && INTR_DEBUG + printk("%v pcibr_intr_alloc complete\n", pconn_vhdl); +#endif + hub_intr = (hub_intr_t)xtalk_intr; + pcibr_intr->bi_irq = hub_intr->i_bit; + pcibr_intr->bi_cpu = hub_intr->i_cpuid; + return pcibr_intr; +} + +/*ARGSUSED */ +void +pcibr_intr_free(pcibr_intr_t pcibr_intr) +{ + unsigned pcibr_int_bits = pcibr_intr->bi_ibits; + pcibr_soft_t pcibr_soft = pcibr_intr->bi_soft; + unsigned pcibr_int_bit; + pcibr_intr_list_t intr_list; + pcibr_intr_wrap_t intr_wrap; + xtalk_intr_t *xtalk_intrp; + + for (pcibr_int_bit = 0; pcibr_int_bit < 8; pcibr_int_bit++) { + if (pcibr_int_bits & (1 << pcibr_int_bit)) { + for (intr_list = + pcibr_soft->bs_intr[pcibr_int_bit].bsi_pcibr_intr_list; + intr_list != NULL; + intr_list = intr_list->il_next) + if (compare_and_swap_ptr((void **) &intr_list->il_intr, + pcibr_intr, + NULL)) { +#if DEBUG && INTR_DEBUG + printk("%s: cleared a handler from bit %d\n", + pcibr_soft->bs_name, pcibr_int_bit); +#endif + } + /* If this interrupt line is not being shared between multiple + * devices release the xtalk interrupt resources. + */ + intr_wrap = + pcibr_soft->bs_intr[pcibr_int_bit].bsi_pcibr_intr_wrap; + xtalk_intrp = &pcibr_soft->bs_intr[pcibr_int_bit].bsi_xtalk_intr; + if ((intr_wrap == NULL) && (*xtalk_intrp)) { + + bridge_t *bridge = pcibr_soft->bs_base; + bridgereg_t int_dev; + + xtalk_intr_free(*xtalk_intrp); + *xtalk_intrp = 0; + + /* Clear the PCI device interrupt to bridge interrupt pin + * mapping. 
+ */ + int_dev = bridge->b_int_device; + int_dev &= ~BRIDGE_INT_DEV_MASK(pcibr_int_bit); + bridge->b_int_device = int_dev; + + } + } + } + DEL(pcibr_intr); +} + +LOCAL void +pcibr_setpciint(xtalk_intr_t xtalk_intr) +{ + iopaddr_t addr = xtalk_intr_addr_get(xtalk_intr); + xtalk_intr_vector_t vect = xtalk_intr_vector_get(xtalk_intr); + bridgereg_t *int_addr = (bridgereg_t *) + xtalk_intr_sfarg_get(xtalk_intr); + + *int_addr = ((BRIDGE_INT_ADDR_HOST & (addr >> 30)) | + (BRIDGE_INT_ADDR_FLD & vect)); +} + +/*ARGSUSED */ +int +pcibr_intr_connect(pcibr_intr_t pcibr_intr, + intr_func_t intr_func, + intr_arg_t intr_arg, + void *thread) +{ + pcibr_soft_t pcibr_soft = pcibr_intr->bi_soft; + bridge_t *bridge = pcibr_soft->bs_base; + unsigned pcibr_int_bits = pcibr_intr->bi_ibits; + unsigned pcibr_int_bit; + bridgereg_t b_int_enable; + unsigned s; + + if (pcibr_intr == NULL) + return -1; + +#if DEBUG && INTR_DEBUG + printk("%v: pcibr_intr_connect 0x%X(0x%X)\n", + pcibr_intr->bi_dev, intr_func, intr_arg); +#endif + + pcibr_intr->bi_func = intr_func; + pcibr_intr->bi_arg = intr_arg; + *((volatile unsigned *)&pcibr_intr->bi_flags) |= PCIIO_INTR_CONNECTED; + + /* + * For each PCI interrupt line requested, figure + * out which Bridge PCI Interrupt Line it maps + * to, and make sure there are xtalk resources + * allocated for it. + */ + for (pcibr_int_bit = 0; pcibr_int_bit < 8; pcibr_int_bit++) + if (pcibr_int_bits & (1 << pcibr_int_bit)) { + pcibr_intr_wrap_t intr_wrap; + xtalk_intr_t xtalk_intr; + int *setptr; + + xtalk_intr = pcibr_soft->bs_intr[pcibr_int_bit].bsi_xtalk_intr; + + /* if we have no wrap structure, + * tell xtalk to deliver the interrupt + * directly to the client. + */ + intr_wrap = pcibr_soft->bs_intr[pcibr_int_bit].bsi_pcibr_intr_wrap; + if (intr_wrap == NULL) { + xtalk_intr_connect(xtalk_intr, + (intr_func_t) intr_func, + (intr_arg_t) intr_arg, + (xtalk_intr_setfunc_t) pcibr_setpciint, + (void *) &(bridge->b_int_addr[pcibr_int_bit].addr), + thread); +#if DEBUG && INTR_DEBUG + printk("%v bridge bit %d routed by xtalk\n", + pcibr_intr->bi_dev, pcibr_int_bit); +#endif + continue; + } + + setptr = &pcibr_soft->bs_intr[pcibr_int_bit].bsi_pcibr_wrap_set; + if (*setptr) + continue; + + + /* We have a wrap structure, so we're sharing a Bridge interrupt level */ + + xtalk_intr_disconnect(xtalk_intr); /* Disconnect old interrupt */ + + /* + If the existing xtalk_intr was allocated without the NOTHREAD flag, + we need to allocate a new one that's NOTHREAD, and connect to the + new one. pcibr_intr_list_func expects to run at interrupt level + rather than in a thread. With today's devices, this can't happen, + so let's punt on writing the code till we need it (probably never). + Instead, just ASSERT that we're a NOTHREAD xtalk_intr. 
+ */ +#ifdef IRIX + ASSERT_ALWAYS(!(pcibr_intr->bi_flags & PCIIO_INTR_NOTHREAD) || + xtalk_intr_flags_get(xtalk_intr) & XTALK_INTR_NOTHREAD); +#endif + + /* Use the wrapper dispatch function to handle shared Bridge interrupts */ + xtalk_intr_connect(xtalk_intr, + pcibr_intr_list_func, + (intr_arg_t) intr_wrap, + (xtalk_intr_setfunc_t) pcibr_setpciint, + (void *) &(bridge->b_int_addr[pcibr_int_bit].addr), + 0); + *setptr = 1; + +#if DEBUG && INTR_DEBUG + printk("%v bridge bit %d wrapper connected\n", + pcibr_intr->bi_dev, pcibr_int_bit); +#endif + } + s = pcibr_lock(pcibr_soft); + b_int_enable = bridge->b_int_enable; + b_int_enable |= pcibr_int_bits; + bridge->b_int_enable = b_int_enable; + bridge->b_wid_tflush; /* wait until Bridge PIO complete */ + pcibr_unlock(pcibr_soft, s); + + return 0; +} + +/*ARGSUSED */ +void +pcibr_intr_disconnect(pcibr_intr_t pcibr_intr) +{ + pcibr_soft_t pcibr_soft = pcibr_intr->bi_soft; + bridge_t *bridge = pcibr_soft->bs_base; + unsigned pcibr_int_bits = pcibr_intr->bi_ibits; + unsigned pcibr_int_bit; + pcibr_intr_wrap_t intr_wrap; + bridgereg_t b_int_enable; + unsigned s; + + /* Stop calling the function. Now. + */ + *((volatile unsigned *)&pcibr_intr->bi_flags) &= ~PCIIO_INTR_CONNECTED; + pcibr_intr->bi_func = 0; + pcibr_intr->bi_arg = 0; + /* + * For each PCI interrupt line requested, figure + * out which Bridge PCI Interrupt Line it maps + * to, and disconnect the interrupt. + */ + + /* don't disable interrupts for lines that + * are shared between devices. + */ + for (pcibr_int_bit = 0; pcibr_int_bit < 8; pcibr_int_bit++) + if ((pcibr_int_bits & (1 << pcibr_int_bit)) && + (pcibr_soft->bs_intr[pcibr_int_bit].bsi_pcibr_wrap_set)) + pcibr_int_bits &= ~(1 << pcibr_int_bit); + if (!pcibr_int_bits) + return; + + s = pcibr_lock(pcibr_soft); + b_int_enable = bridge->b_int_enable; + b_int_enable &= ~pcibr_int_bits; + bridge->b_int_enable = b_int_enable; + bridge->b_wid_tflush; /* wait until Bridge PIO complete */ + pcibr_unlock(pcibr_soft, s); + + for (pcibr_int_bit = 0; pcibr_int_bit < 8; pcibr_int_bit++) + if (pcibr_int_bits & (1 << pcibr_int_bit)) { + /* if we have set up the share wrapper, + * do not disconnect it. + */ + if (pcibr_soft->bs_intr[pcibr_int_bit].bsi_pcibr_wrap_set) + continue; + + xtalk_intr_disconnect(pcibr_soft->bs_intr[pcibr_int_bit].bsi_xtalk_intr); + + /* if we have a share wrapper state, + * connect us up; this closes the hole + * where the connection of the wrapper + * was in progress as we disconnected. 
+ */ + intr_wrap = pcibr_soft->bs_intr[pcibr_int_bit].bsi_pcibr_intr_wrap; + if (intr_wrap == NULL) + continue; + + + xtalk_intr_connect(pcibr_soft->bs_intr[pcibr_int_bit].bsi_xtalk_intr, + pcibr_intr_list_func, + (intr_arg_t) intr_wrap, + (xtalk_intr_setfunc_t) pcibr_setpciint, + (void *) &(bridge->b_int_addr[pcibr_int_bit].addr), + 0); + } +} + +/*ARGSUSED */ +devfs_handle_t +pcibr_intr_cpu_get(pcibr_intr_t pcibr_intr) +{ + pcibr_soft_t pcibr_soft = pcibr_intr->bi_soft; + unsigned pcibr_int_bits = pcibr_intr->bi_ibits; + unsigned pcibr_int_bit; + + for (pcibr_int_bit = 0; pcibr_int_bit < 8; pcibr_int_bit++) + if (pcibr_int_bits & (1 << pcibr_int_bit)) + return xtalk_intr_cpu_get(pcibr_soft->bs_intr[pcibr_int_bit].bsi_xtalk_intr); + return 0; +} + +/* ===================================================================== + * INTERRUPT HANDLING + */ +LOCAL void +pcibr_clearwidint(bridge_t *bridge) +{ + bridge->b_wid_int_upper = 0; + bridge->b_wid_int_lower = 0; +} + + +LOCAL void +pcibr_setwidint(xtalk_intr_t intr) +{ + xwidgetnum_t targ = xtalk_intr_target_get(intr); + iopaddr_t addr = xtalk_intr_addr_get(intr); + xtalk_intr_vector_t vect = xtalk_intr_vector_get(intr); + widgetreg_t NEW_b_wid_int_upper, NEW_b_wid_int_lower; + widgetreg_t OLD_b_wid_int_upper, OLD_b_wid_int_lower; + + bridge_t *bridge = (bridge_t *)xtalk_intr_sfarg_get(intr); + + NEW_b_wid_int_upper = ( (0x000F0000 & (targ << 16)) | + XTALK_ADDR_TO_UPPER(addr)); + NEW_b_wid_int_lower = XTALK_ADDR_TO_LOWER(addr); + + OLD_b_wid_int_upper = bridge->b_wid_int_upper; + OLD_b_wid_int_lower = bridge->b_wid_int_lower; + +#if defined(CONFIG_SGI_IP35) || defined(CONFIG_IA64_SGI_SN1) || defined(CONFIG_IA64_GENERIC) + /* Verify that all interrupts from this Bridge are using a single PI */ + if ((OLD_b_wid_int_upper != 0) && (OLD_b_wid_int_lower != 0)) { + /* + * Once set, these registers shouldn't change; they should + * be set multiple times with the same values. + * + * If we're attempting to change these registers, it means + * that our heuristics for allocating interrupts in a way + * appropriate for IP35 have failed, and the admin needs to + * explicitly direct some interrupts (or we need to make the + * heuristics more clever). + * + * In practice, we hope this doesn't happen very often, if + * at all. + */ + if ((OLD_b_wid_int_upper != NEW_b_wid_int_upper) || + (OLD_b_wid_int_lower != NEW_b_wid_int_lower)) { + PRINT_WARNING("Interrupt allocation is too complex.\n"); + PRINT_WARNING("Use explicit administrative interrupt targetting.\n"); + PRINT_WARNING("bridge=0x%lx targ=0x%x\n", (unsigned long)bridge, targ); + PRINT_WARNING("NEW=0x%x/0x%x OLD=0x%x/0x%x\n", + NEW_b_wid_int_upper, NEW_b_wid_int_lower, + OLD_b_wid_int_upper, OLD_b_wid_int_lower); + PRINT_PANIC("PCI Bridge interrupt targetting error\n"); + } + } +#endif /* CONFIG_SGI_IP35 */ + + bridge->b_wid_int_upper = NEW_b_wid_int_upper; + bridge->b_wid_int_lower = NEW_b_wid_int_lower; + bridge->b_int_host_err = vect; +} + +/* + * pcibr_intr_preset: called during mlreset time + * if the platform specific code needs to route + * one of the Bridge's xtalk interrupts before the + * xtalk infrastructure is available. 
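+ * + * A minimal usage sketch (hypothetical platform init code; bridge_base, + * targ, err_addr, err_vect, int_addr and int_vect are assumed values): + * the widget error interrupt itself is routed by passing + * which_widget_intr == -1, and a PCI device interrupt by passing its + * bridge interrupt bit number: + * + * pcibr_xintr_preset(bridge_base, -1, targ, err_addr, err_vect); + * pcibr_xintr_preset(bridge_base, 2, targ, int_addr, int_vect);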
+ */ +void +pcibr_xintr_preset(void *which_widget, + int which_widget_intr, + xwidgetnum_t targ, + iopaddr_t addr, + xtalk_intr_vector_t vect) +{ + bridge_t *bridge = (bridge_t *) which_widget; + + if (which_widget_intr == -1) { + /* bridge widget error interrupt */ + bridge->b_wid_int_upper = ( (0x000F0000 & (targ << 16)) | + XTALK_ADDR_TO_UPPER(addr)); + bridge->b_wid_int_lower = XTALK_ADDR_TO_LOWER(addr); + bridge->b_int_host_err = vect; + + /* turn on all interrupts except + * the PCI interrupt requests, + * at least at heart. + */ + bridge->b_int_enable |= ~BRIDGE_IMR_INT_MSK; + + } else { + /* routing a pci device interrupt. + * targ and low 38 bits of addr must + * be the same as the already set + * value for the widget error interrupt. + */ + bridge->b_int_addr[which_widget_intr].addr = + ((BRIDGE_INT_ADDR_HOST & (addr >> 30)) | + (BRIDGE_INT_ADDR_FLD & vect)); + /* + * now bridge can let it through; + * NB: still should be blocked at + * xtalk provider end, until the service + * function is set. + */ + bridge->b_int_enable |= 1 << vect; + } + bridge->b_wid_tflush; /* wait until Bridge PIO complete */ +} + +void +pcibr_intr_list_func(intr_arg_t arg) +{ + pcibr_intr_wrap_t wrap = (pcibr_intr_wrap_t) arg; + reg_p statp = wrap->iw_stat; + bridgereg_t mask = wrap->iw_intr; + reg_p wrbf; + pcibr_intr_list_t list; + pcibr_intr_t intr; + intr_func_t func; + int clearit; + int thread_count = 0; + + /* + * Loop until either + * 1) All interrupts have been removed by direct-called interrupt handlers OR + * 2) We've woken up at least one interrupt thread that will presumably clear + * Bridge interrupt bits + */ + + while ((!thread_count) && (mask & *statp)) { + clearit = 1; + for (list = wrap->iw_list; + list != NULL; + list = list->il_next) { + if ((intr = list->il_intr) && + (intr->bi_flags & PCIIO_INTR_CONNECTED)) { + int is_threaded; + + ASSERT(intr->bi_func); + + /* + * This device may have initiated write + * requests since the bridge last saw + * an edge on this interrupt input; flushing + * the buffer here should help but may not + * be sufficient if we get more requests after + * the flush, followed by the card deciding + * it wants service, before the interrupt + * handler checks to see if things need + * to be done. + * + * There is a similar race condition if + * an interrupt handler loops around and + * notices further service is requred. + * Perhaps we need to have an explicit + * call that interrupt handlers need to + * do between noticing that DMA to memory + * has completed, but before observing the + * contents of memory? + */ +#ifdef IRIX + if (wrbf = list->il_wrbf) +#else + if ((wrbf = list->il_wrbf)) +#endif + (void) *wrbf; /* write request buffer flush */ + + is_threaded = !(intr->bi_flags & PCIIO_INTR_NOTHREAD); + + if (is_threaded) { + thread_count++; +#ifdef IRIX + icvsema(&intr->bi_tinfo.thd_isync, intr->bi_tinfo.thd_pri, + NULL, NULL, NULL); +#endif + } else { + /* Non-threaded. Call the interrupt handler at interrupt level */ + func = intr->bi_func; + func(intr->bi_arg); + } + + clearit = 0; + } + } + + /* If there were no handlers, + * disable the interrupt and return. + * It will get enabled again after + * a handler is connected. + * If we don't do this, we would + * sit here and spin through the + * list forever. 
+ */ + if (clearit) { + pcibr_soft_t pcibr_soft = wrap->iw_soft; + bridge_t *bridge = pcibr_soft->bs_base; + bridgereg_t b_int_enable; + unsigned s; + + s = pcibr_lock(pcibr_soft); + b_int_enable = bridge->b_int_enable; + b_int_enable &= ~mask; + bridge->b_int_enable = b_int_enable; + bridge->b_wid_tflush; /* wait until Bridge PIO complete */ + pcibr_unlock(pcibr_soft, s); + return; + } + } +} + +/* ===================================================================== + * ERROR HANDLING + */ + +#ifdef DEBUG +#ifdef ERROR_DEBUG +#define BRIDGE_PIOERR_TIMEOUT 100 /* Timeout with ERROR_DEBUG defined */ +#else +#define BRIDGE_PIOERR_TIMEOUT 40 /* Timeout in debug mode */ +#endif +#else +#define BRIDGE_PIOERR_TIMEOUT 1 /* Timeout in non-debug mode */ +#endif + +LOCAL void +print_bridge_errcmd(uint32_t cmdword, char *errtype) +{ +#ifdef SUPPORT_PRINTING_R_FORMAT + PRINT_WARNING( + " Bridge %s error command word register %R", + errtype, cmdword, xio_cmd_bits); +#else + PRINT_WARNING( + " Bridge %s error command word register 0x%x", + errtype, cmdword); +#endif +} + +LOCAL char *pcibr_isr_errs[] = +{ + "", "", "", "", "", "", "", "", + "08: GIO non-contiguous byte enable in crosstalk packet", + "09: PCI to Crosstalk read request timeout", + "10: PCI retry operation count exhausted.", + "11: PCI bus device select timeout", + "12: PCI device reported parity error", + "13: PCI Address/Cmd parity error ", + "14: PCI Bridge detected parity error", + "15: PCI abort condition", + "16: SSRAM parity error", + "17: LLP Transmitter Retry count wrapped", + "18: LLP Transmitter side required Retry", + "19: LLP Receiver retry count wrapped", + "20: LLP Receiver check bit error", + "21: LLP Receiver sequence number error", + "22: Request packet overflow", + "23: Request operation not supported by bridge", + "24: Request packet has invalid address for bridge widget", + "25: Incoming request xtalk command word error bit set or invalid sideband", + "26: Incoming response xtalk command word error bit set or invalid sideband", + "27: Framing error, request cmd data size does not match actual", + "28: Framing error, response cmd data size does not match actual", + "29: Unexpected response arrived", + "30: Access to SSRAM beyond device limits", + "31: Multiple errors occurred", +}; + +/* + * PCI Bridge Error interrupt handling. + * This routine gets invoked from system interrupt dispatcher + * and is responsible for invoking appropriate error handler, + * depending on the type of error. + * This IS a duplicate of bridge_errintr defined specfic to IP30. + * There are some minor differences in terms of the return value and + * parameters passed. One of these two should be removed at some point + * of time. 
+ */ +/*ARGSUSED */ +void +pcibr_error_dump(pcibr_soft_t pcibr_soft) +{ + bridge_t *bridge = pcibr_soft->bs_base; + bridgereg_t int_status; + int i; + + int_status = (bridge->b_int_status & ~BRIDGE_ISR_INT_MSK); + + PRINT_ALERT( "%s PCI BRIDGE ERROR: int_status is 0x%X", + pcibr_soft->bs_name, int_status); + + for (i = PCIBR_ISR_ERR_START; i < PCIBR_ISR_MAX_ERRS; i++) { + if (int_status & (1 << i)) { + PRINT_WARNING( "%s", pcibr_isr_errs[i]); + } + } + + if (int_status & BRIDGE_ISR_XTALK_ERROR) { + print_bridge_errcmd(bridge->b_wid_err_cmdword, ""); + + PRINT_WARNING(" Bridge error address 0x%lx", + (((uint64_t) bridge->b_wid_err_upper << 32) | + bridge->b_wid_err_lower)); + + print_bridge_errcmd(bridge->b_wid_aux_err, "Aux"); + + if (int_status & (BRIDGE_ISR_BAD_XRESP_PKT | BRIDGE_ISR_RESP_XTLK_ERR)) { + PRINT_WARNING(" Bridge response buffer: dev-num %d buff-num %d addr 0x%lx\n", + ((bridge->b_wid_resp_upper >> 20) & 0x3), + ((bridge->b_wid_resp_upper >> 16) & 0xF), + (((uint64_t) (bridge->b_wid_resp_upper & 0xFFFF) << 32) | + bridge->b_wid_resp_lower)); + } + } + if (int_status & BRIDGE_ISR_SSRAM_PERR) + PRINT_WARNING(" Bridge SSRAM parity error register 0x%x", + bridge->b_ram_perr); + + if (int_status & BRIDGE_ISR_PCIBUS_ERROR) { + PRINT_WARNING(" PCI/GIO error upper address register 0x%x", + bridge->b_pci_err_upper); + + PRINT_WARNING(" PCI/GIO error lower address register 0x%x", + bridge->b_pci_err_lower); + } + if (int_status & BRIDGE_ISR_ERROR_FATAL) { + cmn_err_tag(14, (int)CE_PANIC, "PCI Bridge Error interrupt killed the system"); + /*NOTREACHED */ + } else { + PRINT_ALERT( "Non-fatal Error in Bridge.."); + } +} + +#define PCIBR_ERRINTR_GROUP(error) \ + ((error) & (BRIDGE_IRR_PCI_GRP|BRIDGE_IRR_GIO_GRP)) + +uint32_t +pcibr_errintr_group(uint32_t error) +{ + uint32_t group = BRIDGE_IRR_MULTI_CLR; + + if (error & BRIDGE_IRR_PCI_GRP) + group |= BRIDGE_IRR_PCI_GRP_CLR; + if (error & BRIDGE_IRR_SSRAM_GRP) + group |= BRIDGE_IRR_SSRAM_GRP_CLR; + if (error & BRIDGE_IRR_LLP_GRP) + group |= BRIDGE_IRR_LLP_GRP_CLR; + if (error & BRIDGE_IRR_REQ_DSP_GRP) + group |= BRIDGE_IRR_REQ_DSP_GRP_CLR; + if (error & BRIDGE_IRR_RESP_BUF_GRP) + group |= BRIDGE_IRR_RESP_BUF_GRP_CLR; + if (error & BRIDGE_IRR_CRP_GRP) + group |= BRIDGE_IRR_CRP_GRP_CLR; + + return group; + +} + + +/* pcibr_pioerr_check(): + * Check to see if this pcibr has a PCI PIO + * TIMEOUT error; if so, clear it and bump + * the timeout-count on any piomaps that + * could cover the address.
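+ * + * Note the sequence used below: the interrupt status is sampled, the + * error address registers are read, and the status is sampled again; + * the error is only cleared and counted if the PIOERR bit is still set + * on the second read, presumably so an address latched by a newly + * arriving error is not attributed to the wrong access.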
+ */ +static void +pcibr_pioerr_check(pcibr_soft_t soft) +{ + bridge_t *bridge; + bridgereg_t b_int_status; + bridgereg_t b_pci_err_lower; + bridgereg_t b_pci_err_upper; + iopaddr_t pci_addr; + pciio_slot_t slot; + pcibr_piomap_t map; + iopaddr_t base; + size_t size; + unsigned win; + int func; + + bridge = soft->bs_base; + b_int_status = bridge->b_int_status; + if (b_int_status & BRIDGE_ISR_PCIBUS_PIOERR) { + b_pci_err_lower = bridge->b_pci_err_lower; + b_pci_err_upper = bridge->b_pci_err_upper; + b_int_status = bridge->b_int_status; + if (b_int_status & BRIDGE_ISR_PCIBUS_PIOERR) { + bridge->b_int_rst_stat = (BRIDGE_IRR_PCI_GRP_CLR| + BRIDGE_IRR_MULTI_CLR); + + pci_addr = b_pci_err_upper & BRIDGE_ERRUPPR_ADDRMASK; + pci_addr = (pci_addr << 32) | b_pci_err_lower; + + slot = 8; + while (slot-- > 0) { + int nfunc = soft->bs_slot[slot].bss_ninfo; + pcibr_info_h pcibr_infoh = soft->bs_slot[slot].bss_infos; + + for (func = 0; func < nfunc; func++) { + pcibr_info_t pcibr_info = pcibr_infoh[func]; + + if (!pcibr_info) + continue; + + for (map = pcibr_info->f_piomap; + map != NULL; map = map->bp_next) { + base = map->bp_pciaddr; + size = map->bp_mapsz; + win = map->bp_space - PCIIO_SPACE_WIN(0); + if (win < 6) + base += + soft->bs_slot[slot].bss_window[win].bssw_base; + else if (map->bp_space == PCIIO_SPACE_ROM) + base += pcibr_info->f_rbase; +#ifdef IRIX + if ((pci_addr >= base) && (pci_addr < (base + size))) + atomicAddInt(map->bp_toc, 1); +#endif + } + } + } + } + } +} + +/* + * PCI Bridge Error interrupt handler. + * This gets invoked whenever a PCI bridge sends an error interrupt. + * Primarily this serves two purposes. + * - If an error can be handled (typically a PIO read/write + * error), we try to do it silently. + * - If an error cannot be handled, we die violently. + * Interrupt due to PIO errors: + * - Bridge sends an interrupt whenever a PCI operation + * done by the bridge as the master fails. Operations could + * be either a PIO read or a PIO write. + * A PIO read failure also triggers a bus error, and the read + * error is handled in the bus error context; we primarily + * ignore this interrupt in that case. + * For PIO write errors, this interrupt is the only indication, + * and we have to handle them with the info from here. + * + * So, there is no way to distinguish whether an interrupt is + * due to a read or a write error. + */ + + +LOCAL void +pcibr_error_intr_handler(intr_arg_t arg) +{ + pcibr_soft_t pcibr_soft; + bridge_t *bridge; + bridgereg_t int_status; + bridgereg_t err_status; + int i; + +#if defined(SN0_HWDEBUG) + extern int la_trigger_nasid1; + extern int la_trigger_nasid2; + extern long la_trigger_val; +#endif + + /* REFERENCED */ + bridgereg_t disable_errintr_mask = 0; +#ifdef IRIX + int rv; +#else + int rv = 0; +#endif + int error_code = IOECODE_DMA | IOECODE_READ; + ioerror_mode_t mode = MODE_DEVERROR; + ioerror_t ioe; + +#if defined(SN0_HWDEBUG) + /* + * trigger points for logic analyzer. Used to debug the DMA timeout + * note that 0xcafe is added to the trigger values to avoid false + * triggers when la_trigger_val shows up in a cacheline as data + */ + if (la_trigger_nasid1 != -1) + REMOTE_HUB_PI_S(la_trigger_nasid1, 0, PI_CPU_NUM, la_trigger_val + 0xcafe); + if (la_trigger_nasid2 != -1) + REMOTE_HUB_PI_S(la_trigger_nasid2, 0, PI_CPU_NUM, la_trigger_val + 0xcafe); +#endif + +#if PCIBR_SOFT_LIST + /* IP27 seems to be handing us junk.
+ */ + { + pcibr_list_p entry; + + entry = pcibr_list; + while (1) { + if (entry == NULL) { + printk("pcibr_error_intr_handler:\n" + "\tparameter (0x%p) is not a pcibr_soft!", + arg); + PRINT_PANIC("Invalid parameter to pcibr_error_intr_handler"); + } + if ((intr_arg_t) entry->bl_soft == arg) + break; + entry = entry->bl_next; + } + } +#endif + pcibr_soft = (pcibr_soft_t) arg; + bridge = pcibr_soft->bs_base; + + /* + * pcibr_error_intr_handler gets invoked whenever bridge encounters + * an error situation, and the interrupt for that error is enabled. + * This routine decides if the error is fatal or not, and takes + * action accordingly. + * + * In one case there is a need for special action. + * In case of PIO read/write timeouts due to user level, we do + * get an error interrupt. In this case, way to handle would + * be to start a timeout. If the error was due to "read", bus + * error handling code takes care of it. If error is due to write, + * it's handled at timeout + */ + + /* int_status is which bits we have to clear; + * err_status is the bits we haven't handled yet. + */ + + int_status = bridge->b_int_status & ~BRIDGE_ISR_INT_MSK; + err_status = int_status & ~BRIDGE_ISR_MULTI_ERR; + + if (!(int_status & ~BRIDGE_ISR_INT_MSK)) { + /* + * No error bit set!!. + */ + return; + } + /* If we have a PCIBUS_PIOERR, + * hand it to the logger but otherwise + * ignore the event. + */ + if (int_status & BRIDGE_ISR_PCIBUS_PIOERR) { + pcibr_pioerr_check(pcibr_soft); + err_status &= ~BRIDGE_ISR_PCIBUS_PIOERR; + int_status &= ~BRIDGE_ISR_PCIBUS_PIOERR; + } + + + if (err_status) { + struct bs_errintr_stat_s *bs_estat = pcibr_soft->bs_errintr_stat; + + for (i = PCIBR_ISR_ERR_START; i < PCIBR_ISR_MAX_ERRS; i++, bs_estat++) { + if (err_status & (1 << i)) { + uint32_t errrate = 0; + uint32_t errcount = 0; + uint32_t errinterval = 0, current_tick = 0; + int panic_on_llp_tx_retry = 0; + int is_llp_tx_retry_intr = 0; + + bs_estat->bs_errcount_total++; + +#ifdef IRIX + current_tick = lbolt; +#else + current_tick = 0; +#endif + errinterval = (current_tick - bs_estat->bs_lasterr_timestamp); + errcount = (bs_estat->bs_errcount_total - + bs_estat->bs_lasterr_snapshot); + + is_llp_tx_retry_intr = (BRIDGE_ISR_LLP_TX_RETRY == (1 << i)); + + /* On a non-zero error rate (which is equivalent to + * to 100 errors /sec at least) for the LLP transmitter + * retry interrupt we need to panic the system + * to prevent potential data corruption . + * NOTE : errcount is being compared to PCIBR_ERRTIME_THRESHOLD + * to make sure that we are not seing cases like x error + * interrupts per y ticks for very low x ,y (x > y ) which + * makes error rate be > 100 /sec. + */ + + /* Check for the divide by zero condition while + * calculating the error rates. + */ + + if (errinterval) { + errrate = errcount / errinterval; + /* If able to calculate error rate + * on a LLP transmitter retry interrupt check + * if the error rate is nonzero and we have seen + * a certain minimum number of errors. + */ + if (is_llp_tx_retry_intr && + errrate && + (errcount >= PCIBR_ERRTIME_THRESHOLD)) { + panic_on_llp_tx_retry = 1; + } + } else { + errrate = 0; + /* Since we are not able to calculate the + * error rate check if we exceeded a certain + * minimum number of errors for LLP transmitter + * retries. Note that this can only happen + * within the first tick after the last snapshot. 
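+ * + * For instance (hypothetical numbers): 500 errors spread over 4 ticks + * give errrate = 125 per tick and are judged by the rate-based test + * above, while 500 errors arriving while errinterval is still 0 are + * judged by the errcount >= PCIBR_ERRINTR_DISABLE_LEVEL test below + * instead.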
+ */ + if (is_llp_tx_retry_intr && + (errcount >= PCIBR_ERRINTR_DISABLE_LEVEL)) { + panic_on_llp_tx_retry = 1; + } + } + if (panic_on_llp_tx_retry) { + static uint32_t last_printed_rate; + + if (errrate > last_printed_rate) { + last_printed_rate = errrate; + /* Print the warning only if the error rate + * for the transmitter retry interrupt + * exceeded the previously printed rate. + */ + PRINT_WARNING( + "%s: %s, Excessive error interrupts : %d/tick\n", + pcibr_soft->bs_name, + pcibr_isr_errs[i], + errrate); + + } + /* + * Update snapshot, and time + */ + bs_estat->bs_lasterr_timestamp = current_tick; + bs_estat->bs_lasterr_snapshot = + bs_estat->bs_errcount_total; + + } + /* + * If the error rate is high enough, print the error rate. + */ + if (errinterval > PCIBR_ERRTIME_THRESHOLD) { + + if (errrate > PCIBR_ERRRATE_THRESHOLD) { + PRINT_NOTICE( "%s: %s, Error rate %d/tick", + pcibr_soft->bs_name, + pcibr_isr_errs[i], + errrate); + /* + * Update snapshot, and time + */ + bs_estat->bs_lasterr_timestamp = current_tick; + bs_estat->bs_lasterr_snapshot = + bs_estat->bs_errcount_total; + } + } + if (bs_estat->bs_errcount_total > PCIBR_ERRINTR_DISABLE_LEVEL) { + /* + * We have seen a fairly large number of errors of + * this type. Let's disable the interrupt. But flash + * a message about the interrupt being disabled. + */ + PRINT_NOTICE( + "%s Disabling error interrupt type %s. Error count %d", + pcibr_soft->bs_name, + pcibr_isr_errs[i], + bs_estat->bs_errcount_total); + disable_errintr_mask |= (1 << i); + } + } + } + } + + if (disable_errintr_mask) { + /* + * Disable some high frequency errors as they + * could eat up too much cpu time. + */ + bridge->b_int_enable &= ~disable_errintr_mask; + } + /* + * If we leave the PROM cacheable, T5 might + * try to do a cache line sized writeback to it, + * which will cause a BRIDGE_ISR_INVLD_ADDR. + */ + if ((err_status & BRIDGE_ISR_INVLD_ADDR) && + (0x00000000 == bridge->b_wid_err_upper) && + (0x00C00000 == (0xFFC00000 & bridge->b_wid_err_lower)) && + (0x00402000 == (0x00F07F00 & bridge->b_wid_err_cmdword))) { + err_status &= ~BRIDGE_ISR_INVLD_ADDR; + } +#if defined (PCIBR_LLP_CONTROL_WAR) + /* + * The bridge bug, where the llp_config or control registers + * need to be read back after being written, affects an MP + * system since there could be small windows between writing + * the register and reading it back on one cpu while another + * cpu is fielding an interrupt. If we run into this scenario, + * workaround the problem by ignoring the error. (bug 454474) + * pcibr_llp_control_war_cnt keeps an approximate number of + * times we saw this problem on a system. + */ + + if ((err_status & BRIDGE_ISR_INVLD_ADDR) && + ((((uint64_t) bridge->b_wid_err_upper << 32) | (bridge->b_wid_err_lower)) + == (BRIDGE_INT_RST_STAT & 0xff0))) { +#ifdef IRIX + if (kdebug) + PRINT_NOTICE( "%s bridge: ignoring llp/control address interrupt", + pcibr_soft->bs_name); +#endif + pcibr_llp_control_war_cnt++; + err_status &= ~BRIDGE_ISR_INVLD_ADDR; + } +#endif /* PCIBR_LLP_CONTROL_WAR */ + + /* Check if this is the RESP_XTALK_ERROR interrupt. + * This can happen due to a failed DMA READ operation. + */ + if (err_status & BRIDGE_ISR_RESP_XTLK_ERR) { + /* Phase 1 : Look at the error state in the bridge and further + * down in the device layers. 
+ */ +#if defined(CONFIG_SGI_IO_ERROR_HANDLING) + (void)error_state_set(pcibr_soft->bs_conn, ERROR_STATE_LOOKUP); +#endif + IOERROR_SETVALUE(&ioe, widgetnum, pcibr_soft->bs_xid); + (void)pcibr_error_handler((error_handler_arg_t)pcibr_soft, + error_code, + mode, + &ioe); + /* Phase 2 : Perform the action agreed upon in phase 1. + */ +#if defined(CONFIG_SGI_IO_ERROR_HANDLING) + (void)error_state_set(pcibr_soft->bs_conn, ERROR_STATE_ACTION); +#endif + rv = pcibr_error_handler((error_handler_arg_t)pcibr_soft, + error_code, + mode, + &ioe); + } + if (rv != IOERROR_HANDLED) { +#ifdef DEBUG + if (err_status & BRIDGE_ISR_ERROR_DUMP) + pcibr_error_dump(pcibr_soft); +#else + if (err_status & BRIDGE_ISR_ERROR_FATAL) { + printk("BRIDGE ERR STATUS 0x%x\n", err_status); + pcibr_error_dump(pcibr_soft); + } +#endif + } + /* + * We can't return without re-enabling the interrupt, since + * it would cause problems for devices like IOC3 (Lost + * interrupts ?.). So, just cleanup the interrupt, and + * use saved values later.. + */ + bridge->b_int_rst_stat = pcibr_errintr_group(int_status); +} + +/* + * pcibr_addr_toslot + * Given the 'pciaddr' find out which slot this address is + * allocated to, and return the slot number. + * While we have the info handy, construct the + * function number, space code and offset as well. + * + * NOTE: if this routine is called, we don't know whether + * the address is in CFG, MEM, or I/O space. We have to guess. + * This will be the case on PIO stores, where the only way + * we have of getting the address is to check the Bridge, which + * stores the PCI address but not the space and not the xtalk + * address (from which we could get it). + */ +LOCAL int +pcibr_addr_toslot(pcibr_soft_t pcibr_soft, + iopaddr_t pciaddr, + pciio_space_t *spacep, + iopaddr_t *offsetp, + pciio_function_t *funcp) +{ +#ifdef IRIX + int s, f, w; +#else + int s, f=0, w; +#endif + iopaddr_t base; + size_t size; + pciio_piospace_t piosp; + + /* + * Check if the address is in config space + */ + + if ((pciaddr >= BRIDGE_CONFIG_BASE) && (pciaddr < BRIDGE_CONFIG_END)) { + + if (pciaddr >= BRIDGE_CONFIG1_BASE) + pciaddr -= BRIDGE_CONFIG1_BASE; + else + pciaddr -= BRIDGE_CONFIG_BASE; + + s = pciaddr / BRIDGE_CONFIG_SLOT_SIZE; + pciaddr %= BRIDGE_CONFIG_SLOT_SIZE; + + if (funcp) { + f = pciaddr / 0x100; + pciaddr %= 0x100; + } + if (spacep) + *spacep = PCIIO_SPACE_CFG; + if (offsetp) + *offsetp = pciaddr; + if (funcp) + *funcp = f; + + return s; + } + for (s = 0; s < 8; s++) { + int nf = pcibr_soft->bs_slot[s].bss_ninfo; + pcibr_info_h pcibr_infoh = pcibr_soft->bs_slot[s].bss_infos; + + for (f = 0; f < nf; f++) { + pcibr_info_t pcibr_info = pcibr_infoh[f]; + + if (!pcibr_info) + continue; + for (w = 0; w < 6; w++) { + if (pcibr_info->f_window[w].w_space + == PCIIO_SPACE_NONE) { + continue; + } + base = pcibr_info->f_window[w].w_base; + size = pcibr_info->f_window[w].w_size; + + if ((pciaddr >= base) && (pciaddr < (base + size))) { + if (spacep) + *spacep = PCIIO_SPACE_WIN(w); + if (offsetp) + *offsetp = pciaddr - base; + if (funcp) + *funcp = f; + return s; + } /* endif match */ + } /* next window */ + } /* next func */ + } /* next slot */ + + /* + * Check if the address was allocated as part of the + * pcibr_piospace_alloc calls. 
+ */ + for (s = 0; s < 8; s++) { + int nf = pcibr_soft->bs_slot[s].bss_ninfo; + pcibr_info_h pcibr_infoh = pcibr_soft->bs_slot[s].bss_infos; + + for (f = 0; f < nf; f++) { + pcibr_info_t pcibr_info = pcibr_infoh[f]; + + if (!pcibr_info) + continue; + piosp = pcibr_info->f_piospace; + while (piosp) { + if ((piosp->start <= pciaddr) && + ((piosp->count + piosp->start) > pciaddr)) { + if (spacep) + *spacep = piosp->space; + if (offsetp) + *offsetp = pciaddr - piosp->start; + return s; + } /* endif match */ + piosp = piosp->next; + } /* next piosp */ + } /* next func */ + } /* next slot */ + + /* + * Some other random address on the PCI bus ... + * we have no way of knowing whether this was + * a MEM or I/O access; so, for now, we just + * assume that the low 1G is MEM, the next + * 3G is I/O, and anything above the 4G limit + * is obviously MEM. + */ + + if (spacep) + *spacep = ((pciaddr < (1ul << 30)) ? PCIIO_SPACE_MEM : + (pciaddr < (4ul << 30)) ? PCIIO_SPACE_IO : + PCIIO_SPACE_MEM); + if (offsetp) + *offsetp = pciaddr; + + return PCIIO_SLOT_NONE; + +} + +LOCAL void +pcibr_error_cleanup(pcibr_soft_t pcibr_soft, int error_code) +{ + bridge_t *bridge = pcibr_soft->bs_base; + + ASSERT(error_code & IOECODE_PIO); + error_code = error_code; + + bridge->b_int_rst_stat = + (BRIDGE_IRR_PCI_GRP_CLR | BRIDGE_IRR_MULTI_CLR); + (void) bridge->b_wid_tflush; /* flushbus */ +} + +/* + * pcibr_error_extract + * Given the 'pcibr vertex handle' find out which slot + * the bridge status error address (from pcibr_soft info + * hanging off the vertex) + * allocated to, and return the slot number. + * While we have the info handy, construct the + * space code and offset as well. + * + * NOTE: if this routine is called, we don't know whether + * the address is in CFG, MEM, or I/O space. We have to guess. + * This will be the case on PIO stores, where the only way + * we have of getting the address is to check the Bridge, which + * stores the PCI address but not the space and not the xtalk + * address (from which we could get it). + * + * XXX- this interface has no way to return the function + * number on a multifunction card, even though that data + * is available. + */ + +pciio_slot_t +pcibr_error_extract(devfs_handle_t pcibr_vhdl, + pciio_space_t *spacep, + iopaddr_t *offsetp) +{ + pcibr_soft_t pcibr_soft = 0; + iopaddr_t bserr_addr; + bridge_t *bridge; + pciio_slot_t slot = PCIIO_SLOT_NONE; + arbitrary_info_t rev; + + /* Do a sanity check as to whether we really got a + * bridge vertex handle. + */ + if (hwgraph_info_get_LBL(pcibr_vhdl, INFO_LBL_PCIBR_ASIC_REV, &rev) != + GRAPH_SUCCESS) + return(slot); + + pcibr_soft = pcibr_soft_get(pcibr_vhdl); + if (pcibr_soft) { + bridge = pcibr_soft->bs_base; + bserr_addr = + bridge->b_pci_err_lower | + ((uint64_t) (bridge->b_pci_err_upper & + BRIDGE_ERRUPPR_ADDRMASK) << 32); + + slot = pcibr_addr_toslot(pcibr_soft, bserr_addr, + spacep, offsetp, NULL); + } + return slot; +} + +/*ARGSUSED */ +void +pcibr_device_disable(pcibr_soft_t pcibr_soft, int devnum) +{ + /* + * XXX + * Device failed to handle error. Take steps to + * disable this device ? HOW TO DO IT ? + * + * If there are any Read response buffers associated + * with this device, it's time to get them back!! + * + * We can disassociate any interrupt level associated + * with this device, and disable that interrupt level + * + * For now it's just a place holder + */ +} + +/* + * pcibr_pioerror + * Handle PIO error that happened at the bridge pointed by pcibr_soft. 
+ * + * Queries the Bus interface attached to see if the device driver + * mapping the device-number that caused error can handle the + * situation. If so, it will clean up any error, and return + * indicating the error was handled. If the device driver is unable + * to handle the error, it expects the bus-interface to disable that + * device, and takes any steps needed here to take away any resources + * associated with this device. + */ + +#define BEM_ADD_STR(s) printk("%s", (s)) +#ifdef SUPPORT_SGI_CMN_ERR_STUFF +#define BEM_ADD_VAR(v) printk("\t%20s: 0x%x\n", #v, (v)) +#define BEM_ADD_REG(r) printk("\t%20s: %R\n", #r, (r), r ## _desc) + +#define BEM_ADD_NSPC(n,s) printk("\t%20s: %R\n", n, s, space_desc) +#else +#define BEM_ADD_VAR(v) +#define BEM_ADD_REG(r) +#define BEM_ADD_NSPC(n,s) +#endif +#define BEM_ADD_SPC(s) BEM_ADD_NSPC(#s, s) + +/* BEM_ADD_IOE doesn't dump the whole ioerror, it just + * decodes the PCI specific portions -- we count on our + * callers to dump the raw IOE data. + */ +#ifdef colin +#define BEM_ADD_IOE(ioe) \ + do { \ + if (IOERROR_FIELDVALID(ioe, busspace)) { \ + unsigned spc; \ + unsigned win; \ + \ + spc = IOERROR_GETVALUE(ioe, busspace); \ + win = spc - PCIIO_SPACE_WIN(0); \ + \ + switch (spc) { \ + case PCIIO_SPACE_CFG: \ + printk("\tPCI Slot %d Func %d CFG space Offset 0x%x\n", \ + pciio_widgetdev_slot_get(IOERROR_GETVALUE(ioe, widgetdev)), \ + pciio_widgetdev_func_get(IOERROR_GETVALUE(ioe, widgetdev)), \ + IOERROR_GETVALUE(ioe, busaddr)); \ + break; \ + case PCIIO_SPACE_IO: \ + printk("\tPCI I/O space Offset 0x%x\n", \ + IOERROR_GETVALUE(ioe, busaddr)); \ + break; \ + case PCIIO_SPACE_MEM: \ + case PCIIO_SPACE_MEM32: \ + case PCIIO_SPACE_MEM64: \ + printk("\tPCI MEM space Offset 0x%x\n", \ + IOERROR_GETVALUE(ioe, busaddr)); \ + break; \ + default: \ + if (win < 6) { \ + printk("\tPCI Slot %d Func %d Window %d Offset 0x%x\n",\ + pciio_widgetdev_slot_get(IOERROR_GETVALUE(ioe, widgetdev)), \ + pciio_widgetdev_func_get(IOERROR_GETVALUE(ioe, widgetdev)), \ + win, \ + IOERROR_GETVALUE(ioe, busaddr)); \ + } \ + break; \ + } \ + } \ + } while (0) +#else +#define BEM_ADD_IOE(ioe) +#endif + +/*ARGSUSED */ +LOCAL int +pcibr_pioerror( + pcibr_soft_t pcibr_soft, + int error_code, + ioerror_mode_t mode, + ioerror_t *ioe) +{ + int retval = IOERROR_HANDLED; + + devfs_handle_t pcibr_vhdl = pcibr_soft->bs_vhdl; + bridge_t *bridge = pcibr_soft->bs_base; + + bridgereg_t bridge_int_status; + bridgereg_t bridge_pci_err_lower; + bridgereg_t bridge_pci_err_upper; + bridgereg_t bridge_pci_err_addr; + + iopaddr_t bad_xaddr; + + pciio_space_t raw_space; /* raw PCI space */ + iopaddr_t raw_paddr; /* raw PCI address */ + + pciio_space_t space; /* final PCI space */ + pciio_slot_t slot; /* final PCI slot, if appropriate */ + pciio_function_t func; /* final PCI func, if appropriate */ + iopaddr_t offset; /* final PCI offset */ + + int cs, cw, cf; + pciio_space_t wx; + iopaddr_t wb; + size_t ws; + iopaddr_t wl; + + + /* + * We expect to have an "xtalkaddr" coming in, + * and need to construct the slot/space/offset. 
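+ * + * Roughly (see the decode below): an address between + * BRIDGE_TYPE0_CFG_DEV0 and BRIDGE_TYPE1_CFG is type 0 config space, + * with the slot recovered by dividing the offset by + * BRIDGE_TYPE0_CFG_SLOT_OFF; an address inside a DevIO window is + * decoded through the corresponding Device(x) register; and addresses + * in the MEM32, MEM64 or IO ranges map to those spaces directly.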
+ */ + +#ifdef colin + bad_xaddr = IOERROR_GETVALUE(ioe, xtalkaddr); +#else + bad_xaddr = -1; +#endif + + slot = PCIIO_SLOT_NONE; + func = PCIIO_FUNC_NONE; + raw_space = PCIIO_SPACE_NONE; + raw_paddr = 0; + + if ((bad_xaddr >= BRIDGE_TYPE0_CFG_DEV0) && + (bad_xaddr < BRIDGE_TYPE1_CFG)) { + raw_paddr = bad_xaddr - BRIDGE_TYPE0_CFG_DEV0; + slot = raw_paddr / BRIDGE_TYPE0_CFG_SLOT_OFF; + raw_paddr = raw_paddr % BRIDGE_TYPE0_CFG_SLOT_OFF; + raw_space = PCIIO_SPACE_CFG; + } + if ((bad_xaddr >= BRIDGE_TYPE1_CFG) && + (bad_xaddr < (BRIDGE_TYPE1_CFG + 0x1000))) { + /* Type 1 config space: + * slot and function numbers not known. + * Perhaps we can read them back? + */ + raw_paddr = bad_xaddr - BRIDGE_TYPE1_CFG; + raw_space = PCIIO_SPACE_CFG; + } + if ((bad_xaddr >= BRIDGE_DEVIO0) && + (bad_xaddr < BRIDGE_DEVIO(BRIDGE_DEV_CNT))) { + int x; + + raw_paddr = bad_xaddr - BRIDGE_DEVIO0; + x = raw_paddr / BRIDGE_DEVIO_OFF; + raw_paddr %= BRIDGE_DEVIO_OFF; + /* first two devio windows are double-sized */ + if ((x == 1) || (x == 3)) + raw_paddr += BRIDGE_DEVIO_OFF; + if (x > 0) + x--; + if (x > 1) + x--; + /* x is which devio reg; no guarantee + * pci slot x will be responding. + * still need to figure out who decodes + * space/offset on the bus. + */ + raw_space = pcibr_soft->bs_slot[x].bss_devio.bssd_space; + if (raw_space == PCIIO_SPACE_NONE) { + /* Someone got an error because they + * accessed the PCI bus via a DevIO(x) + * window that pcibr has not yet assigned + * to any specific PCI address. It is + * quite possible that the Device(x) + * register has been changed since they + * made their access, but we will give it + * our best decode shot. + */ + raw_space = pcibr_soft->bs_slot[x].bss_device + & BRIDGE_DEV_DEV_IO_MEM + ? PCIIO_SPACE_MEM + : PCIIO_SPACE_IO; + raw_paddr += + (pcibr_soft->bs_slot[x].bss_device & + BRIDGE_DEV_OFF_MASK) << + BRIDGE_DEV_OFF_ADDR_SHFT; + } else + raw_paddr += pcibr_soft->bs_slot[x].bss_devio.bssd_base; + } + if ((bad_xaddr >= BRIDGE_PCI_MEM32_BASE) && + (bad_xaddr <= BRIDGE_PCI_MEM32_LIMIT)) { + raw_space = PCIIO_SPACE_MEM32; + raw_paddr = bad_xaddr - BRIDGE_PCI_MEM32_BASE; + } + if ((bad_xaddr >= BRIDGE_PCI_MEM64_BASE) && + (bad_xaddr <= BRIDGE_PCI_MEM64_LIMIT)) { + raw_space = PCIIO_SPACE_MEM64; + raw_paddr = bad_xaddr - BRIDGE_PCI_MEM64_BASE; + } + if ((bad_xaddr >= BRIDGE_PCI_IO_BASE) && + (bad_xaddr <= BRIDGE_PCI_IO_LIMIT)) { + raw_space = PCIIO_SPACE_IO; + raw_paddr = bad_xaddr - BRIDGE_PCI_IO_BASE; + } + space = raw_space; + offset = raw_paddr; + + if ((slot == PCIIO_SLOT_NONE) && (space != PCIIO_SPACE_NONE)) { + /* we've got a space/offset but not which + * pci slot decodes it. Check through our + * notions of which devices decode where. + * + * Yes, this "duplicates" some logic in + * pcibr_addr_toslot; the difference is, + * this code knows which space we are in, + * and can really really tell what is + * going on (no guessing). 
+ */ + + for (cs = 0; (cs < 8) && (slot == PCIIO_SLOT_NONE); cs++) { + int nf = pcibr_soft->bs_slot[cs].bss_ninfo; + pcibr_info_h pcibr_infoh = pcibr_soft->bs_slot[cs].bss_infos; + + for (cf = 0; (cf < nf) && (slot == PCIIO_SLOT_NONE); cf++) { + pcibr_info_t pcibr_info = pcibr_infoh[cf]; + + if (!pcibr_info) + continue; + for (cw = 0; (cw < 6) && (slot == PCIIO_SLOT_NONE); ++cw) { + if (((wx = pcibr_info->f_window[cw].w_space) != PCIIO_SPACE_NONE) && + ((wb = pcibr_info->f_window[cw].w_base) != 0) && + ((ws = pcibr_info->f_window[cw].w_size) != 0) && + ((wl = wb + ws) > wb) && + ((wb <= offset) && (wl > offset))) { + /* MEM, MEM32 and MEM64 need to + * compare as equal ... + */ + if ((wx == space) || + (((wx == PCIIO_SPACE_MEM) || + (wx == PCIIO_SPACE_MEM32) || + (wx == PCIIO_SPACE_MEM64)) && + ((space == PCIIO_SPACE_MEM) || + (space == PCIIO_SPACE_MEM32) || + (space == PCIIO_SPACE_MEM64)))) { + slot = cs; + func = cf; + space = PCIIO_SPACE_WIN(cw); + offset -= wb; + } /* endif window space match */ + } /* endif window valid and addr match */ + } /* next window unless slot set */ + } /* next func unless slot set */ + } /* next slot unless slot set */ + /* XXX- if slot is still -1, no PCI devices are + * decoding here using their standard PCI BASE + * registers. This would be a really good place + * to cross-coordinate with the pciio PCI + * address space allocation routines, to find + * out if this address is "allocated" by any of + * our subsidiary devices. + */ + } + /* Scan all piomap records on this PCI bus to update + * the TimeOut Counters on all matching maps. If we + * don't already know the slot number, take it from + * the first matching piomap. Note that we have to + * compare maps against raw_space and raw_paddr + * since space and offset could already be + * window-relative. + * + * There is a chance that one CPU could update + * through this path, and another CPU could also + * update due to an interrupt. Closing this hole + * would only result in the possibility of some + * errors never getting logged at all, and since the + * use for bp_toc is as a logical test rather than a + * strict count, the excess counts are not a + * problem. 
+ */ + for (cs = 0; cs < 8; ++cs) { + int nf = pcibr_soft->bs_slot[cs].bss_ninfo; + pcibr_info_h pcibr_infoh = pcibr_soft->bs_slot[cs].bss_infos; + + for (cf = 0; cf < nf; cf++) { + pcibr_info_t pcibr_info = pcibr_infoh[cf]; + pcibr_piomap_t map; + + if (!pcibr_info) + continue; + + for (map = pcibr_info->f_piomap; + map != NULL; map = map->bp_next) { + wx = map->bp_space; + wb = map->bp_pciaddr; + ws = map->bp_mapsz; + cw = wx - PCIIO_SPACE_WIN(0); + if (cw < 6) { + wb += pcibr_soft->bs_slot[cs].bss_window[cw].bssw_base; + wx = pcibr_soft->bs_slot[cs].bss_window[cw].bssw_space; + } + if (wx == PCIIO_SPACE_ROM) { + wb += pcibr_info->f_rbase; + wx = PCIIO_SPACE_MEM; + } + if ((wx == PCIIO_SPACE_MEM32) || + (wx == PCIIO_SPACE_MEM64)) + wx = PCIIO_SPACE_MEM; + wl = wb + ws; + if ((wx == raw_space) && (raw_paddr >= wb) && (raw_paddr < wl)) { +#ifdef IRIX + atomicAddInt(map->bp_toc, 1); +#endif + if (slot == PCIIO_SLOT_NONE) { + slot = cs; + space = map->bp_space; + if (cw < 6) + offset -= pcibr_soft->bs_slot[cs].bss_window[cw].bssw_base; + } + } + } + } + } + + if (space != PCIIO_SPACE_NONE) { + if (slot != PCIIO_SLOT_NONE) { +#ifdef IRIX + if (func != PCIIO_FUNC_NONE) + IOERROR_SETVALUE(ioe, widgetdev, + pciio_widgetdev_create(slot,func)); + else + IOERROR_SETVALUE(ioe, widgetdev, + pciio_widgetdev_create(slot,0)); +#else + if (func != PCIIO_FUNC_NONE) { + IOERROR_SETVALUE(ioe, widgetdev, + pciio_widgetdev_create(slot,func)); + } else { + IOERROR_SETVALUE(ioe, widgetdev, + pciio_widgetdev_create(slot,0)); + } +#endif + } + + IOERROR_SETVALUE(ioe, busspace, space); + IOERROR_SETVALUE(ioe, busaddr, offset); + } + if (mode == MODE_DEVPROBE) { + /* + * During probing, we don't really care what the + * error is. Clean up the error in Bridge, notify + * subsidiary devices, and return success. + */ + pcibr_error_cleanup(pcibr_soft, error_code); + + /* if appropriate, give the error handler for this slot + * a shot at this probe access as well. + */ + return (slot == PCIIO_SLOT_NONE) ? IOERROR_HANDLED : + pciio_error_handler(pcibr_vhdl, error_code, mode, ioe); + } + /* + * If we don't know what "PCI SPACE" the access + * was targeting, we may have problems at the + * Bridge itself. Don't touch any bridge registers, + * and do complain loudly. + */ + + if (space == PCIIO_SPACE_NONE) { + printk("XIO Bus Error at %s\n" + "\taccess to XIO bus offset 0x%lx\n" + "\tdoes not correspond to any PCI address\n", + pcibr_soft->bs_name, bad_xaddr); + + /* caller will dump contents of ioe struct */ + return IOERROR_XTALKLEVEL; + } + /* + * Read the PCI Bridge error log registers. + */ + bridge_int_status = bridge->b_int_status; + bridge_pci_err_upper = bridge->b_pci_err_upper; + bridge_pci_err_lower = bridge->b_pci_err_lower; + + bridge_pci_err_addr = + bridge_pci_err_lower + | (((iopaddr_t) bridge_pci_err_upper + & BRIDGE_ERRUPPR_ADDRMASK) << 32); + + /* + * Actual PCI Error handling situation. + * Typically happens when a user level process accesses + * PCI space, and it causes some error. + * + * Due to PCI Bridge implementation, we get two indication + * for a read error: an interrupt and a Bus error. + * We like to handle read error in the bus error context. + * But the interrupt comes and goes before bus error + * could make much progress. (NOTE: interrupd does + * come in _after_ bus error processing starts. But it's + * completed by the time bus error code reaches PCI PIO + * error handling. + * Similarly write error results in just an interrupt, + * and error handling has to be done at interrupt level. 
+ * There is no way to distinguish, at interrupt time, whether an + * error interrupt is due to a read or a write error. + */ + + /* We know the xtalk addr, the raw pci bus space, + * the raw pci bus address, the decoded pci bus + * space, the offset within that space, and the + * decoded pci slot (which may be "PCIIO_SLOT_NONE" if no slot + * is known to be involved). + */ + + /* + * Hand the error off to the handler registered + * for the slot that should have decoded the error, + * or to generic PCI handling (if pciio decides that + * such is appropriate). + */ + retval = pciio_error_handler(pcibr_vhdl, error_code, mode, ioe); + + if (retval != IOERROR_HANDLED) { + + /* Generate a generic message for IOERROR_UNHANDLED + * since the subsidiary handlers were silent, and + * did no recovery. + */ + if (retval == IOERROR_UNHANDLED) { + retval = IOERROR_PANIC; + + /* we may or may not want to print some of this, + * depending on debug level and which error code. + */ + + PRINT_ALERT( + "PIO Error on PCI Bus %s", + pcibr_soft->bs_name); + /* this decodes part of the ioe; our caller + * will dump the raw details in DEBUG and + * kdebug kernels. + */ + BEM_ADD_IOE(ioe); + } +#if defined(FORCE_ERRORS) + if (0) { +#elif !DEBUG + if (kdebug) { +#endif + /* + * dump raw data from bridge + */ + + BEM_ADD_STR("DEBUG DATA -- raw info from Bridge ASIC:\n"); + BEM_ADD_REG(bridge_int_status); + BEM_ADD_VAR(bridge_pci_err_upper); + BEM_ADD_VAR(bridge_pci_err_lower); + BEM_ADD_VAR(bridge_pci_err_addr); + BEM_ADD_SPC(raw_space); + BEM_ADD_VAR(raw_paddr); + if (IOERROR_FIELDVALID(ioe, widgetdev)) { + +#ifdef colin + slot = pciio_widgetdev_slot_get(IOERROR_GETVALUE(ioe, + widgetdev)); + func = pciio_widgetdev_func_get(IOERROR_GETVALUE(ioe, + widgetdev)); +#else + slot = -1; + func = -1; +#endif + if (slot < 8) { +#ifdef SUPPORT_SGI_CMN_ERR_STUFF + bridgereg_t device = bridge->b_device[slot].reg; +#endif + + BEM_ADD_VAR(slot); + BEM_ADD_VAR(func); + BEM_ADD_REG(device); + } + } +#if !DEBUG || defined(FORCE_ERRORS) + } +#endif + + /* + * Since the error could not be handled at a lower level, + * the error data logged has not been cleared. + * Clean up errors, and + * re-enable bridge to interrupt on error conditions. + * NOTE: Whether we get the interrupt on PCI_ABORT or not is + * dependent on INT_ENABLE register. This write just makes sure + * that if the interrupt was enabled, we do get the interrupt. + * + * CAUTION: Resetting bit BRIDGE_IRR_PCI_GRP_CLR acknowledges + * a group of interrupts. If while handling this error, + * some other error has occurred, that would be + * implicitly cleared by this write. + * Need a way to ensure we don't inadvertently clear some + * other errors. + */ +#ifdef IRIX + if (IOERROR_FIELDVALID(ioe, widgetdev)) + pcibr_device_disable(pcibr_soft, + pciio_widgetdev_slot_get( + IOERROR_GETVALUE(ioe, widgetdev))); +#endif + + if (mode == MODE_DEVUSERERROR) + pcibr_error_cleanup(pcibr_soft, error_code); + } + return retval; +} + +/* + * bridge_dmaerror + * Some error was identified in a DMA transaction. + * This routine will identify the <device, address> that caused the error, + * and try to invoke the appropriate bus service to handle this.
+ */ + +#define BRIDGE_DMA_READ_ERROR (BRIDGE_ISR_RESP_XTLK_ERR|BRIDGE_ISR_XREAD_REQ_TIMEOUT) + +int +pcibr_dmard_error( + pcibr_soft_t pcibr_soft, + int error_code, + ioerror_mode_t mode, + ioerror_t *ioe) +{ + devfs_handle_t pcibr_vhdl = pcibr_soft->bs_vhdl; + bridge_t *bridge = pcibr_soft->bs_base; + bridgereg_t bus_lowaddr, bus_uppraddr; + int retval = 0; + int bufnum; + + /* + * In case of DMA errors, bridge should have logged the + * address that caused the error. + * Look up the address in the bridge error registers, and + * take appropriate action + */ +#ifdef colin + ASSERT(IOERROR_GETVALUE(ioe, widgetnum) == pcibr_soft->bs_xid); + ASSERT(bridge); +#endif + + /* + * read error log registers + */ + bus_lowaddr = bridge->b_wid_resp_lower; + bus_uppraddr = bridge->b_wid_resp_upper; + + bufnum = BRIDGE_RESP_ERRUPPR_BUFNUM(bus_uppraddr); + IOERROR_SETVALUE(ioe, widgetdev, + pciio_widgetdev_create( + BRIDGE_RESP_ERRUPPR_DEVICE(bus_uppraddr), + 0)); + IOERROR_SETVALUE(ioe, busaddr, + (bus_lowaddr | + ((iopaddr_t) + (bus_uppraddr & + BRIDGE_ERRUPPR_ADDRMASK) << 32))); + + /* + * need to ensure that the xtalk address in ioe + * maps to the PCI error address read from the bridge. + * How to convert PCI address back to Xtalk address ? + * (better idea: convert XTalk address to PCI address + * and then do the compare!) + */ + + retval = pciio_error_handler(pcibr_vhdl, error_code, mode, ioe); + if (retval != IOERROR_HANDLED) +#ifdef colin + pcibr_device_disable(pcibr_soft, + pciio_widgetdev_slot_get( + IOERROR_GETVALUE(ioe,widgetdev))); +#else + pcibr_device_disable(pcibr_soft, + pciio_widgetdev_slot_get(-1)); +#endif + + /* + * Re-enable bridge to interrupt on BRIDGE_IRR_RESP_BUF_GRP_CLR + * NOTE: Whether we get the interrupt on BRIDGE_IRR_RESP_BUF_GRP_CLR or + * not is dependent on INT_ENABLE register. This write just makes sure + * that if the interrupt was enabled, we do get the interrupt. + */ + bridge->b_int_rst_stat = BRIDGE_IRR_RESP_BUF_GRP_CLR; + + /* + * Also, release the "bufnum" back to the buffer pool so it can be re-used. + * This is done by "disabling" the buffer for a moment, then restoring + * the original assignment. + */ + + { + reg_p regp; + bridgereg_t regv; + bridgereg_t mask; + + regp = (bufnum & 1) + ? &bridge->b_odd_resp + : &bridge->b_even_resp; + + mask = 0xF << ((bufnum >> 1) * 4); + + regv = *regp; + *regp = regv & ~mask; + *regp = regv; + } + + return retval; +} + +/* + * pcibr_dmawr_error: + * Handle a DMA write error caused by a device attached to this bridge. + * + * ioe has the widgetnum, widgetdev, and memaddr fields updated + * But we don't know the PCI address that corresponds to "memaddr" + * nor do we know which device driver is generating this address. + * + * There is no easy way to find out the PCI address(es) that map + * to a specific system memory address. Bus handling code is also + * not of much help, since it does not keep track of the DMA mappings + * that have been handed out. + * So it's a dead-end at this time. + * + * If translation is available, we could invoke the error handling + * interface of the device driver.
+ */
+/*ARGSUSED */
+int
+pcibr_dmawr_error(
+ pcibr_soft_t pcibr_soft,
+ int error_code,
+ ioerror_mode_t mode,
+ ioerror_t *ioe)
+{
+ devfs_handle_t pcibr_vhdl = pcibr_soft->bs_vhdl;
+ int retval;
+
+ retval = pciio_error_handler(pcibr_vhdl, error_code, mode, ioe);
+
+#ifdef IRIX
+ if (retval != IOERROR_HANDLED) {
+ pcibr_device_disable(pcibr_soft,
+ pciio_widgetdev_slot_get(
+ IOERROR_GETVALUE(ioe, widgetdev)));
+
+ }
+#endif
+ return retval;
+}
+
+/*
+ * Bridge error handler.
+ * Interface to handle all errors that involve the bridge in some way.
+ *
+ * This normally gets called from the xtalk error handler.
+ * ioe has a different set of fields set depending on the error that
+ * was encountered. So, we have a bit field indicating which of the
+ * fields are valid.
+ *
+ * NOTE: This routine could be operating in interrupt context. So,
+ * don't try to sleep here (till interrupt threads work!!)
+ */
+LOCAL int
+pcibr_error_handler(
+ error_handler_arg_t einfo,
+ int error_code,
+ ioerror_mode_t mode,
+ ioerror_t *ioe)
+{
+ pcibr_soft_t pcibr_soft;
+ int retval = IOERROR_BADERRORCODE;
+ devfs_handle_t xconn_vhdl,pcibr_vhdl;
+#if defined(CONFIG_SGI_IO_ERROR_HANDLING)
+ error_state_t e_state;
+#endif
+ pcibr_soft = (pcibr_soft_t) einfo;
+
+ xconn_vhdl = pcibr_soft->bs_conn;
+ pcibr_vhdl = pcibr_soft->bs_vhdl;
+
+#if defined(CONFIG_SGI_IO_ERROR_HANDLING)
+ e_state = error_state_get(xconn_vhdl);
+
+ if (error_state_set(pcibr_vhdl, e_state) ==
+ ERROR_RETURN_CODE_CANNOT_SET_STATE)
+ return(IOERROR_UNHANDLED);
+#endif
+
+ /* If we are in the action handling phase clean out the error state
+ * on the xswitch.
+ */
+#if defined(CONFIG_SGI_IO_ERROR_HANDLING)
+ if (e_state == ERROR_STATE_ACTION)
+ (void)error_state_set(xconn_vhdl, ERROR_STATE_NONE);
+#endif
+
+#if DEBUG && ERROR_DEBUG
+ printk("%s: pcibr_error_handler\n", pcibr_soft->bs_name);
+#endif
+
+ ASSERT(pcibr_soft != NULL);
+
+ if (error_code & IOECODE_PIO)
+ retval = pcibr_pioerror(pcibr_soft, error_code, mode, ioe);
+
+ if (error_code & IOECODE_DMA) {
+ if (error_code & IOECODE_READ) {
+ /*
+ * A DMA read error occurs when a device attached to the bridge
+ * tries to read some data from system memory, and this
+ * either results in a timeout or an access error.
+ * The first case is indicated by the "XREAD_REQ_TOUT" bit
+ * and the second case by the "RESP_XTALK_ERROR" bit in the bridge error
+ * interrupt status register.
+ *
+ * pcibr_error_intr_handler would get invoked first, and it has
+ * the responsibility of calling pcibr_error_handler with
+ * suitable parameters.
+ */
+
+ retval = pcibr_dmard_error(pcibr_soft, error_code, MODE_DEVERROR, ioe);
+ }
+ if (error_code & IOECODE_WRITE) {
+ /*
+ * A device attached to this bridge has been generating
+ * bad DMA writes. Find out the device attached, and
+ * slap it on the wrist.
+ */
+
+ retval = pcibr_dmawr_error(pcibr_soft, error_code, MODE_DEVERROR, ioe);
+ }
+ }
+ return retval;
+
+}
+
+/*
+ * Reenable a device after handling the error.
+ * This is called by the lower layers when they wish to be reenabled
+ * after an error.
+ * Note that each layer would be calling the previous layer to reenable
+ * first, before going ahead with its own re-enabling.
+ */ + +int +pcibr_error_devenable(devfs_handle_t pconn_vhdl, int error_code) +{ + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + pciio_slot_t pciio_slot = pciio_info_slot_get(pciio_info); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + + ASSERT(error_code & IOECODE_PIO); + + /* If the error is not known to be a write, + * we have to call devenable. + * write errors are isolated to the bridge. + */ + if (!(error_code & IOECODE_WRITE)) { + devfs_handle_t xconn_vhdl = pcibr_soft->bs_conn; + int rc; + + rc = xtalk_error_devenable(xconn_vhdl, pciio_slot, error_code); + if (rc != IOERROR_HANDLED) + return rc; + } + pcibr_error_cleanup(pcibr_soft, error_code); + return IOERROR_HANDLED; +} + +/* ===================================================================== + * CONFIGURATION MANAGEMENT + */ +/*ARGSUSED */ +void +pcibr_provider_startup(devfs_handle_t pcibr) +{ +} + +/*ARGSUSED */ +void +pcibr_provider_shutdown(devfs_handle_t pcibr) +{ +} + +int +pcibr_reset(devfs_handle_t conn) +{ + pciio_info_t pciio_info = pciio_info_get(conn); + pciio_slot_t pciio_slot = pciio_info_slot_get(pciio_info); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + bridge_t *bridge = pcibr_soft->bs_base; + bridgereg_t ctlreg; + unsigned cfgctl[8]; + unsigned s; + int f, nf; + pcibr_info_h pcibr_infoh; + pcibr_info_t pcibr_info; + int win; + + if (pcibr_soft->bs_slot[pciio_slot].has_host) { + pciio_slot = pcibr_soft->bs_slot[pciio_slot].host_slot; + pcibr_info = pcibr_soft->bs_slot[pciio_slot].bss_infos[0]; + } + if (pciio_slot < 4) { + s = pcibr_lock(pcibr_soft); + nf = pcibr_soft->bs_slot[pciio_slot].bss_ninfo; + pcibr_infoh = pcibr_soft->bs_slot[pciio_slot].bss_infos; + for (f = 0; f < nf; ++f) + if (pcibr_infoh[f]) + cfgctl[f] = bridge->b_type0_cfg_dev[pciio_slot].f[f].l[PCI_CFG_COMMAND / 4]; + + ctlreg = bridge->b_wid_control; + bridge->b_wid_control = ctlreg | BRIDGE_CTRL_RST(pciio_slot); + /* XXX delay? */ + bridge->b_wid_control = ctlreg; + /* XXX delay? */ + + for (f = 0; f < nf; ++f) +#ifdef IRIX + if (pcibr_info = pcibr_infoh[f]) +#else + if ((pcibr_info = pcibr_infoh[f])) +#endif + for (win = 0; win < 6; ++win) + if (pcibr_info->f_window[win].w_base != 0) + bridge->b_type0_cfg_dev[pciio_slot].f[f].l[PCI_CFG_BASE_ADDR(win) / 4] = + pcibr_info->f_window[win].w_base; + for (f = 0; f < nf; ++f) + if (pcibr_infoh[f]) + bridge->b_type0_cfg_dev[pciio_slot].f[f].l[PCI_CFG_COMMAND / 4] = cfgctl[f]; + pcibr_unlock(pcibr_soft, s); + + return 0; + } +#ifdef SUPPORT_PRINTING_V_FORMAT + PRINT_WARNING( "%v: pcibr_reset unimplemented for slot %d\n", + conn, pciio_slot); +#endif + return -1; +} + +pciio_endian_t +pcibr_endian_set(devfs_handle_t pconn_vhdl, + pciio_endian_t device_end, + pciio_endian_t desired_end) +{ + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + pciio_slot_t pciio_slot = pciio_info_slot_get(pciio_info); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + bridgereg_t devreg; + unsigned s; + + /* + * Bridge supports hardware swapping; so we can always + * arrange for the caller's desired endianness. + */ + + s = pcibr_lock(pcibr_soft); + devreg = pcibr_soft->bs_slot[pciio_slot].bss_device; + if (device_end != desired_end) + devreg |= BRIDGE_DEV_SWAP_BITS; + else + devreg &= ~BRIDGE_DEV_SWAP_BITS; + + /* NOTE- if we ever put SWAP bits + * onto the disabled list, we will + * have to change the logic here. 
+ */
+ if (pcibr_soft->bs_slot[pciio_slot].bss_device != devreg) {
+ bridge_t *bridge = pcibr_soft->bs_base;
+
+ bridge->b_device[pciio_slot].reg = devreg;
+ pcibr_soft->bs_slot[pciio_slot].bss_device = devreg;
+ bridge->b_wid_tflush; /* wait until Bridge PIO complete */
+ }
+ pcibr_unlock(pcibr_soft, s);
+
+#if DEBUG && PCIBR_DEV_DEBUG
+ printk("pcibr Device(%d): 0x%lx\n", pciio_slot, (unsigned long) devreg);
+#endif
+
+ return desired_end;
+}
+
+/* This (re)sets the GBR and REALTIME bits and also keeps track of how
+ * many sets are outstanding. Reset succeeds only if the number of outstanding
+ * sets == 1.
+ */
+int
+pcibr_priority_bits_set(pcibr_soft_t pcibr_soft,
+ pciio_slot_t pciio_slot,
+ pciio_priority_t device_prio)
+{
+ int s;
+ int *counter;
+ bridgereg_t rtbits = 0;
+ bridgereg_t devreg;
+ int rc = PRIO_SUCCESS;
+
+ /* in dual-slot configurations, the host and the
+ * guest have separate DMA resources, so they
+ * have separate requirements for priority bits.
+ */
+
+ counter = &(pcibr_soft->bs_slot[pciio_slot].bss_pri_uctr);
+
+ /*
+ * Bridge supports PCI notions of LOW and HIGH priority
+ * arbitration rings via a "REAL_TIME" bit in the per-device
+ * Bridge register. The "GBR" bit controls access to the GBR
+ * ring on the xbow. These two bits are (re)set together.
+ *
+ * XXX- Bug in Rev B Bridge Si:
+ * Symptom: Prefetcher starts operating incorrectly. This happens
+ * due to corruption of the address storage ram in the prefetcher
+ * when a non-real time pci request is pulled and a real-time one is
+ * put in its place. Workaround: Use only a single arbitration ring
+ * on pci bus. GBR and RR can still be uniquely used per
+ * device. NETLIST MERGE DONE, WILL BE FIXED IN REV C.
+ */
+
+ if (pcibr_soft->bs_rev_num != BRIDGE_PART_REV_B)
+ rtbits |= BRIDGE_DEV_RT;
+
+ /* NOTE- if we ever put DEV_RT or DEV_GBR on
+ * the disabled list, we will have to take
+ * it into account here.
+ */
+
+ s = pcibr_lock(pcibr_soft);
+ devreg = pcibr_soft->bs_slot[pciio_slot].bss_device;
+ if (device_prio == PCI_PRIO_HIGH) {
+#ifdef IRIX
+ if (++*counter == 1)
+#else
+ if ((++*counter == 1)) {
+#endif
+ if (rtbits)
+ devreg |= rtbits;
+ else
+ rc = PRIO_FAIL;
+#ifndef IRIX
+ }
+#endif
+ } else if (device_prio == PCI_PRIO_LOW) {
+ if (*counter <= 0)
+ rc = PRIO_FAIL;
+ else if (--*counter == 0)
+ if (rtbits)
+ devreg &= ~rtbits;
+ }
+ if (pcibr_soft->bs_slot[pciio_slot].bss_device != devreg) {
+ bridge_t *bridge = pcibr_soft->bs_base;
+
+ bridge->b_device[pciio_slot].reg = devreg;
+ pcibr_soft->bs_slot[pciio_slot].bss_device = devreg;
+ bridge->b_wid_tflush; /* wait until Bridge PIO complete */
+ }
+ pcibr_unlock(pcibr_soft, s);
+
+ return rc;
+}
+
+pciio_priority_t
+pcibr_priority_set(devfs_handle_t pconn_vhdl,
+ pciio_priority_t device_prio)
+{
+ pciio_info_t pciio_info = pciio_info_get(pconn_vhdl);
+ pciio_slot_t pciio_slot = pciio_info_slot_get(pciio_info);
+ pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info);
+
+ (void) pcibr_priority_bits_set(pcibr_soft, pciio_slot, device_prio);
+
+ return device_prio;
+}
+
+/*
+ * Interfaces to allow special (e.g. SGI) drivers to set/clear
+ * Bridge-specific device flags. Many flags are modified through
+ * PCI-generic interfaces; we don't allow them to be directly
+ * manipulated here. Only flags that at this point seem pretty
+ * Bridge-specific can be set through these special interfaces.
+ * We may add more flags as the need arises, or remove flags and
+ * create PCI-generic interfaces as the need arises.
+ *
+ * Returns 0 on failure, 1 on success
+ */
+int
+pcibr_device_flags_set(devfs_handle_t pconn_vhdl,
+ pcibr_device_flags_t flags)
+{
+ pciio_info_t pciio_info = pciio_info_get(pconn_vhdl);
+ pciio_slot_t pciio_slot = pciio_info_slot_get(pciio_info);
+ pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info);
+ bridgereg_t set = 0;
+ bridgereg_t clr = 0;
+
+ ASSERT((flags & PCIBR_DEVICE_FLAGS) == flags);
+
+ if (flags & PCIBR_WRITE_GATHER)
+ set |= BRIDGE_DEV_PMU_WRGA_EN;
+ if (flags & PCIBR_NOWRITE_GATHER)
+ clr |= BRIDGE_DEV_PMU_WRGA_EN;
+
+ if (flags & PCIBR_WRITE_GATHER)
+ set |= BRIDGE_DEV_DIR_WRGA_EN;
+ if (flags & PCIBR_NOWRITE_GATHER)
+ clr |= BRIDGE_DEV_DIR_WRGA_EN;
+
+ if (flags & PCIBR_PREFETCH)
+ set |= BRIDGE_DEV_PREF;
+ if (flags & PCIBR_NOPREFETCH)
+ clr |= BRIDGE_DEV_PREF;
+
+ if (flags & PCIBR_PRECISE)
+ set |= BRIDGE_DEV_PRECISE;
+ if (flags & PCIBR_NOPRECISE)
+ clr |= BRIDGE_DEV_PRECISE;
+
+ if (flags & PCIBR_BARRIER)
+ set |= BRIDGE_DEV_BARRIER;
+ if (flags & PCIBR_NOBARRIER)
+ clr |= BRIDGE_DEV_BARRIER;
+
+ if (flags & PCIBR_64BIT)
+ set |= BRIDGE_DEV_DEV_SIZE;
+ if (flags & PCIBR_NO64BIT)
+ clr |= BRIDGE_DEV_DEV_SIZE;
+
+ if (set || clr) {
+ bridgereg_t devreg;
+ unsigned s;
+
+ s = pcibr_lock(pcibr_soft);
+ devreg = pcibr_soft->bs_slot[pciio_slot].bss_device;
+#ifdef IRIX
+ devreg = devreg & ~clr | set;
+#else
+ devreg = (devreg & ~clr) | set;
+#endif
+ if (pcibr_soft->bs_slot[pciio_slot].bss_device != devreg) {
+ bridge_t *bridge = pcibr_soft->bs_base;
+
+ bridge->b_device[pciio_slot].reg = devreg;
+ pcibr_soft->bs_slot[pciio_slot].bss_device = devreg;
+ bridge->b_wid_tflush; /* wait until Bridge PIO complete */
+ }
+ pcibr_unlock(pcibr_soft, s);
+#if DEBUG && PCIBR_DEV_DEBUG
+ printk("pcibr Device(%d): 0x%lx\n", pciio_slot, (unsigned long) devreg);
+#endif
+ }
+ return (1);
+}
+
+#ifdef LITTLE_ENDIAN
+/*
+ * on sn-ia we need to twiddle the addresses going out
+ * the pci bus because we use the unswizzled synergy space
+ * (the alternative is to use the swizzled synergy space
+ * and byte swap the data)
+ */
+#define CB(b,r) (((volatile uint8_t *) (b))[((r)^4)])
+#define CS(b,r) (((volatile uint16_t *) (b))[(((r)^4)/2)])
+#define CW(b,r) (((volatile uint32_t *) (b))[(((r)^4)/4)])
+#else
+#define CB(b,r) (((volatile uint8_t *) (b))[(r)^3])
+#define CS(b,r) (((volatile uint16_t *) (b))[((r)/2)^1])
+#define CW(b,r) (((volatile uint32_t *) (b))[(r)/4])
+#endif /* LITTLE_ENDIAN */
+
+
+LOCAL cfg_p
+pcibr_config_addr(devfs_handle_t conn,
+ unsigned reg)
+{
+ pcibr_info_t pcibr_info;
+ pciio_slot_t pciio_slot;
+ pciio_function_t pciio_func;
+ pcibr_soft_t pcibr_soft;
+ bridge_t *bridge;
+ cfg_p cfgbase = (cfg_p)0;
+
+ pcibr_info = pcibr_info_get(conn);
+
+ pciio_slot = pcibr_info->f_slot;
+ if (pciio_slot == PCIIO_SLOT_NONE)
+ pciio_slot = PCI_TYPE1_SLOT(reg);
+
+ pciio_func = pcibr_info->f_func;
+ if (pciio_func == PCIIO_FUNC_NONE)
+ pciio_func = PCI_TYPE1_FUNC(reg);
+
+ pcibr_soft = (pcibr_soft_t) pcibr_info->f_mfast;
+
+ if ( (pcibr_soft_t)0 != pcibr_soft ) {
+ bridge = pcibr_soft->bs_base;
+ if ( (bridge_t *)0 != bridge ) {
+ cfgbase = bridge->b_type0_cfg_dev[pciio_slot].f[pciio_func].l;
+ }
+ }
+
+
+ return cfgbase;
+}
+
+uint64_t
+pcibr_config_get(devfs_handle_t conn,
+ unsigned reg,
+ unsigned size)
+{
+ return do_pcibr_config_get(pcibr_config_addr(conn, reg),
+ PCI_TYPE1_REG(reg), size);
+}
+
+LOCAL uint64_t
+do_pcibr_config_get(
+ cfg_p cfgbase,
+ unsigned reg,
+ unsigned size)
+{
+ 
unsigned value; + + + value = CW(cfgbase, reg); + + if (reg & 3) + value >>= 8 * (reg & 3); + if (size < 4) + value &= (1 << (8 * size)) - 1; + + return value; +} + +void +pcibr_config_set(devfs_handle_t conn, + unsigned reg, + unsigned size, + uint64_t value) +{ + do_pcibr_config_set(pcibr_config_addr(conn, reg), + PCI_TYPE1_REG(reg), size, value); +} + +LOCAL void +do_pcibr_config_set(cfg_p cfgbase, + unsigned reg, + unsigned size, + uint64_t value) +{ + switch (size) { + case 1: + CB(cfgbase, reg) = value; + break; + case 2: + if (reg & 1) { + CB(cfgbase, reg) = value; + CB(cfgbase, reg + 1) = value >> 8; + } else + CS(cfgbase, reg) = value; + break; + case 3: + if (reg & 1) { + CB(cfgbase, reg) = value; + CS(cfgbase, reg + 1) = value >> 8; + } else { + CS(cfgbase, reg) = value; + CB(cfgbase, reg + 2) = value >> 16; + } + break; + + case 4: + CW(cfgbase, reg) = value; + break; + } +} + +pciio_provider_t pcibr_provider = +{ + (pciio_piomap_alloc_f *) pcibr_piomap_alloc, + (pciio_piomap_free_f *) pcibr_piomap_free, + (pciio_piomap_addr_f *) pcibr_piomap_addr, + (pciio_piomap_done_f *) pcibr_piomap_done, + (pciio_piotrans_addr_f *) pcibr_piotrans_addr, + (pciio_piospace_alloc_f *) pcibr_piospace_alloc, + (pciio_piospace_free_f *) pcibr_piospace_free, + + (pciio_dmamap_alloc_f *) pcibr_dmamap_alloc, + (pciio_dmamap_free_f *) pcibr_dmamap_free, + (pciio_dmamap_addr_f *) pcibr_dmamap_addr, + (pciio_dmamap_list_f *) pcibr_dmamap_list, + (pciio_dmamap_done_f *) pcibr_dmamap_done, + (pciio_dmatrans_addr_f *) pcibr_dmatrans_addr, + (pciio_dmatrans_list_f *) pcibr_dmatrans_list, + (pciio_dmamap_drain_f *) pcibr_dmamap_drain, + (pciio_dmaaddr_drain_f *) pcibr_dmaaddr_drain, + (pciio_dmalist_drain_f *) pcibr_dmalist_drain, + + (pciio_intr_alloc_f *) pcibr_intr_alloc, + (pciio_intr_free_f *) pcibr_intr_free, + (pciio_intr_connect_f *) pcibr_intr_connect, + (pciio_intr_disconnect_f *) pcibr_intr_disconnect, + (pciio_intr_cpu_get_f *) pcibr_intr_cpu_get, + + (pciio_provider_startup_f *) pcibr_provider_startup, + (pciio_provider_shutdown_f *) pcibr_provider_shutdown, + (pciio_reset_f *) pcibr_reset, + (pciio_write_gather_flush_f *) pcibr_write_gather_flush, + (pciio_endian_set_f *) pcibr_endian_set, + (pciio_priority_set_f *) pcibr_priority_set, + (pciio_config_get_f *) pcibr_config_get, + (pciio_config_set_f *) pcibr_config_set, + + (pciio_error_devenable_f *) pcibr_error_devenable, + (pciio_error_extract_f *) pcibr_error_extract, +}; + +LOCAL pcibr_hints_t +pcibr_hints_get(devfs_handle_t xconn_vhdl, int alloc) +{ + arbitrary_info_t ainfo = 0; + graph_error_t rv; + pcibr_hints_t hint; + + rv = hwgraph_info_get_LBL(xconn_vhdl, INFO_LBL_PCIBR_HINTS, &ainfo); + + if (alloc && (rv != GRAPH_SUCCESS)) { + + NEW(hint); + hint->rrb_alloc_funct = NULL; + hint->ph_intr_bits = NULL; + rv = hwgraph_info_add_LBL(xconn_vhdl, + INFO_LBL_PCIBR_HINTS, + (arbitrary_info_t) hint); + if (rv != GRAPH_SUCCESS) + goto abnormal_exit; + + rv = hwgraph_info_get_LBL(xconn_vhdl, INFO_LBL_PCIBR_HINTS, &ainfo); + + if (rv != GRAPH_SUCCESS) + goto abnormal_exit; + + if (ainfo != (arbitrary_info_t) hint) + goto abnormal_exit; + } + return (pcibr_hints_t) ainfo; + +abnormal_exit: +#ifdef IRIX + printf("SHOULD NOT BE HERE\n"); +#endif + DEL(hint); + return(NULL); + +} + +void +pcibr_hints_fix_some_rrbs(devfs_handle_t xconn_vhdl, unsigned mask) +{ + pcibr_hints_t hint = pcibr_hints_get(xconn_vhdl, 1); + + if (hint) + hint->ph_rrb_fixed = mask; +#if DEBUG + else + printk("pcibr_hints_fix_rrbs: pcibr_hints_get failed at\n" + "\t%p\n", 
xconn_vhdl); +#endif +} + +void +pcibr_hints_fix_rrbs(devfs_handle_t xconn_vhdl) +{ + pcibr_hints_fix_some_rrbs(xconn_vhdl, 0xFF); +} + +void +pcibr_hints_dualslot(devfs_handle_t xconn_vhdl, + pciio_slot_t host, + pciio_slot_t guest) +{ + pcibr_hints_t hint = pcibr_hints_get(xconn_vhdl, 1); + + if (hint) + hint->ph_host_slot[guest] = host + 1; +#if DEBUG + else + printk("pcibr_hints_dualslot: pcibr_hints_get failed at\n" + "\t%p\n", xconn_vhdl); +#endif +} + +void +pcibr_hints_intr_bits(devfs_handle_t xconn_vhdl, + pcibr_intr_bits_f *xxx_intr_bits) +{ + pcibr_hints_t hint = pcibr_hints_get(xconn_vhdl, 1); + + if (hint) + hint->ph_intr_bits = xxx_intr_bits; +#if DEBUG + else + printk("pcibr_hints_intr_bits: pcibr_hints_get failed at\n" + "\t%p\n", xconn_vhdl); +#endif +} + +void +pcibr_set_rrb_callback(devfs_handle_t xconn_vhdl, rrb_alloc_funct_t rrb_alloc_funct) +{ + pcibr_hints_t hint = pcibr_hints_get(xconn_vhdl, 1); + + if (hint) + hint->rrb_alloc_funct = rrb_alloc_funct; +} + +void +pcibr_hints_handsoff(devfs_handle_t xconn_vhdl) +{ + pcibr_hints_t hint = pcibr_hints_get(xconn_vhdl, 1); + + if (hint) + hint->ph_hands_off = 1; +#if DEBUG + else + printk("pcibr_hints_handsoff: pcibr_hints_get failed at\n" + "\t%p\n", xconn_vhdl); +#endif +} + +void +pcibr_hints_subdevs(devfs_handle_t xconn_vhdl, + pciio_slot_t slot, + uint64_t subdevs) +{ + arbitrary_info_t ainfo = 0; + char sdname[16]; + devfs_handle_t pconn_vhdl = GRAPH_VERTEX_NONE; + + sprintf(sdname, "pci/%d", slot); + (void) hwgraph_path_add(xconn_vhdl, sdname, &pconn_vhdl); + if (pconn_vhdl == GRAPH_VERTEX_NONE) { +#if DEBUG + printk("pcibr_hints_subdevs: hwgraph_path_create failed at\n" + "\t%p (seeking %s)\n", xconn_vhdl, sdname); +#endif + return; + } + hwgraph_info_get_LBL(pconn_vhdl, INFO_LBL_SUBDEVS, &ainfo); + if (ainfo == 0) { + uint64_t *subdevp; + + NEW(subdevp); + if (!subdevp) { +#if DEBUG + printk("pcibr_hints_subdevs: subdev ptr alloc failed at\n" + "\t%p\n", pconn_vhdl); +#endif + return; + } + *subdevp = subdevs; + hwgraph_info_add_LBL(pconn_vhdl, INFO_LBL_SUBDEVS, (arbitrary_info_t) subdevp); + hwgraph_info_get_LBL(pconn_vhdl, INFO_LBL_SUBDEVS, &ainfo); + if (ainfo == (arbitrary_info_t) subdevp) + return; + DEL(subdevp); +#ifdef IRIX + if (ainfo == NULL) +#else + if (ainfo == (arbitrary_info_t) NULL) +#endif + { +#if DEBUG + printk("pcibr_hints_subdevs: null subdevs ptr at\n" + "\t%p\n", pconn_vhdl); +#endif + return; + } +#if DEBUG + printk("pcibr_subdevs_get: dup subdev add_LBL at\n" + "\t%p\n", pconn_vhdl); +#endif + } + *(uint64_t *) ainfo = subdevs; +} + + +#ifdef colin + +#include <sys/idbg.h> +#include <sys/idbgentry.h> + +char *pci_space[] = {"NONE", + "ROM", + "IO", + "", + "MEM", + "MEM32", + "MEM64", + "CFG", + "WIN0", + "WIN1", + "WIN2", + "WIN3", + "WIN4", + "WIN5", + "", + "BAD"}; + +void +idbg_pss_func(pcibr_info_h pcibr_infoh, int func) +{ + pcibr_info_t pcibr_info = pcibr_infoh[func]; + char name[MAXDEVNAME]; + int win; + + if (!pcibr_info) + return; + qprintf("Per-slot Function Info\n"); +#ifdef SUPPORT_PRINTING_V_FORMAT + sprintf(name, "%v", pcibr_info->f_vertex); +#endif + qprintf("\tSlot Name : %s\n",name); + qprintf("\tPCI Bus : %d ",pcibr_info->f_bus); + qprintf("Slot : %d ", pcibr_info->f_slot); + qprintf("Function : %d ", pcibr_info->f_func); + qprintf("VendorId : 0x%x " , pcibr_info->f_vendor); + qprintf("DeviceId : 0x%x\n", pcibr_info->f_device); +#ifdef SUPPORT_PRINTING_V_FORMAT + sprintf(name, "%v", pcibr_info->f_master); +#endif + qprintf("\tBus provider : %s\n",name); + 
qprintf("\tProvider Fns : 0x%x ", pcibr_info->f_pops); + qprintf("Error Handler : 0x%x Arg 0x%x\n", + pcibr_info->f_efunc,pcibr_info->f_einfo); + for(win = 0 ; win < 6 ; win++) + qprintf("\tBase Reg #%d space %s base 0x%x size 0x%x\n", + win,pci_space[pcibr_info->f_window[win].w_space], + pcibr_info->f_window[win].w_base, + pcibr_info->f_window[win].w_size); + + qprintf("\tRom base 0x%x size 0x%x\n", + pcibr_info->f_rbase,pcibr_info->f_rsize); + + qprintf("\tInterrupt Bit Map\n"); + qprintf("\t\tPCI Int#\tBridge Pin#\n"); + for (win = 0 ; win < 4; win++) + qprintf("\t\tINT%c\t\t%d\n",win+'A',pcibr_info->f_ibit[win]); + qprintf("\n"); +} + + +void +idbg_pss_info(pcibr_soft_t pcibr_soft, pciio_slot_t slot) +{ + pcibr_soft_slot_t pss; + char slot_conn_name[MAXDEVNAME]; + int func; + + pss = &pcibr_soft->bs_slot[slot]; + qprintf("PCI INFRASTRUCTURAL INFO FOR SLOT %d\n", slot); + qprintf("\tHost Present ? %s ", pss->has_host ? "yes" : "no"); + qprintf("\tHost Slot : %d\n",pss->host_slot); + sprintf(slot_conn_name, "%v", pss->slot_conn); + qprintf("\tSlot Conn : %s\n",slot_conn_name); + qprintf("\t#Functions : %d\n",pss->bss_ninfo); + for (func = 0; func < pss->bss_ninfo; func++) + idbg_pss_func(pss->bss_infos,func); + qprintf("\tSpace : %s ",pci_space[pss->bss_devio.bssd_space]); + qprintf("\tBase : 0x%x ", pss->bss_devio.bssd_base); + qprintf("\tShadow Devreg : 0x%x\n", pss->bss_device); + qprintf("\tUsage counts : pmu %d d32 %d d64 %d\n", + pss->bss_pmu_uctr,pss->bss_d32_uctr,pss->bss_d64_uctr); + + qprintf("\tDirect Trans Info : d64_base 0x%x d64_flags 0x%x" + "d32_base 0x%x d32_flags 0x%x\n", + pss->bss_d64_base, pss->bss_d64_flags, + pss->bss_d32_base, pss->bss_d32_flags); + + qprintf("\tExt ATEs active ? %s", + pss->bss_ext_ates_active ? "yes" : "no"); + qprintf(" Command register : 0x%x ", pss->bss_cmd_pointer); + qprintf(" Shadow command val : 0x%x\n", pss->bss_cmd_shadow); + + qprintf("\tRRB Info : Valid %d+%d Reserved %d\n", + pcibr_soft->bs_rrb_valid[slot], + pcibr_soft->bs_rrb_valid[slot + PCIBR_RRB_SLOT_VIRTUAL], + pcibr_soft->bs_rrb_res[slot]); + +} + +int ips = 0; + +void +idbg_pss(pcibr_soft_t pcibr_soft) +{ + pciio_slot_t slot; + + + if (ips >= 0 && ips < 8) + idbg_pss_info(pcibr_soft,ips); + else if (ips < 0) + for (slot = 0; slot < 8; slot++) + idbg_pss_info(pcibr_soft,slot); + else + qprintf("Invalid ips %d\n",ips); +} + +#endif /* colin */ + +int +pcibr_dma_enabled(devfs_handle_t pconn_vhdl) +{ + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + pcibr_soft_t pcibr_soft = (pcibr_soft_t) pciio_info_mfast_get(pciio_info); + + + return xtalk_dma_enabled(pcibr_soft->bs_conn); +} diff --git a/arch/ia64/sn/io/pciio.c b/arch/ia64/sn/io/pciio.c new file mode 100644 index 000000000..618fcf51c --- /dev/null +++ b/arch/ia64/sn/io/pciio.c @@ -0,0 +1,1562 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. 
+ * Copyright (C) 2000 by Colin Ngam + */ + +#define USRPCI 0 + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/hcl_util.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/pci/bridge.h> +#include <asm/sn/ioerror_handling.h> +#include <asm/sn/pci/pciio.h> +#include <asm/sn/pci/pciio_private.h> + +#define DEBUG_PCIIO +#undef DEBUG_PCIIO /* turn this on for yet more console output */ + + +#define NEW(ptr) (ptr = kmalloc(sizeof (*(ptr)), GFP_KERNEL)) +#define DEL(ptr) (kfree(ptr)) + +char pciio_info_fingerprint[] = "pciio_info"; + +cdl_p pciio_registry = NULL; + +int +badaddr_val(volatile void *addr, int len, volatile void *ptr) +{ + switch (len) { + case 4: *(volatile u32*)ptr = *(((volatile u32*)(((u64) addr)^4))); + default: printk("FIXME: argh fix badaddr_val\n"); + } + /* no such thing as a bad addr .... */ + return(0); +} + + +void +cmn_err_tag(int seqnumber, register int level, char *fmt, ...) +{ +} + +nasid_t +get_console_nasid(void) +{ +#ifdef IRIX + return console_nasid; +#else + return 0; +#endif +} + +int +hub_dma_enabled(devfs_handle_t xconn_vhdl) +{ + return(0); +} + +int +hub_error_devenable(devfs_handle_t xconn_vhdl, int devnum, int error_code) +{ + return(0); +} + +void +ioerror_dump(char *name, int error_code, int error_mode, ioerror_t *ioerror) +{ +} + +/****** + ****** end hack defines ...... + ******/ + + + + +/* ===================================================================== + * PCI Generic Bus Provider + * Implement PCI provider operations. The pciio* layer provides a + * platform-independent interface for PCI devices. This layer + * switches among the possible implementations of a PCI adapter. + */ + +/* ===================================================================== + * Provider Function Location SHORTCUT + * + * On platforms with only one possible PCI provider, macros can be + * set up at the top that cause the table lookups and indirections to + * completely disappear. + */ + +#if CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 || CONFIG_IA64_GENERIC +/* + * For the moment, we will assume that IP27 + * only use Bridge ASICs to provide PCI support. 
+ */ +#include <asm/sn/pci/pcibr.h> +#define DEV_FUNC(dev,func) pcibr_##func +#define CAST_PIOMAP(x) ((pcibr_piomap_t)(x)) +#define CAST_DMAMAP(x) ((pcibr_dmamap_t)(x)) +#define CAST_INTR(x) ((pcibr_intr_t)(x)) +#endif /* CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 */ + +/* ===================================================================== + * Function Table of Contents + */ + +#if !defined(DEV_FUNC) +static pciio_provider_t *pciio_to_provider_fns(devfs_handle_t dev); +#endif + +pciio_piomap_t pciio_piomap_alloc(devfs_handle_t, device_desc_t, pciio_space_t, iopaddr_t, size_t, size_t, unsigned); +void pciio_piomap_free(pciio_piomap_t); +caddr_t pciio_piomap_addr(pciio_piomap_t, iopaddr_t, size_t); + +void pciio_piomap_done(pciio_piomap_t); +caddr_t pciio_piotrans_addr(devfs_handle_t, device_desc_t, pciio_space_t, iopaddr_t, size_t, unsigned); +caddr_t pciio_pio_addr(devfs_handle_t, device_desc_t, pciio_space_t, iopaddr_t, size_t, pciio_piomap_t *, unsigned); + +iopaddr_t pciio_piospace_alloc(devfs_handle_t, device_desc_t, pciio_space_t, size_t, size_t); +void pciio_piospace_free(devfs_handle_t, pciio_space_t, iopaddr_t, size_t); + +pciio_dmamap_t pciio_dmamap_alloc(devfs_handle_t, device_desc_t, size_t, unsigned); +void pciio_dmamap_free(pciio_dmamap_t); +iopaddr_t pciio_dmamap_addr(pciio_dmamap_t, paddr_t, size_t); +alenlist_t pciio_dmamap_list(pciio_dmamap_t, alenlist_t, unsigned); +void pciio_dmamap_done(pciio_dmamap_t); +iopaddr_t pciio_dmatrans_addr(devfs_handle_t, device_desc_t, paddr_t, size_t, unsigned); +alenlist_t pciio_dmatrans_list(devfs_handle_t, device_desc_t, alenlist_t, unsigned); +void pciio_dmamap_drain(pciio_dmamap_t); +void pciio_dmaaddr_drain(devfs_handle_t, paddr_t, size_t); +void pciio_dmalist_drain(devfs_handle_t, alenlist_t); +iopaddr_t pciio_dma_addr(devfs_handle_t, device_desc_t, paddr_t, size_t, pciio_dmamap_t *, unsigned); + +pciio_intr_t pciio_intr_alloc(devfs_handle_t, device_desc_t, pciio_intr_line_t, devfs_handle_t); +void pciio_intr_free(pciio_intr_t); +int pciio_intr_connect(pciio_intr_t, intr_func_t, intr_arg_t, void *thread); +void pciio_intr_disconnect(pciio_intr_t); +devfs_handle_t pciio_intr_cpu_get(pciio_intr_t); + +void pciio_slot_func_to_name(char *, pciio_slot_t, pciio_function_t); +static pciio_info_t pciio_cardinfo_get(devfs_handle_t, pciio_slot_t); +int pciio_error_handler(devfs_handle_t, int, ioerror_mode_t, ioerror_t *); +int pciio_error_devenable(devfs_handle_t, int); + +void pciio_provider_startup(devfs_handle_t); +void pciio_provider_shutdown(devfs_handle_t); + +pciio_endian_t pciio_endian_set(devfs_handle_t, pciio_endian_t, pciio_endian_t); +pciio_priority_t pciio_priority_set(devfs_handle_t, pciio_priority_t); +devfs_handle_t pciio_intr_dev_get(pciio_intr_t); + +devfs_handle_t pciio_pio_dev_get(pciio_piomap_t); +pciio_slot_t pciio_pio_slot_get(pciio_piomap_t); +pciio_space_t pciio_pio_space_get(pciio_piomap_t); +iopaddr_t pciio_pio_pciaddr_get(pciio_piomap_t); +ulong pciio_pio_mapsz_get(pciio_piomap_t); +caddr_t pciio_pio_kvaddr_get(pciio_piomap_t); + +devfs_handle_t pciio_dma_dev_get(pciio_dmamap_t); +pciio_slot_t pciio_dma_slot_get(pciio_dmamap_t); + +pciio_info_t pciio_info_chk(devfs_handle_t); +pciio_info_t pciio_info_get(devfs_handle_t); +void pciio_info_set(devfs_handle_t, pciio_info_t); +devfs_handle_t pciio_info_dev_get(pciio_info_t); +pciio_slot_t pciio_info_slot_get(pciio_info_t); +pciio_function_t pciio_info_function_get(pciio_info_t); +pciio_vendor_id_t pciio_info_vendor_id_get(pciio_info_t); +pciio_device_id_t 
pciio_info_device_id_get(pciio_info_t); +devfs_handle_t pciio_info_master_get(pciio_info_t); +arbitrary_info_t pciio_info_mfast_get(pciio_info_t); +pciio_provider_t *pciio_info_pops_get(pciio_info_t); +error_handler_f *pciio_info_efunc_get(pciio_info_t); +error_handler_arg_t *pciio_info_einfo_get(pciio_info_t); +pciio_space_t pciio_info_bar_space_get(pciio_info_t, int); +iopaddr_t pciio_info_bar_base_get(pciio_info_t, int); +size_t pciio_info_bar_size_get(pciio_info_t, int); +iopaddr_t pciio_info_rom_base_get(pciio_info_t); +size_t pciio_info_rom_size_get(pciio_info_t); + +void pciio_init(void); +int pciio_attach(devfs_handle_t); + +void pciio_provider_register(devfs_handle_t, pciio_provider_t *pciio_fns); +void pciio_provider_unregister(devfs_handle_t); +pciio_provider_t *pciio_provider_fns_get(devfs_handle_t); + +int pciio_driver_register(pciio_vendor_id_t, pciio_device_id_t, char *driver_prefix, unsigned); +void pciio_driver_unregister(char *driver_prefix); + +devfs_handle_t pciio_device_register(devfs_handle_t, devfs_handle_t, pciio_slot_t, pciio_function_t, pciio_vendor_id_t, pciio_device_id_t); + +void pciio_device_unregister(devfs_handle_t); +pciio_info_t pciio_device_info_new(pciio_info_t, devfs_handle_t, pciio_slot_t, pciio_function_t, pciio_vendor_id_t, pciio_device_id_t); +void pciio_device_info_free(pciio_info_t); +devfs_handle_t pciio_device_info_register(devfs_handle_t, pciio_info_t); +void pciio_device_info_unregister(devfs_handle_t, pciio_info_t); +int pciio_device_attach(devfs_handle_t); +int pciio_device_detach(devfs_handle_t); +void pciio_error_register(devfs_handle_t, error_handler_f *, error_handler_arg_t); + +int pciio_reset(devfs_handle_t); +int pciio_write_gather_flush(devfs_handle_t); +int pciio_slot_inuse(devfs_handle_t); + +/* ===================================================================== + * Provider Function Location + * + * If there is more than one possible provider for + * this platform, we need to examine the master + * vertex of the current vertex for a provider + * function structure, and indirect through the + * appropriately named member. + */ + +#if !defined(DEV_FUNC) + +static pciio_provider_t * +pciio_to_provider_fns(devfs_handle_t dev) +{ + pciio_info_t card_info; + pciio_provider_t *provider_fns; + + card_info = pciio_info_get(dev); + ASSERT(card_info != NULL); + + provider_fns = pciio_info_pops_get(card_info); + ASSERT(provider_fns != NULL); + + return (provider_fns); +} + +#define DEV_FUNC(dev,func) pciio_to_provider_fns(dev)->func +#define CAST_PIOMAP(x) ((pciio_piomap_t)(x)) +#define CAST_DMAMAP(x) ((pciio_dmamap_t)(x)) +#define CAST_INTR(x) ((pciio_intr_t)(x)) +#endif + +/* + * Many functions are not passed their vertex + * information directly; rather, they must + * dive through a resource map. These macros + * are available to coordinate this detail. 
+ */ +#define PIOMAP_FUNC(map,func) DEV_FUNC((map)->pp_dev,func) +#define DMAMAP_FUNC(map,func) DEV_FUNC((map)->pd_dev,func) +#define INTR_FUNC(intr_hdl,func) DEV_FUNC((intr_hdl)->pi_dev,func) + +/* ===================================================================== + * PIO MANAGEMENT + * + * For mapping system virtual address space to + * pciio space on a specified card + */ + +pciio_piomap_t +pciio_piomap_alloc(devfs_handle_t dev, /* set up mapping for this device */ + device_desc_t dev_desc, /* device descriptor */ + pciio_space_t space, /* CFG, MEM, IO, or a device-decoded window */ + iopaddr_t addr, /* lowest address (or offset in window) */ + size_t byte_count, /* size of region containing our mappings */ + size_t byte_count_max, /* maximum size of a mapping */ + unsigned flags) +{ /* defined in sys/pio.h */ + return (pciio_piomap_t) DEV_FUNC(dev, piomap_alloc) + (dev, dev_desc, space, addr, byte_count, byte_count_max, flags); +} + +void +pciio_piomap_free(pciio_piomap_t pciio_piomap) +{ + PIOMAP_FUNC(pciio_piomap, piomap_free) + (CAST_PIOMAP(pciio_piomap)); +} + +caddr_t +pciio_piomap_addr(pciio_piomap_t pciio_piomap, /* mapping resources */ + iopaddr_t pciio_addr, /* map for this pciio address */ + size_t byte_count) +{ /* map this many bytes */ + pciio_piomap->pp_kvaddr = PIOMAP_FUNC(pciio_piomap, piomap_addr) + (CAST_PIOMAP(pciio_piomap), pciio_addr, byte_count); + + return pciio_piomap->pp_kvaddr; +} + +void +pciio_piomap_done(pciio_piomap_t pciio_piomap) +{ + PIOMAP_FUNC(pciio_piomap, piomap_done) + (CAST_PIOMAP(pciio_piomap)); +} + +caddr_t +pciio_piotrans_addr(devfs_handle_t dev, /* translate for this device */ + device_desc_t dev_desc, /* device descriptor */ + pciio_space_t space, /* CFG, MEM, IO, or a device-decoded window */ + iopaddr_t addr, /* starting address (or offset in window) */ + size_t byte_count, /* map this many bytes */ + unsigned flags) +{ /* (currently unused) */ + return DEV_FUNC(dev, piotrans_addr) + (dev, dev_desc, space, addr, byte_count, flags); +} + +caddr_t +pciio_pio_addr(devfs_handle_t dev, /* translate for this device */ + device_desc_t dev_desc, /* device descriptor */ + pciio_space_t space, /* CFG, MEM, IO, or a device-decoded window */ + iopaddr_t addr, /* starting address (or offset in window) */ + size_t byte_count, /* map this many bytes */ + pciio_piomap_t *mapp, /* where to return the map pointer */ + unsigned flags) +{ /* PIO flags */ + pciio_piomap_t map = 0; + int errfree = 0; + caddr_t res; + + if (mapp) { + map = *mapp; /* possible pre-allocated map */ + *mapp = 0; /* record "no map used" */ + } + + res = pciio_piotrans_addr + (dev, dev_desc, space, addr, byte_count, flags); + if (res) + return res; /* pciio_piotrans worked */ + + if (!map) { + map = pciio_piomap_alloc + (dev, dev_desc, space, addr, byte_count, byte_count, flags); + if (!map) + return res; /* pciio_piomap_alloc failed */ + errfree = 1; + } + + res = pciio_piomap_addr + (map, addr, byte_count); + if (!res) { + if (errfree) + pciio_piomap_free(map); + return res; /* pciio_piomap_addr failed */ + } + if (mapp) + *mapp = map; /* pass back map used */ + + return res; /* pciio_piomap_addr succeeded */ +} + +iopaddr_t +pciio_piospace_alloc(devfs_handle_t dev, /* Device requiring space */ + device_desc_t dev_desc, /* Device descriptor */ + pciio_space_t space, /* MEM32/MEM64/IO */ + size_t byte_count, /* Size of mapping */ + size_t align) +{ /* Alignment needed */ + if (align < NBPP) + align = NBPP; + return DEV_FUNC(dev, piospace_alloc) + (dev, dev_desc, space, byte_count, 
align); +} + +void +pciio_piospace_free(devfs_handle_t dev, /* Device freeing space */ + pciio_space_t space, /* Type of space */ + iopaddr_t pciaddr, /* starting address */ + size_t byte_count) +{ /* Range of address */ + DEV_FUNC(dev, piospace_free) + (dev, space, pciaddr, byte_count); +} + +/* ===================================================================== + * DMA MANAGEMENT + * + * For mapping from pci space to system + * physical space. + */ + +pciio_dmamap_t +pciio_dmamap_alloc(devfs_handle_t dev, /* set up mappings for this device */ + device_desc_t dev_desc, /* device descriptor */ + size_t byte_count_max, /* max size of a mapping */ + unsigned flags) +{ /* defined in dma.h */ + return (pciio_dmamap_t) DEV_FUNC(dev, dmamap_alloc) + (dev, dev_desc, byte_count_max, flags); +} + +void +pciio_dmamap_free(pciio_dmamap_t pciio_dmamap) +{ + DMAMAP_FUNC(pciio_dmamap, dmamap_free) + (CAST_DMAMAP(pciio_dmamap)); +} + +iopaddr_t +pciio_dmamap_addr(pciio_dmamap_t pciio_dmamap, /* use these mapping resources */ + paddr_t paddr, /* map for this address */ + size_t byte_count) +{ /* map this many bytes */ + return DMAMAP_FUNC(pciio_dmamap, dmamap_addr) + (CAST_DMAMAP(pciio_dmamap), paddr, byte_count); +} + +alenlist_t +pciio_dmamap_list(pciio_dmamap_t pciio_dmamap, /* use these mapping resources */ + alenlist_t alenlist, /* map this Address/Length List */ + unsigned flags) +{ + return DMAMAP_FUNC(pciio_dmamap, dmamap_list) + (CAST_DMAMAP(pciio_dmamap), alenlist, flags); +} + +void +pciio_dmamap_done(pciio_dmamap_t pciio_dmamap) +{ + DMAMAP_FUNC(pciio_dmamap, dmamap_done) + (CAST_DMAMAP(pciio_dmamap)); +} + +iopaddr_t +pciio_dmatrans_addr(devfs_handle_t dev, /* translate for this device */ + device_desc_t dev_desc, /* device descriptor */ + paddr_t paddr, /* system physical address */ + size_t byte_count, /* length */ + unsigned flags) +{ /* defined in dma.h */ + return DEV_FUNC(dev, dmatrans_addr) + (dev, dev_desc, paddr, byte_count, flags); +} + +alenlist_t +pciio_dmatrans_list(devfs_handle_t dev, /* translate for this device */ + device_desc_t dev_desc, /* device descriptor */ + alenlist_t palenlist, /* system address/length list */ + unsigned flags) +{ /* defined in dma.h */ + return DEV_FUNC(dev, dmatrans_list) + (dev, dev_desc, palenlist, flags); +} + +iopaddr_t +pciio_dma_addr(devfs_handle_t dev, /* translate for this device */ + device_desc_t dev_desc, /* device descriptor */ + paddr_t paddr, /* system physical address */ + size_t byte_count, /* length */ + pciio_dmamap_t *mapp, /* map to use, then map we used */ + unsigned flags) +{ /* PIO flags */ + pciio_dmamap_t map = 0; + int errfree = 0; + iopaddr_t res; + + if (mapp) { + map = *mapp; /* possible pre-allocated map */ + *mapp = 0; /* record "no map used" */ + } + + res = pciio_dmatrans_addr + (dev, dev_desc, paddr, byte_count, flags); + if (res) + return res; /* pciio_dmatrans worked */ + + if (!map) { + map = pciio_dmamap_alloc + (dev, dev_desc, byte_count, flags); + if (!map) + return res; /* pciio_dmamap_alloc failed */ + errfree = 1; + } + + res = pciio_dmamap_addr + (map, paddr, byte_count); + if (!res) { + if (errfree) + pciio_dmamap_free(map); + return res; /* pciio_dmamap_addr failed */ + } + if (mapp) + *mapp = map; /* pass back map used */ + + return res; /* pciio_dmamap_addr succeeded */ +} + +void +pciio_dmamap_drain(pciio_dmamap_t map) +{ + DMAMAP_FUNC(map, dmamap_drain) + (CAST_DMAMAP(map)); +} + +void +pciio_dmaaddr_drain(devfs_handle_t dev, paddr_t addr, size_t size) +{ + DEV_FUNC(dev, dmaaddr_drain) + (dev, 
addr, size); +} + +void +pciio_dmalist_drain(devfs_handle_t dev, alenlist_t list) +{ + DEV_FUNC(dev, dmalist_drain) + (dev, list); +} + +/* ===================================================================== + * INTERRUPT MANAGEMENT + * + * Allow crosstalk devices to establish interrupts + */ + +/* + * Allocate resources required for an interrupt as specified in intr_desc. + * Return resource handle in intr_hdl. + */ +pciio_intr_t +pciio_intr_alloc(devfs_handle_t dev, /* which Crosstalk device */ + device_desc_t dev_desc, /* device descriptor */ + pciio_intr_line_t lines, /* INTR line(s) to attach */ + devfs_handle_t owner_dev) +{ /* owner of this interrupt */ + return (pciio_intr_t) DEV_FUNC(dev, intr_alloc) + (dev, dev_desc, lines, owner_dev); +} + +/* + * Free resources consumed by intr_alloc. + */ +void +pciio_intr_free(pciio_intr_t intr_hdl) +{ + INTR_FUNC(intr_hdl, intr_free) + (CAST_INTR(intr_hdl)); +} + +/* + * Associate resources allocated with a previous pciio_intr_alloc call with the + * described handler, arg, name, etc. + * + * Returns 0 on success, returns <0 on failure. + */ +int +pciio_intr_connect(pciio_intr_t intr_hdl, /* pciio intr resource handle */ + intr_func_t intr_func, /* pciio intr handler */ + intr_arg_t intr_arg, /* arg to intr handler */ + void *thread) +{ /* intr thread to use */ + return INTR_FUNC(intr_hdl, intr_connect) + (CAST_INTR(intr_hdl), intr_func, intr_arg, thread); +} + +/* + * Disassociate handler with the specified interrupt. + */ +void +pciio_intr_disconnect(pciio_intr_t intr_hdl) +{ + INTR_FUNC(intr_hdl, intr_disconnect) + (CAST_INTR(intr_hdl)); +} + +/* + * Return a hwgraph vertex that represents the CPU currently + * targeted by an interrupt. + */ +devfs_handle_t +pciio_intr_cpu_get(pciio_intr_t intr_hdl) +{ + return INTR_FUNC(intr_hdl, intr_cpu_get) + (CAST_INTR(intr_hdl)); +} + +/* ===================================================================== + * ERROR MANAGEMENT + */ + +void +pciio_slot_func_to_name(char *name, + pciio_slot_t slot, + pciio_function_t func) +{ + /* + * standard connection points: + * + * PCIIO_SLOT_NONE: .../pci/direct + * PCIIO_FUNC_NONE: .../pci/<SLOT> ie. .../pci/3 + * multifunction: .../pci/<SLOT><FUNC> ie. .../pci/3c + */ + + if (slot == PCIIO_SLOT_NONE) + sprintf(name, "direct"); + else if (func == PCIIO_FUNC_NONE) + sprintf(name, "%d", slot); + else + sprintf(name, "%d%c", slot, 'a'+func); +} + +/* + * pciio_cardinfo_get + * + * Get the pciio info structure corresponding to the + * specified PCI "slot" (we like it when the same index + * number is used for the PCI IDSEL, the REQ/GNT pair, + * and the interrupt line being used for INTA. We like + * it so much we call it the slot number). + */ +static pciio_info_t +pciio_cardinfo_get( + devfs_handle_t pciio_vhdl, + pciio_slot_t pci_slot) +{ + char namebuf[16]; + pciio_info_t info = 0; + devfs_handle_t conn; + + pciio_slot_func_to_name(namebuf, pci_slot, PCIIO_FUNC_NONE); + if (GRAPH_SUCCESS == + hwgraph_traverse(pciio_vhdl, namebuf, &conn)) { + info = pciio_info_chk(conn); + hwgraph_vertex_unref(conn); + } + + return info; +} + +/* + * pciio_error_handler: + * dispatch an error to the appropriate + * pciio connection point, or process + * it as a generic pci error. + * Yes, the first parameter is the + * provider vertex at the middle of + * the bus; we get to the pciio connect + * point using the ioerror widgetdev field. 
+ * + * This function is called by the + * specific PCI provider, after it has figured + * out where on the PCI bus (including which slot, + * if it can tell) the error came from. + */ +/*ARGSUSED */ +int +pciio_error_handler( + devfs_handle_t pciio_vhdl, + int error_code, + ioerror_mode_t mode, + ioerror_t *ioerror) +{ + pciio_info_t pciio_info; + devfs_handle_t pconn_vhdl; +#if USRPCI + devfs_handle_t usrpci_v; +#endif + pciio_slot_t slot; + + int retval; +#if defined(CONFIG_SGI_IO_ERROR_HANDLING) + error_state_t e_state; +#endif + +#ifdef IRIX +#if DEBUG && ERROR_DEBUG + cmn_err(CE_CONT, "%v: pciio_error_handler\n", pciio_vhdl); +#endif +#endif + + IOERR_PRINTF(cmn_err(CE_NOTE, + "%v: PCI Bus Error: Error code: %d Error mode: %d\n", + pciio_vhdl, error_code, mode)); + + /* If there is an error handler sitting on + * the "no-slot" connection point, give it + * first crack at the error. NOTE: it is + * quite possible that this function may + * do further refining of the ioerror. + */ + pciio_info = pciio_cardinfo_get(pciio_vhdl, PCIIO_SLOT_NONE); + if (pciio_info && pciio_info->c_efunc) { + pconn_vhdl = pciio_info_dev_get(pciio_info); +#if defined(CONFIG_SGI_IO_ERROR_HANDLING) + e_state = error_state_get(pciio_vhdl); + + if (e_state == ERROR_STATE_ACTION) + (void)error_state_set(pciio_vhdl, ERROR_STATE_NONE); + + if (error_state_set(pconn_vhdl,e_state) == + ERROR_RETURN_CODE_CANNOT_SET_STATE) + return(IOERROR_UNHANDLED); +#endif + retval = pciio_info->c_efunc + (pciio_info->c_einfo, error_code, mode, ioerror); + if (retval != IOERROR_UNHANDLED) + return retval; + } + + /* Is the error associated with a particular slot? + */ + if (IOERROR_FIELDVALID(ioerror, widgetdev)) { + /* + * NOTE : + * widgetdev is a 4byte value encoded as slot in the higher order + * 2 bytes and function in the lower order 2 bytes. + */ +#ifdef IRIX + slot = pciio_widgetdev_slot_get(IOERROR_GETVALUE(ioerror, widgetdev)); +#else + slot = 0; +#endif + + /* If this slot has an error handler, + * deliver the error to it. + */ + pciio_info = pciio_cardinfo_get(pciio_vhdl, slot); + if (pciio_info != NULL) { + if (pciio_info->c_efunc != NULL) { + + pconn_vhdl = pciio_info_dev_get(pciio_info); +#if defined(CONFIG_SGI_IO_ERROR_HANDLING) + e_state = error_state_get(pciio_vhdl); + + if (e_state == ERROR_STATE_ACTION) + (void)error_state_set(pciio_vhdl, ERROR_STATE_NONE); + + if (error_state_set(pconn_vhdl,e_state) == + ERROR_RETURN_CODE_CANNOT_SET_STATE) + return(IOERROR_UNHANDLED); +#endif + retval = pciio_info->c_efunc + (pciio_info->c_einfo, error_code, mode, ioerror); + if (retval != IOERROR_UNHANDLED) + return retval; + } + +#if USRPCI + /* If the USRPCI driver is available and + * knows about this connection point, + * deliver the error to it. + * + * OK to use pconn_vhdl here, even though we + * have already UNREF'd it, since we know that + * it is not going away. + */ + pconn_vhdl = pciio_info_dev_get(pciio_info); + if (GRAPH_SUCCESS == + hwgraph_traverse(pconn_vhdl, EDGE_LBL_USRPCI, &usrpci_v)) { + retval = usrpci_error_handler + (usrpci_v, error_code, IOERROR_GETVALUE(ioerror, busaddr)); + hwgraph_vertex_unref(usrpci_v); + if (retval != IOERROR_UNHANDLED) { + /* + * This unref is not needed. If this code is called often enough, + * the system will crash, due to vertex reference count reaching 0, + * causing vertex to be unallocated. -jeremy + * hwgraph_vertex_unref(pconn_vhdl); + */ + return retval; + } + } +#endif + } + } + + return (mode == MODE_DEVPROBE) + ? 
IOERROR_HANDLED /* probes are OK */ + : IOERROR_UNHANDLED; /* otherwise, foo! */ +} + +int +pciio_error_devenable(devfs_handle_t pconn_vhdl, int error_code) +{ + return DEV_FUNC(pconn_vhdl, error_devenable) + (pconn_vhdl, error_code); + /* no cleanup specific to this layer. */ +} + +/* ===================================================================== + * CONFIGURATION MANAGEMENT + */ + +/* + * Startup a crosstalk provider + */ +void +pciio_provider_startup(devfs_handle_t pciio_provider) +{ + DEV_FUNC(pciio_provider, provider_startup) + (pciio_provider); +} + +/* + * Shutdown a crosstalk provider + */ +void +pciio_provider_shutdown(devfs_handle_t pciio_provider) +{ + DEV_FUNC(pciio_provider, provider_shutdown) + (pciio_provider); +} + +/* + * Specify endianness constraints. The driver tells us what the device + * does and how it would like to see things in memory. We reply with + * how things will actually appear in memory. + */ +pciio_endian_t +pciio_endian_set(devfs_handle_t dev, + pciio_endian_t device_end, + pciio_endian_t desired_end) +{ + ASSERT((device_end == PCIDMA_ENDIAN_BIG) || (device_end == PCIDMA_ENDIAN_LITTLE)); + ASSERT((desired_end == PCIDMA_ENDIAN_BIG) || (desired_end == PCIDMA_ENDIAN_LITTLE)); + +#if DEBUG + cmn_err(CE_ALERT, + "%v: pciio_endian_set is going away.\n" + "\tplease use PCIIO_BYTE_STREAM or PCIIO_WORD_VALUES in your\n" + "\tpciio_dmamap_alloc and pciio_dmatrans calls instead.\n", + dev); +#endif + + return DEV_FUNC(dev, endian_set) + (dev, device_end, desired_end); +} + +/* + * Specify PCI arbitration priority. + */ +pciio_priority_t +pciio_priority_set(devfs_handle_t dev, + pciio_priority_t device_prio) +{ + ASSERT((device_prio == PCI_PRIO_HIGH) || (device_prio == PCI_PRIO_LOW)); + + return DEV_FUNC(dev, priority_set) + (dev, device_prio); +} + +/* + * Read value of configuration register + */ +uint64_t +pciio_config_get(devfs_handle_t dev, + unsigned reg, + unsigned size) +{ + uint64_t value = 0; + unsigned shift = 0; + + /* handle accesses that cross words here, + * since that's common code between all + * possible providers. + */ + while (size > 0) { + unsigned biw = 4 - (reg&3); + if (biw > size) + biw = size; + + value |= DEV_FUNC(dev, config_get) + (dev, reg, biw) << shift; + + shift += 8*biw; + reg += biw; + size -= biw; + } + return value; +} + +/* + * Change value of configuration register + */ +void +pciio_config_set(devfs_handle_t dev, + unsigned reg, + unsigned size, + uint64_t value) +{ + /* handle accesses that cross words here, + * since that's common code between all + * possible providers. + */ + while (size > 0) { + unsigned biw = 4 - (reg&3); + if (biw > size) + biw = size; + + DEV_FUNC(dev, config_set) + (dev, reg, biw, value); + reg += biw; + size -= biw; + value >>= biw * 8; + } +} + +/* ===================================================================== + * GENERIC PCI SUPPORT FUNCTIONS + */ +pciio_slot_t +pciio_error_extract(devfs_handle_t dev, + pciio_space_t *space, + iopaddr_t *offset) +{ + ASSERT(dev != NODEV); + return DEV_FUNC(dev,error_extract)(dev,space,offset); +} + +/* + * Issue a hardware reset to a card. 
+ */ +int +pciio_reset(devfs_handle_t dev) +{ + return DEV_FUNC(dev, reset) (dev); +} + +/* + * flush write gather buffers + */ +int +pciio_write_gather_flush(devfs_handle_t dev) +{ + return DEV_FUNC(dev, write_gather_flush) (dev); +} + +devfs_handle_t +pciio_intr_dev_get(pciio_intr_t pciio_intr) +{ + return (pciio_intr->pi_dev); +} + +/****** Generic crosstalk pio interfaces ******/ +devfs_handle_t +pciio_pio_dev_get(pciio_piomap_t pciio_piomap) +{ + return (pciio_piomap->pp_dev); +} + +pciio_slot_t +pciio_pio_slot_get(pciio_piomap_t pciio_piomap) +{ + return (pciio_piomap->pp_slot); +} + +pciio_space_t +pciio_pio_space_get(pciio_piomap_t pciio_piomap) +{ + return (pciio_piomap->pp_space); +} + +iopaddr_t +pciio_pio_pciaddr_get(pciio_piomap_t pciio_piomap) +{ + return (pciio_piomap->pp_pciaddr); +} + +ulong +pciio_pio_mapsz_get(pciio_piomap_t pciio_piomap) +{ + return (pciio_piomap->pp_mapsz); +} + +caddr_t +pciio_pio_kvaddr_get(pciio_piomap_t pciio_piomap) +{ + return (pciio_piomap->pp_kvaddr); +} + +/****** Generic crosstalk dma interfaces ******/ +devfs_handle_t +pciio_dma_dev_get(pciio_dmamap_t pciio_dmamap) +{ + return (pciio_dmamap->pd_dev); +} + +pciio_slot_t +pciio_dma_slot_get(pciio_dmamap_t pciio_dmamap) +{ + return (pciio_dmamap->pd_slot); +} + +/****** Generic pci slot information interfaces ******/ + +pciio_info_t +pciio_info_chk(devfs_handle_t pciio) +{ + arbitrary_info_t ainfo = 0; + + hwgraph_info_get_LBL(pciio, INFO_LBL_PCIIO, &ainfo); + return (pciio_info_t) ainfo; +} + +pciio_info_t +pciio_info_get(devfs_handle_t pciio) +{ + pciio_info_t pciio_info; + + pciio_info = (pciio_info_t) hwgraph_fastinfo_get(pciio); + +#ifdef DEBUG_PCIIO + { + int pos; + char dname[256]; + pos = devfs_generate_path(pciio, dname, 256); + printk("%s : path= %s\n", __FUNCTION__, &dname[pos]); + } +#endif /* DEBUG_PCIIO */ + +#ifdef BRINGUP + if ((pciio_info != NULL) && + (pciio_info->c_fingerprint != pciio_info_fingerprint) + && (pciio_info->c_fingerprint != NULL)) { +#else + if ((pciio_info != NULL) && + (pciio_info->c_fingerprint != pciio_info_fingerprint)) { +#endif /* BRINGUP */ + + printk("pciio_info_get: Found fastinfo 0x%p but wrong fingerprint %s\n", pciio_info, + pciio_info->c_fingerprint); + return((pciio_info_t)-1); /* Should panic .. */ + } + + + return pciio_info; +} + +void +pciio_info_set(devfs_handle_t pciio, pciio_info_t pciio_info) +{ + if (pciio_info != NULL) + pciio_info->c_fingerprint = pciio_info_fingerprint; + hwgraph_fastinfo_set(pciio, (arbitrary_info_t) pciio_info); + + /* Also, mark this vertex as a PCI slot + * and use the pciio_info, so pciio_info_chk + * can work (and be fairly efficient). 
+ */ + hwgraph_info_add_LBL(pciio, INFO_LBL_PCIIO, + (arbitrary_info_t) pciio_info); +} + +devfs_handle_t +pciio_info_dev_get(pciio_info_t pciio_info) +{ + return (pciio_info->c_vertex); +} + +/*ARGSUSED*/ +pciio_bus_t +pciio_info_bus_get(pciio_info_t pciio_info) +{ + /* XXX for now O2 always gets back bus 0 */ + return (pciio_bus_t)0; +} + +pciio_slot_t +pciio_info_slot_get(pciio_info_t pciio_info) +{ + return (pciio_info->c_slot); +} + +pciio_function_t +pciio_info_function_get(pciio_info_t pciio_info) +{ + return (pciio_info->c_func); +} + +pciio_vendor_id_t +pciio_info_vendor_id_get(pciio_info_t pciio_info) +{ + return (pciio_info->c_vendor); +} + +pciio_device_id_t +pciio_info_device_id_get(pciio_info_t pciio_info) +{ + return (pciio_info->c_device); +} + +devfs_handle_t +pciio_info_master_get(pciio_info_t pciio_info) +{ + return (pciio_info->c_master); +} + +arbitrary_info_t +pciio_info_mfast_get(pciio_info_t pciio_info) +{ + return (pciio_info->c_mfast); +} + +pciio_provider_t * +pciio_info_pops_get(pciio_info_t pciio_info) +{ + return (pciio_info->c_pops); +} + +error_handler_f * +pciio_info_efunc_get(pciio_info_t pciio_info) +{ + return (pciio_info->c_efunc); +} + +error_handler_arg_t * +pciio_info_einfo_get(pciio_info_t pciio_info) +{ + return (pciio_info->c_einfo); +} + +pciio_space_t +pciio_info_bar_space_get(pciio_info_t info, int win) +{ + return info->c_window[win].w_space; +} + +iopaddr_t +pciio_info_bar_base_get(pciio_info_t info, int win) +{ + return info->c_window[win].w_base; +} + +size_t +pciio_info_bar_size_get(pciio_info_t info, int win) +{ + return info->c_window[win].w_size; +} + +iopaddr_t +pciio_info_rom_base_get(pciio_info_t info) +{ + return info->c_rbase; +} + +size_t +pciio_info_rom_size_get(pciio_info_t info) +{ + return info->c_rsize; +} + + +/* ===================================================================== + * GENERIC PCI INITIALIZATION FUNCTIONS + */ + +/* + * pciioinit: called once during device driver + * initializtion if this driver is configured into + * the system. + */ +void +pciio_init(void) +{ + cdl_p cp; + +#if DEBUG && ATTACH_DEBUG + printf("pciio_init\n"); +#endif + /* Allocate the registry. + * We might already have one. + * If we don't, go get one. + * MPness: someone might have + * set one up for us while we + * were not looking; use an atomic + * compare-and-swap to commit to + * using the new registry if and + * only if nobody else did first. + * If someone did get there first, + * toss the one we allocated back + * into the pool. + */ + if (pciio_registry == NULL) { + cp = cdl_new(EDGE_LBL_PCI, "vendor", "device"); + if (!compare_and_swap_ptr((void **) &pciio_registry, NULL, (void *) cp)) { + cdl_del(cp); + } + } + ASSERT(pciio_registry != NULL); +} + +/* + * pciioattach: called for each vertex in the graph + * that is a PCI provider. + */ +/*ARGSUSED */ +int +pciio_attach(devfs_handle_t pciio) +{ +#if DEBUG && ATTACH_DEBUG + cmn_err(CE_CONT, "%v: pciio_attach\n", pciio); +#endif + return 0; +} + +/* + * Associate a set of pciio_provider functions with a vertex. + */ +void +pciio_provider_register(devfs_handle_t provider, pciio_provider_t *pciio_fns) +{ + hwgraph_info_add_LBL(provider, INFO_LBL_PFUNCS, (arbitrary_info_t) pciio_fns); +} + +/* + * Disassociate a set of pciio_provider functions with a vertex. 
+ */ +void +pciio_provider_unregister(devfs_handle_t provider) +{ + arbitrary_info_t ainfo; + +#ifdef IRIX + hwgraph_info_remove_LBL(provider, INFO_LBL_PFUNCS, &ainfo); +#else + hwgraph_info_remove_LBL(provider, INFO_LBL_PFUNCS, (long *) &ainfo); +#endif +} + +/* + * Obtain a pointer to the pciio_provider functions for a specified Crosstalk + * provider. + */ +pciio_provider_t * +pciio_provider_fns_get(devfs_handle_t provider) +{ + arbitrary_info_t ainfo = 0; + + (void) hwgraph_info_get_LBL(provider, INFO_LBL_PFUNCS, &ainfo); + return (pciio_provider_t *) ainfo; +} + +/*ARGSUSED4 */ +int +pciio_driver_register( + pciio_vendor_id_t vendor_id, + pciio_device_id_t device_id, + char *driver_prefix, + unsigned flags) +{ + /* a driver's init routine might call + * pciio_driver_register before the + * system calls pciio_init; so we + * make the init call ourselves here. + */ + if (pciio_registry == NULL) + pciio_init(); + + return cdl_add_driver(pciio_registry, + vendor_id, device_id, + driver_prefix, flags); +} + +/* + * Remove an initialization function. + */ +void +pciio_driver_unregister( + char *driver_prefix) +{ + /* before a driver calls unregister, + * it must have called register; so + * we can assume we have a registry here. + */ + ASSERT(pciio_registry != NULL); + + cdl_del_driver(pciio_registry, driver_prefix); +} + +/* + * Call some function with each vertex that + * might be one of this driver's attach points. + */ +void +pciio_iterate(char *driver_prefix, + pciio_iter_f * func) +{ + /* a driver's init routine might call + * pciio_iterate before the + * system calls pciio_init; so we + * make the init call ourselves here. + */ + if (pciio_registry == NULL) + pciio_init(); + + ASSERT(pciio_registry != NULL); + + cdl_iterate(pciio_registry, driver_prefix, (cdl_iter_f *) func); +} + +devfs_handle_t +pciio_device_register( + devfs_handle_t connectpt, /* vertex for /hw/.../pciio/%d */ + devfs_handle_t master, /* card's master ASIC (PCI provider) */ + pciio_slot_t slot, /* card's slot */ + pciio_function_t func, /* card's func */ + pciio_vendor_id_t vendor_id, + pciio_device_id_t device_id) +{ + + return pciio_device_info_register + (connectpt, pciio_device_info_new (NULL, master, slot, func, + vendor_id, device_id)); +} + +void +pciio_device_unregister(devfs_handle_t pconn) +{ + DEV_FUNC(pconn,device_unregister)(pconn); +} + +pciio_info_t +pciio_device_info_new( + pciio_info_t pciio_info, + devfs_handle_t master, + pciio_slot_t slot, + pciio_function_t func, + pciio_vendor_id_t vendor_id, + pciio_device_id_t device_id) +{ + if (!pciio_info) + NEW(pciio_info); + ASSERT(pciio_info != NULL); + + pciio_info->c_slot = slot; + pciio_info->c_func = func; + pciio_info->c_vendor = vendor_id; + pciio_info->c_device = device_id; + pciio_info->c_master = master; + pciio_info->c_mfast = hwgraph_fastinfo_get(master); + pciio_info->c_pops = pciio_provider_fns_get(master); + pciio_info->c_efunc = 0; + pciio_info->c_einfo = 0; + + return pciio_info; +} + +void +pciio_device_info_free(pciio_info_t pciio_info) +{ + /* NOTE : pciio_info is a structure within the pcibr_info + * and not a pointer to memory allocated on the heap !! 
+ */ + BZERO((char *)pciio_info,sizeof(pciio_info)); +} + +devfs_handle_t +pciio_device_info_register( + devfs_handle_t connectpt, /* vertex at center of bus */ + pciio_info_t pciio_info) /* details about the connectpt */ +{ + char name[32]; + devfs_handle_t pconn; + + pciio_slot_func_to_name(name, + pciio_info->c_slot, + pciio_info->c_func); + + printk("pciio_device_info_register: connectpt 0x%p, pciio_info 0x%p\n", connectpt, pciio_info); + + if (GRAPH_SUCCESS != + hwgraph_path_add(connectpt, name, &pconn)) + return pconn; + + pciio_info->c_vertex = pconn; + pciio_info_set(pconn, pciio_info); +#ifdef BRINGUP + { + int pos; + char dname[256]; + pos = devfs_generate_path(pconn, dname, 256); + printk("%s : pconn path= %s \n", __FUNCTION__, &dname[pos]); + } +#endif /* BRINGUP */ + + /* + * create link to our pci provider + */ + + device_master_set(pconn, pciio_info->c_master); + +#if USRPCI + /* + * Call into usrpci provider to let it initialize for + * the given slot. + */ + if (pciio_info->c_slot != PCIIO_SLOT_NONE) + usrpci_device_register(pconn, pciio_info->c_master, pciio_info->c_slot); +#endif + + return pconn; +} + +void +pciio_device_info_unregister(devfs_handle_t connectpt, + pciio_info_t pciio_info) +{ + char name[32]; + devfs_handle_t pconn; + + if (!pciio_info) + return; + + pciio_slot_func_to_name(name, + pciio_info->c_slot, + pciio_info->c_func); + + hwgraph_edge_remove(connectpt,name,&pconn); + pciio_info_set(pconn,0); + + /* Remove the link to our pci provider */ + hwgraph_edge_remove(pconn, EDGE_LBL_MASTER, NULL); + + hwgraph_vertex_unref(pconn); + hwgraph_vertex_destroy(pconn); + +} +/* Add the pci card inventory information to the hwgraph + */ +static void +pciio_device_inventory_add(devfs_handle_t pconn_vhdl) +{ + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + + ASSERT(pciio_info); + ASSERT(pciio_info->c_vertex == pconn_vhdl); + + /* Donot add inventory for non-existent devices */ + if ((pciio_info->c_vendor == PCIIO_VENDOR_ID_NONE) || + (pciio_info->c_device == PCIIO_DEVICE_ID_NONE)) + return; + device_inventory_add(pconn_vhdl,INV_IOBD,INV_PCIADAP, + pciio_info->c_vendor,pciio_info->c_device, + pciio_info->c_slot); +} + +static void +pciio_device_inventory_remove(devfs_handle_t pconn_vhdl) +{ +#ifdef IRIX + hwgraph_inventory_remove(pconn_vhdl,-1,-1,-1,-1,-1); +#endif +} + +/*ARGSUSED */ +int +pciio_device_attach(devfs_handle_t pconn) +{ + pciio_info_t pciio_info; + pciio_vendor_id_t vendor_id; + pciio_device_id_t device_id; + + pciio_device_inventory_add(pconn); + pciio_info = pciio_info_get(pconn); + + vendor_id = pciio_info->c_vendor; + device_id = pciio_info->c_device; + + printk("pciio_device_attach: Function 0x%p, vendor 0x%x, device_id %x\n", pconn, vendor_id, device_id); + + /* we don't start attaching things until + * all the driver init routines (including + * pciio_init) have been called; so we + * can assume here that we have a registry. + */ + ASSERT(pciio_registry != NULL); + + return(cdl_add_connpt(pciio_registry, vendor_id, device_id, pconn)); + +} + +int +pciio_device_detach(devfs_handle_t pconn) +{ + pciio_info_t pciio_info; + pciio_vendor_id_t vendor_id; + pciio_device_id_t device_id; + + pciio_device_inventory_remove(pconn); + pciio_info = pciio_info_get(pconn); + + vendor_id = pciio_info->c_vendor; + device_id = pciio_info->c_device; + + /* we don't start attaching things until + * all the driver init routines (including + * pciio_init) have been called; so we + * can assume here that we have a registry. 
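+ *
+ * (Contrast this with pciio_driver_register() and pciio_iterate()
+ * above, which call pciio_init() themselves because a driver's init
+ * routine may run before the system-level init; the attach/detach
+ * paths only ASSERT that the registry exists.  A minimal driver-side
+ * sketch, with "foodev" as a purely illustrative prefix:
+ *
+ *	void foodev_init(void)
+ *	{
+ *		pciio_driver_register(FOODEV_VENDOR_ID, FOODEV_DEVICE_ID,
+ *				      "foodev_", 0);
+ *	}
+ *
+ * pciio_device_attach() then feeds each connect point to the registry
+ * via cdl_add_connpt(), which matches it against the registered
+ * vendor/device pairs.)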
+ */ + ASSERT(pciio_registry != NULL); + + cdl_del_connpt(pciio_registry, vendor_id, device_id, pconn); + + return(0); + +} + +/* + * pciio_error_register: + * arrange for a function to be called with + * a specified first parameter plus other + * information when an error is encountered + * and traced to the pci slot corresponding + * to the connection point pconn. + * + * may also be called with a null function + * pointer to "unregister" the error handler. + * + * NOTE: subsequent calls silently overwrite + * previous data for this vertex. We assume that + * cooperating drivers, well, cooperate ... + */ +void +pciio_error_register(devfs_handle_t pconn, + error_handler_f *efunc, + error_handler_arg_t einfo) +{ + pciio_info_t pciio_info; + + pciio_info = pciio_info_get(pconn); + ASSERT(pciio_info != NULL); + pciio_info->c_efunc = efunc; + pciio_info->c_einfo = einfo; +} + +/* + * Check if any device has been found in this slot, and return + * true or false + * vhdl is the vertex for the slot + */ +int +pciio_slot_inuse(devfs_handle_t pconn_vhdl) +{ + pciio_info_t pciio_info = pciio_info_get(pconn_vhdl); + + ASSERT(pciio_info); + ASSERT(pciio_info->c_vertex == pconn_vhdl); + if (pciio_info->c_vendor) { + /* + * Non-zero value for vendor indicate + * a board being found in this slot. + */ + return 1; + } + return 0; +} + +int +pciio_dma_enabled(devfs_handle_t pconn_vhdl) +{ + return DEV_FUNC(pconn_vhdl, dma_enabled)(pconn_vhdl); +} diff --git a/arch/ia64/sn/io/sgi_if.c b/arch/ia64/sn/io/sgi_if.c new file mode 100644 index 000000000..6ab200fb7 --- /dev/null +++ b/arch/ia64/sn/io/sgi_if.c @@ -0,0 +1,72 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/ctype.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/pci/bridge.h> +#include <asm/sn/ioerror_handling.h> +#include <asm/sn/pci/pciio.h> +#include <asm/sn/slotnum.h> + +#define spinlock_init(x,name) mutex_init(x, MUTEX_DEFAULT, name); + +void * +kmem_zalloc(size_t size, int flag) +{ + void *ptr = kmalloc(size, GFP_KERNEL); + BZERO(ptr, size); + return ptr; +} + +#define xtod(c) ((c) <= '9' ? '0' - (c) : 'a' - (c) - 10) +long +atoi(register char *p) +{ + register long n; + register int c, neg = 0; + + if (p == NULL) + return 0; + + if (!isdigit(c = *p)) { + while (isspace(c)) + c = *++p; + switch (c) { + case '-': + neg++; + case '+': /* fall-through */ + c = *++p; + } + if (!isdigit(c)) + return (0); + } + if (c == '0' && *(p + 1) == 'x') { + p += 2; + c = *p; + n = xtod(c); + while ((c = *++p) && isxdigit(c)) { + n *= 16; /* two steps to avoid unnecessary overflow */ + n += xtod(c); /* accum neg to avoid surprises at MAX */ + } + } else { + n = '0' - c; + while ((c = *++p) && isdigit(c)) { + n *= 10; /* two steps to avoid unnecessary overflow */ + n += '0' - c; /* accum neg to avoid surprises at MAX */ + } + } + return (neg ? n : -n); +} diff --git a/arch/ia64/sn/io/sgi_io_init.c b/arch/ia64/sn/io/sgi_io_init.c new file mode 100644 index 000000000..87ab61b62 --- /dev/null +++ b/arch/ia64/sn/io/sgi_io_init.c @@ -0,0 +1,312 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. 
See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/agent.h> +#include <asm/sn/klconfig.h> +#include <asm/sn/sn_private.h> +#include <asm/sn/synergy.h> +#include <linux/smp.h> + +extern void mlreset(int ); +extern int init_hcl(void); +extern void klgraph_hack_init(void); +extern void per_hub_init(cnodeid_t); +extern void hubspc_init(void); +extern void pciba_init(void); +extern void pciio_init(void); +extern void pcibr_init(void); +extern void xtalk_init(void); +extern void xbow_init(void); +extern void xbmon_init(void); +extern void pciiox_init(void); +extern void usrpci_init(void); +extern void ioc3_init(void); +extern void initialize_io(void); +extern void init_platform_nodepda(nodepda_t *, cnodeid_t ); +extern void intr_clear_all(nasid_t); +extern void klhwg_add_all_modules(devfs_handle_t); +extern void klhwg_add_all_nodes(devfs_handle_t); + +void sn_mp_setup(void); +extern devfs_handle_t hwgraph_root; +extern void io_module_init(void); +extern cnodeid_t nasid_to_compact_node[]; +extern void pci_bus_cvlink_init(void); +extern void temp_hack(void); +extern void init_platform_pda(cpuid_t cpu); + +extern int pci_bus_to_hcl_cvlink(void); +extern synergy_da_t *Synergy_da_indr[]; + +#define DEBUG_IO_INIT +#ifdef DEBUG_IO_INIT +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif /* DEBUG_IO_INIT */ + +/* + * kern/ml/csu.s calls mlsetup + * mlsetup calls mlreset(master) - kern/os/startup.c + * j main + * + + * SN/slave.s start_slave_loop calls slave_entry + * SN/slave.s slave_entry calls slave_loop + * SN/slave.s slave_loop calls bootstrap + * bootstrap in SN1/SN1asm.s calls cboot + * cboot calls mlreset(slave) - ml/SN/mp.c + * + * sgi_io_infrastructure_init() gets called right before pci_init() + * in Linux mainline. This routine actually mirrors the IO Infrastructure + * call sequence in IRIX, ofcourse, nicely modified for Linux. + * + * It is very IMPORTANT that this call is only made by the Master CPU! + * + */ + +void +sgi_master_io_infr_init(void) +{ +#ifdef Colin + /* + * Simulate Big Window 0. + * Only when we build for lutsen etc. .. + */ + simulated_BW0_init(); +#endif + + /* + * Do any early init stuff .. einit_tbl[] etc. + */ + DBG("--> sgi_master_io_infr_init: calling init_hcl().\n"); + init_hcl(); /* Sets up the hwgraph compatibility layer with devfs */ + + /* + * initialize the Linux PCI to xwidget vertexes .. + */ + DBG("--> sgi_master_io_infr_init: calling pci_bus_cvlink_init().\n"); + pci_bus_cvlink_init(); + + /* + * Hack to provide statically initialzed klgraph entries. + */ + DBG("--> sgi_master_io_infr_init: calling klgraph_hack_init()\n"); + klgraph_hack_init(); + + /* + * This is the Master CPU. Emulate mlsetup and main.c in Irix. + */ + DBG("--> sgi_master_io_infr_init: calling mlreset(0).\n"); + mlreset(0); /* Master .. */ + + /* + * allowboot() is called by kern/os/main.c in main() + * Emulate allowboot() ... + * per_cpu_init() - only need per_hub_init() + * cpu_io_setup() - Nothing to do. + * + */ + DBG("--> sgi_master_io_infr_init: calling sn_mp_setup().\n"); + sn_mp_setup(); + + DBG("--> sgi_master_io_infr_init: calling per_hub_init(0).\n"); + per_hub_init(0); /* Need to get and send in actual cnode number */ + + /* We can do headless hub cnodes here .. */ + + /* + * io_init[] stuff. 
+ * + * Get SGI IO Infrastructure drivers to init and register with + * each other etc. + */ + + DBG("--> sgi_master_io_infr_init: calling hubspc_init()\n"); + hubspc_init(); + + DBG("--> sgi_master_io_infr_init: calling pciba_init()\n"); + pciba_init(); + + DBG("--> sgi_master_io_infr_init: calling pciio_init()\n"); + pciio_init(); + + DBG("--> sgi_master_io_infr_init: calling pcibr_init()\n"); + pcibr_init(); + + DBG("--> sgi_master_io_infr_init: calling xtalk_init()\n"); + xtalk_init(); + + DBG("--> sgi_master_io_infr_init: calling xbow_init()\n"); + xbow_init(); + + DBG("--> sgi_master_io_infr_init: calling xbmon_init()\n"); + xbmon_init(); + + DBG("--> sgi_master_io_infr_init: calling pciiox_init()\n"); + pciiox_init(); + + DBG("--> sgi_master_io_infr_init: calling usrpci_init()\n"); + usrpci_init(); + + DBG("--> sgi_master_io_infr_init: calling ioc3_init()\n"); + ioc3_init(); + + /* + * + * Our IO Infrastructure drivers are in place .. + * Initialize the whole IO Infrastructure .. xwidget/device probes. + * + */ + DBG("--> sgi_master_io_infr_init: Start Probe and IO Initialization\n"); + initialize_io(); + + DBG("--> sgi_master_io_infr_init: Setting up SGI IO Links for Linux PCI\n"); + pci_bus_to_hcl_cvlink(); + + DBG("--> Leave sgi_master_io_infr_init: DONE setting up SGI Links for PCI\n"); +} + +/* + * sgi_slave_io_infr_init - This routine must be called on all cpus except + * the Master CPU. + */ +void +sgi_slave_io_infr_init(void) +{ + /* Emulate cboot() .. */ + mlreset(1); /* This is a slave cpu */ + + per_hub_init(0); /* Need to get and send in actual cnode number */ + + /* Done */ +} + +/* + * One-time setup for MP SN. + * Allocate per-node data, slurp prom klconfig information and + * convert it to hwgraph information. + */ +void +sn_mp_setup(void) +{ + cnodeid_t cnode; + extern int maxnodes; + cpuid_t cpu; + + DBG("sn_mp_setup: Entered.\n"); + /* + * NODEPDA(x) Macro depends on nodepda + * subnodepda is also statically set to calias space which we + * do not currently support yet .. just a hack for now. + */ +#ifdef NUMA_BASE + DBG("sn_mp_setup(): maxnodes= %d numnodes= %d\n", maxnodes,numnodes); + maxnodes = numnodes; +#ifdef SIMULATED_KLGRAPH + maxnodes = 1; + numnodes = 1; +#endif /* SIMULATED_KLGRAPH */ + printk("sn_mp_setup(): Allocating backing store for *Nodepdaindr[%2d] \n", + maxnodes); + + /* + * Initialize Nodpdaindr and per-node nodepdaindr array + */ + *Nodepdaindr = (nodepda_t *) kmalloc(sizeof(nodepda_t *)*numnodes, GFP_KERNEL); + for (cnode=0; cnode<maxnodes; cnode++) { + Nodepdaindr[cnode] = (nodepda_t *) kmalloc(sizeof(struct nodepda_s), + GFP_KERNEL); + Synergy_da_indr[cnode * 2] = (synergy_da_t *) kmalloc( + sizeof(synergy_da_t), GFP_KERNEL); + Synergy_da_indr[(cnode * 2) + 1] = (synergy_da_t *) kmalloc( + sizeof(synergy_da_t), GFP_KERNEL); + Nodepdaindr[cnode]->pernode_pdaindr = Nodepdaindr; + subnodepda = &Nodepdaindr[cnode]->snpda[cnode]; + } + nodepda = Nodepdaindr[0]; +#else + Nodepdaindr = (nodepda_t *) kmalloc(sizeof(struct nodepda_s), GFP_KERNEL); + nodepda = Nodepdaindr[0]; + subnodepda = &Nodepdaindr[0]->snpda[0]; + +#endif /* NUMA_BASE */ + + /* + * Before we let the other processors run, set up the platform specific + * stuff in the nodepda. + * + * ???? maxnodes set in mlreset .. who sets it now ???? + * ???? 
cpu_node_probe() called in mlreset to set up the following: + * compact_to_nasid_node[] - cnode id gives nasid + * nasid_to_compact_node[] - nasid gives cnode id + * + * do_cpumask() sets the following: + * cpuid_to_compact_node[] - cpuid gives cnode id + * + * nasid comes from gdap->g_nasidtable[] + * ml/SN/promif.c + */ + + for (cnode = 0; cnode < maxnodes; cnode++) { + /* + * Set up platform-dependent nodepda fields. + * The following routine actually sets up the hubinfo struct + * in nodepda. + */ + DBG("sn_mp_io_setup: calling init_platform_nodepda(%2d)\n",cnode); + init_platform_nodepda(Nodepdaindr[cnode], cnode); + + /* + * This routine clears the Hub's Interrupt registers. + */ +#ifndef CONFIG_IA64_SGI_IO + /* + * We need to move this intr_clear_all() routine + * from SN/intr.c to a more appropriate file. + * Talk to Al Mayer. + */ + intr_clear_all(COMPACT_TO_NASID_NODEID(cnode)); +#endif + } + +#ifdef CONFIG_IA64_SGI_IO + for (cpu = 0; cpu < smp_num_cpus; cpu++) { + /* Skip holes in CPU space */ + if (cpu_enabled(cpu)) { + init_platform_pda(cpu); + } + } +#endif + + /* + * Initialize platform-dependent vertices in the hwgraph: + * module + * node + * cpu + * memory + * slot + * hub + * router + * xbow + */ + + DBG("sn_mp_io_setup: calling io_module_init()\n"); + io_module_init(); /* Use to be called module_init() .. */ + + DBG("sn_mp_setup: calling klhwg_add_all_modules()\n"); + klhwg_add_all_modules(hwgraph_root); + DBG("sn_mp_setup: calling klhwg_add_all_nodes()\n"); + klhwg_add_all_nodes(hwgraph_root); +} diff --git a/arch/ia64/sn/io/sgi_io_sim.c b/arch/ia64/sn/io/sgi_io_sim.c new file mode 100644 index 000000000..41ce62d51 --- /dev/null +++ b/arch/ia64/sn/io/sgi_io_sim.c @@ -0,0 +1,161 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <asm/sn/sgi.h> +#include <asm/sn/agent.h> +#include <asm/sn/klconfig.h> +#include <asm/sn/module.h> +#include <asm/sn/nic.h> +#include <asm/sn/sn_private.h> +#include <asm/sn/synergy.h> + +cnodeid_t nasid_to_compact_node[MAX_NASIDS]; +nasid_t compact_to_nasid_node[MAX_COMPACT_NODES]; +cnodeid_t cpuid_to_compact_node[MAXCPUS]; +cpuid_t master_procid = 0; +int maxnodes; +char arg_maxnodes[4]; + +nodepda_t *Nodepdaindr[MAX_COMPACT_NODES]; +nodepda_t *nodepda; +subnode_pda_t *subnodepda; + +synergy_da_t *Synergy_da_indr[MAX_COMPACT_NODES * 2]; + +extern void init_all_devices(void); + + +/* + * Return non-zero if the given variable was specified + */ +int +is_specified(char *s) +{ + return (strlen(s) != 0); +} + + +void pciba_init(void) +{ + FIXME("pciba_init : no-op\n"); +} + +void xbmon_init(void) +{ + FIXME("xbmon_init : no-op\n"); + +} + +void pciiox_init(void) +{ + FIXME("pciiox_init : no-op\n"); + +} + +void usrpci_init(void) +{ + FIXME("usrpci_init : no-op\n"); + +} + +void ioc3_init(void) +{ + FIXME("ioc3_init : no-op\n"); + +} + +void initialize_io(void) +{ + + init_all_devices(); +} + +/* + * Routines provided by ml/SN/promif.c. 
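+ *
+ * The nasid/cnode helpers further down simply index the
+ * nasid_to_compact_node[] and compact_to_nasid_node[] tables declared
+ * earlier in this file, with range ASSERTs, e.g. (values
+ * illustrative):
+ *
+ *	cnodeid_t cnode = nasid_to_compact_nodeid(nasid);
+ *	nasid_t   back  = compact_to_nasid_nodeid(cnode);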
+ */ +static __psunsigned_t master_bridge_base = (__psunsigned_t)NULL; +static nasid_t console_nasid; +static char console_wid; +static char console_pcislot; + +void +set_master_bridge_base(void) +{ + +#ifdef SIMULATED_KLGRAPH + printk("set_master_bridge_base: SIMULATED_KLGRAPH FIXME hardwired master.\n"); + console_nasid = 0; + console_wid = 0x8; + console_pcislot = 0x2; +#else + console_nasid = KL_CONFIG_CH_CONS_INFO(master_nasid)->nasid; + console_wid = WIDGETID_GET(KL_CONFIG_CH_CONS_INFO(master_nasid)->memory_base); + console_pcislot = KL_CONFIG_CH_CONS_INFO(master_nasid)->npci; +#endif /* SIMULATED_KLGRAPH */ + + master_bridge_base = (__psunsigned_t)NODE_SWIN_BASE(console_nasid, + console_wid); +} + +int +check_nasid_equiv(nasid_t nasida, nasid_t nasidb) +{ + if ((nasida == nasidb) || + (nasida == NODEPDA(NASID_TO_COMPACT_NODEID(nasidb))->xbow_peer)) + return 1; + else + return 0; +} + +int +is_master_nasid_widget(nasid_t test_nasid, xwidgetnum_t test_wid) +{ + + /* + * If the widget numbers are different, we're not the master. + */ + if (test_wid != (xwidgetnum_t)console_wid) + return 0; + + /* + * If the NASIDs are the same or equivalent, we're the master. + */ + if (check_nasid_equiv(test_nasid, console_nasid)) { + return 1; + } else { + return 0; + } +} + +cnodeid_t +nasid_to_compact_nodeid(nasid_t nasid) +{ + ASSERT(nasid >= 0 && nasid < MAX_NASIDS); + return nasid_to_compact_node[nasid]; +} + +nasid_t +compact_to_nasid_nodeid(cnodeid_t cnode) +{ + ASSERT(cnode >= 0 && cnode <= MAX_COMPACT_NODES); + ASSERT(compact_to_nasid_node[cnode] >= 0); + return compact_to_nasid_node[cnode]; +} + +/* + * Routines provided by ml/SN/nvram.c + */ +void +nvram_baseinit(void) +{ + FIXME("nvram_baseinit : no-op\n"); + +} diff --git a/arch/ia64/sn/io/stubs.c b/arch/ia64/sn/io/stubs.c new file mode 100644 index 000000000..3dacf2fe5 --- /dev/null +++ b/arch/ia64/sn/io/stubs.c @@ -0,0 +1,256 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/ctype.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/pci/bridge.h> +#include <asm/sn/ioerror_handling.h> +#include <asm/sn/pci/pciio.h> +#include <asm/sn/slotnum.h> +#include <asm/sn/vector.h> + +/****** + ****** hack defines ...... 
+ ******/ + +int pcibr_prefetch_enable_rev, pcibr_wg_enable_rev; +int default_intr_pri; +int force_fire_and_forget; +int ignore_conveyor_override; + +#define spinlock_init(x,name) mutex_init(x, MUTEX_DEFAULT, name); + +devfs_handle_t dummy_vrtx; /* Needed for cpuid_to_vertex() in hack.h */ + + +/* ARGSUSED */ +void hub_widgetdev_enable(devfs_handle_t xconn_vhdl, int devnum) + {FIXME("hub_widgetdev_enable");} + +/* ARGSUSED */ +void hub_widgetdev_shutdown(devfs_handle_t xconn_vhdl, int devnum) + {FIXME("hub_widgetdev_shutdown");} + +/* ARGSUSED */ +void hub_widget_reset(devfs_handle_t hubv, xwidgetnum_t widget) + {FIXME("hub_widget_reset");} + +boolean_t +is_sys_critical_vertex(devfs_handle_t x) +{ + FIXME("is_sys_critical_vertex : returns 0"); + return(0); +} + +char * +nic_bridge_vertex_info(devfs_handle_t v, nic_data_t mcr) +{ + FIXME("nic_bridge_vertex_info : returns NULL"); + return((char *)0); +} + +void * +kmem_alloc_node(register size_t size, register int flags, cnodeid_t node) +{ + /* Allocates on node 'node' */ + FIXME("kmem_alloc_node : use kmalloc"); + return(kmalloc(size, GFP_KERNEL)); +} + +void * +kmem_zalloc_node(register size_t size, register int flags, cnodeid_t node) +{ + FIXME("kmem_zalloc_node : use kmalloc"); + return(kmalloc(size, GFP_KERNEL)); +} + +void +kmem_free(void *where, int size) +{ + FIXME("kmem_free : use kfree"); + return(kfree(where)); +} + + +void * +kmem_zone_alloc(register zone_t *zone, int flags) +{ + FIXME("kmem_zone_alloc : return null"); + return((void *)0); +} + +void +kmem_zone_free(register zone_t *zone, void *ptr) +{ + FIXME("kmem_zone_free : no-op"); +} + +zone_t * +kmem_zone_init(register int size, char *zone_name) +{ + FIXME("kmem_zone_free : returns NULL"); + return((zone_t *)0); +} + +uint64_t +rmalloc(struct map *mp, size_t size) +{ + FIXME("rmalloc : returns NULL"); + return((uint64_t)0); +} + +void +rmfree(struct map *mp, size_t size, uint64_t a) +{ + FIXME("rmfree : no-op"); +} + +struct map * +rmallocmap(uint64_t mapsiz) +{ + FIXME("rmallocmap : returns NULL"); + return((struct map *)0); +} + +void +rmfreemap(struct map *mp) +{ + FIXME("rmfreemap : no-op"); +} + +int +compare_and_swap_ptr(void **location, void *old_ptr, void *new_ptr) +{ + FIXME("compare_and_swap_ptr : NOT ATOMIC"); + if (*location == old_ptr) { + *location = new_ptr; + return(1); + } + else + return(0); +} + +void * +swap_ptr(void **loc, void *new) +{ + FIXME("swap_ptr : returns null"); + return((void *)0); +} + +/* For ml/SN/SN1/slots.c */ +/* ARGSUSED */ +slotid_t get_widget_slotnum(int xbow, int widget) + {FIXME("get_widget_slotnum"); return (unsigned char)NULL;} + +/* For router */ +int +router_init(cnodeid_t cnode,int writeid, void *npda_rip) + {FIXME("router_init"); return(0);} + +/* From io/ioerror_handling.c */ +error_return_code_t +sys_critical_graph_vertex_add(devfs_handle_t parent, devfs_handle_t child) + {FIXME("sys_critical_graph_vertex_add"); return(0);} + +/* From io/ioc3.c */ +devfs_handle_t +ioc3_console_vhdl_get(void) + {FIXME("ioc3_console_vhdl_get"); return( (devfs_handle_t)-1);} + + +#if 0 +#define io_splock(l) 1 +#define io_spunlock(l,s) + +#define spinlock_destroy(a) /* needed by pcibr_detach() */ +#define mutex_spinlock(a) 0 +#define mutex_spinunlock(a,b) +#define mutex_init(a,b,c) ; +#define mutex_lock(a,b) ; +#define mutex_unlock(a) ; +#define dev_to_vhdl(dev) 0 +#define get_timestamp() 0 +#define us_delay(a) +#define v_mapphys(a,b,c) 0 +#define splhi() 0 +#define splx(s) +#define spinlock_init(x,name) mutex_init(x, MUTEX_DEFAULT, name); 
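+
+/*
+ * The compare_and_swap_ptr() stub above is explicitly NOT atomic (see
+ * its FIXME); callers such as pciio_init() use it to arbitrate
+ * concurrent creation of the pciio registry.  A minimal atomic
+ * sketch, assuming the ia64 cmpxchg() macro from <asm/system.h> is
+ * usable here:
+ *
+ *	int
+ *	compare_and_swap_ptr(void **location, void *old_ptr, void *new_ptr)
+ *	{
+ *		return cmpxchg(location, old_ptr, new_ptr) == old_ptr;
+ *	}
+ */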
+#endif /* 0 */ + +int +cap_able(uint64_t x) +{ + FIXME("cap_able : returns 1"); + return(1); +} + +int +cap_able_cred(uint64_t a, uint64_t b) +{ + FIXME("cap_able_cred : returns 1"); + return(1); +} + +void +nic_vmc_check(devfs_handle_t vhdl, char *nicinfo) +{ + + FIXME("nic_vmc_check\n"); + +} + +char * +nic_vertex_info_get(devfs_handle_t v) +{ + + FIXME("nic_vertex_info_get\n"); + return(NULL); + +} + +int +vector_read_node(net_vec_t dest, nasid_t nasid, + int write_id, int address, + uint64_t *value) +{ + FIXME("vector_read_node\n"); + return(0); +} + +int +vector_write_node(net_vec_t dest, nasid_t nasid, + int write_id, int address, + uint64_t value) +{ + FIXME("vector_write_node\n"); + return(0); +} + +int +atomicAddInt(int *int_ptr, int value) +{ +// FIXME("atomicAddInt : simple add\n"); + *int_ptr += value; + return(0); +} + +int +atomicClearInt(int *int_ptr, int value) +{ + FIXME("atomicClearInt : simple clear\n"); + *int_ptr &= ~value; + return(0); +} diff --git a/arch/ia64/sn/io/xbow.c b/arch/ia64/sn/io/xbow.c new file mode 100644 index 000000000..904cf732c --- /dev/null +++ b/arch/ia64/sn/io/xbow.c @@ -0,0 +1,1866 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <asm/sn/sgi.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/pci/bridge.h> +#include <asm/sn/xtalk/xtalk_private.h> + +#define DEBUG 1 +#define XBOW_DEBUG 1 + + +/* + * Files needed to get the device driver entry points + */ + +/* #include <asm/cred.h> */ + +#include <asm/sn/xtalk/xbow.h> +#include <asm/sn/xtalk/xtalk.h> +#include <asm/sn/xtalk/xswitch.h> +#include <asm/sn/xtalk/xwidget.h> + +#include <asm/sn/prio.h> +#include <asm/sn/hcl_util.h> + + +#define NEW(ptr) (ptr = kmalloc(sizeof (*(ptr)), GFP_KERNEL)) +#define DEL(ptr) (kfree(ptr)) + +int xbow_devflag = D_MP; + +/* + * This file supports the Xbow chip. Main functions: initializtion, + * error handling, and GBR. + */ + +/* + * each vertex corresponding to an xbow chip + * has a "fastinfo" pointer pointing at one + * of these things. + */ +typedef struct xbow_soft_s *xbow_soft_t; + +struct xbow_soft_s { + devfs_handle_t conn; /* our connection point */ + devfs_handle_t vhdl; /* xbow's private vertex */ + devfs_handle_t busv; /* the xswitch vertex */ + xbow_t *base; /* PIO pointer to crossbow chip */ + char *name; /* hwgraph name */ + + xbow_perf_t xbow_perfcnt[XBOW_PERF_COUNTERS]; + xbow_perf_link_t xbow_perflink[MAX_XBOW_PORTS]; + xbow_link_status_t xbow_link_status[MAX_XBOW_PORTS]; + lock_t xbow_perf_lock; + int link_monitor; + widget_cfg_t *wpio[MAX_XBOW_PORTS]; /* cached PIO pointer */ + + /* Bandwidth allocation state. Bandwidth values are for the + * destination port since contention happens there. + * Implicit mapping from xbow ports (8..f) -> (0..7) array indices. 
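+ *
+ * i.e. port p lands in slot (p - BASE_XBOW_PORT); assuming
+ * BASE_XBOW_PORT is 0x8, port 0x8 uses slot 0 and port 0xf uses
+ * slot 7.  This is the same conversion xbow_xmit_retry_error() and
+ * xbow_enable_perf_counter() apply further down.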
+ */ + lock_t xbow_bw_alloc_lock; /* bw allocation lock */ + unsigned long long bw_hiwm[MAX_XBOW_PORTS]; /* hiwater mark values */ + unsigned long long bw_cur_used[MAX_XBOW_PORTS]; /* bw used currently */ +}; + +#define xbow_soft_set(v,i) hwgraph_fastinfo_set((v), (arbitrary_info_t)(i)) +#define xbow_soft_get(v) ((xbow_soft_t)hwgraph_fastinfo_get((v))) + +/* + * Function Table of Contents + */ + +void xbow_mlreset(xbow_t *); +void xbow_init(void); +int xbow_attach(devfs_handle_t); + +int xbow_open(devfs_handle_t *, int, int, cred_t *); +int xbow_close(devfs_handle_t, int, int, cred_t *); + +int xbow_map(devfs_handle_t, vhandl_t *, off_t, size_t, uint); +int xbow_unmap(devfs_handle_t, vhandl_t *); +int xbow_ioctl(devfs_handle_t, int, void *, int, struct cred *, int *); + +int xbow_widget_present(xbow_t *, int); +static int xbow_link_alive(xbow_t *, int); +devfs_handle_t xbow_widget_lookup(devfs_handle_t, int); + +#ifdef LATER +static void xbow_setwidint(xtalk_intr_t); +static void xbow_errintr_handler(intr_arg_t); +static error_handler_f xbow_error_handler; +#endif +void xbow_intr_preset(void *, int, xwidgetnum_t, iopaddr_t, xtalk_intr_vector_t); + + + +void xbow_update_perf_counters(devfs_handle_t); +xbow_perf_link_t *xbow_get_perf_counters(devfs_handle_t); +int xbow_enable_perf_counter(devfs_handle_t, int, int, int); +xbow_link_status_t *xbow_get_llp_status(devfs_handle_t); +void xbow_update_llp_status(devfs_handle_t); + +int xbow_disable_llp_monitor(devfs_handle_t); +int xbow_enable_llp_monitor(devfs_handle_t); + +#ifdef IRIX +int xbow_prio_bw_alloc(devfs_handle_t, xwidgetnum_t, xwidgetnum_t, + unsigned long long, unsigned long long); +#else +int xbow_prio_bw_alloc(devfs_handle_t, xwidgetnum_t, xwidgetnum_t, + unsigned long long, unsigned long long); +#endif + + +xswitch_reset_link_f xbow_reset_link; + +void idbg_xbowregs(int64_t); + +xswitch_provider_t xbow_provider = +{ + xbow_reset_link, +}; + +/* + * xbow_mlreset: called at mlreset time if the + * platform specific code determines that there is + * a crossbow in a critical path that must be + * functional before the driver would normally get + * the device properly set up. + * + * what do we need to do, that the boot prom can + * not be counted on to have already done, that is + * generic across all platforms using crossbows? + */ +/*ARGSUSED */ +void +xbow_mlreset(xbow_t * xbow) +{ +} + +/* + * xbow_init: called with the rest of the device + * driver XXX_init routines. This platform *might* + * have a Crossbow chip, or even several, but it + * might have none. Register with the crosstalk + * generic provider so when we encounter the chip + * the right magic happens. 
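+ *
+ * concretely: the body below registers the "xbow_" driver prefix for
+ * both crossbow part numbers (XBOW_WIDGET_PART_NUM and
+ * XXBOW_WIDGET_PART_NUM) at CDL_PRI_HI, so the switch attaches
+ * before the widgets hanging off it get probed.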
+ */ +void +xbow_init(void) +{ + +#if DEBUG && ATTACH_DEBUG + printf("xbow_init\n"); +#endif + + xwidget_driver_register(XXBOW_WIDGET_PART_NUM, + 0, /* XXBOW_WIDGET_MFGR_NUM, */ + "xbow_", + CDL_PRI_HI); /* attach before friends */ + + xwidget_driver_register(XBOW_WIDGET_PART_NUM, + XBOW_WIDGET_MFGR_NUM, + "xbow_", + CDL_PRI_HI); /* attach before friends */ +} + +#ifdef XBRIDGE_REGS_SIM +/* xbow_set_simulated_regs: sets xbow regs as needed + * for powering through the boot + */ +void +xbow_set_simulated_regs(xbow_t *xbow, int port) +{ + /* + * turn on link + */ + xbow->xb_link(port).link_status = (1<<31); + /* + * and give it a live widget too + */ + xbow->xb_link(port).link_aux_status = XB_AUX_STAT_PRESENT; + /* + * zero the link control reg + */ + xbow->xb_link(port).link_control = 0x0; +} +#endif /* XBRIDGE_REGS_SIM */ + +/* + * xbow_attach: the crosstalk provider has + * determined that there is a crossbow widget + * present, and has handed us the connection + * point for that vertex. + * + * We not only add our own vertex, but add + * some "xtalk switch" data to the switch + * vertex (at the connect point's parent) if + * it does not have any. + */ + +/*ARGSUSED */ +int +xbow_attach(devfs_handle_t conn) +{ + /*REFERENCED */ + devfs_handle_t vhdl; + devfs_handle_t busv; + xbow_t *xbow; + xbow_soft_t soft; + int port; + xswitch_info_t info; +#ifdef LATER + xtalk_intr_t intr_hdl; + device_desc_t dev_desc; +#endif + char devnm[MAXDEVNAME], *s; + xbowreg_t id; + int rev; + int i; + int xbow_num; + +#if DEBUG && ATTACH_DEBUG + cmn_err(CE_CONT, "%v: xbow_attach\n", conn); +#endif + + /* + * Get a PIO pointer to the base of the crossbow + * chip. + */ +#ifdef XBRIDGE_REGS_SIM + printk("xbow_attach: XBRIDGE_REGS_SIM FIXME: allocating %ld bytes for xbow_s\n", sizeof(xbow_t)); + xbow = (xbow_t *) kmalloc(sizeof(xbow_t), GFP_KERNEL); + /* + * turn on ports e and f like in a real live ibrick + */ + xbow_set_simulated_regs(xbow, 0xe); + xbow_set_simulated_regs(xbow, 0xf); +#else + xbow = (xbow_t *) xtalk_piotrans_addr(conn, 0, 0, sizeof(xbow_t), 0); +#endif /* XBRIDGE_REGS_SIM */ + + /* + * Locate the "switch" vertex: it is the parent + * of our connection point. + */ + busv = hwgraph_connectpt_get(conn); + printk("xbow_attach: Bus Vertex 0x%p, conn 0x%p, xbow register 0x%p wid= 0x%x\n", busv, conn, xbow, *(volatile u32 *)xbow); + + ASSERT(busv != GRAPH_VERTEX_NONE); + + /* + * Create our private vertex, and connect our + * driver information to it. This makes it possible + * for diagnostic drivers to open the crossbow + * vertex for access to registers. + */ + + /* + * We need to teach xbow drivers to provide the right set of + * file ops. + */ + vhdl = NULL; + vhdl = hwgraph_register(conn, EDGE_LBL_XBOW, + 0, DEVFS_FL_AUTO_DEVNUM, + 0, 0, + S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, 0, 0, + /* &hcl_fops */ (void *)&vhdl, NULL); + if (!vhdl) { + printk("xbow_attach: Unable to create char device for xbow conn +0x%p\n", + conn); + } + + /* + * Allocate the soft state structure and attach + * it to the xbow's vertex + */ + NEW(soft); + soft->conn = conn; + soft->vhdl = vhdl; + soft->busv = busv; + soft->base = xbow; + /* does the universe really need another macro? */ + /* xbow_soft_set(vhdl, (arbitrary_info_t) soft); */ + hwgraph_fastinfo_set(vhdl, (arbitrary_info_t) soft); + +#define XBOW_NUM_SUFFIX_FORMAT "[xbow# %d]" + + /* Add xbow number as a suffix to the hwgraph name of the xbow. + * This is helpful while looking at the error/warning messages. 
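+ *
+ * With XBOW_NUM_SUFFIX_FORMAT as defined above, the sprintf() below
+ * turns a hwgraph name such as ".../xtalk/0" (path illustrative only)
+ * into ".../xtalk/0[xbow# 0]"; later error and warning messages print
+ * this via soft->name.  Note that on IP35/SN1/generic builds xbow_num
+ * is currently hard-wired to 0.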
+ */ +#if CONFIG_SGI_IP35 || CONFIG_IA64_SGI_SN1 || CONFIG_IA64_GENERIC + xbow_num = 0; +#else + xbow_num = xswitch_id_get(busv); +#endif + + /* + * get the name of this xbow vertex and keep the info. + * This is needed during errors and interupts, but as + * long as we have it, we can use it elsewhere. + */ + s = dev_to_name(vhdl, devnm, MAXDEVNAME); + soft->name = kmalloc(strlen(s) + strlen(XBOW_NUM_SUFFIX_FORMAT) + 1, + GFP_KERNEL); + sprintf(soft->name,"%s"XBOW_NUM_SUFFIX_FORMAT, s,xbow_num); + +#ifdef XBRIDGE_REGS_SIM + /* my o200/ibrick has id=0x2d002049, but XXBOW_WIDGET_PART_NUM is defined + * as 0xd000, so I'm using that for the partnum bitfield. + */ + printk("xbow_attach: XBRIDGE_REGS_SIM FIXME: need xb_wid_id value!!\n"); + id = 0x2d000049; +#else + id = xbow->xb_wid_id; +#endif /* XBRIDGE_REGS_SIM */ + rev = XWIDGET_PART_REV_NUM(id); + + /* + * Print the revision if DEBUG, or SHOW_REVS and kdebug, + * or the xbow is downrev. + * + * If xbow is downrev, make it a WARNING that the + * Crossbow is DOWNREV: these chips are not good + * to have around, and the operator should be told. + */ +#ifdef IRIX +#if !DEBUG + if ( +#if SHOW_REVS + (kdebug) || +#endif /* SHOW_REVS */ + (rev < XBOW_REV_1_1)) +#endif /* !DEBUG */ + cmn_err((rev < XBOW_REV_1_1) ? CE_WARN : CE_CONT, + "%sCrossbow ASIC: rev %s (code=%d) at %s%s", + (rev < XBOW_REV_1_1) ? "DOWNREV " : "", + (rev == XBOW_REV_1_0) ? "1.0" : + (rev == XBOW_REV_1_1) ? "1.1" : + (rev == XBOW_REV_1_2) ? "1.2" : + (rev == XBOW_REV_1_3) ? "1.3" : + (rev == XBOW_REV_2_0) ? "2.0" : + (rev == XXBOW_PART_REV_1_0) ? "Xbridge 1.0" : + (rev == XXBOW_PART_REV_2_0) ? "Xbridge 2.0" : + "unknown", + rev, soft->name, + (rev < XBOW_REV_1_1) ? "" : "\n"); +#endif /* IRIX */ + + spinlock_init(&soft->xbow_perf_lock, "xbow_perf_lock"); + soft->xbow_perfcnt[0].xp_perf_reg = &xbow->xb_perf_ctr_a; + soft->xbow_perfcnt[1].xp_perf_reg = &xbow->xb_perf_ctr_b; + + /* Initialization for GBR bw allocation */ + spinlock_init(&soft->xbow_bw_alloc_lock, "xbow_bw_alloc_lock"); + +#define XBOW_8_BIT_PORT_BW_MAX (400 * 1000 * 1000) /* 400 MB/s */ +#define XBOW_16_BIT_PORT_BW_MAX (800 * 1000 * 1000) /* 800 MB/s */ + + /* Set bandwidth hiwatermark and current values */ + for (i = 0; i < MAX_XBOW_PORTS; i++) { + soft->bw_hiwm[i] = XBOW_16_BIT_PORT_BW_MAX; /* for now */ + soft->bw_cur_used[i] = 0; + } + + /* + * attach the crossbow error interrupt. + */ +#ifdef LATER + dev_desc = device_desc_dup(vhdl); + device_desc_flags_set(dev_desc, + device_desc_flags_get(dev_desc) | D_INTR_ISERR); + device_desc_intr_name_set(dev_desc, "Crossbow error"); + + intr_hdl = xtalk_intr_alloc(conn, dev_desc, vhdl); + ASSERT(intr_hdl != NULL); + + xtalk_intr_connect(intr_hdl, + (intr_func_t) xbow_errintr_handler, + (intr_arg_t) soft, + (xtalk_intr_setfunc_t) xbow_setwidint, + (void *) xbow, + (void *) 0); + device_desc_free(dev_desc); + + xwidget_error_register(conn, xbow_error_handler, soft); + +#else + printk("xbow_attach: Fixme: we bypassed attaching xbow error interrupt.\n"); +#endif /* LATER */ + + /* + * Enable xbow error interrupts + */ + xbow->xb_wid_control = (XB_WID_CTRL_REG_ACC_IE | + XB_WID_CTRL_XTALK_IE); + + /* + * take a census of the widgets present, + * leaving notes at the switch vertex. 
+ */ + info = xswitch_info_new(busv); + + for (port = MAX_PORT_NUM - MAX_XBOW_PORTS; + port < MAX_PORT_NUM; ++port) { + if (!xbow_link_alive(xbow, port)) { +#if DEBUG && XBOW_DEBUG + printk(KERN_INFO "0x%p link %d is not alive\n", + busv, port); +#endif + continue; + } + if (!xbow_widget_present(xbow, port)) { +#if DEBUG && XBOW_DEBUG + printk(KERN_INFO "0x%p link %d is alive but no widget is present\n", busv, port); +#endif + continue; + } +#if DEBUG && XBOW_DEBUG + printk(KERN_INFO "0x%p link %d has a widget\n", + busv, port); +#endif + + xswitch_info_link_is_ok(info, port); + /* + * Turn some error interrupts on + * and turn others off. The PROM has + * some things turned on we don't + * want to see (bandwidth allocation + * errors for instance); so if it + * is not listed here, it is not on. + */ + xbow->xb_link(port).link_control = + ( (xbow->xb_link(port).link_control + /* + * Turn off these bits; they are non-fatal, + * but we might want to save some statistics + * on the frequency of these errors. + * XXX FIXME XXX + */ + & ~XB_CTRL_RCV_CNT_OFLOW_IE + & ~XB_CTRL_XMT_CNT_OFLOW_IE + & ~XB_CTRL_BNDWDTH_ALLOC_IE + & ~XB_CTRL_RCV_IE) + /* + * These are the ones we want to turn on. + */ + | (XB_CTRL_ILLEGAL_DST_IE + | XB_CTRL_OALLOC_IBUF_IE + | XB_CTRL_XMT_MAX_RTRY_IE + | XB_CTRL_MAXREQ_TOUT_IE + | XB_CTRL_XMT_RTRY_IE + | XB_CTRL_SRC_TOUT_IE) ); + } + + xswitch_provider_register(busv, &xbow_provider); + + return 0; /* attach successful */ +} + +/*ARGSUSED */ +int +xbow_open(devfs_handle_t *devp, int oflag, int otyp, cred_t *credp) +{ + if (!_CAP_CRABLE((uint64_t)credp, CAP_DEVICE_MGT)) + return EPERM; + return 0; + +} + +/*ARGSUSED */ +int +xbow_close(devfs_handle_t dev, int oflag, int otyp, cred_t *crp) +{ + return 0; +} + +/*ARGSUSED */ +int +xbow_map(devfs_handle_t dev, vhandl_t *vt, off_t off, size_t len, uint prot) +{ + devfs_handle_t vhdl = dev_to_vhdl(dev); + xbow_soft_t soft = xbow_soft_get(vhdl); + int error; + + ASSERT(soft); + len = ctob(btoc(len)); + /* XXX- this ignores the offset!!! */ + error = v_mapphys(vt, (void *) soft->base, len); + return error; +} + +/*ARGSUSED */ +int +xbow_unmap(devfs_handle_t dev, vhandl_t *vt) +{ + return 0; +} + +/* This contains special-case code for grio. There are plans to make + * this general sometime in the future, but till then this should + * be good enough. + */ +xwidgetnum_t +xbow_widget_num_get(devfs_handle_t dev) +{ + devfs_handle_t tdev; + char devname[MAXDEVNAME]; + xwidget_info_t xwidget_info; + int i; +#if IP27 + cnodeid_t cnodeid = CNODEID_NONE; +#endif + + vertex_to_name(dev, devname, MAXDEVNAME); + +#if IP30 + /* If there is a ".connection" edge from this vertex, + * then it must be "/hw/node" vertex. Return the widget + * number for heart: 8. + */ + if (hwgraph_edge_get(dev, EDGE_LBL_CONN, &tdev) == + GRAPH_SUCCESS) { + return ((xwidgetnum_t) 8); + } +#elif IP27 + if ((cnodeid = nodevertex_to_cnodeid(dev)) != CNODEID_NONE) { + ASSERT(cnodeid < maxnodes); + return(hub_widget_id(COMPACT_TO_NASID_NODEID(cnodeid))); + } +#endif + + /* If this is a pci controller vertex, traverse up using + * the ".." links to get to the widget. 
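+ *
+ * The loop below takes exactly two HWGRAPH_EDGELBL_DOTDOT hops, on
+ * the assumption that the controller vertex sits two levels below
+ * the widget vertex (e.g. <widget>/pci/controller, layout
+ * illustrative), then reads the widget number from that vertex's
+ * xwidget_info via xwidget_info_id_get().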
+ */ + if (strstr(devname, EDGE_LBL_PCI) && + strstr(devname, EDGE_LBL_CONTROLLER)) { + tdev = dev; + for (i=0; i< 2; i++) { + if (hwgraph_edge_get(tdev, + HWGRAPH_EDGELBL_DOTDOT, &tdev) != + GRAPH_SUCCESS) + return XWIDGET_NONE; + } + + if ((xwidget_info = xwidget_info_chk(tdev)) != NULL) { + return (xwidget_info_id_get(xwidget_info)); + } else { + return XWIDGET_NONE; + } + } + + return XWIDGET_NONE; +} + +int +xbow_ioctl(devfs_handle_t dev, + int cmd, + void *arg, + int flag, + struct cred *cr, + int *rvalp) +{ + devfs_handle_t vhdl; + int error = 0; + +#if defined (DEBUG) + int rc; + devfs_handle_t conn; + struct xwidget_info_s *xwidget_info; + xbow_soft_t xbow_soft; +#endif + *rvalp = 0; + + vhdl = dev_to_vhdl(dev); +#if defined (DEBUG) + xbow_soft = xbow_soft_get(vhdl); + conn = xbow_soft->conn; + + xwidget_info = xwidget_info_get(conn); + ASSERT_ALWAYS(xwidget_info != NULL); + + rc = xwidget_hwid_is_xswitch(&xwidget_info->w_hwid); + ASSERT_ALWAYS(rc != 0); +#endif + switch (cmd) { +#ifdef IRIX + case XBOWIOC_PERF_ENABLE: + case XBOWIOC_PERF_DISABLE: + { + struct xbow_perfarg_t xbow_perf_en; + + if (!_CAP_CRABLE(cr, CAP_DEVICE_MGT)) { + error = EPERM; + break; + } + if ((flag & FWRITE) == 0) { + error = EBADF; + break; + } + if (COPYIN(arg, &xbow_perf_en, sizeof(xbow_perf_en))) { + error = EFAULT; + break; + } + if (error = xbow_enable_perf_counter(vhdl, + xbow_perf_en.link, + (cmd == XBOWIOC_PERF_DISABLE) ? 0 : xbow_perf_en.mode, + xbow_perf_en.counter)) { + error = EINVAL; + break; + } + break; + } +#endif + +#ifdef IRIX + case XBOWIOC_PERF_GET: + { + xbow_perf_link_t *xbow_perf_cnt; + + if ((flag & FREAD) == 0) { + error = EBADF; + break; + } + xbow_perf_cnt = xbow_get_perf_counters(vhdl); + ASSERT_ALWAYS(xbow_perf_cnt != NULL); + + if (COPYOUT((void *) xbow_perf_cnt, (void *) arg, + MAX_XBOW_PORTS * sizeof(xbow_perf_link_t))) { + error = EFAULT; + break; + } + break; + } +#endif + + case XBOWIOC_LLP_ERROR_ENABLE: + if (!_CAP_CRABLE((uint64_t)cr, CAP_DEVICE_MGT)) { + error = EPERM; + break; + } + if ((error = xbow_enable_llp_monitor(vhdl)) != 0) + error = EINVAL; + + break; + + case XBOWIOC_LLP_ERROR_DISABLE: + + if (!_CAP_CRABLE((uint64_t)cr, CAP_DEVICE_MGT)) { + error = EPERM; + break; + } + if ((error = xbow_disable_llp_monitor(vhdl)) != 0) + error = EINVAL; + + break; + +#ifdef IRIX + case XBOWIOC_LLP_ERROR_GET: + { + xbow_link_status_t *xbow_llp_status; + + if ((flag & FREAD) == 0) { + error = EBADF; + break; + } + xbow_llp_status = xbow_get_llp_status(vhdl); + ASSERT_ALWAYS(xbow_llp_status != NULL); + + if (COPYOUT((void *) xbow_llp_status, (void *) arg, + MAX_XBOW_PORTS * sizeof(xbow_link_status_t))) { + error = EFAULT; + break; + } + break; + } +#endif + +#ifdef IRIX + case GIOCSETBW: + { + grio_ioctl_info_t info; + xwidgetnum_t src_widgetnum, dest_widgetnum; + + if (!cap_able(CAP_DEVICE_MGT)) { + error = EPERM; + break; + } + + if (COPYIN(arg, &info, sizeof(grio_ioctl_info_t))) { + error = EFAULT; + break; + } +#ifdef GRIO_DEBUG + printf("xbow:: prev_vhdl: %d next_vhdl: %d reqbw: %lld\n", + info.prev_vhdl, info.next_vhdl, info.reqbw); +#endif /* GRIO_DEBUG */ + + src_widgetnum = xbow_widget_num_get(info.prev_vhdl); + dest_widgetnum = xbow_widget_num_get(info.next_vhdl); + + /* Bandwidth allocation is bi-directional. Since bandwidth + * reservations have already been done at an earlier stage, + * we cannot fail here for lack of bandwidth. 
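+ *
+ * Note the calling convention used here and in GIOCRELEASEBW below:
+ * this path issues xbow_prio_bw_alloc(..., 0, info.reqbw) once per
+ * direction, while the release path issues
+ * xbow_prio_bw_alloc(..., info.reqbw, -info.reqbw) to back the
+ * reservation out again.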
+ */ + xbow_prio_bw_alloc(dev, src_widgetnum, dest_widgetnum, + 0, info.reqbw); + xbow_prio_bw_alloc(dev, dest_widgetnum, src_widgetnum, + 0, info.reqbw); + + break; + } + + case GIOCRELEASEBW: + { + grio_ioctl_info_t info; + xwidgetnum_t src_widgetnum, dest_widgetnum; + + if (!cap_able(CAP_DEVICE_MGT)) { + error = EPERM; + break; + } + + if (COPYIN(arg, &info, sizeof(grio_ioctl_info_t))) { + error = EFAULT; + break; + } +#ifdef GRIO_DEBUG + printf("xbow:: prev_vhdl: %d next_vhdl: %d reqbw: %lld\n", + info.prev_vhdl, info.next_vhdl, info.reqbw); +#endif /* GRIO_DEBUG */ + + src_widgetnum = xbow_widget_num_get(info.prev_vhdl); + dest_widgetnum = xbow_widget_num_get(info.next_vhdl); + + /* Bandwidth reservation is bi-directional. Hence, remove + * bandwidth reservations for both directions. + */ + xbow_prio_bw_alloc(dev, src_widgetnum, dest_widgetnum, + info.reqbw, (-1 * info.reqbw)); + xbow_prio_bw_alloc(dev, dest_widgetnum, src_widgetnum, + info.reqbw, (-1 * info.reqbw)); + + break; + } +#endif + + default: + break; + + } + return error; +} + +/* + * xbow_widget_present: See if a device is present + * on the specified port of this crossbow. + */ +int +xbow_widget_present(xbow_t * xbow, int port) +{ + if ( IS_RUNNING_ON_SIMULATOR() ) { + if ( (port == 14) || (port == 15) ) { + return 1; + } + else { + return 0; + } + } + else { + return xbow->xb_link(port).link_aux_status & XB_AUX_STAT_PRESENT; + } +} + +static int +xbow_link_alive(xbow_t * xbow, int port) +{ + xbwX_stat_t xbow_linkstat; + + xbow_linkstat.linkstatus = xbow->xb_link(port).link_status; + return (xbow_linkstat.link_alive); +} + +/* + * xbow_widget_lookup + * Lookup the edges connected to the xbow specified, and + * retrieve the handle corresponding to the widgetnum + * specified. + * If not found, return 0. + */ +devfs_handle_t +xbow_widget_lookup(devfs_handle_t vhdl, + int widgetnum) +{ + xswitch_info_t xswitch_info; + devfs_handle_t conn; + + xswitch_info = xswitch_info_get(vhdl); + conn = xswitch_info_vhdl_get(xswitch_info, widgetnum); + return conn; +} + +/* + * xbow_setwidint: called when xtalk + * is establishing or migrating our + * interrupt service. + */ +#ifdef LATER +static void +xbow_setwidint(xtalk_intr_t intr) +{ + xwidgetnum_t targ = xtalk_intr_target_get(intr); + iopaddr_t addr = xtalk_intr_addr_get(intr); + xtalk_intr_vector_t vect = xtalk_intr_vector_get(intr); + xbow_t *xbow = (xbow_t *) xtalk_intr_sfarg_get(intr); + + xbow_intr_preset((void *) xbow, 0, targ, addr, vect); +} +#endif /* LATER */ + +/* + * xbow_intr_preset: called during mlreset time + * if the platform specific code needs to route + * an xbow interrupt before the xtalk infrastructure + * is available for use. + * + * Also called from xbow_setwidint, so we don't + * replicate the guts of the routine. + * + * XXX- probably should be renamed xbow_wid_intr_set or + * something to reduce confusion. + */ +/*ARGSUSED3 */ +void +xbow_intr_preset(void *which_widget, + int which_widget_intr, + xwidgetnum_t targ, + iopaddr_t addr, + xtalk_intr_vector_t vect) +{ + xbow_t *xbow = (xbow_t *) which_widget; + + xbow->xb_wid_int_upper = ((0xFF000000 & (vect << 24)) | + (0x000F0000 & (targ << 16)) | + XTALK_ADDR_TO_UPPER(addr)); + xbow->xb_wid_int_lower = XTALK_ADDR_TO_LOWER(addr); +} + +#define XEM_ADD_STR(s) cmn_err(CE_CONT, "%s", (s)) +#define XEM_ADD_NVAR(n,v) cmn_err(CE_CONT, "\t%20s: 0x%x\n", (n), (v)) +#define XEM_ADD_VAR(v) XEM_ADD_NVAR(#v,(v)) +#define XEM_ADD_IOEF(n) if (IOERROR_FIELDVALID(ioe,n)) \ + XEM_ADD_NVAR("ioe." 
#n, \ + IOERROR_GETVALUE(ioe,n)) + +#ifdef IRIX +static void +xem_add_ioe(ioerror_t *ioe) +{ + XEM_ADD_IOEF(errortype); + XEM_ADD_IOEF(widgetnum); + XEM_ADD_IOEF(widgetdev); + XEM_ADD_IOEF(srccpu); + XEM_ADD_IOEF(srcnode); + XEM_ADD_IOEF(errnode); + XEM_ADD_IOEF(sysioaddr); + XEM_ADD_IOEF(xtalkaddr); + XEM_ADD_IOEF(busspace); + XEM_ADD_IOEF(busaddr); + XEM_ADD_IOEF(vaddr); + XEM_ADD_IOEF(memaddr); + XEM_ADD_IOEF(epc); + XEM_ADD_IOEF(ef); +} + +#define XEM_ADD_IOE() (xem_add_ioe(ioe)) +#endif /* IRIX */ + +int xbow_xmit_retry_errors = 0; + +int +xbow_xmit_retry_error(xbow_soft_t soft, + int port) +{ + xswitch_info_t info; + devfs_handle_t vhdl; + widget_cfg_t *wid; + widgetreg_t id; + int part; + int mfgr; + + wid = soft->wpio[port - BASE_XBOW_PORT]; + if (wid == NULL) { + /* If we can't track down a PIO + * pointer to our widget yet, + * leave our caller knowing that + * we are interested in this + * interrupt if it occurs in + * the future. + */ + info = xswitch_info_get(soft->busv); + if (!info) + return 1; + vhdl = xswitch_info_vhdl_get(info, port); + if (vhdl == GRAPH_VERTEX_NONE) + return 1; + wid = (widget_cfg_t *) xtalk_piotrans_addr + (vhdl, 0, 0, sizeof *wid, 0); + if (!wid) + return 1; + soft->wpio[port - BASE_XBOW_PORT] = wid; + } + id = wid->w_id; + part = XWIDGET_PART_NUM(id); + mfgr = XWIDGET_MFG_NUM(id); + + /* If this thing is not a Bridge, + * do not activate the WAR, and + * tell our caller we do not need + * to be called again. + */ + if ((part != BRIDGE_WIDGET_PART_NUM) || + (mfgr != BRIDGE_WIDGET_MFGR_NUM)) { + /* FIXME: add Xbridge to the WAR. + * Shouldn't hurt anything. Later need to + * check if we can remove this. + */ + if ((part != XBRIDGE_WIDGET_PART_NUM) || + (mfgr != XBRIDGE_WIDGET_MFGR_NUM)) + return 0; + } + + /* count how many times we + * have picked up after + * LLP Transmit problems. + */ + xbow_xmit_retry_errors++; + + /* rewrite the control register + * to fix things up. + */ + wid->w_control = wid->w_control; + wid->w_control; + + return 1; +} + +/* + * xbow_errintr_handler will be called if the xbow + * sends an interrupt request to report an error. 
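+ *
+ * (In this port the handler below, like xbow_error_handler(), is
+ * still compiled out under #ifdef LATER; xbow_attach() above only
+ * prints that the xbow error interrupt hookup was bypassed instead
+ * of wiring it up with xtalk_intr_alloc()/xtalk_intr_connect().)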
+ */ + +#ifdef LATER +static void +xbow_errintr_handler(intr_arg_t arg) +{ +#ifdef IRIX + ioerror_t ioe[1]; + xbow_soft_t soft = (xbow_soft_t) arg; + xbow_t *xbow = soft->base; + xbowreg_t wid_control; + xbowreg_t wid_stat; + xbowreg_t wid_err_cmdword; + xbowreg_t wid_err_upper; + xbowreg_t wid_err_lower; + w_err_cmd_word_u wid_err; + uint64_t wid_err_addr; + + int fatal = 0; + int dump_ioe = 0; + + wid_control = xbow->xb_wid_control; + wid_stat = xbow->xb_wid_stat_clr; + wid_err_cmdword = xbow->xb_wid_err_cmdword; + wid_err_upper = xbow->xb_wid_err_upper; + wid_err_lower = xbow->xb_wid_err_lower; + xbow->xb_wid_err_cmdword = 0; + + wid_err_addr = + wid_err_lower + | (((iopaddr_t) wid_err_upper + & WIDGET_ERR_UPPER_ADDR_ONLY) + << 32); + + if (wid_stat & XB_WID_STAT_LINK_INTR_MASK) { + int port; + + wid_err.r = wid_err_cmdword; + + for (port = MAX_PORT_NUM - MAX_XBOW_PORTS; + port < MAX_PORT_NUM; port++) { + if (wid_stat & XB_WID_STAT_LINK_INTR(port)) { + xb_linkregs_t *link = &(xbow->xb_link(port)); + xbowreg_t link_control = link->link_control; + xbowreg_t link_status = link->link_status_clr; + xbowreg_t link_aux_status = link->link_aux_status; + xbowreg_t link_pend; + + link_pend = link_status & link_control & + (XB_STAT_ILLEGAL_DST_ERR + | XB_STAT_OALLOC_IBUF_ERR + | XB_STAT_RCV_CNT_OFLOW_ERR + | XB_STAT_XMT_CNT_OFLOW_ERR + | XB_STAT_XMT_MAX_RTRY_ERR + | XB_STAT_RCV_ERR + | XB_STAT_XMT_RTRY_ERR + | XB_STAT_MAXREQ_TOUT_ERR + | XB_STAT_SRC_TOUT_ERR + ); + + if (link_pend & XB_STAT_ILLEGAL_DST_ERR) { + if (wid_err.f.sidn == port) { + IOERROR_INIT(ioe); + IOERROR_SETVALUE(ioe, widgetnum, port); + IOERROR_SETVALUE(ioe, xtalkaddr, wid_err_addr); + if (IOERROR_HANDLED == + xbow_error_handler(soft, + IOECODE_DMA, + MODE_DEVERROR, + ioe)) { + link_pend &= ~XB_STAT_ILLEGAL_DST_ERR; + } else { + dump_ioe++; + } + } + } + /* Xbow/Bridge WAR: + * if the bridge signals an LLP Transmitter Retry, + * rewrite its control register. + * If someone else triggers this interrupt, + * ignore (and disable) the interrupt. + */ + if (link_pend & XB_STAT_XMT_RTRY_ERR) { + if (!xbow_xmit_retry_error(soft, port)) { + link_control &= ~XB_CTRL_XMT_RTRY_IE; + link->link_control = link_control; + link->link_control; /* stall until written */ + } + link_pend &= ~XB_STAT_XMT_RTRY_ERR; + } + if (link_pend) { + devfs_handle_t xwidget_vhdl; + char *xwidget_name; + + /* Get the widget name corresponding to the current + * xbow link. 
+ */ + xwidget_vhdl = xbow_widget_lookup(soft->busv,port); + xwidget_name = xwidget_name_get(xwidget_vhdl); + +#ifdef IRIX + cmn_err(CE_CONT, + "%s port %X[%s] XIO Bus Error", + soft->name, port, xwidget_name); + if (link_status & XB_STAT_MULTI_ERR) + XEM_ADD_STR("\tMultiple Errors\n"); + if (link_status & XB_STAT_ILLEGAL_DST_ERR) + XEM_ADD_STR("\tInvalid Packet Destination\n"); + if (link_status & XB_STAT_OALLOC_IBUF_ERR) + XEM_ADD_STR("\tInput Overallocation Error\n"); + if (link_status & XB_STAT_RCV_CNT_OFLOW_ERR) + XEM_ADD_STR("\tLLP receive error counter overflow\n"); + if (link_status & XB_STAT_XMT_CNT_OFLOW_ERR) + XEM_ADD_STR("\tLLP transmit retry counter overflow\n"); + if (link_status & XB_STAT_XMT_MAX_RTRY_ERR) + XEM_ADD_STR("\tLLP Max Transmitter Retry\n"); + if (link_status & XB_STAT_RCV_ERR) + XEM_ADD_STR("\tLLP Receiver error\n"); + if (link_status & XB_STAT_XMT_RTRY_ERR) + XEM_ADD_STR("\tLLP Transmitter Retry\n"); + if (link_status & XB_STAT_MAXREQ_TOUT_ERR) + XEM_ADD_STR("\tMaximum Request Timeout\n"); + if (link_status & XB_STAT_SRC_TOUT_ERR) + XEM_ADD_STR("\tSource Timeout Error\n"); +#endif + + { + int other_port; + + for (other_port = 8; other_port < 16; ++other_port) { + if (link_aux_status & (1 << other_port)) { + /* XXX- need to go to "other_port" + * and clean up after the timeout? + */ + XEM_ADD_VAR(other_port); + } + } + } + +#if !DEBUG + if (kdebug) { +#endif + XEM_ADD_VAR(link_control); + XEM_ADD_VAR(link_status); + XEM_ADD_VAR(link_aux_status); + + if (dump_ioe) { + XEM_ADD_IOE(); + dump_ioe = 0; + } +#if !DEBUG + } +#endif + fatal++; + } + } + } + } + if (wid_stat & wid_control & XB_WID_STAT_WIDGET0_INTR) { + /* we have a "widget zero" problem */ + + if (wid_stat & (XB_WID_STAT_MULTI_ERR + | XB_WID_STAT_XTALK_ERR + | XB_WID_STAT_REG_ACC_ERR)) { + + cmn_err(CE_CONT, + "%s Port 0 XIO Bus Error", + soft->name); + if (wid_stat & XB_WID_STAT_MULTI_ERR) + XEM_ADD_STR("\tMultiple Error\n"); + if (wid_stat & XB_WID_STAT_XTALK_ERR) + XEM_ADD_STR("\tXIO Error\n"); + if (wid_stat & XB_WID_STAT_REG_ACC_ERR) + XEM_ADD_STR("\tRegister Access Error\n"); + + fatal++; + } + } + if (fatal) { + XEM_ADD_VAR(wid_stat); + XEM_ADD_VAR(wid_control); + XEM_ADD_VAR(wid_err_cmdword); + XEM_ADD_VAR(wid_err_upper); + XEM_ADD_VAR(wid_err_lower); + XEM_ADD_VAR(wid_err_addr); + cmn_err_tag(8, CE_PANIC, "XIO Bus Error"); + } +#endif +} +#endif /* LATER */ + +/* + * XBOW ERROR Handling routines. + * These get invoked as part of walking down the error handling path + * from hub/heart towards the I/O device that caused the error. + */ + +/* + * xbow_error_handler + * XBow error handling dispatch routine. + * This is the primary interface used by external world to invoke + * in case of an error related to a xbow. + * Only functionality in this layer is to identify the widget handle + * given the widgetnum. Otherwise, xbow does not gathers any error + * data. 
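+ *
+ * Return convention of the body below: the default answer handed
+ * back to the caller is IOERROR_WIDGETLEVEL; IOERROR_HANDLED is
+ * returned for MODE_DEVPROBE or when the downstream widget handler
+ * (reached via xbow_widget_lookup() plus xtalk_error_handler())
+ * accepts the error; an IOERROR_UNHANDLED result is escalated to
+ * IOERROR_PANIC, with the raw crossbow registers dumped in
+ * DEBUG/kdebug kernels.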
+ */ + +#ifdef LATER +static int +xbow_error_handler( + void *einfo, + int error_code, + ioerror_mode_t mode, + ioerror_t *ioerror) +{ +#ifdef IRIX + int retval = IOERROR_WIDGETLEVEL; + + xbow_soft_t soft = (xbow_soft_t) einfo; + int port; + devfs_handle_t conn; + devfs_handle_t busv; + + xbow_t *xbow = soft->base; + xbowreg_t wid_stat; + xbowreg_t wid_err_cmdword; + xbowreg_t wid_err_upper; + xbowreg_t wid_err_lower; + uint64_t wid_err_addr; + + xb_linkregs_t *link; + xbowreg_t link_control; + xbowreg_t link_status; + xbowreg_t link_aux_status; + + ASSERT(soft != 0); + busv = soft->busv; + +#if DEBUG && ERROR_DEBUG + cmn_err(CE_CONT, "%s: xbow_error_handler\n", soft->name, busv); +#endif + + port = IOERROR_GETVALUE(ioerror, widgetnum); + + if (port == 0) { + /* error during access to xbow: + * do NOT attempt to access xbow regs. + */ + if (mode == MODE_DEVPROBE) + return IOERROR_HANDLED; + + if (error_code & IOECODE_DMA) { + cmn_err(CE_ALERT, + "DMA error blamed on Crossbow at %s\n" + "\tbut Crosbow never initiates DMA!", + soft->name); + } + if (error_code & IOECODE_PIO) { + cmn_err(CE_ALERT, + "PIO Error on XIO Bus %s\n" + "\tattempting to access XIO controller\n" + "\twith offset 0x%X", + soft->name, + IOERROR_GETVALUE(ioerror, xtalkaddr)); + } + /* caller will dump contents of ioerror + * in DEBUG and kdebug kernels. + */ + + return retval; + } + /* + * error not on port zero: + * safe to read xbow registers. + */ + wid_stat = xbow->xb_wid_stat; + wid_err_cmdword = xbow->xb_wid_err_cmdword; + wid_err_upper = xbow->xb_wid_err_upper; + wid_err_lower = xbow->xb_wid_err_lower; + + wid_err_addr = + wid_err_lower + | (((iopaddr_t) wid_err_upper + & WIDGET_ERR_UPPER_ADDR_ONLY) + << 32); + + if ((port < BASE_XBOW_PORT) || + (port >= MAX_PORT_NUM)) { + + if (mode == MODE_DEVPROBE) + return IOERROR_HANDLED; + + if (error_code & IOECODE_DMA) { + cmn_err(CE_ALERT, + "DMA error blamed on XIO port at %s/%d\n" + "\tbut Crossbow does not support that port", + soft->name, port); + } + if (error_code & IOECODE_PIO) { + cmn_err(CE_ALERT, + "PIO Error on XIO Bus %s\n" + "\tattempting to access XIO port %d\n" + "\t(which Crossbow does not support)" + "\twith offset 0x%X", + soft->name, port, + IOERROR_GETVALUE(ioerror, xtalkaddr)); + } +#if !DEBUG + if (kdebug) { +#endif + XEM_ADD_STR("Raw status values for Crossbow:\n"); + XEM_ADD_VAR(wid_stat); + XEM_ADD_VAR(wid_err_cmdword); + XEM_ADD_VAR(wid_err_upper); + XEM_ADD_VAR(wid_err_lower); + XEM_ADD_VAR(wid_err_addr); +#if !DEBUG + } +#endif + + /* caller will dump contents of ioerror + * in DEBUG and kdebug kernels. + */ + + return retval; + } + /* access to valid port: + * ok to check port status. + */ + + link = &(xbow->xb_link(port)); + link_control = link->link_control; + link_status = link->link_status; + link_aux_status = link->link_aux_status; + + /* Check that there is something present + * in that XIO port. + */ + if (!(link_aux_status & XB_AUX_STAT_PRESENT)) { + /* nobody connected. 
*/ + if (mode == MODE_DEVPROBE) + return IOERROR_HANDLED; + + if (error_code & IOECODE_DMA) { + cmn_err(CE_ALERT, + "DMA error blamed on XIO port at %s/%d\n" + "\tbut there is no device connected there.", + soft->name, port); + } + if (error_code & IOECODE_PIO) { + cmn_err(CE_ALERT, + "PIO Error on XIO Bus %s\n" + "\tattempting to access XIO port %d\n" + "\t(which has no device connected)" + "\twith offset 0x%X", + soft->name, port, + IOERROR_GETVALUE(ioerror, xtalkaddr)); + } +#if !DEBUG + if (kdebug) { +#endif + XEM_ADD_STR("Raw status values for Crossbow:\n"); + XEM_ADD_VAR(wid_stat); + XEM_ADD_VAR(wid_err_cmdword); + XEM_ADD_VAR(wid_err_upper); + XEM_ADD_VAR(wid_err_lower); + XEM_ADD_VAR(wid_err_addr); + XEM_ADD_VAR(port); + XEM_ADD_VAR(link_control); + XEM_ADD_VAR(link_status); + XEM_ADD_VAR(link_aux_status); +#if !DEBUG + } +#endif + return retval; + + } + /* Check that the link is alive. + */ + if (!(link_status & XB_STAT_LINKALIVE)) { + /* nobody connected. */ + if (mode == MODE_DEVPROBE) + return IOERROR_HANDLED; + + cmn_err(CE_ALERT, + "%s%sError on XIO Bus %s port %d", + (error_code & IOECODE_DMA) ? "DMA " : "", + (error_code & IOECODE_PIO) ? "PIO " : "", + soft->name, port); + + if ((error_code & IOECODE_PIO) && + (IOERROR_FIELDVALID(ioerror, xtalkaddr))) { + cmn_err(CE_CONT, + "\tAccess attempted to offset 0x%X\n", + IOERROR_GETVALUE(ioerror, xtalkaddr)); + } + if (link_aux_status & XB_AUX_LINKFAIL_RST_BAD) + XEM_ADD_STR("\tLink never came out of reset\n"); + else + XEM_ADD_STR("\tLink failed while transferring data\n"); + + } + /* get the connection point for the widget + * involved in this error; if it exists and + * is not our connectpoint, cycle back through + * xtalk_error_handler to deliver control to + * the proper handler (or to report a generic + * crosstalk error). + * + * If the downstream handler won't handle + * the problem, we let our upstream caller + * deal with it, after (in DEBUG and kdebug + * kernels) dumping the xbow state for this + * port. + */ + conn = xbow_widget_lookup(busv, port); + if ((conn != GRAPH_VERTEX_NONE) && + (conn != soft->conn)) { + retval = xtalk_error_handler(conn, error_code, mode, ioerror); + if (retval == IOERROR_HANDLED) + return IOERROR_HANDLED; + } + if (mode == MODE_DEVPROBE) + return IOERROR_HANDLED; + + if (retval == IOERROR_UNHANDLED) { + retval = IOERROR_PANIC; + + cmn_err(CE_ALERT, + "%s%sError on XIO Bus %s port %d", + (error_code & IOECODE_DMA) ? "DMA " : "", + (error_code & IOECODE_PIO) ? "PIO " : "", + soft->name, port); + + if ((error_code & IOECODE_PIO) && + (IOERROR_FIELDVALID(ioerror, xtalkaddr))) { + cmn_err(CE_CONT, + "\tAccess attempted to offset 0x%X\n", + IOERROR_GETVALUE(ioerror, xtalkaddr)); + } + } + +#if !DEBUG + if (kdebug) { +#endif + XEM_ADD_STR("Raw status values for Crossbow:\n"); + XEM_ADD_VAR(wid_stat); + XEM_ADD_VAR(wid_err_cmdword); + XEM_ADD_VAR(wid_err_upper); + XEM_ADD_VAR(wid_err_lower); + XEM_ADD_VAR(wid_err_addr); + XEM_ADD_VAR(port); + XEM_ADD_VAR(link_control); + XEM_ADD_VAR(link_status); + XEM_ADD_VAR(link_aux_status); +#if !DEBUG + } +#endif + /* caller will dump raw ioerror data + * in DEBUG and kdebug kernels. 
+ */ + + return retval; +#endif /* IRIX */ +} + +#endif /* LATER */ + +void +xbow_update_perf_counters(devfs_handle_t vhdl) +{ + xbow_soft_t xbow_soft = xbow_soft_get(vhdl); + xbow_perf_t *xbow_perf = xbow_soft->xbow_perfcnt; + xbow_perf_link_t *xbow_plink = xbow_soft->xbow_perflink; + xbow_perfcount_t perf_reg; + int link, s, i; + + for (i = 0; i < XBOW_PERF_COUNTERS; i++, xbow_perf++) { + if (xbow_perf->xp_mode == XBOW_MONITOR_NONE) + continue; + + s = mutex_spinlock(&xbow_soft->xbow_perf_lock); + + perf_reg.xb_counter_val = *(xbowreg_t *) xbow_perf->xp_perf_reg; + + link = perf_reg.xb_perf.link_select; + + (xbow_plink + link)->xlp_cumulative[xbow_perf->xp_curmode] += + ((perf_reg.xb_perf.count - xbow_perf->xp_current) & XBOW_COUNTER_MASK); + xbow_perf->xp_current = perf_reg.xb_perf.count; + + mutex_spinunlock(&xbow_soft->xbow_perf_lock, s); + } + /* Do port /mode multiplexing here */ + +#ifdef IRIX + (void) timeout(xbow_update_perf_counters, + (void *) (__psunsigned_t) vhdl, XBOW_PERF_TIMEOUT); +#endif + +} + +xbow_perf_link_t * +xbow_get_perf_counters(devfs_handle_t vhdl) +{ + xbow_soft_t xbow_soft = xbow_soft_get(vhdl); + xbow_perf_link_t *xbow_perf_link = xbow_soft->xbow_perflink; + + return xbow_perf_link; +} + +int +xbow_enable_perf_counter(devfs_handle_t vhdl, int link, int mode, int counter) +{ + xbow_soft_t xbow_soft = xbow_soft_get(vhdl); + xbow_perf_t *xbow_perf = xbow_soft->xbow_perfcnt; + xbow_linkctrl_t xbow_link_ctrl; + xbow_t *xbow = xbow_soft->base; + xbow_perfcount_t perf_reg; + int s, i; + + link -= BASE_XBOW_PORT; + if ((link < 0) || (link >= MAX_XBOW_PORTS)) + return -1; + + if ((mode < XBOW_MONITOR_NONE) || (mode > XBOW_MONITOR_DEST_LINK)) + return -1; + + if ((counter < 0) || (counter >= XBOW_PERF_COUNTERS)) + return -1; + + s = mutex_spinlock(&xbow_soft->xbow_perf_lock); + + if ((xbow_perf + counter)->xp_mode && mode) { + mutex_spinunlock(&xbow_soft->xbow_perf_lock, s); + return -1; + } + for (i = 0; i < XBOW_PERF_COUNTERS; i++) { + if (i == counter) + continue; + if (((xbow_perf + i)->xp_link == link) && + ((xbow_perf + i)->xp_mode)) { + mutex_spinunlock(&xbow_soft->xbow_perf_lock, s); + return -1; + } + } + xbow_perf += counter; + + xbow_perf->xp_curlink = xbow_perf->xp_link = link; + xbow_perf->xp_curmode = xbow_perf->xp_mode = mode; + + xbow_link_ctrl.xbl_ctrlword = xbow->xb_link_raw[link].link_control; + xbow_link_ctrl.xb_linkcontrol.perf_mode = mode; + xbow->xb_link_raw[link].link_control = xbow_link_ctrl.xbl_ctrlword; + + perf_reg.xb_counter_val = *(xbowreg_t *) xbow_perf->xp_perf_reg; + perf_reg.xb_perf.link_select = link; + *(xbowreg_t *) xbow_perf->xp_perf_reg = perf_reg.xb_counter_val; + xbow_perf->xp_current = perf_reg.xb_perf.count; + +#ifdef IRIX + (void) timeout(xbow_update_perf_counters, + (void *) (__psunsigned_t) vhdl, XBOW_PERF_TIMEOUT); +#endif + + mutex_spinunlock(&xbow_soft->xbow_perf_lock, s); + + return 0; +} + +xbow_link_status_t * +xbow_get_llp_status(devfs_handle_t vhdl) +{ + xbow_soft_t xbow_soft = xbow_soft_get(vhdl); + xbow_link_status_t *xbow_llp_status = xbow_soft->xbow_link_status; + + return xbow_llp_status; +} + +void +xbow_update_llp_status(devfs_handle_t vhdl) +{ + xbow_soft_t xbow_soft = xbow_soft_get(vhdl); + xbow_link_status_t *xbow_llp_status = xbow_soft->xbow_link_status; + xbow_t *xbow; + xbwX_stat_t lnk_sts; + xbow_aux_link_status_t aux_sts; + int link; + devfs_handle_t xwidget_vhdl; + char *xwidget_name; + + xbow = (xbow_t *) xbow_soft->base; + for (link = 0; link < MAX_XBOW_PORTS; link++, xbow_llp_status++) { + /* Get 
the widget name corresponding the current link. + * Note : 0 <= link < MAX_XBOW_PORTS(8). + * BASE_XBOW_PORT(0x8) <= xwidget number < MAX_PORT_NUM (0x10) + */ + xwidget_vhdl = xbow_widget_lookup(xbow_soft->busv,link+BASE_XBOW_PORT); + xwidget_name = xwidget_name_get(xwidget_vhdl); + aux_sts.aux_linkstatus + = xbow->xb_link_raw[link].link_aux_status; + lnk_sts.linkstatus = xbow->xb_link_raw[link].link_status_clr; + + if (lnk_sts.link_alive == 0) + continue; + + xbow_llp_status->rx_err_count += + aux_sts.xb_aux_linkstatus.rx_err_cnt; + + xbow_llp_status->tx_retry_count += + aux_sts.xb_aux_linkstatus.tx_retry_cnt; + + if (lnk_sts.linkstatus & ~(XB_STAT_RCV_ERR | XB_STAT_XMT_RTRY_ERR | XB_STAT_LINKALIVE)) { +#ifdef IRIX + cmn_err(CE_WARN, "link %d[%s]: bad status 0x%x\n", + link, xwidget_name, lnk_sts.linkstatus); +#endif + } + } +#ifdef IRIX + if (xbow_soft->link_monitor) + (void) timeout(xbow_update_llp_status, + (void *) (__psunsigned_t) vhdl, XBOW_STATS_TIMEOUT); +#endif +} + +int +xbow_disable_llp_monitor(devfs_handle_t vhdl) +{ + xbow_soft_t xbow_soft = xbow_soft_get(vhdl); + int port; + + for (port = 0; port < MAX_XBOW_PORTS; port++) { + xbow_soft->xbow_link_status[port].rx_err_count = 0; + xbow_soft->xbow_link_status[port].tx_retry_count = 0; + } + + xbow_soft->link_monitor = 0; + return 0; +} + +int +xbow_enable_llp_monitor(devfs_handle_t vhdl) +{ + xbow_soft_t xbow_soft = xbow_soft_get(vhdl); + +#ifdef IRIX + (void) timeout(xbow_update_llp_status, + (void *) (__psunsigned_t) vhdl, XBOW_STATS_TIMEOUT); +#endif + xbow_soft->link_monitor = 1; + return 0; +} + + +int +xbow_reset_link(devfs_handle_t xconn_vhdl) +{ + xwidget_info_t widget_info; + xwidgetnum_t port; + xbow_t *xbow; + xbowreg_t ctrl; + xbwX_stat_t stat; + unsigned itick; + unsigned dtick; + static int ticks_per_ms = 0; + + if (!ticks_per_ms) { + itick = get_timestamp(); + us_delay(1000); + ticks_per_ms = get_timestamp() - itick; + } + widget_info = xwidget_info_get(xconn_vhdl); + port = xwidget_info_id_get(widget_info); + +#ifdef XBOW_K1PTR /* defined if we only have one xbow ... */ + xbow = XBOW_K1PTR; +#else + { + devfs_handle_t xbow_vhdl; + xbow_soft_t xbow_soft; + + hwgraph_traverse(xconn_vhdl, ".master/xtalk/0/xbow", &xbow_vhdl); + xbow_soft = xbow_soft_get(xbow_vhdl); + xbow = xbow_soft->base; + } +#endif + + /* + * This requires three PIOs (reset the link, check for the + * reset, restore the control register for the link) plus + * 10us to wait for the reset. We allow up to 1ms for the + * widget to come out of reset before giving up and + * returning a failure. + */ + ctrl = xbow->xb_link(port).link_control; + xbow->xb_link(port).link_reset = 0; + itick = get_timestamp(); + while (1) { + stat.linkstatus = xbow->xb_link(port).link_status; + if (stat.link_alive) + break; + dtick = get_timestamp() - itick; + if (dtick > ticks_per_ms) { + return -1; /* never came out of reset */ + } + DELAY(2); /* don't beat on link_status */ + } + xbow->xb_link(port).link_control = ctrl; + return 0; +} + +/* + * Dump xbow registers. + * input parameter is either a pointer to + * the xbow chip or the vertex handle for + * an xbow vertex. 
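As an aside on xbow_reset_link() above: it first calibrates how many timestamp ticks make up one millisecond, then polls the link status until the link reports alive or the calibrated budget is exhausted. The following standalone sketch (not part of the patch) restates that calibrate-then-poll pattern with POSIX timing in place of get_timestamp()/us_delay(); check_ready() is an invented stand-in for reading stat.link_alive.

/*
 * Illustrative sketch only: the calibrate-then-poll idea from
 * xbow_reset_link(), with POSIX clocks instead of get_timestamp().
 */
#include <stdio.h>
#include <time.h>

static long long now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

static int check_ready(void)            /* stands in for stat.link_alive */
{
        static int polls;

        return ++polls > 3;             /* pretend the link comes up on poll 4 */
}

static int poll_with_timeout_ms(long timeout_ms)
{
        long long start = now_ns();

        while (!check_ready()) {
                if (now_ns() - start > timeout_ms * 1000000LL)
                        return -1;      /* never came out of reset */
                /* original inserts DELAY(2) here: don't beat on the register */
        }
        return 0;
}

int main(void)
{
        printf("reset poll: %s\n", poll_with_timeout_ms(1) ? "timed out" : "ok");
        return 0;
}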
+ */ +void +idbg_xbowregs(int64_t regs) +{ + xbow_t *xbow; + int i; + xb_linkregs_t *link; + +#ifdef IRIX + if (dev_is_vertex((devfs_handle_t) regs)) { + devfs_handle_t vhdl = (devfs_handle_t) regs; + xbow_soft_t soft = xbow_soft_get(vhdl); + + xbow = soft->base; + } else +#endif + { + xbow = (xbow_t *) regs; + } + +#ifdef IRIX + qprintf("Printing xbow registers starting at 0x%x\n", xbow); + qprintf("wid %x status %x erruppr %x errlower %x control %x timeout %x\n", + xbow->xb_wid_id, xbow->xb_wid_stat, xbow->xb_wid_err_upper, + xbow->xb_wid_err_lower, xbow->xb_wid_control, + xbow->xb_wid_req_timeout); + qprintf("intr uppr %x lower %x errcmd %x llp ctrl %x arb_reload %x\n", + xbow->xb_wid_int_upper, xbow->xb_wid_int_lower, + xbow->xb_wid_err_cmdword, xbow->xb_wid_llp, + xbow->xb_wid_arb_reload); +#endif + + for (i = 8; i <= 0xf; i++) { + link = &xbow->xb_link(i); +#ifdef IRIX + qprintf("Link %d registers\n", i); + qprintf("\tctrl %x stat %x arbuppr %x arblowr %x auxstat %x\n", + link->link_control, link->link_status, + link->link_arb_upper, link->link_arb_lower, + link->link_aux_status); +#endif + } +} + + +#define XBOW_ARB_RELOAD_TICKS 25 + /* granularity: 4 MB/s, max: 124 MB/s */ +#define GRANULARITY ((100 * 1000000) / XBOW_ARB_RELOAD_TICKS) + +#define XBOW_BYTES_TO_GBR(BYTES_per_s) (int) (BYTES_per_s / GRANULARITY) + +#define XBOW_GBR_TO_BYTES(cnt) (bandwidth_t) ((cnt) * GRANULARITY) + +#define CEILING_BYTES_TO_GBR(gbr, bytes_per_sec) \ + ((XBOW_GBR_TO_BYTES(gbr) < bytes_per_sec) ? gbr+1 : gbr) + +#define XBOW_ARB_GBR_MAX 31 + +#define ABS(x) ((x > 0) ? (x) : (-1 * x)) + /* absolute value */ + +int +xbow_bytes_to_gbr(bandwidth_t old_bytes_per_sec, bandwidth_t bytes_per_sec) +{ + int gbr_granted; + int new_total_gbr; + int change_gbr; + bandwidth_t new_total_bw; + +#ifdef GRIO_DEBUG + printf("xbow_bytes_to_gbr: old_bytes_per_sec %lld bytes_per_sec %lld\n", + old_bytes_per_sec, bytes_per_sec); +#endif /* GRIO_DEBUG */ + + gbr_granted = CEILING_BYTES_TO_GBR((XBOW_BYTES_TO_GBR(old_bytes_per_sec)), + old_bytes_per_sec); + new_total_bw = old_bytes_per_sec + bytes_per_sec; + new_total_gbr = CEILING_BYTES_TO_GBR((XBOW_BYTES_TO_GBR(new_total_bw)), + new_total_bw); + + change_gbr = new_total_gbr - gbr_granted; + +#ifdef GRIO_DEBUG + printf("xbow_bytes_to_gbr: gbr_granted %d new_total_gbr %d change_gbr %d\n", + gbr_granted, new_total_gbr, change_gbr); +#endif /* GRIO_DEBUG */ + + return (change_gbr); +} + +/* Conversion from GBR to bytes */ +bandwidth_t +xbow_gbr_to_bytes(int gbr) +{ + return (XBOW_GBR_TO_BYTES(gbr)); +} + +/* Given the vhdl for the desired xbow, the src and dest. widget ids + * and the req_bw value, this xbow driver entry point accesses the + * xbow registers and allocates the desired bandwidth if available. + * + * If bandwidth allocation is successful, return success else return failure. 
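Before the allocator itself, it helps to see the GBR arithmetic worked through once. One GBR arbitration tick is GRANULARITY = 4 MB/s, and requests are charged in whole ticks, so growing a 10 MB/s reservation by 6 MB/s costs ceil(16/4) - ceil(10/4) = 4 - 3 = 1 extra tick. The standalone program below (illustration only, reusing the macro definitions above) prints that result.

/*
 * Standalone worked example of the bytes <-> GBR conversion macros.
 * Not part of the patch; the macros are copied from the text above.
 */
#include <stdio.h>

typedef long long bandwidth_t;

#define XBOW_ARB_RELOAD_TICKS   25
#define GRANULARITY             ((100 * 1000000) / XBOW_ARB_RELOAD_TICKS)
#define XBOW_BYTES_TO_GBR(b)    ((int) ((b) / GRANULARITY))
#define XBOW_GBR_TO_BYTES(cnt)  ((bandwidth_t) ((cnt) * GRANULARITY))
#define CEILING_BYTES_TO_GBR(gbr, bytes) \
        ((XBOW_GBR_TO_BYTES(gbr) < (bytes)) ? (gbr) + 1 : (gbr))

static int bytes_to_gbr(bandwidth_t old_bps, bandwidth_t req_bps)
{
        int granted = CEILING_BYTES_TO_GBR(XBOW_BYTES_TO_GBR(old_bps), old_bps);
        bandwidth_t total = old_bps + req_bps;
        int total_gbr = CEILING_BYTES_TO_GBR(XBOW_BYTES_TO_GBR(total), total);

        return total_gbr - granted;     /* extra arbitration ticks needed */
}

int main(void)
{
        printf("granularity = %d bytes/s\n", GRANULARITY);
        printf("extra ticks = %d\n",
               bytes_to_gbr(10 * 1000000LL, 6 * 1000000LL));
        return 0;
}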
+ */ +int +xbow_prio_bw_alloc(devfs_handle_t vhdl, + xwidgetnum_t src_wid, + xwidgetnum_t dest_wid, + unsigned long long old_alloc_bw, + unsigned long long req_bw) +{ + xbow_soft_t soft = xbow_soft_get(vhdl); + volatile xbowreg_t *xreg; + xbowreg_t mask; + int s; + int error = 0; + bandwidth_t old_bw_BYTES, req_bw_BYTES; + xbowreg_t old_xreg; + int old_bw_GBR, req_bw_GBR, new_bw_GBR; + +#ifdef GRIO_DEBUG + printf("xbow_prio_bw_alloc: vhdl %d src_wid %d dest_wid %d req_bw %lld\n", + (int) vhdl, (int) src_wid, (int) dest_wid, req_bw); +#endif + + ASSERT(XBOW_WIDGET_IS_VALID(src_wid)); + ASSERT(XBOW_WIDGET_IS_VALID(dest_wid)); + + s = mutex_spinlock(&soft->xbow_bw_alloc_lock); + + /* Get pointer to the correct register */ + xreg = XBOW_PRIO_ARBREG_PTR(soft->base, dest_wid, src_wid); + + /* Get mask for GBR count value */ + mask = XB_ARB_GBR_MSK << XB_ARB_GBR_SHFT(src_wid); + + req_bw_GBR = xbow_bytes_to_gbr(old_alloc_bw, req_bw); + req_bw_BYTES = (req_bw_GBR < 0) ? (-1 * xbow_gbr_to_bytes(ABS(req_bw_GBR))) + : xbow_gbr_to_bytes(req_bw_GBR); + +#ifdef GRIO_DEBUG + printf("req_bw %lld req_bw_BYTES %lld req_bw_GBR %d\n", + req_bw, req_bw_BYTES, req_bw_GBR); +#endif /* GRIO_DEBUG */ + + old_bw_BYTES = soft->bw_cur_used[(int) dest_wid - MAX_XBOW_PORTS]; + old_xreg = *xreg; + old_bw_GBR = (((*xreg) & mask) >> XB_ARB_GBR_SHFT(src_wid)); + +#ifdef GRIO_DEBUG + ASSERT(XBOW_BYTES_TO_GBR(old_bw_BYTES) == old_bw_GBR); + + printf("old_bw_BYTES %lld old_bw_GBR %d\n", old_bw_BYTES, old_bw_GBR); + + printf("req_bw_BYTES %lld old_bw_BYTES %lld soft->bw_hiwm %lld\n", + req_bw_BYTES, old_bw_BYTES, + soft->bw_hiwm[(int) dest_wid - MAX_XBOW_PORTS]); + +#endif /* GRIO_DEBUG */ + + /* Accept the request only if we don't exceed the destination + * port HIWATER_MARK *AND* the max. link GBR arbitration count + */ + if (((old_bw_BYTES + req_bw_BYTES) <= + soft->bw_hiwm[(int) dest_wid - MAX_XBOW_PORTS]) && + (req_bw_GBR + old_bw_GBR <= XBOW_ARB_GBR_MAX)) { + + new_bw_GBR = (old_bw_GBR + req_bw_GBR); + + /* Set this in the xbow link register */ + *xreg = (old_xreg & ~mask) | \ + (new_bw_GBR << XB_ARB_GBR_SHFT(src_wid) & mask); + + soft->bw_cur_used[(int) dest_wid - MAX_XBOW_PORTS] = + xbow_gbr_to_bytes(new_bw_GBR); + } else { + error = 1; + } + + mutex_spinunlock(&soft->xbow_bw_alloc_lock, s); + + return (error); +} diff --git a/arch/ia64/sn/io/xswitch.c b/arch/ia64/sn/io/xswitch.c new file mode 100644 index 000000000..7255650da --- /dev/null +++ b/arch/ia64/sn/io/xswitch.c @@ -0,0 +1,267 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/iobus.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/xtalk/xtalk.h> +#include <asm/sn/xtalk/xswitch.h> +#include <asm/sn/xtalk/xwidget.h> +#include <asm/sn/xtalk/xtalk_private.h> + +#define NEW(ptr) (ptr = kmalloc(sizeof (*(ptr)), GFP_KERNEL)) +#define DEL(ptr) (kfree(ptr)) + +int xswitch_devflag = D_MP; + +/* + * This file provides generic support for Crosstalk + * Switches, in a way that insulates crosstalk providers + * from specifics about the switch chips being used. 
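The insulation described here is carried by the DEV_FUNC macro defined just below: with a single possible switch provider it pastes the function name into a direct call, and in the multi-provider case it indirects through a per-vertex function table. The sketch below (illustration only, all names invented) shows the two dispatch styles side by side in plain C.

/*
 * Illustration of the two DEV_FUNC dispatch styles: compile-time token
 * pasting versus a run-time provider function table.  Not from the patch.
 */
#include <stdio.h>

static int xbow_reset_link_impl(int dev)
{
        printf("xbow reset on dev %d\n", dev);
        return 0;
}

/* style 1: only one provider, resolved at compile time */
#define DEV_FUNC_STATIC(dev, func)      xbow_##func##_impl

/* style 2: several providers, resolved through the device's table */
struct provider_fns {
        int (*reset_link_impl)(int dev);
};
static struct provider_fns xbow_fns = { xbow_reset_link_impl };
static struct provider_fns *dev_to_fns(int dev) { (void) dev; return &xbow_fns; }
#define DEV_FUNC_DYNAMIC(dev, func)     dev_to_fns(dev)->func##_impl

int main(void)
{
        DEV_FUNC_STATIC(3, reset_link)(3);
        DEV_FUNC_DYNAMIC(3, reset_link)(3);
        return 0;
}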
+ */ + +#include <asm/sn/xtalk/xbow.h> +#define DEV_FUNC(dev,func) xbow_##func + +#if !defined(DEV_FUNC) +/* + * There is more than one possible provider + * for this platform. We need to examine the + * master vertex of the current vertex for + * a provider function structure, and indirect + * through the appropriately named member. + */ +#define DEV_FUNC(dev,func) xwidget_to_provider_fns(dev)->func + +static xswitch_provider_t * +xwidget_to_provider_fns(devfs_handle_t xconn) +{ + devfs_handle_t busv; + xswitch_info_t xswitch_info; + xswitch_provider_t provider_fns; + + busv = hwgraph_connectpt_get(xconn_vhdl); + ASSERT(busv != GRAPH_VERTEX_NONE); + + xswitch_info = xswitch_info_get(busv); + ASSERT(xswitch_info != NULL); + + provider_fns = xswitch_info->xswitch_fns; + ASSERT(provider_fns != NULL); + + return provider_fns; +} +#endif + +#define XSWITCH_CENSUS_BIT(port) (1<<(port)) +#define XSWITCH_CENSUS_PORT_MIN (0x0) +#define XSWITCH_CENSUS_PORT_MAX (0xF) +#define XSWITCH_CENSUS_PORTS (0x10) +#define XSWITCH_WIDGET_PRESENT(infop,port) ((infop)->census & XSWITCH_CENSUS_BIT(port)) + +static char xswitch_info_fingerprint[] = "xswitch_info"; + +struct xswitch_info_s { + char *fingerprint; + unsigned census; + devfs_handle_t vhdl[XSWITCH_CENSUS_PORTS]; + devfs_handle_t master_vhdl[XSWITCH_CENSUS_PORTS]; + xswitch_provider_t *xswitch_fns; +}; + +xswitch_info_t +xswitch_info_get(devfs_handle_t xwidget) +{ + xswitch_info_t xswitch_info; + + xswitch_info = (xswitch_info_t) + hwgraph_fastinfo_get(xwidget); +#ifdef IRIX + if ((xswitch_info != NULL) && + (xswitch_info->fingerprint != xswitch_info_fingerprint)) + cmn_err(CE_PANIC, "%v xswitch_info_get bad fingerprint", xwidget); +#endif + + printk("xswitch_info_get: xwidget 0x%p xswitch_info 0x%p\n", xwidget, xswitch_info); + + return (xswitch_info); +} + +void +xswitch_info_vhdl_set(xswitch_info_t xswitch_info, + xwidgetnum_t port, + devfs_handle_t xwidget) +{ +#if XSWITCH_CENSUS_PORT_MIN + if (port < XSWITCH_CENSUS_PORT_MIN) + return; +#endif + if (port > XSWITCH_CENSUS_PORT_MAX) + return; + + xswitch_info->vhdl[port - XSWITCH_CENSUS_PORT_MIN] = xwidget; +} + +devfs_handle_t +xswitch_info_vhdl_get(xswitch_info_t xswitch_info, + xwidgetnum_t port) +{ +#ifdef IRIX + if (xswitch_info == NULL) + cmn_err(CE_PANIC, "xswitch_info_vhdl_get: null xswitch_info"); +#endif + +#if XSWITCH_CENSUS_PORT_MIN + if (port < XSWITCH_CENSUS_PORT_MIN) + return GRAPH_VERTEX_NONE; +#endif + if (port > XSWITCH_CENSUS_PORT_MAX) + return GRAPH_VERTEX_NONE; + + return xswitch_info->vhdl[port - XSWITCH_CENSUS_PORT_MIN]; +} + +/* + * Some systems may allow for multiple switch masters. On such systems, + * we assign a master for each port on the switch. These interfaces + * establish and retrieve that assignment. 
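The census field declared above is nothing more than a 16-bit port bitmap: xswitch_info_link_is_ok() sets XSWITCH_CENSUS_BIT(port) when a link checks out, and XSWITCH_WIDGET_PRESENT() tests the same bit later. A minimal standalone restatement of that bookkeeping (illustration only):

/* Not part of the patch: the census bitmap idea in isolation. */
#include <stdio.h>

#define CENSUS_BIT(port)        (1u << (port))
#define CENSUS_PORT_MAX         0xF

int main(void)
{
        unsigned census = 0;
        int port;

        census |= CENSUS_BIT(0x8);      /* link_is_ok for widget 0x8 */
        census |= CENSUS_BIT(0xb);      /* link_is_ok for widget 0xb */

        for (port = 0; port <= CENSUS_PORT_MAX; port++)
                if (census & CENSUS_BIT(port))
                        printf("widget 0x%x present\n", port);
        return 0;
}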
+ */ +void +xswitch_info_master_assignment_set(xswitch_info_t xswitch_info, + xwidgetnum_t port, + devfs_handle_t master_vhdl) +{ +#if XSWITCH_CENSUS_PORT_MIN + if (port < XSWITCH_CENSUS_PORT_MIN) + return; +#endif + if (port > XSWITCH_CENSUS_PORT_MAX) + return; + + xswitch_info->master_vhdl[port - XSWITCH_CENSUS_PORT_MIN] = master_vhdl; +} + +devfs_handle_t +xswitch_info_master_assignment_get(xswitch_info_t xswitch_info, + xwidgetnum_t port) +{ +#if XSWITCH_CENSUS_PORT_MIN + if (port < XSWITCH_CENSUS_PORT_MIN) + return GRAPH_VERTEX_NONE; +#endif + if (port > XSWITCH_CENSUS_PORT_MAX) + return GRAPH_VERTEX_NONE; + + return xswitch_info->master_vhdl[port - XSWITCH_CENSUS_PORT_MIN]; +} + +void +xswitch_info_set(devfs_handle_t xwidget, xswitch_info_t xswitch_info) +{ + xswitch_info->fingerprint = xswitch_info_fingerprint; + hwgraph_fastinfo_set(xwidget, (arbitrary_info_t) xswitch_info); +} + +xswitch_info_t +xswitch_info_new(devfs_handle_t xwidget) +{ + xswitch_info_t xswitch_info; + + xswitch_info = xswitch_info_get(xwidget); + if (xswitch_info == NULL) { + int port; + + NEW(xswitch_info); + xswitch_info->census = 0; + for (port = XSWITCH_CENSUS_PORT_MIN; + port <= XSWITCH_CENSUS_PORT_MAX; + port++) { + xswitch_info_vhdl_set(xswitch_info, port, + GRAPH_VERTEX_NONE); + + xswitch_info_master_assignment_set(xswitch_info, + port, + GRAPH_VERTEX_NONE); + } + xswitch_info_set(xwidget, xswitch_info); + printk("xswitch_info_new: xswitch_info_set xwidget 0x%p, xswitch_info 0x%p\n", + xwidget, xswitch_info); + } + return xswitch_info; +} + +void +xswitch_provider_register(devfs_handle_t busv, + xswitch_provider_t * xswitch_fns) +{ + xswitch_info_t xswitch_info = xswitch_info_get(busv); + + ASSERT(xswitch_info); + xswitch_info->xswitch_fns = xswitch_fns; +} + +void +xswitch_info_link_is_ok(xswitch_info_t xswitch_info, xwidgetnum_t port) +{ + xswitch_info->census |= XSWITCH_CENSUS_BIT(port); +} + +int +xswitch_info_link_ok(xswitch_info_t xswitch_info, xwidgetnum_t port) +{ +#if XSWITCH_CENSUS_PORT_MIN + if (port < XSWITCH_CENSUS_PORT_MIN) + return 0; +#endif + + if (port > XSWITCH_CENSUS_PORT_MAX) + return 0; + + return (xswitch_info->census & XSWITCH_CENSUS_BIT(port)); +} + +int +xswitch_reset_link(devfs_handle_t xconn_vhdl) +{ + return DEV_FUNC(xconn_vhdl, reset_link) + (xconn_vhdl); +} + +/* Given a vertex handle to the xswitch get its logical + * id. + */ +int +xswitch_id_get(devfs_handle_t xconn_vhdl) +{ + arbitrary_info_t xbow_num; + graph_error_t rv; + + rv = hwgraph_info_get_LBL(xconn_vhdl,INFO_LBL_XSWITCH_ID,&xbow_num); + ASSERT(rv == GRAPH_SUCCESS); + return(xbow_num); +} + +/* Given a vertex handle to the xswitch set its logical + * id. + */ +void +xswitch_id_set(devfs_handle_t xconn_vhdl,int xbow_num) +{ + graph_error_t rv; + + rv = hwgraph_info_add_LBL(xconn_vhdl,INFO_LBL_XSWITCH_ID, + (arbitrary_info_t)xbow_num); + ASSERT(rv == GRAPH_SUCCESS); +} diff --git a/arch/ia64/sn/io/xtalk.c b/arch/ia64/sn/io/xtalk.c new file mode 100644 index 000000000..22810d54c --- /dev/null +++ b/arch/ia64/sn/io/xtalk.c @@ -0,0 +1,1137 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. 
+ * Copyright (C) 2000 by Colin Ngam + */ + +#include <linux/types.h> +#include <linux/config.h> +#include <linux/slab.h> +#include <asm/sn/sgi.h> +#include <asm/sn/iobus.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <asm/sn/hcl.h> +#include <asm/sn/labelcl.h> +#include <asm/sn/hcl_util.h> + +#include <asm/sn/xtalk/xtalk.h> +#include <asm/sn/xtalk/xswitch.h> +#include <asm/sn/xtalk/xwidget.h> + +#include <asm/sn/xtalk/xtalk_private.h> + +/* + * Implement crosstalk provider operations. The xtalk* layer provides a + * platform-independent interface for crosstalk devices. This layer + * switches among the possible implementations of a crosstalk adapter. + * + * On platforms with only one possible xtalk provider, macros can be + * set up at the top that cause the table lookups and indirections to + * completely disappear. + */ + +#define NEW(ptr) (ptr = kmalloc(sizeof (*(ptr)), GFP_KERNEL)) +#define DEL(ptr) (kfree(ptr)) + +char widget_info_fingerprint[] = "widget_info"; + +cdl_p xtalk_registry = NULL; + +#include <asm/sn/agent.h> +#define DEV_FUNC(dev,func) hub_##func +#define CAST_PIOMAP(x) ((hub_piomap_t)(x)) +#define CAST_DMAMAP(x) ((hub_dmamap_t)(x)) +#define CAST_INTR(x) ((hub_intr_t)(x)) + +/* ===================================================================== + * Function Table of Contents + */ +xtalk_piomap_t xtalk_piomap_alloc(devfs_handle_t, device_desc_t, iopaddr_t, size_t, size_t, unsigned); +void xtalk_piomap_free(xtalk_piomap_t); +caddr_t xtalk_piomap_addr(xtalk_piomap_t, iopaddr_t, size_t); +void xtalk_piomap_done(xtalk_piomap_t); +caddr_t xtalk_piotrans_addr(devfs_handle_t, device_desc_t, iopaddr_t, size_t, unsigned); +caddr_t xtalk_pio_addr(devfs_handle_t, device_desc_t, iopaddr_t, size_t, xtalk_piomap_t *, unsigned); +void xtalk_set_early_piotrans_addr(xtalk_early_piotrans_addr_f *); +caddr_t xtalk_early_piotrans_addr(xwidget_part_num_t, xwidget_mfg_num_t, int, iopaddr_t, size_t, unsigned); +static caddr_t null_xtalk_early_piotrans_addr(xwidget_part_num_t, xwidget_mfg_num_t, int, iopaddr_t, size_t, unsigned); +xtalk_dmamap_t xtalk_dmamap_alloc(devfs_handle_t, device_desc_t, size_t, unsigned); +void xtalk_dmamap_free(xtalk_dmamap_t); +iopaddr_t xtalk_dmamap_addr(xtalk_dmamap_t, paddr_t, size_t); +alenlist_t xtalk_dmamap_list(xtalk_dmamap_t, alenlist_t, unsigned); +void xtalk_dmamap_done(xtalk_dmamap_t); +iopaddr_t xtalk_dmatrans_addr(devfs_handle_t, device_desc_t, paddr_t, size_t, unsigned); +alenlist_t xtalk_dmatrans_list(devfs_handle_t, device_desc_t, alenlist_t, unsigned); +void xtalk_dmamap_drain(xtalk_dmamap_t); +void xtalk_dmaaddr_drain(devfs_handle_t, iopaddr_t, size_t); +void xtalk_dmalist_drain(devfs_handle_t, alenlist_t); +xtalk_intr_t xtalk_intr_alloc(devfs_handle_t, device_desc_t, devfs_handle_t); +void xtalk_intr_free(xtalk_intr_t); +int xtalk_intr_connect(xtalk_intr_t, intr_func_t, intr_arg_t, xtalk_intr_setfunc_t, void *, void *); +void xtalk_intr_disconnect(xtalk_intr_t); +devfs_handle_t xtalk_intr_cpu_get(xtalk_intr_t); +int xtalk_error_handler(devfs_handle_t, int, ioerror_mode_t, ioerror_t *); +int xtalk_error_devenable(devfs_handle_t, int, int); +void xtalk_provider_startup(devfs_handle_t); +void xtalk_provider_shutdown(devfs_handle_t); +devfs_handle_t xtalk_intr_dev_get(xtalk_intr_t); +xwidgetnum_t xtalk_intr_target_get(xtalk_intr_t); +xtalk_intr_vector_t xtalk_intr_vector_get(xtalk_intr_t); +iopaddr_t xtalk_intr_addr_get(struct xtalk_intr_s *); +void *xtalk_intr_sfarg_get(xtalk_intr_t); +devfs_handle_t 
xtalk_pio_dev_get(xtalk_piomap_t); +xwidgetnum_t xtalk_pio_target_get(xtalk_piomap_t); +iopaddr_t xtalk_pio_xtalk_addr_get(xtalk_piomap_t); +ulong xtalk_pio_mapsz_get(xtalk_piomap_t); +caddr_t xtalk_pio_kvaddr_get(xtalk_piomap_t); +devfs_handle_t xtalk_dma_dev_get(xtalk_dmamap_t); +xwidgetnum_t xtalk_dma_target_get(xtalk_dmamap_t); +xwidget_info_t xwidget_info_chk(devfs_handle_t); +xwidget_info_t xwidget_info_get(devfs_handle_t); +void xwidget_info_set(devfs_handle_t, xwidget_info_t); +devfs_handle_t xwidget_info_dev_get(xwidget_info_t); +xwidgetnum_t xwidget_info_id_get(xwidget_info_t); +devfs_handle_t xwidget_info_master_get(xwidget_info_t); +xwidgetnum_t xwidget_info_masterid_get(xwidget_info_t); +xwidget_part_num_t xwidget_info_part_num_get(xwidget_info_t); +xwidget_mfg_num_t xwidget_info_mfg_num_get(xwidget_info_t); +char *xwidget_info_name_get(xwidget_info_t); +void xtalk_init(void); +void xtalk_provider_register(devfs_handle_t, xtalk_provider_t *); +void xtalk_provider_unregister(devfs_handle_t); +xtalk_provider_t *xtalk_provider_fns_get(devfs_handle_t); +int xwidget_driver_register(xwidget_part_num_t, + xwidget_mfg_num_t, + char *, unsigned); +void xwidget_driver_unregister(char *); +int xwidget_register(xwidget_hwid_t, devfs_handle_t, + xwidgetnum_t, devfs_handle_t, + xwidgetnum_t, async_attach_t); +int xwidget_unregister(devfs_handle_t); +void xwidget_error_register(devfs_handle_t, error_handler_f *, + error_handler_arg_t); +void xwidget_reset(devfs_handle_t); +char *xwidget_name_get(devfs_handle_t); +#if !defined(DEV_FUNC) +/* + * There is more than one possible provider + * for this platform. We need to examine the + * master vertex of the current vertex for + * a provider function structure, and indirect + * through the appropriately named member. + */ +#define DEV_FUNC(dev,func) xwidget_to_provider_fns(dev)->func +#define CAST_PIOMAP(x) ((xtalk_piomap_t)(x)) +#define CAST_DMAMAP(x) ((xtalk_dmamap_t)(x)) +#define CAST_INTR(x) ((xtalk_intr_t)(x)) + +static xtalk_provider_t * +xwidget_to_provider_fns(devfs_handle_t xconn) +{ + xwidget_info_t widget_info; + xtalk_provider_t *provider_fns; + + widget_info = xwidget_info_get(xconn); + ASSERT(widget_info != NULL); + + provider_fns = xwidget_info_pops_get(widget_info); + ASSERT(provider_fns != NULL); + + return (provider_fns); +} +#endif + +/* + * Many functions are not passed their vertex + * information directly; rather, they must + * dive through a resource map. These macros + * are available to coordinate this detail. 
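The PIOMAP_FUNC/DMAMAP_FUNC/INTR_FUNC macros that follow implement this "dive through the map" step: each map or interrupt object remembers the device it was allocated for, so a routine handed only the map can still reach that device's provider operations. A simplified standalone restatement (illustration only, types and names invented):

/* Not from the patch: dispatching through a map's remembered device. */
#include <stdio.h>

struct piomap { int xp_dev; };

static void provider_piomap_done(int dev) { printf("piomap_done on dev %d\n", dev); }

#define PIOMAP_FUNC(map, func)  provider_##func((map)->xp_dev)

int main(void)
{
        struct piomap map = { .xp_dev = 7 };

        PIOMAP_FUNC(&map, piomap_done);         /* reaches dev 7 via the map */
        return 0;
}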
+ */ +#define PIOMAP_FUNC(map,func) DEV_FUNC(map->xp_dev,func) +#define DMAMAP_FUNC(map,func) DEV_FUNC(map->xd_dev,func) +#define INTR_FUNC(intr,func) DEV_FUNC(intr_hdl->xi_dev,func) + +/* ===================================================================== + * PIO MANAGEMENT + * + * For mapping system virtual address space to + * xtalk space on a specified widget + */ + +xtalk_piomap_t +xtalk_piomap_alloc(devfs_handle_t dev, /* set up mapping for this device */ + device_desc_t dev_desc, /* device descriptor */ + iopaddr_t xtalk_addr, /* map for this xtalk_addr range */ + size_t byte_count, + size_t byte_count_max, /* maximum size of a mapping */ + unsigned flags) +{ /* defined in sys/pio.h */ + return (xtalk_piomap_t) DEV_FUNC(dev, piomap_alloc) + (dev, dev_desc, xtalk_addr, byte_count, byte_count_max, flags); +} + + +void +xtalk_piomap_free(xtalk_piomap_t xtalk_piomap) +{ + PIOMAP_FUNC(xtalk_piomap, piomap_free) + (CAST_PIOMAP(xtalk_piomap)); +} + + +caddr_t +xtalk_piomap_addr(xtalk_piomap_t xtalk_piomap, /* mapping resources */ + iopaddr_t xtalk_addr, /* map for this xtalk address */ + size_t byte_count) +{ /* map this many bytes */ + return PIOMAP_FUNC(xtalk_piomap, piomap_addr) + (CAST_PIOMAP(xtalk_piomap), xtalk_addr, byte_count); +} + + +void +xtalk_piomap_done(xtalk_piomap_t xtalk_piomap) +{ + PIOMAP_FUNC(xtalk_piomap, piomap_done) + (CAST_PIOMAP(xtalk_piomap)); +} + + +caddr_t +xtalk_piotrans_addr(devfs_handle_t dev, /* translate for this device */ + device_desc_t dev_desc, /* device descriptor */ + iopaddr_t xtalk_addr, /* Crosstalk address */ + size_t byte_count, /* map this many bytes */ + unsigned flags) +{ /* (currently unused) */ + return DEV_FUNC(dev, piotrans_addr) + (dev, dev_desc, xtalk_addr, byte_count, flags); +} + +caddr_t +xtalk_pio_addr(devfs_handle_t dev, /* translate for this device */ + device_desc_t dev_desc, /* device descriptor */ + iopaddr_t addr, /* starting address (or offset in window) */ + size_t byte_count, /* map this many bytes */ + xtalk_piomap_t *mapp, /* where to return the map pointer */ + unsigned flags) +{ /* PIO flags */ + xtalk_piomap_t map = 0; + caddr_t res; + + if (mapp) + *mapp = 0; /* record "no map used" */ + + res = xtalk_piotrans_addr + (dev, dev_desc, addr, byte_count, flags); + if (res) + return res; /* xtalk_piotrans worked */ + + map = xtalk_piomap_alloc + (dev, dev_desc, addr, byte_count, byte_count, flags); + if (!map) + return res; /* xtalk_piomap_alloc failed */ + + res = xtalk_piomap_addr + (map, addr, byte_count); + if (!res) { + xtalk_piomap_free(map); + return res; /* xtalk_piomap_addr failed */ + } + if (mapp) + *mapp = map; /* pass back map used */ + + return res; /* xtalk_piomap_addr succeeded */ +} + +/* ===================================================================== + * EARLY PIOTRANS SUPPORT + * + * There are places where drivers (mgras, for instance) + * need to get PIO translations before the infrastructure + * is extended to them (setting up textports, for + * instance). These drivers should call + * xtalk_early_piotrans_addr with their xtalk ID + * information, a sequence number (so we can use the second + * mgras for instance), and the usual piotrans parameters. + * + * Machine specific code should provide an implementation + * of early_piotrans_addr, and present a pointer to this + * function to xtalk_set_early_piotrans_addr so it can be + * used by clients without the clients having to know what + * platform or what xtalk provider is in use. 
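Stripped of the crosstalk specifics, this is a replaceable-default hook: a function pointer starts out at a stub (the null_ implementation below) and platform code points it at the real translation routine before any client calls it. The standalone sketch here (illustration only, names invented) shows just that pattern.

/* Not part of the patch: the replaceable-default hook in plain C. */
#include <stdio.h>
#include <stddef.h>

typedef void *early_addr_f(int which, unsigned long offset);

static void *null_early_addr(int which, unsigned long offset)
{
        (void) which; (void) offset;
        return NULL;                    /* nothing registered yet */
}

static early_addr_f *impl_early_addr = null_early_addr;

static void set_early_addr(early_addr_f *impl) { impl_early_addr = impl; }

/* a pretend platform implementation */
static void *plat_early_addr(int which, unsigned long offset)
{
        return (void *) (0xc0000000ul + (unsigned long) which * 0x1000000ul + offset);
}

int main(void)
{
        printf("before registration: %p\n", impl_early_addr(0, 0x100));
        set_early_addr(plat_early_addr);
        printf("after registration : %p\n", impl_early_addr(0, 0x100));
        return 0;
}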
+ */ + +static xtalk_early_piotrans_addr_f null_xtalk_early_piotrans_addr; + +xtalk_early_piotrans_addr_f *impl_early_piotrans_addr = null_xtalk_early_piotrans_addr; + +/* xtalk_set_early_piotrans_addr: + * specify the early_piotrans_addr implementation function. + */ +void +xtalk_set_early_piotrans_addr(xtalk_early_piotrans_addr_f *impl) +{ + impl_early_piotrans_addr = impl; +} + +/* xtalk_early_piotrans_addr: + * figure out a PIO address for the "nth" crosstalk widget that + * matches the specified part and mfgr number. Returns NULL if + * there is no such widget, or if the requested mapping can not + * be constructed. + * Limitations on which crosstalk slots (and busses) are + * checked, and definitions of the ordering of the search across + * the crosstalk slots, are defined by the platform. + */ +caddr_t +xtalk_early_piotrans_addr(xwidget_part_num_t part_num, + xwidget_mfg_num_t mfg_num, + int which, + iopaddr_t xtalk_addr, + size_t byte_count, + unsigned flags) +{ + return impl_early_piotrans_addr + (part_num, mfg_num, which, xtalk_addr, byte_count, flags); +} + +/* null_xtalk_early_piotrans_addr: + * used as the early_piotrans_addr implementation until and + * unless a real implementation is provided. In DEBUG kernels, + * we want to know who is calling before the implementation is + * registered; in non-DEBUG kernels, return NULL representing + * lack of mapping support. + */ +/*ARGSUSED */ +static caddr_t +null_xtalk_early_piotrans_addr(xwidget_part_num_t part_num, + xwidget_mfg_num_t mfg_num, + int which, + iopaddr_t xtalk_addr, + size_t byte_count, + unsigned flags) +{ +#if DEBUG + cmn_err(CE_PANIC, "null_xtalk_early_piotrans_addr"); +#endif + return NULL; +} + +/* ===================================================================== + * DMA MANAGEMENT + * + * For mapping from crosstalk space to system + * physical space. 
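The DMA interfaces declared below are meant to be paired: a driver allocates a map once, translates each buffer through it, and retires the map when the transfer completes. The fragment here is a hypothetical usage sketch, not code from the patch and not compilable on its own; it assumes the asm/sn/xtalk headers added by this patch and a driver that already holds a devfs_handle_t for its widget.

/* Hypothetical usage fragment only; device_desc_t passed as 0 for brevity. */
#include <asm/sn/xtalk/xtalk.h>

static int example_dma_once(devfs_handle_t dev, paddr_t buf, size_t len)
{
        xtalk_dmamap_t map;
        iopaddr_t xio;

        map = xtalk_dmamap_alloc(dev, 0, len, 0);       /* reserve resources     */
        if (!map)
                return -1;

        xio = xtalk_dmamap_addr(map, buf, len);         /* buf as seen from XIO  */
        /* ... program the widget with 'xio' and let it DMA ... */
        xtalk_dmamap_done(map);                         /* retire the mapping    */
        xtalk_dmamap_free(map);
        return 0;
}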
+ */ + +xtalk_dmamap_t +xtalk_dmamap_alloc(devfs_handle_t dev, /* set up mappings for this device */ + device_desc_t dev_desc, /* device descriptor */ + size_t byte_count_max, /* max size of a mapping */ + unsigned flags) +{ /* defined in dma.h */ + return (xtalk_dmamap_t) DEV_FUNC(dev, dmamap_alloc) + (dev, dev_desc, byte_count_max, flags); +} + + +void +xtalk_dmamap_free(xtalk_dmamap_t xtalk_dmamap) +{ + DMAMAP_FUNC(xtalk_dmamap, dmamap_free) + (CAST_DMAMAP(xtalk_dmamap)); +} + + +iopaddr_t +xtalk_dmamap_addr(xtalk_dmamap_t xtalk_dmamap, /* use these mapping resources */ + paddr_t paddr, /* map for this address */ + size_t byte_count) +{ /* map this many bytes */ + return DMAMAP_FUNC(xtalk_dmamap, dmamap_addr) + (CAST_DMAMAP(xtalk_dmamap), paddr, byte_count); +} + + +alenlist_t +xtalk_dmamap_list(xtalk_dmamap_t xtalk_dmamap, /* use these mapping resources */ + alenlist_t alenlist, /* map this Address/Length List */ + unsigned flags) +{ + return DMAMAP_FUNC(xtalk_dmamap, dmamap_list) + (CAST_DMAMAP(xtalk_dmamap), alenlist, flags); +} + + +void +xtalk_dmamap_done(xtalk_dmamap_t xtalk_dmamap) +{ + DMAMAP_FUNC(xtalk_dmamap, dmamap_done) + (CAST_DMAMAP(xtalk_dmamap)); +} + + +iopaddr_t +xtalk_dmatrans_addr(devfs_handle_t dev, /* translate for this device */ + device_desc_t dev_desc, /* device descriptor */ + paddr_t paddr, /* system physical address */ + size_t byte_count, /* length */ + unsigned flags) +{ /* defined in dma.h */ + return DEV_FUNC(dev, dmatrans_addr) + (dev, dev_desc, paddr, byte_count, flags); +} + + +alenlist_t +xtalk_dmatrans_list(devfs_handle_t dev, /* translate for this device */ + device_desc_t dev_desc, /* device descriptor */ + alenlist_t palenlist, /* system address/length list */ + unsigned flags) +{ /* defined in dma.h */ + return DEV_FUNC(dev, dmatrans_list) + (dev, dev_desc, palenlist, flags); +} + +void +xtalk_dmamap_drain(xtalk_dmamap_t map) +{ + DMAMAP_FUNC(map, dmamap_drain) + (CAST_DMAMAP(map)); +} + +void +xtalk_dmaaddr_drain(devfs_handle_t dev, paddr_t addr, size_t size) +{ + DEV_FUNC(dev, dmaaddr_drain) + (dev, addr, size); +} + +void +xtalk_dmalist_drain(devfs_handle_t dev, alenlist_t list) +{ + DEV_FUNC(dev, dmalist_drain) + (dev, list); +} + +/* ===================================================================== + * INTERRUPT MANAGEMENT + * + * Allow crosstalk devices to establish interrupts + */ + +/* + * Allocate resources required for an interrupt as specified in intr_desc. + * Return resource handle in intr_hdl. + */ +xtalk_intr_t +xtalk_intr_alloc(devfs_handle_t dev, /* which Crosstalk device */ + device_desc_t dev_desc, /* device descriptor */ + devfs_handle_t owner_dev) +{ /* owner of this interrupt */ + return (xtalk_intr_t) DEV_FUNC(dev, intr_alloc) + (dev, dev_desc, owner_dev); +} + + +/* + * Free resources consumed by intr_alloc. + */ +void +xtalk_intr_free(xtalk_intr_t intr_hdl) +{ + INTR_FUNC(intr_hdl, intr_free) + (CAST_INTR(intr_hdl)); +} + + +/* + * Associate resources allocated with a previous xtalk_intr_alloc call with the + * described handler, arg, name, etc. + * + * Returns 0 on success, returns <0 on failure. 
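A hypothetical caller of the interrupt interfaces would allocate first and then connect, mirroring how sn1_request_irq() in this patch uses the equivalent pciio calls; the cast of the handler follows the same convention as that code. This fragment is an illustration only, assumes the headers added by this patch, and is not compilable standalone.

/* Hypothetical usage fragment; not from the patch. */
#include <asm/sn/xtalk/xtalk.h>

static void my_isr(int irq, void *arg, struct pt_regs *regs)
{
        /* service the device */
}

static xtalk_intr_t example_intr_setup(devfs_handle_t dev)
{
        xtalk_intr_t intr;

        intr = xtalk_intr_alloc(dev, 0, dev);           /* dev owns the intr */
        if (!intr)
                return NULL;

        if (xtalk_intr_connect(intr, (intr_func_t) my_isr, NULL,
                               NULL, NULL, NULL) < 0) { /* no setfunc/thread */
                xtalk_intr_free(intr);
                return NULL;
        }
        return intr;    /* teardown: xtalk_intr_disconnect() then xtalk_intr_free() */
}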
+ */ +int +xtalk_intr_connect(xtalk_intr_t intr_hdl, /* xtalk intr resource handle */ + intr_func_t intr_func, /* xtalk intr handler */ + intr_arg_t intr_arg, /* arg to intr handler */ + xtalk_intr_setfunc_t setfunc, /* func to set intr hw */ + void *setfunc_arg, /* arg to setfunc */ + void *thread) +{ /* intr thread to use */ + return INTR_FUNC(intr_hdl, intr_connect) + (CAST_INTR(intr_hdl), intr_func, intr_arg, setfunc, setfunc_arg, thread); +} + + +/* + * Disassociate handler with the specified interrupt. + */ +void +xtalk_intr_disconnect(xtalk_intr_t intr_hdl) +{ + INTR_FUNC(intr_hdl, intr_disconnect) + (CAST_INTR(intr_hdl)); +} + + +/* + * Return a hwgraph vertex that represents the CPU currently + * targeted by an interrupt. + */ +devfs_handle_t +xtalk_intr_cpu_get(xtalk_intr_t intr_hdl) +{ + return INTR_FUNC(intr_hdl, intr_cpu_get) + (CAST_INTR(intr_hdl)); +} + + +/* + * ===================================================================== + * ERROR MANAGEMENT + */ + +/* + * xtalk_error_handler: + * pass this error on to the handler registered + * at the specified xtalk connecdtion point, + * or complain about it here if there is no handler. + * + * This routine plays two roles during error delivery + * to most widgets: first, the external agent (heart, + * hub, or whatever) calls in with the error and the + * connect point representing the crosstalk switch, + * or whatever crosstalk device is directly connected + * to the agent. + * + * If there is a switch, it will generally look at the + * widget number stashed in the ioerror structure; and, + * if the error came from some widget other than the + * switch, it will call back into xtalk_error_handler + * with the connection point of the offending port. + */ +int +xtalk_error_handler( + devfs_handle_t xconn, + int error_code, + ioerror_mode_t mode, + ioerror_t *ioerror) +{ + xwidget_info_t xwidget_info; + +#if DEBUG && ERROR_DEBUG + cmn_err(CE_CONT, "%v: xtalk_error_handler\n", xconn); +#endif + + xwidget_info = xwidget_info_get(xconn); + /* Make sure that xwidget_info is a valid pointer before derefencing it. + * We could come in here during very early initialization. + */ + if (xwidget_info && xwidget_info->w_efunc) + return xwidget_info->w_efunc + (xwidget_info->w_einfo, + error_code, mode, ioerror); + /* + * no error handler registered for + * the offending port. it's not clear + * what needs to be done, but reporting + * it would be a good thing, unless it + * is a mode that requires nothing. 
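The w_efunc hook consulted above is normally installed by the widget driver at attach time through xwidget_error_register(), defined later in this file; that is what gives xtalk_error_handler() somewhere to delegate to. A hypothetical registration fragment (illustration only, assumes this patch's headers, handler signature cast as the infrastructure expects):

/* Hypothetical fragment; not from the patch. */
#include <asm/sn/xtalk/xtalk.h>

static int my_widget_error(error_handler_arg_t einfo, int error_code,
                           ioerror_mode_t mode, ioerror_t *ioerror)
{
        /* decide whether the widget can recover from this error */
        return IOERROR_HANDLED;
}

static void my_widget_attach(devfs_handle_t xconn, void *soft)
{
        xwidget_error_register(xconn, (error_handler_f *) my_widget_error, soft);
}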
+ */ + if ((mode == MODE_DEVPROBE) || (mode == MODE_DEVUSERERROR) || + (mode == MODE_DEVREENABLE)) + return IOERROR_HANDLED; + +#ifdef IRIX + cmn_err(CE_WARN, "Xbow at %v encountered Fatal error", xconn); +#endif + ioerror_dump("xtalk", error_code, mode, ioerror); + + return IOERROR_UNHANDLED; +} + +int +xtalk_error_devenable(devfs_handle_t xconn_vhdl, int devnum, int error_code) +{ + return DEV_FUNC(xconn_vhdl, error_devenable) (xconn_vhdl, devnum, error_code); +} + + +/* ===================================================================== + * CONFIGURATION MANAGEMENT + */ + +/* + * Startup a crosstalk provider + */ +void +xtalk_provider_startup(devfs_handle_t xtalk_provider) +{ + DEV_FUNC(xtalk_provider, provider_startup) + (xtalk_provider); +} + + +/* + * Shutdown a crosstalk provider + */ +void +xtalk_provider_shutdown(devfs_handle_t xtalk_provider) +{ + DEV_FUNC(xtalk_provider, provider_shutdown) + (xtalk_provider); +} + +/* + * Enable a device on a xtalk widget + */ +void +xtalk_widgetdev_enable(devfs_handle_t xconn_vhdl, int devnum) +{ + DEV_FUNC(xconn_vhdl, widgetdev_enable) (xconn_vhdl, devnum); +} + +/* + * Shutdown a device on a xtalk widget + */ +void +xtalk_widgetdev_shutdown(devfs_handle_t xconn_vhdl, int devnum) +{ + DEV_FUNC(xconn_vhdl, widgetdev_shutdown) (xconn_vhdl, devnum); +} + +int +xtalk_dma_enabled(devfs_handle_t xconn_vhdl) +{ + return DEV_FUNC(xconn_vhdl, dma_enabled) (xconn_vhdl); +} +/* + * Generic crosstalk functions, for use with all crosstalk providers + * and all crosstalk devices. + */ + +/****** Generic crosstalk interrupt interfaces ******/ +devfs_handle_t +xtalk_intr_dev_get(xtalk_intr_t xtalk_intr) +{ + return (xtalk_intr->xi_dev); +} + +xwidgetnum_t +xtalk_intr_target_get(xtalk_intr_t xtalk_intr) +{ + return (xtalk_intr->xi_target); +} + +xtalk_intr_vector_t +xtalk_intr_vector_get(xtalk_intr_t xtalk_intr) +{ + return (xtalk_intr->xi_vector); +} + +iopaddr_t +xtalk_intr_addr_get(struct xtalk_intr_s *xtalk_intr) +{ + return (xtalk_intr->xi_addr); +} + +void * +xtalk_intr_sfarg_get(xtalk_intr_t xtalk_intr) +{ + return (xtalk_intr->xi_sfarg); +} + + +int +xtalk_intr_flags_get(xtalk_intr_t xtalk_intr) +{ + return(xtalk_intr->xi_flags); +} + +/****** Generic crosstalk pio interfaces ******/ +devfs_handle_t +xtalk_pio_dev_get(xtalk_piomap_t xtalk_piomap) +{ + return (xtalk_piomap->xp_dev); +} + +xwidgetnum_t +xtalk_pio_target_get(xtalk_piomap_t xtalk_piomap) +{ + return (xtalk_piomap->xp_target); +} + +iopaddr_t +xtalk_pio_xtalk_addr_get(xtalk_piomap_t xtalk_piomap) +{ + return (xtalk_piomap->xp_xtalk_addr); +} + +ulong +xtalk_pio_mapsz_get(xtalk_piomap_t xtalk_piomap) +{ + return (xtalk_piomap->xp_mapsz); +} + +caddr_t +xtalk_pio_kvaddr_get(xtalk_piomap_t xtalk_piomap) +{ + return (xtalk_piomap->xp_kvaddr); +} + + +/****** Generic crosstalk dma interfaces ******/ +devfs_handle_t +xtalk_dma_dev_get(xtalk_dmamap_t xtalk_dmamap) +{ + return (xtalk_dmamap->xd_dev); +} + +xwidgetnum_t +xtalk_dma_target_get(xtalk_dmamap_t xtalk_dmamap) +{ + return (xtalk_dmamap->xd_target); +} + + +/****** Generic crosstalk widget information interfaces ******/ + +/* xwidget_info_chk: + * check to see if this vertex is a widget; + * if so, return its widget_info (if any). + * if not, return NULL. 
+ */ +xwidget_info_t +xwidget_info_chk(devfs_handle_t xwidget) +{ + arbitrary_info_t ainfo = 0; + + hwgraph_info_get_LBL(xwidget, INFO_LBL_XWIDGET, &ainfo); + return (xwidget_info_t) ainfo; +} + + +xwidget_info_t +xwidget_info_get(devfs_handle_t xwidget) +{ + xwidget_info_t widget_info; + + widget_info = (xwidget_info_t) + hwgraph_fastinfo_get(xwidget); + +#ifdef IRIX + if ((widget_info != NULL) && + (widget_info->w_fingerprint != widget_info_fingerprint)) + cmn_err(CE_PANIC, "%v bad xwidget_info", xwidget); +#endif + + return (widget_info); +} + +void +xwidget_info_set(devfs_handle_t xwidget, xwidget_info_t widget_info) +{ + if (widget_info != NULL) + widget_info->w_fingerprint = widget_info_fingerprint; + + hwgraph_fastinfo_set(xwidget, (arbitrary_info_t) widget_info); + + /* Also, mark this vertex as an xwidget, + * and use the widget_info, so xwidget_info_chk + * can work (and be fairly efficient). + */ + hwgraph_info_add_LBL(xwidget, INFO_LBL_XWIDGET, + (arbitrary_info_t) widget_info); +} + +devfs_handle_t +xwidget_info_dev_get(xwidget_info_t xwidget_info) +{ + if (xwidget_info == NULL) + panic("null xwidget_info"); + return (xwidget_info->w_vertex); +} + +xwidgetnum_t +xwidget_info_id_get(xwidget_info_t xwidget_info) +{ + if (xwidget_info == NULL) + panic("null xwidget_info"); + return (xwidget_info->w_id); +} + + +devfs_handle_t +xwidget_info_master_get(xwidget_info_t xwidget_info) +{ + if (xwidget_info == NULL) + panic("null xwidget_info"); + return (xwidget_info->w_master); +} + +xwidgetnum_t +xwidget_info_masterid_get(xwidget_info_t xwidget_info) +{ + if (xwidget_info == NULL) + panic("null xwidget_info"); + return (xwidget_info->w_masterid); +} + +xwidget_part_num_t +xwidget_info_part_num_get(xwidget_info_t xwidget_info) +{ + if (xwidget_info == NULL) + panic("null xwidget_info"); + return (xwidget_info->w_hwid.part_num); +} + +xwidget_mfg_num_t +xwidget_info_mfg_num_get(xwidget_info_t xwidget_info) +{ + if (xwidget_info == NULL) + panic("null xwidget_info"); + return (xwidget_info->w_hwid.mfg_num); +} +/* Extract the widget name from the widget information + * for the xtalk widget. + */ +char * +xwidget_info_name_get(xwidget_info_t xwidget_info) +{ + if (xwidget_info == NULL) + panic("null xwidget info"); + return(xwidget_info->w_name); +} +/****** Generic crosstalk initialization interfaces ******/ + +/* + * One-time initialization needed for systems that support crosstalk. + */ +void +xtalk_init(void) +{ + cdl_p cp; + +#if DEBUG && ATTACH_DEBUG + printf("xtalk_init\n"); +#endif + /* Allocate the registry. + * We might already have one. + * If we don't, go get one. + * MPness: someone might have + * set one up for us while we + * were not looking; use an atomic + * compare-and-swap to commit to + * using the new registry if and + * only if nobody else did first. + * If someone did get there first, + * toss the one we allocated back + * into the pool. + */ + if (xtalk_registry == NULL) { + cp = cdl_new(EDGE_LBL_XIO, "part", "mfgr"); + if (!compare_and_swap_ptr((void **) &xtalk_registry, NULL, (void *) cp)) { + cdl_del(cp); + } + } + ASSERT(xtalk_registry != NULL); +} + +/* + * Associate a set of xtalk_provider functions with a vertex. + */ +void +xtalk_provider_register(devfs_handle_t provider, xtalk_provider_t *xtalk_fns) +{ + hwgraph_fastinfo_set(provider, (arbitrary_info_t) xtalk_fns); +} + +/* + * Disassociate a set of xtalk_provider functions with a vertex. 
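The registry setup in xtalk_init() above uses an optimistic allocate-then-publish scheme: any CPU may build a candidate registry, but only the first compare-and-swap installs it and the losers throw theirs away. The standalone sketch below (illustration only, using the GCC __sync builtin in place of compare_and_swap_ptr()) shows the same once-only initialization pattern.

/* Not part of the patch: CAS-based once-only initialization. */
#include <stdio.h>
#include <stdlib.h>

static void *registry;

static void registry_init(void)
{
        void *mine;

        if (registry != NULL)
                return;                         /* already published */

        mine = malloc(64);                      /* build a candidate */
        if (!__sync_bool_compare_and_swap(&registry, NULL, mine))
                free(mine);                     /* somebody beat us to it */
}

int main(void)
{
        registry_init();
        registry_init();                        /* second call is a no-op */
        printf("registry at %p\n", registry);
        return 0;
}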
+ */ +void +xtalk_provider_unregister(devfs_handle_t provider) +{ + hwgraph_fastinfo_set(provider, (arbitrary_info_t)NULL); +} + +/* + * Obtain a pointer to the xtalk_provider functions for a specified Crosstalk + * provider. + */ +xtalk_provider_t * +xtalk_provider_fns_get(devfs_handle_t provider) +{ + return ((xtalk_provider_t *) hwgraph_fastinfo_get(provider)); +} + +/* + * Announce a driver for a particular crosstalk part. + * Returns 0 on success or -1 on failure. Failure occurs if the + * specified hardware already has a driver. + */ +/*ARGSUSED4 */ +int +xwidget_driver_register(xwidget_part_num_t part_num, + xwidget_mfg_num_t mfg_num, + char *driver_prefix, + unsigned flags) +{ + /* a driver's init routine could call + * xwidget_driver_register before the + * system calls xtalk_init; so, we + * make the call here. + */ + if (xtalk_registry == NULL) + xtalk_init(); + + return cdl_add_driver(xtalk_registry, + part_num, mfg_num, + driver_prefix, flags); +} + +/* + * Inform xtalk infrastructure that a driver is no longer available for + * handling any widgets. + */ +void +xwidget_driver_unregister(char *driver_prefix) +{ + /* before a driver calls unregister, + * it must have called registger; so we + * can assume we have a registry here. + */ + ASSERT(xtalk_registry != NULL); + + cdl_del_driver(xtalk_registry, driver_prefix); +} + +/* + * Call some function with each vertex that + * might be one of this driver's attach points. + */ +void +xtalk_iterate(char *driver_prefix, + xtalk_iter_f *func) +{ + ASSERT(xtalk_registry != NULL); + + cdl_iterate(xtalk_registry, driver_prefix, (cdl_iter_f *)func); +} + +/* + * xwidget_register: + * Register a xtalk device (xwidget) by doing the following. + * -allocate and initialize xwidget_info data + * -allocate a hwgraph vertex with name based on widget number (id) + * -look up the widget's initialization function and call it, + * or remember the vertex for later initialization. + * + */ +int +xwidget_register(xwidget_hwid_t hwid, /* widget's hardware ID */ + devfs_handle_t widget, /* widget to initialize */ + xwidgetnum_t id, /* widget's target id (0..f) */ + devfs_handle_t master, /* widget's master vertex */ + xwidgetnum_t targetid, /* master's target id (9/a) */ + async_attach_t aa) +{ + xwidget_info_t widget_info; + char *s,devnm[MAXDEVNAME]; + + /* Allocate widget_info and associate it with widget vertex */ + NEW(widget_info); + + /* Initialize widget_info */ + widget_info->w_vertex = widget; + widget_info->w_id = id; + widget_info->w_master = master; + widget_info->w_masterid = targetid; + widget_info->w_hwid = *hwid; /* structure copy */ + widget_info->w_efunc = 0; + widget_info->w_einfo = 0; + /* + * get the name of this xwidget vertex and keep the info. + * This is needed during errors and interupts, but as + * long as we have it, we can use it elsewhere. + */ + s = dev_to_name(widget,devnm,MAXDEVNAME); + printk("xwidget_register: dev_to_name widget id 0x%p, s = %s\n", widget, s); + widget_info->w_name = kmalloc(strlen(s) + 1, GFP_KERNEL); + strcpy(widget_info->w_name,s); + + xwidget_info_set(widget, widget_info); + + device_master_set(widget, master); + + /* All the driver init routines (including + * xtalk_init) are called before we get into + * attaching devices, so we can assume we + * have a registry here. + */ + ASSERT(xtalk_registry != NULL); + + /* + * Add pointer to async attach info -- tear down will be done when + * the particular descendant is done with the info. 
+ */ + if (aa) + async_attach_add_info(widget, aa); + + return cdl_add_connpt(xtalk_registry, hwid->part_num, hwid->mfg_num, widget); +} + +/* + * xwidget_unregister : + * Unregister the xtalk device and detach all its hwgraph namespace. + */ +int +xwidget_unregister(devfs_handle_t widget) +{ + xwidget_info_t widget_info; + xwidget_hwid_t hwid; + + /* Make sure that we have valid widget information initialized */ + if (!(widget_info = xwidget_info_get(widget))) + return(1); + + /* Remove the inventory information associated + * with the widget. + */ + hwgraph_inventory_remove(widget, -1, -1, -1, -1, -1); + + hwid = &(widget_info->w_hwid); + + cdl_del_connpt(xtalk_registry, hwid->part_num, + hwid->mfg_num, widget); + + /* Clean out the xwidget information */ + (void)kfree(widget_info->w_name); + BZERO((void *)widget_info, sizeof(widget_info)); + DEL(widget_info); + + return(0); +} + +void +xwidget_error_register(devfs_handle_t xwidget, + error_handler_f *efunc, + error_handler_arg_t einfo) +{ + xwidget_info_t xwidget_info; + + xwidget_info = xwidget_info_get(xwidget); + ASSERT(xwidget_info != NULL); + xwidget_info->w_efunc = efunc; + xwidget_info->w_einfo = einfo; +} + +/* + * Issue a link reset to a widget. + */ +void +xwidget_reset(devfs_handle_t xwidget) +{ + xswitch_reset_link(xwidget); + +} + + +void +xwidget_gfx_reset(devfs_handle_t xwidget) +{ + xwidget_info_t info; + + xswitch_reset_link(xwidget); + info = xwidget_info_get(xwidget); +#ifdef IRIX + ASSERT_ALWAYS(info != NULL); +#endif + + /* + * Enable this for other architectures once we add widget_reset to the + * xtalk provider interface. + */ + DEV_FUNC(xtalk_provider, widget_reset) + (xwidget_info_master_get(info), xwidget_info_id_get(info)); +} + +#define ANON_XWIDGET_NAME "No Name" /* Default Widget Name */ + +/* Get the canonical hwgraph name of xtalk widget */ +char * +xwidget_name_get(devfs_handle_t xwidget_vhdl) +{ + xwidget_info_t info; + + /* If we have a bogus widget handle then return + * a default anonymous widget name. + */ + if (xwidget_vhdl == GRAPH_VERTEX_NONE) + return(ANON_XWIDGET_NAME); + /* Read the widget name stored in the widget info + * for the widget setup during widget initialization. + */ + info = xwidget_info_get(xwidget_vhdl); + ASSERT(info != NULL); + return(xwidget_info_name_get(info)); +} +/* + * xtalk_device_powerup + * Reset and initialize the specified xtalk widget + */ +int +xtalk_device_powerup(devfs_handle_t xbus_vhdl, xwidgetnum_t widget) +{ +#ifndef CONFIG_IA64_SGI_IO + extern void io_xswitch_widget_init(devfs_handle_t, + devfs_handle_t, + xwidgetnum_t, + async_attach_t); + io_xswitch_widget_init(xbus_vhdl, + hwgraph_connectpt_get(xbus_vhdl), + widget, + NULL); +#endif /* CONFIG_IA64_SGI_IO */ + + return(0); +} +/* + * xtalk_device_shutdown + * Disable the specified xtalk widget and clean out all the software + * state associated with it. + */ +int +xtalk_device_shutdown(devfs_handle_t xbus_vhdl, xwidgetnum_t widget) +{ + devfs_handle_t widget_vhdl; + char edge_name[8]; + + sprintf(edge_name, "%d", widget); + if (hwgraph_traverse(xbus_vhdl, edge_name, &widget_vhdl) + != GRAPH_SUCCESS) + return(1); + + xwidget_unregister(widget_vhdl); + + return(0); +} +/* + * xtalk_device_inquiry + * Find out hardware information about the xtalk widget. 
+ */ +int +xtalk_device_inquiry(devfs_handle_t xbus_vhdl, xwidgetnum_t widget) +{ + + extern void hub_device_inquiry(devfs_handle_t, xwidgetnum_t); + hub_device_inquiry(xbus_vhdl, widget); + return(0); +} diff --git a/arch/ia64/sn/sn1/Makefile b/arch/ia64/sn/sn1/Makefile index fbb8e83ab..f995c8dc4 100644 --- a/arch/ia64/sn/sn1/Makefile +++ b/arch/ia64/sn/sn1/Makefile @@ -5,20 +5,27 @@ # Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) # -CFLAGS := $(CFLAGS) -DCONFIG_SGI_SN1 -DSN1 -DSN -DSOFTSDV \ - -DLANGUAGE_C=1 -D_LANGUAGE_C=1 -AFLAGS := $(AFLAGS) -DCONFIG_SGI_SN1 -DSN1 -DSOFTSDV +EXTRA_CFLAGS := -DSN -DLANGUAGE_C=1 -D_LANGUAGE_C=1 -I. -DBRINGUP \ + -DDIRECT_L1_CONSOLE -DNUMA_BASE -DSIMULATED_KLGRAPH \ + -DNUMA_MIGR_CONTROL -DLITTLE_ENDIAN -DREAL_HARDWARE \ + -DNEW_INTERRUPTS -DCONFIG_IA64_SGI_IO .S.s: - $(CPP) $(AFLAGS) -o $*.s $< + $(CPP) $(AFLAGS) $(AFLAGS_KERNEL) -o $*.s $< .S.o: - $(CC) $(AFLAGS) -c -o $*.o $< + $(CC) $(AFLAGS) $(AFLAGS_KERNEL) -c -o $*.o $< all: sn1.a O_TARGET = sn1.a O_HEADERS = -O_OBJS = irq.o setup.o +O_OBJS = irq.o setup.o iomv.o mm.o smp.o synergy.o sn1_asm.o \ + discontig.o + +ifeq ($(CONFIG_IA64_SGI_AUTOTEST),y) +O_OBJS += llsc4.o +endif + ifeq ($(CONFIG_IA64_GENERIC),y) O_OBJS += machvec.o diff --git a/arch/ia64/sn/sn1/discontig.c b/arch/ia64/sn/sn1/discontig.c new file mode 100644 index 000000000..7251ee066 --- /dev/null +++ b/arch/ia64/sn/sn1/discontig.c @@ -0,0 +1,188 @@ +/* + * Copyright 2000, Silicon Graphics, sprasad@engr.sgi.com + * Copyright 2000, Kanoj Sarcar, kanoj@sgi.com + */ + +/* + * Contains common definitions and globals for NUMA platform + * support. For now, SN-IA64 and SN-MIPS are the NUMA platforms. + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/bootmem.h> +#include <asm/sn/mmzone.h> +#include <asm/efi.h> + +extern int numnodes ; + +plat_pg_data_t plat_node_data[MAXNODES]; +bootmem_data_t bdata[MAXNODES]; +int chunktonid[MAXCHUNKS]; +int nasid_map[MAXNASIDS]; + +void __init +init_chunktonid(void) +{ + memset(chunktonid, -1, sizeof(chunktonid)) ; +} + +void __init +init_nodeidmap(void) +{ + memset(nasid_map, -1, sizeof(nasid_map)) ; +} + +int cnodeid_map[MAXNODES] ; +void __init +init_cnodeidmap(void) +{ + memset(cnodeid_map, -1, sizeof(cnodeid_map)) ; +} + +int +numa_debug(void) +{ + panic("NUMA debug\n"); + return(0); +} + +int __init +build_cnodeid_map(void) +{ + int i,j ; + + for (i=0,j=0;i<MAXNASIDS;i++) { + if (nasid_map[i] >= 0) + cnodeid_map[j++] = i ; + } + return j ; +} + +/* + * Since efi_memmap_walk merges contiguous banks, this code will need + * to find all the nasids covered by the input memory descriptor. 
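The loop that follows sweeps a merged EFI bank one node-sized stride at a time: on SN1 the nasid is a bit field of the physical address, so advancing to the next (nasid << node shift) boundary visits every node the bank touches. The standalone sketch below (illustration only, with an example 33-bit shift rather than SN1_NODE_ADDR_SHIFT) marks nasids 0, 1 and 2 for a bank that spans them.

/* Not from the patch: the address walk behind build_nasid_map(). */
#include <stdio.h>

#define NODE_ADDR_SHIFT 33              /* example only: 8 GB per node */
#define MAX_NASIDS      8

static int nasid_map[MAX_NASIDS];

static void mark_bank(unsigned long long start, unsigned long long end)
{
        unsigned long long addr = start;
        int nasid = (int) (addr >> NODE_ADDR_SHIFT);

        while (addr < end) {
                nasid_map[nasid] = 1;                   /* bank touches this node */
                addr = (unsigned long long) ++nasid << NODE_ADDR_SHIFT;
        }
}

int main(void)
{
        int i;

        /* one merged bank spanning the end of node 0 through the start of node 2 */
        mark_bank(0x1c0000000ULL, 0x440000000ULL);
        for (i = 0; i < MAX_NASIDS; i++)
                if (nasid_map[i])
                        printf("nasid %d has memory\n", i);
        return 0;
}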
+ */ +static int __init +build_nasid_map(unsigned long start, unsigned long end, void *arg) +{ + unsigned long vaddr = start; + int nasid = GetNasId(__pa(vaddr)); + + while (vaddr < end) { + if (nasid < MAXNASIDS) + nasid_map[nasid] = 0; + else + panic("build_nasid_map"); + vaddr = (unsigned long)__va((unsigned long)(++nasid) << + SN1_NODE_ADDR_SHIFT); + } + return 0; +} + +void __init +fix_nasid_map(void) +{ + int i ; + int j ; + + /* For every nasid */ + for (j=0;j<MAXNASIDS;j++) { + for (i=0;i<MAXNODES;i++) { + if (CNODEID_TO_NASID(i) == j) + break ; + } + if (i<MAXNODES) + nasid_map[j] = i ; + } +} + +static void __init +dump_bootmem_info(void) +{ + int i; + struct bootmem_data *bdata ; + + printk("CNODE INFO ....\n") ; + for (i=0;i<numnodes;i++) { + printk("%d ", CNODEID_TO_NASID(i)) ; + } + printk("\n") ; + + printk("BOOT MEM INFO ....\n") ; + printk("Node Start LowPfn BootmemMap\n") ; + for (i=0;i<numnodes;i++) { + bdata = NODE_DATA(i)->bdata ; + printk("%d 0x%016lx 0x%016lx 0x%016lx\n", i, + bdata->node_boot_start, bdata->node_low_pfn, + (unsigned long)bdata->node_bootmem_map) ; + } +} + +void __init +discontig_mem_init(void) +{ + extern void setup_sn1_bootmem(int); + int maxnodes ; + + init_chunktonid() ; + init_nodeidmap() ; + init_cnodeidmap() ; + efi_memmap_walk(build_nasid_map, 0) ; + maxnodes = build_cnodeid_map() ; + fix_nasid_map() ; +#ifdef CONFIG_DISCONTIGMEM + setup_sn1_bootmem(maxnodes) ; +#endif + numnodes = maxnodes; + dump_bootmem_info() ; +} + +void __init +discontig_paging_init(void) +{ + int i; + unsigned long max_dma, zones_size[MAX_NR_ZONES]; + void dump_node_data(void); + + max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; + for (i = 0; i < numnodes; i++) { + extern void free_unused_memmap_node(int); + unsigned long startpfn = __pa((void *)NODE_START(i)) >> PAGE_SHIFT; + unsigned long numpfn = NODE_SIZE(i) >> PAGE_SHIFT; + memset(zones_size, 0, sizeof(zones_size)); + + if ((startpfn + numpfn) < max_dma) { + zones_size[ZONE_DMA] = numpfn; + } else if (startpfn > max_dma) { + zones_size[ZONE_NORMAL] = numpfn; + } else { + zones_size[ZONE_DMA] = (max_dma - startpfn); + zones_size[ZONE_NORMAL] = numpfn - zones_size[ZONE_DMA]; + } + free_area_init_node(i, NODE_DATA(i), NULL, zones_size, startpfn<<PAGE_SHIFT, 0); + free_unused_memmap_node(i); + } + dump_node_data(); +} + + +void +dump_node_data(void) +{ + int i; + + printk("NODE DATA ....\n") ; + printk("Node, Start, Size, MemMap, BitMap, StartP, Mapnr, Size, Id\n") ; + for (i=0;i<numnodes;i++) { + printk("%d, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, %d\n", + CNODEID_TO_NASID(i), NODE_START(i), NODE_SIZE(i), + (long)NODE_MEM_MAP(i), (long)NODE_DATA(i)->valid_addr_bitmap, + NODE_DATA(i)->node_start_paddr, + NODE_DATA(i)->node_start_mapnr, + NODE_DATA(i)->node_size, + NODE_DATA(i)->node_id) ; + } +} + diff --git a/arch/ia64/sn/sn1/iomv.c b/arch/ia64/sn/sn1/iomv.c new file mode 100644 index 000000000..1d90a924f --- /dev/null +++ b/arch/ia64/sn/sn1/iomv.c @@ -0,0 +1,100 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000 Silicon Graphics, Inc. 
+ * Copyright (C) 2000 by Jack Steiner (steiner@sgi.com) + * Copyright (C) 2000 Kanoj Sarcar (kanoj@sgi.com) + */ + +#include <asm/io.h> +#include <linux/pci.h> + +static inline void * +sn1_io_addr(unsigned long port) +{ + if (!IS_RUNNING_ON_SIMULATOR()) { + return( (void *) (port | __IA64_UNCACHED_OFFSET)); + } else { + unsigned long io_base; + unsigned long addr; + + /* + * word align port, but need more than 10 bits + * for accessing registers in bedrock local block + * (so we don't do port&0xfff) + */ + if (port == 0x1f6 || port == 0x1f7 + || port == 0x3f6 || port == 0x3f7 + || port == 0x1f0 || port == 0x1f1 + || port == 0x1f3 || port == 0x1f4 + || port == 0x1f2 || port == 0x1f5) { + io_base = __IA64_UNCACHED_OFFSET | 0x00000FFFFC000000; + addr = io_base | ((port >> 2) << 12) | (port & 0xfff); + } else { + addr = __ia64_get_io_port_base() | ((port >> 2) << 2); + } + return(void *) addr; + } +} + +unsigned int +sn1_inb (unsigned long port) +{ + volatile unsigned char *addr = sn1_io_addr(port); + unsigned char ret; + + ret = *addr; + __ia64_mf_a(); + return ret; +} + +unsigned int +sn1_inw (unsigned long port) +{ + volatile unsigned short *addr = sn1_io_addr(port); + unsigned short ret; + + ret = *addr; + __ia64_mf_a(); + return ret; +} + +unsigned int +sn1_inl (unsigned long port) +{ + volatile unsigned int *addr = sn1_io_addr(port); + unsigned int ret; + + ret = *addr; + __ia64_mf_a(); + return ret; +} + +void +sn1_outb (unsigned char val, unsigned long port) +{ + volatile unsigned char *addr = sn1_io_addr(port); + + *addr = val; + __ia64_mf_a(); +} + +void +sn1_outw (unsigned short val, unsigned long port) +{ + volatile unsigned short *addr = sn1_io_addr(port); + + *addr = val; + __ia64_mf_a(); +} + +void +sn1_outl (unsigned int val, unsigned long port) +{ + volatile unsigned int *addr = sn1_io_addr(port); + + *addr = val; + __ia64_mf_a(); +} diff --git a/arch/ia64/sn/sn1/irq.c b/arch/ia64/sn/sn1/irq.c index a8270fd2a..b487f88d4 100644 --- a/arch/ia64/sn/sn1/irq.c +++ b/arch/ia64/sn/sn1/irq.c @@ -1,8 +1,57 @@ -#include <linux/kernel.h> +/* + * Platform dependent support for SGI SN1 + * + * Copyright (C) 2000 Silicon Graphics + * Copyright (C) 2000 Jack Steiner (steiner@sgi.com) + * Copyright (C) 2000 Alan Mayer (ajm@sgi.com) + */ + +#include <linux/config.h> +#include <linux/init.h> #include <linux/sched.h> +#include <asm/current.h> #include <linux/irq.h> +#include <linux/interrupt.h> +#include <asm/sn/sgi.h> +#include <asm/sn/iograph.h> +#include <asm/sn/invent.h> +#include <linux/devfs_fs_kernel.h> +#include <asm/sn/hcl.h> +#include <asm/sn/types.h> +#include <asm/sn/pci/bridge.h> +#include <asm/sn/pci/pciio.h> +#include <asm/sn/pci/pciio_private.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/sn1/bedrock.h> +#include <asm/sn/intr.h> +#include <asm/sn/addrs.h> +#include <asm/sn/sn1/addrs.h> +#include <asm/sn/iobus.h> +#include <asm/sn/sn1/arch.h> +#include <asm/sn/synergy.h> + + +int bit_pos_to_irq(int bit); +int irq_to_bit_pos(int irq); +void add_interrupt_randomness(int irq); +void * kmalloc(size_t size, int flags); +void kfree(const void *); +int sgi_pci_intr_support (unsigned int, device_desc_t *, devfs_handle_t *, pciio_intr_line_t *, devfs_handle_t *); +pciio_intr_t pciio_intr_alloc(devfs_handle_t, device_desc_t, pciio_intr_line_t, devfs_handle_t); +int request_irq(unsigned int, void (*)(int, void *, struct pt_regs *), unsigned long, const char *, void *); + +/* This should be dynamically allocated, at least part of it. 
*/ +/* For the time being, though, we'll statically allocate it */ +/* because kmalloc hasn't been initiallized at the time this */ +/* array is initiallized. One way to do it would be to statically */ +/* allocate the data for node 0, then let other nodes, as they */ +/* need it, dynamically allocate their own data space. */ -#include <asm/ptrace.h> +struct sn1_cnode_action_list *sn1_node_actions[MAX_COMPACT_NODES]; +struct sn1_cnode_action_list sn1_actions[MAX_COMPACT_NODES][256]; + + +extern int numnodes; static unsigned int sn1_startup_irq(unsigned int irq) @@ -25,20 +74,192 @@ sn1_enable_irq(unsigned int irq) { } +static void +sn1_ack_irq(unsigned int irq) +{ +} + +static void +sn1_end_irq(unsigned int irq) +{ +} + +static void +sn1_set_affinity_irq(unsigned int irq, unsigned long mask) +{ +} + + +static void +sn1_handle_irq(int irq, void *dummy, struct pt_regs *regs) +{ + int bit, cnode; + struct sn1_cnode_action_list *alp; + struct sn1_intr_action *ap; + void (*handler)(int, void *, struct pt_regs *); + unsigned long flags = 0; + int cpuid = smp_processor_id(); + + + bit = irq_to_bit_pos(irq); + LOCAL_HUB_CLR_INTR(bit); + cnode = cpuid_to_cnodeid(cpuid); + alp = sn1_node_actions[cnode]; + ap = alp[irq].action_list; + if (ap == NULL) { + return; + } + while (ap) { + flags |= ap->flags; + handler = ap->handler; + (*handler)(irq,ap->intr_arg,regs); + ap = ap->next; + } + if ((flags & SA_SAMPLE_RANDOM) != 0) + add_interrupt_randomness(irq); + + return; +} + struct hw_interrupt_type irq_type_sn1 = { "sn1_irq", sn1_startup_irq, sn1_shutdown_irq, sn1_enable_irq, - sn1_disable_irq + sn1_disable_irq, + sn1_ack_irq, + sn1_end_irq, + sn1_set_affinity_irq +}; + +struct irqaction sn1_irqaction = { + sn1_handle_irq, + 0, + 0, + NULL, + NULL, + NULL, }; void sn1_irq_init (void) { - int i; + int i,j; + + for (i = 0; i <= NR_IRQS; ++i) { + if (irq_desc[i].handler == &no_irq_type) { + irq_desc[i].handler = &irq_type_sn1; + if (i >=71 && i <= 181) { + irq_desc[i].action = &sn1_irqaction; + } + } + } + + for (i = 0; i < numnodes; i++) { + sn1_node_actions[i] = sn1_actions[i]; + memset(sn1_node_actions[i], 0, + sizeof(struct sn1_cnode_action_list) * + (IA64_MAX_VECTORED_IRQ + 1)); + for (j=0; j<IA64_MAX_VECTORED_IRQ+1; j++) { + spin_lock_init(&sn1_node_actions[i][j].action_list_lock); + } + } +} + + +int +sn1_request_irq (unsigned int requested_irq, void (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, const char * devname, void *dev_id) +{ + devfs_handle_t curr_dev; + devfs_handle_t dev; + pciio_intr_t intr_handle; + pciio_intr_line_t line; + device_desc_t dev_desc; + int cpuid, bit, cnode; + struct sn1_intr_action *ap, *new_ap; + struct sn1_cnode_action_list *alp; + int irq; - for (i = IA64_MIN_VECTORED_IRQ; i <= IA64_MAX_VECTORED_IRQ; ++i) { - irq_desc[i].handler = &irq_type_sn1; + if ( (requested_irq & 0xff) == 0 ) { + int ret; + + sgi_pci_intr_support(requested_irq, + &dev_desc, &dev, &line, &curr_dev); + intr_handle = pciio_intr_alloc(curr_dev, NULL, line, curr_dev); + bit = intr_handle->pi_irq; + cpuid = intr_handle->pi_cpu; + irq = bit_pos_to_irq(bit); + cnode = cpuid_to_cnodeid(cpuid); + new_ap = (struct sn1_intr_action *)kmalloc( + sizeof(struct sn1_intr_action), GFP_KERNEL); + irq_desc[irq].status = 0; + new_ap->handler = handler; + new_ap->intr_arg = dev_id; + new_ap->flags = irqflags; + new_ap->next = NULL; + alp = sn1_node_actions[cnode]; + + spin_lock(&alp[irq].action_list_lock); + ap = alp[irq].action_list; + /* check action list for "share" consistency */ + while 
(ap){ + if (!(ap->flags & irqflags & SA_SHIRQ) ) { + spin_unlock(&alp[irq].action_list_lock); + return(-EBUSY); + } + ap = ap->next; + } + ap = alp[irq].action_list; + if (ap) { + while (ap->next) { + ap = ap->next; + } + ap->next = new_ap; + } else { + alp[irq].action_list = new_ap; + } + ret = pciio_intr_connect(intr_handle, (intr_func_t)handler, dev_id, NULL); + if (ret) { /* connect failed, undo what we did. */ + new_ap = alp[irq].action_list; + if (new_ap == ap) { + alp[irq].action_list = NULL; + kfree(ap); + } else { + while (new_ap->next && new_ap->next != ap) { + new_ap = new_ap->next; + } + if (new_ap->next == ap) { + new_ap->next = ap->next; + kfree(ap); + } + } + } + + spin_unlock(&alp[irq].action_list_lock); + return(ret); + } else { + return(request_irq(requested_irq, handler, irqflags, devname, dev_id)); } } + +#if !defined(CONFIG_IA64_SGI_IO) +void +sn1_pci_fixup(int arg) +{ +} +#endif + +int +bit_pos_to_irq(int bit) { +#define BIT_TO_IRQ 64 + + return bit + BIT_TO_IRQ; +} + +int +irq_to_bit_pos(int irq) { +#define IRQ_TO_BIT 64 + + return irq - IRQ_TO_BIT; +} diff --git a/arch/ia64/sn/sn1/llsc4.c b/arch/ia64/sn/sn1/llsc4.c new file mode 100644 index 000000000..98b98a906 --- /dev/null +++ b/arch/ia64/sn/sn1/llsc4.c @@ -0,0 +1,944 @@ +/* + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Jack Steiner (steiner@sgi.com) + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/smp.h> +#include <linux/kernel_stat.h> +#include <linux/mm.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <asm/efi.h> +#include <asm/page.h> +#include <linux/threads.h> + +extern void bringup_set_led_bits(u8 bits, u8 mask); + +#include "llsc4.h" + + +#ifdef STANDALONE +#include "lock.h" +#endif + +#ifdef INTTEST +static int inttest=0; +#endif + + +/* + * Test parameter table for AUTOTEST + */ +typedef struct { + int passes; + int linecount; + int linepad; +} autotest_table_t; + +autotest_table_t autotest_table[] = { + {1000000, 2, 0x2b4 }, + {1000000, 16, 0, }, + {1000000, 16, 4, }, + {1000000, 128, 0x44 }, + {1000000, 128, 0x84 }, + {1000000, 128, 0x200 }, + {1000000, 128, 0x204 }, + {1000000, 128, 0x2b4 }, + {1000000, 2, 8*MB+0x2b4 }, + {1000000, 16, 8*MB+0 }, + {1000000, 16, 8*MB+4 }, + {1000000, 128, 8*MB+0x44 }, + {1000000, 128, 8*MB+0x84 }, + {1000000, 128, 8*MB+0x200 }, + {1000000, 128, 8*MB+0x204 }, + {1000000, 128, 8*MB+0x2b4 }, + {0}}; + +/* + * Array of virtual addresses available for test purposes. + */ + +typedef struct { + long vstart; + long vend; + long nextaddr; + int wrapcount; +} memmap_t; + +memmap_t memmap[MAXCHUNKS]; +int memmapx=0; + +typedef struct { + void *addr; + long data[16]; + long data_fc[16]; +} capture_line_t; + +typedef struct { + int size; + void *blockaddr; + void *shadaddr; + long blockdata[16]; + long shaddata[16]; + long blockdata_fc[16]; + long shaddata_fc[16]; + long synerr; +} capture_t; + +/* + * PORTING NOTE: revisit this statement. On hardware we put mbase at 0 and + * the rest of the tables have to start at 1MB to skip PROM tables. 
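As an aside on the note above: the THREADPRIVATE() macro defined next carves each thread's threadprivate_t out of the region starting 1 MB above mbase, with every per-thread slot rounded up to a multiple of 512 bytes. A minimal sketch of that address computation (thread_private_addr is illustrative and not part of the patch; the real slot size is sizeof(threadprivate_t)):

/* Sketch only: mirrors THREADPRIVATE(t) = mbase + 1MB + t * roundup(size, 512). */
static unsigned long
thread_private_addr(unsigned long mbase_addr, int t, unsigned long struct_size)
{
	unsigned long slot = (struct_size + 511) / 512 * 512;	/* round up to 512 bytes */
	return mbase_addr + 1024 * 1024 + (unsigned long) t * slot;
}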
+ */ +#define THREADPRIVATE(t) ((threadprivate_t*)(((long)mbase)+1024*1024+t*((sizeof(threadprivate_t)+511)/512*512))) + +#define k_capture mbase->sk_capture +#define k_go mbase->sk_go +#define k_linecount mbase->sk_linecount +#define k_passes mbase->sk_passes +#define k_napticks mbase->sk_napticks +#define k_stop_on_error mbase->sk_stop_on_error +#define k_verbose mbase->sk_verbose +#define k_threadprivate mbase->sk_threadprivate +#define k_blocks mbase->sk_blocks +#define k_iter_msg mbase->sk_iter_msg +#define k_vv mbase->sk_vv +#define k_linepad mbase->sk_linepad +#define k_options mbase->sk_options +#define k_testnumber mbase->sk_testnumber +#define k_currentpass mbase->sk_currentpass + +static long blocks[MAX_LINECOUNT]; /* addresses of data blocks */ +static control_t *mbase; +static vint initialized=0; + +static unsigned int ran_conf_llsc(int); +static int rerr(capture_t *, char *, void *, void *, int, int, int, int, int, int); +static void dumpline(void *, char *, char *, void *, void *, int); +static int checkstop(int, int, uint); +static void spin(int); +static void capturedata(capture_t *, uint, void *, void *, int); +static int randn(uint max, uint *seed); +static uint zrandom (uint *zranseed); +static int set_lock(uint *, uint); +static int clr_lock(uint *, uint); +static void Speedo(void); + +int autotest_enabled=0; +static int autotest_explicit_flush=0; +static int llsctest_number=-1; +static int errstop_enabled=0; +static int fail_enabled=0; +static int selective_trigger=0; + +static int __init autotest_enable(char *str) +{ + autotest_enabled = 1; + return 1; +} +static int __init set_llscxflush(char *str) +{ + autotest_explicit_flush = 1; + return 1; +} +static int __init set_llscselt(char *str) +{ + selective_trigger = 1; + return 1; +} +static int __init set_llsctest(char *str) +{ + llsctest_number = simple_strtol(str, &str, 10); + if (llsctest_number < 0 || llsctest_number > 15) + llsctest_number = -1; + return 1; +} +static int __init set_llscerrstop(char *str) +{ + errstop_enabled = 1; + return 1; +} +static int __init set_llscfail(char *str) +{ + fail_enabled = 8; + return 1; +} + +static void print_params(void) +{ + printk ("********* Enter AUTOTEST facility on master cpu *************\n"); + printk (" Test options:\n"); + printk (" llsctest=<n>\t%d\tTest number to run (all = -1)\n", llsctest_number); + printk (" llscerrstop \t%s\tStop on error\n", errstop_enabled ? "on" : "off"); + printk (" llscxflush \t%s\tEnable explicit FC in test\n", autotest_explicit_flush ? "on" : "off"); + printk (" llscfail \t%s\tForce a failure to test the trigger & error messages\n", fail_enabled ? "on" : "off"); + printk (" llscselt \t%s\tSelective triger on failures\n", selective_trigger ? 
"on" : "off"); + printk ("\n"); +} +__setup("autotest", autotest_enable); +__setup("llsctest=", set_llsctest); +__setup("llscerrstop", set_llscerrstop); +__setup("llscxflush", set_llscxflush); +__setup("llscfail", set_llscfail); +__setup("llscselt", set_llscselt); + + +extern inline void +flush_buddy(void *p) +{ + long lp; + + if (autotest_explicit_flush) { + lp = (long)p; + lp ^= 0x40; + asm volatile ("fc %0" :: "r"(lp) : "memory"); + ia64_sync_i(); + ia64_srlz_d(); + } +} + +static int +set_lock(uint *lock, uint id) +{ + uint old; + flush_buddy(lock); + old = cmpxchg_acq(lock, 0, id); + return (old == 0); +} + +static int +clr_lock(uint *lock, uint id) +{ + uint old; + flush_buddy(lock); + old = cmpxchg_rel(lock, id, 0); + return (old == id); +} + +static void +zero_lock(uint *lock) +{ + flush_buddy(lock); + *lock = 0; +} + +/*------------------------------------------------------------------------+ +| Routine : ran_conf_llsc - ll/sc shared data test | +| Description: This test checks the coherency of shared data | ++------------------------------------------------------------------------*/ +static unsigned int +ran_conf_llsc(int thread) +{ + private_t pval; + share_t sval, sval2; + uint vv, linei, slinei, sharei, pass; + long t; + lock_t lockpat; + share_t *sharecopy; + long verbose, napticks, passes, linecount, lcount; + dataline_t *linep, *slinep; + int s, seed; + threadprivate_t *tp; + uint iter_msg, iter_msg_i=0; + int vv_mask; + int correct_errors; + int errs=0; + int stillbad; + capture_t capdata; + private_t *privp; + share_t *sharep; + + + linecount = k_linecount; + napticks = k_napticks; + verbose = k_verbose; + passes = k_passes; + iter_msg = k_iter_msg; + seed = (thread + 1) * 647; + tp = THREADPRIVATE(thread); + vv_mask = (k_vv>>((thread%16)*4)) & 0xf; + correct_errors = k_options&0xff; + + memset (&tp->private, 0, sizeof(tp->private)); + memset (&capdata, 0, sizeof(capdata)); + + for (pass = 1; passes == 0 || pass < passes; pass++) { + lockpat = (pass & 0x0fffffff) + (thread <<28); + tp->threadpasses = pass; + if (checkstop(thread, pass, lockpat)) + return 0; + iter_msg_i++; + if (iter_msg && iter_msg_i > iter_msg) { + printk("Thread %d, Pass %d\n", thread, pass); + iter_msg_i = 0; + } + lcount = 0; + + /* + * Select line to perform operations on. + */ + linei = randn(linecount, &seed); + sharei = randn(2, &seed); + slinei = (linei + (linecount/2))%linecount; /* I dont like this - fix later */ + + linep = (dataline_t *)blocks[linei]; + slinep = (dataline_t *)blocks[slinei]; + if (sharei == 0) + sharecopy = &slinep->share0; + else + sharecopy = &slinep->share1; + + + vv = randn(4, &seed); + if ((vv_mask & (1<<vv)) == 0) + continue; + + if (napticks) { + t = randn(napticks, &seed); + udelay(t); + } + privp = &linep->private[thread]; + sharep = &linep->share[sharei]; + + switch(vv) { + case 0: + /* Read and verify private count on line. */ + pval = *privp; + if (verbose) + printk("Line:%3d, Thread:%d:%d. Val: %x\n", linei, thread, vv, tp->private[linei]); + if (pval != tp->private[linei]) { + capturedata(&capdata, pass, privp, NULL, sizeof(*privp)); + stillbad = (*privp != tp->private[linei]); + if (rerr(&capdata, "Private count", linep, slinep, thread, pass, linei, tp->private[linei], pval, stillbad)) { + return 1; + } + if (correct_errors) { + flush_buddy(privp); + tp->private[linei] = *privp; + } + errs++; + } + break; + + case 1: + /* Read, verify, and increment private count on line. */ + pval = *privp; + if (verbose) + printk("Line:%3d, Thread:%d:%d. 
Val: %x\n", linei, thread, vv, tp->private[linei]); + if (pval != tp->private[linei]) { + capturedata(&capdata, pass, privp, NULL, sizeof(*privp)); + stillbad = (*privp != tp->private[linei]); + if (rerr(&capdata, "Private count & inc", linep, slinep, thread, pass, linei, tp->private[linei], pval, stillbad)) { + return 1; + } + errs++; + } + pval++; + flush_buddy(privp); + *privp = pval; + tp->private[linei] = pval; + break; + + case 2: + /* Lock line, read and verify shared data. */ + if (verbose) + printk("Line:%3d, Thread:%d:%d. Val: %x\n", linei, thread, vv, *sharecopy); + lcount = 0; + while (LOCK(sharei) != 1) { + if (checkstop(thread, pass, lockpat)) + return 0; + if (lcount++>1000000) { + capturedata(&capdata, pass, LOCKADDR(sharei), NULL, sizeof(lock_t)); + stillbad = (GETLOCK(sharei) != 0); + rerr(&capdata, "Shared data lock", linep, slinep, thread, pass, linei, 0, GETLOCK(sharei), stillbad); + return 1; + } + if ((lcount&0x3fff) == 0) + udelay(1000); + } + + sval = *sharep; + sval2 = *sharecopy; + if (pass > 12 && thread == 0 && fail_enabled == 1) + sval++; + if (sval != sval2) { + capturedata(&capdata, pass, sharep, sharecopy, sizeof(*sharecopy)); + stillbad = (*sharep != *sharecopy); + if (!stillbad && *sharep != sval && *sharecopy == sval2) + stillbad = 2; + if (rerr(&capdata, "Shared data", linep, slinep, thread, pass, linei, sval2, sval, stillbad)) { + return 1; + } + if (correct_errors) + *sharep = *sharecopy; + errs++; + } + + + if ( (s=UNLOCK(sharei)) != 1) { + capturedata(&capdata, pass, LOCKADDR(sharei), NULL, 4); + stillbad = (GETLOCK(sharei) != lockpat); + if (rerr(&capdata, "Shared data unlock", linep, slinep, thread, pass, linei, lockpat, GETLOCK(sharei), stillbad)) + return 1; + if (correct_errors) + ZEROLOCK(sharei); + errs++; + } + break; + + case 3: + /* Lock line, read and verify shared data, modify shared data. */ + if (verbose) + printk("Line:%3d, Thread:%d:%d. 
Val: %x\n", linei, thread, vv, *sharecopy); + lcount = 0; + while (LOCK(sharei) != 1) { + if (checkstop(thread, pass, lockpat)) + return 0; + if (lcount++>1000000) { + capturedata(&capdata, pass, LOCKADDR(sharei), NULL, sizeof(lock_t)); + stillbad = (GETLOCK(sharei) != 0); + rerr(&capdata, "Shared data lock & inc", linep, slinep, thread, pass, linei, 0, GETLOCK(sharei), stillbad); + return 1; + } + if ((lcount&0x3fff) == 0) + udelay(1000); + } + sval = *sharep; + sval2 = *sharecopy; + if (sval != sval2) { + capturedata(&capdata, pass, sharep, sharecopy, sizeof(*sharecopy)); + stillbad = (*sharep != *sharecopy); + if (!stillbad && *sharep != sval && *sharecopy == sval2) + stillbad = 2; + if (rerr(&capdata, "Shared data & inc", linep, slinep, thread, pass, linei, sval2, sval, stillbad)) { + return 1; + } + errs++; + } + + flush_buddy(sharep); + *sharep = lockpat; + flush_buddy(sharecopy); + *sharecopy = lockpat; + + + if ( (s=UNLOCK(sharei)) != 1) { + capturedata(&capdata, pass, LOCKADDR(sharei), NULL, 4); + stillbad = (GETLOCK(sharei) != lockpat); + if (rerr(&capdata, "Shared data & inc unlock", linep, slinep, thread, pass, linei, thread, GETLOCK(sharei), stillbad)) + return 1; + if (correct_errors) + ZEROLOCK(sharei); + errs++; + } + break; + } + } + + return (errs > 0); +} + +static void +trigger_la(long val) +{ + long *p; + + p = (long*)0xc0000a0001000020L; /* PI_CPU_NUM */ + *p = val; +} + +static long +getsynerr(void) +{ + long err, *errp; + + errp = (long*)0xc0000e0000000340L; /* SYN_ERR */ + err = *errp; + if (err) + *errp = -1L; + return (err & ~0x60); +} + +static int +rerr(capture_t *cap, char *msg, void *lp, void *slp, int thread, int pass, int linei, int exp, int found, int stillbad) +{ + int cpu; + long synerr; + int selt; + + + selt = selective_trigger && stillbad > 1 && + memcmp(cap->blockdata, cap->blockdata_fc, 128) != 0 && + memcmp(cap->shaddata, cap->shaddata_fc, 128) == 0; + if (selt) { + trigger_la(pass); + } else if (selective_trigger) { + k_go = ST_STOP; + return k_stop_on_error;; + } + + spin(1); + printk ("\nDataError!: %-20s, test %ld, thread %d, line:%d, pass %d (0x%x), time %ld expected:%x, found:%x\n", + msg, k_testnumber, thread, linei, pass, pass, jiffies, exp, found); + + dumpline (lp, "Corrupted data", "D ", cap->blockaddr, cap->blockdata, cap->size); + if (memcmp(cap->blockdata, cap->blockdata_fc, 128)) + dumpline (lp, "Corrupted data", "DF", cap->blockaddr, cap->blockdata_fc, cap->size); + + if (cap->shadaddr) { + dumpline (slp, "Shadow data", "S ", cap->shadaddr, cap->shaddata, cap->size); + if (memcmp(cap->shaddata, cap->shaddata_fc, 128)) + dumpline (slp, "Shadow data", "SF", cap->shadaddr, cap->shaddata_fc, cap->size); + } + + printk("Threadpasses: "); + for (cpu=0; cpu<MAXCPUS; cpu++) + if (k_threadprivate[cpu]->threadpasses) + printk(" %d:0x%x", cpu, k_threadprivate[cpu]->threadpasses); + + + printk("\nData was %sfixed by flushcache\n", (stillbad == 1 ? 
"**** NOT **** " : " ")); + synerr = getsynerr(); + if (synerr) + printk("SYNERR: Thread %d, Synerr: 0x%lx\n", thread, synerr); + spin(2); + printk("\n\n"); + + if (errstop_enabled) { + local_irq_disable(); + while(1); + } + return k_stop_on_error; +} + + +static void +dumpline(void *lp, char *str1, char *str2, void *addr, void *data, int size) +{ + long *p; + int i, off; + + printk("%s at 0x%lx, size %d, block starts at 0x%lx\n", str1, (long)addr, size, (long)lp); + p = (long*) data; + for (i=0; i<16; i++, p++) { + if (i==0) printk("%2s", str2); + if (i==8) printk(" "); + printk(" %016lx", *p); + if ((i&7)==7) printk("\n"); + } + printk(" "); + off = (((long)addr) ^ size) & 63L; + for (i=0; i<off+size; i++) { + printk("%s", (i>=off) ? "--" : " "); + if ((i%8) == 7) + printk(" "); + } + + off = ((long)addr) & 127; + printk(" (line %d)\n", off/64+1); +} + + +static int +randn(uint max, uint *seedp) +{ + if (max == 1) + return(0); + else + return((int)(zrandom(seedp)>>10) % max); +} + + +static int +checkstop(int thread, int pass, uint lockpat) +{ + long synerr; + + if (k_go == ST_RUN) + return 0; + if (k_go == ST_STOP) + return 1; + + if (errstop_enabled) { + local_irq_disable(); + while(1); + } + synerr = getsynerr(); + spin(2); + if (k_go == ST_STOP) + return 1; + if (synerr) + printk("SYNERR: Thread %d, Synerr: 0x%lx\n", thread, synerr); + return 1; +} + + +static void +spin(int j) +{ + udelay(j * 500000); +} + +static void +capturedata(capture_t *cap, uint pass, void *blockaddr, void *shadaddr, int size) +{ + + if (!selective_trigger) + trigger_la (pass); + + memcpy (cap->blockdata, CACHEALIGN(blockaddr), 128); + if (shadaddr) + memcpy (cap->shaddata, CACHEALIGN(shadaddr), 128); + + if (k_stop_on_error) { + k_go = ST_ERRSTOP; + } + + cap->size = size; + cap->blockaddr = blockaddr; + cap->shadaddr = shadaddr; + + asm volatile ("fc %0" :: "r"(blockaddr) : "memory"); + ia64_sync_i(); + ia64_srlz_d(); + memcpy (cap->blockdata_fc, CACHEALIGN(blockaddr), 128); + + if (shadaddr) { + asm volatile ("fc %0" :: "r"(shadaddr) : "memory"); + ia64_sync_i(); + ia64_srlz_d(); + memcpy (cap->shaddata_fc, CACHEALIGN(shadaddr), 128); + } +} + +int zranmult = 0x48c27395; + +static uint +zrandom (uint *seedp) +{ + *seedp = (*seedp * zranmult) & 0x7fffffff; + return (*seedp); +} + + +void +set_autotest_params(void) +{ + static int testnumber=-1; + + if (llsctest_number >= 0) { + testnumber = llsctest_number; + } else { + testnumber++; + if (autotest_table[testnumber].passes == 0) + testnumber = 0; + } + k_passes = autotest_table[testnumber].passes; + k_linepad = autotest_table[testnumber].linepad; + k_linecount = autotest_table[testnumber].linecount; + k_testnumber = testnumber; + + if (IS_RUNNING_ON_SIMULATOR()) { + printk ("llsc start test %ld\n", k_testnumber); + k_passes = 1000; + } +} + + +static void +set_leds(int errs) +{ + unsigned char leds=0; + + /* + * Leds are: + * ppppeee- + * where + * pppp = test number + * eee = error count but top bit is stick + */ + + leds = ((errs&7)<<1) | ((k_testnumber&15)<<4) | (errs ? 
0x08 : 0); + bringup_set_led_bits(leds, 0xfe); +} + +static void +setup_block_addresses(void) +{ + int i, stride, memmapi; + + stride = LINESTRIDE; + memmapi = 0; + for (i=0; i<memmapx; i++) { + memmap[i].nextaddr = memmap[i].vstart; + memmap[i].wrapcount = 0; + } + + for (i=0; i<k_linecount; i++) { + blocks[i] = memmap[memmapi].nextaddr; + memmap[memmapi].nextaddr += stride; + if (memmap[memmapi].nextaddr + sizeof(dataline_t) >= memmap[memmapi].vend) { + memmap[memmapi].wrapcount++; + memmap[memmapi].nextaddr = memmap[memmapi].vstart + + memmap[memmapi].wrapcount * sizeof(dataline_t); + } + + memset((void*)blocks[i], 0, sizeof(dataline_t)); + + if (stride > 16384) { + memmapi++; + if (memmapi == memmapx) + memmapi = 0; + } + } + +} + +static void +set_thread_state(int cpuid, int state) +{ + if (k_threadprivate[cpuid]->threadstate == TS_KILLED) { + bringup_set_led_bits(0xfe, 0xfe); + while(1); + } + k_threadprivate[cpuid]->threadstate = state; +} + +static int +build_mem_map(unsigned long start, unsigned long end, void *arg) +{ + long lstart; + /* + * HACK - skip the kernel on the first node + */ + + printk ("LLSC memmap: start 0x%lx, end 0x%lx, (0x%lx - 0x%lx)\n", + start, end, (long) virt_to_page(start), (long) virt_to_page(end-PAGE_SIZE)); + + while (end > start && (PageReserved(virt_to_page(end-PAGE_SIZE)) || virt_to_page(end-PAGE_SIZE)->count.counter > 0)) + end -= PAGE_SIZE; + + lstart = end; + while (lstart > start && (!PageReserved(virt_to_page(lstart-PAGE_SIZE)) && virt_to_page(lstart-PAGE_SIZE)->count.counter == 0)) + lstart -= PAGE_SIZE; + + printk (" memmap: start 0x%lx, end 0x%lx\n", lstart, end); + if (lstart >= end) + return 0; + + memmap[memmapx].vstart = lstart; + memmap[memmapx].vend = end; + memmapx++; + return 0; +} + +void int_test(void); + +int +llsc_main (int cpuid, long mbasex) +{ + int i, cpu, is_master, repeatcnt=0; + unsigned int preverr=0, errs=0, pass=0; + int automode=0; + +#ifdef INTTEST + if (inttest) + int_test(); +#endif + + if (!autotest_enabled) + return 0; + +#ifdef CONFIG_SMP + is_master = !smp_processor_id(); +#else + is_master = 1; +#endif + + + if (is_master) { + print_params(); + if(!IS_RUNNING_ON_SIMULATOR()) + spin(10); + mbase = (control_t*)mbasex; + k_currentpass = 0; + k_go = ST_IDLE; + k_passes = DEF_PASSES; + k_napticks = DEF_NAPTICKS; + k_stop_on_error = DEF_STOP_ON_ERROR; + k_verbose = DEF_VERBOSE; + k_linecount = DEF_LINECOUNT; + k_iter_msg = DEF_ITER_MSG; + k_vv = DEF_VV; + k_linepad = DEF_LINEPAD; + k_blocks = (void*)blocks; + efi_memmap_walk(build_mem_map, 0); + +#ifdef CONFIG_IA64_SGI_AUTOTEST + automode = 1; +#endif + + for (i=0; i<MAXCPUS; i++) { + k_threadprivate[i] = THREADPRIVATE(i); + memset(k_threadprivate[i], 0, sizeof(*k_threadprivate[i])); + } + initialized = 1; + } else { + while (initialized == 0) + udelay(100); + } + +loop: + if (is_master) { + if (automode) { + if (!preverr || repeatcnt++ > 5) { + set_autotest_params(); + repeatcnt = 0; + } + } else { + while (k_go == ST_IDLE); + } + + k_go = ST_INIT; + if (k_linecount > MAX_LINECOUNT) k_linecount = MAX_LINECOUNT; + k_linecount = k_linecount & ~1; + setup_block_addresses(); + + k_currentpass = pass++; + k_go = ST_RUN; + if (fail_enabled) + fail_enabled--; + + } else { + while (k_go != ST_RUN || k_currentpass != pass); + pass++; + } + + + set_leds(errs); + set_thread_state(cpuid, TS_RUNNING); + + errs += ran_conf_llsc(cpuid); + preverr = (k_go == ST_ERRSTOP); + + set_leds(errs); + set_thread_state(cpuid, TS_STOPPED); + + if (is_master) { + Speedo(); + for (i=0, cpu=0; 
cpu<MAXCPUS; cpu++) { + while (k_threadprivate[cpu]->threadstate == TS_RUNNING) { + i++; + if (i == 10000) { + k_go = ST_STOP; + printk (" llsc master stopping test number %ld\n", k_testnumber); + } + if (i > 100000) { + k_threadprivate[cpu]->threadstate = TS_KILLED; + printk (" llsc: master killing cpuid %d, running test number %ld\n", + cpu, k_testnumber); + } + udelay(1000); + } + } + } + + goto loop; +} + + +static void +Speedo(void) +{ + static int i = 0; + + switch (++i%4) { + case 0: + printk("|\b"); + break; + case 1: + printk("\\\b"); + break; + case 2: + printk("-\b"); + break; + case 3: + printk("/\b"); + break; + } +} + +#ifdef INTTEST + +/* ======================================================================================================== + * + * Some test code to verify that interrupts work + * + * Add the following to the arch/ia64/kernel/smp.c after the comment "Reschedule callback" + * if (zzzprint_resched) printk(" cpu %d got interrupt\n", smp_processor_id()); + * + * Enable the code in arch/ia64/sn/sn1/smp.c to print sending IPIs. + * + */ + +static int __init set_inttest(char *str) +{ + inttest = 1; + autotest_enabled = 1; + + return 1; +} + +__setup("inttest=", set_inttest); + +int zzzprint_resched=0; + +void +int_test() { + int mycpu, cpu; + static volatile int control_cpu=0; + + mycpu = smp_processor_id(); + zzzprint_resched = 2; + + printk("Testing cross interrupts\n"); + + while (control_cpu != smp_num_cpus) { + if (mycpu == cpu_logical_map(control_cpu)) { + for (cpu=0; cpu<smp_num_cpus; cpu++) { + printk("Sending interrupt from %d to %d\n", mycpu, cpu_logical_map(cpu)); + udelay(IS_RUNNING_ON_SIMULATOR ? 10000 : 400000); + smp_send_reschedule(cpu_logical_map(cpu)); + udelay(IS_RUNNING_ON_SIMULATOR ? 10000 : 400000); + smp_send_reschedule(cpu_logical_map(cpu)); + udelay(IS_RUNNING_ON_SIMULATOR ? 10000 : 400000); + } + control_cpu++; + } + } + + zzzprint_resched = 1; + + if (mycpu == cpu_logical_map(smp_num_cpus-1)) { + printk("\nTight loop of cpu %d sending ints to cpu 0 (every 100 us)\n", mycpu); + udelay(IS_RUNNING_ON_SIMULATOR ? 1000 : 1000000); + __cli(); + while (1) { + smp_send_reschedule(0); + udelay(100); + } + + } + + while(1); +} +#endif diff --git a/arch/ia64/sn/sn1/llsc4.h b/arch/ia64/sn/sn1/llsc4.h new file mode 100644 index 000000000..b305caf2b --- /dev/null +++ b/arch/ia64/sn/sn1/llsc4.h @@ -0,0 +1,104 @@ +/* + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000 Silicon Graphics, Inc. 
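All of the llsc/interrupt tests above are driven by boot options registered with __setup(). As a usage illustration (assuming a kernel built with this bringup code so that llsc_main is actually invoked), a command line such as

    autotest llsctest=3 llscerrstop llscxflush

would enable the autotest facility, run only test 3, stop on the first data error, and add the explicit fc flushes, while appending inttest=1 would also run the cross-interrupt test.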
+ * Copyright (C) 2000 by Jack Steiner (steiner@sgi.com) + */ + +#ifdef STANDALONE +#include "lock.h" +#endif + + +#define DEF_NAPTICKS 0 +#define DEF_PASSES 0 +#define DEF_AUTO_PASSES 1000000 +#define DEF_STOP_ON_ERROR 1 +#define DEF_VERBOSE 0 +#define DEF_LINECOUNT 2 +#define DEF_ITER_MSG 0 +#define DEF_VV 0xffffffff +#define DEF_LINEPAD 0x234 + + + +#define MAXCPUS 16 +#define CACHELINE 64 +#define MAX_LINECOUNT 1024 +#define K 1024 +#define MB (K*K) + + +#define uint unsigned int +#define ushort unsigned short +#define vint volatile int +#define vlong volatile long + +#define LOCKADDR(i) &linep->lock[(i)] +#define LOCK(i) set_lock(LOCKADDR(i), lockpat) +#define UNLOCK(i) clr_lock(LOCKADDR(i), lockpat) +#define GETLOCK(i) *LOCKADDR(i) +#define ZEROLOCK(i) zero_lock(LOCKADDR(i)) + +#define CACHEALIGN(a) ((void*)((long)(a) & ~127L)) + +typedef uint lock_t; +typedef uint share_t; +typedef uint private_t; + +typedef struct { + lock_t lock[2]; + share_t share[2]; + private_t private[MAXCPUS]; + share_t share0; + share_t share1; +} dataline_t ; + + +#define LINEPAD k_linepad +#define LINESTRIDE (((sizeof(dataline_t)+CACHELINE-1)/CACHELINE)*CACHELINE + LINEPAD) + + +typedef struct { + vint threadstate; + uint threadpasses; + private_t private[MAX_LINECOUNT]; +} threadprivate_t; + +typedef struct { + vlong sk_go; /* 0=idle, 1=init, 2=run */ + long sk_linecount; + long sk_passes; + long sk_napticks; + long sk_stop_on_error; + long sk_verbose; + long sk_iter_msg; + long sk_vv; + long sk_linepad; + long sk_options; + long sk_testnumber; + vlong sk_currentpass; + void *sk_blocks; + threadprivate_t *sk_threadprivate[MAXCPUS]; +} control_t; + +/* Run state (k_go) constants */ +#define ST_IDLE 0 +#define ST_INIT 1 +#define ST_RUN 2 +#define ST_STOP 3 +#define ST_ERRSTOP 4 + + +/* Threadstate constants */ +#define TS_STOPPED 0 +#define TS_RUNNING 1 +#define TS_KILLED 2 + + + +int llsc_main (int cpuid, long mbasex); + diff --git a/arch/ia64/sn/sn1/mm.c b/arch/ia64/sn/sn1/mm.c new file mode 100644 index 000000000..e0010782c --- /dev/null +++ b/arch/ia64/sn/sn1/mm.c @@ -0,0 +1,399 @@ +/* + * Copyright, 2000, Silicon Graphics. + * Copyright Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * Copyright 2000 Kanoj Sarcar (kanoj@sgi.com) + */ + +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/bootmem.h> +#include <asm/page.h> +#include <asm/efi.h> +#include <asm/sn/mmzone_sn1.h> + +# define MIN(a,b) ((a) < (b) ? (a) : (b)) +# define MAX(a,b) ((a) > (b) ? (a) : (b)) + +/* + * Note that the nodemem[] data structure does not support arbitrary + * memory types and memory descriptors inside the node. For example, + * you can not have multiple efi-mem-type segments in the node and + * expect the OS not to use specific mem-types. Currently, the + * assumption is that "start" is the start of virtual/physical memory + * on the node. PROM can reserve some memory _only_ at the beginning. + * This is tracked via the "usable" field, that maintains where the + * os can start using memory from on a node (ie end of PROM memory). + * setup_node_bootmem() is passed the above "usable" value, and is + * expected to make bootmem calls that ensure lower memory is not used. + * Note that the bootmem for a node is initialized on the entire node, + * without regards to any holes - then we reserve the holes in + * setup_sn1_bootmem(), to make sure the holes are not handed out by + * alloc_bootmem, as well as the corresponding mem_map entries are not + * considered allocatable by the page_alloc routines. 
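Returning to llsc4.h above: the LINESTRIDE used by setup_block_addresses() rounds sizeof(dataline_t) up to whole 64-byte cache lines and then adds the configurable k_linepad. A worked sketch follows; the 88-byte figure assumes 4-byte uints and no compiler padding, which is an assumption and not something the patch states.

/* Sketch: dataline_t is 22 uints = 88 bytes under the assumption above;
 * rounded up to CACHELINE (64) that is 128, and with the default linepad
 * of 0x234 the stride becomes 128 + 564 = 692 bytes. */
static unsigned long
example_linestride(unsigned long dataline_size, unsigned long linepad)
{
	unsigned long cacheline = 64;
	return (dataline_size + cacheline - 1) / cacheline * cacheline + linepad;
}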
+ */ +struct nodemem_s { + u64 start ; + u64 end ; + u64 hole[SN1_MAX_BANK_PER_NODE] ; + u64 usable; +} nodemem[MAXNODES] ; +static int nodemem_valid = 0; + +static int __init +free_unused_memmap_hole(int nid, unsigned long start, unsigned long end) +{ + struct page * page, *pageend; + unsigned long count = 0; + + if (start >= end) + return 0 ; + + /* + * Get the memmap ptrs to the start and end of the holes. + * virt_to_page(start) will panic, if start is in hole. + * Can we do virt_to_page(end), if end is on the next node? + */ + + page = virt_to_page(start-1); + page++ ; + pageend = virt_to_page(end) ; + + printk("hpage=0x%lx, hpageend=0x%lx\n", (u64)page, (u64)pageend) ; + free_bootmem_node(NODE_DATA(nid), __pa(page), (u64)pageend - (u64)page); + + return count ; +} + +void +free_unused_memmap_node(int nid) +{ + u64 i = 0 ; + u64 holestart = -1 ; + + do { + holestart = nodemem[nid].hole[i] ; + i++ ; + while ((i < SN1_MAX_BANK_PER_NODE) && + (nodemem[nid].hole[i] == (u64)-1)) + i++ ; + if (i < SN1_MAX_BANK_PER_NODE) + free_unused_memmap_hole(nid, holestart, + nodemem[nid].start + (i<<SN1_BANK_ADDR_SHIFT)); + } while (i<SN1_MAX_BANK_PER_NODE); +} + +/* + * Since efi_memmap_walk merges contiguous banks, this code will need + * to find all the nasid/banks covered by the input memory descriptor. + */ +static int __init +build_nodemem_map(unsigned long start, unsigned long end, void *arg) +{ + unsigned long vaddr = start; + unsigned long nvaddr; + int nasid = GetNasId(__pa(vaddr)); + int cnodeid, bankid; + + while (vaddr < end) { + cnodeid = NASID_TO_CNODEID(nasid); + bankid = GetBankId(__pa(vaddr)); + nodemem[cnodeid].start = MIN(nodemem[cnodeid].start, vaddr); + nodemem[cnodeid].usable = MIN(nodemem[cnodeid].usable, vaddr); + nvaddr = (unsigned long)__va((unsigned long)(++nasid) << + SN1_NODE_ADDR_SHIFT); + nodemem[cnodeid].end = MAX(nodemem[cnodeid].end, MIN(end, nvaddr)); + while ((bankid < SN1_MAX_BANK_PER_NODE) && + (vaddr < nodemem[cnodeid].end)) { + nvaddr = nodemem[cnodeid].start + + ((unsigned long)(bankid + 1) << SN1_BANK_ADDR_SHIFT); + nodemem[cnodeid].hole[bankid++] = MIN(nvaddr, end); + vaddr = nvaddr; + } + } + + return 0; +} + +static int __init +pgtbl_size_ok(int nid) +{ + unsigned long numpfn, bank0size, nodesize ; + + nodesize = nodemem[nid].end - nodemem[nid].start ; + numpfn = nodesize >> PAGE_SHIFT; + + bank0size = nodemem[nid].hole[0] - nodemem[nid].start ; + /* If nid == master node && no kernel text replication */ + bank0size -= 0xA00000 ; /* Kernel text + stuff */ + bank0size -= ((numpfn + 7) >> 3); + + if ((numpfn * sizeof(mem_map_t)) > bank0size) { + printk("nid = %d, ns=0x%lx, npfn=0x%lx, bank0size=0x%lx\n", + nid, nodesize, numpfn, bank0size) ; + return 0 ; + } + + return 1 ; +} + +static void __init +check_pgtbl_size(int nid) +{ + int bank = SN1_MAX_BANK_PER_NODE - 1 ; + + /* Find highest bank with valid memory */ + while ((nodemem[nid].hole[bank] == -1) && (bank)) + bank-- ; + + while (!pgtbl_size_ok(nid)) { + /* Remove that bank of memory */ + /* Collect some numbers later */ + printk("Ignoring node %d bank %d\n", nid, bank) ; + nodemem[nid].hole[bank--] = -1 ; + /* Get to the next populated bank */ + while ((nodemem[nid].hole[bank] == -1) && (bank)) + bank-- ; + printk("Using only upto bank %d on node %d\n", bank,nid) ; + nodemem[nid].end = nodemem[nid].hole[bank] ; + if (!bank) break ; + } +} + +void dump_nodemem_map(int) ; + +#ifdef CONFIG_DISCONTIGMEM + +extern bootmem_data_t bdata[] ; +static int curnodeid ; + +static int __init 
+setup_node_bootmem(unsigned long start, unsigned long end, unsigned long nodefree) +{ + extern char _end; + int i; + unsigned long kernelend = PAGE_ALIGN((unsigned long)(&_end)); + unsigned long pkernelend = __pa(kernelend); + unsigned long bootmap_start, bootmap_size; + unsigned long pstart, pend; + + pstart = __pa(start) ; + pend = __pa(end) ; + + /* If we are past a node mem boundary, on simulated dig numa + * increment current node id. */ + + curnodeid = NASID_TO_CNODEID(GetNasId(pstart)) ; + + /* + * Make sure we are being passed page aligned addresses. + */ + if ((start & (PAGE_SIZE - 1)) || (end & (PAGE_SIZE - 1))) + panic("setup_node_bootmem:align"); + + + /* For now, just go to the lower CHUNK alignment so that + * chunktonid of 0-8MB and other lower mem pages get initted. */ + + pstart &= CHUNKMASK ; + pend = (pend+CHUNKSZ-1) & CHUNKMASK; + + /* If pend == 0, both addrs below 8 MB, special case it + * FIX: CHUNKNUM(pend-1) broken if pend == 0 + * both addrs within 8MB */ + + if (pend == 0) { + chunktonid[0] = 0; + return 0; + } + + /* Fill up the chunktonid array first. */ + + for (i = PCHUNKNUM(pstart); i <= PCHUNKNUM(pend-1); i++) + chunktonid[i] = curnodeid; + + /* This check is bogus for now till MAXCHUNKS is properly + * defined to say if it includes holes or not. */ + + if ((CHUNKTONID(PCHUNKNUM(pend)) > MAXCHUNKS) || + (PCHUNKNUM(pstart) >= PCHUNKNUM(pend))) { + printk("Ign 0x%lx-0x%lx, ", __pa(start), __pa(end)); + return(0); + } + + /* This routine gets called many times in node 0. + * The first one to reach here would be the one after + * kernelend to end of first node. */ + + NODE_DATA(curnodeid)->bdata = &(bdata[curnodeid]); + + if (curnodeid == 0) { + /* for master node, forcibly assign these values + * This gets called many times on dig but we + * want these exact values + * Also on softsdv, the memdesc for 0 is missing */ + NODE_START(curnodeid) = PAGE_OFFSET; + NODE_SIZE(curnodeid) = (end - PAGE_OFFSET); + } else { + /* This gets called only once for non zero nodes + * If it does not, then NODE_STARt should be + * LOCAL_BASE(nid) */ + + NODE_START(curnodeid) = start; + NODE_SIZE(curnodeid) = (end - start); + } + + /* if end < kernelend do not do anything below this */ + if (pend < pkernelend) + return 0 ; + + /* + * Handle the node that contains kernel text/data. It would + * be nice if the loader loads the kernel at a "chunk", ie + * not in memory that the kernel will ignore (else free_initmem + * has to worry about not freeing memory that the kernel ignores). + * Note that we assume the space from the node start to + * KERNEL_START can not hold all the bootmem data, but from kernel + * end to node end can. + */ + + /* TBD: This may be bogus in light of the above check. */ + + if ((pstart < pkernelend) && (pend >= pkernelend)) { + bootmap_start = pkernelend; + } else { + bootmap_start = __pa(start); /* chunk & page aligned */ + } + + /* + * Low memory is reserved for PROM use on SN1. The current node + * memory model is [PROM mem ... kernel ... free], where the + * first two components are optional on a node. + */ + if (bootmap_start < __pa(nodefree)) + bootmap_start = __pa(nodefree); + +/* XXX TBD */ +/* For curnodeid of 0, this gets called many times because of many + * < 8MB segments. start gets bumped each time. We want to fix it + * to 0 now. + */ + if (curnodeid == 0) + start=PAGE_OFFSET; +/* + * This makes sure that in free_area_init_core - paging_init + * idx is the entire node page range and for loop goes thro + * all pages. 
test_bit for kernel pages should remain reserved + * because free available mem takes care of kernel_start and end + */ + + bootmap_size = init_bootmem_node(NODE_DATA(curnodeid), + (bootmap_start >> PAGE_SHIFT), + (__pa(start) >> PAGE_SHIFT), (__pa(end) >> PAGE_SHIFT)); + + free_bootmem_node(NODE_DATA(curnodeid), bootmap_start + bootmap_size, + __pa(end) - (bootmap_start + bootmap_size)); + + return(0); +} + +void +setup_sn1_bootmem(int maxnodes) +{ + int i; + + for (i=0;i<MAXNODES;i++) { + nodemem[i].usable = nodemem[i].start = -1 ; + nodemem[i].end = 0 ; + memset(&nodemem[i].hole, -1, sizeof(nodemem[i].hole)) ; + } + efi_memmap_walk(build_nodemem_map, 0) ; + + /* + * Run thru all the nodes, adjusting their starts. This is needed + * because efi_memmap_walk() might not process certain mds that + * are marked reserved for PROM at node low memory. + */ + for (i = 0; i < maxnodes; i++) + nodemem[i].start = ((nodemem[i].start >> SN1_NODE_ADDR_SHIFT) << + SN1_NODE_ADDR_SHIFT); + nodemem_valid = 1 ; + + /* After building the nodemem map, check if the page table + * will fit in the first bank of each node. If not change + * the node end addr till it fits. We dont want to do this + * in mm/page_alloc.c + */ + + for (i=0;i<maxnodes;i++) + check_pgtbl_size(i) ; + + for (i=0;i<maxnodes;i++) + setup_node_bootmem(nodemem[i].start, nodemem[i].end, nodemem[i].usable); + + /* + * Mark the holes as reserved, so the corresponding mem_map + * entries will not be marked allocatable in free_all_bootmem*(). + */ + for (i = 0; i < maxnodes; i++) { + int j = 0 ; + u64 holestart = -1 ; + + do { + holestart = nodemem[i].hole[j++]; + while ((j < SN1_MAX_BANK_PER_NODE) && + (nodemem[i].hole[j] == (u64)-1)) + j++; + if (j < SN1_MAX_BANK_PER_NODE) + reserve_bootmem_node(NODE_DATA(i), + __pa(holestart), (nodemem[i].start + + ((long)j << SN1_BANK_ADDR_SHIFT) - + holestart)); + } while (j < SN1_MAX_BANK_PER_NODE); + } + + dump_nodemem_map(maxnodes) ; +} +#endif + +/* + * This used to be invoked from an SN1 specific hack in efi_memmap_walk. + * It tries to ignore banks which the kernel is ignoring because bank 0 + * is too small to hold the memmap entries for this bank. + * The current SN1 efi_memmap_walk callbacks do not need this. That + * leaves the generic ia64 callbacks find_max_pfn, count_pages and + * count_reserved_pages, of which the first can probably get by without + * this, the last two probably need this, although they also can probably + * get by. 
+ */ +int +sn1_bank_ignore(u64 start, u64 end) +{ + int nid = NASID_TO_CNODEID(GetNasId(__pa(end))) ; + int bank = GetBankId(__pa(end)) ; + + if (!nodemem_valid) + return 0 ; + + if (nodemem[nid].hole[bank] == -1) + return 1 ; + else + return 0 ; +} + +void +dump_nodemem_map(int maxnodes) +{ + int i,j; + + printk("NODEMEM_S info ....\n") ; + printk("Node start end usable\n"); + for (i=0;i<maxnodes;i++) { + printk("%d 0x%lx 0x%lx 0x%lx\n", + i, nodemem[i].start, nodemem[i].end, nodemem[i].usable); + printk("Holes -> ") ; + for (j=0;j<SN1_MAX_BANK_PER_NODE;j++) + printk("0x%lx ", nodemem[i].hole[j]) ; + printk("\n"); + } +} + diff --git a/arch/ia64/sn/sn1/setup.c b/arch/ia64/sn/sn1/setup.c index 7b397bb6b..3bfce39e7 100644 --- a/arch/ia64/sn/sn1/setup.c +++ b/arch/ia64/sn/sn1/setup.c @@ -14,13 +14,14 @@ #include <linux/timex.h> #include <linux/sched.h> #include <linux/ioport.h> +#include <linux/mm.h> +#include <asm/sn/mmzone_sn1.h> #include <asm/io.h> #include <asm/machvec.h> #include <asm/system.h> #include <asm/processor.h> - /* * The format of "screen_info" is strange, and due to early i386-setup * code. This is just enough to make the console code think we're on a @@ -50,29 +51,48 @@ char drive_info[4*16]; unsigned long sn1_map_nr (unsigned long addr) { +#ifdef CONFIG_DISCONTIGMEM return MAP_NR_SN1(addr); +#else + return MAP_NR_DENSE(addr); +#endif } -void +void __init sn1_setup(char **cmdline_p) { - + extern void init_sn1_smp_config(void); ROOT_DEV = to_kdev_t(0x0301); /* default to first IDE drive */ + init_sn1_smp_config(); +#ifdef ZZZ #if !defined (CONFIG_IA64_SOFTSDV_HACKS) - /* - * Program the timer to deliver timer ticks. 0x40 is the I/O port - * address of PIT counter 0, 0x43 is the I/O port address of the - * PIT control word. - */ - request_region(0x40,0x20,"timer"); - outb(0x34, 0x43); /* Control word */ - outb(LATCH & 0xff , 0x40); /* LSB */ - outb(LATCH >> 8, 0x40); /* MSB */ - printk("PIT: LATCH at 0x%x%x for %d HZ\n", LATCH >> 8, LATCH & 0xff, HZ); + /* + * Program the timer to deliver timer ticks. 0x40 is the I/O port + * address of PIT counter 0, 0x43 is the I/O port address of the + * PIT control word. + */ + request_region(0x40,0x20,"timer"); + outb(0x34, 0x43); /* Control word */ + outb(LATCH & 0xff , 0x40); /* LSB */ + outb(LATCH >> 8, 0x40); /* MSB */ + printk("PIT: LATCH at 0x%x%x for %d HZ\n", LATCH >> 8, LATCH & 0xff, HZ); +#endif #endif #ifdef CONFIG_SMP init_smp_config(); #endif screen_info = sn1_screen_info; } + +int +IS_RUNNING_ON_SIMULATOR(void) +{ +#ifdef CONFIG_IA64_SGI_SN1_SIM + long sn; + asm("mov %0=cpuid[%1]" : "=r"(sn) : "r"(2)); + return(sn == SNMAGIC); +#else + return(0); +#endif +} diff --git a/arch/ia64/sn/sn1/smp.c b/arch/ia64/sn/sn1/smp.c new file mode 100644 index 000000000..a1e26a549 --- /dev/null +++ b/arch/ia64/sn/sn1/smp.c @@ -0,0 +1,186 @@ +/* + * SN1 Platform specific SMP Support + * + * Copyright (C) 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 Jack Steiner <steiner@sgi.com> + */ + + + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/threads.h> +#include <linux/sched.h> +#include <linux/smp.h> + +#include <asm/sn/mmzone_sn1.h> +#include <asm/sal.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/current.h> +#include <asm/sn/sn_cpuid.h> + + + + +/* + * The following structure is used to pass params thru smp_call_function + * to other cpus for flushing TLB ranges. 
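For the ptc_params_t described above: if the PTC_NOTYET code were enabled, a caller would flush a virtual range on every CPU in the coherency domain roughly as follows (hypothetical usage only; sn1_ptc_global is compiled out in this patch):

/* Hypothetical usage: purge the TLB entries covering a single page
 * everywhere, passing the purge size as a bit count (PAGE_SHIFT). */
static void
flush_one_page_everywhere(unsigned long vaddr)
{
	sn1_ptc_global(vaddr, vaddr + PAGE_SIZE, PAGE_SHIFT);
}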
+ */ +typedef struct { + unsigned long start; + unsigned long end; + unsigned long nbits; +} ptc_params_t; + + +/* + * The following table/struct is for remembering PTC coherency domains. It + * is also used to translate sapicid into cpuids. We dont want to start + * cpus unless we know their cache domain. + */ +#ifdef PTC_NOTYET +sn_sapicid_info_t sn_sapicid_info[NR_CPUS]; +#endif + + + +#ifdef PTC_NOTYET +/* + * NOTE: This is probably not good enough, but I dont want to try to make + * it better until I get some statistics on a running system. + * At a minimum, we should only send IPIs to 1 processor in each TLB domain + * & have it issue a ptc.g on it's own FSB. Also, serialize per FSB, not + * globally. + * + * More likely, we will have to do some work to reduce the frequency of calls to + * this routine. + */ + +static void +sn1_ptc_local(void *arg) +{ + ptc_params_t *params = arg; + unsigned long start, end, nbits; + + start = params->start; + end = params->end; + nbits = params->nbits; + + do { + __asm__ __volatile__ ("ptc.l %0,%1" :: "r"(start), "r"(nbits<<2) : "memory"); + start += (1UL << nbits); + } while (start < end); +} + + +void +sn1_ptc_global (unsigned long start, unsigned long end, unsigned long nbits) +{ + ptc_params_t params; + + params.start = start; + params.end = end; + params.nbits = nbits; + + if (smp_call_function(sn1_ptc_local, ¶ms, 1, 0) != 0) + panic("Unable to do ptc_global - timed out"); + + sn1_ptc_local(¶ms); +} +#endif + + + + +void +sn1_send_IPI(int cpuid, int vector, int delivery_mode, int redirect) +{ + long *p, nasid, slice; + static int off[4] = {0x1800080, 0x1800088, 0x1a00080, 0x1a00088}; + + /* + * ZZZ - Replace with standard macros when available. + */ + nasid = cpuid_to_nasid(cpuid); + slice = cpuid_to_slice(cpuid); + p = (long*)(0xc0000a0000000000LL | (nasid<<33) | off[slice]); + +#if defined(ZZZBRINGUP) + { + static int count=0; + if (count++ < 10) printk("ZZ sendIPI 0x%x->0x%x, vec %d, nasid 0x%lx, slice %ld, adr 0x%lx\n", + smp_processor_id(), cpuid, vector, nasid, slice, (long)p); + } +#endif + mb(); + *p = (delivery_mode << 8) | (vector & 0xff); + +} + + +#ifdef CONFIG_SMP + +static void __init +process_sal_ptc_domain_info(ia64_sal_ptc_domain_info_t *di, int domain) +{ +#ifdef PTC_NOTYET + ia64_sal_ptc_domain_proc_entry_t *pe; + int i, sapicid, cpuid; + + pe = __va(di->proc_list); + for (i=0; i<di->proc_count; i++, pe++) { + sapicid = id_eid_to_sapicid(pe->id, pe->eid); + cpuid = cpu_logical_id(sapicid); + sn_sapicid_info[cpuid].domain = domain; + sn_sapicid_info[cpuid].sapicid = sapicid; + } +#endif +} + + +static void __init +process_sal_desc_ptc(ia64_sal_desc_ptc_t *ptc) +{ + ia64_sal_ptc_domain_info_t *di; + int i; + + di = __va(ptc->domain_info); + for (i=0; i<ptc->num_domains; i++, di++) { + process_sal_ptc_domain_info(di, i); + } +} + + +void __init +init_sn1_smp_config(void) +{ + + if (!ia64_ptc_domain_info) { + printk("SMP: Can't find PTC domain info. 
Forcing UP mode\n"); + smp_num_cpus = 1; + return; + } + +#ifdef PTC_NOTYET + memset (sn_sapicid_info, -1, sizeof(sn_sapicid_info)); + process_sal_desc_ptc(ia64_ptc_domain_info); +#endif + +} + +#else /* CONFIG_SMP */ + +void __init +init_sn1_smp_config(void) +{ + +#ifdef PTC_NOTYET + sn_sapicid_info[0].sapicid = hard_processor_sapicid(); +#endif +} + +#endif /* CONFIG_SMP */ diff --git a/arch/ia64/sn/sn1/sn1_asm.S b/arch/ia64/sn/sn1/sn1_asm.S new file mode 100644 index 000000000..3419d9374 --- /dev/null +++ b/arch/ia64/sn/sn1/sn1_asm.S @@ -0,0 +1,6 @@ + +/* + * Copyright (C) 2000 Silicon Graphics + * Copyright (C) 2000 Jack Steiner (steiner@sgi.com) + */ + diff --git a/arch/ia64/sn/sn1/synergy.c b/arch/ia64/sn/sn1/synergy.c new file mode 100644 index 000000000..76b583c73 --- /dev/null +++ b/arch/ia64/sn/sn1/synergy.c @@ -0,0 +1,204 @@ + +/* + * SN1 Platform specific synergy Support + * + * Copyright (C) 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 Alan Mayer (ajm@sgi.com) + */ + + + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> + +#include <asm/ptrace.h> +#include <linux/devfs_fs_kernel.h> +#include <asm/smp.h> +#include <asm/sn/sn_cpuid.h> +#include <asm/sn/sn1/bedrock.h> +#include <asm/sn/intr.h> +#include <asm/sn/addrs.h> +#include <asm/sn/synergy.h> + +int bit_pos_to_irq(int bit); +void setclear_mask_b(int irq, int cpuid, int set); +void setclear_mask_a(int irq, int cpuid, int set); +void * kmalloc(size_t size, int flags); + +extern struct sn1_cnode_action_list *sn1_node_actions[]; + + +void +synergy_intr_alloc(int bit, int cpuid) { + return; +} + +int +synergy_intr_connect(int bit, + int cpuid) +{ + int irq; + unsigned is_b; +int nasid; + +nasid = cpuid_to_nasid(cpuid); + irq = bit_pos_to_irq(bit); + + is_b = (cpuid_to_slice(cpuid)) & 1; + if (is_b) { + setclear_mask_b(irq,cpuid,1); + setclear_mask_a(irq,cpuid, 0); + } else { + setclear_mask_a(irq, cpuid, 1); + setclear_mask_b(irq, cpuid, 0); + } + return 0; +} +void +setclear_mask_a(int irq, int cpuid, int set) +{ + int synergy; + int nasid; + int reg_num; + unsigned long mask; + unsigned long addr; + unsigned long reg; + unsigned long val; + int my_cnode, my_synergy; + int target_cnode, target_synergy; + + /* + * Perform some idiot checks .. 
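For orientation, the register selection below works in 64-bit chunks: reg_num = irq / 64 picks one of the four VEC_MASK registers and mask = 1 << (irq % 64) is the bit within it, so, for example, irq 73 lands in VEC_MASK1A (or VEC_MASK1B) at bit 9.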
+ */ + if ( (irq < 0) || (irq > 255) || + (cpuid < 0) || (cpuid > 512) ) { + printk("clear_mask_a: Invalid parameter irq %d cpuid %d\n", irq, cpuid); + return; + } + + target_cnode = cpuid_to_cnodeid(cpuid); + target_synergy = cpuid_to_synergy(cpuid); + my_cnode = cpuid_to_cnodeid(smp_processor_id()); + my_synergy = cpuid_to_synergy(smp_processor_id()); + + reg_num = irq / 64; + mask = 1; + mask <<= (irq % 64); + switch (reg_num) { + case 0: + reg = VEC_MASK0A; + addr = VEC_MASK0A_ADDR; + break; + case 1: + reg = VEC_MASK1A; + addr = VEC_MASK1A_ADDR; + break; + case 2: + reg = VEC_MASK2A; + addr = VEC_MASK2A_ADDR; + break; + case 3: + reg = VEC_MASK3A; + addr = VEC_MASK3A_ADDR; + break; + default: + reg = addr = 0; + break; + } + if (my_cnode == target_cnode && my_synergy == target_synergy) { + // local synergy + val = READ_LOCAL_SYNERGY_REG(addr); + if (set) { + val |= mask; + } else { + val &= ~mask; + } + WRITE_LOCAL_SYNERGY_REG(addr, val); + val = READ_LOCAL_SYNERGY_REG(addr); + } else { /* remote synergy */ + synergy = cpuid_to_synergy(cpuid); + nasid = cpuid_to_nasid(cpuid); + val = REMOTE_SYNERGY_LOAD(nasid, synergy, reg); + if (set) { + val |= mask; + } else { + val &= ~mask; + } + REMOTE_SYNERGY_STORE(nasid, synergy, reg, val); + } +} + +void +setclear_mask_b(int irq, int cpuid, int set) +{ + int synergy; + int nasid; + int reg_num; + unsigned long mask; + unsigned long addr; + unsigned long reg; + unsigned long val; + int my_cnode, my_synergy; + int target_cnode, target_synergy; + + /* + * Perform some idiot checks .. + */ + if ( (irq < 0) || (irq > 255) || + (cpuid < 0) || (cpuid > 512) ) { + printk("clear_mask_b: Invalid parameter irq %d cpuid %d\n", irq, cpuid); + return; + } + + target_cnode = cpuid_to_cnodeid(cpuid); + target_synergy = cpuid_to_synergy(cpuid); + my_cnode = cpuid_to_cnodeid(smp_processor_id()); + my_synergy = cpuid_to_synergy(smp_processor_id()); + + reg_num = irq / 64; + mask = 1; + mask <<= (irq % 64); + switch (reg_num) { + case 0: + reg = VEC_MASK0B; + addr = VEC_MASK0B_ADDR; + break; + case 1: + reg = VEC_MASK1B; + addr = VEC_MASK1B_ADDR; + break; + case 2: + reg = VEC_MASK2B; + addr = VEC_MASK2B_ADDR; + break; + case 3: + reg = VEC_MASK3B; + addr = VEC_MASK3B_ADDR; + break; + default: + reg = addr = 0; + break; + } + if (my_cnode == target_cnode && my_synergy == target_synergy) { + // local synergy + val = READ_LOCAL_SYNERGY_REG(addr); + if (set) { + val |= mask; + } else { + val &= ~mask; + } + WRITE_LOCAL_SYNERGY_REG(addr, val); + val = READ_LOCAL_SYNERGY_REG(addr); + } else { /* remote synergy */ + synergy = cpuid_to_synergy(cpuid); + nasid = cpuid_to_nasid(cpuid); + val = REMOTE_SYNERGY_LOAD(nasid, synergy, reg); + if (set) { + val |= mask; + } else { + val &= ~mask; + } + REMOTE_SYNERGY_STORE(nasid, synergy, reg, val); + } +} diff --git a/arch/ia64/sn/tools/make_textsym b/arch/ia64/sn/tools/make_textsym new file mode 100644 index 000000000..0a0787f08 --- /dev/null +++ b/arch/ia64/sn/tools/make_textsym @@ -0,0 +1,138 @@ +#!/bin/sh +# Build a textsym file for use in the Arium ITP probe. + +help() { +cat <<END +Build a WinDD "symtxt" file for use with the Arium ECM-30 probe. + + Usage: $0 [<vmlinux file> [<output file>]] + If no input file is specified, it defaults to vmlinux. + If no output file name is specified, it defaults to "textsym". +END +exit 1 +} + +err () { + echo "ERROR - $*" >&2 + exit 1 +} + + +OPTS="H" +while getopts "$OPTS" c ; do + case $c in + H) help;; + \?) 
help;; + esac + +done +shift `expr $OPTIND - 1` + +LINUX=${1:-vmlinux} +TEXTSYM=${2:-${LINUX}.sym} + +[ -f $VMLINUX ] || help + + +# pipe everything thru sort +echo "TEXTSYM V1.0" +(cat <<END +GLOBAL | e000000000500000 | CODE | VEC_VHPT_Translation_0000 +GLOBAL | e000000000500400 | CODE | VEC_ITLB_0400 +GLOBAL | e000000000500800 | CODE | VEC_DTLB_0800 +GLOBAL | e000000000500c00 | CODE | VEC_Alt_ITLB_0c00 +GLOBAL | e000000000501000 | CODE | VEC_Alt_DTLB_1000 +GLOBAL | e000000000501400 | CODE | VEC_Data_nested_TLB_1400 +GLOBAL | e000000000501800 | CODE | VEC_Instruction_Key_Miss_1800 +GLOBAL | e000000000501c00 | CODE | VEC_Data_Key_Miss_1c00 +GLOBAL | e000000000502000 | CODE | VEC_Dirty-bit_2000 +GLOBAL | e000000000502400 | CODE | VEC_Instruction_Access-bit_2400 +GLOBAL | e000000000502800 | CODE | VEC_Data_Access-bit_2800 +GLOBAL | e000000000502c00 | CODE | VEC_Break_instruction_2c00 +GLOBAL | e000000000503000 | CODE | VEC_External_Interrupt_3000 +GLOBAL | e000000000503400 | CODE | VEC_Reserved_3400 +GLOBAL | e000000000503800 | CODE | VEC_Reserved_3800 +GLOBAL | e000000000503c00 | CODE | VEC_Reserved_3c00 +GLOBAL | e000000000504000 | CODE | VEC_Reserved_4000 +GLOBAL | e000000000504400 | CODE | VEC_Reserved_4400 +GLOBAL | e000000000504800 | CODE | VEC_Reserved_4800 +GLOBAL | e000000000504c00 | CODE | VEC_Reserved_4c00 +GLOBAL | e000000000505000 | CODE | VEC_Page_Not_Present_5000 +GLOBAL | e000000000505100 | CODE | VEC_Key_Permission_5100 +GLOBAL | e000000000505200 | CODE | VEC_Instruction_Access_Rights_5200 +GLOBAL | e000000000505300 | CODE | VEC_Data_Access_Rights_5300 +GLOBAL | e000000000505400 | CODE | VEC_General_Exception_5400 +GLOBAL | e000000000505500 | CODE | VEC_Disabled_FP-Register_5500 +GLOBAL | e000000000505600 | CODE | VEC_Nat_Consumption_5600 +GLOBAL | e000000000505700 | CODE | VEC_Speculation_5700 +GLOBAL | e000000000505800 | CODE | VEC_Reserved_5800 +GLOBAL | e000000000505900 | CODE | VEC_Debug_5900 +GLOBAL | e000000000505a00 | CODE | VEC_Unaligned_Reference_5a00 +GLOBAL | e000000000505b00 | CODE | VEC_Unsupported_Data_Reference_5b00 +GLOBAL | e000000000505c00 | CODE | VEC_Floating-Point_Fault_5c00 +GLOBAL | e000000000505d00 | CODE | VEC_Floating_Point_Trap_5d00 +GLOBAL | e000000000505e00 | CODE | VEC_Lower_Privilege_Tranfer_Trap_5e00 +GLOBAL | e000000000505f00 | CODE | VEC_Taken_Branch_Trap_5f00 +GLOBAL | e000000000506000 | CODE | VEC_Single_Step_Trap_6000 +GLOBAL | e000000000506100 | CODE | VEC_Reserved_6100 +GLOBAL | e000000000506200 | CODE | VEC_Reserved_6200 +GLOBAL | e000000000506300 | CODE | VEC_Reserved_6300 +GLOBAL | e000000000506400 | CODE | VEC_Reserved_6400 +GLOBAL | e000000000506500 | CODE | VEC_Reserved_6500 +GLOBAL | e000000000506600 | CODE | VEC_Reserved_6600 +GLOBAL | e000000000506700 | CODE | VEC_Reserved_6700 +GLOBAL | e000000000506800 | CODE | VEC_Reserved_6800 +GLOBAL | e000000000506900 | CODE | VEC_IA-32_Exeception_6900 +GLOBAL | e000000000506a00 | CODE | VEC_IA-32_Intercept_6a00 +GLOBAL | e000000000506b00 | CODE | VEC_IA-32_Interrupt_6b00 +GLOBAL | e000000000506c00 | CODE | VEC_Reserved_6c00 +GLOBAL | e000000000506d00 | CODE | VEC_Reserved_6d00 +GLOBAL | e000000000506e00 | CODE | VEC_Reserved_6e00 +GLOBAL | e000000000506f00 | CODE | VEC_Reserved_6f00 +GLOBAL | e000000000507000 | CODE | VEC_Reserved_7000 +GLOBAL | e000000000507100 | CODE | VEC_Reserved_7100 +GLOBAL | e000000000507200 | CODE | VEC_Reserved_7200 +GLOBAL | e000000000507300 | CODE | VEC_Reserved_7300 +GLOBAL | e000000000507400 | CODE | VEC_Reserved_7400 +GLOBAL | e000000000507500 | CODE | 
VEC_Reserved_7500 +GLOBAL | e000000000507600 | CODE | VEC_Reserved_7600 +GLOBAL | e000000000507700 | CODE | VEC_Reserved_7700 +GLOBAL | e000000000507800 | CODE | VEC_Reserved_7800 +GLOBAL | e000000000507900 | CODE | VEC_Reserved_7900 +GLOBAL | e000000000507a00 | CODE | VEC_Reserved_7a00 +GLOBAL | e000000000507b00 | CODE | VEC_Reserved_7b00 +GLOBAL | e000000000507c00 | CODE | VEC_Reserved_7c00 +GLOBAL | e000000000507d00 | CODE | VEC_Reserved_7d00 +GLOBAL | e000000000507e00 | CODE | VEC_Reserved_7e00 +GLOBAL | e000000000507f00 | CODE | VEC_Reserved_7f00 +END + +$OBJDUMP -t $LINUX | sort | awk ' +/empty_zero_page/ {start=1} +/e0000000/ { + if ($4 == ".kdb") + next + if (start && substr($NF,1,1) != "0") { + type = substr($0,26,5) + if (type == ".text") + printf "GLOBAL | %s | CODE | %s\n", $1, $NF + else + printf "GLOBAL | %s | DATA | %s | %d\n", $1, $NF, $(NF-1) + } + if($NF == "_end") + exit + +} +' ) | egrep -v " __device| __vendor" | awk ' +/GLOBAL/ { + print $0 + print substr($0,1,9) substr($0,18,18) "Phy_" substr($0,36) + +} ' | sort -k3 + + + +N=`wc -l $TEXTSYM|awk '{print $1}'` +echo "Generated TEXTSYM file" >&2 +echo " $LINUX --> $TEXTSYM" >&2 +echo " Found $N symbols" >&2 diff --git a/arch/ia64/tools/print_offsets.c b/arch/ia64/tools/print_offsets.c index 3a7259b09..3765e09c6 100644 --- a/arch/ia64/tools/print_offsets.c +++ b/arch/ia64/tools/print_offsets.c @@ -149,7 +149,7 @@ tab[] = { "IA64_SWITCH_STACK_AR_UNAT_OFFSET", offsetof (struct switch_stack, ar_unat) }, { "IA64_SWITCH_STACK_AR_RNAT_OFFSET", offsetof (struct switch_stack, ar_rnat) }, { "IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET", offsetof (struct switch_stack, ar_bspstore) }, - { "IA64_SWITCH_STACK_PR_OFFSET", offsetof (struct switch_stack, b0) }, + { "IA64_SWITCH_STACK_PR_OFFSET", offsetof (struct switch_stack, pr) }, { "IA64_SIGCONTEXT_AR_BSP_OFFSET", offsetof (struct sigcontext, sc_ar_bsp) }, { "IA64_SIGCONTEXT_AR_RNAT_OFFSET", offsetof (struct sigcontext, sc_ar_rnat) }, { "IA64_SIGCONTEXT_FLAGS_OFFSET", offsetof (struct sigcontext, sc_flags) }, |
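The print_offsets.c fix above makes the generated constant name match the field it measures. A toy illustration of how such offsetof() constants are produced (toy_switch_stack is made up for the example and is not the real struct switch_stack):

#include <stddef.h>
#include <stdio.h>

/* Stand-in structure; the real struct switch_stack has many more fields. */
struct toy_switch_stack {
	unsigned long b0;
	unsigned long pr;
};

int
main(void)
{
	/* Emits the kind of line print_offsets writes into its generated header. */
	printf("#define TOY_SWITCH_STACK_PR_OFFSET\t%lu\n",
	       (unsigned long) offsetof(struct toy_switch_stack, pr));
	return 0;
}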