author | Ralf Baechle <ralf@linux-mips.org> | 2000-02-23 00:40:54 +0000
---|---|---
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-02-23 00:40:54 +0000
commit | 529c593ece216e4aaffd36bd940cb94f1fa63129 (patch) |
tree | 78f1c0b805f5656aa7b0417a043c5346f700a2cf /arch/ia64/kernel |
parent | 0bd079751d25808d1972baee5c4eaa1db2227257 (diff) |
Merge with 2.3.43. I did ignore all modifications to the qlogicisp.c
driver due to the Origin A64 hacks.
Diffstat (limited to 'arch/ia64/kernel')
35 files changed, 13909 insertions(+), 0 deletions(-)
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile new file mode 100644 index 000000000..7cb47da72 --- /dev/null +++ b/arch/ia64/kernel/Makefile @@ -0,0 +1,42 @@ +# +# Makefile for the linux kernel. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... + +.S.s: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -E -o $*.s $< +.S.o: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -c -o $*.o $< + +all: kernel.o head.o init_task.o + +O_TARGET := kernel.o +O_OBJS := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_default.o irq_internal.o ivt.o \ + pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o sal_stub.o semaphore.o setup.o signal.o \ + sys_ia64.o traps.o time.o unaligned.o unwind.o +#O_OBJS := fpreg.o +#OX_OBJS := ia64_ksyms.o + +ifeq ($(CONFIG_IA64_GENERIC),y) +O_OBJS += machvec.o +endif + +ifdef CONFIG_PCI +O_OBJS += pci.o +endif + +ifdef CONFIG_SMP +O_OBJS += smp.o irq_lock.o +endif + +ifeq ($(CONFIG_MCA),y) +O_OBJS += mca.o mca_asm.o +endif + +clean:: + +include $(TOPDIR)/Rules.make diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c new file mode 100644 index 000000000..e289efab6 --- /dev/null +++ b/arch/ia64/kernel/acpi.c @@ -0,0 +1,308 @@ +/* + * Advanced Configuration and Power Interface + * + * Based on 'ACPI Specification 1.0b' February 2, 1999 and + * 'IA-64 Extensions to ACPI Specification' Revision 0.6 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> + */ + +#include <linux/config.h> + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/string.h> +#include <linux/types.h> + +#include <asm/acpi-ext.h> +#include <asm/page.h> +#include <asm/efi.h> +#include <asm/io.h> +#include <asm/iosapic.h> +#include <asm/irq.h> + +#undef ACPI_DEBUG /* Guess what this does? */ + +#ifdef CONFIG_SMP +extern unsigned long ipi_base_addr; +#endif + +/* These are ugly but will be reclaimed by the kernel */ +int __initdata acpi_cpus = 0; +int __initdata acpi_apic_map[32]; +int __initdata cpu_cnt = 0; + +void (*pm_idle) (void); + +/* + * Identify usable CPU's and remember them for SMP bringup later. + */ +static void __init +acpi_lsapic(char *p) +{ + int add = 1; + + acpi_entry_lsapic_t *lsapic = (acpi_entry_lsapic_t *) p; + + if ((lsapic->flags & LSAPIC_PRESENT) == 0) + return; + + printk(" CPU %d (%.04x:%.04x): ", cpu_cnt, lsapic->eid, lsapic->id); + + if ((lsapic->flags & LSAPIC_ENABLED) == 0) { + printk("Disabled.\n"); + add = 0; + } else if (lsapic->flags & LSAPIC_PERFORMANCE_RESTRICTED) { + printk("Performance Restricted; ignoring.\n"); + add = 0; + } + + if (add) { + printk("Available.\n"); + acpi_cpus++; + acpi_apic_map[cpu_cnt] = (lsapic->id << 8) | lsapic->eid; + } + + cpu_cnt++; +} + +/* + * Find all IOSAPICs and tag the iosapic_vector structure with the appropriate + * base addresses. + */ +static void __init +acpi_iosapic(char *p) +{ + /* + * This is not good. ACPI is not necessarily limited to CONFIG_IA64_SV, yet + * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be + * a means for platform_setup() to register ACPI handlers? 
+ */ +#ifdef CONFIG_IA64_DIG + acpi_entry_iosapic_t *iosapic = (acpi_entry_iosapic_t *) p; + unsigned int ver; + int l, v, pins; + + ver = iosapic_version(iosapic->address); + pins = (ver >> 16) & 0xff; + + printk("IOSAPIC Version %x.%x: address 0x%lx IRQs 0x%x - 0x%x\n", + (ver & 0xf0) >> 4, (ver & 0x0f), iosapic->address, + iosapic->irq_base, iosapic->irq_base + pins); + + for (l = 0; l < pins; l++) { + v = map_legacy_irq(iosapic->irq_base + l); + if (v > IA64_MAX_VECTORED_IRQ) { + printk(" !!! IRQ %d > 255\n", v); + continue; + } + /* XXX Check for IOSAPIC collisions */ + iosapic_addr(v) = (unsigned long) ioremap(iosapic->address, 0); + iosapic_baseirq(v) = iosapic->irq_base; + } + iosapic_init(iosapic->address); +#endif +} + + +/* + * Configure legacy IRQ information in iosapic_vector + */ +static void __init +acpi_legacy_irq(char *p) +{ + /* + * This is not good. ACPI is not necessarily limited to CONFIG_IA64_SV, yet + * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be + * a means for platform_setup() to register ACPI handlers? + */ +#ifdef CONFIG_IA64_IRQ_ACPI + acpi_entry_int_override_t *legacy = (acpi_entry_int_override_t *) p; + unsigned char vector; + int i; + + vector = map_legacy_irq(legacy->isa_irq); + + /* + * Clobber any old pin mapping. It may be that it gets replaced later on + */ + for (i = 0; i < IA64_MAX_VECTORED_IRQ; i++) { + if (i == vector) + continue; + if (iosapic_pin(i) == iosapic_pin(vector)) + iosapic_pin(i) = 0xff; + } + + iosapic_pin(vector) = legacy->pin; + iosapic_bus(vector) = BUS_ISA; /* This table only overrides the ISA devices */ + iosapic_busdata(vector) = 0; + + /* + * External timer tick is special... + */ + if (vector != TIMER_IRQ) + iosapic_dmode(vector) = IO_SAPIC_LOWEST_PRIORITY; + else + iosapic_dmode(vector) = IO_SAPIC_FIXED; + + /* See MPS 1.4 section 4.3.4 */ + switch (legacy->flags) { + case 0x5: + iosapic_polarity(vector) = IO_SAPIC_POL_HIGH; + iosapic_trigger(vector) = IO_SAPIC_EDGE; + break; + case 0x8: + iosapic_polarity(vector) = IO_SAPIC_POL_LOW; + iosapic_trigger(vector) = IO_SAPIC_EDGE; + break; + case 0xd: + iosapic_polarity(vector) = IO_SAPIC_POL_HIGH; + iosapic_trigger(vector) = IO_SAPIC_LEVEL; + break; + case 0xf: + iosapic_polarity(vector) = IO_SAPIC_POL_LOW; + iosapic_trigger(vector) = IO_SAPIC_LEVEL; + break; + default: + printk(" ACPI Legacy IRQ 0x%02x: Unknown flags 0x%x\n", legacy->isa_irq, + legacy->flags); + break; + } + +#ifdef ACPI_DEBUG + printk("Legacy ISA IRQ %x -> IA64 Vector %x IOSAPIC Pin %x Active %s %s Trigger\n", + legacy->isa_irq, vector, iosapic_pin(vector), + ((iosapic_polarity(vector) == IO_SAPIC_POL_LOW) ? "Low" : "High"), + ((iosapic_trigger(vector) == IO_SAPIC_LEVEL) ? "Level" : "Edge")); +#endif /* ACPI_DEBUG */ + +#endif /* CONFIG_IA64_IRQ_ACPI */ +} + +/* + * Info on platform interrupt sources: NMI. PMI, INIT, etc. 
+ */ +static void __init +acpi_platform(char *p) +{ + acpi_entry_platform_src_t *plat = (acpi_entry_platform_src_t *) p; + + printk("PLATFORM: IOSAPIC %x -> Vector %lx on CPU %.04u:%.04u\n", + plat->iosapic_vector, plat->global_vector, plat->eid, plat->id); +} + +/* + * Parse the ACPI Multiple SAPIC Table + */ +static void __init +acpi_parse_msapic(acpi_sapic_t *msapic) +{ + char *p, *end; + + memset(&acpi_apic_map, -1, sizeof(acpi_apic_map)); + +#ifdef CONFIG_SMP + /* Base address of IPI Message Block */ + ipi_base_addr = ioremap(msapic->interrupt_block, 0); +#endif + + p = (char *) (msapic + 1); + end = p + (msapic->header.length - sizeof(acpi_sapic_t)); + + while (p < end) { + + switch (*p) { + case ACPI_ENTRY_LOCAL_SAPIC: + acpi_lsapic(p); + break; + + case ACPI_ENTRY_IO_SAPIC: + acpi_iosapic(p); + break; + + case ACPI_ENTRY_INT_SRC_OVERRIDE: + acpi_legacy_irq(p); + break; + + case ACPI_ENTRY_PLATFORM_INT_SOURCE: + acpi_platform(p); + break; + + default: + break; + } + + /* Move to next table entry. */ + p += *(p + 1); + } + + /* Make bootup pretty */ + printk(" %d CPUs available, %d CPUs total\n", acpi_cpus, cpu_cnt); +} + +int __init +acpi_parse(acpi_rsdp_t *rsdp) +{ + acpi_rsdt_t *rsdt; + acpi_desc_table_hdr_t *hdrp; + long tables, i; + + if (!rsdp) { + printk("Uh-oh, no ACPI Root System Description Pointer table!\n"); + return 0; + } + + if (strncmp(rsdp->signature, ACPI_RSDP_SIG, ACPI_RSDP_SIG_LEN)) { + printk("Uh-oh, ACPI RSDP signature incorrect!\n"); + return 0; + } + + rsdp->rsdt = __va(rsdp->rsdt); + rsdt = rsdp->rsdt; + if (strncmp(rsdt->header.signature, ACPI_RSDT_SIG, ACPI_RSDT_SIG_LEN)) { + printk("Uh-oh, ACPI RDST signature incorrect!\n"); + return 0; + } + + printk("ACPI: %.6s %.8s %d.%d\n", rsdt->header.oem_id, rsdt->header.oem_table_id, + rsdt->header.oem_revision >> 16, rsdt->header.oem_revision & 0xffff); + + tables = (rsdt->header.length - sizeof(acpi_desc_table_hdr_t)) / 8; + for (i = 0; i < tables; i++) { + hdrp = (acpi_desc_table_hdr_t *) __va(rsdt->entry_ptrs[i]); + + /* Only interested int the MSAPIC table for now ... */ + if (strncmp(hdrp->signature, ACPI_SAPIC_SIG, ACPI_SAPIC_SIG_LEN) != 0) + continue; + + acpi_parse_msapic((acpi_sapic_t *) hdrp); + } /* while() */ + + if (acpi_cpus == 0) { + printk("ACPI: Found 0 CPUS; assuming 1\n"); + acpi_cpus = 1; /* We've got at least one of these, no? */ + } + return 1; +} + +const char * +acpi_get_sysname (void) +{ + /* the following should go away once we have an ACPI parser: */ +#ifdef CONFIG_IA64_GENERIC + return "hpsim"; +#else +# if defined (CONFIG_IA64_HP_SIM) + return "hpsim"; +# elif defined (CONFIG_IA64_SGI_SN1_SIM) + return "sn1"; +# elif defined (CONFIG_IA64_DIG) + return "dig"; +# else +# error Unknown platform. Fix acpi.c. +# endif +#endif +} diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c new file mode 100644 index 000000000..dd7de2ab0 --- /dev/null +++ b/arch/ia64/kernel/efi.c @@ -0,0 +1,365 @@ +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 0.9 April 30, 1999 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 Hewlett-Packard Co. + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> + * + * All EFI Runtime Services are not implemented yet as EFI only + * supports physical mode addressing on SoftSDV. This is to be fixed + * in a future version. 
--drummond 1999-07-20 + * + * Implemented EFI runtime services and virtual mode calls. --davidm + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/time.h> + +#include <asm/efi.h> +#include <asm/io.h> +#include <asm/processor.h> + +#define EFI_DEBUG + +extern efi_status_t efi_call_phys (void *, ...); + +struct efi efi; + +static efi_runtime_services_t *runtime; + +static efi_status_t +phys_get_time (efi_time_t *tm, efi_time_cap_t *tc) +{ + return efi_call_phys(__va(runtime->get_time), __pa(tm), __pa(tc)); +} + +static efi_status_t +phys_set_time (efi_time_t *tm) +{ + return efi_call_phys(__va(runtime->set_time), __pa(tm)); +} + +static efi_status_t +phys_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm) +{ + return efi_call_phys(__va(runtime->get_wakeup_time), __pa(enabled), __pa(pending), + __pa(tm)); +} + +static efi_status_t +phys_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) +{ + return efi_call_phys(__va(runtime->set_wakeup_time), enabled, __pa(tm)); +} + +static efi_status_t +phys_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr, + unsigned long *data_size, void *data) +{ + return efi_call_phys(__va(runtime->get_variable), __pa(name), __pa(vendor), __pa(attr), + __pa(data_size), __pa(data)); +} + +static efi_status_t +phys_get_next_variable (unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) +{ + return efi_call_phys(__va(runtime->get_next_variable), __pa(name_size), __pa(name), + __pa(vendor)); +} + +static efi_status_t +phys_set_variable (efi_char16_t *name, efi_guid_t *vendor, u32 attr, + unsigned long data_size, void *data) +{ + return efi_call_phys(__va(runtime->set_variable), __pa(name), __pa(vendor), attr, + data_size, __pa(data)); +} + +static efi_status_t +phys_get_next_high_mono_count (u64 *count) +{ + return efi_call_phys(__va(runtime->get_next_high_mono_count), __pa(count)); +} + +static void +phys_reset_system (int reset_type, efi_status_t status, + unsigned long data_size, efi_char16_t *data) +{ + efi_call_phys(__va(runtime->reset_system), status, data_size, __pa(data)); +} + +/* + * Converts Gregorian date to seconds since 1970-01-01 00:00:00. + * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 + * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. + * + * [For the Julian calendar (which was used in Russia before 1917, + * Britain & colonies before 1752, anywhere else before 1582, + * and is still in use by some communities) leave out the + * -year/100+year/400 terms, and add 10.] + * + * This algorithm was first published by Gauss (I think). + * + * WARNING: this function will overflow on 2106-02-07 06:28:16 on + * machines were long is 32-bit! 
(However, as time_t is signed, we + * will already get problems at other places on 2038-01-19 03:14:08) + */ +static inline unsigned long +mktime (unsigned int year, unsigned int mon, unsigned int day, unsigned int hour, + unsigned int min, unsigned int sec) +{ + if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */ + mon += 12; /* Puts Feb last since it has leap day */ + year -= 1; + } + return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day) + + year*365 - 719499 + )*24 + hour /* now have hours */ + )*60 + min /* now have minutes */ + )*60 + sec; /* finally seconds */ +} + +void +efi_gettimeofday (struct timeval *tv) +{ + efi_time_t tm; + + memset(tv, 0, sizeof(tv)); + if ((*efi.get_time)(&tm, 0) != EFI_SUCCESS) + return; + + tv->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second); + tv->tv_usec = tm.nanosecond / 1000; +} + +/* + * Walks the EFI memory map and calls CALLBACK once for each EFI + * memory descriptor that has memory that is available for OS use. + */ +void +efi_memmap_walk (efi_freemem_callback_t callback, void *arg) +{ + int prev_valid = 0; + struct range { + u64 start; + u64 end; + } prev, curr; + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size, start, end; + + efi_map_start = __va(ia64_boot_param.efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param.efi_memmap_size; + efi_desc_size = ia64_boot_param.efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: +#ifndef CONFIG_IA64_VIRTUAL_MEM_MAP + if (md->phys_addr > 1024*1024*1024UL) { + printk("Warning: ignoring %luMB of memory above 1GB!\n", + md->num_pages >> 8); + md->type = EFI_UNUSABLE_MEMORY; + continue; + } +#endif + + curr.start = PAGE_OFFSET + md->phys_addr; + curr.end = curr.start + (md->num_pages << 12); + + if (!prev_valid) { + prev = curr; + prev_valid = 1; + } else { + if (curr.start < prev.start) + printk("Oops: EFI memory table not ordered!\n"); + + if (prev.end == curr.start) { + /* merge two consecutive memory ranges */ + prev.end = curr.end; + } else { + start = PAGE_ALIGN(prev.start); + end = prev.end & PAGE_MASK; + if ((end > start) && (*callback)(start, end, arg) < 0) + return; + prev = curr; + } + } + break; + + default: + continue; + } + } + if (prev_valid) { + start = PAGE_ALIGN(prev.start); + end = prev.end & PAGE_MASK; + if (end > start) + (*callback)(start, end, arg); + } +} + +void __init +efi_init (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_config_table_t *config_tables; + efi_memory_desc_t *md; + efi_char16_t *c16; + u64 efi_desc_size; + char vendor[100] = "unknown"; + int i; + + efi.systab = __va(ia64_boot_param.efi_systab); + + /* + * Verify the EFI Table + */ + if (efi.systab == NULL) + panic("Woah! Can't find EFI system table.\n"); + if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) + panic("Woah! 
EFI system table signature incorrect\n"); + if (efi.systab->hdr.revision != EFI_SYSTEM_TABLE_REVISION) + printk("Warning: EFI system table version mismatch: " + "got %d.%02d, expected %d.%02d\n", + efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, + EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff); + + config_tables = __va(efi.systab->tables); + + /* Show what we know for posterity */ + c16 = __va(efi.systab->fw_vendor); + if (c16) { + for (i = 0;i < sizeof(vendor) && *c16; ++i) + vendor[i] = *c16++; + vendor[i] = '\0'; + } + + printk("EFI v%u.%.02u by %s:", + efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); + + for (i = 0; i < efi.systab->nr_tables; i++) { + if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { + efi.mps = __va(config_tables[i].table); + printk(" MPS=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { + efi.acpi = __va(config_tables[i].table); + printk(" ACPI=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { + efi.smbios = __va(config_tables[i].table); + printk(" SMBIOS=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) { + efi.sal_systab = __va(config_tables[i].table); + printk(" SALsystab=0x%lx", config_tables[i].table); + } + } + printk("\n"); + + runtime = __va(efi.systab->runtime); + efi.get_time = phys_get_time; + efi.set_time = phys_set_time; + efi.get_wakeup_time = phys_get_wakeup_time; + efi.set_wakeup_time = phys_set_wakeup_time; + efi.get_variable = phys_get_variable; + efi.get_next_variable = phys_get_next_variable; + efi.set_variable = phys_set_variable; + efi.get_next_high_mono_count = phys_get_next_high_mono_count; + efi.reset_system = phys_reset_system; + + efi_map_start = __va(ia64_boot_param.efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param.efi_memmap_size; + efi_desc_size = ia64_boot_param.efi_memdesc_size; + +#ifdef EFI_DEBUG + /* print EFI memory map: */ + for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) { + md = p; + printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n", + i, md->type, md->attribute, + md->phys_addr, md->phys_addr + (md->num_pages<<12) - 1, md->num_pages >> 8); + } +#endif +} + +void +efi_enter_virtual_mode (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + efi_status_t status; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param.efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param.efi_memmap_size; + efi_desc_size = ia64_boot_param.efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->attribute & EFI_MEMORY_RUNTIME) { + /* + * Some descriptors have multiple bits set, so the order of + * the tests is relevant. 
+ */ + if (md->attribute & EFI_MEMORY_WB) { + md->virt_addr = (u64) __va(md->phys_addr); + } else if (md->attribute & EFI_MEMORY_UC) { + md->virt_addr = (u64) ioremap(md->phys_addr, 0); + } else if (md->attribute & EFI_MEMORY_WC) { +#if 0 + md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P + | _PAGE_D + | _PAGE_MA_WC + | _PAGE_PL_0 + | _PAGE_AR_RW)); +#else + printk("EFI_MEMORY_WC mapping\n"); + md->virt_addr = (u64) ioremap(md->phys_addr, 0); +#endif + } else if (md->attribute & EFI_MEMORY_WT) { +#if 0 + md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P + | _PAGE_D | _PAGE_MA_WT + | _PAGE_PL_0 + | _PAGE_AR_RW)); +#else + printk("EFI_MEMORY_WT mapping\n"); + md->virt_addr = (u64) ioremap(md->phys_addr, 0); +#endif + } + } + } + + status = efi_call_phys(__va(runtime->set_virtual_address_map), + ia64_boot_param.efi_memmap_size, + efi_desc_size, ia64_boot_param.efi_memdesc_version, + ia64_boot_param.efi_memmap); + if (status != EFI_SUCCESS) { + printk("Warning: unable to switch EFI into virtual mode (status=%lu)\n", status); + return; + } + + /* + * Now that EFI is in virtual mode, we arrange for EFI functions to be + * called directly: + */ + efi.get_time = __va(runtime->get_time); + efi.set_time = __va(runtime->set_time); + efi.get_wakeup_time = __va(runtime->get_wakeup_time); + efi.set_wakeup_time = __va(runtime->set_wakeup_time); + efi.get_variable = __va(runtime->get_variable); + efi.get_next_variable = __va(runtime->get_next_variable); + efi.set_variable = __va(runtime->set_variable); + efi.get_next_high_mono_count = __va(runtime->get_next_high_mono_count); + efi.reset_system = __va(runtime->reset_system); +} diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S new file mode 100644 index 000000000..4e6f1fc63 --- /dev/null +++ b/arch/ia64/kernel/efi_stub.S @@ -0,0 +1,141 @@ +/* + * EFI call stub. + * + * Copyright (C) 1999 David Mosberger <davidm@hpl.hp.com> + * + * This stub allows us to make EFI calls in physical mode with interrupts + * turned off. We need this because we can't call SetVirtualMap() until + * the kernel has booted far enough to allow allocation of struct vma_struct + * entries (which we would need to map stuff with memory attributes other + * than uncached or writeback...). Since the GetTime() service gets called + * earlier than that, we need to be able to make physical mode EFI calls from + * the kernel. + */ + +/* + * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System + * Abstraction Layer Specification", revision 2.6e). Note that + * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. + * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call + * (the br.ia instruction fails unless psr.dfl and psr.dfh are + * cleared). Fortunately, SAL promises not to touch the floating + * point regs, so at least we don't have to save f2-f127. + */ +#define PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ + IA64_PSR_DFL | IA64_PSR_DFH) + +#define PSR_BITS_TO_SET \ + (IA64_PSR_BN) + +#include <asm/processor.h> + + .text + .psr abi64 + .psr lsb + .lsb + + .text + +/* + * Switch execution mode from virtual to physical or vice versa. 
+ * + * Inputs: + * r16 = new psr to establish + */ + .proc switch_mode +switch_mode: + { + alloc r2=ar.pfs,0,0,0,0 + rsm psr.i | psr.ic // disable interrupts and interrupt collection + mov r15=ip + } + ;; + { + flushrs // must be first insn in group + srlz.i + shr.u r19=r15,61 // r19 <- top 3 bits of current IP + } + ;; + mov cr.ipsr=r16 // set new PSR + add r3=1f-switch_mode,r15 + xor r15=0x7,r19 // flip the region bits + + mov r17=ar.bsp + mov r14=rp // get return address into a general register + + // switch RSE backing store: + ;; + dep r17=r15,r17,61,3 // make ar.bsp physical or virtual + mov r18=ar.rnat // save ar.rnat + ;; + mov ar.bspstore=r17 // this steps on ar.rnat + dep r3=r15,r3,61,3 // make rfi return address physical or virtual + ;; + mov cr.iip=r3 + mov cr.ifs=r0 + dep sp=r15,sp,61,3 // make stack pointer physical or virtual + ;; + mov ar.rnat=r18 // restore ar.rnat + dep r14=r15,r14,61,3 // make function return address physical or virtual + rfi // must be last insn in group + ;; +1: mov rp=r14 + br.ret.sptk.few rp + .endp switch_mode + +/* + * Inputs: + * in0 = address of function descriptor of EFI routine to call + * in1..in7 = arguments to routine + * + * Outputs: + * r8 = EFI_STATUS returned by called function + */ + + .global efi_call_phys + .proc efi_call_phys +efi_call_phys: + + alloc loc0=ar.pfs,8,5,7,0 + ld8 r2=[in0],8 // load EFI function's entry point + mov loc1=rp + ;; + mov loc2=gp // save global pointer + mov loc4=ar.rsc // save RSE configuration + mov ar.rsc=r0 // put RSE in enforced lazy, LE mode + ;; + + ld8 gp=[in0] // load EFI function's global pointer + mov out0=in1 + mov out1=in2 + movl r16=PSR_BITS_TO_CLEAR + + mov loc3=psr // save processor status word + movl r17=PSR_BITS_TO_SET + ;; + mov out2=in3 + or loc3=loc3,r17 + mov b6=r2 + ;; + andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared + mov out3=in4 + br.call.sptk.few rp=switch_mode +.ret0: + mov out4=in5 + mov out5=in6 + mov out6=in7 + br.call.sptk.few rp=b6 // call the EFI function +.ret1: + mov ar.rsc=r0 // put RSE in enforced lazy, LE mode + mov r16=loc3 + br.call.sptk.few rp=switch_mode // return to virtual mode +.ret2: + mov ar.rsc=loc4 // restore RSE configuration + mov ar.pfs=loc0 + mov rp=loc1 + mov gp=loc2 + br.ret.sptk.few rp + + .endp efi_call_phys diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S new file mode 100644 index 000000000..87e77c677 --- /dev/null +++ b/arch/ia64/kernel/entry.S @@ -0,0 +1,1261 @@ +/* + * ia64/kernel/entry.S + * + * Kernel entry points. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com> + * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com> + */ +/* + * Global (preserved) predicate usage on syscall entry/exit path: + * + * + * pEOI: See entry.h. + * pKern: See entry.h. + * pSys: See entry.h. + * pNonSys: !pSys + * p2: (Alias of pKern!) True if any signals are pending. + * p16/p17: Used by stubs calling ia64_do_signal to indicate if current task + * has PF_PTRACED flag bit set. p16 is true if so, p17 is the complement. 
+ */ + +#include <linux/config.h> + +#include <asm/errno.h> +#include <asm/offsets.h> +#include <asm/processor.h> +#include <asm/unistd.h> + +#include "entry.h" + + .text + .psr abi64 + .psr lsb + .lsb + + /* + * execve() is special because in case of success, we need to + * setup a null register window frame. + */ + .align 16 + .proc ia64_execve +ia64_execve: + alloc loc0=ar.pfs,3,2,4,0 + mov loc1=rp + mov out0=in0 // filename + ;; // stop bit between alloc and call + mov out1=in1 // argv + mov out2=in2 // envp + add out3=16,sp // regs + br.call.sptk.few rp=sys_execve +.ret0: cmp4.ge p6,p0=r8,r0 + mov ar.pfs=loc0 // restore ar.pfs + ;; +(p6) mov ar.pfs=r0 // clear ar.pfs in case of success + sxt4 r8=r8 // return 64-bit result + mov rp=loc1 + + br.ret.sptk.few rp + .endp ia64_execve + + .align 16 + .global sys_clone + .proc sys_clone +sys_clone: + alloc r16=ar.pfs,2,2,3,0;; + movl r28=1f + mov loc1=rp + br.cond.sptk.many save_switch_stack +1: + mov loc0=r16 // save ar.pfs across do_fork + adds out2=IA64_SWITCH_STACK_SIZE+16,sp + adds r2=IA64_SWITCH_STACK_SIZE+IA64_PT_REGS_R12_OFFSET+16,sp + cmp.eq p8,p9=in1,r0 // usp == 0? + mov out0=in0 // out0 = clone_flags + ;; +(p8) ld8 out1=[r2] // fetch usp from pt_regs.r12 +(p9) mov out1=in1 + br.call.sptk.few rp=do_fork +.ret1: + mov ar.pfs=loc0 + adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack + mov rp=loc1 + ;; + br.ret.sptk.many rp + .endp sys_clone + +/* + * prev_task <- switch_to(struct task_struct *next) + */ + .align 16 + .global ia64_switch_to + .proc ia64_switch_to +ia64_switch_to: + alloc r16=ar.pfs,1,0,0,0 + movl r28=1f + br.cond.sptk.many save_switch_stack +1: + // disable interrupts to ensure atomicity for next few instructions: + mov r17=psr // M-unit + ;; + rsm psr.i // M-unit + dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff + ;; + srlz.d + ;; + adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 + adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 + ;; + st8 [r22]=sp // save kernel stack pointer of old task + ld8 sp=[r21] // load kernel stack pointer of new task + and r20=in0,r18 // physical address of "current" + ;; + mov r8=r13 // return pointer to previously running task + mov r13=in0 // set "current" pointer + mov ar.k6=r20 // copy "current" into ar.k6 + ;; + // restore interrupts + mov psr.l=r17 + ;; + srlz.d + + movl r28=1f + br.cond.sptk.many load_switch_stack +1: + br.ret.sptk.few rp + .endp ia64_switch_to + + /* + * Like save_switch_stack, but also save the stack frame that is active + * at the time this function is called. + */ + .align 16 + .proc save_switch_stack_with_current_frame +save_switch_stack_with_current_frame: +1: { + alloc r16=ar.pfs,0,0,0,0 // pass ar.pfs to save_switch_stack + mov r28=ip + } + ;; + adds r28=1f-1b,r28 + br.cond.sptk.many save_switch_stack +1: br.ret.sptk.few rp + .endp save_switch_stack_with_current_frame +/* + * Note that interrupts are enabled during save_switch_stack and + * load_switch_stack. This means that we may get an interrupt with + * "sp" pointing to the new kernel stack while ar.bspstore is still + * pointing to the old kernel backing store area. Since ar.rsc, + * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, + * this is not a problem. 
+ */ + +/* + * save_switch_stack: + * - r16 holds ar.pfs + * - r28 holds address to return to + * - rp (b0) holds return address to save + */ + .align 16 + .global save_switch_stack + .proc save_switch_stack +save_switch_stack: + flushrs // flush dirty regs to backing store (must be first in insn group) + mov r17=ar.unat // preserve caller's + adds r2=-IA64_SWITCH_STACK_SIZE+16,sp // r2 = &sw->caller_unat + ;; + mov r18=ar.fpsr // preserve fpsr + mov ar.rsc=r0 // put RSE in mode: enforced lazy, little endian, pl 0 + ;; + mov r19=ar.rnat + adds r3=-IA64_SWITCH_STACK_SIZE+24,sp // r3 = &sw->ar_fpsr + + // Note: the instruction ordering is important here: we can't + // store anything to the switch stack before sp is updated + // as otherwise an interrupt might overwrite the memory! + adds sp=-IA64_SWITCH_STACK_SIZE,sp + ;; + st8 [r2]=r17,16 + st8 [r3]=r18,24 + ;; + stf.spill [r2]=f2,32 + stf.spill [r3]=f3,32 + mov r21=b0 + ;; + stf.spill [r2]=f4,32 + stf.spill [r3]=f5,32 + ;; + stf.spill [r2]=f10,32 + stf.spill [r3]=f11,32 + mov r22=b1 + ;; + stf.spill [r2]=f12,32 + stf.spill [r3]=f13,32 + mov r23=b2 + ;; + stf.spill [r2]=f14,32 + stf.spill [r3]=f15,32 + mov r24=b3 + ;; + stf.spill [r2]=f16,32 + stf.spill [r3]=f17,32 + mov r25=b4 + ;; + stf.spill [r2]=f18,32 + stf.spill [r3]=f19,32 + mov r26=b5 + ;; + stf.spill [r2]=f20,32 + stf.spill [r3]=f21,32 + mov r17=ar.lc // I-unit + ;; + stf.spill [r2]=f22,32 + stf.spill [r3]=f23,32 + ;; + stf.spill [r2]=f24,32 + stf.spill [r3]=f25,32 + ;; + stf.spill [r2]=f26,32 + stf.spill [r3]=f27,32 + ;; + stf.spill [r2]=f28,32 + stf.spill [r3]=f29,32 + ;; + stf.spill [r2]=f30,32 + stf.spill [r3]=f31,24 + ;; + st8.spill [r2]=r4,16 + st8.spill [r3]=r5,16 + ;; + st8.spill [r2]=r6,16 + st8.spill [r3]=r7,16 + ;; + st8 [r2]=r21,16 // save b0 + st8 [r3]=r22,16 // save b1 + /* since we're done with the spills, read and save ar.unat: */ + mov r18=ar.unat // M-unit + mov r20=ar.bspstore // M-unit + ;; + st8 [r2]=r23,16 // save b2 + st8 [r3]=r24,16 // save b3 + ;; + st8 [r2]=r25,16 // save b4 + st8 [r3]=r26,16 // save b5 + ;; + st8 [r2]=r16,16 // save ar.pfs + st8 [r3]=r17,16 // save ar.lc + mov r21=pr + ;; + st8 [r2]=r18,16 // save ar.unat + st8 [r3]=r19,16 // save ar.rnat + mov b7=r28 + ;; + st8 [r2]=r20 // save ar.bspstore + st8 [r3]=r21 // save predicate registers + mov ar.rsc=3 // put RSE back into eager mode, pl 0 + br.cond.sptk.few b7 + .endp save_switch_stack + +/* + * load_switch_stack: + * - r28 holds address to return to + */ + .align 16 + .proc load_switch_stack +load_switch_stack: + invala // invalidate ALAT + adds r2=IA64_SWITCH_STACK_B0_OFFSET+16,sp // get pointer to switch_stack.b0 + mov ar.rsc=r0 // put RSE into enforced lazy mode + adds r3=IA64_SWITCH_STACK_B0_OFFSET+24,sp // get pointer to switch_stack.b1 + ;; + ld8 r21=[r2],16 // restore b0 + ld8 r22=[r3],16 // restore b1 + ;; + ld8 r23=[r2],16 // restore b2 + ld8 r24=[r3],16 // restore b3 + ;; + ld8 r25=[r2],16 // restore b4 + ld8 r26=[r3],16 // restore b5 + ;; + ld8 r16=[r2],16 // restore ar.pfs + ld8 r17=[r3],16 // restore ar.lc + ;; + ld8 r18=[r2],16 // restore ar.unat + ld8 r19=[r3],16 // restore ar.rnat + mov b0=r21 + ;; + ld8 r20=[r2] // restore ar.bspstore + ld8 r21=[r3] // restore predicate registers + mov ar.pfs=r16 + ;; + mov ar.bspstore=r20 + ;; + loadrs // invalidate stacked regs outside current frame + adds r2=16-IA64_SWITCH_STACK_SIZE,r2 // get pointer to switch_stack.caller_unat + ;; // stop bit for rnat dependency + mov ar.rnat=r19 + mov ar.unat=r18 // establish unat holding the NaT bits 
for r4-r7 + adds r3=16-IA64_SWITCH_STACK_SIZE,r3 // get pointer to switch_stack.ar_fpsr + ;; + ld8 r18=[r2],16 // restore caller's unat + ld8 r19=[r3],24 // restore fpsr + mov ar.lc=r17 + ;; + ldf.fill f2=[r2],32 + ldf.fill f3=[r3],32 + mov pr=r21,-1 + ;; + ldf.fill f4=[r2],32 + ldf.fill f5=[r3],32 + ;; + ldf.fill f10=[r2],32 + ldf.fill f11=[r3],32 + mov b1=r22 + ;; + ldf.fill f12=[r2],32 + ldf.fill f13=[r3],32 + mov b2=r23 + ;; + ldf.fill f14=[r2],32 + ldf.fill f15=[r3],32 + mov b3=r24 + ;; + ldf.fill f16=[r2],32 + ldf.fill f17=[r3],32 + mov b4=r25 + ;; + ldf.fill f18=[r2],32 + ldf.fill f19=[r3],32 + mov b5=r26 + ;; + ldf.fill f20=[r2],32 + ldf.fill f21=[r3],32 + ;; + ldf.fill f22=[r2],32 + ldf.fill f23=[r3],32 + ;; + ldf.fill f24=[r2],32 + ldf.fill f25=[r3],32 + ;; + ldf.fill f26=[r2],32 + ldf.fill f27=[r3],32 + ;; + ldf.fill f28=[r2],32 + ldf.fill f29=[r3],32 + ;; + ldf.fill f30=[r2],32 + ldf.fill f31=[r3],24 + ;; + ld8.fill r4=[r2],16 + ld8.fill r5=[r3],16 + mov b7=r28 + ;; + ld8.fill r6=[r2],16 + ld8.fill r7=[r3],16 + mov ar.unat=r18 // restore caller's unat + mov ar.fpsr=r19 // restore fpsr + mov ar.rsc=3 // put RSE back into eager mode, pl 0 + adds sp=IA64_SWITCH_STACK_SIZE,sp // pop switch_stack + br.cond.sptk.few b7 + .endp load_switch_stack + + .align 16 + .global __ia64_syscall + .proc __ia64_syscall +__ia64_syscall: + .regstk 6,0,0,0 + mov r15=in5 // put syscall number in place + break __BREAK_SYSCALL + movl r2=errno + cmp.eq p6,p7=-1,r10 + ;; +(p6) st4 [r2]=r8 +(p6) mov r8=-1 + br.ret.sptk.few rp + .endp __ia64_syscall + + // + // We invoke syscall_trace through this intermediate function to + // ensure that the syscall input arguments are not clobbered. We + // also use it to preserve b6, which contains the syscall entry point. + // + .align 16 + .global invoke_syscall_trace + .proc invoke_syscall_trace +invoke_syscall_trace: + alloc loc0=ar.pfs,8,3,0,0 + ;; // WAW on CFM at the br.call + mov loc1=rp + br.call.sptk.many rp=save_switch_stack_with_current_frame // must preserve b6!! +.ret2: mov loc2=b6 + br.call.sptk.few rp=syscall_trace +.ret3: adds sp=IA64_SWITCH_STACK_SIZE,sp // drop switch_stack frame + mov rp=loc1 + mov ar.pfs=loc0 + mov b6=loc2 + ;; + br.ret.sptk.few rp + .endp invoke_syscall_trace + + // + // Invoke a system call, but do some tracing before and after the call. + // We MUST preserve the current register frame throughout this routine + // because some system calls (such as ia64_execve) directly + // manipulate ar.pfs. + // + // Input: + // r15 = syscall number + // b6 = syscall entry point + // + .global ia64_trace_syscall + .global ia64_strace_leave_kernel + .global ia64_strace_clear_r8 + + .proc ia64_strace_clear_r8 +ia64_strace_clear_r8: // this is where we return after cloning when PF_TRACESYS is on +# ifdef CONFIG_SMP + br.call.sptk.few rp=invoke_schedule_tail +# endif + mov r8=0 + br strace_check_retval + .endp ia64_strace_clear_r8 + + .proc ia64_trace_syscall +ia64_trace_syscall: + br.call.sptk.few rp=invoke_syscall_trace // give parent a chance to catch syscall args +.ret4: br.call.sptk.few rp=b6 // do the syscall +strace_check_retval: +.ret5: cmp.lt p6,p0=r8,r0 // syscall failed? 
+ ;; + adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8 + adds r3=IA64_PT_REGS_R8_OFFSET+32,sp // r3 = &pt_regs.r10 + mov r10=0 +(p6) br.cond.sptk.few strace_error // syscall failed -> + ;; // avoid RAW on r10 +strace_save_retval: + st8.spill [r2]=r8 // store return value in slot for r8 + st8.spill [r3]=r10 // clear error indication in slot for r10 +ia64_strace_leave_kernel: + br.call.sptk.few rp=invoke_syscall_trace // give parent a chance to catch return value +.ret6: br.cond.sptk.many ia64_leave_kernel + +strace_error: + ld8 r3=[r2] // load pt_regs.r8 + sub r9=0,r8 // negate return value to get errno value + ;; + cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0? + adds r3=16,r2 // r3=&pt_regs.r10 + ;; +(p6) mov r10=-1 +(p6) mov r8=r9 + br.cond.sptk.few strace_save_retval + .endp ia64_trace_syscall + +/* + * A couple of convenience macros to help implement/understand the state + * restoration that happens at the end of ia64_ret_from_syscall. + */ +#define rARPR r31 +#define rCRIFS r30 +#define rCRIPSR r29 +#define rCRIIP r28 +#define rARRSC r27 +#define rARPFS r26 +#define rARUNAT r25 +#define rARRNAT r24 +#define rARBSPSTORE r23 +#define rKRBS r22 +#define rB6 r21 + + .align 16 + .global ia64_ret_from_syscall + .global ia64_ret_from_syscall_clear_r8 + .global ia64_leave_kernel + .proc ia64_ret_from_syscall +ia64_ret_from_syscall_clear_r8: +#ifdef CONFIG_SMP + // In SMP mode, we need to call schedule_tail to complete the scheduling process. + // Called by ia64_switch_to after do_fork()->copy_thread(). r8 contains the + // address of the previously executing task. + br.call.sptk.few rp=invoke_schedule_tail +.ret7: +#endif + mov r8=0 + ;; // added stop bits to prevent r8 dependency +ia64_ret_from_syscall: + cmp.ge p6,p7=r8,r0 // syscall executed successfully? + adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8 + adds r3=IA64_PT_REGS_R8_OFFSET+32,sp // r3 = &pt_regs.r10 + ;; +(p6) st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit +(p6) st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit +(p7) br.cond.spnt.few handle_syscall_error // handle potential syscall failure + +ia64_leave_kernel: + // check & deliver software interrupts (bottom half handlers): + + movl r2=bh_active // sheesh, why aren't these two in + movl r3=bh_mask // a struct?? + ;; + ld8 r2=[r2] + ld8 r3=[r3] + ;; + and r2=r2,r3 + ;; + cmp.ne p6,p7=r2,r0 // any soft interrupts ready for delivery? +(p6) br.call.dpnt.few rp=invoke_do_bottom_half +1: +(pKern) br.cond.dpnt.many restore_all // yup -> skip check for rescheduling & signal delivery + + // call schedule() until we find a task that doesn't have need_resched set: + +back_from_resched: + { .mii + adds r2=IA64_TASK_NEED_RESCHED_OFFSET,r13 + mov r3=ip + adds r14=IA64_TASK_SIGPENDING_OFFSET,r13 + } + ;; + ld8 r2=[r2] + ld4 r14=[r14] + mov rp=r3 // arrange for schedule() to return to back_from_resched + ;; + /* + * If pEOI is set, we need to write the cr.eoi now and then + * clear pEOI because both invoke_schedule() and + * handle_signal_delivery() may call the scheduler. Since + * we're returning to user-level, we get at most one nested + * interrupt of the same priority level, which doesn't tax the + * kernel stack too much. + */ +(pEOI) mov cr.eoi=r0 + cmp.ne p6,p0=r2,r0 + cmp.ne p2,p0=r14,r0 // NOTE: pKern is an alias for p2!! 
+(pEOI) cmp.ne pEOI,p0=r0,r0 // clear pEOI before calling schedule() + srlz.d +(p6) br.call.spnt.many b6=invoke_schedule // ignore return value +2: + // check & deliver pending signals: +(p2) br.call.spnt.few rp=handle_signal_delivery +restore_all: + + // start restoring the state saved on the kernel stack (struct pt_regs): + + adds r2=IA64_PT_REGS_R8_OFFSET+16,r12 + adds r3=IA64_PT_REGS_R8_OFFSET+24,r12 + ;; + ld8.fill r8=[r2],16 + ld8.fill r9=[r3],16 + ;; + ld8.fill r10=[r2],16 + ld8.fill r11=[r3],16 + ;; + ld8.fill r16=[r2],16 + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + ;; + ld8.fill r22=[r2],16 + ld8.fill r23=[r3],16 + ;; + ld8.fill r24=[r2],16 + ld8.fill r25=[r3],16 + ;; + ld8.fill r26=[r2],16 + ld8.fill r27=[r3],16 + ;; + ld8.fill r28=[r2],16 + ld8.fill r29=[r3],16 + ;; + ld8.fill r30=[r2],16 + ld8.fill r31=[r3],16 + ;; + ld8 r1=[r2],16 // ar.ccv + ld8 r13=[r3],16 // ar.fpsr + ;; + ld8 r14=[r2],16 // b0 + ld8 r15=[r3],16+8 // b7 + ;; + ldf.fill f6=[r2],32 + ldf.fill f7=[r3],32 + ;; + ldf.fill f8=[r2],32 + ldf.fill f9=[r3],32 + ;; + mov ar.ccv=r1 + mov ar.fpsr=r13 + mov b0=r14 + // turn off interrupts, interrupt collection, & data translation + rsm psr.i | psr.ic | psr.dt + ;; + srlz.i // EAS 2.5 + mov b7=r15 + ;; + invala // invalidate ALAT + dep r12=0,r12,61,3 // convert sp to physical address + bsw.0;; // switch back to bank 0 (must be last in insn group) + ;; +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + nop.i 0x0 + ;; + nop.i 0x0 + ;; + nop.i 0x0 + ;; +#endif + adds r16=16,r12 + adds r17=24,r12 + ;; + ld8 rCRIPSR=[r16],16 // load cr.ipsr + ld8 rCRIIP=[r17],16 // load cr.iip + ;; + ld8 rCRIFS=[r16],16 // load cr.ifs + ld8 rARUNAT=[r17],16 // load ar.unat + ;; + ld8 rARPFS=[r16],16 // load ar.pfs + ld8 rARRSC=[r17],16 // load ar.rsc + ;; + ld8 rARRNAT=[r16],16 // load ar.rnat (may be garbage) + ld8 rARBSPSTORE=[r17],16 // load ar.bspstore (may be garbage) + ;; + ld8 rARPR=[r16],16 // load predicates + ld8 rB6=[r17],16 // load b6 + ;; + ld8 r18=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 // load r1 + ;; + ld8.fill r2=[r16],16 + ld8.fill r3=[r17],16 + ;; + ld8.fill r12=[r16],16 + ld8.fill r13=[r17],16 + extr.u r19=rCRIPSR,32,2 // extract ps.cpl + ;; + ld8.fill r14=[r16],16 + ld8.fill r15=[r17],16 + cmp.eq p6,p7=r0,r19 // are we returning to kernel mode? (psr.cpl==0) + ;; + mov b6=rB6 + mov ar.pfs=rARPFS +(p6) br.cond.dpnt.few skip_rbs_switch + + /* + * Restore user backing store. + * + * NOTE: alloc, loadrs, and cover can't be predicated. + * + * XXX This needs some scheduling/tuning once we believe it + * really does work as intended. + */ + mov r16=ar.bsp // get existing backing store pointer +(pNonSys) br.cond.dpnt.few dont_preserve_current_frame + cover // add current frame into dirty partition + ;; + mov rCRIFS=cr.ifs // fetch the cr.ifs value that "cover" produced + mov r17=ar.bsp // get new backing store pointer + ;; + sub r16=r17,r16 // calculate number of bytes that were added to rbs + ;; + shl r16=r16,16 // shift additional frame size into position for loadrs + ;; + add r18=r16,r18 // adjust the loadrs value + ;; +#ifdef CONFIG_IA64_SOFTSDV_HACKS + // Reset ITM if we've missed a timer tick. 
Workaround for SoftSDV bug + mov r16 = r2 + mov r2 = ar.itc + mov r17 = cr.itm + ;; + cmp.gt p6,p7 = r2, r17 +(p6) addl r17 = 100, r2 + ;; + mov cr.itm = r17 + mov r2 = r16 +#endif +dont_preserve_current_frame: + alloc r16=ar.pfs,0,0,0,0 // drop the current call frame (noop for syscalls) + ;; + mov ar.rsc=r18 // load ar.rsc to be used for "loadrs" +#ifdef CONFIG_IA32_SUPPORT + tbit.nz p6,p0=rCRIPSR,IA64_PSR_IS_BIT + ;; +(p6) mov ar.rsc=r0 // returning to IA32 mode +#endif + ;; + loadrs + ;; + mov ar.bspstore=rARBSPSTORE + ;; + mov ar.rnat=rARRNAT // must happen with RSE in lazy mode + +skip_rbs_switch: + mov ar.rsc=rARRSC + mov ar.unat=rARUNAT + mov cr.ifs=rCRIFS // restore cr.ifs only if not a (synchronous) syscall +(pEOI) mov cr.eoi=r0 + mov pr=rARPR,-1 + mov cr.iip=rCRIIP + mov cr.ipsr=rCRIPSR + ;; + rfi;; // must be last instruction in an insn group + +handle_syscall_error: + /* + * Some system calls (e.g., ptrace, mmap) can return arbitrary + * values which could lead us to mistake a negative return + * value as a failed syscall. Those syscall must deposit + * a non-zero value in pt_regs.r8 to indicate an error. + * If pt_regs.r8 is zero, we assume that the call completed + * successfully. + */ + ld8 r3=[r2] // load pt_regs.r8 + sub r9=0,r8 // negate return value to get errno + ;; + mov r10=-1 // return -1 in pt_regs.r10 to indicate error + cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0? + adds r3=16,r2 // r3=&pt_regs.r10 + ;; +(p6) mov r9=r8 +(p6) mov r10=0 + ;; + st8.spill [r2]=r9 // store errno in pt_regs.r8 and set unat bit + st8.spill [r3]=r10 // store error indication in pt_regs.r10 and set unat bit + br.cond.sptk.many ia64_leave_kernel + .endp __ret_from_syscall + +#ifdef CONFIG_SMP + /* + * Invoke schedule_tail(task) while preserving in0-in7, which may be needed + * in case a system call gets restarted. + */ + .proc invoke_schedule_tail +invoke_schedule_tail: + alloc loc0=ar.pfs,8,2,1,0 + mov loc1=rp + mov out0=r8 // Address of previous task + ;; + br.call.sptk.few rp=schedule_tail +.ret8: + mov ar.pfs=loc0 + mov rp=loc1 + br.ret.sptk.many rp + .endp invoke_schedule_tail +#endif /* CONFIG_SMP */ + + /* + * Invoke do_bottom_half() while preserving in0-in7, which may be needed + * in case a system call gets restarted. + */ + .proc invoke_do_bottom_half +invoke_do_bottom_half: + alloc loc0=ar.pfs,8,2,0,0 + mov loc1=rp + ;; + br.call.sptk.few rp=do_bottom_half +.ret9: + mov ar.pfs=loc0 + mov rp=loc1 + br.ret.sptk.many rp + .endp invoke_do_bottom_half + + /* + * Invoke schedule() while preserving in0-in7, which may be needed + * in case a system call gets restarted. + */ + .proc invoke_schedule +invoke_schedule: + alloc loc0=ar.pfs,8,2,0,0 + mov loc1=rp + ;; + br.call.sptk.few rp=schedule +.ret10: + mov ar.pfs=loc0 + mov rp=loc1 + br.ret.sptk.many rp + .endp invoke_schedule + + // + // Setup stack and call ia64_do_signal. Note that pSys and pNonSys need to + // be set up by the caller. We declare 8 input registers so the system call + // args get preserved, in case we need to restart a system call. + // + .align 16 + .proc handle_signal_delivery +handle_signal_delivery: + alloc loc0=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! + mov r9=ar.unat + + // If the process is being ptraced, the signal may not actually be delivered to + // the process. Instead, SIGCHLD will be sent to the parent. We need to + // setup a switch_stack so ptrace can inspect the processes state if necessary. 
+ adds r2=IA64_TASK_FLAGS_OFFSET,r13 + ;; + ld8 r2=[r2] + mov out0=0 // there is no "oldset" + adds out1=16,sp // out1=&pt_regs + ;; +(pSys) mov out2=1 // out2==1 => we're in a syscall + tbit.nz p16,p17=r2,PF_PTRACED_BIT +(p16) br.cond.spnt.many setup_switch_stack + ;; +back_from_setup_switch_stack: +(pNonSys) mov out2=0 // out2==0 => not a syscall + adds r3=-IA64_SWITCH_STACK_SIZE+IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp +(p17) adds sp=-IA64_SWITCH_STACK_SIZE,sp // make space for (dummy) switch_stack + ;; +(p17) st8 [r3]=r9 // save ar.unat in sw->caller_unat + mov loc1=rp // save return address + br.call.sptk.few rp=ia64_do_signal +.ret11: + adds r3=IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp + ;; + ld8 r9=[r3] // load new unat from sw->caller_unat + mov rp=loc1 + ;; +(p17) adds sp=IA64_SWITCH_STACK_SIZE,sp // drop (dummy) switch_stack +(p17) mov ar.unat=r9 +(p17) mov ar.pfs=loc0 +(p17) br.ret.sptk.many rp + + // restore the switch stack (ptrace may have modified it): + movl r28=1f + br.cond.sptk.many load_switch_stack +1: br.ret.sptk.many rp + // NOT REACHED + +setup_switch_stack: + movl r28=back_from_setup_switch_stack + mov r16=loc0 + br.cond.sptk.many save_switch_stack + // NOT REACHED + + .endp handle_signal_delivery + + .align 16 + .proc sys_rt_sigsuspend + .global sys_rt_sigsuspend +sys_rt_sigsuspend: + alloc loc0=ar.pfs,2,2,3,0 + mov r9=ar.unat + + // If the process is being ptraced, the signal may not actually be delivered to + // the process. Instead, SIGCHLD will be sent to the parent. We need to + // setup a switch_stack so ptrace can inspect the processes state if necessary. + adds r2=IA64_TASK_FLAGS_OFFSET,r13 + ;; + ld8 r2=[r2] + mov out0=in0 // mask + mov out1=in1 // sigsetsize + ;; + adds out2=16,sp // out1=&pt_regs + tbit.nz p16,p17=r2,PF_PTRACED_BIT +(p16) br.cond.spnt.many sigsuspend_setup_switch_stack + ;; +back_from_sigsuspend_setup_switch_stack: + adds r3=-IA64_SWITCH_STACK_SIZE+IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp +(p17) adds sp=-IA64_SWITCH_STACK_SIZE,sp // make space for (dummy) switch_stack + ;; +(p17) st8 [r3]=r9 // save ar.unat in sw->caller_unat + mov loc1=rp // save return address + br.call.sptk.many rp=ia64_rt_sigsuspend +.ret12: + adds r3=IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp + ;; + ld8 r9=[r3] // load new unat from sw->caller_unat + mov rp=loc1 + ;; +(p17) adds sp=IA64_SWITCH_STACK_SIZE,sp // drop (dummy) switch_stack +(p17) mov ar.unat=r9 +(p17) mov ar.pfs=loc0 +(p17) br.ret.sptk.many rp + + // restore the switch stack (ptrace may have modified it): + movl r28=1f + br.cond.sptk.many load_switch_stack +1: br.ret.sptk.many rp + // NOT REACHED + +sigsuspend_setup_switch_stack: + movl r28=back_from_sigsuspend_setup_switch_stack + mov r16=loc0 + br.cond.sptk.many save_switch_stack + // NOT REACHED + + .endp sys_rt_sigsuspend + + .align 16 + .proc sys_rt_sigreturn +sys_rt_sigreturn: + alloc loc0=ar.pfs,8,1,1,0 // preserve all eight input regs in case of syscall restart! 
+ adds out0=16,sp // out0 = &pt_regs + ;; + adds sp=-IA64_SWITCH_STACK_SIZE,sp // make space for unat and padding + br.call.sptk.few rp=ia64_rt_sigreturn +.ret13: + adds r3=IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp + ;; + ld8 r9=[r3] // load new ar.unat + mov rp=r8 + ;; + adds sp=IA64_SWITCH_STACK_SIZE,sp // drop (dummy) switch-stack frame + mov ar.unat=r9 + mov ar.pfs=loc0 + br.ret.sptk.many rp + .endp sys_rt_sigreturn + + .align 16 + .global ia64_prepare_handle_unaligned + .proc ia64_prepare_handle_unaligned +ia64_prepare_handle_unaligned: + movl r28=1f + // + // r16 = fake ar.pfs, we simply need to make sure + // privilege is still 0 + // + mov r16=r0 + br.cond.sptk.few save_switch_stack +1: br.call.sptk.few rp=ia64_handle_unaligned // stack frame setup in ivt +.ret14: + movl r28=2f + br.cond.sptk.many load_switch_stack +2: br.cond.sptk.many rp // goes to ia64_leave_kernel + .endp ia64_prepare_handle_unaligned + +#ifdef CONFIG_KDB + // + // This gets called from ivt.S with: + // SAVE MIN with cover done + // SAVE REST done + // no parameters + // r15 has return value = ia64_leave_kernel + // + .align 16 + .global ia64_invoke_kdb + .proc ia64_invoke_kdb +ia64_invoke_kdb: + alloc r16=ar.pfs,0,0,4,0 + movl r28=1f // save_switch_stack protocol + ;; // avoid WAW on CFM + br.cond.sptk.many save_switch_stack // to flushrs +1: mov out0=4 // kdb entry reason + mov out1=0 // err number + adds out2=IA64_SWITCH_STACK_SIZE+16,sp // pt_regs + add out3=16,sp // switch_stack + br.call.sptk.few rp=kdb +.ret15: + movl r28=1f // load_switch_stack proto + br.cond.sptk.many load_switch_stack +1: br.ret.sptk.many rp + .endp ia64_invoke_kdb + + // + // When KDB is compiled in, we intercept each fault and give + // kdb a chance to run before calling the normal fault handler. + // + .align 16 + .global ia64_invoke_kdb_fault_handler + .proc ia64_invoke_kdb_fault_handler +ia64_invoke_kdb_fault_handler: + alloc r16=ar.pfs,5,1,5,0 + movl r28=1f + mov loc0=rp // save this + br.cond.sptk.many save_switch_stack // to flushrs + ;; // avoid WAW on CFM +1: mov out0=in0 // vector number + mov out1=in1 // cr.isr + mov out2=in2 // cr.ifa + mov out3=in3 // cr.iim + mov out4=in4 // cr.itir + br.call.sptk.few rp=ia64_kdb_fault_handler +.ret16: + + movl r28=1f + br.cond.sptk.many load_switch_stack +1: cmp.ne p6,p0=r8,r0 // did ia64_kdb_fault_handler return 0? + mov rp=loc0 +(p6) br.ret.spnt.many rp // no, we're done + ;; // avoid WAW on rp + mov out0=in0 // vector number + mov out1=in1 // cr.isr + mov out2=in2 // cr.ifa + mov out3=in3 // cr.iim + mov out4=in4 // cr.itir + mov in0=ar.pfs // preserve ar.pfs returned by load_switch_stack + br.call.sptk.few rp=ia64_fault // yup -> we need to invoke normal fault handler now +.ret17: + mov ar.pfs=in0 + mov rp=loc0 + br.ret.sptk.many rp + + .endp ia64_invoke_kdb_fault_handler + +#endif /* CONFIG_KDB */ + + .rodata + .align 8 + .globl sys_call_table +sys_call_table: + data8 sys_ni_syscall // This must be sys_ni_syscall! See ivt.S. 
+ data8 sys_exit // 1025 + data8 sys_read + data8 sys_write + data8 sys_open + data8 sys_close + data8 sys_creat // 1030 + data8 sys_link + data8 sys_unlink + data8 ia64_execve + data8 sys_chdir + data8 sys_fchdir // 1035 + data8 sys_utimes + data8 sys_mknod + data8 sys_chmod + data8 sys_chown + data8 sys_lseek // 1040 + data8 sys_getpid + data8 sys_getppid + data8 sys_mount + data8 sys_umount + data8 sys_setuid // 1045 + data8 sys_getuid + data8 sys_geteuid + data8 sys_ptrace + data8 sys_access + data8 sys_sync // 1050 + data8 sys_fsync + data8 sys_fdatasync + data8 sys_kill + data8 sys_rename + data8 sys_mkdir // 1055 + data8 sys_rmdir + data8 sys_dup + data8 sys_pipe + data8 sys_times + data8 ia64_brk // 1060 + data8 sys_setgid + data8 sys_getgid + data8 sys_getegid + data8 sys_acct + data8 sys_ioctl // 1065 + data8 sys_fcntl + data8 sys_umask + data8 sys_chroot + data8 sys_ustat + data8 sys_dup2 // 1070 + data8 sys_setreuid + data8 sys_setregid + data8 sys_getresuid + data8 sys_setresuid + data8 sys_getresgid // 1075 + data8 sys_setresgid + data8 sys_getgroups + data8 sys_setgroups + data8 sys_getpgid + data8 sys_setpgid // 1080 + data8 sys_setsid + data8 sys_getsid + data8 sys_sethostname + data8 sys_setrlimit + data8 sys_getrlimit // 1085 + data8 sys_getrusage + data8 sys_gettimeofday + data8 sys_settimeofday + data8 sys_select + data8 sys_poll // 1090 + data8 sys_symlink + data8 sys_readlink + data8 sys_uselib + data8 sys_swapon + data8 sys_swapoff // 1095 + data8 sys_reboot + data8 sys_truncate + data8 sys_ftruncate + data8 sys_fchmod + data8 sys_fchown // 1100 + data8 ia64_getpriority + data8 sys_setpriority + data8 sys_statfs + data8 sys_fstatfs + data8 sys_ioperm // 1105 + data8 sys_semget + data8 sys_semop + data8 sys_semctl + data8 sys_msgget + data8 sys_msgsnd // 1110 + data8 sys_msgrcv + data8 sys_msgctl + data8 sys_shmget + data8 ia64_shmat + data8 sys_shmdt // 1115 + data8 sys_shmctl + data8 sys_syslog + data8 sys_setitimer + data8 sys_getitimer + data8 sys_newstat // 1120 + data8 sys_newlstat + data8 sys_newfstat + data8 sys_vhangup + data8 sys_lchown + data8 sys_vm86 // 1125 + data8 sys_wait4 + data8 sys_sysinfo + data8 sys_clone + data8 sys_setdomainname + data8 sys_newuname // 1130 + data8 sys_adjtimex + data8 sys_create_module + data8 sys_init_module + data8 sys_delete_module + data8 sys_get_kernel_syms // 1135 + data8 sys_query_module + data8 sys_quotactl + data8 sys_bdflush + data8 sys_sysfs + data8 sys_personality // 1140 + data8 ia64_ni_syscall // sys_afs_syscall + data8 sys_setfsuid + data8 sys_setfsgid + data8 sys_getdents + data8 sys_flock // 1145 + data8 sys_readv + data8 sys_writev + data8 sys_pread + data8 sys_pwrite + data8 sys_sysctl // 1150 + data8 sys_mmap + data8 sys_munmap + data8 sys_mlock + data8 sys_mlockall + data8 sys_mprotect // 1155 + data8 sys_mremap + data8 sys_msync + data8 sys_munlock + data8 sys_munlockall + data8 sys_sched_getparam // 1160 + data8 sys_sched_setparam + data8 sys_sched_getscheduler + data8 sys_sched_setscheduler + data8 sys_sched_yield + data8 sys_sched_get_priority_max // 1165 + data8 sys_sched_get_priority_min + data8 sys_sched_rr_get_interval + data8 sys_nanosleep + data8 sys_nfsservctl + data8 sys_prctl // 1170 + data8 sys_getpagesize + data8 sys_mmap2 + data8 sys_pciconfig_read + data8 sys_pciconfig_write + data8 sys_perfmonctl // 1175 + data8 sys_sigaltstack + data8 sys_rt_sigaction + data8 sys_rt_sigpending + data8 sys_rt_sigprocmask + data8 sys_rt_sigqueueinfo // 1180 + data8 sys_rt_sigreturn + data8 
sys_rt_sigsuspend + data8 sys_rt_sigtimedwait + data8 sys_getcwd + data8 sys_capget // 1185 + data8 sys_capset + data8 sys_sendfile + data8 sys_ni_syscall // sys_getpmsg (STREAMS) + data8 sys_ni_syscall // sys_putpmsg (STREAMS) + data8 sys_socket // 1190 + data8 sys_bind + data8 sys_connect + data8 sys_listen + data8 sys_accept + data8 sys_getsockname // 1195 + data8 sys_getpeername + data8 sys_socketpair + data8 sys_send + data8 sys_sendto + data8 sys_recv // 1200 + data8 sys_recvfrom + data8 sys_shutdown + data8 sys_setsockopt + data8 sys_getsockopt + data8 sys_sendmsg // 1205 + data8 sys_recvmsg + data8 sys_pivot_root + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1210 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1215 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1220 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1225 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1230 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1235 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1240 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1245 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1250 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1255 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1260 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1265 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1270 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1275 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h new file mode 100644 index 000000000..ecef44f60 --- /dev/null +++ b/arch/ia64/kernel/entry.h @@ -0,0 +1,8 @@ +/* + * Preserved registers that are shared between code in ivt.S and entry.S. Be + * careful not to step on these! + */ +#define pEOI p1 /* should leave_kernel write EOI? */ +#define pKern p2 /* will leave_kernel return to kernel-mode? */ +#define pSys p4 /* are we processing a (synchronous) system call? */ +#define pNonSys p5 /* complement of pSys */ diff --git a/arch/ia64/kernel/fw-emu.c b/arch/ia64/kernel/fw-emu.c new file mode 100644 index 000000000..212ff299c --- /dev/null +++ b/arch/ia64/kernel/fw-emu.c @@ -0,0 +1,444 @@ +/* + * PAL & SAL emulation. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * For the HP simulator, this file gets include in boot/bootloader.c. + * For SoftSDV, this file gets included in sys_softsdv.c. 
+ */ +#include <linux/config.h> + +#ifdef CONFIG_PCI +# include <linux/pci.h> +#endif + +#include <asm/efi.h> +#include <asm/io.h> +#include <asm/pal.h> +#include <asm/sal.h> + +#define MB (1024*1024UL) + +#define NUM_MEM_DESCS 3 + +static char fw_mem[( sizeof(efi_system_table_t) + + sizeof(efi_runtime_services_t) + + 1*sizeof(efi_config_table_t) + + sizeof(struct ia64_sal_systab) + + sizeof(struct ia64_sal_desc_entry_point) + + NUM_MEM_DESCS*(sizeof(efi_memory_desc_t)) + + 1024)] __attribute__ ((aligned (8))); + +#ifdef CONFIG_IA64_HP_SIM + +/* Simulator system calls: */ + +#define SSC_EXIT 66 + +/* + * Simulator system call. + */ +static long +ssc (long arg0, long arg1, long arg2, long arg3, int nr) +{ + register long r8 asm ("r8"); + + asm volatile ("mov r15=%1\n\t" + "break 0x80001" + : "=r"(r8) + : "r"(nr), "r"(arg0), "r"(arg1), "r"(arg2), "r"(arg3)); + return r8; +} + +#define SECS_PER_HOUR (60 * 60) +#define SECS_PER_DAY (SECS_PER_HOUR * 24) + +/* Compute the `struct tm' representation of *T, + offset OFFSET seconds east of UTC, + and store year, yday, mon, mday, wday, hour, min, sec into *TP. + Return nonzero if successful. */ +int +offtime (unsigned long t, efi_time_t *tp) +{ + const unsigned short int __mon_yday[2][13] = + { + /* Normal years. */ + { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, + /* Leap years. */ + { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } + }; + long int days, rem, y; + const unsigned short int *ip; + + days = t / SECS_PER_DAY; + rem = t % SECS_PER_DAY; + while (rem < 0) { + rem += SECS_PER_DAY; + --days; + } + while (rem >= SECS_PER_DAY) { + rem -= SECS_PER_DAY; + ++days; + } + tp->hour = rem / SECS_PER_HOUR; + rem %= SECS_PER_HOUR; + tp->minute = rem / 60; + tp->second = rem % 60; + /* January 1, 1970 was a Thursday. */ + y = 1970; + +# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0)) +# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400)) +# define __isleap(year) \ + ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0)) + + while (days < 0 || days >= (__isleap (y) ? 366 : 365)) { + /* Guess a corrected year, assuming 365 days per year. */ + long int yg = y + days / 365 - (days % 365 < 0); + + /* Adjust DAYS and Y to match the guessed year. */ + days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1) + - LEAPS_THRU_END_OF (y - 1)); + y = yg; + } + tp->year = y; + ip = __mon_yday[__isleap(y)]; + for (y = 11; days < (long int) ip[y]; --y) + continue; + days -= ip[y]; + tp->month = y + 1; + tp->day = days + 1; + return 1; +} + +#endif /* CONFIG_IA64_HP_SIM */ + +/* + * Very ugly, but we need this in the simulator only. Once we run on + * real hw, this can all go away. + */ +extern void pal_emulator_static (void); + +asm (" + .proc pal_emulator_static +pal_emulator_static: + mov r8=-1 + cmp.eq p6,p7=6,r28 /* PAL_PTCE_INFO */ +(p7) br.cond.sptk.few 1f + ;; + mov r8=0 /* status = 0 */ + movl r9=0x100000000 /* tc.base */ + movl r10=0x0000000200000003 /* count[0], count[1] */ + movl r11=0x1000000000002000 /* stride[0], stride[1] */ + br.cond.sptk.few rp + +1: cmp.eq p6,p7=14,r28 /* PAL_FREQ_RATIOS */ +(p7) br.cond.sptk.few 1f + mov r8=0 /* status = 0 */ + movl r9 =0x100000064 /* proc_ratio (1/100) */ + movl r10=0x100000100 /* bus_ratio<<32 (1/256) */ + movl r11=0x100000064 /* itc_ratio<<32 (1/100) */ +1: br.cond.sptk.few rp + .endp pal_emulator_static\n"); + +/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. 
*/ + +#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3) + +#define REG_OFFSET(addr) (0x00000000000000FF & (addr)) +#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr)) +#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr)) + +static efi_status_t +efi_get_time (efi_time_t *tm, efi_time_cap_t *tc) +{ +#ifdef CONFIG_IA64_HP_SIM + struct { + int tv_sec; /* must be 32bits to work */ + int tv_usec; + } tv32bits; + + ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD); + + memset(tm, 0, sizeof(*tm)); + offtime(tv32bits.tv_sec, tm); + + if (tc) + memset(tc, 0, sizeof(*tc)); +#else +# error Not implemented yet... +#endif + return EFI_SUCCESS; +} + +static void +efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data) +{ +#ifdef CONFIG_IA64_HP_SIM + ssc(status, 0, 0, 0, SSC_EXIT); +#else +# error Not implemented yet... +#endif +} + +static efi_status_t +efi_unimplemented (void) +{ + return EFI_UNSUPPORTED; +} + +static long +sal_emulator (long index, unsigned long in1, unsigned long in2, + unsigned long in3, unsigned long in4, unsigned long in5, + unsigned long in6, unsigned long in7) +{ + register long r9 asm ("r9") = 0; + register long r10 asm ("r10") = 0; + register long r11 asm ("r11") = 0; + long status; + + /* + * Don't do a "switch" here since that gives us code that + * isn't self-relocatable. + */ + status = 0; + if (index == SAL_FREQ_BASE) { + switch (in1) { + case SAL_FREQ_BASE_PLATFORM: + r9 = 100000000; + break; + + case SAL_FREQ_BASE_INTERVAL_TIMER: + /* + * Is this supposed to be the cr.itc frequency + * or something platform specific? The SAL + * doc ain't exactly clear on this... + */ +#if defined(CONFIG_IA64_SOFTSDV_HACKS) + r9 = 4000000; +#elif defined(CONFIG_IA64_SDV) + r9 = 300000000; +#else + r9 = 700000000; +#endif + break; + + case SAL_FREQ_BASE_REALTIME_CLOCK: + r9 = 1; + break; + + default: + status = -1; + break; + } + } else if (index == SAL_SET_VECTORS) { + ; + } else if (index == SAL_GET_STATE_INFO) { + ; + } else if (index == SAL_GET_STATE_INFO_SIZE) { + ; + } else if (index == SAL_CLEAR_STATE_INFO) { + ; + } else if (index == SAL_MC_RENDEZ) { + ; + } else if (index == SAL_MC_SET_PARAMS) { + ; + } else if (index == SAL_CACHE_FLUSH) { + ; + } else if (index == SAL_CACHE_INIT) { + ; +#ifdef CONFIG_PCI + } else if (index == SAL_PCI_CONFIG_READ) { + /* + * in1 contains the PCI configuration address and in2 + * the size of the read. The value that is read is + * returned via the general register r9. + */ + outl(BUILD_CMD(in1), 0xCF8); + if (in2 == 1) /* Reading byte */ + r9 = inb(0xCFC + ((REG_OFFSET(in1) & 3))); + else if (in2 == 2) /* Reading word */ + r9 = inw(0xCFC + ((REG_OFFSET(in1) & 2))); + else /* Reading dword */ + r9 = inl(0xCFC); + status = PCIBIOS_SUCCESSFUL; + } else if (index == SAL_PCI_CONFIG_WRITE) { + /* + * in1 contains the PCI configuration address, in2 the + * size of the write, and in3 the actual value to be + * written out. 
+ */ + outl(BUILD_CMD(in1), 0xCF8); + if (in2 == 1) /* Writing byte */ + outb(in3, 0xCFC + ((REG_OFFSET(in1) & 3))); + else if (in2 == 2) /* Writing word */ + outw(in3, 0xCFC + ((REG_OFFSET(in1) & 2))); + else /* Writing dword */ + outl(in3, 0xCFC); + status = PCIBIOS_SUCCESSFUL; +#endif /* CONFIG_PCI */ + } else if (index == SAL_UPDATE_PAL) { + ; + } else { + status = -1; + } + asm volatile ("" :: "r"(r9), "r"(r10), "r"(r11)); + return status; +} + + +/* + * This is here to work around a bug in egcs-1.1.1b that causes the + * compiler to crash (seems like a bug in the new alias analysis code. + */ +void * +id (long addr) +{ + return (void *) addr; +} + +void +sys_fw_init (const char *args, int arglen) +{ + efi_system_table_t *efi_systab; + efi_runtime_services_t *efi_runtime; + efi_config_table_t *efi_tables; + struct ia64_sal_systab *sal_systab; + efi_memory_desc_t *efi_memmap, *md; + unsigned long *pal_desc, *sal_desc; + struct ia64_sal_desc_entry_point *sal_ed; + struct ia64_boot_param *bp; + unsigned char checksum = 0; + char *cp, *cmd_line; + + memset(fw_mem, 0, sizeof(fw_mem)); + + pal_desc = (unsigned long *) &pal_emulator_static; + sal_desc = (unsigned long *) &sal_emulator; + + cp = fw_mem; + efi_systab = (void *) cp; cp += sizeof(*efi_systab); + efi_runtime = (void *) cp; cp += sizeof(*efi_runtime); + efi_tables = (void *) cp; cp += sizeof(*efi_tables); + sal_systab = (void *) cp; cp += sizeof(*sal_systab); + sal_ed = (void *) cp; cp += sizeof(*sal_ed); + efi_memmap = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap); + cmd_line = (void *) cp; + + if (args) { + if (arglen >= 1024) + arglen = 1023; + memcpy(cmd_line, args, arglen); + } else { + arglen = 0; + } + cmd_line[arglen] = '\0'; + + memset(efi_systab, 0, sizeof(efi_systab)); + efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE; + efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION; + efi_systab->hdr.headersize = sizeof(efi_systab->hdr); + efi_systab->fw_vendor = __pa("H\0e\0w\0l\0e\0t\0t\0-\0P\0a\0c\0k\0a\0r\0d\0\0"); + efi_systab->fw_revision = 1; + efi_systab->runtime = __pa(efi_runtime); + efi_systab->nr_tables = 1; + efi_systab->tables = __pa(efi_tables); + + efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE; + efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION; + efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr); + efi_runtime->get_time = __pa(&efi_get_time); + efi_runtime->set_time = __pa(&efi_unimplemented); + efi_runtime->get_wakeup_time = __pa(&efi_unimplemented); + efi_runtime->set_wakeup_time = __pa(&efi_unimplemented); + efi_runtime->set_virtual_address_map = __pa(&efi_unimplemented); + efi_runtime->get_variable = __pa(&efi_unimplemented); + efi_runtime->get_next_variable = __pa(&efi_unimplemented); + efi_runtime->set_variable = __pa(&efi_unimplemented); + efi_runtime->get_next_high_mono_count = __pa(&efi_unimplemented); + efi_runtime->reset_system = __pa(&efi_reset_system); + + efi_tables->guid = SAL_SYSTEM_TABLE_GUID; + efi_tables->table = __pa(sal_systab); + + /* fill in the SAL system table: */ + memcpy(sal_systab->signature, "SST_", 4); + sal_systab->size = sizeof(*sal_systab); + sal_systab->sal_rev_minor = 1; + sal_systab->sal_rev_major = 0; + sal_systab->entry_count = 1; + sal_systab->ia32_bios_present = 0; + +#ifdef CONFIG_IA64_GENERIC + strcpy(sal_systab->oem_id, "Generic"); + strcpy(sal_systab->product_id, "IA-64 system"); +#endif + +#ifdef CONFIG_IA64_HP_SIM + strcpy(sal_systab->oem_id, "Hewlett-Packard"); + strcpy(sal_systab->product_id, "HP-simulator"); +#endif + 
+#ifdef CONFIG_IA64_SDV + strcpy(sal_systab->oem_id, "Intel"); + strcpy(sal_systab->product_id, "SDV"); +#endif + +#ifdef CONFIG_IA64_SGI_SN1_SIM + strcpy(sal_systab->oem_id, "SGI"); + strcpy(sal_systab->product_id, "SN1"); +#endif + + /* fill in an entry point: */ + sal_ed->type = SAL_DESC_ENTRY_POINT; + sal_ed->pal_proc = __pa(pal_desc[0]); + sal_ed->sal_proc = __pa(sal_desc[0]); + sal_ed->gp = __pa(sal_desc[1]); + + for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp) + checksum += *cp; + + sal_systab->checksum = -checksum; + + /* fill in a memory descriptor: */ + md = &efi_memmap[0]; + md->type = EFI_CONVENTIONAL_MEMORY; + md->pad = 0; + md->phys_addr = 2*MB; + md->virt_addr = 0; + md->num_pages = (64*MB) >> 12; /* 64MB (in 4KB pages) */ + md->attribute = EFI_MEMORY_WB; + + /* descriptor for firmware emulator: */ + md = &efi_memmap[1]; + md->type = EFI_RUNTIME_SERVICES_DATA; + md->pad = 0; + md->phys_addr = 1*MB; + md->virt_addr = 0; + md->num_pages = (1*MB) >> 12; /* 1MB (in 4KB pages) */ + md->attribute = EFI_MEMORY_WB; + + /* descriptor for high memory (>4GB): */ + md = &efi_memmap[2]; + md->type = EFI_CONVENTIONAL_MEMORY; + md->pad = 0; + md->phys_addr = 4096*MB; + md->virt_addr = 0; + md->num_pages = (32*MB) >> 12; /* 32MB (in 4KB pages) */ + md->attribute = EFI_MEMORY_WB; + + bp = id(ZERO_PAGE_ADDR); + bp->efi_systab = __pa(&fw_mem); + bp->efi_memmap = __pa(efi_memmap); + bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t); + bp->efi_memdesc_size = sizeof(efi_memory_desc_t); + bp->efi_memdesc_version = 1; + bp->command_line = __pa(cmd_line); + bp->console_info.num_cols = 80; + bp->console_info.num_rows = 25; + bp->console_info.orig_x = 0; + bp->console_info.orig_y = 24; + bp->num_pci_vectors = 0; + bp->fpswa = 0; +} diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S new file mode 100644 index 000000000..24dc10ee4 --- /dev/null +++ b/arch/ia64/kernel/gate.S @@ -0,0 +1,200 @@ +/* + * This file contains the code that gets mapped at the upper end of + * each task's text region. For now, it contains the signal + * trampoline code only. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#include <asm/offsets.h> +#include <asm/sigcontext.h> +#include <asm/system.h> +#include <asm/unistd.h> +#include <asm/page.h> + + .psr abi64 + .psr lsb + .lsb + + .section __gate_section,"ax" + + .align PAGE_SIZE + +# define SIGINFO_OFF 16 +# define SIGCONTEXT_OFF (SIGINFO_OFF + ((IA64_SIGINFO_SIZE + 15) & ~15)) +# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET +# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET +# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET +# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET +# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET +# define base0 r2 +# define base1 r3 + /* + * When we get here, the memory stack looks like this: + * + * +===============================+ + * | | + * // struct sigcontext // + * | | + * +===============================+ <-- sp+SIGCONTEXT_OFF + * | | + * // rest of siginfo // + * | | + * + +---------------+ + * | | siginfo.code | + * +---------------+---------------+ + * | siginfo.errno | siginfo.signo | + * +-------------------------------+ <-- sp+SIGINFO_OFF + * | 16 byte of scratch | + * | space | + * +-------------------------------+ <-- sp + * + * The register stack looks _exactly_ the way it looked at the + * time the signal occurred. 
In other words, we're treading + * on a potential mine-field: each incoming general register + * may be a NaT value (includeing sp, in which case the process + * ends up dying with a SIGSEGV). + * + * The first need to do is a cover to get the registers onto + * the backing store. Once that is done, we invoke the signal + * handler which may modify some of the machine state. After + * returning from the signal handler, we return control to the + * previous context by executing a sigreturn system call. A + * signal handler may call the rt_sigreturn() function to + * directly return to a given sigcontext. However, the + * user-level sigreturn() needs to do much more than calling + * the rt_sigreturn() system call as it needs to unwind the + * stack to restore preserved registers that may have been + * saved on the signal handler's call stack. + * + * On entry: + * r2 = signal number + * r3 = plabel of signal handler + * r15 = new register backing store (ignored) + * [sp+16] = sigframe + */ + + .global ia64_sigtramp + .proc ia64_sigtramp +ia64_sigtramp: + ld8 r10=[r3],8 // get signal handler entry point + br.call.sptk.many rp=invoke_sighandler +.ret0: mov r15=__NR_rt_sigreturn + break __BREAK_SYSCALL + .endp ia64_sigramp + + .proc invoke_sighandler +invoke_sighandler: + ld8 gp=[r3] // get signal handler's global pointer + mov b6=r10 + cover // push args in interrupted frame onto backing store + ;; + alloc r8=ar.pfs,0,1,3,0 // get CFM0, EC0, and CPL0 into r8 + mov r17=ar.bsp // fetch ar.bsp + mov loc0=rp // save return pointer + ;; + cmp.ne p8,p0=r15,r0 // do we need to switch the rbs? + mov out0=r2 // signal number +(p8) br.cond.spnt.few setup_rbs // yup -> (clobbers r14 and r16) +back_from_setup_rbs: + adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp + ;; + st8 [base0]=r17,(CFM_OFF-BSP_OFF) // save sc_ar_bsp + adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp + ;; + + st8 [base0]=r8 // save CFM0, EC0, and CPL0 + adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp + ;; + stf.spill [base0]=f6,32 + stf.spill [base1]=f7,32 + ;; + stf.spill [base0]=f8,32 + stf.spill [base1]=f9,32 + ;; + stf.spill [base0]=f10,32 + stf.spill [base1]=f11,32 + adds out1=SIGINFO_OFF,sp // siginfo pointer + ;; + stf.spill [base0]=f12,32 + stf.spill [base1]=f13,32 + adds out2=SIGCONTEXT_OFF,sp // sigcontext pointer + ;; + stf.spill [base0]=f14,32 + stf.spill [base1]=f15,32 + br.call.sptk.few rp=b6 // call the signal handler +.ret2: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp + ;; + ld8 r15=[base0],(CFM_OFF-BSP_OFF) // fetch sc_ar_bsp and advance to CFM_OFF + mov r14=ar.bsp + ;; + ld8 r8=[base0] // restore (perhaps modified) CFM0, EC0, and CPL0 + cmp.ne p8,p0=r14,r15 // do we need to restore the rbs? 
+(p8) br.cond.spnt.few restore_rbs // yup -> (clobbers r14 and r16) +back_from_restore_rbs: + { + and r9=0x7f,r8 // r9 <- CFM0.sof + extr.u r10=r8,7,7 // r10 <- CFM0.sol + mov r11=ip + } + ;; + adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp + adds r11=(cont-back_from_restore_rbs),r11 + sub r9=r9,r10 // r9 <- CFM0.sof - CFM0.sol == CFM0.nout + ;; + adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp + dep r9=r9,r9,7,7 // r9.sol = r9.sof + mov b6=r11 + ;; + ldf.fill f6=[base0],32 + ldf.fill f7=[base1],32 + mov rp=loc0 // copy return pointer out of stacked register + ;; + ldf.fill f8=[base0],32 + ldf.fill f9=[base1],32 + ;; + ldf.fill f10=[base0],32 + ldf.fill f11=[base1],32 + ;; + ldf.fill f12=[base0],32 + ldf.fill f13=[base1],32 + mov ar.pfs=r9 + ;; + ldf.fill f14=[base0],32 + ldf.fill f15=[base1],32 + br.ret.sptk.few b6 +cont: mov ar.pfs=r8 // ar.pfs = CFM0 + br.ret.sptk.few rp // re-establish CFM0 + .endp invoke_signal_handler + + .proc setup_rbs +setup_rbs: + flushrs // must be first in insn + ;; + mov ar.rsc=r0 // put RSE into enforced lazy mode + adds r16=(RNAT_OFF+SIGCONTEXT_OFF),sp + mov r14=ar.rnat // get rnat as updated by flushrs + ;; + mov ar.bspstore=r15 // set new register backing store area + st8 [r16]=r14 // save sc_ar_rnat + ;; + mov ar.rsc=0xf // set RSE into eager mode, pl 3 + invala // invalidate ALAT + br.cond.sptk.many back_from_setup_rbs + + .proc restore_rbs +restore_rbs: + flushrs + mov ar.rsc=r0 // put RSE into enforced lazy mode + adds r16=(RNAT_OFF+SIGCONTEXT_OFF),sp + ;; + ld8 r14=[r16] // get new rnat + mov ar.bspstore=r15 // set old register backing store area + ;; + mov ar.rnat=r14 // establish new rnat + mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc) + // invala not necessary as that will happen when returning to user-mode + br.cond.sptk.many back_from_restore_rbs + + .endp restore_rbs diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S new file mode 100644 index 000000000..50d965e02 --- /dev/null +++ b/arch/ia64/kernel/head.S @@ -0,0 +1,646 @@ +/* + * Here is where the ball gets rolling as far as the kernel is concerned. + * When control is transferred to _start, the bootload has already + * loaded us to the correct address. All that's left to do here is + * to set up the kernel's global pointer and jump to the kernel + * entry point. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 Intel Corp. 
+ * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com> + * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com> + */ + +#include <linux/config.h> + +#include <asm/fpu.h> +#include <asm/pal.h> +#include <asm/offsets.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> + + .psr abi64 + .psr lsb + .lsb + + .section __special_page_section,"ax" + + .global empty_zero_page +empty_zero_page: + .skip PAGE_SIZE + + .global swapper_pg_dir +swapper_pg_dir: + .skip PAGE_SIZE + + .global empty_bad_page +empty_bad_page: + .skip PAGE_SIZE + + .global empty_bad_pte_table +empty_bad_pte_table: + .skip PAGE_SIZE + + .global empty_bad_pmd_table +empty_bad_pmd_table: + .skip PAGE_SIZE + + .rodata +halt_msg: + stringz "Halting kernel\n" + + .text + .align 16 + .global _start + .proc _start +_start: + // set IVT entry point---can't access I/O ports without it + movl r3=ia64_ivt + ;; + mov cr.iva=r3 + movl r2=FPSR_DEFAULT + ;; + srlz.i + movl gp=__gp + + mov ar.fpsr=r2 + ;; + +#ifdef CONFIG_IA64_EARLY_PRINTK + mov r2=6 + mov r3=(8<<8) | (28<<2) + ;; + mov rr[r2]=r3 + ;; + srlz.i + ;; +#endif + +#define isAP p2 // are we booting an Application Processor (not the BSP)? + + // Find the init_task for the currently booting CPU. At poweron, and in + // UP mode, cpu_now_booting is 0 + movl r3=cpu_now_booting + ;; + ld4 r3=[r3] + movl r2=init_tasks + ;; + shladd r2=r3,3,r2 + ;; + ld8 r2=[r2] + cmp4.ne isAP,p0=r3,r0 // p9 == true if this is an application processor (ap) + ;; // RAW on r2 + extr r3=r2,0,61 // r3 == phys addr of task struct + ;; + + // load the "current" pointer (r13) and ar.k6 with the current task + mov r13=r2 + mov ar.k6=r3 // Physical address + ;; + /* + * Reserve space at the top of the stack for "struct pt_regs". Kernel threads + * don't store interesting values in that structure, but the space still needs + * to be there because time-critical stuff such as the context switching can + * be implemented more efficiently (for example, __switch_to() + * always sets the psr.dfh bit of the task it is switching to). 
+ */ + addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 + addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE + mov ar.rsc=r0 // place RSE in enforced lazy mode + ;; + mov ar.bspstore=r2 // establish the new RSE stack + ;; + loadrs // load zero bytes from the register stack + ;; + mov ar.rsc=0x3 // place RSE in eager mode + ;; + +#ifdef CONFIG_IA64_EARLY_PRINTK + .rodata +alive_msg: + stringz "I'm alive and well\n" + .previous + + alloc r2=ar.pfs,0,0,2,0 + movl out0=alive_msg + ;; + br.call.sptk.few rp=early_printk +1: // force new bundle +#endif /* CONFIG_IA64_EARLY_PRINTK */ + + alloc r2=ar.pfs,8,0,2,0 +#ifdef CONFIG_SMP +(isAP) br.call.sptk.few rp=smp_callin +.ret1: +(isAP) br.cond.sptk.few self +#endif + +#undef isAP + + // This is executed by the bootstrap processor (bsp) only: + +#ifdef CONFIG_IA64_FW_EMU + // initialize PAL & SAL emulator: + br.call.sptk.few rp=sys_fw_init + ;; +#endif + br.call.sptk.few rp=start_kernel +.ret2: + addl r2=@ltoff(halt_msg),gp + ;; + ld8 out0=[r2] + br.call.sptk.few b0=console_print +self: br.sptk.few self // endless loop + .endp _start + + .align 16 + .global ia64_save_debug_regs + .proc ia64_save_debug_regs +ia64_save_debug_regs: + alloc r16=ar.pfs,1,0,0,0 + mov r20=ar.lc // preserve ar.lc + mov ar.lc=IA64_NUM_DBG_REGS-1 + mov r18=0 + add r19=IA64_NUM_DBG_REGS*8,in0 + ;; +1: mov r16=dbr[r18] + mov r17=ibr[r18] + add r18=1,r18 + ;; + st8.nta [in0]=r16,8 + st8.nta [r19]=r17,8 + br.cloop.sptk.few 1b + + mov ar.lc=r20 // restore ar.lc + br.ret.sptk.few b0 + .endp ia64_save_debug_regs + + .align 16 + .global ia64_load_debug_regs + .proc ia64_load_debug_regs +ia64_load_debug_regs: + alloc r16=ar.pfs,1,0,0,0 + lfetch.nta [in0] + mov r20=ar.lc // preserve ar.lc + add r19=IA64_NUM_DBG_REGS*8,in0 + mov ar.lc=IA64_NUM_DBG_REGS-1 + mov r18=-1 + ;; +1: ld8.nta r16=[in0],8 + ld8.nta r17=[r19],8 + add r18=1,r18 + ;; + mov dbr[r18]=r16 + mov ibr[r18]=r17 + br.cloop.sptk.few 1b + + mov ar.lc=r20 // restore ar.lc + br.ret.sptk.few b0 + .endp ia64_load_debug_regs + + .align 16 + .global __ia64_save_fpu + .proc __ia64_save_fpu +__ia64_save_fpu: + alloc r2=ar.pfs,1,0,0,0 + adds r3=16,in0 + ;; + stf.spill.nta [in0]=f32,32 + stf.spill.nta [ r3]=f33,32 + ;; + stf.spill.nta [in0]=f34,32 + stf.spill.nta [ r3]=f35,32 + ;; + stf.spill.nta [in0]=f36,32 + stf.spill.nta [ r3]=f37,32 + ;; + stf.spill.nta [in0]=f38,32 + stf.spill.nta [ r3]=f39,32 + ;; + stf.spill.nta [in0]=f40,32 + stf.spill.nta [ r3]=f41,32 + ;; + stf.spill.nta [in0]=f42,32 + stf.spill.nta [ r3]=f43,32 + ;; + stf.spill.nta [in0]=f44,32 + stf.spill.nta [ r3]=f45,32 + ;; + stf.spill.nta [in0]=f46,32 + stf.spill.nta [ r3]=f47,32 + ;; + stf.spill.nta [in0]=f48,32 + stf.spill.nta [ r3]=f49,32 + ;; + stf.spill.nta [in0]=f50,32 + stf.spill.nta [ r3]=f51,32 + ;; + stf.spill.nta [in0]=f52,32 + stf.spill.nta [ r3]=f53,32 + ;; + stf.spill.nta [in0]=f54,32 + stf.spill.nta [ r3]=f55,32 + ;; + stf.spill.nta [in0]=f56,32 + stf.spill.nta [ r3]=f57,32 + ;; + stf.spill.nta [in0]=f58,32 + stf.spill.nta [ r3]=f59,32 + ;; + stf.spill.nta [in0]=f60,32 + stf.spill.nta [ r3]=f61,32 + ;; + stf.spill.nta [in0]=f62,32 + stf.spill.nta [ r3]=f63,32 + ;; + stf.spill.nta [in0]=f64,32 + stf.spill.nta [ r3]=f65,32 + ;; + stf.spill.nta [in0]=f66,32 + stf.spill.nta [ r3]=f67,32 + ;; + stf.spill.nta [in0]=f68,32 + stf.spill.nta [ r3]=f69,32 + ;; + stf.spill.nta [in0]=f70,32 + stf.spill.nta [ r3]=f71,32 + ;; + stf.spill.nta [in0]=f72,32 + stf.spill.nta [ r3]=f73,32 + ;; + stf.spill.nta [in0]=f74,32 + stf.spill.nta [ r3]=f75,32 + ;; + stf.spill.nta 
[in0]=f76,32 + stf.spill.nta [ r3]=f77,32 + ;; + stf.spill.nta [in0]=f78,32 + stf.spill.nta [ r3]=f79,32 + ;; + stf.spill.nta [in0]=f80,32 + stf.spill.nta [ r3]=f81,32 + ;; + stf.spill.nta [in0]=f82,32 + stf.spill.nta [ r3]=f83,32 + ;; + stf.spill.nta [in0]=f84,32 + stf.spill.nta [ r3]=f85,32 + ;; + stf.spill.nta [in0]=f86,32 + stf.spill.nta [ r3]=f87,32 + ;; + stf.spill.nta [in0]=f88,32 + stf.spill.nta [ r3]=f89,32 + ;; + stf.spill.nta [in0]=f90,32 + stf.spill.nta [ r3]=f91,32 + ;; + stf.spill.nta [in0]=f92,32 + stf.spill.nta [ r3]=f93,32 + ;; + stf.spill.nta [in0]=f94,32 + stf.spill.nta [ r3]=f95,32 + ;; + stf.spill.nta [in0]=f96,32 + stf.spill.nta [ r3]=f97,32 + ;; + stf.spill.nta [in0]=f98,32 + stf.spill.nta [ r3]=f99,32 + ;; + stf.spill.nta [in0]=f100,32 + stf.spill.nta [ r3]=f101,32 + ;; + stf.spill.nta [in0]=f102,32 + stf.spill.nta [ r3]=f103,32 + ;; + stf.spill.nta [in0]=f104,32 + stf.spill.nta [ r3]=f105,32 + ;; + stf.spill.nta [in0]=f106,32 + stf.spill.nta [ r3]=f107,32 + ;; + stf.spill.nta [in0]=f108,32 + stf.spill.nta [ r3]=f109,32 + ;; + stf.spill.nta [in0]=f110,32 + stf.spill.nta [ r3]=f111,32 + ;; + stf.spill.nta [in0]=f112,32 + stf.spill.nta [ r3]=f113,32 + ;; + stf.spill.nta [in0]=f114,32 + stf.spill.nta [ r3]=f115,32 + ;; + stf.spill.nta [in0]=f116,32 + stf.spill.nta [ r3]=f117,32 + ;; + stf.spill.nta [in0]=f118,32 + stf.spill.nta [ r3]=f119,32 + ;; + stf.spill.nta [in0]=f120,32 + stf.spill.nta [ r3]=f121,32 + ;; + stf.spill.nta [in0]=f122,32 + stf.spill.nta [ r3]=f123,32 + ;; + stf.spill.nta [in0]=f124,32 + stf.spill.nta [ r3]=f125,32 + ;; + stf.spill.nta [in0]=f126,32 + stf.spill.nta [ r3]=f127,32 + br.ret.sptk.few rp + .endp __ia64_save_fpu + + .align 16 + .global __ia64_load_fpu + .proc __ia64_load_fpu +__ia64_load_fpu: + alloc r2=ar.pfs,1,0,0,0 + adds r3=16,in0 + ;; + ldf.fill.nta f32=[in0],32 + ldf.fill.nta f33=[ r3],32 + ;; + ldf.fill.nta f34=[in0],32 + ldf.fill.nta f35=[ r3],32 + ;; + ldf.fill.nta f36=[in0],32 + ldf.fill.nta f37=[ r3],32 + ;; + ldf.fill.nta f38=[in0],32 + ldf.fill.nta f39=[ r3],32 + ;; + ldf.fill.nta f40=[in0],32 + ldf.fill.nta f41=[ r3],32 + ;; + ldf.fill.nta f42=[in0],32 + ldf.fill.nta f43=[ r3],32 + ;; + ldf.fill.nta f44=[in0],32 + ldf.fill.nta f45=[ r3],32 + ;; + ldf.fill.nta f46=[in0],32 + ldf.fill.nta f47=[ r3],32 + ;; + ldf.fill.nta f48=[in0],32 + ldf.fill.nta f49=[ r3],32 + ;; + ldf.fill.nta f50=[in0],32 + ldf.fill.nta f51=[ r3],32 + ;; + ldf.fill.nta f52=[in0],32 + ldf.fill.nta f53=[ r3],32 + ;; + ldf.fill.nta f54=[in0],32 + ldf.fill.nta f55=[ r3],32 + ;; + ldf.fill.nta f56=[in0],32 + ldf.fill.nta f57=[ r3],32 + ;; + ldf.fill.nta f58=[in0],32 + ldf.fill.nta f59=[ r3],32 + ;; + ldf.fill.nta f60=[in0],32 + ldf.fill.nta f61=[ r3],32 + ;; + ldf.fill.nta f62=[in0],32 + ldf.fill.nta f63=[ r3],32 + ;; + ldf.fill.nta f64=[in0],32 + ldf.fill.nta f65=[ r3],32 + ;; + ldf.fill.nta f66=[in0],32 + ldf.fill.nta f67=[ r3],32 + ;; + ldf.fill.nta f68=[in0],32 + ldf.fill.nta f69=[ r3],32 + ;; + ldf.fill.nta f70=[in0],32 + ldf.fill.nta f71=[ r3],32 + ;; + ldf.fill.nta f72=[in0],32 + ldf.fill.nta f73=[ r3],32 + ;; + ldf.fill.nta f74=[in0],32 + ldf.fill.nta f75=[ r3],32 + ;; + ldf.fill.nta f76=[in0],32 + ldf.fill.nta f77=[ r3],32 + ;; + ldf.fill.nta f78=[in0],32 + ldf.fill.nta f79=[ r3],32 + ;; + ldf.fill.nta f80=[in0],32 + ldf.fill.nta f81=[ r3],32 + ;; + ldf.fill.nta f82=[in0],32 + ldf.fill.nta f83=[ r3],32 + ;; + ldf.fill.nta f84=[in0],32 + ldf.fill.nta f85=[ r3],32 + ;; + ldf.fill.nta f86=[in0],32 + ldf.fill.nta f87=[ r3],32 + ;; + ldf.fill.nta 
f88=[in0],32 + ldf.fill.nta f89=[ r3],32 + ;; + ldf.fill.nta f90=[in0],32 + ldf.fill.nta f91=[ r3],32 + ;; + ldf.fill.nta f92=[in0],32 + ldf.fill.nta f93=[ r3],32 + ;; + ldf.fill.nta f94=[in0],32 + ldf.fill.nta f95=[ r3],32 + ;; + ldf.fill.nta f96=[in0],32 + ldf.fill.nta f97=[ r3],32 + ;; + ldf.fill.nta f98=[in0],32 + ldf.fill.nta f99=[ r3],32 + ;; + ldf.fill.nta f100=[in0],32 + ldf.fill.nta f101=[ r3],32 + ;; + ldf.fill.nta f102=[in0],32 + ldf.fill.nta f103=[ r3],32 + ;; + ldf.fill.nta f104=[in0],32 + ldf.fill.nta f105=[ r3],32 + ;; + ldf.fill.nta f106=[in0],32 + ldf.fill.nta f107=[ r3],32 + ;; + ldf.fill.nta f108=[in0],32 + ldf.fill.nta f109=[ r3],32 + ;; + ldf.fill.nta f110=[in0],32 + ldf.fill.nta f111=[ r3],32 + ;; + ldf.fill.nta f112=[in0],32 + ldf.fill.nta f113=[ r3],32 + ;; + ldf.fill.nta f114=[in0],32 + ldf.fill.nta f115=[ r3],32 + ;; + ldf.fill.nta f116=[in0],32 + ldf.fill.nta f117=[ r3],32 + ;; + ldf.fill.nta f118=[in0],32 + ldf.fill.nta f119=[ r3],32 + ;; + ldf.fill.nta f120=[in0],32 + ldf.fill.nta f121=[ r3],32 + ;; + ldf.fill.nta f122=[in0],32 + ldf.fill.nta f123=[ r3],32 + ;; + ldf.fill.nta f124=[in0],32 + ldf.fill.nta f125=[ r3],32 + ;; + ldf.fill.nta f126=[in0],32 + ldf.fill.nta f127=[ r3],32 + br.ret.sptk.few rp + .endp __ia64_load_fpu + + .align 16 + .global __ia64_init_fpu + .proc __ia64_init_fpu +__ia64_init_fpu: + alloc r2=ar.pfs,0,0,0,0 + stf.spill [sp]=f0 + mov f32=f0 + ;; + ldf.fill f33=[sp] + ldf.fill f34=[sp] + mov f35=f0 + ;; + ldf.fill f36=[sp] + ldf.fill f37=[sp] + mov f38=f0 + ;; + ldf.fill f39=[sp] + ldf.fill f40=[sp] + mov f41=f0 + ;; + ldf.fill f42=[sp] + ldf.fill f43=[sp] + mov f44=f0 + ;; + ldf.fill f45=[sp] + ldf.fill f46=[sp] + mov f47=f0 + ;; + ldf.fill f48=[sp] + ldf.fill f49=[sp] + mov f50=f0 + ;; + ldf.fill f51=[sp] + ldf.fill f52=[sp] + mov f53=f0 + ;; + ldf.fill f54=[sp] + ldf.fill f55=[sp] + mov f56=f0 + ;; + ldf.fill f57=[sp] + ldf.fill f58=[sp] + mov f59=f0 + ;; + ldf.fill f60=[sp] + ldf.fill f61=[sp] + mov f62=f0 + ;; + ldf.fill f63=[sp] + ldf.fill f64=[sp] + mov f65=f0 + ;; + ldf.fill f66=[sp] + ldf.fill f67=[sp] + mov f68=f0 + ;; + ldf.fill f69=[sp] + ldf.fill f70=[sp] + mov f71=f0 + ;; + ldf.fill f72=[sp] + ldf.fill f73=[sp] + mov f74=f0 + ;; + ldf.fill f75=[sp] + ldf.fill f76=[sp] + mov f77=f0 + ;; + ldf.fill f78=[sp] + ldf.fill f79=[sp] + mov f80=f0 + ;; + ldf.fill f81=[sp] + ldf.fill f82=[sp] + mov f83=f0 + ;; + ldf.fill f84=[sp] + ldf.fill f85=[sp] + mov f86=f0 + ;; + ldf.fill f87=[sp] + ldf.fill f88=[sp] + mov f89=f0 + ;; + ldf.fill f90=[sp] + ldf.fill f91=[sp] + mov f92=f0 + ;; + ldf.fill f93=[sp] + ldf.fill f94=[sp] + mov f95=f0 + ;; + ldf.fill f96=[sp] + ldf.fill f97=[sp] + mov f98=f0 + ;; + ldf.fill f99=[sp] + ldf.fill f100=[sp] + mov f101=f0 + ;; + ldf.fill f102=[sp] + ldf.fill f103=[sp] + mov f104=f0 + ;; + ldf.fill f105=[sp] + ldf.fill f106=[sp] + mov f107=f0 + ;; + ldf.fill f108=[sp] + ldf.fill f109=[sp] + mov f110=f0 + ;; + ldf.fill f111=[sp] + ldf.fill f112=[sp] + mov f113=f0 + ;; + ldf.fill f114=[sp] + ldf.fill f115=[sp] + mov f116=f0 + ;; + ldf.fill f117=[sp] + ldf.fill f118=[sp] + mov f119=f0 + ;; + ldf.fill f120=[sp] + ldf.fill f121=[sp] + mov f122=f0 + ;; + ldf.fill f123=[sp] + ldf.fill f124=[sp] + mov f125=f0 + ;; + ldf.fill f126=[sp] + mov f127=f0 + br.ret.sptk.few rp + .endp __ia64_init_fpu diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c new file mode 100644 index 000000000..122650461 --- /dev/null +++ b/arch/ia64/kernel/init_task.c @@ -0,0 +1,31 @@ +/* + * This is where we statically allocate 
and initialize the initial + * task. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/sched.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> + +static struct vm_area_struct init_mmap = INIT_MMAP; +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS; +struct mm_struct init_mm = INIT_MM(init_mm); + +/* + * Initial task structure. + * + * We need to make sure that this is page aligned due to the way + * process stacks are handled. This is done by having a special + * "init_task" linker map entry.. + */ +union task_union init_task_union + __attribute__((section("init_task"))) = + { INIT_TASK(init_task_union.task) }; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c new file mode 100644 index 000000000..01c201137 --- /dev/null +++ b/arch/ia64/kernel/irq.c @@ -0,0 +1,657 @@ +/* + * linux/arch/ia64/kernel/irq.c + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * 6/10/99: Updated to bring in sync with x86 version to facilitate + * support for SMP and different interrupt controllers. + */ + +#include <linux/config.h> + +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/kernel_stat.h> +#include <linux/malloc.h> +#include <linux/ptrace.h> +#include <linux/random.h> /* for rand_initialize_irq() */ +#include <linux/signal.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/threads.h> + +#ifdef CONFIG_KDB +# include <linux/kdb.h> +#endif + +#include <asm/bitops.h> +#include <asm/delay.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/machvec.h> +#include <asm/pgtable.h> +#include <asm/system.h> + +/* This is used to detect bad usage of probe_irq_on()/probe_irq_off(). */ +#define PROBE_IRQ_COOKIE 0xfeedC0FFEE + +struct irq_desc irq_desc[NR_IRQS]; + +/* + * Micro-access to controllers is serialized over the whole + * system. We never hold this lock when we call the actual + * IRQ handler. + */ +spinlock_t irq_controller_lock; + +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC +spinlock_t ivr_read_lock; +#endif + +unsigned int local_bh_count[NR_CPUS]; +/* + * used in irq_enter()/irq_exit() + */ +unsigned int local_irq_count[NR_CPUS]; + +static struct irqaction timer_action = { NULL, 0, 0, NULL, NULL, NULL}; + +#ifdef CONFIG_SMP +static struct irqaction ipi_action = { NULL, 0, 0, NULL, NULL, NULL}; +#endif + +/* + * Legacy IRQ to IA-64 vector translation table. Any vector not in + * this table maps to itself (ie: irq 0x30 => IA64 vector 0x30) + */ +__u8 irq_to_vector_map[IA64_MIN_VECTORED_IRQ] = { + /* 8259 IRQ translation, first 16 entries */ + TIMER_IRQ, 0x50, 0x0f, 0x51, 0x52, 0x53, 0x43, 0x54, + 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x40, 0x41, +}; + +/* + * Reverse of the above table. 
+ */ +static __u8 vector_to_legacy_map[256]; + +/* + * used by proc fs (/proc/interrupts) + */ +int +get_irq_list (char *buf) +{ + int i; + struct irqaction * action; + char *p = buf; + +#ifdef CONFIG_SMP + p += sprintf(p, " "); + for (i = 0; i < smp_num_cpus; i++) + p += sprintf(p, "CPU%d ", i); + *p++ = '\n'; +#endif + /* + * Simply scans the external vectored interrupts + */ + for (i = 0; i < NR_IRQS; i++) { + action = irq_desc[i].action; + if (!action) + continue; + p += sprintf(p, "%3d: ",i); +#ifndef CONFIG_SMP + p += sprintf(p, "%10u ", kstat_irqs(i)); +#else + { + int j; + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + kstat.irqs[cpu_logical_map(j)][i]); + } +#endif + p += sprintf(p, " %14s", irq_desc[i].handler->typename); + p += sprintf(p, " %c%s", (action->flags & SA_INTERRUPT) ? '+' : ' ', + action->name); + + for (action = action->next; action; action = action->next) { + p += sprintf(p, ", %c%s", + (action->flags & SA_INTERRUPT)?'+':' ', + action->name); + } + *p++ = '\n'; + } + return p - buf; +} + +/* + * That's where the IVT branches when we get an external + * interrupt. This branches to the correct hardware IRQ handler via + * function ptr. + */ +void +ia64_handle_irq (unsigned long irq, struct pt_regs *regs) +{ + unsigned long bsp, sp, saved_tpr; + +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC +# ifndef CONFIG_SMP + static unsigned int max_prio = 0; +# endif + unsigned int prev_prio; + unsigned long eoi_ptr; + +# ifdef CONFIG_USB + disable_usb(); +# endif + /* + * Stop IPIs by getting the ivr_read_lock + */ + spin_lock(&ivr_read_lock); + + /* + * Disable PCI writes + */ + outl(0x80ff81c0, 0xcf8); + outl(0x73002188, 0xcfc); + eoi_ptr = inl(0xcfc); + + irq = ia64_get_ivr(); + + /* + * Enable PCI writes + */ + outl(0x73182188, 0xcfc); + + spin_unlock(&ivr_read_lock); + +# ifdef CONFIG_USB + reenable_usb(); +# endif + +# ifndef CONFIG_SMP + prev_prio = max_prio; + if (irq < max_prio) { + printk ("ia64_handle_irq: got irq %lu while %u was in progress!\n", + irq, max_prio); + + } else + max_prio = irq; +# endif /* !CONFIG_SMP */ +#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ + + /* Always set TPR to limit maximum interrupt nesting depth to + * 16 (without this, it would be ~240, which could easily lead + * to kernel stack overflows. + */ + saved_tpr = ia64_get_tpr(); + ia64_srlz_d(); + ia64_set_tpr(irq); + ia64_srlz_d(); + + asm ("mov %0=ar.bsp" : "=r"(bsp)); + asm ("mov %0=sp" : "=r"(sp)); + + if ((sp - bsp) < 1024) { + static long last_time; + static unsigned char count; + + if (count > 5 && jiffies - last_time > 5*HZ) + count = 0; + if (++count < 5) { + last_time = jiffies; + printk("ia64_handle_irq: DANGER: less than 1KB of free stack space!!\n" + "(bsp=0x%lx, sp=%lx)\n", bsp, sp); + } +#ifdef CONFIG_KDB + kdb(KDB_REASON_PANIC, 0, regs); +#endif + } + + /* + * The interrupt is now said to be in service + */ + if (irq >= NR_IRQS) { + printk("handle_irq: invalid irq=%lu\n", irq); + goto out; + } + + ++kstat.irqs[smp_processor_id()][irq]; + + if (irq == IA64_SPURIOUS_INT) { + printk("handle_irq: spurious interrupt\n"); + goto out; + } + + /* + * Handle the interrupt by calling the hardware specific handler (IOSAPIC, Internal, etc). 
+ */ + (*irq_desc[irq].handler->handle)(irq, regs); + out: +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + { + long pEOI; + + asm ("mov %0=0;; (p1) mov %0=1" : "=r"(pEOI)); + if (!pEOI) { + printk("Yikes: ia64_handle_irq() without pEOI!!\n"); + asm volatile ("cmp.eq p1,p0=r0,r0" : "=r"(pEOI)); +# ifdef CONFIG_KDB + kdb(KDB_REASON_PANIC, 0, regs); +# endif + } + } + + local_irq_disable(); +# ifndef CONFIG_SMP + if (max_prio == irq) + max_prio = prev_prio; +# endif /* !CONFIG_SMP */ +#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ + + ia64_srlz_d(); + ia64_set_tpr(saved_tpr); + ia64_srlz_d(); +} + + +/* + * This should really return information about whether we should do + * bottom half handling etc. Right now we end up _always_ checking the + * bottom half, which is a waste of time and is not what some drivers + * would prefer. + */ +int +invoke_irq_handlers (unsigned int irq, struct pt_regs *regs, struct irqaction *action) +{ + void (*handler)(int, void *, struct pt_regs *); + unsigned long flags, flags_union = 0; + int cpu = smp_processor_id(); + unsigned int requested_irq; + void *dev_id; + + irq_enter(cpu, irq); + + if ((action->flags & SA_INTERRUPT) == 0) + __sti(); + + do { + flags = action->flags; + requested_irq = irq; + if ((flags & SA_LEGACY) != 0) + requested_irq = vector_to_legacy_map[irq]; + flags_union |= flags; + handler = action->handler; + dev_id = action->dev_id; + action = action->next; + (*handler)(requested_irq, dev_id, regs); + } while (action); + if ((flags_union & SA_SAMPLE_RANDOM) != 0) + add_interrupt_randomness(irq); + __cli(); + + irq_exit(cpu, irq); + return flags_union | 1; /* force the "do bottom halves" bit */ +} + +void +disable_irq_nosync (unsigned int irq) +{ + unsigned long flags; + + irq = map_legacy_irq(irq); + + spin_lock_irqsave(&irq_controller_lock, flags); + if (irq_desc[irq].depth++ > 0) { + irq_desc[irq].status &= ~IRQ_ENABLED; + irq_desc[irq].handler->disable(irq); + } + spin_unlock_irqrestore(&irq_controller_lock, flags); +} + +/* + * Synchronous version of the above, making sure the IRQ is + * no longer running on any other IRQ.. + */ +void +disable_irq (unsigned int irq) +{ + disable_irq_nosync(irq); + + irq = map_legacy_irq(irq); + + if (!local_irq_count[smp_processor_id()]) { + do { + barrier(); + } while ((irq_desc[irq].status & IRQ_INPROGRESS) != 0); + } +} + +void +enable_irq (unsigned int irq) +{ + unsigned long flags; + + irq = map_legacy_irq(irq); + + spin_lock_irqsave(&irq_controller_lock, flags); + switch (irq_desc[irq].depth) { + case 1: + irq_desc[irq].status |= IRQ_ENABLED; + (*irq_desc[irq].handler->enable)(irq); + /* fall through */ + default: + --irq_desc[irq].depth; + break; + + case 0: + printk("enable_irq: unbalanced from %p\n", __builtin_return_address(0)); + } + spin_unlock_irqrestore(&irq_controller_lock, flags); +} + +/* + * This function encapsulates the initialization that needs to be + * performed under the protection of lock irq_controller_lock. The + * lock must have been acquired by the time this is called. 
+ */ +static inline int +setup_irq (unsigned int irq, struct irqaction *new) +{ + int shared = 0; + struct irqaction *old, **p; + + p = &irq_desc[irq].action; + old = *p; + if (old) { + if (!(old->flags & new->flags & SA_SHIRQ)) { + return -EBUSY; + } + /* add new interrupt at end of irq queue */ + do { + p = &old->next; + old = *p; + } while (old); + shared = 1; + } + *p = new; + + /* when sharing do not unmask */ + if (!shared) { + irq_desc[irq].depth = 0; + irq_desc[irq].status |= IRQ_ENABLED; + (*irq_desc[irq].handler->startup)(irq); + } + return 0; +} + +int +request_irq (unsigned int requested_irq, void (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, const char * devname, void *dev_id) +{ + int retval, need_kfree = 0; + struct irqaction *action; + unsigned long flags; + unsigned int irq; + +#ifdef IA64_DEBUG + printk("request_irq(0x%x) called\n", requested_irq); +#endif + /* + * Sanity-check: shared interrupts should REALLY pass in + * a real dev-ID, otherwise we'll have trouble later trying + * to figure out which interrupt is which (messes up the + * interrupt freeing logic etc). + */ + if ((irqflags & SA_SHIRQ) && !dev_id) + printk("Bad boy: %s (at %p) called us without a dev_id!\n", + devname, current_text_addr()); + + irq = map_legacy_irq(requested_irq); + if (irq != requested_irq) + irqflags |= SA_LEGACY; + + if (irq >= NR_IRQS) + return -EINVAL; + + if (!handler) + return -EINVAL; + + /* + * The timer_action and ipi_action cannot be allocated + * dynamically because its initialization happens really early + * on in init/main.c at this point the memory allocator has + * not yet been initialized. So we use a statically reserved + * buffer for it. In some sense that's no big deal because we + * need one no matter what. 
+ */ + if (irq == TIMER_IRQ) + action = &timer_action; +#ifdef CONFIG_SMP + else if (irq == IPI_IRQ) + action = &ipi_action; +#endif + else { + action = kmalloc(sizeof(struct irqaction), GFP_KERNEL); + need_kfree = 1; + } + + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irqflags; + action->mask = 0; + action->name = devname; + action->next = NULL; + action->dev_id = dev_id; + + if ((irqflags & SA_SAMPLE_RANDOM) != 0) + rand_initialize_irq(irq); + + spin_lock_irqsave(&irq_controller_lock, flags); + retval = setup_irq(irq, action); + spin_unlock_irqrestore(&irq_controller_lock, flags); + + if (need_kfree && retval) + kfree(action); + + return retval; +} + +void +free_irq (unsigned int irq, void *dev_id) +{ + struct irqaction *action, **p; + unsigned long flags; + + /* + * some sanity checks first + */ + if (irq >= NR_IRQS) { + printk("Trying to free IRQ%d\n",irq); + return; + } + + irq = map_legacy_irq(irq); + + /* + * Find the corresponding irqaction + */ + spin_lock_irqsave(&irq_controller_lock, flags); + for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) { + if (action->dev_id != dev_id) + continue; + + /* Found it - now remove it from the list of entries */ + *p = action->next; + if (!irq_desc[irq].action) { + irq_desc[irq].status &= ~IRQ_ENABLED; + (*irq_desc[irq].handler->shutdown)(irq); + } + + spin_unlock_irqrestore(&irq_controller_lock, flags); + +#ifdef CONFIG_SMP + /* Wait to make sure it's not being used on another CPU */ + while (irq_desc[irq].status & IRQ_INPROGRESS) + barrier(); +#endif + + if (action != &timer_action +#ifdef CONFIG_SMP + && action != &ipi_action +#endif + ) + kfree(action); + return; + } + printk("Trying to free free IRQ%d\n", irq); +} + +/* + * IRQ autodetection code. Note that the return value of + * probe_irq_on() is no longer being used (it's role has been replaced + * by the IRQ_AUTODETECT flag). 
+ */ +unsigned long +probe_irq_on (void) +{ + struct irq_desc *id; + unsigned long delay; + +#ifdef IA64_DEBUG + printk("probe_irq_on() called\n"); +#endif + + spin_lock_irq(&irq_controller_lock); + for (id = irq_desc; id < irq_desc + NR_IRQS; ++id) { + if (!id->action) { + id->status |= IRQ_AUTODETECT | IRQ_WAITING; + (*id->handler->startup)(id - irq_desc); + } + } + spin_unlock_irq(&irq_controller_lock); + + /* wait for spurious interrupts to trigger: */ + + for (delay = jiffies + HZ/10; time_after(delay, jiffies); ) + /* about 100ms delay */ + synchronize_irq(); + + /* filter out obviously spurious interrupts: */ + spin_lock_irq(&irq_controller_lock); + for (id = irq_desc; id < irq_desc + NR_IRQS; ++id) { + unsigned int status = id->status; + + if (!(status & IRQ_AUTODETECT)) + continue; + + if (!(status & IRQ_WAITING)) { + id->status = status & ~IRQ_AUTODETECT; + (*id->handler->shutdown)(id - irq_desc); + } + } + spin_unlock_irq(&irq_controller_lock); + return PROBE_IRQ_COOKIE; /* return meaningless return value */ +} + +int +probe_irq_off (unsigned long cookie) +{ + int irq_found, nr_irqs; + struct irq_desc *id; + +#ifdef IA64_DEBUG + printk("probe_irq_off(cookie=0x%lx) -> ", cookie); +#endif + + if (cookie != PROBE_IRQ_COOKIE) + printk("bad irq probe from %p\n", __builtin_return_address(0)); + + nr_irqs = 0; + irq_found = 0; + spin_lock_irq(&irq_controller_lock); + for (id = irq_desc + IA64_MIN_VECTORED_IRQ; id < irq_desc + NR_IRQS; ++id) { + unsigned int status = id->status; + + if (!(status & IRQ_AUTODETECT)) + continue; + + if (!(status & IRQ_WAITING)) { + if (!nr_irqs) + irq_found = (id - irq_desc); + ++nr_irqs; + } + id->status = status & ~IRQ_AUTODETECT; + (*id->handler->shutdown)(id - irq_desc); + } + spin_unlock_irq(&irq_controller_lock); + + if (nr_irqs > 1) + irq_found = -irq_found; + +#ifdef IA64_DEBUG + printk("%d\n", irq_found); +#endif + return irq_found; +} + +#ifdef CONFIG_SMP + +void __init +init_IRQ_SMP (void) +{ + if (request_irq(IPI_IRQ, handle_IPI, 0, "IPI", NULL)) + panic("Could not allocate IPI Interrupt Handler!"); +} + +#endif + +void __init +init_IRQ (void) +{ + int i; + + for (i = 0; i < IA64_MIN_VECTORED_IRQ; ++i) + vector_to_legacy_map[irq_to_vector_map[i]] = i; + + for (i = 0; i < NR_IRQS; ++i) { + irq_desc[i].handler = &irq_type_default; + } + + irq_desc[TIMER_IRQ].handler = &irq_type_ia64_internal; +#ifdef CONFIG_SMP + /* + * Configure the IPI vector and handler + */ + irq_desc[IPI_IRQ].handler = &irq_type_ia64_internal; + init_IRQ_SMP(); +#endif + + ia64_set_pmv(1 << 16); + ia64_set_cmcv(CMC_IRQ); /* XXX fix me */ + + platform_irq_init(irq_desc); + + /* clear TPR to enable all interrupt classes: */ + ia64_set_tpr(0); +} + +/* TBD: + * Certain IA64 platforms can have inter-processor interrupt support. + * This interface is supposed to default to the IA64 IPI block-based + * mechanism if the platform doesn't provide a separate mechanism + * for IPIs. + * Choices : (1) Extend hw_interrupt_type interfaces + * (2) Use machine vector mechanism + * For now defining the following interface as a place holder. 
+ */ +void +ipi_send (int cpu, int vector, int delivery_mode) +{ +} diff --git a/arch/ia64/kernel/irq_default.c b/arch/ia64/kernel/irq_default.c new file mode 100644 index 000000000..bf8c62642 --- /dev/null +++ b/arch/ia64/kernel/irq_default.c @@ -0,0 +1,30 @@ +#include <linux/kernel.h> +#include <linux/sched.h> + +#include <asm/irq.h> +#include <asm/processor.h> +#include <asm/ptrace.h> + + +static int +irq_default_handle_irq (unsigned int irq, struct pt_regs *regs) +{ + printk("Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id()); + return 0; /* don't call do_bottom_half() for spurious interrupts */ +} + +static void +irq_default_noop (unsigned int irq) +{ + /* nuthing to do... */ +} + +struct hw_interrupt_type irq_type_default = { + "default", + (void (*)(unsigned long)) irq_default_noop, /* init */ + irq_default_noop, /* startup */ + irq_default_noop, /* shutdown */ + irq_default_handle_irq, /* handle */ + irq_default_noop, /* enable */ + irq_default_noop /* disable */ +}; diff --git a/arch/ia64/kernel/irq_internal.c b/arch/ia64/kernel/irq_internal.c new file mode 100644 index 000000000..1ae904fe8 --- /dev/null +++ b/arch/ia64/kernel/irq_internal.c @@ -0,0 +1,71 @@ +/* + * Internal Interrupt Vectors + * + * This takes care of interrupts that are generated by the CPU + * internally, such as the ITC and IPI interrupts. + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + */ + +#include <linux/kernel.h> +#include <linux/sched.h> + +#include <asm/irq.h> +#include <asm/processor.h> +#include <asm/ptrace.h> + +/* + * This is identical to IOSAPIC handle_irq. It may go away . . . + */ +static int +internal_handle_irq (unsigned int irq, struct pt_regs *regs) +{ + struct irqaction *action = 0; + struct irq_desc *id = irq_desc + irq; + unsigned int status; + int retval; + + spin_lock(&irq_controller_lock); + { + status = id->status; + if ((status & IRQ_ENABLED) != 0) + action = id->action; + id->status = status & ~(IRQ_REPLAY | IRQ_WAITING); + } + spin_unlock(&irq_controller_lock); + + if (!action) { + if (!(id->status & IRQ_AUTODETECT)) + printk("irq_hpsim_handle_irq: unexpected interrupt %u\n", irq); + return 0; + } + + retval = invoke_irq_handlers(irq, regs, action); + + spin_lock(&irq_controller_lock); + { + status = (id->status & ~IRQ_INPROGRESS); + id->status = status; + } + spin_unlock(&irq_controller_lock); + + return retval; +} + +static void +internal_noop (unsigned int irq) +{ + /* nuthing to do... */ +} + +struct hw_interrupt_type irq_type_ia64_internal = { + "IA64 internal", + (void (*)(unsigned long)) internal_noop, /* init */ + internal_noop, /* startup */ + internal_noop, /* shutdown */ + internal_handle_irq, /* handle */ + internal_noop, /* enable */ + internal_noop /* disable */ +}; + diff --git a/arch/ia64/kernel/irq_lock.c b/arch/ia64/kernel/irq_lock.c new file mode 100644 index 000000000..9c512dd4e --- /dev/null +++ b/arch/ia64/kernel/irq_lock.c @@ -0,0 +1,287 @@ +/* + * SMP IRQ Lock support + * + * Global interrupt locks for SMP. Allow interrupts to come in on any + * CPU, yet make cli/sti act globally to protect critical regions.. + * These function usually appear in irq.c, but I think it's cleaner this way. 
+ * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + */ + +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/smp.h> +#include <linux/threads.h> +#include <linux/init.h> + +#include <asm/system.h> +#include <asm/processor.h> +#include <asm/irq.h> +#include <asm/bitops.h> +#include <asm/pgtable.h> +#include <asm/delay.h> + +int global_irq_holder = NO_PROC_ID; +spinlock_t global_irq_lock; +atomic_t global_irq_count; +atomic_t global_bh_count; +atomic_t global_bh_lock; + +#define INIT_STUCK (1<<26) + +void +irq_enter(int cpu, int irq) +{ + int stuck = INIT_STUCK; + + hardirq_enter(cpu, irq); + barrier(); + while (global_irq_lock.lock) { + if (cpu == global_irq_holder) { + break; + } + + if (!--stuck) { + printk("irq_enter stuck (irq=%d, cpu=%d, global=%d)\n", + irq, cpu,global_irq_holder); + stuck = INIT_STUCK; + } + barrier(); + } +} + +void +irq_exit(int cpu, int irq) +{ + hardirq_exit(cpu, irq); + release_irqlock(cpu); +} + +static void +show(char * str) +{ + int i; + unsigned long *stack; + int cpu = smp_processor_id(); + + printk("\n%s, CPU %d:\n", str, cpu); + printk("irq: %d [%d %d]\n", + atomic_read(&global_irq_count), local_irq_count[0], local_irq_count[1]); + printk("bh: %d [%d %d]\n", + atomic_read(&global_bh_count), local_bh_count[0], local_bh_count[1]); + + stack = (unsigned long *) &stack; + for (i = 40; i ; i--) { + unsigned long x = *++stack; + if (x > (unsigned long) &get_options && x < (unsigned long) &vsprintf) { + printk("<[%08lx]> ", x); + } + } +} + +#define MAXCOUNT 100000000 + +static inline void +wait_on_bh(void) +{ + int count = MAXCOUNT; + do { + if (!--count) { + show("wait_on_bh"); + count = ~0; + } + /* nothing .. wait for the other bh's to go away */ + } while (atomic_read(&global_bh_count) != 0); +} + +static inline void +wait_on_irq(int cpu) +{ + int count = MAXCOUNT; + + for (;;) { + + /* + * Wait until all interrupts are gone. Wait + * for bottom half handlers unless we're + * already executing in one.. + */ + if (!atomic_read(&global_irq_count)) { + if (local_bh_count[cpu] || !atomic_read(&global_bh_count)) + break; + } + + /* Duh, we have to loop. Release the lock to avoid deadlocks */ + spin_unlock(&global_irq_lock); + mb(); + + for (;;) { + if (!--count) { + show("wait_on_irq"); + count = ~0; + } + __sti(); + udelay(cpu + 1); + __cli(); + if (atomic_read(&global_irq_count)) + continue; + if (global_irq_lock.lock) + continue; + if (!local_bh_count[cpu] && atomic_read(&global_bh_count)) + continue; + if (spin_trylock(&global_irq_lock)) + break; + } + } +} + +/* + * This is called when we want to synchronize with + * bottom half handlers. We need to wait until + * no other CPU is executing any bottom half handler. + * + * Don't wait if we're already running in an interrupt + * context or are inside a bh handler. + */ +void +synchronize_bh(void) +{ + if (atomic_read(&global_bh_count)) { + int cpu = smp_processor_id(); + if (!local_irq_count[cpu] && !local_bh_count[cpu]) { + wait_on_bh(); + } + } +} + + +/* + * This is called when we want to synchronize with + * interrupts. We may for example tell a device to + * stop sending interrupts: but to make sure there + * are no interrupts that are executing on another + * CPU we need to call this function. 
+ */ +void +synchronize_irq(void) +{ + int cpu = smp_processor_id(); + int local_count; + int global_count; + + mb(); + do { + local_count = local_irq_count[cpu]; + global_count = atomic_read(&global_irq_count); + } while (global_count != local_count); +} + +static inline void +get_irqlock(int cpu) +{ + if (!spin_trylock(&global_irq_lock)) { + /* do we already hold the lock? */ + if ((unsigned char) cpu == global_irq_holder) + return; + /* Uhhuh.. Somebody else got it. Wait.. */ + spin_lock(&global_irq_lock); + } + /* + * We also to make sure that nobody else is running + * in an interrupt context. + */ + wait_on_irq(cpu); + + /* + * Ok, finally.. + */ + global_irq_holder = cpu; +} + +/* + * A global "cli()" while in an interrupt context + * turns into just a local cli(). Interrupts + * should use spinlocks for the (very unlikely) + * case that they ever want to protect against + * each other. + * + * If we already have local interrupts disabled, + * this will not turn a local disable into a + * global one (problems with spinlocks: this makes + * save_flags+cli+sti usable inside a spinlock). + */ +void +__global_cli(void) +{ + unsigned long flags; + + __save_flags(flags); + if (flags & IA64_PSR_I) { + int cpu = smp_processor_id(); + __cli(); + if (!local_irq_count[cpu]) + get_irqlock(cpu); + } +} + +void +__global_sti(void) +{ + int cpu = smp_processor_id(); + + if (!local_irq_count[cpu]) + release_irqlock(cpu); + __sti(); +} + +/* + * SMP flags value to restore to: + * 0 - global cli + * 1 - global sti + * 2 - local cli + * 3 - local sti + */ +unsigned long +__global_save_flags(void) +{ + int retval; + int local_enabled; + unsigned long flags; + + __save_flags(flags); + local_enabled = flags & IA64_PSR_I; + /* default to local */ + retval = 2 + local_enabled; + + /* check for global flags if we're not in an interrupt */ + if (!local_irq_count[smp_processor_id()]) { + if (local_enabled) + retval = 1; + if (global_irq_holder == (unsigned char) smp_processor_id()) + retval = 0; + } + return retval; +} + +void +__global_restore_flags(unsigned long flags) +{ + switch (flags) { + case 0: + __global_cli(); + break; + case 1: + __global_sti(); + break; + case 2: + __cli(); + break; + case 3: + __sti(); + break; + default: + printk("global_restore_flags: %08lx (%08lx)\n", + flags, (&flags)[-1]); + } +} diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S new file mode 100644 index 000000000..4c3ac242a --- /dev/null +++ b/arch/ia64/kernel/ivt.S @@ -0,0 +1,1342 @@ +/* + * arch/ia64/kernel/ivt.S + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 1998-2000 David Mosberger <davidm@hpl.hp.com> + */ + +#include <linux/config.h> + +#include <asm/break.h> +#include <asm/offsets.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/unistd.h> + +#include "entry.h" + +/* + * A couple of convenience macros that make writing and reading + * SAVE_MIN and SAVE_REST easier. + */ +#define rARPR r31 +#define rCRIFS r30 +#define rCRIPSR r29 +#define rCRIIP r28 +#define rARRSC r27 +#define rARPFS r26 +#define rARUNAT r25 +#define rARRNAT r24 +#define rARBSPSTORE r23 +#define rKRBS r22 +#define rB6 r21 +#define rR1 r20 + +/* + * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves + * the minimum state necessary that allows us to turn psr.ic back + * on. 
+ * + * Assumed state upon entry: + * psr.ic: off + * psr.dt: off + * r31: contains saved predicates (pr) + * + * Upon exit, the state is as follows: + * psr.ic: off + * psr.dt: off + * r2 = points to &pt_regs.r16 + * r12 = kernel sp (kernel virtual address) + * r13 = points to current task_struct (kernel virtual address) + * p15 = TRUE if psr.i is set in cr.ipsr + * predicate registers (other than p6, p7, and p15), b6, r3, r8, r9, r10, r11, r14, r15: + * preserved + * + * Note that psr.ic is NOT turned on by this macro. This is so that + * we can pass interruption state as arguments to a handler. + */ +#define DO_SAVE_MIN(COVER,EXTRA) \ + mov rARRSC=ar.rsc; \ + mov rARPFS=ar.pfs; \ + mov rR1=r1; \ + mov rARUNAT=ar.unat; \ + mov rCRIPSR=cr.ipsr; \ + mov rB6=b6; /* rB6 = branch reg 6 */ \ + mov rCRIIP=cr.iip; \ + mov r1=ar.k6; /* r1 = current */ \ + ;; \ + invala; \ + extr.u r16=rCRIPSR,32,2; /* extract psr.cpl */ \ + ;; \ + cmp.eq pKern,p7=r0,r16; /* are we in kernel mode already? (psr.cpl==0) */ \ + /* switch from user to kernel RBS: */ \ + COVER; \ + ;; \ +(p7) mov ar.rsc=r0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ +(p7) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \ + ;; \ +(p7) mov rARRNAT=ar.rnat; \ +(pKern) dep r1=0,sp,61,3; /* compute physical addr of sp */ \ +(p7) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ +(p7) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \ +(p7) dep rKRBS=-1,rKRBS,61,3; /* compute kernel virtual addr of RBS */ \ + ;; \ +(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ +(p7) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \ + ;; \ +(p7) mov r18=ar.bsp; \ +(p7) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ + \ + mov r16=r1; /* initialize first base pointer */ \ + adds r17=8,r1; /* initialize second base pointer */ \ + ;; \ + st8 [r16]=rCRIPSR,16; /* save cr.ipsr */ \ + st8 [r17]=rCRIIP,16; /* save cr.iip */ \ +(pKern) mov r18=r0; /* make sure r18 isn't NaT */ \ + ;; \ + st8 [r16]=rCRIFS,16; /* save cr.ifs */ \ + st8 [r17]=rARUNAT,16; /* save ar.unat */ \ +(p7) sub r18=r18,rKRBS; /* r18=RSE.ndirty*8 */ \ + ;; \ + st8 [r16]=rARPFS,16; /* save ar.pfs */ \ + st8 [r17]=rARRSC,16; /* save ar.rsc */ \ + tbit.nz p15,p0=rCRIPSR,IA64_PSR_I_BIT \ + ;; /* avoid RAW on r16 & r17 */ \ +(pKern) adds r16=16,r16; /* skip over ar_rnat field */ \ +(pKern) adds r17=16,r17; /* skip over ar_bspstore field */ \ +(p7) st8 [r16]=rARRNAT,16; /* save ar.rnat */ \ +(p7) st8 [r17]=rARBSPSTORE,16; /* save ar.bspstore */ \ + ;; \ + st8 [r16]=rARPR,16; /* save predicates */ \ + st8 [r17]=rB6,16; /* save b6 */ \ + shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \ + ;; \ + st8 [r16]=r18,16; /* save ar.rsc value for "loadrs" */ \ + st8.spill [r17]=rR1,16; /* save original r1 */ \ + cmp.ne pEOI,p0=r0,r0 /* clear pEOI by default */ \ + ;; \ + st8.spill [r16]=r2,16; \ + st8.spill [r17]=r3,16; \ + adds r2=IA64_PT_REGS_R16_OFFSET,r1; \ + ;; \ + st8.spill [r16]=r12,16; \ + st8.spill [r17]=r13,16; \ + cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \ + ;; \ + st8.spill [r16]=r14,16; \ + st8.spill [r17]=r15,16; \ + dep r14=-1,r0,61,3; \ + ;; \ + st8.spill [r16]=r8,16; \ + st8.spill [r17]=r9,16; \ + adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \ + ;; \ + st8.spill [r16]=r10,16; \ + st8.spill [r17]=r11,16; \ + mov r13=ar.k6; /* establish `current' */ \ + ;; \ + or r2=r2,r14; /* make first base a 
kernel virtual address */ \ + EXTRA; \ + movl r1=__gp; /* establish kernel global pointer */ \ + ;; \ + or r12=r12,r14; /* make sp a kernel virtual address */ \ + or r13=r13,r14; /* make `current' a kernel virtual address */ \ + bsw.1;; /* switch back to bank 1 (must be last in insn group) */ + +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC +# define STOPS nop.i 0x0;; nop.i 0x0;; nop.i 0x0;; +#else +# define STOPS +#endif + +#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover;; mov rCRIFS=cr.ifs,) STOPS +#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover;; mov rCRIFS=cr.ifs, mov r15=r19) STOPS +#define SAVE_MIN DO_SAVE_MIN(mov rCRIFS=r0,) STOPS + +/* + * SAVE_REST saves the remainder of pt_regs (with psr.ic on). This + * macro guarantees to preserve all predicate registers, r8, r9, r10, + * r11, r14, and r15. + * + * Assumed state upon entry: + * psr.ic: on + * psr.dt: on + * r2: points to &pt_regs.r16 + * r3: points to &pt_regs.r17 + */ +#define SAVE_REST \ + st8.spill [r2]=r16,16; \ + st8.spill [r3]=r17,16; \ + ;; \ + st8.spill [r2]=r18,16; \ + st8.spill [r3]=r19,16; \ + ;; \ + mov r16=ar.ccv; /* M-unit */ \ + movl r18=FPSR_DEFAULT /* L-unit */ \ + ;; \ + mov r17=ar.fpsr; /* M-unit */ \ + mov ar.fpsr=r18; /* M-unit */ \ + ;; \ + st8.spill [r2]=r20,16; \ + st8.spill [r3]=r21,16; \ + mov r18=b0; \ + ;; \ + st8.spill [r2]=r22,16; \ + st8.spill [r3]=r23,16; \ + mov r19=b7; \ + ;; \ + st8.spill [r2]=r24,16; \ + st8.spill [r3]=r25,16; \ + ;; \ + st8.spill [r2]=r26,16; \ + st8.spill [r3]=r27,16; \ + ;; \ + st8.spill [r2]=r28,16; \ + st8.spill [r3]=r29,16; \ + ;; \ + st8.spill [r2]=r30,16; \ + st8.spill [r3]=r31,16; \ + ;; \ + st8 [r2]=r16,16; /* ar.ccv */ \ + st8 [r3]=r17,16; /* ar.fpsr */ \ + ;; \ + st8 [r2]=r18,16; /* b0 */ \ + st8 [r3]=r19,16+8; /* b7 */ \ + ;; \ + stf.spill [r2]=f6,32; \ + stf.spill [r3]=f7,32; \ + ;; \ + stf.spill [r2]=f8,32; \ + stf.spill [r3]=f9,32 + +/* + * This file defines the interrupt vector table used by the CPU. + * It does not include one entry per possible cause of interruption. + * + * External interrupts only use 1 entry. All others are internal interrupts + * + * The first 20 entries of the table contain 64 bundles each while the + * remaining 48 entries contain only 16 bundles each. + * + * The 64 bundles are used to allow inlining the whole handler for critical + * interrupts like TLB misses. + * + * For each entry, the comment is as follows: + * + * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) + * entry offset ----/ / / / / + * entry number ---------/ / / / + * size of the entry -------------/ / / + * vector name -------------------------------------/ / + * related interrupts (what is the real interrupt?) ----------/ + * + * The table is 32KB in size and must be aligned on 32KB boundary. + * (The CPU ignores the 15 lower bits of the address) + * + * Table is based upon EAS2.4 (June 1998) + */ + +#define FAULT(n) \ + rsm psr.dt; /* avoid nested faults due to TLB misses... */ \ + ;; \ + srlz.d; /* ensure everyone knows psr.dt is off... */ \ + mov r31=pr; \ + mov r19=n;; /* prepare to save predicates */ \ + br.cond.sptk.many dispatch_to_fault_handler + +/* + * As we don't (hopefully) use the space available, we need to fill it with + * nops. 
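
The offsets quoted in the per-entry comments that follow fall straight out of
the layout described above (20 long entries of 64 bundles, then 48 short
entries of 16 bundles, 16 bytes per bundle).  A small C helper, given here
purely as a cross-check of those numbers, reproduces them:

	/* Illustrative only: byte offset of IVT entry n from ia64_ivt. */
	static unsigned long ivt_entry_offset(int n)
	{
		if (n < 20)
			return n * 64 * 16;			/* entries 0x400 apart */
		return 20 * 64 * 16 + (n - 20) * 16 * 16;	/* 0x100 apart from 0x5000 */
	}
	/* e.g. ivt_entry_offset(7) == 0x1c00, ivt_entry_offset(21) == 0x5100,
	   and ivt_entry_offset(67) + 0x100 == 0x8000, the 32KB table size. */
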
the parameter may be used for debugging and is representing the entry + * number + */ +#define BREAK_BUNDLE(a) break.m (a); \ + break.i (a); \ + break.i (a) +/* + * 4 breaks bundles all together + */ +#define BREAK_BUNDLE4(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a) + +/* + * 8 bundles all together (too lazy to use only 4 at a time !) + */ +#define BREAK_BUNDLE8(a); BREAK_BUNDLE4(a); BREAK_BUNDLE4(a) + + .psr abi64 + .psr lsb + .lsb + + .section __ivt_section,"ax" + + .align 32768 // align on 32KB boundary + .global ia64_ivt +ia64_ivt: +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) + /* + * The VHPT vector is invoked when the TLB entry for the virtual page table + * is missing. This happens only as a result of a previous + * (the "original") TLB miss, which may either be caused by an instruction + * fetch or a data access (or non-access). + * + * What we do here is normal TLB miss handing for the _original_ miss, followed + * by inserting the TLB entry for the virtual page table page that the VHPT + * walker was attempting to access. The latter gets inserted as long + * as both L1 and L2 have valid mappings for the faulting address. + * The TLB entry for the original miss gets inserted only if + * the L3 entry indicates that the page is present. + * + * do_page_fault gets invoked in the following cases: + * - the faulting virtual address uses unimplemented address bits + * - the faulting virtual address has no L1, L2, or L3 mapping + */ + mov r16=cr.ifa // get address that caused the TLB miss + ;; + rsm psr.dt // use physical addressing for data + mov r31=pr // save the predicate registers + mov r19=ar.k7 // get page table base address + shl r21=r16,3 // shift bit 60 into sign bit + shr.u r17=r16,61 // get the region number into r17 + ;; + cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? + shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + srlz.d // ensure "rsm psr.dt" has taken effect +(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir +(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 +(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 + ;; +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? + shr.u r18=r16,PMD_SHIFT // shift L2 index into position + ;; +(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones? + ld8 r17=[r17] // fetch the L1 entry (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry + ;; +(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) + shr.u r19=r16,PAGE_SHIFT // shift L3 index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? + dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry + ;; +(p7) ld8 r18=[r17] // read the L3 PTE + mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss + ;; +(p7) tbit.z p6,p7=r18,0 // page present bit cleared? + mov r21=cr.iha // get the VHPT address that caused the TLB miss + ;; // avoid RAW on p7 +(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? 
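
Stripped of the region-5 special case and the physical-addressing and
attribute-bit details, the lookup the VHPT handler has just performed (and
which the ITLB and DTLB handlers below repeat) is an ordinary three-level page
table walk.  A rough C equivalent, for orientation only (pgd_base stands for
the page-table root held in ar.k7; PGDIR_SHIFT, PMD_SHIFT and PAGE_SHIFT come
from pgtable.h):

	#define IDX(addr, shift) (((addr) >> (shift)) & ((1UL << (PAGE_SHIFT - 3)) - 1))

	/* Returns the L3 PTE for addr, or 0 if a level is missing/not present. */
	static unsigned long walk_sketch(unsigned long *pgd_base, unsigned long addr)
	{
		unsigned long l1, l2, l3;

		l1 = pgd_base[IDX(addr, PGDIR_SHIFT)];		/* "fetch the L1 entry" */
		if (!l1)
			return 0;				/* -> page_fault */
		l2 = ((unsigned long *) l1)[IDX(addr, PMD_SHIFT)];
		if (!l2)
			return 0;
		l3 = ((unsigned long *) l2)[IDX(addr, PAGE_SHIFT)];
		if (!(l3 & 1))					/* present bit (bit 0) clear? */
			return 0;
		return l3;	/* the code below feeds this to itc.i or itc.d */
	}
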
+ dep r17=0,r17,0,PAGE_SHIFT // clear low bits to get page address + ;; +(p10) itc.i r18;; // insert the instruction TLB entry (EAS2.6: must be last in insn group!) +(p11) itc.d r18;; // insert the data TLB entry (EAS2.6: must be last in insn group!) +(p6) br.spnt.few page_fault // handle bad address/page not present (page fault) + mov cr.ifa=r21 + + // Now compute and insert the TLB entry for the virtual page table. + // We never execute in a page table page so there is no need to set + // the exception deferral bit. + adds r16=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r17 + ;; +(p7) itc.d r16;; // EAS2.6: must be last in insn group! + mov pr=r31,-1 // restore predicate registers + rfi;; // must be last insn in an insn group + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0400 Entry 1 (size 64 bundles) ITLB (21) + /* + * The ITLB basically does the same as the VHPT handler except + * that we always insert exactly one instruction TLB entry. + */ + mov r16=cr.ifa // get address that caused the TLB miss + ;; + rsm psr.dt // use physical addressing for data + mov r31=pr // save the predicate registers + mov r19=ar.k7 // get page table base address + shl r21=r16,3 // shift bit 60 into sign bit + shr.u r17=r16,61 // get the region number into r17 + ;; + cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? + shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + srlz.d // ensure "rsm psr.dt" has taken effect +(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir +(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 +(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 + ;; +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? + shr.u r18=r16,PMD_SHIFT // shift L2 index into position + ;; +(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones? + ld8 r17=[r17] // fetch the L1 entry (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry + ;; +(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) + shr.u r19=r16,PAGE_SHIFT // shift L3 index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? + dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry + ;; +(p7) ld8 r18=[r17] // read the L3 PTE + ;; +(p7) tbit.z p6,p7=r18,0 // page present bit cleared? + ;; +(p7) itc.i r18;; // insert the instruction TLB entry (EAS2.6: must be last in insn group!) +(p6) br.spnt.few page_fault // handle bad address/page not present (page fault) + ;; + mov pr=r31,-1 // restore predicate registers + rfi;; // must be last insn in an insn group + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) + /* + * The DTLB basically does the same as the VHPT handler except + * that we always insert exactly one data TLB entry. + */ + mov r16=cr.ifa // get address that caused the TLB miss + ;; + rsm psr.dt // use physical addressing for data + mov r31=pr // save the predicate registers + mov r19=ar.k7 // get page table base address + shl r21=r16,3 // shift bit 60 into sign bit + shr.u r17=r16,61 // get the region number into r17 + ;; + cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? 
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + srlz.d // ensure "rsm psr.dt" has taken effect +(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir +(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 +(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 + ;; +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? + shr.u r18=r16,PMD_SHIFT // shift L2 index into position + ;; +(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones? + ld8 r17=[r17] // fetch the L1 entry (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry + ;; +(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) + shr.u r19=r16,PAGE_SHIFT // shift L3 index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? + dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry + ;; +(p7) ld8 r18=[r17] // read the L3 PTE + ;; +(p7) tbit.z p6,p7=r18,0 // page present bit cleared? + ;; +(p7) itc.d r18;; // insert the instruction TLB entry (EAS2.6: must be last in insn group!) +(p6) br.spnt.few page_fault // handle bad address/page not present (page fault) + ;; + mov pr=r31,-1 // restore predicate registers + rfi;; // must be last insn in an insn group + + //----------------------------------------------------------------------------------- + // call do_page_fault (predicates are in r31, psr.dt is off, r16 is faulting address) +page_fault: + SAVE_MIN_WITH_COVER + // + // Copy control registers to temporary registers, then turn on psr bits, + // then copy the temporary regs to the output regs. We have to do this + // because the "alloc" can cause a mandatory store which could lead to + // an "Alt DTLB" fault which we can handle only if psr.ic is on. + // + mov r8=cr.ifa + mov r9=cr.isr + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic | psr.dt + ;; + srlz.d // guarantee that interrupt collection is enabled +(p15) ssm psr.i // restore psr.i + ;; + srlz.i // must precede "alloc"! (srlz.i implies srlz.d) + movl r14=ia64_leave_kernel + ;; + alloc r15=ar.pfs,0,0,3,0 // must be first in insn group + mov out0=r8 + mov out1=r9 + ;; + SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 // out2 = pointer to pt_regs + br.call.sptk.few b6=ia64_do_page_fault // ignore return address + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) + mov r16=cr.ifa // get address that caused the TLB miss + movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RX + ;; + shr.u r18=r16,57 // move address bit 61 to bit 4 + dep r16=0,r16,52,12 // clear top 12 bits of address + ;; + andcm r18=0x10,r18 // bit 4=~address-bit(61) + dep r16=r17,r16,0,12 // insert PTE control bits into r16 + ;; + or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6 + ;; + itc.i r16;; // insert the TLB entry(EAS2.6: must be last in insn group!) 
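
This alternate handler (and its data-side twin just below) does not walk the
page tables at all; it fabricates an identity-mapped translation on the spot.
In rough C, with ifa standing for the faulting address from cr.ifa, the PTE
built in r16 is (the dep only deposits the low 12 bits of the control
constant, hence the mask; bit 4 is the uncacheable memory attribute):

	unsigned long pte;

	pte  = ifa & ~0xfff0000000000000UL;		/* clear the top 12 bits */
	pte &= ~0xfffUL;
	pte |= (__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RX) & 0xfff;  /* _PAGE_AR_RW on the data side */
	if (!(ifa & (1UL << 61)))			/* region 6 rather than 7 */
		pte |= 0x10;				/* mark it uncacheable */
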
+ rfi;; // must be last insn in an insn group + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) + mov r16=cr.ifa // get address that caused the TLB miss + movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RW + ;; + shr.u r18=r16,57 // move address bit 61 to bit 4 + dep r16=0,r16,52,12 // clear top 12 bits of address + ;; + andcm r18=0x10,r18 // bit 4=~address-bit(61) + dep r16=r17,r16,0,12 // insert PTE control bits into r16 + ;; + or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6 + ;; + itc.d r16;; // insert the TLB entry (EAS2.6: must be last in insn group!) + rfi;; // must be last insn in an insn group + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) + // + // In the absence of kernel bugs, we get here when the Dirty-bit, Instruction + // Access-bit, or Data Access-bit faults cause a nested fault because the + // dTLB entry for the virtual page table isn't present. In such a case, + // we lookup the pte for the faulting address by walking the page table + // and return to the contination point passed in register r30. + // In accessing the page tables, we don't need to check for NULL entries + // because if the page tables didn't map the faulting address, it would not + // be possible to receive one of the above faults. + // + // Input: r16: faulting address + // r29: saved b0 + // r30: continuation address + // + // Output: r17: physical address of L3 PTE of faulting address + // r29: saved b0 + // r30: continuation address + // + // Clobbered: b0, r18, r19, r21, r31, psr.dt (cleared) + // + rsm psr.dt // switch to using physical data addressing + mov r19=ar.k7 // get the page table base address + shl r21=r16,3 // shift bit 60 into sign bit + ;; + mov r31=pr // save the predicate registers + shr.u r17=r16,61 // get the region number into r17 + ;; + cmp.eq p6,p7=5,r17 // is faulting address in region 5? 
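
The point of this walker is easiest to see next to the Dirty-bit and
Access-bit handlers that follow further below.  What those handlers do is, in
rough C (vpte stands for the virtual-page-table address that thash computes;
the load through it is exactly the access that may nest-fault back into this
code):

	unsigned long pte;

	pte = *vpte;		/* may raise the nested dTLB fault handled here */
	pte |= _PAGE_D;		/* _PAGE_A in the two Access-bit vectors */
	*vpte = pte;		/* write the updated PTE back to the page table */
	/* ...then itc.d (or itc.i) re-inserts pte and rfi restarts the access */
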
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + srlz.d +(p6) movl r17=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir +(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 +(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 + ;; +(p6) dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + shr.u r18=r16,PMD_SHIFT // shift L2 index into position + ;; + ld8 r17=[r17] // fetch the L1 entry + mov b0=r30 + ;; + dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry + ;; + ld8 r17=[r17] // fetch the L2 entry + shr.u r19=r16,PAGE_SHIFT // shift L3 index into position + ;; + dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry + ;; + mov pr=r31,-1 // restore predicates + br.cond.sptk.few b0 // return to continuation point + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) + FAULT(6) + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) + FAULT(7) + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) + // + // What we do here is to simply turn on the dirty bit in the PTE. We need + // to update both the page-table and the TLB entry. To efficiently access + // the PTE, we address it through the virtual page table. Most likely, the + // TLB entry for the relevant virtual page table page is still present in + // the TLB so we can normally do this without additional TLB misses. + // In case the necessary virtual page table TLB entry isn't present, we take + // a nested TLB miss hit where we look up the physical address of the L3 PTE + // and then continue at label 1 below. + // + mov r16=cr.ifa // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + ;; + thash r17=r16 // compute virtual address of L3 PTE + mov r29=b0 // save b0 in case of nested fault + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_D,r18 // set the dirty bit + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + itc.d r18;; // install updated PTE (EAS2.6: must be last in insn group!) + rfi;; // must be last insn in an insn group + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) + // Like Entry 8, except for instruction access + mov r16=cr.ifa // get the address that caused the fault +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + mov r31=pr // save predicates + mov r30=cr.ipsr + ;; + extr.u r17=r30,IA64_PSR_IS_BIT,1 // get instruction arch. 
indicator + ;; + cmp.eq p6,p0 = r17,r0 // check if IA64 instruction set + ;; +(p6) mov r16=cr.iip // get real faulting address + ;; +(p6) mov cr.ifa=r16 // reset IFA + mov pr=r31,-1 +#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ + movl r30=1f // load continuation point in case of nested fault + ;; + thash r17=r16 // compute virtual address of L3 PTE + mov r29=b0 // save b0 in case of nested fault) + ;; +1: ld8 r18=[r17] + ;; // avoid raw on r18 + or r18=_PAGE_A,r18 // set the accessed bit + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + itc.i r18;; // install updated PTE (EAS2.6: must be last in insn group!) + rfi;; // must be last insn in an insn group + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) + // Like Entry 8, except for data access + mov r16=cr.ifa // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + ;; + thash r17=r16 // compute virtual address of L3 PTE + mov r29=b0 // save b0 in case of nested fault) + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_A,r18 // set the accessed bit + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + itc.d r18;; // install updated PTE (EAS2.6: must be last in insn group!) + rfi;; // must be last insn in an insn group + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) + mov r16=cr.iim + mov r17=__IA64_BREAK_SYSCALL + mov r31=pr // prepare to save predicates + rsm psr.dt // avoid nested faults due to TLB misses... + ;; + srlz.d // ensure everyone knows psr.dt is off... + cmp.eq p0,p7=r16,r17 // is this a system call? (p7 <- false, if so) + +#if 1 + // Allow syscalls via the old system call number for the time being. This is + // so we can transition to the new syscall number in a relatively smooth + // fashion. + mov r17=0x80000 + ;; +(p7) cmp.eq.or.andcm p0,p7=r16,r17 // is this the old syscall number? +#endif + +(p7) br.cond.spnt.many non_syscall + + SAVE_MIN // uses r31; defines r2: + + // turn interrupt collection and data translation back on: + ssm psr.ic | psr.dt + srlz.d // guarantee that interrupt collection is enabled + cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 + ;; +(p15) ssm psr.i // restore psr.i + ;; + srlz.i // ensure everybody knows psr.ic and psr.dt are back on + adds r8=(IA64_PT_REGS_R8_OFFSET-IA64_PT_REGS_R16_OFFSET),r2 + ;; + stf8 [r8]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error) + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + SAVE_REST + ;; // avoid WAW on r2 & r3 + + mov r3=255 + adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024 + adds r2=IA64_TASK_FLAGS_OFFSET,r13 // r2 = ¤t->flags + + ;; + cmp.geu.unc p6,p7=r3,r15 // (syscall > 0 && syscall <= 1024+255) ? + movl r16=sys_call_table + ;; +(p6) shladd r16=r15,3,r16 + movl r15=ia64_ret_from_syscall +(p7) adds r16=(__NR_ni_syscall-1024)*8,r16 // force __NR_ni_syscall + ;; + ld8 r16=[r16] // load address of syscall entry point + mov rp=r15 // set the real return addr + ;; + ld8 r2=[r2] // r2 = current->flags + mov b6=r16 + + // arrange things so we skip over break instruction when returning: + + adds r16=16,sp // get pointer to cr_ipsr + adds r17=24,sp // get pointer to cr_iip + ;; + ld8 r18=[r16] // fetch cr_ipsr + tbit.z p8,p0=r2,5 // (current->flags & PF_TRACESYS) == 0? 
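
In C terms, the system-call dispatch above (together with the cr.iip
adjustment that continues just below) amounts to the following sketch;
syscall_nr stands for the value the caller put in r15 and fn is an invented
local name, while sys_call_table, __NR_ni_syscall and PF_TRACESYS are the
symbols the assembly actually uses:

	extern unsigned long sys_call_table[];	/* declared here only for the sketch */
	long nr = syscall_nr - 1024;		/* the handler subtracts the 1024 base */
	long (*fn)(long, long, long, long, long);

	if ((unsigned long) nr <= 255)		/* within the 256-entry table? */
		fn = (typeof(fn)) sys_call_table[nr];
	else
		fn = (typeof(fn)) sys_call_table[__NR_ni_syscall - 1024];
	/* The code below then bumps cr.iip/psr.ei past the break instruction and
	   either calls fn directly or, when (current->flags & PF_TRACESYS) is
	   set, goes through ia64_trace_syscall instead. */
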
+ ;; + ld8 r19=[r17] // fetch cr_iip + extr.u r20=r18,41,2 // extract ei field + ;; + cmp.eq p6,p7=2,r20 // isr.ei==2? + adds r19=16,r19 // compute address of next bundle + ;; +(p6) mov r20=0 // clear ei to 0 +(p7) adds r20=1,r20 // increment ei to next slot + ;; +(p6) st8 [r17]=r19 // store new cr.iip if cr.isr.ei wrapped around + dep r18=r20,r18,41,2 // insert new ei into cr.isr + ;; + st8 [r16]=r18 // store new value for cr.isr + +(p8) br.call.sptk.few b6=b6 // ignore this return addr + br.call.sptk.few rp=ia64_trace_syscall // rp will be overwritten (ignored) + // NOT REACHED + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) + rsm psr.dt // avoid nested faults due to TLB misses... + ;; + srlz.d // ensure everyone knows psr.dt is off... + mov r31=pr // prepare to save predicates + ;; + + SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 + ssm psr.ic | psr.dt // turn interrupt collection and data translation back on + ;; + adds r3=8,r2 // set up second base pointer for SAVE_REST + cmp.eq pEOI,p0=r0,r0 // set pEOI flag so that ia64_leave_kernel writes cr.eoi + srlz.i // ensure everybody knows psr.ic and psr.dt are back on + ;; + SAVE_REST + ;; + alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + mov out0=r0 // defer reading of cr.ivr to handle_irq... +#else + mov out0=cr.ivr // pass cr.ivr as first arg +#endif + add out1=16,sp // pass pointer to pt_regs as second arg + ;; + srlz.d // make sure we see the effect of cr.ivr + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.call.sptk.few b6=ia64_handle_irq + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3400 Entry 13 (size 64 bundles) Reserved + FAULT(13) + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3800 Entry 14 (size 64 bundles) Reserved + FAULT(14) + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3c00 Entry 15 (size 64 bundles) Reserved + FAULT(15) + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4000 Entry 16 (size 64 bundles) Reserved + FAULT(16) + +#ifdef CONFIG_IA32_SUPPORT + + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... 
+ + // IA32 interrupt entry point + +dispatch_to_ia32_handler: + SAVE_MIN + ;; + mov r14=cr.isr + ssm psr.ic | psr.dt + srlz.d // guarantee that interrupt collection is enabled + ;; +(p15) ssm psr.i + ;; + srlz.d + adds r3=8,r2 // Base pointer for SAVE_REST + ;; + SAVE_REST + ;; + mov r15=0x80 + shr r14=r14,16 // Get interrupt number + ;; + cmp.ne p6,p0=r14,r15 +(p6) br.call.dpnt.few b6=non_ia32_syscall + + adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions + + ;; + alloc r15=ar.pfs,0,0,6,0 // must first in an insn group + ;; + ld4 r8=[r14],8 // r8 == EAX (syscall number) + mov r15=0xff + ;; + cmp.ltu.unc p6,p7=r8,r15 + ld4 out1=[r14],8 // r9 == ecx + ;; + ld4 out2=[r14],8 // r10 == edx + ;; + ld4 out0=[r14] // r11 == ebx + adds r14=(IA64_PT_REGS_R8_OFFSET-(8*3)) + 16,sp + ;; + ld4 out5=[r14],8 // r13 == ebp + ;; + ld4 out3=[r14],8 // r14 == esi + adds r2=IA64_TASK_FLAGS_OFFSET,r13 // r2 = ¤t->flags + ;; + ld4 out4=[r14] // R15 == edi + movl r16=ia32_syscall_table + ;; +(p6) shladd r16=r8,3,r16 // Force ni_syscall if not valid syscall number + ld8 r2=[r2] // r2 = current->flags + ;; + ld8 r16=[r16] + tbit.z p8,p0=r2,5 // (current->flags & PF_TRACESYS) == 0? + ;; + movl r15=ia32_ret_from_syscall + mov b6=r16 + ;; + mov rp=r15 +(p8) br.call.sptk.few b6=b6 + br.call.sptk.few rp=ia32_trace_syscall // rp will be overwritten (ignored) + +non_ia32_syscall: + alloc r15=ar.pfs,0,0,2,0 + mov out0=r14 // interrupt # + add out1=16,sp // pointer to pt_regs + ;; // avoid WAW on CFM + br.call.sptk.few rp=ia32_bad_interrupt + ;; + movl r15=ia64_leave_kernel + ;; + mov rp=r15 + br.ret.sptk.many rp + +#endif /* CONFIG_IA32_SUPPORT */ + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4400 Entry 17 (size 64 bundles) Reserved + FAULT(17) + +non_syscall: + +#ifdef CONFIG_KDB + mov r17=__IA64_BREAK_KDB + ;; + cmp.eq p8,p0=r16,r17 // is this a kernel breakpoint? +#endif + + SAVE_MIN_WITH_COVER + + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... + + mov r8=cr.iim // get break immediate (must be done while psr.ic is off) + adds r3=8,r2 // set up second base pointer for SAVE_REST + + // turn interrupt collection and data translation back on: + ssm psr.ic | psr.dt + srlz.d // guarantee that interrupt collection is enabled + ;; +(p15) ssm psr.i // restore psr.i + ;; + srlz.i // ensure everybody knows psr.ic and psr.dt are back on + movl r15=ia64_leave_kernel + ;; + alloc r14=ar.pfs,0,0,2,0 + mov out0=r8 // break number + add out1=16,sp // pointer to pt_regs + ;; + SAVE_REST + mov rp=r15 + ;; +#ifdef CONFIG_KDB +(p8) br.call.sptk.few b6=ia64_invoke_kdb +#endif + br.call.sptk.few b6=ia64_bad_break // avoid WAW on CFM and ignore return addr + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4800 Entry 18 (size 64 bundles) Reserved + FAULT(18) + + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... 
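
For reference, the ia32 system-call dispatch in dispatch_to_ia32_handler above
reduces to the following C sketch.  ia32_fn_t and ret are invented names and
regs stands for the pt_regs area the handler saved; ia32_syscall_table and the
pt_regs slots (r8=EAX, r11=EBX, r9=ECX, r10=EDX, r14=ESI, r15=EDI, r13=EBP)
are what the assembly itself uses, and index 0 is the slot the code falls back
to for an invalid number (ni_syscall, per the comment above):

	typedef long (*ia32_fn_t)(long, long, long, long, long, long);
	unsigned long nr = regs->r8;			/* EAX holds the syscall number */
	ia32_fn_t fn;
	long ret;

	fn = (ia32_fn_t) ia32_syscall_table[nr < 0xff ? nr : 0];
	ret = fn(regs->r11, regs->r9, regs->r10,	/* EBX, ECX, EDX */
		 regs->r14, regs->r15, regs->r13);	/* ESI, EDI, EBP */
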
+ +dispatch_unaligned_handler: + SAVE_MIN_WITH_COVER + ;; + // + // we can't have the alloc while psr.ic is cleared because + // we might get a mandatory RSE (when you reach the end of the + // rotating partition when doing the alloc) spill which could cause + // a page fault on the kernel virtual address and the handler + // wouldn't get the state to recover. + // + mov r15=cr.ifa + ssm psr.ic | psr.dt + srlz.d // guarantee that interrupt collection is enabled + ;; +(p15) ssm psr.i // restore psr.i + ;; + srlz.i + adds r3=8,r2 // set up second base pointer + ;; + SAVE_REST + ;; + alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) + ;; // avoid WAW on r14 + movl r14=ia64_leave_kernel + mov out0=r15 // out0 = faulting address + adds out1=16,sp // out1 = pointer to pt_regs + ;; + mov rp=r14 + br.sptk.few ia64_prepare_handle_unaligned + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4c00 Entry 19 (size 64 bundles) Reserved + FAULT(19) + + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... + +dispatch_to_fault_handler: + // + // Input: + // psr.ic: off + // psr.dt: off + // r19: fault vector number (e.g., 24 for General Exception) + // r31: contains saved predicates (pr) + // + SAVE_MIN_WITH_COVER_R19 + // + // Copy control registers to temporary registers, then turn on psr bits, + // then copy the temporary regs to the output regs. We have to do this + // because the "alloc" can cause a mandatory store which could lead to + // an "Alt DTLB" fault which we can handle only if psr.ic is on. + // + mov r8=cr.isr + mov r9=cr.ifa + mov r10=cr.iim + mov r11=cr.itir + ;; + ssm psr.ic | psr.dt + srlz.d // guarantee that interrupt collection is enabled + ;; +(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + srlz.i // must precede "alloc"! 
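
At the C level both of these dispatchers just marshal a few control registers
into ordinary function arguments (the second one's out-register setup
continues just below).  Read off that setup, and with the entry-point names
taken from the branch targets, the calls amount to the sketch below; the
authoritative prototypes live with the C handlers themselves:

	ia64_prepare_handle_unaligned(ifa, regs);  /* out0 = cr.ifa, out1 = &pt_regs */
	ia64_fault(vector, isr, ifa, iim, itir);   /* out0 = r19 (vector number),
						      out1..out4 = cr.isr, cr.ifa,
						      cr.iim, cr.itir */
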
+ ;; + alloc r14=ar.pfs,0,0,5,0 // must be first in insn group + mov out0=r15 + mov out1=r8 + mov out2=r9 + mov out3=r10 + mov out4=r11 + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 +#ifdef CONFIG_KDB + br.call.sptk.few b6=ia64_invoke_kdb_fault_handler +#else + br.call.sptk.few b6=ia64_fault +#endif +// +// --- End of long entries, Beginning of short entries +// + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) + mov r16=cr.ifa + rsm psr.dt +#if 0 + // If you disable this, you MUST re-enable to update_mmu_cache() code in pgtable.h + mov r17=_PAGE_SIZE_4K<<2 + ;; + ptc.l r16,r17 +#endif + ;; + mov r31=pr + srlz.d + br.cond.sptk.many page_fault + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) + mov r16=cr.ifa + rsm psr.dt + mov r31=pr + ;; + srlz.d + br.cond.sptk.many page_fault + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) + mov r16=cr.ifa + rsm psr.dt + mov r31=pr + ;; + srlz.d + br.cond.sptk.many page_fault + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) + mov r16=cr.ifa + rsm psr.dt + mov r31=pr + ;; + srlz.d + br.cond.sptk.many page_fault + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) + FAULT(24) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) + rsm psr.dt | psr.dfh // ensure we can access fph + ;; + srlz.d + mov r31=pr + mov r19=25 + br.cond.sptk.many dispatch_to_fault_handler + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) + FAULT(26) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5700 Entry 27 (size 16 bundles) Speculation (40) + // + // A [f]chk.[as] instruction needs to take the branch to + // the recovery code but this part of the architecture is + // not implemented in hardware on some CPUs, such as Itanium. + // Thus, in general we need to emulate the behavior. + // IIM contains the relative target (not yet sign extended). + // So after sign extending it we simply add it to IIP. + // We also need to reset the EI field of the IPSR to zero, + // i.e., the slot to restart into. 
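
The fix-up that the instructions just below perform can be written as a small
C helper; it is given here only to make the bit positions explicit (iim is the
value read from cr.iim, and iip/ipsr are the values written back to cr.iip and
cr.ipsr before the rfi):

	/* Illustrative only: emulate the chk branch by hand. */
	static void speculation_fixup(unsigned long iim, unsigned long *iip,
				      unsigned long *ipsr)
	{
		long off = (long) (iim << 43) >> 39;	/* sign-extend imm21, x16 (bundle size) */

		*iip  += off;			/* branch to the recovery address */
		*ipsr &= ~(3UL << 41);		/* clear psr.ei: restart in slot 0 */
	}
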
+ // + // cr.imm contains zero_ext(imm21) + // + mov r18=cr.iim + ;; + mov r17=cr.iip + shl r18=r18,43 // put sign bit in position (43=64-21) + ;; + + mov r16=cr.ipsr + shr r18=r18,39 // sign extend (39=43-4) + ;; + + add r17=r17,r18 // now add the offset + ;; + mov cr.iip=r17 + dep r16=0,r16,41,2 // clear EI + ;; + + mov cr.ipsr=r16 + ;; + + rfi;; // and go back (must be last insn in group) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5800 Entry 28 (size 16 bundles) Reserved + FAULT(28) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) + FAULT(29) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) + rsm psr.dt // avoid nested faults due to TLB misses... + mov r16=cr.ipsr + mov r31=pr // prepare to save predicates + ;; + srlz.d // ensure everyone knows psr.dt is off + mov r19=30 // error vector for fault_handler (when kernel) + extr.u r16=r16,32,2 // extract psr.cpl + ;; + cmp.eq p6,p7=r0,r16 // if kernel cpl then fault else emulate +(p7) br.cond.sptk.many dispatch_unaligned_handler +(p6) br.cond.sptk.many dispatch_to_fault_handler + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) + FAULT(31) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) + FAULT(32) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) + FAULT(33) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Tranfer Trap (66) + FAULT(34) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) + FAULT(35) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) + FAULT(36) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6100 Entry 37 (size 16 bundles) Reserved + FAULT(37) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6200 Entry 38 (size 16 bundles) Reserved + FAULT(38) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6300 Entry 39 (size 16 bundles) Reserved + FAULT(39) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6400 Entry 40 (size 16 bundles) Reserved + FAULT(40) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6500 Entry 41 (size 16 bundles) Reserved + FAULT(41) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6600 Entry 42 (size 16 bundles) Reserved + FAULT(42) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6700 Entry 
43 (size 16 bundles) Reserved + FAULT(43) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6800 Entry 44 (size 16 bundles) Reserved + FAULT(44) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) + FAULT(45) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) + FAULT(46) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) +#ifdef CONFIG_IA32_SUPPORT + rsm psr.dt + ;; + srlz.d + mov r31=pr + br.cond.sptk.many dispatch_to_ia32_handler +#else + FAULT(47) +#endif + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6c00 Entry 48 (size 16 bundles) Reserved + FAULT(48) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6d00 Entry 49 (size 16 bundles) Reserved + FAULT(49) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6e00 Entry 50 (size 16 bundles) Reserved + FAULT(50) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6f00 Entry 51 (size 16 bundles) Reserved + FAULT(51) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7000 Entry 52 (size 16 bundles) Reserved + FAULT(52) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7100 Entry 53 (size 16 bundles) Reserved + FAULT(53) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7200 Entry 54 (size 16 bundles) Reserved + FAULT(54) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7300 Entry 55 (size 16 bundles) Reserved + FAULT(55) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7400 Entry 56 (size 16 bundles) Reserved + FAULT(56) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7500 Entry 57 (size 16 bundles) Reserved + FAULT(57) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7600 Entry 58 (size 16 bundles) Reserved + FAULT(58) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7700 Entry 59 (size 16 bundles) Reserved + FAULT(59) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7800 Entry 60 (size 16 bundles) Reserved + FAULT(60) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7900 Entry 61 (size 16 bundles) Reserved + FAULT(61) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7a00 Entry 62 (size 16 bundles) Reserved + FAULT(62) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7b00 Entry 63 (size 16 bundles) Reserved + 
FAULT(63) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7c00 Entry 64 (size 16 bundles) Reserved + FAULT(64) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7d00 Entry 65 (size 16 bundles) Reserved + FAULT(65) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7e00 Entry 66 (size 16 bundles) Reserved + FAULT(66) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7f00 Entry 67 (size 16 bundles) Reserved + FAULT(67) diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c new file mode 100644 index 000000000..153fb5684 --- /dev/null +++ b/arch/ia64/kernel/machvec.c @@ -0,0 +1,48 @@ +#include <linux/kernel.h> + +#include <asm/page.h> +#include <asm/machvec.h> + +struct ia64_machine_vector ia64_mv; + +void +machvec_noop (void) +{ +} + +/* + * Most platforms use this routine for mapping page frame addresses + * into a memory map index. + */ +unsigned long +map_nr_dense (unsigned long addr) +{ + return MAP_NR_DENSE(addr); +} + +static struct ia64_machine_vector * +lookup_machvec (const char *name) +{ + extern struct ia64_machine_vector machvec_start[]; + extern struct ia64_machine_vector machvec_end[]; + struct ia64_machine_vector *mv; + + for (mv = machvec_start; mv < machvec_end; ++mv) + if (strcmp (mv->name, name) == 0) + return mv; + + return 0; +} + +void +machvec_init (const char *name) +{ + struct ia64_machine_vector *mv; + + mv = lookup_machvec(name); + if (!mv) { + panic("generic kernel failed to find machine vector for platform %s!", name); + } + ia64_mv = *mv; + printk("booting generic kernel on platform %s\n", name); +} diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c new file mode 100644 index 000000000..320c56ebc --- /dev/null +++ b/arch/ia64/kernel/mca.c @@ -0,0 +1,842 @@ +/* + * File: mca.c + * Purpose: Generic MCA handling layer + * + * Copyright (C) 1999 Silicon Graphics, Inc. 
+ * Copyright (C) Vijay Chander(vijay@engr.sgi.com) + */ +#include <linux/types.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/sal.h> +#include <asm/mca.h> +#include <asm/spinlock.h> +#include <asm/irq.h> +#include <asm/machvec.h> + + +ia64_mc_info_t ia64_mc_info; +ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state; +ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state; +u64 ia64_mca_proc_state_dump[256]; +u64 ia64_mca_stack[1024]; +u64 ia64_mca_stackframe[32]; +u64 ia64_mca_bspstore[1024]; + +static void ia64_mca_cmc_vector_setup(int enable, + int_vector_t cmc_vector); +static void ia64_mca_wakeup_ipi_wait(void); +static void ia64_mca_wakeup(int cpu); +static void ia64_mca_wakeup_all(void); +static void ia64_log_init(int,int); +static void ia64_log_get(int,int, prfunc_t); +static void ia64_log_clear(int,int,int, prfunc_t); + +/* + * ia64_mca_cmc_vector_setup + * Setup the correctable machine check vector register in the processor + * Inputs + * Enable (1 - enable cmc interrupt , 0 - disable) + * CMC handler entry point (if enabled) + * + * Outputs + * None + */ +static void +ia64_mca_cmc_vector_setup(int enable, + int_vector_t cmc_vector) +{ + cmcv_reg_t cmcv; + + cmcv.cmcv_regval = 0; + cmcv.cmcv_mask = enable; + cmcv.cmcv_vector = cmc_vector; + ia64_set_cmcv(cmcv.cmcv_regval); +} + + +#if defined(MCA_TEST) + +sal_log_processor_info_t slpi_buf; + +void +mca_test(void) +{ + slpi_buf.slpi_valid.slpi_psi = 1; + slpi_buf.slpi_valid.slpi_cache_check = 1; + slpi_buf.slpi_valid.slpi_tlb_check = 1; + slpi_buf.slpi_valid.slpi_bus_check = 1; + slpi_buf.slpi_valid.slpi_minstate = 1; + slpi_buf.slpi_valid.slpi_bank1_gr = 1; + slpi_buf.slpi_valid.slpi_br = 1; + slpi_buf.slpi_valid.slpi_cr = 1; + slpi_buf.slpi_valid.slpi_ar = 1; + slpi_buf.slpi_valid.slpi_rr = 1; + slpi_buf.slpi_valid.slpi_fr = 1; + + ia64_os_mca_dispatch(); +} + +#endif /* #if defined(MCA_TEST) */ + +/* + * mca_init + * Do all the mca specific initialization on a per-processor basis. + * + * 1. Register spinloop and wakeup request interrupt vectors + * + * 2. Register OS_MCA handler entry point + * + * 3. Register OS_INIT handler entry point + * + * 4. Initialize CMCV register to enable/disable CMC interrupt on the + * processor and hook a handler in the platform-specific mca_init. + * + * 5. Initialize MCA/CMC/INIT related log buffers maintained by the OS. + * + * Inputs + * None + * Outputs + * None + */ +void __init +mca_init(void) +{ + int i; + + MCA_DEBUG("mca_init : begin\n"); + /* Clear the Rendez checkin flag for all cpus */ + for(i = 0 ; i < IA64_MAXCPUS; i++) + ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; + + /* NOTE : The actual irqs for the rendez, wakeup and + * cmc interrupts are requested in the platform-specific + * mca initialization code. 
+ */ + /* + * Register the rendezvous spinloop and wakeup mechanism with SAL + */ + + /* Register the rendezvous interrupt vector with SAL */ + if (ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT, + SAL_MC_PARAM_MECHANISM_INT, + IA64_MCA_RENDEZ_INT_VECTOR, + IA64_MCA_RENDEZ_TIMEOUT)) + return; + + /* Register the wakeup interrupt vector with SAL */ + if (ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP, + SAL_MC_PARAM_MECHANISM_INT, + IA64_MCA_WAKEUP_INT_VECTOR, + 0)) + return; + + MCA_DEBUG("mca_init : registered mca rendezvous spinloop and wakeup mech.\n"); + /* + * Setup the correctable machine check vector + */ + ia64_mca_cmc_vector_setup(IA64_CMC_INT_ENABLE, + IA64_MCA_CMC_INT_VECTOR); + + MCA_DEBUG("mca_init : correctable mca vector setup done\n"); + + ia64_mc_info.imi_mca_handler = __pa(ia64_os_mca_dispatch); + ia64_mc_info.imi_mca_handler_size = + __pa(ia64_os_mca_dispatch_end) - __pa(ia64_os_mca_dispatch); + /* Register the os mca handler with SAL */ + if (ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, + ia64_mc_info.imi_mca_handler, + __pa(ia64_get_gp()), + ia64_mc_info.imi_mca_handler_size, + 0,0,0)) + + return; + + MCA_DEBUG("mca_init : registered os mca handler with SAL\n"); + + ia64_mc_info.imi_monarch_init_handler = __pa(ia64_monarch_init_handler); + ia64_mc_info.imi_monarch_init_handler_size = IA64_INIT_HANDLER_SIZE; + ia64_mc_info.imi_slave_init_handler = __pa(ia64_slave_init_handler); + ia64_mc_info.imi_slave_init_handler_size = IA64_INIT_HANDLER_SIZE; + /* Register the os init handler with SAL */ + if (ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, + ia64_mc_info.imi_monarch_init_handler, + __pa(ia64_get_gp()), + ia64_mc_info.imi_monarch_init_handler_size, + ia64_mc_info.imi_slave_init_handler, + __pa(ia64_get_gp()), + ia64_mc_info.imi_slave_init_handler_size)) + + + return; + + MCA_DEBUG("mca_init : registered os init handler with SAL\n"); + + /* Initialize the areas set aside by the OS to buffer the + * platform/processor error states for MCA/INIT/CMC + * handling. + */ + ia64_log_init(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR); + ia64_log_init(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PLATFORM); + ia64_log_init(SAL_INFO_TYPE_INIT, SAL_SUB_INFO_TYPE_PROCESSOR); + ia64_log_init(SAL_INFO_TYPE_INIT, SAL_SUB_INFO_TYPE_PLATFORM); + ia64_log_init(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR); + ia64_log_init(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PLATFORM); + + mca_init_platform(); + + MCA_DEBUG("mca_init : platform-specific mca handling setup done\n"); + +#if defined(MCA_TEST) + mca_test(); +#endif /* #if defined(MCA_TEST) */ + + printk("Mca related initialization done\n"); +} + +/* + * ia64_mca_wakeup_ipi_wait + * Wait for the inter-cpu interrupt to be sent by the + * monarch processor once it is done with handling the + * MCA. + * Inputs + * None + * Outputs + * None + */ +void +ia64_mca_wakeup_ipi_wait(void) +{ + int irr_num = (IA64_MCA_WAKEUP_INT_VECTOR >> 6); + int irr_bit = (IA64_MCA_WAKEUP_INT_VECTOR & 0x3f); + u64 irr = 0; + + do { + switch(irr_num) { + case 0: + irr = ia64_get_irr0(); + break; + case 1: + irr = ia64_get_irr1(); + break; + case 2: + irr = ia64_get_irr2(); + break; + case 3: + irr = ia64_get_irr3(); + break; + } + } while (!(irr & (1 << irr_bit))) ; +} + +/* + * ia64_mca_wakeup + * Send an inter-cpu interrupt to wake-up a particular cpu + * and mark that cpu to be out of rendez. 
+ * Inputs + * cpuid + * Outputs + * None + */ +void +ia64_mca_wakeup(int cpu) +{ + ipi_send(cpu, IA64_MCA_WAKEUP_INT_VECTOR, IA64_IPI_DM_INT); + ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; + +} +/* + * ia64_mca_wakeup_all + * Wakeup all the cpus which have rendez'ed previously. + * Inputs + * None + * Outputs + * None + */ +void +ia64_mca_wakeup_all(void) +{ + int cpu; + + /* Clear the Rendez checkin flag for all cpus */ + for(cpu = 0 ; cpu < IA64_MAXCPUS; cpu++) + if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE) + ia64_mca_wakeup(cpu); + +} +/* + * ia64_mca_rendez_interrupt_handler + * This is handler used to put slave processors into spinloop + * while the monarch processor does the mca handling and later + * wake each slave up once the monarch is done. + * Inputs + * None + * Outputs + * None + */ +void +ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs) +{ + int flags; + /* Mask all interrupts */ + save_and_cli(flags); + + ia64_mc_info.imi_rendez_checkin[ia64_get_cpuid(0)] = IA64_MCA_RENDEZ_CHECKIN_DONE; + /* Register with the SAL monarch that the slave has + * reached SAL + */ + ia64_sal_mc_rendez(); + + /* Wait for the wakeup IPI from the monarch + * This waiting is done by polling on the wakeup-interrupt + * vector bit in the processor's IRRs + */ + ia64_mca_wakeup_ipi_wait(); + + /* Enable all interrupts */ + restore_flags(flags); + + +} + + +/* + * ia64_mca_wakeup_int_handler + * The interrupt handler for processing the inter-cpu interrupt to the + * slave cpu which was spinning in the rendez loop. + * Since this spinning is done by turning off the interrupts and + * polling on the wakeup-interrupt bit in the IRR, there is + * nothing useful to be done in the handler. + * Inputs + * wakeup_irq (Wakeup-interrupt bit) + * arg (Interrupt handler specific argument) + * ptregs (Exception frame at the time of the interrupt) + * Outputs + * + */ +void +ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs) +{ + +} + +/* + * ia64_return_to_sal_check + * This is function called before going back from the OS_MCA handler + * to the OS_MCA dispatch code which finally takes the control back + * to the SAL. + * The main purpose of this routine is to setup the OS_MCA to SAL + * return state which can be used by the OS_MCA dispatch code + * just before going back to SAL. + * Inputs + * None + * Outputs + * None + */ + +void +ia64_return_to_sal_check(void) +{ + /* Copy over some relevant stuff from the sal_to_os_mca_handoff + * so that it can be used at the time of os_mca_to_sal_handoff + */ + ia64_os_to_sal_handoff_state.imots_sal_gp = + ia64_sal_to_os_handoff_state.imsto_sal_gp; + + ia64_os_to_sal_handoff_state.imots_sal_check_ra = + ia64_sal_to_os_handoff_state.imsto_sal_check_ra; + + /* For now ignore the MCA */ + ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED; +} +/* + * ia64_mca_ucmc_handler + * This is uncorrectable machine check handler called from OS_MCA + * dispatch code which is in turn called from SAL_CHECK(). + * This is the place where the core of OS MCA handling is done. + * Right now the logs are extracted and displayed in a well-defined + * format. This handler code is supposed to be run only on the + * monarch processor. Once the monarch is done with MCA handling + * further MCA logging is enabled by clearing logs. + * Monarch also has the duty of sending wakeup-IPIs to pull the + * slave processors out of rendez. spinloop. 
+ * Inputs + * None + * Outputs + * None + */ +void +ia64_mca_ucmc_handler(void) +{ + + /* Get the MCA processor log */ + ia64_log_get(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk); + /* Get the MCA platform log */ + ia64_log_get(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PLATFORM, (prfunc_t)printk); + + ia64_log_print(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk); + + /* + * Do some error handling - Platform-specific mca handler is called at this point + */ + + mca_handler_platform() ; + + /* Clear the SAL MCA logs */ + ia64_log_clear(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, 1, printk); + ia64_log_clear(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PLATFORM, 1, printk); + + /* Wakeup all the processors which are spinning in the rendezvous + * loop. + */ + ia64_mca_wakeup_all(); + ia64_return_to_sal_check(); +} + +/* + * SAL to OS entry point for INIT on the monarch processor + * This has been defined for registration purposes with SAL + * as a part of mca_init. + */ +void +ia64_monarch_init_handler() +{ +} +/* + * SAL to OS entry point for INIT on the slave processor + * This has been defined for registration purposes with SAL + * as a part of mca_init. + */ + +void +ia64_slave_init_handler() +{ +} +/* + * ia64_mca_cmc_int_handler + * This is correctable machine check interrupt handler. + * Right now the logs are extracted and displayed in a well-defined + * format. + * Inputs + * None + * Outputs + * None + */ +void +ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) +{ + /* Get the CMC processor log */ + ia64_log_get(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk); + /* Get the CMC platform log */ + ia64_log_get(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PLATFORM, (prfunc_t)printk); + + + ia64_log_print(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk); + cmci_handler_platform(cmc_irq, arg, ptregs); + + /* Clear the CMC SAL logs now that they have been saved in the OS buffer */ + ia64_sal_clear_state_info(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR); + ia64_sal_clear_state_info(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PLATFORM); +} + +/* + * IA64_MCA log support + */ +#define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */ +#define IA64_MAX_LOG_TYPES 3 /* MCA, CMC, INIT */ +#define IA64_MAX_LOG_SUBTYPES 2 /* Processor, Platform */ + +typedef struct ia64_state_log_s { + spinlock_t isl_lock; + int isl_index; + sal_log_header_t isl_log[IA64_MAX_LOGS]; + +} ia64_state_log_t; + +static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES][IA64_MAX_LOG_SUBTYPES]; + +#define IA64_LOG_LOCK_INIT(it, sit) spin_lock_init(&ia64_state_log[it][sit].isl_lock) +#define IA64_LOG_LOCK(it, sit) spin_lock_irqsave(&ia64_state_log[it][sit].isl_lock, s) +#define IA64_LOG_UNLOCK(it, sit) spin_unlock_irqrestore(&ia64_state_log[it][sit].isl_lock,\ + s) +#define IA64_LOG_NEXT_INDEX(it, sit) ia64_state_log[it][sit].isl_index +#define IA64_LOG_CURR_INDEX(it, sit) 1 - ia64_state_log[it][sit].isl_index +#define IA64_LOG_INDEX_INC(it, sit) \ + ia64_state_log[it][sit].isl_index = 1 - ia64_state_log[it][sit].isl_index +#define IA64_LOG_INDEX_DEC(it, sit) \ + ia64_state_log[it][sit].isl_index = 1 - ia64_state_log[it][sit].isl_index +#define IA64_LOG_NEXT_BUFFER(it, sit) (void *)(&(ia64_state_log[it][sit].isl_log[IA64_LOG_NEXT_INDEX(it,sit)])) +#define IA64_LOG_CURR_BUFFER(it, sit) (void *)(&(ia64_state_log[it][sit].isl_log[IA64_LOG_CURR_INDEX(it,sit)])) + +/* + * ia64_log_init + * Reset the OS ia64 log buffer + * Inputs : info_type 
(SAL_INFO_TYPE_{MCA,INIT,CMC}) + * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM}) + * Outputs : None + */ +void +ia64_log_init(int sal_info_type, int sal_sub_info_type) +{ + IA64_LOG_LOCK_INIT(sal_info_type, sal_sub_info_type); + IA64_LOG_NEXT_INDEX(sal_info_type, sal_sub_info_type) = 0; + memset(IA64_LOG_NEXT_BUFFER(sal_info_type, sal_sub_info_type), 0, + sizeof(sal_log_header_t) * IA64_MAX_LOGS); +} + +/* + * ia64_log_get + * Get the current MCA log from SAL and copy it into the OS log buffer. + * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC}) + * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM}) + * Outputs : None + * + */ +void +ia64_log_get(int sal_info_type, int sal_sub_info_type, prfunc_t prfunc) +{ + sal_log_header_t *log_buffer; + int s; + + IA64_LOG_LOCK(sal_info_type, sal_sub_info_type); + + + /* Get the process state information */ + log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type, sal_sub_info_type); + + if (ia64_sal_get_state_info(sal_info_type, sal_sub_info_type ,(u64 *)log_buffer)) + prfunc("ia64_mca_log_get : Getting processor log failed\n"); + + IA64_LOG_INDEX_INC(sal_info_type, sal_sub_info_type); + + IA64_LOG_UNLOCK(sal_info_type, sal_sub_info_type); + +} + +/* + * ia64_log_clear + * Clear the current MCA log from SAL and dpending on the clear_os_buffer flags + * clear the OS log buffer also + * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC}) + * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM}) + * clear_os_buffer + * prfunc (print function) + * Outputs : None + * + */ +void +ia64_log_clear(int sal_info_type, int sal_sub_info_type, int clear_os_buffer, prfunc_t prfunc) +{ + if (ia64_sal_clear_state_info(sal_info_type, sal_sub_info_type)) + prfunc("ia64_mca_log_get : Clearing processor log failed\n"); + + if (clear_os_buffer) { + sal_log_header_t *log_buffer; + int s; + + IA64_LOG_LOCK(sal_info_type, sal_sub_info_type); + + /* Get the process state information */ + log_buffer = IA64_LOG_CURR_BUFFER(sal_info_type, sal_sub_info_type); + + memset(log_buffer, 0, sizeof(sal_log_header_t)); + + IA64_LOG_INDEX_DEC(sal_info_type, sal_sub_info_type); + + IA64_LOG_UNLOCK(sal_info_type, sal_sub_info_type); + } + +} + +/* + * ia64_log_processor_regs_print + * Print the contents of the saved processor register(s) in the format + * <reg_prefix>[<index>] <value> + * + * Inputs : regs (Register save buffer) + * reg_num (# of registers) + * reg_class (application/banked/control/bank1_general) + * reg_prefix (ar/br/cr/b1_gr) + * Outputs : None + * + */ +void +ia64_log_processor_regs_print(u64 *regs, + int reg_num, + char *reg_class, + char *reg_prefix, + prfunc_t prfunc) +{ + int i; + + prfunc("+%s Registers\n", reg_class); + for (i = 0; i < reg_num; i++) + prfunc("+ %s[%d] 0x%lx\n", reg_prefix, i, regs[i]); +} + +static char *pal_mesi_state[] = { + "Invalid", + "Shared", + "Exclusive", + "Modified", + "Reserved1", + "Reserved2", + "Reserved3", + "Reserved4" +}; + +static char *pal_cache_op[] = { + "Unknown", + "Move in", + "Cast out", + "Coherency check", + "Internal", + "Instruction fetch", + "Implicit Writeback", + "Reserved" +}; + +/* + * ia64_log_cache_check_info_print + * Display the machine check information related to cache error(s). 
+ * Inputs : i (Multiple errors are logged, i - index of logged error) + * info (Machine check info logged by the PAL and later + * captured by the SAL) + * target_addr (Address which caused the cache error) + * Outputs : None + */ +void +ia64_log_cache_check_info_print(int i, + pal_cache_check_info_t info, + u64 target_addr, + prfunc_t prfunc) +{ + prfunc("+ Cache check info[%d]\n+", i); + prfunc(" Level: L%d",info.level); + if (info.mv) + prfunc(" ,Mesi: %s",pal_mesi_state[info.mesi]); + prfunc(" ,Index: %d,", info.index); + if (info.ic) + prfunc(" ,Cache: Instruction"); + if (info.dc) + prfunc(" ,Cache: Data"); + if (info.tl) + prfunc(" ,Line: Tag"); + if (info.dl) + prfunc(" ,Line: Data"); + prfunc(" ,Operation: %s,", pal_cache_op[info.op]); + if (info.wv) + prfunc(" ,Way: %d,", info.way); + if (info.tv) + prfunc(" ,Target Addr: 0x%lx", target_addr); + if (info.mc) + prfunc(" ,MC: Corrected"); + prfunc("\n"); +} + +/* + * ia64_log_tlb_check_info_print + * Display the machine check information related to tlb error(s). + * Inputs : i (Multiple errors are logged, i - index of logged error) + * info (Machine check info logged by the PAL and later + * captured by the SAL) + * Outputs : None + */ + +void +ia64_log_tlb_check_info_print(int i, + pal_tlb_check_info_t info, + prfunc_t prfunc) +{ + prfunc("+ TLB Check Info [%d]\n+", i); + if (info.itc) + prfunc(" Failure: Instruction Translation Cache"); + if (info.dtc) + prfunc(" Failure: Data Translation Cache"); + if (info.itr) { + prfunc(" Failure: Instruction Translation Register"); + prfunc(" ,Slot: %d", info.tr_slot); + } + if (info.dtr) { + prfunc(" Failure: Data Translation Register"); + prfunc(" ,Slot: %d", info.tr_slot); + } + if (info.mc) + prfunc(" ,MC: Corrected"); + prfunc("\n"); +} + +/* + * ia64_log_bus_check_info_print + * Display the machine check information related to bus error(s). + * Inputs : i (Multiple errors are logged, i - index of logged error) + * info (Machine check info logged by the PAL and later + * captured by the SAL) + * req_addr (Address of the requestor of the transaction) + * resp_addr (Address of the responder of the transaction) + * target_addr (Address where the data was to be delivered to or + * obtained from) + * Outputs : None + */ +void +ia64_log_bus_check_info_print(int i, + pal_bus_check_info_t info, + u64 req_addr, + u64 resp_addr, + u64 targ_addr, + prfunc_t prfunc) +{ + prfunc("+ BUS Check Info [%d]\n+", i); + prfunc(" Status Info: %d", info.bsi); + prfunc(" ,Severity: %d", info.sev); + prfunc(" ,Transaction Type: %d", info.type); + prfunc(" ,Transaction Size: %d", info.size); + if (info.cc) + prfunc(" ,Cache-cache-transfer"); + if (info.ib) + prfunc(" ,Error: Internal"); + if (info.eb) + prfunc(" ,Error: External"); + if (info.mc) + prfunc(" ,MC: Corrected"); + if (info.tv) + prfunc(" ,Target Address: 0x%lx", targ_addr); + if (info.rq) + prfunc(" ,Requestor Address: 0x%lx", req_addr); + if (info.tv) + prfunc(" ,Responder Address: 0x%lx", resp_addr); + prfunc("\n"); +} + +/* + * ia64_log_processor_info_print + * Display the processor-specific information logged by PAL as a part + * of MCA or INIT or CMC. + * Inputs : lh (Pointer of the sal log header which specifies the format + * of SAL state info as specified by the SAL spec). 
+ * Outputs : None + */ +void +ia64_log_processor_info_print(sal_log_header_t *lh, prfunc_t prfunc) +{ + sal_log_processor_info_t *slpi; + int i; + + if (!lh) + return; + + if (lh->slh_log_type != SAL_SUB_INFO_TYPE_PROCESSOR) + return; + +#if defined(MCA_TEST) + slpi = &slpi_buf; +#else + slpi = (sal_log_processor_info_t *)lh->slh_log_dev_spec_info; +#endif /#if defined(MCA_TEST) */ + + if (!slpi) { + prfunc("No Processor Error Log found\n"); + return; + } + + /* Print branch register contents if valid */ + if (slpi->slpi_valid.slpi_br) + ia64_log_processor_regs_print(slpi->slpi_br, 8, "Branch", "br", prfunc); + + /* Print control register contents if valid */ + if (slpi->slpi_valid.slpi_cr) + ia64_log_processor_regs_print(slpi->slpi_cr, 128, "Control", "cr", prfunc); + + /* Print application register contents if valid */ + if (slpi->slpi_valid.slpi_ar) + ia64_log_processor_regs_print(slpi->slpi_br, 128, "Application", "ar", prfunc); + + /* Print region register contents if valid */ + if (slpi->slpi_valid.slpi_rr) + ia64_log_processor_regs_print(slpi->slpi_rr, 8, "Region", "rr", prfunc); + + /* Print floating-point register contents if valid */ + if (slpi->slpi_valid.slpi_fr) + ia64_log_processor_regs_print(slpi->slpi_fr, 128, "Floating-point", "fr", + prfunc); + + /* Print bank1-gr NAT register contents if valid */ + ia64_log_processor_regs_print(&slpi->slpi_bank1_nat_bits, 1, "NAT", "nat", prfunc); + + /* Print bank 1 register contents if valid */ + if (slpi->slpi_valid.slpi_bank1_gr) + ia64_log_processor_regs_print(slpi->slpi_bank1_gr, 16, "Bank1-General", "gr", + prfunc); + + /* Print the cache check information if any*/ + for (i = 0 ; i < MAX_CACHE_ERRORS; i++) + ia64_log_cache_check_info_print(i, + slpi->slpi_cache_check_info[i].slpi_cache_check, + slpi->slpi_cache_check_info[i].slpi_target_address, + prfunc); + /* Print the tlb check information if any*/ + for (i = 0 ; i < MAX_TLB_ERRORS; i++) + ia64_log_tlb_check_info_print(i,slpi->slpi_tlb_check_info[i], prfunc); + + /* Print the bus check information if any*/ + for (i = 0 ; i < MAX_BUS_ERRORS; i++) + ia64_log_bus_check_info_print(i, + slpi->slpi_bus_check_info[i].slpi_bus_check, + slpi->slpi_bus_check_info[i].slpi_requestor_addr, + slpi->slpi_bus_check_info[i].slpi_responder_addr, + slpi->slpi_bus_check_info[i].slpi_target_addr, + prfunc); + +} + +/* + * ia64_log_print + * Display the contents of the OS error log information + * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC}) + * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM}) + * Outputs : None + */ +void +ia64_log_print(int sal_info_type, int sal_sub_info_type, prfunc_t prfunc) +{ + char *info_type, *sub_info_type; + + switch(sal_info_type) { + case SAL_INFO_TYPE_MCA: + info_type = "MCA"; + break; + case SAL_INFO_TYPE_INIT: + info_type = "INIT"; + break; + case SAL_INFO_TYPE_CMC: + info_type = "CMC"; + break; + default: + info_type = "UNKNOWN"; + break; + } + + switch(sal_sub_info_type) { + case SAL_SUB_INFO_TYPE_PROCESSOR: + sub_info_type = "PROCESSOR"; + break; + case SAL_SUB_INFO_TYPE_PLATFORM: + sub_info_type = "PLATFORM"; + break; + default: + sub_info_type = "UNKNOWN"; + break; + } + + prfunc("+BEGIN HARDWARE ERROR STATE [%s %s]\n", info_type, sub_info_type); + if (sal_sub_info_type == SAL_SUB_INFO_TYPE_PROCESSOR) + ia64_log_processor_info_print( + IA64_LOG_CURR_BUFFER(sal_info_type, sal_sub_info_type), + prfunc); + else + log_print_platform(IA64_LOG_CURR_BUFFER(sal_info_type, sal_sub_info_type),prfunc); + prfunc("+END HARDWARE ERROR STATE [%s %s]\n", info_type, 
sub_info_type); +} diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S new file mode 100644 index 000000000..3d49ac06e --- /dev/null +++ b/arch/ia64/kernel/mca_asm.S @@ -0,0 +1,621 @@ +#include <asm/processor.h> +#include <asm/mcaasm.h> +#include <asm/page.h> +#include <asm/mca.h> + + .psr abi64 + .psr lsb + .lsb + +/* + * SAL_TO_OS_MCA_HANDOFF_STATE + * 1. GR1 = OS GP + * 2. GR8 = PAL_PROC physical address + * 3. GR9 = SAL_PROC physical address + * 4. GR10 = SAL GP (physical) + * 5. GR11 = Rendez state + * 6. GR12 = Return address to location within SAL_CHECK + */ +#define SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(_tmp) \ + movl _tmp=ia64_sal_to_os_handoff_state;; \ + st8 [_tmp]=r1,0x08;; \ + st8 [_tmp]=r8,0x08;; \ + st8 [_tmp]=r9,0x08;; \ + st8 [_tmp]=r10,0x08;; \ + st8 [_tmp]=r11,0x08;; \ + st8 [_tmp]=r12,0x08;; + +/* + * OS_MCA_TO_SAL_HANDOFF_STATE + * 1. GR8 = OS_MCA status + * 2. GR9 = SAL GP (physical) + * 3. GR22 = New min state save area pointer + */ +#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp) \ + movl _tmp=ia64_os_to_sal_handoff_state;; \ + DATA_VA_TO_PA(_tmp);; \ + ld8 r8=[_tmp],0x08;; \ + ld8 r9=[_tmp],0x08;; \ + ld8 r22=[_tmp],0x08;; + +/* + * BRANCH + * Jump to the instruction referenced by + * "to_label". + * Branch is taken only if the predicate + * register "p" is true. + * "ip" is the address of the instruction + * located at "from_label". + * "temp" is a scratch register like r2 + * "adjust" needed for HP compiler. + * A screwup somewhere with constant arithmetic. + */ +#define BRANCH(to_label, temp, p, adjust) \ +100: (p) mov temp=ip; \ + ;; \ + (p) adds temp=to_label-100b,temp;\ + (p) adds temp=adjust,temp; \ + (p) mov b1=temp ; \ + (p) br b1 + + .global ia64_os_mca_dispatch + .global ia64_os_mca_dispatch_end + .global ia64_sal_to_os_handoff_state + .global ia64_os_to_sal_handoff_state + .global ia64_os_mca_ucmc_handler + .global ia64_mca_proc_state_dump + .global ia64_mca_proc_state_restore + .global ia64_mca_stack + .global ia64_mca_stackframe + .global ia64_mca_bspstore + + .text + .align 16 + +ia64_os_mca_dispatch: + +#if defined(MCA_TEST) + // Pretend that we are in interrupt context + mov r2=psr + dep r2=0, r2, PSR_IC, 2; + mov psr.l = r2 +#endif /* #if defined(MCA_TEST) */ + + // Save the SAL to OS MCA handoff state as defined + // by SAL SPEC 2.5 + // NOTE : The order in which the state gets saved + // is dependent on the way the C-structure + // for ia64_mca_sal_to_os_state_t has been + // defined in include/asm/mca.h + SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2) + + // LOG PROCESSOR STATE INFO FROM HERE ON.. 
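+	//
+	// What follows is, in order: dump the processor state
+	// (ia64_os_mca_proc_state_dump), switch to a private stack and
+	// backing store and enter virtual mode, call the C handler
+	// ia64_mca_ucmc_handler, drop back to physical mode, restore the
+	// original frame and processor state, and finally branch back to
+	// SAL_CHECK via the OS-to-SAL handoff state.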
+ ;; +begin_os_mca_dump: + BRANCH(ia64_os_mca_proc_state_dump, r2, p0, 0x0) + ;; +ia64_os_mca_done_dump: + + // Setup new stack frame for OS_MCA handling + movl r2=ia64_mca_bspstore // local bspstore area location in r2 + movl r3=ia64_mca_stackframe // save stack frame to memory in r3 + rse_switch_context(r6,r3,r2);; // RSC management in this new context + movl r12=ia64_mca_stack;; + + // Enter virtual mode from physical mode + VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4) +ia64_os_mca_virtual_begin: + + // call our handler + movl r2=ia64_mca_ucmc_handler;; + mov b6=r2;; + br.call.sptk.few b0=b6 + ;; + + // Revert back to physical mode before going back to SAL + PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4) +ia64_os_mca_virtual_end: + +#if defined(MCA_TEST) + // Pretend that we are in interrupt context + mov r2=psr + dep r2=0, r2, PSR_IC, 2; + mov psr.l = r2 +#endif /* #if defined(MCA_TEST) */ + + // restore the original stack frame here + movl r2=ia64_mca_stackframe // restore stack frame from memory at r2 + ;; + DATA_VA_TO_PA(r2) + movl r4=IA64_PSR_MC + ;; + rse_return_context(r4,r3,r2) // switch from interrupt context for RSE + + // let us restore all the registers from our PSI structure + mov r8=gp + ;; +begin_os_mca_restore: + BRANCH(ia64_os_mca_proc_state_restore, r2, p0, 0x0) + ;; + +ia64_os_mca_done_restore: + ;; +#ifdef SOFTSDV + VIRTUAL_MODE_ENTER(r2,r3, vmode_enter, r4) +vmode_enter: + br.ret.sptk.few b0 +#else + // branch back to SALE_CHECK + OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2) + ld8 r3=[r2];; + mov b0=r3 // SAL_CHECK return address + br b0 + ;; +#endif /* #ifdef SOFTSDV */ +ia64_os_mca_dispatch_end: +//EndMain////////////////////////////////////////////////////////////////////// + + +//++ +// Name: +// ia64_os_mca_proc_state_dump() +// +// Stub Description: +// +// This stub dumps the processor state during MCHK to a data area +// +//-- + +ia64_os_mca_proc_state_dump: +// Get and save GR0-31 from Proc. Min. State Save Area to SAL PSI + movl r2=ia64_mca_proc_state_dump;; // Os state dump area + +// save ar.NaT + mov r5=ar.unat // ar.unat + +// save banked GRs 16-31 along with NaT bits + bsw.1;; + st8.spill [r2]=r16,8;; + st8.spill [r2]=r17,8;; + st8.spill [r2]=r18,8;; + st8.spill [r2]=r19,8;; + st8.spill [r2]=r20,8;; + st8.spill [r2]=r21,8;; + st8.spill [r2]=r22,8;; + st8.spill [r2]=r23,8;; + st8.spill [r2]=r24,8;; + st8.spill [r2]=r25,8;; + st8.spill [r2]=r26,8;; + st8.spill [r2]=r27,8;; + st8.spill [r2]=r28,8;; + st8.spill [r2]=r29,8;; + st8.spill [r2]=r30,8;; + st8.spill [r2]=r31,8;; + + mov r4=ar.unat;; + st8 [r2]=r4,8 // save User NaT bits for r16-r31 + mov ar.unat=r5 // restore original unat + bsw.0;; + +//save BRs + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2 // duplicate r2 in r4 + + mov r3=b0 + mov r5=b1 + mov r7=b2;; + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=b3 + mov r5=b4 + mov r7=b5;; + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=b6 + mov r5=b7;; + st8 [r2]=r3,2*8 + st8 [r4]=r5,2*8;; + +cSaveCRs: +// save CRs + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2 // duplicate r2 in r4 + + mov r3=cr0 // cr.dcr + mov r5=cr1 // cr.itm + mov r7=cr2;; // cr.iva + + st8 [r2]=r3,8*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; // 48 byte rements + + mov r3=cr8;; // cr.pta + st8 [r2]=r3,8*8;; // 64 byte rements + +// if PSR.ic=0, reading interruption registers causes an illegal operation fault + mov r3=psr;; + tbit.nz.unc p2,p0=r3,PSR_IC;; // PSI Valid Log bit pos. 
test +(p2) st8 [r2]=r0,9*8+160 // increment by 168 byte inc. +begin_skip_intr_regs: + BRANCH(SkipIntrRegs, r9, p2, 0x0) + ;; + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2 // duplicate r2 in r6 + + mov r3=cr16 // cr.ipsr + mov r5=cr17 // cr.isr + mov r7=r0;; // cr.ida => cr18 + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=cr19 // cr.iip + mov r5=cr20 // cr.idtr + mov r7=cr21;; // cr.iitr + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=cr22 // cr.iipa + mov r5=cr23 // cr.ifs + mov r7=cr24;; // cr.iim + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=cr25;; // cr.iha + st8 [r2]=r3,160;; // 160 byte rement + +SkipIntrRegs: + st8 [r2]=r0,168 // another 168 byte . + + mov r3=cr66;; // cr.lid + st8 [r2]=r3,40 // 40 byte rement + + mov r3=cr71;; // cr.ivr + st8 [r2]=r3,8 + + mov r3=cr72;; // cr.tpr + st8 [r2]=r3,24 // 24 byte increment + + mov r3=r0;; // cr.eoi => cr75 + st8 [r2]=r3,168 // 168 byte inc. + + mov r3=r0;; // cr.irr0 => cr96 + st8 [r2]=r3,16 // 16 byte inc. + + mov r3=r0;; // cr.irr1 => cr98 + st8 [r2]=r3,16 // 16 byte inc. + + mov r3=r0;; // cr.irr2 => cr100 + st8 [r2]=r3,16 // 16 byte inc + + mov r3=r0;; // cr.irr3 => cr100 + st8 [r2]=r3,16 // 16b inc. + + mov r3=r0;; // cr.itv => cr114 + st8 [r2]=r3,16 // 16 byte inc. + + mov r3=r0;; // cr.pmv => cr116 + st8 [r2]=r3,8 + + mov r3=r0;; // cr.lrr0 => cr117 + st8 [r2]=r3,8 + + mov r3=r0;; // cr.lrr1 => cr118 + st8 [r2]=r3,8 + + mov r3=r0;; // cr.cmcv => cr119 + st8 [r2]=r3,8*10;; + +cSaveARs: +// save ARs + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2 // duplicate r2 in r6 + + mov r3=ar0 // ar.kro + mov r5=ar1 // ar.kr1 + mov r7=ar2;; // ar.kr2 + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=ar3 // ar.kr3 + mov r5=ar4 // ar.kr4 + mov r7=ar5;; // ar.kr5 + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=ar6 // ar.kr6 + mov r5=ar7 // ar.kr7 + mov r7=r0;; // ar.kr8 + st8 [r2]=r3,10*8 + st8 [r4]=r5,10*8 + st8 [r6]=r7,10*8;; // rement by 72 bytes + + mov r3=ar16 // ar.rsc + mov ar16=r0 // put RSE in enforced lazy mode + mov r5=ar17 // ar.bsp + mov r7=ar18;; // ar.bspstore + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=ar19;; // ar.rnat + st8 [r2]=r3,8*13 // increment by 13x8 bytes + + mov r3=ar32;; // ar.ccv + st8 [r2]=r3,8*4 + + mov r3=ar36;; // ar.unat + st8 [r2]=r3,8*4 + + mov r3=ar40;; // ar.fpsr + st8 [r2]=r3,8*4 + + mov r3=ar44;; // ar.itc + st8 [r2]=r3,160 // 160 + + mov r3=ar64;; // ar.pfs + st8 [r2]=r3,8 + + mov r3=ar65;; // ar.lc + st8 [r2]=r3,8 + + mov r3=ar66;; // ar.ec + st8 [r2]=r3 + add r2=8*62,r2 //padding + +// save RRs + mov ar.lc=0x08-1 + movl r4=0x00;; + +cStRR: + mov r3=rr[r4];; + st8 [r2]=r3,8 + add r4=1,r4 + br.cloop.sptk.few cStRR + ;; +end_os_mca_dump: + BRANCH(ia64_os_mca_done_dump, r2, p0, -0x10) + ;; + +//EndStub////////////////////////////////////////////////////////////////////// + + +//++ +// Name: +// ia64_os_mca_proc_state_restore() +// +// Stub Description: +// +// This is a stub to restore the saved processor state during MCHK +// +//-- + +ia64_os_mca_proc_state_restore: + +// Restore bank1 GR16-31 + movl r2=ia64_mca_proc_state_dump // Convert virtual address + ;; // of OS state dump area + DATA_VA_TO_PA(r2) // to physical address + ;; +restore_GRs: // restore bank-1 GRs 16-31 + bsw.1;; + add r3=16*8,r2;; // to get to NaT of GR 16-31 + ld8 r3=[r3];; + mov ar.unat=r3;; // first restore NaT + + ld8.fill r16=[r2],8;; + ld8.fill r17=[r2],8;; + ld8.fill r18=[r2],8;; + ld8.fill r19=[r2],8;; + 
ld8.fill r20=[r2],8;; + ld8.fill r21=[r2],8;; + ld8.fill r22=[r2],8;; + ld8.fill r23=[r2],8;; + ld8.fill r24=[r2],8;; + ld8.fill r25=[r2],8;; + ld8.fill r26=[r2],8;; + ld8.fill r27=[r2],8;; + ld8.fill r28=[r2],8;; + ld8.fill r29=[r2],8;; + ld8.fill r30=[r2],8;; + ld8.fill r31=[r2],8;; + + ld8 r3=[r2],8;; // increment to skip NaT + bsw.0;; + +restore_BRs: + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov b0=r3 + mov b1=r5 + mov b2=r7;; + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov b3=r3 + mov b4=r5 + mov b5=r7;; + + ld8 r3=[r2],2*8 + ld8 r5=[r4],2*8;; + mov b6=r3 + mov b7=r5;; + +restore_CRs: + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],8*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; // 48 byte increments + mov cr0=r3 // cr.dcr + mov cr1=r5 // cr.itm + mov cr2=r7;; // cr.iva + + ld8 r3=[r2],8*8;; // 64 byte increments +// mov cr8=r3 // cr.pta + + +// if PSR.ic=1, reading interruption registers causes an illegal operation fault + mov r3=psr;; + tbit.nz.unc p2,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test +(p2) st8 [r2]=r0,9*8+160 // increment by 160 byte inc. + +begin_rskip_intr_regs: + BRANCH(rSkipIntrRegs, r9, p2, 0x0) + ;; + + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov cr16=r3 // cr.ipsr + mov cr17=r5 // cr.isr is read only +// mov cr18=r7;; // cr.ida + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov cr19=r3 // cr.iip + mov cr20=r5 // cr.idtr + mov cr21=r7;; // cr.iitr + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov cr22=r3 // cr.iipa + mov cr23=r5 // cr.ifs + mov cr24=r7 // cr.iim + + ld8 r3=[r2],160;; // 160 byte increment + mov cr25=r3 // cr.iha + +rSkipIntrRegs: + ld8 r3=[r2],168;; // another 168 byte inc. + + ld8 r3=[r2],40;; // 40 byte increment + mov cr66=r3 // cr.lid + + ld8 r3=[r2],8;; +// mov cr71=r3 // cr.ivr is read only + ld8 r3=[r2],24;; // 24 byte increment + mov cr72=r3 // cr.tpr + + ld8 r3=[r2],168;; // 168 byte inc. +// mov cr75=r3 // cr.eoi + + ld8 r3=[r2],16;; // 16 byte inc. +// mov cr96=r3 // cr.irr0 is read only + + ld8 r3=[r2],16;; // 16 byte inc. +// mov cr98=r3 // cr.irr1 is read only + + ld8 r3=[r2],16;; // 16 byte inc +// mov cr100=r3 // cr.irr2 is read only + + ld8 r3=[r2],16;; // 16b inc. +// mov cr102=r3 // cr.irr3 is read only + + ld8 r3=[r2],16;; // 16 byte inc. 
+// mov cr114=r3 // cr.itv + + ld8 r3=[r2],8;; +// mov cr116=r3 // cr.pmv + ld8 r3=[r2],8;; +// mov cr117=r3 // cr.lrr0 + ld8 r3=[r2],8;; +// mov cr118=r3 // cr.lrr1 + ld8 r3=[r2],8*10;; +// mov cr119=r3 // cr.cmcv + +restore_ARs: + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov ar0=r3 // ar.kro + mov ar1=r5 // ar.kr1 + mov ar2=r7;; // ar.kr2 + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov ar3=r3 // ar.kr3 + mov ar4=r5 // ar.kr4 + mov ar5=r7;; // ar.kr5 + + ld8 r3=[r2],10*8 + ld8 r5=[r4],10*8 + ld8 r7=[r6],10*8;; + mov ar6=r3 // ar.kr6 + mov ar7=r5 // ar.kr7 +// mov ar8=r6 // ar.kr8 + ;; + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; +// mov ar16=r3 // ar.rsc +// mov ar17=r5 // ar.bsp is read only + mov ar16=r0 // make sure that RSE is in enforced lazy mode + mov ar18=r7;; // ar.bspstore + + ld8 r9=[r2],8*13;; + mov ar19=r9 // ar.rnat + + mov ar16=r3 // ar.rsc + ld8 r3=[r2],8*4;; + mov ar32=r3 // ar.ccv + + ld8 r3=[r2],8*4;; + mov ar36=r3 // ar.unat + + ld8 r3=[r2],8*4;; + mov ar40=r3 // ar.fpsr + + ld8 r3=[r2],160;; // 160 +// mov ar44=r3 // ar.itc + + ld8 r3=[r2],8;; + mov ar64=r3 // ar.pfs + + ld8 r3=[r2],8;; + mov ar65=r3 // ar.lc + + ld8 r3=[r2];; + mov ar66=r3 // ar.ec + add r2=8*62,r2;; // padding + +restore_RRs: + mov r5=ar.lc + mov ar.lc=0x08-1 + movl r4=0x00 +cStRRr: + ld8 r3=[r2],8;; +// mov rr[r4]=r3 // what are its access previledges? + add r4=1,r4 + br.cloop.sptk.few cStRRr + ;; + mov ar.lc=r5 + ;; +end_os_mca_restore: + BRANCH(ia64_os_mca_done_restore, r2, p0, -0x20) + ;; +//EndStub////////////////////////////////////////////////////////////////////// diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S new file mode 100644 index 000000000..1506bacc2 --- /dev/null +++ b/arch/ia64/kernel/pal.S @@ -0,0 +1,119 @@ +/* + * PAL Firmware support + * IA-64 Processor Programmers Reference Vol 2 + * + * Copyright (C) 1999 Don Dugger <don.dugger@intel.com> + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 David Mosberger <davidm@hpl.hp.com> + */ + + .text + .psr abi64 + .psr lsb + .lsb + + .data +pal_entry_point: + data8 ia64_pal_default_handler + .text + +/* + * Set the PAL entry point address. This could be written in C code, but we do it here + * to keep it all in one module (besides, it's so trivial that it's + * not a big deal). + * + * in0 Address of the PAL entry point (text address, NOT a function descriptor). + */ + .align 16 + .global ia64_pal_handler_init + .proc ia64_pal_handler_init +ia64_pal_handler_init: + alloc r3=ar.pfs,1,0,0,0 + movl r2=pal_entry_point + ;; + st8 [r2]=in0 + br.ret.sptk.few rp + + .endp ia64_pal_handler_init + +/* + * Default PAL call handler. This needs to be coded in assembly because it uses + * the static calling convention, i.e., the RSE may not be used and calls are + * done via "br.cond" (not "br.call"). + */ + .align 16 + .global ia64_pal_default_handler + .proc ia64_pal_default_handler +ia64_pal_default_handler: + mov r8=-1 + br.cond.sptk.few rp + +/* + * Make a PAL call using the static calling convention. 
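+ * In this convention the PAL index and up to three arguments are passed
+ * in r28-r31, the call itself is a plain branch (br.cond, no register
+ * stack frame is set up for PAL), and the results come back in r8-r11,
+ * which the stub below stores into the ia64_pal_retval pointed to by in0.
+ * A C caller might look roughly like this (illustrative sketch only; the
+ * exact prototype depends on __GCC_MULTIREG_RETVALS__):
+ *
+ *	struct ia64_pal_retval iprv;
+ *	ia64_pal_call_static(&iprv, index, arg1, arg2, arg3);
+ *	(iprv now holds the PAL status word and up to three return values)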
+ * + * in0 Pointer to struct ia64_pal_retval + * in1 Index of PAL service + * in2 - in4 Remaning PAL arguments + * + */ + +#ifdef __GCC_MULTIREG_RETVALS__ +# define arg0 in0 +# define arg1 in1 +# define arg2 in2 +# define arg3 in3 +# define arg4 in4 +#else +# define arg0 in1 +# define arg1 in2 +# define arg2 in3 +# define arg3 in4 +# define arg4 in5 +#endif + + .text + .psr abi64 + .psr lsb + .lsb + + .align 16 + .global ia64_pal_call_static + .proc ia64_pal_call_static +ia64_pal_call_static: + alloc loc0 = ar.pfs,6,90,0,0 + movl loc2 = pal_entry_point +1: { + mov r28 = arg0 + mov r29 = arg1 + mov r8 = ip + } + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + mov r30 = arg2 + mov r31 = arg3 + ;; + mov loc3 = psr + mov loc1 = rp + adds r8 = .ret0-1b,r8 + ;; + rsm psr.i + mov b7 = loc2 + mov rp = r8 + ;; + br.cond.sptk.few b7 +.ret0: mov psr.l = loc3 +#ifndef __GCC_MULTIREG_RETVALS__ + st8 [in0] = r8, 8 + ;; + st8 [in0] = r9, 8 + ;; + st8 [in0] = r10, 8 + ;; + st8 [in0] = r11, 8 +#endif + mov ar.pfs = loc0 + mov rp = loc1 + ;; + srlz.d // seralize restoration of psr.l + br.ret.sptk.few b0 + .endp ia64_pal_call_static diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c new file mode 100644 index 000000000..f86f45537 --- /dev/null +++ b/arch/ia64/kernel/pci-dma.c @@ -0,0 +1,56 @@ +/* + * Dynamic DMA mapping support. + * + * This implementation is for IA-64 platforms that do not support + * I/O TLBs (aka DMA address translation hardware). + * + * XXX This doesn't do the right thing yet. It appears we would have + * to add additional zones so we can implement the various address + * mask constraints that we might encounter. A zone for memory < 32 + * bits is obviously necessary... + */ + +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/pci.h> + +#include <asm/io.h> + +/* Pure 2^n version of get_order */ +extern __inline__ unsigned long +get_order (unsigned long size) +{ + unsigned long order = ia64_fls(size); + + printk ("get_order: size=%lu, order=%lu\n", size, order); + + if (order > PAGE_SHIFT) + order -= PAGE_SHIFT; + else + order = 0; + return order; +} + +void * +pci_alloc_consistent (struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) +{ + void *ret; + int gfp = GFP_ATOMIC; + + if (!hwdev || hwdev->dma_mask != 0xffffffff) + gfp |= GFP_DMA; + ret = (void *)__get_free_pages(gfp, get_order(size)); + + if (ret) { + memset(ret, 0, size); + *dma_handle = virt_to_bus(ret); + } + return ret; +} + +void +pci_free_consistent (struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) +{ + free_pages((unsigned long) vaddr, get_order(size)); +} diff --git a/arch/ia64/kernel/pci.c b/arch/ia64/kernel/pci.c new file mode 100644 index 000000000..3bceeed8e --- /dev/null +++ b/arch/ia64/kernel/pci.c @@ -0,0 +1,239 @@ +/* + * pci.c - Low-Level PCI Access in IA64 + * + * Derived from bios32.c of i386 tree. + * + */ + +#include <linux/config.h> + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/malloc.h> +#include <linux/smp_lock.h> +#include <linux/spinlock.h> + +#include <asm/machvec.h> +#include <asm/page.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <asm/io.h> + +#include <asm/sal.h> + + +#ifdef CONFIG_SMP +# include <asm/smp.h> +#endif +#include <asm/irq.h> + + +#undef DEBUG +#define DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) 
+#endif + +/* + * This interrupt-safe spinlock protects all accesses to PCI + * configuration space. + */ + +spinlock_t pci_lock = SPIN_LOCK_UNLOCKED; + +struct pci_fixup pcibios_fixups[] = { { 0 } }; + +#define PCI_NO_CHECKS 0x400 +#define PCI_NO_PEER_FIXUP 0x800 + +static unsigned int pci_probe = PCI_NO_CHECKS; + +/* Macro to build a PCI configuration address to be passed as a parameter to SAL. */ + +#define PCI_CONFIG_ADDRESS(dev, where) (((u64) dev->bus->number << 16) | ((u64) (dev->devfn & 0xff) << 8) | (where & 0xff)) + +static int +pci_conf_read_config_byte(struct pci_dev *dev, int where, u8 *value) +{ + s64 status; + u64 lval; + + status = ia64_sal_pci_config_read(PCI_CONFIG_ADDRESS(dev, where), 1, &lval); + *value = lval; + return status; +} + +static int +pci_conf_read_config_word(struct pci_dev *dev, int where, u16 *value) +{ + s64 status; + u64 lval; + + status = ia64_sal_pci_config_read(PCI_CONFIG_ADDRESS(dev, where), 2, &lval); + *value = lval; + return status; +} + +static int +pci_conf_read_config_dword(struct pci_dev *dev, int where, u32 *value) +{ + s64 status; + u64 lval; + + status = ia64_sal_pci_config_read(PCI_CONFIG_ADDRESS(dev, where), 4, &lval); + *value = lval; + return status; +} + +static int +pci_conf_write_config_byte (struct pci_dev *dev, int where, u8 value) +{ + return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 1, value); +} + +static int +pci_conf_write_config_word (struct pci_dev *dev, int where, u16 value) +{ + return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 2, value); +} + +static int +pci_conf_write_config_dword (struct pci_dev *dev, int where, u32 value) +{ + return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 4, value); +} + + +static struct pci_ops pci_conf = { + pci_conf_read_config_byte, + pci_conf_read_config_word, + pci_conf_read_config_dword, + pci_conf_write_config_byte, + pci_conf_write_config_word, + pci_conf_write_config_dword +}; + +/* + * Try to find PCI BIOS. This will always work for IA64. + */ + +static struct pci_ops * __init +pci_find_bios(void) +{ + return &pci_conf; +} + +/* + * Initialization. Uses the SAL interface + */ + +#define PCI_BUSSES_TO_SCAN 2 /* On "real" ;) hardware this will be 255 */ + +void __init +pcibios_init(void) +{ + struct pci_ops *ops = NULL; + int i; + + if ((ops = pci_find_bios()) == NULL) { + printk("PCI: No PCI bus detected\n"); + return; + } + + printk("PCI: Probing PCI hardware\n"); + for (i = 0; i < PCI_BUSSES_TO_SCAN; i++) + pci_scan_bus(i, ops, NULL); + platform_pci_fixup(); + return; +} + +/* + * Called after each bus is probed, but before its children + * are examined. + */ + +void __init +pcibios_fixup_bus(struct pci_bus *b) +{ + return; +} + +int +pci_assign_resource (struct pci_dev *dev, int i) +{ + printk("pci_assign_resource: not implemented!\n"); + return -ENODEV; +} + +void __init +pcibios_update_resource(struct pci_dev *dev, struct resource *root, + struct resource *res, int resource) +{ + unsigned long where, size; + u32 reg; + + where = PCI_BASE_ADDRESS_0 + (resource * 4); + size = res->end - res->start; + pci_read_config_dword(dev, where, ®); + reg = (reg & size) | (((u32)(res->start - root->start)) & ~size); + pci_write_config_dword(dev, where, reg); + + /* ??? FIXME -- record old value for shutdown. */ +} + +void __init +pcibios_update_irq(struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); + + /* ??? FIXME -- record old value for shutdown. 
*/ +} + +void __init +pcibios_fixup_pbus_ranges (struct pci_bus * bus, struct pbus_set_ranges_data * ranges) +{ + ranges->io_start -= bus->resource[0]->start; + ranges->io_end -= bus->resource[0]->start; + ranges->mem_start -= bus->resource[1]->start; + ranges->mem_end -= bus->resource[1]->start; +} + +int __init +pcibios_enable_device (struct pci_dev *dev) +{ + /* Not needed, since we enable all devices at startup. */ + return 0; +} + +/* + * PCI BIOS setup, always defaults to SAL interface + */ + +char * __init +pcibios_setup(char *str) +{ + pci_probe = PCI_NO_CHECKS; + return NULL; +} + +void +pcibios_align_resource (void *data, struct resource *res, unsigned long size) +{ +} + +#if 0 /*def CONFIG_PROC_FS*/ +/* + * This is an ugly hack to get a (weak) unresolved reference to something that is + * in drivers/pci/proc.c. Without this, the file does not get linked in at all + * (I suspect the reason this isn't needed on Linux/x86 is that most people compile + * with module support, in which case the EXPORT_SYMBOL() stuff will ensure the + * code gets linked in. Sigh... --davidm 99/12/20. + */ +asm ("data8 proc_bus_pci_add"); +#endif diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c new file mode 100644 index 000000000..274b68a73 --- /dev/null +++ b/arch/ia64/kernel/perfmon.c @@ -0,0 +1,227 @@ +#include <linux/config.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp_lock.h> + +#include <asm/errno.h> +#include <asm/irq.h> +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/uaccess.h> + +#ifdef CONFIG_PERFMON + +#define MAX_PERF_COUNTER 4 /* true for Itanium, at least */ +#define WRITE_PMCS_AND_START 0xa0 +#define WRITE_PMCS 0xa1 +#define READ_PMDS 0xa2 +#define STOP_PMCS 0xa3 +#define IA64_COUNTER_MASK 0xffffffffffffff6f +#define PERF_OVFL_VAL 0xffffffff + +struct perfmon_counter { + unsigned long data; + int counter_num; +}; + +unsigned long pmds[MAX_PERF_COUNTER]; +struct task_struct *perf_owner; + +/* + * We set dcr.pp, psr.pp, and the appropriate pmc control values with + * this. Notice that we go about modifying _each_ task's pt_regs to + * set cr_ipsr.pp. This will start counting when "current" does an + * _rfi_. Also, since each task's cr_ipsr.pp, and cr_ipsr is inherited + * across forks, we do _not_ need additional code on context + * switches. On stopping of the counters we dont _need_ to go about + * changing every task's cr_ipsr back to where it wuz, because we can + * just set pmc[0]=1. But we do it anyways becuase we will probably + * add thread specific accounting later. + * + * The obvious problem with this is that on SMP systems, it is a bit + * of work (when someone wants to do it) - it would be easier if we + * just added code to the context-switch path. I think we would need + * to lock the run queue to ensure no context switches, send an IPI to + * each processor, and in that IPI handler, just modify the psr bit of + * only the _current_ thread, since we have modified the psr bit + * correctly in the kernel stack for every process which is not + * running. Might crash on SMP systems without the + * lock_kernel(). Hence the lock.. 
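+ *
+ * As a rough illustration of the intended usage from user level
+ * (assuming some stub that ends up in sys_perfmonctl(cmd1, cmd2, ptr);
+ * neither the stub nor the event encodings are defined in this file):
+ *
+ *	struct perfmon_counter pc[MAX_PERF_COUNTER];
+ *	pc[0].counter_num = 4;		(the generic PMC/PMD pairs are 4-7)
+ *	pc[0].data = <event selection for PMC4>;
+ *	perfmonctl(WRITE_PMCS_AND_START, 1, pc);
+ *	... run the code to be measured ...
+ *	perfmonctl(READ_PMDS, 1, pc);	(counter values come back in .data)
+ *	perfmonctl(STOP_PMCS, 0, 0);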
+ */ +asmlinkage unsigned long +sys_perfmonctl (int cmd1, int cmd2, void *ptr) +{ + struct perfmon_counter tmp, *cptr = ptr; + unsigned long pmd, cnum, dcr, flags; + struct task_struct *p; + struct pt_regs *regs; + struct perf_counter; + int i; + + switch (cmd1) { + case WRITE_PMCS: /* Writes to PMC's and clears PMDs */ + case WRITE_PMCS_AND_START: /* Also starts counting */ + + if (!access_ok(VERIFY_READ, cptr, sizeof(struct perf_counter)*cmd2)) + return -EFAULT; + + if (cmd2 >= MAX_PERF_COUNTER) + return -EFAULT; + + if (perf_owner && perf_owner != current) + return -EBUSY; + perf_owner = current; + + for (i = 0; i < cmd2; i++, cptr++) { + copy_from_user(&tmp, cptr, sizeof(tmp)); + /* XXX need to check validity of counter_num and perhaps data!! */ + ia64_set_pmc(tmp.counter_num, tmp.data); + ia64_set_pmd(tmp.counter_num, 0); + pmds[tmp.counter_num - 4] = 0; + } + + if (cmd1 == WRITE_PMCS_AND_START) { + local_irq_save(flags); + dcr = ia64_get_dcr(); + dcr |= IA64_DCR_PP; + ia64_set_dcr(dcr); + local_irq_restore(flags); + + /* + * This is a no can do. It obviously wouldn't + * work on SMP where another process may not + * be blocked at all. + * + * Perhaps we need a global predicate in the + * leave_kernel path to control if pp should + * be on or off? + */ + lock_kernel(); + for_each_task(p) { + regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) - 1; + ia64_psr(regs)->pp = 1; + } + unlock_kernel(); + ia64_set_pmc(0, 0); + } + break; + + case READ_PMDS: + if (cmd2 >= MAX_PERF_COUNTER) + return -EFAULT; + if (!access_ok(VERIFY_WRITE, cptr, sizeof(struct perf_counter)*cmd2)) + return -EFAULT; + local_irq_save(flags); + /* XXX this looks wrong */ + __asm__ __volatile__("rsm psr.pp\n"); + dcr = ia64_get_dcr(); + dcr &= ~IA64_DCR_PP; + ia64_set_dcr(dcr); + local_irq_restore(flags); + + /* + * We cannot touch pmc[0] to stop counting here, as + * that particular instruction might cause an overflow + * and the mask in pmc[0] might get lost. I'm not very + * sure of the hardware behavior here. So we stop + * counting by psr.pp = 0. And we reset dcr.pp to + * prevent an interrupt from mucking up psr.pp in the + * meanwhile. Perfmon interrupts are pended, hence the + * above code should be ok if one of the above + * instructions cause overflows. Is this ok? When I + * muck with dcr, is the cli/sti needed?? + */ + for (i = 0, cnum = 4; i < MAX_PERF_COUNTER; i++, cnum++, cptr++) { + pmd = pmds[i] + (ia64_get_pmd(cnum) & PERF_OVFL_VAL); + put_user(pmd, &cptr->data); + } + local_irq_save(flags); + /* XXX this looks wrong */ + __asm__ __volatile__("ssm psr.pp"); + dcr = ia64_get_dcr(); + dcr |= IA64_DCR_PP; + ia64_set_dcr(dcr); + local_irq_restore(flags); + break; + + case STOP_PMCS: + ia64_set_pmc(0, 1); + for (i = 0; i < MAX_PERF_COUNTER; ++i) + ia64_set_pmc(i, 0); + + local_irq_save(flags); + dcr = ia64_get_dcr(); + dcr &= ~IA64_DCR_PP; + ia64_set_dcr(dcr); + local_irq_restore(flags); + /* + * This is a no can do. It obviously wouldn't + * work on SMP where another process may not + * be blocked at all. + * + * Perhaps we need a global predicate in the + * leave_kernel path to control if pp should + * be on or off? 
+ */ + lock_kernel(); + for_each_task(p) { + regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) - 1; + ia64_psr(regs)->pp = 0; + } + unlock_kernel(); + perf_owner = 0; + break; + + default: + break; + } + return 0; +} + +static inline void +update_counters (void) +{ + unsigned long mask, i, cnum, val; + + mask = ia64_get_pmd(0) >> 4; + for (i = 0, cnum = 4; i < MAX_PERF_COUNTER; cnum++, i++, mask >>= 1) { + if (mask & 0x1) + val = PERF_OVFL_VAL; + else + /* since we got an interrupt, might as well clear every pmd. */ + val = ia64_get_pmd(cnum) & PERF_OVFL_VAL; + pmds[i] += val; + ia64_set_pmd(cnum, 0); + } +} + +static void +perfmon_interrupt (int irq, void *arg, struct pt_regs *regs) +{ + update_counters(); + ia64_set_pmc(0, 0); + ia64_srlz_d(); +} + +void +perfmon_init (void) +{ + if (request_irq(PERFMON_IRQ, perfmon_interrupt, 0, "perfmon", NULL)) { + printk("perfmon_init: could not allocate performance monitor vector %u\n", + PERFMON_IRQ); + return; + } + ia64_set_pmv(PERFMON_IRQ); + ia64_srlz_d(); +} + +#else /* !CONFIG_PERFMON */ + +asmlinkage unsigned long +sys_perfmonctl (int cmd1, int cmd2, void *ptr) +{ + return -ENOSYS; +} + +#endif /* !CONFIG_PERFMON */ diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c new file mode 100644 index 000000000..5b6deb5f5 --- /dev/null +++ b/arch/ia64/kernel/process.c @@ -0,0 +1,421 @@ +/* + * Architecture-specific setup. + * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#define __KERNEL_SYSCALLS__ /* see <asm/unistd.h> */ +#include <linux/config.h> + +#include <linux/pm.h> +#include <linux/elf.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> + +#include <asm/delay.h> +#include <asm/efi.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/sal.h> +#include <asm/uaccess.h> +#include <asm/user.h> + + +void +show_regs (struct pt_regs *regs) +{ + unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; + + printk("\npsr : %016lx ifs : %016lx ip : [<%016lx>]\n", + regs->cr_ipsr, regs->cr_ifs, ip); + printk("unat: %016lx pfs : %016lx rsc : %016lx\n", + regs->ar_unat, regs->ar_pfs, regs->ar_rsc); + printk("rnat: %016lx bsps: %016lx pr : %016lx\n", + regs->ar_rnat, regs->ar_bspstore, regs->pr); + printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n", + regs->loadrs, regs->ar_ccv, regs->ar_fpsr); + printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, regs->b6, regs->b7); + printk("f6 : %05lx%016lx f7 : %05lx%016lx\n", + regs->f6.u.bits[1], regs->f6.u.bits[0], + regs->f7.u.bits[1], regs->f7.u.bits[0]); + printk("f8 : %05lx%016lx f9 : %05lx%016lx\n", + regs->f8.u.bits[1], regs->f8.u.bits[0], + regs->f9.u.bits[1], regs->f9.u.bits[0]); + + printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, regs->r2, regs->r3); + printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10); + printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13); + printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16); + printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19); + printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22); + printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25); + printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, 
regs->r28); + printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31); + + /* print the stacked registers if cr.ifs is valid: */ + if (regs->cr_ifs & 0x8000000000000000) { + unsigned long val, sof, *bsp, ndirty; + int i, is_nat = 0; + + sof = regs->cr_ifs & 0x7f; /* size of frame */ + ndirty = (regs->loadrs >> 19); + bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty); + for (i = 0; i < sof; ++i) { + get_user(val, ia64_rse_skip_regs(bsp, i)); + printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val, + ((i == sof - 1) || (i % 3) == 2) ? "\n" : " "); + } + } +} + +void __attribute__((noreturn)) +cpu_idle (void *unused) +{ + /* endless idle loop with no priority at all */ + init_idle(); + current->priority = 0; + current->counter = -100; + +#ifdef CONFIG_SMP + if (!current->need_resched) + min_xtp(); +#endif + + while (1) { + while (!current->need_resched) { + continue; + } +#ifdef CONFIG_SMP + normal_xtp(); +#endif + schedule(); + check_pgt_cache(); + if (pm_idle) + (*pm_idle)(); + } +} + +/* + * Copy the state of an ia-64 thread. + * + * We get here through the following call chain: + * + * <clone syscall> + * sys_clone + * do_fork + * copy_thread + * + * This means that the stack layout is as follows: + * + * +---------------------+ (highest addr) + * | struct pt_regs | + * +---------------------+ + * | struct switch_stack | + * +---------------------+ + * | | + * | memory stack | + * | | <-- sp (lowest addr) + * +---------------------+ + * + * Note: if we get called through kernel_thread() then the memory + * above "(highest addr)" is valid kernel stack memory that needs to + * be copied as well. + * + * Observe that we copy the unat values that are in pt_regs and + * switch_stack. Since the interpretation of unat is dependent upon + * the address to which the registers got spilled, doing this is valid + * only as long as we preserve the alignment of the stack. Since the + * stack is always page aligned, we know this is the case. + * + * XXX Actually, the above isn't true when we create kernel_threads(). + * If we ever needs to create kernel_threads() that preserve the unat + * values we'll need to fix this. Perhaps an easy workaround would be + * to always clear the unat bits in the child thread. + */ +int +copy_thread (int nr, unsigned long clone_flags, unsigned long usp, + struct task_struct *p, struct pt_regs *regs) +{ + unsigned long rbs, child_rbs, rbs_size, stack_offset, stack_top, stack_used; + struct switch_stack *child_stack, *stack; + extern char ia64_ret_from_syscall_clear_r8; + extern char ia64_strace_clear_r8; + struct pt_regs *child_ptregs; + +#ifdef CONFIG_SMP + /* + * For SMP idle threads, fork_by_hand() calls do_fork with + * NULL regs. 
+ */ + if (!regs) + return 0; +#endif + + stack_top = (unsigned long) current + IA64_STK_OFFSET; + stack = ((struct switch_stack *) regs) - 1; + stack_used = stack_top - (unsigned long) stack; + stack_offset = IA64_STK_OFFSET - stack_used; + + child_stack = (struct switch_stack *) ((unsigned long) p + stack_offset); + child_ptregs = (struct pt_regs *) (child_stack + 1); + + /* copy parent's switch_stack & pt_regs to child: */ + memcpy(child_stack, stack, stack_used); + + rbs = (unsigned long) current + IA64_RBS_OFFSET; + child_rbs = (unsigned long) p + IA64_RBS_OFFSET; + rbs_size = stack->ar_bspstore - rbs; + + /* copy the parent's register backing store to the child: */ + memcpy((void *) child_rbs, (void *) rbs, rbs_size); + + child_ptregs->r8 = 0; /* child gets a zero return value */ + if (user_mode(child_ptregs)) + child_ptregs->r12 = usp; /* user stack pointer */ + else { + /* + * Note: we simply preserve the relative position of + * the stack pointer here. There is no need to + * allocate a scratch area here, since that will have + * been taken care of by the caller of sys_clone() + * already. + */ + child_ptregs->r12 = (unsigned long) (child_ptregs + 1); /* kernel sp */ + child_ptregs->r13 = (unsigned long) p; /* set `current' pointer */ + } + if (p->flags & PF_TRACESYS) + child_stack->b0 = (unsigned long) &ia64_strace_clear_r8; + else + child_stack->b0 = (unsigned long) &ia64_ret_from_syscall_clear_r8; + child_stack->ar_bspstore = child_rbs + rbs_size; + + /* copy the thread_struct: */ + p->thread.ksp = (unsigned long) child_stack - 16; + /* + * NOTE: The calling convention considers all floating point + * registers in the high partition (fph) to be scratch. Since + * the only way to get to this point is through a system call, + * we know that the values in fph are all dead. Hence, there + * is no need to inherit the fph state from the parent to the + * child and all we have to do is to make sure that + * IA64_THREAD_FPH_VALID is cleared in the child. + * + * XXX We could push this optimization a bit further by + * clearing IA64_THREAD_FPH_VALID on ANY system call. + * However, it's not clear this is worth doing. Also, it + * would be a slight deviation from the normal Linux system + * call behavior where scratch registers are preserved across + * system calls (unless used by the system call itself). 
+ * + * If we wanted to inherit the fph state from the parent to the + * child, we would have to do something along the lines of: + * + * if (ia64_get_fpu_owner() == current && ia64_psr(regs)->mfh) { + * p->thread.flags |= IA64_THREAD_FPH_VALID; + * ia64_save_fpu(&p->thread.fph); + * } else if (current->thread.flags & IA64_THREAD_FPH_VALID) { + * memcpy(p->thread.fph, current->thread.fph, sizeof(p->thread.fph)); + * } + */ + p->thread.flags = (current->thread.flags & ~IA64_THREAD_FPH_VALID); + return 0; +} + +void +ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst) +{ + struct switch_stack *sw = ((struct switch_stack *) pt) - 1; + unsigned long ar_ec, cfm, ar_bsp, ndirty, *krbs; + + ar_ec = (sw->ar_pfs >> 52) & 0x3f; + + cfm = pt->cr_ifs & ((1UL << 63) - 1); + if ((pt->cr_ifs & (1UL << 63)) == 0) { + /* if cr_ifs isn't valid, we got here through a syscall or a break */ + cfm = sw->ar_pfs & ((1UL << 38) - 1); + } + + krbs = (unsigned long *) current + IA64_RBS_OFFSET/8; + ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 16)); + ar_bsp = (long) ia64_rse_skip_regs((long *) pt->ar_bspstore, ndirty); + + /* r0-r31 + * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) + * predicate registers (p0-p63) + * b0-b7 + * ip cfm user-mask + * ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec + */ + memset(dst, 0, sizeof (dst)); /* don't leak any "random" bits */ + + /* r0 is zero */ dst[ 1] = pt->r1; dst[ 2] = pt->r2; dst[ 3] = pt->r3; + dst[ 4] = sw->r4; dst[ 5] = sw->r5; dst[ 6] = sw->r6; dst[ 7] = sw->r7; + dst[ 8] = pt->r8; dst[ 9] = pt->r9; dst[10] = pt->r10; dst[11] = pt->r11; + dst[12] = pt->r12; dst[13] = pt->r13; dst[14] = pt->r14; dst[15] = pt->r15; + memcpy(dst + 16, &pt->r16, 16*8); /* r16-r31 are contiguous */ + + dst[32] = ia64_get_nat_bits(pt, sw); + dst[33] = pt->pr; + + /* branch regs: */ + dst[34] = pt->b0; dst[35] = sw->b1; dst[36] = sw->b2; dst[37] = sw->b3; + dst[38] = sw->b4; dst[39] = sw->b5; dst[40] = pt->b6; dst[41] = pt->b7; + + dst[42] = pt->cr_iip; dst[43] = pt->cr_ifs; + dst[44] = pt->cr_ipsr; /* XXX perhaps we should filter out some bits here? 
--davidm */ + + dst[45] = pt->ar_rsc; dst[46] = ar_bsp; dst[47] = pt->ar_bspstore; dst[48] = pt->ar_rnat; + dst[49] = pt->ar_ccv; dst[50] = pt->ar_unat; dst[51] = sw->ar_fpsr; dst[52] = pt->ar_pfs; + dst[53] = sw->ar_lc; dst[54] = (sw->ar_pfs >> 52) & 0x3f; +} + +int +dump_fpu (struct pt_regs *pt, elf_fpregset_t dst) +{ + struct switch_stack *sw = ((struct switch_stack *) pt) - 1; + struct task_struct *fpu_owner = ia64_get_fpu_owner(); + + memset(dst, 0, sizeof (dst)); /* don't leak any "random" bits */ + + /* f0 is 0.0 */ /* f1 is 1.0 */ dst[2] = sw->f2; dst[3] = sw->f3; + dst[4] = sw->f4; dst[5] = sw->f5; dst[6] = pt->f6; dst[7] = pt->f7; + dst[8] = pt->f8; dst[9] = pt->f9; + memcpy(dst + 10, &sw->f10, 22*16); /* f10-f31 are contiguous */ + + if ((fpu_owner == current) || (current->thread.flags & IA64_THREAD_FPH_VALID)) { + if (fpu_owner == current) { + __ia64_save_fpu(current->thread.fph); + } + memcpy(dst + 32, current->thread.fph, 96*16); + } + return 1; /* f0-f31 are always valid so we always return 1 */ +} + +asmlinkage long +sys_execve (char *filename, char **argv, char **envp, struct pt_regs *regs) +{ + int error; + + lock_kernel(); + filename = getname(filename); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = do_execve(filename, argv, envp, regs); + putname(filename); +out: + unlock_kernel(); + return error; +} + +pid_t +kernel_thread (int (*fn)(void *), void *arg, unsigned long flags) +{ + struct task_struct *parent = current; + int result; + + clone(flags | CLONE_VM, 0); + if (parent != current) { + result = (*fn)(arg); + _exit(result); + } + return 0; /* parent: just return */ +} + +/* + * Flush thread state. This is called when a thread does an execve(). + */ +void +flush_thread (void) +{ + /* drop floating-point and debug-register state if it exists: */ + current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID); + + if (ia64_get_fpu_owner() == current) { + ia64_set_fpu_owner(0); + } +} + +/* + * Clean up state associated with current thread. This is called when + * the thread calls exit(). + */ +void +exit_thread (void) +{ + if (ia64_get_fpu_owner() == current) { + ia64_set_fpu_owner(0); + } +} + +/* + * Free remaining state associated with DEAD_TASK. This is called + * after the parent of DEAD_TASK has collected the exist status of the + * task via wait(). + */ +void +release_thread (struct task_struct *dead_task) +{ + /* nothing to do */ +} + +unsigned long +get_wchan (struct task_struct *p) +{ + struct ia64_frame_info info; + unsigned long ip; + int count = 0; + /* + * These bracket the sleeping functions.. + */ + extern void scheduling_functions_start_here(void); + extern void scheduling_functions_end_here(void); +# define first_sched ((unsigned long) scheduling_functions_start_here) +# define last_sched ((unsigned long) scheduling_functions_end_here) + + /* + * Note: p may not be a blocked task (it could be current or + * another process running on some other CPU. Rather than + * trying to determine if p is really blocked, we just assume + * it's blocked and rely on the unwind routines to fail + * gracefully if the process wasn't really blocked after all. 
+ * --davidm 99/12/15 + */ + ia64_unwind_init_from_blocked_task(&info, p); + do { + if (ia64_unwind_to_previous_frame(&info) < 0) + return 0; + ip = ia64_unwind_get_ip(&info); + if (ip < first_sched || ip >= last_sched) + return ip; + } while (count++ < 16); + return 0; +# undef first_sched +# undef last_sched +} + +void +machine_restart (char *restart_cmd) +{ + (*efi.reset_system)(EFI_RESET_WARM, 0, 0, 0); +} + +void +machine_halt (void) +{ + printk("machine_halt: need PAL or ACPI version here!!\n"); + machine_restart(0); +} + +void +machine_power_off (void) +{ + printk("machine_power_off: unimplemented (need ACPI version here)\n"); + machine_halt (); +} diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c new file mode 100644 index 000000000..18a8e342e --- /dev/null +++ b/arch/ia64/kernel/ptrace.c @@ -0,0 +1,653 @@ +/* + * Kernel support for the ptrace() and syscall tracing interfaces. + * + * Copyright (C) 1999-2000 Hewlett-Packard Co + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * Derived from the x86 and Alpha versions. Most of the code in here + * could actually be factored into a common set of routines. + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/smp_lock.h> +#include <linux/user.h> + +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace_offsets.h> +#include <asm/rse.h> +#include <asm/system.h> +#include <asm/uaccess.h> + +/* + * Collect the NaT bits for r1-r31 from sw->caller_unat and + * sw->ar_unat and return a NaT bitset where bit i is set iff the NaT + * bit of register i is set. + */ +long +ia64_get_nat_bits (struct pt_regs *pt, struct switch_stack *sw) +{ +# define GET_BITS(str, first, last, unat) \ + ({ \ + unsigned long bit = ia64_unat_pos(&str->r##first); \ + unsigned long mask = ((1UL << (last - first + 1)) - 1) << first; \ + (ia64_rotl(unat, first) >> bit) & mask; \ + }) + unsigned long val; + + val = GET_BITS(pt, 1, 3, sw->caller_unat); + val |= GET_BITS(pt, 12, 15, sw->caller_unat); + val |= GET_BITS(pt, 8, 11, sw->caller_unat); + val |= GET_BITS(pt, 16, 31, sw->caller_unat); + val |= GET_BITS(sw, 4, 7, sw->ar_unat); + return val; + +# undef GET_BITS +} + +/* + * Store the NaT bitset NAT in pt->caller_unat and sw->ar_unat. + */ +void +ia64_put_nat_bits (struct pt_regs *pt, struct switch_stack *sw, unsigned long nat) +{ +# define PUT_BITS(str, first, last, nat) \ + ({ \ + unsigned long bit = ia64_unat_pos(&str->r##first); \ + unsigned long mask = ((1UL << (last - first + 1)) - 1) << bit; \ + (ia64_rotr(nat, first) << bit) & mask; \ + }) + sw->caller_unat = PUT_BITS(pt, 1, 3, nat); + sw->caller_unat |= PUT_BITS(pt, 12, 15, nat); + sw->caller_unat |= PUT_BITS(pt, 8, 11, nat); + sw->caller_unat |= PUT_BITS(pt, 16, 31, nat); + sw->ar_unat = PUT_BITS(sw, 4, 7, nat); + +# undef PUT_BITS +} + +#define IA64_MLI_TEMPLATE 0x2 +#define IA64_MOVL_OPCODE 6 + +void +ia64_increment_ip (struct pt_regs *regs) +{ + unsigned long w0, w1, ri = ia64_psr(regs)->ri + 1; + + if (ri > 2) { + ri = 0; + regs->cr_iip += 16; + } else if (ri == 2) { + get_user(w0, (char *) regs->cr_iip + 0); + get_user(w1, (char *) regs->cr_iip + 8); + if (((w0 >> 1) & 0xf) == IA64_MLI_TEMPLATE && (w1 >> 60) == IA64_MOVL_OPCODE) { + /* + * rfi'ing to slot 2 of an MLI bundle causes + * an illegal operation fault. We don't want + * that to happen... Note that we check the + * opcode only. 
"movl" has a vc bit of 0, but + * since a vc bit of 1 is currently reserved, + * we might just as well treat it like a movl. + */ + ri = 0; + regs->cr_iip += 16; + } + } + ia64_psr(regs)->ri = ri; +} + +void +ia64_decrement_ip (struct pt_regs *regs) +{ + unsigned long w0, w1, ri = ia64_psr(regs)->ri - 1; + + if (ia64_psr(regs)->ri == 0) { + regs->cr_iip -= 16; + ri = 2; + get_user(w0, (char *) regs->cr_iip + 0); + get_user(w1, (char *) regs->cr_iip + 8); + if (((w0 >> 1) & 0xf) == IA64_MLI_TEMPLATE && (w1 >> 60) == IA64_MOVL_OPCODE) { + /* + * rfi'ing to slot 2 of an MLI bundle causes + * an illegal operation fault. We don't want + * that to happen... Note that we check the + * opcode only. "movl" has a vc bit of 0, but + * since a vc bit of 1 is currently reserved, + * we might just as well treat it like a movl. + */ + ri = 1; + } + } + ia64_psr(regs)->ri = ri; +} + +/* + * This routine is used to read an rnat bits that are stored on the + * kernel backing store. Since, in general, the alignment of the user + * and kernel are different, this is not completely trivial. In + * essence, we need to construct the user RNAT based on up to two + * kernel RNAT values and/or the RNAT value saved in the child's + * pt_regs. + * + * user rbs + * + * +--------+ <-- lowest address + * | slot62 | + * +--------+ + * | rnat | 0x....1f8 + * +--------+ + * | slot00 | \ + * +--------+ | + * | slot01 | > child_regs->ar_rnat + * +--------+ | + * | slot02 | / kernel rbs + * +--------+ +--------+ + * <- child_regs->ar_bspstore | slot61 | <-- krbs + * +- - - - + +--------+ + * | slot62 | + * +- - - - + +--------+ + * | rnat | + * +- - - - + +--------+ + * vrnat | slot00 | + * +- - - - + +--------+ + * = = + * +--------+ + * | slot00 | \ + * +--------+ | + * | slot01 | > child_stack->ar_rnat + * +--------+ | + * | slot02 | / + * +--------+ + * <--- child_stack->ar_bspstore + * + * The way to think of this code is as follows: bit 0 in the user rnat + * corresponds to some bit N (0 <= N <= 62) in one of the kernel rnat + * value. The kernel rnat value holding this bit is stored in + * variable rnat0. rnat1 is loaded with the kernel rnat value that + * form the upper bits of the user rnat value. + * + * Boundary cases: + * + * o when reading the rnat "below" the first rnat slot on the kernel + * backing store, rnat0/rnat1 are set to 0 and the low order bits + * are merged in from pt->ar_rnat. + * + * o when reading the rnat "above" the last rnat slot on the kernel + * backing store, rnat0/rnat1 gets its value from sw->ar_rnat. + */ +static unsigned long +get_rnat (struct pt_regs *pt, struct switch_stack *sw, + unsigned long *krbs, unsigned long *urnat_addr) +{ + unsigned long rnat0 = 0, rnat1 = 0, urnat = 0, *slot0_kaddr, kmask = ~0UL; + unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; + long num_regs; + + kbsp = (unsigned long *) sw->ar_bspstore; + ubspstore = (unsigned long *) pt->ar_bspstore; + /* + * First, figure out which bit number slot 0 in user-land maps + * to in the kernel rnat. Do this by figuring out how many + * register slots we're beyond the user's backingstore and + * then computing the equivalent address in kernel space. 
+ */ + num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1); + slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); + shift = ia64_rse_slot_num(slot0_kaddr); + rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); + rnat0_kaddr = rnat1_kaddr - 64; + + if (ubspstore + 63 > urnat_addr) { + /* some bits need to be merged in from pt->ar_rnat */ + kmask = ~((1UL << ia64_rse_slot_num(ubspstore)) - 1); + urnat = (pt->ar_rnat & ~kmask); + } + if (rnat0_kaddr >= kbsp) { + rnat0 = sw->ar_rnat; + } else if (rnat0_kaddr > krbs) { + rnat0 = *rnat0_kaddr; + } + if (rnat1_kaddr >= kbsp) { + rnat1 = sw->ar_rnat; + } else if (rnat1_kaddr > krbs) { + rnat1 = *rnat1_kaddr; + } + urnat |= ((rnat1 << (63 - shift)) | (rnat0 >> shift)) & kmask; + return urnat; +} + +/* + * The reverse of get_rnat. + */ +static void +put_rnat (struct pt_regs *pt, struct switch_stack *sw, + unsigned long *krbs, unsigned long *urnat_addr, unsigned long urnat) +{ + unsigned long rnat0 = 0, rnat1 = 0, rnat = 0, *slot0_kaddr, kmask = ~0UL, mask; + unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; + long num_regs; + + kbsp = (unsigned long *) sw->ar_bspstore; + ubspstore = (unsigned long *) pt->ar_bspstore; + /* + * First, figure out which bit number slot 0 in user-land maps + * to in the kernel rnat. Do this by figuring out how many + * register slots we're beyond the user's backingstore and + * then computing the equivalent address in kernel space. + */ + num_regs = (long) ia64_rse_num_regs(ubspstore, urnat_addr + 1); + slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); + shift = ia64_rse_slot_num(slot0_kaddr); + rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); + rnat0_kaddr = rnat1_kaddr - 64; + + if (ubspstore + 63 > urnat_addr) { + /* some bits need to be place in pt->ar_rnat: */ + kmask = ~((1UL << ia64_rse_slot_num(ubspstore)) - 1); + pt->ar_rnat = (pt->ar_rnat & kmask) | (rnat & ~kmask); + } + /* + * Note: Section 11.1 of the EAS guarantees that bit 63 of an + * rnat slot is ignored. so we don't have to clear it here. + */ + rnat0 = (urnat << shift); + mask = ~0UL << shift; + if (rnat0_kaddr >= kbsp) { + sw->ar_rnat = (sw->ar_rnat & ~mask) | (rnat0 & mask); + } else if (rnat0_kaddr > krbs) { + *rnat0_kaddr = ((*rnat0_kaddr & ~mask) | (rnat0 & mask)); + } + + rnat1 = (urnat >> (63 - shift)); + mask = ~0UL >> (63 - shift); + if (rnat1_kaddr >= kbsp) { + sw->ar_rnat = (sw->ar_rnat & ~mask) | (rnat1 & mask); + } else if (rnat1_kaddr > krbs) { + *rnat1_kaddr = ((*rnat1_kaddr & ~mask) | (rnat1 & mask)); + } +} + +long +ia64_peek (struct pt_regs *regs, struct task_struct *child, unsigned long addr, long *val) +{ + unsigned long *bspstore, *krbs, krbs_num_regs, regnum, *rbs_end, *laddr; + struct switch_stack *child_stack; + struct pt_regs *child_regs; + size_t copied; + long ret; + + laddr = (unsigned long *) addr; + child_regs = ia64_task_regs(child); + child_stack = (struct switch_stack *) child_regs - 1; + bspstore = (unsigned long *) child_regs->ar_bspstore; + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + krbs_num_regs = ia64_rse_num_regs(krbs, (unsigned long *) child_stack->ar_bspstore); + rbs_end = ia64_rse_skip_regs(bspstore, krbs_num_regs); + if (laddr >= bspstore && laddr <= ia64_rse_rnat_addr(rbs_end)) { + /* + * Attempt to read the RBS in an area that's actually + * on the kernel RBS => read the corresponding bits in + * the kernel RBS. 
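+ * RNAT slots are synthesized via get_rnat(); ordinary slots that lie
+ * beyond what has actually been spilled to the kernel RBS read back
+ * as 0.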
+ */ + if (ia64_rse_is_rnat_slot(laddr)) + ret = get_rnat(child_regs, child_stack, krbs, laddr); + else { + regnum = ia64_rse_num_regs(bspstore, laddr); + laddr = ia64_rse_skip_regs(krbs, regnum); + if (regnum >= krbs_num_regs) { + ret = 0; + } else { + if ((unsigned long) laddr >= (unsigned long) high_memory) { + printk("yikes: trying to access long at %p\n", laddr); + return -EIO; + } + ret = *laddr; + } + } + } else { + copied = access_process_vm(child, addr, &ret, sizeof(ret), 0); + if (copied != sizeof(ret)) + return -EIO; + } + *val = ret; + return 0; +} + +long +ia64_poke (struct pt_regs *regs, struct task_struct *child, unsigned long addr, long val) +{ + unsigned long *bspstore, *krbs, krbs_num_regs, regnum, *rbs_end, *laddr; + struct switch_stack *child_stack; + struct pt_regs *child_regs; + + laddr = (unsigned long *) addr; + child_regs = ia64_task_regs(child); + child_stack = (struct switch_stack *) child_regs - 1; + bspstore = (unsigned long *) child_regs->ar_bspstore; + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + krbs_num_regs = ia64_rse_num_regs(krbs, (unsigned long *) child_stack->ar_bspstore); + rbs_end = ia64_rse_skip_regs(bspstore, krbs_num_regs); + if (laddr >= bspstore && laddr <= ia64_rse_rnat_addr(rbs_end)) { + /* + * Attempt to write the RBS in an area that's actually + * on the kernel RBS => write the corresponding bits + * in the kernel RBS. + */ + if (ia64_rse_is_rnat_slot(laddr)) + put_rnat(child_regs, child_stack, krbs, laddr, val); + else { + regnum = ia64_rse_num_regs(bspstore, laddr); + laddr = ia64_rse_skip_regs(krbs, regnum); + if (regnum < krbs_num_regs) { + *laddr = val; + } + } + } else if (access_process_vm(child, addr, &val, sizeof(val), 1) != sizeof(val)) { + return -EIO; + } + return 0; +} + +/* + * Ensure the state in child->thread.fph is up-to-date. + */ +static void +sync_fph (struct task_struct *child) +{ + if (ia64_psr(ia64_task_regs(child))->mfh && ia64_get_fpu_owner() == child) { + ia64_save_fpu(&child->thread.fph[0]); + child->thread.flags |= IA64_THREAD_FPH_VALID; + } + if (!(child->thread.flags & IA64_THREAD_FPH_VALID)) { + memset(&child->thread.fph, 0, sizeof(child->thread.fph)); + child->thread.flags |= IA64_THREAD_FPH_VALID; + } +} + +asmlinkage long +sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, + long arg4, long arg5, long arg6, long arg7, long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + struct switch_stack *child_stack; + struct pt_regs *child_regs; + struct task_struct *child; + unsigned long flags, *base; + long ret, regnum; + + lock_kernel(); + ret = -EPERM; + if (request == PTRACE_TRACEME) { + /* are we already being traced? */ + if (current->flags & PF_PTRACED) + goto out; + current->flags |= PF_PTRACED; + ret = 0; + goto out; + } + + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + read_unlock(&tasklist_lock); + if (!child) + goto out; + ret = -EPERM; + if (pid == 1) /* no messing around with init! 
*/ + goto out; + + if (request == PTRACE_ATTACH) { + if (child == current) + goto out; + if ((!child->dumpable || + (current->uid != child->euid) || + (current->uid != child->suid) || + (current->uid != child->uid) || + (current->gid != child->egid) || + (current->gid != child->sgid) || + (!cap_issubset(child->cap_permitted, current->cap_permitted)) || + (current->gid != child->gid)) && !capable(CAP_SYS_PTRACE)) + goto out; + /* the same process cannot be attached many times */ + if (child->flags & PF_PTRACED) + goto out; + child->flags |= PF_PTRACED; + if (child->p_pptr != current) { + unsigned long flags; + + write_lock_irqsave(&tasklist_lock, flags); + REMOVE_LINKS(child); + child->p_pptr = current; + SET_LINKS(child); + write_unlock_irqrestore(&tasklist_lock, flags); + } + send_sig(SIGSTOP, child, 1); + ret = 0; + goto out; + } + ret = -ESRCH; + if (!(child->flags & PF_PTRACED)) + goto out; + if (child->state != TASK_STOPPED) { + if (request != PTRACE_KILL) + goto out; + } + if (child->p_pptr != current) + goto out; + + switch (request) { + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: /* read word at location addr */ + ret = ia64_peek(regs, child, addr, &data); + if (ret == 0) { + ret = data; + regs->r8 = 0; /* ensure "ret" is not mistaken as an error code */ + } + goto out; + + case PTRACE_POKETEXT: + case PTRACE_POKEDATA: /* write the word at location addr */ + ret = ia64_poke(regs, child, addr, data); + goto out; + + case PTRACE_PEEKUSR: /* read the word at addr in the USER area */ + ret = -EIO; + if ((addr & 0x7) != 0) + goto out; + + if (addr < PT_CALLER_UNAT) { + /* accessing fph */ + sync_fph(child); + addr += (unsigned long) &child->thread.fph; + ret = *(unsigned long *) addr; + } else if (addr < PT_F9+16) { + /* accessing switch_stack or pt_regs: */ + child_regs = ia64_task_regs(child); + child_stack = (struct switch_stack *) child_regs - 1; + ret = *(unsigned long *) ((long) child_stack + addr - PT_CALLER_UNAT); + + if (addr == PT_AR_BSP) { + /* ret currently contains pt_regs.loadrs */ + unsigned long *rbs, *bspstore, ndirty; + + rbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + bspstore = (unsigned long *) child_regs->ar_bspstore; + ndirty = ia64_rse_num_regs(rbs, rbs + (ret >> 19)); + ret = (unsigned long) ia64_rse_skip_regs(bspstore, ndirty); + } + } else { + if (addr >= PT_IBR) { + regnum = (addr - PT_IBR) >> 3; + base = &child->thread.ibr[0]; + } else { + regnum = (addr - PT_DBR) >> 3; + base = &child->thread.dbr[0]; + } + if (regnum >= 8) + goto out; + data = base[regnum]; + } + regs->r8 = 0; /* ensure "ret" is not mistaken as an error code */ + goto out; + + case PTRACE_POKEUSR: /* write the word at addr in the USER area */ + ret = -EIO; + if ((addr & 0x7) != 0) + goto out; + + if (addr < PT_CALLER_UNAT) { + /* accessing fph */ + sync_fph(child); + addr += (unsigned long) &child->thread.fph; + *(unsigned long *) addr = data; + if (ret < 0) + goto out; + } else if (addr < PT_F9+16) { + /* accessing switch_stack or pt_regs */ + child_regs = ia64_task_regs(child); + child_stack = (struct switch_stack *) child_regs - 1; + + if (addr == PT_AR_BSP) { + /* compute the loadrs value based on bsp and bspstore: */ + unsigned long *rbs, *bspstore, ndirty, *kbsp; + + bspstore = (unsigned long *) child_regs->ar_bspstore; + ndirty = ia64_rse_num_regs(bspstore, (unsigned long *) data); + rbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + kbsp = ia64_rse_skip_regs(rbs, ndirty); + data = (kbsp - rbs) << 19; + } + *(unsigned long *) ((long) child_stack + addr - PT_CALLER_UNAT) = 
data; + } else { + if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) { + child->thread.flags |= IA64_THREAD_DBG_VALID; + memset(current->thread.dbr, 0, sizeof current->thread.dbr); + memset(current->thread.ibr, 0, sizeof current->thread.ibr); + } + + if (addr >= PT_IBR) { + regnum = (addr - PT_IBR) >> 3; + base = &child->thread.ibr[0]; + } else { + regnum = (addr - PT_DBR) >> 3; + base = &child->thread.dbr[0]; + } + if (regnum >= 8) + goto out; + if (regnum & 1) { + /* force breakpoint to be effective a most for user-level: */ + data &= ~(0x7UL << 56); + } + base[regnum] = data; + } + ret = 0; + goto out; + + case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ + case PTRACE_CONT: /* restart after signal. */ + ret = -EIO; + if (data > _NSIG) + goto out; + if (request == PTRACE_SYSCALL) + child->flags |= PF_TRACESYS; + else + child->flags &= ~PF_TRACESYS; + child->exit_code = data; + + /* make sure the single step/take-branch tra bits are not set: */ + ia64_psr(ia64_task_regs(child))->ss = 0; + ia64_psr(ia64_task_regs(child))->tb = 0; + + wake_up_process(child); + ret = 0; + goto out; + + case PTRACE_KILL: + /* + * Make the child exit. Best I can do is send it a + * sigkill. Perhaps it should be put in the status + * that it wants to exit. + */ + if (child->state == TASK_ZOMBIE) /* already dead */ + goto out; + child->exit_code = SIGKILL; + + /* make sure the single step/take-branch tra bits are not set: */ + ia64_psr(ia64_task_regs(child))->ss = 0; + ia64_psr(ia64_task_regs(child))->tb = 0; + + wake_up_process(child); + ret = 0; + goto out; + + case PTRACE_SINGLESTEP: /* let child execute for one instruction */ + case PTRACE_SINGLEBLOCK: + ret = -EIO; + if (data > _NSIG) + goto out; + + child->flags &= ~PF_TRACESYS; + if (request == PTRACE_SINGLESTEP) { + ia64_psr(ia64_task_regs(child))->ss = 1; + } else { + ia64_psr(ia64_task_regs(child))->tb = 1; + } + child->exit_code = data; + + /* give it a chance to run. */ + wake_up_process(child); + ret = 0; + goto out; + + case PTRACE_DETACH: /* detach a process that was attached. */ + ret = -EIO; + if (data > _NSIG) + goto out; + + child->flags &= ~(PF_PTRACED|PF_TRACESYS); + child->exit_code = data; + write_lock_irqsave(&tasklist_lock, flags); + REMOVE_LINKS(child); + child->p_pptr = child->p_opptr; + SET_LINKS(child); + write_unlock_irqrestore(&tasklist_lock, flags); + + /* make sure the single step/take-branch tra bits are not set: */ + ia64_psr(ia64_task_regs(child))->ss = 0; + ia64_psr(ia64_task_regs(child))->tb = 0; + + wake_up_process(child); + ret = 0; + goto out; + + default: + ret = -EIO; + goto out; + } + out: + unlock_kernel(); + return ret; +} + +void +syscall_trace (void) +{ + if ((current->flags & (PF_PTRACED|PF_TRACESYS)) != (PF_PTRACED|PF_TRACESYS)) + return; + current->exit_code = SIGTRAP; + set_current_state(TASK_STOPPED); + notify_parent(current, SIGCHLD); + schedule(); + /* + * This isn't the same as continuing with a signal, but it + * will do for normal use. strace only continues with a + * signal if the stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c new file mode 100644 index 000000000..8743f6588 --- /dev/null +++ b/arch/ia64/kernel/sal.c @@ -0,0 +1,157 @@ +/* + * System Abstraction Layer (SAL) interface routines. 
+ * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + */ +#include <linux/config.h> + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/string.h> + +#include <asm/page.h> +#include <asm/sal.h> +#include <asm/pal.h> + +#define SAL_DEBUG + +spinlock_t sal_lock = SPIN_LOCK_UNLOCKED; + +static struct { + void *addr; /* function entry point */ + void *gpval; /* gp value to use */ +} pdesc; + +static long +default_handler (void) +{ + return -1; +} + +ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler; + +const char * +ia64_sal_strerror (long status) +{ + const char *str; + switch (status) { + case 0: str = "Call completed without error"; break; + case 1: str = "Effect a warm boot of the system to complete " + "the update"; break; + case -1: str = "Not implemented"; break; + case -2: str = "Invalid argument"; break; + case -3: str = "Call completed with error"; break; + case -4: str = "Virtual address not registered"; break; + case -5: str = "No information available"; break; + case -6: str = "Insufficient space to add the entry"; break; + case -7: str = "Invalid entry_addr value"; break; + case -8: str = "Invalid interrupt vector"; break; + case -9: str = "Requested memory not available"; break; + case -10: str = "Unable to write to the NVM device"; break; + case -11: str = "Invalid partition type specified"; break; + case -12: str = "Invalid NVM_Object id specified"; break; + case -13: str = "NVM_Object already has the maximum number " + "of partitions"; break; + case -14: str = "Insufficient space in partition for the " + "requested write sub-function"; break; + case -15: str = "Insufficient data buffer space for the " + "requested read record sub-function"; break; + case -16: str = "Scratch buffer required for the write/delete " + "sub-function"; break; + case -17: str = "Insufficient space in the NVM_Object for the " + "requested create sub-function"; break; + case -18: str = "Invalid value specified in the partition_rec " + "argument"; break; + case -19: str = "Record oriented I/O not supported for this " + "partition"; break; + case -20: str = "Bad format of record to be written or " + "required keyword variable not " + "specified"; break; + default: str = "Unknown SAL status code"; break; + } + return str; +} + +static void __init +ia64_sal_handler_init (void *entry_point, void *gpval) +{ + /* fill in the SAL procedure descriptor and point ia64_sal to it: */ + pdesc.addr = entry_point; + pdesc.gpval = gpval; + ia64_sal = (ia64_sal_handler) &pdesc; +} + + +void __init +ia64_sal_init (struct ia64_sal_systab *systab) +{ + unsigned long min, max; + char *p; + struct ia64_sal_desc_entry_point *ep; + int i; + + if (!systab) { + printk("Hmm, no SAL System Table.\n"); + return; + } + + if (strncmp(systab->signature, "SST_", 4) != 0) + printk("bad signature in system table!"); + + printk("SAL v%u.%02u: ia32bios=%s, oem=%.32s, product=%.32s\n", + systab->sal_rev_major, systab->sal_rev_minor, + systab->ia32_bios_present ? "present" : "absent", + systab->oem_id, systab->product_id); + + min = ~0UL; + max = 0; + + p = (char *) (systab + 1); + for (i = 0; i < systab->entry_count; i++) { + /* + * The first byte of each entry type contains the type desciptor. 
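+ * Entries are variable-sized, so after handling (or skipping) one we
+ * advance by SAL_DESC_SIZE(*p) rather than by a fixed stride.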
+ */ + switch (*p) { + case SAL_DESC_ENTRY_POINT: + ep = (struct ia64_sal_desc_entry_point *) p; +#ifdef SAL_DEBUG + printk("sal[%d] - entry: pal_proc=0x%lx, sal_proc=0x%lx\n", + i, ep->pal_proc, ep->sal_proc); +#endif + ia64_pal_handler_init(__va(ep->pal_proc)); + ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp)); + break; + + case SAL_DESC_AP_WAKEUP: +#ifdef CONFIG_SMP + { + struct ia64_sal_desc_ap_wakeup *ap = (void *) p; +# ifdef SAL_DEBUG + printk("sal[%d] - wakeup type %x, 0x%lx\n", + i, ap->mechanism, ap->vector); +# endif + switch (ap->mechanism) { + case IA64_SAL_AP_EXTERNAL_INT: + ap_wakeup_vector = ap->vector; +# ifdef SAL_DEBUG + printk("SAL: AP wakeup using external interrupt; " + "vector 0x%lx\n", ap_wakeup_vector); +# endif + break; + + default: + printk("SAL: AP wakeup mechanism unsupported!\n"); + break; + } + break; + } +#endif + } + p += SAL_DESC_SIZE(*p); + } +} diff --git a/arch/ia64/kernel/sal_stub.S b/arch/ia64/kernel/sal_stub.S new file mode 100644 index 000000000..7ab16bbcd --- /dev/null +++ b/arch/ia64/kernel/sal_stub.S @@ -0,0 +1,116 @@ +/* + * gcc currently does not conform to the ia-64 calling convention as far + * as returning function values are concerned. Instead of returning + * values up to 32 bytes in size in r8-r11, gcc returns any value + * bigger than a doubleword via a structure that's allocated by the + * caller and whose address is passed into the function. Since + * SAL_PROC returns values according to the calling convention, this + * stub takes care of copying r8-r11 to the place where gcc expects + * them. + * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#ifndef __GCC_MULTIREG_RETVALS__ + .text + .psr abi64 + .psr lsb + .lsb + + .align 16 + .global ia64_sal_stub +ia64_sal_stub: + /* + * Sheesh, the Cygnus backend passes the pointer to a return value structure in + * in0 whereas the HP backend passes it in r8. Don't you hate those little + * differences... + */ +#ifdef GCC_RETVAL_POINTER_IN_R8 + adds r2=-24,sp + adds sp=-48,sp + mov r14=rp + ;; + st8 [r2]=r8,8 // save pointer to return value + addl r3=@ltoff(ia64_sal),gp + ;; + ld8 r3=[r3] + st8 [r2]=gp,8 // save global pointer + ;; + ld8 r3=[r3] // fetch the value of ia64_sal + st8 [r2]=r14 // save return pointer + ;; + ld8 r2=[r3],8 // load function's entry point + ;; + ld8 gp=[r3] // load function's global pointer + ;; + mov b6=r2 + br.call.sptk.few rp=b6 +.ret0: adds r2=24,sp + ;; + ld8 r3=[r2],8 // restore pointer to return value + ;; + ld8 gp=[r2],8 // restore global pointer + st8 [r3]=r8,8 + ;; + ld8 r14=[r2] // restore return pointer + st8 [r3]=r9,8 + ;; + mov rp=r14 + st8 [r3]=r10,8 + ;; + st8 [r3]=r11,8 + adds sp=48,sp + br.sptk.few rp +#else + /* + * On input: + * in0 = pointer to return value structure + * in1 = index of SAL function to call + * in2..inN = remaining args to SAL call + */ + /* + * We allocate one input and eight output register such that the br.call instruction + * will rename in1-in7 to in0-in6---exactly what we want because SAL doesn't want to + * see the pointer to the return value structure. 
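+ *
+ * After the br.call returns, r8-r11 hold the four return doublewords,
+ * which are then stored back through in0 (the caller-supplied return
+ * value structure).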
+ */ + alloc r15=ar.pfs,1,0,8,0 + + adds r2=-24,sp + adds sp=-48,sp + mov r14=rp + ;; + st8 [r2]=r15,8 // save ar.pfs + addl r3=@ltoff(ia64_sal),gp + ;; + ld8 r3=[r3] // get address of ia64_sal + st8 [r2]=gp,8 // save global pointer + ;; + ld8 r3=[r3] // get value of ia64_sal + st8 [r2]=r14,8 // save return address (rp) + ;; + ld8 r2=[r3],8 // load function's entry point + ;; + ld8 gp=[r3] // load function's global pointer + mov b6=r2 + br.call.sptk.few rp=b6 // make SAL call +.ret0: adds r2=24,sp + ;; + ld8 r15=[r2],8 // restore ar.pfs + ;; + ld8 gp=[r2],8 // restore global pointer + st8 [in0]=r8,8 // store 1. dword of return value + ;; + ld8 r14=[r2] // restore return address (rp) + st8 [in0]=r9,8 // store 2. dword of return value + ;; + mov rp=r14 + st8 [in0]=r10,8 // store 3. dword of return value + ;; + st8 [in0]=r11,8 + adds sp=48,sp // pop stack frame + mov ar.pfs=r15 + br.ret.sptk.few rp +#endif + + .endp ia64_sal_stub +#endif /* __GCC_MULTIREG_RETVALS__ */ diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c new file mode 100644 index 000000000..84581af2e --- /dev/null +++ b/arch/ia64/kernel/semaphore.c @@ -0,0 +1,336 @@ +/* + * IA-64 semaphore implementation (derived from x86 version). + * + * Copyright (C) 1999-2000 Hewlett-Packard Co + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> + */ + +/* + * Semaphores are implemented using a two-way counter: The "count" + * variable is decremented for each process that tries to aquire the + * semaphore, while the "sleepers" variable is a count of such + * aquires. + * + * Notably, the inline "up()" and "down()" functions can efficiently + * test if they need to do any extra work (up needs to do something + * only if count was negative before the increment operation. + * + * "sleepers" and the contention routine ordering is protected by the + * semaphore spinlock. + * + * Note that these functions are only called when there is contention + * on the lock, and as such all this is the "non-critical" part of the + * whole semaphore business. The critical part is the inline stuff in + * <asm/semaphore.h> where we want to avoid any extra jumps and calls. + */ +#include <linux/sched.h> + +#include <asm/semaphore.h> + +/* + * Logic: + * - Only on a boundary condition do we need to care. When we go + * from a negative count to a non-negative, we wake people up. + * - When we go from a non-negative count to a negative do we + * (a) synchronize with the "sleepers" count and (b) make sure + * that we're on the wakeup list before we synchronize so that + * we cannot lose wakeup events. + */ + +void +__up (struct semaphore *sem) +{ + wake_up(&sem->wait); +} + +static spinlock_t semaphore_lock = SPIN_LOCK_UNLOCKED; + +void +__down (struct semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + tsk->state = TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE; + add_wait_queue_exclusive(&sem->wait, &wait); + + spin_lock_irq(&semaphore_lock); + sem->sleepers++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock. 
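+ * The "- 1" accounts for our own down() decrement, which we are still
+ * hoping to turn into lock ownership (see the "us - see -1 above"
+ * note below).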
+ */ + if (!atomic_add_negative(sleepers - 1, &sem->count)) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irq(&semaphore_lock); + + schedule(); + tsk->state = TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE; + spin_lock_irq(&semaphore_lock); + } + spin_unlock_irq(&semaphore_lock); + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; + wake_up(&sem->wait); +} + +int +__down_interruptible (struct semaphore * sem) +{ + int retval = 0; + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + tsk->state = TASK_INTERRUPTIBLE|TASK_EXCLUSIVE; + add_wait_queue_exclusive(&sem->wait, &wait); + + spin_lock_irq(&semaphore_lock); + sem->sleepers ++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * With signals pending, this turns into + * the trylock failure case - we won't be + * sleeping, and we* can't get the lock as + * it has contention. Just correct the count + * and exit. + */ + if (signal_pending(current)) { + retval = -EINTR; + sem->sleepers = 0; + atomic_add(sleepers, &sem->count); + break; + } + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock. The + * "-1" is because we're still hoping to get + * the lock. + */ + if (!atomic_add_negative(sleepers - 1, &sem->count)) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irq(&semaphore_lock); + + schedule(); + tsk->state = TASK_INTERRUPTIBLE|TASK_EXCLUSIVE; + spin_lock_irq(&semaphore_lock); + } + spin_unlock_irq(&semaphore_lock); + tsk->state = TASK_RUNNING; + remove_wait_queue(&sem->wait, &wait); + wake_up(&sem->wait); + return retval; +} + +/* + * Trylock failed - make sure we correct for having decremented the + * count. + */ +int +__down_trylock (struct semaphore *sem) +{ + int sleepers; + + spin_lock_irq(&semaphore_lock); + sleepers = sem->sleepers + 1; + sem->sleepers = 0; + + /* + * Add "everybody else" and us into it. They aren't + * playing, because we own the spinlock. + */ + if (!atomic_add_negative(sleepers, &sem->count)) + wake_up(&sem->wait); + + spin_unlock_irq(&semaphore_lock); + return 1; +} + +/* + * Helper routines for rw semaphores. These could be optimized some + * more, but since they're off the critical path, I prefer clarity for + * now... + */ + +/* + * This gets called if we failed to acquire the lock, but we're biased + * to acquire the lock by virtue of causing the count to change from 0 + * to -1. Being biased, we sleep and attempt to grab the lock until + * we succeed. When this function returns, we own the lock. + */ +static inline void +down_read_failed_biased (struct rw_semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + add_wait_queue(&sem->wait, &wait); /* put ourselves at the head of the list */ + + for (;;) { + if (sem->read_bias_granted && xchg(&sem->read_bias_granted, 0)) + break; + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (!sem->read_bias_granted) + schedule(); + } + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; +} + +/* + * This gets called if we failed to aquire the lock and we are not + * biased to acquire the lock. We undo the decrement that was + * done earlier, go to sleep, and then attempt to re-acquire the + * lock afterwards. + */ +static inline void +down_read_failed (struct rw_semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + /* + * Undo the decrement we did in down_read() and check if we + * need to wake up someone. 
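+ * __up_read() below takes care of that; we then sleep until the count
+ * turns non-negative, and __down_read_failed() retries the decrement.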
+ */ + __up_read(sem); + + add_wait_queue(&sem->wait, &wait); + while (sem->count < 0) { + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (sem->count >= 0) + break; + schedule(); + } + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; +} + +/* + * Wait for the lock to become unbiased. Readers are non-exclusive. + */ +void +__down_read_failed (struct rw_semaphore *sem, long count) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + while (1) { + if (count == -1) { + down_read_failed_biased(sem); + return; + } + /* unbiased */ + down_read_failed(sem); + + count = ia64_fetch_and_add(-1, &sem->count); + if (count >= 0) + return; + } +} + +static inline void +down_write_failed_biased (struct rw_semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + /* put ourselves at the end of the list */ + add_wait_queue_exclusive(&sem->write_bias_wait, &wait); + + for (;;) { + if (sem->write_bias_granted && xchg(&sem->write_bias_granted, 0)) + break; + set_task_state(tsk, TASK_UNINTERRUPTIBLE | TASK_EXCLUSIVE); + if (!sem->write_bias_granted) + schedule(); + } + + remove_wait_queue(&sem->write_bias_wait, &wait); + tsk->state = TASK_RUNNING; + + /* + * If the lock is currently unbiased, awaken the sleepers + * FIXME: this wakes up the readers early in a bit of a + * stampede -> bad! + */ + if (sem->count >= 0) + wake_up(&sem->wait); +} + + +static inline void +down_write_failed (struct rw_semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + __up_write(sem); /* this takes care of granting the lock */ + + add_wait_queue_exclusive(&sem->wait, &wait); + + while (sem->count < 0) { + set_task_state(tsk, TASK_UNINTERRUPTIBLE | TASK_EXCLUSIVE); + if (sem->count >= 0) + break; /* we must attempt to aquire or bias the lock */ + schedule(); + } + + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; +} + + +/* + * Wait for the lock to become unbiased. Since we're a writer, we'll + * make ourselves exclusive. + */ +void +__down_write_failed (struct rw_semaphore *sem, long count) +{ + long old_count; + + while (1) { + if (count == -RW_LOCK_BIAS) { + down_write_failed_biased(sem); + return; + } + down_write_failed(sem); + + do { + old_count = sem->count; + count = old_count - RW_LOCK_BIAS; + } while (cmpxchg(&sem->count, old_count, count) != old_count); + + if (count == 0) + return; + } +} + +void +__rwsem_wake (struct rw_semaphore *sem, long count) +{ + wait_queue_head_t *wq; + + if (count == 0) { + /* wake a writer */ + if (xchg(&sem->write_bias_granted, 1)) + BUG(); + wq = &sem->write_bias_wait; + } else { + /* wake reader(s) */ + if (xchg(&sem->read_bias_granted, 1)) + BUG(); + wq = &sem->wait; + } + wake_up(wq); /* wake up everyone on the wait queue */ +} diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c new file mode 100644 index 000000000..f3283d535 --- /dev/null +++ b/arch/ia64/kernel/setup.c @@ -0,0 +1,326 @@ +/* + * Architecture-specific setup. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * + * 02/04/00 D.Mosberger some more get_cpuinfo fixes... 
+ * 02/01/00 R.Seth fixed get_cpuinfo for SMP + * 01/07/99 S.Eranian added the support for command line argument + * 06/24/99 W.Drummond added boot_cpu_data. + */ +#include <linux/config.h> +#include <linux/init.h> + +#include <linux/bootmem.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/threads.h> +#include <linux/console.h> + +#include <asm/acpi-ext.h> +#include <asm/page.h> +#include <asm/machvec.h> +#include <asm/processor.h> +#include <asm/sal.h> +#include <asm/system.h> +#include <asm/efi.h> + +extern char _end; + +/* cpu_data[bootstrap_processor] is data for the bootstrap processor: */ +struct cpuinfo_ia64 cpu_data[NR_CPUS]; + +unsigned long ia64_cycles_per_usec; +struct ia64_boot_param ia64_boot_param; +struct screen_info screen_info; +unsigned long cpu_initialized = 0; +/* This tells _start which CPU is booting. */ +int cpu_now_booting = 0; + +#define COMMAND_LINE_SIZE 512 + +char saved_command_line[COMMAND_LINE_SIZE]; /* used in proc filesystem */ + +static int +find_max_pfn (unsigned long start, unsigned long end, void *arg) +{ + unsigned long *max_pfn = arg, pfn; + + pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT; + if (pfn > *max_pfn) + *max_pfn = pfn; + return 0; +} + +static int +free_available_memory (unsigned long start, unsigned long end, void *arg) +{ +# define KERNEL_END ((unsigned long) &_end) +# define MIN(a,b) ((a) < (b) ? (a) : (b)) +# define MAX(a,b) ((a) > (b) ? (a) : (b)) + unsigned long range_start, range_end; + + range_start = MIN(start, KERNEL_START); + range_end = MIN(end, KERNEL_START); + + /* + * XXX This should not be necessary, but the bootmem allocator + * is broken and fails to work correctly when the starting + * address is not properly aligned. + */ + range_start = PAGE_ALIGN(range_start); + + if (range_start < range_end) + free_bootmem(__pa(range_start), range_end - range_start); + + range_start = MAX(start, KERNEL_END); + range_end = MAX(end, KERNEL_END); + + /* + * XXX This should not be necessary, but the bootmem allocator + * is broken and fails to work correctly when the starting + * address is not properly aligned. + */ + range_start = PAGE_ALIGN(range_start); + + if (range_start < range_end) + free_bootmem(__pa(range_start), range_end - range_start); + + return 0; +} + +void __init +setup_arch (char **cmdline_p) +{ + unsigned long max_pfn, bootmap_start, bootmap_size; + + /* + * The secondary bootstrap loader passes us the boot + * parameters at the beginning of the ZERO_PAGE, so let's + * stash away those values before ZERO_PAGE gets cleared out. + */ + memcpy(&ia64_boot_param, (void *) ZERO_PAGE_ADDR, sizeof(ia64_boot_param)); + + efi_init(); + + max_pfn = 0; + efi_memmap_walk(find_max_pfn, &max_pfn); + + /* + * This is wrong, wrong, wrong. Darn it, you'd think if they + * change APIs, they'd do things for the better. Grumble... 
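+ *
+ * For now the bootmem bitmap is simply placed on the first page
+ * boundary past the kernel image and reserved again further down via
+ * reserve_bootmem().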
+ */ + bootmap_start = PAGE_ALIGN(__pa(&_end)); + bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn); + + efi_memmap_walk(free_available_memory, 0); + + reserve_bootmem(bootmap_start, bootmap_size); +#if 0 + /* XXX fix me */ + init_mm.start_code = (unsigned long) &_stext; + init_mm.end_code = (unsigned long) &_etext; + init_mm.end_data = (unsigned long) &_edata; + init_mm.brk = (unsigned long) &_end; + + code_resource.start = virt_to_bus(&_text); + code_resource.end = virt_to_bus(&_etext) - 1; + data_resource.start = virt_to_bus(&_etext); + data_resource.end = virt_to_bus(&_edata) - 1; +#endif + + /* process SAL system table: */ + ia64_sal_init(efi.sal_systab); + + *cmdline_p = __va(ia64_boot_param.command_line); + strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line)); + saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */ + + printk("args to kernel: %s\n", *cmdline_p); + +#ifndef CONFIG_SMP + cpu_init(); + identify_cpu(&cpu_data[0]); +#endif + + if (efi.acpi) { + /* Parse the ACPI tables */ + acpi_parse(efi.acpi); + } + +#ifdef CONFIG_IA64_GENERIC + machvec_init(acpi_get_sysname()); +#endif + +#ifdef CONFIG_VT +# if defined(CONFIG_VGA_CONSOLE) + conswitchp = &vga_con; +# elif defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +# endif +#endif + platform_setup(cmdline_p); +} + +/* + * Display cpu info for all cpu's. + */ +int +get_cpuinfo (char *buffer) +{ + char family[32], model[32], features[128], *cp, *p = buffer; + struct cpuinfo_ia64 *c; + unsigned long mask; + + for (c = cpu_data; c < cpu_data + NR_CPUS; ++c) { + if (!(cpu_initialized & (1UL << (c - cpu_data)))) + continue; + + mask = c->features; + + if (c->family == 7) + memcpy(family, "IA-64", 6); + else + sprintf(family, "%u", c->family); + + switch (c->model) { + case 0: strcpy(model, "Itanium"); break; + default: sprintf(model, "%u", c->model); break; + } + + /* build the feature string: */ + memcpy(features, " standard", 10); + cp = features; + if (mask & 1) { + strcpy(cp, " branchlong"); + cp = strchr(cp, '\0'); + mask &= ~1UL; + } + if (mask) + sprintf(cp, " 0x%lx", mask); + + p += sprintf(buffer, + "CPU# %lu\n" + "\tvendor : %s\n" + "\tfamily : %s\n" + "\tmodel : %s\n" + "\trevision : %u\n" + "\tarchrev : %u\n" + "\tfeatures :%s\n" /* don't change this---it _is_ right! */ + "\tcpu number : %lu\n" + "\tcpu regs : %u\n" + "\tcpu MHz : %lu.%06lu\n" + "\titc MHz : %lu.%06lu\n" + "\tBogoMIPS : %lu.%02lu\n\n", + c - cpu_data, c->vendor, family, model, c->revision, c->archrev, + features, + c->ppn, c->number, c->proc_freq / 1000000, c->proc_freq % 1000000, + c->itc_freq / 1000000, c->itc_freq % 1000000, + loops_per_sec() / 500000, (loops_per_sec() / 5000) % 100); + } + return p - buffer; +} + +void +identify_cpu (struct cpuinfo_ia64 *c) +{ + union { + unsigned long bits[5]; + struct { + /* id 0 & 1: */ + char vendor[16]; + + /* id 2 */ + u64 ppn; /* processor serial number */ + + /* id 3: */ + unsigned number : 8; + unsigned revision : 8; + unsigned model : 8; + unsigned family : 8; + unsigned archrev : 8; + unsigned reserved : 24; + + /* id 4: */ + u64 features; + } field; + } cpuid; + int i; + + for (i = 0; i < 5; ++i) { + cpuid.bits[i] = ia64_get_cpuid(i); + } + +#ifdef CONFIG_SMP + /* + * XXX Instead of copying the ITC info from the bootstrap + * processor, ia64_init_itm() should be done per CPU. That + * should get you the right info. 
--davidm 1/24/00 + */ + if (c != &cpu_data[bootstrap_processor]) { + memset(c, 0, sizeof(struct cpuinfo_ia64)); + c->proc_freq = cpu_data[bootstrap_processor].proc_freq; + c->itc_freq = cpu_data[bootstrap_processor].itc_freq; + c->cyc_per_usec = cpu_data[bootstrap_processor].cyc_per_usec; + c->usec_per_cyc = cpu_data[bootstrap_processor].usec_per_cyc; + } +#else + memset(c, 0, sizeof(struct cpuinfo_ia64)); +#endif + + memcpy(c->vendor, cpuid.field.vendor, 16); +#ifdef CONFIG_IA64_SOFTSDV_HACKS + /* BUG: SoftSDV doesn't support the cpuid registers. */ + if (c->vendor[0] == '\0') + memcpy(c->vendor, "Intel", 6); +#endif + c->ppn = cpuid.field.ppn; + c->number = cpuid.field.number; + c->revision = cpuid.field.revision; + c->model = cpuid.field.model; + c->family = cpuid.field.family; + c->archrev = cpuid.field.archrev; + c->features = cpuid.field.features; +#ifdef CONFIG_SMP + c->loops_per_sec = loops_per_sec; +#endif +} + +/* + * cpu_init() initializes state that is per-CPU. This function acts + * as a 'CPU state barrier', nothing should get across. + */ +void +cpu_init (void) +{ + int nr = smp_processor_id(); + + /* Clear the stack memory reserved for pt_regs: */ + memset(ia64_task_regs(current), 0, sizeof(struct pt_regs)); + + /* + * Initialize default control register to defer speculative + * faults. On a speculative load, we want to defer access + * right, key miss, and key permission faults. We currently + * do NOT defer TLB misses, page-not-present, access bit, or + * debug faults but kernel code should not rely on any + * particular setting of these bits. + */ + ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_PP); + ia64_set_fpu_owner(0); /* initialize ar.k5 */ + + if (test_and_set_bit(nr, &cpu_initialized)) { + printk("CPU#%d already initialized!\n", nr); + machine_halt(); + } + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; +} diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c new file mode 100644 index 000000000..19be1f840 --- /dev/null +++ b/arch/ia64/kernel/signal.c @@ -0,0 +1,537 @@ +/* + * Architecture-specific signal handling support. + * + * Copyright (C) 1999-2000 Hewlett-Packard Co + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * Derived from i386 and Alpha versions. 
+ */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/wait.h> + +#include <asm/ia32.h> +#include <asm/uaccess.h> +#include <asm/rse.h> +#include <asm/sigcontext.h> + +#define DEBUG_SIG 0 +#define STACK_ALIGN 16 /* minimal alignment for stack pointer */ +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +#if _NSIG_WORDS > 1 +# define PUT_SIGSET(k,u) __copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t)) +# define GET_SIGSET(k,u) __copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t)) +#else +# define PUT_SIGSET(k,u) __put_user((k)->sig[0], &(u)->sig[0]) +# define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0]) +#endif + +struct sigframe { + struct siginfo info; + struct sigcontext sc; +}; + +extern long sys_wait4 (int, int *, int, struct rusage *); +extern long ia64_do_signal (sigset_t *, struct pt_regs *, long); /* forward decl */ + +long +ia64_rt_sigsuspend (sigset_t *uset, size_t sigsetsize, struct pt_regs *pt) +{ + sigset_t oldset, set; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + if (GET_SIGSET(&set, uset)) + return -EFAULT; + + sigdelsetmask(&set, ~_BLOCKABLE); + + spin_lock_irq(¤t->sigmask_lock); + { + oldset = current->blocked; + current->blocked = set; + recalc_sigpending(current); + } + spin_unlock_irq(¤t->sigmask_lock); + + /* + * The return below usually returns to the signal handler. We need to + * pre-set the correct error code here to ensure that the right values + * get saved in sigcontext by ia64_do_signal. 
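+ * By the syscall convention used here, r10 == -1 flags an error
+ * return and r8 then carries the (positive) error code, hence the
+ * EINTR/-1 pair below.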
+ */ + pt->r8 = EINTR; + pt->r10 = -1; + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + if (ia64_do_signal(&oldset, pt, 1)) + return -EINTR; + } +} + +asmlinkage long +sys_sigaltstack (const stack_t *uss, stack_t *uoss, long arg2, long arg3, long arg4, + long arg5, long arg6, long arg7, long stack) +{ + struct pt_regs *pt = (struct pt_regs *) &stack; + + return do_sigaltstack(uss, uoss, pt->r12); +} + +static long +restore_sigcontext (struct sigcontext *sc, struct pt_regs *pt) +{ + struct switch_stack *sw = (struct switch_stack *) pt - 1; + unsigned long ip, flags, nat, um; + long err; + + /* restore scratch that always needs gets updated during signal delivery: */ + err = __get_user(flags, &sc->sc_flags); + + err |= __get_user(nat, &sc->sc_nat); + err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */ + err |= __get_user(pt->ar_fpsr, &sc->sc_ar_fpsr); + err |= __get_user(pt->ar_pfs, &sc->sc_ar_pfs); + err |= __get_user(um, &sc->sc_um); /* user mask */ + err |= __get_user(pt->ar_rsc, &sc->sc_ar_rsc); + err |= __get_user(pt->ar_ccv, &sc->sc_ar_ccv); + err |= __get_user(pt->ar_unat, &sc->sc_ar_unat); + err |= __get_user(pt->pr, &sc->sc_pr); /* predicates */ + err |= __get_user(pt->b0, &sc->sc_br[0]); /* b0 (rp) */ + err |= __get_user(pt->b6, &sc->sc_br[6]); + err |= __copy_from_user(&pt->r1, &sc->sc_gr[1], 3*8); /* r1-r3 */ + err |= __copy_from_user(&pt->r8, &sc->sc_gr[8], 4*8); /* r8-r11 */ + err |= __copy_from_user(&pt->r12, &sc->sc_gr[12], 4*8); /* r12-r15 */ + err |= __copy_from_user(&pt->r16, &sc->sc_gr[16], 16*8); /* r16-r31 */ + + /* establish new instruction pointer: */ + pt->cr_iip = ip & ~0x3UL; + ia64_psr(pt)->ri = ip & 0x3; + pt->cr_ipsr = (pt->cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM); + + ia64_put_nat_bits (pt, sw, nat); /* restore the original scratch NaT bits */ + + if (flags & IA64_SC_FLAG_FPH_VALID) { + struct task_struct *fpu_owner = ia64_get_fpu_owner(); + + __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16); + if (fpu_owner == current) { + __ia64_load_fpu(current->thread.fph); + } + } + return err; +} + +/* + * When we get here, ((struct switch_stack *) pt - 1) is a + * switch_stack frame that has no defined value. Upon return, we + * expect sw->caller_unat to contain the new unat value. The reason + * we use a full switch_stack frame is so everything is symmetric + * with ia64_do_signal(). + */ +long +ia64_rt_sigreturn (struct pt_regs *pt) +{ + extern char ia64_strace_leave_kernel, ia64_leave_kernel; + struct sigcontext *sc; + struct siginfo si; + sigset_t set; + long retval; + + sc = &((struct sigframe *) (pt->r12 + 16))->sc; + + /* + * When we return to the previously executing context, r8 and + * r10 have already been setup the way we want them. Indeed, + * if the signal wasn't delivered while in a system call, we + * must not touch r8 or r10 as otherwise user-level stat could + * be corrupted. 
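+ *
+ * The value returned here just selects which kernel exit path is used
+ * on the way back out: plain ia64_leave_kernel, or
+ * ia64_strace_leave_kernel for a traced system call.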
+ */ + retval = (long) &ia64_leave_kernel | 1; + if ((current->flags & PF_TRACESYS) + && (sc->sc_flags & IA64_SC_FLAG_IN_SYSCALL)) + retval = (long) &ia64_strace_leave_kernel; + + if (!access_ok(VERIFY_READ, sc, sizeof(*sc))) + goto give_sigsegv; + + if (GET_SIGSET(&set, &sc->sc_mask)) + goto give_sigsegv; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + if (restore_sigcontext(sc, pt)) + goto give_sigsegv; + +#if DEBUG_SIG + printk("SIG return (%s:%d): sp=%lx ip=%lx\n", + current->comm, current->pid, pt->r12, pt->cr_iip); +#endif + /* + * It is more difficult to avoid calling this function than to + * call it and ignore errors. + */ + do_sigaltstack(&sc->sc_stack, 0, pt->r12); + return retval; + + give_sigsegv: + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = SI_KERNEL; + si.si_pid = current->pid; + si.si_uid = current->uid; + si.si_addr = sc; + force_sig_info(SIGSEGV, &si, current); + return retval; +} + +/* + * This does just the minimum required setup of sigcontext. + * Specifically, it only installs data that is either not knowable at + * the user-level or that gets modified before execution in the + * trampoline starts. Everything else is done at the user-level. + */ +static long +setup_sigcontext (struct sigcontext *sc, sigset_t *mask, struct pt_regs *pt) +{ + struct switch_stack *sw = (struct switch_stack *) pt - 1; + struct task_struct *fpu_owner = ia64_get_fpu_owner(); + unsigned long flags = 0, ifs, nat; + long err; + + ifs = pt->cr_ifs; + + if (on_sig_stack((unsigned long) sc)) + flags |= IA64_SC_FLAG_ONSTACK; + if ((ifs & (1UL << 63)) == 0) { + /* if cr_ifs isn't valid, we got here through a syscall */ + flags |= IA64_SC_FLAG_IN_SYSCALL; + } + if ((fpu_owner == current) || (current->thread.flags & IA64_THREAD_FPH_VALID)) { + flags |= IA64_SC_FLAG_FPH_VALID; + if (fpu_owner == current) { + __ia64_save_fpu(current->thread.fph); + } + __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16); + } + + /* + * Note: sw->ar_unat is UNDEFINED unless the process is being + * PTRACED. However, this is OK because the NaT bits of the + * preserved registers (r4-r7) are never being looked at by + * the signal handler (register r4-r7 are used instead). 
+ */ + nat = ia64_get_nat_bits(pt, sw); + + err = __put_user(flags, &sc->sc_flags); + err |= __put_user(nat, &sc->sc_nat); + err |= PUT_SIGSET(mask, &sc->sc_mask); + err |= __put_user(pt->cr_ipsr & IA64_PSR_UM, &sc->sc_um); + err |= __put_user(pt->ar_rsc, &sc->sc_ar_rsc); + err |= __put_user(pt->ar_ccv, &sc->sc_ar_ccv); + err |= __put_user(pt->ar_unat, &sc->sc_ar_unat); /* ar.unat */ + err |= __put_user(pt->ar_fpsr, &sc->sc_ar_fpsr); /* ar.fpsr */ + err |= __put_user(pt->ar_pfs, &sc->sc_ar_pfs); + err |= __put_user(pt->pr, &sc->sc_pr); /* predicates */ + err |= __put_user(pt->b0, &sc->sc_br[0]); /* b0 (rp) */ + err |= __put_user(pt->b6, &sc->sc_br[6]); /* b6 */ + err |= __put_user(pt->b7, &sc->sc_br[7]); /* b7 */ + + err |= __copy_to_user(&sc->sc_gr[1], &pt->r1, 3*8); /* r1-r3 */ + err |= __copy_to_user(&sc->sc_gr[8], &pt->r8, 4*8); /* r8-r11 */ + err |= __copy_to_user(&sc->sc_gr[12], &pt->r12, 4*8); /* r12-r15 */ + err |= __copy_to_user(&sc->sc_gr[16], &pt->r16, 16*8); /* r16-r31 */ + + err |= __put_user(pt->cr_iip + ia64_psr(pt)->ri, &sc->sc_ip); + err |= __put_user(pt->r12, &sc->sc_gr[12]); /* r12 */ + return err; +} + +static long +setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *pt) +{ + struct switch_stack *sw = (struct switch_stack *) pt - 1; + extern char ia64_sigtramp[], __start_gate_section[]; + unsigned long tramp_addr, new_rbs = 0; + struct sigframe *frame; + struct siginfo si; + long err; + + frame = (void *) pt->r12; + tramp_addr = GATE_ADDR + (ia64_sigtramp - __start_gate_section); + if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && !on_sig_stack((unsigned long) frame)) { + new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1); + frame = (void *) ((current->sas_ss_sp + current->sas_ss_size) + & ~(STACK_ALIGN - 1)); + } + frame = (void *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err = __copy_to_user(&frame->info, info, sizeof(siginfo_t)); + + err |= __put_user(current->sas_ss_sp, &frame->sc.sc_stack.ss_sp); + err |= __put_user(current->sas_ss_size, &frame->sc.sc_stack.ss_size); + err |= __put_user(sas_ss_flags(pt->r12), &frame->sc.sc_stack.ss_flags); + err |= setup_sigcontext(&frame->sc, set, pt); + + if (err) + goto give_sigsegv; + + pt->r12 = (unsigned long) frame - 16; /* new stack pointer */ + pt->r2 = sig; /* signal number */ + pt->r3 = (unsigned long) ka->sa.sa_handler; /* addr. of handler's proc. descriptor */ + pt->r15 = new_rbs; + pt->ar_fpsr = FPSR_DEFAULT; /* reset fpsr for signal handler */ + pt->cr_iip = tramp_addr; + ia64_psr(pt)->ri = 0; /* start executing in first slot */ + + /* + * Note: this affects only the NaT bits of the scratch regs + * (the ones saved in pt_regs, which is exactly what we want. + * The NaT bits for the preserved regs (r4-r7) are in + * sw->ar_unat iff this process is being PTRACED. 
+ */ + sw->caller_unat = 0; /* ensure NaT bits of at least r2, r3, r12, and r15 are clear */ + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%lx\n", + current->comm, current->pid, sig, pt->r12, pt->cr_iip, pt->r3); +#endif + return 1; + + give_sigsegv: + if (sig == SIGSEGV) + ka->sa.sa_handler = SIG_DFL; + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = SI_KERNEL; + si.si_pid = current->pid; + si.si_uid = current->uid; + si.si_addr = frame; + force_sig_info(SIGSEGV, &si, current); + return 0; +} + +static long +handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, + struct pt_regs *pt) +{ +#ifdef CONFIG_IA32_SUPPORT + if (IS_IA32_PROCESS(pt)) { + /* send signal to IA-32 process */ + if (!ia32_setup_frame1(sig, ka, info, oldset, pt)) + return 0; + } else +#endif + /* send signal to IA-64 process */ + if (!setup_frame(sig, ka, info, oldset, pt)) + return 0; + + if (ka->sa.sa_flags & SA_ONESHOT) + ka->sa.sa_handler = SIG_DFL; + + if (!(ka->sa.sa_flags & SA_NODEFER)) { + spin_lock_irq(¤t->sigmask_lock); + sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); + sigaddset(¤t->blocked, sig); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + } + return 1; +} + +/* + * When we get here, `pt' points to struct pt_regs and ((struct + * switch_stack *) pt - 1) points to a switch stack structure. + * HOWEVER, in the normal case, the ONLY value valid in the + * switch_stack is the caller_unat field. The entire switch_stack is + * valid ONLY if current->flags has PF_PTRACED set. + * + * Note that `init' is a special process: it doesn't get signals it + * doesn't want to handle. Thus you cannot kill init even with a + * SIGKILL even by mistake. + * + * Note that we go through the signals twice: once to check the + * signals that the kernel can handle, and then we build all the + * user-level signal handling stack-frames in one go after that. + */ +long +ia64_do_signal (sigset_t *oldset, struct pt_regs *pt, long in_syscall) +{ + struct k_sigaction *ka; + siginfo_t info; + long restart = in_syscall; + + /* + * In the ia64_leave_kernel code path, we want the common case + * to go fast, which is why we may in certain cases get here + * from kernel mode. Just return without doing anything if so. + */ + if (!user_mode(pt)) + return 0; + + if (!oldset) + oldset = ¤t->blocked; + + if (pt->r10 != -1) { + /* + * A system calls has to be restarted only if one of + * the error codes ERESTARTNOHAND, ERESTARTSYS, or + * ERESTARTNOINTR is returned. If r10 isn't -1 then + * r8 doesn't hold an error code and we don't need to + * restart the syscall, so we set in_syscall to zero. + */ + restart = 0; + } + + for (;;) { + unsigned long signr; + + spin_lock_irq(¤t->sigmask_lock); + signr = dequeue_signal(¤t->blocked, &info); + spin_unlock_irq(¤t->sigmask_lock); + + if (!signr) + break; + + if ((current->flags & PF_PTRACED) && signr != SIGKILL) { + /* Let the debugger run. */ + current->exit_code = signr; + set_current_state(TASK_STOPPED); + notify_parent(current, SIGCHLD); + schedule(); + signr = current->exit_code; + + /* We're back. Did the debugger cancel the sig? */ + if (!signr) + continue; + current->exit_code = 0; + + /* The debugger continued. Ignore SIGSTOP. */ + if (signr == SIGSTOP) + continue; + + /* Update the siginfo structure. Is this good? 
*/ + if (signr != info.si_signo) { + info.si_signo = signr; + info.si_errno = 0; + info.si_code = SI_USER; + info.si_pid = current->p_pptr->pid; + info.si_uid = current->p_pptr->uid; + } + + /* If the (new) signal is now blocked, requeue it. */ + if (sigismember(¤t->blocked, signr)) { + send_sig_info(signr, &info, current); + continue; + } + } + + ka = ¤t->sig->action[signr - 1]; + if (ka->sa.sa_handler == SIG_IGN) { + if (signr != SIGCHLD) + continue; + /* Check for SIGCHLD: it's special. */ + while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) + /* nothing */; + continue; + } + + if (ka->sa.sa_handler == SIG_DFL) { + int exit_code = signr; + + /* Init gets no signals it doesn't want. */ + if (current->pid == 1) + continue; + + switch (signr) { + case SIGCONT: case SIGCHLD: case SIGWINCH: + continue; + + case SIGTSTP: case SIGTTIN: case SIGTTOU: + if (is_orphaned_pgrp(current->pgrp)) + continue; + /* FALLTHRU */ + + case SIGSTOP: + set_current_state(TASK_STOPPED); + current->exit_code = signr; + if (!(current->p_pptr->sig->action[SIGCHLD-1].sa.sa_flags + & SA_NOCLDSTOP)) + notify_parent(current, SIGCHLD); + schedule(); + continue; + + case SIGQUIT: case SIGILL: case SIGTRAP: + case SIGABRT: case SIGFPE: case SIGSEGV: + case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: + if (do_coredump(signr, pt)) + exit_code |= 0x80; + /* FALLTHRU */ + + default: + lock_kernel(); + sigaddset(¤t->signal, signr); + recalc_sigpending(current); + current->flags |= PF_SIGNALED; + do_exit(exit_code); + /* NOTREACHED */ + } + } + + if (restart) { + switch (pt->r8) { + case ERESTARTSYS: + if ((ka->sa.sa_flags & SA_RESTART) == 0) { + case ERESTARTNOHAND: + pt->r8 = EINTR; + /* note: pt->r10 is already -1 */ + break; + } + case ERESTARTNOINTR: + ia64_decrement_ip(pt); + } + } + + /* Whee! Actually deliver the signal. If the + delivery failed, we need to continue to iterate in + this loop so we can deliver the SIGSEGV... */ + if (handle_signal(signr, ka, &info, oldset, pt)) + return 1; + } + + /* Did we come from a system call? */ + if (restart) { + /* Restart the system call - no handlers present */ + if (pt->r8 == ERESTARTNOHAND || + pt->r8 == ERESTARTSYS || + pt->r8 == ERESTARTNOINTR) { + /* + * Note: the syscall number is in r15 which is + * saved in pt_regs so all we need to do here + * is adjust ip so that the "break" + * instruction gets re-executed. + */ + ia64_decrement_ip(pt); + } + } + return 0; +} diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c new file mode 100644 index 000000000..48a3d68b4 --- /dev/null +++ b/arch/ia64/kernel/smp.c @@ -0,0 +1,777 @@ +/* + * SMP Support + * + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * + * Lots of stuff stolen from arch/alpha/kernel/smp.c + * + * 99/10/05 davidm Update to bring it in sync with new command-line processing scheme. 
+ */ +#define __KERNEL_SYSCALLS__ + +#include <linux/config.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/smp.h> +#include <linux/kernel_stat.h> +#include <linux/mm.h> + +#include <asm/atomic.h> +#include <asm/bitops.h> +#include <asm/current.h> +#include <asm/delay.h> + +#ifdef CONFIG_KDB +#include <linux/kdb.h> +void smp_kdb_interrupt (struct pt_regs* regs); +void kdb_global(int cpuid); +extern unsigned long smp_kdb_wait; +extern int kdb_new_cpu; +#endif + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/sal.h> +#include <asm/system.h> +#include <asm/unistd.h> + +extern int cpu_idle(void * unused); +extern void _start(void); + +extern int cpu_now_booting; /* Used by head.S to find idle task */ +extern unsigned long cpu_initialized; /* Bitmap of available cpu's */ +extern struct cpuinfo_ia64 cpu_data[NR_CPUS]; /* Duh... */ + +spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED; + +#ifdef CONFIG_KDB +unsigned long cpu_online_map = 1; +#endif + +volatile int cpu_number_map[NR_CPUS] = { -1, }; /* SAPIC ID -> Logical ID */ +volatile int __cpu_logical_map[NR_CPUS] = { -1, }; /* logical ID -> SAPIC ID */ +int smp_num_cpus = 1; +int bootstrap_processor = -1; /* SAPIC ID of BSP */ +int smp_threads_ready = 0; /* Set when the idlers are all forked */ +unsigned long ipi_base_addr = IPI_DEFAULT_BASE_ADDR; /* Base addr of IPI table */ +cycles_t cacheflush_time = 0; +unsigned long ap_wakeup_vector = -1; /* External Int to use to wakeup AP's */ +static int max_cpus = -1; /* Command line */ +static unsigned long ipi_op[NR_CPUS]; +struct smp_call_struct { + void (*func) (void *info); + void *info; + long wait; + atomic_t unstarted_count; + atomic_t unfinished_count; +}; +static struct smp_call_struct *smp_call_function_data; + +#ifdef CONFIG_KDB +unsigned long smp_kdb_wait = 0; /* Bitmask of waiters */ +#endif + +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC +extern spinlock_t ivr_read_lock; +#endif + +int use_xtp = 0; /* XXX */ + +#define IPI_RESCHEDULE 0 +#define IPI_CALL_FUNC 1 +#define IPI_CPU_STOP 2 +#define IPI_KDB_INTERRUPT 4 + +/* + * Setup routine for controlling SMP activation + * + * Command-line option of "nosmp" or "maxcpus=0" will disable SMP + * activation entirely (the MPS table probe still happens, though). + * + * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer + * greater than 0, limits the maximum number of CPUs activated in + * SMP mode to <NUM>. + */ + +static int __init nosmp(char *str) +{ + max_cpus = 0; + return 1; +} + +__setup("nosmp", nosmp); + +static int __init maxcpus(char *str) +{ + get_option(&str, &max_cpus); + return 1; +} + +__setup("maxcpus=", maxcpus); + +/* + * Yoink this CPU from the runnable list... + */ +void +halt_processor(void) +{ + clear_bit(smp_processor_id(), &cpu_initialized); + max_xtp(); + __cli(); + for (;;) + ; + +} + +void +handle_IPI(int irq, void *dev_id, struct pt_regs *regs) +{ + int this_cpu = smp_processor_id(); + unsigned long *pending_ipis = &ipi_op[this_cpu]; + unsigned long ops; + + /* Count this now; we may make a call that never returns. */ + cpu_data[this_cpu].ipi_count++; + + mb(); /* Order interrupt and bit testing. */ + while ((ops = xchg(pending_ipis, 0)) != 0) { + mb(); /* Order bit clearing and data access. 
*/ + do { + unsigned long which; + + which = ffz(~ops); + ops &= ~(1 << which); + + switch (which) { + case IPI_RESCHEDULE: + /* + * Reschedule callback. Everything to be done is done by the + * interrupt return path. + */ + break; + + case IPI_CALL_FUNC: + { + struct smp_call_struct *data; + void (*func)(void *info); + void *info; + int wait; + + data = smp_call_function_data; + func = data->func; + info = data->info; + wait = data->wait; + + mb(); + atomic_dec (&data->unstarted_count); + + /* At this point the structure may be gone unless wait is true. */ + (*func)(info); + + /* Notify the sending CPU that the task is done. */ + mb(); + if (wait) + atomic_dec (&data->unfinished_count); + } + break; + + case IPI_CPU_STOP: + halt_processor(); + break; + +#ifdef CONFIG_KDB + case IPI_KDB_INTERRUPT: + smp_kdb_interrupt(regs); + break; +#endif + + default: + printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which); + break; + } /* Switch */ + } while (ops); + + mb(); /* Order data access and bit testing. */ + } +} + +static inline void +send_IPI(int dest_cpu, unsigned char vector) +{ + unsigned long ipi_addr; + unsigned long ipi_data; +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + unsigned long flags; +#endif + + ipi_data = vector; + ipi_addr = ipi_base_addr | ((dest_cpu << 8) << 4); /* 16-bit SAPIC ID's; assume CPU bus 0 */ + mb(); + +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + /* + * Disable IVR reads + */ + spin_lock_irqsave(&ivr_read_lock, flags); + writeq(ipi_data, ipi_addr); + spin_unlock_irqrestore(&ivr_read_lock, flags); +#else + writeq(ipi_data, ipi_addr); +#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ + +} + +static inline void +send_IPI_single(int dest_cpu, int op) +{ + + if (dest_cpu == -1) + return; + + ipi_op[dest_cpu] |= (1 << op); + send_IPI(dest_cpu, IPI_IRQ); +} + +static inline void +send_IPI_allbutself(int op) +{ + int i; + int cpu_id = 0; + + for (i = 0; i < smp_num_cpus; i++) { + cpu_id = __cpu_logical_map[i]; + if (cpu_id != smp_processor_id()) + send_IPI_single(cpu_id, op); + } +} + +static inline void +send_IPI_all(int op) +{ + int i; + + for (i = 0; i < smp_num_cpus; i++) + send_IPI_single(__cpu_logical_map[i], op); +} + +static inline void +send_IPI_self(int op) +{ + send_IPI_single(smp_processor_id(), op); +} + +void +smp_send_reschedule(int cpu) +{ + send_IPI_single(cpu, IPI_RESCHEDULE); +} + +void +smp_send_stop(void) +{ + send_IPI_allbutself(IPI_CPU_STOP); +} + +/* + * Run a function on all other CPUs. + * <func> The function to run. This must be fast and non-blocking. + * <info> An arbitrary pointer to pass to the function. + * <retry> If true, keep retrying until ready. + * <wait> If true, wait until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. + * + * Does not return until remote CPUs are nearly ready to execute <func> + * or are or have executed. + */ + +int +smp_call_function (void (*func) (void *info), void *info, int retry, int wait) +{ + struct smp_call_struct data; + long timeout; + static spinlock_t lock = SPIN_LOCK_UNLOCKED; + + data.func = func; + data.info = info; + data.wait = wait; + atomic_set(&data.unstarted_count, smp_num_cpus - 1); + atomic_set(&data.unfinished_count, smp_num_cpus - 1); + + if (retry) { + while (1) { + if (smp_call_function_data) { + schedule (); /* Give a mate a go */ + continue; + } + spin_lock (&lock); + if (smp_call_function_data) { + spin_unlock (&lock); /* Bad luck */ + continue; + } + /* Mine, all mine! 
*/ + break; + } + } + else { + if (smp_call_function_data) + return -EBUSY; + spin_lock (&lock); + if (smp_call_function_data) { + spin_unlock (&lock); + return -EBUSY; + } + } + + smp_call_function_data = &data; + spin_unlock (&lock); + data.func = func; + data.info = info; + atomic_set (&data.unstarted_count, smp_num_cpus - 1); + data.wait = wait; + if (wait) + atomic_set (&data.unfinished_count, smp_num_cpus - 1); + + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_allbutself(IPI_CALL_FUNC); + + /* Wait for response */ + timeout = jiffies + HZ; + while ( (atomic_read (&data.unstarted_count) > 0) && + time_before (jiffies, timeout) ) + barrier (); + if (atomic_read (&data.unstarted_count) > 0) { + smp_call_function_data = NULL; + return -ETIMEDOUT; + } + if (wait) + while (atomic_read (&data.unfinished_count) > 0) + barrier (); + smp_call_function_data = NULL; + return 0; +} + +/* + * Flush all other CPU's tlb and then mine. Do this with smp_call_function() as we + * want to ensure all TLB's flushed before proceeding. + * + * XXX: Is it OK to use the same ptc.e info on all cpus? + */ +void +smp_flush_tlb_all(void) +{ + smp_call_function((void (*)(void *))__flush_tlb_all, NULL, 1, 1); + __flush_tlb_all(); +} + +/* + * Ideally sets up per-cpu profiling hooks. Doesn't do much now... + */ +static inline void __init +smp_setup_percpu_timer(int cpuid) +{ + cpu_data[cpuid].prof_counter = 1; + cpu_data[cpuid].prof_multiplier = 1; +} + +void +smp_do_timer(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + int user = user_mode(regs); + struct cpuinfo_ia64 *data = &cpu_data[cpu]; + + extern void update_one_process(struct task_struct *, unsigned long, unsigned long, + unsigned long, int); + if (!--data->prof_counter) { + irq_enter(cpu, TIMER_IRQ); + + update_one_process(current, 1, user, !user, cpu); + if (current->pid) { + if (--current->counter < 0) { + current->counter = 0; + current->need_resched = 1; + } + + if (user) { + if (current->priority < DEF_PRIORITY) { + kstat.cpu_nice++; + kstat.per_cpu_nice[cpu]++; + } else { + kstat.cpu_user++; + kstat.per_cpu_user[cpu]++; + } + } else { + kstat.cpu_system++; + kstat.per_cpu_system[cpu]++; + } + } + + data->prof_counter = data->prof_multiplier; + irq_exit(cpu, TIMER_IRQ); + } +} + + +/* + * Called by both boot and secondaries to move global data into + * per-processor storage. + */ +static inline void __init +smp_store_cpu_info(int cpuid) +{ + struct cpuinfo_ia64 *c = &cpu_data[cpuid]; + + identify_cpu(c); +} + +/* + * SAL shoves the AP's here when we start them. Physical mode, no kernel TR, + * no RRs set, better than even chance that psr is bogus. Fix all that and + * call _start. In effect, pretend to be lilo. + * + * Stolen from lilo_start.c. Thanks David! + */ +void +start_ap(void) +{ + unsigned long flags; + + /* + * Install a translation register that identity maps the + * kernel's 256MB page(s). 
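(The region-register values programmed just below pack a region id together with a page-size code; the field positions quoted here -- page size in bits 7:2, region id from bit 8 upward -- come from the IA-64 region-register layout and are stated as background, not taken from this patch.)

    rr = (rid << 8) | (page_shift << 2);      /* page_shift = log2 of the page size */
    /* e.g. (0x1000 << 8) | (_PAGE_SIZE_1M << 2): region id 0x1000, 2^_PAGE_SIZE_1M-byte pages */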
+ */ + ia64_clear_ic(flags); + ia64_set_rr( 0, (0x1000 << 8) | (_PAGE_SIZE_1M << 2)); + ia64_set_rr(PAGE_OFFSET, (ia64_rid(0, PAGE_OFFSET) << 8) | (_PAGE_SIZE_256M << 2)); + ia64_itr(0x3, 1, PAGE_OFFSET, + pte_val(mk_pte_phys(0, __pgprot(__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX))), + _PAGE_SIZE_256M); + + flags = (IA64_PSR_IT | IA64_PSR_IC | IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_DFH | + IA64_PSR_BN); + + asm volatile ("movl r8 = 1f\n" + ";;\n" + "mov cr.ipsr=%0\n" + "mov cr.iip=r8\n" + "mov cr.ifs=r0\n" + ";;\n" + "rfi;;" + "1:\n" + "movl r1 = __gp" :: "r"(flags) : "r8"); + _start(); +} + + +/* + * AP's start using C here. + */ +void __init +smp_callin(void) +{ + extern void ia64_rid_init(void); + extern void ia64_init_itm(void); + extern void ia64_cpu_local_tick(void); + + ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_PP); + ia64_set_fpu_owner(0); + ia64_rid_init(); /* initialize region ids */ + + cpu_init(); + __flush_tlb_all(); + + smp_store_cpu_info(smp_processor_id()); + smp_setup_percpu_timer(smp_processor_id()); + + while (!smp_threads_ready) + mb(); + + normal_xtp(); + + /* setup the CPU local timer tick */ + ia64_cpu_local_tick(); + + /* Disable all local interrupts */ + ia64_set_lrr0(0, 1); + ia64_set_lrr1(0, 1); + + __sti(); /* Interrupts have been off till now. */ + cpu_idle(NULL); +} + +/* + * Create the idle task for a new AP. DO NOT use kernel_thread() because + * that could end up calling schedule() in the ia64_leave_kernel exit + * path in which case the new idle task could get scheduled before we + * had a chance to remove it from the run-queue... + */ +static int __init +fork_by_hand(void) +{ + /* + * Don't care about the usp and regs settings since we'll never + * reschedule the forked task. + */ + return do_fork(CLONE_VM|CLONE_PID, 0, 0); +} + +/* + * Bring one cpu online. + * + * NB: cpuid is the CPU BUS-LOCAL ID, not the entire SAPIC ID. See asm/smp.h. + */ +static int __init +smp_boot_one_cpu(int cpuid, int cpunum) +{ + struct task_struct *idle; + long timeout; + + /* + * Create an idle task for this CPU. Note that the address we + * give to kernel_thread is irrelevant -- it's going to start + * where OS_BOOT_RENDEVZ vector in SAL says to start. But + * this gets all the other task-y sort of data structures set + * up like we wish. We need to pull the just created idle task + * off the run queue and stuff it into the init_tasks[] array. + * Sheesh . . . + */ + if (fork_by_hand() < 0) + panic("failed fork for CPU %d", cpuid); + /* + * We remove it from the pidhash and the runqueue + * once we got the process: + */ + idle = init_task.prev_task; + if (!idle) + panic("No idle process for CPU %d", cpuid); + init_tasks[cpunum] = idle; + del_from_runqueue(idle); + unhash_process(idle); + + /* Schedule the first task manually. */ + idle->processor = cpuid; + idle->has_cpu = 1; + + /* Let _start know what logical CPU we're booting (offset into init_tasks[] */ + cpu_now_booting = cpunum; + + /* Kick the AP in the butt */ + send_IPI(cpuid, ap_wakeup_vector); + ia64_srlz_i(); + mb(); + + /* + * OK, wait a bit for that CPU to finish staggering about. smp_callin() will + * call cpu_init() which will set a bit for this AP. When that bit flips, the AP + * is waiting for smp_threads_ready to be 1 and we can move on. 
+ */ + for (timeout = 0; timeout < 100000; timeout++) { + if (test_bit(cpuid, &cpu_initialized)) + goto alive; + udelay(10); + barrier(); + } + + printk(KERN_ERR "SMP: Processor %d is stuck.\n", cpuid); + return -1; + +alive: + /* Remember the AP data */ + cpu_number_map[cpuid] = cpunum; +#ifdef CONFIG_KDB + cpu_online_map |= (1<<cpunum); + printk ("DEBUGGER: cpu_online_map = 0x%08x\n", cpu_online_map); +#endif + __cpu_logical_map[cpunum] = cpuid; + return 0; +} + + + +/* + * Called by smp_init bring all the secondaries online and hold them. + * XXX: this is ACPI specific; it uses "magic" variables exported from acpi.c + * to 'discover' the AP's. Blech. + */ +void __init +smp_boot_cpus(void) +{ + int i, cpu_count = 1; + unsigned long bogosum; + int sapic_id; + extern int acpi_cpus; + extern int acpi_apic_map[32]; + + /* Take care of some initial bookkeeping. */ + memset(&cpu_number_map, -1, sizeof(cpu_number_map)); + memset(&__cpu_logical_map, -1, sizeof(__cpu_logical_map)); + memset(&ipi_op, 0, sizeof(ipi_op)); + + /* Setup BSP mappings */ + cpu_number_map[bootstrap_processor] = 0; + __cpu_logical_map[0] = bootstrap_processor; + current->processor = bootstrap_processor; + + /* Mark BSP booted and get active_mm context */ + cpu_init(); + + /* reset XTP for interrupt routing */ + normal_xtp(); + + /* And generate an entry in cpu_data */ + smp_store_cpu_info(bootstrap_processor); +#if 0 + smp_tune_scheduling(); +#endif + smp_setup_percpu_timer(bootstrap_processor); + + init_idle(); + + /* Nothing to do when told not to. */ + if (max_cpus == 0) { + printk(KERN_INFO "SMP mode deactivated.\n"); + return; + } + + if (acpi_cpus > 1) { + printk(KERN_INFO "SMP: starting up secondaries.\n"); + + for (i = 0; i < NR_CPUS; i++) { + if (acpi_apic_map[i] == -1 || + acpi_apic_map[i] == bootstrap_processor << 8) /* XXX Fix me Walt */ + continue; + + /* + * IA64 SAPIC ID's are 16-bits. See asm/smp.h for more info + */ + sapic_id = acpi_apic_map[i] >> 8; + if (smp_boot_one_cpu(sapic_id, cpu_count)) + continue; + + cpu_count++; /* Count good CPUs only... */ + } + } + + if (cpu_count == 1) { + printk(KERN_ERR "SMP: Bootstrap processor only.\n"); + return; + } + + bogosum = 0; + for (i = 0; i < NR_CPUS; i++) { + if (cpu_initialized & (1L << i)) + bogosum += cpu_data[i].loops_per_sec; + } + + printk(KERN_INFO "SMP: Total of %d processors activated " + "(%lu.%02lu BogoMIPS).\n", + cpu_count, (bogosum + 2500) / 500000, + ((bogosum + 2500) / 5000) % 100); + + smp_num_cpus = cpu_count; +} + +/* + * Called from main.c by each AP. + */ +void __init +smp_commence(void) +{ + mb(); +} + +/* + * Not used; part of the i386 bringup + */ +void __init +initialize_secondary(void) +{ +} + +int __init +setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +/* + * Assume that CPU's have been discovered by some platform-dependant + * interface. For SoftSDV/Lion, that would be ACPI. + * + * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP(). + * + * So this just gets the BSP SAPIC ID and print's it out. Dull, huh? + * + * Not anymore. This also registers the AP OS_MC_REDVEZ address with SAL. + */ +void __init +init_smp_config(void) +{ + struct fptr { + unsigned long fp; + unsigned long gp; + } *ap_startup; + long sal_ret; + + /* Grab the BSP ID */ + bootstrap_processor = hard_smp_processor_id(); + + /* Tell SAL where to drop the AP's. 
*/ + ap_startup = (struct fptr *) start_ap; + sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ, + __pa(ap_startup->fp), __pa(ap_startup->gp), 0, + 0, 0, 0); + if (sal_ret < 0) { + printk("SMP: Can't set SAL AP Boot Rendezvous: %s\n", ia64_sal_strerror(sal_ret)); + printk(" Forcing UP mode\n"); + smp_num_cpus = 1; + } + +} + +#ifdef CONFIG_KDB +void smp_kdb_stop (int all, struct pt_regs* regs) +{ + if (all) + { + printk ("Sending IPI to all on CPU %i\n", smp_processor_id ()); + smp_kdb_wait = 0xffffffff; + clear_bit (smp_processor_id(), &smp_kdb_wait); + send_IPI_allbutself (IPI_KDB_INTERRUPT); + } + else + { + printk ("Sending IPI to self on CPU %i\n", + smp_processor_id ()); + set_bit (smp_processor_id(), &smp_kdb_wait); + clear_bit (__cpu_logical_map[kdb_new_cpu], &smp_kdb_wait); + smp_kdb_interrupt (regs); + } +} + +void smp_kdb_interrupt (struct pt_regs* regs) +{ + printk ("kdb: IPI on CPU %i with mask 0x%08x\n", + smp_processor_id (), smp_kdb_wait); + + /* All CPUs spin here forever */ + while (test_bit (smp_processor_id(), &smp_kdb_wait)); + + /* Enter KDB on CPU selected by KDB on the last CPU */ + if (__cpu_logical_map[kdb_new_cpu] == smp_processor_id ()) + { + kdb (KDB_REASON_SWITCH, 0, regs); + } +} + +#endif + diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c new file mode 100644 index 000000000..18a498a09 --- /dev/null +++ b/arch/ia64/kernel/sys_ia64.c @@ -0,0 +1,216 @@ +/* + * This file contains various system calls that have different calling + * conventions on different platforms. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/sched.h> +#include <linux/file.h> /* doh, must come after sched.h... */ +#include <linux/smp.h> +#include <linux/smp_lock.h> + +asmlinkage long +ia64_getpriority (int which, int who, long arg2, long arg3, long arg4, long arg5, long arg6, + long arg7, long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + extern long sys_getpriority (int, int); + long prio; + + prio = sys_getpriority(which, who); + if (prio >= 0) { + regs->r8 = 0; /* ensure negative priority is not mistaken as error code */ + prio = 20 - prio; + } + return prio; +} + +asmlinkage unsigned long +sys_getpagesize (void) +{ + return PAGE_SIZE; +} + +asmlinkage unsigned long +ia64_shmat (int shmid, void *shmaddr, int shmflg, long arg3, long arg4, long arg5, long arg6, + long arg7, long stack) +{ + extern int sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr); + struct pt_regs *regs = (struct pt_regs *) &stack; + unsigned long raddr; + int retval; + + retval = sys_shmat(shmid, shmaddr, shmflg, &raddr); + if (retval < 0) + return retval; + + regs->r8 = 0; /* ensure negative addresses are not mistaken as an error code */ + return raddr; +} + +asmlinkage unsigned long +ia64_brk (long brk, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, long stack) +{ + extern unsigned long sys_brk (unsigned long brk); + struct pt_regs *regs = (struct pt_regs *) &stack; + unsigned long retval; + + retval = sys_brk(brk); + + regs->r8 = 0; /* ensure large retval isn't mistaken as error code */ + return retval; +} + +/* + * On IA-64, we return the two file descriptors in ret0 and ret1 (r8 + * and r9) as this is faster than doing a copy_to_user(). 
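(A condensed sketch of the mechanism the wrappers in this file use to reach the saved registers; the reading of the trailing dummy arguments -- they are there so that `&stack' ends up pointing at the saved pt_regs frame -- is an inference from the code, not a statement made by the patch itself.)

    struct pt_regs *regs = (struct pt_regs *) &stack;  /* alias the register save area      */
    regs->r9 = fd[1];                                  /* second descriptor returned in r9  */
    return fd[0];                                      /* ordinary return value lands in r8 */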
*/ +asmlinkage long +sys_pipe (long arg0, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + int fd[2]; + int retval; + + lock_kernel(); + retval = do_pipe(fd); + if (retval) + goto out; + retval = fd[0]; + regs->r9 = fd[1]; + out: + unlock_kernel(); + return retval; +} + +static inline unsigned long +do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff) +{ + struct file *file = 0; + + /* + * A zero mmap always succeeds in Linux, independent of + * whether or not the remaining arguments are valid. + */ + if (PAGE_ALIGN(len) == 0) + return addr; + +#ifdef notyet + /* Don't permit mappings that would cross a region boundary: */ + region_start = IA64_GET_REGION(addr); + region_end = IA64_GET_REGION(addr + len); + if (region_start != region_end) + return -EINVAL; + + <<x??x>> +#endif + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + return -EBADF; + } + + down(&current->mm->mmap_sem); + lock_kernel(); + + addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + + unlock_kernel(); + up(&current->mm->mmap_sem); + + if (file) + fput(file); + return addr; +} + +/* + * mmap2() is like mmap() except that the offset is expressed in units + * of PAGE_SIZE (instead of bytes). This allows to mmap2() (pieces + * of) files that are larger than the address space of the CPU. + */ +asmlinkage unsigned long +sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff, + long arg6, long arg7, long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + + addr = do_mmap2(addr, len, prot, flags, fd, pgoff); + if (!IS_ERR(addr)) + regs->r8 = 0; /* ensure large addresses are not mistaken as failures... */ + return addr; +} + +asmlinkage unsigned long +sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, + int fd, long off, long arg6, long arg7, long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + + addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + if (!IS_ERR(addr)) + regs->r8 = 0; /* ensure large addresses are not mistaken as failures...
*/ + return addr; +} + +asmlinkage long +sys_ioperm (unsigned long from, unsigned long num, int on) +{ + printk(KERN_ERR "sys_ioperm(from=%lx, num=%lx, on=%d)\n", from, num, on); + return -EIO; +} + +asmlinkage long +sys_iopl (int level, long arg1, long arg2, long arg3) +{ + lock_kernel(); + printk(KERN_ERR "sys_iopl(level=%d)!\n", level); + unlock_kernel(); + return -ENOSYS; +} + +asmlinkage long +sys_vm86 (long arg0, long arg1, long arg2, long arg3) +{ + lock_kernel(); + printk(KERN_ERR "sys_vm86(%lx, %lx, %lx, %lx)!\n", arg0, arg1, arg2, arg3); + unlock_kernel(); + return -ENOSYS; +} + +asmlinkage long +sys_modify_ldt (long arg0, long arg1, long arg2, long arg3) +{ + lock_kernel(); + printk(KERN_ERR "sys_modify_ldt(%lx, %lx, %lx, %lx)!\n", arg0, arg1, arg2, arg3); + unlock_kernel(); + return -ENOSYS; +} + +#ifndef CONFIG_PCI + +asmlinkage long +sys_pciconfig_read (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, + void *buf) +{ + return -ENOSYS; +} + +asmlinkage long +sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, + void *buf) +{ + return -ENOSYS; +} + + +#endif /* CONFIG_PCI */ diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c new file mode 100644 index 000000000..7c5ace740 --- /dev/null +++ b/arch/ia64/kernel/time.c @@ -0,0 +1,290 @@ +/* + * linux/arch/ia64/kernel/time.c + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 1999-2000 David Mosberger <davidm@hpl.hp.com> + * Copyright (C) 1999 Don Dugger <don.dugger@intel.com> + * Copyright (C) 1999-2000 VA Linux Systems + * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com> + */ +#include <linux/config.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/time.h> + +#include <asm/delay.h> +#include <asm/efi.h> +#include <asm/irq.h> +#include <asm/machvec.h> +#include <asm/ptrace.h> +#include <asm/sal.h> +#include <asm/system.h> + +extern rwlock_t xtime_lock; +extern volatile unsigned long lost_ticks; + +#ifdef CONFIG_IA64_DEBUG_IRQ + +unsigned long last_cli_ip; + +#endif + +static struct { + unsigned long delta; + unsigned long next[NR_CPUS]; +} itm; + +static void +do_profile (unsigned long ip) +{ + extern char _stext; + + if (prof_buffer && current->pid) { + ip -= (unsigned long) &_stext; + ip >>= prof_shift; + /* + * Don't ignore out-of-bounds IP values silently, + * put them into the last histogram slot, so if + * present, they will show up as a sharp peak. + */ + if (ip > prof_len - 1) + ip = prof_len - 1; + + atomic_inc((atomic_t *) &prof_buffer[ip]); + } +} + +/* + * Return the number of micro-seconds that elapsed since the last + * update to jiffy. The xtime_lock must be at least read-locked when + * calling this routine. + */ +static inline unsigned long +gettimeoffset (void) +{ + unsigned long now = ia64_get_itc(); + unsigned long elapsed_cycles, lost; + + elapsed_cycles = now - (itm.next[smp_processor_id()] - itm.delta); + + lost = lost_ticks; + if (lost) + elapsed_cycles += lost*itm.delta; + + return (elapsed_cycles*my_cpu_data.usec_per_cyc) >> IA64_USEC_PER_CYC_SHIFT; +} + +void +do_settimeofday (struct timeval *tv) +{ + write_lock_irq(&xtime_lock); + { + /* + * This is revolting. We need to set the xtime.tv_usec + * correctly. However, the value in this location is + * is value at the last tick. Discover what + * correction gettimeofday would have done, and then + * undo it! 
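(A small numeric illustration of the correction performed below; the 3000 usec figure is made up. If gettimeoffset() currently reports 3000 usec since the last tick, the requested time is pre-decremented by that amount, so a later do_gettimeofday() -- which adds the offset back on top of xtime -- hands back roughly the value the caller asked to set.)

    tv->tv_usec -= gettimeoffset();    /* e.g. 500000 - 3000 = 497000 stored in xtime    */
    /* later: do_gettimeofday() returns xtime.tv_usec + offset, i.e. about 500000 again  */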
+ */ + tv->tv_usec -= gettimeoffset(); + while (tv->tv_usec < 0) { + tv->tv_usec += 1000000; + tv->tv_sec--; + } + + xtime = *tv; + time_adjust = 0; /* stop active adjtime() */ + time_status |= STA_UNSYNC; + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; + } + write_unlock_irq(&xtime_lock); +} + +void +do_gettimeofday (struct timeval *tv) +{ + unsigned long flags, usec, sec; + + read_lock_irqsave(&xtime_lock, flags); + { + usec = gettimeoffset(); + + sec = xtime.tv_sec; + usec += xtime.tv_usec; + } + read_unlock_irqrestore(&xtime_lock, flags); + + while (usec >= 1000000) { + usec -= 1000000; + ++sec; + } + + tv->tv_sec = sec; + tv->tv_usec = usec; +} + +static void +timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + static unsigned long last_time; + static unsigned char count; + int cpu = smp_processor_id(); + + /* + * Here we are in the timer irq handler. We have irqs locally + * disabled, but we don't know if the timer_bh is running on + * another CPU. We need to avoid to SMP race by acquiring the + * xtime_lock. + */ + write_lock(&xtime_lock); + while (1) { + /* do kernel PC profiling here. */ + if (!user_mode(regs)) + do_profile(regs->cr_iip); + +#ifdef CONFIG_SMP + smp_do_timer(regs); + if (smp_processor_id() == bootstrap_processor) + do_timer(regs); +#else + do_timer(regs); +#endif + + itm.next[cpu] += itm.delta; + /* + * There is a race condition here: to be on the "safe" + * side, we process timer ticks until itm.next is + * ahead of the itc by at least half the timer + * interval. This should give us enough time to set + * the new itm value without losing a timer tick. + */ + if (time_after(itm.next[cpu], ia64_get_itc() + itm.delta/2)) { + ia64_set_itm(itm.next[cpu]); + break; + } + +#if !(defined(CONFIG_IA64_SOFTSDV_HACKS) && defined(CONFIG_SMP)) + /* + * SoftSDV in SMP mode is _slow_, so we do "loose" ticks, + * but it's really OK... + */ + if (count > 0 && jiffies - last_time > 5*HZ) + count = 0; + if (count++ == 0) { + last_time = jiffies; + printk("Lost clock tick on CPU %d (now=%lx, next=%lx)!!\n", + cpu, ia64_get_itc(), itm.next[cpu]); +# ifdef CONFIG_IA64_DEBUG_IRQ + printk("last_cli_ip=%lx\n", last_cli_ip); +# endif + } +#endif + } + write_unlock(&xtime_lock); +} + +/* + * Encapsulate access to the itm structure for SMP. + */ +void __init +ia64_cpu_local_tick(void) +{ + /* arrange for the cycle counter to generate a timer interrupt: */ + ia64_set_itv(TIMER_IRQ, 0); + ia64_set_itc(0); + itm.next[smp_processor_id()] = ia64_get_itc() + itm.delta; + ia64_set_itm(itm.next[smp_processor_id()]); +} + +void __init +ia64_init_itm (void) +{ + unsigned long platform_base_freq, itc_freq, drift; + struct pal_freq_ratio itc_ratio, proc_ratio; + long status; + + /* + * According to SAL v2.6, we need to use a SAL call to determine the + * platform base frequency and then a PAL call to determine the + * frequency ratio between the ITC and the base frequency. 
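(A worked example of the arithmetic carried out by the function below, using the same numbers the code itself falls back on when SAL/PAL fail; the HZ value is hypothetical and only there to make the division concrete.)

    platform_base_freq = 100000000;                /* 100 MHz platform/bus clock      */
    itc_ratio.num = 3; itc_ratio.den = 1;          /* ITC ticks 3x per bus cycle      */
    itc_freq  = (platform_base_freq*itc_ratio.num)/itc_ratio.den;   /* 300,000,000 Hz */
    itm.delta = itc_freq / HZ;                     /* e.g. 300,000 ITC cycles per tick at HZ == 1000 */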
+ */ + status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, &platform_base_freq, &drift); + if (status != 0) { + printk("SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status)); + } else { + status = ia64_pal_freq_ratios(&proc_ratio, 0, &itc_ratio); + if (status != 0) + printk("PAL_FREQ_RATIOS failed with status=%ld\n", status); + } + if (status != 0) { + /* invent "random" values */ + printk("SAL/PAL failed to obtain frequency info---inventing reasonably values\n"); + platform_base_freq = 100000000; + itc_ratio.num = 3; + itc_ratio.den = 1; + } +#if defined(CONFIG_IA64_LION_HACKS) + /* Our Lion currently returns base freq 104.857MHz, which + ain't right (it really is 100MHz). */ + printk("SAL/PAL returned: base-freq=%lu, itc-ratio=%lu/%lu, proc-ratio=%lu/%lu\n", + platform_base_freq, itc_ratio.num, itc_ratio.den, + proc_ratio.num, proc_ratio.den); + platform_base_freq = 100000000; +#elif 0 && defined(CONFIG_IA64_BIGSUR_HACKS) + /* BigSur with 991020 firmware returned itc-ratio=9/2 and base + freq 75MHz, which wasn't right. The 991119 firmware seems + to return the right values, so this isn't necessary + anymore... */ + printk("SAL/PAL returned: base-freq=%lu, itc-ratio=%lu/%lu, proc-ratio=%lu/%lu\n", + platform_base_freq, itc_ratio.num, itc_ratio.den, + proc_ratio.num, proc_ratio.den); + platform_base_freq = 100000000; + proc_ratio.num = 5; proc_ratio.den = 1; + itc_ratio.num = 5; itc_ratio.den = 1; +#elif defined(CONFIG_IA64_SOFTSDV_HACKS) + platform_base_freq = 10000000; + proc_ratio.num = 4; proc_ratio.den = 1; + itc_ratio.num = 4; itc_ratio.den = 1; +#else + if (platform_base_freq < 40000000) { + printk("Platform base frequency %lu bogus---resetting to 75MHz!\n", + platform_base_freq); + platform_base_freq = 75000000; + } +#endif + if (!proc_ratio.den) + proc_ratio.num = 1; /* avoid division by zero */ + if (!itc_ratio.den) + itc_ratio.num = 1; /* avoid division by zero */ + + itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den; + itm.delta = itc_freq / HZ; + printk("timer: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, ITC freq=%lu.%03luMHz\n", + platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000, + itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000); + + my_cpu_data.proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den; + my_cpu_data.itc_freq = itc_freq; + my_cpu_data.cyc_per_usec = itc_freq / 1000000; + my_cpu_data.usec_per_cyc = (1000000UL << IA64_USEC_PER_CYC_SHIFT) / itc_freq; + + /* Setup the CPU local timer tick */ + ia64_cpu_local_tick(); +} + +void __init +time_init (void) +{ + /* + * Request the IRQ _before_ doing anything to cause that + * interrupt to be posted. + */ + if (request_irq(TIMER_IRQ, timer_interrupt, 0, "timer", NULL)) + panic("Could not allocate timer IRQ!"); + + efi_gettimeofday(&xtime); + ia64_init_itm(); +} diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c new file mode 100644 index 000000000..c242622ec --- /dev/null +++ b/arch/ia64/kernel/traps.c @@ -0,0 +1,423 @@ +/* + * Architecture-specific trap handling. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + */ + +/* + * The fpu_fault() handler needs to be able to access and update all + * floating point registers. Those saved in pt_regs can be accessed + * through that structure, but those not saved, will be accessed + * directly. To make this work, we need to ensure that the compiler + * does not end up using a preserved floating point register on its + * own. 
The following achieves this by declaring preserved registers + * that are not marked as "fixed" as global register variables. + */ +register double f2 asm ("f2"); register double f3 asm ("f3"); +register double f4 asm ("f4"); register double f5 asm ("f5"); + +register long f16 asm ("f16"); register long f17 asm ("f17"); +register long f18 asm ("f18"); register long f19 asm ("f19"); +register long f20 asm ("f20"); register long f21 asm ("f21"); +register long f22 asm ("f22"); register long f23 asm ("f23"); + +register double f24 asm ("f24"); register double f25 asm ("f25"); +register double f26 asm ("f26"); register double f27 asm ("f27"); +register double f28 asm ("f28"); register double f29 asm ("f29"); +register double f30 asm ("f30"); register double f31 asm ("f31"); + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/sched.h> + +#ifdef CONFIG_KDB +# include <linux/kdb.h> +#endif + +#include <asm/processor.h> +#include <asm/uaccess.h> + +#include <asm/fpswa.h> + +static fpswa_interface_t *fpswa_interface; + +void __init +trap_init (void) +{ + printk("fpswa interface at %lx\n", ia64_boot_param.fpswa); + if (ia64_boot_param.fpswa) { +#define OLD_FIRMWARE +#ifdef OLD_FIRMWARE + /* + * HACK to work around broken firmware. This code + * applies the label fixup to the FPSWA interface and + * works both with old and new (fixed) firmware. + */ + unsigned long addr = (unsigned long) __va(ia64_boot_param.fpswa); + unsigned long gp_val = *(unsigned long *)(addr + 8); + + /* go indirect and indexed to get table address */ + addr = gp_val; + gp_val = *(unsigned long *)(addr + 8); + + while (gp_val == *(unsigned long *)(addr + 8)) { + *(unsigned long *)addr |= PAGE_OFFSET; + *(unsigned long *)(addr + 8) |= PAGE_OFFSET; + addr += 16; + } +#endif + /* FPSWA fixup: make the interface pointer a kernel virtual address: */ + fpswa_interface = __va(ia64_boot_param.fpswa); + } +} + +void +die_if_kernel (char *str, struct pt_regs *regs, long err) +{ + if (user_mode(regs)) { +#if 1 + /* XXX for debugging only */ + printk ("!!die_if_kernel: %s(%d): %s %ld\n", + current->comm, current->pid, str, err); + show_regs(regs); +#endif + return; + } + + printk("%s[%d]: %s %ld\n", current->comm, current->pid, str, err); + +#ifdef CONFIG_KDB + while (1) { + kdb(KDB_REASON_PANIC, 0, regs); + printk("Cant go anywhere from Panic!\n"); + } +#endif + + show_regs(regs); + + if (current->thread.flags & IA64_KERNEL_DEATH) { + printk("die_if_kernel recursion detected.\n"); + sti(); + while (1); + } + current->thread.flags |= IA64_KERNEL_DEATH; + do_exit(SIGSEGV); +} + +void +ia64_bad_break (unsigned long break_num, struct pt_regs *regs) +{ + siginfo_t siginfo; + + /* gdb uses a break number of 0xccccc for debug breakpoints: */ + if (break_num != 0xccccc) + die_if_kernel("Bad break", regs, break_num); + + siginfo.si_signo = SIGTRAP; + siginfo.si_errno = break_num; /* XXX is it legal to abuse si_errno like this? */ + siginfo.si_code = TRAP_BRKPT; + send_sig_info(SIGTRAP, &siginfo, current); +} + +/* + * Unimplemented system calls. This is called only for stuff that + * we're supposed to implement but haven't done so yet. Everything + * else goes to sys_ni_syscall. 
*/ +asmlinkage long +ia64_ni_syscall (unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5, unsigned long arg6, unsigned long arg7, + unsigned long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + + printk("<sc%ld(%lx,%lx,%lx,%lx)>\n", regs->r15, arg0, arg1, arg2, arg3); + return -ENOSYS; +} + +/* + * disabled_fp_fault() is called when a user-level process attempts to + * access one of the registers f32..f127 while it doesn't own the + * fp-high register partition. When this happens, we save the current + * fph partition in the task_struct of the fpu-owner (if necessary) + * and then load the fp-high partition of the current task (if + * necessary). + */ +static inline void +disabled_fph_fault (struct pt_regs *regs) +{ + struct task_struct *fpu_owner = ia64_get_fpu_owner(); + + regs->cr_ipsr &= ~(IA64_PSR_DFH | IA64_PSR_MFH); + if (fpu_owner != current) { + ia64_set_fpu_owner(current); + + if (fpu_owner && ia64_psr(ia64_task_regs(fpu_owner))->mfh) { + fpu_owner->thread.flags |= IA64_THREAD_FPH_VALID; + __ia64_save_fpu(fpu_owner->thread.fph); + } + if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) { + __ia64_load_fpu(current->thread.fph); + } else { + __ia64_init_fpu(); + } + } +} + +static inline int +fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs, + struct pt_regs *regs) +{ + fp_state_t fp_state; + fpswa_ret_t ret; +#ifdef FPSWA_BUG + struct ia64_fpreg f6_15[10]; +#endif + + if (!fpswa_interface) + return -1; + + memset(&fp_state, 0, sizeof(fp_state_t)); + + /* + * compute fp_state. only FP registers f6 - f11 are used by the + * kernel, so set those bits in the mask and set the low volatile + * pointer to point to these registers. + */ + fp_state.bitmask_low64 = 0xffc0; /* bit6..bit15 */ +#ifndef FPSWA_BUG + fp_state.fp_state_low_volatile = &regs->f6; +#else + f6_15[0] = regs->f6; + f6_15[1] = regs->f7; + f6_15[2] = regs->f8; + f6_15[3] = regs->f9; + __asm__ ("stf.spill %0=f10" : "=m"(f6_15[4])); + __asm__ ("stf.spill %0=f11" : "=m"(f6_15[5])); + __asm__ ("stf.spill %0=f12" : "=m"(f6_15[6])); + __asm__ ("stf.spill %0=f13" : "=m"(f6_15[7])); + __asm__ ("stf.spill %0=f14" : "=m"(f6_15[8])); + __asm__ ("stf.spill %0=f15" : "=m"(f6_15[9])); + fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) f6_15; +#endif + /* + * unsigned long (*EFI_FPSWA) ( + * unsigned long trap_type, + * void *Bundle, + * unsigned long *pipsr, + * unsigned long *pfsr, + * unsigned long *pisr, + * unsigned long *ppreds, + * unsigned long *pifs, + * void *fp_state); + */ + ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle, + (unsigned long *) ipsr, (unsigned long *) fpsr, + (unsigned long *) isr, (unsigned long *) pr, + (unsigned long *) ifs, &fp_state); +#ifdef FPSWA_BUG + __asm__ ("ldf.fill f10=%0" :: "m"(f6_15[4])); + __asm__ ("ldf.fill f11=%0" :: "m"(f6_15[5])); + __asm__ ("ldf.fill f12=%0" :: "m"(f6_15[6])); + __asm__ ("ldf.fill f13=%0" :: "m"(f6_15[7])); + __asm__ ("ldf.fill f14=%0" :: "m"(f6_15[8])); + __asm__ ("ldf.fill f15=%0" :: "m"(f6_15[9])); + regs->f6 = f6_15[0]; + regs->f7 = f6_15[1]; + regs->f8 = f6_15[2]; + regs->f9 = f6_15[3]; +#endif + return ret.status; +} + +/* + * Handle floating-point assist faults and traps.
*/ +static int +handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) +{ + long exception, bundle[2]; + unsigned long fault_ip; + static int fpu_swa_count = 0; + static unsigned long last_time; + + fault_ip = regs->cr_iip; + if (!fp_fault && (ia64_psr(regs)->ri == 0)) + fault_ip -= 16; + if (copy_from_user(bundle, (void *) fault_ip, sizeof(bundle))) + return -1; + + if (fpu_swa_count > 5 && jiffies - last_time > 5*HZ) + fpu_swa_count = 0; + if (++fpu_swa_count < 5) { + last_time = jiffies; + printk("%s(%d): floating-point assist fault at ip %016lx\n", + current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri); + } + + exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr, + &regs->cr_ifs, regs); + if (fp_fault) { + if (exception == 0) { + /* emulation was successful */ + ia64_increment_ip(regs); + } else if (exception == -1) { + printk("handle_fpu_swa: fp_emulate() returned -1\n"); + return -2; + } else { + /* is next instruction a trap? */ + if (exception & 2) { + ia64_increment_ip(regs); + } + return -1; + } + } else { + if (exception == -1) { + printk("handle_fpu_swa: fp_emulate() returned -1\n"); + return -2; + } else if (exception != 0) { + /* raise exception */ + return -1; + } + } + return 0; +} + +void +ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, + unsigned long iim, unsigned long itir, unsigned long arg5, + unsigned long arg6, unsigned long arg7, unsigned long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + unsigned long code, error = isr; + struct siginfo siginfo; + char buf[128]; + int result; + static const char *reason[] = { + "IA-64 Illegal Operation fault", + "IA-64 Privileged Operation fault", + "IA-64 Privileged Register fault", + "IA-64 Reserved Register/Field fault", + "Disabled Instruction Set Transition fault", + "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault", + "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12", + "Unknown fault 13", "Unknown fault 14", "Unknown fault 15" + }; + +#if 0 + /* this is for minimal trust debugging; yeah this kind of stuff is useful at times... */ + + if (vector != 25) { + static unsigned long last_time; + static char count; + unsigned long n = vector; + char buf[32], *cp; + + if (count > 5 && jiffies - last_time > 5*HZ) + count = 0; + + if (count++ < 5) { + last_time = jiffies; + cp = buf + sizeof(buf); + *--cp = '\0'; + while (n) { + *--cp = "0123456789abcdef"[n & 0xf]; + n >>= 4; + } + printk("<0x%s>", cp); + } + } +#endif + + switch (vector) { + case 24: /* General Exception */ + code = (isr >> 4) & 0xf; + sprintf(buf, "General Exception: %s%s", reason[code], + (code == 3) ? ((isr & (1UL << 37)) + ?
" (RSE access)" : " (data access)") : ""); +#ifndef CONFIG_ITANIUM_ASTEP_SPECIFIC + if (code == 8) { +# ifdef CONFIG_IA64_PRINT_HAZARDS + printk("%016lx:possible hazard, pr = %016lx\n", regs->cr_iip, regs->pr); +# endif + return; + } +#endif + break; + + case 25: /* Disabled FP-Register */ + if (isr & 2) { + disabled_fph_fault(regs); + return; + } + sprintf(buf, "Disabled FPL fault---not supposed to happen!"); + break; + + case 29: /* Debug */ + case 35: /* Taken Branch Trap */ + case 36: /* Single Step Trap */ + switch (vector) { + case 29: siginfo.si_code = TRAP_BRKPT; break; + case 35: siginfo.si_code = TRAP_BRANCH; break; + case 36: siginfo.si_code = TRAP_TRACE; break; + } + siginfo.si_signo = SIGTRAP; + siginfo.si_errno = 0; + force_sig_info(SIGTRAP, &siginfo, current); + return; + + case 30: /* Unaligned fault */ + sprintf(buf, "Unaligned access in kernel mode---don't do this!"); + break; + + case 32: /* fp fault */ + case 33: /* fp trap */ + result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr); + if (result < 0) { + siginfo.si_signo = SIGFPE; + siginfo.si_errno = 0; + siginfo.si_code = 0; /* XXX fix me */ + siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); + send_sig_info(SIGFPE, &siginfo, current); + if (result == -1) + send_sig_info(SIGFPE, &siginfo, current); + else + force_sig(SIGFPE, current); + } + return; + + case 34: /* Unimplemented Instruction Address Trap */ + if (user_mode(regs)) { + printk("Woah! Unimplemented Instruction Address Trap!\n"); + siginfo.si_code = ILL_BADIADDR; + siginfo.si_signo = SIGILL; + siginfo.si_errno = 0; + force_sig_info(SIGILL, &siginfo, current); + return; + } + sprintf(buf, "Unimplemented Instruction Address fault"); + break; + + case 45: + printk("Unexpected IA-32 exception\n"); + force_sig(SIGSEGV, current); + return; + + case 46: + printk("Unexpected IA-32 intercept trap\n"); + force_sig(SIGSEGV, current); + return; + + case 47: + sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16); + break; + + default: + sprintf(buf, "Fault %lu", vector); + break; + } + die_if_kernel(buf, regs, error); + force_sig(SIGILL, current); +} diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c new file mode 100644 index 000000000..0bd213f6b --- /dev/null +++ b/arch/ia64/kernel/unaligned.c @@ -0,0 +1,1554 @@ +/* + * Architecture-specific unaligned trap handling. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp_lock.h> +#include <asm/uaccess.h> +#include <asm/rse.h> +#include <asm/processor.h> +#include <asm/unaligned.h> + +extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn)); + +#undef DEBUG_UNALIGNED_TRAP + +#ifdef DEBUG_UNALIGNED_TRAP +#define DPRINT(a) { printk("%s, line %d: ", __FUNCTION__, __LINE__); printk a;} +#else +#define DPRINT(a) +#endif + +#define IA64_FIRST_STACKED_GR 32 +#define IA64_FIRST_ROTATING_FR 32 +#define SIGN_EXT9 __IA64_UL(0xffffffffffffff00) + +/* + * For M-unit: + * + * opcode | m | x6 | + * --------|------|---------| + * [40-37] | [36] | [35:30] | + * --------|------|---------| + * 4 | 1 | 6 | = 11 bits + * -------------------------- + * However bits [31:30] are not directly useful to distinguish between + * load/store so we can use [35:32] instead, which gives the following + * mask ([40:32]) using 9 bits. 
The 'e' comes from the fact that we defer + * checking the m-bit until later in the load/store emulation. + */ +#define IA64_OPCODE_MASK 0x1ef00000000 + +/* + * Table C-28 Integer Load/Store + * + * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF + * + * ld8.fill, st8.fill MUST be aligned because the RNATs are based on + * the address (bits [8:3]), so we must failed. + */ +#define LD_OP 0x08000000000 +#define LDS_OP 0x08100000000 +#define LDA_OP 0x08200000000 +#define LDSA_OP 0x08300000000 +#define LDBIAS_OP 0x08400000000 +#define LDACQ_OP 0x08500000000 +/* 0x086, 0x087 are not relevant */ +#define LDCCLR_OP 0x08800000000 +#define LDCNC_OP 0x08900000000 +#define LDCCLRACQ_OP 0x08a00000000 +#define ST_OP 0x08c00000000 +#define STREL_OP 0x08d00000000 +/* 0x08e,0x8f are not relevant */ + +/* + * Table C-29 Integer Load +Reg + * + * we use the ld->m (bit [36:36]) field to determine whether or not we have + * a load/store of this form. + */ + +/* + * Table C-30 Integer Load/Store +Imm + * + * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF + * + * ld8.fill, st8.fill must be aligned because the Nat register are based on + * the address, so we must fail and the program must be fixed. + */ +#define LD_IMM_OP 0x0a000000000 +#define LDS_IMM_OP 0x0a100000000 +#define LDA_IMM_OP 0x0a200000000 +#define LDSA_IMM_OP 0x0a300000000 +#define LDBIAS_IMM_OP 0x0a400000000 +#define LDACQ_IMM_OP 0x0a500000000 +/* 0x0a6, 0xa7 are not relevant */ +#define LDCCLR_IMM_OP 0x0a800000000 +#define LDCNC_IMM_OP 0x0a900000000 +#define LDCCLRACQ_IMM_OP 0x0aa00000000 +#define ST_IMM_OP 0x0ac00000000 +#define STREL_IMM_OP 0x0ad00000000 +/* 0x0ae,0xaf are not relevant */ + +/* + * Table C-32 Floating-point Load/Store + */ +#define LDF_OP 0x0c000000000 +#define LDFS_OP 0x0c100000000 +#define LDFA_OP 0x0c200000000 +#define LDFSA_OP 0x0c300000000 +/* 0x0c6 is irrelevant */ +#define LDFCCLR_OP 0x0c800000000 +#define LDFCNC_OP 0x0c900000000 +/* 0x0cb is irrelevant */ +#define STF_OP 0x0cc00000000 + +/* + * Table C-33 Floating-point Load +Reg + * + * we use the ld->m (bit [36:36]) field to determine whether or not we have + * a load/store of this form. + */ + +/* + * Table C-34 Floating-point Load/Store +Imm + */ +#define LDF_IMM_OP 0x0e000000000 +#define LDFS_IMM_OP 0x0e100000000 +#define LDFA_IMM_OP 0x0e200000000 +#define LDFSA_IMM_OP 0x0e300000000 +/* 0x0e6 is irrelevant */ +#define LDFCCLR_IMM_OP 0x0e800000000 +#define LDFCNC_IMM_OP 0x0e900000000 +#define STF_IMM_OP 0x0ec00000000 + +typedef struct { + unsigned long qp:6; /* [0:5] */ + unsigned long r1:7; /* [6:12] */ + unsigned long imm:7; /* [13:19] */ + unsigned long r3:7; /* [20:26] */ + unsigned long x:1; /* [27:27] */ + unsigned long hint:2; /* [28:29] */ + unsigned long x6_sz:2; /* [30:31] */ + unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */ + unsigned long m:1; /* [36:36] */ + unsigned long op:4; /* [37:40] */ + unsigned long pad:23; /* [41:63] */ +} load_store_t; + + +typedef enum { + UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */ + UPD_REG /* ldXZ r1=[r3],r2 */ +} update_t; + +/* + * We use tables to keep track of the offsets of registers in the saved state. + * This way we save having big switch/case statements. + * + * We use bit 0 to indicate switch_stack or pt_regs. + * The offset is simply shifted by 1 bit. + * A 2-byte value should be enough to hold any kind of offset + * + * In case the calling convention changes (and thus pt_regs/switch_stack) + * simply use RSW instead of RPT or vice-versa. 
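(A short worked example of the offset encoding implemented by the macros just below; RPO()/RSO() compute the same thing as offsetof(), and the two registers picked here, r9 and f30, both appear in the gr_info/fr_info tables that follow.)

    RPT(r9)  == offsetof(struct pt_regs, r9) << 1              /* bit 0 clear: lives in pt_regs    */
    RSW(f30) == (offsetof(struct switch_stack, f30) << 1) | 1  /* bit 0 set: lives in switch_stack */

    GR_OFFS(x)  == gr_info[x] >> 1     /* decode: byte offset within the chosen structure   */
    GR_IN_SW(x) == gr_info[x] & 0x1    /* decode: which structure the offset is relative to */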
+ */ + +#define RPO(x) ((size_t) &((struct pt_regs *)0)->x) +#define RSO(x) ((size_t) &((struct switch_stack *)0)->x) + +#define RPT(x) (RPO(x) << 1) +#define RSW(x) (1| RSO(x)<<1) + +#define GR_OFFS(x) (gr_info[x]>>1) +#define GR_IN_SW(x) (gr_info[x] & 0x1) + +#define FR_OFFS(x) (fr_info[x]>>1) +#define FR_IN_SW(x) (fr_info[x] & 0x1) + +static u16 gr_info[32]={ + 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ + + RPT(r1), RPT(r2), RPT(r3), + + RSW(r4), RSW(r5), RSW(r6), RSW(r7), + + RPT(r8), RPT(r9), RPT(r10), RPT(r11), + RPT(r12), RPT(r13), RPT(r14), RPT(r15), + + RPT(r16), RPT(r17), RPT(r18), RPT(r19), + RPT(r20), RPT(r21), RPT(r22), RPT(r23), + RPT(r24), RPT(r25), RPT(r26), RPT(r27), + RPT(r28), RPT(r29), RPT(r30), RPT(r31) +}; + +static u16 fr_info[32]={ + 0, /* constant : WE SHOULD NEVER GET THIS */ + 0, /* constant : WE SHOULD NEVER GET THIS */ + + RSW(f2), RSW(f3), RSW(f4), RSW(f5), + + RPT(f6), RPT(f7), RPT(f8), RPT(f9), + + RSW(f10), RSW(f11), RSW(f12), RSW(f13), RSW(f14), + RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19), + RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24), + RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29), + RSW(f30), RSW(f31) +}; + +/* Invalidate ALAT entry for integer register REGNO. */ +static void +invala_gr (int regno) +{ +# define F(reg) case reg: __asm__ __volatile__ ("invala.e r%0" :: "i"(reg)); break + + switch (regno) { + F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7); + F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15); + F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23); + F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31); + F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39); + F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47); + F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55); + F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63); + F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71); + F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79); + F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87); + F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95); + F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103); + F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111); + F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119); + F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127); + } +# undef F +} + +/* Invalidate ALAT entry for floating-point register REGNO. 
*/ +static void +invala_fr (int regno) +{ +# define F(reg) case reg: __asm__ __volatile__ ("invala.e f%0" :: "i"(reg)); break + + switch (regno) { + F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7); + F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15); + F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23); + F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31); + F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39); + F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47); + F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55); + F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63); + F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71); + F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79); + F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87); + F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95); + F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103); + F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111); + F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119); + F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127); + } +# undef F +} + +static void +set_rse_reg(struct pt_regs *regs, unsigned long r1, unsigned long val, int nat) +{ + struct switch_stack *sw = (struct switch_stack *)regs - 1; + unsigned long *kbs = ((unsigned long *)current) + IA64_RBS_OFFSET/8; + unsigned long on_kbs; + unsigned long *bsp, *bspstore, *addr, *ubs_end, *slot; + unsigned long rnats; + long nlocals; + + /* + * cr_ifs=[rv:ifm], ifm=[....:sof(6)] + * nlocal=number of locals (in+loc) register of the faulting function + */ + nlocals = (regs->cr_ifs) & 0x7f; + + DPRINT(("sw.bsptore=%lx pt.bspstore=%lx\n", sw->ar_bspstore, regs->ar_bspstore)); + DPRINT(("cr.ifs=%lx sof=%ld sol=%ld\n", + regs->cr_ifs, regs->cr_ifs &0x7f, (regs->cr_ifs>>7)&0x7f)); + + on_kbs = ia64_rse_num_regs(kbs, (unsigned long *)sw->ar_bspstore); + bspstore = (unsigned long *)regs->ar_bspstore; + + DPRINT(("rse_slot_num=0x%lx\n",ia64_rse_slot_num((unsigned long *)sw->ar_bspstore))); + DPRINT(("kbs=%p nlocals=%ld\n", kbs, nlocals)); + DPRINT(("bspstore next rnat slot %p\n", + ia64_rse_rnat_addr((unsigned long *)sw->ar_bspstore))); + DPRINT(("on_kbs=%ld rnats=%ld\n", + on_kbs, ((sw->ar_bspstore-(unsigned long)kbs)>>3) - on_kbs)); + + /* + * See get_rse_reg() for an explanation on the following instructions + */ + ubs_end = ia64_rse_skip_regs(bspstore, on_kbs); + bsp = ia64_rse_skip_regs(ubs_end, -nlocals); + addr = slot = ia64_rse_skip_regs(bsp, r1 - 32); + + DPRINT(("ubs_end=%p bsp=%p addr=%p slot=0x%lx\n", + ubs_end, bsp, addr, ia64_rse_slot_num(addr))); + + ia64_poke(regs, current, (unsigned long)addr, val); + + /* + * addr will now contain the address of the RNAT for the register + */ + addr = ia64_rse_rnat_addr(addr); + + ia64_peek(regs, current, (unsigned long)addr, &rnats); + DPRINT(("rnat @%p = 0x%lx nat=%d rnatval=%lx\n", + addr, rnats, nat, rnats &ia64_rse_slot_num(slot))); + + if ( nat ) { + rnats |= __IA64_UL(1) << ia64_rse_slot_num(slot); + } else { + rnats &= ~(__IA64_UL(1) << ia64_rse_slot_num(slot)); + } + ia64_poke(regs, current, (unsigned long)addr, rnats); + + DPRINT(("rnat changed to @%p = 0x%lx\n", addr, rnats)); +} + + +static void +get_rse_reg(struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat) +{ + struct switch_stack *sw = (struct switch_stack *)regs - 1; + unsigned long *kbs = (unsigned long *)current + IA64_RBS_OFFSET/8; + unsigned long on_kbs; + 
long nlocals; + unsigned long *bsp, *addr, *ubs_end, *slot, *bspstore; + unsigned long rnats; + + /* + * cr_ifs=[rv:ifm], ifm=[....:sof(6)] + * nlocals=number of local registers in the faulting function + */ + nlocals = (regs->cr_ifs) & 0x7f; + + /* + * save_switch_stack does a flushrs and saves bspstore. + * on_kbs = actual number of registers saved on kernel backing store + * (taking into accound potential RNATs) + * + * Note that this number can be greater than nlocals if the dirty + * parititions included more than one stack frame at the time we + * switched to KBS + */ + on_kbs = ia64_rse_num_regs(kbs, (unsigned long *)sw->ar_bspstore); + bspstore = (unsigned long *)regs->ar_bspstore; + + /* + * To simplify the logic, we calculate everything as if there was only + * one backing store i.e., the user one (UBS). We let it to peek/poke + * to figure out whether the register we're looking for really is + * on the UBS or on KBS. + * + * regs->ar_bsptore = address of last register saved on UBS (before switch) + * + * ubs_end = virtual end of the UBS (if everything had been spilled there) + * + * We know that ubs_end is the point where the last register on the + * stack frame we're interested in as been saved. So we need to walk + * our way backward to figure out what the BSP "was" for that frame, + * this will give us the location of r32. + * + * bsp = "virtual UBS" address of r32 for our frame + * + * Finally, get compute the address of the register we're looking for + * using bsp as our base (move up again). + * + * Please note that in our case, we know that the register is necessarily + * on the KBS because we are only interested in the current frame at the moment + * we got the exception i.e., bsp is not changed until we switch to KBS. + */ + ubs_end = ia64_rse_skip_regs(bspstore, on_kbs); + bsp = ia64_rse_skip_regs(ubs_end, -nlocals); + addr = slot = ia64_rse_skip_regs(bsp, r1 - 32); + + DPRINT(("ubs_end=%p bsp=%p addr=%p slot=0x%lx\n", + ubs_end, bsp, addr, ia64_rse_slot_num(addr))); + + ia64_peek(regs, current, (unsigned long)addr, val); + + /* + * addr will now contain the address of the RNAT for the register + */ + addr = ia64_rse_rnat_addr(addr); + + ia64_peek(regs, current, (unsigned long)addr, &rnats); + DPRINT(("rnat @%p = 0x%lx\n", addr, rnats)); + + if ( nat ) *nat = rnats >> ia64_rse_slot_num(slot) & 0x1; +} + + +static void +setreg(unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *)regs -1; + unsigned long addr; + unsigned long bitmask; + unsigned long *unat; + + + /* + * First takes care of stacked registers + */ + if ( regnum >= IA64_FIRST_STACKED_GR ) { + set_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * Using r0 as a target raises a General Exception fault which has + * higher priority than the Unaligned Reference fault. + */ + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + if ( GR_IN_SW(regnum) ) { + addr = (unsigned long)sw; + unat = &sw->ar_unat; + } else { + addr = (unsigned long)regs; + unat = &sw->caller_unat; + } + DPRINT(("tmp_base=%lx switch_stack=%s offset=%d\n", + addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum))); + /* + * add offset from base of struct + * and do it ! 
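(For the UNAT update a few lines below: the NaT bit that corresponds to a spilled register is indexed by bits 8:3 of the address the value was spilled to. A tiny worked example, with a made-up address:)

    addr    = 0xe000000000000138;            /* where the register value was just stored          */
    bit     = (addr >> 3) & 0x3f;            /* (0x138 >> 3) & 0x3f == 39                          */
    bitmask = __IA64_UL(1) << bit;           /* set in *unat for a NaT, cleared for a valid value  */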
*/ + addr += GR_OFFS(regnum); + + *(unsigned long *)addr = val; + + /* + * We need to clear the corresponding UNAT bit to fully emulate the load + * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4 + */ + bitmask = __IA64_UL(1) << (addr >> 3 & 0x3f); + DPRINT(("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, unat, *unat)); + if ( nat ) { + *unat |= bitmask; + } else { + *unat &= ~bitmask; + } + DPRINT(("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, unat,*unat)); +} + +#define IA64_FPH_OFFS(r) (r - IA64_FIRST_ROTATING_FR) + +static void +setfpreg(unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *)regs - 1; + unsigned long addr; + + /* + * From EAS-2.5: FPDisableFault has higher priority than + * Unaligned Fault. Thus, when we get here, we know the partition is + * enabled. + * + * The registers [32-127] are usually saved in the tss. When we get here, + * they are NECESSARILY live because they are only saved explicitly. + * We have 3 ways of updating the values: force a save of the range + * in tss, use a gigantic switch/case statement or generate code on the + * fly to store to the right register. + * For now, we are using the (slow) save/restore way. + */ + if ( regnum >= IA64_FIRST_ROTATING_FR ) { + /* + * force a save of [32-127] to tss + * we use the __() form to avoid fiddling with the dfh bit + */ + __ia64_save_fpu(&current->thread.fph[0]); + + current->thread.fph[IA64_FPH_OFFS(regnum)] = *fpval; + + __ia64_load_fpu(&current->thread.fph[0]); + + /* + * mark the high partition as being used now + * + * This is REQUIRED because the disabled_fph_fault() does + * not set it, it's relying on the faulting instruction to + * do it. In our case the faulty instruction never gets executed + * completely, so we need to toggle the bit. + */ + regs->cr_ipsr |= IA64_PSR_MFH; + } else { + /* + * pt_regs or switch_stack ? + */ + if ( FR_IN_SW(regnum) ) { + addr = (unsigned long)sw; + } else { + addr = (unsigned long)regs; + } + + DPRINT(("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum))); + + addr += FR_OFFS(regnum); + *(struct ia64_fpreg *)addr = *fpval; + + /* + * mark the low partition as being used now + * + * It is highly unlikely that this bit is not already set, but + * let's do it for safety. + */ + regs->cr_ipsr |= IA64_PSR_MFL; + + } +} + +/* + * Those 2 inline functions generate the spilled versions of the constant floating point + * registers which can be used with stfX + */ +static inline void +float_spill_f0(struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("stf.spill [%0]=f0" :: "r"(final) : "memory"); +} + +static inline void +float_spill_f1(struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("stf.spill [%0]=f1" :: "r"(final) : "memory"); +} + +static void +getfpreg(unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *)regs -1; + unsigned long addr; + + /* + * From EAS-2.5: FPDisableFault has higher priority than + * Unaligned Fault. Thus, when we get here, we know the partition is + * enabled. + * + * When regnum > 31, the register is still live and + * we need to force a save to the tss to get access to it. + * See discussion in setfpreg() for reasons and other ways of doing this. + */ + if ( regnum >= IA64_FIRST_ROTATING_FR ) { + + /* + * force a save of [32-127] to tss + * we use the __ia64_save_fpu() form to avoid fiddling with + * the dfh bit.
+ */ + __ia64_save_fpu(¤t->thread.fph[0]); + + *fpval = current->thread.fph[IA64_FPH_OFFS(regnum)]; + } else { + /* + * f0 = 0.0, f1= 1.0. Those registers are constant and are thus + * not saved, we must generate their spilled form on the fly + */ + switch(regnum) { + case 0: + float_spill_f0(fpval); + break; + case 1: + float_spill_f1(fpval); + break; + default: + /* + * pt_regs or switch_stack ? + */ + addr = FR_IN_SW(regnum) ? (unsigned long)sw + : (unsigned long)regs; + + DPRINT(("is_sw=%d tmp_base=%lx offset=0x%x\n", + FR_IN_SW(regnum), addr, FR_OFFS(regnum))); + + addr += FR_OFFS(regnum); + *fpval = *(struct ia64_fpreg *)addr; + } + } +} + + +static void +getreg(unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *)regs -1; + unsigned long addr, *unat; + + if ( regnum >= IA64_FIRST_STACKED_GR ) { + get_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * take care of r0 (read-only always evaluate to 0) + */ + if ( regnum == 0 ) { + *val = 0; + *nat = 0; + return; + } + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + if ( GR_IN_SW(regnum) ) { + addr = (unsigned long)sw; + unat = &sw->ar_unat; + } else { + addr = (unsigned long)regs; + unat = &sw->caller_unat; + } + + DPRINT(("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum))); + + addr += GR_OFFS(regnum); + + *val = *(unsigned long *)addr; + + /* + * do it only when requested + */ + if ( nat ) *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL; +} + +static void +emulate_load_updates(update_t type, load_store_t *ld, struct pt_regs *regs, unsigned long ifa) +{ + /* + * IMPORTANT: + * Given the way we handle unaligned speculative loads, we should + * not get to this point in the code but we keep this sanity check, + * just in case. + */ + if ( ld->x6_op == 1 || ld->x6_op == 3 ) { + printk(KERN_ERR __FUNCTION__": register update on speculative load, error\n"); + die_if_kernel("unaligned reference on specualtive load with register update\n", + regs, 30); + } + + + /* + * at this point, we know that the base register to update is valid i.e., + * it's not r0 + */ + if ( type == UPD_IMMEDIATE ) { + unsigned long imm; + + /* + * Load +Imm: ldXZ r1=[r3],imm(9) + * + * + * form imm9: [13:19] contain the first 7 bits + */ + imm = ld->x << 7 | ld->imm; + + /* + * sign extend (1+8bits) if m set + */ + if (ld->m) imm |= SIGN_EXT9; + + /* + * ifa == r3 and we know that the NaT bit on r3 was clear so + * we can directly use ifa. + */ + ifa += imm; + + setreg(ld->r3, ifa, 0, regs); + + DPRINT(("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld->x, ld->m, imm, ifa)); + + } else if ( ld->m ) { + unsigned long r2; + int nat_r2; + + /* + * Load +Reg Opcode: ldXZ r1=[r3],r2 + * + * Note: that we update r3 even in the case of ldfX.a + * (where the load does not happen) + * + * The way the load algorithm works, we know that r3 does not + * have its NaT bit set (would have gotten NaT consumption + * before getting the unaligned fault). So we can use ifa + * which equals r3 at this point. + * + * IMPORTANT: + * The above statement holds ONLY because we know that we + * never reach this code when trying to do a ldX.s. 
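 *
 * [editor's illustration -- not part of this diff] What the three lines
 * below emulate is the architected post-increment; for a plain
 * "ld8 r1=[r3],r2" the net effect is:
 *
 *	r1       = *r3;			/* the load itself, done by the caller */
 *	r3      += r2;			/* the update emulated here            */
 *	NaT(r3)  = NaT(r2);		/* NaT bit propagated via setreg()     */
 *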
+ * If we ever make it to here on an ldfX.s then + */ + getreg(ld->imm, &r2, &nat_r2, regs); + + ifa += r2; + + /* + * propagate Nat r2 -> r3 + */ + setreg(ld->r3, ifa, nat_r2, regs); + + DPRINT(("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld->imm, r2, ifa, nat_r2)); + } +} + + +static int +emulate_load_int(unsigned long ifa, load_store_t *ld, struct pt_regs *regs) +{ + unsigned long val; + unsigned int len = 1<< ld->x6_sz; + + /* + * the macro supposes sequential access (which is the case) + * if the first byte is an invalid address we return here. Otherwise + * there is a guard page at the top of the user's address page and + * the first access would generate a NaT consumption fault and return + * with a SIGSEGV, which is what we want. + * + * Note: the first argument is ignored + */ + if ( access_ok(VERIFY_READ, (void *)ifa, len) < 0 ) { + DPRINT(("verify area failed on %lx\n", ifa)); + return -1; + } + + /* + * r0, as target, doesn't need to be checked because Illegal Instruction + * faults have higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an + * unaligned reference. + */ + + /* + * ldX.a we don't try to emulate anything but we must + * invalidate the ALAT entry. + * See comment below for explanation on how we handle ldX.a + */ + if ( ld->x6_op != 0x2 ) { + /* + * we rely on the macros in unaligned.h for now i.e., + * we let the compiler figure out how to read memory gracefully. + * + * We need this switch/case because the way the inline function + * works. The code is optimized by the compiler and looks like + * a single switch/case. + */ + switch(len) { + case 2: + val = ia64_get_unaligned((void *)ifa, 2); + break; + case 4: + val = ia64_get_unaligned((void *)ifa, 4); + break; + case 8: + val = ia64_get_unaligned((void *)ifa, 8); + break; + default: + DPRINT(("unknown size: x6=%d\n", ld->x6_sz)); + return -1; + } + + setreg(ld->r1, val, 0, regs); + } + + /* + * check for updates on any kind of loads + */ + if ( ld->op == 0x5 || ld->m ) + emulate_load_updates(ld->op == 0x5 ? UPD_IMMEDIATE: UPD_REG, + ld, regs, ifa); + + /* + * handling of various loads (based on EAS2.4): + * + * ldX.acq (ordered load): + * - acquire semantics would have been used, so force fence instead. + * + * + * ldX.c.clr (check load and clear): + * - if we get to this handler, it's because the entry was not in the ALAT. + * Therefore the operation reverts to a normal load + * + * ldX.c.nc (check load no clear): + * - same as previous one + * + * ldX.c.clr.acq (ordered check load and clear): + * - same as above for c.clr part. The load needs to have acquire semantics. So + * we use the fence semantics which is stronger and thus ensures correctness. + * + * ldX.a (advanced load): + * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the + * address doesn't match requested size alignement. This means that we would + * possibly need more than one load to get the result. + * + * The load part can be handled just like a normal load, however the difficult + * part is to get the right thing into the ALAT. The critical piece of information + * in the base address of the load & size. To do that, a ld.a must be executed, + * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now + * if we use the same target register, we will be okay for the check.a instruction. 
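 *
 * [editor's illustration -- not part of this diff] The pairing referred to
 * above looks like this in the program being emulated:
 *
 *	ld4.a		r1=[r3]		// allocates an ALAT entry for [r3,r3+4)
 *	...
 *	ld4.c.clr	r1=[r3]		// reuses r1 only if that entry survived
 *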
+ * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry + * which would overlap within [r3,r3+X] (the size of the load was store in the + * ALAT). If such an entry is found the entry is invalidated. But this is not good + * enough, take the following example: + * r3=3 + * ld4.a r1=[r3] + * + * Could be emulated by doing: + * ld1.a r1=[r3],1 + * store to temporary; + * ld1.a r1=[r3],1 + * store & shift to temporary; + * ld1.a r1=[r3],1 + * store & shift to temporary; + * ld1.a r1=[r3] + * store & shift to temporary; + * r1=temporary + * + * So int this case, you would get the right value is r1 but the wrong info in + * the ALAT. Notice that you could do it in reverse to finish with address 3 + * but you would still get the size wrong. To get the size right, one needs to + * execute exactly the same kind of load. You could do it from a aligned + * temporary location, but you would get the address wrong. + * + * So no matter what, it is not possible to emulate an advanced load + * correctly. But is that really critical ? + * + * + * Now one has to look at how ld.a is used, one must either do a ld.c.* or + * chck.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no + * entry found in ALAT), and that's perfectly ok because: + * + * - ld.c.*, if the entry is not present a normal load is executed + * - chk.a.*, if the entry is not present, execution jumps to recovery code + * + * In either case, the load can be potentially retried in another form. + * + * So it's okay NOT to do any actual load on an unaligned ld.a. However the ALAT + * must be invalidated for the register (so that's chck.a.*,ld.c.* don't pick up + * a stale entry later) The register base update MUST also be performed. + * + * Now what is the content of the register and its NaT bit in the case we don't + * do the load ? EAS2.4, says (in case an actual load is needed) + * + * - r1 = [r3], Nat = 0 if succeeds + * - r1 = 0 Nat = 0 if trying to access non-speculative memory + * + * For us, there is nothing to do, because both ld.c.* and chk.a.* are going to + * retry and thus eventually reload the register thereby changing Nat and + * register content. + */ + + /* + * when the load has the .acq completer then + * use ordering fence. + */ + if (ld->x6_op == 0x5 || ld->x6_op == 0xa) + mb(); + + /* + * invalidate ALAT entry in case of advanced load + */ + if (ld->x6_op == 0x2) + invala_gr(ld->r1); + + return 0; +} + +static int +emulate_store_int(unsigned long ifa, load_store_t *ld, struct pt_regs *regs) +{ + unsigned long r2; + unsigned int len = 1<< ld->x6_sz; + + /* + * the macro supposes sequential access (which is the case) + * if the first byte is an invalid address we return here. Otherwise + * there is a guard page at the top of the user's address page and + * the first access would generate a NaT consumption fault and return + * with a SIGSEGV, which is what we want. + * + * Note: the first argument is ignored + */ + if ( access_ok(VERIFY_WRITE, (void *)ifa, len) < 0 ) { + DPRINT(("verify area failed on %lx\n",ifa)); + return -1; + } + + /* + * if we get to this handler, Nat bits on both r3 and r2 have already + * been checked. so we don't need to do it + * + * extract the value to be stored + */ + getreg(ld->imm, &r2, 0, regs); + + /* + * we rely on the macros in unaligned.h for now i.e., + * we let the compiler figure out how to read memory gracefully. + * + * We need this switch/case because the way the inline function + * works. 
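 *
 * [editor's illustration -- not part of this diff] Conceptually, for the
 * 4-byte case on a little-endian machine, the put below amounts to:
 *
 *	unsigned char *p = (unsigned char *) ifa;
 *	p[0] = r2; p[1] = r2 >> 8; p[2] = r2 >> 16; p[3] = r2 >> 24;
 *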
The code is optimized by the compiler and looks like + * a single switch/case. + */ + DPRINT(("st%d [%lx]=%lx\n", len, ifa, r2)); + + switch(len) { + case 2: + ia64_put_unaligned(r2, (void *)ifa, 2); + break; + case 4: + ia64_put_unaligned(r2, (void *)ifa, 4); + break; + case 8: + ia64_put_unaligned(r2, (void *)ifa, 8); + break; + default: + DPRINT(("unknown size: x6=%d\n", ld->x6_sz)); + return -1; + } + /* + * stX [r3]=r2,imm(9) + * + * NOTE: + * ld->r3 can never be r0, because r0 would not generate an + * unaligned access. + */ + if ( ld->op == 0x5 ) { + unsigned long imm; + + /* + * form imm9: [12:6] contain first 7bits + */ + imm = ld->x << 7 | ld->r1; + /* + * sign extend (8bits) if m set + */ + if ( ld->m ) imm |= SIGN_EXT9; + /* + * ifa == r3 (NaT is necessarily cleared) + */ + ifa += imm; + + DPRINT(("imm=%lx r3=%lx\n", imm, ifa)); + + setreg(ld->r3, ifa, 0, regs); + } + /* + * we don't have alat_invalidate_multiple() so we need + * to do the complete flush :-<< + */ + ia64_invala(); + + /* + * stX.rel: use fence instead of release + */ + if ( ld->x6_op == 0xd ) mb(); + + return 0; +} + +/* + * floating point operations sizes in bytes + */ +static const unsigned short float_fsz[4]={ + 16, /* extended precision (e) */ + 8, /* integer (8) */ + 4, /* single precision (s) */ + 8 /* double precision (d) */ +}; + +static inline void +mem2float_extended(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldfe f6=[%0];; stf.spill [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +mem2float_integer(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldf8 f6=[%0];; stf.spill [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +mem2float_single(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldfs f6=[%0];; stf.spill [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +mem2float_double(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldfd f6=[%0];; stf.spill [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +float2mem_extended(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldf.fill f6=[%0];; stfe [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +float2mem_integer(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldf.fill f6=[%0];; stf8 [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +float2mem_single(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldf.fill f6=[%0];; stfs [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +float2mem_double(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldf.fill f6=[%0];; stfd [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static int +emulate_load_floatpair(unsigned long ifa, load_store_t *ld, struct pt_regs *regs) +{ + struct ia64_fpreg fpr_init[2]; + struct ia64_fpreg fpr_final[2]; + unsigned long len = float_fsz[ld->x6_sz]; + + if ( access_ok(VERIFY_READ, (void *)ifa, len<<1) < 0 ) { + DPRINT(("verify area failed on %lx\n", ifa)); + return -1; + } + /* + * fr0 & fr1 don't need to be checked because Illegal Instruction + * faults have higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an + * unaligned reference. 
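 *
 * [editor's illustration -- not part of this diff] The two local buffers
 * used below act as staging areas, one element per register of the pair:
 *
 *	fpr_init[i]  : raw bytes copied from the unaligned address
 *	fpr_final[i] : the same value in stf.spill (register) format,
 *	               produced by the mem2float_*() helpers, e.g.
 *	               ldfd f6=[init] ;; stf.spill [final]=f6
 *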
+ */ + + /* + * make sure we get clean buffers + */ + memset(&fpr_init,0, sizeof(fpr_init)); + memset(&fpr_final,0, sizeof(fpr_final)); + + /* + * ldfpX.a: we don't try to emulate anything but we must + * invalidate the ALAT entry and execute updates, if any. + */ + if ( ld->x6_op != 0x2 ) { + /* + * does the unaligned access + */ + memcpy(&fpr_init[0], (void *)ifa, len); + memcpy(&fpr_init[1], (void *)(ifa+len), len); + + DPRINT(("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld->r1, ld->imm, ld->x6_sz)); +#ifdef DEBUG_UNALIGNED_TRAP + { int i; char *c = (char *)&fpr_init; + printk("fpr_init= "); + for(i=0; i < len<<1; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } +#endif + /* + * XXX fixme + * Could optimize inlines by using ldfpX & 2 spills + */ + switch( ld->x6_sz ) { + case 0: + mem2float_extended(&fpr_init[0], &fpr_final[0]); + mem2float_extended(&fpr_init[1], &fpr_final[1]); + break; + case 1: + mem2float_integer(&fpr_init[0], &fpr_final[0]); + mem2float_integer(&fpr_init[1], &fpr_final[1]); + break; + case 2: + mem2float_single(&fpr_init[0], &fpr_final[0]); + mem2float_single(&fpr_init[1], &fpr_final[1]); + break; + case 3: + mem2float_double(&fpr_init[0], &fpr_final[0]); + mem2float_double(&fpr_init[1], &fpr_final[1]); + break; + } +#ifdef DEBUG_UNALIGNED_TRAP + { int i; char *c = (char *)&fpr_final; + printk("fpr_final= "); + for(i=0; i < len<<1; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } +#endif + /* + * XXX fixme + * + * A possible optimization would be to drop fpr_final + * and directly use the storage from the saved context i.e., + * the actual final destination (pt_regs, switch_stack or tss). + */ + setfpreg(ld->r1, &fpr_final[0], regs); + setfpreg(ld->imm, &fpr_final[1], regs); + } + + /* + * Check for updates: only immediate updates are available for this + * instruction. + */ + if ( ld->m ) { + + /* + * the immediate is implicit given the ldsz of the operation: + * single: 8 (2x4) and for all others it's 16 (2x8) + */ + ifa += len<<1; + + /* + * IMPORTANT: + * the fact that we force the NaT of r3 to zero is ONLY valid + * as long as we don't come here with a ldfpX.s. + * For this reason we keep this sanity check + */ + if ( ld->x6_op == 1 || ld->x6_op == 3 ) { + printk(KERN_ERR "%s: register update on speculative load pair, error\n", __FUNCTION__); + } + + + setreg(ld->r3, ifa, 0, regs); + } + + /* + * Invalidate ALAT entries, if any, for both registers. + */ + if ( ld->x6_op == 0x2 ) { + invala_fr(ld->r1); + invala_fr(ld->imm); + } + return 0; +} + + +static int +emulate_load_float(unsigned long ifa, load_store_t *ld, struct pt_regs *regs) +{ + struct ia64_fpreg fpr_init; + struct ia64_fpreg fpr_final; + unsigned long len = float_fsz[ld->x6_sz]; + + /* + * check for load pair because our masking scheme is not fine grain enough + if ( ld->x == 1 ) return emulate_load_floatpair(ifa,ld,regs); + */ + + if ( access_ok(VERIFY_READ, (void *)ifa, len) < 0 ) { + DPRINT(("verify area failed on %lx\n", ifa)); + return -1; + } + /* + * fr0 & fr1 don't need to be checked because Illegal Instruction + * faults have higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an + * unaligned reference. + */ + + + /* + * make sure we get clean buffers + */ + memset(&fpr_init,0, sizeof(fpr_init)); + memset(&fpr_final,0, sizeof(fpr_final)); + + /* + * ldfX.a we don't try to emulate anything but we must + * invalidate the ALAT entry. + * See comments in ldX for descriptions on how the various loads are handled. 
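 *
 * [editor's illustration -- not part of this diff] The access size comes
 * straight from the x6 size field via float_fsz[] above, e.g.:
 *
 *	len = float_fsz[ld->x6_sz];	/* ldfe:16  ldf8:8  ldfs:4  ldfd:8 */
 *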
+ */ + if ( ld->x6_op != 0x2 ) { + + /* + * does the unaligned access + */ + memcpy(&fpr_init, (void *)ifa, len); + + DPRINT(("ld.r1=%d x6_sz=%d\n", ld->r1, ld->x6_sz)); +#ifdef DEBUG_UNALIGNED_TRAP + { int i; char *c = (char *)&fpr_init; + printk("fpr_init= "); + for(i=0; i < len; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } +#endif + /* + * we only do something for x6_op={0,8,9} + */ + switch( ld->x6_sz ) { + case 0: + mem2float_extended(&fpr_init, &fpr_final); + break; + case 1: + mem2float_integer(&fpr_init, &fpr_final); + break; + case 2: + mem2float_single(&fpr_init, &fpr_final); + break; + case 3: + mem2float_double(&fpr_init, &fpr_final); + break; + } +#ifdef DEBUG_UNALIGNED_TRAP + { int i; char *c = (char *)&fpr_final; + printk("fpr_final= "); + for(i=0; i < len; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } +#endif + /* + * XXX fixme + * + * A possible optimization would be to drop fpr_final + * and directly use the storage from the saved context i.e., + * the actual final destination (pt_regs, switch_stack or tss). + */ + setfpreg(ld->r1, &fpr_final, regs); + } + + /* + * check for updates on any loads + */ + if ( ld->op == 0x7 || ld->m ) + emulate_load_updates(ld->op == 0x7 ? UPD_IMMEDIATE: UPD_REG, + ld, regs, ifa); + + + /* + * invalidate ALAT entry in case of advanced floating point loads + */ + if (ld->x6_op == 0x2) + invala_fr(ld->r1); + + return 0; +} + + +static int +emulate_store_float(unsigned long ifa, load_store_t *ld, struct pt_regs *regs) +{ + struct ia64_fpreg fpr_init; + struct ia64_fpreg fpr_final; + unsigned long len = float_fsz[ld->x6_sz]; + + /* + * the macro supposes sequential access (which is the case) + * if the first byte is an invalid address we return here. Otherwise + * there is a guard page at the top of the user's address page and + * the first access would generate a NaT consumption fault and return + * with a SIGSEGV, which is what we want. + * + * Note: the first argument is ignored + */ + if ( access_ok(VERIFY_WRITE, (void *)ifa, len) < 0 ) { + DPRINT(("verify area failed on %lx\n",ifa)); + return -1; + } + + /* + * make sure we get clean buffers + */ + memset(&fpr_init,0, sizeof(fpr_init)); + memset(&fpr_final,0, sizeof(fpr_final)); + + + /* + * if we get to this handler, Nat bits on both r3 and r2 have already + * been checked. so we don't need to do it + * + * extract the value to be stored + */ + getfpreg(ld->imm, &fpr_init, regs); + /* + * during this step, we extract the spilled registers from the saved + * context i.e., we refill. Then we store (no spill) to temporary + * aligned location + */ + switch( ld->x6_sz ) { + case 0: + float2mem_extended(&fpr_init, &fpr_final); + break; + case 1: + float2mem_integer(&fpr_init, &fpr_final); + break; + case 2: + float2mem_single(&fpr_init, &fpr_final); + break; + case 3: + float2mem_double(&fpr_init, &fpr_final); + break; + } + DPRINT(("ld.r1=%d x6_sz=%d\n", ld->r1, ld->x6_sz)); +#ifdef DEBUG_UNALIGNED_TRAP + { int i; char *c = (char *)&fpr_init; + printk("fpr_init= "); + for(i=0; i < len; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } + { int i; char *c = (char *)&fpr_final; + printk("fpr_final= "); + for(i=0; i < len; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } +#endif + + /* + * does the unaligned store + */ + memcpy((void *)ifa, &fpr_final, len); + + /* + * stfX [r3]=r2,imm(9) + * + * NOTE: + * ld->r3 can never be r0, because r0 would not generate an + * unaligned access. 
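 *
 * [editor's illustration -- not part of this diff] A worked example of the
 * imm9 reconstruction done below, for "stfd [r3]=f2,-16" (assuming
 * SIGN_EXT9 sets bits 8..63):
 *
 *	imm  = ld->x << 7 | ld->r1;	/* x=1, imm7=0x70  ->  0xf0 */
 *	imm |= SIGN_EXT9;		/* m=1 (negative)  ->  -16  */
 *	ifa += imm;			/* r3 is decremented by 16  */
 *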
+ */ + if ( ld->op == 0x7 ) { + unsigned long imm; + + /* + * form imm9: [12:6] contain first 7bits + */ + imm = ld->x << 7 | ld->r1; + /* + * sign extend (8bits) if m set + */ + if ( ld->m ) imm |= SIGN_EXT9; + /* + * ifa == r3 (NaT is necessarily cleared) + */ + ifa += imm; + + DPRINT(("imm=%lx r3=%lx\n", imm, ifa)); + + setreg(ld->r3, ifa, 0, regs); + } + /* + * we don't have alat_invalidate_multiple() so we need + * to do the complete flush :-<< + */ + ia64_invala(); + + return 0; +} + +void +ia64_handle_unaligned(unsigned long ifa, struct pt_regs *regs) +{ + static unsigned long unalign_count; + static long last_time; + + struct ia64_psr *ipsr = ia64_psr(regs); + unsigned long *bundle_addr; + unsigned long opcode; + unsigned long op; + load_store_t *insn; + int ret = -1; + + /* + * We flag unaligned references while in kernel as + * errors: the kernel must be fixed. The switch code + * is in ivt.S at entry 30. + * + * So here we keep a simple sanity check. + */ + if ( !user_mode(regs) ) { + die_if_kernel("Unaligned reference while in kernel\n", regs, 30); + /* NOT_REACHED */ + } + + /* + * Make sure we log the unaligned access, so that user/sysadmin can notice it + * and eventually fix the program. + * + * We don't want to do that for every access so we pace it with jiffies. + */ + if ( unalign_count > 5 && jiffies - last_time > 5*HZ ) unalign_count = 0; + if ( ++unalign_count < 5 ) { + last_time = jiffies; + printk("%s(%d): unaligned trap accessing %016lx (ip=%016lx)\n", + current->comm, current->pid, ifa, regs->cr_iip + ipsr->ri); + + } + + DPRINT(("iip=%lx ifa=%lx isr=%lx\n", regs->cr_iip, ifa, regs->cr_ipsr)); + DPRINT(("ISR.ei=%d ISR.sp=%d\n", ipsr->ri, ipsr->it)); + + bundle_addr = (unsigned long *)(regs->cr_iip); + + /* + * extract the instruction from the bundle given the slot number + */ + switch ( ipsr->ri ) { + case 0: op = *bundle_addr >> 5; + break; + + case 1: op = *bundle_addr >> 46 | (*(bundle_addr+1) & 0x7fffff)<<18; + break; + + case 2: op = *(bundle_addr+1) >> 23; + break; + } + + insn = (load_store_t *)&op; + opcode = op & IA64_OPCODE_MASK; + + DPRINT(("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d " + "ld.x6=0x%x ld.m=%d ld.op=%d\n", + opcode, + insn->qp, + insn->r1, + insn->imm, + insn->r3, + insn->x, + insn->hint, + insn->x6_sz, + insn->m, + insn->op)); + + /* + * IMPORTANT: + * Notice that the swictch statement DOES not cover all possible instructions + * that DO generate unaligned references. This is made on purpose because for some + * instructions it DOES NOT make sense to try and emulate the access. Sometimes it + * is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e., + * the program will get a signal and die: + * + * load/store: + * - ldX.spill + * - stX.spill + * Reason: RNATs are based on addresses + * + * synchronization: + * - cmpxchg + * - fetchadd + * - xchg + * Reason: ATOMIC operations cannot be emulated properly using multiple + * instructions. + * + * speculative loads: + * - ldX.sZ + * Reason: side effects, code must be ready to deal with failure so simpler + * to let the load fail. + * --------------------------------------------------------------------------------- + * XXX fixme + * + * I would like to get rid of this switch case and do something + * more elegant. 
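 *
 * [editor's illustration -- not part of this diff] Typical user code that
 * ends up in the switch below is nothing more exotic than:
 *
 *	char buf[16];
 *	long x = *(long *) (buf + 1);	/* misaligned ld8 -> unaligned fault */
 *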
+ */ + switch(opcode) { + case LDS_OP: + case LDSA_OP: + case LDS_IMM_OP: + case LDSA_IMM_OP: + case LDFS_OP: + case LDFSA_OP: + case LDFS_IMM_OP: + /* + * The instruction will be retried with defered exceptions + * turned on, and we should get Nat bit installed + * + * IMPORTANT: + * When PSR_ED is set, the register & immediate update + * forms are actually executed even though the operation + * failed. So we don't need to take care of this. + */ + DPRINT(("forcing PSR_ED\n")); + regs->cr_ipsr |= IA64_PSR_ED; + return; + + case LD_OP: + case LDA_OP: + case LDBIAS_OP: + case LDACQ_OP: + case LDCCLR_OP: + case LDCNC_OP: + case LDCCLRACQ_OP: + case LD_IMM_OP: + case LDA_IMM_OP: + case LDBIAS_IMM_OP: + case LDACQ_IMM_OP: + case LDCCLR_IMM_OP: + case LDCNC_IMM_OP: + case LDCCLRACQ_IMM_OP: + ret = emulate_load_int(ifa, insn, regs); + break; + case ST_OP: + case STREL_OP: + case ST_IMM_OP: + case STREL_IMM_OP: + ret = emulate_store_int(ifa, insn, regs); + break; + case LDF_OP: + case LDFA_OP: + case LDFCCLR_OP: + case LDFCNC_OP: + case LDF_IMM_OP: + case LDFA_IMM_OP: + case LDFCCLR_IMM_OP: + case LDFCNC_IMM_OP: + ret = insn->x ? + emulate_load_floatpair(ifa, insn, regs): + emulate_load_float(ifa, insn, regs); + break; + case STF_OP: + case STF_IMM_OP: + ret = emulate_store_float(ifa, insn, regs); + } + + DPRINT(("ret=%d\n", ret)); + if ( ret ) { + lock_kernel(); + force_sig(SIGSEGV, current); + unlock_kernel(); + } else { + /* + * given today's architecture this case is not likely to happen + * because a memory access instruction (M) can never be in the + * last slot of a bundle. But let's keep it for now. + */ + if ( ipsr->ri == 2 ) regs->cr_iip += 16; + ipsr->ri = ++ipsr->ri & 3; + } + + DPRINT(("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip)); +} diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c new file mode 100644 index 000000000..c2b772e68 --- /dev/null +++ b/arch/ia64/kernel/unwind.c @@ -0,0 +1,118 @@ +/* + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <linux/kernel.h> +#include <linux/sched.h> + +#include <asm/unwind.h> + +void +ia64_unwind_init_from_blocked_task (struct ia64_frame_info *info, struct task_struct *t) +{ + struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16); + unsigned long sol, limit, top; + + memset(info, 0, sizeof(*info)); + + sol = (sw->ar_pfs >> 7) & 0x7f; /* size of locals */ + + limit = (unsigned long) t + IA64_RBS_OFFSET; + top = sw->ar_bspstore; + if (top - (unsigned long) t >= IA64_STK_OFFSET) + top = limit; + + info->regstk.limit = (unsigned long *) limit; + info->regstk.top = (unsigned long *) top; + info->bsp = ia64_rse_skip_regs(info->regstk.top, -sol); + info->top_rnat = sw->ar_rnat; + info->cfm = sw->ar_pfs; + info->ip = sw->b0; +} + +void +ia64_unwind_init_from_current (struct ia64_frame_info *info, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long sol, sof, *bsp, limit, top; + + limit = (unsigned long) current + IA64_RBS_OFFSET; + top = sw->ar_bspstore; + if (top - (unsigned long) current >= IA64_STK_OFFSET) + top = limit; + + memset(info, 0, sizeof(*info)); + + sol = (sw->ar_pfs >> 7) & 0x7f; /* size of frame */ + info->regstk.limit = (unsigned long *) limit; + info->regstk.top = (unsigned long *) top; + info->top_rnat = sw->ar_rnat; + + /* this gives us the bsp top level frame (kdb interrupt frame): */ + bsp = ia64_rse_skip_regs((unsigned long *) top, -sol); + + /* now skip past the interrupt 
frame: */ + sof = regs->cr_ifs & 0x7f; /* size of frame */ + info->cfm = regs->cr_ifs; + info->bsp = ia64_rse_skip_regs(bsp, -sof); + info->ip = regs->cr_iip; +} + +static unsigned long +read_reg (struct ia64_frame_info *info, int regnum, int *is_nat) +{ + unsigned long *addr, *rnat_addr, rnat; + + addr = ia64_rse_skip_regs(info->bsp, regnum); + if (addr < info->regstk.limit || addr >= info->regstk.top || ((long) addr & 0x7) != 0) { + *is_nat = 1; + return 0xdeadbeefdeadbeef; + } + rnat_addr = ia64_rse_rnat_addr(addr); + + if (rnat_addr >= info->regstk.top) + rnat = info->top_rnat; + else + rnat = *rnat_addr; + *is_nat = (rnat & (1UL << ia64_rse_slot_num(addr))) != 0; + return *addr; +} + +/* + * On entry, info->regstk.top should point to the register backing + * store for r32. + */ +int +ia64_unwind_to_previous_frame (struct ia64_frame_info *info) +{ + unsigned long sol, cfm = info->cfm; + int is_nat; + + sol = (cfm >> 7) & 0x7f; /* size of locals */ + + /* + * In general, we would have to make use of unwind info to + * unwind an IA-64 stack, but for now gcc uses a special + * convention that makes this possible without full-fledged + * unwindo info. Specifically, we expect "rp" in the second + * last, and "ar.pfs" in the last local register, so the + * number of locals in a frame must be at least two. If it's + * less than that, we reached the end of the C call stack. + */ + if (sol < 2) + return -1; + + info->ip = read_reg(info, sol - 2, &is_nat); + if (is_nat) + return -1; + + cfm = read_reg(info, sol - 1, &is_nat); + if (is_nat) + return -1; + + sol = (cfm >> 7) & 0x7f; + + info->cfm = cfm; + info->bsp = ia64_rse_skip_regs(info->bsp, -sol); + return 0; +} |
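Editor's note: a minimal sketch of how the interface above might be used to walk a blocked task's call chain. `task` is a hypothetical `struct task_struct *`, and reading `info.ip` directly assumes the `ia64_frame_info` layout declared in asm/unwind.h; this is an illustration, not code from the commit.

	struct ia64_frame_info info;

	ia64_unwind_init_from_blocked_task(&info, task);
	do {
		printk("ip=0x%lx\n", info.ip);	/* return address of each frame */
	} while (ia64_unwind_to_previous_frame(&info) == 0);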