path: root/arch/ia64/kernel
author  Ralf Baechle <ralf@linux-mips.org>  2000-02-23 00:40:54 +0000
committer  Ralf Baechle <ralf@linux-mips.org>  2000-02-23 00:40:54 +0000
commit  529c593ece216e4aaffd36bd940cb94f1fa63129 (patch)
tree  78f1c0b805f5656aa7b0417a043c5346f700a2cf /arch/ia64/kernel
parent  0bd079751d25808d1972baee5c4eaa1db2227257 (diff)
Merge with 2.3.43. I did ignore all modifications to the qlogicisp.c
driver due to the Origin A64 hacks.
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r--  arch/ia64/kernel/Makefile  42
-rw-r--r--  arch/ia64/kernel/acpi.c  308
-rw-r--r--  arch/ia64/kernel/efi.c  365
-rw-r--r--  arch/ia64/kernel/efi_stub.S  141
-rw-r--r--  arch/ia64/kernel/entry.S  1261
-rw-r--r--  arch/ia64/kernel/entry.h  8
-rw-r--r--  arch/ia64/kernel/fw-emu.c  444
-rw-r--r--  arch/ia64/kernel/gate.S  200
-rw-r--r--  arch/ia64/kernel/head.S  646
-rw-r--r--  arch/ia64/kernel/init_task.c  31
-rw-r--r--  arch/ia64/kernel/irq.c  657
-rw-r--r--  arch/ia64/kernel/irq_default.c  30
-rw-r--r--  arch/ia64/kernel/irq_internal.c  71
-rw-r--r--  arch/ia64/kernel/irq_lock.c  287
-rw-r--r--  arch/ia64/kernel/ivt.S  1342
-rw-r--r--  arch/ia64/kernel/machvec.c  48
-rw-r--r--  arch/ia64/kernel/mca.c  842
-rw-r--r--  arch/ia64/kernel/mca_asm.S  621
-rw-r--r--  arch/ia64/kernel/pal.S  119
-rw-r--r--  arch/ia64/kernel/pci-dma.c  56
-rw-r--r--  arch/ia64/kernel/pci.c  239
-rw-r--r--  arch/ia64/kernel/perfmon.c  227
-rw-r--r--  arch/ia64/kernel/process.c  421
-rw-r--r--  arch/ia64/kernel/ptrace.c  653
-rw-r--r--  arch/ia64/kernel/sal.c  157
-rw-r--r--  arch/ia64/kernel/sal_stub.S  116
-rw-r--r--  arch/ia64/kernel/semaphore.c  336
-rw-r--r--  arch/ia64/kernel/setup.c  326
-rw-r--r--  arch/ia64/kernel/signal.c  537
-rw-r--r--  arch/ia64/kernel/smp.c  777
-rw-r--r--  arch/ia64/kernel/sys_ia64.c  216
-rw-r--r--  arch/ia64/kernel/time.c  290
-rw-r--r--  arch/ia64/kernel/traps.c  423
-rw-r--r--  arch/ia64/kernel/unaligned.c  1554
-rw-r--r--  arch/ia64/kernel/unwind.c  118
35 files changed, 13909 insertions, 0 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
new file mode 100644
index 000000000..7cb47da72
--- /dev/null
+++ b/arch/ia64/kernel/Makefile
@@ -0,0 +1,42 @@
+#
+# Makefile for the linux kernel.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+.S.s:
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -E -o $*.s $<
+.S.o:
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -c -o $*.o $<
+
+all: kernel.o head.o init_task.o
+
+O_TARGET := kernel.o
+O_OBJS := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_default.o irq_internal.o ivt.o \
+ pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o sal_stub.o semaphore.o setup.o signal.o \
+ sys_ia64.o traps.o time.o unaligned.o unwind.o
+#O_OBJS := fpreg.o
+#OX_OBJS := ia64_ksyms.o
+
+ifeq ($(CONFIG_IA64_GENERIC),y)
+O_OBJS += machvec.o
+endif
+
+ifdef CONFIG_PCI
+O_OBJS += pci.o
+endif
+
+ifdef CONFIG_SMP
+O_OBJS += smp.o irq_lock.o
+endif
+
+ifeq ($(CONFIG_MCA),y)
+O_OBJS += mca.o mca_asm.o
+endif
+
+clean::
+
+include $(TOPDIR)/Rules.make
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
new file mode 100644
index 000000000..e289efab6
--- /dev/null
+++ b/arch/ia64/kernel/acpi.c
@@ -0,0 +1,308 @@
+/*
+ * Advanced Configuration and Power Interface
+ *
+ * Based on 'ACPI Specification 1.0b' February 2, 1999 and
+ * 'IA-64 Extensions to ACPI Specification' Revision 0.6
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
+ */
+
+#include <linux/config.h>
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/acpi-ext.h>
+#include <asm/page.h>
+#include <asm/efi.h>
+#include <asm/io.h>
+#include <asm/iosapic.h>
+#include <asm/irq.h>
+
+#undef ACPI_DEBUG /* Guess what this does? */
+
+#ifdef CONFIG_SMP
+extern unsigned long ipi_base_addr;
+#endif
+
+/* These are ugly but will be reclaimed by the kernel */
+int __initdata acpi_cpus = 0;
+int __initdata acpi_apic_map[32];
+int __initdata cpu_cnt = 0;
+
+void (*pm_idle) (void);
+
+/*
+ * Identify usable CPU's and remember them for SMP bringup later.
+ */
+static void __init
+acpi_lsapic(char *p)
+{
+ int add = 1;
+
+ acpi_entry_lsapic_t *lsapic = (acpi_entry_lsapic_t *) p;
+
+ if ((lsapic->flags & LSAPIC_PRESENT) == 0)
+ return;
+
+ printk(" CPU %d (%.04x:%.04x): ", cpu_cnt, lsapic->eid, lsapic->id);
+
+ if ((lsapic->flags & LSAPIC_ENABLED) == 0) {
+ printk("Disabled.\n");
+ add = 0;
+ } else if (lsapic->flags & LSAPIC_PERFORMANCE_RESTRICTED) {
+ printk("Performance Restricted; ignoring.\n");
+ add = 0;
+ }
+
+ if (add) {
+ printk("Available.\n");
+ acpi_cpus++;
+ acpi_apic_map[cpu_cnt] = (lsapic->id << 8) | lsapic->eid;
+ }
+
+ cpu_cnt++;
+}
+
+/*
+ * Find all IOSAPICs and tag the iosapic_vector structure with the appropriate
+ * base addresses.
+ */
+static void __init
+acpi_iosapic(char *p)
+{
+ /*
+ * This is not good. ACPI is not necessarily limited to CONFIG_IA64_SV, yet
+ * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be
+ * a means for platform_setup() to register ACPI handlers?
+ */
+#ifdef CONFIG_IA64_DIG
+ acpi_entry_iosapic_t *iosapic = (acpi_entry_iosapic_t *) p;
+ unsigned int ver;
+ int l, v, pins;
+
+ ver = iosapic_version(iosapic->address);
+ pins = (ver >> 16) & 0xff;
+
+ printk("IOSAPIC Version %x.%x: address 0x%lx IRQs 0x%x - 0x%x\n",
+ (ver & 0xf0) >> 4, (ver & 0x0f), iosapic->address,
+ iosapic->irq_base, iosapic->irq_base + pins);
+
+ for (l = 0; l < pins; l++) {
+ v = map_legacy_irq(iosapic->irq_base + l);
+ if (v > IA64_MAX_VECTORED_IRQ) {
+ printk(" !!! IRQ %d > 255\n", v);
+ continue;
+ }
+ /* XXX Check for IOSAPIC collisions */
+ iosapic_addr(v) = (unsigned long) ioremap(iosapic->address, 0);
+ iosapic_baseirq(v) = iosapic->irq_base;
+ }
+ iosapic_init(iosapic->address);
+#endif
+}
+
+
+/*
+ * Configure legacy IRQ information in iosapic_vector
+ */
+static void __init
+acpi_legacy_irq(char *p)
+{
+ /*
+ * This is not good. ACPI is not necessarily limited to CONFIG_IA64_SV, yet
+ * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be
+ * a means for platform_setup() to register ACPI handlers?
+ */
+#ifdef CONFIG_IA64_IRQ_ACPI
+ acpi_entry_int_override_t *legacy = (acpi_entry_int_override_t *) p;
+ unsigned char vector;
+ int i;
+
+ vector = map_legacy_irq(legacy->isa_irq);
+
+ /*
+ * Clobber any old pin mapping. It may be that it gets replaced later on
+ */
+ for (i = 0; i < IA64_MAX_VECTORED_IRQ; i++) {
+ if (i == vector)
+ continue;
+ if (iosapic_pin(i) == iosapic_pin(vector))
+ iosapic_pin(i) = 0xff;
+ }
+
+ iosapic_pin(vector) = legacy->pin;
+ iosapic_bus(vector) = BUS_ISA; /* This table only overrides the ISA devices */
+ iosapic_busdata(vector) = 0;
+
+ /*
+ * External timer tick is special...
+ */
+ if (vector != TIMER_IRQ)
+ iosapic_dmode(vector) = IO_SAPIC_LOWEST_PRIORITY;
+ else
+ iosapic_dmode(vector) = IO_SAPIC_FIXED;
+
+ /* See MPS 1.4 section 4.3.4 */
+ switch (legacy->flags) {
+ case 0x5:
+ iosapic_polarity(vector) = IO_SAPIC_POL_HIGH;
+ iosapic_trigger(vector) = IO_SAPIC_EDGE;
+ break;
+ case 0x8:
+ iosapic_polarity(vector) = IO_SAPIC_POL_LOW;
+ iosapic_trigger(vector) = IO_SAPIC_EDGE;
+ break;
+ case 0xd:
+ iosapic_polarity(vector) = IO_SAPIC_POL_HIGH;
+ iosapic_trigger(vector) = IO_SAPIC_LEVEL;
+ break;
+ case 0xf:
+ iosapic_polarity(vector) = IO_SAPIC_POL_LOW;
+ iosapic_trigger(vector) = IO_SAPIC_LEVEL;
+ break;
+ default:
+ printk(" ACPI Legacy IRQ 0x%02x: Unknown flags 0x%x\n", legacy->isa_irq,
+ legacy->flags);
+ break;
+ }
+
+#ifdef ACPI_DEBUG
+ printk("Legacy ISA IRQ %x -> IA64 Vector %x IOSAPIC Pin %x Active %s %s Trigger\n",
+ legacy->isa_irq, vector, iosapic_pin(vector),
+ ((iosapic_polarity(vector) == IO_SAPIC_POL_LOW) ? "Low" : "High"),
+ ((iosapic_trigger(vector) == IO_SAPIC_LEVEL) ? "Level" : "Edge"));
+#endif /* ACPI_DEBUG */
+
+#endif /* CONFIG_IA64_IRQ_ACPI */
+}
+
+/*
+ * Info on platform interrupt sources: NMI, PMI, INIT, etc.
+ */
+static void __init
+acpi_platform(char *p)
+{
+ acpi_entry_platform_src_t *plat = (acpi_entry_platform_src_t *) p;
+
+ printk("PLATFORM: IOSAPIC %x -> Vector %lx on CPU %.04u:%.04u\n",
+ plat->iosapic_vector, plat->global_vector, plat->eid, plat->id);
+}
+
+/*
+ * Parse the ACPI Multiple SAPIC Table
+ */
+static void __init
+acpi_parse_msapic(acpi_sapic_t *msapic)
+{
+ char *p, *end;
+
+ memset(&acpi_apic_map, -1, sizeof(acpi_apic_map));
+
+#ifdef CONFIG_SMP
+ /* Base address of IPI Message Block */
+ ipi_base_addr = ioremap(msapic->interrupt_block, 0);
+#endif
+
+ p = (char *) (msapic + 1);
+ end = p + (msapic->header.length - sizeof(acpi_sapic_t));
+
+ while (p < end) {
+
+ switch (*p) {
+ case ACPI_ENTRY_LOCAL_SAPIC:
+ acpi_lsapic(p);
+ break;
+
+ case ACPI_ENTRY_IO_SAPIC:
+ acpi_iosapic(p);
+ break;
+
+ case ACPI_ENTRY_INT_SRC_OVERRIDE:
+ acpi_legacy_irq(p);
+ break;
+
+ case ACPI_ENTRY_PLATFORM_INT_SOURCE:
+ acpi_platform(p);
+ break;
+
+ default:
+ break;
+ }
+
+ /* Move to next table entry. */
+ p += *(p + 1);
+ }
+
+ /* Make bootup pretty */
+ printk(" %d CPUs available, %d CPUs total\n", acpi_cpus, cpu_cnt);
+}
+
+int __init
+acpi_parse(acpi_rsdp_t *rsdp)
+{
+ acpi_rsdt_t *rsdt;
+ acpi_desc_table_hdr_t *hdrp;
+ long tables, i;
+
+ if (!rsdp) {
+ printk("Uh-oh, no ACPI Root System Description Pointer table!\n");
+ return 0;
+ }
+
+ if (strncmp(rsdp->signature, ACPI_RSDP_SIG, ACPI_RSDP_SIG_LEN)) {
+ printk("Uh-oh, ACPI RSDP signature incorrect!\n");
+ return 0;
+ }
+
+ rsdp->rsdt = __va(rsdp->rsdt);
+ rsdt = rsdp->rsdt;
+ if (strncmp(rsdt->header.signature, ACPI_RSDT_SIG, ACPI_RSDT_SIG_LEN)) {
+ printk("Uh-oh, ACPI RDST signature incorrect!\n");
+ return 0;
+ }
+
+ printk("ACPI: %.6s %.8s %d.%d\n", rsdt->header.oem_id, rsdt->header.oem_table_id,
+ rsdt->header.oem_revision >> 16, rsdt->header.oem_revision & 0xffff);
+
+ tables = (rsdt->header.length - sizeof(acpi_desc_table_hdr_t)) / 8;
+ for (i = 0; i < tables; i++) {
+ hdrp = (acpi_desc_table_hdr_t *) __va(rsdt->entry_ptrs[i]);
+
+ /* Only interested in the MSAPIC table for now ... */
+ if (strncmp(hdrp->signature, ACPI_SAPIC_SIG, ACPI_SAPIC_SIG_LEN) != 0)
+ continue;
+
+ acpi_parse_msapic((acpi_sapic_t *) hdrp);
+ } /* for() */
+
+ if (acpi_cpus == 0) {
+ printk("ACPI: Found 0 CPUS; assuming 1\n");
+ acpi_cpus = 1; /* We've got at least one of these, no? */
+ }
+ return 1;
+}
+
+const char *
+acpi_get_sysname (void)
+{
+ /* the following should go away once we have an ACPI parser: */
+#ifdef CONFIG_IA64_GENERIC
+ return "hpsim";
+#else
+# if defined (CONFIG_IA64_HP_SIM)
+ return "hpsim";
+# elif defined (CONFIG_IA64_SGI_SN1_SIM)
+ return "sn1";
+# elif defined (CONFIG_IA64_DIG)
+ return "dig";
+# else
+# error Unknown platform. Fix acpi.c.
+# endif
+#endif
+}
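
As an illustrative aside (not part of this commit): acpi_parse_msapic() above walks a packed table in which each entry carries its type code in byte 0 and its total length in byte 1, advancing with "p += *(p + 1)". A minimal stand-alone C sketch of that type/length walk follows; the sample records are made up, and the bounds check is an addition for the sketch -- the kernel loop trusts the table it is given.

#include <stdio.h>

/* One record: byte 0 = type code, byte 1 = total record length in bytes. */
static void handle_entry(const unsigned char *rec)
{
	printf("entry type %u, length %u\n", (unsigned) rec[0], (unsigned) rec[1]);
}

/* Same walk as acpi_parse_msapic(): advance by each record's own length byte. */
static void walk_table(const unsigned char *p, const unsigned char *end)
{
	while (p + 1 < end) {
		unsigned char len = p[1];

		if (len == 0 || p + len > end)
			break;		/* safety check added for the sketch only */
		handle_entry(p);
		p += len;		/* the "p += *(p + 1)" step */
	}
}

int main(void)
{
	/* two fake records: type 0 of length 4, type 1 of length 6 */
	unsigned char table[] = { 0, 4, 0xaa, 0xbb, 1, 6, 1, 2, 3, 4 };

	walk_table(table, table + sizeof(table));
	return 0;
}
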
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
new file mode 100644
index 000000000..dd7de2ab0
--- /dev/null
+++ b/arch/ia64/kernel/efi.c
@@ -0,0 +1,365 @@
+/*
+ * Extensible Firmware Interface
+ *
+ * Based on Extensible Firmware Interface Specification version 0.9 April 30, 1999
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Hewlett-Packard Co.
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * Not all EFI Runtime Services are implemented yet, as EFI only
+ * supports physical mode addressing on SoftSDV. This is to be fixed
+ * in a future version. --drummond 1999-07-20
+ *
+ * Implemented EFI runtime services and virtual mode calls. --davidm
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/time.h>
+
+#include <asm/efi.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+
+#define EFI_DEBUG
+
+extern efi_status_t efi_call_phys (void *, ...);
+
+struct efi efi;
+
+static efi_runtime_services_t *runtime;
+
+static efi_status_t
+phys_get_time (efi_time_t *tm, efi_time_cap_t *tc)
+{
+ return efi_call_phys(__va(runtime->get_time), __pa(tm), __pa(tc));
+}
+
+static efi_status_t
+phys_set_time (efi_time_t *tm)
+{
+ return efi_call_phys(__va(runtime->set_time), __pa(tm));
+}
+
+static efi_status_t
+phys_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm)
+{
+ return efi_call_phys(__va(runtime->get_wakeup_time), __pa(enabled), __pa(pending),
+ __pa(tm));
+}
+
+static efi_status_t
+phys_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm)
+{
+ return efi_call_phys(__va(runtime->set_wakeup_time), enabled, __pa(tm));
+}
+
+static efi_status_t
+phys_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr,
+ unsigned long *data_size, void *data)
+{
+ return efi_call_phys(__va(runtime->get_variable), __pa(name), __pa(vendor), __pa(attr),
+ __pa(data_size), __pa(data));
+}
+
+static efi_status_t
+phys_get_next_variable (unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor)
+{
+ return efi_call_phys(__va(runtime->get_next_variable), __pa(name_size), __pa(name),
+ __pa(vendor));
+}
+
+static efi_status_t
+phys_set_variable (efi_char16_t *name, efi_guid_t *vendor, u32 attr,
+ unsigned long data_size, void *data)
+{
+ return efi_call_phys(__va(runtime->set_variable), __pa(name), __pa(vendor), attr,
+ data_size, __pa(data));
+}
+
+static efi_status_t
+phys_get_next_high_mono_count (u64 *count)
+{
+ return efi_call_phys(__va(runtime->get_next_high_mono_count), __pa(count));
+}
+
+static void
+phys_reset_system (int reset_type, efi_status_t status,
+ unsigned long data_size, efi_char16_t *data)
+{
+ efi_call_phys(__va(runtime->reset_system), status, data_size, __pa(data));
+}
+
+/*
+ * Converts Gregorian date to seconds since 1970-01-01 00:00:00.
+ * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
+ * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
+ *
+ * [For the Julian calendar (which was used in Russia before 1917,
+ * Britain & colonies before 1752, anywhere else before 1582,
+ * and is still in use by some communities) leave out the
+ * -year/100+year/400 terms, and add 10.]
+ *
+ * This algorithm was first published by Gauss (I think).
+ *
+ * WARNING: this function will overflow on 2106-02-07 06:28:16 on
+ * machines where long is 32-bit! (However, as time_t is signed, we
+ * will already get problems at other places on 2038-01-19 03:14:08)
+ */
+static inline unsigned long
+mktime (unsigned int year, unsigned int mon, unsigned int day, unsigned int hour,
+ unsigned int min, unsigned int sec)
+{
+ if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */
+ mon += 12; /* Puts Feb last since it has leap day */
+ year -= 1;
+ }
+ return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)
+ + year*365 - 719499
+ )*24 + hour /* now have hours */
+ )*60 + min /* now have minutes */
+ )*60 + sec; /* finally seconds */
+}
+
+void
+efi_gettimeofday (struct timeval *tv)
+{
+ efi_time_t tm;
+
+ memset(tv, 0, sizeof(tv));
+ if ((*efi.get_time)(&tm, 0) != EFI_SUCCESS)
+ return;
+
+ tv->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second);
+ tv->tv_usec = tm.nanosecond / 1000;
+}
+
+/*
+ * Walks the EFI memory map and calls CALLBACK once for each EFI
+ * memory descriptor that has memory that is available for OS use.
+ */
+void
+efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
+{
+ int prev_valid = 0;
+ struct range {
+ u64 start;
+ u64 end;
+ } prev, curr;
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ u64 efi_desc_size, start, end;
+
+ efi_map_start = __va(ia64_boot_param.efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param.efi_memmap_size;
+ efi_desc_size = ia64_boot_param.efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+ switch (md->type) {
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_CONVENTIONAL_MEMORY:
+#ifndef CONFIG_IA64_VIRTUAL_MEM_MAP
+ if (md->phys_addr > 1024*1024*1024UL) {
+ printk("Warning: ignoring %luMB of memory above 1GB!\n",
+ md->num_pages >> 8);
+ md->type = EFI_UNUSABLE_MEMORY;
+ continue;
+ }
+#endif
+
+ curr.start = PAGE_OFFSET + md->phys_addr;
+ curr.end = curr.start + (md->num_pages << 12);
+
+ if (!prev_valid) {
+ prev = curr;
+ prev_valid = 1;
+ } else {
+ if (curr.start < prev.start)
+ printk("Oops: EFI memory table not ordered!\n");
+
+ if (prev.end == curr.start) {
+ /* merge two consecutive memory ranges */
+ prev.end = curr.end;
+ } else {
+ start = PAGE_ALIGN(prev.start);
+ end = prev.end & PAGE_MASK;
+ if ((end > start) && (*callback)(start, end, arg) < 0)
+ return;
+ prev = curr;
+ }
+ }
+ break;
+
+ default:
+ continue;
+ }
+ }
+ if (prev_valid) {
+ start = PAGE_ALIGN(prev.start);
+ end = prev.end & PAGE_MASK;
+ if (end > start)
+ (*callback)(start, end, arg);
+ }
+}
+
+void __init
+efi_init (void)
+{
+ void *efi_map_start, *efi_map_end, *p;
+ efi_config_table_t *config_tables;
+ efi_memory_desc_t *md;
+ efi_char16_t *c16;
+ u64 efi_desc_size;
+ char vendor[100] = "unknown";
+ int i;
+
+ efi.systab = __va(ia64_boot_param.efi_systab);
+
+ /*
+ * Verify the EFI Table
+ */
+ if (efi.systab == NULL)
+ panic("Woah! Can't find EFI system table.\n");
+ if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+ panic("Woah! EFI system table signature incorrect\n");
+ if (efi.systab->hdr.revision != EFI_SYSTEM_TABLE_REVISION)
+ printk("Warning: EFI system table version mismatch: "
+ "got %d.%02d, expected %d.%02d\n",
+ efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff,
+ EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff);
+
+ config_tables = __va(efi.systab->tables);
+
+ /* Show what we know for posterity */
+ c16 = __va(efi.systab->fw_vendor);
+ if (c16) {
+ for (i = 0;i < sizeof(vendor) && *c16; ++i)
+ vendor[i] = *c16++;
+ vendor[i] = '\0';
+ }
+
+ printk("EFI v%u.%.02u by %s:",
+ efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor);
+
+ for (i = 0; i < efi.systab->nr_tables; i++) {
+ if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
+ efi.mps = __va(config_tables[i].table);
+ printk(" MPS=0x%lx", config_tables[i].table);
+ } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
+ efi.acpi = __va(config_tables[i].table);
+ printk(" ACPI=0x%lx", config_tables[i].table);
+ } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
+ efi.smbios = __va(config_tables[i].table);
+ printk(" SMBIOS=0x%lx", config_tables[i].table);
+ } else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
+ efi.sal_systab = __va(config_tables[i].table);
+ printk(" SALsystab=0x%lx", config_tables[i].table);
+ }
+ }
+ printk("\n");
+
+ runtime = __va(efi.systab->runtime);
+ efi.get_time = phys_get_time;
+ efi.set_time = phys_set_time;
+ efi.get_wakeup_time = phys_get_wakeup_time;
+ efi.set_wakeup_time = phys_set_wakeup_time;
+ efi.get_variable = phys_get_variable;
+ efi.get_next_variable = phys_get_next_variable;
+ efi.set_variable = phys_set_variable;
+ efi.get_next_high_mono_count = phys_get_next_high_mono_count;
+ efi.reset_system = phys_reset_system;
+
+ efi_map_start = __va(ia64_boot_param.efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param.efi_memmap_size;
+ efi_desc_size = ia64_boot_param.efi_memdesc_size;
+
+#ifdef EFI_DEBUG
+ /* print EFI memory map: */
+ for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) {
+ md = p;
+ printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n",
+ i, md->type, md->attribute,
+ md->phys_addr, md->phys_addr + (md->num_pages<<12) - 1, md->num_pages >> 8);
+ }
+#endif
+}
+
+void
+efi_enter_virtual_mode (void)
+{
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ efi_status_t status;
+ u64 efi_desc_size;
+
+ efi_map_start = __va(ia64_boot_param.efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param.efi_memmap_size;
+ efi_desc_size = ia64_boot_param.efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+ if (md->attribute & EFI_MEMORY_RUNTIME) {
+ /*
+ * Some descriptors have multiple bits set, so the order of
+ * the tests is relevant.
+ */
+ if (md->attribute & EFI_MEMORY_WB) {
+ md->virt_addr = (u64) __va(md->phys_addr);
+ } else if (md->attribute & EFI_MEMORY_UC) {
+ md->virt_addr = (u64) ioremap(md->phys_addr, 0);
+ } else if (md->attribute & EFI_MEMORY_WC) {
+#if 0
+ md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
+ | _PAGE_D
+ | _PAGE_MA_WC
+ | _PAGE_PL_0
+ | _PAGE_AR_RW));
+#else
+ printk("EFI_MEMORY_WC mapping\n");
+ md->virt_addr = (u64) ioremap(md->phys_addr, 0);
+#endif
+ } else if (md->attribute & EFI_MEMORY_WT) {
+#if 0
+ md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
+ | _PAGE_D | _PAGE_MA_WT
+ | _PAGE_PL_0
+ | _PAGE_AR_RW));
+#else
+ printk("EFI_MEMORY_WT mapping\n");
+ md->virt_addr = (u64) ioremap(md->phys_addr, 0);
+#endif
+ }
+ }
+ }
+
+ status = efi_call_phys(__va(runtime->set_virtual_address_map),
+ ia64_boot_param.efi_memmap_size,
+ efi_desc_size, ia64_boot_param.efi_memdesc_version,
+ ia64_boot_param.efi_memmap);
+ if (status != EFI_SUCCESS) {
+ printk("Warning: unable to switch EFI into virtual mode (status=%lu)\n", status);
+ return;
+ }
+
+ /*
+ * Now that EFI is in virtual mode, we arrange for EFI functions to be
+ * called directly:
+ */
+ efi.get_time = __va(runtime->get_time);
+ efi.set_time = __va(runtime->set_time);
+ efi.get_wakeup_time = __va(runtime->get_wakeup_time);
+ efi.set_wakeup_time = __va(runtime->set_wakeup_time);
+ efi.get_variable = __va(runtime->get_variable);
+ efi.get_next_variable = __va(runtime->get_next_variable);
+ efi.set_variable = __va(runtime->set_variable);
+ efi.get_next_high_mono_count = __va(runtime->get_next_high_mono_count);
+ efi.reset_system = __va(runtime->reset_system);
+}
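
As an illustrative aside (not part of this commit): the mktime() helper in efi.c above packs the Gauss-style leap-year arithmetic into one expression. The small stand-alone check below re-uses that formula and asserts it against a few well-known time_t values; the function name and the test dates are mine, only the arithmetic is taken from the code above.

#include <assert.h>
#include <stdio.h>

static unsigned long
to_seconds(unsigned int year, unsigned int mon, unsigned int day,
	   unsigned int hour, unsigned int min, unsigned int sec)
{
	if (0 >= (int) (mon -= 2)) {	/* 1..12 -> 11,12,1..10 */
		mon += 12;		/* put Feb last since it has the leap day */
		year -= 1;
	}
	return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)
		  + year*365 - 719499)*24 + hour)*60 + min)*60 + sec;
}

int main(void)
{
	assert(to_seconds(1970,  1,  1,  0,  0,  0) == 0UL);		/* Unix epoch */
	assert(to_seconds(2000,  1,  1,  0,  0,  0) == 946684800UL);	/* 2000-01-01 00:00:00 UTC */
	assert(to_seconds(1999, 12, 31, 23, 59, 59) == 946684799UL);
	printf("mktime formula checks pass\n");
	return 0;
}
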
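A second aside on the same file: efi_memmap_walk() above coalesces adjacent usable memory descriptors before handing each merged extent to the callback. The sketch below keeps only that coalescing loop; the callback, the sample ranges, and the function names are invented for the example, and the page-alignment trimming of the real code is left out so the shape of the algorithm stays visible.

#include <stdio.h>

struct range { unsigned long start, end; };

typedef void (*range_cb)(unsigned long start, unsigned long end, void *arg);

static void coalesce(const struct range *r, int n, range_cb cb, void *arg)
{
	struct range prev = { 0, 0 };
	int i, prev_valid = 0;

	for (i = 0; i < n; i++) {
		if (!prev_valid) {
			prev = r[i];
			prev_valid = 1;
		} else if (prev.end == r[i].start) {
			prev.end = r[i].end;		/* merge two consecutive ranges */
		} else {
			cb(prev.start, prev.end, arg);	/* emit the finished extent */
			prev = r[i];
		}
	}
	if (prev_valid)
		cb(prev.start, prev.end, arg);		/* emit the final extent */
}

static void print_range(unsigned long s, unsigned long e, void *arg)
{
	(void) arg;
	printf("usable: [0x%lx-0x%lx)\n", s, e);
}

int main(void)
{
	struct range map[] = {
		{ 0x1000, 0x4000 }, { 0x4000, 0x8000 },	/* adjacent: merged */
		{ 0xa000, 0xc000 },			/* separate extent */
	};

	coalesce(map, 3, print_range, NULL);
	return 0;
}
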
diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S
new file mode 100644
index 000000000..4e6f1fc63
--- /dev/null
+++ b/arch/ia64/kernel/efi_stub.S
@@ -0,0 +1,141 @@
+/*
+ * EFI call stub.
+ *
+ * Copyright (C) 1999 David Mosberger <davidm@hpl.hp.com>
+ *
+ * This stub allows us to make EFI calls in physical mode with interrupts
+ * turned off. We need this because we can't call SetVirtualMap() until
+ * the kernel has booted far enough to allow allocation of struct vma_struct
+ * entries (which we would need to map stuff with memory attributes other
+ * than uncached or writeback...). Since the GetTime() service gets called
+ * earlier than that, we need to be able to make physical mode EFI calls from
+ * the kernel.
+ */
+
+/*
+ * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
+ * Abstraction Layer Specification", revision 2.6e). Note that
+ * psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
+ * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
+ * (the br.ia instruction fails unless psr.dfl and psr.dfh are
+ * cleared). Fortunately, SAL promises not to touch the floating
+ * point regs, so at least we don't have to save f2-f127.
+ */
+#define PSR_BITS_TO_CLEAR \
+ (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \
+ IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
+ IA64_PSR_DFL | IA64_PSR_DFH)
+
+#define PSR_BITS_TO_SET \
+ (IA64_PSR_BN)
+
+#include <asm/processor.h>
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .text
+
+/*
+ * Switch execution mode from virtual to physical or vice versa.
+ *
+ * Inputs:
+ * r16 = new psr to establish
+ */
+ .proc switch_mode
+switch_mode:
+ {
+ alloc r2=ar.pfs,0,0,0,0
+ rsm psr.i | psr.ic // disable interrupts and interrupt collection
+ mov r15=ip
+ }
+ ;;
+ {
+ flushrs // must be first insn in group
+ srlz.i
+ shr.u r19=r15,61 // r19 <- top 3 bits of current IP
+ }
+ ;;
+ mov cr.ipsr=r16 // set new PSR
+ add r3=1f-switch_mode,r15
+ xor r15=0x7,r19 // flip the region bits
+
+ mov r17=ar.bsp
+ mov r14=rp // get return address into a general register
+
+ // switch RSE backing store:
+ ;;
+ dep r17=r15,r17,61,3 // make ar.bsp physical or virtual
+ mov r18=ar.rnat // save ar.rnat
+ ;;
+ mov ar.bspstore=r17 // this steps on ar.rnat
+ dep r3=r15,r3,61,3 // make rfi return address physical or virtual
+ ;;
+ mov cr.iip=r3
+ mov cr.ifs=r0
+ dep sp=r15,sp,61,3 // make stack pointer physical or virtual
+ ;;
+ mov ar.rnat=r18 // restore ar.rnat
+ dep r14=r15,r14,61,3 // make function return address physical or virtual
+ rfi // must be last insn in group
+ ;;
+1: mov rp=r14
+ br.ret.sptk.few rp
+ .endp switch_mode
+
+/*
+ * Inputs:
+ * in0 = address of function descriptor of EFI routine to call
+ * in1..in7 = arguments to routine
+ *
+ * Outputs:
+ * r8 = EFI_STATUS returned by called function
+ */
+
+ .global efi_call_phys
+ .proc efi_call_phys
+efi_call_phys:
+
+ alloc loc0=ar.pfs,8,5,7,0
+ ld8 r2=[in0],8 // load EFI function's entry point
+ mov loc1=rp
+ ;;
+ mov loc2=gp // save global pointer
+ mov loc4=ar.rsc // save RSE configuration
+ mov ar.rsc=r0 // put RSE in enforced lazy, LE mode
+ ;;
+
+ ld8 gp=[in0] // load EFI function's global pointer
+ mov out0=in1
+ mov out1=in2
+ movl r16=PSR_BITS_TO_CLEAR
+
+ mov loc3=psr // save processor status word
+ movl r17=PSR_BITS_TO_SET
+ ;;
+ mov out2=in3
+ or loc3=loc3,r17
+ mov b6=r2
+ ;;
+ andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared
+ mov out3=in4
+ br.call.sptk.few rp=switch_mode
+.ret0:
+ mov out4=in5
+ mov out5=in6
+ mov out6=in7
+ br.call.sptk.few rp=b6 // call the EFI function
+.ret1:
+ mov ar.rsc=r0 // put RSE in enforced lazy, LE mode
+ mov r16=loc3
+ br.call.sptk.few rp=switch_mode // return to virtual mode
+.ret2:
+ mov ar.rsc=loc4 // restore RSE configuration
+ mov ar.pfs=loc0
+ mov rp=loc1
+ mov gp=loc2
+ br.ret.sptk.few rp
+
+ .endp efi_call_phys
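
As an illustrative aside (not part of this commit): switch_mode in efi_stub.S above moves between virtual and physical addressing by rewriting only the top three (region) bits of sp, ar.bsp, and the return address, using shr.u, xor, and dep. A small C illustration of that address arithmetic follows; the sample addresses are invented, and "region 7 = kernel identity map" is an assumption about the ia64 layout of this era rather than something stated in the patch.

#include <stdint.h>
#include <stdio.h>

/* Replace bits 61..63 of addr with the 3-bit value 'region' (the "dep ...,61,3" idiom). */
static uint64_t set_region(uint64_t addr, uint64_t region)
{
	return (addr & ~(UINT64_C(7) << 61)) | ((region & 7) << 61);
}

int main(void)
{
	uint64_t ip  = UINT64_C(0xe000000000123450);	/* pretend current IP, region 7 */
	uint64_t sp  = UINT64_C(0xe000000000ffff00);
	uint64_t cur = ip >> 61;			/* "shr.u r19=r15,61": current region */
	uint64_t new_region = 7 ^ cur;			/* "xor r15=0x7,r19": flip the region bits */

	printf("sp %016llx -> %016llx\n",
	       (unsigned long long) sp,
	       (unsigned long long) set_region(sp, new_region));
	return 0;
}
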
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
new file mode 100644
index 000000000..87e77c677
--- /dev/null
+++ b/arch/ia64/kernel/entry.S
@@ -0,0 +1,1261 @@
+/*
+ * ia64/kernel/entry.S
+ *
+ * Kernel entry points.
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
+ */
+/*
+ * Global (preserved) predicate usage on syscall entry/exit path:
+ *
+ *
+ * pEOI: See entry.h.
+ * pKern: See entry.h.
+ * pSys: See entry.h.
+ * pNonSys: !pSys
+ * p2: (Alias of pKern!) True if any signals are pending.
+ * p16/p17: Used by stubs calling ia64_do_signal to indicate if current task
+ * has PF_PTRACED flag bit set. p16 is true if so, p17 is the complement.
+ */
+
+#include <linux/config.h>
+
+#include <asm/errno.h>
+#include <asm/offsets.h>
+#include <asm/processor.h>
+#include <asm/unistd.h>
+
+#include "entry.h"
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ /*
+ * execve() is special because in case of success, we need to
+ * setup a null register window frame.
+ */
+ .align 16
+ .proc ia64_execve
+ia64_execve:
+ alloc loc0=ar.pfs,3,2,4,0
+ mov loc1=rp
+ mov out0=in0 // filename
+ ;; // stop bit between alloc and call
+ mov out1=in1 // argv
+ mov out2=in2 // envp
+ add out3=16,sp // regs
+ br.call.sptk.few rp=sys_execve
+.ret0: cmp4.ge p6,p0=r8,r0
+ mov ar.pfs=loc0 // restore ar.pfs
+ ;;
+(p6) mov ar.pfs=r0 // clear ar.pfs in case of success
+ sxt4 r8=r8 // return 64-bit result
+ mov rp=loc1
+
+ br.ret.sptk.few rp
+ .endp ia64_execve
+
+ .align 16
+ .global sys_clone
+ .proc sys_clone
+sys_clone:
+ alloc r16=ar.pfs,2,2,3,0;;
+ movl r28=1f
+ mov loc1=rp
+ br.cond.sptk.many save_switch_stack
+1:
+ mov loc0=r16 // save ar.pfs across do_fork
+ adds out2=IA64_SWITCH_STACK_SIZE+16,sp
+ adds r2=IA64_SWITCH_STACK_SIZE+IA64_PT_REGS_R12_OFFSET+16,sp
+ cmp.eq p8,p9=in1,r0 // usp == 0?
+ mov out0=in0 // out0 = clone_flags
+ ;;
+(p8) ld8 out1=[r2] // fetch usp from pt_regs.r12
+(p9) mov out1=in1
+ br.call.sptk.few rp=do_fork
+.ret1:
+ mov ar.pfs=loc0
+ adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack
+ mov rp=loc1
+ ;;
+ br.ret.sptk.many rp
+ .endp sys_clone
+
+/*
+ * prev_task <- switch_to(struct task_struct *next)
+ */
+ .align 16
+ .global ia64_switch_to
+ .proc ia64_switch_to
+ia64_switch_to:
+ alloc r16=ar.pfs,1,0,0,0
+ movl r28=1f
+ br.cond.sptk.many save_switch_stack
+1:
+ // disable interrupts to ensure atomicity for next few instructions:
+ mov r17=psr // M-unit
+ ;;
+ rsm psr.i // M-unit
+ dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff
+ ;;
+ srlz.d
+ ;;
+ adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
+ adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
+ ;;
+ st8 [r22]=sp // save kernel stack pointer of old task
+ ld8 sp=[r21] // load kernel stack pointer of new task
+ and r20=in0,r18 // physical address of "current"
+ ;;
+ mov r8=r13 // return pointer to previously running task
+ mov r13=in0 // set "current" pointer
+ mov ar.k6=r20 // copy "current" into ar.k6
+ ;;
+ // restore interrupts
+ mov psr.l=r17
+ ;;
+ srlz.d
+
+ movl r28=1f
+ br.cond.sptk.many load_switch_stack
+1:
+ br.ret.sptk.few rp
+ .endp ia64_switch_to
+
+ /*
+ * Like save_switch_stack, but also save the stack frame that is active
+ * at the time this function is called.
+ */
+ .align 16
+ .proc save_switch_stack_with_current_frame
+save_switch_stack_with_current_frame:
+1: {
+ alloc r16=ar.pfs,0,0,0,0 // pass ar.pfs to save_switch_stack
+ mov r28=ip
+ }
+ ;;
+ adds r28=1f-1b,r28
+ br.cond.sptk.many save_switch_stack
+1: br.ret.sptk.few rp
+ .endp save_switch_stack_with_current_frame
+/*
+ * Note that interrupts are enabled during save_switch_stack and
+ * load_switch_stack. This means that we may get an interrupt with
+ * "sp" pointing to the new kernel stack while ar.bspstore is still
+ * pointing to the old kernel backing store area. Since ar.rsc,
+ * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts,
+ * this is not a problem.
+ */
+
+/*
+ * save_switch_stack:
+ * - r16 holds ar.pfs
+ * - r28 holds address to return to
+ * - rp (b0) holds return address to save
+ */
+ .align 16
+ .global save_switch_stack
+ .proc save_switch_stack
+save_switch_stack:
+ flushrs // flush dirty regs to backing store (must be first in insn group)
+ mov r17=ar.unat // preserve caller's
+ adds r2=-IA64_SWITCH_STACK_SIZE+16,sp // r2 = &sw->caller_unat
+ ;;
+ mov r18=ar.fpsr // preserve fpsr
+ mov ar.rsc=r0 // put RSE in mode: enforced lazy, little endian, pl 0
+ ;;
+ mov r19=ar.rnat
+ adds r3=-IA64_SWITCH_STACK_SIZE+24,sp // r3 = &sw->ar_fpsr
+
+ // Note: the instruction ordering is important here: we can't
+ // store anything to the switch stack before sp is updated
+ // as otherwise an interrupt might overwrite the memory!
+ adds sp=-IA64_SWITCH_STACK_SIZE,sp
+ ;;
+ st8 [r2]=r17,16
+ st8 [r3]=r18,24
+ ;;
+ stf.spill [r2]=f2,32
+ stf.spill [r3]=f3,32
+ mov r21=b0
+ ;;
+ stf.spill [r2]=f4,32
+ stf.spill [r3]=f5,32
+ ;;
+ stf.spill [r2]=f10,32
+ stf.spill [r3]=f11,32
+ mov r22=b1
+ ;;
+ stf.spill [r2]=f12,32
+ stf.spill [r3]=f13,32
+ mov r23=b2
+ ;;
+ stf.spill [r2]=f14,32
+ stf.spill [r3]=f15,32
+ mov r24=b3
+ ;;
+ stf.spill [r2]=f16,32
+ stf.spill [r3]=f17,32
+ mov r25=b4
+ ;;
+ stf.spill [r2]=f18,32
+ stf.spill [r3]=f19,32
+ mov r26=b5
+ ;;
+ stf.spill [r2]=f20,32
+ stf.spill [r3]=f21,32
+ mov r17=ar.lc // I-unit
+ ;;
+ stf.spill [r2]=f22,32
+ stf.spill [r3]=f23,32
+ ;;
+ stf.spill [r2]=f24,32
+ stf.spill [r3]=f25,32
+ ;;
+ stf.spill [r2]=f26,32
+ stf.spill [r3]=f27,32
+ ;;
+ stf.spill [r2]=f28,32
+ stf.spill [r3]=f29,32
+ ;;
+ stf.spill [r2]=f30,32
+ stf.spill [r3]=f31,24
+ ;;
+ st8.spill [r2]=r4,16
+ st8.spill [r3]=r5,16
+ ;;
+ st8.spill [r2]=r6,16
+ st8.spill [r3]=r7,16
+ ;;
+ st8 [r2]=r21,16 // save b0
+ st8 [r3]=r22,16 // save b1
+ /* since we're done with the spills, read and save ar.unat: */
+ mov r18=ar.unat // M-unit
+ mov r20=ar.bspstore // M-unit
+ ;;
+ st8 [r2]=r23,16 // save b2
+ st8 [r3]=r24,16 // save b3
+ ;;
+ st8 [r2]=r25,16 // save b4
+ st8 [r3]=r26,16 // save b5
+ ;;
+ st8 [r2]=r16,16 // save ar.pfs
+ st8 [r3]=r17,16 // save ar.lc
+ mov r21=pr
+ ;;
+ st8 [r2]=r18,16 // save ar.unat
+ st8 [r3]=r19,16 // save ar.rnat
+ mov b7=r28
+ ;;
+ st8 [r2]=r20 // save ar.bspstore
+ st8 [r3]=r21 // save predicate registers
+ mov ar.rsc=3 // put RSE back into eager mode, pl 0
+ br.cond.sptk.few b7
+ .endp save_switch_stack
+
+/*
+ * load_switch_stack:
+ * - r28 holds address to return to
+ */
+ .align 16
+ .proc load_switch_stack
+load_switch_stack:
+ invala // invalidate ALAT
+ adds r2=IA64_SWITCH_STACK_B0_OFFSET+16,sp // get pointer to switch_stack.b0
+ mov ar.rsc=r0 // put RSE into enforced lazy mode
+ adds r3=IA64_SWITCH_STACK_B0_OFFSET+24,sp // get pointer to switch_stack.b1
+ ;;
+ ld8 r21=[r2],16 // restore b0
+ ld8 r22=[r3],16 // restore b1
+ ;;
+ ld8 r23=[r2],16 // restore b2
+ ld8 r24=[r3],16 // restore b3
+ ;;
+ ld8 r25=[r2],16 // restore b4
+ ld8 r26=[r3],16 // restore b5
+ ;;
+ ld8 r16=[r2],16 // restore ar.pfs
+ ld8 r17=[r3],16 // restore ar.lc
+ ;;
+ ld8 r18=[r2],16 // restore ar.unat
+ ld8 r19=[r3],16 // restore ar.rnat
+ mov b0=r21
+ ;;
+ ld8 r20=[r2] // restore ar.bspstore
+ ld8 r21=[r3] // restore predicate registers
+ mov ar.pfs=r16
+ ;;
+ mov ar.bspstore=r20
+ ;;
+ loadrs // invalidate stacked regs outside current frame
+ adds r2=16-IA64_SWITCH_STACK_SIZE,r2 // get pointer to switch_stack.caller_unat
+ ;; // stop bit for rnat dependency
+ mov ar.rnat=r19
+ mov ar.unat=r18 // establish unat holding the NaT bits for r4-r7
+ adds r3=16-IA64_SWITCH_STACK_SIZE,r3 // get pointer to switch_stack.ar_fpsr
+ ;;
+ ld8 r18=[r2],16 // restore caller's unat
+ ld8 r19=[r3],24 // restore fpsr
+ mov ar.lc=r17
+ ;;
+ ldf.fill f2=[r2],32
+ ldf.fill f3=[r3],32
+ mov pr=r21,-1
+ ;;
+ ldf.fill f4=[r2],32
+ ldf.fill f5=[r3],32
+ ;;
+ ldf.fill f10=[r2],32
+ ldf.fill f11=[r3],32
+ mov b1=r22
+ ;;
+ ldf.fill f12=[r2],32
+ ldf.fill f13=[r3],32
+ mov b2=r23
+ ;;
+ ldf.fill f14=[r2],32
+ ldf.fill f15=[r3],32
+ mov b3=r24
+ ;;
+ ldf.fill f16=[r2],32
+ ldf.fill f17=[r3],32
+ mov b4=r25
+ ;;
+ ldf.fill f18=[r2],32
+ ldf.fill f19=[r3],32
+ mov b5=r26
+ ;;
+ ldf.fill f20=[r2],32
+ ldf.fill f21=[r3],32
+ ;;
+ ldf.fill f22=[r2],32
+ ldf.fill f23=[r3],32
+ ;;
+ ldf.fill f24=[r2],32
+ ldf.fill f25=[r3],32
+ ;;
+ ldf.fill f26=[r2],32
+ ldf.fill f27=[r3],32
+ ;;
+ ldf.fill f28=[r2],32
+ ldf.fill f29=[r3],32
+ ;;
+ ldf.fill f30=[r2],32
+ ldf.fill f31=[r3],24
+ ;;
+ ld8.fill r4=[r2],16
+ ld8.fill r5=[r3],16
+ mov b7=r28
+ ;;
+ ld8.fill r6=[r2],16
+ ld8.fill r7=[r3],16
+ mov ar.unat=r18 // restore caller's unat
+ mov ar.fpsr=r19 // restore fpsr
+ mov ar.rsc=3 // put RSE back into eager mode, pl 0
+ adds sp=IA64_SWITCH_STACK_SIZE,sp // pop switch_stack
+ br.cond.sptk.few b7
+ .endp load_switch_stack
+
+ .align 16
+ .global __ia64_syscall
+ .proc __ia64_syscall
+__ia64_syscall:
+ .regstk 6,0,0,0
+ mov r15=in5 // put syscall number in place
+ break __BREAK_SYSCALL
+ movl r2=errno
+ cmp.eq p6,p7=-1,r10
+ ;;
+(p6) st4 [r2]=r8
+(p6) mov r8=-1
+ br.ret.sptk.few rp
+ .endp __ia64_syscall
+
+ //
+ // We invoke syscall_trace through this intermediate function to
+ // ensure that the syscall input arguments are not clobbered. We
+ // also use it to preserve b6, which contains the syscall entry point.
+ //
+ .align 16
+ .global invoke_syscall_trace
+ .proc invoke_syscall_trace
+invoke_syscall_trace:
+ alloc loc0=ar.pfs,8,3,0,0
+ ;; // WAW on CFM at the br.call
+ mov loc1=rp
+ br.call.sptk.many rp=save_switch_stack_with_current_frame // must preserve b6!!
+.ret2: mov loc2=b6
+ br.call.sptk.few rp=syscall_trace
+.ret3: adds sp=IA64_SWITCH_STACK_SIZE,sp // drop switch_stack frame
+ mov rp=loc1
+ mov ar.pfs=loc0
+ mov b6=loc2
+ ;;
+ br.ret.sptk.few rp
+ .endp invoke_syscall_trace
+
+ //
+ // Invoke a system call, but do some tracing before and after the call.
+ // We MUST preserve the current register frame throughout this routine
+ // because some system calls (such as ia64_execve) directly
+ // manipulate ar.pfs.
+ //
+ // Input:
+ // r15 = syscall number
+ // b6 = syscall entry point
+ //
+ .global ia64_trace_syscall
+ .global ia64_strace_leave_kernel
+ .global ia64_strace_clear_r8
+
+ .proc ia64_strace_clear_r8
+ia64_strace_clear_r8: // this is where we return after cloning when PF_TRACESYS is on
+# ifdef CONFIG_SMP
+ br.call.sptk.few rp=invoke_schedule_tail
+# endif
+ mov r8=0
+ br strace_check_retval
+ .endp ia64_strace_clear_r8
+
+ .proc ia64_trace_syscall
+ia64_trace_syscall:
+ br.call.sptk.few rp=invoke_syscall_trace // give parent a chance to catch syscall args
+.ret4: br.call.sptk.few rp=b6 // do the syscall
+strace_check_retval:
+.ret5: cmp.lt p6,p0=r8,r0 // syscall failed?
+ ;;
+ adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8
+ adds r3=IA64_PT_REGS_R8_OFFSET+32,sp // r3 = &pt_regs.r10
+ mov r10=0
+(p6) br.cond.sptk.few strace_error // syscall failed ->
+ ;; // avoid RAW on r10
+strace_save_retval:
+ st8.spill [r2]=r8 // store return value in slot for r8
+ st8.spill [r3]=r10 // clear error indication in slot for r10
+ia64_strace_leave_kernel:
+ br.call.sptk.few rp=invoke_syscall_trace // give parent a chance to catch return value
+.ret6: br.cond.sptk.many ia64_leave_kernel
+
+strace_error:
+ ld8 r3=[r2] // load pt_regs.r8
+ sub r9=0,r8 // negate return value to get errno value
+ ;;
+ cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0?
+ adds r3=16,r2 // r3=&pt_regs.r10
+ ;;
+(p6) mov r10=-1
+(p6) mov r8=r9
+ br.cond.sptk.few strace_save_retval
+ .endp ia64_trace_syscall
+
+/*
+ * A couple of convenience macros to help implement/understand the state
+ * restoration that happens at the end of ia64_ret_from_syscall.
+ */
+#define rARPR r31
+#define rCRIFS r30
+#define rCRIPSR r29
+#define rCRIIP r28
+#define rARRSC r27
+#define rARPFS r26
+#define rARUNAT r25
+#define rARRNAT r24
+#define rARBSPSTORE r23
+#define rKRBS r22
+#define rB6 r21
+
+ .align 16
+ .global ia64_ret_from_syscall
+ .global ia64_ret_from_syscall_clear_r8
+ .global ia64_leave_kernel
+ .proc ia64_ret_from_syscall
+ia64_ret_from_syscall_clear_r8:
+#ifdef CONFIG_SMP
+ // In SMP mode, we need to call schedule_tail to complete the scheduling process.
+ // Called by ia64_switch_to after do_fork()->copy_thread(). r8 contains the
+ // address of the previously executing task.
+ br.call.sptk.few rp=invoke_schedule_tail
+.ret7:
+#endif
+ mov r8=0
+ ;; // added stop bits to prevent r8 dependency
+ia64_ret_from_syscall:
+ cmp.ge p6,p7=r8,r0 // syscall executed successfully?
+ adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8
+ adds r3=IA64_PT_REGS_R8_OFFSET+32,sp // r3 = &pt_regs.r10
+ ;;
+(p6) st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit
+(p6) st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit
+(p7) br.cond.spnt.few handle_syscall_error // handle potential syscall failure
+
+ia64_leave_kernel:
+ // check & deliver software interrupts (bottom half handlers):
+
+ movl r2=bh_active // sheesh, why aren't these two in
+ movl r3=bh_mask // a struct??
+ ;;
+ ld8 r2=[r2]
+ ld8 r3=[r3]
+ ;;
+ and r2=r2,r3
+ ;;
+ cmp.ne p6,p7=r2,r0 // any soft interrupts ready for delivery?
+(p6) br.call.dpnt.few rp=invoke_do_bottom_half
+1:
+(pKern) br.cond.dpnt.many restore_all // yup -> skip check for rescheduling & signal delivery
+
+ // call schedule() until we find a task that doesn't have need_resched set:
+
+back_from_resched:
+ { .mii
+ adds r2=IA64_TASK_NEED_RESCHED_OFFSET,r13
+ mov r3=ip
+ adds r14=IA64_TASK_SIGPENDING_OFFSET,r13
+ }
+ ;;
+ ld8 r2=[r2]
+ ld4 r14=[r14]
+ mov rp=r3 // arrange for schedule() to return to back_from_resched
+ ;;
+ /*
+ * If pEOI is set, we need to write the cr.eoi now and then
+ * clear pEOI because both invoke_schedule() and
+ * handle_signal_delivery() may call the scheduler. Since
+ * we're returning to user-level, we get at most one nested
+ * interrupt of the same priority level, which doesn't tax the
+ * kernel stack too much.
+ */
+(pEOI) mov cr.eoi=r0
+ cmp.ne p6,p0=r2,r0
+ cmp.ne p2,p0=r14,r0 // NOTE: pKern is an alias for p2!!
+(pEOI) cmp.ne pEOI,p0=r0,r0 // clear pEOI before calling schedule()
+ srlz.d
+(p6) br.call.spnt.many b6=invoke_schedule // ignore return value
+2:
+ // check & deliver pending signals:
+(p2) br.call.spnt.few rp=handle_signal_delivery
+restore_all:
+
+ // start restoring the state saved on the kernel stack (struct pt_regs):
+
+ adds r2=IA64_PT_REGS_R8_OFFSET+16,r12
+ adds r3=IA64_PT_REGS_R8_OFFSET+24,r12
+ ;;
+ ld8.fill r8=[r2],16
+ ld8.fill r9=[r3],16
+ ;;
+ ld8.fill r10=[r2],16
+ ld8.fill r11=[r3],16
+ ;;
+ ld8.fill r16=[r2],16
+ ld8.fill r17=[r3],16
+ ;;
+ ld8.fill r18=[r2],16
+ ld8.fill r19=[r3],16
+ ;;
+ ld8.fill r20=[r2],16
+ ld8.fill r21=[r3],16
+ ;;
+ ld8.fill r22=[r2],16
+ ld8.fill r23=[r3],16
+ ;;
+ ld8.fill r24=[r2],16
+ ld8.fill r25=[r3],16
+ ;;
+ ld8.fill r26=[r2],16
+ ld8.fill r27=[r3],16
+ ;;
+ ld8.fill r28=[r2],16
+ ld8.fill r29=[r3],16
+ ;;
+ ld8.fill r30=[r2],16
+ ld8.fill r31=[r3],16
+ ;;
+ ld8 r1=[r2],16 // ar.ccv
+ ld8 r13=[r3],16 // ar.fpsr
+ ;;
+ ld8 r14=[r2],16 // b0
+ ld8 r15=[r3],16+8 // b7
+ ;;
+ ldf.fill f6=[r2],32
+ ldf.fill f7=[r3],32
+ ;;
+ ldf.fill f8=[r2],32
+ ldf.fill f9=[r3],32
+ ;;
+ mov ar.ccv=r1
+ mov ar.fpsr=r13
+ mov b0=r14
+ // turn off interrupts, interrupt collection, & data translation
+ rsm psr.i | psr.ic | psr.dt
+ ;;
+ srlz.i // EAS 2.5
+ mov b7=r15
+ ;;
+ invala // invalidate ALAT
+ dep r12=0,r12,61,3 // convert sp to physical address
+ bsw.0;; // switch back to bank 0 (must be last in insn group)
+ ;;
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ nop.i 0x0
+ ;;
+ nop.i 0x0
+ ;;
+ nop.i 0x0
+ ;;
+#endif
+ adds r16=16,r12
+ adds r17=24,r12
+ ;;
+ ld8 rCRIPSR=[r16],16 // load cr.ipsr
+ ld8 rCRIIP=[r17],16 // load cr.iip
+ ;;
+ ld8 rCRIFS=[r16],16 // load cr.ifs
+ ld8 rARUNAT=[r17],16 // load ar.unat
+ ;;
+ ld8 rARPFS=[r16],16 // load ar.pfs
+ ld8 rARRSC=[r17],16 // load ar.rsc
+ ;;
+ ld8 rARRNAT=[r16],16 // load ar.rnat (may be garbage)
+ ld8 rARBSPSTORE=[r17],16 // load ar.bspstore (may be garbage)
+ ;;
+ ld8 rARPR=[r16],16 // load predicates
+ ld8 rB6=[r17],16 // load b6
+ ;;
+ ld8 r18=[r16],16 // load ar.rsc value for "loadrs"
+ ld8.fill r1=[r17],16 // load r1
+ ;;
+ ld8.fill r2=[r16],16
+ ld8.fill r3=[r17],16
+ ;;
+ ld8.fill r12=[r16],16
+ ld8.fill r13=[r17],16
+ extr.u r19=rCRIPSR,32,2 // extract ps.cpl
+ ;;
+ ld8.fill r14=[r16],16
+ ld8.fill r15=[r17],16
+ cmp.eq p6,p7=r0,r19 // are we returning to kernel mode? (psr.cpl==0)
+ ;;
+ mov b6=rB6
+ mov ar.pfs=rARPFS
+(p6) br.cond.dpnt.few skip_rbs_switch
+
+ /*
+ * Restore user backing store.
+ *
+ * NOTE: alloc, loadrs, and cover can't be predicated.
+ *
+ * XXX This needs some scheduling/tuning once we believe it
+ * really does work as intended.
+ */
+ mov r16=ar.bsp // get existing backing store pointer
+(pNonSys) br.cond.dpnt.few dont_preserve_current_frame
+ cover // add current frame into dirty partition
+ ;;
+ mov rCRIFS=cr.ifs // fetch the cr.ifs value that "cover" produced
+ mov r17=ar.bsp // get new backing store pointer
+ ;;
+ sub r16=r17,r16 // calculate number of bytes that were added to rbs
+ ;;
+ shl r16=r16,16 // shift additional frame size into position for loadrs
+ ;;
+ add r18=r16,r18 // adjust the loadrs value
+ ;;
+#ifdef CONFIG_IA64_SOFTSDV_HACKS
+ // Reset ITM if we've missed a timer tick. Workaround for SoftSDV bug
+ mov r16 = r2
+ mov r2 = ar.itc
+ mov r17 = cr.itm
+ ;;
+ cmp.gt p6,p7 = r2, r17
+(p6) addl r17 = 100, r2
+ ;;
+ mov cr.itm = r17
+ mov r2 = r16
+#endif
+dont_preserve_current_frame:
+ alloc r16=ar.pfs,0,0,0,0 // drop the current call frame (noop for syscalls)
+ ;;
+ mov ar.rsc=r18 // load ar.rsc to be used for "loadrs"
+#ifdef CONFIG_IA32_SUPPORT
+ tbit.nz p6,p0=rCRIPSR,IA64_PSR_IS_BIT
+ ;;
+(p6) mov ar.rsc=r0 // returning to IA32 mode
+#endif
+ ;;
+ loadrs
+ ;;
+ mov ar.bspstore=rARBSPSTORE
+ ;;
+ mov ar.rnat=rARRNAT // must happen with RSE in lazy mode
+
+skip_rbs_switch:
+ mov ar.rsc=rARRSC
+ mov ar.unat=rARUNAT
+ mov cr.ifs=rCRIFS // restore cr.ifs only if not a (synchronous) syscall
+(pEOI) mov cr.eoi=r0
+ mov pr=rARPR,-1
+ mov cr.iip=rCRIIP
+ mov cr.ipsr=rCRIPSR
+ ;;
+ rfi;; // must be last instruction in an insn group
+
+handle_syscall_error:
+ /*
+ * Some system calls (e.g., ptrace, mmap) can return arbitrary
+ * values which could lead us to mistake a negative return
+ * value for a failed syscall. Those syscalls must deposit
+ * a non-zero value in pt_regs.r8 to indicate an error.
+ * If pt_regs.r8 is zero, we assume that the call completed
+ * successfully.
+ */
+ ld8 r3=[r2] // load pt_regs.r8
+ sub r9=0,r8 // negate return value to get errno
+ ;;
+ mov r10=-1 // return -1 in pt_regs.r10 to indicate error
+ cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0?
+ adds r3=16,r2 // r3=&pt_regs.r10
+ ;;
+(p6) mov r9=r8
+(p6) mov r10=0
+ ;;
+ st8.spill [r2]=r9 // store errno in pt_regs.r8 and set unat bit
+ st8.spill [r3]=r10 // store error indication in pt_regs.r10 and set unat bit
+ br.cond.sptk.many ia64_leave_kernel
+ .endp __ret_from_syscall
+
+#ifdef CONFIG_SMP
+ /*
+ * Invoke schedule_tail(task) while preserving in0-in7, which may be needed
+ * in case a system call gets restarted.
+ */
+ .proc invoke_schedule_tail
+invoke_schedule_tail:
+ alloc loc0=ar.pfs,8,2,1,0
+ mov loc1=rp
+ mov out0=r8 // Address of previous task
+ ;;
+ br.call.sptk.few rp=schedule_tail
+.ret8:
+ mov ar.pfs=loc0
+ mov rp=loc1
+ br.ret.sptk.many rp
+ .endp invoke_schedule_tail
+#endif /* CONFIG_SMP */
+
+ /*
+ * Invoke do_bottom_half() while preserving in0-in7, which may be needed
+ * in case a system call gets restarted.
+ */
+ .proc invoke_do_bottom_half
+invoke_do_bottom_half:
+ alloc loc0=ar.pfs,8,2,0,0
+ mov loc1=rp
+ ;;
+ br.call.sptk.few rp=do_bottom_half
+.ret9:
+ mov ar.pfs=loc0
+ mov rp=loc1
+ br.ret.sptk.many rp
+ .endp invoke_do_bottom_half
+
+ /*
+ * Invoke schedule() while preserving in0-in7, which may be needed
+ * in case a system call gets restarted.
+ */
+ .proc invoke_schedule
+invoke_schedule:
+ alloc loc0=ar.pfs,8,2,0,0
+ mov loc1=rp
+ ;;
+ br.call.sptk.few rp=schedule
+.ret10:
+ mov ar.pfs=loc0
+ mov rp=loc1
+ br.ret.sptk.many rp
+ .endp invoke_schedule
+
+ //
+ // Setup stack and call ia64_do_signal. Note that pSys and pNonSys need to
+ // be set up by the caller. We declare 8 input registers so the system call
+ // args get preserved, in case we need to restart a system call.
+ //
+ .align 16
+ .proc handle_signal_delivery
+handle_signal_delivery:
+ alloc loc0=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
+ mov r9=ar.unat
+
+ // If the process is being ptraced, the signal may not actually be delivered to
+ // the process. Instead, SIGCHLD will be sent to the parent. We need to
+ // set up a switch_stack so ptrace can inspect the process's state if necessary.
+ adds r2=IA64_TASK_FLAGS_OFFSET,r13
+ ;;
+ ld8 r2=[r2]
+ mov out0=0 // there is no "oldset"
+ adds out1=16,sp // out1=&pt_regs
+ ;;
+(pSys) mov out2=1 // out2==1 => we're in a syscall
+ tbit.nz p16,p17=r2,PF_PTRACED_BIT
+(p16) br.cond.spnt.many setup_switch_stack
+ ;;
+back_from_setup_switch_stack:
+(pNonSys) mov out2=0 // out2==0 => not a syscall
+ adds r3=-IA64_SWITCH_STACK_SIZE+IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp
+(p17) adds sp=-IA64_SWITCH_STACK_SIZE,sp // make space for (dummy) switch_stack
+ ;;
+(p17) st8 [r3]=r9 // save ar.unat in sw->caller_unat
+ mov loc1=rp // save return address
+ br.call.sptk.few rp=ia64_do_signal
+.ret11:
+ adds r3=IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp
+ ;;
+ ld8 r9=[r3] // load new unat from sw->caller_unat
+ mov rp=loc1
+ ;;
+(p17) adds sp=IA64_SWITCH_STACK_SIZE,sp // drop (dummy) switch_stack
+(p17) mov ar.unat=r9
+(p17) mov ar.pfs=loc0
+(p17) br.ret.sptk.many rp
+
+ // restore the switch stack (ptrace may have modified it):
+ movl r28=1f
+ br.cond.sptk.many load_switch_stack
+1: br.ret.sptk.many rp
+ // NOT REACHED
+
+setup_switch_stack:
+ movl r28=back_from_setup_switch_stack
+ mov r16=loc0
+ br.cond.sptk.many save_switch_stack
+ // NOT REACHED
+
+ .endp handle_signal_delivery
+
+ .align 16
+ .proc sys_rt_sigsuspend
+ .global sys_rt_sigsuspend
+sys_rt_sigsuspend:
+ alloc loc0=ar.pfs,2,2,3,0
+ mov r9=ar.unat
+
+ // If the process is being ptraced, the signal may not actually be delivered to
+ // the process. Instead, SIGCHLD will be sent to the parent. We need to
+ // set up a switch_stack so ptrace can inspect the process's state if necessary.
+ adds r2=IA64_TASK_FLAGS_OFFSET,r13
+ ;;
+ ld8 r2=[r2]
+ mov out0=in0 // mask
+ mov out1=in1 // sigsetsize
+ ;;
+ adds out2=16,sp // out2=&pt_regs
+ tbit.nz p16,p17=r2,PF_PTRACED_BIT
+(p16) br.cond.spnt.many sigsuspend_setup_switch_stack
+ ;;
+back_from_sigsuspend_setup_switch_stack:
+ adds r3=-IA64_SWITCH_STACK_SIZE+IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp
+(p17) adds sp=-IA64_SWITCH_STACK_SIZE,sp // make space for (dummy) switch_stack
+ ;;
+(p17) st8 [r3]=r9 // save ar.unat in sw->caller_unat
+ mov loc1=rp // save return address
+ br.call.sptk.many rp=ia64_rt_sigsuspend
+.ret12:
+ adds r3=IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp
+ ;;
+ ld8 r9=[r3] // load new unat from sw->caller_unat
+ mov rp=loc1
+ ;;
+(p17) adds sp=IA64_SWITCH_STACK_SIZE,sp // drop (dummy) switch_stack
+(p17) mov ar.unat=r9
+(p17) mov ar.pfs=loc0
+(p17) br.ret.sptk.many rp
+
+ // restore the switch stack (ptrace may have modified it):
+ movl r28=1f
+ br.cond.sptk.many load_switch_stack
+1: br.ret.sptk.many rp
+ // NOT REACHED
+
+sigsuspend_setup_switch_stack:
+ movl r28=back_from_sigsuspend_setup_switch_stack
+ mov r16=loc0
+ br.cond.sptk.many save_switch_stack
+ // NOT REACHED
+
+ .endp sys_rt_sigsuspend
+
+ .align 16
+ .proc sys_rt_sigreturn
+sys_rt_sigreturn:
+ alloc loc0=ar.pfs,8,1,1,0 // preserve all eight input regs in case of syscall restart!
+ adds out0=16,sp // out0 = &pt_regs
+ ;;
+ adds sp=-IA64_SWITCH_STACK_SIZE,sp // make space for unat and padding
+ br.call.sptk.few rp=ia64_rt_sigreturn
+.ret13:
+ adds r3=IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp
+ ;;
+ ld8 r9=[r3] // load new ar.unat
+ mov rp=r8
+ ;;
+ adds sp=IA64_SWITCH_STACK_SIZE,sp // drop (dummy) switch-stack frame
+ mov ar.unat=r9
+ mov ar.pfs=loc0
+ br.ret.sptk.many rp
+ .endp sys_rt_sigreturn
+
+ .align 16
+ .global ia64_prepare_handle_unaligned
+ .proc ia64_prepare_handle_unaligned
+ia64_prepare_handle_unaligned:
+ movl r28=1f
+ //
+ // r16 = fake ar.pfs, we simply need to make sure
+ // privilege is still 0
+ //
+ mov r16=r0
+ br.cond.sptk.few save_switch_stack
+1: br.call.sptk.few rp=ia64_handle_unaligned // stack frame setup in ivt
+.ret14:
+ movl r28=2f
+ br.cond.sptk.many load_switch_stack
+2: br.cond.sptk.many rp // goes to ia64_leave_kernel
+ .endp ia64_prepare_handle_unaligned
+
+#ifdef CONFIG_KDB
+ //
+ // This gets called from ivt.S with:
+ // SAVE MIN with cover done
+ // SAVE REST done
+ // no parameters
+ // r15 has return value = ia64_leave_kernel
+ //
+ .align 16
+ .global ia64_invoke_kdb
+ .proc ia64_invoke_kdb
+ia64_invoke_kdb:
+ alloc r16=ar.pfs,0,0,4,0
+ movl r28=1f // save_switch_stack protocol
+ ;; // avoid WAW on CFM
+ br.cond.sptk.many save_switch_stack // to flushrs
+1: mov out0=4 // kdb entry reason
+ mov out1=0 // err number
+ adds out2=IA64_SWITCH_STACK_SIZE+16,sp // pt_regs
+ add out3=16,sp // switch_stack
+ br.call.sptk.few rp=kdb
+.ret15:
+ movl r28=1f // load_switch_stack proto
+ br.cond.sptk.many load_switch_stack
+1: br.ret.sptk.many rp
+ .endp ia64_invoke_kdb
+
+ //
+ // When KDB is compiled in, we intercept each fault and give
+ // kdb a chance to run before calling the normal fault handler.
+ //
+ .align 16
+ .global ia64_invoke_kdb_fault_handler
+ .proc ia64_invoke_kdb_fault_handler
+ia64_invoke_kdb_fault_handler:
+ alloc r16=ar.pfs,5,1,5,0
+ movl r28=1f
+ mov loc0=rp // save this
+ br.cond.sptk.many save_switch_stack // to flushrs
+ ;; // avoid WAW on CFM
+1: mov out0=in0 // vector number
+ mov out1=in1 // cr.isr
+ mov out2=in2 // cr.ifa
+ mov out3=in3 // cr.iim
+ mov out4=in4 // cr.itir
+ br.call.sptk.few rp=ia64_kdb_fault_handler
+.ret16:
+
+ movl r28=1f
+ br.cond.sptk.many load_switch_stack
+1: cmp.ne p6,p0=r8,r0 // did ia64_kdb_fault_handler return 0?
+ mov rp=loc0
+(p6) br.ret.spnt.many rp // no, we're done
+ ;; // avoid WAW on rp
+ mov out0=in0 // vector number
+ mov out1=in1 // cr.isr
+ mov out2=in2 // cr.ifa
+ mov out3=in3 // cr.iim
+ mov out4=in4 // cr.itir
+ mov in0=ar.pfs // preserve ar.pfs returned by load_switch_stack
+ br.call.sptk.few rp=ia64_fault // yup -> we need to invoke normal fault handler now
+.ret17:
+ mov ar.pfs=in0
+ mov rp=loc0
+ br.ret.sptk.many rp
+
+ .endp ia64_invoke_kdb_fault_handler
+
+#endif /* CONFIG_KDB */
+
+ .rodata
+ .align 8
+ .globl sys_call_table
+sys_call_table:
+ data8 sys_ni_syscall // This must be sys_ni_syscall! See ivt.S.
+ data8 sys_exit // 1025
+ data8 sys_read
+ data8 sys_write
+ data8 sys_open
+ data8 sys_close
+ data8 sys_creat // 1030
+ data8 sys_link
+ data8 sys_unlink
+ data8 ia64_execve
+ data8 sys_chdir
+ data8 sys_fchdir // 1035
+ data8 sys_utimes
+ data8 sys_mknod
+ data8 sys_chmod
+ data8 sys_chown
+ data8 sys_lseek // 1040
+ data8 sys_getpid
+ data8 sys_getppid
+ data8 sys_mount
+ data8 sys_umount
+ data8 sys_setuid // 1045
+ data8 sys_getuid
+ data8 sys_geteuid
+ data8 sys_ptrace
+ data8 sys_access
+ data8 sys_sync // 1050
+ data8 sys_fsync
+ data8 sys_fdatasync
+ data8 sys_kill
+ data8 sys_rename
+ data8 sys_mkdir // 1055
+ data8 sys_rmdir
+ data8 sys_dup
+ data8 sys_pipe
+ data8 sys_times
+ data8 ia64_brk // 1060
+ data8 sys_setgid
+ data8 sys_getgid
+ data8 sys_getegid
+ data8 sys_acct
+ data8 sys_ioctl // 1065
+ data8 sys_fcntl
+ data8 sys_umask
+ data8 sys_chroot
+ data8 sys_ustat
+ data8 sys_dup2 // 1070
+ data8 sys_setreuid
+ data8 sys_setregid
+ data8 sys_getresuid
+ data8 sys_setresuid
+ data8 sys_getresgid // 1075
+ data8 sys_setresgid
+ data8 sys_getgroups
+ data8 sys_setgroups
+ data8 sys_getpgid
+ data8 sys_setpgid // 1080
+ data8 sys_setsid
+ data8 sys_getsid
+ data8 sys_sethostname
+ data8 sys_setrlimit
+ data8 sys_getrlimit // 1085
+ data8 sys_getrusage
+ data8 sys_gettimeofday
+ data8 sys_settimeofday
+ data8 sys_select
+ data8 sys_poll // 1090
+ data8 sys_symlink
+ data8 sys_readlink
+ data8 sys_uselib
+ data8 sys_swapon
+ data8 sys_swapoff // 1095
+ data8 sys_reboot
+ data8 sys_truncate
+ data8 sys_ftruncate
+ data8 sys_fchmod
+ data8 sys_fchown // 1100
+ data8 ia64_getpriority
+ data8 sys_setpriority
+ data8 sys_statfs
+ data8 sys_fstatfs
+ data8 sys_ioperm // 1105
+ data8 sys_semget
+ data8 sys_semop
+ data8 sys_semctl
+ data8 sys_msgget
+ data8 sys_msgsnd // 1110
+ data8 sys_msgrcv
+ data8 sys_msgctl
+ data8 sys_shmget
+ data8 ia64_shmat
+ data8 sys_shmdt // 1115
+ data8 sys_shmctl
+ data8 sys_syslog
+ data8 sys_setitimer
+ data8 sys_getitimer
+ data8 sys_newstat // 1120
+ data8 sys_newlstat
+ data8 sys_newfstat
+ data8 sys_vhangup
+ data8 sys_lchown
+ data8 sys_vm86 // 1125
+ data8 sys_wait4
+ data8 sys_sysinfo
+ data8 sys_clone
+ data8 sys_setdomainname
+ data8 sys_newuname // 1130
+ data8 sys_adjtimex
+ data8 sys_create_module
+ data8 sys_init_module
+ data8 sys_delete_module
+ data8 sys_get_kernel_syms // 1135
+ data8 sys_query_module
+ data8 sys_quotactl
+ data8 sys_bdflush
+ data8 sys_sysfs
+ data8 sys_personality // 1140
+ data8 ia64_ni_syscall // sys_afs_syscall
+ data8 sys_setfsuid
+ data8 sys_setfsgid
+ data8 sys_getdents
+ data8 sys_flock // 1145
+ data8 sys_readv
+ data8 sys_writev
+ data8 sys_pread
+ data8 sys_pwrite
+ data8 sys_sysctl // 1150
+ data8 sys_mmap
+ data8 sys_munmap
+ data8 sys_mlock
+ data8 sys_mlockall
+ data8 sys_mprotect // 1155
+ data8 sys_mremap
+ data8 sys_msync
+ data8 sys_munlock
+ data8 sys_munlockall
+ data8 sys_sched_getparam // 1160
+ data8 sys_sched_setparam
+ data8 sys_sched_getscheduler
+ data8 sys_sched_setscheduler
+ data8 sys_sched_yield
+ data8 sys_sched_get_priority_max // 1165
+ data8 sys_sched_get_priority_min
+ data8 sys_sched_rr_get_interval
+ data8 sys_nanosleep
+ data8 sys_nfsservctl
+ data8 sys_prctl // 1170
+ data8 sys_getpagesize
+ data8 sys_mmap2
+ data8 sys_pciconfig_read
+ data8 sys_pciconfig_write
+ data8 sys_perfmonctl // 1175
+ data8 sys_sigaltstack
+ data8 sys_rt_sigaction
+ data8 sys_rt_sigpending
+ data8 sys_rt_sigprocmask
+ data8 sys_rt_sigqueueinfo // 1180
+ data8 sys_rt_sigreturn
+ data8 sys_rt_sigsuspend
+ data8 sys_rt_sigtimedwait
+ data8 sys_getcwd
+ data8 sys_capget // 1185
+ data8 sys_capset
+ data8 sys_sendfile
+ data8 sys_ni_syscall // sys_getpmsg (STREAMS)
+ data8 sys_ni_syscall // sys_putpmsg (STREAMS)
+ data8 sys_socket // 1190
+ data8 sys_bind
+ data8 sys_connect
+ data8 sys_listen
+ data8 sys_accept
+ data8 sys_getsockname // 1195
+ data8 sys_getpeername
+ data8 sys_socketpair
+ data8 sys_send
+ data8 sys_sendto
+ data8 sys_recv // 1200
+ data8 sys_recvfrom
+ data8 sys_shutdown
+ data8 sys_setsockopt
+ data8 sys_getsockopt
+ data8 sys_sendmsg // 1205
+ data8 sys_recvmsg
+ data8 sys_pivot_root
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1210
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1215
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1220
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1225
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1230
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1235
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1240
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1245
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1250
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1255
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1260
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1265
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1270
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1275
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+
diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h
new file mode 100644
index 000000000..ecef44f60
--- /dev/null
+++ b/arch/ia64/kernel/entry.h
@@ -0,0 +1,8 @@
+/*
+ * Preserved registers that are shared between code in ivt.S and entry.S. Be
+ * careful not to step on these!
+ */
+#define pEOI p1 /* should leave_kernel write EOI? */
+#define pKern p2 /* will leave_kernel return to kernel-mode? */
+#define pSys p4 /* are we processing a (synchronous) system call? */
+#define pNonSys p5 /* complement of pSys */
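+
+/*
+ * These names are used as qualifying predicates on individual
+ * instructions in ivt.S and entry.S. For example (the branch target
+ * below is a hypothetical label, shown for illustration only):
+ *
+ *	(pSys)	br.cond.sptk.few handle_syscall_path
+ *
+ * takes the branch only while a synchronous system call is being
+ * processed.
+ */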
diff --git a/arch/ia64/kernel/fw-emu.c b/arch/ia64/kernel/fw-emu.c
new file mode 100644
index 000000000..212ff299c
--- /dev/null
+++ b/arch/ia64/kernel/fw-emu.c
@@ -0,0 +1,444 @@
+/*
+ * PAL & SAL emulation.
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * For the HP simulator, this file gets included in boot/bootloader.c.
+ * For SoftSDV, this file gets included in sys_softsdv.c.
+ */
+#include <linux/config.h>
+
+#ifdef CONFIG_PCI
+# include <linux/pci.h>
+#endif
+
+#include <asm/efi.h>
+#include <asm/io.h>
+#include <asm/pal.h>
+#include <asm/sal.h>
+
+#define MB (1024*1024UL)
+
+#define NUM_MEM_DESCS 3
+
+static char fw_mem[( sizeof(efi_system_table_t)
+ + sizeof(efi_runtime_services_t)
+ + 1*sizeof(efi_config_table_t)
+ + sizeof(struct ia64_sal_systab)
+ + sizeof(struct ia64_sal_desc_entry_point)
+ + NUM_MEM_DESCS*(sizeof(efi_memory_desc_t))
+ + 1024)] __attribute__ ((aligned (8)));
+
+#ifdef CONFIG_IA64_HP_SIM
+
+/* Simulator system calls: */
+
+#define SSC_EXIT 66
+
+/*
+ * Simulator system call.
+ */
+static long
+ssc (long arg0, long arg1, long arg2, long arg3, int nr)
+{
+ register long r8 asm ("r8");
+
+ asm volatile ("mov r15=%1\n\t"
+ "break 0x80001"
+ : "=r"(r8)
+ : "r"(nr), "r"(arg0), "r"(arg1), "r"(arg2), "r"(arg3));
+ return r8;
+}
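+
+/*
+ * Example usage from later in this file: efi_get_time() issues
+ * ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD) to read the
+ * simulated time of day and efi_reset_system() issues
+ * ssc(status, 0, 0, 0, SSC_EXIT) to terminate the simulation.
+ */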
+
+#define SECS_PER_HOUR (60 * 60)
+#define SECS_PER_DAY (SECS_PER_HOUR * 24)
+
+/* Compute the broken-down time representation of T (seconds since the
+ epoch, in UTC) and store year, month, day, hour, minute, and second
+ into *TP. Return nonzero if successful. */
+int
+offtime (unsigned long t, efi_time_t *tp)
+{
+ const unsigned short int __mon_yday[2][13] =
+ {
+ /* Normal years. */
+ { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
+ /* Leap years. */
+ { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
+ };
+ long int days, rem, y;
+ const unsigned short int *ip;
+
+ days = t / SECS_PER_DAY;
+ rem = t % SECS_PER_DAY;
+ while (rem < 0) {
+ rem += SECS_PER_DAY;
+ --days;
+ }
+ while (rem >= SECS_PER_DAY) {
+ rem -= SECS_PER_DAY;
+ ++days;
+ }
+ tp->hour = rem / SECS_PER_HOUR;
+ rem %= SECS_PER_HOUR;
+ tp->minute = rem / 60;
+ tp->second = rem % 60;
+ /* January 1, 1970 was a Thursday. */
+ y = 1970;
+
+# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
+# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
+# define __isleap(year) \
+ ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
+
+ while (days < 0 || days >= (__isleap (y) ? 366 : 365)) {
+ /* Guess a corrected year, assuming 365 days per year. */
+ long int yg = y + days / 365 - (days % 365 < 0);
+
+ /* Adjust DAYS and Y to match the guessed year. */
+ days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1)
+ - LEAPS_THRU_END_OF (y - 1));
+ y = yg;
+ }
+ tp->year = y;
+ ip = __mon_yday[__isleap(y)];
+ for (y = 11; days < (long int) ip[y]; --y)
+ continue;
+ days -= ip[y];
+ tp->month = y + 1;
+ tp->day = days + 1;
+ return 1;
+}
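+
+/*
+ * Worked example: offtime(86399, &tm) describes the last second of
+ * 1970-01-01: days = 0 and rem = 86399, giving hour = 23, minute = 59,
+ * second = 59, while the year/month loops leave year = 1970,
+ * month = 1, day = 1.
+ */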
+
+#endif /* CONFIG_IA64_HP_SIM */
+
+/*
+ * Very ugly, but we need this in the simulator only. Once we run on
+ * real hw, this can all go away.
+ */
+extern void pal_emulator_static (void);
+
+asm ("
+ .proc pal_emulator_static
+pal_emulator_static:
+ mov r8=-1
+ cmp.eq p6,p7=6,r28 /* PAL_PTCE_INFO */
+(p7) br.cond.sptk.few 1f
+ ;;
+ mov r8=0 /* status = 0 */
+ movl r9=0x100000000 /* tc.base */
+ movl r10=0x0000000200000003 /* count[0], count[1] */
+ movl r11=0x1000000000002000 /* stride[0], stride[1] */
+ br.cond.sptk.few rp
+
+1: cmp.eq p6,p7=14,r28 /* PAL_FREQ_RATIOS */
+(p7) br.cond.sptk.few 1f
+ mov r8=0 /* status = 0 */
+ movl r9 =0x100000064 /* proc_ratio (1/100) */
+ movl r10=0x100000100 /* bus_ratio<<32 (1/256) */
+ movl r11=0x100000064 /* itc_ratio<<32 (1/100) */
+1: br.cond.sptk.few rp
+ .endp pal_emulator_static\n");
+
+/* Macros used to emulate SAL calls via legacy IN and OUT accesses to the PCI config ports 0xCF8/0xCFC. */
+
+#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3)
+
+#define REG_OFFSET(addr) (0x00000000000000FF & (addr))
+#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr))
+#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr))
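+
+/*
+ * Example: for the config address 0x00012208 (bus 0x01, device/function
+ * 0x22, register 0x08), BUS_NUMBER() yields 0x010000, DEVICE_FUNCTION()
+ * yields 0x2200, REG_OFFSET() yields 0x08, and BUILD_CMD() yields
+ * 0x80012208, the command written to port 0xCF8 before the data access
+ * through 0xCFC below.
+ */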
+
+static efi_status_t
+efi_get_time (efi_time_t *tm, efi_time_cap_t *tc)
+{
+#ifdef CONFIG_IA64_HP_SIM
+ struct {
+ int tv_sec; /* must be 32bits to work */
+ int tv_usec;
+ } tv32bits;
+
+ ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD);
+
+ memset(tm, 0, sizeof(*tm));
+ offtime(tv32bits.tv_sec, tm);
+
+ if (tc)
+ memset(tc, 0, sizeof(*tc));
+#else
+# error Not implemented yet...
+#endif
+ return EFI_SUCCESS;
+}
+
+static void
+efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data)
+{
+#ifdef CONFIG_IA64_HP_SIM
+ ssc(status, 0, 0, 0, SSC_EXIT);
+#else
+# error Not implemented yet...
+#endif
+}
+
+static efi_status_t
+efi_unimplemented (void)
+{
+ return EFI_UNSUPPORTED;
+}
+
+static long
+sal_emulator (long index, unsigned long in1, unsigned long in2,
+ unsigned long in3, unsigned long in4, unsigned long in5,
+ unsigned long in6, unsigned long in7)
+{
+ register long r9 asm ("r9") = 0;
+ register long r10 asm ("r10") = 0;
+ register long r11 asm ("r11") = 0;
+ long status;
+
+ /*
+ * Don't do a "switch" here since that gives us code that
+ * isn't self-relocatable.
+ */
+ status = 0;
+ if (index == SAL_FREQ_BASE) {
+ switch (in1) {
+ case SAL_FREQ_BASE_PLATFORM:
+ r9 = 100000000;
+ break;
+
+ case SAL_FREQ_BASE_INTERVAL_TIMER:
+ /*
+ * Is this supposed to be the cr.itc frequency
+ * or something platform specific? The SAL
+ * doc ain't exactly clear on this...
+ */
+#if defined(CONFIG_IA64_SOFTSDV_HACKS)
+ r9 = 4000000;
+#elif defined(CONFIG_IA64_SDV)
+ r9 = 300000000;
+#else
+ r9 = 700000000;
+#endif
+ break;
+
+ case SAL_FREQ_BASE_REALTIME_CLOCK:
+ r9 = 1;
+ break;
+
+ default:
+ status = -1;
+ break;
+ }
+ } else if (index == SAL_SET_VECTORS) {
+ ;
+ } else if (index == SAL_GET_STATE_INFO) {
+ ;
+ } else if (index == SAL_GET_STATE_INFO_SIZE) {
+ ;
+ } else if (index == SAL_CLEAR_STATE_INFO) {
+ ;
+ } else if (index == SAL_MC_RENDEZ) {
+ ;
+ } else if (index == SAL_MC_SET_PARAMS) {
+ ;
+ } else if (index == SAL_CACHE_FLUSH) {
+ ;
+ } else if (index == SAL_CACHE_INIT) {
+ ;
+#ifdef CONFIG_PCI
+ } else if (index == SAL_PCI_CONFIG_READ) {
+ /*
+ * in1 contains the PCI configuration address and in2
+ * the size of the read. The value that is read is
+ * returned via the general register r9.
+ */
+ outl(BUILD_CMD(in1), 0xCF8);
+ if (in2 == 1) /* Reading byte */
+ r9 = inb(0xCFC + ((REG_OFFSET(in1) & 3)));
+ else if (in2 == 2) /* Reading word */
+ r9 = inw(0xCFC + ((REG_OFFSET(in1) & 2)));
+ else /* Reading dword */
+ r9 = inl(0xCFC);
+ status = PCIBIOS_SUCCESSFUL;
+ } else if (index == SAL_PCI_CONFIG_WRITE) {
+ /*
+ * in1 contains the PCI configuration address, in2 the
+ * size of the write, and in3 the actual value to be
+ * written out.
+ */
+ outl(BUILD_CMD(in1), 0xCF8);
+ if (in2 == 1) /* Writing byte */
+ outb(in3, 0xCFC + ((REG_OFFSET(in1) & 3)));
+ else if (in2 == 2) /* Writing word */
+ outw(in3, 0xCFC + ((REG_OFFSET(in1) & 2)));
+ else /* Writing dword */
+ outl(in3, 0xCFC);
+ status = PCIBIOS_SUCCESSFUL;
+#endif /* CONFIG_PCI */
+ } else if (index == SAL_UPDATE_PAL) {
+ ;
+ } else {
+ status = -1;
+ }
+ asm volatile ("" :: "r"(r9), "r"(r10), "r"(r11));
+ return status;
+}
+
+
+/*
+ * This is here to work around a bug in egcs-1.1.1b that causes the
+ * compiler to crash (it seems to be a bug in the new alias analysis code).
+ */
+void *
+id (long addr)
+{
+ return (void *) addr;
+}
+
+void
+sys_fw_init (const char *args, int arglen)
+{
+ efi_system_table_t *efi_systab;
+ efi_runtime_services_t *efi_runtime;
+ efi_config_table_t *efi_tables;
+ struct ia64_sal_systab *sal_systab;
+ efi_memory_desc_t *efi_memmap, *md;
+ unsigned long *pal_desc, *sal_desc;
+ struct ia64_sal_desc_entry_point *sal_ed;
+ struct ia64_boot_param *bp;
+ unsigned char checksum = 0;
+ char *cp, *cmd_line;
+
+ memset(fw_mem, 0, sizeof(fw_mem));
+
+ pal_desc = (unsigned long *) &pal_emulator_static;
+ sal_desc = (unsigned long *) &sal_emulator;
+
+ cp = fw_mem;
+ efi_systab = (void *) cp; cp += sizeof(*efi_systab);
+ efi_runtime = (void *) cp; cp += sizeof(*efi_runtime);
+ efi_tables = (void *) cp; cp += sizeof(*efi_tables);
+ sal_systab = (void *) cp; cp += sizeof(*sal_systab);
+ sal_ed = (void *) cp; cp += sizeof(*sal_ed);
+ efi_memmap = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap);
+ cmd_line = (void *) cp;
+
+ if (args) {
+ if (arglen >= 1024)
+ arglen = 1023;
+ memcpy(cmd_line, args, arglen);
+ } else {
+ arglen = 0;
+ }
+ cmd_line[arglen] = '\0';
+
+ memset(efi_systab, 0, sizeof(*efi_systab));
+ efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE;
+ efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION;
+ efi_systab->hdr.headersize = sizeof(efi_systab->hdr);
+ efi_systab->fw_vendor = __pa("H\0e\0w\0l\0e\0t\0t\0-\0P\0a\0c\0k\0a\0r\0d\0\0");
+ efi_systab->fw_revision = 1;
+ efi_systab->runtime = __pa(efi_runtime);
+ efi_systab->nr_tables = 1;
+ efi_systab->tables = __pa(efi_tables);
+
+ efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE;
+ efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION;
+ efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr);
+ efi_runtime->get_time = __pa(&efi_get_time);
+ efi_runtime->set_time = __pa(&efi_unimplemented);
+ efi_runtime->get_wakeup_time = __pa(&efi_unimplemented);
+ efi_runtime->set_wakeup_time = __pa(&efi_unimplemented);
+ efi_runtime->set_virtual_address_map = __pa(&efi_unimplemented);
+ efi_runtime->get_variable = __pa(&efi_unimplemented);
+ efi_runtime->get_next_variable = __pa(&efi_unimplemented);
+ efi_runtime->set_variable = __pa(&efi_unimplemented);
+ efi_runtime->get_next_high_mono_count = __pa(&efi_unimplemented);
+ efi_runtime->reset_system = __pa(&efi_reset_system);
+
+ efi_tables->guid = SAL_SYSTEM_TABLE_GUID;
+ efi_tables->table = __pa(sal_systab);
+
+ /* fill in the SAL system table: */
+ memcpy(sal_systab->signature, "SST_", 4);
+ sal_systab->size = sizeof(*sal_systab);
+ sal_systab->sal_rev_minor = 1;
+ sal_systab->sal_rev_major = 0;
+ sal_systab->entry_count = 1;
+ sal_systab->ia32_bios_present = 0;
+
+#ifdef CONFIG_IA64_GENERIC
+ strcpy(sal_systab->oem_id, "Generic");
+ strcpy(sal_systab->product_id, "IA-64 system");
+#endif
+
+#ifdef CONFIG_IA64_HP_SIM
+ strcpy(sal_systab->oem_id, "Hewlett-Packard");
+ strcpy(sal_systab->product_id, "HP-simulator");
+#endif
+
+#ifdef CONFIG_IA64_SDV
+ strcpy(sal_systab->oem_id, "Intel");
+ strcpy(sal_systab->product_id, "SDV");
+#endif
+
+#ifdef CONFIG_IA64_SGI_SN1_SIM
+ strcpy(sal_systab->oem_id, "SGI");
+ strcpy(sal_systab->product_id, "SN1");
+#endif
+
+ /* fill in an entry point: */
+ sal_ed->type = SAL_DESC_ENTRY_POINT;
+ sal_ed->pal_proc = __pa(pal_desc[0]);
+ sal_ed->sal_proc = __pa(sal_desc[0]);
+ sal_ed->gp = __pa(sal_desc[1]);
+
+ for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp)
+ checksum += *cp;
+
+ sal_systab->checksum = -checksum;
+
+ /* fill in a memory descriptor: */
+ md = &efi_memmap[0];
+ md->type = EFI_CONVENTIONAL_MEMORY;
+ md->pad = 0;
+ md->phys_addr = 2*MB;
+ md->virt_addr = 0;
+ md->num_pages = (64*MB) >> 12; /* 64MB (in 4KB pages) */
+ md->attribute = EFI_MEMORY_WB;
+
+ /* descriptor for firmware emulator: */
+ md = &efi_memmap[1];
+ md->type = EFI_RUNTIME_SERVICES_DATA;
+ md->pad = 0;
+ md->phys_addr = 1*MB;
+ md->virt_addr = 0;
+ md->num_pages = (1*MB) >> 12; /* 1MB (in 4KB pages) */
+ md->attribute = EFI_MEMORY_WB;
+
+ /* descriptor for high memory (>4GB): */
+ md = &efi_memmap[2];
+ md->type = EFI_CONVENTIONAL_MEMORY;
+ md->pad = 0;
+ md->phys_addr = 4096*MB;
+ md->virt_addr = 0;
+ md->num_pages = (32*MB) >> 12; /* 32MB (in 4KB pages) */
+ md->attribute = EFI_MEMORY_WB;
+
+ bp = id(ZERO_PAGE_ADDR);
+ bp->efi_systab = __pa(&fw_mem);
+ bp->efi_memmap = __pa(efi_memmap);
+ bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t);
+ bp->efi_memdesc_size = sizeof(efi_memory_desc_t);
+ bp->efi_memdesc_version = 1;
+ bp->command_line = __pa(cmd_line);
+ bp->console_info.num_cols = 80;
+ bp->console_info.num_rows = 25;
+ bp->console_info.orig_x = 0;
+ bp->console_info.orig_y = 24;
+ bp->num_pci_vectors = 0;
+ bp->fpswa = 0;
+}
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
new file mode 100644
index 000000000..24dc10ee4
--- /dev/null
+++ b/arch/ia64/kernel/gate.S
@@ -0,0 +1,200 @@
+/*
+ * This file contains the code that gets mapped at the upper end of
+ * each task's text region. For now, it contains the signal
+ * trampoline code only.
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <asm/offsets.h>
+#include <asm/sigcontext.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+#include <asm/page.h>
+
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .section __gate_section,"ax"
+
+ .align PAGE_SIZE
+
+# define SIGINFO_OFF 16
+# define SIGCONTEXT_OFF (SIGINFO_OFF + ((IA64_SIGINFO_SIZE + 15) & ~15))
+# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET
+# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET
+# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET
+# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET
+# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET
+# define base0 r2
+# define base1 r3
+ /*
+ * When we get here, the memory stack looks like this:
+ *
+ * +===============================+
+ * | |
+ * // struct sigcontext //
+ * | |
+ * +===============================+ <-- sp+SIGCONTEXT_OFF
+ * | |
+ * // rest of siginfo //
+ * | |
+ * + +---------------+
+ * | | siginfo.code |
+ * +---------------+---------------+
+ * | siginfo.errno | siginfo.signo |
+ * +-------------------------------+ <-- sp+SIGINFO_OFF
+ * | 16 byte of scratch |
+ * | space |
+ * +-------------------------------+ <-- sp
+ *
+ * The register stack looks _exactly_ the way it looked at the
+ * time the signal occurred. In other words, we're treading
+ * on a potential mine-field: each incoming general register
+ * may be a NaT value (including sp, in which case the process
+ * ends up dying with a SIGSEGV).
+ *
+ * The first thing we need to do is a cover to get the registers onto
+ * the backing store. Once that is done, we invoke the signal
+ * handler which may modify some of the machine state. After
+ * returning from the signal handler, we return control to the
+ * previous context by executing a sigreturn system call. A
+ * signal handler may call the rt_sigreturn() function to
+ * directly return to a given sigcontext. However, the
+ * user-level sigreturn() needs to do much more than calling
+ * the rt_sigreturn() system call as it needs to unwind the
+ * stack to restore preserved registers that may have been
+ * saved on the signal handler's call stack.
+ *
+ * On entry:
+ * r2 = signal number
+ * r3 = plabel of signal handler
+ * r15 = new register backing store (ignored)
+ * [sp+16] = sigframe
+ */
+
+ .global ia64_sigtramp
+ .proc ia64_sigtramp
+ia64_sigtramp:
+ ld8 r10=[r3],8 // get signal handler entry point
+ br.call.sptk.many rp=invoke_sighandler
+.ret0: mov r15=__NR_rt_sigreturn
+ break __BREAK_SYSCALL
+ .endp ia64_sigtramp
+
+ .proc invoke_sighandler
+invoke_sighandler:
+ ld8 gp=[r3] // get signal handler's global pointer
+ mov b6=r10
+ cover // push args in interrupted frame onto backing store
+ ;;
+ alloc r8=ar.pfs,0,1,3,0 // get CFM0, EC0, and CPL0 into r8
+ mov r17=ar.bsp // fetch ar.bsp
+ mov loc0=rp // save return pointer
+ ;;
+ cmp.ne p8,p0=r15,r0 // do we need to switch the rbs?
+ mov out0=r2 // signal number
+(p8) br.cond.spnt.few setup_rbs // yup -> (clobbers r14 and r16)
+back_from_setup_rbs:
+ adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
+ ;;
+ st8 [base0]=r17,(CFM_OFF-BSP_OFF) // save sc_ar_bsp
+ adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
+ ;;
+
+ st8 [base0]=r8 // save CFM0, EC0, and CPL0
+ adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
+ ;;
+ stf.spill [base0]=f6,32
+ stf.spill [base1]=f7,32
+ ;;
+ stf.spill [base0]=f8,32
+ stf.spill [base1]=f9,32
+ ;;
+ stf.spill [base0]=f10,32
+ stf.spill [base1]=f11,32
+ adds out1=SIGINFO_OFF,sp // siginfo pointer
+ ;;
+ stf.spill [base0]=f12,32
+ stf.spill [base1]=f13,32
+ adds out2=SIGCONTEXT_OFF,sp // sigcontext pointer
+ ;;
+ stf.spill [base0]=f14,32
+ stf.spill [base1]=f15,32
+ br.call.sptk.few rp=b6 // call the signal handler
+.ret2: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
+ ;;
+ ld8 r15=[base0],(CFM_OFF-BSP_OFF) // fetch sc_ar_bsp and advance to CFM_OFF
+ mov r14=ar.bsp
+ ;;
+ ld8 r8=[base0] // restore (perhaps modified) CFM0, EC0, and CPL0
+ cmp.ne p8,p0=r14,r15 // do we need to restore the rbs?
+(p8) br.cond.spnt.few restore_rbs // yup -> (clobbers r14 and r16)
+back_from_restore_rbs:
+ {
+ and r9=0x7f,r8 // r9 <- CFM0.sof
+ extr.u r10=r8,7,7 // r10 <- CFM0.sol
+ mov r11=ip
+ }
+ ;;
+ adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
+ adds r11=(cont-back_from_restore_rbs),r11
+ sub r9=r9,r10 // r9 <- CFM0.sof - CFM0.sol == CFM0.nout
+ ;;
+ adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
+ dep r9=r9,r9,7,7 // r9.sol = r9.sof
+ mov b6=r11
+ ;;
+ ldf.fill f6=[base0],32
+ ldf.fill f7=[base1],32
+ mov rp=loc0 // copy return pointer out of stacked register
+ ;;
+ ldf.fill f8=[base0],32
+ ldf.fill f9=[base1],32
+ ;;
+ ldf.fill f10=[base0],32
+ ldf.fill f11=[base1],32
+ ;;
+ ldf.fill f12=[base0],32
+ ldf.fill f13=[base1],32
+ mov ar.pfs=r9
+ ;;
+ ldf.fill f14=[base0],32
+ ldf.fill f15=[base1],32
+ br.ret.sptk.few b6
+cont: mov ar.pfs=r8 // ar.pfs = CFM0
+ br.ret.sptk.few rp // re-establish CFM0
+ .endp invoke_sighandler
+
+ .proc setup_rbs
+setup_rbs:
+ flushrs // must be first in insn group
+ ;;
+ mov ar.rsc=r0 // put RSE into enforced lazy mode
+ adds r16=(RNAT_OFF+SIGCONTEXT_OFF),sp
+ mov r14=ar.rnat // get rnat as updated by flushrs
+ ;;
+ mov ar.bspstore=r15 // set new register backing store area
+ st8 [r16]=r14 // save sc_ar_rnat
+ ;;
+ mov ar.rsc=0xf // set RSE into eager mode, pl 3
+ invala // invalidate ALAT
+ br.cond.sptk.many back_from_setup_rbs
+
+ .proc restore_rbs
+restore_rbs:
+ flushrs
+ mov ar.rsc=r0 // put RSE into enforced lazy mode
+ adds r16=(RNAT_OFF+SIGCONTEXT_OFF),sp
+ ;;
+ ld8 r14=[r16] // get new rnat
+ mov ar.bspstore=r15 // set old register backing store area
+ ;;
+ mov ar.rnat=r14 // establish new rnat
+ mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc)
+ // invala not necessary as that will happen when returning to user-mode
+ br.cond.sptk.many back_from_restore_rbs
+
+ .endp restore_rbs
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
new file mode 100644
index 000000000..50d965e02
--- /dev/null
+++ b/arch/ia64/kernel/head.S
@@ -0,0 +1,646 @@
+/*
+ * Here is where the ball gets rolling as far as the kernel is concerned.
+ * When control is transferred to _start, the bootloader has already
+ * loaded us to the correct address. All that's left to do here is
+ * to set up the kernel's global pointer and jump to the kernel
+ * entry point.
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Intel Corp.
+ * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
+ */
+
+#include <linux/config.h>
+
+#include <asm/fpu.h>
+#include <asm/pal.h>
+#include <asm/offsets.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .section __special_page_section,"ax"
+
+ .global empty_zero_page
+empty_zero_page:
+ .skip PAGE_SIZE
+
+ .global swapper_pg_dir
+swapper_pg_dir:
+ .skip PAGE_SIZE
+
+ .global empty_bad_page
+empty_bad_page:
+ .skip PAGE_SIZE
+
+ .global empty_bad_pte_table
+empty_bad_pte_table:
+ .skip PAGE_SIZE
+
+ .global empty_bad_pmd_table
+empty_bad_pmd_table:
+ .skip PAGE_SIZE
+
+ .rodata
+halt_msg:
+ stringz "Halting kernel\n"
+
+ .text
+ .align 16
+ .global _start
+ .proc _start
+_start:
+ // set IVT entry point---can't access I/O ports without it
+ movl r3=ia64_ivt
+ ;;
+ mov cr.iva=r3
+ movl r2=FPSR_DEFAULT
+ ;;
+ srlz.i
+ movl gp=__gp
+
+ mov ar.fpsr=r2
+ ;;
+
+#ifdef CONFIG_IA64_EARLY_PRINTK
+ mov r2=6
+ mov r3=(8<<8) | (28<<2)
+ ;;
+ mov rr[r2]=r3
+ ;;
+ srlz.i
+ ;;
+#endif
+
+#define isAP p2 // are we booting an Application Processor (not the BSP)?
+
+ // Find the init_task for the currently booting CPU. At power-on and
+ // in UP mode, cpu_now_booting is 0.
+ movl r3=cpu_now_booting
+ ;;
+ ld4 r3=[r3]
+ movl r2=init_tasks
+ ;;
+ shladd r2=r3,3,r2
+ ;;
+ ld8 r2=[r2]
+ cmp4.ne isAP,p0=r3,r0 // isAP == true if this is an application processor (AP)
+ ;; // RAW on r2
+ extr r3=r2,0,61 // r3 == phys addr of task struct
+ ;;
+
+ // load the "current" pointer (r13) and ar.k6 with the current task
+ mov r13=r2
+ mov ar.k6=r3 // Physical address
+ ;;
+ /*
+ * Reserve space at the top of the stack for "struct pt_regs". Kernel threads
+ * don't store interesting values in that structure, but the space still needs
+ * to be there because time-critical stuff such as the context switching can
+ * be implemented more efficiently (for example, __switch_to()
+ * always sets the psr.dfh bit of the task it is switching to).
+ */
+ addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2
+ addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE
+ mov ar.rsc=r0 // place RSE in enforced lazy mode
+ ;;
+ mov ar.bspstore=r2 // establish the new RSE stack
+ ;;
+ loadrs // load zero bytes from the register stack
+ ;;
+ mov ar.rsc=0x3 // place RSE in eager mode
+ ;;
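+
+ // At this point r12 (the stack pointer) sits just below the pt_regs
+ // area at the top of the task's kernel stack and ar.bspstore points at
+ // the task's register backing store, so the memory and register stacks
+ // are ready for the C calls made below.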
+
+#ifdef CONFIG_IA64_EARLY_PRINTK
+ .rodata
+alive_msg:
+ stringz "I'm alive and well\n"
+ .previous
+
+ alloc r2=ar.pfs,0,0,2,0
+ movl out0=alive_msg
+ ;;
+ br.call.sptk.few rp=early_printk
+1: // force new bundle
+#endif /* CONFIG_IA64_EARLY_PRINTK */
+
+ alloc r2=ar.pfs,8,0,2,0
+#ifdef CONFIG_SMP
+(isAP) br.call.sptk.few rp=smp_callin
+.ret1:
+(isAP) br.cond.sptk.few self
+#endif
+
+#undef isAP
+
+ // This is executed by the bootstrap processor (bsp) only:
+
+#ifdef CONFIG_IA64_FW_EMU
+ // initialize PAL & SAL emulator:
+ br.call.sptk.few rp=sys_fw_init
+ ;;
+#endif
+ br.call.sptk.few rp=start_kernel
+.ret2:
+ addl r2=@ltoff(halt_msg),gp
+ ;;
+ ld8 out0=[r2]
+ br.call.sptk.few b0=console_print
+self: br.sptk.few self // endless loop
+ .endp _start
+
+ .align 16
+ .global ia64_save_debug_regs
+ .proc ia64_save_debug_regs
+ia64_save_debug_regs:
+ alloc r16=ar.pfs,1,0,0,0
+ mov r20=ar.lc // preserve ar.lc
+ mov ar.lc=IA64_NUM_DBG_REGS-1
+ mov r18=0
+ add r19=IA64_NUM_DBG_REGS*8,in0
+ ;;
+1: mov r16=dbr[r18]
+ mov r17=ibr[r18]
+ add r18=1,r18
+ ;;
+ st8.nta [in0]=r16,8
+ st8.nta [r19]=r17,8
+ br.cloop.sptk.few 1b
+
+ mov ar.lc=r20 // restore ar.lc
+ br.ret.sptk.few b0
+ .endp ia64_save_debug_regs
+
+ .align 16
+ .global ia64_load_debug_regs
+ .proc ia64_load_debug_regs
+ia64_load_debug_regs:
+ alloc r16=ar.pfs,1,0,0,0
+ lfetch.nta [in0]
+ mov r20=ar.lc // preserve ar.lc
+ add r19=IA64_NUM_DBG_REGS*8,in0
+ mov ar.lc=IA64_NUM_DBG_REGS-1
+ mov r18=-1
+ ;;
+1: ld8.nta r16=[in0],8
+ ld8.nta r17=[r19],8
+ add r18=1,r18
+ ;;
+ mov dbr[r18]=r16
+ mov ibr[r18]=r17
+ br.cloop.sptk.few 1b
+
+ mov ar.lc=r20 // restore ar.lc
+ br.ret.sptk.few b0
+ .endp ia64_load_debug_regs
+
+ .align 16
+ .global __ia64_save_fpu
+ .proc __ia64_save_fpu
+__ia64_save_fpu:
+ alloc r2=ar.pfs,1,0,0,0
+ adds r3=16,in0
+ ;;
+ stf.spill.nta [in0]=f32,32
+ stf.spill.nta [ r3]=f33,32
+ ;;
+ stf.spill.nta [in0]=f34,32
+ stf.spill.nta [ r3]=f35,32
+ ;;
+ stf.spill.nta [in0]=f36,32
+ stf.spill.nta [ r3]=f37,32
+ ;;
+ stf.spill.nta [in0]=f38,32
+ stf.spill.nta [ r3]=f39,32
+ ;;
+ stf.spill.nta [in0]=f40,32
+ stf.spill.nta [ r3]=f41,32
+ ;;
+ stf.spill.nta [in0]=f42,32
+ stf.spill.nta [ r3]=f43,32
+ ;;
+ stf.spill.nta [in0]=f44,32
+ stf.spill.nta [ r3]=f45,32
+ ;;
+ stf.spill.nta [in0]=f46,32
+ stf.spill.nta [ r3]=f47,32
+ ;;
+ stf.spill.nta [in0]=f48,32
+ stf.spill.nta [ r3]=f49,32
+ ;;
+ stf.spill.nta [in0]=f50,32
+ stf.spill.nta [ r3]=f51,32
+ ;;
+ stf.spill.nta [in0]=f52,32
+ stf.spill.nta [ r3]=f53,32
+ ;;
+ stf.spill.nta [in0]=f54,32
+ stf.spill.nta [ r3]=f55,32
+ ;;
+ stf.spill.nta [in0]=f56,32
+ stf.spill.nta [ r3]=f57,32
+ ;;
+ stf.spill.nta [in0]=f58,32
+ stf.spill.nta [ r3]=f59,32
+ ;;
+ stf.spill.nta [in0]=f60,32
+ stf.spill.nta [ r3]=f61,32
+ ;;
+ stf.spill.nta [in0]=f62,32
+ stf.spill.nta [ r3]=f63,32
+ ;;
+ stf.spill.nta [in0]=f64,32
+ stf.spill.nta [ r3]=f65,32
+ ;;
+ stf.spill.nta [in0]=f66,32
+ stf.spill.nta [ r3]=f67,32
+ ;;
+ stf.spill.nta [in0]=f68,32
+ stf.spill.nta [ r3]=f69,32
+ ;;
+ stf.spill.nta [in0]=f70,32
+ stf.spill.nta [ r3]=f71,32
+ ;;
+ stf.spill.nta [in0]=f72,32
+ stf.spill.nta [ r3]=f73,32
+ ;;
+ stf.spill.nta [in0]=f74,32
+ stf.spill.nta [ r3]=f75,32
+ ;;
+ stf.spill.nta [in0]=f76,32
+ stf.spill.nta [ r3]=f77,32
+ ;;
+ stf.spill.nta [in0]=f78,32
+ stf.spill.nta [ r3]=f79,32
+ ;;
+ stf.spill.nta [in0]=f80,32
+ stf.spill.nta [ r3]=f81,32
+ ;;
+ stf.spill.nta [in0]=f82,32
+ stf.spill.nta [ r3]=f83,32
+ ;;
+ stf.spill.nta [in0]=f84,32
+ stf.spill.nta [ r3]=f85,32
+ ;;
+ stf.spill.nta [in0]=f86,32
+ stf.spill.nta [ r3]=f87,32
+ ;;
+ stf.spill.nta [in0]=f88,32
+ stf.spill.nta [ r3]=f89,32
+ ;;
+ stf.spill.nta [in0]=f90,32
+ stf.spill.nta [ r3]=f91,32
+ ;;
+ stf.spill.nta [in0]=f92,32
+ stf.spill.nta [ r3]=f93,32
+ ;;
+ stf.spill.nta [in0]=f94,32
+ stf.spill.nta [ r3]=f95,32
+ ;;
+ stf.spill.nta [in0]=f96,32
+ stf.spill.nta [ r3]=f97,32
+ ;;
+ stf.spill.nta [in0]=f98,32
+ stf.spill.nta [ r3]=f99,32
+ ;;
+ stf.spill.nta [in0]=f100,32
+ stf.spill.nta [ r3]=f101,32
+ ;;
+ stf.spill.nta [in0]=f102,32
+ stf.spill.nta [ r3]=f103,32
+ ;;
+ stf.spill.nta [in0]=f104,32
+ stf.spill.nta [ r3]=f105,32
+ ;;
+ stf.spill.nta [in0]=f106,32
+ stf.spill.nta [ r3]=f107,32
+ ;;
+ stf.spill.nta [in0]=f108,32
+ stf.spill.nta [ r3]=f109,32
+ ;;
+ stf.spill.nta [in0]=f110,32
+ stf.spill.nta [ r3]=f111,32
+ ;;
+ stf.spill.nta [in0]=f112,32
+ stf.spill.nta [ r3]=f113,32
+ ;;
+ stf.spill.nta [in0]=f114,32
+ stf.spill.nta [ r3]=f115,32
+ ;;
+ stf.spill.nta [in0]=f116,32
+ stf.spill.nta [ r3]=f117,32
+ ;;
+ stf.spill.nta [in0]=f118,32
+ stf.spill.nta [ r3]=f119,32
+ ;;
+ stf.spill.nta [in0]=f120,32
+ stf.spill.nta [ r3]=f121,32
+ ;;
+ stf.spill.nta [in0]=f122,32
+ stf.spill.nta [ r3]=f123,32
+ ;;
+ stf.spill.nta [in0]=f124,32
+ stf.spill.nta [ r3]=f125,32
+ ;;
+ stf.spill.nta [in0]=f126,32
+ stf.spill.nta [ r3]=f127,32
+ br.ret.sptk.few rp
+ .endp __ia64_save_fpu
+
+ .align 16
+ .global __ia64_load_fpu
+ .proc __ia64_load_fpu
+__ia64_load_fpu:
+ alloc r2=ar.pfs,1,0,0,0
+ adds r3=16,in0
+ ;;
+ ldf.fill.nta f32=[in0],32
+ ldf.fill.nta f33=[ r3],32
+ ;;
+ ldf.fill.nta f34=[in0],32
+ ldf.fill.nta f35=[ r3],32
+ ;;
+ ldf.fill.nta f36=[in0],32
+ ldf.fill.nta f37=[ r3],32
+ ;;
+ ldf.fill.nta f38=[in0],32
+ ldf.fill.nta f39=[ r3],32
+ ;;
+ ldf.fill.nta f40=[in0],32
+ ldf.fill.nta f41=[ r3],32
+ ;;
+ ldf.fill.nta f42=[in0],32
+ ldf.fill.nta f43=[ r3],32
+ ;;
+ ldf.fill.nta f44=[in0],32
+ ldf.fill.nta f45=[ r3],32
+ ;;
+ ldf.fill.nta f46=[in0],32
+ ldf.fill.nta f47=[ r3],32
+ ;;
+ ldf.fill.nta f48=[in0],32
+ ldf.fill.nta f49=[ r3],32
+ ;;
+ ldf.fill.nta f50=[in0],32
+ ldf.fill.nta f51=[ r3],32
+ ;;
+ ldf.fill.nta f52=[in0],32
+ ldf.fill.nta f53=[ r3],32
+ ;;
+ ldf.fill.nta f54=[in0],32
+ ldf.fill.nta f55=[ r3],32
+ ;;
+ ldf.fill.nta f56=[in0],32
+ ldf.fill.nta f57=[ r3],32
+ ;;
+ ldf.fill.nta f58=[in0],32
+ ldf.fill.nta f59=[ r3],32
+ ;;
+ ldf.fill.nta f60=[in0],32
+ ldf.fill.nta f61=[ r3],32
+ ;;
+ ldf.fill.nta f62=[in0],32
+ ldf.fill.nta f63=[ r3],32
+ ;;
+ ldf.fill.nta f64=[in0],32
+ ldf.fill.nta f65=[ r3],32
+ ;;
+ ldf.fill.nta f66=[in0],32
+ ldf.fill.nta f67=[ r3],32
+ ;;
+ ldf.fill.nta f68=[in0],32
+ ldf.fill.nta f69=[ r3],32
+ ;;
+ ldf.fill.nta f70=[in0],32
+ ldf.fill.nta f71=[ r3],32
+ ;;
+ ldf.fill.nta f72=[in0],32
+ ldf.fill.nta f73=[ r3],32
+ ;;
+ ldf.fill.nta f74=[in0],32
+ ldf.fill.nta f75=[ r3],32
+ ;;
+ ldf.fill.nta f76=[in0],32
+ ldf.fill.nta f77=[ r3],32
+ ;;
+ ldf.fill.nta f78=[in0],32
+ ldf.fill.nta f79=[ r3],32
+ ;;
+ ldf.fill.nta f80=[in0],32
+ ldf.fill.nta f81=[ r3],32
+ ;;
+ ldf.fill.nta f82=[in0],32
+ ldf.fill.nta f83=[ r3],32
+ ;;
+ ldf.fill.nta f84=[in0],32
+ ldf.fill.nta f85=[ r3],32
+ ;;
+ ldf.fill.nta f86=[in0],32
+ ldf.fill.nta f87=[ r3],32
+ ;;
+ ldf.fill.nta f88=[in0],32
+ ldf.fill.nta f89=[ r3],32
+ ;;
+ ldf.fill.nta f90=[in0],32
+ ldf.fill.nta f91=[ r3],32
+ ;;
+ ldf.fill.nta f92=[in0],32
+ ldf.fill.nta f93=[ r3],32
+ ;;
+ ldf.fill.nta f94=[in0],32
+ ldf.fill.nta f95=[ r3],32
+ ;;
+ ldf.fill.nta f96=[in0],32
+ ldf.fill.nta f97=[ r3],32
+ ;;
+ ldf.fill.nta f98=[in0],32
+ ldf.fill.nta f99=[ r3],32
+ ;;
+ ldf.fill.nta f100=[in0],32
+ ldf.fill.nta f101=[ r3],32
+ ;;
+ ldf.fill.nta f102=[in0],32
+ ldf.fill.nta f103=[ r3],32
+ ;;
+ ldf.fill.nta f104=[in0],32
+ ldf.fill.nta f105=[ r3],32
+ ;;
+ ldf.fill.nta f106=[in0],32
+ ldf.fill.nta f107=[ r3],32
+ ;;
+ ldf.fill.nta f108=[in0],32
+ ldf.fill.nta f109=[ r3],32
+ ;;
+ ldf.fill.nta f110=[in0],32
+ ldf.fill.nta f111=[ r3],32
+ ;;
+ ldf.fill.nta f112=[in0],32
+ ldf.fill.nta f113=[ r3],32
+ ;;
+ ldf.fill.nta f114=[in0],32
+ ldf.fill.nta f115=[ r3],32
+ ;;
+ ldf.fill.nta f116=[in0],32
+ ldf.fill.nta f117=[ r3],32
+ ;;
+ ldf.fill.nta f118=[in0],32
+ ldf.fill.nta f119=[ r3],32
+ ;;
+ ldf.fill.nta f120=[in0],32
+ ldf.fill.nta f121=[ r3],32
+ ;;
+ ldf.fill.nta f122=[in0],32
+ ldf.fill.nta f123=[ r3],32
+ ;;
+ ldf.fill.nta f124=[in0],32
+ ldf.fill.nta f125=[ r3],32
+ ;;
+ ldf.fill.nta f126=[in0],32
+ ldf.fill.nta f127=[ r3],32
+ br.ret.sptk.few rp
+ .endp __ia64_load_fpu
+
+ .align 16
+ .global __ia64_init_fpu
+ .proc __ia64_init_fpu
+__ia64_init_fpu:
+ alloc r2=ar.pfs,0,0,0,0
+ stf.spill [sp]=f0
+ mov f32=f0
+ ;;
+ ldf.fill f33=[sp]
+ ldf.fill f34=[sp]
+ mov f35=f0
+ ;;
+ ldf.fill f36=[sp]
+ ldf.fill f37=[sp]
+ mov f38=f0
+ ;;
+ ldf.fill f39=[sp]
+ ldf.fill f40=[sp]
+ mov f41=f0
+ ;;
+ ldf.fill f42=[sp]
+ ldf.fill f43=[sp]
+ mov f44=f0
+ ;;
+ ldf.fill f45=[sp]
+ ldf.fill f46=[sp]
+ mov f47=f0
+ ;;
+ ldf.fill f48=[sp]
+ ldf.fill f49=[sp]
+ mov f50=f0
+ ;;
+ ldf.fill f51=[sp]
+ ldf.fill f52=[sp]
+ mov f53=f0
+ ;;
+ ldf.fill f54=[sp]
+ ldf.fill f55=[sp]
+ mov f56=f0
+ ;;
+ ldf.fill f57=[sp]
+ ldf.fill f58=[sp]
+ mov f59=f0
+ ;;
+ ldf.fill f60=[sp]
+ ldf.fill f61=[sp]
+ mov f62=f0
+ ;;
+ ldf.fill f63=[sp]
+ ldf.fill f64=[sp]
+ mov f65=f0
+ ;;
+ ldf.fill f66=[sp]
+ ldf.fill f67=[sp]
+ mov f68=f0
+ ;;
+ ldf.fill f69=[sp]
+ ldf.fill f70=[sp]
+ mov f71=f0
+ ;;
+ ldf.fill f72=[sp]
+ ldf.fill f73=[sp]
+ mov f74=f0
+ ;;
+ ldf.fill f75=[sp]
+ ldf.fill f76=[sp]
+ mov f77=f0
+ ;;
+ ldf.fill f78=[sp]
+ ldf.fill f79=[sp]
+ mov f80=f0
+ ;;
+ ldf.fill f81=[sp]
+ ldf.fill f82=[sp]
+ mov f83=f0
+ ;;
+ ldf.fill f84=[sp]
+ ldf.fill f85=[sp]
+ mov f86=f0
+ ;;
+ ldf.fill f87=[sp]
+ ldf.fill f88=[sp]
+ mov f89=f0
+ ;;
+ ldf.fill f90=[sp]
+ ldf.fill f91=[sp]
+ mov f92=f0
+ ;;
+ ldf.fill f93=[sp]
+ ldf.fill f94=[sp]
+ mov f95=f0
+ ;;
+ ldf.fill f96=[sp]
+ ldf.fill f97=[sp]
+ mov f98=f0
+ ;;
+ ldf.fill f99=[sp]
+ ldf.fill f100=[sp]
+ mov f101=f0
+ ;;
+ ldf.fill f102=[sp]
+ ldf.fill f103=[sp]
+ mov f104=f0
+ ;;
+ ldf.fill f105=[sp]
+ ldf.fill f106=[sp]
+ mov f107=f0
+ ;;
+ ldf.fill f108=[sp]
+ ldf.fill f109=[sp]
+ mov f110=f0
+ ;;
+ ldf.fill f111=[sp]
+ ldf.fill f112=[sp]
+ mov f113=f0
+ ;;
+ ldf.fill f114=[sp]
+ ldf.fill f115=[sp]
+ mov f116=f0
+ ;;
+ ldf.fill f117=[sp]
+ ldf.fill f118=[sp]
+ mov f119=f0
+ ;;
+ ldf.fill f120=[sp]
+ ldf.fill f121=[sp]
+ mov f122=f0
+ ;;
+ ldf.fill f123=[sp]
+ ldf.fill f124=[sp]
+ mov f125=f0
+ ;;
+ ldf.fill f126=[sp]
+ mov f127=f0
+ br.ret.sptk.few rp
+ .endp __ia64_init_fpu
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
new file mode 100644
index 000000000..122650461
--- /dev/null
+++ b/arch/ia64/kernel/init_task.c
@@ -0,0 +1,31 @@
+/*
+ * This is where we statically allocate and initialize the initial
+ * task.
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+
+static struct vm_area_struct init_mmap = INIT_MMAP;
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS;
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+/*
+ * Initial task structure.
+ *
+ * We need to make sure that this is page aligned due to the way
+ * process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union task_union init_task_union
+ __attribute__((section("init_task"))) =
+ { INIT_TASK(init_task_union.task) };
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
new file mode 100644
index 000000000..01c201137
--- /dev/null
+++ b/arch/ia64/kernel/irq.c
@@ -0,0 +1,657 @@
+/*
+ * linux/arch/ia64/kernel/irq.c
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 6/10/99: Updated to bring in sync with x86 version to facilitate
+ * support for SMP and different interrupt controllers.
+ */
+
+#include <linux/config.h>
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/kernel_stat.h>
+#include <linux/malloc.h>
+#include <linux/ptrace.h>
+#include <linux/random.h> /* for rand_initialize_irq() */
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/threads.h>
+
+#ifdef CONFIG_KDB
+# include <linux/kdb.h>
+#endif
+
+#include <asm/bitops.h>
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/machvec.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+
+/* This is used to detect bad usage of probe_irq_on()/probe_irq_off(). */
+#define PROBE_IRQ_COOKIE 0xfeedC0FFEE
+
+struct irq_desc irq_desc[NR_IRQS];
+
+/*
+ * Micro-access to controllers is serialized over the whole
+ * system. We never hold this lock when we call the actual
+ * IRQ handler.
+ */
+spinlock_t irq_controller_lock;
+
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+spinlock_t ivr_read_lock;
+#endif
+
+unsigned int local_bh_count[NR_CPUS];
+/*
+ * used in irq_enter()/irq_exit()
+ */
+unsigned int local_irq_count[NR_CPUS];
+
+static struct irqaction timer_action = { NULL, 0, 0, NULL, NULL, NULL};
+
+#ifdef CONFIG_SMP
+static struct irqaction ipi_action = { NULL, 0, 0, NULL, NULL, NULL};
+#endif
+
+/*
+ * Legacy IRQ to IA-64 vector translation table. Any vector not in
+ * this table maps to itself (i.e., irq 0x30 => IA-64 vector 0x30).
+ */
+__u8 irq_to_vector_map[IA64_MIN_VECTORED_IRQ] = {
+ /* 8259 IRQ translation, first 16 entries */
+ TIMER_IRQ, 0x50, 0x0f, 0x51, 0x52, 0x53, 0x43, 0x54,
+ 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x40, 0x41,
+};
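+
+/*
+ * For example, legacy IRQ 0 (traditionally the timer) maps to TIMER_IRQ
+ * and legacy IRQ 14 maps to vector 0x40; as noted above, any IRQ not
+ * covered by this table simply maps to itself.
+ */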
+
+/*
+ * Reverse of the above table.
+ */
+static __u8 vector_to_legacy_map[256];
+
+/*
+ * used by proc fs (/proc/interrupts)
+ */
+int
+get_irq_list (char *buf)
+{
+ int i;
+ struct irqaction * action;
+ char *p = buf;
+
+#ifdef CONFIG_SMP
+ p += sprintf(p, " ");
+ for (i = 0; i < smp_num_cpus; i++)
+ p += sprintf(p, "CPU%d ", i);
+ *p++ = '\n';
+#endif
+ /*
+ * Simply scans the external vectored interrupts
+ */
+ for (i = 0; i < NR_IRQS; i++) {
+ action = irq_desc[i].action;
+ if (!action)
+ continue;
+ p += sprintf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+ p += sprintf(p, "%10u ", kstat_irqs(i));
+#else
+ {
+ int j;
+ for (j = 0; j < smp_num_cpus; j++)
+ p += sprintf(p, "%10u ",
+ kstat.irqs[cpu_logical_map(j)][i]);
+ }
+#endif
+ p += sprintf(p, " %14s", irq_desc[i].handler->typename);
+ p += sprintf(p, " %c%s", (action->flags & SA_INTERRUPT) ? '+' : ' ',
+ action->name);
+
+ for (action = action->next; action; action = action->next) {
+ p += sprintf(p, ", %c%s",
+ (action->flags & SA_INTERRUPT)?'+':' ',
+ action->name);
+ }
+ *p++ = '\n';
+ }
+ return p - buf;
+}
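+
+/*
+ * Each line emitted above corresponds to one registered interrupt and,
+ * on a UP kernel, looks roughly like
+ *
+ *	 49:      12345   IA64 internal  +timer
+ *
+ * i.e. vector number, per-CPU interrupt counts, the controller type
+ * name, and the list of actions ('+' marks SA_INTERRUPT handlers).
+ * The numbers and names shown here are purely illustrative.
+ */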
+
+/*
+ * This is where the IVT branches to when we get an external
+ * interrupt. It dispatches to the correct hardware IRQ handler via a
+ * function pointer.
+ */
+void
+ia64_handle_irq (unsigned long irq, struct pt_regs *regs)
+{
+ unsigned long bsp, sp, saved_tpr;
+
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+# ifndef CONFIG_SMP
+ static unsigned int max_prio = 0;
+# endif
+ unsigned int prev_prio;
+ unsigned long eoi_ptr;
+
+# ifdef CONFIG_USB
+ disable_usb();
+# endif
+ /*
+ * Stop IPIs by getting the ivr_read_lock
+ */
+ spin_lock(&ivr_read_lock);
+
+ /*
+ * Disable PCI writes
+ */
+ outl(0x80ff81c0, 0xcf8);
+ outl(0x73002188, 0xcfc);
+ eoi_ptr = inl(0xcfc);
+
+ irq = ia64_get_ivr();
+
+ /*
+ * Enable PCI writes
+ */
+ outl(0x73182188, 0xcfc);
+
+ spin_unlock(&ivr_read_lock);
+
+# ifdef CONFIG_USB
+ reenable_usb();
+# endif
+
+# ifndef CONFIG_SMP
+ prev_prio = max_prio;
+ if (irq < max_prio) {
+ printk ("ia64_handle_irq: got irq %lu while %u was in progress!\n",
+ irq, max_prio);
+
+ } else
+ max_prio = irq;
+# endif /* !CONFIG_SMP */
+#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */
+
+ /*
+ * Always set TPR to limit the maximum interrupt nesting depth to 16
+ * (without this, it would be ~240, which could easily lead to kernel
+ * stack overflows).
+ */
+ saved_tpr = ia64_get_tpr();
+ ia64_srlz_d();
+ ia64_set_tpr(irq);
+ ia64_srlz_d();
+
+ asm ("mov %0=ar.bsp" : "=r"(bsp));
+ asm ("mov %0=sp" : "=r"(sp));
+
+ if ((sp - bsp) < 1024) {
+ static long last_time;
+ static unsigned char count;
+
+ if (count > 5 && jiffies - last_time > 5*HZ)
+ count = 0;
+ if (++count < 5) {
+ last_time = jiffies;
+ printk("ia64_handle_irq: DANGER: less than 1KB of free stack space!!\n"
+ "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
+ }
+#ifdef CONFIG_KDB
+ kdb(KDB_REASON_PANIC, 0, regs);
+#endif
+ }
+
+ /*
+ * The interrupt is now said to be in service
+ */
+ if (irq >= NR_IRQS) {
+ printk("handle_irq: invalid irq=%lu\n", irq);
+ goto out;
+ }
+
+ ++kstat.irqs[smp_processor_id()][irq];
+
+ if (irq == IA64_SPURIOUS_INT) {
+ printk("handle_irq: spurious interrupt\n");
+ goto out;
+ }
+
+ /*
+ * Handle the interrupt by calling the hardware specific handler (IOSAPIC, Internal, etc).
+ */
+ (*irq_desc[irq].handler->handle)(irq, regs);
+ out:
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ {
+ long pEOI;
+
+ asm ("mov %0=0;; (p1) mov %0=1" : "=r"(pEOI));
+ if (!pEOI) {
+ printk("Yikes: ia64_handle_irq() without pEOI!!\n");
+ asm volatile ("cmp.eq p1,p0=r0,r0" : "=r"(pEOI));
+# ifdef CONFIG_KDB
+ kdb(KDB_REASON_PANIC, 0, regs);
+# endif
+ }
+ }
+
+ local_irq_disable();
+# ifndef CONFIG_SMP
+ if (max_prio == irq)
+ max_prio = prev_prio;
+# endif /* !CONFIG_SMP */
+#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */
+
+ ia64_srlz_d();
+ ia64_set_tpr(saved_tpr);
+ ia64_srlz_d();
+}
+
+
+/*
+ * This should really return information about whether we should do
+ * bottom half handling etc. Right now we end up _always_ checking the
+ * bottom half, which is a waste of time and is not what some drivers
+ * would prefer.
+ */
+int
+invoke_irq_handlers (unsigned int irq, struct pt_regs *regs, struct irqaction *action)
+{
+ void (*handler)(int, void *, struct pt_regs *);
+ unsigned long flags, flags_union = 0;
+ int cpu = smp_processor_id();
+ unsigned int requested_irq;
+ void *dev_id;
+
+ irq_enter(cpu, irq);
+
+ if ((action->flags & SA_INTERRUPT) == 0)
+ __sti();
+
+ do {
+ flags = action->flags;
+ requested_irq = irq;
+ if ((flags & SA_LEGACY) != 0)
+ requested_irq = vector_to_legacy_map[irq];
+ flags_union |= flags;
+ handler = action->handler;
+ dev_id = action->dev_id;
+ action = action->next;
+ (*handler)(requested_irq, dev_id, regs);
+ } while (action);
+ if ((flags_union & SA_SAMPLE_RANDOM) != 0)
+ add_interrupt_randomness(irq);
+ __cli();
+
+ irq_exit(cpu, irq);
+ return flags_union | 1; /* force the "do bottom halves" bit */
+}
+
+void
+disable_irq_nosync (unsigned int irq)
+{
+ unsigned long flags;
+
+ irq = map_legacy_irq(irq);
+
+ spin_lock_irqsave(&irq_controller_lock, flags);
+ if (irq_desc[irq].depth++ == 0) { /* disable hardware only on the outermost request */
+ irq_desc[irq].status &= ~IRQ_ENABLED;
+ irq_desc[irq].handler->disable(irq);
+ }
+ spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+
+/*
+ * Synchronous version of the above, making sure the IRQ is
+ * no longer running on any other IRQ..
+ */
+void
+disable_irq (unsigned int irq)
+{
+ disable_irq_nosync(irq);
+
+ irq = map_legacy_irq(irq);
+
+ if (!local_irq_count[smp_processor_id()]) {
+ do {
+ barrier();
+ } while ((irq_desc[irq].status & IRQ_INPROGRESS) != 0);
+ }
+}
+
+void
+enable_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ irq = map_legacy_irq(irq);
+
+ spin_lock_irqsave(&irq_controller_lock, flags);
+ switch (irq_desc[irq].depth) {
+ case 1:
+ irq_desc[irq].status |= IRQ_ENABLED;
+ (*irq_desc[irq].handler->enable)(irq);
+ /* fall through */
+ default:
+ --irq_desc[irq].depth;
+ break;
+
+ case 0:
+ printk("enable_irq: unbalanced from %p\n", __builtin_return_address(0));
+ }
+ spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+
+/*
+ * This function encapsulates the initialization that needs to be
+ * performed under the protection of the irq_controller_lock. The
+ * lock must have been acquired by the time this is called.
+ */
+static inline int
+setup_irq (unsigned int irq, struct irqaction *new)
+{
+ int shared = 0;
+ struct irqaction *old, **p;
+
+ p = &irq_desc[irq].action;
+ old = *p;
+ if (old) {
+ if (!(old->flags & new->flags & SA_SHIRQ)) {
+ return -EBUSY;
+ }
+ /* add new interrupt at end of irq queue */
+ do {
+ p = &old->next;
+ old = *p;
+ } while (old);
+ shared = 1;
+ }
+ *p = new;
+
+ /* when sharing do not unmask */
+ if (!shared) {
+ irq_desc[irq].depth = 0;
+ irq_desc[irq].status |= IRQ_ENABLED;
+ (*irq_desc[irq].handler->startup)(irq);
+ }
+ return 0;
+}
+
+int
+request_irq (unsigned int requested_irq, void (*handler)(int, void *, struct pt_regs *),
+ unsigned long irqflags, const char * devname, void *dev_id)
+{
+ int retval, need_kfree = 0;
+ struct irqaction *action;
+ unsigned long flags;
+ unsigned int irq;
+
+#ifdef IA64_DEBUG
+ printk("request_irq(0x%x) called\n", requested_irq);
+#endif
+ /*
+ * Sanity-check: shared interrupts should REALLY pass in
+ * a real dev-ID, otherwise we'll have trouble later trying
+ * to figure out which interrupt is which (messes up the
+ * interrupt freeing logic etc).
+ */
+ if ((irqflags & SA_SHIRQ) && !dev_id)
+ printk("Bad boy: %s (at %p) called us without a dev_id!\n",
+ devname, current_text_addr());
+
+ irq = map_legacy_irq(requested_irq);
+ if (irq != requested_irq)
+ irqflags |= SA_LEGACY;
+
+ if (irq >= NR_IRQS)
+ return -EINVAL;
+
+ if (!handler)
+ return -EINVAL;
+
+ /*
+ * The timer_action and ipi_action structures cannot be allocated
+ * dynamically because their initialization happens really early
+ * on in init/main.c, at a point where the memory allocator has
+ * not yet been initialized. So we use statically reserved buffers
+ * for them. In some sense that's no big deal because we need them
+ * no matter what.
+ */
+ if (irq == TIMER_IRQ)
+ action = &timer_action;
+#ifdef CONFIG_SMP
+ else if (irq == IPI_IRQ)
+ action = &ipi_action;
+#endif
+ else {
+ action = kmalloc(sizeof(struct irqaction), GFP_KERNEL);
+ need_kfree = 1;
+ }
+
+ if (!action)
+ return -ENOMEM;
+
+ action->handler = handler;
+ action->flags = irqflags;
+ action->mask = 0;
+ action->name = devname;
+ action->next = NULL;
+ action->dev_id = dev_id;
+
+ if ((irqflags & SA_SAMPLE_RANDOM) != 0)
+ rand_initialize_irq(irq);
+
+ spin_lock_irqsave(&irq_controller_lock, flags);
+ retval = setup_irq(irq, action);
+ spin_unlock_irqrestore(&irq_controller_lock, flags);
+
+ if (need_kfree && retval)
+ kfree(action);
+
+ return retval;
+}
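+
+/*
+ * Typical driver usage (illustrative only; "mydev_interrupt" and
+ * "mydev" are hypothetical names, not defined anywhere in this file):
+ *
+ *	if (request_irq(irq, mydev_interrupt, SA_SHIRQ, "mydev", mydev))
+ *		printk("mydev: unable to register IRQ %d\n", irq);
+ *	...
+ *	free_irq(irq, mydev);
+ */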
+
+void
+free_irq (unsigned int irq, void *dev_id)
+{
+ struct irqaction *action, **p;
+ unsigned long flags;
+
+ /*
+ * some sanity checks first
+ */
+ if (irq >= NR_IRQS) {
+ printk("Trying to free IRQ%d\n",irq);
+ return;
+ }
+
+ irq = map_legacy_irq(irq);
+
+ /*
+ * Find the corresponding irqaction
+ */
+ spin_lock_irqsave(&irq_controller_lock, flags);
+ for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) {
+ if (action->dev_id != dev_id)
+ continue;
+
+ /* Found it - now remove it from the list of entries */
+ *p = action->next;
+ if (!irq_desc[irq].action) {
+ irq_desc[irq].status &= ~IRQ_ENABLED;
+ (*irq_desc[irq].handler->shutdown)(irq);
+ }
+
+ spin_unlock_irqrestore(&irq_controller_lock, flags);
+
+#ifdef CONFIG_SMP
+ /* Wait to make sure it's not being used on another CPU */
+ while (irq_desc[irq].status & IRQ_INPROGRESS)
+ barrier();
+#endif
+
+ if (action != &timer_action
+#ifdef CONFIG_SMP
+ && action != &ipi_action
+#endif
+ )
+ kfree(action);
+ return;
+ }
+ printk("Trying to free free IRQ%d\n", irq);
+}
+
+/*
+ * IRQ autodetection code. Note that the return value of
+ * probe_irq_on() is no longer being used (its role has been replaced
+ * by the IRQ_AUTODETECT flag).
+ */
+unsigned long
+probe_irq_on (void)
+{
+ struct irq_desc *id;
+ unsigned long delay;
+
+#ifdef IA64_DEBUG
+ printk("probe_irq_on() called\n");
+#endif
+
+ spin_lock_irq(&irq_controller_lock);
+ for (id = irq_desc; id < irq_desc + NR_IRQS; ++id) {
+ if (!id->action) {
+ id->status |= IRQ_AUTODETECT | IRQ_WAITING;
+ (*id->handler->startup)(id - irq_desc);
+ }
+ }
+ spin_unlock_irq(&irq_controller_lock);
+
+ /* wait for spurious interrupts to trigger: */
+
+ for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
+ /* about 100ms delay */
+ synchronize_irq();
+
+ /* filter out obviously spurious interrupts: */
+ spin_lock_irq(&irq_controller_lock);
+ for (id = irq_desc; id < irq_desc + NR_IRQS; ++id) {
+ unsigned int status = id->status;
+
+ if (!(status & IRQ_AUTODETECT))
+ continue;
+
+ if (!(status & IRQ_WAITING)) {
+ id->status = status & ~IRQ_AUTODETECT;
+ (*id->handler->shutdown)(id - irq_desc);
+ }
+ }
+ spin_unlock_irq(&irq_controller_lock);
+ return PROBE_IRQ_COOKIE; /* return a meaningless cookie value */
+}
+
+int
+probe_irq_off (unsigned long cookie)
+{
+ int irq_found, nr_irqs;
+ struct irq_desc *id;
+
+#ifdef IA64_DEBUG
+ printk("probe_irq_off(cookie=0x%lx) -> ", cookie);
+#endif
+
+ if (cookie != PROBE_IRQ_COOKIE)
+ printk("bad irq probe from %p\n", __builtin_return_address(0));
+
+ nr_irqs = 0;
+ irq_found = 0;
+ spin_lock_irq(&irq_controller_lock);
+ for (id = irq_desc + IA64_MIN_VECTORED_IRQ; id < irq_desc + NR_IRQS; ++id) {
+ unsigned int status = id->status;
+
+ if (!(status & IRQ_AUTODETECT))
+ continue;
+
+ if (!(status & IRQ_WAITING)) {
+ if (!nr_irqs)
+ irq_found = (id - irq_desc);
+ ++nr_irqs;
+ }
+ id->status = status & ~IRQ_AUTODETECT;
+ (*id->handler->shutdown)(id - irq_desc);
+ }
+ spin_unlock_irq(&irq_controller_lock);
+
+ if (nr_irqs > 1)
+ irq_found = -irq_found;
+
+#ifdef IA64_DEBUG
+ printk("%d\n", irq_found);
+#endif
+ return irq_found;
+}
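+
+/*
+ * The usual autodetection sequence in a driver is (illustrative only):
+ *
+ *	cookie = probe_irq_on();
+ *	... make the device raise its interrupt ...
+ *	irq = probe_irq_off(cookie);
+ *
+ * where a positive return value is the unique vector that fired, zero
+ * means no vector fired, and a negative value means several did.
+ */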
+
+#ifdef CONFIG_SMP
+
+void __init
+init_IRQ_SMP (void)
+{
+ if (request_irq(IPI_IRQ, handle_IPI, 0, "IPI", NULL))
+ panic("Could not allocate IPI Interrupt Handler!");
+}
+
+#endif
+
+void __init
+init_IRQ (void)
+{
+ int i;
+
+ for (i = 0; i < IA64_MIN_VECTORED_IRQ; ++i)
+ vector_to_legacy_map[irq_to_vector_map[i]] = i;
+
+ for (i = 0; i < NR_IRQS; ++i) {
+ irq_desc[i].handler = &irq_type_default;
+ }
+
+ irq_desc[TIMER_IRQ].handler = &irq_type_ia64_internal;
+#ifdef CONFIG_SMP
+ /*
+ * Configure the IPI vector and handler
+ */
+ irq_desc[IPI_IRQ].handler = &irq_type_ia64_internal;
+ init_IRQ_SMP();
+#endif
+
+ ia64_set_pmv(1 << 16);
+ ia64_set_cmcv(CMC_IRQ); /* XXX fix me */
+
+ platform_irq_init(irq_desc);
+
+ /* clear TPR to enable all interrupt classes: */
+ ia64_set_tpr(0);
+}
+
+/* TBD:
+ * Certain IA64 platforms can have inter-processor interrupt support.
+ * This interface is supposed to default to the IA64 IPI block-based
+ * mechanism if the platform doesn't provide a separate mechanism
+ * for IPIs.
+ * Choices : (1) Extend hw_interrupt_type interfaces
+ * (2) Use machine vector mechanism
+ * For now defining the following interface as a place holder.
+ */
+void
+ipi_send (int cpu, int vector, int delivery_mode)
+{
+}
diff --git a/arch/ia64/kernel/irq_default.c b/arch/ia64/kernel/irq_default.c
new file mode 100644
index 000000000..bf8c62642
--- /dev/null
+++ b/arch/ia64/kernel/irq_default.c
@@ -0,0 +1,30 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include <asm/irq.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+
+
+static int
+irq_default_handle_irq (unsigned int irq, struct pt_regs *regs)
+{
+ printk("Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id());
+ return 0; /* don't call do_bottom_half() for spurious interrupts */
+}
+
+static void
+irq_default_noop (unsigned int irq)
+{
+ /* nuthing to do... */
+}
+
+struct hw_interrupt_type irq_type_default = {
+ "default",
+ (void (*)(unsigned long)) irq_default_noop, /* init */
+ irq_default_noop, /* startup */
+ irq_default_noop, /* shutdown */
+ irq_default_handle_irq, /* handle */
+ irq_default_noop, /* enable */
+ irq_default_noop /* disable */
+};
diff --git a/arch/ia64/kernel/irq_internal.c b/arch/ia64/kernel/irq_internal.c
new file mode 100644
index 000000000..1ae904fe8
--- /dev/null
+++ b/arch/ia64/kernel/irq_internal.c
@@ -0,0 +1,71 @@
+/*
+ * Internal Interrupt Vectors
+ *
+ * This takes care of interrupts that are generated by the CPU
+ * internally, such as the ITC and IPI interrupts.
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include <asm/irq.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+
+/*
+ * This is identical to IOSAPIC handle_irq. It may go away...
+ */
+static int
+internal_handle_irq (unsigned int irq, struct pt_regs *regs)
+{
+ struct irqaction *action = 0;
+ struct irq_desc *id = irq_desc + irq;
+ unsigned int status;
+ int retval;
+
+ spin_lock(&irq_controller_lock);
+ {
+ status = id->status;
+ if ((status & IRQ_ENABLED) != 0)
+ action = id->action;
+ id->status = status & ~(IRQ_REPLAY | IRQ_WAITING);
+ }
+ spin_unlock(&irq_controller_lock);
+
+ if (!action) {
+ if (!(id->status & IRQ_AUTODETECT))
+ printk("irq_hpsim_handle_irq: unexpected interrupt %u\n", irq);
+ return 0;
+ }
+
+ retval = invoke_irq_handlers(irq, regs, action);
+
+ spin_lock(&irq_controller_lock);
+ {
+ status = (id->status & ~IRQ_INPROGRESS);
+ id->status = status;
+ }
+ spin_unlock(&irq_controller_lock);
+
+ return retval;
+}
+
+static void
+internal_noop (unsigned int irq)
+{
+ /* nuthing to do... */
+}
+
+struct hw_interrupt_type irq_type_ia64_internal = {
+ "IA64 internal",
+ (void (*)(unsigned long)) internal_noop, /* init */
+ internal_noop, /* startup */
+ internal_noop, /* shutdown */
+ internal_handle_irq, /* handle */
+ internal_noop, /* enable */
+ internal_noop /* disable */
+};
+
diff --git a/arch/ia64/kernel/irq_lock.c b/arch/ia64/kernel/irq_lock.c
new file mode 100644
index 000000000..9c512dd4e
--- /dev/null
+++ b/arch/ia64/kernel/irq_lock.c
@@ -0,0 +1,287 @@
+/*
+ * SMP IRQ Lock support
+ *
+ * Global interrupt locks for SMP. Allow interrupts to come in on any
+ * CPU, yet make cli/sti act globally to protect critical regions..
+ * These functions usually appear in irq.c, but I think it's cleaner this way.
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/threads.h>
+#include <linux/init.h>
+
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/irq.h>
+#include <asm/bitops.h>
+#include <asm/pgtable.h>
+#include <asm/delay.h>
+
+int global_irq_holder = NO_PROC_ID;
+spinlock_t global_irq_lock;
+atomic_t global_irq_count;
+atomic_t global_bh_count;
+atomic_t global_bh_lock;
+
+#define INIT_STUCK (1<<26)
+
+void
+irq_enter(int cpu, int irq)
+{
+ int stuck = INIT_STUCK;
+
+ hardirq_enter(cpu, irq);
+ barrier();
+ while (global_irq_lock.lock) {
+ if (cpu == global_irq_holder) {
+ break;
+ }
+
+ if (!--stuck) {
+ printk("irq_enter stuck (irq=%d, cpu=%d, global=%d)\n",
+ irq, cpu,global_irq_holder);
+ stuck = INIT_STUCK;
+ }
+ barrier();
+ }
+}
+
+void
+irq_exit(int cpu, int irq)
+{
+ hardirq_exit(cpu, irq);
+ release_irqlock(cpu);
+}
+
+static void
+show(char * str)
+{
+ int i;
+ unsigned long *stack;
+ int cpu = smp_processor_id();
+
+ printk("\n%s, CPU %d:\n", str, cpu);
+ printk("irq: %d [%d %d]\n",
+ atomic_read(&global_irq_count), local_irq_count[0], local_irq_count[1]);
+ printk("bh: %d [%d %d]\n",
+ atomic_read(&global_bh_count), local_bh_count[0], local_bh_count[1]);
+
+ stack = (unsigned long *) &stack;
+ for (i = 40; i ; i--) {
+ unsigned long x = *++stack;
+ if (x > (unsigned long) &get_options && x < (unsigned long) &vsprintf) {
+ printk("<[%08lx]> ", x);
+ }
+ }
+}
+
+#define MAXCOUNT 100000000
+
+static inline void
+wait_on_bh(void)
+{
+ int count = MAXCOUNT;
+ do {
+ if (!--count) {
+ show("wait_on_bh");
+ count = ~0;
+ }
+ /* nothing .. wait for the other bh's to go away */
+ } while (atomic_read(&global_bh_count) != 0);
+}
+
+static inline void
+wait_on_irq(int cpu)
+{
+ int count = MAXCOUNT;
+
+ for (;;) {
+
+ /*
+ * Wait until all interrupts are gone. Wait
+ * for bottom half handlers unless we're
+ * already executing in one..
+ */
+ if (!atomic_read(&global_irq_count)) {
+ if (local_bh_count[cpu] || !atomic_read(&global_bh_count))
+ break;
+ }
+
+ /* Duh, we have to loop. Release the lock to avoid deadlocks */
+ spin_unlock(&global_irq_lock);
+ mb();
+
+ for (;;) {
+ if (!--count) {
+ show("wait_on_irq");
+ count = ~0;
+ }
+ __sti();
+ udelay(cpu + 1);
+ __cli();
+ if (atomic_read(&global_irq_count))
+ continue;
+ if (global_irq_lock.lock)
+ continue;
+ if (!local_bh_count[cpu] && atomic_read(&global_bh_count))
+ continue;
+ if (spin_trylock(&global_irq_lock))
+ break;
+ }
+ }
+}
+
+/*
+ * This is called when we want to synchronize with
+ * bottom half handlers. We need to wait until
+ * no other CPU is executing any bottom half handler.
+ *
+ * Don't wait if we're already running in an interrupt
+ * context or are inside a bh handler.
+ */
+void
+synchronize_bh(void)
+{
+ if (atomic_read(&global_bh_count)) {
+ int cpu = smp_processor_id();
+ if (!local_irq_count[cpu] && !local_bh_count[cpu]) {
+ wait_on_bh();
+ }
+ }
+}
+
+
+/*
+ * This is called when we want to synchronize with
+ * interrupts. We may for example tell a device to
+ * stop sending interrupts: but to make sure there
+ * are no interrupts that are executing on another
+ * CPU we need to call this function.
+ */
+void
+synchronize_irq(void)
+{
+ int cpu = smp_processor_id();
+ int local_count;
+ int global_count;
+
+ mb();
+ do {
+ local_count = local_irq_count[cpu];
+ global_count = atomic_read(&global_irq_count);
+ } while (global_count != local_count);
+}
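+
+/*
+ * Illustrative sketch only (an assumption, not part of this file's
+ * interface): a driver tearing down its interrupt source, as described
+ * in the comment above, would quiesce the device and then call
+ * synchronize_irq() before freeing state that an in-flight handler on
+ * another CPU might still be using.  "mydev" and its helpers are
+ * hypothetical names.
+ *
+ *	static void mydev_shutdown (struct mydev *dev)
+ *	{
+ *		mydev_mask_interrupts(dev);	// tell the device to stop interrupting
+ *		synchronize_irq();		// wait out handlers running on other CPUs
+ *		mydev_free_buffers(dev);	// now safe to release shared state
+ *	}
+ */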
+
+static inline void
+get_irqlock(int cpu)
+{
+ if (!spin_trylock(&global_irq_lock)) {
+ /* do we already hold the lock? */
+ if ((unsigned char) cpu == global_irq_holder)
+ return;
+ /* Uhhuh.. Somebody else got it. Wait.. */
+ spin_lock(&global_irq_lock);
+ }
+ /*
+	 * We also need to make sure that nobody else is running
+ * in an interrupt context.
+ */
+ wait_on_irq(cpu);
+
+ /*
+ * Ok, finally..
+ */
+ global_irq_holder = cpu;
+}
+
+/*
+ * A global "cli()" while in an interrupt context
+ * turns into just a local cli(). Interrupts
+ * should use spinlocks for the (very unlikely)
+ * case that they ever want to protect against
+ * each other.
+ *
+ * If we already have local interrupts disabled,
+ * this will not turn a local disable into a
+ * global one (problems with spinlocks: this makes
+ * save_flags+cli+sti usable inside a spinlock).
+ */
+void
+__global_cli(void)
+{
+ unsigned long flags;
+
+ __save_flags(flags);
+ if (flags & IA64_PSR_I) {
+ int cpu = smp_processor_id();
+ __cli();
+ if (!local_irq_count[cpu])
+ get_irqlock(cpu);
+ }
+}
+
+void
+__global_sti(void)
+{
+ int cpu = smp_processor_id();
+
+ if (!local_irq_count[cpu])
+ release_irqlock(cpu);
+ __sti();
+}
+
+/*
+ * SMP flags value to restore to:
+ * 0 - global cli
+ * 1 - global sti
+ * 2 - local cli
+ * 3 - local sti
+ */
+unsigned long
+__global_save_flags(void)
+{
+ int retval;
+ int local_enabled;
+ unsigned long flags;
+
+ __save_flags(flags);
+	local_enabled = (flags & IA64_PSR_I) != 0;	/* IA64_PSR_I is a bit mask, not 0/1 */
+ /* default to local */
+ retval = 2 + local_enabled;
+
+ /* check for global flags if we're not in an interrupt */
+ if (!local_irq_count[smp_processor_id()]) {
+ if (local_enabled)
+ retval = 1;
+ if (global_irq_holder == (unsigned char) smp_processor_id())
+ retval = 0;
+ }
+ return retval;
+}
+
+void
+__global_restore_flags(unsigned long flags)
+{
+ switch (flags) {
+ case 0:
+ __global_cli();
+ break;
+ case 1:
+ __global_sti();
+ break;
+ case 2:
+ __cli();
+ break;
+ case 3:
+ __sti();
+ break;
+ default:
+ printk("global_restore_flags: %08lx (%08lx)\n",
+ flags, (&flags)[-1]);
+ }
+}
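+
+/*
+ * Minimal usage sketch (illustration only, not part of this file): the
+ * point of the 0-3 encoding above is that a caller can save the global
+ * interrupt state, enter a critical region, and later restore exactly
+ * the state it had, whether that was a global or only a local disable:
+ *
+ *	unsigned long flags;
+ *
+ *	flags = __global_save_flags();		// remember 0/1/2/3 state
+ *	__global_cli();				// begin global critical region
+ *	// ... touch data shared with interrupt handlers on any CPU ...
+ *	__global_restore_flags(flags);		// put things back as they were
+ */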
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
new file mode 100644
index 000000000..4c3ac242a
--- /dev/null
+++ b/arch/ia64/kernel/ivt.S
@@ -0,0 +1,1342 @@
+/*
+ * arch/ia64/kernel/ivt.S
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1998-2000 David Mosberger <davidm@hpl.hp.com>
+ */
+
+#include <linux/config.h>
+
+#include <asm/break.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+
+#include "entry.h"
+
+/*
+ * A couple of convenience macros that make writing and reading
+ * SAVE_MIN and SAVE_REST easier.
+ */
+#define rARPR r31
+#define rCRIFS r30
+#define rCRIPSR r29
+#define rCRIIP r28
+#define rARRSC r27
+#define rARPFS r26
+#define rARUNAT r25
+#define rARRNAT r24
+#define rARBSPSTORE r23
+#define rKRBS r22
+#define rB6 r21
+#define rR1 r20
+
+/*
+ * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ * psr.ic: off
+ * psr.dt: off
+ * r31: contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ * psr.ic: off
+ * psr.dt: off
+ * r2 = points to &pt_regs.r16
+ * r12 = kernel sp (kernel virtual address)
+ * r13 = points to current task_struct (kernel virtual address)
+ * p15 = TRUE if psr.i is set in cr.ipsr
+ * predicate registers (other than p6, p7, and p15), b6, r3, r8, r9, r10, r11, r14, r15:
+ * preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro. This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+#define DO_SAVE_MIN(COVER,EXTRA) \
+ mov rARRSC=ar.rsc; \
+ mov rARPFS=ar.pfs; \
+ mov rR1=r1; \
+ mov rARUNAT=ar.unat; \
+ mov rCRIPSR=cr.ipsr; \
+ mov rB6=b6; /* rB6 = branch reg 6 */ \
+ mov rCRIIP=cr.iip; \
+ mov r1=ar.k6; /* r1 = current */ \
+ ;; \
+ invala; \
+ extr.u r16=rCRIPSR,32,2; /* extract psr.cpl */ \
+ ;; \
+ cmp.eq pKern,p7=r0,r16; /* are we in kernel mode already? (psr.cpl==0) */ \
+ /* switch from user to kernel RBS: */ \
+ COVER; \
+ ;; \
+(p7) mov ar.rsc=r0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
+(p7) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \
+ ;; \
+(p7) mov rARRNAT=ar.rnat; \
+(pKern) dep r1=0,sp,61,3; /* compute physical addr of sp */ \
+(p7) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
+(p7) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \
+(p7) dep rKRBS=-1,rKRBS,61,3; /* compute kernel virtual addr of RBS */ \
+ ;; \
+(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
+(p7) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \
+ ;; \
+(p7) mov r18=ar.bsp; \
+(p7) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
+ \
+ mov r16=r1; /* initialize first base pointer */ \
+ adds r17=8,r1; /* initialize second base pointer */ \
+ ;; \
+ st8 [r16]=rCRIPSR,16; /* save cr.ipsr */ \
+ st8 [r17]=rCRIIP,16; /* save cr.iip */ \
+(pKern) mov r18=r0; /* make sure r18 isn't NaT */ \
+ ;; \
+ st8 [r16]=rCRIFS,16; /* save cr.ifs */ \
+ st8 [r17]=rARUNAT,16; /* save ar.unat */ \
+(p7) sub r18=r18,rKRBS; /* r18=RSE.ndirty*8 */ \
+ ;; \
+ st8 [r16]=rARPFS,16; /* save ar.pfs */ \
+ st8 [r17]=rARRSC,16; /* save ar.rsc */ \
+ tbit.nz p15,p0=rCRIPSR,IA64_PSR_I_BIT \
+ ;; /* avoid RAW on r16 & r17 */ \
+(pKern) adds r16=16,r16; /* skip over ar_rnat field */ \
+(pKern) adds r17=16,r17; /* skip over ar_bspstore field */ \
+(p7) st8 [r16]=rARRNAT,16; /* save ar.rnat */ \
+(p7) st8 [r17]=rARBSPSTORE,16; /* save ar.bspstore */ \
+ ;; \
+ st8 [r16]=rARPR,16; /* save predicates */ \
+ st8 [r17]=rB6,16; /* save b6 */ \
+ shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \
+ ;; \
+ st8 [r16]=r18,16; /* save ar.rsc value for "loadrs" */ \
+ st8.spill [r17]=rR1,16; /* save original r1 */ \
+ cmp.ne pEOI,p0=r0,r0 /* clear pEOI by default */ \
+ ;; \
+ st8.spill [r16]=r2,16; \
+ st8.spill [r17]=r3,16; \
+ adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
+ ;; \
+ st8.spill [r16]=r12,16; \
+ st8.spill [r17]=r13,16; \
+ cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \
+ ;; \
+ st8.spill [r16]=r14,16; \
+ st8.spill [r17]=r15,16; \
+ dep r14=-1,r0,61,3; \
+ ;; \
+ st8.spill [r16]=r8,16; \
+ st8.spill [r17]=r9,16; \
+ adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \
+ ;; \
+ st8.spill [r16]=r10,16; \
+ st8.spill [r17]=r11,16; \
+ mov r13=ar.k6; /* establish `current' */ \
+ ;; \
+ or r2=r2,r14; /* make first base a kernel virtual address */ \
+ EXTRA; \
+ movl r1=__gp; /* establish kernel global pointer */ \
+ ;; \
+ or r12=r12,r14; /* make sp a kernel virtual address */ \
+ or r13=r13,r14; /* make `current' a kernel virtual address */ \
+ bsw.1;; /* switch back to bank 1 (must be last in insn group) */
+
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+# define STOPS nop.i 0x0;; nop.i 0x0;; nop.i 0x0;;
+#else
+# define STOPS
+#endif
+
+#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover;; mov rCRIFS=cr.ifs,) STOPS
+#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover;; mov rCRIFS=cr.ifs, mov r15=r19) STOPS
+#define SAVE_MIN DO_SAVE_MIN(mov rCRIFS=r0,) STOPS
+
+/*
+ * SAVE_REST saves the remainder of pt_regs (with psr.ic on). This
+ * macro guarantees to preserve all predicate registers, r8, r9, r10,
+ * r11, r14, and r15.
+ *
+ * Assumed state upon entry:
+ * psr.ic: on
+ * psr.dt: on
+ * r2: points to &pt_regs.r16
+ * r3: points to &pt_regs.r17
+ */
+#define SAVE_REST \
+ st8.spill [r2]=r16,16; \
+ st8.spill [r3]=r17,16; \
+ ;; \
+ st8.spill [r2]=r18,16; \
+ st8.spill [r3]=r19,16; \
+ ;; \
+ mov r16=ar.ccv; /* M-unit */ \
+ movl r18=FPSR_DEFAULT /* L-unit */ \
+ ;; \
+ mov r17=ar.fpsr; /* M-unit */ \
+ mov ar.fpsr=r18; /* M-unit */ \
+ ;; \
+ st8.spill [r2]=r20,16; \
+ st8.spill [r3]=r21,16; \
+ mov r18=b0; \
+ ;; \
+ st8.spill [r2]=r22,16; \
+ st8.spill [r3]=r23,16; \
+ mov r19=b7; \
+ ;; \
+ st8.spill [r2]=r24,16; \
+ st8.spill [r3]=r25,16; \
+ ;; \
+ st8.spill [r2]=r26,16; \
+ st8.spill [r3]=r27,16; \
+ ;; \
+ st8.spill [r2]=r28,16; \
+ st8.spill [r3]=r29,16; \
+ ;; \
+ st8.spill [r2]=r30,16; \
+ st8.spill [r3]=r31,16; \
+ ;; \
+ st8 [r2]=r16,16; /* ar.ccv */ \
+ st8 [r3]=r17,16; /* ar.fpsr */ \
+ ;; \
+ st8 [r2]=r18,16; /* b0 */ \
+ st8 [r3]=r19,16+8; /* b7 */ \
+ ;; \
+ stf.spill [r2]=f6,32; \
+ stf.spill [r3]=f7,32; \
+ ;; \
+ stf.spill [r2]=f8,32; \
+ stf.spill [r3]=f9,32
+
+/*
+ * This file defines the interrupt vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * External interrupts use only 1 entry. All others are internal interrupts.
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for critical
+ * interrupts like TLB misses.
+ *
+ * For each entry, the comment is as follows:
+ *
+ * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ * entry offset ----/ / / / /
+ * entry number ---------/ / / /
+ * size of the entry -------------/ / /
+ * vector name -------------------------------------/ /
+ * related interrupts (what is the real interrupt?) ----------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.4 (June 1998)
+ */
+
+#define FAULT(n) \
+ rsm psr.dt; /* avoid nested faults due to TLB misses... */ \
+ ;; \
+ srlz.d; /* ensure everyone knows psr.dt is off... */ \
+ mov r31=pr; \
+ mov r19=n;; /* prepare to save predicates */ \
+ br.cond.sptk.many dispatch_to_fault_handler
+
+/*
+ * As we don't (hopefully) use the space available, we need to fill it with
+ * nops. The parameter may be used for debugging and represents the entry
+ * number.
+ */
+#define BREAK_BUNDLE(a) break.m (a); \
+ break.i (a); \
+ break.i (a)
+/*
+ * 4 break bundles altogether
+ */
+#define BREAK_BUNDLE4(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a)
+
+/*
+ * 8 break bundles altogether (too lazy to use only 4 at a time!)
+ */
+#define BREAK_BUNDLE8(a); BREAK_BUNDLE4(a); BREAK_BUNDLE4(a)
+
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .section __ivt_section,"ax"
+
+ .align 32768 // align on 32KB boundary
+ .global ia64_ivt
+ia64_ivt:
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ /*
+ * The VHPT vector is invoked when the TLB entry for the virtual page table
+ * is missing. This happens only as a result of a previous
+ * (the "original") TLB miss, which may either be caused by an instruction
+ * fetch or a data access (or non-access).
+ *
+	 * What we do here is normal TLB miss handling for the _original_ miss, followed
+ * by inserting the TLB entry for the virtual page table page that the VHPT
+ * walker was attempting to access. The latter gets inserted as long
+ * as both L1 and L2 have valid mappings for the faulting address.
+ * The TLB entry for the original miss gets inserted only if
+ * the L3 entry indicates that the page is present.
+ *
+ * do_page_fault gets invoked in the following cases:
+ * - the faulting virtual address uses unimplemented address bits
+ * - the faulting virtual address has no L1, L2, or L3 mapping
+ */
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+ rsm psr.dt // use physical addressing for data
+ mov r31=pr // save the predicate registers
+ mov r19=ar.k7 // get page table base address
+ shl r21=r16,3 // shift bit 60 into sign bit
+ shr.u r17=r16,61 // get the region number into r17
+ ;;
+	cmp.eq p6,p7=5,r17			// is IFA pointing into region 5?
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+ srlz.d // ensure "rsm psr.dt" has taken effect
+(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir
+(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1
+(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4
+ ;;
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+ cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
+ shr.u r18=r16,PMD_SHIFT // shift L2 index into position
+ ;;
+(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones?
+ ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ ;;
+(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
+ shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+ ;;
+(p7) ld8 r18=[r17] // read the L3 PTE
+ mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss
+ ;;
+(p7) tbit.z p6,p7=r18,0 // page present bit cleared?
+ mov r21=cr.iha // get the VHPT address that caused the TLB miss
+ ;; // avoid RAW on p7
+(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss?
+ dep r17=0,r17,0,PAGE_SHIFT // clear low bits to get page address
+ ;;
+(p10) itc.i r18;; // insert the instruction TLB entry (EAS2.6: must be last in insn group!)
+(p11) itc.d r18;; // insert the data TLB entry (EAS2.6: must be last in insn group!)
+(p6) br.spnt.few page_fault // handle bad address/page not present (page fault)
+ mov cr.ifa=r21
+
+ // Now compute and insert the TLB entry for the virtual page table.
+ // We never execute in a page table page so there is no need to set
+ // the exception deferral bit.
+ adds r16=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r17
+ ;;
+(p7) itc.d r16;; // EAS2.6: must be last in insn group!
+ mov pr=r31,-1 // restore predicate registers
+ rfi;; // must be last insn in an insn group
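+
+	// For reference, the walk above corresponds roughly to the following
+	// C-level sketch (illustration only; it ignores physical addressing
+	// and the region-5 special case, where the walk is rooted at
+	// swapper_pg_dir instead of the per-process table pointed to by ar.k7):
+	//
+	//	pgd_entry = *(page_table_base + L1_index(ifa));	// may be 0
+	//	pmd_entry = *(pgd_entry + L2_index(ifa));	// may be 0
+	//	pte       = *(pmd_entry + L3_index(ifa));
+	//	if (bad address || NULL entry || !present(pte))
+	//		goto page_fault;
+	//	itc the pte, then itc a mapping for the virtual page table page itself.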
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ /*
+ * The ITLB basically does the same as the VHPT handler except
+ * that we always insert exactly one instruction TLB entry.
+ */
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+ rsm psr.dt // use physical addressing for data
+ mov r31=pr // save the predicate registers
+ mov r19=ar.k7 // get page table base address
+ shl r21=r16,3 // shift bit 60 into sign bit
+ shr.u r17=r16,61 // get the region number into r17
+ ;;
+	cmp.eq p6,p7=5,r17			// is IFA pointing into region 5?
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+ srlz.d // ensure "rsm psr.dt" has taken effect
+(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir
+(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1
+(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4
+ ;;
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+ cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
+ shr.u r18=r16,PMD_SHIFT // shift L2 index into position
+ ;;
+(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones?
+ ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ ;;
+(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
+ shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+ ;;
+(p7) ld8 r18=[r17] // read the L3 PTE
+ ;;
+(p7) tbit.z p6,p7=r18,0 // page present bit cleared?
+ ;;
+(p7) itc.i r18;; // insert the instruction TLB entry (EAS2.6: must be last in insn group!)
+(p6) br.spnt.few page_fault // handle bad address/page not present (page fault)
+ ;;
+ mov pr=r31,-1 // restore predicate registers
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ /*
+ * The DTLB basically does the same as the VHPT handler except
+ * that we always insert exactly one data TLB entry.
+ */
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+ rsm psr.dt // use physical addressing for data
+ mov r31=pr // save the predicate registers
+ mov r19=ar.k7 // get page table base address
+ shl r21=r16,3 // shift bit 60 into sign bit
+ shr.u r17=r16,61 // get the region number into r17
+ ;;
+	cmp.eq p6,p7=5,r17			// is IFA pointing into region 5?
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+ srlz.d // ensure "rsm psr.dt" has taken effect
+(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir
+(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1
+(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4
+ ;;
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+ cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
+ shr.u r18=r16,PMD_SHIFT // shift L2 index into position
+ ;;
+(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones?
+ ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ ;;
+(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
+ shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+ ;;
+(p7) ld8 r18=[r17] // read the L3 PTE
+ ;;
+(p7) tbit.z p6,p7=r18,0 // page present bit cleared?
+ ;;
+(p7)	itc.d r18;;				// insert the data TLB entry (EAS2.6: must be last in insn group!)
+(p6) br.spnt.few page_fault // handle bad address/page not present (page fault)
+ ;;
+ mov pr=r31,-1 // restore predicate registers
+ rfi;; // must be last insn in an insn group
+
+ //-----------------------------------------------------------------------------------
+ // call do_page_fault (predicates are in r31, psr.dt is off, r16 is faulting address)
+page_fault:
+ SAVE_MIN_WITH_COVER
+ //
+ // Copy control registers to temporary registers, then turn on psr bits,
+ // then copy the temporary regs to the output regs. We have to do this
+ // because the "alloc" can cause a mandatory store which could lead to
+ // an "Alt DTLB" fault which we can handle only if psr.ic is on.
+ //
+ mov r8=cr.ifa
+ mov r9=cr.isr
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic | psr.dt
+ ;;
+ srlz.d // guarantee that interrupt collection is enabled
+(p15) ssm psr.i // restore psr.i
+ ;;
+ srlz.i // must precede "alloc"! (srlz.i implies srlz.d)
+ movl r14=ia64_leave_kernel
+ ;;
+ alloc r15=ar.pfs,0,0,3,0 // must be first in insn group
+ mov out0=r8
+ mov out1=r9
+ ;;
+ SAVE_REST
+ mov rp=r14
+ ;;
+ adds out2=16,r12 // out2 = pointer to pt_regs
+ br.call.sptk.few b6=ia64_do_page_fault // ignore return address
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RX
+ ;;
+ shr.u r18=r16,57 // move address bit 61 to bit 4
+ dep r16=0,r16,52,12 // clear top 12 bits of address
+ ;;
+ andcm r18=0x10,r18 // bit 4=~address-bit(61)
+ dep r16=r17,r16,0,12 // insert PTE control bits into r16
+ ;;
+ or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6
+ ;;
+ itc.i r16;; // insert the TLB entry(EAS2.6: must be last in insn group!)
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RW
+ ;;
+ shr.u r18=r16,57 // move address bit 61 to bit 4
+ dep r16=0,r16,52,12 // clear top 12 bits of address
+ ;;
+ andcm r18=0x10,r18 // bit 4=~address-bit(61)
+ dep r16=r17,r16,0,12 // insert PTE control bits into r16
+ ;;
+ or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6
+ ;;
+ itc.d r16;; // insert the TLB entry (EAS2.6: must be last in insn group!)
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ //
+ // In the absence of kernel bugs, we get here when the Dirty-bit, Instruction
+ // Access-bit, or Data Access-bit faults cause a nested fault because the
+ // dTLB entry for the virtual page table isn't present. In such a case,
+	// we look up the pte for the faulting address by walking the page table
+	// and return to the continuation point passed in register r30.
+ // In accessing the page tables, we don't need to check for NULL entries
+ // because if the page tables didn't map the faulting address, it would not
+ // be possible to receive one of the above faults.
+ //
+ // Input: r16: faulting address
+ // r29: saved b0
+ // r30: continuation address
+ //
+ // Output: r17: physical address of L3 PTE of faulting address
+ // r29: saved b0
+ // r30: continuation address
+ //
+ // Clobbered: b0, r18, r19, r21, r31, psr.dt (cleared)
+ //
+ rsm psr.dt // switch to using physical data addressing
+ mov r19=ar.k7 // get the page table base address
+ shl r21=r16,3 // shift bit 60 into sign bit
+ ;;
+ mov r31=pr // save the predicate registers
+ shr.u r17=r16,61 // get the region number into r17
+ ;;
+ cmp.eq p6,p7=5,r17 // is faulting address in region 5?
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+ srlz.d
+(p6) movl r17=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir
+(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1
+(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4
+ ;;
+(p6) dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+ shr.u r18=r16,PMD_SHIFT // shift L2 index into position
+ ;;
+ ld8 r17=[r17] // fetch the L1 entry
+ mov b0=r30
+ ;;
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ ;;
+ ld8 r17=[r17] // fetch the L2 entry
+ shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
+ ;;
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+ ;;
+ mov pr=r31,-1 // restore predicates
+ br.cond.sptk.few b0 // return to continuation point
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ FAULT(6)
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ FAULT(7)
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ //
+ // What we do here is to simply turn on the dirty bit in the PTE. We need
+ // to update both the page-table and the TLB entry. To efficiently access
+ // the PTE, we address it through the virtual page table. Most likely, the
+ // TLB entry for the relevant virtual page table page is still present in
+ // the TLB so we can normally do this without additional TLB misses.
+ // In case the necessary virtual page table TLB entry isn't present, we take
+ // a nested TLB miss hit where we look up the physical address of the L3 PTE
+ // and then continue at label 1 below.
+ //
+ mov r16=cr.ifa // get the address that caused the fault
+ movl r30=1f // load continuation point in case of nested fault
+ ;;
+ thash r17=r16 // compute virtual address of L3 PTE
+ mov r29=b0 // save b0 in case of nested fault
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ or r18=_PAGE_D,r18 // set the dirty bit
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.d r18;; // install updated PTE (EAS2.6: must be last in insn group!)
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ // Like Entry 8, except for instruction access
+ mov r16=cr.ifa // get the address that caused the fault
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ mov r31=pr // save predicates
+ mov r30=cr.ipsr
+ ;;
+ extr.u r17=r30,IA64_PSR_IS_BIT,1 // get instruction arch. indicator
+ ;;
+ cmp.eq p6,p0 = r17,r0 // check if IA64 instruction set
+ ;;
+(p6) mov r16=cr.iip // get real faulting address
+ ;;
+(p6) mov cr.ifa=r16 // reset IFA
+ mov pr=r31,-1
+#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */
+ movl r30=1f // load continuation point in case of nested fault
+ ;;
+ thash r17=r16 // compute virtual address of L3 PTE
+	mov r29=b0				// save b0 in case of nested fault
+ ;;
+1: ld8 r18=[r17]
+	;;					// avoid RAW on r18
+ or r18=_PAGE_A,r18 // set the accessed bit
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.i r18;; // install updated PTE (EAS2.6: must be last in insn group!)
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ // Like Entry 8, except for data access
+ mov r16=cr.ifa // get the address that caused the fault
+ movl r30=1f // load continuation point in case of nested fault
+ ;;
+ thash r17=r16 // compute virtual address of L3 PTE
+	mov r29=b0				// save b0 in case of nested fault
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ or r18=_PAGE_A,r18 // set the accessed bit
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.d r18;; // install updated PTE (EAS2.6: must be last in insn group!)
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ mov r16=cr.iim
+ mov r17=__IA64_BREAK_SYSCALL
+ mov r31=pr // prepare to save predicates
+ rsm psr.dt // avoid nested faults due to TLB misses...
+ ;;
+ srlz.d // ensure everyone knows psr.dt is off...
+ cmp.eq p0,p7=r16,r17 // is this a system call? (p7 <- false, if so)
+
+#if 1
+ // Allow syscalls via the old system call number for the time being. This is
+ // so we can transition to the new syscall number in a relatively smooth
+ // fashion.
+ mov r17=0x80000
+ ;;
+(p7) cmp.eq.or.andcm p0,p7=r16,r17 // is this the old syscall number?
+#endif
+
+(p7) br.cond.spnt.many non_syscall
+
+ SAVE_MIN // uses r31; defines r2:
+
+ // turn interrupt collection and data translation back on:
+ ssm psr.ic | psr.dt
+ srlz.d // guarantee that interrupt collection is enabled
+ cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
+ ;;
+(p15) ssm psr.i // restore psr.i
+ ;;
+ srlz.i // ensure everybody knows psr.ic and psr.dt are back on
+ adds r8=(IA64_PT_REGS_R8_OFFSET-IA64_PT_REGS_R16_OFFSET),r2
+ ;;
+ stf8 [r8]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error)
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ ;;
+ SAVE_REST
+ ;; // avoid WAW on r2 & r3
+
+ mov r3=255
+ adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024
+ adds r2=IA64_TASK_FLAGS_OFFSET,r13 // r2 = &current->flags
+
+ ;;
+ cmp.geu.unc p6,p7=r3,r15 // (syscall > 0 && syscall <= 1024+255) ?
+ movl r16=sys_call_table
+ ;;
+(p6) shladd r16=r15,3,r16
+ movl r15=ia64_ret_from_syscall
+(p7) adds r16=(__NR_ni_syscall-1024)*8,r16 // force __NR_ni_syscall
+ ;;
+ ld8 r16=[r16] // load address of syscall entry point
+ mov rp=r15 // set the real return addr
+ ;;
+ ld8 r2=[r2] // r2 = current->flags
+ mov b6=r16
+
+ // arrange things so we skip over break instruction when returning:
+
+ adds r16=16,sp // get pointer to cr_ipsr
+ adds r17=24,sp // get pointer to cr_iip
+ ;;
+ ld8 r18=[r16] // fetch cr_ipsr
+ tbit.z p8,p0=r2,5 // (current->flags & PF_TRACESYS) == 0?
+ ;;
+ ld8 r19=[r17] // fetch cr_iip
+ extr.u r20=r18,41,2 // extract ei field
+ ;;
+	cmp.eq p6,p7=2,r20		// ipsr.ei==2?
+ adds r19=16,r19 // compute address of next bundle
+ ;;
+(p6) mov r20=0 // clear ei to 0
+(p7) adds r20=1,r20 // increment ei to next slot
+ ;;
+(p6)	st8 [r17]=r19		// store new cr.iip if cr.ipsr.ei wrapped around
+	dep r18=r20,r18,41,2	// insert new ei into cr.ipsr
+	;;
+	st8 [r16]=r18		// store new value for cr.ipsr
+
+(p8) br.call.sptk.few b6=b6 // ignore this return addr
+ br.call.sptk.few rp=ia64_trace_syscall // rp will be overwritten (ignored)
+ // NOT REACHED
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ rsm psr.dt // avoid nested faults due to TLB misses...
+ ;;
+ srlz.d // ensure everyone knows psr.dt is off...
+ mov r31=pr // prepare to save predicates
+ ;;
+
+ SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3
+ ssm psr.ic | psr.dt // turn interrupt collection and data translation back on
+ ;;
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ cmp.eq pEOI,p0=r0,r0 // set pEOI flag so that ia64_leave_kernel writes cr.eoi
+ srlz.i // ensure everybody knows psr.ic and psr.dt are back on
+ ;;
+ SAVE_REST
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ mov out0=r0 // defer reading of cr.ivr to handle_irq...
+#else
+ mov out0=cr.ivr // pass cr.ivr as first arg
+#endif
+ add out1=16,sp // pass pointer to pt_regs as second arg
+ ;;
+ srlz.d // make sure we see the effect of cr.ivr
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.call.sptk.few b6=ia64_handle_irq
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+ FAULT(13)
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+ FAULT(14)
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+ FAULT(15)
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+ FAULT(16)
+
+#ifdef CONFIG_IA32_SUPPORT
+
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+	// fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
+
+ // IA32 interrupt entry point
+
+dispatch_to_ia32_handler:
+ SAVE_MIN
+ ;;
+ mov r14=cr.isr
+ ssm psr.ic | psr.dt
+ srlz.d // guarantee that interrupt collection is enabled
+ ;;
+(p15) ssm psr.i
+ ;;
+ srlz.d
+ adds r3=8,r2 // Base pointer for SAVE_REST
+ ;;
+ SAVE_REST
+ ;;
+ mov r15=0x80
+ shr r14=r14,16 // Get interrupt number
+ ;;
+ cmp.ne p6,p0=r14,r15
+(p6) br.call.dpnt.few b6=non_ia32_syscall
+
+ adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions
+
+ ;;
+	alloc r15=ar.pfs,0,0,6,0	// must be first in an insn group
+ ;;
+ ld4 r8=[r14],8 // r8 == EAX (syscall number)
+ mov r15=0xff
+ ;;
+ cmp.ltu.unc p6,p7=r8,r15
+ ld4 out1=[r14],8 // r9 == ecx
+ ;;
+ ld4 out2=[r14],8 // r10 == edx
+ ;;
+ ld4 out0=[r14] // r11 == ebx
+ adds r14=(IA64_PT_REGS_R8_OFFSET-(8*3)) + 16,sp
+ ;;
+ ld4 out5=[r14],8 // r13 == ebp
+ ;;
+ ld4 out3=[r14],8 // r14 == esi
+ adds r2=IA64_TASK_FLAGS_OFFSET,r13 // r2 = &current->flags
+ ;;
+ ld4 out4=[r14] // R15 == edi
+ movl r16=ia32_syscall_table
+ ;;
+(p6) shladd r16=r8,3,r16 // Force ni_syscall if not valid syscall number
+ ld8 r2=[r2] // r2 = current->flags
+ ;;
+ ld8 r16=[r16]
+ tbit.z p8,p0=r2,5 // (current->flags & PF_TRACESYS) == 0?
+ ;;
+ movl r15=ia32_ret_from_syscall
+ mov b6=r16
+ ;;
+ mov rp=r15
+(p8) br.call.sptk.few b6=b6
+ br.call.sptk.few rp=ia32_trace_syscall // rp will be overwritten (ignored)
+
+non_ia32_syscall:
+ alloc r15=ar.pfs,0,0,2,0
+ mov out0=r14 // interrupt #
+ add out1=16,sp // pointer to pt_regs
+ ;; // avoid WAW on CFM
+ br.call.sptk.few rp=ia32_bad_interrupt
+ ;;
+ movl r15=ia64_leave_kernel
+ ;;
+ mov rp=r15
+ br.ret.sptk.many rp
+
+#endif /* CONFIG_IA32_SUPPORT */
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+ FAULT(17)
+
+non_syscall:
+
+#ifdef CONFIG_KDB
+ mov r17=__IA64_BREAK_KDB
+ ;;
+ cmp.eq p8,p0=r16,r17 // is this a kernel breakpoint?
+#endif
+
+ SAVE_MIN_WITH_COVER
+
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+	// fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
+
+ mov r8=cr.iim // get break immediate (must be done while psr.ic is off)
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+
+ // turn interrupt collection and data translation back on:
+ ssm psr.ic | psr.dt
+ srlz.d // guarantee that interrupt collection is enabled
+ ;;
+(p15) ssm psr.i // restore psr.i
+ ;;
+ srlz.i // ensure everybody knows psr.ic and psr.dt are back on
+ movl r15=ia64_leave_kernel
+ ;;
+ alloc r14=ar.pfs,0,0,2,0
+ mov out0=r8 // break number
+ add out1=16,sp // pointer to pt_regs
+ ;;
+ SAVE_REST
+ mov rp=r15
+ ;;
+#ifdef CONFIG_KDB
+(p8) br.call.sptk.few b6=ia64_invoke_kdb
+#endif
+ br.call.sptk.few b6=ia64_bad_break // avoid WAW on CFM and ignore return addr
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+ FAULT(18)
+
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+	// fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
+
+dispatch_unaligned_handler:
+ SAVE_MIN_WITH_COVER
+ ;;
+ //
+	// we can't have the alloc while psr.ic is cleared because
+	// we might get a mandatory RSE spill (when the alloc reaches the end
+	// of the rotating partition), which could cause a page fault on the
+	// kernel virtual address, and the handler wouldn't get the state
+	// needed to recover.
+ //
+ mov r15=cr.ifa
+ ssm psr.ic | psr.dt
+ srlz.d // guarantee that interrupt collection is enabled
+ ;;
+(p15) ssm psr.i // restore psr.i
+ ;;
+ srlz.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ SAVE_REST
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
+ ;; // avoid WAW on r14
+ movl r14=ia64_leave_kernel
+ mov out0=r15 // out0 = faulting address
+ adds out1=16,sp // out1 = pointer to pt_regs
+ ;;
+ mov rp=r14
+ br.sptk.few ia64_prepare_handle_unaligned
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+ FAULT(19)
+
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+	// fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
+
+dispatch_to_fault_handler:
+ //
+ // Input:
+ // psr.ic: off
+ // psr.dt: off
+ // r19: fault vector number (e.g., 24 for General Exception)
+ // r31: contains saved predicates (pr)
+ //
+ SAVE_MIN_WITH_COVER_R19
+ //
+ // Copy control registers to temporary registers, then turn on psr bits,
+ // then copy the temporary regs to the output regs. We have to do this
+ // because the "alloc" can cause a mandatory store which could lead to
+ // an "Alt DTLB" fault which we can handle only if psr.ic is on.
+ //
+ mov r8=cr.isr
+ mov r9=cr.ifa
+ mov r10=cr.iim
+ mov r11=cr.itir
+ ;;
+ ssm psr.ic | psr.dt
+ srlz.d // guarantee that interrupt collection is enabled
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ ;;
+ srlz.i // must precede "alloc"!
+ ;;
+ alloc r14=ar.pfs,0,0,5,0 // must be first in insn group
+ mov out0=r15
+ mov out1=r8
+ mov out2=r9
+ mov out3=r10
+ mov out4=r11
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+#ifdef CONFIG_KDB
+ br.call.sptk.few b6=ia64_invoke_kdb_fault_handler
+#else
+ br.call.sptk.few b6=ia64_fault
+#endif
+//
+// --- End of long entries, Beginning of short entries
+//
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
+ mov r16=cr.ifa
+ rsm psr.dt
+#if 0
+	// If you disable this, you MUST re-enable the update_mmu_cache() code in pgtable.h
+ mov r17=_PAGE_SIZE_4K<<2
+ ;;
+ ptc.l r16,r17
+#endif
+ ;;
+ mov r31=pr
+ srlz.d
+ br.cond.sptk.many page_fault
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
+ mov r16=cr.ifa
+ rsm psr.dt
+ mov r31=pr
+ ;;
+ srlz.d
+ br.cond.sptk.many page_fault
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ mov r16=cr.ifa
+ rsm psr.dt
+ mov r31=pr
+ ;;
+ srlz.d
+ br.cond.sptk.many page_fault
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ mov r16=cr.ifa
+ rsm psr.dt
+ mov r31=pr
+ ;;
+ srlz.d
+ br.cond.sptk.many page_fault
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ FAULT(24)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ rsm psr.dt | psr.dfh // ensure we can access fph
+ ;;
+ srlz.d
+ mov r31=pr
+ mov r19=25
+ br.cond.sptk.many dispatch_to_fault_handler
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ FAULT(26)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ //
+ // A [f]chk.[as] instruction needs to take the branch to
+ // the recovery code but this part of the architecture is
+ // not implemented in hardware on some CPUs, such as Itanium.
+ // Thus, in general we need to emulate the behavior.
+ // IIM contains the relative target (not yet sign extended).
+ // So after sign extending it we simply add it to IIP.
+ // We also need to reset the EI field of the IPSR to zero,
+ // i.e., the slot to restart into.
+ //
+	// cr.iim contains zero_ext(imm21)
+ //
+ mov r18=cr.iim
+ ;;
+ mov r17=cr.iip
+ shl r18=r18,43 // put sign bit in position (43=64-21)
+ ;;
+
+ mov r16=cr.ipsr
+ shr r18=r18,39 // sign extend (39=43-4)
+ ;;
+
+ add r17=r17,r18 // now add the offset
+ ;;
+ mov cr.iip=r17
+ dep r16=0,r16,41,2 // clear EI
+ ;;
+
+ mov cr.ipsr=r16
+ ;;
+
+ rfi;; // and go back (must be last insn in group)
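+
+	// Worked example of the shl/shr pair above (illustration only):
+	// an imm21 of 0x1FFFFE (i.e. -2 bundles in two's complement) arrives
+	// zero-extended in cr.iim.  shl by 43 moves bit 20 (the sign bit) into
+	// bit 63, giving 0xfffff00000000000; the arithmetic shr by 39 then
+	// yields -32, which is -2 bundles * 16 bytes: the net "<< 4" scales
+	// the displacement to a byte offset before it is added to cr.iip.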
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+ FAULT(28)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ FAULT(29)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ rsm psr.dt // avoid nested faults due to TLB misses...
+ mov r16=cr.ipsr
+ mov r31=pr // prepare to save predicates
+ ;;
+ srlz.d // ensure everyone knows psr.dt is off
+ mov r19=30 // error vector for fault_handler (when kernel)
+ extr.u r16=r16,32,2 // extract psr.cpl
+ ;;
+ cmp.eq p6,p7=r0,r16 // if kernel cpl then fault else emulate
+(p7) br.cond.sptk.many dispatch_unaligned_handler
+(p6) br.cond.sptk.many dispatch_to_fault_handler
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ FAULT(31)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
+ FAULT(32)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ FAULT(33)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ FAULT(34)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ FAULT(35)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ FAULT(36)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Reserved
+ FAULT(37)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+ FAULT(38)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+ FAULT(39)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+ FAULT(40)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+ FAULT(41)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+ FAULT(42)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+ FAULT(43)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+ FAULT(44)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ FAULT(45)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
+ FAULT(46)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
+#ifdef CONFIG_IA32_SUPPORT
+ rsm psr.dt
+ ;;
+ srlz.d
+ mov r31=pr
+ br.cond.sptk.many dispatch_to_ia32_handler
+#else
+ FAULT(47)
+#endif
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+ FAULT(48)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+ FAULT(49)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+ FAULT(50)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+ FAULT(51)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7000 Entry 52 (size 16 bundles) Reserved
+ FAULT(52)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+ FAULT(53)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+ FAULT(54)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+ FAULT(55)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+ FAULT(56)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+ FAULT(57)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+ FAULT(58)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+ FAULT(59)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+ FAULT(60)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+ FAULT(61)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+ FAULT(62)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+ FAULT(63)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+ FAULT(64)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+ FAULT(65)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+ FAULT(66)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+ FAULT(67)
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
new file mode 100644
index 000000000..153fb5684
--- /dev/null
+++ b/arch/ia64/kernel/machvec.c
@@ -0,0 +1,48 @@
+#include <linux/kernel.h>
+
+#include <asm/page.h>
+#include <asm/machvec.h>
+
+struct ia64_machine_vector ia64_mv;
+
+void
+machvec_noop (void)
+{
+}
+
+/*
+ * Most platforms use this routine for mapping page frame addresses
+ * into a memory map index.
+ */
+unsigned long
+map_nr_dense (unsigned long addr)
+{
+ return MAP_NR_DENSE(addr);
+}
+
+static struct ia64_machine_vector *
+lookup_machvec (const char *name)
+{
+ extern struct ia64_machine_vector machvec_start[];
+ extern struct ia64_machine_vector machvec_end[];
+ struct ia64_machine_vector *mv;
+
+ for (mv = machvec_start; mv < machvec_end; ++mv)
+ if (strcmp (mv->name, name) == 0)
+ return mv;
+
+ return 0;
+}
+
+void
+machvec_init (const char *name)
+{
+ struct ia64_machine_vector *mv;
+
+ mv = lookup_machvec(name);
+ if (!mv) {
+ panic("generic kernel failed to find machine vector for platform %s!", name);
+ }
+ ia64_mv = *mv;
+ printk("booting generic kernel on platform %s\n", name);
+}
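+
+/*
+ * Usage sketch (an assumption, for illustration only): the generic
+ * kernel's early setup code is expected to pass the platform name it
+ * discovered (from firmware or the boot command line) to machvec_init(),
+ * which copies the matching entry out of the machvec_start..machvec_end
+ * linker section into ia64_mv:
+ *
+ *	machvec_init("someplatform");	// "someplatform" is a made-up name
+ *
+ * From then on, indirect calls through ia64_mv dispatch to that
+ * platform's handlers.
+ */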
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
new file mode 100644
index 000000000..320c56ebc
--- /dev/null
+++ b/arch/ia64/kernel/mca.c
@@ -0,0 +1,842 @@
+/*
+ * File: mca.c
+ * Purpose: Generic MCA handling layer
+ *
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) Vijay Chander(vijay@engr.sgi.com)
+ */
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/sal.h>
+#include <asm/mca.h>
+#include <asm/spinlock.h>
+#include <asm/irq.h>
+#include <asm/machvec.h>
+
+
+ia64_mc_info_t ia64_mc_info;
+ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state;
+ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state;
+u64 ia64_mca_proc_state_dump[256];
+u64 ia64_mca_stack[1024];
+u64 ia64_mca_stackframe[32];
+u64 ia64_mca_bspstore[1024];
+
+static void ia64_mca_cmc_vector_setup(int enable,
+ int_vector_t cmc_vector);
+static void ia64_mca_wakeup_ipi_wait(void);
+static void ia64_mca_wakeup(int cpu);
+static void ia64_mca_wakeup_all(void);
+static void ia64_log_init(int,int);
+static void ia64_log_get(int,int, prfunc_t);
+static void ia64_log_clear(int,int,int, prfunc_t);
+
+/*
+ * ia64_mca_cmc_vector_setup
+ * Setup the correctable machine check vector register in the processor
+ * Inputs
+ * Enable (1 - enable cmc interrupt , 0 - disable)
+ * CMC handler entry point (if enabled)
+ *
+ * Outputs
+ * None
+ */
+static void
+ia64_mca_cmc_vector_setup(int enable,
+ int_vector_t cmc_vector)
+{
+ cmcv_reg_t cmcv;
+
+ cmcv.cmcv_regval = 0;
+ cmcv.cmcv_mask = enable;
+ cmcv.cmcv_vector = cmc_vector;
+ ia64_set_cmcv(cmcv.cmcv_regval);
+}
+
+
+#if defined(MCA_TEST)
+
+sal_log_processor_info_t slpi_buf;
+
+void
+mca_test(void)
+{
+ slpi_buf.slpi_valid.slpi_psi = 1;
+ slpi_buf.slpi_valid.slpi_cache_check = 1;
+ slpi_buf.slpi_valid.slpi_tlb_check = 1;
+ slpi_buf.slpi_valid.slpi_bus_check = 1;
+ slpi_buf.slpi_valid.slpi_minstate = 1;
+ slpi_buf.slpi_valid.slpi_bank1_gr = 1;
+ slpi_buf.slpi_valid.slpi_br = 1;
+ slpi_buf.slpi_valid.slpi_cr = 1;
+ slpi_buf.slpi_valid.slpi_ar = 1;
+ slpi_buf.slpi_valid.slpi_rr = 1;
+ slpi_buf.slpi_valid.slpi_fr = 1;
+
+ ia64_os_mca_dispatch();
+}
+
+#endif /* #if defined(MCA_TEST) */
+
+/*
+ * mca_init
+ * Do all the mca specific initialization on a per-processor basis.
+ *
+ * 1. Register spinloop and wakeup request interrupt vectors
+ *
+ * 2. Register OS_MCA handler entry point
+ *
+ * 3. Register OS_INIT handler entry point
+ *
+ * 4. Initialize CMCV register to enable/disable CMC interrupt on the
+ * processor and hook a handler in the platform-specific mca_init.
+ *
+ * 5. Initialize MCA/CMC/INIT related log buffers maintained by the OS.
+ *
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void __init
+mca_init(void)
+{
+ int i;
+
+ MCA_DEBUG("mca_init : begin\n");
+ /* Clear the Rendez checkin flag for all cpus */
+ for(i = 0 ; i < IA64_MAXCPUS; i++)
+ ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+
+ /* NOTE : The actual irqs for the rendez, wakeup and
+ * cmc interrupts are requested in the platform-specific
+ * mca initialization code.
+ */
+ /*
+ * Register the rendezvous spinloop and wakeup mechanism with SAL
+ */
+
+ /* Register the rendezvous interrupt vector with SAL */
+ if (ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT,
+ SAL_MC_PARAM_MECHANISM_INT,
+ IA64_MCA_RENDEZ_INT_VECTOR,
+ IA64_MCA_RENDEZ_TIMEOUT))
+ return;
+
+ /* Register the wakeup interrupt vector with SAL */
+ if (ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP,
+ SAL_MC_PARAM_MECHANISM_INT,
+ IA64_MCA_WAKEUP_INT_VECTOR,
+ 0))
+ return;
+
+ MCA_DEBUG("mca_init : registered mca rendezvous spinloop and wakeup mech.\n");
+ /*
+ * Setup the correctable machine check vector
+ */
+ ia64_mca_cmc_vector_setup(IA64_CMC_INT_ENABLE,
+ IA64_MCA_CMC_INT_VECTOR);
+
+ MCA_DEBUG("mca_init : correctable mca vector setup done\n");
+
+ ia64_mc_info.imi_mca_handler = __pa(ia64_os_mca_dispatch);
+ ia64_mc_info.imi_mca_handler_size =
+ __pa(ia64_os_mca_dispatch_end) - __pa(ia64_os_mca_dispatch);
+ /* Register the os mca handler with SAL */
+ if (ia64_sal_set_vectors(SAL_VECTOR_OS_MCA,
+ ia64_mc_info.imi_mca_handler,
+ __pa(ia64_get_gp()),
+ ia64_mc_info.imi_mca_handler_size,
+				 0,0,0))
+		return;
+
+ MCA_DEBUG("mca_init : registered os mca handler with SAL\n");
+
+ ia64_mc_info.imi_monarch_init_handler = __pa(ia64_monarch_init_handler);
+ ia64_mc_info.imi_monarch_init_handler_size = IA64_INIT_HANDLER_SIZE;
+ ia64_mc_info.imi_slave_init_handler = __pa(ia64_slave_init_handler);
+ ia64_mc_info.imi_slave_init_handler_size = IA64_INIT_HANDLER_SIZE;
+ /* Register the os init handler with SAL */
+ if (ia64_sal_set_vectors(SAL_VECTOR_OS_INIT,
+ ia64_mc_info.imi_monarch_init_handler,
+ __pa(ia64_get_gp()),
+ ia64_mc_info.imi_monarch_init_handler_size,
+ ia64_mc_info.imi_slave_init_handler,
+ __pa(ia64_get_gp()),
+				 ia64_mc_info.imi_slave_init_handler_size))
+		return;
+
+ MCA_DEBUG("mca_init : registered os init handler with SAL\n");
+
+ /* Initialize the areas set aside by the OS to buffer the
+ * platform/processor error states for MCA/INIT/CMC
+ * handling.
+ */
+ ia64_log_init(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR);
+ ia64_log_init(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PLATFORM);
+ ia64_log_init(SAL_INFO_TYPE_INIT, SAL_SUB_INFO_TYPE_PROCESSOR);
+ ia64_log_init(SAL_INFO_TYPE_INIT, SAL_SUB_INFO_TYPE_PLATFORM);
+ ia64_log_init(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR);
+ ia64_log_init(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PLATFORM);
+
+ mca_init_platform();
+
+ MCA_DEBUG("mca_init : platform-specific mca handling setup done\n");
+
+#if defined(MCA_TEST)
+ mca_test();
+#endif /* #if defined(MCA_TEST) */
+
+ printk("Mca related initialization done\n");
+}
+
+/*
+ * ia64_mca_wakeup_ipi_wait
+ * Wait for the inter-cpu interrupt to be sent by the
+ * monarch processor once it is done with handling the
+ * MCA.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void
+ia64_mca_wakeup_ipi_wait(void)
+{
+ int irr_num = (IA64_MCA_WAKEUP_INT_VECTOR >> 6);
+ int irr_bit = (IA64_MCA_WAKEUP_INT_VECTOR & 0x3f);
+ u64 irr = 0;
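+
+	/*
+	 * The IRR is four 64-bit registers (irr0-irr3) covering interrupt
+	 * vectors 0-255, so bits 7:6 of the vector select the register and
+	 * bits 5:0 select the bit within it (hence the >> 6 and & 0x3f above).
+	 */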
+
+ do {
+ switch(irr_num) {
+ case 0:
+ irr = ia64_get_irr0();
+ break;
+ case 1:
+ irr = ia64_get_irr1();
+ break;
+ case 2:
+ irr = ia64_get_irr2();
+ break;
+ case 3:
+ irr = ia64_get_irr3();
+ break;
+ }
+	} while (!(irr & (1UL << irr_bit)));
+}
+
+/*
+ * ia64_mca_wakeup
+ *	Send an inter-cpu interrupt to wake up a particular cpu
+ * and mark that cpu to be out of rendez.
+ * Inputs
+ * cpuid
+ * Outputs
+ * None
+ */
+void
+ia64_mca_wakeup(int cpu)
+{
+ ipi_send(cpu, IA64_MCA_WAKEUP_INT_VECTOR, IA64_IPI_DM_INT);
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+
+}
+/*
+ * ia64_mca_wakeup_all
+ * Wakeup all the cpus which have rendez'ed previously.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void
+ia64_mca_wakeup_all(void)
+{
+ int cpu;
+
+	/* Wake up each cpu which has rendez'ed previously (this also clears its checkin flag) */
+ for(cpu = 0 ; cpu < IA64_MAXCPUS; cpu++)
+ if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE)
+ ia64_mca_wakeup(cpu);
+
+}
+/*
+ * ia64_mca_rendez_interrupt_handler
+ *	This is the handler used to put slave processors into a spinloop
+ *	while the monarch processor does the MCA handling; the monarch
+ *	later wakes each slave up once it is done.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void
+ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs)
+{
+	unsigned long flags;
+ /* Mask all interrupts */
+ save_and_cli(flags);
+
+ ia64_mc_info.imi_rendez_checkin[ia64_get_cpuid(0)] = IA64_MCA_RENDEZ_CHECKIN_DONE;
+ /* Register with the SAL monarch that the slave has
+ * reached SAL
+ */
+ ia64_sal_mc_rendez();
+
+ /* Wait for the wakeup IPI from the monarch
+ * This waiting is done by polling on the wakeup-interrupt
+ * vector bit in the processor's IRRs
+ */
+ ia64_mca_wakeup_ipi_wait();
+
+ /* Enable all interrupts */
+ restore_flags(flags);
+
+
+}
+
+
+/*
+ * ia64_mca_wakeup_int_handler
+ * The interrupt handler for processing the inter-cpu interrupt to the
+ * slave cpu which was spinning in the rendez loop.
+ * Since this spinning is done by turning off the interrupts and
+ * polling on the wakeup-interrupt bit in the IRR, there is
+ * nothing useful to be done in the handler.
+ * Inputs
+ * wakeup_irq (Wakeup-interrupt bit)
+ * arg (Interrupt handler specific argument)
+ * ptregs (Exception frame at the time of the interrupt)
+ * Outputs
+ *
+ */
+void
+ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs)
+{
+
+}
+
+/*
+ * ia64_return_to_sal_check
+ *	This function is called before going back from the OS_MCA handler
+ *	to the OS_MCA dispatch code, which finally hands control back
+ *	to SAL.
+ *	Its main purpose is to set up the OS_MCA to SAL return state,
+ *	which is used by the OS_MCA dispatch code just before going
+ *	back to SAL.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+
+void
+ia64_return_to_sal_check(void)
+{
+ /* Copy over some relevant stuff from the sal_to_os_mca_handoff
+ * so that it can be used at the time of os_mca_to_sal_handoff
+ */
+ ia64_os_to_sal_handoff_state.imots_sal_gp =
+ ia64_sal_to_os_handoff_state.imsto_sal_gp;
+
+ ia64_os_to_sal_handoff_state.imots_sal_check_ra =
+ ia64_sal_to_os_handoff_state.imsto_sal_check_ra;
+
+ /* For now ignore the MCA */
+ ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED;
+}
+/*
+ * ia64_mca_ucmc_handler
+ *	This is the uncorrectable machine check handler, called from the
+ *	OS_MCA dispatch code, which is in turn called from SAL_CHECK().
+ *	This is the place where the core of the OS MCA handling is done.
+ *	Right now the logs are extracted and displayed in a well-defined
+ *	format.  This handler code is supposed to run only on the
+ *	monarch processor.  Once the monarch is done with the MCA handling,
+ *	further MCA logging is enabled by clearing the logs.
+ *	The monarch also has the duty of sending wakeup IPIs to pull the
+ *	slave processors out of the rendezvous spin loop.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void
+ia64_mca_ucmc_handler(void)
+{
+
+ /* Get the MCA processor log */
+ ia64_log_get(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk);
+ /* Get the MCA platform log */
+ ia64_log_get(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PLATFORM, (prfunc_t)printk);
+
+ ia64_log_print(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk);
+
+ /*
+ * Do some error handling - Platform-specific mca handler is called at this point
+ */
+
+ mca_handler_platform() ;
+
+ /* Clear the SAL MCA logs */
+ ia64_log_clear(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, 1, printk);
+ ia64_log_clear(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PLATFORM, 1, printk);
+
+ /* Wakeup all the processors which are spinning in the rendezvous
+ * loop.
+ */
+ ia64_mca_wakeup_all();
+ ia64_return_to_sal_check();
+}
+
+/*
+ * SAL to OS entry point for INIT on the monarch processor
+ * This has been defined for registration purposes with SAL
+ * as a part of mca_init.
+ */
+void
+ia64_monarch_init_handler()
+{
+}
+/*
+ * SAL to OS entry point for INIT on the slave processor
+ * This has been defined for registration purposes with SAL
+ * as a part of mca_init.
+ */
+
+void
+ia64_slave_init_handler()
+{
+}
+/*
+ * ia64_mca_cmc_int_handler
+ *	This is the correctable machine check interrupt handler.
+ * Right now the logs are extracted and displayed in a well-defined
+ * format.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void
+ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
+{
+ /* Get the CMC processor log */
+ ia64_log_get(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk);
+ /* Get the CMC platform log */
+ ia64_log_get(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PLATFORM, (prfunc_t)printk);
+
+
+ ia64_log_print(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk);
+ cmci_handler_platform(cmc_irq, arg, ptregs);
+
+ /* Clear the CMC SAL logs now that they have been saved in the OS buffer */
+ ia64_sal_clear_state_info(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR);
+ ia64_sal_clear_state_info(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PLATFORM);
+}
+
+/*
+ * IA64_MCA log support
+ */
+#define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */
+#define IA64_MAX_LOG_TYPES 3 /* MCA, CMC, INIT */
+#define IA64_MAX_LOG_SUBTYPES 2 /* Processor, Platform */
+
+typedef struct ia64_state_log_s {
+ spinlock_t isl_lock;
+ int isl_index;
+ sal_log_header_t isl_log[IA64_MAX_LOGS];
+
+} ia64_state_log_t;
+
+static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES][IA64_MAX_LOG_SUBTYPES];
+
+#define IA64_LOG_LOCK_INIT(it, sit) spin_lock_init(&ia64_state_log[it][sit].isl_lock)
+#define IA64_LOG_LOCK(it, sit) spin_lock_irqsave(&ia64_state_log[it][sit].isl_lock, s)
+#define IA64_LOG_UNLOCK(it, sit) spin_unlock_irqrestore(&ia64_state_log[it][sit].isl_lock,\
+ s)
+#define IA64_LOG_NEXT_INDEX(it, sit) ia64_state_log[it][sit].isl_index
+#define IA64_LOG_CURR_INDEX(it, sit) 1 - ia64_state_log[it][sit].isl_index
+#define IA64_LOG_INDEX_INC(it, sit) \
+ ia64_state_log[it][sit].isl_index = 1 - ia64_state_log[it][sit].isl_index
+#define IA64_LOG_INDEX_DEC(it, sit) \
+ ia64_state_log[it][sit].isl_index = 1 - ia64_state_log[it][sit].isl_index
+#define IA64_LOG_NEXT_BUFFER(it, sit) (void *)(&(ia64_state_log[it][sit].isl_log[IA64_LOG_NEXT_INDEX(it,sit)]))
+#define IA64_LOG_CURR_BUFFER(it, sit) (void *)(&(ia64_state_log[it][sit].isl_log[IA64_LOG_CURR_INDEX(it,sit)]))
+
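A minimal sketch of the double-buffering scheme these macros implement
(illustrative only, not part of the patch); "it" and "sit" stand for the
info type and sub-info type indices used throughout:

	int next = ia64_state_log[it][sit].isl_index;	/* slot filled by the next event */
	int curr = 1 - next;				/* record captured most recently */

	/* ia64_log_get():   fill "next", then IA64_LOG_INDEX_INC makes it "curr" */
	/* ia64_log_clear(): wipe "curr", then IA64_LOG_INDEX_DEC frees it again  */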
+/*
+ * ia64_log_init
+ * Reset the OS ia64 log buffer
+ * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC})
+ * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM})
+ * Outputs : None
+ */
+void
+ia64_log_init(int sal_info_type, int sal_sub_info_type)
+{
+ IA64_LOG_LOCK_INIT(sal_info_type, sal_sub_info_type);
+ IA64_LOG_NEXT_INDEX(sal_info_type, sal_sub_info_type) = 0;
+ memset(IA64_LOG_NEXT_BUFFER(sal_info_type, sal_sub_info_type), 0,
+ sizeof(sal_log_header_t) * IA64_MAX_LOGS);
+}
+
+/*
+ * ia64_log_get
+ * Get the current MCA log from SAL and copy it into the OS log buffer.
+ * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC})
+ * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM})
+ * Outputs : None
+ *
+ */
+void
+ia64_log_get(int sal_info_type, int sal_sub_info_type, prfunc_t prfunc)
+{
+ sal_log_header_t *log_buffer;
+ int s;
+
+ IA64_LOG_LOCK(sal_info_type, sal_sub_info_type);
+
+
+ /* Get the process state information */
+ log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type, sal_sub_info_type);
+
+	if (ia64_sal_get_state_info(sal_info_type, sal_sub_info_type, (u64 *)log_buffer))
+		prfunc("ia64_log_get: getting SAL state info failed\n");
+
+ IA64_LOG_INDEX_INC(sal_info_type, sal_sub_info_type);
+
+ IA64_LOG_UNLOCK(sal_info_type, sal_sub_info_type);
+
+}
+
+/*
+ * ia64_log_clear
+ *	Clear the current MCA log from SAL and, depending on the clear_os_buffer flag,
+ *	clear the OS log buffer as well.
+ * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC})
+ * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM})
+ * clear_os_buffer
+ * prfunc (print function)
+ * Outputs : None
+ *
+ */
+void
+ia64_log_clear(int sal_info_type, int sal_sub_info_type, int clear_os_buffer, prfunc_t prfunc)
+{
+ if (ia64_sal_clear_state_info(sal_info_type, sal_sub_info_type))
+		prfunc("ia64_log_clear: clearing SAL state info failed\n");
+
+ if (clear_os_buffer) {
+ sal_log_header_t *log_buffer;
+ int s;
+
+ IA64_LOG_LOCK(sal_info_type, sal_sub_info_type);
+
+ /* Get the process state information */
+ log_buffer = IA64_LOG_CURR_BUFFER(sal_info_type, sal_sub_info_type);
+
+ memset(log_buffer, 0, sizeof(sal_log_header_t));
+
+ IA64_LOG_INDEX_DEC(sal_info_type, sal_sub_info_type);
+
+ IA64_LOG_UNLOCK(sal_info_type, sal_sub_info_type);
+ }
+
+}
+
+/*
+ * ia64_log_processor_regs_print
+ * Print the contents of the saved processor register(s) in the format
+ * <reg_prefix>[<index>] <value>
+ *
+ * Inputs : regs (Register save buffer)
+ * reg_num (# of registers)
+ * reg_class (application/banked/control/bank1_general)
+ * reg_prefix (ar/br/cr/b1_gr)
+ * Outputs : None
+ *
+ */
+void
+ia64_log_processor_regs_print(u64 *regs,
+ int reg_num,
+ char *reg_class,
+ char *reg_prefix,
+ prfunc_t prfunc)
+{
+ int i;
+
+ prfunc("+%s Registers\n", reg_class);
+ for (i = 0; i < reg_num; i++)
+ prfunc("+ %s[%d] 0x%lx\n", reg_prefix, i, regs[i]);
+}
+
+static char *pal_mesi_state[] = {
+ "Invalid",
+ "Shared",
+ "Exclusive",
+ "Modified",
+ "Reserved1",
+ "Reserved2",
+ "Reserved3",
+ "Reserved4"
+};
+
+static char *pal_cache_op[] = {
+ "Unknown",
+ "Move in",
+ "Cast out",
+ "Coherency check",
+ "Internal",
+ "Instruction fetch",
+ "Implicit Writeback",
+ "Reserved"
+};
+
+/*
+ * ia64_log_cache_check_info_print
+ * Display the machine check information related to cache error(s).
+ * Inputs : i (Multiple errors are logged, i - index of logged error)
+ * info (Machine check info logged by the PAL and later
+ * captured by the SAL)
+ * target_addr (Address which caused the cache error)
+ * Outputs : None
+ */
+void
+ia64_log_cache_check_info_print(int i,
+ pal_cache_check_info_t info,
+ u64 target_addr,
+ prfunc_t prfunc)
+{
+ prfunc("+ Cache check info[%d]\n+", i);
+ prfunc(" Level: L%d",info.level);
+ if (info.mv)
+ prfunc(" ,Mesi: %s",pal_mesi_state[info.mesi]);
+ prfunc(" ,Index: %d,", info.index);
+ if (info.ic)
+ prfunc(" ,Cache: Instruction");
+ if (info.dc)
+ prfunc(" ,Cache: Data");
+ if (info.tl)
+ prfunc(" ,Line: Tag");
+ if (info.dl)
+ prfunc(" ,Line: Data");
+ prfunc(" ,Operation: %s,", pal_cache_op[info.op]);
+ if (info.wv)
+ prfunc(" ,Way: %d,", info.way);
+ if (info.tv)
+ prfunc(" ,Target Addr: 0x%lx", target_addr);
+ if (info.mc)
+ prfunc(" ,MC: Corrected");
+ prfunc("\n");
+}
+
+/*
+ * ia64_log_tlb_check_info_print
+ * Display the machine check information related to tlb error(s).
+ * Inputs : i (Multiple errors are logged, i - index of logged error)
+ * info (Machine check info logged by the PAL and later
+ * captured by the SAL)
+ * Outputs : None
+ */
+
+void
+ia64_log_tlb_check_info_print(int i,
+ pal_tlb_check_info_t info,
+ prfunc_t prfunc)
+{
+ prfunc("+ TLB Check Info [%d]\n+", i);
+ if (info.itc)
+ prfunc(" Failure: Instruction Translation Cache");
+ if (info.dtc)
+ prfunc(" Failure: Data Translation Cache");
+ if (info.itr) {
+ prfunc(" Failure: Instruction Translation Register");
+ prfunc(" ,Slot: %d", info.tr_slot);
+ }
+ if (info.dtr) {
+ prfunc(" Failure: Data Translation Register");
+ prfunc(" ,Slot: %d", info.tr_slot);
+ }
+ if (info.mc)
+ prfunc(" ,MC: Corrected");
+ prfunc("\n");
+}
+
+/*
+ * ia64_log_bus_check_info_print
+ * Display the machine check information related to bus error(s).
+ * Inputs : i (Multiple errors are logged, i - index of logged error)
+ * info (Machine check info logged by the PAL and later
+ * captured by the SAL)
+ * req_addr (Address of the requestor of the transaction)
+ * resp_addr (Address of the responder of the transaction)
+ * target_addr (Address where the data was to be delivered to or
+ * obtained from)
+ * Outputs : None
+ */
+void
+ia64_log_bus_check_info_print(int i,
+ pal_bus_check_info_t info,
+ u64 req_addr,
+ u64 resp_addr,
+ u64 targ_addr,
+ prfunc_t prfunc)
+{
+ prfunc("+ BUS Check Info [%d]\n+", i);
+ prfunc(" Status Info: %d", info.bsi);
+ prfunc(" ,Severity: %d", info.sev);
+ prfunc(" ,Transaction Type: %d", info.type);
+ prfunc(" ,Transaction Size: %d", info.size);
+ if (info.cc)
+ prfunc(" ,Cache-cache-transfer");
+ if (info.ib)
+ prfunc(" ,Error: Internal");
+ if (info.eb)
+ prfunc(" ,Error: External");
+ if (info.mc)
+ prfunc(" ,MC: Corrected");
+ if (info.tv)
+ prfunc(" ,Target Address: 0x%lx", targ_addr);
+ if (info.rq)
+ prfunc(" ,Requestor Address: 0x%lx", req_addr);
+ if (info.tv)
+ prfunc(" ,Responder Address: 0x%lx", resp_addr);
+ prfunc("\n");
+}
+
+/*
+ * ia64_log_processor_info_print
+ * Display the processor-specific information logged by PAL as a part
+ * of MCA or INIT or CMC.
+ * Inputs : lh (Pointer of the sal log header which specifies the format
+ * of SAL state info as specified by the SAL spec).
+ * Outputs : None
+ */
+void
+ia64_log_processor_info_print(sal_log_header_t *lh, prfunc_t prfunc)
+{
+ sal_log_processor_info_t *slpi;
+ int i;
+
+ if (!lh)
+ return;
+
+ if (lh->slh_log_type != SAL_SUB_INFO_TYPE_PROCESSOR)
+ return;
+
+#if defined(MCA_TEST)
+ slpi = &slpi_buf;
+#else
+ slpi = (sal_log_processor_info_t *)lh->slh_log_dev_spec_info;
+#endif /* #if defined(MCA_TEST) */
+
+ if (!slpi) {
+ prfunc("No Processor Error Log found\n");
+ return;
+ }
+
+ /* Print branch register contents if valid */
+ if (slpi->slpi_valid.slpi_br)
+ ia64_log_processor_regs_print(slpi->slpi_br, 8, "Branch", "br", prfunc);
+
+ /* Print control register contents if valid */
+ if (slpi->slpi_valid.slpi_cr)
+ ia64_log_processor_regs_print(slpi->slpi_cr, 128, "Control", "cr", prfunc);
+
+ /* Print application register contents if valid */
+ if (slpi->slpi_valid.slpi_ar)
+		ia64_log_processor_regs_print(slpi->slpi_ar, 128, "Application", "ar", prfunc);
+
+ /* Print region register contents if valid */
+ if (slpi->slpi_valid.slpi_rr)
+ ia64_log_processor_regs_print(slpi->slpi_rr, 8, "Region", "rr", prfunc);
+
+ /* Print floating-point register contents if valid */
+ if (slpi->slpi_valid.slpi_fr)
+ ia64_log_processor_regs_print(slpi->slpi_fr, 128, "Floating-point", "fr",
+ prfunc);
+
+ /* Print bank1-gr NAT register contents if valid */
+ ia64_log_processor_regs_print(&slpi->slpi_bank1_nat_bits, 1, "NAT", "nat", prfunc);
+
+ /* Print bank 1 register contents if valid */
+ if (slpi->slpi_valid.slpi_bank1_gr)
+ ia64_log_processor_regs_print(slpi->slpi_bank1_gr, 16, "Bank1-General", "gr",
+ prfunc);
+
+	/* Print the cache check information, if any */
+ for (i = 0 ; i < MAX_CACHE_ERRORS; i++)
+ ia64_log_cache_check_info_print(i,
+ slpi->slpi_cache_check_info[i].slpi_cache_check,
+ slpi->slpi_cache_check_info[i].slpi_target_address,
+ prfunc);
+	/* Print the TLB check information, if any */
+ for (i = 0 ; i < MAX_TLB_ERRORS; i++)
+ ia64_log_tlb_check_info_print(i,slpi->slpi_tlb_check_info[i], prfunc);
+
+	/* Print the bus check information, if any */
+ for (i = 0 ; i < MAX_BUS_ERRORS; i++)
+ ia64_log_bus_check_info_print(i,
+ slpi->slpi_bus_check_info[i].slpi_bus_check,
+ slpi->slpi_bus_check_info[i].slpi_requestor_addr,
+ slpi->slpi_bus_check_info[i].slpi_responder_addr,
+ slpi->slpi_bus_check_info[i].slpi_target_addr,
+ prfunc);
+
+}
+
+/*
+ * ia64_log_print
+ * Display the contents of the OS error log information
+ * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC})
+ * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM})
+ * Outputs : None
+ */
+void
+ia64_log_print(int sal_info_type, int sal_sub_info_type, prfunc_t prfunc)
+{
+ char *info_type, *sub_info_type;
+
+ switch(sal_info_type) {
+ case SAL_INFO_TYPE_MCA:
+ info_type = "MCA";
+ break;
+ case SAL_INFO_TYPE_INIT:
+ info_type = "INIT";
+ break;
+ case SAL_INFO_TYPE_CMC:
+ info_type = "CMC";
+ break;
+ default:
+ info_type = "UNKNOWN";
+ break;
+ }
+
+ switch(sal_sub_info_type) {
+ case SAL_SUB_INFO_TYPE_PROCESSOR:
+ sub_info_type = "PROCESSOR";
+ break;
+ case SAL_SUB_INFO_TYPE_PLATFORM:
+ sub_info_type = "PLATFORM";
+ break;
+ default:
+ sub_info_type = "UNKNOWN";
+ break;
+ }
+
+ prfunc("+BEGIN HARDWARE ERROR STATE [%s %s]\n", info_type, sub_info_type);
+ if (sal_sub_info_type == SAL_SUB_INFO_TYPE_PROCESSOR)
+ ia64_log_processor_info_print(
+ IA64_LOG_CURR_BUFFER(sal_info_type, sal_sub_info_type),
+ prfunc);
+ else
+ log_print_platform(IA64_LOG_CURR_BUFFER(sal_info_type, sal_sub_info_type),prfunc);
+ prfunc("+END HARDWARE ERROR STATE [%s %s]\n", info_type, sub_info_type);
+}
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
new file mode 100644
index 000000000..3d49ac06e
--- /dev/null
+++ b/arch/ia64/kernel/mca_asm.S
@@ -0,0 +1,621 @@
+#include <asm/processor.h>
+#include <asm/mcaasm.h>
+#include <asm/page.h>
+#include <asm/mca.h>
+
+ .psr abi64
+ .psr lsb
+ .lsb
+
+/*
+ * SAL_TO_OS_MCA_HANDOFF_STATE
+ * 1. GR1 = OS GP
+ * 2. GR8 = PAL_PROC physical address
+ * 3. GR9 = SAL_PROC physical address
+ * 4. GR10 = SAL GP (physical)
+ * 5. GR11 = Rendez state
+ * 6. GR12 = Return address to location within SAL_CHECK
+ */
+#define SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(_tmp) \
+ movl _tmp=ia64_sal_to_os_handoff_state;; \
+ st8 [_tmp]=r1,0x08;; \
+ st8 [_tmp]=r8,0x08;; \
+ st8 [_tmp]=r9,0x08;; \
+ st8 [_tmp]=r10,0x08;; \
+ st8 [_tmp]=r11,0x08;; \
+ st8 [_tmp]=r12,0x08;;
+
+/*
+ * OS_MCA_TO_SAL_HANDOFF_STATE
+ * 1. GR8 = OS_MCA status
+ * 2. GR9 = SAL GP (physical)
+ * 3. GR22 = New min state save area pointer
+ */
+#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp) \
+ movl _tmp=ia64_os_to_sal_handoff_state;; \
+ DATA_VA_TO_PA(_tmp);; \
+ ld8 r8=[_tmp],0x08;; \
+ ld8 r9=[_tmp],0x08;; \
+ ld8 r22=[_tmp],0x08;;
+
+/*
+ * BRANCH
+ * Jump to the instruction referenced by
+ * "to_label".
+ * Branch is taken only if the predicate
+ * register "p" is true.
+ * "ip" is the address of the instruction
+ * located at "from_label".
+ * "temp" is a scratch register like r2
+ * "adjust" needed for HP compiler.
+ * A screwup somewhere with constant arithmetic.
+ */
+#define BRANCH(to_label, temp, p, adjust) \
+100: (p) mov temp=ip; \
+ ;; \
+ (p) adds temp=to_label-100b,temp;\
+ (p) adds temp=adjust,temp; \
+ (p) mov b1=temp ; \
+ (p) br b1
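For example, the invocation BRANCH(ia64_os_mca_done_dump, r2, p0, -0x10) used
at the end of the state-dump stub below computes
r2 = ip + (ia64_os_mca_done_dump - 100b) - 0x10 and branches through b1;
since p0 is always true, that particular branch is unconditional.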
+
+ .global ia64_os_mca_dispatch
+ .global ia64_os_mca_dispatch_end
+ .global ia64_sal_to_os_handoff_state
+ .global ia64_os_to_sal_handoff_state
+ .global ia64_os_mca_ucmc_handler
+ .global ia64_mca_proc_state_dump
+ .global ia64_mca_proc_state_restore
+ .global ia64_mca_stack
+ .global ia64_mca_stackframe
+ .global ia64_mca_bspstore
+
+ .text
+ .align 16
+
+ia64_os_mca_dispatch:
+
+#if defined(MCA_TEST)
+ // Pretend that we are in interrupt context
+ mov r2=psr
+ dep r2=0, r2, PSR_IC, 2;
+ mov psr.l = r2
+#endif /* #if defined(MCA_TEST) */
+
+ // Save the SAL to OS MCA handoff state as defined
+ // by SAL SPEC 2.5
+ // NOTE : The order in which the state gets saved
+ // is dependent on the way the C-structure
+ // for ia64_mca_sal_to_os_state_t has been
+ // defined in include/asm/mca.h
+ SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
+
+ // LOG PROCESSOR STATE INFO FROM HERE ON..
+ ;;
+begin_os_mca_dump:
+ BRANCH(ia64_os_mca_proc_state_dump, r2, p0, 0x0)
+ ;;
+ia64_os_mca_done_dump:
+
+ // Setup new stack frame for OS_MCA handling
+ movl r2=ia64_mca_bspstore // local bspstore area location in r2
+ movl r3=ia64_mca_stackframe // save stack frame to memory in r3
+ rse_switch_context(r6,r3,r2);; // RSC management in this new context
+ movl r12=ia64_mca_stack;;
+
+ // Enter virtual mode from physical mode
+ VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4)
+ia64_os_mca_virtual_begin:
+
+ // call our handler
+ movl r2=ia64_mca_ucmc_handler;;
+ mov b6=r2;;
+ br.call.sptk.few b0=b6
+ ;;
+
+ // Revert back to physical mode before going back to SAL
+ PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4)
+ia64_os_mca_virtual_end:
+
+#if defined(MCA_TEST)
+ // Pretend that we are in interrupt context
+ mov r2=psr
+ dep r2=0, r2, PSR_IC, 2;
+ mov psr.l = r2
+#endif /* #if defined(MCA_TEST) */
+
+ // restore the original stack frame here
+ movl r2=ia64_mca_stackframe // restore stack frame from memory at r2
+ ;;
+ DATA_VA_TO_PA(r2)
+ movl r4=IA64_PSR_MC
+ ;;
+ rse_return_context(r4,r3,r2) // switch from interrupt context for RSE
+
+ // let us restore all the registers from our PSI structure
+ mov r8=gp
+ ;;
+begin_os_mca_restore:
+ BRANCH(ia64_os_mca_proc_state_restore, r2, p0, 0x0)
+ ;;
+
+ia64_os_mca_done_restore:
+ ;;
+#ifdef SOFTSDV
+ VIRTUAL_MODE_ENTER(r2,r3, vmode_enter, r4)
+vmode_enter:
+ br.ret.sptk.few b0
+#else
+ // branch back to SALE_CHECK
+ OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2)
+ ld8 r3=[r2];;
+ mov b0=r3 // SAL_CHECK return address
+ br b0
+ ;;
+#endif /* #ifdef SOFTSDV */
+ia64_os_mca_dispatch_end:
+//EndMain//////////////////////////////////////////////////////////////////////
+
+
+//++
+// Name:
+// ia64_os_mca_proc_state_dump()
+//
+// Stub Description:
+//
+// This stub dumps the processor state during MCHK to a data area
+//
+//--
+
+ia64_os_mca_proc_state_dump:
+// Get and save GR0-31 from Proc. Min. State Save Area to SAL PSI
+ movl r2=ia64_mca_proc_state_dump;; // Os state dump area
+
+// save ar.NaT
+ mov r5=ar.unat // ar.unat
+
+// save banked GRs 16-31 along with NaT bits
+ bsw.1;;
+ st8.spill [r2]=r16,8;;
+ st8.spill [r2]=r17,8;;
+ st8.spill [r2]=r18,8;;
+ st8.spill [r2]=r19,8;;
+ st8.spill [r2]=r20,8;;
+ st8.spill [r2]=r21,8;;
+ st8.spill [r2]=r22,8;;
+ st8.spill [r2]=r23,8;;
+ st8.spill [r2]=r24,8;;
+ st8.spill [r2]=r25,8;;
+ st8.spill [r2]=r26,8;;
+ st8.spill [r2]=r27,8;;
+ st8.spill [r2]=r28,8;;
+ st8.spill [r2]=r29,8;;
+ st8.spill [r2]=r30,8;;
+ st8.spill [r2]=r31,8;;
+
+ mov r4=ar.unat;;
+ st8 [r2]=r4,8 // save User NaT bits for r16-r31
+ mov ar.unat=r5 // restore original unat
+ bsw.0;;
+
+//save BRs
+ add r4=8,r2 // duplicate r2 in r4
+	add r6=2*8,r2		// duplicate r2 in r6
+
+ mov r3=b0
+ mov r5=b1
+ mov r7=b2;;
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=b3
+ mov r5=b4
+ mov r7=b5;;
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=b6
+ mov r5=b7;;
+ st8 [r2]=r3,2*8
+ st8 [r4]=r5,2*8;;
+
+cSaveCRs:
+// save CRs
+ add r4=8,r2 // duplicate r2 in r4
+	add r6=2*8,r2		// duplicate r2 in r6
+
+ mov r3=cr0 // cr.dcr
+ mov r5=cr1 // cr.itm
+ mov r7=cr2;; // cr.iva
+
+ st8 [r2]=r3,8*8
+ st8 [r4]=r5,3*8
+	st8 [r6]=r7,3*8;;	// 48 byte increments
+
+ mov r3=cr8;; // cr.pta
+	st8 [r2]=r3,8*8;;	// 64 byte increments
+
+// if PSR.ic=0, reading interruption registers causes an illegal operation fault
+ mov r3=psr;;
+ tbit.nz.unc p2,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test
+(p2)	st8 [r2]=r0,9*8+160	// increment by 232 bytes (9*8+160)
+begin_skip_intr_regs:
+ BRANCH(SkipIntrRegs, r9, p2, 0x0)
+ ;;
+ add r4=8,r2 // duplicate r2 in r4
+ add r6=2*8,r2 // duplicate r2 in r6
+
+ mov r3=cr16 // cr.ipsr
+ mov r5=cr17 // cr.isr
+ mov r7=r0;; // cr.ida => cr18
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=cr19 // cr.iip
+ mov r5=cr20 // cr.idtr
+ mov r7=cr21;; // cr.iitr
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=cr22 // cr.iipa
+ mov r5=cr23 // cr.ifs
+ mov r7=cr24;; // cr.iim
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=cr25;; // cr.iha
+	st8 [r2]=r3,160;;	// 160 byte increment
+
+SkipIntrRegs:
+	st8 [r2]=r0,168		// another 168 byte increment
+
+ mov r3=cr66;; // cr.lid
+	st8 [r2]=r3,40		// 40 byte increment
+
+ mov r3=cr71;; // cr.ivr
+ st8 [r2]=r3,8
+
+ mov r3=cr72;; // cr.tpr
+ st8 [r2]=r3,24 // 24 byte increment
+
+ mov r3=r0;; // cr.eoi => cr75
+ st8 [r2]=r3,168 // 168 byte inc.
+
+ mov r3=r0;; // cr.irr0 => cr96
+ st8 [r2]=r3,16 // 16 byte inc.
+
+ mov r3=r0;; // cr.irr1 => cr98
+ st8 [r2]=r3,16 // 16 byte inc.
+
+ mov r3=r0;; // cr.irr2 => cr100
+ st8 [r2]=r3,16 // 16 byte inc
+
+	mov r3=r0;;	// cr.irr3 => cr102
+ st8 [r2]=r3,16 // 16b inc.
+
+ mov r3=r0;; // cr.itv => cr114
+ st8 [r2]=r3,16 // 16 byte inc.
+
+ mov r3=r0;; // cr.pmv => cr116
+ st8 [r2]=r3,8
+
+ mov r3=r0;; // cr.lrr0 => cr117
+ st8 [r2]=r3,8
+
+ mov r3=r0;; // cr.lrr1 => cr118
+ st8 [r2]=r3,8
+
+ mov r3=r0;; // cr.cmcv => cr119
+ st8 [r2]=r3,8*10;;
+
+cSaveARs:
+// save ARs
+ add r4=8,r2 // duplicate r2 in r4
+ add r6=2*8,r2 // duplicate r2 in r6
+
+	mov r3=ar0	// ar.kr0
+ mov r5=ar1 // ar.kr1
+ mov r7=ar2;; // ar.kr2
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=ar3 // ar.kr3
+ mov r5=ar4 // ar.kr4
+ mov r7=ar5;; // ar.kr5
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=ar6 // ar.kr6
+ mov r5=ar7 // ar.kr7
+ mov r7=r0;; // ar.kr8
+ st8 [r2]=r3,10*8
+ st8 [r4]=r5,10*8
+	st8 [r6]=r7,10*8;;	// increment by 10*8 bytes
+
+ mov r3=ar16 // ar.rsc
+ mov ar16=r0 // put RSE in enforced lazy mode
+ mov r5=ar17 // ar.bsp
+ mov r7=ar18;; // ar.bspstore
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=ar19;; // ar.rnat
+ st8 [r2]=r3,8*13 // increment by 13x8 bytes
+
+ mov r3=ar32;; // ar.ccv
+ st8 [r2]=r3,8*4
+
+ mov r3=ar36;; // ar.unat
+ st8 [r2]=r3,8*4
+
+ mov r3=ar40;; // ar.fpsr
+ st8 [r2]=r3,8*4
+
+ mov r3=ar44;; // ar.itc
+ st8 [r2]=r3,160 // 160
+
+ mov r3=ar64;; // ar.pfs
+ st8 [r2]=r3,8
+
+ mov r3=ar65;; // ar.lc
+ st8 [r2]=r3,8
+
+ mov r3=ar66;; // ar.ec
+ st8 [r2]=r3
+ add r2=8*62,r2 //padding
+
+// save RRs
+ mov ar.lc=0x08-1
+ movl r4=0x00;;
+
+cStRR:
+ mov r3=rr[r4];;
+ st8 [r2]=r3,8
+ add r4=1,r4
+ br.cloop.sptk.few cStRR
+ ;;
+end_os_mca_dump:
+ BRANCH(ia64_os_mca_done_dump, r2, p0, -0x10)
+ ;;
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+
+//++
+// Name:
+// ia64_os_mca_proc_state_restore()
+//
+// Stub Description:
+//
+// This is a stub to restore the saved processor state during MCHK
+//
+//--
+
+ia64_os_mca_proc_state_restore:
+
+// Restore bank1 GR16-31
+ movl r2=ia64_mca_proc_state_dump // Convert virtual address
+ ;; // of OS state dump area
+ DATA_VA_TO_PA(r2) // to physical address
+ ;;
+restore_GRs: // restore bank-1 GRs 16-31
+ bsw.1;;
+ add r3=16*8,r2;; // to get to NaT of GR 16-31
+ ld8 r3=[r3];;
+ mov ar.unat=r3;; // first restore NaT
+
+ ld8.fill r16=[r2],8;;
+ ld8.fill r17=[r2],8;;
+ ld8.fill r18=[r2],8;;
+ ld8.fill r19=[r2],8;;
+ ld8.fill r20=[r2],8;;
+ ld8.fill r21=[r2],8;;
+ ld8.fill r22=[r2],8;;
+ ld8.fill r23=[r2],8;;
+ ld8.fill r24=[r2],8;;
+ ld8.fill r25=[r2],8;;
+ ld8.fill r26=[r2],8;;
+ ld8.fill r27=[r2],8;;
+ ld8.fill r28=[r2],8;;
+ ld8.fill r29=[r2],8;;
+ ld8.fill r30=[r2],8;;
+ ld8.fill r31=[r2],8;;
+
+ ld8 r3=[r2],8;; // increment to skip NaT
+ bsw.0;;
+
+restore_BRs:
+ add r4=8,r2 // duplicate r2 in r4
+	add r6=2*8,r2;;		// duplicate r2 in r6
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov b0=r3
+ mov b1=r5
+ mov b2=r7;;
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov b3=r3
+ mov b4=r5
+ mov b5=r7;;
+
+ ld8 r3=[r2],2*8
+ ld8 r5=[r4],2*8;;
+ mov b6=r3
+ mov b7=r5;;
+
+restore_CRs:
+ add r4=8,r2 // duplicate r2 in r4
+	add r6=2*8,r2;;		// duplicate r2 in r6
+
+ ld8 r3=[r2],8*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;; // 48 byte increments
+ mov cr0=r3 // cr.dcr
+ mov cr1=r5 // cr.itm
+ mov cr2=r7;; // cr.iva
+
+ ld8 r3=[r2],8*8;; // 64 byte increments
+// mov cr8=r3 // cr.pta
+
+
+// if PSR.ic=1, reading interruption registers causes an illegal operation fault
+ mov r3=psr;;
+ tbit.nz.unc p2,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test
+(p2)	st8 [r2]=r0,9*8+160	// increment by 232 bytes (9*8+160)
+
+begin_rskip_intr_regs:
+ BRANCH(rSkipIntrRegs, r9, p2, 0x0)
+ ;;
+
+ add r4=8,r2 // duplicate r2 in r4
+	add r6=2*8,r2;;		// duplicate r2 in r6
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov cr16=r3 // cr.ipsr
+ mov cr17=r5 // cr.isr is read only
+// mov cr18=r7;; // cr.ida
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov cr19=r3 // cr.iip
+ mov cr20=r5 // cr.idtr
+ mov cr21=r7;; // cr.iitr
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov cr22=r3 // cr.iipa
+ mov cr23=r5 // cr.ifs
+ mov cr24=r7 // cr.iim
+
+ ld8 r3=[r2],160;; // 160 byte increment
+ mov cr25=r3 // cr.iha
+
+rSkipIntrRegs:
+ ld8 r3=[r2],168;; // another 168 byte inc.
+
+ ld8 r3=[r2],40;; // 40 byte increment
+ mov cr66=r3 // cr.lid
+
+ ld8 r3=[r2],8;;
+// mov cr71=r3 // cr.ivr is read only
+ ld8 r3=[r2],24;; // 24 byte increment
+ mov cr72=r3 // cr.tpr
+
+ ld8 r3=[r2],168;; // 168 byte inc.
+// mov cr75=r3 // cr.eoi
+
+ ld8 r3=[r2],16;; // 16 byte inc.
+// mov cr96=r3 // cr.irr0 is read only
+
+ ld8 r3=[r2],16;; // 16 byte inc.
+// mov cr98=r3 // cr.irr1 is read only
+
+ ld8 r3=[r2],16;; // 16 byte inc
+// mov cr100=r3 // cr.irr2 is read only
+
+ ld8 r3=[r2],16;; // 16b inc.
+// mov cr102=r3 // cr.irr3 is read only
+
+ ld8 r3=[r2],16;; // 16 byte inc.
+// mov cr114=r3 // cr.itv
+
+ ld8 r3=[r2],8;;
+// mov cr116=r3 // cr.pmv
+ ld8 r3=[r2],8;;
+// mov cr117=r3 // cr.lrr0
+ ld8 r3=[r2],8;;
+// mov cr118=r3 // cr.lrr1
+ ld8 r3=[r2],8*10;;
+// mov cr119=r3 // cr.cmcv
+
+restore_ARs:
+ add r4=8,r2 // duplicate r2 in r4
+	add r6=2*8,r2;;		// duplicate r2 in r6
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+	mov ar0=r3	// ar.kr0
+ mov ar1=r5 // ar.kr1
+ mov ar2=r7;; // ar.kr2
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov ar3=r3 // ar.kr3
+ mov ar4=r5 // ar.kr4
+ mov ar5=r7;; // ar.kr5
+
+ ld8 r3=[r2],10*8
+ ld8 r5=[r4],10*8
+ ld8 r7=[r6],10*8;;
+ mov ar6=r3 // ar.kr6
+ mov ar7=r5 // ar.kr7
+// mov ar8=r6 // ar.kr8
+ ;;
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+// mov ar16=r3 // ar.rsc
+// mov ar17=r5 // ar.bsp is read only
+ mov ar16=r0 // make sure that RSE is in enforced lazy mode
+ mov ar18=r7;; // ar.bspstore
+
+ ld8 r9=[r2],8*13;;
+ mov ar19=r9 // ar.rnat
+
+ mov ar16=r3 // ar.rsc
+ ld8 r3=[r2],8*4;;
+ mov ar32=r3 // ar.ccv
+
+ ld8 r3=[r2],8*4;;
+ mov ar36=r3 // ar.unat
+
+ ld8 r3=[r2],8*4;;
+ mov ar40=r3 // ar.fpsr
+
+ ld8 r3=[r2],160;; // 160
+// mov ar44=r3 // ar.itc
+
+ ld8 r3=[r2],8;;
+ mov ar64=r3 // ar.pfs
+
+ ld8 r3=[r2],8;;
+ mov ar65=r3 // ar.lc
+
+ ld8 r3=[r2];;
+ mov ar66=r3 // ar.ec
+ add r2=8*62,r2;; // padding
+
+restore_RRs:
+ mov r5=ar.lc
+ mov ar.lc=0x08-1
+ movl r4=0x00
+cStRRr:
+ ld8 r3=[r2],8;;
+//	mov rr[r4]=r3	// what are its access privileges?
+ add r4=1,r4
+ br.cloop.sptk.few cStRRr
+ ;;
+ mov ar.lc=r5
+ ;;
+end_os_mca_restore:
+ BRANCH(ia64_os_mca_done_restore, r2, p0, -0x20)
+ ;;
+//EndStub//////////////////////////////////////////////////////////////////////
diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
new file mode 100644
index 000000000..1506bacc2
--- /dev/null
+++ b/arch/ia64/kernel/pal.S
@@ -0,0 +1,119 @@
+/*
+ * PAL Firmware support
+ * IA-64 Processor Programmers Reference Vol 2
+ *
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 David Mosberger <davidm@hpl.hp.com>
+ */
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .data
+pal_entry_point:
+ data8 ia64_pal_default_handler
+ .text
+
+/*
+ * Set the PAL entry point address. This could be written in C code, but we do it here
+ * to keep it all in one module (besides, it's so trivial that it's
+ * not a big deal).
+ *
+ * in0 Address of the PAL entry point (text address, NOT a function descriptor).
+ */
+ .align 16
+ .global ia64_pal_handler_init
+ .proc ia64_pal_handler_init
+ia64_pal_handler_init:
+ alloc r3=ar.pfs,1,0,0,0
+ movl r2=pal_entry_point
+ ;;
+ st8 [r2]=in0
+ br.ret.sptk.few rp
+
+ .endp ia64_pal_handler_init
+
+/*
+ * Default PAL call handler. This needs to be coded in assembly because it uses
+ * the static calling convention, i.e., the RSE may not be used and calls are
+ * done via "br.cond" (not "br.call").
+ */
+ .align 16
+ .global ia64_pal_default_handler
+ .proc ia64_pal_default_handler
+ia64_pal_default_handler:
+ mov r8=-1
+ br.cond.sptk.few rp
+
+/*
+ * Make a PAL call using the static calling convention.
+ *
+ * in0 Pointer to struct ia64_pal_retval
+ * in1 Index of PAL service
+ * in2 - in4	Remaining PAL arguments
+ *
+ */
+
+#ifdef __GCC_MULTIREG_RETVALS__
+# define arg0 in0
+# define arg1 in1
+# define arg2 in2
+# define arg3 in3
+# define arg4 in4
+#else
+# define arg0 in1
+# define arg1 in2
+# define arg2 in3
+# define arg3 in4
+# define arg4 in5
+#endif
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .align 16
+ .global ia64_pal_call_static
+ .proc ia64_pal_call_static
+ia64_pal_call_static:
+ alloc loc0 = ar.pfs,6,90,0,0
+ movl loc2 = pal_entry_point
+1: {
+ mov r28 = arg0
+ mov r29 = arg1
+ mov r8 = ip
+ }
+ ;;
+ ld8 loc2 = [loc2] // loc2 <- entry point
+ mov r30 = arg2
+ mov r31 = arg3
+ ;;
+ mov loc3 = psr
+ mov loc1 = rp
+ adds r8 = .ret0-1b,r8
+ ;;
+ rsm psr.i
+ mov b7 = loc2
+ mov rp = r8
+ ;;
+ br.cond.sptk.few b7
+.ret0: mov psr.l = loc3
+#ifndef __GCC_MULTIREG_RETVALS__
+ st8 [in0] = r8, 8
+ ;;
+ st8 [in0] = r9, 8
+ ;;
+ st8 [in0] = r10, 8
+ ;;
+ st8 [in0] = r11, 8
+#endif
+ mov ar.pfs = loc0
+ mov rp = loc1
+ ;;
+	srlz.d				// serialize restoration of psr.l
+ br.ret.sptk.few b0
+ .endp ia64_pal_call_static
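A hypothetical C-side sketch of driving the static-convention stub above; the
layout of struct ia64_pal_retval (status plus three return words) and the
PAL_VM_SUMMARY index are assumed from the ia-64 headers, which are not part
of this hunk:

	struct ia64_pal_retval iprv;

	ia64_pal_call_static(&iprv, PAL_VM_SUMMARY, 0, 0, 0);
	if (iprv.status == 0)
		printk("PAL_VM_SUMMARY: v0=%lx v1=%lx\n", iprv.v0, iprv.v1);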
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
new file mode 100644
index 000000000..f86f45537
--- /dev/null
+++ b/arch/ia64/kernel/pci-dma.c
@@ -0,0 +1,56 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * This implementation is for IA-64 platforms that do not support
+ * I/O TLBs (aka DMA address translation hardware).
+ *
+ * XXX This doesn't do the right thing yet. It appears we would have
+ * to add additional zones so we can implement the various address
+ * mask constraints that we might encounter. A zone for memory < 32
+ * bits is obviously necessary...
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+
+#include <asm/io.h>
+
+/* Pure 2^n version of get_order */
+extern __inline__ unsigned long
+get_order (unsigned long size)
+{
+ unsigned long order = ia64_fls(size);
+
+ printk ("get_order: size=%lu, order=%lu\n", size, order);
+
+ if (order > PAGE_SHIFT)
+ order -= PAGE_SHIFT;
+ else
+ order = 0;
+ return order;
+}
+
+void *
+pci_alloc_consistent (struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle)
+{
+ void *ret;
+ int gfp = GFP_ATOMIC;
+
+ if (!hwdev || hwdev->dma_mask != 0xffffffff)
+ gfp |= GFP_DMA;
+ ret = (void *)__get_free_pages(gfp, get_order(size));
+
+ if (ret) {
+ memset(ret, 0, size);
+ *dma_handle = virt_to_bus(ret);
+ }
+ return ret;
+}
+
+void
+pci_free_consistent (struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle)
+{
+ free_pages((unsigned long) vaddr, get_order(size));
+}
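A hypothetical driver-side sketch of the consistent-DMA helpers above ("pdev"
and the 4096-byte size are made up; error handling trimmed):

	dma_addr_t bus_addr;
	void *cpu_addr = pci_alloc_consistent(pdev, 4096, &bus_addr);

	if (cpu_addr) {
		/* program bus_addr into the device, touch the buffer via cpu_addr */
		pci_free_consistent(pdev, 4096, cpu_addr, bus_addr);
	}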
diff --git a/arch/ia64/kernel/pci.c b/arch/ia64/kernel/pci.c
new file mode 100644
index 000000000..3bceeed8e
--- /dev/null
+++ b/arch/ia64/kernel/pci.c
@@ -0,0 +1,239 @@
+/*
+ * pci.c - Low-Level PCI Access in IA64
+ *
+ * Derived from bios32.c of i386 tree.
+ *
+ */
+
+#include <linux/config.h>
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/malloc.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+
+#include <asm/machvec.h>
+#include <asm/page.h>
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+#include <asm/sal.h>
+
+
+#ifdef CONFIG_SMP
+# include <asm/smp.h>
+#endif
+#include <asm/irq.h>
+
+
+#undef DEBUG
+#define DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+/*
+ * This interrupt-safe spinlock protects all accesses to PCI
+ * configuration space.
+ */
+
+spinlock_t pci_lock = SPIN_LOCK_UNLOCKED;
+
+struct pci_fixup pcibios_fixups[] = { { 0 } };
+
+#define PCI_NO_CHECKS 0x400
+#define PCI_NO_PEER_FIXUP 0x800
+
+static unsigned int pci_probe = PCI_NO_CHECKS;
+
+/* Macro to build a PCI configuration address to be passed as a parameter to SAL. */
+
+#define PCI_CONFIG_ADDRESS(dev, where) (((u64) dev->bus->number << 16) | ((u64) (dev->devfn & 0xff) << 8) | (where & 0xff))
+
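As a worked example (device numbers invented): for a device on bus 0 with
devfn 0x28 (slot 5, function 0) and config offset 0x3c, the macro yields
(0 << 16) | (0x28 << 8) | 0x3c = 0x283c, which is the address handed to
ia64_sal_pci_config_read()/ia64_sal_pci_config_write() below.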
+static int
+pci_conf_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+ s64 status;
+ u64 lval;
+
+ status = ia64_sal_pci_config_read(PCI_CONFIG_ADDRESS(dev, where), 1, &lval);
+ *value = lval;
+ return status;
+}
+
+static int
+pci_conf_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+ s64 status;
+ u64 lval;
+
+ status = ia64_sal_pci_config_read(PCI_CONFIG_ADDRESS(dev, where), 2, &lval);
+ *value = lval;
+ return status;
+}
+
+static int
+pci_conf_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+ s64 status;
+ u64 lval;
+
+ status = ia64_sal_pci_config_read(PCI_CONFIG_ADDRESS(dev, where), 4, &lval);
+ *value = lval;
+ return status;
+}
+
+static int
+pci_conf_write_config_byte (struct pci_dev *dev, int where, u8 value)
+{
+ return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 1, value);
+}
+
+static int
+pci_conf_write_config_word (struct pci_dev *dev, int where, u16 value)
+{
+ return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 2, value);
+}
+
+static int
+pci_conf_write_config_dword (struct pci_dev *dev, int where, u32 value)
+{
+ return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 4, value);
+}
+
+
+static struct pci_ops pci_conf = {
+ pci_conf_read_config_byte,
+ pci_conf_read_config_word,
+ pci_conf_read_config_dword,
+ pci_conf_write_config_byte,
+ pci_conf_write_config_word,
+ pci_conf_write_config_dword
+};
+
+/*
+ * Try to find PCI BIOS. This will always work for IA64.
+ */
+
+static struct pci_ops * __init
+pci_find_bios(void)
+{
+ return &pci_conf;
+}
+
+/*
+ * Initialization. Uses the SAL interface
+ */
+
+#define PCI_BUSSES_TO_SCAN 2 /* On "real" ;) hardware this will be 255 */
+
+void __init
+pcibios_init(void)
+{
+ struct pci_ops *ops = NULL;
+ int i;
+
+ if ((ops = pci_find_bios()) == NULL) {
+ printk("PCI: No PCI bus detected\n");
+ return;
+ }
+
+ printk("PCI: Probing PCI hardware\n");
+ for (i = 0; i < PCI_BUSSES_TO_SCAN; i++)
+ pci_scan_bus(i, ops, NULL);
+ platform_pci_fixup();
+ return;
+}
+
+/*
+ * Called after each bus is probed, but before its children
+ * are examined.
+ */
+
+void __init
+pcibios_fixup_bus(struct pci_bus *b)
+{
+ return;
+}
+
+int
+pci_assign_resource (struct pci_dev *dev, int i)
+{
+ printk("pci_assign_resource: not implemented!\n");
+ return -ENODEV;
+}
+
+void __init
+pcibios_update_resource(struct pci_dev *dev, struct resource *root,
+ struct resource *res, int resource)
+{
+ unsigned long where, size;
+ u32 reg;
+
+ where = PCI_BASE_ADDRESS_0 + (resource * 4);
+ size = res->end - res->start;
+ pci_read_config_dword(dev, where, &reg);
+ reg = (reg & size) | (((u32)(res->start - root->start)) & ~size);
+ pci_write_config_dword(dev, where, reg);
+
+ /* ??? FIXME -- record old value for shutdown. */
+}
+
+void __init
+pcibios_update_irq(struct pci_dev *dev, int irq)
+{
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+
+ /* ??? FIXME -- record old value for shutdown. */
+}
+
+void __init
+pcibios_fixup_pbus_ranges (struct pci_bus * bus, struct pbus_set_ranges_data * ranges)
+{
+ ranges->io_start -= bus->resource[0]->start;
+ ranges->io_end -= bus->resource[0]->start;
+ ranges->mem_start -= bus->resource[1]->start;
+ ranges->mem_end -= bus->resource[1]->start;
+}
+
+int __init
+pcibios_enable_device (struct pci_dev *dev)
+{
+ /* Not needed, since we enable all devices at startup. */
+ return 0;
+}
+
+/*
+ * PCI BIOS setup, always defaults to SAL interface
+ */
+
+char * __init
+pcibios_setup(char *str)
+{
+ pci_probe = PCI_NO_CHECKS;
+ return NULL;
+}
+
+void
+pcibios_align_resource (void *data, struct resource *res, unsigned long size)
+{
+}
+
+#if 0 /*def CONFIG_PROC_FS*/
+/*
+ * This is an ugly hack to get a (weak) unresolved reference to something that is
+ * in drivers/pci/proc.c. Without this, the file does not get linked in at all
+ * (I suspect the reason this isn't needed on Linux/x86 is that most people compile
+ * with module support, in which case the EXPORT_SYMBOL() stuff will ensure the
+ * code gets linked in.)  Sigh... --davidm 99/12/20.
+ */
+asm ("data8 proc_bus_pci_add");
+#endif
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
new file mode 100644
index 000000000..274b68a73
--- /dev/null
+++ b/arch/ia64/kernel/perfmon.c
@@ -0,0 +1,227 @@
+#include <linux/config.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+
+#include <asm/errno.h>
+#include <asm/irq.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_PERFMON
+
+#define MAX_PERF_COUNTER 4 /* true for Itanium, at least */
+#define WRITE_PMCS_AND_START 0xa0
+#define WRITE_PMCS 0xa1
+#define READ_PMDS 0xa2
+#define STOP_PMCS 0xa3
+#define IA64_COUNTER_MASK 0xffffffffffffff6f
+#define PERF_OVFL_VAL 0xffffffff
+
+struct perfmon_counter {
+ unsigned long data;
+ int counter_num;
+};
+
+unsigned long pmds[MAX_PERF_COUNTER];
+struct task_struct *perf_owner;
+
+/*
+ * We set dcr.pp, psr.pp, and the appropriate pmc control values with
+ * this. Notice that we go about modifying _each_ task's pt_regs to
+ * set cr_ipsr.pp. This will start counting when "current" does an
+ * _rfi_.  Also, since cr_ipsr (and hence cr_ipsr.pp) is inherited
+ * across forks, we do _not_ need additional code on context
+ * switches.  On stopping the counters we don't _need_ to go about
+ * changing every task's cr_ipsr back to where it was, because we can
+ * just set pmc[0]=1.  But we do it anyway because we will probably
+ * add thread-specific accounting later.
+ *
+ * The obvious problem with this is that on SMP systems, it is a bit
+ * of work (when someone wants to do it) - it would be easier if we
+ * just added code to the context-switch path. I think we would need
+ * to lock the run queue to ensure no context switches, send an IPI to
+ * each processor, and in that IPI handler, just modify the psr bit of
+ * only the _current_ thread, since we have modified the psr bit
+ * correctly in the kernel stack for every process which is not
+ * running. Might crash on SMP systems without the
+ * lock_kernel(). Hence the lock..
+ */
+asmlinkage unsigned long
+sys_perfmonctl (int cmd1, int cmd2, void *ptr)
+{
+ struct perfmon_counter tmp, *cptr = ptr;
+ unsigned long pmd, cnum, dcr, flags;
+ struct task_struct *p;
+ struct pt_regs *regs;
+ struct perf_counter;
+ int i;
+
+ switch (cmd1) {
+ case WRITE_PMCS: /* Writes to PMC's and clears PMDs */
+ case WRITE_PMCS_AND_START: /* Also starts counting */
+
+ if (!access_ok(VERIFY_READ, cptr, sizeof(struct perf_counter)*cmd2))
+ return -EFAULT;
+
+ if (cmd2 >= MAX_PERF_COUNTER)
+ return -EFAULT;
+
+ if (perf_owner && perf_owner != current)
+ return -EBUSY;
+ perf_owner = current;
+
+ for (i = 0; i < cmd2; i++, cptr++) {
+ copy_from_user(&tmp, cptr, sizeof(tmp));
+ /* XXX need to check validity of counter_num and perhaps data!! */
+ ia64_set_pmc(tmp.counter_num, tmp.data);
+ ia64_set_pmd(tmp.counter_num, 0);
+ pmds[tmp.counter_num - 4] = 0;
+ }
+
+ if (cmd1 == WRITE_PMCS_AND_START) {
+ local_irq_save(flags);
+ dcr = ia64_get_dcr();
+ dcr |= IA64_DCR_PP;
+ ia64_set_dcr(dcr);
+ local_irq_restore(flags);
+
+ /*
+ * This is a no can do. It obviously wouldn't
+ * work on SMP where another process may not
+ * be blocked at all.
+ *
+ * Perhaps we need a global predicate in the
+ * leave_kernel path to control if pp should
+ * be on or off?
+ */
+ lock_kernel();
+ for_each_task(p) {
+ regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) - 1;
+ ia64_psr(regs)->pp = 1;
+ }
+ unlock_kernel();
+ ia64_set_pmc(0, 0);
+ }
+ break;
+
+ case READ_PMDS:
+ if (cmd2 >= MAX_PERF_COUNTER)
+ return -EFAULT;
+ if (!access_ok(VERIFY_WRITE, cptr, sizeof(struct perf_counter)*cmd2))
+ return -EFAULT;
+ local_irq_save(flags);
+ /* XXX this looks wrong */
+ __asm__ __volatile__("rsm psr.pp\n");
+ dcr = ia64_get_dcr();
+ dcr &= ~IA64_DCR_PP;
+ ia64_set_dcr(dcr);
+ local_irq_restore(flags);
+
+ /*
+ * We cannot touch pmc[0] to stop counting here, as
+ * that particular instruction might cause an overflow
+ * and the mask in pmc[0] might get lost. I'm not very
+ * sure of the hardware behavior here. So we stop
+ * counting by psr.pp = 0. And we reset dcr.pp to
+ * prevent an interrupt from mucking up psr.pp in the
+ * meanwhile. Perfmon interrupts are pended, hence the
+ * above code should be ok if one of the above
+ * instructions cause overflows. Is this ok? When I
+ * muck with dcr, is the cli/sti needed??
+ */
+ for (i = 0, cnum = 4; i < MAX_PERF_COUNTER; i++, cnum++, cptr++) {
+ pmd = pmds[i] + (ia64_get_pmd(cnum) & PERF_OVFL_VAL);
+ put_user(pmd, &cptr->data);
+ }
+ local_irq_save(flags);
+ /* XXX this looks wrong */
+ __asm__ __volatile__("ssm psr.pp");
+ dcr = ia64_get_dcr();
+ dcr |= IA64_DCR_PP;
+ ia64_set_dcr(dcr);
+ local_irq_restore(flags);
+ break;
+
+ case STOP_PMCS:
+ ia64_set_pmc(0, 1);
+ for (i = 0; i < MAX_PERF_COUNTER; ++i)
+ ia64_set_pmc(i, 0);
+
+ local_irq_save(flags);
+ dcr = ia64_get_dcr();
+ dcr &= ~IA64_DCR_PP;
+ ia64_set_dcr(dcr);
+ local_irq_restore(flags);
+ /*
+ * This is a no can do. It obviously wouldn't
+ * work on SMP where another process may not
+ * be blocked at all.
+ *
+ * Perhaps we need a global predicate in the
+ * leave_kernel path to control if pp should
+ * be on or off?
+ */
+ lock_kernel();
+ for_each_task(p) {
+ regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) - 1;
+ ia64_psr(regs)->pp = 0;
+ }
+ unlock_kernel();
+ perf_owner = 0;
+ break;
+
+ default:
+ break;
+ }
+ return 0;
+}
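A hypothetical user-space sketch of driving this interface; the syscall number
__NR_perfmonctl, the event-selector value, and re-using the command constants
above from a user header are assumptions, not something this patch provides:

	struct perfmon_counter pc[4] = { { 0 /* event selector */, 4 /* PMC/PMD pair */ } };

	syscall(__NR_perfmonctl, WRITE_PMCS_AND_START, 1, pc);
	/* ... run the workload being measured ... */
	syscall(__NR_perfmonctl, READ_PMDS, 1, pc);	/* handler fills pc[i].data */
	syscall(__NR_perfmonctl, STOP_PMCS, 0, NULL);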
+
+static inline void
+update_counters (void)
+{
+ unsigned long mask, i, cnum, val;
+
+ mask = ia64_get_pmd(0) >> 4;
+ for (i = 0, cnum = 4; i < MAX_PERF_COUNTER; cnum++, i++, mask >>= 1) {
+ if (mask & 0x1)
+ val = PERF_OVFL_VAL;
+ else
+ /* since we got an interrupt, might as well clear every pmd. */
+ val = ia64_get_pmd(cnum) & PERF_OVFL_VAL;
+ pmds[i] += val;
+ ia64_set_pmd(cnum, 0);
+ }
+}
+
+static void
+perfmon_interrupt (int irq, void *arg, struct pt_regs *regs)
+{
+ update_counters();
+ ia64_set_pmc(0, 0);
+ ia64_srlz_d();
+}
+
+void
+perfmon_init (void)
+{
+ if (request_irq(PERFMON_IRQ, perfmon_interrupt, 0, "perfmon", NULL)) {
+ printk("perfmon_init: could not allocate performance monitor vector %u\n",
+ PERFMON_IRQ);
+ return;
+ }
+ ia64_set_pmv(PERFMON_IRQ);
+ ia64_srlz_d();
+}
+
+#else /* !CONFIG_PERFMON */
+
+asmlinkage unsigned long
+sys_perfmonctl (int cmd1, int cmd2, void *ptr)
+{
+ return -ENOSYS;
+}
+
+#endif /* !CONFIG_PERFMON */
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
new file mode 100644
index 000000000..5b6deb5f5
--- /dev/null
+++ b/arch/ia64/kernel/process.c
@@ -0,0 +1,421 @@
+/*
+ * Architecture-specific setup.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#define __KERNEL_SYSCALLS__ /* see <asm/unistd.h> */
+#include <linux/config.h>
+
+#include <linux/pm.h>
+#include <linux/elf.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+
+#include <asm/delay.h>
+#include <asm/efi.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/uaccess.h>
+#include <asm/user.h>
+
+
+void
+show_regs (struct pt_regs *regs)
+{
+ unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
+
+ printk("\npsr : %016lx ifs : %016lx ip : [<%016lx>]\n",
+ regs->cr_ipsr, regs->cr_ifs, ip);
+ printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
+ regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
+ printk("rnat: %016lx bsps: %016lx pr : %016lx\n",
+ regs->ar_rnat, regs->ar_bspstore, regs->pr);
+ printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
+ regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
+ printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, regs->b6, regs->b7);
+ printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
+ regs->f6.u.bits[1], regs->f6.u.bits[0],
+ regs->f7.u.bits[1], regs->f7.u.bits[0]);
+ printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
+ regs->f8.u.bits[1], regs->f8.u.bits[0],
+ regs->f9.u.bits[1], regs->f9.u.bits[0]);
+
+ printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, regs->r2, regs->r3);
+ printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10);
+ printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13);
+ printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16);
+ printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19);
+ printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22);
+ printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25);
+ printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, regs->r28);
+ printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31);
+
+ /* print the stacked registers if cr.ifs is valid: */
+ if (regs->cr_ifs & 0x8000000000000000) {
+ unsigned long val, sof, *bsp, ndirty;
+ int i, is_nat = 0;
+
+ sof = regs->cr_ifs & 0x7f; /* size of frame */
+ ndirty = (regs->loadrs >> 19);
+ bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty);
+ for (i = 0; i < sof; ++i) {
+ get_user(val, ia64_rse_skip_regs(bsp, i));
+ printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val,
+ ((i == sof - 1) || (i % 3) == 2) ? "\n" : " ");
+ }
+ }
+}
+
+void __attribute__((noreturn))
+cpu_idle (void *unused)
+{
+ /* endless idle loop with no priority at all */
+ init_idle();
+ current->priority = 0;
+ current->counter = -100;
+
+#ifdef CONFIG_SMP
+ if (!current->need_resched)
+ min_xtp();
+#endif
+
+ while (1) {
+ while (!current->need_resched) {
+ continue;
+ }
+#ifdef CONFIG_SMP
+ normal_xtp();
+#endif
+ schedule();
+ check_pgt_cache();
+ if (pm_idle)
+ (*pm_idle)();
+ }
+}
+
+/*
+ * Copy the state of an ia-64 thread.
+ *
+ * We get here through the following call chain:
+ *
+ * <clone syscall>
+ * sys_clone
+ * do_fork
+ * copy_thread
+ *
+ * This means that the stack layout is as follows:
+ *
+ * +---------------------+ (highest addr)
+ * | struct pt_regs |
+ * +---------------------+
+ * | struct switch_stack |
+ * +---------------------+
+ * | |
+ * | memory stack |
+ * | | <-- sp (lowest addr)
+ * +---------------------+
+ *
+ * Note: if we get called through kernel_thread() then the memory
+ * above "(highest addr)" is valid kernel stack memory that needs to
+ * be copied as well.
+ *
+ * Observe that we copy the unat values that are in pt_regs and
+ * switch_stack. Since the interpretation of unat is dependent upon
+ * the address to which the registers got spilled, doing this is valid
+ * only as long as we preserve the alignment of the stack. Since the
+ * stack is always page aligned, we know this is the case.
+ *
+ * XXX Actually, the above isn't true when we create kernel_threads().
+ * If we ever need to create kernel_threads() that preserve the unat
+ * values, we'll need to fix this.  Perhaps an easy workaround would be
+ * to always clear the unat bits in the child thread.
+ */
+int
+copy_thread (int nr, unsigned long clone_flags, unsigned long usp,
+ struct task_struct *p, struct pt_regs *regs)
+{
+ unsigned long rbs, child_rbs, rbs_size, stack_offset, stack_top, stack_used;
+ struct switch_stack *child_stack, *stack;
+ extern char ia64_ret_from_syscall_clear_r8;
+ extern char ia64_strace_clear_r8;
+ struct pt_regs *child_ptregs;
+
+#ifdef CONFIG_SMP
+ /*
+ * For SMP idle threads, fork_by_hand() calls do_fork with
+ * NULL regs.
+ */
+ if (!regs)
+ return 0;
+#endif
+
+ stack_top = (unsigned long) current + IA64_STK_OFFSET;
+ stack = ((struct switch_stack *) regs) - 1;
+ stack_used = stack_top - (unsigned long) stack;
+ stack_offset = IA64_STK_OFFSET - stack_used;
+
+ child_stack = (struct switch_stack *) ((unsigned long) p + stack_offset);
+ child_ptregs = (struct pt_regs *) (child_stack + 1);
+
+ /* copy parent's switch_stack & pt_regs to child: */
+ memcpy(child_stack, stack, stack_used);
+
+ rbs = (unsigned long) current + IA64_RBS_OFFSET;
+ child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
+ rbs_size = stack->ar_bspstore - rbs;
+
+ /* copy the parent's register backing store to the child: */
+ memcpy((void *) child_rbs, (void *) rbs, rbs_size);
+
+ child_ptregs->r8 = 0; /* child gets a zero return value */
+ if (user_mode(child_ptregs))
+ child_ptregs->r12 = usp; /* user stack pointer */
+ else {
+ /*
+ * Note: we simply preserve the relative position of
+ * the stack pointer here. There is no need to
+ * allocate a scratch area here, since that will have
+ * been taken care of by the caller of sys_clone()
+ * already.
+ */
+ child_ptregs->r12 = (unsigned long) (child_ptregs + 1); /* kernel sp */
+ child_ptregs->r13 = (unsigned long) p; /* set `current' pointer */
+ }
+ if (p->flags & PF_TRACESYS)
+ child_stack->b0 = (unsigned long) &ia64_strace_clear_r8;
+ else
+ child_stack->b0 = (unsigned long) &ia64_ret_from_syscall_clear_r8;
+ child_stack->ar_bspstore = child_rbs + rbs_size;
+
+ /* copy the thread_struct: */
+ p->thread.ksp = (unsigned long) child_stack - 16;
+ /*
+ * NOTE: The calling convention considers all floating point
+ * registers in the high partition (fph) to be scratch. Since
+ * the only way to get to this point is through a system call,
+ * we know that the values in fph are all dead. Hence, there
+ * is no need to inherit the fph state from the parent to the
+ * child and all we have to do is to make sure that
+ * IA64_THREAD_FPH_VALID is cleared in the child.
+ *
+ * XXX We could push this optimization a bit further by
+ * clearing IA64_THREAD_FPH_VALID on ANY system call.
+ * However, it's not clear this is worth doing. Also, it
+ * would be a slight deviation from the normal Linux system
+ * call behavior where scratch registers are preserved across
+ * system calls (unless used by the system call itself).
+ *
+ * If we wanted to inherit the fph state from the parent to the
+ * child, we would have to do something along the lines of:
+ *
+ * if (ia64_get_fpu_owner() == current && ia64_psr(regs)->mfh) {
+ * p->thread.flags |= IA64_THREAD_FPH_VALID;
+ * ia64_save_fpu(&p->thread.fph);
+ * } else if (current->thread.flags & IA64_THREAD_FPH_VALID) {
+ * memcpy(p->thread.fph, current->thread.fph, sizeof(p->thread.fph));
+ * }
+ */
+ p->thread.flags = (current->thread.flags & ~IA64_THREAD_FPH_VALID);
+ return 0;
+}
+
+void
+ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
+{
+ struct switch_stack *sw = ((struct switch_stack *) pt) - 1;
+ unsigned long ar_ec, cfm, ar_bsp, ndirty, *krbs;
+
+ ar_ec = (sw->ar_pfs >> 52) & 0x3f;
+
+ cfm = pt->cr_ifs & ((1UL << 63) - 1);
+ if ((pt->cr_ifs & (1UL << 63)) == 0) {
+ /* if cr_ifs isn't valid, we got here through a syscall or a break */
+ cfm = sw->ar_pfs & ((1UL << 38) - 1);
+ }
+
+ krbs = (unsigned long *) current + IA64_RBS_OFFSET/8;
+ ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 16));
+ ar_bsp = (long) ia64_rse_skip_regs((long *) pt->ar_bspstore, ndirty);
+
+ /* r0-r31
+ * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+ * predicate registers (p0-p63)
+ * b0-b7
+ * ip cfm user-mask
+ * ar.rsc ar.bsp ar.bspstore ar.rnat
+ * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
+ */
+ memset(dst, 0, sizeof (dst)); /* don't leak any "random" bits */
+
+ /* r0 is zero */ dst[ 1] = pt->r1; dst[ 2] = pt->r2; dst[ 3] = pt->r3;
+ dst[ 4] = sw->r4; dst[ 5] = sw->r5; dst[ 6] = sw->r6; dst[ 7] = sw->r7;
+ dst[ 8] = pt->r8; dst[ 9] = pt->r9; dst[10] = pt->r10; dst[11] = pt->r11;
+ dst[12] = pt->r12; dst[13] = pt->r13; dst[14] = pt->r14; dst[15] = pt->r15;
+ memcpy(dst + 16, &pt->r16, 16*8); /* r16-r31 are contiguous */
+
+ dst[32] = ia64_get_nat_bits(pt, sw);
+ dst[33] = pt->pr;
+
+ /* branch regs: */
+ dst[34] = pt->b0; dst[35] = sw->b1; dst[36] = sw->b2; dst[37] = sw->b3;
+ dst[38] = sw->b4; dst[39] = sw->b5; dst[40] = pt->b6; dst[41] = pt->b7;
+
+ dst[42] = pt->cr_iip; dst[43] = pt->cr_ifs;
+ dst[44] = pt->cr_ipsr; /* XXX perhaps we should filter out some bits here? --davidm */
+
+ dst[45] = pt->ar_rsc; dst[46] = ar_bsp; dst[47] = pt->ar_bspstore; dst[48] = pt->ar_rnat;
+ dst[49] = pt->ar_ccv; dst[50] = pt->ar_unat; dst[51] = sw->ar_fpsr; dst[52] = pt->ar_pfs;
+ dst[53] = sw->ar_lc; dst[54] = (sw->ar_pfs >> 52) & 0x3f;
+}
+
+int
+dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
+{
+ struct switch_stack *sw = ((struct switch_stack *) pt) - 1;
+ struct task_struct *fpu_owner = ia64_get_fpu_owner();
+
+ memset(dst, 0, sizeof (dst)); /* don't leak any "random" bits */
+
+ /* f0 is 0.0 */ /* f1 is 1.0 */ dst[2] = sw->f2; dst[3] = sw->f3;
+ dst[4] = sw->f4; dst[5] = sw->f5; dst[6] = pt->f6; dst[7] = pt->f7;
+ dst[8] = pt->f8; dst[9] = pt->f9;
+ memcpy(dst + 10, &sw->f10, 22*16); /* f10-f31 are contiguous */
+
+ if ((fpu_owner == current) || (current->thread.flags & IA64_THREAD_FPH_VALID)) {
+ if (fpu_owner == current) {
+ __ia64_save_fpu(current->thread.fph);
+ }
+ memcpy(dst + 32, current->thread.fph, 96*16);
+ }
+ return 1; /* f0-f31 are always valid so we always return 1 */
+}
+
+asmlinkage long
+sys_execve (char *filename, char **argv, char **envp, struct pt_regs *regs)
+{
+ int error;
+
+ lock_kernel();
+ filename = getname(filename);
+ error = PTR_ERR(filename);
+ if (IS_ERR(filename))
+ goto out;
+ error = do_execve(filename, argv, envp, regs);
+ putname(filename);
+out:
+ unlock_kernel();
+ return error;
+}
+
+pid_t
+kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
+{
+ struct task_struct *parent = current;
+ int result;
+
+ clone(flags | CLONE_VM, 0);
+ if (parent != current) {
+ result = (*fn)(arg);
+ _exit(result);
+ }
+ return 0; /* parent: just return */
+}
+
+/*
+ * Flush thread state. This is called when a thread does an execve().
+ */
+void
+flush_thread (void)
+{
+ /* drop floating-point and debug-register state if it exists: */
+ current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
+
+ if (ia64_get_fpu_owner() == current) {
+ ia64_set_fpu_owner(0);
+ }
+}
+
+/*
+ * Clean up state associated with current thread. This is called when
+ * the thread calls exit().
+ */
+void
+exit_thread (void)
+{
+ if (ia64_get_fpu_owner() == current) {
+ ia64_set_fpu_owner(0);
+ }
+}
+
+/*
+ * Free remaining state associated with DEAD_TASK. This is called
+ * after the parent of DEAD_TASK has collected the exit status of the
+ * task via wait().
+ */
+void
+release_thread (struct task_struct *dead_task)
+{
+ /* nothing to do */
+}
+
+unsigned long
+get_wchan (struct task_struct *p)
+{
+ struct ia64_frame_info info;
+ unsigned long ip;
+ int count = 0;
+ /*
+ * These bracket the sleeping functions..
+ */
+ extern void scheduling_functions_start_here(void);
+ extern void scheduling_functions_end_here(void);
+# define first_sched ((unsigned long) scheduling_functions_start_here)
+# define last_sched ((unsigned long) scheduling_functions_end_here)
+
+ /*
+ * Note: p may not be a blocked task (it could be current or
+ * another process running on some other CPU). Rather than
+ * trying to determine if p is really blocked, we just assume
+ * it's blocked and rely on the unwind routines to fail
+ * gracefully if the process wasn't really blocked after all.
+ * --davidm 99/12/15
+ */
+ ia64_unwind_init_from_blocked_task(&info, p);
+ do {
+ if (ia64_unwind_to_previous_frame(&info) < 0)
+ return 0;
+ ip = ia64_unwind_get_ip(&info);
+ if (ip < first_sched || ip >= last_sched)
+ return ip;
+ } while (count++ < 16);
+ return 0;
+# undef first_sched
+# undef last_sched
+}
+
+void
+machine_restart (char *restart_cmd)
+{
+ (*efi.reset_system)(EFI_RESET_WARM, 0, 0, 0);
+}
+
+void
+machine_halt (void)
+{
+ printk("machine_halt: need PAL or ACPI version here!!\n");
+ machine_restart(0);
+}
+
+void
+machine_power_off (void)
+{
+ printk("machine_power_off: unimplemented (need ACPI version here)\n");
+ machine_halt ();
+}
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
new file mode 100644
index 000000000..18a8e342e
--- /dev/null
+++ b/arch/ia64/kernel/ptrace.c
@@ -0,0 +1,653 @@
+/*
+ * Kernel support for the ptrace() and syscall tracing interfaces.
+ *
+ * Copyright (C) 1999-2000 Hewlett-Packard Co
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Derived from the x86 and Alpha versions. Most of the code in here
+ * could actually be factored into a common set of routines.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/smp_lock.h>
+#include <linux/user.h>
+
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace_offsets.h>
+#include <asm/rse.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+/*
+ * Collect the NaT bits for r1-r31 from sw->caller_unat and
+ * sw->ar_unat and return a NaT bitset where bit i is set iff the NaT
+ * bit of register i is set.
+ */
+long
+ia64_get_nat_bits (struct pt_regs *pt, struct switch_stack *sw)
+{
+# define GET_BITS(str, first, last, unat) \
+ ({ \
+ unsigned long bit = ia64_unat_pos(&str->r##first); \
+ unsigned long mask = ((1UL << (last - first + 1)) - 1) << first; \
+ (ia64_rotl(unat, first) >> bit) & mask; \
+ })
+ unsigned long val;
+
+ val = GET_BITS(pt, 1, 3, sw->caller_unat);
+ val |= GET_BITS(pt, 12, 15, sw->caller_unat);
+ val |= GET_BITS(pt, 8, 11, sw->caller_unat);
+ val |= GET_BITS(pt, 16, 31, sw->caller_unat);
+ val |= GET_BITS(sw, 4, 7, sw->ar_unat);
+ return val;
+
+# undef GET_BITS
+}
+
+/*
+ * Store the NaT bitset NAT in pt->caller_unat and sw->ar_unat.
+ */
+void
+ia64_put_nat_bits (struct pt_regs *pt, struct switch_stack *sw, unsigned long nat)
+{
+# define PUT_BITS(str, first, last, nat) \
+ ({ \
+ unsigned long bit = ia64_unat_pos(&str->r##first); \
+ unsigned long mask = ((1UL << (last - first + 1)) - 1) << bit; \
+ (ia64_rotr(nat, first) << bit) & mask; \
+ })
+ sw->caller_unat = PUT_BITS(pt, 1, 3, nat);
+ sw->caller_unat |= PUT_BITS(pt, 12, 15, nat);
+ sw->caller_unat |= PUT_BITS(pt, 8, 11, nat);
+ sw->caller_unat |= PUT_BITS(pt, 16, 31, nat);
+ sw->ar_unat = PUT_BITS(sw, 4, 7, nat);
+
+# undef PUT_BITS
+}
+
+#define IA64_MLI_TEMPLATE 0x2
+#define IA64_MOVL_OPCODE 6
+
+void
+ia64_increment_ip (struct pt_regs *regs)
+{
+ unsigned long w0, w1, ri = ia64_psr(regs)->ri + 1;
+
+ if (ri > 2) {
+ ri = 0;
+ regs->cr_iip += 16;
+ } else if (ri == 2) {
+ get_user(w0, (unsigned long *) (regs->cr_iip + 0));
+ get_user(w1, (unsigned long *) (regs->cr_iip + 8));
+ if (((w0 >> 1) & 0xf) == IA64_MLI_TEMPLATE && (w1 >> 60) == IA64_MOVL_OPCODE) {
+ /*
+ * rfi'ing to slot 2 of an MLI bundle causes
+ * an illegal operation fault. We don't want
+ * that to happen... Note that we check the
+ * opcode only. "movl" has a vc bit of 0, but
+ * since a vc bit of 1 is currently reserved,
+ * we might just as well treat it like a movl.
+ */
+ ri = 0;
+ regs->cr_iip += 16;
+ }
+ }
+ ia64_psr(regs)->ri = ri;
+}
+
+void
+ia64_decrement_ip (struct pt_regs *regs)
+{
+ unsigned long w0, w1, ri = ia64_psr(regs)->ri - 1;
+
+ if (ia64_psr(regs)->ri == 0) {
+ regs->cr_iip -= 16;
+ ri = 2;
+ get_user(w0, (unsigned long *) (regs->cr_iip + 0));
+ get_user(w1, (unsigned long *) (regs->cr_iip + 8));
+ if (((w0 >> 1) & 0xf) == IA64_MLI_TEMPLATE && (w1 >> 60) == IA64_MOVL_OPCODE) {
+ /*
+ * rfi'ing to slot 2 of an MLI bundle causes
+ * an illegal operation fault. We don't want
+ * that to happen... Note that we check the
+ * opcode only. "movl" has a vc bit of 0, but
+ * since a vc bit of 1 is currently reserved,
+ * we might just as well treat it like a movl.
+ */
+ ri = 1;
+ }
+ }
+ ia64_psr(regs)->ri = ri;
+}
+
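[Editorial aside, not part of the patch.] The two routines above treat the instruction pointer as a (cr_iip, psr.ri) pair: cr_iip is the 16-byte-aligned bundle address and psr.ri selects one of the bundle's three instruction slots. Later in this patch, setup_sigcontext() and restore_sigcontext() in signal.c fold the pair into a single sc_ip value whose low two bits carry the slot. A minimal sketch of that encoding:

/* Sketch: folding (cr_iip, psr.ri) into one value, as signal.c does with sc_ip. */
static inline unsigned long fold_ip (unsigned long cr_iip, unsigned long ri)
{
	return cr_iip + ri;		/* bundles are 16-byte aligned, so bits 0-1 are free */
}

static inline void unfold_ip (unsigned long ip, unsigned long *cr_iip, unsigned long *ri)
{
	*cr_iip = ip & ~0x3UL;		/* bundle address */
	*ri = ip & 0x3;			/* slot within the bundle (0, 1, or 2) */
}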
+/*
+ * This routine is used to read the rnat bits that are stored on the
+ * kernel backing store. Since, in general, the alignment of the user
+ * and kernel are different, this is not completely trivial. In
+ * essence, we need to construct the user RNAT based on up to two
+ * kernel RNAT values and/or the RNAT value saved in the child's
+ * pt_regs.
+ *
+ * user rbs
+ *
+ * +--------+ <-- lowest address
+ * | slot62 |
+ * +--------+
+ * | rnat | 0x....1f8
+ * +--------+
+ * | slot00 | \
+ * +--------+ |
+ * | slot01 | > child_regs->ar_rnat
+ * +--------+ |
+ * | slot02 | / kernel rbs
+ * +--------+ +--------+
+ * <- child_regs->ar_bspstore | slot61 | <-- krbs
+ * +- - - - + +--------+
+ * | slot62 |
+ * +- - - - + +--------+
+ * | rnat |
+ * +- - - - + +--------+
+ * vrnat | slot00 |
+ * +- - - - + +--------+
+ * = =
+ * +--------+
+ * | slot00 | \
+ * +--------+ |
+ * | slot01 | > child_stack->ar_rnat
+ * +--------+ |
+ * | slot02 | /
+ * +--------+
+ * <--- child_stack->ar_bspstore
+ *
+ * The way to think of this code is as follows: bit 0 in the user rnat
+ * corresponds to some bit N (0 <= N <= 62) in one of the kernel rnat
+ * values. The kernel rnat value holding this bit is stored in
+ * variable rnat0. rnat1 is loaded with the kernel rnat value that
+ * forms the upper bits of the user rnat value.
+ *
+ * Boundary cases:
+ *
+ * o when reading the rnat "below" the first rnat slot on the kernel
+ * backing store, rnat0/rnat1 are set to 0 and the low order bits
+ * are merged in from pt->ar_rnat.
+ *
+ * o when reading the rnat "above" the last rnat slot on the kernel
+ * backing store, rnat0/rnat1 gets its value from sw->ar_rnat.
+ */
+static unsigned long
+get_rnat (struct pt_regs *pt, struct switch_stack *sw,
+ unsigned long *krbs, unsigned long *urnat_addr)
+{
+ unsigned long rnat0 = 0, rnat1 = 0, urnat = 0, *slot0_kaddr, kmask = ~0UL;
+ unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift;
+ long num_regs;
+
+ kbsp = (unsigned long *) sw->ar_bspstore;
+ ubspstore = (unsigned long *) pt->ar_bspstore;
+ /*
+ * First, figure out which bit number slot 0 in user-land maps
+ * to in the kernel rnat. Do this by figuring out how many
+ * register slots we're beyond the user's backingstore and
+ * then computing the equivalent address in kernel space.
+ */
+ num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1);
+ slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs);
+ shift = ia64_rse_slot_num(slot0_kaddr);
+ rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr);
+ rnat0_kaddr = rnat1_kaddr - 64;
+
+ if (ubspstore + 63 > urnat_addr) {
+ /* some bits need to be merged in from pt->ar_rnat */
+ kmask = ~((1UL << ia64_rse_slot_num(ubspstore)) - 1);
+ urnat = (pt->ar_rnat & ~kmask);
+ }
+ if (rnat0_kaddr >= kbsp) {
+ rnat0 = sw->ar_rnat;
+ } else if (rnat0_kaddr > krbs) {
+ rnat0 = *rnat0_kaddr;
+ }
+ if (rnat1_kaddr >= kbsp) {
+ rnat1 = sw->ar_rnat;
+ } else if (rnat1_kaddr > krbs) {
+ rnat1 = *rnat1_kaddr;
+ }
+ urnat |= ((rnat1 << (63 - shift)) | (rnat0 >> shift)) & kmask;
+ return urnat;
+}
+
+/*
+ * The reverse of get_rnat.
+ */
+static void
+put_rnat (struct pt_regs *pt, struct switch_stack *sw,
+ unsigned long *krbs, unsigned long *urnat_addr, unsigned long urnat)
+{
+ unsigned long rnat0 = 0, rnat1 = 0, *slot0_kaddr, kmask = ~0UL, mask;
+ unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift;
+ long num_regs;
+
+ kbsp = (unsigned long *) sw->ar_bspstore;
+ ubspstore = (unsigned long *) pt->ar_bspstore;
+ /*
+ * First, figure out which bit number slot 0 in user-land maps
+ * to in the kernel rnat. Do this by figuring out how many
+ * register slots we're beyond the user's backingstore and
+ * then computing the equivalent address in kernel space.
+ */
+ num_regs = (long) ia64_rse_num_regs(ubspstore, urnat_addr + 1);
+ slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs);
+ shift = ia64_rse_slot_num(slot0_kaddr);
+ rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr);
+ rnat0_kaddr = rnat1_kaddr - 64;
+
+ if (ubspstore + 63 > urnat_addr) {
+ /* some bits need to be placed in pt->ar_rnat: */
+ kmask = ~((1UL << ia64_rse_slot_num(ubspstore)) - 1);
+ pt->ar_rnat = (pt->ar_rnat & kmask) | (urnat & ~kmask);
+ }
+ /*
+ * Note: Section 11.1 of the EAS guarantees that bit 63 of an
+ * rnat slot is ignored, so we don't have to clear it here.
+ */
+ rnat0 = (urnat << shift);
+ mask = ~0UL << shift;
+ if (rnat0_kaddr >= kbsp) {
+ sw->ar_rnat = (sw->ar_rnat & ~mask) | (rnat0 & mask);
+ } else if (rnat0_kaddr > krbs) {
+ *rnat0_kaddr = ((*rnat0_kaddr & ~mask) | (rnat0 & mask));
+ }
+
+ rnat1 = (urnat >> (63 - shift));
+ mask = ~0UL >> (63 - shift);
+ if (rnat1_kaddr >= kbsp) {
+ sw->ar_rnat = (sw->ar_rnat & ~mask) | (rnat1 & mask);
+ } else if (rnat1_kaddr > krbs) {
+ *rnat1_kaddr = ((*rnat1_kaddr & ~mask) | (rnat1 & mask));
+ }
+}
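[Editorial aside, not part of the patch.] Both get_rnat() and put_rnat() lean on the RSE address arithmetic from <asm/rse.h>: on any backing store, the doubleword whose address ends in 0x1f8 (bits 3-8 all set, the "rnat" slot in the diagram above) holds a NaT collection rather than a stacked register. A rough sketch of the helper semantics assumed here; the real inlines live in <asm/rse.h> and the names below are illustrative:

/* Illustrative sketch of the RSE helpers used above (assumed semantics). */
static inline unsigned long rse_slot_num (unsigned long *addr)
{
	return (((unsigned long) addr) >> 3) & 0x3f;	/* slot index within a 64-slot group */
}

static inline int rse_is_rnat_slot (unsigned long *addr)
{
	return rse_slot_num(addr) == 0x3f;		/* slot 63 of each group is the NaT collection */
}

static inline unsigned long *rse_rnat_addr (unsigned long *slot_addr)
{
	/* address of the NaT collection covering slot_addr's 64-slot group */
	return (unsigned long *) (((unsigned long) slot_addr) | (0x3f << 3));
}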
+
+long
+ia64_peek (struct pt_regs *regs, struct task_struct *child, unsigned long addr, long *val)
+{
+ unsigned long *bspstore, *krbs, krbs_num_regs, regnum, *rbs_end, *laddr;
+ struct switch_stack *child_stack;
+ struct pt_regs *child_regs;
+ size_t copied;
+ long ret;
+
+ laddr = (unsigned long *) addr;
+ child_regs = ia64_task_regs(child);
+ child_stack = (struct switch_stack *) child_regs - 1;
+ bspstore = (unsigned long *) child_regs->ar_bspstore;
+ krbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
+ krbs_num_regs = ia64_rse_num_regs(krbs, (unsigned long *) child_stack->ar_bspstore);
+ rbs_end = ia64_rse_skip_regs(bspstore, krbs_num_regs);
+ if (laddr >= bspstore && laddr <= ia64_rse_rnat_addr(rbs_end)) {
+ /*
+ * Attempt to read the RBS in an area that's actually
+ * on the kernel RBS => read the corresponding bits in
+ * the kernel RBS.
+ */
+ if (ia64_rse_is_rnat_slot(laddr))
+ ret = get_rnat(child_regs, child_stack, krbs, laddr);
+ else {
+ regnum = ia64_rse_num_regs(bspstore, laddr);
+ laddr = ia64_rse_skip_regs(krbs, regnum);
+ if (regnum >= krbs_num_regs) {
+ ret = 0;
+ } else {
+ if ((unsigned long) laddr >= (unsigned long) high_memory) {
+ printk("yikes: trying to access long at %p\n", laddr);
+ return -EIO;
+ }
+ ret = *laddr;
+ }
+ }
+ } else {
+ copied = access_process_vm(child, addr, &ret, sizeof(ret), 0);
+ if (copied != sizeof(ret))
+ return -EIO;
+ }
+ *val = ret;
+ return 0;
+}
+
+long
+ia64_poke (struct pt_regs *regs, struct task_struct *child, unsigned long addr, long val)
+{
+ unsigned long *bspstore, *krbs, krbs_num_regs, regnum, *rbs_end, *laddr;
+ struct switch_stack *child_stack;
+ struct pt_regs *child_regs;
+
+ laddr = (unsigned long *) addr;
+ child_regs = ia64_task_regs(child);
+ child_stack = (struct switch_stack *) child_regs - 1;
+ bspstore = (unsigned long *) child_regs->ar_bspstore;
+ krbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
+ krbs_num_regs = ia64_rse_num_regs(krbs, (unsigned long *) child_stack->ar_bspstore);
+ rbs_end = ia64_rse_skip_regs(bspstore, krbs_num_regs);
+ if (laddr >= bspstore && laddr <= ia64_rse_rnat_addr(rbs_end)) {
+ /*
+ * Attempt to write the RBS in an area that's actually
+ * on the kernel RBS => write the corresponding bits
+ * in the kernel RBS.
+ */
+ if (ia64_rse_is_rnat_slot(laddr))
+ put_rnat(child_regs, child_stack, krbs, laddr, val);
+ else {
+ regnum = ia64_rse_num_regs(bspstore, laddr);
+ laddr = ia64_rse_skip_regs(krbs, regnum);
+ if (regnum < krbs_num_regs) {
+ *laddr = val;
+ }
+ }
+ } else if (access_process_vm(child, addr, &val, sizeof(val), 1) != sizeof(val)) {
+ return -EIO;
+ }
+ return 0;
+}
+
+/*
+ * Ensure the state in child->thread.fph is up-to-date.
+ */
+static void
+sync_fph (struct task_struct *child)
+{
+ if (ia64_psr(ia64_task_regs(child))->mfh && ia64_get_fpu_owner() == child) {
+ ia64_save_fpu(&child->thread.fph[0]);
+ child->thread.flags |= IA64_THREAD_FPH_VALID;
+ }
+ if (!(child->thread.flags & IA64_THREAD_FPH_VALID)) {
+ memset(&child->thread.fph, 0, sizeof(child->thread.fph));
+ child->thread.flags |= IA64_THREAD_FPH_VALID;
+ }
+}
+
+asmlinkage long
+sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data,
+ long arg4, long arg5, long arg6, long arg7, long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+ struct switch_stack *child_stack;
+ struct pt_regs *child_regs;
+ struct task_struct *child;
+ unsigned long flags, *base;
+ long ret, regnum;
+
+ lock_kernel();
+ ret = -EPERM;
+ if (request == PTRACE_TRACEME) {
+ /* are we already being traced? */
+ if (current->flags & PF_PTRACED)
+ goto out;
+ current->flags |= PF_PTRACED;
+ ret = 0;
+ goto out;
+ }
+
+ ret = -ESRCH;
+ read_lock(&tasklist_lock);
+ child = find_task_by_pid(pid);
+ read_unlock(&tasklist_lock);
+ if (!child)
+ goto out;
+ ret = -EPERM;
+ if (pid == 1) /* no messing around with init! */
+ goto out;
+
+ if (request == PTRACE_ATTACH) {
+ if (child == current)
+ goto out;
+ if ((!child->dumpable ||
+ (current->uid != child->euid) ||
+ (current->uid != child->suid) ||
+ (current->uid != child->uid) ||
+ (current->gid != child->egid) ||
+ (current->gid != child->sgid) ||
+ (!cap_issubset(child->cap_permitted, current->cap_permitted)) ||
+ (current->gid != child->gid)) && !capable(CAP_SYS_PTRACE))
+ goto out;
+ /* the same process cannot be attached many times */
+ if (child->flags & PF_PTRACED)
+ goto out;
+ child->flags |= PF_PTRACED;
+ if (child->p_pptr != current) {
+ unsigned long flags;
+
+ write_lock_irqsave(&tasklist_lock, flags);
+ REMOVE_LINKS(child);
+ child->p_pptr = current;
+ SET_LINKS(child);
+ write_unlock_irqrestore(&tasklist_lock, flags);
+ }
+ send_sig(SIGSTOP, child, 1);
+ ret = 0;
+ goto out;
+ }
+ ret = -ESRCH;
+ if (!(child->flags & PF_PTRACED))
+ goto out;
+ if (child->state != TASK_STOPPED) {
+ if (request != PTRACE_KILL)
+ goto out;
+ }
+ if (child->p_pptr != current)
+ goto out;
+
+ switch (request) {
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA: /* read word at location addr */
+ ret = ia64_peek(regs, child, addr, &data);
+ if (ret == 0) {
+ ret = data;
+ regs->r8 = 0; /* ensure "ret" is not mistaken as an error code */
+ }
+ goto out;
+
+ case PTRACE_POKETEXT:
+ case PTRACE_POKEDATA: /* write the word at location addr */
+ ret = ia64_poke(regs, child, addr, data);
+ goto out;
+
+ case PTRACE_PEEKUSR: /* read the word at addr in the USER area */
+ ret = -EIO;
+ if ((addr & 0x7) != 0)
+ goto out;
+
+ if (addr < PT_CALLER_UNAT) {
+ /* accessing fph */
+ sync_fph(child);
+ addr += (unsigned long) &child->thread.fph;
+ ret = *(unsigned long *) addr;
+ } else if (addr < PT_F9+16) {
+ /* accessing switch_stack or pt_regs: */
+ child_regs = ia64_task_regs(child);
+ child_stack = (struct switch_stack *) child_regs - 1;
+ ret = *(unsigned long *) ((long) child_stack + addr - PT_CALLER_UNAT);
+
+ if (addr == PT_AR_BSP) {
+ /* ret currently contains pt_regs.loadrs */
+ unsigned long *rbs, *bspstore, ndirty;
+
+ rbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
+ bspstore = (unsigned long *) child_regs->ar_bspstore;
+ ndirty = ia64_rse_num_regs(rbs, rbs + (ret >> 19));
+ ret = (unsigned long) ia64_rse_skip_regs(bspstore, ndirty);
+ }
+ } else {
+ if (addr >= PT_IBR) {
+ regnum = (addr - PT_IBR) >> 3;
+ base = &child->thread.ibr[0];
+ } else {
+ regnum = (addr - PT_DBR) >> 3;
+ base = &child->thread.dbr[0];
+ }
+ if (regnum >= 8)
+ goto out;
+ data = base[regnum];
+ }
+ regs->r8 = 0; /* ensure "ret" is not mistaken as an error code */
+ goto out;
+
+ case PTRACE_POKEUSR: /* write the word at addr in the USER area */
+ ret = -EIO;
+ if ((addr & 0x7) != 0)
+ goto out;
+
+ if (addr < PT_CALLER_UNAT) {
+ /* accessing fph */
+ sync_fph(child);
+ addr += (unsigned long) &child->thread.fph;
+ *(unsigned long *) addr = data;
+ } else if (addr < PT_F9+16) {
+ /* accessing switch_stack or pt_regs */
+ child_regs = ia64_task_regs(child);
+ child_stack = (struct switch_stack *) child_regs - 1;
+
+ if (addr == PT_AR_BSP) {
+ /* compute the loadrs value based on bsp and bspstore: */
+ unsigned long *rbs, *bspstore, ndirty, *kbsp;
+
+ bspstore = (unsigned long *) child_regs->ar_bspstore;
+ ndirty = ia64_rse_num_regs(bspstore, (unsigned long *) data);
+ rbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
+ kbsp = ia64_rse_skip_regs(rbs, ndirty);
+ data = (kbsp - rbs) << 19;
+ }
+ *(unsigned long *) ((long) child_stack + addr - PT_CALLER_UNAT) = data;
+ } else {
+ if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
+ child->thread.flags |= IA64_THREAD_DBG_VALID;
+ memset(child->thread.dbr, 0, sizeof child->thread.dbr);
+ memset(child->thread.ibr, 0, sizeof child->thread.ibr);
+ }
+
+ if (addr >= PT_IBR) {
+ regnum = (addr - PT_IBR) >> 3;
+ base = &child->thread.ibr[0];
+ } else {
+ regnum = (addr - PT_DBR) >> 3;
+ base = &child->thread.dbr[0];
+ }
+ if (regnum >= 8)
+ goto out;
+ if (regnum & 1) {
+ /* force breakpoint to be effective at most for user-level: */
+ data &= ~(0x7UL << 56);
+ }
+ base[regnum] = data;
+ }
+ ret = 0;
+ goto out;
+
+ case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
+ case PTRACE_CONT: /* restart after signal. */
+ ret = -EIO;
+ if (data > _NSIG)
+ goto out;
+ if (request == PTRACE_SYSCALL)
+ child->flags |= PF_TRACESYS;
+ else
+ child->flags &= ~PF_TRACESYS;
+ child->exit_code = data;
+
+ /* make sure the single step/take-branch tra bits are not set: */
+ ia64_psr(ia64_task_regs(child))->ss = 0;
+ ia64_psr(ia64_task_regs(child))->tb = 0;
+
+ wake_up_process(child);
+ ret = 0;
+ goto out;
+
+ case PTRACE_KILL:
+ /*
+ * Make the child exit. Best I can do is send it a
+ * sigkill. Perhaps it should be put in the status
+ * that it wants to exit.
+ */
+ if (child->state == TASK_ZOMBIE) /* already dead */
+ goto out;
+ child->exit_code = SIGKILL;
+
+ /* make sure the single step/take-branch tra bits are not set: */
+ ia64_psr(ia64_task_regs(child))->ss = 0;
+ ia64_psr(ia64_task_regs(child))->tb = 0;
+
+ wake_up_process(child);
+ ret = 0;
+ goto out;
+
+ case PTRACE_SINGLESTEP: /* let child execute for one instruction */
+ case PTRACE_SINGLEBLOCK:
+ ret = -EIO;
+ if (data > _NSIG)
+ goto out;
+
+ child->flags &= ~PF_TRACESYS;
+ if (request == PTRACE_SINGLESTEP) {
+ ia64_psr(ia64_task_regs(child))->ss = 1;
+ } else {
+ ia64_psr(ia64_task_regs(child))->tb = 1;
+ }
+ child->exit_code = data;
+
+ /* give it a chance to run. */
+ wake_up_process(child);
+ ret = 0;
+ goto out;
+
+ case PTRACE_DETACH: /* detach a process that was attached. */
+ ret = -EIO;
+ if (data > _NSIG)
+ goto out;
+
+ child->flags &= ~(PF_PTRACED|PF_TRACESYS);
+ child->exit_code = data;
+ write_lock_irqsave(&tasklist_lock, flags);
+ REMOVE_LINKS(child);
+ child->p_pptr = child->p_opptr;
+ SET_LINKS(child);
+ write_unlock_irqrestore(&tasklist_lock, flags);
+
+ /* make sure the single step/take-branch tra bits are not set: */
+ ia64_psr(ia64_task_regs(child))->ss = 0;
+ ia64_psr(ia64_task_regs(child))->tb = 0;
+
+ wake_up_process(child);
+ ret = 0;
+ goto out;
+
+ default:
+ ret = -EIO;
+ goto out;
+ }
+ out:
+ unlock_kernel();
+ return ret;
+}
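[Editorial aside, not part of the patch.] A usage sketch from the tracing side (user space, illustrative only): registers are reached through PTRACE_PEEKUSER/PTRACE_POKEUSER with the byte offsets defined in <asm/ptrace_offsets.h>, and the PT_AR_BSP special case above is what makes the child's backing-store pointer come back in user-level terms rather than as the raw pt_regs.loadrs value.

#include <sys/ptrace.h>
#include <sys/types.h>
#include <asm/ptrace_offsets.h>		/* PT_AR_BSP and friends */

/* Illustrative only: fetch the stopped child's user-level ar.bsp. */
static unsigned long peek_ar_bsp (pid_t pid)
{
	/* offsets must be 8-byte aligned, as enforced by sys_ptrace() above */
	return (unsigned long) ptrace(PTRACE_PEEKUSER, pid, (void *) PT_AR_BSP, (void *) 0);
}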
+
+void
+syscall_trace (void)
+{
+ if ((current->flags & (PF_PTRACED|PF_TRACESYS)) != (PF_PTRACED|PF_TRACESYS))
+ return;
+ current->exit_code = SIGTRAP;
+ set_current_state(TASK_STOPPED);
+ notify_parent(current, SIGCHLD);
+ schedule();
+ /*
+ * This isn't the same as continuing with a signal, but it
+ * will do for normal use. strace only continues with a
+ * signal if the stopping signal is not SIGTRAP. -brl
+ */
+ if (current->exit_code) {
+ send_sig(current->exit_code, current, 1);
+ current->exit_code = 0;
+ }
+}
diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c
new file mode 100644
index 000000000..8743f6588
--- /dev/null
+++ b/arch/ia64/kernel/sal.c
@@ -0,0 +1,157 @@
+/*
+ * System Abstraction Layer (SAL) interface routines.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ */
+#include <linux/config.h>
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+
+#include <asm/page.h>
+#include <asm/sal.h>
+#include <asm/pal.h>
+
+#define SAL_DEBUG
+
+spinlock_t sal_lock = SPIN_LOCK_UNLOCKED;
+
+static struct {
+ void *addr; /* function entry point */
+ void *gpval; /* gp value to use */
+} pdesc;
+
+static long
+default_handler (void)
+{
+ return -1;
+}
+
+ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler;
+
+const char *
+ia64_sal_strerror (long status)
+{
+ const char *str;
+ switch (status) {
+ case 0: str = "Call completed without error"; break;
+ case 1: str = "Effect a warm boot of the system to complete "
+ "the update"; break;
+ case -1: str = "Not implemented"; break;
+ case -2: str = "Invalid argument"; break;
+ case -3: str = "Call completed with error"; break;
+ case -4: str = "Virtual address not registered"; break;
+ case -5: str = "No information available"; break;
+ case -6: str = "Insufficient space to add the entry"; break;
+ case -7: str = "Invalid entry_addr value"; break;
+ case -8: str = "Invalid interrupt vector"; break;
+ case -9: str = "Requested memory not available"; break;
+ case -10: str = "Unable to write to the NVM device"; break;
+ case -11: str = "Invalid partition type specified"; break;
+ case -12: str = "Invalid NVM_Object id specified"; break;
+ case -13: str = "NVM_Object already has the maximum number "
+ "of partitions"; break;
+ case -14: str = "Insufficient space in partition for the "
+ "requested write sub-function"; break;
+ case -15: str = "Insufficient data buffer space for the "
+ "requested read record sub-function"; break;
+ case -16: str = "Scratch buffer required for the write/delete "
+ "sub-function"; break;
+ case -17: str = "Insufficient space in the NVM_Object for the "
+ "requested create sub-function"; break;
+ case -18: str = "Invalid value specified in the partition_rec "
+ "argument"; break;
+ case -19: str = "Record oriented I/O not supported for this "
+ "partition"; break;
+ case -20: str = "Bad format of record to be written or "
+ "required keyword variable not "
+ "specified"; break;
+ default: str = "Unknown SAL status code"; break;
+ }
+ return str;
+}
+
+static void __init
+ia64_sal_handler_init (void *entry_point, void *gpval)
+{
+ /* fill in the SAL procedure descriptor and point ia64_sal to it: */
+ pdesc.addr = entry_point;
+ pdesc.gpval = gpval;
+ ia64_sal = (ia64_sal_handler) &pdesc;
+}
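[Editorial aside, not part of the patch.] The cast of &pdesc to ia64_sal_handler works because an ia-64 function pointer does not point at code: it points at a two-word procedure descriptor holding the entry address and the gp value, which is exactly the layout of pdesc. An illustration of the idea (field names hypothetical):

/* Illustration: what an indirect call through ia64_sal amounts to. */
struct fdesc {			/* mirrors pdesc above */
	void *entry;		/* code address, loaded into a branch register */
	void *gp;		/* global pointer installed before the branch */
};

/*
 * With ia64_sal = (ia64_sal_handler) &pdesc, a call such as
 * ia64_sal(index, arg1, ...) makes the compiler load pdesc.addr and
 * pdesc.gpval, switch gp, and branch -- i.e. it enters the firmware's
 * SAL_PROC with the firmware's own gp, no hand-written stub needed.
 */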
+
+
+void __init
+ia64_sal_init (struct ia64_sal_systab *systab)
+{
+ unsigned long min, max;
+ char *p;
+ struct ia64_sal_desc_entry_point *ep;
+ int i;
+
+ if (!systab) {
+ printk("Hmm, no SAL System Table.\n");
+ return;
+ }
+
+ if (strncmp(systab->signature, "SST_", 4) != 0)
+ printk("bad signature in system table!");
+
+ printk("SAL v%u.%02u: ia32bios=%s, oem=%.32s, product=%.32s\n",
+ systab->sal_rev_major, systab->sal_rev_minor,
+ systab->ia32_bios_present ? "present" : "absent",
+ systab->oem_id, systab->product_id);
+
+ min = ~0UL;
+ max = 0;
+
+ p = (char *) (systab + 1);
+ for (i = 0; i < systab->entry_count; i++) {
+ /*
+ * The first byte of each entry type contains the type descriptor.
+ */
+ switch (*p) {
+ case SAL_DESC_ENTRY_POINT:
+ ep = (struct ia64_sal_desc_entry_point *) p;
+#ifdef SAL_DEBUG
+ printk("sal[%d] - entry: pal_proc=0x%lx, sal_proc=0x%lx\n",
+ i, ep->pal_proc, ep->sal_proc);
+#endif
+ ia64_pal_handler_init(__va(ep->pal_proc));
+ ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp));
+ break;
+
+ case SAL_DESC_AP_WAKEUP:
+#ifdef CONFIG_SMP
+ {
+ struct ia64_sal_desc_ap_wakeup *ap = (void *) p;
+# ifdef SAL_DEBUG
+ printk("sal[%d] - wakeup type %x, 0x%lx\n",
+ i, ap->mechanism, ap->vector);
+# endif
+ switch (ap->mechanism) {
+ case IA64_SAL_AP_EXTERNAL_INT:
+ ap_wakeup_vector = ap->vector;
+# ifdef SAL_DEBUG
+ printk("SAL: AP wakeup using external interrupt; "
+ "vector 0x%lx\n", ap_wakeup_vector);
+# endif
+ break;
+
+ default:
+ printk("SAL: AP wakeup mechanism unsupported!\n");
+ break;
+ }
+ break;
+ }
+#endif
+ }
+ p += SAL_DESC_SIZE(*p);
+ }
+}
diff --git a/arch/ia64/kernel/sal_stub.S b/arch/ia64/kernel/sal_stub.S
new file mode 100644
index 000000000..7ab16bbcd
--- /dev/null
+++ b/arch/ia64/kernel/sal_stub.S
@@ -0,0 +1,116 @@
+/*
+ * gcc currently does not conform to the ia-64 calling convention as far
+ * as returning function values are concerned. Instead of returning
+ * values up to 32 bytes in size in r8-r11, gcc returns any value
+ * bigger than a doubleword via a structure that's allocated by the
+ * caller and whose address is passed into the function. Since
+ * SAL_PROC returns values according to the calling convention, this
+ * stub takes care of copying r8-r11 to the place where gcc expects
+ * them.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#ifndef __GCC_MULTIREG_RETVALS__
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .align 16
+ .global ia64_sal_stub
+ia64_sal_stub:
+ /*
+ * Sheesh, the Cygnus backend passes the pointer to a return value structure in
+ * in0 whereas the HP backend passes it in r8. Don't you hate those little
+ * differences...
+ */
+#ifdef GCC_RETVAL_POINTER_IN_R8
+ adds r2=-24,sp
+ adds sp=-48,sp
+ mov r14=rp
+ ;;
+ st8 [r2]=r8,8 // save pointer to return value
+ addl r3=@ltoff(ia64_sal),gp
+ ;;
+ ld8 r3=[r3]
+ st8 [r2]=gp,8 // save global pointer
+ ;;
+ ld8 r3=[r3] // fetch the value of ia64_sal
+ st8 [r2]=r14 // save return pointer
+ ;;
+ ld8 r2=[r3],8 // load function's entry point
+ ;;
+ ld8 gp=[r3] // load function's global pointer
+ ;;
+ mov b6=r2
+ br.call.sptk.few rp=b6
+.ret0: adds r2=24,sp
+ ;;
+ ld8 r3=[r2],8 // restore pointer to return value
+ ;;
+ ld8 gp=[r2],8 // restore global pointer
+ st8 [r3]=r8,8
+ ;;
+ ld8 r14=[r2] // restore return pointer
+ st8 [r3]=r9,8
+ ;;
+ mov rp=r14
+ st8 [r3]=r10,8
+ ;;
+ st8 [r3]=r11,8
+ adds sp=48,sp
+ br.sptk.few rp
+#else
+ /*
+ * On input:
+ * in0 = pointer to return value structure
+ * in1 = index of SAL function to call
+ * in2..inN = remaining args to SAL call
+ */
+ /*
+ * We allocate one input and eight output registers such that the br.call instruction
+ * will rename in1-in7 to in0-in6---exactly what we want because SAL doesn't want to
+ * see the pointer to the return value structure.
+ */
+ alloc r15=ar.pfs,1,0,8,0
+
+ adds r2=-24,sp
+ adds sp=-48,sp
+ mov r14=rp
+ ;;
+ st8 [r2]=r15,8 // save ar.pfs
+ addl r3=@ltoff(ia64_sal),gp
+ ;;
+ ld8 r3=[r3] // get address of ia64_sal
+ st8 [r2]=gp,8 // save global pointer
+ ;;
+ ld8 r3=[r3] // get value of ia64_sal
+ st8 [r2]=r14,8 // save return address (rp)
+ ;;
+ ld8 r2=[r3],8 // load function's entry point
+ ;;
+ ld8 gp=[r3] // load function's global pointer
+ mov b6=r2
+ br.call.sptk.few rp=b6 // make SAL call
+.ret0: adds r2=24,sp
+ ;;
+ ld8 r15=[r2],8 // restore ar.pfs
+ ;;
+ ld8 gp=[r2],8 // restore global pointer
+ st8 [in0]=r8,8 // store 1. dword of return value
+ ;;
+ ld8 r14=[r2] // restore return address (rp)
+ st8 [in0]=r9,8 // store 2. dword of return value
+ ;;
+ mov rp=r14
+ st8 [in0]=r10,8 // store 3. dword of return value
+ ;;
+ st8 [in0]=r11,8
+ adds sp=48,sp // pop stack frame
+ mov ar.pfs=r15
+ br.ret.sptk.few rp
+#endif
+
+ .endp ia64_sal_stub
+#endif /* __GCC_MULTIREG_RETVALS__ */
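[Editorial aside, not part of the patch.] In C terms, the mismatch this stub papers over looks roughly as follows. This is an illustrative sketch: the struct name is hypothetical (the real return type lives in <asm/sal.h>) and the register assignments are the ones the header comment above describes.

/* Hypothetical illustration of the return-value convention handled by ia64_sal_stub. */
struct sal_ret {
	long status;		/* r8 under the native calling convention */
	long v0;		/* r9  */
	long v1;		/* r10 */
	long v2;		/* r11 */
};

struct sal_ret example_sal_call (void)
{
	struct sal_ret r = { -1, 0, 0, 0 };
	/*
	 * A conforming compiler would leave these four doublewords in
	 * r8-r11 on return.  gcc of this vintage instead expects the
	 * caller to pass a hidden pointer to a result buffer, so the
	 * stub above copies r8-r11 into that buffer after SAL returns.
	 */
	return r;
}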
diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c
new file mode 100644
index 000000000..84581af2e
--- /dev/null
+++ b/arch/ia64/kernel/semaphore.c
@@ -0,0 +1,336 @@
+/*
+ * IA-64 semaphore implementation (derived from x86 version).
+ *
+ * Copyright (C) 1999-2000 Hewlett-Packard Co
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/*
+ * Semaphores are implemented using a two-way counter: The "count"
+ * variable is decremented for each process that tries to acquire the
+ * semaphore, while the "sleepers" variable is a count of such
+ * acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can efficiently
+ * test if they need to do any extra work (up needs to do something
+ * only if count was negative before the increment operation).
+ *
+ * "sleepers" and the contention routine ordering is protected by the
+ * semaphore spinlock.
+ *
+ * Note that these functions are only called when there is contention
+ * on the lock, and as such all this is the "non-critical" part of the
+ * whole semaphore business. The critical part is the inline stuff in
+ * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
+ */
+#include <linux/sched.h>
+
+#include <asm/semaphore.h>
+
+/*
+ * Logic:
+ * - Only on a boundary condition do we need to care. When we go
+ * from a negative count to a non-negative, we wake people up.
+ * - When we go from a non-negative count to a negative do we
+ * (a) synchronize with the "sleepers" count and (b) make sure
+ * that we're on the wakeup list before we synchronize so that
+ * we cannot lose wakeup events.
+ */
+
+void
+__up (struct semaphore *sem)
+{
+ wake_up(&sem->wait);
+}
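[Editorial aside, not part of the patch.] These slow-path routines are only entered from the inline fast paths in <asm/semaphore.h>. As a self-contained, user-space illustration of the two-way counter described above (GCC atomics, not the kernel's actual inlines): down() atomically decrements count and falls into the slow path only when the result goes negative; up() increments and wakes only when there was at least one waiter.

/* Illustrative user-space sketch of the semaphore fast path (not kernel code). */
struct demo_sem {
	long count;			/* > 0: free, 0: held, < 0: held with -count waiters */
};

static void demo_down_slow (struct demo_sem *sem) { (void) sem; /* the kernel calls __down() here */ }
static void demo_up_slow (struct demo_sem *sem)   { (void) sem; /* the kernel calls __up() here */ }

static void demo_down (struct demo_sem *sem)
{
	/* uncontended case touches only the counter */
	if (__sync_sub_and_fetch(&sem->count, 1) < 0)
		demo_down_slow(sem);	/* somebody holds it: go to sleep */
}

static void demo_up (struct demo_sem *sem)
{
	if (__sync_add_and_fetch(&sem->count, 1) <= 0)
		demo_up_slow(sem);	/* at least one sleeper may need waking */
}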
+
+static spinlock_t semaphore_lock = SPIN_LOCK_UNLOCKED;
+
+void
+__down (struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ tsk->state = TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ spin_lock_irq(&semaphore_lock);
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irq(&semaphore_lock);
+
+ schedule();
+ tsk->state = TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE;
+ spin_lock_irq(&semaphore_lock);
+ }
+ spin_unlock_irq(&semaphore_lock);
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+ wake_up(&sem->wait);
+}
+
+int
+__down_interruptible (struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ tsk->state = TASK_INTERRUPTIBLE|TASK_EXCLUSIVE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ spin_lock_irq(&semaphore_lock);
+ sem->sleepers ++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * With signals pending, this turns into
+ * the trylock failure case - we won't be
+ * sleeping, and we can't get the lock as
+ * it has contention. Just correct the count
+ * and exit.
+ */
+ if (signal_pending(current)) {
+ retval = -EINTR;
+ sem->sleepers = 0;
+ atomic_add(sleepers, &sem->count);
+ break;
+ }
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock. The
+ * "-1" is because we're still hoping to get
+ * the lock.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irq(&semaphore_lock);
+
+ schedule();
+ tsk->state = TASK_INTERRUPTIBLE|TASK_EXCLUSIVE;
+ spin_lock_irq(&semaphore_lock);
+ }
+ spin_unlock_irq(&semaphore_lock);
+ tsk->state = TASK_RUNNING;
+ remove_wait_queue(&sem->wait, &wait);
+ wake_up(&sem->wait);
+ return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for having decremented the
+ * count.
+ */
+int
+__down_trylock (struct semaphore *sem)
+{
+ int sleepers;
+
+ spin_lock_irq(&semaphore_lock);
+ sleepers = sem->sleepers + 1;
+ sem->sleepers = 0;
+
+ /*
+ * Add "everybody else" and us into it. They aren't
+ * playing, because we own the spinlock.
+ */
+ if (!atomic_add_negative(sleepers, &sem->count))
+ wake_up(&sem->wait);
+
+ spin_unlock_irq(&semaphore_lock);
+ return 1;
+}
+
+/*
+ * Helper routines for rw semaphores. These could be optimized some
+ * more, but since they're off the critical path, I prefer clarity for
+ * now...
+ */
+
+/*
+ * This gets called if we failed to acquire the lock, but we're biased
+ * to acquire the lock by virtue of causing the count to change from 0
+ * to -1. Being biased, we sleep and attempt to grab the lock until
+ * we succeed. When this function returns, we own the lock.
+ */
+static inline void
+down_read_failed_biased (struct rw_semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ add_wait_queue(&sem->wait, &wait); /* put ourselves at the head of the list */
+
+ for (;;) {
+ if (sem->read_bias_granted && xchg(&sem->read_bias_granted, 0))
+ break;
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ if (!sem->read_bias_granted)
+ schedule();
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+}
+
+/*
+ * This gets called if we failed to acquire the lock and we are not
+ * biased to acquire the lock. We undo the decrement that was
+ * done earlier, go to sleep, and then attempt to re-acquire the
+ * lock afterwards.
+ */
+static inline void
+down_read_failed (struct rw_semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ /*
+ * Undo the decrement we did in down_read() and check if we
+ * need to wake up someone.
+ */
+ __up_read(sem);
+
+ add_wait_queue(&sem->wait, &wait);
+ while (sem->count < 0) {
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ if (sem->count >= 0)
+ break;
+ schedule();
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+}
+
+/*
+ * Wait for the lock to become unbiased. Readers are non-exclusive.
+ */
+void
+__down_read_failed (struct rw_semaphore *sem, long count)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ while (1) {
+ if (count == -1) {
+ down_read_failed_biased(sem);
+ return;
+ }
+ /* unbiased */
+ down_read_failed(sem);
+
+ count = ia64_fetch_and_add(-1, &sem->count);
+ if (count >= 0)
+ return;
+ }
+}
+
+static inline void
+down_write_failed_biased (struct rw_semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ /* put ourselves at the end of the list */
+ add_wait_queue_exclusive(&sem->write_bias_wait, &wait);
+
+ for (;;) {
+ if (sem->write_bias_granted && xchg(&sem->write_bias_granted, 0))
+ break;
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE | TASK_EXCLUSIVE);
+ if (!sem->write_bias_granted)
+ schedule();
+ }
+
+ remove_wait_queue(&sem->write_bias_wait, &wait);
+ tsk->state = TASK_RUNNING;
+
+ /*
+ * If the lock is currently unbiased, awaken the sleepers
+ * FIXME: this wakes up the readers early in a bit of a
+ * stampede -> bad!
+ */
+ if (sem->count >= 0)
+ wake_up(&sem->wait);
+}
+
+
+static inline void
+down_write_failed (struct rw_semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ __up_write(sem); /* this takes care of granting the lock */
+
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ while (sem->count < 0) {
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE | TASK_EXCLUSIVE);
+ if (sem->count >= 0)
+ break; /* we must attempt to acquire or bias the lock */
+ schedule();
+ }
+
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+}
+
+
+/*
+ * Wait for the lock to become unbiased. Since we're a writer, we'll
+ * make ourselves exclusive.
+ */
+void
+__down_write_failed (struct rw_semaphore *sem, long count)
+{
+ long old_count;
+
+ while (1) {
+ if (count == -RW_LOCK_BIAS) {
+ down_write_failed_biased(sem);
+ return;
+ }
+ down_write_failed(sem);
+
+ do {
+ old_count = sem->count;
+ count = old_count - RW_LOCK_BIAS;
+ } while (cmpxchg(&sem->count, old_count, count) != old_count);
+
+ if (count == 0)
+ return;
+ }
+}
+
+void
+__rwsem_wake (struct rw_semaphore *sem, long count)
+{
+ wait_queue_head_t *wq;
+
+ if (count == 0) {
+ /* wake a writer */
+ if (xchg(&sem->write_bias_granted, 1))
+ BUG();
+ wq = &sem->write_bias_wait;
+ } else {
+ /* wake reader(s) */
+ if (xchg(&sem->read_bias_granted, 1))
+ BUG();
+ wq = &sem->wait;
+ }
+ wake_up(wq); /* wake up everyone on the wait queue */
+}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
new file mode 100644
index 000000000..f3283d535
--- /dev/null
+++ b/arch/ia64/kernel/setup.c
@@ -0,0 +1,326 @@
+/*
+ * Architecture-specific setup.
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ *
+ * 02/04/00 D.Mosberger some more get_cpuinfo fixes...
+ * 02/01/00 R.Seth fixed get_cpuinfo for SMP
+ * 01/07/99 S.Eranian added the support for command line argument
+ * 06/24/99 W.Drummond added boot_cpu_data.
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+
+#include <linux/bootmem.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/threads.h>
+#include <linux/console.h>
+
+#include <asm/acpi-ext.h>
+#include <asm/page.h>
+#include <asm/machvec.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+#include <asm/efi.h>
+
+extern char _end;
+
+/* cpu_data[bootstrap_processor] is data for the bootstrap processor: */
+struct cpuinfo_ia64 cpu_data[NR_CPUS];
+
+unsigned long ia64_cycles_per_usec;
+struct ia64_boot_param ia64_boot_param;
+struct screen_info screen_info;
+unsigned long cpu_initialized = 0;
+/* This tells _start which CPU is booting. */
+int cpu_now_booting = 0;
+
+#define COMMAND_LINE_SIZE 512
+
+char saved_command_line[COMMAND_LINE_SIZE]; /* used in proc filesystem */
+
+static int
+find_max_pfn (unsigned long start, unsigned long end, void *arg)
+{
+ unsigned long *max_pfn = arg, pfn;
+
+ pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT;
+ if (pfn > *max_pfn)
+ *max_pfn = pfn;
+ return 0;
+}
+
+static int
+free_available_memory (unsigned long start, unsigned long end, void *arg)
+{
+# define KERNEL_END ((unsigned long) &_end)
+# define MIN(a,b) ((a) < (b) ? (a) : (b))
+# define MAX(a,b) ((a) > (b) ? (a) : (b))
+ unsigned long range_start, range_end;
+
+ range_start = MIN(start, KERNEL_START);
+ range_end = MIN(end, KERNEL_START);
+
+ /*
+ * XXX This should not be necessary, but the bootmem allocator
+ * is broken and fails to work correctly when the starting
+ * address is not properly aligned.
+ */
+ range_start = PAGE_ALIGN(range_start);
+
+ if (range_start < range_end)
+ free_bootmem(__pa(range_start), range_end - range_start);
+
+ range_start = MAX(start, KERNEL_END);
+ range_end = MAX(end, KERNEL_END);
+
+ /*
+ * XXX This should not be necessary, but the bootmem allocator
+ * is broken and fails to work correctly when the starting
+ * address is not properly aligned.
+ */
+ range_start = PAGE_ALIGN(range_start);
+
+ if (range_start < range_end)
+ free_bootmem(__pa(range_start), range_end - range_start);
+
+ return 0;
+}
+
+void __init
+setup_arch (char **cmdline_p)
+{
+ unsigned long max_pfn, bootmap_start, bootmap_size;
+
+ /*
+ * The secondary bootstrap loader passes us the boot
+ * parameters at the beginning of the ZERO_PAGE, so let's
+ * stash away those values before ZERO_PAGE gets cleared out.
+ */
+ memcpy(&ia64_boot_param, (void *) ZERO_PAGE_ADDR, sizeof(ia64_boot_param));
+
+ efi_init();
+
+ max_pfn = 0;
+ efi_memmap_walk(find_max_pfn, &max_pfn);
+
+ /*
+ * This is wrong, wrong, wrong. Darn it, you'd think if they
+ * change APIs, they'd do things for the better. Grumble...
+ */
+ bootmap_start = PAGE_ALIGN(__pa(&_end));
+ bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn);
+
+ efi_memmap_walk(free_available_memory, 0);
+
+ reserve_bootmem(bootmap_start, bootmap_size);
+#if 0
+ /* XXX fix me */
+ init_mm.start_code = (unsigned long) &_stext;
+ init_mm.end_code = (unsigned long) &_etext;
+ init_mm.end_data = (unsigned long) &_edata;
+ init_mm.brk = (unsigned long) &_end;
+
+ code_resource.start = virt_to_bus(&_text);
+ code_resource.end = virt_to_bus(&_etext) - 1;
+ data_resource.start = virt_to_bus(&_etext);
+ data_resource.end = virt_to_bus(&_edata) - 1;
+#endif
+
+ /* process SAL system table: */
+ ia64_sal_init(efi.sal_systab);
+
+ *cmdline_p = __va(ia64_boot_param.command_line);
+ strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line));
+ saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */
+
+ printk("args to kernel: %s\n", *cmdline_p);
+
+#ifndef CONFIG_SMP
+ cpu_init();
+ identify_cpu(&cpu_data[0]);
+#endif
+
+ if (efi.acpi) {
+ /* Parse the ACPI tables */
+ acpi_parse(efi.acpi);
+ }
+
+#ifdef CONFIG_IA64_GENERIC
+ machvec_init(acpi_get_sysname());
+#endif
+
+#ifdef CONFIG_VT
+# if defined(CONFIG_VGA_CONSOLE)
+ conswitchp = &vga_con;
+# elif defined(CONFIG_DUMMY_CONSOLE)
+ conswitchp = &dummy_con;
+# endif
+#endif
+ platform_setup(cmdline_p);
+}
+
+/*
+ * Display cpu info for all cpu's.
+ */
+int
+get_cpuinfo (char *buffer)
+{
+ char family[32], model[32], features[128], *cp, *p = buffer;
+ struct cpuinfo_ia64 *c;
+ unsigned long mask;
+
+ for (c = cpu_data; c < cpu_data + NR_CPUS; ++c) {
+ if (!(cpu_initialized & (1UL << (c - cpu_data))))
+ continue;
+
+ mask = c->features;
+
+ if (c->family == 7)
+ memcpy(family, "IA-64", 6);
+ else
+ sprintf(family, "%u", c->family);
+
+ switch (c->model) {
+ case 0: strcpy(model, "Itanium"); break;
+ default: sprintf(model, "%u", c->model); break;
+ }
+
+ /* build the feature string: */
+ memcpy(features, " standard", 10);
+ cp = features;
+ if (mask & 1) {
+ strcpy(cp, " branchlong");
+ cp = strchr(cp, '\0');
+ mask &= ~1UL;
+ }
+ if (mask)
+ sprintf(cp, " 0x%lx", mask);
+
+ p += sprintf(p,
+ "CPU# %lu\n"
+ "\tvendor : %s\n"
+ "\tfamily : %s\n"
+ "\tmodel : %s\n"
+ "\trevision : %u\n"
+ "\tarchrev : %u\n"
+ "\tfeatures :%s\n" /* don't change this---it _is_ right! */
+ "\tcpu number : %lu\n"
+ "\tcpu regs : %u\n"
+ "\tcpu MHz : %lu.%06lu\n"
+ "\titc MHz : %lu.%06lu\n"
+ "\tBogoMIPS : %lu.%02lu\n\n",
+ c - cpu_data, c->vendor, family, model, c->revision, c->archrev,
+ features,
+ c->ppn, c->number, c->proc_freq / 1000000, c->proc_freq % 1000000,
+ c->itc_freq / 1000000, c->itc_freq % 1000000,
+ loops_per_sec() / 500000, (loops_per_sec() / 5000) % 100);
+ }
+ return p - buffer;
+}
+
+void
+identify_cpu (struct cpuinfo_ia64 *c)
+{
+ union {
+ unsigned long bits[5];
+ struct {
+ /* id 0 & 1: */
+ char vendor[16];
+
+ /* id 2 */
+ u64 ppn; /* processor serial number */
+
+ /* id 3: */
+ unsigned number : 8;
+ unsigned revision : 8;
+ unsigned model : 8;
+ unsigned family : 8;
+ unsigned archrev : 8;
+ unsigned reserved : 24;
+
+ /* id 4: */
+ u64 features;
+ } field;
+ } cpuid;
+ int i;
+
+ for (i = 0; i < 5; ++i) {
+ cpuid.bits[i] = ia64_get_cpuid(i);
+ }
+
+#ifdef CONFIG_SMP
+ /*
+ * XXX Instead of copying the ITC info from the bootstrap
+ * processor, ia64_init_itm() should be done per CPU. That
+ * should get you the right info. --davidm 1/24/00
+ */
+ if (c != &cpu_data[bootstrap_processor]) {
+ memset(c, 0, sizeof(struct cpuinfo_ia64));
+ c->proc_freq = cpu_data[bootstrap_processor].proc_freq;
+ c->itc_freq = cpu_data[bootstrap_processor].itc_freq;
+ c->cyc_per_usec = cpu_data[bootstrap_processor].cyc_per_usec;
+ c->usec_per_cyc = cpu_data[bootstrap_processor].usec_per_cyc;
+ }
+#else
+ memset(c, 0, sizeof(struct cpuinfo_ia64));
+#endif
+
+ memcpy(c->vendor, cpuid.field.vendor, 16);
+#ifdef CONFIG_IA64_SOFTSDV_HACKS
+ /* BUG: SoftSDV doesn't support the cpuid registers. */
+ if (c->vendor[0] == '\0')
+ memcpy(c->vendor, "Intel", 6);
+#endif
+ c->ppn = cpuid.field.ppn;
+ c->number = cpuid.field.number;
+ c->revision = cpuid.field.revision;
+ c->model = cpuid.field.model;
+ c->family = cpuid.field.family;
+ c->archrev = cpuid.field.archrev;
+ c->features = cpuid.field.features;
+#ifdef CONFIG_SMP
+ c->loops_per_sec = loops_per_sec;
+#endif
+}
+
+/*
+ * cpu_init() initializes state that is per-CPU. This function acts
+ * as a 'CPU state barrier', nothing should get across.
+ */
+void
+cpu_init (void)
+{
+ int nr = smp_processor_id();
+
+ /* Clear the stack memory reserved for pt_regs: */
+ memset(ia64_task_regs(current), 0, sizeof(struct pt_regs));
+
+ /*
+ * Initialize default control register to defer speculative
+ * faults. On a speculative load, we want to defer access
+ * right, key miss, and key permission faults. We currently
+ * do NOT defer TLB misses, page-not-present, access bit, or
+ * debug faults but kernel code should not rely on any
+ * particular setting of these bits.
+ */
+ ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_PP);
+ ia64_set_fpu_owner(0); /* initialize ar.k5 */
+
+ if (test_and_set_bit(nr, &cpu_initialized)) {
+ printk("CPU#%d already initialized!\n", nr);
+ machine_halt();
+ }
+ atomic_inc(&init_mm.mm_count);
+ current->active_mm = &init_mm;
+}
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
new file mode 100644
index 000000000..19be1f840
--- /dev/null
+++ b/arch/ia64/kernel/signal.c
@@ -0,0 +1,537 @@
+/*
+ * Architecture-specific signal handling support.
+ *
+ * Copyright (C) 1999-2000 Hewlett-Packard Co
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Derived from i386 and Alpha versions.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/wait.h>
+
+#include <asm/ia32.h>
+#include <asm/uaccess.h>
+#include <asm/rse.h>
+#include <asm/sigcontext.h>
+
+#define DEBUG_SIG 0
+#define STACK_ALIGN 16 /* minimal alignment for stack pointer */
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+#if _NSIG_WORDS > 1
+# define PUT_SIGSET(k,u) __copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t))
+# define GET_SIGSET(k,u) __copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t))
+#else
+# define PUT_SIGSET(k,u) __put_user((k)->sig[0], &(u)->sig[0])
+# define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0])
+#endif
+
+struct sigframe {
+ struct siginfo info;
+ struct sigcontext sc;
+};
+
+extern long sys_wait4 (int, int *, int, struct rusage *);
+extern long ia64_do_signal (sigset_t *, struct pt_regs *, long); /* forward decl */
+
+long
+ia64_rt_sigsuspend (sigset_t *uset, size_t sigsetsize, struct pt_regs *pt)
+{
+ sigset_t oldset, set;
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+ if (GET_SIGSET(&set, uset))
+ return -EFAULT;
+
+ sigdelsetmask(&set, ~_BLOCKABLE);
+
+ spin_lock_irq(&current->sigmask_lock);
+ {
+ oldset = current->blocked;
+ current->blocked = set;
+ recalc_sigpending(current);
+ }
+ spin_unlock_irq(&current->sigmask_lock);
+
+ /*
+ * The return below usually returns to the signal handler. We need to
+ * pre-set the correct error code here to ensure that the right values
+ * get saved in sigcontext by ia64_do_signal.
+ */
+ pt->r8 = EINTR;
+ pt->r10 = -1;
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ if (ia64_do_signal(&oldset, pt, 1))
+ return -EINTR;
+ }
+}
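[Editorial aside, not part of the patch.] The r8/r10 values pre-loaded above encode the ia-64 syscall error convention that ia64_do_signal() later checks: r8 carries the return value or a positive error number, and r10 is -1 exactly when r8 is an error. A sketch of what the user side does with the pair (illustrative, not the actual libc wrapper):

/* Illustration of the syscall return convention assumed by the r8/r10 handling. */
static long demo_syscall_result (long r8, long r10, int *err)
{
	if (r10 == -1) {	/* error: r8 holds a positive error number */
		*err = (int) r8;
		return -1;
	}
	return r8;		/* success: r8 is the real return value */
}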
+
+asmlinkage long
+sys_sigaltstack (const stack_t *uss, stack_t *uoss, long arg2, long arg3, long arg4,
+ long arg5, long arg6, long arg7, long stack)
+{
+ struct pt_regs *pt = (struct pt_regs *) &stack;
+
+ return do_sigaltstack(uss, uoss, pt->r12);
+}
+
+static long
+restore_sigcontext (struct sigcontext *sc, struct pt_regs *pt)
+{
+ struct switch_stack *sw = (struct switch_stack *) pt - 1;
+ unsigned long ip, flags, nat, um;
+ long err;
+
+ /* restore the scratch state that always gets updated during signal delivery: */
+ err = __get_user(flags, &sc->sc_flags);
+
+ err |= __get_user(nat, &sc->sc_nat);
+ err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */
+ err |= __get_user(pt->ar_fpsr, &sc->sc_ar_fpsr);
+ err |= __get_user(pt->ar_pfs, &sc->sc_ar_pfs);
+ err |= __get_user(um, &sc->sc_um); /* user mask */
+ err |= __get_user(pt->ar_rsc, &sc->sc_ar_rsc);
+ err |= __get_user(pt->ar_ccv, &sc->sc_ar_ccv);
+ err |= __get_user(pt->ar_unat, &sc->sc_ar_unat);
+ err |= __get_user(pt->pr, &sc->sc_pr); /* predicates */
+ err |= __get_user(pt->b0, &sc->sc_br[0]); /* b0 (rp) */
+ err |= __get_user(pt->b6, &sc->sc_br[6]);
+ err |= __copy_from_user(&pt->r1, &sc->sc_gr[1], 3*8); /* r1-r3 */
+ err |= __copy_from_user(&pt->r8, &sc->sc_gr[8], 4*8); /* r8-r11 */
+ err |= __copy_from_user(&pt->r12, &sc->sc_gr[12], 4*8); /* r12-r15 */
+ err |= __copy_from_user(&pt->r16, &sc->sc_gr[16], 16*8); /* r16-r31 */
+
+ /* establish new instruction pointer: */
+ pt->cr_iip = ip & ~0x3UL;
+ ia64_psr(pt)->ri = ip & 0x3;
+ pt->cr_ipsr = (pt->cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM);
+
+ ia64_put_nat_bits (pt, sw, nat); /* restore the original scratch NaT bits */
+
+ if (flags & IA64_SC_FLAG_FPH_VALID) {
+ struct task_struct *fpu_owner = ia64_get_fpu_owner();
+
+ __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
+ if (fpu_owner == current) {
+ __ia64_load_fpu(current->thread.fph);
+ }
+ }
+ return err;
+}
+
+/*
+ * When we get here, ((struct switch_stack *) pt - 1) is a
+ * switch_stack frame that has no defined value. Upon return, we
+ * expect sw->caller_unat to contain the new unat value. The reason
+ * we use a full switch_stack frame is so everything is symmetric
+ * with ia64_do_signal().
+ */
+long
+ia64_rt_sigreturn (struct pt_regs *pt)
+{
+ extern char ia64_strace_leave_kernel, ia64_leave_kernel;
+ struct sigcontext *sc;
+ struct siginfo si;
+ sigset_t set;
+ long retval;
+
+ sc = &((struct sigframe *) (pt->r12 + 16))->sc;
+
+ /*
+ * When we return to the previously executing context, r8 and
+ * r10 have already been setup the way we want them. Indeed,
+ * if the signal wasn't delivered while in a system call, we
+ * must not touch r8 or r10 as otherwise user-level state could
+ * be corrupted.
+ */
+ retval = (long) &ia64_leave_kernel | 1;
+ if ((current->flags & PF_TRACESYS)
+ && (sc->sc_flags & IA64_SC_FLAG_IN_SYSCALL))
+ retval = (long) &ia64_strace_leave_kernel;
+
+ if (!access_ok(VERIFY_READ, sc, sizeof(*sc)))
+ goto give_sigsegv;
+
+ if (GET_SIGSET(&set, &sc->sc_mask))
+ goto give_sigsegv;
+
+ sigdelsetmask(&set, ~_BLOCKABLE);
+ spin_lock_irq(&current->sigmask_lock);
+ current->blocked = set;
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ if (restore_sigcontext(sc, pt))
+ goto give_sigsegv;
+
+#if DEBUG_SIG
+ printk("SIG return (%s:%d): sp=%lx ip=%lx\n",
+ current->comm, current->pid, pt->r12, pt->cr_iip);
+#endif
+ /*
+ * It is more difficult to avoid calling this function than to
+ * call it and ignore errors.
+ */
+ do_sigaltstack(&sc->sc_stack, 0, pt->r12);
+ return retval;
+
+ give_sigsegv:
+ si.si_signo = SIGSEGV;
+ si.si_errno = 0;
+ si.si_code = SI_KERNEL;
+ si.si_pid = current->pid;
+ si.si_uid = current->uid;
+ si.si_addr = sc;
+ force_sig_info(SIGSEGV, &si, current);
+ return retval;
+}
+
+/*
+ * This does just the minimum required setup of sigcontext.
+ * Specifically, it only installs data that is either not knowable at
+ * the user-level or that gets modified before execution in the
+ * trampoline starts. Everything else is done at the user-level.
+ */
+static long
+setup_sigcontext (struct sigcontext *sc, sigset_t *mask, struct pt_regs *pt)
+{
+ struct switch_stack *sw = (struct switch_stack *) pt - 1;
+ struct task_struct *fpu_owner = ia64_get_fpu_owner();
+ unsigned long flags = 0, ifs, nat;
+ long err;
+
+ ifs = pt->cr_ifs;
+
+ if (on_sig_stack((unsigned long) sc))
+ flags |= IA64_SC_FLAG_ONSTACK;
+ if ((ifs & (1UL << 63)) == 0) {
+ /* if cr_ifs isn't valid, we got here through a syscall */
+ flags |= IA64_SC_FLAG_IN_SYSCALL;
+ }
+ if ((fpu_owner == current) || (current->thread.flags & IA64_THREAD_FPH_VALID)) {
+ flags |= IA64_SC_FLAG_FPH_VALID;
+ if (fpu_owner == current) {
+ __ia64_save_fpu(current->thread.fph);
+ }
+ __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16);
+ }
+
+ /*
+ * Note: sw->ar_unat is UNDEFINED unless the process is being
+ * PTRACED. However, this is OK because the NaT bits of the
+ * preserved registers (r4-r7) are never looked at by the
+ * signal handler (the registers r4-r7 themselves are used instead).
+ */
+ nat = ia64_get_nat_bits(pt, sw);
+
+ err = __put_user(flags, &sc->sc_flags);
+ err |= __put_user(nat, &sc->sc_nat);
+ err |= PUT_SIGSET(mask, &sc->sc_mask);
+ err |= __put_user(pt->cr_ipsr & IA64_PSR_UM, &sc->sc_um);
+ err |= __put_user(pt->ar_rsc, &sc->sc_ar_rsc);
+ err |= __put_user(pt->ar_ccv, &sc->sc_ar_ccv);
+ err |= __put_user(pt->ar_unat, &sc->sc_ar_unat); /* ar.unat */
+ err |= __put_user(pt->ar_fpsr, &sc->sc_ar_fpsr); /* ar.fpsr */
+ err |= __put_user(pt->ar_pfs, &sc->sc_ar_pfs);
+ err |= __put_user(pt->pr, &sc->sc_pr); /* predicates */
+ err |= __put_user(pt->b0, &sc->sc_br[0]); /* b0 (rp) */
+ err |= __put_user(pt->b6, &sc->sc_br[6]); /* b6 */
+ err |= __put_user(pt->b7, &sc->sc_br[7]); /* b7 */
+
+ err |= __copy_to_user(&sc->sc_gr[1], &pt->r1, 3*8); /* r1-r3 */
+ err |= __copy_to_user(&sc->sc_gr[8], &pt->r8, 4*8); /* r8-r11 */
+ err |= __copy_to_user(&sc->sc_gr[12], &pt->r12, 4*8); /* r12-r15 */
+ err |= __copy_to_user(&sc->sc_gr[16], &pt->r16, 16*8); /* r16-r31 */
+
+ err |= __put_user(pt->cr_iip + ia64_psr(pt)->ri, &sc->sc_ip);
+ err |= __put_user(pt->r12, &sc->sc_gr[12]); /* r12 */
+ return err;
+}
+
+static long
+setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *pt)
+{
+ struct switch_stack *sw = (struct switch_stack *) pt - 1;
+ extern char ia64_sigtramp[], __start_gate_section[];
+ unsigned long tramp_addr, new_rbs = 0;
+ struct sigframe *frame;
+ struct siginfo si;
+ long err;
+
+ frame = (void *) pt->r12;
+ tramp_addr = GATE_ADDR + (ia64_sigtramp - __start_gate_section);
+ if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && !on_sig_stack((unsigned long) frame)) {
+ new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1);
+ frame = (void *) ((current->sas_ss_sp + current->sas_ss_size)
+ & ~(STACK_ALIGN - 1));
+ }
+ frame = (void *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1));
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ goto give_sigsegv;
+
+ err = __copy_to_user(&frame->info, info, sizeof(siginfo_t));
+
+ err |= __put_user(current->sas_ss_sp, &frame->sc.sc_stack.ss_sp);
+ err |= __put_user(current->sas_ss_size, &frame->sc.sc_stack.ss_size);
+ err |= __put_user(sas_ss_flags(pt->r12), &frame->sc.sc_stack.ss_flags);
+ err |= setup_sigcontext(&frame->sc, set, pt);
+
+ if (err)
+ goto give_sigsegv;
+
+ pt->r12 = (unsigned long) frame - 16; /* new stack pointer */
+ pt->r2 = sig; /* signal number */
+ pt->r3 = (unsigned long) ka->sa.sa_handler; /* addr. of handler's proc. descriptor */
+ pt->r15 = new_rbs;
+ pt->ar_fpsr = FPSR_DEFAULT; /* reset fpsr for signal handler */
+ pt->cr_iip = tramp_addr;
+ ia64_psr(pt)->ri = 0; /* start executing in first slot */
+
+ /*
+ * Note: this affects only the NaT bits of the scratch regs
+	 * (the ones saved in pt_regs), which is exactly what we want.
+ * The NaT bits for the preserved regs (r4-r7) are in
+ * sw->ar_unat iff this process is being PTRACED.
+ */
+ sw->caller_unat = 0; /* ensure NaT bits of at least r2, r3, r12, and r15 are clear */
+
+#if DEBUG_SIG
+ printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%lx\n",
+ current->comm, current->pid, sig, pt->r12, pt->cr_iip, pt->r3);
+#endif
+ return 1;
+
+ give_sigsegv:
+ if (sig == SIGSEGV)
+ ka->sa.sa_handler = SIG_DFL;
+ si.si_signo = SIGSEGV;
+ si.si_errno = 0;
+ si.si_code = SI_KERNEL;
+ si.si_pid = current->pid;
+ si.si_uid = current->uid;
+ si.si_addr = frame;
+ force_sig_info(SIGSEGV, &si, current);
+ return 0;
+}
+
+static long
+handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset,
+ struct pt_regs *pt)
+{
+#ifdef CONFIG_IA32_SUPPORT
+ if (IS_IA32_PROCESS(pt)) {
+ /* send signal to IA-32 process */
+ if (!ia32_setup_frame1(sig, ka, info, oldset, pt))
+ return 0;
+ } else
+#endif
+ /* send signal to IA-64 process */
+ if (!setup_frame(sig, ka, info, oldset, pt))
+ return 0;
+
+ if (ka->sa.sa_flags & SA_ONESHOT)
+ ka->sa.sa_handler = SIG_DFL;
+
+ if (!(ka->sa.sa_flags & SA_NODEFER)) {
+ spin_lock_irq(&current->sigmask_lock);
+ sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+ sigaddset(&current->blocked, sig);
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+ }
+ return 1;
+}
+
+/*
+ * When we get here, `pt' points to struct pt_regs and ((struct
+ * switch_stack *) pt - 1) points to a switch stack structure.
+ * HOWEVER, in the normal case, the ONLY value valid in the
+ * switch_stack is the caller_unat field. The entire switch_stack is
+ * valid ONLY if current->flags has PF_PTRACED set.
+ *
+ * Note that `init' is a special process: it doesn't get signals it
+ * doesn't want to handle. Thus you cannot kill init with a
+ * SIGKILL, not even by mistake.
+ *
+ * Note that we go through the signals twice: once to check the
+ * signals that the kernel can handle, and then we build all the
+ * user-level signal handling stack-frames in one go after that.
+ */
+long
+ia64_do_signal (sigset_t *oldset, struct pt_regs *pt, long in_syscall)
+{
+ struct k_sigaction *ka;
+ siginfo_t info;
+ long restart = in_syscall;
+
+ /*
+ * In the ia64_leave_kernel code path, we want the common case
+ * to go fast, which is why we may in certain cases get here
+ * from kernel mode. Just return without doing anything if so.
+ */
+ if (!user_mode(pt))
+ return 0;
+
+ if (!oldset)
+ oldset = &current->blocked;
+
+ if (pt->r10 != -1) {
+ /*
+		 * A system call has to be restarted only if one of
+		 * the error codes ERESTARTNOHAND, ERESTARTSYS, or
+		 * ERESTARTNOINTR is returned. If r10 isn't -1 then
+		 * r8 doesn't hold an error code and we don't need to
+		 * restart the syscall, so we clear the restart flag.
+ */
+ restart = 0;
+ }
+
+ for (;;) {
+ unsigned long signr;
+
+ spin_lock_irq(&current->sigmask_lock);
+ signr = dequeue_signal(&current->blocked, &info);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ if (!signr)
+ break;
+
+ if ((current->flags & PF_PTRACED) && signr != SIGKILL) {
+ /* Let the debugger run. */
+ current->exit_code = signr;
+ set_current_state(TASK_STOPPED);
+ notify_parent(current, SIGCHLD);
+ schedule();
+ signr = current->exit_code;
+
+ /* We're back. Did the debugger cancel the sig? */
+ if (!signr)
+ continue;
+ current->exit_code = 0;
+
+ /* The debugger continued. Ignore SIGSTOP. */
+ if (signr == SIGSTOP)
+ continue;
+
+ /* Update the siginfo structure. Is this good? */
+ if (signr != info.si_signo) {
+ info.si_signo = signr;
+ info.si_errno = 0;
+ info.si_code = SI_USER;
+ info.si_pid = current->p_pptr->pid;
+ info.si_uid = current->p_pptr->uid;
+ }
+
+ /* If the (new) signal is now blocked, requeue it. */
+ if (sigismember(&current->blocked, signr)) {
+ send_sig_info(signr, &info, current);
+ continue;
+ }
+ }
+
+ ka = &current->sig->action[signr - 1];
+ if (ka->sa.sa_handler == SIG_IGN) {
+ if (signr != SIGCHLD)
+ continue;
+ /* Check for SIGCHLD: it's special. */
+ while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0)
+ /* nothing */;
+ continue;
+ }
+
+ if (ka->sa.sa_handler == SIG_DFL) {
+ int exit_code = signr;
+
+ /* Init gets no signals it doesn't want. */
+ if (current->pid == 1)
+ continue;
+
+ switch (signr) {
+ case SIGCONT: case SIGCHLD: case SIGWINCH:
+ continue;
+
+ case SIGTSTP: case SIGTTIN: case SIGTTOU:
+ if (is_orphaned_pgrp(current->pgrp))
+ continue;
+ /* FALLTHRU */
+
+ case SIGSTOP:
+ set_current_state(TASK_STOPPED);
+ current->exit_code = signr;
+ if (!(current->p_pptr->sig->action[SIGCHLD-1].sa.sa_flags
+ & SA_NOCLDSTOP))
+ notify_parent(current, SIGCHLD);
+ schedule();
+ continue;
+
+ case SIGQUIT: case SIGILL: case SIGTRAP:
+ case SIGABRT: case SIGFPE: case SIGSEGV:
+ case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ:
+ if (do_coredump(signr, pt))
+ exit_code |= 0x80;
+ /* FALLTHRU */
+
+ default:
+ lock_kernel();
+ sigaddset(&current->signal, signr);
+ recalc_sigpending(current);
+ current->flags |= PF_SIGNALED;
+ do_exit(exit_code);
+ /* NOTREACHED */
+ }
+ }
+
+ if (restart) {
+ switch (pt->r8) {
+ case ERESTARTSYS:
+ if ((ka->sa.sa_flags & SA_RESTART) == 0) {
+ case ERESTARTNOHAND:
+ pt->r8 = EINTR;
+ /* note: pt->r10 is already -1 */
+ break;
+ }
+ case ERESTARTNOINTR:
+ ia64_decrement_ip(pt);
+ }
+ }
+
+ /* Whee! Actually deliver the signal. If the
+ delivery failed, we need to continue to iterate in
+ this loop so we can deliver the SIGSEGV... */
+ if (handle_signal(signr, ka, &info, oldset, pt))
+ return 1;
+ }
+
+ /* Did we come from a system call? */
+ if (restart) {
+ /* Restart the system call - no handlers present */
+ if (pt->r8 == ERESTARTNOHAND ||
+ pt->r8 == ERESTARTSYS ||
+ pt->r8 == ERESTARTNOINTR) {
+ /*
+ * Note: the syscall number is in r15 which is
+ * saved in pt_regs so all we need to do here
+ * is adjust ip so that the "break"
+ * instruction gets re-executed.
+ */
+ ia64_decrement_ip(pt);
+ }
+ }
+ return 0;
+}
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
new file mode 100644
index 000000000..48a3d68b4
--- /dev/null
+++ b/arch/ia64/kernel/smp.c
@@ -0,0 +1,777 @@
+/*
+ * SMP Support
+ *
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Lots of stuff stolen from arch/alpha/kernel/smp.c
+ *
+ * 99/10/05 davidm Update to bring it in sync with new command-line processing scheme.
+ */
+#define __KERNEL_SYSCALLS__
+
+#include <linux/config.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/kernel_stat.h>
+#include <linux/mm.h>
+
+#include <asm/atomic.h>
+#include <asm/bitops.h>
+#include <asm/current.h>
+#include <asm/delay.h>
+
+#ifdef CONFIG_KDB
+#include <linux/kdb.h>
+void smp_kdb_interrupt (struct pt_regs* regs);
+void kdb_global(int cpuid);
+extern unsigned long smp_kdb_wait;
+extern int kdb_new_cpu;
+#endif
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+
+extern int cpu_idle(void * unused);
+extern void _start(void);
+
+extern int cpu_now_booting; /* Used by head.S to find idle task */
+extern unsigned long cpu_initialized; /* Bitmap of available cpu's */
+extern struct cpuinfo_ia64 cpu_data[NR_CPUS]; /* Duh... */
+
+spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;
+
+#ifdef CONFIG_KDB
+unsigned long cpu_online_map = 1;
+#endif
+
+volatile int cpu_number_map[NR_CPUS] = { -1, }; /* SAPIC ID -> Logical ID */
+volatile int __cpu_logical_map[NR_CPUS] = { -1, }; /* logical ID -> SAPIC ID */
+int smp_num_cpus = 1;
+int bootstrap_processor = -1; /* SAPIC ID of BSP */
+int smp_threads_ready = 0; /* Set when the idlers are all forked */
+unsigned long ipi_base_addr = IPI_DEFAULT_BASE_ADDR; /* Base addr of IPI table */
+cycles_t cacheflush_time = 0;
+unsigned long ap_wakeup_vector = -1; /* External Int to use to wakeup AP's */
+static int max_cpus = -1; /* Command line */
+static unsigned long ipi_op[NR_CPUS];
+struct smp_call_struct {
+ void (*func) (void *info);
+ void *info;
+ long wait;
+ atomic_t unstarted_count;
+ atomic_t unfinished_count;
+};
+static struct smp_call_struct *smp_call_function_data;
+
+#ifdef CONFIG_KDB
+unsigned long smp_kdb_wait = 0; /* Bitmask of waiters */
+#endif
+
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+extern spinlock_t ivr_read_lock;
+#endif
+
+int use_xtp = 0; /* XXX */
+
+#define IPI_RESCHEDULE 0
+#define IPI_CALL_FUNC 1
+#define IPI_CPU_STOP 2
+#define IPI_KDB_INTERRUPT 4
+
+/*
+ * Setup routine for controlling SMP activation
+ *
+ * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
+ * activation entirely (the MPS table probe still happens, though).
+ *
+ * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
+ * greater than 0, limits the maximum number of CPUs activated in
+ * SMP mode to <NUM>.
+ */
+
+static int __init nosmp(char *str)
+{
+ max_cpus = 0;
+ return 1;
+}
+
+__setup("nosmp", nosmp);
+
+static int __init maxcpus(char *str)
+{
+ get_option(&str, &max_cpus);
+ return 1;
+}
+
+__setup("maxcpus=", maxcpus);
+
+/*
+ * Yoink this CPU from the runnable list...
+ */
+void
+halt_processor(void)
+{
+ clear_bit(smp_processor_id(), &cpu_initialized);
+ max_xtp();
+ __cli();
+ for (;;)
+ ;
+
+}
+
+void
+handle_IPI(int irq, void *dev_id, struct pt_regs *regs)
+{
+ int this_cpu = smp_processor_id();
+ unsigned long *pending_ipis = &ipi_op[this_cpu];
+ unsigned long ops;
+
+ /* Count this now; we may make a call that never returns. */
+ cpu_data[this_cpu].ipi_count++;
+
+ mb(); /* Order interrupt and bit testing. */
+ while ((ops = xchg(pending_ipis, 0)) != 0) {
+ mb(); /* Order bit clearing and data access. */
+ do {
+ unsigned long which;
+
+ which = ffz(~ops);
+ ops &= ~(1 << which);
+
+ switch (which) {
+ case IPI_RESCHEDULE:
+ /*
+ * Reschedule callback. Everything to be done is done by the
+ * interrupt return path.
+ */
+ break;
+
+ case IPI_CALL_FUNC:
+ {
+ struct smp_call_struct *data;
+ void (*func)(void *info);
+ void *info;
+ int wait;
+
+ data = smp_call_function_data;
+ func = data->func;
+ info = data->info;
+ wait = data->wait;
+
+ mb();
+ atomic_dec (&data->unstarted_count);
+
+ /* At this point the structure may be gone unless wait is true. */
+ (*func)(info);
+
+ /* Notify the sending CPU that the task is done. */
+ mb();
+ if (wait)
+ atomic_dec (&data->unfinished_count);
+ }
+ break;
+
+ case IPI_CPU_STOP:
+ halt_processor();
+ break;
+
+#ifdef CONFIG_KDB
+ case IPI_KDB_INTERRUPT:
+ smp_kdb_interrupt(regs);
+ break;
+#endif
+
+ default:
+ printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
+ break;
+ } /* Switch */
+ } while (ops);
+
+ mb(); /* Order data access and bit testing. */
+ }
+}
+
+static inline void
+send_IPI(int dest_cpu, unsigned char vector)
+{
+ unsigned long ipi_addr;
+ unsigned long ipi_data;
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ unsigned long flags;
+#endif
+
+ ipi_data = vector;
+ ipi_addr = ipi_base_addr | ((dest_cpu << 8) << 4); /* 16-bit SAPIC ID's; assume CPU bus 0 */
+ mb();
+
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ /*
+ * Disable IVR reads
+ */
+ spin_lock_irqsave(&ivr_read_lock, flags);
+ writeq(ipi_data, ipi_addr);
+ spin_unlock_irqrestore(&ivr_read_lock, flags);
+#else
+ writeq(ipi_data, ipi_addr);
+#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */
+
+}
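A standalone sketch (editor's illustration only, not part of this patch): the address arithmetic in send_IPI() above is compact, so the toy program below works through it with hypothetical values for the IPI base address and vector, showing where the bus-local CPU id ends up in the address that is written.

#include <stdio.h>

int main(void)
{
	/* Hypothetical values, chosen only to make the arithmetic visible. */
	unsigned long ipi_base_addr = 0xfee00000UL;	/* stand-in for IPI_DEFAULT_BASE_ADDR */
	int dest_cpu = 3;				/* bus-local SAPIC id, CPU bus 0 assumed */
	unsigned long ipi_data = 0xfd;			/* stand-in for the IPI vector */

	/* Mirrors send_IPI(): ((dest_cpu << 8) << 4) shifts the id left by 12 bits. */
	unsigned long ipi_addr = ipi_base_addr | ((unsigned long) (dest_cpu << 8) << 4);

	printf("writeq(0x%lx, 0x%lx)\n", ipi_data, ipi_addr);	/* 0xfd -> 0xfee03000 */
	return 0;
}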
+
+static inline void
+send_IPI_single(int dest_cpu, int op)
+{
+
+ if (dest_cpu == -1)
+ return;
+
+ ipi_op[dest_cpu] |= (1 << op);
+ send_IPI(dest_cpu, IPI_IRQ);
+}
+
+static inline void
+send_IPI_allbutself(int op)
+{
+ int i;
+ int cpu_id = 0;
+
+ for (i = 0; i < smp_num_cpus; i++) {
+ cpu_id = __cpu_logical_map[i];
+ if (cpu_id != smp_processor_id())
+ send_IPI_single(cpu_id, op);
+ }
+}
+
+static inline void
+send_IPI_all(int op)
+{
+ int i;
+
+ for (i = 0; i < smp_num_cpus; i++)
+ send_IPI_single(__cpu_logical_map[i], op);
+}
+
+static inline void
+send_IPI_self(int op)
+{
+ send_IPI_single(smp_processor_id(), op);
+}
+
+void
+smp_send_reschedule(int cpu)
+{
+ send_IPI_single(cpu, IPI_RESCHEDULE);
+}
+
+void
+smp_send_stop(void)
+{
+ send_IPI_allbutself(IPI_CPU_STOP);
+}
+
+/*
+ * Run a function on all other CPUs.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <retry> If true, keep retrying until ready.
+ * <wait> If true, wait until function has completed on other CPUs.
+ * [RETURNS] 0 on success, else a negative status code.
+ *
+ * Does not return until remote CPUs are nearly ready to execute <func>
+ * or are executing it or have already executed it.
+ */
+
+int
+smp_call_function (void (*func) (void *info), void *info, int retry, int wait)
+{
+ struct smp_call_struct data;
+ long timeout;
+ static spinlock_t lock = SPIN_LOCK_UNLOCKED;
+
+ data.func = func;
+ data.info = info;
+ data.wait = wait;
+ atomic_set(&data.unstarted_count, smp_num_cpus - 1);
+ atomic_set(&data.unfinished_count, smp_num_cpus - 1);
+
+ if (retry) {
+ while (1) {
+ if (smp_call_function_data) {
+ schedule (); /* Give a mate a go */
+ continue;
+ }
+ spin_lock (&lock);
+ if (smp_call_function_data) {
+ spin_unlock (&lock); /* Bad luck */
+ continue;
+ }
+ /* Mine, all mine! */
+ break;
+ }
+ }
+ else {
+ if (smp_call_function_data)
+ return -EBUSY;
+ spin_lock (&lock);
+ if (smp_call_function_data) {
+ spin_unlock (&lock);
+ return -EBUSY;
+ }
+ }
+
+ smp_call_function_data = &data;
+ spin_unlock (&lock);
+ data.func = func;
+ data.info = info;
+ atomic_set (&data.unstarted_count, smp_num_cpus - 1);
+ data.wait = wait;
+ if (wait)
+ atomic_set (&data.unfinished_count, smp_num_cpus - 1);
+
+ /* Send a message to all other CPUs and wait for them to respond */
+ send_IPI_allbutself(IPI_CALL_FUNC);
+
+ /* Wait for response */
+ timeout = jiffies + HZ;
+ while ( (atomic_read (&data.unstarted_count) > 0) &&
+ time_before (jiffies, timeout) )
+ barrier ();
+ if (atomic_read (&data.unstarted_count) > 0) {
+ smp_call_function_data = NULL;
+ return -ETIMEDOUT;
+ }
+ if (wait)
+ while (atomic_read (&data.unfinished_count) > 0)
+ barrier ();
+ smp_call_function_data = NULL;
+ return 0;
+}
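The rendezvous above is easiest to follow with the two counters written out. Below is a minimal single-threaded model (editor's illustration, not part of this patch) using plain ints and made-up names; the real code uses atomic_t, spinlocks, and IPIs.

#include <stdio.h>

struct call_model {
	void (*func)(void *info);
	void *info;
	int unstarted_count;	/* counted down before each remote CPU runs func */
	int unfinished_count;	/* counted down after, only checked when the caller waits */
};

static void say_hello(void *info)
{
	printf("hello, info=%s\n", (char *) info);
}

int main(void)
{
	int ncpus = 4;
	struct call_model data = { say_hello, "demo", ncpus - 1, ncpus - 1 };

	for (int cpu = 1; cpu < ncpus; cpu++) {
		printf("model cpu %d handling IPI_CALL_FUNC\n", cpu);
		data.unstarted_count--;		/* caller may now reuse the structure */
		data.func(data.info);
		data.unfinished_count--;	/* tells a waiting caller we are done */
	}
	printf("unstarted=%d unfinished=%d\n", data.unstarted_count, data.unfinished_count);
	return 0;
}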
+
+/*
+ * Flush all other CPUs' TLBs and then our own. Do this with smp_call_function() as we
+ * want to ensure all TLBs are flushed before proceeding.
+ *
+ * XXX: Is it OK to use the same ptc.e info on all cpus?
+ */
+void
+smp_flush_tlb_all(void)
+{
+ smp_call_function((void (*)(void *))__flush_tlb_all, NULL, 1, 1);
+ __flush_tlb_all();
+}
+
+/*
+ * Ideally sets up per-cpu profiling hooks. Doesn't do much now...
+ */
+static inline void __init
+smp_setup_percpu_timer(int cpuid)
+{
+ cpu_data[cpuid].prof_counter = 1;
+ cpu_data[cpuid].prof_multiplier = 1;
+}
+
+void
+smp_do_timer(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+ int user = user_mode(regs);
+ struct cpuinfo_ia64 *data = &cpu_data[cpu];
+
+ extern void update_one_process(struct task_struct *, unsigned long, unsigned long,
+ unsigned long, int);
+ if (!--data->prof_counter) {
+ irq_enter(cpu, TIMER_IRQ);
+
+ update_one_process(current, 1, user, !user, cpu);
+ if (current->pid) {
+ if (--current->counter < 0) {
+ current->counter = 0;
+ current->need_resched = 1;
+ }
+
+ if (user) {
+ if (current->priority < DEF_PRIORITY) {
+ kstat.cpu_nice++;
+ kstat.per_cpu_nice[cpu]++;
+ } else {
+ kstat.cpu_user++;
+ kstat.per_cpu_user[cpu]++;
+ }
+ } else {
+ kstat.cpu_system++;
+ kstat.per_cpu_system[cpu]++;
+ }
+ }
+
+ data->prof_counter = data->prof_multiplier;
+ irq_exit(cpu, TIMER_IRQ);
+ }
+}
+
+
+/*
+ * Called by both boot and secondaries to move global data into
+ * per-processor storage.
+ */
+static inline void __init
+smp_store_cpu_info(int cpuid)
+{
+ struct cpuinfo_ia64 *c = &cpu_data[cpuid];
+
+ identify_cpu(c);
+}
+
+/*
+ * SAL shoves the AP's here when we start them. Physical mode, no kernel TR,
+ * no RRs set, better than even chance that psr is bogus. Fix all that and
+ * call _start. In effect, pretend to be lilo.
+ *
+ * Stolen from lilo_start.c. Thanks David!
+ */
+void
+start_ap(void)
+{
+ unsigned long flags;
+
+ /*
+ * Install a translation register that identity maps the
+ * kernel's 256MB page(s).
+ */
+ ia64_clear_ic(flags);
+ ia64_set_rr( 0, (0x1000 << 8) | (_PAGE_SIZE_1M << 2));
+ ia64_set_rr(PAGE_OFFSET, (ia64_rid(0, PAGE_OFFSET) << 8) | (_PAGE_SIZE_256M << 2));
+ ia64_itr(0x3, 1, PAGE_OFFSET,
+ pte_val(mk_pte_phys(0, __pgprot(__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX))),
+ _PAGE_SIZE_256M);
+
+ flags = (IA64_PSR_IT | IA64_PSR_IC | IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_DFH |
+ IA64_PSR_BN);
+
+ asm volatile ("movl r8 = 1f\n"
+ ";;\n"
+ "mov cr.ipsr=%0\n"
+ "mov cr.iip=r8\n"
+ "mov cr.ifs=r0\n"
+ ";;\n"
+ "rfi;;"
+ "1:\n"
+ "movl r1 = __gp" :: "r"(flags) : "r8");
+ _start();
+}
+
+
+/*
+ * AP's start using C here.
+ */
+void __init
+smp_callin(void)
+{
+ extern void ia64_rid_init(void);
+ extern void ia64_init_itm(void);
+ extern void ia64_cpu_local_tick(void);
+
+ ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_PP);
+ ia64_set_fpu_owner(0);
+ ia64_rid_init(); /* initialize region ids */
+
+ cpu_init();
+ __flush_tlb_all();
+
+ smp_store_cpu_info(smp_processor_id());
+ smp_setup_percpu_timer(smp_processor_id());
+
+ while (!smp_threads_ready)
+ mb();
+
+ normal_xtp();
+
+ /* setup the CPU local timer tick */
+ ia64_cpu_local_tick();
+
+ /* Disable all local interrupts */
+ ia64_set_lrr0(0, 1);
+ ia64_set_lrr1(0, 1);
+
+ __sti(); /* Interrupts have been off till now. */
+ cpu_idle(NULL);
+}
+
+/*
+ * Create the idle task for a new AP. DO NOT use kernel_thread() because
+ * that could end up calling schedule() in the ia64_leave_kernel exit
+ * path in which case the new idle task could get scheduled before we
+ * had a chance to remove it from the run-queue...
+ */
+static int __init
+fork_by_hand(void)
+{
+ /*
+ * Don't care about the usp and regs settings since we'll never
+ * reschedule the forked task.
+ */
+ return do_fork(CLONE_VM|CLONE_PID, 0, 0);
+}
+
+/*
+ * Bring one cpu online.
+ *
+ * NB: cpuid is the CPU BUS-LOCAL ID, not the entire SAPIC ID. See asm/smp.h.
+ */
+static int __init
+smp_boot_one_cpu(int cpuid, int cpunum)
+{
+ struct task_struct *idle;
+ long timeout;
+
+ /*
+ * Create an idle task for this CPU. Note that the address we
+ * give to kernel_thread is irrelevant -- it's going to start
+	 * where the OS_BOOT_RENDEZ vector in SAL says to start. But
+ * this gets all the other task-y sort of data structures set
+ * up like we wish. We need to pull the just created idle task
+ * off the run queue and stuff it into the init_tasks[] array.
+ * Sheesh . . .
+ */
+ if (fork_by_hand() < 0)
+ panic("failed fork for CPU %d", cpuid);
+ /*
+ * We remove it from the pidhash and the runqueue
+ * once we got the process:
+ */
+ idle = init_task.prev_task;
+ if (!idle)
+ panic("No idle process for CPU %d", cpuid);
+ init_tasks[cpunum] = idle;
+ del_from_runqueue(idle);
+ unhash_process(idle);
+
+ /* Schedule the first task manually. */
+ idle->processor = cpuid;
+ idle->has_cpu = 1;
+
+	/* Let _start know what logical CPU we're booting (offset into init_tasks[]) */
+ cpu_now_booting = cpunum;
+
+ /* Kick the AP in the butt */
+ send_IPI(cpuid, ap_wakeup_vector);
+ ia64_srlz_i();
+ mb();
+
+ /*
+ * OK, wait a bit for that CPU to finish staggering about. smp_callin() will
+ * call cpu_init() which will set a bit for this AP. When that bit flips, the AP
+ * is waiting for smp_threads_ready to be 1 and we can move on.
+ */
+ for (timeout = 0; timeout < 100000; timeout++) {
+ if (test_bit(cpuid, &cpu_initialized))
+ goto alive;
+ udelay(10);
+ barrier();
+ }
+
+ printk(KERN_ERR "SMP: Processor %d is stuck.\n", cpuid);
+ return -1;
+
+alive:
+ /* Remember the AP data */
+ cpu_number_map[cpuid] = cpunum;
+#ifdef CONFIG_KDB
+ cpu_online_map |= (1<<cpunum);
+	printk ("DEBUGGER: cpu_online_map = 0x%08lx\n", cpu_online_map);
+#endif
+ __cpu_logical_map[cpunum] = cpuid;
+ return 0;
+}
+
+
+
+/*
+ * Called by smp_init to bring all the secondaries online and hold them.
+ * XXX: this is ACPI specific; it uses "magic" variables exported from acpi.c
+ * to 'discover' the AP's. Blech.
+ */
+void __init
+smp_boot_cpus(void)
+{
+ int i, cpu_count = 1;
+ unsigned long bogosum;
+ int sapic_id;
+ extern int acpi_cpus;
+ extern int acpi_apic_map[32];
+
+ /* Take care of some initial bookkeeping. */
+ memset(&cpu_number_map, -1, sizeof(cpu_number_map));
+ memset(&__cpu_logical_map, -1, sizeof(__cpu_logical_map));
+ memset(&ipi_op, 0, sizeof(ipi_op));
+
+ /* Setup BSP mappings */
+ cpu_number_map[bootstrap_processor] = 0;
+ __cpu_logical_map[0] = bootstrap_processor;
+ current->processor = bootstrap_processor;
+
+ /* Mark BSP booted and get active_mm context */
+ cpu_init();
+
+ /* reset XTP for interrupt routing */
+ normal_xtp();
+
+ /* And generate an entry in cpu_data */
+ smp_store_cpu_info(bootstrap_processor);
+#if 0
+ smp_tune_scheduling();
+#endif
+ smp_setup_percpu_timer(bootstrap_processor);
+
+ init_idle();
+
+ /* Nothing to do when told not to. */
+ if (max_cpus == 0) {
+ printk(KERN_INFO "SMP mode deactivated.\n");
+ return;
+ }
+
+ if (acpi_cpus > 1) {
+ printk(KERN_INFO "SMP: starting up secondaries.\n");
+
+ for (i = 0; i < NR_CPUS; i++) {
+ if (acpi_apic_map[i] == -1 ||
+ acpi_apic_map[i] == bootstrap_processor << 8) /* XXX Fix me Walt */
+ continue;
+
+ /*
+ * IA64 SAPIC ID's are 16-bits. See asm/smp.h for more info
+ */
+ sapic_id = acpi_apic_map[i] >> 8;
+ if (smp_boot_one_cpu(sapic_id, cpu_count))
+ continue;
+
+ cpu_count++; /* Count good CPUs only... */
+ }
+ }
+
+ if (cpu_count == 1) {
+ printk(KERN_ERR "SMP: Bootstrap processor only.\n");
+ return;
+ }
+
+ bogosum = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (cpu_initialized & (1L << i))
+ bogosum += cpu_data[i].loops_per_sec;
+ }
+
+ printk(KERN_INFO "SMP: Total of %d processors activated "
+ "(%lu.%02lu BogoMIPS).\n",
+ cpu_count, (bogosum + 2500) / 500000,
+ ((bogosum + 2500) / 5000) % 100);
+
+ smp_num_cpus = cpu_count;
+}
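An editor's aside with made-up numbers: the BogoMIPS line printed above divides the summed loops_per_sec by 500000 and derives two decimal places from the remainder. A quick standalone check of that formatting:

#include <stdio.h>

int main(void)
{
	/* Two hypothetical CPUs with loops_per_sec of 200 million each,
	 * i.e. about 400 BogoMIPS apiece (BogoMIPS = loops_per_sec / 500000). */
	unsigned long bogosum = 200000000UL + 200000000UL;

	printf("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
	       2, (bogosum + 2500) / 500000, ((bogosum + 2500) / 5000) % 100);
	return 0;	/* prints "... (800.00 BogoMIPS)." */
}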
+
+/*
+ * Called from main.c by each AP.
+ */
+void __init
+smp_commence(void)
+{
+ mb();
+}
+
+/*
+ * Not used; part of the i386 bringup
+ */
+void __init
+initialize_secondary(void)
+{
+}
+
+int __init
+setup_profiling_timer(unsigned int multiplier)
+{
+ return -EINVAL;
+}
+
+/*
+ * Assume that CPUs have been discovered by some platform-dependent
+ * interface. For SoftSDV/Lion, that would be ACPI.
+ *
+ * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP().
+ *
+ * So this just gets the BSP SAPIC ID and prints it out. Dull, huh?
+ *
+ * Not anymore. This also registers the AP OS_MC_RENDEZ address with SAL.
+ */
+void __init
+init_smp_config(void)
+{
+ struct fptr {
+ unsigned long fp;
+ unsigned long gp;
+ } *ap_startup;
+ long sal_ret;
+
+ /* Grab the BSP ID */
+ bootstrap_processor = hard_smp_processor_id();
+
+ /* Tell SAL where to drop the AP's. */
+ ap_startup = (struct fptr *) start_ap;
+ sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
+ __pa(ap_startup->fp), __pa(ap_startup->gp), 0,
+ 0, 0, 0);
+ if (sal_ret < 0) {
+ printk("SMP: Can't set SAL AP Boot Rendezvous: %s\n", ia64_sal_strerror(sal_ret));
+ printk(" Forcing UP mode\n");
+ smp_num_cpus = 1;
+ }
+
+}
+
+#ifdef CONFIG_KDB
+void smp_kdb_stop (int all, struct pt_regs* regs)
+{
+ if (all)
+ {
+ printk ("Sending IPI to all on CPU %i\n", smp_processor_id ());
+ smp_kdb_wait = 0xffffffff;
+ clear_bit (smp_processor_id(), &smp_kdb_wait);
+ send_IPI_allbutself (IPI_KDB_INTERRUPT);
+ }
+ else
+ {
+ printk ("Sending IPI to self on CPU %i\n",
+ smp_processor_id ());
+ set_bit (smp_processor_id(), &smp_kdb_wait);
+ clear_bit (__cpu_logical_map[kdb_new_cpu], &smp_kdb_wait);
+ smp_kdb_interrupt (regs);
+ }
+}
+
+void smp_kdb_interrupt (struct pt_regs* regs)
+{
+	printk ("kdb: IPI on CPU %i with mask 0x%08lx\n",
+ smp_processor_id (), smp_kdb_wait);
+
+ /* All CPUs spin here forever */
+ while (test_bit (smp_processor_id(), &smp_kdb_wait));
+
+ /* Enter KDB on CPU selected by KDB on the last CPU */
+ if (__cpu_logical_map[kdb_new_cpu] == smp_processor_id ())
+ {
+ kdb (KDB_REASON_SWITCH, 0, regs);
+ }
+}
+
+#endif
+
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
new file mode 100644
index 000000000..18a498a09
--- /dev/null
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -0,0 +1,216 @@
+/*
+ * This file contains various system calls that have different calling
+ * conventions on different platforms.
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/sched.h>
+#include <linux/file.h> /* doh, must come after sched.h... */
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+
+asmlinkage long
+ia64_getpriority (int which, int who, long arg2, long arg3, long arg4, long arg5, long arg6,
+ long arg7, long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+ extern long sys_getpriority (int, int);
+ long prio;
+
+ prio = sys_getpriority(which, who);
+ if (prio >= 0) {
+ regs->r8 = 0; /* ensure negative priority is not mistaken as error code */
+ prio = 20 - prio;
+ }
+ return prio;
+}
+
+asmlinkage unsigned long
+sys_getpagesize (void)
+{
+ return PAGE_SIZE;
+}
+
+asmlinkage unsigned long
+ia64_shmat (int shmid, void *shmaddr, int shmflg, long arg3, long arg4, long arg5, long arg6,
+ long arg7, long stack)
+{
+ extern int sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr);
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+ unsigned long raddr;
+ int retval;
+
+ retval = sys_shmat(shmid, shmaddr, shmflg, &raddr);
+ if (retval < 0)
+ return retval;
+
+ regs->r8 = 0; /* ensure negative addresses are not mistaken as an error code */
+ return raddr;
+}
+
+asmlinkage unsigned long
+ia64_brk (long brk, long arg1, long arg2, long arg3,
+ long arg4, long arg5, long arg6, long arg7, long stack)
+{
+ extern unsigned long sys_brk (unsigned long brk);
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+ unsigned long retval;
+
+ retval = sys_brk(brk);
+
+ regs->r8 = 0; /* ensure large retval isn't mistaken as error code */
+ return retval;
+}
+
+/*
+ * On IA-64, we return the two file descriptors in ret0 and ret1 (r8
+ * and r9) as this is faster than doing a copy_to_user().
+ */
+asmlinkage long
+sys_pipe (long arg0, long arg1, long arg2, long arg3,
+ long arg4, long arg5, long arg6, long arg7, long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+ int fd[2];
+ int retval;
+
+ lock_kernel();
+ retval = do_pipe(fd);
+ if (retval)
+ goto out;
+ retval = fd[0];
+ regs->r9 = fd[1];
+ out:
+ unlock_kernel();
+ return retval;
+}
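A standalone sketch (editor's illustration, not part of this patch) of the convention described before sys_pipe(): one descriptor comes back as the ordinary return value and the other rides back in the r9 slot of pt_regs, sparing a copy_to_user(). The structure and descriptor numbers below are stand-ins.

#include <stdio.h>

struct fake_regs { long r9; };		/* stand-in for the r9 field of pt_regs */

static long model_sys_pipe(struct fake_regs *regs)
{
	int fd[2] = { 3, 4 };		/* pretend do_pipe() handed us these */

	regs->r9 = fd[1];		/* second descriptor returned via r9 */
	return fd[0];			/* first descriptor is the normal return value */
}

int main(void)
{
	struct fake_regs regs;
	long fd0 = model_sys_pipe(&regs);

	printf("pipe fds: %ld and %ld\n", fd0, regs.r9);
	return 0;
}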
+
+static inline unsigned long
+do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff)
+{
+ struct file *file = 0;
+
+ /*
+	 * A zero-length mmap always succeeds in Linux, independent of
+ * whether or not the remaining arguments are valid.
+ */
+ if (PAGE_ALIGN(len) == 0)
+ return addr;
+
+#ifdef notyet
+ /* Don't permit mappings that would cross a region boundary: */
+ region_start = IA64_GET_REGION(addr);
+ region_end = IA64_GET_REGION(addr + len);
+ if (region_start != region_end)
+ return -EINVAL;
+
+ <<x??x>>
+#endif
+
+ flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+ if (!(flags & MAP_ANONYMOUS)) {
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+ }
+
+ down(&current->mm->mmap_sem);
+ lock_kernel();
+
+ addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+
+ unlock_kernel();
+ up(&current->mm->mmap_sem);
+
+ if (file)
+ fput(file);
+ return addr;
+}
+
+/*
+ * mmap2() is like mmap() except that the offset is expressed in units
+ * of PAGE_SIZE (instead of bytes). This makes it possible to mmap
+ * (pieces of) files that are larger than the address space of the CPU.
+ */
+asmlinkage unsigned long
+sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff,
+ long arg6, long arg7, long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+
+ addr = do_mmap2(addr, len, prot, flags, fd, pgoff);
+ if (!IS_ERR(addr))
+ regs->r8 = 0; /* ensure large addresses are not mistaken as failures... */
+ return addr;
+}
+
+asmlinkage unsigned long
+sys_mmap (unsigned long addr, unsigned long len, int prot, int flags,
+ int fd, long off, long arg6, long arg7, long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+
+ addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+ if (!IS_ERR(addr))
+ regs->r8 = 0; /* ensure large addresses are not mistaken as failures... */
+ return addr;
+}
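Editor's aside: the only difference between sys_mmap() and sys_mmap2() above is whether the offset arrives in bytes or in page units. A standalone check of the conversion, with a hypothetical page size:

#include <stdio.h>

#define PAGE_SHIFT_EXAMPLE 14	/* hypothetical 16KB pages, for the arithmetic only */

int main(void)
{
	long byte_off = 5L << 20;			/* 5MB into the file */
	long pgoff = byte_off >> PAGE_SHIFT_EXAMPLE;	/* 320 pages of 16KB */

	printf("byte offset %ld -> pgoff %ld\n", byte_off, pgoff);
	return 0;
}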
+
+asmlinkage long
+sys_ioperm (unsigned long from, unsigned long num, int on)
+{
+ printk(KERN_ERR "sys_ioperm(from=%lx, num=%lx, on=%d)\n", from, num, on);
+ return -EIO;
+}
+
+asmlinkage long
+sys_iopl (int level, long arg1, long arg2, long arg3)
+{
+ lock_kernel();
+ printk(KERN_ERR "sys_iopl(level=%d)!\n", level);
+ unlock_kernel();
+ return -ENOSYS;
+}
+
+asmlinkage long
+sys_vm86 (long arg0, long arg1, long arg2, long arg3)
+{
+ lock_kernel();
+ printk(KERN_ERR "sys_vm86(%lx, %lx, %lx, %lx)!\n", arg0, arg1, arg2, arg3);
+ unlock_kernel();
+ return -ENOSYS;
+}
+
+asmlinkage long
+sys_modify_ldt (long arg0, long arg1, long arg2, long arg3)
+{
+ lock_kernel();
+ printk(KERN_ERR "sys_modify_ldt(%lx, %lx, %lx, %lx)!\n", arg0, arg1, arg2, arg3);
+ unlock_kernel();
+ return -ENOSYS;
+}
+
+#ifndef CONFIG_PCI
+
+asmlinkage long
+sys_pciconfig_read (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
+ void *buf)
+{
+ return -ENOSYS;
+}
+
+asmlinkage long
+sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
+ void *buf)
+{
+ return -ENOSYS;
+}
+
+
+#endif /* CONFIG_PCI */
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
new file mode 100644
index 000000000..7c5ace740
--- /dev/null
+++ b/arch/ia64/kernel/time.c
@@ -0,0 +1,290 @@
+/*
+ * linux/arch/ia64/kernel/time.c
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998-2000 Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1999-2000 David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ * Copyright (C) 1999-2000 VA Linux Systems
+ * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+
+#include <asm/delay.h>
+#include <asm/efi.h>
+#include <asm/irq.h>
+#include <asm/machvec.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+
+extern rwlock_t xtime_lock;
+extern volatile unsigned long lost_ticks;
+
+#ifdef CONFIG_IA64_DEBUG_IRQ
+
+unsigned long last_cli_ip;
+
+#endif
+
+static struct {
+ unsigned long delta;
+ unsigned long next[NR_CPUS];
+} itm;
+
+static void
+do_profile (unsigned long ip)
+{
+ extern char _stext;
+
+ if (prof_buffer && current->pid) {
+ ip -= (unsigned long) &_stext;
+ ip >>= prof_shift;
+ /*
+ * Don't ignore out-of-bounds IP values silently,
+ * put them into the last histogram slot, so if
+ * present, they will show up as a sharp peak.
+ */
+ if (ip > prof_len - 1)
+ ip = prof_len - 1;
+
+ atomic_inc((atomic_t *) &prof_buffer[ip]);
+ }
+}
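Editor's illustration with invented numbers: do_profile() turns an instruction pointer into a histogram slot by subtracting the text base, shifting by prof_shift, and clamping overflows into the last slot. A standalone run of that arithmetic:

#include <stdio.h>

int main(void)
{
	/* All values hypothetical; only the bucketing arithmetic matters. */
	unsigned long text_base = 0x40000000UL;	/* stand-in for &_stext */
	unsigned long prof_shift = 4;		/* 16-byte buckets */
	unsigned long prof_len = 1024;		/* number of histogram slots */
	unsigned long ip = text_base + 0x12345;	/* sampled instruction pointer */

	ip -= text_base;
	ip >>= prof_shift;
	if (ip > prof_len - 1)			/* out-of-range samples pile up in the last slot */
		ip = prof_len - 1;
	printf("sample lands in slot %lu\n", ip);	/* 0x1234 > 1023, so slot 1023 */
	return 0;
}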
+
+/*
+ * Return the number of microseconds that elapsed since the last
+ * update to jiffies. The xtime_lock must be at least read-locked when
+ * calling this routine.
+ */
+static inline unsigned long
+gettimeoffset (void)
+{
+ unsigned long now = ia64_get_itc();
+ unsigned long elapsed_cycles, lost;
+
+ elapsed_cycles = now - (itm.next[smp_processor_id()] - itm.delta);
+
+ lost = lost_ticks;
+ if (lost)
+ elapsed_cycles += lost*itm.delta;
+
+ return (elapsed_cycles*my_cpu_data.usec_per_cyc) >> IA64_USEC_PER_CYC_SHIFT;
+}
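A sketch with assumed numbers (editor's illustration, not part of this patch): gettimeoffset() converts elapsed ITC cycles to microseconds with a precomputed fixed-point factor, so each call costs one multiply and one shift instead of a division. The shift and frequency below are stand-ins for IA64_USEC_PER_CYC_SHIFT and the real ITC rate.

#include <stdio.h>

#define SHIFT 41	/* hypothetical stand-in for IA64_USEC_PER_CYC_SHIFT */

int main(void)
{
	unsigned long long itc_freq = 300000000ULL;	/* hypothetical 300MHz ITC */
	unsigned long long usec_per_cyc = (1000000ULL << SHIFT) / itc_freq;
	unsigned long long elapsed_cycles = 450000ULL;	/* roughly 1.5ms worth of cycles */
	unsigned long long usec = (elapsed_cycles * usec_per_cyc) >> SHIFT;

	printf("%llu cycles at %lluHz -> about %lluus\n", elapsed_cycles, itc_freq, usec);
	return 0;
}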
+
+void
+do_settimeofday (struct timeval *tv)
+{
+ write_lock_irq(&xtime_lock);
+ {
+ /*
+ * This is revolting. We need to set the xtime.tv_usec
+		 * correctly. However, the value in this location is
+		 * the value at the last tick. Discover what
+ * correction gettimeofday would have done, and then
+ * undo it!
+ */
+ tv->tv_usec -= gettimeoffset();
+ while (tv->tv_usec < 0) {
+ tv->tv_usec += 1000000;
+ tv->tv_sec--;
+ }
+
+ xtime = *tv;
+ time_adjust = 0; /* stop active adjtime() */
+ time_status |= STA_UNSYNC;
+ time_maxerror = NTP_PHASE_LIMIT;
+ time_esterror = NTP_PHASE_LIMIT;
+ }
+ write_unlock_irq(&xtime_lock);
+}
+
+void
+do_gettimeofday (struct timeval *tv)
+{
+ unsigned long flags, usec, sec;
+
+ read_lock_irqsave(&xtime_lock, flags);
+ {
+ usec = gettimeoffset();
+
+ sec = xtime.tv_sec;
+ usec += xtime.tv_usec;
+ }
+ read_unlock_irqrestore(&xtime_lock, flags);
+
+ while (usec >= 1000000) {
+ usec -= 1000000;
+ ++sec;
+ }
+
+ tv->tv_sec = sec;
+ tv->tv_usec = usec;
+}
+
+static void
+timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ static unsigned long last_time;
+ static unsigned char count;
+ int cpu = smp_processor_id();
+
+ /*
+ * Here we are in the timer irq handler. We have irqs locally
+ * disabled, but we don't know if the timer_bh is running on
+	 * another CPU. We need to avoid an SMP race by acquiring the
+ * xtime_lock.
+ */
+ write_lock(&xtime_lock);
+ while (1) {
+ /* do kernel PC profiling here. */
+ if (!user_mode(regs))
+ do_profile(regs->cr_iip);
+
+#ifdef CONFIG_SMP
+ smp_do_timer(regs);
+ if (smp_processor_id() == bootstrap_processor)
+ do_timer(regs);
+#else
+ do_timer(regs);
+#endif
+
+ itm.next[cpu] += itm.delta;
+ /*
+ * There is a race condition here: to be on the "safe"
+ * side, we process timer ticks until itm.next is
+ * ahead of the itc by at least half the timer
+ * interval. This should give us enough time to set
+ * the new itm value without losing a timer tick.
+ */
+ if (time_after(itm.next[cpu], ia64_get_itc() + itm.delta/2)) {
+ ia64_set_itm(itm.next[cpu]);
+ break;
+ }
+
+#if !(defined(CONFIG_IA64_SOFTSDV_HACKS) && defined(CONFIG_SMP))
+ /*
+		 * SoftSDV in SMP mode is _slow_, so we do lose ticks,
+ * but it's really OK...
+ */
+ if (count > 0 && jiffies - last_time > 5*HZ)
+ count = 0;
+ if (count++ == 0) {
+ last_time = jiffies;
+ printk("Lost clock tick on CPU %d (now=%lx, next=%lx)!!\n",
+ cpu, ia64_get_itc(), itm.next[cpu]);
+# ifdef CONFIG_IA64_DEBUG_IRQ
+ printk("last_cli_ip=%lx\n", last_cli_ip);
+# endif
+ }
+#endif
+ }
+ write_unlock(&xtime_lock);
+}
+
+/*
+ * Encapsulate access to the itm structure for SMP.
+ */
+void __init
+ia64_cpu_local_tick(void)
+{
+ /* arrange for the cycle counter to generate a timer interrupt: */
+ ia64_set_itv(TIMER_IRQ, 0);
+ ia64_set_itc(0);
+ itm.next[smp_processor_id()] = ia64_get_itc() + itm.delta;
+ ia64_set_itm(itm.next[smp_processor_id()]);
+}
+
+void __init
+ia64_init_itm (void)
+{
+ unsigned long platform_base_freq, itc_freq, drift;
+ struct pal_freq_ratio itc_ratio, proc_ratio;
+ long status;
+
+ /*
+ * According to SAL v2.6, we need to use a SAL call to determine the
+ * platform base frequency and then a PAL call to determine the
+ * frequency ratio between the ITC and the base frequency.
+ */
+ status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, &platform_base_freq, &drift);
+ if (status != 0) {
+ printk("SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status));
+ } else {
+ status = ia64_pal_freq_ratios(&proc_ratio, 0, &itc_ratio);
+ if (status != 0)
+ printk("PAL_FREQ_RATIOS failed with status=%ld\n", status);
+ }
+ if (status != 0) {
+ /* invent "random" values */
+		printk("SAL/PAL failed to obtain frequency info---inventing reasonable values\n");
+ platform_base_freq = 100000000;
+ itc_ratio.num = 3;
+ itc_ratio.den = 1;
+ }
+#if defined(CONFIG_IA64_LION_HACKS)
+ /* Our Lion currently returns base freq 104.857MHz, which
+ ain't right (it really is 100MHz). */
+ printk("SAL/PAL returned: base-freq=%lu, itc-ratio=%lu/%lu, proc-ratio=%lu/%lu\n",
+ platform_base_freq, itc_ratio.num, itc_ratio.den,
+ proc_ratio.num, proc_ratio.den);
+ platform_base_freq = 100000000;
+#elif 0 && defined(CONFIG_IA64_BIGSUR_HACKS)
+ /* BigSur with 991020 firmware returned itc-ratio=9/2 and base
+ freq 75MHz, which wasn't right. The 991119 firmware seems
+ to return the right values, so this isn't necessary
+ anymore... */
+ printk("SAL/PAL returned: base-freq=%lu, itc-ratio=%lu/%lu, proc-ratio=%lu/%lu\n",
+ platform_base_freq, itc_ratio.num, itc_ratio.den,
+ proc_ratio.num, proc_ratio.den);
+ platform_base_freq = 100000000;
+ proc_ratio.num = 5; proc_ratio.den = 1;
+ itc_ratio.num = 5; itc_ratio.den = 1;
+#elif defined(CONFIG_IA64_SOFTSDV_HACKS)
+ platform_base_freq = 10000000;
+ proc_ratio.num = 4; proc_ratio.den = 1;
+ itc_ratio.num = 4; itc_ratio.den = 1;
+#else
+ if (platform_base_freq < 40000000) {
+ printk("Platform base frequency %lu bogus---resetting to 75MHz!\n",
+ platform_base_freq);
+ platform_base_freq = 75000000;
+ }
+#endif
+ if (!proc_ratio.den)
+ proc_ratio.num = 1; /* avoid division by zero */
+ if (!itc_ratio.den)
+ itc_ratio.num = 1; /* avoid division by zero */
+
+ itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den;
+ itm.delta = itc_freq / HZ;
+ printk("timer: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, ITC freq=%lu.%03luMHz\n",
+ platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000,
+ itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000);
+
+ my_cpu_data.proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den;
+ my_cpu_data.itc_freq = itc_freq;
+ my_cpu_data.cyc_per_usec = itc_freq / 1000000;
+ my_cpu_data.usec_per_cyc = (1000000UL << IA64_USEC_PER_CYC_SHIFT) / itc_freq;
+
+ /* Setup the CPU local timer tick */
+ ia64_cpu_local_tick();
+}
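A worked example (editor's illustration with made-up SAL/PAL answers): with a 100MHz base frequency and a 3/1 ITC ratio, ia64_init_itm() above ends up with a 300MHz ITC and a per-tick delta of itc_freq/HZ. The HZ value used here is an assumption for the arithmetic only.

#include <stdio.h>

#define HZ_EXAMPLE 1024		/* assumed tick rate, for illustration only */

int main(void)
{
	unsigned long platform_base_freq = 100000000UL;	/* pretend SAL said 100MHz */
	unsigned long itc_num = 3, itc_den = 1;		/* pretend PAL said ITC ratio 3/1 */

	unsigned long itc_freq = (platform_base_freq * itc_num) / itc_den;
	unsigned long delta = itc_freq / HZ_EXAMPLE;	/* ITC cycles between timer ticks */

	printf("ITC freq=%lu.%03luMHz, itm.delta=%lu cycles/tick\n",
	       itc_freq / 1000000, (itc_freq / 1000) % 1000, delta);
	return 0;
}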
+
+void __init
+time_init (void)
+{
+ /*
+	 * Request the IRQ _before_ doing anything that could cause the
+	 * interrupt to be posted.
+ */
+ if (request_irq(TIMER_IRQ, timer_interrupt, 0, "timer", NULL))
+ panic("Could not allocate timer IRQ!");
+
+ efi_gettimeofday(&xtime);
+ ia64_init_itm();
+}
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
new file mode 100644
index 000000000..c242622ec
--- /dev/null
+++ b/arch/ia64/kernel/traps.c
@@ -0,0 +1,423 @@
+/*
+ * Architecture-specific trap handling.
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/*
+ * The fpu_fault() handler needs to be able to access and update all
+ * floating point registers. Those saved in pt_regs can be accessed
+ * through that structure, but those not saved will be accessed
+ * directly. To make this work, we need to ensure that the compiler
+ * does not end up using a preserved floating point register on its
+ * own. The following achieves this by declaring preserved registers
+ * that are not marked as "fixed" as global register variables.
+ */
+register double f2 asm ("f2"); register double f3 asm ("f3");
+register double f4 asm ("f4"); register double f5 asm ("f5");
+
+register long f16 asm ("f16"); register long f17 asm ("f17");
+register long f18 asm ("f18"); register long f19 asm ("f19");
+register long f20 asm ("f20"); register long f21 asm ("f21");
+register long f22 asm ("f22"); register long f23 asm ("f23");
+
+register double f24 asm ("f24"); register double f25 asm ("f25");
+register double f26 asm ("f26"); register double f27 asm ("f27");
+register double f28 asm ("f28"); register double f29 asm ("f29");
+register double f30 asm ("f30"); register double f31 asm ("f31");
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+
+#ifdef CONFIG_KDB
+# include <linux/kdb.h>
+#endif
+
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+
+#include <asm/fpswa.h>
+
+static fpswa_interface_t *fpswa_interface;
+
+void __init
+trap_init (void)
+{
+ printk("fpswa interface at %lx\n", ia64_boot_param.fpswa);
+ if (ia64_boot_param.fpswa) {
+#define OLD_FIRMWARE
+#ifdef OLD_FIRMWARE
+ /*
+ * HACK to work around broken firmware. This code
+ * applies the label fixup to the FPSWA interface and
+ * works both with old and new (fixed) firmware.
+ */
+ unsigned long addr = (unsigned long) __va(ia64_boot_param.fpswa);
+ unsigned long gp_val = *(unsigned long *)(addr + 8);
+
+ /* go indirect and indexed to get table address */
+ addr = gp_val;
+ gp_val = *(unsigned long *)(addr + 8);
+
+ while (gp_val == *(unsigned long *)(addr + 8)) {
+ *(unsigned long *)addr |= PAGE_OFFSET;
+ *(unsigned long *)(addr + 8) |= PAGE_OFFSET;
+ addr += 16;
+ }
+#endif
+ /* FPSWA fixup: make the interface pointer a kernel virtual address: */
+ fpswa_interface = __va(ia64_boot_param.fpswa);
+ }
+}
+
+void
+die_if_kernel (char *str, struct pt_regs *regs, long err)
+{
+ if (user_mode(regs)) {
+#if 1
+ /* XXX for debugging only */
+ printk ("!!die_if_kernel: %s(%d): %s %ld\n",
+ current->comm, current->pid, str, err);
+ show_regs(regs);
+#endif
+ return;
+ }
+
+ printk("%s[%d]: %s %ld\n", current->comm, current->pid, str, err);
+
+#ifdef CONFIG_KDB
+ while (1) {
+ kdb(KDB_REASON_PANIC, 0, regs);
+		printk("Can't go anywhere from Panic!\n");
+ }
+#endif
+
+ show_regs(regs);
+
+ if (current->thread.flags & IA64_KERNEL_DEATH) {
+ printk("die_if_kernel recursion detected.\n");
+ sti();
+ while (1);
+ }
+ current->thread.flags |= IA64_KERNEL_DEATH;
+ do_exit(SIGSEGV);
+}
+
+void
+ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
+{
+ siginfo_t siginfo;
+
+ /* gdb uses a break number of 0xccccc for debug breakpoints: */
+ if (break_num != 0xccccc)
+ die_if_kernel("Bad break", regs, break_num);
+
+ siginfo.si_signo = SIGTRAP;
+ siginfo.si_errno = break_num; /* XXX is it legal to abuse si_errno like this? */
+ siginfo.si_code = TRAP_BRKPT;
+ send_sig_info(SIGTRAP, &siginfo, current);
+}
+
+/*
+ * Unimplemented system calls. This is called only for stuff that
+ * we're supposed to implement but haven't done so yet. Everything
+ * else goes to sys_ni_syscall.
+ */
+asmlinkage long
+ia64_ni_syscall (unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5, unsigned long arg6, unsigned long arg7,
+ unsigned long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+
+ printk("<sc%ld(%lx,%lx,%lx,%lx)>\n", regs->r15, arg0, arg1, arg2, arg3);
+ return -ENOSYS;
+}
+
+/*
+ * disabled_fph_fault() is called when a user-level process attempts to
+ * access one of the registers f32..f127 while it doesn't own the
+ * fp-high register partition. When this happens, we save the current
+ * fph partition in the task_struct of the fpu-owner (if necessary)
+ * and then load the fp-high partition of the current task (if
+ * necessary).
+ */
+static inline void
+disabled_fph_fault (struct pt_regs *regs)
+{
+ struct task_struct *fpu_owner = ia64_get_fpu_owner();
+
+ regs->cr_ipsr &= ~(IA64_PSR_DFH | IA64_PSR_MFH);
+ if (fpu_owner != current) {
+ ia64_set_fpu_owner(current);
+
+ if (fpu_owner && ia64_psr(ia64_task_regs(fpu_owner))->mfh) {
+ fpu_owner->thread.flags |= IA64_THREAD_FPH_VALID;
+ __ia64_save_fpu(fpu_owner->thread.fph);
+ }
+ if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) {
+ __ia64_load_fpu(current->thread.fph);
+ } else {
+ __ia64_init_fpu();
+ }
+ }
+}
+
+static inline int
+fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs,
+ struct pt_regs *regs)
+{
+ fp_state_t fp_state;
+ fpswa_ret_t ret;
+#ifdef FPSWA_BUG
+ struct ia64_fpreg f6_15[10];
+#endif
+
+ if (!fpswa_interface)
+ return -1;
+
+ memset(&fp_state, 0, sizeof(fp_state_t));
+
+ /*
+ * compute fp_state. only FP registers f6 - f11 are used by the
+ * kernel, so set those bits in the mask and set the low volatile
+ * pointer to point to these registers.
+ */
+ fp_state.bitmask_low64 = 0xffc0; /* bit6..bit15 */
+#ifndef FPSWA_BUG
+ fp_state.fp_state_low_volatile = &regs->f6;
+#else
+ f6_15[0] = regs->f6;
+ f6_15[1] = regs->f7;
+ f6_15[2] = regs->f8;
+ f6_15[3] = regs->f9;
+ __asm__ ("stf.spill %0=f10" : "=m"(f6_15[4]));
+ __asm__ ("stf.spill %0=f11" : "=m"(f6_15[5]));
+ __asm__ ("stf.spill %0=f12" : "=m"(f6_15[6]));
+ __asm__ ("stf.spill %0=f13" : "=m"(f6_15[7]));
+ __asm__ ("stf.spill %0=f14" : "=m"(f6_15[8]));
+ __asm__ ("stf.spill %0=f15" : "=m"(f6_15[9]));
+ fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) f6_15;
+#endif
+ /*
+ * unsigned long (*EFI_FPSWA) (
+ * unsigned long trap_type,
+ * void *Bundle,
+ * unsigned long *pipsr,
+ * unsigned long *pfsr,
+ * unsigned long *pisr,
+ * unsigned long *ppreds,
+ * unsigned long *pifs,
+ * void *fp_state);
+ */
+ ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle,
+ (unsigned long *) ipsr, (unsigned long *) fpsr,
+ (unsigned long *) isr, (unsigned long *) pr,
+ (unsigned long *) ifs, &fp_state);
+#ifdef FPSWA_BUG
+ __asm__ ("ldf.fill f10=%0" :: "m"(f6_15[4]));
+ __asm__ ("ldf.fill f11=%0" :: "m"(f6_15[5]));
+ __asm__ ("ldf.fill f12=%0" :: "m"(f6_15[6]));
+ __asm__ ("ldf.fill f13=%0" :: "m"(f6_15[7]));
+ __asm__ ("ldf.fill f14=%0" :: "m"(f6_15[8]));
+ __asm__ ("ldf.fill f15=%0" :: "m"(f6_15[9]));
+ regs->f6 = f6_15[0];
+ regs->f7 = f6_15[1];
+ regs->f8 = f6_15[2];
+ regs->f9 = f6_15[3];
+#endif
+ return ret.status;
+}
+
+/*
+ * Handle floating-point assist faults and traps.
+ */
+static int
+handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
+{
+ long exception, bundle[2];
+ unsigned long fault_ip;
+ static int fpu_swa_count = 0;
+ static unsigned long last_time;
+
+ fault_ip = regs->cr_iip;
+ if (!fp_fault && (ia64_psr(regs)->ri == 0))
+ fault_ip -= 16;
+ if (copy_from_user(bundle, (void *) fault_ip, sizeof(bundle)))
+ return -1;
+
+ if (fpu_swa_count > 5 && jiffies - last_time > 5*HZ)
+ fpu_swa_count = 0;
+ if (++fpu_swa_count < 5) {
+ last_time = jiffies;
+ printk("%s(%d): floating-point assist fault at ip %016lx\n",
+ current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri);
+ }
+
+ exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr,
+ &regs->cr_ifs, regs);
+ if (fp_fault) {
+ if (exception == 0) {
+ /* emulation was successful */
+ ia64_increment_ip(regs);
+ } else if (exception == -1) {
+ printk("handle_fpu_swa: fp_emulate() returned -1\n");
+ return -2;
+ } else {
+ /* is next instruction a trap? */
+ if (exception & 2) {
+ ia64_increment_ip(regs);
+ }
+ return -1;
+ }
+ } else {
+ if (exception == -1) {
+ printk("handle_fpu_swa: fp_emulate() returned -1\n");
+ return -2;
+ } else if (exception != 0) {
+ /* raise exception */
+ return -1;
+ }
+ }
+ return 0;
+}
+
+void
+ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
+ unsigned long iim, unsigned long itir, unsigned long arg5,
+ unsigned long arg6, unsigned long arg7, unsigned long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+ unsigned long code, error = isr;
+ struct siginfo siginfo;
+ char buf[128];
+ int result;
+ static const char *reason[] = {
+ "IA-64 Illegal Operation fault",
+ "IA-64 Privileged Operation fault",
+ "IA-64 Privileged Register fault",
+ "IA-64 Reserved Register/Field fault",
+ "Disabled Instruction Set Transition fault",
+ "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault",
+ "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12",
+ "Unknown fault 13", "Unknown fault 14", "Unknown fault 15"
+ };
+
+#if 0
+ /* this is for minimal trust debugging; yeah this kind of stuff is useful at times... */
+
+ if (vector != 25) {
+ static unsigned long last_time;
+ static char count;
+ unsigned long n = vector;
+ char buf[32], *cp;
+
+ if (count > 5 && jiffies - last_time > 5*HZ)
+ count = 0;
+
+ if (count++ < 5) {
+ last_time = jiffies;
+ cp = buf + sizeof(buf);
+ *--cp = '\0';
+ while (n) {
+ *--cp = "0123456789abcdef"[n & 0xf];
+ n >>= 4;
+ }
+ printk("<0x%s>", cp);
+ }
+ }
+#endif
+
+ switch (vector) {
+ case 24: /* General Exception */
+ code = (isr >> 4) & 0xf;
+ sprintf(buf, "General Exception: %s%s", reason[code],
+ (code == 3) ? ((isr & (1UL << 37))
+ ? " (RSE access)" : " (data access)") : "");
+#ifndef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ if (code == 8) {
+# ifdef CONFIG_IA64_PRINT_HAZARDS
+ printk("%016lx:possible hazard, pr = %016lx\n", regs->cr_iip, regs->pr);
+# endif
+ return;
+ }
+#endif
+ break;
+
+ case 25: /* Disabled FP-Register */
+ if (isr & 2) {
+ disabled_fph_fault(regs);
+ return;
+ }
+ sprintf(buf, "Disabled FPL fault---not supposed to happen!");
+ break;
+
+ case 29: /* Debug */
+ case 35: /* Taken Branch Trap */
+ case 36: /* Single Step Trap */
+ switch (vector) {
+ case 29: siginfo.si_code = TRAP_BRKPT; break;
+ case 35: siginfo.si_code = TRAP_BRANCH; break;
+ case 36: siginfo.si_code = TRAP_TRACE; break;
+ }
+ siginfo.si_signo = SIGTRAP;
+ siginfo.si_errno = 0;
+ force_sig_info(SIGTRAP, &siginfo, current);
+ return;
+
+ case 30: /* Unaligned fault */
+ sprintf(buf, "Unaligned access in kernel mode---don't do this!");
+ break;
+
+ case 32: /* fp fault */
+ case 33: /* fp trap */
+ result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr);
+ if (result < 0) {
+ siginfo.si_signo = SIGFPE;
+ siginfo.si_errno = 0;
+ siginfo.si_code = 0; /* XXX fix me */
+ siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
+ send_sig_info(SIGFPE, &siginfo, current);
+ if (result == -1)
+ send_sig_info(SIGFPE, &siginfo, current);
+ else
+ force_sig(SIGFPE, current);
+ }
+ return;
+
+ case 34: /* Unimplemented Instruction Address Trap */
+ if (user_mode(regs)) {
+ printk("Woah! Unimplemented Instruction Address Trap!\n");
+ siginfo.si_code = ILL_BADIADDR;
+ siginfo.si_signo = SIGILL;
+ siginfo.si_errno = 0;
+ force_sig_info(SIGILL, &siginfo, current);
+ return;
+ }
+ sprintf(buf, "Unimplemented Instruction Address fault");
+ break;
+
+ case 45:
+ printk("Unexpected IA-32 exception\n");
+ force_sig(SIGSEGV, current);
+ return;
+
+ case 46:
+ printk("Unexpected IA-32 intercept trap\n");
+ force_sig(SIGSEGV, current);
+ return;
+
+ case 47:
+ sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16);
+ break;
+
+ default:
+ sprintf(buf, "Fault %lu", vector);
+ break;
+ }
+ die_if_kernel(buf, regs, error);
+ force_sig(SIGILL, current);
+}
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
new file mode 100644
index 000000000..0bd213f6b
--- /dev/null
+++ b/arch/ia64/kernel/unaligned.c
@@ -0,0 +1,1554 @@
+/*
+ * Architecture-specific unaligned trap handling.
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <asm/rse.h>
+#include <asm/processor.h>
+#include <asm/unaligned.h>
+
+extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
+
+#undef DEBUG_UNALIGNED_TRAP
+
+#ifdef DEBUG_UNALIGNED_TRAP
+#define DPRINT(a) { printk("%s, line %d: ", __FUNCTION__, __LINE__); printk a;}
+#else
+#define DPRINT(a)
+#endif
+
+#define IA64_FIRST_STACKED_GR 32
+#define IA64_FIRST_ROTATING_FR 32
+#define SIGN_EXT9 __IA64_UL(0xffffffffffffff00)
+
+/*
+ * For M-unit:
+ *
+ * opcode | m | x6 |
+ * --------|------|---------|
+ * [40-37] | [36] | [35:30] |
+ * --------|------|---------|
+ * 4 | 1 | 6 | = 11 bits
+ * --------------------------
+ * However bits [31:30] are not directly useful to distinguish between
+ * load/store so we can use [35:32] instead, which gives the following
+ * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
+ * checking the m-bit until later in the load/store emulation.
+ */
+#define IA64_OPCODE_MASK 0x1ef00000000
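Editor's sketch: the mask above selects bits [40:32] of an instruction word, which is how the emulation below classifies loads and stores. A standalone toy classifier using the same constants (the instruction value is synthetic, with only the opcode field meaningful):

#include <stdio.h>

#define IA64_OPCODE_MASK 0x1ef00000000UL
#define LD_OP            0x08000000000UL
#define ST_OP            0x08c00000000UL

int main(void)
{
	unsigned long insn = LD_OP | 0x3f;	/* opcode bits plus junk in other fields */

	switch (insn & IA64_OPCODE_MASK) {
	case LD_OP:
		printf("integer load\n");
		break;
	case ST_OP:
		printf("integer store\n");
		break;
	default:
		printf("not handled here\n");
	}
	return 0;
}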
+
+/*
+ * Table C-28 Integer Load/Store
+ *
+ * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
+ *
+ * ld8.fill, st8.fill MUST be aligned because the RNATs are based on
+ * the address (bits [8:3]), so we must fail.
+ */
+#define LD_OP 0x08000000000
+#define LDS_OP 0x08100000000
+#define LDA_OP 0x08200000000
+#define LDSA_OP 0x08300000000
+#define LDBIAS_OP 0x08400000000
+#define LDACQ_OP 0x08500000000
+/* 0x086, 0x087 are not relevant */
+#define LDCCLR_OP 0x08800000000
+#define LDCNC_OP 0x08900000000
+#define LDCCLRACQ_OP 0x08a00000000
+#define ST_OP 0x08c00000000
+#define STREL_OP 0x08d00000000
+/* 0x08e,0x8f are not relevant */
+
+/*
+ * Table C-29 Integer Load +Reg
+ *
+ * we use the ld->m (bit [36:36]) field to determine whether or not we have
+ * a load/store of this form.
+ */
+
+/*
+ * Table C-30 Integer Load/Store +Imm
+ *
+ * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
+ *
+ * ld8.fill, st8.fill must be aligned because the NaT bits are based on
+ * the address, so we must fail and the program must be fixed.
+ */
+#define LD_IMM_OP 0x0a000000000
+#define LDS_IMM_OP 0x0a100000000
+#define LDA_IMM_OP 0x0a200000000
+#define LDSA_IMM_OP 0x0a300000000
+#define LDBIAS_IMM_OP 0x0a400000000
+#define LDACQ_IMM_OP 0x0a500000000
+/* 0x0a6, 0xa7 are not relevant */
+#define LDCCLR_IMM_OP 0x0a800000000
+#define LDCNC_IMM_OP 0x0a900000000
+#define LDCCLRACQ_IMM_OP 0x0aa00000000
+#define ST_IMM_OP 0x0ac00000000
+#define STREL_IMM_OP 0x0ad00000000
+/* 0x0ae,0xaf are not relevant */
+
+/*
+ * Table C-32 Floating-point Load/Store
+ */
+#define LDF_OP 0x0c000000000
+#define LDFS_OP 0x0c100000000
+#define LDFA_OP 0x0c200000000
+#define LDFSA_OP 0x0c300000000
+/* 0x0c6 is irrelevant */
+#define LDFCCLR_OP 0x0c800000000
+#define LDFCNC_OP 0x0c900000000
+/* 0x0cb is irrelevant */
+#define STF_OP 0x0cc00000000
+
+/*
+ * Table C-33 Floating-point Load +Reg
+ *
+ * we use the ld->m (bit [36:36]) field to determine whether or not we have
+ * a load/store of this form.
+ */
+
+/*
+ * Table C-34 Floating-point Load/Store +Imm
+ */
+#define LDF_IMM_OP 0x0e000000000
+#define LDFS_IMM_OP 0x0e100000000
+#define LDFA_IMM_OP 0x0e200000000
+#define LDFSA_IMM_OP 0x0e300000000
+/* 0x0e6 is irrelevant */
+#define LDFCCLR_IMM_OP 0x0e800000000
+#define LDFCNC_IMM_OP 0x0e900000000
+#define STF_IMM_OP 0x0ec00000000
+
+typedef struct {
+ unsigned long qp:6; /* [0:5] */
+ unsigned long r1:7; /* [6:12] */
+ unsigned long imm:7; /* [13:19] */
+ unsigned long r3:7; /* [20:26] */
+ unsigned long x:1; /* [27:27] */
+ unsigned long hint:2; /* [28:29] */
+ unsigned long x6_sz:2; /* [30:31] */
+ unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */
+ unsigned long m:1; /* [36:36] */
+ unsigned long op:4; /* [37:40] */
+ unsigned long pad:23; /* [41:63] */
+} load_store_t;
+
+
+typedef enum {
+ UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */
+ UPD_REG /* ldXZ r1=[r3],r2 */
+} update_t;
+
+/*
+ * We use tables to keep track of the offsets of registers in the saved state.
+ * This way we save having big switch/case statements.
+ *
+ * We use bit 0 to indicate switch_stack or pt_regs.
+ * The offset is simply shifted by 1 bit.
+ * A 2-byte value should be enough to hold any kind of offset
+ *
+ * In case the calling convention changes (and thus pt_regs/switch_stack)
+ * simply use RSW instead of RPT or vice-versa.
+ */
+
+#define RPO(x) ((size_t) &((struct pt_regs *)0)->x)
+#define RSO(x) ((size_t) &((struct switch_stack *)0)->x)
+
+#define RPT(x) (RPO(x) << 1)
+#define RSW(x) (1| RSO(x)<<1)
+
+#define GR_OFFS(x) (gr_info[x]>>1)
+#define GR_IN_SW(x) (gr_info[x] & 0x1)
+
+#define FR_OFFS(x) (fr_info[x]>>1)
+#define FR_IN_SW(x) (fr_info[x] & 0x1)
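Editor's illustration of the encoding described above, using stand-in structures rather than the real pt_regs/switch_stack layouts: bit 0 selects which saved area the register lives in, and the remaining bits, shifted right once, give the byte offset within it.

#include <stdio.h>
#include <stddef.h>

struct fake_pt_regs { unsigned long r1, r2, r3, r8; };
struct fake_switch_stack { unsigned long r4, r5, r6, r7; };

#define RPT(x)   (offsetof(struct fake_pt_regs, x) << 1)
#define RSW(x)   (1 | (offsetof(struct fake_switch_stack, x) << 1))
#define OFFS(v)  ((v) >> 1)
#define IN_SW(v) ((v) & 0x1)

int main(void)
{
	unsigned short info_r5 = RSW(r5);	/* r5 is a preserved register: switch_stack */
	unsigned short info_r8 = RPT(r8);	/* r8 is a scratch register: pt_regs */
	struct fake_pt_regs pt = { 1, 2, 3, 8 };
	struct fake_switch_stack sw = { 4, 5, 6, 7 };

	unsigned long *addr_r5 = (unsigned long *)
		((IN_SW(info_r5) ? (char *) &sw : (char *) &pt) + OFFS(info_r5));
	unsigned long *addr_r8 = (unsigned long *)
		((IN_SW(info_r8) ? (char *) &sw : (char *) &pt) + OFFS(info_r8));

	printf("r5 -> %lu (from switch_stack), r8 -> %lu (from pt_regs)\n", *addr_r5, *addr_r8);
	return 0;
}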
+
+static u16 gr_info[32]={
+ 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
+
+ RPT(r1), RPT(r2), RPT(r3),
+
+ RSW(r4), RSW(r5), RSW(r6), RSW(r7),
+
+ RPT(r8), RPT(r9), RPT(r10), RPT(r11),
+ RPT(r12), RPT(r13), RPT(r14), RPT(r15),
+
+ RPT(r16), RPT(r17), RPT(r18), RPT(r19),
+ RPT(r20), RPT(r21), RPT(r22), RPT(r23),
+ RPT(r24), RPT(r25), RPT(r26), RPT(r27),
+ RPT(r28), RPT(r29), RPT(r30), RPT(r31)
+};
+
+static u16 fr_info[32]={
+ 0, /* constant : WE SHOULD NEVER GET THIS */
+ 0, /* constant : WE SHOULD NEVER GET THIS */
+
+ RSW(f2), RSW(f3), RSW(f4), RSW(f5),
+
+ RPT(f6), RPT(f7), RPT(f8), RPT(f9),
+
+ RSW(f10), RSW(f11), RSW(f12), RSW(f13), RSW(f14),
+ RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
+ RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
+ RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
+ RSW(f30), RSW(f31)
+};
+
+/* Invalidate ALAT entry for integer register REGNO. */
+static void
+invala_gr (int regno)
+{
+# define F(reg) case reg: __asm__ __volatile__ ("invala.e r%0" :: "i"(reg)); break
+
+ switch (regno) {
+ F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
+ F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
+ F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
+ F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
+ F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
+ F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
+ F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
+ F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
+ F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
+ F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
+ F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
+ F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
+ F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
+ F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
+ F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
+ F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
+ }
+# undef F
+}
+
+/* Invalidate ALAT entry for floating-point register REGNO. */
+static void
+invala_fr (int regno)
+{
+# define F(reg) case reg: __asm__ __volatile__ ("invala.e f%0" :: "i"(reg)); break
+
+ switch (regno) {
+ F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
+ F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
+ F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
+ F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
+ F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
+ F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
+ F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
+ F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
+ F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
+ F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
+ F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
+ F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
+ F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
+ F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
+ F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
+ F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
+ }
+# undef F
+}
+
+static void
+set_rse_reg(struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
+{
+ struct switch_stack *sw = (struct switch_stack *)regs - 1;
+ unsigned long *kbs = ((unsigned long *)current) + IA64_RBS_OFFSET/8;
+ unsigned long on_kbs;
+ unsigned long *bsp, *bspstore, *addr, *ubs_end, *slot;
+ unsigned long rnats;
+ long nlocals;
+
+ /*
+ * cr_ifs=[rv:ifm], ifm=[....:sof(6)]
+	 * nlocals = number of local (in+loc) registers of the faulting function
+ */
+ nlocals = (regs->cr_ifs) & 0x7f;
+
+	DPRINT(("sw.bspstore=%lx pt.bspstore=%lx\n", sw->ar_bspstore, regs->ar_bspstore));
+ DPRINT(("cr.ifs=%lx sof=%ld sol=%ld\n",
+ regs->cr_ifs, regs->cr_ifs &0x7f, (regs->cr_ifs>>7)&0x7f));
+
+ on_kbs = ia64_rse_num_regs(kbs, (unsigned long *)sw->ar_bspstore);
+ bspstore = (unsigned long *)regs->ar_bspstore;
+
+ DPRINT(("rse_slot_num=0x%lx\n",ia64_rse_slot_num((unsigned long *)sw->ar_bspstore)));
+ DPRINT(("kbs=%p nlocals=%ld\n", kbs, nlocals));
+ DPRINT(("bspstore next rnat slot %p\n",
+ ia64_rse_rnat_addr((unsigned long *)sw->ar_bspstore)));
+ DPRINT(("on_kbs=%ld rnats=%ld\n",
+ on_kbs, ((sw->ar_bspstore-(unsigned long)kbs)>>3) - on_kbs));
+
+ /*
+ * See get_rse_reg() for an explanation on the following instructions
+ */
+ ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
+ bsp = ia64_rse_skip_regs(ubs_end, -nlocals);
+ addr = slot = ia64_rse_skip_regs(bsp, r1 - 32);
+
+ DPRINT(("ubs_end=%p bsp=%p addr=%p slot=0x%lx\n",
+ ubs_end, bsp, addr, ia64_rse_slot_num(addr)));
+
+ ia64_poke(regs, current, (unsigned long)addr, val);
+
+ /*
+ * addr will now contain the address of the RNAT for the register
+ */
+ addr = ia64_rse_rnat_addr(addr);
+
+ ia64_peek(regs, current, (unsigned long)addr, &rnats);
+ DPRINT(("rnat @%p = 0x%lx nat=%d rnatval=%lx\n",
+ addr, rnats, nat, rnats &ia64_rse_slot_num(slot)));
+
+ if ( nat ) {
+ rnats |= __IA64_UL(1) << ia64_rse_slot_num(slot);
+ } else {
+ rnats &= ~(__IA64_UL(1) << ia64_rse_slot_num(slot));
+ }
+ ia64_poke(regs, current, (unsigned long)addr, rnats);
+
+ DPRINT(("rnat changed to @%p = 0x%lx\n", addr, rnats));
+}
+
+
+static void
+get_rse_reg(struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
+{
+ struct switch_stack *sw = (struct switch_stack *)regs - 1;
+ unsigned long *kbs = (unsigned long *)current + IA64_RBS_OFFSET/8;
+ unsigned long on_kbs;
+ long nlocals;
+ unsigned long *bsp, *addr, *ubs_end, *slot, *bspstore;
+ unsigned long rnats;
+
+ /*
+ * cr_ifs=[rv:ifm], ifm=[....:sof(6)]
+ * nlocals=number of local registers in the faulting function
+ */
+ nlocals = (regs->cr_ifs) & 0x7f;
+
+ /*
+ * save_switch_stack does a flushrs and saves bspstore.
+ * on_kbs = actual number of registers saved on kernel backing store
+	 * (taking into account potential RNATs)
+	 *
+	 * Note that this number can be greater than nlocals if the dirty
+	 * partition included more than one stack frame at the time we
+ * switched to KBS
+ */
+ on_kbs = ia64_rse_num_regs(kbs, (unsigned long *)sw->ar_bspstore);
+ bspstore = (unsigned long *)regs->ar_bspstore;
+
+ /*
+	 * To simplify the logic, we calculate everything as if there were only
+	 * one backing store, i.e., the user one (UBS). We let peek/poke figure
+	 * out whether the register we're looking for really is on the UBS or
+	 * on the KBS.
+	 *
+	 * regs->ar_bspstore = address of last register saved on UBS (before switch)
+	 *
+	 * ubs_end = virtual end of the UBS (if everything had been spilled there)
+	 *
+	 * We know that ubs_end is the point where the last register of the
+	 * stack frame we're interested in has been saved. So we need to walk
+	 * our way backward to figure out what the BSP "was" for that frame;
+	 * this gives us the location of r32.
+	 *
+	 * bsp = "virtual UBS" address of r32 for our frame
+	 *
+	 * Finally, we compute the address of the register we're looking for
+	 * using bsp as our base (move up again).
+	 *
+	 * Please note that in our case, we know that the register is necessarily
+	 * on the KBS because we are only interested in the current frame at the
+	 * moment we got the exception, i.e., bsp is not changed until we switch
+	 * to the KBS.
+ */
+ ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
+ bsp = ia64_rse_skip_regs(ubs_end, -nlocals);
+ addr = slot = ia64_rse_skip_regs(bsp, r1 - 32);
+
+ DPRINT(("ubs_end=%p bsp=%p addr=%p slot=0x%lx\n",
+ ubs_end, bsp, addr, ia64_rse_slot_num(addr)));
+
+ ia64_peek(regs, current, (unsigned long)addr, val);
+
+ /*
+ * addr will now contain the address of the RNAT for the register
+ */
+ addr = ia64_rse_rnat_addr(addr);
+
+ ia64_peek(regs, current, (unsigned long)addr, &rnats);
+ DPRINT(("rnat @%p = 0x%lx\n", addr, rnats));
+
+ if ( nat ) *nat = rnats >> ia64_rse_slot_num(slot) & 0x1;
+}
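+
+/*
+ * Worked example (sketch) of the address computation in get_rse_reg()/set_rse_reg():
+ * with nlocals = 5 (cr_ifs & 0x7f) and, say, on_kbs = 12 registers already on the
+ * kernel backing store, we get
+ *
+ *	ubs_end = ia64_rse_skip_regs(bspstore, 12);
+ *	bsp     = ia64_rse_skip_regs(ubs_end, -5);	(virtual UBS address of r32)
+ *	addr    = ia64_rse_skip_regs(bsp, r1 - 32);	(r1=34 -> 2 slots above bsp)
+ *
+ * ia64_rse_skip_regs() accounts for any intervening RNAT collection slots, and
+ * ia64_peek()/ia64_poke() take care of accessing the slot whether it still lives
+ * on the UBS or already sits on the KBS.
+ */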
+
+
+static void
+setreg(unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
+{
+ struct switch_stack *sw = (struct switch_stack *)regs -1;
+ unsigned long addr;
+ unsigned long bitmask;
+ unsigned long *unat;
+
+
+ /*
+ * First takes care of stacked registers
+ */
+ if ( regnum >= IA64_FIRST_STACKED_GR ) {
+ set_rse_reg(regs, regnum, val, nat);
+ return;
+ }
+
+ /*
+ * Using r0 as a target raises a General Exception fault which has
+ * higher priority than the Unaligned Reference fault.
+ */
+
+ /*
+ * Now look at registers in [0-31] range and init correct UNAT
+ */
+ if ( GR_IN_SW(regnum) ) {
+ addr = (unsigned long)sw;
+ unat = &sw->ar_unat;
+ } else {
+ addr = (unsigned long)regs;
+ unat = &sw->caller_unat;
+ }
+ DPRINT(("tmp_base=%lx switch_stack=%s offset=%d\n",
+ addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum)));
+ /*
+ * add offset from base of struct
+ * and do it !
+ */
+ addr += GR_OFFS(regnum);
+
+ *(unsigned long *)addr = val;
+
+ /*
+	 * We need to update the corresponding UNAT bit to fully emulate the load:
+	 * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4
+ */
+ bitmask = __IA64_UL(1) << (addr >> 3 & 0x3f);
+ DPRINT(("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, unat, *unat));
+ if ( nat ) {
+ *unat |= bitmask;
+ } else {
+ *unat &= ~bitmask;
+ }
+ DPRINT(("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, unat,*unat));
+}
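+
+/*
+ * Example (sketch) of the UNAT bit computation in setreg()/getreg(): the UNAT
+ * bit for a spilled GR is indexed by bits {8:3} of its spill address, so for
+ * an address ending in 0x128:
+ *
+ *	bit = (addr >> 3) & 0x3f;	(0x128 >> 3 = 0x25, & 0x3f = 0x25)
+ *	*unat = nat ? (*unat | (1UL << bit)) : (*unat & ~(1UL << bit));
+ */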
+
+#define IA64_FPH_OFFS(r) (r - IA64_FIRST_ROTATING_FR)
+
+static void
+setfpreg(unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
+{
+ struct switch_stack *sw = (struct switch_stack *)regs - 1;
+ unsigned long addr;
+
+ /*
+ * From EAS-2.5: FPDisableFault has higher priority than
+ * Unaligned Fault. Thus, when we get here, we know the partition is
+ * enabled.
+ *
+	 * The registers [32-127] are usually saved in the tss. When we get here,
+	 * they are NECESSARILY live because they are only saved explicitly.
+	 * We have 3 ways of updating the values: force a save of the range
+	 * in the tss, use a gigantic switch/case statement, or generate code on
+	 * the fly to store to the right register.
+	 * For now, we use the (slow) save/restore way.
+ */
+ if ( regnum >= IA64_FIRST_ROTATING_FR ) {
+ /*
+ * force a save of [32-127] to tss
+ * we use the __() form to avoid fiddling with the dfh bit
+ */
+ __ia64_save_fpu(&current->thread.fph[0]);
+
+ current->thread.fph[IA64_FPH_OFFS(regnum)] = *fpval;
+
+ __ia64_load_fpu(&current->thread.fph[0]);
+
+ /*
+ * mark the high partition as being used now
+ *
+ * This is REQUIRED because the disabled_fph_fault() does
+ * not set it, it's relying on the faulting instruction to
+ * do it. In our case the faulty instruction never gets executed
+ * completely, so we need to toggle the bit.
+ */
+ regs->cr_ipsr |= IA64_PSR_MFH;
+ } else {
+ /*
+ * pt_regs or switch_stack ?
+ */
+ if ( FR_IN_SW(regnum) ) {
+ addr = (unsigned long)sw;
+ } else {
+ addr = (unsigned long)regs;
+ }
+
+ DPRINT(("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum)));
+
+ addr += FR_OFFS(regnum);
+ *(struct ia64_fpreg *)addr = *fpval;
+
+ /*
+ * mark the low partition as being used now
+ *
+ * It is highly unlikely that this bit is not already set, but
+ * let's do it for safety.
+ */
+ regs->cr_ipsr |= IA64_PSR_MFL;
+
+ }
+}
+
+/*
+ * Those 2 inline functions generate the spilled versions of the constant floating point
+ * registers which can be used with stfX
+ */
+static inline void
+float_spill_f0(struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("stf.spill [%0]=f0" :: "r"(final) : "memory");
+}
+
+static inline void
+float_spill_f1(struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("stf.spill [%0]=f1" :: "r"(final) : "memory");
+}
+
+static void
+getfpreg(unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
+{
+ struct switch_stack *sw = (struct switch_stack *)regs -1;
+ unsigned long addr;
+
+ /*
+ * From EAS-2.5: FPDisableFault has higher priority than
+ * Unaligned Fault. Thus, when we get here, we know the partition is
+ * enabled.
+ *
+ * When regnum > 31, the register is still live and
+ * we need to force a save to the tss to get access to it.
+ * See discussion in setfpreg() for reasons and other ways of doing this.
+ */
+ if ( regnum >= IA64_FIRST_ROTATING_FR ) {
+
+ /*
+ * force a save of [32-127] to tss
+		 * we use the __ia64_save_fpu() form to avoid fiddling with
+ * the dfh bit.
+ */
+ __ia64_save_fpu(&current->thread.fph[0]);
+
+ *fpval = current->thread.fph[IA64_FPH_OFFS(regnum)];
+ } else {
+ /*
+		 * f0 = 0.0, f1 = 1.0. Those registers are constant and are thus
+		 * not saved; we must generate their spilled form on the fly.
+ */
+ switch(regnum) {
+ case 0:
+ float_spill_f0(fpval);
+ break;
+ case 1:
+ float_spill_f1(fpval);
+ break;
+ default:
+ /*
+ * pt_regs or switch_stack ?
+ */
+ addr = FR_IN_SW(regnum) ? (unsigned long)sw
+ : (unsigned long)regs;
+
+ DPRINT(("is_sw=%d tmp_base=%lx offset=0x%x\n",
+ FR_IN_SW(regnum), addr, FR_OFFS(regnum)));
+
+ addr += FR_OFFS(regnum);
+ *fpval = *(struct ia64_fpreg *)addr;
+ }
+ }
+}
+
+
+static void
+getreg(unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
+{
+ struct switch_stack *sw = (struct switch_stack *)regs -1;
+ unsigned long addr, *unat;
+
+ if ( regnum >= IA64_FIRST_STACKED_GR ) {
+ get_rse_reg(regs, regnum, val, nat);
+ return;
+ }
+
+ /*
+ * take care of r0 (read-only always evaluate to 0)
+ */
+ if ( regnum == 0 ) {
+ *val = 0;
+ *nat = 0;
+ return;
+ }
+
+ /*
+ * Now look at registers in [0-31] range and init correct UNAT
+ */
+ if ( GR_IN_SW(regnum) ) {
+ addr = (unsigned long)sw;
+ unat = &sw->ar_unat;
+ } else {
+ addr = (unsigned long)regs;
+ unat = &sw->caller_unat;
+ }
+
+ DPRINT(("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum)));
+
+ addr += GR_OFFS(regnum);
+
+ *val = *(unsigned long *)addr;
+
+ /*
+ * do it only when requested
+ */
+ if ( nat ) *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
+}
+
+static void
+emulate_load_updates(update_t type, load_store_t *ld, struct pt_regs *regs, unsigned long ifa)
+{
+ /*
+ * IMPORTANT:
+ * Given the way we handle unaligned speculative loads, we should
+ * not get to this point in the code but we keep this sanity check,
+ * just in case.
+ */
+ if ( ld->x6_op == 1 || ld->x6_op == 3 ) {
+		printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
+		die_if_kernel("unaligned reference on speculative load with register update\n",
+ regs, 30);
+ }
+
+
+ /*
+ * at this point, we know that the base register to update is valid i.e.,
+ * it's not r0
+ */
+ if ( type == UPD_IMMEDIATE ) {
+ unsigned long imm;
+
+ /*
+ * Load +Imm: ldXZ r1=[r3],imm(9)
+ *
+ *
+ * form imm9: [13:19] contain the first 7 bits
+ */
+ imm = ld->x << 7 | ld->imm;
+
+ /*
+ * sign extend (1+8bits) if m set
+ */
+ if (ld->m) imm |= SIGN_EXT9;
+
+ /*
+ * ifa == r3 and we know that the NaT bit on r3 was clear so
+ * we can directly use ifa.
+ */
+ ifa += imm;
+
+ setreg(ld->r3, ifa, 0, regs);
+
+ DPRINT(("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld->x, ld->m, imm, ifa));
+
+ } else if ( ld->m ) {
+ unsigned long r2;
+ int nat_r2;
+
+ /*
+ * Load +Reg Opcode: ldXZ r1=[r3],r2
+ *
+ * Note: that we update r3 even in the case of ldfX.a
+ * (where the load does not happen)
+ *
+ * The way the load algorithm works, we know that r3 does not
+ * have its NaT bit set (would have gotten NaT consumption
+ * before getting the unaligned fault). So we can use ifa
+ * which equals r3 at this point.
+ *
+ * IMPORTANT:
+ * The above statement holds ONLY because we know that we
+ * never reach this code when trying to do a ldX.s.
+ * If we ever make it to here on an ldfX.s then
+		 * If we ever make it to here on an ldfX.s, this assumption no longer holds.
+ getreg(ld->imm, &r2, &nat_r2, regs);
+
+ ifa += r2;
+
+ /*
+ * propagate Nat r2 -> r3
+ */
+ setreg(ld->r3, ifa, nat_r2, regs);
+
+ DPRINT(("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld->imm, r2, ifa, nat_r2));
+ }
+}
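+
+/*
+ * imm9 example (sketch, assuming SIGN_EXT9 sets all bits above bit 8, as the
+ * name suggests): for ldXZ r1=[r3],imm with imm=0x05, x=1, m=1 the code above
+ * computes
+ *
+ *	imm  = (1 << 7) | 0x05 = 0x85
+ *	imm |= SIGN_EXT9	(m set -> negative immediate, -0x7b)
+ *	r3   = ifa + imm	(r3 is moved back by 0x7b bytes)
+ *
+ * With m=0 the immediate stays positive (+0x85).
+ */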
+
+
+static int
+emulate_load_int(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
+{
+ unsigned long val;
+ unsigned int len = 1<< ld->x6_sz;
+
+ /*
+	 * The macro assumes sequential access (which is the case here):
+	 * if the first byte is an invalid address, we return here. Otherwise
+	 * there is a guard page at the top of the user's address space and
+ * the first access would generate a NaT consumption fault and return
+ * with a SIGSEGV, which is what we want.
+ *
+ * Note: the first argument is ignored
+ */
+ if ( access_ok(VERIFY_READ, (void *)ifa, len) < 0 ) {
+ DPRINT(("verify area failed on %lx\n", ifa));
+ return -1;
+ }
+
+ /*
+ * r0, as target, doesn't need to be checked because Illegal Instruction
+ * faults have higher priority than unaligned faults.
+ *
+ * r0 cannot be found as the base as it would never generate an
+ * unaligned reference.
+ */
+
+ /*
+ * ldX.a we don't try to emulate anything but we must
+ * invalidate the ALAT entry.
+ * See comment below for explanation on how we handle ldX.a
+ */
+ if ( ld->x6_op != 0x2 ) {
+ /*
+ * we rely on the macros in unaligned.h for now i.e.,
+ * we let the compiler figure out how to read memory gracefully.
+ *
+		 * We need this switch/case because of the way the inline function
+		 * works. The code is optimized by the compiler and ends up looking
+		 * like a single switch/case.
+ */
+ switch(len) {
+ case 2:
+ val = ia64_get_unaligned((void *)ifa, 2);
+ break;
+ case 4:
+ val = ia64_get_unaligned((void *)ifa, 4);
+ break;
+ case 8:
+ val = ia64_get_unaligned((void *)ifa, 8);
+ break;
+ default:
+ DPRINT(("unknown size: x6=%d\n", ld->x6_sz));
+ return -1;
+ }
+
+ setreg(ld->r1, val, 0, regs);
+ }
+
+ /*
+ * check for updates on any kind of loads
+ */
+ if ( ld->op == 0x5 || ld->m )
+ emulate_load_updates(ld->op == 0x5 ? UPD_IMMEDIATE: UPD_REG,
+ ld, regs, ifa);
+
+ /*
+ * handling of various loads (based on EAS2.4):
+ *
+ * ldX.acq (ordered load):
+ * - acquire semantics would have been used, so force fence instead.
+ *
+ *
+ * ldX.c.clr (check load and clear):
+ * - if we get to this handler, it's because the entry was not in the ALAT.
+ * Therefore the operation reverts to a normal load
+ *
+ * ldX.c.nc (check load no clear):
+ * - same as previous one
+ *
+ * ldX.c.clr.acq (ordered check load and clear):
+ * - same as above for c.clr part. The load needs to have acquire semantics. So
+ * we use the fence semantics which is stronger and thus ensures correctness.
+ *
+ * ldX.a (advanced load):
+ * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
+	 *   address doesn't match the requested size alignment. This means that we would
+ * possibly need more than one load to get the result.
+ *
+ * The load part can be handled just like a normal load, however the difficult
+	 *   part is to get the right thing into the ALAT. The critical piece of information
+	 *   is the base address of the load & its size. To do that, a ld.a must be executed;
+	 *   clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
+	 *   if we use the same target register, we will be okay for the chk.a instruction.
+	 *   If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
+	 *   which would overlap within [r3,r3+X] (the size of the load was stored in the
+ * ALAT). If such an entry is found the entry is invalidated. But this is not good
+ * enough, take the following example:
+ * r3=3
+ * ld4.a r1=[r3]
+ *
+ * Could be emulated by doing:
+ * ld1.a r1=[r3],1
+ * store to temporary;
+ * ld1.a r1=[r3],1
+ * store & shift to temporary;
+ * ld1.a r1=[r3],1
+ * store & shift to temporary;
+ * ld1.a r1=[r3]
+ * store & shift to temporary;
+ * r1=temporary
+ *
+	 *   So in this case, you would get the right value in r1 but the wrong info in
+	 *   the ALAT. Notice that you could do it in reverse to finish with address 3
+	 *   but you would still get the size wrong. To get the size right, one needs to
+	 *   execute exactly the same kind of load. You could do it from an aligned
+ * temporary location, but you would get the address wrong.
+ *
+ * So no matter what, it is not possible to emulate an advanced load
+ * correctly. But is that really critical ?
+ *
+ *
+	 *   Now one has to look at how ld.a is used: one must either do a ld.c.* or
+	 *   chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
+ * entry found in ALAT), and that's perfectly ok because:
+ *
+ * - ld.c.*, if the entry is not present a normal load is executed
+ * - chk.a.*, if the entry is not present, execution jumps to recovery code
+ *
+ * In either case, the load can be potentially retried in another form.
+ *
+ * So it's okay NOT to do any actual load on an unaligned ld.a. However the ALAT
+	 *   must be invalidated for the register (so that chk.a.*/ld.c.* don't pick up
+	 *   a stale entry later). The register base update MUST also be performed.
+ *
+ * Now what is the content of the register and its NaT bit in the case we don't
+	 *   do the load? EAS2.4 says (in case an actual load is needed):
+	 *
+	 *	- r1 = [r3], Nat = 0 if it succeeds
+ * - r1 = 0 Nat = 0 if trying to access non-speculative memory
+ *
+ * For us, there is nothing to do, because both ld.c.* and chk.a.* are going to
+ * retry and thus eventually reload the register thereby changing Nat and
+ * register content.
+ */
+
+ /*
+ * when the load has the .acq completer then
+ * use ordering fence.
+ */
+ if (ld->x6_op == 0x5 || ld->x6_op == 0xa)
+ mb();
+
+ /*
+ * invalidate ALAT entry in case of advanced load
+ */
+ if (ld->x6_op == 0x2)
+ invala_gr(ld->r1);
+
+ return 0;
+}
+
+static int
+emulate_store_int(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
+{
+ unsigned long r2;
+ unsigned int len = 1<< ld->x6_sz;
+
+ /*
+	 * The macro assumes sequential access (which is the case here):
+	 * if the first byte is an invalid address, we return here. Otherwise
+	 * there is a guard page at the top of the user's address space and
+ * the first access would generate a NaT consumption fault and return
+ * with a SIGSEGV, which is what we want.
+ *
+ * Note: the first argument is ignored
+ */
+ if ( access_ok(VERIFY_WRITE, (void *)ifa, len) < 0 ) {
+ DPRINT(("verify area failed on %lx\n",ifa));
+ return -1;
+ }
+
+ /*
+	 * if we get to this handler, the Nat bits on both r3 and r2 have already
+	 * been checked, so we don't need to do it again.
+ *
+ * extract the value to be stored
+ */
+ getreg(ld->imm, &r2, 0, regs);
+
+ /*
+ * we rely on the macros in unaligned.h for now i.e.,
+ * we let the compiler figure out how to read memory gracefully.
+ *
+	 * We need this switch/case because of the way the inline function
+	 * works. The code is optimized by the compiler and ends up looking
+	 * like a single switch/case.
+ */
+ DPRINT(("st%d [%lx]=%lx\n", len, ifa, r2));
+
+ switch(len) {
+ case 2:
+ ia64_put_unaligned(r2, (void *)ifa, 2);
+ break;
+ case 4:
+ ia64_put_unaligned(r2, (void *)ifa, 4);
+ break;
+ case 8:
+ ia64_put_unaligned(r2, (void *)ifa, 8);
+ break;
+ default:
+ DPRINT(("unknown size: x6=%d\n", ld->x6_sz));
+ return -1;
+ }
+ /*
+ * stX [r3]=r2,imm(9)
+ *
+ * NOTE:
+ * ld->r3 can never be r0, because r0 would not generate an
+ * unaligned access.
+ */
+ if ( ld->op == 0x5 ) {
+ unsigned long imm;
+
+ /*
+		 * form imm9: bits [6:12] (the r1 field) contain the first 7 bits
+ */
+ imm = ld->x << 7 | ld->r1;
+ /*
+ * sign extend (8bits) if m set
+ */
+ if ( ld->m ) imm |= SIGN_EXT9;
+ /*
+ * ifa == r3 (NaT is necessarily cleared)
+ */
+ ifa += imm;
+
+ DPRINT(("imm=%lx r3=%lx\n", imm, ifa));
+
+ setreg(ld->r3, ifa, 0, regs);
+ }
+ /*
+ * we don't have alat_invalidate_multiple() so we need
+ * to do the complete flush :-<<
+ */
+ ia64_invala();
+
+ /*
+ * stX.rel: use fence instead of release
+ */
+ if ( ld->x6_op == 0xd ) mb();
+
+ return 0;
+}
+
+/*
+ * floating point operations sizes in bytes
+ */
+static const unsigned short float_fsz[4]={
+ 16, /* extended precision (e) */
+ 8, /* integer (8) */
+ 4, /* single precision (s) */
+ 8 /* double precision (d) */
+};
+
+static inline void
+mem2float_extended(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldfe f6=[%0];; stf.spill [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static inline void
+mem2float_integer(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldf8 f6=[%0];; stf.spill [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static inline void
+mem2float_single(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldfs f6=[%0];; stf.spill [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static inline void
+mem2float_double(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldfd f6=[%0];; stf.spill [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static inline void
+float2mem_extended(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldf.fill f6=[%0];; stfe [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static inline void
+float2mem_integer(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldf.fill f6=[%0];; stf8 [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static inline void
+float2mem_single(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldf.fill f6=[%0];; stfs [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static inline void
+float2mem_double(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldf.fill f6=[%0];; stfd [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static int
+emulate_load_floatpair(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
+{
+ struct ia64_fpreg fpr_init[2];
+ struct ia64_fpreg fpr_final[2];
+ unsigned long len = float_fsz[ld->x6_sz];
+
+ if ( access_ok(VERIFY_READ, (void *)ifa, len<<1) < 0 ) {
+ DPRINT(("verify area failed on %lx\n", ifa));
+ return -1;
+ }
+ /*
+ * fr0 & fr1 don't need to be checked because Illegal Instruction
+ * faults have higher priority than unaligned faults.
+ *
+ * r0 cannot be found as the base as it would never generate an
+ * unaligned reference.
+ */
+
+ /*
+ * make sure we get clean buffers
+ */
+ memset(&fpr_init,0, sizeof(fpr_init));
+ memset(&fpr_final,0, sizeof(fpr_final));
+
+ /*
+ * ldfpX.a: we don't try to emulate anything but we must
+ * invalidate the ALAT entry and execute updates, if any.
+ */
+ if ( ld->x6_op != 0x2 ) {
+ /*
+ * does the unaligned access
+ */
+ memcpy(&fpr_init[0], (void *)ifa, len);
+ memcpy(&fpr_init[1], (void *)(ifa+len), len);
+
+ DPRINT(("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld->r1, ld->imm, ld->x6_sz));
+#ifdef DEBUG_UNALIGNED_TRAP
+ { int i; char *c = (char *)&fpr_init;
+ printk("fpr_init= ");
+ for(i=0; i < len<<1; i++ ) {
+ printk("%02x ", c[i]&0xff);
+ }
+ printk("\n");
+ }
+#endif
+ /*
+ * XXX fixme
+ * Could optimize inlines by using ldfpX & 2 spills
+ */
+ switch( ld->x6_sz ) {
+ case 0:
+ mem2float_extended(&fpr_init[0], &fpr_final[0]);
+ mem2float_extended(&fpr_init[1], &fpr_final[1]);
+ break;
+ case 1:
+ mem2float_integer(&fpr_init[0], &fpr_final[0]);
+ mem2float_integer(&fpr_init[1], &fpr_final[1]);
+ break;
+ case 2:
+ mem2float_single(&fpr_init[0], &fpr_final[0]);
+ mem2float_single(&fpr_init[1], &fpr_final[1]);
+ break;
+ case 3:
+ mem2float_double(&fpr_init[0], &fpr_final[0]);
+ mem2float_double(&fpr_init[1], &fpr_final[1]);
+ break;
+ }
+#ifdef DEBUG_UNALIGNED_TRAP
+ { int i; char *c = (char *)&fpr_final;
+ printk("fpr_final= ");
+ for(i=0; i < len<<1; i++ ) {
+ printk("%02x ", c[i]&0xff);
+ }
+ printk("\n");
+ }
+#endif
+ /*
+ * XXX fixme
+ *
+ * A possible optimization would be to drop fpr_final
+ * and directly use the storage from the saved context i.e.,
+ * the actual final destination (pt_regs, switch_stack or tss).
+ */
+ setfpreg(ld->r1, &fpr_final[0], regs);
+ setfpreg(ld->imm, &fpr_final[1], regs);
+ }
+
+ /*
+ * Check for updates: only immediate updates are available for this
+ * instruction.
+ */
+ if ( ld->m ) {
+
+ /*
+ * the immediate is implicit given the ldsz of the operation:
+ * single: 8 (2x4) and for all others it's 16 (2x8)
+ */
+ ifa += len<<1;
+
+ /*
+ * IMPORTANT:
+ * the fact that we force the NaT of r3 to zero is ONLY valid
+ * as long as we don't come here with a ldfpX.s.
+ * For this reason we keep this sanity check
+ */
+ if ( ld->x6_op == 1 || ld->x6_op == 3 ) {
+ printk(KERN_ERR "%s: register update on speculative load pair, error\n", __FUNCTION__);
+ }
+
+
+ setreg(ld->r3, ifa, 0, regs);
+ }
+
+ /*
+ * Invalidate ALAT entries, if any, for both registers.
+ */
+ if ( ld->x6_op == 0x2 ) {
+ invala_fr(ld->r1);
+ invala_fr(ld->imm);
+ }
+ return 0;
+}
+
+
+static int
+emulate_load_float(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
+{
+ struct ia64_fpreg fpr_init;
+ struct ia64_fpreg fpr_final;
+ unsigned long len = float_fsz[ld->x6_sz];
+
+ /*
+	 * check for load pair because our masking scheme is not fine-grained enough
+ if ( ld->x == 1 ) return emulate_load_floatpair(ifa,ld,regs);
+ */
+
+ if ( access_ok(VERIFY_READ, (void *)ifa, len) < 0 ) {
+ DPRINT(("verify area failed on %lx\n", ifa));
+ return -1;
+ }
+ /*
+ * fr0 & fr1 don't need to be checked because Illegal Instruction
+ * faults have higher priority than unaligned faults.
+ *
+ * r0 cannot be found as the base as it would never generate an
+ * unaligned reference.
+ */
+
+
+ /*
+ * make sure we get clean buffers
+ */
+ memset(&fpr_init,0, sizeof(fpr_init));
+ memset(&fpr_final,0, sizeof(fpr_final));
+
+ /*
+ * ldfX.a we don't try to emulate anything but we must
+ * invalidate the ALAT entry.
+ * See comments in ldX for descriptions on how the various loads are handled.
+ */
+ if ( ld->x6_op != 0x2 ) {
+
+ /*
+ * does the unaligned access
+ */
+ memcpy(&fpr_init, (void *)ifa, len);
+
+ DPRINT(("ld.r1=%d x6_sz=%d\n", ld->r1, ld->x6_sz));
+#ifdef DEBUG_UNALIGNED_TRAP
+ { int i; char *c = (char *)&fpr_init;
+ printk("fpr_init= ");
+ for(i=0; i < len; i++ ) {
+ printk("%02x ", c[i]&0xff);
+ }
+ printk("\n");
+ }
+#endif
+ /*
+ * we only do something for x6_op={0,8,9}
+ */
+ switch( ld->x6_sz ) {
+ case 0:
+ mem2float_extended(&fpr_init, &fpr_final);
+ break;
+ case 1:
+ mem2float_integer(&fpr_init, &fpr_final);
+ break;
+ case 2:
+ mem2float_single(&fpr_init, &fpr_final);
+ break;
+ case 3:
+ mem2float_double(&fpr_init, &fpr_final);
+ break;
+ }
+#ifdef DEBUG_UNALIGNED_TRAP
+ { int i; char *c = (char *)&fpr_final;
+ printk("fpr_final= ");
+ for(i=0; i < len; i++ ) {
+ printk("%02x ", c[i]&0xff);
+ }
+ printk("\n");
+ }
+#endif
+ /*
+ * XXX fixme
+ *
+ * A possible optimization would be to drop fpr_final
+ * and directly use the storage from the saved context i.e.,
+ * the actual final destination (pt_regs, switch_stack or tss).
+ */
+ setfpreg(ld->r1, &fpr_final, regs);
+ }
+
+ /*
+ * check for updates on any loads
+ */
+ if ( ld->op == 0x7 || ld->m )
+ emulate_load_updates(ld->op == 0x7 ? UPD_IMMEDIATE: UPD_REG,
+ ld, regs, ifa);
+
+
+ /*
+ * invalidate ALAT entry in case of advanced floating point loads
+ */
+ if (ld->x6_op == 0x2)
+ invala_fr(ld->r1);
+
+ return 0;
+}
+
+
+static int
+emulate_store_float(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
+{
+ struct ia64_fpreg fpr_init;
+ struct ia64_fpreg fpr_final;
+ unsigned long len = float_fsz[ld->x6_sz];
+
+ /*
+	 * The macro assumes sequential access (which is the case here):
+	 * if the first byte is an invalid address, we return here. Otherwise
+	 * there is a guard page at the top of the user's address space and
+ * the first access would generate a NaT consumption fault and return
+ * with a SIGSEGV, which is what we want.
+ *
+ * Note: the first argument is ignored
+ */
+ if ( access_ok(VERIFY_WRITE, (void *)ifa, len) < 0 ) {
+ DPRINT(("verify area failed on %lx\n",ifa));
+ return -1;
+ }
+
+ /*
+ * make sure we get clean buffers
+ */
+ memset(&fpr_init,0, sizeof(fpr_init));
+ memset(&fpr_final,0, sizeof(fpr_final));
+
+
+ /*
+	 * if we get to this handler, the Nat bits on both r3 and r2 have already
+	 * been checked, so we don't need to do it again.
+ *
+ * extract the value to be stored
+ */
+ getfpreg(ld->imm, &fpr_init, regs);
+ /*
+ * during this step, we extract the spilled registers from the saved
+ * context i.e., we refill. Then we store (no spill) to temporary
+ * aligned location
+ */
+ switch( ld->x6_sz ) {
+ case 0:
+ float2mem_extended(&fpr_init, &fpr_final);
+ break;
+ case 1:
+ float2mem_integer(&fpr_init, &fpr_final);
+ break;
+ case 2:
+ float2mem_single(&fpr_init, &fpr_final);
+ break;
+ case 3:
+ float2mem_double(&fpr_init, &fpr_final);
+ break;
+ }
+ DPRINT(("ld.r1=%d x6_sz=%d\n", ld->r1, ld->x6_sz));
+#ifdef DEBUG_UNALIGNED_TRAP
+ { int i; char *c = (char *)&fpr_init;
+ printk("fpr_init= ");
+ for(i=0; i < len; i++ ) {
+ printk("%02x ", c[i]&0xff);
+ }
+ printk("\n");
+ }
+ { int i; char *c = (char *)&fpr_final;
+ printk("fpr_final= ");
+ for(i=0; i < len; i++ ) {
+ printk("%02x ", c[i]&0xff);
+ }
+ printk("\n");
+ }
+#endif
+
+ /*
+ * does the unaligned store
+ */
+ memcpy((void *)ifa, &fpr_final, len);
+
+ /*
+ * stfX [r3]=r2,imm(9)
+ *
+ * NOTE:
+ * ld->r3 can never be r0, because r0 would not generate an
+ * unaligned access.
+ */
+ if ( ld->op == 0x7 ) {
+ unsigned long imm;
+
+ /*
+		 * form imm9: bits [6:12] (the r1 field) contain the first 7 bits
+ */
+ imm = ld->x << 7 | ld->r1;
+ /*
+ * sign extend (8bits) if m set
+ */
+ if ( ld->m ) imm |= SIGN_EXT9;
+ /*
+ * ifa == r3 (NaT is necessarily cleared)
+ */
+ ifa += imm;
+
+ DPRINT(("imm=%lx r3=%lx\n", imm, ifa));
+
+ setreg(ld->r3, ifa, 0, regs);
+ }
+ /*
+ * we don't have alat_invalidate_multiple() so we need
+ * to do the complete flush :-<<
+ */
+ ia64_invala();
+
+ return 0;
+}
+
+void
+ia64_handle_unaligned(unsigned long ifa, struct pt_regs *regs)
+{
+ static unsigned long unalign_count;
+ static long last_time;
+
+ struct ia64_psr *ipsr = ia64_psr(regs);
+ unsigned long *bundle_addr;
+ unsigned long opcode;
+ unsigned long op;
+ load_store_t *insn;
+ int ret = -1;
+
+ /*
+ * We flag unaligned references while in kernel as
+ * errors: the kernel must be fixed. The switch code
+ * is in ivt.S at entry 30.
+ *
+ * So here we keep a simple sanity check.
+ */
+ if ( !user_mode(regs) ) {
+ die_if_kernel("Unaligned reference while in kernel\n", regs, 30);
+ /* NOT_REACHED */
+ }
+
+ /*
+ * Make sure we log the unaligned access, so that user/sysadmin can notice it
+ * and eventually fix the program.
+ *
+ * We don't want to do that for every access so we pace it with jiffies.
+ */
+ if ( unalign_count > 5 && jiffies - last_time > 5*HZ ) unalign_count = 0;
+ if ( ++unalign_count < 5 ) {
+ last_time = jiffies;
+ printk("%s(%d): unaligned trap accessing %016lx (ip=%016lx)\n",
+ current->comm, current->pid, ifa, regs->cr_iip + ipsr->ri);
+
+ }
+
+ DPRINT(("iip=%lx ifa=%lx isr=%lx\n", regs->cr_iip, ifa, regs->cr_ipsr));
+ DPRINT(("ISR.ei=%d ISR.sp=%d\n", ipsr->ri, ipsr->it));
+
+ bundle_addr = (unsigned long *)(regs->cr_iip);
+
+ /*
+ * extract the instruction from the bundle given the slot number
+ */
+ switch ( ipsr->ri ) {
+ case 0: op = *bundle_addr >> 5;
+ break;
+
+ case 1: op = *bundle_addr >> 46 | (*(bundle_addr+1) & 0x7fffff)<<18;
+ break;
+
+ case 2: op = *(bundle_addr+1) >> 23;
+ break;
+ }
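+
+	/*
+	 * Sketch of the bundle layout assumed above: a 16-byte bundle holds a
+	 * 5-bit template followed by three 41-bit slots, so slot 0 sits at
+	 * bits [5:45], slot 1 at bits [46:86] (18 bits in the first word and
+	 * 23 in the second, hence the 0x7fffff mask and the <<18), and slot 2
+	 * at bits [87:127] (second word >> 23).
+	 */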
+
+ insn = (load_store_t *)&op;
+ opcode = op & IA64_OPCODE_MASK;
+
+ DPRINT(("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
+ "ld.x6=0x%x ld.m=%d ld.op=%d\n",
+ opcode,
+ insn->qp,
+ insn->r1,
+ insn->imm,
+ insn->r3,
+ insn->x,
+ insn->hint,
+ insn->x6_sz,
+ insn->m,
+ insn->op));
+
+ /*
+ * IMPORTANT:
+	 * Notice that the switch statement does NOT cover all possible instructions
+	 * that DO generate unaligned references. This is done on purpose because for some
+	 * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
+	 * is WRONG to try and emulate. Here is a list of instructions we don't emulate, i.e.,
+	 * the program will get a signal and die:
+ *
+ * load/store:
+ * - ldX.spill
+ * - stX.spill
+ * Reason: RNATs are based on addresses
+ *
+ * synchronization:
+ * - cmpxchg
+ * - fetchadd
+ * - xchg
+ * Reason: ATOMIC operations cannot be emulated properly using multiple
+ * instructions.
+ *
+ * speculative loads:
+ * - ldX.sZ
+ * Reason: side effects, code must be ready to deal with failure so simpler
+ * to let the load fail.
+ * ---------------------------------------------------------------------------------
+ * XXX fixme
+ *
+ * I would like to get rid of this switch case and do something
+ * more elegant.
+ */
+ switch(opcode) {
+ case LDS_OP:
+ case LDSA_OP:
+ case LDS_IMM_OP:
+ case LDSA_IMM_OP:
+ case LDFS_OP:
+ case LDFSA_OP:
+ case LDFS_IMM_OP:
+ /*
+		 * The instruction will be retried with deferred exceptions
+		 * turned on, and we should get the NaT bit installed.
+ *
+ * IMPORTANT:
+ * When PSR_ED is set, the register & immediate update
+ * forms are actually executed even though the operation
+ * failed. So we don't need to take care of this.
+ */
+ DPRINT(("forcing PSR_ED\n"));
+ regs->cr_ipsr |= IA64_PSR_ED;
+ return;
+
+ case LD_OP:
+ case LDA_OP:
+ case LDBIAS_OP:
+ case LDACQ_OP:
+ case LDCCLR_OP:
+ case LDCNC_OP:
+ case LDCCLRACQ_OP:
+ case LD_IMM_OP:
+ case LDA_IMM_OP:
+ case LDBIAS_IMM_OP:
+ case LDACQ_IMM_OP:
+ case LDCCLR_IMM_OP:
+ case LDCNC_IMM_OP:
+ case LDCCLRACQ_IMM_OP:
+ ret = emulate_load_int(ifa, insn, regs);
+ break;
+ case ST_OP:
+ case STREL_OP:
+ case ST_IMM_OP:
+ case STREL_IMM_OP:
+ ret = emulate_store_int(ifa, insn, regs);
+ break;
+ case LDF_OP:
+ case LDFA_OP:
+ case LDFCCLR_OP:
+ case LDFCNC_OP:
+ case LDF_IMM_OP:
+ case LDFA_IMM_OP:
+ case LDFCCLR_IMM_OP:
+ case LDFCNC_IMM_OP:
+ ret = insn->x ?
+ emulate_load_floatpair(ifa, insn, regs):
+ emulate_load_float(ifa, insn, regs);
+ break;
+ case STF_OP:
+ case STF_IMM_OP:
+ ret = emulate_store_float(ifa, insn, regs);
+ }
+
+ DPRINT(("ret=%d\n", ret));
+ if ( ret ) {
+ lock_kernel();
+ force_sig(SIGSEGV, current);
+ unlock_kernel();
+ } else {
+ /*
+ * given today's architecture this case is not likely to happen
+ * because a memory access instruction (M) can never be in the
+ * last slot of a bundle. But let's keep it for now.
+ */
+ if ( ipsr->ri == 2 ) regs->cr_iip += 16;
+		ipsr->ri = (ipsr->ri + 1) & 3;
+ }
+
+ DPRINT(("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip));
+}
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
new file mode 100644
index 000000000..c2b772e68
--- /dev/null
+++ b/arch/ia64/kernel/unwind.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include <asm/unwind.h>
+
+void
+ia64_unwind_init_from_blocked_task (struct ia64_frame_info *info, struct task_struct *t)
+{
+ struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16);
+ unsigned long sol, limit, top;
+
+ memset(info, 0, sizeof(*info));
+
+ sol = (sw->ar_pfs >> 7) & 0x7f; /* size of locals */
+
+ limit = (unsigned long) t + IA64_RBS_OFFSET;
+ top = sw->ar_bspstore;
+ if (top - (unsigned long) t >= IA64_STK_OFFSET)
+ top = limit;
+
+ info->regstk.limit = (unsigned long *) limit;
+ info->regstk.top = (unsigned long *) top;
+ info->bsp = ia64_rse_skip_regs(info->regstk.top, -sol);
+ info->top_rnat = sw->ar_rnat;
+ info->cfm = sw->ar_pfs;
+ info->ip = sw->b0;
+}
+
+void
+ia64_unwind_init_from_current (struct ia64_frame_info *info, struct pt_regs *regs)
+{
+ struct switch_stack *sw = (struct switch_stack *) regs - 1;
+ unsigned long sol, sof, *bsp, limit, top;
+
+ limit = (unsigned long) current + IA64_RBS_OFFSET;
+ top = sw->ar_bspstore;
+ if (top - (unsigned long) current >= IA64_STK_OFFSET)
+ top = limit;
+
+ memset(info, 0, sizeof(*info));
+
+	sol = (sw->ar_pfs >> 7) & 0x7f;	/* size of locals */
+ info->regstk.limit = (unsigned long *) limit;
+ info->regstk.top = (unsigned long *) top;
+ info->top_rnat = sw->ar_rnat;
+
+	/* this gives us the bsp of the top-level frame (kdb interrupt frame): */
+ bsp = ia64_rse_skip_regs((unsigned long *) top, -sol);
+
+ /* now skip past the interrupt frame: */
+ sof = regs->cr_ifs & 0x7f; /* size of frame */
+ info->cfm = regs->cr_ifs;
+ info->bsp = ia64_rse_skip_regs(bsp, -sof);
+ info->ip = regs->cr_iip;
+}
+
+static unsigned long
+read_reg (struct ia64_frame_info *info, int regnum, int *is_nat)
+{
+ unsigned long *addr, *rnat_addr, rnat;
+
+ addr = ia64_rse_skip_regs(info->bsp, regnum);
+ if (addr < info->regstk.limit || addr >= info->regstk.top || ((long) addr & 0x7) != 0) {
+ *is_nat = 1;
+ return 0xdeadbeefdeadbeef;
+ }
+ rnat_addr = ia64_rse_rnat_addr(addr);
+
+ if (rnat_addr >= info->regstk.top)
+ rnat = info->top_rnat;
+ else
+ rnat = *rnat_addr;
+ *is_nat = (rnat & (1UL << ia64_rse_slot_num(addr))) != 0;
+ return *addr;
+}
+
+/*
+ * On entry, info->regstk.top should point to the register backing
+ * store for r32.
+ */
+int
+ia64_unwind_to_previous_frame (struct ia64_frame_info *info)
+{
+ unsigned long sol, cfm = info->cfm;
+ int is_nat;
+
+ sol = (cfm >> 7) & 0x7f; /* size of locals */
+
+ /*
+ * In general, we would have to make use of unwind info to
+ * unwind an IA-64 stack, but for now gcc uses a special
+ * convention that makes this possible without full-fledged
+	 * unwind info. Specifically, we expect "rp" in the second-to-last
+	 * local register and "ar.pfs" in the last local register, so the
+ * number of locals in a frame must be at least two. If it's
+ * less than that, we reached the end of the C call stack.
+ */
+ if (sol < 2)
+ return -1;
+
+ info->ip = read_reg(info, sol - 2, &is_nat);
+ if (is_nat)
+ return -1;
+
+ cfm = read_reg(info, sol - 1, &is_nat);
+ if (is_nat)
+ return -1;
+
+ sol = (cfm >> 7) & 0x7f;
+
+ info->cfm = cfm;
+ info->bsp = ia64_rse_skip_regs(info->bsp, -sol);
+ return 0;
+}
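+
+/*
+ * Usage sketch (illustrative only, not part of this file): walking the call
+ * chain of a blocked task with the primitives above, assuming a task pointer t:
+ *
+ *	struct ia64_frame_info info;
+ *
+ *	ia64_unwind_init_from_blocked_task(&info, t);
+ *	do {
+ *		printk("ip=%lx\n", info.ip);
+ *	} while (ia64_unwind_to_previous_frame(&info) >= 0);
+ *
+ * The loop stops once a frame has fewer than two locals, i.e., the end of the
+ * C call stack as described in the comment above.
+ */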