Diffstat (limited to 'arch/ia64')
-rw-r--r--  arch/ia64/Makefile  125
-rw-r--r--  arch/ia64/boot/Makefile  33
-rw-r--r--  arch/ia64/boot/bootloader.c  234
-rw-r--r--  arch/ia64/boot/bootloader.lds  65
-rw-r--r--  arch/ia64/config.in  172
-rw-r--r--  arch/ia64/defconfig  146
-rw-r--r--  arch/ia64/dig/Makefile  24
-rw-r--r--  arch/ia64/dig/iosapic.c  553
-rw-r--r--  arch/ia64/dig/machvec.c  4
-rw-r--r--  arch/ia64/dig/setup.c  93
-rw-r--r--  arch/ia64/hp/Makefile  19
-rw-r--r--  arch/ia64/hp/hpsim_console.c  78
-rw-r--r--  arch/ia64/hp/hpsim_irq.c  83
-rw-r--r--  arch/ia64/hp/hpsim_machvec.c  4
-rw-r--r--  arch/ia64/hp/hpsim_setup.c  71
-rw-r--r--  arch/ia64/hp/hpsim_ssc.h  36
-rw-r--r--  arch/ia64/ia32/Makefile  17
-rw-r--r--  arch/ia64/ia32/binfmt_elf32.c  180
-rw-r--r--  arch/ia64/ia32/ia32_entry.S  255
-rw-r--r--  arch/ia64/ia32/ia32_signal.c  412
-rw-r--r--  arch/ia64/ia32/ia32_support.c  61
-rw-r--r--  arch/ia64/ia32/sys_ia32.c  4309
-rw-r--r--  arch/ia64/kdb/Makefile  21
-rw-r--r--  arch/ia64/kdb/kdb_bt.c  104
-rw-r--r--  arch/ia64/kdb/kdb_io.c  350
-rw-r--r--  arch/ia64/kdb/kdb_traps.c  55
-rw-r--r--  arch/ia64/kdb/kdbsupport.c  1310
-rw-r--r--  arch/ia64/kdb/pc_keyb.h  127
-rw-r--r--  arch/ia64/kernel/Makefile  42
-rw-r--r--  arch/ia64/kernel/acpi.c  308
-rw-r--r--  arch/ia64/kernel/efi.c  365
-rw-r--r--  arch/ia64/kernel/efi_stub.S  141
-rw-r--r--  arch/ia64/kernel/entry.S  1261
-rw-r--r--  arch/ia64/kernel/entry.h  8
-rw-r--r--  arch/ia64/kernel/fw-emu.c  444
-rw-r--r--  arch/ia64/kernel/gate.S  200
-rw-r--r--  arch/ia64/kernel/head.S  646
-rw-r--r--  arch/ia64/kernel/init_task.c  31
-rw-r--r--  arch/ia64/kernel/irq.c  657
-rw-r--r--  arch/ia64/kernel/irq_default.c  30
-rw-r--r--  arch/ia64/kernel/irq_internal.c  71
-rw-r--r--  arch/ia64/kernel/irq_lock.c  287
-rw-r--r--  arch/ia64/kernel/ivt.S  1342
-rw-r--r--  arch/ia64/kernel/machvec.c  48
-rw-r--r--  arch/ia64/kernel/mca.c  842
-rw-r--r--  arch/ia64/kernel/mca_asm.S  621
-rw-r--r--  arch/ia64/kernel/pal.S  119
-rw-r--r--  arch/ia64/kernel/pci-dma.c  56
-rw-r--r--  arch/ia64/kernel/pci.c  239
-rw-r--r--  arch/ia64/kernel/perfmon.c  227
-rw-r--r--  arch/ia64/kernel/process.c  421
-rw-r--r--  arch/ia64/kernel/ptrace.c  653
-rw-r--r--  arch/ia64/kernel/sal.c  157
-rw-r--r--  arch/ia64/kernel/sal_stub.S  116
-rw-r--r--  arch/ia64/kernel/semaphore.c  336
-rw-r--r--  arch/ia64/kernel/setup.c  326
-rw-r--r--  arch/ia64/kernel/signal.c  537
-rw-r--r--  arch/ia64/kernel/smp.c  777
-rw-r--r--  arch/ia64/kernel/sys_ia64.c  216
-rw-r--r--  arch/ia64/kernel/time.c  290
-rw-r--r--  arch/ia64/kernel/traps.c  423
-rw-r--r--  arch/ia64/kernel/unaligned.c  1554
-rw-r--r--  arch/ia64/kernel/unwind.c  118
-rw-r--r--  arch/ia64/lib/Makefile  42
-rw-r--r--  arch/ia64/lib/checksum.c  110
-rw-r--r--  arch/ia64/lib/clear_page.S  42
-rw-r--r--  arch/ia64/lib/clear_user.S  224
-rw-r--r--  arch/ia64/lib/copy_page.S  87
-rw-r--r--  arch/ia64/lib/copy_user.S  71
-rw-r--r--  arch/ia64/lib/csum_partial_copy.c  165
-rw-r--r--  arch/ia64/lib/do_csum.S  230
-rw-r--r--  arch/ia64/lib/flush.S  37
-rw-r--r--  arch/ia64/lib/idiv.S  158
-rw-r--r--  arch/ia64/lib/memset.S  111
-rw-r--r--  arch/ia64/lib/strlen.S  197
-rw-r--r--  arch/ia64/lib/strlen_user.S  213
-rw-r--r--  arch/ia64/lib/strncpy_from_user.S  53
-rw-r--r--  arch/ia64/lib/strnlen_user.S  55
-rw-r--r--  arch/ia64/mm/Makefile  14
-rw-r--r--  arch/ia64/mm/extable.c  68
-rw-r--r--  arch/ia64/mm/fault.c  164
-rw-r--r--  arch/ia64/mm/init.c  461
-rw-r--r--  arch/ia64/mm/tlb.c  166
-rw-r--r--  arch/ia64/sn/Makefile  25
-rw-r--r--  arch/ia64/sn/sn1/Makefile  29
-rw-r--r--  arch/ia64/sn/sn1/irq.c  50
-rw-r--r--  arch/ia64/sn/sn1/machvec.c  4
-rw-r--r--  arch/ia64/sn/sn1/setup.c  77
-rw-r--r--  arch/ia64/tools/Makefile  49
-rw-r--r--  arch/ia64/tools/print_offsets.awk  70
-rw-r--r--  arch/ia64/tools/print_offsets.c  109
-rw-r--r--  arch/ia64/vmlinux.lds.S  164
92 files changed, 26095 insertions, 0 deletions
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
new file mode 100644
index 000000000..7dd3caabc
--- /dev/null
+++ b/arch/ia64/Makefile
@@ -0,0 +1,125 @@
+#
+# ia64/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1998, 1999 by David Mosberger-Tang <davidm@hpl.hp.com>
+#
+
+NM := $(CROSS_COMPILE)nm -B
+
+LINKFLAGS = -static -T arch/$(ARCH)/vmlinux.lds
+# next line is for HP compiler backend:
+#AFLAGS += -DGCC_RETVAL_POINTER_IN_R8
+# The next line is needed when compiling with the July snapshot of the Cygnus compiler:
+#EXTRA = -ma0-bugs -D__GCC_DOESNT_KNOW_IN_REGS__
+# next two lines are for the September snapshot of the Cygnus compiler:
+AFLAGS += -D__GCC_MULTIREG_RETVALS__
+EXTRA = -ma0-bugs -D__GCC_MULTIREG_RETVALS__
+
+CFLAGS := -g $(CFLAGS) -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f10-f15,f32-f127
+
+ifdef CONFIG_IA64_GENERIC
+ CORE_FILES := arch/$(ARCH)/hp/hp.a \
+ arch/$(ARCH)/sn/sn.a \
+ arch/$(ARCH)/dig/dig.a \
+ $(CORE_FILES)
+ SUBDIRS := arch/$(ARCH)/hp \
+ arch/$(ARCH)/sn/sn1 \
+ arch/$(ARCH)/sn \
+ arch/$(ARCH)/dig \
+ $(SUBDIRS)
+
+else # !GENERIC
+
+ifeq ($(CONFIG_IA64_HP_SIM),y)
+ SUBDIRS := arch/$(ARCH)/hp \
+ $(SUBDIRS)
+ CORE_FILES := arch/$(ARCH)/hp/hp.a \
+ $(CORE_FILES)
+endif
+
+ifeq ($(CONFIG_IA64_SGI_SN1_SIM),y)
+ SUBDIRS := arch/$(ARCH)/sn/sn1 \
+ arch/$(ARCH)/sn \
+ $(SUBDIRS)
+ CORE_FILES := arch/$(ARCH)/sn/sn.a \
+ $(CORE_FILES)
+endif
+
+ifeq ($(CONFIG_IA64_SOFTSDV),y)
+ SUBDIRS := arch/$(ARCH)/dig \
+ $(SUBDIRS)
+ CORE_FILES := arch/$(ARCH)/dig/dig.a \
+ $(CORE_FILES)
+endif
+
+ifeq ($(CONFIG_IA64_DIG),y)
+ SUBDIRS := arch/$(ARCH)/dig \
+ $(SUBDIRS)
+ CORE_FILES := arch/$(ARCH)/dig/dig.a \
+ $(CORE_FILES)
+endif
+
+endif # !GENERIC
+
+ifeq ($(CONFIG_IA32_SUPPORT),y)
+ SUBDIRS := arch/$(ARCH)/ia32 $(SUBDIRS)
+ CORE_FILES := arch/$(ARCH)/ia32/ia32.o $(CORE_FILES)
+endif
+
+ifdef CONFIG_KDB
+ LIBS := $(LIBS) $(TOPDIR)/arch/$(ARCH)/kdb/kdb.a
+ SUBDIRS := $(SUBDIRS) arch/$(ARCH)/kdb
+endif
+
+HEAD := arch/$(ARCH)/kernel/head.o arch/$(ARCH)/kernel/init_task.o
+
+SUBDIRS := arch/$(ARCH)/tools arch/$(ARCH)/kernel arch/$(ARCH)/mm arch/$(ARCH)/lib $(SUBDIRS)
+CORE_FILES := arch/$(ARCH)/kernel/kernel.o arch/$(ARCH)/mm/mm.o $(CORE_FILES)
+
+LIBS := $(TOPDIR)/arch/$(ARCH)/lib/lib.a $(LIBS) \
+ $(TOPDIR)/arch/$(ARCH)/lib/lib.a
+
+MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot
+
+vmlinux: arch/$(ARCH)/vmlinux.lds
+
+arch/$(ARCH)/vmlinux.lds: arch/$(ARCH)/vmlinux.lds.S FORCE
+ gcc -D__ASSEMBLY__ -E -C -P -I$(HPATH) -I$(HPATH)/asm-$(ARCH) \
+ arch/$(ARCH)/vmlinux.lds.S > $@
+
+FORCE: ;
+
+rawboot:
+ @$(MAKEBOOT) rawboot
+
+#
+# My boot writes directly to a specific disk partition, I doubt most
+# people will want to do that without changes..
+#
+msb my-special-boot:
+ @$(MAKEBOOT) msb
+
+bootimage:
+ @$(MAKEBOOT) bootimage
+
+srmboot:
+ @$(MAKEBOOT) srmboot
+
+archclean:
+ @$(MAKE) -C arch/$(ARCH)/kernel clean
+ @$(MAKE) -C arch/$(ARCH)/tools clean
+ @$(MAKEBOOT) clean
+
+archmrproper:
+ rm -f arch/$(ARCH)/vmlinux.lds
+ @$(MAKE) -C arch/$(ARCH)/tools mrproper
+
+archdep:
+ @$(MAKEBOOT) dep
+
+bootpfile:
+ @$(MAKEBOOT) bootpfile
diff --git a/arch/ia64/boot/Makefile b/arch/ia64/boot/Makefile
new file mode 100644
index 000000000..cba4fad66
--- /dev/null
+++ b/arch/ia64/boot/Makefile
@@ -0,0 +1,33 @@
+#
+# ia64/boot/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1998 by David Mosberger-Tang <davidm@hpl.hp.com>
+#
+
+LINKFLAGS = -static -T bootloader.lds
+
+.S.s:
+	$(CC) -D__ASSEMBLY__ $(AFLAGS) -traditional -E -o $*.s $<
+.S.o:
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -traditional -c -o $*.o $<
+
+OBJECTS = bootloader.o
+TARGETS =
+
+ifdef CONFIG_IA64_HP_SIM
+ TARGETS += bootloader
+endif
+
+all: $(TARGETS)
+
+bootloader: $(OBJECTS)
+ $(LD) $(LINKFLAGS) $(OBJECTS) $(LIBS) -o bootloader
+
+clean:
+ rm -f $(TARGETS)
+
+dep:
diff --git a/arch/ia64/boot/bootloader.c b/arch/ia64/boot/bootloader.c
new file mode 100644
index 000000000..cb6fc1f96
--- /dev/null
+++ b/arch/ia64/boot/bootloader.c
@@ -0,0 +1,234 @@
+/*
+ * arch/ia64/boot/bootloader.c
+ *
+ * Loads an ELF kernel.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 01/07/99 S.Eranian modified to pass command line arguments to kernel
+ */
+#include <linux/config.h>
+#include <linux/elf.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include <asm/elf.h>
+#include <asm/pal.h>
+#include <asm/pgtable.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+
+/* Simulator system calls: */
+
+#define SSC_CONSOLE_INIT 20
+#define SSC_GETCHAR 21
+#define SSC_PUTCHAR 31
+#define SSC_OPEN 50
+#define SSC_CLOSE 51
+#define SSC_READ 52
+#define SSC_WRITE 53
+#define SSC_GET_COMPLETION 54
+#define SSC_WAIT_COMPLETION 55
+#define SSC_CONNECT_INTERRUPT 58
+#define SSC_GENERATE_INTERRUPT 59
+#define SSC_SET_PERIODIC_INTERRUPT 60
+#define SSC_GET_RTC 65
+#define SSC_EXIT 66
+#define SSC_LOAD_SYMBOLS 69
+#define SSC_GET_TOD 74
+
+#define SSC_GET_ARGS 75
+
+struct disk_req {
+ unsigned long addr;
+ unsigned len;
+};
+
+struct disk_stat {
+ int fd;
+ unsigned count;
+};
+
+#include "../kernel/fw-emu.c"
+
+static void
+cons_write (const char *buf)
+{
+ unsigned long ch;
+
+ while ((ch = *buf++) != '\0') {
+ ssc(ch, 0, 0, 0, SSC_PUTCHAR);
+ if (ch == '\n')
+ ssc('\r', 0, 0, 0, SSC_PUTCHAR);
+ }
+}
+
+void
+enter_virtual_mode (unsigned long new_psr)
+{
+ asm volatile ("mov cr.ipsr=%0" :: "r"(new_psr));
+ asm volatile ("mov cr.iip=%0" :: "r"(&&target));
+ asm volatile ("mov cr.ifs=r0");
+ asm volatile ("rfi;;"); /* must be last insn in an insn group */
+
+ target:
+}
+
+
+#define MAX_ARGS 32
+
+void
+_start (void)
+{
+ register long sp asm ("sp");
+ static char stack[16384] __attribute__ ((aligned (16)));
+ static char mem[4096];
+ static char buffer[1024];
+ unsigned long flags, off;
+ int fd, i;
+ struct disk_req req;
+ struct disk_stat stat;
+ struct elfhdr *elf;
+ struct elf_phdr *elf_phdr; /* program header */
+ unsigned long e_entry, e_phoff, e_phnum;
+ char *kpath, *args;
+ long arglen = 0;
+
+ asm volatile ("movl gp=__gp" ::: "memory");
+ asm volatile ("mov sp=%0" :: "r"(stack) : "memory");
+ asm volatile ("bsw.1;;");
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+	asm volatile ("nop 0;; nop 0;; nop 0;;");
+#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */
+
+ ssc(0, 0, 0, 0, SSC_CONSOLE_INIT);
+
+ /*
+ * S.Eranian: extract the commandline argument from the
+ * simulator
+ *
+ * The expected format is as follows:
+ *
+ * kernelname args...
+ *
+ * Both are optional but you can't have the second one without the
+ * first.
+ */
+ arglen = ssc((long) buffer, 0, 0, 0, SSC_GET_ARGS);
+
+ kpath = "vmlinux";
+ args = buffer;
+ if (arglen > 0) {
+ kpath = buffer;
+ while (*args != ' ' && *args != '\0')
+ ++args, --arglen;
+ if (*args == ' ')
+ *args++ = '\0', --arglen;
+ }
+
+ if (arglen <= 0) {
+ args = "";
+ arglen = 1;
+ }
+
+ fd = ssc((long) kpath, 1, 0, 0, SSC_OPEN);
+
+ if (fd < 0) {
+ cons_write(kpath);
+ cons_write(": file not found, reboot now\n");
+ for(;;);
+ }
+ stat.fd = fd;
+ off = 0;
+
+ req.len = sizeof(mem);
+ req.addr = (long) mem;
+ ssc(fd, 1, (long) &req, off, SSC_READ);
+ ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
+
+ elf = (struct elfhdr *) mem;
+	if (elf->e_ident[0] != 0x7f || strncmp(elf->e_ident + 1, "ELF", 3) != 0) {
+ cons_write("not an ELF file\n");
+ return;
+ }
+ if (elf->e_type != ET_EXEC) {
+ cons_write("not an ELF executable\n");
+ return;
+ }
+ if (!elf_check_arch(elf->e_machine)) {
+ cons_write("kernel not for this processor\n");
+ return;
+ }
+
+ e_entry = elf->e_entry;
+ e_phnum = elf->e_phnum;
+ e_phoff = elf->e_phoff;
+
+ cons_write("loading ");
+ cons_write(kpath);
+ cons_write("...\n");
+
+ for (i = 0; i < e_phnum; ++i) {
+ req.len = sizeof(*elf_phdr);
+ req.addr = (long) mem;
+ ssc(fd, 1, (long) &req, e_phoff, SSC_READ);
+ ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
+ if (stat.count != sizeof(*elf_phdr)) {
+ cons_write("failed to read phdr\n");
+ return;
+ }
+ e_phoff += sizeof(*elf_phdr);
+
+ elf_phdr = (struct elf_phdr *) mem;
+ req.len = elf_phdr->p_filesz;
+ req.addr = __pa(elf_phdr->p_vaddr);
+ ssc(fd, 1, (long) &req, elf_phdr->p_offset, SSC_READ);
+ ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
+ memset((char *)__pa(elf_phdr->p_vaddr) + elf_phdr->p_filesz, 0,
+ elf_phdr->p_memsz - elf_phdr->p_filesz);
+ }
+ ssc(fd, 0, 0, 0, SSC_CLOSE);
+
+ cons_write("starting kernel...\n");
+
+ /* fake an I/O base address: */
+ asm volatile ("mov ar.k0=%0" :: "r"(0xffffc000000UL));
+
+ /*
+ * Install a translation register that identity maps the
+ * kernel's 256MB page.
+ */
+ ia64_clear_ic(flags);
+ ia64_set_rr( 0, (0x1000 << 8) | (_PAGE_SIZE_1M << 2));
+ ia64_set_rr(PAGE_OFFSET, (ia64_rid(0, PAGE_OFFSET) << 8) | (_PAGE_SIZE_256M << 2));
+ ia64_srlz_d();
+ ia64_itr(0x3, 0, 1024*1024,
+ pte_val(mk_pte_phys(1024*1024, __pgprot(__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX))),
+ _PAGE_SIZE_1M);
+ ia64_itr(0x3, 1, PAGE_OFFSET,
+ pte_val(mk_pte_phys(0, __pgprot(__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX))),
+ _PAGE_SIZE_256M);
+ ia64_srlz_i();
+
+ enter_virtual_mode(flags | IA64_PSR_IT | IA64_PSR_IC | IA64_PSR_DT | IA64_PSR_RT
+ | IA64_PSR_DFH | IA64_PSR_BN);
+
+ sys_fw_init(args, arglen);
+
+ ssc(0, (long) kpath, 0, 0, SSC_LOAD_SYMBOLS);
+
+ /*
+	 * Install the kernel's command line arguments on the ZERO_PAGE,
+	 * just after the bootparam structure.  If there are no
+	 * arguments, just store '\0'.
+ */
+ memcpy(((struct ia64_boot_param *)ZERO_PAGE_ADDR) + 1, args, arglen);
+ sp = __pa(&stack);
+
+ asm volatile ("br.sptk.few %0" :: "b"(e_entry));
+
+ cons_write("kernel returned!\n");
+ ssc(-1, 0, 0, 0, SSC_EXIT);
+}
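
The loader above talks to the simulator's disk interface with a fixed open/read/wait
sequence.  Below is a minimal sketch of that protocol, assuming the ssc() stub, the
disk_req/disk_stat structures and the SSC_* numbers defined in bootloader.c; the helper
name sim_read_at() is invented for this illustration and is not part of the patch.

	/* Sketch: read `len' bytes at file offset `off' into `buf' via the simulator. */
	static long
	sim_read_at (int fd, void *buf, unsigned len, unsigned long off)
	{
		struct disk_req req;
		struct disk_stat stat;

		req.addr = (unsigned long) buf;		/* destination buffer */
		req.len  = len;				/* bytes requested */
		stat.fd  = fd;				/* completion is reported per fd */

		ssc(fd, 1, (long) &req, off, SSC_READ);			/* queue one request */
		ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);	/* block until done */
		return stat.count;			/* bytes actually transferred */
	}

The ELF header and each program header in _start() are fetched with exactly this
sequence, and stat.count is compared against the expected size to catch short reads.
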
diff --git a/arch/ia64/boot/bootloader.lds b/arch/ia64/boot/bootloader.lds
new file mode 100644
index 000000000..a73518406
--- /dev/null
+++ b/arch/ia64/boot/bootloader.lds
@@ -0,0 +1,65 @@
+OUTPUT_FORMAT("elf64-ia64-little")
+OUTPUT_ARCH(ia64)
+ENTRY(_start)
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+ . = 0x100000;
+
+ _text = .;
+ .text : { *(__ivt_section) *(.text) }
+ _etext = .;
+
+ /* Global data */
+ _data = .;
+ .rodata : { *(.rodata) }
+ .data : { *(.data) *(.gnu.linkonce.d*) CONSTRUCTORS }
+ __gp = ALIGN (8) + 0x200000;
+ .got : { *(.got.plt) *(.got) }
+ /* We want the small data sections together, so single-instruction offsets
+ can access them all, and initialized data all before uninitialized, so
+ we can shorten the on-disk segment size. */
+ .sdata : { *(.sdata) }
+ _edata = .;
+
+ _bss = .;
+ .sbss : { *(.sbss) *(.scommon) }
+ .bss : { *(.bss) *(COMMON) }
+ . = ALIGN(64 / 8);
+ _end = . ;
+
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* These must appear regardless of . */
+}
diff --git a/arch/ia64/config.in b/arch/ia64/config.in
new file mode 100644
index 000000000..d006c1d05
--- /dev/null
+++ b/arch/ia64/config.in
@@ -0,0 +1,172 @@
+mainmenu_name "Kernel configuration of Linux for IA-64 machines"
+
+mainmenu_option next_comment
+comment 'General setup'
+
+choice 'IA-64 system type' \
+ "Generic CONFIG_IA64_GENERIC \
+ HP-simulator CONFIG_IA64_HP_SIM \
+ SN1-simulator CONFIG_IA64_SGI_SN1_SIM \
+ DIG-compliant CONFIG_IA64_DIG" Generic
+
+choice 'Kernel page size' \
+ "4KB CONFIG_IA64_PAGE_SIZE_4KB \
+ 8KB CONFIG_IA64_PAGE_SIZE_8KB \
+ 16KB CONFIG_IA64_PAGE_SIZE_16KB \
+ 64KB CONFIG_IA64_PAGE_SIZE_64KB" 16KB
+
+if [ "$CONFIG_IA64_DIG" = "y" ]; then
+ bool ' Enable Itanium A-step specific code' CONFIG_ITANIUM_ASTEP_SPECIFIC
+ bool ' Enable SoftSDV hacks' CONFIG_IA64_SOFTSDV_HACKS n
+ bool ' Enable BigSur hacks' CONFIG_IA64_BIGSUR_HACKS y
+ bool ' Enable Lion hacks' CONFIG_IA64_LION_HACKS n
+ bool ' Emulate PAL/SAL/EFI firmware' CONFIG_IA64_FW_EMU n
+ bool ' Get PCI IRQ routing from firmware/ACPI' CONFIG_IA64_IRQ_ACPI y
+fi
+
+if [ "$CONFIG_IA64_GENERIC" = "y" ]; then
+ define_bool CONFIG_IA64_SOFTSDV_HACKS y
+fi
+
+if [ "$CONFIG_IA64_SGI_SN1_SIM" = "y" ]; then
+ define_bool CONFIG_NUMA y
+ define_bool CONFIG_IA64_SOFTSDV_HACKS y
+fi
+
+define_bool CONFIG_KCORE_ELF y # On IA-64, we always want an ELF /dev/kcore.
+
+bool 'SMP support' CONFIG_SMP n
+bool 'Performance monitor support' CONFIG_PERFMON n
+
+bool 'Networking support' CONFIG_NET n
+bool 'System V IPC' CONFIG_SYSVIPC n
+bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT n
+bool 'Sysctl support' CONFIG_SYSCTL n
+tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
+tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC
+
+bool 'PCI support' CONFIG_PCI n
+source drivers/pci/Config.in
+
+source drivers/pcmcia/Config.in
+
+mainmenu_option next_comment
+ comment 'Code maturity level options'
+ bool 'Prompt for development and/or incomplete code/drivers' \
+ CONFIG_EXPERIMENTAL n
+endmenu
+
+mainmenu_option next_comment
+ comment 'Loadable module support'
+ bool 'Enable loadable module support' CONFIG_MODULES n
+ if [ "$CONFIG_MODULES" = "y" ]; then
+ bool 'Set version information on all symbols for modules' CONFIG_MODVERSIONS n
+ bool 'Kernel module loader' CONFIG_KMOD n
+ fi
+endmenu
+
+source drivers/parport/Config.in
+
+endmenu
+
+source drivers/pnp/Config.in
+source drivers/block/Config.in
+source drivers/i2o/Config.in
+
+if [ "$CONFIG_NET" = "y" ]; then
+ source net/Config.in
+fi
+
+mainmenu_option next_comment
+comment 'SCSI support'
+
+tristate 'SCSI support' CONFIG_SCSI
+
+if [ "$CONFIG_SCSI" != "n" ]; then
+ source drivers/scsi/Config.in
+ bool 'Simulated SCSI disk' CONFIG_SCSI_SIM n
+fi
+endmenu
+
+if [ "$CONFIG_NET" = "y" ]; then
+ mainmenu_option next_comment
+ comment 'Network device support'
+
+ bool 'Network device support' CONFIG_NETDEVICES n
+ if [ "$CONFIG_NETDEVICES" = "y" ]; then
+ source drivers/net/Config.in
+ fi
+ endmenu
+fi
+
+source net/ax25/Config.in
+
+mainmenu_option next_comment
+comment 'ISDN subsystem'
+
+tristate 'ISDN support' CONFIG_ISDN
+if [ "$CONFIG_ISDN" != "n" ]; then
+ source drivers/isdn/Config.in
+fi
+endmenu
+
+mainmenu_option next_comment
+comment 'CD-ROM drivers (not for SCSI or IDE/ATAPI drives)'
+
+bool 'Support non-SCSI/IDE/ATAPI drives' CONFIG_CD_NO_IDESCSI n
+if [ "$CONFIG_CD_NO_IDESCSI" != "n" ]; then
+ source drivers/cdrom/Config.in
+fi
+endmenu
+
+source drivers/char/Config.in
+source drivers/usb/Config.in
+source drivers/misc/Config.in
+
+source fs/Config.in
+
+source fs/nls/Config.in
+
+if [ "$CONFIG_VT" = "y" ]; then
+ mainmenu_option next_comment
+ comment 'Console drivers'
+ bool 'VGA text console' CONFIG_VGA_CONSOLE n
+ if [ "$CONFIG_FB" = "y" ]; then
+ define_bool CONFIG_PCI_CONSOLE y
+ fi
+ source drivers/video/Config.in
+ endmenu
+fi
+
+mainmenu_option next_comment
+comment 'Sound'
+
+tristate 'Sound card support' CONFIG_SOUND
+if [ "$CONFIG_SOUND" != "n" ]; then
+ source drivers/sound/Config.in
+fi
+endmenu
+
+mainmenu_option next_comment
+comment 'Kernel hacking'
+
+#bool 'Debug kmalloc/kfree' CONFIG_DEBUG_MALLOC
+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+ tristate 'Kernel support for IA-32 emulation' CONFIG_IA32_SUPPORT
+ tristate 'Kernel FP software completion' CONFIG_MATHEMU
+else
+ define_bool CONFIG_MATHEMU y
+fi
+
+bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ n
+bool 'Early printk support (requires VGA!)' CONFIG_IA64_EARLY_PRINTK n
+bool 'Turn on compare-and-exchange bug checking (slow!)' CONFIG_IA64_DEBUG_CMPXCHG n
+bool 'Turn on irq debug checks (slow!)' CONFIG_IA64_DEBUG_IRQ n
+bool 'Print possible IA64 hazards to console' CONFIG_IA64_PRINT_HAZARDS n
+bool 'Built-in Kernel Debugger support' CONFIG_KDB
+if [ "$CONFIG_KDB" = "y" ]; then
+ bool 'Compile the kernel with frame pointers' CONFIG_KDB_FRAMEPTR
+ int 'KDB Kernel Symbol Table size?' CONFIG_KDB_STBSIZE 10000
+fi
+
+endmenu
diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig
new file mode 100644
index 000000000..a96599889
--- /dev/null
+++ b/arch/ia64/defconfig
@@ -0,0 +1,146 @@
+#
+# Automatically generated make config: don't edit
+#
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+
+#
+# Loadable module support
+#
+# CONFIG_MODULES is not set
+
+#
+# General setup
+#
+CONFIG_IA64_SIM=y
+CONFIG_PCI=y
+# CONFIG_PCI_QUIRKS is not set
+CONFIG_PCI_OLD_PROC=y
+# CONFIG_NET is not set
+# CONFIG_SYSVIPC is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_SYSCTL is not set
+# CONFIG_BINFMT_ELF is not set
+# CONFIG_BINFMT_MISC is not set
+# CONFIG_BINFMT_JAVA is not set
+# CONFIG_BINFMT_EM86 is not set
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+# CONFIG_PNP is not set
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_IDE is not set
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_HD_ONLY is not set
+
+#
+# Additional Block Devices
+#
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_MD is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_BLK_DEV_XD is not set
+CONFIG_PARIDE_PARPORT=y
+# CONFIG_PARIDE is not set
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI support
+#
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_G_NCR5380_PORT is not set
+# CONFIG_SCSI_G_NCR5380_MEM is not set
+
+#
+# Amateur Radio support
+#
+# CONFIG_HAMRADIO is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# CD-ROM drivers (not for SCSI or IDE/ATAPI drives)
+#
+# CONFIG_CD_NO_IDESCSI is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_SERIAL is not set
+# CONFIG_SERIAL_EXTENDED is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+# CONFIG_UNIX98_PTYS is not set
+# CONFIG_MOUSE is not set
+# CONFIG_QIC02_TAPE is not set
+# CONFIG_WATCHDOG is not set
+# CONFIG_RTC is not set
+CONFIG_EFI_RTC=y
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_NVRAM is not set
+# CONFIG_JOYSTICK is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+# CONFIG_FT_NORMAL_DEBUG is not set
+# CONFIG_FT_FULL_DEBUG is not set
+# CONFIG_FT_NO_TRACE is not set
+# CONFIG_FT_NO_TRACE_AT_ALL is not set
+# CONFIG_FT_STD_FDC is not set
+# CONFIG_FT_MACH2 is not set
+# CONFIG_FT_PROBE_FC10 is not set
+# CONFIG_FT_ALT_FDC is not set
+
+#
+# Filesystems
+#
+# CONFIG_QUOTA is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_EXT2_FS is not set
+# CONFIG_ISO9660_FS is not set
+# CONFIG_FAT_FS is not set
+# CONFIG_PROC_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_NTFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_SMD_DISKLABEL is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_ADFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_MAC_PARTITION is not set
+# CONFIG_NLS is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_MATHEMU is not set
+# CONFIG_MAGIC_SYSRQ is not set
diff --git a/arch/ia64/dig/Makefile b/arch/ia64/dig/Makefile
new file mode 100644
index 000000000..cfc48eec1
--- /dev/null
+++ b/arch/ia64/dig/Makefile
@@ -0,0 +1,24 @@
+#
+# ia64/platform/dig/Makefile
+#
+# Copyright (C) 1999 Silicon Graphics, Inc.
+# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
+#
+
+.S.s:
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -E -o $*.s $<
+.S.o:
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -c -o $*.o $<
+
+all: dig.a
+
+O_TARGET = dig.a
+O_OBJS = iosapic.o setup.o
+
+ifeq ($(CONFIG_IA64_GENERIC),y)
+O_OBJS += machvec.o
+endif
+
+clean::
+
+include $(TOPDIR)/Rules.make
diff --git a/arch/ia64/dig/iosapic.c b/arch/ia64/dig/iosapic.c
new file mode 100644
index 000000000..6a392226e
--- /dev/null
+++ b/arch/ia64/dig/iosapic.c
@@ -0,0 +1,553 @@
+/*
+ * Streamlined APIC support.
+ *
+ * Copyright (C) 1999 Intel Corp.
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999-2000 Hewlett-Packard Co.
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
+ */
+#include <linux/config.h>
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/string.h>
+
+#include <asm/io.h>
+#include <asm/iosapic.h>
+#include <asm/irq.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/delay.h>
+#include <asm/processor.h>
+
+#undef DEBUG_IRQ_ROUTING
+
+/*
+ * IRQ vectors 0..15 are treated as the legacy interrupts of the PC-AT
+ * platform. No new drivers should ever ask for specific irqs, but we
+ * provide compatibility here in case there is an old driver that does
+ * ask for specific irqs (serial, keyboard, stuff like that). Since
+ * IA-64 doesn't allow irq 0..15 to be used for external interrupts
+ * anyhow, this in no way prevents us from doing the Right Thing
+ * with new drivers.
+ */
+struct iosapic_vector iosapic_vector[NR_IRQS] = {
+ [0 ... NR_IRQS-1] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }
+};
+
+#ifndef CONFIG_IA64_IRQ_ACPI
+/*
+ * Defines the default interrupt routing information for the LION platform
+ * XXX - this information should be obtained from the ACPI and hardcoded since
+ * we do not have ACPI AML support.
+ */
+
+struct intr_routing_entry intr_routing[] = {
+ {0,0,0,2,0,0,0,0},
+ {0,0,1,1,0,0,0,0},
+ {0,0,2,0xff,0,0,0,0},
+ {0,0,3,3,0,0,0,0},
+ {0,0,4,4,0,0,0,0},
+ {0,0,5,5,0,0,0,0},
+ {0,0,6,6,0,0,0,0},
+ {0,0,7,7,0,0,0,0},
+ {0,0,8,8,0,0,0,0},
+ {0,0,9,9,0,0,0,0},
+ {0,0,10,10,0,0,0,0},
+ {0,0,11,11,0,0,0,0},
+ {0,0,12,12,0,0,0,0},
+ {0,0,13,13,0,0,0,0},
+ {0,0,14,14,0,0,0,0},
+ {0,0,15,15,0,0,0,0},
+#ifdef CONFIG_IA64_LION_HACKS
+ {1, 0, 0x04, 16, 0, 0, 1, 1}, /* bus 0, device id 1, INTA */
+ {1, 0, 0x05, 26, 0, 0, 1, 1}, /* bus 0, device id 1, INTB */
+ {1, 0, 0x06, 36, 0, 0, 1, 1}, /* bus 0, device id 1, INTC */
+ {1, 0, 0x07, 42, 0, 0, 1, 1}, /* bus 0, device id 1, INTD */
+
+ {1, 0, 0x08, 17, 0, 0, 1, 1}, /* bus 0, device id 2, INTA */
+ {1, 0, 0x09, 27, 0, 0, 1, 1}, /* bus 0, device id 2, INTB */
+ {1, 0, 0x0a, 37, 0, 0, 1, 1}, /* bus 0, device id 2, INTC */
+ {1, 0, 0x0b, 42, 0, 0, 1, 1}, /* bus 0, device id 2, INTD */
+
+ {1, 0, 0x0f, 50, 0, 0, 1, 1}, /* bus 0, device id 3, INTD */
+
+ {1, 0, 0x14, 51, 0, 0, 1, 1}, /* bus 0, device id 5, INTA */
+
+ {1, 0, 0x18, 49, 0, 0, 1, 1}, /* bus 0, device id 6, INTA */
+
+ {1, 1, 0x04, 18, 0, 0, 1, 1}, /* bus 1, device id 1, INTA */
+ {1, 1, 0x05, 28, 0, 0, 1, 1}, /* bus 1, device id 1, INTB */
+ {1, 1, 0x06, 38, 0, 0, 1, 1}, /* bus 1, device id 1, INTC */
+ {1, 1, 0x07, 43, 0, 0, 1, 1}, /* bus 1, device id 1, INTD */
+
+ {1, 1, 0x08, 48, 0, 0, 1, 1}, /* bus 1, device id 2, INTA */
+
+ {1, 1, 0x0c, 19, 0, 0, 1, 1}, /* bus 1, device id 3, INTA */
+ {1, 1, 0x0d, 29, 0, 0, 1, 1}, /* bus 1, device id 3, INTB */
+ {1, 1, 0x0e, 38, 0, 0, 1, 1}, /* bus 1, device id 3, INTC */
+ {1, 1, 0x0f, 44, 0, 0, 1, 1}, /* bus 1, device id 3, INTD */
+
+ {1, 1, 0x10, 20, 0, 0, 1, 1}, /* bus 1, device id 4, INTA */
+ {1, 1, 0x11, 30, 0, 0, 1, 1}, /* bus 1, device id 4, INTB */
+ {1, 1, 0x12, 39, 0, 0, 1, 1}, /* bus 1, device id 4, INTC */
+ {1, 1, 0x13, 45, 0, 0, 1, 1}, /* bus 1, device id 4, INTD */
+
+ {1, 2, 0x04, 21, 0, 0, 1, 1}, /* bus 2, device id 1, INTA */
+ {1, 2, 0x05, 31, 0, 0, 1, 1}, /* bus 2, device id 1, INTB */
+ {1, 2, 0x06, 39, 0, 0, 1, 1}, /* bus 2, device id 1, INTC */
+ {1, 2, 0x07, 45, 0, 0, 1, 1}, /* bus 2, device id 1, INTD */
+
+ {1, 2, 0x08, 22, 0, 0, 1, 1}, /* bus 2, device id 2, INTA */
+ {1, 2, 0x09, 32, 0, 0, 1, 1}, /* bus 2, device id 2, INTB */
+ {1, 2, 0x0a, 40, 0, 0, 1, 1}, /* bus 2, device id 2, INTC */
+ {1, 2, 0x0b, 46, 0, 0, 1, 1}, /* bus 2, device id 2, INTD */
+
+ {1, 2, 0x0c, 23, 0, 0, 1, 1}, /* bus 2, device id 3, INTA */
+ {1, 2, 0x0d, 33, 0, 0, 1, 1}, /* bus 2, device id 3, INTB */
+ {1, 2, 0x0e, 40, 0, 0, 1, 1}, /* bus 2, device id 3, INTC */
+ {1, 2, 0x0f, 46, 0, 0, 1, 1}, /* bus 2, device id 3, INTD */
+
+ {1, 3, 0x04, 24, 0, 0, 1, 1}, /* bus 3, device id 1, INTA */
+ {1, 3, 0x05, 34, 0, 0, 1, 1}, /* bus 3, device id 1, INTB */
+ {1, 3, 0x06, 41, 0, 0, 1, 1}, /* bus 3, device id 1, INTC */
+ {1, 3, 0x07, 47, 0, 0, 1, 1}, /* bus 3, device id 1, INTD */
+
+ {1, 3, 0x08, 25, 0, 0, 1, 1}, /* bus 3, device id 2, INTA */
+ {1, 3, 0x09, 35, 0, 0, 1, 1}, /* bus 3, device id 2, INTB */
+ {1, 3, 0x0a, 41, 0, 0, 1, 1}, /* bus 3, device id 2, INTC */
+ {1, 3, 0x0b, 47, 0, 0, 1, 1}, /* bus 3, device id 2, INTD */
+#else
+ /*
+ * BigSur platform, bus 0, device 1,2,4 and bus 1 device 0-3
+ */
+ {1,1,0x0,19,0,0,1,1}, /* bus 1, device id 0, INTA */
+ {1,1,0x1,18,0,0,1,1}, /* bus 1, device id 0, INTB */
+ {1,1,0x2,17,0,0,1,1}, /* bus 1, device id 0, INTC */
+ {1,1,0x3,16,0,0,1,1}, /* bus 1, device id 0, INTD */
+
+ {1,1,0x4,23,0,0,1,1}, /* bus 1, device id 1, INTA */
+ {1,1,0x5,22,0,0,1,1}, /* bus 1, device id 1, INTB */
+ {1,1,0x6,21,0,0,1,1}, /* bus 1, device id 1, INTC */
+ {1,1,0x7,20,0,0,1,1}, /* bus 1, device id 1, INTD */
+
+ {1,1,0x8,27,0,0,1,1}, /* bus 1, device id 2, INTA */
+ {1,1,0x9,26,0,0,1,1}, /* bus 1, device id 2, INTB */
+ {1,1,0xa,25,0,0,1,1}, /* bus 1, device id 2, INTC */
+ {1,1,0xb,24,0,0,1,1}, /* bus 1, device id 2, INTD */
+
+ {1,1,0xc,31,0,0,1,1}, /* bus 1, device id 3, INTA */
+ {1,1,0xd,30,0,0,1,1}, /* bus 1, device id 3, INTB */
+ {1,1,0xe,29,0,0,1,1}, /* bus 1, device id 3, INTC */
+ {1,1,0xf,28,0,0,1,1}, /* bus 1, device id 3, INTD */
+
+ {1,0,0x4,35,0,0,1,1}, /* bus 0, device id 1, INTA */
+ {1,0,0x5,34,0,0,1,1}, /* bus 0, device id 1, INTB */
+ {1,0,0x6,33,0,0,1,1}, /* bus 0, device id 1, INTC */
+ {1,0,0x7,32,0,0,1,1}, /* bus 0, device id 1, INTD */
+
+ {1,0,0x8,39,0,0,1,1}, /* bus 0, device id 2, INTA */
+ {1,0,0x9,38,0,0,1,1}, /* bus 0, device id 2, INTB */
+ {1,0,0xa,37,0,0,1,1}, /* bus 0, device id 2, INTC */
+ {1,0,0xb,36,0,0,1,1}, /* bus 0, device id 2, INTD */
+
+ {1,0,0x10,43,0,0,1,1}, /* bus 0, device id 4, INTA */
+ {1,0,0x11,42,0,0,1,1}, /* bus 0, device id 4, INTB */
+ {1,0,0x12,41,0,0,1,1}, /* bus 0, device id 4, INTC */
+ {1,0,0x13,40,0,0,1,1}, /* bus 0, device id 4, INTD */
+
+ {1,0,0x14,17,0,0,1,1}, /* bus 0, device id 5, INTA */
+ {1,0,0x18,18,0,0,1,1}, /* bus 0, device id 6, INTA */
+ {1,0,0x1c,19,0,0,1,1}, /* bus 0, device id 7, INTA */
+#endif
+ {0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff},
+};
+
+int
+iosapic_get_PCI_irq_vector(int bus, int slot, int pci_pin)
+{
+ int i = -1;
+
+ while (intr_routing[++i].srcbus != 0xff) {
+ if (intr_routing[i].srcbus == BUS_PCI) {
+ if ((intr_routing[i].srcbusirq == ((slot << 2) | pci_pin))
+ && (intr_routing[i].srcbusno == bus)) {
+ return(intr_routing[i].iosapic_pin);
+ }
+ }
+ }
+ return -1;
+}
+
+#else /* CONFIG_IA64_IRQ_ACPI */
+
+/*
+ * find the IRQ in the IOSAPIC map for the PCI device on bus/slot/pin
+ */
+int
+iosapic_get_PCI_irq_vector(int bus, int slot, int pci_pin)
+{
+ int i;
+
+ for (i = 0; i < NR_IRQS; i++) {
+ if ((iosapic_bustype(i) == BUS_PCI) &&
+ (iosapic_bus(i) == bus) &&
+ (iosapic_busdata(i) == ((slot << 16) | pci_pin))) {
+ return i;
+ }
+ }
+
+ return -1;
+}
+#endif /* !CONFIG_IA64_IRQ_ACPI */
+
+static void
+set_rte (unsigned long iosapic_addr, int entry, int pol, int trigger, int delivery,
+ long dest, int vector)
+{
+ int low32;
+ int high32;
+
+ low32 = ((pol << IO_SAPIC_POLARITY_SHIFT) |
+ (trigger << IO_SAPIC_TRIGGER_SHIFT) |
+ (delivery << IO_SAPIC_DELIVERY_SHIFT) |
+ vector);
+
+ /* dest contains both id and eid */
+ high32 = (dest << IO_SAPIC_DEST_SHIFT);
+
+ /*
+ * program the rte
+ */
+ writel(IO_SAPIC_RTE_HIGH(entry), iosapic_addr + IO_SAPIC_REG_SELECT);
+ writel(high32, iosapic_addr + IO_SAPIC_WINDOW);
+ writel(IO_SAPIC_RTE_LOW(entry), iosapic_addr + IO_SAPIC_REG_SELECT);
+ writel(low32, iosapic_addr + IO_SAPIC_WINDOW);
+}
+
+
+static void
+enable_pin (unsigned int pin, unsigned long iosapic_addr)
+{
+ int low32;
+
+ writel(IO_SAPIC_RTE_LOW(pin), iosapic_addr + IO_SAPIC_REG_SELECT);
+ low32 = readl(iosapic_addr + IO_SAPIC_WINDOW);
+
+ low32 &= ~(1 << IO_SAPIC_MASK_SHIFT); /* Zero only the mask bit */
+ writel(low32, iosapic_addr + IO_SAPIC_WINDOW);
+}
+
+
+static void
+disable_pin (unsigned int pin, unsigned long iosapic_addr)
+{
+ int low32;
+
+ writel(IO_SAPIC_RTE_LOW(pin), iosapic_addr + IO_SAPIC_REG_SELECT);
+ low32 = readl(iosapic_addr + IO_SAPIC_WINDOW);
+
+ low32 |= (1 << IO_SAPIC_MASK_SHIFT); /* Set only the mask bit */
+ writel(low32, iosapic_addr + IO_SAPIC_WINDOW);
+}
+
+#define iosapic_shutdown_irq iosapic_disable_irq
+
+static void
+iosapic_enable_irq (unsigned int irq)
+{
+ int pin = iosapic_pin(irq);
+
+ if (pin < 0)
+ /* happens during irq auto probing... */
+ return;
+ enable_pin(pin, iosapic_addr(irq));
+}
+
+static void
+iosapic_disable_irq (unsigned int irq)
+{
+ int pin = iosapic_pin(irq);
+
+ if (pin < 0)
+ return;
+ disable_pin(pin, iosapic_addr(irq));
+}
+
+unsigned int
+iosapic_version(unsigned long base_addr)
+{
+ /*
+	 * The IOSAPIC Version Register returns a 32-bit structure like:
+ * {
+ * unsigned int version : 8;
+ * unsigned int reserved1 : 8;
+ * unsigned int pins : 8;
+ * unsigned int reserved2 : 8;
+ * }
+ */
+ writel(IO_SAPIC_VERSION, base_addr + IO_SAPIC_REG_SELECT);
+ return readl(IO_SAPIC_WINDOW + base_addr);
+}
+
+static int
+iosapic_handle_irq (unsigned int irq, struct pt_regs *regs)
+{
+ struct irqaction *action = 0;
+ struct irq_desc *id = irq_desc + irq;
+ unsigned int status;
+ int retval;
+
+ spin_lock(&irq_controller_lock);
+ {
+ status = id->status;
+
+ /* do we need to do something IOSAPIC-specific to ACK the irq here??? */
+ /* Yes, but only level-triggered interrupts. We'll do that later */
+ if ((status & IRQ_INPROGRESS) == 0 && (status & IRQ_ENABLED) != 0) {
+ action = id->action;
+ status |= IRQ_INPROGRESS;
+ }
+ id->status = status & ~(IRQ_REPLAY | IRQ_WAITING);
+ }
+ spin_unlock(&irq_controller_lock);
+
+ if (!action) {
+ if (!(id->status & IRQ_AUTODETECT))
+			printk("iosapic_handle_irq: unexpected interrupt %u; "
+ "disabling it (status=%x)\n", irq, id->status);
+ /*
+ * If we don't have a handler, disable the pin so we
+ * won't get any further interrupts (until
+ * re-enabled). --davidm 99/12/17
+ */
+ iosapic_disable_irq(irq);
+ return 0;
+ }
+
+ retval = invoke_irq_handlers (irq, regs, action);
+
+ if (iosapic_trigger(irq) == IO_SAPIC_LEVEL) /* ACK Level trigger interrupts */
+ writel(irq, iosapic_addr(irq) + IO_SAPIC_EOI);
+
+ spin_lock(&irq_controller_lock);
+ {
+ status = (id->status & ~IRQ_INPROGRESS);
+ id->status = status;
+ }
+ spin_unlock(&irq_controller_lock);
+
+ return retval;
+}
+
+void __init
+iosapic_init (unsigned long addr)
+{
+ int i;
+#ifdef CONFIG_IA64_IRQ_ACPI
+ struct pci_vector_struct *vectors;
+ int irq;
+#else
+ int vector;
+#endif
+
+ /*
+ * Disable all local interrupts
+ */
+
+ ia64_set_itv(0, 1);
+ ia64_set_lrr0(0, 1);
+ ia64_set_lrr1(0, 1);
+
+ /*
+ * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support
+ * enabled.
+ */
+
+ outb(0xff, 0xA1);
+ outb(0xff, 0x21);
+
+#if defined(CONFIG_IA64_SOFTSDV_HACKS)
+ memset(iosapic_vector, 0x0, sizeof(iosapic_vector));
+ for (i = 0; i < NR_IRQS; i++) {
+ iosapic_pin(i) = 0xff;
+ iosapic_addr(i) = (unsigned long) ioremap(IO_SAPIC_DEFAULT_ADDR, 0);
+ }
+ /* XXX this should come from systab or some such: */
+ iosapic_pin(TIMER_IRQ) = 5; /* System Clock Interrupt */
+ iosapic_pin(0x40) = 3; /* Keyboard */
+ iosapic_pin(0x92) = 9; /* COM1 Serial Port */
+ iosapic_pin(0x80) = 4; /* Periodic Interrupt */
+ iosapic_pin(0xc0) = 2; /* Mouse */
+ iosapic_pin(0xe0) = 1; /* IDE Disk */
+ iosapic_pin(0xf0) = 6; /* E-IDE CDROM */
+ iosapic_pin(0xa0) = 10; /* Real PCI Interrupt */
+#elif !defined(CONFIG_IA64_IRQ_ACPI)
+ /*
+ * For systems where the routing info in ACPI is
+ * unavailable/wrong, use the intr_routing information to
+ * initialize the iosapic array
+ */
+ i = -1;
+ while (intr_routing[++i].srcbus != 0xff) {
+ if (intr_routing[i].srcbus == BUS_ISA) {
+ vector = map_legacy_irq(intr_routing[i].srcbusirq);
+ } else if (intr_routing[i].srcbus == BUS_PCI) {
+ vector = intr_routing[i].iosapic_pin;
+ } else {
+ printk("unknown bus type %d for intr_routing[%d]\n",
+ intr_routing[i].srcbus, i);
+ continue;
+ }
+ iosapic_pin(vector) = intr_routing[i].iosapic_pin;
+ iosapic_dmode(vector) = intr_routing[i].mode;
+ iosapic_polarity(vector) = intr_routing[i].polarity;
+ iosapic_trigger(vector) = intr_routing[i].trigger;
+# ifdef DEBUG_IRQ_ROUTING
+ printk("irq[0x%x(0x%x)]:0x%x, %d, %d, %d\n", vector, intr_routing[i].srcbusirq,
+ iosapic_pin(vector), iosapic_dmode(vector), iosapic_polarity(vector),
+ iosapic_trigger(vector));
+# endif
+ }
+#else /* !defined(CONFIG_IA64_SOFTSDV_HACKS) && !defined(CONFIG_IA64_IRQ_ACPI) */
+ /*
+ * Map the legacy ISA devices into the IOAPIC data; We'll override these
+ * later with data from the ACPI Interrupt Source Override table.
+ *
+ * Huh, the Lion w/ FPSWA firmware has entries for _all_ of the legacy IRQs,
+ * including those that are not different from PC/AT standard. I don't know
+ * if this is a bug in the other firmware or not. I'm going to leave this code
+ * here, so that this works on BigSur but will go ask Intel. --wfd 2000-Jan-19
+ *
+ */
+	for (i = 0; i < IA64_MIN_VECTORED_IRQ; i++) {
+ irq = map_legacy_irq(i);
+ iosapic_pin(irq) = i;
+ iosapic_bus(irq) = BUS_ISA;
+ iosapic_busdata(irq) = 0;
+ iosapic_dmode(irq) = IO_SAPIC_LOWEST_PRIORITY;
+ iosapic_trigger(irq) = IO_SAPIC_EDGE;
+ iosapic_polarity(irq) = IO_SAPIC_POL_HIGH;
+#ifdef DEBUG_IRQ_ROUTING
+ printk("ISA: IRQ %02x -> Vector %02x IOSAPIC Pin %d\n", i, irq, iosapic_pin(irq));
+#endif
+ }
+
+ /*
+ * Map the PCI Interrupt data into the ACPI IOSAPIC data using
+ * the info that the bootstrap loader passed to us.
+ */
+ ia64_boot_param.pci_vectors = (__u64) __va(ia64_boot_param.pci_vectors);
+ vectors = (struct pci_vector_struct *) ia64_boot_param.pci_vectors;
+ for (i = 0; i < ia64_boot_param.num_pci_vectors; i++) {
+ irq = map_legacy_irq(vectors[i].irq);
+
+ iosapic_bustype(irq) = BUS_PCI;
+ iosapic_pin(irq) = irq - iosapic_baseirq(irq);
+ iosapic_bus(irq) = vectors[i].bus;
+ /*
+ * Map the PCI slot and pin data into iosapic_busdata()
+ */
+ iosapic_busdata(irq) = (vectors[i].pci_id & 0xffff0000) | vectors[i].pin;
+
+ /* Default settings for PCI */
+ iosapic_dmode(irq) = IO_SAPIC_LOWEST_PRIORITY;
+ iosapic_trigger(irq) = IO_SAPIC_LEVEL;
+ iosapic_polarity(irq) = IO_SAPIC_POL_LOW;
+
+#ifdef DEBUG_IRQ_ROUTING
+ printk("PCI: BUS %d Slot %x Pin %x IRQ %02x --> Vector %02x IOSAPIC Pin %d\n",
+ vectors[i].bus, vectors[i].pci_id>>16, vectors[i].pin, vectors[i].irq,
+ irq, iosapic_pin(irq));
+#endif
+ }
+#endif /* !CONFIG_IA64_IRQ_ACPI */
+}
+
+static void
+iosapic_startup_irq (unsigned int irq)
+{
+ int pin;
+
+ if (irq == TIMER_IRQ)
+ return;
+ pin = iosapic_pin(irq);
+ if (pin < 0)
+ /* happens during irq auto probing... */
+ return;
+ set_rte(iosapic_addr(irq), pin, iosapic_polarity(irq), iosapic_trigger(irq),
+ iosapic_dmode(irq), (ia64_get_lid() >> 16) & 0xffff, irq);
+ enable_pin(pin, iosapic_addr(irq));
+}
+
+struct hw_interrupt_type irq_type_iosapic = {
+ "IOSAPIC",
+ iosapic_init,
+ iosapic_startup_irq,
+ iosapic_shutdown_irq,
+ iosapic_handle_irq,
+ iosapic_enable_irq,
+ iosapic_disable_irq
+};
+
+void
+dig_irq_init (struct irq_desc desc[NR_IRQS])
+{
+ int i;
+
+ /*
+ * Claim all non-legacy irq vectors as ours unless they're
+ * claimed by someone else already (e.g., timer or IPI are
+ * handled internally).
+ */
+ for (i = IA64_MIN_VECTORED_IRQ; i <= IA64_MAX_VECTORED_IRQ; ++i) {
+ if (irq_desc[i].handler == &irq_type_default)
+ irq_desc[i].handler = &irq_type_iosapic;
+ }
+}
+
+void
+dig_pci_fixup (void)
+{
+ struct pci_dev *dev;
+ int irq;
+ unsigned char pin;
+
+ pci_for_each_dev(dev) {
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (pin) {
+ pin--; /* interrupt pins are numbered starting from 1 */
+ irq = iosapic_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn),
+ pin);
+ if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
+ struct pci_dev * bridge = dev->bus->self;
+
+ /* do the bridge swizzle... */
+ pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+ irq = iosapic_get_PCI_irq_vector(bridge->bus->number,
+ PCI_SLOT(bridge->devfn), pin);
+ if (irq >= 0)
+ printk(KERN_WARNING
+ "PCI: using PPB(B%d,I%d,P%d) to get irq %02x\n",
+ bridge->bus->number, PCI_SLOT(bridge->devfn),
+ pin, irq);
+ }
+ if (irq >= 0) {
+ printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %02x\n",
+ dev->bus->number, PCI_SLOT(dev->devfn), pin, irq);
+ dev->irq = irq;
+ }
+ }
+ /*
+		 * Fix out-of-range IRQ numbers by routing them to the
+		 * spurious-interrupt vector.
+ */
+ if (dev->irq >= NR_IRQS)
+ dev->irq = 15; /* Spurious interrupts */
+ }
+}
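
The comment in iosapic_version() spells out the layout of the 32-bit value it reads back
(version in bits 0-7, the pins field in bits 16-23).  For reference only, helpers that
unpack it might look like the sketch below; the function names are invented for this
example and do not appear in the patch.

	/* Sketch: decode the value returned by iosapic_version(). */
	static inline unsigned int
	iosapic_version_number (unsigned int ver)
	{
		return ver & 0xff;		/* bits 0-7: version */
	}

	static inline unsigned int
	iosapic_pins (unsigned int ver)
	{
		return (ver >> 16) & 0xff;	/* bits 16-23: pins */
	}
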
diff --git a/arch/ia64/dig/machvec.c b/arch/ia64/dig/machvec.c
new file mode 100644
index 000000000..640412d7e
--- /dev/null
+++ b/arch/ia64/dig/machvec.c
@@ -0,0 +1,4 @@
+#include <asm/machvec_init.h>
+#include <asm/machvec_dig.h>
+
+MACHVEC_DEFINE(dig)
diff --git a/arch/ia64/dig/setup.c b/arch/ia64/dig/setup.c
new file mode 100644
index 000000000..6ae40319d
--- /dev/null
+++ b/arch/ia64/dig/setup.c
@@ -0,0 +1,93 @@
+/*
+ * Platform dependent support for Intel SoftSDV simulator.
+ *
+ * Copyright (C) 1999 Intel Corp.
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
+ */
+#include <linux/config.h>
+
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/kdev_t.h>
+#include <linux/string.h>
+#include <linux/tty.h>
+#include <linux/console.h>
+#include <linux/timex.h>
+#include <linux/sched.h>
+#include <linux/mc146818rtc.h>
+
+#include <asm/io.h>
+#include <asm/machvec.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_IA64_FW_EMU
+# include "../../kernel/fw-emu.c"
+#endif
+
+/*
+ * This is here so we can use the CMOS detection in ide-probe.c to
+ * determine what drives are present. In theory, we don't need this
+ * as the auto-detection could be done via ide-probe.c:do_probe() but
+ * in practice that would be much slower, which is painful when
+ * running in the simulator. Note that passing zeroes in DRIVE_INFO
+ * is sufficient (the IDE driver will autodetect the drive geometry).
+ */
+char drive_info[4*16];
+
+unsigned char aux_device_present = 0xaa; /* XXX remove this when legacy I/O is gone */
+
+void __init
+dig_setup (char **cmdline_p)
+{
+ unsigned int orig_x, orig_y, num_cols, num_rows, font_height;
+
+ /*
+ * This assumes that the EFI partition is physical disk 1
+ * partition 1 and the Linux root disk is physical disk 1
+ * partition 2.
+ */
+#ifdef CONFIG_IA64_LION_HACKS
+ /* default to /dev/sda2 on Lion... */
+ ROOT_DEV = to_kdev_t(0x0802); /* default to second partition on first drive */
+#else
+	/* default to /dev/hda2 on BigSur... */
+ ROOT_DEV = to_kdev_t(0x0302); /* default to second partition on first drive */
+#endif
+
+#ifdef CONFIG_SMP
+ init_smp_config();
+#endif
+
+ memset(&screen_info, 0, sizeof(screen_info));
+
+ if (!ia64_boot_param.console_info.num_rows
+ || !ia64_boot_param.console_info.num_cols)
+ {
+ printk("dig_setup: warning: invalid screen-info, guessing 80x25\n");
+ orig_x = 0;
+ orig_y = 0;
+ num_cols = 80;
+ num_rows = 25;
+ font_height = 16;
+ } else {
+ orig_x = ia64_boot_param.console_info.orig_x;
+ orig_y = ia64_boot_param.console_info.orig_y;
+ num_cols = ia64_boot_param.console_info.num_cols;
+ num_rows = ia64_boot_param.console_info.num_rows;
+ font_height = 400 / num_rows;
+ }
+
+ screen_info.orig_x = orig_x;
+ screen_info.orig_y = orig_y;
+ screen_info.orig_video_cols = num_cols;
+ screen_info.orig_video_lines = num_rows;
+ screen_info.orig_video_points = font_height;
+ screen_info.orig_video_mode = 3; /* XXX fake */
+ screen_info.orig_video_isVGA = 1; /* XXX fake */
+ screen_info.orig_video_ega_bx = 3; /* XXX fake */
+}
diff --git a/arch/ia64/hp/Makefile b/arch/ia64/hp/Makefile
new file mode 100644
index 000000000..64899f4be
--- /dev/null
+++ b/arch/ia64/hp/Makefile
@@ -0,0 +1,19 @@
+#
+# ia64/platform/hp/Makefile
+#
+# Copyright (C) 1999 Silicon Graphics, Inc.
+# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
+#
+
+all: hp.a
+
+O_TARGET = hp.a
+O_OBJS = hpsim_console.o hpsim_irq.o hpsim_setup.o
+
+ifeq ($(CONFIG_IA64_GENERIC),y)
+O_OBJS += hpsim_machvec.o
+endif
+
+clean::
+
+include $(TOPDIR)/Rules.make
diff --git a/arch/ia64/hp/hpsim_console.c b/arch/ia64/hp/hpsim_console.c
new file mode 100644
index 000000000..b97116cee
--- /dev/null
+++ b/arch/ia64/hp/hpsim_console.c
@@ -0,0 +1,78 @@
+/*
+ * Platform dependent support for HP simulator.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/kdev_t.h>
+#include <linux/console.h>
+
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/pal.h>
+#include <asm/machvec.h>
+#include <asm/pgtable.h>
+#include <asm/sal.h>
+
+#include "hpsim_ssc.h"
+
+static int simcons_init (struct console *, char *);
+static void simcons_write (struct console *, const char *, unsigned);
+static int simcons_wait_key (struct console *);
+static kdev_t simcons_console_device (struct console *);
+
+struct console hpsim_cons = {
+ "simcons",
+ simcons_write, /* write */
+ NULL, /* read */
+ simcons_console_device, /* device */
+ simcons_wait_key, /* wait_key */
+ NULL, /* unblank */
+ simcons_init, /* setup */
+ CON_PRINTBUFFER, /* flags */
+ -1, /* index */
+ 0, /* cflag */
+ NULL /* next */
+};
+
+static int
+simcons_init (struct console *cons, char *options)
+{
+ return 0;
+}
+
+static void
+simcons_write (struct console *cons, const char *buf, unsigned count)
+{
+ unsigned long ch;
+
+ while (count-- > 0) {
+ ch = *buf++;
+ ia64_ssc(ch, 0, 0, 0, SSC_PUTCHAR);
+ if (ch == '\n')
+ ia64_ssc('\r', 0, 0, 0, SSC_PUTCHAR);
+ }
+}
+
+static int
+simcons_wait_key (struct console *cons)
+{
+ char ch;
+
+ do {
+ ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR);
+ } while (ch == '\0');
+ return ch;
+}
+
+static kdev_t
+simcons_console_device (struct console *c)
+{
+ return MKDEV(TTY_MAJOR, 64 + c->index);
+}
diff --git a/arch/ia64/hp/hpsim_irq.c b/arch/ia64/hp/hpsim_irq.c
new file mode 100644
index 000000000..72b36d6d6
--- /dev/null
+++ b/arch/ia64/hp/hpsim_irq.c
@@ -0,0 +1,83 @@
+/*
+ * Platform dependent support for HP simulator.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/kdev_t.h>
+#include <linux/console.h>
+
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/pal.h>
+#include <asm/machvec.h>
+#include <asm/pgtable.h>
+#include <asm/sal.h>
+
+
+static int
+irq_hp_sim_handle_irq (unsigned int irq, struct pt_regs *regs)
+{
+ struct irqaction *action = 0;
+ struct irq_desc *id = irq_desc + irq;
+ unsigned int status;
+ int retval;
+
+ spin_lock(&irq_controller_lock);
+ {
+ status = id->status;
+ if ((status & IRQ_INPROGRESS) == 0 && (status & IRQ_ENABLED) != 0) {
+ action = id->action;
+ status |= IRQ_INPROGRESS;
+ }
+ id->status = status & ~(IRQ_REPLAY | IRQ_WAITING);
+ }
+ spin_unlock(&irq_controller_lock);
+
+ if (!action) {
+ if (!(id->status & IRQ_AUTODETECT))
+ printk("irq_hpsim_handle_irq: unexpected interrupt %u\n", irq);
+ return 0;
+ }
+
+ retval = invoke_irq_handlers(irq, regs, action);
+
+ spin_lock(&irq_controller_lock);
+ {
+ id->status &= ~IRQ_INPROGRESS;
+ }
+ spin_unlock(&irq_controller_lock);
+
+ return retval;
+}
+
+static void
+irq_hp_sim_noop (unsigned int irq)
+{
+}
+
+static struct hw_interrupt_type irq_type_hp_sim = {
+ "hp_sim",
+ (void (*)(unsigned long)) irq_hp_sim_noop, /* init */
+ irq_hp_sim_noop, /* startup */
+ irq_hp_sim_noop, /* shutdown */
+ irq_hp_sim_handle_irq, /* handle */
+ irq_hp_sim_noop, /* enable */
+ irq_hp_sim_noop, /* disable */
+};
+
+void
+hpsim_irq_init (struct irq_desc desc[NR_IRQS])
+{
+ int i;
+
+ for (i = IA64_MIN_VECTORED_IRQ; i <= IA64_MAX_VECTORED_IRQ; ++i) {
+ irq_desc[i].handler = &irq_type_hp_sim;
+ }
+}
diff --git a/arch/ia64/hp/hpsim_machvec.c b/arch/ia64/hp/hpsim_machvec.c
new file mode 100644
index 000000000..7d78f4961
--- /dev/null
+++ b/arch/ia64/hp/hpsim_machvec.c
@@ -0,0 +1,4 @@
+#include <asm/machvec_init.h>
+#include <asm/machvec_hpsim.h>
+
+MACHVEC_DEFINE(hpsim)
diff --git a/arch/ia64/hp/hpsim_setup.c b/arch/ia64/hp/hpsim_setup.c
new file mode 100644
index 000000000..dfa83e135
--- /dev/null
+++ b/arch/ia64/hp/hpsim_setup.c
@@ -0,0 +1,71 @@
+/*
+ * Platform dependent support for HP simulator.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/kdev_t.h>
+#include <linux/console.h>
+
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/pal.h>
+#include <asm/machvec.h>
+#include <asm/pgtable.h>
+#include <asm/sal.h>
+
+#include "hpsim_ssc.h"
+
+extern struct console hpsim_cons;
+
+/*
+ * Simulator system call.
+ */
+inline long
+ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr)
+{
+#ifdef __GCC_DOESNT_KNOW_IN_REGS__
+ register long in0 asm ("r32") = arg0;
+ register long in1 asm ("r33") = arg1;
+ register long in2 asm ("r34") = arg2;
+ register long in3 asm ("r35") = arg3;
+#else
+ register long in0 asm ("in0") = arg0;
+ register long in1 asm ("in1") = arg1;
+ register long in2 asm ("in2") = arg2;
+ register long in3 asm ("in3") = arg3;
+#endif
+ register long r8 asm ("r8");
+ register long r15 asm ("r15") = nr;
+
+ asm volatile ("break 0x80001"
+ : "=r"(r8)
+ : "r"(r15), "r"(in0), "r"(in1), "r"(in2), "r"(in3));
+ return r8;
+}
+
+void
+ia64_ssc_connect_irq (long intr, long irq)
+{
+ ia64_ssc(intr, irq, 0, 0, SSC_CONNECT_INTERRUPT);
+}
+
+void
+ia64_ctl_trace (long on)
+{
+ ia64_ssc(on, 0, 0, 0, SSC_CTL_TRACE);
+}
+
+void __init
+hpsim_setup (char **cmdline_p)
+{
+ ROOT_DEV = to_kdev_t(0x0801); /* default to first SCSI drive */
+
+ register_console (&hpsim_cons);
+}
diff --git a/arch/ia64/hp/hpsim_ssc.h b/arch/ia64/hp/hpsim_ssc.h
new file mode 100644
index 000000000..bfa390627
--- /dev/null
+++ b/arch/ia64/hp/hpsim_ssc.h
@@ -0,0 +1,36 @@
+/*
+ * Platform dependent support for HP simulator.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
+ */
+#ifndef _IA64_PLATFORM_HPSIM_SSC_H
+#define _IA64_PLATFORM_HPSIM_SSC_H
+
+/* Simulator system calls: */
+
+#define SSC_CONSOLE_INIT 20
+#define SSC_GETCHAR 21
+#define SSC_PUTCHAR 31
+#define SSC_CONNECT_INTERRUPT 58
+#define SSC_GENERATE_INTERRUPT 59
+#define SSC_SET_PERIODIC_INTERRUPT 60
+#define SSC_GET_RTC 65
+#define SSC_EXIT 66
+#define SSC_LOAD_SYMBOLS 69
+#define SSC_GET_TOD 74
+#define SSC_CTL_TRACE 76
+
+#define SSC_NETDEV_PROBE 100
+#define SSC_NETDEV_SEND 101
+#define SSC_NETDEV_RECV 102
+#define SSC_NETDEV_ATTACH 103
+#define SSC_NETDEV_DETACH 104
+
+/*
+ * Simulator system call.
+ */
+extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr);
+
+#endif /* _IA64_PLATFORM_HPSIM_SSC_H */
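
hpsim_ssc.h exposes the raw ia64_ssc() hook together with the call numbers.  For
illustration only, a shutdown wrapper in the style of the bootloader's SSC_EXIT call
could look like the sketch below; the wrapper name and the interpretation of the first
argument as an exit status are assumptions based on the bootloader's usage, not part of
the patch.

	#include "hpsim_ssc.h"

	/* Sketch: terminate the HP simulator with the given status. */
	static inline void
	hpsim_exit (long status)
	{
		ia64_ssc(status, 0, 0, 0, SSC_EXIT);	/* presumably does not return */
	}
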
diff --git a/arch/ia64/ia32/Makefile b/arch/ia64/ia32/Makefile
new file mode 100644
index 000000000..674a6eb6e
--- /dev/null
+++ b/arch/ia64/ia32/Makefile
@@ -0,0 +1,17 @@
+#
+# Makefile for the ia32 kernel emulation subsystem.
+#
+
+.S.s:
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -E -o $*.s $<
+.S.o:
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -c -o $*.o $<
+
+all: ia32.o
+
+O_TARGET := ia32.o
+O_OBJS := ia32_entry.o ia32_signal.o sys_ia32.o ia32_support.o binfmt_elf32.o
+
+clean::
+
+include $(TOPDIR)/Rules.make
diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
new file mode 100644
index 000000000..685d85b20
--- /dev/null
+++ b/arch/ia64/ia32/binfmt_elf32.c
@@ -0,0 +1,180 @@
+/*
+ * IA-32 ELF support.
+ *
+ * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
+ */
+#include <linux/config.h>
+#include <linux/posix_types.h>
+
+#include <asm/signal.h>
+#include <asm/ia32.h>
+
+#define CONFIG_BINFMT_ELF32
+
+/* Override some function names */
+#undef start_thread
+#define start_thread ia32_start_thread
+#define init_elf_binfmt init_elf32_binfmt
+
+#undef CONFIG_BINFMT_ELF
+#ifdef CONFIG_BINFMT_ELF32
+# define CONFIG_BINFMT_ELF CONFIG_BINFMT_ELF32
+#endif
+
+#undef CONFIG_BINFMT_ELF_MODULE
+#ifdef CONFIG_BINFMT_ELF32_MODULE
+# define CONFIG_BINFMT_ELF_MODULE CONFIG_BINFMT_ELF32_MODULE
+#endif
+
+void ia64_elf32_init(struct pt_regs *regs);
+#define ELF_PLAT_INIT(_r) ia64_elf32_init(_r)
+
+#define setup_arg_pages(bprm) ia32_setup_arg_pages(bprm)
+
+/* Ugly but avoids duplication */
+#include "../../../fs/binfmt_elf.c"
+
+/* Global descriptor table */
+unsigned long *ia32_gdt_table, *ia32_tss;
+
+struct page *
+put_shared_page(struct task_struct * tsk, struct page *page, unsigned long address)
+{
+ pgd_t * pgd;
+ pmd_t * pmd;
+ pte_t * pte;
+
+ if (page_count(page) != 1)
+ printk("mem_map disagrees with %p at %08lx\n", page, address);
+ pgd = pgd_offset(tsk->mm, address);
+ pmd = pmd_alloc(pgd, address);
+ if (!pmd) {
+ __free_page(page);
+ oom(tsk);
+ return 0;
+ }
+ pte = pte_alloc(pmd, address);
+ if (!pte) {
+ __free_page(page);
+ oom(tsk);
+ return 0;
+ }
+ if (!pte_none(*pte)) {
+ pte_ERROR(*pte);
+ __free_page(page);
+ return 0;
+ }
+ flush_page_to_ram(page);
+ set_pte(pte, pte_mkwrite(mk_pte(page, PAGE_SHARED)));
+ /* no need for flush_tlb */
+ return page;
+}
+
+void ia64_elf32_init(struct pt_regs *regs)
+{
+ int nr;
+
+ put_shared_page(current, mem_map + MAP_NR(ia32_gdt_table), IA32_PAGE_OFFSET);
+ if (PAGE_SHIFT <= IA32_PAGE_SHIFT)
+ put_shared_page(current, mem_map + MAP_NR(ia32_tss), IA32_PAGE_OFFSET + PAGE_SIZE);
+
+ nr = smp_processor_id();
+
+ /* Do all the IA-32 setup here */
+
+ /* CS descriptor */
+ __asm__("mov ar.csd = %0" : /* no outputs */
+ : "r" IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0xBL, 1L,
+ 3L, 1L, 1L, 1L));
+ /* SS descriptor */
+ __asm__("mov ar.ssd = %0" : /* no outputs */
+ : "r" IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L,
+ 3L, 1L, 1L, 1L));
+ /* EFLAGS */
+ __asm__("mov ar.eflag = %0" : /* no outputs */ : "r" (IA32_EFLAG));
+
+ /* Control registers */
+ __asm__("mov ar.cflg = %0"
+ : /* no outputs */
+ : "r" (((ulong) IA32_CR4 << 32) | IA32_CR0));
+ __asm__("mov ar.fsr = %0"
+ : /* no outputs */
+ : "r" ((ulong)IA32_FSR_DEFAULT));
+ __asm__("mov ar.fcr = %0"
+ : /* no outputs */
+ : "r" ((ulong)IA32_FCR_DEFAULT));
+ __asm__("mov ar.fir = r0");
+ __asm__("mov ar.fdr = r0");
+ /* TSS */
+ __asm__("mov ar.k1 = %0"
+ : /* no outputs */
+ : "r" IA64_SEG_DESCRIPTOR(IA32_PAGE_OFFSET + PAGE_SIZE,
+ 0x1FFFL, 0xBL, 1L,
+ 3L, 1L, 1L, 1L));
+
+ /* Get the segment selectors right */
+ regs->r16 = (__USER_DS << 16) | (__USER_DS); /* ES == DS, GS, FS are zero */
+ regs->r17 = (_TSS(nr) << 48) | (_LDT(nr) << 32)
+ | (__USER_DS << 16) | __USER_CS;
+
+ /* Setup other segment descriptors - ESD, DSD, FSD, GSD */
+ regs->r24 = IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L, 3L, 1L, 1L, 1L);
+ regs->r27 = IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L, 3L, 1L, 1L, 1L);
+ regs->r28 = IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L, 3L, 1L, 1L, 1L);
+ regs->r29 = IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L, 3L, 1L, 1L, 1L);
+
+ /* Setup the LDT and GDT */
+ regs->r30 = ia32_gdt_table[_LDT(nr)];
+ regs->r31 = IA64_SEG_DESCRIPTOR(0xc0000000L, 0x400L, 0x3L, 1L, 3L,
+ 1L, 1L, 1L);
+
+ /* Clear psr.ac */
+ regs->cr_ipsr &= ~IA64_PSR_AC;
+
+ regs->loadrs = 0;
+}
+
+#undef STACK_TOP
+#define STACK_TOP ((IA32_PAGE_OFFSET/3) * 2)
+
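+/*
+ * IA-32 variant of setup_arg_pages(): place the argument and environment
+ * pages collected in bprm->page[] just below the IA-32 STACK_TOP and cover
+ * them with a single stack VMA.
+ */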
+int ia32_setup_arg_pages(struct linux_binprm *bprm)
+{
+ unsigned long stack_base;
+ struct vm_area_struct *mpnt;
+ int i;
+
+ stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
+
+ bprm->p += stack_base;
+ if (bprm->loader)
+ bprm->loader += stack_base;
+ bprm->exec += stack_base;
+
+ mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (!mpnt)
+ return -ENOMEM;
+
+ {
+ mpnt->vm_mm = current->mm;
+ mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
+ mpnt->vm_end = STACK_TOP;
+ mpnt->vm_page_prot = PAGE_COPY;
+ mpnt->vm_flags = VM_STACK_FLAGS;
+ mpnt->vm_ops = NULL;
+ mpnt->vm_pgoff = 0;
+ mpnt->vm_file = NULL;
+ mpnt->vm_private_data = 0;
+ insert_vm_struct(current->mm, mpnt);
+ current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ }
+
+ for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
+ if (bprm->page[i]) {
+ current->mm->rss++;
+ put_dirty_page(current,bprm->page[i],stack_base);
+ }
+ stack_base += PAGE_SIZE;
+ }
+
+ return 0;
+}
diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S
new file mode 100644
index 000000000..1342e64f0
--- /dev/null
+++ b/arch/ia64/ia32/ia32_entry.S
@@ -0,0 +1,255 @@
+#include <asm/offsets.h>
+#include <asm/signal.h>
+
+ .global ia32_ret_from_syscall
+	.proc ia32_ret_from_syscall
+ia32_ret_from_syscall:
+ cmp.ge p6,p7=r8,r0 // syscall executed successfully?
+ adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8
+ ;;
+ st8 [r2]=r8 // store return value in slot for r8
+ br.cond.sptk.few ia64_leave_kernel
+
+ //
+ // Invoke a system call, but do some tracing before and after the call.
+ // We MUST preserve the current register frame throughout this routine
+ // because some system calls (such as ia64_execve) directly
+ // manipulate ar.pfs.
+ //
+ // Input:
+ // r15 = syscall number
+ // b6 = syscall entry point
+ //
+ .global ia32_trace_syscall
+ .proc ia32_trace_syscall
+ia32_trace_syscall:
+ br.call.sptk.few rp=invoke_syscall_trace // give parent a chance to catch syscall args
+.Lret4: br.call.sptk.few rp=b6 // do the syscall
+.Lret5: cmp.lt p6,p0=r8,r0 // syscall failed?
+ adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8
+ ;;
+ st8.spill [r2]=r8 // store return value in slot for r8
+ br.call.sptk.few rp=invoke_syscall_trace // give parent a chance to catch return value
+.Lret6: br.cond.sptk.many ia64_leave_kernel // rp MUST be != ia64_leave_kernel!
+
+ .endp ia32_trace_syscall
+
+ .align 16
+ .global sys32_fork
+ .proc sys32_fork
+sys32_fork:
+ alloc r16=ar.pfs,2,2,3,0;;
+ movl r28=1f
+ mov loc1=rp
+ br.cond.sptk.many save_switch_stack
+1:
+ mov loc0=r16 // save ar.pfs across do_fork
+ adds out2=IA64_SWITCH_STACK_SIZE+16,sp
+ adds r2=IA64_SWITCH_STACK_SIZE+IA64_PT_REGS_R12_OFFSET+16,sp
+ mov out0=SIGCHLD // out0 = clone_flags
+ ;;
+ ld8 out1=[r2] // fetch usp from pt_regs.r12
+ br.call.sptk.few rp=do_fork
+.ret1:
+ mov ar.pfs=loc0
+ adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack
+ mov rp=loc1
+ ;;
+ br.ret.sptk.many rp
+ .endp sys32_fork
+
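+	//
+	// Table of IA-32 system call entry points, indexed by the 32-bit
+	// system call number.  Calls that need argument or structure
+	// conversion go through the sys32_* wrappers in sys_ia32.c; calls
+	// whose ABI matches the native one use the sys_* entry points
+	// directly.
+	//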
+ .rodata
+ .align 8
+ .globl ia32_syscall_table
+ia32_syscall_table:
+	data8 sys_ni_syscall	  /* 0 - old "setup()" system call */
+ data8 sys_exit
+ data8 sys32_fork
+ data8 sys_read
+ data8 sys_write
+ data8 sys_open /* 5 */
+ data8 sys_close
+ data8 sys32_waitpid
+ data8 sys_creat
+ data8 sys_link
+ data8 sys_unlink /* 10 */
+ data8 sys32_execve
+ data8 sys_chdir
+ data8 sys_ni_syscall /* sys_time is not supported on ia64 */
+ data8 sys_mknod
+ data8 sys_chmod /* 15 */
+ data8 sys_lchown
+ data8 sys_ni_syscall /* old break syscall holder */
+ data8 sys_ni_syscall
+ data8 sys_lseek
+ data8 sys_getpid /* 20 */
+ data8 sys_mount
+ data8 sys_oldumount
+ data8 sys_setuid
+ data8 sys_getuid
+ data8 sys_ni_syscall /* sys_stime is not supported on IA64 */ /* 25 */
+ data8 sys_ptrace
+ data8 sys32_alarm
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall /* 30 */
+ data8 sys_ni_syscall /* old stty syscall holder */
+ data8 sys_ni_syscall /* old gtty syscall holder */
+ data8 sys_access
+ data8 sys_nice
+ data8 sys_ni_syscall /* 35 */ /* old ftime syscall holder */
+ data8 sys_sync
+ data8 sys_kill
+ data8 sys_rename
+ data8 sys_mkdir
+ data8 sys_rmdir /* 40 */
+ data8 sys_dup
+ data8 sys32_pipe
+ data8 sys_times
+ data8 sys_ni_syscall /* old prof syscall holder */
+ data8 sys_brk /* 45 */
+ data8 sys_setgid
+ data8 sys_getgid
+ data8 sys_ni_syscall
+ data8 sys_geteuid
+ data8 sys_getegid /* 50 */
+ data8 sys_acct
+	data8 sys_umount	  /* recycled never used phys() */
+ data8 sys_ni_syscall /* old lock syscall holder */
+ data8 sys_ioctl
+ data8 sys_fcntl /* 55 */
+ data8 sys_ni_syscall /* old mpx syscall holder */
+ data8 sys_setpgid
+ data8 sys_ni_syscall /* old ulimit syscall holder */
+ data8 sys_ni_syscall
+ data8 sys_umask /* 60 */
+ data8 sys_chroot
+ data8 sys_ustat
+ data8 sys_dup2
+ data8 sys_getppid
+ data8 sys_getpgrp /* 65 */
+ data8 sys_setsid
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+ data8 sys_setreuid /* 70 */
+ data8 sys_setregid
+ data8 sys_ni_syscall
+ data8 sys_sigpending
+ data8 sys_sethostname
+ data8 sys32_setrlimit /* 75 */
+ data8 sys32_getrlimit
+ data8 sys_getrusage
+ data8 sys32_gettimeofday
+ data8 sys32_settimeofday
+ data8 sys_getgroups /* 80 */
+ data8 sys_setgroups
+ data8 sys_ni_syscall
+ data8 sys_symlink
+ data8 sys_ni_syscall
+ data8 sys_readlink /* 85 */
+ data8 sys_uselib
+ data8 sys_swapon
+ data8 sys_reboot
+ data8 sys32_readdir
+ data8 sys32_mmap /* 90 */
+ data8 sys_munmap
+ data8 sys_truncate
+ data8 sys_ftruncate
+ data8 sys_fchmod
+ data8 sys_fchown /* 95 */
+ data8 sys_getpriority
+ data8 sys_setpriority
+ data8 sys_ni_syscall /* old profil syscall holder */
+ data8 sys32_statfs
+ data8 sys32_fstatfs /* 100 */
+ data8 sys_ioperm
+ data8 sys32_socketcall
+ data8 sys_syslog
+ data8 sys32_setitimer
+ data8 sys32_getitimer /* 105 */
+ data8 sys32_newstat
+ data8 sys32_newlstat
+ data8 sys32_newfstat
+ data8 sys_ni_syscall
+ data8 sys_iopl /* 110 */
+ data8 sys_vhangup
+ data8 sys_ni_syscall // used to be sys_idle
+ data8 sys_ni_syscall
+ data8 sys32_wait4
+ data8 sys_swapoff /* 115 */
+ data8 sys_sysinfo
+ data8 sys32_ipc
+ data8 sys_fsync
+ data8 sys32_sigreturn
+ data8 sys_clone /* 120 */
+ data8 sys_setdomainname
+ data8 sys_newuname
+ data8 sys_modify_ldt
+ data8 sys_adjtimex
+ data8 sys32_mprotect /* 125 */
+ data8 sys_sigprocmask
+ data8 sys_create_module
+ data8 sys_init_module
+ data8 sys_delete_module
+ data8 sys_get_kernel_syms /* 130 */
+ data8 sys_quotactl
+ data8 sys_getpgid
+ data8 sys_fchdir
+ data8 sys_bdflush
+ data8 sys_sysfs /* 135 */
+ data8 sys_personality
+ data8 sys_ni_syscall /* for afs_syscall */
+ data8 sys_setfsuid
+ data8 sys_setfsgid
+ data8 sys_llseek /* 140 */
+ data8 sys32_getdents
+ data8 sys32_select
+ data8 sys_flock
+ data8 sys_msync
+ data8 sys32_readv /* 145 */
+ data8 sys32_writev
+ data8 sys_getsid
+ data8 sys_fdatasync
+ data8 sys_sysctl
+ data8 sys_mlock /* 150 */
+ data8 sys_munlock
+ data8 sys_mlockall
+ data8 sys_munlockall
+ data8 sys_sched_setparam
+ data8 sys_sched_getparam /* 155 */
+ data8 sys_sched_setscheduler
+ data8 sys_sched_getscheduler
+ data8 sys_sched_yield
+ data8 sys_sched_get_priority_max
+ data8 sys_sched_get_priority_min /* 160 */
+ data8 sys_sched_rr_get_interval
+ data8 sys32_nanosleep
+ data8 sys_mremap
+ data8 sys_setresuid
+ data8 sys_getresuid /* 165 */
+ data8 sys_vm86
+ data8 sys_query_module
+ data8 sys_poll
+ data8 sys_nfsservctl
+ data8 sys_setresgid /* 170 */
+ data8 sys_getresgid
+ data8 sys_prctl
+ data8 sys32_rt_sigreturn
+ data8 sys32_rt_sigaction
+ data8 sys32_rt_sigprocmask /* 175 */
+ data8 sys_rt_sigpending
+ data8 sys_rt_sigtimedwait
+ data8 sys_rt_sigqueueinfo
+ data8 sys_rt_sigsuspend
+ data8 sys_pread /* 180 */
+ data8 sys_pwrite
+ data8 sys_chown
+ data8 sys_getcwd
+ data8 sys_capget
+ data8 sys_capset /* 185 */
+ data8 sys_sigaltstack
+ data8 sys_sendfile
+ data8 sys_ni_syscall /* streams1 */
+ data8 sys_ni_syscall /* streams2 */
+ data8 sys32_vfork /* 190 */
diff --git a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c
new file mode 100644
index 000000000..ed443ee66
--- /dev/null
+++ b/arch/ia64/ia32/ia32_signal.c
@@ -0,0 +1,412 @@
+/*
+ * IA32 Architecture-specific signal handling support.
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
+ * Copyright (C) 2000 VA Linux Co
+ * Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
+ *
+ * Derived from i386 and Alpha versions.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/wait.h>
+
+#include <asm/uaccess.h>
+#include <asm/rse.h>
+#include <asm/sigcontext.h>
+#include <asm/segment.h>
+#include <asm/ia32.h>
+
+#define DEBUG_SIG 0
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+
+struct sigframe_ia32
+{
+ int pretcode;
+ int sig;
+ struct sigcontext_ia32 sc;
+ struct _fpstate_ia32 fpstate;
+ unsigned int extramask[_IA32_NSIG_WORDS-1];
+ char retcode[8];
+};
+
+struct rt_sigframe_ia32
+{
+ int pretcode;
+ int sig;
+ int pinfo;
+ int puc;
+ struct siginfo info;
+ struct ucontext_ia32 uc;
+ struct _fpstate_ia32 fpstate;
+ char retcode[8];
+};
+
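+/*
+ * The IA-32 segment selectors live packed in the IA-64 scratch registers:
+ * r16 holds DS (bits 0-15), ES (16-31), FS (32-47) and GS (48-63), while
+ * r17 holds CS (0-15), SS (16-31), LDT (32-47) and TSS (48-63).
+ * setup_sigcontext_ia32() unpacks them into the 32-bit sigcontext and
+ * restore_sigcontext_ia32() packs them back.
+ */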
+static int
+setup_sigcontext_ia32(struct sigcontext_ia32 *sc, struct _fpstate_ia32 *fpstate,
+ struct pt_regs *regs, unsigned long mask)
+{
+ int err = 0;
+
+	err |= __put_user((regs->r16 >> 32) & 0xffff, (unsigned int *)&sc->fs);
+	err |= __put_user((regs->r16 >> 48) & 0xffff, (unsigned int *)&sc->gs);
+
+	err |= __put_user((regs->r16 >> 16) & 0xffff, (unsigned int *)&sc->es);
+ err |= __put_user(regs->r16 & 0xffff, (unsigned int *)&sc->ds);
+ err |= __put_user(regs->r15, &sc->edi);
+ err |= __put_user(regs->r14, &sc->esi);
+ err |= __put_user(regs->r13, &sc->ebp);
+ err |= __put_user(regs->r12, &sc->esp);
+ err |= __put_user(regs->r11, &sc->ebx);
+ err |= __put_user(regs->r10, &sc->edx);
+ err |= __put_user(regs->r9, &sc->ecx);
+ err |= __put_user(regs->r8, &sc->eax);
+#if 0
+ err |= __put_user(current->tss.trap_no, &sc->trapno);
+ err |= __put_user(current->tss.error_code, &sc->err);
+#endif
+ err |= __put_user(regs->cr_iip, &sc->eip);
+ err |= __put_user(regs->r17 & 0xffff, (unsigned int *)&sc->cs);
+#if 0
+ err |= __put_user(regs->eflags, &sc->eflags);
+#endif
+
+ err |= __put_user(regs->r12, &sc->esp_at_signal);
+ err |= __put_user((regs->r17 >> 16) & 0xffff, (unsigned int *)&sc->ss);
+
+#if 0
+ tmp = save_i387(fpstate);
+ if (tmp < 0)
+ err = 1;
+ else
+ err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
+
+ /* non-iBCS2 extensions.. */
+ err |= __put_user(mask, &sc->oldmask);
+ err |= __put_user(current->tss.cr2, &sc->cr2);
+#endif
+
+ return err;
+}
+
+static int
+restore_sigcontext_ia32(struct pt_regs *regs, struct sigcontext_ia32 *sc, int *peax)
+{
+ unsigned int err = 0;
+
+#define COPY(ia64x, ia32x) err |= __get_user(regs->ia64x, &sc->ia32x)
+
+#define copyseg_gs(tmp) (regs->r16 |= (unsigned long) tmp << 48)
+#define copyseg_fs(tmp) (regs->r16 |= (unsigned long) tmp << 32)
+#define copyseg_cs(tmp) (regs->r17 |= tmp)
+#define copyseg_ss(tmp) (regs->r17 |= (unsigned long) tmp << 16)
+#define copyseg_es(tmp) (regs->r16 |= (unsigned long) tmp << 16)
+#define copyseg_ds(tmp) (regs->r16 |= tmp)
+
+#define COPY_SEG(seg) \
+ { unsigned short tmp; \
+ err |= __get_user(tmp, &sc->seg); \
+ copyseg_##seg(tmp); }
+
+#define COPY_SEG_STRICT(seg) \
+ { unsigned short tmp; \
+ err |= __get_user(tmp, &sc->seg); \
+ copyseg_##seg(tmp|3); }
+
+ /* To make COPY_SEGs easier, we zero r16, r17 */
+ regs->r16 = 0;
+ regs->r17 = 0;
+
+ COPY_SEG(gs);
+ COPY_SEG(fs);
+ COPY_SEG(es);
+ COPY_SEG(ds);
+ COPY(r15, edi);
+ COPY(r14, esi);
+ COPY(r13, ebp);
+ COPY(r12, esp);
+ COPY(r11, ebx);
+ COPY(r10, edx);
+ COPY(r9, ecx);
+ COPY(cr_iip, eip);
+ COPY_SEG_STRICT(cs);
+ COPY_SEG_STRICT(ss);
+#if 0
+ {
+ unsigned int tmpflags;
+ err |= __get_user(tmpflags, &sc->eflags);
+ /* XXX: Change this to ar.eflags */
+ regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
+ regs->orig_eax = -1; /* disable syscall checks */
+ }
+
+ {
+ struct _fpstate * buf;
+ err |= __get_user(buf, &sc->fpstate);
+ if (buf) {
+ if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
+ goto badframe;
+ err |= restore_i387(buf);
+ }
+ }
+#endif
+
+ err |= __get_user(*peax, &sc->eax);
+ return err;
+
+#if 0
+badframe:
+ return 1;
+#endif
+
+}
+
+/*
+ * Determine which stack to use..
+ */
+static inline void *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+ unsigned long esp;
+ unsigned int xss;
+
+ /* Default to using normal stack */
+ esp = regs->r12;
+ xss = regs->r16 >> 16;
+
+ /* This is the X/Open sanctioned signal stack switching. */
+ if (ka->sa.sa_flags & SA_ONSTACK) {
+ if (! on_sig_stack(esp))
+ esp = current->sas_ss_sp + current->sas_ss_size;
+ }
+ /* Legacy stack switching not supported */
+
+ return (void *)((esp - frame_size) & -8ul);
+}
+
+static void
+setup_frame_ia32(int sig, struct k_sigaction *ka, sigset_t *set,
+ struct pt_regs * regs)
+{
+ struct sigframe_ia32 *frame;
+ int err = 0;
+
+ frame = get_sigframe(ka, regs, sizeof(*frame));
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ goto give_sigsegv;
+
+ err |= __put_user((current->exec_domain
+ && current->exec_domain->signal_invmap
+ && sig < 32
+ ? (int)(current->exec_domain->signal_invmap[sig])
+ : sig),
+ &frame->sig);
+
+ err |= setup_sigcontext_ia32(&frame->sc, &frame->fpstate, regs, set->sig[0]);
+
+ if (_NSIG_WORDS > 1) {
+ err |= __copy_to_user(frame->extramask, &set->sig[1],
+ sizeof(frame->extramask));
+ }
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ err |= __put_user(frame->retcode, &frame->pretcode);
+ /* This is popl %eax ; movl $,%eax ; int $0x80 */
+ err |= __put_user(0xb858, (short *)(frame->retcode+0));
+#define __IA32_NR_sigreturn 119
+ err |= __put_user(__IA32_NR_sigreturn & 0xffff, (short *)(frame->retcode+2));
+ err |= __put_user(__IA32_NR_sigreturn >> 16, (short *)(frame->retcode+4));
+ err |= __put_user(0x80cd, (short *)(frame->retcode+6));
+
+ if (err)
+ goto give_sigsegv;
+
+ /* Set up registers for signal handler */
+ regs->r12 = (unsigned long) frame;
+ regs->cr_iip = (unsigned long) ka->sa.sa_handler;
+
+ set_fs(USER_DS);
+ regs->r16 = (__USER_DS << 16) | (__USER_DS); /* ES == DS, GS, FS are zero */
+ regs->r17 = (__USER_DS << 16) | __USER_CS;
+
+#if 0
+ regs->eflags &= ~TF_MASK;
+#endif
+
+#if 1
+ printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%x\n",
+ current->comm, current->pid, frame, regs->cr_iip, frame->pretcode);
+#endif
+
+ return;
+
+give_sigsegv:
+ if (sig == SIGSEGV)
+ ka->sa.sa_handler = SIG_DFL;
+ force_sig(SIGSEGV, current);
+}
+
+static void
+setup_rt_frame_ia32(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs * regs)
+{
+ struct rt_sigframe_ia32 *frame;
+ int err = 0;
+
+ frame = get_sigframe(ka, regs, sizeof(*frame));
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ goto give_sigsegv;
+
+ err |= __put_user((current->exec_domain
+ && current->exec_domain->signal_invmap
+ && sig < 32
+ ? current->exec_domain->signal_invmap[sig]
+ : sig),
+ &frame->sig);
+ err |= __put_user(&frame->info, &frame->pinfo);
+ err |= __put_user(&frame->uc, &frame->puc);
+ err |= __copy_to_user(&frame->info, info, sizeof(*info));
+
+ /* Create the ucontext. */
+ err |= __put_user(0, &frame->uc.uc_flags);
+ err |= __put_user(0, &frame->uc.uc_link);
+ err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+ err |= __put_user(sas_ss_flags(regs->r12),
+ &frame->uc.uc_stack.ss_flags);
+ err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= setup_sigcontext_ia32(&frame->uc.uc_mcontext, &frame->fpstate,
+ regs, set->sig[0]);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+ err |= __put_user(frame->retcode, &frame->pretcode);
+ /* This is movl $,%eax ; int $0x80 */
+ err |= __put_user(0xb8, (char *)(frame->retcode+0));
+#define __IA32_NR_rt_sigreturn 173
+ err |= __put_user(__IA32_NR_rt_sigreturn, (int *)(frame->retcode+1));
+ err |= __put_user(0x80cd, (short *)(frame->retcode+5));
+
+ if (err)
+ goto give_sigsegv;
+
+ /* Set up registers for signal handler */
+ regs->r12 = (unsigned long) frame;
+ regs->cr_iip = (unsigned long) ka->sa.sa_handler;
+
+ set_fs(USER_DS);
+
+ regs->r16 = (__USER_DS << 16) | (__USER_DS); /* ES == DS, GS, FS are zero */
+ regs->r17 = (__USER_DS << 16) | __USER_CS;
+
+#if 0
+ regs->eflags &= ~TF_MASK;
+#endif
+
+#if 1
+ printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%x\n",
+ current->comm, current->pid, frame, regs->cr_iip, frame->pretcode);
+#endif
+
+ return;
+
+give_sigsegv:
+ if (sig == SIGSEGV)
+ ka->sa.sa_handler = SIG_DFL;
+ force_sig(SIGSEGV, current);
+}
+
+long
+ia32_setup_frame1 (int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
+{
+ /* Set up the stack frame */
+ if (ka->sa.sa_flags & SA_SIGINFO)
+ setup_rt_frame_ia32(sig, ka, info, set, regs);
+ else
+ setup_frame_ia32(sig, ka, set, regs);
+	return 0;
+}
+
+asmlinkage int
+sys32_sigreturn(int arg1, int arg2, int arg3, int arg4, int arg5, unsigned long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+	struct sigframe_ia32 *frame = (struct sigframe_ia32 *)(regs->r12 - 8);
+ sigset_t set;
+ int eax;
+
+ if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+ goto badframe;
+
+ if (__get_user(set.sig[0], &frame->sc.oldmask)
+ || (_IA32_NSIG_WORDS > 1
+ && __copy_from_user((((char *) &set.sig) + 4),
+ &frame->extramask,
+ sizeof(frame->extramask))))
+ goto badframe;
+
+ sigdelsetmask(&set, ~_BLOCKABLE);
+ spin_lock_irq(&current->sigmask_lock);
+ current->blocked = (sigset_t) set;
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ if (restore_sigcontext_ia32(regs, &frame->sc, &eax))
+ goto badframe;
+ return eax;
+
+badframe:
+ force_sig(SIGSEGV, current);
+ return 0;
+}
+
+asmlinkage int
+sys32_rt_sigreturn(int arg1, int arg2, int arg3, int arg4, int arg5, unsigned long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+ struct rt_sigframe_ia32 *frame = (struct rt_sigframe_ia32 *)(regs->r12 - 4);
+ sigset_t set;
+ stack_t st;
+ int eax;
+
+ if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+ goto badframe;
+ if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+ goto badframe;
+
+ sigdelsetmask(&set, ~_BLOCKABLE);
+ spin_lock_irq(&current->sigmask_lock);
+ current->blocked = set;
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ if (restore_sigcontext_ia32(regs, &frame->uc.uc_mcontext, &eax))
+ goto badframe;
+
+ if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st)))
+ goto badframe;
+ /* It is more difficult to avoid calling this function than to
+ call it and ignore errors. */
+ do_sigaltstack(&st, NULL, regs->r12);
+
+ return eax;
+
+badframe:
+ force_sig(SIGSEGV, current);
+ return 0;
+}
+
diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c
new file mode 100644
index 000000000..dcf61e8e4
--- /dev/null
+++ b/arch/ia64/ia32/ia32_support.c
@@ -0,0 +1,61 @@
+/*
+ * IA32 helper functions
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/ia32.h>
+
+extern unsigned long *ia32_gdt_table, *ia32_tss;
+
+extern void die_if_kernel (char *str, struct pt_regs *regs, long err);
+
+/*
+ * Setup IA32 GDT and TSS
+ */
+void
+ia32_gdt_init(void)
+{
+ unsigned long gdt_and_tss_page;
+
+ /* allocate two IA-32 pages of memory: */
+ gdt_and_tss_page = __get_free_pages(GFP_KERNEL,
+ (IA32_PAGE_SHIFT < PAGE_SHIFT)
+ ? 0 : (IA32_PAGE_SHIFT + 1) - PAGE_SHIFT);
+ ia32_gdt_table = (unsigned long *) gdt_and_tss_page;
+ ia32_tss = (unsigned long *) (gdt_and_tss_page + IA32_PAGE_SIZE);
+
+ /* Zero the gdt and tss */
+ memset((void *) gdt_and_tss_page, 0, 2*IA32_PAGE_SIZE);
+
+ /* CS descriptor in IA-32 format */
+ ia32_gdt_table[4] = IA32_SEG_DESCRIPTOR(0L, 0xBFFFFFFFL, 0xBL, 1L,
+ 3L, 1L, 1L, 1L, 1L);
+
+ /* DS descriptor in IA-32 format */
+ ia32_gdt_table[5] = IA32_SEG_DESCRIPTOR(0L, 0xBFFFFFFFL, 0x3L, 1L,
+ 3L, 1L, 1L, 1L, 1L);
+}
+
+/*
+ * Handle bad IA32 interrupt via syscall
+ */
+void
+ia32_bad_interrupt (unsigned long int_num, struct pt_regs *regs)
+{
+ siginfo_t siginfo;
+
+ die_if_kernel("Bad IA-32 interrupt", regs, int_num);
+
+ siginfo.si_signo = SIGTRAP;
+ siginfo.si_errno = int_num; /* XXX is it legal to abuse si_errno like this? */
+ siginfo.si_code = TRAP_BRKPT;
+ force_sig_info(SIGTRAP, &siginfo, current);
+}
+
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
new file mode 100644
index 000000000..7b4c4995e
--- /dev/null
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -0,0 +1,4309 @@
+/*
+ * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on
+ * sys_sparc32
+ *
+ * Copyright (C) 2000 VA Linux Co
+ * Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
+ * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ *
+ * These routines maintain argument size conversion between 32bit and 64bit
+ * environment.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/signal.h>
+#include <linux/utime.h>
+#include <linux/resource.h>
+#include <linux/times.h>
+#include <linux/utsname.h>
+#include <linux/timex.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/malloc.h>
+#include <linux/uio.h>
+#include <linux/nfs_fs.h>
+#include <linux/smb_fs.h>
+#include <linux/smb_mount.h>
+#include <linux/ncp_fs.h>
+#include <linux/quota.h>
+#include <linux/module.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr.h>
+#include <linux/nfsd/syscall.h>
+#include <linux/poll.h>
+#include <linux/personality.h>
+#include <linux/stat.h>
+#include <linux/ipc.h>
+
+#include <asm/types.h>
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+#include <asm/ipc.h>
+
+#include <net/scm.h>
+#include <net/sock.h>
+#include <asm/ia32.h>
+
+#define A(__x) ((unsigned long)(__x))
+#define AA(__x) ((unsigned long)(__x))
+
+/*
+ * This is trivial, and on the face of it looks like it
+ * could equally well be done in user mode.
+ *
+ * Not so, for quite unobvious reasons - register pressure.
+ * In user mode vfork() cannot have a stack frame, and if
+ * done by calling the "clone()" system call directly, you
+ * do not have enough call-clobbered registers to hold all
+ * the information you need.
+ */
+asmlinkage int
+sys32_vfork(int dummy0, int dummy1, int dummy2, int dummy3, int dummy4,
+	    int dummy5, int dummy6, int dummy7, int stack)
+{
+ struct pt_regs *regs = (struct pt_regs *)&stack;
+
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->r12, regs);
+}
+
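+/*
+ * Count the entries of a NULL-terminated array of 32-bit user pointers
+ * (an IA-32 argv/envp list).  If `ap' is non-NULL, the pointers are also
+ * widened and stored there.  Returns the number of entries, not counting
+ * the terminating NULL, or the get_user() error on a bad pointer.
+ */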
+static int
+nargs(unsigned int arg, char **ap)
+{
+ char *ptr;
+ int n, err;
+
+ n = 0;
+ do {
+		if ((err = get_user(ptr, (int *)arg)) != 0)
+ return(err);
+ if (ap)
+ *ap++ = ptr;
+ arg += sizeof(unsigned int);
+ n++;
+ } while (ptr);
+ return(n - 1);
+}
+
+asmlinkage long
+sys32_execve(char *filename, unsigned int argv, unsigned int envp,
+	     int dummy3, int dummy4, int dummy5, int dummy6, int dummy7,
+	     int stack)
+{
+ struct pt_regs *regs = (struct pt_regs *)&stack;
+ char **av, **ae;
+ int na, ne, r, len;
+
+ na = nargs(argv, NULL);
+ ne = nargs(envp, NULL);
+ len = (na + ne + 2) * sizeof(*av);
+ /*
+ * kmalloc won't work because the `sys_exec' code will attempt
+ * to do a `get_user' on the arg list and `get_user' will fail
+ * on a kernel address (simplifies `get_user'). Instead we
+ * do an mmap to get a user address. Note that since a successful
+ * `execve' frees all current memory we only have to do an
+	 * `munmap' if the `execve' fails.
+ */
+ down(&current->mm->mmap_sem);
+ lock_kernel();
+
+ av = do_mmap_pgoff(0, NULL, len,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0);
+
+ unlock_kernel();
+ up(&current->mm->mmap_sem);
+
+ if (IS_ERR(av))
+ return(av);
+ ae = av + na + 1;
+ av[na] = (char *)0;
+ ae[ne] = (char *)0;
+ (void)nargs(argv, av);
+ (void)nargs(envp, ae);
+ r = sys_execve(filename, av, ae, regs);
+ if (IS_ERR(r))
+ sys_munmap(av, len);
+ return(r);
+}
+
+static inline int
+putstat(struct stat32 *ubuf, struct stat *kbuf)
+{
+ int err;
+
+ err = put_user (kbuf->st_dev, &ubuf->st_dev);
+ err |= __put_user (kbuf->st_ino, &ubuf->st_ino);
+ err |= __put_user (kbuf->st_mode, &ubuf->st_mode);
+ err |= __put_user (kbuf->st_nlink, &ubuf->st_nlink);
+ err |= __put_user (kbuf->st_uid, &ubuf->st_uid);
+ err |= __put_user (kbuf->st_gid, &ubuf->st_gid);
+ err |= __put_user (kbuf->st_rdev, &ubuf->st_rdev);
+ err |= __put_user (kbuf->st_size, &ubuf->st_size);
+ err |= __put_user (kbuf->st_atime, &ubuf->st_atime);
+ err |= __put_user (kbuf->st_mtime, &ubuf->st_mtime);
+ err |= __put_user (kbuf->st_ctime, &ubuf->st_ctime);
+ err |= __put_user (kbuf->st_blksize, &ubuf->st_blksize);
+ err |= __put_user (kbuf->st_blocks, &ubuf->st_blocks);
+ return err;
+}
+
+extern asmlinkage int sys_newstat(char * filename, struct stat * statbuf);
+
+asmlinkage int
+sys32_newstat(char * filename, struct stat32 *statbuf)
+{
+ int ret;
+ struct stat s;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs (KERNEL_DS);
+ ret = sys_newstat(filename, &s);
+ set_fs (old_fs);
+ if (putstat (statbuf, &s))
+ return -EFAULT;
+ return ret;
+}
+
+extern asmlinkage int sys_newlstat(char * filename, struct stat * statbuf);
+
+asmlinkage int
+sys32_newlstat(char * filename, struct stat32 *statbuf)
+{
+ int ret;
+ struct stat s;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs (KERNEL_DS);
+ ret = sys_newlstat(filename, &s);
+ set_fs (old_fs);
+ if (putstat (statbuf, &s))
+ return -EFAULT;
+ return ret;
+}
+
+extern asmlinkage int sys_newfstat(unsigned int fd, struct stat * statbuf);
+
+asmlinkage int
+sys32_newfstat(unsigned int fd, struct stat32 *statbuf)
+{
+ int ret;
+ struct stat s;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs (KERNEL_DS);
+ ret = sys_newfstat(fd, &s);
+ set_fs (old_fs);
+ if (putstat (statbuf, &s))
+ return -EFAULT;
+ return ret;
+}
+
+#define ALIGN4K(a) (((a) + 0xfff) & ~0xfff)
+#define OFFSET4K(a) ((a) & 0xfff)
+
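+/*
+ * Emulate an IA-32 mmap() whose address or offset is aligned only to the
+ * 4KB IA-32 page size rather than to the larger IA-64 PAGE_SIZE: save the
+ * bytes already mapped around the requested range, map an anonymous region
+ * over the containing pages, restore those bytes, and then read the file
+ * contents in by hand instead of mapping the file.
+ */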
+unsigned long
+do_mmap_fake(struct file *file, unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags, unsigned long off)
+{
+ struct inode *inode;
+ void *front, *back;
+ unsigned long baddr;
+ int r;
+ char c;
+
+ if (OFFSET4K(addr) || OFFSET4K(off))
+ return -EINVAL;
+ if (prot & PROT_WRITE)
+ prot |= PROT_EXEC;
+ front = NULL;
+ back = NULL;
+ if ((baddr = (addr & PAGE_MASK)) != addr && get_user(c, (char *)baddr) == 0) {
+ front = kmalloc(addr - baddr, GFP_KERNEL);
+ memcpy(front, (void *)baddr, addr - baddr);
+ }
+ if ((addr + len) & ~PAGE_MASK && get_user(c, (char *)(addr + len)) == 0) {
+ back = kmalloc(PAGE_SIZE - ((addr + len) & ~PAGE_MASK), GFP_KERNEL);
+		memcpy(back, (void *)(addr + len), PAGE_SIZE - ((addr + len) & ~PAGE_MASK));
+ }
+ if ((r = do_mmap(0, baddr, len + (addr - baddr), prot, flags | MAP_ANONYMOUS, 0)) < 0)
+ return(r);
+ if (back) {
+		memcpy((void *)(addr + len), back, PAGE_SIZE - ((addr + len) & ~PAGE_MASK));
+ kfree(back);
+ }
+ if (front) {
+ memcpy((void *)baddr, front, addr - baddr);
+ kfree(front);
+ }
+ if (flags & MAP_ANONYMOUS) {
+		memset((void *)addr, 0, len);
+ return(addr);
+ }
+ if (!file)
+ return -EINVAL;
+ inode = file->f_dentry->d_inode;
+ if (!inode->i_op || !inode->i_op->default_file_ops)
+ return -EINVAL;
+ if (!file->f_op->read)
+ return -EINVAL;
+ if (file->f_op->llseek) {
+ if (file->f_op->llseek(file,off,0) != off)
+ return -EINVAL;
+ } else
+ file->f_pos = off;
+ r = file->f_op->read(file, (char *)addr, len, &file->f_pos);
+ return (r < 0) ? -EINVAL : addr;
+}
+
+/*
+ * Linux/i386 didn't use to be able to handle more than
+ * 4 system call parameters, so these system calls used a memory
+ * block for parameter passing..
+ */
+
+struct mmap_arg_struct {
+ unsigned int addr;
+ unsigned int len;
+ unsigned int prot;
+ unsigned int flags;
+ unsigned int fd;
+ unsigned int offset;
+};
+
+asmlinkage int
+sys32_mmap(struct mmap_arg_struct *arg)
+{
+ int error = -EFAULT;
+ struct file * file = NULL;
+ struct mmap_arg_struct a;
+
+ if (copy_from_user(&a, arg, sizeof(a)))
+ return -EFAULT;
+
+ down(&current->mm->mmap_sem);
+ lock_kernel();
+ if (!(a.flags & MAP_ANONYMOUS)) {
+ error = -EBADF;
+ file = fget(a.fd);
+ if (!file)
+ goto out;
+ }
+ a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+ if ((a.flags & MAP_FIXED) && ((a.addr & ~PAGE_MASK) || (a.offset & ~PAGE_MASK))) {
+ unlock_kernel();
+ up(&current->mm->mmap_sem);
+ error = do_mmap_fake(file, a.addr, a.len, a.prot, a.flags, a.offset);
+ down(&current->mm->mmap_sem);
+ lock_kernel();
+ } else
+ error = do_mmap(file, a.addr, a.len, a.prot, a.flags, a.offset);
+ if (file)
+ fput(file);
+out:
+ unlock_kernel();
+ up(&current->mm->mmap_sem);
+ return error;
+}
+
+asmlinkage long
+sys32_pipe(int *fd)
+{
+ int retval;
+ int fds[2];
+
+ lock_kernel();
+ retval = do_pipe(fds);
+ if (retval)
+ goto out;
+ if (copy_to_user(fd, fds, sizeof(fds)))
+ retval = -EFAULT;
+ out:
+ unlock_kernel();
+ return retval;
+}
+
+asmlinkage long
+sys32_mprotect(unsigned long start, size_t len, unsigned long prot)
+{
+
+ if (prot == 0)
+ return(0);
+ len += start & ~PAGE_MASK;
+ if ((start & ~PAGE_MASK) && (prot & PROT_WRITE))
+ prot |= PROT_EXEC;
+ return(sys_mprotect(start & PAGE_MASK, len & PAGE_MASK, prot));
+}
+
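+/*
+ * IA-32 uses 32-bit sigset words while the native sigset_t uses 64-bit
+ * words, so the rt_sigaction/rt_sigprocmask wrappers below merge pairs of
+ * 32-bit words on the way in and split them again on the way out.
+ */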
+asmlinkage int
+sys32_rt_sigaction(int sig, struct sigaction32 *act,
+ struct sigaction32 *oact, unsigned int sigsetsize)
+{
+ struct k_sigaction new_ka, old_ka;
+ int ret;
+ sigset32_t set32;
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(sigset32_t))
+ return -EINVAL;
+
+ if (act) {
+ ret = get_user((long)new_ka.sa.sa_handler, &act->sa_handler);
+ ret |= __copy_from_user(&set32, &act->sa_mask,
+ sizeof(sigset32_t));
+ switch (_NSIG_WORDS) {
+ case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
+ | (((long)set32.sig[7]) << 32);
+ case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4]
+ | (((long)set32.sig[5]) << 32);
+ case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2]
+ | (((long)set32.sig[3]) << 32);
+ case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0]
+ | (((long)set32.sig[1]) << 32);
+ }
+ ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+
+ if (ret)
+ return -EFAULT;
+ }
+
+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+ if (!ret && oact) {
+ switch (_NSIG_WORDS) {
+ case 4:
+ set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32);
+ set32.sig[6] = old_ka.sa.sa_mask.sig[3];
+ case 3:
+ set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32);
+ set32.sig[4] = old_ka.sa.sa_mask.sig[2];
+ case 2:
+ set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32);
+ set32.sig[2] = old_ka.sa.sa_mask.sig[1];
+ case 1:
+ set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32);
+ set32.sig[0] = old_ka.sa.sa_mask.sig[0];
+ }
+ ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler);
+ ret |= __copy_to_user(&oact->sa_mask, &set32,
+ sizeof(sigset32_t));
+ ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+ }
+
+ return ret;
+}
+
+
+extern asmlinkage int sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset,
+ size_t sigsetsize);
+
+asmlinkage int
+sys32_rt_sigprocmask(int how, sigset32_t *set, sigset32_t *oset,
+ unsigned int sigsetsize)
+{
+ sigset_t s;
+ sigset32_t s32;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+ if (set) {
+ if (copy_from_user (&s32, set, sizeof(sigset32_t)))
+ return -EFAULT;
+ switch (_NSIG_WORDS) {
+ case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
+ case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
+ case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
+ case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
+ }
+ }
+ set_fs (KERNEL_DS);
+ ret = sys_rt_sigprocmask(how, set ? &s : NULL, oset ? &s : NULL,
+ sigsetsize);
+ set_fs (old_fs);
+ if (ret) return ret;
+ if (oset) {
+ switch (_NSIG_WORDS) {
+ case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
+ case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
+ case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
+ case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
+ }
+ if (copy_to_user (oset, &s32, sizeof(sigset32_t)))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+static inline int
+put_statfs (struct statfs32 *ubuf, struct statfs *kbuf)
+{
+ int err;
+
+ err = put_user (kbuf->f_type, &ubuf->f_type);
+ err |= __put_user (kbuf->f_bsize, &ubuf->f_bsize);
+ err |= __put_user (kbuf->f_blocks, &ubuf->f_blocks);
+ err |= __put_user (kbuf->f_bfree, &ubuf->f_bfree);
+ err |= __put_user (kbuf->f_bavail, &ubuf->f_bavail);
+ err |= __put_user (kbuf->f_files, &ubuf->f_files);
+ err |= __put_user (kbuf->f_ffree, &ubuf->f_ffree);
+ err |= __put_user (kbuf->f_namelen, &ubuf->f_namelen);
+ err |= __put_user (kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]);
+ err |= __put_user (kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]);
+ return err;
+}
+
+extern asmlinkage int sys_statfs(const char * path, struct statfs * buf);
+
+asmlinkage int
+sys32_statfs(const char * path, struct statfs32 *buf)
+{
+ int ret;
+ struct statfs s;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs (KERNEL_DS);
+ ret = sys_statfs((const char *)path, &s);
+ set_fs (old_fs);
+ if (put_statfs(buf, &s))
+ return -EFAULT;
+ return ret;
+}
+
+extern asmlinkage int sys_fstatfs(unsigned int fd, struct statfs * buf);
+
+asmlinkage int
+sys32_fstatfs(unsigned int fd, struct statfs32 *buf)
+{
+ int ret;
+ struct statfs s;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs (KERNEL_DS);
+ ret = sys_fstatfs(fd, &s);
+ set_fs (old_fs);
+ if (put_statfs(buf, &s))
+ return -EFAULT;
+ return ret;
+}
+
+struct timeval32
+{
+ int tv_sec, tv_usec;
+};
+
+struct itimerval32
+{
+ struct timeval32 it_interval;
+ struct timeval32 it_value;
+};
+
+static inline long
+get_tv32(struct timeval *o, struct timeval32 *i)
+{
+ return (!access_ok(VERIFY_READ, i, sizeof(*i)) ||
+ (__get_user(o->tv_sec, &i->tv_sec) |
+ __get_user(o->tv_usec, &i->tv_usec)));
+}
+
+static inline long
+put_tv32(struct timeval32 *o, struct timeval *i)
+{
+ return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
+ (__put_user(i->tv_sec, &o->tv_sec) |
+ __put_user(i->tv_usec, &o->tv_usec)));
+}
+
+static inline long
+get_it32(struct itimerval *o, struct itimerval32 *i)
+{
+ return (!access_ok(VERIFY_READ, i, sizeof(*i)) ||
+ (__get_user(o->it_interval.tv_sec, &i->it_interval.tv_sec) |
+ __get_user(o->it_interval.tv_usec, &i->it_interval.tv_usec) |
+ __get_user(o->it_value.tv_sec, &i->it_value.tv_sec) |
+ __get_user(o->it_value.tv_usec, &i->it_value.tv_usec)));
+}
+
+static inline long
+put_it32(struct itimerval32 *o, struct itimerval *i)
+{
+	return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
+ (__put_user(i->it_interval.tv_sec, &o->it_interval.tv_sec) |
+ __put_user(i->it_interval.tv_usec, &o->it_interval.tv_usec) |
+ __put_user(i->it_value.tv_sec, &o->it_value.tv_sec) |
+ __put_user(i->it_value.tv_usec, &o->it_value.tv_usec)));
+}
+
+extern int do_getitimer(int which, struct itimerval *value);
+
+asmlinkage int
+sys32_getitimer(int which, struct itimerval32 *it)
+{
+ struct itimerval kit;
+ int error;
+
+ error = do_getitimer(which, &kit);
+ if (!error && put_it32(it, &kit))
+ error = -EFAULT;
+
+ return error;
+}
+
+extern int do_setitimer(int which, struct itimerval *, struct itimerval *);
+
+asmlinkage int
+sys32_setitimer(int which, struct itimerval32 *in, struct itimerval32 *out)
+{
+ struct itimerval kin, kout;
+ int error;
+
+ if (in) {
+ if (get_it32(&kin, in))
+ return -EFAULT;
+ } else
+ memset(&kin, 0, sizeof(kin));
+
+ error = do_setitimer(which, &kin, out ? &kout : NULL);
+ if (error || !out)
+ return error;
+ if (put_it32(out, &kout))
+ return -EFAULT;
+
+ return 0;
+}
+
+asmlinkage unsigned long
+sys32_alarm(unsigned int seconds)
+{
+ struct itimerval it_new, it_old;
+ unsigned int oldalarm;
+
+ it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
+ it_new.it_value.tv_sec = seconds;
+ it_new.it_value.tv_usec = 0;
+ do_setitimer(ITIMER_REAL, &it_new, &it_old);
+ oldalarm = it_old.it_value.tv_sec;
+ /* ehhh.. We can't return 0 if we have an alarm pending.. */
+ /* And we'd better return too much than too little anyway */
+ if (it_old.it_value.tv_usec)
+ oldalarm++;
+ return oldalarm;
+}
+
+/* Translations due to time_t size differences. Which affects all
+ sorts of things, like timeval and itimerval. */
+
+extern struct timezone sys_tz;
+extern int do_sys_settimeofday(struct timeval *tv, struct timezone *tz);
+
+asmlinkage int
+sys32_gettimeofday(struct timeval32 *tv, struct timezone *tz)
+{
+ if (tv) {
+ struct timeval ktv;
+ do_gettimeofday(&ktv);
+ if (put_tv32(tv, &ktv))
+ return -EFAULT;
+ }
+ if (tz) {
+ if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+asmlinkage int
+sys32_settimeofday(struct timeval32 *tv, struct timezone *tz)
+{
+ struct timeval ktv;
+ struct timezone ktz;
+
+ if (tv) {
+ if (get_tv32(&ktv, tv))
+ return -EFAULT;
+ }
+ if (tz) {
+ if (copy_from_user(&ktz, tz, sizeof(ktz)))
+ return -EFAULT;
+ }
+
+ return do_sys_settimeofday(tv ? &ktv : NULL, tz ? &ktz : NULL);
+}
+
+struct dirent32 {
+ unsigned int d_ino;
+ unsigned int d_off;
+ unsigned short d_reclen;
+ char d_name[NAME_MAX + 1];
+};
+
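+/*
+ * Convert a buffer of native struct dirent records, in place, into the
+ * packed 32-bit struct dirent32 layout expected by IA-32 callers.
+ */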
+static void
+xlate_dirent(void *dirent, long n)
+{
+ long off;
+ struct dirent *dirp;
+ struct dirent32 *dirp32;
+
+ off = 0;
+ while (off < n) {
+ dirp = (struct dirent *)(dirent + off);
+ off += dirp->d_reclen;
+ dirp32 = (struct dirent32 *)dirp;
+ dirp32->d_ino = dirp->d_ino;
+ dirp32->d_off = (unsigned int)dirp->d_off;
+ dirp32->d_reclen = dirp->d_reclen;
+ strncpy(dirp32->d_name, dirp->d_name, dirp->d_reclen - ((3 * 4) + 2));
+ }
+ return;
+}
+
+asmlinkage long
+sys32_getdents(unsigned int fd, void * dirent, unsigned int count)
+{
+ long n;
+
+ if ((n = sys_getdents(fd, dirent, count)) < 0)
+ return(n);
+ xlate_dirent(dirent, n);
+ return(n);
+}
+
+asmlinkage int
+sys32_readdir(unsigned int fd, void * dirent, unsigned int count)
+{
+ int n;
+ struct dirent *dirp;
+
+ if ((n = old_readdir(fd, dirent, count)) < 0)
+ return(n);
+ dirp = (struct dirent *)dirent;
+ xlate_dirent(dirent, dirp->d_reclen);
+ return(n);
+}
+
+/*
+ * We can actually return ERESTARTSYS instead of EINTR, but I'd
+ * like to be certain this leads to no problems. So I return
+ * EINTR just for safety.
+ *
+ * Update: ERESTARTSYS breaks at least the xview clock binary, so
+ * I'm trying ERESTARTNOHAND, which restarts only when you want it to.
+ */
+#define MAX_SELECT_SECONDS \
+ ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
+#define ROUND_UP(x,y) (((x)+(y)-1)/(y))
+
+asmlinkage int
+sys32_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval32 *tvp32)
+{
+ fd_set_bits fds;
+ char *bits;
+ long timeout;
+ int ret, size;
+
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ if (tvp32) {
+ time_t sec, usec;
+
+ get_user(sec, &tvp32->tv_sec);
+ get_user(usec, &tvp32->tv_usec);
+
+ ret = -EINVAL;
+ if (sec < 0 || usec < 0)
+ goto out_nofds;
+
+ if ((unsigned long) sec < MAX_SELECT_SECONDS) {
+ timeout = ROUND_UP(usec, 1000000/HZ);
+ timeout += sec * (unsigned long) HZ;
+ }
+ }
+
+ ret = -EINVAL;
+ if (n < 0)
+ goto out_nofds;
+
+ if (n > current->files->max_fdset)
+ n = current->files->max_fdset;
+
+ /*
+ * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
+ * since we used fdset we need to allocate memory in units of
+ * long-words.
+ */
+ ret = -ENOMEM;
+ size = FDS_BYTES(n);
+ bits = kmalloc(6 * size, GFP_KERNEL);
+ if (!bits)
+ goto out_nofds;
+ fds.in = (unsigned long *) bits;
+ fds.out = (unsigned long *) (bits + size);
+ fds.ex = (unsigned long *) (bits + 2*size);
+ fds.res_in = (unsigned long *) (bits + 3*size);
+ fds.res_out = (unsigned long *) (bits + 4*size);
+ fds.res_ex = (unsigned long *) (bits + 5*size);
+
+ if ((ret = get_fd_set(n, inp, fds.in)) ||
+ (ret = get_fd_set(n, outp, fds.out)) ||
+ (ret = get_fd_set(n, exp, fds.ex)))
+ goto out;
+ zero_fd_set(n, fds.res_in);
+ zero_fd_set(n, fds.res_out);
+ zero_fd_set(n, fds.res_ex);
+
+ ret = do_select(n, &fds, &timeout);
+
+ if (tvp32 && !(current->personality & STICKY_TIMEOUTS)) {
+ time_t sec = 0, usec = 0;
+ if (timeout) {
+ sec = timeout / HZ;
+ usec = timeout % HZ;
+ usec *= (1000000/HZ);
+ }
+ put_user(sec, (int *)&tvp32->tv_sec);
+ put_user(usec, (int *)&tvp32->tv_usec);
+ }
+
+ if (ret < 0)
+ goto out;
+ if (!ret) {
+ ret = -ERESTARTNOHAND;
+ if (signal_pending(current))
+ goto out;
+ ret = 0;
+ }
+
+ set_fd_set(n, inp, fds.res_in);
+ set_fd_set(n, outp, fds.res_out);
+ set_fd_set(n, exp, fds.res_ex);
+
+out:
+ kfree(bits);
+out_nofds:
+ return ret;
+}
+
+struct rusage32 {
+ struct timeval32 ru_utime;
+ struct timeval32 ru_stime;
+ int ru_maxrss;
+ int ru_ixrss;
+ int ru_idrss;
+ int ru_isrss;
+ int ru_minflt;
+ int ru_majflt;
+ int ru_nswap;
+ int ru_inblock;
+ int ru_oublock;
+ int ru_msgsnd;
+ int ru_msgrcv;
+ int ru_nsignals;
+ int ru_nvcsw;
+ int ru_nivcsw;
+};
+
+static int
+put_rusage (struct rusage32 *ru, struct rusage *r)
+{
+ int err;
+
+ err = put_user (r->ru_utime.tv_sec, &ru->ru_utime.tv_sec);
+ err |= __put_user (r->ru_utime.tv_usec, &ru->ru_utime.tv_usec);
+ err |= __put_user (r->ru_stime.tv_sec, &ru->ru_stime.tv_sec);
+ err |= __put_user (r->ru_stime.tv_usec, &ru->ru_stime.tv_usec);
+ err |= __put_user (r->ru_maxrss, &ru->ru_maxrss);
+ err |= __put_user (r->ru_ixrss, &ru->ru_ixrss);
+ err |= __put_user (r->ru_idrss, &ru->ru_idrss);
+ err |= __put_user (r->ru_isrss, &ru->ru_isrss);
+ err |= __put_user (r->ru_minflt, &ru->ru_minflt);
+ err |= __put_user (r->ru_majflt, &ru->ru_majflt);
+ err |= __put_user (r->ru_nswap, &ru->ru_nswap);
+ err |= __put_user (r->ru_inblock, &ru->ru_inblock);
+ err |= __put_user (r->ru_oublock, &ru->ru_oublock);
+ err |= __put_user (r->ru_msgsnd, &ru->ru_msgsnd);
+ err |= __put_user (r->ru_msgrcv, &ru->ru_msgrcv);
+ err |= __put_user (r->ru_nsignals, &ru->ru_nsignals);
+ err |= __put_user (r->ru_nvcsw, &ru->ru_nvcsw);
+ err |= __put_user (r->ru_nivcsw, &ru->ru_nivcsw);
+ return err;
+}
+
+extern asmlinkage int sys_wait4(pid_t pid,unsigned int * stat_addr,
+ int options, struct rusage * ru);
+
+asmlinkage int
+sys32_wait4(__kernel_pid_t32 pid, unsigned int *stat_addr, int options,
+ struct rusage32 *ru)
+{
+ if (!ru)
+ return sys_wait4(pid, stat_addr, options, NULL);
+ else {
+ struct rusage r;
+ int ret;
+ unsigned int status;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs (KERNEL_DS);
+ ret = sys_wait4(pid, stat_addr ? &status : NULL, options, &r);
+ set_fs (old_fs);
+ if (put_rusage (ru, &r)) return -EFAULT;
+ if (stat_addr && put_user (status, stat_addr))
+ return -EFAULT;
+ return ret;
+ }
+}
+
+asmlinkage int
+sys32_waitpid(__kernel_pid_t32 pid, unsigned int *stat_addr, int options)
+{
+ return sys32_wait4(pid, stat_addr, options, NULL);
+}
+
+struct timespec32 {
+ int tv_sec;
+ int tv_nsec;
+};
+
+extern asmlinkage int sys_nanosleep(struct timespec *rqtp,
+ struct timespec *rmtp);
+
+asmlinkage int
+sys32_nanosleep(struct timespec32 *rqtp, struct timespec32 *rmtp)
+{
+ struct timespec t;
+ int ret;
+ mm_segment_t old_fs = get_fs ();
+
+ if (get_user (t.tv_sec, &rqtp->tv_sec) ||
+ __get_user (t.tv_nsec, &rqtp->tv_nsec))
+ return -EFAULT;
+ set_fs (KERNEL_DS);
+ ret = sys_nanosleep(&t, rmtp ? &t : NULL);
+ set_fs (old_fs);
+ if (rmtp && ret == -EINTR) {
+ if (__put_user (t.tv_sec, &rmtp->tv_sec) ||
+ __put_user (t.tv_nsec, &rmtp->tv_nsec))
+ return -EFAULT;
+ }
+ return ret;
+}
+
+struct iovec32 { unsigned int iov_base; int iov_len; };
+
+typedef ssize_t (*IO_fn_t)(struct file *, char *, size_t, loff_t *);
+
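+/*
+ * Common helper for sys32_readv()/sys32_writev(): fetch the 32-bit iovec
+ * array from user space, widen each entry into a native struct iovec, and
+ * perform the I/O one segment at a time through the file's read or write
+ * method (sockets go through sock_readv_writev() instead).
+ */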
+static long
+do_readv_writev32(int type, struct file *file, const struct iovec32 *vector,
+ u32 count)
+{
+ unsigned long tot_len;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov=iovstack, *ivp;
+ struct inode *inode;
+ long retval, i;
+ IO_fn_t fn;
+
+ /* First get the "struct iovec" from user memory and
+ * verify all the pointers
+ */
+ if (!count)
+ return 0;
+ if(verify_area(VERIFY_READ, vector, sizeof(struct iovec32)*count))
+ return -EFAULT;
+ if (count > UIO_MAXIOV)
+ return -EINVAL;
+ if (count > UIO_FASTIOV) {
+ iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL);
+ if (!iov)
+ return -ENOMEM;
+ }
+
+ tot_len = 0;
+ i = count;
+ ivp = iov;
+ while(i > 0) {
+ u32 len;
+ u32 buf;
+
+ __get_user(len, &vector->iov_len);
+ __get_user(buf, &vector->iov_base);
+ tot_len += len;
+ ivp->iov_base = (void *)A(buf);
+ ivp->iov_len = (__kernel_size_t) len;
+ vector++;
+ ivp++;
+ i--;
+ }
+
+ inode = file->f_dentry->d_inode;
+ /* VERIFY_WRITE actually means a read, as we write to user space */
+ retval = locks_verify_area((type == VERIFY_WRITE
+ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE),
+ inode, file, file->f_pos, tot_len);
+ if (retval) {
+ if (iov != iovstack)
+ kfree(iov);
+ return retval;
+ }
+
+ /* Then do the actual IO. Note that sockets need to be handled
+ * specially as they have atomicity guarantees and can handle
+ * iovec's natively
+ */
+ if (inode->i_sock) {
+ int err;
+ err = sock_readv_writev(type, inode, file, iov, count, tot_len);
+ if (iov != iovstack)
+ kfree(iov);
+ return err;
+ }
+
+ if (!file->f_op) {
+ if (iov != iovstack)
+ kfree(iov);
+ return -EINVAL;
+ }
+ /* VERIFY_WRITE actually means a read, as we write to user space */
+ fn = file->f_op->read;
+ if (type == VERIFY_READ)
+ fn = (IO_fn_t) file->f_op->write;
+ ivp = iov;
+ while (count > 0) {
+ void * base;
+ int len, nr;
+
+ base = ivp->iov_base;
+ len = ivp->iov_len;
+ ivp++;
+ count--;
+ nr = fn(file, base, len, &file->f_pos);
+ if (nr < 0) {
+ if (retval)
+ break;
+ retval = nr;
+ break;
+ }
+ retval += nr;
+ if (nr != len)
+ break;
+ }
+ if (iov != iovstack)
+ kfree(iov);
+ return retval;
+}
+
+asmlinkage long
+sys32_readv(int fd, struct iovec32 *vector, u32 count)
+{
+ struct file *file;
+ long ret = -EBADF;
+
+ lock_kernel();
+ file = fget(fd);
+ if(!file)
+ goto bad_file;
+
+ if(!(file->f_mode & 1))
+ goto out;
+
+ ret = do_readv_writev32(VERIFY_WRITE, file,
+ vector, count);
+out:
+ fput(file);
+bad_file:
+ unlock_kernel();
+ return ret;
+}
+
+asmlinkage long
+sys32_writev(int fd, struct iovec32 *vector, u32 count)
+{
+ struct file *file;
+ int ret = -EBADF;
+
+ lock_kernel();
+ file = fget(fd);
+ if(!file)
+ goto bad_file;
+
+ if(!(file->f_mode & 2))
+ goto out;
+
+ down(&file->f_dentry->d_inode->i_sem);
+ ret = do_readv_writev32(VERIFY_READ, file,
+ vector, count);
+ up(&file->f_dentry->d_inode->i_sem);
+out:
+ fput(file);
+bad_file:
+ unlock_kernel();
+ return ret;
+}
+
+#define RLIM_INFINITY32 0x7fffffff
+#define RESOURCE32(x) (((x) > RLIM_INFINITY32) ? RLIM_INFINITY32 : (x))
+
+struct rlimit32 {
+ int rlim_cur;
+ int rlim_max;
+};
+
+extern asmlinkage int sys_getrlimit(unsigned int resource, struct rlimit *rlim);
+
+asmlinkage int
+sys32_getrlimit(unsigned int resource, struct rlimit32 *rlim)
+{
+ struct rlimit r;
+ int ret;
+ mm_segment_t old_fs = get_fs ();
+
+ set_fs (KERNEL_DS);
+ ret = sys_getrlimit(resource, &r);
+ set_fs (old_fs);
+ if (!ret) {
+ ret = put_user (RESOURCE32(r.rlim_cur), &rlim->rlim_cur);
+ ret |= __put_user (RESOURCE32(r.rlim_max), &rlim->rlim_max);
+ }
+ return ret;
+}
+
+extern asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim);
+
+asmlinkage int
+sys32_setrlimit(unsigned int resource, struct rlimit32 *rlim)
+{
+ struct rlimit r;
+ int ret;
+ mm_segment_t old_fs = get_fs ();
+
+ if (resource >= RLIM_NLIMITS) return -EINVAL;
+ if (get_user (r.rlim_cur, &rlim->rlim_cur) ||
+ __get_user (r.rlim_max, &rlim->rlim_max))
+ return -EFAULT;
+ if (r.rlim_cur == RLIM_INFINITY32)
+ r.rlim_cur = RLIM_INFINITY;
+ if (r.rlim_max == RLIM_INFINITY32)
+ r.rlim_max = RLIM_INFINITY;
+ set_fs (KERNEL_DS);
+ ret = sys_setrlimit(resource, &r);
+ set_fs (old_fs);
+ return ret;
+}
+
+/* Argument list sizes for sys_socketcall */
+#define AL(x) ((x) * sizeof(u32))
+static unsigned char nas[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
+ AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
+ AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
+#undef AL
+
+extern asmlinkage int sys_bind(int fd, struct sockaddr *umyaddr, int addrlen);
+extern asmlinkage int sys_connect(int fd, struct sockaddr *uservaddr,
+ int addrlen);
+extern asmlinkage int sys_accept(int fd, struct sockaddr *upeer_sockaddr,
+ int *upeer_addrlen);
+extern asmlinkage int sys_getsockname(int fd, struct sockaddr *usockaddr,
+ int *usockaddr_len);
+extern asmlinkage int sys_getpeername(int fd, struct sockaddr *usockaddr,
+ int *usockaddr_len);
+extern asmlinkage int sys_send(int fd, void *buff, size_t len, unsigned flags);
+extern asmlinkage int sys_sendto(int fd, u32 buff, __kernel_size_t32 len,
+ unsigned flags, u32 addr, int addr_len);
+extern asmlinkage int sys_recv(int fd, void *ubuf, size_t size, unsigned flags);
+extern asmlinkage int sys_recvfrom(int fd, u32 ubuf, __kernel_size_t32 size,
+ unsigned flags, u32 addr, u32 addr_len);
+extern asmlinkage int sys_setsockopt(int fd, int level, int optname,
+ char *optval, int optlen);
+extern asmlinkage int sys_getsockopt(int fd, int level, int optname,
+ u32 optval, u32 optlen);
+
+extern asmlinkage int sys_socket(int family, int type, int protocol);
+extern asmlinkage int sys_socketpair(int family, int type, int protocol,
+ int usockvec[2]);
+extern asmlinkage int sys_shutdown(int fd, int how);
+extern asmlinkage int sys_listen(int fd, int backlog);
+
+asmlinkage int sys32_socketcall(int call, u32 *args)
+{
+ int i, ret;
+ u32 a[6];
+ u32 a0,a1;
+
+ if (call<SYS_SOCKET||call>SYS_RECVMSG)
+ return -EINVAL;
+ if (copy_from_user(a, args, nas[call]))
+ return -EFAULT;
+ a0=a[0];
+ a1=a[1];
+
+ switch(call)
+ {
+ case SYS_SOCKET:
+ ret = sys_socket(a0, a1, a[2]);
+ break;
+ case SYS_BIND:
+ ret = sys_bind(a0, (struct sockaddr *)A(a1), a[2]);
+ break;
+ case SYS_CONNECT:
+ ret = sys_connect(a0, (struct sockaddr *)A(a1), a[2]);
+ break;
+ case SYS_LISTEN:
+ ret = sys_listen(a0, a1);
+ break;
+ case SYS_ACCEPT:
+ ret = sys_accept(a0, (struct sockaddr *)A(a1),
+ (int *)A(a[2]));
+ break;
+ case SYS_GETSOCKNAME:
+ ret = sys_getsockname(a0, (struct sockaddr *)A(a1),
+ (int *)A(a[2]));
+ break;
+ case SYS_GETPEERNAME:
+ ret = sys_getpeername(a0, (struct sockaddr *)A(a1),
+ (int *)A(a[2]));
+ break;
+ case SYS_SOCKETPAIR:
+ ret = sys_socketpair(a0, a1, a[2], (int *)A(a[3]));
+ break;
+ case SYS_SEND:
+ ret = sys_send(a0, (void *)A(a1), a[2], a[3]);
+ break;
+ case SYS_SENDTO:
+ ret = sys_sendto(a0, a1, a[2], a[3], a[4], a[5]);
+ break;
+ case SYS_RECV:
+ ret = sys_recv(a0, (void *)A(a1), a[2], a[3]);
+ break;
+ case SYS_RECVFROM:
+ ret = sys_recvfrom(a0, a1, a[2], a[3], a[4], a[5]);
+ break;
+ case SYS_SHUTDOWN:
+ ret = sys_shutdown(a0,a1);
+ break;
+ case SYS_SETSOCKOPT:
+ ret = sys_setsockopt(a0, a1, a[2], (char *)A(a[3]),
+ a[4]);
+ break;
+ case SYS_GETSOCKOPT:
+ ret = sys_getsockopt(a0, a1, a[2], a[3], a[4]);
+ break;
+ case SYS_SENDMSG:
+ ret = sys32_sendmsg(a0, (struct msghdr32 *)A(a1),
+ a[2]);
+ break;
+ case SYS_RECVMSG:
+ ret = sys32_recvmsg(a0, (struct msghdr32 *)A(a1),
+ a[2]);
+ break;
+ default:
+		ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
+/*
+ * Declare the IA32 version of the msghdr
+ */
+
+struct msghdr32 {
+ unsigned int msg_name; /* Socket name */
+ int msg_namelen; /* Length of name */
+ unsigned int msg_iov; /* Data blocks */
+ unsigned int msg_iovlen; /* Number of blocks */
+ unsigned int msg_control; /* Per protocol magic (eg BSD file descriptor passing) */
+ unsigned int msg_controllen; /* Length of cmsg list */
+ unsigned msg_flags;
+};
+
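+/*
+ * Copy an IA-32 msghdr into the native struct msghdr, widening the 32-bit
+ * user pointers (msg_name, msg_iov, msg_control) as we go.  The iovec
+ * array itself is converted later by verify_iovec32().
+ */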
+static inline int
+shape_msg(struct msghdr *mp, struct msghdr32 *mp32)
+{
+ unsigned int i;
+
+ if (!access_ok(VERIFY_READ, mp32, sizeof(*mp32)))
+ return(-EFAULT);
+ __get_user(i, &mp32->msg_name);
+ mp->msg_name = (void *)i;
+ __get_user(mp->msg_namelen, &mp32->msg_namelen);
+ __get_user(i, &mp32->msg_iov);
+	mp->msg_iov = (struct iovec *)i;
+ __get_user(mp->msg_iovlen, &mp32->msg_iovlen);
+ __get_user(i, &mp32->msg_control);
+ mp->msg_control = (void *)i;
+ __get_user(mp->msg_controllen, &mp32->msg_controllen);
+ __get_user(mp->msg_flags, &mp32->msg_flags);
+ return(0);
+}
+
+/*
+ * Verify & re-shape IA32 iovec. The caller must ensure that the
+ * iovec is big enough to hold the re-shaped message iovec.
+ *
+ * Save time not doing verify_area. copy_*_user will make this work
+ * in any case.
+ *
+ * Don't need to check the total size for overflow (cf net/core/iovec.c),
+ * 32-bit sizes can't overflow a 64-bit count.
+ */
+
+static inline int
+verify_iovec32(struct msghdr *m, struct iovec *iov, char *address, int mode)
+{
+ int size, err, ct;
+ struct iovec32 *iov32;
+
+ if(m->msg_namelen)
+ {
+ if(mode==VERIFY_READ)
+ {
+ err=move_addr_to_kernel(m->msg_name, m->msg_namelen, address);
+ if(err<0)
+ goto out;
+ }
+
+ m->msg_name = address;
+ } else
+ m->msg_name = NULL;
+
+ err = -EFAULT;
+ size = m->msg_iovlen * sizeof(struct iovec32);
+ if (copy_from_user(iov, m->msg_iov, size))
+ goto out;
+ m->msg_iov=iov;
+
+ err = 0;
+ iov32 = (struct iovec32 *)iov;
+ for (ct = m->msg_iovlen; ct-- > 0; ) {
+ iov[ct].iov_len = (__kernel_size_t)iov32[ct].iov_len;
+ iov[ct].iov_base = (void *)iov32[ct].iov_base;
+ err += iov[ct].iov_len;
+ }
+out:
+ return err;
+}
+
+extern __inline__ void
+sockfd_put(struct socket *sock)
+{
+ fput(sock->file);
+}
+
+/* XXX This really belongs in some header file... -DaveM */
+#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
+ 16 for IP, 16 for IPX,
+ 24 for IPv6,
+ about 80 for AX.25 */
+
+extern struct socket *sockfd_lookup(int fd, int *err);
+
+/*
+ * BSD sendmsg interface
+ */
+
+asmlinkage int sys32_sendmsg(int fd, struct msghdr32 *msg, unsigned flags)
+{
+ struct socket *sock;
+ char address[MAX_SOCK_ADDR];
+ struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+ unsigned char ctl[sizeof(struct cmsghdr) + 20]; /* 20 is size of ipv6_pktinfo */
+ unsigned char *ctl_buf = ctl;
+ struct msghdr msg_sys;
+ int err, ctl_len, iov_size, total_len;
+
+ err = -EFAULT;
+ if (shape_msg(&msg_sys, msg))
+ goto out;
+
+ sock = sockfd_lookup(fd, &err);
+ if (!sock)
+ goto out;
+
+ /* do not move before msg_sys is valid */
+ err = -EINVAL;
+ if (msg_sys.msg_iovlen > UIO_MAXIOV)
+ goto out_put;
+
+ /* Check whether to allocate the iovec area*/
+ err = -ENOMEM;
+ iov_size = msg_sys.msg_iovlen * sizeof(struct iovec32);
+ if (msg_sys.msg_iovlen > UIO_FASTIOV) {
+ iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
+ if (!iov)
+ goto out_put;
+ }
+
+ /* This will also move the address data into kernel space */
+ err = verify_iovec32(&msg_sys, iov, address, VERIFY_READ);
+ if (err < 0)
+ goto out_freeiov;
+ total_len = err;
+
+ err = -ENOBUFS;
+
+ if (msg_sys.msg_controllen > INT_MAX)
+ goto out_freeiov;
+ ctl_len = msg_sys.msg_controllen;
+ if (ctl_len)
+ {
+ if (ctl_len > sizeof(ctl))
+ {
+ err = -ENOBUFS;
+ ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
+ if (ctl_buf == NULL)
+ goto out_freeiov;
+ }
+ err = -EFAULT;
+ if (copy_from_user(ctl_buf, msg_sys.msg_control, ctl_len))
+ goto out_freectl;
+ msg_sys.msg_control = ctl_buf;
+ }
+ msg_sys.msg_flags = flags;
+
+ if (sock->file->f_flags & O_NONBLOCK)
+ msg_sys.msg_flags |= MSG_DONTWAIT;
+ err = sock_sendmsg(sock, &msg_sys, total_len);
+
+out_freectl:
+ if (ctl_buf != ctl)
+ sock_kfree_s(sock->sk, ctl_buf, ctl_len);
+out_freeiov:
+ if (iov != iovstack)
+ sock_kfree_s(sock->sk, iov, iov_size);
+out_put:
+ sockfd_put(sock);
+out:
+ return err;
+}
+
+/*
+ * BSD recvmsg interface
+ */
+
+asmlinkage int sys32_recvmsg(int fd, struct msghdr32 *msg, unsigned int flags)
+{
+ struct socket *sock;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov=iovstack;
+ struct msghdr msg_sys;
+ unsigned long cmsg_ptr;
+ int err, iov_size, total_len, len;
+
+ /* kernel mode address */
+ char addr[MAX_SOCK_ADDR];
+
+ /* user mode address pointers */
+ struct sockaddr *uaddr;
+ int *uaddr_len;
+
+ err=-EFAULT;
+ if (shape_msg(&msg_sys, msg))
+ goto out;
+
+ sock = sockfd_lookup(fd, &err);
+ if (!sock)
+ goto out;
+
+ err = -EINVAL;
+ if (msg_sys.msg_iovlen > UIO_MAXIOV)
+ goto out_put;
+
+ /* Check whether to allocate the iovec area */
+ err = -ENOMEM;
+ iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
+ if (msg_sys.msg_iovlen > UIO_FASTIOV) {
+ iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
+ if (!iov)
+ goto out_put;
+ }
+
+ /*
+ * Save the user-mode address (verify_iovec will change the
+ * kernel msghdr to use the kernel address space)
+ */
+
+ uaddr = msg_sys.msg_name;
+ uaddr_len = &msg->msg_namelen;
+ err = verify_iovec32(&msg_sys, iov, addr, VERIFY_WRITE);
+ if (err < 0)
+ goto out_freeiov;
+ total_len=err;
+
+ cmsg_ptr = (unsigned long)msg_sys.msg_control;
+ msg_sys.msg_flags = 0;
+
+ if (sock->file->f_flags & O_NONBLOCK)
+ flags |= MSG_DONTWAIT;
+ err = sock_recvmsg(sock, &msg_sys, total_len, flags);
+ if (err < 0)
+ goto out_freeiov;
+ len = err;
+
+ if (uaddr != NULL) {
+ err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
+ if (err < 0)
+ goto out_freeiov;
+ }
+ err = __put_user(msg_sys.msg_flags, &msg->msg_flags);
+ if (err)
+ goto out_freeiov;
+ err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr,
+ &msg->msg_controllen);
+ if (err)
+ goto out_freeiov;
+ err = len;
+
+out_freeiov:
+ if (iov != iovstack)
+ sock_kfree_s(sock->sk, iov, iov_size);
+out_put:
+ sockfd_put(sock);
+out:
+ return err;
+}
+
+/*
+ * sys32_ipc() is the de-multiplexer for the SysV IPC calls in 32-bit emulation.
+ *
+ * This is really horribly ugly.
+ */
+
+struct msgbuf32 { s32 mtype; char mtext[1]; };
+
+struct ipc_perm32
+{
+ key_t key;
+ __kernel_uid_t32 uid;
+ __kernel_gid_t32 gid;
+ __kernel_uid_t32 cuid;
+ __kernel_gid_t32 cgid;
+ __kernel_mode_t32 mode;
+ unsigned short seq;
+};
+
+struct semid_ds32 {
+ struct ipc_perm32 sem_perm; /* permissions .. see ipc.h */
+ __kernel_time_t32 sem_otime; /* last semop time */
+ __kernel_time_t32 sem_ctime; /* last change time */
+ u32 sem_base; /* ptr to first semaphore in array */
+ u32 sem_pending; /* pending operations to be processed */
+ u32 sem_pending_last; /* last pending operation */
+ u32 undo; /* undo requests on this array */
+ unsigned short sem_nsems; /* no. of semaphores in array */
+};
+
+struct msqid_ds32
+{
+ struct ipc_perm32 msg_perm;
+ u32 msg_first;
+ u32 msg_last;
+ __kernel_time_t32 msg_stime;
+ __kernel_time_t32 msg_rtime;
+ __kernel_time_t32 msg_ctime;
+ u32 wwait;
+ u32 rwait;
+ unsigned short msg_cbytes;
+ unsigned short msg_qnum;
+ unsigned short msg_qbytes;
+ __kernel_ipc_pid_t32 msg_lspid;
+ __kernel_ipc_pid_t32 msg_lrpid;
+};
+
+struct shmid_ds32 {
+ struct ipc_perm32 shm_perm;
+ int shm_segsz;
+ __kernel_time_t32 shm_atime;
+ __kernel_time_t32 shm_dtime;
+ __kernel_time_t32 shm_ctime;
+ __kernel_ipc_pid_t32 shm_cpid;
+ __kernel_ipc_pid_t32 shm_lpid;
+ unsigned short shm_nattch;
+};
+
+#define IPCOP_MASK(__x) (1UL << (__x))
+
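+/* 32-bit semctl(): most commands are passed straight to sys_semctl(), but
+ * IPC_SET, IPC_STAT and SEM_STAT need semid_ds <-> semid_ds32 translation. */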
+static int
+do_sys32_semctl(int first, int second, int third, void *uptr)
+{
+ union semun fourth;
+ u32 pad;
+ int err = -EINVAL;
+
+ if (!uptr)
+ goto out;
+ err = -EFAULT;
+ if (get_user (pad, (u32 *)uptr))
+ goto out;
+ if(third == SETVAL)
+ fourth.val = (int)pad;
+ else
+ fourth.__pad = (void *)A(pad);
+ if (IPCOP_MASK (third) &
+ (IPCOP_MASK (IPC_INFO) | IPCOP_MASK (SEM_INFO) |
+ IPCOP_MASK (GETVAL) | IPCOP_MASK (GETPID) |
+ IPCOP_MASK (GETNCNT) | IPCOP_MASK (GETZCNT) |
+ IPCOP_MASK (GETALL) | IPCOP_MASK (SETALL) |
+ IPCOP_MASK (IPC_RMID))) {
+ err = sys_semctl (first, second, third, fourth);
+ } else {
+ struct semid_ds s;
+ struct semid_ds32 *usp = (struct semid_ds32 *)A(pad);
+ mm_segment_t old_fs;
+ int need_back_translation;
+
+ if (third == IPC_SET) {
+ err = get_user (s.sem_perm.uid, &usp->sem_perm.uid);
+ err |= __get_user(s.sem_perm.gid, &usp->sem_perm.gid);
+ err |= __get_user(s.sem_perm.mode, &usp->sem_perm.mode);
+ if (err)
+ goto out;
+ fourth.__pad = &s;
+ }
+ need_back_translation =
+ (IPCOP_MASK (third) &
+ (IPCOP_MASK (SEM_STAT) | IPCOP_MASK (IPC_STAT))) != 0;
+ if (need_back_translation)
+ fourth.__pad = &s;
+ old_fs = get_fs ();
+ set_fs (KERNEL_DS);
+ err = sys_semctl (first, second, third, fourth);
+ set_fs (old_fs);
+ if (need_back_translation) {
+ int err2 = put_user(s.sem_perm.key, &usp->sem_perm.key);
+ err2 |= __put_user(s.sem_perm.uid, &usp->sem_perm.uid);
+ err2 |= __put_user(s.sem_perm.gid, &usp->sem_perm.gid);
+ err2 |= __put_user(s.sem_perm.cuid,
+ &usp->sem_perm.cuid);
+ err2 |= __put_user (s.sem_perm.cgid,
+ &usp->sem_perm.cgid);
+ err2 |= __put_user (s.sem_perm.mode,
+ &usp->sem_perm.mode);
+ err2 |= __put_user (s.sem_perm.seq, &usp->sem_perm.seq);
+ err2 |= __put_user (s.sem_otime, &usp->sem_otime);
+ err2 |= __put_user (s.sem_ctime, &usp->sem_ctime);
+ err2 |= __put_user (s.sem_nsems, &usp->sem_nsems);
+ if (err2) err = -EFAULT;
+ }
+ }
+out:
+ return err;
+}
+
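+/* 32-bit msgsnd(): copy the msgbuf32 into a kernel msgbuf and call
+ * sys_msgsnd() under KERNEL_DS. */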
+static int
+do_sys32_msgsnd (int first, int second, int third, void *uptr)
+{
+ struct msgbuf *p = kmalloc (second + sizeof (struct msgbuf)
+ + 4, GFP_USER);
+ struct msgbuf32 *up = (struct msgbuf32 *)uptr;
+ mm_segment_t old_fs;
+ int err;
+
+ if (!p)
+ return -ENOMEM;
+ err = get_user (p->mtype, &up->mtype);
+ err |= __copy_from_user (p->mtext, &up->mtext, second);
+ if (err)
+ goto out;
+ old_fs = get_fs ();
+ set_fs (KERNEL_DS);
+ err = sys_msgsnd (first, p, second, third);
+ set_fs (old_fs);
+out:
+ kfree (p);
+ return err;
+}
+
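+/* 32-bit msgrcv(): handle the old-style ipc_kludge calling convention
+ * (version 0), then copy the received message back as a msgbuf32. */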
+static int
+do_sys32_msgrcv (int first, int second, int msgtyp, int third,
+ int version, void *uptr)
+{
+ struct msgbuf32 *up;
+ struct msgbuf *p;
+ mm_segment_t old_fs;
+ int err;
+
+ if (!version) {
+ struct ipc_kludge *uipck = (struct ipc_kludge *)uptr;
+ struct ipc_kludge ipck;
+
+ err = -EINVAL;
+ if (!uptr)
+ goto out;
+ err = -EFAULT;
+ if (copy_from_user (&ipck, uipck, sizeof (struct ipc_kludge)))
+ goto out;
+ uptr = (void *)A(ipck.msgp);
+ msgtyp = ipck.msgtyp;
+ }
+ err = -ENOMEM;
+ p = kmalloc (second + sizeof (struct msgbuf) + 4, GFP_USER);
+ if (!p)
+ goto out;
+ old_fs = get_fs ();
+ set_fs (KERNEL_DS);
+ err = sys_msgrcv (first, p, second + 4, msgtyp, third);
+ set_fs (old_fs);
+ if (err < 0)
+ goto free_then_out;
+ up = (struct msgbuf32 *)uptr;
+ if (put_user (p->mtype, &up->mtype) ||
+ __copy_to_user (&up->mtext, p->mtext, err))
+ err = -EFAULT;
+free_then_out:
+ kfree (p);
+out:
+ return err;
+}
+
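+/* 32-bit msgctl(): translate msqid_ds32 for IPC_SET and the *_STAT commands. */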
+static int
+do_sys32_msgctl (int first, int second, void *uptr)
+{
+ int err;
+
+ if (IPCOP_MASK (second) &
+ (IPCOP_MASK (IPC_INFO) | IPCOP_MASK (MSG_INFO) |
+ IPCOP_MASK (IPC_RMID))) {
+ err = sys_msgctl (first, second, (struct msqid_ds *)uptr);
+ } else {
+ struct msqid_ds m;
+ struct msqid_ds32 *up = (struct msqid_ds32 *)uptr;
+ mm_segment_t old_fs;
+
+ if (second == IPC_SET) {
+ err = get_user (m.msg_perm.uid, &up->msg_perm.uid);
+ err |= __get_user (m.msg_perm.gid, &up->msg_perm.gid);
+ err |= __get_user (m.msg_perm.mode, &up->msg_perm.mode);
+ err |= __get_user (m.msg_qbytes, &up->msg_qbytes);
+ if (err)
+ goto out;
+ }
+ old_fs = get_fs ();
+ set_fs (KERNEL_DS);
+ err = sys_msgctl (first, second, &m);
+ set_fs (old_fs);
+ if (IPCOP_MASK (second) &
+ (IPCOP_MASK (MSG_STAT) | IPCOP_MASK (IPC_STAT))) {
+ int err2 = put_user (m.msg_perm.key, &up->msg_perm.key);
+ err2 |= __put_user(m.msg_perm.uid, &up->msg_perm.uid);
+ err2 |= __put_user(m.msg_perm.gid, &up->msg_perm.gid);
+ err2 |= __put_user(m.msg_perm.cuid, &up->msg_perm.cuid);
+ err2 |= __put_user(m.msg_perm.cgid, &up->msg_perm.cgid);
+ err2 |= __put_user(m.msg_perm.mode, &up->msg_perm.mode);
+ err2 |= __put_user(m.msg_perm.seq, &up->msg_perm.seq);
+ err2 |= __put_user(m.msg_stime, &up->msg_stime);
+ err2 |= __put_user(m.msg_rtime, &up->msg_rtime);
+ err2 |= __put_user(m.msg_ctime, &up->msg_ctime);
+ err2 |= __put_user(m.msg_cbytes, &up->msg_cbytes);
+ err2 |= __put_user(m.msg_qnum, &up->msg_qnum);
+ err2 |= __put_user(m.msg_qbytes, &up->msg_qbytes);
+ err2 |= __put_user(m.msg_lspid, &up->msg_lspid);
+ err2 |= __put_user(m.msg_lrpid, &up->msg_lrpid);
+ if (err2)
+ err = -EFAULT;
+ }
+ }
+
+out:
+ return err;
+}
+
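+/* 32-bit shmat(): the attach address is written back through the 32-bit
+ * pointer passed in 'third'. */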
+static int
+do_sys32_shmat (int first, int second, int third, int version, void *uptr)
+{
+ unsigned long raddr;
+ u32 *uaddr = (u32 *)A((u32)third);
+ int err = -EINVAL;
+
+ if (version == 1)
+ goto out;
+ err = sys_shmat (first, uptr, second, &raddr);
+ if (err)
+ goto out;
+ err = put_user (raddr, uaddr);
+out:
+ return err;
+}
+
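+/* 32-bit shmctl(): translate shmid_ds32 (and shm_info32 for SHM_INFO) for
+ * the commands that return data. */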
+static int
+do_sys32_shmctl (int first, int second, void *uptr)
+{
+ int err;
+
+ if (IPCOP_MASK (second) &
+ (IPCOP_MASK (IPC_INFO) | IPCOP_MASK (SHM_LOCK)
+ | IPCOP_MASK (SHM_UNLOCK) | IPCOP_MASK (IPC_RMID))) {
+ err = sys_shmctl (first, second, (struct shmid_ds *)uptr);
+ } else {
+ struct shmid_ds s;
+ struct shmid_ds32 *up = (struct shmid_ds32 *)uptr;
+ mm_segment_t old_fs;
+
+ if (second == IPC_SET) {
+ err = get_user (s.shm_perm.uid, &up->shm_perm.uid);
+ err |= __get_user (s.shm_perm.gid, &up->shm_perm.gid);
+ err |= __get_user (s.shm_perm.mode, &up->shm_perm.mode);
+ if (err)
+ goto out;
+ }
+ old_fs = get_fs ();
+ set_fs (KERNEL_DS);
+ err = sys_shmctl (first, second, &s);
+ set_fs (old_fs);
+ if (err < 0)
+ goto out;
+
+ /* Mask it even in this case so it becomes a CSE. */
+ if (second == SHM_INFO) {
+ struct shm_info32 {
+ int used_ids;
+ u32 shm_tot, shm_rss, shm_swp;
+ u32 swap_attempts, swap_successes;
+ } *uip = (struct shm_info32 *)uptr;
+ struct shm_info *kp = (struct shm_info *)&s;
+ int err2 = put_user (kp->used_ids, &uip->used_ids);
+ err2 |= __put_user (kp->shm_tot, &uip->shm_tot);
+ err2 |= __put_user (kp->shm_rss, &uip->shm_rss);
+ err2 |= __put_user (kp->shm_swp, &uip->shm_swp);
+ err2 |= __put_user (kp->swap_attempts,
+ &uip->swap_attempts);
+ err2 |= __put_user (kp->swap_successes,
+ &uip->swap_successes);
+ if (err2)
+ err = -EFAULT;
+ } else if (IPCOP_MASK (second) &
+ (IPCOP_MASK (SHM_STAT) | IPCOP_MASK (IPC_STAT))) {
+ int err2 = put_user (s.shm_perm.key, &up->shm_perm.key);
+ err2 |= __put_user (s.shm_perm.uid, &up->shm_perm.uid);
+ err2 |= __put_user (s.shm_perm.gid, &up->shm_perm.gid);
+ err2 |= __put_user (s.shm_perm.cuid,
+ &up->shm_perm.cuid);
+ err2 |= __put_user (s.shm_perm.cgid,
+ &up->shm_perm.cgid);
+ err2 |= __put_user (s.shm_perm.mode,
+ &up->shm_perm.mode);
+ err2 |= __put_user (s.shm_perm.seq, &up->shm_perm.seq);
+ err2 |= __put_user (s.shm_atime, &up->shm_atime);
+ err2 |= __put_user (s.shm_dtime, &up->shm_dtime);
+ err2 |= __put_user (s.shm_ctime, &up->shm_ctime);
+ err2 |= __put_user (s.shm_segsz, &up->shm_segsz);
+ err2 |= __put_user (s.shm_nattch, &up->shm_nattch);
+ err2 |= __put_user (s.shm_cpid, &up->shm_cpid);
+ err2 |= __put_user (s.shm_lpid, &up->shm_lpid);
+ if (err2)
+ err = -EFAULT;
+ }
+ }
+out:
+ return err;
+}
+
+asmlinkage int
+sys32_ipc (u32 call, int first, int second, int third, u32 ptr, u32 fifth)
+{
+ int version, err;
+
+ lock_kernel();
+ version = call >> 16; /* hack for backward compatibility */
+ call &= 0xffff;
+
+ if (call <= SEMCTL)
+ switch (call) {
+ case SEMOP:
+ /* struct sembuf is the same on 32 and 64bit :)) */
+ err = sys_semop (first, (struct sembuf *)AA(ptr),
+ second);
+ goto out;
+ case SEMGET:
+ err = sys_semget (first, second, third);
+ goto out;
+ case SEMCTL:
+ err = do_sys32_semctl (first, second, third,
+ (void *)AA(ptr));
+ goto out;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+ if (call <= MSGCTL)
+ switch (call) {
+ case MSGSND:
+ err = do_sys32_msgsnd (first, second, third,
+ (void *)AA(ptr));
+ goto out;
+ case MSGRCV:
+ err = do_sys32_msgrcv (first, second, fifth, third,
+ version, (void *)AA(ptr));
+ goto out;
+ case MSGGET:
+ err = sys_msgget ((key_t) first, second);
+ goto out;
+ case MSGCTL:
+ err = do_sys32_msgctl (first, second, (void *)AA(ptr));
+ goto out;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+ if (call <= SHMCTL)
+ switch (call) {
+ case SHMAT:
+ err = do_sys32_shmat (first, second, third,
+ version, (void *)AA(ptr));
+ goto out;
+ case SHMDT:
+ err = sys_shmdt ((char *)AA(ptr));
+ goto out;
+ case SHMGET:
+ err = sys_shmget (first, second, third);
+ goto out;
+ case SHMCTL:
+ err = do_sys32_shmctl (first, second, (void *)AA(ptr));
+ goto out;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = -EINVAL;
+
+out:
+ unlock_kernel();
+ return err;
+}
+
+#ifdef NOTYET /* UNTESTED FOR IA64 FROM HERE DOWN */
+
+/* In order to reduce some races, while at the same time doing additional
+ * checking and hopefully speeding things up, we copy filenames to the
+ * kernel data space before using them..
+ *
+ * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
+ */
+static inline int
+do_getname32(const char *filename, char *page)
+{
+ int retval;
+
+ /* 32-bit pointers will always be far below TASK_SIZE :)) */
+ retval = strncpy_from_user((char *)page, (char *)filename, PAGE_SIZE);
+ if (retval > 0) {
+ if (retval < PAGE_SIZE)
+ return 0;
+ return -ENAMETOOLONG;
+ } else if (!retval)
+ retval = -ENOENT;
+ return retval;
+}
+
+char *
+getname32(const char *filename)
+{
+ char *tmp, *result;
+
+ result = ERR_PTR(-ENOMEM);
+ tmp = (char *)__get_free_page(GFP_KERNEL);
+ if (tmp) {
+ int retval = do_getname32(filename, tmp);
+
+ result = tmp;
+ if (retval < 0) {
+ putname(tmp);
+ result = ERR_PTR(retval);
+ }
+ }
+ return result;
+}
+
+/* 32-bit timeval and related flotsam. */
+
+extern asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int on);
+
+asmlinkage int
+sys32_ioperm(u32 from, u32 num, int on)
+{
+ return sys_ioperm((unsigned long)from, (unsigned long)num, on);
+}
+
+static inline int
+get_flock(struct flock *kfl, struct flock32 *ufl)
+{
+ int err;
+
+ err = get_user(kfl->l_type, &ufl->l_type);
+ err |= __get_user(kfl->l_whence, &ufl->l_whence);
+ err |= __get_user(kfl->l_start, &ufl->l_start);
+ err |= __get_user(kfl->l_len, &ufl->l_len);
+ err |= __get_user(kfl->l_pid, &ufl->l_pid);
+ return err;
+}
+
+static inline int
+put_flock(struct flock *kfl, struct flock32 *ufl)
+{
+ int err;
+
+ err = __put_user(kfl->l_type, &ufl->l_type);
+ err |= __put_user(kfl->l_whence, &ufl->l_whence);
+ err |= __put_user(kfl->l_start, &ufl->l_start);
+ err |= __put_user(kfl->l_len, &ufl->l_len);
+ err |= __put_user(kfl->l_pid, &ufl->l_pid);
+ return err;
+}
+
+extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd,
+ unsigned long arg);
+
+asmlinkage long
+sys32_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case F_GETLK:
+ case F_SETLK:
+ case F_SETLKW:
+ {
+ struct flock f;
+ mm_segment_t old_fs;
+ long ret;
+
+ if(get_flock(&f, (struct flock32 *)arg))
+ return -EFAULT;
+ old_fs = get_fs(); set_fs (KERNEL_DS);
+ ret = sys_fcntl(fd, cmd, (unsigned long)&f);
+ set_fs (old_fs);
+ if(put_flock(&f, (struct flock32 *)arg))
+ return -EFAULT;
+ return ret;
+ }
+ default:
+ return sys_fcntl(fd, cmd, (unsigned long)arg);
+ }
+}
+
+struct dqblk32 {
+ __u32 dqb_bhardlimit;
+ __u32 dqb_bsoftlimit;
+ __u32 dqb_curblocks;
+ __u32 dqb_ihardlimit;
+ __u32 dqb_isoftlimit;
+ __u32 dqb_curinodes;
+ __kernel_time_t32 dqb_btime;
+ __kernel_time_t32 dqb_itime;
+};
+
+extern asmlinkage int sys_quotactl(int cmd, const char *special, int id,
+ caddr_t addr);
+
+asmlinkage int
+sys32_quotactl(int cmd, const char *special, int id, unsigned long addr)
+{
+ int cmds = cmd >> SUBCMDSHIFT;
+ int err;
+ struct dqblk d;
+ mm_segment_t old_fs;
+ char *spec;
+
+ switch (cmds) {
+ case Q_GETQUOTA:
+ break;
+ case Q_SETQUOTA:
+ case Q_SETUSE:
+ case Q_SETQLIM:
+ if (copy_from_user (&d, (struct dqblk32 *)addr,
+ sizeof (struct dqblk32)))
+ return -EFAULT;
+ d.dqb_itime = ((struct dqblk32 *)&d)->dqb_itime;
+ d.dqb_btime = ((struct dqblk32 *)&d)->dqb_btime;
+ break;
+ default:
+ return sys_quotactl(cmd, special,
+ id, (caddr_t)addr);
+ }
+ spec = getname32 (special);
+ err = PTR_ERR(spec);
+ if (IS_ERR(spec)) return err;
+ old_fs = get_fs ();
+ set_fs (KERNEL_DS);
+ err = sys_quotactl(cmd, (const char *)spec, id, (caddr_t)&d);
+ set_fs (old_fs);
+ putname (spec);
+ if (cmds == Q_GETQUOTA) {
+ __kernel_time_t b = d.dqb_btime, i = d.dqb_itime;
+ ((struct dqblk32 *)&d)->dqb_itime = i;
+ ((struct dqblk32 *)&d)->dqb_btime = b;
+ if (copy_to_user ((struct dqblk32 *)addr, &d,
+ sizeof (struct dqblk32)))
+ return -EFAULT;
+ }
+ return err;
+}
+
+extern asmlinkage int sys_utime(char * filename, struct utimbuf * times);
+
+struct utimbuf32 {
+ __kernel_time_t32 actime, modtime;
+};
+
+asmlinkage int
+sys32_utime(char * filename, struct utimbuf32 *times)
+{
+ struct utimbuf t;
+ mm_segment_t old_fs;
+ int ret;
+ char *filenam;
+
+ if (!times)
+ return sys_utime(filename, NULL);
+ if (get_user (t.actime, &times->actime) ||
+ __get_user (t.modtime, &times->modtime))
+ return -EFAULT;
+ filenam = getname32 (filename);
+ ret = PTR_ERR(filenam);
+ if (!IS_ERR(filenam)) {
+ old_fs = get_fs();
+ set_fs (KERNEL_DS);
+ ret = sys_utime(filenam, &t);
+ set_fs (old_fs);
+ putname (filenam);
+ }
+ return ret;
+}
+
+/*
+ * Ooo, nasty. Here we need to frob 32-bit unsigned longs into
+ * 64-bit unsigned longs.
+ */
+
+static inline int
+get_fd_set32(unsigned long n, unsigned long *fdset, u32 *ufdset)
+{
+ if (ufdset) {
+ unsigned long odd;
+
+ if (verify_area(VERIFY_WRITE, ufdset, n*sizeof(u32)))
+ return -EFAULT;
+
+ odd = n & 1UL;
+ n &= ~1UL;
+ while (n) {
+ unsigned long h, l;
+ __get_user(l, ufdset);
+ __get_user(h, ufdset+1);
+ ufdset += 2;
+ *fdset++ = h << 32 | l;
+ n -= 2;
+ }
+ if (odd)
+ __get_user(*fdset, ufdset);
+ } else {
+ /* Tricky, must clear full unsigned long in the
+ * kernel fdset at the end, this makes sure that
+ * actually happens.
+ */
+ memset(fdset, 0, ((n + 1) & ~1)*sizeof(u32));
+ }
+ return 0;
+}
+
+static inline void
+set_fd_set32(unsigned long n, u32 *ufdset, unsigned long *fdset)
+{
+ unsigned long odd;
+
+ if (!ufdset)
+ return;
+
+ odd = n & 1UL;
+ n &= ~1UL;
+ while (n) {
+ unsigned long h, l;
+ l = *fdset++;
+ h = l >> 32;
+ __put_user(l, ufdset);
+ __put_user(h, ufdset+1);
+ ufdset += 2;
+ n -= 2;
+ }
+ if (odd)
+ __put_user(*fdset, ufdset);
+}
+
+extern asmlinkage int sys_sysfs(int option, unsigned long arg1,
+ unsigned long arg2);
+
+asmlinkage int
+sys32_sysfs(int option, u32 arg1, u32 arg2)
+{
+ return sys_sysfs(option, arg1, arg2);
+}
+
+struct ncp_mount_data32 {
+ int version;
+ unsigned int ncp_fd;
+ __kernel_uid_t32 mounted_uid;
+ __kernel_pid_t32 wdog_pid;
+ unsigned char mounted_vol[NCP_VOLNAME_LEN + 1];
+ unsigned int time_out;
+ unsigned int retry_count;
+ unsigned int flags;
+ __kernel_uid_t32 uid;
+ __kernel_gid_t32 gid;
+ __kernel_mode_t32 file_mode;
+ __kernel_mode_t32 dir_mode;
+};
+
+static void *
+do_ncp_super_data_conv(void *raw_data)
+{
+ struct ncp_mount_data *n = (struct ncp_mount_data *)raw_data;
+ struct ncp_mount_data32 *n32 = (struct ncp_mount_data32 *)raw_data;
+
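+ /* n and n32 alias the same buffer, so the fields are converted in place,
+ * last one first, presumably so the wider native stores do not clobber
+ * 32-bit source fields that have not been read yet. */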
+ n->dir_mode = n32->dir_mode;
+ n->file_mode = n32->file_mode;
+ n->gid = n32->gid;
+ n->uid = n32->uid;
+ memmove (n->mounted_vol, n32->mounted_vol,
+ (sizeof (n32->mounted_vol) + 3 * sizeof (unsigned int)));
+ n->wdog_pid = n32->wdog_pid;
+ n->mounted_uid = n32->mounted_uid;
+ return raw_data;
+}
+
+struct smb_mount_data32 {
+ int version;
+ __kernel_uid_t32 mounted_uid;
+ __kernel_uid_t32 uid;
+ __kernel_gid_t32 gid;
+ __kernel_mode_t32 file_mode;
+ __kernel_mode_t32 dir_mode;
+};
+
+static void *
+do_smb_super_data_conv(void *raw_data)
+{
+ struct smb_mount_data *s = (struct smb_mount_data *)raw_data;
+ struct smb_mount_data32 *s32 = (struct smb_mount_data32 *)raw_data;
+
+ s->version = s32->version;
+ s->mounted_uid = s32->mounted_uid;
+ s->uid = s32->uid;
+ s->gid = s32->gid;
+ s->file_mode = s32->file_mode;
+ s->dir_mode = s32->dir_mode;
+ return raw_data;
+}
+
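+/* Copy up to a page of mount data from user space into a freshly allocated
+ * kernel page; *kernel is left 0 when the user pointer is NULL. */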
+static int
+copy_mount_stuff_to_kernel(const void *user, unsigned long *kernel)
+{
+ int i;
+ unsigned long page;
+ struct vm_area_struct *vma;
+
+ *kernel = 0;
+ if(!user)
+ return 0;
+ vma = find_vma(current->mm, (unsigned long)user);
+ if(!vma || (unsigned long)user < vma->vm_start)
+ return -EFAULT;
+ if(!(vma->vm_flags & VM_READ))
+ return -EFAULT;
+ i = vma->vm_end - (unsigned long) user;
+ if(PAGE_SIZE <= (unsigned long) i)
+ i = PAGE_SIZE - 1;
+ if(!(page = __get_free_page(GFP_KERNEL)))
+ return -ENOMEM;
+ if(copy_from_user((void *) page, user, i)) {
+ free_page(page);
+ return -EFAULT;
+ }
+ *kernel = page;
+ return 0;
+}
+
+extern asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type,
+ unsigned long new_flags, void *data);
+
+#define SMBFS_NAME "smbfs"
+#define NCPFS_NAME "ncpfs"
+
+asmlinkage int
+sys32_mount(char *dev_name, char *dir_name, char *type,
+ unsigned long new_flags, u32 data)
+{
+ unsigned long type_page;
+ int err, is_smb, is_ncp;
+
+ if(!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ is_smb = is_ncp = 0;
+ err = copy_mount_stuff_to_kernel((const void *)type, &type_page);
+ if(err)
+ return err;
+ if(type_page) {
+ is_smb = !strcmp((char *)type_page, SMBFS_NAME);
+ is_ncp = !strcmp((char *)type_page, NCPFS_NAME);
+ }
+ if(!is_smb && !is_ncp) {
+ if(type_page)
+ free_page(type_page);
+ return sys_mount(dev_name, dir_name, type, new_flags,
+ (void *)AA(data));
+ } else {
+ unsigned long dev_page, dir_page, data_page;
+ mm_segment_t old_fs;
+
+ err = copy_mount_stuff_to_kernel((const void *)dev_name,
+ &dev_page);
+ if(err)
+ goto out;
+ err = copy_mount_stuff_to_kernel((const void *)dir_name,
+ &dir_page);
+ if(err)
+ goto dev_out;
+ err = copy_mount_stuff_to_kernel((const void *)AA(data),
+ &data_page);
+ if(err)
+ goto dir_out;
+ if(is_ncp)
+ do_ncp_super_data_conv((void *)data_page);
+ else if(is_smb)
+ do_smb_super_data_conv((void *)data_page);
+ else
+ panic("The problem is here...");
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ err = sys_mount((char *)dev_page, (char *)dir_page,
+ (char *)type_page, new_flags,
+ (void *)data_page);
+ set_fs(old_fs);
+
+ if(data_page)
+ free_page(data_page);
+ dir_out:
+ if(dir_page)
+ free_page(dir_page);
+ dev_out:
+ if(dev_page)
+ free_page(dev_page);
+ out:
+ if(type_page)
+ free_page(type_page);
+ return err;
+ }
+}
+
+struct sysinfo32 {
+ s32 uptime;
+ u32 loads[3];
+ u32 totalram;
+ u32 freeram;
+ u32 sharedram;
+ u32 bufferram;
+ u32 totalswap;
+ u32 freeswap;
+ unsigned short procs;
+ char _f[22];
+};
+
+extern asmlinkage int sys_sysinfo(struct sysinfo *info);
+
+asmlinkage int
+sys32_sysinfo(struct sysinfo32 *info)
+{
+ struct sysinfo s;
+ int ret, err;
+ mm_segment_t old_fs = get_fs ();
+
+ set_fs (KERNEL_DS);
+ ret = sys_sysinfo(&s);
+ set_fs (old_fs);
+ err = put_user (s.uptime, &info->uptime);
+ err |= __put_user (s.loads[0], &info->loads[0]);
+ err |= __put_user (s.loads[1], &info->loads[1]);
+ err |= __put_user (s.loads[2], &info->loads[2]);
+ err |= __put_user (s.totalram, &info->totalram);
+ err |= __put_user (s.freeram, &info->freeram);
+ err |= __put_user (s.sharedram, &info->sharedram);
+ err |= __put_user (s.bufferram, &info->bufferram);
+ err |= __put_user (s.totalswap, &info->totalswap);
+ err |= __put_user (s.freeswap, &info->freeswap);
+ err |= __put_user (s.procs, &info->procs);
+ if (err)
+ return -EFAULT;
+ return ret;
+}
+
+extern asmlinkage int sys_sched_rr_get_interval(pid_t pid,
+ struct timespec *interval);
+
+asmlinkage int
+sys32_sched_rr_get_interval(__kernel_pid_t32 pid, struct timespec32 *interval)
+{
+ struct timespec t;
+ int ret;
+ mm_segment_t old_fs = get_fs ();
+
+ set_fs (KERNEL_DS);
+ ret = sys_sched_rr_get_interval(pid, &t);
+ set_fs (old_fs);
+ if (put_user (t.tv_sec, &interval->tv_sec) ||
+ __put_user (t.tv_nsec, &interval->tv_nsec))
+ return -EFAULT;
+ return ret;
+}
+
+extern asmlinkage int sys_sigprocmask(int how, old_sigset_t *set,
+ old_sigset_t *oset);
+
+asmlinkage int
+sys32_sigprocmask(int how, old_sigset_t32 *set, old_sigset_t32 *oset)
+{
+ old_sigset_t s;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+ if (set && get_user (s, set)) return -EFAULT;
+ set_fs (KERNEL_DS);
+ ret = sys_sigprocmask(how, set ? &s : NULL, oset ? &s : NULL);
+ set_fs (old_fs);
+ if (ret) return ret;
+ if (oset && put_user (s, oset)) return -EFAULT;
+ return 0;
+}
+
+extern asmlinkage int sys_sigpending(old_sigset_t *set);
+
+asmlinkage int
+sys32_sigpending(old_sigset_t32 *set)
+{
+ old_sigset_t s;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs (KERNEL_DS);
+ ret = sys_sigpending(&s);
+ set_fs (old_fs);
+ if (put_user (s, set)) return -EFAULT;
+ return ret;
+}
+
+extern asmlinkage int sys_rt_sigpending(sigset_t *set, size_t sigsetsize);
+
+asmlinkage int
+sys32_rt_sigpending(sigset_t32 *set, __kernel_size_t32 sigsetsize)
+{
+ sigset_t s;
+ sigset_t32 s32;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs (KERNEL_DS);
+ ret = sys_rt_sigpending(&s, sigsetsize);
+ set_fs (old_fs);
+ if (!ret) {
+ switch (_NSIG_WORDS) {
+ case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
+ case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
+ case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
+ case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
+ }
+ if (copy_to_user (set, &s32, sizeof(sigset_t32)))
+ return -EFAULT;
+ }
+ return ret;
+}
+
+siginfo_t32 *
+siginfo64to32(siginfo_t32 *d, siginfo_t *s)
+{
+ memset (d, 0, sizeof(siginfo_t32));
+ d->si_signo = s->si_signo;
+ d->si_errno = s->si_errno;
+ d->si_code = s->si_code;
+ if (s->si_signo >= SIGRTMIN) {
+ d->si_pid = s->si_pid;
+ d->si_uid = s->si_uid;
+ /* XXX: Ouch, how to find this out??? */
+ d->si_int = s->si_int;
+ } else switch (s->si_signo) {
+ /* XXX: What about POSIX1.b timers */
+ case SIGCHLD:
+ d->si_pid = s->si_pid;
+ d->si_status = s->si_status;
+ d->si_utime = s->si_utime;
+ d->si_stime = s->si_stime;
+ break;
+ case SIGSEGV:
+ case SIGBUS:
+ case SIGFPE:
+ case SIGILL:
+ d->si_addr = (long)(s->si_addr);
+ /* XXX: Do we need to translate this from ia64 to ia32 traps? */
+ d->si_trapno = s->si_trapno;
+ break;
+ case SIGPOLL:
+ d->si_band = s->si_band;
+ d->si_fd = s->si_fd;
+ break;
+ default:
+ d->si_pid = s->si_pid;
+ d->si_uid = s->si_uid;
+ break;
+ }
+ return d;
+}
+
+siginfo_t *
+siginfo32to64(siginfo_t *d, siginfo_t32 *s)
+{
+ d->si_signo = s->si_signo;
+ d->si_errno = s->si_errno;
+ d->si_code = s->si_code;
+ if (s->si_signo >= SIGRTMIN) {
+ d->si_pid = s->si_pid;
+ d->si_uid = s->si_uid;
+ /* XXX: Ouch, how to find this out??? */
+ d->si_int = s->si_int;
+ } else switch (s->si_signo) {
+ /* XXX: What about POSIX1.b timers */
+ case SIGCHLD:
+ d->si_pid = s->si_pid;
+ d->si_status = s->si_status;
+ d->si_utime = s->si_utime;
+ d->si_stime = s->si_stime;
+ break;
+ case SIGSEGV:
+ case SIGBUS:
+ case SIGFPE:
+ case SIGILL:
+ d->si_addr = (void *)A(s->si_addr);
+ /* XXX: Do we need to translate this from ia32 to ia64 traps? */
+ d->si_trapno = s->si_trapno;
+ break;
+ case SIGPOLL:
+ d->si_band = s->si_band;
+ d->si_fd = s->si_fd;
+ break;
+ default:
+ d->si_pid = s->si_pid;
+ d->si_uid = s->si_uid;
+ break;
+ }
+ return d;
+}
+
+extern asmlinkage int
+sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo,
+ const struct timespec *uts, size_t sigsetsize);
+
+asmlinkage int
+sys32_rt_sigtimedwait(sigset_t32 *uthese, siginfo_t32 *uinfo,
+ struct timespec32 *uts, __kernel_size_t32 sigsetsize)
+{
+ sigset_t s;
+ sigset_t32 s32;
+ struct timespec t;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+ siginfo_t info;
+ siginfo_t32 info32;
+
+ if (copy_from_user (&s32, uthese, sizeof(sigset_t32)))
+ return -EFAULT;
+ switch (_NSIG_WORDS) {
+ case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
+ case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
+ case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
+ case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
+ }
+ if (uts) {
+ ret = get_user (t.tv_sec, &uts->tv_sec);
+ ret |= __get_user (t.tv_nsec, &uts->tv_nsec);
+ if (ret)
+ return -EFAULT;
+ }
+ set_fs (KERNEL_DS);
+ ret = sys_rt_sigtimedwait(&s, &info, uts ? &t : NULL, sigsetsize);
+ set_fs (old_fs);
+ if (ret >= 0 && uinfo) {
+ if (copy_to_user (uinfo, siginfo64to32(&info32, &info),
+ sizeof(siginfo_t32)))
+ return -EFAULT;
+ }
+ return ret;
+}
+
+extern asmlinkage int
+sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo);
+
+asmlinkage int
+sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 *uinfo)
+{
+ siginfo_t info;
+ siginfo_t32 info32;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+ if (copy_from_user (&info32, uinfo, sizeof(siginfo_t32)))
+ return -EFAULT;
+ /* XXX: Is this correct? */
+ siginfo32to64(&info, &info32);
+ set_fs (KERNEL_DS);
+ ret = sys_rt_sigqueueinfo(pid, sig, &info);
+ set_fs (old_fs);
+ return ret;
+}
+
+extern asmlinkage int sys_setreuid(uid_t ruid, uid_t euid);
+
+asmlinkage int sys32_setreuid(__kernel_uid_t32 ruid, __kernel_uid_t32 euid)
+{
+ uid_t sruid, seuid;
+
+ sruid = (ruid == (__kernel_uid_t32)-1) ? ((uid_t)-1) : ((uid_t)ruid);
+ seuid = (euid == (__kernel_uid_t32)-1) ? ((uid_t)-1) : ((uid_t)euid);
+ return sys_setreuid(sruid, seuid);
+}
+
+extern asmlinkage int sys_setresuid(uid_t ruid, uid_t euid, uid_t suid);
+
+asmlinkage int
+sys32_setresuid(__kernel_uid_t32 ruid, __kernel_uid_t32 euid,
+ __kernel_uid_t32 suid)
+{
+ uid_t sruid, seuid, ssuid;
+
+ sruid = (ruid == (__kernel_uid_t32)-1) ? ((uid_t)-1) : ((uid_t)ruid);
+ seuid = (euid == (__kernel_uid_t32)-1) ? ((uid_t)-1) : ((uid_t)euid);
+ ssuid = (suid == (__kernel_uid_t32)-1) ? ((uid_t)-1) : ((uid_t)suid);
+ return sys_setresuid(sruid, seuid, ssuid);
+}
+
+extern asmlinkage int sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid);
+
+asmlinkage int
+sys32_getresuid(__kernel_uid_t32 *ruid, __kernel_uid_t32 *euid,
+ __kernel_uid_t32 *suid)
+{
+ uid_t a, b, c;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs (KERNEL_DS);
+ ret = sys_getresuid(&a, &b, &c);
+ set_fs (old_fs);
+ if (put_user (a, ruid) || put_user (b, euid) || put_user (c, suid))
+ return -EFAULT;
+ return ret;
+}
+
+extern asmlinkage int sys_setregid(gid_t rgid, gid_t egid);
+
+asmlinkage int
+sys32_setregid(__kernel_gid_t32 rgid, __kernel_gid_t32 egid)
+{
+ gid_t srgid, segid;
+
+ srgid = (rgid == (__kernel_gid_t32)-1) ? ((gid_t)-1) : ((gid_t)rgid);
+ segid = (egid == (__kernel_gid_t32)-1) ? ((gid_t)-1) : ((gid_t)egid);
+ return sys_setregid(srgid, segid);
+}
+
+extern asmlinkage int sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid);
+
+asmlinkage int
+sys32_setresgid(__kernel_gid_t32 rgid, __kernel_gid_t32 egid,
+ __kernel_gid_t32 sgid)
+{
+ gid_t srgid, segid, ssgid;
+
+ srgid = (rgid == (__kernel_gid_t32)-1) ? ((gid_t)-1) : ((gid_t)rgid);
+ segid = (egid == (__kernel_gid_t32)-1) ? ((gid_t)-1) : ((gid_t)egid);
+ ssgid = (sgid == (__kernel_gid_t32)-1) ? ((gid_t)-1) : ((gid_t)sgid);
+ return sys_setresgid(srgid, segid, ssgid);
+}
+
+extern asmlinkage int sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid);
+
+asmlinkage int
+sys32_getresgid(__kernel_gid_t32 *rgid, __kernel_gid_t32 *egid,
+ __kernel_gid_t32 *sgid)
+{
+ gid_t a, b, c;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs (KERNEL_DS);
+ ret = sys_getresgid(&a, &b, &c);
+ set_fs (old_fs);
+ if (!ret) {
+ ret = put_user (a, rgid);
+ ret |= put_user (b, egid);
+ ret |= put_user (c, sgid);
+ }
+ return ret;
+}
+
+struct tms32 {
+ __kernel_clock_t32 tms_utime;
+ __kernel_clock_t32 tms_stime;
+ __kernel_clock_t32 tms_cutime;
+ __kernel_clock_t32 tms_cstime;
+};
+
+extern asmlinkage long sys_times(struct tms * tbuf);
+
+asmlinkage long
+sys32_times(struct tms32 *tbuf)
+{
+ struct tms t;
+ long ret;
+ mm_segment_t old_fs = get_fs ();
+ int err;
+
+ set_fs (KERNEL_DS);
+ ret = sys_times(tbuf ? &t : NULL);
+ set_fs (old_fs);
+ if (tbuf) {
+ err = put_user (t.tms_utime, &tbuf->tms_utime);
+ err |= __put_user (t.tms_stime, &tbuf->tms_stime);
+ err |= __put_user (t.tms_cutime, &tbuf->tms_cutime);
+ err |= __put_user (t.tms_cstime, &tbuf->tms_cstime);
+ if (err)
+ ret = -EFAULT;
+ }
+ return ret;
+}
+
+extern asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist);
+
+asmlinkage int
+sys32_getgroups(int gidsetsize, __kernel_gid_t32 *grouplist)
+{
+ gid_t gl[NGROUPS];
+ int ret, i;
+ mm_segment_t old_fs = get_fs ();
+
+ set_fs (KERNEL_DS);
+ ret = sys_getgroups(gidsetsize, gl);
+ set_fs (old_fs);
+ if (gidsetsize && ret > 0 && ret <= NGROUPS)
+ for (i = 0; i < ret; i++, grouplist++)
+ if (__put_user (gl[i], grouplist))
+ return -EFAULT;
+ return ret;
+}
+
+extern asmlinkage int sys_setgroups(int gidsetsize, gid_t *grouplist);
+
+asmlinkage int
+sys32_setgroups(int gidsetsize, __kernel_gid_t32 *grouplist)
+{
+ gid_t gl[NGROUPS];
+ int ret, i;
+ mm_segment_t old_fs = get_fs ();
+
+ if ((unsigned) gidsetsize > NGROUPS)
+ return -EINVAL;
+ for (i = 0; i < gidsetsize; i++, grouplist++)
+ if (__get_user (gl[i], grouplist))
+ return -EFAULT;
+ set_fs (KERNEL_DS);
+ ret = sys_setgroups(gidsetsize, gl);
+ set_fs (old_fs);
+ return ret;
+}
+
+extern asmlinkage int
+sys_getrusage(int who, struct rusage *ru);
+
+asmlinkage int
+sys32_getrusage(int who, struct rusage32 *ru)
+{
+ struct rusage r;
+ int ret;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs (KERNEL_DS);
+ ret = sys_getrusage(who, &r);
+ set_fs (old_fs);
+ if (put_rusage (ru, &r)) return -EFAULT;
+ return ret;
+}
+
+
+/* XXX These as well... */
+extern __inline__ struct socket *
+socki_lookup(struct inode *inode)
+{
+ return &inode->u.socket_i;
+}
+
+extern __inline__ struct socket *
+sockfd_lookup(int fd, int *err)
+{
+ struct file *file;
+ struct inode *inode;
+
+ if (!(file = fget(fd)))
+ {
+ *err = -EBADF;
+ return NULL;
+ }
+
+ inode = file->f_dentry->d_inode;
+ if (!inode || !inode->i_sock || !socki_lookup(inode))
+ {
+ *err = -ENOTSOCK;
+ fput(file);
+ return NULL;
+ }
+
+ return socki_lookup(inode);
+}
+
+struct msghdr32 {
+ u32 msg_name;
+ int msg_namelen;
+ u32 msg_iov;
+ __kernel_size_t32 msg_iovlen;
+ u32 msg_control;
+ __kernel_size_t32 msg_controllen;
+ unsigned msg_flags;
+};
+
+struct cmsghdr32 {
+ __kernel_size_t32 cmsg_len;
+ int cmsg_level;
+ int cmsg_type;
+};
+
+/* Bleech... */
+#define __CMSG32_NXTHDR(ctl, len, cmsg, cmsglen) \
+ __cmsg32_nxthdr((ctl),(len),(cmsg),(cmsglen))
+#define CMSG32_NXTHDR(mhdr, cmsg, cmsglen) \
+ cmsg32_nxthdr((mhdr), (cmsg), (cmsglen))
+
+#define CMSG32_ALIGN(len) ( ((len)+sizeof(int)-1) & ~(sizeof(int)-1) )
+
+#define CMSG32_DATA(cmsg) \
+ ((void *)((char *)(cmsg) + CMSG32_ALIGN(sizeof(struct cmsghdr32))))
+#define CMSG32_SPACE(len) \
+ (CMSG32_ALIGN(sizeof(struct cmsghdr32)) + CMSG32_ALIGN(len))
+#define CMSG32_LEN(len) (CMSG32_ALIGN(sizeof(struct cmsghdr32)) + (len))
+
+#define __CMSG32_FIRSTHDR(ctl,len) ((len) >= sizeof(struct cmsghdr32) ? \
+ (struct cmsghdr32 *)(ctl) : \
+ (struct cmsghdr32 *)NULL)
+#define CMSG32_FIRSTHDR(msg) \
+ __CMSG32_FIRSTHDR((msg)->msg_control, (msg)->msg_controllen)
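+
+/* Worked example (assuming 4-byte ints): sizeof(struct cmsghdr32) is 12, so
+ * CMSG32_LEN(4) = 12 + 4 = 16 and CMSG32_SPACE(4) = 12 + CMSG32_ALIGN(4) = 16
+ * for a control message carrying a single int of payload. */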
+
+__inline__ struct cmsghdr32 *
+__cmsg32_nxthdr(void *__ctl, __kernel_size_t __size,
+ struct cmsghdr32 *__cmsg, int __cmsg_len)
+{
+ struct cmsghdr32 * __ptr;
+
+ __ptr = (struct cmsghdr32 *)(((unsigned char *) __cmsg) +
+ CMSG32_ALIGN(__cmsg_len));
+ if ((unsigned long)((char*)(__ptr+1) - (char *) __ctl) > __size)
+ return NULL;
+
+ return __ptr;
+}
+
+__inline__ struct cmsghdr32 *
+cmsg32_nxthdr (struct msghdr *__msg, struct cmsghdr32 *__cmsg, int __cmsg_len)
+{
+ return __cmsg32_nxthdr(__msg->msg_control, __msg->msg_controllen,
+ __cmsg, __cmsg_len);
+}
+
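+/* Convert a user-space iovec32 array into the kernel iovec array at kiov;
+ * returns the total byte count or -EFAULT. */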
+static inline int
+iov_from_user32_to_kern(struct iovec *kiov, struct iovec32 *uiov32, int niov)
+{
+ int tot_len = 0;
+
+ while(niov > 0) {
+ u32 len, buf;
+
+ if(get_user(len, &uiov32->iov_len) ||
+ get_user(buf, &uiov32->iov_base)) {
+ tot_len = -EFAULT;
+ break;
+ }
+ tot_len += len;
+ kiov->iov_base = (void *)A(buf);
+ kiov->iov_len = (__kernel_size_t) len;
+ uiov32++;
+ kiov++;
+ niov--;
+ }
+ return tot_len;
+}
+
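+/* Fetch a 32-bit msghdr from user space, widening the embedded pointers. */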
+static inline int
+msghdr_from_user32_to_kern(struct msghdr *kmsg, struct msghdr32 *umsg)
+{
+ u32 tmp1, tmp2, tmp3;
+ int err;
+
+ err = get_user(tmp1, &umsg->msg_name);
+ err |= __get_user(tmp2, &umsg->msg_iov);
+ err |= __get_user(tmp3, &umsg->msg_control);
+ if (err)
+ return -EFAULT;
+
+ kmsg->msg_name = (void *)A(tmp1);
+ kmsg->msg_iov = (struct iovec *)A(tmp2);
+ kmsg->msg_control = (void *)A(tmp3);
+
+ err = get_user(kmsg->msg_namelen, &umsg->msg_namelen);
+ err |= get_user(kmsg->msg_iovlen, &umsg->msg_iovlen);
+ err |= get_user(kmsg->msg_controllen, &umsg->msg_controllen);
+ err |= get_user(kmsg->msg_flags, &umsg->msg_flags);
+
+ return err;
+}
+
+/* I've named the args so it is easy to tell whose space the pointers are in. */
+static int
+verify_iovec32(struct msghdr *kern_msg, struct iovec *kern_iov,
+ char *kern_address, int mode)
+{
+ int tot_len;
+
+ if(kern_msg->msg_namelen) {
+ if(mode==VERIFY_READ) {
+ int err = move_addr_to_kernel(kern_msg->msg_name,
+ kern_msg->msg_namelen,
+ kern_address);
+ if(err < 0)
+ return err;
+ }
+ kern_msg->msg_name = kern_address;
+ } else
+ kern_msg->msg_name = NULL;
+
+ if(kern_msg->msg_iovlen > UIO_FASTIOV) {
+ kern_iov = kmalloc(kern_msg->msg_iovlen * sizeof(struct iovec),
+ GFP_KERNEL);
+ if(!kern_iov)
+ return -ENOMEM;
+ }
+
+ tot_len = iov_from_user32_to_kern(kern_iov,
+ (struct iovec32 *)kern_msg->msg_iov,
+ kern_msg->msg_iovlen);
+ if(tot_len >= 0)
+ kern_msg->msg_iov = kern_iov;
+ else if(kern_msg->msg_iovlen > UIO_FASTIOV)
+ kfree(kern_iov);
+
+ return tot_len;
+}
+
+/* There is a lot of hair here because the alignment rules (and
+ * thus placement) of cmsg headers and length are different for
+ * 32-bit apps. -DaveM
+ */
+static int
+cmsghdr_from_user32_to_kern(struct msghdr *kmsg, unsigned char *stackbuf,
+ int stackbuf_size)
+{
+ struct cmsghdr32 *ucmsg;
+ struct cmsghdr *kcmsg, *kcmsg_base;
+ __kernel_size_t32 ucmlen;
+ __kernel_size_t kcmlen, tmp;
+
+ kcmlen = 0;
+ kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf;
+ ucmsg = CMSG32_FIRSTHDR(kmsg);
+ while(ucmsg != NULL) {
+ if(get_user(ucmlen, &ucmsg->cmsg_len))
+ return -EFAULT;
+
+ /* Catch bogons. */
+ if(CMSG32_ALIGN(ucmlen) <
+ CMSG32_ALIGN(sizeof(struct cmsghdr32)))
+ return -EINVAL;
+ if((unsigned long)(((char *)ucmsg - (char *)kmsg->msg_control)
+ + ucmlen) > kmsg->msg_controllen)
+ return -EINVAL;
+
+ tmp = ((ucmlen - CMSG32_ALIGN(sizeof(*ucmsg))) +
+ CMSG_ALIGN(sizeof(struct cmsghdr)));
+ kcmlen += tmp;
+ ucmsg = CMSG32_NXTHDR(kmsg, ucmsg, ucmlen);
+ }
+ if(kcmlen == 0)
+ return -EINVAL;
+
+ /* The kcmlen holds the 64-bit version of the control length.
+ * It may not be modified as we do not stick it into the kmsg
+ * until we have successfully copied over all of the data
+ * from the user.
+ */
+ if(kcmlen > stackbuf_size)
+ kcmsg_base = kcmsg = kmalloc(kcmlen, GFP_KERNEL);
+ if(kcmsg == NULL)
+ return -ENOBUFS;
+
+ /* Now copy them over neatly. */
+ memset(kcmsg, 0, kcmlen);
+ ucmsg = CMSG32_FIRSTHDR(kmsg);
+ while(ucmsg != NULL) {
+ __get_user(ucmlen, &ucmsg->cmsg_len);
+ tmp = ((ucmlen - CMSG32_ALIGN(sizeof(*ucmsg))) +
+ CMSG_ALIGN(sizeof(struct cmsghdr)));
+ kcmsg->cmsg_len = tmp;
+ __get_user(kcmsg->cmsg_level, &ucmsg->cmsg_level);
+ __get_user(kcmsg->cmsg_type, &ucmsg->cmsg_type);
+
+ /* Copy over the data. */
+ if(copy_from_user(CMSG_DATA(kcmsg),
+ CMSG32_DATA(ucmsg),
+ (ucmlen - CMSG32_ALIGN(sizeof(*ucmsg)))))
+ goto out_free_efault;
+
+ /* Advance. */
+ kcmsg = (struct cmsghdr *)((char *)kcmsg + CMSG_ALIGN(tmp));
+ ucmsg = CMSG32_NXTHDR(kmsg, ucmsg, ucmlen);
+ }
+
+ /* Ok, looks like we made it. Hook it up and return success. */
+ kmsg->msg_control = kcmsg_base;
+ kmsg->msg_controllen = kcmlen;
+ return 0;
+
+out_free_efault:
+ if(kcmsg_base != (struct cmsghdr *)stackbuf)
+ kfree(kcmsg_base);
+ return -EFAULT;
+}
+
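+/* 32-bit analogue of put_cmsg(): append a control message in cmsghdr32
+ * format to the user buffer, setting MSG_CTRUNC if it does not fit. */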
+static void
+put_cmsg32(struct msghdr *kmsg, int level, int type, int len, void *data)
+{
+ struct cmsghdr32 *cm = (struct cmsghdr32 *) kmsg->msg_control;
+ struct cmsghdr32 cmhdr;
+ int cmlen = CMSG32_LEN(len);
+
+ if(cm == NULL || kmsg->msg_controllen < sizeof(*cm)) {
+ kmsg->msg_flags |= MSG_CTRUNC;
+ return;
+ }
+
+ if(kmsg->msg_controllen < cmlen) {
+ kmsg->msg_flags |= MSG_CTRUNC;
+ cmlen = kmsg->msg_controllen;
+ }
+ cmhdr.cmsg_level = level;
+ cmhdr.cmsg_type = type;
+ cmhdr.cmsg_len = cmlen;
+
+ if(copy_to_user(cm, &cmhdr, sizeof cmhdr))
+ return;
+ if(copy_to_user(CMSG32_DATA(cm), data,
+ cmlen - sizeof(struct cmsghdr32)))
+ return;
+ cmlen = CMSG32_SPACE(len);
+ kmsg->msg_control += cmlen;
+ kmsg->msg_controllen -= cmlen;
+}
+
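+/* 32-bit analogue of scm_detach_fds(): install the passed files as new fds
+ * and describe them to the user with an SCM_RIGHTS cmsghdr32. */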
+static void scm_detach_fds32(struct msghdr *kmsg, struct scm_cookie *scm)
+{
+ struct cmsghdr32 *cm = (struct cmsghdr32 *) kmsg->msg_control;
+ int fdmax = (kmsg->msg_controllen - sizeof(struct cmsghdr32))
+ / sizeof(int);
+ int fdnum = scm->fp->count;
+ struct file **fp = scm->fp->fp;
+ int *cmfptr;
+ int err = 0, i;
+
+ if (fdnum < fdmax)
+ fdmax = fdnum;
+
+ for (i = 0, cmfptr = (int *) CMSG32_DATA(cm);
+ i < fdmax;
+ i++, cmfptr++) {
+ int new_fd;
+ err = get_unused_fd();
+ if (err < 0)
+ break;
+ new_fd = err;
+ err = put_user(new_fd, cmfptr);
+ if (err) {
+ put_unused_fd(new_fd);
+ break;
+ }
+ /* Bump the usage count and install the file. */
+ fp[i]->f_count++;
+ current->files->fd[new_fd] = fp[i];
+ }
+
+ if (i > 0) {
+ int cmlen = CMSG32_LEN(i * sizeof(int));
+ if (!err)
+ err = put_user(SOL_SOCKET, &cm->cmsg_level);
+ if (!err)
+ err = put_user(SCM_RIGHTS, &cm->cmsg_type);
+ if (!err)
+ err = put_user(cmlen, &cm->cmsg_len);
+ if (!err) {
+ cmlen = CMSG32_SPACE(i * sizeof(int));
+ kmsg->msg_control += cmlen;
+ kmsg->msg_controllen -= cmlen;
+ }
+ }
+ if (i < fdnum)
+ kmsg->msg_flags |= MSG_CTRUNC;
+
+ /*
+ * All of the files that fit in the message have had their
+ * usage counts incremented, so we just free the list.
+ */
+ __scm_destroy(scm);
+}
+
+/* In these cases we (currently) can just copy the data over verbatim
+ * because all CMSGs created by the kernel have well defined types which
+ * have the same layout in both the 32-bit and 64-bit API. One must add
+ * some special cased conversions here if we start sending control messages
+ * with incompatible types.
+ *
+ * SCM_RIGHTS and SCM_CREDENTIALS are done by hand in recvmsg32 right after
+ * we do our work. The remaining cases are:
+ *
+ * SOL_IP IP_PKTINFO struct in_pktinfo 32-bit clean
+ * IP_TTL int 32-bit clean
+ * IP_TOS __u8 32-bit clean
+ * IP_RECVOPTS variable length 32-bit clean
+ * IP_RETOPTS variable length 32-bit clean
+ * (these last two are clean because the types are defined
+ * by the IPv4 protocol)
+ * IP_RECVERR struct sock_extended_err +
+ * struct sockaddr_in 32-bit clean
+ * SOL_IPV6 IPV6_RECVERR struct sock_extended_err +
+ * struct sockaddr_in6 32-bit clean
+ * IPV6_PKTINFO struct in6_pktinfo 32-bit clean
+ * IPV6_HOPLIMIT int 32-bit clean
+ * IPV6_FLOWINFO u32 32-bit clean
+ * IPV6_HOPOPTS ipv6 hop exthdr 32-bit clean
+ * IPV6_DSTOPTS ipv6 dst exthdr(s) 32-bit clean
+ * IPV6_RTHDR ipv6 routing exthdr 32-bit clean
+ * IPV6_AUTHHDR ipv6 auth exthdr 32-bit clean
+ */
+static void
+cmsg32_recvmsg_fixup(struct msghdr *kmsg, unsigned long orig_cmsg_uptr)
+{
+ unsigned char *workbuf, *wp;
+ unsigned long bufsz, space_avail;
+ struct cmsghdr *ucmsg;
+
+ bufsz = ((unsigned long)kmsg->msg_control) - orig_cmsg_uptr;
+ space_avail = kmsg->msg_controllen + bufsz;
+ wp = workbuf = kmalloc(bufsz, GFP_KERNEL);
+ if(workbuf == NULL)
+ goto fail;
+
+ /* To make this more sane we assume the kernel sends back properly
+ * formatted control messages. Because of how the kernel will truncate
+ * the cmsg_len for MSG_TRUNC cases, we need not check that case either.
+ */
+ ucmsg = (struct cmsghdr *) orig_cmsg_uptr;
+ while(((unsigned long)ucmsg) < ((unsigned long)kmsg->msg_control)) {
+ struct cmsghdr32 *kcmsg32 = (struct cmsghdr32 *) wp;
+ int clen64, clen32;
+
+ /* UCMSG is the 64-bit format CMSG entry in user-space.
+ * KCMSG32 is within the kernel space temporary buffer
+ * we use to convert into a 32-bit style CMSG.
+ */
+ __get_user(kcmsg32->cmsg_len, &ucmsg->cmsg_len);
+ __get_user(kcmsg32->cmsg_level, &ucmsg->cmsg_level);
+ __get_user(kcmsg32->cmsg_type, &ucmsg->cmsg_type);
+
+ clen64 = kcmsg32->cmsg_len;
+ copy_from_user(CMSG32_DATA(kcmsg32), CMSG_DATA(ucmsg),
+ clen64 - CMSG_ALIGN(sizeof(*ucmsg)));
+ clen32 = ((clen64 - CMSG_ALIGN(sizeof(*ucmsg))) +
+ CMSG32_ALIGN(sizeof(struct cmsghdr32)));
+ kcmsg32->cmsg_len = clen32;
+
+ ucmsg = (struct cmsghdr *) (((char *)ucmsg) +
+ CMSG_ALIGN(clen64));
+ wp = (((char *)kcmsg32) + CMSG32_ALIGN(clen32));
+ }
+
+ /* Copy back fixed up data, and adjust pointers. */
+ bufsz = (wp - workbuf);
+ copy_to_user((void *)orig_cmsg_uptr, workbuf, bufsz);
+
+ kmsg->msg_control = (struct cmsghdr *)
+ (((char *)orig_cmsg_uptr) + bufsz);
+ kmsg->msg_controllen = space_avail - bufsz;
+
+ kfree(workbuf);
+ return;
+
+fail:
+ /* If we leave the 64-bit format CMSG chunks in there,
+ * the application could get confused and crash. So to
+ * ensure greater recovery, we report no CMSGs.
+ */
+ kmsg->msg_controllen += bufsz;
+ kmsg->msg_control = (void *) orig_cmsg_uptr;
+}
+
+asmlinkage int
+sys32_sendmsg(int fd, struct msghdr32 *user_msg, unsigned user_flags)
+{
+ struct socket *sock;
+ char address[MAX_SOCK_ADDR];
+ struct iovec iov[UIO_FASTIOV];
+ unsigned char ctl[sizeof(struct cmsghdr) + 20];
+ unsigned char *ctl_buf = ctl;
+ struct msghdr kern_msg;
+ int err, total_len;
+
+ if(msghdr_from_user32_to_kern(&kern_msg, user_msg))
+ return -EFAULT;
+ if(kern_msg.msg_iovlen > UIO_MAXIOV)
+ return -EINVAL;
+ err = verify_iovec32(&kern_msg, iov, address, VERIFY_READ);
+ if (err < 0)
+ goto out;
+ total_len = err;
+
+ if(kern_msg.msg_controllen) {
+ err = cmsghdr_from_user32_to_kern(&kern_msg, ctl, sizeof(ctl));
+ if(err)
+ goto out_freeiov;
+ ctl_buf = kern_msg.msg_control;
+ }
+ kern_msg.msg_flags = user_flags;
+
+ lock_kernel();
+ sock = sockfd_lookup(fd, &err);
+ if (sock != NULL) {
+ if (sock->file->f_flags & O_NONBLOCK)
+ kern_msg.msg_flags |= MSG_DONTWAIT;
+ err = sock_sendmsg(sock, &kern_msg, total_len);
+ sockfd_put(sock);
+ }
+ unlock_kernel();
+
+ /* N.B. Use kfree here, as kern_msg.msg_controllen might change? */
+ if(ctl_buf != ctl)
+ kfree(ctl_buf);
+out_freeiov:
+ if(kern_msg.msg_iov != iov)
+ kfree(kern_msg.msg_iov);
+out:
+ return err;
+}
+
+asmlinkage int
+sys32_recvmsg(int fd, struct msghdr32 *user_msg, unsigned int user_flags)
+{
+ struct iovec iovstack[UIO_FASTIOV];
+ struct msghdr kern_msg;
+ char addr[MAX_SOCK_ADDR];
+ struct socket *sock;
+ struct iovec *iov = iovstack;
+ struct sockaddr *uaddr;
+ int *uaddr_len;
+ unsigned long cmsg_ptr;
+ int err, total_len, len = 0;
+
+ if(msghdr_from_user32_to_kern(&kern_msg, user_msg))
+ return -EFAULT;
+ if(kern_msg.msg_iovlen > UIO_MAXIOV)
+ return -EINVAL;
+
+ uaddr = kern_msg.msg_name;
+ uaddr_len = &user_msg->msg_namelen;
+ err = verify_iovec32(&kern_msg, iov, addr, VERIFY_WRITE);
+ if (err < 0)
+ goto out;
+ total_len = err;
+
+ cmsg_ptr = (unsigned long) kern_msg.msg_control;
+ kern_msg.msg_flags = 0;
+
+ lock_kernel();
+ sock = sockfd_lookup(fd, &err);
+ if (sock != NULL) {
+ struct scm_cookie scm;
+
+ if (sock->file->f_flags & O_NONBLOCK)
+ user_flags |= MSG_DONTWAIT;
+ memset(&scm, 0, sizeof(scm));
+ err = sock->ops->recvmsg(sock, &kern_msg, total_len,
+ user_flags, &scm);
+ if(err >= 0) {
+ len = err;
+ if(!kern_msg.msg_control) {
+ if(sock->passcred || scm.fp)
+ kern_msg.msg_flags |= MSG_CTRUNC;
+ if(scm.fp)
+ __scm_destroy(&scm);
+ } else {
+ /* If recvmsg processing itself placed some
+ * control messages into user space, it is
+ * using 64-bit CMSG processing, so we need
+ * to fix it up before we tack on more stuff.
+ */
+ if((unsigned long) kern_msg.msg_control
+ != cmsg_ptr)
+ cmsg32_recvmsg_fixup(&kern_msg,
+ cmsg_ptr);
+
+ /* Wheee... */
+ if(sock->passcred)
+ put_cmsg32(&kern_msg,
+ SOL_SOCKET, SCM_CREDENTIALS,
+ sizeof(scm.creds),
+ &scm.creds);
+ if(scm.fp != NULL)
+ scm_detach_fds32(&kern_msg, &scm);
+ }
+ }
+ sockfd_put(sock);
+ }
+ unlock_kernel();
+
+ if(uaddr != NULL && err >= 0)
+ err = move_addr_to_user(addr, kern_msg.msg_namelen, uaddr,
+ uaddr_len);
+ if(cmsg_ptr != 0 && err >= 0) {
+ unsigned long ucmsg_ptr = ((unsigned long)kern_msg.msg_control);
+ __kernel_size_t32 uclen = (__kernel_size_t32) (ucmsg_ptr
+ - cmsg_ptr);
+ err |= __put_user(uclen, &user_msg->msg_controllen);
+ }
+ if(err >= 0)
+ err = __put_user(kern_msg.msg_flags, &user_msg->msg_flags);
+ if(kern_msg.msg_iov != iov)
+ kfree(kern_msg.msg_iov);
+out:
+ if(err < 0)
+ return err;
+ return len;
+}
+
+extern void check_pending(int signum);
+
+asmlinkage int
+sys32_sigaction (int sig, struct old_sigaction32 *act,
+ struct old_sigaction32 *oact)
+{
+ struct k_sigaction new_ka, old_ka;
+ int ret;
+
+ if(sig < 0) {
+ current->tss.new_signal = 1;
+ sig = -sig;
+ }
+
+ if (act) {
+ old_sigset_t32 mask;
+
+ ret = get_user((long)new_ka.sa.sa_handler, &act->sa_handler);
+ ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+ ret |= __get_user(mask, &act->sa_mask);
+ if (ret)
+ return ret;
+ siginitset(&new_ka.sa.sa_mask, mask);
+ }
+
+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+ if (!ret && oact) {
+ ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler);
+ ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+ ret |= __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+ }
+
+ return ret;
+}
+
+#ifdef CONFIG_MODULES
+
+extern asmlinkage unsigned long sys_create_module(const char *name_user,
+ size_t size);
+
+asmlinkage unsigned long
+sys32_create_module(const char *name_user, __kernel_size_t32 size)
+{
+ return sys_create_module(name_user, (size_t)size);
+}
+
+extern asmlinkage int sys_init_module(const char *name_user,
+ struct module *mod_user);
+
+/* Hey, when you're trying to init a module, take the time to prepare us a nice
+ * 64-bit module structure, even from 32-bit modutils... why pollute the kernel... :))
+ */
+asmlinkage int
+sys32_init_module(const char *name_user, struct module *mod_user)
+{
+ return sys_init_module(name_user, mod_user);
+}
+
+extern asmlinkage int sys_delete_module(const char *name_user);
+
+asmlinkage int
+sys32_delete_module(const char *name_user)
+{
+ return sys_delete_module(name_user);
+}
+
+struct module_info32 {
+ u32 addr;
+ u32 size;
+ u32 flags;
+ s32 usecount;
+};
+
+/* Query various bits about modules. */
+
+static inline long
+get_mod_name(const char *user_name, char **buf)
+{
+ unsigned long page;
+ long retval;
+
+ if ((unsigned long)user_name >= TASK_SIZE
+ && !segment_eq(get_fs (), KERNEL_DS))
+ return -EFAULT;
+
+ page = __get_free_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ retval = strncpy_from_user((char *)page, user_name, PAGE_SIZE);
+ if (retval > 0) {
+ if (retval < PAGE_SIZE) {
+ *buf = (char *)page;
+ return retval;
+ }
+ retval = -ENAMETOOLONG;
+ } else if (!retval)
+ retval = -EINVAL;
+
+ free_page(page);
+ return retval;
+}
+
+static inline void
+put_mod_name(char *buf)
+{
+ free_page((unsigned long)buf);
+}
+
+static __inline__ struct module *
+find_module(const char *name)
+{
+ struct module *mod;
+
+ for (mod = module_list; mod ; mod = mod->next) {
+ if (mod->flags & MOD_DELETED)
+ continue;
+ if (!strcmp(mod->name, name))
+ break;
+ }
+
+ return mod;
+}
+
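+/* QM_MODULES: copy the list of loaded module names to the user buffer, or
+ * report how much space would be needed. */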
+static int
+qm_modules(char *buf, size_t bufsize, __kernel_size_t32 *ret)
+{
+ struct module *mod;
+ size_t nmod, space, len;
+
+ nmod = space = 0;
+
+ for (mod = module_list; mod->next != NULL; mod = mod->next, ++nmod) {
+ len = strlen(mod->name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+ if (copy_to_user(buf, mod->name, len))
+ return -EFAULT;
+ buf += len;
+ bufsize -= len;
+ space += len;
+ }
+
+ if (put_user(nmod, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ space += len;
+ while ((mod = mod->next)->next != NULL)
+ space += strlen(mod->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
+}
+
+static int
+qm_deps(struct module *mod, char *buf, size_t bufsize, __kernel_size_t32 *ret)
+{
+ size_t i, space, len;
+
+ if (mod->next == NULL)
+ return -EINVAL;
+ if ((mod->flags & (MOD_RUNNING | MOD_DELETED)) != MOD_RUNNING)
+ if (put_user(0, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+ space = 0;
+ for (i = 0; i < mod->ndeps; ++i) {
+ const char *dep_name = mod->deps[i].dep->name;
+
+ len = strlen(dep_name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+ if (copy_to_user(buf, dep_name, len))
+ return -EFAULT;
+ buf += len;
+ bufsize -= len;
+ space += len;
+ }
+
+ if (put_user(i, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ space += len;
+ while (++i < mod->ndeps)
+ space += strlen(mod->deps[i].dep->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
+}
+
+static int
+qm_refs(struct module *mod, char *buf, size_t bufsize, __kernel_size_t32 *ret)
+{
+ size_t nrefs, space, len;
+ struct module_ref *ref;
+
+ if (mod->next == NULL)
+ return -EINVAL;
+ if ((mod->flags & (MOD_RUNNING | MOD_DELETED)) != MOD_RUNNING)
+ if (put_user(0, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+ space = 0;
+ for (nrefs = 0, ref = mod->refs; ref ; ++nrefs, ref = ref->next_ref) {
+ const char *ref_name = ref->ref->name;
+
+ len = strlen(ref_name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+ if (copy_to_user(buf, ref_name, len))
+ return -EFAULT;
+ buf += len;
+ bufsize -= len;
+ space += len;
+ }
+
+ if (put_user(nrefs, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ space += len;
+ while ((ref = ref->next_ref) != NULL)
+ space += strlen(ref->ref->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
+}
+
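+/* QM_SYMBOLS: emit (value, string offset) pairs followed by the symbol name
+ * strings themselves. */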
+static inline int
+qm_symbols(struct module *mod, char *buf, size_t bufsize,
+ __kernel_size_t32 *ret)
+{
+ size_t i, space, len;
+ struct module_symbol *s;
+ char *strings;
+ unsigned *vals;
+
+ if ((mod->flags & (MOD_RUNNING | MOD_DELETED)) != MOD_RUNNING)
+ if (put_user(0, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+ space = mod->nsyms * 2*sizeof(u32);
+
+ i = len = 0;
+ s = mod->syms;
+
+ if (space > bufsize)
+ goto calc_space_needed;
+
+ if (!access_ok(VERIFY_WRITE, buf, space))
+ return -EFAULT;
+
+ bufsize -= space;
+ vals = (unsigned *)buf;
+ strings = buf+space;
+
+ for (; i < mod->nsyms ; ++i, ++s, vals += 2) {
+ len = strlen(s->name)+1;
+ if (len > bufsize)
+ goto calc_space_needed;
+
+ if (copy_to_user(strings, s->name, len)
+ || __put_user(s->value, vals+0)
+ || __put_user(space, vals+1))
+ return -EFAULT;
+
+ strings += len;
+ bufsize -= len;
+ space += len;
+ }
+
+ if (put_user(i, ret))
+ return -EFAULT;
+ else
+ return 0;
+
+calc_space_needed:
+ for (; i < mod->nsyms; ++i, ++s)
+ space += strlen(s->name)+1;
+
+ if (put_user(space, ret))
+ return -EFAULT;
+ else
+ return -ENOSPC;
+}
+
+static inline int
+qm_info(struct module *mod, char *buf, size_t bufsize, __kernel_size_t32 *ret)
+{
+ int error = 0;
+
+ if (mod->next == NULL)
+ return -EINVAL;
+
+ if (sizeof(struct module_info32) <= bufsize) {
+ struct module_info32 info;
+ info.addr = (unsigned long)mod;
+ info.size = mod->size;
+ info.flags = mod->flags;
+ info.usecount =
+ ((mod_member_present(mod, can_unload)
+ && mod->can_unload)
+ ? -1 : atomic_read(&mod->uc.usecount));
+
+ if (copy_to_user(buf, &info, sizeof(struct module_info32)))
+ return -EFAULT;
+ } else
+ error = -ENOSPC;
+
+ if (put_user(sizeof(struct module_info32), ret))
+ return -EFAULT;
+
+ return error;
+}
+
+asmlinkage int
+sys32_query_module(char *name_user, int which, char *buf,
+ __kernel_size_t32 bufsize, u32 ret)
+{
+ struct module *mod;
+ int err;
+
+ lock_kernel();
+ if (name_user == 0) {
+ /* This finds "kernel_module" which is not exported. */
+ for(mod = module_list; mod->next != NULL; mod = mod->next)
+ ;
+ } else {
+ long namelen;
+ char *name;
+
+ if ((namelen = get_mod_name(name_user, &name)) < 0) {
+ err = namelen;
+ goto out;
+ }
+ err = -ENOENT;
+ if (namelen == 0) {
+ /* This finds "kernel_module" which is not exported. */
+ for(mod = module_list;
+ mod->next != NULL;
+ mod = mod->next) ;
+ } else if ((mod = find_module(name)) == NULL) {
+ put_mod_name(name);
+ goto out;
+ }
+ put_mod_name(name);
+ }
+
+ switch (which)
+ {
+ case 0:
+ err = 0;
+ break;
+ case QM_MODULES:
+ err = qm_modules(buf, bufsize, (__kernel_size_t32 *)AA(ret));
+ break;
+ case QM_DEPS:
+ err = qm_deps(mod, buf, bufsize, (__kernel_size_t32 *)AA(ret));
+ break;
+ case QM_REFS:
+ err = qm_refs(mod, buf, bufsize, (__kernel_size_t32 *)AA(ret));
+ break;
+ case QM_SYMBOLS:
+ err = qm_symbols(mod, buf, bufsize,
+ (__kernel_size_t32 *)AA(ret));
+ break;
+ case QM_INFO:
+ err = qm_info(mod, buf, bufsize, (__kernel_size_t32 *)AA(ret));
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+out:
+ unlock_kernel();
+ return err;
+}
+
+struct kernel_sym32 {
+ u32 value;
+ char name[60];
+};
+
+extern asmlinkage int sys_get_kernel_syms(struct kernel_sym *table);
+
+asmlinkage int
+sys32_get_kernel_syms(struct kernel_sym32 *table)
+{
+ int len, i;
+ struct kernel_sym *tbl;
+ mm_segment_t old_fs;
+
+ len = sys_get_kernel_syms(NULL);
+ if (!table) return len;
+ tbl = kmalloc (len * sizeof (struct kernel_sym), GFP_KERNEL);
+ if (!tbl) return -ENOMEM;
+ old_fs = get_fs();
+ set_fs (KERNEL_DS);
+ sys_get_kernel_syms(tbl);
+ set_fs (old_fs);
+ for (i = 0; i < len; i++, table++) {
+ if (put_user (tbl[i].value, &table->value) ||
+ copy_to_user (table->name, tbl[i].name, 60))
+ break;
+ }
+ kfree (tbl);
+ return i;
+}
+
+#else /* CONFIG_MODULES */
+
+asmlinkage unsigned long
+sys32_create_module(const char *name_user, size_t size)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int
+sys32_init_module(const char *name_user, struct module *mod_user)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int
+sys32_delete_module(const char *name_user)
+{
+ return -ENOSYS;
+}
+
+asmlinkage int
+sys32_query_module(const char *name_user, int which, char *buf, size_t bufsize,
+ size_t *ret)
+{
+ /* Let the program know about the new interface. Not that
+ it'll do them much good. */
+ if (which == 0)
+ return 0;
+
+ return -ENOSYS;
+}
+
+asmlinkage int
+sys32_get_kernel_syms(struct kernel_sym *table)
+{
+ return -ENOSYS;
+}
+
+#endif /* CONFIG_MODULES */
+
+/* Stuff for NFS server syscalls... */
+struct nfsctl_svc32 {
+ u16 svc32_port;
+ s32 svc32_nthreads;
+};
+
+struct nfsctl_client32 {
+ s8 cl32_ident[NFSCLNT_IDMAX+1];
+ s32 cl32_naddr;
+ struct in_addr cl32_addrlist[NFSCLNT_ADDRMAX];
+ s32 cl32_fhkeytype;
+ s32 cl32_fhkeylen;
+ u8 cl32_fhkey[NFSCLNT_KEYMAX];
+};
+
+struct nfsctl_export32 {
+ s8 ex32_client[NFSCLNT_IDMAX+1];
+ s8 ex32_path[NFS_MAXPATHLEN+1];
+ __kernel_dev_t32 ex32_dev;
+ __kernel_ino_t32 ex32_ino;
+ s32 ex32_flags;
+ __kernel_uid_t32 ex32_anon_uid;
+ __kernel_gid_t32 ex32_anon_gid;
+};
+
+struct nfsctl_uidmap32 {
+ u32 ug32_ident; /* char * */
+ __kernel_uid_t32 ug32_uidbase;
+ s32 ug32_uidlen;
+ u32 ug32_udimap; /* uid_t * */
+ __kernel_uid_t32 ug32_gidbase;
+ s32 ug32_gidlen;
+ u32 ug32_gdimap; /* gid_t * */
+};
+
+struct nfsctl_fhparm32 {
+ struct sockaddr gf32_addr;
+ __kernel_dev_t32 gf32_dev;
+ __kernel_ino_t32 gf32_ino;
+ s32 gf32_version;
+};
+
+struct nfsctl_arg32 {
+ s32 ca32_version; /* safeguard */
+ union {
+ struct nfsctl_svc32 u32_svc;
+ struct nfsctl_client32 u32_client;
+ struct nfsctl_export32 u32_export;
+ struct nfsctl_uidmap32 u32_umap;
+ struct nfsctl_fhparm32 u32_getfh;
+ u32 u32_debug;
+ } u;
+#define ca32_svc u.u32_svc
+#define ca32_client u.u32_client
+#define ca32_export u.u32_export
+#define ca32_umap u.u32_umap
+#define ca32_getfh u.u32_getfh
+#define ca32_authd u.u32_authd
+#define ca32_debug u.u32_debug
+};
+
+union nfsctl_res32 {
+ struct knfs_fh cr32_getfh;
+ u32 cr32_debug;
+};
+
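+/*
+ * Each nfs_*32_trans() helper below converts one variant of the 32-bit
+ * nfsctl argument into the native struct nfsctl_arg; sys32_nfsservctl()
+ * then invokes sys_nfsservctl() on the kernel copy under KERNEL_DS.
+ */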
+static int
+nfs_svc32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32)
+{
+ int err;
+
+ err = __get_user(karg->ca_version, &arg32->ca32_version);
+ err |= __get_user(karg->ca_svc.svc_port, &arg32->ca32_svc.svc32_port);
+ err |= __get_user(karg->ca_svc.svc_nthreads,
+ &arg32->ca32_svc.svc32_nthreads);
+ return err;
+}
+
+static int
+nfs_clnt32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32)
+{
+ int err;
+
+ err = __get_user(karg->ca_version, &arg32->ca32_version);
+ err |= copy_from_user(&karg->ca_client.cl_ident[0],
+ &arg32->ca32_client.cl32_ident[0],
+ NFSCLNT_IDMAX);
+ err |= __get_user(karg->ca_client.cl_naddr,
+ &arg32->ca32_client.cl32_naddr);
+ err |= copy_from_user(&karg->ca_client.cl_addrlist[0],
+ &arg32->ca32_client.cl32_addrlist[0],
+ (sizeof(struct in_addr) * NFSCLNT_ADDRMAX));
+ err |= __get_user(karg->ca_client.cl_fhkeytype,
+ &arg32->ca32_client.cl32_fhkeytype);
+ err |= __get_user(karg->ca_client.cl_fhkeylen,
+ &arg32->ca32_client.cl32_fhkeylen);
+ err |= copy_from_user(&karg->ca_client.cl_fhkey[0],
+ &arg32->ca32_client.cl32_fhkey[0],
+ NFSCLNT_KEYMAX);
+ return err;
+}
+
+static int
+nfs_exp32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32)
+{
+ int err;
+
+ err = __get_user(karg->ca_version, &arg32->ca32_version);
+ err |= copy_from_user(&karg->ca_export.ex_client[0],
+ &arg32->ca32_export.ex32_client[0],
+ NFSCLNT_IDMAX);
+ err |= copy_from_user(&karg->ca_export.ex_path[0],
+ &arg32->ca32_export.ex32_path[0],
+ NFS_MAXPATHLEN);
+ err |= __get_user(karg->ca_export.ex_dev,
+ &arg32->ca32_export.ex32_dev);
+ err |= __get_user(karg->ca_export.ex_ino,
+ &arg32->ca32_export.ex32_ino);
+ err |= __get_user(karg->ca_export.ex_flags,
+ &arg32->ca32_export.ex32_flags);
+ err |= __get_user(karg->ca_export.ex_anon_uid,
+ &arg32->ca32_export.ex32_anon_uid);
+ err |= __get_user(karg->ca_export.ex_anon_gid,
+ &arg32->ca32_export.ex32_anon_gid);
+ return err;
+}
+
+static int
+nfs_uud32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32)
+{
+ u32 uaddr;
+ int i;
+ int err;
+
+ memset(karg, 0, sizeof(*karg));
+ if(__get_user(karg->ca_version, &arg32->ca32_version))
+ return -EFAULT;
+ karg->ca_umap.ug_ident = (char *)get_free_page(GFP_USER);
+ if(!karg->ca_umap.ug_ident)
+ return -ENOMEM;
+ err = __get_user(uaddr, &arg32->ca32_umap.ug32_ident);
+ if(strncpy_from_user(karg->ca_umap.ug_ident,
+ (char *)A(uaddr), PAGE_SIZE) <= 0)
+ return -EFAULT;
+ err |= __get_user(karg->ca_umap.ug_uidbase,
+ &arg32->ca32_umap.ug32_uidbase);
+ err |= __get_user(karg->ca_umap.ug_uidlen,
+ &arg32->ca32_umap.ug32_uidlen);
+ err |= __get_user(uaddr, &arg32->ca32_umap.ug32_udimap);
+ if (err)
+ return -EFAULT;
+ karg->ca_umap.ug_udimap = kmalloc((sizeof(uid_t) *
+ karg->ca_umap.ug_uidlen),
+ GFP_USER);
+ if(!karg->ca_umap.ug_udimap)
+ return -ENOMEM;
+ for(i = 0; i < karg->ca_umap.ug_uidlen; i++)
+ err |= __get_user(karg->ca_umap.ug_udimap[i],
+ &(((__kernel_uid_t32 *)A(uaddr))[i]));
+ err |= __get_user(karg->ca_umap.ug_gidbase,
+ &arg32->ca32_umap.ug32_gidbase);
+	err |= __get_user(karg->ca_umap.ug_gidlen,
+ &arg32->ca32_umap.ug32_gidlen);
+ err |= __get_user(uaddr, &arg32->ca32_umap.ug32_gdimap);
+ if (err)
+ return -EFAULT;
+ karg->ca_umap.ug_gdimap = kmalloc((sizeof(gid_t) *
+					   karg->ca_umap.ug_gidlen),
+ GFP_USER);
+ if(!karg->ca_umap.ug_gdimap)
+ return -ENOMEM;
+ for(i = 0; i < karg->ca_umap.ug_gidlen; i++)
+ err |= __get_user(karg->ca_umap.ug_gdimap[i],
+ &(((__kernel_gid_t32 *)A(uaddr))[i]));
+
+ return err;
+}
+
+static int
+nfs_getfh32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32)
+{
+ int err;
+
+ err = __get_user(karg->ca_version, &arg32->ca32_version);
+ err |= copy_from_user(&karg->ca_getfh.gf_addr,
+ &arg32->ca32_getfh.gf32_addr,
+ (sizeof(struct sockaddr)));
+ err |= __get_user(karg->ca_getfh.gf_dev,
+ &arg32->ca32_getfh.gf32_dev);
+ err |= __get_user(karg->ca_getfh.gf_ino,
+ &arg32->ca32_getfh.gf32_ino);
+ err |= __get_user(karg->ca_getfh.gf_version,
+ &arg32->ca32_getfh.gf32_version);
+ return err;
+}
+
+static int
+nfs_getfh32_res_trans(union nfsctl_res *kres, union nfsctl_res32 *res32)
+{
+ int err;
+
+ err = copy_to_user(&res32->cr32_getfh,
+ &kres->cr_getfh,
+ sizeof(res32->cr32_getfh));
+ err |= __put_user(kres->cr_debug, &res32->cr32_debug);
+ return err;
+}
+
+extern asmlinkage int sys_nfsservctl(int cmd, void *arg, void *resp);
+
+int asmlinkage
+sys32_nfsservctl(int cmd, struct nfsctl_arg32 *arg32, union nfsctl_res32 *res32)
+{
+ struct nfsctl_arg *karg = NULL;
+ union nfsctl_res *kres = NULL;
+ mm_segment_t oldfs;
+ int err;
+
+ karg = kmalloc(sizeof(*karg), GFP_USER);
+ if(!karg)
+ return -ENOMEM;
+ if(res32) {
+ kres = kmalloc(sizeof(*kres), GFP_USER);
+ if(!kres) {
+ kfree(karg);
+ return -ENOMEM;
+ }
+ }
+ switch(cmd) {
+ case NFSCTL_SVC:
+ err = nfs_svc32_trans(karg, arg32);
+ break;
+ case NFSCTL_ADDCLIENT:
+ err = nfs_clnt32_trans(karg, arg32);
+ break;
+ case NFSCTL_DELCLIENT:
+ err = nfs_clnt32_trans(karg, arg32);
+ break;
+ case NFSCTL_EXPORT:
+ err = nfs_exp32_trans(karg, arg32);
+ break;
+	/* This one is unimplemented, but we're ready for it. */
+ case NFSCTL_UGIDUPDATE:
+ err = nfs_uud32_trans(karg, arg32);
+ break;
+ case NFSCTL_GETFH:
+ err = nfs_getfh32_trans(karg, arg32);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+ if(err)
+ goto done;
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ err = sys_nfsservctl(cmd, karg, kres);
+ set_fs(oldfs);
+
+ if(!err && cmd == NFSCTL_GETFH)
+ err = nfs_getfh32_res_trans(kres, res32);
+
+done:
+ if(karg) {
+ if(cmd == NFSCTL_UGIDUPDATE) {
+ if(karg->ca_umap.ug_ident)
+ kfree(karg->ca_umap.ug_ident);
+ if(karg->ca_umap.ug_udimap)
+ kfree(karg->ca_umap.ug_udimap);
+ if(karg->ca_umap.ug_gdimap)
+ kfree(karg->ca_umap.ug_gdimap);
+ }
+ kfree(karg);
+ }
+ if(kres)
+ kfree(kres);
+ return err;
+}
+
+asmlinkage int sys_utimes(char *, struct timeval *);
+
+asmlinkage int
+sys32_utimes(char *filename, struct timeval32 *tvs)
+{
+ char *kfilename;
+ struct timeval ktvs[2];
+ mm_segment_t old_fs;
+ int ret;
+
+ kfilename = getname32(filename);
+ ret = PTR_ERR(kfilename);
+ if (!IS_ERR(kfilename)) {
+ if (tvs) {
+			if (get_tv32(&ktvs[0], tvs) ||
+			    get_tv32(&ktvs[1], 1+tvs)) {
+				putname(kfilename);
+				return -EFAULT;
+			}
+ }
+
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ ret = sys_utimes(kfilename, &ktvs[0]);
+ set_fs(old_fs);
+
+ putname(kfilename);
+ }
+ return ret;
+}
+
+/* These are here just in case some old ia32 binary calls it. */
+asmlinkage int
+sys32_pause(void)
+{
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ return -ERESTARTNOHAND;
+}
+
+/* PCI config space poking. */
+extern asmlinkage int sys_pciconfig_read(unsigned long bus,
+ unsigned long dfn,
+ unsigned long off,
+ unsigned long len,
+ unsigned char *buf);
+
+extern asmlinkage int sys_pciconfig_write(unsigned long bus,
+ unsigned long dfn,
+ unsigned long off,
+ unsigned long len,
+ unsigned char *buf);
+
+asmlinkage int
+sys32_pciconfig_read(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf)
+{
+ return sys_pciconfig_read((unsigned long) bus,
+ (unsigned long) dfn,
+ (unsigned long) off,
+ (unsigned long) len,
+ (unsigned char *)AA(ubuf));
+}
+
+asmlinkage int
+sys32_pciconfig_write(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf)
+{
+ return sys_pciconfig_write((unsigned long) bus,
+ (unsigned long) dfn,
+ (unsigned long) off,
+ (unsigned long) len,
+ (unsigned char *)AA(ubuf));
+}
+
+extern asmlinkage int sys_prctl(int option, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5);
+
+asmlinkage int
+sys32_prctl(int option, u32 arg2, u32 arg3, u32 arg4, u32 arg5)
+{
+ return sys_prctl(option,
+ (unsigned long) arg2,
+ (unsigned long) arg3,
+ (unsigned long) arg4,
+ (unsigned long) arg5);
+}
+
+
+extern asmlinkage int sys_newuname(struct new_utsname * name);
+
+asmlinkage int
+sys32_newuname(struct new_utsname * name)
+{
+ int ret = sys_newuname(name);
+
+ if (current->personality == PER_LINUX32 && !ret) {
+		ret = copy_to_user(name->machine, "i686\0\0\0", 8);
+ }
+ return ret;
+}
+
+extern asmlinkage ssize_t sys_pread(unsigned int fd, char * buf,
+ size_t count, loff_t pos);
+
+extern asmlinkage ssize_t sys_pwrite(unsigned int fd, const char * buf,
+ size_t count, loff_t pos);
+
+typedef __kernel_ssize_t32 ssize_t32;
+
+asmlinkage ssize_t32
+sys32_pread(unsigned int fd, char *ubuf, __kernel_size_t32 count,
+ u32 poshi, u32 poslo)
+{
+ return sys_pread(fd, ubuf, count,
+ ((loff_t)AA(poshi) << 32) | AA(poslo));
+}
+
+asmlinkage ssize_t32
+sys32_pwrite(unsigned int fd, char *ubuf, __kernel_size_t32 count,
+ u32 poshi, u32 poslo)
+{
+ return sys_pwrite(fd, ubuf, count,
+ ((loff_t)AA(poshi) << 32) | AA(poslo));
+}
+
+
+extern asmlinkage int sys_personality(unsigned long);
+
+asmlinkage int
+sys32_personality(unsigned long personality)
+{
+ int ret;
+ lock_kernel();
+ if (current->personality == PER_LINUX32 && personality == PER_LINUX)
+ personality = PER_LINUX32;
+ ret = sys_personality(personality);
+ unlock_kernel();
+ if (ret == PER_LINUX32)
+ ret = PER_LINUX;
+ return ret;
+}
+
+extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset,
+ size_t count);
+
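+/*
+ * 32-bit sendfile: widen the user's 32-bit offset to a kernel off_t, call
+ * the native syscall under KERNEL_DS, then write the updated offset back.
+ */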
+asmlinkage int
+sys32_sendfile(int out_fd, int in_fd, __kernel_off_t32 *offset, s32 count)
+{
+ mm_segment_t old_fs = get_fs();
+ int ret;
+ off_t of;
+
+ if (offset && get_user(of, offset))
+ return -EFAULT;
+
+ set_fs(KERNEL_DS);
+ ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count);
+ set_fs(old_fs);
+
+ if (!ret && offset && put_user(of, offset))
+ return -EFAULT;
+
+ return ret;
+}
+
+/* Handle adjtimex compatibility. */
+
+struct timex32 {
+ u32 modes;
+ s32 offset, freq, maxerror, esterror;
+ s32 status, constant, precision, tolerance;
+ struct timeval32 time;
+ s32 tick;
+ s32 ppsfreq, jitter, shift, stabil;
+ s32 jitcnt, calcnt, errcnt, stbcnt;
+ s32 :32; s32 :32; s32 :32; s32 :32;
+ s32 :32; s32 :32; s32 :32; s32 :32;
+ s32 :32; s32 :32; s32 :32; s32 :32;
+};
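+/* The unnamed s32 bitfields above correspond to the reserved padding words
+   at the end of the native struct timex.  */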
+
+extern int do_adjtimex(struct timex *);
+
+asmlinkage int
+sys32_adjtimex(struct timex32 *utp)
+{
+ struct timex txc;
+ int ret;
+
+ memset(&txc, 0, sizeof(struct timex));
+
+ if(get_user(txc.modes, &utp->modes) ||
+ __get_user(txc.offset, &utp->offset) ||
+ __get_user(txc.freq, &utp->freq) ||
+ __get_user(txc.maxerror, &utp->maxerror) ||
+ __get_user(txc.esterror, &utp->esterror) ||
+ __get_user(txc.status, &utp->status) ||
+ __get_user(txc.constant, &utp->constant) ||
+ __get_user(txc.precision, &utp->precision) ||
+ __get_user(txc.tolerance, &utp->tolerance) ||
+ __get_user(txc.time.tv_sec, &utp->time.tv_sec) ||
+ __get_user(txc.time.tv_usec, &utp->time.tv_usec) ||
+ __get_user(txc.tick, &utp->tick) ||
+ __get_user(txc.ppsfreq, &utp->ppsfreq) ||
+ __get_user(txc.jitter, &utp->jitter) ||
+ __get_user(txc.shift, &utp->shift) ||
+ __get_user(txc.stabil, &utp->stabil) ||
+ __get_user(txc.jitcnt, &utp->jitcnt) ||
+ __get_user(txc.calcnt, &utp->calcnt) ||
+ __get_user(txc.errcnt, &utp->errcnt) ||
+ __get_user(txc.stbcnt, &utp->stbcnt))
+ return -EFAULT;
+
+ ret = do_adjtimex(&txc);
+
+ if(put_user(txc.modes, &utp->modes) ||
+ __put_user(txc.offset, &utp->offset) ||
+ __put_user(txc.freq, &utp->freq) ||
+ __put_user(txc.maxerror, &utp->maxerror) ||
+ __put_user(txc.esterror, &utp->esterror) ||
+ __put_user(txc.status, &utp->status) ||
+ __put_user(txc.constant, &utp->constant) ||
+ __put_user(txc.precision, &utp->precision) ||
+ __put_user(txc.tolerance, &utp->tolerance) ||
+ __put_user(txc.time.tv_sec, &utp->time.tv_sec) ||
+ __put_user(txc.time.tv_usec, &utp->time.tv_usec) ||
+ __put_user(txc.tick, &utp->tick) ||
+ __put_user(txc.ppsfreq, &utp->ppsfreq) ||
+ __put_user(txc.jitter, &utp->jitter) ||
+ __put_user(txc.shift, &utp->shift) ||
+ __put_user(txc.stabil, &utp->stabil) ||
+ __put_user(txc.jitcnt, &utp->jitcnt) ||
+ __put_user(txc.calcnt, &utp->calcnt) ||
+ __put_user(txc.errcnt, &utp->errcnt) ||
+ __put_user(txc.stbcnt, &utp->stbcnt))
+ ret = -EFAULT;
+
+ return ret;
+}
+#endif /* NOTYET */
+
diff --git a/arch/ia64/kdb/Makefile b/arch/ia64/kdb/Makefile
new file mode 100644
index 000000000..0b29d6b35
--- /dev/null
+++ b/arch/ia64/kdb/Makefile
@@ -0,0 +1,21 @@
+#
+# Makefile for ia64-specific kdb files..
+#
+# Copyright 1999, Silicon Graphics Inc.
+#
+# Written March 1999 by Scott Lurndal at Silicon Graphics, Inc.
+# Code for IA64 written by Goutham Rao <goutham.rao@intel.com> and
+# Sreenivas Subramoney <sreenivas.subramoney@intel.com>
+#
+
+SUB_DIRS :=
+MOD_SUB_DIRS := $(SUB_DIRS)
+ALL_SUB_DIRS := $(SUB_DIRS)
+
+.S.o:
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -traditional -c $< -o $*.o
+
+L_TARGET = kdb.a
+L_OBJS = kdbsupport.o kdb_io.o kdb_bt.o kdb_traps.o
+
+include $(TOPDIR)/Rules.make
diff --git a/arch/ia64/kdb/kdb_bt.c b/arch/ia64/kdb/kdb_bt.c
new file mode 100644
index 000000000..dbcb7a575
--- /dev/null
+++ b/arch/ia64/kdb/kdb_bt.c
@@ -0,0 +1,104 @@
+/**
+ * Minimalist Kernel Debugger
+ * Machine dependent stack traceback code for IA-64.
+ *
+ * Copyright (C) 1999 Goutham Rao <goutham.rao@intel.com>
+ * Copyright (C) 1999 Sreenivas Subramoney <sreenivas.subramoney@intel.com>
+ * Intel Corporation, August 1999.
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 99/12/03 D. Mosberger Reimplemented based on <asm-ia64/unwind.h> API.
+ * 99/12/06 D. Mosberger Added support for backtracing other processes.
+ */
+
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/kdb.h>
+#include <asm/system.h>
+#include <asm/current.h>
+#include <asm/kdbsupport.h>
+
+/*
+ * Minimal stack back trace functionality.
+ */
+int
+kdb_bt (int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+ struct task_struct *task = current;
+ struct ia64_frame_info info;
+ char *name;
+ int diag;
+
+ if (strcmp(argv[0], "btp") == 0) {
+ unsigned long pid;
+
+ diag = kdbgetularg(argv[1], &pid);
+ if (diag)
+ return diag;
+
+ task = find_task_by_pid(pid);
+ if (!task) {
+			kdb_printf("No process with pid == %lu found\n", pid);
+ return 0;
+ }
+ regs = ia64_task_regs(task);
+ } else if (argc) {
+ kdb_printf("bt <address> is unsupported for IA-64\n");
+ return 0;
+ }
+
+ if (task == current) {
+ /*
+ * Upon entering kdb, the stack frame looks like this:
+ *
+ * +---------------------+
+ * | struct pt_regs |
+ * +---------------------+
+ * | |
+ * | kernel stack |
+ * | |
+ * +=====================+ <--- top of stack upon entering kdb
+ * | struct pt_regs |
+ * +---------------------+
+ * | struct switch_stack |
+ * +---------------------+
+ */
+ if (user_mode(regs)) {
+ /* We are not implementing stack backtrace from user mode code */
+ kdb_printf ("Not in Kernel\n");
+ return 0;
+ }
+ ia64_unwind_init_from_current(&info, regs);
+ } else {
+ /*
+ * For a blocked task, the stack frame looks like this:
+ *
+ * +---------------------+
+ * | struct pt_regs |
+ * +---------------------+
+ * | |
+ * | kernel stack |
+ * | |
+ * +---------------------+
+ * | struct switch_stack |
+ * +=====================+ <--- task->thread.ksp
+ */
+ ia64_unwind_init_from_blocked_task(&info, task);
+ }
+
+ kdb_printf("Ret Address Reg Stack base Name\n\n") ;
+ do {
+ unsigned long ip = ia64_unwind_get_ip(&info);
+
+ name = kdbnearsym(ip);
+ if (!name) {
+ kdb_printf("Interrupt\n");
+ return 0;
+ }
+ kdb_printf("0x%016lx: [0x%016lx] %s\n", ip, ia64_unwind_get_bsp(&info), name);
+ } while (ia64_unwind_to_previous_frame(&info) >= 0);
+ return 0;
+}
diff --git a/arch/ia64/kdb/kdb_io.c b/arch/ia64/kdb/kdb_io.c
new file mode 100644
index 000000000..0b5c6fd44
--- /dev/null
+++ b/arch/ia64/kdb/kdb_io.c
@@ -0,0 +1,350 @@
+/*
+ * Kernel Debugger Console I/O handler
+ *
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) Scott Lurndal (slurn@engr.sgi.com)
+ * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com)
+ * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
+ *
+ * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc.
+ *
+ * Modifications from:
+ * Chuck Fleckenstein 1999/07/20
+ * Move kdb_info struct declaration to this file
+ * for cases where serial support is not compiled into
+ * the kernel.
+ *
+ * Masahiro Adegawa 1999/07/20
+ * Handle some peculiarities of japanese 86/106
+ * keyboards.
+ *
+ * marc@mucom.co.il 1999/07/20
+ * Catch buffer overflow for serial input.
+ *
+ * Scott Foehner
+ * Port to ia64
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/console.h>
+#include <linux/serial_reg.h>
+#include <linux/spinlock.h>
+
+#include <asm/io.h>
+
+#include "pc_keyb.h"
+
+int kdb_port = 0;
+
+/*
+ * This module contains code to read characters from the keyboard or a serial
+ * port.
+ *
+ * It is used by the kernel debugger, and is polled, not interrupt driven.
+ *
+ */
+
+/*
+ * send: Send a byte to the keyboard controller. Used primarily to
+ * alter LED settings.
+ */
+
+static void
+kdb_kbdsend(unsigned char byte)
+{
+ while (inb(KBD_STATUS_REG) & KBD_STAT_IBF)
+ ;
+ outb(KBD_DATA_REG, byte);
+}
+
+static void
+kdb_kbdsetled(int leds)
+{
+ kdb_kbdsend(KBD_CMD_SET_LEDS);
+ kdb_kbdsend((unsigned char)leds);
+}
+
+static void
+console_read (char *buffer, size_t bufsize)
+{
+ struct console *in;
+ struct console *out;
+ char *cp, ch;
+
+ for (in = console_drivers; in; in = in->next) {
+ if ((in->flags & CON_ENABLED) && (in->read || in->wait_key))
+ break;
+ }
+ for (out = console_drivers; out; out = out->next) {
+ if ((out->flags & CON_ENABLED) && out->write)
+ break;
+ }
+
+ if ((!in->read && !in->wait_key) || !out->write) {
+ panic("kdb_io: can't do console i/o!");
+ }
+
+ if (in->read) {
+ /* this is untested... */
+ (*in->read)(in, buffer, bufsize);
+ return;
+ }
+
+ bufsize -= 2; /* leave room for CR & NUL terminator */
+ cp = buffer;
+ while (1) {
+ ch = (*in->wait_key)(in);
+ switch (ch) {
+ case '\b':
+ if (cp > buffer) {
+ --cp, ++bufsize;
+ (*out->write)(out, "\b \b", 3);
+ }
+ break;
+
+ case '\025':
+ while (cp > buffer) {
+ --cp, ++bufsize;
+ (*out->write)(out, "\b \b", 3);
+ }
+ break;
+
+ case '\r':
+ case '\n':
+ (*out->write)(out, "\r\n", 2);
+ *cp++ = '\n';
+ *cp++ = '\0';
+ return;
+
+ default:
+ if (bufsize > 0) {
+ (*out->write)(out, &ch, 1);
+ --bufsize;
+ *cp++ = ch;
+ }
+ break;
+ }
+ }
+}
+
+char *
+kdb_getscancode(char *buffer, size_t bufsize)
+{
+ /*
+ * XXX Shouldn't kdb _always_ use console based I/O? That's what the console
+ * abstraction is for, after all... ---davidm
+ */
+#ifdef CONFIG_IA64_HP_SIM
+ extern spinlock_t console_lock;
+ unsigned long flags;
+
+ spin_lock_irqsave(&console_lock, flags);
+ console_read(buffer, bufsize);
+ spin_unlock_irqrestore(&console_lock, flags);
+ return buffer;
+#else /* !CONFIG_IA64_HP_SIM */
+ char *cp = buffer;
+ int scancode, scanstatus;
+ static int shift_lock = 0; /* CAPS LOCK state (0-off, 1-on) */
+ static int shift_key = 0; /* Shift next keypress */
+ static int ctrl_key = 0;
+ static int leds = 2; /* Num lock */
+ u_short keychar;
+ extern u_short plain_map[], shift_map[], ctrl_map[];
+
+ bufsize -= 2; /* Reserve space for newline and null byte */
+
+ /*
+ * If we came in via a serial console, we allow that to
+ * be the input window for kdb.
+ */
+ if (kdb_port != 0) {
+ char ch;
+ int status;
+#define serial_inp(info, offset) inb((info) + (offset))
+#define serial_out(info, offset, v) outb((v), (info) + (offset))
+
+ while(1) {
+ while ((status = serial_inp(kdb_port, UART_LSR))
+ & UART_LSR_DR) {
+readchar:
+ ch = serial_inp(kdb_port, UART_RX);
+ if (ch == 8) { /* BS */
+ if (cp > buffer) {
+ --cp, bufsize++;
+ printk("%c %c", 0x08, 0x08);
+ }
+ continue;
+ }
+ serial_out(kdb_port, UART_TX, ch);
+ if (ch == 13) { /* CR */
+ *cp++ = '\n';
+ *cp++ = '\0';
+ serial_out(kdb_port, UART_TX, 10);
+ return(buffer);
+ }
+ /*
+ * Discard excess characters
+ */
+ if (bufsize > 0) {
+ *cp++ = ch;
+ bufsize--;
+ }
+ }
+ while (((status = serial_inp(kdb_port, UART_LSR))
+ & UART_LSR_DR) == 0);
+ }
+ }
+
+ while (1) {
+
+ /*
+ * Wait for a valid scancode
+ */
+
+ while ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0)
+ ;
+
+ /*
+ * Fetch the scancode
+ */
+ scancode = inb(KBD_DATA_REG);
+ scanstatus = inb(KBD_STATUS_REG);
+
+ /*
+ * Ignore mouse events.
+ */
+ if (scanstatus & KBD_STAT_MOUSE_OBF)
+ continue;
+
+ /*
+ * Ignore release, trigger on make
+ * (except for shift keys, where we want to
+ * keep the shift state so long as the key is
+ * held down).
+ */
+
+ if (((scancode&0x7f) == 0x2a)
+ || ((scancode&0x7f) == 0x36)) {
+ /*
+ * Next key may use shift table
+ */
+ if ((scancode & 0x80) == 0) {
+ shift_key=1;
+ } else {
+ shift_key=0;
+ }
+ continue;
+ }
+
+ if ((scancode&0x7f) == 0x1d) {
+ /*
+ * Left ctrl key
+ */
+ if ((scancode & 0x80) == 0) {
+ ctrl_key = 1;
+ } else {
+ ctrl_key = 0;
+ }
+ continue;
+ }
+
+ if ((scancode & 0x80) != 0)
+ continue;
+
+ scancode &= 0x7f;
+
+ /*
+ * Translate scancode
+ */
+
+ if (scancode == 0x3a) {
+ /*
+ * Toggle caps lock
+ */
+ shift_lock ^= 1;
+ leds ^= 0x4; /* toggle caps lock led */
+
+ kdb_kbdsetled(leds);
+ continue;
+ }
+
+ if (scancode == 0x0e) {
+ /*
+ * Backspace
+ */
+ if (cp > buffer) {
+ --cp, bufsize++;
+
+ /*
+ * XXX - erase character on screen
+ */
+ printk("%c %c", 0x08, 0x08);
+ }
+ continue;
+ }
+
+ if (scancode == 0xe0) {
+ continue;
+ }
+
+	for (i = 0; i < len; i++, table++) {
+ * For Japanese 86/106 keyboards
+ * See comment in drivers/char/pc_keyb.c.
+ * - Masahiro Adegawa
+ */
+ if (scancode == 0x73) {
+ scancode = 0x59;
+ } else if (scancode == 0x7d) {
+ scancode = 0x7c;
+ }
+
+		if (ctrl_key) {
+			keychar = ctrl_map[scancode];
+		} else if (shift_lock || shift_key) {
+			keychar = shift_map[scancode];
+		} else if (!shift_lock && !shift_key) {
+			keychar = plain_map[scancode];
+		} else {
+			keychar = 0x0020;
+			printk("Unknown state/scancode (%d)\n", scancode);
+		}
+
+ if ((scancode & 0x7f) == 0x1c) {
+ /*
+ * enter key. All done.
+ */
+ printk("\n");
+ break;
+ }
+
+ /*
+ * echo the character.
+ */
+ printk("%c", keychar&0xff);
+
+ if (bufsize) {
+ --bufsize;
+ *cp++ = keychar&0xff;
+ } else {
+ printk("buffer overflow\n");
+ break;
+ }
+
+ }
+
+ *cp++ = '\n'; /* White space for parser */
+ *cp++ = '\0'; /* String termination */
+
+#if defined(NOTNOW)
+ cp = buffer;
+ while (*cp) {
+ printk("char 0x%x\n", *cp++);
+ }
+#endif
+
+ return buffer;
+#endif /* !CONFIG_IA64_HP_SIM */
+}
+
diff --git a/arch/ia64/kdb/kdb_traps.c b/arch/ia64/kdb/kdb_traps.c
new file mode 100644
index 000000000..6358f7a30
--- /dev/null
+++ b/arch/ia64/kdb/kdb_traps.c
@@ -0,0 +1,55 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/kdb.h>
+
+static struct kdb_bp_support {
+ unsigned long addr ;
+ int slot ;
+} kdb_bp_info[NR_CPUS] ;
+
+
+extern void kdb_bp_install (void);
+
+/*
+ * This gets invoked right before a call to ia64_fault().
+ * Returns zero if the normal fault handler should be invoked.
+ */
+long
+ia64_kdb_fault_handler (unsigned long vector, unsigned long isr, unsigned long ifa,
+ unsigned long iim, unsigned long itir, unsigned long arg5,
+ unsigned long arg6, unsigned long arg7, unsigned long stack)
+{
+ struct switch_stack *sw = (struct switch_stack *) &stack;
+ struct pt_regs *regs = (struct pt_regs *) (sw + 1);
+ int bundle_slot;
+
+ /*
+ * TBD
+ * If KDB is configured, enter KDB for any fault.
+ */
+ if ((vector == 29) || (vector == 35) || (vector == 36)) {
+ if (!user_mode(regs)) {
+ bundle_slot = ia64_psr(regs)->ri;
+ if (vector == 29) {
+ if (bundle_slot == 0) {
+ kdb_bp_info[0].addr = regs->cr_iip;
+ kdb_bp_info[0].slot = bundle_slot;
+ kdb(KDB_REASON_FLTDBG, 0, regs);
+ } else {
+ if ((bundle_slot < 3) &&
+ (kdb_bp_info[0].addr == regs->cr_iip))
+ {
+ ia64_psr(regs)->id = 1;
+ ia64_psr(regs)->db = 1;
+ kdb_bp_install() ;
+ } else /* some error ?? */
+ kdb(KDB_REASON_FLTDBG, 0, regs);
+ }
+ } else /* single step or taken branch */
+ kdb(KDB_REASON_DEBUG, 0, regs);
+ return 1;
+ }
+ }
+ return 0;
+}
diff --git a/arch/ia64/kdb/kdbsupport.c b/arch/ia64/kdb/kdbsupport.c
new file mode 100644
index 000000000..0b574ae6e
--- /dev/null
+++ b/arch/ia64/kdb/kdbsupport.c
@@ -0,0 +1,1310 @@
+/*
+ * Minimalist Kernel Debugger
+ *
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) Scott Lurndal (slurn@engr.sgi.com)
+ * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com)
+ * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
+ * Copyright (C) David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc.
+ *
+ * Modifications from:
+ * Richard Bass 1999/07/20
+ * Many bug fixes and enhancements.
+ * Scott Foehner
+ * Port to ia64
+ * Srinivasa Thirumalachar
+ * RSE support for ia64
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/kdb.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+
+#include <asm/uaccess.h>
+#include <asm/kdbsupport.h>
+#include <asm/rse.h>
+
+extern kdb_state_t kdb_state ;
+k_machreg_t dbregs[KDB_DBREGS];
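+/*
+ * Hardware data-breakpoint register slots; a value of 0xffffffff marks a
+ * slot as free (see kdb_initdbregs() and kdb_allocdbreg() below).
+ */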
+
+static int __init
+kdb_setup (char *str)
+{
+ kdb_flags |= KDB_FLAG_EARLYKDB;
+ return 1;
+}
+
+__setup("kdb", kdb_setup);
+
+static int
+kdb_ia64_sir (int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+ u64 lid, tpr, lrr0, lrr1, itv, pmv, cmcv;
+
+ asm ("mov %0=cr.lid" : "=r"(lid));
+ asm ("mov %0=cr.tpr" : "=r"(tpr));
+ asm ("mov %0=cr.lrr0" : "=r"(lrr0));
+ asm ("mov %0=cr.lrr1" : "=r"(lrr1));
+	printk ("lid=0x%lx, tpr=0x%lx, lrr0=0x%lx, lrr1=0x%lx\n", lid, tpr, lrr0, lrr1);
+
+ asm ("mov %0=cr.itv" : "=r"(itv));
+ asm ("mov %0=cr.pmv" : "=r"(pmv));
+ asm ("mov %0=cr.cmcv" : "=r"(cmcv));
+ printk ("itv=0x%lx, pmv=0x%lx, cmcv=0x%lx\n", itv, pmv, cmcv);
+
+ printk ("irr=0x%016lx,0x%016lx,0x%016lx,0x%016lx\n",
+ ia64_get_irr0(), ia64_get_irr1(), ia64_get_irr2(), ia64_get_irr3());
+ return 0;
+}
+
+void __init
+kdb_init (void)
+{
+ extern void kdb_inittab(void);
+ unsigned long reg;
+
+ kdb_inittab();
+ kdb_initbptab();
+#if 0
+ kdb_disinit();
+#endif
+	kdb_printf("kdb version %d.%d by Scott Lurndal. "
+ "Copyright SGI, All Rights Reserved\n",
+ KDB_MAJOR_VERSION, KDB_MINOR_VERSION);
+
+ /* Enable debug registers */
+ __asm__ ("mov %0=psr":"=r"(reg));
+ reg |= IA64_PSR_DB;
+ __asm__ ("mov psr.l=%0"::"r"(reg));
+ ia64_srlz_d();
+
+ /* Init kdb state */
+ kdb_state.bkpt_handling_state = BKPTSTATE_NOT_HANDLED ;
+
+ kdb_register("irr", kdb_ia64_sir, "", "Show interrupt registers", 0);
+}
+
+/*
+ * kdbprintf
+ * kdbgetword
+ * kdb_getstr
+ */
+
+char *
+kbd_getstr(char *buffer, size_t bufsize, char *prompt)
+{
+ extern char* kdb_getscancode(char *, size_t);
+
+#if defined(CONFIG_SMP)
+ kdb_printf(prompt, smp_processor_id());
+#else
+ kdb_printf("%s", prompt);
+#endif
+
+ return kdb_getscancode(buffer, bufsize);
+
+}
+
+int
+kdb_printf(const char *fmt, ...)
+{
+ char buffer[256];
+ va_list ap;
+ int diag;
+ int linecount;
+
+ diag = kdbgetintenv("LINES", &linecount);
+ if (diag)
+ linecount = 22;
+
+ va_start(ap, fmt);
+ vsprintf(buffer, fmt, ap);
+ va_end(ap);
+
+ printk("%s", buffer);
+#if 0
+ if (strchr(buffer, '\n') != NULL) {
+ kdb_nextline++;
+ }
+
+ if (kdb_nextline == linecount) {
+ char buf1[16];
+ char buf2[32];
+ extern char* kdb_getscancode(char *, size_t);
+ char *moreprompt;
+
+ /*
+ * Pause until cr.
+ */
+ moreprompt = kdbgetenv("MOREPROMPT");
+ if (moreprompt == NULL) {
+ moreprompt = "more> ";
+ }
+
+#if defined(CONFIG_SMP)
+ if (strchr(moreprompt, '%')) {
+ sprintf(buf2, moreprompt, smp_processor_id());
+ moreprompt = buf2;
+ }
+#endif
+
+ printk(moreprompt);
+ (void) kdb_getscancode(buf1, sizeof(buf1));
+
+ kdb_nextline = 1;
+
+ if ((buf1[0] == 'q')
+ || (buf1[0] == 'Q')) {
+ kdb_longjmp(&kdbjmpbuf, 1);
+ }
+ }
+#endif
+ return 0;
+}
+
+unsigned long
+kdbgetword(unsigned long addr, int width)
+{
+ /*
+ * This function checks the address for validity. Any address
+ * in the range PAGE_OFFSET to high_memory is legal, any address
+ * which maps to a vmalloc region is legal, and any address which
+ * is a user address, we use get_user() to verify validity.
+ */
+
+ if (addr < PAGE_OFFSET) {
+ /*
+ * Usermode address.
+ */
+ unsigned long diag;
+ unsigned long ulval;
+
+ switch (width) {
+ case 8:
+ { unsigned long *lp;
+
+ lp = (unsigned long *) addr;
+ diag = get_user(ulval, lp);
+ break;
+ }
+ case 4:
+ { unsigned int *ip;
+
+ ip = (unsigned int *) addr;
+ diag = get_user(ulval, ip);
+ break;
+ }
+ case 2:
+ { unsigned short *sp;
+
+ sp = (unsigned short *) addr;
+ diag = get_user(ulval, sp);
+ break;
+ }
+ case 1:
+ { unsigned char *cp;
+
+ cp = (unsigned char *) addr;
+ diag = get_user(ulval, cp);
+ break;
+ }
+ default:
+ printk("kdbgetword: Bad width\n");
+ return 0L;
+ }
+
+ if (diag) {
+ if ((kdb_flags & KDB_FLAG_SUPRESS) == 0) {
+ printk("kdb: Bad user address 0x%lx\n", addr);
+ kdb_flags |= KDB_FLAG_SUPRESS;
+ }
+ return 0L;
+ }
+ kdb_flags &= ~KDB_FLAG_SUPRESS;
+ return ulval;
+ }
+
+ if (addr > (unsigned long)high_memory) {
+ extern int kdb_vmlist_check(unsigned long, unsigned long);
+
+ if (!kdb_vmlist_check(addr, addr+width)) {
+ /*
+ * Would appear to be an illegal kernel address;
+ * Print a message once, and don't print again until
+ * a legal address is used.
+ */
+ if ((kdb_flags & KDB_FLAG_SUPRESS) == 0) {
+ printk("kdb: Bad kernel address 0x%lx\n", addr);
+ kdb_flags |= KDB_FLAG_SUPRESS;
+ }
+ return 0L;
+ }
+ }
+
+ /*
+ * A good address. Reset error flag.
+ */
+ kdb_flags &= ~KDB_FLAG_SUPRESS;
+
+ switch (width) {
+ case 8:
+ { unsigned long *lp;
+
+ lp = (unsigned long *)(addr);
+ return *lp;
+ }
+ case 4:
+ { unsigned int *ip;
+
+ ip = (unsigned int *)(addr);
+ return *ip;
+ }
+ case 2:
+ { unsigned short *sp;
+
+ sp = (unsigned short *)(addr);
+ return *sp;
+ }
+ case 1:
+ { unsigned char *cp;
+
+ cp = (unsigned char *)(addr);
+ return *cp;
+ }
+ }
+
+ printk("kdbgetword: Bad width\n");
+ return 0L;
+}
+
+/*
+ * Start of breakpoint management routines
+ */
+
+/*
+ * Arg: bp structure
+ */
+
+int
+kdb_allocdbreg(kdb_bp_t *bp)
+{
+ int i=0;
+
+ /* For inst bkpt, just return. No hw reg alloc to be done. */
+
+ if (bp->bp_mode == BKPTMODE_INST) {
+ return i;
+ } else if (bp->bp_mode == BKPTMODE_DATAW) {
+ for(i=0; i<KDB_DBREGS; i++) {
+ if (dbregs[i] == 0xffffffff) {
+ dbregs[i] = 0;
+ return i;
+ }
+ }
+ }
+
+ return -1;
+}
+
+void
+kdb_freedbreg(kdb_bp_t *bp)
+{
+ if (bp->bp_mode == BKPTMODE_DATAW)
+ dbregs[bp->bp_reg] = 0xffffffff;
+}
+
+void
+kdb_initdbregs(void)
+{
+ int i;
+
+ for(i=0; i<KDB_DBREGS; i++) {
+ dbregs[i] = 0xffffffff;
+ }
+}
+int
+kdbinstalltrap(int type, handler_t newh, handler_t *oldh)
+{
+ /*
+ * Usurp INTn. XXX - TBD.
+ */
+
+ return 0;
+}
+
+int
+install_instbkpt(kdb_bp_t *bp)
+{
+ unsigned long *addr = (unsigned long *)bp->bp_addr ;
+ bundle_t *bundle = (bundle_t *)bp->bp_longinst;
+
+ /* save current bundle */
+ *bundle = *(bundle_t *)addr ;
+
+ /* Set the break point! */
+ ((bundle_t *)addr)->lform.low8 = (
+ (((bundle_t *)addr)->lform.low8 & ~INST_SLOT0_MASK) |
+ BREAK_INSTR);
+
+ /* set flag */
+ bp->bp_instvalid = 1 ;
+
+ /* flush icache as it is stale now */
+ ia64_flush_icache_page((unsigned long)addr) ;
+
+#ifdef KDB_DEBUG
+ kdb_printf ("[0x%016lx]: install 0x%016lx with 0x%016lx\n",
+ addr, bundle->lform.low8, addr[0]) ;
+#endif
+ return 0 ;
+}
+
+int
+install_databkpt(kdb_bp_t *bp)
+{
+ unsigned long dbreg_addr = bp->bp_reg * 2;
+ unsigned long dbreg_cond = dbreg_addr + 1;
+ unsigned long value = 0x8fffffffffffffff;
+ unsigned long addr = (unsigned long)bp->bp_addr;
+ __asm__ ("mov dbr[%0]=%1"::"r"(dbreg_cond),"r"(value));
+// __asm__ ("movl %0,%%db0\n\t"::"r"(contents));
+ __asm__ ("mov dbr[%0]=%1"::"r"(dbreg_addr),"r"(addr));
+ ia64_insn_group_barrier();
+ ia64_srlz_i();
+ ia64_insn_group_barrier();
+
+#ifdef KDB_DEBUG
+ kdb_printf("installed dbkpt at 0x%016lx\n", addr) ;
+#endif
+ return 0;
+}
+
+int
+kdbinstalldbreg(kdb_bp_t *bp)
+{
+ if (bp->bp_mode == BKPTMODE_INST) {
+ return install_instbkpt(bp) ;
+ } else if (bp->bp_mode == BKPTMODE_DATAW) {
+ return install_databkpt(bp) ;
+ }
+ return 0;
+}
+
+void
+remove_instbkpt(kdb_bp_t *bp)
+{
+ unsigned long *addr = (unsigned long *)bp->bp_addr ;
+ bundle_t *bundle = (bundle_t *)bp->bp_longinst;
+
+ if (!bp->bp_instvalid)
+ /* Nothing to remove. If we just alloced the bkpt
+ * but never resumed, the bp_inst will not be valid. */
+ return ;
+
+#ifdef KDB_DEBUG
+ kdb_printf ("[0x%016lx]: remove 0x%016lx with 0x%016lx\n",
+ addr, addr[0], bundle->lform.low8) ;
+#endif
+
+ /* restore current bundle */
+ *(bundle_t *)addr = *bundle ;
+ /* reset the flag */
+ bp->bp_instvalid = 0 ;
+ ia64_flush_icache_page((unsigned long)addr) ;
+}
+
+void
+remove_databkpt(kdb_bp_t *bp)
+{
+ int regnum = bp->bp_reg ;
+ unsigned long dbreg_addr = regnum * 2;
+ unsigned long dbreg_cond = dbreg_addr + 1;
+ unsigned long value = 0x0fffffffffffffff;
+ __asm__ ("mov dbr[%0]=%1"::"r"(dbreg_cond),"r"(value));
+// __asm__ ("movl %0,%%db0\n\t"::"r"(contents));
+ ia64_insn_group_barrier();
+ ia64_srlz_i();
+ ia64_insn_group_barrier();
+
+#ifdef KDB_DEBUG
+ kdb_printf("removed dbkpt at 0x%016lx\n", bp->bp_addr) ;
+#endif
+}
+
+void
+kdbremovedbreg(kdb_bp_t *bp)
+{
+ if (bp->bp_mode == BKPTMODE_INST) {
+ remove_instbkpt(bp) ;
+ } else if (bp->bp_mode == BKPTMODE_DATAW) {
+ remove_databkpt(bp) ;
+ }
+}
+
+k_machreg_t
+kdb_getdr6(void)
+{
+ return kdb_getdr(6);
+}
+
+k_machreg_t
+kdb_getdr7(void)
+{
+ return kdb_getdr(7);
+}
+
+k_machreg_t
+kdb_getdr(int regnum)
+{
+ k_machreg_t contents = 0;
+ unsigned long reg = (unsigned long)regnum;
+
+	__asm__ ("mov %0=ibr[%1]" : "=r"(contents) : "r"(reg));
+// __asm__ ("mov ibr[%0]=%1"::"r"(dbreg_cond),"r"(value));
+
+ return contents;
+}
+
+
+k_machreg_t
+kdb_getcr(int regnum)
+{
+ k_machreg_t contents = 0;
+ return contents;
+}
+
+void
+kdb_putdr6(k_machreg_t contents)
+{
+ kdb_putdr(6, contents);
+}
+
+void
+kdb_putdr7(k_machreg_t contents)
+{
+ kdb_putdr(7, contents);
+}
+
+void
+kdb_putdr(int regnum, k_machreg_t contents)
+{
+}
+
+void
+get_fault_regs(fault_regs_t *fr)
+{
+ fr->ifa = 0 ;
+ fr->isr = 0 ;
+
+ __asm__ ("rsm psr.ic;;") ;
+ ia64_srlz_d();
+ __asm__ ("mov %0=cr.ifa" : "=r"(fr->ifa));
+ __asm__ ("mov %0=cr.isr" : "=r"(fr->isr));
+ __asm__ ("ssm psr.ic;;") ;
+ ia64_srlz_d();
+}
+
+/*
+ * kdb_db_trap
+ *
+ * Perform breakpoint processing upon entry to the
+ * processor debugger fault. Determine and print
+ * the active breakpoint.
+ *
+ * Parameters:
+ * ef Exception frame containing machine register state
+ * reason Why did we enter kdb - fault or break
+ * Outputs:
+ * None.
+ * Returns:
+ * 0 Standard instruction or data breakpoint encountered
+ * 1 Single Step fault ('ss' command)
+ * 2 Single Step fault, caller should continue ('ssb' command)
+ * Locking:
+ * None.
+ * Remarks:
+ * Yup, there be goto's here.
+ */
+
+int
+kdb_db_trap(struct pt_regs *ef, int reason)
+{
+ int i, rv=0;
+
+ /* Trying very hard to not change the interface to kdb.
+	 * So, even though we have these values in the fault function,
+	 * they are not passed in but are read again here.
+ */
+ fault_regs_t faultregs ;
+
+ if (reason == KDB_REASON_FLTDBG)
+ get_fault_regs(&faultregs) ;
+
+ /* NOTE : XXX: This has to be done only for data bkpts */
+ /* Prevent it from continuously faulting */
+ ef->cr_ipsr |= 0x0000002000000000;
+
+ if (ef->cr_ipsr & 0x0000010000000000) {
+ /* single step */
+ ef->cr_ipsr &= 0xfffffeffffffffff;
+ if ((kdb_state.bkpt_handling_state == BKPTSTATE_HANDLED)
+ && (kdb_state.cmd_given == CMDGIVEN_GO))
+ ;
+ else
+ kdb_printf("SS trap at 0x%lx\n", ef->cr_iip + ia64_psr(ef)->ri);
+ rv = 1;
+ kdb_state.reason_for_entry = ENTRYREASON_SSTEP ;
+ goto handled;
+ } else
+ kdb_state.reason_for_entry = ENTRYREASON_GO ;
+
+ /*
+ * Determine which breakpoint was encountered.
+ */
+ for(i=0; i<KDB_MAXBPT; i++) {
+ if ((breakpoints[i].bp_enabled)
+ && ((breakpoints[i].bp_addr == ef->cr_iip) ||
+ ((faultregs.ifa) &&
+ (breakpoints[i].bp_addr == faultregs.ifa)))) {
+ /*
+ * Hit this breakpoint. Remove it while we are
+ * handling hit to avoid recursion. XXX ??
+ */
+ if (breakpoints[i].bp_addr == faultregs.ifa)
+ kdb_printf("Data breakpoint #%d for 0x%lx at 0x%lx\n",
+ i, breakpoints[i].bp_addr, ef->cr_iip + ia64_psr(ef)->ri);
+ else
+ kdb_printf("%s breakpoint #%d at 0x%lx\n",
+ rwtypes[0],
+ i, breakpoints[i].bp_addr);
+
+ /*
+ * For an instruction breakpoint, disassemble
+ * the current instruction.
+ */
+#if 0
+ if (rw == 0) {
+ kdb_id1(ef->eip);
+ }
+#endif
+
+ goto handled;
+ }
+ }
+
+#if 0
+unknown:
+#endif
+ kdb_printf("Unknown breakpoint. Should forward. \n");
+ /* Need a flag for this. The skip should be done XXX
+ * when a go or single step command is done for this session.
+ * For now it is here.
+ */
+ ia64_increment_ip(ef) ;
+ return rv ;
+
+handled:
+
+ /* We are here after handling a break inst/data bkpt */
+ if (kdb_state.bkpt_handling_state == BKPTSTATE_NOT_HANDLED) {
+ kdb_state.bkpt_handling_state = BKPTSTATE_HANDLED ;
+ if (kdb_state.reason_for_entry == ENTRYREASON_GO) {
+ kdb_setsinglestep(ef) ;
+ kdb_state.kdb_action = ACTION_NOBPINSTALL;
+ /* We dont want bp install just this once */
+ kdb_state.cmd_given = CMDGIVEN_UNKNOWN ;
+ }
+ } else if (kdb_state.bkpt_handling_state == BKPTSTATE_HANDLED) {
+ kdb_state.bkpt_handling_state = BKPTSTATE_NOT_HANDLED ;
+ if (kdb_state.reason_for_entry == ENTRYREASON_SSTEP) {
+ if (kdb_state.cmd_given == CMDGIVEN_GO)
+ kdb_state.kdb_action = ACTION_NOPROMPT ;
+ kdb_state.cmd_given = CMDGIVEN_UNKNOWN ;
+ }
+ } else
+ kdb_printf("Unknown value of bkpt state\n") ;
+
+ return rv;
+
+}
+
+void
+kdb_setsinglestep(struct pt_regs *regs)
+{
+ regs->cr_ipsr |= 0x0000010000000000;
+#if 0
+ regs->eflags |= EF_TF;
+#endif
+}
+
+/*
+ * Symbol table functions.
+ */
+
+/*
+ * kdbgetsym
+ *
+ * Return the symbol table entry for the given symbol
+ *
+ * Parameters:
+ * symname Character string containing symbol name
+ * Outputs:
+ * Returns:
+ * NULL Symbol doesn't exist
+ * ksp Pointer to symbol table entry
+ * Locking:
+ * None.
+ * Remarks:
+ */
+
+__ksymtab_t *
+kdbgetsym(const char *symname)
+{
+ __ksymtab_t *ksp = __kdbsymtab;
+ int i;
+
+ if (symname == NULL)
+ return NULL;
+
+ for (i=0; i<__kdbsymtabsize; i++, ksp++) {
+ if (ksp->name && (strcmp(ksp->name, symname)==0)) {
+ return ksp;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * kdbgetsymval
+ *
+ * Return the address of the given symbol.
+ *
+ * Parameters:
+ * symname Character string containing symbol name
+ * Outputs:
+ * Returns:
+ * 0 Symbol name is NULL
+ * addr Address corresponding to symname
+ * Locking:
+ * None.
+ * Remarks:
+ */
+
+unsigned long
+kdbgetsymval(const char *symname)
+{
+ __ksymtab_t *ksp = kdbgetsym(symname);
+
+ return (ksp?ksp->value:0);
+}
+
+/*
+ * kdbaddmodsym
+ *
+ * Add a symbol to the kernel debugger symbol table. Called when
+ * a new module is loaded into the kernel.
+ *
+ * Parameters:
+ * symname Character string containing symbol name
+ * value Value of symbol
+ * Outputs:
+ * Returns:
+ * 0 Successfully added to table.
+ * 1 Duplicate symbol
+ * 2 Symbol table full
+ * Locking:
+ * None.
+ * Remarks:
+ */
+
+int
+kdbaddmodsym(char *symname, unsigned long value)
+{
+
+ /*
+ * Check for duplicate symbols.
+ */
+ if (kdbgetsym(symname)) {
+ printk("kdb: Attempt to register duplicate symbol '%s' @ 0x%lx\n",
+ symname, value);
+ return 1;
+ }
+
+ if (__kdbsymtabsize < __kdbmaxsymtabsize) {
+ __ksymtab_t *ksp = &__kdbsymtab[__kdbsymtabsize++];
+
+ ksp->name = symname;
+ ksp->value = value;
+ return 0;
+ }
+
+ /*
+ * No room left in kernel symbol table.
+ */
+ {
+ static int __kdbwarn = 0;
+
+ if (__kdbwarn == 0) {
+ __kdbwarn++;
+ printk("kdb: Exceeded symbol table size. Increase CONFIG_KDB_SYMTAB_SIZE in kernel configuration\n");
+ }
+ }
+
+ return 2;
+}
+
+/*
+ * kdbdelmodsym
+ *
+ *	Remove a symbol from the kernel debugger symbol table. Called when
+ *	a module is unloaded from the kernel.
+ *
+ * Parameters:
+ * symname Character string containing symbol name
+ * value Value of symbol
+ * Outputs:
+ * Returns:
+ *	0	Successfully removed from table.
+ * 1 Symbol not found
+ * Locking:
+ * None.
+ * Remarks:
+ */
+
+int
+kdbdelmodsym(const char *symname)
+{
+ __ksymtab_t *ksp, *endksp;
+
+ if (symname == NULL)
+ return 1;
+
+ /*
+ * Search for the symbol. If found, move
+ * all successive symbols down one position
+ * in the symbol table to avoid leaving holes.
+ */
+ endksp = &__kdbsymtab[__kdbsymtabsize];
+ for (ksp = __kdbsymtab; ksp < endksp; ksp++) {
+ if (ksp->name && (strcmp(ksp->name, symname) == 0)) {
+ endksp--;
+ for ( ; ksp < endksp; ksp++) {
+ *ksp = *(ksp + 1);
+ }
+ __kdbsymtabsize--;
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * kdbnearsym
+ *
+ * Return the name of the symbol with the nearest address
+ * less than 'addr'.
+ *
+ * Parameters:
+ * addr Address to check for symbol near
+ * Outputs:
+ * Returns:
+ * NULL No symbol with address less than 'addr'
+ * symbol Returns the actual name of the symbol.
+ * Locking:
+ * None.
+ * Remarks:
+ */
+
+char *
+kdbnearsym(unsigned long addr)
+{
+ __ksymtab_t *ksp = __kdbsymtab;
+ __ksymtab_t *kpp = NULL;
+ int i;
+
+ for(i=0; i<__kdbsymtabsize; i++, ksp++) {
+ if (!ksp->name)
+ continue;
+
+ if (addr == ksp->value) {
+ kpp = ksp;
+ break;
+ }
+ if (addr > ksp->value) {
+ if ((kpp == NULL)
+ || (ksp->value > kpp->value)) {
+ kpp = ksp;
+ }
+ }
+ }
+
+ /*
+ * If more than 128k away, don't bother.
+ */
+ if ((kpp == NULL)
+ || ((addr - kpp->value) > 0x20000)) {
+ return NULL;
+ }
+
+ return kpp->name;
+}
+
+/*
+ * kdbgetregcontents
+ *
+ * Return the contents of the register specified by the
+ * input string argument. Return an error if the string
+ * does not match a machine register.
+ *
+ * The following pseudo register names are supported:
+ * &regs - Prints address of exception frame
+ * kesp - Prints kernel stack pointer at time of fault
+ * sstk - Prints switch stack for ia64
+ * %<regname> - Uses the value of the registers at the
+ * last time the user process entered kernel
+ * mode, instead of the registers at the time
+ * kdb was entered.
+ *
+ * Parameters:
+ * regname Pointer to string naming register
+ * regs Pointer to structure containing registers.
+ * Outputs:
+ *	*contents	Pointer to unsigned long to receive register contents
+ * Returns:
+ * 0 Success
+ * KDB_BADREG Invalid register name
+ * Locking:
+ * None.
+ * Remarks:
+ *
+ * Note that this function is really machine independent. The kdb
+ * register list is not, however.
+ */
+
+static struct kdbregs {
+ char *reg_name;
+ size_t reg_offset;
+} kdbreglist[] = {
+ { " psr", offsetof(struct pt_regs, cr_ipsr) },
+ { " ifs", offsetof(struct pt_regs, cr_ifs) },
+ { " ip", offsetof(struct pt_regs, cr_iip) },
+
+ { "unat", offsetof(struct pt_regs, ar_unat) },
+ { " pfs", offsetof(struct pt_regs, ar_pfs) },
+ { " rsc", offsetof(struct pt_regs, ar_rsc) },
+
+ { "rnat", offsetof(struct pt_regs, ar_rnat) },
+ { "bsps", offsetof(struct pt_regs, ar_bspstore) },
+ { " pr", offsetof(struct pt_regs, pr) },
+
+ { "ldrs", offsetof(struct pt_regs, loadrs) },
+ { " ccv", offsetof(struct pt_regs, ar_ccv) },
+ { "fpsr", offsetof(struct pt_regs, ar_fpsr) },
+
+ { " b0", offsetof(struct pt_regs, b0) },
+ { " b6", offsetof(struct pt_regs, b6) },
+ { " b7", offsetof(struct pt_regs, b7) },
+
+ { " r1",offsetof(struct pt_regs, r1) },
+ { " r2",offsetof(struct pt_regs, r2) },
+ { " r3",offsetof(struct pt_regs, r3) },
+
+ { " r8",offsetof(struct pt_regs, r8) },
+ { " r9",offsetof(struct pt_regs, r9) },
+ { " r10",offsetof(struct pt_regs, r10) },
+
+ { " r11",offsetof(struct pt_regs, r11) },
+ { " r12",offsetof(struct pt_regs, r12) },
+ { " r13",offsetof(struct pt_regs, r13) },
+
+ { " r14",offsetof(struct pt_regs, r14) },
+ { " r15",offsetof(struct pt_regs, r15) },
+ { " r16",offsetof(struct pt_regs, r16) },
+
+ { " r17",offsetof(struct pt_regs, r17) },
+ { " r18",offsetof(struct pt_regs, r18) },
+ { " r19",offsetof(struct pt_regs, r19) },
+
+ { " r20",offsetof(struct pt_regs, r20) },
+ { " r21",offsetof(struct pt_regs, r21) },
+ { " r22",offsetof(struct pt_regs, r22) },
+
+ { " r23",offsetof(struct pt_regs, r23) },
+ { " r24",offsetof(struct pt_regs, r24) },
+ { " r25",offsetof(struct pt_regs, r25) },
+
+ { " r26",offsetof(struct pt_regs, r26) },
+ { " r27",offsetof(struct pt_regs, r27) },
+ { " r28",offsetof(struct pt_regs, r28) },
+
+ { " r29",offsetof(struct pt_regs, r29) },
+ { " r30",offsetof(struct pt_regs, r30) },
+ { " r31",offsetof(struct pt_regs, r31) },
+
+};
+
+static const int nkdbreglist = sizeof(kdbreglist) / sizeof(struct kdbregs);
+
+int
+kdbgetregcontents(const char *regname,
+ struct pt_regs *regs,
+ unsigned long *contents)
+{
+ int i;
+
+ if (strcmp(regname, "&regs") == 0) {
+ *contents = (unsigned long)regs;
+ return 0;
+ }
+
+ if (strcmp(regname, "sstk") == 0) {
+ *contents = (unsigned long)getprsregs(regs) ;
+ return 0;
+ }
+
+ if (strcmp(regname, "isr") == 0) {
+ fault_regs_t fr ;
+ get_fault_regs(&fr) ;
+ *contents = fr.isr ;
+ return 0 ;
+ }
+
+#if 0
+ /* XXX need to verify this */
+ if (strcmp(regname, "kesp") == 0) {
+ *contents = (unsigned long)regs + sizeof(struct pt_regs);
+ return 0;
+ }
+
+ if (regname[0] == '%') {
+ /* User registers: %%e[a-c]x, etc */
+ regname++;
+ regs = (struct pt_regs *)
+ (current->thread.ksp - sizeof(struct pt_regs));
+ }
+#endif
+
+ for (i=0; i<nkdbreglist; i++) {
+ if (strstr(kdbreglist[i].reg_name, regname))
+ break;
+ }
+
+ if (i == nkdbreglist) {
+ /* Lets check the rse maybe */
+ if (regname[0] == 'r')
+ if (show_cur_stack_frame(regs, simple_strtoul(regname+1, 0, 0) - 31,
+ contents))
+ return 0 ;
+ return KDB_BADREG;
+ }
+
+ *contents = *(unsigned long *)((unsigned long)regs +
+ kdbreglist[i].reg_offset);
+
+ return 0;
+}
+
+/*
+ * kdbsetregcontents
+ *
+ * Set the contents of the register specified by the
+ * input string argument. Return an error if the string
+ * does not match a machine register.
+ *
+ * Supports modification of user-mode registers via
+ * %<register-name>
+ *
+ * Parameters:
+ * regname Pointer to string naming register
+ * regs Pointer to structure containing registers.
+ * contents Unsigned long containing new register contents
+ * Outputs:
+ * Returns:
+ * 0 Success
+ * KDB_BADREG Invalid register name
+ * Locking:
+ * None.
+ * Remarks:
+ */
+
+int
+kdbsetregcontents(const char *regname,
+ struct pt_regs *regs,
+ unsigned long contents)
+{
+ int i;
+
+ if (regname[0] == '%') {
+ regname++;
+ regs = (struct pt_regs *)
+ (current->thread.ksp - sizeof(struct pt_regs));
+ }
+
+ for (i=0; i<nkdbreglist; i++) {
+ if (strnicmp(kdbreglist[i].reg_name,
+ regname,
+ strlen(regname)) == 0)
+ break;
+ }
+
+ if ((i == nkdbreglist)
+ || (strlen(kdbreglist[i].reg_name) != strlen(regname))) {
+ return KDB_BADREG;
+ }
+
+ *(unsigned long *)((unsigned long)regs + kdbreglist[i].reg_offset) =
+ contents;
+
+ return 0;
+}
+
+/*
+ * kdbdumpregs
+ *
+ * Dump the specified register set to the display.
+ *
+ * Parameters:
+ * regs Pointer to structure containing registers.
+ * type Character string identifying register set to dump
+ * extra string further identifying register (optional)
+ * Outputs:
+ * Returns:
+ * 0 Success
+ * Locking:
+ * None.
+ * Remarks:
+ * This function will dump the general register set if the type
+ * argument is NULL (struct pt_regs). The alternate register
+ * set types supported by this function:
+ *
+ * d Debug registers
+ * c Control registers
+ * u User registers at most recent entry to kernel
+ * Following not yet implemented:
+ * m Model Specific Registers (extra defines register #)
+ * r Memory Type Range Registers (extra defines register)
+ *
+ * For now, all registers are covered as follows:
+ *
+ * rd - dumps all regs
+ * rd %isr - current interrupt status reg, read freshly
+ * rd s - valid stacked regs
+ * rd %sstk - gets switch stack addr. dump memory and search
+ * rd d - debug regs, may not be too useful
+ *
+ * ARs TB Done
+ * Interrupt regs TB Done ??
+ * OTHERS TB Decided ??
+ *
+ * Intel wish list
+ * These will be implemented later - Srinivasa
+ *
+ * type action
+ * ---- ------
+ * g dump all General static registers
+ * s dump all general Stacked registers
+ * f dump all Floating Point registers
+ * p dump all Predicate registers
+ * b dump all Branch registers
+ * a dump all Application registers
+ * c dump all Control registers
+ *
+ */
+
+int
+kdbdumpregs(struct pt_regs *regs,
+ const char *type,
+ const char *extra)
+
+{
+ int i;
+ int count = 0;
+
+ if (type
+ && (type[0] == 'u')) {
+ type = NULL;
+ regs = (struct pt_regs *)
+ (current->thread.ksp - sizeof(struct pt_regs));
+ }
+
+ if (type == NULL) {
+ for (i=0; i<nkdbreglist; i++) {
+ kdb_printf("%s: 0x%16.16lx ",
+ kdbreglist[i].reg_name,
+ *(unsigned long *)((unsigned long)regs +
+ kdbreglist[i].reg_offset));
+
+ if ((++count % 3) == 0)
+ kdb_printf("\n");
+ }
+
+ kdb_printf("&regs = 0x%16.16lx\n", regs);
+
+ return 0;
+ }
+
+ switch (type[0]) {
+ case 'd':
+ {
+ for(i=0; i<8; i+=2) {
+ kdb_printf("idr%d: 0x%16.16lx idr%d: 0x%16.16lx\n", i,
+ kdb_getdr(i), i+1, kdb_getdr(i+1));
+
+ }
+ return 0;
+ }
+#if 0
+ case 'c':
+ {
+ unsigned long cr[5];
+
+ for (i=0; i<5; i++) {
+ cr[i] = kdb_getcr(i);
+ }
+ kdb_printf("cr0 = 0x%8.8x cr1 = 0x%8.8x cr2 = 0x%8.8x cr3 = 0x%8.8x\ncr4 = 0x%8.8x\n",
+ cr[0], cr[1], cr[2], cr[3], cr[4]);
+ return 0;
+ }
+#endif
+ case 'm':
+ break;
+ case 'r':
+ break;
+
+ case 's':
+ {
+ show_cur_stack_frame(regs, 0, NULL) ;
+
+ return 0 ;
+ }
+
+ case '%':
+ {
+ unsigned long contents ;
+
+ if (!kdbgetregcontents(type+1, regs, &contents))
+ kdb_printf("%s = 0x%16.16lx\n", type+1, contents) ;
+ else
+ kdb_printf("diag: Invalid register %s\n", type+1) ;
+
+ return 0 ;
+ }
+
+ default:
+ return KDB_BADREG;
+ }
+
+ /* NOTREACHED */
+ return 0;
+}
+
+k_machreg_t
+kdb_getpc(struct pt_regs *regs)
+{
+ return regs->cr_iip + ia64_psr(regs)->ri;
+}
+
+int
+kdb_setpc(struct pt_regs *regs, k_machreg_t newpc)
+{
+ regs->cr_iip = newpc & ~0xf;
+ ia64_psr(regs)->ri = newpc & 0x3;
+ return 0;
+}
+
+void
+kdb_disableint(kdbintstate_t *state)
+{
+ int *fp = (int *)state;
+ int flags;
+
+ __save_flags(flags);
+ __cli();
+
+ *fp = flags;
+}
+
+void
+kdb_restoreint(kdbintstate_t *state)
+{
+ int flags = *(int *)state;
+ __restore_flags(flags);
+}
+
+int
+kdb_putword(unsigned long addr, unsigned long contents)
+{
+ *(unsigned long *)addr = contents;
+ return 0;
+}
+
+int
+kdb_getcurrentframe(struct pt_regs *regs)
+{
+#if 0
+ regs->xcs = 0;
+#if defined(CONFIG_KDB_FRAMEPTR)
+ asm volatile("movl %%ebp,%0":"=m" (*(int *)&regs->ebp));
+#endif
+ asm volatile("movl %%esp,%0":"=m" (*(int *)&regs->esp));
+#endif
+ return 0;
+}
+
+unsigned long
+show_cur_stack_frame(struct pt_regs *regs, int regno, unsigned long *contents)
+{
+ long sof = regs->cr_ifs & ((1<<7)-1) ; /* size of frame */
+ unsigned long i ;
+ int j;
+ struct switch_stack *prs_regs = getprsregs(regs) ;
+ unsigned long *sofptr = (prs_regs? ia64_rse_skip_regs(
+ (unsigned long *)prs_regs->ar_bspstore, -sof) : NULL) ;
+
+ if (!sofptr) {
+ printk("Unable to display Current Stack Frame\n") ;
+ return 0 ;
+ }
+
+ if (regno < 0)
+ return 0 ;
+
+ for (i=sof, j=0;i;i--,j++) {
+ /* remember to skip the nat collection dword */
+ if ((((unsigned long)sofptr>>3) & (((1<<6)-1)))
+ == ((1<<6)-1))
+ sofptr++ ;
+
+ /* return the value in the reg if regno is non zero */
+
+ if (regno) {
+ if ((j+1) == regno) {
+ if (contents)
+ *contents = *sofptr ;
+ return -1;
+ }
+ sofptr++ ;
+ } else {
+ printk(" r%d: %016lx ", 32+j, *sofptr++) ;
+ if (!((j+1)%3)) printk("\n") ;
+ }
+ }
+
+ if (regno) {
+ if (!i) /* bogus rse number */
+ return 0 ;
+ } else
+ printk("\n") ;
+
+ return 0 ;
+}
diff --git a/arch/ia64/kdb/pc_keyb.h b/arch/ia64/kdb/pc_keyb.h
new file mode 100644
index 000000000..3d4831a80
--- /dev/null
+++ b/arch/ia64/kdb/pc_keyb.h
@@ -0,0 +1,127 @@
+/*
+ * linux/drivers/char/pc_keyb.h
+ *
+ * PC Keyboard And Keyboard Controller
+ *
+ * (c) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ */
+
+/*
+ * Configuration Switches
+ */
+
+#undef KBD_REPORT_ERR /* Report keyboard errors */
+#define KBD_REPORT_UNKN /* Report unknown scan codes */
+#define KBD_REPORT_TIMEOUTS /* Report keyboard timeouts */
+#undef KBD_IS_FOCUS_9000 /* We have the brain-damaged FOCUS-9000 keyboard */
+#undef INITIALIZE_MOUSE /* Define if your PS/2 mouse needs initialization. */
+
+
+
+#define KBD_INIT_TIMEOUT 1000 /* Timeout in ms for initializing the keyboard */
+#define KBC_TIMEOUT 250 /* Timeout in ms for sending to keyboard controller */
+#define KBD_TIMEOUT 1000 /* Timeout in ms for keyboard command acknowledge */
+
+/*
+ * Internal variables of the driver
+ */
+
+extern unsigned char pckbd_read_mask;
+extern unsigned char aux_device_present;
+
+/*
+ * Keyboard Controller Registers
+ */
+
+#define KBD_STATUS_REG 0x64 /* Status register (R) */
+#define KBD_CNTL_REG 0x64 /* Controller command register (W) */
+#define KBD_DATA_REG 0x60 /* Keyboard data register (R/W) */
+
+/*
+ * Keyboard Controller Commands
+ */
+
+#define KBD_CCMD_READ_MODE 0x20 /* Read mode bits */
+#define KBD_CCMD_WRITE_MODE 0x60 /* Write mode bits */
+#define KBD_CCMD_GET_VERSION 0xA1 /* Get controller version */
+#define KBD_CCMD_MOUSE_DISABLE 0xA7 /* Disable mouse interface */
+#define KBD_CCMD_MOUSE_ENABLE 0xA8 /* Enable mouse interface */
+#define KBD_CCMD_TEST_MOUSE 0xA9 /* Mouse interface test */
+#define KBD_CCMD_SELF_TEST 0xAA /* Controller self test */
+#define KBD_CCMD_KBD_TEST 0xAB /* Keyboard interface test */
+#define KBD_CCMD_KBD_DISABLE 0xAD /* Keyboard interface disable */
+#define KBD_CCMD_KBD_ENABLE 0xAE /* Keyboard interface enable */
+#define KBD_CCMD_WRITE_AUX_OBUF 0xD3 /* Write to output buffer as if
+ initiated by the auxiliary device */
+#define KBD_CCMD_WRITE_MOUSE 0xD4 /* Write the following byte to the mouse */
+
+/*
+ * Keyboard Commands
+ */
+
+#define KBD_CMD_SET_LEDS 0xED /* Set keyboard leds */
+#define KBD_CMD_SET_RATE 0xF3 /* Set typematic rate */
+#define KBD_CMD_ENABLE 0xF4 /* Enable scanning */
+#define KBD_CMD_DISABLE 0xF5 /* Disable scanning */
+#define KBD_CMD_RESET 0xFF /* Reset */
+
+/*
+ * Keyboard Replies
+ */
+
+#define KBD_REPLY_POR 0xAA /* Power on reset */
+#define KBD_REPLY_ACK 0xFA /* Command ACK */
+#define KBD_REPLY_RESEND 0xFE /* Command NACK, send the cmd again */
+
+/*
+ * Status Register Bits
+ */
+
+#define KBD_STAT_OBF 0x01 /* Keyboard output buffer full */
+#define KBD_STAT_IBF 0x02 /* Keyboard input buffer full */
+#define KBD_STAT_SELFTEST 0x04 /* Self test successful */
+#define KBD_STAT_CMD 0x08 /* Last write was a command write (0=data) */
+#define KBD_STAT_UNLOCKED 0x10 /* Zero if keyboard locked */
+#define KBD_STAT_MOUSE_OBF 0x20 /* Mouse output buffer full */
+#define KBD_STAT_GTO 0x40 /* General receive/xmit timeout */
+#define KBD_STAT_PERR 0x80 /* Parity error */
+
+#define AUX_STAT_OBF (KBD_STAT_OBF | KBD_STAT_MOUSE_OBF)
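+
+/*
+ * A typical consumer polls KBD_STAT_OBF before touching KBD_DATA_REG; a
+ * minimal sketch (the real driver also bounds the wait with the timeouts
+ * defined above):
+ *
+ *	while (!(inb(KBD_STATUS_REG) & KBD_STAT_OBF))
+ *		;
+ *	scancode = inb(KBD_DATA_REG);
+ */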
+
+/*
+ * Controller Mode Register Bits
+ */
+
+#define KBD_MODE_KBD_INT 0x01 /* Keyboard data generate IRQ1 */
+#define KBD_MODE_MOUSE_INT 0x02 /* Mouse data generate IRQ12 */
+#define KBD_MODE_SYS 0x04 /* The system flag (?) */
+#define KBD_MODE_NO_KEYLOCK 0x08 /* The keylock doesn't affect the keyboard if set */
+#define KBD_MODE_DISABLE_KBD 0x10 /* Disable keyboard interface */
+#define KBD_MODE_DISABLE_MOUSE 0x20 /* Disable mouse interface */
+#define KBD_MODE_KCC 0x40 /* Scan code conversion to PC format */
+#define KBD_MODE_RFU 0x80
+
+/*
+ * Mouse Commands
+ */
+
+#define AUX_SET_RES 0xE8 /* Set resolution */
+#define AUX_SET_SCALE11 0xE6 /* Set 1:1 scaling */
+#define AUX_SET_SCALE21 0xE7 /* Set 2:1 scaling */
+#define AUX_GET_SCALE 0xE9 /* Get scaling factor */
+#define AUX_SET_STREAM 0xEA /* Set stream mode */
+#define AUX_SET_SAMPLE 0xF3 /* Set sample rate */
+#define AUX_ENABLE_DEV 0xF4 /* Enable aux device */
+#define AUX_DISABLE_DEV 0xF5 /* Disable aux device */
+#define AUX_RESET 0xFF /* Reset aux device */
+
+#define AUX_BUF_SIZE 2048
+
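+/*
+ * Ring buffer into which the driver queues bytes arriving from the aux
+ * (PS/2 mouse) port; head and tail index into buf, and readers sleep on
+ * proc_list until data shows up.
+ */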
+struct aux_queue {
+ unsigned long head;
+ unsigned long tail;
+ struct wait_queue *proc_list;
+ struct fasync_struct *fasync;
+ unsigned char buf[AUX_BUF_SIZE];
+};
+
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
new file mode 100644
index 000000000..7cb47da72
--- /dev/null
+++ b/arch/ia64/kernel/Makefile
@@ -0,0 +1,42 @@
+#
+# Makefile for the linux kernel.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+.S.s:
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -E -o $*.s $<
+.S.o:
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -c -o $*.o $<
+
+all: kernel.o head.o init_task.o
+
+O_TARGET := kernel.o
+O_OBJS := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_default.o irq_internal.o ivt.o \
+ pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o sal_stub.o semaphore.o setup.o signal.o \
+ sys_ia64.o traps.o time.o unaligned.o unwind.o
+#O_OBJS := fpreg.o
+#OX_OBJS := ia64_ksyms.o
+
+ifeq ($(CONFIG_IA64_GENERIC),y)
+O_OBJS += machvec.o
+endif
+
+ifdef CONFIG_PCI
+O_OBJS += pci.o
+endif
+
+ifdef CONFIG_SMP
+O_OBJS += smp.o irq_lock.o
+endif
+
+ifeq ($(CONFIG_MCA),y)
+O_OBJS += mca.o mca_asm.o
+endif
+
+clean::
+
+include $(TOPDIR)/Rules.make
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
new file mode 100644
index 000000000..e289efab6
--- /dev/null
+++ b/arch/ia64/kernel/acpi.c
@@ -0,0 +1,308 @@
+/*
+ * Advanced Configuration and Power Interface
+ *
+ * Based on 'ACPI Specification 1.0b' February 2, 1999 and
+ * 'IA-64 Extensions to ACPI Specification' Revision 0.6
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
+ */
+
+#include <linux/config.h>
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/acpi-ext.h>
+#include <asm/page.h>
+#include <asm/efi.h>
+#include <asm/io.h>
+#include <asm/iosapic.h>
+#include <asm/irq.h>
+
+#undef ACPI_DEBUG		/* define to get verbose ACPI debug output */
+
+#ifdef CONFIG_SMP
+extern unsigned long ipi_base_addr;
+#endif
+
+/* These are ugly but will be reclaimed by the kernel */
+int __initdata acpi_cpus = 0;
+int __initdata acpi_apic_map[32];
+int __initdata cpu_cnt = 0;
+
+void (*pm_idle) (void);
+
+/*
+ * Identify usable CPUs and remember them for SMP bringup later.
+ */
+static void __init
+acpi_lsapic(char *p)
+{
+ int add = 1;
+
+ acpi_entry_lsapic_t *lsapic = (acpi_entry_lsapic_t *) p;
+
+ if ((lsapic->flags & LSAPIC_PRESENT) == 0)
+ return;
+
+ printk(" CPU %d (%.04x:%.04x): ", cpu_cnt, lsapic->eid, lsapic->id);
+
+ if ((lsapic->flags & LSAPIC_ENABLED) == 0) {
+ printk("Disabled.\n");
+ add = 0;
+ } else if (lsapic->flags & LSAPIC_PERFORMANCE_RESTRICTED) {
+ printk("Performance Restricted; ignoring.\n");
+ add = 0;
+ }
+
+ if (add) {
+ printk("Available.\n");
+ acpi_cpus++;
+ acpi_apic_map[cpu_cnt] = (lsapic->id << 8) | lsapic->eid;
+ }
+
+ cpu_cnt++;
+}
+
+/*
+ * Find all IOSAPICs and tag the iosapic_vector structure with the appropriate
+ * base addresses.
+ */
+static void __init
+acpi_iosapic(char *p)
+{
+ /*
+ * This is not good. ACPI is not necessarily limited to CONFIG_IA64_SV, yet
+ * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be
+ * a means for platform_setup() to register ACPI handlers?
+ */
+#ifdef CONFIG_IA64_DIG
+ acpi_entry_iosapic_t *iosapic = (acpi_entry_iosapic_t *) p;
+ unsigned int ver;
+ int l, v, pins;
+
+ ver = iosapic_version(iosapic->address);
+ pins = (ver >> 16) & 0xff;
+
+ printk("IOSAPIC Version %x.%x: address 0x%lx IRQs 0x%x - 0x%x\n",
+ (ver & 0xf0) >> 4, (ver & 0x0f), iosapic->address,
+ iosapic->irq_base, iosapic->irq_base + pins);
+
+ for (l = 0; l < pins; l++) {
+ v = map_legacy_irq(iosapic->irq_base + l);
+ if (v > IA64_MAX_VECTORED_IRQ) {
+ printk(" !!! IRQ %d > 255\n", v);
+ continue;
+ }
+ /* XXX Check for IOSAPIC collisions */
+ iosapic_addr(v) = (unsigned long) ioremap(iosapic->address, 0);
+ iosapic_baseirq(v) = iosapic->irq_base;
+ }
+ iosapic_init(iosapic->address);
+#endif
+}
+
+
+/*
+ * Configure legacy IRQ information in iosapic_vector
+ */
+static void __init
+acpi_legacy_irq(char *p)
+{
+ /*
+ * This is not good. ACPI is not necessarily limited to CONFIG_IA64_SV, yet
+ * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be
+ * a means for platform_setup() to register ACPI handlers?
+ */
+#ifdef CONFIG_IA64_IRQ_ACPI
+ acpi_entry_int_override_t *legacy = (acpi_entry_int_override_t *) p;
+ unsigned char vector;
+ int i;
+
+ vector = map_legacy_irq(legacy->isa_irq);
+
+ /*
+ * Clobber any old pin mapping. It may be that it gets replaced later on
+ */
+ for (i = 0; i < IA64_MAX_VECTORED_IRQ; i++) {
+ if (i == vector)
+ continue;
+ if (iosapic_pin(i) == iosapic_pin(vector))
+ iosapic_pin(i) = 0xff;
+ }
+
+ iosapic_pin(vector) = legacy->pin;
+ iosapic_bus(vector) = BUS_ISA; /* This table only overrides the ISA devices */
+ iosapic_busdata(vector) = 0;
+
+ /*
+ * External timer tick is special...
+ */
+ if (vector != TIMER_IRQ)
+ iosapic_dmode(vector) = IO_SAPIC_LOWEST_PRIORITY;
+ else
+ iosapic_dmode(vector) = IO_SAPIC_FIXED;
+
+ /* See MPS 1.4 section 4.3.4 */
+ switch (legacy->flags) {
+ case 0x5:
+ iosapic_polarity(vector) = IO_SAPIC_POL_HIGH;
+ iosapic_trigger(vector) = IO_SAPIC_EDGE;
+ break;
+ case 0x8:
+ iosapic_polarity(vector) = IO_SAPIC_POL_LOW;
+ iosapic_trigger(vector) = IO_SAPIC_EDGE;
+ break;
+ case 0xd:
+ iosapic_polarity(vector) = IO_SAPIC_POL_HIGH;
+ iosapic_trigger(vector) = IO_SAPIC_LEVEL;
+ break;
+ case 0xf:
+ iosapic_polarity(vector) = IO_SAPIC_POL_LOW;
+ iosapic_trigger(vector) = IO_SAPIC_LEVEL;
+ break;
+ default:
+ printk(" ACPI Legacy IRQ 0x%02x: Unknown flags 0x%x\n", legacy->isa_irq,
+ legacy->flags);
+ break;
+ }
+
+#ifdef ACPI_DEBUG
+ printk("Legacy ISA IRQ %x -> IA64 Vector %x IOSAPIC Pin %x Active %s %s Trigger\n",
+ legacy->isa_irq, vector, iosapic_pin(vector),
+ ((iosapic_polarity(vector) == IO_SAPIC_POL_LOW) ? "Low" : "High"),
+ ((iosapic_trigger(vector) == IO_SAPIC_LEVEL) ? "Level" : "Edge"));
+#endif /* ACPI_DEBUG */
+
+#endif /* CONFIG_IA64_IRQ_ACPI */
+}
+
+/*
+ * Info on platform interrupt sources: NMI, PMI, INIT, etc.
+ */
+static void __init
+acpi_platform(char *p)
+{
+ acpi_entry_platform_src_t *plat = (acpi_entry_platform_src_t *) p;
+
+ printk("PLATFORM: IOSAPIC %x -> Vector %lx on CPU %.04u:%.04u\n",
+ plat->iosapic_vector, plat->global_vector, plat->eid, plat->id);
+}
+
+/*
+ * Parse the ACPI Multiple SAPIC Table
+ */
+static void __init
+acpi_parse_msapic(acpi_sapic_t *msapic)
+{
+ char *p, *end;
+
+ memset(&acpi_apic_map, -1, sizeof(acpi_apic_map));
+
+#ifdef CONFIG_SMP
+ /* Base address of IPI Message Block */
+	ipi_base_addr = (unsigned long) ioremap(msapic->interrupt_block, 0);
+#endif
+
+ p = (char *) (msapic + 1);
+ end = p + (msapic->header.length - sizeof(acpi_sapic_t));
+
+ while (p < end) {
+
+ switch (*p) {
+ case ACPI_ENTRY_LOCAL_SAPIC:
+ acpi_lsapic(p);
+ break;
+
+ case ACPI_ENTRY_IO_SAPIC:
+ acpi_iosapic(p);
+ break;
+
+ case ACPI_ENTRY_INT_SRC_OVERRIDE:
+ acpi_legacy_irq(p);
+ break;
+
+ case ACPI_ENTRY_PLATFORM_INT_SOURCE:
+ acpi_platform(p);
+ break;
+
+ default:
+ break;
+ }
+
+ /* Move to next table entry. */
+ p += *(p + 1);
+ }
+
+ /* Make bootup pretty */
+ printk(" %d CPUs available, %d CPUs total\n", acpi_cpus, cpu_cnt);
+}
+
+int __init
+acpi_parse(acpi_rsdp_t *rsdp)
+{
+ acpi_rsdt_t *rsdt;
+ acpi_desc_table_hdr_t *hdrp;
+ long tables, i;
+
+ if (!rsdp) {
+ printk("Uh-oh, no ACPI Root System Description Pointer table!\n");
+ return 0;
+ }
+
+ if (strncmp(rsdp->signature, ACPI_RSDP_SIG, ACPI_RSDP_SIG_LEN)) {
+ printk("Uh-oh, ACPI RSDP signature incorrect!\n");
+ return 0;
+ }
+
+ rsdp->rsdt = __va(rsdp->rsdt);
+ rsdt = rsdp->rsdt;
+ if (strncmp(rsdt->header.signature, ACPI_RSDT_SIG, ACPI_RSDT_SIG_LEN)) {
+		printk("Uh-oh, ACPI RSDT signature incorrect!\n");
+ return 0;
+ }
+
+ printk("ACPI: %.6s %.8s %d.%d\n", rsdt->header.oem_id, rsdt->header.oem_table_id,
+ rsdt->header.oem_revision >> 16, rsdt->header.oem_revision & 0xffff);
+
+ tables = (rsdt->header.length - sizeof(acpi_desc_table_hdr_t)) / 8;
+ for (i = 0; i < tables; i++) {
+ hdrp = (acpi_desc_table_hdr_t *) __va(rsdt->entry_ptrs[i]);
+
+		/* Only interested in the MSAPIC table for now ... */
+ if (strncmp(hdrp->signature, ACPI_SAPIC_SIG, ACPI_SAPIC_SIG_LEN) != 0)
+ continue;
+
+ acpi_parse_msapic((acpi_sapic_t *) hdrp);
+	} /* for() */
+
+ if (acpi_cpus == 0) {
+		printk("ACPI: Found 0 CPUs; assuming 1\n");
+ acpi_cpus = 1; /* We've got at least one of these, no? */
+ }
+ return 1;
+}
+
+const char *
+acpi_get_sysname (void)
+{
+ /* the following should go away once we have an ACPI parser: */
+#ifdef CONFIG_IA64_GENERIC
+ return "hpsim";
+#else
+# if defined (CONFIG_IA64_HP_SIM)
+ return "hpsim";
+# elif defined (CONFIG_IA64_SGI_SN1_SIM)
+ return "sn1";
+# elif defined (CONFIG_IA64_DIG)
+ return "dig";
+# else
+# error Unknown platform. Fix acpi.c.
+# endif
+#endif
+}
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
new file mode 100644
index 000000000..dd7de2ab0
--- /dev/null
+++ b/arch/ia64/kernel/efi.c
@@ -0,0 +1,365 @@
+/*
+ * Extensible Firmware Interface
+ *
+ * Based on Extensible Firmware Interface Specification version 0.9 April 30, 1999
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Hewlett-Packard Co.
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * Not all EFI Runtime Services are implemented yet, as EFI only
+ * supports physical mode addressing on SoftSDV. This is to be fixed
+ * in a future version. --drummond 1999-07-20
+ *
+ * Implemented EFI runtime services and virtual mode calls. --davidm
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/time.h>
+
+#include <asm/efi.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+
+#define EFI_DEBUG
+
+extern efi_status_t efi_call_phys (void *, ...);
+
+struct efi efi;
+
+static efi_runtime_services_t *runtime;
+
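+/*
+ * The phys_* wrappers below invoke the firmware through efi_call_phys()
+ * while it still runs in physical mode: the function descriptor is taken
+ * from the runtime services table and every pointer argument is translated
+ * with __pa() before the call.
+ */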
+static efi_status_t
+phys_get_time (efi_time_t *tm, efi_time_cap_t *tc)
+{
+ return efi_call_phys(__va(runtime->get_time), __pa(tm), __pa(tc));
+}
+
+static efi_status_t
+phys_set_time (efi_time_t *tm)
+{
+ return efi_call_phys(__va(runtime->set_time), __pa(tm));
+}
+
+static efi_status_t
+phys_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm)
+{
+ return efi_call_phys(__va(runtime->get_wakeup_time), __pa(enabled), __pa(pending),
+ __pa(tm));
+}
+
+static efi_status_t
+phys_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm)
+{
+ return efi_call_phys(__va(runtime->set_wakeup_time), enabled, __pa(tm));
+}
+
+static efi_status_t
+phys_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr,
+ unsigned long *data_size, void *data)
+{
+ return efi_call_phys(__va(runtime->get_variable), __pa(name), __pa(vendor), __pa(attr),
+ __pa(data_size), __pa(data));
+}
+
+static efi_status_t
+phys_get_next_variable (unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor)
+{
+ return efi_call_phys(__va(runtime->get_next_variable), __pa(name_size), __pa(name),
+ __pa(vendor));
+}
+
+static efi_status_t
+phys_set_variable (efi_char16_t *name, efi_guid_t *vendor, u32 attr,
+ unsigned long data_size, void *data)
+{
+ return efi_call_phys(__va(runtime->set_variable), __pa(name), __pa(vendor), attr,
+ data_size, __pa(data));
+}
+
+static efi_status_t
+phys_get_next_high_mono_count (u64 *count)
+{
+ return efi_call_phys(__va(runtime->get_next_high_mono_count), __pa(count));
+}
+
+static void
+phys_reset_system (int reset_type, efi_status_t status,
+ unsigned long data_size, efi_char16_t *data)
+{
+ efi_call_phys(__va(runtime->reset_system), status, data_size, __pa(data));
+}
+
+/*
+ * Converts Gregorian date to seconds since 1970-01-01 00:00:00.
+ * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
+ * => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
+ *
+ * [For the Julian calendar (which was used in Russia before 1917,
+ * Britain & colonies before 1752, anywhere else before 1582,
+ * and is still in use by some communities) leave out the
+ * -year/100+year/400 terms, and add 10.]
+ *
+ * This algorithm was first published by Gauss (I think).
+ *
+ * WARNING: this function will overflow on 2106-02-07 06:28:16 on
+ * machines where long is 32-bit! (However, as time_t is signed, we
+ * will already get problems at other places on 2038-01-19 03:14:08)
+ */
+static inline unsigned long
+mktime (unsigned int year, unsigned int mon, unsigned int day, unsigned int hour,
+ unsigned int min, unsigned int sec)
+{
+ if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */
+ mon += 12; /* Puts Feb last since it has leap day */
+ year -= 1;
+ }
+ return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day)
+ + year*365 - 719499
+ )*24 + hour /* now have hours */
+ )*60 + min /* now have minutes */
+ )*60 + sec; /* finally seconds */
+}
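+
+/*
+ * Sanity check: mktime(2000, 1, 1, 0, 0, 0) folds January into the
+ * previous year (mon becomes 11, year 1999), giving
+ *	1999/4 - 1999/100 + 1999/400 + 367*11/12 + 1 + 1999*365 - 719499
+ *	= 499 - 19 + 4 + 336 + 1 + 729635 - 719499 = 10957 days,
+ * and 10957*86400 = 946684800, the expected value for 2000-01-01 00:00:00 UTC.
+ */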
+
+void
+efi_gettimeofday (struct timeval *tv)
+{
+ efi_time_t tm;
+
+	memset(tv, 0, sizeof(*tv));
+ if ((*efi.get_time)(&tm, 0) != EFI_SUCCESS)
+ return;
+
+ tv->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second);
+ tv->tv_usec = tm.nanosecond / 1000;
+}
+
+/*
+ * Walks the EFI memory map and calls CALLBACK once for each EFI
+ * memory descriptor that has memory that is available for OS use.
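+ * Physically consecutive descriptors are merged, so two adjacent chunks
+ * of, say, EFI_CONVENTIONAL_MEMORY are handed to CALLBACK as a single
+ * range (trimmed to whole pages at both ends).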
+ */
+void
+efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
+{
+ int prev_valid = 0;
+ struct range {
+ u64 start;
+ u64 end;
+ } prev, curr;
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ u64 efi_desc_size, start, end;
+
+ efi_map_start = __va(ia64_boot_param.efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param.efi_memmap_size;
+ efi_desc_size = ia64_boot_param.efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+ switch (md->type) {
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_CONVENTIONAL_MEMORY:
+#ifndef CONFIG_IA64_VIRTUAL_MEM_MAP
+ if (md->phys_addr > 1024*1024*1024UL) {
+ printk("Warning: ignoring %luMB of memory above 1GB!\n",
+ md->num_pages >> 8);
+ md->type = EFI_UNUSABLE_MEMORY;
+ continue;
+ }
+#endif
+
+ curr.start = PAGE_OFFSET + md->phys_addr;
+ curr.end = curr.start + (md->num_pages << 12);
+
+ if (!prev_valid) {
+ prev = curr;
+ prev_valid = 1;
+ } else {
+ if (curr.start < prev.start)
+ printk("Oops: EFI memory table not ordered!\n");
+
+ if (prev.end == curr.start) {
+ /* merge two consecutive memory ranges */
+ prev.end = curr.end;
+ } else {
+ start = PAGE_ALIGN(prev.start);
+ end = prev.end & PAGE_MASK;
+ if ((end > start) && (*callback)(start, end, arg) < 0)
+ return;
+ prev = curr;
+ }
+ }
+ break;
+
+ default:
+ continue;
+ }
+ }
+ if (prev_valid) {
+ start = PAGE_ALIGN(prev.start);
+ end = prev.end & PAGE_MASK;
+ if (end > start)
+ (*callback)(start, end, arg);
+ }
+}
+
+void __init
+efi_init (void)
+{
+ void *efi_map_start, *efi_map_end, *p;
+ efi_config_table_t *config_tables;
+ efi_memory_desc_t *md;
+ efi_char16_t *c16;
+ u64 efi_desc_size;
+ char vendor[100] = "unknown";
+ int i;
+
+ efi.systab = __va(ia64_boot_param.efi_systab);
+
+ /*
+ * Verify the EFI Table
+ */
+ if (efi.systab == NULL)
+ panic("Woah! Can't find EFI system table.\n");
+ if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+ panic("Woah! EFI system table signature incorrect\n");
+ if (efi.systab->hdr.revision != EFI_SYSTEM_TABLE_REVISION)
+ printk("Warning: EFI system table version mismatch: "
+ "got %d.%02d, expected %d.%02d\n",
+ efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff,
+ EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff);
+
+ config_tables = __va(efi.systab->tables);
+
+ /* Show what we know for posterity */
+ c16 = __va(efi.systab->fw_vendor);
+ if (c16) {
+		for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
+ vendor[i] = *c16++;
+ vendor[i] = '\0';
+ }
+
+ printk("EFI v%u.%.02u by %s:",
+ efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor);
+
+ for (i = 0; i < efi.systab->nr_tables; i++) {
+ if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
+ efi.mps = __va(config_tables[i].table);
+ printk(" MPS=0x%lx", config_tables[i].table);
+ } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
+ efi.acpi = __va(config_tables[i].table);
+ printk(" ACPI=0x%lx", config_tables[i].table);
+ } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
+ efi.smbios = __va(config_tables[i].table);
+ printk(" SMBIOS=0x%lx", config_tables[i].table);
+ } else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
+ efi.sal_systab = __va(config_tables[i].table);
+ printk(" SALsystab=0x%lx", config_tables[i].table);
+ }
+ }
+ printk("\n");
+
+ runtime = __va(efi.systab->runtime);
+ efi.get_time = phys_get_time;
+ efi.set_time = phys_set_time;
+ efi.get_wakeup_time = phys_get_wakeup_time;
+ efi.set_wakeup_time = phys_set_wakeup_time;
+ efi.get_variable = phys_get_variable;
+ efi.get_next_variable = phys_get_next_variable;
+ efi.set_variable = phys_set_variable;
+ efi.get_next_high_mono_count = phys_get_next_high_mono_count;
+ efi.reset_system = phys_reset_system;
+
+ efi_map_start = __va(ia64_boot_param.efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param.efi_memmap_size;
+ efi_desc_size = ia64_boot_param.efi_memdesc_size;
+
+#ifdef EFI_DEBUG
+ /* print EFI memory map: */
+ for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) {
+ md = p;
+ printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n",
+ i, md->type, md->attribute,
+ md->phys_addr, md->phys_addr + (md->num_pages<<12) - 1, md->num_pages >> 8);
+ }
+#endif
+}
+
+void
+efi_enter_virtual_mode (void)
+{
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ efi_status_t status;
+ u64 efi_desc_size;
+
+ efi_map_start = __va(ia64_boot_param.efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param.efi_memmap_size;
+ efi_desc_size = ia64_boot_param.efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+ if (md->attribute & EFI_MEMORY_RUNTIME) {
+ /*
+ * Some descriptors have multiple bits set, so the order of
+ * the tests is relevant.
+ */
+ if (md->attribute & EFI_MEMORY_WB) {
+ md->virt_addr = (u64) __va(md->phys_addr);
+ } else if (md->attribute & EFI_MEMORY_UC) {
+ md->virt_addr = (u64) ioremap(md->phys_addr, 0);
+ } else if (md->attribute & EFI_MEMORY_WC) {
+#if 0
+ md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
+ | _PAGE_D
+ | _PAGE_MA_WC
+ | _PAGE_PL_0
+ | _PAGE_AR_RW));
+#else
+ printk("EFI_MEMORY_WC mapping\n");
+ md->virt_addr = (u64) ioremap(md->phys_addr, 0);
+#endif
+ } else if (md->attribute & EFI_MEMORY_WT) {
+#if 0
+ md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
+ | _PAGE_D | _PAGE_MA_WT
+ | _PAGE_PL_0
+ | _PAGE_AR_RW));
+#else
+ printk("EFI_MEMORY_WT mapping\n");
+ md->virt_addr = (u64) ioremap(md->phys_addr, 0);
+#endif
+ }
+ }
+ }
+
+ status = efi_call_phys(__va(runtime->set_virtual_address_map),
+ ia64_boot_param.efi_memmap_size,
+ efi_desc_size, ia64_boot_param.efi_memdesc_version,
+ ia64_boot_param.efi_memmap);
+ if (status != EFI_SUCCESS) {
+ printk("Warning: unable to switch EFI into virtual mode (status=%lu)\n", status);
+ return;
+ }
+
+ /*
+ * Now that EFI is in virtual mode, we arrange for EFI functions to be
+ * called directly:
+ */
+ efi.get_time = __va(runtime->get_time);
+ efi.set_time = __va(runtime->set_time);
+ efi.get_wakeup_time = __va(runtime->get_wakeup_time);
+ efi.set_wakeup_time = __va(runtime->set_wakeup_time);
+ efi.get_variable = __va(runtime->get_variable);
+ efi.get_next_variable = __va(runtime->get_next_variable);
+ efi.set_variable = __va(runtime->set_variable);
+ efi.get_next_high_mono_count = __va(runtime->get_next_high_mono_count);
+ efi.reset_system = __va(runtime->reset_system);
+}
diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S
new file mode 100644
index 000000000..4e6f1fc63
--- /dev/null
+++ b/arch/ia64/kernel/efi_stub.S
@@ -0,0 +1,141 @@
+/*
+ * EFI call stub.
+ *
+ * Copyright (C) 1999 David Mosberger <davidm@hpl.hp.com>
+ *
+ * This stub allows us to make EFI calls in physical mode with interrupts
+ * turned off. We need this because we can't call SetVirtualMap() until
+ * the kernel has booted far enough to allow allocation of struct vma_struct
+ * entries (which we would need to map stuff with memory attributes other
+ * than uncached or writeback...). Since the GetTime() service gets called
+ * earlier than that, we need to be able to make physical mode EFI calls from
+ * the kernel.
+ */
+
+/*
+ * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
+ * Abstraction Layer Specification", revision 2.6e). Note that
+ * psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
+ * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
+ * (the br.ia instruction fails unless psr.dfl and psr.dfh are
+ * cleared). Fortunately, SAL promises not to touch the floating
+ * point regs, so at least we don't have to save f2-f127.
+ */
+#define PSR_BITS_TO_CLEAR \
+ (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \
+ IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
+ IA64_PSR_DFL | IA64_PSR_DFH)
+
+#define PSR_BITS_TO_SET \
+ (IA64_PSR_BN)
+
+#include <asm/processor.h>
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .text
+
+/*
+ * Switch execution mode from virtual to physical or vice versa.
+ *
+ * Inputs:
+ * r16 = new psr to establish
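+ *
+ * The address-space switch itself is done by rewriting the top three
+ * (region) bits of ip, sp, ar.bsp and the return address: xor-ing them
+ * with 0x7 turns region 0 (physical) into region 7 (virtual) and back.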
+ */
+ .proc switch_mode
+switch_mode:
+ {
+ alloc r2=ar.pfs,0,0,0,0
+ rsm psr.i | psr.ic // disable interrupts and interrupt collection
+ mov r15=ip
+ }
+ ;;
+ {
+ flushrs // must be first insn in group
+ srlz.i
+ shr.u r19=r15,61 // r19 <- top 3 bits of current IP
+ }
+ ;;
+ mov cr.ipsr=r16 // set new PSR
+ add r3=1f-switch_mode,r15
+ xor r15=0x7,r19 // flip the region bits
+
+ mov r17=ar.bsp
+ mov r14=rp // get return address into a general register
+
+ // switch RSE backing store:
+ ;;
+ dep r17=r15,r17,61,3 // make ar.bsp physical or virtual
+ mov r18=ar.rnat // save ar.rnat
+ ;;
+ mov ar.bspstore=r17 // this steps on ar.rnat
+ dep r3=r15,r3,61,3 // make rfi return address physical or virtual
+ ;;
+ mov cr.iip=r3
+ mov cr.ifs=r0
+ dep sp=r15,sp,61,3 // make stack pointer physical or virtual
+ ;;
+ mov ar.rnat=r18 // restore ar.rnat
+ dep r14=r15,r14,61,3 // make function return address physical or virtual
+ rfi // must be last insn in group
+ ;;
+1: mov rp=r14
+ br.ret.sptk.few rp
+ .endp switch_mode
+
+/*
+ * Inputs:
+ * in0 = address of function descriptor of EFI routine to call
+ * in1..in7 = arguments to routine
+ *
+ * Outputs:
+ * r8 = EFI_STATUS returned by called function
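+ *
+ * Note that in0 points at an ia64 function descriptor: a pair of 64-bit
+ * words holding the entry point (offset 0) and the gp value (offset 8),
+ * which is why two ld8's are done from in0 below.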
+ */
+
+ .global efi_call_phys
+ .proc efi_call_phys
+efi_call_phys:
+
+ alloc loc0=ar.pfs,8,5,7,0
+ ld8 r2=[in0],8 // load EFI function's entry point
+ mov loc1=rp
+ ;;
+ mov loc2=gp // save global pointer
+ mov loc4=ar.rsc // save RSE configuration
+ mov ar.rsc=r0 // put RSE in enforced lazy, LE mode
+ ;;
+
+ ld8 gp=[in0] // load EFI function's global pointer
+ mov out0=in1
+ mov out1=in2
+ movl r16=PSR_BITS_TO_CLEAR
+
+ mov loc3=psr // save processor status word
+ movl r17=PSR_BITS_TO_SET
+ ;;
+ mov out2=in3
+ or loc3=loc3,r17
+ mov b6=r2
+ ;;
+ andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared
+ mov out3=in4
+ br.call.sptk.few rp=switch_mode
+.ret0:
+ mov out4=in5
+ mov out5=in6
+ mov out6=in7
+ br.call.sptk.few rp=b6 // call the EFI function
+.ret1:
+ mov ar.rsc=r0 // put RSE in enforced lazy, LE mode
+ mov r16=loc3
+ br.call.sptk.few rp=switch_mode // return to virtual mode
+.ret2:
+ mov ar.rsc=loc4 // restore RSE configuration
+ mov ar.pfs=loc0
+ mov rp=loc1
+ mov gp=loc2
+ br.ret.sptk.few rp
+
+ .endp efi_call_phys
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
new file mode 100644
index 000000000..87e77c677
--- /dev/null
+++ b/arch/ia64/kernel/entry.S
@@ -0,0 +1,1261 @@
+/*
+ * ia64/kernel/entry.S
+ *
+ * Kernel entry points.
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
+ */
+/*
+ * Global (preserved) predicate usage on syscall entry/exit path:
+ *
+ *
+ * pEOI: See entry.h.
+ * pKern: See entry.h.
+ * pSys: See entry.h.
+ * pNonSys: !pSys
+ * p2: (Alias of pKern!) True if any signals are pending.
+ * p16/p17: Used by stubs calling ia64_do_signal to indicate if current task
+ * has PF_PTRACED flag bit set. p16 is true if so, p17 is the complement.
+ */
+
+#include <linux/config.h>
+
+#include <asm/errno.h>
+#include <asm/offsets.h>
+#include <asm/processor.h>
+#include <asm/unistd.h>
+
+#include "entry.h"
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ /*
+ * execve() is special because in case of success, we need to
+ * setup a null register window frame.
+ */
+ .align 16
+ .proc ia64_execve
+ia64_execve:
+ alloc loc0=ar.pfs,3,2,4,0
+ mov loc1=rp
+ mov out0=in0 // filename
+ ;; // stop bit between alloc and call
+ mov out1=in1 // argv
+ mov out2=in2 // envp
+ add out3=16,sp // regs
+ br.call.sptk.few rp=sys_execve
+.ret0: cmp4.ge p6,p0=r8,r0
+ mov ar.pfs=loc0 // restore ar.pfs
+ ;;
+(p6) mov ar.pfs=r0 // clear ar.pfs in case of success
+ sxt4 r8=r8 // return 64-bit result
+ mov rp=loc1
+
+ br.ret.sptk.few rp
+ .endp ia64_execve
+
+ .align 16
+ .global sys_clone
+ .proc sys_clone
+sys_clone:
+ alloc r16=ar.pfs,2,2,3,0;;
+ movl r28=1f
+ mov loc1=rp
+ br.cond.sptk.many save_switch_stack
+1:
+ mov loc0=r16 // save ar.pfs across do_fork
+ adds out2=IA64_SWITCH_STACK_SIZE+16,sp
+ adds r2=IA64_SWITCH_STACK_SIZE+IA64_PT_REGS_R12_OFFSET+16,sp
+ cmp.eq p8,p9=in1,r0 // usp == 0?
+ mov out0=in0 // out0 = clone_flags
+ ;;
+(p8) ld8 out1=[r2] // fetch usp from pt_regs.r12
+(p9) mov out1=in1
+ br.call.sptk.few rp=do_fork
+.ret1:
+ mov ar.pfs=loc0
+ adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack
+ mov rp=loc1
+ ;;
+ br.ret.sptk.many rp
+ .endp sys_clone
+
+/*
+ * prev_task <- switch_to(struct task_struct *next)
+ */
+ .align 16
+ .global ia64_switch_to
+ .proc ia64_switch_to
+ia64_switch_to:
+ alloc r16=ar.pfs,1,0,0,0
+ movl r28=1f
+ br.cond.sptk.many save_switch_stack
+1:
+ // disable interrupts to ensure atomicity for next few instructions:
+ mov r17=psr // M-unit
+ ;;
+ rsm psr.i // M-unit
+ dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff
+ ;;
+ srlz.d
+ ;;
+ adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
+ adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
+ ;;
+ st8 [r22]=sp // save kernel stack pointer of old task
+ ld8 sp=[r21] // load kernel stack pointer of new task
+ and r20=in0,r18 // physical address of "current"
+ ;;
+ mov r8=r13 // return pointer to previously running task
+ mov r13=in0 // set "current" pointer
+ mov ar.k6=r20 // copy "current" into ar.k6
+ ;;
+ // restore interrupts
+ mov psr.l=r17
+ ;;
+ srlz.d
+
+ movl r28=1f
+ br.cond.sptk.many load_switch_stack
+1:
+ br.ret.sptk.few rp
+ .endp ia64_switch_to
+
+ /*
+ * Like save_switch_stack, but also save the stack frame that is active
+ * at the time this function is called.
+ */
+ .align 16
+ .proc save_switch_stack_with_current_frame
+save_switch_stack_with_current_frame:
+1: {
+ alloc r16=ar.pfs,0,0,0,0 // pass ar.pfs to save_switch_stack
+ mov r28=ip
+ }
+ ;;
+ adds r28=1f-1b,r28
+ br.cond.sptk.many save_switch_stack
+1: br.ret.sptk.few rp
+ .endp save_switch_stack_with_current_frame
+/*
+ * Note that interrupts are enabled during save_switch_stack and
+ * load_switch_stack. This means that we may get an interrupt with
+ * "sp" pointing to the new kernel stack while ar.bspstore is still
+ * pointing to the old kernel backing store area. Since ar.rsc,
+ * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts,
+ * this is not a problem.
+ */
+
+/*
+ * save_switch_stack:
+ * - r16 holds ar.pfs
+ * - r28 holds address to return to
+ * - rp (b0) holds return address to save
+ */
+ .align 16
+ .global save_switch_stack
+ .proc save_switch_stack
+save_switch_stack:
+ flushrs // flush dirty regs to backing store (must be first in insn group)
+ mov r17=ar.unat // preserve caller's
+ adds r2=-IA64_SWITCH_STACK_SIZE+16,sp // r2 = &sw->caller_unat
+ ;;
+ mov r18=ar.fpsr // preserve fpsr
+ mov ar.rsc=r0 // put RSE in mode: enforced lazy, little endian, pl 0
+ ;;
+ mov r19=ar.rnat
+ adds r3=-IA64_SWITCH_STACK_SIZE+24,sp // r3 = &sw->ar_fpsr
+
+ // Note: the instruction ordering is important here: we can't
+ // store anything to the switch stack before sp is updated
+ // as otherwise an interrupt might overwrite the memory!
+ adds sp=-IA64_SWITCH_STACK_SIZE,sp
+ ;;
+ st8 [r2]=r17,16
+ st8 [r3]=r18,24
+ ;;
+ stf.spill [r2]=f2,32
+ stf.spill [r3]=f3,32
+ mov r21=b0
+ ;;
+ stf.spill [r2]=f4,32
+ stf.spill [r3]=f5,32
+ ;;
+ stf.spill [r2]=f10,32
+ stf.spill [r3]=f11,32
+ mov r22=b1
+ ;;
+ stf.spill [r2]=f12,32
+ stf.spill [r3]=f13,32
+ mov r23=b2
+ ;;
+ stf.spill [r2]=f14,32
+ stf.spill [r3]=f15,32
+ mov r24=b3
+ ;;
+ stf.spill [r2]=f16,32
+ stf.spill [r3]=f17,32
+ mov r25=b4
+ ;;
+ stf.spill [r2]=f18,32
+ stf.spill [r3]=f19,32
+ mov r26=b5
+ ;;
+ stf.spill [r2]=f20,32
+ stf.spill [r3]=f21,32
+ mov r17=ar.lc // I-unit
+ ;;
+ stf.spill [r2]=f22,32
+ stf.spill [r3]=f23,32
+ ;;
+ stf.spill [r2]=f24,32
+ stf.spill [r3]=f25,32
+ ;;
+ stf.spill [r2]=f26,32
+ stf.spill [r3]=f27,32
+ ;;
+ stf.spill [r2]=f28,32
+ stf.spill [r3]=f29,32
+ ;;
+ stf.spill [r2]=f30,32
+ stf.spill [r3]=f31,24
+ ;;
+ st8.spill [r2]=r4,16
+ st8.spill [r3]=r5,16
+ ;;
+ st8.spill [r2]=r6,16
+ st8.spill [r3]=r7,16
+ ;;
+ st8 [r2]=r21,16 // save b0
+ st8 [r3]=r22,16 // save b1
+ /* since we're done with the spills, read and save ar.unat: */
+ mov r18=ar.unat // M-unit
+ mov r20=ar.bspstore // M-unit
+ ;;
+ st8 [r2]=r23,16 // save b2
+ st8 [r3]=r24,16 // save b3
+ ;;
+ st8 [r2]=r25,16 // save b4
+ st8 [r3]=r26,16 // save b5
+ ;;
+ st8 [r2]=r16,16 // save ar.pfs
+ st8 [r3]=r17,16 // save ar.lc
+ mov r21=pr
+ ;;
+ st8 [r2]=r18,16 // save ar.unat
+ st8 [r3]=r19,16 // save ar.rnat
+ mov b7=r28
+ ;;
+ st8 [r2]=r20 // save ar.bspstore
+ st8 [r3]=r21 // save predicate registers
+ mov ar.rsc=3 // put RSE back into eager mode, pl 0
+ br.cond.sptk.few b7
+ .endp save_switch_stack
+
+/*
+ * load_switch_stack:
+ * - r28 holds address to return to
+ */
+ .align 16
+ .proc load_switch_stack
+load_switch_stack:
+ invala // invalidate ALAT
+ adds r2=IA64_SWITCH_STACK_B0_OFFSET+16,sp // get pointer to switch_stack.b0
+ mov ar.rsc=r0 // put RSE into enforced lazy mode
+ adds r3=IA64_SWITCH_STACK_B0_OFFSET+24,sp // get pointer to switch_stack.b1
+ ;;
+ ld8 r21=[r2],16 // restore b0
+ ld8 r22=[r3],16 // restore b1
+ ;;
+ ld8 r23=[r2],16 // restore b2
+ ld8 r24=[r3],16 // restore b3
+ ;;
+ ld8 r25=[r2],16 // restore b4
+ ld8 r26=[r3],16 // restore b5
+ ;;
+ ld8 r16=[r2],16 // restore ar.pfs
+ ld8 r17=[r3],16 // restore ar.lc
+ ;;
+ ld8 r18=[r2],16 // restore ar.unat
+ ld8 r19=[r3],16 // restore ar.rnat
+ mov b0=r21
+ ;;
+ ld8 r20=[r2] // restore ar.bspstore
+ ld8 r21=[r3] // restore predicate registers
+ mov ar.pfs=r16
+ ;;
+ mov ar.bspstore=r20
+ ;;
+ loadrs // invalidate stacked regs outside current frame
+ adds r2=16-IA64_SWITCH_STACK_SIZE,r2 // get pointer to switch_stack.caller_unat
+ ;; // stop bit for rnat dependency
+ mov ar.rnat=r19
+ mov ar.unat=r18 // establish unat holding the NaT bits for r4-r7
+ adds r3=16-IA64_SWITCH_STACK_SIZE,r3 // get pointer to switch_stack.ar_fpsr
+ ;;
+ ld8 r18=[r2],16 // restore caller's unat
+ ld8 r19=[r3],24 // restore fpsr
+ mov ar.lc=r17
+ ;;
+ ldf.fill f2=[r2],32
+ ldf.fill f3=[r3],32
+ mov pr=r21,-1
+ ;;
+ ldf.fill f4=[r2],32
+ ldf.fill f5=[r3],32
+ ;;
+ ldf.fill f10=[r2],32
+ ldf.fill f11=[r3],32
+ mov b1=r22
+ ;;
+ ldf.fill f12=[r2],32
+ ldf.fill f13=[r3],32
+ mov b2=r23
+ ;;
+ ldf.fill f14=[r2],32
+ ldf.fill f15=[r3],32
+ mov b3=r24
+ ;;
+ ldf.fill f16=[r2],32
+ ldf.fill f17=[r3],32
+ mov b4=r25
+ ;;
+ ldf.fill f18=[r2],32
+ ldf.fill f19=[r3],32
+ mov b5=r26
+ ;;
+ ldf.fill f20=[r2],32
+ ldf.fill f21=[r3],32
+ ;;
+ ldf.fill f22=[r2],32
+ ldf.fill f23=[r3],32
+ ;;
+ ldf.fill f24=[r2],32
+ ldf.fill f25=[r3],32
+ ;;
+ ldf.fill f26=[r2],32
+ ldf.fill f27=[r3],32
+ ;;
+ ldf.fill f28=[r2],32
+ ldf.fill f29=[r3],32
+ ;;
+ ldf.fill f30=[r2],32
+ ldf.fill f31=[r3],24
+ ;;
+ ld8.fill r4=[r2],16
+ ld8.fill r5=[r3],16
+ mov b7=r28
+ ;;
+ ld8.fill r6=[r2],16
+ ld8.fill r7=[r3],16
+ mov ar.unat=r18 // restore caller's unat
+ mov ar.fpsr=r19 // restore fpsr
+ mov ar.rsc=3 // put RSE back into eager mode, pl 0
+ adds sp=IA64_SWITCH_STACK_SIZE,sp // pop switch_stack
+ br.cond.sptk.few b7
+ .endp load_switch_stack
+
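+	//
+	// Kernel-internal syscall stub: in5 holds the syscall number, which is
+	// moved into r15 before the break.  On return, r10 == -1 indicates
+	// failure; in that case r8 holds the errno value, which gets stored
+	// into the global errno and -1 is returned instead.
+	//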
+ .align 16
+ .global __ia64_syscall
+ .proc __ia64_syscall
+__ia64_syscall:
+ .regstk 6,0,0,0
+ mov r15=in5 // put syscall number in place
+ break __BREAK_SYSCALL
+ movl r2=errno
+ cmp.eq p6,p7=-1,r10
+ ;;
+(p6) st4 [r2]=r8
+(p6) mov r8=-1
+ br.ret.sptk.few rp
+ .endp __ia64_syscall
+
+ //
+ // We invoke syscall_trace through this intermediate function to
+ // ensure that the syscall input arguments are not clobbered. We
+ // also use it to preserve b6, which contains the syscall entry point.
+ //
+ .align 16
+ .global invoke_syscall_trace
+ .proc invoke_syscall_trace
+invoke_syscall_trace:
+ alloc loc0=ar.pfs,8,3,0,0
+ ;; // WAW on CFM at the br.call
+ mov loc1=rp
+ br.call.sptk.many rp=save_switch_stack_with_current_frame // must preserve b6!!
+.ret2: mov loc2=b6
+ br.call.sptk.few rp=syscall_trace
+.ret3: adds sp=IA64_SWITCH_STACK_SIZE,sp // drop switch_stack frame
+ mov rp=loc1
+ mov ar.pfs=loc0
+ mov b6=loc2
+ ;;
+ br.ret.sptk.few rp
+ .endp invoke_syscall_trace
+
+ //
+ // Invoke a system call, but do some tracing before and after the call.
+ // We MUST preserve the current register frame throughout this routine
+ // because some system calls (such as ia64_execve) directly
+ // manipulate ar.pfs.
+ //
+ // Input:
+ // r15 = syscall number
+ // b6 = syscall entry point
+ //
+ .global ia64_trace_syscall
+ .global ia64_strace_leave_kernel
+ .global ia64_strace_clear_r8
+
+ .proc ia64_strace_clear_r8
+ia64_strace_clear_r8: // this is where we return after cloning when PF_TRACESYS is on
+# ifdef CONFIG_SMP
+ br.call.sptk.few rp=invoke_schedule_tail
+# endif
+ mov r8=0
+ br strace_check_retval
+ .endp ia64_strace_clear_r8
+
+ .proc ia64_trace_syscall
+ia64_trace_syscall:
+ br.call.sptk.few rp=invoke_syscall_trace // give parent a chance to catch syscall args
+.ret4: br.call.sptk.few rp=b6 // do the syscall
+strace_check_retval:
+.ret5: cmp.lt p6,p0=r8,r0 // syscall failed?
+ ;;
+ adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8
+ adds r3=IA64_PT_REGS_R8_OFFSET+32,sp // r3 = &pt_regs.r10
+ mov r10=0
+(p6) br.cond.sptk.few strace_error // syscall failed ->
+ ;; // avoid RAW on r10
+strace_save_retval:
+ st8.spill [r2]=r8 // store return value in slot for r8
+ st8.spill [r3]=r10 // clear error indication in slot for r10
+ia64_strace_leave_kernel:
+ br.call.sptk.few rp=invoke_syscall_trace // give parent a chance to catch return value
+.ret6: br.cond.sptk.many ia64_leave_kernel
+
+strace_error:
+ ld8 r3=[r2] // load pt_regs.r8
+ sub r9=0,r8 // negate return value to get errno value
+ ;;
+ cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0?
+ adds r3=16,r2 // r3=&pt_regs.r10
+ ;;
+(p6) mov r10=-1
+(p6) mov r8=r9
+ br.cond.sptk.few strace_save_retval
+ .endp ia64_trace_syscall
+
+/*
+ * A couple of convenience macros to help implement/understand the state
+ * restoration that happens at the end of ia64_ret_from_syscall.
+ */
+#define rARPR r31
+#define rCRIFS r30
+#define rCRIPSR r29
+#define rCRIIP r28
+#define rARRSC r27
+#define rARPFS r26
+#define rARUNAT r25
+#define rARRNAT r24
+#define rARBSPSTORE r23
+#define rKRBS r22
+#define rB6 r21
+
+ .align 16
+ .global ia64_ret_from_syscall
+ .global ia64_ret_from_syscall_clear_r8
+ .global ia64_leave_kernel
+ .proc ia64_ret_from_syscall
+ia64_ret_from_syscall_clear_r8:
+#ifdef CONFIG_SMP
+ // In SMP mode, we need to call schedule_tail to complete the scheduling process.
+ // Called by ia64_switch_to after do_fork()->copy_thread(). r8 contains the
+ // address of the previously executing task.
+ br.call.sptk.few rp=invoke_schedule_tail
+.ret7:
+#endif
+ mov r8=0
+ ;; // added stop bits to prevent r8 dependency
+ia64_ret_from_syscall:
+ cmp.ge p6,p7=r8,r0 // syscall executed successfully?
+ adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8
+ adds r3=IA64_PT_REGS_R8_OFFSET+32,sp // r3 = &pt_regs.r10
+ ;;
+(p6) st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit
+(p6) st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit
+(p7) br.cond.spnt.few handle_syscall_error // handle potential syscall failure
+
+ia64_leave_kernel:
+ // check & deliver software interrupts (bottom half handlers):
+
+ movl r2=bh_active // sheesh, why aren't these two in
+ movl r3=bh_mask // a struct??
+ ;;
+ ld8 r2=[r2]
+ ld8 r3=[r3]
+ ;;
+ and r2=r2,r3
+ ;;
+ cmp.ne p6,p7=r2,r0 // any soft interrupts ready for delivery?
+(p6) br.call.dpnt.few rp=invoke_do_bottom_half
+1:
+(pKern) br.cond.dpnt.many restore_all // yup -> skip check for rescheduling & signal delivery
+
+ // call schedule() until we find a task that doesn't have need_resched set:
+
+back_from_resched:
+ { .mii
+ adds r2=IA64_TASK_NEED_RESCHED_OFFSET,r13
+ mov r3=ip
+ adds r14=IA64_TASK_SIGPENDING_OFFSET,r13
+ }
+ ;;
+ ld8 r2=[r2]
+ ld4 r14=[r14]
+ mov rp=r3 // arrange for schedule() to return to back_from_resched
+ ;;
+ /*
+ * If pEOI is set, we need to write the cr.eoi now and then
+ * clear pEOI because both invoke_schedule() and
+ * handle_signal_delivery() may call the scheduler. Since
+ * we're returning to user-level, we get at most one nested
+ * interrupt of the same priority level, which doesn't tax the
+ * kernel stack too much.
+ */
+(pEOI) mov cr.eoi=r0
+ cmp.ne p6,p0=r2,r0
+ cmp.ne p2,p0=r14,r0 // NOTE: pKern is an alias for p2!!
+(pEOI) cmp.ne pEOI,p0=r0,r0 // clear pEOI before calling schedule()
+ srlz.d
+(p6) br.call.spnt.many b6=invoke_schedule // ignore return value
+2:
+ // check & deliver pending signals:
+(p2) br.call.spnt.few rp=handle_signal_delivery
+restore_all:
+
+ // start restoring the state saved on the kernel stack (struct pt_regs):
+
+ adds r2=IA64_PT_REGS_R8_OFFSET+16,r12
+ adds r3=IA64_PT_REGS_R8_OFFSET+24,r12
+ ;;
+ ld8.fill r8=[r2],16
+ ld8.fill r9=[r3],16
+ ;;
+ ld8.fill r10=[r2],16
+ ld8.fill r11=[r3],16
+ ;;
+ ld8.fill r16=[r2],16
+ ld8.fill r17=[r3],16
+ ;;
+ ld8.fill r18=[r2],16
+ ld8.fill r19=[r3],16
+ ;;
+ ld8.fill r20=[r2],16
+ ld8.fill r21=[r3],16
+ ;;
+ ld8.fill r22=[r2],16
+ ld8.fill r23=[r3],16
+ ;;
+ ld8.fill r24=[r2],16
+ ld8.fill r25=[r3],16
+ ;;
+ ld8.fill r26=[r2],16
+ ld8.fill r27=[r3],16
+ ;;
+ ld8.fill r28=[r2],16
+ ld8.fill r29=[r3],16
+ ;;
+ ld8.fill r30=[r2],16
+ ld8.fill r31=[r3],16
+ ;;
+ ld8 r1=[r2],16 // ar.ccv
+ ld8 r13=[r3],16 // ar.fpsr
+ ;;
+ ld8 r14=[r2],16 // b0
+ ld8 r15=[r3],16+8 // b7
+ ;;
+ ldf.fill f6=[r2],32
+ ldf.fill f7=[r3],32
+ ;;
+ ldf.fill f8=[r2],32
+ ldf.fill f9=[r3],32
+ ;;
+ mov ar.ccv=r1
+ mov ar.fpsr=r13
+ mov b0=r14
+ // turn off interrupts, interrupt collection, & data translation
+ rsm psr.i | psr.ic | psr.dt
+ ;;
+ srlz.i // EAS 2.5
+ mov b7=r15
+ ;;
+ invala // invalidate ALAT
+ dep r12=0,r12,61,3 // convert sp to physical address
+ bsw.0;; // switch back to bank 0 (must be last in insn group)
+ ;;
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ nop.i 0x0
+ ;;
+ nop.i 0x0
+ ;;
+ nop.i 0x0
+ ;;
+#endif
+ adds r16=16,r12
+ adds r17=24,r12
+ ;;
+ ld8 rCRIPSR=[r16],16 // load cr.ipsr
+ ld8 rCRIIP=[r17],16 // load cr.iip
+ ;;
+ ld8 rCRIFS=[r16],16 // load cr.ifs
+ ld8 rARUNAT=[r17],16 // load ar.unat
+ ;;
+ ld8 rARPFS=[r16],16 // load ar.pfs
+ ld8 rARRSC=[r17],16 // load ar.rsc
+ ;;
+ ld8 rARRNAT=[r16],16 // load ar.rnat (may be garbage)
+ ld8 rARBSPSTORE=[r17],16 // load ar.bspstore (may be garbage)
+ ;;
+ ld8 rARPR=[r16],16 // load predicates
+ ld8 rB6=[r17],16 // load b6
+ ;;
+ ld8 r18=[r16],16 // load ar.rsc value for "loadrs"
+ ld8.fill r1=[r17],16 // load r1
+ ;;
+ ld8.fill r2=[r16],16
+ ld8.fill r3=[r17],16
+ ;;
+ ld8.fill r12=[r16],16
+ ld8.fill r13=[r17],16
+	extr.u r19=rCRIPSR,32,2	// extract psr.cpl
+ ;;
+ ld8.fill r14=[r16],16
+ ld8.fill r15=[r17],16
+ cmp.eq p6,p7=r0,r19 // are we returning to kernel mode? (psr.cpl==0)
+ ;;
+ mov b6=rB6
+ mov ar.pfs=rARPFS
+(p6) br.cond.dpnt.few skip_rbs_switch
+
+ /*
+ * Restore user backing store.
+ *
+ * NOTE: alloc, loadrs, and cover can't be predicated.
+ *
+ * XXX This needs some scheduling/tuning once we believe it
+ * really does work as intended.
+ */
+ mov r16=ar.bsp // get existing backing store pointer
+(pNonSys) br.cond.dpnt.few dont_preserve_current_frame
+ cover // add current frame into dirty partition
+ ;;
+ mov rCRIFS=cr.ifs // fetch the cr.ifs value that "cover" produced
+ mov r17=ar.bsp // get new backing store pointer
+ ;;
+ sub r16=r17,r16 // calculate number of bytes that were added to rbs
+ ;;
+ shl r16=r16,16 // shift additional frame size into position for loadrs
+ ;;
+ add r18=r16,r18 // adjust the loadrs value
+ ;;
+#ifdef CONFIG_IA64_SOFTSDV_HACKS
+ // Reset ITM if we've missed a timer tick. Workaround for SoftSDV bug
+ mov r16 = r2
+ mov r2 = ar.itc
+ mov r17 = cr.itm
+ ;;
+ cmp.gt p6,p7 = r2, r17
+(p6) addl r17 = 100, r2
+ ;;
+ mov cr.itm = r17
+ mov r2 = r16
+#endif
+dont_preserve_current_frame:
+ alloc r16=ar.pfs,0,0,0,0 // drop the current call frame (noop for syscalls)
+ ;;
+ mov ar.rsc=r18 // load ar.rsc to be used for "loadrs"
+#ifdef CONFIG_IA32_SUPPORT
+ tbit.nz p6,p0=rCRIPSR,IA64_PSR_IS_BIT
+ ;;
+(p6) mov ar.rsc=r0 // returning to IA32 mode
+#endif
+ ;;
+ loadrs
+ ;;
+ mov ar.bspstore=rARBSPSTORE
+ ;;
+ mov ar.rnat=rARRNAT // must happen with RSE in lazy mode
+
+skip_rbs_switch:
+ mov ar.rsc=rARRSC
+ mov ar.unat=rARUNAT
+ mov cr.ifs=rCRIFS // restore cr.ifs only if not a (synchronous) syscall
+(pEOI) mov cr.eoi=r0
+ mov pr=rARPR,-1
+ mov cr.iip=rCRIIP
+ mov cr.ipsr=rCRIPSR
+ ;;
+ rfi;; // must be last instruction in an insn group
+
+handle_syscall_error:
+ /*
+ * Some system calls (e.g., ptrace, mmap) can return arbitrary
+ * values which could lead us to mistake a negative return
+ * value as a failed syscall. Those syscalls must deposit
+ * a non-zero value in pt_regs.r8 to indicate an error.
+ * If pt_regs.r8 is zero, we assume that the call completed
+ * successfully.
+ */
+ ld8 r3=[r2] // load pt_regs.r8
+ sub r9=0,r8 // negate return value to get errno
+ ;;
+ mov r10=-1 // return -1 in pt_regs.r10 to indicate error
+ cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0?
+ adds r3=16,r2 // r3=&pt_regs.r10
+ ;;
+(p6) mov r9=r8
+(p6) mov r10=0
+ ;;
+ st8.spill [r2]=r9 // store errno in pt_regs.r8 and set unat bit
+ st8.spill [r3]=r10 // store error indication in pt_regs.r10 and set unat bit
+ br.cond.sptk.many ia64_leave_kernel
+	.endp ia64_ret_from_syscall
+
+#ifdef CONFIG_SMP
+ /*
+ * Invoke schedule_tail(task) while preserving in0-in7, which may be needed
+ * in case a system call gets restarted.
+ */
+ .proc invoke_schedule_tail
+invoke_schedule_tail:
+ alloc loc0=ar.pfs,8,2,1,0
+ mov loc1=rp
+ mov out0=r8 // Address of previous task
+ ;;
+ br.call.sptk.few rp=schedule_tail
+.ret8:
+ mov ar.pfs=loc0
+ mov rp=loc1
+ br.ret.sptk.many rp
+ .endp invoke_schedule_tail
+#endif /* CONFIG_SMP */
+
+ /*
+ * Invoke do_bottom_half() while preserving in0-in7, which may be needed
+ * in case a system call gets restarted.
+ */
+ .proc invoke_do_bottom_half
+invoke_do_bottom_half:
+ alloc loc0=ar.pfs,8,2,0,0
+ mov loc1=rp
+ ;;
+ br.call.sptk.few rp=do_bottom_half
+.ret9:
+ mov ar.pfs=loc0
+ mov rp=loc1
+ br.ret.sptk.many rp
+ .endp invoke_do_bottom_half
+
+ /*
+ * Invoke schedule() while preserving in0-in7, which may be needed
+ * in case a system call gets restarted.
+ */
+ .proc invoke_schedule
+invoke_schedule:
+ alloc loc0=ar.pfs,8,2,0,0
+ mov loc1=rp
+ ;;
+ br.call.sptk.few rp=schedule
+.ret10:
+ mov ar.pfs=loc0
+ mov rp=loc1
+ br.ret.sptk.many rp
+ .endp invoke_schedule
+
+ //
+ // Setup stack and call ia64_do_signal. Note that pSys and pNonSys need to
+ // be set up by the caller. We declare 8 input registers so the system call
+ // args get preserved, in case we need to restart a system call.
+ //
+ .align 16
+ .proc handle_signal_delivery
+handle_signal_delivery:
+ alloc loc0=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
+ mov r9=ar.unat
+
+ // If the process is being ptraced, the signal may not actually be delivered to
+ // the process. Instead, SIGCHLD will be sent to the parent. We need to
+	// set up a switch_stack so ptrace can inspect the process's state if necessary.
+ adds r2=IA64_TASK_FLAGS_OFFSET,r13
+ ;;
+ ld8 r2=[r2]
+ mov out0=0 // there is no "oldset"
+ adds out1=16,sp // out1=&pt_regs
+ ;;
+(pSys) mov out2=1 // out2==1 => we're in a syscall
+ tbit.nz p16,p17=r2,PF_PTRACED_BIT
+(p16) br.cond.spnt.many setup_switch_stack
+ ;;
+back_from_setup_switch_stack:
+(pNonSys) mov out2=0 // out2==0 => not a syscall
+ adds r3=-IA64_SWITCH_STACK_SIZE+IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp
+(p17) adds sp=-IA64_SWITCH_STACK_SIZE,sp // make space for (dummy) switch_stack
+ ;;
+(p17) st8 [r3]=r9 // save ar.unat in sw->caller_unat
+ mov loc1=rp // save return address
+ br.call.sptk.few rp=ia64_do_signal
+.ret11:
+ adds r3=IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp
+ ;;
+ ld8 r9=[r3] // load new unat from sw->caller_unat
+ mov rp=loc1
+ ;;
+(p17) adds sp=IA64_SWITCH_STACK_SIZE,sp // drop (dummy) switch_stack
+(p17) mov ar.unat=r9
+(p17) mov ar.pfs=loc0
+(p17) br.ret.sptk.many rp
+
+ // restore the switch stack (ptrace may have modified it):
+ movl r28=1f
+ br.cond.sptk.many load_switch_stack
+1: br.ret.sptk.many rp
+ // NOT REACHED
+
+setup_switch_stack:
+ movl r28=back_from_setup_switch_stack
+ mov r16=loc0
+ br.cond.sptk.many save_switch_stack
+ // NOT REACHED
+
+ .endp handle_signal_delivery
+
+ .align 16
+ .proc sys_rt_sigsuspend
+ .global sys_rt_sigsuspend
+sys_rt_sigsuspend:
+ alloc loc0=ar.pfs,2,2,3,0
+ mov r9=ar.unat
+
+ // If the process is being ptraced, the signal may not actually be delivered to
+ // the process. Instead, SIGCHLD will be sent to the parent. We need to
+	// set up a switch_stack so ptrace can inspect the process's state if necessary.
+ adds r2=IA64_TASK_FLAGS_OFFSET,r13
+ ;;
+ ld8 r2=[r2]
+ mov out0=in0 // mask
+ mov out1=in1 // sigsetsize
+ ;;
+	adds out2=16,sp				// out2=&pt_regs
+ tbit.nz p16,p17=r2,PF_PTRACED_BIT
+(p16) br.cond.spnt.many sigsuspend_setup_switch_stack
+ ;;
+back_from_sigsuspend_setup_switch_stack:
+ adds r3=-IA64_SWITCH_STACK_SIZE+IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp
+(p17) adds sp=-IA64_SWITCH_STACK_SIZE,sp // make space for (dummy) switch_stack
+ ;;
+(p17) st8 [r3]=r9 // save ar.unat in sw->caller_unat
+ mov loc1=rp // save return address
+ br.call.sptk.many rp=ia64_rt_sigsuspend
+.ret12:
+ adds r3=IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp
+ ;;
+ ld8 r9=[r3] // load new unat from sw->caller_unat
+ mov rp=loc1
+ ;;
+(p17) adds sp=IA64_SWITCH_STACK_SIZE,sp // drop (dummy) switch_stack
+(p17) mov ar.unat=r9
+(p17) mov ar.pfs=loc0
+(p17) br.ret.sptk.many rp
+
+ // restore the switch stack (ptrace may have modified it):
+ movl r28=1f
+ br.cond.sptk.many load_switch_stack
+1: br.ret.sptk.many rp
+ // NOT REACHED
+
+sigsuspend_setup_switch_stack:
+ movl r28=back_from_sigsuspend_setup_switch_stack
+ mov r16=loc0
+ br.cond.sptk.many save_switch_stack
+ // NOT REACHED
+
+ .endp sys_rt_sigsuspend
+
+ .align 16
+ .proc sys_rt_sigreturn
+sys_rt_sigreturn:
+ alloc loc0=ar.pfs,8,1,1,0 // preserve all eight input regs in case of syscall restart!
+ adds out0=16,sp // out0 = &pt_regs
+ ;;
+ adds sp=-IA64_SWITCH_STACK_SIZE,sp // make space for unat and padding
+ br.call.sptk.few rp=ia64_rt_sigreturn
+.ret13:
+ adds r3=IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp
+ ;;
+ ld8 r9=[r3] // load new ar.unat
+ mov rp=r8
+ ;;
+ adds sp=IA64_SWITCH_STACK_SIZE,sp // drop (dummy) switch-stack frame
+ mov ar.unat=r9
+ mov ar.pfs=loc0
+ br.ret.sptk.many rp
+ .endp sys_rt_sigreturn
+
+ .align 16
+ .global ia64_prepare_handle_unaligned
+ .proc ia64_prepare_handle_unaligned
+ia64_prepare_handle_unaligned:
+ movl r28=1f
+ //
+ // r16 = fake ar.pfs, we simply need to make sure
+ // privilege is still 0
+ //
+ mov r16=r0
+ br.cond.sptk.few save_switch_stack
+1: br.call.sptk.few rp=ia64_handle_unaligned // stack frame setup in ivt
+.ret14:
+ movl r28=2f
+ br.cond.sptk.many load_switch_stack
+2: br.cond.sptk.many rp // goes to ia64_leave_kernel
+ .endp ia64_prepare_handle_unaligned
+
+#ifdef CONFIG_KDB
+ //
+ // This gets called from ivt.S with:
+ // SAVE MIN with cover done
+ // SAVE REST done
+ // no parameters
+ // r15 has return value = ia64_leave_kernel
+ //
+ .align 16
+ .global ia64_invoke_kdb
+ .proc ia64_invoke_kdb
+ia64_invoke_kdb:
+ alloc r16=ar.pfs,0,0,4,0
+ movl r28=1f // save_switch_stack protocol
+ ;; // avoid WAW on CFM
+ br.cond.sptk.many save_switch_stack // to flushrs
+1: mov out0=4 // kdb entry reason
+ mov out1=0 // err number
+ adds out2=IA64_SWITCH_STACK_SIZE+16,sp // pt_regs
+ add out3=16,sp // switch_stack
+ br.call.sptk.few rp=kdb
+.ret15:
+ movl r28=1f // load_switch_stack proto
+ br.cond.sptk.many load_switch_stack
+1: br.ret.sptk.many rp
+ .endp ia64_invoke_kdb
+
+ //
+ // When KDB is compiled in, we intercept each fault and give
+ // kdb a chance to run before calling the normal fault handler.
+ //
+ .align 16
+ .global ia64_invoke_kdb_fault_handler
+ .proc ia64_invoke_kdb_fault_handler
+ia64_invoke_kdb_fault_handler:
+ alloc r16=ar.pfs,5,1,5,0
+ movl r28=1f
+ mov loc0=rp // save this
+ br.cond.sptk.many save_switch_stack // to flushrs
+ ;; // avoid WAW on CFM
+1: mov out0=in0 // vector number
+ mov out1=in1 // cr.isr
+ mov out2=in2 // cr.ifa
+ mov out3=in3 // cr.iim
+ mov out4=in4 // cr.itir
+ br.call.sptk.few rp=ia64_kdb_fault_handler
+.ret16:
+
+ movl r28=1f
+ br.cond.sptk.many load_switch_stack
+1: cmp.ne p6,p0=r8,r0 // did ia64_kdb_fault_handler return 0?
+ mov rp=loc0
+(p6) br.ret.spnt.many rp // no, we're done
+ ;; // avoid WAW on rp
+ mov out0=in0 // vector number
+ mov out1=in1 // cr.isr
+ mov out2=in2 // cr.ifa
+ mov out3=in3 // cr.iim
+ mov out4=in4 // cr.itir
+ mov in0=ar.pfs // preserve ar.pfs returned by load_switch_stack
+ br.call.sptk.few rp=ia64_fault // yup -> we need to invoke normal fault handler now
+.ret17:
+ mov ar.pfs=in0
+ mov rp=loc0
+ br.ret.sptk.many rp
+
+ .endp ia64_invoke_kdb_fault_handler
+
+#endif /* CONFIG_KDB */
+
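+	//
+	// ia64 system call numbers start at 1024, so entry 0 of this table is
+	// syscall 1024 (reserved for sys_ni_syscall) and sys_exit, the second
+	// entry, is syscall 1025, matching the numbering in the comments below.
+	//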
+ .rodata
+ .align 8
+ .globl sys_call_table
+sys_call_table:
+ data8 sys_ni_syscall // This must be sys_ni_syscall! See ivt.S.
+ data8 sys_exit // 1025
+ data8 sys_read
+ data8 sys_write
+ data8 sys_open
+ data8 sys_close
+ data8 sys_creat // 1030
+ data8 sys_link
+ data8 sys_unlink
+ data8 ia64_execve
+ data8 sys_chdir
+ data8 sys_fchdir // 1035
+ data8 sys_utimes
+ data8 sys_mknod
+ data8 sys_chmod
+ data8 sys_chown
+ data8 sys_lseek // 1040
+ data8 sys_getpid
+ data8 sys_getppid
+ data8 sys_mount
+ data8 sys_umount
+ data8 sys_setuid // 1045
+ data8 sys_getuid
+ data8 sys_geteuid
+ data8 sys_ptrace
+ data8 sys_access
+ data8 sys_sync // 1050
+ data8 sys_fsync
+ data8 sys_fdatasync
+ data8 sys_kill
+ data8 sys_rename
+ data8 sys_mkdir // 1055
+ data8 sys_rmdir
+ data8 sys_dup
+ data8 sys_pipe
+ data8 sys_times
+ data8 ia64_brk // 1060
+ data8 sys_setgid
+ data8 sys_getgid
+ data8 sys_getegid
+ data8 sys_acct
+ data8 sys_ioctl // 1065
+ data8 sys_fcntl
+ data8 sys_umask
+ data8 sys_chroot
+ data8 sys_ustat
+ data8 sys_dup2 // 1070
+ data8 sys_setreuid
+ data8 sys_setregid
+ data8 sys_getresuid
+ data8 sys_setresuid
+ data8 sys_getresgid // 1075
+ data8 sys_setresgid
+ data8 sys_getgroups
+ data8 sys_setgroups
+ data8 sys_getpgid
+ data8 sys_setpgid // 1080
+ data8 sys_setsid
+ data8 sys_getsid
+ data8 sys_sethostname
+ data8 sys_setrlimit
+ data8 sys_getrlimit // 1085
+ data8 sys_getrusage
+ data8 sys_gettimeofday
+ data8 sys_settimeofday
+ data8 sys_select
+ data8 sys_poll // 1090
+ data8 sys_symlink
+ data8 sys_readlink
+ data8 sys_uselib
+ data8 sys_swapon
+ data8 sys_swapoff // 1095
+ data8 sys_reboot
+ data8 sys_truncate
+ data8 sys_ftruncate
+ data8 sys_fchmod
+ data8 sys_fchown // 1100
+ data8 ia64_getpriority
+ data8 sys_setpriority
+ data8 sys_statfs
+ data8 sys_fstatfs
+ data8 sys_ioperm // 1105
+ data8 sys_semget
+ data8 sys_semop
+ data8 sys_semctl
+ data8 sys_msgget
+ data8 sys_msgsnd // 1110
+ data8 sys_msgrcv
+ data8 sys_msgctl
+ data8 sys_shmget
+ data8 ia64_shmat
+ data8 sys_shmdt // 1115
+ data8 sys_shmctl
+ data8 sys_syslog
+ data8 sys_setitimer
+ data8 sys_getitimer
+ data8 sys_newstat // 1120
+ data8 sys_newlstat
+ data8 sys_newfstat
+ data8 sys_vhangup
+ data8 sys_lchown
+ data8 sys_vm86 // 1125
+ data8 sys_wait4
+ data8 sys_sysinfo
+ data8 sys_clone
+ data8 sys_setdomainname
+ data8 sys_newuname // 1130
+ data8 sys_adjtimex
+ data8 sys_create_module
+ data8 sys_init_module
+ data8 sys_delete_module
+ data8 sys_get_kernel_syms // 1135
+ data8 sys_query_module
+ data8 sys_quotactl
+ data8 sys_bdflush
+ data8 sys_sysfs
+ data8 sys_personality // 1140
+ data8 ia64_ni_syscall // sys_afs_syscall
+ data8 sys_setfsuid
+ data8 sys_setfsgid
+ data8 sys_getdents
+ data8 sys_flock // 1145
+ data8 sys_readv
+ data8 sys_writev
+ data8 sys_pread
+ data8 sys_pwrite
+ data8 sys_sysctl // 1150
+ data8 sys_mmap
+ data8 sys_munmap
+ data8 sys_mlock
+ data8 sys_mlockall
+ data8 sys_mprotect // 1155
+ data8 sys_mremap
+ data8 sys_msync
+ data8 sys_munlock
+ data8 sys_munlockall
+ data8 sys_sched_getparam // 1160
+ data8 sys_sched_setparam
+ data8 sys_sched_getscheduler
+ data8 sys_sched_setscheduler
+ data8 sys_sched_yield
+ data8 sys_sched_get_priority_max // 1165
+ data8 sys_sched_get_priority_min
+ data8 sys_sched_rr_get_interval
+ data8 sys_nanosleep
+ data8 sys_nfsservctl
+ data8 sys_prctl // 1170
+ data8 sys_getpagesize
+ data8 sys_mmap2
+ data8 sys_pciconfig_read
+ data8 sys_pciconfig_write
+ data8 sys_perfmonctl // 1175
+ data8 sys_sigaltstack
+ data8 sys_rt_sigaction
+ data8 sys_rt_sigpending
+ data8 sys_rt_sigprocmask
+ data8 sys_rt_sigqueueinfo // 1180
+ data8 sys_rt_sigreturn
+ data8 sys_rt_sigsuspend
+ data8 sys_rt_sigtimedwait
+ data8 sys_getcwd
+ data8 sys_capget // 1185
+ data8 sys_capset
+ data8 sys_sendfile
+ data8 sys_ni_syscall // sys_getpmsg (STREAMS)
+ data8 sys_ni_syscall // sys_putpmsg (STREAMS)
+ data8 sys_socket // 1190
+ data8 sys_bind
+ data8 sys_connect
+ data8 sys_listen
+ data8 sys_accept
+ data8 sys_getsockname // 1195
+ data8 sys_getpeername
+ data8 sys_socketpair
+ data8 sys_send
+ data8 sys_sendto
+ data8 sys_recv // 1200
+ data8 sys_recvfrom
+ data8 sys_shutdown
+ data8 sys_setsockopt
+ data8 sys_getsockopt
+ data8 sys_sendmsg // 1205
+ data8 sys_recvmsg
+ data8 sys_pivot_root
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1210
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1215
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1220
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1225
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1230
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1235
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1240
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1245
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1250
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1255
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1260
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1265
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1270
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1275
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+ data8 ia64_ni_syscall
+
diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h
new file mode 100644
index 000000000..ecef44f60
--- /dev/null
+++ b/arch/ia64/kernel/entry.h
@@ -0,0 +1,8 @@
+/*
+ * Preserved registers that are shared between code in ivt.S and entry.S. Be
+ * careful not to step on these!
+ */
+#define pEOI p1 /* should leave_kernel write EOI? */
+#define pKern p2 /* will leave_kernel return to kernel-mode? */
+#define pSys p4 /* are we processing a (synchronous) system call? */
+#define pNonSys p5 /* complement of pSys */
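+
+/*
+ * Illustrative sketch (not taken from the surrounding sources): these
+ * predicates are meant to be consumed by predicated branches/stores in
+ * ivt.S and entry.S, roughly along the lines of:
+ *
+ *	(pSys)	br.cond.sptk.few handle_syscall_path	// hypothetical label
+ *	(pKern)	br.cond.sptk.few resume_kernel		// hypothetical label
+ */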
diff --git a/arch/ia64/kernel/fw-emu.c b/arch/ia64/kernel/fw-emu.c
new file mode 100644
index 000000000..212ff299c
--- /dev/null
+++ b/arch/ia64/kernel/fw-emu.c
@@ -0,0 +1,444 @@
+/*
+ * PAL & SAL emulation.
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * For the HP simulator, this file gets included in boot/bootloader.c.
+ * For SoftSDV, this file gets included in sys_softsdv.c.
+ */
+#include <linux/config.h>
+
+#ifdef CONFIG_PCI
+# include <linux/pci.h>
+#endif
+
+#include <asm/efi.h>
+#include <asm/io.h>
+#include <asm/pal.h>
+#include <asm/sal.h>
+
+#define MB (1024*1024UL)
+
+#define NUM_MEM_DESCS 3
+
+static char fw_mem[( sizeof(efi_system_table_t)
+ + sizeof(efi_runtime_services_t)
+ + 1*sizeof(efi_config_table_t)
+ + sizeof(struct ia64_sal_systab)
+ + sizeof(struct ia64_sal_desc_entry_point)
+ + NUM_MEM_DESCS*(sizeof(efi_memory_desc_t))
+ + 1024)] __attribute__ ((aligned (8)));
+
+#ifdef CONFIG_IA64_HP_SIM
+
+/* Simulator system calls: */
+
+#define SSC_EXIT 66
+
+/*
+ * Simulator system call.
+ */
+static long
+ssc (long arg0, long arg1, long arg2, long arg3, int nr)
+{
+ register long r8 asm ("r8");
+
+ asm volatile ("mov r15=%1\n\t"
+ "break 0x80001"
+ : "=r"(r8)
+ : "r"(nr), "r"(arg0), "r"(arg1), "r"(arg2), "r"(arg3));
+ return r8;
+}
+
+#define SECS_PER_HOUR (60 * 60)
+#define SECS_PER_DAY (SECS_PER_HOUR * 24)
+
+/* Compute the broken-down time representation of T (seconds since the
+   Unix epoch, UTC) and store the year, month, day, hour, minute, and
+   second into *TP.  Return nonzero if successful. */
+int
+offtime (unsigned long t, efi_time_t *tp)
+{
+ const unsigned short int __mon_yday[2][13] =
+ {
+ /* Normal years. */
+ { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
+ /* Leap years. */
+ { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
+ };
+ long int days, rem, y;
+ const unsigned short int *ip;
+
+ days = t / SECS_PER_DAY;
+ rem = t % SECS_PER_DAY;
+ while (rem < 0) {
+ rem += SECS_PER_DAY;
+ --days;
+ }
+ while (rem >= SECS_PER_DAY) {
+ rem -= SECS_PER_DAY;
+ ++days;
+ }
+ tp->hour = rem / SECS_PER_HOUR;
+ rem %= SECS_PER_HOUR;
+ tp->minute = rem / 60;
+ tp->second = rem % 60;
+ /* January 1, 1970 was a Thursday. */
+ y = 1970;
+
+# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
+# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
+# define __isleap(year) \
+ ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
+
+ while (days < 0 || days >= (__isleap (y) ? 366 : 365)) {
+ /* Guess a corrected year, assuming 365 days per year. */
+ long int yg = y + days / 365 - (days % 365 < 0);
+
+ /* Adjust DAYS and Y to match the guessed year. */
+ days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1)
+ - LEAPS_THRU_END_OF (y - 1));
+ y = yg;
+ }
+ tp->year = y;
+ ip = __mon_yday[__isleap(y)];
+ for (y = 11; days < (long int) ip[y]; --y)
+ continue;
+ days -= ip[y];
+ tp->month = y + 1;
+ tp->day = days + 1;
+ return 1;
+}
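+
+/*
+ * Worked example (informational only): offtime(0, &tm) describes the Unix
+ * epoch, i.e. tm.year = 1970, tm.month = 1, tm.day = 1 and
+ * tm.hour = tm.minute = tm.second = 0.  One day later, t = 86400 gives
+ * tm.day = 2 with the time-of-day fields still zero.
+ */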
+
+#endif /* CONFIG_IA64_HP_SIM */
+
+/*
+ * Very ugly, but we need this in the simulator only. Once we run on
+ * real hw, this can all go away.
+ */
+extern void pal_emulator_static (void);
+
+asm ("
+ .proc pal_emulator_static
+pal_emulator_static:
+ mov r8=-1
+ cmp.eq p6,p7=6,r28 /* PAL_PTCE_INFO */
+(p7) br.cond.sptk.few 1f
+ ;;
+ mov r8=0 /* status = 0 */
+ movl r9=0x100000000 /* tc.base */
+ movl r10=0x0000000200000003 /* count[0], count[1] */
+ movl r11=0x1000000000002000 /* stride[0], stride[1] */
+ br.cond.sptk.few rp
+
+1: cmp.eq p6,p7=14,r28 /* PAL_FREQ_RATIOS */
+(p7) br.cond.sptk.few 1f
+ mov r8=0 /* status = 0 */
+ movl r9 =0x100000064 /* proc_ratio (1/100) */
+ movl r10=0x100000100 /* bus_ratio<<32 (1/256) */
+ movl r11=0x100000064 /* itc_ratio<<32 (1/100) */
+1: br.cond.sptk.few rp
+ .endp pal_emulator_static\n");
+
+/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. */
+
+#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3)
+
+#define REG_OFFSET(addr) (0x00000000000000FF & (addr))
+#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr))
+#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr))
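+
+/*
+ * Informational example of the type-1 configuration encoding assumed by the
+ * macros above: for bus 0, device 3, function 0, register 0x08 the address
+ * is 0x00001808; BUILD_CMD() turns it into 0x80001808 (enable bit set, low
+ * two bits cleared), and the SAL_PCI_CONFIG_READ code further down then
+ * issues it through ports 0xCF8/0xCFC.
+ */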
+
+static efi_status_t
+efi_get_time (efi_time_t *tm, efi_time_cap_t *tc)
+{
+#ifdef CONFIG_IA64_HP_SIM
+ struct {
+ int tv_sec; /* must be 32bits to work */
+ int tv_usec;
+ } tv32bits;
+
+ ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD);
+
+ memset(tm, 0, sizeof(*tm));
+ offtime(tv32bits.tv_sec, tm);
+
+ if (tc)
+ memset(tc, 0, sizeof(*tc));
+#else
+# error Not implemented yet...
+#endif
+ return EFI_SUCCESS;
+}
+
+static void
+efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data)
+{
+#ifdef CONFIG_IA64_HP_SIM
+ ssc(status, 0, 0, 0, SSC_EXIT);
+#else
+# error Not implemented yet...
+#endif
+}
+
+static efi_status_t
+efi_unimplemented (void)
+{
+ return EFI_UNSUPPORTED;
+}
+
+static long
+sal_emulator (long index, unsigned long in1, unsigned long in2,
+ unsigned long in3, unsigned long in4, unsigned long in5,
+ unsigned long in6, unsigned long in7)
+{
+ register long r9 asm ("r9") = 0;
+ register long r10 asm ("r10") = 0;
+ register long r11 asm ("r11") = 0;
+ long status;
+
+ /*
+ * Don't do a "switch" here since that gives us code that
+ * isn't self-relocatable.
+ */
+ status = 0;
+ if (index == SAL_FREQ_BASE) {
+ switch (in1) {
+ case SAL_FREQ_BASE_PLATFORM:
+ r9 = 100000000;
+ break;
+
+ case SAL_FREQ_BASE_INTERVAL_TIMER:
+ /*
+ * Is this supposed to be the cr.itc frequency
+ * or something platform specific? The SAL
+ * doc ain't exactly clear on this...
+ */
+#if defined(CONFIG_IA64_SOFTSDV_HACKS)
+ r9 = 4000000;
+#elif defined(CONFIG_IA64_SDV)
+ r9 = 300000000;
+#else
+ r9 = 700000000;
+#endif
+ break;
+
+ case SAL_FREQ_BASE_REALTIME_CLOCK:
+ r9 = 1;
+ break;
+
+ default:
+ status = -1;
+ break;
+ }
+ } else if (index == SAL_SET_VECTORS) {
+ ;
+ } else if (index == SAL_GET_STATE_INFO) {
+ ;
+ } else if (index == SAL_GET_STATE_INFO_SIZE) {
+ ;
+ } else if (index == SAL_CLEAR_STATE_INFO) {
+ ;
+ } else if (index == SAL_MC_RENDEZ) {
+ ;
+ } else if (index == SAL_MC_SET_PARAMS) {
+ ;
+ } else if (index == SAL_CACHE_FLUSH) {
+ ;
+ } else if (index == SAL_CACHE_INIT) {
+ ;
+#ifdef CONFIG_PCI
+ } else if (index == SAL_PCI_CONFIG_READ) {
+ /*
+ * in1 contains the PCI configuration address and in2
+ * the size of the read. The value that is read is
+ * returned via the general register r9.
+ */
+ outl(BUILD_CMD(in1), 0xCF8);
+ if (in2 == 1) /* Reading byte */
+ r9 = inb(0xCFC + ((REG_OFFSET(in1) & 3)));
+ else if (in2 == 2) /* Reading word */
+ r9 = inw(0xCFC + ((REG_OFFSET(in1) & 2)));
+ else /* Reading dword */
+ r9 = inl(0xCFC);
+ status = PCIBIOS_SUCCESSFUL;
+ } else if (index == SAL_PCI_CONFIG_WRITE) {
+ /*
+ * in1 contains the PCI configuration address, in2 the
+ * size of the write, and in3 the actual value to be
+ * written out.
+ */
+ outl(BUILD_CMD(in1), 0xCF8);
+ if (in2 == 1) /* Writing byte */
+ outb(in3, 0xCFC + ((REG_OFFSET(in1) & 3)));
+ else if (in2 == 2) /* Writing word */
+ outw(in3, 0xCFC + ((REG_OFFSET(in1) & 2)));
+ else /* Writing dword */
+ outl(in3, 0xCFC);
+ status = PCIBIOS_SUCCESSFUL;
+#endif /* CONFIG_PCI */
+ } else if (index == SAL_UPDATE_PAL) {
+ ;
+ } else {
+ status = -1;
+ }
+ asm volatile ("" :: "r"(r9), "r"(r10), "r"(r11));
+ return status;
+}
+
+
+/*
+ * This is here to work around a bug in egcs-1.1.1b that causes the
+ * compiler to crash (this seems to be a bug in the new alias analysis code).
+ */
+void *
+id (long addr)
+{
+ return (void *) addr;
+}
+
+void
+sys_fw_init (const char *args, int arglen)
+{
+ efi_system_table_t *efi_systab;
+ efi_runtime_services_t *efi_runtime;
+ efi_config_table_t *efi_tables;
+ struct ia64_sal_systab *sal_systab;
+ efi_memory_desc_t *efi_memmap, *md;
+ unsigned long *pal_desc, *sal_desc;
+ struct ia64_sal_desc_entry_point *sal_ed;
+ struct ia64_boot_param *bp;
+ unsigned char checksum = 0;
+ char *cp, *cmd_line;
+
+ memset(fw_mem, 0, sizeof(fw_mem));
+
+ pal_desc = (unsigned long *) &pal_emulator_static;
+ sal_desc = (unsigned long *) &sal_emulator;
+
+ cp = fw_mem;
+ efi_systab = (void *) cp; cp += sizeof(*efi_systab);
+ efi_runtime = (void *) cp; cp += sizeof(*efi_runtime);
+ efi_tables = (void *) cp; cp += sizeof(*efi_tables);
+ sal_systab = (void *) cp; cp += sizeof(*sal_systab);
+ sal_ed = (void *) cp; cp += sizeof(*sal_ed);
+ efi_memmap = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap);
+ cmd_line = (void *) cp;
+
+ if (args) {
+ if (arglen >= 1024)
+ arglen = 1023;
+ memcpy(cmd_line, args, arglen);
+ } else {
+ arglen = 0;
+ }
+ cmd_line[arglen] = '\0';
+
+	memset(efi_systab, 0, sizeof(*efi_systab));
+ efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE;
+ efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION;
+ efi_systab->hdr.headersize = sizeof(efi_systab->hdr);
+ efi_systab->fw_vendor = __pa("H\0e\0w\0l\0e\0t\0t\0-\0P\0a\0c\0k\0a\0r\0d\0\0");
+ efi_systab->fw_revision = 1;
+ efi_systab->runtime = __pa(efi_runtime);
+ efi_systab->nr_tables = 1;
+ efi_systab->tables = __pa(efi_tables);
+
+ efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE;
+ efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION;
+ efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr);
+ efi_runtime->get_time = __pa(&efi_get_time);
+ efi_runtime->set_time = __pa(&efi_unimplemented);
+ efi_runtime->get_wakeup_time = __pa(&efi_unimplemented);
+ efi_runtime->set_wakeup_time = __pa(&efi_unimplemented);
+ efi_runtime->set_virtual_address_map = __pa(&efi_unimplemented);
+ efi_runtime->get_variable = __pa(&efi_unimplemented);
+ efi_runtime->get_next_variable = __pa(&efi_unimplemented);
+ efi_runtime->set_variable = __pa(&efi_unimplemented);
+ efi_runtime->get_next_high_mono_count = __pa(&efi_unimplemented);
+ efi_runtime->reset_system = __pa(&efi_reset_system);
+
+ efi_tables->guid = SAL_SYSTEM_TABLE_GUID;
+ efi_tables->table = __pa(sal_systab);
+
+ /* fill in the SAL system table: */
+ memcpy(sal_systab->signature, "SST_", 4);
+ sal_systab->size = sizeof(*sal_systab);
+ sal_systab->sal_rev_minor = 1;
+ sal_systab->sal_rev_major = 0;
+ sal_systab->entry_count = 1;
+ sal_systab->ia32_bios_present = 0;
+
+#ifdef CONFIG_IA64_GENERIC
+ strcpy(sal_systab->oem_id, "Generic");
+ strcpy(sal_systab->product_id, "IA-64 system");
+#endif
+
+#ifdef CONFIG_IA64_HP_SIM
+ strcpy(sal_systab->oem_id, "Hewlett-Packard");
+ strcpy(sal_systab->product_id, "HP-simulator");
+#endif
+
+#ifdef CONFIG_IA64_SDV
+ strcpy(sal_systab->oem_id, "Intel");
+ strcpy(sal_systab->product_id, "SDV");
+#endif
+
+#ifdef CONFIG_IA64_SGI_SN1_SIM
+ strcpy(sal_systab->oem_id, "SGI");
+ strcpy(sal_systab->product_id, "SN1");
+#endif
+
+ /* fill in an entry point: */
+ sal_ed->type = SAL_DESC_ENTRY_POINT;
+ sal_ed->pal_proc = __pa(pal_desc[0]);
+ sal_ed->sal_proc = __pa(sal_desc[0]);
+ sal_ed->gp = __pa(sal_desc[1]);
+
+ for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp)
+ checksum += *cp;
+
+ sal_systab->checksum = -checksum;
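+	/*
+	 * Note (informational): the byte-wise sum of the SAL system table,
+	 * including the checksum byte itself, is now 0 mod 256.  E.g. if the
+	 * other bytes summed to 0x37, the stored checksum is 0xc9.
+	 */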
+
+ /* fill in a memory descriptor: */
+ md = &efi_memmap[0];
+ md->type = EFI_CONVENTIONAL_MEMORY;
+ md->pad = 0;
+ md->phys_addr = 2*MB;
+ md->virt_addr = 0;
+ md->num_pages = (64*MB) >> 12; /* 64MB (in 4KB pages) */
+ md->attribute = EFI_MEMORY_WB;
+
+ /* descriptor for firmware emulator: */
+ md = &efi_memmap[1];
+ md->type = EFI_RUNTIME_SERVICES_DATA;
+ md->pad = 0;
+ md->phys_addr = 1*MB;
+ md->virt_addr = 0;
+ md->num_pages = (1*MB) >> 12; /* 1MB (in 4KB pages) */
+ md->attribute = EFI_MEMORY_WB;
+
+ /* descriptor for high memory (>4GB): */
+ md = &efi_memmap[2];
+ md->type = EFI_CONVENTIONAL_MEMORY;
+ md->pad = 0;
+ md->phys_addr = 4096*MB;
+ md->virt_addr = 0;
+ md->num_pages = (32*MB) >> 12; /* 32MB (in 4KB pages) */
+ md->attribute = EFI_MEMORY_WB;
+
+ bp = id(ZERO_PAGE_ADDR);
+ bp->efi_systab = __pa(&fw_mem);
+ bp->efi_memmap = __pa(efi_memmap);
+ bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t);
+ bp->efi_memdesc_size = sizeof(efi_memory_desc_t);
+ bp->efi_memdesc_version = 1;
+ bp->command_line = __pa(cmd_line);
+ bp->console_info.num_cols = 80;
+ bp->console_info.num_rows = 25;
+ bp->console_info.orig_x = 0;
+ bp->console_info.orig_y = 24;
+ bp->num_pci_vectors = 0;
+ bp->fpswa = 0;
+}
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
new file mode 100644
index 000000000..24dc10ee4
--- /dev/null
+++ b/arch/ia64/kernel/gate.S
@@ -0,0 +1,200 @@
+/*
+ * This file contains the code that gets mapped at the upper end of
+ * each task's text region. For now, it contains the signal
+ * trampoline code only.
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <asm/offsets.h>
+#include <asm/sigcontext.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+#include <asm/page.h>
+
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .section __gate_section,"ax"
+
+ .align PAGE_SIZE
+
+# define SIGINFO_OFF 16
+# define SIGCONTEXT_OFF (SIGINFO_OFF + ((IA64_SIGINFO_SIZE + 15) & ~15))
+# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET
+# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET
+# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET
+# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET
+# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET
+# define base0 r2
+# define base1 r3
+ /*
+ * When we get here, the memory stack looks like this:
+ *
+ * +===============================+
+ * | |
+ * // struct sigcontext //
+ * | |
+ * +===============================+ <-- sp+SIGCONTEXT_OFF
+ * | |
+ * // rest of siginfo //
+ * | |
+ * + +---------------+
+ * | | siginfo.code |
+ * +---------------+---------------+
+ * | siginfo.errno | siginfo.signo |
+ * +-------------------------------+ <-- sp+SIGINFO_OFF
+ * | 16 byte of scratch |
+ * | space |
+ * +-------------------------------+ <-- sp
+ *
+ * The register stack looks _exactly_ the way it looked at the
+ * time the signal occurred. In other words, we're treading
+ * on a potential mine-field: each incoming general register
+	 * may be a NaT value (including sp, in which case the process
+ * ends up dying with a SIGSEGV).
+ *
+	 * The first thing we need to do is a cover to get the registers onto
+ * the backing store. Once that is done, we invoke the signal
+ * handler which may modify some of the machine state. After
+ * returning from the signal handler, we return control to the
+ * previous context by executing a sigreturn system call. A
+ * signal handler may call the rt_sigreturn() function to
+ * directly return to a given sigcontext. However, the
+ * user-level sigreturn() needs to do much more than calling
+ * the rt_sigreturn() system call as it needs to unwind the
+ * stack to restore preserved registers that may have been
+ * saved on the signal handler's call stack.
+ *
+ * On entry:
+ * r2 = signal number
+ * r3 = plabel of signal handler
+ * r15 = new register backing store (ignored)
+ * [sp+16] = sigframe
+ */
+	 */
+
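+/*
+ * Rough C-level sketch of the flow implemented below (informational only):
+ *
+ *	ia64_sigtramp:
+ *		handler = *plabel;		// entry point and gp from r3
+ *		invoke_sighandler();		// cover, save f6-f15/ar.bsp/CFM,
+ *						// optionally switch the rbs,
+ *						// call the handler, restore state
+ *		rt_sigreturn();			// break __BREAK_SYSCALL
+ */
+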
+ .global ia64_sigtramp
+ .proc ia64_sigtramp
+ia64_sigtramp:
+ ld8 r10=[r3],8 // get signal handler entry point
+ br.call.sptk.many rp=invoke_sighandler
+.ret0: mov r15=__NR_rt_sigreturn
+ break __BREAK_SYSCALL
+	.endp ia64_sigtramp
+
+ .proc invoke_sighandler
+invoke_sighandler:
+ ld8 gp=[r3] // get signal handler's global pointer
+ mov b6=r10
+ cover // push args in interrupted frame onto backing store
+ ;;
+ alloc r8=ar.pfs,0,1,3,0 // get CFM0, EC0, and CPL0 into r8
+ mov r17=ar.bsp // fetch ar.bsp
+ mov loc0=rp // save return pointer
+ ;;
+ cmp.ne p8,p0=r15,r0 // do we need to switch the rbs?
+ mov out0=r2 // signal number
+(p8) br.cond.spnt.few setup_rbs // yup -> (clobbers r14 and r16)
+back_from_setup_rbs:
+ adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
+ ;;
+ st8 [base0]=r17,(CFM_OFF-BSP_OFF) // save sc_ar_bsp
+ adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
+ ;;
+
+ st8 [base0]=r8 // save CFM0, EC0, and CPL0
+ adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
+ ;;
+ stf.spill [base0]=f6,32
+ stf.spill [base1]=f7,32
+ ;;
+ stf.spill [base0]=f8,32
+ stf.spill [base1]=f9,32
+ ;;
+ stf.spill [base0]=f10,32
+ stf.spill [base1]=f11,32
+ adds out1=SIGINFO_OFF,sp // siginfo pointer
+ ;;
+ stf.spill [base0]=f12,32
+ stf.spill [base1]=f13,32
+ adds out2=SIGCONTEXT_OFF,sp // sigcontext pointer
+ ;;
+ stf.spill [base0]=f14,32
+ stf.spill [base1]=f15,32
+ br.call.sptk.few rp=b6 // call the signal handler
+.ret2: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
+ ;;
+ ld8 r15=[base0],(CFM_OFF-BSP_OFF) // fetch sc_ar_bsp and advance to CFM_OFF
+ mov r14=ar.bsp
+ ;;
+ ld8 r8=[base0] // restore (perhaps modified) CFM0, EC0, and CPL0
+ cmp.ne p8,p0=r14,r15 // do we need to restore the rbs?
+(p8) br.cond.spnt.few restore_rbs // yup -> (clobbers r14 and r16)
+back_from_restore_rbs:
+ {
+ and r9=0x7f,r8 // r9 <- CFM0.sof
+ extr.u r10=r8,7,7 // r10 <- CFM0.sol
+ mov r11=ip
+ }
+ ;;
+ adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
+ adds r11=(cont-back_from_restore_rbs),r11
+ sub r9=r9,r10 // r9 <- CFM0.sof - CFM0.sol == CFM0.nout
+ ;;
+ adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
+ dep r9=r9,r9,7,7 // r9.sol = r9.sof
+ mov b6=r11
+ ;;
+ ldf.fill f6=[base0],32
+ ldf.fill f7=[base1],32
+ mov rp=loc0 // copy return pointer out of stacked register
+ ;;
+ ldf.fill f8=[base0],32
+ ldf.fill f9=[base1],32
+ ;;
+ ldf.fill f10=[base0],32
+ ldf.fill f11=[base1],32
+ ;;
+ ldf.fill f12=[base0],32
+ ldf.fill f13=[base1],32
+ mov ar.pfs=r9
+ ;;
+ ldf.fill f14=[base0],32
+ ldf.fill f15=[base1],32
+ br.ret.sptk.few b6
+cont: mov ar.pfs=r8 // ar.pfs = CFM0
+ br.ret.sptk.few rp // re-establish CFM0
+	.endp invoke_sighandler
+
+ .proc setup_rbs
+setup_rbs:
+ flushrs // must be first in insn
+ ;;
+ mov ar.rsc=r0 // put RSE into enforced lazy mode
+ adds r16=(RNAT_OFF+SIGCONTEXT_OFF),sp
+ mov r14=ar.rnat // get rnat as updated by flushrs
+ ;;
+ mov ar.bspstore=r15 // set new register backing store area
+ st8 [r16]=r14 // save sc_ar_rnat
+ ;;
+ mov ar.rsc=0xf // set RSE into eager mode, pl 3
+ invala // invalidate ALAT
+ br.cond.sptk.many back_from_setup_rbs
+
+ .proc restore_rbs
+restore_rbs:
+ flushrs
+ mov ar.rsc=r0 // put RSE into enforced lazy mode
+ adds r16=(RNAT_OFF+SIGCONTEXT_OFF),sp
+ ;;
+ ld8 r14=[r16] // get new rnat
+ mov ar.bspstore=r15 // set old register backing store area
+ ;;
+ mov ar.rnat=r14 // establish new rnat
+ mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc)
+ // invala not necessary as that will happen when returning to user-mode
+ br.cond.sptk.many back_from_restore_rbs
+
+ .endp restore_rbs
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
new file mode 100644
index 000000000..50d965e02
--- /dev/null
+++ b/arch/ia64/kernel/head.S
@@ -0,0 +1,646 @@
+/*
+ * Here is where the ball gets rolling as far as the kernel is concerned.
+ * When control is transferred to _start, the boot loader has already
+ * loaded us to the correct address. All that's left to do here is
+ * to set up the kernel's global pointer and jump to the kernel
+ * entry point.
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 Intel Corp.
+ * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
+ */
+
+#include <linux/config.h>
+
+#include <asm/fpu.h>
+#include <asm/pal.h>
+#include <asm/offsets.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .section __special_page_section,"ax"
+
+ .global empty_zero_page
+empty_zero_page:
+ .skip PAGE_SIZE
+
+ .global swapper_pg_dir
+swapper_pg_dir:
+ .skip PAGE_SIZE
+
+ .global empty_bad_page
+empty_bad_page:
+ .skip PAGE_SIZE
+
+ .global empty_bad_pte_table
+empty_bad_pte_table:
+ .skip PAGE_SIZE
+
+ .global empty_bad_pmd_table
+empty_bad_pmd_table:
+ .skip PAGE_SIZE
+
+ .rodata
+halt_msg:
+ stringz "Halting kernel\n"
+
+ .text
+ .align 16
+ .global _start
+ .proc _start
+_start:
+ // set IVT entry point---can't access I/O ports without it
+ movl r3=ia64_ivt
+ ;;
+ mov cr.iva=r3
+ movl r2=FPSR_DEFAULT
+ ;;
+ srlz.i
+ movl gp=__gp
+
+ mov ar.fpsr=r2
+ ;;
+
+#ifdef CONFIG_IA64_EARLY_PRINTK
+ mov r2=6
+ mov r3=(8<<8) | (28<<2)
+ ;;
+ mov rr[r2]=r3
+ ;;
+ srlz.i
+ ;;
+#endif
+
+#define isAP p2 // are we booting an Application Processor (not the BSP)?
+
+ // Find the init_task for the currently booting CPU. At poweron, and in
+ // UP mode, cpu_now_booting is 0
+ movl r3=cpu_now_booting
+ ;;
+ ld4 r3=[r3]
+ movl r2=init_tasks
+ ;;
+ shladd r2=r3,3,r2
+ ;;
+ ld8 r2=[r2]
+	cmp4.ne isAP,p0=r3,r0	// isAP == true if this is an application processor (AP)
+ ;; // RAW on r2
+ extr r3=r2,0,61 // r3 == phys addr of task struct
+ ;;
+
+ // load the "current" pointer (r13) and ar.k6 with the current task
+ mov r13=r2
+ mov ar.k6=r3 // Physical address
+ ;;
+ /*
+ * Reserve space at the top of the stack for "struct pt_regs". Kernel threads
+ * don't store interesting values in that structure, but the space still needs
+ * to be there because time-critical stuff such as the context switching can
+ * be implemented more efficiently (for example, __switch_to()
+ * always sets the psr.dfh bit of the task it is switching to).
+ */
+ addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2
+ addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE
+ mov ar.rsc=r0 // place RSE in enforced lazy mode
+ ;;
+ mov ar.bspstore=r2 // establish the new RSE stack
+ ;;
+ loadrs // load zero bytes from the register stack
+ ;;
+ mov ar.rsc=0x3 // place RSE in eager mode
+ ;;
+
+#ifdef CONFIG_IA64_EARLY_PRINTK
+ .rodata
+alive_msg:
+ stringz "I'm alive and well\n"
+ .previous
+
+ alloc r2=ar.pfs,0,0,2,0
+ movl out0=alive_msg
+ ;;
+ br.call.sptk.few rp=early_printk
+1: // force new bundle
+#endif /* CONFIG_IA64_EARLY_PRINTK */
+
+ alloc r2=ar.pfs,8,0,2,0
+#ifdef CONFIG_SMP
+(isAP) br.call.sptk.few rp=smp_callin
+.ret1:
+(isAP) br.cond.sptk.few self
+#endif
+
+#undef isAP
+
+ // This is executed by the bootstrap processor (bsp) only:
+
+#ifdef CONFIG_IA64_FW_EMU
+ // initialize PAL & SAL emulator:
+ br.call.sptk.few rp=sys_fw_init
+ ;;
+#endif
+ br.call.sptk.few rp=start_kernel
+.ret2:
+ addl r2=@ltoff(halt_msg),gp
+ ;;
+ ld8 out0=[r2]
+ br.call.sptk.few b0=console_print
+self: br.sptk.few self // endless loop
+ .endp _start
+
+ .align 16
+ .global ia64_save_debug_regs
+ .proc ia64_save_debug_regs
+ia64_save_debug_regs:
+ alloc r16=ar.pfs,1,0,0,0
+ mov r20=ar.lc // preserve ar.lc
+ mov ar.lc=IA64_NUM_DBG_REGS-1
+ mov r18=0
+ add r19=IA64_NUM_DBG_REGS*8,in0
+ ;;
+1: mov r16=dbr[r18]
+ mov r17=ibr[r18]
+ add r18=1,r18
+ ;;
+ st8.nta [in0]=r16,8
+ st8.nta [r19]=r17,8
+ br.cloop.sptk.few 1b
+
+ mov ar.lc=r20 // restore ar.lc
+ br.ret.sptk.few b0
+ .endp ia64_save_debug_regs
+
+ .align 16
+ .global ia64_load_debug_regs
+ .proc ia64_load_debug_regs
+ia64_load_debug_regs:
+ alloc r16=ar.pfs,1,0,0,0
+ lfetch.nta [in0]
+ mov r20=ar.lc // preserve ar.lc
+ add r19=IA64_NUM_DBG_REGS*8,in0
+ mov ar.lc=IA64_NUM_DBG_REGS-1
+ mov r18=-1
+ ;;
+1: ld8.nta r16=[in0],8
+ ld8.nta r17=[r19],8
+ add r18=1,r18
+ ;;
+ mov dbr[r18]=r16
+ mov ibr[r18]=r17
+ br.cloop.sptk.few 1b
+
+ mov ar.lc=r20 // restore ar.lc
+ br.ret.sptk.few b0
+ .endp ia64_load_debug_regs
+
+ .align 16
+ .global __ia64_save_fpu
+ .proc __ia64_save_fpu
+__ia64_save_fpu:
+ alloc r2=ar.pfs,1,0,0,0
+ adds r3=16,in0
+ ;;
+ stf.spill.nta [in0]=f32,32
+ stf.spill.nta [ r3]=f33,32
+ ;;
+ stf.spill.nta [in0]=f34,32
+ stf.spill.nta [ r3]=f35,32
+ ;;
+ stf.spill.nta [in0]=f36,32
+ stf.spill.nta [ r3]=f37,32
+ ;;
+ stf.spill.nta [in0]=f38,32
+ stf.spill.nta [ r3]=f39,32
+ ;;
+ stf.spill.nta [in0]=f40,32
+ stf.spill.nta [ r3]=f41,32
+ ;;
+ stf.spill.nta [in0]=f42,32
+ stf.spill.nta [ r3]=f43,32
+ ;;
+ stf.spill.nta [in0]=f44,32
+ stf.spill.nta [ r3]=f45,32
+ ;;
+ stf.spill.nta [in0]=f46,32
+ stf.spill.nta [ r3]=f47,32
+ ;;
+ stf.spill.nta [in0]=f48,32
+ stf.spill.nta [ r3]=f49,32
+ ;;
+ stf.spill.nta [in0]=f50,32
+ stf.spill.nta [ r3]=f51,32
+ ;;
+ stf.spill.nta [in0]=f52,32
+ stf.spill.nta [ r3]=f53,32
+ ;;
+ stf.spill.nta [in0]=f54,32
+ stf.spill.nta [ r3]=f55,32
+ ;;
+ stf.spill.nta [in0]=f56,32
+ stf.spill.nta [ r3]=f57,32
+ ;;
+ stf.spill.nta [in0]=f58,32
+ stf.spill.nta [ r3]=f59,32
+ ;;
+ stf.spill.nta [in0]=f60,32
+ stf.spill.nta [ r3]=f61,32
+ ;;
+ stf.spill.nta [in0]=f62,32
+ stf.spill.nta [ r3]=f63,32
+ ;;
+ stf.spill.nta [in0]=f64,32
+ stf.spill.nta [ r3]=f65,32
+ ;;
+ stf.spill.nta [in0]=f66,32
+ stf.spill.nta [ r3]=f67,32
+ ;;
+ stf.spill.nta [in0]=f68,32
+ stf.spill.nta [ r3]=f69,32
+ ;;
+ stf.spill.nta [in0]=f70,32
+ stf.spill.nta [ r3]=f71,32
+ ;;
+ stf.spill.nta [in0]=f72,32
+ stf.spill.nta [ r3]=f73,32
+ ;;
+ stf.spill.nta [in0]=f74,32
+ stf.spill.nta [ r3]=f75,32
+ ;;
+ stf.spill.nta [in0]=f76,32
+ stf.spill.nta [ r3]=f77,32
+ ;;
+ stf.spill.nta [in0]=f78,32
+ stf.spill.nta [ r3]=f79,32
+ ;;
+ stf.spill.nta [in0]=f80,32
+ stf.spill.nta [ r3]=f81,32
+ ;;
+ stf.spill.nta [in0]=f82,32
+ stf.spill.nta [ r3]=f83,32
+ ;;
+ stf.spill.nta [in0]=f84,32
+ stf.spill.nta [ r3]=f85,32
+ ;;
+ stf.spill.nta [in0]=f86,32
+ stf.spill.nta [ r3]=f87,32
+ ;;
+ stf.spill.nta [in0]=f88,32
+ stf.spill.nta [ r3]=f89,32
+ ;;
+ stf.spill.nta [in0]=f90,32
+ stf.spill.nta [ r3]=f91,32
+ ;;
+ stf.spill.nta [in0]=f92,32
+ stf.spill.nta [ r3]=f93,32
+ ;;
+ stf.spill.nta [in0]=f94,32
+ stf.spill.nta [ r3]=f95,32
+ ;;
+ stf.spill.nta [in0]=f96,32
+ stf.spill.nta [ r3]=f97,32
+ ;;
+ stf.spill.nta [in0]=f98,32
+ stf.spill.nta [ r3]=f99,32
+ ;;
+ stf.spill.nta [in0]=f100,32
+ stf.spill.nta [ r3]=f101,32
+ ;;
+ stf.spill.nta [in0]=f102,32
+ stf.spill.nta [ r3]=f103,32
+ ;;
+ stf.spill.nta [in0]=f104,32
+ stf.spill.nta [ r3]=f105,32
+ ;;
+ stf.spill.nta [in0]=f106,32
+ stf.spill.nta [ r3]=f107,32
+ ;;
+ stf.spill.nta [in0]=f108,32
+ stf.spill.nta [ r3]=f109,32
+ ;;
+ stf.spill.nta [in0]=f110,32
+ stf.spill.nta [ r3]=f111,32
+ ;;
+ stf.spill.nta [in0]=f112,32
+ stf.spill.nta [ r3]=f113,32
+ ;;
+ stf.spill.nta [in0]=f114,32
+ stf.spill.nta [ r3]=f115,32
+ ;;
+ stf.spill.nta [in0]=f116,32
+ stf.spill.nta [ r3]=f117,32
+ ;;
+ stf.spill.nta [in0]=f118,32
+ stf.spill.nta [ r3]=f119,32
+ ;;
+ stf.spill.nta [in0]=f120,32
+ stf.spill.nta [ r3]=f121,32
+ ;;
+ stf.spill.nta [in0]=f122,32
+ stf.spill.nta [ r3]=f123,32
+ ;;
+ stf.spill.nta [in0]=f124,32
+ stf.spill.nta [ r3]=f125,32
+ ;;
+ stf.spill.nta [in0]=f126,32
+ stf.spill.nta [ r3]=f127,32
+ br.ret.sptk.few rp
+ .endp __ia64_save_fpu
+
+ .align 16
+ .global __ia64_load_fpu
+ .proc __ia64_load_fpu
+__ia64_load_fpu:
+ alloc r2=ar.pfs,1,0,0,0
+ adds r3=16,in0
+ ;;
+ ldf.fill.nta f32=[in0],32
+ ldf.fill.nta f33=[ r3],32
+ ;;
+ ldf.fill.nta f34=[in0],32
+ ldf.fill.nta f35=[ r3],32
+ ;;
+ ldf.fill.nta f36=[in0],32
+ ldf.fill.nta f37=[ r3],32
+ ;;
+ ldf.fill.nta f38=[in0],32
+ ldf.fill.nta f39=[ r3],32
+ ;;
+ ldf.fill.nta f40=[in0],32
+ ldf.fill.nta f41=[ r3],32
+ ;;
+ ldf.fill.nta f42=[in0],32
+ ldf.fill.nta f43=[ r3],32
+ ;;
+ ldf.fill.nta f44=[in0],32
+ ldf.fill.nta f45=[ r3],32
+ ;;
+ ldf.fill.nta f46=[in0],32
+ ldf.fill.nta f47=[ r3],32
+ ;;
+ ldf.fill.nta f48=[in0],32
+ ldf.fill.nta f49=[ r3],32
+ ;;
+ ldf.fill.nta f50=[in0],32
+ ldf.fill.nta f51=[ r3],32
+ ;;
+ ldf.fill.nta f52=[in0],32
+ ldf.fill.nta f53=[ r3],32
+ ;;
+ ldf.fill.nta f54=[in0],32
+ ldf.fill.nta f55=[ r3],32
+ ;;
+ ldf.fill.nta f56=[in0],32
+ ldf.fill.nta f57=[ r3],32
+ ;;
+ ldf.fill.nta f58=[in0],32
+ ldf.fill.nta f59=[ r3],32
+ ;;
+ ldf.fill.nta f60=[in0],32
+ ldf.fill.nta f61=[ r3],32
+ ;;
+ ldf.fill.nta f62=[in0],32
+ ldf.fill.nta f63=[ r3],32
+ ;;
+ ldf.fill.nta f64=[in0],32
+ ldf.fill.nta f65=[ r3],32
+ ;;
+ ldf.fill.nta f66=[in0],32
+ ldf.fill.nta f67=[ r3],32
+ ;;
+ ldf.fill.nta f68=[in0],32
+ ldf.fill.nta f69=[ r3],32
+ ;;
+ ldf.fill.nta f70=[in0],32
+ ldf.fill.nta f71=[ r3],32
+ ;;
+ ldf.fill.nta f72=[in0],32
+ ldf.fill.nta f73=[ r3],32
+ ;;
+ ldf.fill.nta f74=[in0],32
+ ldf.fill.nta f75=[ r3],32
+ ;;
+ ldf.fill.nta f76=[in0],32
+ ldf.fill.nta f77=[ r3],32
+ ;;
+ ldf.fill.nta f78=[in0],32
+ ldf.fill.nta f79=[ r3],32
+ ;;
+ ldf.fill.nta f80=[in0],32
+ ldf.fill.nta f81=[ r3],32
+ ;;
+ ldf.fill.nta f82=[in0],32
+ ldf.fill.nta f83=[ r3],32
+ ;;
+ ldf.fill.nta f84=[in0],32
+ ldf.fill.nta f85=[ r3],32
+ ;;
+ ldf.fill.nta f86=[in0],32
+ ldf.fill.nta f87=[ r3],32
+ ;;
+ ldf.fill.nta f88=[in0],32
+ ldf.fill.nta f89=[ r3],32
+ ;;
+ ldf.fill.nta f90=[in0],32
+ ldf.fill.nta f91=[ r3],32
+ ;;
+ ldf.fill.nta f92=[in0],32
+ ldf.fill.nta f93=[ r3],32
+ ;;
+ ldf.fill.nta f94=[in0],32
+ ldf.fill.nta f95=[ r3],32
+ ;;
+ ldf.fill.nta f96=[in0],32
+ ldf.fill.nta f97=[ r3],32
+ ;;
+ ldf.fill.nta f98=[in0],32
+ ldf.fill.nta f99=[ r3],32
+ ;;
+ ldf.fill.nta f100=[in0],32
+ ldf.fill.nta f101=[ r3],32
+ ;;
+ ldf.fill.nta f102=[in0],32
+ ldf.fill.nta f103=[ r3],32
+ ;;
+ ldf.fill.nta f104=[in0],32
+ ldf.fill.nta f105=[ r3],32
+ ;;
+ ldf.fill.nta f106=[in0],32
+ ldf.fill.nta f107=[ r3],32
+ ;;
+ ldf.fill.nta f108=[in0],32
+ ldf.fill.nta f109=[ r3],32
+ ;;
+ ldf.fill.nta f110=[in0],32
+ ldf.fill.nta f111=[ r3],32
+ ;;
+ ldf.fill.nta f112=[in0],32
+ ldf.fill.nta f113=[ r3],32
+ ;;
+ ldf.fill.nta f114=[in0],32
+ ldf.fill.nta f115=[ r3],32
+ ;;
+ ldf.fill.nta f116=[in0],32
+ ldf.fill.nta f117=[ r3],32
+ ;;
+ ldf.fill.nta f118=[in0],32
+ ldf.fill.nta f119=[ r3],32
+ ;;
+ ldf.fill.nta f120=[in0],32
+ ldf.fill.nta f121=[ r3],32
+ ;;
+ ldf.fill.nta f122=[in0],32
+ ldf.fill.nta f123=[ r3],32
+ ;;
+ ldf.fill.nta f124=[in0],32
+ ldf.fill.nta f125=[ r3],32
+ ;;
+ ldf.fill.nta f126=[in0],32
+ ldf.fill.nta f127=[ r3],32
+ br.ret.sptk.few rp
+ .endp __ia64_load_fpu
+
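+/*
+ * Note (informational, based on the code below rather than on any spec):
+ * __ia64_init_fpu first spills f0 to the memory stack and then clears the
+ * high floating-point partition by mixing two ldf.fill's from that slot
+ * with one "mov fN=f0" per group, presumably so that each bundle can pair
+ * two M-unit operations with one F-unit operation.
+ */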
+ .align 16
+ .global __ia64_init_fpu
+ .proc __ia64_init_fpu
+__ia64_init_fpu:
+ alloc r2=ar.pfs,0,0,0,0
+ stf.spill [sp]=f0
+ mov f32=f0
+ ;;
+ ldf.fill f33=[sp]
+ ldf.fill f34=[sp]
+ mov f35=f0
+ ;;
+ ldf.fill f36=[sp]
+ ldf.fill f37=[sp]
+ mov f38=f0
+ ;;
+ ldf.fill f39=[sp]
+ ldf.fill f40=[sp]
+ mov f41=f0
+ ;;
+ ldf.fill f42=[sp]
+ ldf.fill f43=[sp]
+ mov f44=f0
+ ;;
+ ldf.fill f45=[sp]
+ ldf.fill f46=[sp]
+ mov f47=f0
+ ;;
+ ldf.fill f48=[sp]
+ ldf.fill f49=[sp]
+ mov f50=f0
+ ;;
+ ldf.fill f51=[sp]
+ ldf.fill f52=[sp]
+ mov f53=f0
+ ;;
+ ldf.fill f54=[sp]
+ ldf.fill f55=[sp]
+ mov f56=f0
+ ;;
+ ldf.fill f57=[sp]
+ ldf.fill f58=[sp]
+ mov f59=f0
+ ;;
+ ldf.fill f60=[sp]
+ ldf.fill f61=[sp]
+ mov f62=f0
+ ;;
+ ldf.fill f63=[sp]
+ ldf.fill f64=[sp]
+ mov f65=f0
+ ;;
+ ldf.fill f66=[sp]
+ ldf.fill f67=[sp]
+ mov f68=f0
+ ;;
+ ldf.fill f69=[sp]
+ ldf.fill f70=[sp]
+ mov f71=f0
+ ;;
+ ldf.fill f72=[sp]
+ ldf.fill f73=[sp]
+ mov f74=f0
+ ;;
+ ldf.fill f75=[sp]
+ ldf.fill f76=[sp]
+ mov f77=f0
+ ;;
+ ldf.fill f78=[sp]
+ ldf.fill f79=[sp]
+ mov f80=f0
+ ;;
+ ldf.fill f81=[sp]
+ ldf.fill f82=[sp]
+ mov f83=f0
+ ;;
+ ldf.fill f84=[sp]
+ ldf.fill f85=[sp]
+ mov f86=f0
+ ;;
+ ldf.fill f87=[sp]
+ ldf.fill f88=[sp]
+ mov f89=f0
+ ;;
+ ldf.fill f90=[sp]
+ ldf.fill f91=[sp]
+ mov f92=f0
+ ;;
+ ldf.fill f93=[sp]
+ ldf.fill f94=[sp]
+ mov f95=f0
+ ;;
+ ldf.fill f96=[sp]
+ ldf.fill f97=[sp]
+ mov f98=f0
+ ;;
+ ldf.fill f99=[sp]
+ ldf.fill f100=[sp]
+ mov f101=f0
+ ;;
+ ldf.fill f102=[sp]
+ ldf.fill f103=[sp]
+ mov f104=f0
+ ;;
+ ldf.fill f105=[sp]
+ ldf.fill f106=[sp]
+ mov f107=f0
+ ;;
+ ldf.fill f108=[sp]
+ ldf.fill f109=[sp]
+ mov f110=f0
+ ;;
+ ldf.fill f111=[sp]
+ ldf.fill f112=[sp]
+ mov f113=f0
+ ;;
+ ldf.fill f114=[sp]
+ ldf.fill f115=[sp]
+ mov f116=f0
+ ;;
+ ldf.fill f117=[sp]
+ ldf.fill f118=[sp]
+ mov f119=f0
+ ;;
+ ldf.fill f120=[sp]
+ ldf.fill f121=[sp]
+ mov f122=f0
+ ;;
+ ldf.fill f123=[sp]
+ ldf.fill f124=[sp]
+ mov f125=f0
+ ;;
+ ldf.fill f126=[sp]
+ mov f127=f0
+ br.ret.sptk.few rp
+ .endp __ia64_init_fpu
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
new file mode 100644
index 000000000..122650461
--- /dev/null
+++ b/arch/ia64/kernel/init_task.c
@@ -0,0 +1,31 @@
+/*
+ * This is where we statically allocate and initialize the initial
+ * task.
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+
+static struct vm_area_struct init_mmap = INIT_MMAP;
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS;
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+/*
+ * Initial task structure.
+ *
+ * We need to make sure that this is page aligned due to the way
+ * process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union task_union init_task_union
+ __attribute__((section("init_task"))) =
+ { INIT_TASK(init_task_union.task) };
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
new file mode 100644
index 000000000..01c201137
--- /dev/null
+++ b/arch/ia64/kernel/irq.c
@@ -0,0 +1,657 @@
+/*
+ * linux/arch/ia64/kernel/irq.c
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 6/10/99: Updated to bring in sync with x86 version to facilitate
+ * support for SMP and different interrupt controllers.
+ */
+
+#include <linux/config.h>
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/kernel_stat.h>
+#include <linux/malloc.h>
+#include <linux/ptrace.h>
+#include <linux/random.h> /* for rand_initialize_irq() */
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/threads.h>
+
+#ifdef CONFIG_KDB
+# include <linux/kdb.h>
+#endif
+
+#include <asm/bitops.h>
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/machvec.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+
+/* This is used to detect bad usage of probe_irq_on()/probe_irq_off(). */
+#define PROBE_IRQ_COOKIE 0xfeedC0FFEE
+
+struct irq_desc irq_desc[NR_IRQS];
+
+/*
+ * Micro-access to controllers is serialized over the whole
+ * system. We never hold this lock when we call the actual
+ * IRQ handler.
+ */
+spinlock_t irq_controller_lock;
+
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+spinlock_t ivr_read_lock;
+#endif
+
+unsigned int local_bh_count[NR_CPUS];
+/*
+ * used in irq_enter()/irq_exit()
+ */
+unsigned int local_irq_count[NR_CPUS];
+
+static struct irqaction timer_action = { NULL, 0, 0, NULL, NULL, NULL};
+
+#ifdef CONFIG_SMP
+static struct irqaction ipi_action = { NULL, 0, 0, NULL, NULL, NULL};
+#endif
+
+/*
+ * Legacy IRQ to IA-64 vector translation table. Any vector not in
+ * this table maps to itself (ie: irq 0x30 => IA64 vector 0x30)
+ */
+__u8 irq_to_vector_map[IA64_MIN_VECTORED_IRQ] = {
+ /* 8259 IRQ translation, first 16 entries */
+ TIMER_IRQ, 0x50, 0x0f, 0x51, 0x52, 0x53, 0x43, 0x54,
+ 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x40, 0x41,
+};
+
+/*
+ * Reverse of the above table.
+ */
+static __u8 vector_to_legacy_map[256];
+
+/*
+ * used by proc fs (/proc/interrupts)
+ */
+int
+get_irq_list (char *buf)
+{
+ int i;
+ struct irqaction * action;
+ char *p = buf;
+
+#ifdef CONFIG_SMP
+ p += sprintf(p, " ");
+ for (i = 0; i < smp_num_cpus; i++)
+ p += sprintf(p, "CPU%d ", i);
+ *p++ = '\n';
+#endif
+ /*
+ * Simply scans the external vectored interrupts
+ */
+ for (i = 0; i < NR_IRQS; i++) {
+ action = irq_desc[i].action;
+ if (!action)
+ continue;
+ p += sprintf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+ p += sprintf(p, "%10u ", kstat_irqs(i));
+#else
+ {
+ int j;
+ for (j = 0; j < smp_num_cpus; j++)
+ p += sprintf(p, "%10u ",
+ kstat.irqs[cpu_logical_map(j)][i]);
+ }
+#endif
+ p += sprintf(p, " %14s", irq_desc[i].handler->typename);
+ p += sprintf(p, " %c%s", (action->flags & SA_INTERRUPT) ? '+' : ' ',
+ action->name);
+
+ for (action = action->next; action; action = action->next) {
+ p += sprintf(p, ", %c%s",
+ (action->flags & SA_INTERRUPT)?'+':' ',
+ action->name);
+ }
+ *p++ = '\n';
+ }
+ return p - buf;
+}
+
+/*
+ * That's where the IVT branches when we get an external
+ * interrupt. This branches to the correct hardware IRQ handler via
+ * function ptr.
+ */
+void
+ia64_handle_irq (unsigned long irq, struct pt_regs *regs)
+{
+ unsigned long bsp, sp, saved_tpr;
+
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+# ifndef CONFIG_SMP
+ static unsigned int max_prio = 0;
+# endif
+ unsigned int prev_prio;
+ unsigned long eoi_ptr;
+
+# ifdef CONFIG_USB
+ disable_usb();
+# endif
+ /*
+ * Stop IPIs by getting the ivr_read_lock
+ */
+ spin_lock(&ivr_read_lock);
+
+ /*
+ * Disable PCI writes
+ */
+ outl(0x80ff81c0, 0xcf8);
+ outl(0x73002188, 0xcfc);
+ eoi_ptr = inl(0xcfc);
+
+ irq = ia64_get_ivr();
+
+ /*
+ * Enable PCI writes
+ */
+ outl(0x73182188, 0xcfc);
+
+ spin_unlock(&ivr_read_lock);
+
+# ifdef CONFIG_USB
+ reenable_usb();
+# endif
+
+# ifndef CONFIG_SMP
+ prev_prio = max_prio;
+ if (irq < max_prio) {
+ printk ("ia64_handle_irq: got irq %lu while %u was in progress!\n",
+ irq, max_prio);
+
+ } else
+ max_prio = irq;
+# endif /* !CONFIG_SMP */
+#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */
+
+ /* Always set TPR to limit maximum interrupt nesting depth to
+ * 16 (without this, it would be ~240, which could easily lead
+	 * to kernel stack overflows).
+ */
+ saved_tpr = ia64_get_tpr();
+ ia64_srlz_d();
+ ia64_set_tpr(irq);
+ ia64_srlz_d();
+
+ asm ("mov %0=ar.bsp" : "=r"(bsp));
+ asm ("mov %0=sp" : "=r"(sp));
+
+ if ((sp - bsp) < 1024) {
+ static long last_time;
+ static unsigned char count;
+
+ if (count > 5 && jiffies - last_time > 5*HZ)
+ count = 0;
+ if (++count < 5) {
+ last_time = jiffies;
+ printk("ia64_handle_irq: DANGER: less than 1KB of free stack space!!\n"
+ "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
+ }
+#ifdef CONFIG_KDB
+ kdb(KDB_REASON_PANIC, 0, regs);
+#endif
+ }
+
+ /*
+ * The interrupt is now said to be in service
+ */
+ if (irq >= NR_IRQS) {
+ printk("handle_irq: invalid irq=%lu\n", irq);
+ goto out;
+ }
+
+ ++kstat.irqs[smp_processor_id()][irq];
+
+ if (irq == IA64_SPURIOUS_INT) {
+ printk("handle_irq: spurious interrupt\n");
+ goto out;
+ }
+
+ /*
+ * Handle the interrupt by calling the hardware specific handler (IOSAPIC, Internal, etc).
+ */
+ (*irq_desc[irq].handler->handle)(irq, regs);
+ out:
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ {
+ long pEOI;
+
+ asm ("mov %0=0;; (p1) mov %0=1" : "=r"(pEOI));
+ if (!pEOI) {
+ printk("Yikes: ia64_handle_irq() without pEOI!!\n");
+ asm volatile ("cmp.eq p1,p0=r0,r0" : "=r"(pEOI));
+# ifdef CONFIG_KDB
+ kdb(KDB_REASON_PANIC, 0, regs);
+# endif
+ }
+ }
+
+ local_irq_disable();
+# ifndef CONFIG_SMP
+ if (max_prio == irq)
+ max_prio = prev_prio;
+# endif /* !CONFIG_SMP */
+#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */
+
+ ia64_srlz_d();
+ ia64_set_tpr(saved_tpr);
+ ia64_srlz_d();
+}
+
+
+/*
+ * This should really return information about whether we should do
+ * bottom half handling etc. Right now we end up _always_ checking the
+ * bottom half, which is a waste of time and is not what some drivers
+ * would prefer.
+ */
+int
+invoke_irq_handlers (unsigned int irq, struct pt_regs *regs, struct irqaction *action)
+{
+ void (*handler)(int, void *, struct pt_regs *);
+ unsigned long flags, flags_union = 0;
+ int cpu = smp_processor_id();
+ unsigned int requested_irq;
+ void *dev_id;
+
+ irq_enter(cpu, irq);
+
+ if ((action->flags & SA_INTERRUPT) == 0)
+ __sti();
+
+ do {
+ flags = action->flags;
+ requested_irq = irq;
+ if ((flags & SA_LEGACY) != 0)
+ requested_irq = vector_to_legacy_map[irq];
+ flags_union |= flags;
+ handler = action->handler;
+ dev_id = action->dev_id;
+ action = action->next;
+ (*handler)(requested_irq, dev_id, regs);
+ } while (action);
+ if ((flags_union & SA_SAMPLE_RANDOM) != 0)
+ add_interrupt_randomness(irq);
+ __cli();
+
+ irq_exit(cpu, irq);
+ return flags_union | 1; /* force the "do bottom halves" bit */
+}
+
+void
+disable_irq_nosync (unsigned int irq)
+{
+ unsigned long flags;
+
+ irq = map_legacy_irq(irq);
+
+ spin_lock_irqsave(&irq_controller_lock, flags);
+	if (irq_desc[irq].depth++ == 0) {
+ irq_desc[irq].status &= ~IRQ_ENABLED;
+ irq_desc[irq].handler->disable(irq);
+ }
+ spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+
+/*
+ * Synchronous version of the above, making sure the IRQ is
+ * no longer running on any other IRQ..
+ */
+void
+disable_irq (unsigned int irq)
+{
+ disable_irq_nosync(irq);
+
+ irq = map_legacy_irq(irq);
+
+ if (!local_irq_count[smp_processor_id()]) {
+ do {
+ barrier();
+ } while ((irq_desc[irq].status & IRQ_INPROGRESS) != 0);
+ }
+}
+
+void
+enable_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ irq = map_legacy_irq(irq);
+
+ spin_lock_irqsave(&irq_controller_lock, flags);
+ switch (irq_desc[irq].depth) {
+ case 1:
+ irq_desc[irq].status |= IRQ_ENABLED;
+ (*irq_desc[irq].handler->enable)(irq);
+ /* fall through */
+ default:
+ --irq_desc[irq].depth;
+ break;
+
+ case 0:
+ printk("enable_irq: unbalanced from %p\n", __builtin_return_address(0));
+ }
+ spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+
+/*
+ * This function encapsulates the initialization that needs to be
+ * performed under the protection of lock irq_controller_lock. The
+ * lock must have been acquired by the time this is called.
+ */
+static inline int
+setup_irq (unsigned int irq, struct irqaction *new)
+{
+ int shared = 0;
+ struct irqaction *old, **p;
+
+ p = &irq_desc[irq].action;
+ old = *p;
+ if (old) {
+ if (!(old->flags & new->flags & SA_SHIRQ)) {
+ return -EBUSY;
+ }
+ /* add new interrupt at end of irq queue */
+ do {
+ p = &old->next;
+ old = *p;
+ } while (old);
+ shared = 1;
+ }
+ *p = new;
+
+ /* when sharing do not unmask */
+ if (!shared) {
+ irq_desc[irq].depth = 0;
+ irq_desc[irq].status |= IRQ_ENABLED;
+ (*irq_desc[irq].handler->startup)(irq);
+ }
+ return 0;
+}
+
+int
+request_irq (unsigned int requested_irq, void (*handler)(int, void *, struct pt_regs *),
+ unsigned long irqflags, const char * devname, void *dev_id)
+{
+ int retval, need_kfree = 0;
+ struct irqaction *action;
+ unsigned long flags;
+ unsigned int irq;
+
+#ifdef IA64_DEBUG
+ printk("request_irq(0x%x) called\n", requested_irq);
+#endif
+ /*
+ * Sanity-check: shared interrupts should REALLY pass in
+ * a real dev-ID, otherwise we'll have trouble later trying
+ * to figure out which interrupt is which (messes up the
+ * interrupt freeing logic etc).
+ */
+ if ((irqflags & SA_SHIRQ) && !dev_id)
+ printk("Bad boy: %s (at %p) called us without a dev_id!\n",
+ devname, current_text_addr());
+
+ irq = map_legacy_irq(requested_irq);
+ if (irq != requested_irq)
+ irqflags |= SA_LEGACY;
+
+ if (irq >= NR_IRQS)
+ return -EINVAL;
+
+ if (!handler)
+ return -EINVAL;
+
+ /*
+ * The timer_action and ipi_action cannot be allocated
+	 * dynamically because their initialization happens really early
+	 * on in init/main.c; at that point the memory allocator has
+	 * not yet been initialized.  So we use statically reserved
+	 * buffers for them.  In some sense that's no big deal because we
+	 * need them no matter what.
+ */
+ if (irq == TIMER_IRQ)
+ action = &timer_action;
+#ifdef CONFIG_SMP
+ else if (irq == IPI_IRQ)
+ action = &ipi_action;
+#endif
+ else {
+ action = kmalloc(sizeof(struct irqaction), GFP_KERNEL);
+ need_kfree = 1;
+ }
+
+ if (!action)
+ return -ENOMEM;
+
+ action->handler = handler;
+ action->flags = irqflags;
+ action->mask = 0;
+ action->name = devname;
+ action->next = NULL;
+ action->dev_id = dev_id;
+
+ if ((irqflags & SA_SAMPLE_RANDOM) != 0)
+ rand_initialize_irq(irq);
+
+ spin_lock_irqsave(&irq_controller_lock, flags);
+ retval = setup_irq(irq, action);
+ spin_unlock_irqrestore(&irq_controller_lock, flags);
+
+ if (need_kfree && retval)
+ kfree(action);
+
+ return retval;
+}
+
+void
+free_irq (unsigned int irq, void *dev_id)
+{
+ struct irqaction *action, **p;
+ unsigned long flags;
+
+ /*
+ * some sanity checks first
+ */
+ if (irq >= NR_IRQS) {
+ printk("Trying to free IRQ%d\n",irq);
+ return;
+ }
+
+ irq = map_legacy_irq(irq);
+
+ /*
+ * Find the corresponding irqaction
+ */
+ spin_lock_irqsave(&irq_controller_lock, flags);
+ for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) {
+ if (action->dev_id != dev_id)
+ continue;
+
+ /* Found it - now remove it from the list of entries */
+ *p = action->next;
+ if (!irq_desc[irq].action) {
+ irq_desc[irq].status &= ~IRQ_ENABLED;
+ (*irq_desc[irq].handler->shutdown)(irq);
+ }
+
+ spin_unlock_irqrestore(&irq_controller_lock, flags);
+
+#ifdef CONFIG_SMP
+ /* Wait to make sure it's not being used on another CPU */
+ while (irq_desc[irq].status & IRQ_INPROGRESS)
+ barrier();
+#endif
+
+ if (action != &timer_action
+#ifdef CONFIG_SMP
+ && action != &ipi_action
+#endif
+ )
+ kfree(action);
+ return;
+ }
+ printk("Trying to free free IRQ%d\n", irq);
+}
+
+/*
+ * IRQ autodetection code. Note that the return value of
+ * probe_irq_on() is no longer being used (its role has been replaced
+ * by the IRQ_AUTODETECT flag).
+ */
+unsigned long
+probe_irq_on (void)
+{
+ struct irq_desc *id;
+ unsigned long delay;
+
+#ifdef IA64_DEBUG
+ printk("probe_irq_on() called\n");
+#endif
+
+ spin_lock_irq(&irq_controller_lock);
+ for (id = irq_desc; id < irq_desc + NR_IRQS; ++id) {
+ if (!id->action) {
+ id->status |= IRQ_AUTODETECT | IRQ_WAITING;
+ (*id->handler->startup)(id - irq_desc);
+ }
+ }
+ spin_unlock_irq(&irq_controller_lock);
+
+ /* wait for spurious interrupts to trigger: */
+
+ for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
+ /* about 100ms delay */
+ synchronize_irq();
+
+ /* filter out obviously spurious interrupts: */
+ spin_lock_irq(&irq_controller_lock);
+ for (id = irq_desc; id < irq_desc + NR_IRQS; ++id) {
+ unsigned int status = id->status;
+
+ if (!(status & IRQ_AUTODETECT))
+ continue;
+
+ if (!(status & IRQ_WAITING)) {
+ id->status = status & ~IRQ_AUTODETECT;
+ (*id->handler->shutdown)(id - irq_desc);
+ }
+ }
+ spin_unlock_irq(&irq_controller_lock);
+ return PROBE_IRQ_COOKIE; /* return meaningless return value */
+}
+
+int
+probe_irq_off (unsigned long cookie)
+{
+ int irq_found, nr_irqs;
+ struct irq_desc *id;
+
+#ifdef IA64_DEBUG
+ printk("probe_irq_off(cookie=0x%lx) -> ", cookie);
+#endif
+
+ if (cookie != PROBE_IRQ_COOKIE)
+ printk("bad irq probe from %p\n", __builtin_return_address(0));
+
+ nr_irqs = 0;
+ irq_found = 0;
+ spin_lock_irq(&irq_controller_lock);
+ for (id = irq_desc + IA64_MIN_VECTORED_IRQ; id < irq_desc + NR_IRQS; ++id) {
+ unsigned int status = id->status;
+
+ if (!(status & IRQ_AUTODETECT))
+ continue;
+
+ if (!(status & IRQ_WAITING)) {
+ if (!nr_irqs)
+ irq_found = (id - irq_desc);
+ ++nr_irqs;
+ }
+ id->status = status & ~IRQ_AUTODETECT;
+ (*id->handler->shutdown)(id - irq_desc);
+ }
+ spin_unlock_irq(&irq_controller_lock);
+
+ if (nr_irqs > 1)
+ irq_found = -irq_found;
+
+#ifdef IA64_DEBUG
+ printk("%d\n", irq_found);
+#endif
+ return irq_found;
+}
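+
+/*
+ * Hedged usage sketch (not part of this file): a legacy driver would
+ * typically use the probing interface above roughly as follows, where
+ * trigger_device_interrupt() stands in for whatever device poke raises
+ * the interrupt:
+ *
+ *	unsigned long cookie = probe_irq_on();
+ *	trigger_device_interrupt(dev);		// hypothetical helper
+ *	udelay(100);
+ *	irq = probe_irq_off(cookie);		// >0: unique IRQ found
+ *						// <0: several IRQs fired
+ *						//  0: nothing detected
+ */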
+
+#ifdef CONFIG_SMP
+
+void __init
+init_IRQ_SMP (void)
+{
+ if (request_irq(IPI_IRQ, handle_IPI, 0, "IPI", NULL))
+ panic("Could not allocate IPI Interrupt Handler!");
+}
+
+#endif
+
+void __init
+init_IRQ (void)
+{
+ int i;
+
+ for (i = 0; i < IA64_MIN_VECTORED_IRQ; ++i)
+ vector_to_legacy_map[irq_to_vector_map[i]] = i;
+
+ for (i = 0; i < NR_IRQS; ++i) {
+ irq_desc[i].handler = &irq_type_default;
+ }
+
+ irq_desc[TIMER_IRQ].handler = &irq_type_ia64_internal;
+#ifdef CONFIG_SMP
+ /*
+ * Configure the IPI vector and handler
+ */
+ irq_desc[IPI_IRQ].handler = &irq_type_ia64_internal;
+ init_IRQ_SMP();
+#endif
+
+ ia64_set_pmv(1 << 16);
+ ia64_set_cmcv(CMC_IRQ); /* XXX fix me */
+
+ platform_irq_init(irq_desc);
+
+ /* clear TPR to enable all interrupt classes: */
+ ia64_set_tpr(0);
+}
+
+/* TBD:
+ * Certain IA64 platforms can have inter-processor interrupt support.
+ * This interface is supposed to default to the IA64 IPI block-based
+ * mechanism if the platform doesn't provide a separate mechanism
+ * for IPIs.
+ * Choices : (1) Extend hw_interrupt_type interfaces
+ * (2) Use machine vector mechanism
+ * For now defining the following interface as a place holder.
+ */
+void
+ipi_send (int cpu, int vector, int delivery_mode)
+{
+}
diff --git a/arch/ia64/kernel/irq_default.c b/arch/ia64/kernel/irq_default.c
new file mode 100644
index 000000000..bf8c62642
--- /dev/null
+++ b/arch/ia64/kernel/irq_default.c
@@ -0,0 +1,30 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include <asm/irq.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+
+
+static int
+irq_default_handle_irq (unsigned int irq, struct pt_regs *regs)
+{
+ printk("Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id());
+ return 0; /* don't call do_bottom_half() for spurious interrupts */
+}
+
+static void
+irq_default_noop (unsigned int irq)
+{
+ /* nuthing to do... */
+}
+
+struct hw_interrupt_type irq_type_default = {
+ "default",
+ (void (*)(unsigned long)) irq_default_noop, /* init */
+ irq_default_noop, /* startup */
+ irq_default_noop, /* shutdown */
+ irq_default_handle_irq, /* handle */
+ irq_default_noop, /* enable */
+ irq_default_noop /* disable */
+};
diff --git a/arch/ia64/kernel/irq_internal.c b/arch/ia64/kernel/irq_internal.c
new file mode 100644
index 000000000..1ae904fe8
--- /dev/null
+++ b/arch/ia64/kernel/irq_internal.c
@@ -0,0 +1,71 @@
+/*
+ * Internal Interrupt Vectors
+ *
+ * This takes care of interrupts that are generated by the CPU
+ * internally, such as the ITC and IPI interrupts.
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include <asm/irq.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+
+/*
+ * This is identical to IOSAPIC handle_irq. It may go away . . .
+ */
+static int
+internal_handle_irq (unsigned int irq, struct pt_regs *regs)
+{
+ struct irqaction *action = 0;
+ struct irq_desc *id = irq_desc + irq;
+ unsigned int status;
+ int retval;
+
+ spin_lock(&irq_controller_lock);
+ {
+ status = id->status;
+ if ((status & IRQ_ENABLED) != 0)
+ action = id->action;
+ id->status = status & ~(IRQ_REPLAY | IRQ_WAITING);
+ }
+ spin_unlock(&irq_controller_lock);
+
+ if (!action) {
+ if (!(id->status & IRQ_AUTODETECT))
+ printk("irq_hpsim_handle_irq: unexpected interrupt %u\n", irq);
+ return 0;
+ }
+
+ retval = invoke_irq_handlers(irq, regs, action);
+
+ spin_lock(&irq_controller_lock);
+ {
+ status = (id->status & ~IRQ_INPROGRESS);
+ id->status = status;
+ }
+ spin_unlock(&irq_controller_lock);
+
+ return retval;
+}
+
+static void
+internal_noop (unsigned int irq)
+{
+ /* nuthing to do... */
+}
+
+struct hw_interrupt_type irq_type_ia64_internal = {
+ "IA64 internal",
+ (void (*)(unsigned long)) internal_noop, /* init */
+ internal_noop, /* startup */
+ internal_noop, /* shutdown */
+ internal_handle_irq, /* handle */
+ internal_noop, /* enable */
+ internal_noop /* disable */
+};
+
diff --git a/arch/ia64/kernel/irq_lock.c b/arch/ia64/kernel/irq_lock.c
new file mode 100644
index 000000000..9c512dd4e
--- /dev/null
+++ b/arch/ia64/kernel/irq_lock.c
@@ -0,0 +1,287 @@
+/*
+ * SMP IRQ Lock support
+ *
+ * Global interrupt locks for SMP. Allow interrupts to come in on any
+ * CPU, yet make cli/sti act globally to protect critical regions..
+ * These functions usually appear in irq.c, but I think it's cleaner this way.
+ *
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/threads.h>
+#include <linux/init.h>
+
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/irq.h>
+#include <asm/bitops.h>
+#include <asm/pgtable.h>
+#include <asm/delay.h>
+
+int global_irq_holder = NO_PROC_ID;
+spinlock_t global_irq_lock;
+atomic_t global_irq_count;
+atomic_t global_bh_count;
+atomic_t global_bh_lock;
+
+#define INIT_STUCK (1<<26)
+
+void
+irq_enter(int cpu, int irq)
+{
+ int stuck = INIT_STUCK;
+
+ hardirq_enter(cpu, irq);
+ barrier();
+ while (global_irq_lock.lock) {
+ if (cpu == global_irq_holder) {
+ break;
+ }
+
+ if (!--stuck) {
+ printk("irq_enter stuck (irq=%d, cpu=%d, global=%d)\n",
+ irq, cpu,global_irq_holder);
+ stuck = INIT_STUCK;
+ }
+ barrier();
+ }
+}
+
+void
+irq_exit(int cpu, int irq)
+{
+ hardirq_exit(cpu, irq);
+ release_irqlock(cpu);
+}
+
+static void
+show(char * str)
+{
+ int i;
+ unsigned long *stack;
+ int cpu = smp_processor_id();
+
+ printk("\n%s, CPU %d:\n", str, cpu);
+ printk("irq: %d [%d %d]\n",
+ atomic_read(&global_irq_count), local_irq_count[0], local_irq_count[1]);
+ printk("bh: %d [%d %d]\n",
+ atomic_read(&global_bh_count), local_bh_count[0], local_bh_count[1]);
+
+ stack = (unsigned long *) &stack;
+ for (i = 40; i ; i--) {
+ unsigned long x = *++stack;
+ if (x > (unsigned long) &get_options && x < (unsigned long) &vsprintf) {
+ printk("<[%08lx]> ", x);
+ }
+ }
+}
+
+#define MAXCOUNT 100000000
+
+static inline void
+wait_on_bh(void)
+{
+ int count = MAXCOUNT;
+ do {
+ if (!--count) {
+ show("wait_on_bh");
+ count = ~0;
+ }
+ /* nothing .. wait for the other bh's to go away */
+ } while (atomic_read(&global_bh_count) != 0);
+}
+
+static inline void
+wait_on_irq(int cpu)
+{
+ int count = MAXCOUNT;
+
+ for (;;) {
+
+ /*
+ * Wait until all interrupts are gone. Wait
+ * for bottom half handlers unless we're
+ * already executing in one..
+ */
+ if (!atomic_read(&global_irq_count)) {
+ if (local_bh_count[cpu] || !atomic_read(&global_bh_count))
+ break;
+ }
+
+ /* Duh, we have to loop. Release the lock to avoid deadlocks */
+ spin_unlock(&global_irq_lock);
+ mb();
+
+ for (;;) {
+ if (!--count) {
+ show("wait_on_irq");
+ count = ~0;
+ }
+ __sti();
+ udelay(cpu + 1);
+ __cli();
+ if (atomic_read(&global_irq_count))
+ continue;
+ if (global_irq_lock.lock)
+ continue;
+ if (!local_bh_count[cpu] && atomic_read(&global_bh_count))
+ continue;
+ if (spin_trylock(&global_irq_lock))
+ break;
+ }
+ }
+}
+
+/*
+ * This is called when we want to synchronize with
+ * bottom half handlers. We need to wait until
+ * no other CPU is executing any bottom half handler.
+ *
+ * Don't wait if we're already running in an interrupt
+ * context or are inside a bh handler.
+ */
+void
+synchronize_bh(void)
+{
+ if (atomic_read(&global_bh_count)) {
+ int cpu = smp_processor_id();
+ if (!local_irq_count[cpu] && !local_bh_count[cpu]) {
+ wait_on_bh();
+ }
+ }
+}
+
+
+/*
+ * This is called when we want to synchronize with
+ * interrupts. We may for example tell a device to
+ * stop sending interrupts: but to make sure there
+ * are no interrupts that are executing on another
+ * CPU we need to call this function.
+ */
+void
+synchronize_irq(void)
+{
+ int cpu = smp_processor_id();
+ int local_count;
+ int global_count;
+
+ mb();
+ do {
+ local_count = local_irq_count[cpu];
+ global_count = atomic_read(&global_irq_count);
+ } while (global_count != local_count);
+}
+
+static inline void
+get_irqlock(int cpu)
+{
+ if (!spin_trylock(&global_irq_lock)) {
+ /* do we already hold the lock? */
+ if ((unsigned char) cpu == global_irq_holder)
+ return;
+ /* Uhhuh.. Somebody else got it. Wait.. */
+ spin_lock(&global_irq_lock);
+ }
+ /*
+ * We also need to make sure that nobody else is running
+ * in an interrupt context.
+ */
+ wait_on_irq(cpu);
+
+ /*
+ * Ok, finally..
+ */
+ global_irq_holder = cpu;
+}
+
+/*
+ * A global "cli()" while in an interrupt context
+ * turns into just a local cli(). Interrupts
+ * should use spinlocks for the (very unlikely)
+ * case that they ever want to protect against
+ * each other.
+ *
+ * If we already have local interrupts disabled,
+ * this will not turn a local disable into a
+ * global one (problems with spinlocks: this makes
+ * save_flags+cli+sti usable inside a spinlock).
+ */
+void
+__global_cli(void)
+{
+ unsigned long flags;
+
+ __save_flags(flags);
+ if (flags & IA64_PSR_I) {
+ int cpu = smp_processor_id();
+ __cli();
+ if (!local_irq_count[cpu])
+ get_irqlock(cpu);
+ }
+}
+
+void
+__global_sti(void)
+{
+ int cpu = smp_processor_id();
+
+ if (!local_irq_count[cpu])
+ release_irqlock(cpu);
+ __sti();
+}
+
+/*
+ * SMP flags value to restore to:
+ * 0 - global cli
+ * 1 - global sti
+ * 2 - local cli
+ * 3 - local sti
+ */
+unsigned long
+__global_save_flags(void)
+{
+ int retval;
+ int local_enabled;
+ unsigned long flags;
+
+ __save_flags(flags);
+ local_enabled = flags & IA64_PSR_I;
+ /* default to local */
+ retval = 2 + local_enabled;
+
+ /* check for global flags if we're not in an interrupt */
+ if (!local_irq_count[smp_processor_id()]) {
+ if (local_enabled)
+ retval = 1;
+ if (global_irq_holder == (unsigned char) smp_processor_id())
+ retval = 0;
+ }
+ return retval;
+}
+
+void
+__global_restore_flags(unsigned long flags)
+{
+ switch (flags) {
+ case 0:
+ __global_cli();
+ break;
+ case 1:
+ __global_sti();
+ break;
+ case 2:
+ __cli();
+ break;
+ case 3:
+ __sti();
+ break;
+ default:
+ printk("global_restore_flags: %08lx (%08lx)\n",
+ flags, (&flags)[-1]);
+ }
+}
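As a usage sketch of the 0-3 encoding above (assuming the generic save_flags()/cli()/restore_flags() macros map onto these __global_* routines on SMP builds):

	unsigned long flags;

	save_flags(flags);	/* flags becomes 0, 1, 2, or 3 as listed above */
	cli();			/* global cli: takes the global IRQ lock unless already in an interrupt */
	/* ... touch data shared with interrupt handlers ... */
	restore_flags(flags);	/* returns to whichever of the four states we started in */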
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
new file mode 100644
index 000000000..4c3ac242a
--- /dev/null
+++ b/arch/ia64/kernel/ivt.S
@@ -0,0 +1,1342 @@
+/*
+ * arch/ia64/kernel/ivt.S
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1998-2000 David Mosberger <davidm@hpl.hp.com>
+ */
+
+#include <linux/config.h>
+
+#include <asm/break.h>
+#include <asm/offsets.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+
+#include "entry.h"
+
+/*
+ * A couple of convenience macros that make writing and reading
+ * SAVE_MIN and SAVE_REST easier.
+ */
+#define rARPR r31
+#define rCRIFS r30
+#define rCRIPSR r29
+#define rCRIIP r28
+#define rARRSC r27
+#define rARPFS r26
+#define rARUNAT r25
+#define rARRNAT r24
+#define rARBSPSTORE r23
+#define rKRBS r22
+#define rB6 r21
+#define rR1 r20
+
+/*
+ * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ * psr.ic: off
+ * psr.dt: off
+ * r31: contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ * psr.ic: off
+ * psr.dt: off
+ * r2 = points to &pt_regs.r16
+ * r12 = kernel sp (kernel virtual address)
+ * r13 = points to current task_struct (kernel virtual address)
+ * p15 = TRUE if psr.i is set in cr.ipsr
+ * predicate registers (other than p6, p7, and p15), b6, r3, r8, r9, r10, r11, r14, r15:
+ * preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro. This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+#define DO_SAVE_MIN(COVER,EXTRA) \
+ mov rARRSC=ar.rsc; \
+ mov rARPFS=ar.pfs; \
+ mov rR1=r1; \
+ mov rARUNAT=ar.unat; \
+ mov rCRIPSR=cr.ipsr; \
+ mov rB6=b6; /* rB6 = branch reg 6 */ \
+ mov rCRIIP=cr.iip; \
+ mov r1=ar.k6; /* r1 = current */ \
+ ;; \
+ invala; \
+ extr.u r16=rCRIPSR,32,2; /* extract psr.cpl */ \
+ ;; \
+ cmp.eq pKern,p7=r0,r16; /* are we in kernel mode already? (psr.cpl==0) */ \
+ /* switch from user to kernel RBS: */ \
+ COVER; \
+ ;; \
+(p7) mov ar.rsc=r0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
+(p7) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \
+ ;; \
+(p7) mov rARRNAT=ar.rnat; \
+(pKern) dep r1=0,sp,61,3; /* compute physical addr of sp */ \
+(p7) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
+(p7) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \
+(p7) dep rKRBS=-1,rKRBS,61,3; /* compute kernel virtual addr of RBS */ \
+ ;; \
+(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
+(p7) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \
+ ;; \
+(p7) mov r18=ar.bsp; \
+(p7) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
+ \
+ mov r16=r1; /* initialize first base pointer */ \
+ adds r17=8,r1; /* initialize second base pointer */ \
+ ;; \
+ st8 [r16]=rCRIPSR,16; /* save cr.ipsr */ \
+ st8 [r17]=rCRIIP,16; /* save cr.iip */ \
+(pKern) mov r18=r0; /* make sure r18 isn't NaT */ \
+ ;; \
+ st8 [r16]=rCRIFS,16; /* save cr.ifs */ \
+ st8 [r17]=rARUNAT,16; /* save ar.unat */ \
+(p7) sub r18=r18,rKRBS; /* r18=RSE.ndirty*8 */ \
+ ;; \
+ st8 [r16]=rARPFS,16; /* save ar.pfs */ \
+ st8 [r17]=rARRSC,16; /* save ar.rsc */ \
+ tbit.nz p15,p0=rCRIPSR,IA64_PSR_I_BIT \
+ ;; /* avoid RAW on r16 & r17 */ \
+(pKern) adds r16=16,r16; /* skip over ar_rnat field */ \
+(pKern) adds r17=16,r17; /* skip over ar_bspstore field */ \
+(p7) st8 [r16]=rARRNAT,16; /* save ar.rnat */ \
+(p7) st8 [r17]=rARBSPSTORE,16; /* save ar.bspstore */ \
+ ;; \
+ st8 [r16]=rARPR,16; /* save predicates */ \
+ st8 [r17]=rB6,16; /* save b6 */ \
+ shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \
+ ;; \
+ st8 [r16]=r18,16; /* save ar.rsc value for "loadrs" */ \
+ st8.spill [r17]=rR1,16; /* save original r1 */ \
+ cmp.ne pEOI,p0=r0,r0 /* clear pEOI by default */ \
+ ;; \
+ st8.spill [r16]=r2,16; \
+ st8.spill [r17]=r3,16; \
+ adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
+ ;; \
+ st8.spill [r16]=r12,16; \
+ st8.spill [r17]=r13,16; \
+ cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \
+ ;; \
+ st8.spill [r16]=r14,16; \
+ st8.spill [r17]=r15,16; \
+ dep r14=-1,r0,61,3; \
+ ;; \
+ st8.spill [r16]=r8,16; \
+ st8.spill [r17]=r9,16; \
+ adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \
+ ;; \
+ st8.spill [r16]=r10,16; \
+ st8.spill [r17]=r11,16; \
+ mov r13=ar.k6; /* establish `current' */ \
+ ;; \
+ or r2=r2,r14; /* make first base a kernel virtual address */ \
+ EXTRA; \
+ movl r1=__gp; /* establish kernel global pointer */ \
+ ;; \
+ or r12=r12,r14; /* make sp a kernel virtual address */ \
+ or r13=r13,r14; /* make `current' a kernel virtual address */ \
+ bsw.1;; /* switch back to bank 1 (must be last in insn group) */
+
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+# define STOPS nop.i 0x0;; nop.i 0x0;; nop.i 0x0;;
+#else
+# define STOPS
+#endif
+
+#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover;; mov rCRIFS=cr.ifs,) STOPS
+#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover;; mov rCRIFS=cr.ifs, mov r15=r19) STOPS
+#define SAVE_MIN DO_SAVE_MIN(mov rCRIFS=r0,) STOPS
+
+/*
+ * SAVE_REST saves the remainder of pt_regs (with psr.ic on). This
+ * macro guarantees to preserve all predicate registers, r8, r9, r10,
+ * r11, r14, and r15.
+ *
+ * Assumed state upon entry:
+ * psr.ic: on
+ * psr.dt: on
+ * r2: points to &pt_regs.r16
+ * r3: points to &pt_regs.r17
+ */
+#define SAVE_REST \
+ st8.spill [r2]=r16,16; \
+ st8.spill [r3]=r17,16; \
+ ;; \
+ st8.spill [r2]=r18,16; \
+ st8.spill [r3]=r19,16; \
+ ;; \
+ mov r16=ar.ccv; /* M-unit */ \
+ movl r18=FPSR_DEFAULT /* L-unit */ \
+ ;; \
+ mov r17=ar.fpsr; /* M-unit */ \
+ mov ar.fpsr=r18; /* M-unit */ \
+ ;; \
+ st8.spill [r2]=r20,16; \
+ st8.spill [r3]=r21,16; \
+ mov r18=b0; \
+ ;; \
+ st8.spill [r2]=r22,16; \
+ st8.spill [r3]=r23,16; \
+ mov r19=b7; \
+ ;; \
+ st8.spill [r2]=r24,16; \
+ st8.spill [r3]=r25,16; \
+ ;; \
+ st8.spill [r2]=r26,16; \
+ st8.spill [r3]=r27,16; \
+ ;; \
+ st8.spill [r2]=r28,16; \
+ st8.spill [r3]=r29,16; \
+ ;; \
+ st8.spill [r2]=r30,16; \
+ st8.spill [r3]=r31,16; \
+ ;; \
+ st8 [r2]=r16,16; /* ar.ccv */ \
+ st8 [r3]=r17,16; /* ar.fpsr */ \
+ ;; \
+ st8 [r2]=r18,16; /* b0 */ \
+ st8 [r3]=r19,16+8; /* b7 */ \
+ ;; \
+ stf.spill [r2]=f6,32; \
+ stf.spill [r3]=f7,32; \
+ ;; \
+ stf.spill [r2]=f8,32; \
+ stf.spill [r3]=f9,32
+
+/*
+ * This file defines the interrupt vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * External interrupts only use 1 entry. All others are internal interrupts
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for critical
+ * interrupts like TLB misses.
+ *
+ * For each entry, the comment is as follows:
+ *
+ * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ * entry offset ----/ / / / /
+ * entry number ---------/ / / /
+ * size of the entry -------------/ / /
+ * vector name -------------------------------------/ /
+ * related interrupts (what is the real interrupt?) ----------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.4 (June 1998)
+ */
+
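As a cross-check on the layout just described, a small sketch of the offset arithmetic (the helper is purely illustrative; one bundle is 16 bytes):

	/* Entries 0-19 are 64 bundles (1024 bytes) each; entries 20-67 are
	 * 16 bundles (256 bytes) each: 20*1024 + 48*256 = 32768 bytes = 32KB. */
	static unsigned long ivt_entry_offset(int n)
	{
		return (n < 20) ? n * 1024UL : 20 * 1024UL + (n - 20) * 256UL;
	}
	/* e.g. ivt_entry_offset(7) == 0x1c00 and ivt_entry_offset(20) == 0x5000,
	 * matching the per-entry comments below. */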
+#define FAULT(n) \
+ rsm psr.dt; /* avoid nested faults due to TLB misses... */ \
+ ;; \
+ srlz.d; /* ensure everyone knows psr.dt is off... */ \
+ mov r31=pr; \
+ mov r19=n;; /* prepare to save predicates */ \
+ br.cond.sptk.many dispatch_to_fault_handler
+
+/*
+ * As we don't (hopefully) use the space available, we need to fill it with
+ * nops. The parameter may be used for debugging and represents the entry
+ * number.
+ */
+#define BREAK_BUNDLE(a) break.m (a); \
+ break.i (a); \
+ break.i (a)
+/*
+ * 4 break bundles altogether
+ */
+#define BREAK_BUNDLE4(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a)
+
+/*
+ * 8 break bundles altogether (too lazy to use only 4 at a time!)
+ */
+#define BREAK_BUNDLE8(a); BREAK_BUNDLE4(a); BREAK_BUNDLE4(a)
+
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .section __ivt_section,"ax"
+
+ .align 32768 // align on 32KB boundary
+ .global ia64_ivt
+ia64_ivt:
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ /*
+ * The VHPT vector is invoked when the TLB entry for the virtual page table
+ * is missing. This happens only as a result of a previous
+ * (the "original") TLB miss, which may either be caused by an instruction
+ * fetch or a data access (or non-access).
+ *
+ * What we do here is normal TLB miss handling for the _original_ miss, followed
+ * by inserting the TLB entry for the virtual page table page that the VHPT
+ * walker was attempting to access. The latter gets inserted as long
+ * as both L1 and L2 have valid mappings for the faulting address.
+ * The TLB entry for the original miss gets inserted only if
+ * the L3 entry indicates that the page is present.
+ *
+ * do_page_fault gets invoked in the following cases:
+ * - the faulting virtual address uses unimplemented address bits
+ * - the faulting virtual address has no L1, L2, or L3 mapping
+ */
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+ rsm psr.dt // use physical addressing for data
+ mov r31=pr // save the predicate registers
+ mov r19=ar.k7 // get page table base address
+ shl r21=r16,3 // shift bit 60 into sign bit
+ shr.u r17=r16,61 // get the region number into r17
+ ;;
+ cmp.eq p6,p7=5,r17 // is IFA pointing into region 5?
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+ srlz.d // ensure "rsm psr.dt" has taken effect
+(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir
+(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1
+(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4
+ ;;
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+ cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
+ shr.u r18=r16,PMD_SHIFT // shift L2 index into position
+ ;;
+(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones?
+ ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ ;;
+(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
+ shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+ ;;
+(p7) ld8 r18=[r17] // read the L3 PTE
+ mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss
+ ;;
+(p7) tbit.z p6,p7=r18,0 // page present bit cleared?
+ mov r21=cr.iha // get the VHPT address that caused the TLB miss
+ ;; // avoid RAW on p7
+(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss?
+ dep r17=0,r17,0,PAGE_SHIFT // clear low bits to get page address
+ ;;
+(p10) itc.i r18;; // insert the instruction TLB entry (EAS2.6: must be last in insn group!)
+(p11) itc.d r18;; // insert the data TLB entry (EAS2.6: must be last in insn group!)
+(p6) br.spnt.few page_fault // handle bad address/page not present (page fault)
+ mov cr.ifa=r21
+
+ // Now compute and insert the TLB entry for the virtual page table.
+ // We never execute in a page table page so there is no need to set
+ // the exception deferral bit.
+ adds r16=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r17
+ ;;
+(p7) itc.d r16;; // EAS2.6: must be last in insn group!
+ mov pr=r31,-1 // restore predicate registers
+ rfi;; // must be last insn in an insn group
+
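The handler above is easier to follow alongside a rough C rendering of the same three-level walk (illustrative only; the real code runs with psr.dt off on physical addresses and folds the region bits into the L1 index as its comments note):

	/* Index the L1, L2, and L3 levels by successive fields of the faulting
	 * address; a NULL entry at any level means "go to page_fault". */
	static unsigned long *find_l3_pte(unsigned long *l1_base, unsigned long ifa)
	{
		unsigned long mask = (1UL << (PAGE_SHIFT - 3)) - 1;	/* PTEs per page - 1 */
		unsigned long *l1, *l2, *l3;

		l1 = l1_base + ((ifa >> PGDIR_SHIFT) & mask);
		if (*l1 == 0)
			return NULL;
		l2 = (unsigned long *) *l1 + ((ifa >> PMD_SHIFT) & mask);
		if (*l2 == 0)
			return NULL;
		l3 = (unsigned long *) *l2 + ((ifa >> PAGE_SHIFT) & mask);
		return l3;		/* caller still checks the present bit of *l3 */
	}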
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ /*
+ * The ITLB basically does the same as the VHPT handler except
+ * that we always insert exactly one instruction TLB entry.
+ */
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+ rsm psr.dt // use physical addressing for data
+ mov r31=pr // save the predicate registers
+ mov r19=ar.k7 // get page table base address
+ shl r21=r16,3 // shift bit 60 into sign bit
+ shr.u r17=r16,61 // get the region number into r17
+ ;;
+ cmp.eq p6,p7=5,r17 // is IFA pointing into region 5?
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+ srlz.d // ensure "rsm psr.dt" has taken effect
+(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir
+(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1
+(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4
+ ;;
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+ cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
+ shr.u r18=r16,PMD_SHIFT // shift L2 index into position
+ ;;
+(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones?
+ ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ ;;
+(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
+ shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+ ;;
+(p7) ld8 r18=[r17] // read the L3 PTE
+ ;;
+(p7) tbit.z p6,p7=r18,0 // page present bit cleared?
+ ;;
+(p7) itc.i r18;; // insert the instruction TLB entry (EAS2.6: must be last in insn group!)
+(p6) br.spnt.few page_fault // handle bad address/page not present (page fault)
+ ;;
+ mov pr=r31,-1 // restore predicate registers
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ /*
+ * The DTLB basically does the same as the VHPT handler except
+ * that we always insert exactly one data TLB entry.
+ */
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+ rsm psr.dt // use physical addressing for data
+ mov r31=pr // save the predicate registers
+ mov r19=ar.k7 // get page table base address
+ shl r21=r16,3 // shift bit 60 into sign bit
+ shr.u r17=r16,61 // get the region number into r17
+ ;;
+ cmp.eq p6,p7=5,r17 // is IFA pointing into region 5?
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+ srlz.d // ensure "rsm psr.dt" has taken effect
+(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir
+(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1
+(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4
+ ;;
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+ cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
+ shr.u r18=r16,PMD_SHIFT // shift L2 index into position
+ ;;
+(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones?
+ ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ ;;
+(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
+ shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+ ;;
+(p7) ld8 r18=[r17] // read the L3 PTE
+ ;;
+(p7) tbit.z p6,p7=r18,0 // page present bit cleared?
+ ;;
+(p7) itc.d r18;; // insert the data TLB entry (EAS2.6: must be last in insn group!)
+(p6) br.spnt.few page_fault // handle bad address/page not present (page fault)
+ ;;
+ mov pr=r31,-1 // restore predicate registers
+ rfi;; // must be last insn in an insn group
+
+ //-----------------------------------------------------------------------------------
+ // call do_page_fault (predicates are in r31, psr.dt is off, r16 is faulting address)
+page_fault:
+ SAVE_MIN_WITH_COVER
+ //
+ // Copy control registers to temporary registers, then turn on psr bits,
+ // then copy the temporary regs to the output regs. We have to do this
+ // because the "alloc" can cause a mandatory store which could lead to
+ // an "Alt DTLB" fault which we can handle only if psr.ic is on.
+ //
+ mov r8=cr.ifa
+ mov r9=cr.isr
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic | psr.dt
+ ;;
+ srlz.d // guarantee that interrupt collection is enabled
+(p15) ssm psr.i // restore psr.i
+ ;;
+ srlz.i // must precede "alloc"! (srlz.i implies srlz.d)
+ movl r14=ia64_leave_kernel
+ ;;
+ alloc r15=ar.pfs,0,0,3,0 // must be first in insn group
+ mov out0=r8
+ mov out1=r9
+ ;;
+ SAVE_REST
+ mov rp=r14
+ ;;
+ adds out2=16,r12 // out2 = pointer to pt_regs
+ br.call.sptk.few b6=ia64_do_page_fault // ignore return address
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RX
+ ;;
+ shr.u r18=r16,57 // move address bit 61 to bit 4
+ dep r16=0,r16,52,12 // clear top 12 bits of address
+ ;;
+ andcm r18=0x10,r18 // bit 4=~address-bit(61)
+ dep r16=r17,r16,0,12 // insert PTE control bits into r16
+ ;;
+ or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6
+ ;;
+ itc.i r16;; // insert the TLB entry(EAS2.6: must be last in insn group!)
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RW
+ ;;
+ shr.u r18=r16,57 // move address bit 61 to bit 4
+ dep r16=0,r16,52,12 // clear top 12 bits of address
+ ;;
+ andcm r18=0x10,r18 // bit 4=~address-bit(61)
+ dep r16=r17,r16,0,12 // insert PTE control bits into r16
+ ;;
+ or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6
+ ;;
+ itc.d r16;; // insert the TLB entry (EAS2.6: must be last in insn group!)
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ //
+ // In the absence of kernel bugs, we get here when the Dirty-bit, Instruction
+ // Access-bit, or Data Access-bit faults cause a nested fault because the
+ // dTLB entry for the virtual page table isn't present. In such a case,
+ // we look up the pte for the faulting address by walking the page table
+ // and return to the continuation point passed in register r30.
+ // In accessing the page tables, we don't need to check for NULL entries
+ // because if the page tables didn't map the faulting address, it would not
+ // be possible to receive one of the above faults.
+ //
+ // Input: r16: faulting address
+ // r29: saved b0
+ // r30: continuation address
+ //
+ // Output: r17: physical address of L3 PTE of faulting address
+ // r29: saved b0
+ // r30: continuation address
+ //
+ // Clobbered: b0, r18, r19, r21, r31, psr.dt (cleared)
+ //
+ rsm psr.dt // switch to using physical data addressing
+ mov r19=ar.k7 // get the page table base address
+ shl r21=r16,3 // shift bit 60 into sign bit
+ ;;
+ mov r31=pr // save the predicate registers
+ shr.u r17=r16,61 // get the region number into r17
+ ;;
+ cmp.eq p6,p7=5,r17 // is faulting address in region 5?
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+ srlz.d
+(p6) movl r17=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir
+(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1
+(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4
+ ;;
+(p6) dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+ shr.u r18=r16,PMD_SHIFT // shift L2 index into position
+ ;;
+ ld8 r17=[r17] // fetch the L1 entry
+ mov b0=r30
+ ;;
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ ;;
+ ld8 r17=[r17] // fetch the L2 entry
+ shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
+ ;;
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+ ;;
+ mov pr=r31,-1 // restore predicates
+ br.cond.sptk.few b0 // return to continuation point
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ FAULT(6)
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ FAULT(7)
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ //
+ // What we do here is to simply turn on the dirty bit in the PTE. We need
+ // to update both the page-table and the TLB entry. To efficiently access
+ // the PTE, we address it through the virtual page table. Most likely, the
+ // TLB entry for the relevant virtual page table page is still present in
+ // the TLB so we can normally do this without additional TLB misses.
+ // In case the necessary virtual page table TLB entry isn't present, we take
+ // a nested TLB miss hit where we look up the physical address of the L3 PTE
+ // and then continue at label 1 below.
+ //
+ mov r16=cr.ifa // get the address that caused the fault
+ movl r30=1f // load continuation point in case of nested fault
+ ;;
+ thash r17=r16 // compute virtual address of L3 PTE
+ mov r29=b0 // save b0 in case of nested fault
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ or r18=_PAGE_D,r18 // set the dirty bit
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.d r18;; // install updated PTE (EAS2.6: must be last in insn group!)
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ // Like Entry 8, except for instruction access
+ mov r16=cr.ifa // get the address that caused the fault
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ mov r31=pr // save predicates
+ mov r30=cr.ipsr
+ ;;
+ extr.u r17=r30,IA64_PSR_IS_BIT,1 // get instruction arch. indicator
+ ;;
+ cmp.eq p6,p0 = r17,r0 // check if IA64 instruction set
+ ;;
+(p6) mov r16=cr.iip // get real faulting address
+ ;;
+(p6) mov cr.ifa=r16 // reset IFA
+ mov pr=r31,-1
+#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */
+ movl r30=1f // load continuation point in case of nested fault
+ ;;
+ thash r17=r16 // compute virtual address of L3 PTE
+ mov r29=b0 // save b0 in case of nested fault
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ or r18=_PAGE_A,r18 // set the accessed bit
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.i r18;; // install updated PTE (EAS2.6: must be last in insn group!)
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ // Like Entry 8, except for data access
+ mov r16=cr.ifa // get the address that caused the fault
+ movl r30=1f // load continuation point in case of nested fault
+ ;;
+ thash r17=r16 // compute virtual address of L3 PTE
+ mov r29=b0 // save b0 in case of nested fault
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ or r18=_PAGE_A,r18 // set the accessed bit
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.d r18;; // install updated PTE (EAS2.6: must be last in insn group!)
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ mov r16=cr.iim
+ mov r17=__IA64_BREAK_SYSCALL
+ mov r31=pr // prepare to save predicates
+ rsm psr.dt // avoid nested faults due to TLB misses...
+ ;;
+ srlz.d // ensure everyone knows psr.dt is off...
+ cmp.eq p0,p7=r16,r17 // is this a system call? (p7 <- false, if so)
+
+#if 1
+ // Allow syscalls via the old system call number for the time being. This is
+ // so we can transition to the new syscall number in a relatively smooth
+ // fashion.
+ mov r17=0x80000
+ ;;
+(p7) cmp.eq.or.andcm p0,p7=r16,r17 // is this the old syscall number?
+#endif
+
+(p7) br.cond.spnt.many non_syscall
+
+ SAVE_MIN // uses r31; defines r2:
+
+ // turn interrupt collection and data translation back on:
+ ssm psr.ic | psr.dt
+ srlz.d // guarantee that interrupt collection is enabled
+ cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
+ ;;
+(p15) ssm psr.i // restore psr.i
+ ;;
+ srlz.i // ensure everybody knows psr.ic and psr.dt are back on
+ adds r8=(IA64_PT_REGS_R8_OFFSET-IA64_PT_REGS_R16_OFFSET),r2
+ ;;
+ stf8 [r8]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error)
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ ;;
+ SAVE_REST
+ ;; // avoid WAW on r2 & r3
+
+ mov r3=255
+ adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024
+ adds r2=IA64_TASK_FLAGS_OFFSET,r13 // r2 = &current->flags
+
+ ;;
+ cmp.geu.unc p6,p7=r3,r15 // (1024 <= syscall && syscall <= 1024+255) ?
+ movl r16=sys_call_table
+ ;;
+(p6) shladd r16=r15,3,r16
+ movl r15=ia64_ret_from_syscall
+(p7) adds r16=(__NR_ni_syscall-1024)*8,r16 // force __NR_ni_syscall
+ ;;
+ ld8 r16=[r16] // load address of syscall entry point
+ mov rp=r15 // set the real return addr
+ ;;
+ ld8 r2=[r2] // r2 = current->flags
+ mov b6=r16
+
+ // arrange things so we skip over break instruction when returning:
+
+ adds r16=16,sp // get pointer to cr_ipsr
+ adds r17=24,sp // get pointer to cr_iip
+ ;;
+ ld8 r18=[r16] // fetch cr_ipsr
+ tbit.z p8,p0=r2,5 // (current->flags & PF_TRACESYS) == 0?
+ ;;
+ ld8 r19=[r17] // fetch cr_iip
+ extr.u r20=r18,41,2 // extract ei field
+ ;;
+ cmp.eq p6,p7=2,r20 // ipsr.ei==2?
+ adds r19=16,r19 // compute address of next bundle
+ ;;
+(p6) mov r20=0 // clear ei to 0
+(p7) adds r20=1,r20 // increment ei to next slot
+ ;;
+(p6) st8 [r17]=r19 // store new cr.iip if cr.ipsr.ei wrapped around
+ dep r18=r20,r18,41,2 // insert new ei into cr.ipsr
+ ;;
+ st8 [r16]=r18 // store new value for cr.ipsr
+
+(p8) br.call.sptk.few b6=b6 // ignore this return addr
+ br.call.sptk.few rp=ia64_trace_syscall // rp will be overwritten (ignored)
+ // NOT REACHED
+
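In C terms, the range check and table lookup above amount to roughly the following (the helper is purely illustrative; sys_call_table and __NR_ni_syscall are the symbols the assembly references):

	/* ia64 syscall numbers occupy [1024, 1024+255]; anything outside that
	 * window is redirected to the not-implemented entry. */
	static unsigned long pick_syscall_entry(unsigned long num)
	{
		unsigned long idx = num - 1024;		/* mirrors "adds r15=-1024,r15" */
		if (idx > 255)				/* mirrors the unsigned cmp.geu against 255 */
			idx = __NR_ni_syscall - 1024;	/* force the ni_syscall slot */
		return (unsigned long) sys_call_table[idx];	/* address the handler branches to */
	}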
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ rsm psr.dt // avoid nested faults due to TLB misses...
+ ;;
+ srlz.d // ensure everyone knows psr.dt is off...
+ mov r31=pr // prepare to save predicates
+ ;;
+
+ SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3
+ ssm psr.ic | psr.dt // turn interrupt collection and data translation back on
+ ;;
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ cmp.eq pEOI,p0=r0,r0 // set pEOI flag so that ia64_leave_kernel writes cr.eoi
+ srlz.i // ensure everybody knows psr.ic and psr.dt are back on
+ ;;
+ SAVE_REST
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ mov out0=r0 // defer reading of cr.ivr to handle_irq...
+#else
+ mov out0=cr.ivr // pass cr.ivr as first arg
+#endif
+ add out1=16,sp // pass pointer to pt_regs as second arg
+ ;;
+ srlz.d // make sure we see the effect of cr.ivr
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.call.sptk.few b6=ia64_handle_irq
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+ FAULT(13)
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+ FAULT(14)
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+ FAULT(15)
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+ FAULT(16)
+
+#ifdef CONFIG_IA32_SUPPORT
+
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+ // fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
+
+ // IA32 interrupt entry point
+
+dispatch_to_ia32_handler:
+ SAVE_MIN
+ ;;
+ mov r14=cr.isr
+ ssm psr.ic | psr.dt
+ srlz.d // guarantee that interrupt collection is enabled
+ ;;
+(p15) ssm psr.i
+ ;;
+ srlz.d
+ adds r3=8,r2 // Base pointer for SAVE_REST
+ ;;
+ SAVE_REST
+ ;;
+ mov r15=0x80
+ shr r14=r14,16 // Get interrupt number
+ ;;
+ cmp.ne p6,p0=r14,r15
+(p6) br.call.dpnt.few b6=non_ia32_syscall
+
+ adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions
+
+ ;;
+ alloc r15=ar.pfs,0,0,6,0 // must be first in an insn group
+ ;;
+ ld4 r8=[r14],8 // r8 == EAX (syscall number)
+ mov r15=0xff
+ ;;
+ cmp.ltu.unc p6,p7=r8,r15
+ ld4 out1=[r14],8 // r9 == ecx
+ ;;
+ ld4 out2=[r14],8 // r10 == edx
+ ;;
+ ld4 out0=[r14] // r11 == ebx
+ adds r14=(IA64_PT_REGS_R8_OFFSET-(8*3)) + 16,sp
+ ;;
+ ld4 out5=[r14],8 // r13 == ebp
+ ;;
+ ld4 out3=[r14],8 // r14 == esi
+ adds r2=IA64_TASK_FLAGS_OFFSET,r13 // r2 = &current->flags
+ ;;
+ ld4 out4=[r14] // R15 == edi
+ movl r16=ia32_syscall_table
+ ;;
+(p6) shladd r16=r8,3,r16 // Force ni_syscall if not valid syscall number
+ ld8 r2=[r2] // r2 = current->flags
+ ;;
+ ld8 r16=[r16]
+ tbit.z p8,p0=r2,5 // (current->flags & PF_TRACESYS) == 0?
+ ;;
+ movl r15=ia32_ret_from_syscall
+ mov b6=r16
+ ;;
+ mov rp=r15
+(p8) br.call.sptk.few b6=b6
+ br.call.sptk.few rp=ia32_trace_syscall // rp will be overwritten (ignored)
+
+non_ia32_syscall:
+ alloc r15=ar.pfs,0,0,2,0
+ mov out0=r14 // interrupt #
+ add out1=16,sp // pointer to pt_regs
+ ;; // avoid WAW on CFM
+ br.call.sptk.few rp=ia32_bad_interrupt
+ ;;
+ movl r15=ia64_leave_kernel
+ ;;
+ mov rp=r15
+ br.ret.sptk.many rp
+
+#endif /* CONFIG_IA32_SUPPORT */
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+ FAULT(17)
+
+non_syscall:
+
+#ifdef CONFIG_KDB
+ mov r17=__IA64_BREAK_KDB
+ ;;
+ cmp.eq p8,p0=r16,r17 // is this a kernel breakpoint?
+#endif
+
+ SAVE_MIN_WITH_COVER
+
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+ // fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
+
+ mov r8=cr.iim // get break immediate (must be done while psr.ic is off)
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+
+ // turn interrupt collection and data translation back on:
+ ssm psr.ic | psr.dt
+ srlz.d // guarantee that interrupt collection is enabled
+ ;;
+(p15) ssm psr.i // restore psr.i
+ ;;
+ srlz.i // ensure everybody knows psr.ic and psr.dt are back on
+ movl r15=ia64_leave_kernel
+ ;;
+ alloc r14=ar.pfs,0,0,2,0
+ mov out0=r8 // break number
+ add out1=16,sp // pointer to pt_regs
+ ;;
+ SAVE_REST
+ mov rp=r15
+ ;;
+#ifdef CONFIG_KDB
+(p8) br.call.sptk.few b6=ia64_invoke_kdb
+#endif
+ br.call.sptk.few b6=ia64_bad_break // avoid WAW on CFM and ignore return addr
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+ FAULT(18)
+
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+ // fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
+
+dispatch_unaligned_handler:
+ SAVE_MIN_WITH_COVER
+ ;;
+ //
+ // we can't have the alloc while psr.ic is cleared because
+ // we might get a mandatory RSE (when you reach the end of the
+ // rotating partition when doing the alloc) spill which could cause
+ // a page fault on the kernel virtual address and the handler
+ // wouldn't get the state to recover.
+ //
+ mov r15=cr.ifa
+ ssm psr.ic | psr.dt
+ srlz.d // guarantee that interrupt collection is enabled
+ ;;
+(p15) ssm psr.i // restore psr.i
+ ;;
+ srlz.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ SAVE_REST
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
+ ;; // avoid WAW on r14
+ movl r14=ia64_leave_kernel
+ mov out0=r15 // out0 = faulting address
+ adds out1=16,sp // out1 = pointer to pt_regs
+ ;;
+ mov rp=r14
+ br.sptk.few ia64_prepare_handle_unaligned
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+ FAULT(19)
+
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+ // fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
+
+dispatch_to_fault_handler:
+ //
+ // Input:
+ // psr.ic: off
+ // psr.dt: off
+ // r19: fault vector number (e.g., 24 for General Exception)
+ // r31: contains saved predicates (pr)
+ //
+ SAVE_MIN_WITH_COVER_R19
+ //
+ // Copy control registers to temporary registers, then turn on psr bits,
+ // then copy the temporary regs to the output regs. We have to do this
+ // because the "alloc" can cause a mandatory store which could lead to
+ // an "Alt DTLB" fault which we can handle only if psr.ic is on.
+ //
+ mov r8=cr.isr
+ mov r9=cr.ifa
+ mov r10=cr.iim
+ mov r11=cr.itir
+ ;;
+ ssm psr.ic | psr.dt
+ srlz.d // guarantee that interrupt collection is enabled
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ ;;
+ srlz.i // must precede "alloc"!
+ ;;
+ alloc r14=ar.pfs,0,0,5,0 // must be first in insn group
+ mov out0=r15
+ mov out1=r8
+ mov out2=r9
+ mov out3=r10
+ mov out4=r11
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+#ifdef CONFIG_KDB
+ br.call.sptk.few b6=ia64_invoke_kdb_fault_handler
+#else
+ br.call.sptk.few b6=ia64_fault
+#endif
+//
+// --- End of long entries, Beginning of short entries
+//
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
+ mov r16=cr.ifa
+ rsm psr.dt
+#if 0
+ // If you disable this, you MUST re-enable the update_mmu_cache() code in pgtable.h
+ mov r17=_PAGE_SIZE_4K<<2
+ ;;
+ ptc.l r16,r17
+#endif
+ ;;
+ mov r31=pr
+ srlz.d
+ br.cond.sptk.many page_fault
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
+ mov r16=cr.ifa
+ rsm psr.dt
+ mov r31=pr
+ ;;
+ srlz.d
+ br.cond.sptk.many page_fault
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ mov r16=cr.ifa
+ rsm psr.dt
+ mov r31=pr
+ ;;
+ srlz.d
+ br.cond.sptk.many page_fault
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ mov r16=cr.ifa
+ rsm psr.dt
+ mov r31=pr
+ ;;
+ srlz.d
+ br.cond.sptk.many page_fault
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ FAULT(24)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ rsm psr.dt | psr.dfh // ensure we can access fph
+ ;;
+ srlz.d
+ mov r31=pr
+ mov r19=25
+ br.cond.sptk.many dispatch_to_fault_handler
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ FAULT(26)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ //
+ // A [f]chk.[as] instruction needs to take the branch to
+ // the recovery code but this part of the architecture is
+ // not implemented in hardware on some CPUs, such as Itanium.
+ // Thus, in general we need to emulate the behavior.
+ // IIM contains the relative target (not yet sign extended).
+ // So after sign extending it we simply add it to IIP.
+ // We also need to reset the EI field of the IPSR to zero,
+ // i.e., the slot to restart into.
+ //
+ // cr.iim contains zero_ext(imm21)
+ //
+ mov r18=cr.iim
+ ;;
+ mov r17=cr.iip
+ shl r18=r18,43 // put sign bit in position (43=64-21)
+ ;;
+
+ mov r16=cr.ipsr
+ shr r18=r18,39 // sign extend (39=43-4)
+ ;;
+
+ add r17=r17,r18 // now add the offset
+ ;;
+ mov cr.iip=r17
+ dep r16=0,r16,41,2 // clear EI
+ ;;
+
+ mov cr.ipsr=r16
+ ;;
+
+ rfi;; // and go back (must be last insn in group)
+
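A quick worked example of the shift arithmetic above (the immediate value is made up): for imm21 = 0x1fffff, i.e. -1 as a signed 21-bit value, shifting left by 43 yields 0xfffff80000000000 and the arithmetic shift right by 39 yields 0xfffffffffffffff0 = -16, so IIP moves back exactly one 16-byte bundle; in general the adjustment is sign_extend(imm21) * 16.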
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+ FAULT(28)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ FAULT(29)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ rsm psr.dt // avoid nested faults due to TLB misses...
+ mov r16=cr.ipsr
+ mov r31=pr // prepare to save predicates
+ ;;
+ srlz.d // ensure everyone knows psr.dt is off
+ mov r19=30 // error vector for fault_handler (when kernel)
+ extr.u r16=r16,32,2 // extract psr.cpl
+ ;;
+ cmp.eq p6,p7=r0,r16 // if kernel cpl then fault else emulate
+(p7) br.cond.sptk.many dispatch_unaligned_handler
+(p6) br.cond.sptk.many dispatch_to_fault_handler
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ FAULT(31)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
+ FAULT(32)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ FAULT(33)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ FAULT(34)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ FAULT(35)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ FAULT(36)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Reserved
+ FAULT(37)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+ FAULT(38)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+ FAULT(39)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+ FAULT(40)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+ FAULT(41)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+ FAULT(42)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+ FAULT(43)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+ FAULT(44)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ FAULT(45)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
+ FAULT(46)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
+#ifdef CONFIG_IA32_SUPPORT
+ rsm psr.dt
+ ;;
+ srlz.d
+ mov r31=pr
+ br.cond.sptk.many dispatch_to_ia32_handler
+#else
+ FAULT(47)
+#endif
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+ FAULT(48)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+ FAULT(49)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+ FAULT(50)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+ FAULT(51)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7000 Entry 52 (size 16 bundles) Reserved
+ FAULT(52)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+ FAULT(53)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+ FAULT(54)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+ FAULT(55)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+ FAULT(56)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+ FAULT(57)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+ FAULT(58)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+ FAULT(59)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+ FAULT(60)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+ FAULT(61)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+ FAULT(62)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+ FAULT(63)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+ FAULT(64)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+ FAULT(65)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+ FAULT(66)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+ FAULT(67)
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
new file mode 100644
index 000000000..153fb5684
--- /dev/null
+++ b/arch/ia64/kernel/machvec.c
@@ -0,0 +1,48 @@
+#include <linux/kernel.h>
+
+#include <asm/page.h>
+#include <asm/machvec.h>
+
+struct ia64_machine_vector ia64_mv;
+
+void
+machvec_noop (void)
+{
+}
+
+/*
+ * Most platforms use this routine for mapping page frame addresses
+ * into a memory map index.
+ */
+unsigned long
+map_nr_dense (unsigned long addr)
+{
+ return MAP_NR_DENSE(addr);
+}
+
+static struct ia64_machine_vector *
+lookup_machvec (const char *name)
+{
+ extern struct ia64_machine_vector machvec_start[];
+ extern struct ia64_machine_vector machvec_end[];
+ struct ia64_machine_vector *mv;
+
+ for (mv = machvec_start; mv < machvec_end; ++mv)
+ if (strcmp (mv->name, name) == 0)
+ return mv;
+
+ return 0;
+}
+
+void
+machvec_init (const char *name)
+{
+ struct ia64_machine_vector *mv;
+
+ mv = lookup_machvec(name);
+ if (!mv) {
+ panic("generic kernel failed to find machine vector for platform %s!", name);
+ }
+ ia64_mv = *mv;
+ printk("booting generic kernel on platform %s\n", name);
+}
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
new file mode 100644
index 000000000..320c56ebc
--- /dev/null
+++ b/arch/ia64/kernel/mca.c
@@ -0,0 +1,842 @@
+/*
+ * File: mca.c
+ * Purpose: Generic MCA handling layer
+ *
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) Vijay Chander(vijay@engr.sgi.com)
+ */
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/sal.h>
+#include <asm/mca.h>
+#include <asm/spinlock.h>
+#include <asm/irq.h>
+#include <asm/machvec.h>
+
+
+ia64_mc_info_t ia64_mc_info;
+ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state;
+ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state;
+u64 ia64_mca_proc_state_dump[256];
+u64 ia64_mca_stack[1024];
+u64 ia64_mca_stackframe[32];
+u64 ia64_mca_bspstore[1024];
+
+static void ia64_mca_cmc_vector_setup(int enable,
+ int_vector_t cmc_vector);
+static void ia64_mca_wakeup_ipi_wait(void);
+static void ia64_mca_wakeup(int cpu);
+static void ia64_mca_wakeup_all(void);
+static void ia64_log_init(int,int);
+static void ia64_log_get(int,int, prfunc_t);
+static void ia64_log_clear(int,int,int, prfunc_t);
+
+/*
+ * ia64_mca_cmc_vector_setup
+ * Setup the correctable machine check vector register in the processor
+ * Inputs
+ * Enable (1 - enable cmc interrupt , 0 - disable)
+ * CMC handler entry point (if enabled)
+ *
+ * Outputs
+ * None
+ */
+static void
+ia64_mca_cmc_vector_setup(int enable,
+ int_vector_t cmc_vector)
+{
+ cmcv_reg_t cmcv;
+
+ cmcv.cmcv_regval = 0;
+ cmcv.cmcv_mask = enable;
+ cmcv.cmcv_vector = cmc_vector;
+ ia64_set_cmcv(cmcv.cmcv_regval);
+}
+
+
+#if defined(MCA_TEST)
+
+sal_log_processor_info_t slpi_buf;
+
+void
+mca_test(void)
+{
+ slpi_buf.slpi_valid.slpi_psi = 1;
+ slpi_buf.slpi_valid.slpi_cache_check = 1;
+ slpi_buf.slpi_valid.slpi_tlb_check = 1;
+ slpi_buf.slpi_valid.slpi_bus_check = 1;
+ slpi_buf.slpi_valid.slpi_minstate = 1;
+ slpi_buf.slpi_valid.slpi_bank1_gr = 1;
+ slpi_buf.slpi_valid.slpi_br = 1;
+ slpi_buf.slpi_valid.slpi_cr = 1;
+ slpi_buf.slpi_valid.slpi_ar = 1;
+ slpi_buf.slpi_valid.slpi_rr = 1;
+ slpi_buf.slpi_valid.slpi_fr = 1;
+
+ ia64_os_mca_dispatch();
+}
+
+#endif /* #if defined(MCA_TEST) */
+
+/*
+ * mca_init
+ * Do all the mca specific initialization on a per-processor basis.
+ *
+ * 1. Register spinloop and wakeup request interrupt vectors
+ *
+ * 2. Register OS_MCA handler entry point
+ *
+ * 3. Register OS_INIT handler entry point
+ *
+ * 4. Initialize CMCV register to enable/disable CMC interrupt on the
+ * processor and hook a handler in the platform-specific mca_init.
+ *
+ * 5. Initialize MCA/CMC/INIT related log buffers maintained by the OS.
+ *
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void __init
+mca_init(void)
+{
+ int i;
+
+ MCA_DEBUG("mca_init : begin\n");
+ /* Clear the Rendez checkin flag for all cpus */
+ for(i = 0 ; i < IA64_MAXCPUS; i++)
+ ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+
+ /* NOTE : The actual irqs for the rendez, wakeup and
+ * cmc interrupts are requested in the platform-specific
+ * mca initialization code.
+ */
+ /*
+ * Register the rendezvous spinloop and wakeup mechanism with SAL
+ */
+
+ /* Register the rendezvous interrupt vector with SAL */
+ if (ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT,
+ SAL_MC_PARAM_MECHANISM_INT,
+ IA64_MCA_RENDEZ_INT_VECTOR,
+ IA64_MCA_RENDEZ_TIMEOUT))
+ return;
+
+ /* Register the wakeup interrupt vector with SAL */
+ if (ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP,
+ SAL_MC_PARAM_MECHANISM_INT,
+ IA64_MCA_WAKEUP_INT_VECTOR,
+ 0))
+ return;
+
+ MCA_DEBUG("mca_init : registered mca rendezvous spinloop and wakeup mech.\n");
+ /*
+ * Setup the correctable machine check vector
+ */
+ ia64_mca_cmc_vector_setup(IA64_CMC_INT_ENABLE,
+ IA64_MCA_CMC_INT_VECTOR);
+
+ MCA_DEBUG("mca_init : correctable mca vector setup done\n");
+
+ ia64_mc_info.imi_mca_handler = __pa(ia64_os_mca_dispatch);
+ ia64_mc_info.imi_mca_handler_size =
+ __pa(ia64_os_mca_dispatch_end) - __pa(ia64_os_mca_dispatch);
+ /* Register the os mca handler with SAL */
+ if (ia64_sal_set_vectors(SAL_VECTOR_OS_MCA,
+ ia64_mc_info.imi_mca_handler,
+ __pa(ia64_get_gp()),
+ ia64_mc_info.imi_mca_handler_size,
+ 0,0,0))
+ return;
+
+ MCA_DEBUG("mca_init : registered os mca handler with SAL\n");
+
+ ia64_mc_info.imi_monarch_init_handler = __pa(ia64_monarch_init_handler);
+ ia64_mc_info.imi_monarch_init_handler_size = IA64_INIT_HANDLER_SIZE;
+ ia64_mc_info.imi_slave_init_handler = __pa(ia64_slave_init_handler);
+ ia64_mc_info.imi_slave_init_handler_size = IA64_INIT_HANDLER_SIZE;
+ /* Register the os init handler with SAL */
+ if (ia64_sal_set_vectors(SAL_VECTOR_OS_INIT,
+ ia64_mc_info.imi_monarch_init_handler,
+ __pa(ia64_get_gp()),
+ ia64_mc_info.imi_monarch_init_handler_size,
+ ia64_mc_info.imi_slave_init_handler,
+ __pa(ia64_get_gp()),
+ ia64_mc_info.imi_slave_init_handler_size))
+ return;
+
+ MCA_DEBUG("mca_init : registered os init handler with SAL\n");
+
+ /* Initialize the areas set aside by the OS to buffer the
+ * platform/processor error states for MCA/INIT/CMC
+ * handling.
+ */
+ ia64_log_init(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR);
+ ia64_log_init(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PLATFORM);
+ ia64_log_init(SAL_INFO_TYPE_INIT, SAL_SUB_INFO_TYPE_PROCESSOR);
+ ia64_log_init(SAL_INFO_TYPE_INIT, SAL_SUB_INFO_TYPE_PLATFORM);
+ ia64_log_init(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR);
+ ia64_log_init(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PLATFORM);
+
+ mca_init_platform();
+
+ MCA_DEBUG("mca_init : platform-specific mca handling setup done\n");
+
+#if defined(MCA_TEST)
+ mca_test();
+#endif /* #if defined(MCA_TEST) */
+
+ printk("Mca related initialization done\n");
+}
+
+/*
+ * ia64_mca_wakeup_ipi_wait
+ * Wait for the inter-cpu interrupt to be sent by the
+ * monarch processor once it is done with handling the
+ * MCA.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void
+ia64_mca_wakeup_ipi_wait(void)
+{
+ int irr_num = (IA64_MCA_WAKEUP_INT_VECTOR >> 6);
+ int irr_bit = (IA64_MCA_WAKEUP_INT_VECTOR & 0x3f);
+ u64 irr = 0;
+
+ do {
+ switch(irr_num) {
+ case 0:
+ irr = ia64_get_irr0();
+ break;
+ case 1:
+ irr = ia64_get_irr1();
+ break;
+ case 2:
+ irr = ia64_get_irr2();
+ break;
+ case 3:
+ irr = ia64_get_irr3();
+ break;
+ }
+ } while (!(irr & (1UL << irr_bit)));
+}
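A worked example of the IRR indexing above (the vector value is made up purely for illustration):

	int vector  = 0xf3;		/* hypothetical wakeup vector number */
	int irr_num = vector >> 6;	/* 0xf3 >> 6  == 3    -> poll cr.irr3 */
	int irr_bit = vector & 0x3f;	/* 0xf3 & 0x3f == 0x33 -> bit 51 of that register */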
+
+/*
+ * ia64_mca_wakeup
+ * Send an inter-cpu interrupt to wake-up a particular cpu
+ * and mark that cpu to be out of rendez.
+ * Inputs
+ * cpuid
+ * Outputs
+ * None
+ */
+void
+ia64_mca_wakeup(int cpu)
+{
+ ipi_send(cpu, IA64_MCA_WAKEUP_INT_VECTOR, IA64_IPI_DM_INT);
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+
+}
+/*
+ * ia64_mca_wakeup_all
+ * Wakeup all the cpus which have rendez'ed previously.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void
+ia64_mca_wakeup_all(void)
+{
+ int cpu;
+
+ /* Clear the Rendez checkin flag for all cpus */
+ for(cpu = 0 ; cpu < IA64_MAXCPUS; cpu++)
+ if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE)
+ ia64_mca_wakeup(cpu);
+
+}
+/*
+ * ia64_mca_rendez_int_handler
+ * This is the handler used to put slave processors into a spinloop
+ * while the monarch processor does the MCA handling, and later to
+ * wake each slave up once the monarch is done.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void
+ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs)
+{
+ unsigned long flags;
+ /* Mask all interrupts */
+ save_and_cli(flags);
+
+ ia64_mc_info.imi_rendez_checkin[ia64_get_cpuid(0)] = IA64_MCA_RENDEZ_CHECKIN_DONE;
+ /* Register with the SAL monarch that the slave has
+ * reached SAL
+ */
+ ia64_sal_mc_rendez();
+
+ /* Wait for the wakeup IPI from the monarch
+ * This waiting is done by polling on the wakeup-interrupt
+ * vector bit in the processor's IRRs
+ */
+ ia64_mca_wakeup_ipi_wait();
+
+ /* Restore the interrupt state */
+ restore_flags(flags);
+}
+
+
+/*
+ * ia64_mca_wakeup_int_handler
+ * The interrupt handler for processing the inter-cpu interrupt to the
+ * slave cpu which was spinning in the rendez loop.
+ * Since this spinning is done by turning off the interrupts and
+ * polling on the wakeup-interrupt bit in the IRR, there is
+ * nothing useful to be done in the handler.
+ * Inputs
+ * wakeup_irq (Wakeup-interrupt bit)
+ * arg (Interrupt handler specific argument)
+ * ptregs (Exception frame at the time of the interrupt)
+ * Outputs
+ *
+ */
+void
+ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs)
+{
+
+}
+
+/*
+ * ia64_return_to_sal_check
+ * This is the function called before going back from the OS_MCA handler
+ * to the OS_MCA dispatch code, which finally returns control to SAL.
+ * The main purpose of this routine is to set up the OS_MCA to SAL
+ * return state, which the OS_MCA dispatch code uses just before going
+ * back to SAL.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+
+void
+ia64_return_to_sal_check(void)
+{
+ /* Copy over some relevant stuff from the sal_to_os_mca_handoff
+ * so that it can be used at the time of os_mca_to_sal_handoff
+ */
+ ia64_os_to_sal_handoff_state.imots_sal_gp =
+ ia64_sal_to_os_handoff_state.imsto_sal_gp;
+
+ ia64_os_to_sal_handoff_state.imots_sal_check_ra =
+ ia64_sal_to_os_handoff_state.imsto_sal_check_ra;
+
+ /* For now ignore the MCA */
+ ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED;
+}
+/*
+ * ia64_mca_ucmc_handler
+ * This is the uncorrectable machine check handler called from the OS_MCA
+ * dispatch code, which is in turn called from SAL_CHECK().
+ * This is where the core of the OS MCA handling is done.
+ * Right now the logs are extracted and displayed in a well-defined
+ * format. This handler code is supposed to run only on the
+ * monarch processor. Once the monarch is done with the MCA handling,
+ * it clears the logs so that further MCA logging is re-enabled.
+ * The monarch also has the duty of sending wakeup IPIs to pull the
+ * slave processors out of the rendezvous spin loop.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void
+ia64_mca_ucmc_handler(void)
+{
+
+ /* Get the MCA processor log */
+ ia64_log_get(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk);
+ /* Get the MCA platform log */
+ ia64_log_get(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PLATFORM, (prfunc_t)printk);
+
+ ia64_log_print(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk);
+
+ /*
+ * Do some error handling - Platform-specific mca handler is called at this point
+ */
+
+ mca_handler_platform() ;
+
+ /* Clear the SAL MCA logs */
+ ia64_log_clear(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, 1, printk);
+ ia64_log_clear(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PLATFORM, 1, printk);
+
+ /* Wakeup all the processors which are spinning in the rendezvous
+ * loop.
+ */
+ ia64_mca_wakeup_all();
+ ia64_return_to_sal_check();
+}
+
+/*
+ * SAL to OS entry point for INIT on the monarch processor
+ * This has been defined for registration purposes with SAL
+ * as a part of mca_init.
+ */
+void
+ia64_monarch_init_handler()
+{
+}
+/*
+ * SAL to OS entry point for INIT on the slave processor
+ * This has been defined for registration purposes with SAL
+ * as a part of mca_init.
+ */
+
+void
+ia64_slave_init_handler()
+{
+}
+/*
+ * ia64_mca_cmc_int_handler
+ * This is the correctable machine check interrupt handler.
+ * Right now the logs are extracted and displayed in a well-defined
+ * format.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void
+ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
+{
+ /* Get the CMC processor log */
+ ia64_log_get(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk);
+ /* Get the CMC platform log */
+ ia64_log_get(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PLATFORM, (prfunc_t)printk);
+
+
+ ia64_log_print(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk);
+ cmci_handler_platform(cmc_irq, arg, ptregs);
+
+ /* Clear the CMC SAL logs now that they have been saved in the OS buffer */
+ ia64_sal_clear_state_info(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR);
+ ia64_sal_clear_state_info(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PLATFORM);
+}
+
+/*
+ * IA64_MCA log support
+ */
+#define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */
+#define IA64_MAX_LOG_TYPES 3 /* MCA, CMC, INIT */
+#define IA64_MAX_LOG_SUBTYPES 2 /* Processor, Platform */
+
+typedef struct ia64_state_log_s {
+ spinlock_t isl_lock;
+ int isl_index;
+ sal_log_header_t isl_log[IA64_MAX_LOGS];
+
+} ia64_state_log_t;
+
+static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES][IA64_MAX_LOG_SUBTYPES];
+
+#define IA64_LOG_LOCK_INIT(it, sit) spin_lock_init(&ia64_state_log[it][sit].isl_lock)
+#define IA64_LOG_LOCK(it, sit) spin_lock_irqsave(&ia64_state_log[it][sit].isl_lock, s)
+#define IA64_LOG_UNLOCK(it, sit) spin_unlock_irqrestore(&ia64_state_log[it][sit].isl_lock,\
+ s)
+#define IA64_LOG_NEXT_INDEX(it, sit) ia64_state_log[it][sit].isl_index
+#define IA64_LOG_CURR_INDEX(it, sit) (1 - ia64_state_log[it][sit].isl_index)
+#define IA64_LOG_INDEX_INC(it, sit) \
+ ia64_state_log[it][sit].isl_index = 1 - ia64_state_log[it][sit].isl_index
+#define IA64_LOG_INDEX_DEC(it, sit) \
+ ia64_state_log[it][sit].isl_index = 1 - ia64_state_log[it][sit].isl_index
+#define IA64_LOG_NEXT_BUFFER(it, sit) (void *)(&(ia64_state_log[it][sit].isl_log[IA64_LOG_NEXT_INDEX(it,sit)]))
+#define IA64_LOG_CURR_BUFFER(it, sit) (void *)(&(ia64_state_log[it][sit].isl_log[IA64_LOG_CURR_INDEX(it,sit)]))
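+
+/*
+ * Illustrative only (not compiled): the intended use of the two-deep
+ * buffering above.  isl_index always names the NEXT buffer to be filled;
+ * ia64_log_get() fills it and flips the index, so the record just
+ * captured becomes the CURR buffer that ia64_log_print() and
+ * ia64_log_clear() then operate on.
+ */
+#if 0
+static void
+ia64_log_usage_sketch (void)
+{
+ /* capture the latest MCA processor record from SAL into the NEXT buffer */
+ ia64_log_get(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk);
+ /* the same record is now the CURR buffer; display it */
+ ia64_log_print(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk);
+ /* drop it from SAL and from the OS-side buffer */
+ ia64_log_clear(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, 1, (prfunc_t)printk);
+}
+#endif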
+
+/*
+ * ia64_log_init
+ * Reset the OS ia64 log buffer
+ * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC})
+ * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM})
+ * Outputs : None
+ */
+void
+ia64_log_init(int sal_info_type, int sal_sub_info_type)
+{
+ IA64_LOG_LOCK_INIT(sal_info_type, sal_sub_info_type);
+ IA64_LOG_NEXT_INDEX(sal_info_type, sal_sub_info_type) = 0;
+ memset(IA64_LOG_NEXT_BUFFER(sal_info_type, sal_sub_info_type), 0,
+ sizeof(sal_log_header_t) * IA64_MAX_LOGS);
+}
+
+/*
+ * ia64_log_get
+ * Get the current MCA log from SAL and copy it into the OS log buffer.
+ * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC})
+ * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM})
+ * Outputs : None
+ *
+ */
+void
+ia64_log_get(int sal_info_type, int sal_sub_info_type, prfunc_t prfunc)
+{
+ sal_log_header_t *log_buffer;
+ unsigned long s;
+
+ IA64_LOG_LOCK(sal_info_type, sal_sub_info_type);
+
+
+ /* Get the processor state information */
+ log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type, sal_sub_info_type);
+
+ if (ia64_sal_get_state_info(sal_info_type, sal_sub_info_type ,(u64 *)log_buffer))
+ prfunc("ia64_mca_log_get : Getting processor log failed\n");
+
+ IA64_LOG_INDEX_INC(sal_info_type, sal_sub_info_type);
+
+ IA64_LOG_UNLOCK(sal_info_type, sal_sub_info_type);
+
+}
+
+/*
+ * ia64_log_clear
+ * Clear the current MCA log from SAL and, depending on the clear_os_buffer flag,
+ * also clear the OS log buffer
+ * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC})
+ * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM})
+ * clear_os_buffer
+ * prfunc (print function)
+ * Outputs : None
+ *
+ */
+void
+ia64_log_clear(int sal_info_type, int sal_sub_info_type, int clear_os_buffer, prfunc_t prfunc)
+{
+ if (ia64_sal_clear_state_info(sal_info_type, sal_sub_info_type))
+ prfunc("ia64_mca_log_get : Clearing processor log failed\n");
+
+ if (clear_os_buffer) {
+ sal_log_header_t *log_buffer;
+ unsigned long s;
+
+ IA64_LOG_LOCK(sal_info_type, sal_sub_info_type);
+
+ /* Get the processor state information */
+ log_buffer = IA64_LOG_CURR_BUFFER(sal_info_type, sal_sub_info_type);
+
+ memset(log_buffer, 0, sizeof(sal_log_header_t));
+
+ IA64_LOG_INDEX_DEC(sal_info_type, sal_sub_info_type);
+
+ IA64_LOG_UNLOCK(sal_info_type, sal_sub_info_type);
+ }
+
+}
+
+/*
+ * ia64_log_processor_regs_print
+ * Print the contents of the saved processor register(s) in the format
+ * <reg_prefix>[<index>] <value>
+ *
+ * Inputs : regs (Register save buffer)
+ * reg_num (# of registers)
+ * reg_class (application/banked/control/bank1_general)
+ * reg_prefix (ar/br/cr/b1_gr)
+ * Outputs : None
+ *
+ */
+void
+ia64_log_processor_regs_print(u64 *regs,
+ int reg_num,
+ char *reg_class,
+ char *reg_prefix,
+ prfunc_t prfunc)
+{
+ int i;
+
+ prfunc("+%s Registers\n", reg_class);
+ for (i = 0; i < reg_num; i++)
+ prfunc("+ %s[%d] 0x%lx\n", reg_prefix, i, regs[i]);
+}
+
+static char *pal_mesi_state[] = {
+ "Invalid",
+ "Shared",
+ "Exclusive",
+ "Modified",
+ "Reserved1",
+ "Reserved2",
+ "Reserved3",
+ "Reserved4"
+};
+
+static char *pal_cache_op[] = {
+ "Unknown",
+ "Move in",
+ "Cast out",
+ "Coherency check",
+ "Internal",
+ "Instruction fetch",
+ "Implicit Writeback",
+ "Reserved"
+};
+
+/*
+ * ia64_log_cache_check_info_print
+ * Display the machine check information related to cache error(s).
+ * Inputs : i (Multiple errors are logged, i - index of logged error)
+ * info (Machine check info logged by the PAL and later
+ * captured by the SAL)
+ * target_addr (Address which caused the cache error)
+ * Outputs : None
+ */
+void
+ia64_log_cache_check_info_print(int i,
+ pal_cache_check_info_t info,
+ u64 target_addr,
+ prfunc_t prfunc)
+{
+ prfunc("+ Cache check info[%d]\n+", i);
+ prfunc(" Level: L%d",info.level);
+ if (info.mv)
+ prfunc(" ,Mesi: %s",pal_mesi_state[info.mesi]);
+ prfunc(" ,Index: %d,", info.index);
+ if (info.ic)
+ prfunc(" ,Cache: Instruction");
+ if (info.dc)
+ prfunc(" ,Cache: Data");
+ if (info.tl)
+ prfunc(" ,Line: Tag");
+ if (info.dl)
+ prfunc(" ,Line: Data");
+ prfunc(" ,Operation: %s,", pal_cache_op[info.op]);
+ if (info.wv)
+ prfunc(" ,Way: %d,", info.way);
+ if (info.tv)
+ prfunc(" ,Target Addr: 0x%lx", target_addr);
+ if (info.mc)
+ prfunc(" ,MC: Corrected");
+ prfunc("\n");
+}
+
+/*
+ * ia64_log_tlb_check_info_print
+ * Display the machine check information related to tlb error(s).
+ * Inputs : i (Multiple errors are logged, i - index of logged error)
+ * info (Machine check info logged by the PAL and later
+ * captured by the SAL)
+ * Outputs : None
+ */
+
+void
+ia64_log_tlb_check_info_print(int i,
+ pal_tlb_check_info_t info,
+ prfunc_t prfunc)
+{
+ prfunc("+ TLB Check Info [%d]\n+", i);
+ if (info.itc)
+ prfunc(" Failure: Instruction Translation Cache");
+ if (info.dtc)
+ prfunc(" Failure: Data Translation Cache");
+ if (info.itr) {
+ prfunc(" Failure: Instruction Translation Register");
+ prfunc(" ,Slot: %d", info.tr_slot);
+ }
+ if (info.dtr) {
+ prfunc(" Failure: Data Translation Register");
+ prfunc(" ,Slot: %d", info.tr_slot);
+ }
+ if (info.mc)
+ prfunc(" ,MC: Corrected");
+ prfunc("\n");
+}
+
+/*
+ * ia64_log_bus_check_info_print
+ * Display the machine check information related to bus error(s).
+ * Inputs : i (Multiple errors are logged, i - index of logged error)
+ * info (Machine check info logged by the PAL and later
+ * captured by the SAL)
+ * req_addr (Address of the requestor of the transaction)
+ * resp_addr (Address of the responder of the transaction)
+ * target_addr (Address where the data was to be delivered to or
+ * obtained from)
+ * Outputs : None
+ */
+void
+ia64_log_bus_check_info_print(int i,
+ pal_bus_check_info_t info,
+ u64 req_addr,
+ u64 resp_addr,
+ u64 targ_addr,
+ prfunc_t prfunc)
+{
+ prfunc("+ BUS Check Info [%d]\n+", i);
+ prfunc(" Status Info: %d", info.bsi);
+ prfunc(" ,Severity: %d", info.sev);
+ prfunc(" ,Transaction Type: %d", info.type);
+ prfunc(" ,Transaction Size: %d", info.size);
+ if (info.cc)
+ prfunc(" ,Cache-cache-transfer");
+ if (info.ib)
+ prfunc(" ,Error: Internal");
+ if (info.eb)
+ prfunc(" ,Error: External");
+ if (info.mc)
+ prfunc(" ,MC: Corrected");
+ if (info.tv)
+ prfunc(" ,Target Address: 0x%lx", targ_addr);
+ if (info.rq)
+ prfunc(" ,Requestor Address: 0x%lx", req_addr);
+ if (info.tv)
+ prfunc(" ,Responder Address: 0x%lx", resp_addr);
+ prfunc("\n");
+}
+
+/*
+ * ia64_log_processor_info_print
+ * Display the processor-specific information logged by PAL as a part
+ * of MCA or INIT or CMC.
+ * Inputs : lh (Pointer of the sal log header which specifies the format
+ * of SAL state info as specified by the SAL spec).
+ * Outputs : None
+ */
+void
+ia64_log_processor_info_print(sal_log_header_t *lh, prfunc_t prfunc)
+{
+ sal_log_processor_info_t *slpi;
+ int i;
+
+ if (!lh)
+ return;
+
+ if (lh->slh_log_type != SAL_SUB_INFO_TYPE_PROCESSOR)
+ return;
+
+#if defined(MCA_TEST)
+ slpi = &slpi_buf;
+#else
+ slpi = (sal_log_processor_info_t *)lh->slh_log_dev_spec_info;
+#endif /* #if defined(MCA_TEST) */
+
+ if (!slpi) {
+ prfunc("No Processor Error Log found\n");
+ return;
+ }
+
+ /* Print branch register contents if valid */
+ if (slpi->slpi_valid.slpi_br)
+ ia64_log_processor_regs_print(slpi->slpi_br, 8, "Branch", "br", prfunc);
+
+ /* Print control register contents if valid */
+ if (slpi->slpi_valid.slpi_cr)
+ ia64_log_processor_regs_print(slpi->slpi_cr, 128, "Control", "cr", prfunc);
+
+ /* Print application register contents if valid */
+ if (slpi->slpi_valid.slpi_ar)
+ ia64_log_processor_regs_print(slpi->slpi_br, 128, "Application", "ar", prfunc);
+
+ /* Print region register contents if valid */
+ if (slpi->slpi_valid.slpi_rr)
+ ia64_log_processor_regs_print(slpi->slpi_rr, 8, "Region", "rr", prfunc);
+
+ /* Print floating-point register contents if valid */
+ if (slpi->slpi_valid.slpi_fr)
+ ia64_log_processor_regs_print(slpi->slpi_fr, 128, "Floating-point", "fr",
+ prfunc);
+
+ /* Print bank1-gr NAT register contents if valid */
+ ia64_log_processor_regs_print(&slpi->slpi_bank1_nat_bits, 1, "NAT", "nat", prfunc);
+
+ /* Print bank 1 register contents if valid */
+ if (slpi->slpi_valid.slpi_bank1_gr)
+ ia64_log_processor_regs_print(slpi->slpi_bank1_gr, 16, "Bank1-General", "gr",
+ prfunc);
+
+ /* Print the cache check information if any*/
+ for (i = 0 ; i < MAX_CACHE_ERRORS; i++)
+ ia64_log_cache_check_info_print(i,
+ slpi->slpi_cache_check_info[i].slpi_cache_check,
+ slpi->slpi_cache_check_info[i].slpi_target_address,
+ prfunc);
+ /* Print the tlb check information if any*/
+ for (i = 0 ; i < MAX_TLB_ERRORS; i++)
+ ia64_log_tlb_check_info_print(i,slpi->slpi_tlb_check_info[i], prfunc);
+
+ /* Print the bus check information if any*/
+ for (i = 0 ; i < MAX_BUS_ERRORS; i++)
+ ia64_log_bus_check_info_print(i,
+ slpi->slpi_bus_check_info[i].slpi_bus_check,
+ slpi->slpi_bus_check_info[i].slpi_requestor_addr,
+ slpi->slpi_bus_check_info[i].slpi_responder_addr,
+ slpi->slpi_bus_check_info[i].slpi_target_addr,
+ prfunc);
+
+}
+
+/*
+ * ia64_log_print
+ * Display the contents of the OS error log information
+ * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC})
+ * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM})
+ * Outputs : None
+ */
+void
+ia64_log_print(int sal_info_type, int sal_sub_info_type, prfunc_t prfunc)
+{
+ char *info_type, *sub_info_type;
+
+ switch(sal_info_type) {
+ case SAL_INFO_TYPE_MCA:
+ info_type = "MCA";
+ break;
+ case SAL_INFO_TYPE_INIT:
+ info_type = "INIT";
+ break;
+ case SAL_INFO_TYPE_CMC:
+ info_type = "CMC";
+ break;
+ default:
+ info_type = "UNKNOWN";
+ break;
+ }
+
+ switch(sal_sub_info_type) {
+ case SAL_SUB_INFO_TYPE_PROCESSOR:
+ sub_info_type = "PROCESSOR";
+ break;
+ case SAL_SUB_INFO_TYPE_PLATFORM:
+ sub_info_type = "PLATFORM";
+ break;
+ default:
+ sub_info_type = "UNKNOWN";
+ break;
+ }
+
+ prfunc("+BEGIN HARDWARE ERROR STATE [%s %s]\n", info_type, sub_info_type);
+ if (sal_sub_info_type == SAL_SUB_INFO_TYPE_PROCESSOR)
+ ia64_log_processor_info_print(
+ IA64_LOG_CURR_BUFFER(sal_info_type, sal_sub_info_type),
+ prfunc);
+ else
+ log_print_platform(IA64_LOG_CURR_BUFFER(sal_info_type, sal_sub_info_type),prfunc);
+ prfunc("+END HARDWARE ERROR STATE [%s %s]\n", info_type, sub_info_type);
+}
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
new file mode 100644
index 000000000..3d49ac06e
--- /dev/null
+++ b/arch/ia64/kernel/mca_asm.S
@@ -0,0 +1,621 @@
+#include <asm/processor.h>
+#include <asm/mcaasm.h>
+#include <asm/page.h>
+#include <asm/mca.h>
+
+ .psr abi64
+ .psr lsb
+ .lsb
+
+/*
+ * SAL_TO_OS_MCA_HANDOFF_STATE
+ * 1. GR1 = OS GP
+ * 2. GR8 = PAL_PROC physical address
+ * 3. GR9 = SAL_PROC physical address
+ * 4. GR10 = SAL GP (physical)
+ * 5. GR11 = Rendez state
+ * 6. GR12 = Return address to location within SAL_CHECK
+ */
+#define SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(_tmp) \
+ movl _tmp=ia64_sal_to_os_handoff_state;; \
+ st8 [_tmp]=r1,0x08;; \
+ st8 [_tmp]=r8,0x08;; \
+ st8 [_tmp]=r9,0x08;; \
+ st8 [_tmp]=r10,0x08;; \
+ st8 [_tmp]=r11,0x08;; \
+ st8 [_tmp]=r12,0x08;;
+
+/*
+ * OS_MCA_TO_SAL_HANDOFF_STATE
+ * 1. GR8 = OS_MCA status
+ * 2. GR9 = SAL GP (physical)
+ * 3. GR22 = New min state save area pointer
+ */
+#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp) \
+ movl _tmp=ia64_os_to_sal_handoff_state;; \
+ DATA_VA_TO_PA(_tmp);; \
+ ld8 r8=[_tmp],0x08;; \
+ ld8 r9=[_tmp],0x08;; \
+ ld8 r22=[_tmp],0x08;;
+
+/*
+ * BRANCH
+ * Jump to the instruction referenced by
+ * "to_label".
+ * Branch is taken only if the predicate
+ * register "p" is true.
+ * "ip" is the address of the instruction
+ * located at "from_label".
+ * "temp" is a scratch register like r2
+ * "adjust" needed for HP compiler.
+ * A screwup somewhere with constant arithmetic.
+ */
+#define BRANCH(to_label, temp, p, adjust) \
+100: (p) mov temp=ip; \
+ ;; \
+ (p) adds temp=to_label-100b,temp;\
+ (p) adds temp=adjust,temp; \
+ (p) mov b1=temp ; \
+ (p) br b1
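+
+// For example (illustrative, matching the uses below):
+// BRANCH(ia64_os_mca_done_dump, r2, p0, -0x10) takes the ip of the bundle
+// at local label 100, adds the assembly-time distance to
+// ia64_os_mca_done_dump (plus the -0x10 fixup), and branches there through
+// b1 -- a position-independent jump that also works while running in
+// physical mode.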
+
+ .global ia64_os_mca_dispatch
+ .global ia64_os_mca_dispatch_end
+ .global ia64_sal_to_os_handoff_state
+ .global ia64_os_to_sal_handoff_state
+ .global ia64_os_mca_ucmc_handler
+ .global ia64_mca_proc_state_dump
+ .global ia64_mca_proc_state_restore
+ .global ia64_mca_stack
+ .global ia64_mca_stackframe
+ .global ia64_mca_bspstore
+
+ .text
+ .align 16
+
+ia64_os_mca_dispatch:
+
+#if defined(MCA_TEST)
+ // Pretend that we are in interrupt context
+ mov r2=psr
+ dep r2=0, r2, PSR_IC, 2;
+ mov psr.l = r2
+#endif /* #if defined(MCA_TEST) */
+
+ // Save the SAL to OS MCA handoff state as defined
+ // by SAL SPEC 2.5
+ // NOTE : The order in which the state gets saved
+ // is dependent on the way the C-structure
+ // for ia64_mca_sal_to_os_state_t has been
+ // defined in include/asm/mca.h
+ SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
+
+ // LOG PROCESSOR STATE INFO FROM HERE ON..
+ ;;
+begin_os_mca_dump:
+ BRANCH(ia64_os_mca_proc_state_dump, r2, p0, 0x0)
+ ;;
+ia64_os_mca_done_dump:
+
+ // Setup new stack frame for OS_MCA handling
+ movl r2=ia64_mca_bspstore // local bspstore area location in r2
+ movl r3=ia64_mca_stackframe // save stack frame to memory in r3
+ rse_switch_context(r6,r3,r2);; // RSC management in this new context
+ movl r12=ia64_mca_stack;;
+
+ // Enter virtual mode from physical mode
+ VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4)
+ia64_os_mca_virtual_begin:
+
+ // call our handler
+ movl r2=ia64_mca_ucmc_handler;;
+ mov b6=r2;;
+ br.call.sptk.few b0=b6
+ ;;
+
+ // Revert back to physical mode before going back to SAL
+ PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4)
+ia64_os_mca_virtual_end:
+
+#if defined(MCA_TEST)
+ // Pretend that we are in interrupt context
+ mov r2=psr
+ dep r2=0, r2, PSR_IC, 2;
+ mov psr.l = r2
+#endif /* #if defined(MCA_TEST) */
+
+ // restore the original stack frame here
+ movl r2=ia64_mca_stackframe // restore stack frame from memory at r2
+ ;;
+ DATA_VA_TO_PA(r2)
+ movl r4=IA64_PSR_MC
+ ;;
+ rse_return_context(r4,r3,r2) // switch from interrupt context for RSE
+
+ // let us restore all the registers from our PSI structure
+ mov r8=gp
+ ;;
+begin_os_mca_restore:
+ BRANCH(ia64_os_mca_proc_state_restore, r2, p0, 0x0)
+ ;;
+
+ia64_os_mca_done_restore:
+ ;;
+#ifdef SOFTSDV
+ VIRTUAL_MODE_ENTER(r2,r3, vmode_enter, r4)
+vmode_enter:
+ br.ret.sptk.few b0
+#else
+ // branch back to SALE_CHECK
+ OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2)
+ ld8 r3=[r2];;
+ mov b0=r3 // SAL_CHECK return address
+ br b0
+ ;;
+#endif /* #ifdef SOFTSDV */
+ia64_os_mca_dispatch_end:
+//EndMain//////////////////////////////////////////////////////////////////////
+
+
+//++
+// Name:
+// ia64_os_mca_proc_state_dump()
+//
+// Stub Description:
+//
+// This stub dumps the processor state during MCHK to a data area
+//
+//--
+
+ia64_os_mca_proc_state_dump:
+// Get and save GR0-31 from Proc. Min. State Save Area to SAL PSI
+ movl r2=ia64_mca_proc_state_dump;; // Os state dump area
+
+// save ar.NaT
+ mov r5=ar.unat // ar.unat
+
+// save banked GRs 16-31 along with NaT bits
+ bsw.1;;
+ st8.spill [r2]=r16,8;;
+ st8.spill [r2]=r17,8;;
+ st8.spill [r2]=r18,8;;
+ st8.spill [r2]=r19,8;;
+ st8.spill [r2]=r20,8;;
+ st8.spill [r2]=r21,8;;
+ st8.spill [r2]=r22,8;;
+ st8.spill [r2]=r23,8;;
+ st8.spill [r2]=r24,8;;
+ st8.spill [r2]=r25,8;;
+ st8.spill [r2]=r26,8;;
+ st8.spill [r2]=r27,8;;
+ st8.spill [r2]=r28,8;;
+ st8.spill [r2]=r29,8;;
+ st8.spill [r2]=r30,8;;
+ st8.spill [r2]=r31,8;;
+
+ mov r4=ar.unat;;
+ st8 [r2]=r4,8 // save User NaT bits for r16-r31
+ mov ar.unat=r5 // restore original unat
+ bsw.0;;
+
+//save BRs
+ add r4=8,r2 // duplicate r2 in r4
+ add r6=2*8,r2 // duplicate r2 in r6
+
+ mov r3=b0
+ mov r5=b1
+ mov r7=b2;;
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=b3
+ mov r5=b4
+ mov r7=b5;;
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=b6
+ mov r5=b7;;
+ st8 [r2]=r3,2*8
+ st8 [r4]=r5,2*8;;
+
+cSaveCRs:
+// save CRs
+ add r4=8,r2 // duplicate r2 in r4
+ add r6=2*8,r2 // duplicate r2 in r6
+
+ mov r3=cr0 // cr.dcr
+ mov r5=cr1 // cr.itm
+ mov r7=cr2;; // cr.iva
+
+ st8 [r2]=r3,8*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;; // 48 byte increments
+
+ mov r3=cr8;; // cr.pta
+ st8 [r2]=r3,8*8;; // 64 byte increments
+
+// if PSR.ic=0, reading interruption registers causes an illegal operation fault
+ mov r3=psr;;
+ tbit.nz.unc p2,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test
+(p2) st8 [r2]=r0,9*8+160 // increment by 168 byte inc.
+begin_skip_intr_regs:
+ BRANCH(SkipIntrRegs, r9, p2, 0x0)
+ ;;
+ add r4=8,r2 // duplicate r2 in r4
+ add r6=2*8,r2 // duplicate r2 in r6
+
+ mov r3=cr16 // cr.ipsr
+ mov r5=cr17 // cr.isr
+ mov r7=r0;; // cr.ida => cr18
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=cr19 // cr.iip
+ mov r5=cr20 // cr.idtr
+ mov r7=cr21;; // cr.iitr
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=cr22 // cr.iipa
+ mov r5=cr23 // cr.ifs
+ mov r7=cr24;; // cr.iim
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=cr25;; // cr.iha
+ st8 [r2]=r3,160;; // 160 byte increment
+
+SkipIntrRegs:
+ st8 [r2]=r0,168 // another 168 byte increment.
+
+ mov r3=cr66;; // cr.lid
+ st8 [r2]=r3,40 // 40 byte increment
+
+ mov r3=cr71;; // cr.ivr
+ st8 [r2]=r3,8
+
+ mov r3=cr72;; // cr.tpr
+ st8 [r2]=r3,24 // 24 byte increment
+
+ mov r3=r0;; // cr.eoi => cr75
+ st8 [r2]=r3,168 // 168 byte inc.
+
+ mov r3=r0;; // cr.irr0 => cr96
+ st8 [r2]=r3,16 // 16 byte inc.
+
+ mov r3=r0;; // cr.irr1 => cr98
+ st8 [r2]=r3,16 // 16 byte inc.
+
+ mov r3=r0;; // cr.irr2 => cr100
+ st8 [r2]=r3,16 // 16 byte inc
+
+ mov r3=r0;; // cr.irr3 => cr102
+ st8 [r2]=r3,16 // 16b inc.
+
+ mov r3=r0;; // cr.itv => cr114
+ st8 [r2]=r3,16 // 16 byte inc.
+
+ mov r3=r0;; // cr.pmv => cr116
+ st8 [r2]=r3,8
+
+ mov r3=r0;; // cr.lrr0 => cr117
+ st8 [r2]=r3,8
+
+ mov r3=r0;; // cr.lrr1 => cr118
+ st8 [r2]=r3,8
+
+ mov r3=r0;; // cr.cmcv => cr119
+ st8 [r2]=r3,8*10;;
+
+cSaveARs:
+// save ARs
+ add r4=8,r2 // duplicate r2 in r4
+ add r6=2*8,r2 // duplicate r2 in r6
+
+ mov r3=ar0 // ar.kr0
+ mov r5=ar1 // ar.kr1
+ mov r7=ar2;; // ar.kr2
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=ar3 // ar.kr3
+ mov r5=ar4 // ar.kr4
+ mov r7=ar5;; // ar.kr5
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=ar6 // ar.kr6
+ mov r5=ar7 // ar.kr7
+ mov r7=r0;; // ar.kr8
+ st8 [r2]=r3,10*8
+ st8 [r4]=r5,10*8
+ st8 [r6]=r7,10*8;; // increment by 72 bytes
+
+ mov r3=ar16 // ar.rsc
+ mov ar16=r0 // put RSE in enforced lazy mode
+ mov r5=ar17 // ar.bsp
+ mov r7=ar18;; // ar.bspstore
+ st8 [r2]=r3,3*8
+ st8 [r4]=r5,3*8
+ st8 [r6]=r7,3*8;;
+
+ mov r3=ar19;; // ar.rnat
+ st8 [r2]=r3,8*13 // increment by 13x8 bytes
+
+ mov r3=ar32;; // ar.ccv
+ st8 [r2]=r3,8*4
+
+ mov r3=ar36;; // ar.unat
+ st8 [r2]=r3,8*4
+
+ mov r3=ar40;; // ar.fpsr
+ st8 [r2]=r3,8*4
+
+ mov r3=ar44;; // ar.itc
+ st8 [r2]=r3,160 // 160
+
+ mov r3=ar64;; // ar.pfs
+ st8 [r2]=r3,8
+
+ mov r3=ar65;; // ar.lc
+ st8 [r2]=r3,8
+
+ mov r3=ar66;; // ar.ec
+ st8 [r2]=r3
+ add r2=8*62,r2 //padding
+
+// save RRs
+ mov ar.lc=0x08-1
+ movl r4=0x00;;
+
+cStRR:
+ mov r3=rr[r4];;
+ st8 [r2]=r3,8
+ add r4=1,r4
+ br.cloop.sptk.few cStRR
+ ;;
+end_os_mca_dump:
+ BRANCH(ia64_os_mca_done_dump, r2, p0, -0x10)
+ ;;
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+
+//++
+// Name:
+// ia64_os_mca_proc_state_restore()
+//
+// Stub Description:
+//
+// This is a stub to restore the saved processor state during MCHK
+//
+//--
+
+ia64_os_mca_proc_state_restore:
+
+// Restore bank1 GR16-31
+ movl r2=ia64_mca_proc_state_dump // Convert virtual address
+ ;; // of OS state dump area
+ DATA_VA_TO_PA(r2) // to physical address
+ ;;
+restore_GRs: // restore bank-1 GRs 16-31
+ bsw.1;;
+ add r3=16*8,r2;; // to get to NaT of GR 16-31
+ ld8 r3=[r3];;
+ mov ar.unat=r3;; // first restore NaT
+
+ ld8.fill r16=[r2],8;;
+ ld8.fill r17=[r2],8;;
+ ld8.fill r18=[r2],8;;
+ ld8.fill r19=[r2],8;;
+ ld8.fill r20=[r2],8;;
+ ld8.fill r21=[r2],8;;
+ ld8.fill r22=[r2],8;;
+ ld8.fill r23=[r2],8;;
+ ld8.fill r24=[r2],8;;
+ ld8.fill r25=[r2],8;;
+ ld8.fill r26=[r2],8;;
+ ld8.fill r27=[r2],8;;
+ ld8.fill r28=[r2],8;;
+ ld8.fill r29=[r2],8;;
+ ld8.fill r30=[r2],8;;
+ ld8.fill r31=[r2],8;;
+
+ ld8 r3=[r2],8;; // increment to skip NaT
+ bsw.0;;
+
+restore_BRs:
+ add r4=8,r2 // duplicate r2 in r4
+ add r6=2*8,r2;; // duplicate r2 in r6
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov b0=r3
+ mov b1=r5
+ mov b2=r7;;
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov b3=r3
+ mov b4=r5
+ mov b5=r7;;
+
+ ld8 r3=[r2],2*8
+ ld8 r5=[r4],2*8;;
+ mov b6=r3
+ mov b7=r5;;
+
+restore_CRs:
+ add r4=8,r2 // duplicate r2 in r4
+ add r6=2*8,r2;; // duplicate r2 in r6
+
+ ld8 r3=[r2],8*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;; // 48 byte increments
+ mov cr0=r3 // cr.dcr
+ mov cr1=r5 // cr.itm
+ mov cr2=r7;; // cr.iva
+
+ ld8 r3=[r2],8*8;; // 64 byte increments
+// mov cr8=r3 // cr.pta
+
+
+// if PSR.ic=1, reading interruption registers causes an illegal operation fault
+ mov r3=psr;;
+ tbit.nz.unc p2,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test
+(p2) st8 [r2]=r0,9*8+160 // increment by 160 byte inc.
+
+begin_rskip_intr_regs:
+ BRANCH(rSkipIntrRegs, r9, p2, 0x0)
+ ;;
+
+ add r4=8,r2 // duplicate r2 in r4
+ add r6=2*8,r2;; // duplicate r2 in r6
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov cr16=r3 // cr.ipsr
+ mov cr17=r5 // cr.isr is read only
+// mov cr18=r7;; // cr.ida
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov cr19=r3 // cr.iip
+ mov cr20=r5 // cr.idtr
+ mov cr21=r7;; // cr.iitr
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov cr22=r3 // cr.iipa
+ mov cr23=r5 // cr.ifs
+ mov cr24=r7 // cr.iim
+
+ ld8 r3=[r2],160;; // 160 byte increment
+ mov cr25=r3 // cr.iha
+
+rSkipIntrRegs:
+ ld8 r3=[r2],168;; // another 168 byte inc.
+
+ ld8 r3=[r2],40;; // 40 byte increment
+ mov cr66=r3 // cr.lid
+
+ ld8 r3=[r2],8;;
+// mov cr71=r3 // cr.ivr is read only
+ ld8 r3=[r2],24;; // 24 byte increment
+ mov cr72=r3 // cr.tpr
+
+ ld8 r3=[r2],168;; // 168 byte inc.
+// mov cr75=r3 // cr.eoi
+
+ ld8 r3=[r2],16;; // 16 byte inc.
+// mov cr96=r3 // cr.irr0 is read only
+
+ ld8 r3=[r2],16;; // 16 byte inc.
+// mov cr98=r3 // cr.irr1 is read only
+
+ ld8 r3=[r2],16;; // 16 byte inc
+// mov cr100=r3 // cr.irr2 is read only
+
+ ld8 r3=[r2],16;; // 16b inc.
+// mov cr102=r3 // cr.irr3 is read only
+
+ ld8 r3=[r2],16;; // 16 byte inc.
+// mov cr114=r3 // cr.itv
+
+ ld8 r3=[r2],8;;
+// mov cr116=r3 // cr.pmv
+ ld8 r3=[r2],8;;
+// mov cr117=r3 // cr.lrr0
+ ld8 r3=[r2],8;;
+// mov cr118=r3 // cr.lrr1
+ ld8 r3=[r2],8*10;;
+// mov cr119=r3 // cr.cmcv
+
+restore_ARs:
+ add r4=8,r2 // duplicate r2 in r4
+ add r6=2*8,r2;; // duplicate r2 in r6
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov ar0=r3 // ar.kr0
+ mov ar1=r5 // ar.kr1
+ mov ar2=r7;; // ar.kr2
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+ mov ar3=r3 // ar.kr3
+ mov ar4=r5 // ar.kr4
+ mov ar5=r7;; // ar.kr5
+
+ ld8 r3=[r2],10*8
+ ld8 r5=[r4],10*8
+ ld8 r7=[r6],10*8;;
+ mov ar6=r3 // ar.kr6
+ mov ar7=r5 // ar.kr7
+// mov ar8=r6 // ar.kr8
+ ;;
+
+ ld8 r3=[r2],3*8
+ ld8 r5=[r4],3*8
+ ld8 r7=[r6],3*8;;
+// mov ar16=r3 // ar.rsc
+// mov ar17=r5 // ar.bsp is read only
+ mov ar16=r0 // make sure that RSE is in enforced lazy mode
+ mov ar18=r7;; // ar.bspstore
+
+ ld8 r9=[r2],8*13;;
+ mov ar19=r9 // ar.rnat
+
+ mov ar16=r3 // ar.rsc
+ ld8 r3=[r2],8*4;;
+ mov ar32=r3 // ar.ccv
+
+ ld8 r3=[r2],8*4;;
+ mov ar36=r3 // ar.unat
+
+ ld8 r3=[r2],8*4;;
+ mov ar40=r3 // ar.fpsr
+
+ ld8 r3=[r2],160;; // 160
+// mov ar44=r3 // ar.itc
+
+ ld8 r3=[r2],8;;
+ mov ar64=r3 // ar.pfs
+
+ ld8 r3=[r2],8;;
+ mov ar65=r3 // ar.lc
+
+ ld8 r3=[r2];;
+ mov ar66=r3 // ar.ec
+ add r2=8*62,r2;; // padding
+
+restore_RRs:
+ mov r5=ar.lc
+ mov ar.lc=0x08-1
+ movl r4=0x00
+cStRRr:
+ ld8 r3=[r2],8;;
+// mov rr[r4]=r3 // what are its access privileges?
+ add r4=1,r4
+ br.cloop.sptk.few cStRRr
+ ;;
+ mov ar.lc=r5
+ ;;
+end_os_mca_restore:
+ BRANCH(ia64_os_mca_done_restore, r2, p0, -0x20)
+ ;;
+//EndStub//////////////////////////////////////////////////////////////////////
diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
new file mode 100644
index 000000000..1506bacc2
--- /dev/null
+++ b/arch/ia64/kernel/pal.S
@@ -0,0 +1,119 @@
+/*
+ * PAL Firmware support
+ * IA-64 Processor Programmers Reference Vol 2
+ *
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 David Mosberger <davidm@hpl.hp.com>
+ */
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .data
+pal_entry_point:
+ data8 ia64_pal_default_handler
+ .text
+
+/*
+ * Set the PAL entry point address. This could be written in C code, but we do it here
+ * to keep it all in one module (besides, it's so trivial that it's
+ * not a big deal).
+ *
+ * in0 Address of the PAL entry point (text address, NOT a function descriptor).
+ */
+ .align 16
+ .global ia64_pal_handler_init
+ .proc ia64_pal_handler_init
+ia64_pal_handler_init:
+ alloc r3=ar.pfs,1,0,0,0
+ movl r2=pal_entry_point
+ ;;
+ st8 [r2]=in0
+ br.ret.sptk.few rp
+
+ .endp ia64_pal_handler_init
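+
+/*
+ * For reference only, a rough C equivalent of the routine above (the
+ * pal_entry_point declaration is the illustrative C-side counterpart of
+ * the data8 slot defined earlier; this sketch is not part of the build):
+ *
+ *	static u64 pal_entry_point;
+ *
+ *	void
+ *	ia64_pal_handler_init (void *pal_addr)
+ *	{
+ *		pal_entry_point = (u64) pal_addr;
+ *	}
+ */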
+
+/*
+ * Default PAL call handler. This needs to be coded in assembly because it uses
+ * the static calling convention, i.e., the RSE may not be used and calls are
+ * done via "br.cond" (not "br.call").
+ */
+ .align 16
+ .global ia64_pal_default_handler
+ .proc ia64_pal_default_handler
+ia64_pal_default_handler:
+ mov r8=-1
+ br.cond.sptk.few rp
+
+/*
+ * Make a PAL call using the static calling convention.
+ *
+ * in0 Pointer to struct ia64_pal_retval
+ * in1 Index of PAL service
+ * in2 - in4 Remaining PAL arguments
+ *
+ */
+
+#ifdef __GCC_MULTIREG_RETVALS__
+# define arg0 in0
+# define arg1 in1
+# define arg2 in2
+# define arg3 in3
+# define arg4 in4
+#else
+# define arg0 in1
+# define arg1 in2
+# define arg2 in3
+# define arg3 in4
+# define arg4 in5
+#endif
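+
+/*
+ * Illustrative C-side use when __GCC_MULTIREG_RETVALS__ is not defined
+ * (PAL_PTCE_INFO is just an example index and the struct ia64_pal_retval
+ * layout is assumed from <asm/pal.h>, not defined here):
+ *
+ *	struct ia64_pal_retval result;
+ *
+ *	ia64_pal_call_static(&result, PAL_PTCE_INFO, 0, 0, 0);
+ *	if (result.status == 0) {
+ *		... result.v0/v1/v2 hold the three PAL return values ...
+ *	}
+ */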
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .align 16
+ .global ia64_pal_call_static
+ .proc ia64_pal_call_static
+ia64_pal_call_static:
+ alloc loc0 = ar.pfs,6,90,0,0
+ movl loc2 = pal_entry_point
+1: {
+ mov r28 = arg0
+ mov r29 = arg1
+ mov r8 = ip
+ }
+ ;;
+ ld8 loc2 = [loc2] // loc2 <- entry point
+ mov r30 = arg2
+ mov r31 = arg3
+ ;;
+ mov loc3 = psr
+ mov loc1 = rp
+ adds r8 = .ret0-1b,r8
+ ;;
+ rsm psr.i
+ mov b7 = loc2
+ mov rp = r8
+ ;;
+ br.cond.sptk.few b7
+.ret0: mov psr.l = loc3
+#ifndef __GCC_MULTIREG_RETVALS__
+ st8 [in0] = r8, 8
+ ;;
+ st8 [in0] = r9, 8
+ ;;
+ st8 [in0] = r10, 8
+ ;;
+ st8 [in0] = r11, 8
+#endif
+ mov ar.pfs = loc0
+ mov rp = loc1
+ ;;
+ srlz.d // serialize restoration of psr.l
+ br.ret.sptk.few b0
+ .endp ia64_pal_call_static
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
new file mode 100644
index 000000000..f86f45537
--- /dev/null
+++ b/arch/ia64/kernel/pci-dma.c
@@ -0,0 +1,56 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * This implementation is for IA-64 platforms that do not support
+ * I/O TLBs (aka DMA address translation hardware).
+ *
+ * XXX This doesn't do the right thing yet. It appears we would have
+ * to add additional zones so we can implement the various address
+ * mask constraints that we might encounter. A zone for memory < 32
+ * bits is obviously necessary...
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+
+#include <asm/io.h>
+
+/* Pure 2^n version of get_order */
+extern __inline__ unsigned long
+get_order (unsigned long size)
+{
+ unsigned long order = ia64_fls(size);
+
+ printk ("get_order: size=%lu, order=%lu\n", size, order);
+
+ if (order > PAGE_SHIFT)
+ order -= PAGE_SHIFT;
+ else
+ order = 0;
+ return order;
+}
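+
+/*
+ * Worked example (assuming 16KB pages, i.e. PAGE_SHIFT == 14, and that
+ * ia64_fls() returns the zero-based position of the highest set bit):
+ * a 64KB request has its highest bit at position 16, so order = 16 - 14 = 2
+ * and 2^2 = 4 pages are allocated.  For sizes that are not a power of two
+ * the order is rounded down (so the allocation can come up short), which
+ * is why the comment above calls this the pure 2^n version.
+ */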
+
+void *
+pci_alloc_consistent (struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle)
+{
+ void *ret;
+ int gfp = GFP_ATOMIC;
+
+ if (!hwdev || hwdev->dma_mask != 0xffffffff)
+ gfp |= GFP_DMA;
+ ret = (void *)__get_free_pages(gfp, get_order(size));
+
+ if (ret) {
+ memset(ret, 0, size);
+ *dma_handle = virt_to_bus(ret);
+ }
+ return ret;
+}
+
+void
+pci_free_consistent (struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle)
+{
+ free_pages((unsigned long) vaddr, get_order(size));
+}
diff --git a/arch/ia64/kernel/pci.c b/arch/ia64/kernel/pci.c
new file mode 100644
index 000000000..3bceeed8e
--- /dev/null
+++ b/arch/ia64/kernel/pci.c
@@ -0,0 +1,239 @@
+/*
+ * pci.c - Low-Level PCI Access in IA64
+ *
+ * Derived from bios32.c of i386 tree.
+ *
+ */
+
+#include <linux/config.h>
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/malloc.h>
+#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
+
+#include <asm/machvec.h>
+#include <asm/page.h>
+#include <asm/segment.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
+#include <asm/sal.h>
+
+
+#ifdef CONFIG_SMP
+# include <asm/smp.h>
+#endif
+#include <asm/irq.h>
+
+
+#undef DEBUG
+#define DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+/*
+ * This interrupt-safe spinlock protects all accesses to PCI
+ * configuration space.
+ */
+
+spinlock_t pci_lock = SPIN_LOCK_UNLOCKED;
+
+struct pci_fixup pcibios_fixups[] = { { 0 } };
+
+#define PCI_NO_CHECKS 0x400
+#define PCI_NO_PEER_FIXUP 0x800
+
+static unsigned int pci_probe = PCI_NO_CHECKS;
+
+/* Macro to build a PCI configuration address to be passed as a parameter to SAL. */
+
+#define PCI_CONFIG_ADDRESS(dev, where) (((u64) dev->bus->number << 16) | ((u64) (dev->devfn & 0xff) << 8) | (where & 0xff))
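+
+/*
+ * For example (illustrative values): bus 1, device 2, function 0
+ * (devfn 0x10) at config offset 0x04 packs to
+ * (1 << 16) | (0x10 << 8) | 0x04 = 0x011004.
+ */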
+
+static int
+pci_conf_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+ s64 status;
+ u64 lval;
+
+ status = ia64_sal_pci_config_read(PCI_CONFIG_ADDRESS(dev, where), 1, &lval);
+ *value = lval;
+ return status;
+}
+
+static int
+pci_conf_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+ s64 status;
+ u64 lval;
+
+ status = ia64_sal_pci_config_read(PCI_CONFIG_ADDRESS(dev, where), 2, &lval);
+ *value = lval;
+ return status;
+}
+
+static int
+pci_conf_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+ s64 status;
+ u64 lval;
+
+ status = ia64_sal_pci_config_read(PCI_CONFIG_ADDRESS(dev, where), 4, &lval);
+ *value = lval;
+ return status;
+}
+
+static int
+pci_conf_write_config_byte (struct pci_dev *dev, int where, u8 value)
+{
+ return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 1, value);
+}
+
+static int
+pci_conf_write_config_word (struct pci_dev *dev, int where, u16 value)
+{
+ return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 2, value);
+}
+
+static int
+pci_conf_write_config_dword (struct pci_dev *dev, int where, u32 value)
+{
+ return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 4, value);
+}
+
+
+static struct pci_ops pci_conf = {
+ pci_conf_read_config_byte,
+ pci_conf_read_config_word,
+ pci_conf_read_config_dword,
+ pci_conf_write_config_byte,
+ pci_conf_write_config_word,
+ pci_conf_write_config_dword
+};
+
+/*
+ * Try to find PCI BIOS. This will always work for IA64.
+ */
+
+static struct pci_ops * __init
+pci_find_bios(void)
+{
+ return &pci_conf;
+}
+
+/*
+ * Initialization. Uses the SAL interface
+ */
+
+#define PCI_BUSSES_TO_SCAN 2 /* On "real" ;) hardware this will be 255 */
+
+void __init
+pcibios_init(void)
+{
+ struct pci_ops *ops = NULL;
+ int i;
+
+ if ((ops = pci_find_bios()) == NULL) {
+ printk("PCI: No PCI bus detected\n");
+ return;
+ }
+
+ printk("PCI: Probing PCI hardware\n");
+ for (i = 0; i < PCI_BUSSES_TO_SCAN; i++)
+ pci_scan_bus(i, ops, NULL);
+ platform_pci_fixup();
+ return;
+}
+
+/*
+ * Called after each bus is probed, but before its children
+ * are examined.
+ */
+
+void __init
+pcibios_fixup_bus(struct pci_bus *b)
+{
+ return;
+}
+
+int
+pci_assign_resource (struct pci_dev *dev, int i)
+{
+ printk("pci_assign_resource: not implemented!\n");
+ return -ENODEV;
+}
+
+void __init
+pcibios_update_resource(struct pci_dev *dev, struct resource *root,
+ struct resource *res, int resource)
+{
+ unsigned long where, size;
+ u32 reg;
+
+ where = PCI_BASE_ADDRESS_0 + (resource * 4);
+ size = res->end - res->start;
+ pci_read_config_dword(dev, where, &reg);
+ reg = (reg & size) | (((u32)(res->start - root->start)) & ~size);
+ pci_write_config_dword(dev, where, reg);
+
+ /* ??? FIXME -- record old value for shutdown. */
+}
+
+void __init
+pcibios_update_irq(struct pci_dev *dev, int irq)
+{
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+
+ /* ??? FIXME -- record old value for shutdown. */
+}
+
+void __init
+pcibios_fixup_pbus_ranges (struct pci_bus * bus, struct pbus_set_ranges_data * ranges)
+{
+ ranges->io_start -= bus->resource[0]->start;
+ ranges->io_end -= bus->resource[0]->start;
+ ranges->mem_start -= bus->resource[1]->start;
+ ranges->mem_end -= bus->resource[1]->start;
+}
+
+int __init
+pcibios_enable_device (struct pci_dev *dev)
+{
+ /* Not needed, since we enable all devices at startup. */
+ return 0;
+}
+
+/*
+ * PCI BIOS setup, always defaults to SAL interface
+ */
+
+char * __init
+pcibios_setup(char *str)
+{
+ pci_probe = PCI_NO_CHECKS;
+ return NULL;
+}
+
+void
+pcibios_align_resource (void *data, struct resource *res, unsigned long size)
+{
+}
+
+#if 0 /*def CONFIG_PROC_FS*/
+/*
+ * This is an ugly hack to get a (weak) unresolved reference to something that is
+ * in drivers/pci/proc.c. Without this, the file does not get linked in at all
+ * (I suspect the reason this isn't needed on Linux/x86 is that most people compile
+ * with module support, in which case the EXPORT_SYMBOL() stuff will ensure the
+ * code gets linked in. Sigh... --davidm 99/12/20.
+ */
+asm ("data8 proc_bus_pci_add");
+#endif
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
new file mode 100644
index 000000000..274b68a73
--- /dev/null
+++ b/arch/ia64/kernel/perfmon.c
@@ -0,0 +1,227 @@
+#include <linux/config.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+
+#include <asm/errno.h>
+#include <asm/irq.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_PERFMON
+
+#define MAX_PERF_COUNTER 4 /* true for Itanium, at least */
+#define WRITE_PMCS_AND_START 0xa0
+#define WRITE_PMCS 0xa1
+#define READ_PMDS 0xa2
+#define STOP_PMCS 0xa3
+#define IA64_COUNTER_MASK 0xffffffffffffff6f
+#define PERF_OVFL_VAL 0xffffffff
+
+struct perfmon_counter {
+ unsigned long data;
+ int counter_num;
+};
+
+unsigned long pmds[MAX_PERF_COUNTER];
+struct task_struct *perf_owner;
+
+/*
+ * We set dcr.pp, psr.pp, and the appropriate pmc control values with
+ * this. Notice that we go about modifying _each_ task's pt_regs to
+ * set cr_ipsr.pp. This will start counting when "current" does an
+ * _rfi_. Also, since each task's cr_ipsr (and hence cr_ipsr.pp) is
+ * inherited across forks, we do _not_ need additional code on context
+ * switches. When stopping the counters we don't _need_ to go about
+ * changing every task's cr_ipsr back to where it was, because we could
+ * just set pmc[0]=1. But we do it anyway because we will probably
+ * add thread-specific accounting later.
+ *
+ * The obvious problem with this is that on SMP systems, it is a bit
+ * of work (when someone wants to do it) - it would be easier if we
+ * just added code to the context-switch path. I think we would need
+ * to lock the run queue to ensure no context switches, send an IPI to
+ * each processor, and in that IPI handler, just modify the psr bit of
+ * only the _current_ thread, since we have modified the psr bit
+ * correctly in the kernel stack for every process which is not
+ * running. Might crash on SMP systems without the
+ * lock_kernel(). Hence the lock..
+ */
+asmlinkage unsigned long
+sys_perfmonctl (int cmd1, int cmd2, void *ptr)
+{
+ struct perfmon_counter tmp, *cptr = ptr;
+ unsigned long pmd, cnum, dcr, flags;
+ struct task_struct *p;
+ struct pt_regs *regs;
+ int i;
+
+ switch (cmd1) {
+ case WRITE_PMCS: /* Writes to PMC's and clears PMDs */
+ case WRITE_PMCS_AND_START: /* Also starts counting */
+
+ if (!access_ok(VERIFY_READ, cptr, sizeof(struct perfmon_counter)*cmd2))
+ return -EFAULT;
+
+ if (cmd2 >= MAX_PERF_COUNTER)
+ return -EFAULT;
+
+ if (perf_owner && perf_owner != current)
+ return -EBUSY;
+ perf_owner = current;
+
+ for (i = 0; i < cmd2; i++, cptr++) {
+ copy_from_user(&tmp, cptr, sizeof(tmp));
+ /* XXX need to check validity of counter_num and perhaps data!! */
+ ia64_set_pmc(tmp.counter_num, tmp.data);
+ ia64_set_pmd(tmp.counter_num, 0);
+ pmds[tmp.counter_num - 4] = 0;
+ }
+
+ if (cmd1 == WRITE_PMCS_AND_START) {
+ local_irq_save(flags);
+ dcr = ia64_get_dcr();
+ dcr |= IA64_DCR_PP;
+ ia64_set_dcr(dcr);
+ local_irq_restore(flags);
+
+ /*
+ * This is a no can do. It obviously wouldn't
+ * work on SMP where another process may not
+ * be blocked at all.
+ *
+ * Perhaps we need a global predicate in the
+ * leave_kernel path to control if pp should
+ * be on or off?
+ */
+ lock_kernel();
+ for_each_task(p) {
+ regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) - 1;
+ ia64_psr(regs)->pp = 1;
+ }
+ unlock_kernel();
+ ia64_set_pmc(0, 0);
+ }
+ break;
+
+ case READ_PMDS:
+ if (cmd2 >= MAX_PERF_COUNTER)
+ return -EFAULT;
+ if (!access_ok(VERIFY_WRITE, cptr, sizeof(struct perfmon_counter)*cmd2))
+ return -EFAULT;
+ local_irq_save(flags);
+ /* XXX this looks wrong */
+ __asm__ __volatile__("rsm psr.pp\n");
+ dcr = ia64_get_dcr();
+ dcr &= ~IA64_DCR_PP;
+ ia64_set_dcr(dcr);
+ local_irq_restore(flags);
+
+ /*
+ * We cannot touch pmc[0] to stop counting here, as
+ * that particular instruction might cause an overflow
+ * and the mask in pmc[0] might get lost. I'm not very
+ * sure of the hardware behavior here. So we stop
+ * counting by psr.pp = 0. And we reset dcr.pp to
+ * prevent an interrupt from mucking up psr.pp in the
+ * meanwhile. Perfmon interrupts are pended, hence the
+ * above code should be ok if one of the above
+ * instructions cause overflows. Is this ok? When I
+ * muck with dcr, is the cli/sti needed??
+ */
+ for (i = 0, cnum = 4; i < MAX_PERF_COUNTER; i++, cnum++, cptr++) {
+ pmd = pmds[i] + (ia64_get_pmd(cnum) & PERF_OVFL_VAL);
+ put_user(pmd, &cptr->data);
+ }
+ local_irq_save(flags);
+ /* XXX this looks wrong */
+ __asm__ __volatile__("ssm psr.pp");
+ dcr = ia64_get_dcr();
+ dcr |= IA64_DCR_PP;
+ ia64_set_dcr(dcr);
+ local_irq_restore(flags);
+ break;
+
+ case STOP_PMCS:
+ ia64_set_pmc(0, 1);
+ for (i = 0; i < MAX_PERF_COUNTER; ++i)
+ ia64_set_pmc(i, 0);
+
+ local_irq_save(flags);
+ dcr = ia64_get_dcr();
+ dcr &= ~IA64_DCR_PP;
+ ia64_set_dcr(dcr);
+ local_irq_restore(flags);
+ /*
+ * This is a no can do. It obviously wouldn't
+ * work on SMP where another process may not
+ * be blocked at all.
+ *
+ * Perhaps we need a global predicate in the
+ * leave_kernel path to control if pp should
+ * be on or off?
+ */
+ lock_kernel();
+ for_each_task(p) {
+ regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) - 1;
+ ia64_psr(regs)->pp = 0;
+ }
+ unlock_kernel();
+ perf_owner = 0;
+ break;
+
+ default:
+ break;
+ }
+ return 0;
+}
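+
+/*
+ * Illustrative user-level usage (a sketch only: "perfmonctl" stands for
+ * whatever stub invokes this system call, and the PMC event selects are
+ * placeholders):
+ *
+ *	struct perfmon_counter pc[MAX_PERF_COUNTER];
+ *
+ *	pc[0].counter_num = 4; pc[0].data = <event select for PMC4>;
+ *	pc[1].counter_num = 5; pc[1].data = <event select for PMC5>;
+ *	perfmonctl(WRITE_PMCS_AND_START, 2, pc);
+ *	... run the workload to be measured ...
+ *	perfmonctl(READ_PMDS, 2, pc);	(pc[].data now holds the accumulated counts)
+ *	perfmonctl(STOP_PMCS, 0, 0);
+ */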
+
+static inline void
+update_counters (void)
+{
+ unsigned long mask, i, cnum, val;
+
+ mask = ia64_get_pmd(0) >> 4;
+ for (i = 0, cnum = 4; i < MAX_PERF_COUNTER; cnum++, i++, mask >>= 1) {
+ if (mask & 0x1)
+ val = PERF_OVFL_VAL;
+ else
+ /* since we got an interrupt, might as well clear every pmd. */
+ val = ia64_get_pmd(cnum) & PERF_OVFL_VAL;
+ pmds[i] += val;
+ ia64_set_pmd(cnum, 0);
+ }
+}
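+
+/*
+ * Example of the accounting above (illustrative numbers): if PMD4 has
+ * overflowed, bit 0 of (PMD0 >> 4) is set, so PERF_OVFL_VAL (2^32 - 1) is
+ * credited to pmds[0] and the hardware counter is restarted from zero;
+ * a later READ_PMDS then returns pmds[0] plus whatever has accumulated
+ * in PMD4 since.
+ */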
+
+static void
+perfmon_interrupt (int irq, void *arg, struct pt_regs *regs)
+{
+ update_counters();
+ ia64_set_pmc(0, 0);
+ ia64_srlz_d();
+}
+
+void
+perfmon_init (void)
+{
+ if (request_irq(PERFMON_IRQ, perfmon_interrupt, 0, "perfmon", NULL)) {
+ printk("perfmon_init: could not allocate performance monitor vector %u\n",
+ PERFMON_IRQ);
+ return;
+ }
+ ia64_set_pmv(PERFMON_IRQ);
+ ia64_srlz_d();
+}
+
+#else /* !CONFIG_PERFMON */
+
+asmlinkage unsigned long
+sys_perfmonctl (int cmd1, int cmd2, void *ptr)
+{
+ return -ENOSYS;
+}
+
+#endif /* !CONFIG_PERFMON */
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
new file mode 100644
index 000000000..5b6deb5f5
--- /dev/null
+++ b/arch/ia64/kernel/process.c
@@ -0,0 +1,421 @@
+/*
+ * Architecture-specific setup.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#define __KERNEL_SYSCALLS__ /* see <asm/unistd.h> */
+#include <linux/config.h>
+
+#include <linux/pm.h>
+#include <linux/elf.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+
+#include <asm/delay.h>
+#include <asm/efi.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/uaccess.h>
+#include <asm/user.h>
+
+
+void
+show_regs (struct pt_regs *regs)
+{
+ unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
+
+ printk("\npsr : %016lx ifs : %016lx ip : [<%016lx>]\n",
+ regs->cr_ipsr, regs->cr_ifs, ip);
+ printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
+ regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
+ printk("rnat: %016lx bsps: %016lx pr : %016lx\n",
+ regs->ar_rnat, regs->ar_bspstore, regs->pr);
+ printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
+ regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
+ printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, regs->b6, regs->b7);
+ printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
+ regs->f6.u.bits[1], regs->f6.u.bits[0],
+ regs->f7.u.bits[1], regs->f7.u.bits[0]);
+ printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
+ regs->f8.u.bits[1], regs->f8.u.bits[0],
+ regs->f9.u.bits[1], regs->f9.u.bits[0]);
+
+ printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, regs->r2, regs->r3);
+ printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10);
+ printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13);
+ printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16);
+ printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19);
+ printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22);
+ printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25);
+ printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, regs->r28);
+ printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31);
+
+ /* print the stacked registers if cr.ifs is valid: */
+ if (regs->cr_ifs & 0x8000000000000000) {
+ unsigned long val, sof, *bsp, ndirty;
+ int i, is_nat = 0;
+
+ sof = regs->cr_ifs & 0x7f; /* size of frame */
+ ndirty = (regs->loadrs >> 19);
+ bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty);
+ for (i = 0; i < sof; ++i) {
+ get_user(val, ia64_rse_skip_regs(bsp, i));
+ printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val,
+ ((i == sof - 1) || (i % 3) == 2) ? "\n" : " ");
+ }
+ }
+}
+
+void __attribute__((noreturn))
+cpu_idle (void *unused)
+{
+ /* endless idle loop with no priority at all */
+ init_idle();
+ current->priority = 0;
+ current->counter = -100;
+
+#ifdef CONFIG_SMP
+ if (!current->need_resched)
+ min_xtp();
+#endif
+
+ while (1) {
+ while (!current->need_resched) {
+ continue;
+ }
+#ifdef CONFIG_SMP
+ normal_xtp();
+#endif
+ schedule();
+ check_pgt_cache();
+ if (pm_idle)
+ (*pm_idle)();
+ }
+}
+
+/*
+ * Copy the state of an ia-64 thread.
+ *
+ * We get here through the following call chain:
+ *
+ * <clone syscall>
+ * sys_clone
+ * do_fork
+ * copy_thread
+ *
+ * This means that the stack layout is as follows:
+ *
+ * +---------------------+ (highest addr)
+ * | struct pt_regs |
+ * +---------------------+
+ * | struct switch_stack |
+ * +---------------------+
+ * | |
+ * | memory stack |
+ * | | <-- sp (lowest addr)
+ * +---------------------+
+ *
+ * Note: if we get called through kernel_thread() then the memory
+ * above "(highest addr)" is valid kernel stack memory that needs to
+ * be copied as well.
+ *
+ * Observe that we copy the unat values that are in pt_regs and
+ * switch_stack. Since the interpretation of unat is dependent upon
+ * the address to which the registers got spilled, doing this is valid
+ * only as long as we preserve the alignment of the stack. Since the
+ * stack is always page aligned, we know this is the case.
+ *
+ * XXX Actually, the above isn't true when we create kernel_threads().
+ * If we ever need to create kernel_threads() that preserve the unat
+ * values we'll need to fix this. Perhaps an easy workaround would be
+ * to always clear the unat bits in the child thread.
+ */
+int
+copy_thread (int nr, unsigned long clone_flags, unsigned long usp,
+ struct task_struct *p, struct pt_regs *regs)
+{
+ unsigned long rbs, child_rbs, rbs_size, stack_offset, stack_top, stack_used;
+ struct switch_stack *child_stack, *stack;
+ extern char ia64_ret_from_syscall_clear_r8;
+ extern char ia64_strace_clear_r8;
+ struct pt_regs *child_ptregs;
+
+#ifdef CONFIG_SMP
+ /*
+ * For SMP idle threads, fork_by_hand() calls do_fork with
+ * NULL regs.
+ */
+ if (!regs)
+ return 0;
+#endif
+
+ stack_top = (unsigned long) current + IA64_STK_OFFSET;
+ stack = ((struct switch_stack *) regs) - 1;
+ stack_used = stack_top - (unsigned long) stack;
+ stack_offset = IA64_STK_OFFSET - stack_used;
+
+ child_stack = (struct switch_stack *) ((unsigned long) p + stack_offset);
+ child_ptregs = (struct pt_regs *) (child_stack + 1);
+
+ /* copy parent's switch_stack & pt_regs to child: */
+ memcpy(child_stack, stack, stack_used);
+
+ rbs = (unsigned long) current + IA64_RBS_OFFSET;
+ child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
+ rbs_size = stack->ar_bspstore - rbs;
+
+ /* copy the parent's register backing store to the child: */
+ memcpy((void *) child_rbs, (void *) rbs, rbs_size);
+
+ child_ptregs->r8 = 0; /* child gets a zero return value */
+ if (user_mode(child_ptregs))
+ child_ptregs->r12 = usp; /* user stack pointer */
+ else {
+ /*
+ * Note: we simply preserve the relative position of
+ * the stack pointer here. There is no need to
+ * allocate a scratch area here, since that will have
+ * been taken care of by the caller of sys_clone()
+ * already.
+ */
+ child_ptregs->r12 = (unsigned long) (child_ptregs + 1); /* kernel sp */
+ child_ptregs->r13 = (unsigned long) p; /* set `current' pointer */
+ }
+ if (p->flags & PF_TRACESYS)
+ child_stack->b0 = (unsigned long) &ia64_strace_clear_r8;
+ else
+ child_stack->b0 = (unsigned long) &ia64_ret_from_syscall_clear_r8;
+ child_stack->ar_bspstore = child_rbs + rbs_size;
+
+ /* copy the thread_struct: */
+ p->thread.ksp = (unsigned long) child_stack - 16;
+ /*
+ * NOTE: The calling convention considers all floating point
+ * registers in the high partition (fph) to be scratch. Since
+ * the only way to get to this point is through a system call,
+ * we know that the values in fph are all dead. Hence, there
+ * is no need to inherit the fph state from the parent to the
+ * child and all we have to do is to make sure that
+ * IA64_THREAD_FPH_VALID is cleared in the child.
+ *
+ * XXX We could push this optimization a bit further by
+ * clearing IA64_THREAD_FPH_VALID on ANY system call.
+ * However, it's not clear this is worth doing. Also, it
+ * would be a slight deviation from the normal Linux system
+ * call behavior where scratch registers are preserved across
+ * system calls (unless used by the system call itself).
+ *
+ * If we wanted to inherit the fph state from the parent to the
+ * child, we would have to do something along the lines of:
+ *
+ * if (ia64_get_fpu_owner() == current && ia64_psr(regs)->mfh) {
+ * p->thread.flags |= IA64_THREAD_FPH_VALID;
+ * ia64_save_fpu(&p->thread.fph);
+ * } else if (current->thread.flags & IA64_THREAD_FPH_VALID) {
+ * memcpy(p->thread.fph, current->thread.fph, sizeof(p->thread.fph));
+ * }
+ */
+ p->thread.flags = (current->thread.flags & ~IA64_THREAD_FPH_VALID);
+ return 0;
+}
+
+void
+ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
+{
+ struct switch_stack *sw = ((struct switch_stack *) pt) - 1;
+ unsigned long ar_ec, cfm, ar_bsp, ndirty, *krbs;
+
+ ar_ec = (sw->ar_pfs >> 52) & 0x3f;
+
+ cfm = pt->cr_ifs & ((1UL << 63) - 1);
+ if ((pt->cr_ifs & (1UL << 63)) == 0) {
+ /* if cr_ifs isn't valid, we got here through a syscall or a break */
+ cfm = sw->ar_pfs & ((1UL << 38) - 1);
+ }
+
+ krbs = (unsigned long *) current + IA64_RBS_OFFSET/8;
+ ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 16));
+ ar_bsp = (long) ia64_rse_skip_regs((long *) pt->ar_bspstore, ndirty);
+
+ /* r0-r31
+ * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+ * predicate registers (p0-p63)
+ * b0-b7
+ * ip cfm user-mask
+ * ar.rsc ar.bsp ar.bspstore ar.rnat
+ * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
+ */
+ memset(dst, 0, sizeof (elf_gregset_t)); /* don't leak any "random" bits */
+
+ /* r0 is zero */ dst[ 1] = pt->r1; dst[ 2] = pt->r2; dst[ 3] = pt->r3;
+ dst[ 4] = sw->r4; dst[ 5] = sw->r5; dst[ 6] = sw->r6; dst[ 7] = sw->r7;
+ dst[ 8] = pt->r8; dst[ 9] = pt->r9; dst[10] = pt->r10; dst[11] = pt->r11;
+ dst[12] = pt->r12; dst[13] = pt->r13; dst[14] = pt->r14; dst[15] = pt->r15;
+ memcpy(dst + 16, &pt->r16, 16*8); /* r16-r31 are contiguous */
+
+ dst[32] = ia64_get_nat_bits(pt, sw);
+ dst[33] = pt->pr;
+
+ /* branch regs: */
+ dst[34] = pt->b0; dst[35] = sw->b1; dst[36] = sw->b2; dst[37] = sw->b3;
+ dst[38] = sw->b4; dst[39] = sw->b5; dst[40] = pt->b6; dst[41] = pt->b7;
+
+ dst[42] = pt->cr_iip; dst[43] = pt->cr_ifs;
+ dst[44] = pt->cr_ipsr; /* XXX perhaps we should filter out some bits here? --davidm */
+
+ dst[45] = pt->ar_rsc; dst[46] = ar_bsp; dst[47] = pt->ar_bspstore; dst[48] = pt->ar_rnat;
+ dst[49] = pt->ar_ccv; dst[50] = pt->ar_unat; dst[51] = sw->ar_fpsr; dst[52] = pt->ar_pfs;
+ dst[53] = sw->ar_lc; dst[54] = (sw->ar_pfs >> 52) & 0x3f;
+}
+
+int
+dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
+{
+ struct switch_stack *sw = ((struct switch_stack *) pt) - 1;
+ struct task_struct *fpu_owner = ia64_get_fpu_owner();
+
+ memset(dst, 0, sizeof (elf_fpregset_t)); /* don't leak any "random" bits */
+
+ /* f0 is 0.0 */ /* f1 is 1.0 */ dst[2] = sw->f2; dst[3] = sw->f3;
+ dst[4] = sw->f4; dst[5] = sw->f5; dst[6] = pt->f6; dst[7] = pt->f7;
+ dst[8] = pt->f8; dst[9] = pt->f9;
+ memcpy(dst + 10, &sw->f10, 22*16); /* f10-f31 are contiguous */
+
+ if ((fpu_owner == current) || (current->thread.flags & IA64_THREAD_FPH_VALID)) {
+ if (fpu_owner == current) {
+ __ia64_save_fpu(current->thread.fph);
+ }
+ memcpy(dst + 32, current->thread.fph, 96*16);
+ }
+ return 1; /* f0-f31 are always valid so we always return 1 */
+}
+
+asmlinkage long
+sys_execve (char *filename, char **argv, char **envp, struct pt_regs *regs)
+{
+ int error;
+
+ lock_kernel();
+ filename = getname(filename);
+ error = PTR_ERR(filename);
+ if (IS_ERR(filename))
+ goto out;
+ error = do_execve(filename, argv, envp, regs);
+ putname(filename);
+out:
+ unlock_kernel();
+ return error;
+}
+
+pid_t
+kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
+{
+ struct task_struct *parent = current;
+ int result;
+
+ clone(flags | CLONE_VM, 0);
+ if (parent != current) {
+ result = (*fn)(arg);
+ _exit(result);
+ }
+ return 0; /* parent: just return */
+}
+
+/*
+ * Flush thread state. This is called when a thread does an execve().
+ */
+void
+flush_thread (void)
+{
+ /* drop floating-point and debug-register state if it exists: */
+ current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
+
+ if (ia64_get_fpu_owner() == current) {
+ ia64_set_fpu_owner(0);
+ }
+}
+
+/*
+ * Clean up state associated with current thread. This is called when
+ * the thread calls exit().
+ */
+void
+exit_thread (void)
+{
+ if (ia64_get_fpu_owner() == current) {
+ ia64_set_fpu_owner(0);
+ }
+}
+
+/*
+ * Free remaining state associated with DEAD_TASK. This is called
+ * after the parent of DEAD_TASK has collected the exit status of the
+ * task via wait().
+ */
+void
+release_thread (struct task_struct *dead_task)
+{
+ /* nothing to do */
+}
+
+unsigned long
+get_wchan (struct task_struct *p)
+{
+ struct ia64_frame_info info;
+ unsigned long ip;
+ int count = 0;
+ /*
+ * These bracket the sleeping functions..
+ */
+ extern void scheduling_functions_start_here(void);
+ extern void scheduling_functions_end_here(void);
+# define first_sched ((unsigned long) scheduling_functions_start_here)
+# define last_sched ((unsigned long) scheduling_functions_end_here)
+
+ /*
+	 * another process running on some other CPU). Rather than
+ * another process running on some other CPU. Rather than
+ * trying to determine if p is really blocked, we just assume
+ * it's blocked and rely on the unwind routines to fail
+ * gracefully if the process wasn't really blocked after all.
+ * --davidm 99/12/15
+ */
+ ia64_unwind_init_from_blocked_task(&info, p);
+ do {
+ if (ia64_unwind_to_previous_frame(&info) < 0)
+ return 0;
+ ip = ia64_unwind_get_ip(&info);
+ if (ip < first_sched || ip >= last_sched)
+ return ip;
+ } while (count++ < 16);
+ return 0;
+# undef first_sched
+# undef last_sched
+}
+
+void
+machine_restart (char *restart_cmd)
+{
+ (*efi.reset_system)(EFI_RESET_WARM, 0, 0, 0);
+}
+
+void
+machine_halt (void)
+{
+ printk("machine_halt: need PAL or ACPI version here!!\n");
+ machine_restart(0);
+}
+
+void
+machine_power_off (void)
+{
+ printk("machine_power_off: unimplemented (need ACPI version here)\n");
+ machine_halt ();
+}
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
new file mode 100644
index 000000000..18a8e342e
--- /dev/null
+++ b/arch/ia64/kernel/ptrace.c
@@ -0,0 +1,653 @@
+/*
+ * Kernel support for the ptrace() and syscall tracing interfaces.
+ *
+ * Copyright (C) 1999-2000 Hewlett-Packard Co
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Derived from the x86 and Alpha versions. Most of the code in here
+ * could actually be factored into a common set of routines.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/smp_lock.h>
+#include <linux/user.h>
+
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/ptrace_offsets.h>
+#include <asm/rse.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+/*
+ * Collect the NaT bits for r1-r31 from sw->caller_unat and
+ * sw->ar_unat and return a NaT bitset where bit i is set iff the NaT
+ * bit of register i is set.
+ */
+long
+ia64_get_nat_bits (struct pt_regs *pt, struct switch_stack *sw)
+{
+# define GET_BITS(str, first, last, unat) \
+ ({ \
+ unsigned long bit = ia64_unat_pos(&str->r##first); \
+ unsigned long mask = ((1UL << (last - first + 1)) - 1) << first; \
+ (ia64_rotl(unat, first) >> bit) & mask; \
+ })
+ unsigned long val;
+
+ val = GET_BITS(pt, 1, 3, sw->caller_unat);
+ val |= GET_BITS(pt, 12, 15, sw->caller_unat);
+ val |= GET_BITS(pt, 8, 11, sw->caller_unat);
+ val |= GET_BITS(pt, 16, 31, sw->caller_unat);
+ val |= GET_BITS(sw, 4, 7, sw->ar_unat);
+ return val;
+
+# undef GET_BITS
+}
+
+/*
+ * Store the NaT bitset NAT in sw->caller_unat and sw->ar_unat.
+ */
+void
+ia64_put_nat_bits (struct pt_regs *pt, struct switch_stack *sw, unsigned long nat)
+{
+# define PUT_BITS(str, first, last, nat) \
+ ({ \
+ unsigned long bit = ia64_unat_pos(&str->r##first); \
+ unsigned long mask = ((1UL << (last - first + 1)) - 1) << bit; \
+ (ia64_rotr(nat, first) << bit) & mask; \
+ })
+ sw->caller_unat = PUT_BITS(pt, 1, 3, nat);
+ sw->caller_unat |= PUT_BITS(pt, 12, 15, nat);
+ sw->caller_unat |= PUT_BITS(pt, 8, 11, nat);
+ sw->caller_unat |= PUT_BITS(pt, 16, 31, nat);
+ sw->ar_unat = PUT_BITS(sw, 4, 7, nat);
+
+# undef PUT_BITS
+}
+
+#define IA64_MLI_TEMPLATE 0x2
+#define IA64_MOVL_OPCODE 6
+
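+/*
+ * An instruction bundle is 128 bits.  Bits 0-4 of the first doubleword
+ * hold the template; bit 0 essentially selects the trailing stop-bit
+ * variant, so ((w0 >> 1) & 0xf) == IA64_MLI_TEMPLATE identifies an
+ * MLX/MLI bundle.  The major opcode of the "long" slot sits in the top
+ * four bits of the bundle, i.e., bits 60-63 of the second doubleword,
+ * and is 6 for movl.
+ */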
+void
+ia64_increment_ip (struct pt_regs *regs)
+{
+ unsigned long w0, w1, ri = ia64_psr(regs)->ri + 1;
+
+ if (ri > 2) {
+ ri = 0;
+ regs->cr_iip += 16;
+ } else if (ri == 2) {
+ get_user(w0, (char *) regs->cr_iip + 0);
+ get_user(w1, (char *) regs->cr_iip + 8);
+ if (((w0 >> 1) & 0xf) == IA64_MLI_TEMPLATE && (w1 >> 60) == IA64_MOVL_OPCODE) {
+ /*
+ * rfi'ing to slot 2 of an MLI bundle causes
+ * an illegal operation fault. We don't want
+ * that to happen... Note that we check the
+ * opcode only. "movl" has a vc bit of 0, but
+ * since a vc bit of 1 is currently reserved,
+ * we might just as well treat it like a movl.
+ */
+ ri = 0;
+ regs->cr_iip += 16;
+ }
+ }
+ ia64_psr(regs)->ri = ri;
+}
+
+void
+ia64_decrement_ip (struct pt_regs *regs)
+{
+ unsigned long w0, w1, ri = ia64_psr(regs)->ri - 1;
+
+ if (ia64_psr(regs)->ri == 0) {
+ regs->cr_iip -= 16;
+ ri = 2;
+ get_user(w0, (char *) regs->cr_iip + 0);
+ get_user(w1, (char *) regs->cr_iip + 8);
+ if (((w0 >> 1) & 0xf) == IA64_MLI_TEMPLATE && (w1 >> 60) == IA64_MOVL_OPCODE) {
+ /*
+ * rfi'ing to slot 2 of an MLI bundle causes
+ * an illegal operation fault. We don't want
+ * that to happen... Note that we check the
+ * opcode only. "movl" has a vc bit of 0, but
+ * since a vc bit of 1 is currently reserved,
+ * we might just as well treat it like a movl.
+ */
+ ri = 1;
+ }
+ }
+ ia64_psr(regs)->ri = ri;
+}
+
+/*
+ * This routine is used to read the rnat bits that are stored on the
+ * kernel backing store. Since, in general, the alignment of the user
+ * and kernel backing stores differs, this is not completely trivial. In
+ * essence, we need to construct the user RNAT based on up to two
+ * kernel RNAT values and/or the RNAT value saved in the child's
+ * pt_regs.
+ *
+ * user rbs
+ *
+ * +--------+ <-- lowest address
+ * | slot62 |
+ * +--------+
+ * | rnat | 0x....1f8
+ * +--------+
+ * | slot00 | \
+ * +--------+ |
+ * | slot01 | > child_regs->ar_rnat
+ * +--------+ |
+ * | slot02 | / kernel rbs
+ * +--------+ +--------+
+ * <- child_regs->ar_bspstore | slot61 | <-- krbs
+ * +- - - - + +--------+
+ * | slot62 |
+ * +- - - - + +--------+
+ * | rnat |
+ * +- - - - + +--------+
+ * vrnat | slot00 |
+ * +- - - - + +--------+
+ * = =
+ * +--------+
+ * | slot00 | \
+ * +--------+ |
+ * | slot01 | > child_stack->ar_rnat
+ * +--------+ |
+ * | slot02 | /
+ * +--------+
+ * <--- child_stack->ar_bspstore
+ *
+ * The way to think of this code is as follows: bit 0 in the user rnat
+ * corresponds to some bit N (0 <= N <= 62) in one of the kernel rnat
+ * values. The kernel rnat value holding this bit is stored in
+ * variable rnat0. rnat1 is loaded with the kernel rnat value that
+ * forms the upper bits of the user rnat value.
+ *
+ * Boundary cases:
+ *
+ * o when reading the rnat "below" the first rnat slot on the kernel
+ * backing store, rnat0/rnat1 are set to 0 and the low order bits
+ * are merged in from pt->ar_rnat.
+ *
+ * o when reading the rnat "above" the last rnat slot on the kernel
+ * backing store, rnat0/rnat1 gets its value from sw->ar_rnat.
+ */
+static unsigned long
+get_rnat (struct pt_regs *pt, struct switch_stack *sw,
+ unsigned long *krbs, unsigned long *urnat_addr)
+{
+ unsigned long rnat0 = 0, rnat1 = 0, urnat = 0, *slot0_kaddr, kmask = ~0UL;
+ unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift;
+ long num_regs;
+
+ kbsp = (unsigned long *) sw->ar_bspstore;
+ ubspstore = (unsigned long *) pt->ar_bspstore;
+ /*
+ * First, figure out which bit number slot 0 in user-land maps
+ * to in the kernel rnat. Do this by figuring out how many
+ * register slots we're beyond the user's backingstore and
+ * then computing the equivalent address in kernel space.
+ */
+ num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1);
+ slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs);
+ shift = ia64_rse_slot_num(slot0_kaddr);
+ rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr);
+ rnat0_kaddr = rnat1_kaddr - 64;
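+	/*
+	 * (An RNAT collection slot occurs wherever bits 3-8 of the address
+	 * are all ones, i.e., once every 63 registers; consecutive RNAT
+	 * slots are therefore 64 doublewords apart, which is why the
+	 * previous collection word is at rnat1_kaddr - 64.)
+	 */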
+
+ if (ubspstore + 63 > urnat_addr) {
+ /* some bits need to be merged in from pt->ar_rnat */
+ kmask = ~((1UL << ia64_rse_slot_num(ubspstore)) - 1);
+ urnat = (pt->ar_rnat & ~kmask);
+ }
+ if (rnat0_kaddr >= kbsp) {
+ rnat0 = sw->ar_rnat;
+ } else if (rnat0_kaddr > krbs) {
+ rnat0 = *rnat0_kaddr;
+ }
+ if (rnat1_kaddr >= kbsp) {
+ rnat1 = sw->ar_rnat;
+ } else if (rnat1_kaddr > krbs) {
+ rnat1 = *rnat1_kaddr;
+ }
+ urnat |= ((rnat1 << (63 - shift)) | (rnat0 >> shift)) & kmask;
+ return urnat;
+}
+
+/*
+ * The reverse of get_rnat.
+ */
+static void
+put_rnat (struct pt_regs *pt, struct switch_stack *sw,
+ unsigned long *krbs, unsigned long *urnat_addr, unsigned long urnat)
+{
+ unsigned long rnat0 = 0, rnat1 = 0, rnat = 0, *slot0_kaddr, kmask = ~0UL, mask;
+ unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift;
+ long num_regs;
+
+ kbsp = (unsigned long *) sw->ar_bspstore;
+ ubspstore = (unsigned long *) pt->ar_bspstore;
+ /*
+ * First, figure out which bit number slot 0 in user-land maps
+ * to in the kernel rnat. Do this by figuring out how many
+ * register slots we're beyond the user's backingstore and
+ * then computing the equivalent address in kernel space.
+ */
+ num_regs = (long) ia64_rse_num_regs(ubspstore, urnat_addr + 1);
+ slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs);
+ shift = ia64_rse_slot_num(slot0_kaddr);
+ rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr);
+ rnat0_kaddr = rnat1_kaddr - 64;
+
+ if (ubspstore + 63 > urnat_addr) {
+		/* some bits need to be placed in pt->ar_rnat: */
+		kmask = ~((1UL << ia64_rse_slot_num(ubspstore)) - 1);
+		pt->ar_rnat = (pt->ar_rnat & kmask) | (urnat & ~kmask);
+ }
+ /*
+ * Note: Section 11.1 of the EAS guarantees that bit 63 of an
+	 * rnat slot is ignored, so we don't have to clear it here.
+ */
+ rnat0 = (urnat << shift);
+ mask = ~0UL << shift;
+ if (rnat0_kaddr >= kbsp) {
+ sw->ar_rnat = (sw->ar_rnat & ~mask) | (rnat0 & mask);
+ } else if (rnat0_kaddr > krbs) {
+ *rnat0_kaddr = ((*rnat0_kaddr & ~mask) | (rnat0 & mask));
+ }
+
+ rnat1 = (urnat >> (63 - shift));
+ mask = ~0UL >> (63 - shift);
+ if (rnat1_kaddr >= kbsp) {
+ sw->ar_rnat = (sw->ar_rnat & ~mask) | (rnat1 & mask);
+ } else if (rnat1_kaddr > krbs) {
+ *rnat1_kaddr = ((*rnat1_kaddr & ~mask) | (rnat1 & mask));
+ }
+}
+
+long
+ia64_peek (struct pt_regs *regs, struct task_struct *child, unsigned long addr, long *val)
+{
+ unsigned long *bspstore, *krbs, krbs_num_regs, regnum, *rbs_end, *laddr;
+ struct switch_stack *child_stack;
+ struct pt_regs *child_regs;
+ size_t copied;
+ long ret;
+
+ laddr = (unsigned long *) addr;
+ child_regs = ia64_task_regs(child);
+ child_stack = (struct switch_stack *) child_regs - 1;
+ bspstore = (unsigned long *) child_regs->ar_bspstore;
+ krbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
+ krbs_num_regs = ia64_rse_num_regs(krbs, (unsigned long *) child_stack->ar_bspstore);
+ rbs_end = ia64_rse_skip_regs(bspstore, krbs_num_regs);
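+	/*
+	 * rbs_end is the user-level address just past the last register
+	 * that is still sitting on the kernel backing store for this
+	 * child; anything in [bspstore, rnat_addr(rbs_end)] has to be
+	 * fetched from the kernel RBS rather than from user memory.
+	 */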
+ if (laddr >= bspstore && laddr <= ia64_rse_rnat_addr(rbs_end)) {
+ /*
+ * Attempt to read the RBS in an area that's actually
+ * on the kernel RBS => read the corresponding bits in
+ * the kernel RBS.
+ */
+ if (ia64_rse_is_rnat_slot(laddr))
+ ret = get_rnat(child_regs, child_stack, krbs, laddr);
+ else {
+ regnum = ia64_rse_num_regs(bspstore, laddr);
+ laddr = ia64_rse_skip_regs(krbs, regnum);
+ if (regnum >= krbs_num_regs) {
+ ret = 0;
+ } else {
+ if ((unsigned long) laddr >= (unsigned long) high_memory) {
+ printk("yikes: trying to access long at %p\n", laddr);
+ return -EIO;
+ }
+ ret = *laddr;
+ }
+ }
+ } else {
+ copied = access_process_vm(child, addr, &ret, sizeof(ret), 0);
+ if (copied != sizeof(ret))
+ return -EIO;
+ }
+ *val = ret;
+ return 0;
+}
+
+long
+ia64_poke (struct pt_regs *regs, struct task_struct *child, unsigned long addr, long val)
+{
+ unsigned long *bspstore, *krbs, krbs_num_regs, regnum, *rbs_end, *laddr;
+ struct switch_stack *child_stack;
+ struct pt_regs *child_regs;
+
+ laddr = (unsigned long *) addr;
+ child_regs = ia64_task_regs(child);
+ child_stack = (struct switch_stack *) child_regs - 1;
+ bspstore = (unsigned long *) child_regs->ar_bspstore;
+ krbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
+ krbs_num_regs = ia64_rse_num_regs(krbs, (unsigned long *) child_stack->ar_bspstore);
+ rbs_end = ia64_rse_skip_regs(bspstore, krbs_num_regs);
+ if (laddr >= bspstore && laddr <= ia64_rse_rnat_addr(rbs_end)) {
+ /*
+ * Attempt to write the RBS in an area that's actually
+ * on the kernel RBS => write the corresponding bits
+ * in the kernel RBS.
+ */
+ if (ia64_rse_is_rnat_slot(laddr))
+ put_rnat(child_regs, child_stack, krbs, laddr, val);
+ else {
+ regnum = ia64_rse_num_regs(bspstore, laddr);
+ laddr = ia64_rse_skip_regs(krbs, regnum);
+ if (regnum < krbs_num_regs) {
+ *laddr = val;
+ }
+ }
+ } else if (access_process_vm(child, addr, &val, sizeof(val), 1) != sizeof(val)) {
+ return -EIO;
+ }
+ return 0;
+}
+
+/*
+ * Ensure the state in child->thread.fph is up-to-date.
+ */
+static void
+sync_fph (struct task_struct *child)
+{
+ if (ia64_psr(ia64_task_regs(child))->mfh && ia64_get_fpu_owner() == child) {
+ ia64_save_fpu(&child->thread.fph[0]);
+ child->thread.flags |= IA64_THREAD_FPH_VALID;
+ }
+ if (!(child->thread.flags & IA64_THREAD_FPH_VALID)) {
+ memset(&child->thread.fph, 0, sizeof(child->thread.fph));
+ child->thread.flags |= IA64_THREAD_FPH_VALID;
+ }
+}
+
+asmlinkage long
+sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data,
+ long arg4, long arg5, long arg6, long arg7, long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+ struct switch_stack *child_stack;
+ struct pt_regs *child_regs;
+ struct task_struct *child;
+ unsigned long flags, *base;
+ long ret, regnum;
+
+ lock_kernel();
+ ret = -EPERM;
+ if (request == PTRACE_TRACEME) {
+ /* are we already being traced? */
+ if (current->flags & PF_PTRACED)
+ goto out;
+ current->flags |= PF_PTRACED;
+ ret = 0;
+ goto out;
+ }
+
+ ret = -ESRCH;
+ read_lock(&tasklist_lock);
+ child = find_task_by_pid(pid);
+ read_unlock(&tasklist_lock);
+ if (!child)
+ goto out;
+ ret = -EPERM;
+ if (pid == 1) /* no messing around with init! */
+ goto out;
+
+ if (request == PTRACE_ATTACH) {
+ if (child == current)
+ goto out;
+ if ((!child->dumpable ||
+ (current->uid != child->euid) ||
+ (current->uid != child->suid) ||
+ (current->uid != child->uid) ||
+ (current->gid != child->egid) ||
+ (current->gid != child->sgid) ||
+ (!cap_issubset(child->cap_permitted, current->cap_permitted)) ||
+ (current->gid != child->gid)) && !capable(CAP_SYS_PTRACE))
+ goto out;
+ /* the same process cannot be attached many times */
+ if (child->flags & PF_PTRACED)
+ goto out;
+ child->flags |= PF_PTRACED;
+ if (child->p_pptr != current) {
+ unsigned long flags;
+
+ write_lock_irqsave(&tasklist_lock, flags);
+ REMOVE_LINKS(child);
+ child->p_pptr = current;
+ SET_LINKS(child);
+ write_unlock_irqrestore(&tasklist_lock, flags);
+ }
+ send_sig(SIGSTOP, child, 1);
+ ret = 0;
+ goto out;
+ }
+ ret = -ESRCH;
+ if (!(child->flags & PF_PTRACED))
+ goto out;
+ if (child->state != TASK_STOPPED) {
+ if (request != PTRACE_KILL)
+ goto out;
+ }
+ if (child->p_pptr != current)
+ goto out;
+
+ switch (request) {
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA: /* read word at location addr */
+ ret = ia64_peek(regs, child, addr, &data);
+ if (ret == 0) {
+ ret = data;
+ regs->r8 = 0; /* ensure "ret" is not mistaken as an error code */
+ }
+ goto out;
+
+ case PTRACE_POKETEXT:
+ case PTRACE_POKEDATA: /* write the word at location addr */
+ ret = ia64_poke(regs, child, addr, data);
+ goto out;
+
+ case PTRACE_PEEKUSR: /* read the word at addr in the USER area */
+ ret = -EIO;
+ if ((addr & 0x7) != 0)
+ goto out;
+
+ if (addr < PT_CALLER_UNAT) {
+ /* accessing fph */
+ sync_fph(child);
+ addr += (unsigned long) &child->thread.fph;
+ ret = *(unsigned long *) addr;
+ } else if (addr < PT_F9+16) {
+ /* accessing switch_stack or pt_regs: */
+ child_regs = ia64_task_regs(child);
+ child_stack = (struct switch_stack *) child_regs - 1;
+ ret = *(unsigned long *) ((long) child_stack + addr - PT_CALLER_UNAT);
+
+ if (addr == PT_AR_BSP) {
+ /* ret currently contains pt_regs.loadrs */
+ unsigned long *rbs, *bspstore, ndirty;
+
+ rbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
+ bspstore = (unsigned long *) child_regs->ar_bspstore;
+ ndirty = ia64_rse_num_regs(rbs, rbs + (ret >> 19));
+ ret = (unsigned long) ia64_rse_skip_regs(bspstore, ndirty);
+ }
+ } else {
+ if (addr >= PT_IBR) {
+ regnum = (addr - PT_IBR) >> 3;
+ base = &child->thread.ibr[0];
+ } else {
+ regnum = (addr - PT_DBR) >> 3;
+ base = &child->thread.dbr[0];
+ }
+ if (regnum >= 8)
+ goto out;
+			ret = base[regnum];
+ }
+ regs->r8 = 0; /* ensure "ret" is not mistaken as an error code */
+ goto out;
+
+ case PTRACE_POKEUSR: /* write the word at addr in the USER area */
+ ret = -EIO;
+ if ((addr & 0x7) != 0)
+ goto out;
+
+ if (addr < PT_CALLER_UNAT) {
+ /* accessing fph */
+ sync_fph(child);
+ addr += (unsigned long) &child->thread.fph;
+ *(unsigned long *) addr = data;
+ if (ret < 0)
+ goto out;
+ } else if (addr < PT_F9+16) {
+ /* accessing switch_stack or pt_regs */
+ child_regs = ia64_task_regs(child);
+ child_stack = (struct switch_stack *) child_regs - 1;
+
+ if (addr == PT_AR_BSP) {
+ /* compute the loadrs value based on bsp and bspstore: */
+ unsigned long *rbs, *bspstore, ndirty, *kbsp;
+
+ bspstore = (unsigned long *) child_regs->ar_bspstore;
+ ndirty = ia64_rse_num_regs(bspstore, (unsigned long *) data);
+ rbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
+ kbsp = ia64_rse_skip_regs(rbs, ndirty);
+ data = (kbsp - rbs) << 19;
+ }
+ *(unsigned long *) ((long) child_stack + addr - PT_CALLER_UNAT) = data;
+ } else {
+ if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
+ child->thread.flags |= IA64_THREAD_DBG_VALID;
+				memset(child->thread.dbr, 0, sizeof child->thread.dbr);
+				memset(child->thread.ibr, 0, sizeof child->thread.ibr);
+ }
+
+ if (addr >= PT_IBR) {
+ regnum = (addr - PT_IBR) >> 3;
+ base = &child->thread.ibr[0];
+ } else {
+ regnum = (addr - PT_DBR) >> 3;
+ base = &child->thread.dbr[0];
+ }
+ if (regnum >= 8)
+ goto out;
+ if (regnum & 1) {
+				/* force breakpoint to be effective at most for user-level: */
+ data &= ~(0x7UL << 56);
+ }
+ base[regnum] = data;
+ }
+ ret = 0;
+ goto out;
+
+ case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
+ case PTRACE_CONT: /* restart after signal. */
+ ret = -EIO;
+ if (data > _NSIG)
+ goto out;
+ if (request == PTRACE_SYSCALL)
+ child->flags |= PF_TRACESYS;
+ else
+ child->flags &= ~PF_TRACESYS;
+ child->exit_code = data;
+
+		/* make sure the single step/take-branch trap bits are not set: */
+ ia64_psr(ia64_task_regs(child))->ss = 0;
+ ia64_psr(ia64_task_regs(child))->tb = 0;
+
+ wake_up_process(child);
+ ret = 0;
+ goto out;
+
+ case PTRACE_KILL:
+ /*
+ * Make the child exit. Best I can do is send it a
+ * sigkill. Perhaps it should be put in the status
+ * that it wants to exit.
+ */
+ if (child->state == TASK_ZOMBIE) /* already dead */
+ goto out;
+ child->exit_code = SIGKILL;
+
+		/* make sure the single step/take-branch trap bits are not set: */
+ ia64_psr(ia64_task_regs(child))->ss = 0;
+ ia64_psr(ia64_task_regs(child))->tb = 0;
+
+ wake_up_process(child);
+ ret = 0;
+ goto out;
+
+ case PTRACE_SINGLESTEP: /* let child execute for one instruction */
+ case PTRACE_SINGLEBLOCK:
+ ret = -EIO;
+ if (data > _NSIG)
+ goto out;
+
+ child->flags &= ~PF_TRACESYS;
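+		/*
+		 * psr.ss raises a Single Step trap after every instruction,
+		 * whereas psr.tb raises a Taken Branch trap only when a
+		 * branch is taken; the latter is what gives us block
+		 * stepping for PTRACE_SINGLEBLOCK.
+		 */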
+ if (request == PTRACE_SINGLESTEP) {
+ ia64_psr(ia64_task_regs(child))->ss = 1;
+ } else {
+ ia64_psr(ia64_task_regs(child))->tb = 1;
+ }
+ child->exit_code = data;
+
+ /* give it a chance to run. */
+ wake_up_process(child);
+ ret = 0;
+ goto out;
+
+ case PTRACE_DETACH: /* detach a process that was attached. */
+ ret = -EIO;
+ if (data > _NSIG)
+ goto out;
+
+ child->flags &= ~(PF_PTRACED|PF_TRACESYS);
+ child->exit_code = data;
+ write_lock_irqsave(&tasklist_lock, flags);
+ REMOVE_LINKS(child);
+ child->p_pptr = child->p_opptr;
+ SET_LINKS(child);
+ write_unlock_irqrestore(&tasklist_lock, flags);
+
+		/* make sure the single step/take-branch trap bits are not set: */
+ ia64_psr(ia64_task_regs(child))->ss = 0;
+ ia64_psr(ia64_task_regs(child))->tb = 0;
+
+ wake_up_process(child);
+ ret = 0;
+ goto out;
+
+ default:
+ ret = -EIO;
+ goto out;
+ }
+ out:
+ unlock_kernel();
+ return ret;
+}
+
+void
+syscall_trace (void)
+{
+ if ((current->flags & (PF_PTRACED|PF_TRACESYS)) != (PF_PTRACED|PF_TRACESYS))
+ return;
+ current->exit_code = SIGTRAP;
+ set_current_state(TASK_STOPPED);
+ notify_parent(current, SIGCHLD);
+ schedule();
+ /*
+ * This isn't the same as continuing with a signal, but it
+ * will do for normal use. strace only continues with a
+ * signal if the stopping signal is not SIGTRAP. -brl
+ */
+ if (current->exit_code) {
+ send_sig(current->exit_code, current, 1);
+ current->exit_code = 0;
+ }
+}
diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c
new file mode 100644
index 000000000..8743f6588
--- /dev/null
+++ b/arch/ia64/kernel/sal.c
@@ -0,0 +1,157 @@
+/*
+ * System Abstraction Layer (SAL) interface routines.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ */
+#include <linux/config.h>
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+
+#include <asm/page.h>
+#include <asm/sal.h>
+#include <asm/pal.h>
+
+#define SAL_DEBUG
+
+spinlock_t sal_lock = SPIN_LOCK_UNLOCKED;
+
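+/*
+ * Under the ia64 software conventions, a function pointer really points
+ * at a two-word function descriptor holding the entry point and the gp
+ * value to use.  pdesc below mimics such a descriptor so that SAL can be
+ * invoked through ia64_sal like any other indirect call.
+ */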
+static struct {
+ void *addr; /* function entry point */
+ void *gpval; /* gp value to use */
+} pdesc;
+
+static long
+default_handler (void)
+{
+ return -1;
+}
+
+ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler;
+
+const char *
+ia64_sal_strerror (long status)
+{
+ const char *str;
+ switch (status) {
+ case 0: str = "Call completed without error"; break;
+ case 1: str = "Effect a warm boot of the system to complete "
+ "the update"; break;
+ case -1: str = "Not implemented"; break;
+ case -2: str = "Invalid argument"; break;
+ case -3: str = "Call completed with error"; break;
+ case -4: str = "Virtual address not registered"; break;
+ case -5: str = "No information available"; break;
+ case -6: str = "Insufficient space to add the entry"; break;
+ case -7: str = "Invalid entry_addr value"; break;
+ case -8: str = "Invalid interrupt vector"; break;
+ case -9: str = "Requested memory not available"; break;
+ case -10: str = "Unable to write to the NVM device"; break;
+ case -11: str = "Invalid partition type specified"; break;
+ case -12: str = "Invalid NVM_Object id specified"; break;
+ case -13: str = "NVM_Object already has the maximum number "
+ "of partitions"; break;
+ case -14: str = "Insufficient space in partition for the "
+ "requested write sub-function"; break;
+ case -15: str = "Insufficient data buffer space for the "
+ "requested read record sub-function"; break;
+ case -16: str = "Scratch buffer required for the write/delete "
+ "sub-function"; break;
+ case -17: str = "Insufficient space in the NVM_Object for the "
+ "requested create sub-function"; break;
+ case -18: str = "Invalid value specified in the partition_rec "
+ "argument"; break;
+ case -19: str = "Record oriented I/O not supported for this "
+ "partition"; break;
+ case -20: str = "Bad format of record to be written or "
+ "required keyword variable not "
+ "specified"; break;
+ default: str = "Unknown SAL status code"; break;
+ }
+ return str;
+}
+
+static void __init
+ia64_sal_handler_init (void *entry_point, void *gpval)
+{
+ /* fill in the SAL procedure descriptor and point ia64_sal to it: */
+ pdesc.addr = entry_point;
+ pdesc.gpval = gpval;
+ ia64_sal = (ia64_sal_handler) &pdesc;
+}
+
+
+void __init
+ia64_sal_init (struct ia64_sal_systab *systab)
+{
+ unsigned long min, max;
+ char *p;
+ struct ia64_sal_desc_entry_point *ep;
+ int i;
+
+ if (!systab) {
+ printk("Hmm, no SAL System Table.\n");
+ return;
+ }
+
+ if (strncmp(systab->signature, "SST_", 4) != 0)
+ printk("bad signature in system table!");
+
+ printk("SAL v%u.%02u: ia32bios=%s, oem=%.32s, product=%.32s\n",
+ systab->sal_rev_major, systab->sal_rev_minor,
+ systab->ia32_bios_present ? "present" : "absent",
+ systab->oem_id, systab->product_id);
+
+ min = ~0UL;
+ max = 0;
+
+ p = (char *) (systab + 1);
+ for (i = 0; i < systab->entry_count; i++) {
+ /*
+		 * The first byte of each entry type contains the type descriptor.
+ */
+ switch (*p) {
+ case SAL_DESC_ENTRY_POINT:
+ ep = (struct ia64_sal_desc_entry_point *) p;
+#ifdef SAL_DEBUG
+ printk("sal[%d] - entry: pal_proc=0x%lx, sal_proc=0x%lx\n",
+ i, ep->pal_proc, ep->sal_proc);
+#endif
+ ia64_pal_handler_init(__va(ep->pal_proc));
+ ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp));
+ break;
+
+ case SAL_DESC_AP_WAKEUP:
+#ifdef CONFIG_SMP
+ {
+ struct ia64_sal_desc_ap_wakeup *ap = (void *) p;
+# ifdef SAL_DEBUG
+ printk("sal[%d] - wakeup type %x, 0x%lx\n",
+ i, ap->mechanism, ap->vector);
+# endif
+ switch (ap->mechanism) {
+ case IA64_SAL_AP_EXTERNAL_INT:
+ ap_wakeup_vector = ap->vector;
+# ifdef SAL_DEBUG
+ printk("SAL: AP wakeup using external interrupt; "
+ "vector 0x%lx\n", ap_wakeup_vector);
+# endif
+ break;
+
+ default:
+ printk("SAL: AP wakeup mechanism unsupported!\n");
+ break;
+ }
+ break;
+ }
+#endif
+ }
+ p += SAL_DESC_SIZE(*p);
+ }
+}
diff --git a/arch/ia64/kernel/sal_stub.S b/arch/ia64/kernel/sal_stub.S
new file mode 100644
index 000000000..7ab16bbcd
--- /dev/null
+++ b/arch/ia64/kernel/sal_stub.S
@@ -0,0 +1,116 @@
+/*
+ * gcc currently does not conform to the ia-64 calling convention as far
+ * as returning function values are concerned. Instead of returning
+ * values up to 32 bytes in size in r8-r11, gcc returns any value
+ * bigger than a doubleword via a structure that's allocated by the
+ * caller and whose address is passed into the function. Since
+ * SAL_PROC returns values according to the calling convention, this
+ * stub takes care of copying r8-r11 to the place where gcc expects
+ * them.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#ifndef __GCC_MULTIREG_RETVALS__
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .align 16
+ .global ia64_sal_stub
+ia64_sal_stub:
+ /*
+ * Sheesh, the Cygnus backend passes the pointer to a return value structure in
+ * in0 whereas the HP backend passes it in r8. Don't you hate those little
+ * differences...
+ */
+#ifdef GCC_RETVAL_POINTER_IN_R8
+ adds r2=-24,sp
+ adds sp=-48,sp
+ mov r14=rp
+ ;;
+ st8 [r2]=r8,8 // save pointer to return value
+ addl r3=@ltoff(ia64_sal),gp
+ ;;
+ ld8 r3=[r3]
+ st8 [r2]=gp,8 // save global pointer
+ ;;
+ ld8 r3=[r3] // fetch the value of ia64_sal
+ st8 [r2]=r14 // save return pointer
+ ;;
+ ld8 r2=[r3],8 // load function's entry point
+ ;;
+ ld8 gp=[r3] // load function's global pointer
+ ;;
+ mov b6=r2
+ br.call.sptk.few rp=b6
+.ret0: adds r2=24,sp
+ ;;
+ ld8 r3=[r2],8 // restore pointer to return value
+ ;;
+ ld8 gp=[r2],8 // restore global pointer
+ st8 [r3]=r8,8
+ ;;
+ ld8 r14=[r2] // restore return pointer
+ st8 [r3]=r9,8
+ ;;
+ mov rp=r14
+ st8 [r3]=r10,8
+ ;;
+ st8 [r3]=r11,8
+ adds sp=48,sp
+ br.sptk.few rp
+#else
+ /*
+ * On input:
+ * in0 = pointer to return value structure
+ * in1 = index of SAL function to call
+ * in2..inN = remaining args to SAL call
+ */
+ /*
+	 * We allocate one input and eight output registers such that the br.call instruction
+ * will rename in1-in7 to in0-in6---exactly what we want because SAL doesn't want to
+ * see the pointer to the return value structure.
+ */
+ alloc r15=ar.pfs,1,0,8,0
+
+ adds r2=-24,sp
+ adds sp=-48,sp
+ mov r14=rp
+ ;;
+ st8 [r2]=r15,8 // save ar.pfs
+ addl r3=@ltoff(ia64_sal),gp
+ ;;
+ ld8 r3=[r3] // get address of ia64_sal
+ st8 [r2]=gp,8 // save global pointer
+ ;;
+ ld8 r3=[r3] // get value of ia64_sal
+ st8 [r2]=r14,8 // save return address (rp)
+ ;;
+ ld8 r2=[r3],8 // load function's entry point
+ ;;
+ ld8 gp=[r3] // load function's global pointer
+ mov b6=r2
+ br.call.sptk.few rp=b6 // make SAL call
+.ret0: adds r2=24,sp
+ ;;
+ ld8 r15=[r2],8 // restore ar.pfs
+ ;;
+ ld8 gp=[r2],8 // restore global pointer
+ st8 [in0]=r8,8 // store 1. dword of return value
+ ;;
+ ld8 r14=[r2] // restore return address (rp)
+ st8 [in0]=r9,8 // store 2. dword of return value
+ ;;
+ mov rp=r14
+ st8 [in0]=r10,8 // store 3. dword of return value
+ ;;
+ st8 [in0]=r11,8
+ adds sp=48,sp // pop stack frame
+ mov ar.pfs=r15
+ br.ret.sptk.few rp
+#endif
+
+ .endp ia64_sal_stub
+#endif /* __GCC_MULTIREG_RETVALS__ */
diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c
new file mode 100644
index 000000000..84581af2e
--- /dev/null
+++ b/arch/ia64/kernel/semaphore.c
@@ -0,0 +1,336 @@
+/*
+ * IA-64 semaphore implementation (derived from x86 version).
+ *
+ * Copyright (C) 1999-2000 Hewlett-Packard Co
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/*
+ * Semaphores are implemented using a two-way counter: The "count"
+ * variable is decremented for each process that tries to acquire the
+ * semaphore, while the "sleepers" variable is a count of such
+ * acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can efficiently
+ * test if they need to do any extra work (up needs to do something
+ * only if count was negative before the increment operation).
+ *
+ * "sleepers" and the contention routine ordering is protected by the
+ * semaphore spinlock.
+ *
+ * Note that these functions are only called when there is contention
+ * on the lock, and as such all this is the "non-critical" part of the
+ * whole semaphore business. The critical part is the inline stuff in
+ * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
+ */
+#include <linux/sched.h>
+
+#include <asm/semaphore.h>
+
+/*
+ * Logic:
+ * - Only on a boundary condition do we need to care. When we go
+ * from a negative count to a non-negative, we wake people up.
+ * - When we go from a non-negative count to a negative do we
+ * (a) synchronize with the "sleepers" count and (b) make sure
+ * that we're on the wakeup list before we synchronize so that
+ * we cannot lose wakeup events.
+ */
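+
+/*
+ * A concrete (two-task) example of the bookkeeping above, assuming the
+ * inline down()/up() fast paths in <asm/semaphore.h> adjust "count" by
+ * +/-1: with count == 1 (a mutex-style semaphore), task A calls down()
+ * and takes the semaphore (count == 0).  Task B calls down(), count
+ * drops to -1, and B enters __down(): sleepers becomes 1,
+ * atomic_add_negative(0, &count) leaves count at -1, so B sleeps with
+ * sleepers == 1.  When A calls up(), count returns to 0 and B is woken;
+ * this time atomic_add_negative(0, &count) sees a non-negative count,
+ * sleepers is reset to 0, and B owns the semaphore.
+ */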
+
+void
+__up (struct semaphore *sem)
+{
+ wake_up(&sem->wait);
+}
+
+static spinlock_t semaphore_lock = SPIN_LOCK_UNLOCKED;
+
+void
+__down (struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ tsk->state = TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ spin_lock_irq(&semaphore_lock);
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irq(&semaphore_lock);
+
+ schedule();
+ tsk->state = TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE;
+ spin_lock_irq(&semaphore_lock);
+ }
+ spin_unlock_irq(&semaphore_lock);
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+ wake_up(&sem->wait);
+}
+
+int
+__down_interruptible (struct semaphore * sem)
+{
+ int retval = 0;
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ tsk->state = TASK_INTERRUPTIBLE|TASK_EXCLUSIVE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ spin_lock_irq(&semaphore_lock);
+ sem->sleepers ++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * With signals pending, this turns into
+ * the trylock failure case - we won't be
+		 * sleeping, and we can't get the lock as
+ * it has contention. Just correct the count
+ * and exit.
+ */
+ if (signal_pending(current)) {
+ retval = -EINTR;
+ sem->sleepers = 0;
+ atomic_add(sleepers, &sem->count);
+ break;
+ }
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock. The
+ * "-1" is because we're still hoping to get
+ * the lock.
+ */
+ if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+ sem->sleepers = 0;
+ break;
+ }
+ sem->sleepers = 1; /* us - see -1 above */
+ spin_unlock_irq(&semaphore_lock);
+
+ schedule();
+ tsk->state = TASK_INTERRUPTIBLE|TASK_EXCLUSIVE;
+ spin_lock_irq(&semaphore_lock);
+ }
+ spin_unlock_irq(&semaphore_lock);
+ tsk->state = TASK_RUNNING;
+ remove_wait_queue(&sem->wait, &wait);
+ wake_up(&sem->wait);
+ return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for having decremented the
+ * count.
+ */
+int
+__down_trylock (struct semaphore *sem)
+{
+ int sleepers;
+
+ spin_lock_irq(&semaphore_lock);
+ sleepers = sem->sleepers + 1;
+ sem->sleepers = 0;
+
+ /*
+ * Add "everybody else" and us into it. They aren't
+ * playing, because we own the spinlock.
+ */
+ if (!atomic_add_negative(sleepers, &sem->count))
+ wake_up(&sem->wait);
+
+ spin_unlock_irq(&semaphore_lock);
+ return 1;
+}
+
+/*
+ * Helper routines for rw semaphores. These could be optimized some
+ * more, but since they're off the critical path, I prefer clarity for
+ * now...
+ */
+
+/*
+ * This gets called if we failed to acquire the lock, but we're biased
+ * to acquire the lock by virtue of causing the count to change from 0
+ * to -1. Being biased, we sleep and attempt to grab the lock until
+ * we succeed. When this function returns, we own the lock.
+ */
+static inline void
+down_read_failed_biased (struct rw_semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ add_wait_queue(&sem->wait, &wait); /* put ourselves at the head of the list */
+
+ for (;;) {
+ if (sem->read_bias_granted && xchg(&sem->read_bias_granted, 0))
+ break;
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ if (!sem->read_bias_granted)
+ schedule();
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+}
+
+/*
+ * This gets called if we failed to acquire the lock and we are not
+ * biased to acquire the lock. We undo the decrement that was
+ * done earlier, go to sleep, and then attempt to re-acquire the
+ * lock afterwards.
+ */
+static inline void
+down_read_failed (struct rw_semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ /*
+ * Undo the decrement we did in down_read() and check if we
+ * need to wake up someone.
+ */
+ __up_read(sem);
+
+ add_wait_queue(&sem->wait, &wait);
+ while (sem->count < 0) {
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ if (sem->count >= 0)
+ break;
+ schedule();
+ }
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+}
+
+/*
+ * Wait for the lock to become unbiased. Readers are non-exclusive.
+ */
+void
+__down_read_failed (struct rw_semaphore *sem, long count)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ while (1) {
+ if (count == -1) {
+ down_read_failed_biased(sem);
+ return;
+ }
+ /* unbiased */
+ down_read_failed(sem);
+
+ count = ia64_fetch_and_add(-1, &sem->count);
+ if (count >= 0)
+ return;
+ }
+}
+
+static inline void
+down_write_failed_biased (struct rw_semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ /* put ourselves at the end of the list */
+ add_wait_queue_exclusive(&sem->write_bias_wait, &wait);
+
+ for (;;) {
+ if (sem->write_bias_granted && xchg(&sem->write_bias_granted, 0))
+ break;
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE | TASK_EXCLUSIVE);
+ if (!sem->write_bias_granted)
+ schedule();
+ }
+
+ remove_wait_queue(&sem->write_bias_wait, &wait);
+ tsk->state = TASK_RUNNING;
+
+ /*
+ * If the lock is currently unbiased, awaken the sleepers
+ * FIXME: this wakes up the readers early in a bit of a
+ * stampede -> bad!
+ */
+ if (sem->count >= 0)
+ wake_up(&sem->wait);
+}
+
+
+static inline void
+down_write_failed (struct rw_semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+
+ __up_write(sem); /* this takes care of granting the lock */
+
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ while (sem->count < 0) {
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE | TASK_EXCLUSIVE);
+ if (sem->count >= 0)
+			break;	/* we must attempt to acquire or bias the lock */
+ schedule();
+ }
+
+ remove_wait_queue(&sem->wait, &wait);
+ tsk->state = TASK_RUNNING;
+}
+
+
+/*
+ * Wait for the lock to become unbiased. Since we're a writer, we'll
+ * make ourselves exclusive.
+ */
+void
+__down_write_failed (struct rw_semaphore *sem, long count)
+{
+ long old_count;
+
+ while (1) {
+ if (count == -RW_LOCK_BIAS) {
+ down_write_failed_biased(sem);
+ return;
+ }
+ down_write_failed(sem);
+
+ do {
+ old_count = sem->count;
+ count = old_count - RW_LOCK_BIAS;
+ } while (cmpxchg(&sem->count, old_count, count) != old_count);
+
+ if (count == 0)
+ return;
+ }
+}
+
+void
+__rwsem_wake (struct rw_semaphore *sem, long count)
+{
+ wait_queue_head_t *wq;
+
+ if (count == 0) {
+ /* wake a writer */
+ if (xchg(&sem->write_bias_granted, 1))
+ BUG();
+ wq = &sem->write_bias_wait;
+ } else {
+ /* wake reader(s) */
+ if (xchg(&sem->read_bias_granted, 1))
+ BUG();
+ wq = &sem->wait;
+ }
+ wake_up(wq); /* wake up everyone on the wait queue */
+}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
new file mode 100644
index 000000000..f3283d535
--- /dev/null
+++ b/arch/ia64/kernel/setup.c
@@ -0,0 +1,326 @@
+/*
+ * Architecture-specific setup.
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ *
+ * 02/04/00 D.Mosberger some more get_cpuinfo fixes...
+ * 02/01/00 R.Seth fixed get_cpuinfo for SMP
+ * 01/07/99 S.Eranian added the support for command line argument
+ * 06/24/99 W.Drummond added boot_cpu_data.
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+
+#include <linux/bootmem.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/threads.h>
+#include <linux/console.h>
+
+#include <asm/acpi-ext.h>
+#include <asm/page.h>
+#include <asm/machvec.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+#include <asm/efi.h>
+
+extern char _end;
+
+/* cpu_data[bootstrap_processor] is data for the bootstrap processor: */
+struct cpuinfo_ia64 cpu_data[NR_CPUS];
+
+unsigned long ia64_cycles_per_usec;
+struct ia64_boot_param ia64_boot_param;
+struct screen_info screen_info;
+unsigned long cpu_initialized = 0;
+/* This tells _start which CPU is booting. */
+int cpu_now_booting = 0;
+
+#define COMMAND_LINE_SIZE 512
+
+char saved_command_line[COMMAND_LINE_SIZE]; /* used in proc filesystem */
+
+static int
+find_max_pfn (unsigned long start, unsigned long end, void *arg)
+{
+ unsigned long *max_pfn = arg, pfn;
+
+ pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT;
+ if (pfn > *max_pfn)
+ *max_pfn = pfn;
+ return 0;
+}
+
+static int
+free_available_memory (unsigned long start, unsigned long end, void *arg)
+{
+# define KERNEL_END ((unsigned long) &_end)
+# define MIN(a,b) ((a) < (b) ? (a) : (b))
+# define MAX(a,b) ((a) > (b) ? (a) : (b))
+ unsigned long range_start, range_end;
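+	/*
+	 * Each chunk of available memory gets split around the kernel
+	 * image: the portion below KERNEL_START and the portion above
+	 * KERNEL_END are handed to the bootmem allocator, while the
+	 * kernel text/data in between stays reserved.
+	 */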
+
+ range_start = MIN(start, KERNEL_START);
+ range_end = MIN(end, KERNEL_START);
+
+ /*
+ * XXX This should not be necessary, but the bootmem allocator
+ * is broken and fails to work correctly when the starting
+ * address is not properly aligned.
+ */
+ range_start = PAGE_ALIGN(range_start);
+
+ if (range_start < range_end)
+ free_bootmem(__pa(range_start), range_end - range_start);
+
+ range_start = MAX(start, KERNEL_END);
+ range_end = MAX(end, KERNEL_END);
+
+ /*
+ * XXX This should not be necessary, but the bootmem allocator
+ * is broken and fails to work correctly when the starting
+ * address is not properly aligned.
+ */
+ range_start = PAGE_ALIGN(range_start);
+
+ if (range_start < range_end)
+ free_bootmem(__pa(range_start), range_end - range_start);
+
+ return 0;
+}
+
+void __init
+setup_arch (char **cmdline_p)
+{
+ unsigned long max_pfn, bootmap_start, bootmap_size;
+
+ /*
+ * The secondary bootstrap loader passes us the boot
+ * parameters at the beginning of the ZERO_PAGE, so let's
+ * stash away those values before ZERO_PAGE gets cleared out.
+ */
+ memcpy(&ia64_boot_param, (void *) ZERO_PAGE_ADDR, sizeof(ia64_boot_param));
+
+ efi_init();
+
+ max_pfn = 0;
+ efi_memmap_walk(find_max_pfn, &max_pfn);
+
+ /*
+ * This is wrong, wrong, wrong. Darn it, you'd think if they
+ * change APIs, they'd do things for the better. Grumble...
+ */
+ bootmap_start = PAGE_ALIGN(__pa(&_end));
+ bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn);
+
+ efi_memmap_walk(free_available_memory, 0);
+
+ reserve_bootmem(bootmap_start, bootmap_size);
+#if 0
+ /* XXX fix me */
+ init_mm.start_code = (unsigned long) &_stext;
+ init_mm.end_code = (unsigned long) &_etext;
+ init_mm.end_data = (unsigned long) &_edata;
+ init_mm.brk = (unsigned long) &_end;
+
+ code_resource.start = virt_to_bus(&_text);
+ code_resource.end = virt_to_bus(&_etext) - 1;
+ data_resource.start = virt_to_bus(&_etext);
+ data_resource.end = virt_to_bus(&_edata) - 1;
+#endif
+
+ /* process SAL system table: */
+ ia64_sal_init(efi.sal_systab);
+
+ *cmdline_p = __va(ia64_boot_param.command_line);
+ strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line));
+ saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */
+
+ printk("args to kernel: %s\n", *cmdline_p);
+
+#ifndef CONFIG_SMP
+ cpu_init();
+ identify_cpu(&cpu_data[0]);
+#endif
+
+ if (efi.acpi) {
+ /* Parse the ACPI tables */
+ acpi_parse(efi.acpi);
+ }
+
+#ifdef CONFIG_IA64_GENERIC
+ machvec_init(acpi_get_sysname());
+#endif
+
+#ifdef CONFIG_VT
+# if defined(CONFIG_VGA_CONSOLE)
+ conswitchp = &vga_con;
+# elif defined(CONFIG_DUMMY_CONSOLE)
+ conswitchp = &dummy_con;
+# endif
+#endif
+ platform_setup(cmdline_p);
+}
+
+/*
+ * Display CPU info for all CPUs.
+ */
+int
+get_cpuinfo (char *buffer)
+{
+ char family[32], model[32], features[128], *cp, *p = buffer;
+ struct cpuinfo_ia64 *c;
+ unsigned long mask;
+
+ for (c = cpu_data; c < cpu_data + NR_CPUS; ++c) {
+ if (!(cpu_initialized & (1UL << (c - cpu_data))))
+ continue;
+
+ mask = c->features;
+
+ if (c->family == 7)
+ memcpy(family, "IA-64", 6);
+ else
+ sprintf(family, "%u", c->family);
+
+ switch (c->model) {
+ case 0: strcpy(model, "Itanium"); break;
+ default: sprintf(model, "%u", c->model); break;
+ }
+
+ /* build the feature string: */
+ memcpy(features, " standard", 10);
+ cp = features;
+ if (mask & 1) {
+ strcpy(cp, " branchlong");
+ cp = strchr(cp, '\0');
+ mask &= ~1UL;
+ }
+ if (mask)
+ sprintf(cp, " 0x%lx", mask);
+
+		p += sprintf(p,
+ "CPU# %lu\n"
+ "\tvendor : %s\n"
+ "\tfamily : %s\n"
+ "\tmodel : %s\n"
+ "\trevision : %u\n"
+ "\tarchrev : %u\n"
+ "\tfeatures :%s\n" /* don't change this---it _is_ right! */
+ "\tcpu number : %lu\n"
+ "\tcpu regs : %u\n"
+ "\tcpu MHz : %lu.%06lu\n"
+ "\titc MHz : %lu.%06lu\n"
+ "\tBogoMIPS : %lu.%02lu\n\n",
+ c - cpu_data, c->vendor, family, model, c->revision, c->archrev,
+ features,
+ c->ppn, c->number, c->proc_freq / 1000000, c->proc_freq % 1000000,
+ c->itc_freq / 1000000, c->itc_freq % 1000000,
+ loops_per_sec() / 500000, (loops_per_sec() / 5000) % 100);
+ }
+ return p - buffer;
+}
+
+void
+identify_cpu (struct cpuinfo_ia64 *c)
+{
+ union {
+ unsigned long bits[5];
+ struct {
+ /* id 0 & 1: */
+ char vendor[16];
+
+ /* id 2 */
+ u64 ppn; /* processor serial number */
+
+ /* id 3: */
+ unsigned number : 8;
+ unsigned revision : 8;
+ unsigned model : 8;
+ unsigned family : 8;
+ unsigned archrev : 8;
+ unsigned reserved : 24;
+
+ /* id 4: */
+ u64 features;
+ } field;
+ } cpuid;
+ int i;
+
+ for (i = 0; i < 5; ++i) {
+ cpuid.bits[i] = ia64_get_cpuid(i);
+ }
+
+#ifdef CONFIG_SMP
+ /*
+ * XXX Instead of copying the ITC info from the bootstrap
+ * processor, ia64_init_itm() should be done per CPU. That
+ * should get you the right info. --davidm 1/24/00
+ */
+ if (c != &cpu_data[bootstrap_processor]) {
+ memset(c, 0, sizeof(struct cpuinfo_ia64));
+ c->proc_freq = cpu_data[bootstrap_processor].proc_freq;
+ c->itc_freq = cpu_data[bootstrap_processor].itc_freq;
+ c->cyc_per_usec = cpu_data[bootstrap_processor].cyc_per_usec;
+ c->usec_per_cyc = cpu_data[bootstrap_processor].usec_per_cyc;
+ }
+#else
+ memset(c, 0, sizeof(struct cpuinfo_ia64));
+#endif
+
+ memcpy(c->vendor, cpuid.field.vendor, 16);
+#ifdef CONFIG_IA64_SOFTSDV_HACKS
+ /* BUG: SoftSDV doesn't support the cpuid registers. */
+ if (c->vendor[0] == '\0')
+ memcpy(c->vendor, "Intel", 6);
+#endif
+ c->ppn = cpuid.field.ppn;
+ c->number = cpuid.field.number;
+ c->revision = cpuid.field.revision;
+ c->model = cpuid.field.model;
+ c->family = cpuid.field.family;
+ c->archrev = cpuid.field.archrev;
+ c->features = cpuid.field.features;
+#ifdef CONFIG_SMP
+ c->loops_per_sec = loops_per_sec;
+#endif
+}
+
+/*
+ * cpu_init() initializes state that is per-CPU. This function acts
+ * as a 'CPU state barrier', nothing should get across.
+ */
+void
+cpu_init (void)
+{
+ int nr = smp_processor_id();
+
+ /* Clear the stack memory reserved for pt_regs: */
+ memset(ia64_task_regs(current), 0, sizeof(struct pt_regs));
+
+ /*
+ * Initialize default control register to defer speculative
+ * faults. On a speculative load, we want to defer access
+	 * rights, key miss, and key permission faults. We currently
+ * do NOT defer TLB misses, page-not-present, access bit, or
+ * debug faults but kernel code should not rely on any
+ * particular setting of these bits.
+ */
+ ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_PP);
+ ia64_set_fpu_owner(0); /* initialize ar.k5 */
+
+ if (test_and_set_bit(nr, &cpu_initialized)) {
+ printk("CPU#%d already initialized!\n", nr);
+ machine_halt();
+ }
+ atomic_inc(&init_mm.mm_count);
+ current->active_mm = &init_mm;
+}
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
new file mode 100644
index 000000000..19be1f840
--- /dev/null
+++ b/arch/ia64/kernel/signal.c
@@ -0,0 +1,537 @@
+/*
+ * Architecture-specific signal handling support.
+ *
+ * Copyright (C) 1999-2000 Hewlett-Packard Co
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Derived from i386 and Alpha versions.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/wait.h>
+
+#include <asm/ia32.h>
+#include <asm/uaccess.h>
+#include <asm/rse.h>
+#include <asm/sigcontext.h>
+
+#define DEBUG_SIG 0
+#define STACK_ALIGN 16 /* minimal alignment for stack pointer */
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+#if _NSIG_WORDS > 1
+# define PUT_SIGSET(k,u) __copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t))
+# define GET_SIGSET(k,u) __copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t))
+#else
+# define PUT_SIGSET(k,u) __put_user((k)->sig[0], &(u)->sig[0])
+# define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0])
+#endif
+
+struct sigframe {
+ struct siginfo info;
+ struct sigcontext sc;
+};
+
+extern long sys_wait4 (int, int *, int, struct rusage *);
+extern long ia64_do_signal (sigset_t *, struct pt_regs *, long); /* forward decl */
+
+long
+ia64_rt_sigsuspend (sigset_t *uset, size_t sigsetsize, struct pt_regs *pt)
+{
+ sigset_t oldset, set;
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+ if (GET_SIGSET(&set, uset))
+ return -EFAULT;
+
+ sigdelsetmask(&set, ~_BLOCKABLE);
+
+ spin_lock_irq(&current->sigmask_lock);
+ {
+ oldset = current->blocked;
+ current->blocked = set;
+ recalc_sigpending(current);
+ }
+ spin_unlock_irq(&current->sigmask_lock);
+
+ /*
+ * The return below usually returns to the signal handler. We need to
+ * pre-set the correct error code here to ensure that the right values
+ * get saved in sigcontext by ia64_do_signal.
+ */
+ pt->r8 = EINTR;
+ pt->r10 = -1;
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ if (ia64_do_signal(&oldset, pt, 1))
+ return -EINTR;
+ }
+}
+
+asmlinkage long
+sys_sigaltstack (const stack_t *uss, stack_t *uoss, long arg2, long arg3, long arg4,
+ long arg5, long arg6, long arg7, long stack)
+{
+ struct pt_regs *pt = (struct pt_regs *) &stack;
+
+ return do_sigaltstack(uss, uoss, pt->r12);
+}
+
+static long
+restore_sigcontext (struct sigcontext *sc, struct pt_regs *pt)
+{
+ struct switch_stack *sw = (struct switch_stack *) pt - 1;
+ unsigned long ip, flags, nat, um;
+ long err;
+
+	/* restore the scratch state that always gets updated during signal delivery: */
+ err = __get_user(flags, &sc->sc_flags);
+
+ err |= __get_user(nat, &sc->sc_nat);
+ err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */
+ err |= __get_user(pt->ar_fpsr, &sc->sc_ar_fpsr);
+ err |= __get_user(pt->ar_pfs, &sc->sc_ar_pfs);
+ err |= __get_user(um, &sc->sc_um); /* user mask */
+ err |= __get_user(pt->ar_rsc, &sc->sc_ar_rsc);
+ err |= __get_user(pt->ar_ccv, &sc->sc_ar_ccv);
+ err |= __get_user(pt->ar_unat, &sc->sc_ar_unat);
+ err |= __get_user(pt->pr, &sc->sc_pr); /* predicates */
+ err |= __get_user(pt->b0, &sc->sc_br[0]); /* b0 (rp) */
+ err |= __get_user(pt->b6, &sc->sc_br[6]);
+ err |= __copy_from_user(&pt->r1, &sc->sc_gr[1], 3*8); /* r1-r3 */
+ err |= __copy_from_user(&pt->r8, &sc->sc_gr[8], 4*8); /* r8-r11 */
+ err |= __copy_from_user(&pt->r12, &sc->sc_gr[12], 4*8); /* r12-r15 */
+ err |= __copy_from_user(&pt->r16, &sc->sc_gr[16], 16*8); /* r16-r31 */
+
+ /* establish new instruction pointer: */
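+	/*
+	 * The two low bits of the saved IP encode the bundle slot number
+	 * (psr.ri); the remaining bits are the 16-byte-aligned bundle
+	 * address.
+	 */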
+ pt->cr_iip = ip & ~0x3UL;
+ ia64_psr(pt)->ri = ip & 0x3;
+ pt->cr_ipsr = (pt->cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM);
+
+ ia64_put_nat_bits (pt, sw, nat); /* restore the original scratch NaT bits */
+
+ if (flags & IA64_SC_FLAG_FPH_VALID) {
+ struct task_struct *fpu_owner = ia64_get_fpu_owner();
+
+ __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
+ if (fpu_owner == current) {
+ __ia64_load_fpu(current->thread.fph);
+ }
+ }
+ return err;
+}
+
+/*
+ * When we get here, ((struct switch_stack *) pt - 1) is a
+ * switch_stack frame that has no defined value. Upon return, we
+ * expect sw->caller_unat to contain the new unat value. The reason
+ * we use a full switch_stack frame is so everything is symmetric
+ * with ia64_do_signal().
+ */
+long
+ia64_rt_sigreturn (struct pt_regs *pt)
+{
+ extern char ia64_strace_leave_kernel, ia64_leave_kernel;
+ struct sigcontext *sc;
+ struct siginfo si;
+ sigset_t set;
+ long retval;
+
+ sc = &((struct sigframe *) (pt->r12 + 16))->sc;
+
+ /*
+ * When we return to the previously executing context, r8 and
+	 * r10 have already been set up the way we want them. Indeed,
+	 * if the signal wasn't delivered while in a system call, we
+	 * must not touch r8 or r10 as otherwise user-level state could
+ * be corrupted.
+ */
+ retval = (long) &ia64_leave_kernel | 1;
+ if ((current->flags & PF_TRACESYS)
+ && (sc->sc_flags & IA64_SC_FLAG_IN_SYSCALL))
+ retval = (long) &ia64_strace_leave_kernel;
+
+ if (!access_ok(VERIFY_READ, sc, sizeof(*sc)))
+ goto give_sigsegv;
+
+ if (GET_SIGSET(&set, &sc->sc_mask))
+ goto give_sigsegv;
+
+ sigdelsetmask(&set, ~_BLOCKABLE);
+ spin_lock_irq(&current->sigmask_lock);
+ current->blocked = set;
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ if (restore_sigcontext(sc, pt))
+ goto give_sigsegv;
+
+#if DEBUG_SIG
+ printk("SIG return (%s:%d): sp=%lx ip=%lx\n",
+ current->comm, current->pid, pt->r12, pt->cr_iip);
+#endif
+ /*
+ * It is more difficult to avoid calling this function than to
+ * call it and ignore errors.
+ */
+ do_sigaltstack(&sc->sc_stack, 0, pt->r12);
+ return retval;
+
+ give_sigsegv:
+ si.si_signo = SIGSEGV;
+ si.si_errno = 0;
+ si.si_code = SI_KERNEL;
+ si.si_pid = current->pid;
+ si.si_uid = current->uid;
+ si.si_addr = sc;
+ force_sig_info(SIGSEGV, &si, current);
+ return retval;
+}
+
+/*
+ * This does just the minimum required setup of sigcontext.
+ * Specifically, it only installs data that is either not knowable at
+ * the user-level or that gets modified before execution starts in
+ * the trampoline. Everything else is done at the user-level.
+ */
+static long
+setup_sigcontext (struct sigcontext *sc, sigset_t *mask, struct pt_regs *pt)
+{
+ struct switch_stack *sw = (struct switch_stack *) pt - 1;
+ struct task_struct *fpu_owner = ia64_get_fpu_owner();
+ unsigned long flags = 0, ifs, nat;
+ long err;
+
+ ifs = pt->cr_ifs;
+
+ if (on_sig_stack((unsigned long) sc))
+ flags |= IA64_SC_FLAG_ONSTACK;
+ if ((ifs & (1UL << 63)) == 0) {
+ /* if cr_ifs isn't valid, we got here through a syscall */
+ flags |= IA64_SC_FLAG_IN_SYSCALL;
+ }
+ if ((fpu_owner == current) || (current->thread.flags & IA64_THREAD_FPH_VALID)) {
+ flags |= IA64_SC_FLAG_FPH_VALID;
+ if (fpu_owner == current) {
+ __ia64_save_fpu(current->thread.fph);
+ }
+ __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16);
+ }
+
+ /*
+ * Note: sw->ar_unat is UNDEFINED unless the process is being
+ * PTRACED. However, this is OK because the NaT bits of the
+	 * preserved registers (r4-r7) are never looked at by
+	 * the signal handler (registers r4-r7 are used instead).
+ */
+ nat = ia64_get_nat_bits(pt, sw);
+
+ err = __put_user(flags, &sc->sc_flags);
+ err |= __put_user(nat, &sc->sc_nat);
+ err |= PUT_SIGSET(mask, &sc->sc_mask);
+ err |= __put_user(pt->cr_ipsr & IA64_PSR_UM, &sc->sc_um);
+ err |= __put_user(pt->ar_rsc, &sc->sc_ar_rsc);
+ err |= __put_user(pt->ar_ccv, &sc->sc_ar_ccv);
+ err |= __put_user(pt->ar_unat, &sc->sc_ar_unat); /* ar.unat */
+ err |= __put_user(pt->ar_fpsr, &sc->sc_ar_fpsr); /* ar.fpsr */
+ err |= __put_user(pt->ar_pfs, &sc->sc_ar_pfs);
+ err |= __put_user(pt->pr, &sc->sc_pr); /* predicates */
+ err |= __put_user(pt->b0, &sc->sc_br[0]); /* b0 (rp) */
+ err |= __put_user(pt->b6, &sc->sc_br[6]); /* b6 */
+ err |= __put_user(pt->b7, &sc->sc_br[7]); /* b7 */
+
+ err |= __copy_to_user(&sc->sc_gr[1], &pt->r1, 3*8); /* r1-r3 */
+ err |= __copy_to_user(&sc->sc_gr[8], &pt->r8, 4*8); /* r8-r11 */
+ err |= __copy_to_user(&sc->sc_gr[12], &pt->r12, 4*8); /* r12-r15 */
+ err |= __copy_to_user(&sc->sc_gr[16], &pt->r16, 16*8); /* r16-r31 */
+
+ err |= __put_user(pt->cr_iip + ia64_psr(pt)->ri, &sc->sc_ip);
+ err |= __put_user(pt->r12, &sc->sc_gr[12]); /* r12 */
+ return err;
+}
+
+static long
+setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *pt)
+{
+ struct switch_stack *sw = (struct switch_stack *) pt - 1;
+ extern char ia64_sigtramp[], __start_gate_section[];
+ unsigned long tramp_addr, new_rbs = 0;
+ struct sigframe *frame;
+ struct siginfo si;
+ long err;
+
+ frame = (void *) pt->r12;
+ tramp_addr = GATE_ADDR + (ia64_sigtramp - __start_gate_section);
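+	/*
+	 * The signal trampoline (ia64_sigtramp) lives in the kernel's gate
+	 * section, which is mapped into every user address space at
+	 * GATE_ADDR; its user-visible address is therefore GATE_ADDR plus
+	 * the symbol's offset from the start of that section.
+	 */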
+ if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && !on_sig_stack((unsigned long) frame)) {
+ new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1);
+ frame = (void *) ((current->sas_ss_sp + current->sas_ss_size)
+ & ~(STACK_ALIGN - 1));
+ }
+ frame = (void *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1));
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ goto give_sigsegv;
+
+ err = __copy_to_user(&frame->info, info, sizeof(siginfo_t));
+
+ err |= __put_user(current->sas_ss_sp, &frame->sc.sc_stack.ss_sp);
+ err |= __put_user(current->sas_ss_size, &frame->sc.sc_stack.ss_size);
+ err |= __put_user(sas_ss_flags(pt->r12), &frame->sc.sc_stack.ss_flags);
+ err |= setup_sigcontext(&frame->sc, set, pt);
+
+ if (err)
+ goto give_sigsegv;
+
+ pt->r12 = (unsigned long) frame - 16; /* new stack pointer */
+ pt->r2 = sig; /* signal number */
+ pt->r3 = (unsigned long) ka->sa.sa_handler; /* addr. of handler's proc. descriptor */
+ pt->r15 = new_rbs;
+ pt->ar_fpsr = FPSR_DEFAULT; /* reset fpsr for signal handler */
+ pt->cr_iip = tramp_addr;
+ ia64_psr(pt)->ri = 0; /* start executing in first slot */
+
+ /*
+ * Note: this affects only the NaT bits of the scratch regs
+	 * (the ones saved in pt_regs), which is exactly what we want.
+ * The NaT bits for the preserved regs (r4-r7) are in
+ * sw->ar_unat iff this process is being PTRACED.
+ */
+ sw->caller_unat = 0; /* ensure NaT bits of at least r2, r3, r12, and r15 are clear */
+
+#if DEBUG_SIG
+ printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%lx\n",
+ current->comm, current->pid, sig, pt->r12, pt->cr_iip, pt->r3);
+#endif
+ return 1;
+
+ give_sigsegv:
+ if (sig == SIGSEGV)
+ ka->sa.sa_handler = SIG_DFL;
+ si.si_signo = SIGSEGV;
+ si.si_errno = 0;
+ si.si_code = SI_KERNEL;
+ si.si_pid = current->pid;
+ si.si_uid = current->uid;
+ si.si_addr = frame;
+ force_sig_info(SIGSEGV, &si, current);
+ return 0;
+}
+
+static long
+handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset,
+ struct pt_regs *pt)
+{
+#ifdef CONFIG_IA32_SUPPORT
+ if (IS_IA32_PROCESS(pt)) {
+ /* send signal to IA-32 process */
+ if (!ia32_setup_frame1(sig, ka, info, oldset, pt))
+ return 0;
+ } else
+#endif
+ /* send signal to IA-64 process */
+ if (!setup_frame(sig, ka, info, oldset, pt))
+ return 0;
+
+ if (ka->sa.sa_flags & SA_ONESHOT)
+ ka->sa.sa_handler = SIG_DFL;
+
+ if (!(ka->sa.sa_flags & SA_NODEFER)) {
+ spin_lock_irq(&current->sigmask_lock);
+ sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+ sigaddset(&current->blocked, sig);
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+ }
+ return 1;
+}
+
+/*
+ * When we get here, `pt' points to struct pt_regs and ((struct
+ * switch_stack *) pt - 1) points to a switch stack structure.
+ * HOWEVER, in the normal case, the ONLY value valid in the
+ * switch_stack is the caller_unat field. The entire switch_stack is
+ * valid ONLY if current->flags has PF_PTRACED set.
+ *
+ * Note that `init' is a special process: it doesn't get signals it
+ * doesn't want to handle. Thus you cannot kill init, not even with
+ * SIGKILL, even by mistake.
+ *
+ * Note that we go through the signals twice: once to check the
+ * signals that the kernel can handle, and then we build all the
+ * user-level signal handling stack-frames in one go after that.
+ */
+long
+ia64_do_signal (sigset_t *oldset, struct pt_regs *pt, long in_syscall)
+{
+ struct k_sigaction *ka;
+ siginfo_t info;
+ long restart = in_syscall;
+
+ /*
+ * In the ia64_leave_kernel code path, we want the common case
+ * to go fast, which is why we may in certain cases get here
+ * from kernel mode. Just return without doing anything if so.
+ */
+ if (!user_mode(pt))
+ return 0;
+
+ if (!oldset)
+ oldset = &current->blocked;
+
+ if (pt->r10 != -1) {
+ /*
+ * A system call has to be restarted only if one of
+ * the error codes ERESTARTNOHAND, ERESTARTSYS, or
+ * ERESTARTNOINTR is returned. If r10 isn't -1 then
+ * r8 doesn't hold an error code and we don't need to
+ * restart the syscall, so we clear `restart'.
+ */
+ restart = 0;
+ }
+
+ for (;;) {
+ unsigned long signr;
+
+ spin_lock_irq(&current->sigmask_lock);
+ signr = dequeue_signal(&current->blocked, &info);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ if (!signr)
+ break;
+
+ if ((current->flags & PF_PTRACED) && signr != SIGKILL) {
+ /* Let the debugger run. */
+ current->exit_code = signr;
+ set_current_state(TASK_STOPPED);
+ notify_parent(current, SIGCHLD);
+ schedule();
+ signr = current->exit_code;
+
+ /* We're back. Did the debugger cancel the sig? */
+ if (!signr)
+ continue;
+ current->exit_code = 0;
+
+ /* The debugger continued. Ignore SIGSTOP. */
+ if (signr == SIGSTOP)
+ continue;
+
+ /* Update the siginfo structure. Is this good? */
+ if (signr != info.si_signo) {
+ info.si_signo = signr;
+ info.si_errno = 0;
+ info.si_code = SI_USER;
+ info.si_pid = current->p_pptr->pid;
+ info.si_uid = current->p_pptr->uid;
+ }
+
+ /* If the (new) signal is now blocked, requeue it. */
+ if (sigismember(&current->blocked, signr)) {
+ send_sig_info(signr, &info, current);
+ continue;
+ }
+ }
+
+ ka = &current->sig->action[signr - 1];
+ if (ka->sa.sa_handler == SIG_IGN) {
+ if (signr != SIGCHLD)
+ continue;
+ /* Check for SIGCHLD: it's special. */
+ while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0)
+ /* nothing */;
+ continue;
+ }
+
+ if (ka->sa.sa_handler == SIG_DFL) {
+ int exit_code = signr;
+
+ /* Init gets no signals it doesn't want. */
+ if (current->pid == 1)
+ continue;
+
+ switch (signr) {
+ case SIGCONT: case SIGCHLD: case SIGWINCH:
+ continue;
+
+ case SIGTSTP: case SIGTTIN: case SIGTTOU:
+ if (is_orphaned_pgrp(current->pgrp))
+ continue;
+ /* FALLTHRU */
+
+ case SIGSTOP:
+ set_current_state(TASK_STOPPED);
+ current->exit_code = signr;
+ if (!(current->p_pptr->sig->action[SIGCHLD-1].sa.sa_flags
+ & SA_NOCLDSTOP))
+ notify_parent(current, SIGCHLD);
+ schedule();
+ continue;
+
+ case SIGQUIT: case SIGILL: case SIGTRAP:
+ case SIGABRT: case SIGFPE: case SIGSEGV:
+ case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ:
+ if (do_coredump(signr, pt))
+ exit_code |= 0x80;
+ /* FALLTHRU */
+
+ default:
+ lock_kernel();
+ sigaddset(&current->signal, signr);
+ recalc_sigpending(current);
+ current->flags |= PF_SIGNALED;
+ do_exit(exit_code);
+ /* NOTREACHED */
+ }
+ }
+
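+ /*
+ * The interleaved case labels below are deliberate: ERESTARTSYS falls
+ * into the ERESTARTNOHAND handling (r8 = EINTR) when the handler was
+ * installed without SA_RESTART; otherwise it falls through to
+ * ERESTARTNOINTR and the syscall is restarted by backing up the
+ * instruction pointer.
+ */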
+ if (restart) {
+ switch (pt->r8) {
+ case ERESTARTSYS:
+ if ((ka->sa.sa_flags & SA_RESTART) == 0) {
+ case ERESTARTNOHAND:
+ pt->r8 = EINTR;
+ /* note: pt->r10 is already -1 */
+ break;
+ }
+ case ERESTARTNOINTR:
+ ia64_decrement_ip(pt);
+ }
+ }
+
+ /* Whee! Actually deliver the signal. If the
+ delivery failed, we need to continue to iterate in
+ this loop so we can deliver the SIGSEGV... */
+ if (handle_signal(signr, ka, &info, oldset, pt))
+ return 1;
+ }
+
+ /* Did we come from a system call? */
+ if (restart) {
+ /* Restart the system call - no handlers present */
+ if (pt->r8 == ERESTARTNOHAND ||
+ pt->r8 == ERESTARTSYS ||
+ pt->r8 == ERESTARTNOINTR) {
+ /*
+ * Note: the syscall number is in r15 which is
+ * saved in pt_regs so all we need to do here
+ * is adjust ip so that the "break"
+ * instruction gets re-executed.
+ */
+ ia64_decrement_ip(pt);
+ }
+ }
+ return 0;
+}
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
new file mode 100644
index 000000000..48a3d68b4
--- /dev/null
+++ b/arch/ia64/kernel/smp.c
@@ -0,0 +1,777 @@
+/*
+ * SMP Support
+ *
+ * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Lots of stuff stolen from arch/alpha/kernel/smp.c
+ *
+ * 99/10/05 davidm Update to bring it in sync with new command-line processing scheme.
+ */
+#define __KERNEL_SYSCALLS__
+
+#include <linux/config.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/kernel_stat.h>
+#include <linux/mm.h>
+
+#include <asm/atomic.h>
+#include <asm/bitops.h>
+#include <asm/current.h>
+#include <asm/delay.h>
+
+#ifdef CONFIG_KDB
+#include <linux/kdb.h>
+void smp_kdb_interrupt (struct pt_regs* regs);
+void kdb_global(int cpuid);
+extern unsigned long smp_kdb_wait;
+extern int kdb_new_cpu;
+#endif
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+#include <asm/unistd.h>
+
+extern int cpu_idle(void * unused);
+extern void _start(void);
+
+extern int cpu_now_booting; /* Used by head.S to find idle task */
+extern unsigned long cpu_initialized; /* Bitmap of available cpu's */
+extern struct cpuinfo_ia64 cpu_data[NR_CPUS]; /* Duh... */
+
+spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;
+
+#ifdef CONFIG_KDB
+unsigned long cpu_online_map = 1;
+#endif
+
+volatile int cpu_number_map[NR_CPUS] = { -1, }; /* SAPIC ID -> Logical ID */
+volatile int __cpu_logical_map[NR_CPUS] = { -1, }; /* logical ID -> SAPIC ID */
+int smp_num_cpus = 1;
+int bootstrap_processor = -1; /* SAPIC ID of BSP */
+int smp_threads_ready = 0; /* Set when the idlers are all forked */
+unsigned long ipi_base_addr = IPI_DEFAULT_BASE_ADDR; /* Base addr of IPI table */
+cycles_t cacheflush_time = 0;
+unsigned long ap_wakeup_vector = -1; /* External Int to use to wakeup AP's */
+static int max_cpus = -1; /* Command line */
+static unsigned long ipi_op[NR_CPUS];
+struct smp_call_struct {
+ void (*func) (void *info);
+ void *info;
+ long wait;
+ atomic_t unstarted_count;
+ atomic_t unfinished_count;
+};
+static struct smp_call_struct *smp_call_function_data;
+
+#ifdef CONFIG_KDB
+unsigned long smp_kdb_wait = 0; /* Bitmask of waiters */
+#endif
+
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+extern spinlock_t ivr_read_lock;
+#endif
+
+int use_xtp = 0; /* XXX */
+
+#define IPI_RESCHEDULE 0
+#define IPI_CALL_FUNC 1
+#define IPI_CPU_STOP 2
+#define IPI_KDB_INTERRUPT 4
+
+/*
+ * Setup routine for controlling SMP activation
+ *
+ * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
+ * activation entirely (the MPS table probe still happens, though).
+ *
+ * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
+ * greater than 0, limits the maximum number of CPUs activated in
+ * SMP mode to <NUM>.
+ */
+
+static int __init nosmp(char *str)
+{
+ max_cpus = 0;
+ return 1;
+}
+
+__setup("nosmp", nosmp);
+
+static int __init maxcpus(char *str)
+{
+ get_option(&str, &max_cpus);
+ return 1;
+}
+
+__setup("maxcpus=", maxcpus);
+
+/*
+ * Yoink this CPU from the runnable list...
+ */
+void
+halt_processor(void)
+{
+ clear_bit(smp_processor_id(), &cpu_initialized);
+ max_xtp();
+ __cli();
+ for (;;)
+ ;
+
+}
+
+void
+handle_IPI(int irq, void *dev_id, struct pt_regs *regs)
+{
+ int this_cpu = smp_processor_id();
+ unsigned long *pending_ipis = &ipi_op[this_cpu];
+ unsigned long ops;
+
+ /* Count this now; we may make a call that never returns. */
+ cpu_data[this_cpu].ipi_count++;
+
+ mb(); /* Order interrupt and bit testing. */
+ while ((ops = xchg(pending_ipis, 0)) != 0) {
+ mb(); /* Order bit clearing and data access. */
+ do {
+ unsigned long which;
+
+ which = ffz(~ops);
+ ops &= ~(1 << which);
+
+ switch (which) {
+ case IPI_RESCHEDULE:
+ /*
+ * Reschedule callback. Everything to be done is done by the
+ * interrupt return path.
+ */
+ break;
+
+ case IPI_CALL_FUNC:
+ {
+ struct smp_call_struct *data;
+ void (*func)(void *info);
+ void *info;
+ int wait;
+
+ data = smp_call_function_data;
+ func = data->func;
+ info = data->info;
+ wait = data->wait;
+
+ mb();
+ atomic_dec (&data->unstarted_count);
+
+ /* At this point the structure may be gone unless wait is true. */
+ (*func)(info);
+
+ /* Notify the sending CPU that the task is done. */
+ mb();
+ if (wait)
+ atomic_dec (&data->unfinished_count);
+ }
+ break;
+
+ case IPI_CPU_STOP:
+ halt_processor();
+ break;
+
+#ifdef CONFIG_KDB
+ case IPI_KDB_INTERRUPT:
+ smp_kdb_interrupt(regs);
+ break;
+#endif
+
+ default:
+ printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
+ break;
+ } /* Switch */
+ } while (ops);
+
+ mb(); /* Order data access and bit testing. */
+ }
+}
+
+static inline void
+send_IPI(int dest_cpu, unsigned char vector)
+{
+ unsigned long ipi_addr;
+ unsigned long ipi_data;
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ unsigned long flags;
+#endif
+
+ ipi_data = vector;
+ ipi_addr = ipi_base_addr | ((dest_cpu << 8) << 4); /* 16-bit SAPIC ID's; assume CPU bus 0 */
+ mb();
+
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ /*
+ * Disable IVR reads
+ */
+ spin_lock_irqsave(&ivr_read_lock, flags);
+ writeq(ipi_data, ipi_addr);
+ spin_unlock_irqrestore(&ivr_read_lock, flags);
+#else
+ writeq(ipi_data, ipi_addr);
+#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */
+
+}
+
+static inline void
+send_IPI_single(int dest_cpu, int op)
+{
+
+ if (dest_cpu == -1)
+ return;
+
+ ipi_op[dest_cpu] |= (1 << op);
+ send_IPI(dest_cpu, IPI_IRQ);
+}
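+
+/*
+ * Example: send_IPI_single(cpu, IPI_CPU_STOP) sets bit IPI_CPU_STOP in
+ * ipi_op[cpu] and then writes the IPI_IRQ vector to that CPU's IPI address
+ * (see send_IPI() above); handle_IPI() on the target picks the bit back out
+ * of ipi_op[] via xchg()/ffz() and calls halt_processor().
+ */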
+
+static inline void
+send_IPI_allbutself(int op)
+{
+ int i;
+ int cpu_id = 0;
+
+ for (i = 0; i < smp_num_cpus; i++) {
+ cpu_id = __cpu_logical_map[i];
+ if (cpu_id != smp_processor_id())
+ send_IPI_single(cpu_id, op);
+ }
+}
+
+static inline void
+send_IPI_all(int op)
+{
+ int i;
+
+ for (i = 0; i < smp_num_cpus; i++)
+ send_IPI_single(__cpu_logical_map[i], op);
+}
+
+static inline void
+send_IPI_self(int op)
+{
+ send_IPI_single(smp_processor_id(), op);
+}
+
+void
+smp_send_reschedule(int cpu)
+{
+ send_IPI_single(cpu, IPI_RESCHEDULE);
+}
+
+void
+smp_send_stop(void)
+{
+ send_IPI_allbutself(IPI_CPU_STOP);
+}
+
+/*
+ * Run a function on all other CPUs.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <retry> If true, keep retrying until ready.
+ * <wait> If true, wait until function has completed on other CPUs.
+ * [RETURNS] 0 on success, else a negative status code.
+ *
+ * Does not return until remote CPUs are nearly ready to execute <func>
+ * or have already executed it.
+ */
+
+int
+smp_call_function (void (*func) (void *info), void *info, int retry, int wait)
+{
+ struct smp_call_struct data;
+ long timeout;
+ static spinlock_t lock = SPIN_LOCK_UNLOCKED;
+
+ data.func = func;
+ data.info = info;
+ data.wait = wait;
+ atomic_set(&data.unstarted_count, smp_num_cpus - 1);
+ atomic_set(&data.unfinished_count, smp_num_cpus - 1);
+
+ if (retry) {
+ while (1) {
+ if (smp_call_function_data) {
+ schedule (); /* Give a mate a go */
+ continue;
+ }
+ spin_lock (&lock);
+ if (smp_call_function_data) {
+ spin_unlock (&lock); /* Bad luck */
+ continue;
+ }
+ /* Mine, all mine! */
+ break;
+ }
+ }
+ else {
+ if (smp_call_function_data)
+ return -EBUSY;
+ spin_lock (&lock);
+ if (smp_call_function_data) {
+ spin_unlock (&lock);
+ return -EBUSY;
+ }
+ }
+
+ smp_call_function_data = &data;
+ spin_unlock (&lock);
+ data.func = func;
+ data.info = info;
+ atomic_set (&data.unstarted_count, smp_num_cpus - 1);
+ data.wait = wait;
+ if (wait)
+ atomic_set (&data.unfinished_count, smp_num_cpus - 1);
+
+ /* Send a message to all other CPUs and wait for them to respond */
+ send_IPI_allbutself(IPI_CALL_FUNC);
+
+ /* Wait for response */
+ timeout = jiffies + HZ;
+ while ( (atomic_read (&data.unstarted_count) > 0) &&
+ time_before (jiffies, timeout) )
+ barrier ();
+ if (atomic_read (&data.unstarted_count) > 0) {
+ smp_call_function_data = NULL;
+ return -ETIMEDOUT;
+ }
+ if (wait)
+ while (atomic_read (&data.unfinished_count) > 0)
+ barrier ();
+ smp_call_function_data = NULL;
+ return 0;
+}
+
+/*
+ * Flush all other CPUs' TLBs and then our own. Do this with smp_call_function() as we
+ * want to ensure all TLBs are flushed before proceeding.
+ *
+ * XXX: Is it OK to use the same ptc.e info on all cpus?
+ */
+void
+smp_flush_tlb_all(void)
+{
+ smp_call_function((void (*)(void *))__flush_tlb_all, NULL, 1, 1);
+ __flush_tlb_all();
+}
+
+/*
+ * Ideally sets up per-cpu profiling hooks. Doesn't do much now...
+ */
+static inline void __init
+smp_setup_percpu_timer(int cpuid)
+{
+ cpu_data[cpuid].prof_counter = 1;
+ cpu_data[cpuid].prof_multiplier = 1;
+}
+
+void
+smp_do_timer(struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+ int user = user_mode(regs);
+ struct cpuinfo_ia64 *data = &cpu_data[cpu];
+
+ extern void update_one_process(struct task_struct *, unsigned long, unsigned long,
+ unsigned long, int);
+ if (!--data->prof_counter) {
+ irq_enter(cpu, TIMER_IRQ);
+
+ update_one_process(current, 1, user, !user, cpu);
+ if (current->pid) {
+ if (--current->counter < 0) {
+ current->counter = 0;
+ current->need_resched = 1;
+ }
+
+ if (user) {
+ if (current->priority < DEF_PRIORITY) {
+ kstat.cpu_nice++;
+ kstat.per_cpu_nice[cpu]++;
+ } else {
+ kstat.cpu_user++;
+ kstat.per_cpu_user[cpu]++;
+ }
+ } else {
+ kstat.cpu_system++;
+ kstat.per_cpu_system[cpu]++;
+ }
+ }
+
+ data->prof_counter = data->prof_multiplier;
+ irq_exit(cpu, TIMER_IRQ);
+ }
+}
+
+
+/*
+ * Called by both boot and secondaries to move global data into
+ * per-processor storage.
+ */
+static inline void __init
+smp_store_cpu_info(int cpuid)
+{
+ struct cpuinfo_ia64 *c = &cpu_data[cpuid];
+
+ identify_cpu(c);
+}
+
+/*
+ * SAL shoves the AP's here when we start them. Physical mode, no kernel TR,
+ * no RRs set, better than even chance that psr is bogus. Fix all that and
+ * call _start. In effect, pretend to be lilo.
+ *
+ * Stolen from lilo_start.c. Thanks David!
+ */
+void
+start_ap(void)
+{
+ unsigned long flags;
+
+ /*
+ * Install a translation register that identity maps the
+ * kernel's 256MB page(s).
+ */
+ ia64_clear_ic(flags);
+ ia64_set_rr( 0, (0x1000 << 8) | (_PAGE_SIZE_1M << 2));
+ ia64_set_rr(PAGE_OFFSET, (ia64_rid(0, PAGE_OFFSET) << 8) | (_PAGE_SIZE_256M << 2));
+ ia64_itr(0x3, 1, PAGE_OFFSET,
+ pte_val(mk_pte_phys(0, __pgprot(__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX))),
+ _PAGE_SIZE_256M);
+
+ flags = (IA64_PSR_IT | IA64_PSR_IC | IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_DFH |
+ IA64_PSR_BN);
+
+ asm volatile ("movl r8 = 1f\n"
+ ";;\n"
+ "mov cr.ipsr=%0\n"
+ "mov cr.iip=r8\n"
+ "mov cr.ifs=r0\n"
+ ";;\n"
+ "rfi;;"
+ "1:\n"
+ "movl r1 = __gp" :: "r"(flags) : "r8");
+ _start();
+}
+
+
+/*
+ * AP's start using C here.
+ */
+void __init
+smp_callin(void)
+{
+ extern void ia64_rid_init(void);
+ extern void ia64_init_itm(void);
+ extern void ia64_cpu_local_tick(void);
+
+ ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_PP);
+ ia64_set_fpu_owner(0);
+ ia64_rid_init(); /* initialize region ids */
+
+ cpu_init();
+ __flush_tlb_all();
+
+ smp_store_cpu_info(smp_processor_id());
+ smp_setup_percpu_timer(smp_processor_id());
+
+ while (!smp_threads_ready)
+ mb();
+
+ normal_xtp();
+
+ /* setup the CPU local timer tick */
+ ia64_cpu_local_tick();
+
+ /* Disable all local interrupts */
+ ia64_set_lrr0(0, 1);
+ ia64_set_lrr1(0, 1);
+
+ __sti(); /* Interrupts have been off till now. */
+ cpu_idle(NULL);
+}
+
+/*
+ * Create the idle task for a new AP. DO NOT use kernel_thread() because
+ * that could end up calling schedule() in the ia64_leave_kernel exit
+ * path in which case the new idle task could get scheduled before we
+ * had a chance to remove it from the run-queue...
+ */
+static int __init
+fork_by_hand(void)
+{
+ /*
+ * Don't care about the usp and regs settings since we'll never
+ * reschedule the forked task.
+ */
+ return do_fork(CLONE_VM|CLONE_PID, 0, 0);
+}
+
+/*
+ * Bring one cpu online.
+ *
+ * NB: cpuid is the CPU BUS-LOCAL ID, not the entire SAPIC ID. See asm/smp.h.
+ */
+static int __init
+smp_boot_one_cpu(int cpuid, int cpunum)
+{
+ struct task_struct *idle;
+ long timeout;
+
+ /*
+ * Create an idle task for this CPU. Note that the address we
+ * give to kernel_thread is irrelevant -- it's going to start
+ * where the OS_BOOT_RENDEZ vector in SAL says to start. But
+ * this gets all the other task-y sort of data structures set
+ * up like we wish. We need to pull the just created idle task
+ * off the run queue and stuff it into the init_tasks[] array.
+ * Sheesh . . .
+ */
+ if (fork_by_hand() < 0)
+ panic("failed fork for CPU %d", cpuid);
+ /*
+ * We remove it from the pidhash and the runqueue
+ * once we got the process:
+ */
+ idle = init_task.prev_task;
+ if (!idle)
+ panic("No idle process for CPU %d", cpuid);
+ init_tasks[cpunum] = idle;
+ del_from_runqueue(idle);
+ unhash_process(idle);
+
+ /* Schedule the first task manually. */
+ idle->processor = cpuid;
+ idle->has_cpu = 1;
+
+ /* Let _start know what logical CPU we're booting (offset into init_tasks[]) */
+ cpu_now_booting = cpunum;
+
+ /* Kick the AP in the butt */
+ send_IPI(cpuid, ap_wakeup_vector);
+ ia64_srlz_i();
+ mb();
+
+ /*
+ * OK, wait a bit for that CPU to finish staggering about. smp_callin() will
+ * call cpu_init() which will set a bit for this AP. When that bit flips, the AP
+ * is waiting for smp_threads_ready to be 1 and we can move on.
+ */
+ for (timeout = 0; timeout < 100000; timeout++) {
+ if (test_bit(cpuid, &cpu_initialized))
+ goto alive;
+ udelay(10);
+ barrier();
+ }
+
+ printk(KERN_ERR "SMP: Processor %d is stuck.\n", cpuid);
+ return -1;
+
+alive:
+ /* Remember the AP data */
+ cpu_number_map[cpuid] = cpunum;
+#ifdef CONFIG_KDB
+ cpu_online_map |= (1<<cpunum);
+ printk ("DEBUGGER: cpu_online_map = 0x%08lx\n", cpu_online_map);
+#endif
+ __cpu_logical_map[cpunum] = cpuid;
+ return 0;
+}
+
+
+
+/*
+ * Called by smp_init to bring all the secondaries online and hold them.
+ * XXX: this is ACPI specific; it uses "magic" variables exported from acpi.c
+ * to 'discover' the AP's. Blech.
+ */
+void __init
+smp_boot_cpus(void)
+{
+ int i, cpu_count = 1;
+ unsigned long bogosum;
+ int sapic_id;
+ extern int acpi_cpus;
+ extern int acpi_apic_map[32];
+
+ /* Take care of some initial bookkeeping. */
+ memset(&cpu_number_map, -1, sizeof(cpu_number_map));
+ memset(&__cpu_logical_map, -1, sizeof(__cpu_logical_map));
+ memset(&ipi_op, 0, sizeof(ipi_op));
+
+ /* Setup BSP mappings */
+ cpu_number_map[bootstrap_processor] = 0;
+ __cpu_logical_map[0] = bootstrap_processor;
+ current->processor = bootstrap_processor;
+
+ /* Mark BSP booted and get active_mm context */
+ cpu_init();
+
+ /* reset XTP for interrupt routing */
+ normal_xtp();
+
+ /* And generate an entry in cpu_data */
+ smp_store_cpu_info(bootstrap_processor);
+#if 0
+ smp_tune_scheduling();
+#endif
+ smp_setup_percpu_timer(bootstrap_processor);
+
+ init_idle();
+
+ /* Nothing to do when told not to. */
+ if (max_cpus == 0) {
+ printk(KERN_INFO "SMP mode deactivated.\n");
+ return;
+ }
+
+ if (acpi_cpus > 1) {
+ printk(KERN_INFO "SMP: starting up secondaries.\n");
+
+ for (i = 0; i < NR_CPUS; i++) {
+ if (acpi_apic_map[i] == -1 ||
+ acpi_apic_map[i] == bootstrap_processor << 8) /* XXX Fix me Walt */
+ continue;
+
+ /*
+ * IA64 SAPIC ID's are 16-bits. See asm/smp.h for more info
+ */
+ sapic_id = acpi_apic_map[i] >> 8;
+ if (smp_boot_one_cpu(sapic_id, cpu_count))
+ continue;
+
+ cpu_count++; /* Count good CPUs only... */
+ }
+ }
+
+ if (cpu_count == 1) {
+ printk(KERN_ERR "SMP: Bootstrap processor only.\n");
+ return;
+ }
+
+ bogosum = 0;
+ for (i = 0; i < NR_CPUS; i++) {
+ if (cpu_initialized & (1L << i))
+ bogosum += cpu_data[i].loops_per_sec;
+ }
+
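+ /*
+ * bogosum is the sum of loops_per_sec over all initialized CPUs; dividing
+ * by 500000 yields whole BogoMIPS and the second term the two fractional
+ * digits, e.g. a bogosum of 400000000 prints as "800.00 BogoMIPS".
+ */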
+ printk(KERN_INFO "SMP: Total of %d processors activated "
+ "(%lu.%02lu BogoMIPS).\n",
+ cpu_count, (bogosum + 2500) / 500000,
+ ((bogosum + 2500) / 5000) % 100);
+
+ smp_num_cpus = cpu_count;
+}
+
+/*
+ * Called from main.c by each AP.
+ */
+void __init
+smp_commence(void)
+{
+ mb();
+}
+
+/*
+ * Not used; part of the i386 bringup
+ */
+void __init
+initialize_secondary(void)
+{
+}
+
+int __init
+setup_profiling_timer(unsigned int multiplier)
+{
+ return -EINVAL;
+}
+
+/*
+ * Assume that CPUs have been discovered by some platform-dependent
+ * interface. For SoftSDV/Lion, that would be ACPI.
+ *
+ * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP().
+ *
+ * So this just gets the BSP SAPIC ID and prints it out. Dull, huh?
+ *
+ * Not anymore. This also registers the AP OS boot rendezvous address with SAL.
+ */
+void __init
+init_smp_config(void)
+{
+ struct fptr {
+ unsigned long fp;
+ unsigned long gp;
+ } *ap_startup;
+ long sal_ret;
+
+ /* Grab the BSP ID */
+ bootstrap_processor = hard_smp_processor_id();
+
+ /* Tell SAL where to drop the AP's. */
+ ap_startup = (struct fptr *) start_ap;
+ sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
+ __pa(ap_startup->fp), __pa(ap_startup->gp), 0,
+ 0, 0, 0);
+ if (sal_ret < 0) {
+ printk("SMP: Can't set SAL AP Boot Rendezvous: %s\n", ia64_sal_strerror(sal_ret));
+ printk(" Forcing UP mode\n");
+ smp_num_cpus = 1;
+ }
+
+}
+
+#ifdef CONFIG_KDB
+void smp_kdb_stop (int all, struct pt_regs* regs)
+{
+ if (all)
+ {
+ printk ("Sending IPI to all on CPU %i\n", smp_processor_id ());
+ smp_kdb_wait = 0xffffffff;
+ clear_bit (smp_processor_id(), &smp_kdb_wait);
+ send_IPI_allbutself (IPI_KDB_INTERRUPT);
+ }
+ else
+ {
+ printk ("Sending IPI to self on CPU %i\n",
+ smp_processor_id ());
+ set_bit (smp_processor_id(), &smp_kdb_wait);
+ clear_bit (__cpu_logical_map[kdb_new_cpu], &smp_kdb_wait);
+ smp_kdb_interrupt (regs);
+ }
+}
+
+void smp_kdb_interrupt (struct pt_regs* regs)
+{
+ printk ("kdb: IPI on CPU %i with mask 0x%08x\n",
+ smp_processor_id (), smp_kdb_wait);
+
+ /* All CPUs spin here forever */
+ while (test_bit (smp_processor_id(), &smp_kdb_wait));
+
+ /* Enter KDB on CPU selected by KDB on the last CPU */
+ if (__cpu_logical_map[kdb_new_cpu] == smp_processor_id ())
+ {
+ kdb (KDB_REASON_SWITCH, 0, regs);
+ }
+}
+
+#endif
+
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
new file mode 100644
index 000000000..18a498a09
--- /dev/null
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -0,0 +1,216 @@
+/*
+ * This file contains various system calls that have different calling
+ * conventions on different platforms.
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/sched.h>
+#include <linux/file.h> /* doh, must come after sched.h... */
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+
+asmlinkage long
+ia64_getpriority (int which, int who, long arg2, long arg3, long arg4, long arg5, long arg6,
+ long arg7, long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+ extern long sys_getpriority (int, int);
+ long prio;
+
+ prio = sys_getpriority(which, who);
+ if (prio >= 0) {
+ regs->r8 = 0; /* ensure negative priority is not mistaken as error code */
+ prio = 20 - prio;
+ }
+ return prio;
+}
+
+asmlinkage unsigned long
+sys_getpagesize (void)
+{
+ return PAGE_SIZE;
+}
+
+asmlinkage unsigned long
+ia64_shmat (int shmid, void *shmaddr, int shmflg, long arg3, long arg4, long arg5, long arg6,
+ long arg7, long stack)
+{
+ extern int sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr);
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+ unsigned long raddr;
+ int retval;
+
+ retval = sys_shmat(shmid, shmaddr, shmflg, &raddr);
+ if (retval < 0)
+ return retval;
+
+ regs->r8 = 0; /* ensure negative addresses are not mistaken as an error code */
+ return raddr;
+}
+
+asmlinkage unsigned long
+ia64_brk (long brk, long arg1, long arg2, long arg3,
+ long arg4, long arg5, long arg6, long arg7, long stack)
+{
+ extern unsigned long sys_brk (unsigned long brk);
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+ unsigned long retval;
+
+ retval = sys_brk(brk);
+
+ regs->r8 = 0; /* ensure large retval isn't mistaken as error code */
+ return retval;
+}
+
+/*
+ * On IA-64, we return the two file descriptors in ret0 and ret1 (r8
+ * and r9) as this is faster than doing a copy_to_user().
+ */
+asmlinkage long
+sys_pipe (long arg0, long arg1, long arg2, long arg3,
+ long arg4, long arg5, long arg6, long arg7, long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+ int fd[2];
+ int retval;
+
+ lock_kernel();
+ retval = do_pipe(fd);
+ if (retval)
+ goto out;
+ retval = fd[0];
+ regs->r9 = fd[1];
+ out:
+ unlock_kernel();
+ return retval;
+}
+
+static inline unsigned long
+do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff)
+{
+ struct file *file = 0;
+
+ /*
+ * A zero-length mmap always succeeds in Linux, independent of
+ * whether or not the remaining arguments are valid.
+ */
+ if (PAGE_ALIGN(len) == 0)
+ return addr;
+
+#ifdef notyet
+ /* Don't permit mappings that would cross a region boundary: */
+ region_start = IA64_GET_REGION(addr);
+ region_end = IA64_GET_REGION(addr + len);
+ if (region_start != region_end)
+ return -EINVAL;
+
+ <<x??x>>
+#endif
+
+ flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+ if (!(flags & MAP_ANONYMOUS)) {
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+ }
+
+ down(&current->mm->mmap_sem);
+ lock_kernel();
+
+ addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+
+ unlock_kernel();
+ up(&current->mm->mmap_sem);
+
+ if (file)
+ fput(file);
+ return addr;
+}
+
+/*
+ * mmap2() is like mmap() except that the offset is expressed in units
+ * of PAGE_SIZE (instead of bytes). This allows mmap2() to map (pieces
+ * of) files that are larger than the address space of the CPU.
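+ *
+ * For example, with 16KB pages, pgoff == 4 maps the file starting at byte
+ * offset 65536; sys_mmap() below converts a byte offset into this form
+ * with "off >> PAGE_SHIFT".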
+ */
+asmlinkage unsigned long
+sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff,
+ long arg6, long arg7, long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+
+ addr = do_mmap2(addr, len, prot, flags, fd, pgoff);
+ if (!IS_ERR(addr))
+ regs->r8 = 0; /* ensure large addresses are not mistaken as failures... */
+ return addr;
+}
+
+asmlinkage unsigned long
+sys_mmap (unsigned long addr, unsigned long len, int prot, int flags,
+ int fd, long off, long arg6, long arg7, long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+
+ addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+ if (!IS_ERR(addr))
+ regs->r8 = 0; /* ensure large addresses are not mistaken as failures... */
+ return addr;
+}
+
+asmlinkage long
+sys_ioperm (unsigned long from, unsigned long num, int on)
+{
+ printk(KERN_ERR "sys_ioperm(from=%lx, num=%lx, on=%d)\n", from, num, on);
+ return -EIO;
+}
+
+asmlinkage long
+sys_iopl (int level, long arg1, long arg2, long arg3)
+{
+ lock_kernel();
+ printk(KERN_ERR "sys_iopl(level=%d)!\n", level);
+ unlock_kernel();
+ return -ENOSYS;
+}
+
+asmlinkage long
+sys_vm86 (long arg0, long arg1, long arg2, long arg3)
+{
+ lock_kernel();
+ printk(KERN_ERR "sys_vm86(%lx, %lx, %lx, %lx)!\n", arg0, arg1, arg2, arg3);
+ unlock_kernel();
+ return -ENOSYS;
+}
+
+asmlinkage long
+sys_modify_ldt (long arg0, long arg1, long arg2, long arg3)
+{
+ lock_kernel();
+ printk(KERN_ERR "sys_modify_ldt(%lx, %lx, %lx, %lx)!\n", arg0, arg1, arg2, arg3);
+ unlock_kernel();
+ return -ENOSYS;
+}
+
+#ifndef CONFIG_PCI
+
+asmlinkage long
+sys_pciconfig_read (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
+ void *buf)
+{
+ return -ENOSYS;
+}
+
+asmlinkage long
+sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
+ void *buf)
+{
+ return -ENOSYS;
+}
+
+
+#endif /* CONFIG_PCI */
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
new file mode 100644
index 000000000..7c5ace740
--- /dev/null
+++ b/arch/ia64/kernel/time.c
@@ -0,0 +1,290 @@
+/*
+ * linux/arch/ia64/kernel/time.c
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998-2000 Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1999-2000 David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ * Copyright (C) 1999-2000 VA Linux Systems
+ * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+
+#include <asm/delay.h>
+#include <asm/efi.h>
+#include <asm/irq.h>
+#include <asm/machvec.h>
+#include <asm/ptrace.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+
+extern rwlock_t xtime_lock;
+extern volatile unsigned long lost_ticks;
+
+#ifdef CONFIG_IA64_DEBUG_IRQ
+
+unsigned long last_cli_ip;
+
+#endif
+
+static struct {
+ unsigned long delta;
+ unsigned long next[NR_CPUS];
+} itm;
+
+static void
+do_profile (unsigned long ip)
+{
+ extern char _stext;
+
+ if (prof_buffer && current->pid) {
+ ip -= (unsigned long) &_stext;
+ ip >>= prof_shift;
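+ /*
+ * e.g., with prof_shift == 2 (the value normally comes from the
+ * "profile=" boot option), an ip 0x40 bytes past _stext lands in
+ * prof_buffer[0x10].
+ */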
+ /*
+ * Don't ignore out-of-bounds IP values silently,
+ * put them into the last histogram slot, so if
+ * present, they will show up as a sharp peak.
+ */
+ if (ip > prof_len - 1)
+ ip = prof_len - 1;
+
+ atomic_inc((atomic_t *) &prof_buffer[ip]);
+ }
+}
+
+/*
+ * Return the number of microseconds that elapsed since the last
+ * update to jiffies. The xtime_lock must be at least read-locked when
+ * calling this routine.
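+ *
+ * The conversion is fixed-point: ia64_init_itm() computes usec_per_cyc as
+ * (1000000 << IA64_USEC_PER_CYC_SHIFT) / itc_freq, so
+ * (elapsed_cycles*usec_per_cyc) >> IA64_USEC_PER_CYC_SHIFT is simply
+ * elapsed_cycles*1000000/itc_freq; e.g., at a 300MHz ITC, 150000 elapsed
+ * cycles correspond to 500 usec.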
+ */
+static inline unsigned long
+gettimeoffset (void)
+{
+ unsigned long now = ia64_get_itc();
+ unsigned long elapsed_cycles, lost;
+
+ elapsed_cycles = now - (itm.next[smp_processor_id()] - itm.delta);
+
+ lost = lost_ticks;
+ if (lost)
+ elapsed_cycles += lost*itm.delta;
+
+ return (elapsed_cycles*my_cpu_data.usec_per_cyc) >> IA64_USEC_PER_CYC_SHIFT;
+}
+
+void
+do_settimeofday (struct timeval *tv)
+{
+ write_lock_irq(&xtime_lock);
+ {
+ /*
+ * This is revolting. We need to set the xtime.tv_usec
+ * correctly. However, the value in this location is
+ * the value at the last tick. Discover what
+ * correction gettimeofday would have done, and then
+ * undo it!
+ */
+ tv->tv_usec -= gettimeoffset();
+ while (tv->tv_usec < 0) {
+ tv->tv_usec += 1000000;
+ tv->tv_sec--;
+ }
+
+ xtime = *tv;
+ time_adjust = 0; /* stop active adjtime() */
+ time_status |= STA_UNSYNC;
+ time_maxerror = NTP_PHASE_LIMIT;
+ time_esterror = NTP_PHASE_LIMIT;
+ }
+ write_unlock_irq(&xtime_lock);
+}
+
+void
+do_gettimeofday (struct timeval *tv)
+{
+ unsigned long flags, usec, sec;
+
+ read_lock_irqsave(&xtime_lock, flags);
+ {
+ usec = gettimeoffset();
+
+ sec = xtime.tv_sec;
+ usec += xtime.tv_usec;
+ }
+ read_unlock_irqrestore(&xtime_lock, flags);
+
+ while (usec >= 1000000) {
+ usec -= 1000000;
+ ++sec;
+ }
+
+ tv->tv_sec = sec;
+ tv->tv_usec = usec;
+}
+
+static void
+timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ static unsigned long last_time;
+ static unsigned char count;
+ int cpu = smp_processor_id();
+
+ /*
+ * Here we are in the timer irq handler. We have irqs locally
+ * disabled, but we don't know if the timer_bh is running on
+ * another CPU. We need to avoid an SMP race by acquiring the
+ * xtime_lock.
+ */
+ write_lock(&xtime_lock);
+ while (1) {
+ /* do kernel PC profiling here. */
+ if (!user_mode(regs))
+ do_profile(regs->cr_iip);
+
+#ifdef CONFIG_SMP
+ smp_do_timer(regs);
+ if (smp_processor_id() == bootstrap_processor)
+ do_timer(regs);
+#else
+ do_timer(regs);
+#endif
+
+ itm.next[cpu] += itm.delta;
+ /*
+ * There is a race condition here: to be on the "safe"
+ * side, we process timer ticks until itm.next is
+ * ahead of the itc by at least half the timer
+ * interval. This should give us enough time to set
+ * the new itm value without losing a timer tick.
+ */
+ if (time_after(itm.next[cpu], ia64_get_itc() + itm.delta/2)) {
+ ia64_set_itm(itm.next[cpu]);
+ break;
+ }
+
+#if !(defined(CONFIG_IA64_SOFTSDV_HACKS) && defined(CONFIG_SMP))
+ /*
+ * SoftSDV in SMP mode is _slow_, so we do lose ticks,
+ * but it's really OK...
+ */
+ if (count > 0 && jiffies - last_time > 5*HZ)
+ count = 0;
+ if (count++ == 0) {
+ last_time = jiffies;
+ printk("Lost clock tick on CPU %d (now=%lx, next=%lx)!!\n",
+ cpu, ia64_get_itc(), itm.next[cpu]);
+# ifdef CONFIG_IA64_DEBUG_IRQ
+ printk("last_cli_ip=%lx\n", last_cli_ip);
+# endif
+ }
+#endif
+ }
+ write_unlock(&xtime_lock);
+}
+
+/*
+ * Encapsulate access to the itm structure for SMP.
+ */
+void __init
+ia64_cpu_local_tick(void)
+{
+ /* arrange for the cycle counter to generate a timer interrupt: */
+ ia64_set_itv(TIMER_IRQ, 0);
+ ia64_set_itc(0);
+ itm.next[smp_processor_id()] = ia64_get_itc() + itm.delta;
+ ia64_set_itm(itm.next[smp_processor_id()]);
+}
+
+void __init
+ia64_init_itm (void)
+{
+ unsigned long platform_base_freq, itc_freq, drift;
+ struct pal_freq_ratio itc_ratio, proc_ratio;
+ long status;
+
+ /*
+ * According to SAL v2.6, we need to use a SAL call to determine the
+ * platform base frequency and then a PAL call to determine the
+ * frequency ratio between the ITC and the base frequency.
+ */
+ status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, &platform_base_freq, &drift);
+ if (status != 0) {
+ printk("SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status));
+ } else {
+ status = ia64_pal_freq_ratios(&proc_ratio, 0, &itc_ratio);
+ if (status != 0)
+ printk("PAL_FREQ_RATIOS failed with status=%ld\n", status);
+ }
+ if (status != 0) {
+ /* invent "random" values */
+ printk("SAL/PAL failed to obtain frequency info---inventing reasonable values\n");
+ platform_base_freq = 100000000;
+ itc_ratio.num = 3;
+ itc_ratio.den = 1;
+ }
+#if defined(CONFIG_IA64_LION_HACKS)
+ /* Our Lion currently returns base freq 104.857MHz, which
+ ain't right (it really is 100MHz). */
+ printk("SAL/PAL returned: base-freq=%lu, itc-ratio=%lu/%lu, proc-ratio=%lu/%lu\n",
+ platform_base_freq, itc_ratio.num, itc_ratio.den,
+ proc_ratio.num, proc_ratio.den);
+ platform_base_freq = 100000000;
+#elif 0 && defined(CONFIG_IA64_BIGSUR_HACKS)
+ /* BigSur with 991020 firmware returned itc-ratio=9/2 and base
+ freq 75MHz, which wasn't right. The 991119 firmware seems
+ to return the right values, so this isn't necessary
+ anymore... */
+ printk("SAL/PAL returned: base-freq=%lu, itc-ratio=%lu/%lu, proc-ratio=%lu/%lu\n",
+ platform_base_freq, itc_ratio.num, itc_ratio.den,
+ proc_ratio.num, proc_ratio.den);
+ platform_base_freq = 100000000;
+ proc_ratio.num = 5; proc_ratio.den = 1;
+ itc_ratio.num = 5; itc_ratio.den = 1;
+#elif defined(CONFIG_IA64_SOFTSDV_HACKS)
+ platform_base_freq = 10000000;
+ proc_ratio.num = 4; proc_ratio.den = 1;
+ itc_ratio.num = 4; itc_ratio.den = 1;
+#else
+ if (platform_base_freq < 40000000) {
+ printk("Platform base frequency %lu bogus---resetting to 75MHz!\n",
+ platform_base_freq);
+ platform_base_freq = 75000000;
+ }
+#endif
+ if (!proc_ratio.den)
+ proc_ratio.num = 1; /* avoid division by zero */
+ if (!itc_ratio.den)
+ itc_ratio.num = 1; /* avoid division by zero */
+
+ itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den;
+ itm.delta = itc_freq / HZ;
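+ /*
+ * E.g., with the fallback values above (100MHz base frequency, ITC ratio
+ * 3/1), itc_freq is 300MHz and, assuming HZ == 1024, itm.delta comes to
+ * roughly 292968 cycles per timer tick.
+ */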
+ printk("timer: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, ITC freq=%lu.%03luMHz\n",
+ platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000,
+ itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000);
+
+ my_cpu_data.proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den;
+ my_cpu_data.itc_freq = itc_freq;
+ my_cpu_data.cyc_per_usec = itc_freq / 1000000;
+ my_cpu_data.usec_per_cyc = (1000000UL << IA64_USEC_PER_CYC_SHIFT) / itc_freq;
+
+ /* Setup the CPU local timer tick */
+ ia64_cpu_local_tick();
+}
+
+void __init
+time_init (void)
+{
+ /*
+ * Request the IRQ _before_ doing anything to cause that
+ * interrupt to be posted.
+ */
+ if (request_irq(TIMER_IRQ, timer_interrupt, 0, "timer", NULL))
+ panic("Could not allocate timer IRQ!");
+
+ efi_gettimeofday(&xtime);
+ ia64_init_itm();
+}
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
new file mode 100644
index 000000000..c242622ec
--- /dev/null
+++ b/arch/ia64/kernel/traps.c
@@ -0,0 +1,423 @@
+/*
+ * Architecture-specific trap handling.
+ *
+ * Copyright (C) 1998-2000 Hewlett-Packard Co
+ * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/*
+ * The fpu_fault() handler needs to be able to access and update all
+ * floating point registers. Those saved in pt_regs can be accessed
+ * through that structure, but those not saved will be accessed
+ * directly. To make this work, we need to ensure that the compiler
+ * does not end up using a preserved floating point register on its
+ * own. The following achieves this by declaring preserved registers
+ * that are not marked as "fixed" as global register variables.
+ */
+register double f2 asm ("f2"); register double f3 asm ("f3");
+register double f4 asm ("f4"); register double f5 asm ("f5");
+
+register long f16 asm ("f16"); register long f17 asm ("f17");
+register long f18 asm ("f18"); register long f19 asm ("f19");
+register long f20 asm ("f20"); register long f21 asm ("f21");
+register long f22 asm ("f22"); register long f23 asm ("f23");
+
+register double f24 asm ("f24"); register double f25 asm ("f25");
+register double f26 asm ("f26"); register double f27 asm ("f27");
+register double f28 asm ("f28"); register double f29 asm ("f29");
+register double f30 asm ("f30"); register double f31 asm ("f31");
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+
+#ifdef CONFIG_KDB
+# include <linux/kdb.h>
+#endif
+
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+
+#include <asm/fpswa.h>
+
+static fpswa_interface_t *fpswa_interface;
+
+void __init
+trap_init (void)
+{
+ printk("fpswa interface at %lx\n", ia64_boot_param.fpswa);
+ if (ia64_boot_param.fpswa) {
+#define OLD_FIRMWARE
+#ifdef OLD_FIRMWARE
+ /*
+ * HACK to work around broken firmware. This code
+ * applies the label fixup to the FPSWA interface and
+ * works both with old and new (fixed) firmware.
+ */
+ unsigned long addr = (unsigned long) __va(ia64_boot_param.fpswa);
+ unsigned long gp_val = *(unsigned long *)(addr + 8);
+
+ /* go indirect and indexed to get table address */
+ addr = gp_val;
+ gp_val = *(unsigned long *)(addr + 8);
+
+ while (gp_val == *(unsigned long *)(addr + 8)) {
+ *(unsigned long *)addr |= PAGE_OFFSET;
+ *(unsigned long *)(addr + 8) |= PAGE_OFFSET;
+ addr += 16;
+ }
+#endif
+ /* FPSWA fixup: make the interface pointer a kernel virtual address: */
+ fpswa_interface = __va(ia64_boot_param.fpswa);
+ }
+}
+
+void
+die_if_kernel (char *str, struct pt_regs *regs, long err)
+{
+ if (user_mode(regs)) {
+#if 1
+ /* XXX for debugging only */
+ printk ("!!die_if_kernel: %s(%d): %s %ld\n",
+ current->comm, current->pid, str, err);
+ show_regs(regs);
+#endif
+ return;
+ }
+
+ printk("%s[%d]: %s %ld\n", current->comm, current->pid, str, err);
+
+#ifdef CONFIG_KDB
+ while (1) {
+ kdb(KDB_REASON_PANIC, 0, regs);
+ printk("Can't go anywhere from Panic!\n");
+ }
+#endif
+
+ show_regs(regs);
+
+ if (current->thread.flags & IA64_KERNEL_DEATH) {
+ printk("die_if_kernel recursion detected.\n");
+ sti();
+ while (1);
+ }
+ current->thread.flags |= IA64_KERNEL_DEATH;
+ do_exit(SIGSEGV);
+}
+
+void
+ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
+{
+ siginfo_t siginfo;
+
+ /* gdb uses a break number of 0xccccc for debug breakpoints: */
+ if (break_num != 0xccccc)
+ die_if_kernel("Bad break", regs, break_num);
+
+ siginfo.si_signo = SIGTRAP;
+ siginfo.si_errno = break_num; /* XXX is it legal to abuse si_errno like this? */
+ siginfo.si_code = TRAP_BRKPT;
+ send_sig_info(SIGTRAP, &siginfo, current);
+}
+
+/*
+ * Unimplemented system calls. This is called only for stuff that
+ * we're supposed to implement but haven't done so yet. Everything
+ * else goes to sys_ni_syscall.
+ */
+asmlinkage long
+ia64_ni_syscall (unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5, unsigned long arg6, unsigned long arg7,
+ unsigned long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+
+ printk("<sc%ld(%lx,%lx,%lx,%lx)>\n", regs->r15, arg0, arg1, arg2, arg3);
+ return -ENOSYS;
+}
+
+/*
+ * disabled_fph_fault() is called when a user-level process attempts to
+ * access one of the registers f32..f127 while it doesn't own the
+ * fp-high register partition. When this happens, we save the current
+ * fph partition in the task_struct of the fpu-owner (if necessary)
+ * and then load the fp-high partition of the current task (if
+ * necessary).
+ */
+static inline void
+disabled_fph_fault (struct pt_regs *regs)
+{
+ struct task_struct *fpu_owner = ia64_get_fpu_owner();
+
+ regs->cr_ipsr &= ~(IA64_PSR_DFH | IA64_PSR_MFH);
+ if (fpu_owner != current) {
+ ia64_set_fpu_owner(current);
+
+ if (fpu_owner && ia64_psr(ia64_task_regs(fpu_owner))->mfh) {
+ fpu_owner->thread.flags |= IA64_THREAD_FPH_VALID;
+ __ia64_save_fpu(fpu_owner->thread.fph);
+ }
+ if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) {
+ __ia64_load_fpu(current->thread.fph);
+ } else {
+ __ia64_init_fpu();
+ }
+ }
+}
+
+static inline int
+fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs,
+ struct pt_regs *regs)
+{
+ fp_state_t fp_state;
+ fpswa_ret_t ret;
+#ifdef FPSWA_BUG
+ struct ia64_fpreg f6_15[10];
+#endif
+
+ if (!fpswa_interface)
+ return -1;
+
+ memset(&fp_state, 0, sizeof(fp_state_t));
+
+ /*
+ * compute fp_state. only FP registers f6 - f11 are used by the
+ * kernel, so set those bits in the mask and set the low volatile
+ * pointer to point to these registers.
+ */
+ fp_state.bitmask_low64 = 0xffc0; /* bit6..bit15 */
+#ifndef FPSWA_BUG
+ fp_state.fp_state_low_volatile = &regs->f6;
+#else
+ f6_15[0] = regs->f6;
+ f6_15[1] = regs->f7;
+ f6_15[2] = regs->f8;
+ f6_15[3] = regs->f9;
+ __asm__ ("stf.spill %0=f10" : "=m"(f6_15[4]));
+ __asm__ ("stf.spill %0=f11" : "=m"(f6_15[5]));
+ __asm__ ("stf.spill %0=f12" : "=m"(f6_15[6]));
+ __asm__ ("stf.spill %0=f13" : "=m"(f6_15[7]));
+ __asm__ ("stf.spill %0=f14" : "=m"(f6_15[8]));
+ __asm__ ("stf.spill %0=f15" : "=m"(f6_15[9]));
+ fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) f6_15;
+#endif
+ /*
+ * unsigned long (*EFI_FPSWA) (
+ * unsigned long trap_type,
+ * void *Bundle,
+ * unsigned long *pipsr,
+ * unsigned long *pfsr,
+ * unsigned long *pisr,
+ * unsigned long *ppreds,
+ * unsigned long *pifs,
+ * void *fp_state);
+ */
+ ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle,
+ (unsigned long *) ipsr, (unsigned long *) fpsr,
+ (unsigned long *) isr, (unsigned long *) pr,
+ (unsigned long *) ifs, &fp_state);
+#ifdef FPSWA_BUG
+ __asm__ ("ldf.fill f10=%0" :: "m"(f6_15[4]));
+ __asm__ ("ldf.fill f11=%0" :: "m"(f6_15[5]));
+ __asm__ ("ldf.fill f12=%0" :: "m"(f6_15[6]));
+ __asm__ ("ldf.fill f13=%0" :: "m"(f6_15[7]));
+ __asm__ ("ldf.fill f14=%0" :: "m"(f6_15[8]));
+ __asm__ ("ldf.fill f15=%0" :: "m"(f6_15[9]));
+ regs->f6 = f6_15[0];
+ regs->f7 = f6_15[1];
+ regs->f8 = f6_15[2];
+ regs->f9 = f6_15[3];
+#endif
+ return ret.status;
+}
+
+/*
+ * Handle floating-point assist faults and traps.
+ */
+static int
+handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
+{
+ long exception, bundle[2];
+ unsigned long fault_ip;
+ static int fpu_swa_count = 0;
+ static unsigned long last_time;
+
+ fault_ip = regs->cr_iip;
+ if (!fp_fault && (ia64_psr(regs)->ri == 0))
+ fault_ip -= 16;
+ if (copy_from_user(bundle, (void *) fault_ip, sizeof(bundle)))
+ return -1;
+
+ if (fpu_swa_count > 5 && jiffies - last_time > 5*HZ)
+ fpu_swa_count = 0;
+ if (++fpu_swa_count < 5) {
+ last_time = jiffies;
+ printk("%s(%d): floating-point assist fault at ip %016lx\n",
+ current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri);
+ }
+
+ exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr,
+ &regs->cr_ifs, regs);
+ if (fp_fault) {
+ if (exception == 0) {
+ /* emulation was successful */
+ ia64_increment_ip(regs);
+ } else if (exception == -1) {
+ printk("handle_fpu_swa: fp_emulate() returned -1\n");
+ return -2;
+ } else {
+ /* is next instruction a trap? */
+ if (exception & 2) {
+ ia64_increment_ip(regs);
+ }
+ return -1;
+ }
+ } else {
+ if (exception == -1) {
+ printk("handle_fpu_swa: fp_emulate() returned -1\n");
+ return -2;
+ } else if (exception != 0) {
+ /* raise exception */
+ return -1;
+ }
+ }
+ return 0;
+}
+
+void
+ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
+ unsigned long iim, unsigned long itir, unsigned long arg5,
+ unsigned long arg6, unsigned long arg7, unsigned long stack)
+{
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+ unsigned long code, error = isr;
+ struct siginfo siginfo;
+ char buf[128];
+ int result;
+ static const char *reason[] = {
+ "IA-64 Illegal Operation fault",
+ "IA-64 Privileged Operation fault",
+ "IA-64 Privileged Register fault",
+ "IA-64 Reserved Register/Field fault",
+ "Disabled Instruction Set Transition fault",
+ "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault",
+ "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12",
+ "Unknown fault 13", "Unknown fault 14", "Unknown fault 15"
+ };
+
+#if 0
+ /* this is for minimal trust debugging; yeah this kind of stuff is useful at times... */
+
+ if (vector != 25) {
+ static unsigned long last_time;
+ static char count;
+ unsigned long n = vector;
+ char buf[32], *cp;
+
+ if (count > 5 && jiffies - last_time > 5*HZ)
+ count = 0;
+
+ if (count++ < 5) {
+ last_time = jiffies;
+ cp = buf + sizeof(buf);
+ *--cp = '\0';
+ while (n) {
+ *--cp = "0123456789abcdef"[n & 0xf];
+ n >>= 4;
+ }
+ printk("<0x%s>", cp);
+ }
+ }
+#endif
+
+ switch (vector) {
+ case 24: /* General Exception */
+ code = (isr >> 4) & 0xf;
+ sprintf(buf, "General Exception: %s%s", reason[code],
+ (code == 3) ? ((isr & (1UL << 37))
+ ? " (RSE access)" : " (data access)") : "");
+#ifndef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ if (code == 8) {
+# ifdef CONFIG_IA64_PRINT_HAZARDS
+ printk("%016lx:possible hazard, pr = %016lx\n", regs->cr_iip, regs->pr);
+# endif
+ return;
+ }
+#endif
+ break;
+
+ case 25: /* Disabled FP-Register */
+ if (isr & 2) {
+ disabled_fph_fault(regs);
+ return;
+ }
+ sprintf(buf, "Disabled FPL fault---not supposed to happen!");
+ break;
+
+ case 29: /* Debug */
+ case 35: /* Taken Branch Trap */
+ case 36: /* Single Step Trap */
+ switch (vector) {
+ case 29: siginfo.si_code = TRAP_BRKPT; break;
+ case 35: siginfo.si_code = TRAP_BRANCH; break;
+ case 36: siginfo.si_code = TRAP_TRACE; break;
+ }
+ siginfo.si_signo = SIGTRAP;
+ siginfo.si_errno = 0;
+ force_sig_info(SIGTRAP, &siginfo, current);
+ return;
+
+ case 30: /* Unaligned fault */
+ sprintf(buf, "Unaligned access in kernel mode---don't do this!");
+ break;
+
+ case 32: /* fp fault */
+ case 33: /* fp trap */
+ result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr);
+ if (result < 0) {
+ siginfo.si_signo = SIGFPE;
+ siginfo.si_errno = 0;
+ siginfo.si_code = 0; /* XXX fix me */
+ siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
+ if (result == -1)
+ send_sig_info(SIGFPE, &siginfo, current);
+ else
+ force_sig(SIGFPE, current);
+ }
+ return;
+
+ case 34: /* Unimplemented Instruction Address Trap */
+ if (user_mode(regs)) {
+ printk("Woah! Unimplemented Instruction Address Trap!\n");
+ siginfo.si_code = ILL_BADIADDR;
+ siginfo.si_signo = SIGILL;
+ siginfo.si_errno = 0;
+ force_sig_info(SIGILL, &siginfo, current);
+ return;
+ }
+ sprintf(buf, "Unimplemented Instruction Address fault");
+ break;
+
+ case 45:
+ printk("Unexpected IA-32 exception\n");
+ force_sig(SIGSEGV, current);
+ return;
+
+ case 46:
+ printk("Unexpected IA-32 intercept trap\n");
+ force_sig(SIGSEGV, current);
+ return;
+
+ case 47:
+ sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16);
+ break;
+
+ default:
+ sprintf(buf, "Fault %lu", vector);
+ break;
+ }
+ die_if_kernel(buf, regs, error);
+ force_sig(SIGILL, current);
+}
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
new file mode 100644
index 000000000..0bd213f6b
--- /dev/null
+++ b/arch/ia64/kernel/unaligned.c
@@ -0,0 +1,1554 @@
+/*
+ * Architecture-specific unaligned trap handling.
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <asm/uaccess.h>
+#include <asm/rse.h>
+#include <asm/processor.h>
+#include <asm/unaligned.h>
+
+extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
+
+#undef DEBUG_UNALIGNED_TRAP
+
+#ifdef DEBUG_UNALIGNED_TRAP
+#define DPRINT(a) { printk("%s, line %d: ", __FUNCTION__, __LINE__); printk a;}
+#else
+#define DPRINT(a)
+#endif
+
+#define IA64_FIRST_STACKED_GR 32
+#define IA64_FIRST_ROTATING_FR 32
+#define SIGN_EXT9 __IA64_UL(0xffffffffffffff00)
+
+/*
+ * For M-unit:
+ *
+ * opcode | m | x6 |
+ printk ("kdb: IPI on CPU %i with mask 0x%08lx\n",
+ * [40-37] | [36] | [35:30] |
+ * --------|------|---------|
+ * 4 | 1 | 6 | = 11 bits
+ * --------------------------
+ * However bits [31:30] are not directly useful to distinguish between
+ * load/store so we can use [35:32] instead, which gives the following
+ * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
+ * checking the m-bit until later in the load/store emulation.
+ */
+#define IA64_OPCODE_MASK 0x1ef00000000
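+
+/*
+ * For instance, LD_OP below is 0x08000000000: opcode [40:37] = 4, m [36] = 0,
+ * x6_op [35:32] = 0. Masking the slot with IA64_OPCODE_MASK (which covers
+ * [40:37] and [35:32] but not the m-bit -- hence the 'e' in 0x1ef) therefore
+ * yields one of the LD/ST opcode values defined below.
+ */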
+
+/*
+ * Table C-28 Integer Load/Store
+ *
+ * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
+ *
+ * ld8.fill, st8.fill MUST be aligned because the RNATs are based on
+ * the address (bits [8:3]), so we must failed.
+ */
+#define LD_OP 0x08000000000
+#define LDS_OP 0x08100000000
+#define LDA_OP 0x08200000000
+#define LDSA_OP 0x08300000000
+#define LDBIAS_OP 0x08400000000
+#define LDACQ_OP 0x08500000000
+/* 0x086, 0x087 are not relevant */
+#define LDCCLR_OP 0x08800000000
+#define LDCNC_OP 0x08900000000
+#define LDCCLRACQ_OP 0x08a00000000
+#define ST_OP 0x08c00000000
+#define STREL_OP 0x08d00000000
+/* 0x08e,0x8f are not relevant */
+
+/*
+ * Table C-29 Integer Load +Reg
+ *
+ * we use the ld->m (bit [36:36]) field to determine whether or not we have
+ * a load/store of this form.
+ */
+
+/*
+ * Table C-30 Integer Load/Store +Imm
+ *
+ * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
+ *
+ * ld8.fill, st8.fill must be aligned because the NaT bits are based on
+ * the address, so we must fail and the program must be fixed.
+ */
+#define LD_IMM_OP 0x0a000000000
+#define LDS_IMM_OP 0x0a100000000
+#define LDA_IMM_OP 0x0a200000000
+#define LDSA_IMM_OP 0x0a300000000
+#define LDBIAS_IMM_OP 0x0a400000000
+#define LDACQ_IMM_OP 0x0a500000000
+/* 0x0a6, 0xa7 are not relevant */
+#define LDCCLR_IMM_OP 0x0a800000000
+#define LDCNC_IMM_OP 0x0a900000000
+#define LDCCLRACQ_IMM_OP 0x0aa00000000
+#define ST_IMM_OP 0x0ac00000000
+#define STREL_IMM_OP 0x0ad00000000
+/* 0x0ae,0xaf are not relevant */
+
+/*
+ * Table C-32 Floating-point Load/Store
+ */
+#define LDF_OP 0x0c000000000
+#define LDFS_OP 0x0c100000000
+#define LDFA_OP 0x0c200000000
+#define LDFSA_OP 0x0c300000000
+/* 0x0c6 is irrelevant */
+#define LDFCCLR_OP 0x0c800000000
+#define LDFCNC_OP 0x0c900000000
+/* 0x0cb is irrelevant */
+#define STF_OP 0x0cc00000000
+
+/*
+ * Table C-33 Floating-point Load +Reg
+ *
+ * we use the ld->m (bit [36:36]) field to determine whether or not we have
+ * a load/store of this form.
+ */
+
+/*
+ * Table C-34 Floating-point Load/Store +Imm
+ */
+#define LDF_IMM_OP 0x0e000000000
+#define LDFS_IMM_OP 0x0e100000000
+#define LDFA_IMM_OP 0x0e200000000
+#define LDFSA_IMM_OP 0x0e300000000
+/* 0x0e6 is irrelevant */
+#define LDFCCLR_IMM_OP 0x0e800000000
+#define LDFCNC_IMM_OP 0x0e900000000
+#define STF_IMM_OP 0x0ec00000000
+
+typedef struct {
+ unsigned long qp:6; /* [0:5] */
+ unsigned long r1:7; /* [6:12] */
+ unsigned long imm:7; /* [13:19] */
+ unsigned long r3:7; /* [20:26] */
+ unsigned long x:1; /* [27:27] */
+ unsigned long hint:2; /* [28:29] */
+ unsigned long x6_sz:2; /* [30:31] */
+ unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */
+ unsigned long m:1; /* [36:36] */
+ unsigned long op:4; /* [37:40] */
+ unsigned long pad:23; /* [41:63] */
+} load_store_t;
+
+
+typedef enum {
+ UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */
+ UPD_REG /* ldXZ r1=[r3],r2 */
+} update_t;
+
+/*
+ * We use tables to keep track of the offsets of registers in the saved state.
+ * This way we save having big switch/case statements.
+ *
+ * We use bit 0 to indicate switch_stack or pt_regs.
+ * The offset is simply shifted by 1 bit.
+ * A 2-byte value should be enough to hold any kind of offset
+ *
+ * In case the calling convention changes (and thus pt_regs/switch_stack)
+ * simply use RSW instead of RPT or vice-versa.
+ */
+
+#define RPO(x) ((size_t) &((struct pt_regs *)0)->x)
+#define RSO(x) ((size_t) &((struct switch_stack *)0)->x)
+
+#define RPT(x) (RPO(x) << 1)
+#define RSW(x) (1| RSO(x)<<1)
+
+#define GR_OFFS(x) (gr_info[x]>>1)
+#define GR_IN_SW(x) (gr_info[x] & 0x1)
+
+#define FR_OFFS(x) (fr_info[x]>>1)
+#define FR_IN_SW(x) (fr_info[x] & 0x1)
+
+static u16 gr_info[32]={
+ 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
+
+ RPT(r1), RPT(r2), RPT(r3),
+
+ RSW(r4), RSW(r5), RSW(r6), RSW(r7),
+
+ RPT(r8), RPT(r9), RPT(r10), RPT(r11),
+ RPT(r12), RPT(r13), RPT(r14), RPT(r15),
+
+ RPT(r16), RPT(r17), RPT(r18), RPT(r19),
+ RPT(r20), RPT(r21), RPT(r22), RPT(r23),
+ RPT(r24), RPT(r25), RPT(r26), RPT(r27),
+ RPT(r28), RPT(r29), RPT(r30), RPT(r31)
+};
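+
+/*
+ * Decoding example: gr_info[4] is RSW(r4), i.e. bit 0 set and the byte
+ * offset of r4 within struct switch_stack in the remaining bits, so
+ * GR_IN_SW(4) == 1 and GR_OFFS(4) recovers that offset; gr_info[8] is
+ * RPT(r8), which encodes the pt_regs offset of r8 with bit 0 clear.
+ */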
+
+static u16 fr_info[32]={
+ 0, /* constant : WE SHOULD NEVER GET THIS */
+ 0, /* constant : WE SHOULD NEVER GET THIS */
+
+ RSW(f2), RSW(f3), RSW(f4), RSW(f5),
+
+ RPT(f6), RPT(f7), RPT(f8), RPT(f9),
+
+ RSW(f10), RSW(f11), RSW(f12), RSW(f13), RSW(f14),
+ RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
+ RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
+ RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
+ RSW(f30), RSW(f31)
+};
+
+/* Invalidate ALAT entry for integer register REGNO. */
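+/* (invala.e needs the register number as an immediate, hence the 128-way switch.) */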
+static void
+invala_gr (int regno)
+{
+# define F(reg) case reg: __asm__ __volatile__ ("invala.e r%0" :: "i"(reg)); break
+
+ switch (regno) {
+ F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
+ F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
+ F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
+ F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
+ F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
+ F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
+ F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
+ F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
+ F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
+ F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
+ F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
+ F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
+ F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
+ F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
+ F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
+ F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
+ }
+# undef F
+}
+
+/* Invalidate ALAT entry for floating-point register REGNO. */
+static void
+invala_fr (int regno)
+{
+# define F(reg) case reg: __asm__ __volatile__ ("invala.e f%0" :: "i"(reg)); break
+
+ switch (regno) {
+ F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
+ F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
+ F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
+ F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
+ F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
+ F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
+ F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
+ F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
+ F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
+ F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
+ F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
+ F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
+ F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
+ F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
+ F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
+ F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
+ }
+# undef F
+}
+
+static void
+set_rse_reg(struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
+{
+ struct switch_stack *sw = (struct switch_stack *)regs - 1;
+ unsigned long *kbs = ((unsigned long *)current) + IA64_RBS_OFFSET/8;
+ unsigned long on_kbs;
+ unsigned long *bsp, *bspstore, *addr, *ubs_end, *slot;
+ unsigned long rnats;
+ long nlocals;
+
+ /*
+ * cr_ifs=[rv:ifm], ifm=[....:sof(6)]
+ * nlocals = number of local (in+loc) registers of the faulting function
+ */
+ nlocals = (regs->cr_ifs) & 0x7f;
+
+ DPRINT(("sw.bsptore=%lx pt.bspstore=%lx\n", sw->ar_bspstore, regs->ar_bspstore));
+ DPRINT(("cr.ifs=%lx sof=%ld sol=%ld\n",
+ regs->cr_ifs, regs->cr_ifs &0x7f, (regs->cr_ifs>>7)&0x7f));
+
+ on_kbs = ia64_rse_num_regs(kbs, (unsigned long *)sw->ar_bspstore);
+ bspstore = (unsigned long *)regs->ar_bspstore;
+
+ DPRINT(("rse_slot_num=0x%lx\n",ia64_rse_slot_num((unsigned long *)sw->ar_bspstore)));
+ DPRINT(("kbs=%p nlocals=%ld\n", kbs, nlocals));
+ DPRINT(("bspstore next rnat slot %p\n",
+ ia64_rse_rnat_addr((unsigned long *)sw->ar_bspstore)));
+ DPRINT(("on_kbs=%ld rnats=%ld\n",
+ on_kbs, ((sw->ar_bspstore-(unsigned long)kbs)>>3) - on_kbs));
+
+ /*
+ * See get_rse_reg() for an explanation on the following instructions
+ */
+ ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
+ bsp = ia64_rse_skip_regs(ubs_end, -nlocals);
+ addr = slot = ia64_rse_skip_regs(bsp, r1 - 32);
+
+ DPRINT(("ubs_end=%p bsp=%p addr=%p slot=0x%lx\n",
+ ubs_end, bsp, addr, ia64_rse_slot_num(addr)));
+
+ ia64_poke(regs, current, (unsigned long)addr, val);
+
+ /*
+ * addr will now contain the address of the RNAT for the register
+ */
+ addr = ia64_rse_rnat_addr(addr);
+
+ ia64_peek(regs, current, (unsigned long)addr, &rnats);
+ DPRINT(("rnat @%p = 0x%lx nat=%d rnatval=%lx\n",
+ addr, rnats, nat, rnats &ia64_rse_slot_num(slot)));
+
+ if ( nat ) {
+ rnats |= __IA64_UL(1) << ia64_rse_slot_num(slot);
+ } else {
+ rnats &= ~(__IA64_UL(1) << ia64_rse_slot_num(slot));
+ }
+ ia64_poke(regs, current, (unsigned long)addr, rnats);
+
+ DPRINT(("rnat changed to @%p = 0x%lx\n", addr, rnats));
+}
+
+
+static void
+get_rse_reg(struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
+{
+ struct switch_stack *sw = (struct switch_stack *)regs - 1;
+ unsigned long *kbs = (unsigned long *)current + IA64_RBS_OFFSET/8;
+ unsigned long on_kbs;
+ long nlocals;
+ unsigned long *bsp, *addr, *ubs_end, *slot, *bspstore;
+ unsigned long rnats;
+
+ /*
+ * cr_ifs=[rv:ifm], ifm=[....:sof(6)]
+ * nlocals=number of local registers in the faulting function
+ */
+ nlocals = (regs->cr_ifs) & 0x7f;
+
+ /*
+ * save_switch_stack does a flushrs and saves bspstore.
+ * on_kbs = actual number of registers saved on kernel backing store
+ * (taking into account potential RNATs)
+ *
+ * Note that this number can be greater than nlocals if the dirty
+ * partitions included more than one stack frame at the time we
+ * switched to KBS
+ */
+ on_kbs = ia64_rse_num_regs(kbs, (unsigned long *)sw->ar_bspstore);
+ bspstore = (unsigned long *)regs->ar_bspstore;
+
+ /*
+ * To simplify the logic, we calculate everything as if there was only
+ * one backing store, i.e., the user one (UBS). We leave it to peek/poke
+ * to figure out whether the register we're looking for really is
+ * on the UBS or on KBS.
+ *
+ * regs->ar_bspstore = address of last register saved on UBS (before switch)
+ *
+ * ubs_end = virtual end of the UBS (if everything had been spilled there)
+ *
+ * We know that ubs_end is the point where the last register on the
+ * stack frame we're interested in has been saved. So we need to walk
+ * our way backward to figure out what the BSP "was" for that frame,
+ * this will give us the location of r32.
+ *
+ * bsp = "virtual UBS" address of r32 for our frame
+ *
+ * Finally, compute the address of the register we're looking for
+ * using bsp as our base (move up again).
+ *
+ * Please note that in our case, we know that the register is necessarily
+ * on the KBS because we are only interested in the current frame at the moment
+ * we got the exception i.e., bsp is not changed until we switch to KBS.
+ */
+ ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
+ bsp = ia64_rse_skip_regs(ubs_end, -nlocals);
+ addr = slot = ia64_rse_skip_regs(bsp, r1 - 32);
+
+ DPRINT(("ubs_end=%p bsp=%p addr=%p slot=0x%lx\n",
+ ubs_end, bsp, addr, ia64_rse_slot_num(addr)));
+
+ ia64_peek(regs, current, (unsigned long)addr, val);
+
+ /*
+ * addr will now contain the address of the RNAT for the register
+ */
+ addr = ia64_rse_rnat_addr(addr);
+
+ ia64_peek(regs, current, (unsigned long)addr, &rnats);
+ DPRINT(("rnat @%p = 0x%lx\n", addr, rnats));
+
+ if ( nat ) *nat = rnats >> ia64_rse_slot_num(slot) & 0x1;
+}
+
+
+static void
+setreg(unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
+{
+ struct switch_stack *sw = (struct switch_stack *)regs -1;
+ unsigned long addr;
+ unsigned long bitmask;
+ unsigned long *unat;
+
+
+ /*
+ * First, take care of stacked registers
+ */
+ if ( regnum >= IA64_FIRST_STACKED_GR ) {
+ set_rse_reg(regs, regnum, val, nat);
+ return;
+ }
+
+ /*
+ * Using r0 as a target raises a General Exception fault which has
+ * higher priority than the Unaligned Reference fault.
+ */
+
+ /*
+ * Now look at registers in [0-31] range and init correct UNAT
+ */
+ if ( GR_IN_SW(regnum) ) {
+ addr = (unsigned long)sw;
+ unat = &sw->ar_unat;
+ } else {
+ addr = (unsigned long)regs;
+ unat = &sw->caller_unat;
+ }
+ DPRINT(("tmp_base=%lx switch_stack=%s offset=%d\n",
+ addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum)));
+ /*
+ * add offset from base of struct
+ * and do it !
+ */
+ addr += GR_OFFS(regnum);
+
+ *(unsigned long *)addr = val;
+
+ /*
+ * We need to clear the corresponding UNAT bit to fully emulate the load
+ * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4
+ */
+ bitmask = __IA64_UL(1) << (addr >> 3 & 0x3f);
+ DPRINT(("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, unat, *unat));
+ if ( nat ) {
+ *unat |= bitmask;
+ } else {
+ *unat &= ~bitmask;
+ }
+ DPRINT(("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, unat,*unat));
+}
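A quick worked example (editorial, illustrative only) of the UNAT bit selection above: the bit index comes from bits {8:3} of the save address, so a register spilled at an address ending in 0x1f8 has its NaT recorded in UNAT bit (0x1f8 >> 3) & 0x3f = 0x3f, while one spilled at an address ending in 0x008 uses bit 1.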
+
+#define IA64_FPH_OFFS(r) (r - IA64_FIRST_ROTATING_FR)
+
+static void
+setfpreg(unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
+{
+ struct switch_stack *sw = (struct switch_stack *)regs - 1;
+ unsigned long addr;
+
+ /*
+ * From EAS-2.5: FPDisableFault has higher priority than
+ * Unaligned Fault. Thus, when we get here, we know the partition is
+ * enabled.
+ *
+ * The registers [32-127] are usually saved in the tss. When we get here,
+ * they are NECESSARILY live because they are only saved explicitly.
+ * We have 3 ways of updating the values: force a save of the range
+ * in tss, use a gigantic switch/case statement or generate code on the
+ * fly to store to the right register.
+ * For now, we are using the (slow) save/restore way.
+ */
+ if ( regnum >= IA64_FIRST_ROTATING_FR ) {
+ /*
+ * force a save of [32-127] to tss
+ * we use the __() form to avoid fiddling with the dfh bit
+ */
+ __ia64_save_fpu(&current->thread.fph[0]);
+
+ current->thread.fph[IA64_FPH_OFFS(regnum)] = *fpval;
+
+ __ia64_load_fpu(&current->thread.fph[0]);
+
+ /*
+ * mark the high partition as being used now
+ *
+ * This is REQUIRED because the disabled_fph_fault() does
+ * not set it, it's relying on the faulting instruction to
+ * do it. In our case the faulty instruction never gets executed
+ * completely, so we need to toggle the bit.
+ */
+ regs->cr_ipsr |= IA64_PSR_MFH;
+ } else {
+ /*
+ * pt_regs or switch_stack ?
+ */
+ if ( FR_IN_SW(regnum) ) {
+ addr = (unsigned long)sw;
+ } else {
+ addr = (unsigned long)regs;
+ }
+
+ DPRINT(("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum)));
+
+ addr += FR_OFFS(regnum);
+ *(struct ia64_fpreg *)addr = *fpval;
+
+ /*
+ * mark the low partition as being used now
+ *
+ * It is highly unlikely that this bit is not already set, but
+ * let's do it for safety.
+ */
+ regs->cr_ipsr |= IA64_PSR_MFL;
+
+ }
+}
+
+/*
+ * These two inline functions generate the spilled versions of the constant floating-point
+ * registers which can be used with stfX
+ */
+static inline void
+float_spill_f0(struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("stf.spill [%0]=f0" :: "r"(final) : "memory");
+}
+
+static inline void
+float_spill_f1(struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("stf.spill [%0]=f1" :: "r"(final) : "memory");
+}
+
+static void
+getfpreg(unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
+{
+ struct switch_stack *sw = (struct switch_stack *)regs -1;
+ unsigned long addr;
+
+ /*
+ * From EAS-2.5: FPDisableFault has higher priority than
+ * Unaligned Fault. Thus, when we get here, we know the partition is
+ * enabled.
+ *
+ * When regnum > 31, the register is still live and
+ * we need to force a save to the tss to get access to it.
+ * See discussion in setfpreg() for reasons and other ways of doing this.
+ */
+ if ( regnum >= IA64_FIRST_ROTATING_FR ) {
+
+ /*
+ * force a save of [32-127] to tss
+ * we use the __ia64_save_fpu() form to avoid fiddling with
+ * the dfh bit.
+ */
+ __ia64_save_fpu(&current->thread.fph[0]);
+
+ *fpval = current->thread.fph[IA64_FPH_OFFS(regnum)];
+ } else {
+ /*
+ * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
+ * not saved, we must generate their spilled form on the fly
+ */
+ switch(regnum) {
+ case 0:
+ float_spill_f0(fpval);
+ break;
+ case 1:
+ float_spill_f1(fpval);
+ break;
+ default:
+ /*
+ * pt_regs or switch_stack ?
+ */
+ addr = FR_IN_SW(regnum) ? (unsigned long)sw
+ : (unsigned long)regs;
+
+ DPRINT(("is_sw=%d tmp_base=%lx offset=0x%x\n",
+ FR_IN_SW(regnum), addr, FR_OFFS(regnum)));
+
+ addr += FR_OFFS(regnum);
+ *fpval = *(struct ia64_fpreg *)addr;
+ }
+ }
+}
+
+
+static void
+getreg(unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
+{
+ struct switch_stack *sw = (struct switch_stack *)regs -1;
+ unsigned long addr, *unat;
+
+ if ( regnum >= IA64_FIRST_STACKED_GR ) {
+ get_rse_reg(regs, regnum, val, nat);
+ return;
+ }
+
+ /*
+ * take care of r0 (read-only always evaluate to 0)
+ */
+ if ( regnum == 0 ) {
+ *val = 0;
+ if ( nat ) *nat = 0; /* nat may be NULL, e.g., when a store reads r0 */
+ return;
+ }
+
+ /*
+ * Now look at registers in [0-31] range and init correct UNAT
+ */
+ if ( GR_IN_SW(regnum) ) {
+ addr = (unsigned long)sw;
+ unat = &sw->ar_unat;
+ } else {
+ addr = (unsigned long)regs;
+ unat = &sw->caller_unat;
+ }
+
+ DPRINT(("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum)));
+
+ addr += GR_OFFS(regnum);
+
+ *val = *(unsigned long *)addr;
+
+ /*
+ * do it only when requested
+ */
+ if ( nat ) *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
+}
+
+static void
+emulate_load_updates(update_t type, load_store_t *ld, struct pt_regs *regs, unsigned long ifa)
+{
+ /*
+ * IMPORTANT:
+ * Given the way we handle unaligned speculative loads, we should
+ * not get to this point in the code but we keep this sanity check,
+ * just in case.
+ */
+ if ( ld->x6_op == 1 || ld->x6_op == 3 ) {
+ printk(KERN_ERR __FUNCTION__": register update on speculative load, error\n");
+ die_if_kernel("unaligned reference on specualtive load with register update\n",
+ regs, 30);
+ }
+
+
+ /*
+ * at this point, we know that the base register to update is valid i.e.,
+ * it's not r0
+ */
+ if ( type == UPD_IMMEDIATE ) {
+ unsigned long imm;
+
+ /*
+ * Load +Imm: ldXZ r1=[r3],imm(9)
+ *
+ *
+ * form imm9: [13:19] contain the first 7 bits
+ */
+ imm = ld->x << 7 | ld->imm;
+
+ /*
+ * sign extend (1+8bits) if m set
+ */
+ if (ld->m) imm |= SIGN_EXT9;
+
+ /*
+ * ifa == r3 and we know that the NaT bit on r3 was clear so
+ * we can directly use ifa.
+ */
+ ifa += imm;
+
+ setreg(ld->r3, ifa, 0, regs);
+
+ DPRINT(("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld->x, ld->m, imm, ifa));
+
+ } else if ( ld->m ) {
+ unsigned long r2;
+ int nat_r2;
+
+ /*
+ * Load +Reg Opcode: ldXZ r1=[r3],r2
+ *
+ * Note that we update r3 even in the case of ldfX.a
+ * (where the load does not happen)
+ *
+ * The way the load algorithm works, we know that r3 does not
+ * have its NaT bit set (would have gotten NaT consumption
+ * before getting the unaligned fault). So we can use ifa
+ * which equals r3 at this point.
+ *
+ * IMPORTANT:
+ * The above statement holds ONLY because we know that we
+ * never reach this code when trying to do a ldX.s.
+ * If we ever make it to here on an ldfX.s then
+ */
+ getreg(ld->imm, &r2, &nat_r2, regs);
+
+ ifa += r2;
+
+ /*
+ * propagate Nat r2 -> r3
+ */
+ setreg(ld->r3, ifa, nat_r2, regs);
+
+ DPRINT(("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld->imm, r2, ifa, nat_r2));
+ }
+}
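A worked example (editorial, illustrative only; it assumes SIGN_EXT9 fills bits 8 and up, as its use above implies) of the imm9 assembly for a post-decrement of 8, i.e. ldXZ r1=[r3],-8:

        imm7 = 0x78, x = 1, m = 1 (sign)
        imm  = (1 << 7) | 0x78          /* = 0x0f8                      */
        imm |= SIGN_EXT9                /* = 0xfffffffffffffff8 == -8   */
        ifa += imm                      /* r3 is post-decremented by 8  */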
+
+
+static int
+emulate_load_int(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
+{
+ unsigned long val;
+ unsigned int len = 1<< ld->x6_sz;
+
+ /*
+ * the macro supposes sequential access (which is the case)
+ * if the first byte is at an invalid address we return here. Otherwise
+ * there is a guard page at the top of the user's address space and
+ * the first access would generate a NaT consumption fault and return
+ * with a SIGSEGV, which is what we want.
+ *
+ * Note: the first argument is ignored
+ */
+ if ( !access_ok(VERIFY_READ, (void *)ifa, len) ) {
+ DPRINT(("verify area failed on %lx\n", ifa));
+ return -1;
+ }
+
+ /*
+ * r0, as target, doesn't need to be checked because Illegal Instruction
+ * faults have higher priority than unaligned faults.
+ *
+ * r0 cannot be found as the base as it would never generate an
+ * unaligned reference.
+ */
+
+ /*
+ * ldX.a: we don't try to emulate anything, but we must
+ * invalidate the ALAT entry.
+ * See comment below for explanation on how we handle ldX.a
+ */
+ if ( ld->x6_op != 0x2 ) {
+ /*
+ * we rely on the macros in unaligned.h for now i.e.,
+ * we let the compiler figure out how to read memory gracefully.
+ *
+ * We need this switch/case because of the way the inline function
+ * works. The code is optimized by the compiler and looks like
+ * a single switch/case.
+ */
+ switch(len) {
+ case 2:
+ val = ia64_get_unaligned((void *)ifa, 2);
+ break;
+ case 4:
+ val = ia64_get_unaligned((void *)ifa, 4);
+ break;
+ case 8:
+ val = ia64_get_unaligned((void *)ifa, 8);
+ break;
+ default:
+ DPRINT(("unknown size: x6=%d\n", ld->x6_sz));
+ return -1;
+ }
+
+ setreg(ld->r1, val, 0, regs);
+ }
+
+ /*
+ * check for updates on any kind of loads
+ */
+ if ( ld->op == 0x5 || ld->m )
+ emulate_load_updates(ld->op == 0x5 ? UPD_IMMEDIATE: UPD_REG,
+ ld, regs, ifa);
+
+ /*
+ * handling of various loads (based on EAS2.4):
+ *
+ * ldX.acq (ordered load):
+ * - acquire semantics would have been used, so force fence instead.
+ *
+ *
+ * ldX.c.clr (check load and clear):
+ * - if we get to this handler, it's because the entry was not in the ALAT.
+ * Therefore the operation reverts to a normal load
+ *
+ * ldX.c.nc (check load no clear):
+ * - same as previous one
+ *
+ * ldX.c.clr.acq (ordered check load and clear):
+ * - same as above for c.clr part. The load needs to have acquire semantics. So
+ * we use the fence semantics which is stronger and thus ensures correctness.
+ *
+ * ldX.a (advanced load):
+ * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
+ * address doesn't match the requested size alignment. This means that we would
+ * possibly need more than one load to get the result.
+ *
+ * The load part can be handled just like a normal load, however the difficult
+ * part is to get the right thing into the ALAT. The critical piece of information
+ * in the base address of the load & size. To do that, a ld.a must be executed,
+ * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
+ * if we use the same target register, we will be okay for the check.a instruction.
+ * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
+ * which would overlap within [r3,r3+X] (the size of the load was stored in the
+ * ALAT). If such an entry is found the entry is invalidated. But this is not good
+ * enough, take the following example:
+ * r3=3
+ * ld4.a r1=[r3]
+ *
+ * Could be emulated by doing:
+ * ld1.a r1=[r3],1
+ * store to temporary;
+ * ld1.a r1=[r3],1
+ * store & shift to temporary;
+ * ld1.a r1=[r3],1
+ * store & shift to temporary;
+ * ld1.a r1=[r3]
+ * store & shift to temporary;
+ * r1=temporary
+ *
+ * So in this case, you would get the right value in r1 but the wrong info in
+ * the ALAT. Notice that you could do it in reverse to finish with address 3
+ * but you would still get the size wrong. To get the size right, one needs to
+ * execute exactly the same kind of load. You could do it from an aligned
+ * temporary location, but you would get the address wrong.
+ *
+ * So no matter what, it is not possible to emulate an advanced load
+ * correctly. But is that really critical ?
+ *
+ *
+ * Now one has to look at how ld.a is used: one must either do a ld.c.* or
+ * chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
+ * entry found in ALAT), and that's perfectly ok because:
+ *
+ * - ld.c.*, if the entry is not present a normal load is executed
+ * - chk.a.*, if the entry is not present, execution jumps to recovery code
+ *
+ * In either case, the load can be potentially retried in another form.
+ *
+ * So it's okay NOT to do any actual load on an unaligned ld.a. However the ALAT
+ * must be invalidated for the register (so that chk.a.*/ld.c.* don't pick up
+ * a stale entry later). The register base update MUST also be performed.
+ *
+ * Now what is the content of the register and its NaT bit in the case we don't
+ * do the load? EAS2.4 says (in case an actual load is needed):
+ *
+ * - r1 = [r3], Nat = 0 if succeeds
+ * - r1 = 0 Nat = 0 if trying to access non-speculative memory
+ *
+ * For us, there is nothing to do, because both ld.c.* and chk.a.* are going to
+ * retry and thus eventually reload the register thereby changing Nat and
+ * register content.
+ */
+
+ /*
+ * when the load has the .acq completer then
+ * use ordering fence.
+ */
+ if (ld->x6_op == 0x5 || ld->x6_op == 0xa)
+ mb();
+
+ /*
+ * invalidate ALAT entry in case of advanced load
+ */
+ if (ld->x6_op == 0x2)
+ invala_gr(ld->r1);
+
+ return 0;
+}
+
+static int
+emulate_store_int(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
+{
+ unsigned long r2;
+ unsigned int len = 1<< ld->x6_sz;
+
+ /*
+ * the macro supposes sequential access (which is the case)
+ * if the first byte is at an invalid address we return here. Otherwise
+ * there is a guard page at the top of the user's address space and
+ * the first access would generate a NaT consumption fault and return
+ * with a SIGSEGV, which is what we want.
+ *
+ * Note: the first argument is ignored
+ */
+ if ( !access_ok(VERIFY_WRITE, (void *)ifa, len) ) {
+ DPRINT(("verify area failed on %lx\n",ifa));
+ return -1;
+ }
+
+ /*
+ * if we get to this handler, Nat bits on both r3 and r2 have already
+ * been checked, so we don't need to do it again.
+ *
+ * extract the value to be stored
+ */
+ getreg(ld->imm, &r2, 0, regs);
+
+ /*
+ * we rely on the macros in unaligned.h for now i.e.,
+ * we let the compiler figure out how to read memory gracefully.
+ *
+ * We need this switch/case because of the way the inline function
+ * works. The code is optimized by the compiler and looks like
+ * a single switch/case.
+ */
+ DPRINT(("st%d [%lx]=%lx\n", len, ifa, r2));
+
+ switch(len) {
+ case 2:
+ ia64_put_unaligned(r2, (void *)ifa, 2);
+ break;
+ case 4:
+ ia64_put_unaligned(r2, (void *)ifa, 4);
+ break;
+ case 8:
+ ia64_put_unaligned(r2, (void *)ifa, 8);
+ break;
+ default:
+ DPRINT(("unknown size: x6=%d\n", ld->x6_sz));
+ return -1;
+ }
+ /*
+ * stX [r3]=r2,imm(9)
+ *
+ * NOTE:
+ * ld->r3 can never be r0, because r0 would not generate an
+ * unaligned access.
+ */
+ if ( ld->op == 0x5 ) {
+ unsigned long imm;
+
+ /*
+ * form imm9: [6:12] contain the first 7 bits
+ */
+ imm = ld->x << 7 | ld->r1;
+ /*
+ * sign extend (8bits) if m set
+ */
+ if ( ld->m ) imm |= SIGN_EXT9;
+ /*
+ * ifa == r3 (NaT is necessarily cleared)
+ */
+ ifa += imm;
+
+ DPRINT(("imm=%lx r3=%lx\n", imm, ifa));
+
+ setreg(ld->r3, ifa, 0, regs);
+ }
+ /*
+ * we don't have alat_invalidate_multiple() so we need
+ * to do the complete flush :-<<
+ */
+ ia64_invala();
+
+ /*
+ * stX.rel: use fence instead of release
+ */
+ if ( ld->x6_op == 0xd ) mb();
+
+ return 0;
+}
+
+/*
+ * floating-point operand sizes in bytes
+ */
+static const unsigned short float_fsz[4]={
+ 16, /* extended precision (e) */
+ 8, /* integer (8) */
+ 4, /* single precision (s) */
+ 8 /* double precision (d) */
+};
+
+static inline void
+mem2float_extended(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldfe f6=[%0];; stf.spill [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static inline void
+mem2float_integer(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldf8 f6=[%0];; stf.spill [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static inline void
+mem2float_single(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldfs f6=[%0];; stf.spill [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static inline void
+mem2float_double(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldfd f6=[%0];; stf.spill [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static inline void
+float2mem_extended(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldf.fill f6=[%0];; stfe [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static inline void
+float2mem_integer(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldf.fill f6=[%0];; stf8 [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static inline void
+float2mem_single(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldf.fill f6=[%0];; stfs [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static inline void
+float2mem_double(struct ia64_fpreg *init, struct ia64_fpreg *final)
+{
+ __asm__ __volatile__ ("ldf.fill f6=[%0];; stfd [%1]=f6"
+ :: "r"(init), "r"(final) : "f6","memory");
+}
+
+static int
+emulate_load_floatpair(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
+{
+ struct ia64_fpreg fpr_init[2];
+ struct ia64_fpreg fpr_final[2];
+ unsigned long len = float_fsz[ld->x6_sz];
+
+ if ( !access_ok(VERIFY_READ, (void *)ifa, len<<1) ) {
+ DPRINT(("verify area failed on %lx\n", ifa));
+ return -1;
+ }
+ /*
+ * fr0 & fr1 don't need to be checked because Illegal Instruction
+ * faults have higher priority than unaligned faults.
+ *
+ * r0 cannot be found as the base as it would never generate an
+ * unaligned reference.
+ */
+
+ /*
+ * make sure we get clean buffers
+ */
+ memset(&fpr_init,0, sizeof(fpr_init));
+ memset(&fpr_final,0, sizeof(fpr_final));
+
+ /*
+ * ldfpX.a: we don't try to emulate anything but we must
+ * invalidate the ALAT entry and execute updates, if any.
+ */
+ if ( ld->x6_op != 0x2 ) {
+ /*
+ * does the unaligned access
+ */
+ memcpy(&fpr_init[0], (void *)ifa, len);
+ memcpy(&fpr_init[1], (void *)(ifa+len), len);
+
+ DPRINT(("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld->r1, ld->imm, ld->x6_sz));
+#ifdef DEBUG_UNALIGNED_TRAP
+ { int i; char *c = (char *)&fpr_init;
+ printk("fpr_init= ");
+ for(i=0; i < len<<1; i++ ) {
+ printk("%02x ", c[i]&0xff);
+ }
+ printk("\n");
+ }
+#endif
+ /*
+ * XXX fixme
+ * Could optimize inlines by using ldfpX & 2 spills
+ */
+ switch( ld->x6_sz ) {
+ case 0:
+ mem2float_extended(&fpr_init[0], &fpr_final[0]);
+ mem2float_extended(&fpr_init[1], &fpr_final[1]);
+ break;
+ case 1:
+ mem2float_integer(&fpr_init[0], &fpr_final[0]);
+ mem2float_integer(&fpr_init[1], &fpr_final[1]);
+ break;
+ case 2:
+ mem2float_single(&fpr_init[0], &fpr_final[0]);
+ mem2float_single(&fpr_init[1], &fpr_final[1]);
+ break;
+ case 3:
+ mem2float_double(&fpr_init[0], &fpr_final[0]);
+ mem2float_double(&fpr_init[1], &fpr_final[1]);
+ break;
+ }
+#ifdef DEBUG_UNALIGNED_TRAP
+ { int i; char *c = (char *)&fpr_final;
+ printk("fpr_final= ");
+ for(i=0; i < len<<1; i++ ) {
+ printk("%02x ", c[i]&0xff);
+ }
+ printk("\n");
+ }
+#endif
+ /*
+ * XXX fixme
+ *
+ * A possible optimization would be to drop fpr_final
+ * and directly use the storage from the saved context i.e.,
+ * the actual final destination (pt_regs, switch_stack or tss).
+ */
+ setfpreg(ld->r1, &fpr_final[0], regs);
+ setfpreg(ld->imm, &fpr_final[1], regs);
+ }
+
+ /*
+ * Check for updates: only immediate updates are available for this
+ * instruction.
+ */
+ if ( ld->m ) {
+
+ /*
+ * the immediate is implicit given the ldsz of the operation:
+ * single: 8 (2x4) and for all others it's 16 (2x8)
+ */
+ ifa += len<<1;
+
+ /*
+ * IMPORTANT:
+ * the fact that we force the NaT of r3 to zero is ONLY valid
+ * as long as we don't come here with a ldfpX.s.
+ * For this reason we keep this sanity check
+ */
+ if ( ld->x6_op == 1 || ld->x6_op == 3 ) {
+ printk(KERN_ERR "%s: register update on speculative load pair, error\n", __FUNCTION__);
+ }
+
+
+ setreg(ld->r3, ifa, 0, regs);
+ }
+
+ /*
+ * Invalidate ALAT entries, if any, for both registers.
+ */
+ if ( ld->x6_op == 0x2 ) {
+ invala_fr(ld->r1);
+ invala_fr(ld->imm);
+ }
+ return 0;
+}
+
+
+static int
+emulate_load_float(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
+{
+ struct ia64_fpreg fpr_init;
+ struct ia64_fpreg fpr_final;
+ unsigned long len = float_fsz[ld->x6_sz];
+
+ /*
+ * check for load pair because our masking scheme is not fine-grained enough
+ if ( ld->x == 1 ) return emulate_load_floatpair(ifa,ld,regs);
+ */
+
+ if ( !access_ok(VERIFY_READ, (void *)ifa, len) ) {
+ DPRINT(("verify area failed on %lx\n", ifa));
+ return -1;
+ }
+ /*
+ * fr0 & fr1 don't need to be checked because Illegal Instruction
+ * faults have higher priority than unaligned faults.
+ *
+ * r0 cannot be found as the base as it would never generate an
+ * unaligned reference.
+ */
+
+
+ /*
+ * make sure we get clean buffers
+ */
+ memset(&fpr_init,0, sizeof(fpr_init));
+ memset(&fpr_final,0, sizeof(fpr_final));
+
+ /*
+ * ldfX.a: we don't try to emulate anything, but we must
+ * invalidate the ALAT entry.
+ * See comments in ldX for descriptions on how the various loads are handled.
+ */
+ if ( ld->x6_op != 0x2 ) {
+
+ /*
+ * does the unaligned access
+ */
+ memcpy(&fpr_init, (void *)ifa, len);
+
+ DPRINT(("ld.r1=%d x6_sz=%d\n", ld->r1, ld->x6_sz));
+#ifdef DEBUG_UNALIGNED_TRAP
+ { int i; char *c = (char *)&fpr_init;
+ printk("fpr_init= ");
+ for(i=0; i < len; i++ ) {
+ printk("%02x ", c[i]&0xff);
+ }
+ printk("\n");
+ }
+#endif
+ /*
+ * we only do something for x6_op={0,8,9}
+ */
+ switch( ld->x6_sz ) {
+ case 0:
+ mem2float_extended(&fpr_init, &fpr_final);
+ break;
+ case 1:
+ mem2float_integer(&fpr_init, &fpr_final);
+ break;
+ case 2:
+ mem2float_single(&fpr_init, &fpr_final);
+ break;
+ case 3:
+ mem2float_double(&fpr_init, &fpr_final);
+ break;
+ }
+#ifdef DEBUG_UNALIGNED_TRAP
+ { int i; char *c = (char *)&fpr_final;
+ printk("fpr_final= ");
+ for(i=0; i < len; i++ ) {
+ printk("%02x ", c[i]&0xff);
+ }
+ printk("\n");
+ }
+#endif
+ /*
+ * XXX fixme
+ *
+ * A possible optimization would be to drop fpr_final
+ * and directly use the storage from the saved context i.e.,
+ * the actual final destination (pt_regs, switch_stack or tss).
+ */
+ setfpreg(ld->r1, &fpr_final, regs);
+ }
+
+ /*
+ * check for updates on any loads
+ */
+ if ( ld->op == 0x7 || ld->m )
+ emulate_load_updates(ld->op == 0x7 ? UPD_IMMEDIATE: UPD_REG,
+ ld, regs, ifa);
+
+
+ /*
+ * invalidate ALAT entry in case of advanced floating point loads
+ */
+ if (ld->x6_op == 0x2)
+ invala_fr(ld->r1);
+
+ return 0;
+}
+
+
+static int
+emulate_store_float(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
+{
+ struct ia64_fpreg fpr_init;
+ struct ia64_fpreg fpr_final;
+ unsigned long len = float_fsz[ld->x6_sz];
+
+ /*
+ * the macro supposes sequential access (which is the case)
+ * if the first byte is at an invalid address we return here. Otherwise
+ * there is a guard page at the top of the user's address space and
+ * the first access would generate a NaT consumption fault and return
+ * with a SIGSEGV, which is what we want.
+ *
+ * Note: the first argument is ignored
+ */
+ if ( !access_ok(VERIFY_WRITE, (void *)ifa, len) ) {
+ DPRINT(("verify area failed on %lx\n",ifa));
+ return -1;
+ }
+
+ /*
+ * make sure we get clean buffers
+ */
+ memset(&fpr_init,0, sizeof(fpr_init));
+ memset(&fpr_final,0, sizeof(fpr_final));
+
+
+ /*
+ * if we get to this handler, Nat bits on both r3 and r2 have already
+ * been checked, so we don't need to do it again.
+ *
+ * extract the value to be stored
+ */
+ getfpreg(ld->imm, &fpr_init, regs);
+ /*
+ * during this step, we extract the spilled registers from the saved
+ * context i.e., we refill. Then we store (no spill) to a temporary
+ * aligned location
+ */
+ switch( ld->x6_sz ) {
+ case 0:
+ float2mem_extended(&fpr_init, &fpr_final);
+ break;
+ case 1:
+ float2mem_integer(&fpr_init, &fpr_final);
+ break;
+ case 2:
+ float2mem_single(&fpr_init, &fpr_final);
+ break;
+ case 3:
+ float2mem_double(&fpr_init, &fpr_final);
+ break;
+ }
+ DPRINT(("ld.r1=%d x6_sz=%d\n", ld->r1, ld->x6_sz));
+#ifdef DEBUG_UNALIGNED_TRAP
+ { int i; char *c = (char *)&fpr_init;
+ printk("fpr_init= ");
+ for(i=0; i < len; i++ ) {
+ printk("%02x ", c[i]&0xff);
+ }
+ printk("\n");
+ }
+ { int i; char *c = (char *)&fpr_final;
+ printk("fpr_final= ");
+ for(i=0; i < len; i++ ) {
+ printk("%02x ", c[i]&0xff);
+ }
+ printk("\n");
+ }
+#endif
+
+ /*
+ * does the unaligned store
+ */
+ memcpy((void *)ifa, &fpr_final, len);
+
+ /*
+ * stfX [r3]=r2,imm(9)
+ *
+ * NOTE:
+ * ld->r3 can never be r0, because r0 would not generate an
+ * unaligned access.
+ */
+ if ( ld->op == 0x7 ) {
+ unsigned long imm;
+
+ /*
+ * form imm9: [6:12] contain the first 7 bits
+ */
+ imm = ld->x << 7 | ld->r1;
+ /*
+ * sign extend (8bits) if m set
+ */
+ if ( ld->m ) imm |= SIGN_EXT9;
+ /*
+ * ifa == r3 (NaT is necessarily cleared)
+ */
+ ifa += imm;
+
+ DPRINT(("imm=%lx r3=%lx\n", imm, ifa));
+
+ setreg(ld->r3, ifa, 0, regs);
+ }
+ /*
+ * we don't have alat_invalidate_multiple() so we need
+ * to do the complete flush :-<<
+ */
+ ia64_invala();
+
+ return 0;
+}
+
+void
+ia64_handle_unaligned(unsigned long ifa, struct pt_regs *regs)
+{
+ static unsigned long unalign_count;
+ static long last_time;
+
+ struct ia64_psr *ipsr = ia64_psr(regs);
+ unsigned long *bundle_addr;
+ unsigned long opcode;
+ unsigned long op;
+ load_store_t *insn;
+ int ret = -1;
+
+ /*
+ * We flag unaligned references while in the kernel as
+ * errors: the kernel must be fixed. The switch code
+ * is in ivt.S at entry 30.
+ *
+ * So here we keep a simple sanity check.
+ */
+ if ( !user_mode(regs) ) {
+ die_if_kernel("Unaligned reference while in kernel\n", regs, 30);
+ /* NOT_REACHED */
+ }
+
+ /*
+ * Make sure we log the unaligned access, so that user/sysadmin can notice it
+ * and eventually fix the program.
+ *
+ * We don't want to do that for every access so we pace it with jiffies.
+ */
+ if ( unalign_count > 5 && jiffies - last_time > 5*HZ ) unalign_count = 0;
+ if ( ++unalign_count < 5 ) {
+ last_time = jiffies;
+ printk("%s(%d): unaligned trap accessing %016lx (ip=%016lx)\n",
+ current->comm, current->pid, ifa, regs->cr_iip + ipsr->ri);
+
+ }
+
+ DPRINT(("iip=%lx ifa=%lx isr=%lx\n", regs->cr_iip, ifa, regs->cr_ipsr));
+ DPRINT(("ISR.ei=%d ISR.sp=%d\n", ipsr->ri, ipsr->it));
+
+ bundle_addr = (unsigned long *)(regs->cr_iip);
+
+ /*
+ * extract the instruction from the bundle given the slot number
+ */
+ switch ( ipsr->ri ) {
+ case 0: op = *bundle_addr >> 5;
+ break;
+
+ case 1: op = *bundle_addr >> 46 | (*(bundle_addr+1) & 0x7fffff)<<18;
+ break;
+
+ case 2: op = *(bundle_addr+1) >> 23;
+ break;
+ }
+
+ insn = (load_store_t *)&op;
+ opcode = op & IA64_OPCODE_MASK;
+
+ DPRINT(("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
+ "ld.x6=0x%x ld.m=%d ld.op=%d\n",
+ opcode,
+ insn->qp,
+ insn->r1,
+ insn->imm,
+ insn->r3,
+ insn->x,
+ insn->hint,
+ insn->x6_sz,
+ insn->m,
+ insn->op));
+
+ /*
+ * IMPORTANT:
+ * Notice that the switch statement does NOT cover all possible instructions
+ * that DO generate unaligned references. This is on purpose because for some
+ * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
+ * is WRONG to try and emulate. Here is a list of instructions we don't emulate, i.e.,
+ * the program will get a signal and die:
+ *
+ * load/store:
+ * - ldX.spill
+ * - stX.spill
+ * Reason: RNATs are based on addresses
+ *
+ * synchronization:
+ * - cmpxchg
+ * - fetchadd
+ * - xchg
+ * Reason: ATOMIC operations cannot be emulated properly using multiple
+ * instructions.
+ *
+ * speculative loads:
+ * - ldX.sZ
+ * Reason: side effects, code must be ready to deal with failure so simpler
+ * to let the load fail.
+ * ---------------------------------------------------------------------------------
+ * XXX fixme
+ *
+ * I would like to get rid of this switch case and do something
+ * more elegant.
+ */
+ switch(opcode) {
+ case LDS_OP:
+ case LDSA_OP:
+ case LDS_IMM_OP:
+ case LDSA_IMM_OP:
+ case LDFS_OP:
+ case LDFSA_OP:
+ case LDFS_IMM_OP:
+ /*
+ * The instruction will be retried with deferred exceptions
+ * turned on, and we should get the Nat bit installed
+ *
+ * IMPORTANT:
+ * When PSR_ED is set, the register & immediate update
+ * forms are actually executed even though the operation
+ * failed. So we don't need to take care of this.
+ */
+ DPRINT(("forcing PSR_ED\n"));
+ regs->cr_ipsr |= IA64_PSR_ED;
+ return;
+
+ case LD_OP:
+ case LDA_OP:
+ case LDBIAS_OP:
+ case LDACQ_OP:
+ case LDCCLR_OP:
+ case LDCNC_OP:
+ case LDCCLRACQ_OP:
+ case LD_IMM_OP:
+ case LDA_IMM_OP:
+ case LDBIAS_IMM_OP:
+ case LDACQ_IMM_OP:
+ case LDCCLR_IMM_OP:
+ case LDCNC_IMM_OP:
+ case LDCCLRACQ_IMM_OP:
+ ret = emulate_load_int(ifa, insn, regs);
+ break;
+ case ST_OP:
+ case STREL_OP:
+ case ST_IMM_OP:
+ case STREL_IMM_OP:
+ ret = emulate_store_int(ifa, insn, regs);
+ break;
+ case LDF_OP:
+ case LDFA_OP:
+ case LDFCCLR_OP:
+ case LDFCNC_OP:
+ case LDF_IMM_OP:
+ case LDFA_IMM_OP:
+ case LDFCCLR_IMM_OP:
+ case LDFCNC_IMM_OP:
+ ret = insn->x ?
+ emulate_load_floatpair(ifa, insn, regs):
+ emulate_load_float(ifa, insn, regs);
+ break;
+ case STF_OP:
+ case STF_IMM_OP:
+ ret = emulate_store_float(ifa, insn, regs);
+ }
+
+ DPRINT(("ret=%d\n", ret));
+ if ( ret ) {
+ lock_kernel();
+ force_sig(SIGSEGV, current);
+ unlock_kernel();
+ } else {
+ /*
+ * given today's architecture this case is not likely to happen
+ * because a memory access instruction (M) can never be in the
+ * last slot of a bundle. But let's keep it for now.
+ */
+ if ( ipsr->ri == 2 ) regs->cr_iip += 16;
+ ipsr->ri = (ipsr->ri + 1) % 3; /* advance to the next slot, wrapping to slot 0 of the next bundle */
+ }
+
+ DPRINT(("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip));
+}
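(Editorial sketch, not part of the patch.) The slot extraction in the switch above follows the IA-64 bundle layout: 128 bits per bundle, a 5-bit template followed by three 41-bit instruction slots. Written out as a standalone helper (name made up), it would look roughly like this:

        /* sketch: extract the 41-bit instruction in the given slot of a bundle */
        static unsigned long
        bundle_slot (const unsigned long *bundle, int slot)
        {
                const unsigned long mask41 = (1UL << 41) - 1;

                switch (slot) {
                      case 0: return (bundle[0] >>  5) & mask41;
                      case 1: return (bundle[0] >> 46 | bundle[1] << 18) & mask41;
                      case 2: return (bundle[1] >> 23) & mask41;
                }
                return 0;
        }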
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
new file mode 100644
index 000000000..c2b772e68
--- /dev/null
+++ b/arch/ia64/kernel/unwind.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include <asm/unwind.h>
+
+void
+ia64_unwind_init_from_blocked_task (struct ia64_frame_info *info, struct task_struct *t)
+{
+ struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16);
+ unsigned long sol, limit, top;
+
+ memset(info, 0, sizeof(*info));
+
+ sol = (sw->ar_pfs >> 7) & 0x7f; /* size of locals */
+
+ limit = (unsigned long) t + IA64_RBS_OFFSET;
+ top = sw->ar_bspstore;
+ if (top - (unsigned long) t >= IA64_STK_OFFSET)
+ top = limit;
+
+ info->regstk.limit = (unsigned long *) limit;
+ info->regstk.top = (unsigned long *) top;
+ info->bsp = ia64_rse_skip_regs(info->regstk.top, -sol);
+ info->top_rnat = sw->ar_rnat;
+ info->cfm = sw->ar_pfs;
+ info->ip = sw->b0;
+}
+
+void
+ia64_unwind_init_from_current (struct ia64_frame_info *info, struct pt_regs *regs)
+{
+ struct switch_stack *sw = (struct switch_stack *) regs - 1;
+ unsigned long sol, sof, *bsp, limit, top;
+
+ limit = (unsigned long) current + IA64_RBS_OFFSET;
+ top = sw->ar_bspstore;
+ if (top - (unsigned long) current >= IA64_STK_OFFSET)
+ top = limit;
+
+ memset(info, 0, sizeof(*info));
+
+ sol = (sw->ar_pfs >> 7) & 0x7f; /* size of locals */
+ info->regstk.limit = (unsigned long *) limit;
+ info->regstk.top = (unsigned long *) top;
+ info->top_rnat = sw->ar_rnat;
+
+ /* this gives us the bsp top level frame (kdb interrupt frame): */
+ bsp = ia64_rse_skip_regs((unsigned long *) top, -sol);
+
+ /* now skip past the interrupt frame: */
+ sof = regs->cr_ifs & 0x7f; /* size of frame */
+ info->cfm = regs->cr_ifs;
+ info->bsp = ia64_rse_skip_regs(bsp, -sof);
+ info->ip = regs->cr_iip;
+}
+
+static unsigned long
+read_reg (struct ia64_frame_info *info, int regnum, int *is_nat)
+{
+ unsigned long *addr, *rnat_addr, rnat;
+
+ addr = ia64_rse_skip_regs(info->bsp, regnum);
+ if (addr < info->regstk.limit || addr >= info->regstk.top || ((long) addr & 0x7) != 0) {
+ *is_nat = 1;
+ return 0xdeadbeefdeadbeef;
+ }
+ rnat_addr = ia64_rse_rnat_addr(addr);
+
+ if (rnat_addr >= info->regstk.top)
+ rnat = info->top_rnat;
+ else
+ rnat = *rnat_addr;
+ *is_nat = (rnat & (1UL << ia64_rse_slot_num(addr))) != 0;
+ return *addr;
+}
+
+/*
+ * On entry, info->regstk.top should point to the register backing
+ * store for r32.
+ */
+int
+ia64_unwind_to_previous_frame (struct ia64_frame_info *info)
+{
+ unsigned long sol, cfm = info->cfm;
+ int is_nat;
+
+ sol = (cfm >> 7) & 0x7f; /* size of locals */
+
+ /*
+ * In general, we would have to make use of unwind info to
+ * unwind an IA-64 stack, but for now gcc uses a special
+ * convention that makes this possible without full-fledged
+ * unwind info. Specifically, we expect "rp" in the second-to-last
+ * and "ar.pfs" in the last local register, so the
+ * number of locals in a frame must be at least two. If it's
+ * less than that, we reached the end of the C call stack.
+ */
+ if (sol < 2)
+ return -1;
+
+ info->ip = read_reg(info, sol - 2, &is_nat);
+ if (is_nat)
+ return -1;
+
+ cfm = read_reg(info, sol - 1, &is_nat);
+ if (is_nat)
+ return -1;
+
+ sol = (cfm >> 7) & 0x7f;
+
+ info->cfm = cfm;
+ info->bsp = ia64_rse_skip_regs(info->bsp, -sol);
+ return 0;
+}
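A hypothetical usage sketch (editorial, not part of the patch; the function name and printk format are made up) of the three routines above, walking the call chain of a blocked task:

        /* sketch: print the return addresses of a blocked task */
        void
        show_task_backtrace (struct task_struct *t)
        {
                struct ia64_frame_info info;

                ia64_unwind_init_from_blocked_task(&info, t);
                do {
                        printk("  ip=%016lx\n", info.ip);
                } while (ia64_unwind_to_previous_frame(&info) == 0);
        }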
diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile
new file mode 100644
index 000000000..8a9581747
--- /dev/null
+++ b/arch/ia64/lib/Makefile
@@ -0,0 +1,42 @@
+#
+# Makefile for ia64-specific library routines..
+#
+
+.S.o:
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -traditional -c $< -o $@
+
+OBJS = __divdi3.o __divsi3.o __udivdi3.o __udivsi3.o \
+ __moddi3.o __modsi3.o __umoddi3.o __umodsi3.o \
+ checksum.o clear_page.o csum_partial_copy.o copy_page.o \
+ copy_user.o clear_user.o memset.o strncpy_from_user.o \
+ strlen.o strlen_user.o strnlen_user.o \
+ flush.o do_csum.o
+
+lib.a: $(OBJS)
+ $(AR) rcs lib.a $(OBJS)
+
+__divdi3.o: idiv.S
+ $(CC) $(AFLAGS) -c -o $@ $<
+
+__divsi3.o: idiv.S
+ $(CC) $(AFLAGS) -DSINGLE -c -o $@ $<
+
+__udivdi3.o: idiv.S
+ $(CC) $(AFLAGS) -DUNSIGNED -c -o $@ $<
+
+__udivsi3.o: idiv.S
+ $(CC) $(AFLAGS) -DUNSIGNED -DSINGLE -c -o $@ $<
+
+__moddi3.o: idiv.S
+ $(CC) $(AFLAGS) -DMODULO -c -o $@ $<
+
+__modsi3.o: idiv.S
+ $(CC) $(AFLAGS) -DMODULO -DSINGLE -c -o $@ $<
+
+__umoddi3.o: idiv.S
+ $(CC) $(AFLAGS) -DMODULO -DUNSIGNED -c -o $@ $<
+
+__umodsi3.o: idiv.S
+ $(CC) $(AFLAGS) -DMODULO -DUNSIGNED -DSINGLE -c -o $@ $<
+
+include $(TOPDIR)/Rules.make
diff --git a/arch/ia64/lib/checksum.c b/arch/ia64/lib/checksum.c
new file mode 100644
index 000000000..9c4a8af75
--- /dev/null
+++ b/arch/ia64/lib/checksum.c
@@ -0,0 +1,110 @@
+/*
+ * Network checksum routines
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * Most of the code coming from arch/alpha/lib/checksum.c
+ *
+ * This file contains network checksum routines that are better done
+ * in an architecture-specific manner due to speed..
+ */
+
+#include <linux/string.h>
+
+#include <asm/byteorder.h>
+
+static inline unsigned short
+from64to16(unsigned long x)
+{
+ /* add up 32-bit words for 33 bits */
+ x = (x & 0xffffffff) + (x >> 32);
+ /* add up 16-bit and 17-bit words for 17+c bits */
+ x = (x & 0xffff) + (x >> 16);
+ /* add up 16-bit and 2-bit for 16+c bit */
+ x = (x & 0xffff) + (x >> 16);
+ /* add up carry.. */
+ x = (x & 0xffff) + (x >> 16);
+ return x;
+}
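A worked example (editorial) of the fold above, for x = 0x00000001ffff0003:

        0xffff0003 + 0x00000001 = 0xffff0004    /* 64 -> 33 bits */
        0x0004     + 0xffff     = 0x00010003    /* 33 -> 17 bits */
        0x0003     + 0x0001     = 0x00000004    /* 17 -> 16+c    */
        0x0004     + 0x0000     = 0x00000004    /* fold carry    */

so from64to16() returns 0x0004.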
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented.
+ */
+unsigned short int csum_tcpudp_magic(unsigned long saddr,
+ unsigned long daddr,
+ unsigned short len,
+ unsigned short proto,
+ unsigned int sum)
+{
+ return ~from64to16(saddr + daddr + sum +
+ ((unsigned long) ntohs(len) << 16) +
+ ((unsigned long) proto << 8));
+}
+
+unsigned int csum_tcpudp_nofold(unsigned long saddr,
+ unsigned long daddr,
+ unsigned short len,
+ unsigned short proto,
+ unsigned int sum)
+{
+ unsigned long result;
+
+ result = (saddr + daddr + sum +
+ ((unsigned long) ntohs(len) << 16) +
+ ((unsigned long) proto << 8));
+
+ /* Fold down to 32 bits so we don't lose in the typedef-less
+ network stack. */
+ /* 64 to 33 */
+ result = (result & 0xffffffff) + (result >> 32);
+ /* 33 to 32 */
+ result = (result & 0xffffffff) + (result >> 32);
+ return result;
+}
+
+extern unsigned long do_csum(const unsigned char *, unsigned int, unsigned int);
+extern unsigned long do_csum_c(const unsigned char *, unsigned int, unsigned int);
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ */
+unsigned short ip_fast_csum(unsigned char * iph, unsigned int ihl)
+{
+ return ~do_csum(iph,ihl*4,0);
+}
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
+{
+ unsigned long result = do_csum(buff, len, 0);
+
+ /* add in old sum, and carry.. */
+ result += sum;
+ /* 32+c bits -> 32 bits */
+ result = (result & 0xffffffff) + (result >> 32);
+ return result;
+}
+
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+unsigned short ip_compute_csum(unsigned char * buff, int len)
+{
+ return ~do_csum(buff,len, 0);
+}
diff --git a/arch/ia64/lib/clear_page.S b/arch/ia64/lib/clear_page.S
new file mode 100644
index 000000000..314311c5c
--- /dev/null
+++ b/arch/ia64/lib/clear_page.S
@@ -0,0 +1,42 @@
+/*
+ *
+ * Optimized version of the standard clearpage() function
+ *
+ * Based on comments from ddd. Try not to overflow the write buffer.
+ *
+ * Inputs:
+ * in0: address of page
+ *
+ * Output:
+ * none
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <asm/page.h>
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .align 32
+ .global clear_page
+ .proc clear_page
+clear_page:
+ alloc r11=ar.pfs,1,0,0,0
+ mov r16=ar.lc // slow
+ mov r17=PAGE_SIZE/32-1 // -1 = repeat/until
+ ;;
+ adds r18=16,in0
+ mov ar.lc=r17
+ ;;
+1: stf.spill.nta [in0]=f0,32
+ stf.spill.nta [r18]=f0,32
+ br.cloop.dptk.few 1b
+ ;;
+ mov ar.lc=r16 // restore lc
+ br.ret.sptk.few rp
+
+ .endp clear_page
diff --git a/arch/ia64/lib/clear_user.S b/arch/ia64/lib/clear_user.S
new file mode 100644
index 000000000..0db4a78f8
--- /dev/null
+++ b/arch/ia64/lib/clear_user.S
@@ -0,0 +1,224 @@
+/*
+ * This routine clears to zero a linear memory buffer in user space.
+ *
+ * Inputs:
+ * in0: address of buffer
+ * in1: length of buffer in bytes
+ * Outputs:
+ * r8: number of bytes that didn't get cleared due to a fault
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
+ */
+
+//
+// arguments
+//
+#define buf r32
+#define len r33
+
+//
+// local registers
+//
+#define cnt r16
+#define buf2 r17
+#define saved_lc r18
+#define saved_pr r19
+#define saved_pfs r20
+#define tmp r21
+#define len2 r22
+#define len3 r23
+
+//
+// Theory of operations:
+// - we check whether or not the buffer is small, i.e., less than 17 bytes,
+// in which case we do the byte-by-byte loop.
+//
+// - Otherwise we go progressively from 1-byte to 8-byte stores in
+// the head part, the body is a 16-byte store loop, and we finish with the
+// tail for the last 15 bytes.
+// The good point about this breakdown is that the long buffer handling
+// contains only 2 branches.
+//
+// The reason for not using shifting & masking for both the head and the
+// tail is to stay semantically correct. This routine is not supposed
+// to write bytes outside of the buffer. While most of the time this would
+// be ok, we can't tolerate a mistake. A classical example is the case
+// of multithreaded code where the extra bytes touched are actually owned
+// by another thread which runs concurrently to ours. Another, less likely,
+// example is with device drivers where reading an I/O mapped location may
+// have side effects (same thing for writing).
+//
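(Editorial sketch, not part of the patch.) The head/body/tail breakdown described above corresponds roughly to the following C, simplified: no fault handling, and a byte loop for the head instead of the 1/2/4/8-byte alignment sequence used below:

        void
        clear_sketch (char *buf, unsigned long len)
        {
                while (len && ((unsigned long) buf & 15)) {     /* head: align to 16 bytes */
                        *buf++ = 0; len--;
                }
                while (len >= 16) {                             /* body: 2 x 8-byte stores */
                        *(unsigned long *) buf = 0;
                        *(unsigned long *) (buf + 8) = 0;
                        buf += 16; len -= 16;
                }
                while (len--)                                   /* tail: last 0..15 bytes  */
                        *buf++ = 0;
        }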
+
+// The label comes first because our store instruction contains a comma
+// and would confuse the preprocessor otherwise
+//
+#define EX(y,x...) \
+ .section __ex_table,"a"; \
+ data4 @gprel(99f); \
+ data4 y-99f; \
+ .previous; \
+99: x
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .align 32
+ .global __do_clear_user
+ .proc __do_clear_user
+
+__do_clear_user:
+ alloc saved_pfs=ar.pfs,2,0,0,0
+ cmp.eq p6,p0=r0,len // check for zero length
+ mov saved_lc=ar.lc // preserve ar.lc (slow)
+ ;; // avoid WAW on CFM
+ adds tmp=-1,len // br.ctop is repeat/until
+ mov ret0=len // return value is length at this point
+(p6) br.ret.spnt.few rp
+ ;;
+ cmp.lt p6,p0=16,len // if len > 16 then long memset
+ mov ar.lc=tmp // initialize lc for small count
+(p6) br.cond.dptk.few long_do_clear
+ ;; // WAR on ar.lc
+ //
+ // worst case 16 cycles, avg 8 cycles
+ //
+ // We could have played with the predicates to use the extra
+ // M slot for 2 stores/iteration, but the cost of initializing
+ // the various counters, compared to how long the loop is supposed
+ // to last on average, does not make this solution viable.
+ //
+1:
+ EX( .Lexit1, st1 [buf]=r0,1 )
+ adds len=-1,len // countdown length using len
+ br.cloop.dptk.few 1b
+ ;; // avoid RAW on ar.lc
+ //
+ // .Lexit1: comes from the byte-by-byte loop
+ // len contains bytes left
+.Lexit1:
+ mov ret0=len // faster than using ar.lc
+ mov ar.lc=saved_lc
+ br.ret.sptk.few rp // end of short clear_user
+
+
+ //
+ // At this point we know we have more than 16 bytes to copy
+ // so we focus on alignment (no branches required)
+ //
+ // The use of len/len2 for countdown of the number of bytes left
+ // instead of ret0 is due to the fact that the exception code
+ // changes the values of r8.
+ //
+long_do_clear:
+ tbit.nz p6,p0=buf,0 // odd alignment (for long_do_clear)
+ ;;
+ EX( .Lexit3, (p6) st1 [buf]=r0,1 ) // 1-byte aligned
+(p6) adds len=-1,len;; // sync because buf is modified
+ tbit.nz p6,p0=buf,1
+ ;;
+ EX( .Lexit3, (p6) st2 [buf]=r0,2 ) // 2-byte aligned
+(p6) adds len=-2,len;;
+ tbit.nz p6,p0=buf,2
+ ;;
+ EX( .Lexit3, (p6) st4 [buf]=r0,4 ) // 4-byte aligned
+(p6) adds len=-4,len;;
+ tbit.nz p6,p0=buf,3
+ ;;
+ EX( .Lexit3, (p6) st8 [buf]=r0,8 ) // 8-byte aligned
+(p6) adds len=-8,len;;
+ shr.u cnt=len,4 // number of 128-bit (2x64bit) words
+ ;;
+ cmp.eq p6,p0=r0,cnt
+ adds tmp=-1,cnt
+(p6) br.cond.dpnt.few .dotail // we have less than 16 bytes left
+ ;;
+ adds buf2=8,buf // setup second base pointer
+ mov ar.lc=tmp
+ ;;
+
+ //
+ // 16bytes/iteration core loop
+ //
+ // The second store can never generate a fault because
+ // we come into the loop only when we are 16-byte aligned.
+ // This means that if we cross a page then it will always be
+ // in the first store and never in the second.
+ //
+ //
+ // We need to keep track of the remaining length. A possible (optimistic)
+ // way would be to use ar.lc and derive how many bytes were left by
+ // doing: left = 16*ar.lc + 16. This would avoid the addition at
+ // every iteration.
+ // However we need to keep the synchronization point. A template
+ // M;;MB does not exist and thus we can keep the addition at no
+ // extra cycle cost (use a nop slot anyway). It also simplifies the
+ // (unlikely) error recovery code
+ //
+
+2:
+
+ EX(.Lexit3, st8 [buf]=r0,16 )
+ ;; // needed to get len correct when error
+ st8 [buf2]=r0,16
+ adds len=-16,len
+ br.cloop.dptk.few 2b
+ ;;
+ mov ar.lc=saved_lc
+ //
+ // tail correction based on len only
+ //
+ // We alternate the use of len3,len2 to allow parallelism and correct
+ // error handling. We also reuse p6/p7 to return correct value.
+ // The addition of len2/len3 does not cost anything more compared to
+ // the regular memset as we had empty slots.
+ //
+.dotail:
+ mov len2=len // for parallelization of error handling
+ mov len3=len
+ tbit.nz p6,p0=len,3
+ ;;
+ EX( .Lexit2, (p6) st8 [buf]=r0,8 ) // at least 8 bytes
+(p6) adds len3=-8,len2
+ tbit.nz p7,p6=len,2
+ ;;
+ EX( .Lexit2, (p7) st4 [buf]=r0,4 ) // at least 4 bytes
+(p7) adds len2=-4,len3
+ tbit.nz p6,p7=len,1
+ ;;
+ EX( .Lexit2, (p6) st2 [buf]=r0,2 ) // at least 2 bytes
+(p6) adds len3=-2,len2
+ tbit.nz p7,p6=len,0
+ ;;
+ EX( .Lexit2, (p7) st1 [buf]=r0 ) // only 1 byte left
+ mov ret0=r0 // success
+ br.ret.dptk.few rp // end of most likely path
+
+ //
+ // Outlined error handling code
+ //
+
+ //
+ // .Lexit3: comes from core loop, need restore pr/lc
+ // len contains bytes left
+ //
+ //
+ // .Lexit2:
+ // if p6 -> coming from st8 or st2 : len2 contains what's left
+ // if p7 -> coming from st4 or st1 : len3 contains what's left
+ // We must restore lc/pr even though they might not have been used.
+.Lexit2:
+(p6) mov len=len2
+(p7) mov len=len3
+ ;;
+ //
+ // .Lexit3: comes from head, need not restore pr/lc
+ // len contains bytes left
+ //
+.Lexit3:
+ mov ret0=len
+ mov ar.lc=saved_lc
+ br.ret.dptk.few rp
+ .endp
diff --git a/arch/ia64/lib/copy_page.S b/arch/ia64/lib/copy_page.S
new file mode 100644
index 000000000..0a956e5a2
--- /dev/null
+++ b/arch/ia64/lib/copy_page.S
@@ -0,0 +1,87 @@
+/*
+ *
+ * Optimized version of the standard copy_page() function
+ *
+ * Based on comments from ddd. Try not to overflow write buffer.
+ *
+ * Inputs:
+ * in0: address of target page
+ * in1: address of source page
+ * Output:
+ * no return value
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
+ */
+#include <asm/page.h>
+
+#define lcount r16
+#define saved_pr r17
+#define saved_lc r18
+#define saved_pfs r19
+#define src1 r20
+#define src2 r21
+#define tgt1 r22
+#define tgt2 r23
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .align 32
+ .global copy_page
+ .proc copy_page
+
+copy_page:
+ alloc saved_pfs=ar.pfs,10,0,0,8 // we need 6 rotating (8 minimum)
+ // + 2 input
+
+ .rotr t1[4], t2[4] // our 2 pipelines with depth of 4 each
+
+ mov saved_lc=ar.lc // save ar.lc ahead of time
+ mov saved_pr=pr // rotating predicates are preserved
+ // registers we must save.
+ mov src1=in1 // initialize 1st stream source
+ adds src2=8,in1 // initialize 2nd stream source
+ mov lcount=PAGE_SIZE/16-1 // as many 16-byte chunks as there are on a page
+ // -1 is because br.ctop is repeat/until
+
+ adds tgt2=8,in0 // initialize 2nd stream target
+ mov tgt1=in0 // initialize 1st stream target
+ ;;
+ mov pr.rot=1<<16 // pr16=1 & pr[17-63]=0 , 63 not modified
+
+ mov ar.lc=lcount // set loop counter
+ mov ar.ec=4 // ar.ec must match pipeline depth
+ ;;
+
+ // We need to preload the n-1 stages of the pipeline (n=depth).
+ // We do this during the "prolog" of the loop: we execute
+ // n-1 times the "load" bundle. Then both loads & stores are
+ // enabled until we reach the end of the last word of the page
+ // on the load side. Then, we enter the epilogue (controlled by ec)
+ // where we just do the stores and no loads n-1 times : drain the pipe.
+ //
+ // The initialization of the prolog is done via the predicate registers:
+ // the choice of pr19 DEPENDS on the depth of the pipeline (n).
+ // When lc > 0 pr63=1 and it is fed back into pr16 and pr16-pr62
+ // are then shifted right at every iteration,
+ // Thus by initializing pr16=1 and pr17-19=0 (19=16+4-1) before the loop
+ // we get pr19=1 after 4 iterations (n in our case).
+ //
+1: // engage loop now, let the magic happen...
+(p16) ld8 t1[0]=[src1],16 // new data on top of pipeline in 1st stream
+(p16) ld8 t2[0]=[src2],16 // new data on top of pipeline in 2nd stream
+ nop.i 0x0
+(p19) st8 [tgt1]=t1[3],16 // store top of 1st pipeline
+(p19) st8 [tgt2]=t2[3],16 // store top of 2nd pipeline
+ br.ctop.dptk.few 1b // once lc==0, ec-- & p16=0
+ // stores but no loads anymore
+ ;;
+ mov pr=saved_pr,0xffffffffffff0000 // restore predicates
+ mov ar.pfs=saved_pfs // restore ar.ec
+ mov ar.lc=saved_lc // restore saved lc
+ br.ret.sptk.few rp // bye...
+
+ .endp copy_page
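(Editorial sketch, not part of the patch.) Dropping the software pipelining and the rotating registers, the loop above computes the equivalent of:

        #include <asm/page.h>

        void
        copy_page_sketch (void *to, void *from)
        {
                unsigned long *dst = to, *src = from;
                long i;

                /* PAGE_SIZE/16 iterations, two 8-byte copies each (the two streams above) */
                for (i = 0; i < PAGE_SIZE / 16; i++) {
                        dst[2 * i]     = src[2 * i];
                        dst[2 * i + 1] = src[2 * i + 1];
                }
        }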
diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S
new file mode 100644
index 000000000..03a540a80
--- /dev/null
+++ b/arch/ia64/lib/copy_user.S
@@ -0,0 +1,71 @@
+/*
+ * This routine copies a linear memory buffer across the user/kernel boundary. When
+ * reading a byte from the source causes a fault, the remainder of the destination
+ * buffer is zeroed out. Note that this can happen only when copying from user
+ * to kernel memory and we do this to absolutely guarantee that the
+ * kernel doesn't operate on random data.
+ *
+ * This file is derived from arch/alpha/lib/copy_user.S.
+ *
+ * Inputs:
+ * in0: address of destination buffer
+ * in1: address of source buffer
+ * in2: length of buffer in bytes
+ * Outputs:
+ * r8: number of bytes that didn't get copied due to a fault
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#define EXI(x...) \
+99: x; \
+ .section __ex_table,"a"; \
+ data4 @gprel(99b); \
+ data4 .Lexit_in-99b; \
+ .previous
+
+#define EXO(x...) \
+99: x; \
+ .section __ex_table,"a"; \
+ data4 @gprel(99b); \
+ data4 .Lexit_out-99b; \
+ .previous
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .align 32
+ .global __copy_user
+ .proc __copy_user
+__copy_user:
+ alloc r10=ar.pfs,3,0,0,0
+ mov r9=ar.lc // save ar.lc
+ mov ar.lc=in2 // set ar.lc to length of buffer
+ br.sptk.few .Lentr
+
+ // XXX braindead copy loop---this needs to be optimized
+.Loop1:
+ EXI(ld1 r8=[in1],1)
+ ;;
+ EXO(st1 [in0]=r8,1)
+.Lentr: br.cloop.dptk.few .Loop1 // repeat unless ar.lc--==0
+ ;; // avoid RAW on ar.lc
+.Lexit_out:
+ mov r8=ar.lc // return how many bytes we _didn't_ copy
+ mov ar.lc=r9
+ br.ret.sptk.few rp
+
+.Lexit_in:
+ // clear the remainder of the buffer:
+ mov r8=ar.lc // return how many bytes we _didn't_ copy
+.Loop2:
+ st1 [in0]=r0,1 // this cannot fault because we get here only on user->kernel copies
+ br.cloop.dptk.few .Loop2
+ ;; // avoid RAW on ar.lc
+ mov ar.lc=r9
+ br.ret.sptk.few rp
+
+ .endp __copy_user
diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c
new file mode 100644
index 000000000..d09f11e21
--- /dev/null
+++ b/arch/ia64/lib/csum_partial_copy.c
@@ -0,0 +1,165 @@
+/*
+ * Network Checksum & Copy routine
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * Most of the code has been imported from Linux/Alpha
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * XXX Fixme: those 2 inlines are meant for debugging and will go away
+ */
+static inline unsigned short
+from64to16(unsigned long x)
+{
+ /* add up 32-bit words for 33 bits */
+ x = (x & 0xffffffff) + (x >> 32);
+ /* add up 16-bit and 17-bit words for 17+c bits */
+ x = (x & 0xffff) + (x >> 16);
+ /* add up 16-bit and 2-bit for 16+c bit */
+ x = (x & 0xffff) + (x >> 16);
+ /* add up carry.. */
+ x = (x & 0xffff) + (x >> 16);
+ return x;
+}
+
+static inline
+unsigned long do_csum_c(const unsigned char * buff, int len, unsigned int psum)
+{
+ int odd, count;
+ unsigned long result = (unsigned long)psum;
+
+ if (len <= 0)
+ goto out;
+ odd = 1 & (unsigned long) buff;
+ if (odd) {
+ result = *buff << 8;
+ len--;
+ buff++;
+ }
+ count = len >> 1; /* nr of 16-bit words.. */
+ if (count) {
+ if (2 & (unsigned long) buff) {
+ result += *(unsigned short *) buff;
+ count--;
+ len -= 2;
+ buff += 2;
+ }
+ count >>= 1; /* nr of 32-bit words.. */
+ if (count) {
+ if (4 & (unsigned long) buff) {
+ result += *(unsigned int *) buff;
+ count--;
+ len -= 4;
+ buff += 4;
+ }
+ count >>= 1; /* nr of 64-bit words.. */
+ if (count) {
+ unsigned long carry = 0;
+ do {
+ unsigned long w = *(unsigned long *) buff;
+ count--;
+ buff += 8;
+ result += carry;
+ result += w;
+ carry = (w > result);
+ } while (count);
+ result += carry;
+ result = (result & 0xffffffff) + (result >> 32);
+ }
+ if (len & 4) {
+ result += *(unsigned int *) buff;
+ buff += 4;
+ }
+ }
+ if (len & 2) {
+ result += *(unsigned short *) buff;
+ buff += 2;
+ }
+ }
+ if (len & 1)
+ result += *buff;
+
+ result = from64to16(result);
+
+ if (odd)
+ result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+
+out:
+ return result;
+}
+
+/*
+ * XXX Fixme
+ *
+ * This is very ugly but temporary. THIS NEEDS SERIOUS ENHANCEMENTS.
+ * But it's very tricky to get right even in C.
+ */
+extern unsigned long do_csum(const unsigned char *, int);
+
+static unsigned int
+do_csum_partial_copy_from_user (const char *src, char *dst, int len,
+ unsigned int psum, int *errp)
+{
+ const unsigned char *psrc = src;
+ unsigned long result;
+ int cplen = len;
+ int r = 0;
+
+ /* XXX Fixme
+ * for now we separate the copy from the checksum because of obvious
+ * alignment difficulties. Look at the Alpha code and you'll be
+ * scared.
+ */
+
+ while ( cplen-- ) r |=__get_user(*dst++,psrc++);
+
+ if ( r && errp ) *errp = r;
+
+ result = do_csum(src, len);
+
+ /* add in old sum, and carry.. */
+ result += psum;
+ /* 32+c bits -> 32 bits */
+ result = (result & 0xffffffff) + (result >> 32);
+ return result;
+}
+
+unsigned int
+csum_partial_copy_from_user(const char *src, char *dst, int len,
+ unsigned int sum, int *errp)
+{
+ if (!access_ok(src, len, VERIFY_READ)) {
+ *errp = -EFAULT;
+ memset(dst, 0, len);
+ return sum;
+ }
+
+ return do_csum_partial_copy_from_user(src, dst, len, sum, errp);
+}
+
+unsigned int
+csum_partial_copy_nocheck(const char *src, char *dst, int len, unsigned int sum)
+{
+ return do_csum_partial_copy_from_user(src, dst, len, sum, NULL);
+}
+
+unsigned int
+csum_partial_copy (const char *src, char *dst, int len, unsigned int sum)
+{
+ unsigned int ret;
+ int error = 0;
+
+ ret = do_csum_partial_copy_from_user(src, dst, len, sum, &error);
+ if (error)
+ printk("csum_partial_copy_old(): tell mingo to convert me!\n");
+
+ return ret;
+}
+
diff --git a/arch/ia64/lib/do_csum.S b/arch/ia64/lib/do_csum.S
new file mode 100644
index 000000000..d8174f10a
--- /dev/null
+++ b/arch/ia64/lib/do_csum.S
@@ -0,0 +1,230 @@
+/*
+ *
+ * Optimized version of the standard do_csum() function
+ *
+ * Return: a 64bit quantity containing the 16bit Internet checksum
+ *
+ * Inputs:
+ * in0: address of buffer to checksum (char *)
+ * in1: length of the buffer (int)
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
+ *
+ */
+
+//
+// Theory of operations:
+// The goal is to go as quickly as possible to the point where
+// we can checksum 8 bytes/loop. Before reaching that point we must
+// take care of incorrect alignment of first byte.
+//
+// The code hereafter also takes care of the "tail" part of the buffer
+// before entering the core loop, if any. The checksum is a sum so it
+// allows us to commute operations. So we do the "head" and "tail"
+// first to finish at full speed in the body. Once we get the head and
+// tail values, we feed them into the pipeline, very handy initialization.
+//
+// Of course we deal with the special case where the whole buffer fits
+// into one 8 byte word. In this case we have only one entry in the pipeline.
+//
+// We use a (3+1)-stage pipeline in the loop to account for possible
+// load latency and also to accommodate the head and tail.
+//
+// The end of the function deals with folding the checksum from 64bits
+// down to 16bits taking care of the carry.
+//
+// This version avoids synchronization in the core loop by also using a
+// pipeline for the accumulation of the checksum in result[].
+//
+// p[]
+// |---|
+// 0| | r32 : new value loaded in pipeline
+// |---|
+// 1| | r33 : in transit data
+// |---|
+// 2| | r34 : current value to add to checksum
+// |---|
+// 3| | r35 : previous value added to checksum (previous iteration)
+// |---|
+//
+// result[]
+// |---|
+// 0| | r36 : new checksum
+// |---|
+// 1| | r37 : previous value of checksum
+// |---|
+// 2| | r38 : final checksum when out of the loop (after 2 epilogue rots)
+// |---|
+//
+//
+// NOT YET DONE:
+// - Take advantage of the MMI bandwidth to load more than 8 bytes per loop
+// iteration
+// - use the lfetch instruction to augment the chances of the data being in
+// the cache when we need it.
+// - Maybe another algorithm which would take care of the folding at the
+// end in a different manner
+// - Work with people more knowledgeable than me on the network stack
+// to figure out if we could not split the function depending on the
+// type of packet or alignment we get. Like the ip_fast_csum() routine
+// where we know we have at least 20bytes worth of data to checksum.
+// - Look at RFCs about checksums to see whether or not we can do better
+//
+// - Do a better job of handling small packets.
+//
+#define saved_pfs r11
+#define hmask r16
+#define tmask r17
+#define first r18
+#define firstval r19
+#define firstoff r20
+#define last r21
+#define lastval r22
+#define lastoff r23
+#define saved_lc r24
+#define saved_pr r25
+#define tmp1 r26
+#define tmp2 r27
+#define tmp3 r28
+#define carry r29
+
+#define buf in0
+#define len in1
+
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+// unsigned long do_csum(unsigned char *buf,int len)
+
+ .align 32
+ .global do_csum
+ .proc do_csum
+do_csum:
+ alloc saved_pfs=ar.pfs,2,8,0,8
+
+ .rotr p[4], result[3]
+ mov ret0=r0 // in case we have zero length
+ cmp4.lt p0,p6=r0,len // check for zero length or negative (32bit len)
+ ;; // avoid WAW on CFM
+ mov tmp3=0x7 // a temporary mask/value
+ add tmp1=buf,len // last byte's address
+(p6) br.ret.spnt.few rp // return if true (hope we can avoid that)
+
+ and firstoff=7,buf // how many bytes off for first element
+ tbit.nz p10,p0=buf,0 // is buf an odd address ?
+ mov hmask=-1 // initialize head mask
+ ;;
+
+ andcm first=buf,tmp3 // 8byte aligned down address of first element
+ mov tmask=-1 // initialize tail mask
+ adds tmp2=-1,tmp1 // last-1
+ ;;
+ and lastoff=7,tmp1 // how many bytes off for last element
+ andcm last=tmp2,tmp3 // address of word containing last byte
+ mov saved_pr=pr // preserve predicates (rotation)
+ ;;
+ sub tmp3=last,first // tmp3=distance from first to last
+ cmp.eq p8,p9=last,first // everything fits in one word ?
+ sub tmp1=8,lastoff // complement to lastoff
+
+ ld8 firstval=[first],8 // load,ahead of time, "first" word
+ shl tmp2=firstoff,3 // number of bits
+ ;;
+ and tmp1=7, tmp1 // make sure that if tmp1==8 -> tmp1=0
+
+(p9) ld8 lastval=[last] // load,ahead of time, "last" word, if needed
+(p8) mov lastval=r0 // we don't need lastval if first==last
+ mov result[1]=r0 // initialize result
+ ;;
+
+ shl tmp1=tmp1,3 // number of bits
+ shl hmask=hmask,tmp2 // build head mask, mask off [0,firstoff[
+ ;;
+ shr.u tmask=tmask,tmp1 // build tail mask, mask off ]8,lastoff]
+ mov saved_lc=ar.lc // save lc
+ ;;
+(p8) and hmask=hmask,tmask // apply tail mask to head mask if 1 word only
+(p9) and p[1]=lastval,tmask // mask last value as appropriate
+ shr.u tmp3=tmp3,3 // we do 8 bytes per loop
+ ;;
+ cmp.lt p6,p7=2,tmp3 // tmp3 > 2 ?
+ and p[2]=firstval,hmask // and mask it as appropriate
+ add tmp1=-2,tmp3 // -2 = -1 (br.ctop) -1 (last-first)
+ ;;
+ // XXX Fixme: not very nice initialization here
+ //
+ // Setup loop control registers:
+ //
+ // tmp3=0 (1 word) : lc=0, ec=2, p16=F
+ // tmp3=1 (2 words) : lc=0, ec=3, p16=F
+ // tmp3=2 (3 words) : lc=0, ec=4, p16=T
+ // tmp3>2 (4 or more): lc=tmp3-2, ec=4, p16=T
+ //
+ cmp.eq p8,p9=r0,tmp3 // tmp3 == 0 ?
+(p6) mov ar.lc=tmp1
+(p7) mov ar.lc=0
+ ;;
+ cmp.lt p6,p7=1,tmp3 // tmp3 > 1 ?
+(p8) mov ar.ec=2 // we need the extra rotation on result[]
+(p9) mov ar.ec=3 // hard not to set it twice sometimes
+ ;;
+ mov carry=r0 // initialize carry
+(p6) mov ar.ec=4
+(p6) mov pr.rot=0xffffffffffff0000 // p16=T, p18=T
+
+ cmp.ne p8,p0=r0,r0 // p8 is false
+ mov p[3]=r0 // make sure first compare fails
+(p7) mov pr.rot=0xfffffffffffe0000 // p16=F, p18=T
+ ;;
+1:
+(p16) ld8 p[0]=[first],8 // load next
+(p8) adds carry=1,carry // add carry on prev_prev_value
+(p18) add result[0]=result[1],p[2] // new_res = prev_res + cur_val
+ cmp.ltu p8,p0=result[1],p[3] // p8= prev_result < prev_val
+ br.ctop.dptk.few 1b // loop until lc--==0
+ ;; // RAW on carry when loop exits
+ (p8) adds carry=1,carry;; // correct for carry on prev_value
+ add result[2]=carry,result[2];; // add carry to final result
+ cmp.ltu p6,p7=result[2], carry // check for new carry
+ ;;
+(p6) adds result[2]=1,result[1] // correct if required
+ movl tmp3=0xffffffff
+ ;;
+ // XXX Fixme
+ //
+ // now fold 64 into 16 bits taking care of carry
+ // that's not very good because it is highly sequential
+ //
+ and tmp1=result[2],tmp3
+ shr.u tmp2=result[2],32
+ ;;
+ add result[2]=tmp1,tmp2
+ shr.u tmp3=tmp3,16
+ ;;
+ and tmp1=result[2],tmp3
+ shr.u tmp2=result[2],16
+ ;;
+ add result[2]=tmp1,tmp2
+ ;;
+ and tmp1=result[2],tmp3
+ shr.u tmp2=result[2],16
+ ;;
+ add result[2]=tmp1,tmp2
+ ;;
+ and tmp1=result[2],tmp3
+ shr.u tmp2=result[2],16
+ ;;
+ add ret0=tmp1,tmp2
+ mov pr=saved_pr,0xffffffffffff0000
+ ;;
+ // if buf was odd then swap bytes
+ mov ar.pfs=saved_pfs // restore ar.ec
+(p10) mux1 ret0=ret0,@rev // reverse word
+ ;;
+ mov ar.lc=saved_lc
+(p10) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes
+ br.ret.sptk.few rp
diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S
new file mode 100644
index 000000000..0195ae5f5
--- /dev/null
+++ b/arch/ia64/lib/flush.S
@@ -0,0 +1,37 @@
+/*
+ * Cache flushing routines.
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <asm/page.h>
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .align 16
+ .global ia64_flush_icache_page
+ .proc ia64_flush_icache_page
+ia64_flush_icache_page:
+ alloc r2=ar.pfs,1,0,0,0
+ mov r3=ar.lc // save ar.lc
+ mov r8=PAGE_SIZE/64-1 // repeat/until loop
+ ;;
+ mov ar.lc=r8
+ add r8=32,in0
+ ;;
+.Loop1: fc in0 // issuable on M0 only
+ add in0=64,in0
+ fc r8
+ add r8=64,r8
+ br.cloop.sptk.few .Loop1
+ ;;
+ sync.i
+ ;;
+ srlz.i
+ ;;
+ mov ar.lc=r3 // restore ar.lc
+ br.ret.sptk.few rp
+ .endp ia64_flush_icache_page
diff --git a/arch/ia64/lib/idiv.S b/arch/ia64/lib/idiv.S
new file mode 100644
index 000000000..a12097c94
--- /dev/null
+++ b/arch/ia64/lib/idiv.S
@@ -0,0 +1,158 @@
+/*
+ * Integer division routine.
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+/* Simple integer division. It uses the straightforward division
+ algorithm. This may not be the absolutely fastest way to do it,
+ but it's not horrible either. According to ski, the worst case
+ scenario of dividing 0xffffffffffffffff by 1 takes 133 cycles.
+
+ An alternative would be to use an algorithm similar to the
+ floating point division algorithm (Newton-Raphson iteration),
+ but that approach is rather tricky (one has to be very careful
+ to get the last bit right...).
+
+ While this algorithm is straight-forward, it does use a couple
+ of neat ia-64 specific tricks:
+
+ - it uses the floating point unit to determine the initial
+ shift amount (shift = floor(ld(x)) - floor(ld(y)))
+
+ - it uses predication to avoid a branch in the case where
+ x < y (this is what p8 is used for)
+
+ - it uses rotating registers and the br.ctop branch to
+ implement a software-pipelined loop that's unrolled
+ twice (without any code expansion!)
+
+ - the code is relatively well scheduled to avoid unnecessary
+ nops while maximizing parallelism
+*/
+
+#include <asm/break.h>
+
+ .text
+ .psr abi64
+#ifdef __BIG_ENDIAN__
+ .psr msb
+ .msb
+#else
+ .psr lsb
+ .lsb
+#endif
+
+#ifdef MODULO
+# define OP mod
+# define Q r9
+# define R r8
+#else
+# define OP div
+# define Q r8
+# define R r9
+#endif
+
+#ifdef SINGLE
+# define PREC si
+#else
+# define PREC di
+#endif
+
+#ifdef UNSIGNED
+# define SGN u
+# define INT_TO_FP(a,b) fma.s0 a=b,f1,f0
+# define FP_TO_INT(a,b) fcvt.fxu.trunc.s0 a=b
+#else
+# define SGN
+# define INT_TO_FP(a,b) fcvt.xf a=b
+# define FP_TO_INT(a,b) fcvt.fx.trunc.s0 a=b
+#endif
+
+#define PASTE1(a,b) a##b
+#define PASTE(a,b) PASTE1(a,b)
+#define NAME PASTE(PASTE(__,SGN),PASTE(OP,PASTE(PREC,3)))
+
+ .align 32
+ .global NAME
+ .proc NAME
+NAME:
+
+ alloc r2=ar.pfs,2,6,0,8
+ mov r18=pr
+#ifdef SINGLE
+# ifdef UNSIGNED
+ zxt4 in0=in0
+ zxt4 in1=in1
+# else
+ sxt4 in0=in0
+ sxt4 in1=in1
+# endif
+ ;;
+#endif
+
+#ifndef UNSIGNED
+ cmp.lt p6,p0=in0,r0 // x negative?
+ cmp.lt p7,p0=in1,r0 // y negative?
+ ;;
+(p6) sub in0=r0,in0 // make x positive
+(p7) sub in1=r0,in1 // ditto for y
+ ;;
+#endif
+
+ setf.sig f8=in0
+ mov r3=ar.lc // save ar.lc
+ setf.sig f9=in1
+ ;;
+ mov Q=0 // initialize q
+ mov R=in0 // stash away x in a static register
+ mov r16=1 // r16 = 1
+ INT_TO_FP(f8,f8)
+ cmp.eq p8,p0=0,in0 // x==0?
+ cmp.eq p9,p0=0,in1 // y==0?
+ ;;
+ INT_TO_FP(f9,f9)
+(p8) br.dpnt.few .L3
+(p9) break __IA64_BREAK_KDB // attempted division by zero (should never happen)
+ mov ar.ec=r0 // epilogue count = 0
+ ;;
+ getf.exp r14=f8 // r14 = exponent of x
+ getf.exp r15=f9 // r15 = exponent of y
+ mov ar.lc=r0 // loop count = 0
+ ;;
+ sub r17=r14,r15 // r17 = (exp of x - exp y) = shift amount
+ cmp.ge p8,p0=r14,r15
+ ;;
+
+ .rotr y[2], mask[2] // in0 and in1 may no longer be valid after
+ // the first write to a rotating register!
+
+(p8) shl y[1]=in1,r17 // y[1] = y<<shift
+(p8) shl mask[1]=r16,r17 // mask[1] = 1<<shift
+
+(p8) mov ar.lc=r17 // loop count = r17
+ ;;
+.L1:
+(p8) cmp.geu.unc p9,p0=R,y[1]// p9 = (x >= y[1])
+(p8) shr.u mask[0]=mask[1],1 // prepare mask[0] and y[0] for next
+(p8) shr.u y[0]=y[1],1 // iteration
+ ;;
+(p9) sub R=R,y[1] // if (x >= y[1]), subtract y[1] from x
+(p9) add Q=Q,mask[1] // and set corresponding bit in q (Q)
+ br.ctop.dptk.few .L1 // repeated unless ar.lc-- == 0
+ ;;
+.L2:
+#ifndef UNSIGNED
+# ifdef MODULO
+(p6) sub R=r0,R // set sign of remainder according to x
+# else
+(p6) sub Q=r0,Q // set sign of quotient
+ ;;
+(p7) sub Q=r0,Q
+# endif
+#endif
+.L3:
+ mov ar.pfs=r2 // restore ar.pfs
+ mov ar.lc=r3 // restore ar.lc
+ mov pr=r18,0xffffffffffff0000 // restore p16-p63
+ br.ret.sptk.few rp
diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S
new file mode 100644
index 000000000..595720a2d
--- /dev/null
+++ b/arch/ia64/lib/memset.S
@@ -0,0 +1,111 @@
+/*
+ *
+ * Optimized version of the standard memset() function
+ *
+ * Return: none
+ *
+ *
+ * Inputs:
+ * in0: address of buffer
+ * in1: byte value to use for storing
+ * in2: length of the buffer
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
+ */
+
+
+// arguments
+//
+#define buf r32
+#define val r33
+#define len r34
+
+//
+// local registers
+//
+#define saved_pfs r14
+#define cnt r18
+#define buf2 r19
+#define saved_lc r20
+#define saved_pr r21
+#define tmp r22
+
+ .text
+ .psr abi64
+ .psr lsb
+
+ .align 16
+ .global memset
+ .proc memset
+
+memset:
+ alloc saved_pfs=ar.pfs,3,0,0,0 // cnt is sink here
+ cmp.eq p8,p0=r0,len // check for zero length
+ mov saved_lc=ar.lc // preserve ar.lc (slow)
+ ;;
+ adds tmp=-1,len // br.ctop is repeat/until
+ tbit.nz p6,p0=buf,0 // odd alignment
+(p8) br.ret.spnt.few rp
+
+ cmp.lt p7,p0=16,len // if len > 16 then long memset
+ mux1 val=val,@brcst // prepare value
+(p7) br.cond.dptk.few long_memset
+ ;;
+ mov ar.lc=tmp // initialize lc for small count
+ ;; // avoid RAW and WAW on ar.lc
+1: // worst case 15 cycles, avg 8 cycles
+ st1 [buf]=val,1
+ br.cloop.dptk.few 1b
+ ;; // avoid RAW on ar.lc
+ mov ar.lc=saved_lc
+ mov ar.pfs=saved_pfs
+ br.ret.sptk.few rp // end of short memset
+
+ // at this point we know we have more than 16 bytes to store
+ // so we focus on alignment
+long_memset:
+(p6) st1 [buf]=val,1 // 1-byte aligned
+(p6) adds len=-1,len;; // sync because buf is modified
+ tbit.nz p6,p0=buf,1
+ ;;
+(p6) st2 [buf]=val,2 // 2-byte aligned
+(p6) adds len=-2,len;;
+ tbit.nz p6,p0=buf,2
+ ;;
+(p6) st4 [buf]=val,4 // 4-byte aligned
+(p6) adds len=-4,len;;
+ tbit.nz p6,p0=buf,3
+ ;;
+(p6) st8 [buf]=val,8 // 8-byte aligned
+(p6) adds len=-8,len;;
+ shr.u cnt=len,4 // number of 128-bit (2x64bit) words
+ ;;
+ cmp.eq p6,p0=r0,cnt
+ adds tmp=-1,cnt
+(p6) br.cond.dpnt.few .dotail // we have less than 16 bytes left
+ ;;
+ adds buf2=8,buf // setup second base pointer
+ mov ar.lc=tmp
+ ;;
+2: // 16bytes/iteration
+ st8 [buf]=val,16
+ st8 [buf2]=val,16
+ br.cloop.dptk.few 2b
+ ;;
+.dotail: // tail correction based on len only
+ tbit.nz p6,p0=len,3
+ ;;
+(p6) st8 [buf]=val,8 // at least 8 bytes
+ tbit.nz p6,p0=len,2
+ ;;
+(p6) st4 [buf]=val,4 // at least 4 bytes
+ tbit.nz p6,p0=len,1
+ ;;
+(p6) st2 [buf]=val,2 // at least 2 bytes
+ tbit.nz p6,p0=len,0
+ mov ar.lc=saved_lc
+ ;;
+(p6) st1 [buf]=val // only 1 byte left
+ br.ret.dptk.few rp
+ .endp
diff --git a/arch/ia64/lib/strlen.S b/arch/ia64/lib/strlen.S
new file mode 100644
index 000000000..3062716b1
--- /dev/null
+++ b/arch/ia64/lib/strlen.S
@@ -0,0 +1,197 @@
+/*
+ *
+ * Optimized version of the standard strlen() function
+ *
+ *
+ * Inputs:
+ * in0 address of string
+ *
+ * Outputs:
+ * ret0 the number of characters in the string (0 if empty string)
+ * does not count the \0
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 09/24/99 S.Eranian add speculation recovery code
+ */
+
+//
+//
+// This is an enhanced version of the basic strlen. It includes a combination
+// of compute zero index (czx), parallel comparisons, speculative loads and
+// loop unroll using rotating registers.
+//
+// General Ideas about the algorithm:
+// The goal is to look at the string in chunks of 8 bytes.
+// so we need to do a few extra checks at the beginning because the
+// string may not be 8-byte aligned. In this case we load the 8byte
+// quantity which includes the start of the string and mask the unused
+// bytes with 0xff to avoid confusing czx.
+// We use speculative loads and software pipelining to hide memory
+// latency and do read ahead safely. This way we defer any exception.
+//
+// Because we don't want the kernel to be relying on particular
+// settings of the DCR register, we provide recovery code in case
+// speculation fails. The recovery code is going to "redo" the work using
+// only normal loads. If we still get a fault then we generate a
+// kernel panic. Otherwise we return the strlen as usual.
+//
+// The fact that speculation may fail can be caused, for instance, by
+// the DCR.dm bit being set. In this case TLB misses are deferred, i.e.,
+// a NaT bit will be set if the translation is not present. The normal
+// load, on the other hand, will cause the translation to be inserted
+// if the mapping exists.
+//
+// It should be noted that we execute recovery code only when we need
+// to use the data that has been speculatively loaded: we don't execute
+// recovery code on pure read ahead data.
+//
+// Remarks:
+// - the cmp r0,r0 is used as a fast way to initialize a predicate
+// register to 1. This is required to make sure that we get the parallel
+// compare correct.
+//
+// - we don't use the epilogue counter to exit the loop but we need to set
+// it to zero beforehand.
+//
+// - after the loop we must test for NaT values because neither the
+// czx nor cmp instruction raises a NaT consumption fault. We must be
+// careful not to look too far for a NaT we don't care about.
+// For instance we don't need to look at a NaT in val2 if the zero byte
+// was in val1.
+//
+// - Clearly performance tuning is required.
+//
+//
+//
+#define saved_pfs r11
+#define tmp r10
+#define base r16
+#define orig r17
+#define saved_pr r18
+#define src r19
+#define mask r20
+#define val r21
+#define val1 r22
+#define val2 r23
+
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .align 32
+ .global strlen
+ .proc strlen
+strlen:
+ alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8
+
+ .rotr v[2], w[2] // declares our 4 aliases
+
+ extr.u tmp=in0,0,3 // tmp=least significant 3 bits
+ mov orig=in0 // keep track of initial byte address
+ dep src=0,in0,0,3 // src=8byte-aligned in0 address
+ mov saved_pr=pr // preserve predicates (rotation)
+ ;;
+ ld8 v[1]=[src],8 // must not speculate: can fail here
+ shl tmp=tmp,3 // multiply by 8bits/byte
+ mov mask=-1 // our mask
+ ;;
+ ld8.s w[1]=[src],8 // speculatively load next
+ cmp.eq p6,p0=r0,r0 // sets p6 to true for cmp.and
+ sub tmp=64,tmp // how many bits to shift our mask to the right
+ ;;
+ shr.u mask=mask,tmp // zero the bits covering v[1]'s valid part
+ mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs)
+ ;;
+ add base=-16,src // keep track of aligned base
+ or v[1]=v[1],mask // now we have a safe initial byte pattern
+ ;;
+1:
+ ld8.s v[0]=[src],8 // speculatively load next
+ czx1.r val1=v[1] // search 0 byte from right
+ czx1.r val2=w[1] // search 0 byte from right following 8bytes
+ ;;
+ ld8.s w[0]=[src],8 // speculatively load next to next
+ cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8
+ cmp.eq.and p6,p0=8,val2 // p6 = p6 and mask==8
+(p6) br.wtop.dptk.few 1b // loop until p6 == 0
+ ;;
+ //
+ // We must try the recovery code iff
+ // val1_is_nat || (val1==8 && val2_is_nat)
+ //
+ // XXX Fixme
+ // - there must be a better way of doing the test
+ //
+ cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate)
+#ifdef notyet
+ tnat.nz p6,p7=val1 // test NaT on val1
+#else
+ tnat.z p7,p6=val1 // test NaT on val1
+#endif
+(p6) br.cond.spnt.few recover// jump to recovery if val1 is NaT
+ ;;
+ //
+ // if we come here p7 is true, i.e., initialized for // cmp
+ //
+ cmp.eq.and p7,p0=8,val1// val1==8?
+ tnat.nz.and p7,p0=val2 // test NaT if val2
+(p7) br.cond.spnt.few recover// jump to recovery if val2 is NaT
+ ;;
+(p8) mov val1=val2 // the other test got us out of the loop
+(p8) adds src=-16,src // correct position when 3 ahead
+(p9) adds src=-24,src // correct position when 4 ahead
+ ;;
+ sub ret0=src,orig // distance from base
+ sub tmp=8,val1 // which byte in word
+ mov pr=saved_pr,0xffffffffffff0000
+ ;;
+ sub ret0=ret0,tmp // adjust
+ mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
+ br.ret.sptk.few rp // end of normal execution
+
+ //
+ // Outlined recovery code when speculation failed
+ //
+ // This time we don't use speculation and rely on the normal exception
+ // mechanism. That's why the loop is not as good as the previous one
+ // because read ahead is not possible
+ //
+ // IMPORTANT:
+ // Please note that in the case of strlen() as opposed to strlen_user()
+ // we don't use the exception mechanism, as this function is not
+ // supposed to fail. If that happens it means we have a bug and the
+ // code will cause a kernel fault.
+ //
+ // XXX Fixme
+ // - today we restart from the beginning of the string instead
+ // of trying to continue where we left off.
+ //
+recover:
+ ld8 val=[base],8 // will fail if unrecoverable fault
+ ;;
+ or val=val,mask // remask first bytes
+ cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop
+ ;;
+ //
+ // ar.ec is still zero here
+ //
+2:
+(p6) ld8 val=[base],8 // will fail if unrecoverable fault
+ ;;
+ czx1.r val1=val // search 0 byte from right
+ ;;
+ cmp.eq p6,p0=8,val1 // val1==8 ?
+(p6) br.wtop.dptk.few 2b // loop until p6 == 0
+ sub ret0=base,orig // distance from base
+ sub tmp=8,val1
+ mov pr=saved_pr,0xffffffffffff0000
+ ;;
+ sub ret0=ret0,tmp // length=now - back -1
+ mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
+ br.ret.sptk.few rp // end of successful recovery code
+
+ .endp strlen
diff --git a/arch/ia64/lib/strlen_user.S b/arch/ia64/lib/strlen_user.S
new file mode 100644
index 000000000..8149dde8a
--- /dev/null
+++ b/arch/ia64/lib/strlen_user.S
@@ -0,0 +1,213 @@
+/*
+ * Optimized version of the strlen_user() function
+ *
+ * Inputs:
+ * in0 address of buffer
+ *
+ * Outputs:
+ * ret0 0 in case of fault, strlen(buffer)+1 otherwise
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 01/19/99 S.Eranian heavily enhanced version (see details below)
+ * 09/24/99 S.Eranian added speculation recovery code
+ */
+
+//
+// int strlen_user(char *)
+// ------------------------
+// Returns:
+// - length of string + 1
+// - 0 in case an exception is raised
+//
+// This is an enhanced version of the basic strlen_user. It includes a
+// combination of compute zero index (czx), parallel comparisons, speculative
+// loads and loop unroll using rotating registers.
+//
+// General Ideas about the algorithm:
+// The goal is to look at the string in chunks of 8 bytes.
+// so we need to do a few extra checks at the beginning because the
+// string may not be 8-byte aligned. In this case we load the 8byte
+// quantity which includes the start of the string and mask the unused
+// bytes with 0xff to avoid confusing czx.
+// We use speculative loads and software pipelining to hide memory
+// latency and do read ahead safely. This way we defer any exception.
+//
+// Because we don't want the kernel to be relying on particular
+// settings of the DCR register, we provide recovery code in case
+// speculation fails. The recovery code is going to "redo" the work using
+// only normal loads. If we still get a fault then we return an
+// error (ret0=0). Otherwise we return the strlen+1 as usual.
+// The fact that speculation may fail can be caused, for instance, by
+// the DCR.dm bit being set. In this case TLB misses are deferred, i.e.,
+// a NaT bit will be set if the translation is not present. The normal
+// load, on the other hand, will cause the translation to be inserted
+// if the mapping exists.
+//
+// It should be noted that we execute recovery code only when we need
+// to use the data that has been speculatively loaded: we don't execute
+// recovery code on pure read ahead data.
+//
+// Remarks:
+// - the cmp r0,r0 is used as a fast way to initialize a predicate
+// register to 1. This is required to make sure that we get the parallel
+// compare correct.
+//
+// - we don't use the epilogue counter to exit the loop but we need to set
+// it to zero beforehand.
+//
+// - after the loop we must test for NaT values because neither the
+// czx nor cmp instruction raises a NaT consumption fault. We must be
+// careful not to look too far for a NaT we don't care about.
+// For instance we don't need to look at a NaT in val2 if the zero byte
+// was in val1.
+//
+// - Clearly performance tuning is required.
+//
+//
+//
+
+#define EX(y,x...) \
+ .section __ex_table,"a"; \
+ data4 @gprel(99f); \
+ data4 y-99f; \
+ .previous; \
+99: x
+
+#define saved_pfs r11
+#define tmp r10
+#define base r16
+#define orig r17
+#define saved_pr r18
+#define src r19
+#define mask r20
+#define val r21
+#define val1 r22
+#define val2 r23
+
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .align 32
+ .global __strlen_user
+ .proc __strlen_user
+__strlen_user:
+ alloc saved_pfs=ar.pfs,11,0,0,8
+
+ .rotr v[2], w[2] // declares our 4 aliases
+
+ extr.u tmp=in0,0,3 // tmp=least significant 3 bits
+ mov orig=in0 // keep track of initial byte address
+ dep src=0,in0,0,3 // src=8byte-aligned in0 address
+ mov saved_pr=pr // preserve predicates (rotation)
+ ;;
+ ld8.s v[1]=[src],8 // load the initial 8bytes (must speculate)
+ shl tmp=tmp,3 // multiply by 8bits/byte
+ mov mask=-1 // our mask
+ ;;
+ ld8.s w[1]=[src],8 // load next 8 bytes in 2nd pipeline
+ cmp.eq p6,p0=r0,r0 // sets p6 (required because of // cmp.and)
+ sub tmp=64,tmp // how many bits to shift our mask to the right
+ ;;
+ shr.u mask=mask,tmp // zero the bits covering v[1]'s valid part
+ mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs)
+ ;;
+ add base=-16,src // keep track of aligned base
+ chk.s v[1], recover // if already NaT, then directly skip to recover
+ or v[1]=v[1],mask // now we have a safe initial byte pattern
+ ;;
+1:
+ ld8.s v[0]=[src],8 // speculatively load next
+ czx1.r val1=v[1] // search 0 byte from right
+ czx1.r val2=w[1] // search 0 byte from right following 8bytes
+ ;;
+ ld8.s w[0]=[src],8 // speculatively load next to next
+ cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8
+ cmp.eq.and p6,p0=8,val2 // p6 = p6 and mask==8
+(p6) br.wtop.dptk.few 1b // loop until p6 == 0
+ ;;
+ //
+ // We must try the recovery code iff
+ // val1_is_nat || (val1==8 && val2_is_nat)
+ //
+ // XXX Fixme
+ // - there must be a better way of doing the test
+ //
+ cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate)
+#ifdef notyet
+ tnat.nz p6,p7=val1 // test NaT on val1
+#else
+ tnat.z p7,p6=val1 // test NaT on val1
+#endif
+(p6) br.cond.spnt.few recover// jump to recovery if val1 is NaT
+ ;;
+ //
+ // if we come here p7 is true, i.e., initialized for // cmp
+ //
+ cmp.eq.and p7,p0=8,val1// val1==8?
+ tnat.nz.and p7,p0=val2 // test NaT if val2
+(p7) br.cond.spnt.few recover// jump to recovery if val2 is NaT
+ ;;
+(p8) mov val1=val2 // val2 contains the value
+(p8) adds src=-16,src // correct position when 3 ahead
+(p9) adds src=-24,src // correct position when 4 ahead
+ ;;
+ sub ret0=src,orig // distance from origin
+ sub tmp=7,val1 // 7=8-1 because this strlen returns strlen+1
+ mov pr=saved_pr,0xffffffffffff0000
+ ;;
+ sub ret0=ret0,tmp // length=now - back -1
+ mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
+ br.ret.sptk.few rp // end of normal execution
+
+ //
+ // Outlined recovery code when speculation failed
+ //
+ // This time we don't use speculation and rely on the normal exception
+ // mechanism. That's why the loop is not as good as the previous one
+ // because read ahead is not possible
+ //
+ // XXX Fixme
+ // - today we restart from the beginning of the string instead
+ // of trying to continue where we left off.
+ //
+recover:
+ EX(.Lexit1, ld8 val=[base],8) // load the initial bytes
+ ;;
+ or val=val,mask // remask first bytes
+ cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop
+ ;;
+ //
+ // ar.ec is still zero here
+ //
+2:
+ EX(.Lexit1, (p6) ld8 val=[base],8)
+ ;;
+ czx1.r val1=val // search 0 byte from right
+ ;;
+ cmp.eq p6,p0=8,val1 // val1==8 ?
+(p6) br.wtop.dptk.few 2b // loop until p6 == 0
+ ;;
+ sub ret0=base,orig // distance from base
+ sub tmp=7,val1 // 7=8-1 because this strlen returns strlen+1
+ mov pr=saved_pr,0xffffffffffff0000
+ ;;
+ sub ret0=ret0,tmp // length=now - back -1
+ mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
+ br.ret.sptk.few rp // end of successful recovery code
+
+ //
+ // We failed even on the normal load (called from exception handler)
+ //
+.Lexit1:
+ mov ret0=0
+ mov pr=saved_pr,0xffffffffffff0000
+ mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
+ br.ret.sptk.few rp
+
+ .endp __strlen_user
diff --git a/arch/ia64/lib/strncpy_from_user.S b/arch/ia64/lib/strncpy_from_user.S
new file mode 100644
index 000000000..17f71f1a0
--- /dev/null
+++ b/arch/ia64/lib/strncpy_from_user.S
@@ -0,0 +1,53 @@
+/*
+ * Just like strncpy() except for the return value. If no fault occurs during
+ * the copying, the number of bytes copied is returned. If a fault occurs,
+ * -EFAULT is returned.
+ *
+ * Inputs:
+ * in0: address of destination buffer
+ * in1: address of string to be copied
+ * in2: length of buffer in bytes
+ * Outputs:
+ * r8: -EFAULT in case of fault or number of bytes copied if no fault
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#define EX(x...) \
+99: x; \
+ .section __ex_table,"a"; \
+ data4 @gprel(99b); \
+ data4 .Lexit-99b; \
+ .previous
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .align 32
+ .global __strncpy_from_user
+ .proc __strncpy_from_user
+__strncpy_from_user:
+ alloc r11=ar.pfs,3,0,0,0
+ mov r9=in1
+ add r10=in1,in2
+
+ // XXX braindead copy loop---this needs to be optimized
+.Loop1:
+ EX(ld1 r8=[in1],1)
+ ;;
+ st1 [in0]=r8,1
+ cmp.ltu p6,p0=in1,r10
+ ;;
+(p6) cmp.ne.and p6,p0=r8,r0
+ ;;
+(p6) br.cond.dpnt.few .Loop1
+
+1: sub r8=in1,r9 // length of string (including NUL character)
+.Lexit:
+ mov ar.pfs=r11
+ br.ret.sptk.few rp
+
+ .endp __strncpy_from_user
diff --git a/arch/ia64/lib/strnlen_user.S b/arch/ia64/lib/strnlen_user.S
new file mode 100644
index 000000000..c227a9003
--- /dev/null
+++ b/arch/ia64/lib/strnlen_user.S
@@ -0,0 +1,55 @@
+/*
+ * Returns 0 if an exception occurs before the NUL or the supplied limit (N)
+ * is reached, a value greater than N if the string is longer than the limit,
+ * else strlen(buffer)+1 (i.e., the length including the NUL).
+ *
+ * Inputs:
+ * in0: address of buffer
+ * in1: string length limit N
+ * Outputs:
+ * r8: 0 in case of fault, strlen(buffer)+1 otherwise
+ *
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/* If a fault occurs, r8 gets set to -EFAULT and r9 gets cleared. */
+#define EX(x...) \
+ .section __ex_table,"a"; \
+ data4 @gprel(99f); \
+ data4 (.Lexit-99f)|1; \
+ .previous; \
+99: x;
+
+ .text
+ .psr abi64
+ .psr lsb
+ .lsb
+
+ .align 32
+ .global __strnlen_user
+ .proc __strnlen_user
+__strnlen_user:
+ alloc r2=ar.pfs,2,0,0,0
+ mov r16=ar.lc // preserve ar.lc
+ add r3=-1,in1
+ ;;
+ mov ar.lc=r3
+ mov r9=0
+
+ // XXX braindead strlen loop---this needs to be optimized
+.Loop1:
+ EX(ld1 r8=[in0],1)
+ add r9=1,r9
+ ;;
+ cmp.eq p6,p0=r8,r0
+(p6) br.dpnt.few .Lexit
+ br.cloop.dptk.few .Loop1
+
+ add r9=1,in1 // NUL not found---return N+1
+ ;;
+.Lexit:
+ mov r8=r9
+ mov ar.lc=r16 // restore ar.lc
+ br.ret.sptk.few rp
+
+ .endp __strnlen_user
diff --git a/arch/ia64/mm/Makefile b/arch/ia64/mm/Makefile
new file mode 100644
index 000000000..ab2b95cf9
--- /dev/null
+++ b/arch/ia64/mm/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for the ia64-specific parts of the memory manager.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definition is now in the main makefile...
+
+O_TARGET := mm.o
+#O_OBJS := ioremap.o
+O_OBJS := init.o fault.o tlb.o extable.o
+
+include $(TOPDIR)/Rules.make
diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c
new file mode 100644
index 000000000..bee64e0e3
--- /dev/null
+++ b/arch/ia64/mm/extable.c
@@ -0,0 +1,68 @@
+/*
+ * Kernel exception handling table support. Derived from arch/alpha/mm/extable.c.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+extern const struct exception_table_entry __start___ex_table[];
+extern const struct exception_table_entry __stop___ex_table[];
+
+static inline const struct exception_table_entry *
+search_one_table (const struct exception_table_entry *first,
+ const struct exception_table_entry *last,
+ signed long value)
+{
+ /* Abort early if the search value is out of range. */
+ if (value != (signed int)value)
+ return 0;
+
+ while (first <= last) {
+ const struct exception_table_entry *mid;
+ long diff;
+ /*
+ * We know that first and last are both kernel virtual
+ * pointers (region 7) so first+last will cause an
+ * overflow. We fix that by calling __va() on the
+ * result, which will ensure that the top two bits get
+ * set again.
+ */
+ mid = (void *) __va((((__u64) first + (__u64) last)/2/sizeof(*mid))*sizeof(*mid));
+ diff = mid->addr - value;
+ if (diff == 0)
+ return mid;
+ else if (diff < 0)
+ first = mid+1;
+ else
+ last = mid-1;
+ }
+ return 0;
+}
+
+register unsigned long gp __asm__("gp");
+
+const struct exception_table_entry *
+search_exception_table (unsigned long addr)
+{
+#ifndef CONFIG_MODULES
+ /* There is only the kernel to search. */
+ return search_one_table(__start___ex_table, __stop___ex_table - 1, addr - gp);
+#else
+ struct exception_table_entry *ret;
+ /* The kernel is the last "module" -- no need to treat it special. */
+ struct module *mp;
+
+ for (mp = module_list; mp ; mp = mp->next) {
+ if (!mp->ex_table_start)
+ continue;
+ ret = search_one_table(mp->ex_table_start, mp->ex_table_end - 1, addr - mp->gp);
+ if (ret)
+ return ret;
+ }
+ return 0;
+#endif
+}
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
new file mode 100644
index 000000000..99cf5048c
--- /dev/null
+++ b/arch/ia64/mm/fault.c
@@ -0,0 +1,164 @@
+/*
+ * MMU fault handling support.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/hardirq.h>
+
+extern void die_if_kernel (char *, struct pt_regs *, long);
+
+/*
+ * This routine is analogous to expand_stack() but instead grows the
+ * register backing store (which grows towards higher addresses).
+ * Since the register backing store is accessed sequentially, we
+ * disallow growing the RBS by more than a page at a time. Note that
+ * the VM_GROWSUP flag can be set on any VM area but that's fine
+ * because the total process size is still limited by RLIMIT_STACK and
+ * RLIMIT_AS.
+ */
+static inline long
+expand_backing_store (struct vm_area_struct *vma, unsigned long address)
+{
+ unsigned long grow;
+
+ grow = PAGE_SIZE >> PAGE_SHIFT;
+ if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur
+ || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur))
+ return -ENOMEM;
+ vma->vm_end += PAGE_SIZE;
+ vma->vm_mm->total_vm += grow;
+ if (vma->vm_flags & VM_LOCKED)
+ vma->vm_mm->locked_vm += grow;
+ return 0;
+}
+
+void
+ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs)
+{
+ struct mm_struct *mm = current->mm;
+ const struct exception_table_entry *fix;
+ struct vm_area_struct *vma, *prev_vma;
+ struct siginfo si;
+ int signal = SIGSEGV;
+ unsigned long mask;
+
+ /*
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+ if (in_interrupt() || !mm)
+ goto no_context;
+
+ down(&mm->mmap_sem);
+
+ vma = find_vma_prev(mm, address, &prev_vma);
+ if (!vma)
+ goto bad_area;
+
+ /* find_vma_prev() returns vma such that address < vma->vm_end or NULL */
+ if (address < vma->vm_start)
+ goto check_expansion;
+
+ good_area:
+ /* OK, we've got a good vm_area for this memory area. Check the access permissions: */
+
+# define VM_READ_BIT 0
+# define VM_WRITE_BIT 1
+# define VM_EXEC_BIT 2
+
+# if (((1 << VM_READ_BIT) != VM_READ || (1 << VM_WRITE_BIT) != VM_WRITE) \
+ || (1 << VM_EXEC_BIT) != VM_EXEC)
+# error File is out of sync with <linux/mm.h>. Please update.
+# endif
+
+ mask = ( (((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
+ | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)
+ | (((isr >> IA64_ISR_R_BIT) & 1UL) << VM_READ_BIT));
+
+ if ((vma->vm_flags & mask) != mask)
+ goto bad_area;
+
+ /*
+ * If for any reason at all we couldn't handle the fault, make
+ * sure we exit gracefully rather than endlessly redo the
+ * fault.
+ */
+ if (!handle_mm_fault(current, vma, address, (isr & IA64_ISR_W) != 0)) {
+ /*
+ * We ran out of memory, or some other thing happened
+ * to us that made us unable to handle the page fault
+ * gracefully.
+ */
+ signal = SIGBUS;
+ goto bad_area;
+ }
+ up(&mm->mmap_sem);
+ return;
+
+ check_expansion:
+ if (!(prev_vma && (prev_vma->vm_flags & VM_GROWSUP) && (address == prev_vma->vm_end))) {
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+ if (expand_stack(vma, address))
+ goto bad_area;
+ } else if (expand_backing_store(prev_vma, address))
+ goto bad_area;
+ goto good_area;
+
+ bad_area:
+ up(&mm->mmap_sem);
+ if (isr & IA64_ISR_SP) {
+ /*
+ * This fault was due to a speculative load; set the
+ * "ed" bit in the psr to ensure forward progress
+ * (target register will get a NaT).
+ */
+ ia64_psr(regs)->ed = 1;
+ return;
+ }
+ if (user_mode(regs)) {
+#if 0
+printk("%s(%d): segfault accessing %lx\n", current->comm, current->pid, address);
+show_regs(regs);
+#endif
+ si.si_signo = signal;
+ si.si_errno = 0;
+ si.si_code = SI_KERNEL;
+ si.si_addr = (void *) address;
+ force_sig_info(signal, &si, current);
+ return;
+ }
+
+ no_context:
+ fix = search_exception_table(regs->cr_iip);
+ if (fix) {
+ regs->r8 = -EFAULT;
+ if (fix->skip & 1) {
+ regs->r9 = 0;
+ }
+ regs->cr_iip += ((long) fix->skip) & ~15;
+ regs->cr_ipsr &= ~IA64_PSR_RI; /* clear exception slot number */
+ return;
+ }
+
+ /*
+ * Oops. The kernel tried to access some bad page. We'll have
+ * to terminate things with extreme prejudice.
+ */
+ printk(KERN_ALERT "Unable to handle kernel paging request at "
+ "virtual address %016lx\n", address);
+ die_if_kernel("Oops", regs, isr);
+ do_exit(SIGKILL);
+ return;
+}
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
new file mode 100644
index 000000000..388f1fe0c
--- /dev/null
+++ b/arch/ia64/mm/init.c
@@ -0,0 +1,461 @@
+/*
+ * Initialize MMU support.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+
+#include <asm/dma.h>
+#include <asm/efi.h>
+#include <asm/ia32.h>
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+
+/* References to section boundaries: */
+extern char _stext, _etext, _edata, __init_begin, __init_end;
+
+/*
+ * These are allocated in head.S so that we get proper page alignment.
+ * If you change the size of these then change head.S as well.
+ */
+extern char empty_bad_page[PAGE_SIZE];
+extern pmd_t empty_bad_pmd_table[PTRS_PER_PMD];
+extern pte_t empty_bad_pte_table[PTRS_PER_PTE];
+
+extern void ia64_tlb_init (void);
+
+static unsigned long totalram_pages;
+
+/*
+ * Fill in empty_bad_pmd_table with entries pointing to
+ * empty_bad_pte_table and return the address of this PMD table.
+ */
+static pmd_t *
+get_bad_pmd_table (void)
+{
+ pmd_t v;
+ int i;
+
+ pmd_set(&v, empty_bad_pte_table);
+
+ for (i = 0; i < PTRS_PER_PMD; ++i)
+ empty_bad_pmd_table[i] = v;
+
+ return empty_bad_pmd_table;
+}
+
+/*
+ * Fill in empty_bad_pte_table with PTEs pointing to empty_bad_page
+ * and return the address of this PTE table.
+ */
+static pte_t *
+get_bad_pte_table (void)
+{
+ pte_t v;
+ int i;
+
+ set_pte(&v, pte_mkdirty(mk_pte_phys(__pa(empty_bad_page), PAGE_SHARED)));
+
+ for (i = 0; i < PTRS_PER_PTE; ++i)
+ empty_bad_pte_table[i] = v;
+
+ return empty_bad_pte_table;
+}
+
+void
+__handle_bad_pgd (pgd_t *pgd)
+{
+ pgd_ERROR(*pgd);
+ pgd_set(pgd, get_bad_pmd_table());
+}
+
+void
+__handle_bad_pmd (pmd_t *pmd)
+{
+ pmd_ERROR(*pmd);
+ pmd_set(pmd, get_bad_pte_table());
+}
+
+/*
+ * Allocate and initialize an L3 directory page and set
+ * the L2 directory entry PMD to the newly allocated page.
+ */
+pte_t*
+get_pte_slow (pmd_t *pmd, unsigned long offset)
+{
+ pte_t *pte;
+
+ pte = (pte_t *) __get_free_page(GFP_KERNEL);
+ if (pmd_none(*pmd)) {
+ if (pte) {
+ /* everything A-OK */
+ clear_page(pte);
+ pmd_set(pmd, pte);
+ return pte + offset;
+ }
+ pmd_set(pmd, get_bad_pte_table());
+ return NULL;
+ }
+ free_page((unsigned long) pte);
+ if (pmd_bad(*pmd)) {
+ __handle_bad_pmd(pmd);
+ return NULL;
+ }
+ return (pte_t *) pmd_page(*pmd) + offset;
+}
+
+int
+do_check_pgt_cache (int low, int high)
+{
+ int freed = 0;
+
+ if (pgtable_cache_size > high) {
+ do {
+ if (pgd_quicklist)
+ free_page((unsigned long)get_pgd_fast()), ++freed;
+ if (pmd_quicklist)
+ free_page((unsigned long)get_pmd_fast()), ++freed;
+ if (pte_quicklist)
+ free_page((unsigned long)get_pte_fast()), ++freed;
+ } while (pgtable_cache_size > low);
+ }
+ return freed;
+}
+
+/*
+ * This performs some platform-dependent address space initialization.
+ * On IA-64, we want to set up the VM area for the register backing
+ * store (which grows upwards) and install the gateway page which is
+ * used for signal trampolines, etc.
+ */
+void
+ia64_init_addr_space (void)
+{
+ struct vm_area_struct *vma;
+
+ /*
+ * If we're out of memory and kmem_cache_alloc() returns NULL,
+ * we simply ignore the problem. When the process attempts to
+ * write to the register backing store for the first time, it
+ * will get a SEGFAULT in this case.
+ */
+ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ if (vma) {
+ vma->vm_mm = current->mm;
+ vma->vm_start = IA64_RBS_BOT;
+ vma->vm_end = vma->vm_start + PAGE_SIZE;
+ vma->vm_page_prot = PAGE_COPY;
+ vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP;
+ vma->vm_ops = NULL;
+ vma->vm_pgoff = 0;
+ vma->vm_file = NULL;
+ vma->vm_private_data = NULL;
+ insert_vm_struct(current->mm, vma);
+ }
+}
+
+void
+free_initmem (void)
+{
+ unsigned long addr;
+
+ addr = (unsigned long) &__init_begin;
+ for (; addr < (unsigned long) &__init_end; addr += PAGE_SIZE) {
+ clear_bit(PG_reserved, &mem_map[MAP_NR(addr)].flags);
+ set_page_count(&mem_map[MAP_NR(addr)], 1);
+ free_page(addr);
+ ++totalram_pages;
+ }
+ printk ("Freeing unused kernel memory: %ldkB freed\n",
+ (&__init_end - &__init_begin) >> 10);
+}
+
+void
+si_meminfo (struct sysinfo *val)
+{
+ val->totalram = totalram_pages;
+ val->sharedram = 0;
+ val->freeram = nr_free_pages();
+ val->bufferram = atomic_read(&buffermem_pages);
+ val->totalhigh = 0;
+ val->freehigh = 0;
+ val->mem_unit = PAGE_SIZE;
+ return;
+}
+
+void
+show_mem (void)
+{
+ int i,free = 0,total = 0,reserved = 0;
+ int shared = 0, cached = 0;
+
+ printk("Mem-info:\n");
+ show_free_areas();
+ printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+ i = max_mapnr;
+ while (i-- > 0) {
+ total++;
+ if (PageReserved(mem_map+i))
+ reserved++;
+ else if (PageSwapCache(mem_map+i))
+ cached++;
+ else if (!page_count(mem_map + i))
+ free++;
+ else
+ shared += page_count(mem_map + i) - 1;
+ }
+ printk("%d pages of RAM\n", total);
+ printk("%d reserved pages\n", reserved);
+ printk("%d pages shared\n", shared);
+ printk("%d pages swap cached\n", cached);
+ printk("%ld pages in page table cache\n", pgtable_cache_size);
+ show_buffers();
+}
+
+/*
+ * This is like put_dirty_page() but installs a clean page with PAGE_GATE protection
+ * (execute-only, typically).
+ */
+struct page *
+put_gate_page (struct page *page, unsigned long address)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ if (!PageReserved(page))
+ printk("put_gate_page: gate page at 0x%lx not in reserved memory\n",
+ page_address(page));
+ pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */
+ pmd = pmd_alloc(pgd, address);
+ if (!pmd) {
+ __free_page(page);
+ oom(current);
+ return 0;
+ }
+ pte = pte_alloc(pmd, address);
+ if (!pte) {
+ __free_page(page);
+ oom(current);
+ return 0;
+ }
+ if (!pte_none(*pte)) {
+ pte_ERROR(*pte);
+ __free_page(page);
+ return 0;
+ }
+ flush_page_to_ram(page);
+ set_pte(pte, page_pte_prot(page, PAGE_GATE));
+ /* no need for flush_tlb */
+ return page;
+}
+
+void __init
+ia64_rid_init (void)
+{
+ unsigned long flags, rid, pta;
+
+ /* Set up the kernel identity mappings (regions 6 & 7) and the vmalloc area (region 5): */
+ ia64_clear_ic(flags);
+
+ rid = ia64_rid(IA64_REGION_ID_KERNEL, __IA64_UNCACHED_OFFSET);
+ ia64_set_rr(__IA64_UNCACHED_OFFSET, (rid << 8) | (_PAGE_SIZE_256M << 2));
+
+ rid = ia64_rid(IA64_REGION_ID_KERNEL, PAGE_OFFSET);
+ ia64_set_rr(PAGE_OFFSET, (rid << 8) | (_PAGE_SIZE_256M << 2));
+
+ rid = ia64_rid(IA64_REGION_ID_KERNEL, VMALLOC_START);
+ ia64_set_rr(VMALLOC_START, (rid << 8) | (PAGE_SHIFT << 2) | 1);
+
+ __restore_flags(flags);
+
+ /*
+ * Check if the virtually mapped linear page table (VMLPT)
+ * overlaps with a mapped address space. The IA-64
+ * architecture guarantees that at least 50 bits of virtual
+ * address space are implemented but if we pick a large enough
+ * page size (e.g., 64KB), the VMLPT is big enough that it
+ * will overlap with the upper half of the kernel mapped
+ * region. I assume that once we run on machines big enough
+ * to warrant 64KB pages, IMPL_VA_MSB will be significantly
+ * bigger, so we can just adjust the number below to get
+ * things going. Alternatively, we could truncate the upper
+ * half of each region's address space to not permit mappings
+ * that would overlap with the VMLPT. --davidm 99/11/13
+ */
+# define ld_pte_size 3
+# define ld_max_addr_space_pages 3*(PAGE_SHIFT - ld_pte_size) /* max # of mappable pages */
+# define ld_max_addr_space_size (ld_max_addr_space_pages + PAGE_SHIFT)
+# define ld_max_vpt_size (ld_max_addr_space_pages + ld_pte_size)
+# define POW2(n) (1ULL << (n))
+# define IMPL_VA_MSB 50
+ if (POW2(ld_max_addr_space_size - 1) + POW2(ld_max_vpt_size) > POW2(IMPL_VA_MSB))
+ panic("mm/init: overlap between virtually mapped linear page table and "
+ "mapped kernel space!");
+ pta = POW2(61) - POW2(IMPL_VA_MSB);
+ /*
+ * Set the (virtually mapped linear) page table address. Bit
+ * 8 selects between the short and long format, bits 2-7 the
+ * size of the table, and bit 0 whether the VHPT walker is
+ * enabled.
+ */
+ ia64_set_pta(pta | (0<<8) | ((3*(PAGE_SHIFT-3)+3)<<2) | 1);
+}
+
+#ifdef CONFIG_IA64_VIRTUAL_MEM_MAP
+
+static int
+create_mem_map_page_table (u64 start, u64 end, void *arg)
+{
+ unsigned long address, start_page, end_page;
+ struct page *map_start, *map_end;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+ void *page;
+
+ map_start = mem_map + MAP_NR(start);
+ map_end = mem_map + MAP_NR(end);
+
+ start_page = (unsigned long) map_start & PAGE_MASK;
+ end_page = PAGE_ALIGN((unsigned long) map_end);
+
+ printk("[%lx,%lx) -> %lx-%lx\n", start, end, start_page, end_page);
+
+ for (address = start_page; address < end_page; address += PAGE_SIZE) {
+ pgd = pgd_offset_k(address);
+ if (pgd_none(*pgd)) {
+ pmd = alloc_bootmem_pages(PAGE_SIZE);
+ clear_page(pmd);
+ pgd_set(pgd, pmd);
+ pmd += (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
+ } else
+ pmd = pmd_offset(pgd, address);
+ if (pmd_none(*pmd)) {
+ pte = alloc_bootmem_pages(PAGE_SIZE);
+ clear_page(pte);
+ pmd_set(pmd, pte);
+ pte += (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+ } else
+ pte = pte_offset(pmd, address);
+
+ if (pte_none(*pte)) {
+ page = alloc_bootmem_pages(PAGE_SIZE);
+ clear_page(page);
+ set_pte(pte, mk_pte_phys(__pa(page), PAGE_KERNEL));
+ }
+ }
+ return 0;
+}
+
+#endif /* CONFIG_IA64_VIRTUAL_MEM_MAP */
+
+/*
+ * Set up the page tables.
+ */
+void
+paging_init (void)
+{
+ unsigned long max_dma, zones_size[MAX_NR_ZONES];
+
+ clear_page((void *) ZERO_PAGE_ADDR);
+
+ ia64_rid_init();
+ __flush_tlb_all();
+
+ /* initialize mem_map[] */
+
+ memset(zones_size, 0, sizeof(zones_size));
+
+ max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS);
+ if (max_low_pfn < max_dma)
+ zones_size[ZONE_DMA] = max_low_pfn;
+ else {
+ zones_size[ZONE_DMA] = max_dma;
+ zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
+ }
+ free_area_init(zones_size);
+}
+
+static int
+count_pages (u64 start, u64 end, void *arg)
+{
+ unsigned long *count = arg;
+
+ *count += (end - start) >> PAGE_SHIFT;
+ return 0;
+}
+
+static int
+count_reserved_pages (u64 start, u64 end, void *arg)
+{
+ unsigned long num_reserved = 0;
+ unsigned long *count = arg;
+ struct page *pg;
+
+ for (pg = mem_map + MAP_NR(start); pg < mem_map + MAP_NR(end); ++pg)
+ if (PageReserved(pg))
+ ++num_reserved;
+ *count += num_reserved;
+ return 0;
+}
+
+void
+mem_init (void)
+{
+ extern char __start_gate_section[];
+ long reserved_pages, codesize, datasize, initsize;
+
+ if (!mem_map)
+ BUG();
+
+ num_physpages = 0;
+ efi_memmap_walk(count_pages, &num_physpages);
+
+ max_mapnr = max_low_pfn;
+ high_memory = __va(max_low_pfn * PAGE_SIZE);
+
+ ia64_tlb_init();
+
+ totalram_pages += free_all_bootmem();
+
+ reserved_pages = 0;
+ efi_memmap_walk(count_reserved_pages, &reserved_pages);
+
+ codesize = (unsigned long) &_etext - (unsigned long) &_stext;
+ datasize = (unsigned long) &_edata - (unsigned long) &_etext;
+ initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+ printk("Memory: %luk/%luk available (%luk code, %luk reserved, %luk data, %luk init)\n",
+ (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10),
+ max_mapnr << (PAGE_SHIFT - 10), codesize >> 10, reserved_pages << (PAGE_SHIFT - 10),
+ datasize >> 10, initsize >> 10);
+
+ /* install the gate page in the global page table: */
+ put_gate_page(mem_map + MAP_NR(__start_gate_section), GATE_ADDR);
+
+#ifndef CONFIG_IA64_SOFTSDV_HACKS
+ /*
+ * (Some) SoftSDVs seem to have a problem with this call.
+ * Since it's mostly a performance optimization, just don't do
+ * it for now... --davidm 99/12/6
+ */
+ efi_enter_virtual_mode();
+#endif
+
+#ifdef CONFIG_IA32_SUPPORT
+ ia32_gdt_init();
+#endif
+ return;
+}
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
new file mode 100644
index 000000000..72ece4147
--- /dev/null
+++ b/arch/ia64/mm/tlb.c
@@ -0,0 +1,166 @@
+/*
+ * TLB support routines.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/pal.h>
+
+#define SUPPORTED_PGBITS ( \
+ 1 << _PAGE_SIZE_256M | \
+ 1 << _PAGE_SIZE_64M | \
+ 1 << _PAGE_SIZE_16M | \
+ 1 << _PAGE_SIZE_4M | \
+ 1 << _PAGE_SIZE_1M | \
+ 1 << _PAGE_SIZE_256K | \
+ 1 << _PAGE_SIZE_64K | \
+ 1 << _PAGE_SIZE_16K | \
+ 1 << _PAGE_SIZE_8K | \
+ 1 << _PAGE_SIZE_4K )
+
+static void wrap_context (struct mm_struct *mm);
+
+unsigned long ia64_next_context = (1UL << IA64_HW_CONTEXT_BITS) + 1;
+
+ /*
+ * Put everything in a struct so we avoid the global offset table whenever
+ * possible.
+ */
+ia64_ptce_info_t ia64_ptce_info;
+
+/*
+ * Serialize usage of ptc.g
+ */
+spinlock_t ptcg_lock = SPIN_LOCK_UNLOCKED; /* see <asm/pgtable.h> */
+
+void
+get_new_mmu_context (struct mm_struct *mm)
+{
+ if ((ia64_next_context & IA64_HW_CONTEXT_MASK) == 0) {
+ wrap_context(mm);
+ }
+ mm->context = ia64_next_context++;
+}
+
+/*
+ * This is where we handle the case where (ia64_next_context &
+ * IA64_HW_CONTEXT_MASK) == 0. Whenever this happens, we need to
+ * flush the entire TLB and skip over region id number 0, which is
+ * used by the kernel.
+ */
+static void
+wrap_context (struct mm_struct *mm)
+{
+ struct task_struct *task;
+
+ /*
+ * We wrapped back to the first region id so we nuke the TLB
+ * so we can switch to the next generation of region ids.
+ */
+ __flush_tlb_all();
+ if (ia64_next_context++ == 0) {
+ /*
+ * Oops, we've used up all 64 bits of the context
+ * space---walk through task table to ensure we don't
+ * get tricked into using an old context. If this
+ * happens, the machine has been running for a long,
+ * long time!
+ */
+ ia64_next_context = (1UL << IA64_HW_CONTEXT_BITS) + 1;
+
+ read_lock(&tasklist_lock);
+ for_each_task (task) {
+ if (task->mm == mm)
+ continue;
+ flush_tlb_mm(mm);
+ }
+ read_unlock(&tasklist_lock);
+ }
+}
+
+void
+__flush_tlb_all (void)
+{
+ unsigned long i, j, flags, count0, count1, stride0, stride1, addr = ia64_ptce_info.base;
+
+ count0 = ia64_ptce_info.count[0];
+ count1 = ia64_ptce_info.count[1];
+ stride0 = ia64_ptce_info.stride[0];
+ stride1 = ia64_ptce_info.stride[1];
+
+ save_and_cli(flags);
+ for (i = 0; i < count0; ++i) {
+ for (j = 0; j < count1; ++j) {
+ asm volatile ("ptc.e %0" :: "r"(addr));
+ addr += stride1;
+ }
+ addr += stride0;
+ }
+ restore_flags(flags);
+ ia64_insn_group_barrier();
+ ia64_srlz_i(); /* srlz.i implies srlz.d */
+ ia64_insn_group_barrier();
+}
+
+void
+flush_tlb_range (struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ unsigned long size = end - start;
+ unsigned long nbits;
+
+ if (mm != current->active_mm) {
+ /* this doesn't happen often, if at all, so it's not worth optimizing for... */
+ mm->context = 0;
+ return;
+ }
+
+ nbits = ia64_fls(size + 0xfff);
+ if (((1UL << nbits) & SUPPORTED_PGBITS) == 0) {
+ if (nbits > _PAGE_SIZE_256M)
+ nbits = _PAGE_SIZE_256M;
+ else
+			/*
+			 * Some page sizes are not implemented by the
+			 * IA-64 architecture, so if we are asked to
+			 * flush an unsupported page size, round up to
+			 * the next larger, supported size.  We rely on
+			 * the fact that if page size N is not
+			 * implemented, 2*N _is_ implemented.
+			 */
+ ++nbits;
+ if (((1UL << nbits) & SUPPORTED_PGBITS) == 0)
+ panic("flush_tlb_range: BUG: nbits=%lu\n", nbits);
+ }
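+	/*
+	 * E.g. a computed nbits of 15 (32KB) is not in SUPPORTED_PGBITS and
+	 * gets bumped to 16, so the purge loop below advances in 64KB steps.
+	 */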
+ start &= ~((1UL << nbits) - 1);
+
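+	/*
+	 * ptc.g purges the translation range on all processors; its use must
+	 * be serialized, hence ptcg_lock.  On UP the CPU-local ptc.l suffices.
+	 */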
+ spin_lock(&ptcg_lock);
+ do {
+#ifdef CONFIG_SMP
+ __asm__ __volatile__ ("ptc.g %0,%1;;srlz.i;;"
+ :: "r"(start), "r"(nbits<<2) : "memory");
+#else
+ __asm__ __volatile__ ("ptc.l %0,%1" :: "r"(start), "r"(nbits<<2) : "memory");
+#endif
+ start += (1UL << nbits);
+ } while (start < end);
+ spin_unlock(&ptcg_lock);
+ ia64_insn_group_barrier();
+ ia64_srlz_i(); /* srlz.i implies srlz.d */
+ ia64_insn_group_barrier();
+}
+
+void
+ia64_tlb_init (void)
+{
+ ia64_get_ptce(&ia64_ptce_info);
+ __flush_tlb_all(); /* nuke left overs from bootstrapping... */
+}
diff --git a/arch/ia64/sn/Makefile b/arch/ia64/sn/Makefile
new file mode 100644
index 000000000..3c8810967
--- /dev/null
+++ b/arch/ia64/sn/Makefile
@@ -0,0 +1,25 @@
+#
+# ia64/sn/Makefile
+#
+# Copyright (C) 1999 Silicon Graphics, Inc.
+# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
+#
+
+CFLAGS := $(CFLAGS) -DCONFIG_SGI_SN1 -DSN1 -DSN -DSOFTSDV \
+ -DLANGUAGE_C=1 -D_LANGUAGE_C=1
+AFLAGS := $(AFLAGS) -DCONFIG_SGI_SN1 -DSN1 -DSOFTSDV
+
+.S.s:
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -E -o $*.s $<
+.S.o:
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -c -o $*.o $<
+
+all: sn.a
+
+O_TARGET = sn.a
+O_HEADERS =
+O_OBJS = sn1/sn1.a
+
+clean::
+
+include $(TOPDIR)/Rules.make
diff --git a/arch/ia64/sn/sn1/Makefile b/arch/ia64/sn/sn1/Makefile
new file mode 100644
index 000000000..23758c473
--- /dev/null
+++ b/arch/ia64/sn/sn1/Makefile
@@ -0,0 +1,29 @@
+#
+# ia64/sn/sn1/Makefile
+#
+# Copyright (C) 1999 Silicon Graphics, Inc.
+# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
+#
+
+CFLAGS := $(CFLAGS) -DCONFIG_SGI_SN1 -DSN1 -DSN -DSOFTSDV \
+ -DLANGUAGE_C=1 -D_LANGUAGE_C=1
+AFLAGS := $(AFLAGS) -DCONFIG_SGI_SN1 -DSN1 -DSOFTSDV
+
+.S.s:
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -E -o $*.s $<
+.S.o:
+ $(CC) -D__ASSEMBLY__ $(AFLAGS) -c -o $*.o $<
+
+all: sn1.a
+
+O_TARGET = sn1.a
+O_HEADERS =
+O_OBJS = irq.o setup.o
+
+ifeq ($(CONFIG_IA64_GENERIC),y)
+O_OBJS += machvec.o
+endif
+
+clean::
+
+include $(TOPDIR)/Rules.make
diff --git a/arch/ia64/sn/sn1/irq.c b/arch/ia64/sn/sn1/irq.c
new file mode 100644
index 000000000..df8e56943
--- /dev/null
+++ b/arch/ia64/sn/sn1/irq.c
@@ -0,0 +1,50 @@
+#include <linux/kernel.h>
+
+#include <asm/irq.h>
+#include <asm/ptrace.h>
+
+static int
+sn1_startup_irq(unsigned int irq)
+{
+ return(0);
+}
+
+static void
+sn1_shutdown_irq(unsigned int irq)
+{
+}
+
+static void
+sn1_disable_irq(unsigned int irq)
+{
+}
+
+static void
+sn1_enable_irq(unsigned int irq)
+{
+}
+
+static int
+sn1_handle_irq(unsigned int irq, struct pt_regs *regs)
+{
+ return(0);
+}
+
+struct hw_interrupt_type irq_type_sn1 = {
+ "sn1_irq",
+ sn1_startup_irq,
+ sn1_shutdown_irq,
+ sn1_handle_irq,
+ sn1_enable_irq,
+ sn1_disable_irq
+};
+
+void
+sn1_irq_init (struct irq_desc desc[NR_IRQS])
+{
+ int i;
+
+ for (i = IA64_MIN_VECTORED_IRQ; i <= IA64_MAX_VECTORED_IRQ; ++i) {
+ irq_desc[i].handler = &irq_type_sn1;
+ }
+}
diff --git a/arch/ia64/sn/sn1/machvec.c b/arch/ia64/sn/sn1/machvec.c
new file mode 100644
index 000000000..2e36b2e08
--- /dev/null
+++ b/arch/ia64/sn/sn1/machvec.c
@@ -0,0 +1,4 @@
+#include <asm/machvec_init.h>
+#include <asm/machvec_sn1.h>
+
+MACHVEC_DEFINE(sn1)
diff --git a/arch/ia64/sn/sn1/setup.c b/arch/ia64/sn/sn1/setup.c
new file mode 100644
index 000000000..1e3a39ae3
--- /dev/null
+++ b/arch/ia64/sn/sn1/setup.c
@@ -0,0 +1,77 @@
+/*
+ *
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) Vijay Chander(vijay@engr.sgi.com)
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/kdev_t.h>
+#include <linux/string.h>
+#include <linux/tty.h>
+#include <linux/console.h>
+#include <linux/timex.h>
+#include <linux/sched.h>
+
+#include <asm/io.h>
+#include <asm/machvec.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+
+
+/*
+ * The format of "screen_info" is strange, a holdover from the early i386
+ * setup code.  This is just enough to make the console code think we're on a
+ * VGA color display.
+ */
+struct screen_info sn1_screen_info = {
+ orig_x: 0,
+ orig_y: 0,
+ orig_video_mode: 3,
+ orig_video_cols: 80,
+ orig_video_ega_bx: 3,
+ orig_video_lines: 25,
+ orig_video_isVGA: 1,
+ orig_video_points: 16
+};
+
+/*
+ * This is here so we can use the CMOS detection in ide-probe.c to
+ * determine what drives are present. In theory, we don't need this
+ * as the auto-detection could be done via ide-probe.c:do_probe() but
+ * in practice that would be much slower, which is painful when
+ * running in the simulator. Note that passing zeroes in DRIVE_INFO
+ * is sufficient (the IDE driver will autodetect the drive geometry).
+ */
+char drive_info[4*16];
+
+unsigned long
+sn1_map_nr (unsigned long addr)
+{
+ return MAP_NR_SN1(addr);
+}
+
+void
+sn1_setup(char **cmdline_p)
+{
+
+ ROOT_DEV = to_kdev_t(0x0301); /* default to first IDE drive */
+
+#if !defined (CONFIG_IA64_SOFTSDV_HACKS)
+ /*
+ * Program the timer to deliver timer ticks. 0x40 is the I/O port
+ * address of PIT counter 0, 0x43 is the I/O port address of the
+ * PIT control word.
+ */
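+	/* LATCH (from <linux/timex.h>) is the counter-0 divisor that makes the PIT tick at HZ. */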
+	request_region(0x40, 0x20, "timer");
+	outb(0x34, 0x43);		/* Control word */
+	outb(LATCH & 0xff, 0x40);	/* LSB */
+	outb(LATCH >> 8, 0x40);		/* MSB */
+	printk("PIT: LATCH at 0x%x%02x for %d HZ\n", LATCH >> 8, LATCH & 0xff, HZ);
+#endif
+#ifdef __SMP__
+ init_smp_config();
+#endif
+ screen_info = sn1_screen_info;
+}
diff --git a/arch/ia64/tools/Makefile b/arch/ia64/tools/Makefile
new file mode 100644
index 000000000..0491ca943
--- /dev/null
+++ b/arch/ia64/tools/Makefile
@@ -0,0 +1,49 @@
+CFLAGS = -D__KERNEL__ -g -O2 -Wall -I$(TOPDIR)/include
+
+ifdef CONFIG_SMP
+ CFLAGS += -D__SMP__
+endif
+
+TARGET = $(TOPDIR)/include/asm-ia64/offsets.h
+
+all:
+
+clean:
+ rm -f print_offsets.s print_offsets offsets.h
+
+fastdep: offsets.h
+ @if ! cmp -s offsets.h ${TARGET}; then \
+ echo "Updating ${TARGET}..."; \
+ cp offsets.h ${TARGET}; \
+ else \
+ echo "${TARGET} is up to date"; \
+ fi
+
+#
+# If we're cross-compiling, we use the cross-compiler to translate
+# print_offsets.c into an assembly file and then awk to translate this
+# file into offsets.h. This avoids having to use a simulator to
+# generate this file. This is based on an idea suggested by Asit
+# Mallick. If we're running natively, we can of course just build
+# print_offsets and run it. --davidm
+#
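+#
+# Either way, the generated offsets.h is simply a list of lines like
+#
+#	#define IA64_PT_REGS_R8_OFFSET	<offset>	/* <offset in hex> */
+#
+# one per entry in print_offsets.c's table.
+#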
+
+ifeq ($(CROSS_COMPILE),)
+
+offsets.h: print_offsets
+ ./print_offsets > offsets.h
+
+print_offsets: print_offsets.c
+ $(CC) $(CFLAGS) print_offsets.c -o $@
+
+else
+
+offsets.h: print_offsets.s
+ $(AWK) -f print_offsets.awk $^ > $@
+
+print_offsets.s: print_offsets.c
+ $(CC) $(CFLAGS) -S print_offsets.c -o $@
+
+endif
+
+.PHONY: all
diff --git a/arch/ia64/tools/print_offsets.awk b/arch/ia64/tools/print_offsets.awk
new file mode 100644
index 000000000..5eb8bcb63
--- /dev/null
+++ b/arch/ia64/tools/print_offsets.awk
@@ -0,0 +1,70 @@
+BEGIN {
+ print "#ifndef _ASM_IA64_OFFSETS_H"
+ print "#define _ASM_IA64_OFFSETS_H"
+ print "/*"
+ print " * DO NOT MODIFY"
+ print " *"
+ print " * This file was generated by arch/ia64/tools/print_offsets.awk."
+ print " *"
+ print " */"
+ #
+ # This is a cheesy hack. Make sure that
+ # PF_PTRACED == 1<<PF_PTRACED_BIT.
+ #
+ print "#define PF_PTRACED_BIT 4"
+}
+
+# Look for the table label ("tab:" or "tab#:"), followed by a repeating
+# sequence of
+#	stringz	"name"
+#	data	value
+# pairs; a ".size" directive or a "// end" marker terminates the table.
+
+/.*[.]size/ {
+ inside_table = 0
+}
+
+/\/\/ end/ {
+ inside_table = 0
+}
+
+{
+ if (inside_table) {
+ if ($1 == "//") getline;
+ name=$2
+ getline
+ getline
+ if ($1 == "//") getline;
+ value=$2
+ len = length(name)
+ name = substr(name, 2, len - 2)
+ len -= 2
+ if (len == 0)
+ print ""
+ else {
+ len += 8
+ if (len >= 40) {
+ space=" "
+ } else {
+ space=""
+ while (len < 40) {
+ len += 8
+ space = space"\t"
+ }
+ }
+ printf("#define %s%s%lu\t/* 0x%lx */\n", name, space, value, value)
+ }
+ }
+}
+
+/tab:/ {
+ inside_table = 1
+}
+
+/tab#:/ {
+ inside_table = 1
+}
+
+END {
+ print ""
+ print "#endif /* _ASM_IA64_OFFSETS_H */"
+}
diff --git a/arch/ia64/tools/print_offsets.c b/arch/ia64/tools/print_offsets.c
new file mode 100644
index 000000000..85b15aae1
--- /dev/null
+++ b/arch/ia64/tools/print_offsets.c
@@ -0,0 +1,109 @@
+/*
+ * Utility to generate asm-ia64/offsets.h.
+ *
+ * Copyright (C) 1999-2000 Hewlett-Packard Co
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Note that this file has dual use: when building the kernel
+ * natively, the file is translated into a binary and executed. When
+ * building the kernel in a cross-development environment, this file
+ * gets translated into an assembly file which, in turn, is processed
+ * by awk to generate offsets.h. So if you make any changes to this
+ * file, be sure to verify that the awk procedure still works (see
+ * print_offsets.awk).
+ */
+#include <linux/sched.h>
+
+#include <asm-ia64/processor.h>
+#include <asm-ia64/ptrace.h>
+#include <asm-ia64/siginfo.h>
+#include <asm-ia64/sigcontext.h>
+
+#ifdef offsetof
+# undef offsetof
+#endif
+
+/*
+ * We _can't_ include the host's standard header files, as those are in
+ * potential conflict with what the Linux kernel declares for the
+ * target system.
+ */
+extern int printf (const char *, ...);
+
+#define offsetof(type,field) ((char *) &((type *) 0)->field - (char *) 0)
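+/* e.g. offsetof (struct pt_regs, r8) evaluates to the byte offset of r8 within struct pt_regs */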
+
+struct
+ {
+ const char name[256];
+ unsigned long value;
+ }
+tab[] =
+ {
+ { "IA64_TASK_SIZE", sizeof (struct task_struct) },
+ { "IA64_PT_REGS_SIZE", sizeof (struct pt_regs) },
+ { "IA64_SWITCH_STACK_SIZE", sizeof (struct switch_stack) },
+ { "IA64_SIGINFO_SIZE", sizeof (struct siginfo) },
+ { "", 0 }, /* spacer */
+ { "IA64_TASK_FLAGS_OFFSET", offsetof (struct task_struct, flags) },
+ { "IA64_TASK_SIGPENDING_OFFSET", offsetof (struct task_struct, sigpending) },
+ { "IA64_TASK_NEED_RESCHED_OFFSET", offsetof (struct task_struct, need_resched) },
+ { "IA64_TASK_THREAD_OFFSET", offsetof (struct task_struct, thread) },
+ { "IA64_TASK_THREAD_KSP_OFFSET", offsetof (struct task_struct, thread.ksp) },
+ { "IA64_TASK_PID_OFFSET", offsetof (struct task_struct, pid) },
+ { "IA64_TASK_MM_OFFSET", offsetof (struct task_struct, mm) },
+ { "IA64_PT_REGS_CR_IPSR_OFFSET", offsetof (struct pt_regs, cr_ipsr) },
+ { "IA64_PT_REGS_R12_OFFSET", offsetof (struct pt_regs, r12) },
+ { "IA64_PT_REGS_R8_OFFSET", offsetof (struct pt_regs, r8) },
+ { "IA64_PT_REGS_R16_OFFSET", offsetof (struct pt_regs, r16) },
+ { "IA64_SWITCH_STACK_B0_OFFSET", offsetof (struct switch_stack, b0) },
+ { "IA64_SWITCH_STACK_CALLER_UNAT_OFFSET", offsetof (struct switch_stack, caller_unat) },
+ { "IA64_SIGCONTEXT_AR_BSP_OFFSET", offsetof (struct sigcontext, sc_ar_bsp) },
+ { "IA64_SIGCONTEXT_AR_RNAT_OFFSET", offsetof (struct sigcontext, sc_ar_rnat) },
+ { "IA64_SIGCONTEXT_FLAGS_OFFSET", offsetof (struct sigcontext, sc_flags) },
+ { "IA64_SIGCONTEXT_CFM_OFFSET", offsetof (struct sigcontext, sc_cfm) },
+ { "IA64_SIGCONTEXT_FR6_OFFSET", offsetof (struct sigcontext, sc_fr[6]) },
+};
+
+static const char *tabs = "\t\t\t\t\t\t\t\t\t\t";
+
+int
+main (int argc, char **argv)
+{
+ const char *space;
+ int i, num_tabs;
+ size_t len;
+
+ printf ("#ifndef _ASM_IA64_OFFSETS_H\n");
+ printf ("#define _ASM_IA64_OFFSETS_H\n\n");
+
+ printf ("/*\n * DO NOT MODIFY\n *\n * This file was generated by "
+ "arch/ia64/tools/print_offsets.\n *\n */\n\n");
+
+ /* This is stretching things a bit, but entry.S needs the bit number
+ for PF_PTRACED and it can't include <linux/sched.h> so this seems
+    like a reasonable solution.  At least the code won't break should
+    PF_PTRACED ever change. */
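+  /* With PF_PTRACED == 0x10, for example, this prints 4, the same value
+     that print_offsets.awk hard-codes for the cross-compiled case.  */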
+ printf ("#define PF_PTRACED_BIT\t\t\t%u\n\n", ffs (PF_PTRACED) - 1);
+
+ for (i = 0; i < sizeof (tab) / sizeof (tab[0]); ++i)
+ {
+ if (tab[i].name[0] == '\0')
+ printf ("\n");
+ else
+ {
+ len = strlen (tab[i].name);
+
+ num_tabs = (40 - len) / 8;
+ if (num_tabs <= 0)
+ space = " ";
+ else
+	  space = strchr(tabs, '\0') - num_tabs;
+
+ printf ("#define %s%s%lu\t/* 0x%lx */\n",
+ tab[i].name, space, tab[i].value, tab[i].value);
+ }
+ }
+
+ printf ("\n#endif /* _ASM_IA64_OFFSETS_H */\n");
+ return 0;
+}
diff --git a/arch/ia64/vmlinux.lds.S b/arch/ia64/vmlinux.lds.S
new file mode 100644
index 000000000..b095baeb9
--- /dev/null
+++ b/arch/ia64/vmlinux.lds.S
@@ -0,0 +1,164 @@
+#include <linux/config.h>
+
+#include <asm/page.h>
+#include <asm/system.h>
+
+OUTPUT_FORMAT("elf64-ia64-little")
+OUTPUT_ARCH(ia64)
+ENTRY(_start)
+SECTIONS
+{
+ v = PAGE_OFFSET; /* this symbol is here to make debugging with kdb easier... */
+
+ . = KERNEL_START;
+
+ _text = .;
+ _stext = .;
+ .text : AT(ADDR(.text) - PAGE_OFFSET)
+ {
+ *(__ivt_section)
+ /* these are not really text pages, but the zero page needs to be in a fixed location: */
+ *(__special_page_section)
+ __start_gate_section = .;
+ *(__gate_section)
+ __stop_gate_section = .;
+ *(.text)
+ }
+ .text2 : AT(ADDR(.text2) - PAGE_OFFSET)
+ { *(.text2) }
+#ifdef CONFIG_SMP
+ .text.lock : AT(ADDR(.text.lock) - PAGE_OFFSET)
+ { *(.text.lock) }
+#endif
+ _etext = .;
+
+ /* Exception table */
+ . = ALIGN(16);
+ __start___ex_table = .;
+ __ex_table : AT(ADDR(__ex_table) - PAGE_OFFSET)
+ { *(__ex_table) }
+ __stop___ex_table = .;
+
+#if defined(CONFIG_KDB)
+ /* Kernel symbols and strings for kdb */
+# define KDB_MEAN_SYMBOL_SIZE 48
+# define KDB_SPACE (CONFIG_KDB_STBSIZE * KDB_MEAN_SYMBOL_SIZE)
+ . = ALIGN(8);
+ _skdb = .;
+ .kdb : AT(ADDR(.kdb) - PAGE_OFFSET)
+ {
+ *(kdbsymtab)
+ *(kdbstrings)
+ }
+ _ekdb = .;
+ . = _skdb + KDB_SPACE;
+#endif
+
+ /* Kernel symbol names for modules: */
+ .kstrtab : AT(ADDR(.kstrtab) - PAGE_OFFSET)
+ { *(.kstrtab) }
+
+ /* The initial task and kernel stack */
+ . = ALIGN(PAGE_SIZE);
+ init_task : AT(ADDR(init_task) - PAGE_OFFSET)
+ { *(init_task) }
+
+ /* Startup code */
+ __init_begin = .;
+ .text.init : AT(ADDR(.text.init) - PAGE_OFFSET)
+ { *(.text.init) }
+ .data.init : AT(ADDR(.data.init) - PAGE_OFFSET)
+ { *(.data.init) }
+ . = ALIGN(16);
+ __setup_start = .;
+ .setup.init : AT(ADDR(.setup.init) - PAGE_OFFSET)
+ { *(.setup.init) }
+ __setup_end = .;
+ __initcall_start = .;
+ .initcall.init : AT(ADDR(.initcall.init) - PAGE_OFFSET)
+ { *(.initcall.init) }
+ __initcall_end = .;
+ . = ALIGN(PAGE_SIZE);
+ __init_end = .;
+
+ .data.page_aligned : AT(ADDR(.data.page_aligned) - PAGE_OFFSET)
+ { *(.data.idt) }
+
+ . = ALIGN(64);
+ .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - PAGE_OFFSET)
+ { *(.data.cacheline_aligned) }
+
+ /* Global data */
+ _data = .;
+
+ .rodata : AT(ADDR(.rodata) - PAGE_OFFSET)
+ { *(.rodata) }
+ .opd : AT(ADDR(.opd) - PAGE_OFFSET)
+ { *(.opd) }
+ .data : AT(ADDR(.data) - PAGE_OFFSET)
+ { *(.data) *(.gnu.linkonce.d*) CONSTRUCTORS }
+
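+  /*
+   * gp-relative (addl) addressing has a 22-bit signed immediate, i.e. a
+   * +/-2MB reach, so placing __gp 2MB past this point keeps the got and
+   * short data that follow addressable from gp.
+   */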
+ __gp = ALIGN (8) + 0x200000;
+
+ .got : AT(ADDR(.got) - PAGE_OFFSET)
+ { *(.got.plt) *(.got) }
+ /* We want the small data sections together, so single-instruction offsets
+ can access them all, and initialized data all before uninitialized, so
+ we can shorten the on-disk segment size. */
+ .sdata : AT(ADDR(.sdata) - PAGE_OFFSET)
+ { *(.sdata) }
+ _edata = .;
+ _bss = .;
+ .sbss : AT(ADDR(.sbss) - PAGE_OFFSET)
+ { *(.sbss) *(.scommon) }
+ .bss : AT(ADDR(.bss) - PAGE_OFFSET)
+ { *(.bss) *(COMMON) }
+ . = ALIGN(64 / 8);
+ _end = .;
+
+ /* Sections to be discarded */
+ /DISCARD/ : {
+ *(.text.exit)
+ *(.data.exit)
+ }
+
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* These must appear regardless of . */
+ /* Discard them for now since Intel SoftSDV cannot handle them.
+ .comment 0 : { *(.comment) }
+ .note 0 : { *(.note) }
+ */
+ /DISCARD/ : { *(.comment) }
+ /DISCARD/ : { *(.note) }
+}