Diffstat (limited to 'arch/ia64')
92 files changed, 26095 insertions, 0 deletions
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile new file mode 100644 index 000000000..7dd3caabc --- /dev/null +++ b/arch/ia64/Makefile @@ -0,0 +1,125 @@ +# +# ia64/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1998, 1999 by David Mosberger-Tang <davidm@hpl.hp.com> +# + +NM := $(CROSS_COMPILE)nm -B + +LINKFLAGS = -static -T arch/$(ARCH)/vmlinux.lds +# next line is for HP compiler backend: +#AFLAGS += -DGCC_RETVAL_POINTER_IN_R8 +# The next line is needed when compiling with the July snapshot of the Cygnus compiler: +#EXTRA = -ma0-bugs -D__GCC_DOESNT_KNOW_IN_REGS__ +# next two lines are for the September snapshot of the Cygnus compiler: +AFLAGS += -D__GCC_MULTIREG_RETVALS__ +EXTRA = -ma0-bugs -D__GCC_MULTIREG_RETVALS__ + +CFLAGS := -g $(CFLAGS) -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f10-f15,f32-f127 + +ifdef CONFIG_IA64_GENERIC + CORE_FILES := arch/$(ARCH)/hp/hp.a \ + arch/$(ARCH)/sn/sn.a \ + arch/$(ARCH)/dig/dig.a \ + $(CORE_FILES) + SUBDIRS := arch/$(ARCH)/hp \ + arch/$(ARCH)/sn/sn1 \ + arch/$(ARCH)/sn \ + arch/$(ARCH)/dig \ + $(SUBDIRS) + +else # !GENERIC + +ifeq ($(CONFIG_IA64_HP_SIM),y) + SUBDIRS := arch/$(ARCH)/hp \ + $(SUBDIRS) + CORE_FILES := arch/$(ARCH)/hp/hp.a \ + $(CORE_FILES) +endif + +ifeq ($(CONFIG_IA64_SGI_SN1_SIM),y) + SUBDIRS := arch/$(ARCH)/sn/sn1 \ + arch/$(ARCH)/sn \ + $(SUBDIRS) + CORE_FILES := arch/$(ARCH)/sn/sn.a \ + $(CORE_FILES) +endif + +ifeq ($(CONFIG_IA64_SOFTSDV),y) + SUBDIRS := arch/$(ARCH)/dig \ + $(SUBDIRS) + CORE_FILES := arch/$(ARCH)/dig/dig.a \ + $(CORE_FILES) +endif + +ifeq ($(CONFIG_IA64_DIG),y) + SUBDIRS := arch/$(ARCH)/dig \ + $(SUBDIRS) + CORE_FILES := arch/$(ARCH)/dig/dig.a \ + $(CORE_FILES) +endif + +endif # !GENERIC + +ifeq ($(CONFIG_IA32_SUPPORT),y) + SUBDIRS := arch/$(ARCH)/ia32 $(SUBDIRS) + CORE_FILES := arch/$(ARCH)/ia32/ia32.o $(CORE_FILES) +endif + +ifdef CONFIG_KDB + LIBS := $(LIBS) $(TOPDIR)/arch/$(ARCH)/kdb/kdb.a + SUBDIRS := $(SUBDIRS) arch/$(ARCH)/kdb +endif + +HEAD := arch/$(ARCH)/kernel/head.o arch/ia64/kernel/init_task.o + +SUBDIRS := arch/$(ARCH)/tools arch/$(ARCH)/kernel arch/$(ARCH)/mm arch/$(ARCH)/lib $(SUBDIRS) +CORE_FILES := arch/$(ARCH)/kernel/kernel.o arch/$(ARCH)/mm/mm.o $(CORE_FILES) + +LIBS := $(TOPDIR)/arch/$(ARCH)/lib/lib.a $(LIBS) \ + $(TOPDIR)/arch/$(ARCH)/lib/lib.a + +MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot + +vmlinux: arch/$(ARCH)/vmlinux.lds + +arch/$(ARCH)/vmlinux.lds: arch/$(ARCH)/vmlinux.lds.S FORCE + gcc -D__ASSEMBLY__ -E -C -P -I$(HPATH) -I$(HPATH)/asm-$(ARCH) \ + arch/$(ARCH)/vmlinux.lds.S > $@ + +FORCE: ; + +rawboot: + @$(MAKEBOOT) rawboot + +# +# My boot writes directly to a specific disk partition, I doubt most +# people will want to do that without changes.. +# +msb my-special-boot: + @$(MAKEBOOT) msb + +bootimage: + @$(MAKEBOOT) bootimage + +srmboot: + @$(MAKEBOOT) srmboot + +archclean: + @$(MAKE) -C arch/$(ARCH)/kernel clean + @$(MAKE) -C arch/$(ARCH)/tools clean + @$(MAKEBOOT) clean + +archmrproper: + rm -f arch/$(ARCH)/vmlinux.lds + @$(MAKE) -C arch/$(ARCH)/tools mrproper + +archdep: + @$(MAKEBOOT) dep + +bootpfile: + @$(MAKEBOOT) bootpfile diff --git a/arch/ia64/boot/Makefile b/arch/ia64/boot/Makefile new file mode 100644 index 000000000..cba4fad66 --- /dev/null +++ b/arch/ia64/boot/Makefile @@ -0,0 +1,33 @@ +# +# ia64/boot/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. 
See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1998 by David Mosberger-Tang <davidm@hpl.hp.com>
+#
+
+LINKFLAGS = -static -T bootloader.lds
+
+.S.s:
+	$(CC) -D__ASSEMBLY__ $(AFLAGS) -traditional -E -o $*.s $<
+.S.o:
+	$(CC) -D__ASSEMBLY__ $(AFLAGS) -traditional -c -o $*.o $<
+
+OBJECTS = bootloader.o
+TARGETS =
+
+ifdef CONFIG_IA64_HP_SIM
+  TARGETS += bootloader
+endif
+
+all: $(TARGETS)
+
+bootloader: $(OBJECTS)
+	$(LD) $(LINKFLAGS) $(OBJECTS) $(LIBS) -o bootloader
+
+clean:
+	rm -f $(TARGETS)
+
+dep:
diff --git a/arch/ia64/boot/bootloader.c b/arch/ia64/boot/bootloader.c
new file mode 100644
index 000000000..cb6fc1f96
--- /dev/null
+++ b/arch/ia64/boot/bootloader.c
@@ -0,0 +1,234 @@
+/*
+ * arch/ia64/boot/bootloader.c
+ *
+ * Loads an ELF kernel.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 01/07/99 S.Eranian modified to pass command line arguments to kernel
+ */
+#include <linux/config.h>
+#include <linux/elf.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+#include <asm/elf.h>
+#include <asm/pal.h>
+#include <asm/pgtable.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+
+/* Simulator system calls: */
+
+#define SSC_CONSOLE_INIT		20
+#define SSC_GETCHAR			21
+#define SSC_PUTCHAR			31
+#define SSC_OPEN			50
+#define SSC_CLOSE			51
+#define SSC_READ			52
+#define SSC_WRITE			53
+#define SSC_GET_COMPLETION		54
+#define SSC_WAIT_COMPLETION		55
+#define SSC_CONNECT_INTERRUPT		58
+#define SSC_GENERATE_INTERRUPT		59
+#define SSC_SET_PERIODIC_INTERRUPT	60
+#define SSC_GET_RTC			65
+#define SSC_EXIT			66
+#define SSC_LOAD_SYMBOLS		69
+#define SSC_GET_TOD			74
+
+#define SSC_GET_ARGS			75
+
+struct disk_req {
+	unsigned long addr;
+	unsigned len;
+};
+
+struct disk_stat {
+	int fd;
+	unsigned count;
+};
+
+#include "../kernel/fw-emu.c"
+
+static void
+cons_write (const char *buf)
+{
+	unsigned long ch;
+
+	while ((ch = *buf++) != '\0') {
+		ssc(ch, 0, 0, 0, SSC_PUTCHAR);
+		if (ch == '\n')
+			ssc('\r', 0, 0, 0, SSC_PUTCHAR);
+	}
+}
+
+void
+enter_virtual_mode (unsigned long new_psr)
+{
+	asm volatile ("mov cr.ipsr=%0" :: "r"(new_psr));
+	asm volatile ("mov cr.iip=%0" :: "r"(&&target));
+	asm volatile ("mov cr.ifs=r0");
+	asm volatile ("rfi;;");		/* must be last insn in an insn group */
+
+  target:
+}
+
+
+#define MAX_ARGS 32
+
+void
+_start (void)
+{
+	register long sp asm ("sp");
+	static char stack[16384] __attribute__ ((aligned (16)));
+	static char mem[4096];
+	static char buffer[1024];
+	unsigned long flags, off;
+	int fd, i;
+	struct disk_req req;
+	struct disk_stat stat;
+	struct elfhdr *elf;
+	struct elf_phdr *elf_phdr;	/* program header */
+	unsigned long e_entry, e_phoff, e_phnum;
+	char *kpath, *args;
+	long arglen = 0;
+
+	asm volatile ("movl gp=__gp" ::: "memory");
+	asm volatile ("mov sp=%0" :: "r"(stack) : "memory");
+	asm volatile ("bsw.1;;");
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+	asm volatile ("nop 0;; nop 0;; nop 0;;");
+#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */
+
+	ssc(0, 0, 0, 0, SSC_CONSOLE_INIT);
+
+	/*
+	 * S.Eranian: extract the commandline argument from the
+	 * simulator
+	 *
+	 * The expected format is as follows:
+	 *
+	 *	kernelname args...
+	 *
+	 * Both are optional but you can't have the second one without the
+	 * first.
+	 */
+	arglen = ssc((long) buffer, 0, 0, 0, SSC_GET_ARGS);
+
+	kpath = "vmlinux";
+	args = buffer;
+	if (arglen > 0) {
+		kpath = buffer;
+		while (*args != ' ' && *args != '\0')
+			++args, --arglen;
+		if (*args == ' ')
+			*args++ = '\0', --arglen;
+	}
+
+	if (arglen <= 0) {
+		args = "";
+		arglen = 1;
+	}
+
+	fd = ssc((long) kpath, 1, 0, 0, SSC_OPEN);
+
+	if (fd < 0) {
+		cons_write(kpath);
+		cons_write(": file not found, reboot now\n");
+		for(;;);
+	}
+	stat.fd = fd;
+	off = 0;
+
+	req.len = sizeof(mem);
+	req.addr = (long) mem;
+	ssc(fd, 1, (long) &req, off, SSC_READ);
+	ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
+
+	elf = (struct elfhdr *) mem;
+	if (elf->e_ident[0] != 0x7f || strncmp(elf->e_ident + 1, "ELF", 3) != 0) {
+		cons_write("not an ELF file\n");
+		return;
+	}
+	if (elf->e_type != ET_EXEC) {
+		cons_write("not an ELF executable\n");
+		return;
+	}
+	if (!elf_check_arch(elf->e_machine)) {
+		cons_write("kernel not for this processor\n");
+		return;
+	}
+
+	e_entry = elf->e_entry;
+	e_phnum = elf->e_phnum;
+	e_phoff = elf->e_phoff;
+
+	cons_write("loading ");
+	cons_write(kpath);
+	cons_write("...\n");
+
+	for (i = 0; i < e_phnum; ++i) {
+		req.len = sizeof(*elf_phdr);
+		req.addr = (long) mem;
+		ssc(fd, 1, (long) &req, e_phoff, SSC_READ);
+		ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
+		if (stat.count != sizeof(*elf_phdr)) {
+			cons_write("failed to read phdr\n");
+			return;
+		}
+		e_phoff += sizeof(*elf_phdr);
+
+		elf_phdr = (struct elf_phdr *) mem;
+		req.len = elf_phdr->p_filesz;
+		req.addr = __pa(elf_phdr->p_vaddr);
+		ssc(fd, 1, (long) &req, elf_phdr->p_offset, SSC_READ);
+		ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
+		memset((char *)__pa(elf_phdr->p_vaddr) + elf_phdr->p_filesz, 0,
+		       elf_phdr->p_memsz - elf_phdr->p_filesz);
+	}
+	ssc(fd, 0, 0, 0, SSC_CLOSE);
+
+	cons_write("starting kernel...\n");
+
+	/* fake an I/O base address: */
+	asm volatile ("mov ar.k0=%0" :: "r"(0xffffc000000UL));
+
+	/*
+	 * Install a translation register that identity maps the
+	 * kernel's 256MB page.
+	 */
+	ia64_clear_ic(flags);
+	ia64_set_rr(0,           (0x1000 << 8) | (_PAGE_SIZE_1M << 2));
+	ia64_set_rr(PAGE_OFFSET, (ia64_rid(0, PAGE_OFFSET) << 8) | (_PAGE_SIZE_256M << 2));
+	ia64_srlz_d();
+	ia64_itr(0x3, 0, 1024*1024,
+		 pte_val(mk_pte_phys(1024*1024, __pgprot(__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX))),
+		 _PAGE_SIZE_1M);
+	ia64_itr(0x3, 1, PAGE_OFFSET,
+		 pte_val(mk_pte_phys(0, __pgprot(__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX))),
+		 _PAGE_SIZE_256M);
+	ia64_srlz_i();
+
+	enter_virtual_mode(flags | IA64_PSR_IT | IA64_PSR_IC | IA64_PSR_DT | IA64_PSR_RT
+			   | IA64_PSR_DFH | IA64_PSR_BN);
+
+	sys_fw_init(args, arglen);
+
+	ssc(0, (long) kpath, 0, 0, SSC_LOAD_SYMBOLS);
+
+	/*
+	 * Install the kernel's command line argument on ZERO_PAGE
+	 * just after the bootparam structure.
+	 * In case we don't have any argument just put \0
+	 */
+	memcpy(((struct ia64_boot_param *)ZERO_PAGE_ADDR) + 1, args, arglen);
+	sp = __pa(&stack);
+
+	asm volatile ("br.sptk.few %0" :: "b"(e_entry));
+
+	cons_write("kernel returned!\n");
+	ssc(-1, 0, 0, 0, SSC_EXIT);
+}
diff --git a/arch/ia64/boot/bootloader.lds b/arch/ia64/boot/bootloader.lds
new file mode 100644
index 000000000..a73518406
--- /dev/null
+++ b/arch/ia64/boot/bootloader.lds
@@ -0,0 +1,65 @@
+OUTPUT_FORMAT("elf64-ia64-little")
+OUTPUT_ARCH(ia64)
+ENTRY(_start)
+SECTIONS
+{
+  /* Read-only sections, merged into text segment: */
+  .
= 0x100000; + + _text = .; + .text : { *(__ivt_section) *(.text) } + _etext = .; + + /* Global data */ + _data = .; + .rodata : { *(.rodata) } + .data : { *(.data) *(.gnu.linkonce.d*) CONSTRUCTORS } + __gp = ALIGN (8) + 0x200000; + .got : { *(.got.plt) *(.got) } + /* We want the small data sections together, so single-instruction offsets + can access them all, and initialized data all before uninitialized, so + we can shorten the on-disk segment size. */ + .sdata : { *(.sdata) } + _edata = .; + + _bss = .; + .sbss : { *(.sbss) *(.scommon) } + .bss : { *(.bss) *(COMMON) } + . = ALIGN(64 / 8); + _end = . ; + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* These must appear regardless of . */ +} diff --git a/arch/ia64/config.in b/arch/ia64/config.in new file mode 100644 index 000000000..d006c1d05 --- /dev/null +++ b/arch/ia64/config.in @@ -0,0 +1,172 @@ +mainmenu_name "Kernel configuration of Linux for IA-64 machines" + +mainmenu_option next_comment +comment 'General setup' + +choice 'IA-64 system type' \ + "Generic CONFIG_IA64_GENERIC \ + HP-simulator CONFIG_IA64_HP_SIM \ + SN1-simulator CONFIG_IA64_SGI_SN1_SIM \ + DIG-compliant CONFIG_IA64_DIG" Generic + +choice 'Kernel page size' \ + "4KB CONFIG_IA64_PAGE_SIZE_4KB \ + 8KB CONFIG_IA64_PAGE_SIZE_8KB \ + 16KB CONFIG_IA64_PAGE_SIZE_16KB \ + 64KB CONFIG_IA64_PAGE_SIZE_64KB" 16KB + +if [ "$CONFIG_IA64_DIG" = "y" ]; then + bool ' Enable Itanium A-step specific code' CONFIG_ITANIUM_ASTEP_SPECIFIC + bool ' Enable SoftSDV hacks' CONFIG_IA64_SOFTSDV_HACKS n + bool ' Enable BigSur hacks' CONFIG_IA64_BIGSUR_HACKS y + bool ' Enable Lion hacks' CONFIG_IA64_LION_HACKS n + bool ' Emulate PAL/SAL/EFI firmware' CONFIG_IA64_FW_EMU n + bool ' Get PCI IRQ routing from firmware/ACPI' CONFIG_IA64_IRQ_ACPI y +fi + +if [ "$CONFIG_IA64_GENERIC" = "y" ]; then + define_bool CONFIG_IA64_SOFTSDV_HACKS y +fi + +if [ "$CONFIG_IA64_SGI_SN1_SIM" = "y" ]; then + define_bool CONFIG_NUMA y + define_bool CONFIG_IA64_SOFTSDV_HACKS y +fi + +define_bool CONFIG_KCORE_ELF y # On IA-64, we always want an ELF /dev/kcore. 
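Editor's aside, not part of the patch: the bootloader.c hunk above does its ELF loading by hand. It fetches each program header through the simulator's SSC_READ/SSC_WAIT_COMPLETION pair, copies p_filesz bytes to the physical address of p_vaddr, and zeroes the remaining p_memsz - p_filesz bytes (the BSS). The hosted-C sketch below mirrors that walk with stdio in place of the simulator calls; the PT_LOAD filter is an addition (the bootloader loads every header, and also checks e_machine, which is skipped here), and print_load_plan is a hypothetical name.

/* Sketch: print the load plan for an ELF64 executable, mirroring bootloader.c. */
#include <elf.h>
#include <stdio.h>
#include <string.h>

static int print_load_plan(FILE *f)
{
	Elf64_Ehdr eh;
	Elf64_Phdr ph;
	int i;

	if (fread(&eh, sizeof(eh), 1, f) != 1)
		return -1;
	/* same magic/type sanity checks bootloader.c performs */
	if (memcmp(eh.e_ident, ELFMAG, SELFMAG) != 0 || eh.e_type != ET_EXEC)
		return -1;
	for (i = 0; i < eh.e_phnum; ++i) {
		if (fseek(f, (long) (eh.e_phoff + (Elf64_Off) i * eh.e_phentsize), SEEK_SET) != 0
		    || fread(&ph, sizeof(ph), 1, f) != 1)
			return -1;
		if (ph.p_type != PT_LOAD)	/* filter added here; bootloader.c loads all */
			continue;
		/* bootloader.c copies p_filesz bytes to __pa(p_vaddr), then zeroes the tail */
		printf("copy %llu bytes to PA(0x%llx), zero %llu more\n",
		       (unsigned long long) ph.p_filesz,
		       (unsigned long long) ph.p_vaddr,
		       (unsigned long long) (ph.p_memsz - ph.p_filesz));
	}
	printf("branch to entry 0x%llx\n", (unsigned long long) eh.e_entry);
	return 0;
}

int main(int argc, char **argv)
{
	FILE *f = argc > 1 ? fopen(argv[1], "rb") : NULL;

	if (!f)
		return 1;
	return print_load_plan(f) ? 1 : 0;
}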
+ +bool 'SMP support' CONFIG_SMP n +bool 'Performance monitor support' CONFIG_PERFMON n + +bool 'Networking support' CONFIG_NET n +bool 'System V IPC' CONFIG_SYSVIPC n +bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT n +bool 'Sysctl support' CONFIG_SYSCTL n +tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF +tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC + +bool 'PCI support' CONFIG_PCI n +source drivers/pci/Config.in + +source drivers/pcmcia/Config.in + +mainmenu_option next_comment + comment 'Code maturity level options' + bool 'Prompt for development and/or incomplete code/drivers' \ + CONFIG_EXPERIMENTAL n +endmenu + +mainmenu_option next_comment + comment 'Loadable module support' + bool 'Enable loadable module support' CONFIG_MODULES n + if [ "$CONFIG_MODULES" = "y" ]; then + bool 'Set version information on all symbols for modules' CONFIG_MODVERSIONS n + bool 'Kernel module loader' CONFIG_KMOD n + fi +endmenu + +source drivers/parport/Config.in + +endmenu + +source drivers/pnp/Config.in +source drivers/block/Config.in +source drivers/i2o/Config.in + +if [ "$CONFIG_NET" = "y" ]; then + source net/Config.in +fi + +mainmenu_option next_comment +comment 'SCSI support' + +tristate 'SCSI support' CONFIG_SCSI + +if [ "$CONFIG_SCSI" != "n" ]; then + source drivers/scsi/Config.in + bool 'Simulated SCSI disk' CONFIG_SCSI_SIM n +fi +endmenu + +if [ "$CONFIG_NET" = "y" ]; then + mainmenu_option next_comment + comment 'Network device support' + + bool 'Network device support' CONFIG_NETDEVICES n + if [ "$CONFIG_NETDEVICES" = "y" ]; then + source drivers/net/Config.in + fi + endmenu +fi + +source net/ax25/Config.in + +mainmenu_option next_comment +comment 'ISDN subsystem' + +tristate 'ISDN support' CONFIG_ISDN +if [ "$CONFIG_ISDN" != "n" ]; then + source drivers/isdn/Config.in +fi +endmenu + +mainmenu_option next_comment +comment 'CD-ROM drivers (not for SCSI or IDE/ATAPI drives)' + +bool 'Support non-SCSI/IDE/ATAPI drives' CONFIG_CD_NO_IDESCSI n +if [ "$CONFIG_CD_NO_IDESCSI" != "n" ]; then + source drivers/cdrom/Config.in +fi +endmenu + +source drivers/char/Config.in +source drivers/usb/Config.in +source drivers/misc/Config.in + +source fs/Config.in + +source fs/nls/Config.in + +if [ "$CONFIG_VT" = "y" ]; then + mainmenu_option next_comment + comment 'Console drivers' + bool 'VGA text console' CONFIG_VGA_CONSOLE n + if [ "$CONFIG_FB" = "y" ]; then + define_bool CONFIG_PCI_CONSOLE y + fi + source drivers/video/Config.in + endmenu +fi + +mainmenu_option next_comment +comment 'Sound' + +tristate 'Sound card support' CONFIG_SOUND +if [ "$CONFIG_SOUND" != "n" ]; then + source drivers/sound/Config.in +fi +endmenu + +mainmenu_option next_comment +comment 'Kernel hacking' + +#bool 'Debug kmalloc/kfree' CONFIG_DEBUG_MALLOC +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + tristate 'Kernel support for IA-32 emulation' CONFIG_IA32_SUPPORT + tristate 'Kernel FP software completion' CONFIG_MATHEMU +else + define_bool CONFIG_MATHEMU y +fi + +bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ n +bool 'Early printk support (requires VGA!)' CONFIG_IA64_EARLY_PRINTK n +bool 'Turn on compare-and-exchange bug checking (slow!)' CONFIG_IA64_DEBUG_CMPXCHG n +bool 'Turn on irq debug checks (slow!)' CONFIG_IA64_DEBUG_IRQ n +bool 'Print possible IA64 hazards to console' CONFIG_IA64_PRINT_HAZARDS n +bool 'Built-in Kernel Debugger support' CONFIG_KDB +if [ "$CONFIG_KDB" = "y" ]; then + bool 'Compile the kernel with frame pointers' CONFIG_KDB_FRAMEPTR + int 'KDB Kernel Symbol Table size?' 
CONFIG_KDB_STBSIZE 10000 +fi + +endmenu diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig new file mode 100644 index 000000000..a96599889 --- /dev/null +++ b/arch/ia64/defconfig @@ -0,0 +1,146 @@ +# +# Automatically generated make config: don't edit +# + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y + +# +# Loadable module support +# +# CONFIG_MODULES is not set + +# +# General setup +# +CONFIG_IA64_SIM=y +CONFIG_PCI=y +# CONFIG_PCI_QUIRKS is not set +CONFIG_PCI_OLD_PROC=y +# CONFIG_NET is not set +# CONFIG_SYSVIPC is not set +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_SYSCTL is not set +# CONFIG_BINFMT_ELF is not set +# CONFIG_BINFMT_MISC is not set +# CONFIG_BINFMT_JAVA is not set +# CONFIG_BINFMT_EM86 is not set +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# +# CONFIG_PNP is not set + +# +# Block devices +# +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_DEV_IDE is not set + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_HD_ONLY is not set + +# +# Additional Block Devices +# +# CONFIG_BLK_DEV_LOOP is not set +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_MD is not set +# CONFIG_BLK_DEV_RAM is not set +# CONFIG_BLK_DEV_XD is not set +CONFIG_PARIDE_PARPORT=y +# CONFIG_PARIDE is not set +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI support +# +# CONFIG_SCSI is not set +# CONFIG_SCSI_G_NCR5380_PORT is not set +# CONFIG_SCSI_G_NCR5380_MEM is not set + +# +# Amateur Radio support +# +# CONFIG_HAMRADIO is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# CD-ROM drivers (not for SCSI or IDE/ATAPI drives) +# +# CONFIG_CD_NO_IDESCSI is not set + +# +# Character devices +# +# CONFIG_VT is not set +# CONFIG_SERIAL is not set +# CONFIG_SERIAL_EXTENDED is not set +# CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_UNIX98_PTYS is not set +# CONFIG_MOUSE is not set +# CONFIG_QIC02_TAPE is not set +# CONFIG_WATCHDOG is not set +# CONFIG_RTC is not set +CONFIG_EFI_RTC=y +# CONFIG_VIDEO_DEV is not set +# CONFIG_NVRAM is not set +# CONFIG_JOYSTICK is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_FTAPE is not set +# CONFIG_FT_NORMAL_DEBUG is not set +# CONFIG_FT_FULL_DEBUG is not set +# CONFIG_FT_NO_TRACE is not set +# CONFIG_FT_NO_TRACE_AT_ALL is not set +# CONFIG_FT_STD_FDC is not set +# CONFIG_FT_MACH2 is not set +# CONFIG_FT_PROBE_FC10 is not set +# CONFIG_FT_ALT_FDC is not set + +# +# Filesystems +# +# CONFIG_QUOTA is not set +# CONFIG_MINIX_FS is not set +# CONFIG_EXT2_FS is not set +# CONFIG_ISO9660_FS is not set +# CONFIG_FAT_FS is not set +# CONFIG_PROC_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_NTFS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_UFS_FS is not set +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_SMD_DISKLABEL is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_ADFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_MAC_PARTITION is not set +# CONFIG_NLS is not set + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# Kernel hacking +# +# CONFIG_MATHEMU is not set +# CONFIG_MAGIC_SYSRQ is not set diff --git a/arch/ia64/dig/Makefile b/arch/ia64/dig/Makefile new file mode 100644 index 000000000..cfc48eec1 --- /dev/null +++ b/arch/ia64/dig/Makefile @@ -0,0 +1,24 @@ +# +# ia64/platform/dig/Makefile +# +# Copyright (C) 1999 Silicon Graphics, Inc. 
+# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) +# + +.S.s: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -E -o $*.s $< +.S.o: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -c -o $*.o $< + +all: dig.a + +O_TARGET = dig.a +O_OBJS = iosapic.o setup.o + +ifeq ($(CONFIG_IA64_GENERIC),y) +O_OBJS += machvec.o +endif + +clean:: + +include $(TOPDIR)/Rules.make diff --git a/arch/ia64/dig/iosapic.c b/arch/ia64/dig/iosapic.c new file mode 100644 index 000000000..6a392226e --- /dev/null +++ b/arch/ia64/dig/iosapic.c @@ -0,0 +1,553 @@ +/* + * Streamlined APIC support. + * + * Copyright (C) 1999 Intel Corp. + * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com> + * Copyright (C) 1999-2000 Hewlett-Packard Co. + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> + */ +#include <linux/config.h> + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/string.h> + +#include <asm/io.h> +#include <asm/iosapic.h> +#include <asm/irq.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/delay.h> +#include <asm/processor.h> + +#undef DEBUG_IRQ_ROUTING + +/* + * IRQ vectors 0..15 are treated as the legacy interrupts of the PC-AT + * platform. No new drivers should ever ask for specific irqs, but we + * provide compatibility here in case there is an old driver that does + * ask for specific irqs (serial, keyboard, stuff like that). Since + * IA-64 doesn't allow irq 0..15 to be used for external interrupts + * anyhow, this in no way prevents us from doing the Right Thing + * with new drivers. + */ +struct iosapic_vector iosapic_vector[NR_IRQS] = { + [0 ... NR_IRQS-1] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } +}; + +#ifndef CONFIG_IA64_IRQ_ACPI +/* + * Defines the default interrupt routing information for the LION platform + * XXX - this information should be obtained from the ACPI and hardcoded since + * we do not have ACPI AML support. 
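+ * Each entry below maps a source bus and bus IRQ onto an IOSAPIC input
+ * pin; iosapic_get_PCI_irq_vector() scans the table linearly until it
+ * reaches the all-0xff sentinel entry at the end.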
+ */ + +struct intr_routing_entry intr_routing[] = { + {0,0,0,2,0,0,0,0}, + {0,0,1,1,0,0,0,0}, + {0,0,2,0xff,0,0,0,0}, + {0,0,3,3,0,0,0,0}, + {0,0,4,4,0,0,0,0}, + {0,0,5,5,0,0,0,0}, + {0,0,6,6,0,0,0,0}, + {0,0,7,7,0,0,0,0}, + {0,0,8,8,0,0,0,0}, + {0,0,9,9,0,0,0,0}, + {0,0,10,10,0,0,0,0}, + {0,0,11,11,0,0,0,0}, + {0,0,12,12,0,0,0,0}, + {0,0,13,13,0,0,0,0}, + {0,0,14,14,0,0,0,0}, + {0,0,15,15,0,0,0,0}, +#ifdef CONFIG_IA64_LION_HACKS + {1, 0, 0x04, 16, 0, 0, 1, 1}, /* bus 0, device id 1, INTA */ + {1, 0, 0x05, 26, 0, 0, 1, 1}, /* bus 0, device id 1, INTB */ + {1, 0, 0x06, 36, 0, 0, 1, 1}, /* bus 0, device id 1, INTC */ + {1, 0, 0x07, 42, 0, 0, 1, 1}, /* bus 0, device id 1, INTD */ + + {1, 0, 0x08, 17, 0, 0, 1, 1}, /* bus 0, device id 2, INTA */ + {1, 0, 0x09, 27, 0, 0, 1, 1}, /* bus 0, device id 2, INTB */ + {1, 0, 0x0a, 37, 0, 0, 1, 1}, /* bus 0, device id 2, INTC */ + {1, 0, 0x0b, 42, 0, 0, 1, 1}, /* bus 0, device id 2, INTD */ + + {1, 0, 0x0f, 50, 0, 0, 1, 1}, /* bus 0, device id 3, INTD */ + + {1, 0, 0x14, 51, 0, 0, 1, 1}, /* bus 0, device id 5, INTA */ + + {1, 0, 0x18, 49, 0, 0, 1, 1}, /* bus 0, device id 6, INTA */ + + {1, 1, 0x04, 18, 0, 0, 1, 1}, /* bus 1, device id 1, INTA */ + {1, 1, 0x05, 28, 0, 0, 1, 1}, /* bus 1, device id 1, INTB */ + {1, 1, 0x06, 38, 0, 0, 1, 1}, /* bus 1, device id 1, INTC */ + {1, 1, 0x07, 43, 0, 0, 1, 1}, /* bus 1, device id 1, INTD */ + + {1, 1, 0x08, 48, 0, 0, 1, 1}, /* bus 1, device id 2, INTA */ + + {1, 1, 0x0c, 19, 0, 0, 1, 1}, /* bus 1, device id 3, INTA */ + {1, 1, 0x0d, 29, 0, 0, 1, 1}, /* bus 1, device id 3, INTB */ + {1, 1, 0x0e, 38, 0, 0, 1, 1}, /* bus 1, device id 3, INTC */ + {1, 1, 0x0f, 44, 0, 0, 1, 1}, /* bus 1, device id 3, INTD */ + + {1, 1, 0x10, 20, 0, 0, 1, 1}, /* bus 1, device id 4, INTA */ + {1, 1, 0x11, 30, 0, 0, 1, 1}, /* bus 1, device id 4, INTB */ + {1, 1, 0x12, 39, 0, 0, 1, 1}, /* bus 1, device id 4, INTC */ + {1, 1, 0x13, 45, 0, 0, 1, 1}, /* bus 1, device id 4, INTD */ + + {1, 2, 0x04, 21, 0, 0, 1, 1}, /* bus 2, device id 1, INTA */ + {1, 2, 0x05, 31, 0, 0, 1, 1}, /* bus 2, device id 1, INTB */ + {1, 2, 0x06, 39, 0, 0, 1, 1}, /* bus 2, device id 1, INTC */ + {1, 2, 0x07, 45, 0, 0, 1, 1}, /* bus 2, device id 1, INTD */ + + {1, 2, 0x08, 22, 0, 0, 1, 1}, /* bus 2, device id 2, INTA */ + {1, 2, 0x09, 32, 0, 0, 1, 1}, /* bus 2, device id 2, INTB */ + {1, 2, 0x0a, 40, 0, 0, 1, 1}, /* bus 2, device id 2, INTC */ + {1, 2, 0x0b, 46, 0, 0, 1, 1}, /* bus 2, device id 2, INTD */ + + {1, 2, 0x0c, 23, 0, 0, 1, 1}, /* bus 2, device id 3, INTA */ + {1, 2, 0x0d, 33, 0, 0, 1, 1}, /* bus 2, device id 3, INTB */ + {1, 2, 0x0e, 40, 0, 0, 1, 1}, /* bus 2, device id 3, INTC */ + {1, 2, 0x0f, 46, 0, 0, 1, 1}, /* bus 2, device id 3, INTD */ + + {1, 3, 0x04, 24, 0, 0, 1, 1}, /* bus 3, device id 1, INTA */ + {1, 3, 0x05, 34, 0, 0, 1, 1}, /* bus 3, device id 1, INTB */ + {1, 3, 0x06, 41, 0, 0, 1, 1}, /* bus 3, device id 1, INTC */ + {1, 3, 0x07, 47, 0, 0, 1, 1}, /* bus 3, device id 1, INTD */ + + {1, 3, 0x08, 25, 0, 0, 1, 1}, /* bus 3, device id 2, INTA */ + {1, 3, 0x09, 35, 0, 0, 1, 1}, /* bus 3, device id 2, INTB */ + {1, 3, 0x0a, 41, 0, 0, 1, 1}, /* bus 3, device id 2, INTC */ + {1, 3, 0x0b, 47, 0, 0, 1, 1}, /* bus 3, device id 2, INTD */ +#else + /* + * BigSur platform, bus 0, device 1,2,4 and bus 1 device 0-3 + */ + {1,1,0x0,19,0,0,1,1}, /* bus 1, device id 0, INTA */ + {1,1,0x1,18,0,0,1,1}, /* bus 1, device id 0, INTB */ + {1,1,0x2,17,0,0,1,1}, /* bus 1, device id 0, INTC */ + {1,1,0x3,16,0,0,1,1}, /* bus 1, device id 0, INTD */ + + 
{1,1,0x4,23,0,0,1,1}, /* bus 1, device id 1, INTA */ + {1,1,0x5,22,0,0,1,1}, /* bus 1, device id 1, INTB */ + {1,1,0x6,21,0,0,1,1}, /* bus 1, device id 1, INTC */ + {1,1,0x7,20,0,0,1,1}, /* bus 1, device id 1, INTD */ + + {1,1,0x8,27,0,0,1,1}, /* bus 1, device id 2, INTA */ + {1,1,0x9,26,0,0,1,1}, /* bus 1, device id 2, INTB */ + {1,1,0xa,25,0,0,1,1}, /* bus 1, device id 2, INTC */ + {1,1,0xb,24,0,0,1,1}, /* bus 1, device id 2, INTD */ + + {1,1,0xc,31,0,0,1,1}, /* bus 1, device id 3, INTA */ + {1,1,0xd,30,0,0,1,1}, /* bus 1, device id 3, INTB */ + {1,1,0xe,29,0,0,1,1}, /* bus 1, device id 3, INTC */ + {1,1,0xf,28,0,0,1,1}, /* bus 1, device id 3, INTD */ + + {1,0,0x4,35,0,0,1,1}, /* bus 0, device id 1, INTA */ + {1,0,0x5,34,0,0,1,1}, /* bus 0, device id 1, INTB */ + {1,0,0x6,33,0,0,1,1}, /* bus 0, device id 1, INTC */ + {1,0,0x7,32,0,0,1,1}, /* bus 0, device id 1, INTD */ + + {1,0,0x8,39,0,0,1,1}, /* bus 0, device id 2, INTA */ + {1,0,0x9,38,0,0,1,1}, /* bus 0, device id 2, INTB */ + {1,0,0xa,37,0,0,1,1}, /* bus 0, device id 2, INTC */ + {1,0,0xb,36,0,0,1,1}, /* bus 0, device id 2, INTD */ + + {1,0,0x10,43,0,0,1,1}, /* bus 0, device id 4, INTA */ + {1,0,0x11,42,0,0,1,1}, /* bus 0, device id 4, INTB */ + {1,0,0x12,41,0,0,1,1}, /* bus 0, device id 4, INTC */ + {1,0,0x13,40,0,0,1,1}, /* bus 0, device id 4, INTD */ + + {1,0,0x14,17,0,0,1,1}, /* bus 0, device id 5, INTA */ + {1,0,0x18,18,0,0,1,1}, /* bus 0, device id 6, INTA */ + {1,0,0x1c,19,0,0,1,1}, /* bus 0, device id 7, INTA */ +#endif + {0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff}, +}; + +int +iosapic_get_PCI_irq_vector(int bus, int slot, int pci_pin) +{ + int i = -1; + + while (intr_routing[++i].srcbus != 0xff) { + if (intr_routing[i].srcbus == BUS_PCI) { + if ((intr_routing[i].srcbusirq == ((slot << 2) | pci_pin)) + && (intr_routing[i].srcbusno == bus)) { + return(intr_routing[i].iosapic_pin); + } + } + } + return -1; +} + +#else /* CONFIG_IA64_IRQ_ACPI */ + +/* + * find the IRQ in the IOSAPIC map for the PCI device on bus/slot/pin + */ +int +iosapic_get_PCI_irq_vector(int bus, int slot, int pci_pin) +{ + int i; + + for (i = 0; i < NR_IRQS; i++) { + if ((iosapic_bustype(i) == BUS_PCI) && + (iosapic_bus(i) == bus) && + (iosapic_busdata(i) == ((slot << 16) | pci_pin))) { + return i; + } + } + + return -1; +} +#endif /* !CONFIG_IA64_IRQ_ACPI */ + +static void +set_rte (unsigned long iosapic_addr, int entry, int pol, int trigger, int delivery, + long dest, int vector) +{ + int low32; + int high32; + + low32 = ((pol << IO_SAPIC_POLARITY_SHIFT) | + (trigger << IO_SAPIC_TRIGGER_SHIFT) | + (delivery << IO_SAPIC_DELIVERY_SHIFT) | + vector); + + /* dest contains both id and eid */ + high32 = (dest << IO_SAPIC_DEST_SHIFT); + + /* + * program the rte + */ + writel(IO_SAPIC_RTE_HIGH(entry), iosapic_addr + IO_SAPIC_REG_SELECT); + writel(high32, iosapic_addr + IO_SAPIC_WINDOW); + writel(IO_SAPIC_RTE_LOW(entry), iosapic_addr + IO_SAPIC_REG_SELECT); + writel(low32, iosapic_addr + IO_SAPIC_WINDOW); +} + + +static void +enable_pin (unsigned int pin, unsigned long iosapic_addr) +{ + int low32; + + writel(IO_SAPIC_RTE_LOW(pin), iosapic_addr + IO_SAPIC_REG_SELECT); + low32 = readl(iosapic_addr + IO_SAPIC_WINDOW); + + low32 &= ~(1 << IO_SAPIC_MASK_SHIFT); /* Zero only the mask bit */ + writel(low32, iosapic_addr + IO_SAPIC_WINDOW); +} + + +static void +disable_pin (unsigned int pin, unsigned long iosapic_addr) +{ + int low32; + + writel(IO_SAPIC_RTE_LOW(pin), iosapic_addr + IO_SAPIC_REG_SELECT); + low32 = readl(iosapic_addr + IO_SAPIC_WINDOW); + + low32 |= (1 << 
IO_SAPIC_MASK_SHIFT); /* Set only the mask bit */ + writel(low32, iosapic_addr + IO_SAPIC_WINDOW); +} + +#define iosapic_shutdown_irq iosapic_disable_irq + +static void +iosapic_enable_irq (unsigned int irq) +{ + int pin = iosapic_pin(irq); + + if (pin < 0) + /* happens during irq auto probing... */ + return; + enable_pin(pin, iosapic_addr(irq)); +} + +static void +iosapic_disable_irq (unsigned int irq) +{ + int pin = iosapic_pin(irq); + + if (pin < 0) + return; + disable_pin(pin, iosapic_addr(irq)); +} + +unsigned int +iosapic_version(unsigned long base_addr) +{ + /* + * IOSAPIC Version Register return 32 bit structure like: + * { + * unsigned int version : 8; + * unsigned int reserved1 : 8; + * unsigned int pins : 8; + * unsigned int reserved2 : 8; + * } + */ + writel(IO_SAPIC_VERSION, base_addr + IO_SAPIC_REG_SELECT); + return readl(IO_SAPIC_WINDOW + base_addr); +} + +static int +iosapic_handle_irq (unsigned int irq, struct pt_regs *regs) +{ + struct irqaction *action = 0; + struct irq_desc *id = irq_desc + irq; + unsigned int status; + int retval; + + spin_lock(&irq_controller_lock); + { + status = id->status; + + /* do we need to do something IOSAPIC-specific to ACK the irq here??? */ + /* Yes, but only level-triggered interrupts. We'll do that later */ + if ((status & IRQ_INPROGRESS) == 0 && (status & IRQ_ENABLED) != 0) { + action = id->action; + status |= IRQ_INPROGRESS; + } + id->status = status & ~(IRQ_REPLAY | IRQ_WAITING); + } + spin_unlock(&irq_controller_lock); + + if (!action) { + if (!(id->status & IRQ_AUTODETECT)) + printk("iosapic_handle_irq: unexpected interrupt %u;" + "disabling it (status=%x)\n", irq, id->status); + /* + * If we don't have a handler, disable the pin so we + * won't get any further interrupts (until + * re-enabled). --davidm 99/12/17 + */ + iosapic_disable_irq(irq); + return 0; + } + + retval = invoke_irq_handlers (irq, regs, action); + + if (iosapic_trigger(irq) == IO_SAPIC_LEVEL) /* ACK Level trigger interrupts */ + writel(irq, iosapic_addr(irq) + IO_SAPIC_EOI); + + spin_lock(&irq_controller_lock); + { + status = (id->status & ~IRQ_INPROGRESS); + id->status = status; + } + spin_unlock(&irq_controller_lock); + + return retval; +} + +void __init +iosapic_init (unsigned long addr) +{ + int i; +#ifdef CONFIG_IA64_IRQ_ACPI + struct pci_vector_struct *vectors; + int irq; +#else + int vector; +#endif + + /* + * Disable all local interrupts + */ + + ia64_set_itv(0, 1); + ia64_set_lrr0(0, 1); + ia64_set_lrr1(0, 1); + + /* + * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support + * enabled. 
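+ * The two writes below program the OCW1 interrupt-mask registers of the
+ * slave (port 0xA1) and master (port 0x21) 8259A with 0xff, masking all
+ * sixteen legacy interrupt lines.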
+ */ + + outb(0xff, 0xA1); + outb(0xff, 0x21); + +#if defined(CONFIG_IA64_SOFTSDV_HACKS) + memset(iosapic_vector, 0x0, sizeof(iosapic_vector)); + for (i = 0; i < NR_IRQS; i++) { + iosapic_pin(i) = 0xff; + iosapic_addr(i) = (unsigned long) ioremap(IO_SAPIC_DEFAULT_ADDR, 0); + } + /* XXX this should come from systab or some such: */ + iosapic_pin(TIMER_IRQ) = 5; /* System Clock Interrupt */ + iosapic_pin(0x40) = 3; /* Keyboard */ + iosapic_pin(0x92) = 9; /* COM1 Serial Port */ + iosapic_pin(0x80) = 4; /* Periodic Interrupt */ + iosapic_pin(0xc0) = 2; /* Mouse */ + iosapic_pin(0xe0) = 1; /* IDE Disk */ + iosapic_pin(0xf0) = 6; /* E-IDE CDROM */ + iosapic_pin(0xa0) = 10; /* Real PCI Interrupt */ +#elif !defined(CONFIG_IA64_IRQ_ACPI) + /* + * For systems where the routing info in ACPI is + * unavailable/wrong, use the intr_routing information to + * initialize the iosapic array + */ + i = -1; + while (intr_routing[++i].srcbus != 0xff) { + if (intr_routing[i].srcbus == BUS_ISA) { + vector = map_legacy_irq(intr_routing[i].srcbusirq); + } else if (intr_routing[i].srcbus == BUS_PCI) { + vector = intr_routing[i].iosapic_pin; + } else { + printk("unknown bus type %d for intr_routing[%d]\n", + intr_routing[i].srcbus, i); + continue; + } + iosapic_pin(vector) = intr_routing[i].iosapic_pin; + iosapic_dmode(vector) = intr_routing[i].mode; + iosapic_polarity(vector) = intr_routing[i].polarity; + iosapic_trigger(vector) = intr_routing[i].trigger; +# ifdef DEBUG_IRQ_ROUTING + printk("irq[0x%x(0x%x)]:0x%x, %d, %d, %d\n", vector, intr_routing[i].srcbusirq, + iosapic_pin(vector), iosapic_dmode(vector), iosapic_polarity(vector), + iosapic_trigger(vector)); +# endif + } +#else /* !defined(CONFIG_IA64_SOFTSDV_HACKS) && !defined(CONFIG_IA64_IRQ_ACPI) */ + /* + * Map the legacy ISA devices into the IOAPIC data; We'll override these + * later with data from the ACPI Interrupt Source Override table. + * + * Huh, the Lion w/ FPSWA firmware has entries for _all_ of the legacy IRQs, + * including those that are not different from PC/AT standard. I don't know + * if this is a bug in the other firmware or not. I'm going to leave this code + * here, so that this works on BigSur but will go ask Intel. --wfd 2000-Jan-19 + * + */ + for (i =0 ; i < IA64_MIN_VECTORED_IRQ; i++) { + irq = map_legacy_irq(i); + iosapic_pin(irq) = i; + iosapic_bus(irq) = BUS_ISA; + iosapic_busdata(irq) = 0; + iosapic_dmode(irq) = IO_SAPIC_LOWEST_PRIORITY; + iosapic_trigger(irq) = IO_SAPIC_EDGE; + iosapic_polarity(irq) = IO_SAPIC_POL_HIGH; +#ifdef DEBUG_IRQ_ROUTING + printk("ISA: IRQ %02x -> Vector %02x IOSAPIC Pin %d\n", i, irq, iosapic_pin(irq)); +#endif + } + + /* + * Map the PCI Interrupt data into the ACPI IOSAPIC data using + * the info that the bootstrap loader passed to us. 
+ */ + ia64_boot_param.pci_vectors = (__u64) __va(ia64_boot_param.pci_vectors); + vectors = (struct pci_vector_struct *) ia64_boot_param.pci_vectors; + for (i = 0; i < ia64_boot_param.num_pci_vectors; i++) { + irq = map_legacy_irq(vectors[i].irq); + + iosapic_bustype(irq) = BUS_PCI; + iosapic_pin(irq) = irq - iosapic_baseirq(irq); + iosapic_bus(irq) = vectors[i].bus; + /* + * Map the PCI slot and pin data into iosapic_busdata() + */ + iosapic_busdata(irq) = (vectors[i].pci_id & 0xffff0000) | vectors[i].pin; + + /* Default settings for PCI */ + iosapic_dmode(irq) = IO_SAPIC_LOWEST_PRIORITY; + iosapic_trigger(irq) = IO_SAPIC_LEVEL; + iosapic_polarity(irq) = IO_SAPIC_POL_LOW; + +#ifdef DEBUG_IRQ_ROUTING + printk("PCI: BUS %d Slot %x Pin %x IRQ %02x --> Vector %02x IOSAPIC Pin %d\n", + vectors[i].bus, vectors[i].pci_id>>16, vectors[i].pin, vectors[i].irq, + irq, iosapic_pin(irq)); +#endif + } +#endif /* !CONFIG_IA64_IRQ_ACPI */ +} + +static void +iosapic_startup_irq (unsigned int irq) +{ + int pin; + + if (irq == TIMER_IRQ) + return; + pin = iosapic_pin(irq); + if (pin < 0) + /* happens during irq auto probing... */ + return; + set_rte(iosapic_addr(irq), pin, iosapic_polarity(irq), iosapic_trigger(irq), + iosapic_dmode(irq), (ia64_get_lid() >> 16) & 0xffff, irq); + enable_pin(pin, iosapic_addr(irq)); +} + +struct hw_interrupt_type irq_type_iosapic = { + "IOSAPIC", + iosapic_init, + iosapic_startup_irq, + iosapic_shutdown_irq, + iosapic_handle_irq, + iosapic_enable_irq, + iosapic_disable_irq +}; + +void +dig_irq_init (struct irq_desc desc[NR_IRQS]) +{ + int i; + + /* + * Claim all non-legacy irq vectors as ours unless they're + * claimed by someone else already (e.g., timer or IPI are + * handled internally). + */ + for (i = IA64_MIN_VECTORED_IRQ; i <= IA64_MAX_VECTORED_IRQ; ++i) { + if (irq_desc[i].handler == &irq_type_default) + irq_desc[i].handler = &irq_type_iosapic; + } +} + +void +dig_pci_fixup (void) +{ + struct pci_dev *dev; + int irq; + unsigned char pin; + + pci_for_each_dev(dev) { + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if (pin) { + pin--; /* interrupt pins are numbered starting from 1 */ + irq = iosapic_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), + pin); + if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ + struct pci_dev * bridge = dev->bus->self; + + /* do the bridge swizzle... */ + pin = (pin + PCI_SLOT(dev->devfn)) % 4; + irq = iosapic_get_PCI_irq_vector(bridge->bus->number, + PCI_SLOT(bridge->devfn), pin); + if (irq >= 0) + printk(KERN_WARNING + "PCI: using PPB(B%d,I%d,P%d) to get irq %02x\n", + bridge->bus->number, PCI_SLOT(bridge->devfn), + pin, irq); + } + if (irq >= 0) { + printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %02x\n", + dev->bus->number, PCI_SLOT(dev->devfn), pin, irq); + dev->irq = irq; + } + } + /* + * Nothing to fixup + * Fix out-of-range IRQ numbers + */ + if (dev->irq >= NR_IRQS) + dev->irq = 15; /* Spurious interrupts */ + } +} diff --git a/arch/ia64/dig/machvec.c b/arch/ia64/dig/machvec.c new file mode 100644 index 000000000..640412d7e --- /dev/null +++ b/arch/ia64/dig/machvec.c @@ -0,0 +1,4 @@ +#include <asm/machvec_init.h> +#include <asm/machvec_dig.h> + +MACHVEC_DEFINE(dig) diff --git a/arch/ia64/dig/setup.c b/arch/ia64/dig/setup.c new file mode 100644 index 000000000..6ae40319d --- /dev/null +++ b/arch/ia64/dig/setup.c @@ -0,0 +1,93 @@ +/* + * Platform dependent support for Intel SoftSDV simulator. + * + * Copyright (C) 1999 Intel Corp. 
+ * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com> + */ +#include <linux/config.h> + +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/kdev_t.h> +#include <linux/string.h> +#include <linux/tty.h> +#include <linux/console.h> +#include <linux/timex.h> +#include <linux/sched.h> +#include <linux/mc146818rtc.h> + +#include <asm/io.h> +#include <asm/machvec.h> +#include <asm/system.h> + +#ifdef CONFIG_IA64_FW_EMU +# include "../../kernel/fw-emu.c" +#endif + +/* + * This is here so we can use the CMOS detection in ide-probe.c to + * determine what drives are present. In theory, we don't need this + * as the auto-detection could be done via ide-probe.c:do_probe() but + * in practice that would be much slower, which is painful when + * running in the simulator. Note that passing zeroes in DRIVE_INFO + * is sufficient (the IDE driver will autodetect the drive geometry). + */ +char drive_info[4*16]; + +unsigned char aux_device_present = 0xaa; /* XXX remove this when legacy I/O is gone */ + +void __init +dig_setup (char **cmdline_p) +{ + unsigned int orig_x, orig_y, num_cols, num_rows, font_height; + + /* + * This assumes that the EFI partition is physical disk 1 + * partition 1 and the Linux root disk is physical disk 1 + * partition 2. + */ +#ifdef CONFIG_IA64_LION_HACKS + /* default to /dev/sda2 on Lion... */ + ROOT_DEV = to_kdev_t(0x0802); /* default to second partition on first drive */ +#else + /* default to /dev/dha2 on BigSur... */ + ROOT_DEV = to_kdev_t(0x0302); /* default to second partition on first drive */ +#endif + +#ifdef CONFIG_SMP + init_smp_config(); +#endif + + memset(&screen_info, 0, sizeof(screen_info)); + + if (!ia64_boot_param.console_info.num_rows + || !ia64_boot_param.console_info.num_cols) + { + printk("dig_setup: warning: invalid screen-info, guessing 80x25\n"); + orig_x = 0; + orig_y = 0; + num_cols = 80; + num_rows = 25; + font_height = 16; + } else { + orig_x = ia64_boot_param.console_info.orig_x; + orig_y = ia64_boot_param.console_info.orig_y; + num_cols = ia64_boot_param.console_info.num_cols; + num_rows = ia64_boot_param.console_info.num_rows; + font_height = 400 / num_rows; + } + + screen_info.orig_x = orig_x; + screen_info.orig_y = orig_y; + screen_info.orig_video_cols = num_cols; + screen_info.orig_video_lines = num_rows; + screen_info.orig_video_points = font_height; + screen_info.orig_video_mode = 3; /* XXX fake */ + screen_info.orig_video_isVGA = 1; /* XXX fake */ + screen_info.orig_video_ega_bx = 3; /* XXX fake */ +} diff --git a/arch/ia64/hp/Makefile b/arch/ia64/hp/Makefile new file mode 100644 index 000000000..64899f4be --- /dev/null +++ b/arch/ia64/hp/Makefile @@ -0,0 +1,19 @@ +# +# ia64/platform/hp/Makefile +# +# Copyright (C) 1999 Silicon Graphics, Inc. +# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) +# + +all: hp.a + +O_TARGET = hp.a +O_OBJS = hpsim_console.o hpsim_irq.o hpsim_setup.o + +ifeq ($(CONFIG_IA64_GENERIC),y) +O_OBJS += hpsim_machvec.o +endif + +clean:: + +include $(TOPDIR)/Rules.make diff --git a/arch/ia64/hp/hpsim_console.c b/arch/ia64/hp/hpsim_console.c new file mode 100644 index 000000000..b97116cee --- /dev/null +++ b/arch/ia64/hp/hpsim_console.c @@ -0,0 +1,78 @@ +/* + * Platform dependent support for HP simulator. 
+ * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com> + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/kdev_t.h> +#include <linux/console.h> + +#include <asm/delay.h> +#include <asm/irq.h> +#include <asm/pal.h> +#include <asm/machvec.h> +#include <asm/pgtable.h> +#include <asm/sal.h> + +#include "hpsim_ssc.h" + +static int simcons_init (struct console *, char *); +static void simcons_write (struct console *, const char *, unsigned); +static int simcons_wait_key (struct console *); +static kdev_t simcons_console_device (struct console *); + +struct console hpsim_cons = { + "simcons", + simcons_write, /* write */ + NULL, /* read */ + simcons_console_device, /* device */ + simcons_wait_key, /* wait_key */ + NULL, /* unblank */ + simcons_init, /* setup */ + CON_PRINTBUFFER, /* flags */ + -1, /* index */ + 0, /* cflag */ + NULL /* next */ +}; + +static int +simcons_init (struct console *cons, char *options) +{ + return 0; +} + +static void +simcons_write (struct console *cons, const char *buf, unsigned count) +{ + unsigned long ch; + + while (count-- > 0) { + ch = *buf++; + ia64_ssc(ch, 0, 0, 0, SSC_PUTCHAR); + if (ch == '\n') + ia64_ssc('\r', 0, 0, 0, SSC_PUTCHAR); + } +} + +static int +simcons_wait_key (struct console *cons) +{ + char ch; + + do { + ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR); + } while (ch == '\0'); + return ch; +} + +static kdev_t +simcons_console_device (struct console *c) +{ + return MKDEV(TTY_MAJOR, 64 + c->index); +} diff --git a/arch/ia64/hp/hpsim_irq.c b/arch/ia64/hp/hpsim_irq.c new file mode 100644 index 000000000..72b36d6d6 --- /dev/null +++ b/arch/ia64/hp/hpsim_irq.c @@ -0,0 +1,83 @@ +/* + * Platform dependent support for HP simulator. 
+ * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com> + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/kdev_t.h> +#include <linux/console.h> + +#include <asm/delay.h> +#include <asm/irq.h> +#include <asm/pal.h> +#include <asm/machvec.h> +#include <asm/pgtable.h> +#include <asm/sal.h> + + +static int +irq_hp_sim_handle_irq (unsigned int irq, struct pt_regs *regs) +{ + struct irqaction *action = 0; + struct irq_desc *id = irq_desc + irq; + unsigned int status; + int retval; + + spin_lock(&irq_controller_lock); + { + status = id->status; + if ((status & IRQ_INPROGRESS) == 0 && (status & IRQ_ENABLED) != 0) { + action = id->action; + status |= IRQ_INPROGRESS; + } + id->status = status & ~(IRQ_REPLAY | IRQ_WAITING); + } + spin_unlock(&irq_controller_lock); + + if (!action) { + if (!(id->status & IRQ_AUTODETECT)) + printk("irq_hpsim_handle_irq: unexpected interrupt %u\n", irq); + return 0; + } + + retval = invoke_irq_handlers(irq, regs, action); + + spin_lock(&irq_controller_lock); + { + id->status &= ~IRQ_INPROGRESS; + } + spin_unlock(&irq_controller_lock); + + return retval; +} + +static void +irq_hp_sim_noop (unsigned int irq) +{ +} + +static struct hw_interrupt_type irq_type_hp_sim = { + "hp_sim", + (void (*)(unsigned long)) irq_hp_sim_noop, /* init */ + irq_hp_sim_noop, /* startup */ + irq_hp_sim_noop, /* shutdown */ + irq_hp_sim_handle_irq, /* handle */ + irq_hp_sim_noop, /* enable */ + irq_hp_sim_noop, /* disable */ +}; + +void +hpsim_irq_init (struct irq_desc desc[NR_IRQS]) +{ + int i; + + for (i = IA64_MIN_VECTORED_IRQ; i <= IA64_MAX_VECTORED_IRQ; ++i) { + irq_desc[i].handler = &irq_type_hp_sim; + } +} diff --git a/arch/ia64/hp/hpsim_machvec.c b/arch/ia64/hp/hpsim_machvec.c new file mode 100644 index 000000000..7d78f4961 --- /dev/null +++ b/arch/ia64/hp/hpsim_machvec.c @@ -0,0 +1,4 @@ +#include <asm/machvec_init.h> +#include <asm/machvec_hpsim.h> + +MACHVEC_DEFINE(hpsim) diff --git a/arch/ia64/hp/hpsim_setup.c b/arch/ia64/hp/hpsim_setup.c new file mode 100644 index 000000000..dfa83e135 --- /dev/null +++ b/arch/ia64/hp/hpsim_setup.c @@ -0,0 +1,71 @@ +/* + * Platform dependent support for HP simulator. + * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com> + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/kdev_t.h> +#include <linux/console.h> + +#include <asm/delay.h> +#include <asm/irq.h> +#include <asm/pal.h> +#include <asm/machvec.h> +#include <asm/pgtable.h> +#include <asm/sal.h> + +#include "hpsim_ssc.h" + +extern struct console hpsim_cons; + +/* + * Simulator system call. 
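+ * The "break 0x80001" below is intercepted by the simulator: the request
+ * number is passed in r15, up to four arguments go in the first stacked
+ * input registers, and the result comes back in r8.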
+ */ +inline long +ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr) +{ +#ifdef __GCC_DOESNT_KNOW_IN_REGS__ + register long in0 asm ("r32") = arg0; + register long in1 asm ("r33") = arg1; + register long in2 asm ("r34") = arg2; + register long in3 asm ("r35") = arg3; +#else + register long in0 asm ("in0") = arg0; + register long in1 asm ("in1") = arg1; + register long in2 asm ("in2") = arg2; + register long in3 asm ("in3") = arg3; +#endif + register long r8 asm ("r8"); + register long r15 asm ("r15") = nr; + + asm volatile ("break 0x80001" + : "=r"(r8) + : "r"(r15), "r"(in0), "r"(in1), "r"(in2), "r"(in3)); + return r8; +} + +void +ia64_ssc_connect_irq (long intr, long irq) +{ + ia64_ssc(intr, irq, 0, 0, SSC_CONNECT_INTERRUPT); +} + +void +ia64_ctl_trace (long on) +{ + ia64_ssc(on, 0, 0, 0, SSC_CTL_TRACE); +} + +void __init +hpsim_setup (char **cmdline_p) +{ + ROOT_DEV = to_kdev_t(0x0801); /* default to first SCSI drive */ + + register_console (&hpsim_cons); +} diff --git a/arch/ia64/hp/hpsim_ssc.h b/arch/ia64/hp/hpsim_ssc.h new file mode 100644 index 000000000..bfa390627 --- /dev/null +++ b/arch/ia64/hp/hpsim_ssc.h @@ -0,0 +1,36 @@ +/* + * Platform dependent support for HP simulator. + * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com> + */ +#ifndef _IA64_PLATFORM_HPSIM_SSC_H +#define _IA64_PLATFORM_HPSIM_SSC_H + +/* Simulator system calls: */ + +#define SSC_CONSOLE_INIT 20 +#define SSC_GETCHAR 21 +#define SSC_PUTCHAR 31 +#define SSC_CONNECT_INTERRUPT 58 +#define SSC_GENERATE_INTERRUPT 59 +#define SSC_SET_PERIODIC_INTERRUPT 60 +#define SSC_GET_RTC 65 +#define SSC_EXIT 66 +#define SSC_LOAD_SYMBOLS 69 +#define SSC_GET_TOD 74 +#define SSC_CTL_TRACE 76 + +#define SSC_NETDEV_PROBE 100 +#define SSC_NETDEV_SEND 101 +#define SSC_NETDEV_RECV 102 +#define SSC_NETDEV_ATTACH 103 +#define SSC_NETDEV_DETACH 104 + +/* + * Simulator system call. + */ +extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr); + +#endif /* _IA64_PLATFORM_HPSIM_SSC_H */ diff --git a/arch/ia64/ia32/Makefile b/arch/ia64/ia32/Makefile new file mode 100644 index 000000000..674a6eb6e --- /dev/null +++ b/arch/ia64/ia32/Makefile @@ -0,0 +1,17 @@ +# +# Makefile for the ia32 kernel emulation subsystem. +# + +.S.s: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -E -o $*.s $< +.S.o: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -c -o $*.o $< + +all: ia32.o + +O_TARGET := ia32.o +O_OBJS := ia32_entry.o ia32_signal.o sys_ia32.o ia32_support.o binfmt_elf32.o + +clean:: + +include $(TOPDIR)/Rules.make diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c new file mode 100644 index 000000000..685d85b20 --- /dev/null +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -0,0 +1,180 @@ +/* + * IA-32 ELF support. 
+ * + * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> + */ +#include <linux/config.h> +#include <linux/posix_types.h> + +#include <asm/signal.h> +#include <asm/ia32.h> + +#define CONFIG_BINFMT_ELF32 + +/* Override some function names */ +#undef start_thread +#define start_thread ia32_start_thread +#define init_elf_binfmt init_elf32_binfmt + +#undef CONFIG_BINFMT_ELF +#ifdef CONFIG_BINFMT_ELF32 +# define CONFIG_BINFMT_ELF CONFIG_BINFMT_ELF32 +#endif + +#undef CONFIG_BINFMT_ELF_MODULE +#ifdef CONFIG_BINFMT_ELF32_MODULE +# define CONFIG_BINFMT_ELF_MODULE CONFIG_BINFMT_ELF32_MODULE +#endif + +void ia64_elf32_init(struct pt_regs *regs); +#define ELF_PLAT_INIT(_r) ia64_elf32_init(_r) + +#define setup_arg_pages(bprm) ia32_setup_arg_pages(bprm) + +/* Ugly but avoids duplication */ +#include "../../../fs/binfmt_elf.c" + +/* Global descriptor table */ +unsigned long *ia32_gdt_table, *ia32_tss; + +struct page * +put_shared_page(struct task_struct * tsk, struct page *page, unsigned long address) +{ + pgd_t * pgd; + pmd_t * pmd; + pte_t * pte; + + if (page_count(page) != 1) + printk("mem_map disagrees with %p at %08lx\n", page, address); + pgd = pgd_offset(tsk->mm, address); + pmd = pmd_alloc(pgd, address); + if (!pmd) { + __free_page(page); + oom(tsk); + return 0; + } + pte = pte_alloc(pmd, address); + if (!pte) { + __free_page(page); + oom(tsk); + return 0; + } + if (!pte_none(*pte)) { + pte_ERROR(*pte); + __free_page(page); + return 0; + } + flush_page_to_ram(page); + set_pte(pte, pte_mkwrite(mk_pte(page, PAGE_SHARED))); + /* no need for flush_tlb */ + return page; +} + +void ia64_elf32_init(struct pt_regs *regs) +{ + int nr; + + put_shared_page(current, mem_map + MAP_NR(ia32_gdt_table), IA32_PAGE_OFFSET); + if (PAGE_SHIFT <= IA32_PAGE_SHIFT) + put_shared_page(current, mem_map + MAP_NR(ia32_tss), IA32_PAGE_OFFSET + PAGE_SIZE); + + nr = smp_processor_id(); + + /* Do all the IA-32 setup here */ + + /* CS descriptor */ + __asm__("mov ar.csd = %0" : /* no outputs */ + : "r" IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0xBL, 1L, + 3L, 1L, 1L, 1L)); + /* SS descriptor */ + __asm__("mov ar.ssd = %0" : /* no outputs */ + : "r" IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L, + 3L, 1L, 1L, 1L)); + /* EFLAGS */ + __asm__("mov ar.eflag = %0" : /* no outputs */ : "r" (IA32_EFLAG)); + + /* Control registers */ + __asm__("mov ar.cflg = %0" + : /* no outputs */ + : "r" (((ulong) IA32_CR4 << 32) | IA32_CR0)); + __asm__("mov ar.fsr = %0" + : /* no outputs */ + : "r" ((ulong)IA32_FSR_DEFAULT)); + __asm__("mov ar.fcr = %0" + : /* no outputs */ + : "r" ((ulong)IA32_FCR_DEFAULT)); + __asm__("mov ar.fir = r0"); + __asm__("mov ar.fdr = r0"); + /* TSS */ + __asm__("mov ar.k1 = %0" + : /* no outputs */ + : "r" IA64_SEG_DESCRIPTOR(IA32_PAGE_OFFSET + PAGE_SIZE, + 0x1FFFL, 0xBL, 1L, + 3L, 1L, 1L, 1L)); + + /* Get the segment selectors right */ + regs->r16 = (__USER_DS << 16) | (__USER_DS); /* ES == DS, GS, FS are zero */ + regs->r17 = (_TSS(nr) << 48) | (_LDT(nr) << 32) + | (__USER_DS << 16) | __USER_CS; + + /* Setup other segment descriptors - ESD, DSD, FSD, GSD */ + regs->r24 = IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L, 3L, 1L, 1L, 1L); + regs->r27 = IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L, 3L, 1L, 1L, 1L); + regs->r28 = IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L, 3L, 1L, 1L, 1L); + regs->r29 = IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0x3L, 1L, 3L, 1L, 1L, 1L); + + /* Setup the LDT and GDT */ + regs->r30 = ia32_gdt_table[_LDT(nr)]; + regs->r31 = IA64_SEG_DESCRIPTOR(0xc0000000L, 0x400L, 0x3L, 1L, 3L, + 1L, 1L, 1L); + + /* Clear psr.ac 
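+	   (the alignment-check bit: IA-32 code may legitimately make
+	   unaligned memory references, which must not trap in IA-32 mode)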
*/ + regs->cr_ipsr &= ~IA64_PSR_AC; + + regs->loadrs = 0; +} + +#undef STACK_TOP +#define STACK_TOP ((IA32_PAGE_OFFSET/3) * 2) + +int ia32_setup_arg_pages(struct linux_binprm *bprm) +{ + unsigned long stack_base; + struct vm_area_struct *mpnt; + int i; + + stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE; + + bprm->p += stack_base; + if (bprm->loader) + bprm->loader += stack_base; + bprm->exec += stack_base; + + mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (!mpnt) + return -ENOMEM; + + { + mpnt->vm_mm = current->mm; + mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; + mpnt->vm_end = STACK_TOP; + mpnt->vm_page_prot = PAGE_COPY; + mpnt->vm_flags = VM_STACK_FLAGS; + mpnt->vm_ops = NULL; + mpnt->vm_pgoff = 0; + mpnt->vm_file = NULL; + mpnt->vm_private_data = 0; + insert_vm_struct(current->mm, mpnt); + current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + } + + for (i = 0 ; i < MAX_ARG_PAGES ; i++) { + if (bprm->page[i]) { + current->mm->rss++; + put_dirty_page(current,bprm->page[i],stack_base); + } + stack_base += PAGE_SIZE; + } + + return 0; +} diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S new file mode 100644 index 000000000..1342e64f0 --- /dev/null +++ b/arch/ia64/ia32/ia32_entry.S @@ -0,0 +1,255 @@ +#include <asm/offsets.h> +#include <asm/signal.h> + + .global ia32_ret_from_syscall + .proc ia64_ret_from_syscall +ia32_ret_from_syscall: + cmp.ge p6,p7=r8,r0 // syscall executed successfully? + adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8 + ;; + st8 [r2]=r8 // store return value in slot for r8 + br.cond.sptk.few ia64_leave_kernel + + // + // Invoke a system call, but do some tracing before and after the call. + // We MUST preserve the current register frame throughout this routine + // because some system calls (such as ia64_execve) directly + // manipulate ar.pfs. + // + // Input: + // r15 = syscall number + // b6 = syscall entry point + // + .global ia32_trace_syscall + .proc ia32_trace_syscall +ia32_trace_syscall: + br.call.sptk.few rp=invoke_syscall_trace // give parent a chance to catch syscall args +.Lret4: br.call.sptk.few rp=b6 // do the syscall +.Lret5: cmp.lt p6,p0=r8,r0 // syscall failed? + adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8 + ;; + st8.spill [r2]=r8 // store return value in slot for r8 + br.call.sptk.few rp=invoke_syscall_trace // give parent a chance to catch return value +.Lret6: br.cond.sptk.many ia64_leave_kernel // rp MUST be != ia64_leave_kernel! 
+ + .endp ia32_trace_syscall + + .align 16 + .global sys32_fork + .proc sys32_fork +sys32_fork: + alloc r16=ar.pfs,2,2,3,0;; + movl r28=1f + mov loc1=rp + br.cond.sptk.many save_switch_stack +1: + mov loc0=r16 // save ar.pfs across do_fork + adds out2=IA64_SWITCH_STACK_SIZE+16,sp + adds r2=IA64_SWITCH_STACK_SIZE+IA64_PT_REGS_R12_OFFSET+16,sp + mov out0=SIGCHLD // out0 = clone_flags + ;; + ld8 out1=[r2] // fetch usp from pt_regs.r12 + br.call.sptk.few rp=do_fork +.ret1: + mov ar.pfs=loc0 + adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack + mov rp=loc1 + ;; + br.ret.sptk.many rp + .endp sys32_fork + + .rodata + .align 8 + .globl ia32_syscall_table +ia32_syscall_table: + data8 sys_ni_syscall /* 0 - old "setup(" system call*/ + data8 sys_exit + data8 sys32_fork + data8 sys_read + data8 sys_write + data8 sys_open /* 5 */ + data8 sys_close + data8 sys32_waitpid + data8 sys_creat + data8 sys_link + data8 sys_unlink /* 10 */ + data8 sys32_execve + data8 sys_chdir + data8 sys_ni_syscall /* sys_time is not supported on ia64 */ + data8 sys_mknod + data8 sys_chmod /* 15 */ + data8 sys_lchown + data8 sys_ni_syscall /* old break syscall holder */ + data8 sys_ni_syscall + data8 sys_lseek + data8 sys_getpid /* 20 */ + data8 sys_mount + data8 sys_oldumount + data8 sys_setuid + data8 sys_getuid + data8 sys_ni_syscall /* sys_stime is not supported on IA64 */ /* 25 */ + data8 sys_ptrace + data8 sys32_alarm + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall /* 30 */ + data8 sys_ni_syscall /* old stty syscall holder */ + data8 sys_ni_syscall /* old gtty syscall holder */ + data8 sys_access + data8 sys_nice + data8 sys_ni_syscall /* 35 */ /* old ftime syscall holder */ + data8 sys_sync + data8 sys_kill + data8 sys_rename + data8 sys_mkdir + data8 sys_rmdir /* 40 */ + data8 sys_dup + data8 sys32_pipe + data8 sys_times + data8 sys_ni_syscall /* old prof syscall holder */ + data8 sys_brk /* 45 */ + data8 sys_setgid + data8 sys_getgid + data8 sys_ni_syscall + data8 sys_geteuid + data8 sys_getegid /* 50 */ + data8 sys_acct + data8 sys_umount /* recycled never used phys( */ + data8 sys_ni_syscall /* old lock syscall holder */ + data8 sys_ioctl + data8 sys_fcntl /* 55 */ + data8 sys_ni_syscall /* old mpx syscall holder */ + data8 sys_setpgid + data8 sys_ni_syscall /* old ulimit syscall holder */ + data8 sys_ni_syscall + data8 sys_umask /* 60 */ + data8 sys_chroot + data8 sys_ustat + data8 sys_dup2 + data8 sys_getppid + data8 sys_getpgrp /* 65 */ + data8 sys_setsid + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_ni_syscall + data8 sys_setreuid /* 70 */ + data8 sys_setregid + data8 sys_ni_syscall + data8 sys_sigpending + data8 sys_sethostname + data8 sys32_setrlimit /* 75 */ + data8 sys32_getrlimit + data8 sys_getrusage + data8 sys32_gettimeofday + data8 sys32_settimeofday + data8 sys_getgroups /* 80 */ + data8 sys_setgroups + data8 sys_ni_syscall + data8 sys_symlink + data8 sys_ni_syscall + data8 sys_readlink /* 85 */ + data8 sys_uselib + data8 sys_swapon + data8 sys_reboot + data8 sys32_readdir + data8 sys32_mmap /* 90 */ + data8 sys_munmap + data8 sys_truncate + data8 sys_ftruncate + data8 sys_fchmod + data8 sys_fchown /* 95 */ + data8 sys_getpriority + data8 sys_setpriority + data8 sys_ni_syscall /* old profil syscall holder */ + data8 sys32_statfs + data8 sys32_fstatfs /* 100 */ + data8 sys_ioperm + data8 sys32_socketcall + data8 sys_syslog + data8 sys32_setitimer + data8 sys32_getitimer /* 105 */ + data8 sys32_newstat + data8 sys32_newlstat + data8 sys32_newfstat + data8 
sys_ni_syscall + data8 sys_iopl /* 110 */ + data8 sys_vhangup + data8 sys_ni_syscall // used to be sys_idle + data8 sys_ni_syscall + data8 sys32_wait4 + data8 sys_swapoff /* 115 */ + data8 sys_sysinfo + data8 sys32_ipc + data8 sys_fsync + data8 sys32_sigreturn + data8 sys_clone /* 120 */ + data8 sys_setdomainname + data8 sys_newuname + data8 sys_modify_ldt + data8 sys_adjtimex + data8 sys32_mprotect /* 125 */ + data8 sys_sigprocmask + data8 sys_create_module + data8 sys_init_module + data8 sys_delete_module + data8 sys_get_kernel_syms /* 130 */ + data8 sys_quotactl + data8 sys_getpgid + data8 sys_fchdir + data8 sys_bdflush + data8 sys_sysfs /* 135 */ + data8 sys_personality + data8 sys_ni_syscall /* for afs_syscall */ + data8 sys_setfsuid + data8 sys_setfsgid + data8 sys_llseek /* 140 */ + data8 sys32_getdents + data8 sys32_select + data8 sys_flock + data8 sys_msync + data8 sys32_readv /* 145 */ + data8 sys32_writev + data8 sys_getsid + data8 sys_fdatasync + data8 sys_sysctl + data8 sys_mlock /* 150 */ + data8 sys_munlock + data8 sys_mlockall + data8 sys_munlockall + data8 sys_sched_setparam + data8 sys_sched_getparam /* 155 */ + data8 sys_sched_setscheduler + data8 sys_sched_getscheduler + data8 sys_sched_yield + data8 sys_sched_get_priority_max + data8 sys_sched_get_priority_min /* 160 */ + data8 sys_sched_rr_get_interval + data8 sys32_nanosleep + data8 sys_mremap + data8 sys_setresuid + data8 sys_getresuid /* 165 */ + data8 sys_vm86 + data8 sys_query_module + data8 sys_poll + data8 sys_nfsservctl + data8 sys_setresgid /* 170 */ + data8 sys_getresgid + data8 sys_prctl + data8 sys32_rt_sigreturn + data8 sys32_rt_sigaction + data8 sys32_rt_sigprocmask /* 175 */ + data8 sys_rt_sigpending + data8 sys_rt_sigtimedwait + data8 sys_rt_sigqueueinfo + data8 sys_rt_sigsuspend + data8 sys_pread /* 180 */ + data8 sys_pwrite + data8 sys_chown + data8 sys_getcwd + data8 sys_capget + data8 sys_capset /* 185 */ + data8 sys_sigaltstack + data8 sys_sendfile + data8 sys_ni_syscall /* streams1 */ + data8 sys_ni_syscall /* streams2 */ + data8 sys32_vfork /* 190 */ diff --git a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c new file mode 100644 index 000000000..ed443ee66 --- /dev/null +++ b/arch/ia64/ia32/ia32_signal.c @@ -0,0 +1,412 @@ +/* + * IA32 Architecture-specific signal handling support. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> + * Copyright (C) 2000 VA Linux Co + * Copyright (C) 2000 Don Dugger <n0ano@valinux.com> + * + * Derived from i386 and Alpha versions. 
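+ *
+ * The routines below build i386-style signal frames (sigframe_ia32 and
+ * rt_sigframe_ia32) directly on the 32-bit user stack, and unpack them
+ * again in the sigreturn paths.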
+ */ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/wait.h> + +#include <asm/uaccess.h> +#include <asm/rse.h> +#include <asm/sigcontext.h> +#include <asm/segment.h> +#include <asm/ia32.h> + +#define DEBUG_SIG 0 +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + + +struct sigframe_ia32 +{ + int pretcode; + int sig; + struct sigcontext_ia32 sc; + struct _fpstate_ia32 fpstate; + unsigned int extramask[_IA32_NSIG_WORDS-1]; + char retcode[8]; +}; + +struct rt_sigframe_ia32 +{ + int pretcode; + int sig; + int pinfo; + int puc; + struct siginfo info; + struct ucontext_ia32 uc; + struct _fpstate_ia32 fpstate; + char retcode[8]; +}; + +static int +setup_sigcontext_ia32(struct sigcontext_ia32 *sc, struct _fpstate_ia32 *fpstate, + struct pt_regs *regs, unsigned long mask) +{ + int err = 0; + + err |= __put_user((regs->r16 >> 32) & 0xffff , (unsigned int *)&sc->fs); + err |= __put_user((regs->r16 >> 48) & 0xffff , (unsigned int *)&sc->gs); + + err |= __put_user((regs->r16 >> 56) & 0xffff, (unsigned int *)&sc->es); + err |= __put_user(regs->r16 & 0xffff, (unsigned int *)&sc->ds); + err |= __put_user(regs->r15, &sc->edi); + err |= __put_user(regs->r14, &sc->esi); + err |= __put_user(regs->r13, &sc->ebp); + err |= __put_user(regs->r12, &sc->esp); + err |= __put_user(regs->r11, &sc->ebx); + err |= __put_user(regs->r10, &sc->edx); + err |= __put_user(regs->r9, &sc->ecx); + err |= __put_user(regs->r8, &sc->eax); +#if 0 + err |= __put_user(current->tss.trap_no, &sc->trapno); + err |= __put_user(current->tss.error_code, &sc->err); +#endif + err |= __put_user(regs->cr_iip, &sc->eip); + err |= __put_user(regs->r17 & 0xffff, (unsigned int *)&sc->cs); +#if 0 + err |= __put_user(regs->eflags, &sc->eflags); +#endif + + err |= __put_user(regs->r12, &sc->esp_at_signal); + err |= __put_user((regs->r17 >> 16) & 0xffff, (unsigned int *)&sc->ss); + +#if 0 + tmp = save_i387(fpstate); + if (tmp < 0) + err = 1; + else + err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); + + /* non-iBCS2 extensions.. 
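+ (note: these two stores are still inside the #if 0 above, so oldmask
+ is not actually written yet, even though sys32_sigreturn reads
+ frame->sc.oldmask back)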
*/ + err |= __put_user(mask, &sc->oldmask); + err |= __put_user(current->tss.cr2, &sc->cr2); +#endif + + return err; +} + +static int +restore_sigcontext_ia32(struct pt_regs *regs, struct sigcontext_ia32 *sc, int *peax) +{ + unsigned int err = 0; + +#define COPY(ia64x, ia32x) err |= __get_user(regs->ia64x, &sc->ia32x) + +#define copyseg_gs(tmp) (regs->r16 |= (unsigned long) tmp << 48) +#define copyseg_fs(tmp) (regs->r16 |= (unsigned long) tmp << 32) +#define copyseg_cs(tmp) (regs->r17 |= tmp) +#define copyseg_ss(tmp) (regs->r17 |= (unsigned long) tmp << 16) +#define copyseg_es(tmp) (regs->r16 |= (unsigned long) tmp << 16) +#define copyseg_ds(tmp) (regs->r16 |= tmp) + +#define COPY_SEG(seg) \ + { unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + copyseg_##seg(tmp); } + +#define COPY_SEG_STRICT(seg) \ + { unsigned short tmp; \ + err |= __get_user(tmp, &sc->seg); \ + copyseg_##seg(tmp|3); } + + /* To make COPY_SEGs easier, we zero r16, r17 */ + regs->r16 = 0; + regs->r17 = 0; + + COPY_SEG(gs); + COPY_SEG(fs); + COPY_SEG(es); + COPY_SEG(ds); + COPY(r15, edi); + COPY(r14, esi); + COPY(r13, ebp); + COPY(r12, esp); + COPY(r11, ebx); + COPY(r10, edx); + COPY(r9, ecx); + COPY(cr_iip, eip); + COPY_SEG_STRICT(cs); + COPY_SEG_STRICT(ss); +#if 0 + { + unsigned int tmpflags; + err |= __get_user(tmpflags, &sc->eflags); + /* XXX: Change this to ar.eflags */ + regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); + regs->orig_eax = -1; /* disable syscall checks */ + } + + { + struct _fpstate * buf; + err |= __get_user(buf, &sc->fpstate); + if (buf) { + if (verify_area(VERIFY_READ, buf, sizeof(*buf))) + goto badframe; + err |= restore_i387(buf); + } + } +#endif + + err |= __get_user(*peax, &sc->eax); + return err; + +#if 0 +badframe: + return 1; +#endif + +} + +/* + * Determine which stack to use.. + */ +static inline void * +get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +{ + unsigned long esp; + unsigned int xss; + + /* Default to using normal stack */ + esp = regs->r12; + xss = regs->r16 >> 16; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (! on_sig_stack(esp)) + esp = current->sas_ss_sp + current->sas_ss_size; + } + /* Legacy stack switching not supported */ + + return (void *)((esp - frame_size) & -8ul); +} + +static void +setup_frame_ia32(int sig, struct k_sigaction *ka, sigset_t *set, + struct pt_regs * regs) +{ + struct sigframe_ia32 *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err |= __put_user((current->exec_domain + && current->exec_domain->signal_invmap + && sig < 32 + ? (int)(current->exec_domain->signal_invmap[sig]) + : sig), + &frame->sig); + + err |= setup_sigcontext_ia32(&frame->sc, &frame->fpstate, regs, set->sig[0]); + + if (_NSIG_WORDS > 1) { + err |= __copy_to_user(frame->extramask, &set->sig[1], + sizeof(frame->extramask)); + } + + /* Set up to return from userspace. If provided, use a stub + already in userspace. 
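+ Here pretcode always points at the retcode stub built just below;
+ sa_restorer is not consulted.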
*/ + err |= __put_user(frame->retcode, &frame->pretcode); + /* This is popl %eax ; movl $,%eax ; int $0x80 */ + err |= __put_user(0xb858, (short *)(frame->retcode+0)); +#define __IA32_NR_sigreturn 119 + err |= __put_user(__IA32_NR_sigreturn & 0xffff, (short *)(frame->retcode+2)); + err |= __put_user(__IA32_NR_sigreturn >> 16, (short *)(frame->retcode+4)); + err |= __put_user(0x80cd, (short *)(frame->retcode+6)); + + if (err) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->r12 = (unsigned long) frame; + regs->cr_iip = (unsigned long) ka->sa.sa_handler; + + set_fs(USER_DS); + regs->r16 = (__USER_DS << 16) | (__USER_DS); /* ES == DS, GS, FS are zero */ + regs->r17 = (__USER_DS << 16) | __USER_CS; + +#if 0 + regs->eflags &= ~TF_MASK; +#endif + +#if 1 + printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%x\n", + current->comm, current->pid, frame, regs->cr_iip, frame->pretcode); +#endif + + return; + +give_sigsegv: + if (sig == SIGSEGV) + ka->sa.sa_handler = SIG_DFL; + force_sig(SIGSEGV, current); +} + +static void +setup_rt_frame_ia32(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs * regs) +{ + struct rt_sigframe_ia32 *frame; + int err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err |= __put_user((current->exec_domain + && current->exec_domain->signal_invmap + && sig < 32 + ? current->exec_domain->signal_invmap[sig] + : sig), + &frame->sig); + err |= __put_user(&frame->info, &frame->pinfo); + err |= __put_user(&frame->uc, &frame->puc); + err |= __copy_to_user(&frame->info, info, sizeof(*info)); + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->r12), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext_ia32(&frame->uc.uc_mcontext, &frame->fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + err |= __put_user(frame->retcode, &frame->pretcode); + /* This is movl $,%eax ; int $0x80 */ + err |= __put_user(0xb8, (char *)(frame->retcode+0)); +#define __IA32_NR_rt_sigreturn 173 + err |= __put_user(__IA32_NR_rt_sigreturn, (int *)(frame->retcode+1)); + err |= __put_user(0x80cd, (short *)(frame->retcode+5)); + + if (err) + goto give_sigsegv; + + /* Set up registers for signal handler */ + regs->r12 = (unsigned long) frame; + regs->cr_iip = (unsigned long) ka->sa.sa_handler; + + set_fs(USER_DS); + + regs->r16 = (__USER_DS << 16) | (__USER_DS); /* ES == DS, GS, FS are zero */ + regs->r17 = (__USER_DS << 16) | __USER_CS; + +#if 0 + regs->eflags &= ~TF_MASK; +#endif + +#if 1 + printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%x\n", + current->comm, current->pid, frame, regs->cr_iip, frame->pretcode); +#endif + + return; + +give_sigsegv: + if (sig == SIGSEGV) + ka->sa.sa_handler = SIG_DFL; + force_sig(SIGSEGV, current); +} + +long +ia32_setup_frame1 (int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + /* Set up the stack frame */ + if (ka->sa.sa_flags & SA_SIGINFO) + setup_rt_frame_ia32(sig, ka, info, set, regs); + else + setup_frame_ia32(sig, ka, set, regs); + +} + +asmlinkage int +sys32_sigreturn(int arg1, int arg2, int arg3, int arg4, int arg5, unsigned long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + 
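+ /*
+ * The return into the trampoline popped pretcode and the trampoline's
+ * popl %eax popped sig, 8 bytes in all, which is why the frame base
+ * sits 8 bytes below the saved 32-bit stack pointer in r12.
+ */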
struct sigframe_ia32 *frame = (struct sigframe_ia32 *)(regs->r12- 8); + sigset_t set; + int eax; + + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + + if (__get_user(set.sig[0], &frame->sc.oldmask) + || (_IA32_NSIG_WORDS > 1 + && __copy_from_user((((char *) &set.sig) + 4), + &frame->extramask, + sizeof(frame->extramask)))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = (sigset_t) set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + if (restore_sigcontext_ia32(regs, &frame->sc, &eax)) + goto badframe; + return eax; + +badframe: + force_sig(SIGSEGV, current); + return 0; +} + +asmlinkage int +sys32_rt_sigreturn(int arg1, int arg2, int arg3, int arg4, int arg5, unsigned long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + struct rt_sigframe_ia32 *frame = (struct rt_sigframe_ia32 *)(regs->r12 - 4); + sigset_t set; + stack_t st; + int eax; + + if (verify_area(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + if (restore_sigcontext_ia32(regs, &frame->uc.uc_mcontext, &eax)) + goto badframe; + + if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st))) + goto badframe; + /* It is more difficult to avoid calling this function than to + call it and ignore errors. */ + do_sigaltstack(&st, NULL, regs->r12); + + return eax; + +badframe: + force_sig(SIGSEGV, current); + return 0; +} + diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c new file mode 100644 index 000000000..dcf61e8e4 --- /dev/null +++ b/arch/ia64/ia32/ia32_support.c @@ -0,0 +1,61 @@ +/* + * IA32 helper functions + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/sched.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/processor.h> +#include <asm/ia32.h> + +extern unsigned long *ia32_gdt_table, *ia32_tss; + +extern void die_if_kernel (char *str, struct pt_regs *regs, long err); + +/* + * Setup IA32 GDT and TSS + */ +void +ia32_gdt_init(void) +{ + unsigned long gdt_and_tss_page; + + /* allocate two IA-32 pages of memory: */ + gdt_and_tss_page = __get_free_pages(GFP_KERNEL, + (IA32_PAGE_SHIFT < PAGE_SHIFT) + ? 0 : (IA32_PAGE_SHIFT + 1) - PAGE_SHIFT); + ia32_gdt_table = (unsigned long *) gdt_and_tss_page; + ia32_tss = (unsigned long *) (gdt_and_tss_page + IA32_PAGE_SIZE); + + /* Zero the gdt and tss */ + memset((void *) gdt_and_tss_page, 0, 2*IA32_PAGE_SIZE); + + /* CS descriptor in IA-32 format */ + ia32_gdt_table[4] = IA32_SEG_DESCRIPTOR(0L, 0xBFFFFFFFL, 0xBL, 1L, + 3L, 1L, 1L, 1L, 1L); + + /* DS descriptor in IA-32 format */ + ia32_gdt_table[5] = IA32_SEG_DESCRIPTOR(0L, 0xBFFFFFFFL, 0x3L, 1L, + 3L, 1L, 1L, 1L, 1L); +} + +/* + * Handle bad IA32 interrupt via syscall + */ +void +ia32_bad_interrupt (unsigned long int_num, struct pt_regs *regs) +{ + siginfo_t siginfo; + + die_if_kernel("Bad IA-32 interrupt", regs, int_num); + + siginfo.si_signo = SIGTRAP; + siginfo.si_errno = int_num; /* XXX is it legal to abuse si_errno like this? 
*/ + siginfo.si_code = TRAP_BRKPT; + force_sig_info(SIGTRAP, &siginfo, current); +} + diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c new file mode 100644 index 000000000..7b4c4995e --- /dev/null +++ b/arch/ia64/ia32/sys_ia32.c @@ -0,0 +1,4309 @@ +/* + * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on + * sys_sparc32 + * + * Copyright (C) 2000 VA Linux Co + * Copyright (C) 2000 Don Dugger <n0ano@valinux.com> + * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * + * These routines maintain argument size conversion between 32bit and 64bit + * environment. + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/signal.h> +#include <linux/utime.h> +#include <linux/resource.h> +#include <linux/times.h> +#include <linux/utsname.h> +#include <linux/timex.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/sem.h> +#include <linux/msg.h> +#include <linux/shm.h> +#include <linux/malloc.h> +#include <linux/uio.h> +#include <linux/nfs_fs.h> +#include <linux/smb_fs.h> +#include <linux/smb_mount.h> +#include <linux/ncp_fs.h> +#include <linux/quota.h> +#include <linux/module.h> +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr.h> +#include <linux/nfsd/syscall.h> +#include <linux/poll.h> +#include <linux/personality.h> +#include <linux/stat.h> +#include <linux/ipc.h> + +#include <asm/types.h> +#include <asm/uaccess.h> +#include <asm/semaphore.h> +#include <asm/ipc.h> + +#include <net/scm.h> +#include <net/sock.h> +#include <asm/ia32.h> + +#define A(__x) ((unsigned long)(__x)) +#define AA(__x) ((unsigned long)(__x)) + +/* + * This is trivial, and on the face of it looks like it + * could equally well be done in user mode. + * + * Not so, for quite unobvious reasons - register pressure. + * In user mode vfork() cannot have a stack frame, and if + * done by calling the "clone()" system call directly, you + * do not have enough call-clobbered registers to hold all + * the information you need. + */ +asmlinkage int sys32_vfork( +int dummy0, +int dummy1, +int dummy2, +int dummy3, +int dummy4, +int dummy5, +int dummy6, +int dummy7, +int stack) +{ + struct pt_regs *regs = (struct pt_regs *)&stack; + + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->r12, regs); +} + +static int +nargs(unsigned int arg, char **ap) +{ + char *ptr; + int n, err; + + n = 0; + do { + if (err = get_user(ptr, (int *)arg)) + return(err); + if (ap) + *ap++ = ptr; + arg += sizeof(unsigned int); + n++; + } while (ptr); + return(n - 1); +} + +asmlinkage long +sys32_execve( +char *filename, +unsigned int argv, +unsigned int envp, +int dummy3, +int dummy4, +int dummy5, +int dummy6, +int dummy7, +int stack) +{ + struct pt_regs *regs = (struct pt_regs *)&stack; + char **av, **ae; + int na, ne, r, len; + + na = nargs(argv, NULL); + ne = nargs(envp, NULL); + len = (na + ne + 2) * sizeof(*av); + /* + * kmalloc won't work because the `sys_exec' code will attempt + * to do a `get_user' on the arg list and `get_user' will fail + * on a kernel address (simplifies `get_user'). Instead we + * do an mmap to get a user address. Note that since a successful + * `execve' frees all current memory we only have to do an + * `munmap' if the `execve' failes. 
+ */ + down(¤t->mm->mmap_sem); + lock_kernel(); + + av = do_mmap_pgoff(0, NULL, len, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0); + + unlock_kernel(); + up(¤t->mm->mmap_sem); + + if (IS_ERR(av)) + return(av); + ae = av + na + 1; + av[na] = (char *)0; + ae[ne] = (char *)0; + (void)nargs(argv, av); + (void)nargs(envp, ae); + r = sys_execve(filename, av, ae, regs); + if (IS_ERR(r)) + sys_munmap(av, len); + return(r); +} + +static inline int +putstat(struct stat32 *ubuf, struct stat *kbuf) +{ + int err; + + err = put_user (kbuf->st_dev, &ubuf->st_dev); + err |= __put_user (kbuf->st_ino, &ubuf->st_ino); + err |= __put_user (kbuf->st_mode, &ubuf->st_mode); + err |= __put_user (kbuf->st_nlink, &ubuf->st_nlink); + err |= __put_user (kbuf->st_uid, &ubuf->st_uid); + err |= __put_user (kbuf->st_gid, &ubuf->st_gid); + err |= __put_user (kbuf->st_rdev, &ubuf->st_rdev); + err |= __put_user (kbuf->st_size, &ubuf->st_size); + err |= __put_user (kbuf->st_atime, &ubuf->st_atime); + err |= __put_user (kbuf->st_mtime, &ubuf->st_mtime); + err |= __put_user (kbuf->st_ctime, &ubuf->st_ctime); + err |= __put_user (kbuf->st_blksize, &ubuf->st_blksize); + err |= __put_user (kbuf->st_blocks, &ubuf->st_blocks); + return err; +} + +extern asmlinkage int sys_newstat(char * filename, struct stat * statbuf); + +asmlinkage int +sys32_newstat(char * filename, struct stat32 *statbuf) +{ + int ret; + struct stat s; + mm_segment_t old_fs = get_fs(); + + set_fs (KERNEL_DS); + ret = sys_newstat(filename, &s); + set_fs (old_fs); + if (putstat (statbuf, &s)) + return -EFAULT; + return ret; +} + +extern asmlinkage int sys_newlstat(char * filename, struct stat * statbuf); + +asmlinkage int +sys32_newlstat(char * filename, struct stat32 *statbuf) +{ + int ret; + struct stat s; + mm_segment_t old_fs = get_fs(); + + set_fs (KERNEL_DS); + ret = sys_newlstat(filename, &s); + set_fs (old_fs); + if (putstat (statbuf, &s)) + return -EFAULT; + return ret; +} + +extern asmlinkage int sys_newfstat(unsigned int fd, struct stat * statbuf); + +asmlinkage int +sys32_newfstat(unsigned int fd, struct stat32 *statbuf) +{ + int ret; + struct stat s; + mm_segment_t old_fs = get_fs(); + + set_fs (KERNEL_DS); + ret = sys_newfstat(fd, &s); + set_fs (old_fs); + if (putstat (statbuf, &s)) + return -EFAULT; + return ret; +} + +#define ALIGN4K(a) (((a) + 0xfff) & ~0xfff) +#define OFFSET4K(a) ((a) & 0xfff) + +unsigned long +do_mmap_fake(struct file *file, unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, unsigned long off) +{ + struct inode *inode; + void *front, *back; + unsigned long baddr; + int r; + char c; + + if (OFFSET4K(addr) || OFFSET4K(off)) + return -EINVAL; + if (prot & PROT_WRITE) + prot |= PROT_EXEC; + front = NULL; + back = NULL; + if ((baddr = (addr & PAGE_MASK)) != addr && get_user(c, (char *)baddr) == 0) { + front = kmalloc(addr - baddr, GFP_KERNEL); + memcpy(front, (void *)baddr, addr - baddr); + } + if ((addr + len) & ~PAGE_MASK && get_user(c, (char *)(addr + len)) == 0) { + back = kmalloc(PAGE_SIZE - ((addr + len) & ~PAGE_MASK), GFP_KERNEL); + memcpy(back, addr + len, PAGE_SIZE - ((addr + len) & ~PAGE_MASK)); + } + if ((r = do_mmap(0, baddr, len + (addr - baddr), prot, flags | MAP_ANONYMOUS, 0)) < 0) + return(r); + if (back) { + memcpy(addr + len, back, PAGE_SIZE - ((addr + len) & ~PAGE_MASK)); + kfree(back); + } + if (front) { + memcpy((void *)baddr, front, addr - baddr); + kfree(front); + } + if (flags & MAP_ANONYMOUS) { + memset(addr, 0, len); + return(addr); + } + if (!file) + return 
-EINVAL; + inode = file->f_dentry->d_inode; + if (!inode->i_op || !inode->i_op->default_file_ops) + return -EINVAL; + if (!file->f_op->read) + return -EINVAL; + if (file->f_op->llseek) { + if (file->f_op->llseek(file,off,0) != off) + return -EINVAL; + } else + file->f_pos = off; + r = file->f_op->read(file, (char *)addr, len, &file->f_pos); + return (r < 0) ? -EINVAL : addr; +} + +/* + * Linux/i386 didn't use to be able to handle more than + * 4 system call parameters, so these system calls used a memory + * block for parameter passing.. + */ + +struct mmap_arg_struct { + unsigned int addr; + unsigned int len; + unsigned int prot; + unsigned int flags; + unsigned int fd; + unsigned int offset; +}; + +asmlinkage int +sys32_mmap(struct mmap_arg_struct *arg) +{ + int error = -EFAULT; + struct file * file = NULL; + struct mmap_arg_struct a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + + down(¤t->mm->mmap_sem); + lock_kernel(); + if (!(a.flags & MAP_ANONYMOUS)) { + error = -EBADF; + file = fget(a.fd); + if (!file) + goto out; + } + a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + + if ((a.flags & MAP_FIXED) && ((a.addr & ~PAGE_MASK) || (a.offset & ~PAGE_MASK))) { + unlock_kernel(); + up(¤t->mm->mmap_sem); + error = do_mmap_fake(file, a.addr, a.len, a.prot, a.flags, a.offset); + down(¤t->mm->mmap_sem); + lock_kernel(); + } else + error = do_mmap(file, a.addr, a.len, a.prot, a.flags, a.offset); + if (file) + fput(file); +out: + unlock_kernel(); + up(¤t->mm->mmap_sem); + return error; +} + +asmlinkage long +sys32_pipe(int *fd) +{ + int retval; + int fds[2]; + + lock_kernel(); + retval = do_pipe(fds); + if (retval) + goto out; + if (copy_to_user(fd, fds, sizeof(fds))) + retval = -EFAULT; + out: + unlock_kernel(); + return retval; +} + +asmlinkage long +sys32_mprotect(unsigned long start, size_t len, unsigned long prot) +{ + + if (prot == 0) + return(0); + len += start & ~PAGE_MASK; + if ((start & ~PAGE_MASK) && (prot & PROT_WRITE)) + prot |= PROT_EXEC; + return(sys_mprotect(start & PAGE_MASK, len & PAGE_MASK, prot)); +} + +asmlinkage int +sys32_rt_sigaction(int sig, struct sigaction32 *act, + struct sigaction32 *oact, unsigned int sigsetsize) +{ + struct k_sigaction new_ka, old_ka; + int ret; + sigset32_t set32; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset32_t)) + return -EINVAL; + + if (act) { + ret = get_user((long)new_ka.sa.sa_handler, &act->sa_handler); + ret |= __copy_from_user(&set32, &act->sa_mask, + sizeof(sigset32_t)); + switch (_NSIG_WORDS) { + case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] + | (((long)set32.sig[7]) << 32); + case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4] + | (((long)set32.sig[5]) << 32); + case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2] + | (((long)set32.sig[3]) << 32); + case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0] + | (((long)set32.sig[1]) << 32); + } + ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); + + if (ret) + return -EFAULT; + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); + + if (!ret && oact) { + switch (_NSIG_WORDS) { + case 4: + set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); + set32.sig[6] = old_ka.sa.sa_mask.sig[3]; + case 3: + set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32); + set32.sig[4] = old_ka.sa.sa_mask.sig[2]; + case 2: + set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32); + set32.sig[2] = old_ka.sa.sa_mask.sig[1]; + case 1: + set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); + set32.sig[0] = old_ka.sa.sa_mask.sig[0]; + } + ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler); + ret |= __copy_to_user(&oact->sa_mask, &set32, + sizeof(sigset32_t)); + ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + } + + return ret; +} + + +extern asmlinkage int sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset, + size_t sigsetsize); + +asmlinkage int +sys32_rt_sigprocmask(int how, sigset32_t *set, sigset32_t *oset, + unsigned int sigsetsize) +{ + sigset_t s; + sigset32_t s32; + int ret; + mm_segment_t old_fs = get_fs(); + + if (set) { + if (copy_from_user (&s32, set, sizeof(sigset32_t))) + return -EFAULT; + switch (_NSIG_WORDS) { + case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); + case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); + case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); + case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); + } + } + set_fs (KERNEL_DS); + ret = sys_rt_sigprocmask(how, set ? &s : NULL, oset ? &s : NULL, + sigsetsize); + set_fs (old_fs); + if (ret) return ret; + if (oset) { + switch (_NSIG_WORDS) { + case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; + case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; + case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; + case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; + } + if (copy_to_user (oset, &s32, sizeof(sigset32_t))) + return -EFAULT; + } + return 0; +} + +static inline int +put_statfs (struct statfs32 *ubuf, struct statfs *kbuf) +{ + int err; + + err = put_user (kbuf->f_type, &ubuf->f_type); + err |= __put_user (kbuf->f_bsize, &ubuf->f_bsize); + err |= __put_user (kbuf->f_blocks, &ubuf->f_blocks); + err |= __put_user (kbuf->f_bfree, &ubuf->f_bfree); + err |= __put_user (kbuf->f_bavail, &ubuf->f_bavail); + err |= __put_user (kbuf->f_files, &ubuf->f_files); + err |= __put_user (kbuf->f_ffree, &ubuf->f_ffree); + err |= __put_user (kbuf->f_namelen, &ubuf->f_namelen); + err |= __put_user (kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]); + err |= __put_user (kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]); + return err; +} + +extern asmlinkage int sys_statfs(const char * path, struct statfs * buf); + +asmlinkage int +sys32_statfs(const char * path, struct statfs32 *buf) +{ + int ret; + struct statfs s; + mm_segment_t old_fs = get_fs(); + + set_fs (KERNEL_DS); + ret = sys_statfs((const char *)path, &s); + set_fs (old_fs); + if (put_statfs(buf, &s)) + return -EFAULT; + return ret; +} + +extern asmlinkage int sys_fstatfs(unsigned int fd, struct statfs * buf); + +asmlinkage int +sys32_fstatfs(unsigned int fd, struct statfs32 *buf) +{ + int ret; + struct statfs s; + mm_segment_t old_fs = get_fs(); + + set_fs (KERNEL_DS); + ret = sys_fstatfs(fd, &s); + set_fs (old_fs); + if (put_statfs(buf, &s)) + return -EFAULT; + return ret; +} + +struct timeval32 +{ + int tv_sec, tv_usec; +}; + +struct itimerval32 +{ + struct timeval32 it_interval; + struct timeval32 it_value; +}; + +static inline long +get_tv32(struct timeval *o, struct timeval32 *i) +{ + return (!access_ok(VERIFY_READ, i, sizeof(*i)) || 
+ (__get_user(o->tv_sec, &i->tv_sec) | + __get_user(o->tv_usec, &i->tv_usec))); + return ENOSYS; +} + +static inline long +put_tv32(struct timeval32 *o, struct timeval *i) +{ + return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || + (__put_user(i->tv_sec, &o->tv_sec) | + __put_user(i->tv_usec, &o->tv_usec))); +} + +static inline long +get_it32(struct itimerval *o, struct itimerval32 *i) +{ + return (!access_ok(VERIFY_READ, i, sizeof(*i)) || + (__get_user(o->it_interval.tv_sec, &i->it_interval.tv_sec) | + __get_user(o->it_interval.tv_usec, &i->it_interval.tv_usec) | + __get_user(o->it_value.tv_sec, &i->it_value.tv_sec) | + __get_user(o->it_value.tv_usec, &i->it_value.tv_usec))); + return ENOSYS; +} + +static inline long +put_it32(struct itimerval32 *o, struct itimerval *i) +{ + return (!access_ok(VERIFY_WRITE, i, sizeof(*i)) || + (__put_user(i->it_interval.tv_sec, &o->it_interval.tv_sec) | + __put_user(i->it_interval.tv_usec, &o->it_interval.tv_usec) | + __put_user(i->it_value.tv_sec, &o->it_value.tv_sec) | + __put_user(i->it_value.tv_usec, &o->it_value.tv_usec))); + return ENOSYS; +} + +extern int do_getitimer(int which, struct itimerval *value); + +asmlinkage int +sys32_getitimer(int which, struct itimerval32 *it) +{ + struct itimerval kit; + int error; + + error = do_getitimer(which, &kit); + if (!error && put_it32(it, &kit)) + error = -EFAULT; + + return error; +} + +extern int do_setitimer(int which, struct itimerval *, struct itimerval *); + +asmlinkage int +sys32_setitimer(int which, struct itimerval32 *in, struct itimerval32 *out) +{ + struct itimerval kin, kout; + int error; + + if (in) { + if (get_it32(&kin, in)) + return -EFAULT; + } else + memset(&kin, 0, sizeof(kin)); + + error = do_setitimer(which, &kin, out ? &kout : NULL); + if (error || !out) + return error; + if (put_it32(out, &kout)) + return -EFAULT; + + return 0; + +} +asmlinkage unsigned long +sys32_alarm(unsigned int seconds) +{ + struct itimerval it_new, it_old; + unsigned int oldalarm; + + it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; + it_new.it_value.tv_sec = seconds; + it_new.it_value.tv_usec = 0; + do_setitimer(ITIMER_REAL, &it_new, &it_old); + oldalarm = it_old.it_value.tv_sec; + /* ehhh.. We can't return 0 if we have an alarm pending.. */ + /* And we'd better return too much than too little anyway */ + if (it_old.it_value.tv_usec) + oldalarm++; + return oldalarm; +} + +/* Translations due to time_t size differences. Which affects all + sorts of things, like timeval and itimerval. */ + +extern struct timezone sys_tz; +extern int do_sys_settimeofday(struct timeval *tv, struct timezone *tz); + +asmlinkage int +sys32_gettimeofday(struct timeval32 *tv, struct timezone *tz) +{ + if (tv) { + struct timeval ktv; + do_gettimeofday(&ktv); + if (put_tv32(tv, &ktv)) + return -EFAULT; + } + if (tz) { + if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) + return -EFAULT; + } + return 0; +} + +asmlinkage int +sys32_settimeofday(struct timeval32 *tv, struct timezone *tz) +{ + struct timeval ktv; + struct timezone ktz; + + if (tv) { + if (get_tv32(&ktv, tv)) + return -EFAULT; + } + if (tz) { + if (copy_from_user(&ktz, tz, sizeof(ktz))) + return -EFAULT; + } + + return do_sys_settimeofday(tv ? &ktv : NULL, tz ? 
&ktz : NULL); +} + +struct dirent32 { + unsigned int d_ino; + unsigned int d_off; + unsigned short d_reclen; + char d_name[NAME_MAX + 1]; +}; + +static void +xlate_dirent(void *dirent, long n) +{ + long off; + struct dirent *dirp; + struct dirent32 *dirp32; + + off = 0; + while (off < n) { + dirp = (struct dirent *)(dirent + off); + off += dirp->d_reclen; + dirp32 = (struct dirent32 *)dirp; + dirp32->d_ino = dirp->d_ino; + dirp32->d_off = (unsigned int)dirp->d_off; + dirp32->d_reclen = dirp->d_reclen; + strncpy(dirp32->d_name, dirp->d_name, dirp->d_reclen - ((3 * 4) + 2)); + } + return; +} + +asmlinkage long +sys32_getdents(unsigned int fd, void * dirent, unsigned int count) +{ + long n; + + if ((n = sys_getdents(fd, dirent, count)) < 0) + return(n); + xlate_dirent(dirent, n); + return(n); +} + +asmlinkage int +sys32_readdir(unsigned int fd, void * dirent, unsigned int count) +{ + int n; + struct dirent *dirp; + + if ((n = old_readdir(fd, dirent, count)) < 0) + return(n); + dirp = (struct dirent *)dirent; + xlate_dirent(dirent, dirp->d_reclen); + return(n); +} + +/* + * We can actually return ERESTARTSYS instead of EINTR, but I'd + * like to be certain this leads to no problems. So I return + * EINTR just for safety. + * + * Update: ERESTARTSYS breaks at least the xview clock binary, so + * I'm trying ERESTARTNOHAND which restart only when you want to. + */ +#define MAX_SELECT_SECONDS \ + ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) +#define ROUND_UP(x,y) (((x)+(y)-1)/(y)) + +asmlinkage int +sys32_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval32 *tvp32) +{ + fd_set_bits fds; + char *bits; + long timeout; + int ret, size; + + timeout = MAX_SCHEDULE_TIMEOUT; + if (tvp32) { + time_t sec, usec; + + get_user(sec, &tvp32->tv_sec); + get_user(usec, &tvp32->tv_usec); + + ret = -EINVAL; + if (sec < 0 || usec < 0) + goto out_nofds; + + if ((unsigned long) sec < MAX_SELECT_SECONDS) { + timeout = ROUND_UP(usec, 1000000/HZ); + timeout += sec * (unsigned long) HZ; + } + } + + ret = -EINVAL; + if (n < 0) + goto out_nofds; + + if (n > current->files->max_fdset) + n = current->files->max_fdset; + + /* + * We need 6 bitmaps (in/out/ex for both incoming and outgoing), + * since we used fdset we need to allocate memory in units of + * long-words. 
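+ *
+ * The single kmalloc below carves all six bitmaps out of one
+ * contiguous 6*FDS_BYTES(n) allocation.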
+ */ + ret = -ENOMEM; + size = FDS_BYTES(n); + bits = kmalloc(6 * size, GFP_KERNEL); + if (!bits) + goto out_nofds; + fds.in = (unsigned long *) bits; + fds.out = (unsigned long *) (bits + size); + fds.ex = (unsigned long *) (bits + 2*size); + fds.res_in = (unsigned long *) (bits + 3*size); + fds.res_out = (unsigned long *) (bits + 4*size); + fds.res_ex = (unsigned long *) (bits + 5*size); + + if ((ret = get_fd_set(n, inp, fds.in)) || + (ret = get_fd_set(n, outp, fds.out)) || + (ret = get_fd_set(n, exp, fds.ex))) + goto out; + zero_fd_set(n, fds.res_in); + zero_fd_set(n, fds.res_out); + zero_fd_set(n, fds.res_ex); + + ret = do_select(n, &fds, &timeout); + + if (tvp32 && !(current->personality & STICKY_TIMEOUTS)) { + time_t sec = 0, usec = 0; + if (timeout) { + sec = timeout / HZ; + usec = timeout % HZ; + usec *= (1000000/HZ); + } + put_user(sec, (int *)&tvp32->tv_sec); + put_user(usec, (int *)&tvp32->tv_usec); + } + + if (ret < 0) + goto out; + if (!ret) { + ret = -ERESTARTNOHAND; + if (signal_pending(current)) + goto out; + ret = 0; + } + + set_fd_set(n, inp, fds.res_in); + set_fd_set(n, outp, fds.res_out); + set_fd_set(n, exp, fds.res_ex); + +out: + kfree(bits); +out_nofds: + return ret; +} + +struct rusage32 { + struct timeval32 ru_utime; + struct timeval32 ru_stime; + int ru_maxrss; + int ru_ixrss; + int ru_idrss; + int ru_isrss; + int ru_minflt; + int ru_majflt; + int ru_nswap; + int ru_inblock; + int ru_oublock; + int ru_msgsnd; + int ru_msgrcv; + int ru_nsignals; + int ru_nvcsw; + int ru_nivcsw; +}; + +static int +put_rusage (struct rusage32 *ru, struct rusage *r) +{ + int err; + + err = put_user (r->ru_utime.tv_sec, &ru->ru_utime.tv_sec); + err |= __put_user (r->ru_utime.tv_usec, &ru->ru_utime.tv_usec); + err |= __put_user (r->ru_stime.tv_sec, &ru->ru_stime.tv_sec); + err |= __put_user (r->ru_stime.tv_usec, &ru->ru_stime.tv_usec); + err |= __put_user (r->ru_maxrss, &ru->ru_maxrss); + err |= __put_user (r->ru_ixrss, &ru->ru_ixrss); + err |= __put_user (r->ru_idrss, &ru->ru_idrss); + err |= __put_user (r->ru_isrss, &ru->ru_isrss); + err |= __put_user (r->ru_minflt, &ru->ru_minflt); + err |= __put_user (r->ru_majflt, &ru->ru_majflt); + err |= __put_user (r->ru_nswap, &ru->ru_nswap); + err |= __put_user (r->ru_inblock, &ru->ru_inblock); + err |= __put_user (r->ru_oublock, &ru->ru_oublock); + err |= __put_user (r->ru_msgsnd, &ru->ru_msgsnd); + err |= __put_user (r->ru_msgrcv, &ru->ru_msgrcv); + err |= __put_user (r->ru_nsignals, &ru->ru_nsignals); + err |= __put_user (r->ru_nvcsw, &ru->ru_nvcsw); + err |= __put_user (r->ru_nivcsw, &ru->ru_nivcsw); + return err; +} + +extern asmlinkage int sys_wait4(pid_t pid,unsigned int * stat_addr, + int options, struct rusage * ru); + +asmlinkage int +sys32_wait4(__kernel_pid_t32 pid, unsigned int *stat_addr, int options, + struct rusage32 *ru) +{ + if (!ru) + return sys_wait4(pid, stat_addr, options, NULL); + else { + struct rusage r; + int ret; + unsigned int status; + mm_segment_t old_fs = get_fs(); + + set_fs (KERNEL_DS); + ret = sys_wait4(pid, stat_addr ? 
&status : NULL, options, &r); + set_fs (old_fs); + if (put_rusage (ru, &r)) return -EFAULT; + if (stat_addr && put_user (status, stat_addr)) + return -EFAULT; + return ret; + } +} + +asmlinkage int +sys32_waitpid(__kernel_pid_t32 pid, unsigned int *stat_addr, int options) +{ + return sys32_wait4(pid, stat_addr, options, NULL); +} + +struct timespec32 { + int tv_sec; + int tv_nsec; +}; + +extern asmlinkage int sys_nanosleep(struct timespec *rqtp, + struct timespec *rmtp); + +asmlinkage int +sys32_nanosleep(struct timespec32 *rqtp, struct timespec32 *rmtp) +{ + struct timespec t; + int ret; + mm_segment_t old_fs = get_fs (); + + if (get_user (t.tv_sec, &rqtp->tv_sec) || + __get_user (t.tv_nsec, &rqtp->tv_nsec)) + return -EFAULT; + set_fs (KERNEL_DS); + ret = sys_nanosleep(&t, rmtp ? &t : NULL); + set_fs (old_fs); + if (rmtp && ret == -EINTR) { + if (__put_user (t.tv_sec, &rmtp->tv_sec) || + __put_user (t.tv_nsec, &rmtp->tv_nsec)) + return -EFAULT; + } + return ret; +} + +struct iovec32 { unsigned int iov_base; int iov_len; }; + +typedef ssize_t (*IO_fn_t)(struct file *, char *, size_t, loff_t *); + +static long +do_readv_writev32(int type, struct file *file, const struct iovec32 *vector, + u32 count) +{ + unsigned long tot_len; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov=iovstack, *ivp; + struct inode *inode; + long retval, i; + IO_fn_t fn; + + /* First get the "struct iovec" from user memory and + * verify all the pointers + */ + if (!count) + return 0; + if(verify_area(VERIFY_READ, vector, sizeof(struct iovec32)*count)) + return -EFAULT; + if (count > UIO_MAXIOV) + return -EINVAL; + if (count > UIO_FASTIOV) { + iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); + if (!iov) + return -ENOMEM; + } + + tot_len = 0; + i = count; + ivp = iov; + while(i > 0) { + u32 len; + u32 buf; + + __get_user(len, &vector->iov_len); + __get_user(buf, &vector->iov_base); + tot_len += len; + ivp->iov_base = (void *)A(buf); + ivp->iov_len = (__kernel_size_t) len; + vector++; + ivp++; + i--; + } + + inode = file->f_dentry->d_inode; + /* VERIFY_WRITE actually means a read, as we write to user space */ + retval = locks_verify_area((type == VERIFY_WRITE + ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE), + inode, file, file->f_pos, tot_len); + if (retval) { + if (iov != iovstack) + kfree(iov); + return retval; + } + + /* Then do the actual IO. 
Note that sockets need to be handled + * specially as they have atomicity guarantees and can handle + * iovec's natively + */ + if (inode->i_sock) { + int err; + err = sock_readv_writev(type, inode, file, iov, count, tot_len); + if (iov != iovstack) + kfree(iov); + return err; + } + + if (!file->f_op) { + if (iov != iovstack) + kfree(iov); + return -EINVAL; + } + /* VERIFY_WRITE actually means a read, as we write to user space */ + fn = file->f_op->read; + if (type == VERIFY_READ) + fn = (IO_fn_t) file->f_op->write; + ivp = iov; + while (count > 0) { + void * base; + int len, nr; + + base = ivp->iov_base; + len = ivp->iov_len; + ivp++; + count--; + nr = fn(file, base, len, &file->f_pos); + if (nr < 0) { + if (retval) + break; + retval = nr; + break; + } + retval += nr; + if (nr != len) + break; + } + if (iov != iovstack) + kfree(iov); + return retval; +} + +asmlinkage long +sys32_readv(int fd, struct iovec32 *vector, u32 count) +{ + struct file *file; + long ret = -EBADF; + + lock_kernel(); + file = fget(fd); + if(!file) + goto bad_file; + + if(!(file->f_mode & 1)) + goto out; + + ret = do_readv_writev32(VERIFY_WRITE, file, + vector, count); +out: + fput(file); +bad_file: + unlock_kernel(); + return ret; +} + +asmlinkage long +sys32_writev(int fd, struct iovec32 *vector, u32 count) +{ + struct file *file; + int ret = -EBADF; + + lock_kernel(); + file = fget(fd); + if(!file) + goto bad_file; + + if(!(file->f_mode & 2)) + goto out; + + down(&file->f_dentry->d_inode->i_sem); + ret = do_readv_writev32(VERIFY_READ, file, + vector, count); + up(&file->f_dentry->d_inode->i_sem); +out: + fput(file); +bad_file: + unlock_kernel(); + return ret; +} + +#define RLIM_INFINITY32 0x7fffffff +#define RESOURCE32(x) ((x > RLIM_INFINITY32) ? RLIM_INFINITY32 : x) + +struct rlimit32 { + int rlim_cur; + int rlim_max; +}; + +extern asmlinkage int sys_getrlimit(unsigned int resource, struct rlimit *rlim); + +asmlinkage int +sys32_getrlimit(unsigned int resource, struct rlimit32 *rlim) +{ + struct rlimit r; + int ret; + mm_segment_t old_fs = get_fs (); + + set_fs (KERNEL_DS); + ret = sys_getrlimit(resource, &r); + set_fs (old_fs); + if (!ret) { + ret = put_user (RESOURCE32(r.rlim_cur), &rlim->rlim_cur); + ret |= __put_user (RESOURCE32(r.rlim_max), &rlim->rlim_max); + } + return ret; +} + +extern asmlinkage int sys_setrlimit(unsigned int resource, struct rlimit *rlim); + +asmlinkage int +sys32_setrlimit(unsigned int resource, struct rlimit32 *rlim) +{ + struct rlimit r; + int ret; + mm_segment_t old_fs = get_fs (); + + if (resource >= RLIM_NLIMITS) return -EINVAL; + if (get_user (r.rlim_cur, &rlim->rlim_cur) || + __get_user (r.rlim_max, &rlim->rlim_max)) + return -EFAULT; + if (r.rlim_cur == RLIM_INFINITY32) + r.rlim_cur = RLIM_INFINITY; + if (r.rlim_max == RLIM_INFINITY32) + r.rlim_max = RLIM_INFINITY; + set_fs (KERNEL_DS); + ret = sys_setrlimit(resource, &r); + set_fs (old_fs); + return ret; +} + +/* Argument list sizes for sys_socketcall */ +#define AL(x) ((x) * sizeof(u32)) +static unsigned char nas[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), + AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), + AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)}; +#undef AL + +extern asmlinkage int sys_bind(int fd, struct sockaddr *umyaddr, int addrlen); +extern asmlinkage int sys_connect(int fd, struct sockaddr *uservaddr, + int addrlen); +extern asmlinkage int sys_accept(int fd, struct sockaddr *upeer_sockaddr, + int *upeer_addrlen); +extern asmlinkage int sys_getsockname(int fd, struct sockaddr *usockaddr, + int *usockaddr_len); +extern asmlinkage int 
sys_getpeername(int fd, struct sockaddr *usockaddr, + int *usockaddr_len); +extern asmlinkage int sys_send(int fd, void *buff, size_t len, unsigned flags); +extern asmlinkage int sys_sendto(int fd, u32 buff, __kernel_size_t32 len, + unsigned flags, u32 addr, int addr_len); +extern asmlinkage int sys_recv(int fd, void *ubuf, size_t size, unsigned flags); +extern asmlinkage int sys_recvfrom(int fd, u32 ubuf, __kernel_size_t32 size, + unsigned flags, u32 addr, u32 addr_len); +extern asmlinkage int sys_setsockopt(int fd, int level, int optname, + char *optval, int optlen); +extern asmlinkage int sys_getsockopt(int fd, int level, int optname, + u32 optval, u32 optlen); + +extern asmlinkage int sys_socket(int family, int type, int protocol); +extern asmlinkage int sys_socketpair(int family, int type, int protocol, + int usockvec[2]); +extern asmlinkage int sys_shutdown(int fd, int how); +extern asmlinkage int sys_listen(int fd, int backlog); + +asmlinkage int sys32_socketcall(int call, u32 *args) +{ + int i, ret; + u32 a[6]; + u32 a0,a1; + + if (call<SYS_SOCKET||call>SYS_RECVMSG) + return -EINVAL; + if (copy_from_user(a, args, nas[call])) + return -EFAULT; + a0=a[0]; + a1=a[1]; + + switch(call) + { + case SYS_SOCKET: + ret = sys_socket(a0, a1, a[2]); + break; + case SYS_BIND: + ret = sys_bind(a0, (struct sockaddr *)A(a1), a[2]); + break; + case SYS_CONNECT: + ret = sys_connect(a0, (struct sockaddr *)A(a1), a[2]); + break; + case SYS_LISTEN: + ret = sys_listen(a0, a1); + break; + case SYS_ACCEPT: + ret = sys_accept(a0, (struct sockaddr *)A(a1), + (int *)A(a[2])); + break; + case SYS_GETSOCKNAME: + ret = sys_getsockname(a0, (struct sockaddr *)A(a1), + (int *)A(a[2])); + break; + case SYS_GETPEERNAME: + ret = sys_getpeername(a0, (struct sockaddr *)A(a1), + (int *)A(a[2])); + break; + case SYS_SOCKETPAIR: + ret = sys_socketpair(a0, a1, a[2], (int *)A(a[3])); + break; + case SYS_SEND: + ret = sys_send(a0, (void *)A(a1), a[2], a[3]); + break; + case SYS_SENDTO: + ret = sys_sendto(a0, a1, a[2], a[3], a[4], a[5]); + break; + case SYS_RECV: + ret = sys_recv(a0, (void *)A(a1), a[2], a[3]); + break; + case SYS_RECVFROM: + ret = sys_recvfrom(a0, a1, a[2], a[3], a[4], a[5]); + break; + case SYS_SHUTDOWN: + ret = sys_shutdown(a0,a1); + break; + case SYS_SETSOCKOPT: + ret = sys_setsockopt(a0, a1, a[2], (char *)A(a[3]), + a[4]); + break; + case SYS_GETSOCKOPT: + ret = sys_getsockopt(a0, a1, a[2], a[3], a[4]); + break; + case SYS_SENDMSG: + ret = sys32_sendmsg(a0, (struct msghdr32 *)A(a1), + a[2]); + break; + case SYS_RECVMSG: + ret = sys32_recvmsg(a0, (struct msghdr32 *)A(a1), + a[2]); + break; + default: + ret = EINVAL; + break; + } + return ret; +} + +/* + * Declare the IA32 version of the msghdr + */ + +struct msghdr32 { + unsigned int msg_name; /* Socket name */ + int msg_namelen; /* Length of name */ + unsigned int msg_iov; /* Data blocks */ + unsigned int msg_iovlen; /* Number of blocks */ + unsigned int msg_control; /* Per protocol magic (eg BSD file descriptor passing) */ + unsigned int msg_controllen; /* Length of cmsg list */ + unsigned msg_flags; +}; + +static inline int +shape_msg(struct msghdr *mp, struct msghdr32 *mp32) +{ + unsigned int i; + + if (!access_ok(VERIFY_READ, mp32, sizeof(*mp32))) + return(-EFAULT); + __get_user(i, &mp32->msg_name); + mp->msg_name = (void *)i; + __get_user(mp->msg_namelen, &mp32->msg_namelen); + __get_user(i, &mp32->msg_iov); + mp->msg_iov = (struct iov *)i; + __get_user(mp->msg_iovlen, &mp32->msg_iovlen); + __get_user(i, &mp32->msg_control); + mp->msg_control = (void 
*)i; + __get_user(mp->msg_controllen, &mp32->msg_controllen); + __get_user(mp->msg_flags, &mp32->msg_flags); + return(0); +} + +/* + * Verify & re-shape IA32 iovec. The caller must ensure that the + * iovec is big enough to hold the re-shaped message iovec. + * + * Save time not doing verify_area. copy_*_user will make this work + * in any case. + * + * Don't need to check the total size for overflow (cf net/core/iovec.c), + * 32-bit sizes can't overflow a 64-bit count. + */ + +static inline int +verify_iovec32(struct msghdr *m, struct iovec *iov, char *address, int mode) +{ + int size, err, ct; + struct iovec32 *iov32; + + if(m->msg_namelen) + { + if(mode==VERIFY_READ) + { + err=move_addr_to_kernel(m->msg_name, m->msg_namelen, address); + if(err<0) + goto out; + } + + m->msg_name = address; + } else + m->msg_name = NULL; + + err = -EFAULT; + size = m->msg_iovlen * sizeof(struct iovec32); + if (copy_from_user(iov, m->msg_iov, size)) + goto out; + m->msg_iov=iov; + + err = 0; + iov32 = (struct iovec32 *)iov; + for (ct = m->msg_iovlen; ct-- > 0; ) { + iov[ct].iov_len = (__kernel_size_t)iov32[ct].iov_len; + iov[ct].iov_base = (void *)iov32[ct].iov_base; + err += iov[ct].iov_len; + } +out: + return err; +} + +extern __inline__ void +sockfd_put(struct socket *sock) +{ + fput(sock->file); +} + +/* XXX This really belongs in some header file... -DaveM */ +#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - + 16 for IP, 16 for IPX, + 24 for IPv6, + about 80 for AX.25 */ + +extern struct socket *sockfd_lookup(int fd, int *err); + +/* + * BSD sendmsg interface + */ + +asmlinkage int sys32_sendmsg(int fd, struct msghdr32 *msg, unsigned flags) +{ + struct socket *sock; + char address[MAX_SOCK_ADDR]; + struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; + unsigned char ctl[sizeof(struct cmsghdr) + 20]; /* 20 is size of ipv6_pktinfo */ + unsigned char *ctl_buf = ctl; + struct msghdr msg_sys; + int err, ctl_len, iov_size, total_len; + + err = -EFAULT; + if (shape_msg(&msg_sys, msg)) + goto out; + + sock = sockfd_lookup(fd, &err); + if (!sock) + goto out; + + /* do not move before msg_sys is valid */ + err = -EINVAL; + if (msg_sys.msg_iovlen > UIO_MAXIOV) + goto out_put; + + /* Check whether to allocate the iovec area*/ + err = -ENOMEM; + iov_size = msg_sys.msg_iovlen * sizeof(struct iovec32); + if (msg_sys.msg_iovlen > UIO_FASTIOV) { + iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); + if (!iov) + goto out_put; + } + + /* This will also move the address data into kernel space */ + err = verify_iovec32(&msg_sys, iov, address, VERIFY_READ); + if (err < 0) + goto out_freeiov; + total_len = err; + + err = -ENOBUFS; + + if (msg_sys.msg_controllen > INT_MAX) + goto out_freeiov; + ctl_len = msg_sys.msg_controllen; + if (ctl_len) + { + if (ctl_len > sizeof(ctl)) + { + err = -ENOBUFS; + ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); + if (ctl_buf == NULL) + goto out_freeiov; + } + err = -EFAULT; + if (copy_from_user(ctl_buf, msg_sys.msg_control, ctl_len)) + goto out_freectl; + msg_sys.msg_control = ctl_buf; + } + msg_sys.msg_flags = flags; + + if (sock->file->f_flags & O_NONBLOCK) + msg_sys.msg_flags |= MSG_DONTWAIT; + err = sock_sendmsg(sock, &msg_sys, total_len); + +out_freectl: + if (ctl_buf != ctl) + sock_kfree_s(sock->sk, ctl_buf, ctl_len); +out_freeiov: + if (iov != iovstack) + sock_kfree_s(sock->sk, iov, iov_size); +out_put: + sockfd_put(sock); +out: + return err; +} + +/* + * BSD recvmsg interface + */ + +asmlinkage int sys32_recvmsg(int fd, struct msghdr32 *msg, unsigned int flags) +{ + struct 
socket *sock; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov=iovstack; + struct msghdr msg_sys; + unsigned long cmsg_ptr; + int err, iov_size, total_len, len; + + /* kernel mode address */ + char addr[MAX_SOCK_ADDR]; + + /* user mode address pointers */ + struct sockaddr *uaddr; + int *uaddr_len; + + err=-EFAULT; + if (shape_msg(&msg_sys, msg)) + goto out; + + sock = sockfd_lookup(fd, &err); + if (!sock) + goto out; + + err = -EINVAL; + if (msg_sys.msg_iovlen > UIO_MAXIOV) + goto out_put; + + /* Check whether to allocate the iovec area*/ + err = -ENOMEM; + iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); + if (msg_sys.msg_iovlen > UIO_FASTIOV) { + iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); + if (!iov) + goto out_put; + } + + /* + * Save the user-mode address (verify_iovec will change the + * kernel msghdr to use the kernel address space) + */ + + uaddr = msg_sys.msg_name; + uaddr_len = &msg->msg_namelen; + err = verify_iovec32(&msg_sys, iov, addr, VERIFY_WRITE); + if (err < 0) + goto out_freeiov; + total_len=err; + + cmsg_ptr = (unsigned long)msg_sys.msg_control; + msg_sys.msg_flags = 0; + + if (sock->file->f_flags & O_NONBLOCK) + flags |= MSG_DONTWAIT; + err = sock_recvmsg(sock, &msg_sys, total_len, flags); + if (err < 0) + goto out_freeiov; + len = err; + + if (uaddr != NULL) { + err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len); + if (err < 0) + goto out_freeiov; + } + err = __put_user(msg_sys.msg_flags, &msg->msg_flags); + if (err) + goto out_freeiov; + err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, + &msg->msg_controllen); + if (err) + goto out_freeiov; + err = len; + +out_freeiov: + if (iov != iovstack) + sock_kfree_s(sock->sk, iov, iov_size); +out_put: + sockfd_put(sock); +out: + return err; +} + +/* + * sys32_ipc() is the de-multiplexer for the SysV IPC calls in 32bit emulation.. + * + * This is really horribly ugly. + */ + +struct msgbuf32 { s32 mtype; char mtext[1]; }; + +struct ipc_perm32 +{ + key_t key; + __kernel_uid_t32 uid; + __kernel_gid_t32 gid; + __kernel_uid_t32 cuid; + __kernel_gid_t32 cgid; + __kernel_mode_t32 mode; + unsigned short seq; +}; + +struct semid_ds32 { + struct ipc_perm32 sem_perm; /* permissions .. see ipc.h */ + __kernel_time_t32 sem_otime; /* last semop time */ + __kernel_time_t32 sem_ctime; /* last change time */ + u32 sem_base; /* ptr to first semaphore in array */ + u32 sem_pending; /* pending operations to be processed */ + u32 sem_pending_last; /* last pending operation */ + u32 undo; /* undo requests on this array */ + unsigned short sem_nsems; /* no. 
of semaphores in array */ +}; + +struct msqid_ds32 +{ + struct ipc_perm32 msg_perm; + u32 msg_first; + u32 msg_last; + __kernel_time_t32 msg_stime; + __kernel_time_t32 msg_rtime; + __kernel_time_t32 msg_ctime; + u32 wwait; + u32 rwait; + unsigned short msg_cbytes; + unsigned short msg_qnum; + unsigned short msg_qbytes; + __kernel_ipc_pid_t32 msg_lspid; + __kernel_ipc_pid_t32 msg_lrpid; +}; + +struct shmid_ds32 { + struct ipc_perm32 shm_perm; + int shm_segsz; + __kernel_time_t32 shm_atime; + __kernel_time_t32 shm_dtime; + __kernel_time_t32 shm_ctime; + __kernel_ipc_pid_t32 shm_cpid; + __kernel_ipc_pid_t32 shm_lpid; + unsigned short shm_nattch; +}; + +#define IPCOP_MASK(__x) (1UL << (__x)) + +static int +do_sys32_semctl(int first, int second, int third, void *uptr) +{ + union semun fourth; + u32 pad; + int err = -EINVAL; + + if (!uptr) + goto out; + err = -EFAULT; + if (get_user (pad, (u32 *)uptr)) + goto out; + if(third == SETVAL) + fourth.val = (int)pad; + else + fourth.__pad = (void *)A(pad); + if (IPCOP_MASK (third) & + (IPCOP_MASK (IPC_INFO) | IPCOP_MASK (SEM_INFO) | + IPCOP_MASK (GETVAL) | IPCOP_MASK (GETPID) | + IPCOP_MASK (GETNCNT) | IPCOP_MASK (GETZCNT) | + IPCOP_MASK (GETALL) | IPCOP_MASK (SETALL) | + IPCOP_MASK (IPC_RMID))) { + err = sys_semctl (first, second, third, fourth); + } else { + struct semid_ds s; + struct semid_ds32 *usp = (struct semid_ds32 *)A(pad); + mm_segment_t old_fs; + int need_back_translation; + + if (third == IPC_SET) { + err = get_user (s.sem_perm.uid, &usp->sem_perm.uid); + err |= __get_user(s.sem_perm.gid, &usp->sem_perm.gid); + err |= __get_user(s.sem_perm.mode, &usp->sem_perm.mode); + if (err) + goto out; + fourth.__pad = &s; + } + need_back_translation = + (IPCOP_MASK (third) & + (IPCOP_MASK (SEM_STAT) | IPCOP_MASK (IPC_STAT))) != 0; + if (need_back_translation) + fourth.__pad = &s; + old_fs = get_fs (); + set_fs (KERNEL_DS); + err = sys_semctl (first, second, third, fourth); + set_fs (old_fs); + if (need_back_translation) { + int err2 = put_user(s.sem_perm.key, &usp->sem_perm.key); + err2 |= __put_user(s.sem_perm.uid, &usp->sem_perm.uid); + err2 |= __put_user(s.sem_perm.gid, &usp->sem_perm.gid); + err2 |= __put_user(s.sem_perm.cuid, + &usp->sem_perm.cuid); + err2 |= __put_user (s.sem_perm.cgid, + &usp->sem_perm.cgid); + err2 |= __put_user (s.sem_perm.mode, + &usp->sem_perm.mode); + err2 |= __put_user (s.sem_perm.seq, &usp->sem_perm.seq); + err2 |= __put_user (s.sem_otime, &usp->sem_otime); + err2 |= __put_user (s.sem_ctime, &usp->sem_ctime); + err2 |= __put_user (s.sem_nsems, &usp->sem_nsems); + if (err2) err = -EFAULT; + } + } +out: + return err; +} + +static int +do_sys32_msgsnd (int first, int second, int third, void *uptr) +{ + struct msgbuf *p = kmalloc (second + sizeof (struct msgbuf) + + 4, GFP_USER); + struct msgbuf32 *up = (struct msgbuf32 *)uptr; + mm_segment_t old_fs; + int err; + + if (!p) + return -ENOMEM; + err = get_user (p->mtype, &up->mtype); + err |= __copy_from_user (p->mtext, &up->mtext, second); + if (err) + goto out; + old_fs = get_fs (); + set_fs (KERNEL_DS); + err = sys_msgsnd (first, p, second, third); + set_fs (old_fs); +out: + kfree (p); + return err; +} + +static int +do_sys32_msgrcv (int first, int second, int msgtyp, int third, + int version, void *uptr) +{ + struct msgbuf32 *up; + struct msgbuf *p; + mm_segment_t old_fs; + int err; + + if (!version) { + struct ipc_kludge *uipck = (struct ipc_kludge *)uptr; + struct ipc_kludge ipck; + + err = -EINVAL; + if (!uptr) + goto out; + err = -EFAULT; + if (copy_from_user (&ipck, 
uipck, sizeof (struct ipc_kludge))) + goto out; + uptr = (void *)A(ipck.msgp); + msgtyp = ipck.msgtyp; + } + err = -ENOMEM; + p = kmalloc (second + sizeof (struct msgbuf) + 4, GFP_USER); + if (!p) + goto out; + old_fs = get_fs (); + set_fs (KERNEL_DS); + err = sys_msgrcv (first, p, second + 4, msgtyp, third); + set_fs (old_fs); + if (err < 0) + goto free_then_out; + up = (struct msgbuf32 *)uptr; + if (put_user (p->mtype, &up->mtype) || + __copy_to_user (&up->mtext, p->mtext, err)) + err = -EFAULT; +free_then_out: + kfree (p); +out: + return err; +} + +static int +do_sys32_msgctl (int first, int second, void *uptr) +{ + int err; + + if (IPCOP_MASK (second) & + (IPCOP_MASK (IPC_INFO) | IPCOP_MASK (MSG_INFO) | + IPCOP_MASK (IPC_RMID))) { + err = sys_msgctl (first, second, (struct msqid_ds *)uptr); + } else { + struct msqid_ds m; + struct msqid_ds32 *up = (struct msqid_ds32 *)uptr; + mm_segment_t old_fs; + + if (second == IPC_SET) { + err = get_user (m.msg_perm.uid, &up->msg_perm.uid); + err |= __get_user (m.msg_perm.gid, &up->msg_perm.gid); + err |= __get_user (m.msg_perm.mode, &up->msg_perm.mode); + err |= __get_user (m.msg_qbytes, &up->msg_qbytes); + if (err) + goto out; + } + old_fs = get_fs (); + set_fs (KERNEL_DS); + err = sys_msgctl (first, second, &m); + set_fs (old_fs); + if (IPCOP_MASK (second) & + (IPCOP_MASK (MSG_STAT) | IPCOP_MASK (IPC_STAT))) { + int err2 = put_user (m.msg_perm.key, &up->msg_perm.key); + err2 |= __put_user(m.msg_perm.uid, &up->msg_perm.uid); + err2 |= __put_user(m.msg_perm.gid, &up->msg_perm.gid); + err2 |= __put_user(m.msg_perm.cuid, &up->msg_perm.cuid); + err2 |= __put_user(m.msg_perm.cgid, &up->msg_perm.cgid); + err2 |= __put_user(m.msg_perm.mode, &up->msg_perm.mode); + err2 |= __put_user(m.msg_perm.seq, &up->msg_perm.seq); + err2 |= __put_user(m.msg_stime, &up->msg_stime); + err2 |= __put_user(m.msg_rtime, &up->msg_rtime); + err2 |= __put_user(m.msg_ctime, &up->msg_ctime); + err2 |= __put_user(m.msg_cbytes, &up->msg_cbytes); + err2 |= __put_user(m.msg_qnum, &up->msg_qnum); + err2 |= __put_user(m.msg_qbytes, &up->msg_qbytes); + err2 |= __put_user(m.msg_lspid, &up->msg_lspid); + err2 |= __put_user(m.msg_lrpid, &up->msg_lrpid); + if (err2) + err = -EFAULT; + } + } + +out: + return err; +} + +static int +do_sys32_shmat (int first, int second, int third, int version, void *uptr) +{ + unsigned long raddr; + u32 *uaddr = (u32 *)A((u32)third); + int err = -EINVAL; + + if (version == 1) + goto out; + err = sys_shmat (first, uptr, second, &raddr); + if (err) + goto out; + err = put_user (raddr, uaddr); +out: + return err; +} + +static int +do_sys32_shmctl (int first, int second, void *uptr) +{ + int err; + + if (IPCOP_MASK (second) & + (IPCOP_MASK (IPC_INFO) | IPCOP_MASK (SHM_LOCK) + | IPCOP_MASK (SHM_UNLOCK) | IPCOP_MASK (IPC_RMID))) { + err = sys_shmctl (first, second, (struct shmid_ds *)uptr); + } else { + struct shmid_ds s; + struct shmid_ds32 *up = (struct shmid_ds32 *)uptr; + mm_segment_t old_fs; + + if (second == IPC_SET) { + err = get_user (s.shm_perm.uid, &up->shm_perm.uid); + err |= __get_user (s.shm_perm.gid, &up->shm_perm.gid); + err |= __get_user (s.shm_perm.mode, &up->shm_perm.mode); + if (err) + goto out; + } + old_fs = get_fs (); + set_fs (KERNEL_DS); + err = sys_shmctl (first, second, &s); + set_fs (old_fs); + if (err < 0) + goto out; + + /* Mask it even in this case so it becomes a CSE. 
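+ (i.e. keep the IPCOP_MASK() form here too, so the compiler can reuse
+ the shifted mask as a common subexpression)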
*/ + if (second == SHM_INFO) { + struct shm_info32 { + int used_ids; + u32 shm_tot, shm_rss, shm_swp; + u32 swap_attempts, swap_successes; + } *uip = (struct shm_info32 *)uptr; + struct shm_info *kp = (struct shm_info *)&s; + int err2 = put_user (kp->used_ids, &uip->used_ids); + err2 |= __put_user (kp->shm_tot, &uip->shm_tot); + err2 |= __put_user (kp->shm_rss, &uip->shm_rss); + err2 |= __put_user (kp->shm_swp, &uip->shm_swp); + err2 |= __put_user (kp->swap_attempts, + &uip->swap_attempts); + err2 |= __put_user (kp->swap_successes, + &uip->swap_successes); + if (err2) + err = -EFAULT; + } else if (IPCOP_MASK (second) & + (IPCOP_MASK (SHM_STAT) | IPCOP_MASK (IPC_STAT))) { + int err2 = put_user (s.shm_perm.key, &up->shm_perm.key); + err2 |= __put_user (s.shm_perm.uid, &up->shm_perm.uid); + err2 |= __put_user (s.shm_perm.gid, &up->shm_perm.gid); + err2 |= __put_user (s.shm_perm.cuid, + &up->shm_perm.cuid); + err2 |= __put_user (s.shm_perm.cgid, + &up->shm_perm.cgid); + err2 |= __put_user (s.shm_perm.mode, + &up->shm_perm.mode); + err2 |= __put_user (s.shm_perm.seq, &up->shm_perm.seq); + err2 |= __put_user (s.shm_atime, &up->shm_atime); + err2 |= __put_user (s.shm_dtime, &up->shm_dtime); + err2 |= __put_user (s.shm_ctime, &up->shm_ctime); + err2 |= __put_user (s.shm_segsz, &up->shm_segsz); + err2 |= __put_user (s.shm_nattch, &up->shm_nattch); + err2 |= __put_user (s.shm_cpid, &up->shm_cpid); + err2 |= __put_user (s.shm_lpid, &up->shm_lpid); + if (err2) + err = -EFAULT; + } + } +out: + return err; +} + +asmlinkage int +sys32_ipc (u32 call, int first, int second, int third, u32 ptr, u32 fifth) +{ + int version, err; + + lock_kernel(); + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + if (call <= SEMCTL) + switch (call) { + case SEMOP: + /* struct sembuf is the same on 32 and 64bit :)) */ + err = sys_semop (first, (struct sembuf *)AA(ptr), + second); + goto out; + case SEMGET: + err = sys_semget (first, second, third); + goto out; + case SEMCTL: + err = do_sys32_semctl (first, second, third, + (void *)AA(ptr)); + goto out; + default: + err = -EINVAL; + goto out; + }; + if (call <= MSGCTL) + switch (call) { + case MSGSND: + err = do_sys32_msgsnd (first, second, third, + (void *)AA(ptr)); + goto out; + case MSGRCV: + err = do_sys32_msgrcv (first, second, fifth, third, + version, (void *)AA(ptr)); + goto out; + case MSGGET: + err = sys_msgget ((key_t) first, second); + goto out; + case MSGCTL: + err = do_sys32_msgctl (first, second, (void *)AA(ptr)); + goto out; + default: + err = -EINVAL; + goto out; + } + if (call <= SHMCTL) + switch (call) { + case SHMAT: + err = do_sys32_shmat (first, second, third, + version, (void *)AA(ptr)); + goto out; + case SHMDT: + err = sys_shmdt ((char *)AA(ptr)); + goto out; + case SHMGET: + err = sys_shmget (first, second, third); + goto out; + case SHMCTL: + err = do_sys32_shmctl (first, second, (void *)AA(ptr)); + goto out; + default: + err = -EINVAL; + goto out; + } + + err = -EINVAL; + +out: + unlock_kernel(); + return err; +} + +#ifdef NOTYET /* UNTESTED FOR IA64 FROM HERE DOWN */ + +/* In order to reduce some races, while at the same time doing additional + * checking and hopefully speeding things up, we copy filenames to the + * kernel data space before using them.. + * + * POSIX.1 2.4: an empty pathname is invalid (ENOENT). 
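+ *
+ * do_getname32() below returns 0 on success, -ENOENT for an empty
+ * string, -ENAMETOOLONG when the copied name fills the whole page, and
+ * the strncpy_from_user() fault code otherwise.  Callers follow the
+ * usual getname()/putname() pattern, as sys32_quotactl() does:
+ *
+ *	name = getname32(u_filename);
+ *	if (IS_ERR(name))
+ *		return PTR_ERR(name);
+ *	...
+ *	putname(name);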
+ */ +static inline int +do_getname32(const char *filename, char *page) +{ + int retval; + + /* 32bit pointer will be always far below TASK_SIZE :)) */ + retval = strncpy_from_user((char *)page, (char *)filename, PAGE_SIZE); + if (retval > 0) { + if (retval < PAGE_SIZE) + return 0; + return -ENAMETOOLONG; + } else if (!retval) + retval = -ENOENT; + return retval; +} + +char * +getname32(const char *filename) +{ + char *tmp, *result; + + result = ERR_PTR(-ENOMEM); + tmp = (char *)__get_free_page(GFP_KERNEL); + if (tmp) { + int retval = do_getname32(filename, tmp); + + result = tmp; + if (retval < 0) { + putname(tmp); + result = ERR_PTR(retval); + } + } + return result; +} + +/* 32-bit timeval and related flotsam. */ + +extern asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int on); + +asmlinkage int +sys32_ioperm(u32 from, u32 num, int on) +{ + return sys_ioperm((unsigned long)from, (unsigned long)num, on); +} + +static inline int +get_flock(struct flock *kfl, struct flock32 *ufl) +{ + int err; + + err = get_user(kfl->l_type, &ufl->l_type); + err |= __get_user(kfl->l_whence, &ufl->l_whence); + err |= __get_user(kfl->l_start, &ufl->l_start); + err |= __get_user(kfl->l_len, &ufl->l_len); + err |= __get_user(kfl->l_pid, &ufl->l_pid); + return err; +} + +static inline int +put_flock(struct flock *kfl, struct flock32 *ufl) +{ + int err; + + err = __put_user(kfl->l_type, &ufl->l_type); + err |= __put_user(kfl->l_whence, &ufl->l_whence); + err |= __put_user(kfl->l_start, &ufl->l_start); + err |= __put_user(kfl->l_len, &ufl->l_len); + err |= __put_user(kfl->l_pid, &ufl->l_pid); + return err; +} + +extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, + unsigned long arg); + +asmlinkage long +sys32_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case F_GETLK: + case F_SETLK: + case F_SETLKW: + { + struct flock f; + mm_segment_t old_fs; + long ret; + + if(get_flock(&f, (struct flock32 *)arg)) + return -EFAULT; + old_fs = get_fs(); set_fs (KERNEL_DS); + ret = sys_fcntl(fd, cmd, (unsigned long)&f); + set_fs (old_fs); + if(put_flock(&f, (struct flock32 *)arg)) + return -EFAULT; + return ret; + } + default: + return sys_fcntl(fd, cmd, (unsigned long)arg); + } +} + +struct dqblk32 { + __u32 dqb_bhardlimit; + __u32 dqb_bsoftlimit; + __u32 dqb_curblocks; + __u32 dqb_ihardlimit; + __u32 dqb_isoftlimit; + __u32 dqb_curinodes; + __kernel_time_t32 dqb_btime; + __kernel_time_t32 dqb_itime; +}; + +extern asmlinkage int sys_quotactl(int cmd, const char *special, int id, + caddr_t addr); + +asmlinkage int +sys32_quotactl(int cmd, const char *special, int id, unsigned long addr) +{ + int cmds = cmd >> SUBCMDSHIFT; + int err; + struct dqblk d; + mm_segment_t old_fs; + char *spec; + + switch (cmds) { + case Q_GETQUOTA: + break; + case Q_SETQUOTA: + case Q_SETUSE: + case Q_SETQLIM: + if (copy_from_user (&d, (struct dqblk32 *)addr, + sizeof (struct dqblk32))) + return -EFAULT; + d.dqb_itime = ((struct dqblk32 *)&d)->dqb_itime; + d.dqb_btime = ((struct dqblk32 *)&d)->dqb_btime; + break; + default: + return sys_quotactl(cmd, special, + id, (caddr_t)addr); + } + spec = getname32 (special); + err = PTR_ERR(spec); + if (IS_ERR(spec)) return err; + old_fs = get_fs (); + set_fs (KERNEL_DS); + err = sys_quotactl(cmd, (const char *)spec, id, (caddr_t)&d); + set_fs (old_fs); + putname (spec); + if (cmds == Q_GETQUOTA) { + __kernel_time_t b = d.dqb_btime, i = d.dqb_itime; + ((struct dqblk32 *)&d)->dqb_itime = i; + ((struct dqblk32 *)&d)->dqb_btime = b; + if 
(copy_to_user ((struct dqblk32 *)addr, &d,
+			   sizeof (struct dqblk32)))
+			return -EFAULT;
+	}
+	return err;
+}
+
+extern asmlinkage int sys_utime(char * filename, struct utimbuf * times);
+
+struct utimbuf32 {
+	__kernel_time_t32 actime, modtime;
+};
+
+asmlinkage int
+sys32_utime(char * filename, struct utimbuf32 *times)
+{
+	struct utimbuf t;
+	mm_segment_t old_fs;
+	int ret;
+	char *filenam;
+
+	if (!times)
+		return sys_utime(filename, NULL);
+	if (get_user (t.actime, &times->actime) ||
+	    __get_user (t.modtime, &times->modtime))
+		return -EFAULT;
+	filenam = getname32 (filename);
+	ret = PTR_ERR(filenam);
+	if (!IS_ERR(filenam)) {
+		old_fs = get_fs();
+		set_fs (KERNEL_DS);
+		ret = sys_utime(filenam, &t);
+		set_fs (old_fs);
+		putname (filenam);
+	}
+	return ret;
+}
+
+/*
+ * Ooo, nasty. We need here to frob 32-bit unsigned longs to
+ * 64-bit unsigned longs.
+ */
+
+static inline int
+get_fd_set32(unsigned long n, unsigned long *fdset, u32 *ufdset)
+{
+	if (ufdset) {
+		unsigned long odd;
+
+		if (verify_area(VERIFY_WRITE, ufdset, n*sizeof(u32)))
+			return -EFAULT;
+
+		odd = n & 1UL;
+		n &= ~1UL;
+		while (n) {
+			unsigned long h, l;
+			__get_user(l, ufdset);
+			__get_user(h, ufdset+1);
+			ufdset += 2;
+			*fdset++ = h << 32 | l;
+			n -= 2;
+		}
+		if (odd)
+			__get_user(*fdset, ufdset);
+	} else {
+		/* Tricky, must clear full unsigned long in the
+		 * kernel fdset at the end, this makes sure that
+		 * actually happens.
+		 */
+		memset(fdset, 0, ((n + 1) & ~1)*sizeof(u32));
+	}
+	return 0;
+}
+
+static inline void
+set_fd_set32(unsigned long n, u32 *ufdset, unsigned long *fdset)
+{
+	unsigned long odd;
+
+	if (!ufdset)
+		return;
+
+	odd = n & 1UL;
+	n &= ~1UL;
+	while (n) {
+		unsigned long h, l;
+		l = *fdset++;
+		h = l >> 32;
+		__put_user(l, ufdset);
+		__put_user(h, ufdset+1);
+		ufdset += 2;
+		n -= 2;
+	}
+	if (odd)
+		__put_user(*fdset, ufdset);
+}
+
+extern asmlinkage int sys_sysfs(int option, unsigned long arg1,
+				unsigned long arg2);
+
+asmlinkage int
+sys32_sysfs(int option, u32 arg1, u32 arg2)
+{
+	return sys_sysfs(option, arg1, arg2);
+}
+
+struct ncp_mount_data32 {
+	int version;
+	unsigned int ncp_fd;
+	__kernel_uid_t32 mounted_uid;
+	__kernel_pid_t32 wdog_pid;
+	unsigned char mounted_vol[NCP_VOLNAME_LEN + 1];
+	unsigned int time_out;
+	unsigned int retry_count;
+	unsigned int flags;
+	__kernel_uid_t32 uid;
+	__kernel_gid_t32 gid;
+	__kernel_mode_t32 file_mode;
+	__kernel_mode_t32 dir_mode;
+};
+
+static void *
+do_ncp_super_data_conv(void *raw_data)
+{
+	struct ncp_mount_data *n = (struct ncp_mount_data *)raw_data;
+	struct ncp_mount_data32 *n32 = (struct ncp_mount_data32 *)raw_data;
+
+	n->dir_mode = n32->dir_mode;
+	n->file_mode = n32->file_mode;
+	n->gid = n32->gid;
+	n->uid = n32->uid;
+	memmove (n->mounted_vol, n32->mounted_vol,
+		 (sizeof (n32->mounted_vol) + 3 * sizeof (unsigned int)));
+	n->wdog_pid = n32->wdog_pid;
+	n->mounted_uid = n32->mounted_uid;
+	return raw_data;
+}
+
+struct smb_mount_data32 {
+	int version;
+	__kernel_uid_t32 mounted_uid;
+	__kernel_uid_t32 uid;
+	__kernel_gid_t32 gid;
+	__kernel_mode_t32 file_mode;
+	__kernel_mode_t32 dir_mode;
+};
+
+static void *
+do_smb_super_data_conv(void *raw_data)
+{
+	struct smb_mount_data *s = (struct smb_mount_data *)raw_data;
+	struct smb_mount_data32 *s32 = (struct smb_mount_data32 *)raw_data;
+
+	s->version = s32->version;
+	s->mounted_uid = s32->mounted_uid;
+	s->uid = s32->uid;
+	s->gid = s32->gid;
+	s->file_mode = s32->file_mode;
+	s->dir_mode = s32->dir_mode;
+	return raw_data;
+}
+
+static int
+copy_mount_stuff_to_kernel(const void *user,
unsigned long *kernel) +{ + int i; + unsigned long page; + struct vm_area_struct *vma; + + *kernel = 0; + if(!user) + return 0; + vma = find_vma(current->mm, (unsigned long)user); + if(!vma || (unsigned long)user < vma->vm_start) + return -EFAULT; + if(!(vma->vm_flags & VM_READ)) + return -EFAULT; + i = vma->vm_end - (unsigned long) user; + if(PAGE_SIZE <= (unsigned long) i) + i = PAGE_SIZE - 1; + if(!(page = __get_free_page(GFP_KERNEL))) + return -ENOMEM; + if(copy_from_user((void *) page, user, i)) { + free_page(page); + return -EFAULT; + } + *kernel = page; + return 0; +} + +extern asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type, + unsigned long new_flags, void *data); + +#define SMBFS_NAME "smbfs" +#define NCPFS_NAME "ncpfs" + +asmlinkage int +sys32_mount(char *dev_name, char *dir_name, char *type, + unsigned long new_flags, u32 data) +{ + unsigned long type_page; + int err, is_smb, is_ncp; + + if(!capable(CAP_SYS_ADMIN)) + return -EPERM; + is_smb = is_ncp = 0; + err = copy_mount_stuff_to_kernel((const void *)type, &type_page); + if(err) + return err; + if(type_page) { + is_smb = !strcmp((char *)type_page, SMBFS_NAME); + is_ncp = !strcmp((char *)type_page, NCPFS_NAME); + } + if(!is_smb && !is_ncp) { + if(type_page) + free_page(type_page); + return sys_mount(dev_name, dir_name, type, new_flags, + (void *)AA(data)); + } else { + unsigned long dev_page, dir_page, data_page; + mm_segment_t old_fs; + + err = copy_mount_stuff_to_kernel((const void *)dev_name, + &dev_page); + if(err) + goto out; + err = copy_mount_stuff_to_kernel((const void *)dir_name, + &dir_page); + if(err) + goto dev_out; + err = copy_mount_stuff_to_kernel((const void *)AA(data), + &data_page); + if(err) + goto dir_out; + if(is_ncp) + do_ncp_super_data_conv((void *)data_page); + else if(is_smb) + do_smb_super_data_conv((void *)data_page); + else + panic("The problem is here..."); + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = sys_mount((char *)dev_page, (char *)dir_page, + (char *)type_page, new_flags, + (void *)data_page); + set_fs(old_fs); + + if(data_page) + free_page(data_page); + dir_out: + if(dir_page) + free_page(dir_page); + dev_out: + if(dev_page) + free_page(dev_page); + out: + if(type_page) + free_page(type_page); + return err; + } +} + +struct sysinfo32 { + s32 uptime; + u32 loads[3]; + u32 totalram; + u32 freeram; + u32 sharedram; + u32 bufferram; + u32 totalswap; + u32 freeswap; + unsigned short procs; + char _f[22]; +}; + +extern asmlinkage int sys_sysinfo(struct sysinfo *info); + +asmlinkage int +sys32_sysinfo(struct sysinfo32 *info) +{ + struct sysinfo s; + int ret, err; + mm_segment_t old_fs = get_fs (); + + set_fs (KERNEL_DS); + ret = sys_sysinfo(&s); + set_fs (old_fs); + err = put_user (s.uptime, &info->uptime); + err |= __put_user (s.loads[0], &info->loads[0]); + err |= __put_user (s.loads[1], &info->loads[1]); + err |= __put_user (s.loads[2], &info->loads[2]); + err |= __put_user (s.totalram, &info->totalram); + err |= __put_user (s.freeram, &info->freeram); + err |= __put_user (s.sharedram, &info->sharedram); + err |= __put_user (s.bufferram, &info->bufferram); + err |= __put_user (s.totalswap, &info->totalswap); + err |= __put_user (s.freeswap, &info->freeswap); + err |= __put_user (s.procs, &info->procs); + if (err) + return -EFAULT; + return ret; +} + +extern asmlinkage int sys_sched_rr_get_interval(pid_t pid, + struct timespec *interval); + +asmlinkage int +sys32_sched_rr_get_interval(__kernel_pid_t32 pid, struct timespec32 *interval) +{ + struct timespec t; + int ret; 
+	mm_segment_t old_fs = get_fs ();
+
+	set_fs (KERNEL_DS);
+	ret = sys_sched_rr_get_interval(pid, &t);
+	set_fs (old_fs);
+	if (put_user (t.tv_sec, &interval->tv_sec) ||
+	    __put_user (t.tv_nsec, &interval->tv_nsec))
+		return -EFAULT;
+	return ret;
+}
+
+extern asmlinkage int sys_sigprocmask(int how, old_sigset_t *set,
+				      old_sigset_t *oset);
+
+asmlinkage int
+sys32_sigprocmask(int how, old_sigset_t32 *set, old_sigset_t32 *oset)
+{
+	old_sigset_t s;
+	int ret;
+	mm_segment_t old_fs = get_fs();
+
+	if (set && get_user (s, set)) return -EFAULT;
+	set_fs (KERNEL_DS);
+	ret = sys_sigprocmask(how, set ? &s : NULL, oset ? &s : NULL);
+	set_fs (old_fs);
+	if (ret) return ret;
+	if (oset && put_user (s, oset)) return -EFAULT;
+	return 0;
+}
+
+extern asmlinkage int sys_sigpending(old_sigset_t *set);
+
+asmlinkage int
+sys32_sigpending(old_sigset_t32 *set)
+{
+	old_sigset_t s;
+	int ret;
+	mm_segment_t old_fs = get_fs();
+
+	set_fs (KERNEL_DS);
+	ret = sys_sigpending(&s);
+	set_fs (old_fs);
+	if (put_user (s, set)) return -EFAULT;
+	return ret;
+}
+
+extern asmlinkage int sys_rt_sigpending(sigset_t *set, size_t sigsetsize);
+
+asmlinkage int
+sys32_rt_sigpending(sigset_t32 *set, __kernel_size_t32 sigsetsize)
+{
+	sigset_t s;
+	sigset_t32 s32;
+	int ret;
+	mm_segment_t old_fs = get_fs();
+
+	set_fs (KERNEL_DS);
+	ret = sys_rt_sigpending(&s, sigsetsize);
+	set_fs (old_fs);
+	if (!ret) {
+		switch (_NSIG_WORDS) {
+		case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
+		case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
+		case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
+		case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
+		}
+		if (copy_to_user (set, &s32, sizeof(sigset_t32)))
+			return -EFAULT;
+	}
+	return ret;
+}
+
+siginfo_t32 *
+siginfo64to32(siginfo_t32 *d, siginfo_t *s)
+{
+	/* Zero the 32-bit siginfo itself, not the pointer to it. */
+	memset (d, 0, sizeof(siginfo_t32));
+	d->si_signo = s->si_signo;
+	d->si_errno = s->si_errno;
+	d->si_code = s->si_code;
+	if (s->si_signo >= SIGRTMIN) {
+		d->si_pid = s->si_pid;
+		d->si_uid = s->si_uid;
+		/* XXX: Ouch, how to find this out??? */
+		d->si_int = s->si_int;
+	} else switch (s->si_signo) {
+	/* XXX: What about POSIX1.b timers */
+	case SIGCHLD:
+		d->si_pid = s->si_pid;
+		d->si_status = s->si_status;
+		d->si_utime = s->si_utime;
+		d->si_stime = s->si_stime;
+		break;
+	case SIGSEGV:
+	case SIGBUS:
+	case SIGFPE:
+	case SIGILL:
+		d->si_addr = (long)(s->si_addr);
+		/* XXX: Do we need to translate this from ia64 to ia32 traps? */
+		d->si_trapno = s->si_trapno;
+		break;
+	case SIGPOLL:
+		d->si_band = s->si_band;
+		d->si_fd = s->si_fd;
+		break;
+	default:
+		d->si_pid = s->si_pid;
+		d->si_uid = s->si_uid;
+		break;
+	}
+	return d;
+}
+
+siginfo_t *
+siginfo32to64(siginfo_t *d, siginfo_t32 *s)
+{
+	d->si_signo = s->si_signo;
+	d->si_errno = s->si_errno;
+	d->si_code = s->si_code;
+	if (s->si_signo >= SIGRTMIN) {
+		d->si_pid = s->si_pid;
+		d->si_uid = s->si_uid;
+		/* XXX: Ouch, how to find this out??? */
+		d->si_int = s->si_int;
+	} else switch (s->si_signo) {
+	/* XXX: What about POSIX1.b timers */
+	case SIGCHLD:
+		d->si_pid = s->si_pid;
+		d->si_status = s->si_status;
+		d->si_utime = s->si_utime;
+		d->si_stime = s->si_stime;
+		break;
+	case SIGSEGV:
+	case SIGBUS:
+	case SIGFPE:
+	case SIGILL:
+		d->si_addr = (void *)A(s->si_addr);
+		/* XXX: Do we need to translate this from ia32 to ia64 traps?
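+		 * (For now the value is copied through unchanged in both
+		 * directions; nothing here remaps trap numbers.)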
*/ + d->si_trapno = s->si_trapno; + break; + case SIGPOLL: + d->si_band = s->si_band; + d->si_fd = s->si_fd; + break; + default: + d->si_pid = s->si_pid; + d->si_uid = s->si_uid; + break; + } + return d; +} + +extern asmlinkage int +sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo, + const struct timespec *uts, size_t sigsetsize); + +asmlinkage int +sys32_rt_sigtimedwait(sigset_t32 *uthese, siginfo_t32 *uinfo, + struct timespec32 *uts, __kernel_size_t32 sigsetsize) +{ + sigset_t s; + sigset_t32 s32; + struct timespec t; + int ret; + mm_segment_t old_fs = get_fs(); + siginfo_t info; + siginfo_t32 info32; + + if (copy_from_user (&s32, uthese, sizeof(sigset_t32))) + return -EFAULT; + switch (_NSIG_WORDS) { + case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); + case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); + case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); + case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); + } + if (uts) { + ret = get_user (t.tv_sec, &uts->tv_sec); + ret |= __get_user (t.tv_nsec, &uts->tv_nsec); + if (ret) + return -EFAULT; + } + set_fs (KERNEL_DS); + ret = sys_rt_sigtimedwait(&s, &info, &t, sigsetsize); + set_fs (old_fs); + if (ret >= 0 && uinfo) { + if (copy_to_user (uinfo, siginfo64to32(&info32, &info), + sizeof(siginfo_t32))) + return -EFAULT; + } + return ret; +} + +extern asmlinkage int +sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo); + +asmlinkage int +sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 *uinfo) +{ + siginfo_t info; + siginfo_t32 info32; + int ret; + mm_segment_t old_fs = get_fs(); + + if (copy_from_user (&info32, uinfo, sizeof(siginfo_t32))) + return -EFAULT; + /* XXX: Is this correct? */ + siginfo32to64(&info, &info32); + set_fs (KERNEL_DS); + ret = sys_rt_sigqueueinfo(pid, sig, &info); + set_fs (old_fs); + return ret; +} + +extern asmlinkage int sys_setreuid(uid_t ruid, uid_t euid); + +asmlinkage int sys32_setreuid(__kernel_uid_t32 ruid, __kernel_uid_t32 euid) +{ + uid_t sruid, seuid; + + sruid = (ruid == (__kernel_uid_t32)-1) ? ((uid_t)-1) : ((uid_t)ruid); + seuid = (euid == (__kernel_uid_t32)-1) ? ((uid_t)-1) : ((uid_t)euid); + return sys_setreuid(sruid, seuid); +} + +extern asmlinkage int sys_setresuid(uid_t ruid, uid_t euid, uid_t suid); + +asmlinkage int +sys32_setresuid(__kernel_uid_t32 ruid, __kernel_uid_t32 euid, + __kernel_uid_t32 suid) +{ + uid_t sruid, seuid, ssuid; + + sruid = (ruid == (__kernel_uid_t32)-1) ? ((uid_t)-1) : ((uid_t)ruid); + seuid = (euid == (__kernel_uid_t32)-1) ? ((uid_t)-1) : ((uid_t)euid); + ssuid = (suid == (__kernel_uid_t32)-1) ? ((uid_t)-1) : ((uid_t)suid); + return sys_setresuid(sruid, seuid, ssuid); +} + +extern asmlinkage int sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid); + +asmlinkage int +sys32_getresuid(__kernel_uid_t32 *ruid, __kernel_uid_t32 *euid, + __kernel_uid_t32 *suid) +{ + uid_t a, b, c; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs (KERNEL_DS); + ret = sys_getresuid(&a, &b, &c); + set_fs (old_fs); + if (put_user (a, ruid) || put_user (b, euid) || put_user (c, suid)) + return -EFAULT; + return ret; +} + +extern asmlinkage int sys_setregid(gid_t rgid, gid_t egid); + +asmlinkage int +sys32_setregid(__kernel_gid_t32 rgid, __kernel_gid_t32 egid) +{ + gid_t srgid, segid; + + srgid = (rgid == (__kernel_gid_t32)-1) ? ((gid_t)-1) : ((gid_t)rgid); + segid = (egid == (__kernel_gid_t32)-1) ? 
((gid_t)-1) : ((gid_t)egid); + return sys_setregid(srgid, segid); +} + +extern asmlinkage int sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid); + +asmlinkage int +sys32_setresgid(__kernel_gid_t32 rgid, __kernel_gid_t32 egid, + __kernel_gid_t32 sgid) +{ + gid_t srgid, segid, ssgid; + + srgid = (rgid == (__kernel_gid_t32)-1) ? ((gid_t)-1) : ((gid_t)rgid); + segid = (egid == (__kernel_gid_t32)-1) ? ((gid_t)-1) : ((gid_t)egid); + ssgid = (sgid == (__kernel_gid_t32)-1) ? ((gid_t)-1) : ((gid_t)sgid); + return sys_setresgid(srgid, segid, ssgid); +} + +extern asmlinkage int sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid); + +asmlinkage int +sys32_getresgid(__kernel_gid_t32 *rgid, __kernel_gid_t32 *egid, + __kernel_gid_t32 *sgid) +{ + gid_t a, b, c; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs (KERNEL_DS); + ret = sys_getresgid(&a, &b, &c); + set_fs (old_fs); + if (!ret) { + ret = put_user (a, rgid); + ret |= put_user (b, egid); + ret |= put_user (c, sgid); + } + return ret; +} + +struct tms32 { + __kernel_clock_t32 tms_utime; + __kernel_clock_t32 tms_stime; + __kernel_clock_t32 tms_cutime; + __kernel_clock_t32 tms_cstime; +}; + +extern asmlinkage long sys_times(struct tms * tbuf); + +asmlinkage long +sys32_times(struct tms32 *tbuf) +{ + struct tms t; + long ret; + mm_segment_t old_fs = get_fs (); + int err; + + set_fs (KERNEL_DS); + ret = sys_times(tbuf ? &t : NULL); + set_fs (old_fs); + if (tbuf) { + err = put_user (t.tms_utime, &tbuf->tms_utime); + err |= __put_user (t.tms_stime, &tbuf->tms_stime); + err |= __put_user (t.tms_cutime, &tbuf->tms_cutime); + err |= __put_user (t.tms_cstime, &tbuf->tms_cstime); + if (err) + ret = -EFAULT; + } + return ret; +} + +extern asmlinkage int sys_getgroups(int gidsetsize, gid_t *grouplist); + +asmlinkage int +sys32_getgroups(int gidsetsize, __kernel_gid_t32 *grouplist) +{ + gid_t gl[NGROUPS]; + int ret, i; + mm_segment_t old_fs = get_fs (); + + set_fs (KERNEL_DS); + ret = sys_getgroups(gidsetsize, gl); + set_fs (old_fs); + if (gidsetsize && ret > 0 && ret <= NGROUPS) + for (i = 0; i < ret; i++, grouplist++) + if (__put_user (gl[i], grouplist)) + return -EFAULT; + return ret; +} + +extern asmlinkage int sys_setgroups(int gidsetsize, gid_t *grouplist); + +asmlinkage int +sys32_setgroups(int gidsetsize, __kernel_gid_t32 *grouplist) +{ + gid_t gl[NGROUPS]; + int ret, i; + mm_segment_t old_fs = get_fs (); + + if ((unsigned) gidsetsize > NGROUPS) + return -EINVAL; + for (i = 0; i < gidsetsize; i++, grouplist++) + if (__get_user (gl[i], grouplist)) + return -EFAULT; + set_fs (KERNEL_DS); + ret = sys_setgroups(gidsetsize, gl); + set_fs (old_fs); + return ret; +} + +extern asmlinkage int +sys_getrusage(int who, struct rusage *ru); + +asmlinkage int +sys32_getrusage(int who, struct rusage32 *ru) +{ + struct rusage r; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs (KERNEL_DS); + ret = sys_getrusage(who, &r); + set_fs (old_fs); + if (put_rusage (ru, &r)) return -EFAULT; + return ret; +} + + +/* XXX These as well... 
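+ * socki_lookup() and sockfd_lookup() are local copies of static
+ * helpers from net/socket.c, apparently duplicated here because the
+ * originals are not exported for the emulation layer to use.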
*/ +extern __inline__ struct socket * +socki_lookup(struct inode *inode) +{ + return &inode->u.socket_i; +} + +extern __inline__ struct socket * +sockfd_lookup(int fd, int *err) +{ + struct file *file; + struct inode *inode; + + if (!(file = fget(fd))) + { + *err = -EBADF; + return NULL; + } + + inode = file->f_dentry->d_inode; + if (!inode || !inode->i_sock || !socki_lookup(inode)) + { + *err = -ENOTSOCK; + fput(file); + return NULL; + } + + return socki_lookup(inode); +} + +struct msghdr32 { + u32 msg_name; + int msg_namelen; + u32 msg_iov; + __kernel_size_t32 msg_iovlen; + u32 msg_control; + __kernel_size_t32 msg_controllen; + unsigned msg_flags; +}; + +struct cmsghdr32 { + __kernel_size_t32 cmsg_len; + int cmsg_level; + int cmsg_type; +}; + +/* Bleech... */ +#define __CMSG32_NXTHDR(ctl, len, cmsg, cmsglen) \ + __cmsg32_nxthdr((ctl),(len),(cmsg),(cmsglen)) +#define CMSG32_NXTHDR(mhdr, cmsg, cmsglen) \ + cmsg32_nxthdr((mhdr), (cmsg), (cmsglen)) + +#define CMSG32_ALIGN(len) ( ((len)+sizeof(int)-1) & ~(sizeof(int)-1) ) + +#define CMSG32_DATA(cmsg) \ + ((void *)((char *)(cmsg) + CMSG32_ALIGN(sizeof(struct cmsghdr32)))) +#define CMSG32_SPACE(len) \ + (CMSG32_ALIGN(sizeof(struct cmsghdr32)) + CMSG32_ALIGN(len)) +#define CMSG32_LEN(len) (CMSG32_ALIGN(sizeof(struct cmsghdr32)) + (len)) + +#define __CMSG32_FIRSTHDR(ctl,len) ((len) >= sizeof(struct cmsghdr32) ? \ + (struct cmsghdr32 *)(ctl) : \ + (struct cmsghdr32 *)NULL) +#define CMSG32_FIRSTHDR(msg) \ + __CMSG32_FIRSTHDR((msg)->msg_control, (msg)->msg_controllen) + +__inline__ struct cmsghdr32 * +__cmsg32_nxthdr(void *__ctl, __kernel_size_t __size, + struct cmsghdr32 *__cmsg, int __cmsg_len) +{ + struct cmsghdr32 * __ptr; + + __ptr = (struct cmsghdr32 *)(((unsigned char *) __cmsg) + + CMSG32_ALIGN(__cmsg_len)); + if ((unsigned long)((char*)(__ptr+1) - (char *) __ctl) > __size) + return NULL; + + return __ptr; +} + +__inline__ struct cmsghdr32 * +cmsg32_nxthdr (struct msghdr *__msg, struct cmsghdr32 *__cmsg, int __cmsg_len) +{ + return __cmsg32_nxthdr(__msg->msg_control, __msg->msg_controllen, + __cmsg, __cmsg_len); +} + +static inline int +iov_from_user32_to_kern(struct iovec *kiov, struct iovec32 *uiov32, int niov) +{ + int tot_len = 0; + + while(niov > 0) { + u32 len, buf; + + if(get_user(len, &uiov32->iov_len) || + get_user(buf, &uiov32->iov_base)) { + tot_len = -EFAULT; + break; + } + tot_len += len; + kiov->iov_base = (void *)A(buf); + kiov->iov_len = (__kernel_size_t) len; + uiov32++; + kiov++; + niov--; + } + return tot_len; +} + +static inline int +msghdr_from_user32_to_kern(struct msghdr *kmsg, struct msghdr32 *umsg) +{ + u32 tmp1, tmp2, tmp3; + int err; + + err = get_user(tmp1, &umsg->msg_name); + err |= __get_user(tmp2, &umsg->msg_iov); + err |= __get_user(tmp3, &umsg->msg_control); + if (err) + return -EFAULT; + + kmsg->msg_name = (void *)A(tmp1); + kmsg->msg_iov = (struct iovec *)A(tmp2); + kmsg->msg_control = (void *)A(tmp3); + + err = get_user(kmsg->msg_namelen, &umsg->msg_namelen); + err |= get_user(kmsg->msg_iovlen, &umsg->msg_iovlen); + err |= get_user(kmsg->msg_controllen, &umsg->msg_controllen); + err |= get_user(kmsg->msg_flags, &umsg->msg_flags); + + return err; +} + +/* I've named the args so it is easy to tell whose space the pointers are in. 
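+ * kern_msg, kern_iov and kern_address all point into kernel space;
+ * after conversion only the iov_base fields copied out of the 32-bit
+ * iovec array still reference user buffers.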
*/ +static int +verify_iovec32(struct msghdr *kern_msg, struct iovec *kern_iov, + char *kern_address, int mode) +{ + int tot_len; + + if(kern_msg->msg_namelen) { + if(mode==VERIFY_READ) { + int err = move_addr_to_kernel(kern_msg->msg_name, + kern_msg->msg_namelen, + kern_address); + if(err < 0) + return err; + } + kern_msg->msg_name = kern_address; + } else + kern_msg->msg_name = NULL; + + if(kern_msg->msg_iovlen > UIO_FASTIOV) { + kern_iov = kmalloc(kern_msg->msg_iovlen * sizeof(struct iovec), + GFP_KERNEL); + if(!kern_iov) + return -ENOMEM; + } + + tot_len = iov_from_user32_to_kern(kern_iov, + (struct iovec32 *)kern_msg->msg_iov, + kern_msg->msg_iovlen); + if(tot_len >= 0) + kern_msg->msg_iov = kern_iov; + else if(kern_msg->msg_iovlen > UIO_FASTIOV) + kfree(kern_iov); + + return tot_len; +} + +/* There is a lot of hair here because the alignment rules (and + * thus placement) of cmsg headers and length are different for + * 32-bit apps. -DaveM + */ +static int +cmsghdr_from_user32_to_kern(struct msghdr *kmsg, unsigned char *stackbuf, + int stackbuf_size) +{ + struct cmsghdr32 *ucmsg; + struct cmsghdr *kcmsg, *kcmsg_base; + __kernel_size_t32 ucmlen; + __kernel_size_t kcmlen, tmp; + + kcmlen = 0; + kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf; + ucmsg = CMSG32_FIRSTHDR(kmsg); + while(ucmsg != NULL) { + if(get_user(ucmlen, &ucmsg->cmsg_len)) + return -EFAULT; + + /* Catch bogons. */ + if(CMSG32_ALIGN(ucmlen) < + CMSG32_ALIGN(sizeof(struct cmsghdr32))) + return -EINVAL; + if((unsigned long)(((char *)ucmsg - (char *)kmsg->msg_control) + + ucmlen) > kmsg->msg_controllen) + return -EINVAL; + + tmp = ((ucmlen - CMSG32_ALIGN(sizeof(*ucmsg))) + + CMSG_ALIGN(sizeof(struct cmsghdr))); + kcmlen += tmp; + ucmsg = CMSG32_NXTHDR(kmsg, ucmsg, ucmlen); + } + if(kcmlen == 0) + return -EINVAL; + + /* The kcmlen holds the 64-bit version of the control length. + * It may not be modified as we do not stick it into the kmsg + * until we have successfully copied over all of the data + * from the user. + */ + if(kcmlen > stackbuf_size) + kcmsg_base = kcmsg = kmalloc(kcmlen, GFP_KERNEL); + if(kcmsg == NULL) + return -ENOBUFS; + + /* Now copy them over neatly. */ + memset(kcmsg, 0, kcmlen); + ucmsg = CMSG32_FIRSTHDR(kmsg); + while(ucmsg != NULL) { + __get_user(ucmlen, &ucmsg->cmsg_len); + tmp = ((ucmlen - CMSG32_ALIGN(sizeof(*ucmsg))) + + CMSG_ALIGN(sizeof(struct cmsghdr))); + kcmsg->cmsg_len = tmp; + __get_user(kcmsg->cmsg_level, &ucmsg->cmsg_level); + __get_user(kcmsg->cmsg_type, &ucmsg->cmsg_type); + + /* Copy over the data. */ + if(copy_from_user(CMSG_DATA(kcmsg), + CMSG32_DATA(ucmsg), + (ucmlen - CMSG32_ALIGN(sizeof(*ucmsg))))) + goto out_free_efault; + + /* Advance. */ + kcmsg = (struct cmsghdr *)((char *)kcmsg + CMSG_ALIGN(tmp)); + ucmsg = CMSG32_NXTHDR(kmsg, ucmsg, ucmlen); + } + + /* Ok, looks like we made it. Hook it up and return success. 
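+	 * If kcmlen exceeded the caller's stack buffer, kcmsg_base is a
+	 * kmalloc'ed block; sys32_sendmsg() frees it afterwards through
+	 * its ctl_buf != ctl check.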
*/ + kmsg->msg_control = kcmsg_base; + kmsg->msg_controllen = kcmlen; + return 0; + +out_free_efault: + if(kcmsg_base != (struct cmsghdr *)stackbuf) + kfree(kcmsg_base); + return -EFAULT; +} + +static void +put_cmsg32(struct msghdr *kmsg, int level, int type, int len, void *data) +{ + struct cmsghdr32 *cm = (struct cmsghdr32 *) kmsg->msg_control; + struct cmsghdr32 cmhdr; + int cmlen = CMSG32_LEN(len); + + if(cm == NULL || kmsg->msg_controllen < sizeof(*cm)) { + kmsg->msg_flags |= MSG_CTRUNC; + return; + } + + if(kmsg->msg_controllen < cmlen) { + kmsg->msg_flags |= MSG_CTRUNC; + cmlen = kmsg->msg_controllen; + } + cmhdr.cmsg_level = level; + cmhdr.cmsg_type = type; + cmhdr.cmsg_len = cmlen; + + if(copy_to_user(cm, &cmhdr, sizeof cmhdr)) + return; + if(copy_to_user(CMSG32_DATA(cm), data, + cmlen - sizeof(struct cmsghdr32))) + return; + cmlen = CMSG32_SPACE(len); + kmsg->msg_control += cmlen; + kmsg->msg_controllen -= cmlen; +} + +static void scm_detach_fds32(struct msghdr *kmsg, struct scm_cookie *scm) +{ + struct cmsghdr32 *cm = (struct cmsghdr32 *) kmsg->msg_control; + int fdmax = (kmsg->msg_controllen - sizeof(struct cmsghdr32)) + / sizeof(int); + int fdnum = scm->fp->count; + struct file **fp = scm->fp->fp; + int *cmfptr; + int err = 0, i; + + if (fdnum < fdmax) + fdmax = fdnum; + + for (i = 0, cmfptr = (int *) CMSG32_DATA(cm); + i < fdmax; + i++, cmfptr++) { + int new_fd; + err = get_unused_fd(); + if (err < 0) + break; + new_fd = err; + err = put_user(new_fd, cmfptr); + if (err) { + put_unused_fd(new_fd); + break; + } + /* Bump the usage count and install the file. */ + fp[i]->f_count++; + current->files->fd[new_fd] = fp[i]; + } + + if (i > 0) { + int cmlen = CMSG32_LEN(i * sizeof(int)); + if (!err) + err = put_user(SOL_SOCKET, &cm->cmsg_level); + if (!err) + err = put_user(SCM_RIGHTS, &cm->cmsg_type); + if (!err) + err = put_user(cmlen, &cm->cmsg_len); + if (!err) { + cmlen = CMSG32_SPACE(i * sizeof(int)); + kmsg->msg_control += cmlen; + kmsg->msg_controllen -= cmlen; + } + } + if (i < fdnum) + kmsg->msg_flags |= MSG_CTRUNC; + + /* + * All of the files that fit in the message have had their + * usage counts incremented, so we just free the list. + */ + __scm_destroy(scm); +} + +/* In these cases we (currently) can just copy to data over verbatim + * because all CMSGs created by the kernel have well defined types which + * have the same layout in both the 32-bit and 64-bit API. One must add + * some special cased conversions here if we start sending control messages + * with incompatible types. + * + * SCM_RIGHTS and SCM_CREDENTIALS are done by hand in recvmsg32 right after + * we do our work. 
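+ * (put_cmsg32() and scm_detach_fds32() rewrite those two into the
+ * 32-bit cmsg layout once the native recvmsg has finished.)
+ *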
The remaining cases are: + * + * SOL_IP IP_PKTINFO struct in_pktinfo 32-bit clean + * IP_TTL int 32-bit clean + * IP_TOS __u8 32-bit clean + * IP_RECVOPTS variable length 32-bit clean + * IP_RETOPTS variable length 32-bit clean + * (these last two are clean because the types are defined + * by the IPv4 protocol) + * IP_RECVERR struct sock_extended_err + + * struct sockaddr_in 32-bit clean + * SOL_IPV6 IPV6_RECVERR struct sock_extended_err + + * struct sockaddr_in6 32-bit clean + * IPV6_PKTINFO struct in6_pktinfo 32-bit clean + * IPV6_HOPLIMIT int 32-bit clean + * IPV6_FLOWINFO u32 32-bit clean + * IPV6_HOPOPTS ipv6 hop exthdr 32-bit clean + * IPV6_DSTOPTS ipv6 dst exthdr(s) 32-bit clean + * IPV6_RTHDR ipv6 routing exthdr 32-bit clean + * IPV6_AUTHHDR ipv6 auth exthdr 32-bit clean + */ +static void +cmsg32_recvmsg_fixup(struct msghdr *kmsg, unsigned long orig_cmsg_uptr) +{ + unsigned char *workbuf, *wp; + unsigned long bufsz, space_avail; + struct cmsghdr *ucmsg; + + bufsz = ((unsigned long)kmsg->msg_control) - orig_cmsg_uptr; + space_avail = kmsg->msg_controllen + bufsz; + wp = workbuf = kmalloc(bufsz, GFP_KERNEL); + if(workbuf == NULL) + goto fail; + + /* To make this more sane we assume the kernel sends back properly + * formatted control messages. Because of how the kernel will truncate + * the cmsg_len for MSG_TRUNC cases, we need not check that case either. + */ + ucmsg = (struct cmsghdr *) orig_cmsg_uptr; + while(((unsigned long)ucmsg) < ((unsigned long)kmsg->msg_control)) { + struct cmsghdr32 *kcmsg32 = (struct cmsghdr32 *) wp; + int clen64, clen32; + + /* UCMSG is the 64-bit format CMSG entry in user-space. + * KCMSG32 is within the kernel space temporary buffer + * we use to convert into a 32-bit style CMSG. + */ + __get_user(kcmsg32->cmsg_len, &ucmsg->cmsg_len); + __get_user(kcmsg32->cmsg_level, &ucmsg->cmsg_level); + __get_user(kcmsg32->cmsg_type, &ucmsg->cmsg_type); + + clen64 = kcmsg32->cmsg_len; + copy_from_user(CMSG32_DATA(kcmsg32), CMSG_DATA(ucmsg), + clen64 - CMSG_ALIGN(sizeof(*ucmsg))); + clen32 = ((clen64 - CMSG_ALIGN(sizeof(*ucmsg))) + + CMSG32_ALIGN(sizeof(struct cmsghdr32))); + kcmsg32->cmsg_len = clen32; + + ucmsg = (struct cmsghdr *) (((char *)ucmsg) + + CMSG_ALIGN(clen64)); + wp = (((char *)kcmsg32) + CMSG32_ALIGN(clen32)); + } + + /* Copy back fixed up data, and adjust pointers. */ + bufsz = (wp - workbuf); + copy_to_user((void *)orig_cmsg_uptr, workbuf, bufsz); + + kmsg->msg_control = (struct cmsghdr *) + (((char *)orig_cmsg_uptr) + bufsz); + kmsg->msg_controllen = space_avail - bufsz; + + kfree(workbuf); + return; + +fail: + /* If we leave the 64-bit format CMSG chunks in there, + * the application could get confused and crash. So to + * ensure greater recovery, we report no CMSGs. 
*/
+	kmsg->msg_controllen += bufsz;
+	kmsg->msg_control = (void *) orig_cmsg_uptr;
+}
+
+asmlinkage int
+sys32_sendmsg(int fd, struct msghdr32 *user_msg, unsigned user_flags)
+{
+	struct socket *sock;
+	char address[MAX_SOCK_ADDR];
+	struct iovec iov[UIO_FASTIOV];
+	unsigned char ctl[sizeof(struct cmsghdr) + 20];
+	unsigned char *ctl_buf = ctl;
+	struct msghdr kern_msg;
+	int err, total_len;
+
+	if(msghdr_from_user32_to_kern(&kern_msg, user_msg))
+		return -EFAULT;
+	if(kern_msg.msg_iovlen > UIO_MAXIOV)
+		return -EINVAL;
+	err = verify_iovec32(&kern_msg, iov, address, VERIFY_READ);
+	if (err < 0)
+		goto out;
+	total_len = err;
+
+	if(kern_msg.msg_controllen) {
+		err = cmsghdr_from_user32_to_kern(&kern_msg, ctl, sizeof(ctl));
+		if(err)
+			goto out_freeiov;
+		ctl_buf = kern_msg.msg_control;
+	}
+	kern_msg.msg_flags = user_flags;
+
+	lock_kernel();
+	sock = sockfd_lookup(fd, &err);
+	if (sock != NULL) {
+		if (sock->file->f_flags & O_NONBLOCK)
+			kern_msg.msg_flags |= MSG_DONTWAIT;
+		err = sock_sendmsg(sock, &kern_msg, total_len);
+		sockfd_put(sock);
+	}
+	unlock_kernel();
+
+	/* N.B. Use kfree here, as kern_msg.msg_controllen might change? */
+	if(ctl_buf != ctl)
+		kfree(ctl_buf);
+out_freeiov:
+	if(kern_msg.msg_iov != iov)
+		kfree(kern_msg.msg_iov);
+out:
+	return err;
+}
+
+asmlinkage int
+sys32_recvmsg(int fd, struct msghdr32 *user_msg, unsigned int user_flags)
+{
+	struct iovec iovstack[UIO_FASTIOV];
+	struct msghdr kern_msg;
+	char addr[MAX_SOCK_ADDR];
+	struct socket *sock;
+	struct iovec *iov = iovstack;
+	struct sockaddr *uaddr;
+	int *uaddr_len;
+	unsigned long cmsg_ptr;
+	int err, total_len, len = 0;
+
+	if(msghdr_from_user32_to_kern(&kern_msg, user_msg))
+		return -EFAULT;
+	if(kern_msg.msg_iovlen > UIO_MAXIOV)
+		return -EINVAL;
+
+	uaddr = kern_msg.msg_name;
+	uaddr_len = &user_msg->msg_namelen;
+	err = verify_iovec32(&kern_msg, iov, addr, VERIFY_WRITE);
+	if (err < 0)
+		goto out;
+	total_len = err;
+
+	cmsg_ptr = (unsigned long) kern_msg.msg_control;
+	kern_msg.msg_flags = 0;
+
+	lock_kernel();
+	sock = sockfd_lookup(fd, &err);
+	if (sock != NULL) {
+		struct scm_cookie scm;
+
+		if (sock->file->f_flags & O_NONBLOCK)
+			user_flags |= MSG_DONTWAIT;
+		memset(&scm, 0, sizeof(scm));
+		err = sock->ops->recvmsg(sock, &kern_msg, total_len,
+					 user_flags, &scm);
+		if(err >= 0) {
+			len = err;
+			if(!kern_msg.msg_control) {
+				if(sock->passcred || scm.fp)
+					kern_msg.msg_flags |= MSG_CTRUNC;
+				if(scm.fp)
+					__scm_destroy(&scm);
+			} else {
+				/* If recvmsg processing itself placed some
+				 * control messages into user space, it is
+				 * using 64-bit CMSG processing, so we need
+				 * to fix it up before we tack on more stuff.
+				 */
+				if((unsigned long) kern_msg.msg_control
+				    != cmsg_ptr)
+					cmsg32_recvmsg_fixup(&kern_msg,
+							     cmsg_ptr);
+
+				/* Wheee...
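+				 * Everything in msg_control is now in
+				 * 32-bit cmsg format, so credentials and
+				 * passed fds can simply be appended with
+				 * the 32-bit helpers.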
*/ + if(sock->passcred) + put_cmsg32(&kern_msg, + SOL_SOCKET, SCM_CREDENTIALS, + sizeof(scm.creds), + &scm.creds); + if(scm.fp != NULL) + scm_detach_fds32(&kern_msg, &scm); + } + } + sockfd_put(sock); + } + unlock_kernel(); + + if(uaddr != NULL && err >= 0) + err = move_addr_to_user(addr, kern_msg.msg_namelen, uaddr, + uaddr_len); + if(cmsg_ptr != 0 && err >= 0) { + unsigned long ucmsg_ptr = ((unsigned long)kern_msg.msg_control); + __kernel_size_t32 uclen = (__kernel_size_t32) (ucmsg_ptr + - cmsg_ptr); + err |= __put_user(uclen, &user_msg->msg_controllen); + } + if(err >= 0) + err = __put_user(kern_msg.msg_flags, &user_msg->msg_flags); + if(kern_msg.msg_iov != iov) + kfree(kern_msg.msg_iov); +out: + if(err < 0) + return err; + return len; +} + +extern void check_pending(int signum); + +asmlinkage int +sys32_sigaction (int sig, struct old_sigaction32 *act, + struct old_sigaction32 *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if(sig < 0) { + current->tss.new_signal = 1; + sig = -sig; + } + + if (act) { + old_sigset_t32 mask; + + ret = get_user((long)new_ka.sa.sa_handler, &act->sa_handler); + ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); + ret |= __get_user(mask, &act->sa_mask); + if (ret) + return ret; + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler); + ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + ret |= __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} + +#ifdef CONFIG_MODULES + +extern asmlinkage unsigned long sys_create_module(const char *name_user, + size_t size); + +asmlinkage unsigned long +sys32_create_module(const char *name_user, __kernel_size_t32 size) +{ + return sys_create_module(name_user, (size_t)size); +} + +extern asmlinkage int sys_init_module(const char *name_user, + struct module *mod_user); + +/* Hey, when you're trying to init module, take time and prepare us a nice 64bit + * module structure, even if from 32bit modutils... Why to pollute kernel... :)) + */ +asmlinkage int +sys32_init_module(const char *name_user, struct module *mod_user) +{ + return sys_init_module(name_user, mod_user); +} + +extern asmlinkage int sys_delete_module(const char *name_user); + +asmlinkage int +sys32_delete_module(const char *name_user) +{ + return sys_delete_module(name_user); +} + +struct module_info32 { + u32 addr; + u32 size; + u32 flags; + s32 usecount; +}; + +/* Query various bits about modules. 
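+ * qm_modules(), qm_deps(), qm_refs(), qm_symbols() and qm_info()
+ * below implement the QM_* subcommands of query_module(2) against
+ * 32-bit user buffers; each stores the space needed via *ret and
+ * returns -ENOSPC when the supplied buffer is too small.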
*/ + +static inline long +get_mod_name(const char *user_name, char **buf) +{ + unsigned long page; + long retval; + + if ((unsigned long)user_name >= TASK_SIZE + && !segment_eq(get_fs (), KERNEL_DS)) + return -EFAULT; + + page = __get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + retval = strncpy_from_user((char *)page, user_name, PAGE_SIZE); + if (retval > 0) { + if (retval < PAGE_SIZE) { + *buf = (char *)page; + return retval; + } + retval = -ENAMETOOLONG; + } else if (!retval) + retval = -EINVAL; + + free_page(page); + return retval; +} + +static inline void +put_mod_name(char *buf) +{ + free_page((unsigned long)buf); +} + +static __inline__ struct module * +find_module(const char *name) +{ + struct module *mod; + + for (mod = module_list; mod ; mod = mod->next) { + if (mod->flags & MOD_DELETED) + continue; + if (!strcmp(mod->name, name)) + break; + } + + return mod; +} + +static int +qm_modules(char *buf, size_t bufsize, __kernel_size_t32 *ret) +{ + struct module *mod; + size_t nmod, space, len; + + nmod = space = 0; + + for (mod = module_list; mod->next != NULL; mod = mod->next, ++nmod) { + len = strlen(mod->name)+1; + if (len > bufsize) + goto calc_space_needed; + if (copy_to_user(buf, mod->name, len)) + return -EFAULT; + buf += len; + bufsize -= len; + space += len; + } + + if (put_user(nmod, ret)) + return -EFAULT; + else + return 0; + +calc_space_needed: + space += len; + while ((mod = mod->next)->next != NULL) + space += strlen(mod->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; +} + +static int +qm_deps(struct module *mod, char *buf, size_t bufsize, __kernel_size_t32 *ret) +{ + size_t i, space, len; + + if (mod->next == NULL) + return -EINVAL; + if ((mod->flags & (MOD_RUNNING | MOD_DELETED)) != MOD_RUNNING) + if (put_user(0, ret)) + return -EFAULT; + else + return 0; + + space = 0; + for (i = 0; i < mod->ndeps; ++i) { + const char *dep_name = mod->deps[i].dep->name; + + len = strlen(dep_name)+1; + if (len > bufsize) + goto calc_space_needed; + if (copy_to_user(buf, dep_name, len)) + return -EFAULT; + buf += len; + bufsize -= len; + space += len; + } + + if (put_user(i, ret)) + return -EFAULT; + else + return 0; + +calc_space_needed: + space += len; + while (++i < mod->ndeps) + space += strlen(mod->deps[i].dep->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; +} + +static int +qm_refs(struct module *mod, char *buf, size_t bufsize, __kernel_size_t32 *ret) +{ + size_t nrefs, space, len; + struct module_ref *ref; + + if (mod->next == NULL) + return -EINVAL; + if ((mod->flags & (MOD_RUNNING | MOD_DELETED)) != MOD_RUNNING) + if (put_user(0, ret)) + return -EFAULT; + else + return 0; + + space = 0; + for (nrefs = 0, ref = mod->refs; ref ; ++nrefs, ref = ref->next_ref) { + const char *ref_name = ref->ref->name; + + len = strlen(ref_name)+1; + if (len > bufsize) + goto calc_space_needed; + if (copy_to_user(buf, ref_name, len)) + return -EFAULT; + buf += len; + bufsize -= len; + space += len; + } + + if (put_user(nrefs, ret)) + return -EFAULT; + else + return 0; + +calc_space_needed: + space += len; + while ((ref = ref->next_ref) != NULL) + space += strlen(ref->ref->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; +} + +static inline int +qm_symbols(struct module *mod, char *buf, size_t bufsize, + __kernel_size_t32 *ret) +{ + size_t i, space, len; + struct module_symbol *s; + char *strings; + unsigned *vals; + + if ((mod->flags & (MOD_RUNNING | MOD_DELETED)) != MOD_RUNNING) + 
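+		/* Module not running (or being deleted): report 0 symbols. */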
if (put_user(0, ret)) + return -EFAULT; + else + return 0; + + space = mod->nsyms * 2*sizeof(u32); + + i = len = 0; + s = mod->syms; + + if (space > bufsize) + goto calc_space_needed; + + if (!access_ok(VERIFY_WRITE, buf, space)) + return -EFAULT; + + bufsize -= space; + vals = (unsigned *)buf; + strings = buf+space; + + for (; i < mod->nsyms ; ++i, ++s, vals += 2) { + len = strlen(s->name)+1; + if (len > bufsize) + goto calc_space_needed; + + if (copy_to_user(strings, s->name, len) + || __put_user(s->value, vals+0) + || __put_user(space, vals+1)) + return -EFAULT; + + strings += len; + bufsize -= len; + space += len; + } + + if (put_user(i, ret)) + return -EFAULT; + else + return 0; + +calc_space_needed: + for (; i < mod->nsyms; ++i, ++s) + space += strlen(s->name)+1; + + if (put_user(space, ret)) + return -EFAULT; + else + return -ENOSPC; +} + +static inline int +qm_info(struct module *mod, char *buf, size_t bufsize, __kernel_size_t32 *ret) +{ + int error = 0; + + if (mod->next == NULL) + return -EINVAL; + + if (sizeof(struct module_info32) <= bufsize) { + struct module_info32 info; + info.addr = (unsigned long)mod; + info.size = mod->size; + info.flags = mod->flags; + info.usecount = + ((mod_member_present(mod, can_unload) + && mod->can_unload) + ? -1 : atomic_read(&mod->uc.usecount)); + + if (copy_to_user(buf, &info, sizeof(struct module_info32))) + return -EFAULT; + } else + error = -ENOSPC; + + if (put_user(sizeof(struct module_info32), ret)) + return -EFAULT; + + return error; +} + +asmlinkage int +sys32_query_module(char *name_user, int which, char *buf, + __kernel_size_t32 bufsize, u32 ret) +{ + struct module *mod; + int err; + + lock_kernel(); + if (name_user == 0) { + /* This finds "kernel_module" which is not exported. */ + for(mod = module_list; mod->next != NULL; mod = mod->next) + ; + } else { + long namelen; + char *name; + + if ((namelen = get_mod_name(name_user, &name)) < 0) { + err = namelen; + goto out; + } + err = -ENOENT; + if (namelen == 0) { + /* This finds "kernel_module" which is not exported. 
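+		 * The resident kernel is the last entry on module_list,
+		 * so walking to the tail locates it without the symbol
+		 * being exported.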
*/ + for(mod = module_list; + mod->next != NULL; + mod = mod->next) ; + } else if ((mod = find_module(name)) == NULL) { + put_mod_name(name); + goto out; + } + put_mod_name(name); + } + + switch (which) + { + case 0: + err = 0; + break; + case QM_MODULES: + err = qm_modules(buf, bufsize, (__kernel_size_t32 *)AA(ret)); + break; + case QM_DEPS: + err = qm_deps(mod, buf, bufsize, (__kernel_size_t32 *)AA(ret)); + break; + case QM_REFS: + err = qm_refs(mod, buf, bufsize, (__kernel_size_t32 *)AA(ret)); + break; + case QM_SYMBOLS: + err = qm_symbols(mod, buf, bufsize, + (__kernel_size_t32 *)AA(ret)); + break; + case QM_INFO: + err = qm_info(mod, buf, bufsize, (__kernel_size_t32 *)AA(ret)); + break; + default: + err = -EINVAL; + break; + } +out: + unlock_kernel(); + return err; +} + +struct kernel_sym32 { + u32 value; + char name[60]; +}; + +extern asmlinkage int sys_get_kernel_syms(struct kernel_sym *table); + +asmlinkage int +sys32_get_kernel_syms(struct kernel_sym32 *table) +{ + int len, i; + struct kernel_sym *tbl; + mm_segment_t old_fs; + + len = sys_get_kernel_syms(NULL); + if (!table) return len; + tbl = kmalloc (len * sizeof (struct kernel_sym), GFP_KERNEL); + if (!tbl) return -ENOMEM; + old_fs = get_fs(); + set_fs (KERNEL_DS); + sys_get_kernel_syms(tbl); + set_fs (old_fs); + for (i = 0; i < len; i++, table += sizeof (struct kernel_sym32)) { + if (put_user (tbl[i].value, &table->value) || + copy_to_user (table->name, tbl[i].name, 60)) + break; + } + kfree (tbl); + return i; +} + +#else /* CONFIG_MODULES */ + +asmlinkage unsigned long +sys32_create_module(const char *name_user, size_t size) +{ + return -ENOSYS; +} + +asmlinkage int +sys32_init_module(const char *name_user, struct module *mod_user) +{ + return -ENOSYS; +} + +asmlinkage int +sys32_delete_module(const char *name_user) +{ + return -ENOSYS; +} + +asmlinkage int +sys32_query_module(const char *name_user, int which, char *buf, size_t bufsize, + size_t *ret) +{ + /* Let the program know about the new interface. Not that + it'll do them much good. */ + if (which == 0) + return 0; + + return -ENOSYS; +} + +asmlinkage int +sys32_get_kernel_syms(struct kernel_sym *table) +{ + return -ENOSYS; +} + +#endif /* CONFIG_MODULES */ + +/* Stuff for NFS server syscalls... 
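+ * The nfsctl_*32 structures below mirror the nfsctl argument union
+ * with 32-bit field types, so a 32-bit rpc.nfsd's layout can be
+ * translated field by field before calling sys_nfsservctl().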
*/ +struct nfsctl_svc32 { + u16 svc32_port; + s32 svc32_nthreads; +}; + +struct nfsctl_client32 { + s8 cl32_ident[NFSCLNT_IDMAX+1]; + s32 cl32_naddr; + struct in_addr cl32_addrlist[NFSCLNT_ADDRMAX]; + s32 cl32_fhkeytype; + s32 cl32_fhkeylen; + u8 cl32_fhkey[NFSCLNT_KEYMAX]; +}; + +struct nfsctl_export32 { + s8 ex32_client[NFSCLNT_IDMAX+1]; + s8 ex32_path[NFS_MAXPATHLEN+1]; + __kernel_dev_t32 ex32_dev; + __kernel_ino_t32 ex32_ino; + s32 ex32_flags; + __kernel_uid_t32 ex32_anon_uid; + __kernel_gid_t32 ex32_anon_gid; +}; + +struct nfsctl_uidmap32 { + u32 ug32_ident; /* char * */ + __kernel_uid_t32 ug32_uidbase; + s32 ug32_uidlen; + u32 ug32_udimap; /* uid_t * */ + __kernel_uid_t32 ug32_gidbase; + s32 ug32_gidlen; + u32 ug32_gdimap; /* gid_t * */ +}; + +struct nfsctl_fhparm32 { + struct sockaddr gf32_addr; + __kernel_dev_t32 gf32_dev; + __kernel_ino_t32 gf32_ino; + s32 gf32_version; +}; + +struct nfsctl_arg32 { + s32 ca32_version; /* safeguard */ + union { + struct nfsctl_svc32 u32_svc; + struct nfsctl_client32 u32_client; + struct nfsctl_export32 u32_export; + struct nfsctl_uidmap32 u32_umap; + struct nfsctl_fhparm32 u32_getfh; + u32 u32_debug; + } u; +#define ca32_svc u.u32_svc +#define ca32_client u.u32_client +#define ca32_export u.u32_export +#define ca32_umap u.u32_umap +#define ca32_getfh u.u32_getfh +#define ca32_authd u.u32_authd +#define ca32_debug u.u32_debug +}; + +union nfsctl_res32 { + struct knfs_fh cr32_getfh; + u32 cr32_debug; +}; + +static int +nfs_svc32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) +{ + int err; + + err = __get_user(karg->ca_version, &arg32->ca32_version); + err |= __get_user(karg->ca_svc.svc_port, &arg32->ca32_svc.svc32_port); + err |= __get_user(karg->ca_svc.svc_nthreads, + &arg32->ca32_svc.svc32_nthreads); + return err; +} + +static int +nfs_clnt32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) +{ + int err; + + err = __get_user(karg->ca_version, &arg32->ca32_version); + err |= copy_from_user(&karg->ca_client.cl_ident[0], + &arg32->ca32_client.cl32_ident[0], + NFSCLNT_IDMAX); + err |= __get_user(karg->ca_client.cl_naddr, + &arg32->ca32_client.cl32_naddr); + err |= copy_from_user(&karg->ca_client.cl_addrlist[0], + &arg32->ca32_client.cl32_addrlist[0], + (sizeof(struct in_addr) * NFSCLNT_ADDRMAX)); + err |= __get_user(karg->ca_client.cl_fhkeytype, + &arg32->ca32_client.cl32_fhkeytype); + err |= __get_user(karg->ca_client.cl_fhkeylen, + &arg32->ca32_client.cl32_fhkeylen); + err |= copy_from_user(&karg->ca_client.cl_fhkey[0], + &arg32->ca32_client.cl32_fhkey[0], + NFSCLNT_KEYMAX); + return err; +} + +static int +nfs_exp32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) +{ + int err; + + err = __get_user(karg->ca_version, &arg32->ca32_version); + err |= copy_from_user(&karg->ca_export.ex_client[0], + &arg32->ca32_export.ex32_client[0], + NFSCLNT_IDMAX); + err |= copy_from_user(&karg->ca_export.ex_path[0], + &arg32->ca32_export.ex32_path[0], + NFS_MAXPATHLEN); + err |= __get_user(karg->ca_export.ex_dev, + &arg32->ca32_export.ex32_dev); + err |= __get_user(karg->ca_export.ex_ino, + &arg32->ca32_export.ex32_ino); + err |= __get_user(karg->ca_export.ex_flags, + &arg32->ca32_export.ex32_flags); + err |= __get_user(karg->ca_export.ex_anon_uid, + &arg32->ca32_export.ex32_anon_uid); + err |= __get_user(karg->ca_export.ex_anon_gid, + &arg32->ca32_export.ex32_anon_gid); + return err; +} + +static int +nfs_uud32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) +{ + u32 uaddr; + int i; + int err; + + memset(karg, 0, 
sizeof(*karg));
+	if(__get_user(karg->ca_version, &arg32->ca32_version))
+		return -EFAULT;
+	karg->ca_umap.ug_ident = (char *)get_free_page(GFP_USER);
+	if(!karg->ca_umap.ug_ident)
+		return -ENOMEM;
+	err = __get_user(uaddr, &arg32->ca32_umap.ug32_ident);
+	if(strncpy_from_user(karg->ca_umap.ug_ident,
+			     (char *)A(uaddr), PAGE_SIZE) <= 0)
+		return -EFAULT;
+	err |= __get_user(karg->ca_umap.ug_uidbase,
+			  &arg32->ca32_umap.ug32_uidbase);
+	err |= __get_user(karg->ca_umap.ug_uidlen,
+			  &arg32->ca32_umap.ug32_uidlen);
+	err |= __get_user(uaddr, &arg32->ca32_umap.ug32_udimap);
+	if (err)
+		return -EFAULT;
+	karg->ca_umap.ug_udimap = kmalloc((sizeof(uid_t) *
+					   karg->ca_umap.ug_uidlen),
+					  GFP_USER);
+	if(!karg->ca_umap.ug_udimap)
+		return -ENOMEM;
+	for(i = 0; i < karg->ca_umap.ug_uidlen; i++)
+		err |= __get_user(karg->ca_umap.ug_udimap[i],
+				  &(((__kernel_uid_t32 *)A(uaddr))[i]));
+	err |= __get_user(karg->ca_umap.ug_gidbase,
+			  &arg32->ca32_umap.ug32_gidbase);
+	err |= __get_user(karg->ca_umap.ug_gidlen,
+			  &arg32->ca32_umap.ug32_gidlen);
+	err |= __get_user(uaddr, &arg32->ca32_umap.ug32_gdimap);
+	if (err)
+		return -EFAULT;
+	karg->ca_umap.ug_gdimap = kmalloc((sizeof(gid_t) *
+					   karg->ca_umap.ug_gidlen),
+					  GFP_USER);
+	if(!karg->ca_umap.ug_gdimap)
+		return -ENOMEM;
+	for(i = 0; i < karg->ca_umap.ug_gidlen; i++)
+		err |= __get_user(karg->ca_umap.ug_gdimap[i],
+				  &(((__kernel_gid_t32 *)A(uaddr))[i]));
+
+	return err;
+}
+
+static int
+nfs_getfh32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32)
+{
+	int err;
+
+	err = __get_user(karg->ca_version, &arg32->ca32_version);
+	err |= copy_from_user(&karg->ca_getfh.gf_addr,
+			      &arg32->ca32_getfh.gf32_addr,
+			      (sizeof(struct sockaddr)));
+	err |= __get_user(karg->ca_getfh.gf_dev,
+			  &arg32->ca32_getfh.gf32_dev);
+	err |= __get_user(karg->ca_getfh.gf_ino,
+			  &arg32->ca32_getfh.gf32_ino);
+	err |= __get_user(karg->ca_getfh.gf_version,
+			  &arg32->ca32_getfh.gf32_version);
+	return err;
+}
+
+static int
+nfs_getfh32_res_trans(union nfsctl_res *kres, union nfsctl_res32 *res32)
+{
+	int err;
+
+	err = copy_to_user(&res32->cr32_getfh,
+			   &kres->cr_getfh,
+			   sizeof(res32->cr32_getfh));
+	err |= __put_user(kres->cr_debug, &res32->cr32_debug);
+	return err;
+}
+
+extern asmlinkage int sys_nfsservctl(int cmd, void *arg, void *resp);
+
+int asmlinkage
+sys32_nfsservctl(int cmd, struct nfsctl_arg32 *arg32, union nfsctl_res32 *res32)
+{
+	struct nfsctl_arg *karg = NULL;
+	union nfsctl_res *kres = NULL;
+	mm_segment_t oldfs;
+	int err;
+
+	karg = kmalloc(sizeof(*karg), GFP_USER);
+	if(!karg)
+		return -ENOMEM;
+	if(res32) {
+		kres = kmalloc(sizeof(*kres), GFP_USER);
+		if(!kres) {
+			kfree(karg);
+			return -ENOMEM;
+		}
+	}
+	switch(cmd) {
+	case NFSCTL_SVC:
+		err = nfs_svc32_trans(karg, arg32);
+		break;
+	case NFSCTL_ADDCLIENT:
+		err = nfs_clnt32_trans(karg, arg32);
+		break;
+	case NFSCTL_DELCLIENT:
+		err = nfs_clnt32_trans(karg, arg32);
+		break;
+	case NFSCTL_EXPORT:
+		err = nfs_exp32_trans(karg, arg32);
+		break;
+	/* This one is unimplemented, but we're ready for it.
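+	 * (The translation for NFSCTL_UGIDUPDATE is wired up even though
+	 * the native syscall does not implement that command yet.)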
*/
+	case NFSCTL_UGIDUPDATE:
+		err = nfs_uud32_trans(karg, arg32);
+		break;
+	case NFSCTL_GETFH:
+		err = nfs_getfh32_trans(karg, arg32);
+		break;
+	default:
+		err = -EINVAL;
+		break;
+	}
+	if(err)
+		goto done;
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	err = sys_nfsservctl(cmd, karg, kres);
+	set_fs(oldfs);
+
+	if(!err && cmd == NFSCTL_GETFH)
+		err = nfs_getfh32_res_trans(kres, res32);
+
+done:
+	if(karg) {
+		if(cmd == NFSCTL_UGIDUPDATE) {
+			if(karg->ca_umap.ug_ident)
+				kfree(karg->ca_umap.ug_ident);
+			if(karg->ca_umap.ug_udimap)
+				kfree(karg->ca_umap.ug_udimap);
+			if(karg->ca_umap.ug_gdimap)
+				kfree(karg->ca_umap.ug_gdimap);
+		}
+		kfree(karg);
+	}
+	if(kres)
+		kfree(kres);
+	return err;
+}
+
+asmlinkage int sys_utimes(char *, struct timeval *);
+
+asmlinkage int
+sys32_utimes(char *filename, struct timeval32 *tvs)
+{
+	char *kfilename;
+	struct timeval ktvs[2];
+	mm_segment_t old_fs;
+	int ret;
+
+	kfilename = getname32(filename);
+	ret = PTR_ERR(kfilename);
+	if (!IS_ERR(kfilename)) {
+		if (tvs) {
+			if (get_tv32(&ktvs[0], tvs) ||
+			    get_tv32(&ktvs[1], 1+tvs))
+				return -EFAULT;
+		}
+
+		old_fs = get_fs();
+		set_fs(KERNEL_DS);
+		ret = sys_utimes(kfilename, &ktvs[0]);
+		set_fs(old_fs);
+
+		putname(kfilename);
+	}
+	return ret;
+}
+
+/* This is here just in case some old ia32 binary calls it. */
+asmlinkage int
+sys32_pause(void)
+{
+	current->state = TASK_INTERRUPTIBLE;
+	schedule();
+	return -ERESTARTNOHAND;
+}
+
+/* PCI config space poking. */
+extern asmlinkage int sys_pciconfig_read(unsigned long bus,
+					 unsigned long dfn,
+					 unsigned long off,
+					 unsigned long len,
+					 unsigned char *buf);
+
+extern asmlinkage int sys_pciconfig_write(unsigned long bus,
+					  unsigned long dfn,
+					  unsigned long off,
+					  unsigned long len,
+					  unsigned char *buf);
+
+asmlinkage int
+sys32_pciconfig_read(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf)
+{
+	return sys_pciconfig_read((unsigned long) bus,
+				  (unsigned long) dfn,
+				  (unsigned long) off,
+				  (unsigned long) len,
+				  (unsigned char *)AA(ubuf));
+}
+
+asmlinkage int
+sys32_pciconfig_write(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf)
+{
+	return sys_pciconfig_write((unsigned long) bus,
+				   (unsigned long) dfn,
+				   (unsigned long) off,
+				   (unsigned long) len,
+				   (unsigned char *)AA(ubuf));
+}
+
+extern asmlinkage int sys_prctl(int option, unsigned long arg2,
+				unsigned long arg3, unsigned long arg4,
+				unsigned long arg5);
+
+asmlinkage int
+sys32_prctl(int option, u32 arg2, u32 arg3, u32 arg4, u32 arg5)
+{
+	return sys_prctl(option,
+			 (unsigned long) arg2,
+			 (unsigned long) arg3,
+			 (unsigned long) arg4,
+			 (unsigned long) arg5);
+}
+
+
+extern asmlinkage int sys_newuname(struct new_utsname * name);
+
+asmlinkage int
+sys32_newuname(struct new_utsname * name)
+{
+	int ret = sys_newuname(name);
+
+	if (current->personality == PER_LINUX32 && !ret) {
+		/* PER_LINUX32 tasks run ia32 code, so report an ia32
+		 * machine name here.
+		 */
+		ret = copy_to_user(name->machine, "i686\0\0\0", 8);
+	}
+	return ret;
+}
+
+extern asmlinkage ssize_t sys_pread(unsigned int fd, char * buf,
+				    size_t count, loff_t pos);
+
+extern asmlinkage ssize_t sys_pwrite(unsigned int fd, const char * buf,
+				     size_t count, loff_t pos);
+
+typedef __kernel_ssize_t32 ssize_t32;
+
+asmlinkage ssize_t32
+sys32_pread(unsigned int fd, char *ubuf, __kernel_size_t32 count,
+	    u32 poshi, u32 poslo)
+{
+	return sys_pread(fd, ubuf, count,
+			 ((loff_t)AA(poshi) << 32) | AA(poslo));
+}
+
+asmlinkage ssize_t32
+sys32_pwrite(unsigned int fd, char *ubuf, __kernel_size_t32 count,
+	     u32 poshi, u32 poslo)
+{
+	return sys_pwrite(fd, ubuf, count,
+			  ((loff_t)AA(poshi) << 32) | AA(poslo));
+}
+
+
+extern asmlinkage int
sys_personality(unsigned long); + +asmlinkage int +sys32_personality(unsigned long personality) +{ + int ret; + lock_kernel(); + if (current->personality == PER_LINUX32 && personality == PER_LINUX) + personality = PER_LINUX32; + ret = sys_personality(personality); + unlock_kernel(); + if (ret == PER_LINUX32) + ret = PER_LINUX; + return ret; +} + +extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset, + size_t count); + +asmlinkage int +sys32_sendfile(int out_fd, int in_fd, __kernel_off_t32 *offset, s32 count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + off_t of; + + if (offset && get_user(of, offset)) + return -EFAULT; + + set_fs(KERNEL_DS); + ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count); + set_fs(old_fs); + + if (!ret && offset && put_user(of, offset)) + return -EFAULT; + + return ret; +} + +/* Handle adjtimex compatability. */ + +struct timex32 { + u32 modes; + s32 offset, freq, maxerror, esterror; + s32 status, constant, precision, tolerance; + struct timeval32 time; + s32 tick; + s32 ppsfreq, jitter, shift, stabil; + s32 jitcnt, calcnt, errcnt, stbcnt; + s32 :32; s32 :32; s32 :32; s32 :32; + s32 :32; s32 :32; s32 :32; s32 :32; + s32 :32; s32 :32; s32 :32; s32 :32; +}; + +extern int do_adjtimex(struct timex *); + +asmlinkage int +sys32_adjtimex(struct timex32 *utp) +{ + struct timex txc; + int ret; + + memset(&txc, 0, sizeof(struct timex)); + + if(get_user(txc.modes, &utp->modes) || + __get_user(txc.offset, &utp->offset) || + __get_user(txc.freq, &utp->freq) || + __get_user(txc.maxerror, &utp->maxerror) || + __get_user(txc.esterror, &utp->esterror) || + __get_user(txc.status, &utp->status) || + __get_user(txc.constant, &utp->constant) || + __get_user(txc.precision, &utp->precision) || + __get_user(txc.tolerance, &utp->tolerance) || + __get_user(txc.time.tv_sec, &utp->time.tv_sec) || + __get_user(txc.time.tv_usec, &utp->time.tv_usec) || + __get_user(txc.tick, &utp->tick) || + __get_user(txc.ppsfreq, &utp->ppsfreq) || + __get_user(txc.jitter, &utp->jitter) || + __get_user(txc.shift, &utp->shift) || + __get_user(txc.stabil, &utp->stabil) || + __get_user(txc.jitcnt, &utp->jitcnt) || + __get_user(txc.calcnt, &utp->calcnt) || + __get_user(txc.errcnt, &utp->errcnt) || + __get_user(txc.stbcnt, &utp->stbcnt)) + return -EFAULT; + + ret = do_adjtimex(&txc); + + if(put_user(txc.modes, &utp->modes) || + __put_user(txc.offset, &utp->offset) || + __put_user(txc.freq, &utp->freq) || + __put_user(txc.maxerror, &utp->maxerror) || + __put_user(txc.esterror, &utp->esterror) || + __put_user(txc.status, &utp->status) || + __put_user(txc.constant, &utp->constant) || + __put_user(txc.precision, &utp->precision) || + __put_user(txc.tolerance, &utp->tolerance) || + __put_user(txc.time.tv_sec, &utp->time.tv_sec) || + __put_user(txc.time.tv_usec, &utp->time.tv_usec) || + __put_user(txc.tick, &utp->tick) || + __put_user(txc.ppsfreq, &utp->ppsfreq) || + __put_user(txc.jitter, &utp->jitter) || + __put_user(txc.shift, &utp->shift) || + __put_user(txc.stabil, &utp->stabil) || + __put_user(txc.jitcnt, &utp->jitcnt) || + __put_user(txc.calcnt, &utp->calcnt) || + __put_user(txc.errcnt, &utp->errcnt) || + __put_user(txc.stbcnt, &utp->stbcnt)) + ret = -EFAULT; + + return ret; +} +#endif // NOTYET + diff --git a/arch/ia64/kdb/Makefile b/arch/ia64/kdb/Makefile new file mode 100644 index 000000000..0b29d6b35 --- /dev/null +++ b/arch/ia64/kdb/Makefile @@ -0,0 +1,21 @@ +# +# Makefile for ia64-specific kdb files.. +# +# Copyright 1999, Silicon Graphics Inc. 
+# +# Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. +# Code for IA64 written by Goutham Rao <goutham.rao@intel.com> and +# Sreenivas Subramoney <sreenivas.subramoney@intel.com> +# + +SUB_DIRS := +MOD_SUB_DIRS := $(SUB_DIRS) +ALL_SUB_DIRS := $(SUB_DIRS) + +.S.o: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -traditional -c $< -o $*.o + +L_TARGET = kdb.a +L_OBJS = kdbsupport.o kdb_io.o kdb_bt.o kdb_traps.o + +include $(TOPDIR)/Rules.make diff --git a/arch/ia64/kdb/kdb_bt.c b/arch/ia64/kdb/kdb_bt.c new file mode 100644 index 000000000..dbcb7a575 --- /dev/null +++ b/arch/ia64/kdb/kdb_bt.c @@ -0,0 +1,104 @@ +/** + * Minimalist Kernel Debugger + * Machine dependent stack traceback code for IA-64. + * + * Copyright (C) 1999 Goutham Rao <goutham.rao@intel.com> + * Copyright (C) 1999 Sreenivas Subramoney <sreenivas.subramoney@intel.com> + * Intel Corporation, August 1999. + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * + * 99/12/03 D. Mosberger Reimplemented based on <asm-ia64/unwind.h> API. + * 99/12/06 D. Mosberger Added support for backtracing other processes. + */ + +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/kdb.h> +#include <asm/system.h> +#include <asm/current.h> +#include <asm/kdbsupport.h> + +/* + * Minimal stack back trace functionality. + */ +int +kdb_bt (int argc, const char **argv, const char **envp, struct pt_regs *regs) +{ + struct task_struct *task = current; + struct ia64_frame_info info; + char *name; + int diag; + + if (strcmp(argv[0], "btp") == 0) { + unsigned long pid; + + diag = kdbgetularg(argv[1], &pid); + if (diag) + return diag; + + task = find_task_by_pid(pid); + if (!task) { + kdb_printf("No process with pid == %d found\n", pid); + return 0; + } + regs = ia64_task_regs(task); + } else if (argc) { + kdb_printf("bt <address> is unsupported for IA-64\n"); + return 0; + } + + if (task == current) { + /* + * Upon entering kdb, the stack frame looks like this: + * + * +---------------------+ + * | struct pt_regs | + * +---------------------+ + * | | + * | kernel stack | + * | | + * +=====================+ <--- top of stack upon entering kdb + * | struct pt_regs | + * +---------------------+ + * | struct switch_stack | + * +---------------------+ + */ + if (user_mode(regs)) { + /* We are not implementing stack backtrace from user mode code */ + kdb_printf ("Not in Kernel\n"); + return 0; + } + ia64_unwind_init_from_current(&info, regs); + } else { + /* + * For a blocked task, the stack frame looks like this: + * + * +---------------------+ + * | struct pt_regs | + * +---------------------+ + * | | + * | kernel stack | + * | | + * +---------------------+ + * | struct switch_stack | + * +=====================+ <--- task->thread.ksp + */ + ia64_unwind_init_from_blocked_task(&info, task); + } + + kdb_printf("Ret Address Reg Stack base Name\n\n") ; + do { + unsigned long ip = ia64_unwind_get_ip(&info); + + name = kdbnearsym(ip); + if (!name) { + kdb_printf("Interrupt\n"); + return 0; + } + kdb_printf("0x%016lx: [0x%016lx] %s\n", ip, ia64_unwind_get_bsp(&info), name); + } while (ia64_unwind_to_previous_frame(&info) >= 0); + return 0; +} diff --git a/arch/ia64/kdb/kdb_io.c b/arch/ia64/kdb/kdb_io.c new file mode 100644 index 000000000..0b5c6fd44 --- /dev/null +++ b/arch/ia64/kdb/kdb_io.c @@ -0,0 +1,350 @@ +/* + * Kernel Debugger Console I/O handler + * + * Copyright (C) 1999 Silicon Graphics, Inc. 
+ * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Chuck Fleckenstein 1999/07/20 + * Move kdb_info struct declaration to this file + * for cases where serial support is not compiled into + * the kernel. + * + * Masahiro Adegawa 1999/07/20 + * Handle some peculiarities of japanese 86/106 + * keyboards. + * + * marc@mucom.co.il 1999/07/20 + * Catch buffer overflow for serial input. + * + * Scott Foehner + * Port to ia64 + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/console.h> +#include <linux/serial_reg.h> +#include <linux/spinlock.h> + +#include <asm/io.h> + +#include "pc_keyb.h" + +int kdb_port = 0; + +/* + * This module contains code to read characters from the keyboard or a serial + * port. + * + * It is used by the kernel debugger, and is polled, not interrupt driven. + * + */ + +/* + * send: Send a byte to the keyboard controller. Used primarily to + * alter LED settings. + */ + +static void +kdb_kbdsend(unsigned char byte) +{ + while (inb(KBD_STATUS_REG) & KBD_STAT_IBF) + ; + outb(KBD_DATA_REG, byte); +} + +static void +kdb_kbdsetled(int leds) +{ + kdb_kbdsend(KBD_CMD_SET_LEDS); + kdb_kbdsend((unsigned char)leds); +} + +static void +console_read (char *buffer, size_t bufsize) +{ + struct console *in; + struct console *out; + char *cp, ch; + + for (in = console_drivers; in; in = in->next) { + if ((in->flags & CON_ENABLED) && (in->read || in->wait_key)) + break; + } + for (out = console_drivers; out; out = out->next) { + if ((out->flags & CON_ENABLED) && out->write) + break; + } + + if ((!in->read && !in->wait_key) || !out->write) { + panic("kdb_io: can't do console i/o!"); + } + + if (in->read) { + /* this is untested... */ + (*in->read)(in, buffer, bufsize); + return; + } + + bufsize -= 2; /* leave room for CR & NUL terminator */ + cp = buffer; + while (1) { + ch = (*in->wait_key)(in); + switch (ch) { + case '\b': + if (cp > buffer) { + --cp, ++bufsize; + (*out->write)(out, "\b \b", 3); + } + break; + + case '\025': + while (cp > buffer) { + --cp, ++bufsize; + (*out->write)(out, "\b \b", 3); + } + break; + + case '\r': + case '\n': + (*out->write)(out, "\r\n", 2); + *cp++ = '\n'; + *cp++ = '\0'; + return; + + default: + if (bufsize > 0) { + (*out->write)(out, &ch, 1); + --bufsize; + *cp++ = ch; + } + break; + } + } +} + +char * +kdb_getscancode(char *buffer, size_t bufsize) +{ + /* + * XXX Shouldn't kdb _always_ use console based I/O? That's what the console + * abstraction is for, after all... ---davidm + */ +#ifdef CONFIG_IA64_HP_SIM + extern spinlock_t console_lock; + unsigned long flags; + + spin_lock_irqsave(&console_lock, flags); + console_read(buffer, bufsize); + spin_unlock_irqrestore(&console_lock, flags); + return buffer; +#else /* !CONFIG_IA64_HP_SIM */ + char *cp = buffer; + int scancode, scanstatus; + static int shift_lock = 0; /* CAPS LOCK state (0-off, 1-on) */ + static int shift_key = 0; /* Shift next keypress */ + static int ctrl_key = 0; + static int leds = 2; /* Num lock */ + u_short keychar; + extern u_short plain_map[], shift_map[], ctrl_map[]; + + bufsize -= 2; /* Reserve space for newline and null byte */ + + /* + * If we came in via a serial console, we allow that to + * be the input window for kdb. 
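+ * The loop below simply polls the UART: spin on the Line Status
+ * Register until Data Ready (UART_LSR_DR) is set, pull the character
+ * from UART_RX, echo it back through UART_TX, and handle backspace
+ * and carriage-return editing by hand.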
+ */ + if (kdb_port != 0) { + char ch; + int status; +#define serial_inp(info, offset) inb((info) + (offset)) +#define serial_out(info, offset, v) outb((v), (info) + (offset)) + + while(1) { + while ((status = serial_inp(kdb_port, UART_LSR)) + & UART_LSR_DR) { +readchar: + ch = serial_inp(kdb_port, UART_RX); + if (ch == 8) { /* BS */ + if (cp > buffer) { + --cp, bufsize++; + printk("%c %c", 0x08, 0x08); + } + continue; + } + serial_out(kdb_port, UART_TX, ch); + if (ch == 13) { /* CR */ + *cp++ = '\n'; + *cp++ = '\0'; + serial_out(kdb_port, UART_TX, 10); + return(buffer); + } + /* + * Discard excess characters + */ + if (bufsize > 0) { + *cp++ = ch; + bufsize--; + } + } + while (((status = serial_inp(kdb_port, UART_LSR)) + & UART_LSR_DR) == 0); + } + } + + while (1) { + + /* + * Wait for a valid scancode + */ + + while ((inb(KBD_STATUS_REG) & KBD_STAT_OBF) == 0) + ; + + /* + * Fetch the scancode + */ + scancode = inb(KBD_DATA_REG); + scanstatus = inb(KBD_STATUS_REG); + + /* + * Ignore mouse events. + */ + if (scanstatus & KBD_STAT_MOUSE_OBF) + continue; + + /* + * Ignore release, trigger on make + * (except for shift keys, where we want to + * keep the shift state so long as the key is + * held down). + */ + + if (((scancode&0x7f) == 0x2a) + || ((scancode&0x7f) == 0x36)) { + /* + * Next key may use shift table + */ + if ((scancode & 0x80) == 0) { + shift_key=1; + } else { + shift_key=0; + } + continue; + } + + if ((scancode&0x7f) == 0x1d) { + /* + * Left ctrl key + */ + if ((scancode & 0x80) == 0) { + ctrl_key = 1; + } else { + ctrl_key = 0; + } + continue; + } + + if ((scancode & 0x80) != 0) + continue; + + scancode &= 0x7f; + + /* + * Translate scancode + */ + + if (scancode == 0x3a) { + /* + * Toggle caps lock + */ + shift_lock ^= 1; + leds ^= 0x4; /* toggle caps lock led */ + + kdb_kbdsetled(leds); + continue; + } + + if (scancode == 0x0e) { + /* + * Backspace + */ + if (cp > buffer) { + --cp, bufsize++; + + /* + * XXX - erase character on screen + */ + printk("%c %c", 0x08, 0x08); + } + continue; + } + + if (scancode == 0xe0) { + continue; + } + + /* + * For Japanese 86/106 keyboards + * See comment in drivers/char/pc_keyb.c. + * - Masahiro Adegawa + */ + if (scancode == 0x73) { + scancode = 0x59; + } else if (scancode == 0x7d) { + scancode = 0x7c; + } + + if (!shift_lock && !shift_key) { + keychar = plain_map[scancode]; + } else if (shift_lock || shift_key) { + keychar = shift_map[scancode]; + } else if (ctrl_key) { + keychar = ctrl_map[scancode]; + } else { + keychar = 0x0020; + printk("Unknown state/scancode (%d)\n", scancode); + } + + if ((scancode & 0x7f) == 0x1c) { + /* + * enter key. All done. + */ + printk("\n"); + break; + } + + /* + * echo the character. 
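+ * Only the low byte of the keymap entry is the character itself;
+ * the high byte carries key-type information and is masked off here.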
+ */ + printk("%c", keychar&0xff); + + if (bufsize) { + --bufsize; + *cp++ = keychar&0xff; + } else { + printk("buffer overflow\n"); + break; + } + + } + + *cp++ = '\n'; /* White space for parser */ + *cp++ = '\0'; /* String termination */ + +#if defined(NOTNOW) + cp = buffer; + while (*cp) { + printk("char 0x%x\n", *cp++); + } +#endif + + return buffer; +#endif /* !CONFIG_IA64_HP_SIM */ +} + diff --git a/arch/ia64/kdb/kdb_traps.c b/arch/ia64/kdb/kdb_traps.c new file mode 100644 index 000000000..6358f7a30 --- /dev/null +++ b/arch/ia64/kdb/kdb_traps.c @@ -0,0 +1,55 @@ +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/kdb.h> + +static struct kdb_bp_support { + unsigned long addr ; + int slot ; +} kdb_bp_info[NR_CPUS] ; + + +extern void kdb_bp_install (void); + +/* + * This gets invoked right before a call to ia64_fault(). + * Returns zero the normal fault handler should be invoked. + */ +long +ia64_kdb_fault_handler (unsigned long vector, unsigned long isr, unsigned long ifa, + unsigned long iim, unsigned long itir, unsigned long arg5, + unsigned long arg6, unsigned long arg7, unsigned long stack) +{ + struct switch_stack *sw = (struct switch_stack *) &stack; + struct pt_regs *regs = (struct pt_regs *) (sw + 1); + int bundle_slot; + + /* + * TBD + * If KDB is configured, enter KDB for any fault. + */ + if ((vector == 29) || (vector == 35) || (vector == 36)) { + if (!user_mode(regs)) { + bundle_slot = ia64_psr(regs)->ri; + if (vector == 29) { + if (bundle_slot == 0) { + kdb_bp_info[0].addr = regs->cr_iip; + kdb_bp_info[0].slot = bundle_slot; + kdb(KDB_REASON_FLTDBG, 0, regs); + } else { + if ((bundle_slot < 3) && + (kdb_bp_info[0].addr == regs->cr_iip)) + { + ia64_psr(regs)->id = 1; + ia64_psr(regs)->db = 1; + kdb_bp_install() ; + } else /* some error ?? */ + kdb(KDB_REASON_FLTDBG, 0, regs); + } + } else /* single step or taken branch */ + kdb(KDB_REASON_DEBUG, 0, regs); + return 1; + } + } + return 0; +} diff --git a/arch/ia64/kdb/kdbsupport.c b/arch/ia64/kdb/kdbsupport.c new file mode 100644 index 000000000..0b574ae6e --- /dev/null +++ b/arch/ia64/kdb/kdbsupport.c @@ -0,0 +1,1310 @@ +/* + * Minimalist Kernel Debugger + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Scott Lurndal (slurn@engr.sgi.com) + * Copyright (C) Scott Foehner (sfoehner@engr.sgi.com) + * Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) + * Copyright (C) David Mosberger-Tang <davidm@hpl.hp.com> + * + * Written March 1999 by Scott Lurndal at Silicon Graphics, Inc. + * + * Modifications from: + * Richard Bass 1999/07/20 + * Many bug fixes and enhancements. 
+ * Scott Foehner + * Port to ia64 + * Srinivasa Thirumalachar + * RSE support for ia64 + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/kdb.h> +#include <linux/stddef.h> +#include <linux/vmalloc.h> + +#include <asm/uaccess.h> +#include <asm/kdbsupport.h> +#include <asm/rse.h> + +extern kdb_state_t kdb_state ; +k_machreg_t dbregs[KDB_DBREGS]; + +static int __init +kdb_setup (char *str) +{ + kdb_flags |= KDB_FLAG_EARLYKDB; + return 1; +} + +__setup("kdb", kdb_setup); + +static int +kdb_ia64_sir (int argc, const char **argv, const char **envp, struct pt_regs *regs) +{ + u64 lid, tpr, lrr0, lrr1, itv, pmv, cmcv; + + asm ("mov %0=cr.lid" : "=r"(lid)); + asm ("mov %0=cr.tpr" : "=r"(tpr)); + asm ("mov %0=cr.lrr0" : "=r"(lrr0)); + asm ("mov %0=cr.lrr1" : "=r"(lrr1)); + printk ("lid=0x%lx, tpr=0x%lx, lrr0=0x%lx, llr1=0x%lx\n", lid, tpr, lrr0, lrr1); + + asm ("mov %0=cr.itv" : "=r"(itv)); + asm ("mov %0=cr.pmv" : "=r"(pmv)); + asm ("mov %0=cr.cmcv" : "=r"(cmcv)); + printk ("itv=0x%lx, pmv=0x%lx, cmcv=0x%lx\n", itv, pmv, cmcv); + + printk ("irr=0x%016lx,0x%016lx,0x%016lx,0x%016lx\n", + ia64_get_irr0(), ia64_get_irr1(), ia64_get_irr2(), ia64_get_irr3()); + return 0; +} + +void __init +kdb_init (void) +{ + extern void kdb_inittab(void); + unsigned long reg; + + kdb_inittab(); + kdb_initbptab(); +#if 0 + kdb_disinit(); +#endif + kdb_printf("kdb version %d.%d by Scott Lurndal. "\ + "Copyright SGI, All Rights Reserved\n", + KDB_MAJOR_VERSION, KDB_MINOR_VERSION); + + /* Enable debug registers */ + __asm__ ("mov %0=psr":"=r"(reg)); + reg |= IA64_PSR_DB; + __asm__ ("mov psr.l=%0"::"r"(reg)); + ia64_srlz_d(); + + /* Init kdb state */ + kdb_state.bkpt_handling_state = BKPTSTATE_NOT_HANDLED ; + + kdb_register("irr", kdb_ia64_sir, "", "Show interrupt registers", 0); +} + +/* + * kdbprintf + * kdbgetword + * kdb_getstr + */ + +char * +kbd_getstr(char *buffer, size_t bufsize, char *prompt) +{ + extern char* kdb_getscancode(char *, size_t); + +#if defined(CONFIG_SMP) + kdb_printf(prompt, smp_processor_id()); +#else + kdb_printf("%s", prompt); +#endif + + return kdb_getscancode(buffer, bufsize); + +} + +int +kdb_printf(const char *fmt, ...) +{ + char buffer[256]; + va_list ap; + int diag; + int linecount; + + diag = kdbgetintenv("LINES", &linecount); + if (diag) + linecount = 22; + + va_start(ap, fmt); + vsprintf(buffer, fmt, ap); + va_end(ap); + + printk("%s", buffer); +#if 0 + if (strchr(buffer, '\n') != NULL) { + kdb_nextline++; + } + + if (kdb_nextline == linecount) { + char buf1[16]; + char buf2[32]; + extern char* kdb_getscancode(char *, size_t); + char *moreprompt; + + /* + * Pause until cr. + */ + moreprompt = kdbgetenv("MOREPROMPT"); + if (moreprompt == NULL) { + moreprompt = "more> "; + } + +#if defined(CONFIG_SMP) + if (strchr(moreprompt, '%')) { + sprintf(buf2, moreprompt, smp_processor_id()); + moreprompt = buf2; + } +#endif + + printk(moreprompt); + (void) kdb_getscancode(buf1, sizeof(buf1)); + + kdb_nextline = 1; + + if ((buf1[0] == 'q') + || (buf1[0] == 'Q')) { + kdb_longjmp(&kdbjmpbuf, 1); + } + } +#endif + return 0; +} + +unsigned long +kdbgetword(unsigned long addr, int width) +{ + /* + * This function checks the address for validity. Any address + * in the range PAGE_OFFSET to high_memory is legal, any address + * which maps to a vmalloc region is legal, and any address which + * is a user address, we use get_user() to verify validity. 
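+ *
+ * The code below therefore has three cases:
+ *   addr < PAGE_OFFSET   user address, fetched via get_user()
+ *   addr > high_memory   legal only if kdb_vmlist_check() finds it
+ *                        inside a vmalloc region
+ *   otherwise            directly mapped kernel memory, which is
+ *                        dereferenced in place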
+ */ + + if (addr < PAGE_OFFSET) { + /* + * Usermode address. + */ + unsigned long diag; + unsigned long ulval; + + switch (width) { + case 8: + { unsigned long *lp; + + lp = (unsigned long *) addr; + diag = get_user(ulval, lp); + break; + } + case 4: + { unsigned int *ip; + + ip = (unsigned int *) addr; + diag = get_user(ulval, ip); + break; + } + case 2: + { unsigned short *sp; + + sp = (unsigned short *) addr; + diag = get_user(ulval, sp); + break; + } + case 1: + { unsigned char *cp; + + cp = (unsigned char *) addr; + diag = get_user(ulval, cp); + break; + } + default: + printk("kdbgetword: Bad width\n"); + return 0L; + } + + if (diag) { + if ((kdb_flags & KDB_FLAG_SUPRESS) == 0) { + printk("kdb: Bad user address 0x%lx\n", addr); + kdb_flags |= KDB_FLAG_SUPRESS; + } + return 0L; + } + kdb_flags &= ~KDB_FLAG_SUPRESS; + return ulval; + } + + if (addr > (unsigned long)high_memory) { + extern int kdb_vmlist_check(unsigned long, unsigned long); + + if (!kdb_vmlist_check(addr, addr+width)) { + /* + * Would appear to be an illegal kernel address; + * Print a message once, and don't print again until + * a legal address is used. + */ + if ((kdb_flags & KDB_FLAG_SUPRESS) == 0) { + printk("kdb: Bad kernel address 0x%lx\n", addr); + kdb_flags |= KDB_FLAG_SUPRESS; + } + return 0L; + } + } + + /* + * A good address. Reset error flag. + */ + kdb_flags &= ~KDB_FLAG_SUPRESS; + + switch (width) { + case 8: + { unsigned long *lp; + + lp = (unsigned long *)(addr); + return *lp; + } + case 4: + { unsigned int *ip; + + ip = (unsigned int *)(addr); + return *ip; + } + case 2: + { unsigned short *sp; + + sp = (unsigned short *)(addr); + return *sp; + } + case 1: + { unsigned char *cp; + + cp = (unsigned char *)(addr); + return *cp; + } + } + + printk("kdbgetword: Bad width\n"); + return 0L; +} + +/* + * Start of breakpoint management routines + */ + +/* + * Arg: bp structure + */ + +int +kdb_allocdbreg(kdb_bp_t *bp) +{ + int i=0; + + /* For inst bkpt, just return. No hw reg alloc to be done. */ + + if (bp->bp_mode == BKPTMODE_INST) { + return i; + } else if (bp->bp_mode == BKPTMODE_DATAW) { + for(i=0; i<KDB_DBREGS; i++) { + if (dbregs[i] == 0xffffffff) { + dbregs[i] = 0; + return i; + } + } + } + + return -1; +} + +void +kdb_freedbreg(kdb_bp_t *bp) +{ + if (bp->bp_mode == BKPTMODE_DATAW) + dbregs[bp->bp_reg] = 0xffffffff; +} + +void +kdb_initdbregs(void) +{ + int i; + + for(i=0; i<KDB_DBREGS; i++) { + dbregs[i] = 0xffffffff; + } +} +int +kdbinstalltrap(int type, handler_t newh, handler_t *oldh) +{ + /* + * Usurp INTn. XXX - TBD. + */ + + return 0; +} + +int +install_instbkpt(kdb_bp_t *bp) +{ + unsigned long *addr = (unsigned long *)bp->bp_addr ; + bundle_t *bundle = (bundle_t *)bp->bp_longinst; + + /* save current bundle */ + *bundle = *(bundle_t *)addr ; + + /* Set the break point! 
*/ + ((bundle_t *)addr)->lform.low8 = ( + (((bundle_t *)addr)->lform.low8 & ~INST_SLOT0_MASK) | + BREAK_INSTR); + + /* set flag */ + bp->bp_instvalid = 1 ; + + /* flush icache as it is stale now */ + ia64_flush_icache_page((unsigned long)addr) ; + +#ifdef KDB_DEBUG + kdb_printf ("[0x%016lx]: install 0x%016lx with 0x%016lx\n", + addr, bundle->lform.low8, addr[0]) ; +#endif + return 0 ; +} + +int +install_databkpt(kdb_bp_t *bp) +{ + unsigned long dbreg_addr = bp->bp_reg * 2; + unsigned long dbreg_cond = dbreg_addr + 1; + unsigned long value = 0x8fffffffffffffff; + unsigned long addr = (unsigned long)bp->bp_addr; + __asm__ ("mov dbr[%0]=%1"::"r"(dbreg_cond),"r"(value)); +// __asm__ ("movl %0,%%db0\n\t"::"r"(contents)); + __asm__ ("mov dbr[%0]=%1"::"r"(dbreg_addr),"r"(addr)); + ia64_insn_group_barrier(); + ia64_srlz_i(); + ia64_insn_group_barrier(); + +#ifdef KDB_DEBUG + kdb_printf("installed dbkpt at 0x%016lx\n", addr) ; +#endif + return 0; +} + +int +kdbinstalldbreg(kdb_bp_t *bp) +{ + if (bp->bp_mode == BKPTMODE_INST) { + return install_instbkpt(bp) ; + } else if (bp->bp_mode == BKPTMODE_DATAW) { + return install_databkpt(bp) ; + } + return 0; +} + +void +remove_instbkpt(kdb_bp_t *bp) +{ + unsigned long *addr = (unsigned long *)bp->bp_addr ; + bundle_t *bundle = (bundle_t *)bp->bp_longinst; + + if (!bp->bp_instvalid) + /* Nothing to remove. If we just alloced the bkpt + * but never resumed, the bp_inst will not be valid. */ + return ; + +#ifdef KDB_DEBUG + kdb_printf ("[0x%016lx]: remove 0x%016lx with 0x%016lx\n", + addr, addr[0], bundle->lform.low8) ; +#endif + + /* restore current bundle */ + *(bundle_t *)addr = *bundle ; + /* reset the flag */ + bp->bp_instvalid = 0 ; + ia64_flush_icache_page((unsigned long)addr) ; +} + +void +remove_databkpt(kdb_bp_t *bp) +{ + int regnum = bp->bp_reg ; + unsigned long dbreg_addr = regnum * 2; + unsigned long dbreg_cond = dbreg_addr + 1; + unsigned long value = 0x0fffffffffffffff; + __asm__ ("mov dbr[%0]=%1"::"r"(dbreg_cond),"r"(value)); +// __asm__ ("movl %0,%%db0\n\t"::"r"(contents)); + ia64_insn_group_barrier(); + ia64_srlz_i(); + ia64_insn_group_barrier(); + +#ifdef KDB_DEBUG + kdb_printf("removed dbkpt at 0x%016lx\n", bp->bp_addr) ; +#endif +} + +void +kdbremovedbreg(kdb_bp_t *bp) +{ + if (bp->bp_mode == BKPTMODE_INST) { + remove_instbkpt(bp) ; + } else if (bp->bp_mode == BKPTMODE_DATAW) { + remove_databkpt(bp) ; + } +} + +k_machreg_t +kdb_getdr6(void) +{ + return kdb_getdr(6); +} + +k_machreg_t +kdb_getdr7(void) +{ + return kdb_getdr(7); +} + +k_machreg_t +kdb_getdr(int regnum) +{ + k_machreg_t contents = 0; + unsigned long reg = (unsigned long)regnum; + + __asm__ ("mov %0=ibr[%1]"::"r"(contents),"r"(reg)); +// __asm__ ("mov ibr[%0]=%1"::"r"(dbreg_cond),"r"(value)); + + return contents; +} + + +k_machreg_t +kdb_getcr(int regnum) +{ + k_machreg_t contents = 0; + return contents; +} + +void +kdb_putdr6(k_machreg_t contents) +{ + kdb_putdr(6, contents); +} + +void +kdb_putdr7(k_machreg_t contents) +{ + kdb_putdr(7, contents); +} + +void +kdb_putdr(int regnum, k_machreg_t contents) +{ +} + +void +get_fault_regs(fault_regs_t *fr) +{ + fr->ifa = 0 ; + fr->isr = 0 ; + + __asm__ ("rsm psr.ic;;") ; + ia64_srlz_d(); + __asm__ ("mov %0=cr.ifa" : "=r"(fr->ifa)); + __asm__ ("mov %0=cr.isr" : "=r"(fr->isr)); + __asm__ ("ssm psr.ic;;") ; + ia64_srlz_d(); +} + +/* + * kdb_db_trap + * + * Perform breakpoint processing upon entry to the + * processor debugger fault. Determine and print + * the active breakpoint. 
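+ * On IA-64 this is reached for debug faults (vector 29) as well as
+ * taken-branch and single-step traps (vectors 35 and 36); see
+ * ia64_kdb_fault_handler() in kdb_traps.c.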
+ * + * Parameters: + * ef Exception frame containing machine register state + * reason Why did we enter kdb - fault or break + * Outputs: + * None. + * Returns: + * 0 Standard instruction or data breakpoint encountered + * 1 Single Step fault ('ss' command) + * 2 Single Step fault, caller should continue ('ssb' command) + * Locking: + * None. + * Remarks: + * Yup, there be goto's here. + */ + +int +kdb_db_trap(struct pt_regs *ef, int reason) +{ + int i, rv=0; + + /* Trying very hard to not change the interface to kdb. + * So, eventhough we have these values in the fault function + * it is not passed in but read again. + */ + fault_regs_t faultregs ; + + if (reason == KDB_REASON_FLTDBG) + get_fault_regs(&faultregs) ; + + /* NOTE : XXX: This has to be done only for data bkpts */ + /* Prevent it from continuously faulting */ + ef->cr_ipsr |= 0x0000002000000000; + + if (ef->cr_ipsr & 0x0000010000000000) { + /* single step */ + ef->cr_ipsr &= 0xfffffeffffffffff; + if ((kdb_state.bkpt_handling_state == BKPTSTATE_HANDLED) + && (kdb_state.cmd_given == CMDGIVEN_GO)) + ; + else + kdb_printf("SS trap at 0x%lx\n", ef->cr_iip + ia64_psr(ef)->ri); + rv = 1; + kdb_state.reason_for_entry = ENTRYREASON_SSTEP ; + goto handled; + } else + kdb_state.reason_for_entry = ENTRYREASON_GO ; + + /* + * Determine which breakpoint was encountered. + */ + for(i=0; i<KDB_MAXBPT; i++) { + if ((breakpoints[i].bp_enabled) + && ((breakpoints[i].bp_addr == ef->cr_iip) || + ((faultregs.ifa) && + (breakpoints[i].bp_addr == faultregs.ifa)))) { + /* + * Hit this breakpoint. Remove it while we are + * handling hit to avoid recursion. XXX ?? + */ + if (breakpoints[i].bp_addr == faultregs.ifa) + kdb_printf("Data breakpoint #%d for 0x%lx at 0x%lx\n", + i, breakpoints[i].bp_addr, ef->cr_iip + ia64_psr(ef)->ri); + else + kdb_printf("%s breakpoint #%d at 0x%lx\n", + rwtypes[0], + i, breakpoints[i].bp_addr); + + /* + * For an instruction breakpoint, disassemble + * the current instruction. + */ +#if 0 + if (rw == 0) { + kdb_id1(ef->eip); + } +#endif + + goto handled; + } + } + +#if 0 +unknown: +#endif + kdb_printf("Unknown breakpoint. Should forward. \n"); + /* Need a flag for this. The skip should be done XXX + * when a go or single step command is done for this session. + * For now it is here. + */ + ia64_increment_ip(ef) ; + return rv ; + +handled: + + /* We are here after handling a break inst/data bkpt */ + if (kdb_state.bkpt_handling_state == BKPTSTATE_NOT_HANDLED) { + kdb_state.bkpt_handling_state = BKPTSTATE_HANDLED ; + if (kdb_state.reason_for_entry == ENTRYREASON_GO) { + kdb_setsinglestep(ef) ; + kdb_state.kdb_action = ACTION_NOBPINSTALL; + /* We dont want bp install just this once */ + kdb_state.cmd_given = CMDGIVEN_UNKNOWN ; + } + } else if (kdb_state.bkpt_handling_state == BKPTSTATE_HANDLED) { + kdb_state.bkpt_handling_state = BKPTSTATE_NOT_HANDLED ; + if (kdb_state.reason_for_entry == ENTRYREASON_SSTEP) { + if (kdb_state.cmd_given == CMDGIVEN_GO) + kdb_state.kdb_action = ACTION_NOPROMPT ; + kdb_state.cmd_given = CMDGIVEN_UNKNOWN ; + } + } else + kdb_printf("Unknown value of bkpt state\n") ; + + return rv; + +} + +void +kdb_setsinglestep(struct pt_regs *regs) +{ + regs->cr_ipsr |= 0x0000010000000000; +#if 0 + regs->eflags |= EF_TF; +#endif +} + +/* + * Symbol table functions. 
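+ *
+ * kdb keeps its own statically sized symbol table (__kdbsymtab);
+ * the lookups below are plain linear scans, and module load and
+ * unload keep the table current via kdbaddmodsym()/kdbdelmodsym().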
+ */ + +/* + * kdbgetsym + * + * Return the symbol table entry for the given symbol + * + * Parameters: + * symname Character string containing symbol name + * Outputs: + * Returns: + * NULL Symbol doesn't exist + * ksp Pointer to symbol table entry + * Locking: + * None. + * Remarks: + */ + +__ksymtab_t * +kdbgetsym(const char *symname) +{ + __ksymtab_t *ksp = __kdbsymtab; + int i; + + if (symname == NULL) + return NULL; + + for (i=0; i<__kdbsymtabsize; i++, ksp++) { + if (ksp->name && (strcmp(ksp->name, symname)==0)) { + return ksp; + } + } + + return NULL; +} + +/* + * kdbgetsymval + * + * Return the address of the given symbol. + * + * Parameters: + * symname Character string containing symbol name + * Outputs: + * Returns: + * 0 Symbol name is NULL + * addr Address corresponding to symname + * Locking: + * None. + * Remarks: + */ + +unsigned long +kdbgetsymval(const char *symname) +{ + __ksymtab_t *ksp = kdbgetsym(symname); + + return (ksp?ksp->value:0); +} + +/* + * kdbaddmodsym + * + * Add a symbol to the kernel debugger symbol table. Called when + * a new module is loaded into the kernel. + * + * Parameters: + * symname Character string containing symbol name + * value Value of symbol + * Outputs: + * Returns: + * 0 Successfully added to table. + * 1 Duplicate symbol + * 2 Symbol table full + * Locking: + * None. + * Remarks: + */ + +int +kdbaddmodsym(char *symname, unsigned long value) +{ + + /* + * Check for duplicate symbols. + */ + if (kdbgetsym(symname)) { + printk("kdb: Attempt to register duplicate symbol '%s' @ 0x%lx\n", + symname, value); + return 1; + } + + if (__kdbsymtabsize < __kdbmaxsymtabsize) { + __ksymtab_t *ksp = &__kdbsymtab[__kdbsymtabsize++]; + + ksp->name = symname; + ksp->value = value; + return 0; + } + + /* + * No room left in kernel symbol table. + */ + { + static int __kdbwarn = 0; + + if (__kdbwarn == 0) { + __kdbwarn++; + printk("kdb: Exceeded symbol table size. Increase CONFIG_KDB_SYMTAB_SIZE in kernel configuration\n"); + } + } + + return 2; +} + +/* + * kdbdelmodsym + * + * Add a symbol to the kernel debugger symbol table. Called when + * a new module is loaded into the kernel. + * + * Parameters: + * symname Character string containing symbol name + * value Value of symbol + * Outputs: + * Returns: + * 0 Successfully added to table. + * 1 Symbol not found + * Locking: + * None. + * Remarks: + */ + +int +kdbdelmodsym(const char *symname) +{ + __ksymtab_t *ksp, *endksp; + + if (symname == NULL) + return 1; + + /* + * Search for the symbol. If found, move + * all successive symbols down one position + * in the symbol table to avoid leaving holes. + */ + endksp = &__kdbsymtab[__kdbsymtabsize]; + for (ksp = __kdbsymtab; ksp < endksp; ksp++) { + if (ksp->name && (strcmp(ksp->name, symname) == 0)) { + endksp--; + for ( ; ksp < endksp; ksp++) { + *ksp = *(ksp + 1); + } + __kdbsymtabsize--; + return 0; + } + } + + return 1; +} + +/* + * kdbnearsym + * + * Return the name of the symbol with the nearest address + * less than 'addr'. + * + * Parameters: + * addr Address to check for symbol near + * Outputs: + * Returns: + * NULL No symbol with address less than 'addr' + * symbol Returns the actual name of the symbol. + * Locking: + * None. 
+ * Remarks: + */ + +char * +kdbnearsym(unsigned long addr) +{ + __ksymtab_t *ksp = __kdbsymtab; + __ksymtab_t *kpp = NULL; + int i; + + for(i=0; i<__kdbsymtabsize; i++, ksp++) { + if (!ksp->name) + continue; + + if (addr == ksp->value) { + kpp = ksp; + break; + } + if (addr > ksp->value) { + if ((kpp == NULL) + || (ksp->value > kpp->value)) { + kpp = ksp; + } + } + } + + /* + * If more than 128k away, don't bother. + */ + if ((kpp == NULL) + || ((addr - kpp->value) > 0x20000)) { + return NULL; + } + + return kpp->name; +} + +/* + * kdbgetregcontents + * + * Return the contents of the register specified by the + * input string argument. Return an error if the string + * does not match a machine register. + * + * The following pseudo register names are supported: + * ®s - Prints address of exception frame + * kesp - Prints kernel stack pointer at time of fault + * sstk - Prints switch stack for ia64 + * %<regname> - Uses the value of the registers at the + * last time the user process entered kernel + * mode, instead of the registers at the time + * kdb was entered. + * + * Parameters: + * regname Pointer to string naming register + * regs Pointer to structure containing registers. + * Outputs: + * *contents Pointer to unsigned long to recieve register contents + * Returns: + * 0 Success + * KDB_BADREG Invalid register name + * Locking: + * None. + * Remarks: + * + * Note that this function is really machine independent. The kdb + * register list is not, however. + */ + +static struct kdbregs { + char *reg_name; + size_t reg_offset; +} kdbreglist[] = { + { " psr", offsetof(struct pt_regs, cr_ipsr) }, + { " ifs", offsetof(struct pt_regs, cr_ifs) }, + { " ip", offsetof(struct pt_regs, cr_iip) }, + + { "unat", offsetof(struct pt_regs, ar_unat) }, + { " pfs", offsetof(struct pt_regs, ar_pfs) }, + { " rsc", offsetof(struct pt_regs, ar_rsc) }, + + { "rnat", offsetof(struct pt_regs, ar_rnat) }, + { "bsps", offsetof(struct pt_regs, ar_bspstore) }, + { " pr", offsetof(struct pt_regs, pr) }, + + { "ldrs", offsetof(struct pt_regs, loadrs) }, + { " ccv", offsetof(struct pt_regs, ar_ccv) }, + { "fpsr", offsetof(struct pt_regs, ar_fpsr) }, + + { " b0", offsetof(struct pt_regs, b0) }, + { " b6", offsetof(struct pt_regs, b6) }, + { " b7", offsetof(struct pt_regs, b7) }, + + { " r1",offsetof(struct pt_regs, r1) }, + { " r2",offsetof(struct pt_regs, r2) }, + { " r3",offsetof(struct pt_regs, r3) }, + + { " r8",offsetof(struct pt_regs, r8) }, + { " r9",offsetof(struct pt_regs, r9) }, + { " r10",offsetof(struct pt_regs, r10) }, + + { " r11",offsetof(struct pt_regs, r11) }, + { " r12",offsetof(struct pt_regs, r12) }, + { " r13",offsetof(struct pt_regs, r13) }, + + { " r14",offsetof(struct pt_regs, r14) }, + { " r15",offsetof(struct pt_regs, r15) }, + { " r16",offsetof(struct pt_regs, r16) }, + + { " r17",offsetof(struct pt_regs, r17) }, + { " r18",offsetof(struct pt_regs, r18) }, + { " r19",offsetof(struct pt_regs, r19) }, + + { " r20",offsetof(struct pt_regs, r20) }, + { " r21",offsetof(struct pt_regs, r21) }, + { " r22",offsetof(struct pt_regs, r22) }, + + { " r23",offsetof(struct pt_regs, r23) }, + { " r24",offsetof(struct pt_regs, r24) }, + { " r25",offsetof(struct pt_regs, r25) }, + + { " r26",offsetof(struct pt_regs, r26) }, + { " r27",offsetof(struct pt_regs, r27) }, + { " r28",offsetof(struct pt_regs, r28) }, + + { " r29",offsetof(struct pt_regs, r29) }, + { " r30",offsetof(struct pt_regs, r30) }, + { " r31",offsetof(struct pt_regs, r31) }, + +}; + +static const int nkdbreglist = sizeof(kdbreglist) / 
sizeof(struct kdbregs); + +int +kdbgetregcontents(const char *regname, + struct pt_regs *regs, + unsigned long *contents) +{ + int i; + + if (strcmp(regname, "®s") == 0) { + *contents = (unsigned long)regs; + return 0; + } + + if (strcmp(regname, "sstk") == 0) { + *contents = (unsigned long)getprsregs(regs) ; + return 0; + } + + if (strcmp(regname, "isr") == 0) { + fault_regs_t fr ; + get_fault_regs(&fr) ; + *contents = fr.isr ; + return 0 ; + } + +#if 0 + /* XXX need to verify this */ + if (strcmp(regname, "kesp") == 0) { + *contents = (unsigned long)regs + sizeof(struct pt_regs); + return 0; + } + + if (regname[0] == '%') { + /* User registers: %%e[a-c]x, etc */ + regname++; + regs = (struct pt_regs *) + (current->thread.ksp - sizeof(struct pt_regs)); + } +#endif + + for (i=0; i<nkdbreglist; i++) { + if (strstr(kdbreglist[i].reg_name, regname)) + break; + } + + if (i == nkdbreglist) { + /* Lets check the rse maybe */ + if (regname[0] == 'r') + if (show_cur_stack_frame(regs, simple_strtoul(regname+1, 0, 0) - 31, + contents)) + return 0 ; + return KDB_BADREG; + } + + *contents = *(unsigned long *)((unsigned long)regs + + kdbreglist[i].reg_offset); + + return 0; +} + +/* + * kdbsetregcontents + * + * Set the contents of the register specified by the + * input string argument. Return an error if the string + * does not match a machine register. + * + * Supports modification of user-mode registers via + * %<register-name> + * + * Parameters: + * regname Pointer to string naming register + * regs Pointer to structure containing registers. + * contents Unsigned long containing new register contents + * Outputs: + * Returns: + * 0 Success + * KDB_BADREG Invalid register name + * Locking: + * None. + * Remarks: + */ + +int +kdbsetregcontents(const char *regname, + struct pt_regs *regs, + unsigned long contents) +{ + int i; + + if (regname[0] == '%') { + regname++; + regs = (struct pt_regs *) + (current->thread.ksp - sizeof(struct pt_regs)); + } + + for (i=0; i<nkdbreglist; i++) { + if (strnicmp(kdbreglist[i].reg_name, + regname, + strlen(regname)) == 0) + break; + } + + if ((i == nkdbreglist) + || (strlen(kdbreglist[i].reg_name) != strlen(regname))) { + return KDB_BADREG; + } + + *(unsigned long *)((unsigned long)regs + kdbreglist[i].reg_offset) = + contents; + + return 0; +} + +/* + * kdbdumpregs + * + * Dump the specified register set to the display. + * + * Parameters: + * regs Pointer to structure containing registers. + * type Character string identifying register set to dump + * extra string further identifying register (optional) + * Outputs: + * Returns: + * 0 Success + * Locking: + * None. + * Remarks: + * This function will dump the general register set if the type + * argument is NULL (struct pt_regs). The alternate register + * set types supported by this function: + * + * d Debug registers + * c Control registers + * u User registers at most recent entry to kernel + * Following not yet implemented: + * m Model Specific Registers (extra defines register #) + * r Memory Type Range Registers (extra defines register) + * + * For now, all registers are covered as follows: + * + * rd - dumps all regs + * rd %isr - current interrupt status reg, read freshly + * rd s - valid stacked regs + * rd %sstk - gets switch stack addr. dump memory and search + * rd d - debug regs, may not be too useful + * + * ARs TB Done + * Interrupt regs TB Done ?? + * OTHERS TB Decided ?? 
+ * + * Intel wish list + * These will be implemented later - Srinivasa + * + * type action + * ---- ------ + * g dump all General static registers + * s dump all general Stacked registers + * f dump all Floating Point registers + * p dump all Predicate registers + * b dump all Branch registers + * a dump all Application registers + * c dump all Control registers + * + */ + +int +kdbdumpregs(struct pt_regs *regs, + const char *type, + const char *extra) + +{ + int i; + int count = 0; + + if (type + && (type[0] == 'u')) { + type = NULL; + regs = (struct pt_regs *) + (current->thread.ksp - sizeof(struct pt_regs)); + } + + if (type == NULL) { + for (i=0; i<nkdbreglist; i++) { + kdb_printf("%s: 0x%16.16lx ", + kdbreglist[i].reg_name, + *(unsigned long *)((unsigned long)regs + + kdbreglist[i].reg_offset)); + + if ((++count % 3) == 0) + kdb_printf("\n"); + } + + kdb_printf("®s = 0x%16.16lx\n", regs); + + return 0; + } + + switch (type[0]) { + case 'd': + { + for(i=0; i<8; i+=2) { + kdb_printf("idr%d: 0x%16.16lx idr%d: 0x%16.16lx\n", i, + kdb_getdr(i), i+1, kdb_getdr(i+1)); + + } + return 0; + } +#if 0 + case 'c': + { + unsigned long cr[5]; + + for (i=0; i<5; i++) { + cr[i] = kdb_getcr(i); + } + kdb_printf("cr0 = 0x%8.8x cr1 = 0x%8.8x cr2 = 0x%8.8x cr3 = 0x%8.8x\ncr4 = 0x%8.8x\n", + cr[0], cr[1], cr[2], cr[3], cr[4]); + return 0; + } +#endif + case 'm': + break; + case 'r': + break; + + case 's': + { + show_cur_stack_frame(regs, 0, NULL) ; + + return 0 ; + } + + case '%': + { + unsigned long contents ; + + if (!kdbgetregcontents(type+1, regs, &contents)) + kdb_printf("%s = 0x%16.16lx\n", type+1, contents) ; + else + kdb_printf("diag: Invalid register %s\n", type+1) ; + + return 0 ; + } + + default: + return KDB_BADREG; + } + + /* NOTREACHED */ + return 0; +} + +k_machreg_t +kdb_getpc(struct pt_regs *regs) +{ + return regs->cr_iip + ia64_psr(regs)->ri; +} + +int +kdb_setpc(struct pt_regs *regs, k_machreg_t newpc) +{ + regs->cr_iip = newpc & ~0xf; + ia64_psr(regs)->ri = newpc & 0x3; + return 0; +} + +void +kdb_disableint(kdbintstate_t *state) +{ + int *fp = (int *)state; + int flags; + + __save_flags(flags); + __cli(); + + *fp = flags; +} + +void +kdb_restoreint(kdbintstate_t *state) +{ + int flags = *(int *)state; + __restore_flags(flags); +} + +int +kdb_putword(unsigned long addr, unsigned long contents) +{ + *(unsigned long *)addr = contents; + return 0; +} + +int +kdb_getcurrentframe(struct pt_regs *regs) +{ +#if 0 + regs->xcs = 0; +#if defined(CONFIG_KDB_FRAMEPTR) + asm volatile("movl %%ebp,%0":"=m" (*(int *)®s->ebp)); +#endif + asm volatile("movl %%esp,%0":"=m" (*(int *)®s->esp)); +#endif + return 0; +} + +unsigned long +show_cur_stack_frame(struct pt_regs *regs, int regno, unsigned long *contents) +{ + long sof = regs->cr_ifs & ((1<<7)-1) ; /* size of frame */ + unsigned long i ; + int j; + struct switch_stack *prs_regs = getprsregs(regs) ; + unsigned long *sofptr = (prs_regs? 
ia64_rse_skip_regs( + (unsigned long *)prs_regs->ar_bspstore, -sof) : NULL) ; + + if (!sofptr) { + printk("Unable to display Current Stack Frame\n") ; + return 0 ; + } + + if (regno < 0) + return 0 ; + + for (i=sof, j=0;i;i--,j++) { + /* remember to skip the nat collection dword */ + if ((((unsigned long)sofptr>>3) & (((1<<6)-1))) + == ((1<<6)-1)) + sofptr++ ; + + /* return the value in the reg if regno is non zero */ + + if (regno) { + if ((j+1) == regno) { + if (contents) + *contents = *sofptr ; + return -1; + } + sofptr++ ; + } else { + printk(" r%d: %016lx ", 32+j, *sofptr++) ; + if (!((j+1)%3)) printk("\n") ; + } + } + + if (regno) { + if (!i) /* bogus rse number */ + return 0 ; + } else + printk("\n") ; + + return 0 ; +} diff --git a/arch/ia64/kdb/pc_keyb.h b/arch/ia64/kdb/pc_keyb.h new file mode 100644 index 000000000..3d4831a80 --- /dev/null +++ b/arch/ia64/kdb/pc_keyb.h @@ -0,0 +1,127 @@ +/* + * linux/drivers/char/pc_keyb.h + * + * PC Keyboard And Keyboard Controller + * + * (c) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> + */ + +/* + * Configuration Switches + */ + +#undef KBD_REPORT_ERR /* Report keyboard errors */ +#define KBD_REPORT_UNKN /* Report unknown scan codes */ +#define KBD_REPORT_TIMEOUTS /* Report keyboard timeouts */ +#undef KBD_IS_FOCUS_9000 /* We have the brain-damaged FOCUS-9000 keyboard */ +#undef INITIALIZE_MOUSE /* Define if your PS/2 mouse needs initialization. */ + + + +#define KBD_INIT_TIMEOUT 1000 /* Timeout in ms for initializing the keyboard */ +#define KBC_TIMEOUT 250 /* Timeout in ms for sending to keyboard controller */ +#define KBD_TIMEOUT 1000 /* Timeout in ms for keyboard command acknowledge */ + +/* + * Internal variables of the driver + */ + +extern unsigned char pckbd_read_mask; +extern unsigned char aux_device_present; + +/* + * Keyboard Controller Registers + */ + +#define KBD_STATUS_REG 0x64 /* Status register (R) */ +#define KBD_CNTL_REG 0x64 /* Controller command register (W) */ +#define KBD_DATA_REG 0x60 /* Keyboard data register (R/W) */ + +/* + * Keyboard Controller Commands + */ + +#define KBD_CCMD_READ_MODE 0x20 /* Read mode bits */ +#define KBD_CCMD_WRITE_MODE 0x60 /* Write mode bits */ +#define KBD_CCMD_GET_VERSION 0xA1 /* Get controller version */ +#define KBD_CCMD_MOUSE_DISABLE 0xA7 /* Disable mouse interface */ +#define KBD_CCMD_MOUSE_ENABLE 0xA8 /* Enable mouse interface */ +#define KBD_CCMD_TEST_MOUSE 0xA9 /* Mouse interface test */ +#define KBD_CCMD_SELF_TEST 0xAA /* Controller self test */ +#define KBD_CCMD_KBD_TEST 0xAB /* Keyboard interface test */ +#define KBD_CCMD_KBD_DISABLE 0xAD /* Keyboard interface disable */ +#define KBD_CCMD_KBD_ENABLE 0xAE /* Keyboard interface enable */ +#define KBD_CCMD_WRITE_AUX_OBUF 0xD3 /* Write to output buffer as if + initiated by the auxiliary device */ +#define KBD_CCMD_WRITE_MOUSE 0xD4 /* Write the following byte to the mouse */ + +/* + * Keyboard Commands + */ + +#define KBD_CMD_SET_LEDS 0xED /* Set keyboard leds */ +#define KBD_CMD_SET_RATE 0xF3 /* Set typematic rate */ +#define KBD_CMD_ENABLE 0xF4 /* Enable scanning */ +#define KBD_CMD_DISABLE 0xF5 /* Disable scanning */ +#define KBD_CMD_RESET 0xFF /* Reset */ + +/* + * Keyboard Replies + */ + +#define KBD_REPLY_POR 0xAA /* Power on reset */ +#define KBD_REPLY_ACK 0xFA /* Command ACK */ +#define KBD_REPLY_RESEND 0xFE /* Command NACK, send the cmd again */ + +/* + * Status Register Bits + */ + +#define KBD_STAT_OBF 0x01 /* Keyboard output buffer full */ +#define KBD_STAT_IBF 0x02 /* Keyboard input buffer full */ +#define 
KBD_STAT_SELFTEST 0x04 /* Self test successful */ +#define KBD_STAT_CMD 0x08 /* Last write was a command write (0=data) */ +#define KBD_STAT_UNLOCKED 0x10 /* Zero if keyboard locked */ +#define KBD_STAT_MOUSE_OBF 0x20 /* Mouse output buffer full */ +#define KBD_STAT_GTO 0x40 /* General receive/xmit timeout */ +#define KBD_STAT_PERR 0x80 /* Parity error */ + +#define AUX_STAT_OBF (KBD_STAT_OBF | KBD_STAT_MOUSE_OBF) + +/* + * Controller Mode Register Bits + */ + +#define KBD_MODE_KBD_INT 0x01 /* Keyboard data generate IRQ1 */ +#define KBD_MODE_MOUSE_INT 0x02 /* Mouse data generate IRQ12 */ +#define KBD_MODE_SYS 0x04 /* The system flag (?) */ +#define KBD_MODE_NO_KEYLOCK 0x08 /* The keylock doesn't affect the keyboard if set */ +#define KBD_MODE_DISABLE_KBD 0x10 /* Disable keyboard interface */ +#define KBD_MODE_DISABLE_MOUSE 0x20 /* Disable mouse interface */ +#define KBD_MODE_KCC 0x40 /* Scan code conversion to PC format */ +#define KBD_MODE_RFU 0x80 + +/* + * Mouse Commands + */ + +#define AUX_SET_RES 0xE8 /* Set resolution */ +#define AUX_SET_SCALE11 0xE6 /* Set 1:1 scaling */ +#define AUX_SET_SCALE21 0xE7 /* Set 2:1 scaling */ +#define AUX_GET_SCALE 0xE9 /* Get scaling factor */ +#define AUX_SET_STREAM 0xEA /* Set stream mode */ +#define AUX_SET_SAMPLE 0xF3 /* Set sample rate */ +#define AUX_ENABLE_DEV 0xF4 /* Enable aux device */ +#define AUX_DISABLE_DEV 0xF5 /* Disable aux device */ +#define AUX_RESET 0xFF /* Reset aux device */ + +#define AUX_BUF_SIZE 2048 + +struct aux_queue { + unsigned long head; + unsigned long tail; + struct wait_queue *proc_list; + struct fasync_struct *fasync; + unsigned char buf[AUX_BUF_SIZE]; +}; + diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile new file mode 100644 index 000000000..7cb47da72 --- /dev/null +++ b/arch/ia64/kernel/Makefile @@ -0,0 +1,42 @@ +# +# Makefile for the linux kernel. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... 
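+#
+# The suffix rules below preprocess (.S.s) and assemble (.S.o) the
+# assembly sources with __ASSEMBLY__ defined.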
+ +.S.s: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -E -o $*.s $< +.S.o: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -c -o $*.o $< + +all: kernel.o head.o init_task.o + +O_TARGET := kernel.o +O_OBJS := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_default.o irq_internal.o ivt.o \ + pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o sal_stub.o semaphore.o setup.o signal.o \ + sys_ia64.o traps.o time.o unaligned.o unwind.o +#O_OBJS := fpreg.o +#OX_OBJS := ia64_ksyms.o + +ifeq ($(CONFIG_IA64_GENERIC),y) +O_OBJS += machvec.o +endif + +ifdef CONFIG_PCI +O_OBJS += pci.o +endif + +ifdef CONFIG_SMP +O_OBJS += smp.o irq_lock.o +endif + +ifeq ($(CONFIG_MCA),y) +O_OBJS += mca.o mca_asm.o +endif + +clean:: + +include $(TOPDIR)/Rules.make diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c new file mode 100644 index 000000000..e289efab6 --- /dev/null +++ b/arch/ia64/kernel/acpi.c @@ -0,0 +1,308 @@ +/* + * Advanced Configuration and Power Interface + * + * Based on 'ACPI Specification 1.0b' February 2, 1999 and + * 'IA-64 Extensions to ACPI Specification' Revision 0.6 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> + */ + +#include <linux/config.h> + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/string.h> +#include <linux/types.h> + +#include <asm/acpi-ext.h> +#include <asm/page.h> +#include <asm/efi.h> +#include <asm/io.h> +#include <asm/iosapic.h> +#include <asm/irq.h> + +#undef ACPI_DEBUG /* Guess what this does? */ + +#ifdef CONFIG_SMP +extern unsigned long ipi_base_addr; +#endif + +/* These are ugly but will be reclaimed by the kernel */ +int __initdata acpi_cpus = 0; +int __initdata acpi_apic_map[32]; +int __initdata cpu_cnt = 0; + +void (*pm_idle) (void); + +/* + * Identify usable CPU's and remember them for SMP bringup later. + */ +static void __init +acpi_lsapic(char *p) +{ + int add = 1; + + acpi_entry_lsapic_t *lsapic = (acpi_entry_lsapic_t *) p; + + if ((lsapic->flags & LSAPIC_PRESENT) == 0) + return; + + printk(" CPU %d (%.04x:%.04x): ", cpu_cnt, lsapic->eid, lsapic->id); + + if ((lsapic->flags & LSAPIC_ENABLED) == 0) { + printk("Disabled.\n"); + add = 0; + } else if (lsapic->flags & LSAPIC_PERFORMANCE_RESTRICTED) { + printk("Performance Restricted; ignoring.\n"); + add = 0; + } + + if (add) { + printk("Available.\n"); + acpi_cpus++; + acpi_apic_map[cpu_cnt] = (lsapic->id << 8) | lsapic->eid; + } + + cpu_cnt++; +} + +/* + * Find all IOSAPICs and tag the iosapic_vector structure with the appropriate + * base addresses. + */ +static void __init +acpi_iosapic(char *p) +{ + /* + * This is not good. ACPI is not necessarily limited to CONFIG_IA64_SV, yet + * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be + * a means for platform_setup() to register ACPI handlers? + */ +#ifdef CONFIG_IA64_DIG + acpi_entry_iosapic_t *iosapic = (acpi_entry_iosapic_t *) p; + unsigned int ver; + int l, v, pins; + + ver = iosapic_version(iosapic->address); + pins = (ver >> 16) & 0xff; + + printk("IOSAPIC Version %x.%x: address 0x%lx IRQs 0x%x - 0x%x\n", + (ver & 0xf0) >> 4, (ver & 0x0f), iosapic->address, + iosapic->irq_base, iosapic->irq_base + pins); + + for (l = 0; l < pins; l++) { + v = map_legacy_irq(iosapic->irq_base + l); + if (v > IA64_MAX_VECTORED_IRQ) { + printk(" !!! 
IRQ %d > 255\n", v); + continue; + } + /* XXX Check for IOSAPIC collisions */ + iosapic_addr(v) = (unsigned long) ioremap(iosapic->address, 0); + iosapic_baseirq(v) = iosapic->irq_base; + } + iosapic_init(iosapic->address); +#endif +} + + +/* + * Configure legacy IRQ information in iosapic_vector + */ +static void __init +acpi_legacy_irq(char *p) +{ + /* + * This is not good. ACPI is not necessarily limited to CONFIG_IA64_SV, yet + * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be + * a means for platform_setup() to register ACPI handlers? + */ +#ifdef CONFIG_IA64_IRQ_ACPI + acpi_entry_int_override_t *legacy = (acpi_entry_int_override_t *) p; + unsigned char vector; + int i; + + vector = map_legacy_irq(legacy->isa_irq); + + /* + * Clobber any old pin mapping. It may be that it gets replaced later on + */ + for (i = 0; i < IA64_MAX_VECTORED_IRQ; i++) { + if (i == vector) + continue; + if (iosapic_pin(i) == iosapic_pin(vector)) + iosapic_pin(i) = 0xff; + } + + iosapic_pin(vector) = legacy->pin; + iosapic_bus(vector) = BUS_ISA; /* This table only overrides the ISA devices */ + iosapic_busdata(vector) = 0; + + /* + * External timer tick is special... + */ + if (vector != TIMER_IRQ) + iosapic_dmode(vector) = IO_SAPIC_LOWEST_PRIORITY; + else + iosapic_dmode(vector) = IO_SAPIC_FIXED; + + /* See MPS 1.4 section 4.3.4 */ + switch (legacy->flags) { + case 0x5: + iosapic_polarity(vector) = IO_SAPIC_POL_HIGH; + iosapic_trigger(vector) = IO_SAPIC_EDGE; + break; + case 0x8: + iosapic_polarity(vector) = IO_SAPIC_POL_LOW; + iosapic_trigger(vector) = IO_SAPIC_EDGE; + break; + case 0xd: + iosapic_polarity(vector) = IO_SAPIC_POL_HIGH; + iosapic_trigger(vector) = IO_SAPIC_LEVEL; + break; + case 0xf: + iosapic_polarity(vector) = IO_SAPIC_POL_LOW; + iosapic_trigger(vector) = IO_SAPIC_LEVEL; + break; + default: + printk(" ACPI Legacy IRQ 0x%02x: Unknown flags 0x%x\n", legacy->isa_irq, + legacy->flags); + break; + } + +#ifdef ACPI_DEBUG + printk("Legacy ISA IRQ %x -> IA64 Vector %x IOSAPIC Pin %x Active %s %s Trigger\n", + legacy->isa_irq, vector, iosapic_pin(vector), + ((iosapic_polarity(vector) == IO_SAPIC_POL_LOW) ? "Low" : "High"), + ((iosapic_trigger(vector) == IO_SAPIC_LEVEL) ? "Level" : "Edge")); +#endif /* ACPI_DEBUG */ + +#endif /* CONFIG_IA64_IRQ_ACPI */ +} + +/* + * Info on platform interrupt sources: NMI. PMI, INIT, etc. + */ +static void __init +acpi_platform(char *p) +{ + acpi_entry_platform_src_t *plat = (acpi_entry_platform_src_t *) p; + + printk("PLATFORM: IOSAPIC %x -> Vector %lx on CPU %.04u:%.04u\n", + plat->iosapic_vector, plat->global_vector, plat->eid, plat->id); +} + +/* + * Parse the ACPI Multiple SAPIC Table + */ +static void __init +acpi_parse_msapic(acpi_sapic_t *msapic) +{ + char *p, *end; + + memset(&acpi_apic_map, -1, sizeof(acpi_apic_map)); + +#ifdef CONFIG_SMP + /* Base address of IPI Message Block */ + ipi_base_addr = ioremap(msapic->interrupt_block, 0); +#endif + + p = (char *) (msapic + 1); + end = p + (msapic->header.length - sizeof(acpi_sapic_t)); + + while (p < end) { + + switch (*p) { + case ACPI_ENTRY_LOCAL_SAPIC: + acpi_lsapic(p); + break; + + case ACPI_ENTRY_IO_SAPIC: + acpi_iosapic(p); + break; + + case ACPI_ENTRY_INT_SRC_OVERRIDE: + acpi_legacy_irq(p); + break; + + case ACPI_ENTRY_PLATFORM_INT_SOURCE: + acpi_platform(p); + break; + + default: + break; + } + + /* Move to next table entry. 
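+		 * Entries are variable length: byte 0 holds the type
+		 * and byte 1 the total size, hence the *(p + 1) stride
+		 * below.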
*/ + p += *(p + 1); + } + + /* Make bootup pretty */ + printk(" %d CPUs available, %d CPUs total\n", acpi_cpus, cpu_cnt); +} + +int __init +acpi_parse(acpi_rsdp_t *rsdp) +{ + acpi_rsdt_t *rsdt; + acpi_desc_table_hdr_t *hdrp; + long tables, i; + + if (!rsdp) { + printk("Uh-oh, no ACPI Root System Description Pointer table!\n"); + return 0; + } + + if (strncmp(rsdp->signature, ACPI_RSDP_SIG, ACPI_RSDP_SIG_LEN)) { + printk("Uh-oh, ACPI RSDP signature incorrect!\n"); + return 0; + } + + rsdp->rsdt = __va(rsdp->rsdt); + rsdt = rsdp->rsdt; + if (strncmp(rsdt->header.signature, ACPI_RSDT_SIG, ACPI_RSDT_SIG_LEN)) { + printk("Uh-oh, ACPI RDST signature incorrect!\n"); + return 0; + } + + printk("ACPI: %.6s %.8s %d.%d\n", rsdt->header.oem_id, rsdt->header.oem_table_id, + rsdt->header.oem_revision >> 16, rsdt->header.oem_revision & 0xffff); + + tables = (rsdt->header.length - sizeof(acpi_desc_table_hdr_t)) / 8; + for (i = 0; i < tables; i++) { + hdrp = (acpi_desc_table_hdr_t *) __va(rsdt->entry_ptrs[i]); + + /* Only interested int the MSAPIC table for now ... */ + if (strncmp(hdrp->signature, ACPI_SAPIC_SIG, ACPI_SAPIC_SIG_LEN) != 0) + continue; + + acpi_parse_msapic((acpi_sapic_t *) hdrp); + } /* while() */ + + if (acpi_cpus == 0) { + printk("ACPI: Found 0 CPUS; assuming 1\n"); + acpi_cpus = 1; /* We've got at least one of these, no? */ + } + return 1; +} + +const char * +acpi_get_sysname (void) +{ + /* the following should go away once we have an ACPI parser: */ +#ifdef CONFIG_IA64_GENERIC + return "hpsim"; +#else +# if defined (CONFIG_IA64_HP_SIM) + return "hpsim"; +# elif defined (CONFIG_IA64_SGI_SN1_SIM) + return "sn1"; +# elif defined (CONFIG_IA64_DIG) + return "dig"; +# else +# error Unknown platform. Fix acpi.c. +# endif +#endif +} diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c new file mode 100644 index 000000000..dd7de2ab0 --- /dev/null +++ b/arch/ia64/kernel/efi.c @@ -0,0 +1,365 @@ +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 0.9 April 30, 1999 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 Hewlett-Packard Co. + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> + * + * All EFI Runtime Services are not implemented yet as EFI only + * supports physical mode addressing on SoftSDV. This is to be fixed + * in a future version. --drummond 1999-07-20 + * + * Implemented EFI runtime services and virtual mode calls. 
--davidm + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/time.h> + +#include <asm/efi.h> +#include <asm/io.h> +#include <asm/processor.h> + +#define EFI_DEBUG + +extern efi_status_t efi_call_phys (void *, ...); + +struct efi efi; + +static efi_runtime_services_t *runtime; + +static efi_status_t +phys_get_time (efi_time_t *tm, efi_time_cap_t *tc) +{ + return efi_call_phys(__va(runtime->get_time), __pa(tm), __pa(tc)); +} + +static efi_status_t +phys_set_time (efi_time_t *tm) +{ + return efi_call_phys(__va(runtime->set_time), __pa(tm)); +} + +static efi_status_t +phys_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm) +{ + return efi_call_phys(__va(runtime->get_wakeup_time), __pa(enabled), __pa(pending), + __pa(tm)); +} + +static efi_status_t +phys_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) +{ + return efi_call_phys(__va(runtime->set_wakeup_time), enabled, __pa(tm)); +} + +static efi_status_t +phys_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr, + unsigned long *data_size, void *data) +{ + return efi_call_phys(__va(runtime->get_variable), __pa(name), __pa(vendor), __pa(attr), + __pa(data_size), __pa(data)); +} + +static efi_status_t +phys_get_next_variable (unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) +{ + return efi_call_phys(__va(runtime->get_next_variable), __pa(name_size), __pa(name), + __pa(vendor)); +} + +static efi_status_t +phys_set_variable (efi_char16_t *name, efi_guid_t *vendor, u32 attr, + unsigned long data_size, void *data) +{ + return efi_call_phys(__va(runtime->set_variable), __pa(name), __pa(vendor), attr, + data_size, __pa(data)); +} + +static efi_status_t +phys_get_next_high_mono_count (u64 *count) +{ + return efi_call_phys(__va(runtime->get_next_high_mono_count), __pa(count)); +} + +static void +phys_reset_system (int reset_type, efi_status_t status, + unsigned long data_size, efi_char16_t *data) +{ + efi_call_phys(__va(runtime->reset_system), status, data_size, __pa(data)); +} + +/* + * Converts Gregorian date to seconds since 1970-01-01 00:00:00. + * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 + * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. + * + * [For the Julian calendar (which was used in Russia before 1917, + * Britain & colonies before 1752, anywhere else before 1582, + * and is still in use by some communities) leave out the + * -year/100+year/400 terms, and add 10.] + * + * This algorithm was first published by Gauss (I think). + * + * WARNING: this function will overflow on 2106-02-07 06:28:16 on + * machines were long is 32-bit! 
(However, as time_t is signed, we + * will already get problems at other places on 2038-01-19 03:14:08) + */ +static inline unsigned long +mktime (unsigned int year, unsigned int mon, unsigned int day, unsigned int hour, + unsigned int min, unsigned int sec) +{ + if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */ + mon += 12; /* Puts Feb last since it has leap day */ + year -= 1; + } + return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day) + + year*365 - 719499 + )*24 + hour /* now have hours */ + )*60 + min /* now have minutes */ + )*60 + sec; /* finally seconds */ +} + +void +efi_gettimeofday (struct timeval *tv) +{ + efi_time_t tm; + + memset(tv, 0, sizeof(*tv)); + if ((*efi.get_time)(&tm, 0) != EFI_SUCCESS) + return; + + tv->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second); + tv->tv_usec = tm.nanosecond / 1000; +} + +/* + * Walks the EFI memory map and calls CALLBACK once for each EFI + * memory descriptor that has memory that is available for OS use. + */ +void +efi_memmap_walk (efi_freemem_callback_t callback, void *arg) +{ + int prev_valid = 0; + struct range { + u64 start; + u64 end; + } prev, curr; + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size, start, end; + + efi_map_start = __va(ia64_boot_param.efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param.efi_memmap_size; + efi_desc_size = ia64_boot_param.efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: +#ifndef CONFIG_IA64_VIRTUAL_MEM_MAP + if (md->phys_addr > 1024*1024*1024UL) { + printk("Warning: ignoring %luMB of memory above 1GB!\n", + md->num_pages >> 8); + md->type = EFI_UNUSABLE_MEMORY; + continue; + } +#endif + + curr.start = PAGE_OFFSET + md->phys_addr; + curr.end = curr.start + (md->num_pages << 12); + + if (!prev_valid) { + prev = curr; + prev_valid = 1; + } else { + if (curr.start < prev.start) + printk("Oops: EFI memory table not ordered!\n"); + + if (prev.end == curr.start) { + /* merge two consecutive memory ranges */ + prev.end = curr.end; + } else { + start = PAGE_ALIGN(prev.start); + end = prev.end & PAGE_MASK; + if ((end > start) && (*callback)(start, end, arg) < 0) + return; + prev = curr; + } + } + break; + + default: + continue; + } + } + if (prev_valid) { + start = PAGE_ALIGN(prev.start); + end = prev.end & PAGE_MASK; + if (end > start) + (*callback)(start, end, arg); + } +} + +void __init +efi_init (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_config_table_t *config_tables; + efi_memory_desc_t *md; + efi_char16_t *c16; + u64 efi_desc_size; + char vendor[100] = "unknown"; + int i; + + efi.systab = __va(ia64_boot_param.efi_systab); + + /* + * Verify the EFI Table + */ + if (efi.systab == NULL) + panic("Woah! Can't find EFI system table.\n"); + if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) + panic("Woah!
EFI system table signature incorrect\n"); + if (efi.systab->hdr.revision != EFI_SYSTEM_TABLE_REVISION) + printk("Warning: EFI system table version mismatch: " + "got %d.%02d, expected %d.%02d\n", + efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, + EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff); + + config_tables = __va(efi.systab->tables); + + /* Show what we know for posterity */ + c16 = __va(efi.systab->fw_vendor); + if (c16) { + for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) + vendor[i] = *c16++; + vendor[i] = '\0'; + } + + printk("EFI v%u.%.02u by %s:", + efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); + + for (i = 0; i < efi.systab->nr_tables; i++) { + if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { + efi.mps = __va(config_tables[i].table); + printk(" MPS=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { + efi.acpi = __va(config_tables[i].table); + printk(" ACPI=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { + efi.smbios = __va(config_tables[i].table); + printk(" SMBIOS=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) { + efi.sal_systab = __va(config_tables[i].table); + printk(" SALsystab=0x%lx", config_tables[i].table); + } + } + printk("\n"); + + runtime = __va(efi.systab->runtime); + efi.get_time = phys_get_time; + efi.set_time = phys_set_time; + efi.get_wakeup_time = phys_get_wakeup_time; + efi.set_wakeup_time = phys_set_wakeup_time; + efi.get_variable = phys_get_variable; + efi.get_next_variable = phys_get_next_variable; + efi.set_variable = phys_set_variable; + efi.get_next_high_mono_count = phys_get_next_high_mono_count; + efi.reset_system = phys_reset_system; + + efi_map_start = __va(ia64_boot_param.efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param.efi_memmap_size; + efi_desc_size = ia64_boot_param.efi_memdesc_size; + +#ifdef EFI_DEBUG + /* print EFI memory map: */ + for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) { + md = p; + printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n", + i, md->type, md->attribute, + md->phys_addr, md->phys_addr + (md->num_pages<<12) - 1, md->num_pages >> 8); + } +#endif +} + +void +efi_enter_virtual_mode (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + efi_status_t status; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param.efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param.efi_memmap_size; + efi_desc_size = ia64_boot_param.efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->attribute & EFI_MEMORY_RUNTIME) { + /* + * Some descriptors have multiple bits set, so the order of + * the tests is relevant.
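+ * A descriptor may advertise both WB and UC capability, for instance; when it does, we want the cacheable WB mapping, which is why WB is tested first.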
+ */ + if (md->attribute & EFI_MEMORY_WB) { + md->virt_addr = (u64) __va(md->phys_addr); + } else if (md->attribute & EFI_MEMORY_UC) { + md->virt_addr = (u64) ioremap(md->phys_addr, 0); + } else if (md->attribute & EFI_MEMORY_WC) { +#if 0 + md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P + | _PAGE_D + | _PAGE_MA_WC + | _PAGE_PL_0 + | _PAGE_AR_RW)); +#else + printk("EFI_MEMORY_WC mapping\n"); + md->virt_addr = (u64) ioremap(md->phys_addr, 0); +#endif + } else if (md->attribute & EFI_MEMORY_WT) { +#if 0 + md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P + | _PAGE_D | _PAGE_MA_WT + | _PAGE_PL_0 + | _PAGE_AR_RW)); +#else + printk("EFI_MEMORY_WT mapping\n"); + md->virt_addr = (u64) ioremap(md->phys_addr, 0); +#endif + } + } + } + + status = efi_call_phys(__va(runtime->set_virtual_address_map), + ia64_boot_param.efi_memmap_size, + efi_desc_size, ia64_boot_param.efi_memdesc_version, + ia64_boot_param.efi_memmap); + if (status != EFI_SUCCESS) { + printk("Warning: unable to switch EFI into virtual mode (status=%lu)\n", status); + return; + } + + /* + * Now that EFI is in virtual mode, we arrange for EFI functions to be + * called directly: + */ + efi.get_time = __va(runtime->get_time); + efi.set_time = __va(runtime->set_time); + efi.get_wakeup_time = __va(runtime->get_wakeup_time); + efi.set_wakeup_time = __va(runtime->set_wakeup_time); + efi.get_variable = __va(runtime->get_variable); + efi.get_next_variable = __va(runtime->get_next_variable); + efi.set_variable = __va(runtime->set_variable); + efi.get_next_high_mono_count = __va(runtime->get_next_high_mono_count); + efi.reset_system = __va(runtime->reset_system); +} diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S new file mode 100644 index 000000000..4e6f1fc63 --- /dev/null +++ b/arch/ia64/kernel/efi_stub.S @@ -0,0 +1,141 @@ +/* + * EFI call stub. + * + * Copyright (C) 1999 David Mosberger <davidm@hpl.hp.com> + * + * This stub allows us to make EFI calls in physical mode with interrupts + * turned off. We need this because we can't call SetVirtualMap() until + * the kernel has booted far enough to allow allocation of struct vma_struct + * entries (which we would need to map stuff with memory attributes other + * than uncached or writeback...). Since the GetTime() service gets called + * earlier than that, we need to be able to make physical mode EFI calls from + * the kernel. + */ + +/* + * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System + * Abstraction Layer Specification", revision 2.6e). Note that + * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. + * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call + * (the br.ia instruction fails unless psr.dfl and psr.dfh are + * cleared). Fortunately, SAL promises not to touch the floating + * point regs, so at least we don't have to save f2-f127. + */ +#define PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ + IA64_PSR_DFL | IA64_PSR_DFH) + +#define PSR_BITS_TO_SET \ + (IA64_PSR_BN) + +#include <asm/processor.h> + + .text + .psr abi64 + .psr lsb + .lsb + + .text + +/* + * Switch execution mode from virtual to physical or vice versa. 
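 * + * The new psr is established by loading it into cr.ipsr and executing an rfi; since kernel virtual and physical addresses differ only in the top three (region) bits, sp, the RSE backing store, and the return address are relocated by simply rewriting those bits.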
+ * + * Inputs: + * r16 = new psr to establish + */ + .proc switch_mode +switch_mode: + { + alloc r2=ar.pfs,0,0,0,0 + rsm psr.i | psr.ic // disable interrupts and interrupt collection + mov r15=ip + } + ;; + { + flushrs // must be first insn in group + srlz.i + shr.u r19=r15,61 // r19 <- top 3 bits of current IP + } + ;; + mov cr.ipsr=r16 // set new PSR + add r3=1f-switch_mode,r15 + xor r15=0x7,r19 // flip the region bits + + mov r17=ar.bsp + mov r14=rp // get return address into a general register + + // switch RSE backing store: + ;; + dep r17=r15,r17,61,3 // make ar.bsp physical or virtual + mov r18=ar.rnat // save ar.rnat + ;; + mov ar.bspstore=r17 // this steps on ar.rnat + dep r3=r15,r3,61,3 // make rfi return address physical or virtual + ;; + mov cr.iip=r3 + mov cr.ifs=r0 + dep sp=r15,sp,61,3 // make stack pointer physical or virtual + ;; + mov ar.rnat=r18 // restore ar.rnat + dep r14=r15,r14,61,3 // make function return address physical or virtual + rfi // must be last insn in group + ;; +1: mov rp=r14 + br.ret.sptk.few rp + .endp switch_mode + +/* + * Inputs: + * in0 = address of function descriptor of EFI routine to call + * in1..in7 = arguments to routine + * + * Outputs: + * r8 = EFI_STATUS returned by called function + */ + + .global efi_call_phys + .proc efi_call_phys +efi_call_phys: + + alloc loc0=ar.pfs,8,5,7,0 + ld8 r2=[in0],8 // load EFI function's entry point + mov loc1=rp + ;; + mov loc2=gp // save global pointer + mov loc4=ar.rsc // save RSE configuration + mov ar.rsc=r0 // put RSE in enforced lazy, LE mode + ;; + + ld8 gp=[in0] // load EFI function's global pointer + mov out0=in1 + mov out1=in2 + movl r16=PSR_BITS_TO_CLEAR + + mov loc3=psr // save processor status word + movl r17=PSR_BITS_TO_SET + ;; + mov out2=in3 + or loc3=loc3,r17 + mov b6=r2 + ;; + andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared + mov out3=in4 + br.call.sptk.few rp=switch_mode +.ret0: + mov out4=in5 + mov out5=in6 + mov out6=in7 + br.call.sptk.few rp=b6 // call the EFI function +.ret1: + mov ar.rsc=r0 // put RSE in enforced lazy, LE mode + mov r16=loc3 + br.call.sptk.few rp=switch_mode // return to virtual mode +.ret2: + mov ar.rsc=loc4 // restore RSE configuration + mov ar.pfs=loc0 + mov rp=loc1 + mov gp=loc2 + br.ret.sptk.few rp + + .endp efi_call_phys diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S new file mode 100644 index 000000000..87e77c677 --- /dev/null +++ b/arch/ia64/kernel/entry.S @@ -0,0 +1,1261 @@ +/* + * ia64/kernel/entry.S + * + * Kernel entry points. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com> + * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com> + */ +/* + * Global (preserved) predicate usage on syscall entry/exit path: + * + * + * pEOI: See entry.h. + * pKern: See entry.h. + * pSys: See entry.h. + * pNonSys: !pSys + * p2: (Alias of pKern!) True if any signals are pending. + * p16/p17: Used by stubs calling ia64_do_signal to indicate if current task + * has PF_PTRACED flag bit set. p16 is true if so, p17 is the complement. 
+ */ + +#include <linux/config.h> + +#include <asm/errno.h> +#include <asm/offsets.h> +#include <asm/processor.h> +#include <asm/unistd.h> + +#include "entry.h" + + .text + .psr abi64 + .psr lsb + .lsb + + /* + * execve() is special because in case of success, we need to + * setup a null register window frame. + */ + .align 16 + .proc ia64_execve +ia64_execve: + alloc loc0=ar.pfs,3,2,4,0 + mov loc1=rp + mov out0=in0 // filename + ;; // stop bit between alloc and call + mov out1=in1 // argv + mov out2=in2 // envp + add out3=16,sp // regs + br.call.sptk.few rp=sys_execve +.ret0: cmp4.ge p6,p0=r8,r0 + mov ar.pfs=loc0 // restore ar.pfs + ;; +(p6) mov ar.pfs=r0 // clear ar.pfs in case of success + sxt4 r8=r8 // return 64-bit result + mov rp=loc1 + + br.ret.sptk.few rp + .endp ia64_execve + + .align 16 + .global sys_clone + .proc sys_clone +sys_clone: + alloc r16=ar.pfs,2,2,3,0;; + movl r28=1f + mov loc1=rp + br.cond.sptk.many save_switch_stack +1: + mov loc0=r16 // save ar.pfs across do_fork + adds out2=IA64_SWITCH_STACK_SIZE+16,sp + adds r2=IA64_SWITCH_STACK_SIZE+IA64_PT_REGS_R12_OFFSET+16,sp + cmp.eq p8,p9=in1,r0 // usp == 0? + mov out0=in0 // out0 = clone_flags + ;; +(p8) ld8 out1=[r2] // fetch usp from pt_regs.r12 +(p9) mov out1=in1 + br.call.sptk.few rp=do_fork +.ret1: + mov ar.pfs=loc0 + adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack + mov rp=loc1 + ;; + br.ret.sptk.many rp + .endp sys_clone + +/* + * prev_task <- switch_to(struct task_struct *next) + */ + .align 16 + .global ia64_switch_to + .proc ia64_switch_to +ia64_switch_to: + alloc r16=ar.pfs,1,0,0,0 + movl r28=1f + br.cond.sptk.many save_switch_stack +1: + // disable interrupts to ensure atomicity for next few instructions: + mov r17=psr // M-unit + ;; + rsm psr.i // M-unit + dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff + ;; + srlz.d + ;; + adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 + adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 + ;; + st8 [r22]=sp // save kernel stack pointer of old task + ld8 sp=[r21] // load kernel stack pointer of new task + and r20=in0,r18 // physical address of "current" + ;; + mov r8=r13 // return pointer to previously running task + mov r13=in0 // set "current" pointer + mov ar.k6=r20 // copy "current" into ar.k6 + ;; + // restore interrupts + mov psr.l=r17 + ;; + srlz.d + + movl r28=1f + br.cond.sptk.many load_switch_stack +1: + br.ret.sptk.few rp + .endp ia64_switch_to + + /* + * Like save_switch_stack, but also save the stack frame that is active + * at the time this function is called. + */ + .align 16 + .proc save_switch_stack_with_current_frame +save_switch_stack_with_current_frame: +1: { + alloc r16=ar.pfs,0,0,0,0 // pass ar.pfs to save_switch_stack + mov r28=ip + } + ;; + adds r28=1f-1b,r28 + br.cond.sptk.many save_switch_stack +1: br.ret.sptk.few rp + .endp save_switch_stack_with_current_frame +/* + * Note that interrupts are enabled during save_switch_stack and + * load_switch_stack. This means that we may get an interrupt with + * "sp" pointing to the new kernel stack while ar.bspstore is still + * pointing to the old kernel backing store area. Since ar.rsc, + * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, + * this is not a problem. 
+ */ + +/* + * save_switch_stack: + * - r16 holds ar.pfs + * - r28 holds address to return to + * - rp (b0) holds return address to save + */ + .align 16 + .global save_switch_stack + .proc save_switch_stack +save_switch_stack: + flushrs // flush dirty regs to backing store (must be first in insn group) + mov r17=ar.unat // preserve caller's + adds r2=-IA64_SWITCH_STACK_SIZE+16,sp // r2 = &sw->caller_unat + ;; + mov r18=ar.fpsr // preserve fpsr + mov ar.rsc=r0 // put RSE in mode: enforced lazy, little endian, pl 0 + ;; + mov r19=ar.rnat + adds r3=-IA64_SWITCH_STACK_SIZE+24,sp // r3 = &sw->ar_fpsr + + // Note: the instruction ordering is important here: we can't + // store anything to the switch stack before sp is updated + // as otherwise an interrupt might overwrite the memory! + adds sp=-IA64_SWITCH_STACK_SIZE,sp + ;; + st8 [r2]=r17,16 + st8 [r3]=r18,24 + ;; + stf.spill [r2]=f2,32 + stf.spill [r3]=f3,32 + mov r21=b0 + ;; + stf.spill [r2]=f4,32 + stf.spill [r3]=f5,32 + ;; + stf.spill [r2]=f10,32 + stf.spill [r3]=f11,32 + mov r22=b1 + ;; + stf.spill [r2]=f12,32 + stf.spill [r3]=f13,32 + mov r23=b2 + ;; + stf.spill [r2]=f14,32 + stf.spill [r3]=f15,32 + mov r24=b3 + ;; + stf.spill [r2]=f16,32 + stf.spill [r3]=f17,32 + mov r25=b4 + ;; + stf.spill [r2]=f18,32 + stf.spill [r3]=f19,32 + mov r26=b5 + ;; + stf.spill [r2]=f20,32 + stf.spill [r3]=f21,32 + mov r17=ar.lc // I-unit + ;; + stf.spill [r2]=f22,32 + stf.spill [r3]=f23,32 + ;; + stf.spill [r2]=f24,32 + stf.spill [r3]=f25,32 + ;; + stf.spill [r2]=f26,32 + stf.spill [r3]=f27,32 + ;; + stf.spill [r2]=f28,32 + stf.spill [r3]=f29,32 + ;; + stf.spill [r2]=f30,32 + stf.spill [r3]=f31,24 + ;; + st8.spill [r2]=r4,16 + st8.spill [r3]=r5,16 + ;; + st8.spill [r2]=r6,16 + st8.spill [r3]=r7,16 + ;; + st8 [r2]=r21,16 // save b0 + st8 [r3]=r22,16 // save b1 + /* since we're done with the spills, read and save ar.unat: */ + mov r18=ar.unat // M-unit + mov r20=ar.bspstore // M-unit + ;; + st8 [r2]=r23,16 // save b2 + st8 [r3]=r24,16 // save b3 + ;; + st8 [r2]=r25,16 // save b4 + st8 [r3]=r26,16 // save b5 + ;; + st8 [r2]=r16,16 // save ar.pfs + st8 [r3]=r17,16 // save ar.lc + mov r21=pr + ;; + st8 [r2]=r18,16 // save ar.unat + st8 [r3]=r19,16 // save ar.rnat + mov b7=r28 + ;; + st8 [r2]=r20 // save ar.bspstore + st8 [r3]=r21 // save predicate registers + mov ar.rsc=3 // put RSE back into eager mode, pl 0 + br.cond.sptk.few b7 + .endp save_switch_stack + +/* + * load_switch_stack: + * - r28 holds address to return to + */ + .align 16 + .proc load_switch_stack +load_switch_stack: + invala // invalidate ALAT + adds r2=IA64_SWITCH_STACK_B0_OFFSET+16,sp // get pointer to switch_stack.b0 + mov ar.rsc=r0 // put RSE into enforced lazy mode + adds r3=IA64_SWITCH_STACK_B0_OFFSET+24,sp // get pointer to switch_stack.b1 + ;; + ld8 r21=[r2],16 // restore b0 + ld8 r22=[r3],16 // restore b1 + ;; + ld8 r23=[r2],16 // restore b2 + ld8 r24=[r3],16 // restore b3 + ;; + ld8 r25=[r2],16 // restore b4 + ld8 r26=[r3],16 // restore b5 + ;; + ld8 r16=[r2],16 // restore ar.pfs + ld8 r17=[r3],16 // restore ar.lc + ;; + ld8 r18=[r2],16 // restore ar.unat + ld8 r19=[r3],16 // restore ar.rnat + mov b0=r21 + ;; + ld8 r20=[r2] // restore ar.bspstore + ld8 r21=[r3] // restore predicate registers + mov ar.pfs=r16 + ;; + mov ar.bspstore=r20 + ;; + loadrs // invalidate stacked regs outside current frame + adds r2=16-IA64_SWITCH_STACK_SIZE,r2 // get pointer to switch_stack.caller_unat + ;; // stop bit for rnat dependency + mov ar.rnat=r19 + mov ar.unat=r18 // establish unat holding the NaT bits 
for r4-r7 + adds r3=16-IA64_SWITCH_STACK_SIZE,r3 // get pointer to switch_stack.ar_fpsr + ;; + ld8 r18=[r2],16 // restore caller's unat + ld8 r19=[r3],24 // restore fpsr + mov ar.lc=r17 + ;; + ldf.fill f2=[r2],32 + ldf.fill f3=[r3],32 + mov pr=r21,-1 + ;; + ldf.fill f4=[r2],32 + ldf.fill f5=[r3],32 + ;; + ldf.fill f10=[r2],32 + ldf.fill f11=[r3],32 + mov b1=r22 + ;; + ldf.fill f12=[r2],32 + ldf.fill f13=[r3],32 + mov b2=r23 + ;; + ldf.fill f14=[r2],32 + ldf.fill f15=[r3],32 + mov b3=r24 + ;; + ldf.fill f16=[r2],32 + ldf.fill f17=[r3],32 + mov b4=r25 + ;; + ldf.fill f18=[r2],32 + ldf.fill f19=[r3],32 + mov b5=r26 + ;; + ldf.fill f20=[r2],32 + ldf.fill f21=[r3],32 + ;; + ldf.fill f22=[r2],32 + ldf.fill f23=[r3],32 + ;; + ldf.fill f24=[r2],32 + ldf.fill f25=[r3],32 + ;; + ldf.fill f26=[r2],32 + ldf.fill f27=[r3],32 + ;; + ldf.fill f28=[r2],32 + ldf.fill f29=[r3],32 + ;; + ldf.fill f30=[r2],32 + ldf.fill f31=[r3],24 + ;; + ld8.fill r4=[r2],16 + ld8.fill r5=[r3],16 + mov b7=r28 + ;; + ld8.fill r6=[r2],16 + ld8.fill r7=[r3],16 + mov ar.unat=r18 // restore caller's unat + mov ar.fpsr=r19 // restore fpsr + mov ar.rsc=3 // put RSE back into eager mode, pl 0 + adds sp=IA64_SWITCH_STACK_SIZE,sp // pop switch_stack + br.cond.sptk.few b7 + .endp load_switch_stack + + .align 16 + .global __ia64_syscall + .proc __ia64_syscall +__ia64_syscall: + .regstk 6,0,0,0 + mov r15=in5 // put syscall number in place + break __BREAK_SYSCALL + movl r2=errno + cmp.eq p6,p7=-1,r10 + ;; +(p6) st4 [r2]=r8 +(p6) mov r8=-1 + br.ret.sptk.few rp + .endp __ia64_syscall + + // + // We invoke syscall_trace through this intermediate function to + // ensure that the syscall input arguments are not clobbered. We + // also use it to preserve b6, which contains the syscall entry point. + // + .align 16 + .global invoke_syscall_trace + .proc invoke_syscall_trace +invoke_syscall_trace: + alloc loc0=ar.pfs,8,3,0,0 + ;; // WAW on CFM at the br.call + mov loc1=rp + br.call.sptk.many rp=save_switch_stack_with_current_frame // must preserve b6!! +.ret2: mov loc2=b6 + br.call.sptk.few rp=syscall_trace +.ret3: adds sp=IA64_SWITCH_STACK_SIZE,sp // drop switch_stack frame + mov rp=loc1 + mov ar.pfs=loc0 + mov b6=loc2 + ;; + br.ret.sptk.few rp + .endp invoke_syscall_trace + + // + // Invoke a system call, but do some tracing before and after the call. + // We MUST preserve the current register frame throughout this routine + // because some system calls (such as ia64_execve) directly + // manipulate ar.pfs. + // + // Input: + // r15 = syscall number + // b6 = syscall entry point + // + .global ia64_trace_syscall + .global ia64_strace_leave_kernel + .global ia64_strace_clear_r8 + + .proc ia64_strace_clear_r8 +ia64_strace_clear_r8: // this is where we return after cloning when PF_TRACESYS is on +# ifdef CONFIG_SMP + br.call.sptk.few rp=invoke_schedule_tail +# endif + mov r8=0 + br strace_check_retval + .endp ia64_strace_clear_r8 + + .proc ia64_trace_syscall +ia64_trace_syscall: + br.call.sptk.few rp=invoke_syscall_trace // give parent a chance to catch syscall args +.ret4: br.call.sptk.few rp=b6 // do the syscall +strace_check_retval: +.ret5: cmp.lt p6,p0=r8,r0 // syscall failed? 
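+ // (a failed syscall returns -errno in r8; strace_error below consults pt_regs.r8 to avoid mislabeling syscalls that legitimately return negative values)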
+ ;; + adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8 + adds r3=IA64_PT_REGS_R8_OFFSET+32,sp // r3 = &pt_regs.r10 + mov r10=0 +(p6) br.cond.sptk.few strace_error // syscall failed -> + ;; // avoid RAW on r10 +strace_save_retval: + st8.spill [r2]=r8 // store return value in slot for r8 + st8.spill [r3]=r10 // clear error indication in slot for r10 +ia64_strace_leave_kernel: + br.call.sptk.few rp=invoke_syscall_trace // give parent a chance to catch return value +.ret6: br.cond.sptk.many ia64_leave_kernel + +strace_error: + ld8 r3=[r2] // load pt_regs.r8 + sub r9=0,r8 // negate return value to get errno value + ;; + cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0? + adds r3=16,r2 // r3=&pt_regs.r10 + ;; +(p6) mov r10=-1 +(p6) mov r8=r9 + br.cond.sptk.few strace_save_retval + .endp ia64_trace_syscall + +/* + * A couple of convenience macros to help implement/understand the state + * restoration that happens at the end of ia64_ret_from_syscall. + */ +#define rARPR r31 +#define rCRIFS r30 +#define rCRIPSR r29 +#define rCRIIP r28 +#define rARRSC r27 +#define rARPFS r26 +#define rARUNAT r25 +#define rARRNAT r24 +#define rARBSPSTORE r23 +#define rKRBS r22 +#define rB6 r21 + + .align 16 + .global ia64_ret_from_syscall + .global ia64_ret_from_syscall_clear_r8 + .global ia64_leave_kernel + .proc ia64_ret_from_syscall +ia64_ret_from_syscall_clear_r8: +#ifdef CONFIG_SMP + // In SMP mode, we need to call schedule_tail to complete the scheduling process. + // Called by ia64_switch_to after do_fork()->copy_thread(). r8 contains the + // address of the previously executing task. + br.call.sptk.few rp=invoke_schedule_tail +.ret7: +#endif + mov r8=0 + ;; // added stop bits to prevent r8 dependency +ia64_ret_from_syscall: + cmp.ge p6,p7=r8,r0 // syscall executed successfully? + adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8 + adds r3=IA64_PT_REGS_R8_OFFSET+32,sp // r3 = &pt_regs.r10 + ;; +(p6) st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit +(p6) st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit +(p7) br.cond.spnt.few handle_syscall_error // handle potential syscall failure + +ia64_leave_kernel: + // check & deliver software interrupts (bottom half handlers): + + movl r2=bh_active // sheesh, why aren't these two in + movl r3=bh_mask // a struct?? + ;; + ld8 r2=[r2] + ld8 r3=[r3] + ;; + and r2=r2,r3 + ;; + cmp.ne p6,p7=r2,r0 // any soft interrupts ready for delivery? +(p6) br.call.dpnt.few rp=invoke_do_bottom_half +1: +(pKern) br.cond.dpnt.many restore_all // yup -> skip check for rescheduling & signal delivery + + // call schedule() until we find a task that doesn't have need_resched set: + +back_from_resched: + { .mii + adds r2=IA64_TASK_NEED_RESCHED_OFFSET,r13 + mov r3=ip + adds r14=IA64_TASK_SIGPENDING_OFFSET,r13 + } + ;; + ld8 r2=[r2] + ld4 r14=[r14] + mov rp=r3 // arrange for schedule() to return to back_from_resched + ;; + /* + * If pEOI is set, we need to write the cr.eoi now and then + * clear pEOI because both invoke_schedule() and + * handle_signal_delivery() may call the scheduler. Since + * we're returning to user-level, we get at most one nested + * interrupt of the same priority level, which doesn't tax the + * kernel stack too much. + */ +(pEOI) mov cr.eoi=r0 + cmp.ne p6,p0=r2,r0 + cmp.ne p2,p0=r14,r0 // NOTE: pKern is an alias for p2!! 
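+ // (reusing p2 here is safe: the pKern-true case has already branched to restore_all above)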
+(pEOI) cmp.ne pEOI,p0=r0,r0 // clear pEOI before calling schedule() + srlz.d +(p6) br.call.spnt.many b6=invoke_schedule // ignore return value +2: + // check & deliver pending signals: +(p2) br.call.spnt.few rp=handle_signal_delivery +restore_all: + + // start restoring the state saved on the kernel stack (struct pt_regs): + + adds r2=IA64_PT_REGS_R8_OFFSET+16,r12 + adds r3=IA64_PT_REGS_R8_OFFSET+24,r12 + ;; + ld8.fill r8=[r2],16 + ld8.fill r9=[r3],16 + ;; + ld8.fill r10=[r2],16 + ld8.fill r11=[r3],16 + ;; + ld8.fill r16=[r2],16 + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + ;; + ld8.fill r22=[r2],16 + ld8.fill r23=[r3],16 + ;; + ld8.fill r24=[r2],16 + ld8.fill r25=[r3],16 + ;; + ld8.fill r26=[r2],16 + ld8.fill r27=[r3],16 + ;; + ld8.fill r28=[r2],16 + ld8.fill r29=[r3],16 + ;; + ld8.fill r30=[r2],16 + ld8.fill r31=[r3],16 + ;; + ld8 r1=[r2],16 // ar.ccv + ld8 r13=[r3],16 // ar.fpsr + ;; + ld8 r14=[r2],16 // b0 + ld8 r15=[r3],16+8 // b7 + ;; + ldf.fill f6=[r2],32 + ldf.fill f7=[r3],32 + ;; + ldf.fill f8=[r2],32 + ldf.fill f9=[r3],32 + ;; + mov ar.ccv=r1 + mov ar.fpsr=r13 + mov b0=r14 + // turn off interrupts, interrupt collection, & data translation + rsm psr.i | psr.ic | psr.dt + ;; + srlz.i // EAS 2.5 + mov b7=r15 + ;; + invala // invalidate ALAT + dep r12=0,r12,61,3 // convert sp to physical address + bsw.0;; // switch back to bank 0 (must be last in insn group) + ;; +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + nop.i 0x0 + ;; + nop.i 0x0 + ;; + nop.i 0x0 + ;; +#endif + adds r16=16,r12 + adds r17=24,r12 + ;; + ld8 rCRIPSR=[r16],16 // load cr.ipsr + ld8 rCRIIP=[r17],16 // load cr.iip + ;; + ld8 rCRIFS=[r16],16 // load cr.ifs + ld8 rARUNAT=[r17],16 // load ar.unat + ;; + ld8 rARPFS=[r16],16 // load ar.pfs + ld8 rARRSC=[r17],16 // load ar.rsc + ;; + ld8 rARRNAT=[r16],16 // load ar.rnat (may be garbage) + ld8 rARBSPSTORE=[r17],16 // load ar.bspstore (may be garbage) + ;; + ld8 rARPR=[r16],16 // load predicates + ld8 rB6=[r17],16 // load b6 + ;; + ld8 r18=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 // load r1 + ;; + ld8.fill r2=[r16],16 + ld8.fill r3=[r17],16 + ;; + ld8.fill r12=[r16],16 + ld8.fill r13=[r17],16 + extr.u r19=rCRIPSR,32,2 // extract ps.cpl + ;; + ld8.fill r14=[r16],16 + ld8.fill r15=[r17],16 + cmp.eq p6,p7=r0,r19 // are we returning to kernel mode? (psr.cpl==0) + ;; + mov b6=rB6 + mov ar.pfs=rARPFS +(p6) br.cond.dpnt.few skip_rbs_switch + + /* + * Restore user backing store. + * + * NOTE: alloc, loadrs, and cover can't be predicated. + * + * XXX This needs some scheduling/tuning once we believe it + * really does work as intended. + */ + mov r16=ar.bsp // get existing backing store pointer +(pNonSys) br.cond.dpnt.few dont_preserve_current_frame + cover // add current frame into dirty partition + ;; + mov rCRIFS=cr.ifs // fetch the cr.ifs value that "cover" produced + mov r17=ar.bsp // get new backing store pointer + ;; + sub r16=r17,r16 // calculate number of bytes that were added to rbs + ;; + shl r16=r16,16 // shift additional frame size into position for loadrs + ;; + add r18=r16,r18 // adjust the loadrs value + ;; +#ifdef CONFIG_IA64_SOFTSDV_HACKS + // Reset ITM if we've missed a timer tick. 
Workaround for SoftSDV bug + mov r16 = r2 + mov r2 = ar.itc + mov r17 = cr.itm + ;; + cmp.gt p6,p7 = r2, r17 +(p6) addl r17 = 100, r2 + ;; + mov cr.itm = r17 + mov r2 = r16 +#endif +dont_preserve_current_frame: + alloc r16=ar.pfs,0,0,0,0 // drop the current call frame (noop for syscalls) + ;; + mov ar.rsc=r18 // load ar.rsc to be used for "loadrs" +#ifdef CONFIG_IA32_SUPPORT + tbit.nz p6,p0=rCRIPSR,IA64_PSR_IS_BIT + ;; +(p6) mov ar.rsc=r0 // returning to IA32 mode +#endif + ;; + loadrs + ;; + mov ar.bspstore=rARBSPSTORE + ;; + mov ar.rnat=rARRNAT // must happen with RSE in lazy mode + +skip_rbs_switch: + mov ar.rsc=rARRSC + mov ar.unat=rARUNAT + mov cr.ifs=rCRIFS // restore cr.ifs only if not a (synchronous) syscall +(pEOI) mov cr.eoi=r0 + mov pr=rARPR,-1 + mov cr.iip=rCRIIP + mov cr.ipsr=rCRIPSR + ;; + rfi;; // must be last instruction in an insn group + +handle_syscall_error: + /* + * Some system calls (e.g., ptrace, mmap) can return arbitrary + * values which could lead us to mistake a negative return + * value for a failed syscall.  Those syscalls must deposit + * a non-zero value in pt_regs.r8 to indicate an error. + * If pt_regs.r8 is zero, we assume that the call completed + * successfully. + */ + ld8 r3=[r2] // load pt_regs.r8 + sub r9=0,r8 // negate return value to get errno + ;; + mov r10=-1 // return -1 in pt_regs.r10 to indicate error + cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0? + adds r3=16,r2 // r3=&pt_regs.r10 + ;; +(p6) mov r9=r8 +(p6) mov r10=0 + ;; + st8.spill [r2]=r9 // store errno in pt_regs.r8 and set unat bit + st8.spill [r3]=r10 // store error indication in pt_regs.r10 and set unat bit + br.cond.sptk.many ia64_leave_kernel + .endp ia64_ret_from_syscall + +#ifdef CONFIG_SMP + /* + * Invoke schedule_tail(task) while preserving in0-in7, which may be needed + * in case a system call gets restarted. + */ + .proc invoke_schedule_tail +invoke_schedule_tail: + alloc loc0=ar.pfs,8,2,1,0 + mov loc1=rp + mov out0=r8 // Address of previous task + ;; + br.call.sptk.few rp=schedule_tail +.ret8: + mov ar.pfs=loc0 + mov rp=loc1 + br.ret.sptk.many rp + .endp invoke_schedule_tail +#endif /* CONFIG_SMP */ + + /* + * Invoke do_bottom_half() while preserving in0-in7, which may be needed + * in case a system call gets restarted. + */ + .proc invoke_do_bottom_half +invoke_do_bottom_half: + alloc loc0=ar.pfs,8,2,0,0 + mov loc1=rp + ;; + br.call.sptk.few rp=do_bottom_half +.ret9: + mov ar.pfs=loc0 + mov rp=loc1 + br.ret.sptk.many rp + .endp invoke_do_bottom_half + + /* + * Invoke schedule() while preserving in0-in7, which may be needed + * in case a system call gets restarted. + */ + .proc invoke_schedule +invoke_schedule: + alloc loc0=ar.pfs,8,2,0,0 + mov loc1=rp + ;; + br.call.sptk.few rp=schedule +.ret10: + mov ar.pfs=loc0 + mov rp=loc1 + br.ret.sptk.many rp + .endp invoke_schedule + + // + // Set up stack and call ia64_do_signal.  Note that pSys and pNonSys need to + // be set up by the caller.  We declare 8 input registers so the system call + // args get preserved, in case we need to restart a system call. + // + .align 16 + .proc handle_signal_delivery +handle_signal_delivery: + alloc loc0=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! + mov r9=ar.unat + + // If the process is being ptraced, the signal may not actually be delivered to + // the process.  Instead, SIGCHLD will be sent to the parent.  We need to + // set up a switch_stack so ptrace can inspect the process's state if necessary.
adds r2=IA64_TASK_FLAGS_OFFSET,r13 + ;; + ld8 r2=[r2] + mov out0=0 // there is no "oldset" + adds out1=16,sp // out1=&pt_regs + ;; +(pSys) mov out2=1 // out2==1 => we're in a syscall + tbit.nz p16,p17=r2,PF_PTRACED_BIT +(p16) br.cond.spnt.many setup_switch_stack + ;; +back_from_setup_switch_stack: +(pNonSys) mov out2=0 // out2==0 => not a syscall + adds r3=-IA64_SWITCH_STACK_SIZE+IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp +(p17) adds sp=-IA64_SWITCH_STACK_SIZE,sp // make space for (dummy) switch_stack + ;; +(p17) st8 [r3]=r9 // save ar.unat in sw->caller_unat + mov loc1=rp // save return address + br.call.sptk.few rp=ia64_do_signal +.ret11: + adds r3=IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp + ;; + ld8 r9=[r3] // load new unat from sw->caller_unat + mov rp=loc1 + ;; +(p17) adds sp=IA64_SWITCH_STACK_SIZE,sp // drop (dummy) switch_stack +(p17) mov ar.unat=r9 +(p17) mov ar.pfs=loc0 +(p17) br.ret.sptk.many rp + + // restore the switch stack (ptrace may have modified it): + movl r28=1f + br.cond.sptk.many load_switch_stack +1: br.ret.sptk.many rp + // NOT REACHED + +setup_switch_stack: + movl r28=back_from_setup_switch_stack + mov r16=loc0 + br.cond.sptk.many save_switch_stack + // NOT REACHED + + .endp handle_signal_delivery + + .align 16 + .proc sys_rt_sigsuspend + .global sys_rt_sigsuspend +sys_rt_sigsuspend: + alloc loc0=ar.pfs,2,2,3,0 + mov r9=ar.unat + + // If the process is being ptraced, the signal may not actually be delivered to + // the process.  Instead, SIGCHLD will be sent to the parent.  We need to + // set up a switch_stack so ptrace can inspect the process's state if necessary. + adds r2=IA64_TASK_FLAGS_OFFSET,r13 + ;; + ld8 r2=[r2] + mov out0=in0 // mask + mov out1=in1 // sigsetsize + ;; + adds out2=16,sp // out2=&pt_regs + tbit.nz p16,p17=r2,PF_PTRACED_BIT +(p16) br.cond.spnt.many sigsuspend_setup_switch_stack + ;; +back_from_sigsuspend_setup_switch_stack: + adds r3=-IA64_SWITCH_STACK_SIZE+IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp +(p17) adds sp=-IA64_SWITCH_STACK_SIZE,sp // make space for (dummy) switch_stack + ;; +(p17) st8 [r3]=r9 // save ar.unat in sw->caller_unat + mov loc1=rp // save return address + br.call.sptk.many rp=ia64_rt_sigsuspend +.ret12: + adds r3=IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp + ;; + ld8 r9=[r3] // load new unat from sw->caller_unat + mov rp=loc1 + ;; +(p17) adds sp=IA64_SWITCH_STACK_SIZE,sp // drop (dummy) switch_stack +(p17) mov ar.unat=r9 +(p17) mov ar.pfs=loc0 +(p17) br.ret.sptk.many rp + + // restore the switch stack (ptrace may have modified it): + movl r28=1f + br.cond.sptk.many load_switch_stack +1: br.ret.sptk.many rp + // NOT REACHED + +sigsuspend_setup_switch_stack: + movl r28=back_from_sigsuspend_setup_switch_stack + mov r16=loc0 + br.cond.sptk.many save_switch_stack + // NOT REACHED + + .endp sys_rt_sigsuspend + + .align 16 + .proc sys_rt_sigreturn +sys_rt_sigreturn: + alloc loc0=ar.pfs,8,1,1,0 // preserve all eight input regs in case of syscall restart!
+ adds out0=16,sp // out0 = &pt_regs + ;; + adds sp=-IA64_SWITCH_STACK_SIZE,sp // make space for unat and padding + br.call.sptk.few rp=ia64_rt_sigreturn +.ret13: + adds r3=IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp + ;; + ld8 r9=[r3] // load new ar.unat + mov rp=r8 + ;; + adds sp=IA64_SWITCH_STACK_SIZE,sp // drop (dummy) switch-stack frame + mov ar.unat=r9 + mov ar.pfs=loc0 + br.ret.sptk.many rp + .endp sys_rt_sigreturn + + .align 16 + .global ia64_prepare_handle_unaligned + .proc ia64_prepare_handle_unaligned +ia64_prepare_handle_unaligned: + movl r28=1f + // + // r16 = fake ar.pfs, we simply need to make sure + // privilege is still 0 + // + mov r16=r0 + br.cond.sptk.few save_switch_stack +1: br.call.sptk.few rp=ia64_handle_unaligned // stack frame setup in ivt +.ret14: + movl r28=2f + br.cond.sptk.many load_switch_stack +2: br.cond.sptk.many rp // goes to ia64_leave_kernel + .endp ia64_prepare_handle_unaligned + +#ifdef CONFIG_KDB + // + // This gets called from ivt.S with: + // SAVE MIN with cover done + // SAVE REST done + // no parameters + // r15 has return value = ia64_leave_kernel + // + .align 16 + .global ia64_invoke_kdb + .proc ia64_invoke_kdb +ia64_invoke_kdb: + alloc r16=ar.pfs,0,0,4,0 + movl r28=1f // save_switch_stack protocol + ;; // avoid WAW on CFM + br.cond.sptk.many save_switch_stack // to flushrs +1: mov out0=4 // kdb entry reason + mov out1=0 // err number + adds out2=IA64_SWITCH_STACK_SIZE+16,sp // pt_regs + add out3=16,sp // switch_stack + br.call.sptk.few rp=kdb +.ret15: + movl r28=1f // load_switch_stack proto + br.cond.sptk.many load_switch_stack +1: br.ret.sptk.many rp + .endp ia64_invoke_kdb + + // + // When KDB is compiled in, we intercept each fault and give + // kdb a chance to run before calling the normal fault handler. + // + .align 16 + .global ia64_invoke_kdb_fault_handler + .proc ia64_invoke_kdb_fault_handler +ia64_invoke_kdb_fault_handler: + alloc r16=ar.pfs,5,1,5,0 + movl r28=1f + mov loc0=rp // save this + br.cond.sptk.many save_switch_stack // to flushrs + ;; // avoid WAW on CFM +1: mov out0=in0 // vector number + mov out1=in1 // cr.isr + mov out2=in2 // cr.ifa + mov out3=in3 // cr.iim + mov out4=in4 // cr.itir + br.call.sptk.few rp=ia64_kdb_fault_handler +.ret16: + + movl r28=1f + br.cond.sptk.many load_switch_stack +1: cmp.ne p6,p0=r8,r0 // did ia64_kdb_fault_handler return 0? + mov rp=loc0 +(p6) br.ret.spnt.many rp // no, we're done + ;; // avoid WAW on rp + mov out0=in0 // vector number + mov out1=in1 // cr.isr + mov out2=in2 // cr.ifa + mov out3=in3 // cr.iim + mov out4=in4 // cr.itir + mov in0=ar.pfs // preserve ar.pfs returned by load_switch_stack + br.call.sptk.few rp=ia64_fault // yup -> we need to invoke normal fault handler now +.ret17: + mov ar.pfs=in0 + mov rp=loc0 + br.ret.sptk.many rp + + .endp ia64_invoke_kdb_fault_handler + +#endif /* CONFIG_KDB */ + + .rodata + .align 8 + .globl sys_call_table +sys_call_table: + data8 sys_ni_syscall // This must be sys_ni_syscall! See ivt.S. 
+ data8 sys_exit // 1025 + data8 sys_read + data8 sys_write + data8 sys_open + data8 sys_close + data8 sys_creat // 1030 + data8 sys_link + data8 sys_unlink + data8 ia64_execve + data8 sys_chdir + data8 sys_fchdir // 1035 + data8 sys_utimes + data8 sys_mknod + data8 sys_chmod + data8 sys_chown + data8 sys_lseek // 1040 + data8 sys_getpid + data8 sys_getppid + data8 sys_mount + data8 sys_umount + data8 sys_setuid // 1045 + data8 sys_getuid + data8 sys_geteuid + data8 sys_ptrace + data8 sys_access + data8 sys_sync // 1050 + data8 sys_fsync + data8 sys_fdatasync + data8 sys_kill + data8 sys_rename + data8 sys_mkdir // 1055 + data8 sys_rmdir + data8 sys_dup + data8 sys_pipe + data8 sys_times + data8 ia64_brk // 1060 + data8 sys_setgid + data8 sys_getgid + data8 sys_getegid + data8 sys_acct + data8 sys_ioctl // 1065 + data8 sys_fcntl + data8 sys_umask + data8 sys_chroot + data8 sys_ustat + data8 sys_dup2 // 1070 + data8 sys_setreuid + data8 sys_setregid + data8 sys_getresuid + data8 sys_setresuid + data8 sys_getresgid // 1075 + data8 sys_setresgid + data8 sys_getgroups + data8 sys_setgroups + data8 sys_getpgid + data8 sys_setpgid // 1080 + data8 sys_setsid + data8 sys_getsid + data8 sys_sethostname + data8 sys_setrlimit + data8 sys_getrlimit // 1085 + data8 sys_getrusage + data8 sys_gettimeofday + data8 sys_settimeofday + data8 sys_select + data8 sys_poll // 1090 + data8 sys_symlink + data8 sys_readlink + data8 sys_uselib + data8 sys_swapon + data8 sys_swapoff // 1095 + data8 sys_reboot + data8 sys_truncate + data8 sys_ftruncate + data8 sys_fchmod + data8 sys_fchown // 1100 + data8 ia64_getpriority + data8 sys_setpriority + data8 sys_statfs + data8 sys_fstatfs + data8 sys_ioperm // 1105 + data8 sys_semget + data8 sys_semop + data8 sys_semctl + data8 sys_msgget + data8 sys_msgsnd // 1110 + data8 sys_msgrcv + data8 sys_msgctl + data8 sys_shmget + data8 ia64_shmat + data8 sys_shmdt // 1115 + data8 sys_shmctl + data8 sys_syslog + data8 sys_setitimer + data8 sys_getitimer + data8 sys_newstat // 1120 + data8 sys_newlstat + data8 sys_newfstat + data8 sys_vhangup + data8 sys_lchown + data8 sys_vm86 // 1125 + data8 sys_wait4 + data8 sys_sysinfo + data8 sys_clone + data8 sys_setdomainname + data8 sys_newuname // 1130 + data8 sys_adjtimex + data8 sys_create_module + data8 sys_init_module + data8 sys_delete_module + data8 sys_get_kernel_syms // 1135 + data8 sys_query_module + data8 sys_quotactl + data8 sys_bdflush + data8 sys_sysfs + data8 sys_personality // 1140 + data8 ia64_ni_syscall // sys_afs_syscall + data8 sys_setfsuid + data8 sys_setfsgid + data8 sys_getdents + data8 sys_flock // 1145 + data8 sys_readv + data8 sys_writev + data8 sys_pread + data8 sys_pwrite + data8 sys_sysctl // 1150 + data8 sys_mmap + data8 sys_munmap + data8 sys_mlock + data8 sys_mlockall + data8 sys_mprotect // 1155 + data8 sys_mremap + data8 sys_msync + data8 sys_munlock + data8 sys_munlockall + data8 sys_sched_getparam // 1160 + data8 sys_sched_setparam + data8 sys_sched_getscheduler + data8 sys_sched_setscheduler + data8 sys_sched_yield + data8 sys_sched_get_priority_max // 1165 + data8 sys_sched_get_priority_min + data8 sys_sched_rr_get_interval + data8 sys_nanosleep + data8 sys_nfsservctl + data8 sys_prctl // 1170 + data8 sys_getpagesize + data8 sys_mmap2 + data8 sys_pciconfig_read + data8 sys_pciconfig_write + data8 sys_perfmonctl // 1175 + data8 sys_sigaltstack + data8 sys_rt_sigaction + data8 sys_rt_sigpending + data8 sys_rt_sigprocmask + data8 sys_rt_sigqueueinfo // 1180 + data8 sys_rt_sigreturn + data8 
sys_rt_sigsuspend + data8 sys_rt_sigtimedwait + data8 sys_getcwd + data8 sys_capget // 1185 + data8 sys_capset + data8 sys_sendfile + data8 sys_ni_syscall // sys_getpmsg (STREAMS) + data8 sys_ni_syscall // sys_putpmsg (STREAMS) + data8 sys_socket // 1190 + data8 sys_bind + data8 sys_connect + data8 sys_listen + data8 sys_accept + data8 sys_getsockname // 1195 + data8 sys_getpeername + data8 sys_socketpair + data8 sys_send + data8 sys_sendto + data8 sys_recv // 1200 + data8 sys_recvfrom + data8 sys_shutdown + data8 sys_setsockopt + data8 sys_getsockopt + data8 sys_sendmsg // 1205 + data8 sys_recvmsg + data8 sys_pivot_root + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1210 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1215 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1220 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1225 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1230 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1235 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1240 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1245 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1250 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1255 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1260 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1265 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1270 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1275 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h new file mode 100644 index 000000000..ecef44f60 --- /dev/null +++ b/arch/ia64/kernel/entry.h @@ -0,0 +1,8 @@ +/* + * Preserved registers that are shared between code in ivt.S and entry.S.  Be + * careful not to step on these! + */ +#define pEOI p1 /* should leave_kernel write EOI? */ +#define pKern p2 /* will leave_kernel return to kernel-mode? */ +#define pSys p4 /* are we processing a (synchronous) system call? */ +#define pNonSys p5 /* complement of pSys */ diff --git a/arch/ia64/kernel/fw-emu.c b/arch/ia64/kernel/fw-emu.c new file mode 100644 index 000000000..212ff299c --- /dev/null +++ b/arch/ia64/kernel/fw-emu.c @@ -0,0 +1,444 @@ +/* + * PAL & SAL emulation. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * For the HP simulator, this file gets included in boot/bootloader.c. + * For SoftSDV, this file gets included in sys_softsdv.c.
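+ * In either case the code runs before the kernel proper is up, so it must be self-contained and self-relocatable (which is why sal_emulator below avoids a "switch" statement).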
+ */ +#include <linux/config.h> + +#ifdef CONFIG_PCI +# include <linux/pci.h> +#endif + +#include <asm/efi.h> +#include <asm/io.h> +#include <asm/pal.h> +#include <asm/sal.h> + +#define MB (1024*1024UL) + +#define NUM_MEM_DESCS 3 + +static char fw_mem[( sizeof(efi_system_table_t) + + sizeof(efi_runtime_services_t) + + 1*sizeof(efi_config_table_t) + + sizeof(struct ia64_sal_systab) + + sizeof(struct ia64_sal_desc_entry_point) + + NUM_MEM_DESCS*(sizeof(efi_memory_desc_t)) + + 1024)] __attribute__ ((aligned (8))); + +#ifdef CONFIG_IA64_HP_SIM + +/* Simulator system calls: */ + +#define SSC_EXIT 66 + +/* + * Simulator system call. + */ +static long +ssc (long arg0, long arg1, long arg2, long arg3, int nr) +{ + register long r8 asm ("r8"); + + asm volatile ("mov r15=%1\n\t" + "break 0x80001" + : "=r"(r8) + : "r"(nr), "r"(arg0), "r"(arg1), "r"(arg2), "r"(arg3)); + return r8; +} + +#define SECS_PER_HOUR (60 * 60) +#define SECS_PER_DAY (SECS_PER_HOUR * 24) + +/* Compute the `struct tm' representation of *T, + offset OFFSET seconds east of UTC, + and store year, yday, mon, mday, wday, hour, min, sec into *TP. + Return nonzero if successful. */ +int +offtime (unsigned long t, efi_time_t *tp) +{ + const unsigned short int __mon_yday[2][13] = + { + /* Normal years. */ + { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, + /* Leap years. */ + { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } + }; + long int days, rem, y; + const unsigned short int *ip; + + days = t / SECS_PER_DAY; + rem = t % SECS_PER_DAY; + while (rem < 0) { + rem += SECS_PER_DAY; + --days; + } + while (rem >= SECS_PER_DAY) { + rem -= SECS_PER_DAY; + ++days; + } + tp->hour = rem / SECS_PER_HOUR; + rem %= SECS_PER_HOUR; + tp->minute = rem / 60; + tp->second = rem % 60; + /* January 1, 1970 was a Thursday. */ + y = 1970; + +# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0)) +# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400)) +# define __isleap(year) \ + ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0)) + + while (days < 0 || days >= (__isleap (y) ? 366 : 365)) { + /* Guess a corrected year, assuming 365 days per year. */ + long int yg = y + days / 365 - (days % 365 < 0); + + /* Adjust DAYS and Y to match the guessed year. */ + days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1) + - LEAPS_THRU_END_OF (y - 1)); + y = yg; + } + tp->year = y; + ip = __mon_yday[__isleap(y)]; + for (y = 11; days < (long int) ip[y]; --y) + continue; + days -= ip[y]; + tp->month = y + 1; + tp->day = days + 1; + return 1; +} + +#endif /* CONFIG_IA64_HP_SIM */ + +/* + * Very ugly, but we need this in the simulator only. Once we run on + * real hw, this can all go away. + */ +extern void pal_emulator_static (void); + +asm (" + .proc pal_emulator_static +pal_emulator_static: + mov r8=-1 + cmp.eq p6,p7=6,r28 /* PAL_PTCE_INFO */ +(p7) br.cond.sptk.few 1f + ;; + mov r8=0 /* status = 0 */ + movl r9=0x100000000 /* tc.base */ + movl r10=0x0000000200000003 /* count[0], count[1] */ + movl r11=0x1000000000002000 /* stride[0], stride[1] */ + br.cond.sptk.few rp + +1: cmp.eq p6,p7=14,r28 /* PAL_FREQ_RATIOS */ +(p7) br.cond.sptk.few 1f + mov r8=0 /* status = 0 */ + movl r9 =0x100000064 /* proc_ratio (1/100) */ + movl r10=0x100000100 /* bus_ratio<<32 (1/256) */ + movl r11=0x100000064 /* itc_ratio<<32 (1/100) */ +1: br.cond.sptk.few rp + .endp pal_emulator_static\n"); + +/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. 
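For example, config address 0x00011004 (bus 1, device/function byte 0x10, register 4) becomes the CF8 command 0x80011004: enable bit 31 set, the bus/device/function bits passed through, and the register offset rounded down to a dword boundary by the "& ~3".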
*/ + +#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3) + +#define REG_OFFSET(addr) (0x00000000000000FF & (addr)) +#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr)) +#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr)) + +static efi_status_t +efi_get_time (efi_time_t *tm, efi_time_cap_t *tc) +{ +#ifdef CONFIG_IA64_HP_SIM + struct { + int tv_sec; /* must be 32bits to work */ + int tv_usec; + } tv32bits; + + ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD); + + memset(tm, 0, sizeof(*tm)); + offtime(tv32bits.tv_sec, tm); + + if (tc) + memset(tc, 0, sizeof(*tc)); +#else +# error Not implemented yet... +#endif + return EFI_SUCCESS; +} + +static void +efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data) +{ +#ifdef CONFIG_IA64_HP_SIM + ssc(status, 0, 0, 0, SSC_EXIT); +#else +# error Not implemented yet... +#endif +} + +static efi_status_t +efi_unimplemented (void) +{ + return EFI_UNSUPPORTED; +} + +static long +sal_emulator (long index, unsigned long in1, unsigned long in2, + unsigned long in3, unsigned long in4, unsigned long in5, + unsigned long in6, unsigned long in7) +{ + register long r9 asm ("r9") = 0; + register long r10 asm ("r10") = 0; + register long r11 asm ("r11") = 0; + long status; + + /* + * Don't do a "switch" here since that gives us code that + * isn't self-relocatable. + */ + status = 0; + if (index == SAL_FREQ_BASE) { + switch (in1) { + case SAL_FREQ_BASE_PLATFORM: + r9 = 100000000; + break; + + case SAL_FREQ_BASE_INTERVAL_TIMER: + /* + * Is this supposed to be the cr.itc frequency + * or something platform specific? The SAL + * doc ain't exactly clear on this... + */ +#if defined(CONFIG_IA64_SOFTSDV_HACKS) + r9 = 4000000; +#elif defined(CONFIG_IA64_SDV) + r9 = 300000000; +#else + r9 = 700000000; +#endif + break; + + case SAL_FREQ_BASE_REALTIME_CLOCK: + r9 = 1; + break; + + default: + status = -1; + break; + } + } else if (index == SAL_SET_VECTORS) { + ; + } else if (index == SAL_GET_STATE_INFO) { + ; + } else if (index == SAL_GET_STATE_INFO_SIZE) { + ; + } else if (index == SAL_CLEAR_STATE_INFO) { + ; + } else if (index == SAL_MC_RENDEZ) { + ; + } else if (index == SAL_MC_SET_PARAMS) { + ; + } else if (index == SAL_CACHE_FLUSH) { + ; + } else if (index == SAL_CACHE_INIT) { + ; +#ifdef CONFIG_PCI + } else if (index == SAL_PCI_CONFIG_READ) { + /* + * in1 contains the PCI configuration address and in2 + * the size of the read. The value that is read is + * returned via the general register r9. + */ + outl(BUILD_CMD(in1), 0xCF8); + if (in2 == 1) /* Reading byte */ + r9 = inb(0xCFC + ((REG_OFFSET(in1) & 3))); + else if (in2 == 2) /* Reading word */ + r9 = inw(0xCFC + ((REG_OFFSET(in1) & 2))); + else /* Reading dword */ + r9 = inl(0xCFC); + status = PCIBIOS_SUCCESSFUL; + } else if (index == SAL_PCI_CONFIG_WRITE) { + /* + * in1 contains the PCI configuration address, in2 the + * size of the write, and in3 the actual value to be + * written out. 
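+ * The low bits of the register offset then select the byte or word lane within the 32-bit data window at 0xCFC (the "& 3" and "& 2" below).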
*/ + outl(BUILD_CMD(in1), 0xCF8); + if (in2 == 1) /* Writing byte */ + outb(in3, 0xCFC + ((REG_OFFSET(in1) & 3))); + else if (in2 == 2) /* Writing word */ + outw(in3, 0xCFC + ((REG_OFFSET(in1) & 2))); + else /* Writing dword */ + outl(in3, 0xCFC); + status = PCIBIOS_SUCCESSFUL; +#endif /* CONFIG_PCI */ + } else if (index == SAL_UPDATE_PAL) { + ; + } else { + status = -1; + } + asm volatile ("" :: "r"(r9), "r"(r10), "r"(r11)); + return status; +} + + +/* + * This is here to work around a bug in egcs-1.1.1b that causes the + * compiler to crash (seems like a bug in the new alias analysis code). + */ +void * +id (long addr) +{ + return (void *) addr; +} + +void +sys_fw_init (const char *args, int arglen) +{ + efi_system_table_t *efi_systab; + efi_runtime_services_t *efi_runtime; + efi_config_table_t *efi_tables; + struct ia64_sal_systab *sal_systab; + efi_memory_desc_t *efi_memmap, *md; + unsigned long *pal_desc, *sal_desc; + struct ia64_sal_desc_entry_point *sal_ed; + struct ia64_boot_param *bp; + unsigned char checksum = 0; + char *cp, *cmd_line; + + memset(fw_mem, 0, sizeof(fw_mem)); + + pal_desc = (unsigned long *) &pal_emulator_static; + sal_desc = (unsigned long *) &sal_emulator; + + cp = fw_mem; + efi_systab = (void *) cp; cp += sizeof(*efi_systab); + efi_runtime = (void *) cp; cp += sizeof(*efi_runtime); + efi_tables = (void *) cp; cp += sizeof(*efi_tables); + sal_systab = (void *) cp; cp += sizeof(*sal_systab); + sal_ed = (void *) cp; cp += sizeof(*sal_ed); + efi_memmap = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap); + cmd_line = (void *) cp; + + if (args) { + if (arglen >= 1024) + arglen = 1023; + memcpy(cmd_line, args, arglen); + } else { + arglen = 0; + } + cmd_line[arglen] = '\0'; + + memset(efi_systab, 0, sizeof(*efi_systab)); + efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE; + efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION; + efi_systab->hdr.headersize = sizeof(efi_systab->hdr); + efi_systab->fw_vendor = __pa("H\0e\0w\0l\0e\0t\0t\0-\0P\0a\0c\0k\0a\0r\0d\0\0"); + efi_systab->fw_revision = 1; + efi_systab->runtime = __pa(efi_runtime); + efi_systab->nr_tables = 1; + efi_systab->tables = __pa(efi_tables); + + efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE; + efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION; + efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr); + efi_runtime->get_time = __pa(&efi_get_time); + efi_runtime->set_time = __pa(&efi_unimplemented); + efi_runtime->get_wakeup_time = __pa(&efi_unimplemented); + efi_runtime->set_wakeup_time = __pa(&efi_unimplemented); + efi_runtime->set_virtual_address_map = __pa(&efi_unimplemented); + efi_runtime->get_variable = __pa(&efi_unimplemented); + efi_runtime->get_next_variable = __pa(&efi_unimplemented); + efi_runtime->set_variable = __pa(&efi_unimplemented); + efi_runtime->get_next_high_mono_count = __pa(&efi_unimplemented); + efi_runtime->reset_system = __pa(&efi_reset_system); + + efi_tables->guid = SAL_SYSTEM_TABLE_GUID; + efi_tables->table = __pa(sal_systab); + + /* fill in the SAL system table: */ + memcpy(sal_systab->signature, "SST_", 4); + sal_systab->size = sizeof(*sal_systab); + sal_systab->sal_rev_minor = 1; + sal_systab->sal_rev_major = 0; + sal_systab->entry_count = 1; + sal_systab->ia32_bios_present = 0; + +#ifdef CONFIG_IA64_GENERIC + strcpy(sal_systab->oem_id, "Generic"); + strcpy(sal_systab->product_id, "IA-64 system"); +#endif + +#ifdef CONFIG_IA64_HP_SIM + strcpy(sal_systab->oem_id, "Hewlett-Packard"); + strcpy(sal_systab->product_id, "HP-simulator"); +#endif + 
+#ifdef CONFIG_IA64_SDV + strcpy(sal_systab->oem_id, "Intel"); + strcpy(sal_systab->product_id, "SDV"); +#endif + +#ifdef CONFIG_IA64_SGI_SN1_SIM + strcpy(sal_systab->oem_id, "SGI"); + strcpy(sal_systab->product_id, "SN1"); +#endif + + /* fill in an entry point: */ + sal_ed->type = SAL_DESC_ENTRY_POINT; + sal_ed->pal_proc = __pa(pal_desc[0]); + sal_ed->sal_proc = __pa(sal_desc[0]); + sal_ed->gp = __pa(sal_desc[1]); + + for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp) + checksum += *cp; + + sal_systab->checksum = -checksum; + + /* fill in a memory descriptor: */ + md = &efi_memmap[0]; + md->type = EFI_CONVENTIONAL_MEMORY; + md->pad = 0; + md->phys_addr = 2*MB; + md->virt_addr = 0; + md->num_pages = (64*MB) >> 12; /* 64MB (in 4KB pages) */ + md->attribute = EFI_MEMORY_WB; + + /* descriptor for firmware emulator: */ + md = &efi_memmap[1]; + md->type = EFI_RUNTIME_SERVICES_DATA; + md->pad = 0; + md->phys_addr = 1*MB; + md->virt_addr = 0; + md->num_pages = (1*MB) >> 12; /* 1MB (in 4KB pages) */ + md->attribute = EFI_MEMORY_WB; + + /* descriptor for high memory (>4GB): */ + md = &efi_memmap[2]; + md->type = EFI_CONVENTIONAL_MEMORY; + md->pad = 0; + md->phys_addr = 4096*MB; + md->virt_addr = 0; + md->num_pages = (32*MB) >> 12; /* 32MB (in 4KB pages) */ + md->attribute = EFI_MEMORY_WB; + + bp = id(ZERO_PAGE_ADDR); + bp->efi_systab = __pa(&fw_mem); + bp->efi_memmap = __pa(efi_memmap); + bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t); + bp->efi_memdesc_size = sizeof(efi_memory_desc_t); + bp->efi_memdesc_version = 1; + bp->command_line = __pa(cmd_line); + bp->console_info.num_cols = 80; + bp->console_info.num_rows = 25; + bp->console_info.orig_x = 0; + bp->console_info.orig_y = 24; + bp->num_pci_vectors = 0; + bp->fpswa = 0; +} diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S new file mode 100644 index 000000000..24dc10ee4 --- /dev/null +++ b/arch/ia64/kernel/gate.S @@ -0,0 +1,200 @@ +/* + * This file contains the code that gets mapped at the upper end of + * each task's text region. For now, it contains the signal + * trampoline code only. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#include <asm/offsets.h> +#include <asm/sigcontext.h> +#include <asm/system.h> +#include <asm/unistd.h> +#include <asm/page.h> + + .psr abi64 + .psr lsb + .lsb + + .section __gate_section,"ax" + + .align PAGE_SIZE + +# define SIGINFO_OFF 16 +# define SIGCONTEXT_OFF (SIGINFO_OFF + ((IA64_SIGINFO_SIZE + 15) & ~15)) +# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET +# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET +# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET +# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET +# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET +# define base0 r2 +# define base1 r3 + /* + * When we get here, the memory stack looks like this: + * + * +===============================+ + * | | + * // struct sigcontext // + * | | + * +===============================+ <-- sp+SIGCONTEXT_OFF + * | | + * // rest of siginfo // + * | | + * + +---------------+ + * | | siginfo.code | + * +---------------+---------------+ + * | siginfo.errno | siginfo.signo | + * +-------------------------------+ <-- sp+SIGINFO_OFF + * | 16 byte of scratch | + * | space | + * +-------------------------------+ <-- sp + * + * The register stack looks _exactly_ the way it looked at the + * time the signal occurred. 
In other words, we're treading + * on a potential mine-field: each incoming general register + * may be a NaT value (including sp, in which case the process + * ends up dying with a SIGSEGV). + * + * The first thing we need to do is a cover to get the registers onto + * the backing store. Once that is done, we invoke the signal + * handler which may modify some of the machine state. After + * returning from the signal handler, we return control to the + * previous context by executing a sigreturn system call. A + * signal handler may call the rt_sigreturn() function to + * directly return to a given sigcontext. However, the + * user-level sigreturn() needs to do much more than calling + * the rt_sigreturn() system call as it needs to unwind the + * stack to restore preserved registers that may have been + * saved on the signal handler's call stack. + * + * On entry: + * r2 = signal number + * r3 = plabel of signal handler + * r15 = new register backing store (ignored) + * [sp+16] = sigframe + */ + + .global ia64_sigtramp + .proc ia64_sigtramp +ia64_sigtramp: + ld8 r10=[r3],8 // get signal handler entry point + br.call.sptk.many rp=invoke_sighandler +.ret0: mov r15=__NR_rt_sigreturn + break __BREAK_SYSCALL + .endp ia64_sigtramp + + .proc invoke_sighandler +invoke_sighandler: + ld8 gp=[r3] // get signal handler's global pointer + mov b6=r10 + cover // push args in interrupted frame onto backing store + ;; + alloc r8=ar.pfs,0,1,3,0 // get CFM0, EC0, and CPL0 into r8 + mov r17=ar.bsp // fetch ar.bsp + mov loc0=rp // save return pointer + ;; + cmp.ne p8,p0=r15,r0 // do we need to switch the rbs? + mov out0=r2 // signal number +(p8) br.cond.spnt.few setup_rbs // yup -> (clobbers r14 and r16) +back_from_setup_rbs: + adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp + ;; + st8 [base0]=r17,(CFM_OFF-BSP_OFF) // save sc_ar_bsp + adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp + ;; + + st8 [base0]=r8 // save CFM0, EC0, and CPL0 + adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp + ;; + stf.spill [base0]=f6,32 + stf.spill [base1]=f7,32 + ;; + stf.spill [base0]=f8,32 + stf.spill [base1]=f9,32 + ;; + stf.spill [base0]=f10,32 + stf.spill [base1]=f11,32 + adds out1=SIGINFO_OFF,sp // siginfo pointer + ;; + stf.spill [base0]=f12,32 + stf.spill [base1]=f13,32 + adds out2=SIGCONTEXT_OFF,sp // sigcontext pointer + ;; + stf.spill [base0]=f14,32 + stf.spill [base1]=f15,32 + br.call.sptk.few rp=b6 // call the signal handler +.ret2: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp + ;; + ld8 r15=[base0],(CFM_OFF-BSP_OFF) // fetch sc_ar_bsp and advance to CFM_OFF + mov r14=ar.bsp + ;; + ld8 r8=[base0] // restore (perhaps modified) CFM0, EC0, and CPL0 + cmp.ne p8,p0=r14,r15 // do we need to restore the rbs? 
+(p8) br.cond.spnt.few restore_rbs // yup -> (clobbers r14 and r16) +back_from_restore_rbs: + { + and r9=0x7f,r8 // r9 <- CFM0.sof + extr.u r10=r8,7,7 // r10 <- CFM0.sol + mov r11=ip + } + ;; + adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp + adds r11=(cont-back_from_restore_rbs),r11 + sub r9=r9,r10 // r9 <- CFM0.sof - CFM0.sol == CFM0.nout + ;; + adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp + dep r9=r9,r9,7,7 // r9.sol = r9.sof + mov b6=r11 + ;; + ldf.fill f6=[base0],32 + ldf.fill f7=[base1],32 + mov rp=loc0 // copy return pointer out of stacked register + ;; + ldf.fill f8=[base0],32 + ldf.fill f9=[base1],32 + ;; + ldf.fill f10=[base0],32 + ldf.fill f11=[base1],32 + ;; + ldf.fill f12=[base0],32 + ldf.fill f13=[base1],32 + mov ar.pfs=r9 + ;; + ldf.fill f14=[base0],32 + ldf.fill f15=[base1],32 + br.ret.sptk.few b6 +cont: mov ar.pfs=r8 // ar.pfs = CFM0 + br.ret.sptk.few rp // re-establish CFM0 + .endp invoke_sighandler + + .proc setup_rbs +setup_rbs: + flushrs // must be first in insn group + ;; + mov ar.rsc=r0 // put RSE into enforced lazy mode + adds r16=(RNAT_OFF+SIGCONTEXT_OFF),sp + mov r14=ar.rnat // get rnat as updated by flushrs + ;; + mov ar.bspstore=r15 // set new register backing store area + st8 [r16]=r14 // save sc_ar_rnat + ;; + mov ar.rsc=0xf // set RSE into eager mode, pl 3 + invala // invalidate ALAT + br.cond.sptk.many back_from_setup_rbs + .endp setup_rbs + + .proc restore_rbs +restore_rbs: + flushrs + mov ar.rsc=r0 // put RSE into enforced lazy mode + adds r16=(RNAT_OFF+SIGCONTEXT_OFF),sp + ;; + ld8 r14=[r16] // get new rnat + mov ar.bspstore=r15 // set old register backing store area + ;; + mov ar.rnat=r14 // establish new rnat + mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc) + // invala not necessary as that will happen when returning to user-mode + br.cond.sptk.many back_from_restore_rbs + + .endp restore_rbs diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S new file mode 100644 index 000000000..50d965e02 --- /dev/null +++ b/arch/ia64/kernel/head.S @@ -0,0 +1,646 @@ +/* + * Here is where the ball gets rolling as far as the kernel is concerned. + * When control is transferred to _start, the bootloader has already + * loaded us to the correct address. All that's left to do here is + * to set up the kernel's global pointer and jump to the kernel + * entry point. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 Intel Corp. 
+ * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com> + * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com> + */ + +#include <linux/config.h> + +#include <asm/fpu.h> +#include <asm/pal.h> +#include <asm/offsets.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> + + .psr abi64 + .psr lsb + .lsb + + .section __special_page_section,"ax" + + .global empty_zero_page +empty_zero_page: + .skip PAGE_SIZE + + .global swapper_pg_dir +swapper_pg_dir: + .skip PAGE_SIZE + + .global empty_bad_page +empty_bad_page: + .skip PAGE_SIZE + + .global empty_bad_pte_table +empty_bad_pte_table: + .skip PAGE_SIZE + + .global empty_bad_pmd_table +empty_bad_pmd_table: + .skip PAGE_SIZE + + .rodata +halt_msg: + stringz "Halting kernel\n" + + .text + .align 16 + .global _start + .proc _start +_start: + // set IVT entry point---can't access I/O ports without it + movl r3=ia64_ivt + ;; + mov cr.iva=r3 + movl r2=FPSR_DEFAULT + ;; + srlz.i + movl gp=__gp + + mov ar.fpsr=r2 + ;; + +#ifdef CONFIG_IA64_EARLY_PRINTK + mov r2=6 + mov r3=(8<<8) | (28<<2) + ;; + mov rr[r2]=r3 + ;; + srlz.i + ;; +#endif + +#define isAP p2 // are we booting an Application Processor (not the BSP)? + + // Find the init_task for the currently booting CPU. At poweron, and in + // UP mode, cpu_now_booting is 0 + movl r3=cpu_now_booting + ;; + ld4 r3=[r3] + movl r2=init_tasks + ;; + shladd r2=r3,3,r2 + ;; + ld8 r2=[r2] + cmp4.ne isAP,p0=r3,r0 // isAP == true if this is an application processor (AP) + ;; // RAW on r2 + extr r3=r2,0,61 // r3 == phys addr of task struct + ;; + + // load the "current" pointer (r13) and ar.k6 with the current task + mov r13=r2 + mov ar.k6=r3 // Physical address + ;; + /* + * Reserve space at the top of the stack for "struct pt_regs". Kernel threads + * don't store interesting values in that structure, but the space still needs + * to be there because time-critical stuff such as the context switching can + * be implemented more efficiently (for example, __switch_to() + * always sets the psr.dfh bit of the task it is switching to). 
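+ * + * A sketch of the per-task layout implied by the offsets used below: + * the task struct lives at the base of the task's stack area, the + * register backing store grows upward starting at IA64_RBS_OFFSET, + * and the memory stack grows downward from IA64_STK_OFFSET, with sp + * initially set IA64_PT_REGS_SIZE below that top to leave room for + * the pt_regs just mentioned.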
+ */ + addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 + addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE + mov ar.rsc=r0 // place RSE in enforced lazy mode + ;; + mov ar.bspstore=r2 // establish the new RSE stack + ;; + loadrs // load zero bytes from the register stack + ;; + mov ar.rsc=0x3 // place RSE in eager mode + ;; + +#ifdef CONFIG_IA64_EARLY_PRINTK + .rodata +alive_msg: + stringz "I'm alive and well\n" + .previous + + alloc r2=ar.pfs,0,0,2,0 + movl out0=alive_msg + ;; + br.call.sptk.few rp=early_printk +1: // force new bundle +#endif /* CONFIG_IA64_EARLY_PRINTK */ + + alloc r2=ar.pfs,8,0,2,0 +#ifdef CONFIG_SMP +(isAP) br.call.sptk.few rp=smp_callin +.ret1: +(isAP) br.cond.sptk.few self +#endif + +#undef isAP + + // This is executed by the bootstrap processor (bsp) only: + +#ifdef CONFIG_IA64_FW_EMU + // initialize PAL & SAL emulator: + br.call.sptk.few rp=sys_fw_init + ;; +#endif + br.call.sptk.few rp=start_kernel +.ret2: + addl r2=@ltoff(halt_msg),gp + ;; + ld8 out0=[r2] + br.call.sptk.few b0=console_print +self: br.sptk.few self // endless loop + .endp _start + + .align 16 + .global ia64_save_debug_regs + .proc ia64_save_debug_regs +ia64_save_debug_regs: + alloc r16=ar.pfs,1,0,0,0 + mov r20=ar.lc // preserve ar.lc + mov ar.lc=IA64_NUM_DBG_REGS-1 + mov r18=0 + add r19=IA64_NUM_DBG_REGS*8,in0 + ;; +1: mov r16=dbr[r18] + mov r17=ibr[r18] + add r18=1,r18 + ;; + st8.nta [in0]=r16,8 + st8.nta [r19]=r17,8 + br.cloop.sptk.few 1b + + mov ar.lc=r20 // restore ar.lc + br.ret.sptk.few b0 + .endp ia64_save_debug_regs + + .align 16 + .global ia64_load_debug_regs + .proc ia64_load_debug_regs +ia64_load_debug_regs: + alloc r16=ar.pfs,1,0,0,0 + lfetch.nta [in0] + mov r20=ar.lc // preserve ar.lc + add r19=IA64_NUM_DBG_REGS*8,in0 + mov ar.lc=IA64_NUM_DBG_REGS-1 + mov r18=-1 + ;; +1: ld8.nta r16=[in0],8 + ld8.nta r17=[r19],8 + add r18=1,r18 + ;; + mov dbr[r18]=r16 + mov ibr[r18]=r17 + br.cloop.sptk.few 1b + + mov ar.lc=r20 // restore ar.lc + br.ret.sptk.few b0 + .endp ia64_load_debug_regs + + .align 16 + .global __ia64_save_fpu + .proc __ia64_save_fpu +__ia64_save_fpu: + alloc r2=ar.pfs,1,0,0,0 + adds r3=16,in0 + ;; + stf.spill.nta [in0]=f32,32 + stf.spill.nta [ r3]=f33,32 + ;; + stf.spill.nta [in0]=f34,32 + stf.spill.nta [ r3]=f35,32 + ;; + stf.spill.nta [in0]=f36,32 + stf.spill.nta [ r3]=f37,32 + ;; + stf.spill.nta [in0]=f38,32 + stf.spill.nta [ r3]=f39,32 + ;; + stf.spill.nta [in0]=f40,32 + stf.spill.nta [ r3]=f41,32 + ;; + stf.spill.nta [in0]=f42,32 + stf.spill.nta [ r3]=f43,32 + ;; + stf.spill.nta [in0]=f44,32 + stf.spill.nta [ r3]=f45,32 + ;; + stf.spill.nta [in0]=f46,32 + stf.spill.nta [ r3]=f47,32 + ;; + stf.spill.nta [in0]=f48,32 + stf.spill.nta [ r3]=f49,32 + ;; + stf.spill.nta [in0]=f50,32 + stf.spill.nta [ r3]=f51,32 + ;; + stf.spill.nta [in0]=f52,32 + stf.spill.nta [ r3]=f53,32 + ;; + stf.spill.nta [in0]=f54,32 + stf.spill.nta [ r3]=f55,32 + ;; + stf.spill.nta [in0]=f56,32 + stf.spill.nta [ r3]=f57,32 + ;; + stf.spill.nta [in0]=f58,32 + stf.spill.nta [ r3]=f59,32 + ;; + stf.spill.nta [in0]=f60,32 + stf.spill.nta [ r3]=f61,32 + ;; + stf.spill.nta [in0]=f62,32 + stf.spill.nta [ r3]=f63,32 + ;; + stf.spill.nta [in0]=f64,32 + stf.spill.nta [ r3]=f65,32 + ;; + stf.spill.nta [in0]=f66,32 + stf.spill.nta [ r3]=f67,32 + ;; + stf.spill.nta [in0]=f68,32 + stf.spill.nta [ r3]=f69,32 + ;; + stf.spill.nta [in0]=f70,32 + stf.spill.nta [ r3]=f71,32 + ;; + stf.spill.nta [in0]=f72,32 + stf.spill.nta [ r3]=f73,32 + ;; + stf.spill.nta [in0]=f74,32 + stf.spill.nta [ r3]=f75,32 + ;; + stf.spill.nta 
[in0]=f76,32 + stf.spill.nta [ r3]=f77,32 + ;; + stf.spill.nta [in0]=f78,32 + stf.spill.nta [ r3]=f79,32 + ;; + stf.spill.nta [in0]=f80,32 + stf.spill.nta [ r3]=f81,32 + ;; + stf.spill.nta [in0]=f82,32 + stf.spill.nta [ r3]=f83,32 + ;; + stf.spill.nta [in0]=f84,32 + stf.spill.nta [ r3]=f85,32 + ;; + stf.spill.nta [in0]=f86,32 + stf.spill.nta [ r3]=f87,32 + ;; + stf.spill.nta [in0]=f88,32 + stf.spill.nta [ r3]=f89,32 + ;; + stf.spill.nta [in0]=f90,32 + stf.spill.nta [ r3]=f91,32 + ;; + stf.spill.nta [in0]=f92,32 + stf.spill.nta [ r3]=f93,32 + ;; + stf.spill.nta [in0]=f94,32 + stf.spill.nta [ r3]=f95,32 + ;; + stf.spill.nta [in0]=f96,32 + stf.spill.nta [ r3]=f97,32 + ;; + stf.spill.nta [in0]=f98,32 + stf.spill.nta [ r3]=f99,32 + ;; + stf.spill.nta [in0]=f100,32 + stf.spill.nta [ r3]=f101,32 + ;; + stf.spill.nta [in0]=f102,32 + stf.spill.nta [ r3]=f103,32 + ;; + stf.spill.nta [in0]=f104,32 + stf.spill.nta [ r3]=f105,32 + ;; + stf.spill.nta [in0]=f106,32 + stf.spill.nta [ r3]=f107,32 + ;; + stf.spill.nta [in0]=f108,32 + stf.spill.nta [ r3]=f109,32 + ;; + stf.spill.nta [in0]=f110,32 + stf.spill.nta [ r3]=f111,32 + ;; + stf.spill.nta [in0]=f112,32 + stf.spill.nta [ r3]=f113,32 + ;; + stf.spill.nta [in0]=f114,32 + stf.spill.nta [ r3]=f115,32 + ;; + stf.spill.nta [in0]=f116,32 + stf.spill.nta [ r3]=f117,32 + ;; + stf.spill.nta [in0]=f118,32 + stf.spill.nta [ r3]=f119,32 + ;; + stf.spill.nta [in0]=f120,32 + stf.spill.nta [ r3]=f121,32 + ;; + stf.spill.nta [in0]=f122,32 + stf.spill.nta [ r3]=f123,32 + ;; + stf.spill.nta [in0]=f124,32 + stf.spill.nta [ r3]=f125,32 + ;; + stf.spill.nta [in0]=f126,32 + stf.spill.nta [ r3]=f127,32 + br.ret.sptk.few rp + .endp __ia64_save_fpu + + .align 16 + .global __ia64_load_fpu + .proc __ia64_load_fpu +__ia64_load_fpu: + alloc r2=ar.pfs,1,0,0,0 + adds r3=16,in0 + ;; + ldf.fill.nta f32=[in0],32 + ldf.fill.nta f33=[ r3],32 + ;; + ldf.fill.nta f34=[in0],32 + ldf.fill.nta f35=[ r3],32 + ;; + ldf.fill.nta f36=[in0],32 + ldf.fill.nta f37=[ r3],32 + ;; + ldf.fill.nta f38=[in0],32 + ldf.fill.nta f39=[ r3],32 + ;; + ldf.fill.nta f40=[in0],32 + ldf.fill.nta f41=[ r3],32 + ;; + ldf.fill.nta f42=[in0],32 + ldf.fill.nta f43=[ r3],32 + ;; + ldf.fill.nta f44=[in0],32 + ldf.fill.nta f45=[ r3],32 + ;; + ldf.fill.nta f46=[in0],32 + ldf.fill.nta f47=[ r3],32 + ;; + ldf.fill.nta f48=[in0],32 + ldf.fill.nta f49=[ r3],32 + ;; + ldf.fill.nta f50=[in0],32 + ldf.fill.nta f51=[ r3],32 + ;; + ldf.fill.nta f52=[in0],32 + ldf.fill.nta f53=[ r3],32 + ;; + ldf.fill.nta f54=[in0],32 + ldf.fill.nta f55=[ r3],32 + ;; + ldf.fill.nta f56=[in0],32 + ldf.fill.nta f57=[ r3],32 + ;; + ldf.fill.nta f58=[in0],32 + ldf.fill.nta f59=[ r3],32 + ;; + ldf.fill.nta f60=[in0],32 + ldf.fill.nta f61=[ r3],32 + ;; + ldf.fill.nta f62=[in0],32 + ldf.fill.nta f63=[ r3],32 + ;; + ldf.fill.nta f64=[in0],32 + ldf.fill.nta f65=[ r3],32 + ;; + ldf.fill.nta f66=[in0],32 + ldf.fill.nta f67=[ r3],32 + ;; + ldf.fill.nta f68=[in0],32 + ldf.fill.nta f69=[ r3],32 + ;; + ldf.fill.nta f70=[in0],32 + ldf.fill.nta f71=[ r3],32 + ;; + ldf.fill.nta f72=[in0],32 + ldf.fill.nta f73=[ r3],32 + ;; + ldf.fill.nta f74=[in0],32 + ldf.fill.nta f75=[ r3],32 + ;; + ldf.fill.nta f76=[in0],32 + ldf.fill.nta f77=[ r3],32 + ;; + ldf.fill.nta f78=[in0],32 + ldf.fill.nta f79=[ r3],32 + ;; + ldf.fill.nta f80=[in0],32 + ldf.fill.nta f81=[ r3],32 + ;; + ldf.fill.nta f82=[in0],32 + ldf.fill.nta f83=[ r3],32 + ;; + ldf.fill.nta f84=[in0],32 + ldf.fill.nta f85=[ r3],32 + ;; + ldf.fill.nta f86=[in0],32 + ldf.fill.nta f87=[ r3],32 + ;; + ldf.fill.nta 
f88=[in0],32 + ldf.fill.nta f89=[ r3],32 + ;; + ldf.fill.nta f90=[in0],32 + ldf.fill.nta f91=[ r3],32 + ;; + ldf.fill.nta f92=[in0],32 + ldf.fill.nta f93=[ r3],32 + ;; + ldf.fill.nta f94=[in0],32 + ldf.fill.nta f95=[ r3],32 + ;; + ldf.fill.nta f96=[in0],32 + ldf.fill.nta f97=[ r3],32 + ;; + ldf.fill.nta f98=[in0],32 + ldf.fill.nta f99=[ r3],32 + ;; + ldf.fill.nta f100=[in0],32 + ldf.fill.nta f101=[ r3],32 + ;; + ldf.fill.nta f102=[in0],32 + ldf.fill.nta f103=[ r3],32 + ;; + ldf.fill.nta f104=[in0],32 + ldf.fill.nta f105=[ r3],32 + ;; + ldf.fill.nta f106=[in0],32 + ldf.fill.nta f107=[ r3],32 + ;; + ldf.fill.nta f108=[in0],32 + ldf.fill.nta f109=[ r3],32 + ;; + ldf.fill.nta f110=[in0],32 + ldf.fill.nta f111=[ r3],32 + ;; + ldf.fill.nta f112=[in0],32 + ldf.fill.nta f113=[ r3],32 + ;; + ldf.fill.nta f114=[in0],32 + ldf.fill.nta f115=[ r3],32 + ;; + ldf.fill.nta f116=[in0],32 + ldf.fill.nta f117=[ r3],32 + ;; + ldf.fill.nta f118=[in0],32 + ldf.fill.nta f119=[ r3],32 + ;; + ldf.fill.nta f120=[in0],32 + ldf.fill.nta f121=[ r3],32 + ;; + ldf.fill.nta f122=[in0],32 + ldf.fill.nta f123=[ r3],32 + ;; + ldf.fill.nta f124=[in0],32 + ldf.fill.nta f125=[ r3],32 + ;; + ldf.fill.nta f126=[in0],32 + ldf.fill.nta f127=[ r3],32 + br.ret.sptk.few rp + .endp __ia64_load_fpu + + .align 16 + .global __ia64_init_fpu + .proc __ia64_init_fpu +__ia64_init_fpu: + alloc r2=ar.pfs,0,0,0,0 + stf.spill [sp]=f0 + mov f32=f0 + ;; + ldf.fill f33=[sp] + ldf.fill f34=[sp] + mov f35=f0 + ;; + ldf.fill f36=[sp] + ldf.fill f37=[sp] + mov f38=f0 + ;; + ldf.fill f39=[sp] + ldf.fill f40=[sp] + mov f41=f0 + ;; + ldf.fill f42=[sp] + ldf.fill f43=[sp] + mov f44=f0 + ;; + ldf.fill f45=[sp] + ldf.fill f46=[sp] + mov f47=f0 + ;; + ldf.fill f48=[sp] + ldf.fill f49=[sp] + mov f50=f0 + ;; + ldf.fill f51=[sp] + ldf.fill f52=[sp] + mov f53=f0 + ;; + ldf.fill f54=[sp] + ldf.fill f55=[sp] + mov f56=f0 + ;; + ldf.fill f57=[sp] + ldf.fill f58=[sp] + mov f59=f0 + ;; + ldf.fill f60=[sp] + ldf.fill f61=[sp] + mov f62=f0 + ;; + ldf.fill f63=[sp] + ldf.fill f64=[sp] + mov f65=f0 + ;; + ldf.fill f66=[sp] + ldf.fill f67=[sp] + mov f68=f0 + ;; + ldf.fill f69=[sp] + ldf.fill f70=[sp] + mov f71=f0 + ;; + ldf.fill f72=[sp] + ldf.fill f73=[sp] + mov f74=f0 + ;; + ldf.fill f75=[sp] + ldf.fill f76=[sp] + mov f77=f0 + ;; + ldf.fill f78=[sp] + ldf.fill f79=[sp] + mov f80=f0 + ;; + ldf.fill f81=[sp] + ldf.fill f82=[sp] + mov f83=f0 + ;; + ldf.fill f84=[sp] + ldf.fill f85=[sp] + mov f86=f0 + ;; + ldf.fill f87=[sp] + ldf.fill f88=[sp] + mov f89=f0 + ;; + ldf.fill f90=[sp] + ldf.fill f91=[sp] + mov f92=f0 + ;; + ldf.fill f93=[sp] + ldf.fill f94=[sp] + mov f95=f0 + ;; + ldf.fill f96=[sp] + ldf.fill f97=[sp] + mov f98=f0 + ;; + ldf.fill f99=[sp] + ldf.fill f100=[sp] + mov f101=f0 + ;; + ldf.fill f102=[sp] + ldf.fill f103=[sp] + mov f104=f0 + ;; + ldf.fill f105=[sp] + ldf.fill f106=[sp] + mov f107=f0 + ;; + ldf.fill f108=[sp] + ldf.fill f109=[sp] + mov f110=f0 + ;; + ldf.fill f111=[sp] + ldf.fill f112=[sp] + mov f113=f0 + ;; + ldf.fill f114=[sp] + ldf.fill f115=[sp] + mov f116=f0 + ;; + ldf.fill f117=[sp] + ldf.fill f118=[sp] + mov f119=f0 + ;; + ldf.fill f120=[sp] + ldf.fill f121=[sp] + mov f122=f0 + ;; + ldf.fill f123=[sp] + ldf.fill f124=[sp] + mov f125=f0 + ;; + ldf.fill f126=[sp] + mov f127=f0 + br.ret.sptk.few rp + .endp __ia64_init_fpu diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c new file mode 100644 index 000000000..122650461 --- /dev/null +++ b/arch/ia64/kernel/init_task.c @@ -0,0 +1,31 @@ +/* + * This is where we statically allocate 
and initialize the initial + * task. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/sched.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> + +static struct vm_area_struct init_mmap = INIT_MMAP; +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS; +struct mm_struct init_mm = INIT_MM(init_mm); + +/* + * Initial task structure. + * + * We need to make sure that this is page aligned due to the way + * process stacks are handled. This is done by having a special + * "init_task" linker map entry.. + */ +union task_union init_task_union + __attribute__((section("init_task"))) = + { INIT_TASK(init_task_union.task) }; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c new file mode 100644 index 000000000..01c201137 --- /dev/null +++ b/arch/ia64/kernel/irq.c @@ -0,0 +1,657 @@ +/* + * linux/arch/ia64/kernel/irq.c + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * 6/10/99: Updated to bring in sync with x86 version to facilitate + * support for SMP and different interrupt controllers. + */ + +#include <linux/config.h> + +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/kernel_stat.h> +#include <linux/malloc.h> +#include <linux/ptrace.h> +#include <linux/random.h> /* for rand_initialize_irq() */ +#include <linux/signal.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/threads.h> + +#ifdef CONFIG_KDB +# include <linux/kdb.h> +#endif + +#include <asm/bitops.h> +#include <asm/delay.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/machvec.h> +#include <asm/pgtable.h> +#include <asm/system.h> + +/* This is used to detect bad usage of probe_irq_on()/probe_irq_off(). */ +#define PROBE_IRQ_COOKIE 0xfeedC0FFEE + +struct irq_desc irq_desc[NR_IRQS]; + +/* + * Micro-access to controllers is serialized over the whole + * system. We never hold this lock when we call the actual + * IRQ handler. + */ +spinlock_t irq_controller_lock; + +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC +spinlock_t ivr_read_lock; +#endif + +unsigned int local_bh_count[NR_CPUS]; +/* + * used in irq_enter()/irq_exit() + */ +unsigned int local_irq_count[NR_CPUS]; + +static struct irqaction timer_action = { NULL, 0, 0, NULL, NULL, NULL}; + +#ifdef CONFIG_SMP +static struct irqaction ipi_action = { NULL, 0, 0, NULL, NULL, NULL}; +#endif + +/* + * Legacy IRQ to IA-64 vector translation table. Any vector not in + * this table maps to itself (ie: irq 0x30 => IA64 vector 0x30) + */ +__u8 irq_to_vector_map[IA64_MIN_VECTORED_IRQ] = { + /* 8259 IRQ translation, first 16 entries */ + TIMER_IRQ, 0x50, 0x0f, 0x51, 0x52, 0x53, 0x43, 0x54, + 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x40, 0x41, +}; + +/* + * Reverse of the above table. 
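+ * For example, init_IRQ() below initializes the reverse map so that + * vector_to_legacy_map[TIMER_IRQ] == 0 and vector_to_legacy_map[0x50] == 1; + * invoke_irq_handlers() uses this to hand an SA_LEGACY driver back the + * 8259 IRQ number it originally requested.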
+ */ +static __u8 vector_to_legacy_map[256]; + +/* + * used by proc fs (/proc/interrupts) + */ +int +get_irq_list (char *buf) +{ + int i; + struct irqaction * action; + char *p = buf; + +#ifdef CONFIG_SMP + p += sprintf(p, " "); + for (i = 0; i < smp_num_cpus; i++) + p += sprintf(p, "CPU%d ", i); + *p++ = '\n'; +#endif + /* + * Simply scans the external vectored interrupts + */ + for (i = 0; i < NR_IRQS; i++) { + action = irq_desc[i].action; + if (!action) + continue; + p += sprintf(p, "%3d: ",i); +#ifndef CONFIG_SMP + p += sprintf(p, "%10u ", kstat_irqs(i)); +#else + { + int j; + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + kstat.irqs[cpu_logical_map(j)][i]); + } +#endif + p += sprintf(p, " %14s", irq_desc[i].handler->typename); + p += sprintf(p, " %c%s", (action->flags & SA_INTERRUPT) ? '+' : ' ', + action->name); + + for (action = action->next; action; action = action->next) { + p += sprintf(p, ", %c%s", + (action->flags & SA_INTERRUPT)?'+':' ', + action->name); + } + *p++ = '\n'; + } + return p - buf; +} + +/* + * That's where the IVT branches when we get an external + * interrupt. This branches to the correct hardware IRQ handler via + * function ptr. + */ +void +ia64_handle_irq (unsigned long irq, struct pt_regs *regs) +{ + unsigned long bsp, sp, saved_tpr; + +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC +# ifndef CONFIG_SMP + static unsigned int max_prio = 0; +# endif + unsigned int prev_prio; + unsigned long eoi_ptr; + +# ifdef CONFIG_USB + disable_usb(); +# endif + /* + * Stop IPIs by getting the ivr_read_lock + */ + spin_lock(&ivr_read_lock); + + /* + * Disable PCI writes + */ + outl(0x80ff81c0, 0xcf8); + outl(0x73002188, 0xcfc); + eoi_ptr = inl(0xcfc); + + irq = ia64_get_ivr(); + + /* + * Enable PCI writes + */ + outl(0x73182188, 0xcfc); + + spin_unlock(&ivr_read_lock); + +# ifdef CONFIG_USB + reenable_usb(); +# endif + +# ifndef CONFIG_SMP + prev_prio = max_prio; + if (irq < max_prio) { + printk ("ia64_handle_irq: got irq %lu while %u was in progress!\n", + irq, max_prio); + + } else + max_prio = irq; +# endif /* !CONFIG_SMP */ +#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ + + /* Always set TPR to limit maximum interrupt nesting depth to + * 16 (without this, it would be ~240, which could easily lead + * to kernel stack overflows). + */ + saved_tpr = ia64_get_tpr(); + ia64_srlz_d(); + ia64_set_tpr(irq); + ia64_srlz_d(); + + asm ("mov %0=ar.bsp" : "=r"(bsp)); + asm ("mov %0=sp" : "=r"(sp)); + + if ((sp - bsp) < 1024) { + static long last_time; + static unsigned char count; + + if (count > 5 && jiffies - last_time > 5*HZ) + count = 0; + if (++count < 5) { + last_time = jiffies; + printk("ia64_handle_irq: DANGER: less than 1KB of free stack space!!\n" + "(bsp=0x%lx, sp=0x%lx)\n", bsp, sp); + } +#ifdef CONFIG_KDB + kdb(KDB_REASON_PANIC, 0, regs); +#endif + } + + /* + * The interrupt is now said to be in service + */ + if (irq >= NR_IRQS) { + printk("handle_irq: invalid irq=%lu\n", irq); + goto out; + } + + ++kstat.irqs[smp_processor_id()][irq]; + + if (irq == IA64_SPURIOUS_INT) { + printk("handle_irq: spurious interrupt\n"); + goto out; + } + + /* + * Handle the interrupt by calling the hardware specific handler (IOSAPIC, Internal, etc). 
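+ * For the timer interrupt, for example, init_IRQ() below installs + * irq_type_ia64_internal for TIMER_IRQ, so this indirect call lands in + * internal_handle_irq() (see irq_internal.c), which in turn runs the + * registered handlers via invoke_irq_handlers().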
+ */ + (*irq_desc[irq].handler->handle)(irq, regs); + out: +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + { + long pEOI; + + asm ("mov %0=0;; (p1) mov %0=1" : "=r"(pEOI)); + if (!pEOI) { + printk("Yikes: ia64_handle_irq() without pEOI!!\n"); + asm volatile ("cmp.eq p1,p0=r0,r0" : "=r"(pEOI)); +# ifdef CONFIG_KDB + kdb(KDB_REASON_PANIC, 0, regs); +# endif + } + } + + local_irq_disable(); +# ifndef CONFIG_SMP + if (max_prio == irq) + max_prio = prev_prio; +# endif /* !CONFIG_SMP */ +#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ + + ia64_srlz_d(); + ia64_set_tpr(saved_tpr); + ia64_srlz_d(); +} + + +/* + * This should really return information about whether we should do + * bottom half handling etc. Right now we end up _always_ checking the + * bottom half, which is a waste of time and is not what some drivers + * would prefer. + */ +int +invoke_irq_handlers (unsigned int irq, struct pt_regs *regs, struct irqaction *action) +{ + void (*handler)(int, void *, struct pt_regs *); + unsigned long flags, flags_union = 0; + int cpu = smp_processor_id(); + unsigned int requested_irq; + void *dev_id; + + irq_enter(cpu, irq); + + if ((action->flags & SA_INTERRUPT) == 0) + __sti(); + + do { + flags = action->flags; + requested_irq = irq; + if ((flags & SA_LEGACY) != 0) + requested_irq = vector_to_legacy_map[irq]; + flags_union |= flags; + handler = action->handler; + dev_id = action->dev_id; + action = action->next; + (*handler)(requested_irq, dev_id, regs); + } while (action); + if ((flags_union & SA_SAMPLE_RANDOM) != 0) + add_interrupt_randomness(irq); + __cli(); + + irq_exit(cpu, irq); + return flags_union | 1; /* force the "do bottom halves" bit */ +} + +void +disable_irq_nosync (unsigned int irq) +{ + unsigned long flags; + + irq = map_legacy_irq(irq); + + spin_lock_irqsave(&irq_controller_lock, flags); + if (irq_desc[irq].depth++ == 0) { + irq_desc[irq].status &= ~IRQ_ENABLED; + irq_desc[irq].handler->disable(irq); + } + spin_unlock_irqrestore(&irq_controller_lock, flags); +} + +/* + * Synchronous version of the above, making sure the IRQ is + * no longer running on any other CPU.. + */ +void +disable_irq (unsigned int irq) +{ + disable_irq_nosync(irq); + + irq = map_legacy_irq(irq); + + if (!local_irq_count[smp_processor_id()]) { + do { + barrier(); + } while ((irq_desc[irq].status & IRQ_INPROGRESS) != 0); + } +} + +void +enable_irq (unsigned int irq) +{ + unsigned long flags; + + irq = map_legacy_irq(irq); + + spin_lock_irqsave(&irq_controller_lock, flags); + switch (irq_desc[irq].depth) { + case 1: + irq_desc[irq].status |= IRQ_ENABLED; + (*irq_desc[irq].handler->enable)(irq); + /* fall through */ + default: + --irq_desc[irq].depth; + break; + + case 0: + printk("enable_irq: unbalanced from %p\n", __builtin_return_address(0)); + } + spin_unlock_irqrestore(&irq_controller_lock, flags); +} + +/* + * This function encapsulates the initialization that needs to be + * performed under the protection of the irq_controller_lock. The + * lock must have been acquired by the time this is called. 
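+ * (For example, request_irq() below brackets its call to setup_irq() + * with spin_lock_irqsave(&irq_controller_lock, flags) and + * spin_unlock_irqrestore().)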
+ */ +static inline int +setup_irq (unsigned int irq, struct irqaction *new) +{ + int shared = 0; + struct irqaction *old, **p; + + p = &irq_desc[irq].action; + old = *p; + if (old) { + if (!(old->flags & new->flags & SA_SHIRQ)) { + return -EBUSY; + } + /* add new interrupt at end of irq queue */ + do { + p = &old->next; + old = *p; + } while (old); + shared = 1; + } + *p = new; + + /* when sharing do not unmask */ + if (!shared) { + irq_desc[irq].depth = 0; + irq_desc[irq].status |= IRQ_ENABLED; + (*irq_desc[irq].handler->startup)(irq); + } + return 0; +} + +int +request_irq (unsigned int requested_irq, void (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, const char * devname, void *dev_id) +{ + int retval, need_kfree = 0; + struct irqaction *action; + unsigned long flags; + unsigned int irq; + +#ifdef IA64_DEBUG + printk("request_irq(0x%x) called\n", requested_irq); +#endif + /* + * Sanity-check: shared interrupts should REALLY pass in + * a real dev-ID, otherwise we'll have trouble later trying + * to figure out which interrupt is which (messes up the + * interrupt freeing logic etc). + */ + if ((irqflags & SA_SHIRQ) && !dev_id) + printk("Bad boy: %s (at %p) called us without a dev_id!\n", + devname, current_text_addr()); + + irq = map_legacy_irq(requested_irq); + if (irq != requested_irq) + irqflags |= SA_LEGACY; + + if (irq >= NR_IRQS) + return -EINVAL; + + if (!handler) + return -EINVAL; + + /* + * The timer_action and ipi_action cannot be allocated + * dynamically because their initialization happens really early + * on in init/main.c; at that point the memory allocator has + * not yet been initialized. So we use statically reserved + * buffers for them. In some sense that's no big deal because we + * need them no matter what. 
+ */ + if (irq == TIMER_IRQ) + action = &timer_action; +#ifdef CONFIG_SMP + else if (irq == IPI_IRQ) + action = &ipi_action; +#endif + else { + action = kmalloc(sizeof(struct irqaction), GFP_KERNEL); + need_kfree = 1; + } + + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irqflags; + action->mask = 0; + action->name = devname; + action->next = NULL; + action->dev_id = dev_id; + + if ((irqflags & SA_SAMPLE_RANDOM) != 0) + rand_initialize_irq(irq); + + spin_lock_irqsave(&irq_controller_lock, flags); + retval = setup_irq(irq, action); + spin_unlock_irqrestore(&irq_controller_lock, flags); + + if (need_kfree && retval) + kfree(action); + + return retval; +} + +void +free_irq (unsigned int irq, void *dev_id) +{ + struct irqaction *action, **p; + unsigned long flags; + + /* + * some sanity checks first + */ + if (irq >= NR_IRQS) { + printk("Trying to free IRQ%d\n",irq); + return; + } + + irq = map_legacy_irq(irq); + + /* + * Find the corresponding irqaction + */ + spin_lock_irqsave(&irq_controller_lock, flags); + for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) { + if (action->dev_id != dev_id) + continue; + + /* Found it - now remove it from the list of entries */ + *p = action->next; + if (!irq_desc[irq].action) { + irq_desc[irq].status &= ~IRQ_ENABLED; + (*irq_desc[irq].handler->shutdown)(irq); + } + + spin_unlock_irqrestore(&irq_controller_lock, flags); + +#ifdef CONFIG_SMP + /* Wait to make sure it's not being used on another CPU */ + while (irq_desc[irq].status & IRQ_INPROGRESS) + barrier(); +#endif + + if (action != &timer_action +#ifdef CONFIG_SMP + && action != &ipi_action +#endif + ) + kfree(action); + return; + } + printk("Trying to free free IRQ%d\n", irq); +} + +/* + * IRQ autodetection code. Note that the return value of + * probe_irq_on() is no longer being used (its role has been replaced + * by the IRQ_AUTODETECT flag). 
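+ * + * Typical usage in a driver probe routine would look roughly like this + * (a sketch; error handling omitted): + * + * cookie = probe_irq_on(); + * ... cause the device to raise its interrupt ... + * irq = probe_irq_off(cookie); // > 0: unique IRQ found, + * // 0: none, < 0: several candidates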
+ */ +unsigned long +probe_irq_on (void) +{ + struct irq_desc *id; + unsigned long delay; + +#ifdef IA64_DEBUG + printk("probe_irq_on() called\n"); +#endif + + spin_lock_irq(&irq_controller_lock); + for (id = irq_desc; id < irq_desc + NR_IRQS; ++id) { + if (!id->action) { + id->status |= IRQ_AUTODETECT | IRQ_WAITING; + (*id->handler->startup)(id - irq_desc); + } + } + spin_unlock_irq(&irq_controller_lock); + + /* wait for spurious interrupts to trigger: */ + + for (delay = jiffies + HZ/10; time_after(delay, jiffies); ) + /* about 100ms delay */ + synchronize_irq(); + + /* filter out obviously spurious interrupts: */ + spin_lock_irq(&irq_controller_lock); + for (id = irq_desc; id < irq_desc + NR_IRQS; ++id) { + unsigned int status = id->status; + + if (!(status & IRQ_AUTODETECT)) + continue; + + if (!(status & IRQ_WAITING)) { + id->status = status & ~IRQ_AUTODETECT; + (*id->handler->shutdown)(id - irq_desc); + } + } + spin_unlock_irq(&irq_controller_lock); + return PROBE_IRQ_COOKIE; /* return meaningless return value */ +} + +int +probe_irq_off (unsigned long cookie) +{ + int irq_found, nr_irqs; + struct irq_desc *id; + +#ifdef IA64_DEBUG + printk("probe_irq_off(cookie=0x%lx) -> ", cookie); +#endif + + if (cookie != PROBE_IRQ_COOKIE) + printk("bad irq probe from %p\n", __builtin_return_address(0)); + + nr_irqs = 0; + irq_found = 0; + spin_lock_irq(&irq_controller_lock); + for (id = irq_desc + IA64_MIN_VECTORED_IRQ; id < irq_desc + NR_IRQS; ++id) { + unsigned int status = id->status; + + if (!(status & IRQ_AUTODETECT)) + continue; + + if (!(status & IRQ_WAITING)) { + if (!nr_irqs) + irq_found = (id - irq_desc); + ++nr_irqs; + } + id->status = status & ~IRQ_AUTODETECT; + (*id->handler->shutdown)(id - irq_desc); + } + spin_unlock_irq(&irq_controller_lock); + + if (nr_irqs > 1) + irq_found = -irq_found; + +#ifdef IA64_DEBUG + printk("%d\n", irq_found); +#endif + return irq_found; +} + +#ifdef CONFIG_SMP + +void __init +init_IRQ_SMP (void) +{ + if (request_irq(IPI_IRQ, handle_IPI, 0, "IPI", NULL)) + panic("Could not allocate IPI Interrupt Handler!"); +} + +#endif + +void __init +init_IRQ (void) +{ + int i; + + for (i = 0; i < IA64_MIN_VECTORED_IRQ; ++i) + vector_to_legacy_map[irq_to_vector_map[i]] = i; + + for (i = 0; i < NR_IRQS; ++i) { + irq_desc[i].handler = &irq_type_default; + } + + irq_desc[TIMER_IRQ].handler = &irq_type_ia64_internal; +#ifdef CONFIG_SMP + /* + * Configure the IPI vector and handler + */ + irq_desc[IPI_IRQ].handler = &irq_type_ia64_internal; + init_IRQ_SMP(); +#endif + + ia64_set_pmv(1 << 16); + ia64_set_cmcv(CMC_IRQ); /* XXX fix me */ + + platform_irq_init(irq_desc); + + /* clear TPR to enable all interrupt classes: */ + ia64_set_tpr(0); +} + +/* TBD: + * Certain IA64 platforms can have inter-processor interrupt support. + * This interface is supposed to default to the IA64 IPI block-based + * mechanism if the platform doesn't provide a separate mechanism + * for IPIs. + * Choices : (1) Extend hw_interrupt_type interfaces + * (2) Use machine vector mechanism + * For now defining the following interface as a place holder. 
+ */ +void +ipi_send (int cpu, int vector, int delivery_mode) +{ +} diff --git a/arch/ia64/kernel/irq_default.c b/arch/ia64/kernel/irq_default.c new file mode 100644 index 000000000..bf8c62642 --- /dev/null +++ b/arch/ia64/kernel/irq_default.c @@ -0,0 +1,30 @@ +#include <linux/kernel.h> +#include <linux/sched.h> + +#include <asm/irq.h> +#include <asm/processor.h> +#include <asm/ptrace.h> + + +static int +irq_default_handle_irq (unsigned int irq, struct pt_regs *regs) +{ + printk("Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id()); + return 0; /* don't call do_bottom_half() for spurious interrupts */ +} + +static void +irq_default_noop (unsigned int irq) +{ + /* nuthing to do... */ +} + +struct hw_interrupt_type irq_type_default = { + "default", + (void (*)(unsigned long)) irq_default_noop, /* init */ + irq_default_noop, /* startup */ + irq_default_noop, /* shutdown */ + irq_default_handle_irq, /* handle */ + irq_default_noop, /* enable */ + irq_default_noop /* disable */ +}; diff --git a/arch/ia64/kernel/irq_internal.c b/arch/ia64/kernel/irq_internal.c new file mode 100644 index 000000000..1ae904fe8 --- /dev/null +++ b/arch/ia64/kernel/irq_internal.c @@ -0,0 +1,71 @@ +/* + * Internal Interrupt Vectors + * + * This takes care of interrupts that are generated by the CPU + * internally, such as the ITC and IPI interrupts. + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + */ + +#include <linux/kernel.h> +#include <linux/sched.h> + +#include <asm/irq.h> +#include <asm/processor.h> +#include <asm/ptrace.h> + +/* + * This is identical to IOSAPIC handle_irq. It may go away . . . + */ +static int +internal_handle_irq (unsigned int irq, struct pt_regs *regs) +{ + struct irqaction *action = 0; + struct irq_desc *id = irq_desc + irq; + unsigned int status; + int retval; + + spin_lock(&irq_controller_lock); + { + status = id->status; + if ((status & IRQ_ENABLED) != 0) + action = id->action; + id->status = status & ~(IRQ_REPLAY | IRQ_WAITING); + } + spin_unlock(&irq_controller_lock); + + if (!action) { + if (!(id->status & IRQ_AUTODETECT)) + printk("internal_handle_irq: unexpected interrupt %u\n", irq); + return 0; + } + + retval = invoke_irq_handlers(irq, regs, action); + + spin_lock(&irq_controller_lock); + { + status = (id->status & ~IRQ_INPROGRESS); + id->status = status; + } + spin_unlock(&irq_controller_lock); + + return retval; +} + +static void +internal_noop (unsigned int irq) +{ + /* nuthing to do... */ +} + +struct hw_interrupt_type irq_type_ia64_internal = { + "IA64 internal", + (void (*)(unsigned long)) internal_noop, /* init */ + internal_noop, /* startup */ + internal_noop, /* shutdown */ + internal_handle_irq, /* handle */ + internal_noop, /* enable */ + internal_noop /* disable */ +}; + diff --git a/arch/ia64/kernel/irq_lock.c b/arch/ia64/kernel/irq_lock.c new file mode 100644 index 000000000..9c512dd4e --- /dev/null +++ b/arch/ia64/kernel/irq_lock.c @@ -0,0 +1,287 @@ +/* + * SMP IRQ Lock support + * + * Global interrupt locks for SMP. Allow interrupts to come in on any + * CPU, yet make cli/sti act globally to protect critical regions.. + * These functions usually appear in irq.c, but I think it's cleaner this way. 
+ * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + */ + +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/smp.h> +#include <linux/threads.h> +#include <linux/init.h> + +#include <asm/system.h> +#include <asm/processor.h> +#include <asm/irq.h> +#include <asm/bitops.h> +#include <asm/pgtable.h> +#include <asm/delay.h> + +int global_irq_holder = NO_PROC_ID; +spinlock_t global_irq_lock; +atomic_t global_irq_count; +atomic_t global_bh_count; +atomic_t global_bh_lock; + +#define INIT_STUCK (1<<26) + +void +irq_enter(int cpu, int irq) +{ + int stuck = INIT_STUCK; + + hardirq_enter(cpu, irq); + barrier(); + while (global_irq_lock.lock) { + if (cpu == global_irq_holder) { + break; + } + + if (!--stuck) { + printk("irq_enter stuck (irq=%d, cpu=%d, global=%d)\n", + irq, cpu,global_irq_holder); + stuck = INIT_STUCK; + } + barrier(); + } +} + +void +irq_exit(int cpu, int irq) +{ + hardirq_exit(cpu, irq); + release_irqlock(cpu); +} + +static void +show(char * str) +{ + int i; + unsigned long *stack; + int cpu = smp_processor_id(); + + printk("\n%s, CPU %d:\n", str, cpu); + printk("irq: %d [%d %d]\n", + atomic_read(&global_irq_count), local_irq_count[0], local_irq_count[1]); + printk("bh: %d [%d %d]\n", + atomic_read(&global_bh_count), local_bh_count[0], local_bh_count[1]); + + stack = (unsigned long *) &stack; + for (i = 40; i ; i--) { + unsigned long x = *++stack; + if (x > (unsigned long) &get_options && x < (unsigned long) &vsprintf) { + printk("<[%08lx]> ", x); + } + } +} + +#define MAXCOUNT 100000000 + +static inline void +wait_on_bh(void) +{ + int count = MAXCOUNT; + do { + if (!--count) { + show("wait_on_bh"); + count = ~0; + } + /* nothing .. wait for the other bh's to go away */ + } while (atomic_read(&global_bh_count) != 0); +} + +static inline void +wait_on_irq(int cpu) +{ + int count = MAXCOUNT; + + for (;;) { + + /* + * Wait until all interrupts are gone. Wait + * for bottom half handlers unless we're + * already executing in one.. + */ + if (!atomic_read(&global_irq_count)) { + if (local_bh_count[cpu] || !atomic_read(&global_bh_count)) + break; + } + + /* Duh, we have to loop. Release the lock to avoid deadlocks */ + spin_unlock(&global_irq_lock); + mb(); + + for (;;) { + if (!--count) { + show("wait_on_irq"); + count = ~0; + } + __sti(); + udelay(cpu + 1); + __cli(); + if (atomic_read(&global_irq_count)) + continue; + if (global_irq_lock.lock) + continue; + if (!local_bh_count[cpu] && atomic_read(&global_bh_count)) + continue; + if (spin_trylock(&global_irq_lock)) + break; + } + } +} + +/* + * This is called when we want to synchronize with + * bottom half handlers. We need to wait until + * no other CPU is executing any bottom half handler. + * + * Don't wait if we're already running in an interrupt + * context or are inside a bh handler. + */ +void +synchronize_bh(void) +{ + if (atomic_read(&global_bh_count)) { + int cpu = smp_processor_id(); + if (!local_irq_count[cpu] && !local_bh_count[cpu]) { + wait_on_bh(); + } + } +} + + +/* + * This is called when we want to synchronize with + * interrupts. We may for example tell a device to + * stop sending interrupts: but to make sure there + * are no interrupts that are executing on another + * CPU we need to call this function. 
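+ * + * A typical caller quiesces the device first and only then tears down + * whatever the handler dereferences, e.g. (hypothetical device + * registers, for illustration only): + * + * writeb(0, dev->intr_enable_reg); // hypothetical: mask the device + * synchronize_irq(); + * free_irq(dev->irq, dev);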
+ */ +void +synchronize_irq(void) +{ + int cpu = smp_processor_id(); + int local_count; + int global_count; + + mb(); + do { + local_count = local_irq_count[cpu]; + global_count = atomic_read(&global_irq_count); + } while (global_count != local_count); +} + +static inline void +get_irqlock(int cpu) +{ + if (!spin_trylock(&global_irq_lock)) { + /* do we already hold the lock? */ + if ((unsigned char) cpu == global_irq_holder) + return; + /* Uhhuh.. Somebody else got it. Wait.. */ + spin_lock(&global_irq_lock); + } + /* + * We also have to make sure that nobody else is running + * in an interrupt context. + */ + wait_on_irq(cpu); + + /* + * Ok, finally.. + */ + global_irq_holder = cpu; +} + +/* + * A global "cli()" while in an interrupt context + * turns into just a local cli(). Interrupts + * should use spinlocks for the (very unlikely) + * case that they ever want to protect against + * each other. + * + * If we already have local interrupts disabled, + * this will not turn a local disable into a + * global one (problems with spinlocks: this makes + * save_flags+cli+sti usable inside a spinlock). + */ +void +__global_cli(void) +{ + unsigned long flags; + + __save_flags(flags); + if (flags & IA64_PSR_I) { + int cpu = smp_processor_id(); + __cli(); + if (!local_irq_count[cpu]) + get_irqlock(cpu); + } +} + +void +__global_sti(void) +{ + int cpu = smp_processor_id(); + + if (!local_irq_count[cpu]) + release_irqlock(cpu); + __sti(); +} + +/* + * SMP flags value to restore to: + * 0 - global cli + * 1 - global sti + * 2 - local cli + * 3 - local sti + */ +unsigned long +__global_save_flags(void) +{ + int retval; + int local_enabled; + unsigned long flags; + + __save_flags(flags); + local_enabled = flags & IA64_PSR_I; + /* default to local */ + retval = 2 + local_enabled; + + /* check for global flags if we're not in an interrupt */ + if (!local_irq_count[smp_processor_id()]) { + if (local_enabled) + retval = 1; + if (global_irq_holder == (unsigned char) smp_processor_id()) + retval = 0; + } + return retval; +} + +void +__global_restore_flags(unsigned long flags) +{ + switch (flags) { + case 0: + __global_cli(); + break; + case 1: + __global_sti(); + break; + case 2: + __cli(); + break; + case 3: + __sti(); + break; + default: + printk("global_restore_flags: %08lx (%08lx)\n", + flags, (&flags)[-1]); + } +} diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S new file mode 100644 index 000000000..4c3ac242a --- /dev/null +++ b/arch/ia64/kernel/ivt.S @@ -0,0 +1,1342 @@ +/* + * arch/ia64/kernel/ivt.S + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 1998-2000 David Mosberger <davidm@hpl.hp.com> + */ + +#include <linux/config.h> + +#include <asm/break.h> +#include <asm/offsets.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/unistd.h> + +#include "entry.h" + +/* + * A couple of convenience macros that make writing and reading + * SAVE_MIN and SAVE_REST easier. + */ +#define rARPR r31 +#define rCRIFS r30 +#define rCRIPSR r29 +#define rCRIIP r28 +#define rARRSC r27 +#define rARPFS r26 +#define rARUNAT r25 +#define rARRNAT r24 +#define rARBSPSTORE r23 +#define rKRBS r22 +#define rB6 r21 +#define rR1 r20 + +/* + * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves + * the minimum state necessary that allows us to turn psr.ic back + * on. 
+ * + * Assumed state upon entry: + * psr.ic: off + * psr.dt: off + * r31: contains saved predicates (pr) + * + * Upon exit, the state is as follows: + * psr.ic: off + * psr.dt: off + * r2 = points to &pt_regs.r16 + * r12 = kernel sp (kernel virtual address) + * r13 = points to current task_struct (kernel virtual address) + * p15 = TRUE if psr.i is set in cr.ipsr + * predicate registers (other than p6, p7, and p15), b6, r3, r8, r9, r10, r11, r14, r15: + * preserved + * + * Note that psr.ic is NOT turned on by this macro. This is so that + * we can pass interruption state as arguments to a handler. + */ +#define DO_SAVE_MIN(COVER,EXTRA) \ + mov rARRSC=ar.rsc; \ + mov rARPFS=ar.pfs; \ + mov rR1=r1; \ + mov rARUNAT=ar.unat; \ + mov rCRIPSR=cr.ipsr; \ + mov rB6=b6; /* rB6 = branch reg 6 */ \ + mov rCRIIP=cr.iip; \ + mov r1=ar.k6; /* r1 = current */ \ + ;; \ + invala; \ + extr.u r16=rCRIPSR,32,2; /* extract psr.cpl */ \ + ;; \ + cmp.eq pKern,p7=r0,r16; /* are we in kernel mode already? (psr.cpl==0) */ \ + /* switch from user to kernel RBS: */ \ + COVER; \ + ;; \ +(p7) mov ar.rsc=r0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ +(p7) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \ + ;; \ +(p7) mov rARRNAT=ar.rnat; \ +(pKern) dep r1=0,sp,61,3; /* compute physical addr of sp */ \ +(p7) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ +(p7) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \ +(p7) dep rKRBS=-1,rKRBS,61,3; /* compute kernel virtual addr of RBS */ \ + ;; \ +(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ +(p7) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \ + ;; \ +(p7) mov r18=ar.bsp; \ +(p7) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ + \ + mov r16=r1; /* initialize first base pointer */ \ + adds r17=8,r1; /* initialize second base pointer */ \ + ;; \ + st8 [r16]=rCRIPSR,16; /* save cr.ipsr */ \ + st8 [r17]=rCRIIP,16; /* save cr.iip */ \ +(pKern) mov r18=r0; /* make sure r18 isn't NaT */ \ + ;; \ + st8 [r16]=rCRIFS,16; /* save cr.ifs */ \ + st8 [r17]=rARUNAT,16; /* save ar.unat */ \ +(p7) sub r18=r18,rKRBS; /* r18=RSE.ndirty*8 */ \ + ;; \ + st8 [r16]=rARPFS,16; /* save ar.pfs */ \ + st8 [r17]=rARRSC,16; /* save ar.rsc */ \ + tbit.nz p15,p0=rCRIPSR,IA64_PSR_I_BIT \ + ;; /* avoid RAW on r16 & r17 */ \ +(pKern) adds r16=16,r16; /* skip over ar_rnat field */ \ +(pKern) adds r17=16,r17; /* skip over ar_bspstore field */ \ +(p7) st8 [r16]=rARRNAT,16; /* save ar.rnat */ \ +(p7) st8 [r17]=rARBSPSTORE,16; /* save ar.bspstore */ \ + ;; \ + st8 [r16]=rARPR,16; /* save predicates */ \ + st8 [r17]=rB6,16; /* save b6 */ \ + shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \ + ;; \ + st8 [r16]=r18,16; /* save ar.rsc value for "loadrs" */ \ + st8.spill [r17]=rR1,16; /* save original r1 */ \ + cmp.ne pEOI,p0=r0,r0 /* clear pEOI by default */ \ + ;; \ + st8.spill [r16]=r2,16; \ + st8.spill [r17]=r3,16; \ + adds r2=IA64_PT_REGS_R16_OFFSET,r1; \ + ;; \ + st8.spill [r16]=r12,16; \ + st8.spill [r17]=r13,16; \ + cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \ + ;; \ + st8.spill [r16]=r14,16; \ + st8.spill [r17]=r15,16; \ + dep r14=-1,r0,61,3; \ + ;; \ + st8.spill [r16]=r8,16; \ + st8.spill [r17]=r9,16; \ + adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \ + ;; \ + st8.spill [r16]=r10,16; \ + st8.spill [r17]=r11,16; \ + mov r13=ar.k6; /* establish `current' */ \ + ;; \ + or r2=r2,r14; /* make first base a 
kernel virtual address */ \ + EXTRA; \ + movl r1=__gp; /* establish kernel global pointer */ \ + ;; \ + or r12=r12,r14; /* make sp a kernel virtual address */ \ + or r13=r13,r14; /* make `current' a kernel virtual address */ \ + bsw.1;; /* switch back to bank 1 (must be last in insn group) */ + +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC +# define STOPS nop.i 0x0;; nop.i 0x0;; nop.i 0x0;; +#else +# define STOPS +#endif + +#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover;; mov rCRIFS=cr.ifs,) STOPS +#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover;; mov rCRIFS=cr.ifs, mov r15=r19) STOPS +#define SAVE_MIN DO_SAVE_MIN(mov rCRIFS=r0,) STOPS + +/* + * SAVE_REST saves the remainder of pt_regs (with psr.ic on). This + * macro guarantees to preserve all predicate registers, r8, r9, r10, + * r11, r14, and r15. + * + * Assumed state upon entry: + * psr.ic: on + * psr.dt: on + * r2: points to &pt_regs.r16 + * r3: points to &pt_regs.r17 + */ +#define SAVE_REST \ + st8.spill [r2]=r16,16; \ + st8.spill [r3]=r17,16; \ + ;; \ + st8.spill [r2]=r18,16; \ + st8.spill [r3]=r19,16; \ + ;; \ + mov r16=ar.ccv; /* M-unit */ \ + movl r18=FPSR_DEFAULT /* L-unit */ \ + ;; \ + mov r17=ar.fpsr; /* M-unit */ \ + mov ar.fpsr=r18; /* M-unit */ \ + ;; \ + st8.spill [r2]=r20,16; \ + st8.spill [r3]=r21,16; \ + mov r18=b0; \ + ;; \ + st8.spill [r2]=r22,16; \ + st8.spill [r3]=r23,16; \ + mov r19=b7; \ + ;; \ + st8.spill [r2]=r24,16; \ + st8.spill [r3]=r25,16; \ + ;; \ + st8.spill [r2]=r26,16; \ + st8.spill [r3]=r27,16; \ + ;; \ + st8.spill [r2]=r28,16; \ + st8.spill [r3]=r29,16; \ + ;; \ + st8.spill [r2]=r30,16; \ + st8.spill [r3]=r31,16; \ + ;; \ + st8 [r2]=r16,16; /* ar.ccv */ \ + st8 [r3]=r17,16; /* ar.fpsr */ \ + ;; \ + st8 [r2]=r18,16; /* b0 */ \ + st8 [r3]=r19,16+8; /* b7 */ \ + ;; \ + stf.spill [r2]=f6,32; \ + stf.spill [r3]=f7,32; \ + ;; \ + stf.spill [r2]=f8,32; \ + stf.spill [r3]=f9,32 + +/* + * This file defines the interrupt vector table used by the CPU. + * It does not include one entry per possible cause of interruption. + * + * External interrupts only use 1 entry. All others are internal interrupts + * + * The first 20 entries of the table contain 64 bundles each while the + * remaining 48 entries contain only 16 bundles each. + * + * The 64 bundles are used to allow inlining the whole handler for critical + * interrupts like TLB misses. + * + * For each entry, the comment is as follows: + * + * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) + * entry offset ----/ / / / / + * entry number ---------/ / / / + * size of the entry -------------/ / / + * vector name -------------------------------------/ / + * related interrupts (what is the real interrupt?) ----------/ + * + * The table is 32KB in size and must be aligned on 32KB boundary. + * (The CPU ignores the 15 lower bits of the address) + * + * Table is based upon EAS2.4 (June 1998) + */ + +#define FAULT(n) \ + rsm psr.dt; /* avoid nested faults due to TLB misses... */ \ + ;; \ + srlz.d; /* ensure everyone knows psr.dt is off... */ \ + mov r31=pr; \ + mov r19=n;; /* prepare to save predicates */ \ + br.cond.sptk.many dispatch_to_fault_handler + +/* + * As we don't (hopefully) use the space available, we need to fill it with + * nops. 
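(A quick size check of the layout described above: 20 entries of 64 bundles plus 48 entries of 16 bundles is 2048 bundles, and at 16 bytes per bundle that is exactly the 32KB demanded by the .align 32768 below.)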
The parameter may be used for debugging and represents the entry + * number +#define BREAK_BUNDLE(a) break.m (a); \ + break.i (a); \ + break.i (a) +/* + * 4 break bundles altogether + */ +#define BREAK_BUNDLE4(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a) + +/* + * 8 break bundles altogether (too lazy to use only 4 at a time!) + */ +#define BREAK_BUNDLE8(a); BREAK_BUNDLE4(a); BREAK_BUNDLE4(a) + + .psr abi64 + .psr lsb + .lsb + + .section __ivt_section,"ax" + + .align 32768 // align on 32KB boundary + .global ia64_ivt +ia64_ivt: +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) + /* + * The VHPT vector is invoked when the TLB entry for the virtual page table + * is missing. This happens only as a result of a previous + * (the "original") TLB miss, which may either be caused by an instruction + * fetch or a data access (or non-access). + * + * What we do here is normal TLB miss handling for the _original_ miss, followed + * by inserting the TLB entry for the virtual page table page that the VHPT + * walker was attempting to access. The latter gets inserted as long + * as both L1 and L2 have valid mappings for the faulting address. + * The TLB entry for the original miss gets inserted only if + * the L3 entry indicates that the page is present. + * + * do_page_fault gets invoked in the following cases: + * - the faulting virtual address uses unimplemented address bits + * - the faulting virtual address has no L1, L2, or L3 mapping + */ + mov r16=cr.ifa // get address that caused the TLB miss + ;; + rsm psr.dt // use physical addressing for data + mov r31=pr // save the predicate registers + mov r19=ar.k7 // get page table base address + shl r21=r16,3 // shift bit 60 into sign bit + shr.u r17=r16,61 // get the region number into r17 + ;; + cmp.eq p6,p7=5,r17 // is IFA pointing into region 5? + shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + srlz.d // ensure "rsm psr.dt" has taken effect +(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir +(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 +(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 + ;; +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? + shr.u r18=r16,PMD_SHIFT // shift L2 index into position + ;; +(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones? + ld8 r17=[r17] // fetch the L1 entry (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry + ;; +(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) + shr.u r19=r16,PAGE_SHIFT // shift L3 index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? + dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry + ;; +(p7) ld8 r18=[r17] // read the L3 PTE + mov r19=cr.isr // cr.isr bit 32 tells us if this is an insn miss + ;; +(p7) tbit.z p6,p7=r18,0 // page present bit cleared? + mov r21=cr.iha // get the VHPT address that caused the TLB miss + ;; // avoid RAW on p7 +(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? 
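+ // (p10/p11, derived from cr.isr above, choose between the itc.i and + // itc.d inserts below: instruction-fetch miss vs. data miss)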
+ dep r17=0,r17,0,PAGE_SHIFT // clear low bits to get page address
+ ;;
+(p10) itc.i r18;; // insert the instruction TLB entry (EAS2.6: must be last in insn group!)
+(p11) itc.d r18;; // insert the data TLB entry (EAS2.6: must be last in insn group!)
+(p6) br.spnt.few page_fault // handle bad address/page not present (page fault)
+ mov cr.ifa=r21
+
+ // Now compute and insert the TLB entry for the virtual page table.
+ // We never execute in a page table page so there is no need to set
+ // the exception deferral bit.
+ adds r16=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r17
+ ;;
+(p7) itc.d r16;; // EAS2.6: must be last in insn group!
+ mov pr=r31,-1 // restore predicate registers
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ /*
+ * The ITLB basically does the same as the VHPT handler except
+ * that we always insert exactly one instruction TLB entry.
+ */
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+ rsm psr.dt // use physical addressing for data
+ mov r31=pr // save the predicate registers
+ mov r19=ar.k7 // get page table base address
+ shl r21=r16,3 // shift bit 60 into sign bit
+ shr.u r17=r16,61 // get the region number into r17
+ ;;
+ cmp.eq p6,p7=5,r17 // is IFA pointing into region 5?
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+ srlz.d // ensure "rsm psr.dt" has taken effect
+(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir
+(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1
+(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4
+ ;;
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+ cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
+ shr.u r18=r16,PMD_SHIFT // shift L2 index into position
+ ;;
+(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones?
+ ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ ;;
+(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
+ shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+ ;;
+(p7) ld8 r18=[r17] // read the L3 PTE
+ ;;
+(p7) tbit.z p6,p7=r18,0 // page present bit cleared?
+ ;;
+(p7) itc.i r18;; // insert the instruction TLB entry (EAS2.6: must be last in insn group!)
+(p6) br.spnt.few page_fault // handle bad address/page not present (page fault)
+ ;;
+ mov pr=r31,-1 // restore predicate registers
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ /*
+ * The DTLB basically does the same as the VHPT handler except
+ * that we always insert exactly one data TLB entry.
+ */
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+ rsm psr.dt // use physical addressing for data
+ mov r31=pr // save the predicate registers
+ mov r19=ar.k7 // get page table base address
+ shl r21=r16,3 // shift bit 60 into sign bit
+ shr.u r17=r16,61 // get the region number into r17
+ ;;
+ cmp.eq p6,p7=5,r17 // is IFA pointing into region 5?
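+ // Note: region 5 is the kernel's page-table-mapped region and is rooted at
+ // swapper_pg_dir; all other regions walk the current page table via ar.k7.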
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address
+ ;;
+(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+ srlz.d // ensure "rsm psr.dt" has taken effect
+(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir
+(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1
+(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4
+ ;;
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+ cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
+ shr.u r18=r16,PMD_SHIFT // shift L2 index into position
+ ;;
+(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones?
+ ld8 r17=[r17] // fetch the L1 entry (may be 0)
+ ;;
+(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
+ dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
+ ;;
+(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
+ shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
+ ;;
+(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
+ dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
+ ;;
+(p7) ld8 r18=[r17] // read the L3 PTE
+ ;;
+(p7) tbit.z p6,p7=r18,0 // page present bit cleared?
+ ;;
+(p7) itc.d r18;; // insert the data TLB entry (EAS2.6: must be last in insn group!)
+(p6) br.spnt.few page_fault // handle bad address/page not present (page fault)
+ ;;
+ mov pr=r31,-1 // restore predicate registers
+ rfi;; // must be last insn in an insn group
+
+ //-----------------------------------------------------------------------------------
+ // call do_page_fault (predicates are in r31, psr.dt is off, r16 is faulting address)
+page_fault:
+ SAVE_MIN_WITH_COVER
+ //
+ // Copy control registers to temporary registers, then turn on psr bits,
+ // then copy the temporary regs to the output regs. We have to do this
+ // because the "alloc" can cause a mandatory store which could lead to
+ // an "Alt DTLB" fault which we can handle only if psr.ic is on.
+ //
+ mov r8=cr.ifa
+ mov r9=cr.isr
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic | psr.dt
+ ;;
+ srlz.d // guarantee that interrupt collection is enabled
+(p15) ssm psr.i // restore psr.i
+ ;;
+ srlz.i // must precede "alloc"! (srlz.i implies srlz.d)
+ movl r14=ia64_leave_kernel
+ ;;
+ alloc r15=ar.pfs,0,0,3,0 // must be first in insn group
+ mov out0=r8
+ mov out1=r9
+ ;;
+ SAVE_REST
+ mov rp=r14
+ ;;
+ adds out2=16,r12 // out2 = pointer to pt_regs
+ br.call.sptk.few b6=ia64_do_page_fault // ignore return address
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RX
+ ;;
+ shr.u r18=r16,57 // move address bit 61 to bit 4
+ dep r16=0,r16,52,12 // clear top 12 bits of address
+ ;;
+ andcm r18=0x10,r18 // bit 4=~address-bit(61)
+ dep r16=r17,r16,0,12 // insert PTE control bits into r16
+ ;;
+ or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6
+ ;;
+ itc.i r16;; // insert the TLB entry (EAS2.6: must be last in insn group!)
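+ // Note: the alternative miss handlers here build the translation on the
+ // fly (no page-table walk): the physical address comes from the region
+ // offset itself, with bit 4 making region 6 accesses uncacheable.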
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ mov r16=cr.ifa // get address that caused the TLB miss
+ movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RW
+ ;;
+ shr.u r18=r16,57 // move address bit 61 to bit 4
+ dep r16=0,r16,52,12 // clear top 12 bits of address
+ ;;
+ andcm r18=0x10,r18 // bit 4=~address-bit(61)
+ dep r16=r17,r16,0,12 // insert PTE control bits into r16
+ ;;
+ or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6
+ ;;
+ itc.d r16;; // insert the TLB entry (EAS2.6: must be last in insn group!)
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ //
+ // In the absence of kernel bugs, we get here when the Dirty-bit, Instruction
+ // Access-bit, or Data Access-bit faults cause a nested fault because the
+ // dTLB entry for the virtual page table isn't present. In such a case,
+ // we look up the pte for the faulting address by walking the page table
+ // and return to the continuation point passed in register r30.
+ // In accessing the page tables, we don't need to check for NULL entries
+ // because if the page tables didn't map the faulting address, it would not
+ // be possible to receive one of the above faults.
+ //
+ // Input: r16: faulting address
+ // r29: saved b0
+ // r30: continuation address
+ //
+ // Output: r17: physical address of L3 PTE of faulting address
+ // r29: saved b0
+ // r30: continuation address
+ //
+ // Clobbered: b0, r18, r19, r21, r31, psr.dt (cleared)
+ //
+ rsm psr.dt // switch to using physical data addressing
+ mov r19=ar.k7 // get the page table base address
+ shl r21=r16,3 // shift bit 60 into sign bit
+ ;;
+ mov r31=pr // save the predicate registers
+ shr.u r17=r16,61 // get the region number into r17
+ ;;
+ cmp.eq p6,p7=5,r17 // is faulting address in region 5?
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + srlz.d +(p6) movl r17=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir +(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 +(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 + ;; +(p6) dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + shr.u r18=r16,PMD_SHIFT // shift L2 index into position + ;; + ld8 r17=[r17] // fetch the L1 entry + mov b0=r30 + ;; + dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry + ;; + ld8 r17=[r17] // fetch the L2 entry + shr.u r19=r16,PAGE_SHIFT // shift L3 index into position + ;; + dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry + ;; + mov pr=r31,-1 // restore predicates + br.cond.sptk.few b0 // return to continuation point + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) + FAULT(6) + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) + FAULT(7) + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) + // + // What we do here is to simply turn on the dirty bit in the PTE. We need + // to update both the page-table and the TLB entry. To efficiently access + // the PTE, we address it through the virtual page table. Most likely, the + // TLB entry for the relevant virtual page table page is still present in + // the TLB so we can normally do this without additional TLB misses. + // In case the necessary virtual page table TLB entry isn't present, we take + // a nested TLB miss hit where we look up the physical address of the L3 PTE + // and then continue at label 1 below. + // + mov r16=cr.ifa // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + ;; + thash r17=r16 // compute virtual address of L3 PTE + mov r29=b0 // save b0 in case of nested fault + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_D,r18 // set the dirty bit + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + itc.d r18;; // install updated PTE (EAS2.6: must be last in insn group!) + rfi;; // must be last insn in an insn group + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) + // Like Entry 8, except for instruction access + mov r16=cr.ifa // get the address that caused the fault +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + mov r31=pr // save predicates + mov r30=cr.ipsr + ;; + extr.u r17=r30,IA64_PSR_IS_BIT,1 // get instruction arch. 
indicator
+ ;;
+ cmp.eq p6,p0 = r17,r0 // check if IA64 instruction set
+ ;;
+(p6) mov r16=cr.iip // get real faulting address
+ ;;
+(p6) mov cr.ifa=r16 // reset IFA
+ mov pr=r31,-1
+#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */
+ movl r30=1f // load continuation point in case of nested fault
+ ;;
+ thash r17=r16 // compute virtual address of L3 PTE
+ mov r29=b0 // save b0 in case of nested fault
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ or r18=_PAGE_A,r18 // set the accessed bit
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.i r18;; // install updated PTE (EAS2.6: must be last in insn group!)
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ // Like Entry 8, except for data access
+ mov r16=cr.ifa // get the address that caused the fault
+ movl r30=1f // load continuation point in case of nested fault
+ ;;
+ thash r17=r16 // compute virtual address of L3 PTE
+ mov r29=b0 // save b0 in case of nested fault
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ or r18=_PAGE_A,r18 // set the accessed bit
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.d r18;; // install updated PTE (EAS2.6: must be last in insn group!)
+ rfi;; // must be last insn in an insn group
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ mov r16=cr.iim
+ mov r17=__IA64_BREAK_SYSCALL
+ mov r31=pr // prepare to save predicates
+ rsm psr.dt // avoid nested faults due to TLB misses...
+ ;;
+ srlz.d // ensure everyone knows psr.dt is off...
+ cmp.eq p0,p7=r16,r17 // is this a system call? (p7 <- false, if so)
+
+#if 1
+ // Allow syscalls via the old system call number for the time being. This is
+ // so we can transition to the new syscall number in a relatively smooth
+ // fashion.
+ mov r17=0x80000
+ ;;
+(p7) cmp.eq.or.andcm p0,p7=r16,r17 // is this the old syscall number?
+#endif
+
+(p7) br.cond.spnt.many non_syscall
+
+ SAVE_MIN // uses r31; defines r2:
+
+ // turn interrupt collection and data translation back on:
+ ssm psr.ic | psr.dt
+ srlz.d // guarantee that interrupt collection is enabled
+ cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
+ ;;
+(p15) ssm psr.i // restore psr.i
+ ;;
+ srlz.i // ensure everybody knows psr.ic and psr.dt are back on
+ adds r8=(IA64_PT_REGS_R8_OFFSET-IA64_PT_REGS_R16_OFFSET),r2
+ ;;
+ stf8 [r8]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error)
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ ;;
+ SAVE_REST
+ ;; // avoid WAW on r2 & r3
+
+ mov r3=255
+ adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024
+ adds r2=IA64_TASK_FLAGS_OFFSET,r13 // r2 = &current->flags
+
+ ;;
+ cmp.geu.unc p6,p7=r3,r15 // (1024 <= syscall <= 1024+255)?
+ movl r16=sys_call_table
+ ;;
+(p6) shladd r16=r15,3,r16
+ movl r15=ia64_ret_from_syscall
+(p7) adds r16=(__NR_ni_syscall-1024)*8,r16 // force __NR_ni_syscall
+ ;;
+ ld8 r16=[r16] // load address of syscall entry point
+ mov rp=r15 // set the real return addr
+ ;;
+ ld8 r2=[r2] // r2 = current->flags
+ mov b6=r16
+
+ // arrange things so we skip over break instruction when returning:
+
+ adds r16=16,sp // get pointer to cr_ipsr
+ adds r17=24,sp // get pointer to cr_iip
+ ;;
+ ld8 r18=[r16] // fetch cr_ipsr
+ tbit.z p8,p0=r2,5 // (current->flags & PF_TRACESYS) == 0?
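+ // Note: the bundles below advance the saved cr.iip/ipsr.ei past the break
+ // insn: ei names slots 0..2 within a bundle, so slot 2 wraps to slot 0 of
+ // the next 16-byte bundle, and any other slot simply increments ei.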
+ ;;
+ ld8 r19=[r17] // fetch cr_iip
+ extr.u r20=r18,41,2 // extract ei field
+ ;;
+ cmp.eq p6,p7=2,r20 // ipsr.ei==2?
+ adds r19=16,r19 // compute address of next bundle
+ ;;
+(p6) mov r20=0 // clear ei to 0
+(p7) adds r20=1,r20 // increment ei to next slot
+ ;;
+(p6) st8 [r17]=r19 // store new cr.iip if cr.ipsr.ei wrapped around
+ dep r18=r20,r18,41,2 // insert new ei into cr.ipsr
+ ;;
+ st8 [r16]=r18 // store new value for cr.ipsr
+
+(p8) br.call.sptk.few b6=b6 // ignore this return addr
+ br.call.sptk.few rp=ia64_trace_syscall // rp will be overwritten (ignored)
+ // NOT REACHED
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ rsm psr.dt // avoid nested faults due to TLB misses...
+ ;;
+ srlz.d // ensure everyone knows psr.dt is off...
+ mov r31=pr // prepare to save predicates
+ ;;
+
+ SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3
+ ssm psr.ic | psr.dt // turn interrupt collection and data translation back on
+ ;;
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ cmp.eq pEOI,p0=r0,r0 // set pEOI flag so that ia64_leave_kernel writes cr.eoi
+ srlz.i // ensure everybody knows psr.ic and psr.dt are back on
+ ;;
+ SAVE_REST
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ mov out0=r0 // defer reading of cr.ivr to handle_irq...
+#else
+ mov out0=cr.ivr // pass cr.ivr as first arg
+#endif
+ add out1=16,sp // pass pointer to pt_regs as second arg
+ ;;
+ srlz.d // make sure we see the effect of cr.ivr
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+ br.call.sptk.few b6=ia64_handle_irq
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+ FAULT(13)
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+ FAULT(14)
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+ FAULT(15)
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+ FAULT(16)
+
+#ifdef CONFIG_IA32_SUPPORT
+
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+ // fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
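+ // Note: the handler below follows the usual IA-32 int 0x80 convention --
+ // EAX carries the syscall number and EBX, ECX, EDX, ESI, EDI, EBP the
+ // arguments -- recovered from the pt_regs slots that alias those registers.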
+
+ // IA32 interrupt entry point
+
+dispatch_to_ia32_handler:
+ SAVE_MIN
+ ;;
+ mov r14=cr.isr
+ ssm psr.ic | psr.dt
+ srlz.d // guarantee that interrupt collection is enabled
+ ;;
+(p15) ssm psr.i
+ ;;
+ srlz.d
+ adds r3=8,r2 // Base pointer for SAVE_REST
+ ;;
+ SAVE_REST
+ ;;
+ mov r15=0x80
+ shr r14=r14,16 // Get interrupt number
+ ;;
+ cmp.ne p6,p0=r14,r15
+(p6) br.call.dpnt.few b6=non_ia32_syscall
+
+ adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions
+
+ ;;
+ alloc r15=ar.pfs,0,0,6,0 // must be first in an insn group
+ ;;
+ ld4 r8=[r14],8 // r8 == EAX (syscall number)
+ mov r15=0xff
+ ;;
+ cmp.ltu.unc p6,p7=r8,r15
+ ld4 out1=[r14],8 // r9 == ecx
+ ;;
+ ld4 out2=[r14],8 // r10 == edx
+ ;;
+ ld4 out0=[r14] // r11 == ebx
+ adds r14=(IA64_PT_REGS_R8_OFFSET-(8*3)) + 16,sp
+ ;;
+ ld4 out5=[r14],8 // r13 == ebp
+ ;;
+ ld4 out3=[r14],8 // r14 == esi
+ adds r2=IA64_TASK_FLAGS_OFFSET,r13 // r2 = &current->flags
+ ;;
+ ld4 out4=[r14] // r15 == edi
+ movl r16=ia32_syscall_table
+ ;;
+(p6) shladd r16=r8,3,r16 // Force ni_syscall if not valid syscall number
+ ld8 r2=[r2] // r2 = current->flags
+ ;;
+ ld8 r16=[r16]
+ tbit.z p8,p0=r2,5 // (current->flags & PF_TRACESYS) == 0?
+ ;;
+ movl r15=ia32_ret_from_syscall
+ mov b6=r16
+ ;;
+ mov rp=r15
+(p8) br.call.sptk.few b6=b6
+ br.call.sptk.few rp=ia32_trace_syscall // rp will be overwritten (ignored)
+
+non_ia32_syscall:
+ alloc r15=ar.pfs,0,0,2,0
+ mov out0=r14 // interrupt #
+ add out1=16,sp // pointer to pt_regs
+ ;; // avoid WAW on CFM
+ br.call.sptk.few rp=ia32_bad_interrupt
+ ;;
+ movl r15=ia64_leave_kernel
+ ;;
+ mov rp=r15
+ br.ret.sptk.many rp
+
+#endif /* CONFIG_IA32_SUPPORT */
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+ FAULT(17)
+
+non_syscall:
+
+#ifdef CONFIG_KDB
+ mov r17=__IA64_BREAK_KDB
+ ;;
+ cmp.eq p8,p0=r16,r17 // is this a kernel breakpoint?
+#endif
+
+ SAVE_MIN_WITH_COVER
+
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+ // fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
+
+ mov r8=cr.iim // get break immediate (must be done while psr.ic is off)
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+
+ // turn interrupt collection and data translation back on:
+ ssm psr.ic | psr.dt
+ srlz.d // guarantee that interrupt collection is enabled
+ ;;
+(p15) ssm psr.i // restore psr.i
+ ;;
+ srlz.i // ensure everybody knows psr.ic and psr.dt are back on
+ movl r15=ia64_leave_kernel
+ ;;
+ alloc r14=ar.pfs,0,0,2,0
+ mov out0=r8 // break number
+ add out1=16,sp // pointer to pt_regs
+ ;;
+ SAVE_REST
+ mov rp=r15
+ ;;
+#ifdef CONFIG_KDB
+(p8) br.call.sptk.few b6=ia64_invoke_kdb
+#endif
+ br.call.sptk.few b6=ia64_bad_break // avoid WAW on CFM and ignore return addr
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+ FAULT(18)
+
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+ // fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
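+ // Note: user-level unaligned references are emulated via the handler
+ // below; kernel-level ones are treated as bugs and go through
+ // dispatch_to_fault_handler (see the Unaligned Reference entry at 0x5a00).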
+
+dispatch_unaligned_handler:
+ SAVE_MIN_WITH_COVER
+ ;;
+ //
+ // we can't do the alloc while psr.ic is cleared because we might get a
+ // mandatory RSE spill (when the alloc reaches the end of the rotating
+ // partition), which could cause a page fault on the kernel virtual
+ // address, and the handler wouldn't have the state to recover.
+ //
+ mov r15=cr.ifa
+ ssm psr.ic | psr.dt
+ srlz.d // guarantee that interrupt collection is enabled
+ ;;
+(p15) ssm psr.i // restore psr.i
+ ;;
+ srlz.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ SAVE_REST
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
+ ;; // avoid WAW on r14
+ movl r14=ia64_leave_kernel
+ mov out0=r15 // out0 = faulting address
+ adds out1=16,sp // out1 = pointer to pt_regs
+ ;;
+ mov rp=r14
+ br.sptk.few ia64_prepare_handle_unaligned
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+ FAULT(19)
+
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+ // fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
+
+dispatch_to_fault_handler:
+ //
+ // Input:
+ // psr.ic: off
+ // psr.dt: off
+ // r19: fault vector number (e.g., 24 for General Exception)
+ // r31: contains saved predicates (pr)
+ //
+ SAVE_MIN_WITH_COVER_R19
+ //
+ // Copy control registers to temporary registers, then turn on psr bits,
+ // then copy the temporary regs to the output regs. We have to do this
+ // because the "alloc" can cause a mandatory store which could lead to
+ // an "Alt DTLB" fault which we can handle only if psr.ic is on.
+ //
+ mov r8=cr.isr
+ mov r9=cr.ifa
+ mov r10=cr.iim
+ mov r11=cr.itir
+ ;;
+ ssm psr.ic | psr.dt
+ srlz.d // guarantee that interrupt collection is enabled
+ ;;
+(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ ;;
+ srlz.i // must precede "alloc"!
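+ // Note: srlz.i makes the psr.ic=1 write architecturally visible before the
+ // alloc below; a mandatory RSE spill triggered by alloc is only
+ // recoverable once interruption collection is back on.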
+ ;;
+ alloc r14=ar.pfs,0,0,5,0 // must be first in insn group
+ mov out0=r15
+ mov out1=r8
+ mov out2=r9
+ mov out3=r10
+ mov out4=r11
+ ;;
+ SAVE_REST
+ movl r14=ia64_leave_kernel
+ ;;
+ mov rp=r14
+#ifdef CONFIG_KDB
+ br.call.sptk.few b6=ia64_invoke_kdb_fault_handler
+#else
+ br.call.sptk.few b6=ia64_fault
+#endif
+//
+// --- End of long entries, Beginning of short entries
+//
+
+ .align 1024
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
+ mov r16=cr.ifa
+ rsm psr.dt
+#if 0
+ // If you disable this, you MUST re-enable the update_mmu_cache() code in pgtable.h
+ mov r17=_PAGE_SIZE_4K<<2
+ ;;
+ ptc.l r16,r17
+#endif
+ ;;
+ mov r31=pr
+ srlz.d
+ br.cond.sptk.many page_fault
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
+ mov r16=cr.ifa
+ rsm psr.dt
+ mov r31=pr
+ ;;
+ srlz.d
+ br.cond.sptk.many page_fault
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ mov r16=cr.ifa
+ rsm psr.dt
+ mov r31=pr
+ ;;
+ srlz.d
+ br.cond.sptk.many page_fault
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ mov r16=cr.ifa
+ rsm psr.dt
+ mov r31=pr
+ ;;
+ srlz.d
+ br.cond.sptk.many page_fault
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ FAULT(24)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ rsm psr.dt | psr.dfh // ensure we can access fph
+ ;;
+ srlz.d
+ mov r31=pr
+ mov r19=25
+ br.cond.sptk.many dispatch_to_fault_handler
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ FAULT(26)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ //
+ // A [f]chk.[as] instruction needs to take the branch to
+ // the recovery code but this part of the architecture is
+ // not implemented in hardware on some CPUs, such as Itanium.
+ // Thus, in general we need to emulate the behavior.
+ // IIM contains the relative target (not yet sign extended).
+ // So after sign extending it we simply add it to IIP.
+ // We also need to reset the EI field of the IPSR to zero,
+ // i.e., the slot to restart into.
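+ // Note: the shl-by-43/shr-by-39 pair below nets out to "sign-extend imm21,
+ // then multiply by 16", i.e. a bundle-granular offset; e.g. imm21=0x1fffff
+ // (-1) restarts execution at IIP-16, the previous bundle.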
+ //
+ // cr.iim contains zero_ext(imm21)
+ //
+ mov r18=cr.iim
+ ;;
+ mov r17=cr.iip
+ shl r18=r18,43 // put sign bit in position (43=64-21)
+ ;;
+
+ mov r16=cr.ipsr
+ shr r18=r18,39 // sign extend (39=43-4)
+ ;;
+
+ add r17=r17,r18 // now add the offset
+ ;;
+ mov cr.iip=r17
+ dep r16=0,r16,41,2 // clear EI
+ ;;
+
+ mov cr.ipsr=r16
+ ;;
+
+ rfi;; // and go back (must be last insn in group)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+ FAULT(28)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ FAULT(29)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ rsm psr.dt // avoid nested faults due to TLB misses...
+ mov r16=cr.ipsr
+ mov r31=pr // prepare to save predicates
+ ;;
+ srlz.d // ensure everyone knows psr.dt is off
+ mov r19=30 // error vector for fault_handler (when kernel)
+ extr.u r16=r16,32,2 // extract psr.cpl
+ ;;
+ cmp.eq p6,p7=r0,r16 // if kernel cpl then fault else emulate
+(p7) br.cond.sptk.many dispatch_unaligned_handler
+(p6) br.cond.sptk.many dispatch_to_fault_handler
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ FAULT(31)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
+ FAULT(32)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ FAULT(33)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ FAULT(34)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ FAULT(35)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ FAULT(36)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Reserved
+ FAULT(37)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+ FAULT(38)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+ FAULT(39)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+ FAULT(40)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+ FAULT(41)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+ FAULT(42)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6700 Entry
43 (size 16 bundles) Reserved
+ FAULT(43)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+ FAULT(44)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ FAULT(45)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
+ FAULT(46)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
+#ifdef CONFIG_IA32_SUPPORT
+ rsm psr.dt
+ ;;
+ srlz.d
+ mov r31=pr
+ br.cond.sptk.many dispatch_to_ia32_handler
+#else
+ FAULT(47)
+#endif
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+ FAULT(48)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+ FAULT(49)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+ FAULT(50)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+ FAULT(51)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7000 Entry 52 (size 16 bundles) Reserved
+ FAULT(52)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+ FAULT(53)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+ FAULT(54)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+ FAULT(55)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+ FAULT(56)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+ FAULT(57)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+ FAULT(58)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+ FAULT(59)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+ FAULT(60)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+ FAULT(61)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+ FAULT(62)
+
+ .align 256
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+
FAULT(63) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7c00 Entry 64 (size 16 bundles) Reserved + FAULT(64) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7d00 Entry 65 (size 16 bundles) Reserved + FAULT(65) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7e00 Entry 66 (size 16 bundles) Reserved + FAULT(66) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7f00 Entry 67 (size 16 bundles) Reserved + FAULT(67) diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c new file mode 100644 index 000000000..153fb5684 --- /dev/null +++ b/arch/ia64/kernel/machvec.c @@ -0,0 +1,48 @@ +#include <linux/kernel.h> + +#include <asm/page.h> +#include <asm/machvec.h> + +struct ia64_machine_vector ia64_mv; + +void +machvec_noop (void) +{ +} + +/* + * Most platforms use this routine for mapping page frame addresses + * into a memory map index. + */ +unsigned long +map_nr_dense (unsigned long addr) +{ + return MAP_NR_DENSE(addr); +} + +static struct ia64_machine_vector * +lookup_machvec (const char *name) +{ + extern struct ia64_machine_vector machvec_start[]; + extern struct ia64_machine_vector machvec_end[]; + struct ia64_machine_vector *mv; + + for (mv = machvec_start; mv < machvec_end; ++mv) + if (strcmp (mv->name, name) == 0) + return mv; + + return 0; +} + +void +machvec_init (const char *name) +{ + struct ia64_machine_vector *mv; + + mv = lookup_machvec(name); + if (!mv) { + panic("generic kernel failed to find machine vector for platform %s!", name); + } + ia64_mv = *mv; + printk("booting generic kernel on platform %s\n", name); +} diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c new file mode 100644 index 000000000..320c56ebc --- /dev/null +++ b/arch/ia64/kernel/mca.c @@ -0,0 +1,842 @@ +/* + * File: mca.c + * Purpose: Generic MCA handling layer + * + * Copyright (C) 1999 Silicon Graphics, Inc. 
+ * Copyright (C) Vijay Chander(vijay@engr.sgi.com) + */ +#include <linux/types.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/sal.h> +#include <asm/mca.h> +#include <asm/spinlock.h> +#include <asm/irq.h> +#include <asm/machvec.h> + + +ia64_mc_info_t ia64_mc_info; +ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state; +ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state; +u64 ia64_mca_proc_state_dump[256]; +u64 ia64_mca_stack[1024]; +u64 ia64_mca_stackframe[32]; +u64 ia64_mca_bspstore[1024]; + +static void ia64_mca_cmc_vector_setup(int enable, + int_vector_t cmc_vector); +static void ia64_mca_wakeup_ipi_wait(void); +static void ia64_mca_wakeup(int cpu); +static void ia64_mca_wakeup_all(void); +static void ia64_log_init(int,int); +static void ia64_log_get(int,int, prfunc_t); +static void ia64_log_clear(int,int,int, prfunc_t); + +/* + * ia64_mca_cmc_vector_setup + * Setup the correctable machine check vector register in the processor + * Inputs + * Enable (1 - enable cmc interrupt , 0 - disable) + * CMC handler entry point (if enabled) + * + * Outputs + * None + */ +static void +ia64_mca_cmc_vector_setup(int enable, + int_vector_t cmc_vector) +{ + cmcv_reg_t cmcv; + + cmcv.cmcv_regval = 0; + cmcv.cmcv_mask = enable; + cmcv.cmcv_vector = cmc_vector; + ia64_set_cmcv(cmcv.cmcv_regval); +} + + +#if defined(MCA_TEST) + +sal_log_processor_info_t slpi_buf; + +void +mca_test(void) +{ + slpi_buf.slpi_valid.slpi_psi = 1; + slpi_buf.slpi_valid.slpi_cache_check = 1; + slpi_buf.slpi_valid.slpi_tlb_check = 1; + slpi_buf.slpi_valid.slpi_bus_check = 1; + slpi_buf.slpi_valid.slpi_minstate = 1; + slpi_buf.slpi_valid.slpi_bank1_gr = 1; + slpi_buf.slpi_valid.slpi_br = 1; + slpi_buf.slpi_valid.slpi_cr = 1; + slpi_buf.slpi_valid.slpi_ar = 1; + slpi_buf.slpi_valid.slpi_rr = 1; + slpi_buf.slpi_valid.slpi_fr = 1; + + ia64_os_mca_dispatch(); +} + +#endif /* #if defined(MCA_TEST) */ + +/* + * mca_init + * Do all the mca specific initialization on a per-processor basis. + * + * 1. Register spinloop and wakeup request interrupt vectors + * + * 2. Register OS_MCA handler entry point + * + * 3. Register OS_INIT handler entry point + * + * 4. Initialize CMCV register to enable/disable CMC interrupt on the + * processor and hook a handler in the platform-specific mca_init. + * + * 5. Initialize MCA/CMC/INIT related log buffers maintained by the OS. + * + * Inputs + * None + * Outputs + * None + */ +void __init +mca_init(void) +{ + int i; + + MCA_DEBUG("mca_init : begin\n"); + /* Clear the Rendez checkin flag for all cpus */ + for(i = 0 ; i < IA64_MAXCPUS; i++) + ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; + + /* NOTE : The actual irqs for the rendez, wakeup and + * cmc interrupts are requested in the platform-specific + * mca initialization code. 
+ */ + /* + * Register the rendezvous spinloop and wakeup mechanism with SAL + */ + + /* Register the rendezvous interrupt vector with SAL */ + if (ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT, + SAL_MC_PARAM_MECHANISM_INT, + IA64_MCA_RENDEZ_INT_VECTOR, + IA64_MCA_RENDEZ_TIMEOUT)) + return; + + /* Register the wakeup interrupt vector with SAL */ + if (ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP, + SAL_MC_PARAM_MECHANISM_INT, + IA64_MCA_WAKEUP_INT_VECTOR, + 0)) + return; + + MCA_DEBUG("mca_init : registered mca rendezvous spinloop and wakeup mech.\n"); + /* + * Setup the correctable machine check vector + */ + ia64_mca_cmc_vector_setup(IA64_CMC_INT_ENABLE, + IA64_MCA_CMC_INT_VECTOR); + + MCA_DEBUG("mca_init : correctable mca vector setup done\n"); + + ia64_mc_info.imi_mca_handler = __pa(ia64_os_mca_dispatch); + ia64_mc_info.imi_mca_handler_size = + __pa(ia64_os_mca_dispatch_end) - __pa(ia64_os_mca_dispatch); + /* Register the os mca handler with SAL */ + if (ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, + ia64_mc_info.imi_mca_handler, + __pa(ia64_get_gp()), + ia64_mc_info.imi_mca_handler_size, + 0,0,0)) + + return; + + MCA_DEBUG("mca_init : registered os mca handler with SAL\n"); + + ia64_mc_info.imi_monarch_init_handler = __pa(ia64_monarch_init_handler); + ia64_mc_info.imi_monarch_init_handler_size = IA64_INIT_HANDLER_SIZE; + ia64_mc_info.imi_slave_init_handler = __pa(ia64_slave_init_handler); + ia64_mc_info.imi_slave_init_handler_size = IA64_INIT_HANDLER_SIZE; + /* Register the os init handler with SAL */ + if (ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, + ia64_mc_info.imi_monarch_init_handler, + __pa(ia64_get_gp()), + ia64_mc_info.imi_monarch_init_handler_size, + ia64_mc_info.imi_slave_init_handler, + __pa(ia64_get_gp()), + ia64_mc_info.imi_slave_init_handler_size)) + + + return; + + MCA_DEBUG("mca_init : registered os init handler with SAL\n"); + + /* Initialize the areas set aside by the OS to buffer the + * platform/processor error states for MCA/INIT/CMC + * handling. + */ + ia64_log_init(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR); + ia64_log_init(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PLATFORM); + ia64_log_init(SAL_INFO_TYPE_INIT, SAL_SUB_INFO_TYPE_PROCESSOR); + ia64_log_init(SAL_INFO_TYPE_INIT, SAL_SUB_INFO_TYPE_PLATFORM); + ia64_log_init(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR); + ia64_log_init(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PLATFORM); + + mca_init_platform(); + + MCA_DEBUG("mca_init : platform-specific mca handling setup done\n"); + +#if defined(MCA_TEST) + mca_test(); +#endif /* #if defined(MCA_TEST) */ + + printk("Mca related initialization done\n"); +} + +/* + * ia64_mca_wakeup_ipi_wait + * Wait for the inter-cpu interrupt to be sent by the + * monarch processor once it is done with handling the + * MCA. + * Inputs + * None + * Outputs + * None + */ +void +ia64_mca_wakeup_ipi_wait(void) +{ + int irr_num = (IA64_MCA_WAKEUP_INT_VECTOR >> 6); + int irr_bit = (IA64_MCA_WAKEUP_INT_VECTOR & 0x3f); + u64 irr = 0; + + do { + switch(irr_num) { + case 0: + irr = ia64_get_irr0(); + break; + case 1: + irr = ia64_get_irr1(); + break; + case 2: + irr = ia64_get_irr2(); + break; + case 3: + irr = ia64_get_irr3(); + break; + } + } while (!(irr & (1 << irr_bit))) ; +} + +/* + * ia64_mca_wakeup + * Send an inter-cpu interrupt to wake-up a particular cpu + * and mark that cpu to be out of rendez. 
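+ * Note: "out of rendez" just means the cpu's checkin flag is reset to
+ * NOTDONE, so a later MCA can rendezvous it again.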
+ * Inputs
+ * cpuid
+ * Outputs
+ * None
+ */
+void
+ia64_mca_wakeup(int cpu)
+{
+ ipi_send(cpu, IA64_MCA_WAKEUP_INT_VECTOR, IA64_IPI_DM_INT);
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+
+}
+/*
+ * ia64_mca_wakeup_all
+ * Wake up all the cpus which have rendez'ed previously.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void
+ia64_mca_wakeup_all(void)
+{
+ int cpu;
+
+ /* Clear the Rendez checkin flag for all cpus */
+ for(cpu = 0 ; cpu < IA64_MAXCPUS; cpu++)
+ if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE)
+ ia64_mca_wakeup(cpu);
+
+}
+/*
+ * ia64_mca_rendez_interrupt_handler
+ * This is the handler used to put slave processors into the spinloop
+ * while the monarch processor does the mca handling, and later
+ * wake each slave up once the monarch is done.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void
+ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs)
+{
+ int flags;
+ /* Mask all interrupts */
+ save_and_cli(flags);
+
+ ia64_mc_info.imi_rendez_checkin[ia64_get_cpuid(0)] = IA64_MCA_RENDEZ_CHECKIN_DONE;
+ /* Register with the SAL monarch that the slave has
+ * reached SAL
+ */
+ ia64_sal_mc_rendez();
+
+ /* Wait for the wakeup IPI from the monarch
+ * This waiting is done by polling on the wakeup-interrupt
+ * vector bit in the processor's IRRs
+ */
+ ia64_mca_wakeup_ipi_wait();
+
+ /* Enable all interrupts */
+ restore_flags(flags);
+
+
+}
+
+
+/*
+ * ia64_mca_wakeup_int_handler
+ * The interrupt handler for processing the inter-cpu interrupt to the
+ * slave cpu which was spinning in the rendez loop.
+ * Since this spinning is done by turning off the interrupts and
+ * polling on the wakeup-interrupt bit in the IRR, there is
+ * nothing useful to be done in the handler.
+ * Inputs
+ * wakeup_irq (Wakeup-interrupt bit)
+ * arg (Interrupt handler specific argument)
+ * ptregs (Exception frame at the time of the interrupt)
+ * Outputs
+ *
+ */
+void
+ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs)
+{
+
+}
+
+/*
+ * ia64_return_to_sal_check
+ * This is the function called before going back from the OS_MCA handler
+ * to the OS_MCA dispatch code, which finally returns control
+ * to the SAL.
+ * The main purpose of this routine is to set up the OS_MCA to SAL
+ * return state which can be used by the OS_MCA dispatch code
+ * just before going back to SAL.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+
+void
+ia64_return_to_sal_check(void)
+{
+ /* Copy over some relevant stuff from the sal_to_os_mca_handoff
+ * so that it can be used at the time of os_mca_to_sal_handoff
+ */
+ ia64_os_to_sal_handoff_state.imots_sal_gp =
+ ia64_sal_to_os_handoff_state.imsto_sal_gp;
+
+ ia64_os_to_sal_handoff_state.imots_sal_check_ra =
+ ia64_sal_to_os_handoff_state.imsto_sal_check_ra;
+
+ /* For now ignore the MCA */
+ ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED;
+}
+/*
+ * ia64_mca_ucmc_handler
+ * This is the uncorrectable machine check handler called from the OS_MCA
+ * dispatch code which is in turn called from SAL_CHECK().
+ * This is the place where the core of OS MCA handling is done.
+ * Right now the logs are extracted and displayed in a well-defined
+ * format. This handler code is supposed to be run only on the
+ * monarch processor. Once the monarch is done with MCA handling,
+ * further MCA logging is enabled by clearing logs.
+ * The monarch also has the duty of sending wakeup-IPIs to pull the
+ * slave processors out of the rendez spinloop.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void
+ia64_mca_ucmc_handler(void)
+{
+
+ /* Get the MCA processor log */
+ ia64_log_get(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk);
+ /* Get the MCA platform log */
+ ia64_log_get(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PLATFORM, (prfunc_t)printk);
+
+ ia64_log_print(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk);
+
+ /*
+ * Do some error handling - Platform-specific mca handler is called at this point
+ */
+
+ mca_handler_platform() ;
+
+ /* Clear the SAL MCA logs */
+ ia64_log_clear(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, 1, printk);
+ ia64_log_clear(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PLATFORM, 1, printk);
+
+ /* Wakeup all the processors which are spinning in the rendezvous
+ * loop.
+ */
+ ia64_mca_wakeup_all();
+ ia64_return_to_sal_check();
+}
+
+/*
+ * SAL to OS entry point for INIT on the monarch processor
+ * This has been defined for registration purposes with SAL
+ * as a part of mca_init.
+ */
+void
+ia64_monarch_init_handler()
+{
+}
+/*
+ * SAL to OS entry point for INIT on the slave processor
+ * This has been defined for registration purposes with SAL
+ * as a part of mca_init.
+ */
+
+void
+ia64_slave_init_handler()
+{
+}
+/*
+ * ia64_mca_cmc_int_handler
+ * This is the correctable machine check interrupt handler.
+ * Right now the logs are extracted and displayed in a well-defined
+ * format.
+ * Inputs
+ * None
+ * Outputs
+ * None
+ */
+void
+ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
+{
+ /* Get the CMC processor log */
+ ia64_log_get(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk);
+ /* Get the CMC platform log */
+ ia64_log_get(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PLATFORM, (prfunc_t)printk);
+
+
+ ia64_log_print(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk);
+ cmci_handler_platform(cmc_irq, arg, ptregs);
+
+ /* Clear the CMC SAL logs now that they have been saved in the OS buffer */
+ ia64_sal_clear_state_info(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR);
+ ia64_sal_clear_state_info(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PLATFORM);
+}
+
+/*
+ * IA64_MCA log support
+ */
+#define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */
+#define IA64_MAX_LOG_TYPES 3 /* MCA, CMC, INIT */
+#define IA64_MAX_LOG_SUBTYPES 2 /* Processor, Platform */
+
+typedef struct ia64_state_log_s {
+ spinlock_t isl_lock;
+ int isl_index;
+ sal_log_header_t isl_log[IA64_MAX_LOGS];
+
+} ia64_state_log_t;
+
+static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES][IA64_MAX_LOG_SUBTYPES];
+
+#define IA64_LOG_LOCK_INIT(it, sit) spin_lock_init(&ia64_state_log[it][sit].isl_lock)
+#define IA64_LOG_LOCK(it, sit) spin_lock_irqsave(&ia64_state_log[it][sit].isl_lock, s)
+#define IA64_LOG_UNLOCK(it, sit) spin_unlock_irqrestore(&ia64_state_log[it][sit].isl_lock,\
+ s)
+#define IA64_LOG_NEXT_INDEX(it, sit) ia64_state_log[it][sit].isl_index
+#define IA64_LOG_CURR_INDEX(it, sit) 1 - ia64_state_log[it][sit].isl_index
+#define IA64_LOG_INDEX_INC(it, sit) \
+ ia64_state_log[it][sit].isl_index = 1 - ia64_state_log[it][sit].isl_index
+#define IA64_LOG_INDEX_DEC(it, sit) \
+ ia64_state_log[it][sit].isl_index = 1 - ia64_state_log[it][sit].isl_index
+#define IA64_LOG_NEXT_BUFFER(it, sit) (void *)(&(ia64_state_log[it][sit].isl_log[IA64_LOG_NEXT_INDEX(it,sit)]))
+#define IA64_LOG_CURR_BUFFER(it, sit) (void *)(&(ia64_state_log[it][sit].isl_log[IA64_LOG_CURR_INDEX(it,sit)]))
+
+/*
+ * ia64_log_init
+ * Reset the OS ia64 log buffer
+ * Inputs : info_type
(SAL_INFO_TYPE_{MCA,INIT,CMC})
+ * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM})
+ * Outputs : None
+ */
+void
+ia64_log_init(int sal_info_type, int sal_sub_info_type)
+{
+ IA64_LOG_LOCK_INIT(sal_info_type, sal_sub_info_type);
+ IA64_LOG_NEXT_INDEX(sal_info_type, sal_sub_info_type) = 0;
+ memset(IA64_LOG_NEXT_BUFFER(sal_info_type, sal_sub_info_type), 0,
+ sizeof(sal_log_header_t) * IA64_MAX_LOGS);
+}
+
+/*
+ * ia64_log_get
+ * Get the current MCA log from SAL and copy it into the OS log buffer.
+ * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC})
+ * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM})
+ * Outputs : None
+ *
+ */
+void
+ia64_log_get(int sal_info_type, int sal_sub_info_type, prfunc_t prfunc)
+{
+ sal_log_header_t *log_buffer;
+ int s;
+
+ IA64_LOG_LOCK(sal_info_type, sal_sub_info_type);
+
+
+ /* Get the process state information */
+ log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type, sal_sub_info_type);
+
+ if (ia64_sal_get_state_info(sal_info_type, sal_sub_info_type ,(u64 *)log_buffer))
+ prfunc("ia64_log_get : Getting processor log failed\n");
+
+ IA64_LOG_INDEX_INC(sal_info_type, sal_sub_info_type);
+
+ IA64_LOG_UNLOCK(sal_info_type, sal_sub_info_type);
+
+}
+
+/*
+ * ia64_log_clear
+ * Clear the current MCA log from SAL and, depending on the clear_os_buffer flag,
+ * clear the OS log buffer as well
+ * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC})
+ * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM})
+ * clear_os_buffer
+ * prfunc (print function)
+ * Outputs : None
+ *
+ */
+void
+ia64_log_clear(int sal_info_type, int sal_sub_info_type, int clear_os_buffer, prfunc_t prfunc)
+{
+ if (ia64_sal_clear_state_info(sal_info_type, sal_sub_info_type))
+ prfunc("ia64_log_clear : Clearing processor log failed\n");
+
+ if (clear_os_buffer) {
+ sal_log_header_t *log_buffer;
+ int s;
+
+ IA64_LOG_LOCK(sal_info_type, sal_sub_info_type);
+
+ /* Get the process state information */
+ log_buffer = IA64_LOG_CURR_BUFFER(sal_info_type, sal_sub_info_type);
+
+ memset(log_buffer, 0, sizeof(sal_log_header_t));
+
+ IA64_LOG_INDEX_DEC(sal_info_type, sal_sub_info_type);
+
+ IA64_LOG_UNLOCK(sal_info_type, sal_sub_info_type);
+ }
+
+}
+
+/*
+ * ia64_log_processor_regs_print
+ * Print the contents of the saved processor register(s) in the format
+ * <reg_prefix>[<index>] <value>
+ *
+ * Inputs : regs (Register save buffer)
+ * reg_num (# of registers)
+ * reg_class (application/banked/control/bank1_general)
+ * reg_prefix (ar/br/cr/b1_gr)
+ * Outputs : None
+ *
+ */
+void
+ia64_log_processor_regs_print(u64 *regs,
+ int reg_num,
+ char *reg_class,
+ char *reg_prefix,
+ prfunc_t prfunc)
+{
+ int i;
+
+ prfunc("+%s Registers\n", reg_class);
+ for (i = 0; i < reg_num; i++)
+ prfunc("+ %s[%d] 0x%lx\n", reg_prefix, i, regs[i]);
+}
+
+static char *pal_mesi_state[] = {
+ "Invalid",
+ "Shared",
+ "Exclusive",
+ "Modified",
+ "Reserved1",
+ "Reserved2",
+ "Reserved3",
+ "Reserved4"
+};
+
+static char *pal_cache_op[] = {
+ "Unknown",
+ "Move in",
+ "Cast out",
+ "Coherency check",
+ "Internal",
+ "Instruction fetch",
+ "Implicit Writeback",
+ "Reserved"
+};
+
+/*
+ * ia64_log_cache_check_info_print
+ * Display the machine check information related to cache error(s).
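+ * Note: output is a single "+ Cache check info[i]" line listing the cache
+ * level, MESI state, way/index, operation, and the target address when the
+ * corresponding valid bits (mv, wv, tv) are set.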
+ * Inputs : i (Multiple errors are logged, i - index of logged error) + * info (Machine check info logged by the PAL and later + * captured by the SAL) + * target_addr (Address which caused the cache error) + * Outputs : None + */ +void +ia64_log_cache_check_info_print(int i, + pal_cache_check_info_t info, + u64 target_addr, + prfunc_t prfunc) +{ + prfunc("+ Cache check info[%d]\n+", i); + prfunc(" Level: L%d",info.level); + if (info.mv) + prfunc(" ,Mesi: %s",pal_mesi_state[info.mesi]); + prfunc(" ,Index: %d,", info.index); + if (info.ic) + prfunc(" ,Cache: Instruction"); + if (info.dc) + prfunc(" ,Cache: Data"); + if (info.tl) + prfunc(" ,Line: Tag"); + if (info.dl) + prfunc(" ,Line: Data"); + prfunc(" ,Operation: %s,", pal_cache_op[info.op]); + if (info.wv) + prfunc(" ,Way: %d,", info.way); + if (info.tv) + prfunc(" ,Target Addr: 0x%lx", target_addr); + if (info.mc) + prfunc(" ,MC: Corrected"); + prfunc("\n"); +} + +/* + * ia64_log_tlb_check_info_print + * Display the machine check information related to tlb error(s). + * Inputs : i (Multiple errors are logged, i - index of logged error) + * info (Machine check info logged by the PAL and later + * captured by the SAL) + * Outputs : None + */ + +void +ia64_log_tlb_check_info_print(int i, + pal_tlb_check_info_t info, + prfunc_t prfunc) +{ + prfunc("+ TLB Check Info [%d]\n+", i); + if (info.itc) + prfunc(" Failure: Instruction Translation Cache"); + if (info.dtc) + prfunc(" Failure: Data Translation Cache"); + if (info.itr) { + prfunc(" Failure: Instruction Translation Register"); + prfunc(" ,Slot: %d", info.tr_slot); + } + if (info.dtr) { + prfunc(" Failure: Data Translation Register"); + prfunc(" ,Slot: %d", info.tr_slot); + } + if (info.mc) + prfunc(" ,MC: Corrected"); + prfunc("\n"); +} + +/* + * ia64_log_bus_check_info_print + * Display the machine check information related to bus error(s). + * Inputs : i (Multiple errors are logged, i - index of logged error) + * info (Machine check info logged by the PAL and later + * captured by the SAL) + * req_addr (Address of the requestor of the transaction) + * resp_addr (Address of the responder of the transaction) + * target_addr (Address where the data was to be delivered to or + * obtained from) + * Outputs : None + */ +void +ia64_log_bus_check_info_print(int i, + pal_bus_check_info_t info, + u64 req_addr, + u64 resp_addr, + u64 targ_addr, + prfunc_t prfunc) +{ + prfunc("+ BUS Check Info [%d]\n+", i); + prfunc(" Status Info: %d", info.bsi); + prfunc(" ,Severity: %d", info.sev); + prfunc(" ,Transaction Type: %d", info.type); + prfunc(" ,Transaction Size: %d", info.size); + if (info.cc) + prfunc(" ,Cache-cache-transfer"); + if (info.ib) + prfunc(" ,Error: Internal"); + if (info.eb) + prfunc(" ,Error: External"); + if (info.mc) + prfunc(" ,MC: Corrected"); + if (info.tv) + prfunc(" ,Target Address: 0x%lx", targ_addr); + if (info.rq) + prfunc(" ,Requestor Address: 0x%lx", req_addr); + if (info.tv) + prfunc(" ,Responder Address: 0x%lx", resp_addr); + prfunc("\n"); +} + +/* + * ia64_log_processor_info_print + * Display the processor-specific information logged by PAL as a part + * of MCA or INIT or CMC. + * Inputs : lh (Pointer of the sal log header which specifies the format + * of SAL state info as specified by the SAL spec). 
+ * Outputs : None
+ */
+void
+ia64_log_processor_info_print(sal_log_header_t *lh, prfunc_t prfunc)
+{
+ sal_log_processor_info_t *slpi;
+ int i;
+
+ if (!lh)
+ return;
+
+ if (lh->slh_log_type != SAL_SUB_INFO_TYPE_PROCESSOR)
+ return;
+
+#if defined(MCA_TEST)
+ slpi = &slpi_buf;
+#else
+ slpi = (sal_log_processor_info_t *)lh->slh_log_dev_spec_info;
+#endif /* #if defined(MCA_TEST) */
+
+ if (!slpi) {
+ prfunc("No Processor Error Log found\n");
+ return;
+ }
+
+ /* Print branch register contents if valid */
+ if (slpi->slpi_valid.slpi_br)
+ ia64_log_processor_regs_print(slpi->slpi_br, 8, "Branch", "br", prfunc);
+
+ /* Print control register contents if valid */
+ if (slpi->slpi_valid.slpi_cr)
+ ia64_log_processor_regs_print(slpi->slpi_cr, 128, "Control", "cr", prfunc);
+
+ /* Print application register contents if valid */
+ if (slpi->slpi_valid.slpi_ar)
+ ia64_log_processor_regs_print(slpi->slpi_ar, 128, "Application", "ar", prfunc);
+
+ /* Print region register contents if valid */
+ if (slpi->slpi_valid.slpi_rr)
+ ia64_log_processor_regs_print(slpi->slpi_rr, 8, "Region", "rr", prfunc);
+
+ /* Print floating-point register contents if valid */
+ if (slpi->slpi_valid.slpi_fr)
+ ia64_log_processor_regs_print(slpi->slpi_fr, 128, "Floating-point", "fr",
+ prfunc);
+
+ /* Print bank1-gr NAT register contents */
+ ia64_log_processor_regs_print(&slpi->slpi_bank1_nat_bits, 1, "NAT", "nat", prfunc);
+
+ /* Print bank 1 register contents if valid */
+ if (slpi->slpi_valid.slpi_bank1_gr)
+ ia64_log_processor_regs_print(slpi->slpi_bank1_gr, 16, "Bank1-General", "gr",
+ prfunc);
+
+ /* Print the cache check information if any */
+ for (i = 0 ; i < MAX_CACHE_ERRORS; i++)
+ ia64_log_cache_check_info_print(i,
+ slpi->slpi_cache_check_info[i].slpi_cache_check,
+ slpi->slpi_cache_check_info[i].slpi_target_address,
+ prfunc);
+ /* Print the tlb check information if any */
+ for (i = 0 ; i < MAX_TLB_ERRORS; i++)
+ ia64_log_tlb_check_info_print(i,slpi->slpi_tlb_check_info[i], prfunc);
+
+ /* Print the bus check information if any */
+ for (i = 0 ; i < MAX_BUS_ERRORS; i++)
+ ia64_log_bus_check_info_print(i,
+ slpi->slpi_bus_check_info[i].slpi_bus_check,
+ slpi->slpi_bus_check_info[i].slpi_requestor_addr,
+ slpi->slpi_bus_check_info[i].slpi_responder_addr,
+ slpi->slpi_bus_check_info[i].slpi_target_addr,
+ prfunc);
+
+}
+
+/*
+ * ia64_log_print
+ * Display the contents of the OS error log information
+ * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC})
+ * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM})
+ * Outputs : None
+ */
+void
+ia64_log_print(int sal_info_type, int sal_sub_info_type, prfunc_t prfunc)
+{
+ char *info_type, *sub_info_type;
+
+ switch(sal_info_type) {
+ case SAL_INFO_TYPE_MCA:
+ info_type = "MCA";
+ break;
+ case SAL_INFO_TYPE_INIT:
+ info_type = "INIT";
+ break;
+ case SAL_INFO_TYPE_CMC:
+ info_type = "CMC";
+ break;
+ default:
+ info_type = "UNKNOWN";
+ break;
+ }
+
+ switch(sal_sub_info_type) {
+ case SAL_SUB_INFO_TYPE_PROCESSOR:
+ sub_info_type = "PROCESSOR";
+ break;
+ case SAL_SUB_INFO_TYPE_PLATFORM:
+ sub_info_type = "PLATFORM";
+ break;
+ default:
+ sub_info_type = "UNKNOWN";
+ break;
+ }
+
+ prfunc("+BEGIN HARDWARE ERROR STATE [%s %s]\n", info_type, sub_info_type);
+ if (sal_sub_info_type == SAL_SUB_INFO_TYPE_PROCESSOR)
+ ia64_log_processor_info_print(
+ IA64_LOG_CURR_BUFFER(sal_info_type, sal_sub_info_type),
+ prfunc);
+ else
+ log_print_platform(IA64_LOG_CURR_BUFFER(sal_info_type, sal_sub_info_type),prfunc);
+ prfunc("+END HARDWARE ERROR STATE [%s %s]\n", info_type,
sub_info_type); +} diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S new file mode 100644 index 000000000..3d49ac06e --- /dev/null +++ b/arch/ia64/kernel/mca_asm.S @@ -0,0 +1,621 @@ +#include <asm/processor.h> +#include <asm/mcaasm.h> +#include <asm/page.h> +#include <asm/mca.h> + + .psr abi64 + .psr lsb + .lsb + +/* + * SAL_TO_OS_MCA_HANDOFF_STATE + * 1. GR1 = OS GP + * 2. GR8 = PAL_PROC physical address + * 3. GR9 = SAL_PROC physical address + * 4. GR10 = SAL GP (physical) + * 5. GR11 = Rendez state + * 6. GR12 = Return address to location within SAL_CHECK + */ +#define SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(_tmp) \ + movl _tmp=ia64_sal_to_os_handoff_state;; \ + st8 [_tmp]=r1,0x08;; \ + st8 [_tmp]=r8,0x08;; \ + st8 [_tmp]=r9,0x08;; \ + st8 [_tmp]=r10,0x08;; \ + st8 [_tmp]=r11,0x08;; \ + st8 [_tmp]=r12,0x08;; + +/* + * OS_MCA_TO_SAL_HANDOFF_STATE + * 1. GR8 = OS_MCA status + * 2. GR9 = SAL GP (physical) + * 3. GR22 = New min state save area pointer + */ +#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp) \ + movl _tmp=ia64_os_to_sal_handoff_state;; \ + DATA_VA_TO_PA(_tmp);; \ + ld8 r8=[_tmp],0x08;; \ + ld8 r9=[_tmp],0x08;; \ + ld8 r22=[_tmp],0x08;; + +/* + * BRANCH + * Jump to the instruction referenced by + * "to_label". + * Branch is taken only if the predicate + * register "p" is true. + * "ip" is the address of the instruction + * located at "from_label". + * "temp" is a scratch register like r2 + * "adjust" needed for HP compiler. + * A screwup somewhere with constant arithmetic. + */ +#define BRANCH(to_label, temp, p, adjust) \ +100: (p) mov temp=ip; \ + ;; \ + (p) adds temp=to_label-100b,temp;\ + (p) adds temp=adjust,temp; \ + (p) mov b1=temp ; \ + (p) br b1 + + .global ia64_os_mca_dispatch + .global ia64_os_mca_dispatch_end + .global ia64_sal_to_os_handoff_state + .global ia64_os_to_sal_handoff_state + .global ia64_os_mca_ucmc_handler + .global ia64_mca_proc_state_dump + .global ia64_mca_proc_state_restore + .global ia64_mca_stack + .global ia64_mca_stackframe + .global ia64_mca_bspstore + + .text + .align 16 + +ia64_os_mca_dispatch: + +#if defined(MCA_TEST) + // Pretend that we are in interrupt context + mov r2=psr + dep r2=0, r2, PSR_IC, 2; + mov psr.l = r2 +#endif /* #if defined(MCA_TEST) */ + + // Save the SAL to OS MCA handoff state as defined + // by SAL SPEC 2.5 + // NOTE : The order in which the state gets saved + // is dependent on the way the C-structure + // for ia64_mca_sal_to_os_state_t has been + // defined in include/asm/mca.h + SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2) + + // LOG PROCESSOR STATE INFO FROM HERE ON.. 
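+	// (Illustration, not executed code: the NOTE above makes the st8
+	// order in SAL_TO_OS_MCA_HANDOFF_STATE_SAVE a layout contract with
+	// ia64_mca_sal_to_os_state_t in include/asm/mca.h, which must
+	// declare one 64-bit member per saved register, in exactly the
+	// order r1, r8, r9, r10, r11, r12 are stored.  Roughly, with
+	// hypothetical field names:
+	//
+	//	typedef struct {
+	//		u64 os_gp;		// GR1
+	//		u64 pal_proc;		// GR8
+	//		u64 sal_proc;		// GR9
+	//		u64 sal_gp;		// GR10
+	//		u64 rendez_state;	// GR11
+	//		u64 sal_check_return;	// GR12
+	//	} ia64_mca_sal_to_os_state_t;
+	// )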
+	;;
+begin_os_mca_dump:
+	BRANCH(ia64_os_mca_proc_state_dump, r2, p0, 0x0)
+	;;
+ia64_os_mca_done_dump:
+
+	// Setup new stack frame for OS_MCA handling
+	movl	r2=ia64_mca_bspstore	// local bspstore area location in r2
+	movl	r3=ia64_mca_stackframe	// save stack frame to memory in r3
+	rse_switch_context(r6,r3,r2);;	// RSC management in this new context
+	movl	r12=ia64_mca_stack;;
+
+	// Enter virtual mode from physical mode
+	VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4)
+ia64_os_mca_virtual_begin:
+
+	// call our handler
+	movl	r2=ia64_mca_ucmc_handler;;
+	mov	b6=r2;;
+	br.call.sptk.few b0=b6
+	;;
+
+	// Revert back to physical mode before going back to SAL
+	PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4)
+ia64_os_mca_virtual_end:
+
+#if defined(MCA_TEST)
+	// Pretend that we are in interrupt context
+	mov	r2=psr
+	dep	r2=0, r2, PSR_IC, 2;
+	mov	psr.l = r2
+#endif /* #if defined(MCA_TEST) */
+
+	// restore the original stack frame here
+	movl	r2=ia64_mca_stackframe	// restore stack frame from memory at r2
+	;;
+	DATA_VA_TO_PA(r2)
+	movl	r4=IA64_PSR_MC
+	;;
+	rse_return_context(r4,r3,r2)	// switch from interrupt context for RSE
+
+	// let us restore all the registers from our PSI structure
+	mov	r8=gp
+	;;
+begin_os_mca_restore:
+	BRANCH(ia64_os_mca_proc_state_restore, r2, p0, 0x0)
+	;;
+
+ia64_os_mca_done_restore:
+	;;
+#ifdef SOFTSDV
+	VIRTUAL_MODE_ENTER(r2,r3, vmode_enter, r4)
+vmode_enter:
+	br.ret.sptk.few b0
+#else
+	// branch back to SAL_CHECK
+	OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2)
+	ld8	r3=[r2];;
+	mov	b0=r3	// SAL_CHECK return address
+	br	b0
+	;;
+#endif /* #ifdef SOFTSDV */
+ia64_os_mca_dispatch_end:
+//EndMain//////////////////////////////////////////////////////////////////////
+
+
+//++
+// Name:
+//	ia64_os_mca_proc_state_dump()
+//
+// Stub Description:
+//
+//	This stub dumps the processor state during MCHK to a data area
+//
+//--
+
+ia64_os_mca_proc_state_dump:
+// Get and save GR0-31 from Proc. Min. State Save Area to SAL PSI
+	movl	r2=ia64_mca_proc_state_dump;;	// Os state dump area
+
+// save ar.NaT
+	mov	r5=ar.unat	// ar.unat
+
+// save banked GRs 16-31 along with NaT bits
+	bsw.1;;
+	st8.spill [r2]=r16,8;;
+	st8.spill [r2]=r17,8;;
+	st8.spill [r2]=r18,8;;
+	st8.spill [r2]=r19,8;;
+	st8.spill [r2]=r20,8;;
+	st8.spill [r2]=r21,8;;
+	st8.spill [r2]=r22,8;;
+	st8.spill [r2]=r23,8;;
+	st8.spill [r2]=r24,8;;
+	st8.spill [r2]=r25,8;;
+	st8.spill [r2]=r26,8;;
+	st8.spill [r2]=r27,8;;
+	st8.spill [r2]=r28,8;;
+	st8.spill [r2]=r29,8;;
+	st8.spill [r2]=r30,8;;
+	st8.spill [r2]=r31,8;;
+
+	mov	r4=ar.unat;;
+	st8	[r2]=r4,8	// save User NaT bits for r16-r31
+	mov	ar.unat=r5	// restore original unat
+	bsw.0;;
+
+// save BRs
+	add	r4=8,r2		// duplicate r2 in r4
+	add	r6=2*8,r2	// duplicate r2 in r6
+
+	mov	r3=b0
+	mov	r5=b1
+	mov	r7=b2;;
+	st8	[r2]=r3,3*8
+	st8	[r4]=r5,3*8
+	st8	[r6]=r7,3*8;;
+
+	mov	r3=b3
+	mov	r5=b4
+	mov	r7=b5;;
+	st8	[r2]=r3,3*8
+	st8	[r4]=r5,3*8
+	st8	[r6]=r7,3*8;;
+
+	mov	r3=b6
+	mov	r5=b7;;
+	st8	[r2]=r3,2*8
+	st8	[r4]=r5,2*8;;
+
+cSaveCRs:
+// save CRs
+	add	r4=8,r2		// duplicate r2 in r4
+	add	r6=2*8,r2	// duplicate r2 in r6
+
+	mov	r3=cr0		// cr.dcr
+	mov	r5=cr1		// cr.itm
+	mov	r7=cr2;;	// cr.iva
+
+	st8	[r2]=r3,8*8
+	st8	[r4]=r5,3*8
+	st8	[r6]=r7,3*8;;	// 48 byte increments
+
+	mov	r3=cr8;;	// cr.pta
+	st8	[r2]=r3,8*8;;	// 64 byte increment
+
+// if PSR.ic=0, reading interruption registers causes an illegal operation fault
+	mov	r3=psr;;
+	tbit.nz.unc p2,p0=r3,PSR_IC;;	// PSI Valid Log bit pos. test
+(p2)	st8	[r2]=r0,9*8+160	// increment by 168 bytes
+begin_skip_intr_regs:
+	BRANCH(SkipIntrRegs, r9, p2, 0x0)
+	;;
+	add	r4=8,r2		// duplicate r2 in r4
+	add	r6=2*8,r2	// duplicate r2 in r6
+
+	mov	r3=cr16		// cr.ipsr
+	mov	r5=cr17		// cr.isr
+	mov	r7=r0;;		// cr.ida => cr18
+	st8	[r2]=r3,3*8
+	st8	[r4]=r5,3*8
+	st8	[r6]=r7,3*8;;
+
+	mov	r3=cr19		// cr.iip
+	mov	r5=cr20		// cr.idtr
+	mov	r7=cr21;;	// cr.iitr
+	st8	[r2]=r3,3*8
+	st8	[r4]=r5,3*8
+	st8	[r6]=r7,3*8;;
+
+	mov	r3=cr22		// cr.iipa
+	mov	r5=cr23		// cr.ifs
+	mov	r7=cr24;;	// cr.iim
+	st8	[r2]=r3,3*8
+	st8	[r4]=r5,3*8
+	st8	[r6]=r7,3*8;;
+
+	mov	r3=cr25;;	// cr.iha
+	st8	[r2]=r3,160;;	// 160 byte increment
+
+SkipIntrRegs:
+	st8	[r2]=r0,168	// another 168 byte increment
+
+	mov	r3=cr66;;	// cr.lid
+	st8	[r2]=r3,40	// 40 byte increment
+
+	mov	r3=cr71;;	// cr.ivr
+	st8	[r2]=r3,8
+
+	mov	r3=cr72;;	// cr.tpr
+	st8	[r2]=r3,24	// 24 byte increment
+
+	mov	r3=r0;;		// cr.eoi => cr75
+	st8	[r2]=r3,168	// 168 byte increment
+
+	mov	r3=r0;;		// cr.irr0 => cr96
+	st8	[r2]=r3,16	// 16 byte increment
+
+	mov	r3=r0;;		// cr.irr1 => cr98
+	st8	[r2]=r3,16	// 16 byte increment
+
+	mov	r3=r0;;		// cr.irr2 => cr100
+	st8	[r2]=r3,16	// 16 byte increment
+
+	mov	r3=r0;;		// cr.irr3 => cr102
+	st8	[r2]=r3,16	// 16 byte increment
+
+	mov	r3=r0;;		// cr.itv => cr114
+	st8	[r2]=r3,16	// 16 byte increment
+
+	mov	r3=r0;;		// cr.pmv => cr116
+	st8	[r2]=r3,8
+
+	mov	r3=r0;;		// cr.lrr0 => cr117
+	st8	[r2]=r3,8
+
+	mov	r3=r0;;		// cr.lrr1 => cr118
+	st8	[r2]=r3,8
+
+	mov	r3=r0;;		// cr.cmcv => cr119
+	st8	[r2]=r3,8*10;;
+
+cSaveARs:
+// save ARs
+	add	r4=8,r2		// duplicate r2 in r4
+	add	r6=2*8,r2	// duplicate r2 in r6
+
+	mov	r3=ar0		// ar.kr0
+	mov	r5=ar1		// ar.kr1
+	mov	r7=ar2;;	// ar.kr2
+	st8	[r2]=r3,3*8
+	st8	[r4]=r5,3*8
+	st8	[r6]=r7,3*8;;
+
+	mov	r3=ar3		// ar.kr3
+	mov	r5=ar4		// ar.kr4
+	mov	r7=ar5;;	// ar.kr5
+	st8	[r2]=r3,3*8
+	st8	[r4]=r5,3*8
+	st8	[r6]=r7,3*8;;
+
+	mov	r3=ar6		// ar.kr6
+	mov	r5=ar7		// ar.kr7
+	mov	r7=r0;;		// ar.kr8
+	st8	[r2]=r3,10*8
+	st8	[r4]=r5,10*8
+	st8	[r6]=r7,10*8;;	// increment by 72 bytes
+
+	mov	r3=ar16		// ar.rsc
+	mov	ar16=r0		// put RSE in enforced lazy mode
+	mov	r5=ar17		// ar.bsp
+	mov	r7=ar18;;	// ar.bspstore
+	st8	[r2]=r3,3*8
+	st8	[r4]=r5,3*8
+	st8	[r6]=r7,3*8;;
+
+	mov	r3=ar19;;	// ar.rnat
+	st8	[r2]=r3,8*13	// increment by 13x8 bytes
+
+	mov	r3=ar32;;	// ar.ccv
+	st8	[r2]=r3,8*4
+
+	mov	r3=ar36;;	// ar.unat
+	st8	[r2]=r3,8*4
+
+	mov	r3=ar40;;	// ar.fpsr
+	st8	[r2]=r3,8*4
+
+	mov	r3=ar44;;	// ar.itc
+	st8	[r2]=r3,160	// 160 byte increment
+
+	mov	r3=ar64;;	// ar.pfs
+	st8	[r2]=r3,8
+
+	mov	r3=ar65;;	// ar.lc
+	st8	[r2]=r3,8
+
+	mov	r3=ar66;;	// ar.ec
+	st8	[r2]=r3
+	add	r2=8*62,r2	// padding
+
+// save RRs
+	mov	ar.lc=0x08-1
+	movl	r4=0x00;;
+
+cStRR:
+	mov	r3=rr[r4];;
+	st8	[r2]=r3,8
+	add	r4=1,r4
+	br.cloop.sptk.few cStRR
+	;;
+end_os_mca_dump:
+	BRANCH(ia64_os_mca_done_dump, r2, p0, -0x10)
+	;;
+
+//EndStub//////////////////////////////////////////////////////////////////////
+
+
+//++
+// Name:
+//	ia64_os_mca_proc_state_restore()
+//
+// Stub Description:
+//
+//	This is a stub to restore the saved processor state during MCHK
+//
+//--
+
+ia64_os_mca_proc_state_restore:
+
+// Restore bank1 GR16-31
+	movl	r2=ia64_mca_proc_state_dump	// Convert virtual address
+	;;					// of OS state dump area
+	DATA_VA_TO_PA(r2)			// to physical address
+	;;
+restore_GRs:	// restore bank-1 GRs 16-31
+	bsw.1;;
+	add	r3=16*8,r2;;	// to get to NaT of GR 16-31
+	ld8	r3=[r3];;
+	mov	ar.unat=r3;;	// first restore NaT
+
+	ld8.fill r16=[r2],8;;
+	ld8.fill r17=[r2],8;;
+	ld8.fill r18=[r2],8;;
+	ld8.fill r19=[r2],8;;
+
ld8.fill r20=[r2],8;; + ld8.fill r21=[r2],8;; + ld8.fill r22=[r2],8;; + ld8.fill r23=[r2],8;; + ld8.fill r24=[r2],8;; + ld8.fill r25=[r2],8;; + ld8.fill r26=[r2],8;; + ld8.fill r27=[r2],8;; + ld8.fill r28=[r2],8;; + ld8.fill r29=[r2],8;; + ld8.fill r30=[r2],8;; + ld8.fill r31=[r2],8;; + + ld8 r3=[r2],8;; // increment to skip NaT + bsw.0;; + +restore_BRs: + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov b0=r3 + mov b1=r5 + mov b2=r7;; + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov b3=r3 + mov b4=r5 + mov b5=r7;; + + ld8 r3=[r2],2*8 + ld8 r5=[r4],2*8;; + mov b6=r3 + mov b7=r5;; + +restore_CRs: + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],8*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; // 48 byte increments + mov cr0=r3 // cr.dcr + mov cr1=r5 // cr.itm + mov cr2=r7;; // cr.iva + + ld8 r3=[r2],8*8;; // 64 byte increments +// mov cr8=r3 // cr.pta + + +// if PSR.ic=1, reading interruption registers causes an illegal operation fault + mov r3=psr;; + tbit.nz.unc p2,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test +(p2) st8 [r2]=r0,9*8+160 // increment by 160 byte inc. + +begin_rskip_intr_regs: + BRANCH(rSkipIntrRegs, r9, p2, 0x0) + ;; + + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov cr16=r3 // cr.ipsr + mov cr17=r5 // cr.isr is read only +// mov cr18=r7;; // cr.ida + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov cr19=r3 // cr.iip + mov cr20=r5 // cr.idtr + mov cr21=r7;; // cr.iitr + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov cr22=r3 // cr.iipa + mov cr23=r5 // cr.ifs + mov cr24=r7 // cr.iim + + ld8 r3=[r2],160;; // 160 byte increment + mov cr25=r3 // cr.iha + +rSkipIntrRegs: + ld8 r3=[r2],168;; // another 168 byte inc. + + ld8 r3=[r2],40;; // 40 byte increment + mov cr66=r3 // cr.lid + + ld8 r3=[r2],8;; +// mov cr71=r3 // cr.ivr is read only + ld8 r3=[r2],24;; // 24 byte increment + mov cr72=r3 // cr.tpr + + ld8 r3=[r2],168;; // 168 byte inc. +// mov cr75=r3 // cr.eoi + + ld8 r3=[r2],16;; // 16 byte inc. +// mov cr96=r3 // cr.irr0 is read only + + ld8 r3=[r2],16;; // 16 byte inc. +// mov cr98=r3 // cr.irr1 is read only + + ld8 r3=[r2],16;; // 16 byte inc +// mov cr100=r3 // cr.irr2 is read only + + ld8 r3=[r2],16;; // 16b inc. +// mov cr102=r3 // cr.irr3 is read only + + ld8 r3=[r2],16;; // 16 byte inc. 
+// mov cr114=r3 // cr.itv + + ld8 r3=[r2],8;; +// mov cr116=r3 // cr.pmv + ld8 r3=[r2],8;; +// mov cr117=r3 // cr.lrr0 + ld8 r3=[r2],8;; +// mov cr118=r3 // cr.lrr1 + ld8 r3=[r2],8*10;; +// mov cr119=r3 // cr.cmcv + +restore_ARs: + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov ar0=r3 // ar.kro + mov ar1=r5 // ar.kr1 + mov ar2=r7;; // ar.kr2 + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov ar3=r3 // ar.kr3 + mov ar4=r5 // ar.kr4 + mov ar5=r7;; // ar.kr5 + + ld8 r3=[r2],10*8 + ld8 r5=[r4],10*8 + ld8 r7=[r6],10*8;; + mov ar6=r3 // ar.kr6 + mov ar7=r5 // ar.kr7 +// mov ar8=r6 // ar.kr8 + ;; + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; +// mov ar16=r3 // ar.rsc +// mov ar17=r5 // ar.bsp is read only + mov ar16=r0 // make sure that RSE is in enforced lazy mode + mov ar18=r7;; // ar.bspstore + + ld8 r9=[r2],8*13;; + mov ar19=r9 // ar.rnat + + mov ar16=r3 // ar.rsc + ld8 r3=[r2],8*4;; + mov ar32=r3 // ar.ccv + + ld8 r3=[r2],8*4;; + mov ar36=r3 // ar.unat + + ld8 r3=[r2],8*4;; + mov ar40=r3 // ar.fpsr + + ld8 r3=[r2],160;; // 160 +// mov ar44=r3 // ar.itc + + ld8 r3=[r2],8;; + mov ar64=r3 // ar.pfs + + ld8 r3=[r2],8;; + mov ar65=r3 // ar.lc + + ld8 r3=[r2];; + mov ar66=r3 // ar.ec + add r2=8*62,r2;; // padding + +restore_RRs: + mov r5=ar.lc + mov ar.lc=0x08-1 + movl r4=0x00 +cStRRr: + ld8 r3=[r2],8;; +// mov rr[r4]=r3 // what are its access previledges? + add r4=1,r4 + br.cloop.sptk.few cStRRr + ;; + mov ar.lc=r5 + ;; +end_os_mca_restore: + BRANCH(ia64_os_mca_done_restore, r2, p0, -0x20) + ;; +//EndStub////////////////////////////////////////////////////////////////////// diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S new file mode 100644 index 000000000..1506bacc2 --- /dev/null +++ b/arch/ia64/kernel/pal.S @@ -0,0 +1,119 @@ +/* + * PAL Firmware support + * IA-64 Processor Programmers Reference Vol 2 + * + * Copyright (C) 1999 Don Dugger <don.dugger@intel.com> + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 David Mosberger <davidm@hpl.hp.com> + */ + + .text + .psr abi64 + .psr lsb + .lsb + + .data +pal_entry_point: + data8 ia64_pal_default_handler + .text + +/* + * Set the PAL entry point address. This could be written in C code, but we do it here + * to keep it all in one module (besides, it's so trivial that it's + * not a big deal). + * + * in0 Address of the PAL entry point (text address, NOT a function descriptor). + */ + .align 16 + .global ia64_pal_handler_init + .proc ia64_pal_handler_init +ia64_pal_handler_init: + alloc r3=ar.pfs,1,0,0,0 + movl r2=pal_entry_point + ;; + st8 [r2]=in0 + br.ret.sptk.few rp + + .endp ia64_pal_handler_init + +/* + * Default PAL call handler. This needs to be coded in assembly because it uses + * the static calling convention, i.e., the RSE may not be used and calls are + * done via "br.cond" (not "br.call"). + */ + .align 16 + .global ia64_pal_default_handler + .proc ia64_pal_default_handler +ia64_pal_default_handler: + mov r8=-1 + br.cond.sptk.few rp + +/* + * Make a PAL call using the static calling convention. 
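+ *
+ * Illustrative C-level usage (a sketch, not code from this tree; shown
+ * for the !__GCC_MULTIREG_RETVALS__ calling form, and assuming the
+ * usual status/v0/v1/v2 members of struct ia64_pal_retval):
+ *
+ *	struct ia64_pal_retval iprv;
+ *
+ *	ia64_pal_call_static(&iprv, PAL_PTCE_INFO, 0, 0, 0);
+ *	if (iprv.status == 0)
+ *		printk("ptce_base=%lx\n", iprv.v0);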
+ * + * in0 Pointer to struct ia64_pal_retval + * in1 Index of PAL service + * in2 - in4 Remaning PAL arguments + * + */ + +#ifdef __GCC_MULTIREG_RETVALS__ +# define arg0 in0 +# define arg1 in1 +# define arg2 in2 +# define arg3 in3 +# define arg4 in4 +#else +# define arg0 in1 +# define arg1 in2 +# define arg2 in3 +# define arg3 in4 +# define arg4 in5 +#endif + + .text + .psr abi64 + .psr lsb + .lsb + + .align 16 + .global ia64_pal_call_static + .proc ia64_pal_call_static +ia64_pal_call_static: + alloc loc0 = ar.pfs,6,90,0,0 + movl loc2 = pal_entry_point +1: { + mov r28 = arg0 + mov r29 = arg1 + mov r8 = ip + } + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + mov r30 = arg2 + mov r31 = arg3 + ;; + mov loc3 = psr + mov loc1 = rp + adds r8 = .ret0-1b,r8 + ;; + rsm psr.i + mov b7 = loc2 + mov rp = r8 + ;; + br.cond.sptk.few b7 +.ret0: mov psr.l = loc3 +#ifndef __GCC_MULTIREG_RETVALS__ + st8 [in0] = r8, 8 + ;; + st8 [in0] = r9, 8 + ;; + st8 [in0] = r10, 8 + ;; + st8 [in0] = r11, 8 +#endif + mov ar.pfs = loc0 + mov rp = loc1 + ;; + srlz.d // seralize restoration of psr.l + br.ret.sptk.few b0 + .endp ia64_pal_call_static diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c new file mode 100644 index 000000000..f86f45537 --- /dev/null +++ b/arch/ia64/kernel/pci-dma.c @@ -0,0 +1,56 @@ +/* + * Dynamic DMA mapping support. + * + * This implementation is for IA-64 platforms that do not support + * I/O TLBs (aka DMA address translation hardware). + * + * XXX This doesn't do the right thing yet. It appears we would have + * to add additional zones so we can implement the various address + * mask constraints that we might encounter. A zone for memory < 32 + * bits is obviously necessary... + */ + +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/pci.h> + +#include <asm/io.h> + +/* Pure 2^n version of get_order */ +extern __inline__ unsigned long +get_order (unsigned long size) +{ + unsigned long order = ia64_fls(size); + + printk ("get_order: size=%lu, order=%lu\n", size, order); + + if (order > PAGE_SHIFT) + order -= PAGE_SHIFT; + else + order = 0; + return order; +} + +void * +pci_alloc_consistent (struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) +{ + void *ret; + int gfp = GFP_ATOMIC; + + if (!hwdev || hwdev->dma_mask != 0xffffffff) + gfp |= GFP_DMA; + ret = (void *)__get_free_pages(gfp, get_order(size)); + + if (ret) { + memset(ret, 0, size); + *dma_handle = virt_to_bus(ret); + } + return ret; +} + +void +pci_free_consistent (struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) +{ + free_pages((unsigned long) vaddr, get_order(size)); +} diff --git a/arch/ia64/kernel/pci.c b/arch/ia64/kernel/pci.c new file mode 100644 index 000000000..3bceeed8e --- /dev/null +++ b/arch/ia64/kernel/pci.c @@ -0,0 +1,239 @@ +/* + * pci.c - Low-Level PCI Access in IA64 + * + * Derived from bios32.c of i386 tree. + * + */ + +#include <linux/config.h> + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/malloc.h> +#include <linux/smp_lock.h> +#include <linux/spinlock.h> + +#include <asm/machvec.h> +#include <asm/page.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <asm/io.h> + +#include <asm/sal.h> + + +#ifdef CONFIG_SMP +# include <asm/smp.h> +#endif +#include <asm/irq.h> + + +#undef DEBUG +#define DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) 
+#endif
+
+/*
+ * This interrupt-safe spinlock protects all accesses to PCI
+ * configuration space.
+ */
+
+spinlock_t pci_lock = SPIN_LOCK_UNLOCKED;
+
+struct pci_fixup pcibios_fixups[] = { { 0 } };
+
+#define PCI_NO_CHECKS		0x400
+#define PCI_NO_PEER_FIXUP	0x800
+
+static unsigned int pci_probe = PCI_NO_CHECKS;
+
+/* Macro to build a PCI configuration address to be passed as a parameter to SAL. */
+
+#define PCI_CONFIG_ADDRESS(dev, where) (((u64) dev->bus->number << 16) | ((u64) (dev->devfn & 0xff) << 8) | (where & 0xff))
+
+static int
+pci_conf_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+	s64 status;
+	u64 lval;
+
+	status = ia64_sal_pci_config_read(PCI_CONFIG_ADDRESS(dev, where), 1, &lval);
+	*value = lval;
+	return status;
+}
+
+static int
+pci_conf_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+	s64 status;
+	u64 lval;
+
+	status = ia64_sal_pci_config_read(PCI_CONFIG_ADDRESS(dev, where), 2, &lval);
+	*value = lval;
+	return status;
+}
+
+static int
+pci_conf_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+	s64 status;
+	u64 lval;
+
+	status = ia64_sal_pci_config_read(PCI_CONFIG_ADDRESS(dev, where), 4, &lval);
+	*value = lval;
+	return status;
+}
+
+static int
+pci_conf_write_config_byte (struct pci_dev *dev, int where, u8 value)
+{
+	return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 1, value);
+}
+
+static int
+pci_conf_write_config_word (struct pci_dev *dev, int where, u16 value)
+{
+	return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 2, value);
+}
+
+static int
+pci_conf_write_config_dword (struct pci_dev *dev, int where, u32 value)
+{
+	return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 4, value);
+}
+
+
+static struct pci_ops pci_conf = {
+	pci_conf_read_config_byte,
+	pci_conf_read_config_word,
+	pci_conf_read_config_dword,
+	pci_conf_write_config_byte,
+	pci_conf_write_config_word,
+	pci_conf_write_config_dword
+};
+
+/*
+ * Try to find PCI BIOS.  This will always work for IA64.
+ */
+
+static struct pci_ops * __init
+pci_find_bios(void)
+{
+	return &pci_conf;
+}
+
+/*
+ * Initialization.  Uses the SAL interface.
+ */
+
+#define PCI_BUSSES_TO_SCAN 2	/* On "real" ;) hardware this will be 255 */
+
+void __init
+pcibios_init(void)
+{
+	struct pci_ops *ops = NULL;
+	int i;
+
+	if ((ops = pci_find_bios()) == NULL) {
+		printk("PCI: No PCI bus detected\n");
+		return;
+	}
+
+	printk("PCI: Probing PCI hardware\n");
+	for (i = 0; i < PCI_BUSSES_TO_SCAN; i++)
+		pci_scan_bus(i, ops, NULL);
+	platform_pci_fixup();
+	return;
+}
+
+/*
+ * Called after each bus is probed, but before its children
+ * are examined.
+ */
+
+void __init
+pcibios_fixup_bus(struct pci_bus *b)
+{
+	return;
+}
+
+int
+pci_assign_resource (struct pci_dev *dev, int i)
+{
+	printk("pci_assign_resource: not implemented!\n");
+	return -ENODEV;
+}
+
+void __init
+pcibios_update_resource(struct pci_dev *dev, struct resource *root,
+			struct resource *res, int resource)
+{
+	unsigned long where, size;
+	u32 reg;
+
+	where = PCI_BASE_ADDRESS_0 + (resource * 4);
+	size = res->end - res->start;
+	pci_read_config_dword(dev, where, &reg);
+	reg = (reg & size) | (((u32) (res->start - root->start)) & ~size);
+	pci_write_config_dword(dev, where, reg);
+
+	/* ??? FIXME -- record old value for shutdown.  */
+}
+
+void __init
+pcibios_update_irq(struct pci_dev *dev, int irq)
+{
+	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+
+	/* ??? FIXME -- record old value for shutdown.  */
+}
+
+void __init
+pcibios_fixup_pbus_ranges (struct pci_bus * bus, struct pbus_set_ranges_data * ranges)
+{
+	ranges->io_start -= bus->resource[0]->start;
+	ranges->io_end -= bus->resource[0]->start;
+	ranges->mem_start -= bus->resource[1]->start;
+	ranges->mem_end -= bus->resource[1]->start;
+}
+
+int __init
+pcibios_enable_device (struct pci_dev *dev)
+{
+	/* Not needed, since we enable all devices at startup.  */
+	return 0;
+}
+
+/*
+ * PCI BIOS setup, always defaults to SAL interface
+ */
+
+char * __init
+pcibios_setup(char *str)
+{
+	pci_probe = PCI_NO_CHECKS;
+	return NULL;
+}
+
+void
+pcibios_align_resource (void *data, struct resource *res, unsigned long size)
+{
+}
+
+#if 0 /*def CONFIG_PROC_FS*/
+/*
+ * This is an ugly hack to get a (weak) unresolved reference to something that is
+ * in drivers/pci/proc.c.  Without this, the file does not get linked in at all
+ * (I suspect the reason this isn't needed on Linux/x86 is that most people compile
+ * with module support, in which case the EXPORT_SYMBOL() stuff will ensure the
+ * code gets linked in.  Sigh...  --davidm 99/12/20.
+ */
+asm ("data8 proc_bus_pci_add");
+#endif
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
new file mode 100644
index 000000000..274b68a73
--- /dev/null
+++ b/arch/ia64/kernel/perfmon.c
@@ -0,0 +1,227 @@
+#include <linux/config.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+
+#include <asm/errno.h>
+#include <asm/irq.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_PERFMON
+
+#define MAX_PERF_COUNTER	4	/* true for Itanium, at least */
+#define WRITE_PMCS_AND_START	0xa0
+#define WRITE_PMCS		0xa1
+#define READ_PMDS		0xa2
+#define STOP_PMCS		0xa3
+#define IA64_COUNTER_MASK	0xffffffffffffff6f
+#define PERF_OVFL_VAL		0xffffffff
+
+struct perfmon_counter {
+	unsigned long data;
+	int counter_num;
+};
+
+unsigned long pmds[MAX_PERF_COUNTER];
+struct task_struct *perf_owner;
+
+/*
+ * We set dcr.pp, psr.pp, and the appropriate pmc control values with
+ * this.  Notice that we go about modifying _each_ task's pt_regs to
+ * set cr_ipsr.pp.  This will start counting when "current" does an
+ * _rfi_.  Also, since cr_ipsr (and hence cr_ipsr.pp) is inherited
+ * across forks, we do _not_ need additional code on context
+ * switches.  On stopping of the counters we don't _need_ to go about
+ * changing every task's cr_ipsr back to where it was, because we can
+ * just set pmc[0]=1.  But we do it anyway because we will probably
+ * add thread-specific accounting later.
+ *
+ * The obvious problem with this is that on SMP systems, it is a bit
+ * of work (when someone wants to do it) - it would be easier if we
+ * just added code to the context-switch path.  I think we would need
+ * to lock the run queue to ensure no context switches, send an IPI to
+ * each processor, and in that IPI handler, just modify the psr bit of
+ * only the _current_ thread, since we have modified the psr bit
+ * correctly in the kernel stack for every process which is not
+ * running.  Might crash on SMP systems without the
+ * lock_kernel().  Hence the lock..
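+ *
+ * A rough sketch of that IPI variant, purely for illustration
+ * (hypothetical: it assumes an smp_call_function()-style primitive and
+ * that flipping the saved psr.pp of whatever is current on each CPU is
+ * sufficient):
+ *
+ *	static void pp_on (void *ignored)
+ *	{
+ *		ia64_psr(ia64_task_regs(current))->pp = 1;
+ *	}
+ *	...
+ *	smp_call_function(pp_on, NULL, 1, 1);
+ *	pp_on(NULL);			-- and once on this CPU
+ *
+ * with the for_each_task() loop below still covering every task that
+ * is not currently running.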
+ */ +asmlinkage unsigned long +sys_perfmonctl (int cmd1, int cmd2, void *ptr) +{ + struct perfmon_counter tmp, *cptr = ptr; + unsigned long pmd, cnum, dcr, flags; + struct task_struct *p; + struct pt_regs *regs; + struct perf_counter; + int i; + + switch (cmd1) { + case WRITE_PMCS: /* Writes to PMC's and clears PMDs */ + case WRITE_PMCS_AND_START: /* Also starts counting */ + + if (!access_ok(VERIFY_READ, cptr, sizeof(struct perf_counter)*cmd2)) + return -EFAULT; + + if (cmd2 >= MAX_PERF_COUNTER) + return -EFAULT; + + if (perf_owner && perf_owner != current) + return -EBUSY; + perf_owner = current; + + for (i = 0; i < cmd2; i++, cptr++) { + copy_from_user(&tmp, cptr, sizeof(tmp)); + /* XXX need to check validity of counter_num and perhaps data!! */ + ia64_set_pmc(tmp.counter_num, tmp.data); + ia64_set_pmd(tmp.counter_num, 0); + pmds[tmp.counter_num - 4] = 0; + } + + if (cmd1 == WRITE_PMCS_AND_START) { + local_irq_save(flags); + dcr = ia64_get_dcr(); + dcr |= IA64_DCR_PP; + ia64_set_dcr(dcr); + local_irq_restore(flags); + + /* + * This is a no can do. It obviously wouldn't + * work on SMP where another process may not + * be blocked at all. + * + * Perhaps we need a global predicate in the + * leave_kernel path to control if pp should + * be on or off? + */ + lock_kernel(); + for_each_task(p) { + regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) - 1; + ia64_psr(regs)->pp = 1; + } + unlock_kernel(); + ia64_set_pmc(0, 0); + } + break; + + case READ_PMDS: + if (cmd2 >= MAX_PERF_COUNTER) + return -EFAULT; + if (!access_ok(VERIFY_WRITE, cptr, sizeof(struct perf_counter)*cmd2)) + return -EFAULT; + local_irq_save(flags); + /* XXX this looks wrong */ + __asm__ __volatile__("rsm psr.pp\n"); + dcr = ia64_get_dcr(); + dcr &= ~IA64_DCR_PP; + ia64_set_dcr(dcr); + local_irq_restore(flags); + + /* + * We cannot touch pmc[0] to stop counting here, as + * that particular instruction might cause an overflow + * and the mask in pmc[0] might get lost. I'm not very + * sure of the hardware behavior here. So we stop + * counting by psr.pp = 0. And we reset dcr.pp to + * prevent an interrupt from mucking up psr.pp in the + * meanwhile. Perfmon interrupts are pended, hence the + * above code should be ok if one of the above + * instructions cause overflows. Is this ok? When I + * muck with dcr, is the cli/sti needed?? + */ + for (i = 0, cnum = 4; i < MAX_PERF_COUNTER; i++, cnum++, cptr++) { + pmd = pmds[i] + (ia64_get_pmd(cnum) & PERF_OVFL_VAL); + put_user(pmd, &cptr->data); + } + local_irq_save(flags); + /* XXX this looks wrong */ + __asm__ __volatile__("ssm psr.pp"); + dcr = ia64_get_dcr(); + dcr |= IA64_DCR_PP; + ia64_set_dcr(dcr); + local_irq_restore(flags); + break; + + case STOP_PMCS: + ia64_set_pmc(0, 1); + for (i = 0; i < MAX_PERF_COUNTER; ++i) + ia64_set_pmc(i, 0); + + local_irq_save(flags); + dcr = ia64_get_dcr(); + dcr &= ~IA64_DCR_PP; + ia64_set_dcr(dcr); + local_irq_restore(flags); + /* + * This is a no can do. It obviously wouldn't + * work on SMP where another process may not + * be blocked at all. + * + * Perhaps we need a global predicate in the + * leave_kernel path to control if pp should + * be on or off? 
+ */ + lock_kernel(); + for_each_task(p) { + regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) - 1; + ia64_psr(regs)->pp = 0; + } + unlock_kernel(); + perf_owner = 0; + break; + + default: + break; + } + return 0; +} + +static inline void +update_counters (void) +{ + unsigned long mask, i, cnum, val; + + mask = ia64_get_pmd(0) >> 4; + for (i = 0, cnum = 4; i < MAX_PERF_COUNTER; cnum++, i++, mask >>= 1) { + if (mask & 0x1) + val = PERF_OVFL_VAL; + else + /* since we got an interrupt, might as well clear every pmd. */ + val = ia64_get_pmd(cnum) & PERF_OVFL_VAL; + pmds[i] += val; + ia64_set_pmd(cnum, 0); + } +} + +static void +perfmon_interrupt (int irq, void *arg, struct pt_regs *regs) +{ + update_counters(); + ia64_set_pmc(0, 0); + ia64_srlz_d(); +} + +void +perfmon_init (void) +{ + if (request_irq(PERFMON_IRQ, perfmon_interrupt, 0, "perfmon", NULL)) { + printk("perfmon_init: could not allocate performance monitor vector %u\n", + PERFMON_IRQ); + return; + } + ia64_set_pmv(PERFMON_IRQ); + ia64_srlz_d(); +} + +#else /* !CONFIG_PERFMON */ + +asmlinkage unsigned long +sys_perfmonctl (int cmd1, int cmd2, void *ptr) +{ + return -ENOSYS; +} + +#endif /* !CONFIG_PERFMON */ diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c new file mode 100644 index 000000000..5b6deb5f5 --- /dev/null +++ b/arch/ia64/kernel/process.c @@ -0,0 +1,421 @@ +/* + * Architecture-specific setup. + * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#define __KERNEL_SYSCALLS__ /* see <asm/unistd.h> */ +#include <linux/config.h> + +#include <linux/pm.h> +#include <linux/elf.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> + +#include <asm/delay.h> +#include <asm/efi.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/sal.h> +#include <asm/uaccess.h> +#include <asm/user.h> + + +void +show_regs (struct pt_regs *regs) +{ + unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; + + printk("\npsr : %016lx ifs : %016lx ip : [<%016lx>]\n", + regs->cr_ipsr, regs->cr_ifs, ip); + printk("unat: %016lx pfs : %016lx rsc : %016lx\n", + regs->ar_unat, regs->ar_pfs, regs->ar_rsc); + printk("rnat: %016lx bsps: %016lx pr : %016lx\n", + regs->ar_rnat, regs->ar_bspstore, regs->pr); + printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n", + regs->loadrs, regs->ar_ccv, regs->ar_fpsr); + printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, regs->b6, regs->b7); + printk("f6 : %05lx%016lx f7 : %05lx%016lx\n", + regs->f6.u.bits[1], regs->f6.u.bits[0], + regs->f7.u.bits[1], regs->f7.u.bits[0]); + printk("f8 : %05lx%016lx f9 : %05lx%016lx\n", + regs->f8.u.bits[1], regs->f8.u.bits[0], + regs->f9.u.bits[1], regs->f9.u.bits[0]); + + printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, regs->r2, regs->r3); + printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10); + printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13); + printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16); + printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19); + printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22); + printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25); + printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, 
regs->r28); + printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31); + + /* print the stacked registers if cr.ifs is valid: */ + if (regs->cr_ifs & 0x8000000000000000) { + unsigned long val, sof, *bsp, ndirty; + int i, is_nat = 0; + + sof = regs->cr_ifs & 0x7f; /* size of frame */ + ndirty = (regs->loadrs >> 19); + bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty); + for (i = 0; i < sof; ++i) { + get_user(val, ia64_rse_skip_regs(bsp, i)); + printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val, + ((i == sof - 1) || (i % 3) == 2) ? "\n" : " "); + } + } +} + +void __attribute__((noreturn)) +cpu_idle (void *unused) +{ + /* endless idle loop with no priority at all */ + init_idle(); + current->priority = 0; + current->counter = -100; + +#ifdef CONFIG_SMP + if (!current->need_resched) + min_xtp(); +#endif + + while (1) { + while (!current->need_resched) { + continue; + } +#ifdef CONFIG_SMP + normal_xtp(); +#endif + schedule(); + check_pgt_cache(); + if (pm_idle) + (*pm_idle)(); + } +} + +/* + * Copy the state of an ia-64 thread. + * + * We get here through the following call chain: + * + * <clone syscall> + * sys_clone + * do_fork + * copy_thread + * + * This means that the stack layout is as follows: + * + * +---------------------+ (highest addr) + * | struct pt_regs | + * +---------------------+ + * | struct switch_stack | + * +---------------------+ + * | | + * | memory stack | + * | | <-- sp (lowest addr) + * +---------------------+ + * + * Note: if we get called through kernel_thread() then the memory + * above "(highest addr)" is valid kernel stack memory that needs to + * be copied as well. + * + * Observe that we copy the unat values that are in pt_regs and + * switch_stack. Since the interpretation of unat is dependent upon + * the address to which the registers got spilled, doing this is valid + * only as long as we preserve the alignment of the stack. Since the + * stack is always page aligned, we know this is the case. + * + * XXX Actually, the above isn't true when we create kernel_threads(). + * If we ever needs to create kernel_threads() that preserve the unat + * values we'll need to fix this. Perhaps an easy workaround would be + * to always clear the unat bits in the child thread. + */ +int +copy_thread (int nr, unsigned long clone_flags, unsigned long usp, + struct task_struct *p, struct pt_regs *regs) +{ + unsigned long rbs, child_rbs, rbs_size, stack_offset, stack_top, stack_used; + struct switch_stack *child_stack, *stack; + extern char ia64_ret_from_syscall_clear_r8; + extern char ia64_strace_clear_r8; + struct pt_regs *child_ptregs; + +#ifdef CONFIG_SMP + /* + * For SMP idle threads, fork_by_hand() calls do_fork with + * NULL regs. 
+ */ + if (!regs) + return 0; +#endif + + stack_top = (unsigned long) current + IA64_STK_OFFSET; + stack = ((struct switch_stack *) regs) - 1; + stack_used = stack_top - (unsigned long) stack; + stack_offset = IA64_STK_OFFSET - stack_used; + + child_stack = (struct switch_stack *) ((unsigned long) p + stack_offset); + child_ptregs = (struct pt_regs *) (child_stack + 1); + + /* copy parent's switch_stack & pt_regs to child: */ + memcpy(child_stack, stack, stack_used); + + rbs = (unsigned long) current + IA64_RBS_OFFSET; + child_rbs = (unsigned long) p + IA64_RBS_OFFSET; + rbs_size = stack->ar_bspstore - rbs; + + /* copy the parent's register backing store to the child: */ + memcpy((void *) child_rbs, (void *) rbs, rbs_size); + + child_ptregs->r8 = 0; /* child gets a zero return value */ + if (user_mode(child_ptregs)) + child_ptregs->r12 = usp; /* user stack pointer */ + else { + /* + * Note: we simply preserve the relative position of + * the stack pointer here. There is no need to + * allocate a scratch area here, since that will have + * been taken care of by the caller of sys_clone() + * already. + */ + child_ptregs->r12 = (unsigned long) (child_ptregs + 1); /* kernel sp */ + child_ptregs->r13 = (unsigned long) p; /* set `current' pointer */ + } + if (p->flags & PF_TRACESYS) + child_stack->b0 = (unsigned long) &ia64_strace_clear_r8; + else + child_stack->b0 = (unsigned long) &ia64_ret_from_syscall_clear_r8; + child_stack->ar_bspstore = child_rbs + rbs_size; + + /* copy the thread_struct: */ + p->thread.ksp = (unsigned long) child_stack - 16; + /* + * NOTE: The calling convention considers all floating point + * registers in the high partition (fph) to be scratch. Since + * the only way to get to this point is through a system call, + * we know that the values in fph are all dead. Hence, there + * is no need to inherit the fph state from the parent to the + * child and all we have to do is to make sure that + * IA64_THREAD_FPH_VALID is cleared in the child. + * + * XXX We could push this optimization a bit further by + * clearing IA64_THREAD_FPH_VALID on ANY system call. + * However, it's not clear this is worth doing. Also, it + * would be a slight deviation from the normal Linux system + * call behavior where scratch registers are preserved across + * system calls (unless used by the system call itself). 
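+ *
+ * (Sketch of that more aggressive variant, for illustration only: the
+ * syscall entry path would do the equivalent of
+ *
+ *	current->thread.flags &= ~IA64_THREAD_FPH_VALID;
+ *
+ * on every kernel entry, after which the masking done below would be
+ * redundant.)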
+ * + * If we wanted to inherit the fph state from the parent to the + * child, we would have to do something along the lines of: + * + * if (ia64_get_fpu_owner() == current && ia64_psr(regs)->mfh) { + * p->thread.flags |= IA64_THREAD_FPH_VALID; + * ia64_save_fpu(&p->thread.fph); + * } else if (current->thread.flags & IA64_THREAD_FPH_VALID) { + * memcpy(p->thread.fph, current->thread.fph, sizeof(p->thread.fph)); + * } + */ + p->thread.flags = (current->thread.flags & ~IA64_THREAD_FPH_VALID); + return 0; +} + +void +ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst) +{ + struct switch_stack *sw = ((struct switch_stack *) pt) - 1; + unsigned long ar_ec, cfm, ar_bsp, ndirty, *krbs; + + ar_ec = (sw->ar_pfs >> 52) & 0x3f; + + cfm = pt->cr_ifs & ((1UL << 63) - 1); + if ((pt->cr_ifs & (1UL << 63)) == 0) { + /* if cr_ifs isn't valid, we got here through a syscall or a break */ + cfm = sw->ar_pfs & ((1UL << 38) - 1); + } + + krbs = (unsigned long *) current + IA64_RBS_OFFSET/8; + ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 16)); + ar_bsp = (long) ia64_rse_skip_regs((long *) pt->ar_bspstore, ndirty); + + /* r0-r31 + * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) + * predicate registers (p0-p63) + * b0-b7 + * ip cfm user-mask + * ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec + */ + memset(dst, 0, sizeof (dst)); /* don't leak any "random" bits */ + + /* r0 is zero */ dst[ 1] = pt->r1; dst[ 2] = pt->r2; dst[ 3] = pt->r3; + dst[ 4] = sw->r4; dst[ 5] = sw->r5; dst[ 6] = sw->r6; dst[ 7] = sw->r7; + dst[ 8] = pt->r8; dst[ 9] = pt->r9; dst[10] = pt->r10; dst[11] = pt->r11; + dst[12] = pt->r12; dst[13] = pt->r13; dst[14] = pt->r14; dst[15] = pt->r15; + memcpy(dst + 16, &pt->r16, 16*8); /* r16-r31 are contiguous */ + + dst[32] = ia64_get_nat_bits(pt, sw); + dst[33] = pt->pr; + + /* branch regs: */ + dst[34] = pt->b0; dst[35] = sw->b1; dst[36] = sw->b2; dst[37] = sw->b3; + dst[38] = sw->b4; dst[39] = sw->b5; dst[40] = pt->b6; dst[41] = pt->b7; + + dst[42] = pt->cr_iip; dst[43] = pt->cr_ifs; + dst[44] = pt->cr_ipsr; /* XXX perhaps we should filter out some bits here? 
--davidm */ + + dst[45] = pt->ar_rsc; dst[46] = ar_bsp; dst[47] = pt->ar_bspstore; dst[48] = pt->ar_rnat; + dst[49] = pt->ar_ccv; dst[50] = pt->ar_unat; dst[51] = sw->ar_fpsr; dst[52] = pt->ar_pfs; + dst[53] = sw->ar_lc; dst[54] = (sw->ar_pfs >> 52) & 0x3f; +} + +int +dump_fpu (struct pt_regs *pt, elf_fpregset_t dst) +{ + struct switch_stack *sw = ((struct switch_stack *) pt) - 1; + struct task_struct *fpu_owner = ia64_get_fpu_owner(); + + memset(dst, 0, sizeof (dst)); /* don't leak any "random" bits */ + + /* f0 is 0.0 */ /* f1 is 1.0 */ dst[2] = sw->f2; dst[3] = sw->f3; + dst[4] = sw->f4; dst[5] = sw->f5; dst[6] = pt->f6; dst[7] = pt->f7; + dst[8] = pt->f8; dst[9] = pt->f9; + memcpy(dst + 10, &sw->f10, 22*16); /* f10-f31 are contiguous */ + + if ((fpu_owner == current) || (current->thread.flags & IA64_THREAD_FPH_VALID)) { + if (fpu_owner == current) { + __ia64_save_fpu(current->thread.fph); + } + memcpy(dst + 32, current->thread.fph, 96*16); + } + return 1; /* f0-f31 are always valid so we always return 1 */ +} + +asmlinkage long +sys_execve (char *filename, char **argv, char **envp, struct pt_regs *regs) +{ + int error; + + lock_kernel(); + filename = getname(filename); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = do_execve(filename, argv, envp, regs); + putname(filename); +out: + unlock_kernel(); + return error; +} + +pid_t +kernel_thread (int (*fn)(void *), void *arg, unsigned long flags) +{ + struct task_struct *parent = current; + int result; + + clone(flags | CLONE_VM, 0); + if (parent != current) { + result = (*fn)(arg); + _exit(result); + } + return 0; /* parent: just return */ +} + +/* + * Flush thread state. This is called when a thread does an execve(). + */ +void +flush_thread (void) +{ + /* drop floating-point and debug-register state if it exists: */ + current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID); + + if (ia64_get_fpu_owner() == current) { + ia64_set_fpu_owner(0); + } +} + +/* + * Clean up state associated with current thread. This is called when + * the thread calls exit(). + */ +void +exit_thread (void) +{ + if (ia64_get_fpu_owner() == current) { + ia64_set_fpu_owner(0); + } +} + +/* + * Free remaining state associated with DEAD_TASK. This is called + * after the parent of DEAD_TASK has collected the exist status of the + * task via wait(). + */ +void +release_thread (struct task_struct *dead_task) +{ + /* nothing to do */ +} + +unsigned long +get_wchan (struct task_struct *p) +{ + struct ia64_frame_info info; + unsigned long ip; + int count = 0; + /* + * These bracket the sleeping functions.. + */ + extern void scheduling_functions_start_here(void); + extern void scheduling_functions_end_here(void); +# define first_sched ((unsigned long) scheduling_functions_start_here) +# define last_sched ((unsigned long) scheduling_functions_end_here) + + /* + * Note: p may not be a blocked task (it could be current or + * another process running on some other CPU. Rather than + * trying to determine if p is really blocked, we just assume + * it's blocked and rely on the unwind routines to fail + * gracefully if the process wasn't really blocked after all. 
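+ *
+ * (Example: for a task blocked in sys_wait4(), the loop below would
+ * typically see ip values such as schedule(), then sleep_on(), then
+ * sys_wait4(), and return the first ip outside
+ * [first_sched, last_sched) -- the one inside sys_wait4() -- as the
+ * wait channel.  The exact call chain is illustrative.)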
+ * --davidm 99/12/15 + */ + ia64_unwind_init_from_blocked_task(&info, p); + do { + if (ia64_unwind_to_previous_frame(&info) < 0) + return 0; + ip = ia64_unwind_get_ip(&info); + if (ip < first_sched || ip >= last_sched) + return ip; + } while (count++ < 16); + return 0; +# undef first_sched +# undef last_sched +} + +void +machine_restart (char *restart_cmd) +{ + (*efi.reset_system)(EFI_RESET_WARM, 0, 0, 0); +} + +void +machine_halt (void) +{ + printk("machine_halt: need PAL or ACPI version here!!\n"); + machine_restart(0); +} + +void +machine_power_off (void) +{ + printk("machine_power_off: unimplemented (need ACPI version here)\n"); + machine_halt (); +} diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c new file mode 100644 index 000000000..18a8e342e --- /dev/null +++ b/arch/ia64/kernel/ptrace.c @@ -0,0 +1,653 @@ +/* + * Kernel support for the ptrace() and syscall tracing interfaces. + * + * Copyright (C) 1999-2000 Hewlett-Packard Co + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * Derived from the x86 and Alpha versions. Most of the code in here + * could actually be factored into a common set of routines. + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/smp_lock.h> +#include <linux/user.h> + +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace_offsets.h> +#include <asm/rse.h> +#include <asm/system.h> +#include <asm/uaccess.h> + +/* + * Collect the NaT bits for r1-r31 from sw->caller_unat and + * sw->ar_unat and return a NaT bitset where bit i is set iff the NaT + * bit of register i is set. + */ +long +ia64_get_nat_bits (struct pt_regs *pt, struct switch_stack *sw) +{ +# define GET_BITS(str, first, last, unat) \ + ({ \ + unsigned long bit = ia64_unat_pos(&str->r##first); \ + unsigned long mask = ((1UL << (last - first + 1)) - 1) << first; \ + (ia64_rotl(unat, first) >> bit) & mask; \ + }) + unsigned long val; + + val = GET_BITS(pt, 1, 3, sw->caller_unat); + val |= GET_BITS(pt, 12, 15, sw->caller_unat); + val |= GET_BITS(pt, 8, 11, sw->caller_unat); + val |= GET_BITS(pt, 16, 31, sw->caller_unat); + val |= GET_BITS(sw, 4, 7, sw->ar_unat); + return val; + +# undef GET_BITS +} + +/* + * Store the NaT bitset NAT in pt->caller_unat and sw->ar_unat. + */ +void +ia64_put_nat_bits (struct pt_regs *pt, struct switch_stack *sw, unsigned long nat) +{ +# define PUT_BITS(str, first, last, nat) \ + ({ \ + unsigned long bit = ia64_unat_pos(&str->r##first); \ + unsigned long mask = ((1UL << (last - first + 1)) - 1) << bit; \ + (ia64_rotr(nat, first) << bit) & mask; \ + }) + sw->caller_unat = PUT_BITS(pt, 1, 3, nat); + sw->caller_unat |= PUT_BITS(pt, 12, 15, nat); + sw->caller_unat |= PUT_BITS(pt, 8, 11, nat); + sw->caller_unat |= PUT_BITS(pt, 16, 31, nat); + sw->ar_unat = PUT_BITS(sw, 4, 7, nat); + +# undef PUT_BITS +} + +#define IA64_MLI_TEMPLATE 0x2 +#define IA64_MOVL_OPCODE 6 + +void +ia64_increment_ip (struct pt_regs *regs) +{ + unsigned long w0, w1, ri = ia64_psr(regs)->ri + 1; + + if (ri > 2) { + ri = 0; + regs->cr_iip += 16; + } else if (ri == 2) { + get_user(w0, (char *) regs->cr_iip + 0); + get_user(w1, (char *) regs->cr_iip + 8); + if (((w0 >> 1) & 0xf) == IA64_MLI_TEMPLATE && (w1 >> 60) == IA64_MOVL_OPCODE) { + /* + * rfi'ing to slot 2 of an MLI bundle causes + * an illegal operation fault. We don't want + * that to happen... Note that we check the + * opcode only. 
"movl" has a vc bit of 0, but + * since a vc bit of 1 is currently reserved, + * we might just as well treat it like a movl. + */ + ri = 0; + regs->cr_iip += 16; + } + } + ia64_psr(regs)->ri = ri; +} + +void +ia64_decrement_ip (struct pt_regs *regs) +{ + unsigned long w0, w1, ri = ia64_psr(regs)->ri - 1; + + if (ia64_psr(regs)->ri == 0) { + regs->cr_iip -= 16; + ri = 2; + get_user(w0, (char *) regs->cr_iip + 0); + get_user(w1, (char *) regs->cr_iip + 8); + if (((w0 >> 1) & 0xf) == IA64_MLI_TEMPLATE && (w1 >> 60) == IA64_MOVL_OPCODE) { + /* + * rfi'ing to slot 2 of an MLI bundle causes + * an illegal operation fault. We don't want + * that to happen... Note that we check the + * opcode only. "movl" has a vc bit of 0, but + * since a vc bit of 1 is currently reserved, + * we might just as well treat it like a movl. + */ + ri = 1; + } + } + ia64_psr(regs)->ri = ri; +} + +/* + * This routine is used to read an rnat bits that are stored on the + * kernel backing store. Since, in general, the alignment of the user + * and kernel are different, this is not completely trivial. In + * essence, we need to construct the user RNAT based on up to two + * kernel RNAT values and/or the RNAT value saved in the child's + * pt_regs. + * + * user rbs + * + * +--------+ <-- lowest address + * | slot62 | + * +--------+ + * | rnat | 0x....1f8 + * +--------+ + * | slot00 | \ + * +--------+ | + * | slot01 | > child_regs->ar_rnat + * +--------+ | + * | slot02 | / kernel rbs + * +--------+ +--------+ + * <- child_regs->ar_bspstore | slot61 | <-- krbs + * +- - - - + +--------+ + * | slot62 | + * +- - - - + +--------+ + * | rnat | + * +- - - - + +--------+ + * vrnat | slot00 | + * +- - - - + +--------+ + * = = + * +--------+ + * | slot00 | \ + * +--------+ | + * | slot01 | > child_stack->ar_rnat + * +--------+ | + * | slot02 | / + * +--------+ + * <--- child_stack->ar_bspstore + * + * The way to think of this code is as follows: bit 0 in the user rnat + * corresponds to some bit N (0 <= N <= 62) in one of the kernel rnat + * value. The kernel rnat value holding this bit is stored in + * variable rnat0. rnat1 is loaded with the kernel rnat value that + * form the upper bits of the user rnat value. + * + * Boundary cases: + * + * o when reading the rnat "below" the first rnat slot on the kernel + * backing store, rnat0/rnat1 are set to 0 and the low order bits + * are merged in from pt->ar_rnat. + * + * o when reading the rnat "above" the last rnat slot on the kernel + * backing store, rnat0/rnat1 gets its value from sw->ar_rnat. + */ +static unsigned long +get_rnat (struct pt_regs *pt, struct switch_stack *sw, + unsigned long *krbs, unsigned long *urnat_addr) +{ + unsigned long rnat0 = 0, rnat1 = 0, urnat = 0, *slot0_kaddr, kmask = ~0UL; + unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; + long num_regs; + + kbsp = (unsigned long *) sw->ar_bspstore; + ubspstore = (unsigned long *) pt->ar_bspstore; + /* + * First, figure out which bit number slot 0 in user-land maps + * to in the kernel rnat. Do this by figuring out how many + * register slots we're beyond the user's backingstore and + * then computing the equivalent address in kernel space. 
+ */ + num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1); + slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); + shift = ia64_rse_slot_num(slot0_kaddr); + rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); + rnat0_kaddr = rnat1_kaddr - 64; + + if (ubspstore + 63 > urnat_addr) { + /* some bits need to be merged in from pt->ar_rnat */ + kmask = ~((1UL << ia64_rse_slot_num(ubspstore)) - 1); + urnat = (pt->ar_rnat & ~kmask); + } + if (rnat0_kaddr >= kbsp) { + rnat0 = sw->ar_rnat; + } else if (rnat0_kaddr > krbs) { + rnat0 = *rnat0_kaddr; + } + if (rnat1_kaddr >= kbsp) { + rnat1 = sw->ar_rnat; + } else if (rnat1_kaddr > krbs) { + rnat1 = *rnat1_kaddr; + } + urnat |= ((rnat1 << (63 - shift)) | (rnat0 >> shift)) & kmask; + return urnat; +} + +/* + * The reverse of get_rnat. + */ +static void +put_rnat (struct pt_regs *pt, struct switch_stack *sw, + unsigned long *krbs, unsigned long *urnat_addr, unsigned long urnat) +{ + unsigned long rnat0 = 0, rnat1 = 0, rnat = 0, *slot0_kaddr, kmask = ~0UL, mask; + unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; + long num_regs; + + kbsp = (unsigned long *) sw->ar_bspstore; + ubspstore = (unsigned long *) pt->ar_bspstore; + /* + * First, figure out which bit number slot 0 in user-land maps + * to in the kernel rnat. Do this by figuring out how many + * register slots we're beyond the user's backingstore and + * then computing the equivalent address in kernel space. + */ + num_regs = (long) ia64_rse_num_regs(ubspstore, urnat_addr + 1); + slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); + shift = ia64_rse_slot_num(slot0_kaddr); + rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); + rnat0_kaddr = rnat1_kaddr - 64; + + if (ubspstore + 63 > urnat_addr) { + /* some bits need to be place in pt->ar_rnat: */ + kmask = ~((1UL << ia64_rse_slot_num(ubspstore)) - 1); + pt->ar_rnat = (pt->ar_rnat & kmask) | (rnat & ~kmask); + } + /* + * Note: Section 11.1 of the EAS guarantees that bit 63 of an + * rnat slot is ignored. so we don't have to clear it here. + */ + rnat0 = (urnat << shift); + mask = ~0UL << shift; + if (rnat0_kaddr >= kbsp) { + sw->ar_rnat = (sw->ar_rnat & ~mask) | (rnat0 & mask); + } else if (rnat0_kaddr > krbs) { + *rnat0_kaddr = ((*rnat0_kaddr & ~mask) | (rnat0 & mask)); + } + + rnat1 = (urnat >> (63 - shift)); + mask = ~0UL >> (63 - shift); + if (rnat1_kaddr >= kbsp) { + sw->ar_rnat = (sw->ar_rnat & ~mask) | (rnat1 & mask); + } else if (rnat1_kaddr > krbs) { + *rnat1_kaddr = ((*rnat1_kaddr & ~mask) | (rnat1 & mask)); + } +} + +long +ia64_peek (struct pt_regs *regs, struct task_struct *child, unsigned long addr, long *val) +{ + unsigned long *bspstore, *krbs, krbs_num_regs, regnum, *rbs_end, *laddr; + struct switch_stack *child_stack; + struct pt_regs *child_regs; + size_t copied; + long ret; + + laddr = (unsigned long *) addr; + child_regs = ia64_task_regs(child); + child_stack = (struct switch_stack *) child_regs - 1; + bspstore = (unsigned long *) child_regs->ar_bspstore; + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + krbs_num_regs = ia64_rse_num_regs(krbs, (unsigned long *) child_stack->ar_bspstore); + rbs_end = ia64_rse_skip_regs(bspstore, krbs_num_regs); + if (laddr >= bspstore && laddr <= ia64_rse_rnat_addr(rbs_end)) { + /* + * Attempt to read the RBS in an area that's actually + * on the kernel RBS => read the corresponding bits in + * the kernel RBS. 
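+ *
+ * (Usage sketch: the PTRACE_PEEKTEXT/PEEKDATA cases in sys_ptrace()
+ * below reduce to
+ *
+ *	long val;
+ *	if (ia64_peek(regs, child, addr, &val) == 0)
+ *		return val;
+ *
+ * so a debugger reading a stacked register of a stopped child
+ * transparently gets the copy that lives on the kernel rbs.)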
+ */ + if (ia64_rse_is_rnat_slot(laddr)) + ret = get_rnat(child_regs, child_stack, krbs, laddr); + else { + regnum = ia64_rse_num_regs(bspstore, laddr); + laddr = ia64_rse_skip_regs(krbs, regnum); + if (regnum >= krbs_num_regs) { + ret = 0; + } else { + if ((unsigned long) laddr >= (unsigned long) high_memory) { + printk("yikes: trying to access long at %p\n", laddr); + return -EIO; + } + ret = *laddr; + } + } + } else { + copied = access_process_vm(child, addr, &ret, sizeof(ret), 0); + if (copied != sizeof(ret)) + return -EIO; + } + *val = ret; + return 0; +} + +long +ia64_poke (struct pt_regs *regs, struct task_struct *child, unsigned long addr, long val) +{ + unsigned long *bspstore, *krbs, krbs_num_regs, regnum, *rbs_end, *laddr; + struct switch_stack *child_stack; + struct pt_regs *child_regs; + + laddr = (unsigned long *) addr; + child_regs = ia64_task_regs(child); + child_stack = (struct switch_stack *) child_regs - 1; + bspstore = (unsigned long *) child_regs->ar_bspstore; + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + krbs_num_regs = ia64_rse_num_regs(krbs, (unsigned long *) child_stack->ar_bspstore); + rbs_end = ia64_rse_skip_regs(bspstore, krbs_num_regs); + if (laddr >= bspstore && laddr <= ia64_rse_rnat_addr(rbs_end)) { + /* + * Attempt to write the RBS in an area that's actually + * on the kernel RBS => write the corresponding bits + * in the kernel RBS. + */ + if (ia64_rse_is_rnat_slot(laddr)) + put_rnat(child_regs, child_stack, krbs, laddr, val); + else { + regnum = ia64_rse_num_regs(bspstore, laddr); + laddr = ia64_rse_skip_regs(krbs, regnum); + if (regnum < krbs_num_regs) { + *laddr = val; + } + } + } else if (access_process_vm(child, addr, &val, sizeof(val), 1) != sizeof(val)) { + return -EIO; + } + return 0; +} + +/* + * Ensure the state in child->thread.fph is up-to-date. + */ +static void +sync_fph (struct task_struct *child) +{ + if (ia64_psr(ia64_task_regs(child))->mfh && ia64_get_fpu_owner() == child) { + ia64_save_fpu(&child->thread.fph[0]); + child->thread.flags |= IA64_THREAD_FPH_VALID; + } + if (!(child->thread.flags & IA64_THREAD_FPH_VALID)) { + memset(&child->thread.fph, 0, sizeof(child->thread.fph)); + child->thread.flags |= IA64_THREAD_FPH_VALID; + } +} + +asmlinkage long +sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, + long arg4, long arg5, long arg6, long arg7, long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + struct switch_stack *child_stack; + struct pt_regs *child_regs; + struct task_struct *child; + unsigned long flags, *base; + long ret, regnum; + + lock_kernel(); + ret = -EPERM; + if (request == PTRACE_TRACEME) { + /* are we already being traced? */ + if (current->flags & PF_PTRACED) + goto out; + current->flags |= PF_PTRACED; + ret = 0; + goto out; + } + + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + read_unlock(&tasklist_lock); + if (!child) + goto out; + ret = -EPERM; + if (pid == 1) /* no messing around with init! 
*/ + goto out; + + if (request == PTRACE_ATTACH) { + if (child == current) + goto out; + if ((!child->dumpable || + (current->uid != child->euid) || + (current->uid != child->suid) || + (current->uid != child->uid) || + (current->gid != child->egid) || + (current->gid != child->sgid) || + (!cap_issubset(child->cap_permitted, current->cap_permitted)) || + (current->gid != child->gid)) && !capable(CAP_SYS_PTRACE)) + goto out; + /* the same process cannot be attached many times */ + if (child->flags & PF_PTRACED) + goto out; + child->flags |= PF_PTRACED; + if (child->p_pptr != current) { + unsigned long flags; + + write_lock_irqsave(&tasklist_lock, flags); + REMOVE_LINKS(child); + child->p_pptr = current; + SET_LINKS(child); + write_unlock_irqrestore(&tasklist_lock, flags); + } + send_sig(SIGSTOP, child, 1); + ret = 0; + goto out; + } + ret = -ESRCH; + if (!(child->flags & PF_PTRACED)) + goto out; + if (child->state != TASK_STOPPED) { + if (request != PTRACE_KILL) + goto out; + } + if (child->p_pptr != current) + goto out; + + switch (request) { + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: /* read word at location addr */ + ret = ia64_peek(regs, child, addr, &data); + if (ret == 0) { + ret = data; + regs->r8 = 0; /* ensure "ret" is not mistaken as an error code */ + } + goto out; + + case PTRACE_POKETEXT: + case PTRACE_POKEDATA: /* write the word at location addr */ + ret = ia64_poke(regs, child, addr, data); + goto out; + + case PTRACE_PEEKUSR: /* read the word at addr in the USER area */ + ret = -EIO; + if ((addr & 0x7) != 0) + goto out; + + if (addr < PT_CALLER_UNAT) { + /* accessing fph */ + sync_fph(child); + addr += (unsigned long) &child->thread.fph; + ret = *(unsigned long *) addr; + } else if (addr < PT_F9+16) { + /* accessing switch_stack or pt_regs: */ + child_regs = ia64_task_regs(child); + child_stack = (struct switch_stack *) child_regs - 1; + ret = *(unsigned long *) ((long) child_stack + addr - PT_CALLER_UNAT); + + if (addr == PT_AR_BSP) { + /* ret currently contains pt_regs.loadrs */ + unsigned long *rbs, *bspstore, ndirty; + + rbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + bspstore = (unsigned long *) child_regs->ar_bspstore; + ndirty = ia64_rse_num_regs(rbs, rbs + (ret >> 19)); + ret = (unsigned long) ia64_rse_skip_regs(bspstore, ndirty); + } + } else { + if (addr >= PT_IBR) { + regnum = (addr - PT_IBR) >> 3; + base = &child->thread.ibr[0]; + } else { + regnum = (addr - PT_DBR) >> 3; + base = &child->thread.dbr[0]; + } + if (regnum >= 8) + goto out; + data = base[regnum]; + } + regs->r8 = 0; /* ensure "ret" is not mistaken as an error code */ + goto out; + + case PTRACE_POKEUSR: /* write the word at addr in the USER area */ + ret = -EIO; + if ((addr & 0x7) != 0) + goto out; + + if (addr < PT_CALLER_UNAT) { + /* accessing fph */ + sync_fph(child); + addr += (unsigned long) &child->thread.fph; + *(unsigned long *) addr = data; + if (ret < 0) + goto out; + } else if (addr < PT_F9+16) { + /* accessing switch_stack or pt_regs */ + child_regs = ia64_task_regs(child); + child_stack = (struct switch_stack *) child_regs - 1; + + if (addr == PT_AR_BSP) { + /* compute the loadrs value based on bsp and bspstore: */ + unsigned long *rbs, *bspstore, ndirty, *kbsp; + + bspstore = (unsigned long *) child_regs->ar_bspstore; + ndirty = ia64_rse_num_regs(bspstore, (unsigned long *) data); + rbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + kbsp = ia64_rse_skip_regs(rbs, ndirty); + data = (kbsp - rbs) << 19; + } + *(unsigned long *) ((long) child_stack + addr - PT_CALLER_UNAT) = 
data;
+ } else {
+ if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
+ child->thread.flags |= IA64_THREAD_DBG_VALID;
+ memset(child->thread.dbr, 0, sizeof child->thread.dbr);
+ memset(child->thread.ibr, 0, sizeof child->thread.ibr);
+ }
+
+ if (addr >= PT_IBR) {
+ regnum = (addr - PT_IBR) >> 3;
+ base = &child->thread.ibr[0];
+ } else {
+ regnum = (addr - PT_DBR) >> 3;
+ base = &child->thread.dbr[0];
+ }
+ if (regnum >= 8)
+ goto out;
+ if (regnum & 1) {
+ /* force breakpoint to be effective at most for user-level: */
+ data &= ~(0x7UL << 56);
+ }
+ base[regnum] = data;
+ }
+ ret = 0;
+ goto out;
+
+ case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
+ case PTRACE_CONT: /* restart after signal. */
+ ret = -EIO;
+ if (data > _NSIG)
+ goto out;
+ if (request == PTRACE_SYSCALL)
+ child->flags |= PF_TRACESYS;
+ else
+ child->flags &= ~PF_TRACESYS;
+ child->exit_code = data;
+
+ /* make sure the single step/taken-branch trap bits are not set: */
+ ia64_psr(ia64_task_regs(child))->ss = 0;
+ ia64_psr(ia64_task_regs(child))->tb = 0;
+
+ wake_up_process(child);
+ ret = 0;
+ goto out;
+
+ case PTRACE_KILL:
+ /*
+ * Make the child exit. Best I can do is send it a
+ * sigkill. Perhaps it should be put in the status
+ * that it wants to exit.
+ */
+ if (child->state == TASK_ZOMBIE) /* already dead */
+ goto out;
+ child->exit_code = SIGKILL;
+
+ /* make sure the single step/taken-branch trap bits are not set: */
+ ia64_psr(ia64_task_regs(child))->ss = 0;
+ ia64_psr(ia64_task_regs(child))->tb = 0;
+
+ wake_up_process(child);
+ ret = 0;
+ goto out;
+
+ case PTRACE_SINGLESTEP: /* let child execute for one instruction */
+ case PTRACE_SINGLEBLOCK:
+ ret = -EIO;
+ if (data > _NSIG)
+ goto out;
+
+ child->flags &= ~PF_TRACESYS;
+ if (request == PTRACE_SINGLESTEP) {
+ ia64_psr(ia64_task_regs(child))->ss = 1;
+ } else {
+ ia64_psr(ia64_task_regs(child))->tb = 1;
+ }
+ child->exit_code = data;
+
+ /* give it a chance to run. */
+ wake_up_process(child);
+ ret = 0;
+ goto out;
+
+ case PTRACE_DETACH: /* detach a process that was attached. */
+ ret = -EIO;
+ if (data > _NSIG)
+ goto out;
+
+ child->flags &= ~(PF_PTRACED|PF_TRACESYS);
+ child->exit_code = data;
+ write_lock_irqsave(&tasklist_lock, flags);
+ REMOVE_LINKS(child);
+ child->p_pptr = child->p_opptr;
+ SET_LINKS(child);
+ write_unlock_irqrestore(&tasklist_lock, flags);
+
+ /* make sure the single step/taken-branch trap bits are not set: */
+ ia64_psr(ia64_task_regs(child))->ss = 0;
+ ia64_psr(ia64_task_regs(child))->tb = 0;
+
+ wake_up_process(child);
+ ret = 0;
+ goto out;
+
+ default:
+ ret = -EIO;
+ goto out;
+ }
+ out:
+ unlock_kernel();
+ return ret;
+}
+
+void
+syscall_trace (void)
+{
+ if ((current->flags & (PF_PTRACED|PF_TRACESYS)) != (PF_PTRACED|PF_TRACESYS))
+ return;
+ current->exit_code = SIGTRAP;
+ set_current_state(TASK_STOPPED);
+ notify_parent(current, SIGCHLD);
+ schedule();
+ /*
+ * This isn't the same as continuing with a signal, but it
+ * will do for normal use. strace only continues with a
+ * signal if the stopping signal is not SIGTRAP. -brl
+ */
+ if (current->exit_code) {
+ send_sig(current->exit_code, current, 1);
+ current->exit_code = 0;
+ }
+}
diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c
new file mode 100644
index 000000000..8743f6588
--- /dev/null
+++ b/arch/ia64/kernel/sal.c
@@ -0,0 +1,157 @@
+/*
+ * System Abstraction Layer (SAL) interface routines.
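+ *
+ * All SAL calls go through the ia64_sal function pointer, which is
+ * set up by ia64_sal_handler_init() below. A minimal sketch of a
+ * call site (editor's illustration; assumes struct ia64_sal_retval
+ * and SAL_FREQ_BASE as provided by <asm/sal.h>):
+ *
+ *   struct ia64_sal_retval rv;
+ *
+ *   rv = (*ia64_sal)(SAL_FREQ_BASE, 0, 0, 0, 0, 0, 0, 0);
+ *   if (rv.status < 0)
+ *           printk("SAL: %s\n", ia64_sal_strerror(rv.status));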
+ * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + */ +#include <linux/config.h> + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/string.h> + +#include <asm/page.h> +#include <asm/sal.h> +#include <asm/pal.h> + +#define SAL_DEBUG + +spinlock_t sal_lock = SPIN_LOCK_UNLOCKED; + +static struct { + void *addr; /* function entry point */ + void *gpval; /* gp value to use */ +} pdesc; + +static long +default_handler (void) +{ + return -1; +} + +ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler; + +const char * +ia64_sal_strerror (long status) +{ + const char *str; + switch (status) { + case 0: str = "Call completed without error"; break; + case 1: str = "Effect a warm boot of the system to complete " + "the update"; break; + case -1: str = "Not implemented"; break; + case -2: str = "Invalid argument"; break; + case -3: str = "Call completed with error"; break; + case -4: str = "Virtual address not registered"; break; + case -5: str = "No information available"; break; + case -6: str = "Insufficient space to add the entry"; break; + case -7: str = "Invalid entry_addr value"; break; + case -8: str = "Invalid interrupt vector"; break; + case -9: str = "Requested memory not available"; break; + case -10: str = "Unable to write to the NVM device"; break; + case -11: str = "Invalid partition type specified"; break; + case -12: str = "Invalid NVM_Object id specified"; break; + case -13: str = "NVM_Object already has the maximum number " + "of partitions"; break; + case -14: str = "Insufficient space in partition for the " + "requested write sub-function"; break; + case -15: str = "Insufficient data buffer space for the " + "requested read record sub-function"; break; + case -16: str = "Scratch buffer required for the write/delete " + "sub-function"; break; + case -17: str = "Insufficient space in the NVM_Object for the " + "requested create sub-function"; break; + case -18: str = "Invalid value specified in the partition_rec " + "argument"; break; + case -19: str = "Record oriented I/O not supported for this " + "partition"; break; + case -20: str = "Bad format of record to be written or " + "required keyword variable not " + "specified"; break; + default: str = "Unknown SAL status code"; break; + } + return str; +} + +static void __init +ia64_sal_handler_init (void *entry_point, void *gpval) +{ + /* fill in the SAL procedure descriptor and point ia64_sal to it: */ + pdesc.addr = entry_point; + pdesc.gpval = gpval; + ia64_sal = (ia64_sal_handler) &pdesc; +} + + +void __init +ia64_sal_init (struct ia64_sal_systab *systab) +{ + unsigned long min, max; + char *p; + struct ia64_sal_desc_entry_point *ep; + int i; + + if (!systab) { + printk("Hmm, no SAL System Table.\n"); + return; + } + + if (strncmp(systab->signature, "SST_", 4) != 0) + printk("bad signature in system table!"); + + printk("SAL v%u.%02u: ia32bios=%s, oem=%.32s, product=%.32s\n", + systab->sal_rev_major, systab->sal_rev_minor, + systab->ia32_bios_present ? "present" : "absent", + systab->oem_id, systab->product_id); + + min = ~0UL; + max = 0; + + p = (char *) (systab + 1); + for (i = 0; i < systab->entry_count; i++) { + /* + * The first byte of each entry type contains the type desciptor. 
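+ * Entries are variable-sized, which is why the walk below advances
+ * by SAL_DESC_SIZE(*p) rather than by a fixed stride.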
+ */ + switch (*p) { + case SAL_DESC_ENTRY_POINT: + ep = (struct ia64_sal_desc_entry_point *) p; +#ifdef SAL_DEBUG + printk("sal[%d] - entry: pal_proc=0x%lx, sal_proc=0x%lx\n", + i, ep->pal_proc, ep->sal_proc); +#endif + ia64_pal_handler_init(__va(ep->pal_proc)); + ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp)); + break; + + case SAL_DESC_AP_WAKEUP: +#ifdef CONFIG_SMP + { + struct ia64_sal_desc_ap_wakeup *ap = (void *) p; +# ifdef SAL_DEBUG + printk("sal[%d] - wakeup type %x, 0x%lx\n", + i, ap->mechanism, ap->vector); +# endif + switch (ap->mechanism) { + case IA64_SAL_AP_EXTERNAL_INT: + ap_wakeup_vector = ap->vector; +# ifdef SAL_DEBUG + printk("SAL: AP wakeup using external interrupt; " + "vector 0x%lx\n", ap_wakeup_vector); +# endif + break; + + default: + printk("SAL: AP wakeup mechanism unsupported!\n"); + break; + } + break; + } +#endif + } + p += SAL_DESC_SIZE(*p); + } +} diff --git a/arch/ia64/kernel/sal_stub.S b/arch/ia64/kernel/sal_stub.S new file mode 100644 index 000000000..7ab16bbcd --- /dev/null +++ b/arch/ia64/kernel/sal_stub.S @@ -0,0 +1,116 @@ +/* + * gcc currently does not conform to the ia-64 calling convention as far + * as returning function values are concerned. Instead of returning + * values up to 32 bytes in size in r8-r11, gcc returns any value + * bigger than a doubleword via a structure that's allocated by the + * caller and whose address is passed into the function. Since + * SAL_PROC returns values according to the calling convention, this + * stub takes care of copying r8-r11 to the place where gcc expects + * them. + * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#ifndef __GCC_MULTIREG_RETVALS__ + .text + .psr abi64 + .psr lsb + .lsb + + .align 16 + .global ia64_sal_stub +ia64_sal_stub: + /* + * Sheesh, the Cygnus backend passes the pointer to a return value structure in + * in0 whereas the HP backend passes it in r8. Don't you hate those little + * differences... + */ +#ifdef GCC_RETVAL_POINTER_IN_R8 + adds r2=-24,sp + adds sp=-48,sp + mov r14=rp + ;; + st8 [r2]=r8,8 // save pointer to return value + addl r3=@ltoff(ia64_sal),gp + ;; + ld8 r3=[r3] + st8 [r2]=gp,8 // save global pointer + ;; + ld8 r3=[r3] // fetch the value of ia64_sal + st8 [r2]=r14 // save return pointer + ;; + ld8 r2=[r3],8 // load function's entry point + ;; + ld8 gp=[r3] // load function's global pointer + ;; + mov b6=r2 + br.call.sptk.few rp=b6 +.ret0: adds r2=24,sp + ;; + ld8 r3=[r2],8 // restore pointer to return value + ;; + ld8 gp=[r2],8 // restore global pointer + st8 [r3]=r8,8 + ;; + ld8 r14=[r2] // restore return pointer + st8 [r3]=r9,8 + ;; + mov rp=r14 + st8 [r3]=r10,8 + ;; + st8 [r3]=r11,8 + adds sp=48,sp + br.sptk.few rp +#else + /* + * On input: + * in0 = pointer to return value structure + * in1 = index of SAL function to call + * in2..inN = remaining args to SAL call + */ + /* + * We allocate one input and eight output register such that the br.call instruction + * will rename in1-in7 to in0-in6---exactly what we want because SAL doesn't want to + * see the pointer to the return value structure. 
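+ *
+ * Roughly: the SAL function ID arrives in r33; with this alloc, r33
+ * is out0 of our frame, so the br.call renames it to SAL_PROC's in0,
+ * and the return-value pointer in our in0 (r32) is never passed on.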
+ */
+ alloc r15=ar.pfs,1,0,8,0
+
+ adds r2=-24,sp
+ adds sp=-48,sp
+ mov r14=rp
+ ;;
+ st8 [r2]=r15,8 // save ar.pfs
+ addl r3=@ltoff(ia64_sal),gp
+ ;;
+ ld8 r3=[r3] // get address of ia64_sal
+ st8 [r2]=gp,8 // save global pointer
+ ;;
+ ld8 r3=[r3] // get value of ia64_sal
+ st8 [r2]=r14,8 // save return address (rp)
+ ;;
+ ld8 r2=[r3],8 // load function's entry point
+ ;;
+ ld8 gp=[r3] // load function's global pointer
+ mov b6=r2
+ br.call.sptk.few rp=b6 // make SAL call
+.ret0: adds r2=24,sp
+ ;;
+ ld8 r15=[r2],8 // restore ar.pfs
+ ;;
+ ld8 gp=[r2],8 // restore global pointer
+ st8 [in0]=r8,8 // store 1st dword of return value
+ ;;
+ ld8 r14=[r2] // restore return address (rp)
+ st8 [in0]=r9,8 // store 2nd dword of return value
+ ;;
+ mov rp=r14
+ st8 [in0]=r10,8 // store 3rd dword of return value
+ ;;
+ st8 [in0]=r11,8 // store 4th dword of return value
+ adds sp=48,sp // pop stack frame
+ mov ar.pfs=r15
+ br.ret.sptk.few rp
+#endif
+
+ .endp ia64_sal_stub
+#endif /* __GCC_MULTIREG_RETVALS__ */
diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c
new file mode 100644
index 000000000..84581af2e
--- /dev/null
+++ b/arch/ia64/kernel/semaphore.c
@@ -0,0 +1,336 @@
+/*
+ * IA-64 semaphore implementation (derived from x86 version).
+ *
+ * Copyright (C) 1999-2000 Hewlett-Packard Co
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/*
+ * Semaphores are implemented using a two-way counter: The "count"
+ * variable is decremented for each process that tries to acquire the
+ * semaphore, while the "sleepers" variable is a count of such
+ * acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can efficiently
+ * test if they need to do any extra work (up needs to do something
+ * only if count was negative before the increment operation).
+ *
+ * "sleepers" and the contention routine ordering is protected by the
+ * semaphore spinlock.
+ *
+ * Note that these functions are only called when there is contention
+ * on the lock, and as such all this is the "non-critical" part of the
+ * whole semaphore business. The critical part is the inline stuff in
+ * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
+ */
+#include <linux/sched.h>
+
+#include <asm/semaphore.h>
+
+/*
+ * Logic:
+ * - Only on a boundary condition do we need to care. When we go
+ * from a negative count to a non-negative, we wake people up.
+ * - When we go from a non-negative count to a negative one, we must
+ * (a) synchronize with the "sleepers" count and (b) make sure
+ * that we're on the wakeup list before we synchronize so that
+ * we cannot lose wakeup events.
+ */
+
+void
+__up (struct semaphore *sem)
+{
+ wake_up(&sem->wait);
+}
+
+static spinlock_t semaphore_lock = SPIN_LOCK_UNLOCKED;
+
+void
+__down (struct semaphore *sem)
+{
+ struct task_struct *tsk = current;
+ DECLARE_WAITQUEUE(wait, tsk);
+ tsk->state = TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE;
+ add_wait_queue_exclusive(&sem->wait, &wait);
+
+ spin_lock_irq(&semaphore_lock);
+ sem->sleepers++;
+ for (;;) {
+ int sleepers = sem->sleepers;
+
+ /*
+ * Add "everybody else" into it. They aren't
+ * playing, because we own the spinlock.
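+ *
+ * Worked example (editor's illustration): while two tasks sleep,
+ * count is transiently -2 and sleepers is 2; adding sleepers-1 = 1
+ * renormalizes count to -1 with sleepers = 1. After up() raises
+ * count to 0, the woken task adds sleepers-1 = 0, sees a
+ * non-negative result, and owns the semaphore.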
+ */ + if (!atomic_add_negative(sleepers - 1, &sem->count)) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irq(&semaphore_lock); + + schedule(); + tsk->state = TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE; + spin_lock_irq(&semaphore_lock); + } + spin_unlock_irq(&semaphore_lock); + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; + wake_up(&sem->wait); +} + +int +__down_interruptible (struct semaphore * sem) +{ + int retval = 0; + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + tsk->state = TASK_INTERRUPTIBLE|TASK_EXCLUSIVE; + add_wait_queue_exclusive(&sem->wait, &wait); + + spin_lock_irq(&semaphore_lock); + sem->sleepers ++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * With signals pending, this turns into + * the trylock failure case - we won't be + * sleeping, and we* can't get the lock as + * it has contention. Just correct the count + * and exit. + */ + if (signal_pending(current)) { + retval = -EINTR; + sem->sleepers = 0; + atomic_add(sleepers, &sem->count); + break; + } + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock. The + * "-1" is because we're still hoping to get + * the lock. + */ + if (!atomic_add_negative(sleepers - 1, &sem->count)) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irq(&semaphore_lock); + + schedule(); + tsk->state = TASK_INTERRUPTIBLE|TASK_EXCLUSIVE; + spin_lock_irq(&semaphore_lock); + } + spin_unlock_irq(&semaphore_lock); + tsk->state = TASK_RUNNING; + remove_wait_queue(&sem->wait, &wait); + wake_up(&sem->wait); + return retval; +} + +/* + * Trylock failed - make sure we correct for having decremented the + * count. + */ +int +__down_trylock (struct semaphore *sem) +{ + int sleepers; + + spin_lock_irq(&semaphore_lock); + sleepers = sem->sleepers + 1; + sem->sleepers = 0; + + /* + * Add "everybody else" and us into it. They aren't + * playing, because we own the spinlock. + */ + if (!atomic_add_negative(sleepers, &sem->count)) + wake_up(&sem->wait); + + spin_unlock_irq(&semaphore_lock); + return 1; +} + +/* + * Helper routines for rw semaphores. These could be optimized some + * more, but since they're off the critical path, I prefer clarity for + * now... + */ + +/* + * This gets called if we failed to acquire the lock, but we're biased + * to acquire the lock by virtue of causing the count to change from 0 + * to -1. Being biased, we sleep and attempt to grab the lock until + * we succeed. When this function returns, we own the lock. + */ +static inline void +down_read_failed_biased (struct rw_semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + add_wait_queue(&sem->wait, &wait); /* put ourselves at the head of the list */ + + for (;;) { + if (sem->read_bias_granted && xchg(&sem->read_bias_granted, 0)) + break; + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (!sem->read_bias_granted) + schedule(); + } + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; +} + +/* + * This gets called if we failed to aquire the lock and we are not + * biased to acquire the lock. We undo the decrement that was + * done earlier, go to sleep, and then attempt to re-acquire the + * lock afterwards. + */ +static inline void +down_read_failed (struct rw_semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + /* + * Undo the decrement we did in down_read() and check if we + * need to wake up someone. 
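+ * (__up_read() below adds the count back, so the failed down_read()
+ * has no net effect on the counter while we sit on the wait queue.)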
+ */ + __up_read(sem); + + add_wait_queue(&sem->wait, &wait); + while (sem->count < 0) { + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (sem->count >= 0) + break; + schedule(); + } + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; +} + +/* + * Wait for the lock to become unbiased. Readers are non-exclusive. + */ +void +__down_read_failed (struct rw_semaphore *sem, long count) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + while (1) { + if (count == -1) { + down_read_failed_biased(sem); + return; + } + /* unbiased */ + down_read_failed(sem); + + count = ia64_fetch_and_add(-1, &sem->count); + if (count >= 0) + return; + } +} + +static inline void +down_write_failed_biased (struct rw_semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + /* put ourselves at the end of the list */ + add_wait_queue_exclusive(&sem->write_bias_wait, &wait); + + for (;;) { + if (sem->write_bias_granted && xchg(&sem->write_bias_granted, 0)) + break; + set_task_state(tsk, TASK_UNINTERRUPTIBLE | TASK_EXCLUSIVE); + if (!sem->write_bias_granted) + schedule(); + } + + remove_wait_queue(&sem->write_bias_wait, &wait); + tsk->state = TASK_RUNNING; + + /* + * If the lock is currently unbiased, awaken the sleepers + * FIXME: this wakes up the readers early in a bit of a + * stampede -> bad! + */ + if (sem->count >= 0) + wake_up(&sem->wait); +} + + +static inline void +down_write_failed (struct rw_semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + __up_write(sem); /* this takes care of granting the lock */ + + add_wait_queue_exclusive(&sem->wait, &wait); + + while (sem->count < 0) { + set_task_state(tsk, TASK_UNINTERRUPTIBLE | TASK_EXCLUSIVE); + if (sem->count >= 0) + break; /* we must attempt to aquire or bias the lock */ + schedule(); + } + + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; +} + + +/* + * Wait for the lock to become unbiased. Since we're a writer, we'll + * make ourselves exclusive. + */ +void +__down_write_failed (struct rw_semaphore *sem, long count) +{ + long old_count; + + while (1) { + if (count == -RW_LOCK_BIAS) { + down_write_failed_biased(sem); + return; + } + down_write_failed(sem); + + do { + old_count = sem->count; + count = old_count - RW_LOCK_BIAS; + } while (cmpxchg(&sem->count, old_count, count) != old_count); + + if (count == 0) + return; + } +} + +void +__rwsem_wake (struct rw_semaphore *sem, long count) +{ + wait_queue_head_t *wq; + + if (count == 0) { + /* wake a writer */ + if (xchg(&sem->write_bias_granted, 1)) + BUG(); + wq = &sem->write_bias_wait; + } else { + /* wake reader(s) */ + if (xchg(&sem->read_bias_granted, 1)) + BUG(); + wq = &sem->wait; + } + wake_up(wq); /* wake up everyone on the wait queue */ +} diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c new file mode 100644 index 000000000..f3283d535 --- /dev/null +++ b/arch/ia64/kernel/setup.c @@ -0,0 +1,326 @@ +/* + * Architecture-specific setup. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * + * 02/04/00 D.Mosberger some more get_cpuinfo fixes... 
+ * 02/01/00 R.Seth fixed get_cpuinfo for SMP + * 01/07/99 S.Eranian added the support for command line argument + * 06/24/99 W.Drummond added boot_cpu_data. + */ +#include <linux/config.h> +#include <linux/init.h> + +#include <linux/bootmem.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/threads.h> +#include <linux/console.h> + +#include <asm/acpi-ext.h> +#include <asm/page.h> +#include <asm/machvec.h> +#include <asm/processor.h> +#include <asm/sal.h> +#include <asm/system.h> +#include <asm/efi.h> + +extern char _end; + +/* cpu_data[bootstrap_processor] is data for the bootstrap processor: */ +struct cpuinfo_ia64 cpu_data[NR_CPUS]; + +unsigned long ia64_cycles_per_usec; +struct ia64_boot_param ia64_boot_param; +struct screen_info screen_info; +unsigned long cpu_initialized = 0; +/* This tells _start which CPU is booting. */ +int cpu_now_booting = 0; + +#define COMMAND_LINE_SIZE 512 + +char saved_command_line[COMMAND_LINE_SIZE]; /* used in proc filesystem */ + +static int +find_max_pfn (unsigned long start, unsigned long end, void *arg) +{ + unsigned long *max_pfn = arg, pfn; + + pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT; + if (pfn > *max_pfn) + *max_pfn = pfn; + return 0; +} + +static int +free_available_memory (unsigned long start, unsigned long end, void *arg) +{ +# define KERNEL_END ((unsigned long) &_end) +# define MIN(a,b) ((a) < (b) ? (a) : (b)) +# define MAX(a,b) ((a) > (b) ? (a) : (b)) + unsigned long range_start, range_end; + + range_start = MIN(start, KERNEL_START); + range_end = MIN(end, KERNEL_START); + + /* + * XXX This should not be necessary, but the bootmem allocator + * is broken and fails to work correctly when the starting + * address is not properly aligned. + */ + range_start = PAGE_ALIGN(range_start); + + if (range_start < range_end) + free_bootmem(__pa(range_start), range_end - range_start); + + range_start = MAX(start, KERNEL_END); + range_end = MAX(end, KERNEL_END); + + /* + * XXX This should not be necessary, but the bootmem allocator + * is broken and fails to work correctly when the starting + * address is not properly aligned. + */ + range_start = PAGE_ALIGN(range_start); + + if (range_start < range_end) + free_bootmem(__pa(range_start), range_end - range_start); + + return 0; +} + +void __init +setup_arch (char **cmdline_p) +{ + unsigned long max_pfn, bootmap_start, bootmap_size; + + /* + * The secondary bootstrap loader passes us the boot + * parameters at the beginning of the ZERO_PAGE, so let's + * stash away those values before ZERO_PAGE gets cleared out. + */ + memcpy(&ia64_boot_param, (void *) ZERO_PAGE_ADDR, sizeof(ia64_boot_param)); + + efi_init(); + + max_pfn = 0; + efi_memmap_walk(find_max_pfn, &max_pfn); + + /* + * This is wrong, wrong, wrong. Darn it, you'd think if they + * change APIs, they'd do things for the better. Grumble... 
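+ * (init_bootmem() now takes the bootmap's start as a page frame
+ * number together with the maximum PFN, hence the __pa() and
+ * >> PAGE_SHIFT massaging below.)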
+ */ + bootmap_start = PAGE_ALIGN(__pa(&_end)); + bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn); + + efi_memmap_walk(free_available_memory, 0); + + reserve_bootmem(bootmap_start, bootmap_size); +#if 0 + /* XXX fix me */ + init_mm.start_code = (unsigned long) &_stext; + init_mm.end_code = (unsigned long) &_etext; + init_mm.end_data = (unsigned long) &_edata; + init_mm.brk = (unsigned long) &_end; + + code_resource.start = virt_to_bus(&_text); + code_resource.end = virt_to_bus(&_etext) - 1; + data_resource.start = virt_to_bus(&_etext); + data_resource.end = virt_to_bus(&_edata) - 1; +#endif + + /* process SAL system table: */ + ia64_sal_init(efi.sal_systab); + + *cmdline_p = __va(ia64_boot_param.command_line); + strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line)); + saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */ + + printk("args to kernel: %s\n", *cmdline_p); + +#ifndef CONFIG_SMP + cpu_init(); + identify_cpu(&cpu_data[0]); +#endif + + if (efi.acpi) { + /* Parse the ACPI tables */ + acpi_parse(efi.acpi); + } + +#ifdef CONFIG_IA64_GENERIC + machvec_init(acpi_get_sysname()); +#endif + +#ifdef CONFIG_VT +# if defined(CONFIG_VGA_CONSOLE) + conswitchp = &vga_con; +# elif defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +# endif +#endif + platform_setup(cmdline_p); +} + +/* + * Display cpu info for all cpu's. + */ +int +get_cpuinfo (char *buffer) +{ + char family[32], model[32], features[128], *cp, *p = buffer; + struct cpuinfo_ia64 *c; + unsigned long mask; + + for (c = cpu_data; c < cpu_data + NR_CPUS; ++c) { + if (!(cpu_initialized & (1UL << (c - cpu_data)))) + continue; + + mask = c->features; + + if (c->family == 7) + memcpy(family, "IA-64", 6); + else + sprintf(family, "%u", c->family); + + switch (c->model) { + case 0: strcpy(model, "Itanium"); break; + default: sprintf(model, "%u", c->model); break; + } + + /* build the feature string: */ + memcpy(features, " standard", 10); + cp = features; + if (mask & 1) { + strcpy(cp, " branchlong"); + cp = strchr(cp, '\0'); + mask &= ~1UL; + } + if (mask) + sprintf(cp, " 0x%lx", mask); + + p += sprintf(buffer, + "CPU# %lu\n" + "\tvendor : %s\n" + "\tfamily : %s\n" + "\tmodel : %s\n" + "\trevision : %u\n" + "\tarchrev : %u\n" + "\tfeatures :%s\n" /* don't change this---it _is_ right! */ + "\tcpu number : %lu\n" + "\tcpu regs : %u\n" + "\tcpu MHz : %lu.%06lu\n" + "\titc MHz : %lu.%06lu\n" + "\tBogoMIPS : %lu.%02lu\n\n", + c - cpu_data, c->vendor, family, model, c->revision, c->archrev, + features, + c->ppn, c->number, c->proc_freq / 1000000, c->proc_freq % 1000000, + c->itc_freq / 1000000, c->itc_freq % 1000000, + loops_per_sec() / 500000, (loops_per_sec() / 5000) % 100); + } + return p - buffer; +} + +void +identify_cpu (struct cpuinfo_ia64 *c) +{ + union { + unsigned long bits[5]; + struct { + /* id 0 & 1: */ + char vendor[16]; + + /* id 2 */ + u64 ppn; /* processor serial number */ + + /* id 3: */ + unsigned number : 8; + unsigned revision : 8; + unsigned model : 8; + unsigned family : 8; + unsigned archrev : 8; + unsigned reserved : 24; + + /* id 4: */ + u64 features; + } field; + } cpuid; + int i; + + for (i = 0; i < 5; ++i) { + cpuid.bits[i] = ia64_get_cpuid(i); + } + +#ifdef CONFIG_SMP + /* + * XXX Instead of copying the ITC info from the bootstrap + * processor, ia64_init_itm() should be done per CPU. That + * should get you the right info. 
--davidm 1/24/00 + */ + if (c != &cpu_data[bootstrap_processor]) { + memset(c, 0, sizeof(struct cpuinfo_ia64)); + c->proc_freq = cpu_data[bootstrap_processor].proc_freq; + c->itc_freq = cpu_data[bootstrap_processor].itc_freq; + c->cyc_per_usec = cpu_data[bootstrap_processor].cyc_per_usec; + c->usec_per_cyc = cpu_data[bootstrap_processor].usec_per_cyc; + } +#else + memset(c, 0, sizeof(struct cpuinfo_ia64)); +#endif + + memcpy(c->vendor, cpuid.field.vendor, 16); +#ifdef CONFIG_IA64_SOFTSDV_HACKS + /* BUG: SoftSDV doesn't support the cpuid registers. */ + if (c->vendor[0] == '\0') + memcpy(c->vendor, "Intel", 6); +#endif + c->ppn = cpuid.field.ppn; + c->number = cpuid.field.number; + c->revision = cpuid.field.revision; + c->model = cpuid.field.model; + c->family = cpuid.field.family; + c->archrev = cpuid.field.archrev; + c->features = cpuid.field.features; +#ifdef CONFIG_SMP + c->loops_per_sec = loops_per_sec; +#endif +} + +/* + * cpu_init() initializes state that is per-CPU. This function acts + * as a 'CPU state barrier', nothing should get across. + */ +void +cpu_init (void) +{ + int nr = smp_processor_id(); + + /* Clear the stack memory reserved for pt_regs: */ + memset(ia64_task_regs(current), 0, sizeof(struct pt_regs)); + + /* + * Initialize default control register to defer speculative + * faults. On a speculative load, we want to defer access + * right, key miss, and key permission faults. We currently + * do NOT defer TLB misses, page-not-present, access bit, or + * debug faults but kernel code should not rely on any + * particular setting of these bits. + */ + ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_PP); + ia64_set_fpu_owner(0); /* initialize ar.k5 */ + + if (test_and_set_bit(nr, &cpu_initialized)) { + printk("CPU#%d already initialized!\n", nr); + machine_halt(); + } + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; +} diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c new file mode 100644 index 000000000..19be1f840 --- /dev/null +++ b/arch/ia64/kernel/signal.c @@ -0,0 +1,537 @@ +/* + * Architecture-specific signal handling support. + * + * Copyright (C) 1999-2000 Hewlett-Packard Co + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * Derived from i386 and Alpha versions. 
+ */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/wait.h> + +#include <asm/ia32.h> +#include <asm/uaccess.h> +#include <asm/rse.h> +#include <asm/sigcontext.h> + +#define DEBUG_SIG 0 +#define STACK_ALIGN 16 /* minimal alignment for stack pointer */ +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +#if _NSIG_WORDS > 1 +# define PUT_SIGSET(k,u) __copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t)) +# define GET_SIGSET(k,u) __copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t)) +#else +# define PUT_SIGSET(k,u) __put_user((k)->sig[0], &(u)->sig[0]) +# define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0]) +#endif + +struct sigframe { + struct siginfo info; + struct sigcontext sc; +}; + +extern long sys_wait4 (int, int *, int, struct rusage *); +extern long ia64_do_signal (sigset_t *, struct pt_regs *, long); /* forward decl */ + +long +ia64_rt_sigsuspend (sigset_t *uset, size_t sigsetsize, struct pt_regs *pt) +{ + sigset_t oldset, set; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + if (GET_SIGSET(&set, uset)) + return -EFAULT; + + sigdelsetmask(&set, ~_BLOCKABLE); + + spin_lock_irq(¤t->sigmask_lock); + { + oldset = current->blocked; + current->blocked = set; + recalc_sigpending(current); + } + spin_unlock_irq(¤t->sigmask_lock); + + /* + * The return below usually returns to the signal handler. We need to + * pre-set the correct error code here to ensure that the right values + * get saved in sigcontext by ia64_do_signal. 
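+ * The syscall convention assumed here: r10 == -1 marks an error
+ * return, with r8 holding the (positive) error number, so the
+ * interrupted sigsuspend() is seen to fail with EINTR.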
+ */
+ pt->r8 = EINTR;
+ pt->r10 = -1;
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ if (ia64_do_signal(&oldset, pt, 1))
+ return -EINTR;
+ }
+}
+
+asmlinkage long
+sys_sigaltstack (const stack_t *uss, stack_t *uoss, long arg2, long arg3, long arg4,
+ long arg5, long arg6, long arg7, long stack)
+{
+ struct pt_regs *pt = (struct pt_regs *) &stack;
+
+ return do_sigaltstack(uss, uoss, pt->r12);
+}
+
+static long
+restore_sigcontext (struct sigcontext *sc, struct pt_regs *pt)
+{
+ struct switch_stack *sw = (struct switch_stack *) pt - 1;
+ unsigned long ip, flags, nat, um;
+ long err;
+
+ /* restore the scratch state that always gets updated during signal delivery: */
+ err = __get_user(flags, &sc->sc_flags);
+
+ err |= __get_user(nat, &sc->sc_nat);
+ err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */
+ err |= __get_user(pt->ar_fpsr, &sc->sc_ar_fpsr);
+ err |= __get_user(pt->ar_pfs, &sc->sc_ar_pfs);
+ err |= __get_user(um, &sc->sc_um); /* user mask */
+ err |= __get_user(pt->ar_rsc, &sc->sc_ar_rsc);
+ err |= __get_user(pt->ar_ccv, &sc->sc_ar_ccv);
+ err |= __get_user(pt->ar_unat, &sc->sc_ar_unat);
+ err |= __get_user(pt->pr, &sc->sc_pr); /* predicates */
+ err |= __get_user(pt->b0, &sc->sc_br[0]); /* b0 (rp) */
+ err |= __get_user(pt->b6, &sc->sc_br[6]);
+ err |= __copy_from_user(&pt->r1, &sc->sc_gr[1], 3*8); /* r1-r3 */
+ err |= __copy_from_user(&pt->r8, &sc->sc_gr[8], 4*8); /* r8-r11 */
+ err |= __copy_from_user(&pt->r12, &sc->sc_gr[12], 4*8); /* r12-r15 */
+ err |= __copy_from_user(&pt->r16, &sc->sc_gr[16], 16*8); /* r16-r31 */
+
+ /* establish new instruction pointer: */
+ pt->cr_iip = ip & ~0x3UL;
+ ia64_psr(pt)->ri = ip & 0x3;
+ pt->cr_ipsr = (pt->cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM);
+
+ ia64_put_nat_bits (pt, sw, nat); /* restore the original scratch NaT bits */
+
+ if (flags & IA64_SC_FLAG_FPH_VALID) {
+ struct task_struct *fpu_owner = ia64_get_fpu_owner();
+
+ __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
+ if (fpu_owner == current) {
+ __ia64_load_fpu(current->thread.fph);
+ }
+ }
+ return err;
+}
+
+/*
+ * When we get here, ((struct switch_stack *) pt - 1) is a
+ * switch_stack frame that has no defined value. Upon return, we
+ * expect sw->caller_unat to contain the new unat value. The reason
+ * we use a full switch_stack frame is so everything is symmetric
+ * with ia64_do_signal().
+ */
+long
+ia64_rt_sigreturn (struct pt_regs *pt)
+{
+ extern char ia64_strace_leave_kernel, ia64_leave_kernel;
+ struct sigcontext *sc;
+ struct siginfo si;
+ sigset_t set;
+ long retval;
+
+ sc = &((struct sigframe *) (pt->r12 + 16))->sc;
+
+ /*
+ * When we return to the previously executing context, r8 and
+ * r10 have already been set up the way we want them. Indeed,
+ * if the signal wasn't delivered while in a system call, we
+ * must not touch r8 or r10 as otherwise user-level state could
+ * be corrupted.
+ */ + retval = (long) &ia64_leave_kernel | 1; + if ((current->flags & PF_TRACESYS) + && (sc->sc_flags & IA64_SC_FLAG_IN_SYSCALL)) + retval = (long) &ia64_strace_leave_kernel; + + if (!access_ok(VERIFY_READ, sc, sizeof(*sc))) + goto give_sigsegv; + + if (GET_SIGSET(&set, &sc->sc_mask)) + goto give_sigsegv; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + if (restore_sigcontext(sc, pt)) + goto give_sigsegv; + +#if DEBUG_SIG + printk("SIG return (%s:%d): sp=%lx ip=%lx\n", + current->comm, current->pid, pt->r12, pt->cr_iip); +#endif + /* + * It is more difficult to avoid calling this function than to + * call it and ignore errors. + */ + do_sigaltstack(&sc->sc_stack, 0, pt->r12); + return retval; + + give_sigsegv: + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = SI_KERNEL; + si.si_pid = current->pid; + si.si_uid = current->uid; + si.si_addr = sc; + force_sig_info(SIGSEGV, &si, current); + return retval; +} + +/* + * This does just the minimum required setup of sigcontext. + * Specifically, it only installs data that is either not knowable at + * the user-level or that gets modified before execution in the + * trampoline starts. Everything else is done at the user-level. + */ +static long +setup_sigcontext (struct sigcontext *sc, sigset_t *mask, struct pt_regs *pt) +{ + struct switch_stack *sw = (struct switch_stack *) pt - 1; + struct task_struct *fpu_owner = ia64_get_fpu_owner(); + unsigned long flags = 0, ifs, nat; + long err; + + ifs = pt->cr_ifs; + + if (on_sig_stack((unsigned long) sc)) + flags |= IA64_SC_FLAG_ONSTACK; + if ((ifs & (1UL << 63)) == 0) { + /* if cr_ifs isn't valid, we got here through a syscall */ + flags |= IA64_SC_FLAG_IN_SYSCALL; + } + if ((fpu_owner == current) || (current->thread.flags & IA64_THREAD_FPH_VALID)) { + flags |= IA64_SC_FLAG_FPH_VALID; + if (fpu_owner == current) { + __ia64_save_fpu(current->thread.fph); + } + __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16); + } + + /* + * Note: sw->ar_unat is UNDEFINED unless the process is being + * PTRACED. However, this is OK because the NaT bits of the + * preserved registers (r4-r7) are never being looked at by + * the signal handler (register r4-r7 are used instead). 
+ */ + nat = ia64_get_nat_bits(pt, sw); + + err = __put_user(flags, &sc->sc_flags); + err |= __put_user(nat, &sc->sc_nat); + err |= PUT_SIGSET(mask, &sc->sc_mask); + err |= __put_user(pt->cr_ipsr & IA64_PSR_UM, &sc->sc_um); + err |= __put_user(pt->ar_rsc, &sc->sc_ar_rsc); + err |= __put_user(pt->ar_ccv, &sc->sc_ar_ccv); + err |= __put_user(pt->ar_unat, &sc->sc_ar_unat); /* ar.unat */ + err |= __put_user(pt->ar_fpsr, &sc->sc_ar_fpsr); /* ar.fpsr */ + err |= __put_user(pt->ar_pfs, &sc->sc_ar_pfs); + err |= __put_user(pt->pr, &sc->sc_pr); /* predicates */ + err |= __put_user(pt->b0, &sc->sc_br[0]); /* b0 (rp) */ + err |= __put_user(pt->b6, &sc->sc_br[6]); /* b6 */ + err |= __put_user(pt->b7, &sc->sc_br[7]); /* b7 */ + + err |= __copy_to_user(&sc->sc_gr[1], &pt->r1, 3*8); /* r1-r3 */ + err |= __copy_to_user(&sc->sc_gr[8], &pt->r8, 4*8); /* r8-r11 */ + err |= __copy_to_user(&sc->sc_gr[12], &pt->r12, 4*8); /* r12-r15 */ + err |= __copy_to_user(&sc->sc_gr[16], &pt->r16, 16*8); /* r16-r31 */ + + err |= __put_user(pt->cr_iip + ia64_psr(pt)->ri, &sc->sc_ip); + err |= __put_user(pt->r12, &sc->sc_gr[12]); /* r12 */ + return err; +} + +static long +setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *pt) +{ + struct switch_stack *sw = (struct switch_stack *) pt - 1; + extern char ia64_sigtramp[], __start_gate_section[]; + unsigned long tramp_addr, new_rbs = 0; + struct sigframe *frame; + struct siginfo si; + long err; + + frame = (void *) pt->r12; + tramp_addr = GATE_ADDR + (ia64_sigtramp - __start_gate_section); + if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && !on_sig_stack((unsigned long) frame)) { + new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1); + frame = (void *) ((current->sas_ss_sp + current->sas_ss_size) + & ~(STACK_ALIGN - 1)); + } + frame = (void *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err = __copy_to_user(&frame->info, info, sizeof(siginfo_t)); + + err |= __put_user(current->sas_ss_sp, &frame->sc.sc_stack.ss_sp); + err |= __put_user(current->sas_ss_size, &frame->sc.sc_stack.ss_size); + err |= __put_user(sas_ss_flags(pt->r12), &frame->sc.sc_stack.ss_flags); + err |= setup_sigcontext(&frame->sc, set, pt); + + if (err) + goto give_sigsegv; + + pt->r12 = (unsigned long) frame - 16; /* new stack pointer */ + pt->r2 = sig; /* signal number */ + pt->r3 = (unsigned long) ka->sa.sa_handler; /* addr. of handler's proc. descriptor */ + pt->r15 = new_rbs; + pt->ar_fpsr = FPSR_DEFAULT; /* reset fpsr for signal handler */ + pt->cr_iip = tramp_addr; + ia64_psr(pt)->ri = 0; /* start executing in first slot */ + + /* + * Note: this affects only the NaT bits of the scratch regs + * (the ones saved in pt_regs, which is exactly what we want. + * The NaT bits for the preserved regs (r4-r7) are in + * sw->ar_unat iff this process is being PTRACED. 
+ */ + sw->caller_unat = 0; /* ensure NaT bits of at least r2, r3, r12, and r15 are clear */ + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%lx\n", + current->comm, current->pid, sig, pt->r12, pt->cr_iip, pt->r3); +#endif + return 1; + + give_sigsegv: + if (sig == SIGSEGV) + ka->sa.sa_handler = SIG_DFL; + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = SI_KERNEL; + si.si_pid = current->pid; + si.si_uid = current->uid; + si.si_addr = frame; + force_sig_info(SIGSEGV, &si, current); + return 0; +} + +static long +handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, + struct pt_regs *pt) +{ +#ifdef CONFIG_IA32_SUPPORT + if (IS_IA32_PROCESS(pt)) { + /* send signal to IA-32 process */ + if (!ia32_setup_frame1(sig, ka, info, oldset, pt)) + return 0; + } else +#endif + /* send signal to IA-64 process */ + if (!setup_frame(sig, ka, info, oldset, pt)) + return 0; + + if (ka->sa.sa_flags & SA_ONESHOT) + ka->sa.sa_handler = SIG_DFL; + + if (!(ka->sa.sa_flags & SA_NODEFER)) { + spin_lock_irq(¤t->sigmask_lock); + sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); + sigaddset(¤t->blocked, sig); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + } + return 1; +} + +/* + * When we get here, `pt' points to struct pt_regs and ((struct + * switch_stack *) pt - 1) points to a switch stack structure. + * HOWEVER, in the normal case, the ONLY value valid in the + * switch_stack is the caller_unat field. The entire switch_stack is + * valid ONLY if current->flags has PF_PTRACED set. + * + * Note that `init' is a special process: it doesn't get signals it + * doesn't want to handle. Thus you cannot kill init even with a + * SIGKILL even by mistake. + * + * Note that we go through the signals twice: once to check the + * signals that the kernel can handle, and then we build all the + * user-level signal handling stack-frames in one go after that. + */ +long +ia64_do_signal (sigset_t *oldset, struct pt_regs *pt, long in_syscall) +{ + struct k_sigaction *ka; + siginfo_t info; + long restart = in_syscall; + + /* + * In the ia64_leave_kernel code path, we want the common case + * to go fast, which is why we may in certain cases get here + * from kernel mode. Just return without doing anything if so. + */ + if (!user_mode(pt)) + return 0; + + if (!oldset) + oldset = ¤t->blocked; + + if (pt->r10 != -1) { + /* + * A system calls has to be restarted only if one of + * the error codes ERESTARTNOHAND, ERESTARTSYS, or + * ERESTARTNOINTR is returned. If r10 isn't -1 then + * r8 doesn't hold an error code and we don't need to + * restart the syscall, so we set in_syscall to zero. + */ + restart = 0; + } + + for (;;) { + unsigned long signr; + + spin_lock_irq(¤t->sigmask_lock); + signr = dequeue_signal(¤t->blocked, &info); + spin_unlock_irq(¤t->sigmask_lock); + + if (!signr) + break; + + if ((current->flags & PF_PTRACED) && signr != SIGKILL) { + /* Let the debugger run. */ + current->exit_code = signr; + set_current_state(TASK_STOPPED); + notify_parent(current, SIGCHLD); + schedule(); + signr = current->exit_code; + + /* We're back. Did the debugger cancel the sig? */ + if (!signr) + continue; + current->exit_code = 0; + + /* The debugger continued. Ignore SIGSTOP. */ + if (signr == SIGSTOP) + continue; + + /* Update the siginfo structure. Is this good? 
*/ + if (signr != info.si_signo) { + info.si_signo = signr; + info.si_errno = 0; + info.si_code = SI_USER; + info.si_pid = current->p_pptr->pid; + info.si_uid = current->p_pptr->uid; + } + + /* If the (new) signal is now blocked, requeue it. */ + if (sigismember(¤t->blocked, signr)) { + send_sig_info(signr, &info, current); + continue; + } + } + + ka = ¤t->sig->action[signr - 1]; + if (ka->sa.sa_handler == SIG_IGN) { + if (signr != SIGCHLD) + continue; + /* Check for SIGCHLD: it's special. */ + while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) + /* nothing */; + continue; + } + + if (ka->sa.sa_handler == SIG_DFL) { + int exit_code = signr; + + /* Init gets no signals it doesn't want. */ + if (current->pid == 1) + continue; + + switch (signr) { + case SIGCONT: case SIGCHLD: case SIGWINCH: + continue; + + case SIGTSTP: case SIGTTIN: case SIGTTOU: + if (is_orphaned_pgrp(current->pgrp)) + continue; + /* FALLTHRU */ + + case SIGSTOP: + set_current_state(TASK_STOPPED); + current->exit_code = signr; + if (!(current->p_pptr->sig->action[SIGCHLD-1].sa.sa_flags + & SA_NOCLDSTOP)) + notify_parent(current, SIGCHLD); + schedule(); + continue; + + case SIGQUIT: case SIGILL: case SIGTRAP: + case SIGABRT: case SIGFPE: case SIGSEGV: + case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: + if (do_coredump(signr, pt)) + exit_code |= 0x80; + /* FALLTHRU */ + + default: + lock_kernel(); + sigaddset(¤t->signal, signr); + recalc_sigpending(current); + current->flags |= PF_SIGNALED; + do_exit(exit_code); + /* NOTREACHED */ + } + } + + if (restart) { + switch (pt->r8) { + case ERESTARTSYS: + if ((ka->sa.sa_flags & SA_RESTART) == 0) { + case ERESTARTNOHAND: + pt->r8 = EINTR; + /* note: pt->r10 is already -1 */ + break; + } + case ERESTARTNOINTR: + ia64_decrement_ip(pt); + } + } + + /* Whee! Actually deliver the signal. If the + delivery failed, we need to continue to iterate in + this loop so we can deliver the SIGSEGV... */ + if (handle_signal(signr, ka, &info, oldset, pt)) + return 1; + } + + /* Did we come from a system call? */ + if (restart) { + /* Restart the system call - no handlers present */ + if (pt->r8 == ERESTARTNOHAND || + pt->r8 == ERESTARTSYS || + pt->r8 == ERESTARTNOINTR) { + /* + * Note: the syscall number is in r15 which is + * saved in pt_regs so all we need to do here + * is adjust ip so that the "break" + * instruction gets re-executed. + */ + ia64_decrement_ip(pt); + } + } + return 0; +} diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c new file mode 100644 index 000000000..48a3d68b4 --- /dev/null +++ b/arch/ia64/kernel/smp.c @@ -0,0 +1,777 @@ +/* + * SMP Support + * + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * + * Lots of stuff stolen from arch/alpha/kernel/smp.c + * + * 99/10/05 davidm Update to bring it in sync with new command-line processing scheme. 
+ */ +#define __KERNEL_SYSCALLS__ + +#include <linux/config.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/smp.h> +#include <linux/kernel_stat.h> +#include <linux/mm.h> + +#include <asm/atomic.h> +#include <asm/bitops.h> +#include <asm/current.h> +#include <asm/delay.h> + +#ifdef CONFIG_KDB +#include <linux/kdb.h> +void smp_kdb_interrupt (struct pt_regs* regs); +void kdb_global(int cpuid); +extern unsigned long smp_kdb_wait; +extern int kdb_new_cpu; +#endif + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/sal.h> +#include <asm/system.h> +#include <asm/unistd.h> + +extern int cpu_idle(void * unused); +extern void _start(void); + +extern int cpu_now_booting; /* Used by head.S to find idle task */ +extern unsigned long cpu_initialized; /* Bitmap of available cpu's */ +extern struct cpuinfo_ia64 cpu_data[NR_CPUS]; /* Duh... */ + +spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED; + +#ifdef CONFIG_KDB +unsigned long cpu_online_map = 1; +#endif + +volatile int cpu_number_map[NR_CPUS] = { -1, }; /* SAPIC ID -> Logical ID */ +volatile int __cpu_logical_map[NR_CPUS] = { -1, }; /* logical ID -> SAPIC ID */ +int smp_num_cpus = 1; +int bootstrap_processor = -1; /* SAPIC ID of BSP */ +int smp_threads_ready = 0; /* Set when the idlers are all forked */ +unsigned long ipi_base_addr = IPI_DEFAULT_BASE_ADDR; /* Base addr of IPI table */ +cycles_t cacheflush_time = 0; +unsigned long ap_wakeup_vector = -1; /* External Int to use to wakeup AP's */ +static int max_cpus = -1; /* Command line */ +static unsigned long ipi_op[NR_CPUS]; +struct smp_call_struct { + void (*func) (void *info); + void *info; + long wait; + atomic_t unstarted_count; + atomic_t unfinished_count; +}; +static struct smp_call_struct *smp_call_function_data; + +#ifdef CONFIG_KDB +unsigned long smp_kdb_wait = 0; /* Bitmask of waiters */ +#endif + +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC +extern spinlock_t ivr_read_lock; +#endif + +int use_xtp = 0; /* XXX */ + +#define IPI_RESCHEDULE 0 +#define IPI_CALL_FUNC 1 +#define IPI_CPU_STOP 2 +#define IPI_KDB_INTERRUPT 4 + +/* + * Setup routine for controlling SMP activation + * + * Command-line option of "nosmp" or "maxcpus=0" will disable SMP + * activation entirely (the MPS table probe still happens, though). + * + * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer + * greater than 0, limits the maximum number of CPUs activated in + * SMP mode to <NUM>. + */ + +static int __init nosmp(char *str) +{ + max_cpus = 0; + return 1; +} + +__setup("nosmp", nosmp); + +static int __init maxcpus(char *str) +{ + get_option(&str, &max_cpus); + return 1; +} + +__setup("maxcpus=", maxcpus); + +/* + * Yoink this CPU from the runnable list... + */ +void +halt_processor(void) +{ + clear_bit(smp_processor_id(), &cpu_initialized); + max_xtp(); + __cli(); + for (;;) + ; + +} + +void +handle_IPI(int irq, void *dev_id, struct pt_regs *regs) +{ + int this_cpu = smp_processor_id(); + unsigned long *pending_ipis = &ipi_op[this_cpu]; + unsigned long ops; + + /* Count this now; we may make a call that never returns. */ + cpu_data[this_cpu].ipi_count++; + + mb(); /* Order interrupt and bit testing. */ + while ((ops = xchg(pending_ipis, 0)) != 0) { + mb(); /* Order bit clearing and data access. 
*/ + do { + unsigned long which; + + which = ffz(~ops); + ops &= ~(1 << which); + + switch (which) { + case IPI_RESCHEDULE: + /* + * Reschedule callback. Everything to be done is done by the + * interrupt return path. + */ + break; + + case IPI_CALL_FUNC: + { + struct smp_call_struct *data; + void (*func)(void *info); + void *info; + int wait; + + data = smp_call_function_data; + func = data->func; + info = data->info; + wait = data->wait; + + mb(); + atomic_dec (&data->unstarted_count); + + /* At this point the structure may be gone unless wait is true. */ + (*func)(info); + + /* Notify the sending CPU that the task is done. */ + mb(); + if (wait) + atomic_dec (&data->unfinished_count); + } + break; + + case IPI_CPU_STOP: + halt_processor(); + break; + +#ifdef CONFIG_KDB + case IPI_KDB_INTERRUPT: + smp_kdb_interrupt(regs); + break; +#endif + + default: + printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which); + break; + } /* Switch */ + } while (ops); + + mb(); /* Order data access and bit testing. */ + } +} + +static inline void +send_IPI(int dest_cpu, unsigned char vector) +{ + unsigned long ipi_addr; + unsigned long ipi_data; +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + unsigned long flags; +#endif + + ipi_data = vector; + ipi_addr = ipi_base_addr | ((dest_cpu << 8) << 4); /* 16-bit SAPIC ID's; assume CPU bus 0 */ + mb(); + +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + /* + * Disable IVR reads + */ + spin_lock_irqsave(&ivr_read_lock, flags); + writeq(ipi_data, ipi_addr); + spin_unlock_irqrestore(&ivr_read_lock, flags); +#else + writeq(ipi_data, ipi_addr); +#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ + +} + +static inline void +send_IPI_single(int dest_cpu, int op) +{ + + if (dest_cpu == -1) + return; + + ipi_op[dest_cpu] |= (1 << op); + send_IPI(dest_cpu, IPI_IRQ); +} + +static inline void +send_IPI_allbutself(int op) +{ + int i; + int cpu_id = 0; + + for (i = 0; i < smp_num_cpus; i++) { + cpu_id = __cpu_logical_map[i]; + if (cpu_id != smp_processor_id()) + send_IPI_single(cpu_id, op); + } +} + +static inline void +send_IPI_all(int op) +{ + int i; + + for (i = 0; i < smp_num_cpus; i++) + send_IPI_single(__cpu_logical_map[i], op); +} + +static inline void +send_IPI_self(int op) +{ + send_IPI_single(smp_processor_id(), op); +} + +void +smp_send_reschedule(int cpu) +{ + send_IPI_single(cpu, IPI_RESCHEDULE); +} + +void +smp_send_stop(void) +{ + send_IPI_allbutself(IPI_CPU_STOP); +} + +/* + * Run a function on all other CPUs. + * <func> The function to run. This must be fast and non-blocking. + * <info> An arbitrary pointer to pass to the function. + * <retry> If true, keep retrying until ready. + * <wait> If true, wait until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. + * + * Does not return until remote CPUs are nearly ready to execute <func> + * or are or have executed. + */ + +int +smp_call_function (void (*func) (void *info), void *info, int retry, int wait) +{ + struct smp_call_struct data; + long timeout; + static spinlock_t lock = SPIN_LOCK_UNLOCKED; + + data.func = func; + data.info = info; + data.wait = wait; + atomic_set(&data.unstarted_count, smp_num_cpus - 1); + atomic_set(&data.unfinished_count, smp_num_cpus - 1); + + if (retry) { + while (1) { + if (smp_call_function_data) { + schedule (); /* Give a mate a go */ + continue; + } + spin_lock (&lock); + if (smp_call_function_data) { + spin_unlock (&lock); /* Bad luck */ + continue; + } + /* Mine, all mine! 
*/ + break; + } + } + else { + if (smp_call_function_data) + return -EBUSY; + spin_lock (&lock); + if (smp_call_function_data) { + spin_unlock (&lock); + return -EBUSY; + } + } + + smp_call_function_data = &data; + spin_unlock (&lock); + data.func = func; + data.info = info; + atomic_set (&data.unstarted_count, smp_num_cpus - 1); + data.wait = wait; + if (wait) + atomic_set (&data.unfinished_count, smp_num_cpus - 1); + + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_allbutself(IPI_CALL_FUNC); + + /* Wait for response */ + timeout = jiffies + HZ; + while ( (atomic_read (&data.unstarted_count) > 0) && + time_before (jiffies, timeout) ) + barrier (); + if (atomic_read (&data.unstarted_count) > 0) { + smp_call_function_data = NULL; + return -ETIMEDOUT; + } + if (wait) + while (atomic_read (&data.unfinished_count) > 0) + barrier (); + smp_call_function_data = NULL; + return 0; +} + +/* + * Flush all other CPU's tlb and then mine. Do this with smp_call_function() as we + * want to ensure all TLB's flushed before proceeding. + * + * XXX: Is it OK to use the same ptc.e info on all cpus? + */ +void +smp_flush_tlb_all(void) +{ + smp_call_function((void (*)(void *))__flush_tlb_all, NULL, 1, 1); + __flush_tlb_all(); +} + +/* + * Ideally sets up per-cpu profiling hooks. Doesn't do much now... + */ +static inline void __init +smp_setup_percpu_timer(int cpuid) +{ + cpu_data[cpuid].prof_counter = 1; + cpu_data[cpuid].prof_multiplier = 1; +} + +void +smp_do_timer(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + int user = user_mode(regs); + struct cpuinfo_ia64 *data = &cpu_data[cpu]; + + extern void update_one_process(struct task_struct *, unsigned long, unsigned long, + unsigned long, int); + if (!--data->prof_counter) { + irq_enter(cpu, TIMER_IRQ); + + update_one_process(current, 1, user, !user, cpu); + if (current->pid) { + if (--current->counter < 0) { + current->counter = 0; + current->need_resched = 1; + } + + if (user) { + if (current->priority < DEF_PRIORITY) { + kstat.cpu_nice++; + kstat.per_cpu_nice[cpu]++; + } else { + kstat.cpu_user++; + kstat.per_cpu_user[cpu]++; + } + } else { + kstat.cpu_system++; + kstat.per_cpu_system[cpu]++; + } + } + + data->prof_counter = data->prof_multiplier; + irq_exit(cpu, TIMER_IRQ); + } +} + + +/* + * Called by both boot and secondaries to move global data into + * per-processor storage. + */ +static inline void __init +smp_store_cpu_info(int cpuid) +{ + struct cpuinfo_ia64 *c = &cpu_data[cpuid]; + + identify_cpu(c); +} + +/* + * SAL shoves the AP's here when we start them. Physical mode, no kernel TR, + * no RRs set, better than even chance that psr is bogus. Fix all that and + * call _start. In effect, pretend to be lilo. + * + * Stolen from lilo_start.c. Thanks David! + */ +void +start_ap(void) +{ + unsigned long flags; + + /* + * Install a translation register that identity maps the + * kernel's 256MB page(s). 
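+ * (ia64_itr() below pins translation register 1 for the 256MB page at
+ * PAGE_OFFSET; the rfi then resumes at label 1 with psr.it/dt/rt set.)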
+ */ + ia64_clear_ic(flags); + ia64_set_rr( 0, (0x1000 << 8) | (_PAGE_SIZE_1M << 2)); + ia64_set_rr(PAGE_OFFSET, (ia64_rid(0, PAGE_OFFSET) << 8) | (_PAGE_SIZE_256M << 2)); + ia64_itr(0x3, 1, PAGE_OFFSET, + pte_val(mk_pte_phys(0, __pgprot(__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX))), + _PAGE_SIZE_256M); + + flags = (IA64_PSR_IT | IA64_PSR_IC | IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_DFH | + IA64_PSR_BN); + + asm volatile ("movl r8 = 1f\n" + ";;\n" + "mov cr.ipsr=%0\n" + "mov cr.iip=r8\n" + "mov cr.ifs=r0\n" + ";;\n" + "rfi;;" + "1:\n" + "movl r1 = __gp" :: "r"(flags) : "r8"); + _start(); +} + + +/* + * AP's start using C here. + */ +void __init +smp_callin(void) +{ + extern void ia64_rid_init(void); + extern void ia64_init_itm(void); + extern void ia64_cpu_local_tick(void); + + ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_PP); + ia64_set_fpu_owner(0); + ia64_rid_init(); /* initialize region ids */ + + cpu_init(); + __flush_tlb_all(); + + smp_store_cpu_info(smp_processor_id()); + smp_setup_percpu_timer(smp_processor_id()); + + while (!smp_threads_ready) + mb(); + + normal_xtp(); + + /* setup the CPU local timer tick */ + ia64_cpu_local_tick(); + + /* Disable all local interrupts */ + ia64_set_lrr0(0, 1); + ia64_set_lrr1(0, 1); + + __sti(); /* Interrupts have been off till now. */ + cpu_idle(NULL); +} + +/* + * Create the idle task for a new AP. DO NOT use kernel_thread() because + * that could end up calling schedule() in the ia64_leave_kernel exit + * path in which case the new idle task could get scheduled before we + * had a chance to remove it from the run-queue... + */ +static int __init +fork_by_hand(void) +{ + /* + * Don't care about the usp and regs settings since we'll never + * reschedule the forked task. + */ + return do_fork(CLONE_VM|CLONE_PID, 0, 0); +} + +/* + * Bring one cpu online. + * + * NB: cpuid is the CPU BUS-LOCAL ID, not the entire SAPIC ID. See asm/smp.h. + */ +static int __init +smp_boot_one_cpu(int cpuid, int cpunum) +{ + struct task_struct *idle; + long timeout; + + /* + * Create an idle task for this CPU. Note that the address we + * give to kernel_thread is irrelevant -- it's going to start + * where OS_BOOT_RENDEVZ vector in SAL says to start. But + * this gets all the other task-y sort of data structures set + * up like we wish. We need to pull the just created idle task + * off the run queue and stuff it into the init_tasks[] array. + * Sheesh . . . + */ + if (fork_by_hand() < 0) + panic("failed fork for CPU %d", cpuid); + /* + * We remove it from the pidhash and the runqueue + * once we got the process: + */ + idle = init_task.prev_task; + if (!idle) + panic("No idle process for CPU %d", cpuid); + init_tasks[cpunum] = idle; + del_from_runqueue(idle); + unhash_process(idle); + + /* Schedule the first task manually. */ + idle->processor = cpuid; + idle->has_cpu = 1; + + /* Let _start know what logical CPU we're booting (offset into init_tasks[] */ + cpu_now_booting = cpunum; + + /* Kick the AP in the butt */ + send_IPI(cpuid, ap_wakeup_vector); + ia64_srlz_i(); + mb(); + + /* + * OK, wait a bit for that CPU to finish staggering about. smp_callin() will + * call cpu_init() which will set a bit for this AP. When that bit flips, the AP + * is waiting for smp_threads_ready to be 1 and we can move on. 
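+ * The poll below (100000 iterations of udelay(10)) gives the AP about
+ * one second to set its bit in cpu_initialized.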
+	 */
+	for (timeout = 0; timeout < 100000; timeout++) {
+		if (test_bit(cpuid, &cpu_initialized))
+			goto alive;
+		udelay(10);
+		barrier();
+	}
+
+	printk(KERN_ERR "SMP: Processor %d is stuck.\n", cpuid);
+	return -1;
+
+alive:
+	/* Remember the AP data */
+	cpu_number_map[cpuid] = cpunum;
+#ifdef CONFIG_KDB
+	cpu_online_map |= (1<<cpunum);
+	printk ("DEBUGGER: cpu_online_map = 0x%08x\n", cpu_online_map);
+#endif
+	__cpu_logical_map[cpunum] = cpuid;
+	return 0;
+}
+
+
+
+/*
+ * Called by smp_init to bring all the secondaries online and hold them.
+ * XXX: this is ACPI specific; it uses "magic" variables exported from acpi.c
+ * to 'discover' the AP's. Blech.
+ */
+void __init
+smp_boot_cpus(void)
+{
+	int i, cpu_count = 1;
+	unsigned long bogosum;
+	int sapic_id;
+	extern int acpi_cpus;
+	extern int acpi_apic_map[32];
+
+	/* Take care of some initial bookkeeping. */
+	memset(&cpu_number_map, -1, sizeof(cpu_number_map));
+	memset(&__cpu_logical_map, -1, sizeof(__cpu_logical_map));
+	memset(&ipi_op, 0, sizeof(ipi_op));
+
+	/* Setup BSP mappings */
+	cpu_number_map[bootstrap_processor] = 0;
+	__cpu_logical_map[0] = bootstrap_processor;
+	current->processor = bootstrap_processor;
+
+	/* Mark BSP booted and get active_mm context */
+	cpu_init();
+
+	/* reset XTP for interrupt routing */
+	normal_xtp();
+
+	/* And generate an entry in cpu_data */
+	smp_store_cpu_info(bootstrap_processor);
+#if 0
+	smp_tune_scheduling();
+#endif
+	smp_setup_percpu_timer(bootstrap_processor);
+
+	init_idle();
+
+	/* Nothing to do when told not to. */
+	if (max_cpus == 0) {
+		printk(KERN_INFO "SMP mode deactivated.\n");
+		return;
+	}
+
+	if (acpi_cpus > 1) {
+		printk(KERN_INFO "SMP: starting up secondaries.\n");
+
+		for (i = 0; i < NR_CPUS; i++) {
+			if (acpi_apic_map[i] == -1 ||
+			    acpi_apic_map[i] == bootstrap_processor << 8)	/* XXX Fix me Walt */
+				continue;
+
+			/*
+			 * IA64 SAPIC ID's are 16-bits. See asm/smp.h for more info
+			 */
+			sapic_id = acpi_apic_map[i] >> 8;
+			if (smp_boot_one_cpu(sapic_id, cpu_count))
+				continue;
+
+			cpu_count++; /* Count good CPUs only... */
+		}
+	}
+
+	if (cpu_count == 1) {
+		printk(KERN_ERR "SMP: Bootstrap processor only.\n");
+		return;
+	}
+
+	bogosum = 0;
+	for (i = 0; i < NR_CPUS; i++) {
+		if (cpu_initialized & (1L << i))
+			bogosum += cpu_data[i].loops_per_sec;
+	}
+
+	printk(KERN_INFO "SMP: Total of %d processors activated "
+	       "(%lu.%02lu BogoMIPS).\n",
+	       cpu_count, (bogosum + 2500) / 500000,
+	       ((bogosum + 2500) / 5000) % 100);
+
+	smp_num_cpus = cpu_count;
+}
+
+/*
+ * Called from main.c by each AP.
+ */
+void __init
+smp_commence(void)
+{
+	mb();
+}
+
+/*
+ * Not used; part of the i386 bringup
+ */
+void __init
+initialize_secondary(void)
+{
+}
+
+int __init
+setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
+
+/*
+ * Assume that CPU's have been discovered by some platform-dependent
+ * interface. For SoftSDV/Lion, that would be ACPI.
+ *
+ * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP().
+ *
+ * So this just gets the BSP SAPIC ID and prints it out. Dull, huh?
+ *
+ * Not anymore. This also registers the AP OS_BOOT_RENDEZ address with SAL.
+ */
+void __init
+init_smp_config(void)
+{
+	struct fptr {
+		unsigned long fp;
+		unsigned long gp;
+	} *ap_startup;
+	long sal_ret;
+
+	/* Grab the BSP ID */
+	bootstrap_processor = hard_smp_processor_id();
+
+	/* Tell SAL where to drop the AP's.
*/ + ap_startup = (struct fptr *) start_ap; + sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ, + __pa(ap_startup->fp), __pa(ap_startup->gp), 0, + 0, 0, 0); + if (sal_ret < 0) { + printk("SMP: Can't set SAL AP Boot Rendezvous: %s\n", ia64_sal_strerror(sal_ret)); + printk(" Forcing UP mode\n"); + smp_num_cpus = 1; + } + +} + +#ifdef CONFIG_KDB +void smp_kdb_stop (int all, struct pt_regs* regs) +{ + if (all) + { + printk ("Sending IPI to all on CPU %i\n", smp_processor_id ()); + smp_kdb_wait = 0xffffffff; + clear_bit (smp_processor_id(), &smp_kdb_wait); + send_IPI_allbutself (IPI_KDB_INTERRUPT); + } + else + { + printk ("Sending IPI to self on CPU %i\n", + smp_processor_id ()); + set_bit (smp_processor_id(), &smp_kdb_wait); + clear_bit (__cpu_logical_map[kdb_new_cpu], &smp_kdb_wait); + smp_kdb_interrupt (regs); + } +} + +void smp_kdb_interrupt (struct pt_regs* regs) +{ + printk ("kdb: IPI on CPU %i with mask 0x%08x\n", + smp_processor_id (), smp_kdb_wait); + + /* All CPUs spin here forever */ + while (test_bit (smp_processor_id(), &smp_kdb_wait)); + + /* Enter KDB on CPU selected by KDB on the last CPU */ + if (__cpu_logical_map[kdb_new_cpu] == smp_processor_id ()) + { + kdb (KDB_REASON_SWITCH, 0, regs); + } +} + +#endif + diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c new file mode 100644 index 000000000..18a498a09 --- /dev/null +++ b/arch/ia64/kernel/sys_ia64.c @@ -0,0 +1,216 @@ +/* + * This file contains various system calls that have different calling + * conventions on different platforms. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/sched.h> +#include <linux/file.h> /* doh, must come after sched.h... */ +#include <linux/smp.h> +#include <linux/smp_lock.h> + +asmlinkage long +ia64_getpriority (int which, int who, long arg2, long arg3, long arg4, long arg5, long arg6, + long arg7, long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + extern long sys_getpriority (int, int); + long prio; + + prio = sys_getpriority(which, who); + if (prio >= 0) { + regs->r8 = 0; /* ensure negative priority is not mistaken as error code */ + prio = 20 - prio; + } + return prio; +} + +asmlinkage unsigned long +sys_getpagesize (void) +{ + return PAGE_SIZE; +} + +asmlinkage unsigned long +ia64_shmat (int shmid, void *shmaddr, int shmflg, long arg3, long arg4, long arg5, long arg6, + long arg7, long stack) +{ + extern int sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr); + struct pt_regs *regs = (struct pt_regs *) &stack; + unsigned long raddr; + int retval; + + retval = sys_shmat(shmid, shmaddr, shmflg, &raddr); + if (retval < 0) + return retval; + + regs->r8 = 0; /* ensure negative addresses are not mistaken as an error code */ + return raddr; +} + +asmlinkage unsigned long +ia64_brk (long brk, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, long stack) +{ + extern unsigned long sys_brk (unsigned long brk); + struct pt_regs *regs = (struct pt_regs *) &stack; + unsigned long retval; + + retval = sys_brk(brk); + + regs->r8 = 0; /* ensure large retval isn't mistaken as error code */ + return retval; +} + +/* + * On IA-64, we return the two file descriptors in ret0 and ret1 (r8 + * and r9) as this is faster than doing a copy_to_user(). 
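+ *
+ * A user-level wrapper would then do something like this (illustrative
+ * sketch, not the actual libc stub):
+ *
+ *	fds[0] = ret0;		// r8: first descriptor (the syscall result)
+ *	fds[1] = ret1;		// r9: second descriptor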
+ */ +asmlinkage long +sys_pipe (long arg0, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + int fd[2]; + int retval; + + lock_kernel(); + retval = do_pipe(fd); + if (retval) + goto out; + retval = fd[0]; + regs->r9 = fd[1]; + out: + unlock_kernel(); + return retval; +} + +static inline unsigned long +do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff) +{ + struct file *file = 0; + + /* + * A zero mmap always succeeds in Linux, independent of + * whether or not the remaining arguments are valid. + */ + if (PAGE_ALIGN(len) == 0) + return addr; + +#ifdef notyet + /* Don't permit mappings that would cross a region boundary: */ + region_start = IA64_GET_REGION(addr); + region_end = IA64_GET_REGION(addr + len); + if (region_start != region_end) + return -EINVAL; + + <<x??x>> +#endif + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + return -EBADF; + } + + down(¤t->mm->mmap_sem); + lock_kernel(); + + addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + + unlock_kernel(); + up(¤t->mm->mmap_sem); + + if (file) + fput(file); + return addr; +} + +/* + * mmap2() is like mmap() except that the offset is expressed in units + * of PAGE_SIZE (instead of bytes). This allows to mmap2() (pieces + * of) files that are larger than the address space of the CPU. + */ +asmlinkage unsigned long +sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff, + long arg6, long arg7, long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + + addr = do_mmap2(addr, len, prot, flags, fd, pgoff); + if (!IS_ERR(addr)) + regs->r8 = 0; /* ensure large addresses are not mistaken as failures... */ + return addr; +} + +asmlinkage unsigned long +sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, + int fd, long off, long arg6, long arg7, long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + + addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + if (!IS_ERR(addr)) + regs->r8 = 0; /* ensure large addresses are not mistaken as failures... 
*/ + return addr; +} + +asmlinkage long +sys_ioperm (unsigned long from, unsigned long num, int on) +{ + printk(KERN_ERR "sys_ioperm(from=%lx, num=%lx, on=%d)\n", from, num, on); + return -EIO; +} + +asmlinkage long +sys_iopl (int level, long arg1, long arg2, long arg3) +{ + lock_kernel(); + printk(KERN_ERR "sys_iopl(level=%d)!\n", level); + unlock_kernel(); + return -ENOSYS; +} + +asmlinkage long +sys_vm86 (long arg0, long arg1, long arg2, long arg3) +{ + lock_kernel(); + printk(KERN_ERR "sys_vm86(%lx, %lx, %lx, %lx)!\n", arg0, arg1, arg2, arg3); + unlock_kernel(); + return -ENOSYS; +} + +asmlinkage long +sys_modify_ldt (long arg0, long arg1, long arg2, long arg3) +{ + lock_kernel(); + printk(KERN_ERR "sys_modify_ldt(%lx, %lx, %lx, %lx)!\n", arg0, arg1, arg2, arg3); + unlock_kernel(); + return -ENOSYS; +} + +#ifndef CONFIG_PCI + +asmlinkage long +sys_pciconfig_read (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, + void *buf) +{ + return -ENOSYS; +} + +asmlinkage long +sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, + void *buf) +{ + return -ENOSYS; +} + + +#endif /* CONFIG_PCI */ diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c new file mode 100644 index 000000000..7c5ace740 --- /dev/null +++ b/arch/ia64/kernel/time.c @@ -0,0 +1,290 @@ +/* + * linux/arch/ia64/kernel/time.c + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 1999-2000 David Mosberger <davidm@hpl.hp.com> + * Copyright (C) 1999 Don Dugger <don.dugger@intel.com> + * Copyright (C) 1999-2000 VA Linux Systems + * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com> + */ +#include <linux/config.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/time.h> + +#include <asm/delay.h> +#include <asm/efi.h> +#include <asm/irq.h> +#include <asm/machvec.h> +#include <asm/ptrace.h> +#include <asm/sal.h> +#include <asm/system.h> + +extern rwlock_t xtime_lock; +extern volatile unsigned long lost_ticks; + +#ifdef CONFIG_IA64_DEBUG_IRQ + +unsigned long last_cli_ip; + +#endif + +static struct { + unsigned long delta; + unsigned long next[NR_CPUS]; +} itm; + +static void +do_profile (unsigned long ip) +{ + extern char _stext; + + if (prof_buffer && current->pid) { + ip -= (unsigned long) &_stext; + ip >>= prof_shift; + /* + * Don't ignore out-of-bounds IP values silently, + * put them into the last histogram slot, so if + * present, they will show up as a sharp peak. + */ + if (ip > prof_len - 1) + ip = prof_len - 1; + + atomic_inc((atomic_t *) &prof_buffer[ip]); + } +} + +/* + * Return the number of micro-seconds that elapsed since the last + * update to jiffy. The xtime_lock must be at least read-locked when + * calling this routine. + */ +static inline unsigned long +gettimeoffset (void) +{ + unsigned long now = ia64_get_itc(); + unsigned long elapsed_cycles, lost; + + elapsed_cycles = now - (itm.next[smp_processor_id()] - itm.delta); + + lost = lost_ticks; + if (lost) + elapsed_cycles += lost*itm.delta; + + return (elapsed_cycles*my_cpu_data.usec_per_cyc) >> IA64_USEC_PER_CYC_SHIFT; +} + +void +do_settimeofday (struct timeval *tv) +{ + write_lock_irq(&xtime_lock); + { + /* + * This is revolting. We need to set the xtime.tv_usec + * correctly. However, the value in this location is + * is value at the last tick. Discover what + * correction gettimeofday would have done, and then + * undo it! 
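+		 * For example (made-up numbers): if the caller passes
+		 * tv_usec = 500 while gettimeoffset() reports that 300
+		 * usec have elapsed since the last tick, we store 200,
+		 * so that an immediate gettimeofday() yields
+		 * 200 + 300 = 500 again.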
+		 */
+		tv->tv_usec -= gettimeoffset();
+		while (tv->tv_usec < 0) {
+			tv->tv_usec += 1000000;
+			tv->tv_sec--;
+		}
+
+		xtime = *tv;
+		time_adjust = 0;		/* stop active adjtime() */
+		time_status |= STA_UNSYNC;
+		time_maxerror = NTP_PHASE_LIMIT;
+		time_esterror = NTP_PHASE_LIMIT;
+	}
+	write_unlock_irq(&xtime_lock);
+}
+
+void
+do_gettimeofday (struct timeval *tv)
+{
+	unsigned long flags, usec, sec;
+
+	read_lock_irqsave(&xtime_lock, flags);
+	{
+		usec = gettimeoffset();
+
+		sec = xtime.tv_sec;
+		usec += xtime.tv_usec;
+	}
+	read_unlock_irqrestore(&xtime_lock, flags);
+
+	while (usec >= 1000000) {
+		usec -= 1000000;
+		++sec;
+	}
+
+	tv->tv_sec = sec;
+	tv->tv_usec = usec;
+}
+
+static void
+timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	static unsigned long last_time;
+	static unsigned char count;
+	int cpu = smp_processor_id();
+
+	/*
+	 * Here we are in the timer irq handler. We have irqs locally
+	 * disabled, but we don't know if the timer_bh is running on
+	 * another CPU. We need to avoid an SMP race by acquiring the
+	 * xtime_lock.
+	 */
+	write_lock(&xtime_lock);
+	while (1) {
+		/* do kernel PC profiling here. */
+		if (!user_mode(regs))
+			do_profile(regs->cr_iip);
+
+#ifdef CONFIG_SMP
+		smp_do_timer(regs);
+		if (smp_processor_id() == bootstrap_processor)
+			do_timer(regs);
+#else
+		do_timer(regs);
+#endif
+
+		itm.next[cpu] += itm.delta;
+		/*
+		 * There is a race condition here: to be on the "safe"
+		 * side, we process timer ticks until itm.next is
+		 * ahead of the itc by at least half the timer
+		 * interval. This should give us enough time to set
+		 * the new itm value without losing a timer tick.
+		 */
+		if (time_after(itm.next[cpu], ia64_get_itc() + itm.delta/2)) {
+			ia64_set_itm(itm.next[cpu]);
+			break;
+		}
+
+#if !(defined(CONFIG_IA64_SOFTSDV_HACKS) && defined(CONFIG_SMP))
+		/*
+		 * SoftSDV in SMP mode is _slow_, so we do lose ticks,
+		 * but it's really OK...
+		 */
+		if (count > 0 && jiffies - last_time > 5*HZ)
+			count = 0;
+		if (count++ == 0) {
+			last_time = jiffies;
+			printk("Lost clock tick on CPU %d (now=%lx, next=%lx)!!\n",
+			       cpu, ia64_get_itc(), itm.next[cpu]);
+# ifdef CONFIG_IA64_DEBUG_IRQ
+			printk("last_cli_ip=%lx\n", last_cli_ip);
+# endif
+		}
+#endif
+	}
+	write_unlock(&xtime_lock);
+}
+
+/*
+ * Encapsulate access to the itm structure for SMP.
+ */
+void __init
+ia64_cpu_local_tick(void)
+{
+	/* arrange for the cycle counter to generate a timer interrupt: */
+	ia64_set_itv(TIMER_IRQ, 0);
+	ia64_set_itc(0);
+	itm.next[smp_processor_id()] = ia64_get_itc() + itm.delta;
+	ia64_set_itm(itm.next[smp_processor_id()]);
+}
+
+void __init
+ia64_init_itm (void)
+{
+	unsigned long platform_base_freq, itc_freq, drift;
+	struct pal_freq_ratio itc_ratio, proc_ratio;
+	long status;
+
+	/*
+	 * According to SAL v2.6, we need to use a SAL call to determine the
+	 * platform base frequency and then a PAL call to determine the
+	 * frequency ratio between the ITC and the base frequency.
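+	 * For instance (illustrative numbers only, matching the fallback
+	 * values used below): a 100MHz base clock with an ITC ratio of 3/1
+	 * would give
+	 *
+	 *	itc_freq  = 100000000 * 3 / 1	(a 300MHz ITC)
+	 *	itm.delta = itc_freq / HZ	(ITC cycles per timer tick)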
+ */ + status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, &platform_base_freq, &drift); + if (status != 0) { + printk("SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status)); + } else { + status = ia64_pal_freq_ratios(&proc_ratio, 0, &itc_ratio); + if (status != 0) + printk("PAL_FREQ_RATIOS failed with status=%ld\n", status); + } + if (status != 0) { + /* invent "random" values */ + printk("SAL/PAL failed to obtain frequency info---inventing reasonably values\n"); + platform_base_freq = 100000000; + itc_ratio.num = 3; + itc_ratio.den = 1; + } +#if defined(CONFIG_IA64_LION_HACKS) + /* Our Lion currently returns base freq 104.857MHz, which + ain't right (it really is 100MHz). */ + printk("SAL/PAL returned: base-freq=%lu, itc-ratio=%lu/%lu, proc-ratio=%lu/%lu\n", + platform_base_freq, itc_ratio.num, itc_ratio.den, + proc_ratio.num, proc_ratio.den); + platform_base_freq = 100000000; +#elif 0 && defined(CONFIG_IA64_BIGSUR_HACKS) + /* BigSur with 991020 firmware returned itc-ratio=9/2 and base + freq 75MHz, which wasn't right. The 991119 firmware seems + to return the right values, so this isn't necessary + anymore... */ + printk("SAL/PAL returned: base-freq=%lu, itc-ratio=%lu/%lu, proc-ratio=%lu/%lu\n", + platform_base_freq, itc_ratio.num, itc_ratio.den, + proc_ratio.num, proc_ratio.den); + platform_base_freq = 100000000; + proc_ratio.num = 5; proc_ratio.den = 1; + itc_ratio.num = 5; itc_ratio.den = 1; +#elif defined(CONFIG_IA64_SOFTSDV_HACKS) + platform_base_freq = 10000000; + proc_ratio.num = 4; proc_ratio.den = 1; + itc_ratio.num = 4; itc_ratio.den = 1; +#else + if (platform_base_freq < 40000000) { + printk("Platform base frequency %lu bogus---resetting to 75MHz!\n", + platform_base_freq); + platform_base_freq = 75000000; + } +#endif + if (!proc_ratio.den) + proc_ratio.num = 1; /* avoid division by zero */ + if (!itc_ratio.den) + itc_ratio.num = 1; /* avoid division by zero */ + + itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den; + itm.delta = itc_freq / HZ; + printk("timer: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, ITC freq=%lu.%03luMHz\n", + platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000, + itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000); + + my_cpu_data.proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den; + my_cpu_data.itc_freq = itc_freq; + my_cpu_data.cyc_per_usec = itc_freq / 1000000; + my_cpu_data.usec_per_cyc = (1000000UL << IA64_USEC_PER_CYC_SHIFT) / itc_freq; + + /* Setup the CPU local timer tick */ + ia64_cpu_local_tick(); +} + +void __init +time_init (void) +{ + /* + * Request the IRQ _before_ doing anything to cause that + * interrupt to be posted. + */ + if (request_irq(TIMER_IRQ, timer_interrupt, 0, "timer", NULL)) + panic("Could not allocate timer IRQ!"); + + efi_gettimeofday(&xtime); + ia64_init_itm(); +} diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c new file mode 100644 index 000000000..c242622ec --- /dev/null +++ b/arch/ia64/kernel/traps.c @@ -0,0 +1,423 @@ +/* + * Architecture-specific trap handling. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + */ + +/* + * The fpu_fault() handler needs to be able to access and update all + * floating point registers. Those saved in pt_regs can be accessed + * through that structure, but those not saved, will be accessed + * directly. To make this work, we need to ensure that the compiler + * does not end up using a preserved floating point register on its + * own. 
The following achieves this by declaring preserved registers + * that are not marked as "fixed" as global register variables. + */ +register double f2 asm ("f2"); register double f3 asm ("f3"); +register double f4 asm ("f4"); register double f5 asm ("f5"); + +register long f16 asm ("f16"); register long f17 asm ("f17"); +register long f18 asm ("f18"); register long f19 asm ("f19"); +register long f20 asm ("f20"); register long f21 asm ("f21"); +register long f22 asm ("f22"); register long f23 asm ("f23"); + +register double f24 asm ("f24"); register double f25 asm ("f25"); +register double f26 asm ("f26"); register double f27 asm ("f27"); +register double f28 asm ("f28"); register double f29 asm ("f29"); +register double f30 asm ("f30"); register double f31 asm ("f31"); + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/sched.h> + +#ifdef CONFIG_KDB +# include <linux/kdb.h> +#endif + +#include <asm/processor.h> +#include <asm/uaccess.h> + +#include <asm/fpswa.h> + +static fpswa_interface_t *fpswa_interface; + +void __init +trap_init (void) +{ + printk("fpswa interface at %lx\n", ia64_boot_param.fpswa); + if (ia64_boot_param.fpswa) { +#define OLD_FIRMWARE +#ifdef OLD_FIRMWARE + /* + * HACK to work around broken firmware. This code + * applies the label fixup to the FPSWA interface and + * works both with old and new (fixed) firmware. + */ + unsigned long addr = (unsigned long) __va(ia64_boot_param.fpswa); + unsigned long gp_val = *(unsigned long *)(addr + 8); + + /* go indirect and indexed to get table address */ + addr = gp_val; + gp_val = *(unsigned long *)(addr + 8); + + while (gp_val == *(unsigned long *)(addr + 8)) { + *(unsigned long *)addr |= PAGE_OFFSET; + *(unsigned long *)(addr + 8) |= PAGE_OFFSET; + addr += 16; + } +#endif + /* FPSWA fixup: make the interface pointer a kernel virtual address: */ + fpswa_interface = __va(ia64_boot_param.fpswa); + } +} + +void +die_if_kernel (char *str, struct pt_regs *regs, long err) +{ + if (user_mode(regs)) { +#if 1 + /* XXX for debugging only */ + printk ("!!die_if_kernel: %s(%d): %s %ld\n", + current->comm, current->pid, str, err); + show_regs(regs); +#endif + return; + } + + printk("%s[%d]: %s %ld\n", current->comm, current->pid, str, err); + +#ifdef CONFIG_KDB + while (1) { + kdb(KDB_REASON_PANIC, 0, regs); + printk("Cant go anywhere from Panic!\n"); + } +#endif + + show_regs(regs); + + if (current->thread.flags & IA64_KERNEL_DEATH) { + printk("die_if_kernel recursion detected.\n"); + sti(); + while (1); + } + current->thread.flags |= IA64_KERNEL_DEATH; + do_exit(SIGSEGV); +} + +void +ia64_bad_break (unsigned long break_num, struct pt_regs *regs) +{ + siginfo_t siginfo; + + /* gdb uses a break number of 0xccccc for debug breakpoints: */ + if (break_num != 0xccccc) + die_if_kernel("Bad break", regs, break_num); + + siginfo.si_signo = SIGTRAP; + siginfo.si_errno = break_num; /* XXX is it legal to abuse si_errno like this? */ + siginfo.si_code = TRAP_BRKPT; + send_sig_info(SIGTRAP, &siginfo, current); +} + +/* + * Unimplemented system calls. This is called only for stuff that + * we're supposed to implement but haven't done so yet. Everything + * else goes to sys_ni_syscall. 
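+ * (For example, invoking unwired syscall number 1234 just logs
+ * "<sc1234(...)>", r15 holding the syscall number on entry, and
+ * returns -ENOSYS.)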
+ */ +asmlinkage long +ia64_ni_syscall (unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5, unsigned long arg6, unsigned long arg7, + unsigned long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + + printk("<sc%ld(%lx,%lx,%lx,%lx)>\n", regs->r15, arg0, arg1, arg2, arg3); + return -ENOSYS; +} + +/* + * disabled_fp_fault() is called when a user-level process attempts to + * access one of the registers f32..f127 while it doesn't own the + * fp-high register partition. When this happens, we save the current + * fph partition in the task_struct of the fpu-owner (if necessary) + * and then load the fp-high partition of the current task (if + * necessary). + */ +static inline void +disabled_fph_fault (struct pt_regs *regs) +{ + struct task_struct *fpu_owner = ia64_get_fpu_owner(); + + regs->cr_ipsr &= ~(IA64_PSR_DFH | IA64_PSR_MFH); + if (fpu_owner != current) { + ia64_set_fpu_owner(current); + + if (fpu_owner && ia64_psr(ia64_task_regs(fpu_owner))->mfh) { + fpu_owner->thread.flags |= IA64_THREAD_FPH_VALID; + __ia64_save_fpu(fpu_owner->thread.fph); + } + if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) { + __ia64_load_fpu(current->thread.fph); + } else { + __ia64_init_fpu(); + } + } +} + +static inline int +fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs, + struct pt_regs *regs) +{ + fp_state_t fp_state; + fpswa_ret_t ret; +#ifdef FPSWA_BUG + struct ia64_fpreg f6_15[10]; +#endif + + if (!fpswa_interface) + return -1; + + memset(&fp_state, 0, sizeof(fp_state_t)); + + /* + * compute fp_state. only FP registers f6 - f11 are used by the + * kernel, so set those bits in the mask and set the low volatile + * pointer to point to these registers. + */ + fp_state.bitmask_low64 = 0xffc0; /* bit6..bit15 */ +#ifndef FPSWA_BUG + fp_state.fp_state_low_volatile = ®s->f6; +#else + f6_15[0] = regs->f6; + f6_15[1] = regs->f7; + f6_15[2] = regs->f8; + f6_15[3] = regs->f9; + __asm__ ("stf.spill %0=f10" : "=m"(f6_15[4])); + __asm__ ("stf.spill %0=f11" : "=m"(f6_15[5])); + __asm__ ("stf.spill %0=f12" : "=m"(f6_15[6])); + __asm__ ("stf.spill %0=f13" : "=m"(f6_15[7])); + __asm__ ("stf.spill %0=f14" : "=m"(f6_15[8])); + __asm__ ("stf.spill %0=f15" : "=m"(f6_15[9])); + fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) f6_15; +#endif + /* + * unsigned long (*EFI_FPSWA) ( + * unsigned long trap_type, + * void *Bundle, + * unsigned long *pipsr, + * unsigned long *pfsr, + * unsigned long *pisr, + * unsigned long *ppreds, + * unsigned long *pifs, + * void *fp_state); + */ + ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle, + (unsigned long *) ipsr, (unsigned long *) fpsr, + (unsigned long *) isr, (unsigned long *) pr, + (unsigned long *) ifs, &fp_state); +#ifdef FPSWA_BUG + __asm__ ("ldf.fill f10=%0" :: "m"(f6_15[4])); + __asm__ ("ldf.fill f11=%0" :: "m"(f6_15[5])); + __asm__ ("ldf.fill f12=%0" :: "m"(f6_15[6])); + __asm__ ("ldf.fill f13=%0" :: "m"(f6_15[7])); + __asm__ ("ldf.fill f14=%0" :: "m"(f6_15[8])); + __asm__ ("ldf.fill f15=%0" :: "m"(f6_15[9])); + regs->f6 = f6_15[0]; + regs->f7 = f6_15[1]; + regs->f8 = f6_15[2]; + regs->f9 = f6_15[3]; +#endif + return ret.status; +} + +/* + * Handle floating-point assist faults and traps. 
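+ *
+ * (Vector 32 is the fault case: the faulting bundle has not executed,
+ * so on success we advance the IP below. Vector 33 is the trap case:
+ * the bundle has already executed, which is why fault_ip is backed up
+ * by one bundle when ri == 0.)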
+ */ +static int +handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) +{ + long exception, bundle[2]; + unsigned long fault_ip; + static int fpu_swa_count = 0; + static unsigned long last_time; + + fault_ip = regs->cr_iip; + if (!fp_fault && (ia64_psr(regs)->ri == 0)) + fault_ip -= 16; + if (copy_from_user(bundle, (void *) fault_ip, sizeof(bundle))) + return -1; + + if (fpu_swa_count > 5 && jiffies - last_time > 5*HZ) + fpu_swa_count = 0; + if (++fpu_swa_count < 5) { + last_time = jiffies; + printk("%s(%d): floating-point assist fault at ip %016lx\n", + current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri); + } + + exception = fp_emulate(fp_fault, bundle, ®s->cr_ipsr, ®s->ar_fpsr, &isr, ®s->pr, + ®s->cr_ifs, regs); + if (fp_fault) { + if (exception == 0) { + /* emulation was successful */ + ia64_increment_ip(regs); + } else if (exception == -1) { + printk("handle_fpu_swa: fp_emulate() returned -1\n"); + return -2; + } else { + /* is next instruction a trap? */ + if (exception & 2) { + ia64_increment_ip(regs); + } + return -1; + } + } else { + if (exception == -1) { + printk("handle_fpu_swa: fp_emulate() returned -1\n"); + return -2; + } else if (exception != 0) { + /* raise exception */ + return -1; + } + } + return 0; +} + +void +ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, + unsigned long iim, unsigned long itir, unsigned long arg5, + unsigned long arg6, unsigned long arg7, unsigned long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + unsigned long code, error = isr; + struct siginfo siginfo; + char buf[128]; + int result; + static const char *reason[] = { + "IA-64 Illegal Operation fault", + "IA-64 Privileged Operation fault", + "IA-64 Privileged Register fault", + "IA-64 Reserved Register/Field fault", + "Disabled Instruction Set Transition fault", + "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault", + "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12", + "Unknown fault 13", "Unknown fault 14", "Unknown fault 15" + }; + +#if 0 + /* this is for minimal trust debugging; yeah this kind of stuff is useful at times... */ + + if (vector != 25) { + static unsigned long last_time; + static char count; + unsigned long n = vector; + char buf[32], *cp; + + if (count > 5 && jiffies - last_time > 5*HZ) + count = 0; + + if (count++ < 5) { + last_time = jiffies; + cp = buf + sizeof(buf); + *--cp = '\0'; + while (n) { + *--cp = "0123456789abcdef"[n & 0xf]; + n >>= 4; + } + printk("<0x%s>", cp); + } + } +#endif + + switch (vector) { + case 24: /* General Exception */ + code = (isr >> 4) & 0xf; + sprintf(buf, "General Exception: %s%s", reason[code], + (code == 3) ? ((isr & (1UL << 37)) + ? 
" (RSE access)" : " (data access)") : ""); +#ifndef CONFIG_ITANIUM_ASTEP_SPECIFIC + if (code == 8) { +# ifdef CONFIG_IA64_PRINT_HAZARDS + printk("%016lx:possible hazard, pr = %016lx\n", regs->cr_iip, regs->pr); +# endif + return; + } +#endif + break; + + case 25: /* Disabled FP-Register */ + if (isr & 2) { + disabled_fph_fault(regs); + return; + } + sprintf(buf, "Disabled FPL fault---not supposed to happen!"); + break; + + case 29: /* Debug */ + case 35: /* Taken Branch Trap */ + case 36: /* Single Step Trap */ + switch (vector) { + case 29: siginfo.si_code = TRAP_BRKPT; break; + case 35: siginfo.si_code = TRAP_BRANCH; break; + case 36: siginfo.si_code = TRAP_TRACE; break; + } + siginfo.si_signo = SIGTRAP; + siginfo.si_errno = 0; + force_sig_info(SIGTRAP, &siginfo, current); + return; + + case 30: /* Unaligned fault */ + sprintf(buf, "Unaligned access in kernel mode---don't do this!"); + break; + + case 32: /* fp fault */ + case 33: /* fp trap */ + result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr); + if (result < 0) { + siginfo.si_signo = SIGFPE; + siginfo.si_errno = 0; + siginfo.si_code = 0; /* XXX fix me */ + siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); + send_sig_info(SIGFPE, &siginfo, current); + if (result == -1) + send_sig_info(SIGFPE, &siginfo, current); + else + force_sig(SIGFPE, current); + } + return; + + case 34: /* Unimplemented Instruction Address Trap */ + if (user_mode(regs)) { + printk("Woah! Unimplemented Instruction Address Trap!\n"); + siginfo.si_code = ILL_BADIADDR; + siginfo.si_signo = SIGILL; + siginfo.si_errno = 0; + force_sig_info(SIGILL, &siginfo, current); + return; + } + sprintf(buf, "Unimplemented Instruction Address fault"); + break; + + case 45: + printk("Unexpected IA-32 exception\n"); + force_sig(SIGSEGV, current); + return; + + case 46: + printk("Unexpected IA-32 intercept trap\n"); + force_sig(SIGSEGV, current); + return; + + case 47: + sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16); + break; + + default: + sprintf(buf, "Fault %lu", vector); + break; + } + die_if_kernel(buf, regs, error); + force_sig(SIGILL, current); +} diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c new file mode 100644 index 000000000..0bd213f6b --- /dev/null +++ b/arch/ia64/kernel/unaligned.c @@ -0,0 +1,1554 @@ +/* + * Architecture-specific unaligned trap handling. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp_lock.h> +#include <asm/uaccess.h> +#include <asm/rse.h> +#include <asm/processor.h> +#include <asm/unaligned.h> + +extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn)); + +#undef DEBUG_UNALIGNED_TRAP + +#ifdef DEBUG_UNALIGNED_TRAP +#define DPRINT(a) { printk("%s, line %d: ", __FUNCTION__, __LINE__); printk a;} +#else +#define DPRINT(a) +#endif + +#define IA64_FIRST_STACKED_GR 32 +#define IA64_FIRST_ROTATING_FR 32 +#define SIGN_EXT9 __IA64_UL(0xffffffffffffff00) + +/* + * For M-unit: + * + * opcode | m | x6 | + * --------|------|---------| + * [40-37] | [36] | [35:30] | + * --------|------|---------| + * 4 | 1 | 6 | = 11 bits + * -------------------------- + * However bits [31:30] are not directly useful to distinguish between + * load/store so we can use [35:32] instead, which gives the following + * mask ([40:32]) using 9 bits. 
The 'e' comes from the fact that we defer + * checking the m-bit until later in the load/store emulation. + */ +#define IA64_OPCODE_MASK 0x1ef00000000 + +/* + * Table C-28 Integer Load/Store + * + * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF + * + * ld8.fill, st8.fill MUST be aligned because the RNATs are based on + * the address (bits [8:3]), so we must failed. + */ +#define LD_OP 0x08000000000 +#define LDS_OP 0x08100000000 +#define LDA_OP 0x08200000000 +#define LDSA_OP 0x08300000000 +#define LDBIAS_OP 0x08400000000 +#define LDACQ_OP 0x08500000000 +/* 0x086, 0x087 are not relevant */ +#define LDCCLR_OP 0x08800000000 +#define LDCNC_OP 0x08900000000 +#define LDCCLRACQ_OP 0x08a00000000 +#define ST_OP 0x08c00000000 +#define STREL_OP 0x08d00000000 +/* 0x08e,0x8f are not relevant */ + +/* + * Table C-29 Integer Load +Reg + * + * we use the ld->m (bit [36:36]) field to determine whether or not we have + * a load/store of this form. + */ + +/* + * Table C-30 Integer Load/Store +Imm + * + * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF + * + * ld8.fill, st8.fill must be aligned because the Nat register are based on + * the address, so we must fail and the program must be fixed. + */ +#define LD_IMM_OP 0x0a000000000 +#define LDS_IMM_OP 0x0a100000000 +#define LDA_IMM_OP 0x0a200000000 +#define LDSA_IMM_OP 0x0a300000000 +#define LDBIAS_IMM_OP 0x0a400000000 +#define LDACQ_IMM_OP 0x0a500000000 +/* 0x0a6, 0xa7 are not relevant */ +#define LDCCLR_IMM_OP 0x0a800000000 +#define LDCNC_IMM_OP 0x0a900000000 +#define LDCCLRACQ_IMM_OP 0x0aa00000000 +#define ST_IMM_OP 0x0ac00000000 +#define STREL_IMM_OP 0x0ad00000000 +/* 0x0ae,0xaf are not relevant */ + +/* + * Table C-32 Floating-point Load/Store + */ +#define LDF_OP 0x0c000000000 +#define LDFS_OP 0x0c100000000 +#define LDFA_OP 0x0c200000000 +#define LDFSA_OP 0x0c300000000 +/* 0x0c6 is irrelevant */ +#define LDFCCLR_OP 0x0c800000000 +#define LDFCNC_OP 0x0c900000000 +/* 0x0cb is irrelevant */ +#define STF_OP 0x0cc00000000 + +/* + * Table C-33 Floating-point Load +Reg + * + * we use the ld->m (bit [36:36]) field to determine whether or not we have + * a load/store of this form. + */ + +/* + * Table C-34 Floating-point Load/Store +Imm + */ +#define LDF_IMM_OP 0x0e000000000 +#define LDFS_IMM_OP 0x0e100000000 +#define LDFA_IMM_OP 0x0e200000000 +#define LDFSA_IMM_OP 0x0e300000000 +/* 0x0e6 is irrelevant */ +#define LDFCCLR_IMM_OP 0x0e800000000 +#define LDFCNC_IMM_OP 0x0e900000000 +#define STF_IMM_OP 0x0ec00000000 + +typedef struct { + unsigned long qp:6; /* [0:5] */ + unsigned long r1:7; /* [6:12] */ + unsigned long imm:7; /* [13:19] */ + unsigned long r3:7; /* [20:26] */ + unsigned long x:1; /* [27:27] */ + unsigned long hint:2; /* [28:29] */ + unsigned long x6_sz:2; /* [30:31] */ + unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */ + unsigned long m:1; /* [36:36] */ + unsigned long op:4; /* [37:40] */ + unsigned long pad:23; /* [41:63] */ +} load_store_t; + + +typedef enum { + UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */ + UPD_REG /* ldXZ r1=[r3],r2 */ +} update_t; + +/* + * We use tables to keep track of the offsets of registers in the saved state. + * This way we save having big switch/case statements. + * + * We use bit 0 to indicate switch_stack or pt_regs. + * The offset is simply shifted by 1 bit. + * A 2-byte value should be enough to hold any kind of offset + * + * In case the calling convention changes (and thus pt_regs/switch_stack) + * simply use RSW instead of RPT or vice-versa. 
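+ *
+ * E.g. (illustrative): r4 is saved in switch_stack, so gr_info[4] is
+ * RSW(r4), i.e. 1 | (offset-of-r4-in-switch_stack << 1): bit 0 says
+ * "look in switch_stack", and GR_OFFS() shifts the offset back down.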
+ */ + +#define RPO(x) ((size_t) &((struct pt_regs *)0)->x) +#define RSO(x) ((size_t) &((struct switch_stack *)0)->x) + +#define RPT(x) (RPO(x) << 1) +#define RSW(x) (1| RSO(x)<<1) + +#define GR_OFFS(x) (gr_info[x]>>1) +#define GR_IN_SW(x) (gr_info[x] & 0x1) + +#define FR_OFFS(x) (fr_info[x]>>1) +#define FR_IN_SW(x) (fr_info[x] & 0x1) + +static u16 gr_info[32]={ + 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ + + RPT(r1), RPT(r2), RPT(r3), + + RSW(r4), RSW(r5), RSW(r6), RSW(r7), + + RPT(r8), RPT(r9), RPT(r10), RPT(r11), + RPT(r12), RPT(r13), RPT(r14), RPT(r15), + + RPT(r16), RPT(r17), RPT(r18), RPT(r19), + RPT(r20), RPT(r21), RPT(r22), RPT(r23), + RPT(r24), RPT(r25), RPT(r26), RPT(r27), + RPT(r28), RPT(r29), RPT(r30), RPT(r31) +}; + +static u16 fr_info[32]={ + 0, /* constant : WE SHOULD NEVER GET THIS */ + 0, /* constant : WE SHOULD NEVER GET THIS */ + + RSW(f2), RSW(f3), RSW(f4), RSW(f5), + + RPT(f6), RPT(f7), RPT(f8), RPT(f9), + + RSW(f10), RSW(f11), RSW(f12), RSW(f13), RSW(f14), + RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19), + RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24), + RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29), + RSW(f30), RSW(f31) +}; + +/* Invalidate ALAT entry for integer register REGNO. */ +static void +invala_gr (int regno) +{ +# define F(reg) case reg: __asm__ __volatile__ ("invala.e r%0" :: "i"(reg)); break + + switch (regno) { + F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7); + F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15); + F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23); + F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31); + F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39); + F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47); + F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55); + F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63); + F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71); + F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79); + F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87); + F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95); + F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103); + F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111); + F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119); + F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127); + } +# undef F +} + +/* Invalidate ALAT entry for floating-point register REGNO. 
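+ * (invala.e encodes the register number as an immediate, hence the
+ * 128-way switch over constant asm operands in these two helpers
+ * rather than a single parameterized asm.)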
*/ +static void +invala_fr (int regno) +{ +# define F(reg) case reg: __asm__ __volatile__ ("invala.e f%0" :: "i"(reg)); break + + switch (regno) { + F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7); + F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15); + F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23); + F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31); + F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39); + F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47); + F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55); + F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63); + F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71); + F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79); + F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87); + F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95); + F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103); + F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111); + F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119); + F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127); + } +# undef F +} + +static void +set_rse_reg(struct pt_regs *regs, unsigned long r1, unsigned long val, int nat) +{ + struct switch_stack *sw = (struct switch_stack *)regs - 1; + unsigned long *kbs = ((unsigned long *)current) + IA64_RBS_OFFSET/8; + unsigned long on_kbs; + unsigned long *bsp, *bspstore, *addr, *ubs_end, *slot; + unsigned long rnats; + long nlocals; + + /* + * cr_ifs=[rv:ifm], ifm=[....:sof(6)] + * nlocal=number of locals (in+loc) register of the faulting function + */ + nlocals = (regs->cr_ifs) & 0x7f; + + DPRINT(("sw.bsptore=%lx pt.bspstore=%lx\n", sw->ar_bspstore, regs->ar_bspstore)); + DPRINT(("cr.ifs=%lx sof=%ld sol=%ld\n", + regs->cr_ifs, regs->cr_ifs &0x7f, (regs->cr_ifs>>7)&0x7f)); + + on_kbs = ia64_rse_num_regs(kbs, (unsigned long *)sw->ar_bspstore); + bspstore = (unsigned long *)regs->ar_bspstore; + + DPRINT(("rse_slot_num=0x%lx\n",ia64_rse_slot_num((unsigned long *)sw->ar_bspstore))); + DPRINT(("kbs=%p nlocals=%ld\n", kbs, nlocals)); + DPRINT(("bspstore next rnat slot %p\n", + ia64_rse_rnat_addr((unsigned long *)sw->ar_bspstore))); + DPRINT(("on_kbs=%ld rnats=%ld\n", + on_kbs, ((sw->ar_bspstore-(unsigned long)kbs)>>3) - on_kbs)); + + /* + * See get_rse_reg() for an explanation on the following instructions + */ + ubs_end = ia64_rse_skip_regs(bspstore, on_kbs); + bsp = ia64_rse_skip_regs(ubs_end, -nlocals); + addr = slot = ia64_rse_skip_regs(bsp, r1 - 32); + + DPRINT(("ubs_end=%p bsp=%p addr=%p slot=0x%lx\n", + ubs_end, bsp, addr, ia64_rse_slot_num(addr))); + + ia64_poke(regs, current, (unsigned long)addr, val); + + /* + * addr will now contain the address of the RNAT for the register + */ + addr = ia64_rse_rnat_addr(addr); + + ia64_peek(regs, current, (unsigned long)addr, &rnats); + DPRINT(("rnat @%p = 0x%lx nat=%d rnatval=%lx\n", + addr, rnats, nat, rnats &ia64_rse_slot_num(slot))); + + if ( nat ) { + rnats |= __IA64_UL(1) << ia64_rse_slot_num(slot); + } else { + rnats &= ~(__IA64_UL(1) << ia64_rse_slot_num(slot)); + } + ia64_poke(regs, current, (unsigned long)addr, rnats); + + DPRINT(("rnat changed to @%p = 0x%lx\n", addr, rnats)); +} + + +static void +get_rse_reg(struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat) +{ + struct switch_stack *sw = (struct switch_stack *)regs - 1; + unsigned long *kbs = (unsigned long *)current + IA64_RBS_OFFSET/8; + unsigned long on_kbs; + 
long nlocals;
+	unsigned long *bsp, *addr, *ubs_end, *slot, *bspstore;
+	unsigned long rnats;
+
+	/*
+	 * cr_ifs=[rv:ifm], ifm=[....:sof(6)]
+	 * nlocals=number of local registers in the faulting function
+	 */
+	nlocals = (regs->cr_ifs) & 0x7f;
+
+	/*
+	 * save_switch_stack does a flushrs and saves bspstore.
+	 * on_kbs = actual number of registers saved on kernel backing store
+	 *          (taking into account potential RNATs)
+	 *
+	 * Note that this number can be greater than nlocals if the dirty
+	 * partitions included more than one stack frame at the time we
+	 * switched to KBS
+	 */
+	on_kbs   = ia64_rse_num_regs(kbs, (unsigned long *)sw->ar_bspstore);
+	bspstore = (unsigned long *)regs->ar_bspstore;
+
+	/*
+	 * To simplify the logic, we calculate everything as if there was only
+	 * one backing store i.e., the user one (UBS). We leave it to peek/poke
+	 * to figure out whether the register we're looking for really is
+	 * on the UBS or on KBS.
+	 *
+	 * regs->ar_bspstore = address of last register saved on UBS (before switch)
+	 *
+	 * ubs_end = virtual end of the UBS (if everything had been spilled there)
+	 *
+	 * We know that ubs_end is the point where the last register on the
+	 * stack frame we're interested in has been saved. So we need to walk
+	 * our way backward to figure out what the BSP "was" for that frame,
+	 * this will give us the location of r32.
+	 *
+	 * bsp = "virtual UBS" address of r32 for our frame
+	 *
+	 * Finally, compute the address of the register we're looking for
+	 * using bsp as our base (move up again).
+	 *
+	 * Please note that in our case, we know that the register is necessarily
+	 * on the KBS because we are only interested in the current frame at the moment
+	 * we got the exception i.e., bsp is not changed until we switch to KBS.
+	 */
+	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
+	bsp     = ia64_rse_skip_regs(ubs_end, -nlocals);
+	addr    = slot = ia64_rse_skip_regs(bsp, r1 - 32);
+
+	DPRINT(("ubs_end=%p bsp=%p addr=%p slot=0x%lx\n",
+		ubs_end, bsp, addr, ia64_rse_slot_num(addr)));
+
+	ia64_peek(regs, current, (unsigned long)addr, val);
+
+	/*
+	 * addr will now contain the address of the RNAT for the register
+	 */
+	addr = ia64_rse_rnat_addr(addr);
+
+	ia64_peek(regs, current, (unsigned long)addr, &rnats);
+	DPRINT(("rnat @%p = 0x%lx\n", addr, rnats));
+
+	if ( nat ) *nat = rnats >> ia64_rse_slot_num(slot) & 0x1;
+}
+
+
+static void
+setreg(unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
+{
+	struct switch_stack *sw = (struct switch_stack *)regs -1;
+	unsigned long addr;
+	unsigned long bitmask;
+	unsigned long *unat;
+
+
+	/*
+	 * First takes care of stacked registers
+	 */
+	if ( regnum >= IA64_FIRST_STACKED_GR ) {
+		set_rse_reg(regs, regnum, val, nat);
+		return;
+	}
+
+	/*
+	 * Using r0 as a target raises a General Exception fault which has
+	 * higher priority than the Unaligned Reference fault.
+	 */
+
+	/*
+	 * Now look at registers in [0-31] range and init correct UNAT
+	 */
+	if ( GR_IN_SW(regnum) ) {
+		addr = (unsigned long)sw;
+		unat = &sw->ar_unat;
+	} else {
+		addr = (unsigned long)regs;
+		unat = &sw->caller_unat;
+	}
+	DPRINT(("tmp_base=%lx switch_stack=%s offset=%d\n",
+		addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum)));
+	/*
+	 * add offset from base of struct
+	 * and do it!
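+	 *
+	 * (E.g., for regnum 8, which lives in pt_regs, this computes
+	 * the equivalent of (char *)regs + offset-of-r8-in-pt_regs.)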
+	 */
+	addr += GR_OFFS(regnum);
+
+	*(unsigned long *)addr = val;
+
+	/*
+	 * We need to clear the corresponding UNAT bit to fully emulate the load
+	 * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4
+	 */
+	bitmask = __IA64_UL(1) << (addr >> 3 & 0x3f);
+	DPRINT(("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, unat, *unat));
+	if ( nat ) {
+		*unat |= bitmask;
+	} else {
+		*unat &= ~bitmask;
+	}
+	DPRINT(("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, unat,*unat));
+}
+
+#define IA64_FPH_OFFS(r) (r - IA64_FIRST_ROTATING_FR)
+
+static void
+setfpreg(unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
+{
+	struct switch_stack *sw = (struct switch_stack *)regs - 1;
+	unsigned long addr;
+
+	/*
+	 * From EAS-2.5: FPDisableFault has higher priority than
+	 * Unaligned Fault. Thus, when we get here, we know the partition is
+	 * enabled.
+	 *
+	 * The registers [32-127] are usually saved in the tss. When we get here,
+	 * they are NECESSARILY live because they are only saved explicitly.
+	 * We have 3 ways of updating the values: force a save of the range
+	 * in tss, use a gigantic switch/case statement or generate code on the
+	 * fly to store to the right register.
+	 * For now, we are using the (slow) save/restore way.
+	 */
+	if ( regnum >= IA64_FIRST_ROTATING_FR ) {
+		/*
+		 * force a save of [32-127] to tss
+		 * we use the __() form to avoid fiddling with the dfh bit
+		 */
+		__ia64_save_fpu(&current->thread.fph[0]);
+
+		current->thread.fph[IA64_FPH_OFFS(regnum)] = *fpval;
+
+		__ia64_load_fpu(&current->thread.fph[0]);
+
+		/*
+		 * mark the high partition as being used now
+		 *
+		 * This is REQUIRED because the disabled_fph_fault() does
+		 * not set it, it's relying on the faulting instruction to
+		 * do it. In our case the faulting instruction never gets executed
+		 * completely, so we need to toggle the bit.
+		 */
+		regs->cr_ipsr |= IA64_PSR_MFH;
+	} else {
+		/*
+		 * pt_regs or switch_stack ?
+		 */
+		if ( FR_IN_SW(regnum) ) {
+			addr = (unsigned long)sw;
+		} else {
+			addr = (unsigned long)regs;
+		}
+
+		DPRINT(("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum)));
+
+		addr += FR_OFFS(regnum);
+		*(struct ia64_fpreg *)addr = *fpval;
+
+		/*
+		 * mark the low partition as being used now
+		 *
+		 * It is highly unlikely that this bit is not already set, but
+		 * let's do it for safety.
+		 */
+		regs->cr_ipsr |= IA64_PSR_MFL;
+
+	}
+}
+
+/*
+ * Those 2 inline functions generate the spilled versions of the constant floating point
+ * registers which can be used with stfX
+ */
+static inline void
+float_spill_f0(struct ia64_fpreg *final)
+{
+	__asm__ __volatile__ ("stf.spill [%0]=f0" :: "r"(final) : "memory");
+}
+
+static inline void
+float_spill_f1(struct ia64_fpreg *final)
+{
+	__asm__ __volatile__ ("stf.spill [%0]=f1" :: "r"(final) : "memory");
+}
+
+static void
+getfpreg(unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
+{
+	struct switch_stack *sw = (struct switch_stack *)regs -1;
+	unsigned long addr;
+
+	/*
+	 * From EAS-2.5: FPDisableFault has higher priority than
+	 * Unaligned Fault. Thus, when we get here, we know the partition is
+	 * enabled.
+	 *
+	 * When regnum > 31, the register is still live and
+	 * we need to force a save to the tss to get access to it.
+	 * See discussion in setfpreg() for reasons and other ways of doing this.
+	 */
+	if ( regnum >= IA64_FIRST_ROTATING_FR ) {
+
+		/*
+		 * force a save of [32-127] to tss
+		 * we use the __ia64_save_fpu() form to avoid fiddling with
+		 * the dfh bit.
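+		 * (This spills all 96 high registers, 96 x 16 = 1536
+		 * bytes, per emulated access; that is the "(slow)
+		 * save/restore way" mentioned in setfpreg().)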
+ */ + __ia64_save_fpu(¤t->thread.fph[0]); + + *fpval = current->thread.fph[IA64_FPH_OFFS(regnum)]; + } else { + /* + * f0 = 0.0, f1= 1.0. Those registers are constant and are thus + * not saved, we must generate their spilled form on the fly + */ + switch(regnum) { + case 0: + float_spill_f0(fpval); + break; + case 1: + float_spill_f1(fpval); + break; + default: + /* + * pt_regs or switch_stack ? + */ + addr = FR_IN_SW(regnum) ? (unsigned long)sw + : (unsigned long)regs; + + DPRINT(("is_sw=%d tmp_base=%lx offset=0x%x\n", + FR_IN_SW(regnum), addr, FR_OFFS(regnum))); + + addr += FR_OFFS(regnum); + *fpval = *(struct ia64_fpreg *)addr; + } + } +} + + +static void +getreg(unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *)regs -1; + unsigned long addr, *unat; + + if ( regnum >= IA64_FIRST_STACKED_GR ) { + get_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * take care of r0 (read-only always evaluate to 0) + */ + if ( regnum == 0 ) { + *val = 0; + *nat = 0; + return; + } + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + if ( GR_IN_SW(regnum) ) { + addr = (unsigned long)sw; + unat = &sw->ar_unat; + } else { + addr = (unsigned long)regs; + unat = &sw->caller_unat; + } + + DPRINT(("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum))); + + addr += GR_OFFS(regnum); + + *val = *(unsigned long *)addr; + + /* + * do it only when requested + */ + if ( nat ) *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL; +} + +static void +emulate_load_updates(update_t type, load_store_t *ld, struct pt_regs *regs, unsigned long ifa) +{ + /* + * IMPORTANT: + * Given the way we handle unaligned speculative loads, we should + * not get to this point in the code but we keep this sanity check, + * just in case. + */ + if ( ld->x6_op == 1 || ld->x6_op == 3 ) { + printk(KERN_ERR __FUNCTION__": register update on speculative load, error\n"); + die_if_kernel("unaligned reference on specualtive load with register update\n", + regs, 30); + } + + + /* + * at this point, we know that the base register to update is valid i.e., + * it's not r0 + */ + if ( type == UPD_IMMEDIATE ) { + unsigned long imm; + + /* + * Load +Imm: ldXZ r1=[r3],imm(9) + * + * + * form imm9: [13:19] contain the first 7 bits + */ + imm = ld->x << 7 | ld->imm; + + /* + * sign extend (1+8bits) if m set + */ + if (ld->m) imm |= SIGN_EXT9; + + /* + * ifa == r3 and we know that the NaT bit on r3 was clear so + * we can directly use ifa. + */ + ifa += imm; + + setreg(ld->r3, ifa, 0, regs); + + DPRINT(("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld->x, ld->m, imm, ifa)); + + } else if ( ld->m ) { + unsigned long r2; + int nat_r2; + + /* + * Load +Reg Opcode: ldXZ r1=[r3],r2 + * + * Note: that we update r3 even in the case of ldfX.a + * (where the load does not happen) + * + * The way the load algorithm works, we know that r3 does not + * have its NaT bit set (would have gotten NaT consumption + * before getting the unaligned fault). So we can use ifa + * which equals r3 at this point. + * + * IMPORTANT: + * The above statement holds ONLY because we know that we + * never reach this code when trying to do a ldX.s. 
+		 * If we ever make it to here on an ldfX.s, that assumption no
+		 * longer holds (r3 could carry a NaT) and using ifa would be wrong.
+		 */
+		getreg(ld->imm, &r2, &nat_r2, regs);
+
+		ifa += r2;
+
+		/*
+		 * propagate Nat r2 -> r3
+		 */
+		setreg(ld->r3, ifa, nat_r2, regs);
+
+		DPRINT(("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld->imm, r2, ifa, nat_r2));
+	}
+}
+
+
+static int
+emulate_load_int(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
+{
+	unsigned long val;
+	unsigned int len = 1<< ld->x6_sz;
+
+	/*
+	 * the macro supposes sequential access (which is the case)
+	 * if the first byte is an invalid address we return here. Otherwise
+	 * there is a guard page at the top of the user's address space and
+	 * the first access would generate a NaT consumption fault and return
+	 * with a SIGSEGV, which is what we want.
+	 *
+	 * Note: the first argument is ignored
+	 */
+	if ( access_ok(VERIFY_READ, (void *)ifa, len) < 0 ) {
+		DPRINT(("verify area failed on %lx\n", ifa));
+		return -1;
+	}
+
+	/*
+	 * r0, as target, doesn't need to be checked because Illegal Instruction
+	 * faults have higher priority than unaligned faults.
+	 *
+	 * r0 cannot be found as the base as it would never generate an
+	 * unaligned reference.
+	 */
+
+	/*
+	 * ldX.a: we don't try to emulate anything but we must
+	 * invalidate the ALAT entry.
+	 * See comment below for explanation on how we handle ldX.a
+	 */
+	if ( ld->x6_op != 0x2 ) {
+		/*
+		 * we rely on the macros in unaligned.h for now i.e.,
+		 * we let the compiler figure out how to read memory gracefully.
+		 *
+		 * We need this switch/case because of the way the inline function
+		 * works. The code is optimized by the compiler and looks like
+		 * a single switch/case.
+		 */
+		switch(len) {
+			case 2:
+				val = ia64_get_unaligned((void *)ifa, 2);
+				break;
+			case 4:
+				val = ia64_get_unaligned((void *)ifa, 4);
+				break;
+			case 8:
+				val = ia64_get_unaligned((void *)ifa, 8);
+				break;
+			default:
+				DPRINT(("unknown size: x6=%d\n", ld->x6_sz));
+				return -1;
+		}
+
+		setreg(ld->r1, val, 0, regs);
+	}
+
+	/*
+	 * check for updates on any kind of loads
+	 */
+	if ( ld->op == 0x5 || ld->m )
+		emulate_load_updates(ld->op == 0x5 ? UPD_IMMEDIATE: UPD_REG,
+				     ld, regs, ifa);
+
+	/*
+	 * handling of various loads (based on EAS2.4):
+	 *
+	 * ldX.acq (ordered load):
+	 *  - acquire semantics would have been used, so force fence instead.
+	 *
+	 *
+	 * ldX.c.clr (check load and clear):
+	 *  - if we get to this handler, it's because the entry was not in the ALAT.
+	 *    Therefore the operation reverts to a normal load
+	 *
+	 * ldX.c.nc (check load no clear):
+	 *  - same as previous one
+	 *
+	 * ldX.c.clr.acq (ordered check load and clear):
+	 *  - same as above for c.clr part. The load needs to have acquire semantics. So
+	 *    we use the fence semantics which is stronger and thus ensures correctness.
+	 *
+	 * ldX.a (advanced load):
+	 *  - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
+	 *    address doesn't match requested size alignment. This means that we would
+	 *    possibly need more than one load to get the result.
+	 *
+	 *    The load part can be handled just like a normal load, however the difficult
+	 *    part is to get the right thing into the ALAT. The critical piece of information
+	 *    in the ALAT is the base address of the load & its size. To do that, a ld.a must
+	 *    be executed; clearly any address can be pushed into the table by using ld1.a
+	 *    r1=[r3]. Now if we use the same target register, we will be okay for the
+	 *    check.a instruction.
+	 *    If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
+	 *    which would overlap within [r3,r3+X] (the size of the load was stored in the
+	 *    ALAT). If such an entry is found the entry is invalidated. But this is not good
+	 *    enough, take the following example:
+	 *		r3=3
+	 *		ld4.a r1=[r3]
+	 *
+	 *    Could be emulated by doing:
+	 *		ld1.a r1=[r3],1
+	 *		store to temporary;
+	 *		ld1.a r1=[r3],1
+	 *		store & shift to temporary;
+	 *		ld1.a r1=[r3],1
+	 *		store & shift to temporary;
+	 *		ld1.a r1=[r3]
+	 *		store & shift to temporary;
+	 *		r1=temporary
+	 *
+	 *    So in this case, you would get the right value in r1 but the wrong info in
+	 *    the ALAT. Notice that you could do it in reverse to finish with address 3
+	 *    but you would still get the size wrong. To get the size right, one needs to
+	 *    execute exactly the same kind of load. You could do it from an aligned
+	 *    temporary location, but you would get the address wrong.
+	 *
+	 *    So no matter what, it is not possible to emulate an advanced load
+	 *    correctly. But is that really critical?
+	 *
+	 *
+	 *    Now one has to look at how ld.a is used, one must either do a ld.c.* or
+	 *    chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
+	 *    entry found in ALAT), and that's perfectly ok because:
+	 *
+	 *		- ld.c.*, if the entry is not present a normal load is executed
+	 *		- chk.a.*, if the entry is not present, execution jumps to recovery code
+	 *
+	 *    In either case, the load can be potentially retried in another form.
+	 *
+	 *    So it's okay NOT to do any actual load on an unaligned ld.a. However the ALAT
+	 *    must be invalidated for the register (so that chk.a.* and ld.c.* don't pick up
+	 *    a stale entry later). The register base update MUST also be performed.
+	 *
+	 *    Now what is the content of the register and its NaT bit in the case we don't
+	 *    do the load? EAS2.4 says (in case an actual load is needed):
+	 *
+	 *		- r1 = [r3], Nat = 0 if succeeds
+	 *		- r1 = 0 Nat = 0 if trying to access non-speculative memory
+	 *
+	 *    For us, there is nothing to do, because both ld.c.* and chk.a.* are going to
+	 *    retry and thus eventually reload the register thereby changing Nat and
+	 *    register content.
+	 */
+
+	/*
+	 * when the load has the .acq completer then
+	 * use ordering fence.
+	 */
+	if (ld->x6_op == 0x5 || ld->x6_op == 0xa)
+		mb();
+
+	/*
+	 * invalidate ALAT entry in case of advanced load
+	 */
+	if (ld->x6_op == 0x2)
+		invala_gr(ld->r1);
+
+	return 0;
+}
+
+static int
+emulate_store_int(unsigned long ifa, load_store_t *ld, struct pt_regs *regs)
+{
+	unsigned long r2;
+	unsigned int len = 1<< ld->x6_sz;
+
+	/*
+	 * the macro supposes sequential access (which is the case)
+	 * if the first byte is an invalid address we return here. Otherwise
+	 * there is a guard page at the top of the user's address space and
+	 * the first access would generate a NaT consumption fault and return
+	 * with a SIGSEGV, which is what we want.
+	 *
+	 * Note: the first argument is ignored
+	 */
+	if ( access_ok(VERIFY_WRITE, (void *)ifa, len) < 0 ) {
+		DPRINT(("verify area failed on %lx\n",ifa));
+		return -1;
+	}
+
+	/*
+	 * if we get to this handler, Nat bits on both r3 and r2 have already
+	 * been checked, so we don't need to do it.
+	 *
+	 * extract the value to be stored
+	 */
+	getreg(ld->imm, &r2, 0, regs);
+
+	/*
+	 * we rely on the macros in unaligned.h for now i.e.,
+	 * we let the compiler figure out how to read memory gracefully.
+	 *
+	 * We need this switch/case because of the way the inline function
+	 * works.
The code is optimized by the compiler and looks like + * a single switch/case. + */ + DPRINT(("st%d [%lx]=%lx\n", len, ifa, r2)); + + switch(len) { + case 2: + ia64_put_unaligned(r2, (void *)ifa, 2); + break; + case 4: + ia64_put_unaligned(r2, (void *)ifa, 4); + break; + case 8: + ia64_put_unaligned(r2, (void *)ifa, 8); + break; + default: + DPRINT(("unknown size: x6=%d\n", ld->x6_sz)); + return -1; + } + /* + * stX [r3]=r2,imm(9) + * + * NOTE: + * ld->r3 can never be r0, because r0 would not generate an + * unaligned access. + */ + if ( ld->op == 0x5 ) { + unsigned long imm; + + /* + * form imm9: [12:6] contain first 7bits + */ + imm = ld->x << 7 | ld->r1; + /* + * sign extend (8bits) if m set + */ + if ( ld->m ) imm |= SIGN_EXT9; + /* + * ifa == r3 (NaT is necessarily cleared) + */ + ifa += imm; + + DPRINT(("imm=%lx r3=%lx\n", imm, ifa)); + + setreg(ld->r3, ifa, 0, regs); + } + /* + * we don't have alat_invalidate_multiple() so we need + * to do the complete flush :-<< + */ + ia64_invala(); + + /* + * stX.rel: use fence instead of release + */ + if ( ld->x6_op == 0xd ) mb(); + + return 0; +} + +/* + * floating point operations sizes in bytes + */ +static const unsigned short float_fsz[4]={ + 16, /* extended precision (e) */ + 8, /* integer (8) */ + 4, /* single precision (s) */ + 8 /* double precision (d) */ +}; + +static inline void +mem2float_extended(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldfe f6=[%0];; stf.spill [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +mem2float_integer(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldf8 f6=[%0];; stf.spill [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +mem2float_single(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldfs f6=[%0];; stf.spill [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +mem2float_double(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldfd f6=[%0];; stf.spill [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +float2mem_extended(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldf.fill f6=[%0];; stfe [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +float2mem_integer(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldf.fill f6=[%0];; stf8 [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +float2mem_single(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldf.fill f6=[%0];; stfs [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +float2mem_double(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldf.fill f6=[%0];; stfd [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static int +emulate_load_floatpair(unsigned long ifa, load_store_t *ld, struct pt_regs *regs) +{ + struct ia64_fpreg fpr_init[2]; + struct ia64_fpreg fpr_final[2]; + unsigned long len = float_fsz[ld->x6_sz]; + + if ( access_ok(VERIFY_READ, (void *)ifa, len<<1) < 0 ) { + DPRINT(("verify area failed on %lx\n", ifa)); + return -1; + } + /* + * fr0 & fr1 don't need to be checked because Illegal Instruction + * faults have higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an + * unaligned reference. 
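+ *
+ * Note: the mem2float_*()/float2mem_*() helpers above do the format
+ * conversion by bouncing through f6; mem2float_single(), for instance,
+ * is essentially (sketch):
+ *
+ *	ldfs      f6=[init]	// 4-byte memory single -> 82-bit f6
+ *	stf.spill [final]=f6	// dump the raw 16-byte register image
+ *
+ * so that setfpreg() can later patch the spilled image into the saved
+ * context without any further conversion.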
+ */ + + /* + * make sure we get clean buffers + */ + memset(&fpr_init,0, sizeof(fpr_init)); + memset(&fpr_final,0, sizeof(fpr_final)); + + /* + * ldfpX.a: we don't try to emulate anything but we must + * invalidate the ALAT entry and execute updates, if any. + */ + if ( ld->x6_op != 0x2 ) { + /* + * does the unaligned access + */ + memcpy(&fpr_init[0], (void *)ifa, len); + memcpy(&fpr_init[1], (void *)(ifa+len), len); + + DPRINT(("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld->r1, ld->imm, ld->x6_sz)); +#ifdef DEBUG_UNALIGNED_TRAP + { int i; char *c = (char *)&fpr_init; + printk("fpr_init= "); + for(i=0; i < len<<1; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } +#endif + /* + * XXX fixme + * Could optimize inlines by using ldfpX & 2 spills + */ + switch( ld->x6_sz ) { + case 0: + mem2float_extended(&fpr_init[0], &fpr_final[0]); + mem2float_extended(&fpr_init[1], &fpr_final[1]); + break; + case 1: + mem2float_integer(&fpr_init[0], &fpr_final[0]); + mem2float_integer(&fpr_init[1], &fpr_final[1]); + break; + case 2: + mem2float_single(&fpr_init[0], &fpr_final[0]); + mem2float_single(&fpr_init[1], &fpr_final[1]); + break; + case 3: + mem2float_double(&fpr_init[0], &fpr_final[0]); + mem2float_double(&fpr_init[1], &fpr_final[1]); + break; + } +#ifdef DEBUG_UNALIGNED_TRAP + { int i; char *c = (char *)&fpr_final; + printk("fpr_final= "); + for(i=0; i < len<<1; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } +#endif + /* + * XXX fixme + * + * A possible optimization would be to drop fpr_final + * and directly use the storage from the saved context i.e., + * the actual final destination (pt_regs, switch_stack or tss). + */ + setfpreg(ld->r1, &fpr_final[0], regs); + setfpreg(ld->imm, &fpr_final[1], regs); + } + + /* + * Check for updates: only immediate updates are available for this + * instruction. + */ + if ( ld->m ) { + + /* + * the immediate is implicit given the ldsz of the operation: + * single: 8 (2x4) and for all others it's 16 (2x8) + */ + ifa += len<<1; + + /* + * IMPORTANT: + * the fact that we force the NaT of r3 to zero is ONLY valid + * as long as we don't come here with a ldfpX.s. + * For this reason we keep this sanity check + */ + if ( ld->x6_op == 1 || ld->x6_op == 3 ) { + printk(KERN_ERR "%s: register update on speculative load pair, error\n", __FUNCTION__); + } + + + setreg(ld->r3, ifa, 0, regs); + } + + /* + * Invalidate ALAT entries, if any, for both registers. + */ + if ( ld->x6_op == 0x2 ) { + invala_fr(ld->r1); + invala_fr(ld->imm); + } + return 0; +} + + +static int +emulate_load_float(unsigned long ifa, load_store_t *ld, struct pt_regs *regs) +{ + struct ia64_fpreg fpr_init; + struct ia64_fpreg fpr_final; + unsigned long len = float_fsz[ld->x6_sz]; + + /* + * check for load pair because our masking scheme is not fine grain enough + if ( ld->x == 1 ) return emulate_load_floatpair(ifa,ld,regs); + */ + + if ( access_ok(VERIFY_READ, (void *)ifa, len) < 0 ) { + DPRINT(("verify area failed on %lx\n", ifa)); + return -1; + } + /* + * fr0 & fr1 don't need to be checked because Illegal Instruction + * faults have higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an + * unaligned reference. + */ + + + /* + * make sure we get clean buffers + */ + memset(&fpr_init,0, sizeof(fpr_init)); + memset(&fpr_final,0, sizeof(fpr_final)); + + /* + * ldfX.a we don't try to emulate anything but we must + * invalidate the ALAT entry. + * See comments in ldX for descriptions on how the various loads are handled. 
+ */ + if ( ld->x6_op != 0x2 ) { + + /* + * does the unaligned access + */ + memcpy(&fpr_init, (void *)ifa, len); + + DPRINT(("ld.r1=%d x6_sz=%d\n", ld->r1, ld->x6_sz)); +#ifdef DEBUG_UNALIGNED_TRAP + { int i; char *c = (char *)&fpr_init; + printk("fpr_init= "); + for(i=0; i < len; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } +#endif + /* + * we only do something for x6_op={0,8,9} + */ + switch( ld->x6_sz ) { + case 0: + mem2float_extended(&fpr_init, &fpr_final); + break; + case 1: + mem2float_integer(&fpr_init, &fpr_final); + break; + case 2: + mem2float_single(&fpr_init, &fpr_final); + break; + case 3: + mem2float_double(&fpr_init, &fpr_final); + break; + } +#ifdef DEBUG_UNALIGNED_TRAP + { int i; char *c = (char *)&fpr_final; + printk("fpr_final= "); + for(i=0; i < len; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } +#endif + /* + * XXX fixme + * + * A possible optimization would be to drop fpr_final + * and directly use the storage from the saved context i.e., + * the actual final destination (pt_regs, switch_stack or tss). + */ + setfpreg(ld->r1, &fpr_final, regs); + } + + /* + * check for updates on any loads + */ + if ( ld->op == 0x7 || ld->m ) + emulate_load_updates(ld->op == 0x7 ? UPD_IMMEDIATE: UPD_REG, + ld, regs, ifa); + + + /* + * invalidate ALAT entry in case of advanced floating point loads + */ + if (ld->x6_op == 0x2) + invala_fr(ld->r1); + + return 0; +} + + +static int +emulate_store_float(unsigned long ifa, load_store_t *ld, struct pt_regs *regs) +{ + struct ia64_fpreg fpr_init; + struct ia64_fpreg fpr_final; + unsigned long len = float_fsz[ld->x6_sz]; + + /* + * the macro supposes sequential access (which is the case) + * if the first byte is an invalid address we return here. Otherwise + * there is a guard page at the top of the user's address page and + * the first access would generate a NaT consumption fault and return + * with a SIGSEGV, which is what we want. + * + * Note: the first argument is ignored + */ + if ( access_ok(VERIFY_WRITE, (void *)ifa, len) < 0 ) { + DPRINT(("verify area failed on %lx\n",ifa)); + return -1; + } + + /* + * make sure we get clean buffers + */ + memset(&fpr_init,0, sizeof(fpr_init)); + memset(&fpr_final,0, sizeof(fpr_final)); + + + /* + * if we get to this handler, Nat bits on both r3 and r2 have already + * been checked. so we don't need to do it + * + * extract the value to be stored + */ + getfpreg(ld->imm, &fpr_init, regs); + /* + * during this step, we extract the spilled registers from the saved + * context i.e., we refill. Then we store (no spill) to temporary + * aligned location + */ + switch( ld->x6_sz ) { + case 0: + float2mem_extended(&fpr_init, &fpr_final); + break; + case 1: + float2mem_integer(&fpr_init, &fpr_final); + break; + case 2: + float2mem_single(&fpr_init, &fpr_final); + break; + case 3: + float2mem_double(&fpr_init, &fpr_final); + break; + } + DPRINT(("ld.r1=%d x6_sz=%d\n", ld->r1, ld->x6_sz)); +#ifdef DEBUG_UNALIGNED_TRAP + { int i; char *c = (char *)&fpr_init; + printk("fpr_init= "); + for(i=0; i < len; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } + { int i; char *c = (char *)&fpr_final; + printk("fpr_final= "); + for(i=0; i < len; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } +#endif + + /* + * does the unaligned store + */ + memcpy((void *)ifa, &fpr_final, len); + + /* + * stfX [r3]=r2,imm(9) + * + * NOTE: + * ld->r3 can never be r0, because r0 would not generate an + * unaligned access. 
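+	 *
+	 * Worked example for the imm9 reconstruction below (made-up field
+	 * values): x=1 and r1=0x05 give imm = 1<<7 | 0x05 = 0x85; if m is
+	 * set, the OR with SIGN_EXT9 fills in bits 8 and up, so the 9-bit
+	 * immediate 0x185 sign-extends to 389 - 512 = -123 before being
+	 * added to r3.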
+ */ + if ( ld->op == 0x7 ) { + unsigned long imm; + + /* + * form imm9: [12:6] contain first 7bits + */ + imm = ld->x << 7 | ld->r1; + /* + * sign extend (8bits) if m set + */ + if ( ld->m ) imm |= SIGN_EXT9; + /* + * ifa == r3 (NaT is necessarily cleared) + */ + ifa += imm; + + DPRINT(("imm=%lx r3=%lx\n", imm, ifa)); + + setreg(ld->r3, ifa, 0, regs); + } + /* + * we don't have alat_invalidate_multiple() so we need + * to do the complete flush :-<< + */ + ia64_invala(); + + return 0; +} + +void +ia64_handle_unaligned(unsigned long ifa, struct pt_regs *regs) +{ + static unsigned long unalign_count; + static long last_time; + + struct ia64_psr *ipsr = ia64_psr(regs); + unsigned long *bundle_addr; + unsigned long opcode; + unsigned long op; + load_store_t *insn; + int ret = -1; + + /* + * We flag unaligned references while in kernel as + * errors: the kernel must be fixed. The switch code + * is in ivt.S at entry 30. + * + * So here we keep a simple sanity check. + */ + if ( !user_mode(regs) ) { + die_if_kernel("Unaligned reference while in kernel\n", regs, 30); + /* NOT_REACHED */ + } + + /* + * Make sure we log the unaligned access, so that user/sysadmin can notice it + * and eventually fix the program. + * + * We don't want to do that for every access so we pace it with jiffies. + */ + if ( unalign_count > 5 && jiffies - last_time > 5*HZ ) unalign_count = 0; + if ( ++unalign_count < 5 ) { + last_time = jiffies; + printk("%s(%d): unaligned trap accessing %016lx (ip=%016lx)\n", + current->comm, current->pid, ifa, regs->cr_iip + ipsr->ri); + + } + + DPRINT(("iip=%lx ifa=%lx isr=%lx\n", regs->cr_iip, ifa, regs->cr_ipsr)); + DPRINT(("ISR.ei=%d ISR.sp=%d\n", ipsr->ri, ipsr->it)); + + bundle_addr = (unsigned long *)(regs->cr_iip); + + /* + * extract the instruction from the bundle given the slot number + */ + switch ( ipsr->ri ) { + case 0: op = *bundle_addr >> 5; + break; + + case 1: op = *bundle_addr >> 46 | (*(bundle_addr+1) & 0x7fffff)<<18; + break; + + case 2: op = *(bundle_addr+1) >> 23; + break; + } + + insn = (load_store_t *)&op; + opcode = op & IA64_OPCODE_MASK; + + DPRINT(("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d " + "ld.x6=0x%x ld.m=%d ld.op=%d\n", + opcode, + insn->qp, + insn->r1, + insn->imm, + insn->r3, + insn->x, + insn->hint, + insn->x6_sz, + insn->m, + insn->op)); + + /* + * IMPORTANT: + * Notice that the swictch statement DOES not cover all possible instructions + * that DO generate unaligned references. This is made on purpose because for some + * instructions it DOES NOT make sense to try and emulate the access. Sometimes it + * is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e., + * the program will get a signal and die: + * + * load/store: + * - ldX.spill + * - stX.spill + * Reason: RNATs are based on addresses + * + * synchronization: + * - cmpxchg + * - fetchadd + * - xchg + * Reason: ATOMIC operations cannot be emulated properly using multiple + * instructions. + * + * speculative loads: + * - ldX.sZ + * Reason: side effects, code must be ready to deal with failure so simpler + * to let the load fail. + * --------------------------------------------------------------------------------- + * XXX fixme + * + * I would like to get rid of this switch case and do something + * more elegant. 
+ */ + switch(opcode) { + case LDS_OP: + case LDSA_OP: + case LDS_IMM_OP: + case LDSA_IMM_OP: + case LDFS_OP: + case LDFSA_OP: + case LDFS_IMM_OP: + /* + * The instruction will be retried with defered exceptions + * turned on, and we should get Nat bit installed + * + * IMPORTANT: + * When PSR_ED is set, the register & immediate update + * forms are actually executed even though the operation + * failed. So we don't need to take care of this. + */ + DPRINT(("forcing PSR_ED\n")); + regs->cr_ipsr |= IA64_PSR_ED; + return; + + case LD_OP: + case LDA_OP: + case LDBIAS_OP: + case LDACQ_OP: + case LDCCLR_OP: + case LDCNC_OP: + case LDCCLRACQ_OP: + case LD_IMM_OP: + case LDA_IMM_OP: + case LDBIAS_IMM_OP: + case LDACQ_IMM_OP: + case LDCCLR_IMM_OP: + case LDCNC_IMM_OP: + case LDCCLRACQ_IMM_OP: + ret = emulate_load_int(ifa, insn, regs); + break; + case ST_OP: + case STREL_OP: + case ST_IMM_OP: + case STREL_IMM_OP: + ret = emulate_store_int(ifa, insn, regs); + break; + case LDF_OP: + case LDFA_OP: + case LDFCCLR_OP: + case LDFCNC_OP: + case LDF_IMM_OP: + case LDFA_IMM_OP: + case LDFCCLR_IMM_OP: + case LDFCNC_IMM_OP: + ret = insn->x ? + emulate_load_floatpair(ifa, insn, regs): + emulate_load_float(ifa, insn, regs); + break; + case STF_OP: + case STF_IMM_OP: + ret = emulate_store_float(ifa, insn, regs); + } + + DPRINT(("ret=%d\n", ret)); + if ( ret ) { + lock_kernel(); + force_sig(SIGSEGV, current); + unlock_kernel(); + } else { + /* + * given today's architecture this case is not likely to happen + * because a memory access instruction (M) can never be in the + * last slot of a bundle. But let's keep it for now. + */ + if ( ipsr->ri == 2 ) regs->cr_iip += 16; + ipsr->ri = ++ipsr->ri & 3; + } + + DPRINT(("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip)); +} diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c new file mode 100644 index 000000000..c2b772e68 --- /dev/null +++ b/arch/ia64/kernel/unwind.c @@ -0,0 +1,118 @@ +/* + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <linux/kernel.h> +#include <linux/sched.h> + +#include <asm/unwind.h> + +void +ia64_unwind_init_from_blocked_task (struct ia64_frame_info *info, struct task_struct *t) +{ + struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16); + unsigned long sol, limit, top; + + memset(info, 0, sizeof(*info)); + + sol = (sw->ar_pfs >> 7) & 0x7f; /* size of locals */ + + limit = (unsigned long) t + IA64_RBS_OFFSET; + top = sw->ar_bspstore; + if (top - (unsigned long) t >= IA64_STK_OFFSET) + top = limit; + + info->regstk.limit = (unsigned long *) limit; + info->regstk.top = (unsigned long *) top; + info->bsp = ia64_rse_skip_regs(info->regstk.top, -sol); + info->top_rnat = sw->ar_rnat; + info->cfm = sw->ar_pfs; + info->ip = sw->b0; +} + +void +ia64_unwind_init_from_current (struct ia64_frame_info *info, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long sol, sof, *bsp, limit, top; + + limit = (unsigned long) current + IA64_RBS_OFFSET; + top = sw->ar_bspstore; + if (top - (unsigned long) current >= IA64_STK_OFFSET) + top = limit; + + memset(info, 0, sizeof(*info)); + + sol = (sw->ar_pfs >> 7) & 0x7f; /* size of frame */ + info->regstk.limit = (unsigned long *) limit; + info->regstk.top = (unsigned long *) top; + info->top_rnat = sw->ar_rnat; + + /* this gives us the bsp top level frame (kdb interrupt frame): */ + bsp = ia64_rse_skip_regs((unsigned long *) top, -sol); + + /* now skip past the interrupt 
frame: */ + sof = regs->cr_ifs & 0x7f; /* size of frame */ + info->cfm = regs->cr_ifs; + info->bsp = ia64_rse_skip_regs(bsp, -sof); + info->ip = regs->cr_iip; +} + +static unsigned long +read_reg (struct ia64_frame_info *info, int regnum, int *is_nat) +{ + unsigned long *addr, *rnat_addr, rnat; + + addr = ia64_rse_skip_regs(info->bsp, regnum); + if (addr < info->regstk.limit || addr >= info->regstk.top || ((long) addr & 0x7) != 0) { + *is_nat = 1; + return 0xdeadbeefdeadbeef; + } + rnat_addr = ia64_rse_rnat_addr(addr); + + if (rnat_addr >= info->regstk.top) + rnat = info->top_rnat; + else + rnat = *rnat_addr; + *is_nat = (rnat & (1UL << ia64_rse_slot_num(addr))) != 0; + return *addr; +} + +/* + * On entry, info->regstk.top should point to the register backing + * store for r32. + */ +int +ia64_unwind_to_previous_frame (struct ia64_frame_info *info) +{ + unsigned long sol, cfm = info->cfm; + int is_nat; + + sol = (cfm >> 7) & 0x7f; /* size of locals */ + + /* + * In general, we would have to make use of unwind info to + * unwind an IA-64 stack, but for now gcc uses a special + * convention that makes this possible without full-fledged + * unwindo info. Specifically, we expect "rp" in the second + * last, and "ar.pfs" in the last local register, so the + * number of locals in a frame must be at least two. If it's + * less than that, we reached the end of the C call stack. + */ + if (sol < 2) + return -1; + + info->ip = read_reg(info, sol - 2, &is_nat); + if (is_nat) + return -1; + + cfm = read_reg(info, sol - 1, &is_nat); + if (is_nat) + return -1; + + sol = (cfm >> 7) & 0x7f; + + info->cfm = cfm; + info->bsp = ia64_rse_skip_regs(info->bsp, -sol); + return 0; +} diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile new file mode 100644 index 000000000..8a9581747 --- /dev/null +++ b/arch/ia64/lib/Makefile @@ -0,0 +1,42 @@ +# +# Makefile for ia64-specific library routines.. +# + +.S.o: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -traditional -c $< -o $@ + +OBJS = __divdi3.o __divsi3.o __udivdi3.o __udivsi3.o \ + __moddi3.o __modsi3.o __umoddi3.o __umodsi3.o \ + checksum.o clear_page.o csum_partial_copy.o copy_page.o \ + copy_user.o clear_user.o memset.o strncpy_from_user.o \ + strlen.o strlen_user.o strnlen_user.o \ + flush.o do_csum.o + +lib.a: $(OBJS) + $(AR) rcs lib.a $(OBJS) + +__divdi3.o: idiv.S + $(CC) $(AFLAGS) -c -o $@ $< + +__divsi3.o: idiv.S + $(CC) $(AFLAGS) -c -DSINGLE -c -o $@ $< + +__udivdi3.o: idiv.S + $(CC) $(AFLAGS) -c -DUNSIGNED -c -o $@ $< + +__udivsi3.o: idiv.S + $(CC) $(AFLAGS) -c -DUNSIGNED -DSINGLE -c -o $@ $< + +__moddi3.o: idiv.S + $(CC) $(AFLAGS) -c -DMODULO -c -o $@ $< + +__modsi3.o: idiv.S + $(CC) $(AFLAGS) -c -DMODULO -DSINGLE -c -o $@ $< + +__umoddi3.o: idiv.S + $(CC) $(AFLAGS) -c -DMODULO -DUNSIGNED -c -o $@ $< + +__umodsi3.o: idiv.S + $(CC) $(AFLAGS) -c -DMODULO -DUNSIGNED -DSINGLE -c -o $@ $< + +include $(TOPDIR)/Rules.make diff --git a/arch/ia64/lib/checksum.c b/arch/ia64/lib/checksum.c new file mode 100644 index 000000000..9c4a8af75 --- /dev/null +++ b/arch/ia64/lib/checksum.c @@ -0,0 +1,110 @@ +/* + * Network checksum routines + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> + * + * Most of the code coming from arch/alpha/lib/checksum.c + * + * This file contains network checksum routines that are better done + * in an architecture-specific manner due to speed.. 
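+ *
+ * The folding helper below reduces a 64-bit sum to 16 bits while
+ * preserving end-around carries. For example (made-up value), folding
+ * x = 0x200000ffff: the 32-bit step gives 0xffff + 0x2 = 0x10001, and
+ * the 16-bit steps then reduce that to 0x0001 + 0x1 = 0x0002.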
+ */ + +#include <linux/string.h> + +#include <asm/byteorder.h> + +static inline unsigned short +from64to16(unsigned long x) +{ + /* add up 32-bit words for 33 bits */ + x = (x & 0xffffffff) + (x >> 32); + /* add up 16-bit and 17-bit words for 17+c bits */ + x = (x & 0xffff) + (x >> 16); + /* add up 16-bit and 2-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented. + */ +unsigned short int csum_tcpudp_magic(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + return ~from64to16(saddr + daddr + sum + + ((unsigned long) ntohs(len) << 16) + + ((unsigned long) proto << 8)); +} + +unsigned int csum_tcpudp_nofold(unsigned long saddr, + unsigned long daddr, + unsigned short len, + unsigned short proto, + unsigned int sum) +{ + unsigned long result; + + result = (saddr + daddr + sum + + ((unsigned long) ntohs(len) << 16) + + ((unsigned long) proto << 8)); + + /* Fold down to 32-bits so we don't loose in the typedef-less + network stack. */ + /* 64 to 33 */ + result = (result & 0xffffffff) + (result >> 32); + /* 33 to 32 */ + result = (result & 0xffffffff) + (result >> 32); + return result; +} + +extern unsigned long do_csum(const unsigned char *, unsigned int, unsigned int); +extern unsigned long do_csum_c(const unsigned char *, unsigned int, unsigned int); + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + */ +unsigned short ip_fast_csum(unsigned char * iph, unsigned int ihl) +{ + return ~do_csum(iph,ihl*4,0); +} + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) +{ + unsigned long result = do_csum(buff, len, 0); + + /* add in old sum, and carry.. */ + result += sum; + /* 32+c bits -> 32 bits */ + result = (result & 0xffffffff) + (result >> 32); + return result; +} + + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +unsigned short ip_compute_csum(unsigned char * buff, int len) +{ + return ~do_csum(buff,len, 0); +} diff --git a/arch/ia64/lib/clear_page.S b/arch/ia64/lib/clear_page.S new file mode 100644 index 000000000..314311c5c --- /dev/null +++ b/arch/ia64/lib/clear_page.S @@ -0,0 +1,42 @@ +/* + * + * Optimized version of the standard clearpage() function + * + * Based on comments from ddd. Try not to overflow the write buffer. 
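+ *
+ * The loop below spills f0 (architecturally always +0.0) through two
+ * pointers, 2x16 bytes per iteration, so it runs PAGE_SIZE/32 times,
+ * e.g. 512 iterations for a (hypothetical) 16KB page.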
+ *
+ * Inputs:
+ *	in0:	address of page
+ *
+ * Output:
+ *	none
+ *
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <asm/page.h>
+
+	.text
+	.psr abi64
+	.psr lsb
+	.lsb
+
+	.align 32
+	.global clear_page
+	.proc clear_page
+clear_page:
+	alloc r11=ar.pfs,1,0,0,0
+	mov r16=ar.lc			// slow
+	mov r17=PAGE_SIZE/32-1		// -1 = repeat/until
+	;;
+	adds r18=16,in0
+	mov ar.lc=r17
+	;;
+1:	stf.spill.nta [in0]=f0,32
+	stf.spill.nta [r18]=f0,32
+	br.cloop.dptk.few 1b
+	;;
+	mov ar.lc=r16			// restore lc
+	br.ret.sptk.few rp
+
+	.endp clear_page
diff --git a/arch/ia64/lib/clear_user.S b/arch/ia64/lib/clear_user.S
new file mode 100644
index 000000000..0db4a78f8
--- /dev/null
+++ b/arch/ia64/lib/clear_user.S
@@ -0,0 +1,224 @@
+/*
+ * This routine clears to zero a linear memory buffer in user space.
+ *
+ * Inputs:
+ *	in0:	address of buffer
+ *	in1:	length of buffer in bytes
+ * Outputs:
+ *	r8:	number of bytes that didn't get cleared due to a fault
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com>
+ */
+
+//
+// arguments
+//
+#define buf		r32
+#define len		r33
+
+//
+// local registers
+//
+#define cnt		r16
+#define buf2		r17
+#define saved_lc	r18
+#define saved_pr	r19
+#define saved_pfs	r20
+#define tmp		r21
+#define len2		r22
+#define len3		r23
+
+//
+// Theory of operations:
+//	- we check whether or not the buffer is small, i.e., less than 17
+//	  bytes, in which case we do the byte-by-byte loop.
+//
+//	- Otherwise we go progressively from a 1-byte store to an 8-byte store
+//	  in the head part; the body is a 16-byte store loop and we finish with
+//	  the tail for the last 15 bytes.
+//	  The good point about this breakdown is that the long buffer handling
+//	  contains only 2 branches.
+//
+//	The reason for not using shifting & masking for both the head and the
+//	tail is to stay semantically correct. This routine is not supposed
+//	to write bytes outside of the buffer. While most of the time this would
+//	be ok, we can't tolerate a mistake. A classical example is the case
+//	of multithreaded code where the extra bytes touched are actually owned
+//	by another thread which runs concurrently with ours. Another, less likely,
+//	example is with device drivers where reading an I/O mapped location may
+//	have side effects (same thing for writing).
+//
+
+// The label comes first because our store instruction contains a comma
+// and would confuse the preprocessor otherwise
+//
+#define EX(y,x...)
\ + .section __ex_table,"a"; \ + data4 @gprel(99f); \ + data4 y-99f; \ + .previous; \ +99: x + + .text + .psr abi64 + .psr lsb + .lsb + + .align 32 + .global __do_clear_user + .proc __do_clear_user + +__do_clear_user: + alloc saved_pfs=ar.pfs,2,0,0,0 + cmp.eq p6,p0=r0,len // check for zero length + mov saved_lc=ar.lc // preserve ar.lc (slow) + ;; // avoid WAW on CFM + adds tmp=-1,len // br.ctop is repeat/until + mov ret0=len // return value is length at this point +(p6) br.ret.spnt.few rp + ;; + cmp.lt p6,p0=16,len // if len > 16 then long memset + mov ar.lc=tmp // initialize lc for small count +(p6) br.cond.dptk.few long_do_clear + ;; // WAR on ar.lc + // + // worst case 16 cyles, avg 8 cycles + // + // We could have played with the predicates to use the extra + // M slot for 2 stores/iteration but the cost the initialization + // the various counters compared to how long the loop is supposed + // to last on average does not make this solution viable. + // +1: + EX( .Lexit1, st1 [buf]=r0,1 ) + adds len=-1,len // countdown length using len + br.cloop.dptk.few 1b + ;; // avoid RAW on ar.lc + // + // .Lexit4: comes from byte by byte loop + // len contains bytes left +.Lexit1: + mov ret0=len // faster than using ar.lc + mov ar.lc=saved_lc + br.ret.sptk.few rp // end of short clear_user + + + // + // At this point we know we have more than 16 bytes to copy + // so we focus on alignment (no branches required) + // + // The use of len/len2 for countdown of the number of bytes left + // instead of ret0 is due to the fact that the exception code + // changes the values of r8. + // +long_do_clear: + tbit.nz p6,p0=buf,0 // odd alignment (for long_do_clear) + ;; + EX( .Lexit3, (p6) st1 [buf]=r0,1 ) // 1-byte aligned +(p6) adds len=-1,len;; // sync because buf is modified + tbit.nz p6,p0=buf,1 + ;; + EX( .Lexit3, (p6) st2 [buf]=r0,2 ) // 2-byte aligned +(p6) adds len=-2,len;; + tbit.nz p6,p0=buf,2 + ;; + EX( .Lexit3, (p6) st4 [buf]=r0,4 ) // 4-byte aligned +(p6) adds len=-4,len;; + tbit.nz p6,p0=buf,3 + ;; + EX( .Lexit3, (p6) st8 [buf]=r0,8 ) // 8-byte aligned +(p6) adds len=-8,len;; + shr.u cnt=len,4 // number of 128-bit (2x64bit) words + ;; + cmp.eq p6,p0=r0,cnt + adds tmp=-1,cnt +(p6) br.cond.dpnt.few .dotail // we have less than 16 bytes left + ;; + adds buf2=8,buf // setup second base pointer + mov ar.lc=tmp + ;; + + // + // 16bytes/iteration core loop + // + // The second store can never generate a fault because + // we come into the loop only when we are 16-byte aligned. + // This means that if we cross a page then it will always be + // in the first store and never in the second. + // + // + // We need to keep track of the remaining length. A possible (optimistic) + // way would be to ue ar.lc and derive how many byte were left by + // doing : left= 16*ar.lc + 16. this would avoid the addition at + // every iteration. + // However we need to keep the synchronization point. A template + // M;;MB does not exist and thus we can keep the addition at no + // extra cycle cost (use a nop slot anyway). It also simplifies the + // (unlikely) error recovery code + // + +2: + + EX(.Lexit3, st8 [buf]=r0,16 ) + ;; // needed to get len correct when error + st8 [buf2]=r0,16 + adds len=-16,len + br.cloop.dptk.few 2b + ;; + mov ar.lc=saved_lc + // + // tail correction based on len only + // + // We alternate the use of len3,len2 to allow parallelism and correct + // error handling. We also reuse p6/p7 to return correct value. 
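	//
	// Worked example of the tail (with a hypothetical fault): for len=13
	// we do st8, then st4, then st1. Should the st4 fault, p7 is set and
	// len3 already holds 13-8=5 from the st8 step, so .Lexit2 below
	// returns 5 bytes left uncleared.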
+ // The addition of len2/len3 does not cost anything more compared to + // the regular memset as we had empty slots. + // +.dotail: + mov len2=len // for parallelization of error handling + mov len3=len + tbit.nz p6,p0=len,3 + ;; + EX( .Lexit2, (p6) st8 [buf]=r0,8 ) // at least 8 bytes +(p6) adds len3=-8,len2 + tbit.nz p7,p6=len,2 + ;; + EX( .Lexit2, (p7) st4 [buf]=r0,4 ) // at least 4 bytes +(p7) adds len2=-4,len3 + tbit.nz p6,p7=len,1 + ;; + EX( .Lexit2, (p6) st2 [buf]=r0,2 ) // at least 2 bytes +(p6) adds len3=-2,len2 + tbit.nz p7,p6=len,0 + ;; + EX( .Lexit2, (p7) st1 [buf]=r0 ) // only 1 byte left + mov ret0=r0 // success + br.ret.dptk.few rp // end of most likely path + + // + // Outlined error handling code + // + + // + // .Lexit3: comes from core loop, need restore pr/lc + // len contains bytes left + // + // + // .Lexit2: + // if p6 -> coming from st8 or st2 : len2 contains what's left + // if p7 -> coming from st4 or st1 : len3 contains what's left + // We must restore lc/pr even though might not have been used. +.Lexit2: +(p6) mov len=len2 +(p7) mov len=len3 + ;; + // + // .Lexit4: comes from head, need not restore pr/lc + // len contains bytes left + // +.Lexit3: + mov ret0=len + mov ar.lc=saved_lc + br.ret.dptk.few rp + .endp diff --git a/arch/ia64/lib/copy_page.S b/arch/ia64/lib/copy_page.S new file mode 100644 index 000000000..0a956e5a2 --- /dev/null +++ b/arch/ia64/lib/copy_page.S @@ -0,0 +1,87 @@ +/* + * + * Optimized version of the standard copy_page() function + * + * Based on comments from ddd. Try not to overflow write buffer. + * + * Inputs: + * in0: address of target page + * in1: address of source page + * Output: + * no return value + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> + */ +#include <asm/page.h> + +#define lcount r16 +#define saved_pr r17 +#define saved_lc r18 +#define saved_pfs r19 +#define src1 r20 +#define src2 r21 +#define tgt1 r22 +#define tgt2 r23 + + .text + .psr abi64 + .psr lsb + .lsb + + .align 32 + .global copy_page + .proc copy_page + +copy_page: + alloc saved_pfs=ar.pfs,10,0,0,8 // we need 6 roatating (8 minimum) + // + 2 input + + .rotr t1[4], t2[4] // our 2 pipelines with depth of 4 each + + mov saved_lc=ar.lc // save ar.lc ahead of time + mov saved_pr=pr // rotating predicates are preserved + // resgisters we must save. + mov src1=in1 // initialize 1st stream source + adds src2=8,in1 // initialize 2nd stream source + mov lcount=PAGE_SIZE/16-1 // as many 16bytes as there are on a page + // -1 is because br.ctop is repeat/until + + adds tgt2=8,in0 // initialize 2nd stream target + mov tgt1=in0 // initialize 1st stream target + ;; + mov pr.rot=1<<16 // pr16=1 & pr[17-63]=0 , 63 not modified + + mov ar.lc=lcount // set loop counter + mov ar.ec=4 // ar.ec must match pipeline depth + ;; + + // We need to preload the n-1 stages of the pipeline (n=depth). + // We do this during the "prolog" of the loop: we execute + // n-1 times the "load" bundle. Then both loads & stores are + // enabled until we reach the end of the last word of the page + // on the load side. Then, we enter the epilogue (controlled by ec) + // where we just do the stores and no loads n-1 times : drain the pipe. + // + // The initialization of the prolog is done via the predicate registers: + // the choice of pr19 DEPENDS on the depth of the pipeline (n). 
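+	// Concretely (illustration for pipeline depth n=4), successive loop
+	// iterations see:
+	//
+	//	iter 1: p16=1 p17=0 p18=0 p19=0		loads only
+	//	iter 2: p16=1 p17=1 p18=0 p19=0
+	//	iter 3: p16=1 p17=1 p18=1 p19=0
+	//	iter 4: p16=1 p17=1 p18=1 p19=1		first stores issue
+	//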
+ // When lc > 0 pr63=1 and it is fed back into pr16 and pr16-pr62 + // are then shifted right at every iteration, + // Thus by initializing pr16=1 and pr17-19=0 (19=16+4-1) before the loop + // we get pr19=1 after 4 iterations (n in our case). + // +1: // engage loop now, let the magic happen... +(p16) ld8 t1[0]=[src1],16 // new data on top of pipeline in 1st stream +(p16) ld8 t2[0]=[src2],16 // new data on top of pipeline in 2nd stream + nop.i 0x0 +(p19) st8 [tgt1]=t1[3],16 // store top of 1st pipeline +(p19) st8 [tgt2]=t2[3],16 // store top of 2nd pipeline + br.ctop.dptk.few 1b // once lc==0, ec-- & p16=0 + // stores but no loads anymore + ;; + mov pr=saved_pr,0xffffffffffff0000 // restore predicates + mov ar.pfs=saved_pfs // restore ar.ec + mov ar.lc=saved_lc // restore saved lc + br.ret.sptk.few rp // bye... + + .endp copy_page diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S new file mode 100644 index 000000000..03a540a80 --- /dev/null +++ b/arch/ia64/lib/copy_user.S @@ -0,0 +1,71 @@ +/* + * This routine copies a linear memory buffer across the user/kernel boundary. When + * reading a byte from the source causes a fault, the remainder of the destination + * buffer is zeroed out. Note that this can happen only when copying from user + * to kernel memory and we do this to absolutely guarantee that the + * kernel doesn't operate on random data. + * + * This file is derived from arch/alpha/lib/copy_user.S. + * + * Inputs: + * in0: address of destination buffer + * in1: address of source buffer + * in2: length of buffer in bytes + * Outputs: + * r8: number of bytes that didn't get copied due to a fault + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#define EXI(x...) \ +99: x; \ + .section __ex_table,"a"; \ + data4 @gprel(99b); \ + data4 .Lexit_in-99b; \ + .previous + +#define EXO(x...) 
\ +99: x; \ + .section __ex_table,"a"; \ + data4 @gprel(99b); \ + data4 .Lexit_out-99b; \ + .previous + + .text + .psr abi64 + .psr lsb + .lsb + + .align 32 + .global __copy_user + .proc __copy_user +__copy_user: + alloc r10=ar.pfs,3,0,0,0 + mov r9=ar.lc // save ar.lc + mov ar.lc=in2 // set ar.lc to length of buffer + br.sptk.few .Lentr + + // XXX braindead copy loop---this needs to be optimized +.Loop1: + EXI(ld1 r8=[in1],1) + ;; + EXO(st1 [in0]=r8,1) +.Lentr: br.cloop.dptk.few .Loop1 // repeat unless ar.lc--==0 + ;; // avoid RAW on ar.lc +.Lexit_out: + mov r8=ar.lc // return how many bytes we _didn't_ copy + mov ar.lc=r9 + br.ret.sptk.few rp + +.Lexit_in: + // clear the remainder of the buffer: + mov r8=ar.lc // return how many bytes we _didn't_ copy +.Loop2: + st1 [in0]=r0,1 // this cannot fault because we get here only on user->kernel copies + br.cloop.dptk.few .Loop2 + ;; // avoid RAW on ar.lc + mov ar.lc=r9 + br.ret.sptk.few rp + + .endp __copy_user diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c new file mode 100644 index 000000000..d09f11e21 --- /dev/null +++ b/arch/ia64/lib/csum_partial_copy.c @@ -0,0 +1,165 @@ +/* + * Network Checksum & Copy routine + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> + * + * Most of the code has been imported from Linux/Alpha + */ + +#include <linux/types.h> +#include <linux/string.h> + +#include <asm/uaccess.h> + +/* + * XXX Fixme: those 2 inlines are meant for debugging and will go away + */ +static inline unsigned +short from64to16(unsigned long x) +{ + /* add up 32-bit words for 33 bits */ + x = (x & 0xffffffff) + (x >> 32); + /* add up 16-bit and 17-bit words for 17+c bits */ + x = (x & 0xffff) + (x >> 16); + /* add up 16-bit and 2-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} + +static inline +unsigned long do_csum_c(const unsigned char * buff, int len, unsigned int psum) +{ + int odd, count; + unsigned long result = (unsigned long)psum; + + if (len <= 0) + goto out; + odd = 1 & (unsigned long) buff; + if (odd) { + result = *buff << 8; + len--; + buff++; + } + count = len >> 1; /* nr of 16-bit words.. */ + if (count) { + if (2 & (unsigned long) buff) { + result += *(unsigned short *) buff; + count--; + len -= 2; + buff += 2; + } + count >>= 1; /* nr of 32-bit words.. */ + if (count) { + if (4 & (unsigned long) buff) { + result += *(unsigned int *) buff; + count--; + len -= 4; + buff += 4; + } + count >>= 1; /* nr of 64-bit words.. */ + if (count) { + unsigned long carry = 0; + do { + unsigned long w = *(unsigned long *) buff; + count--; + buff += 8; + result += carry; + result += w; + carry = (w > result); + } while (count); + result += carry; + result = (result & 0xffffffff) + (result >> 32); + } + if (len & 4) { + result += *(unsigned int *) buff; + buff += 4; + } + } + if (len & 2) { + result += *(unsigned short *) buff; + buff += 2; + } + } + if (len & 1) + result += *buff; + + result = from64to16(result); + + if (odd) + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); + +out: + return result; +} + +/* + * XXX Fixme + * + * This is very ugly but temporary. THIS NEEDS SERIOUS ENHANCEMENTS. + * But it's very tricky to get right even in C. 
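+ *
+ * For the aligned-only case, a combined loop would be a sketch like
+ * (assuming 8-byte aligned src and dst):
+ *
+ *	while (len >= 8) {
+ *		unsigned long w = *(unsigned long *) src;
+ *		*(unsigned long *) dst = w;
+ *		result += w;
+ *		carry += (result < w);		// end-around carry
+ *		src += 8; dst += 8; len -= 8;
+ *	}
+ *
+ * doing that while honoring two independent misalignments at once is
+ * what makes the combined version so hairy.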
+ */ +extern unsigned long do_csum(const unsigned char *, int); + +static unsigned int +do_csum_partial_copy_from_user (const char *src, char *dst, int len, + unsigned int psum, int *errp) +{ + const unsigned char *psrc = src; + unsigned long result; + int cplen = len; + int r = 0; + + /* XXX Fixme + * for now we separate the copy from checksum for obvious + * alignment difficulties. Look at the Alpha code and you'll be + * scared. + */ + + while ( cplen-- ) r |=__get_user(*dst++,psrc++); + + if ( r && errp ) *errp = r; + + result = do_csum(src, len); + + /* add in old sum, and carry.. */ + result += psum; + /* 32+c bits -> 32 bits */ + result = (result & 0xffffffff) + (result >> 32); + return result; +} + +unsigned int +csum_partial_copy_from_user(const char *src, char *dst, int len, + unsigned int sum, int *errp) +{ + if (!access_ok(src, len, VERIFY_READ)) { + *errp = -EFAULT; + memset(dst, 0, len); + return sum; + } + + return do_csum_partial_copy_from_user(src, dst, len, sum, errp); +} + +unsigned int +csum_partial_copy_nocheck(const char *src, char *dst, int len, unsigned int sum) +{ + return do_csum_partial_copy_from_user(src, dst, len, sum, NULL); +} + +unsigned int +csum_partial_copy (const char *src, char *dst, int len, unsigned int sum) +{ + unsigned int ret; + int error = 0; + + ret = do_csum_partial_copy_from_user(src, dst, len, sum, &error); + if (error) + printk("csum_partial_copy_old(): tell mingo to convert me!\n"); + + return ret; +} + diff --git a/arch/ia64/lib/do_csum.S b/arch/ia64/lib/do_csum.S new file mode 100644 index 000000000..d8174f10a --- /dev/null +++ b/arch/ia64/lib/do_csum.S @@ -0,0 +1,230 @@ +/* + * + * Optmized version of the standard do_csum() function + * + * Return: a 64bit quantity containing the 16bit Internet checksum + * + * Inputs: + * in0: address of buffer to checksum (char *) + * in1: length of the buffer (int) + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> + * + */ + +// +// Theory of operations: +// The goal is to go as quickly as possible to the point where +// we can checksum 8 bytes/loop. Before reaching that point we must +// take care of incorrect alignment of first byte. +// +// The code hereafter also takes care of the "tail" part of the buffer +// before entering the core loop, if any. The checksum is a sum so it +// allows us to commute operations. So we do do the "head" and "tail" +// first to finish at full speed in the body. Once we get the head and +// tail values, we feed them into the pipeline, very handy initialization. +// +// Of course we deal with the special case where the whole buffer fits +// into one 8 byte word. In this case we have only one entry in the pipeline. +// +// We use a (3+1)-stage pipeline in the loop to account for possible +// load latency and also to accomodate for head and tail. +// +// The end of the function deals with folding the checksum from 64bits +// down to 16bits taking care of the carry. +// +// This version avoids synchronization in the core loop by also using a +// pipeline for the accumulation of the checksum in result[]. 
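+//
+// Example of the head/tail masking (made-up address): for buf = 0x...3 and
+// len = 10, first = 0x...0 with firstoff = 3, so hmask knocks out the low
+// 3 bytes of the first word; last = 0x...8 with lastoff = 5, so tmask
+// keeps only the low 5 bytes of the last word: 5+5 = 10 bytes get summed.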
+// +// p[] +// |---| +// 0| | r32 : new value loaded in pipeline +// |---| +// 1| | r33 : in transit data +// |---| +// 2| | r34 : current value to add to checksum +// |---| +// 3| | r35 : previous value added to checksum (previous iteration) +// |---| +// +// result[] +// |---| +// 0| | r36 : new checksum +// |---| +// 1| | r37 : previous value of checksum +// |---| +// 2| | r38 : final checksum when out of the loop (after 2 epilogue rots) +// |---| +// +// +// NOT YET DONE: +// - Take advantage of the MMI bandwidth to load more than 8byte per loop +// iteration +// - use the lfetch instruction to augment the chances of the data being in +// the cache when we need it. +// - Maybe another algorithm which would take care of the folding at the +// end in a different manner +// - Work with people more knowledgeable than me on the network stack +// to figure out if we could not split the function depending on the +// type of packet or alignment we get. Like the ip_fast_csum() routine +// where we know we have at least 20bytes worth of data to checksum. +// - Look at RFCs about checksums to see whether or not we can do better +// +// - Do a better job of handling small packets. +// +#define saved_pfs r11 +#define hmask r16 +#define tmask r17 +#define first r18 +#define firstval r19 +#define firstoff r20 +#define last r21 +#define lastval r22 +#define lastoff r23 +#define saved_lc r24 +#define saved_pr r25 +#define tmp1 r26 +#define tmp2 r27 +#define tmp3 r28 +#define carry r29 + +#define buf in0 +#define len in1 + + + .text + .psr abi64 + .psr lsb + .lsb + +// unsigned long do_csum(unsigned char *buf,int len) + + .align 32 + .global do_csum + .proc do_csum +do_csum: + alloc saved_pfs=ar.pfs,2,8,0,8 + + .rotr p[4], result[3] + mov ret0=r0 // in case we have zero length + cmp4.lt p0,p6=r0,len // check for zero length or negative (32bit len) + ;; // avoid WAW on CFM + mov tmp3=0x7 // a temporary mask/value + add tmp1=buf,len // last byte's address +(p6) br.ret.spnt.few rp // return if true (hope we can avoid that) + + and firstoff=7,buf // how many bytes off for first element + tbit.nz p10,p0=buf,0 // is buf an odd address ? + mov hmask=-1 // intialize head mask + ;; + + andcm first=buf,tmp3 // 8byte aligned down address of first element + mov tmask=-1 // initialize tail mask + adds tmp2=-1,tmp1 // last-1 + ;; + and lastoff=7,tmp1 // how many bytes off for last element + andcm last=tmp2,tmp3 // address of word containing last byte + mov saved_pr=pr // preserve predicates (rotation) + ;; + sub tmp3=last,first // tmp3=distance from first to last + cmp.eq p8,p9=last,first // everything fits in one word ? + sub tmp1=8,lastoff // complement to lastoff + + ld8 firstval=[first],8 // load,ahead of time, "first" word + shl tmp2=firstoff,3 // number of bits + ;; + and tmp1=7, tmp1 // make sure that if tmp1==8 -> tmp1=0 + +(p9) ld8 lastval=[last] // load,ahead of time, "last" word, if needed +(p8) mov lastval=r0 // we don't need lastval if first==last + mov result[1]=r0 // initialize result + ;; + + shl tmp1=tmp1,3 // number of bits + shl hmask=hmask,tmp2 // build head mask, mask off [0,firstoff[ + ;; + shr.u tmask=tmask,tmp1 // build tail mask, mask off ]8,lastoff] + mov saved_lc=ar.lc // save lc + ;; +(p8) and hmask=hmask,tmask // apply tail mask to head mask if 1 word only +(p9) and p[1]=lastval,tmask // mask last it as appropriate + shr.u tmp3=tmp3,3 // we do 8 bytes per loop + ;; + cmp.lt p6,p7=2,tmp3 // tmp3 > 2 ? 
+ and p[2]=firstval,hmask // and mask it as appropriate + add tmp1=-2,tmp3 // -2 = -1 (br.ctop) -1 (last-first) + ;; + // XXX Fixme: not very nice initialization here + // + // Setup loop control registers: + // + // tmp3=0 (1 word) : lc=0, ec=2, p16=F + // tmp3=1 (2 words) : lc=0, ec=3, p16=F + // tmp3=2 (3 words) : lc=0, ec=4, p16=T + // tmp3>2 (4 or more): lc=tmp3-2, ec=4, p16=T + // + cmp.eq p8,p9=r0,tmp3 // tmp3 == 0 ? +(p6) mov ar.lc=tmp1 +(p7) mov ar.lc=0 + ;; + cmp.lt p6,p7=1,tmp3 // tmp3 > 1 ? +(p8) mov ar.ec=2 // we need the extra rotation on result[] +(p9) mov ar.ec=3 // hard not to set it twice sometimes + ;; + mov carry=r0 // initialize carry +(p6) mov ar.ec=4 +(p6) mov pr.rot=0xffffffffffff0000 // p16=T, p18=T + + cmp.ne p8,p0=r0,r0 // p8 is false + mov p[3]=r0 // make sure first compare fails +(p7) mov pr.rot=0xfffffffffffe0000 // p16=F, p18=T + ;; +1: +(p16) ld8 p[0]=[first],8 // load next +(p8) adds carry=1,carry // add carry on prev_prev_value +(p18) add result[0]=result[1],p[2] // new_res = prev_res + cur_val + cmp.ltu p8,p0=result[1],p[3] // p8= prev_result < prev_val + br.ctop.dptk.few 1b // loop until lc--==0 + ;; // RAW on carry when loop exits + (p8) adds carry=1,carry;; // correct for carry on prev_value + add result[2]=carry,result[2];; // add carry to final result + cmp.ltu p6,p7=result[2], carry // check for new carry + ;; +(p6) adds result[2]=1,result[1] // correct if required + movl tmp3=0xffffffff + ;; + // XXX Fixme + // + // now fold 64 into 16 bits taking care of carry + // that's not very good because it has lots of sequentiality + // + and tmp1=result[2],tmp3 + shr.u tmp2=result[2],32 + ;; + add result[2]=tmp1,tmp2 + shr.u tmp3=tmp3,16 + ;; + and tmp1=result[2],tmp3 + shr.u tmp2=result[2],16 + ;; + add result[2]=tmp1,tmp2 + ;; + and tmp1=result[2],tmp3 + shr.u tmp2=result[2],16 + ;; + add result[2]=tmp1,tmp2 + ;; + and tmp1=result[2],tmp3 + shr.u tmp2=result[2],16 + ;; + add ret0=tmp1,tmp2 + mov pr=saved_pr,0xffffffffffff0000 + ;; + // if buf was odd then swap bytes + mov ar.pfs=saved_pfs // restore ar.ec +(p10) mux1 ret0=ret0,@rev // reverse word + ;; + mov ar.lc=saved_lc +(p10) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes + br.ret.sptk.few rp diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S new file mode 100644 index 000000000..0195ae5f5 --- /dev/null +++ b/arch/ia64/lib/flush.S @@ -0,0 +1,37 @@ +/* + * Cache flushing routines. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <asm/page.h> + + .text + .psr abi64 + .psr lsb + .lsb + + .align 16 + .global ia64_flush_icache_page + .proc ia64_flush_icache_page +ia64_flush_icache_page: + alloc r2=ar.pfs,1,0,0,0 + mov r3=ar.lc // save ar.lc + mov r8=PAGE_SIZE/64-1 // repeat/until loop + ;; + mov ar.lc=r8 + add r8=32,in0 + ;; +.Loop1: fc in0 // issuable on M0 only + add in0=64,in0 + fc r8 + add r8=64,r8 + br.cloop.sptk.few .Loop1 + ;; + sync.i + ;; + srlz.i + ;; + mov ar.lc=r3 // restore ar.lc + br.ret.sptk.few rp + .endp ia64_flush_icache_page diff --git a/arch/ia64/lib/idiv.S b/arch/ia64/lib/idiv.S new file mode 100644 index 000000000..a12097c94 --- /dev/null +++ b/arch/ia64/lib/idiv.S @@ -0,0 +1,158 @@ +/* + * Integer division routine. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ +/* Simple integer division. It uses the straight forward division + algorithm. 
This may not be the absolutely fastest way to do it, + but it's not horrible either. According to ski, the worst case + scenario of dividing 0xffffffffffffffff by 1 takes 133 cycles. + + An alternative would be to use an algorithm similar to the + floating point division algorithm (Newton-Raphson iteration), + but that approach is rather tricky (one has to be very careful + to get the last bit right...). + + While this algorithm is straight-forward, it does use a couple + of neat ia-64 specific tricks: + + - it uses the floating point unit to determine the initial + shift amount (shift = floor(ld(x)) - floor(ld(y))) + + - it uses predication to avoid a branch in the case where + x < y (this is what p8 is used for) + + - it uses rotating registers and the br.ctop branch to + implement a software-pipelined loop that's unrolled + twice (without any code expansion!) + + - the code is relatively well scheduled to avoid unnecessary + nops while maximizing parallelism +*/ + +#include <asm/break.h> + + .text + .psr abi64 +#ifdef __BIG_ENDIAN__ + .psr msb + .msb +#else + .psr lsb + .lsb +#endif + +#ifdef MODULO +# define OP mod +# define Q r9 +# define R r8 +#else +# define OP div +# define Q r8 +# define R r9 +#endif + +#ifdef SINGLE +# define PREC si +#else +# define PREC di +#endif + +#ifdef UNSIGNED +# define SGN u +# define INT_TO_FP(a,b) fma.s0 a=b,f1,f0 +# define FP_TO_INT(a,b) fcvt.fxu.trunc.s0 a=b +#else +# define SGN +# define INT_TO_FP(a,b) fcvt.xf a=b +# define FP_TO_INT(a,b) fcvt.fx.trunc.s0 a=b +#endif + +#define PASTE1(a,b) a##b +#define PASTE(a,b) PASTE1(a,b) +#define NAME PASTE(PASTE(__,SGN),PASTE(OP,PASTE(PREC,3))) + + .align 32 + .global NAME + .proc NAME +NAME: + + alloc r2=ar.pfs,2,6,0,8 + mov r18=pr +#ifdef SINGLE +# ifdef UNSIGNED + zxt4 in0=in0 + zxt4 in1=in1 +# else + sxt4 in0=in0 + sxt4 in1=in1 +# endif + ;; +#endif + +#ifndef UNSIGNED + cmp.lt p6,p0=in0,r0 // x negative? + cmp.lt p7,p0=in1,r0 // y negative? + ;; +(p6) sub in0=r0,in0 // make x positive +(p7) sub in1=r0,in1 // ditto for y + ;; +#endif + + setf.sig f8=in0 + mov r3=ar.lc // save ar.lc + setf.sig f9=in1 + ;; + mov Q=0 // initialize q + mov R=in0 // stash away x in a static register + mov r16=1 // r16 = 1 + INT_TO_FP(f8,f8) + cmp.eq p8,p0=0,in0 // x==0? + cmp.eq p9,p0=0,in1 // y==0? + ;; + INT_TO_FP(f9,f9) +(p8) br.dpnt.few .L3 +(p9) break __IA64_BREAK_KDB // attempted division by zero (should never happen) + mov ar.ec=r0 // epilogue count = 0 + ;; + getf.exp r14=f8 // r14 = exponent of x + getf.exp r15=f9 // r15 = exponent of y + mov ar.lc=r0 // loop count = 0 + ;; + sub r17=r14,r15 // r17 = (exp of x - exp y) = shift amount + cmp.ge p8,p0=r14,r15 + ;; + + .rotr y[2], mask[2] // in0 and in1 may no longer be valid after + // the first write to a rotating register! 
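+
+// Worked example (made-up operands, unsigned): for x=13, y=3 the exponents
+// are 3 and 1, so the shift amount is 2, y[1]=3<<2=12, mask[1]=4, and the
+// loop runs shift+1 = 3 times:
+//	13 >= 12 -> x=1, q=4;	then 1 < 6 and 1 < 3 -> no change
+// leaving q=4 and r=1, i.e. 13 = 4*3 + 1.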
+ +(p8) shl y[1]=in1,r17 // y[1] = y<<shift +(p8) shl mask[1]=r16,r17 // mask[1] = 1<<shift + +(p8) mov ar.lc=r17 // loop count = r17 + ;; +.L1: +(p8) cmp.geu.unc p9,p0=R,y[1]// p9 = (x >= y[1]) +(p8) shr.u mask[0]=mask[1],1 // prepare mask[0] and y[0] for next +(p8) shr.u y[0]=y[1],1 // iteration + ;; +(p9) sub R=R,y[1] // if (x >= y[1]), subtract y[1] from x +(p9) add Q=Q,mask[1] // and set corresponding bit in q (Q) + br.ctop.dptk.few .L1 // repeated unless ar.lc-- == 0 + ;; +.L2: +#ifndef UNSIGNED +# ifdef MODULO +(p6) sub R=r0,R // set sign of remainder according to x +# else +(p6) sub Q=r0,Q // set sign of quotient + ;; +(p7) sub Q=r0,Q +# endif +#endif +.L3: + mov ar.pfs=r2 // restore ar.pfs + mov ar.lc=r3 // restore ar.lc + mov pr=r18,0xffffffffffff0000 // restore p16-p63 + br.ret.sptk.few rp diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S new file mode 100644 index 000000000..595720a2d --- /dev/null +++ b/arch/ia64/lib/memset.S @@ -0,0 +1,111 @@ +/* + * + * Optimized version of the standard memset() function + * + * Return: none + * + * + * Inputs: + * in0: address of buffer + * in1: byte value to use for storing + * in2: length of the buffer + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> + */ + + +// arguments +// +#define buf r32 +#define val r33 +#define len r34 + +// +// local registers +// +#define saved_pfs r14 +#define cnt r18 +#define buf2 r19 +#define saved_lc r20 +#define saved_pr r21 +#define tmp r22 + + .text + .psr abi64 + .psr lsb + + .align 16 + .global memset + .proc memset + +memset: + alloc saved_pfs=ar.pfs,3,0,0,0 // cnt is sink here + cmp.eq p8,p0=r0,len // check for zero length + mov saved_lc=ar.lc // preserve ar.lc (slow) + ;; + adds tmp=-1,len // br.ctop is repeat/until + tbit.nz p6,p0=buf,0 // odd alignment +(p8) br.ret.spnt.few rp + + cmp.lt p7,p0=16,len // if len > 16 then long memset + mux1 val=val,@brcst // prepare value +(p7) br.cond.dptk.few long_memset + ;; + mov ar.lc=tmp // initialize lc for small count + ;; // avoid RAW and WAW on ar.lc +1: // worst case 15 cyles, avg 8 cycles + st1 [buf]=val,1 + br.cloop.dptk.few 1b + ;; // avoid RAW on ar.lc + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.sptk.few rp // end of short memset + + // at this point we know we have more than 16 bytes to copy + // so we focus on alignment +long_memset: +(p6) st1 [buf]=val,1 // 1-byte aligned +(p6) adds len=-1,len;; // sync because buf is modified + tbit.nz p6,p0=buf,1 + ;; +(p6) st2 [buf]=val,2 // 2-byte aligned +(p6) adds len=-2,len;; + tbit.nz p6,p0=buf,2 + ;; +(p6) st4 [buf]=val,4 // 4-byte aligned +(p6) adds len=-4,len;; + tbit.nz p6,p0=buf,3 + ;; +(p6) st8 [buf]=val,8 // 8-byte aligned +(p6) adds len=-8,len;; + shr.u cnt=len,4 // number of 128-bit (2x64bit) words + ;; + cmp.eq p6,p0=r0,cnt + adds tmp=-1,cnt +(p6) br.cond.dpnt.few .dotail // we have less than 16 bytes left + ;; + adds buf2=8,buf // setup second base pointer + mov ar.lc=tmp + ;; +2: // 16bytes/iteration + st8 [buf]=val,16 + st8 [buf2]=val,16 + br.cloop.dptk.few 2b + ;; +.dotail: // tail correction based on len only + tbit.nz p6,p0=len,3 + ;; +(p6) st8 [buf]=val,8 // at least 8 bytes + tbit.nz p6,p0=len,2 + ;; +(p6) st4 [buf]=val,4 // at least 4 bytes + tbit.nz p6,p0=len,1 + ;; +(p6) st2 [buf]=val,2 // at least 2 bytes + tbit.nz p6,p0=len,0 + mov ar.lc=saved_lc + ;; +(p6) st1 [buf]=val // only 1 byte left + br.ret.dptk.few rp + .endp diff --git a/arch/ia64/lib/strlen.S b/arch/ia64/lib/strlen.S new file mode 100644 index 
000000000..3062716b1 --- /dev/null +++ b/arch/ia64/lib/strlen.S @@ -0,0 +1,197 @@ +/* + * + * Optimized version of the standard strlen() function + * + * + * Inputs: + * in0 address of string + * + * Outputs: + * ret0 the number of characters in the string (0 if empty string) + * does not count the \0 + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> + * + * 09/24/99 S.Eranian add speculation recovery code + */ + +// +// +// This is an enhanced version of the basic strlen. it includes a combination +// of compute zero index (czx), parallel comparisons, speculative loads and +// loop unroll using rotating registers. +// +// General Ideas about the algorithm: +// The goal is to look at the string in chunks of 8 bytes. +// so we need to do a few extra checks at the beginning because the +// string may not be 8-byte aligned. In this case we load the 8byte +// quantity which includes the start of the string and mask the unused +// bytes with 0xff to avoid confusing czx. +// We use speculative loads and software pipelining to hide memory +// latency and do read ahead safely. This way we defer any exception. +// +// Because we don't want the kernel to be relying on particular +// settings of the DCR register, we provide recovery code in case +// speculation fails. The recovery code is going to "redo" the work using +// only normal loads. If we still get a fault then we generate a +// kernel panic. Otherwise we return the strlen as usual. +// +// The fact that speculation may fail can be caused, for instance, by +// the DCR.dm bit being set. In this case TLB misses are deferred, i.e., +// a NaT bit will be set if the translation is not present. The normal +// load, on the other hand, will cause the translation to be inserted +// if the mapping exists. +// +// It should be noted that we execute recovery code only when we need +// to use the data that has been speculatively loaded: we don't execute +// recovery code on pure read ahead data. +// +// Remarks: +// - the cmp r0,r0 is used as a fast way to initialize a predicate +// register to 1. This is required to make sure that we get the parallel +// compare correct. +// +// - we don't use the epilogue counter to exit the loop but we need to set +// it to zero beforehand. +// +// - after the loop we must test for Nat values because neither the +// czx nor cmp instruction raise a NaT consumption fault. We must be +// careful not to look too far for a Nat for which we don't care. +// For instance we don't need to look at a NaT in val2 if the zero byte +// was in val1. +// +// - Clearly performance tuning is required. 
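+//
+// Example of the initial masking (made-up address): for a string starting
+// at offset 5 within an 8-byte word, tmp=64-5*8=24 so mask becomes
+// 0x000000ffffffffff, and v[1] |= mask forces the 5 bytes preceding the
+// string to 0xff. czx1.r (which returns 8 when a word contains no zero
+// byte) therefore cannot report a false zero in them.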
+//
+//
+//
+#define saved_pfs	r11
+#define	tmp		r10
+#define base		r16
+#define orig		r17
+#define saved_pr	r18
+#define src		r19
+#define mask		r20
+#define val		r21
+#define val1		r22
+#define val2		r23
+
+
+	.text
+	.psr abi64
+	.psr lsb
+	.lsb
+
+	.align 32
+	.global strlen
+	.proc strlen
+strlen:
+	alloc saved_pfs=ar.pfs,11,0,0,8	// rotating must be multiple of 8
+
+	.rotr v[2], w[2]	// declares our 4 aliases
+
+	extr.u tmp=in0,0,3	// tmp=least significant 3 bits
+	mov orig=in0		// keep track of initial byte address
+	dep src=0,in0,0,3	// src=8byte-aligned in0 address
+	mov saved_pr=pr		// preserve predicates (rotation)
+	;;
+	ld8 v[1]=[src],8	// must not speculate: can fail here
+	shl tmp=tmp,3		// multiply by 8bits/byte
+	mov mask=-1		// our mask
+	;;
+	ld8.s w[1]=[src],8	// speculatively load next
+	cmp.eq p6,p0=r0,r0	// sets p6 to true for cmp.and
+	sub tmp=64,tmp		// how many bits to shift our mask on the right
+	;;
+	shr.u mask=mask,tmp	// zero enough bits to hold v[1] valid part
+	mov ar.ec=r0		// clear epilogue counter (saved in ar.pfs)
+	;;
+	add base=-16,src	// keep track of aligned base
+	or v[1]=v[1],mask	// now we have a safe initial byte pattern
+	;;
+1:
+	ld8.s v[0]=[src],8	// speculatively load next
+	czx1.r val1=v[1]	// search 0 byte from right
+	czx1.r val2=w[1]	// search 0 byte from right following 8bytes
+	;;
+	ld8.s w[0]=[src],8	// speculatively load next to next
+	cmp.eq.and p6,p0=8,val1	// p6 = p6 and val1==8
+	cmp.eq.and p6,p0=8,val2	// p6 = p6 and val2==8
+(p6)	br.wtop.dptk.few 1b	// loop until p6 == 0
+	;;
+	//
+	// We must try the recovery code iff
+	// val1_is_nat || (val1==8 && val2_is_nat)
+	//
+	// XXX Fixme
+	//	- there must be a better way of doing the test
+	//
+	cmp.eq p8,p9=8,val1	// p8 = val1 had zero (disambiguate)
+#ifdef notyet
+	tnat.nz p6,p7=val1	// test NaT on val1
+#else
+	tnat.z p7,p6=val1	// test NaT on val1
+#endif
+(p6)	br.cond.spnt.few recover	// jump to recovery if val1 is NaT
+	;;
+	//
+	// if we come here p7 is true, i.e., initialized for // cmp
+	//
+	cmp.eq.and p7,p0=8,val1	// val1==8?
+	tnat.nz.and p7,p0=val2	// test NaT if val2
+(p7)	br.cond.spnt.few recover	// jump to recovery if val2 is NaT
+	;;
+(p8)	mov val1=val2		// the other test got us out of the loop
+(p8)	adds src=-16,src	// correct position when 3 ahead
+(p9)	adds src=-24,src	// correct position when 4 ahead
+	;;
+	sub ret0=src,orig	// distance from base
+	sub tmp=8,val1		// which byte in word
+	mov pr=saved_pr,0xffffffffffff0000
+	;;
+	sub ret0=ret0,tmp	// adjust
+	mov ar.pfs=saved_pfs	// because of ar.ec, restore no matter what
+	br.ret.sptk.few rp	// end of normal execution
+
+	//
+	// Outlined recovery code when speculation failed
+	//
+	// This time we don't use speculation and rely on the normal exception
+	// mechanism.  That's why the loop is not as good as the previous one
+	// because read ahead is not possible.
+	//
+	// IMPORTANT:
+	// Please note that in the case of strlen() as opposed to strlen_user()
+	// we don't use the exception mechanism, as this function is not
+	// supposed to fail.  If that happens it means we have a bug and the
+	// code will cause a kernel fault.
+	//
+	// XXX Fixme
+	//	- today we restart from the beginning of the string instead
+	//	  of trying to continue where we left off.
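+	//
+	//	  In plain C, the recovery pass below amounts to no more than
+	//	  (an illustrative sketch, reusing the hypothetical
+	//	  zero_byte_index() from the comment at the top of this file):
+	//
+	//		w = *base++ | mask;	/* redo first word, normal load */
+	//		while (zero_byte_index(w) == 8)
+	//			w = *base++;	/* a fault here is a real bug */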
+	//
+recover:
+	ld8 val=[base],8	// will fail if unrecoverable fault
+	;;
+	or val=val,mask		// remask first bytes
+	cmp.eq p0,p6=r0,r0	// nullify first ld8 in loop
+	;;
+	//
+	// ar.ec is still zero here
+	//
+2:
+(p6)	ld8 val=[base],8	// will fail if unrecoverable fault
+	;;
+	czx1.r val1=val		// search 0 byte from right
+	;;
+	cmp.eq p6,p0=8,val1	// val1==8 ?
+(p6)	br.wtop.dptk.few 2b	// loop until p6 == 0
+	sub ret0=base,orig	// distance from base
+	sub tmp=8,val1
+	mov pr=saved_pr,0xffffffffffff0000
+	;;
+	sub ret0=ret0,tmp	// length=now - back -1
+	mov ar.pfs=saved_pfs	// because of ar.ec, restore no matter what
+	br.ret.sptk.few rp	// end of successful recovery code
+
+	.endp strlen
diff --git a/arch/ia64/lib/strlen_user.S b/arch/ia64/lib/strlen_user.S
new file mode 100644
index 000000000..8149dde8a
--- /dev/null
+++ b/arch/ia64/lib/strlen_user.S
@@ -0,0 +1,213 @@
+/*
+ * Optimized version of the strlen_user() function
+ *
+ * Inputs:
+ *	in0	address of buffer
+ *
+ * Outputs:
+ *	ret0	0 in case of fault, strlen(buffer)+1 otherwise
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * 01/19/99 S.Eranian heavily enhanced version (see details below)
+ * 09/24/99 S.Eranian added speculation recovery code
+ */
+
+//
+// int strlen_user(char *)
+// ------------------------
+// Returns:
+//	- length of string + 1
+//	- 0 in case an exception is raised
+//
+// This is an enhanced version of the basic strlen_user.  It includes a
+// combination of compute zero index (czx), parallel comparisons, speculative
+// loads and loop unroll using rotating registers.
+//
+// General Ideas about the algorithm:
+//	The goal is to look at the string in chunks of 8 bytes.
+//	So we need to do a few extra checks at the beginning because the
+//	string may not be 8-byte aligned.  In this case we load the 8byte
+//	quantity which includes the start of the string and mask the unused
+//	bytes with 0xff to avoid confusing czx.
+//	We use speculative loads and software pipelining to hide memory
+//	latency and do read ahead safely.  This way we defer any exception.
+//
+//	Because we don't want the kernel to be relying on particular
+//	settings of the DCR register, we provide recovery code in case
+//	speculation fails.  The recovery code is going to "redo" the work using
+//	only normal loads.  If we still get a fault then we return an
+//	error (ret0=0).  Otherwise we return the strlen+1 as usual.
+//	The fact that speculation may fail can be caused, for instance, by
+//	the DCR.dm bit being set.  In this case TLB misses are deferred, i.e.,
+//	a NaT bit will be set if the translation is not present.  The normal
+//	load, on the other hand, will cause the translation to be inserted
+//	if the mapping exists.
+//
+//	It should be noted that we execute recovery code only when we need
+//	to use the data that has been speculatively loaded: we don't execute
+//	recovery code on pure read ahead data.
+//
+// Remarks:
+//	- the cmp r0,r0 is used as a fast way to initialize a predicate
+//	  register to 1.  This is required to make sure that we get the parallel
+//	  compare correct.
+//
+//	- we don't use the epilogue counter to exit the loop but we need to set
+//	  it to zero beforehand.
+//
+//	- after the loop we must test for NaT values because neither the
+//	  czx nor cmp instruction raise a NaT consumption fault.
+//	  We must be careful not to look too far for a NaT we don't care
+//	  about.  For instance we don't need to look at a NaT in val2 if the
+//	  zero byte was in val1.
+//
+//	- Clearly performance tuning is required.
+//
+//
+//
+
+#define EX(y,x...)				\
+	.section __ex_table,"a";		\
+	data4 @gprel(99f);			\
+	data4 y-99f;				\
+	.previous;				\
+99:	x
+
+#define saved_pfs	r11
+#define	tmp		r10
+#define base		r16
+#define orig		r17
+#define saved_pr	r18
+#define src		r19
+#define mask		r20
+#define val		r21
+#define val1		r22
+#define val2		r23
+
+
+	.text
+	.psr abi64
+	.psr lsb
+	.lsb
+
+	.align 32
+	.global __strlen_user
+	.proc __strlen_user
+__strlen_user:
+	alloc saved_pfs=ar.pfs,11,0,0,8
+
+	.rotr v[2], w[2]	// declares our 4 aliases
+
+	extr.u tmp=in0,0,3	// tmp=least significant 3 bits
+	mov orig=in0		// keep track of initial byte address
+	dep src=0,in0,0,3	// src=8byte-aligned in0 address
+	mov saved_pr=pr		// preserve predicates (rotation)
+	;;
+	ld8.s v[1]=[src],8	// load the initial 8bytes (must speculate)
+	shl tmp=tmp,3		// multiply by 8bits/byte
+	mov mask=-1		// our mask
+	;;
+	ld8.s w[1]=[src],8	// load next 8 bytes in 2nd pipeline
+	cmp.eq p6,p0=r0,r0	// sets p6 (required because of // cmp.and)
+	sub tmp=64,tmp		// how many bits to shift our mask on the right
+	;;
+	shr.u mask=mask,tmp	// zero enough bits to hold v[1] valid part
+	mov ar.ec=r0		// clear epilogue counter (saved in ar.pfs)
+	;;
+	add base=-16,src	// keep track of aligned base
+	chk.s v[1], recover	// if already NaT, then directly skip to recover
+	or v[1]=v[1],mask	// now we have a safe initial byte pattern
+	;;
+1:
+	ld8.s v[0]=[src],8	// speculatively load next
+	czx1.r val1=v[1]	// search 0 byte from right
+	czx1.r val2=w[1]	// search 0 byte from right following 8bytes
+	;;
+	ld8.s w[0]=[src],8	// speculatively load next to next
+	cmp.eq.and p6,p0=8,val1	// p6 = p6 and val1==8
+	cmp.eq.and p6,p0=8,val2	// p6 = p6 and val2==8
+(p6)	br.wtop.dptk.few 1b	// loop until p6 == 0
+	;;
+	//
+	// We must try the recovery code iff
+	// val1_is_nat || (val1==8 && val2_is_nat)
+	//
+	// XXX Fixme
+	//	- there must be a better way of doing the test
+	//
+	cmp.eq p8,p9=8,val1	// p8 = val1 had zero (disambiguate)
+#ifdef notyet
+	tnat.nz p6,p7=val1	// test NaT on val1
+#else
+	tnat.z p7,p6=val1	// test NaT on val1
+#endif
+(p6)	br.cond.spnt.few recover	// jump to recovery if val1 is NaT
+	;;
+	//
+	// if we come here p7 is true, i.e., initialized for // cmp
+	//
+	cmp.eq.and p7,p0=8,val1	// val1==8?
+	tnat.nz.and p7,p0=val2	// test NaT if val2
+(p7)	br.cond.spnt.few recover	// jump to recovery if val2 is NaT
+	;;
+(p8)	mov val1=val2		// val2 contains the value
+(p8)	adds src=-16,src	// correct position when 3 ahead
+(p9)	adds src=-24,src	// correct position when 4 ahead
+	;;
+	sub ret0=src,orig	// distance from origin
+	sub tmp=7,val1		// 7=8-1 because this strlen returns strlen+1
+	mov pr=saved_pr,0xffffffffffff0000
+	;;
+	sub ret0=ret0,tmp	// length=now - back -1
+	mov ar.pfs=saved_pfs	// because of ar.ec, restore no matter what
+	br.ret.sptk.few rp	// end of normal execution
+
+	//
+	// Outlined recovery code when speculation failed
+	//
+	// This time we don't use speculation and rely on the normal exception
+	// mechanism.  That's why the loop is not as good as the previous one
+	// because read ahead is not possible.
+	//
+	// XXX Fixme
+	//	- today we restart from the beginning of the string instead
+	//	  of trying to continue where we left off.
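+	//
+	//	  The EX() macro above ties each faulting-capable load to this
+	//	  file's fixup label through the __ex_table section.  Conceptually,
+	//	  each entry looks like this (an illustrative C view; see
+	//	  arch/ia64/mm/extable.c and arch/ia64/mm/fault.c):
+	//
+	//		struct exception_table_entry {
+	//			int addr;	/* gp-relative address of the load (99f) */
+	//			int skip;	/* offset from 99f to the fixup; bit 0 set
+	//					   means the handler also clears r9 */
+	//		};
+	//
+	//	  On a fault, the handler sets r8=-EFAULT, optionally clears r9,
+	//	  and resumes execution at the (bundle-aligned) fixup address.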
+	//
+recover:
+	EX(.Lexit1, ld8 val=[base],8)	// load the initial bytes
+	;;
+	or val=val,mask			// remask first bytes
+	cmp.eq p0,p6=r0,r0		// nullify first ld8 in loop
+	;;
+	//
+	// ar.ec is still zero here
+	//
+2:
+	EX(.Lexit1, (p6) ld8 val=[base],8)
+	;;
+	czx1.r val1=val			// search 0 byte from right
+	;;
+	cmp.eq p6,p0=8,val1		// val1==8 ?
+(p6)	br.wtop.dptk.few 2b		// loop until p6 == 0
+	;;
+	sub ret0=base,orig		// distance from base
+	sub tmp=7,val1			// 7=8-1 because this strlen returns strlen+1
+	mov pr=saved_pr,0xffffffffffff0000
+	;;
+	sub ret0=ret0,tmp		// length=now - back -1
+	mov ar.pfs=saved_pfs		// because of ar.ec, restore no matter what
+	br.ret.sptk.few rp		// end of successful recovery code
+
+	//
+	// We failed even on the normal load (called from exception handler)
+	//
+.Lexit1:
+	mov ret0=0
+	mov pr=saved_pr,0xffffffffffff0000
+	mov ar.pfs=saved_pfs		// because of ar.ec, restore no matter what
+	br.ret.sptk.few rp
+
+	.endp __strlen_user
diff --git a/arch/ia64/lib/strncpy_from_user.S b/arch/ia64/lib/strncpy_from_user.S
new file mode 100644
index 000000000..17f71f1a0
--- /dev/null
+++ b/arch/ia64/lib/strncpy_from_user.S
@@ -0,0 +1,53 @@
+/*
+ * Just like strncpy() except for the return value.  If no fault occurs during
+ * the copying, the number of bytes copied is returned.  If a fault occurs,
+ * -EFAULT is returned.
+ *
+ * Inputs:
+ *	in0:	address of destination buffer
+ *	in1:	address of string to be copied
+ *	in2:	length of buffer in bytes
+ * Outputs:
+ *	r8:	-EFAULT in case of fault or number of bytes copied if no fault
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#define EX(x...)				\
+99:	x;					\
+	.section __ex_table,"a";		\
+	data4 @gprel(99b);			\
+	data4 .Lexit-99b;			\
+	.previous
+
+	.text
+	.psr abi64
+	.psr lsb
+	.lsb
+
+	.align 32
+	.global __strncpy_from_user
+	.proc __strncpy_from_user
+__strncpy_from_user:
+	alloc r11=ar.pfs,3,0,0,0
+	mov r9=in1
+	add r10=in1,in2
+
+	// XXX braindead copy loop---this needs to be optimized
+.Loop1:
+	EX(ld1 r8=[in1],1)
+	;;
+	st1 [in0]=r8,1
+	cmp.ltu p6,p0=in1,r10
+	;;
+(p6)	cmp.ne.and p6,p0=r8,r0
+	;;
+(p6)	br.cond.dpnt.few .Loop1
+
+1:	sub r8=in1,r9		// length of string (including NUL character)
+.Lexit:
+	mov ar.pfs=r11
+	br.ret.sptk.few rp
+
+	.endp __strncpy_from_user
diff --git a/arch/ia64/lib/strnlen_user.S b/arch/ia64/lib/strnlen_user.S
new file mode 100644
index 000000000..c227a9003
--- /dev/null
+++ b/arch/ia64/lib/strnlen_user.S
@@ -0,0 +1,55 @@
+/*
+ * Returns 0 if exception before NUL or reaching the supplied limit (N),
+ * a value greater than N if the string is longer than the limit, else
+ * strlen.
+ *
+ * Inputs:
+ *	in0:	address of buffer
+ *	in1:	string length limit N
+ * Outputs:
+ *	r8:	0 in case of fault, strlen(buffer)+1 otherwise
+ *
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+/* If a fault occurs, r8 gets set to -EFAULT and r9 gets cleared.  */
+#define EX(x...)				\
+	.section __ex_table,"a";		\
+	data4 @gprel(99f);			\
+	data4 (.Lexit-99f)|1;			\
+	.previous;				\
+99:	x;
+
+	.text
+	.psr abi64
+	.psr lsb
+	.lsb
+
+	.align 32
+	.global __strnlen_user
+	.proc __strnlen_user
+__strnlen_user:
+	alloc r2=ar.pfs,2,0,0,0
+	mov r16=ar.lc			// preserve ar.lc
+	add r3=-1,in1
+	;;
+	mov ar.lc=r3
+	mov r9=0
+
+	// XXX braindead strlen loop---this needs to be optimized
+.Loop1:
+	EX(ld1 r8=[in0],1)
+	add r9=1,r9
+	;;
+	cmp.eq p6,p0=r8,r0
+(p6)	br.dpnt.few .Lexit
+	br.cloop.dptk.few .Loop1
+
+	add r9=1,in1			// NUL not found---return N+1
+	;;
+.Lexit:
+	mov r8=r9
+	mov ar.lc=r16			// restore ar.lc
+	br.ret.sptk.few rp
+
+	.endp __strnlen_user
diff --git a/arch/ia64/mm/Makefile b/arch/ia64/mm/Makefile
new file mode 100644
index 000000000..ab2b95cf9
--- /dev/null
+++ b/arch/ia64/mm/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for the ia64-specific parts of the memory manager.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definition is now in the main makefile...
+
+O_TARGET := mm.o
+#O_OBJS := ioremap.o
+O_OBJS := init.o fault.o tlb.o extable.o
+
+include $(TOPDIR)/Rules.make
diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c
new file mode 100644
index 000000000..bee64e0e3
--- /dev/null
+++ b/arch/ia64/mm/extable.c
@@ -0,0 +1,68 @@
+/*
+ * Kernel exception handling table support.  Derived from arch/alpha/mm/extable.c.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+extern const struct exception_table_entry __start___ex_table[];
+extern const struct exception_table_entry __stop___ex_table[];
+
+static inline const struct exception_table_entry *
+search_one_table (const struct exception_table_entry *first,
+		  const struct exception_table_entry *last,
+		  signed long value)
+{
+	/* Abort early if the search value is out of range.  */
+	if (value != (signed int)value)
+		return 0;
+
+	while (first <= last) {
+		const struct exception_table_entry *mid;
+		long diff;
+		/*
+		 * We know that first and last are both kernel virtual
+		 * pointers (region 7) so first+last will cause an
+		 * overflow.  We fix that by calling __va() on the
+		 * result, which will ensure that the top two bits get
+		 * set again.
+		 */
+		mid = (void *) __va((((__u64) first + (__u64) last)/2/sizeof(*mid))*sizeof(*mid));
+		diff = mid->addr - value;
+		if (diff == 0)
+			return mid;
+		else if (diff < 0)
+			first = mid+1;
+		else
+			last = mid-1;
+	}
+	return 0;
+}
+
+register unsigned long gp __asm__("gp");
+
+const struct exception_table_entry *
+search_exception_table (unsigned long addr)
+{
+#ifndef CONFIG_MODULES
+	/* There is only the kernel to search.  */
+	return search_one_table(__start___ex_table, __stop___ex_table - 1, addr - gp);
+#else
+	struct exception_table_entry *ret;
+	/* The kernel is the last "module" -- no need to treat it special.  */
+	struct module *mp;
+
+	for (mp = module_list; mp ; mp = mp->next) {
+		if (!mp->ex_table_start)
+			continue;
+		ret = search_one_table(mp->ex_table_start, mp->ex_table_end - 1, addr - mp->gp);
+		if (ret)
+			return ret;
+	}
+	return 0;
+#endif
+}
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
new file mode 100644
index 000000000..99cf5048c
--- /dev/null
+++ b/arch/ia64/mm/fault.c
@@ -0,0 +1,164 @@
+/*
+ * MMU fault handling support.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/hardirq.h>
+
+extern void die_if_kernel (char *, struct pt_regs *, long);
+
+/*
+ * This routine is analogous to expand_stack() but instead grows the
+ * register backing store (which grows towards higher addresses).
+ * Since the register backing store is accessed sequentially, we
+ * disallow growing the RBS by more than a page at a time.  Note that
+ * the VM_GROWSUP flag can be set on any VM area but that's fine
+ * because the total process size is still limited by RLIMIT_STACK and
+ * RLIMIT_AS.
+ */
+static inline long
+expand_backing_store (struct vm_area_struct *vma, unsigned long address)
+{
+	unsigned long grow;
+
+	grow = PAGE_SIZE >> PAGE_SHIFT;
+	if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur
+	    || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur))
+		return -ENOMEM;
+	vma->vm_end += PAGE_SIZE;
+	vma->vm_mm->total_vm += grow;
+	if (vma->vm_flags & VM_LOCKED)
+		vma->vm_mm->locked_vm += grow;
+	return 0;
+}
+
+void
+ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs)
+{
+	struct mm_struct *mm = current->mm;
+	const struct exception_table_entry *fix;
+	struct vm_area_struct *vma, *prev_vma;
+	struct siginfo si;
+	int signal = SIGSEGV;
+	unsigned long mask;
+
+	/*
+	 * If we're in an interrupt or have no user
+	 * context, we must not take the fault..
+	 */
+	if (in_interrupt() || !mm)
+		goto no_context;
+
+	down(&mm->mmap_sem);
+
+	vma = find_vma_prev(mm, address, &prev_vma);
+	if (!vma)
+		goto bad_area;
+
+	/* find_vma_prev() returns vma such that address < vma->vm_end or NULL */
+	if (address < vma->vm_start)
+		goto check_expansion;
+
+  good_area:
+	/* OK, we've got a good vm_area for this memory area.  Check the access permissions: */
+
+#	define VM_READ_BIT	0
+#	define VM_WRITE_BIT	1
+#	define VM_EXEC_BIT	2
+
+#	if (((1 << VM_READ_BIT) != VM_READ || (1 << VM_WRITE_BIT) != VM_WRITE) \
+	    || (1 << VM_EXEC_BIT) != VM_EXEC)
+#		error File is out of sync with <linux/mm.h>.  Please update.
+#	endif
+
+	mask = (  (((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
+		| (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)
+		| (((isr >> IA64_ISR_R_BIT) & 1UL) << VM_READ_BIT));
+
+	if ((vma->vm_flags & mask) != mask)
+		goto bad_area;
+
+	/*
+	 * If for any reason at all we couldn't handle the fault, make
+	 * sure we exit gracefully rather than endlessly redo the
+	 * fault.
+	 */
+	if (!handle_mm_fault(current, vma, address, (isr & IA64_ISR_W) != 0)) {
+		/*
+		 * We ran out of memory, or some other thing happened
+		 * to us that made us unable to handle the page fault
+		 * gracefully.
+		 */
+		signal = SIGBUS;
+		goto bad_area;
+	}
+	up(&mm->mmap_sem);
+	return;
+
+  check_expansion:
+	if (!(prev_vma && (prev_vma->vm_flags & VM_GROWSUP) && (address == prev_vma->vm_end))) {
+		if (!(vma->vm_flags & VM_GROWSDOWN))
+			goto bad_area;
+		if (expand_stack(vma, address))
+			goto bad_area;
+	} else if (expand_backing_store(prev_vma, address))
+		goto bad_area;
+	goto good_area;

+  bad_area:
+	up(&mm->mmap_sem);
+	if (isr & IA64_ISR_SP) {
+		/*
+		 * This fault was due to a speculative load; set the
+		 * "ed" bit in the psr to ensure forward progress
+		 * (target register will get a NaT).
+		 */
+		ia64_psr(regs)->ed = 1;
+		return;
+	}
+	if (user_mode(regs)) {
+#if 0
+printk("%s(%d): segfault accessing %lx\n", current->comm, current->pid, address);
+show_regs(regs);
+#endif
+		si.si_signo = signal;
+		si.si_errno = 0;
+		si.si_code = SI_KERNEL;
+		si.si_addr = (void *) address;
+		force_sig_info(signal, &si, current);
+		return;
+	}
+
+  no_context:
+	fix = search_exception_table(regs->cr_iip);
+	if (fix) {
+		regs->r8 = -EFAULT;
+		if (fix->skip & 1) {
+			regs->r9 = 0;
+		}
+		regs->cr_iip += ((long) fix->skip) & ~15;
+		regs->cr_ipsr &= ~IA64_PSR_RI;	/* clear exception slot number */
+		return;
+	}
+
+	/*
+	 * Oops.  The kernel tried to access some bad page.  We'll have
+	 * to terminate things with extreme prejudice.
+	 */
+	printk(KERN_ALERT "Unable to handle kernel paging request at "
+	       "virtual address %016lx\n", address);
+	die_if_kernel("Oops", regs, isr);
+	do_exit(SIGKILL);
+	return;
+}
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
new file mode 100644
index 000000000..388f1fe0c
--- /dev/null
+++ b/arch/ia64/mm/init.c
@@ -0,0 +1,461 @@
+/*
+ * Initialize MMU support.
+ *
+ * Copyright (C) 1998, 1999 Hewlett-Packard Co
+ * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+
+#include <asm/dma.h>
+#include <asm/efi.h>
+#include <asm/ia32.h>
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+#include <asm/sal.h>
+#include <asm/system.h>
+
+/* References to section boundaries: */
+extern char _stext, _etext, _edata, __init_begin, __init_end;
+
+/*
+ * These are allocated in head.S so that we get proper page alignment.
+ * If you change the size of these then change head.S as well.
+ */
+extern char empty_bad_page[PAGE_SIZE];
+extern pmd_t empty_bad_pmd_table[PTRS_PER_PMD];
+extern pte_t empty_bad_pte_table[PTRS_PER_PTE];
+
+extern void ia64_tlb_init (void);
+
+static unsigned long totalram_pages;
+
+/*
+ * Fill in empty_bad_pmd_table with entries pointing to
+ * empty_bad_pte_table and return the address of this PMD table.
+ */
+static pmd_t *
+get_bad_pmd_table (void)
+{
+	pmd_t v;
+	int i;
+
+	pmd_set(&v, empty_bad_pte_table);
+
+	for (i = 0; i < PTRS_PER_PMD; ++i)
+		empty_bad_pmd_table[i] = v;
+
+	return empty_bad_pmd_table;
+}
+
+/*
+ * Fill in empty_bad_pte_table with PTEs pointing to empty_bad_page
+ * and return the address of this PTE table.
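+ *
+ * (Any access through these PTEs then hits empty_bad_page, so a corrupted
+ * page table degrades into a predictable mapping of a single sacrificial
+ * page rather than a wild dereference.)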
+ */ +static pte_t * +get_bad_pte_table (void) +{ + pte_t v; + int i; + + set_pte(&v, pte_mkdirty(mk_pte_phys(__pa(empty_bad_page), PAGE_SHARED))); + + for (i = 0; i < PTRS_PER_PTE; ++i) + empty_bad_pte_table[i] = v; + + return empty_bad_pte_table; +} + +void +__handle_bad_pgd (pgd_t *pgd) +{ + pgd_ERROR(*pgd); + pgd_set(pgd, get_bad_pmd_table()); +} + +void +__handle_bad_pmd (pmd_t *pmd) +{ + pmd_ERROR(*pmd); + pmd_set(pmd, get_bad_pte_table()); +} + +/* + * Allocate and initialize an L3 directory page and set + * the L2 directory entry PMD to the newly allocated page. + */ +pte_t* +get_pte_slow (pmd_t *pmd, unsigned long offset) +{ + pte_t *pte; + + pte = (pte_t *) __get_free_page(GFP_KERNEL); + if (pmd_none(*pmd)) { + if (pte) { + /* everything A-OK */ + clear_page(pte); + pmd_set(pmd, pte); + return pte + offset; + } + pmd_set(pmd, get_bad_pte_table()); + return NULL; + } + free_page((unsigned long) pte); + if (pmd_bad(*pmd)) { + __handle_bad_pmd(pmd); + return NULL; + } + return (pte_t *) pmd_page(*pmd) + offset; +} + +int +do_check_pgt_cache (int low, int high) +{ + int freed = 0; + + if (pgtable_cache_size > high) { + do { + if (pgd_quicklist) + free_page((unsigned long)get_pgd_fast()), ++freed; + if (pmd_quicklist) + free_page((unsigned long)get_pmd_fast()), ++freed; + if (pte_quicklist) + free_page((unsigned long)get_pte_fast()), ++freed; + } while (pgtable_cache_size > low); + } + return freed; +} + +/* + * This performs some platform-dependent address space initialization. + * On IA-64, we want to setup the VM area for the register backing + * store (which grows upwards) and install the gateway page which is + * used for signal trampolines, etc. + */ +void +ia64_init_addr_space (void) +{ + struct vm_area_struct *vma; + + /* + * If we're out of memory and kmem_cache_alloc() returns NULL, + * we simply ignore the problem. When the process attempts to + * write to the register backing store for the first time, it + * will get a SEGFAULT in this case. 
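+ *
+ * (Unlike the memory stack, the backing store is written by the CPU's
+ * register stack engine when it spills stacked registers, so this mapping
+ * must be in place before user code makes its first nested calls.)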
+ */ + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (vma) { + vma->vm_mm = current->mm; + vma->vm_start = IA64_RBS_BOT; + vma->vm_end = vma->vm_start + PAGE_SIZE; + vma->vm_page_prot = PAGE_COPY; + vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP; + vma->vm_ops = NULL; + vma->vm_pgoff = 0; + vma->vm_file = NULL; + vma->vm_private_data = NULL; + insert_vm_struct(current->mm, vma); + } +} + +void +free_initmem (void) +{ + unsigned long addr; + + addr = (unsigned long) &__init_begin; + for (; addr < (unsigned long) &__init_end; addr += PAGE_SIZE) { + clear_bit(PG_reserved, &mem_map[MAP_NR(addr)].flags); + set_page_count(&mem_map[MAP_NR(addr)], 1); + free_page(addr); + ++totalram_pages; + } + printk ("Freeing unused kernel memory: %ldkB freed\n", + (&__init_end - &__init_begin) >> 10); +} + +void +si_meminfo (struct sysinfo *val) +{ + val->totalram = totalram_pages; + val->sharedram = 0; + val->freeram = nr_free_pages(); + val->bufferram = atomic_read(&buffermem_pages); + val->totalhigh = 0; + val->freehigh = 0; + val->mem_unit = PAGE_SIZE; + return; +} + +void +show_mem (void) +{ + int i,free = 0,total = 0,reserved = 0; + int shared = 0, cached = 0; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + i = max_mapnr; + while (i-- > 0) { + total++; + if (PageReserved(mem_map+i)) + reserved++; + else if (PageSwapCache(mem_map+i)) + cached++; + else if (!page_count(mem_map + i)) + free++; + else + shared += page_count(mem_map + i) - 1; + } + printk("%d pages of RAM\n", total); + printk("%d reserved pages\n", reserved); + printk("%d pages shared\n", shared); + printk("%d pages swap cached\n", cached); + printk("%ld pages in page table cache\n", pgtable_cache_size); + show_buffers(); +} + +/* + * This is like put_dirty_page() but installs a clean page with PAGE_GATE protection + * (execute-only, typically). + */ +struct page * +put_gate_page (struct page *page, unsigned long address) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + if (!PageReserved(page)) + printk("put_gate_page: gate page at 0x%lx not in reserved memory\n", + page_address(page)); + pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */ + pmd = pmd_alloc(pgd, address); + if (!pmd) { + __free_page(page); + oom(current); + return 0; + } + pte = pte_alloc(pmd, address); + if (!pte) { + __free_page(page); + oom(current); + return 0; + } + if (!pte_none(*pte)) { + pte_ERROR(*pte); + __free_page(page); + return 0; + } + flush_page_to_ram(page); + set_pte(pte, page_pte_prot(page, PAGE_GATE)); + /* no need for flush_tlb */ + return page; +} + +void __init +ia64_rid_init (void) +{ + unsigned long flags, rid, pta; + + /* Set up the kernel identity mappings (regions 6 & 7) and the vmalloc area (region 5): */ + ia64_clear_ic(flags); + + rid = ia64_rid(IA64_REGION_ID_KERNEL, __IA64_UNCACHED_OFFSET); + ia64_set_rr(__IA64_UNCACHED_OFFSET, (rid << 8) | (_PAGE_SIZE_256M << 2)); + + rid = ia64_rid(IA64_REGION_ID_KERNEL, PAGE_OFFSET); + ia64_set_rr(PAGE_OFFSET, (rid << 8) | (_PAGE_SIZE_256M << 2)); + + rid = ia64_rid(IA64_REGION_ID_KERNEL, VMALLOC_START); + ia64_set_rr(VMALLOC_START, (rid << 8) | (PAGE_SHIFT << 2) | 1); + + __restore_flags(flags); + + /* + * Check if the virtually mapped linear page table (VMLPT) + * overlaps with a mapped address space. 
The IA-64 + * architecture guarantees that at least 50 bits of virtual + * address space are implemented but if we pick a large enough + * page size (e.g., 64KB), the VMLPT is big enough that it + * will overlap with the upper half of the kernel mapped + * region. I assume that once we run on machines big enough + * to warrant 64KB pages, IMPL_VA_MSB will be significantly + * bigger, so we can just adjust the number below to get + * things going. Alternatively, we could truncate the upper + * half of each regions address space to not permit mappings + * that would overlap with the VMLPT. --davidm 99/11/13 + */ +# define ld_pte_size 3 +# define ld_max_addr_space_pages 3*(PAGE_SHIFT - ld_pte_size) /* max # of mappable pages */ +# define ld_max_addr_space_size (ld_max_addr_space_pages + PAGE_SHIFT) +# define ld_max_vpt_size (ld_max_addr_space_pages + ld_pte_size) +# define POW2(n) (1ULL << (n)) +# define IMPL_VA_MSB 50 + if (POW2(ld_max_addr_space_size - 1) + POW2(ld_max_vpt_size) > POW2(IMPL_VA_MSB)) + panic("mm/init: overlap between virtually mapped linear page table and " + "mapped kernel space!"); + pta = POW2(61) - POW2(IMPL_VA_MSB); + /* + * Set the (virtually mapped linear) page table address. Bit + * 8 selects between the short and long format, bits 2-7 the + * size of the table, and bit 0 whether the VHPT walker is + * enabled. + */ + ia64_set_pta(pta | (0<<8) | ((3*(PAGE_SHIFT-3)+3)<<2) | 1); +} + +#ifdef CONFIG_IA64_VIRTUAL_MEM_MAP + +static int +create_mem_map_page_table (u64 start, u64 end, void *arg) +{ + unsigned long address, start_page, end_page; + struct page *map_start, *map_end; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + void *page; + + map_start = mem_map + MAP_NR(start); + map_end = mem_map + MAP_NR(end); + + start_page = (unsigned long) map_start & PAGE_MASK; + end_page = PAGE_ALIGN((unsigned long) map_end); + + printk("[%lx,%lx) -> %lx-%lx\n", start, end, start_page, end_page); + + for (address = start_page; address < end_page; address += PAGE_SIZE) { + pgd = pgd_offset_k(address); + if (pgd_none(*pgd)) { + pmd = alloc_bootmem_pages(PAGE_SIZE); + clear_page(pmd); + pgd_set(pgd, pmd); + pmd += (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); + } else + pmd = pmd_offset(pgd, address); + if (pmd_none(*pmd)) { + pte = alloc_bootmem_pages(PAGE_SIZE); + clear_page(pte); + pmd_set(pmd, pte); + pte += (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); + } else + pte = pte_offset(pmd, address); + + if (pte_none(*pte)) { + page = alloc_bootmem_pages(PAGE_SIZE); + clear_page(page); + set_pte(pte, mk_pte_phys(__pa(page), PAGE_KERNEL)); + } + } + return 0; +} + +#endif /* CONFIG_IA64_VIRTUAL_MEM_MAP */ + +/* + * Set up the page tables. 
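+ *
+ * For example (assuming MAX_DMA_ADDRESS marks a 32-bit DMA limit): with
+ * more memory than the limit, the pages below it form ZONE_DMA and the
+ * rest ZONE_NORMAL; with less, everything lands in ZONE_DMA (see the
+ * max_dma check below).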
+ */ +void +paging_init (void) +{ + unsigned long max_dma, zones_size[MAX_NR_ZONES]; + + clear_page((void *) ZERO_PAGE_ADDR); + + ia64_rid_init(); + __flush_tlb_all(); + + /* initialize mem_map[] */ + + memset(zones_size, 0, sizeof(zones_size)); + + max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS); + if (max_low_pfn < max_dma) + zones_size[ZONE_DMA] = max_low_pfn; + else { + zones_size[ZONE_DMA] = max_dma; + zones_size[ZONE_NORMAL] = max_low_pfn - max_dma; + } + free_area_init(zones_size); +} + +static int +count_pages (u64 start, u64 end, void *arg) +{ + unsigned long *count = arg; + + *count += (end - start) >> PAGE_SHIFT; + return 0; +} + +static int +count_reserved_pages (u64 start, u64 end, void *arg) +{ + unsigned long num_reserved = 0; + unsigned long *count = arg; + struct page *pg; + + for (pg = mem_map + MAP_NR(start); pg < mem_map + MAP_NR(end); ++pg) + if (PageReserved(pg)) + ++num_reserved; + *count += num_reserved; + return 0; +} + +void +mem_init (void) +{ + extern char __start_gate_section[]; + long reserved_pages, codesize, datasize, initsize; + + if (!mem_map) + BUG(); + + num_physpages = 0; + efi_memmap_walk(count_pages, &num_physpages); + + max_mapnr = max_low_pfn; + high_memory = __va(max_low_pfn * PAGE_SIZE); + + ia64_tlb_init(); + + totalram_pages += free_all_bootmem(); + + reserved_pages = 0; + efi_memmap_walk(count_reserved_pages, &reserved_pages); + + codesize = (unsigned long) &_etext - (unsigned long) &_stext; + datasize = (unsigned long) &_edata - (unsigned long) &_etext; + initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + + printk("Memory: %luk/%luk available (%luk code, %luk reserved, %luk data, %luk init)\n", + (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10), + max_mapnr << (PAGE_SHIFT - 10), codesize >> 10, reserved_pages << (PAGE_SHIFT - 10), + datasize >> 10, initsize >> 10); + + /* install the gate page in the global page table: */ + put_gate_page(mem_map + MAP_NR(__start_gate_section), GATE_ADDR); + +#ifndef CONFIG_IA64_SOFTSDV_HACKS + /* + * (Some) SoftSDVs seem to have a problem with this call. + * Since it's mostly a performance optimization, just don't do + * it for now... --davidm 99/12/6 + */ + efi_enter_virtual_mode(); +#endif + +#ifdef CONFIG_IA32_SUPPORT + ia32_gdt_init(); +#endif + return; +} diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c new file mode 100644 index 000000000..72ece4147 --- /dev/null +++ b/arch/ia64/mm/tlb.c @@ -0,0 +1,166 @@ +/* + * TLB support routines. + * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <linux/config.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/mm.h> + +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> +#include <asm/pal.h> + +#define SUPPORTED_PGBITS ( \ + 1 << _PAGE_SIZE_256M | \ + 1 << _PAGE_SIZE_64M | \ + 1 << _PAGE_SIZE_16M | \ + 1 << _PAGE_SIZE_4M | \ + 1 << _PAGE_SIZE_1M | \ + 1 << _PAGE_SIZE_256K | \ + 1 << _PAGE_SIZE_64K | \ + 1 << _PAGE_SIZE_16K | \ + 1 << _PAGE_SIZE_8K | \ + 1 << _PAGE_SIZE_4K ) + +static void wrap_context (struct mm_struct *mm); + +unsigned long ia64_next_context = (1UL << IA64_HW_CONTEXT_BITS) + 1; + + /* + * Put everything in a struct so we avoid the global offset table whenever + * possible. 
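+ *
+ * (The base, counts, and strides come from the PAL firmware's PTCE_INFO
+ * call via ia64_get_ptce(); __flush_tlb_all() below walks the resulting
+ * count[0] x count[1] grid, issuing one ptc.e per step, to purge the
+ * entire TLB.)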
+ */
+ia64_ptce_info_t ia64_ptce_info;
+
+/*
+ * Serialize usage of ptc.g
+ */
+spinlock_t ptcg_lock = SPIN_LOCK_UNLOCKED; /* see <asm/pgtable.h> */
+
+void
+get_new_mmu_context (struct mm_struct *mm)
+{
+	if ((ia64_next_context & IA64_HW_CONTEXT_MASK) == 0) {
+		wrap_context(mm);
+	}
+	mm->context = ia64_next_context++;
+}
+
+/*
+ * This is where we handle the case where (ia64_next_context &
+ * IA64_HW_CONTEXT_MASK) == 0.  Whenever this happens, we need to
+ * flush the entire TLB and skip over region id number 0, which is
+ * used by the kernel.
+ */
+static void
+wrap_context (struct mm_struct *mm)
+{
+	struct task_struct *task;
+
+	/*
+	 * We wrapped back to the first region id so we nuke the TLB
+	 * so we can switch to the next generation of region ids.
+	 */
+	__flush_tlb_all();
+	if (ia64_next_context++ == 0) {
+		/*
+		 * Oops, we've used up all 64 bits of the context
+		 * space---walk through task table to ensure we don't
+		 * get tricked into using an old context.  If this
+		 * happens, the machine has been running for a long,
+		 * long time!
+		 */
+		ia64_next_context = (1UL << IA64_HW_CONTEXT_BITS) + 1;
+
+		read_lock(&tasklist_lock);
+		for_each_task (task) {
+			if (task->mm == mm)
+				continue;
+			flush_tlb_mm(task->mm);
+		}
+		read_unlock(&tasklist_lock);
+	}
+}
+
+void
+__flush_tlb_all (void)
+{
+	unsigned long i, j, flags, count0, count1, stride0, stride1, addr = ia64_ptce_info.base;
+
+	count0  = ia64_ptce_info.count[0];
+	count1  = ia64_ptce_info.count[1];
+	stride0 = ia64_ptce_info.stride[0];
+	stride1 = ia64_ptce_info.stride[1];
+
+	save_and_cli(flags);
+	for (i = 0; i < count0; ++i) {
+		for (j = 0; j < count1; ++j) {
+			asm volatile ("ptc.e %0" :: "r"(addr));
+			addr += stride1;
+		}
+		addr += stride0;
+	}
+	restore_flags(flags);
+	ia64_insn_group_barrier();
+	ia64_srlz_i();			/* srlz.i implies srlz.d */
+	ia64_insn_group_barrier();
+}
+
+void
+flush_tlb_range (struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+	unsigned long size = end - start;
+	unsigned long nbits;
+
+	if (mm != current->active_mm) {
+		/* this doesn't happen often, if at all, so it's not worth optimizing for... */
+		mm->context = 0;
+		return;
+	}
+
+	nbits = ia64_fls(size + 0xfff);
+	if (((1UL << nbits) & SUPPORTED_PGBITS) == 0) {
+		if (nbits > _PAGE_SIZE_256M)
+			nbits = _PAGE_SIZE_256M;
+		else
+			/*
+			 * Some page sizes are not implemented in the
+			 * IA-64 arch, so if we get asked to clear an
+			 * unsupported page size, round up to the
+			 * nearest page size.  Note that we depend on
+			 * the fact that if page size N is not
+			 * implemented, 2*N _is_ implemented.
+			 */
+			++nbits;
+		if (((1UL << nbits) & SUPPORTED_PGBITS) == 0)
+			panic("flush_tlb_range: BUG: nbits=%lu\n", nbits);
+	}
+	start &= ~((1UL << nbits) - 1);
+
+	spin_lock(&ptcg_lock);
+	do {
+#ifdef CONFIG_SMP
+		__asm__ __volatile__ ("ptc.g %0,%1;;srlz.i;;"
+				      :: "r"(start), "r"(nbits<<2) : "memory");
+#else
+		__asm__ __volatile__ ("ptc.l %0,%1" :: "r"(start), "r"(nbits<<2) : "memory");
+#endif
+		start += (1UL << nbits);
+	} while (start < end);
+	spin_unlock(&ptcg_lock);
+	ia64_insn_group_barrier();
+	ia64_srlz_i();			/* srlz.i implies srlz.d */
+	ia64_insn_group_barrier();
}
+
+void
+ia64_tlb_init (void)
+{
+	ia64_get_ptce(&ia64_ptce_info);
+	__flush_tlb_all();		/* nuke left overs from bootstrapping... */
+}
diff --git a/arch/ia64/sn/Makefile b/arch/ia64/sn/Makefile
new file mode 100644
index 000000000..3c8810967
--- /dev/null
+++ b/arch/ia64/sn/Makefile
@@ -0,0 +1,25 @@
+#
+# ia64/sn/Makefile
+#
+# Copyright (C) 1999 Silicon Graphics, Inc.
+# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) +# + +CFLAGS := $(CFLAGS) -DCONFIG_SGI_SN1 -DSN1 -DSN -DSOFTSDV \ + -DLANGUAGE_C=1 -D_LANGUAGE_C=1 +AFLAGS := $(AFLAGS) -DCONFIG_SGI_SN1 -DSN1 -DSOFTSDV + +.S.s: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -E -o $*.s $< +.S.o: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -c -o $*.o $< + +all: sn.a + +O_TARGET = sn.a +O_HEADERS = +O_OBJS = sn1/sn1.a + +clean:: + +include $(TOPDIR)/Rules.make diff --git a/arch/ia64/sn/sn1/Makefile b/arch/ia64/sn/sn1/Makefile new file mode 100644 index 000000000..23758c473 --- /dev/null +++ b/arch/ia64/sn/sn1/Makefile @@ -0,0 +1,29 @@ +# +# ia64/platform/sn/sn1/Makefile +# +# Copyright (C) 1999 Silicon Graphics, Inc. +# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com) +# + +CFLAGS := $(CFLAGS) -DCONFIG_SGI_SN1 -DSN1 -DSN -DSOFTSDV \ + -DLANGUAGE_C=1 -D_LANGUAGE_C=1 +AFLAGS := $(AFLAGS) -DCONFIG_SGI_SN1 -DSN1 -DSOFTSDV + +.S.s: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -E -o $*.s $< +.S.o: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -c -o $*.o $< + +all: sn1.a + +O_TARGET = sn1.a +O_HEADERS = +O_OBJS = irq.o setup.o + +ifeq ($(CONFIG_IA64_GENERIC),y) +O_OBJS += machvec.o +endif + +clean:: + +include $(TOPDIR)/Rules.make diff --git a/arch/ia64/sn/sn1/irq.c b/arch/ia64/sn/sn1/irq.c new file mode 100644 index 000000000..df8e56943 --- /dev/null +++ b/arch/ia64/sn/sn1/irq.c @@ -0,0 +1,50 @@ +#include <linux/kernel.h> + +#include <asm/irq.h> +#include <asm/ptrace.h> + +static int +sn1_startup_irq(unsigned int irq) +{ + return(0); +} + +static void +sn1_shutdown_irq(unsigned int irq) +{ +} + +static void +sn1_disable_irq(unsigned int irq) +{ +} + +static void +sn1_enable_irq(unsigned int irq) +{ +} + +static int +sn1_handle_irq(unsigned int irq, struct pt_regs *regs) +{ + return(0); +} + +struct hw_interrupt_type irq_type_sn1 = { + "sn1_irq", + sn1_startup_irq, + sn1_shutdown_irq, + sn1_handle_irq, + sn1_enable_irq, + sn1_disable_irq +}; + +void +sn1_irq_init (struct irq_desc desc[NR_IRQS]) +{ + int i; + + for (i = IA64_MIN_VECTORED_IRQ; i <= IA64_MAX_VECTORED_IRQ; ++i) { + irq_desc[i].handler = &irq_type_sn1; + } +} diff --git a/arch/ia64/sn/sn1/machvec.c b/arch/ia64/sn/sn1/machvec.c new file mode 100644 index 000000000..2e36b2e08 --- /dev/null +++ b/arch/ia64/sn/sn1/machvec.c @@ -0,0 +1,4 @@ +#include <asm/machvec_init.h> +#include <asm/machvec_sn1.h> + +MACHVEC_DEFINE(sn1) diff --git a/arch/ia64/sn/sn1/setup.c b/arch/ia64/sn/sn1/setup.c new file mode 100644 index 000000000..1e3a39ae3 --- /dev/null +++ b/arch/ia64/sn/sn1/setup.c @@ -0,0 +1,77 @@ +/* + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Vijay Chander(vijay@engr.sgi.com) + */ +#include <linux/config.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/kdev_t.h> +#include <linux/string.h> +#include <linux/tty.h> +#include <linux/console.h> +#include <linux/timex.h> +#include <linux/sched.h> + +#include <asm/io.h> +#include <asm/machvec.h> +#include <asm/system.h> +#include <asm/processor.h> + + +/* + * The format of "screen_info" is strange, and due to early i386-setup + * code. This is just enough to make the console code think we're on a + * VGA color display. + */ +struct screen_info sn1_screen_info = { + orig_x: 0, + orig_y: 0, + orig_video_mode: 3, + orig_video_cols: 80, + orig_video_ega_bx: 3, + orig_video_lines: 25, + orig_video_isVGA: 1, + orig_video_points: 16 +}; + +/* + * This is here so we can use the CMOS detection in ide-probe.c to + * determine what drives are present. 
In theory, we don't need this + * as the auto-detection could be done via ide-probe.c:do_probe() but + * in practice that would be much slower, which is painful when + * running in the simulator. Note that passing zeroes in DRIVE_INFO + * is sufficient (the IDE driver will autodetect the drive geometry). + */ +char drive_info[4*16]; + +unsigned long +sn1_map_nr (unsigned long addr) +{ + return MAP_NR_SN1(addr); +} + +void +sn1_setup(char **cmdline_p) +{ + + ROOT_DEV = to_kdev_t(0x0301); /* default to first IDE drive */ + +#if !defined (CONFIG_IA64_SOFTSDV_HACKS) + /* + * Program the timer to deliver timer ticks. 0x40 is the I/O port + * address of PIT counter 0, 0x43 is the I/O port address of the + * PIT control word. + */ + request_region(0x40,0x20,"timer"); + outb(0x34, 0x43); /* Control word */ + outb(LATCH & 0xff , 0x40); /* LSB */ + outb(LATCH >> 8, 0x40); /* MSB */ + printk("PIT: LATCH at 0x%x%x for %d HZ\n", LATCH >> 8, LATCH & 0xff, HZ); +#endif +#ifdef __SMP__ + init_smp_config(); +#endif + screen_info = sn1_screen_info; +} diff --git a/arch/ia64/tools/Makefile b/arch/ia64/tools/Makefile new file mode 100644 index 000000000..0491ca943 --- /dev/null +++ b/arch/ia64/tools/Makefile @@ -0,0 +1,49 @@ +CFLAGS = -D__KERNEL__ -g -O2 -Wall -I$(TOPDIR)/include + +ifdef CONFIG_SMP + CFLAGS += -D__SMP__ +endif + +TARGET = $(TOPDIR)/include/asm-ia64/offsets.h + +all: + +clean: + rm -f print_offsets.s print_offsets offsets.h + +fastdep: offsets.h + @if ! cmp -s offsets.h ${TARGET}; then \ + echo "Updating ${TARGET}..."; \ + cp offsets.h ${TARGET}; \ + else \ + echo "${TARGET} is up to date"; \ + fi + +# +# If we're cross-compiling, we use the cross-compiler to translate +# print_offsets.c into an assembly file and then awk to translate this +# file into offsets.h. This avoids having to use a simulator to +# generate this file. This is based on an idea suggested by Asit +# Mallick. If we're running natively, we can of course just build +# print_offsets and run it. --davidm +# + +ifeq ($(CROSS_COMPILE),) + +offsets.h: print_offsets + ./print_offsets > offsets.h + +print_offsets: print_offsets.c + $(CC) $(CFLAGS) print_offsets.c -o $@ + +else + +offsets.h: print_offsets.s + $(AWK) -f print_offsets.awk $^ > $@ + +print_offsets.s: print_offsets.c + $(CC) $(CFLAGS) -S print_offsets.c -o $@ + +endif + +.PHONY: all diff --git a/arch/ia64/tools/print_offsets.awk b/arch/ia64/tools/print_offsets.awk new file mode 100644 index 000000000..5eb8bcb63 --- /dev/null +++ b/arch/ia64/tools/print_offsets.awk @@ -0,0 +1,70 @@ +BEGIN { + print "#ifndef _ASM_IA64_OFFSETS_H" + print "#define _ASM_IA64_OFFSETS_H" + print "/*" + print " * DO NOT MODIFY" + print " *" + print " * This file was generated by arch/ia64/tools/print_offsets.awk." + print " *" + print " */" + # + # This is a cheesy hack. Make sure that + # PF_PTRACED == 1<<PF_PTRACED_BIT. 
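+	#
+	# For reference, a table entry in the generated print_offsets.s is
+	# expected to look roughly like this (compiler-dependent; the value
+	# shown is purely illustrative):
+	#
+	#	stringz	"IA64_TASK_SIZE"
+	#	data8	3952
+	#
+	# which the pattern rules below turn into one "#define ..." line.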
	#
	print "#define PF_PTRACED_BIT 4"
}

# look for .tab:
#	stringz "name"
#	data value
# sequence

/.*[.]size/ {
	inside_table = 0
}

/\/\/ end/ {
	inside_table = 0
}

{
	if (inside_table) {
		if ($1 == "//") getline;
		name=$2
		getline
		getline
		if ($1 == "//") getline;
		value=$2
		len = length(name)
		name = substr(name, 2, len - 2)
		len -= 2
		if (len == 0)
			print ""
		else {
			len += 8
			if (len >= 40) {
				space=" "
			} else {
				space=""
				while (len < 40) {
					len += 8
					space = space"\t"
				}
			}
			printf("#define %s%s%lu\t/* 0x%lx */\n", name, space, value, value)
		}
	}
}

/tab:/ {
	inside_table = 1
}

/tab#:/ {
	inside_table = 1
}

END {
	print ""
	print "#endif /* _ASM_IA64_OFFSETS_H */"
}
diff --git a/arch/ia64/tools/print_offsets.c b/arch/ia64/tools/print_offsets.c
new file mode 100644
index 000000000..85b15aae1
--- /dev/null
+++ b/arch/ia64/tools/print_offsets.c
@@ -0,0 +1,109 @@
+/*
+ * Utility to generate asm-ia64/offsets.h.
+ *
+ * Copyright (C) 1999-2000 Hewlett-Packard Co
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * Note that this file has dual use: when building the kernel
+ * natively, the file is translated into a binary and executed.  When
+ * building the kernel in a cross-development environment, this file
+ * gets translated into an assembly file which, in turn, is processed
+ * by awk to generate offsets.h.  So if you make any changes to this
+ * file, be sure to verify that the awk procedure still works (see
+ * print_offsets.awk).
+ */
+#include <linux/sched.h>
+
+#include <asm-ia64/processor.h>
+#include <asm-ia64/ptrace.h>
+#include <asm-ia64/siginfo.h>
+#include <asm-ia64/sigcontext.h>
+
+#ifdef offsetof
+# undef offsetof
+#endif
+
+/*
+ * We _can't_ include the host's standard header file, as those are in
+ * potential conflict with what the Linux kernel declares for the
+ * target system.
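+ *
+ * The offsetof() definition below is the classic null-pointer form:
+ * pretending the struct sits at address 0 makes the address of a member
+ * equal to its byte offset.  For instance (illustrative):
+ *
+ *	offsetof (struct pt_regs, r8)
+ *	  == (char *) &((struct pt_regs *) 0)->r8 - (char *) 0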
+ */
+extern int printf (const char *, ...);
+
+#define offsetof(type,field)	((char *) &((type *) 0)->field - (char *) 0)
+
+struct
+  {
+    const char name[256];
+    unsigned long value;
+  }
+tab[] =
+  {
+    { "IA64_TASK_SIZE",			sizeof (struct task_struct) },
+    { "IA64_PT_REGS_SIZE",		sizeof (struct pt_regs) },
+    { "IA64_SWITCH_STACK_SIZE",		sizeof (struct switch_stack) },
+    { "IA64_SIGINFO_SIZE",		sizeof (struct siginfo) },
+    { "",				0 },	/* spacer */
+    { "IA64_TASK_FLAGS_OFFSET",		offsetof (struct task_struct, flags) },
+    { "IA64_TASK_SIGPENDING_OFFSET",	offsetof (struct task_struct, sigpending) },
+    { "IA64_TASK_NEED_RESCHED_OFFSET",	offsetof (struct task_struct, need_resched) },
+    { "IA64_TASK_THREAD_OFFSET",	offsetof (struct task_struct, thread) },
+    { "IA64_TASK_THREAD_KSP_OFFSET",	offsetof (struct task_struct, thread.ksp) },
+    { "IA64_TASK_PID_OFFSET",		offsetof (struct task_struct, pid) },
+    { "IA64_TASK_MM_OFFSET",		offsetof (struct task_struct, mm) },
+    { "IA64_PT_REGS_CR_IPSR_OFFSET",	offsetof (struct pt_regs, cr_ipsr) },
+    { "IA64_PT_REGS_R12_OFFSET",	offsetof (struct pt_regs, r12) },
+    { "IA64_PT_REGS_R8_OFFSET",		offsetof (struct pt_regs, r8) },
+    { "IA64_PT_REGS_R16_OFFSET",	offsetof (struct pt_regs, r16) },
+    { "IA64_SWITCH_STACK_B0_OFFSET",	offsetof (struct switch_stack, b0) },
+    { "IA64_SWITCH_STACK_CALLER_UNAT_OFFSET", offsetof (struct switch_stack, caller_unat) },
+    { "IA64_SIGCONTEXT_AR_BSP_OFFSET",	offsetof (struct sigcontext, sc_ar_bsp) },
+    { "IA64_SIGCONTEXT_AR_RNAT_OFFSET",	offsetof (struct sigcontext, sc_ar_rnat) },
+    { "IA64_SIGCONTEXT_FLAGS_OFFSET",	offsetof (struct sigcontext, sc_flags) },
+    { "IA64_SIGCONTEXT_CFM_OFFSET",	offsetof (struct sigcontext, sc_cfm) },
+    { "IA64_SIGCONTEXT_FR6_OFFSET",	offsetof (struct sigcontext, sc_fr[6]) },
+  };
+
+static const char *tabs = "\t\t\t\t\t\t\t\t\t\t";
+
+int
+main (int argc, char **argv)
+{
+  const char *space;
+  int i, num_tabs;
+  size_t len;
+
+  printf ("#ifndef _ASM_IA64_OFFSETS_H\n");
+  printf ("#define _ASM_IA64_OFFSETS_H\n\n");
+
+  printf ("/*\n * DO NOT MODIFY\n *\n * This file was generated by "
+	  "arch/ia64/tools/print_offsets.\n *\n */\n\n");
+
+  /* This is stretching things a bit, but entry.S needs the bit number
+     for PF_PTRACED and it can't include <linux/sched.h> so this seems
+     like a reasonable solution.  At least the code won't break should
+     PF_PTRACED ever change.  */
+  printf ("#define PF_PTRACED_BIT\t\t\t%u\n\n", ffs (PF_PTRACED) - 1);
+
+  for (i = 0; i < sizeof (tab) / sizeof (tab[0]); ++i)
+    {
+      if (tab[i].name[0] == '\0')
+	printf ("\n");
+      else
+	{
+	  len = strlen (tab[i].name);
+
+	  num_tabs = (40 - len) / 8;
+	  if (num_tabs <= 0)
+	    space = " ";
+	  else
	    space = strchr(tabs, '\0') - (40 - len) / 8;
+
+	  printf ("#define %s%s%lu\t/* 0x%lx */\n",
+		  tab[i].name, space, tab[i].value, tab[i].value);
+	}
+    }
+
+  printf ("\n#endif /* _ASM_IA64_OFFSETS_H */\n");
+  return 0;
+}
diff --git a/arch/ia64/vmlinux.lds.S b/arch/ia64/vmlinux.lds.S
new file mode 100644
index 000000000..b095baeb9
--- /dev/null
+++ b/arch/ia64/vmlinux.lds.S
@@ -0,0 +1,164 @@
+#include <linux/config.h>
+
+#include <asm/page.h>
+#include <asm/system.h>
+
+OUTPUT_FORMAT("elf64-ia64-little")
+OUTPUT_ARCH(ia64)
+ENTRY(_start)
+SECTIONS
+{
+	v = PAGE_OFFSET;	/* this symbol is here to make debugging with kdb easier... */
+
+	.
= KERNEL_START; + + _text = .; + _stext = .; + .text : AT(ADDR(.text) - PAGE_OFFSET) + { + *(__ivt_section) + /* these are not really text pages, but the zero page needs to be in a fixed location: */ + *(__special_page_section) + __start_gate_section = .; + *(__gate_section) + __stop_gate_section = .; + *(.text) + } + .text2 : AT(ADDR(.text2) - PAGE_OFFSET) + { *(.text2) } +#ifdef CONFIG_SMP + .text.lock : AT(ADDR(.text.lock) - PAGE_OFFSET) + { *(.text.lock) } +#endif + _etext = .; + + /* Exception table */ + . = ALIGN(16); + __start___ex_table = .; + __ex_table : AT(ADDR(__ex_table) - PAGE_OFFSET) + { *(__ex_table) } + __stop___ex_table = .; + +#if defined(CONFIG_KDB) + /* Kernel symbols and strings for kdb */ +# define KDB_MEAN_SYMBOL_SIZE 48 +# define KDB_SPACE (CONFIG_KDB_STBSIZE * KDB_MEAN_SYMBOL_SIZE) + . = ALIGN(8); + _skdb = .; + .kdb : AT(ADDR(.kdb) - PAGE_OFFSET) + { + *(kdbsymtab) + *(kdbstrings) + } + _ekdb = .; + . = _skdb + KDB_SPACE; +#endif + + /* Kernel symbol names for modules: */ + .kstrtab : AT(ADDR(.kstrtab) - PAGE_OFFSET) + { *(.kstrtab) } + + /* The initial task and kernel stack */ + . = ALIGN(PAGE_SIZE); + init_task : AT(ADDR(init_task) - PAGE_OFFSET) + { *(init_task) } + + /* Startup code */ + __init_begin = .; + .text.init : AT(ADDR(.text.init) - PAGE_OFFSET) + { *(.text.init) } + .data.init : AT(ADDR(.data.init) - PAGE_OFFSET) + { *(.data.init) } + . = ALIGN(16); + __setup_start = .; + .setup.init : AT(ADDR(.setup.init) - PAGE_OFFSET) + { *(.setup.init) } + __setup_end = .; + __initcall_start = .; + .initcall.init : AT(ADDR(.initcall.init) - PAGE_OFFSET) + { *(.initcall.init) } + __initcall_end = .; + . = ALIGN(PAGE_SIZE); + __init_end = .; + + .data.page_aligned : AT(ADDR(.data.page_aligned) - PAGE_OFFSET) + { *(.data.idt) } + + . = ALIGN(64); + .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - PAGE_OFFSET) + { *(.data.cacheline_aligned) } + + /* Global data */ + _data = .; + + .rodata : AT(ADDR(.rodata) - PAGE_OFFSET) + { *(.rodata) } + .opd : AT(ADDR(.opd) - PAGE_OFFSET) + { *(.opd) } + .data : AT(ADDR(.data) - PAGE_OFFSET) + { *(.data) *(.gnu.linkonce.d*) CONSTRUCTORS } + + __gp = ALIGN (8) + 0x200000; + + .got : AT(ADDR(.got) - PAGE_OFFSET) + { *(.got.plt) *(.got) } + /* We want the small data sections together, so single-instruction offsets + can access them all, and initialized data all before uninitialized, so + we can shorten the on-disk segment size. */ + .sdata : AT(ADDR(.sdata) - PAGE_OFFSET) + { *(.sdata) } + _edata = .; + _bss = .; + .sbss : AT(ADDR(.sbss) - PAGE_OFFSET) + { *(.sbss) *(.scommon) } + .bss : AT(ADDR(.bss) - PAGE_OFFSET) + { *(.bss) *(COMMON) } + . = ALIGN(64 / 8); + _end = .; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.text.exit) + *(.data.exit) + } + + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. 
+	   */
+	/* DWARF 1 */
+	.debug		0 : { *(.debug) }
+	.line		0 : { *(.line) }
+	/* GNU DWARF 1 extensions */
+	.debug_srcinfo	0 : { *(.debug_srcinfo) }
+	.debug_sfnames	0 : { *(.debug_sfnames) }
+	/* DWARF 1.1 and DWARF 2 */
+	.debug_aranges	0 : { *(.debug_aranges) }
+	.debug_pubnames	0 : { *(.debug_pubnames) }
+	/* DWARF 2 */
+	.debug_info	0 : { *(.debug_info) }
+	.debug_abbrev	0 : { *(.debug_abbrev) }
+	.debug_line	0 : { *(.debug_line) }
+	.debug_frame	0 : { *(.debug_frame) }
+	.debug_str	0 : { *(.debug_str) }
+	.debug_loc	0 : { *(.debug_loc) }
+	.debug_macinfo	0 : { *(.debug_macinfo) }
+	/* SGI/MIPS DWARF 2 extensions */
+	.debug_weaknames 0 : { *(.debug_weaknames) }
+	.debug_funcnames 0 : { *(.debug_funcnames) }
+	.debug_typenames 0 : { *(.debug_typenames) }
+	.debug_varnames	0 : { *(.debug_varnames) }
+	/* These must appear regardless of . */
+	/* Discard them for now since Intel SoftSDV cannot handle them.
+	.comment	0 : { *(.comment) }
+	.note		0 : { *(.note) }
+	*/
+	/DISCARD/ : { *(.comment) }
+	/DISCARD/ : { *(.note) }
+}