author | Ralf Baechle <ralf@linux-mips.org> | 2000-02-23 00:40:54 +0000
---|---|---
committer | Ralf Baechle <ralf@linux-mips.org> | 2000-02-23 00:40:54 +0000
commit | 529c593ece216e4aaffd36bd940cb94f1fa63129 (patch) |
tree | 78f1c0b805f5656aa7b0417a043c5346f700a2cf /arch/ia64/kernel |
parent | 0bd079751d25808d1972baee5c4eaa1db2227257 (diff) |
Merge with 2.3.43. I did ignore all modifications to the qlogicisp.c
driver due to the Origin A64 hacks.
Diffstat (limited to 'arch/ia64/kernel')
35 files changed, 13909 insertions(+), 0 deletions(-)
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile new file mode 100644 index 000000000..7cb47da72 --- /dev/null +++ b/arch/ia64/kernel/Makefile @@ -0,0 +1,42 @@ +# +# Makefile for the linux kernel. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... + +.S.s: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -E -o $*.s $< +.S.o: + $(CC) -D__ASSEMBLY__ $(AFLAGS) -c -o $*.o $< + +all: kernel.o head.o init_task.o + +O_TARGET := kernel.o +O_OBJS := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_default.o irq_internal.o ivt.o \ + pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o sal_stub.o semaphore.o setup.o signal.o \ + sys_ia64.o traps.o time.o unaligned.o unwind.o +#O_OBJS := fpreg.o +#OX_OBJS := ia64_ksyms.o + +ifeq ($(CONFIG_IA64_GENERIC),y) +O_OBJS += machvec.o +endif + +ifdef CONFIG_PCI +O_OBJS += pci.o +endif + +ifdef CONFIG_SMP +O_OBJS += smp.o irq_lock.o +endif + +ifeq ($(CONFIG_MCA),y) +O_OBJS += mca.o mca_asm.o +endif + +clean:: + +include $(TOPDIR)/Rules.make diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c new file mode 100644 index 000000000..e289efab6 --- /dev/null +++ b/arch/ia64/kernel/acpi.c @@ -0,0 +1,308 @@ +/* + * Advanced Configuration and Power Interface + * + * Based on 'ACPI Specification 1.0b' February 2, 1999 and + * 'IA-64 Extensions to ACPI Specification' Revision 0.6 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> + */ + +#include <linux/config.h> + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/string.h> +#include <linux/types.h> + +#include <asm/acpi-ext.h> +#include <asm/page.h> +#include <asm/efi.h> +#include <asm/io.h> +#include <asm/iosapic.h> +#include <asm/irq.h> + +#undef ACPI_DEBUG /* Guess what this does? */ + +#ifdef CONFIG_SMP +extern unsigned long ipi_base_addr; +#endif + +/* These are ugly but will be reclaimed by the kernel */ +int __initdata acpi_cpus = 0; +int __initdata acpi_apic_map[32]; +int __initdata cpu_cnt = 0; + +void (*pm_idle) (void); + +/* + * Identify usable CPU's and remember them for SMP bringup later. + */ +static void __init +acpi_lsapic(char *p) +{ + int add = 1; + + acpi_entry_lsapic_t *lsapic = (acpi_entry_lsapic_t *) p; + + if ((lsapic->flags & LSAPIC_PRESENT) == 0) + return; + + printk(" CPU %d (%.04x:%.04x): ", cpu_cnt, lsapic->eid, lsapic->id); + + if ((lsapic->flags & LSAPIC_ENABLED) == 0) { + printk("Disabled.\n"); + add = 0; + } else if (lsapic->flags & LSAPIC_PERFORMANCE_RESTRICTED) { + printk("Performance Restricted; ignoring.\n"); + add = 0; + } + + if (add) { + printk("Available.\n"); + acpi_cpus++; + acpi_apic_map[cpu_cnt] = (lsapic->id << 8) | lsapic->eid; + } + + cpu_cnt++; +} + +/* + * Find all IOSAPICs and tag the iosapic_vector structure with the appropriate + * base addresses. + */ +static void __init +acpi_iosapic(char *p) +{ + /* + * This is not good. ACPI is not necessarily limited to CONFIG_IA64_SV, yet + * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be + * a means for platform_setup() to register ACPI handlers? 
+ */ +#ifdef CONFIG_IA64_DIG + acpi_entry_iosapic_t *iosapic = (acpi_entry_iosapic_t *) p; + unsigned int ver; + int l, v, pins; + + ver = iosapic_version(iosapic->address); + pins = (ver >> 16) & 0xff; + + printk("IOSAPIC Version %x.%x: address 0x%lx IRQs 0x%x - 0x%x\n", + (ver & 0xf0) >> 4, (ver & 0x0f), iosapic->address, + iosapic->irq_base, iosapic->irq_base + pins); + + for (l = 0; l < pins; l++) { + v = map_legacy_irq(iosapic->irq_base + l); + if (v > IA64_MAX_VECTORED_IRQ) { + printk(" !!! IRQ %d > 255\n", v); + continue; + } + /* XXX Check for IOSAPIC collisions */ + iosapic_addr(v) = (unsigned long) ioremap(iosapic->address, 0); + iosapic_baseirq(v) = iosapic->irq_base; + } + iosapic_init(iosapic->address); +#endif +} + + +/* + * Configure legacy IRQ information in iosapic_vector + */ +static void __init +acpi_legacy_irq(char *p) +{ + /* + * This is not good. ACPI is not necessarily limited to CONFIG_IA64_SV, yet + * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be + * a means for platform_setup() to register ACPI handlers? + */ +#ifdef CONFIG_IA64_IRQ_ACPI + acpi_entry_int_override_t *legacy = (acpi_entry_int_override_t *) p; + unsigned char vector; + int i; + + vector = map_legacy_irq(legacy->isa_irq); + + /* + * Clobber any old pin mapping. It may be that it gets replaced later on + */ + for (i = 0; i < IA64_MAX_VECTORED_IRQ; i++) { + if (i == vector) + continue; + if (iosapic_pin(i) == iosapic_pin(vector)) + iosapic_pin(i) = 0xff; + } + + iosapic_pin(vector) = legacy->pin; + iosapic_bus(vector) = BUS_ISA; /* This table only overrides the ISA devices */ + iosapic_busdata(vector) = 0; + + /* + * External timer tick is special... + */ + if (vector != TIMER_IRQ) + iosapic_dmode(vector) = IO_SAPIC_LOWEST_PRIORITY; + else + iosapic_dmode(vector) = IO_SAPIC_FIXED; + + /* See MPS 1.4 section 4.3.4 */ + switch (legacy->flags) { + case 0x5: + iosapic_polarity(vector) = IO_SAPIC_POL_HIGH; + iosapic_trigger(vector) = IO_SAPIC_EDGE; + break; + case 0x8: + iosapic_polarity(vector) = IO_SAPIC_POL_LOW; + iosapic_trigger(vector) = IO_SAPIC_EDGE; + break; + case 0xd: + iosapic_polarity(vector) = IO_SAPIC_POL_HIGH; + iosapic_trigger(vector) = IO_SAPIC_LEVEL; + break; + case 0xf: + iosapic_polarity(vector) = IO_SAPIC_POL_LOW; + iosapic_trigger(vector) = IO_SAPIC_LEVEL; + break; + default: + printk(" ACPI Legacy IRQ 0x%02x: Unknown flags 0x%x\n", legacy->isa_irq, + legacy->flags); + break; + } + +#ifdef ACPI_DEBUG + printk("Legacy ISA IRQ %x -> IA64 Vector %x IOSAPIC Pin %x Active %s %s Trigger\n", + legacy->isa_irq, vector, iosapic_pin(vector), + ((iosapic_polarity(vector) == IO_SAPIC_POL_LOW) ? "Low" : "High"), + ((iosapic_trigger(vector) == IO_SAPIC_LEVEL) ? "Level" : "Edge")); +#endif /* ACPI_DEBUG */ + +#endif /* CONFIG_IA64_IRQ_ACPI */ +} + +/* + * Info on platform interrupt sources: NMI. PMI, INIT, etc. 
+ */ +static void __init +acpi_platform(char *p) +{ + acpi_entry_platform_src_t *plat = (acpi_entry_platform_src_t *) p; + + printk("PLATFORM: IOSAPIC %x -> Vector %lx on CPU %.04u:%.04u\n", + plat->iosapic_vector, plat->global_vector, plat->eid, plat->id); +} + +/* + * Parse the ACPI Multiple SAPIC Table + */ +static void __init +acpi_parse_msapic(acpi_sapic_t *msapic) +{ + char *p, *end; + + memset(&acpi_apic_map, -1, sizeof(acpi_apic_map)); + +#ifdef CONFIG_SMP + /* Base address of IPI Message Block */ + ipi_base_addr = ioremap(msapic->interrupt_block, 0); +#endif + + p = (char *) (msapic + 1); + end = p + (msapic->header.length - sizeof(acpi_sapic_t)); + + while (p < end) { + + switch (*p) { + case ACPI_ENTRY_LOCAL_SAPIC: + acpi_lsapic(p); + break; + + case ACPI_ENTRY_IO_SAPIC: + acpi_iosapic(p); + break; + + case ACPI_ENTRY_INT_SRC_OVERRIDE: + acpi_legacy_irq(p); + break; + + case ACPI_ENTRY_PLATFORM_INT_SOURCE: + acpi_platform(p); + break; + + default: + break; + } + + /* Move to next table entry. */ + p += *(p + 1); + } + + /* Make bootup pretty */ + printk(" %d CPUs available, %d CPUs total\n", acpi_cpus, cpu_cnt); +} + +int __init +acpi_parse(acpi_rsdp_t *rsdp) +{ + acpi_rsdt_t *rsdt; + acpi_desc_table_hdr_t *hdrp; + long tables, i; + + if (!rsdp) { + printk("Uh-oh, no ACPI Root System Description Pointer table!\n"); + return 0; + } + + if (strncmp(rsdp->signature, ACPI_RSDP_SIG, ACPI_RSDP_SIG_LEN)) { + printk("Uh-oh, ACPI RSDP signature incorrect!\n"); + return 0; + } + + rsdp->rsdt = __va(rsdp->rsdt); + rsdt = rsdp->rsdt; + if (strncmp(rsdt->header.signature, ACPI_RSDT_SIG, ACPI_RSDT_SIG_LEN)) { + printk("Uh-oh, ACPI RDST signature incorrect!\n"); + return 0; + } + + printk("ACPI: %.6s %.8s %d.%d\n", rsdt->header.oem_id, rsdt->header.oem_table_id, + rsdt->header.oem_revision >> 16, rsdt->header.oem_revision & 0xffff); + + tables = (rsdt->header.length - sizeof(acpi_desc_table_hdr_t)) / 8; + for (i = 0; i < tables; i++) { + hdrp = (acpi_desc_table_hdr_t *) __va(rsdt->entry_ptrs[i]); + + /* Only interested int the MSAPIC table for now ... */ + if (strncmp(hdrp->signature, ACPI_SAPIC_SIG, ACPI_SAPIC_SIG_LEN) != 0) + continue; + + acpi_parse_msapic((acpi_sapic_t *) hdrp); + } /* while() */ + + if (acpi_cpus == 0) { + printk("ACPI: Found 0 CPUS; assuming 1\n"); + acpi_cpus = 1; /* We've got at least one of these, no? */ + } + return 1; +} + +const char * +acpi_get_sysname (void) +{ + /* the following should go away once we have an ACPI parser: */ +#ifdef CONFIG_IA64_GENERIC + return "hpsim"; +#else +# if defined (CONFIG_IA64_HP_SIM) + return "hpsim"; +# elif defined (CONFIG_IA64_SGI_SN1_SIM) + return "sn1"; +# elif defined (CONFIG_IA64_DIG) + return "dig"; +# else +# error Unknown platform. Fix acpi.c. +# endif +#endif +} diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c new file mode 100644 index 000000000..dd7de2ab0 --- /dev/null +++ b/arch/ia64/kernel/efi.c @@ -0,0 +1,365 @@ +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 0.9 April 30, 1999 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 Hewlett-Packard Co. + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> + * + * All EFI Runtime Services are not implemented yet as EFI only + * supports physical mode addressing on SoftSDV. This is to be fixed + * in a future version. 
--drummond 1999-07-20 + * + * Implemented EFI runtime services and virtual mode calls. --davidm + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/time.h> + +#include <asm/efi.h> +#include <asm/io.h> +#include <asm/processor.h> + +#define EFI_DEBUG + +extern efi_status_t efi_call_phys (void *, ...); + +struct efi efi; + +static efi_runtime_services_t *runtime; + +static efi_status_t +phys_get_time (efi_time_t *tm, efi_time_cap_t *tc) +{ + return efi_call_phys(__va(runtime->get_time), __pa(tm), __pa(tc)); +} + +static efi_status_t +phys_set_time (efi_time_t *tm) +{ + return efi_call_phys(__va(runtime->set_time), __pa(tm)); +} + +static efi_status_t +phys_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm) +{ + return efi_call_phys(__va(runtime->get_wakeup_time), __pa(enabled), __pa(pending), + __pa(tm)); +} + +static efi_status_t +phys_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) +{ + return efi_call_phys(__va(runtime->set_wakeup_time), enabled, __pa(tm)); +} + +static efi_status_t +phys_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr, + unsigned long *data_size, void *data) +{ + return efi_call_phys(__va(runtime->get_variable), __pa(name), __pa(vendor), __pa(attr), + __pa(data_size), __pa(data)); +} + +static efi_status_t +phys_get_next_variable (unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) +{ + return efi_call_phys(__va(runtime->get_next_variable), __pa(name_size), __pa(name), + __pa(vendor)); +} + +static efi_status_t +phys_set_variable (efi_char16_t *name, efi_guid_t *vendor, u32 attr, + unsigned long data_size, void *data) +{ + return efi_call_phys(__va(runtime->set_variable), __pa(name), __pa(vendor), attr, + data_size, __pa(data)); +} + +static efi_status_t +phys_get_next_high_mono_count (u64 *count) +{ + return efi_call_phys(__va(runtime->get_next_high_mono_count), __pa(count)); +} + +static void +phys_reset_system (int reset_type, efi_status_t status, + unsigned long data_size, efi_char16_t *data) +{ + efi_call_phys(__va(runtime->reset_system), status, data_size, __pa(data)); +} + +/* + * Converts Gregorian date to seconds since 1970-01-01 00:00:00. + * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 + * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. + * + * [For the Julian calendar (which was used in Russia before 1917, + * Britain & colonies before 1752, anywhere else before 1582, + * and is still in use by some communities) leave out the + * -year/100+year/400 terms, and add 10.] + * + * This algorithm was first published by Gauss (I think). + * + * WARNING: this function will overflow on 2106-02-07 06:28:16 on + * machines were long is 32-bit! 
(However, as time_t is signed, we + * will already get problems at other places on 2038-01-19 03:14:08) + */ +static inline unsigned long +mktime (unsigned int year, unsigned int mon, unsigned int day, unsigned int hour, + unsigned int min, unsigned int sec) +{ + if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */ + mon += 12; /* Puts Feb last since it has leap day */ + year -= 1; + } + return ((((unsigned long)(year/4 - year/100 + year/400 + 367*mon/12 + day) + + year*365 - 719499 + )*24 + hour /* now have hours */ + )*60 + min /* now have minutes */ + )*60 + sec; /* finally seconds */ +} + +void +efi_gettimeofday (struct timeval *tv) +{ + efi_time_t tm; + + memset(tv, 0, sizeof(tv)); + if ((*efi.get_time)(&tm, 0) != EFI_SUCCESS) + return; + + tv->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second); + tv->tv_usec = tm.nanosecond / 1000; +} + +/* + * Walks the EFI memory map and calls CALLBACK once for each EFI + * memory descriptor that has memory that is available for OS use. + */ +void +efi_memmap_walk (efi_freemem_callback_t callback, void *arg) +{ + int prev_valid = 0; + struct range { + u64 start; + u64 end; + } prev, curr; + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size, start, end; + + efi_map_start = __va(ia64_boot_param.efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param.efi_memmap_size; + efi_desc_size = ia64_boot_param.efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: +#ifndef CONFIG_IA64_VIRTUAL_MEM_MAP + if (md->phys_addr > 1024*1024*1024UL) { + printk("Warning: ignoring %luMB of memory above 1GB!\n", + md->num_pages >> 8); + md->type = EFI_UNUSABLE_MEMORY; + continue; + } +#endif + + curr.start = PAGE_OFFSET + md->phys_addr; + curr.end = curr.start + (md->num_pages << 12); + + if (!prev_valid) { + prev = curr; + prev_valid = 1; + } else { + if (curr.start < prev.start) + printk("Oops: EFI memory table not ordered!\n"); + + if (prev.end == curr.start) { + /* merge two consecutive memory ranges */ + prev.end = curr.end; + } else { + start = PAGE_ALIGN(prev.start); + end = prev.end & PAGE_MASK; + if ((end > start) && (*callback)(start, end, arg) < 0) + return; + prev = curr; + } + } + break; + + default: + continue; + } + } + if (prev_valid) { + start = PAGE_ALIGN(prev.start); + end = prev.end & PAGE_MASK; + if (end > start) + (*callback)(start, end, arg); + } +} + +void __init +efi_init (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_config_table_t *config_tables; + efi_memory_desc_t *md; + efi_char16_t *c16; + u64 efi_desc_size; + char vendor[100] = "unknown"; + int i; + + efi.systab = __va(ia64_boot_param.efi_systab); + + /* + * Verify the EFI Table + */ + if (efi.systab == NULL) + panic("Woah! Can't find EFI system table.\n"); + if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) + panic("Woah! 
EFI system table signature incorrect\n"); + if (efi.systab->hdr.revision != EFI_SYSTEM_TABLE_REVISION) + printk("Warning: EFI system table version mismatch: " + "got %d.%02d, expected %d.%02d\n", + efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, + EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff); + + config_tables = __va(efi.systab->tables); + + /* Show what we know for posterity */ + c16 = __va(efi.systab->fw_vendor); + if (c16) { + for (i = 0;i < sizeof(vendor) && *c16; ++i) + vendor[i] = *c16++; + vendor[i] = '\0'; + } + + printk("EFI v%u.%.02u by %s:", + efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); + + for (i = 0; i < efi.systab->nr_tables; i++) { + if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { + efi.mps = __va(config_tables[i].table); + printk(" MPS=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { + efi.acpi = __va(config_tables[i].table); + printk(" ACPI=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { + efi.smbios = __va(config_tables[i].table); + printk(" SMBIOS=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) { + efi.sal_systab = __va(config_tables[i].table); + printk(" SALsystab=0x%lx", config_tables[i].table); + } + } + printk("\n"); + + runtime = __va(efi.systab->runtime); + efi.get_time = phys_get_time; + efi.set_time = phys_set_time; + efi.get_wakeup_time = phys_get_wakeup_time; + efi.set_wakeup_time = phys_set_wakeup_time; + efi.get_variable = phys_get_variable; + efi.get_next_variable = phys_get_next_variable; + efi.set_variable = phys_set_variable; + efi.get_next_high_mono_count = phys_get_next_high_mono_count; + efi.reset_system = phys_reset_system; + + efi_map_start = __va(ia64_boot_param.efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param.efi_memmap_size; + efi_desc_size = ia64_boot_param.efi_memdesc_size; + +#ifdef EFI_DEBUG + /* print EFI memory map: */ + for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) { + md = p; + printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n", + i, md->type, md->attribute, + md->phys_addr, md->phys_addr + (md->num_pages<<12) - 1, md->num_pages >> 8); + } +#endif +} + +void +efi_enter_virtual_mode (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + efi_status_t status; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param.efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param.efi_memmap_size; + efi_desc_size = ia64_boot_param.efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->attribute & EFI_MEMORY_RUNTIME) { + /* + * Some descriptors have multiple bits set, so the order of + * the tests is relevant. 
+ */ + if (md->attribute & EFI_MEMORY_WB) { + md->virt_addr = (u64) __va(md->phys_addr); + } else if (md->attribute & EFI_MEMORY_UC) { + md->virt_addr = (u64) ioremap(md->phys_addr, 0); + } else if (md->attribute & EFI_MEMORY_WC) { +#if 0 + md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P + | _PAGE_D + | _PAGE_MA_WC + | _PAGE_PL_0 + | _PAGE_AR_RW)); +#else + printk("EFI_MEMORY_WC mapping\n"); + md->virt_addr = (u64) ioremap(md->phys_addr, 0); +#endif + } else if (md->attribute & EFI_MEMORY_WT) { +#if 0 + md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P + | _PAGE_D | _PAGE_MA_WT + | _PAGE_PL_0 + | _PAGE_AR_RW)); +#else + printk("EFI_MEMORY_WT mapping\n"); + md->virt_addr = (u64) ioremap(md->phys_addr, 0); +#endif + } + } + } + + status = efi_call_phys(__va(runtime->set_virtual_address_map), + ia64_boot_param.efi_memmap_size, + efi_desc_size, ia64_boot_param.efi_memdesc_version, + ia64_boot_param.efi_memmap); + if (status != EFI_SUCCESS) { + printk("Warning: unable to switch EFI into virtual mode (status=%lu)\n", status); + return; + } + + /* + * Now that EFI is in virtual mode, we arrange for EFI functions to be + * called directly: + */ + efi.get_time = __va(runtime->get_time); + efi.set_time = __va(runtime->set_time); + efi.get_wakeup_time = __va(runtime->get_wakeup_time); + efi.set_wakeup_time = __va(runtime->set_wakeup_time); + efi.get_variable = __va(runtime->get_variable); + efi.get_next_variable = __va(runtime->get_next_variable); + efi.set_variable = __va(runtime->set_variable); + efi.get_next_high_mono_count = __va(runtime->get_next_high_mono_count); + efi.reset_system = __va(runtime->reset_system); +} diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S new file mode 100644 index 000000000..4e6f1fc63 --- /dev/null +++ b/arch/ia64/kernel/efi_stub.S @@ -0,0 +1,141 @@ +/* + * EFI call stub. + * + * Copyright (C) 1999 David Mosberger <davidm@hpl.hp.com> + * + * This stub allows us to make EFI calls in physical mode with interrupts + * turned off. We need this because we can't call SetVirtualMap() until + * the kernel has booted far enough to allow allocation of struct vma_struct + * entries (which we would need to map stuff with memory attributes other + * than uncached or writeback...). Since the GetTime() service gets called + * earlier than that, we need to be able to make physical mode EFI calls from + * the kernel. + */ + +/* + * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System + * Abstraction Layer Specification", revision 2.6e). Note that + * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. + * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call + * (the br.ia instruction fails unless psr.dfl and psr.dfh are + * cleared). Fortunately, SAL promises not to touch the floating + * point regs, so at least we don't have to save f2-f127. + */ +#define PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ + IA64_PSR_DFL | IA64_PSR_DFH) + +#define PSR_BITS_TO_SET \ + (IA64_PSR_BN) + +#include <asm/processor.h> + + .text + .psr abi64 + .psr lsb + .lsb + + .text + +/* + * Switch execution mode from virtual to physical or vice versa. 
+ * + * Inputs: + * r16 = new psr to establish + */ + .proc switch_mode +switch_mode: + { + alloc r2=ar.pfs,0,0,0,0 + rsm psr.i | psr.ic // disable interrupts and interrupt collection + mov r15=ip + } + ;; + { + flushrs // must be first insn in group + srlz.i + shr.u r19=r15,61 // r19 <- top 3 bits of current IP + } + ;; + mov cr.ipsr=r16 // set new PSR + add r3=1f-switch_mode,r15 + xor r15=0x7,r19 // flip the region bits + + mov r17=ar.bsp + mov r14=rp // get return address into a general register + + // switch RSE backing store: + ;; + dep r17=r15,r17,61,3 // make ar.bsp physical or virtual + mov r18=ar.rnat // save ar.rnat + ;; + mov ar.bspstore=r17 // this steps on ar.rnat + dep r3=r15,r3,61,3 // make rfi return address physical or virtual + ;; + mov cr.iip=r3 + mov cr.ifs=r0 + dep sp=r15,sp,61,3 // make stack pointer physical or virtual + ;; + mov ar.rnat=r18 // restore ar.rnat + dep r14=r15,r14,61,3 // make function return address physical or virtual + rfi // must be last insn in group + ;; +1: mov rp=r14 + br.ret.sptk.few rp + .endp switch_mode + +/* + * Inputs: + * in0 = address of function descriptor of EFI routine to call + * in1..in7 = arguments to routine + * + * Outputs: + * r8 = EFI_STATUS returned by called function + */ + + .global efi_call_phys + .proc efi_call_phys +efi_call_phys: + + alloc loc0=ar.pfs,8,5,7,0 + ld8 r2=[in0],8 // load EFI function's entry point + mov loc1=rp + ;; + mov loc2=gp // save global pointer + mov loc4=ar.rsc // save RSE configuration + mov ar.rsc=r0 // put RSE in enforced lazy, LE mode + ;; + + ld8 gp=[in0] // load EFI function's global pointer + mov out0=in1 + mov out1=in2 + movl r16=PSR_BITS_TO_CLEAR + + mov loc3=psr // save processor status word + movl r17=PSR_BITS_TO_SET + ;; + mov out2=in3 + or loc3=loc3,r17 + mov b6=r2 + ;; + andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared + mov out3=in4 + br.call.sptk.few rp=switch_mode +.ret0: + mov out4=in5 + mov out5=in6 + mov out6=in7 + br.call.sptk.few rp=b6 // call the EFI function +.ret1: + mov ar.rsc=r0 // put RSE in enforced lazy, LE mode + mov r16=loc3 + br.call.sptk.few rp=switch_mode // return to virtual mode +.ret2: + mov ar.rsc=loc4 // restore RSE configuration + mov ar.pfs=loc0 + mov rp=loc1 + mov gp=loc2 + br.ret.sptk.few rp + + .endp efi_call_phys diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S new file mode 100644 index 000000000..87e77c677 --- /dev/null +++ b/arch/ia64/kernel/entry.S @@ -0,0 +1,1261 @@ +/* + * ia64/kernel/entry.S + * + * Kernel entry points. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com> + * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com> + */ +/* + * Global (preserved) predicate usage on syscall entry/exit path: + * + * + * pEOI: See entry.h. + * pKern: See entry.h. + * pSys: See entry.h. + * pNonSys: !pSys + * p2: (Alias of pKern!) True if any signals are pending. + * p16/p17: Used by stubs calling ia64_do_signal to indicate if current task + * has PF_PTRACED flag bit set. p16 is true if so, p17 is the complement. 
+ */ + +#include <linux/config.h> + +#include <asm/errno.h> +#include <asm/offsets.h> +#include <asm/processor.h> +#include <asm/unistd.h> + +#include "entry.h" + + .text + .psr abi64 + .psr lsb + .lsb + + /* + * execve() is special because in case of success, we need to + * setup a null register window frame. + */ + .align 16 + .proc ia64_execve +ia64_execve: + alloc loc0=ar.pfs,3,2,4,0 + mov loc1=rp + mov out0=in0 // filename + ;; // stop bit between alloc and call + mov out1=in1 // argv + mov out2=in2 // envp + add out3=16,sp // regs + br.call.sptk.few rp=sys_execve +.ret0: cmp4.ge p6,p0=r8,r0 + mov ar.pfs=loc0 // restore ar.pfs + ;; +(p6) mov ar.pfs=r0 // clear ar.pfs in case of success + sxt4 r8=r8 // return 64-bit result + mov rp=loc1 + + br.ret.sptk.few rp + .endp ia64_execve + + .align 16 + .global sys_clone + .proc sys_clone +sys_clone: + alloc r16=ar.pfs,2,2,3,0;; + movl r28=1f + mov loc1=rp + br.cond.sptk.many save_switch_stack +1: + mov loc0=r16 // save ar.pfs across do_fork + adds out2=IA64_SWITCH_STACK_SIZE+16,sp + adds r2=IA64_SWITCH_STACK_SIZE+IA64_PT_REGS_R12_OFFSET+16,sp + cmp.eq p8,p9=in1,r0 // usp == 0? + mov out0=in0 // out0 = clone_flags + ;; +(p8) ld8 out1=[r2] // fetch usp from pt_regs.r12 +(p9) mov out1=in1 + br.call.sptk.few rp=do_fork +.ret1: + mov ar.pfs=loc0 + adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack + mov rp=loc1 + ;; + br.ret.sptk.many rp + .endp sys_clone + +/* + * prev_task <- switch_to(struct task_struct *next) + */ + .align 16 + .global ia64_switch_to + .proc ia64_switch_to +ia64_switch_to: + alloc r16=ar.pfs,1,0,0,0 + movl r28=1f + br.cond.sptk.many save_switch_stack +1: + // disable interrupts to ensure atomicity for next few instructions: + mov r17=psr // M-unit + ;; + rsm psr.i // M-unit + dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff + ;; + srlz.d + ;; + adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 + adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 + ;; + st8 [r22]=sp // save kernel stack pointer of old task + ld8 sp=[r21] // load kernel stack pointer of new task + and r20=in0,r18 // physical address of "current" + ;; + mov r8=r13 // return pointer to previously running task + mov r13=in0 // set "current" pointer + mov ar.k6=r20 // copy "current" into ar.k6 + ;; + // restore interrupts + mov psr.l=r17 + ;; + srlz.d + + movl r28=1f + br.cond.sptk.many load_switch_stack +1: + br.ret.sptk.few rp + .endp ia64_switch_to + + /* + * Like save_switch_stack, but also save the stack frame that is active + * at the time this function is called. + */ + .align 16 + .proc save_switch_stack_with_current_frame +save_switch_stack_with_current_frame: +1: { + alloc r16=ar.pfs,0,0,0,0 // pass ar.pfs to save_switch_stack + mov r28=ip + } + ;; + adds r28=1f-1b,r28 + br.cond.sptk.many save_switch_stack +1: br.ret.sptk.few rp + .endp save_switch_stack_with_current_frame +/* + * Note that interrupts are enabled during save_switch_stack and + * load_switch_stack. This means that we may get an interrupt with + * "sp" pointing to the new kernel stack while ar.bspstore is still + * pointing to the old kernel backing store area. Since ar.rsc, + * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, + * this is not a problem. 
+ */ + +/* + * save_switch_stack: + * - r16 holds ar.pfs + * - r28 holds address to return to + * - rp (b0) holds return address to save + */ + .align 16 + .global save_switch_stack + .proc save_switch_stack +save_switch_stack: + flushrs // flush dirty regs to backing store (must be first in insn group) + mov r17=ar.unat // preserve caller's + adds r2=-IA64_SWITCH_STACK_SIZE+16,sp // r2 = &sw->caller_unat + ;; + mov r18=ar.fpsr // preserve fpsr + mov ar.rsc=r0 // put RSE in mode: enforced lazy, little endian, pl 0 + ;; + mov r19=ar.rnat + adds r3=-IA64_SWITCH_STACK_SIZE+24,sp // r3 = &sw->ar_fpsr + + // Note: the instruction ordering is important here: we can't + // store anything to the switch stack before sp is updated + // as otherwise an interrupt might overwrite the memory! + adds sp=-IA64_SWITCH_STACK_SIZE,sp + ;; + st8 [r2]=r17,16 + st8 [r3]=r18,24 + ;; + stf.spill [r2]=f2,32 + stf.spill [r3]=f3,32 + mov r21=b0 + ;; + stf.spill [r2]=f4,32 + stf.spill [r3]=f5,32 + ;; + stf.spill [r2]=f10,32 + stf.spill [r3]=f11,32 + mov r22=b1 + ;; + stf.spill [r2]=f12,32 + stf.spill [r3]=f13,32 + mov r23=b2 + ;; + stf.spill [r2]=f14,32 + stf.spill [r3]=f15,32 + mov r24=b3 + ;; + stf.spill [r2]=f16,32 + stf.spill [r3]=f17,32 + mov r25=b4 + ;; + stf.spill [r2]=f18,32 + stf.spill [r3]=f19,32 + mov r26=b5 + ;; + stf.spill [r2]=f20,32 + stf.spill [r3]=f21,32 + mov r17=ar.lc // I-unit + ;; + stf.spill [r2]=f22,32 + stf.spill [r3]=f23,32 + ;; + stf.spill [r2]=f24,32 + stf.spill [r3]=f25,32 + ;; + stf.spill [r2]=f26,32 + stf.spill [r3]=f27,32 + ;; + stf.spill [r2]=f28,32 + stf.spill [r3]=f29,32 + ;; + stf.spill [r2]=f30,32 + stf.spill [r3]=f31,24 + ;; + st8.spill [r2]=r4,16 + st8.spill [r3]=r5,16 + ;; + st8.spill [r2]=r6,16 + st8.spill [r3]=r7,16 + ;; + st8 [r2]=r21,16 // save b0 + st8 [r3]=r22,16 // save b1 + /* since we're done with the spills, read and save ar.unat: */ + mov r18=ar.unat // M-unit + mov r20=ar.bspstore // M-unit + ;; + st8 [r2]=r23,16 // save b2 + st8 [r3]=r24,16 // save b3 + ;; + st8 [r2]=r25,16 // save b4 + st8 [r3]=r26,16 // save b5 + ;; + st8 [r2]=r16,16 // save ar.pfs + st8 [r3]=r17,16 // save ar.lc + mov r21=pr + ;; + st8 [r2]=r18,16 // save ar.unat + st8 [r3]=r19,16 // save ar.rnat + mov b7=r28 + ;; + st8 [r2]=r20 // save ar.bspstore + st8 [r3]=r21 // save predicate registers + mov ar.rsc=3 // put RSE back into eager mode, pl 0 + br.cond.sptk.few b7 + .endp save_switch_stack + +/* + * load_switch_stack: + * - r28 holds address to return to + */ + .align 16 + .proc load_switch_stack +load_switch_stack: + invala // invalidate ALAT + adds r2=IA64_SWITCH_STACK_B0_OFFSET+16,sp // get pointer to switch_stack.b0 + mov ar.rsc=r0 // put RSE into enforced lazy mode + adds r3=IA64_SWITCH_STACK_B0_OFFSET+24,sp // get pointer to switch_stack.b1 + ;; + ld8 r21=[r2],16 // restore b0 + ld8 r22=[r3],16 // restore b1 + ;; + ld8 r23=[r2],16 // restore b2 + ld8 r24=[r3],16 // restore b3 + ;; + ld8 r25=[r2],16 // restore b4 + ld8 r26=[r3],16 // restore b5 + ;; + ld8 r16=[r2],16 // restore ar.pfs + ld8 r17=[r3],16 // restore ar.lc + ;; + ld8 r18=[r2],16 // restore ar.unat + ld8 r19=[r3],16 // restore ar.rnat + mov b0=r21 + ;; + ld8 r20=[r2] // restore ar.bspstore + ld8 r21=[r3] // restore predicate registers + mov ar.pfs=r16 + ;; + mov ar.bspstore=r20 + ;; + loadrs // invalidate stacked regs outside current frame + adds r2=16-IA64_SWITCH_STACK_SIZE,r2 // get pointer to switch_stack.caller_unat + ;; // stop bit for rnat dependency + mov ar.rnat=r19 + mov ar.unat=r18 // establish unat holding the NaT bits 
for r4-r7 + adds r3=16-IA64_SWITCH_STACK_SIZE,r3 // get pointer to switch_stack.ar_fpsr + ;; + ld8 r18=[r2],16 // restore caller's unat + ld8 r19=[r3],24 // restore fpsr + mov ar.lc=r17 + ;; + ldf.fill f2=[r2],32 + ldf.fill f3=[r3],32 + mov pr=r21,-1 + ;; + ldf.fill f4=[r2],32 + ldf.fill f5=[r3],32 + ;; + ldf.fill f10=[r2],32 + ldf.fill f11=[r3],32 + mov b1=r22 + ;; + ldf.fill f12=[r2],32 + ldf.fill f13=[r3],32 + mov b2=r23 + ;; + ldf.fill f14=[r2],32 + ldf.fill f15=[r3],32 + mov b3=r24 + ;; + ldf.fill f16=[r2],32 + ldf.fill f17=[r3],32 + mov b4=r25 + ;; + ldf.fill f18=[r2],32 + ldf.fill f19=[r3],32 + mov b5=r26 + ;; + ldf.fill f20=[r2],32 + ldf.fill f21=[r3],32 + ;; + ldf.fill f22=[r2],32 + ldf.fill f23=[r3],32 + ;; + ldf.fill f24=[r2],32 + ldf.fill f25=[r3],32 + ;; + ldf.fill f26=[r2],32 + ldf.fill f27=[r3],32 + ;; + ldf.fill f28=[r2],32 + ldf.fill f29=[r3],32 + ;; + ldf.fill f30=[r2],32 + ldf.fill f31=[r3],24 + ;; + ld8.fill r4=[r2],16 + ld8.fill r5=[r3],16 + mov b7=r28 + ;; + ld8.fill r6=[r2],16 + ld8.fill r7=[r3],16 + mov ar.unat=r18 // restore caller's unat + mov ar.fpsr=r19 // restore fpsr + mov ar.rsc=3 // put RSE back into eager mode, pl 0 + adds sp=IA64_SWITCH_STACK_SIZE,sp // pop switch_stack + br.cond.sptk.few b7 + .endp load_switch_stack + + .align 16 + .global __ia64_syscall + .proc __ia64_syscall +__ia64_syscall: + .regstk 6,0,0,0 + mov r15=in5 // put syscall number in place + break __BREAK_SYSCALL + movl r2=errno + cmp.eq p6,p7=-1,r10 + ;; +(p6) st4 [r2]=r8 +(p6) mov r8=-1 + br.ret.sptk.few rp + .endp __ia64_syscall + + // + // We invoke syscall_trace through this intermediate function to + // ensure that the syscall input arguments are not clobbered. We + // also use it to preserve b6, which contains the syscall entry point. + // + .align 16 + .global invoke_syscall_trace + .proc invoke_syscall_trace +invoke_syscall_trace: + alloc loc0=ar.pfs,8,3,0,0 + ;; // WAW on CFM at the br.call + mov loc1=rp + br.call.sptk.many rp=save_switch_stack_with_current_frame // must preserve b6!! +.ret2: mov loc2=b6 + br.call.sptk.few rp=syscall_trace +.ret3: adds sp=IA64_SWITCH_STACK_SIZE,sp // drop switch_stack frame + mov rp=loc1 + mov ar.pfs=loc0 + mov b6=loc2 + ;; + br.ret.sptk.few rp + .endp invoke_syscall_trace + + // + // Invoke a system call, but do some tracing before and after the call. + // We MUST preserve the current register frame throughout this routine + // because some system calls (such as ia64_execve) directly + // manipulate ar.pfs. + // + // Input: + // r15 = syscall number + // b6 = syscall entry point + // + .global ia64_trace_syscall + .global ia64_strace_leave_kernel + .global ia64_strace_clear_r8 + + .proc ia64_strace_clear_r8 +ia64_strace_clear_r8: // this is where we return after cloning when PF_TRACESYS is on +# ifdef CONFIG_SMP + br.call.sptk.few rp=invoke_schedule_tail +# endif + mov r8=0 + br strace_check_retval + .endp ia64_strace_clear_r8 + + .proc ia64_trace_syscall +ia64_trace_syscall: + br.call.sptk.few rp=invoke_syscall_trace // give parent a chance to catch syscall args +.ret4: br.call.sptk.few rp=b6 // do the syscall +strace_check_retval: +.ret5: cmp.lt p6,p0=r8,r0 // syscall failed? 
+ ;; + adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8 + adds r3=IA64_PT_REGS_R8_OFFSET+32,sp // r3 = &pt_regs.r10 + mov r10=0 +(p6) br.cond.sptk.few strace_error // syscall failed -> + ;; // avoid RAW on r10 +strace_save_retval: + st8.spill [r2]=r8 // store return value in slot for r8 + st8.spill [r3]=r10 // clear error indication in slot for r10 +ia64_strace_leave_kernel: + br.call.sptk.few rp=invoke_syscall_trace // give parent a chance to catch return value +.ret6: br.cond.sptk.many ia64_leave_kernel + +strace_error: + ld8 r3=[r2] // load pt_regs.r8 + sub r9=0,r8 // negate return value to get errno value + ;; + cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0? + adds r3=16,r2 // r3=&pt_regs.r10 + ;; +(p6) mov r10=-1 +(p6) mov r8=r9 + br.cond.sptk.few strace_save_retval + .endp ia64_trace_syscall + +/* + * A couple of convenience macros to help implement/understand the state + * restoration that happens at the end of ia64_ret_from_syscall. + */ +#define rARPR r31 +#define rCRIFS r30 +#define rCRIPSR r29 +#define rCRIIP r28 +#define rARRSC r27 +#define rARPFS r26 +#define rARUNAT r25 +#define rARRNAT r24 +#define rARBSPSTORE r23 +#define rKRBS r22 +#define rB6 r21 + + .align 16 + .global ia64_ret_from_syscall + .global ia64_ret_from_syscall_clear_r8 + .global ia64_leave_kernel + .proc ia64_ret_from_syscall +ia64_ret_from_syscall_clear_r8: +#ifdef CONFIG_SMP + // In SMP mode, we need to call schedule_tail to complete the scheduling process. + // Called by ia64_switch_to after do_fork()->copy_thread(). r8 contains the + // address of the previously executing task. + br.call.sptk.few rp=invoke_schedule_tail +.ret7: +#endif + mov r8=0 + ;; // added stop bits to prevent r8 dependency +ia64_ret_from_syscall: + cmp.ge p6,p7=r8,r0 // syscall executed successfully? + adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8 + adds r3=IA64_PT_REGS_R8_OFFSET+32,sp // r3 = &pt_regs.r10 + ;; +(p6) st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit +(p6) st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit +(p7) br.cond.spnt.few handle_syscall_error // handle potential syscall failure + +ia64_leave_kernel: + // check & deliver software interrupts (bottom half handlers): + + movl r2=bh_active // sheesh, why aren't these two in + movl r3=bh_mask // a struct?? + ;; + ld8 r2=[r2] + ld8 r3=[r3] + ;; + and r2=r2,r3 + ;; + cmp.ne p6,p7=r2,r0 // any soft interrupts ready for delivery? +(p6) br.call.dpnt.few rp=invoke_do_bottom_half +1: +(pKern) br.cond.dpnt.many restore_all // yup -> skip check for rescheduling & signal delivery + + // call schedule() until we find a task that doesn't have need_resched set: + +back_from_resched: + { .mii + adds r2=IA64_TASK_NEED_RESCHED_OFFSET,r13 + mov r3=ip + adds r14=IA64_TASK_SIGPENDING_OFFSET,r13 + } + ;; + ld8 r2=[r2] + ld4 r14=[r14] + mov rp=r3 // arrange for schedule() to return to back_from_resched + ;; + /* + * If pEOI is set, we need to write the cr.eoi now and then + * clear pEOI because both invoke_schedule() and + * handle_signal_delivery() may call the scheduler. Since + * we're returning to user-level, we get at most one nested + * interrupt of the same priority level, which doesn't tax the + * kernel stack too much. + */ +(pEOI) mov cr.eoi=r0 + cmp.ne p6,p0=r2,r0 + cmp.ne p2,p0=r14,r0 // NOTE: pKern is an alias for p2!! 
+(pEOI) cmp.ne pEOI,p0=r0,r0 // clear pEOI before calling schedule() + srlz.d +(p6) br.call.spnt.many b6=invoke_schedule // ignore return value +2: + // check & deliver pending signals: +(p2) br.call.spnt.few rp=handle_signal_delivery +restore_all: + + // start restoring the state saved on the kernel stack (struct pt_regs): + + adds r2=IA64_PT_REGS_R8_OFFSET+16,r12 + adds r3=IA64_PT_REGS_R8_OFFSET+24,r12 + ;; + ld8.fill r8=[r2],16 + ld8.fill r9=[r3],16 + ;; + ld8.fill r10=[r2],16 + ld8.fill r11=[r3],16 + ;; + ld8.fill r16=[r2],16 + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + ;; + ld8.fill r22=[r2],16 + ld8.fill r23=[r3],16 + ;; + ld8.fill r24=[r2],16 + ld8.fill r25=[r3],16 + ;; + ld8.fill r26=[r2],16 + ld8.fill r27=[r3],16 + ;; + ld8.fill r28=[r2],16 + ld8.fill r29=[r3],16 + ;; + ld8.fill r30=[r2],16 + ld8.fill r31=[r3],16 + ;; + ld8 r1=[r2],16 // ar.ccv + ld8 r13=[r3],16 // ar.fpsr + ;; + ld8 r14=[r2],16 // b0 + ld8 r15=[r3],16+8 // b7 + ;; + ldf.fill f6=[r2],32 + ldf.fill f7=[r3],32 + ;; + ldf.fill f8=[r2],32 + ldf.fill f9=[r3],32 + ;; + mov ar.ccv=r1 + mov ar.fpsr=r13 + mov b0=r14 + // turn off interrupts, interrupt collection, & data translation + rsm psr.i | psr.ic | psr.dt + ;; + srlz.i // EAS 2.5 + mov b7=r15 + ;; + invala // invalidate ALAT + dep r12=0,r12,61,3 // convert sp to physical address + bsw.0;; // switch back to bank 0 (must be last in insn group) + ;; +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + nop.i 0x0 + ;; + nop.i 0x0 + ;; + nop.i 0x0 + ;; +#endif + adds r16=16,r12 + adds r17=24,r12 + ;; + ld8 rCRIPSR=[r16],16 // load cr.ipsr + ld8 rCRIIP=[r17],16 // load cr.iip + ;; + ld8 rCRIFS=[r16],16 // load cr.ifs + ld8 rARUNAT=[r17],16 // load ar.unat + ;; + ld8 rARPFS=[r16],16 // load ar.pfs + ld8 rARRSC=[r17],16 // load ar.rsc + ;; + ld8 rARRNAT=[r16],16 // load ar.rnat (may be garbage) + ld8 rARBSPSTORE=[r17],16 // load ar.bspstore (may be garbage) + ;; + ld8 rARPR=[r16],16 // load predicates + ld8 rB6=[r17],16 // load b6 + ;; + ld8 r18=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 // load r1 + ;; + ld8.fill r2=[r16],16 + ld8.fill r3=[r17],16 + ;; + ld8.fill r12=[r16],16 + ld8.fill r13=[r17],16 + extr.u r19=rCRIPSR,32,2 // extract ps.cpl + ;; + ld8.fill r14=[r16],16 + ld8.fill r15=[r17],16 + cmp.eq p6,p7=r0,r19 // are we returning to kernel mode? (psr.cpl==0) + ;; + mov b6=rB6 + mov ar.pfs=rARPFS +(p6) br.cond.dpnt.few skip_rbs_switch + + /* + * Restore user backing store. + * + * NOTE: alloc, loadrs, and cover can't be predicated. + * + * XXX This needs some scheduling/tuning once we believe it + * really does work as intended. + */ + mov r16=ar.bsp // get existing backing store pointer +(pNonSys) br.cond.dpnt.few dont_preserve_current_frame + cover // add current frame into dirty partition + ;; + mov rCRIFS=cr.ifs // fetch the cr.ifs value that "cover" produced + mov r17=ar.bsp // get new backing store pointer + ;; + sub r16=r17,r16 // calculate number of bytes that were added to rbs + ;; + shl r16=r16,16 // shift additional frame size into position for loadrs + ;; + add r18=r16,r18 // adjust the loadrs value + ;; +#ifdef CONFIG_IA64_SOFTSDV_HACKS + // Reset ITM if we've missed a timer tick. 
Workaround for SoftSDV bug + mov r16 = r2 + mov r2 = ar.itc + mov r17 = cr.itm + ;; + cmp.gt p6,p7 = r2, r17 +(p6) addl r17 = 100, r2 + ;; + mov cr.itm = r17 + mov r2 = r16 +#endif +dont_preserve_current_frame: + alloc r16=ar.pfs,0,0,0,0 // drop the current call frame (noop for syscalls) + ;; + mov ar.rsc=r18 // load ar.rsc to be used for "loadrs" +#ifdef CONFIG_IA32_SUPPORT + tbit.nz p6,p0=rCRIPSR,IA64_PSR_IS_BIT + ;; +(p6) mov ar.rsc=r0 // returning to IA32 mode +#endif + ;; + loadrs + ;; + mov ar.bspstore=rARBSPSTORE + ;; + mov ar.rnat=rARRNAT // must happen with RSE in lazy mode + +skip_rbs_switch: + mov ar.rsc=rARRSC + mov ar.unat=rARUNAT + mov cr.ifs=rCRIFS // restore cr.ifs only if not a (synchronous) syscall +(pEOI) mov cr.eoi=r0 + mov pr=rARPR,-1 + mov cr.iip=rCRIIP + mov cr.ipsr=rCRIPSR + ;; + rfi;; // must be last instruction in an insn group + +handle_syscall_error: + /* + * Some system calls (e.g., ptrace, mmap) can return arbitrary + * values which could lead us to mistake a negative return + * value as a failed syscall. Those syscall must deposit + * a non-zero value in pt_regs.r8 to indicate an error. + * If pt_regs.r8 is zero, we assume that the call completed + * successfully. + */ + ld8 r3=[r2] // load pt_regs.r8 + sub r9=0,r8 // negate return value to get errno + ;; + mov r10=-1 // return -1 in pt_regs.r10 to indicate error + cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0? + adds r3=16,r2 // r3=&pt_regs.r10 + ;; +(p6) mov r9=r8 +(p6) mov r10=0 + ;; + st8.spill [r2]=r9 // store errno in pt_regs.r8 and set unat bit + st8.spill [r3]=r10 // store error indication in pt_regs.r10 and set unat bit + br.cond.sptk.many ia64_leave_kernel + .endp __ret_from_syscall + +#ifdef CONFIG_SMP + /* + * Invoke schedule_tail(task) while preserving in0-in7, which may be needed + * in case a system call gets restarted. + */ + .proc invoke_schedule_tail +invoke_schedule_tail: + alloc loc0=ar.pfs,8,2,1,0 + mov loc1=rp + mov out0=r8 // Address of previous task + ;; + br.call.sptk.few rp=schedule_tail +.ret8: + mov ar.pfs=loc0 + mov rp=loc1 + br.ret.sptk.many rp + .endp invoke_schedule_tail +#endif /* CONFIG_SMP */ + + /* + * Invoke do_bottom_half() while preserving in0-in7, which may be needed + * in case a system call gets restarted. + */ + .proc invoke_do_bottom_half +invoke_do_bottom_half: + alloc loc0=ar.pfs,8,2,0,0 + mov loc1=rp + ;; + br.call.sptk.few rp=do_bottom_half +.ret9: + mov ar.pfs=loc0 + mov rp=loc1 + br.ret.sptk.many rp + .endp invoke_do_bottom_half + + /* + * Invoke schedule() while preserving in0-in7, which may be needed + * in case a system call gets restarted. + */ + .proc invoke_schedule +invoke_schedule: + alloc loc0=ar.pfs,8,2,0,0 + mov loc1=rp + ;; + br.call.sptk.few rp=schedule +.ret10: + mov ar.pfs=loc0 + mov rp=loc1 + br.ret.sptk.many rp + .endp invoke_schedule + + // + // Setup stack and call ia64_do_signal. Note that pSys and pNonSys need to + // be set up by the caller. We declare 8 input registers so the system call + // args get preserved, in case we need to restart a system call. + // + .align 16 + .proc handle_signal_delivery +handle_signal_delivery: + alloc loc0=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! + mov r9=ar.unat + + // If the process is being ptraced, the signal may not actually be delivered to + // the process. Instead, SIGCHLD will be sent to the parent. We need to + // setup a switch_stack so ptrace can inspect the processes state if necessary. 
+ adds r2=IA64_TASK_FLAGS_OFFSET,r13 + ;; + ld8 r2=[r2] + mov out0=0 // there is no "oldset" + adds out1=16,sp // out1=&pt_regs + ;; +(pSys) mov out2=1 // out2==1 => we're in a syscall + tbit.nz p16,p17=r2,PF_PTRACED_BIT +(p16) br.cond.spnt.many setup_switch_stack + ;; +back_from_setup_switch_stack: +(pNonSys) mov out2=0 // out2==0 => not a syscall + adds r3=-IA64_SWITCH_STACK_SIZE+IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp +(p17) adds sp=-IA64_SWITCH_STACK_SIZE,sp // make space for (dummy) switch_stack + ;; +(p17) st8 [r3]=r9 // save ar.unat in sw->caller_unat + mov loc1=rp // save return address + br.call.sptk.few rp=ia64_do_signal +.ret11: + adds r3=IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp + ;; + ld8 r9=[r3] // load new unat from sw->caller_unat + mov rp=loc1 + ;; +(p17) adds sp=IA64_SWITCH_STACK_SIZE,sp // drop (dummy) switch_stack +(p17) mov ar.unat=r9 +(p17) mov ar.pfs=loc0 +(p17) br.ret.sptk.many rp + + // restore the switch stack (ptrace may have modified it): + movl r28=1f + br.cond.sptk.many load_switch_stack +1: br.ret.sptk.many rp + // NOT REACHED + +setup_switch_stack: + movl r28=back_from_setup_switch_stack + mov r16=loc0 + br.cond.sptk.many save_switch_stack + // NOT REACHED + + .endp handle_signal_delivery + + .align 16 + .proc sys_rt_sigsuspend + .global sys_rt_sigsuspend +sys_rt_sigsuspend: + alloc loc0=ar.pfs,2,2,3,0 + mov r9=ar.unat + + // If the process is being ptraced, the signal may not actually be delivered to + // the process. Instead, SIGCHLD will be sent to the parent. We need to + // setup a switch_stack so ptrace can inspect the processes state if necessary. + adds r2=IA64_TASK_FLAGS_OFFSET,r13 + ;; + ld8 r2=[r2] + mov out0=in0 // mask + mov out1=in1 // sigsetsize + ;; + adds out2=16,sp // out1=&pt_regs + tbit.nz p16,p17=r2,PF_PTRACED_BIT +(p16) br.cond.spnt.many sigsuspend_setup_switch_stack + ;; +back_from_sigsuspend_setup_switch_stack: + adds r3=-IA64_SWITCH_STACK_SIZE+IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp +(p17) adds sp=-IA64_SWITCH_STACK_SIZE,sp // make space for (dummy) switch_stack + ;; +(p17) st8 [r3]=r9 // save ar.unat in sw->caller_unat + mov loc1=rp // save return address + br.call.sptk.many rp=ia64_rt_sigsuspend +.ret12: + adds r3=IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp + ;; + ld8 r9=[r3] // load new unat from sw->caller_unat + mov rp=loc1 + ;; +(p17) adds sp=IA64_SWITCH_STACK_SIZE,sp // drop (dummy) switch_stack +(p17) mov ar.unat=r9 +(p17) mov ar.pfs=loc0 +(p17) br.ret.sptk.many rp + + // restore the switch stack (ptrace may have modified it): + movl r28=1f + br.cond.sptk.many load_switch_stack +1: br.ret.sptk.many rp + // NOT REACHED + +sigsuspend_setup_switch_stack: + movl r28=back_from_sigsuspend_setup_switch_stack + mov r16=loc0 + br.cond.sptk.many save_switch_stack + // NOT REACHED + + .endp sys_rt_sigsuspend + + .align 16 + .proc sys_rt_sigreturn +sys_rt_sigreturn: + alloc loc0=ar.pfs,8,1,1,0 // preserve all eight input regs in case of syscall restart! 
+ adds out0=16,sp // out0 = &pt_regs + ;; + adds sp=-IA64_SWITCH_STACK_SIZE,sp // make space for unat and padding + br.call.sptk.few rp=ia64_rt_sigreturn +.ret13: + adds r3=IA64_SWITCH_STACK_CALLER_UNAT_OFFSET+16,sp + ;; + ld8 r9=[r3] // load new ar.unat + mov rp=r8 + ;; + adds sp=IA64_SWITCH_STACK_SIZE,sp // drop (dummy) switch-stack frame + mov ar.unat=r9 + mov ar.pfs=loc0 + br.ret.sptk.many rp + .endp sys_rt_sigreturn + + .align 16 + .global ia64_prepare_handle_unaligned + .proc ia64_prepare_handle_unaligned +ia64_prepare_handle_unaligned: + movl r28=1f + // + // r16 = fake ar.pfs, we simply need to make sure + // privilege is still 0 + // + mov r16=r0 + br.cond.sptk.few save_switch_stack +1: br.call.sptk.few rp=ia64_handle_unaligned // stack frame setup in ivt +.ret14: + movl r28=2f + br.cond.sptk.many load_switch_stack +2: br.cond.sptk.many rp // goes to ia64_leave_kernel + .endp ia64_prepare_handle_unaligned + +#ifdef CONFIG_KDB + // + // This gets called from ivt.S with: + // SAVE MIN with cover done + // SAVE REST done + // no parameters + // r15 has return value = ia64_leave_kernel + // + .align 16 + .global ia64_invoke_kdb + .proc ia64_invoke_kdb +ia64_invoke_kdb: + alloc r16=ar.pfs,0,0,4,0 + movl r28=1f // save_switch_stack protocol + ;; // avoid WAW on CFM + br.cond.sptk.many save_switch_stack // to flushrs +1: mov out0=4 // kdb entry reason + mov out1=0 // err number + adds out2=IA64_SWITCH_STACK_SIZE+16,sp // pt_regs + add out3=16,sp // switch_stack + br.call.sptk.few rp=kdb +.ret15: + movl r28=1f // load_switch_stack proto + br.cond.sptk.many load_switch_stack +1: br.ret.sptk.many rp + .endp ia64_invoke_kdb + + // + // When KDB is compiled in, we intercept each fault and give + // kdb a chance to run before calling the normal fault handler. + // + .align 16 + .global ia64_invoke_kdb_fault_handler + .proc ia64_invoke_kdb_fault_handler +ia64_invoke_kdb_fault_handler: + alloc r16=ar.pfs,5,1,5,0 + movl r28=1f + mov loc0=rp // save this + br.cond.sptk.many save_switch_stack // to flushrs + ;; // avoid WAW on CFM +1: mov out0=in0 // vector number + mov out1=in1 // cr.isr + mov out2=in2 // cr.ifa + mov out3=in3 // cr.iim + mov out4=in4 // cr.itir + br.call.sptk.few rp=ia64_kdb_fault_handler +.ret16: + + movl r28=1f + br.cond.sptk.many load_switch_stack +1: cmp.ne p6,p0=r8,r0 // did ia64_kdb_fault_handler return 0? + mov rp=loc0 +(p6) br.ret.spnt.many rp // no, we're done + ;; // avoid WAW on rp + mov out0=in0 // vector number + mov out1=in1 // cr.isr + mov out2=in2 // cr.ifa + mov out3=in3 // cr.iim + mov out4=in4 // cr.itir + mov in0=ar.pfs // preserve ar.pfs returned by load_switch_stack + br.call.sptk.few rp=ia64_fault // yup -> we need to invoke normal fault handler now +.ret17: + mov ar.pfs=in0 + mov rp=loc0 + br.ret.sptk.many rp + + .endp ia64_invoke_kdb_fault_handler + +#endif /* CONFIG_KDB */ + + .rodata + .align 8 + .globl sys_call_table +sys_call_table: + data8 sys_ni_syscall // This must be sys_ni_syscall! See ivt.S. 
+ data8 sys_exit // 1025 + data8 sys_read + data8 sys_write + data8 sys_open + data8 sys_close + data8 sys_creat // 1030 + data8 sys_link + data8 sys_unlink + data8 ia64_execve + data8 sys_chdir + data8 sys_fchdir // 1035 + data8 sys_utimes + data8 sys_mknod + data8 sys_chmod + data8 sys_chown + data8 sys_lseek // 1040 + data8 sys_getpid + data8 sys_getppid + data8 sys_mount + data8 sys_umount + data8 sys_setuid // 1045 + data8 sys_getuid + data8 sys_geteuid + data8 sys_ptrace + data8 sys_access + data8 sys_sync // 1050 + data8 sys_fsync + data8 sys_fdatasync + data8 sys_kill + data8 sys_rename + data8 sys_mkdir // 1055 + data8 sys_rmdir + data8 sys_dup + data8 sys_pipe + data8 sys_times + data8 ia64_brk // 1060 + data8 sys_setgid + data8 sys_getgid + data8 sys_getegid + data8 sys_acct + data8 sys_ioctl // 1065 + data8 sys_fcntl + data8 sys_umask + data8 sys_chroot + data8 sys_ustat + data8 sys_dup2 // 1070 + data8 sys_setreuid + data8 sys_setregid + data8 sys_getresuid + data8 sys_setresuid + data8 sys_getresgid // 1075 + data8 sys_setresgid + data8 sys_getgroups + data8 sys_setgroups + data8 sys_getpgid + data8 sys_setpgid // 1080 + data8 sys_setsid + data8 sys_getsid + data8 sys_sethostname + data8 sys_setrlimit + data8 sys_getrlimit // 1085 + data8 sys_getrusage + data8 sys_gettimeofday + data8 sys_settimeofday + data8 sys_select + data8 sys_poll // 1090 + data8 sys_symlink + data8 sys_readlink + data8 sys_uselib + data8 sys_swapon + data8 sys_swapoff // 1095 + data8 sys_reboot + data8 sys_truncate + data8 sys_ftruncate + data8 sys_fchmod + data8 sys_fchown // 1100 + data8 ia64_getpriority + data8 sys_setpriority + data8 sys_statfs + data8 sys_fstatfs + data8 sys_ioperm // 1105 + data8 sys_semget + data8 sys_semop + data8 sys_semctl + data8 sys_msgget + data8 sys_msgsnd // 1110 + data8 sys_msgrcv + data8 sys_msgctl + data8 sys_shmget + data8 ia64_shmat + data8 sys_shmdt // 1115 + data8 sys_shmctl + data8 sys_syslog + data8 sys_setitimer + data8 sys_getitimer + data8 sys_newstat // 1120 + data8 sys_newlstat + data8 sys_newfstat + data8 sys_vhangup + data8 sys_lchown + data8 sys_vm86 // 1125 + data8 sys_wait4 + data8 sys_sysinfo + data8 sys_clone + data8 sys_setdomainname + data8 sys_newuname // 1130 + data8 sys_adjtimex + data8 sys_create_module + data8 sys_init_module + data8 sys_delete_module + data8 sys_get_kernel_syms // 1135 + data8 sys_query_module + data8 sys_quotactl + data8 sys_bdflush + data8 sys_sysfs + data8 sys_personality // 1140 + data8 ia64_ni_syscall // sys_afs_syscall + data8 sys_setfsuid + data8 sys_setfsgid + data8 sys_getdents + data8 sys_flock // 1145 + data8 sys_readv + data8 sys_writev + data8 sys_pread + data8 sys_pwrite + data8 sys_sysctl // 1150 + data8 sys_mmap + data8 sys_munmap + data8 sys_mlock + data8 sys_mlockall + data8 sys_mprotect // 1155 + data8 sys_mremap + data8 sys_msync + data8 sys_munlock + data8 sys_munlockall + data8 sys_sched_getparam // 1160 + data8 sys_sched_setparam + data8 sys_sched_getscheduler + data8 sys_sched_setscheduler + data8 sys_sched_yield + data8 sys_sched_get_priority_max // 1165 + data8 sys_sched_get_priority_min + data8 sys_sched_rr_get_interval + data8 sys_nanosleep + data8 sys_nfsservctl + data8 sys_prctl // 1170 + data8 sys_getpagesize + data8 sys_mmap2 + data8 sys_pciconfig_read + data8 sys_pciconfig_write + data8 sys_perfmonctl // 1175 + data8 sys_sigaltstack + data8 sys_rt_sigaction + data8 sys_rt_sigpending + data8 sys_rt_sigprocmask + data8 sys_rt_sigqueueinfo // 1180 + data8 sys_rt_sigreturn + data8 
sys_rt_sigsuspend + data8 sys_rt_sigtimedwait + data8 sys_getcwd + data8 sys_capget // 1185 + data8 sys_capset + data8 sys_sendfile + data8 sys_ni_syscall // sys_getpmsg (STREAMS) + data8 sys_ni_syscall // sys_putpmsg (STREAMS) + data8 sys_socket // 1190 + data8 sys_bind + data8 sys_connect + data8 sys_listen + data8 sys_accept + data8 sys_getsockname // 1195 + data8 sys_getpeername + data8 sys_socketpair + data8 sys_send + data8 sys_sendto + data8 sys_recv // 1200 + data8 sys_recvfrom + data8 sys_shutdown + data8 sys_setsockopt + data8 sys_getsockopt + data8 sys_sendmsg // 1205 + data8 sys_recvmsg + data8 sys_pivot_root + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1210 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1215 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1220 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1225 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1230 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1235 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1240 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1245 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1250 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1255 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1260 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1265 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1270 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1275 + data8 ia64_ni_syscall + data8 ia64_ni_syscall + data8 ia64_ni_syscall + diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h new file mode 100644 index 000000000..ecef44f60 --- /dev/null +++ b/arch/ia64/kernel/entry.h @@ -0,0 +1,8 @@ +/* + * Preserved registers that are shared between code in ivt.S and entry.S. Be + * careful not to step on these! + */ +#define pEOI p1 /* should leave_kernel write EOI? */ +#define pKern p2 /* will leave_kernel return to kernel-mode? */ +#define pSys p4 /* are we processing a (synchronous) system call? */ +#define pNonSys p5 /* complement of pSys */ diff --git a/arch/ia64/kernel/fw-emu.c b/arch/ia64/kernel/fw-emu.c new file mode 100644 index 000000000..212ff299c --- /dev/null +++ b/arch/ia64/kernel/fw-emu.c @@ -0,0 +1,444 @@ +/* + * PAL & SAL emulation. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * For the HP simulator, this file gets include in boot/bootloader.c. + * For SoftSDV, this file gets included in sys_softsdv.c. 
+ */ +#include <linux/config.h> + +#ifdef CONFIG_PCI +# include <linux/pci.h> +#endif + +#include <asm/efi.h> +#include <asm/io.h> +#include <asm/pal.h> +#include <asm/sal.h> + +#define MB (1024*1024UL) + +#define NUM_MEM_DESCS 3 + +static char fw_mem[( sizeof(efi_system_table_t) + + sizeof(efi_runtime_services_t) + + 1*sizeof(efi_config_table_t) + + sizeof(struct ia64_sal_systab) + + sizeof(struct ia64_sal_desc_entry_point) + + NUM_MEM_DESCS*(sizeof(efi_memory_desc_t)) + + 1024)] __attribute__ ((aligned (8))); + +#ifdef CONFIG_IA64_HP_SIM + +/* Simulator system calls: */ + +#define SSC_EXIT 66 + +/* + * Simulator system call. + */ +static long +ssc (long arg0, long arg1, long arg2, long arg3, int nr) +{ + register long r8 asm ("r8"); + + asm volatile ("mov r15=%1\n\t" + "break 0x80001" + : "=r"(r8) + : "r"(nr), "r"(arg0), "r"(arg1), "r"(arg2), "r"(arg3)); + return r8; +} + +#define SECS_PER_HOUR (60 * 60) +#define SECS_PER_DAY (SECS_PER_HOUR * 24) + +/* Compute the `struct tm' representation of *T, + offset OFFSET seconds east of UTC, + and store year, yday, mon, mday, wday, hour, min, sec into *TP. + Return nonzero if successful. */ +int +offtime (unsigned long t, efi_time_t *tp) +{ + const unsigned short int __mon_yday[2][13] = + { + /* Normal years. */ + { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, + /* Leap years. */ + { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } + }; + long int days, rem, y; + const unsigned short int *ip; + + days = t / SECS_PER_DAY; + rem = t % SECS_PER_DAY; + while (rem < 0) { + rem += SECS_PER_DAY; + --days; + } + while (rem >= SECS_PER_DAY) { + rem -= SECS_PER_DAY; + ++days; + } + tp->hour = rem / SECS_PER_HOUR; + rem %= SECS_PER_HOUR; + tp->minute = rem / 60; + tp->second = rem % 60; + /* January 1, 1970 was a Thursday. */ + y = 1970; + +# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0)) +# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400)) +# define __isleap(year) \ + ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0)) + + while (days < 0 || days >= (__isleap (y) ? 366 : 365)) { + /* Guess a corrected year, assuming 365 days per year. */ + long int yg = y + days / 365 - (days % 365 < 0); + + /* Adjust DAYS and Y to match the guessed year. */ + days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1) + - LEAPS_THRU_END_OF (y - 1)); + y = yg; + } + tp->year = y; + ip = __mon_yday[__isleap(y)]; + for (y = 11; days < (long int) ip[y]; --y) + continue; + days -= ip[y]; + tp->month = y + 1; + tp->day = days + 1; + return 1; +} + +#endif /* CONFIG_IA64_HP_SIM */ + +/* + * Very ugly, but we need this in the simulator only. Once we run on + * real hw, this can all go away. + */ +extern void pal_emulator_static (void); + +asm (" + .proc pal_emulator_static +pal_emulator_static: + mov r8=-1 + cmp.eq p6,p7=6,r28 /* PAL_PTCE_INFO */ +(p7) br.cond.sptk.few 1f + ;; + mov r8=0 /* status = 0 */ + movl r9=0x100000000 /* tc.base */ + movl r10=0x0000000200000003 /* count[0], count[1] */ + movl r11=0x1000000000002000 /* stride[0], stride[1] */ + br.cond.sptk.few rp + +1: cmp.eq p6,p7=14,r28 /* PAL_FREQ_RATIOS */ +(p7) br.cond.sptk.few 1f + mov r8=0 /* status = 0 */ + movl r9 =0x100000064 /* proc_ratio (1/100) */ + movl r10=0x100000100 /* bus_ratio<<32 (1/256) */ + movl r11=0x100000064 /* itc_ratio<<32 (1/100) */ +1: br.cond.sptk.few rp + .endp pal_emulator_static\n"); + +/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. 
*/ + +#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3) + +#define REG_OFFSET(addr) (0x00000000000000FF & (addr)) +#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr)) +#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr)) + +static efi_status_t +efi_get_time (efi_time_t *tm, efi_time_cap_t *tc) +{ +#ifdef CONFIG_IA64_HP_SIM + struct { + int tv_sec; /* must be 32bits to work */ + int tv_usec; + } tv32bits; + + ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD); + + memset(tm, 0, sizeof(*tm)); + offtime(tv32bits.tv_sec, tm); + + if (tc) + memset(tc, 0, sizeof(*tc)); +#else +# error Not implemented yet... +#endif + return EFI_SUCCESS; +} + +static void +efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data) +{ +#ifdef CONFIG_IA64_HP_SIM + ssc(status, 0, 0, 0, SSC_EXIT); +#else +# error Not implemented yet... +#endif +} + +static efi_status_t +efi_unimplemented (void) +{ + return EFI_UNSUPPORTED; +} + +static long +sal_emulator (long index, unsigned long in1, unsigned long in2, + unsigned long in3, unsigned long in4, unsigned long in5, + unsigned long in6, unsigned long in7) +{ + register long r9 asm ("r9") = 0; + register long r10 asm ("r10") = 0; + register long r11 asm ("r11") = 0; + long status; + + /* + * Don't do a "switch" here since that gives us code that + * isn't self-relocatable. + */ + status = 0; + if (index == SAL_FREQ_BASE) { + switch (in1) { + case SAL_FREQ_BASE_PLATFORM: + r9 = 100000000; + break; + + case SAL_FREQ_BASE_INTERVAL_TIMER: + /* + * Is this supposed to be the cr.itc frequency + * or something platform specific? The SAL + * doc ain't exactly clear on this... + */ +#if defined(CONFIG_IA64_SOFTSDV_HACKS) + r9 = 4000000; +#elif defined(CONFIG_IA64_SDV) + r9 = 300000000; +#else + r9 = 700000000; +#endif + break; + + case SAL_FREQ_BASE_REALTIME_CLOCK: + r9 = 1; + break; + + default: + status = -1; + break; + } + } else if (index == SAL_SET_VECTORS) { + ; + } else if (index == SAL_GET_STATE_INFO) { + ; + } else if (index == SAL_GET_STATE_INFO_SIZE) { + ; + } else if (index == SAL_CLEAR_STATE_INFO) { + ; + } else if (index == SAL_MC_RENDEZ) { + ; + } else if (index == SAL_MC_SET_PARAMS) { + ; + } else if (index == SAL_CACHE_FLUSH) { + ; + } else if (index == SAL_CACHE_INIT) { + ; +#ifdef CONFIG_PCI + } else if (index == SAL_PCI_CONFIG_READ) { + /* + * in1 contains the PCI configuration address and in2 + * the size of the read. The value that is read is + * returned via the general register r9. + */ + outl(BUILD_CMD(in1), 0xCF8); + if (in2 == 1) /* Reading byte */ + r9 = inb(0xCFC + ((REG_OFFSET(in1) & 3))); + else if (in2 == 2) /* Reading word */ + r9 = inw(0xCFC + ((REG_OFFSET(in1) & 2))); + else /* Reading dword */ + r9 = inl(0xCFC); + status = PCIBIOS_SUCCESSFUL; + } else if (index == SAL_PCI_CONFIG_WRITE) { + /* + * in1 contains the PCI configuration address, in2 the + * size of the write, and in3 the actual value to be + * written out. 
+ */ + outl(BUILD_CMD(in1), 0xCF8); + if (in2 == 1) /* Writing byte */ + outb(in3, 0xCFC + ((REG_OFFSET(in1) & 3))); + else if (in2 == 2) /* Writing word */ + outw(in3, 0xCFC + ((REG_OFFSET(in1) & 2))); + else /* Writing dword */ + outl(in3, 0xCFC); + status = PCIBIOS_SUCCESSFUL; +#endif /* CONFIG_PCI */ + } else if (index == SAL_UPDATE_PAL) { + ; + } else { + status = -1; + } + asm volatile ("" :: "r"(r9), "r"(r10), "r"(r11)); + return status; +} + + +/* + * This is here to work around a bug in egcs-1.1.1b that causes the + * compiler to crash (seems like a bug in the new alias analysis code. + */ +void * +id (long addr) +{ + return (void *) addr; +} + +void +sys_fw_init (const char *args, int arglen) +{ + efi_system_table_t *efi_systab; + efi_runtime_services_t *efi_runtime; + efi_config_table_t *efi_tables; + struct ia64_sal_systab *sal_systab; + efi_memory_desc_t *efi_memmap, *md; + unsigned long *pal_desc, *sal_desc; + struct ia64_sal_desc_entry_point *sal_ed; + struct ia64_boot_param *bp; + unsigned char checksum = 0; + char *cp, *cmd_line; + + memset(fw_mem, 0, sizeof(fw_mem)); + + pal_desc = (unsigned long *) &pal_emulator_static; + sal_desc = (unsigned long *) &sal_emulator; + + cp = fw_mem; + efi_systab = (void *) cp; cp += sizeof(*efi_systab); + efi_runtime = (void *) cp; cp += sizeof(*efi_runtime); + efi_tables = (void *) cp; cp += sizeof(*efi_tables); + sal_systab = (void *) cp; cp += sizeof(*sal_systab); + sal_ed = (void *) cp; cp += sizeof(*sal_ed); + efi_memmap = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap); + cmd_line = (void *) cp; + + if (args) { + if (arglen >= 1024) + arglen = 1023; + memcpy(cmd_line, args, arglen); + } else { + arglen = 0; + } + cmd_line[arglen] = '\0'; + + memset(efi_systab, 0, sizeof(efi_systab)); + efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE; + efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION; + efi_systab->hdr.headersize = sizeof(efi_systab->hdr); + efi_systab->fw_vendor = __pa("H\0e\0w\0l\0e\0t\0t\0-\0P\0a\0c\0k\0a\0r\0d\0\0"); + efi_systab->fw_revision = 1; + efi_systab->runtime = __pa(efi_runtime); + efi_systab->nr_tables = 1; + efi_systab->tables = __pa(efi_tables); + + efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE; + efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION; + efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr); + efi_runtime->get_time = __pa(&efi_get_time); + efi_runtime->set_time = __pa(&efi_unimplemented); + efi_runtime->get_wakeup_time = __pa(&efi_unimplemented); + efi_runtime->set_wakeup_time = __pa(&efi_unimplemented); + efi_runtime->set_virtual_address_map = __pa(&efi_unimplemented); + efi_runtime->get_variable = __pa(&efi_unimplemented); + efi_runtime->get_next_variable = __pa(&efi_unimplemented); + efi_runtime->set_variable = __pa(&efi_unimplemented); + efi_runtime->get_next_high_mono_count = __pa(&efi_unimplemented); + efi_runtime->reset_system = __pa(&efi_reset_system); + + efi_tables->guid = SAL_SYSTEM_TABLE_GUID; + efi_tables->table = __pa(sal_systab); + + /* fill in the SAL system table: */ + memcpy(sal_systab->signature, "SST_", 4); + sal_systab->size = sizeof(*sal_systab); + sal_systab->sal_rev_minor = 1; + sal_systab->sal_rev_major = 0; + sal_systab->entry_count = 1; + sal_systab->ia32_bios_present = 0; + +#ifdef CONFIG_IA64_GENERIC + strcpy(sal_systab->oem_id, "Generic"); + strcpy(sal_systab->product_id, "IA-64 system"); +#endif + +#ifdef CONFIG_IA64_HP_SIM + strcpy(sal_systab->oem_id, "Hewlett-Packard"); + strcpy(sal_systab->product_id, "HP-simulator"); +#endif + 
+#ifdef CONFIG_IA64_SDV + strcpy(sal_systab->oem_id, "Intel"); + strcpy(sal_systab->product_id, "SDV"); +#endif + +#ifdef CONFIG_IA64_SGI_SN1_SIM + strcpy(sal_systab->oem_id, "SGI"); + strcpy(sal_systab->product_id, "SN1"); +#endif + + /* fill in an entry point: */ + sal_ed->type = SAL_DESC_ENTRY_POINT; + sal_ed->pal_proc = __pa(pal_desc[0]); + sal_ed->sal_proc = __pa(sal_desc[0]); + sal_ed->gp = __pa(sal_desc[1]); + + for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp) + checksum += *cp; + + sal_systab->checksum = -checksum; + + /* fill in a memory descriptor: */ + md = &efi_memmap[0]; + md->type = EFI_CONVENTIONAL_MEMORY; + md->pad = 0; + md->phys_addr = 2*MB; + md->virt_addr = 0; + md->num_pages = (64*MB) >> 12; /* 64MB (in 4KB pages) */ + md->attribute = EFI_MEMORY_WB; + + /* descriptor for firmware emulator: */ + md = &efi_memmap[1]; + md->type = EFI_RUNTIME_SERVICES_DATA; + md->pad = 0; + md->phys_addr = 1*MB; + md->virt_addr = 0; + md->num_pages = (1*MB) >> 12; /* 1MB (in 4KB pages) */ + md->attribute = EFI_MEMORY_WB; + + /* descriptor for high memory (>4GB): */ + md = &efi_memmap[2]; + md->type = EFI_CONVENTIONAL_MEMORY; + md->pad = 0; + md->phys_addr = 4096*MB; + md->virt_addr = 0; + md->num_pages = (32*MB) >> 12; /* 32MB (in 4KB pages) */ + md->attribute = EFI_MEMORY_WB; + + bp = id(ZERO_PAGE_ADDR); + bp->efi_systab = __pa(&fw_mem); + bp->efi_memmap = __pa(efi_memmap); + bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t); + bp->efi_memdesc_size = sizeof(efi_memory_desc_t); + bp->efi_memdesc_version = 1; + bp->command_line = __pa(cmd_line); + bp->console_info.num_cols = 80; + bp->console_info.num_rows = 25; + bp->console_info.orig_x = 0; + bp->console_info.orig_y = 24; + bp->num_pci_vectors = 0; + bp->fpswa = 0; +} diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S new file mode 100644 index 000000000..24dc10ee4 --- /dev/null +++ b/arch/ia64/kernel/gate.S @@ -0,0 +1,200 @@ +/* + * This file contains the code that gets mapped at the upper end of + * each task's text region. For now, it contains the signal + * trampoline code only. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#include <asm/offsets.h> +#include <asm/sigcontext.h> +#include <asm/system.h> +#include <asm/unistd.h> +#include <asm/page.h> + + .psr abi64 + .psr lsb + .lsb + + .section __gate_section,"ax" + + .align PAGE_SIZE + +# define SIGINFO_OFF 16 +# define SIGCONTEXT_OFF (SIGINFO_OFF + ((IA64_SIGINFO_SIZE + 15) & ~15)) +# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET +# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET +# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET +# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET +# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET +# define base0 r2 +# define base1 r3 + /* + * When we get here, the memory stack looks like this: + * + * +===============================+ + * | | + * // struct sigcontext // + * | | + * +===============================+ <-- sp+SIGCONTEXT_OFF + * | | + * // rest of siginfo // + * | | + * + +---------------+ + * | | siginfo.code | + * +---------------+---------------+ + * | siginfo.errno | siginfo.signo | + * +-------------------------------+ <-- sp+SIGINFO_OFF + * | 16 byte of scratch | + * | space | + * +-------------------------------+ <-- sp + * + * The register stack looks _exactly_ the way it looked at the + * time the signal occurred. 
In other words, we're treading + * on a potential mine-field: each incoming general register + * may be a NaT value (includeing sp, in which case the process + * ends up dying with a SIGSEGV). + * + * The first need to do is a cover to get the registers onto + * the backing store. Once that is done, we invoke the signal + * handler which may modify some of the machine state. After + * returning from the signal handler, we return control to the + * previous context by executing a sigreturn system call. A + * signal handler may call the rt_sigreturn() function to + * directly return to a given sigcontext. However, the + * user-level sigreturn() needs to do much more than calling + * the rt_sigreturn() system call as it needs to unwind the + * stack to restore preserved registers that may have been + * saved on the signal handler's call stack. + * + * On entry: + * r2 = signal number + * r3 = plabel of signal handler + * r15 = new register backing store (ignored) + * [sp+16] = sigframe + */ + + .global ia64_sigtramp + .proc ia64_sigtramp +ia64_sigtramp: + ld8 r10=[r3],8 // get signal handler entry point + br.call.sptk.many rp=invoke_sighandler +.ret0: mov r15=__NR_rt_sigreturn + break __BREAK_SYSCALL + .endp ia64_sigramp + + .proc invoke_sighandler +invoke_sighandler: + ld8 gp=[r3] // get signal handler's global pointer + mov b6=r10 + cover // push args in interrupted frame onto backing store + ;; + alloc r8=ar.pfs,0,1,3,0 // get CFM0, EC0, and CPL0 into r8 + mov r17=ar.bsp // fetch ar.bsp + mov loc0=rp // save return pointer + ;; + cmp.ne p8,p0=r15,r0 // do we need to switch the rbs? + mov out0=r2 // signal number +(p8) br.cond.spnt.few setup_rbs // yup -> (clobbers r14 and r16) +back_from_setup_rbs: + adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp + ;; + st8 [base0]=r17,(CFM_OFF-BSP_OFF) // save sc_ar_bsp + adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp + ;; + + st8 [base0]=r8 // save CFM0, EC0, and CPL0 + adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp + ;; + stf.spill [base0]=f6,32 + stf.spill [base1]=f7,32 + ;; + stf.spill [base0]=f8,32 + stf.spill [base1]=f9,32 + ;; + stf.spill [base0]=f10,32 + stf.spill [base1]=f11,32 + adds out1=SIGINFO_OFF,sp // siginfo pointer + ;; + stf.spill [base0]=f12,32 + stf.spill [base1]=f13,32 + adds out2=SIGCONTEXT_OFF,sp // sigcontext pointer + ;; + stf.spill [base0]=f14,32 + stf.spill [base1]=f15,32 + br.call.sptk.few rp=b6 // call the signal handler +.ret2: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp + ;; + ld8 r15=[base0],(CFM_OFF-BSP_OFF) // fetch sc_ar_bsp and advance to CFM_OFF + mov r14=ar.bsp + ;; + ld8 r8=[base0] // restore (perhaps modified) CFM0, EC0, and CPL0 + cmp.ne p8,p0=r14,r15 // do we need to restore the rbs? 
+(p8) br.cond.spnt.few restore_rbs // yup -> (clobbers r14 and r16) +back_from_restore_rbs: + { + and r9=0x7f,r8 // r9 <- CFM0.sof + extr.u r10=r8,7,7 // r10 <- CFM0.sol + mov r11=ip + } + ;; + adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp + adds r11=(cont-back_from_restore_rbs),r11 + sub r9=r9,r10 // r9 <- CFM0.sof - CFM0.sol == CFM0.nout + ;; + adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp + dep r9=r9,r9,7,7 // r9.sol = r9.sof + mov b6=r11 + ;; + ldf.fill f6=[base0],32 + ldf.fill f7=[base1],32 + mov rp=loc0 // copy return pointer out of stacked register + ;; + ldf.fill f8=[base0],32 + ldf.fill f9=[base1],32 + ;; + ldf.fill f10=[base0],32 + ldf.fill f11=[base1],32 + ;; + ldf.fill f12=[base0],32 + ldf.fill f13=[base1],32 + mov ar.pfs=r9 + ;; + ldf.fill f14=[base0],32 + ldf.fill f15=[base1],32 + br.ret.sptk.few b6 +cont: mov ar.pfs=r8 // ar.pfs = CFM0 + br.ret.sptk.few rp // re-establish CFM0 + .endp invoke_signal_handler + + .proc setup_rbs +setup_rbs: + flushrs // must be first in insn + ;; + mov ar.rsc=r0 // put RSE into enforced lazy mode + adds r16=(RNAT_OFF+SIGCONTEXT_OFF),sp + mov r14=ar.rnat // get rnat as updated by flushrs + ;; + mov ar.bspstore=r15 // set new register backing store area + st8 [r16]=r14 // save sc_ar_rnat + ;; + mov ar.rsc=0xf // set RSE into eager mode, pl 3 + invala // invalidate ALAT + br.cond.sptk.many back_from_setup_rbs + + .proc restore_rbs +restore_rbs: + flushrs + mov ar.rsc=r0 // put RSE into enforced lazy mode + adds r16=(RNAT_OFF+SIGCONTEXT_OFF),sp + ;; + ld8 r14=[r16] // get new rnat + mov ar.bspstore=r15 // set old register backing store area + ;; + mov ar.rnat=r14 // establish new rnat + mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc) + // invala not necessary as that will happen when returning to user-mode + br.cond.sptk.many back_from_restore_rbs + + .endp restore_rbs diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S new file mode 100644 index 000000000..50d965e02 --- /dev/null +++ b/arch/ia64/kernel/head.S @@ -0,0 +1,646 @@ +/* + * Here is where the ball gets rolling as far as the kernel is concerned. + * When control is transferred to _start, the bootload has already + * loaded us to the correct address. All that's left to do here is + * to set up the kernel's global pointer and jump to the kernel + * entry point. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 Intel Corp. 
+ * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com> + * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com> + */ + +#include <linux/config.h> + +#include <asm/fpu.h> +#include <asm/pal.h> +#include <asm/offsets.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> + + .psr abi64 + .psr lsb + .lsb + + .section __special_page_section,"ax" + + .global empty_zero_page +empty_zero_page: + .skip PAGE_SIZE + + .global swapper_pg_dir +swapper_pg_dir: + .skip PAGE_SIZE + + .global empty_bad_page +empty_bad_page: + .skip PAGE_SIZE + + .global empty_bad_pte_table +empty_bad_pte_table: + .skip PAGE_SIZE + + .global empty_bad_pmd_table +empty_bad_pmd_table: + .skip PAGE_SIZE + + .rodata +halt_msg: + stringz "Halting kernel\n" + + .text + .align 16 + .global _start + .proc _start +_start: + // set IVT entry point---can't access I/O ports without it + movl r3=ia64_ivt + ;; + mov cr.iva=r3 + movl r2=FPSR_DEFAULT + ;; + srlz.i + movl gp=__gp + + mov ar.fpsr=r2 + ;; + +#ifdef CONFIG_IA64_EARLY_PRINTK + mov r2=6 + mov r3=(8<<8) | (28<<2) + ;; + mov rr[r2]=r3 + ;; + srlz.i + ;; +#endif + +#define isAP p2 // are we booting an Application Processor (not the BSP)? + + // Find the init_task for the currently booting CPU. At poweron, and in + // UP mode, cpu_now_booting is 0 + movl r3=cpu_now_booting + ;; + ld4 r3=[r3] + movl r2=init_tasks + ;; + shladd r2=r3,3,r2 + ;; + ld8 r2=[r2] + cmp4.ne isAP,p0=r3,r0 // p9 == true if this is an application processor (ap) + ;; // RAW on r2 + extr r3=r2,0,61 // r3 == phys addr of task struct + ;; + + // load the "current" pointer (r13) and ar.k6 with the current task + mov r13=r2 + mov ar.k6=r3 // Physical address + ;; + /* + * Reserve space at the top of the stack for "struct pt_regs". Kernel threads + * don't store interesting values in that structure, but the space still needs + * to be there because time-critical stuff such as the context switching can + * be implemented more efficiently (for example, __switch_to() + * always sets the psr.dfh bit of the task it is switching to). 
+ */ + addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 + addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE + mov ar.rsc=r0 // place RSE in enforced lazy mode + ;; + mov ar.bspstore=r2 // establish the new RSE stack + ;; + loadrs // load zero bytes from the register stack + ;; + mov ar.rsc=0x3 // place RSE in eager mode + ;; + +#ifdef CONFIG_IA64_EARLY_PRINTK + .rodata +alive_msg: + stringz "I'm alive and well\n" + .previous + + alloc r2=ar.pfs,0,0,2,0 + movl out0=alive_msg + ;; + br.call.sptk.few rp=early_printk +1: // force new bundle +#endif /* CONFIG_IA64_EARLY_PRINTK */ + + alloc r2=ar.pfs,8,0,2,0 +#ifdef CONFIG_SMP +(isAP) br.call.sptk.few rp=smp_callin +.ret1: +(isAP) br.cond.sptk.few self +#endif + +#undef isAP + + // This is executed by the bootstrap processor (bsp) only: + +#ifdef CONFIG_IA64_FW_EMU + // initialize PAL & SAL emulator: + br.call.sptk.few rp=sys_fw_init + ;; +#endif + br.call.sptk.few rp=start_kernel +.ret2: + addl r2=@ltoff(halt_msg),gp + ;; + ld8 out0=[r2] + br.call.sptk.few b0=console_print +self: br.sptk.few self // endless loop + .endp _start + + .align 16 + .global ia64_save_debug_regs + .proc ia64_save_debug_regs +ia64_save_debug_regs: + alloc r16=ar.pfs,1,0,0,0 + mov r20=ar.lc // preserve ar.lc + mov ar.lc=IA64_NUM_DBG_REGS-1 + mov r18=0 + add r19=IA64_NUM_DBG_REGS*8,in0 + ;; +1: mov r16=dbr[r18] + mov r17=ibr[r18] + add r18=1,r18 + ;; + st8.nta [in0]=r16,8 + st8.nta [r19]=r17,8 + br.cloop.sptk.few 1b + + mov ar.lc=r20 // restore ar.lc + br.ret.sptk.few b0 + .endp ia64_save_debug_regs + + .align 16 + .global ia64_load_debug_regs + .proc ia64_load_debug_regs +ia64_load_debug_regs: + alloc r16=ar.pfs,1,0,0,0 + lfetch.nta [in0] + mov r20=ar.lc // preserve ar.lc + add r19=IA64_NUM_DBG_REGS*8,in0 + mov ar.lc=IA64_NUM_DBG_REGS-1 + mov r18=-1 + ;; +1: ld8.nta r16=[in0],8 + ld8.nta r17=[r19],8 + add r18=1,r18 + ;; + mov dbr[r18]=r16 + mov ibr[r18]=r17 + br.cloop.sptk.few 1b + + mov ar.lc=r20 // restore ar.lc + br.ret.sptk.few b0 + .endp ia64_load_debug_regs + + .align 16 + .global __ia64_save_fpu + .proc __ia64_save_fpu +__ia64_save_fpu: + alloc r2=ar.pfs,1,0,0,0 + adds r3=16,in0 + ;; + stf.spill.nta [in0]=f32,32 + stf.spill.nta [ r3]=f33,32 + ;; + stf.spill.nta [in0]=f34,32 + stf.spill.nta [ r3]=f35,32 + ;; + stf.spill.nta [in0]=f36,32 + stf.spill.nta [ r3]=f37,32 + ;; + stf.spill.nta [in0]=f38,32 + stf.spill.nta [ r3]=f39,32 + ;; + stf.spill.nta [in0]=f40,32 + stf.spill.nta [ r3]=f41,32 + ;; + stf.spill.nta [in0]=f42,32 + stf.spill.nta [ r3]=f43,32 + ;; + stf.spill.nta [in0]=f44,32 + stf.spill.nta [ r3]=f45,32 + ;; + stf.spill.nta [in0]=f46,32 + stf.spill.nta [ r3]=f47,32 + ;; + stf.spill.nta [in0]=f48,32 + stf.spill.nta [ r3]=f49,32 + ;; + stf.spill.nta [in0]=f50,32 + stf.spill.nta [ r3]=f51,32 + ;; + stf.spill.nta [in0]=f52,32 + stf.spill.nta [ r3]=f53,32 + ;; + stf.spill.nta [in0]=f54,32 + stf.spill.nta [ r3]=f55,32 + ;; + stf.spill.nta [in0]=f56,32 + stf.spill.nta [ r3]=f57,32 + ;; + stf.spill.nta [in0]=f58,32 + stf.spill.nta [ r3]=f59,32 + ;; + stf.spill.nta [in0]=f60,32 + stf.spill.nta [ r3]=f61,32 + ;; + stf.spill.nta [in0]=f62,32 + stf.spill.nta [ r3]=f63,32 + ;; + stf.spill.nta [in0]=f64,32 + stf.spill.nta [ r3]=f65,32 + ;; + stf.spill.nta [in0]=f66,32 + stf.spill.nta [ r3]=f67,32 + ;; + stf.spill.nta [in0]=f68,32 + stf.spill.nta [ r3]=f69,32 + ;; + stf.spill.nta [in0]=f70,32 + stf.spill.nta [ r3]=f71,32 + ;; + stf.spill.nta [in0]=f72,32 + stf.spill.nta [ r3]=f73,32 + ;; + stf.spill.nta [in0]=f74,32 + stf.spill.nta [ r3]=f75,32 + ;; + stf.spill.nta 
[in0]=f76,32 + stf.spill.nta [ r3]=f77,32 + ;; + stf.spill.nta [in0]=f78,32 + stf.spill.nta [ r3]=f79,32 + ;; + stf.spill.nta [in0]=f80,32 + stf.spill.nta [ r3]=f81,32 + ;; + stf.spill.nta [in0]=f82,32 + stf.spill.nta [ r3]=f83,32 + ;; + stf.spill.nta [in0]=f84,32 + stf.spill.nta [ r3]=f85,32 + ;; + stf.spill.nta [in0]=f86,32 + stf.spill.nta [ r3]=f87,32 + ;; + stf.spill.nta [in0]=f88,32 + stf.spill.nta [ r3]=f89,32 + ;; + stf.spill.nta [in0]=f90,32 + stf.spill.nta [ r3]=f91,32 + ;; + stf.spill.nta [in0]=f92,32 + stf.spill.nta [ r3]=f93,32 + ;; + stf.spill.nta [in0]=f94,32 + stf.spill.nta [ r3]=f95,32 + ;; + stf.spill.nta [in0]=f96,32 + stf.spill.nta [ r3]=f97,32 + ;; + stf.spill.nta [in0]=f98,32 + stf.spill.nta [ r3]=f99,32 + ;; + stf.spill.nta [in0]=f100,32 + stf.spill.nta [ r3]=f101,32 + ;; + stf.spill.nta [in0]=f102,32 + stf.spill.nta [ r3]=f103,32 + ;; + stf.spill.nta [in0]=f104,32 + stf.spill.nta [ r3]=f105,32 + ;; + stf.spill.nta [in0]=f106,32 + stf.spill.nta [ r3]=f107,32 + ;; + stf.spill.nta [in0]=f108,32 + stf.spill.nta [ r3]=f109,32 + ;; + stf.spill.nta [in0]=f110,32 + stf.spill.nta [ r3]=f111,32 + ;; + stf.spill.nta [in0]=f112,32 + stf.spill.nta [ r3]=f113,32 + ;; + stf.spill.nta [in0]=f114,32 + stf.spill.nta [ r3]=f115,32 + ;; + stf.spill.nta [in0]=f116,32 + stf.spill.nta [ r3]=f117,32 + ;; + stf.spill.nta [in0]=f118,32 + stf.spill.nta [ r3]=f119,32 + ;; + stf.spill.nta [in0]=f120,32 + stf.spill.nta [ r3]=f121,32 + ;; + stf.spill.nta [in0]=f122,32 + stf.spill.nta [ r3]=f123,32 + ;; + stf.spill.nta [in0]=f124,32 + stf.spill.nta [ r3]=f125,32 + ;; + stf.spill.nta [in0]=f126,32 + stf.spill.nta [ r3]=f127,32 + br.ret.sptk.few rp + .endp __ia64_save_fpu + + .align 16 + .global __ia64_load_fpu + .proc __ia64_load_fpu +__ia64_load_fpu: + alloc r2=ar.pfs,1,0,0,0 + adds r3=16,in0 + ;; + ldf.fill.nta f32=[in0],32 + ldf.fill.nta f33=[ r3],32 + ;; + ldf.fill.nta f34=[in0],32 + ldf.fill.nta f35=[ r3],32 + ;; + ldf.fill.nta f36=[in0],32 + ldf.fill.nta f37=[ r3],32 + ;; + ldf.fill.nta f38=[in0],32 + ldf.fill.nta f39=[ r3],32 + ;; + ldf.fill.nta f40=[in0],32 + ldf.fill.nta f41=[ r3],32 + ;; + ldf.fill.nta f42=[in0],32 + ldf.fill.nta f43=[ r3],32 + ;; + ldf.fill.nta f44=[in0],32 + ldf.fill.nta f45=[ r3],32 + ;; + ldf.fill.nta f46=[in0],32 + ldf.fill.nta f47=[ r3],32 + ;; + ldf.fill.nta f48=[in0],32 + ldf.fill.nta f49=[ r3],32 + ;; + ldf.fill.nta f50=[in0],32 + ldf.fill.nta f51=[ r3],32 + ;; + ldf.fill.nta f52=[in0],32 + ldf.fill.nta f53=[ r3],32 + ;; + ldf.fill.nta f54=[in0],32 + ldf.fill.nta f55=[ r3],32 + ;; + ldf.fill.nta f56=[in0],32 + ldf.fill.nta f57=[ r3],32 + ;; + ldf.fill.nta f58=[in0],32 + ldf.fill.nta f59=[ r3],32 + ;; + ldf.fill.nta f60=[in0],32 + ldf.fill.nta f61=[ r3],32 + ;; + ldf.fill.nta f62=[in0],32 + ldf.fill.nta f63=[ r3],32 + ;; + ldf.fill.nta f64=[in0],32 + ldf.fill.nta f65=[ r3],32 + ;; + ldf.fill.nta f66=[in0],32 + ldf.fill.nta f67=[ r3],32 + ;; + ldf.fill.nta f68=[in0],32 + ldf.fill.nta f69=[ r3],32 + ;; + ldf.fill.nta f70=[in0],32 + ldf.fill.nta f71=[ r3],32 + ;; + ldf.fill.nta f72=[in0],32 + ldf.fill.nta f73=[ r3],32 + ;; + ldf.fill.nta f74=[in0],32 + ldf.fill.nta f75=[ r3],32 + ;; + ldf.fill.nta f76=[in0],32 + ldf.fill.nta f77=[ r3],32 + ;; + ldf.fill.nta f78=[in0],32 + ldf.fill.nta f79=[ r3],32 + ;; + ldf.fill.nta f80=[in0],32 + ldf.fill.nta f81=[ r3],32 + ;; + ldf.fill.nta f82=[in0],32 + ldf.fill.nta f83=[ r3],32 + ;; + ldf.fill.nta f84=[in0],32 + ldf.fill.nta f85=[ r3],32 + ;; + ldf.fill.nta f86=[in0],32 + ldf.fill.nta f87=[ r3],32 + ;; + ldf.fill.nta 
f88=[in0],32 + ldf.fill.nta f89=[ r3],32 + ;; + ldf.fill.nta f90=[in0],32 + ldf.fill.nta f91=[ r3],32 + ;; + ldf.fill.nta f92=[in0],32 + ldf.fill.nta f93=[ r3],32 + ;; + ldf.fill.nta f94=[in0],32 + ldf.fill.nta f95=[ r3],32 + ;; + ldf.fill.nta f96=[in0],32 + ldf.fill.nta f97=[ r3],32 + ;; + ldf.fill.nta f98=[in0],32 + ldf.fill.nta f99=[ r3],32 + ;; + ldf.fill.nta f100=[in0],32 + ldf.fill.nta f101=[ r3],32 + ;; + ldf.fill.nta f102=[in0],32 + ldf.fill.nta f103=[ r3],32 + ;; + ldf.fill.nta f104=[in0],32 + ldf.fill.nta f105=[ r3],32 + ;; + ldf.fill.nta f106=[in0],32 + ldf.fill.nta f107=[ r3],32 + ;; + ldf.fill.nta f108=[in0],32 + ldf.fill.nta f109=[ r3],32 + ;; + ldf.fill.nta f110=[in0],32 + ldf.fill.nta f111=[ r3],32 + ;; + ldf.fill.nta f112=[in0],32 + ldf.fill.nta f113=[ r3],32 + ;; + ldf.fill.nta f114=[in0],32 + ldf.fill.nta f115=[ r3],32 + ;; + ldf.fill.nta f116=[in0],32 + ldf.fill.nta f117=[ r3],32 + ;; + ldf.fill.nta f118=[in0],32 + ldf.fill.nta f119=[ r3],32 + ;; + ldf.fill.nta f120=[in0],32 + ldf.fill.nta f121=[ r3],32 + ;; + ldf.fill.nta f122=[in0],32 + ldf.fill.nta f123=[ r3],32 + ;; + ldf.fill.nta f124=[in0],32 + ldf.fill.nta f125=[ r3],32 + ;; + ldf.fill.nta f126=[in0],32 + ldf.fill.nta f127=[ r3],32 + br.ret.sptk.few rp + .endp __ia64_load_fpu + + .align 16 + .global __ia64_init_fpu + .proc __ia64_init_fpu +__ia64_init_fpu: + alloc r2=ar.pfs,0,0,0,0 + stf.spill [sp]=f0 + mov f32=f0 + ;; + ldf.fill f33=[sp] + ldf.fill f34=[sp] + mov f35=f0 + ;; + ldf.fill f36=[sp] + ldf.fill f37=[sp] + mov f38=f0 + ;; + ldf.fill f39=[sp] + ldf.fill f40=[sp] + mov f41=f0 + ;; + ldf.fill f42=[sp] + ldf.fill f43=[sp] + mov f44=f0 + ;; + ldf.fill f45=[sp] + ldf.fill f46=[sp] + mov f47=f0 + ;; + ldf.fill f48=[sp] + ldf.fill f49=[sp] + mov f50=f0 + ;; + ldf.fill f51=[sp] + ldf.fill f52=[sp] + mov f53=f0 + ;; + ldf.fill f54=[sp] + ldf.fill f55=[sp] + mov f56=f0 + ;; + ldf.fill f57=[sp] + ldf.fill f58=[sp] + mov f59=f0 + ;; + ldf.fill f60=[sp] + ldf.fill f61=[sp] + mov f62=f0 + ;; + ldf.fill f63=[sp] + ldf.fill f64=[sp] + mov f65=f0 + ;; + ldf.fill f66=[sp] + ldf.fill f67=[sp] + mov f68=f0 + ;; + ldf.fill f69=[sp] + ldf.fill f70=[sp] + mov f71=f0 + ;; + ldf.fill f72=[sp] + ldf.fill f73=[sp] + mov f74=f0 + ;; + ldf.fill f75=[sp] + ldf.fill f76=[sp] + mov f77=f0 + ;; + ldf.fill f78=[sp] + ldf.fill f79=[sp] + mov f80=f0 + ;; + ldf.fill f81=[sp] + ldf.fill f82=[sp] + mov f83=f0 + ;; + ldf.fill f84=[sp] + ldf.fill f85=[sp] + mov f86=f0 + ;; + ldf.fill f87=[sp] + ldf.fill f88=[sp] + mov f89=f0 + ;; + ldf.fill f90=[sp] + ldf.fill f91=[sp] + mov f92=f0 + ;; + ldf.fill f93=[sp] + ldf.fill f94=[sp] + mov f95=f0 + ;; + ldf.fill f96=[sp] + ldf.fill f97=[sp] + mov f98=f0 + ;; + ldf.fill f99=[sp] + ldf.fill f100=[sp] + mov f101=f0 + ;; + ldf.fill f102=[sp] + ldf.fill f103=[sp] + mov f104=f0 + ;; + ldf.fill f105=[sp] + ldf.fill f106=[sp] + mov f107=f0 + ;; + ldf.fill f108=[sp] + ldf.fill f109=[sp] + mov f110=f0 + ;; + ldf.fill f111=[sp] + ldf.fill f112=[sp] + mov f113=f0 + ;; + ldf.fill f114=[sp] + ldf.fill f115=[sp] + mov f116=f0 + ;; + ldf.fill f117=[sp] + ldf.fill f118=[sp] + mov f119=f0 + ;; + ldf.fill f120=[sp] + ldf.fill f121=[sp] + mov f122=f0 + ;; + ldf.fill f123=[sp] + ldf.fill f124=[sp] + mov f125=f0 + ;; + ldf.fill f126=[sp] + mov f127=f0 + br.ret.sptk.few rp + .endp __ia64_init_fpu diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c new file mode 100644 index 000000000..122650461 --- /dev/null +++ b/arch/ia64/kernel/init_task.c @@ -0,0 +1,31 @@ +/* + * This is where we statically allocate 
and initialize the initial + * task. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ + +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/sched.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> + +static struct vm_area_struct init_mmap = INIT_MMAP; +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS; +struct mm_struct init_mm = INIT_MM(init_mm); + +/* + * Initial task structure. + * + * We need to make sure that this is page aligned due to the way + * process stacks are handled. This is done by having a special + * "init_task" linker map entry.. + */ +union task_union init_task_union + __attribute__((section("init_task"))) = + { INIT_TASK(init_task_union.task) }; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c new file mode 100644 index 000000000..01c201137 --- /dev/null +++ b/arch/ia64/kernel/irq.c @@ -0,0 +1,657 @@ +/* + * linux/arch/ia64/kernel/irq.c + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * 6/10/99: Updated to bring in sync with x86 version to facilitate + * support for SMP and different interrupt controllers. + */ + +#include <linux/config.h> + +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/kernel_stat.h> +#include <linux/malloc.h> +#include <linux/ptrace.h> +#include <linux/random.h> /* for rand_initialize_irq() */ +#include <linux/signal.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/threads.h> + +#ifdef CONFIG_KDB +# include <linux/kdb.h> +#endif + +#include <asm/bitops.h> +#include <asm/delay.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/machvec.h> +#include <asm/pgtable.h> +#include <asm/system.h> + +/* This is used to detect bad usage of probe_irq_on()/probe_irq_off(). */ +#define PROBE_IRQ_COOKIE 0xfeedC0FFEE + +struct irq_desc irq_desc[NR_IRQS]; + +/* + * Micro-access to controllers is serialized over the whole + * system. We never hold this lock when we call the actual + * IRQ handler. + */ +spinlock_t irq_controller_lock; + +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC +spinlock_t ivr_read_lock; +#endif + +unsigned int local_bh_count[NR_CPUS]; +/* + * used in irq_enter()/irq_exit() + */ +unsigned int local_irq_count[NR_CPUS]; + +static struct irqaction timer_action = { NULL, 0, 0, NULL, NULL, NULL}; + +#ifdef CONFIG_SMP +static struct irqaction ipi_action = { NULL, 0, 0, NULL, NULL, NULL}; +#endif + +/* + * Legacy IRQ to IA-64 vector translation table. Any vector not in + * this table maps to itself (ie: irq 0x30 => IA64 vector 0x30) + */ +__u8 irq_to_vector_map[IA64_MIN_VECTORED_IRQ] = { + /* 8259 IRQ translation, first 16 entries */ + TIMER_IRQ, 0x50, 0x0f, 0x51, 0x52, 0x53, 0x43, 0x54, + 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x40, 0x41, +}; + +/* + * Reverse of the above table. 
+ */ +static __u8 vector_to_legacy_map[256]; + +/* + * used by proc fs (/proc/interrupts) + */ +int +get_irq_list (char *buf) +{ + int i; + struct irqaction * action; + char *p = buf; + +#ifdef CONFIG_SMP + p += sprintf(p, " "); + for (i = 0; i < smp_num_cpus; i++) + p += sprintf(p, "CPU%d ", i); + *p++ = '\n'; +#endif + /* + * Simply scans the external vectored interrupts + */ + for (i = 0; i < NR_IRQS; i++) { + action = irq_desc[i].action; + if (!action) + continue; + p += sprintf(p, "%3d: ",i); +#ifndef CONFIG_SMP + p += sprintf(p, "%10u ", kstat_irqs(i)); +#else + { + int j; + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + kstat.irqs[cpu_logical_map(j)][i]); + } +#endif + p += sprintf(p, " %14s", irq_desc[i].handler->typename); + p += sprintf(p, " %c%s", (action->flags & SA_INTERRUPT) ? '+' : ' ', + action->name); + + for (action = action->next; action; action = action->next) { + p += sprintf(p, ", %c%s", + (action->flags & SA_INTERRUPT)?'+':' ', + action->name); + } + *p++ = '\n'; + } + return p - buf; +} + +/* + * That's where the IVT branches when we get an external + * interrupt. This branches to the correct hardware IRQ handler via + * function ptr. + */ +void +ia64_handle_irq (unsigned long irq, struct pt_regs *regs) +{ + unsigned long bsp, sp, saved_tpr; + +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC +# ifndef CONFIG_SMP + static unsigned int max_prio = 0; +# endif + unsigned int prev_prio; + unsigned long eoi_ptr; + +# ifdef CONFIG_USB + disable_usb(); +# endif + /* + * Stop IPIs by getting the ivr_read_lock + */ + spin_lock(&ivr_read_lock); + + /* + * Disable PCI writes + */ + outl(0x80ff81c0, 0xcf8); + outl(0x73002188, 0xcfc); + eoi_ptr = inl(0xcfc); + + irq = ia64_get_ivr(); + + /* + * Enable PCI writes + */ + outl(0x73182188, 0xcfc); + + spin_unlock(&ivr_read_lock); + +# ifdef CONFIG_USB + reenable_usb(); +# endif + +# ifndef CONFIG_SMP + prev_prio = max_prio; + if (irq < max_prio) { + printk ("ia64_handle_irq: got irq %lu while %u was in progress!\n", + irq, max_prio); + + } else + max_prio = irq; +# endif /* !CONFIG_SMP */ +#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ + + /* Always set TPR to limit maximum interrupt nesting depth to + * 16 (without this, it would be ~240, which could easily lead + * to kernel stack overflows. + */ + saved_tpr = ia64_get_tpr(); + ia64_srlz_d(); + ia64_set_tpr(irq); + ia64_srlz_d(); + + asm ("mov %0=ar.bsp" : "=r"(bsp)); + asm ("mov %0=sp" : "=r"(sp)); + + if ((sp - bsp) < 1024) { + static long last_time; + static unsigned char count; + + if (count > 5 && jiffies - last_time > 5*HZ) + count = 0; + if (++count < 5) { + last_time = jiffies; + printk("ia64_handle_irq: DANGER: less than 1KB of free stack space!!\n" + "(bsp=0x%lx, sp=%lx)\n", bsp, sp); + } +#ifdef CONFIG_KDB + kdb(KDB_REASON_PANIC, 0, regs); +#endif + } + + /* + * The interrupt is now said to be in service + */ + if (irq >= NR_IRQS) { + printk("handle_irq: invalid irq=%lu\n", irq); + goto out; + } + + ++kstat.irqs[smp_processor_id()][irq]; + + if (irq == IA64_SPURIOUS_INT) { + printk("handle_irq: spurious interrupt\n"); + goto out; + } + + /* + * Handle the interrupt by calling the hardware specific handler (IOSAPIC, Internal, etc). 
+ */ + (*irq_desc[irq].handler->handle)(irq, regs); + out: +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + { + long pEOI; + + asm ("mov %0=0;; (p1) mov %0=1" : "=r"(pEOI)); + if (!pEOI) { + printk("Yikes: ia64_handle_irq() without pEOI!!\n"); + asm volatile ("cmp.eq p1,p0=r0,r0" : "=r"(pEOI)); +# ifdef CONFIG_KDB + kdb(KDB_REASON_PANIC, 0, regs); +# endif + } + } + + local_irq_disable(); +# ifndef CONFIG_SMP + if (max_prio == irq) + max_prio = prev_prio; +# endif /* !CONFIG_SMP */ +#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ + + ia64_srlz_d(); + ia64_set_tpr(saved_tpr); + ia64_srlz_d(); +} + + +/* + * This should really return information about whether we should do + * bottom half handling etc. Right now we end up _always_ checking the + * bottom half, which is a waste of time and is not what some drivers + * would prefer. + */ +int +invoke_irq_handlers (unsigned int irq, struct pt_regs *regs, struct irqaction *action) +{ + void (*handler)(int, void *, struct pt_regs *); + unsigned long flags, flags_union = 0; + int cpu = smp_processor_id(); + unsigned int requested_irq; + void *dev_id; + + irq_enter(cpu, irq); + + if ((action->flags & SA_INTERRUPT) == 0) + __sti(); + + do { + flags = action->flags; + requested_irq = irq; + if ((flags & SA_LEGACY) != 0) + requested_irq = vector_to_legacy_map[irq]; + flags_union |= flags; + handler = action->handler; + dev_id = action->dev_id; + action = action->next; + (*handler)(requested_irq, dev_id, regs); + } while (action); + if ((flags_union & SA_SAMPLE_RANDOM) != 0) + add_interrupt_randomness(irq); + __cli(); + + irq_exit(cpu, irq); + return flags_union | 1; /* force the "do bottom halves" bit */ +} + +void +disable_irq_nosync (unsigned int irq) +{ + unsigned long flags; + + irq = map_legacy_irq(irq); + + spin_lock_irqsave(&irq_controller_lock, flags); + if (irq_desc[irq].depth++ > 0) { + irq_desc[irq].status &= ~IRQ_ENABLED; + irq_desc[irq].handler->disable(irq); + } + spin_unlock_irqrestore(&irq_controller_lock, flags); +} + +/* + * Synchronous version of the above, making sure the IRQ is + * no longer running on any other IRQ.. + */ +void +disable_irq (unsigned int irq) +{ + disable_irq_nosync(irq); + + irq = map_legacy_irq(irq); + + if (!local_irq_count[smp_processor_id()]) { + do { + barrier(); + } while ((irq_desc[irq].status & IRQ_INPROGRESS) != 0); + } +} + +void +enable_irq (unsigned int irq) +{ + unsigned long flags; + + irq = map_legacy_irq(irq); + + spin_lock_irqsave(&irq_controller_lock, flags); + switch (irq_desc[irq].depth) { + case 1: + irq_desc[irq].status |= IRQ_ENABLED; + (*irq_desc[irq].handler->enable)(irq); + /* fall through */ + default: + --irq_desc[irq].depth; + break; + + case 0: + printk("enable_irq: unbalanced from %p\n", __builtin_return_address(0)); + } + spin_unlock_irqrestore(&irq_controller_lock, flags); +} + +/* + * This function encapsulates the initialization that needs to be + * performed under the protection of lock irq_controller_lock. The + * lock must have been acquired by the time this is called. 
+ */ +static inline int +setup_irq (unsigned int irq, struct irqaction *new) +{ + int shared = 0; + struct irqaction *old, **p; + + p = &irq_desc[irq].action; + old = *p; + if (old) { + if (!(old->flags & new->flags & SA_SHIRQ)) { + return -EBUSY; + } + /* add new interrupt at end of irq queue */ + do { + p = &old->next; + old = *p; + } while (old); + shared = 1; + } + *p = new; + + /* when sharing do not unmask */ + if (!shared) { + irq_desc[irq].depth = 0; + irq_desc[irq].status |= IRQ_ENABLED; + (*irq_desc[irq].handler->startup)(irq); + } + return 0; +} + +int +request_irq (unsigned int requested_irq, void (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, const char * devname, void *dev_id) +{ + int retval, need_kfree = 0; + struct irqaction *action; + unsigned long flags; + unsigned int irq; + +#ifdef IA64_DEBUG + printk("request_irq(0x%x) called\n", requested_irq); +#endif + /* + * Sanity-check: shared interrupts should REALLY pass in + * a real dev-ID, otherwise we'll have trouble later trying + * to figure out which interrupt is which (messes up the + * interrupt freeing logic etc). + */ + if ((irqflags & SA_SHIRQ) && !dev_id) + printk("Bad boy: %s (at %p) called us without a dev_id!\n", + devname, current_text_addr()); + + irq = map_legacy_irq(requested_irq); + if (irq != requested_irq) + irqflags |= SA_LEGACY; + + if (irq >= NR_IRQS) + return -EINVAL; + + if (!handler) + return -EINVAL; + + /* + * The timer_action and ipi_action cannot be allocated + * dynamically because its initialization happens really early + * on in init/main.c at this point the memory allocator has + * not yet been initialized. So we use a statically reserved + * buffer for it. In some sense that's no big deal because we + * need one no matter what. 
+ */ + if (irq == TIMER_IRQ) + action = &timer_action; +#ifdef CONFIG_SMP + else if (irq == IPI_IRQ) + action = &ipi_action; +#endif + else { + action = kmalloc(sizeof(struct irqaction), GFP_KERNEL); + need_kfree = 1; + } + + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irqflags; + action->mask = 0; + action->name = devname; + action->next = NULL; + action->dev_id = dev_id; + + if ((irqflags & SA_SAMPLE_RANDOM) != 0) + rand_initialize_irq(irq); + + spin_lock_irqsave(&irq_controller_lock, flags); + retval = setup_irq(irq, action); + spin_unlock_irqrestore(&irq_controller_lock, flags); + + if (need_kfree && retval) + kfree(action); + + return retval; +} + +void +free_irq (unsigned int irq, void *dev_id) +{ + struct irqaction *action, **p; + unsigned long flags; + + /* + * some sanity checks first + */ + if (irq >= NR_IRQS) { + printk("Trying to free IRQ%d\n",irq); + return; + } + + irq = map_legacy_irq(irq); + + /* + * Find the corresponding irqaction + */ + spin_lock_irqsave(&irq_controller_lock, flags); + for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) { + if (action->dev_id != dev_id) + continue; + + /* Found it - now remove it from the list of entries */ + *p = action->next; + if (!irq_desc[irq].action) { + irq_desc[irq].status &= ~IRQ_ENABLED; + (*irq_desc[irq].handler->shutdown)(irq); + } + + spin_unlock_irqrestore(&irq_controller_lock, flags); + +#ifdef CONFIG_SMP + /* Wait to make sure it's not being used on another CPU */ + while (irq_desc[irq].status & IRQ_INPROGRESS) + barrier(); +#endif + + if (action != &timer_action +#ifdef CONFIG_SMP + && action != &ipi_action +#endif + ) + kfree(action); + return; + } + printk("Trying to free free IRQ%d\n", irq); +} + +/* + * IRQ autodetection code. Note that the return value of + * probe_irq_on() is no longer being used (it's role has been replaced + * by the IRQ_AUTODETECT flag). 
+ */ +unsigned long +probe_irq_on (void) +{ + struct irq_desc *id; + unsigned long delay; + +#ifdef IA64_DEBUG + printk("probe_irq_on() called\n"); +#endif + + spin_lock_irq(&irq_controller_lock); + for (id = irq_desc; id < irq_desc + NR_IRQS; ++id) { + if (!id->action) { + id->status |= IRQ_AUTODETECT | IRQ_WAITING; + (*id->handler->startup)(id - irq_desc); + } + } + spin_unlock_irq(&irq_controller_lock); + + /* wait for spurious interrupts to trigger: */ + + for (delay = jiffies + HZ/10; time_after(delay, jiffies); ) + /* about 100ms delay */ + synchronize_irq(); + + /* filter out obviously spurious interrupts: */ + spin_lock_irq(&irq_controller_lock); + for (id = irq_desc; id < irq_desc + NR_IRQS; ++id) { + unsigned int status = id->status; + + if (!(status & IRQ_AUTODETECT)) + continue; + + if (!(status & IRQ_WAITING)) { + id->status = status & ~IRQ_AUTODETECT; + (*id->handler->shutdown)(id - irq_desc); + } + } + spin_unlock_irq(&irq_controller_lock); + return PROBE_IRQ_COOKIE; /* return meaningless return value */ +} + +int +probe_irq_off (unsigned long cookie) +{ + int irq_found, nr_irqs; + struct irq_desc *id; + +#ifdef IA64_DEBUG + printk("probe_irq_off(cookie=0x%lx) -> ", cookie); +#endif + + if (cookie != PROBE_IRQ_COOKIE) + printk("bad irq probe from %p\n", __builtin_return_address(0)); + + nr_irqs = 0; + irq_found = 0; + spin_lock_irq(&irq_controller_lock); + for (id = irq_desc + IA64_MIN_VECTORED_IRQ; id < irq_desc + NR_IRQS; ++id) { + unsigned int status = id->status; + + if (!(status & IRQ_AUTODETECT)) + continue; + + if (!(status & IRQ_WAITING)) { + if (!nr_irqs) + irq_found = (id - irq_desc); + ++nr_irqs; + } + id->status = status & ~IRQ_AUTODETECT; + (*id->handler->shutdown)(id - irq_desc); + } + spin_unlock_irq(&irq_controller_lock); + + if (nr_irqs > 1) + irq_found = -irq_found; + +#ifdef IA64_DEBUG + printk("%d\n", irq_found); +#endif + return irq_found; +} + +#ifdef CONFIG_SMP + +void __init +init_IRQ_SMP (void) +{ + if (request_irq(IPI_IRQ, handle_IPI, 0, "IPI", NULL)) + panic("Could not allocate IPI Interrupt Handler!"); +} + +#endif + +void __init +init_IRQ (void) +{ + int i; + + for (i = 0; i < IA64_MIN_VECTORED_IRQ; ++i) + vector_to_legacy_map[irq_to_vector_map[i]] = i; + + for (i = 0; i < NR_IRQS; ++i) { + irq_desc[i].handler = &irq_type_default; + } + + irq_desc[TIMER_IRQ].handler = &irq_type_ia64_internal; +#ifdef CONFIG_SMP + /* + * Configure the IPI vector and handler + */ + irq_desc[IPI_IRQ].handler = &irq_type_ia64_internal; + init_IRQ_SMP(); +#endif + + ia64_set_pmv(1 << 16); + ia64_set_cmcv(CMC_IRQ); /* XXX fix me */ + + platform_irq_init(irq_desc); + + /* clear TPR to enable all interrupt classes: */ + ia64_set_tpr(0); +} + +/* TBD: + * Certain IA64 platforms can have inter-processor interrupt support. + * This interface is supposed to default to the IA64 IPI block-based + * mechanism if the platform doesn't provide a separate mechanism + * for IPIs. + * Choices : (1) Extend hw_interrupt_type interfaces + * (2) Use machine vector mechanism + * For now defining the following interface as a place holder. 
+ */ +void +ipi_send (int cpu, int vector, int delivery_mode) +{ +} diff --git a/arch/ia64/kernel/irq_default.c b/arch/ia64/kernel/irq_default.c new file mode 100644 index 000000000..bf8c62642 --- /dev/null +++ b/arch/ia64/kernel/irq_default.c @@ -0,0 +1,30 @@ +#include <linux/kernel.h> +#include <linux/sched.h> + +#include <asm/irq.h> +#include <asm/processor.h> +#include <asm/ptrace.h> + + +static int +irq_default_handle_irq (unsigned int irq, struct pt_regs *regs) +{ + printk("Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id()); + return 0; /* don't call do_bottom_half() for spurious interrupts */ +} + +static void +irq_default_noop (unsigned int irq) +{ + /* nuthing to do... */ +} + +struct hw_interrupt_type irq_type_default = { + "default", + (void (*)(unsigned long)) irq_default_noop, /* init */ + irq_default_noop, /* startup */ + irq_default_noop, /* shutdown */ + irq_default_handle_irq, /* handle */ + irq_default_noop, /* enable */ + irq_default_noop /* disable */ +}; diff --git a/arch/ia64/kernel/irq_internal.c b/arch/ia64/kernel/irq_internal.c new file mode 100644 index 000000000..1ae904fe8 --- /dev/null +++ b/arch/ia64/kernel/irq_internal.c @@ -0,0 +1,71 @@ +/* + * Internal Interrupt Vectors + * + * This takes care of interrupts that are generated by the CPU + * internally, such as the ITC and IPI interrupts. + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + */ + +#include <linux/kernel.h> +#include <linux/sched.h> + +#include <asm/irq.h> +#include <asm/processor.h> +#include <asm/ptrace.h> + +/* + * This is identical to IOSAPIC handle_irq. It may go away . . . + */ +static int +internal_handle_irq (unsigned int irq, struct pt_regs *regs) +{ + struct irqaction *action = 0; + struct irq_desc *id = irq_desc + irq; + unsigned int status; + int retval; + + spin_lock(&irq_controller_lock); + { + status = id->status; + if ((status & IRQ_ENABLED) != 0) + action = id->action; + id->status = status & ~(IRQ_REPLAY | IRQ_WAITING); + } + spin_unlock(&irq_controller_lock); + + if (!action) { + if (!(id->status & IRQ_AUTODETECT)) + printk("irq_hpsim_handle_irq: unexpected interrupt %u\n", irq); + return 0; + } + + retval = invoke_irq_handlers(irq, regs, action); + + spin_lock(&irq_controller_lock); + { + status = (id->status & ~IRQ_INPROGRESS); + id->status = status; + } + spin_unlock(&irq_controller_lock); + + return retval; +} + +static void +internal_noop (unsigned int irq) +{ + /* nuthing to do... */ +} + +struct hw_interrupt_type irq_type_ia64_internal = { + "IA64 internal", + (void (*)(unsigned long)) internal_noop, /* init */ + internal_noop, /* startup */ + internal_noop, /* shutdown */ + internal_handle_irq, /* handle */ + internal_noop, /* enable */ + internal_noop /* disable */ +}; + diff --git a/arch/ia64/kernel/irq_lock.c b/arch/ia64/kernel/irq_lock.c new file mode 100644 index 000000000..9c512dd4e --- /dev/null +++ b/arch/ia64/kernel/irq_lock.c @@ -0,0 +1,287 @@ +/* + * SMP IRQ Lock support + * + * Global interrupt locks for SMP. Allow interrupts to come in on any + * CPU, yet make cli/sti act globally to protect critical regions.. + * These function usually appear in irq.c, but I think it's cleaner this way. 
+ * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + */ + +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/smp.h> +#include <linux/threads.h> +#include <linux/init.h> + +#include <asm/system.h> +#include <asm/processor.h> +#include <asm/irq.h> +#include <asm/bitops.h> +#include <asm/pgtable.h> +#include <asm/delay.h> + +int global_irq_holder = NO_PROC_ID; +spinlock_t global_irq_lock; +atomic_t global_irq_count; +atomic_t global_bh_count; +atomic_t global_bh_lock; + +#define INIT_STUCK (1<<26) + +void +irq_enter(int cpu, int irq) +{ + int stuck = INIT_STUCK; + + hardirq_enter(cpu, irq); + barrier(); + while (global_irq_lock.lock) { + if (cpu == global_irq_holder) { + break; + } + + if (!--stuck) { + printk("irq_enter stuck (irq=%d, cpu=%d, global=%d)\n", + irq, cpu,global_irq_holder); + stuck = INIT_STUCK; + } + barrier(); + } +} + +void +irq_exit(int cpu, int irq) +{ + hardirq_exit(cpu, irq); + release_irqlock(cpu); +} + +static void +show(char * str) +{ + int i; + unsigned long *stack; + int cpu = smp_processor_id(); + + printk("\n%s, CPU %d:\n", str, cpu); + printk("irq: %d [%d %d]\n", + atomic_read(&global_irq_count), local_irq_count[0], local_irq_count[1]); + printk("bh: %d [%d %d]\n", + atomic_read(&global_bh_count), local_bh_count[0], local_bh_count[1]); + + stack = (unsigned long *) &stack; + for (i = 40; i ; i--) { + unsigned long x = *++stack; + if (x > (unsigned long) &get_options && x < (unsigned long) &vsprintf) { + printk("<[%08lx]> ", x); + } + } +} + +#define MAXCOUNT 100000000 + +static inline void +wait_on_bh(void) +{ + int count = MAXCOUNT; + do { + if (!--count) { + show("wait_on_bh"); + count = ~0; + } + /* nothing .. wait for the other bh's to go away */ + } while (atomic_read(&global_bh_count) != 0); +} + +static inline void +wait_on_irq(int cpu) +{ + int count = MAXCOUNT; + + for (;;) { + + /* + * Wait until all interrupts are gone. Wait + * for bottom half handlers unless we're + * already executing in one.. + */ + if (!atomic_read(&global_irq_count)) { + if (local_bh_count[cpu] || !atomic_read(&global_bh_count)) + break; + } + + /* Duh, we have to loop. Release the lock to avoid deadlocks */ + spin_unlock(&global_irq_lock); + mb(); + + for (;;) { + if (!--count) { + show("wait_on_irq"); + count = ~0; + } + __sti(); + udelay(cpu + 1); + __cli(); + if (atomic_read(&global_irq_count)) + continue; + if (global_irq_lock.lock) + continue; + if (!local_bh_count[cpu] && atomic_read(&global_bh_count)) + continue; + if (spin_trylock(&global_irq_lock)) + break; + } + } +} + +/* + * This is called when we want to synchronize with + * bottom half handlers. We need to wait until + * no other CPU is executing any bottom half handler. + * + * Don't wait if we're already running in an interrupt + * context or are inside a bh handler. + */ +void +synchronize_bh(void) +{ + if (atomic_read(&global_bh_count)) { + int cpu = smp_processor_id(); + if (!local_irq_count[cpu] && !local_bh_count[cpu]) { + wait_on_bh(); + } + } +} + + +/* + * This is called when we want to synchronize with + * interrupts. We may for example tell a device to + * stop sending interrupts: but to make sure there + * are no interrupts that are executing on another + * CPU we need to call this function. 
+ */ +void +synchronize_irq(void) +{ + int cpu = smp_processor_id(); + int local_count; + int global_count; + + mb(); + do { + local_count = local_irq_count[cpu]; + global_count = atomic_read(&global_irq_count); + } while (global_count != local_count); +} + +static inline void +get_irqlock(int cpu) +{ + if (!spin_trylock(&global_irq_lock)) { + /* do we already hold the lock? */ + if ((unsigned char) cpu == global_irq_holder) + return; + /* Uhhuh.. Somebody else got it. Wait.. */ + spin_lock(&global_irq_lock); + } + /* + * We also to make sure that nobody else is running + * in an interrupt context. + */ + wait_on_irq(cpu); + + /* + * Ok, finally.. + */ + global_irq_holder = cpu; +} + +/* + * A global "cli()" while in an interrupt context + * turns into just a local cli(). Interrupts + * should use spinlocks for the (very unlikely) + * case that they ever want to protect against + * each other. + * + * If we already have local interrupts disabled, + * this will not turn a local disable into a + * global one (problems with spinlocks: this makes + * save_flags+cli+sti usable inside a spinlock). + */ +void +__global_cli(void) +{ + unsigned long flags; + + __save_flags(flags); + if (flags & IA64_PSR_I) { + int cpu = smp_processor_id(); + __cli(); + if (!local_irq_count[cpu]) + get_irqlock(cpu); + } +} + +void +__global_sti(void) +{ + int cpu = smp_processor_id(); + + if (!local_irq_count[cpu]) + release_irqlock(cpu); + __sti(); +} + +/* + * SMP flags value to restore to: + * 0 - global cli + * 1 - global sti + * 2 - local cli + * 3 - local sti + */ +unsigned long +__global_save_flags(void) +{ + int retval; + int local_enabled; + unsigned long flags; + + __save_flags(flags); + local_enabled = flags & IA64_PSR_I; + /* default to local */ + retval = 2 + local_enabled; + + /* check for global flags if we're not in an interrupt */ + if (!local_irq_count[smp_processor_id()]) { + if (local_enabled) + retval = 1; + if (global_irq_holder == (unsigned char) smp_processor_id()) + retval = 0; + } + return retval; +} + +void +__global_restore_flags(unsigned long flags) +{ + switch (flags) { + case 0: + __global_cli(); + break; + case 1: + __global_sti(); + break; + case 2: + __cli(); + break; + case 3: + __sti(); + break; + default: + printk("global_restore_flags: %08lx (%08lx)\n", + flags, (&flags)[-1]); + } +} diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S new file mode 100644 index 000000000..4c3ac242a --- /dev/null +++ b/arch/ia64/kernel/ivt.S @@ -0,0 +1,1342 @@ +/* + * arch/ia64/kernel/ivt.S + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 1998-2000 David Mosberger <davidm@hpl.hp.com> + */ + +#include <linux/config.h> + +#include <asm/break.h> +#include <asm/offsets.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/unistd.h> + +#include "entry.h" + +/* + * A couple of convenience macros that make writing and reading + * SAVE_MIN and SAVE_REST easier. + */ +#define rARPR r31 +#define rCRIFS r30 +#define rCRIPSR r29 +#define rCRIIP r28 +#define rARRSC r27 +#define rARPFS r26 +#define rARUNAT r25 +#define rARRNAT r24 +#define rARBSPSTORE r23 +#define rKRBS r22 +#define rB6 r21 +#define rR1 r20 + +/* + * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves + * the minimum state necessary that allows us to turn psr.ic back + * on. 
+ * + * Assumed state upon entry: + * psr.ic: off + * psr.dt: off + * r31: contains saved predicates (pr) + * + * Upon exit, the state is as follows: + * psr.ic: off + * psr.dt: off + * r2 = points to &pt_regs.r16 + * r12 = kernel sp (kernel virtual address) + * r13 = points to current task_struct (kernel virtual address) + * p15 = TRUE if psr.i is set in cr.ipsr + * predicate registers (other than p6, p7, and p15), b6, r3, r8, r9, r10, r11, r14, r15: + * preserved + * + * Note that psr.ic is NOT turned on by this macro. This is so that + * we can pass interruption state as arguments to a handler. + */ +#define DO_SAVE_MIN(COVER,EXTRA) \ + mov rARRSC=ar.rsc; \ + mov rARPFS=ar.pfs; \ + mov rR1=r1; \ + mov rARUNAT=ar.unat; \ + mov rCRIPSR=cr.ipsr; \ + mov rB6=b6; /* rB6 = branch reg 6 */ \ + mov rCRIIP=cr.iip; \ + mov r1=ar.k6; /* r1 = current */ \ + ;; \ + invala; \ + extr.u r16=rCRIPSR,32,2; /* extract psr.cpl */ \ + ;; \ + cmp.eq pKern,p7=r0,r16; /* are we in kernel mode already? (psr.cpl==0) */ \ + /* switch from user to kernel RBS: */ \ + COVER; \ + ;; \ +(p7) mov ar.rsc=r0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ +(p7) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \ + ;; \ +(p7) mov rARRNAT=ar.rnat; \ +(pKern) dep r1=0,sp,61,3; /* compute physical addr of sp */ \ +(p7) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ +(p7) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \ +(p7) dep rKRBS=-1,rKRBS,61,3; /* compute kernel virtual addr of RBS */ \ + ;; \ +(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ +(p7) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \ + ;; \ +(p7) mov r18=ar.bsp; \ +(p7) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ + \ + mov r16=r1; /* initialize first base pointer */ \ + adds r17=8,r1; /* initialize second base pointer */ \ + ;; \ + st8 [r16]=rCRIPSR,16; /* save cr.ipsr */ \ + st8 [r17]=rCRIIP,16; /* save cr.iip */ \ +(pKern) mov r18=r0; /* make sure r18 isn't NaT */ \ + ;; \ + st8 [r16]=rCRIFS,16; /* save cr.ifs */ \ + st8 [r17]=rARUNAT,16; /* save ar.unat */ \ +(p7) sub r18=r18,rKRBS; /* r18=RSE.ndirty*8 */ \ + ;; \ + st8 [r16]=rARPFS,16; /* save ar.pfs */ \ + st8 [r17]=rARRSC,16; /* save ar.rsc */ \ + tbit.nz p15,p0=rCRIPSR,IA64_PSR_I_BIT \ + ;; /* avoid RAW on r16 & r17 */ \ +(pKern) adds r16=16,r16; /* skip over ar_rnat field */ \ +(pKern) adds r17=16,r17; /* skip over ar_bspstore field */ \ +(p7) st8 [r16]=rARRNAT,16; /* save ar.rnat */ \ +(p7) st8 [r17]=rARBSPSTORE,16; /* save ar.bspstore */ \ + ;; \ + st8 [r16]=rARPR,16; /* save predicates */ \ + st8 [r17]=rB6,16; /* save b6 */ \ + shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \ + ;; \ + st8 [r16]=r18,16; /* save ar.rsc value for "loadrs" */ \ + st8.spill [r17]=rR1,16; /* save original r1 */ \ + cmp.ne pEOI,p0=r0,r0 /* clear pEOI by default */ \ + ;; \ + st8.spill [r16]=r2,16; \ + st8.spill [r17]=r3,16; \ + adds r2=IA64_PT_REGS_R16_OFFSET,r1; \ + ;; \ + st8.spill [r16]=r12,16; \ + st8.spill [r17]=r13,16; \ + cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \ + ;; \ + st8.spill [r16]=r14,16; \ + st8.spill [r17]=r15,16; \ + dep r14=-1,r0,61,3; \ + ;; \ + st8.spill [r16]=r8,16; \ + st8.spill [r17]=r9,16; \ + adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \ + ;; \ + st8.spill [r16]=r10,16; \ + st8.spill [r17]=r11,16; \ + mov r13=ar.k6; /* establish `current' */ \ + ;; \ + or r2=r2,r14; /* make first base a 
kernel virtual address */ \ + EXTRA; \ + movl r1=__gp; /* establish kernel global pointer */ \ + ;; \ + or r12=r12,r14; /* make sp a kernel virtual address */ \ + or r13=r13,r14; /* make `current' a kernel virtual address */ \ + bsw.1;; /* switch back to bank 1 (must be last in insn group) */ + +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC +# define STOPS nop.i 0x0;; nop.i 0x0;; nop.i 0x0;; +#else +# define STOPS +#endif + +#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover;; mov rCRIFS=cr.ifs,) STOPS +#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover;; mov rCRIFS=cr.ifs, mov r15=r19) STOPS +#define SAVE_MIN DO_SAVE_MIN(mov rCRIFS=r0,) STOPS + +/* + * SAVE_REST saves the remainder of pt_regs (with psr.ic on). This + * macro guarantees to preserve all predicate registers, r8, r9, r10, + * r11, r14, and r15. + * + * Assumed state upon entry: + * psr.ic: on + * psr.dt: on + * r2: points to &pt_regs.r16 + * r3: points to &pt_regs.r17 + */ +#define SAVE_REST \ + st8.spill [r2]=r16,16; \ + st8.spill [r3]=r17,16; \ + ;; \ + st8.spill [r2]=r18,16; \ + st8.spill [r3]=r19,16; \ + ;; \ + mov r16=ar.ccv; /* M-unit */ \ + movl r18=FPSR_DEFAULT /* L-unit */ \ + ;; \ + mov r17=ar.fpsr; /* M-unit */ \ + mov ar.fpsr=r18; /* M-unit */ \ + ;; \ + st8.spill [r2]=r20,16; \ + st8.spill [r3]=r21,16; \ + mov r18=b0; \ + ;; \ + st8.spill [r2]=r22,16; \ + st8.spill [r3]=r23,16; \ + mov r19=b7; \ + ;; \ + st8.spill [r2]=r24,16; \ + st8.spill [r3]=r25,16; \ + ;; \ + st8.spill [r2]=r26,16; \ + st8.spill [r3]=r27,16; \ + ;; \ + st8.spill [r2]=r28,16; \ + st8.spill [r3]=r29,16; \ + ;; \ + st8.spill [r2]=r30,16; \ + st8.spill [r3]=r31,16; \ + ;; \ + st8 [r2]=r16,16; /* ar.ccv */ \ + st8 [r3]=r17,16; /* ar.fpsr */ \ + ;; \ + st8 [r2]=r18,16; /* b0 */ \ + st8 [r3]=r19,16+8; /* b7 */ \ + ;; \ + stf.spill [r2]=f6,32; \ + stf.spill [r3]=f7,32; \ + ;; \ + stf.spill [r2]=f8,32; \ + stf.spill [r3]=f9,32 + +/* + * This file defines the interrupt vector table used by the CPU. + * It does not include one entry per possible cause of interruption. + * + * External interrupts only use 1 entry. All others are internal interrupts + * + * The first 20 entries of the table contain 64 bundles each while the + * remaining 48 entries contain only 16 bundles each. + * + * The 64 bundles are used to allow inlining the whole handler for critical + * interrupts like TLB misses. + * + * For each entry, the comment is as follows: + * + * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) + * entry offset ----/ / / / / + * entry number ---------/ / / / + * size of the entry -------------/ / / + * vector name -------------------------------------/ / + * related interrupts (what is the real interrupt?) ----------/ + * + * The table is 32KB in size and must be aligned on 32KB boundary. + * (The CPU ignores the 15 lower bits of the address) + * + * Table is based upon EAS2.4 (June 1998) + */ + +#define FAULT(n) \ + rsm psr.dt; /* avoid nested faults due to TLB misses... */ \ + ;; \ + srlz.d; /* ensure everyone knows psr.dt is off... */ \ + mov r31=pr; \ + mov r19=n;; /* prepare to save predicates */ \ + br.cond.sptk.many dispatch_to_fault_handler + +/* + * As we don't (hopefully) use the space available, we need to fill it with + * nops. 
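
The offsets quoted in the per-entry comments that follow fall straight out of
the layout described above (20 long entries of 64 bundles, then 48 short
entries of 16 bundles, 16 bytes per bundle).  A small C helper, given here
purely as a cross-check of those numbers, reproduces them:

	/* Illustrative only: byte offset of IVT entry n from ia64_ivt. */
	static unsigned long ivt_entry_offset(int n)
	{
		if (n < 20)
			return n * 64 * 16;			/* entries 0x400 apart */
		return 20 * 64 * 16 + (n - 20) * 16 * 16;	/* 0x100 apart from 0x5000 */
	}
	/* e.g. ivt_entry_offset(7) == 0x1c00, ivt_entry_offset(21) == 0x5100,
	   and ivt_entry_offset(67) + 0x100 == 0x8000, the 32KB table size. */
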
the parameter may be used for debugging and is representing the entry + * number + */ +#define BREAK_BUNDLE(a) break.m (a); \ + break.i (a); \ + break.i (a) +/* + * 4 breaks bundles all together + */ +#define BREAK_BUNDLE4(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a); BREAK_BUNDLE(a) + +/* + * 8 bundles all together (too lazy to use only 4 at a time !) + */ +#define BREAK_BUNDLE8(a); BREAK_BUNDLE4(a); BREAK_BUNDLE4(a) + + .psr abi64 + .psr lsb + .lsb + + .section __ivt_section,"ax" + + .align 32768 // align on 32KB boundary + .global ia64_ivt +ia64_ivt: +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) + /* + * The VHPT vector is invoked when the TLB entry for the virtual page table + * is missing. This happens only as a result of a previous + * (the "original") TLB miss, which may either be caused by an instruction + * fetch or a data access (or non-access). + * + * What we do here is normal TLB miss handing for the _original_ miss, followed + * by inserting the TLB entry for the virtual page table page that the VHPT + * walker was attempting to access. The latter gets inserted as long + * as both L1 and L2 have valid mappings for the faulting address. + * The TLB entry for the original miss gets inserted only if + * the L3 entry indicates that the page is present. + * + * do_page_fault gets invoked in the following cases: + * - the faulting virtual address uses unimplemented address bits + * - the faulting virtual address has no L1, L2, or L3 mapping + */ + mov r16=cr.ifa // get address that caused the TLB miss + ;; + rsm psr.dt // use physical addressing for data + mov r31=pr // save the predicate registers + mov r19=ar.k7 // get page table base address + shl r21=r16,3 // shift bit 60 into sign bit + shr.u r17=r16,61 // get the region number into r17 + ;; + cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? + shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + srlz.d // ensure "rsm psr.dt" has taken effect +(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir +(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 +(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 + ;; +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? + shr.u r18=r16,PMD_SHIFT // shift L2 index into position + ;; +(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones? + ld8 r17=[r17] // fetch the L1 entry (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry + ;; +(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) + shr.u r19=r16,PAGE_SHIFT // shift L3 index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? + dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry + ;; +(p7) ld8 r18=[r17] // read the L3 PTE + mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss + ;; +(p7) tbit.z p6,p7=r18,0 // page present bit cleared? + mov r21=cr.iha // get the VHPT address that caused the TLB miss + ;; // avoid RAW on p7 +(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? 
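
Stripped of the region-5 special case and the physical-addressing and
attribute-bit details, the lookup the VHPT handler has just performed (and
which the ITLB and DTLB handlers below repeat) is an ordinary three-level page
table walk.  A rough C equivalent, for orientation only (pgd_base stands for
the page-table root held in ar.k7; PGDIR_SHIFT, PMD_SHIFT and PAGE_SHIFT come
from pgtable.h):

	#define IDX(addr, shift) (((addr) >> (shift)) & ((1UL << (PAGE_SHIFT - 3)) - 1))

	/* Returns the L3 PTE for addr, or 0 if a level is missing/not present. */
	static unsigned long walk_sketch(unsigned long *pgd_base, unsigned long addr)
	{
		unsigned long l1, l2, l3;

		l1 = pgd_base[IDX(addr, PGDIR_SHIFT)];		/* "fetch the L1 entry" */
		if (!l1)
			return 0;				/* -> page_fault */
		l2 = ((unsigned long *) l1)[IDX(addr, PMD_SHIFT)];
		if (!l2)
			return 0;
		l3 = ((unsigned long *) l2)[IDX(addr, PAGE_SHIFT)];
		if (!(l3 & 1))					/* present bit (bit 0) clear? */
			return 0;
		return l3;	/* the code below feeds this to itc.i or itc.d */
	}
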
+ dep r17=0,r17,0,PAGE_SHIFT // clear low bits to get page address + ;; +(p10) itc.i r18;; // insert the instruction TLB entry (EAS2.6: must be last in insn group!) +(p11) itc.d r18;; // insert the data TLB entry (EAS2.6: must be last in insn group!) +(p6) br.spnt.few page_fault // handle bad address/page not present (page fault) + mov cr.ifa=r21 + + // Now compute and insert the TLB entry for the virtual page table. + // We never execute in a page table page so there is no need to set + // the exception deferral bit. + adds r16=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r17 + ;; +(p7) itc.d r16;; // EAS2.6: must be last in insn group! + mov pr=r31,-1 // restore predicate registers + rfi;; // must be last insn in an insn group + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0400 Entry 1 (size 64 bundles) ITLB (21) + /* + * The ITLB basically does the same as the VHPT handler except + * that we always insert exactly one instruction TLB entry. + */ + mov r16=cr.ifa // get address that caused the TLB miss + ;; + rsm psr.dt // use physical addressing for data + mov r31=pr // save the predicate registers + mov r19=ar.k7 // get page table base address + shl r21=r16,3 // shift bit 60 into sign bit + shr.u r17=r16,61 // get the region number into r17 + ;; + cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? + shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + srlz.d // ensure "rsm psr.dt" has taken effect +(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir +(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 +(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 + ;; +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? + shr.u r18=r16,PMD_SHIFT // shift L2 index into position + ;; +(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones? + ld8 r17=[r17] // fetch the L1 entry (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry + ;; +(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) + shr.u r19=r16,PAGE_SHIFT // shift L3 index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? + dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry + ;; +(p7) ld8 r18=[r17] // read the L3 PTE + ;; +(p7) tbit.z p6,p7=r18,0 // page present bit cleared? + ;; +(p7) itc.i r18;; // insert the instruction TLB entry (EAS2.6: must be last in insn group!) +(p6) br.spnt.few page_fault // handle bad address/page not present (page fault) + ;; + mov pr=r31,-1 // restore predicate registers + rfi;; // must be last insn in an insn group + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) + /* + * The DTLB basically does the same as the VHPT handler except + * that we always insert exactly one data TLB entry. + */ + mov r16=cr.ifa // get address that caused the TLB miss + ;; + rsm psr.dt // use physical addressing for data + mov r31=pr // save the predicate registers + mov r19=ar.k7 // get page table base address + shl r21=r16,3 // shift bit 60 into sign bit + shr.u r17=r16,61 // get the region number into r17 + ;; + cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? 
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + srlz.d // ensure "rsm psr.dt" has taken effect +(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir +(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 +(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 + ;; +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? + shr.u r18=r16,PMD_SHIFT // shift L2 index into position + ;; +(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones? + ld8 r17=[r17] // fetch the L1 entry (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry + ;; +(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) + shr.u r19=r16,PAGE_SHIFT // shift L3 index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? + dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry + ;; +(p7) ld8 r18=[r17] // read the L3 PTE + ;; +(p7) tbit.z p6,p7=r18,0 // page present bit cleared? + ;; +(p7) itc.d r18;; // insert the instruction TLB entry (EAS2.6: must be last in insn group!) +(p6) br.spnt.few page_fault // handle bad address/page not present (page fault) + ;; + mov pr=r31,-1 // restore predicate registers + rfi;; // must be last insn in an insn group + + //----------------------------------------------------------------------------------- + // call do_page_fault (predicates are in r31, psr.dt is off, r16 is faulting address) +page_fault: + SAVE_MIN_WITH_COVER + // + // Copy control registers to temporary registers, then turn on psr bits, + // then copy the temporary regs to the output regs. We have to do this + // because the "alloc" can cause a mandatory store which could lead to + // an "Alt DTLB" fault which we can handle only if psr.ic is on. + // + mov r8=cr.ifa + mov r9=cr.isr + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic | psr.dt + ;; + srlz.d // guarantee that interrupt collection is enabled +(p15) ssm psr.i // restore psr.i + ;; + srlz.i // must precede "alloc"! (srlz.i implies srlz.d) + movl r14=ia64_leave_kernel + ;; + alloc r15=ar.pfs,0,0,3,0 // must be first in insn group + mov out0=r8 + mov out1=r9 + ;; + SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 // out2 = pointer to pt_regs + br.call.sptk.few b6=ia64_do_page_fault // ignore return address + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) + mov r16=cr.ifa // get address that caused the TLB miss + movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RX + ;; + shr.u r18=r16,57 // move address bit 61 to bit 4 + dep r16=0,r16,52,12 // clear top 12 bits of address + ;; + andcm r18=0x10,r18 // bit 4=~address-bit(61) + dep r16=r17,r16,0,12 // insert PTE control bits into r16 + ;; + or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6 + ;; + itc.i r16;; // insert the TLB entry(EAS2.6: must be last in insn group!) 
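
This alternate handler (and its data-side twin just below) does not walk the
page tables at all; it fabricates an identity-mapped translation on the spot.
In rough C, with ifa standing for the faulting address from cr.ifa, the PTE
built in r16 is (the dep only deposits the low 12 bits of the control
constant, hence the mask; bit 4 is the uncacheable memory attribute):

	unsigned long pte;

	pte  = ifa & ~0xfff0000000000000UL;		/* clear the top 12 bits */
	pte &= ~0xfffUL;
	pte |= (__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RX) & 0xfff;  /* _PAGE_AR_RW on the data side */
	if (!(ifa & (1UL << 61)))			/* region 6 rather than 7 */
		pte |= 0x10;				/* mark it uncacheable */
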
+ rfi;; // must be last insn in an insn group + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) + mov r16=cr.ifa // get address that caused the TLB miss + movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RW + ;; + shr.u r18=r16,57 // move address bit 61 to bit 4 + dep r16=0,r16,52,12 // clear top 12 bits of address + ;; + andcm r18=0x10,r18 // bit 4=~address-bit(61) + dep r16=r17,r16,0,12 // insert PTE control bits into r16 + ;; + or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6 + ;; + itc.d r16;; // insert the TLB entry (EAS2.6: must be last in insn group!) + rfi;; // must be last insn in an insn group + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) + // + // In the absence of kernel bugs, we get here when the Dirty-bit, Instruction + // Access-bit, or Data Access-bit faults cause a nested fault because the + // dTLB entry for the virtual page table isn't present. In such a case, + // we lookup the pte for the faulting address by walking the page table + // and return to the contination point passed in register r30. + // In accessing the page tables, we don't need to check for NULL entries + // because if the page tables didn't map the faulting address, it would not + // be possible to receive one of the above faults. + // + // Input: r16: faulting address + // r29: saved b0 + // r30: continuation address + // + // Output: r17: physical address of L3 PTE of faulting address + // r29: saved b0 + // r30: continuation address + // + // Clobbered: b0, r18, r19, r21, r31, psr.dt (cleared) + // + rsm psr.dt // switch to using physical data addressing + mov r19=ar.k7 // get the page table base address + shl r21=r16,3 // shift bit 60 into sign bit + ;; + mov r31=pr // save the predicate registers + shr.u r17=r16,61 // get the region number into r17 + ;; + cmp.eq p6,p7=5,r17 // is faulting address in region 5? 
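
The point of this walker is easiest to see next to the Dirty-bit and
Access-bit handlers that follow further below.  What those handlers do is, in
rough C (vpte stands for the virtual-page-table address that thash computes;
the load through it is exactly the access that may nest-fault back into this
code):

	unsigned long pte;

	pte = *vpte;		/* may raise the nested dTLB fault handled here */
	pte |= _PAGE_D;		/* _PAGE_A in the two Access-bit vectors */
	*vpte = pte;		/* write the updated PTE back to the page table */
	/* ...then itc.d (or itc.i) re-inserts pte and rfi restarts the access */
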
+ shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + srlz.d +(p6) movl r17=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir +(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 +(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 + ;; +(p6) dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + shr.u r18=r16,PMD_SHIFT // shift L2 index into position + ;; + ld8 r17=[r17] // fetch the L1 entry + mov b0=r30 + ;; + dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry + ;; + ld8 r17=[r17] // fetch the L2 entry + shr.u r19=r16,PAGE_SHIFT // shift L3 index into position + ;; + dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry + ;; + mov pr=r31,-1 // restore predicates + br.cond.sptk.few b0 // return to continuation point + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) + FAULT(6) + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) + FAULT(7) + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) + // + // What we do here is to simply turn on the dirty bit in the PTE. We need + // to update both the page-table and the TLB entry. To efficiently access + // the PTE, we address it through the virtual page table. Most likely, the + // TLB entry for the relevant virtual page table page is still present in + // the TLB so we can normally do this without additional TLB misses. + // In case the necessary virtual page table TLB entry isn't present, we take + // a nested TLB miss hit where we look up the physical address of the L3 PTE + // and then continue at label 1 below. + // + mov r16=cr.ifa // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + ;; + thash r17=r16 // compute virtual address of L3 PTE + mov r29=b0 // save b0 in case of nested fault + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_D,r18 // set the dirty bit + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + itc.d r18;; // install updated PTE (EAS2.6: must be last in insn group!) + rfi;; // must be last insn in an insn group + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) + // Like Entry 8, except for instruction access + mov r16=cr.ifa // get the address that caused the fault +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + mov r31=pr // save predicates + mov r30=cr.ipsr + ;; + extr.u r17=r30,IA64_PSR_IS_BIT,1 // get instruction arch. 
indicator + ;; + cmp.eq p6,p0 = r17,r0 // check if IA64 instruction set + ;; +(p6) mov r16=cr.iip // get real faulting address + ;; +(p6) mov cr.ifa=r16 // reset IFA + mov pr=r31,-1 +#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ + movl r30=1f // load continuation point in case of nested fault + ;; + thash r17=r16 // compute virtual address of L3 PTE + mov r29=b0 // save b0 in case of nested fault) + ;; +1: ld8 r18=[r17] + ;; // avoid raw on r18 + or r18=_PAGE_A,r18 // set the accessed bit + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + itc.i r18;; // install updated PTE (EAS2.6: must be last in insn group!) + rfi;; // must be last insn in an insn group + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) + // Like Entry 8, except for data access + mov r16=cr.ifa // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + ;; + thash r17=r16 // compute virtual address of L3 PTE + mov r29=b0 // save b0 in case of nested fault) + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_A,r18 // set the accessed bit + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + itc.d r18;; // install updated PTE (EAS2.6: must be last in insn group!) + rfi;; // must be last insn in an insn group + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) + mov r16=cr.iim + mov r17=__IA64_BREAK_SYSCALL + mov r31=pr // prepare to save predicates + rsm psr.dt // avoid nested faults due to TLB misses... + ;; + srlz.d // ensure everyone knows psr.dt is off... + cmp.eq p0,p7=r16,r17 // is this a system call? (p7 <- false, if so) + +#if 1 + // Allow syscalls via the old system call number for the time being. This is + // so we can transition to the new syscall number in a relatively smooth + // fashion. + mov r17=0x80000 + ;; +(p7) cmp.eq.or.andcm p0,p7=r16,r17 // is this the old syscall number? +#endif + +(p7) br.cond.spnt.many non_syscall + + SAVE_MIN // uses r31; defines r2: + + // turn interrupt collection and data translation back on: + ssm psr.ic | psr.dt + srlz.d // guarantee that interrupt collection is enabled + cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 + ;; +(p15) ssm psr.i // restore psr.i + ;; + srlz.i // ensure everybody knows psr.ic and psr.dt are back on + adds r8=(IA64_PT_REGS_R8_OFFSET-IA64_PT_REGS_R16_OFFSET),r2 + ;; + stf8 [r8]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error) + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + SAVE_REST + ;; // avoid WAW on r2 & r3 + + mov r3=255 + adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024 + adds r2=IA64_TASK_FLAGS_OFFSET,r13 // r2 = ¤t->flags + + ;; + cmp.geu.unc p6,p7=r3,r15 // (syscall > 0 && syscall <= 1024+255) ? + movl r16=sys_call_table + ;; +(p6) shladd r16=r15,3,r16 + movl r15=ia64_ret_from_syscall +(p7) adds r16=(__NR_ni_syscall-1024)*8,r16 // force __NR_ni_syscall + ;; + ld8 r16=[r16] // load address of syscall entry point + mov rp=r15 // set the real return addr + ;; + ld8 r2=[r2] // r2 = current->flags + mov b6=r16 + + // arrange things so we skip over break instruction when returning: + + adds r16=16,sp // get pointer to cr_ipsr + adds r17=24,sp // get pointer to cr_iip + ;; + ld8 r18=[r16] // fetch cr_ipsr + tbit.z p8,p0=r2,5 // (current->flags & PF_TRACESYS) == 0? 
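
In C terms, the system-call dispatch above (together with the cr.iip
adjustment that continues just below) amounts to the following sketch;
syscall_nr stands for the value the caller put in r15 and fn is an invented
local name, while sys_call_table, __NR_ni_syscall and PF_TRACESYS are the
symbols the assembly actually uses:

	extern unsigned long sys_call_table[];	/* declared here only for the sketch */
	long nr = syscall_nr - 1024;		/* the handler subtracts the 1024 base */
	long (*fn)(long, long, long, long, long);

	if ((unsigned long) nr <= 255)		/* within the 256-entry table? */
		fn = (typeof(fn)) sys_call_table[nr];
	else
		fn = (typeof(fn)) sys_call_table[__NR_ni_syscall - 1024];
	/* The code below then bumps cr.iip/psr.ei past the break instruction and
	   either calls fn directly or, when (current->flags & PF_TRACESYS) is
	   set, goes through ia64_trace_syscall instead. */
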
+ ;; + ld8 r19=[r17] // fetch cr_iip + extr.u r20=r18,41,2 // extract ei field + ;; + cmp.eq p6,p7=2,r20 // isr.ei==2? + adds r19=16,r19 // compute address of next bundle + ;; +(p6) mov r20=0 // clear ei to 0 +(p7) adds r20=1,r20 // increment ei to next slot + ;; +(p6) st8 [r17]=r19 // store new cr.iip if cr.isr.ei wrapped around + dep r18=r20,r18,41,2 // insert new ei into cr.isr + ;; + st8 [r16]=r18 // store new value for cr.isr + +(p8) br.call.sptk.few b6=b6 // ignore this return addr + br.call.sptk.few rp=ia64_trace_syscall // rp will be overwritten (ignored) + // NOT REACHED + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) + rsm psr.dt // avoid nested faults due to TLB misses... + ;; + srlz.d // ensure everyone knows psr.dt is off... + mov r31=pr // prepare to save predicates + ;; + + SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 + ssm psr.ic | psr.dt // turn interrupt collection and data translation back on + ;; + adds r3=8,r2 // set up second base pointer for SAVE_REST + cmp.eq pEOI,p0=r0,r0 // set pEOI flag so that ia64_leave_kernel writes cr.eoi + srlz.i // ensure everybody knows psr.ic and psr.dt are back on + ;; + SAVE_REST + ;; + alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + mov out0=r0 // defer reading of cr.ivr to handle_irq... +#else + mov out0=cr.ivr // pass cr.ivr as first arg +#endif + add out1=16,sp // pass pointer to pt_regs as second arg + ;; + srlz.d // make sure we see the effect of cr.ivr + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.call.sptk.few b6=ia64_handle_irq + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3400 Entry 13 (size 64 bundles) Reserved + FAULT(13) + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3800 Entry 14 (size 64 bundles) Reserved + FAULT(14) + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3c00 Entry 15 (size 64 bundles) Reserved + FAULT(15) + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4000 Entry 16 (size 64 bundles) Reserved + FAULT(16) + +#ifdef CONFIG_IA32_SUPPORT + + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... 
+ + // IA32 interrupt entry point + +dispatch_to_ia32_handler: + SAVE_MIN + ;; + mov r14=cr.isr + ssm psr.ic | psr.dt + srlz.d // guarantee that interrupt collection is enabled + ;; +(p15) ssm psr.i + ;; + srlz.d + adds r3=8,r2 // Base pointer for SAVE_REST + ;; + SAVE_REST + ;; + mov r15=0x80 + shr r14=r14,16 // Get interrupt number + ;; + cmp.ne p6,p0=r14,r15 +(p6) br.call.dpnt.few b6=non_ia32_syscall + + adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions + + ;; + alloc r15=ar.pfs,0,0,6,0 // must first in an insn group + ;; + ld4 r8=[r14],8 // r8 == EAX (syscall number) + mov r15=0xff + ;; + cmp.ltu.unc p6,p7=r8,r15 + ld4 out1=[r14],8 // r9 == ecx + ;; + ld4 out2=[r14],8 // r10 == edx + ;; + ld4 out0=[r14] // r11 == ebx + adds r14=(IA64_PT_REGS_R8_OFFSET-(8*3)) + 16,sp + ;; + ld4 out5=[r14],8 // r13 == ebp + ;; + ld4 out3=[r14],8 // r14 == esi + adds r2=IA64_TASK_FLAGS_OFFSET,r13 // r2 = ¤t->flags + ;; + ld4 out4=[r14] // R15 == edi + movl r16=ia32_syscall_table + ;; +(p6) shladd r16=r8,3,r16 // Force ni_syscall if not valid syscall number + ld8 r2=[r2] // r2 = current->flags + ;; + ld8 r16=[r16] + tbit.z p8,p0=r2,5 // (current->flags & PF_TRACESYS) == 0? + ;; + movl r15=ia32_ret_from_syscall + mov b6=r16 + ;; + mov rp=r15 +(p8) br.call.sptk.few b6=b6 + br.call.sptk.few rp=ia32_trace_syscall // rp will be overwritten (ignored) + +non_ia32_syscall: + alloc r15=ar.pfs,0,0,2,0 + mov out0=r14 // interrupt # + add out1=16,sp // pointer to pt_regs + ;; // avoid WAW on CFM + br.call.sptk.few rp=ia32_bad_interrupt + ;; + movl r15=ia64_leave_kernel + ;; + mov rp=r15 + br.ret.sptk.many rp + +#endif /* CONFIG_IA32_SUPPORT */ + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4400 Entry 17 (size 64 bundles) Reserved + FAULT(17) + +non_syscall: + +#ifdef CONFIG_KDB + mov r17=__IA64_BREAK_KDB + ;; + cmp.eq p8,p0=r16,r17 // is this a kernel breakpoint? +#endif + + SAVE_MIN_WITH_COVER + + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... + + mov r8=cr.iim // get break immediate (must be done while psr.ic is off) + adds r3=8,r2 // set up second base pointer for SAVE_REST + + // turn interrupt collection and data translation back on: + ssm psr.ic | psr.dt + srlz.d // guarantee that interrupt collection is enabled + ;; +(p15) ssm psr.i // restore psr.i + ;; + srlz.i // ensure everybody knows psr.ic and psr.dt are back on + movl r15=ia64_leave_kernel + ;; + alloc r14=ar.pfs,0,0,2,0 + mov out0=r8 // break number + add out1=16,sp // pointer to pt_regs + ;; + SAVE_REST + mov rp=r15 + ;; +#ifdef CONFIG_KDB +(p8) br.call.sptk.few b6=ia64_invoke_kdb +#endif + br.call.sptk.few b6=ia64_bad_break // avoid WAW on CFM and ignore return addr + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4800 Entry 18 (size 64 bundles) Reserved + FAULT(18) + + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... 
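
For reference, the ia32 system-call dispatch in dispatch_to_ia32_handler above
reduces to the following C sketch.  ia32_fn_t and ret are invented names and
regs stands for the pt_regs area the handler saved; ia32_syscall_table and the
pt_regs slots (r8=EAX, r11=EBX, r9=ECX, r10=EDX, r14=ESI, r15=EDI, r13=EBP)
are what the assembly itself uses, and index 0 is the slot the code falls back
to for an invalid number (ni_syscall, per the comment above):

	typedef long (*ia32_fn_t)(long, long, long, long, long, long);
	unsigned long nr = regs->r8;			/* EAX holds the syscall number */
	ia32_fn_t fn;
	long ret;

	fn = (ia32_fn_t) ia32_syscall_table[nr < 0xff ? nr : 0];
	ret = fn(regs->r11, regs->r9, regs->r10,	/* EBX, ECX, EDX */
		 regs->r14, regs->r15, regs->r13);	/* ESI, EDI, EBP */
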
+ +dispatch_unaligned_handler: + SAVE_MIN_WITH_COVER + ;; + // + // we can't have the alloc while psr.ic is cleared because + // we might get a mandatory RSE (when you reach the end of the + // rotating partition when doing the alloc) spill which could cause + // a page fault on the kernel virtual address and the handler + // wouldn't get the state to recover. + // + mov r15=cr.ifa + ssm psr.ic | psr.dt + srlz.d // guarantee that interrupt collection is enabled + ;; +(p15) ssm psr.i // restore psr.i + ;; + srlz.i + adds r3=8,r2 // set up second base pointer + ;; + SAVE_REST + ;; + alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) + ;; // avoid WAW on r14 + movl r14=ia64_leave_kernel + mov out0=r15 // out0 = faulting address + adds out1=16,sp // out1 = pointer to pt_regs + ;; + mov rp=r14 + br.sptk.few ia64_prepare_handle_unaligned + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4c00 Entry 19 (size 64 bundles) Reserved + FAULT(19) + + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... + +dispatch_to_fault_handler: + // + // Input: + // psr.ic: off + // psr.dt: off + // r19: fault vector number (e.g., 24 for General Exception) + // r31: contains saved predicates (pr) + // + SAVE_MIN_WITH_COVER_R19 + // + // Copy control registers to temporary registers, then turn on psr bits, + // then copy the temporary regs to the output regs. We have to do this + // because the "alloc" can cause a mandatory store which could lead to + // an "Alt DTLB" fault which we can handle only if psr.ic is on. + // + mov r8=cr.isr + mov r9=cr.ifa + mov r10=cr.iim + mov r11=cr.itir + ;; + ssm psr.ic | psr.dt + srlz.d // guarantee that interrupt collection is enabled + ;; +(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + srlz.i // must precede "alloc"! 
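
At the C level both of these dispatchers just marshal a few control registers
into ordinary function arguments (the second one's out-register setup
continues just below).  Read off that setup, and with the entry-point names
taken from the branch targets, the calls amount to the sketch below; the
authoritative prototypes live with the C handlers themselves:

	ia64_prepare_handle_unaligned(ifa, regs);  /* out0 = cr.ifa, out1 = &pt_regs */
	ia64_fault(vector, isr, ifa, iim, itir);   /* out0 = r19 (vector number),
						      out1..out4 = cr.isr, cr.ifa,
						      cr.iim, cr.itir */
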
+ ;; + alloc r14=ar.pfs,0,0,5,0 // must be first in insn group + mov out0=r15 + mov out1=r8 + mov out2=r9 + mov out3=r10 + mov out4=r11 + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 +#ifdef CONFIG_KDB + br.call.sptk.few b6=ia64_invoke_kdb_fault_handler +#else + br.call.sptk.few b6=ia64_fault +#endif +// +// --- End of long entries, Beginning of short entries +// + + .align 1024 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) + mov r16=cr.ifa + rsm psr.dt +#if 0 + // If you disable this, you MUST re-enable to update_mmu_cache() code in pgtable.h + mov r17=_PAGE_SIZE_4K<<2 + ;; + ptc.l r16,r17 +#endif + ;; + mov r31=pr + srlz.d + br.cond.sptk.many page_fault + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) + mov r16=cr.ifa + rsm psr.dt + mov r31=pr + ;; + srlz.d + br.cond.sptk.many page_fault + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) + mov r16=cr.ifa + rsm psr.dt + mov r31=pr + ;; + srlz.d + br.cond.sptk.many page_fault + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) + mov r16=cr.ifa + rsm psr.dt + mov r31=pr + ;; + srlz.d + br.cond.sptk.many page_fault + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) + FAULT(24) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) + rsm psr.dt | psr.dfh // ensure we can access fph + ;; + srlz.d + mov r31=pr + mov r19=25 + br.cond.sptk.many dispatch_to_fault_handler + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) + FAULT(26) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5700 Entry 27 (size 16 bundles) Speculation (40) + // + // A [f]chk.[as] instruction needs to take the branch to + // the recovery code but this part of the architecture is + // not implemented in hardware on some CPUs, such as Itanium. + // Thus, in general we need to emulate the behavior. + // IIM contains the relative target (not yet sign extended). + // So after sign extending it we simply add it to IIP. + // We also need to reset the EI field of the IPSR to zero, + // i.e., the slot to restart into. 
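
The fix-up that the instructions just below perform can be written as a small
C helper; it is given here only to make the bit positions explicit (iim is the
value read from cr.iim, and iip/ipsr are the values written back to cr.iip and
cr.ipsr before the rfi):

	/* Illustrative only: emulate the chk branch by hand. */
	static void speculation_fixup(unsigned long iim, unsigned long *iip,
				      unsigned long *ipsr)
	{
		long off = (long) (iim << 43) >> 39;	/* sign-extend imm21, x16 (bundle size) */

		*iip  += off;			/* branch to the recovery address */
		*ipsr &= ~(3UL << 41);		/* clear psr.ei: restart in slot 0 */
	}
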
+ // + // cr.imm contains zero_ext(imm21) + // + mov r18=cr.iim + ;; + mov r17=cr.iip + shl r18=r18,43 // put sign bit in position (43=64-21) + ;; + + mov r16=cr.ipsr + shr r18=r18,39 // sign extend (39=43-4) + ;; + + add r17=r17,r18 // now add the offset + ;; + mov cr.iip=r17 + dep r16=0,r16,41,2 // clear EI + ;; + + mov cr.ipsr=r16 + ;; + + rfi;; // and go back (must be last insn in group) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5800 Entry 28 (size 16 bundles) Reserved + FAULT(28) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) + FAULT(29) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) + rsm psr.dt // avoid nested faults due to TLB misses... + mov r16=cr.ipsr + mov r31=pr // prepare to save predicates + ;; + srlz.d // ensure everyone knows psr.dt is off + mov r19=30 // error vector for fault_handler (when kernel) + extr.u r16=r16,32,2 // extract psr.cpl + ;; + cmp.eq p6,p7=r0,r16 // if kernel cpl then fault else emulate +(p7) br.cond.sptk.many dispatch_unaligned_handler +(p6) br.cond.sptk.many dispatch_to_fault_handler + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) + FAULT(31) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) + FAULT(32) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) + FAULT(33) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Tranfer Trap (66) + FAULT(34) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) + FAULT(35) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) + FAULT(36) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6100 Entry 37 (size 16 bundles) Reserved + FAULT(37) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6200 Entry 38 (size 16 bundles) Reserved + FAULT(38) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6300 Entry 39 (size 16 bundles) Reserved + FAULT(39) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6400 Entry 40 (size 16 bundles) Reserved + FAULT(40) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6500 Entry 41 (size 16 bundles) Reserved + FAULT(41) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6600 Entry 42 (size 16 bundles) Reserved + FAULT(42) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6700 Entry 
43 (size 16 bundles) Reserved + FAULT(43) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6800 Entry 44 (size 16 bundles) Reserved + FAULT(44) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) + FAULT(45) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) + FAULT(46) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) +#ifdef CONFIG_IA32_SUPPORT + rsm psr.dt + ;; + srlz.d + mov r31=pr + br.cond.sptk.many dispatch_to_ia32_handler +#else + FAULT(47) +#endif + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6c00 Entry 48 (size 16 bundles) Reserved + FAULT(48) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6d00 Entry 49 (size 16 bundles) Reserved + FAULT(49) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6e00 Entry 50 (size 16 bundles) Reserved + FAULT(50) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6f00 Entry 51 (size 16 bundles) Reserved + FAULT(51) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7000 Entry 52 (size 16 bundles) Reserved + FAULT(52) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7100 Entry 53 (size 16 bundles) Reserved + FAULT(53) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7200 Entry 54 (size 16 bundles) Reserved + FAULT(54) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7300 Entry 55 (size 16 bundles) Reserved + FAULT(55) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7400 Entry 56 (size 16 bundles) Reserved + FAULT(56) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7500 Entry 57 (size 16 bundles) Reserved + FAULT(57) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7600 Entry 58 (size 16 bundles) Reserved + FAULT(58) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7700 Entry 59 (size 16 bundles) Reserved + FAULT(59) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7800 Entry 60 (size 16 bundles) Reserved + FAULT(60) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7900 Entry 61 (size 16 bundles) Reserved + FAULT(61) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7a00 Entry 62 (size 16 bundles) Reserved + FAULT(62) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7b00 Entry 63 (size 16 bundles) Reserved + 
FAULT(63) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7c00 Entry 64 (size 16 bundles) Reserved + FAULT(64) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7d00 Entry 65 (size 16 bundles) Reserved + FAULT(65) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7e00 Entry 66 (size 16 bundles) Reserved + FAULT(66) + + .align 256 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7f00 Entry 67 (size 16 bundles) Reserved + FAULT(67) diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c new file mode 100644 index 000000000..153fb5684 --- /dev/null +++ b/arch/ia64/kernel/machvec.c @@ -0,0 +1,48 @@ +#include <linux/kernel.h> + +#include <asm/page.h> +#include <asm/machvec.h> + +struct ia64_machine_vector ia64_mv; + +void +machvec_noop (void) +{ +} + +/* + * Most platforms use this routine for mapping page frame addresses + * into a memory map index. + */ +unsigned long +map_nr_dense (unsigned long addr) +{ + return MAP_NR_DENSE(addr); +} + +static struct ia64_machine_vector * +lookup_machvec (const char *name) +{ + extern struct ia64_machine_vector machvec_start[]; + extern struct ia64_machine_vector machvec_end[]; + struct ia64_machine_vector *mv; + + for (mv = machvec_start; mv < machvec_end; ++mv) + if (strcmp (mv->name, name) == 0) + return mv; + + return 0; +} + +void +machvec_init (const char *name) +{ + struct ia64_machine_vector *mv; + + mv = lookup_machvec(name); + if (!mv) { + panic("generic kernel failed to find machine vector for platform %s!", name); + } + ia64_mv = *mv; + printk("booting generic kernel on platform %s\n", name); +} diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c new file mode 100644 index 000000000..320c56ebc --- /dev/null +++ b/arch/ia64/kernel/mca.c @@ -0,0 +1,842 @@ +/* + * File: mca.c + * Purpose: Generic MCA handling layer + * + * Copyright (C) 1999 Silicon Graphics, Inc. 
+ * Copyright (C) Vijay Chander(vijay@engr.sgi.com) + */ +#include <linux/types.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/sal.h> +#include <asm/mca.h> +#include <asm/spinlock.h> +#include <asm/irq.h> +#include <asm/machvec.h> + + +ia64_mc_info_t ia64_mc_info; +ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state; +ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state; +u64 ia64_mca_proc_state_dump[256]; +u64 ia64_mca_stack[1024]; +u64 ia64_mca_stackframe[32]; +u64 ia64_mca_bspstore[1024]; + +static void ia64_mca_cmc_vector_setup(int enable, + int_vector_t cmc_vector); +static void ia64_mca_wakeup_ipi_wait(void); +static void ia64_mca_wakeup(int cpu); +static void ia64_mca_wakeup_all(void); +static void ia64_log_init(int,int); +static void ia64_log_get(int,int, prfunc_t); +static void ia64_log_clear(int,int,int, prfunc_t); + +/* + * ia64_mca_cmc_vector_setup + * Setup the correctable machine check vector register in the processor + * Inputs + * Enable (1 - enable cmc interrupt , 0 - disable) + * CMC handler entry point (if enabled) + * + * Outputs + * None + */ +static void +ia64_mca_cmc_vector_setup(int enable, + int_vector_t cmc_vector) +{ + cmcv_reg_t cmcv; + + cmcv.cmcv_regval = 0; + cmcv.cmcv_mask = enable; + cmcv.cmcv_vector = cmc_vector; + ia64_set_cmcv(cmcv.cmcv_regval); +} + + +#if defined(MCA_TEST) + +sal_log_processor_info_t slpi_buf; + +void +mca_test(void) +{ + slpi_buf.slpi_valid.slpi_psi = 1; + slpi_buf.slpi_valid.slpi_cache_check = 1; + slpi_buf.slpi_valid.slpi_tlb_check = 1; + slpi_buf.slpi_valid.slpi_bus_check = 1; + slpi_buf.slpi_valid.slpi_minstate = 1; + slpi_buf.slpi_valid.slpi_bank1_gr = 1; + slpi_buf.slpi_valid.slpi_br = 1; + slpi_buf.slpi_valid.slpi_cr = 1; + slpi_buf.slpi_valid.slpi_ar = 1; + slpi_buf.slpi_valid.slpi_rr = 1; + slpi_buf.slpi_valid.slpi_fr = 1; + + ia64_os_mca_dispatch(); +} + +#endif /* #if defined(MCA_TEST) */ + +/* + * mca_init + * Do all the mca specific initialization on a per-processor basis. + * + * 1. Register spinloop and wakeup request interrupt vectors + * + * 2. Register OS_MCA handler entry point + * + * 3. Register OS_INIT handler entry point + * + * 4. Initialize CMCV register to enable/disable CMC interrupt on the + * processor and hook a handler in the platform-specific mca_init. + * + * 5. Initialize MCA/CMC/INIT related log buffers maintained by the OS. + * + * Inputs + * None + * Outputs + * None + */ +void __init +mca_init(void) +{ + int i; + + MCA_DEBUG("mca_init : begin\n"); + /* Clear the Rendez checkin flag for all cpus */ + for(i = 0 ; i < IA64_MAXCPUS; i++) + ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; + + /* NOTE : The actual irqs for the rendez, wakeup and + * cmc interrupts are requested in the platform-specific + * mca initialization code. 
+ */ + /* + * Register the rendezvous spinloop and wakeup mechanism with SAL + */ + + /* Register the rendezvous interrupt vector with SAL */ + if (ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT, + SAL_MC_PARAM_MECHANISM_INT, + IA64_MCA_RENDEZ_INT_VECTOR, + IA64_MCA_RENDEZ_TIMEOUT)) + return; + + /* Register the wakeup interrupt vector with SAL */ + if (ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP, + SAL_MC_PARAM_MECHANISM_INT, + IA64_MCA_WAKEUP_INT_VECTOR, + 0)) + return; + + MCA_DEBUG("mca_init : registered mca rendezvous spinloop and wakeup mech.\n"); + /* + * Setup the correctable machine check vector + */ + ia64_mca_cmc_vector_setup(IA64_CMC_INT_ENABLE, + IA64_MCA_CMC_INT_VECTOR); + + MCA_DEBUG("mca_init : correctable mca vector setup done\n"); + + ia64_mc_info.imi_mca_handler = __pa(ia64_os_mca_dispatch); + ia64_mc_info.imi_mca_handler_size = + __pa(ia64_os_mca_dispatch_end) - __pa(ia64_os_mca_dispatch); + /* Register the os mca handler with SAL */ + if (ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, + ia64_mc_info.imi_mca_handler, + __pa(ia64_get_gp()), + ia64_mc_info.imi_mca_handler_size, + 0,0,0)) + + return; + + MCA_DEBUG("mca_init : registered os mca handler with SAL\n"); + + ia64_mc_info.imi_monarch_init_handler = __pa(ia64_monarch_init_handler); + ia64_mc_info.imi_monarch_init_handler_size = IA64_INIT_HANDLER_SIZE; + ia64_mc_info.imi_slave_init_handler = __pa(ia64_slave_init_handler); + ia64_mc_info.imi_slave_init_handler_size = IA64_INIT_HANDLER_SIZE; + /* Register the os init handler with SAL */ + if (ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, + ia64_mc_info.imi_monarch_init_handler, + __pa(ia64_get_gp()), + ia64_mc_info.imi_monarch_init_handler_size, + ia64_mc_info.imi_slave_init_handler, + __pa(ia64_get_gp()), + ia64_mc_info.imi_slave_init_handler_size)) + + + return; + + MCA_DEBUG("mca_init : registered os init handler with SAL\n"); + + /* Initialize the areas set aside by the OS to buffer the + * platform/processor error states for MCA/INIT/CMC + * handling. + */ + ia64_log_init(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR); + ia64_log_init(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PLATFORM); + ia64_log_init(SAL_INFO_TYPE_INIT, SAL_SUB_INFO_TYPE_PROCESSOR); + ia64_log_init(SAL_INFO_TYPE_INIT, SAL_SUB_INFO_TYPE_PLATFORM); + ia64_log_init(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR); + ia64_log_init(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PLATFORM); + + mca_init_platform(); + + MCA_DEBUG("mca_init : platform-specific mca handling setup done\n"); + +#if defined(MCA_TEST) + mca_test(); +#endif /* #if defined(MCA_TEST) */ + + printk("Mca related initialization done\n"); +} + +/* + * ia64_mca_wakeup_ipi_wait + * Wait for the inter-cpu interrupt to be sent by the + * monarch processor once it is done with handling the + * MCA. + * Inputs + * None + * Outputs + * None + */ +void +ia64_mca_wakeup_ipi_wait(void) +{ + int irr_num = (IA64_MCA_WAKEUP_INT_VECTOR >> 6); + int irr_bit = (IA64_MCA_WAKEUP_INT_VECTOR & 0x3f); + u64 irr = 0; + + do { + switch(irr_num) { + case 0: + irr = ia64_get_irr0(); + break; + case 1: + irr = ia64_get_irr1(); + break; + case 2: + irr = ia64_get_irr2(); + break; + case 3: + irr = ia64_get_irr3(); + break; + } + } while (!(irr & (1 << irr_bit))) ; +} + +/* + * ia64_mca_wakeup + * Send an inter-cpu interrupt to wake-up a particular cpu + * and mark that cpu to be out of rendez. 
+ * Inputs + * cpuid + * Outputs + * None + */ +void +ia64_mca_wakeup(int cpu) +{ + ipi_send(cpu, IA64_MCA_WAKEUP_INT_VECTOR, IA64_IPI_DM_INT); + ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; + +} +/* + * ia64_mca_wakeup_all + * Wakeup all the cpus which have rendez'ed previously. + * Inputs + * None + * Outputs + * None + */ +void +ia64_mca_wakeup_all(void) +{ + int cpu; + + /* Clear the Rendez checkin flag for all cpus */ + for(cpu = 0 ; cpu < IA64_MAXCPUS; cpu++) + if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE) + ia64_mca_wakeup(cpu); + +} +/* + * ia64_mca_rendez_interrupt_handler + * This is handler used to put slave processors into spinloop + * while the monarch processor does the mca handling and later + * wake each slave up once the monarch is done. + * Inputs + * None + * Outputs + * None + */ +void +ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs) +{ + int flags; + /* Mask all interrupts */ + save_and_cli(flags); + + ia64_mc_info.imi_rendez_checkin[ia64_get_cpuid(0)] = IA64_MCA_RENDEZ_CHECKIN_DONE; + /* Register with the SAL monarch that the slave has + * reached SAL + */ + ia64_sal_mc_rendez(); + + /* Wait for the wakeup IPI from the monarch + * This waiting is done by polling on the wakeup-interrupt + * vector bit in the processor's IRRs + */ + ia64_mca_wakeup_ipi_wait(); + + /* Enable all interrupts */ + restore_flags(flags); + + +} + + +/* + * ia64_mca_wakeup_int_handler + * The interrupt handler for processing the inter-cpu interrupt to the + * slave cpu which was spinning in the rendez loop. + * Since this spinning is done by turning off the interrupts and + * polling on the wakeup-interrupt bit in the IRR, there is + * nothing useful to be done in the handler. + * Inputs + * wakeup_irq (Wakeup-interrupt bit) + * arg (Interrupt handler specific argument) + * ptregs (Exception frame at the time of the interrupt) + * Outputs + * + */ +void +ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs) +{ + +} + +/* + * ia64_return_to_sal_check + * This is function called before going back from the OS_MCA handler + * to the OS_MCA dispatch code which finally takes the control back + * to the SAL. + * The main purpose of this routine is to setup the OS_MCA to SAL + * return state which can be used by the OS_MCA dispatch code + * just before going back to SAL. + * Inputs + * None + * Outputs + * None + */ + +void +ia64_return_to_sal_check(void) +{ + /* Copy over some relevant stuff from the sal_to_os_mca_handoff + * so that it can be used at the time of os_mca_to_sal_handoff + */ + ia64_os_to_sal_handoff_state.imots_sal_gp = + ia64_sal_to_os_handoff_state.imsto_sal_gp; + + ia64_os_to_sal_handoff_state.imots_sal_check_ra = + ia64_sal_to_os_handoff_state.imsto_sal_check_ra; + + /* For now ignore the MCA */ + ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED; +} +/* + * ia64_mca_ucmc_handler + * This is uncorrectable machine check handler called from OS_MCA + * dispatch code which is in turn called from SAL_CHECK(). + * This is the place where the core of OS MCA handling is done. + * Right now the logs are extracted and displayed in a well-defined + * format. This handler code is supposed to be run only on the + * monarch processor. Once the monarch is done with MCA handling + * further MCA logging is enabled by clearing logs. + * Monarch also has the duty of sending wakeup-IPIs to pull the + * slave processors out of rendez. spinloop. 
+ * Inputs + * None + * Outputs + * None + */ +void +ia64_mca_ucmc_handler(void) +{ + + /* Get the MCA processor log */ + ia64_log_get(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk); + /* Get the MCA platform log */ + ia64_log_get(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PLATFORM, (prfunc_t)printk); + + ia64_log_print(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk); + + /* + * Do some error handling - Platform-specific mca handler is called at this point + */ + + mca_handler_platform() ; + + /* Clear the SAL MCA logs */ + ia64_log_clear(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PROCESSOR, 1, printk); + ia64_log_clear(SAL_INFO_TYPE_MCA, SAL_SUB_INFO_TYPE_PLATFORM, 1, printk); + + /* Wakeup all the processors which are spinning in the rendezvous + * loop. + */ + ia64_mca_wakeup_all(); + ia64_return_to_sal_check(); +} + +/* + * SAL to OS entry point for INIT on the monarch processor + * This has been defined for registration purposes with SAL + * as a part of mca_init. + */ +void +ia64_monarch_init_handler() +{ +} +/* + * SAL to OS entry point for INIT on the slave processor + * This has been defined for registration purposes with SAL + * as a part of mca_init. + */ + +void +ia64_slave_init_handler() +{ +} +/* + * ia64_mca_cmc_int_handler + * This is correctable machine check interrupt handler. + * Right now the logs are extracted and displayed in a well-defined + * format. + * Inputs + * None + * Outputs + * None + */ +void +ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) +{ + /* Get the CMC processor log */ + ia64_log_get(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk); + /* Get the CMC platform log */ + ia64_log_get(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PLATFORM, (prfunc_t)printk); + + + ia64_log_print(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR, (prfunc_t)printk); + cmci_handler_platform(cmc_irq, arg, ptregs); + + /* Clear the CMC SAL logs now that they have been saved in the OS buffer */ + ia64_sal_clear_state_info(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PROCESSOR); + ia64_sal_clear_state_info(SAL_INFO_TYPE_CMC, SAL_SUB_INFO_TYPE_PLATFORM); +} + +/* + * IA64_MCA log support + */ +#define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */ +#define IA64_MAX_LOG_TYPES 3 /* MCA, CMC, INIT */ +#define IA64_MAX_LOG_SUBTYPES 2 /* Processor, Platform */ + +typedef struct ia64_state_log_s { + spinlock_t isl_lock; + int isl_index; + sal_log_header_t isl_log[IA64_MAX_LOGS]; + +} ia64_state_log_t; + +static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES][IA64_MAX_LOG_SUBTYPES]; + +#define IA64_LOG_LOCK_INIT(it, sit) spin_lock_init(&ia64_state_log[it][sit].isl_lock) +#define IA64_LOG_LOCK(it, sit) spin_lock_irqsave(&ia64_state_log[it][sit].isl_lock, s) +#define IA64_LOG_UNLOCK(it, sit) spin_unlock_irqrestore(&ia64_state_log[it][sit].isl_lock,\ + s) +#define IA64_LOG_NEXT_INDEX(it, sit) ia64_state_log[it][sit].isl_index +#define IA64_LOG_CURR_INDEX(it, sit) 1 - ia64_state_log[it][sit].isl_index +#define IA64_LOG_INDEX_INC(it, sit) \ + ia64_state_log[it][sit].isl_index = 1 - ia64_state_log[it][sit].isl_index +#define IA64_LOG_INDEX_DEC(it, sit) \ + ia64_state_log[it][sit].isl_index = 1 - ia64_state_log[it][sit].isl_index +#define IA64_LOG_NEXT_BUFFER(it, sit) (void *)(&(ia64_state_log[it][sit].isl_log[IA64_LOG_NEXT_INDEX(it,sit)])) +#define IA64_LOG_CURR_BUFFER(it, sit) (void *)(&(ia64_state_log[it][sit].isl_log[IA64_LOG_CURR_INDEX(it,sit)])) + +/* + * ia64_log_init + * Reset the OS ia64 log buffer + * Inputs : info_type 
(SAL_INFO_TYPE_{MCA,INIT,CMC}) + * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM}) + * Outputs : None + */ +void +ia64_log_init(int sal_info_type, int sal_sub_info_type) +{ + IA64_LOG_LOCK_INIT(sal_info_type, sal_sub_info_type); + IA64_LOG_NEXT_INDEX(sal_info_type, sal_sub_info_type) = 0; + memset(IA64_LOG_NEXT_BUFFER(sal_info_type, sal_sub_info_type), 0, + sizeof(sal_log_header_t) * IA64_MAX_LOGS); +} + +/* + * ia64_log_get + * Get the current MCA log from SAL and copy it into the OS log buffer. + * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC}) + * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM}) + * Outputs : None + * + */ +void +ia64_log_get(int sal_info_type, int sal_sub_info_type, prfunc_t prfunc) +{ + sal_log_header_t *log_buffer; + int s; + + IA64_LOG_LOCK(sal_info_type, sal_sub_info_type); + + + /* Get the process state information */ + log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type, sal_sub_info_type); + + if (ia64_sal_get_state_info(sal_info_type, sal_sub_info_type ,(u64 *)log_buffer)) + prfunc("ia64_mca_log_get : Getting processor log failed\n"); + + IA64_LOG_INDEX_INC(sal_info_type, sal_sub_info_type); + + IA64_LOG_UNLOCK(sal_info_type, sal_sub_info_type); + +} + +/* + * ia64_log_clear + * Clear the current MCA log from SAL and dpending on the clear_os_buffer flags + * clear the OS log buffer also + * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC}) + * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM}) + * clear_os_buffer + * prfunc (print function) + * Outputs : None + * + */ +void +ia64_log_clear(int sal_info_type, int sal_sub_info_type, int clear_os_buffer, prfunc_t prfunc) +{ + if (ia64_sal_clear_state_info(sal_info_type, sal_sub_info_type)) + prfunc("ia64_mca_log_get : Clearing processor log failed\n"); + + if (clear_os_buffer) { + sal_log_header_t *log_buffer; + int s; + + IA64_LOG_LOCK(sal_info_type, sal_sub_info_type); + + /* Get the process state information */ + log_buffer = IA64_LOG_CURR_BUFFER(sal_info_type, sal_sub_info_type); + + memset(log_buffer, 0, sizeof(sal_log_header_t)); + + IA64_LOG_INDEX_DEC(sal_info_type, sal_sub_info_type); + + IA64_LOG_UNLOCK(sal_info_type, sal_sub_info_type); + } + +} + +/* + * ia64_log_processor_regs_print + * Print the contents of the saved processor register(s) in the format + * <reg_prefix>[<index>] <value> + * + * Inputs : regs (Register save buffer) + * reg_num (# of registers) + * reg_class (application/banked/control/bank1_general) + * reg_prefix (ar/br/cr/b1_gr) + * Outputs : None + * + */ +void +ia64_log_processor_regs_print(u64 *regs, + int reg_num, + char *reg_class, + char *reg_prefix, + prfunc_t prfunc) +{ + int i; + + prfunc("+%s Registers\n", reg_class); + for (i = 0; i < reg_num; i++) + prfunc("+ %s[%d] 0x%lx\n", reg_prefix, i, regs[i]); +} + +static char *pal_mesi_state[] = { + "Invalid", + "Shared", + "Exclusive", + "Modified", + "Reserved1", + "Reserved2", + "Reserved3", + "Reserved4" +}; + +static char *pal_cache_op[] = { + "Unknown", + "Move in", + "Cast out", + "Coherency check", + "Internal", + "Instruction fetch", + "Implicit Writeback", + "Reserved" +}; + +/* + * ia64_log_cache_check_info_print + * Display the machine check information related to cache error(s). 
+ * Inputs : i (Multiple errors are logged, i - index of logged error) + * info (Machine check info logged by the PAL and later + * captured by the SAL) + * target_addr (Address which caused the cache error) + * Outputs : None + */ +void +ia64_log_cache_check_info_print(int i, + pal_cache_check_info_t info, + u64 target_addr, + prfunc_t prfunc) +{ + prfunc("+ Cache check info[%d]\n+", i); + prfunc(" Level: L%d",info.level); + if (info.mv) + prfunc(" ,Mesi: %s",pal_mesi_state[info.mesi]); + prfunc(" ,Index: %d,", info.index); + if (info.ic) + prfunc(" ,Cache: Instruction"); + if (info.dc) + prfunc(" ,Cache: Data"); + if (info.tl) + prfunc(" ,Line: Tag"); + if (info.dl) + prfunc(" ,Line: Data"); + prfunc(" ,Operation: %s,", pal_cache_op[info.op]); + if (info.wv) + prfunc(" ,Way: %d,", info.way); + if (info.tv) + prfunc(" ,Target Addr: 0x%lx", target_addr); + if (info.mc) + prfunc(" ,MC: Corrected"); + prfunc("\n"); +} + +/* + * ia64_log_tlb_check_info_print + * Display the machine check information related to tlb error(s). + * Inputs : i (Multiple errors are logged, i - index of logged error) + * info (Machine check info logged by the PAL and later + * captured by the SAL) + * Outputs : None + */ + +void +ia64_log_tlb_check_info_print(int i, + pal_tlb_check_info_t info, + prfunc_t prfunc) +{ + prfunc("+ TLB Check Info [%d]\n+", i); + if (info.itc) + prfunc(" Failure: Instruction Translation Cache"); + if (info.dtc) + prfunc(" Failure: Data Translation Cache"); + if (info.itr) { + prfunc(" Failure: Instruction Translation Register"); + prfunc(" ,Slot: %d", info.tr_slot); + } + if (info.dtr) { + prfunc(" Failure: Data Translation Register"); + prfunc(" ,Slot: %d", info.tr_slot); + } + if (info.mc) + prfunc(" ,MC: Corrected"); + prfunc("\n"); +} + +/* + * ia64_log_bus_check_info_print + * Display the machine check information related to bus error(s). + * Inputs : i (Multiple errors are logged, i - index of logged error) + * info (Machine check info logged by the PAL and later + * captured by the SAL) + * req_addr (Address of the requestor of the transaction) + * resp_addr (Address of the responder of the transaction) + * target_addr (Address where the data was to be delivered to or + * obtained from) + * Outputs : None + */ +void +ia64_log_bus_check_info_print(int i, + pal_bus_check_info_t info, + u64 req_addr, + u64 resp_addr, + u64 targ_addr, + prfunc_t prfunc) +{ + prfunc("+ BUS Check Info [%d]\n+", i); + prfunc(" Status Info: %d", info.bsi); + prfunc(" ,Severity: %d", info.sev); + prfunc(" ,Transaction Type: %d", info.type); + prfunc(" ,Transaction Size: %d", info.size); + if (info.cc) + prfunc(" ,Cache-cache-transfer"); + if (info.ib) + prfunc(" ,Error: Internal"); + if (info.eb) + prfunc(" ,Error: External"); + if (info.mc) + prfunc(" ,MC: Corrected"); + if (info.tv) + prfunc(" ,Target Address: 0x%lx", targ_addr); + if (info.rq) + prfunc(" ,Requestor Address: 0x%lx", req_addr); + if (info.tv) + prfunc(" ,Responder Address: 0x%lx", resp_addr); + prfunc("\n"); +} + +/* + * ia64_log_processor_info_print + * Display the processor-specific information logged by PAL as a part + * of MCA or INIT or CMC. + * Inputs : lh (Pointer of the sal log header which specifies the format + * of SAL state info as specified by the SAL spec). 
+ * Outputs : None + */ +void +ia64_log_processor_info_print(sal_log_header_t *lh, prfunc_t prfunc) +{ + sal_log_processor_info_t *slpi; + int i; + + if (!lh) + return; + + if (lh->slh_log_type != SAL_SUB_INFO_TYPE_PROCESSOR) + return; + +#if defined(MCA_TEST) + slpi = &slpi_buf; +#else + slpi = (sal_log_processor_info_t *)lh->slh_log_dev_spec_info; +#endif /#if defined(MCA_TEST) */ + + if (!slpi) { + prfunc("No Processor Error Log found\n"); + return; + } + + /* Print branch register contents if valid */ + if (slpi->slpi_valid.slpi_br) + ia64_log_processor_regs_print(slpi->slpi_br, 8, "Branch", "br", prfunc); + + /* Print control register contents if valid */ + if (slpi->slpi_valid.slpi_cr) + ia64_log_processor_regs_print(slpi->slpi_cr, 128, "Control", "cr", prfunc); + + /* Print application register contents if valid */ + if (slpi->slpi_valid.slpi_ar) + ia64_log_processor_regs_print(slpi->slpi_br, 128, "Application", "ar", prfunc); + + /* Print region register contents if valid */ + if (slpi->slpi_valid.slpi_rr) + ia64_log_processor_regs_print(slpi->slpi_rr, 8, "Region", "rr", prfunc); + + /* Print floating-point register contents if valid */ + if (slpi->slpi_valid.slpi_fr) + ia64_log_processor_regs_print(slpi->slpi_fr, 128, "Floating-point", "fr", + prfunc); + + /* Print bank1-gr NAT register contents if valid */ + ia64_log_processor_regs_print(&slpi->slpi_bank1_nat_bits, 1, "NAT", "nat", prfunc); + + /* Print bank 1 register contents if valid */ + if (slpi->slpi_valid.slpi_bank1_gr) + ia64_log_processor_regs_print(slpi->slpi_bank1_gr, 16, "Bank1-General", "gr", + prfunc); + + /* Print the cache check information if any*/ + for (i = 0 ; i < MAX_CACHE_ERRORS; i++) + ia64_log_cache_check_info_print(i, + slpi->slpi_cache_check_info[i].slpi_cache_check, + slpi->slpi_cache_check_info[i].slpi_target_address, + prfunc); + /* Print the tlb check information if any*/ + for (i = 0 ; i < MAX_TLB_ERRORS; i++) + ia64_log_tlb_check_info_print(i,slpi->slpi_tlb_check_info[i], prfunc); + + /* Print the bus check information if any*/ + for (i = 0 ; i < MAX_BUS_ERRORS; i++) + ia64_log_bus_check_info_print(i, + slpi->slpi_bus_check_info[i].slpi_bus_check, + slpi->slpi_bus_check_info[i].slpi_requestor_addr, + slpi->slpi_bus_check_info[i].slpi_responder_addr, + slpi->slpi_bus_check_info[i].slpi_target_addr, + prfunc); + +} + +/* + * ia64_log_print + * Display the contents of the OS error log information + * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC}) + * sub_info_type (SAL_SUB_INFO_TYPE_{PROCESSOR,PLATFORM}) + * Outputs : None + */ +void +ia64_log_print(int sal_info_type, int sal_sub_info_type, prfunc_t prfunc) +{ + char *info_type, *sub_info_type; + + switch(sal_info_type) { + case SAL_INFO_TYPE_MCA: + info_type = "MCA"; + break; + case SAL_INFO_TYPE_INIT: + info_type = "INIT"; + break; + case SAL_INFO_TYPE_CMC: + info_type = "CMC"; + break; + default: + info_type = "UNKNOWN"; + break; + } + + switch(sal_sub_info_type) { + case SAL_SUB_INFO_TYPE_PROCESSOR: + sub_info_type = "PROCESSOR"; + break; + case SAL_SUB_INFO_TYPE_PLATFORM: + sub_info_type = "PLATFORM"; + break; + default: + sub_info_type = "UNKNOWN"; + break; + } + + prfunc("+BEGIN HARDWARE ERROR STATE [%s %s]\n", info_type, sub_info_type); + if (sal_sub_info_type == SAL_SUB_INFO_TYPE_PROCESSOR) + ia64_log_processor_info_print( + IA64_LOG_CURR_BUFFER(sal_info_type, sal_sub_info_type), + prfunc); + else + log_print_platform(IA64_LOG_CURR_BUFFER(sal_info_type, sal_sub_info_type),prfunc); + prfunc("+END HARDWARE ERROR STATE [%s %s]\n", info_type, 
sub_info_type); +} diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S new file mode 100644 index 000000000..3d49ac06e --- /dev/null +++ b/arch/ia64/kernel/mca_asm.S @@ -0,0 +1,621 @@ +#include <asm/processor.h> +#include <asm/mcaasm.h> +#include <asm/page.h> +#include <asm/mca.h> + + .psr abi64 + .psr lsb + .lsb + +/* + * SAL_TO_OS_MCA_HANDOFF_STATE + * 1. GR1 = OS GP + * 2. GR8 = PAL_PROC physical address + * 3. GR9 = SAL_PROC physical address + * 4. GR10 = SAL GP (physical) + * 5. GR11 = Rendez state + * 6. GR12 = Return address to location within SAL_CHECK + */ +#define SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(_tmp) \ + movl _tmp=ia64_sal_to_os_handoff_state;; \ + st8 [_tmp]=r1,0x08;; \ + st8 [_tmp]=r8,0x08;; \ + st8 [_tmp]=r9,0x08;; \ + st8 [_tmp]=r10,0x08;; \ + st8 [_tmp]=r11,0x08;; \ + st8 [_tmp]=r12,0x08;; + +/* + * OS_MCA_TO_SAL_HANDOFF_STATE + * 1. GR8 = OS_MCA status + * 2. GR9 = SAL GP (physical) + * 3. GR22 = New min state save area pointer + */ +#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp) \ + movl _tmp=ia64_os_to_sal_handoff_state;; \ + DATA_VA_TO_PA(_tmp);; \ + ld8 r8=[_tmp],0x08;; \ + ld8 r9=[_tmp],0x08;; \ + ld8 r22=[_tmp],0x08;; + +/* + * BRANCH + * Jump to the instruction referenced by + * "to_label". + * Branch is taken only if the predicate + * register "p" is true. + * "ip" is the address of the instruction + * located at "from_label". + * "temp" is a scratch register like r2 + * "adjust" needed for HP compiler. + * A screwup somewhere with constant arithmetic. + */ +#define BRANCH(to_label, temp, p, adjust) \ +100: (p) mov temp=ip; \ + ;; \ + (p) adds temp=to_label-100b,temp;\ + (p) adds temp=adjust,temp; \ + (p) mov b1=temp ; \ + (p) br b1 + + .global ia64_os_mca_dispatch + .global ia64_os_mca_dispatch_end + .global ia64_sal_to_os_handoff_state + .global ia64_os_to_sal_handoff_state + .global ia64_os_mca_ucmc_handler + .global ia64_mca_proc_state_dump + .global ia64_mca_proc_state_restore + .global ia64_mca_stack + .global ia64_mca_stackframe + .global ia64_mca_bspstore + + .text + .align 16 + +ia64_os_mca_dispatch: + +#if defined(MCA_TEST) + // Pretend that we are in interrupt context + mov r2=psr + dep r2=0, r2, PSR_IC, 2; + mov psr.l = r2 +#endif /* #if defined(MCA_TEST) */ + + // Save the SAL to OS MCA handoff state as defined + // by SAL SPEC 2.5 + // NOTE : The order in which the state gets saved + // is dependent on the way the C-structure + // for ia64_mca_sal_to_os_state_t has been + // defined in include/asm/mca.h + SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2) + + // LOG PROCESSOR STATE INFO FROM HERE ON.. 
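+	//
+	// What follows is, in order: dump the processor state
+	// (ia64_os_mca_proc_state_dump), switch to a private stack and
+	// backing store and enter virtual mode, call the C handler
+	// ia64_mca_ucmc_handler, drop back to physical mode, restore the
+	// original frame and processor state, and finally branch back to
+	// SAL_CHECK via the OS-to-SAL handoff state.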
+ ;; +begin_os_mca_dump: + BRANCH(ia64_os_mca_proc_state_dump, r2, p0, 0x0) + ;; +ia64_os_mca_done_dump: + + // Setup new stack frame for OS_MCA handling + movl r2=ia64_mca_bspstore // local bspstore area location in r2 + movl r3=ia64_mca_stackframe // save stack frame to memory in r3 + rse_switch_context(r6,r3,r2);; // RSC management in this new context + movl r12=ia64_mca_stack;; + + // Enter virtual mode from physical mode + VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4) +ia64_os_mca_virtual_begin: + + // call our handler + movl r2=ia64_mca_ucmc_handler;; + mov b6=r2;; + br.call.sptk.few b0=b6 + ;; + + // Revert back to physical mode before going back to SAL + PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4) +ia64_os_mca_virtual_end: + +#if defined(MCA_TEST) + // Pretend that we are in interrupt context + mov r2=psr + dep r2=0, r2, PSR_IC, 2; + mov psr.l = r2 +#endif /* #if defined(MCA_TEST) */ + + // restore the original stack frame here + movl r2=ia64_mca_stackframe // restore stack frame from memory at r2 + ;; + DATA_VA_TO_PA(r2) + movl r4=IA64_PSR_MC + ;; + rse_return_context(r4,r3,r2) // switch from interrupt context for RSE + + // let us restore all the registers from our PSI structure + mov r8=gp + ;; +begin_os_mca_restore: + BRANCH(ia64_os_mca_proc_state_restore, r2, p0, 0x0) + ;; + +ia64_os_mca_done_restore: + ;; +#ifdef SOFTSDV + VIRTUAL_MODE_ENTER(r2,r3, vmode_enter, r4) +vmode_enter: + br.ret.sptk.few b0 +#else + // branch back to SALE_CHECK + OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2) + ld8 r3=[r2];; + mov b0=r3 // SAL_CHECK return address + br b0 + ;; +#endif /* #ifdef SOFTSDV */ +ia64_os_mca_dispatch_end: +//EndMain////////////////////////////////////////////////////////////////////// + + +//++ +// Name: +// ia64_os_mca_proc_state_dump() +// +// Stub Description: +// +// This stub dumps the processor state during MCHK to a data area +// +//-- + +ia64_os_mca_proc_state_dump: +// Get and save GR0-31 from Proc. Min. State Save Area to SAL PSI + movl r2=ia64_mca_proc_state_dump;; // Os state dump area + +// save ar.NaT + mov r5=ar.unat // ar.unat + +// save banked GRs 16-31 along with NaT bits + bsw.1;; + st8.spill [r2]=r16,8;; + st8.spill [r2]=r17,8;; + st8.spill [r2]=r18,8;; + st8.spill [r2]=r19,8;; + st8.spill [r2]=r20,8;; + st8.spill [r2]=r21,8;; + st8.spill [r2]=r22,8;; + st8.spill [r2]=r23,8;; + st8.spill [r2]=r24,8;; + st8.spill [r2]=r25,8;; + st8.spill [r2]=r26,8;; + st8.spill [r2]=r27,8;; + st8.spill [r2]=r28,8;; + st8.spill [r2]=r29,8;; + st8.spill [r2]=r30,8;; + st8.spill [r2]=r31,8;; + + mov r4=ar.unat;; + st8 [r2]=r4,8 // save User NaT bits for r16-r31 + mov ar.unat=r5 // restore original unat + bsw.0;; + +//save BRs + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2 // duplicate r2 in r4 + + mov r3=b0 + mov r5=b1 + mov r7=b2;; + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=b3 + mov r5=b4 + mov r7=b5;; + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=b6 + mov r5=b7;; + st8 [r2]=r3,2*8 + st8 [r4]=r5,2*8;; + +cSaveCRs: +// save CRs + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2 // duplicate r2 in r4 + + mov r3=cr0 // cr.dcr + mov r5=cr1 // cr.itm + mov r7=cr2;; // cr.iva + + st8 [r2]=r3,8*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; // 48 byte rements + + mov r3=cr8;; // cr.pta + st8 [r2]=r3,8*8;; // 64 byte rements + +// if PSR.ic=0, reading interruption registers causes an illegal operation fault + mov r3=psr;; + tbit.nz.unc p2,p0=r3,PSR_IC;; // PSI Valid Log bit pos. 
test +(p2) st8 [r2]=r0,9*8+160 // increment by 168 byte inc. +begin_skip_intr_regs: + BRANCH(SkipIntrRegs, r9, p2, 0x0) + ;; + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2 // duplicate r2 in r6 + + mov r3=cr16 // cr.ipsr + mov r5=cr17 // cr.isr + mov r7=r0;; // cr.ida => cr18 + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=cr19 // cr.iip + mov r5=cr20 // cr.idtr + mov r7=cr21;; // cr.iitr + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=cr22 // cr.iipa + mov r5=cr23 // cr.ifs + mov r7=cr24;; // cr.iim + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=cr25;; // cr.iha + st8 [r2]=r3,160;; // 160 byte rement + +SkipIntrRegs: + st8 [r2]=r0,168 // another 168 byte . + + mov r3=cr66;; // cr.lid + st8 [r2]=r3,40 // 40 byte rement + + mov r3=cr71;; // cr.ivr + st8 [r2]=r3,8 + + mov r3=cr72;; // cr.tpr + st8 [r2]=r3,24 // 24 byte increment + + mov r3=r0;; // cr.eoi => cr75 + st8 [r2]=r3,168 // 168 byte inc. + + mov r3=r0;; // cr.irr0 => cr96 + st8 [r2]=r3,16 // 16 byte inc. + + mov r3=r0;; // cr.irr1 => cr98 + st8 [r2]=r3,16 // 16 byte inc. + + mov r3=r0;; // cr.irr2 => cr100 + st8 [r2]=r3,16 // 16 byte inc + + mov r3=r0;; // cr.irr3 => cr100 + st8 [r2]=r3,16 // 16b inc. + + mov r3=r0;; // cr.itv => cr114 + st8 [r2]=r3,16 // 16 byte inc. + + mov r3=r0;; // cr.pmv => cr116 + st8 [r2]=r3,8 + + mov r3=r0;; // cr.lrr0 => cr117 + st8 [r2]=r3,8 + + mov r3=r0;; // cr.lrr1 => cr118 + st8 [r2]=r3,8 + + mov r3=r0;; // cr.cmcv => cr119 + st8 [r2]=r3,8*10;; + +cSaveARs: +// save ARs + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2 // duplicate r2 in r6 + + mov r3=ar0 // ar.kro + mov r5=ar1 // ar.kr1 + mov r7=ar2;; // ar.kr2 + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=ar3 // ar.kr3 + mov r5=ar4 // ar.kr4 + mov r7=ar5;; // ar.kr5 + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=ar6 // ar.kr6 + mov r5=ar7 // ar.kr7 + mov r7=r0;; // ar.kr8 + st8 [r2]=r3,10*8 + st8 [r4]=r5,10*8 + st8 [r6]=r7,10*8;; // rement by 72 bytes + + mov r3=ar16 // ar.rsc + mov ar16=r0 // put RSE in enforced lazy mode + mov r5=ar17 // ar.bsp + mov r7=ar18;; // ar.bspstore + st8 [r2]=r3,3*8 + st8 [r4]=r5,3*8 + st8 [r6]=r7,3*8;; + + mov r3=ar19;; // ar.rnat + st8 [r2]=r3,8*13 // increment by 13x8 bytes + + mov r3=ar32;; // ar.ccv + st8 [r2]=r3,8*4 + + mov r3=ar36;; // ar.unat + st8 [r2]=r3,8*4 + + mov r3=ar40;; // ar.fpsr + st8 [r2]=r3,8*4 + + mov r3=ar44;; // ar.itc + st8 [r2]=r3,160 // 160 + + mov r3=ar64;; // ar.pfs + st8 [r2]=r3,8 + + mov r3=ar65;; // ar.lc + st8 [r2]=r3,8 + + mov r3=ar66;; // ar.ec + st8 [r2]=r3 + add r2=8*62,r2 //padding + +// save RRs + mov ar.lc=0x08-1 + movl r4=0x00;; + +cStRR: + mov r3=rr[r4];; + st8 [r2]=r3,8 + add r4=1,r4 + br.cloop.sptk.few cStRR + ;; +end_os_mca_dump: + BRANCH(ia64_os_mca_done_dump, r2, p0, -0x10) + ;; + +//EndStub////////////////////////////////////////////////////////////////////// + + +//++ +// Name: +// ia64_os_mca_proc_state_restore() +// +// Stub Description: +// +// This is a stub to restore the saved processor state during MCHK +// +//-- + +ia64_os_mca_proc_state_restore: + +// Restore bank1 GR16-31 + movl r2=ia64_mca_proc_state_dump // Convert virtual address + ;; // of OS state dump area + DATA_VA_TO_PA(r2) // to physical address + ;; +restore_GRs: // restore bank-1 GRs 16-31 + bsw.1;; + add r3=16*8,r2;; // to get to NaT of GR 16-31 + ld8 r3=[r3];; + mov ar.unat=r3;; // first restore NaT + + ld8.fill r16=[r2],8;; + ld8.fill r17=[r2],8;; + ld8.fill r18=[r2],8;; + ld8.fill r19=[r2],8;; + 
ld8.fill r20=[r2],8;; + ld8.fill r21=[r2],8;; + ld8.fill r22=[r2],8;; + ld8.fill r23=[r2],8;; + ld8.fill r24=[r2],8;; + ld8.fill r25=[r2],8;; + ld8.fill r26=[r2],8;; + ld8.fill r27=[r2],8;; + ld8.fill r28=[r2],8;; + ld8.fill r29=[r2],8;; + ld8.fill r30=[r2],8;; + ld8.fill r31=[r2],8;; + + ld8 r3=[r2],8;; // increment to skip NaT + bsw.0;; + +restore_BRs: + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov b0=r3 + mov b1=r5 + mov b2=r7;; + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov b3=r3 + mov b4=r5 + mov b5=r7;; + + ld8 r3=[r2],2*8 + ld8 r5=[r4],2*8;; + mov b6=r3 + mov b7=r5;; + +restore_CRs: + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],8*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; // 48 byte increments + mov cr0=r3 // cr.dcr + mov cr1=r5 // cr.itm + mov cr2=r7;; // cr.iva + + ld8 r3=[r2],8*8;; // 64 byte increments +// mov cr8=r3 // cr.pta + + +// if PSR.ic=1, reading interruption registers causes an illegal operation fault + mov r3=psr;; + tbit.nz.unc p2,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test +(p2) st8 [r2]=r0,9*8+160 // increment by 160 byte inc. + +begin_rskip_intr_regs: + BRANCH(rSkipIntrRegs, r9, p2, 0x0) + ;; + + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov cr16=r3 // cr.ipsr + mov cr17=r5 // cr.isr is read only +// mov cr18=r7;; // cr.ida + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov cr19=r3 // cr.iip + mov cr20=r5 // cr.idtr + mov cr21=r7;; // cr.iitr + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov cr22=r3 // cr.iipa + mov cr23=r5 // cr.ifs + mov cr24=r7 // cr.iim + + ld8 r3=[r2],160;; // 160 byte increment + mov cr25=r3 // cr.iha + +rSkipIntrRegs: + ld8 r3=[r2],168;; // another 168 byte inc. + + ld8 r3=[r2],40;; // 40 byte increment + mov cr66=r3 // cr.lid + + ld8 r3=[r2],8;; +// mov cr71=r3 // cr.ivr is read only + ld8 r3=[r2],24;; // 24 byte increment + mov cr72=r3 // cr.tpr + + ld8 r3=[r2],168;; // 168 byte inc. +// mov cr75=r3 // cr.eoi + + ld8 r3=[r2],16;; // 16 byte inc. +// mov cr96=r3 // cr.irr0 is read only + + ld8 r3=[r2],16;; // 16 byte inc. +// mov cr98=r3 // cr.irr1 is read only + + ld8 r3=[r2],16;; // 16 byte inc +// mov cr100=r3 // cr.irr2 is read only + + ld8 r3=[r2],16;; // 16b inc. +// mov cr102=r3 // cr.irr3 is read only + + ld8 r3=[r2],16;; // 16 byte inc. 
+// mov cr114=r3 // cr.itv + + ld8 r3=[r2],8;; +// mov cr116=r3 // cr.pmv + ld8 r3=[r2],8;; +// mov cr117=r3 // cr.lrr0 + ld8 r3=[r2],8;; +// mov cr118=r3 // cr.lrr1 + ld8 r3=[r2],8*10;; +// mov cr119=r3 // cr.cmcv + +restore_ARs: + add r4=8,r2 // duplicate r2 in r4 + add r6=2*8,r2;; // duplicate r2 in r4 + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov ar0=r3 // ar.kro + mov ar1=r5 // ar.kr1 + mov ar2=r7;; // ar.kr2 + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; + mov ar3=r3 // ar.kr3 + mov ar4=r5 // ar.kr4 + mov ar5=r7;; // ar.kr5 + + ld8 r3=[r2],10*8 + ld8 r5=[r4],10*8 + ld8 r7=[r6],10*8;; + mov ar6=r3 // ar.kr6 + mov ar7=r5 // ar.kr7 +// mov ar8=r6 // ar.kr8 + ;; + + ld8 r3=[r2],3*8 + ld8 r5=[r4],3*8 + ld8 r7=[r6],3*8;; +// mov ar16=r3 // ar.rsc +// mov ar17=r5 // ar.bsp is read only + mov ar16=r0 // make sure that RSE is in enforced lazy mode + mov ar18=r7;; // ar.bspstore + + ld8 r9=[r2],8*13;; + mov ar19=r9 // ar.rnat + + mov ar16=r3 // ar.rsc + ld8 r3=[r2],8*4;; + mov ar32=r3 // ar.ccv + + ld8 r3=[r2],8*4;; + mov ar36=r3 // ar.unat + + ld8 r3=[r2],8*4;; + mov ar40=r3 // ar.fpsr + + ld8 r3=[r2],160;; // 160 +// mov ar44=r3 // ar.itc + + ld8 r3=[r2],8;; + mov ar64=r3 // ar.pfs + + ld8 r3=[r2],8;; + mov ar65=r3 // ar.lc + + ld8 r3=[r2];; + mov ar66=r3 // ar.ec + add r2=8*62,r2;; // padding + +restore_RRs: + mov r5=ar.lc + mov ar.lc=0x08-1 + movl r4=0x00 +cStRRr: + ld8 r3=[r2],8;; +// mov rr[r4]=r3 // what are its access previledges? + add r4=1,r4 + br.cloop.sptk.few cStRRr + ;; + mov ar.lc=r5 + ;; +end_os_mca_restore: + BRANCH(ia64_os_mca_done_restore, r2, p0, -0x20) + ;; +//EndStub////////////////////////////////////////////////////////////////////// diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S new file mode 100644 index 000000000..1506bacc2 --- /dev/null +++ b/arch/ia64/kernel/pal.S @@ -0,0 +1,119 @@ +/* + * PAL Firmware support + * IA-64 Processor Programmers Reference Vol 2 + * + * Copyright (C) 1999 Don Dugger <don.dugger@intel.com> + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 David Mosberger <davidm@hpl.hp.com> + */ + + .text + .psr abi64 + .psr lsb + .lsb + + .data +pal_entry_point: + data8 ia64_pal_default_handler + .text + +/* + * Set the PAL entry point address. This could be written in C code, but we do it here + * to keep it all in one module (besides, it's so trivial that it's + * not a big deal). + * + * in0 Address of the PAL entry point (text address, NOT a function descriptor). + */ + .align 16 + .global ia64_pal_handler_init + .proc ia64_pal_handler_init +ia64_pal_handler_init: + alloc r3=ar.pfs,1,0,0,0 + movl r2=pal_entry_point + ;; + st8 [r2]=in0 + br.ret.sptk.few rp + + .endp ia64_pal_handler_init + +/* + * Default PAL call handler. This needs to be coded in assembly because it uses + * the static calling convention, i.e., the RSE may not be used and calls are + * done via "br.cond" (not "br.call"). + */ + .align 16 + .global ia64_pal_default_handler + .proc ia64_pal_default_handler +ia64_pal_default_handler: + mov r8=-1 + br.cond.sptk.few rp + +/* + * Make a PAL call using the static calling convention. 
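+ * In this convention the PAL index and up to three arguments are passed
+ * in r28-r31, the call itself is a plain branch (br.cond, no register
+ * stack frame is set up for PAL), and the results come back in r8-r11,
+ * which the stub below stores into the ia64_pal_retval pointed to by in0.
+ * A C caller might look roughly like this (illustrative sketch only; the
+ * exact prototype depends on __GCC_MULTIREG_RETVALS__):
+ *
+ *	struct ia64_pal_retval iprv;
+ *	ia64_pal_call_static(&iprv, index, arg1, arg2, arg3);
+ *	(iprv now holds the PAL status word and up to three return values)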
+ * + * in0 Pointer to struct ia64_pal_retval + * in1 Index of PAL service + * in2 - in4 Remaning PAL arguments + * + */ + +#ifdef __GCC_MULTIREG_RETVALS__ +# define arg0 in0 +# define arg1 in1 +# define arg2 in2 +# define arg3 in3 +# define arg4 in4 +#else +# define arg0 in1 +# define arg1 in2 +# define arg2 in3 +# define arg3 in4 +# define arg4 in5 +#endif + + .text + .psr abi64 + .psr lsb + .lsb + + .align 16 + .global ia64_pal_call_static + .proc ia64_pal_call_static +ia64_pal_call_static: + alloc loc0 = ar.pfs,6,90,0,0 + movl loc2 = pal_entry_point +1: { + mov r28 = arg0 + mov r29 = arg1 + mov r8 = ip + } + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + mov r30 = arg2 + mov r31 = arg3 + ;; + mov loc3 = psr + mov loc1 = rp + adds r8 = .ret0-1b,r8 + ;; + rsm psr.i + mov b7 = loc2 + mov rp = r8 + ;; + br.cond.sptk.few b7 +.ret0: mov psr.l = loc3 +#ifndef __GCC_MULTIREG_RETVALS__ + st8 [in0] = r8, 8 + ;; + st8 [in0] = r9, 8 + ;; + st8 [in0] = r10, 8 + ;; + st8 [in0] = r11, 8 +#endif + mov ar.pfs = loc0 + mov rp = loc1 + ;; + srlz.d // seralize restoration of psr.l + br.ret.sptk.few b0 + .endp ia64_pal_call_static diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c new file mode 100644 index 000000000..f86f45537 --- /dev/null +++ b/arch/ia64/kernel/pci-dma.c @@ -0,0 +1,56 @@ +/* + * Dynamic DMA mapping support. + * + * This implementation is for IA-64 platforms that do not support + * I/O TLBs (aka DMA address translation hardware). + * + * XXX This doesn't do the right thing yet. It appears we would have + * to add additional zones so we can implement the various address + * mask constraints that we might encounter. A zone for memory < 32 + * bits is obviously necessary... + */ + +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/pci.h> + +#include <asm/io.h> + +/* Pure 2^n version of get_order */ +extern __inline__ unsigned long +get_order (unsigned long size) +{ + unsigned long order = ia64_fls(size); + + printk ("get_order: size=%lu, order=%lu\n", size, order); + + if (order > PAGE_SHIFT) + order -= PAGE_SHIFT; + else + order = 0; + return order; +} + +void * +pci_alloc_consistent (struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) +{ + void *ret; + int gfp = GFP_ATOMIC; + + if (!hwdev || hwdev->dma_mask != 0xffffffff) + gfp |= GFP_DMA; + ret = (void *)__get_free_pages(gfp, get_order(size)); + + if (ret) { + memset(ret, 0, size); + *dma_handle = virt_to_bus(ret); + } + return ret; +} + +void +pci_free_consistent (struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) +{ + free_pages((unsigned long) vaddr, get_order(size)); +} diff --git a/arch/ia64/kernel/pci.c b/arch/ia64/kernel/pci.c new file mode 100644 index 000000000..3bceeed8e --- /dev/null +++ b/arch/ia64/kernel/pci.c @@ -0,0 +1,239 @@ +/* + * pci.c - Low-Level PCI Access in IA64 + * + * Derived from bios32.c of i386 tree. + * + */ + +#include <linux/config.h> + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/malloc.h> +#include <linux/smp_lock.h> +#include <linux/spinlock.h> + +#include <asm/machvec.h> +#include <asm/page.h> +#include <asm/segment.h> +#include <asm/system.h> +#include <asm/io.h> + +#include <asm/sal.h> + + +#ifdef CONFIG_SMP +# include <asm/smp.h> +#endif +#include <asm/irq.h> + + +#undef DEBUG +#define DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) 
+#endif + +/* + * This interrupt-safe spinlock protects all accesses to PCI + * configuration space. + */ + +spinlock_t pci_lock = SPIN_LOCK_UNLOCKED; + +struct pci_fixup pcibios_fixups[] = { { 0 } }; + +#define PCI_NO_CHECKS 0x400 +#define PCI_NO_PEER_FIXUP 0x800 + +static unsigned int pci_probe = PCI_NO_CHECKS; + +/* Macro to build a PCI configuration address to be passed as a parameter to SAL. */ + +#define PCI_CONFIG_ADDRESS(dev, where) (((u64) dev->bus->number << 16) | ((u64) (dev->devfn & 0xff) << 8) | (where & 0xff)) + +static int +pci_conf_read_config_byte(struct pci_dev *dev, int where, u8 *value) +{ + s64 status; + u64 lval; + + status = ia64_sal_pci_config_read(PCI_CONFIG_ADDRESS(dev, where), 1, &lval); + *value = lval; + return status; +} + +static int +pci_conf_read_config_word(struct pci_dev *dev, int where, u16 *value) +{ + s64 status; + u64 lval; + + status = ia64_sal_pci_config_read(PCI_CONFIG_ADDRESS(dev, where), 2, &lval); + *value = lval; + return status; +} + +static int +pci_conf_read_config_dword(struct pci_dev *dev, int where, u32 *value) +{ + s64 status; + u64 lval; + + status = ia64_sal_pci_config_read(PCI_CONFIG_ADDRESS(dev, where), 4, &lval); + *value = lval; + return status; +} + +static int +pci_conf_write_config_byte (struct pci_dev *dev, int where, u8 value) +{ + return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 1, value); +} + +static int +pci_conf_write_config_word (struct pci_dev *dev, int where, u16 value) +{ + return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 2, value); +} + +static int +pci_conf_write_config_dword (struct pci_dev *dev, int where, u32 value) +{ + return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 4, value); +} + + +static struct pci_ops pci_conf = { + pci_conf_read_config_byte, + pci_conf_read_config_word, + pci_conf_read_config_dword, + pci_conf_write_config_byte, + pci_conf_write_config_word, + pci_conf_write_config_dword +}; + +/* + * Try to find PCI BIOS. This will always work for IA64. + */ + +static struct pci_ops * __init +pci_find_bios(void) +{ + return &pci_conf; +} + +/* + * Initialization. Uses the SAL interface + */ + +#define PCI_BUSSES_TO_SCAN 2 /* On "real" ;) hardware this will be 255 */ + +void __init +pcibios_init(void) +{ + struct pci_ops *ops = NULL; + int i; + + if ((ops = pci_find_bios()) == NULL) { + printk("PCI: No PCI bus detected\n"); + return; + } + + printk("PCI: Probing PCI hardware\n"); + for (i = 0; i < PCI_BUSSES_TO_SCAN; i++) + pci_scan_bus(i, ops, NULL); + platform_pci_fixup(); + return; +} + +/* + * Called after each bus is probed, but before its children + * are examined. + */ + +void __init +pcibios_fixup_bus(struct pci_bus *b) +{ + return; +} + +int +pci_assign_resource (struct pci_dev *dev, int i) +{ + printk("pci_assign_resource: not implemented!\n"); + return -ENODEV; +} + +void __init +pcibios_update_resource(struct pci_dev *dev, struct resource *root, + struct resource *res, int resource) +{ + unsigned long where, size; + u32 reg; + + where = PCI_BASE_ADDRESS_0 + (resource * 4); + size = res->end - res->start; + pci_read_config_dword(dev, where, ®); + reg = (reg & size) | (((u32)(res->start - root->start)) & ~size); + pci_write_config_dword(dev, where, reg); + + /* ??? FIXME -- record old value for shutdown. */ +} + +void __init +pcibios_update_irq(struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); + + /* ??? FIXME -- record old value for shutdown. 
*/ +} + +void __init +pcibios_fixup_pbus_ranges (struct pci_bus * bus, struct pbus_set_ranges_data * ranges) +{ + ranges->io_start -= bus->resource[0]->start; + ranges->io_end -= bus->resource[0]->start; + ranges->mem_start -= bus->resource[1]->start; + ranges->mem_end -= bus->resource[1]->start; +} + +int __init +pcibios_enable_device (struct pci_dev *dev) +{ + /* Not needed, since we enable all devices at startup. */ + return 0; +} + +/* + * PCI BIOS setup, always defaults to SAL interface + */ + +char * __init +pcibios_setup(char *str) +{ + pci_probe = PCI_NO_CHECKS; + return NULL; +} + +void +pcibios_align_resource (void *data, struct resource *res, unsigned long size) +{ +} + +#if 0 /*def CONFIG_PROC_FS*/ +/* + * This is an ugly hack to get a (weak) unresolved reference to something that is + * in drivers/pci/proc.c. Without this, the file does not get linked in at all + * (I suspect the reason this isn't needed on Linux/x86 is that most people compile + * with module support, in which case the EXPORT_SYMBOL() stuff will ensure the + * code gets linked in. Sigh... --davidm 99/12/20. + */ +asm ("data8 proc_bus_pci_add"); +#endif diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c new file mode 100644 index 000000000..274b68a73 --- /dev/null +++ b/arch/ia64/kernel/perfmon.c @@ -0,0 +1,227 @@ +#include <linux/config.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp_lock.h> + +#include <asm/errno.h> +#include <asm/irq.h> +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/uaccess.h> + +#ifdef CONFIG_PERFMON + +#define MAX_PERF_COUNTER 4 /* true for Itanium, at least */ +#define WRITE_PMCS_AND_START 0xa0 +#define WRITE_PMCS 0xa1 +#define READ_PMDS 0xa2 +#define STOP_PMCS 0xa3 +#define IA64_COUNTER_MASK 0xffffffffffffff6f +#define PERF_OVFL_VAL 0xffffffff + +struct perfmon_counter { + unsigned long data; + int counter_num; +}; + +unsigned long pmds[MAX_PERF_COUNTER]; +struct task_struct *perf_owner; + +/* + * We set dcr.pp, psr.pp, and the appropriate pmc control values with + * this. Notice that we go about modifying _each_ task's pt_regs to + * set cr_ipsr.pp. This will start counting when "current" does an + * _rfi_. Also, since each task's cr_ipsr.pp, and cr_ipsr is inherited + * across forks, we do _not_ need additional code on context + * switches. On stopping of the counters we dont _need_ to go about + * changing every task's cr_ipsr back to where it wuz, because we can + * just set pmc[0]=1. But we do it anyways becuase we will probably + * add thread specific accounting later. + * + * The obvious problem with this is that on SMP systems, it is a bit + * of work (when someone wants to do it) - it would be easier if we + * just added code to the context-switch path. I think we would need + * to lock the run queue to ensure no context switches, send an IPI to + * each processor, and in that IPI handler, just modify the psr bit of + * only the _current_ thread, since we have modified the psr bit + * correctly in the kernel stack for every process which is not + * running. Might crash on SMP systems without the + * lock_kernel(). Hence the lock.. 
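+ *
+ * As a rough illustration of the intended usage from user level
+ * (assuming some stub that ends up in sys_perfmonctl(cmd1, cmd2, ptr);
+ * neither the stub nor the event encodings are defined in this file):
+ *
+ *	struct perfmon_counter pc[MAX_PERF_COUNTER];
+ *	pc[0].counter_num = 4;		(the generic PMC/PMD pairs are 4-7)
+ *	pc[0].data = <event selection for PMC4>;
+ *	perfmonctl(WRITE_PMCS_AND_START, 1, pc);
+ *	... run the code to be measured ...
+ *	perfmonctl(READ_PMDS, 1, pc);	(counter values come back in .data)
+ *	perfmonctl(STOP_PMCS, 0, 0);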
+ */ +asmlinkage unsigned long +sys_perfmonctl (int cmd1, int cmd2, void *ptr) +{ + struct perfmon_counter tmp, *cptr = ptr; + unsigned long pmd, cnum, dcr, flags; + struct task_struct *p; + struct pt_regs *regs; + struct perf_counter; + int i; + + switch (cmd1) { + case WRITE_PMCS: /* Writes to PMC's and clears PMDs */ + case WRITE_PMCS_AND_START: /* Also starts counting */ + + if (!access_ok(VERIFY_READ, cptr, sizeof(struct perf_counter)*cmd2)) + return -EFAULT; + + if (cmd2 >= MAX_PERF_COUNTER) + return -EFAULT; + + if (perf_owner && perf_owner != current) + return -EBUSY; + perf_owner = current; + + for (i = 0; i < cmd2; i++, cptr++) { + copy_from_user(&tmp, cptr, sizeof(tmp)); + /* XXX need to check validity of counter_num and perhaps data!! */ + ia64_set_pmc(tmp.counter_num, tmp.data); + ia64_set_pmd(tmp.counter_num, 0); + pmds[tmp.counter_num - 4] = 0; + } + + if (cmd1 == WRITE_PMCS_AND_START) { + local_irq_save(flags); + dcr = ia64_get_dcr(); + dcr |= IA64_DCR_PP; + ia64_set_dcr(dcr); + local_irq_restore(flags); + + /* + * This is a no can do. It obviously wouldn't + * work on SMP where another process may not + * be blocked at all. + * + * Perhaps we need a global predicate in the + * leave_kernel path to control if pp should + * be on or off? + */ + lock_kernel(); + for_each_task(p) { + regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) - 1; + ia64_psr(regs)->pp = 1; + } + unlock_kernel(); + ia64_set_pmc(0, 0); + } + break; + + case READ_PMDS: + if (cmd2 >= MAX_PERF_COUNTER) + return -EFAULT; + if (!access_ok(VERIFY_WRITE, cptr, sizeof(struct perf_counter)*cmd2)) + return -EFAULT; + local_irq_save(flags); + /* XXX this looks wrong */ + __asm__ __volatile__("rsm psr.pp\n"); + dcr = ia64_get_dcr(); + dcr &= ~IA64_DCR_PP; + ia64_set_dcr(dcr); + local_irq_restore(flags); + + /* + * We cannot touch pmc[0] to stop counting here, as + * that particular instruction might cause an overflow + * and the mask in pmc[0] might get lost. I'm not very + * sure of the hardware behavior here. So we stop + * counting by psr.pp = 0. And we reset dcr.pp to + * prevent an interrupt from mucking up psr.pp in the + * meanwhile. Perfmon interrupts are pended, hence the + * above code should be ok if one of the above + * instructions cause overflows. Is this ok? When I + * muck with dcr, is the cli/sti needed?? + */ + for (i = 0, cnum = 4; i < MAX_PERF_COUNTER; i++, cnum++, cptr++) { + pmd = pmds[i] + (ia64_get_pmd(cnum) & PERF_OVFL_VAL); + put_user(pmd, &cptr->data); + } + local_irq_save(flags); + /* XXX this looks wrong */ + __asm__ __volatile__("ssm psr.pp"); + dcr = ia64_get_dcr(); + dcr |= IA64_DCR_PP; + ia64_set_dcr(dcr); + local_irq_restore(flags); + break; + + case STOP_PMCS: + ia64_set_pmc(0, 1); + for (i = 0; i < MAX_PERF_COUNTER; ++i) + ia64_set_pmc(i, 0); + + local_irq_save(flags); + dcr = ia64_get_dcr(); + dcr &= ~IA64_DCR_PP; + ia64_set_dcr(dcr); + local_irq_restore(flags); + /* + * This is a no can do. It obviously wouldn't + * work on SMP where another process may not + * be blocked at all. + * + * Perhaps we need a global predicate in the + * leave_kernel path to control if pp should + * be on or off? 
+ */ + lock_kernel(); + for_each_task(p) { + regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) - 1; + ia64_psr(regs)->pp = 0; + } + unlock_kernel(); + perf_owner = 0; + break; + + default: + break; + } + return 0; +} + +static inline void +update_counters (void) +{ + unsigned long mask, i, cnum, val; + + mask = ia64_get_pmd(0) >> 4; + for (i = 0, cnum = 4; i < MAX_PERF_COUNTER; cnum++, i++, mask >>= 1) { + if (mask & 0x1) + val = PERF_OVFL_VAL; + else + /* since we got an interrupt, might as well clear every pmd. */ + val = ia64_get_pmd(cnum) & PERF_OVFL_VAL; + pmds[i] += val; + ia64_set_pmd(cnum, 0); + } +} + +static void +perfmon_interrupt (int irq, void *arg, struct pt_regs *regs) +{ + update_counters(); + ia64_set_pmc(0, 0); + ia64_srlz_d(); +} + +void +perfmon_init (void) +{ + if (request_irq(PERFMON_IRQ, perfmon_interrupt, 0, "perfmon", NULL)) { + printk("perfmon_init: could not allocate performance monitor vector %u\n", + PERFMON_IRQ); + return; + } + ia64_set_pmv(PERFMON_IRQ); + ia64_srlz_d(); +} + +#else /* !CONFIG_PERFMON */ + +asmlinkage unsigned long +sys_perfmonctl (int cmd1, int cmd2, void *ptr) +{ + return -ENOSYS; +} + +#endif /* !CONFIG_PERFMON */ diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c new file mode 100644 index 000000000..5b6deb5f5 --- /dev/null +++ b/arch/ia64/kernel/process.c @@ -0,0 +1,421 @@ +/* + * Architecture-specific setup. + * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#define __KERNEL_SYSCALLS__ /* see <asm/unistd.h> */ +#include <linux/config.h> + +#include <linux/pm.h> +#include <linux/elf.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> + +#include <asm/delay.h> +#include <asm/efi.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/sal.h> +#include <asm/uaccess.h> +#include <asm/user.h> + + +void +show_regs (struct pt_regs *regs) +{ + unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; + + printk("\npsr : %016lx ifs : %016lx ip : [<%016lx>]\n", + regs->cr_ipsr, regs->cr_ifs, ip); + printk("unat: %016lx pfs : %016lx rsc : %016lx\n", + regs->ar_unat, regs->ar_pfs, regs->ar_rsc); + printk("rnat: %016lx bsps: %016lx pr : %016lx\n", + regs->ar_rnat, regs->ar_bspstore, regs->pr); + printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n", + regs->loadrs, regs->ar_ccv, regs->ar_fpsr); + printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, regs->b6, regs->b7); + printk("f6 : %05lx%016lx f7 : %05lx%016lx\n", + regs->f6.u.bits[1], regs->f6.u.bits[0], + regs->f7.u.bits[1], regs->f7.u.bits[0]); + printk("f8 : %05lx%016lx f9 : %05lx%016lx\n", + regs->f8.u.bits[1], regs->f8.u.bits[0], + regs->f9.u.bits[1], regs->f9.u.bits[0]); + + printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, regs->r2, regs->r3); + printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10); + printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13); + printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16); + printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19); + printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22); + printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25); + printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, 
regs->r28); + printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31); + + /* print the stacked registers if cr.ifs is valid: */ + if (regs->cr_ifs & 0x8000000000000000) { + unsigned long val, sof, *bsp, ndirty; + int i, is_nat = 0; + + sof = regs->cr_ifs & 0x7f; /* size of frame */ + ndirty = (regs->loadrs >> 19); + bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty); + for (i = 0; i < sof; ++i) { + get_user(val, ia64_rse_skip_regs(bsp, i)); + printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val, + ((i == sof - 1) || (i % 3) == 2) ? "\n" : " "); + } + } +} + +void __attribute__((noreturn)) +cpu_idle (void *unused) +{ + /* endless idle loop with no priority at all */ + init_idle(); + current->priority = 0; + current->counter = -100; + +#ifdef CONFIG_SMP + if (!current->need_resched) + min_xtp(); +#endif + + while (1) { + while (!current->need_resched) { + continue; + } +#ifdef CONFIG_SMP + normal_xtp(); +#endif + schedule(); + check_pgt_cache(); + if (pm_idle) + (*pm_idle)(); + } +} + +/* + * Copy the state of an ia-64 thread. + * + * We get here through the following call chain: + * + * <clone syscall> + * sys_clone + * do_fork + * copy_thread + * + * This means that the stack layout is as follows: + * + * +---------------------+ (highest addr) + * | struct pt_regs | + * +---------------------+ + * | struct switch_stack | + * +---------------------+ + * | | + * | memory stack | + * | | <-- sp (lowest addr) + * +---------------------+ + * + * Note: if we get called through kernel_thread() then the memory + * above "(highest addr)" is valid kernel stack memory that needs to + * be copied as well. + * + * Observe that we copy the unat values that are in pt_regs and + * switch_stack. Since the interpretation of unat is dependent upon + * the address to which the registers got spilled, doing this is valid + * only as long as we preserve the alignment of the stack. Since the + * stack is always page aligned, we know this is the case. + * + * XXX Actually, the above isn't true when we create kernel_threads(). + * If we ever needs to create kernel_threads() that preserve the unat + * values we'll need to fix this. Perhaps an easy workaround would be + * to always clear the unat bits in the child thread. + */ +int +copy_thread (int nr, unsigned long clone_flags, unsigned long usp, + struct task_struct *p, struct pt_regs *regs) +{ + unsigned long rbs, child_rbs, rbs_size, stack_offset, stack_top, stack_used; + struct switch_stack *child_stack, *stack; + extern char ia64_ret_from_syscall_clear_r8; + extern char ia64_strace_clear_r8; + struct pt_regs *child_ptregs; + +#ifdef CONFIG_SMP + /* + * For SMP idle threads, fork_by_hand() calls do_fork with + * NULL regs. 
+ */ + if (!regs) + return 0; +#endif + + stack_top = (unsigned long) current + IA64_STK_OFFSET; + stack = ((struct switch_stack *) regs) - 1; + stack_used = stack_top - (unsigned long) stack; + stack_offset = IA64_STK_OFFSET - stack_used; + + child_stack = (struct switch_stack *) ((unsigned long) p + stack_offset); + child_ptregs = (struct pt_regs *) (child_stack + 1); + + /* copy parent's switch_stack & pt_regs to child: */ + memcpy(child_stack, stack, stack_used); + + rbs = (unsigned long) current + IA64_RBS_OFFSET; + child_rbs = (unsigned long) p + IA64_RBS_OFFSET; + rbs_size = stack->ar_bspstore - rbs; + + /* copy the parent's register backing store to the child: */ + memcpy((void *) child_rbs, (void *) rbs, rbs_size); + + child_ptregs->r8 = 0; /* child gets a zero return value */ + if (user_mode(child_ptregs)) + child_ptregs->r12 = usp; /* user stack pointer */ + else { + /* + * Note: we simply preserve the relative position of + * the stack pointer here. There is no need to + * allocate a scratch area here, since that will have + * been taken care of by the caller of sys_clone() + * already. + */ + child_ptregs->r12 = (unsigned long) (child_ptregs + 1); /* kernel sp */ + child_ptregs->r13 = (unsigned long) p; /* set `current' pointer */ + } + if (p->flags & PF_TRACESYS) + child_stack->b0 = (unsigned long) &ia64_strace_clear_r8; + else + child_stack->b0 = (unsigned long) &ia64_ret_from_syscall_clear_r8; + child_stack->ar_bspstore = child_rbs + rbs_size; + + /* copy the thread_struct: */ + p->thread.ksp = (unsigned long) child_stack - 16; + /* + * NOTE: The calling convention considers all floating point + * registers in the high partition (fph) to be scratch. Since + * the only way to get to this point is through a system call, + * we know that the values in fph are all dead. Hence, there + * is no need to inherit the fph state from the parent to the + * child and all we have to do is to make sure that + * IA64_THREAD_FPH_VALID is cleared in the child. + * + * XXX We could push this optimization a bit further by + * clearing IA64_THREAD_FPH_VALID on ANY system call. + * However, it's not clear this is worth doing. Also, it + * would be a slight deviation from the normal Linux system + * call behavior where scratch registers are preserved across + * system calls (unless used by the system call itself). 
+ * + * If we wanted to inherit the fph state from the parent to the + * child, we would have to do something along the lines of: + * + * if (ia64_get_fpu_owner() == current && ia64_psr(regs)->mfh) { + * p->thread.flags |= IA64_THREAD_FPH_VALID; + * ia64_save_fpu(&p->thread.fph); + * } else if (current->thread.flags & IA64_THREAD_FPH_VALID) { + * memcpy(p->thread.fph, current->thread.fph, sizeof(p->thread.fph)); + * } + */ + p->thread.flags = (current->thread.flags & ~IA64_THREAD_FPH_VALID); + return 0; +} + +void +ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst) +{ + struct switch_stack *sw = ((struct switch_stack *) pt) - 1; + unsigned long ar_ec, cfm, ar_bsp, ndirty, *krbs; + + ar_ec = (sw->ar_pfs >> 52) & 0x3f; + + cfm = pt->cr_ifs & ((1UL << 63) - 1); + if ((pt->cr_ifs & (1UL << 63)) == 0) { + /* if cr_ifs isn't valid, we got here through a syscall or a break */ + cfm = sw->ar_pfs & ((1UL << 38) - 1); + } + + krbs = (unsigned long *) current + IA64_RBS_OFFSET/8; + ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 16)); + ar_bsp = (long) ia64_rse_skip_regs((long *) pt->ar_bspstore, ndirty); + + /* r0-r31 + * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) + * predicate registers (p0-p63) + * b0-b7 + * ip cfm user-mask + * ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec + */ + memset(dst, 0, sizeof (dst)); /* don't leak any "random" bits */ + + /* r0 is zero */ dst[ 1] = pt->r1; dst[ 2] = pt->r2; dst[ 3] = pt->r3; + dst[ 4] = sw->r4; dst[ 5] = sw->r5; dst[ 6] = sw->r6; dst[ 7] = sw->r7; + dst[ 8] = pt->r8; dst[ 9] = pt->r9; dst[10] = pt->r10; dst[11] = pt->r11; + dst[12] = pt->r12; dst[13] = pt->r13; dst[14] = pt->r14; dst[15] = pt->r15; + memcpy(dst + 16, &pt->r16, 16*8); /* r16-r31 are contiguous */ + + dst[32] = ia64_get_nat_bits(pt, sw); + dst[33] = pt->pr; + + /* branch regs: */ + dst[34] = pt->b0; dst[35] = sw->b1; dst[36] = sw->b2; dst[37] = sw->b3; + dst[38] = sw->b4; dst[39] = sw->b5; dst[40] = pt->b6; dst[41] = pt->b7; + + dst[42] = pt->cr_iip; dst[43] = pt->cr_ifs; + dst[44] = pt->cr_ipsr; /* XXX perhaps we should filter out some bits here? 
--davidm */ + + dst[45] = pt->ar_rsc; dst[46] = ar_bsp; dst[47] = pt->ar_bspstore; dst[48] = pt->ar_rnat; + dst[49] = pt->ar_ccv; dst[50] = pt->ar_unat; dst[51] = sw->ar_fpsr; dst[52] = pt->ar_pfs; + dst[53] = sw->ar_lc; dst[54] = (sw->ar_pfs >> 52) & 0x3f; +} + +int +dump_fpu (struct pt_regs *pt, elf_fpregset_t dst) +{ + struct switch_stack *sw = ((struct switch_stack *) pt) - 1; + struct task_struct *fpu_owner = ia64_get_fpu_owner(); + + memset(dst, 0, sizeof (dst)); /* don't leak any "random" bits */ + + /* f0 is 0.0 */ /* f1 is 1.0 */ dst[2] = sw->f2; dst[3] = sw->f3; + dst[4] = sw->f4; dst[5] = sw->f5; dst[6] = pt->f6; dst[7] = pt->f7; + dst[8] = pt->f8; dst[9] = pt->f9; + memcpy(dst + 10, &sw->f10, 22*16); /* f10-f31 are contiguous */ + + if ((fpu_owner == current) || (current->thread.flags & IA64_THREAD_FPH_VALID)) { + if (fpu_owner == current) { + __ia64_save_fpu(current->thread.fph); + } + memcpy(dst + 32, current->thread.fph, 96*16); + } + return 1; /* f0-f31 are always valid so we always return 1 */ +} + +asmlinkage long +sys_execve (char *filename, char **argv, char **envp, struct pt_regs *regs) +{ + int error; + + lock_kernel(); + filename = getname(filename); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = do_execve(filename, argv, envp, regs); + putname(filename); +out: + unlock_kernel(); + return error; +} + +pid_t +kernel_thread (int (*fn)(void *), void *arg, unsigned long flags) +{ + struct task_struct *parent = current; + int result; + + clone(flags | CLONE_VM, 0); + if (parent != current) { + result = (*fn)(arg); + _exit(result); + } + return 0; /* parent: just return */ +} + +/* + * Flush thread state. This is called when a thread does an execve(). + */ +void +flush_thread (void) +{ + /* drop floating-point and debug-register state if it exists: */ + current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID); + + if (ia64_get_fpu_owner() == current) { + ia64_set_fpu_owner(0); + } +} + +/* + * Clean up state associated with current thread. This is called when + * the thread calls exit(). + */ +void +exit_thread (void) +{ + if (ia64_get_fpu_owner() == current) { + ia64_set_fpu_owner(0); + } +} + +/* + * Free remaining state associated with DEAD_TASK. This is called + * after the parent of DEAD_TASK has collected the exist status of the + * task via wait(). + */ +void +release_thread (struct task_struct *dead_task) +{ + /* nothing to do */ +} + +unsigned long +get_wchan (struct task_struct *p) +{ + struct ia64_frame_info info; + unsigned long ip; + int count = 0; + /* + * These bracket the sleeping functions.. + */ + extern void scheduling_functions_start_here(void); + extern void scheduling_functions_end_here(void); +# define first_sched ((unsigned long) scheduling_functions_start_here) +# define last_sched ((unsigned long) scheduling_functions_end_here) + + /* + * Note: p may not be a blocked task (it could be current or + * another process running on some other CPU. Rather than + * trying to determine if p is really blocked, we just assume + * it's blocked and rely on the unwind routines to fail + * gracefully if the process wasn't really blocked after all. 
+ * --davidm 99/12/15 + */ + ia64_unwind_init_from_blocked_task(&info, p); + do { + if (ia64_unwind_to_previous_frame(&info) < 0) + return 0; + ip = ia64_unwind_get_ip(&info); + if (ip < first_sched || ip >= last_sched) + return ip; + } while (count++ < 16); + return 0; +# undef first_sched +# undef last_sched +} + +void +machine_restart (char *restart_cmd) +{ + (*efi.reset_system)(EFI_RESET_WARM, 0, 0, 0); +} + +void +machine_halt (void) +{ + printk("machine_halt: need PAL or ACPI version here!!\n"); + machine_restart(0); +} + +void +machine_power_off (void) +{ + printk("machine_power_off: unimplemented (need ACPI version here)\n"); + machine_halt (); +} diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c new file mode 100644 index 000000000..18a8e342e --- /dev/null +++ b/arch/ia64/kernel/ptrace.c @@ -0,0 +1,653 @@ +/* + * Kernel support for the ptrace() and syscall tracing interfaces. + * + * Copyright (C) 1999-2000 Hewlett-Packard Co + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * Derived from the x86 and Alpha versions. Most of the code in here + * could actually be factored into a common set of routines. + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/smp_lock.h> +#include <linux/user.h> + +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace_offsets.h> +#include <asm/rse.h> +#include <asm/system.h> +#include <asm/uaccess.h> + +/* + * Collect the NaT bits for r1-r31 from sw->caller_unat and + * sw->ar_unat and return a NaT bitset where bit i is set iff the NaT + * bit of register i is set. + */ +long +ia64_get_nat_bits (struct pt_regs *pt, struct switch_stack *sw) +{ +# define GET_BITS(str, first, last, unat) \ + ({ \ + unsigned long bit = ia64_unat_pos(&str->r##first); \ + unsigned long mask = ((1UL << (last - first + 1)) - 1) << first; \ + (ia64_rotl(unat, first) >> bit) & mask; \ + }) + unsigned long val; + + val = GET_BITS(pt, 1, 3, sw->caller_unat); + val |= GET_BITS(pt, 12, 15, sw->caller_unat); + val |= GET_BITS(pt, 8, 11, sw->caller_unat); + val |= GET_BITS(pt, 16, 31, sw->caller_unat); + val |= GET_BITS(sw, 4, 7, sw->ar_unat); + return val; + +# undef GET_BITS +} + +/* + * Store the NaT bitset NAT in pt->caller_unat and sw->ar_unat. + */ +void +ia64_put_nat_bits (struct pt_regs *pt, struct switch_stack *sw, unsigned long nat) +{ +# define PUT_BITS(str, first, last, nat) \ + ({ \ + unsigned long bit = ia64_unat_pos(&str->r##first); \ + unsigned long mask = ((1UL << (last - first + 1)) - 1) << bit; \ + (ia64_rotr(nat, first) << bit) & mask; \ + }) + sw->caller_unat = PUT_BITS(pt, 1, 3, nat); + sw->caller_unat |= PUT_BITS(pt, 12, 15, nat); + sw->caller_unat |= PUT_BITS(pt, 8, 11, nat); + sw->caller_unat |= PUT_BITS(pt, 16, 31, nat); + sw->ar_unat = PUT_BITS(sw, 4, 7, nat); + +# undef PUT_BITS +} + +#define IA64_MLI_TEMPLATE 0x2 +#define IA64_MOVL_OPCODE 6 + +void +ia64_increment_ip (struct pt_regs *regs) +{ + unsigned long w0, w1, ri = ia64_psr(regs)->ri + 1; + + if (ri > 2) { + ri = 0; + regs->cr_iip += 16; + } else if (ri == 2) { + get_user(w0, (char *) regs->cr_iip + 0); + get_user(w1, (char *) regs->cr_iip + 8); + if (((w0 >> 1) & 0xf) == IA64_MLI_TEMPLATE && (w1 >> 60) == IA64_MOVL_OPCODE) { + /* + * rfi'ing to slot 2 of an MLI bundle causes + * an illegal operation fault. We don't want + * that to happen... Note that we check the + * opcode only. 
"movl" has a vc bit of 0, but + * since a vc bit of 1 is currently reserved, + * we might just as well treat it like a movl. + */ + ri = 0; + regs->cr_iip += 16; + } + } + ia64_psr(regs)->ri = ri; +} + +void +ia64_decrement_ip (struct pt_regs *regs) +{ + unsigned long w0, w1, ri = ia64_psr(regs)->ri - 1; + + if (ia64_psr(regs)->ri == 0) { + regs->cr_iip -= 16; + ri = 2; + get_user(w0, (char *) regs->cr_iip + 0); + get_user(w1, (char *) regs->cr_iip + 8); + if (((w0 >> 1) & 0xf) == IA64_MLI_TEMPLATE && (w1 >> 60) == IA64_MOVL_OPCODE) { + /* + * rfi'ing to slot 2 of an MLI bundle causes + * an illegal operation fault. We don't want + * that to happen... Note that we check the + * opcode only. "movl" has a vc bit of 0, but + * since a vc bit of 1 is currently reserved, + * we might just as well treat it like a movl. + */ + ri = 1; + } + } + ia64_psr(regs)->ri = ri; +} + +/* + * This routine is used to read an rnat bits that are stored on the + * kernel backing store. Since, in general, the alignment of the user + * and kernel are different, this is not completely trivial. In + * essence, we need to construct the user RNAT based on up to two + * kernel RNAT values and/or the RNAT value saved in the child's + * pt_regs. + * + * user rbs + * + * +--------+ <-- lowest address + * | slot62 | + * +--------+ + * | rnat | 0x....1f8 + * +--------+ + * | slot00 | \ + * +--------+ | + * | slot01 | > child_regs->ar_rnat + * +--------+ | + * | slot02 | / kernel rbs + * +--------+ +--------+ + * <- child_regs->ar_bspstore | slot61 | <-- krbs + * +- - - - + +--------+ + * | slot62 | + * +- - - - + +--------+ + * | rnat | + * +- - - - + +--------+ + * vrnat | slot00 | + * +- - - - + +--------+ + * = = + * +--------+ + * | slot00 | \ + * +--------+ | + * | slot01 | > child_stack->ar_rnat + * +--------+ | + * | slot02 | / + * +--------+ + * <--- child_stack->ar_bspstore + * + * The way to think of this code is as follows: bit 0 in the user rnat + * corresponds to some bit N (0 <= N <= 62) in one of the kernel rnat + * value. The kernel rnat value holding this bit is stored in + * variable rnat0. rnat1 is loaded with the kernel rnat value that + * form the upper bits of the user rnat value. + * + * Boundary cases: + * + * o when reading the rnat "below" the first rnat slot on the kernel + * backing store, rnat0/rnat1 are set to 0 and the low order bits + * are merged in from pt->ar_rnat. + * + * o when reading the rnat "above" the last rnat slot on the kernel + * backing store, rnat0/rnat1 gets its value from sw->ar_rnat. + */ +static unsigned long +get_rnat (struct pt_regs *pt, struct switch_stack *sw, + unsigned long *krbs, unsigned long *urnat_addr) +{ + unsigned long rnat0 = 0, rnat1 = 0, urnat = 0, *slot0_kaddr, kmask = ~0UL; + unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; + long num_regs; + + kbsp = (unsigned long *) sw->ar_bspstore; + ubspstore = (unsigned long *) pt->ar_bspstore; + /* + * First, figure out which bit number slot 0 in user-land maps + * to in the kernel rnat. Do this by figuring out how many + * register slots we're beyond the user's backingstore and + * then computing the equivalent address in kernel space. 
+ */ + num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1); + slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); + shift = ia64_rse_slot_num(slot0_kaddr); + rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); + rnat0_kaddr = rnat1_kaddr - 64; + + if (ubspstore + 63 > urnat_addr) { + /* some bits need to be merged in from pt->ar_rnat */ + kmask = ~((1UL << ia64_rse_slot_num(ubspstore)) - 1); + urnat = (pt->ar_rnat & ~kmask); + } + if (rnat0_kaddr >= kbsp) { + rnat0 = sw->ar_rnat; + } else if (rnat0_kaddr > krbs) { + rnat0 = *rnat0_kaddr; + } + if (rnat1_kaddr >= kbsp) { + rnat1 = sw->ar_rnat; + } else if (rnat1_kaddr > krbs) { + rnat1 = *rnat1_kaddr; + } + urnat |= ((rnat1 << (63 - shift)) | (rnat0 >> shift)) & kmask; + return urnat; +} + +/* + * The reverse of get_rnat. + */ +static void +put_rnat (struct pt_regs *pt, struct switch_stack *sw, + unsigned long *krbs, unsigned long *urnat_addr, unsigned long urnat) +{ + unsigned long rnat0 = 0, rnat1 = 0, rnat = 0, *slot0_kaddr, kmask = ~0UL, mask; + unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; + long num_regs; + + kbsp = (unsigned long *) sw->ar_bspstore; + ubspstore = (unsigned long *) pt->ar_bspstore; + /* + * First, figure out which bit number slot 0 in user-land maps + * to in the kernel rnat. Do this by figuring out how many + * register slots we're beyond the user's backingstore and + * then computing the equivalent address in kernel space. + */ + num_regs = (long) ia64_rse_num_regs(ubspstore, urnat_addr + 1); + slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); + shift = ia64_rse_slot_num(slot0_kaddr); + rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); + rnat0_kaddr = rnat1_kaddr - 64; + + if (ubspstore + 63 > urnat_addr) { + /* some bits need to be place in pt->ar_rnat: */ + kmask = ~((1UL << ia64_rse_slot_num(ubspstore)) - 1); + pt->ar_rnat = (pt->ar_rnat & kmask) | (rnat & ~kmask); + } + /* + * Note: Section 11.1 of the EAS guarantees that bit 63 of an + * rnat slot is ignored. so we don't have to clear it here. + */ + rnat0 = (urnat << shift); + mask = ~0UL << shift; + if (rnat0_kaddr >= kbsp) { + sw->ar_rnat = (sw->ar_rnat & ~mask) | (rnat0 & mask); + } else if (rnat0_kaddr > krbs) { + *rnat0_kaddr = ((*rnat0_kaddr & ~mask) | (rnat0 & mask)); + } + + rnat1 = (urnat >> (63 - shift)); + mask = ~0UL >> (63 - shift); + if (rnat1_kaddr >= kbsp) { + sw->ar_rnat = (sw->ar_rnat & ~mask) | (rnat1 & mask); + } else if (rnat1_kaddr > krbs) { + *rnat1_kaddr = ((*rnat1_kaddr & ~mask) | (rnat1 & mask)); + } +} + +long +ia64_peek (struct pt_regs *regs, struct task_struct *child, unsigned long addr, long *val) +{ + unsigned long *bspstore, *krbs, krbs_num_regs, regnum, *rbs_end, *laddr; + struct switch_stack *child_stack; + struct pt_regs *child_regs; + size_t copied; + long ret; + + laddr = (unsigned long *) addr; + child_regs = ia64_task_regs(child); + child_stack = (struct switch_stack *) child_regs - 1; + bspstore = (unsigned long *) child_regs->ar_bspstore; + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + krbs_num_regs = ia64_rse_num_regs(krbs, (unsigned long *) child_stack->ar_bspstore); + rbs_end = ia64_rse_skip_regs(bspstore, krbs_num_regs); + if (laddr >= bspstore && laddr <= ia64_rse_rnat_addr(rbs_end)) { + /* + * Attempt to read the RBS in an area that's actually + * on the kernel RBS => read the corresponding bits in + * the kernel RBS. 
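+ * RNAT slots are synthesized via get_rnat(); ordinary slots that lie
+ * beyond what has actually been spilled to the kernel RBS read back
+ * as 0.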
+ */ + if (ia64_rse_is_rnat_slot(laddr)) + ret = get_rnat(child_regs, child_stack, krbs, laddr); + else { + regnum = ia64_rse_num_regs(bspstore, laddr); + laddr = ia64_rse_skip_regs(krbs, regnum); + if (regnum >= krbs_num_regs) { + ret = 0; + } else { + if ((unsigned long) laddr >= (unsigned long) high_memory) { + printk("yikes: trying to access long at %p\n", laddr); + return -EIO; + } + ret = *laddr; + } + } + } else { + copied = access_process_vm(child, addr, &ret, sizeof(ret), 0); + if (copied != sizeof(ret)) + return -EIO; + } + *val = ret; + return 0; +} + +long +ia64_poke (struct pt_regs *regs, struct task_struct *child, unsigned long addr, long val) +{ + unsigned long *bspstore, *krbs, krbs_num_regs, regnum, *rbs_end, *laddr; + struct switch_stack *child_stack; + struct pt_regs *child_regs; + + laddr = (unsigned long *) addr; + child_regs = ia64_task_regs(child); + child_stack = (struct switch_stack *) child_regs - 1; + bspstore = (unsigned long *) child_regs->ar_bspstore; + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + krbs_num_regs = ia64_rse_num_regs(krbs, (unsigned long *) child_stack->ar_bspstore); + rbs_end = ia64_rse_skip_regs(bspstore, krbs_num_regs); + if (laddr >= bspstore && laddr <= ia64_rse_rnat_addr(rbs_end)) { + /* + * Attempt to write the RBS in an area that's actually + * on the kernel RBS => write the corresponding bits + * in the kernel RBS. + */ + if (ia64_rse_is_rnat_slot(laddr)) + put_rnat(child_regs, child_stack, krbs, laddr, val); + else { + regnum = ia64_rse_num_regs(bspstore, laddr); + laddr = ia64_rse_skip_regs(krbs, regnum); + if (regnum < krbs_num_regs) { + *laddr = val; + } + } + } else if (access_process_vm(child, addr, &val, sizeof(val), 1) != sizeof(val)) { + return -EIO; + } + return 0; +} + +/* + * Ensure the state in child->thread.fph is up-to-date. + */ +static void +sync_fph (struct task_struct *child) +{ + if (ia64_psr(ia64_task_regs(child))->mfh && ia64_get_fpu_owner() == child) { + ia64_save_fpu(&child->thread.fph[0]); + child->thread.flags |= IA64_THREAD_FPH_VALID; + } + if (!(child->thread.flags & IA64_THREAD_FPH_VALID)) { + memset(&child->thread.fph, 0, sizeof(child->thread.fph)); + child->thread.flags |= IA64_THREAD_FPH_VALID; + } +} + +asmlinkage long +sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, + long arg4, long arg5, long arg6, long arg7, long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + struct switch_stack *child_stack; + struct pt_regs *child_regs; + struct task_struct *child; + unsigned long flags, *base; + long ret, regnum; + + lock_kernel(); + ret = -EPERM; + if (request == PTRACE_TRACEME) { + /* are we already being traced? */ + if (current->flags & PF_PTRACED) + goto out; + current->flags |= PF_PTRACED; + ret = 0; + goto out; + } + + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + read_unlock(&tasklist_lock); + if (!child) + goto out; + ret = -EPERM; + if (pid == 1) /* no messing around with init! 
*/ + goto out; + + if (request == PTRACE_ATTACH) { + if (child == current) + goto out; + if ((!child->dumpable || + (current->uid != child->euid) || + (current->uid != child->suid) || + (current->uid != child->uid) || + (current->gid != child->egid) || + (current->gid != child->sgid) || + (!cap_issubset(child->cap_permitted, current->cap_permitted)) || + (current->gid != child->gid)) && !capable(CAP_SYS_PTRACE)) + goto out; + /* the same process cannot be attached many times */ + if (child->flags & PF_PTRACED) + goto out; + child->flags |= PF_PTRACED; + if (child->p_pptr != current) { + unsigned long flags; + + write_lock_irqsave(&tasklist_lock, flags); + REMOVE_LINKS(child); + child->p_pptr = current; + SET_LINKS(child); + write_unlock_irqrestore(&tasklist_lock, flags); + } + send_sig(SIGSTOP, child, 1); + ret = 0; + goto out; + } + ret = -ESRCH; + if (!(child->flags & PF_PTRACED)) + goto out; + if (child->state != TASK_STOPPED) { + if (request != PTRACE_KILL) + goto out; + } + if (child->p_pptr != current) + goto out; + + switch (request) { + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: /* read word at location addr */ + ret = ia64_peek(regs, child, addr, &data); + if (ret == 0) { + ret = data; + regs->r8 = 0; /* ensure "ret" is not mistaken as an error code */ + } + goto out; + + case PTRACE_POKETEXT: + case PTRACE_POKEDATA: /* write the word at location addr */ + ret = ia64_poke(regs, child, addr, data); + goto out; + + case PTRACE_PEEKUSR: /* read the word at addr in the USER area */ + ret = -EIO; + if ((addr & 0x7) != 0) + goto out; + + if (addr < PT_CALLER_UNAT) { + /* accessing fph */ + sync_fph(child); + addr += (unsigned long) &child->thread.fph; + ret = *(unsigned long *) addr; + } else if (addr < PT_F9+16) { + /* accessing switch_stack or pt_regs: */ + child_regs = ia64_task_regs(child); + child_stack = (struct switch_stack *) child_regs - 1; + ret = *(unsigned long *) ((long) child_stack + addr - PT_CALLER_UNAT); + + if (addr == PT_AR_BSP) { + /* ret currently contains pt_regs.loadrs */ + unsigned long *rbs, *bspstore, ndirty; + + rbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + bspstore = (unsigned long *) child_regs->ar_bspstore; + ndirty = ia64_rse_num_regs(rbs, rbs + (ret >> 19)); + ret = (unsigned long) ia64_rse_skip_regs(bspstore, ndirty); + } + } else { + if (addr >= PT_IBR) { + regnum = (addr - PT_IBR) >> 3; + base = &child->thread.ibr[0]; + } else { + regnum = (addr - PT_DBR) >> 3; + base = &child->thread.dbr[0]; + } + if (regnum >= 8) + goto out; + data = base[regnum]; + } + regs->r8 = 0; /* ensure "ret" is not mistaken as an error code */ + goto out; + + case PTRACE_POKEUSR: /* write the word at addr in the USER area */ + ret = -EIO; + if ((addr & 0x7) != 0) + goto out; + + if (addr < PT_CALLER_UNAT) { + /* accessing fph */ + sync_fph(child); + addr += (unsigned long) &child->thread.fph; + *(unsigned long *) addr = data; + if (ret < 0) + goto out; + } else if (addr < PT_F9+16) { + /* accessing switch_stack or pt_regs */ + child_regs = ia64_task_regs(child); + child_stack = (struct switch_stack *) child_regs - 1; + + if (addr == PT_AR_BSP) { + /* compute the loadrs value based on bsp and bspstore: */ + unsigned long *rbs, *bspstore, ndirty, *kbsp; + + bspstore = (unsigned long *) child_regs->ar_bspstore; + ndirty = ia64_rse_num_regs(bspstore, (unsigned long *) data); + rbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + kbsp = ia64_rse_skip_regs(rbs, ndirty); + data = (kbsp - rbs) << 19; + } + *(unsigned long *) ((long) child_stack + addr - PT_CALLER_UNAT) = 
data; + } else { + if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) { + child->thread.flags |= IA64_THREAD_DBG_VALID; + memset(current->thread.dbr, 0, sizeof current->thread.dbr); + memset(current->thread.ibr, 0, sizeof current->thread.ibr); + } + + if (addr >= PT_IBR) { + regnum = (addr - PT_IBR) >> 3; + base = &child->thread.ibr[0]; + } else { + regnum = (addr - PT_DBR) >> 3; + base = &child->thread.dbr[0]; + } + if (regnum >= 8) + goto out; + if (regnum & 1) { + /* force breakpoint to be effective a most for user-level: */ + data &= ~(0x7UL << 56); + } + base[regnum] = data; + } + ret = 0; + goto out; + + case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ + case PTRACE_CONT: /* restart after signal. */ + ret = -EIO; + if (data > _NSIG) + goto out; + if (request == PTRACE_SYSCALL) + child->flags |= PF_TRACESYS; + else + child->flags &= ~PF_TRACESYS; + child->exit_code = data; + + /* make sure the single step/take-branch tra bits are not set: */ + ia64_psr(ia64_task_regs(child))->ss = 0; + ia64_psr(ia64_task_regs(child))->tb = 0; + + wake_up_process(child); + ret = 0; + goto out; + + case PTRACE_KILL: + /* + * Make the child exit. Best I can do is send it a + * sigkill. Perhaps it should be put in the status + * that it wants to exit. + */ + if (child->state == TASK_ZOMBIE) /* already dead */ + goto out; + child->exit_code = SIGKILL; + + /* make sure the single step/take-branch tra bits are not set: */ + ia64_psr(ia64_task_regs(child))->ss = 0; + ia64_psr(ia64_task_regs(child))->tb = 0; + + wake_up_process(child); + ret = 0; + goto out; + + case PTRACE_SINGLESTEP: /* let child execute for one instruction */ + case PTRACE_SINGLEBLOCK: + ret = -EIO; + if (data > _NSIG) + goto out; + + child->flags &= ~PF_TRACESYS; + if (request == PTRACE_SINGLESTEP) { + ia64_psr(ia64_task_regs(child))->ss = 1; + } else { + ia64_psr(ia64_task_regs(child))->tb = 1; + } + child->exit_code = data; + + /* give it a chance to run. */ + wake_up_process(child); + ret = 0; + goto out; + + case PTRACE_DETACH: /* detach a process that was attached. */ + ret = -EIO; + if (data > _NSIG) + goto out; + + child->flags &= ~(PF_PTRACED|PF_TRACESYS); + child->exit_code = data; + write_lock_irqsave(&tasklist_lock, flags); + REMOVE_LINKS(child); + child->p_pptr = child->p_opptr; + SET_LINKS(child); + write_unlock_irqrestore(&tasklist_lock, flags); + + /* make sure the single step/take-branch tra bits are not set: */ + ia64_psr(ia64_task_regs(child))->ss = 0; + ia64_psr(ia64_task_regs(child))->tb = 0; + + wake_up_process(child); + ret = 0; + goto out; + + default: + ret = -EIO; + goto out; + } + out: + unlock_kernel(); + return ret; +} + +void +syscall_trace (void) +{ + if ((current->flags & (PF_PTRACED|PF_TRACESYS)) != (PF_PTRACED|PF_TRACESYS)) + return; + current->exit_code = SIGTRAP; + set_current_state(TASK_STOPPED); + notify_parent(current, SIGCHLD); + schedule(); + /* + * This isn't the same as continuing with a signal, but it + * will do for normal use. strace only continues with a + * signal if the stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c new file mode 100644 index 000000000..8743f6588 --- /dev/null +++ b/arch/ia64/kernel/sal.c @@ -0,0 +1,157 @@ +/* + * System Abstraction Layer (SAL) interface routines. 
+ * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + */ +#include <linux/config.h> + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/string.h> + +#include <asm/page.h> +#include <asm/sal.h> +#include <asm/pal.h> + +#define SAL_DEBUG + +spinlock_t sal_lock = SPIN_LOCK_UNLOCKED; + +static struct { + void *addr; /* function entry point */ + void *gpval; /* gp value to use */ +} pdesc; + +static long +default_handler (void) +{ + return -1; +} + +ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler; + +const char * +ia64_sal_strerror (long status) +{ + const char *str; + switch (status) { + case 0: str = "Call completed without error"; break; + case 1: str = "Effect a warm boot of the system to complete " + "the update"; break; + case -1: str = "Not implemented"; break; + case -2: str = "Invalid argument"; break; + case -3: str = "Call completed with error"; break; + case -4: str = "Virtual address not registered"; break; + case -5: str = "No information available"; break; + case -6: str = "Insufficient space to add the entry"; break; + case -7: str = "Invalid entry_addr value"; break; + case -8: str = "Invalid interrupt vector"; break; + case -9: str = "Requested memory not available"; break; + case -10: str = "Unable to write to the NVM device"; break; + case -11: str = "Invalid partition type specified"; break; + case -12: str = "Invalid NVM_Object id specified"; break; + case -13: str = "NVM_Object already has the maximum number " + "of partitions"; break; + case -14: str = "Insufficient space in partition for the " + "requested write sub-function"; break; + case -15: str = "Insufficient data buffer space for the " + "requested read record sub-function"; break; + case -16: str = "Scratch buffer required for the write/delete " + "sub-function"; break; + case -17: str = "Insufficient space in the NVM_Object for the " + "requested create sub-function"; break; + case -18: str = "Invalid value specified in the partition_rec " + "argument"; break; + case -19: str = "Record oriented I/O not supported for this " + "partition"; break; + case -20: str = "Bad format of record to be written or " + "required keyword variable not " + "specified"; break; + default: str = "Unknown SAL status code"; break; + } + return str; +} + +static void __init +ia64_sal_handler_init (void *entry_point, void *gpval) +{ + /* fill in the SAL procedure descriptor and point ia64_sal to it: */ + pdesc.addr = entry_point; + pdesc.gpval = gpval; + ia64_sal = (ia64_sal_handler) &pdesc; +} + + +void __init +ia64_sal_init (struct ia64_sal_systab *systab) +{ + unsigned long min, max; + char *p; + struct ia64_sal_desc_entry_point *ep; + int i; + + if (!systab) { + printk("Hmm, no SAL System Table.\n"); + return; + } + + if (strncmp(systab->signature, "SST_", 4) != 0) + printk("bad signature in system table!"); + + printk("SAL v%u.%02u: ia32bios=%s, oem=%.32s, product=%.32s\n", + systab->sal_rev_major, systab->sal_rev_minor, + systab->ia32_bios_present ? "present" : "absent", + systab->oem_id, systab->product_id); + + min = ~0UL; + max = 0; + + p = (char *) (systab + 1); + for (i = 0; i < systab->entry_count; i++) { + /* + * The first byte of each entry type contains the type desciptor. 
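+ * Entries are variable-sized, so after handling (or skipping) one we
+ * advance by SAL_DESC_SIZE(*p) rather than by a fixed stride.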
+ */ + switch (*p) { + case SAL_DESC_ENTRY_POINT: + ep = (struct ia64_sal_desc_entry_point *) p; +#ifdef SAL_DEBUG + printk("sal[%d] - entry: pal_proc=0x%lx, sal_proc=0x%lx\n", + i, ep->pal_proc, ep->sal_proc); +#endif + ia64_pal_handler_init(__va(ep->pal_proc)); + ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp)); + break; + + case SAL_DESC_AP_WAKEUP: +#ifdef CONFIG_SMP + { + struct ia64_sal_desc_ap_wakeup *ap = (void *) p; +# ifdef SAL_DEBUG + printk("sal[%d] - wakeup type %x, 0x%lx\n", + i, ap->mechanism, ap->vector); +# endif + switch (ap->mechanism) { + case IA64_SAL_AP_EXTERNAL_INT: + ap_wakeup_vector = ap->vector; +# ifdef SAL_DEBUG + printk("SAL: AP wakeup using external interrupt; " + "vector 0x%lx\n", ap_wakeup_vector); +# endif + break; + + default: + printk("SAL: AP wakeup mechanism unsupported!\n"); + break; + } + break; + } +#endif + } + p += SAL_DESC_SIZE(*p); + } +} diff --git a/arch/ia64/kernel/sal_stub.S b/arch/ia64/kernel/sal_stub.S new file mode 100644 index 000000000..7ab16bbcd --- /dev/null +++ b/arch/ia64/kernel/sal_stub.S @@ -0,0 +1,116 @@ +/* + * gcc currently does not conform to the ia-64 calling convention as far + * as returning function values are concerned. Instead of returning + * values up to 32 bytes in size in r8-r11, gcc returns any value + * bigger than a doubleword via a structure that's allocated by the + * caller and whose address is passed into the function. Since + * SAL_PROC returns values according to the calling convention, this + * stub takes care of copying r8-r11 to the place where gcc expects + * them. + * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#ifndef __GCC_MULTIREG_RETVALS__ + .text + .psr abi64 + .psr lsb + .lsb + + .align 16 + .global ia64_sal_stub +ia64_sal_stub: + /* + * Sheesh, the Cygnus backend passes the pointer to a return value structure in + * in0 whereas the HP backend passes it in r8. Don't you hate those little + * differences... + */ +#ifdef GCC_RETVAL_POINTER_IN_R8 + adds r2=-24,sp + adds sp=-48,sp + mov r14=rp + ;; + st8 [r2]=r8,8 // save pointer to return value + addl r3=@ltoff(ia64_sal),gp + ;; + ld8 r3=[r3] + st8 [r2]=gp,8 // save global pointer + ;; + ld8 r3=[r3] // fetch the value of ia64_sal + st8 [r2]=r14 // save return pointer + ;; + ld8 r2=[r3],8 // load function's entry point + ;; + ld8 gp=[r3] // load function's global pointer + ;; + mov b6=r2 + br.call.sptk.few rp=b6 +.ret0: adds r2=24,sp + ;; + ld8 r3=[r2],8 // restore pointer to return value + ;; + ld8 gp=[r2],8 // restore global pointer + st8 [r3]=r8,8 + ;; + ld8 r14=[r2] // restore return pointer + st8 [r3]=r9,8 + ;; + mov rp=r14 + st8 [r3]=r10,8 + ;; + st8 [r3]=r11,8 + adds sp=48,sp + br.sptk.few rp +#else + /* + * On input: + * in0 = pointer to return value structure + * in1 = index of SAL function to call + * in2..inN = remaining args to SAL call + */ + /* + * We allocate one input and eight output register such that the br.call instruction + * will rename in1-in7 to in0-in6---exactly what we want because SAL doesn't want to + * see the pointer to the return value structure. 
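+ *
+ * After the br.call returns, r8-r11 hold the four return doublewords,
+ * which are then stored back through in0 (the caller-supplied return
+ * value structure).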
+ */ + alloc r15=ar.pfs,1,0,8,0 + + adds r2=-24,sp + adds sp=-48,sp + mov r14=rp + ;; + st8 [r2]=r15,8 // save ar.pfs + addl r3=@ltoff(ia64_sal),gp + ;; + ld8 r3=[r3] // get address of ia64_sal + st8 [r2]=gp,8 // save global pointer + ;; + ld8 r3=[r3] // get value of ia64_sal + st8 [r2]=r14,8 // save return address (rp) + ;; + ld8 r2=[r3],8 // load function's entry point + ;; + ld8 gp=[r3] // load function's global pointer + mov b6=r2 + br.call.sptk.few rp=b6 // make SAL call +.ret0: adds r2=24,sp + ;; + ld8 r15=[r2],8 // restore ar.pfs + ;; + ld8 gp=[r2],8 // restore global pointer + st8 [in0]=r8,8 // store 1. dword of return value + ;; + ld8 r14=[r2] // restore return address (rp) + st8 [in0]=r9,8 // store 2. dword of return value + ;; + mov rp=r14 + st8 [in0]=r10,8 // store 3. dword of return value + ;; + st8 [in0]=r11,8 + adds sp=48,sp // pop stack frame + mov ar.pfs=r15 + br.ret.sptk.few rp +#endif + + .endp ia64_sal_stub +#endif /* __GCC_MULTIREG_RETVALS__ */ diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c new file mode 100644 index 000000000..84581af2e --- /dev/null +++ b/arch/ia64/kernel/semaphore.c @@ -0,0 +1,336 @@ +/* + * IA-64 semaphore implementation (derived from x86 version). + * + * Copyright (C) 1999-2000 Hewlett-Packard Co + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> + */ + +/* + * Semaphores are implemented using a two-way counter: The "count" + * variable is decremented for each process that tries to aquire the + * semaphore, while the "sleepers" variable is a count of such + * aquires. + * + * Notably, the inline "up()" and "down()" functions can efficiently + * test if they need to do any extra work (up needs to do something + * only if count was negative before the increment operation. + * + * "sleepers" and the contention routine ordering is protected by the + * semaphore spinlock. + * + * Note that these functions are only called when there is contention + * on the lock, and as such all this is the "non-critical" part of the + * whole semaphore business. The critical part is the inline stuff in + * <asm/semaphore.h> where we want to avoid any extra jumps and calls. + */ +#include <linux/sched.h> + +#include <asm/semaphore.h> + +/* + * Logic: + * - Only on a boundary condition do we need to care. When we go + * from a negative count to a non-negative, we wake people up. + * - When we go from a non-negative count to a negative do we + * (a) synchronize with the "sleepers" count and (b) make sure + * that we're on the wakeup list before we synchronize so that + * we cannot lose wakeup events. + */ + +void +__up (struct semaphore *sem) +{ + wake_up(&sem->wait); +} + +static spinlock_t semaphore_lock = SPIN_LOCK_UNLOCKED; + +void +__down (struct semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + tsk->state = TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE; + add_wait_queue_exclusive(&sem->wait, &wait); + + spin_lock_irq(&semaphore_lock); + sem->sleepers++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock. 
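+ * The "- 1" accounts for our own down() decrement, which we are still
+ * hoping to turn into lock ownership (see the "us - see -1 above"
+ * note below).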
+ */ + if (!atomic_add_negative(sleepers - 1, &sem->count)) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irq(&semaphore_lock); + + schedule(); + tsk->state = TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE; + spin_lock_irq(&semaphore_lock); + } + spin_unlock_irq(&semaphore_lock); + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; + wake_up(&sem->wait); +} + +int +__down_interruptible (struct semaphore * sem) +{ + int retval = 0; + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + tsk->state = TASK_INTERRUPTIBLE|TASK_EXCLUSIVE; + add_wait_queue_exclusive(&sem->wait, &wait); + + spin_lock_irq(&semaphore_lock); + sem->sleepers ++; + for (;;) { + int sleepers = sem->sleepers; + + /* + * With signals pending, this turns into + * the trylock failure case - we won't be + * sleeping, and we* can't get the lock as + * it has contention. Just correct the count + * and exit. + */ + if (signal_pending(current)) { + retval = -EINTR; + sem->sleepers = 0; + atomic_add(sleepers, &sem->count); + break; + } + + /* + * Add "everybody else" into it. They aren't + * playing, because we own the spinlock. The + * "-1" is because we're still hoping to get + * the lock. + */ + if (!atomic_add_negative(sleepers - 1, &sem->count)) { + sem->sleepers = 0; + break; + } + sem->sleepers = 1; /* us - see -1 above */ + spin_unlock_irq(&semaphore_lock); + + schedule(); + tsk->state = TASK_INTERRUPTIBLE|TASK_EXCLUSIVE; + spin_lock_irq(&semaphore_lock); + } + spin_unlock_irq(&semaphore_lock); + tsk->state = TASK_RUNNING; + remove_wait_queue(&sem->wait, &wait); + wake_up(&sem->wait); + return retval; +} + +/* + * Trylock failed - make sure we correct for having decremented the + * count. + */ +int +__down_trylock (struct semaphore *sem) +{ + int sleepers; + + spin_lock_irq(&semaphore_lock); + sleepers = sem->sleepers + 1; + sem->sleepers = 0; + + /* + * Add "everybody else" and us into it. They aren't + * playing, because we own the spinlock. + */ + if (!atomic_add_negative(sleepers, &sem->count)) + wake_up(&sem->wait); + + spin_unlock_irq(&semaphore_lock); + return 1; +} + +/* + * Helper routines for rw semaphores. These could be optimized some + * more, but since they're off the critical path, I prefer clarity for + * now... + */ + +/* + * This gets called if we failed to acquire the lock, but we're biased + * to acquire the lock by virtue of causing the count to change from 0 + * to -1. Being biased, we sleep and attempt to grab the lock until + * we succeed. When this function returns, we own the lock. + */ +static inline void +down_read_failed_biased (struct rw_semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + add_wait_queue(&sem->wait, &wait); /* put ourselves at the head of the list */ + + for (;;) { + if (sem->read_bias_granted && xchg(&sem->read_bias_granted, 0)) + break; + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (!sem->read_bias_granted) + schedule(); + } + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; +} + +/* + * This gets called if we failed to aquire the lock and we are not + * biased to acquire the lock. We undo the decrement that was + * done earlier, go to sleep, and then attempt to re-acquire the + * lock afterwards. + */ +static inline void +down_read_failed (struct rw_semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + /* + * Undo the decrement we did in down_read() and check if we + * need to wake up someone. 
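+ * __up_read() below takes care of that; we then sleep until the count
+ * turns non-negative, and __down_read_failed() retries the decrement.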
+ */ + __up_read(sem); + + add_wait_queue(&sem->wait, &wait); + while (sem->count < 0) { + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (sem->count >= 0) + break; + schedule(); + } + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; +} + +/* + * Wait for the lock to become unbiased. Readers are non-exclusive. + */ +void +__down_read_failed (struct rw_semaphore *sem, long count) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + while (1) { + if (count == -1) { + down_read_failed_biased(sem); + return; + } + /* unbiased */ + down_read_failed(sem); + + count = ia64_fetch_and_add(-1, &sem->count); + if (count >= 0) + return; + } +} + +static inline void +down_write_failed_biased (struct rw_semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + /* put ourselves at the end of the list */ + add_wait_queue_exclusive(&sem->write_bias_wait, &wait); + + for (;;) { + if (sem->write_bias_granted && xchg(&sem->write_bias_granted, 0)) + break; + set_task_state(tsk, TASK_UNINTERRUPTIBLE | TASK_EXCLUSIVE); + if (!sem->write_bias_granted) + schedule(); + } + + remove_wait_queue(&sem->write_bias_wait, &wait); + tsk->state = TASK_RUNNING; + + /* + * If the lock is currently unbiased, awaken the sleepers + * FIXME: this wakes up the readers early in a bit of a + * stampede -> bad! + */ + if (sem->count >= 0) + wake_up(&sem->wait); +} + + +static inline void +down_write_failed (struct rw_semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + __up_write(sem); /* this takes care of granting the lock */ + + add_wait_queue_exclusive(&sem->wait, &wait); + + while (sem->count < 0) { + set_task_state(tsk, TASK_UNINTERRUPTIBLE | TASK_EXCLUSIVE); + if (sem->count >= 0) + break; /* we must attempt to aquire or bias the lock */ + schedule(); + } + + remove_wait_queue(&sem->wait, &wait); + tsk->state = TASK_RUNNING; +} + + +/* + * Wait for the lock to become unbiased. Since we're a writer, we'll + * make ourselves exclusive. + */ +void +__down_write_failed (struct rw_semaphore *sem, long count) +{ + long old_count; + + while (1) { + if (count == -RW_LOCK_BIAS) { + down_write_failed_biased(sem); + return; + } + down_write_failed(sem); + + do { + old_count = sem->count; + count = old_count - RW_LOCK_BIAS; + } while (cmpxchg(&sem->count, old_count, count) != old_count); + + if (count == 0) + return; + } +} + +void +__rwsem_wake (struct rw_semaphore *sem, long count) +{ + wait_queue_head_t *wq; + + if (count == 0) { + /* wake a writer */ + if (xchg(&sem->write_bias_granted, 1)) + BUG(); + wq = &sem->write_bias_wait; + } else { + /* wake reader(s) */ + if (xchg(&sem->read_bias_granted, 1)) + BUG(); + wq = &sem->wait; + } + wake_up(wq); /* wake up everyone on the wait queue */ +} diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c new file mode 100644 index 000000000..f3283d535 --- /dev/null +++ b/arch/ia64/kernel/setup.c @@ -0,0 +1,326 @@ +/* + * Architecture-specific setup. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1998, 1999 Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * + * 02/04/00 D.Mosberger some more get_cpuinfo fixes... 
+ * 02/01/00 R.Seth fixed get_cpuinfo for SMP + * 01/07/99 S.Eranian added the support for command line argument + * 06/24/99 W.Drummond added boot_cpu_data. + */ +#include <linux/config.h> +#include <linux/init.h> + +#include <linux/bootmem.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/threads.h> +#include <linux/console.h> + +#include <asm/acpi-ext.h> +#include <asm/page.h> +#include <asm/machvec.h> +#include <asm/processor.h> +#include <asm/sal.h> +#include <asm/system.h> +#include <asm/efi.h> + +extern char _end; + +/* cpu_data[bootstrap_processor] is data for the bootstrap processor: */ +struct cpuinfo_ia64 cpu_data[NR_CPUS]; + +unsigned long ia64_cycles_per_usec; +struct ia64_boot_param ia64_boot_param; +struct screen_info screen_info; +unsigned long cpu_initialized = 0; +/* This tells _start which CPU is booting. */ +int cpu_now_booting = 0; + +#define COMMAND_LINE_SIZE 512 + +char saved_command_line[COMMAND_LINE_SIZE]; /* used in proc filesystem */ + +static int +find_max_pfn (unsigned long start, unsigned long end, void *arg) +{ + unsigned long *max_pfn = arg, pfn; + + pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT; + if (pfn > *max_pfn) + *max_pfn = pfn; + return 0; +} + +static int +free_available_memory (unsigned long start, unsigned long end, void *arg) +{ +# define KERNEL_END ((unsigned long) &_end) +# define MIN(a,b) ((a) < (b) ? (a) : (b)) +# define MAX(a,b) ((a) > (b) ? (a) : (b)) + unsigned long range_start, range_end; + + range_start = MIN(start, KERNEL_START); + range_end = MIN(end, KERNEL_START); + + /* + * XXX This should not be necessary, but the bootmem allocator + * is broken and fails to work correctly when the starting + * address is not properly aligned. + */ + range_start = PAGE_ALIGN(range_start); + + if (range_start < range_end) + free_bootmem(__pa(range_start), range_end - range_start); + + range_start = MAX(start, KERNEL_END); + range_end = MAX(end, KERNEL_END); + + /* + * XXX This should not be necessary, but the bootmem allocator + * is broken and fails to work correctly when the starting + * address is not properly aligned. + */ + range_start = PAGE_ALIGN(range_start); + + if (range_start < range_end) + free_bootmem(__pa(range_start), range_end - range_start); + + return 0; +} + +void __init +setup_arch (char **cmdline_p) +{ + unsigned long max_pfn, bootmap_start, bootmap_size; + + /* + * The secondary bootstrap loader passes us the boot + * parameters at the beginning of the ZERO_PAGE, so let's + * stash away those values before ZERO_PAGE gets cleared out. + */ + memcpy(&ia64_boot_param, (void *) ZERO_PAGE_ADDR, sizeof(ia64_boot_param)); + + efi_init(); + + max_pfn = 0; + efi_memmap_walk(find_max_pfn, &max_pfn); + + /* + * This is wrong, wrong, wrong. Darn it, you'd think if they + * change APIs, they'd do things for the better. Grumble... 
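+ *
+ * For now the bootmem bitmap is simply placed on the first page
+ * boundary past the kernel image and reserved again further down via
+ * reserve_bootmem().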
+ */ + bootmap_start = PAGE_ALIGN(__pa(&_end)); + bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn); + + efi_memmap_walk(free_available_memory, 0); + + reserve_bootmem(bootmap_start, bootmap_size); +#if 0 + /* XXX fix me */ + init_mm.start_code = (unsigned long) &_stext; + init_mm.end_code = (unsigned long) &_etext; + init_mm.end_data = (unsigned long) &_edata; + init_mm.brk = (unsigned long) &_end; + + code_resource.start = virt_to_bus(&_text); + code_resource.end = virt_to_bus(&_etext) - 1; + data_resource.start = virt_to_bus(&_etext); + data_resource.end = virt_to_bus(&_edata) - 1; +#endif + + /* process SAL system table: */ + ia64_sal_init(efi.sal_systab); + + *cmdline_p = __va(ia64_boot_param.command_line); + strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line)); + saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */ + + printk("args to kernel: %s\n", *cmdline_p); + +#ifndef CONFIG_SMP + cpu_init(); + identify_cpu(&cpu_data[0]); +#endif + + if (efi.acpi) { + /* Parse the ACPI tables */ + acpi_parse(efi.acpi); + } + +#ifdef CONFIG_IA64_GENERIC + machvec_init(acpi_get_sysname()); +#endif + +#ifdef CONFIG_VT +# if defined(CONFIG_VGA_CONSOLE) + conswitchp = &vga_con; +# elif defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +# endif +#endif + platform_setup(cmdline_p); +} + +/* + * Display cpu info for all cpu's. + */ +int +get_cpuinfo (char *buffer) +{ + char family[32], model[32], features[128], *cp, *p = buffer; + struct cpuinfo_ia64 *c; + unsigned long mask; + + for (c = cpu_data; c < cpu_data + NR_CPUS; ++c) { + if (!(cpu_initialized & (1UL << (c - cpu_data)))) + continue; + + mask = c->features; + + if (c->family == 7) + memcpy(family, "IA-64", 6); + else + sprintf(family, "%u", c->family); + + switch (c->model) { + case 0: strcpy(model, "Itanium"); break; + default: sprintf(model, "%u", c->model); break; + } + + /* build the feature string: */ + memcpy(features, " standard", 10); + cp = features; + if (mask & 1) { + strcpy(cp, " branchlong"); + cp = strchr(cp, '\0'); + mask &= ~1UL; + } + if (mask) + sprintf(cp, " 0x%lx", mask); + + p += sprintf(buffer, + "CPU# %lu\n" + "\tvendor : %s\n" + "\tfamily : %s\n" + "\tmodel : %s\n" + "\trevision : %u\n" + "\tarchrev : %u\n" + "\tfeatures :%s\n" /* don't change this---it _is_ right! */ + "\tcpu number : %lu\n" + "\tcpu regs : %u\n" + "\tcpu MHz : %lu.%06lu\n" + "\titc MHz : %lu.%06lu\n" + "\tBogoMIPS : %lu.%02lu\n\n", + c - cpu_data, c->vendor, family, model, c->revision, c->archrev, + features, + c->ppn, c->number, c->proc_freq / 1000000, c->proc_freq % 1000000, + c->itc_freq / 1000000, c->itc_freq % 1000000, + loops_per_sec() / 500000, (loops_per_sec() / 5000) % 100); + } + return p - buffer; +} + +void +identify_cpu (struct cpuinfo_ia64 *c) +{ + union { + unsigned long bits[5]; + struct { + /* id 0 & 1: */ + char vendor[16]; + + /* id 2 */ + u64 ppn; /* processor serial number */ + + /* id 3: */ + unsigned number : 8; + unsigned revision : 8; + unsigned model : 8; + unsigned family : 8; + unsigned archrev : 8; + unsigned reserved : 24; + + /* id 4: */ + u64 features; + } field; + } cpuid; + int i; + + for (i = 0; i < 5; ++i) { + cpuid.bits[i] = ia64_get_cpuid(i); + } + +#ifdef CONFIG_SMP + /* + * XXX Instead of copying the ITC info from the bootstrap + * processor, ia64_init_itm() should be done per CPU. That + * should get you the right info. 
--davidm 1/24/00 + */ + if (c != &cpu_data[bootstrap_processor]) { + memset(c, 0, sizeof(struct cpuinfo_ia64)); + c->proc_freq = cpu_data[bootstrap_processor].proc_freq; + c->itc_freq = cpu_data[bootstrap_processor].itc_freq; + c->cyc_per_usec = cpu_data[bootstrap_processor].cyc_per_usec; + c->usec_per_cyc = cpu_data[bootstrap_processor].usec_per_cyc; + } +#else + memset(c, 0, sizeof(struct cpuinfo_ia64)); +#endif + + memcpy(c->vendor, cpuid.field.vendor, 16); +#ifdef CONFIG_IA64_SOFTSDV_HACKS + /* BUG: SoftSDV doesn't support the cpuid registers. */ + if (c->vendor[0] == '\0') + memcpy(c->vendor, "Intel", 6); +#endif + c->ppn = cpuid.field.ppn; + c->number = cpuid.field.number; + c->revision = cpuid.field.revision; + c->model = cpuid.field.model; + c->family = cpuid.field.family; + c->archrev = cpuid.field.archrev; + c->features = cpuid.field.features; +#ifdef CONFIG_SMP + c->loops_per_sec = loops_per_sec; +#endif +} + +/* + * cpu_init() initializes state that is per-CPU. This function acts + * as a 'CPU state barrier', nothing should get across. + */ +void +cpu_init (void) +{ + int nr = smp_processor_id(); + + /* Clear the stack memory reserved for pt_regs: */ + memset(ia64_task_regs(current), 0, sizeof(struct pt_regs)); + + /* + * Initialize default control register to defer speculative + * faults. On a speculative load, we want to defer access + * right, key miss, and key permission faults. We currently + * do NOT defer TLB misses, page-not-present, access bit, or + * debug faults but kernel code should not rely on any + * particular setting of these bits. + */ + ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_PP); + ia64_set_fpu_owner(0); /* initialize ar.k5 */ + + if (test_and_set_bit(nr, &cpu_initialized)) { + printk("CPU#%d already initialized!\n", nr); + machine_halt(); + } + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; +} diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c new file mode 100644 index 000000000..19be1f840 --- /dev/null +++ b/arch/ia64/kernel/signal.c @@ -0,0 +1,537 @@ +/* + * Architecture-specific signal handling support. + * + * Copyright (C) 1999-2000 Hewlett-Packard Co + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * + * Derived from i386 and Alpha versions. 
+ */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/wait.h> + +#include <asm/ia32.h> +#include <asm/uaccess.h> +#include <asm/rse.h> +#include <asm/sigcontext.h> + +#define DEBUG_SIG 0 +#define STACK_ALIGN 16 /* minimal alignment for stack pointer */ +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +#if _NSIG_WORDS > 1 +# define PUT_SIGSET(k,u) __copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t)) +# define GET_SIGSET(k,u) __copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t)) +#else +# define PUT_SIGSET(k,u) __put_user((k)->sig[0], &(u)->sig[0]) +# define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0]) +#endif + +struct sigframe { + struct siginfo info; + struct sigcontext sc; +}; + +extern long sys_wait4 (int, int *, int, struct rusage *); +extern long ia64_do_signal (sigset_t *, struct pt_regs *, long); /* forward decl */ + +long +ia64_rt_sigsuspend (sigset_t *uset, size_t sigsetsize, struct pt_regs *pt) +{ + sigset_t oldset, set; + + /* XXX: Don't preclude handling different sized sigset_t's. */ + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + if (GET_SIGSET(&set, uset)) + return -EFAULT; + + sigdelsetmask(&set, ~_BLOCKABLE); + + spin_lock_irq(¤t->sigmask_lock); + { + oldset = current->blocked; + current->blocked = set; + recalc_sigpending(current); + } + spin_unlock_irq(¤t->sigmask_lock); + + /* + * The return below usually returns to the signal handler. We need to + * pre-set the correct error code here to ensure that the right values + * get saved in sigcontext by ia64_do_signal. 
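+ * By the syscall convention used here, r10 == -1 flags an error
+ * return and r8 then carries the (positive) error code, hence the
+ * EINTR/-1 pair below.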
+ */ + pt->r8 = EINTR; + pt->r10 = -1; + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + if (ia64_do_signal(&oldset, pt, 1)) + return -EINTR; + } +} + +asmlinkage long +sys_sigaltstack (const stack_t *uss, stack_t *uoss, long arg2, long arg3, long arg4, + long arg5, long arg6, long arg7, long stack) +{ + struct pt_regs *pt = (struct pt_regs *) &stack; + + return do_sigaltstack(uss, uoss, pt->r12); +} + +static long +restore_sigcontext (struct sigcontext *sc, struct pt_regs *pt) +{ + struct switch_stack *sw = (struct switch_stack *) pt - 1; + unsigned long ip, flags, nat, um; + long err; + + /* restore scratch that always needs gets updated during signal delivery: */ + err = __get_user(flags, &sc->sc_flags); + + err |= __get_user(nat, &sc->sc_nat); + err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */ + err |= __get_user(pt->ar_fpsr, &sc->sc_ar_fpsr); + err |= __get_user(pt->ar_pfs, &sc->sc_ar_pfs); + err |= __get_user(um, &sc->sc_um); /* user mask */ + err |= __get_user(pt->ar_rsc, &sc->sc_ar_rsc); + err |= __get_user(pt->ar_ccv, &sc->sc_ar_ccv); + err |= __get_user(pt->ar_unat, &sc->sc_ar_unat); + err |= __get_user(pt->pr, &sc->sc_pr); /* predicates */ + err |= __get_user(pt->b0, &sc->sc_br[0]); /* b0 (rp) */ + err |= __get_user(pt->b6, &sc->sc_br[6]); + err |= __copy_from_user(&pt->r1, &sc->sc_gr[1], 3*8); /* r1-r3 */ + err |= __copy_from_user(&pt->r8, &sc->sc_gr[8], 4*8); /* r8-r11 */ + err |= __copy_from_user(&pt->r12, &sc->sc_gr[12], 4*8); /* r12-r15 */ + err |= __copy_from_user(&pt->r16, &sc->sc_gr[16], 16*8); /* r16-r31 */ + + /* establish new instruction pointer: */ + pt->cr_iip = ip & ~0x3UL; + ia64_psr(pt)->ri = ip & 0x3; + pt->cr_ipsr = (pt->cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM); + + ia64_put_nat_bits (pt, sw, nat); /* restore the original scratch NaT bits */ + + if (flags & IA64_SC_FLAG_FPH_VALID) { + struct task_struct *fpu_owner = ia64_get_fpu_owner(); + + __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16); + if (fpu_owner == current) { + __ia64_load_fpu(current->thread.fph); + } + } + return err; +} + +/* + * When we get here, ((struct switch_stack *) pt - 1) is a + * switch_stack frame that has no defined value. Upon return, we + * expect sw->caller_unat to contain the new unat value. The reason + * we use a full switch_stack frame is so everything is symmetric + * with ia64_do_signal(). + */ +long +ia64_rt_sigreturn (struct pt_regs *pt) +{ + extern char ia64_strace_leave_kernel, ia64_leave_kernel; + struct sigcontext *sc; + struct siginfo si; + sigset_t set; + long retval; + + sc = &((struct sigframe *) (pt->r12 + 16))->sc; + + /* + * When we return to the previously executing context, r8 and + * r10 have already been setup the way we want them. Indeed, + * if the signal wasn't delivered while in a system call, we + * must not touch r8 or r10 as otherwise user-level stat could + * be corrupted. 
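+ *
+ * The value returned here just selects which kernel exit path is used
+ * on the way back out: plain ia64_leave_kernel, or
+ * ia64_strace_leave_kernel for a traced system call.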
+ */ + retval = (long) &ia64_leave_kernel | 1; + if ((current->flags & PF_TRACESYS) + && (sc->sc_flags & IA64_SC_FLAG_IN_SYSCALL)) + retval = (long) &ia64_strace_leave_kernel; + + if (!access_ok(VERIFY_READ, sc, sizeof(*sc))) + goto give_sigsegv; + + if (GET_SIGSET(&set, &sc->sc_mask)) + goto give_sigsegv; + + sigdelsetmask(&set, ~_BLOCKABLE); + spin_lock_irq(¤t->sigmask_lock); + current->blocked = set; + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + + if (restore_sigcontext(sc, pt)) + goto give_sigsegv; + +#if DEBUG_SIG + printk("SIG return (%s:%d): sp=%lx ip=%lx\n", + current->comm, current->pid, pt->r12, pt->cr_iip); +#endif + /* + * It is more difficult to avoid calling this function than to + * call it and ignore errors. + */ + do_sigaltstack(&sc->sc_stack, 0, pt->r12); + return retval; + + give_sigsegv: + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = SI_KERNEL; + si.si_pid = current->pid; + si.si_uid = current->uid; + si.si_addr = sc; + force_sig_info(SIGSEGV, &si, current); + return retval; +} + +/* + * This does just the minimum required setup of sigcontext. + * Specifically, it only installs data that is either not knowable at + * the user-level or that gets modified before execution in the + * trampoline starts. Everything else is done at the user-level. + */ +static long +setup_sigcontext (struct sigcontext *sc, sigset_t *mask, struct pt_regs *pt) +{ + struct switch_stack *sw = (struct switch_stack *) pt - 1; + struct task_struct *fpu_owner = ia64_get_fpu_owner(); + unsigned long flags = 0, ifs, nat; + long err; + + ifs = pt->cr_ifs; + + if (on_sig_stack((unsigned long) sc)) + flags |= IA64_SC_FLAG_ONSTACK; + if ((ifs & (1UL << 63)) == 0) { + /* if cr_ifs isn't valid, we got here through a syscall */ + flags |= IA64_SC_FLAG_IN_SYSCALL; + } + if ((fpu_owner == current) || (current->thread.flags & IA64_THREAD_FPH_VALID)) { + flags |= IA64_SC_FLAG_FPH_VALID; + if (fpu_owner == current) { + __ia64_save_fpu(current->thread.fph); + } + __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16); + } + + /* + * Note: sw->ar_unat is UNDEFINED unless the process is being + * PTRACED. However, this is OK because the NaT bits of the + * preserved registers (r4-r7) are never being looked at by + * the signal handler (register r4-r7 are used instead). 
+ */ + nat = ia64_get_nat_bits(pt, sw); + + err = __put_user(flags, &sc->sc_flags); + err |= __put_user(nat, &sc->sc_nat); + err |= PUT_SIGSET(mask, &sc->sc_mask); + err |= __put_user(pt->cr_ipsr & IA64_PSR_UM, &sc->sc_um); + err |= __put_user(pt->ar_rsc, &sc->sc_ar_rsc); + err |= __put_user(pt->ar_ccv, &sc->sc_ar_ccv); + err |= __put_user(pt->ar_unat, &sc->sc_ar_unat); /* ar.unat */ + err |= __put_user(pt->ar_fpsr, &sc->sc_ar_fpsr); /* ar.fpsr */ + err |= __put_user(pt->ar_pfs, &sc->sc_ar_pfs); + err |= __put_user(pt->pr, &sc->sc_pr); /* predicates */ + err |= __put_user(pt->b0, &sc->sc_br[0]); /* b0 (rp) */ + err |= __put_user(pt->b6, &sc->sc_br[6]); /* b6 */ + err |= __put_user(pt->b7, &sc->sc_br[7]); /* b7 */ + + err |= __copy_to_user(&sc->sc_gr[1], &pt->r1, 3*8); /* r1-r3 */ + err |= __copy_to_user(&sc->sc_gr[8], &pt->r8, 4*8); /* r8-r11 */ + err |= __copy_to_user(&sc->sc_gr[12], &pt->r12, 4*8); /* r12-r15 */ + err |= __copy_to_user(&sc->sc_gr[16], &pt->r16, 16*8); /* r16-r31 */ + + err |= __put_user(pt->cr_iip + ia64_psr(pt)->ri, &sc->sc_ip); + err |= __put_user(pt->r12, &sc->sc_gr[12]); /* r12 */ + return err; +} + +static long +setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *pt) +{ + struct switch_stack *sw = (struct switch_stack *) pt - 1; + extern char ia64_sigtramp[], __start_gate_section[]; + unsigned long tramp_addr, new_rbs = 0; + struct sigframe *frame; + struct siginfo si; + long err; + + frame = (void *) pt->r12; + tramp_addr = GATE_ADDR + (ia64_sigtramp - __start_gate_section); + if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && !on_sig_stack((unsigned long) frame)) { + new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1); + frame = (void *) ((current->sas_ss_sp + current->sas_ss_size) + & ~(STACK_ALIGN - 1)); + } + frame = (void *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto give_sigsegv; + + err = __copy_to_user(&frame->info, info, sizeof(siginfo_t)); + + err |= __put_user(current->sas_ss_sp, &frame->sc.sc_stack.ss_sp); + err |= __put_user(current->sas_ss_size, &frame->sc.sc_stack.ss_size); + err |= __put_user(sas_ss_flags(pt->r12), &frame->sc.sc_stack.ss_flags); + err |= setup_sigcontext(&frame->sc, set, pt); + + if (err) + goto give_sigsegv; + + pt->r12 = (unsigned long) frame - 16; /* new stack pointer */ + pt->r2 = sig; /* signal number */ + pt->r3 = (unsigned long) ka->sa.sa_handler; /* addr. of handler's proc. descriptor */ + pt->r15 = new_rbs; + pt->ar_fpsr = FPSR_DEFAULT; /* reset fpsr for signal handler */ + pt->cr_iip = tramp_addr; + ia64_psr(pt)->ri = 0; /* start executing in first slot */ + + /* + * Note: this affects only the NaT bits of the scratch regs + * (the ones saved in pt_regs, which is exactly what we want. + * The NaT bits for the preserved regs (r4-r7) are in + * sw->ar_unat iff this process is being PTRACED. 
+ */ + sw->caller_unat = 0; /* ensure NaT bits of at least r2, r3, r12, and r15 are clear */ + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%lx\n", + current->comm, current->pid, sig, pt->r12, pt->cr_iip, pt->r3); +#endif + return 1; + + give_sigsegv: + if (sig == SIGSEGV) + ka->sa.sa_handler = SIG_DFL; + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = SI_KERNEL; + si.si_pid = current->pid; + si.si_uid = current->uid; + si.si_addr = frame; + force_sig_info(SIGSEGV, &si, current); + return 0; +} + +static long +handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, + struct pt_regs *pt) +{ +#ifdef CONFIG_IA32_SUPPORT + if (IS_IA32_PROCESS(pt)) { + /* send signal to IA-32 process */ + if (!ia32_setup_frame1(sig, ka, info, oldset, pt)) + return 0; + } else +#endif + /* send signal to IA-64 process */ + if (!setup_frame(sig, ka, info, oldset, pt)) + return 0; + + if (ka->sa.sa_flags & SA_ONESHOT) + ka->sa.sa_handler = SIG_DFL; + + if (!(ka->sa.sa_flags & SA_NODEFER)) { + spin_lock_irq(¤t->sigmask_lock); + sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); + sigaddset(¤t->blocked, sig); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + } + return 1; +} + +/* + * When we get here, `pt' points to struct pt_regs and ((struct + * switch_stack *) pt - 1) points to a switch stack structure. + * HOWEVER, in the normal case, the ONLY value valid in the + * switch_stack is the caller_unat field. The entire switch_stack is + * valid ONLY if current->flags has PF_PTRACED set. + * + * Note that `init' is a special process: it doesn't get signals it + * doesn't want to handle. Thus you cannot kill init even with a + * SIGKILL even by mistake. + * + * Note that we go through the signals twice: once to check the + * signals that the kernel can handle, and then we build all the + * user-level signal handling stack-frames in one go after that. + */ +long +ia64_do_signal (sigset_t *oldset, struct pt_regs *pt, long in_syscall) +{ + struct k_sigaction *ka; + siginfo_t info; + long restart = in_syscall; + + /* + * In the ia64_leave_kernel code path, we want the common case + * to go fast, which is why we may in certain cases get here + * from kernel mode. Just return without doing anything if so. + */ + if (!user_mode(pt)) + return 0; + + if (!oldset) + oldset = ¤t->blocked; + + if (pt->r10 != -1) { + /* + * A system calls has to be restarted only if one of + * the error codes ERESTARTNOHAND, ERESTARTSYS, or + * ERESTARTNOINTR is returned. If r10 isn't -1 then + * r8 doesn't hold an error code and we don't need to + * restart the syscall, so we set in_syscall to zero. + */ + restart = 0; + } + + for (;;) { + unsigned long signr; + + spin_lock_irq(¤t->sigmask_lock); + signr = dequeue_signal(¤t->blocked, &info); + spin_unlock_irq(¤t->sigmask_lock); + + if (!signr) + break; + + if ((current->flags & PF_PTRACED) && signr != SIGKILL) { + /* Let the debugger run. */ + current->exit_code = signr; + set_current_state(TASK_STOPPED); + notify_parent(current, SIGCHLD); + schedule(); + signr = current->exit_code; + + /* We're back. Did the debugger cancel the sig? */ + if (!signr) + continue; + current->exit_code = 0; + + /* The debugger continued. Ignore SIGSTOP. */ + if (signr == SIGSTOP) + continue; + + /* Update the siginfo structure. Is this good? 
*/ + if (signr != info.si_signo) { + info.si_signo = signr; + info.si_errno = 0; + info.si_code = SI_USER; + info.si_pid = current->p_pptr->pid; + info.si_uid = current->p_pptr->uid; + } + + /* If the (new) signal is now blocked, requeue it. */ + if (sigismember(¤t->blocked, signr)) { + send_sig_info(signr, &info, current); + continue; + } + } + + ka = ¤t->sig->action[signr - 1]; + if (ka->sa.sa_handler == SIG_IGN) { + if (signr != SIGCHLD) + continue; + /* Check for SIGCHLD: it's special. */ + while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) + /* nothing */; + continue; + } + + if (ka->sa.sa_handler == SIG_DFL) { + int exit_code = signr; + + /* Init gets no signals it doesn't want. */ + if (current->pid == 1) + continue; + + switch (signr) { + case SIGCONT: case SIGCHLD: case SIGWINCH: + continue; + + case SIGTSTP: case SIGTTIN: case SIGTTOU: + if (is_orphaned_pgrp(current->pgrp)) + continue; + /* FALLTHRU */ + + case SIGSTOP: + set_current_state(TASK_STOPPED); + current->exit_code = signr; + if (!(current->p_pptr->sig->action[SIGCHLD-1].sa.sa_flags + & SA_NOCLDSTOP)) + notify_parent(current, SIGCHLD); + schedule(); + continue; + + case SIGQUIT: case SIGILL: case SIGTRAP: + case SIGABRT: case SIGFPE: case SIGSEGV: + case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: + if (do_coredump(signr, pt)) + exit_code |= 0x80; + /* FALLTHRU */ + + default: + lock_kernel(); + sigaddset(¤t->signal, signr); + recalc_sigpending(current); + current->flags |= PF_SIGNALED; + do_exit(exit_code); + /* NOTREACHED */ + } + } + + if (restart) { + switch (pt->r8) { + case ERESTARTSYS: + if ((ka->sa.sa_flags & SA_RESTART) == 0) { + case ERESTARTNOHAND: + pt->r8 = EINTR; + /* note: pt->r10 is already -1 */ + break; + } + case ERESTARTNOINTR: + ia64_decrement_ip(pt); + } + } + + /* Whee! Actually deliver the signal. If the + delivery failed, we need to continue to iterate in + this loop so we can deliver the SIGSEGV... */ + if (handle_signal(signr, ka, &info, oldset, pt)) + return 1; + } + + /* Did we come from a system call? */ + if (restart) { + /* Restart the system call - no handlers present */ + if (pt->r8 == ERESTARTNOHAND || + pt->r8 == ERESTARTSYS || + pt->r8 == ERESTARTNOINTR) { + /* + * Note: the syscall number is in r15 which is + * saved in pt_regs so all we need to do here + * is adjust ip so that the "break" + * instruction gets re-executed. + */ + ia64_decrement_ip(pt); + } + } + return 0; +} diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c new file mode 100644 index 000000000..48a3d68b4 --- /dev/null +++ b/arch/ia64/kernel/smp.c @@ -0,0 +1,777 @@ +/* + * SMP Support + * + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * + * Lots of stuff stolen from arch/alpha/kernel/smp.c + * + * 99/10/05 davidm Update to bring it in sync with new command-line processing scheme. 
+ */ +#define __KERNEL_SYSCALLS__ + +#include <linux/config.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/smp.h> +#include <linux/kernel_stat.h> +#include <linux/mm.h> + +#include <asm/atomic.h> +#include <asm/bitops.h> +#include <asm/current.h> +#include <asm/delay.h> + +#ifdef CONFIG_KDB +#include <linux/kdb.h> +void smp_kdb_interrupt (struct pt_regs* regs); +void kdb_global(int cpuid); +extern unsigned long smp_kdb_wait; +extern int kdb_new_cpu; +#endif + +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/sal.h> +#include <asm/system.h> +#include <asm/unistd.h> + +extern int cpu_idle(void * unused); +extern void _start(void); + +extern int cpu_now_booting; /* Used by head.S to find idle task */ +extern unsigned long cpu_initialized; /* Bitmap of available cpu's */ +extern struct cpuinfo_ia64 cpu_data[NR_CPUS]; /* Duh... */ + +spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED; + +#ifdef CONFIG_KDB +unsigned long cpu_online_map = 1; +#endif + +volatile int cpu_number_map[NR_CPUS] = { -1, }; /* SAPIC ID -> Logical ID */ +volatile int __cpu_logical_map[NR_CPUS] = { -1, }; /* logical ID -> SAPIC ID */ +int smp_num_cpus = 1; +int bootstrap_processor = -1; /* SAPIC ID of BSP */ +int smp_threads_ready = 0; /* Set when the idlers are all forked */ +unsigned long ipi_base_addr = IPI_DEFAULT_BASE_ADDR; /* Base addr of IPI table */ +cycles_t cacheflush_time = 0; +unsigned long ap_wakeup_vector = -1; /* External Int to use to wakeup AP's */ +static int max_cpus = -1; /* Command line */ +static unsigned long ipi_op[NR_CPUS]; +struct smp_call_struct { + void (*func) (void *info); + void *info; + long wait; + atomic_t unstarted_count; + atomic_t unfinished_count; +}; +static struct smp_call_struct *smp_call_function_data; + +#ifdef CONFIG_KDB +unsigned long smp_kdb_wait = 0; /* Bitmask of waiters */ +#endif + +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC +extern spinlock_t ivr_read_lock; +#endif + +int use_xtp = 0; /* XXX */ + +#define IPI_RESCHEDULE 0 +#define IPI_CALL_FUNC 1 +#define IPI_CPU_STOP 2 +#define IPI_KDB_INTERRUPT 4 + +/* + * Setup routine for controlling SMP activation + * + * Command-line option of "nosmp" or "maxcpus=0" will disable SMP + * activation entirely (the MPS table probe still happens, though). + * + * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer + * greater than 0, limits the maximum number of CPUs activated in + * SMP mode to <NUM>. + */ + +static int __init nosmp(char *str) +{ + max_cpus = 0; + return 1; +} + +__setup("nosmp", nosmp); + +static int __init maxcpus(char *str) +{ + get_option(&str, &max_cpus); + return 1; +} + +__setup("maxcpus=", maxcpus); + +/* + * Yoink this CPU from the runnable list... + */ +void +halt_processor(void) +{ + clear_bit(smp_processor_id(), &cpu_initialized); + max_xtp(); + __cli(); + for (;;) + ; + +} + +void +handle_IPI(int irq, void *dev_id, struct pt_regs *regs) +{ + int this_cpu = smp_processor_id(); + unsigned long *pending_ipis = &ipi_op[this_cpu]; + unsigned long ops; + + /* Count this now; we may make a call that never returns. */ + cpu_data[this_cpu].ipi_count++; + + mb(); /* Order interrupt and bit testing. */ + while ((ops = xchg(pending_ipis, 0)) != 0) { + mb(); /* Order bit clearing and data access. 
*/ + do { + unsigned long which; + + which = ffz(~ops); + ops &= ~(1 << which); + + switch (which) { + case IPI_RESCHEDULE: + /* + * Reschedule callback. Everything to be done is done by the + * interrupt return path. + */ + break; + + case IPI_CALL_FUNC: + { + struct smp_call_struct *data; + void (*func)(void *info); + void *info; + int wait; + + data = smp_call_function_data; + func = data->func; + info = data->info; + wait = data->wait; + + mb(); + atomic_dec (&data->unstarted_count); + + /* At this point the structure may be gone unless wait is true. */ + (*func)(info); + + /* Notify the sending CPU that the task is done. */ + mb(); + if (wait) + atomic_dec (&data->unfinished_count); + } + break; + + case IPI_CPU_STOP: + halt_processor(); + break; + +#ifdef CONFIG_KDB + case IPI_KDB_INTERRUPT: + smp_kdb_interrupt(regs); + break; +#endif + + default: + printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which); + break; + } /* Switch */ + } while (ops); + + mb(); /* Order data access and bit testing. */ + } +} + +static inline void +send_IPI(int dest_cpu, unsigned char vector) +{ + unsigned long ipi_addr; + unsigned long ipi_data; +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + unsigned long flags; +#endif + + ipi_data = vector; + ipi_addr = ipi_base_addr | ((dest_cpu << 8) << 4); /* 16-bit SAPIC ID's; assume CPU bus 0 */ + mb(); + +#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC + /* + * Disable IVR reads + */ + spin_lock_irqsave(&ivr_read_lock, flags); + writeq(ipi_data, ipi_addr); + spin_unlock_irqrestore(&ivr_read_lock, flags); +#else + writeq(ipi_data, ipi_addr); +#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ + +} + +static inline void +send_IPI_single(int dest_cpu, int op) +{ + + if (dest_cpu == -1) + return; + + ipi_op[dest_cpu] |= (1 << op); + send_IPI(dest_cpu, IPI_IRQ); +} + +static inline void +send_IPI_allbutself(int op) +{ + int i; + int cpu_id = 0; + + for (i = 0; i < smp_num_cpus; i++) { + cpu_id = __cpu_logical_map[i]; + if (cpu_id != smp_processor_id()) + send_IPI_single(cpu_id, op); + } +} + +static inline void +send_IPI_all(int op) +{ + int i; + + for (i = 0; i < smp_num_cpus; i++) + send_IPI_single(__cpu_logical_map[i], op); +} + +static inline void +send_IPI_self(int op) +{ + send_IPI_single(smp_processor_id(), op); +} + +void +smp_send_reschedule(int cpu) +{ + send_IPI_single(cpu, IPI_RESCHEDULE); +} + +void +smp_send_stop(void) +{ + send_IPI_allbutself(IPI_CPU_STOP); +} + +/* + * Run a function on all other CPUs. + * <func> The function to run. This must be fast and non-blocking. + * <info> An arbitrary pointer to pass to the function. + * <retry> If true, keep retrying until ready. + * <wait> If true, wait until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. + * + * Does not return until remote CPUs are nearly ready to execute <func> + * or are or have executed. + */ + +int +smp_call_function (void (*func) (void *info), void *info, int retry, int wait) +{ + struct smp_call_struct data; + long timeout; + static spinlock_t lock = SPIN_LOCK_UNLOCKED; + + data.func = func; + data.info = info; + data.wait = wait; + atomic_set(&data.unstarted_count, smp_num_cpus - 1); + atomic_set(&data.unfinished_count, smp_num_cpus - 1); + + if (retry) { + while (1) { + if (smp_call_function_data) { + schedule (); /* Give a mate a go */ + continue; + } + spin_lock (&lock); + if (smp_call_function_data) { + spin_unlock (&lock); /* Bad luck */ + continue; + } + /* Mine, all mine! 
*/ + break; + } + } + else { + if (smp_call_function_data) + return -EBUSY; + spin_lock (&lock); + if (smp_call_function_data) { + spin_unlock (&lock); + return -EBUSY; + } + } + + smp_call_function_data = &data; + spin_unlock (&lock); + data.func = func; + data.info = info; + atomic_set (&data.unstarted_count, smp_num_cpus - 1); + data.wait = wait; + if (wait) + atomic_set (&data.unfinished_count, smp_num_cpus - 1); + + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_allbutself(IPI_CALL_FUNC); + + /* Wait for response */ + timeout = jiffies + HZ; + while ( (atomic_read (&data.unstarted_count) > 0) && + time_before (jiffies, timeout) ) + barrier (); + if (atomic_read (&data.unstarted_count) > 0) { + smp_call_function_data = NULL; + return -ETIMEDOUT; + } + if (wait) + while (atomic_read (&data.unfinished_count) > 0) + barrier (); + smp_call_function_data = NULL; + return 0; +} + +/* + * Flush all other CPU's tlb and then mine. Do this with smp_call_function() as we + * want to ensure all TLB's flushed before proceeding. + * + * XXX: Is it OK to use the same ptc.e info on all cpus? + */ +void +smp_flush_tlb_all(void) +{ + smp_call_function((void (*)(void *))__flush_tlb_all, NULL, 1, 1); + __flush_tlb_all(); +} + +/* + * Ideally sets up per-cpu profiling hooks. Doesn't do much now... + */ +static inline void __init +smp_setup_percpu_timer(int cpuid) +{ + cpu_data[cpuid].prof_counter = 1; + cpu_data[cpuid].prof_multiplier = 1; +} + +void +smp_do_timer(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + int user = user_mode(regs); + struct cpuinfo_ia64 *data = &cpu_data[cpu]; + + extern void update_one_process(struct task_struct *, unsigned long, unsigned long, + unsigned long, int); + if (!--data->prof_counter) { + irq_enter(cpu, TIMER_IRQ); + + update_one_process(current, 1, user, !user, cpu); + if (current->pid) { + if (--current->counter < 0) { + current->counter = 0; + current->need_resched = 1; + } + + if (user) { + if (current->priority < DEF_PRIORITY) { + kstat.cpu_nice++; + kstat.per_cpu_nice[cpu]++; + } else { + kstat.cpu_user++; + kstat.per_cpu_user[cpu]++; + } + } else { + kstat.cpu_system++; + kstat.per_cpu_system[cpu]++; + } + } + + data->prof_counter = data->prof_multiplier; + irq_exit(cpu, TIMER_IRQ); + } +} + + +/* + * Called by both boot and secondaries to move global data into + * per-processor storage. + */ +static inline void __init +smp_store_cpu_info(int cpuid) +{ + struct cpuinfo_ia64 *c = &cpu_data[cpuid]; + + identify_cpu(c); +} + +/* + * SAL shoves the AP's here when we start them. Physical mode, no kernel TR, + * no RRs set, better than even chance that psr is bogus. Fix all that and + * call _start. In effect, pretend to be lilo. + * + * Stolen from lilo_start.c. Thanks David! + */ +void +start_ap(void) +{ + unsigned long flags; + + /* + * Install a translation register that identity maps the + * kernel's 256MB page(s). 
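(The region-register values programmed just below pack a region id together with a page-size code; the field positions quoted here -- page size in bits 7:2, region id from bit 8 upward -- come from the IA-64 region-register layout and are stated as background, not taken from this patch.)

    rr = (rid << 8) | (page_shift << 2);      /* page_shift = log2 of the page size */
    /* e.g. (0x1000 << 8) | (_PAGE_SIZE_1M << 2): region id 0x1000, 2^_PAGE_SIZE_1M-byte pages */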
+ */ + ia64_clear_ic(flags); + ia64_set_rr( 0, (0x1000 << 8) | (_PAGE_SIZE_1M << 2)); + ia64_set_rr(PAGE_OFFSET, (ia64_rid(0, PAGE_OFFSET) << 8) | (_PAGE_SIZE_256M << 2)); + ia64_itr(0x3, 1, PAGE_OFFSET, + pte_val(mk_pte_phys(0, __pgprot(__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX))), + _PAGE_SIZE_256M); + + flags = (IA64_PSR_IT | IA64_PSR_IC | IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_DFH | + IA64_PSR_BN); + + asm volatile ("movl r8 = 1f\n" + ";;\n" + "mov cr.ipsr=%0\n" + "mov cr.iip=r8\n" + "mov cr.ifs=r0\n" + ";;\n" + "rfi;;" + "1:\n" + "movl r1 = __gp" :: "r"(flags) : "r8"); + _start(); +} + + +/* + * AP's start using C here. + */ +void __init +smp_callin(void) +{ + extern void ia64_rid_init(void); + extern void ia64_init_itm(void); + extern void ia64_cpu_local_tick(void); + + ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_PP); + ia64_set_fpu_owner(0); + ia64_rid_init(); /* initialize region ids */ + + cpu_init(); + __flush_tlb_all(); + + smp_store_cpu_info(smp_processor_id()); + smp_setup_percpu_timer(smp_processor_id()); + + while (!smp_threads_ready) + mb(); + + normal_xtp(); + + /* setup the CPU local timer tick */ + ia64_cpu_local_tick(); + + /* Disable all local interrupts */ + ia64_set_lrr0(0, 1); + ia64_set_lrr1(0, 1); + + __sti(); /* Interrupts have been off till now. */ + cpu_idle(NULL); +} + +/* + * Create the idle task for a new AP. DO NOT use kernel_thread() because + * that could end up calling schedule() in the ia64_leave_kernel exit + * path in which case the new idle task could get scheduled before we + * had a chance to remove it from the run-queue... + */ +static int __init +fork_by_hand(void) +{ + /* + * Don't care about the usp and regs settings since we'll never + * reschedule the forked task. + */ + return do_fork(CLONE_VM|CLONE_PID, 0, 0); +} + +/* + * Bring one cpu online. + * + * NB: cpuid is the CPU BUS-LOCAL ID, not the entire SAPIC ID. See asm/smp.h. + */ +static int __init +smp_boot_one_cpu(int cpuid, int cpunum) +{ + struct task_struct *idle; + long timeout; + + /* + * Create an idle task for this CPU. Note that the address we + * give to kernel_thread is irrelevant -- it's going to start + * where OS_BOOT_RENDEVZ vector in SAL says to start. But + * this gets all the other task-y sort of data structures set + * up like we wish. We need to pull the just created idle task + * off the run queue and stuff it into the init_tasks[] array. + * Sheesh . . . + */ + if (fork_by_hand() < 0) + panic("failed fork for CPU %d", cpuid); + /* + * We remove it from the pidhash and the runqueue + * once we got the process: + */ + idle = init_task.prev_task; + if (!idle) + panic("No idle process for CPU %d", cpuid); + init_tasks[cpunum] = idle; + del_from_runqueue(idle); + unhash_process(idle); + + /* Schedule the first task manually. */ + idle->processor = cpuid; + idle->has_cpu = 1; + + /* Let _start know what logical CPU we're booting (offset into init_tasks[] */ + cpu_now_booting = cpunum; + + /* Kick the AP in the butt */ + send_IPI(cpuid, ap_wakeup_vector); + ia64_srlz_i(); + mb(); + + /* + * OK, wait a bit for that CPU to finish staggering about. smp_callin() will + * call cpu_init() which will set a bit for this AP. When that bit flips, the AP + * is waiting for smp_threads_ready to be 1 and we can move on. 
+ */ + for (timeout = 0; timeout < 100000; timeout++) { + if (test_bit(cpuid, &cpu_initialized)) + goto alive; + udelay(10); + barrier(); + } + + printk(KERN_ERR "SMP: Processor %d is stuck.\n", cpuid); + return -1; + +alive: + /* Remember the AP data */ + cpu_number_map[cpuid] = cpunum; +#ifdef CONFIG_KDB + cpu_online_map |= (1<<cpunum); + printk ("DEBUGGER: cpu_online_map = 0x%08x\n", cpu_online_map); +#endif + __cpu_logical_map[cpunum] = cpuid; + return 0; +} + + + +/* + * Called by smp_init bring all the secondaries online and hold them. + * XXX: this is ACPI specific; it uses "magic" variables exported from acpi.c + * to 'discover' the AP's. Blech. + */ +void __init +smp_boot_cpus(void) +{ + int i, cpu_count = 1; + unsigned long bogosum; + int sapic_id; + extern int acpi_cpus; + extern int acpi_apic_map[32]; + + /* Take care of some initial bookkeeping. */ + memset(&cpu_number_map, -1, sizeof(cpu_number_map)); + memset(&__cpu_logical_map, -1, sizeof(__cpu_logical_map)); + memset(&ipi_op, 0, sizeof(ipi_op)); + + /* Setup BSP mappings */ + cpu_number_map[bootstrap_processor] = 0; + __cpu_logical_map[0] = bootstrap_processor; + current->processor = bootstrap_processor; + + /* Mark BSP booted and get active_mm context */ + cpu_init(); + + /* reset XTP for interrupt routing */ + normal_xtp(); + + /* And generate an entry in cpu_data */ + smp_store_cpu_info(bootstrap_processor); +#if 0 + smp_tune_scheduling(); +#endif + smp_setup_percpu_timer(bootstrap_processor); + + init_idle(); + + /* Nothing to do when told not to. */ + if (max_cpus == 0) { + printk(KERN_INFO "SMP mode deactivated.\n"); + return; + } + + if (acpi_cpus > 1) { + printk(KERN_INFO "SMP: starting up secondaries.\n"); + + for (i = 0; i < NR_CPUS; i++) { + if (acpi_apic_map[i] == -1 || + acpi_apic_map[i] == bootstrap_processor << 8) /* XXX Fix me Walt */ + continue; + + /* + * IA64 SAPIC ID's are 16-bits. See asm/smp.h for more info + */ + sapic_id = acpi_apic_map[i] >> 8; + if (smp_boot_one_cpu(sapic_id, cpu_count)) + continue; + + cpu_count++; /* Count good CPUs only... */ + } + } + + if (cpu_count == 1) { + printk(KERN_ERR "SMP: Bootstrap processor only.\n"); + return; + } + + bogosum = 0; + for (i = 0; i < NR_CPUS; i++) { + if (cpu_initialized & (1L << i)) + bogosum += cpu_data[i].loops_per_sec; + } + + printk(KERN_INFO "SMP: Total of %d processors activated " + "(%lu.%02lu BogoMIPS).\n", + cpu_count, (bogosum + 2500) / 500000, + ((bogosum + 2500) / 5000) % 100); + + smp_num_cpus = cpu_count; +} + +/* + * Called from main.c by each AP. + */ +void __init +smp_commence(void) +{ + mb(); +} + +/* + * Not used; part of the i386 bringup + */ +void __init +initialize_secondary(void) +{ +} + +int __init +setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +/* + * Assume that CPU's have been discovered by some platform-dependant + * interface. For SoftSDV/Lion, that would be ACPI. + * + * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP(). + * + * So this just gets the BSP SAPIC ID and print's it out. Dull, huh? + * + * Not anymore. This also registers the AP OS_MC_REDVEZ address with SAL. + */ +void __init +init_smp_config(void) +{ + struct fptr { + unsigned long fp; + unsigned long gp; + } *ap_startup; + long sal_ret; + + /* Grab the BSP ID */ + bootstrap_processor = hard_smp_processor_id(); + + /* Tell SAL where to drop the AP's. 
*/ + ap_startup = (struct fptr *) start_ap; + sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ, + __pa(ap_startup->fp), __pa(ap_startup->gp), 0, + 0, 0, 0); + if (sal_ret < 0) { + printk("SMP: Can't set SAL AP Boot Rendezvous: %s\n", ia64_sal_strerror(sal_ret)); + printk(" Forcing UP mode\n"); + smp_num_cpus = 1; + } + +} + +#ifdef CONFIG_KDB +void smp_kdb_stop (int all, struct pt_regs* regs) +{ + if (all) + { + printk ("Sending IPI to all on CPU %i\n", smp_processor_id ()); + smp_kdb_wait = 0xffffffff; + clear_bit (smp_processor_id(), &smp_kdb_wait); + send_IPI_allbutself (IPI_KDB_INTERRUPT); + } + else + { + printk ("Sending IPI to self on CPU %i\n", + smp_processor_id ()); + set_bit (smp_processor_id(), &smp_kdb_wait); + clear_bit (__cpu_logical_map[kdb_new_cpu], &smp_kdb_wait); + smp_kdb_interrupt (regs); + } +} + +void smp_kdb_interrupt (struct pt_regs* regs) +{ + printk ("kdb: IPI on CPU %i with mask 0x%08x\n", + smp_processor_id (), smp_kdb_wait); + + /* All CPUs spin here forever */ + while (test_bit (smp_processor_id(), &smp_kdb_wait)); + + /* Enter KDB on CPU selected by KDB on the last CPU */ + if (__cpu_logical_map[kdb_new_cpu] == smp_processor_id ()) + { + kdb (KDB_REASON_SWITCH, 0, regs); + } +} + +#endif + diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c new file mode 100644 index 000000000..18a498a09 --- /dev/null +++ b/arch/ia64/kernel/sys_ia64.c @@ -0,0 +1,216 @@ +/* + * This file contains various system calls that have different calling + * conventions on different platforms. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/sched.h> +#include <linux/file.h> /* doh, must come after sched.h... */ +#include <linux/smp.h> +#include <linux/smp_lock.h> + +asmlinkage long +ia64_getpriority (int which, int who, long arg2, long arg3, long arg4, long arg5, long arg6, + long arg7, long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + extern long sys_getpriority (int, int); + long prio; + + prio = sys_getpriority(which, who); + if (prio >= 0) { + regs->r8 = 0; /* ensure negative priority is not mistaken as error code */ + prio = 20 - prio; + } + return prio; +} + +asmlinkage unsigned long +sys_getpagesize (void) +{ + return PAGE_SIZE; +} + +asmlinkage unsigned long +ia64_shmat (int shmid, void *shmaddr, int shmflg, long arg3, long arg4, long arg5, long arg6, + long arg7, long stack) +{ + extern int sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr); + struct pt_regs *regs = (struct pt_regs *) &stack; + unsigned long raddr; + int retval; + + retval = sys_shmat(shmid, shmaddr, shmflg, &raddr); + if (retval < 0) + return retval; + + regs->r8 = 0; /* ensure negative addresses are not mistaken as an error code */ + return raddr; +} + +asmlinkage unsigned long +ia64_brk (long brk, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, long stack) +{ + extern unsigned long sys_brk (unsigned long brk); + struct pt_regs *regs = (struct pt_regs *) &stack; + unsigned long retval; + + retval = sys_brk(brk); + + regs->r8 = 0; /* ensure large retval isn't mistaken as error code */ + return retval; +} + +/* + * On IA-64, we return the two file descriptors in ret0 and ret1 (r8 + * and r9) as this is faster than doing a copy_to_user(). 
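(A condensed sketch of the mechanism the wrappers in this file use to reach the saved registers; the reading of the trailing dummy arguments -- they are there so that `&stack' ends up pointing at the saved pt_regs frame -- is an inference from the code, not a statement made by the patch itself.)

    struct pt_regs *regs = (struct pt_regs *) &stack;  /* alias the register save area      */
    regs->r9 = fd[1];                                  /* second descriptor returned in r9  */
    return fd[0];                                      /* ordinary return value lands in r8 */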
*/ +asmlinkage long +sys_pipe (long arg0, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + int fd[2]; + int retval; + + lock_kernel(); + retval = do_pipe(fd); + if (retval) + goto out; + retval = fd[0]; + regs->r9 = fd[1]; + out: + unlock_kernel(); + return retval; +} + +static inline unsigned long +do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff) +{ + struct file *file = 0; + + /* + * A zero mmap always succeeds in Linux, independent of + * whether or not the remaining arguments are valid. + */ + if (PAGE_ALIGN(len) == 0) + return addr; + +#ifdef notyet + /* Don't permit mappings that would cross a region boundary: */ + region_start = IA64_GET_REGION(addr); + region_end = IA64_GET_REGION(addr + len); + if (region_start != region_end) + return -EINVAL; + + <<x??x>> +#endif + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + return -EBADF; + } + + down(&current->mm->mmap_sem); + lock_kernel(); + + addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + + unlock_kernel(); + up(&current->mm->mmap_sem); + + if (file) + fput(file); + return addr; +} + +/* + * mmap2() is like mmap() except that the offset is expressed in units + * of PAGE_SIZE (instead of bytes). This allows to mmap2() (pieces + * of) files that are larger than the address space of the CPU. + */ +asmlinkage unsigned long +sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff, + long arg6, long arg7, long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + + addr = do_mmap2(addr, len, prot, flags, fd, pgoff); + if (!IS_ERR(addr)) + regs->r8 = 0; /* ensure large addresses are not mistaken as failures... */ + return addr; +} + +asmlinkage unsigned long +sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, + int fd, long off, long arg6, long arg7, long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + + addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + if (!IS_ERR(addr)) + regs->r8 = 0; /* ensure large addresses are not mistaken as failures...
*/ + return addr; +} + +asmlinkage long +sys_ioperm (unsigned long from, unsigned long num, int on) +{ + printk(KERN_ERR "sys_ioperm(from=%lx, num=%lx, on=%d)\n", from, num, on); + return -EIO; +} + +asmlinkage long +sys_iopl (int level, long arg1, long arg2, long arg3) +{ + lock_kernel(); + printk(KERN_ERR "sys_iopl(level=%d)!\n", level); + unlock_kernel(); + return -ENOSYS; +} + +asmlinkage long +sys_vm86 (long arg0, long arg1, long arg2, long arg3) +{ + lock_kernel(); + printk(KERN_ERR "sys_vm86(%lx, %lx, %lx, %lx)!\n", arg0, arg1, arg2, arg3); + unlock_kernel(); + return -ENOSYS; +} + +asmlinkage long +sys_modify_ldt (long arg0, long arg1, long arg2, long arg3) +{ + lock_kernel(); + printk(KERN_ERR "sys_modify_ldt(%lx, %lx, %lx, %lx)!\n", arg0, arg1, arg2, arg3); + unlock_kernel(); + return -ENOSYS; +} + +#ifndef CONFIG_PCI + +asmlinkage long +sys_pciconfig_read (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, + void *buf) +{ + return -ENOSYS; +} + +asmlinkage long +sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, + void *buf) +{ + return -ENOSYS; +} + + +#endif /* CONFIG_PCI */ diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c new file mode 100644 index 000000000..7c5ace740 --- /dev/null +++ b/arch/ia64/kernel/time.c @@ -0,0 +1,290 @@ +/* + * linux/arch/ia64/kernel/time.c + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 1999-2000 David Mosberger <davidm@hpl.hp.com> + * Copyright (C) 1999 Don Dugger <don.dugger@intel.com> + * Copyright (C) 1999-2000 VA Linux Systems + * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com> + */ +#include <linux/config.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/time.h> + +#include <asm/delay.h> +#include <asm/efi.h> +#include <asm/irq.h> +#include <asm/machvec.h> +#include <asm/ptrace.h> +#include <asm/sal.h> +#include <asm/system.h> + +extern rwlock_t xtime_lock; +extern volatile unsigned long lost_ticks; + +#ifdef CONFIG_IA64_DEBUG_IRQ + +unsigned long last_cli_ip; + +#endif + +static struct { + unsigned long delta; + unsigned long next[NR_CPUS]; +} itm; + +static void +do_profile (unsigned long ip) +{ + extern char _stext; + + if (prof_buffer && current->pid) { + ip -= (unsigned long) &_stext; + ip >>= prof_shift; + /* + * Don't ignore out-of-bounds IP values silently, + * put them into the last histogram slot, so if + * present, they will show up as a sharp peak. + */ + if (ip > prof_len - 1) + ip = prof_len - 1; + + atomic_inc((atomic_t *) &prof_buffer[ip]); + } +} + +/* + * Return the number of micro-seconds that elapsed since the last + * update to jiffy. The xtime_lock must be at least read-locked when + * calling this routine. + */ +static inline unsigned long +gettimeoffset (void) +{ + unsigned long now = ia64_get_itc(); + unsigned long elapsed_cycles, lost; + + elapsed_cycles = now - (itm.next[smp_processor_id()] - itm.delta); + + lost = lost_ticks; + if (lost) + elapsed_cycles += lost*itm.delta; + + return (elapsed_cycles*my_cpu_data.usec_per_cyc) >> IA64_USEC_PER_CYC_SHIFT; +} + +void +do_settimeofday (struct timeval *tv) +{ + write_lock_irq(&xtime_lock); + { + /* + * This is revolting. We need to set the xtime.tv_usec + * correctly. However, the value in this location is + * is value at the last tick. Discover what + * correction gettimeofday would have done, and then + * undo it! 
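(A small numeric illustration of the correction performed below; the 3000 usec figure is made up. If gettimeoffset() currently reports 3000 usec since the last tick, the requested time is pre-decremented by that amount, so a later do_gettimeofday() -- which adds the offset back on top of xtime -- hands back roughly the value the caller asked to set.)

    tv->tv_usec -= gettimeoffset();    /* e.g. 500000 - 3000 = 497000 stored in xtime    */
    /* later: do_gettimeofday() returns xtime.tv_usec + offset, i.e. about 500000 again  */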
+ */ + tv->tv_usec -= gettimeoffset(); + while (tv->tv_usec < 0) { + tv->tv_usec += 1000000; + tv->tv_sec--; + } + + xtime = *tv; + time_adjust = 0; /* stop active adjtime() */ + time_status |= STA_UNSYNC; + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; + } + write_unlock_irq(&xtime_lock); +} + +void +do_gettimeofday (struct timeval *tv) +{ + unsigned long flags, usec, sec; + + read_lock_irqsave(&xtime_lock, flags); + { + usec = gettimeoffset(); + + sec = xtime.tv_sec; + usec += xtime.tv_usec; + } + read_unlock_irqrestore(&xtime_lock, flags); + + while (usec >= 1000000) { + usec -= 1000000; + ++sec; + } + + tv->tv_sec = sec; + tv->tv_usec = usec; +} + +static void +timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + static unsigned long last_time; + static unsigned char count; + int cpu = smp_processor_id(); + + /* + * Here we are in the timer irq handler. We have irqs locally + * disabled, but we don't know if the timer_bh is running on + * another CPU. We need to avoid to SMP race by acquiring the + * xtime_lock. + */ + write_lock(&xtime_lock); + while (1) { + /* do kernel PC profiling here. */ + if (!user_mode(regs)) + do_profile(regs->cr_iip); + +#ifdef CONFIG_SMP + smp_do_timer(regs); + if (smp_processor_id() == bootstrap_processor) + do_timer(regs); +#else + do_timer(regs); +#endif + + itm.next[cpu] += itm.delta; + /* + * There is a race condition here: to be on the "safe" + * side, we process timer ticks until itm.next is + * ahead of the itc by at least half the timer + * interval. This should give us enough time to set + * the new itm value without losing a timer tick. + */ + if (time_after(itm.next[cpu], ia64_get_itc() + itm.delta/2)) { + ia64_set_itm(itm.next[cpu]); + break; + } + +#if !(defined(CONFIG_IA64_SOFTSDV_HACKS) && defined(CONFIG_SMP)) + /* + * SoftSDV in SMP mode is _slow_, so we do "loose" ticks, + * but it's really OK... + */ + if (count > 0 && jiffies - last_time > 5*HZ) + count = 0; + if (count++ == 0) { + last_time = jiffies; + printk("Lost clock tick on CPU %d (now=%lx, next=%lx)!!\n", + cpu, ia64_get_itc(), itm.next[cpu]); +# ifdef CONFIG_IA64_DEBUG_IRQ + printk("last_cli_ip=%lx\n", last_cli_ip); +# endif + } +#endif + } + write_unlock(&xtime_lock); +} + +/* + * Encapsulate access to the itm structure for SMP. + */ +void __init +ia64_cpu_local_tick(void) +{ + /* arrange for the cycle counter to generate a timer interrupt: */ + ia64_set_itv(TIMER_IRQ, 0); + ia64_set_itc(0); + itm.next[smp_processor_id()] = ia64_get_itc() + itm.delta; + ia64_set_itm(itm.next[smp_processor_id()]); +} + +void __init +ia64_init_itm (void) +{ + unsigned long platform_base_freq, itc_freq, drift; + struct pal_freq_ratio itc_ratio, proc_ratio; + long status; + + /* + * According to SAL v2.6, we need to use a SAL call to determine the + * platform base frequency and then a PAL call to determine the + * frequency ratio between the ITC and the base frequency. 
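(A worked example of the arithmetic carried out by the function below, using the same numbers the code itself falls back on when SAL/PAL fail; the HZ value is hypothetical and only there to make the division concrete.)

    platform_base_freq = 100000000;                /* 100 MHz platform/bus clock      */
    itc_ratio.num = 3; itc_ratio.den = 1;          /* ITC ticks 3x per bus cycle      */
    itc_freq  = (platform_base_freq*itc_ratio.num)/itc_ratio.den;   /* 300,000,000 Hz */
    itm.delta = itc_freq / HZ;                     /* e.g. 300,000 ITC cycles per tick at HZ == 1000 */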
+ */ + status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, &platform_base_freq, &drift); + if (status != 0) { + printk("SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status)); + } else { + status = ia64_pal_freq_ratios(&proc_ratio, 0, &itc_ratio); + if (status != 0) + printk("PAL_FREQ_RATIOS failed with status=%ld\n", status); + } + if (status != 0) { + /* invent "random" values */ + printk("SAL/PAL failed to obtain frequency info---inventing reasonably values\n"); + platform_base_freq = 100000000; + itc_ratio.num = 3; + itc_ratio.den = 1; + } +#if defined(CONFIG_IA64_LION_HACKS) + /* Our Lion currently returns base freq 104.857MHz, which + ain't right (it really is 100MHz). */ + printk("SAL/PAL returned: base-freq=%lu, itc-ratio=%lu/%lu, proc-ratio=%lu/%lu\n", + platform_base_freq, itc_ratio.num, itc_ratio.den, + proc_ratio.num, proc_ratio.den); + platform_base_freq = 100000000; +#elif 0 && defined(CONFIG_IA64_BIGSUR_HACKS) + /* BigSur with 991020 firmware returned itc-ratio=9/2 and base + freq 75MHz, which wasn't right. The 991119 firmware seems + to return the right values, so this isn't necessary + anymore... */ + printk("SAL/PAL returned: base-freq=%lu, itc-ratio=%lu/%lu, proc-ratio=%lu/%lu\n", + platform_base_freq, itc_ratio.num, itc_ratio.den, + proc_ratio.num, proc_ratio.den); + platform_base_freq = 100000000; + proc_ratio.num = 5; proc_ratio.den = 1; + itc_ratio.num = 5; itc_ratio.den = 1; +#elif defined(CONFIG_IA64_SOFTSDV_HACKS) + platform_base_freq = 10000000; + proc_ratio.num = 4; proc_ratio.den = 1; + itc_ratio.num = 4; itc_ratio.den = 1; +#else + if (platform_base_freq < 40000000) { + printk("Platform base frequency %lu bogus---resetting to 75MHz!\n", + platform_base_freq); + platform_base_freq = 75000000; + } +#endif + if (!proc_ratio.den) + proc_ratio.num = 1; /* avoid division by zero */ + if (!itc_ratio.den) + itc_ratio.num = 1; /* avoid division by zero */ + + itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den; + itm.delta = itc_freq / HZ; + printk("timer: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, ITC freq=%lu.%03luMHz\n", + platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000, + itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000); + + my_cpu_data.proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den; + my_cpu_data.itc_freq = itc_freq; + my_cpu_data.cyc_per_usec = itc_freq / 1000000; + my_cpu_data.usec_per_cyc = (1000000UL << IA64_USEC_PER_CYC_SHIFT) / itc_freq; + + /* Setup the CPU local timer tick */ + ia64_cpu_local_tick(); +} + +void __init +time_init (void) +{ + /* + * Request the IRQ _before_ doing anything to cause that + * interrupt to be posted. + */ + if (request_irq(TIMER_IRQ, timer_interrupt, 0, "timer", NULL)) + panic("Could not allocate timer IRQ!"); + + efi_gettimeofday(&xtime); + ia64_init_itm(); +} diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c new file mode 100644 index 000000000..c242622ec --- /dev/null +++ b/arch/ia64/kernel/traps.c @@ -0,0 +1,423 @@ +/* + * Architecture-specific trap handling. + * + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang <davidm@hpl.hp.com> + */ + +/* + * The fpu_fault() handler needs to be able to access and update all + * floating point registers. Those saved in pt_regs can be accessed + * through that structure, but those not saved, will be accessed + * directly. To make this work, we need to ensure that the compiler + * does not end up using a preserved floating point register on its + * own. 
The following achieves this by declaring preserved registers + * that are not marked as "fixed" as global register variables. + */ +register double f2 asm ("f2"); register double f3 asm ("f3"); +register double f4 asm ("f4"); register double f5 asm ("f5"); + +register long f16 asm ("f16"); register long f17 asm ("f17"); +register long f18 asm ("f18"); register long f19 asm ("f19"); +register long f20 asm ("f20"); register long f21 asm ("f21"); +register long f22 asm ("f22"); register long f23 asm ("f23"); + +register double f24 asm ("f24"); register double f25 asm ("f25"); +register double f26 asm ("f26"); register double f27 asm ("f27"); +register double f28 asm ("f28"); register double f29 asm ("f29"); +register double f30 asm ("f30"); register double f31 asm ("f31"); + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/sched.h> + +#ifdef CONFIG_KDB +# include <linux/kdb.h> +#endif + +#include <asm/processor.h> +#include <asm/uaccess.h> + +#include <asm/fpswa.h> + +static fpswa_interface_t *fpswa_interface; + +void __init +trap_init (void) +{ + printk("fpswa interface at %lx\n", ia64_boot_param.fpswa); + if (ia64_boot_param.fpswa) { +#define OLD_FIRMWARE +#ifdef OLD_FIRMWARE + /* + * HACK to work around broken firmware. This code + * applies the label fixup to the FPSWA interface and + * works both with old and new (fixed) firmware. + */ + unsigned long addr = (unsigned long) __va(ia64_boot_param.fpswa); + unsigned long gp_val = *(unsigned long *)(addr + 8); + + /* go indirect and indexed to get table address */ + addr = gp_val; + gp_val = *(unsigned long *)(addr + 8); + + while (gp_val == *(unsigned long *)(addr + 8)) { + *(unsigned long *)addr |= PAGE_OFFSET; + *(unsigned long *)(addr + 8) |= PAGE_OFFSET; + addr += 16; + } +#endif + /* FPSWA fixup: make the interface pointer a kernel virtual address: */ + fpswa_interface = __va(ia64_boot_param.fpswa); + } +} + +void +die_if_kernel (char *str, struct pt_regs *regs, long err) +{ + if (user_mode(regs)) { +#if 1 + /* XXX for debugging only */ + printk ("!!die_if_kernel: %s(%d): %s %ld\n", + current->comm, current->pid, str, err); + show_regs(regs); +#endif + return; + } + + printk("%s[%d]: %s %ld\n", current->comm, current->pid, str, err); + +#ifdef CONFIG_KDB + while (1) { + kdb(KDB_REASON_PANIC, 0, regs); + printk("Cant go anywhere from Panic!\n"); + } +#endif + + show_regs(regs); + + if (current->thread.flags & IA64_KERNEL_DEATH) { + printk("die_if_kernel recursion detected.\n"); + sti(); + while (1); + } + current->thread.flags |= IA64_KERNEL_DEATH; + do_exit(SIGSEGV); +} + +void +ia64_bad_break (unsigned long break_num, struct pt_regs *regs) +{ + siginfo_t siginfo; + + /* gdb uses a break number of 0xccccc for debug breakpoints: */ + if (break_num != 0xccccc) + die_if_kernel("Bad break", regs, break_num); + + siginfo.si_signo = SIGTRAP; + siginfo.si_errno = break_num; /* XXX is it legal to abuse si_errno like this? */ + siginfo.si_code = TRAP_BRKPT; + send_sig_info(SIGTRAP, &siginfo, current); +} + +/* + * Unimplemented system calls. This is called only for stuff that + * we're supposed to implement but haven't done so yet. Everything + * else goes to sys_ni_syscall. 
*/ +asmlinkage long +ia64_ni_syscall (unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5, unsigned long arg6, unsigned long arg7, + unsigned long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + + printk("<sc%ld(%lx,%lx,%lx,%lx)>\n", regs->r15, arg0, arg1, arg2, arg3); + return -ENOSYS; +} + +/* + * disabled_fp_fault() is called when a user-level process attempts to + * access one of the registers f32..f127 while it doesn't own the + * fp-high register partition. When this happens, we save the current + * fph partition in the task_struct of the fpu-owner (if necessary) + * and then load the fp-high partition of the current task (if + * necessary). + */ +static inline void +disabled_fph_fault (struct pt_regs *regs) +{ + struct task_struct *fpu_owner = ia64_get_fpu_owner(); + + regs->cr_ipsr &= ~(IA64_PSR_DFH | IA64_PSR_MFH); + if (fpu_owner != current) { + ia64_set_fpu_owner(current); + + if (fpu_owner && ia64_psr(ia64_task_regs(fpu_owner))->mfh) { + fpu_owner->thread.flags |= IA64_THREAD_FPH_VALID; + __ia64_save_fpu(fpu_owner->thread.fph); + } + if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) { + __ia64_load_fpu(current->thread.fph); + } else { + __ia64_init_fpu(); + } + } +} + +static inline int +fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs, + struct pt_regs *regs) +{ + fp_state_t fp_state; + fpswa_ret_t ret; +#ifdef FPSWA_BUG + struct ia64_fpreg f6_15[10]; +#endif + + if (!fpswa_interface) + return -1; + + memset(&fp_state, 0, sizeof(fp_state_t)); + + /* + * compute fp_state. only FP registers f6 - f11 are used by the + * kernel, so set those bits in the mask and set the low volatile + * pointer to point to these registers. + */ + fp_state.bitmask_low64 = 0xffc0; /* bit6..bit15 */ +#ifndef FPSWA_BUG + fp_state.fp_state_low_volatile = &regs->f6; +#else + f6_15[0] = regs->f6; + f6_15[1] = regs->f7; + f6_15[2] = regs->f8; + f6_15[3] = regs->f9; + __asm__ ("stf.spill %0=f10" : "=m"(f6_15[4])); + __asm__ ("stf.spill %0=f11" : "=m"(f6_15[5])); + __asm__ ("stf.spill %0=f12" : "=m"(f6_15[6])); + __asm__ ("stf.spill %0=f13" : "=m"(f6_15[7])); + __asm__ ("stf.spill %0=f14" : "=m"(f6_15[8])); + __asm__ ("stf.spill %0=f15" : "=m"(f6_15[9])); + fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) f6_15; +#endif + /* + * unsigned long (*EFI_FPSWA) ( + * unsigned long trap_type, + * void *Bundle, + * unsigned long *pipsr, + * unsigned long *pfsr, + * unsigned long *pisr, + * unsigned long *ppreds, + * unsigned long *pifs, + * void *fp_state); + */ + ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle, + (unsigned long *) ipsr, (unsigned long *) fpsr, + (unsigned long *) isr, (unsigned long *) pr, + (unsigned long *) ifs, &fp_state); +#ifdef FPSWA_BUG + __asm__ ("ldf.fill f10=%0" :: "m"(f6_15[4])); + __asm__ ("ldf.fill f11=%0" :: "m"(f6_15[5])); + __asm__ ("ldf.fill f12=%0" :: "m"(f6_15[6])); + __asm__ ("ldf.fill f13=%0" :: "m"(f6_15[7])); + __asm__ ("ldf.fill f14=%0" :: "m"(f6_15[8])); + __asm__ ("ldf.fill f15=%0" :: "m"(f6_15[9])); + regs->f6 = f6_15[0]; + regs->f7 = f6_15[1]; + regs->f8 = f6_15[2]; + regs->f9 = f6_15[3]; +#endif + return ret.status; +} + +/* + * Handle floating-point assist faults and traps.
*/ +static int +handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) +{ + long exception, bundle[2]; + unsigned long fault_ip; + static int fpu_swa_count = 0; + static unsigned long last_time; + + fault_ip = regs->cr_iip; + if (!fp_fault && (ia64_psr(regs)->ri == 0)) + fault_ip -= 16; + if (copy_from_user(bundle, (void *) fault_ip, sizeof(bundle))) + return -1; + + if (fpu_swa_count > 5 && jiffies - last_time > 5*HZ) + fpu_swa_count = 0; + if (++fpu_swa_count < 5) { + last_time = jiffies; + printk("%s(%d): floating-point assist fault at ip %016lx\n", + current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri); + } + + exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr, + &regs->cr_ifs, regs); + if (fp_fault) { + if (exception == 0) { + /* emulation was successful */ + ia64_increment_ip(regs); + } else if (exception == -1) { + printk("handle_fpu_swa: fp_emulate() returned -1\n"); + return -2; + } else { + /* is next instruction a trap? */ + if (exception & 2) { + ia64_increment_ip(regs); + } + return -1; + } + } else { + if (exception == -1) { + printk("handle_fpu_swa: fp_emulate() returned -1\n"); + return -2; + } else if (exception != 0) { + /* raise exception */ + return -1; + } + } + return 0; +} + +void +ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, + unsigned long iim, unsigned long itir, unsigned long arg5, + unsigned long arg6, unsigned long arg7, unsigned long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + unsigned long code, error = isr; + struct siginfo siginfo; + char buf[128]; + int result; + static const char *reason[] = { + "IA-64 Illegal Operation fault", + "IA-64 Privileged Operation fault", + "IA-64 Privileged Register fault", + "IA-64 Reserved Register/Field fault", + "Disabled Instruction Set Transition fault", + "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault", + "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12", + "Unknown fault 13", "Unknown fault 14", "Unknown fault 15" + }; + +#if 0 + /* this is for minimal trust debugging; yeah this kind of stuff is useful at times... */ + + if (vector != 25) { + static unsigned long last_time; + static char count; + unsigned long n = vector; + char buf[32], *cp; + + if (count > 5 && jiffies - last_time > 5*HZ) + count = 0; + + if (count++ < 5) { + last_time = jiffies; + cp = buf + sizeof(buf); + *--cp = '\0'; + while (n) { + *--cp = "0123456789abcdef"[n & 0xf]; + n >>= 4; + } + printk("<0x%s>", cp); + } + } +#endif + + switch (vector) { + case 24: /* General Exception */ + code = (isr >> 4) & 0xf; + sprintf(buf, "General Exception: %s%s", reason[code], + (code == 3) ? ((isr & (1UL << 37)) + ?
" (RSE access)" : " (data access)") : ""); +#ifndef CONFIG_ITANIUM_ASTEP_SPECIFIC + if (code == 8) { +# ifdef CONFIG_IA64_PRINT_HAZARDS + printk("%016lx:possible hazard, pr = %016lx\n", regs->cr_iip, regs->pr); +# endif + return; + } +#endif + break; + + case 25: /* Disabled FP-Register */ + if (isr & 2) { + disabled_fph_fault(regs); + return; + } + sprintf(buf, "Disabled FPL fault---not supposed to happen!"); + break; + + case 29: /* Debug */ + case 35: /* Taken Branch Trap */ + case 36: /* Single Step Trap */ + switch (vector) { + case 29: siginfo.si_code = TRAP_BRKPT; break; + case 35: siginfo.si_code = TRAP_BRANCH; break; + case 36: siginfo.si_code = TRAP_TRACE; break; + } + siginfo.si_signo = SIGTRAP; + siginfo.si_errno = 0; + force_sig_info(SIGTRAP, &siginfo, current); + return; + + case 30: /* Unaligned fault */ + sprintf(buf, "Unaligned access in kernel mode---don't do this!"); + break; + + case 32: /* fp fault */ + case 33: /* fp trap */ + result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr); + if (result < 0) { + siginfo.si_signo = SIGFPE; + siginfo.si_errno = 0; + siginfo.si_code = 0; /* XXX fix me */ + siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); + send_sig_info(SIGFPE, &siginfo, current); + if (result == -1) + send_sig_info(SIGFPE, &siginfo, current); + else + force_sig(SIGFPE, current); + } + return; + + case 34: /* Unimplemented Instruction Address Trap */ + if (user_mode(regs)) { + printk("Woah! Unimplemented Instruction Address Trap!\n"); + siginfo.si_code = ILL_BADIADDR; + siginfo.si_signo = SIGILL; + siginfo.si_errno = 0; + force_sig_info(SIGILL, &siginfo, current); + return; + } + sprintf(buf, "Unimplemented Instruction Address fault"); + break; + + case 45: + printk("Unexpected IA-32 exception\n"); + force_sig(SIGSEGV, current); + return; + + case 46: + printk("Unexpected IA-32 intercept trap\n"); + force_sig(SIGSEGV, current); + return; + + case 47: + sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16); + break; + + default: + sprintf(buf, "Fault %lu", vector); + break; + } + die_if_kernel(buf, regs, error); + force_sig(SIGILL, current); +} diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c new file mode 100644 index 000000000..0bd213f6b --- /dev/null +++ b/arch/ia64/kernel/unaligned.c @@ -0,0 +1,1554 @@ +/* + * Architecture-specific unaligned trap handling. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 Stephane Eranian <eranian@hpl.hp.com> + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp_lock.h> +#include <asm/uaccess.h> +#include <asm/rse.h> +#include <asm/processor.h> +#include <asm/unaligned.h> + +extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn)); + +#undef DEBUG_UNALIGNED_TRAP + +#ifdef DEBUG_UNALIGNED_TRAP +#define DPRINT(a) { printk("%s, line %d: ", __FUNCTION__, __LINE__); printk a;} +#else +#define DPRINT(a) +#endif + +#define IA64_FIRST_STACKED_GR 32 +#define IA64_FIRST_ROTATING_FR 32 +#define SIGN_EXT9 __IA64_UL(0xffffffffffffff00) + +/* + * For M-unit: + * + * opcode | m | x6 | + * --------|------|---------| + * [40-37] | [36] | [35:30] | + * --------|------|---------| + * 4 | 1 | 6 | = 11 bits + * -------------------------- + * However bits [31:30] are not directly useful to distinguish between + * load/store so we can use [35:32] instead, which gives the following + * mask ([40:32]) using 9 bits. 
The 'e' comes from the fact that we defer + * checking the m-bit until later in the load/store emulation. + */ +#define IA64_OPCODE_MASK 0x1ef00000000 + +/* + * Table C-28 Integer Load/Store + * + * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF + * + * ld8.fill, st8.fill MUST be aligned because the RNATs are based on + * the address (bits [8:3]), so we must failed. + */ +#define LD_OP 0x08000000000 +#define LDS_OP 0x08100000000 +#define LDA_OP 0x08200000000 +#define LDSA_OP 0x08300000000 +#define LDBIAS_OP 0x08400000000 +#define LDACQ_OP 0x08500000000 +/* 0x086, 0x087 are not relevant */ +#define LDCCLR_OP 0x08800000000 +#define LDCNC_OP 0x08900000000 +#define LDCCLRACQ_OP 0x08a00000000 +#define ST_OP 0x08c00000000 +#define STREL_OP 0x08d00000000 +/* 0x08e,0x8f are not relevant */ + +/* + * Table C-29 Integer Load +Reg + * + * we use the ld->m (bit [36:36]) field to determine whether or not we have + * a load/store of this form. + */ + +/* + * Table C-30 Integer Load/Store +Imm + * + * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF + * + * ld8.fill, st8.fill must be aligned because the Nat register are based on + * the address, so we must fail and the program must be fixed. + */ +#define LD_IMM_OP 0x0a000000000 +#define LDS_IMM_OP 0x0a100000000 +#define LDA_IMM_OP 0x0a200000000 +#define LDSA_IMM_OP 0x0a300000000 +#define LDBIAS_IMM_OP 0x0a400000000 +#define LDACQ_IMM_OP 0x0a500000000 +/* 0x0a6, 0xa7 are not relevant */ +#define LDCCLR_IMM_OP 0x0a800000000 +#define LDCNC_IMM_OP 0x0a900000000 +#define LDCCLRACQ_IMM_OP 0x0aa00000000 +#define ST_IMM_OP 0x0ac00000000 +#define STREL_IMM_OP 0x0ad00000000 +/* 0x0ae,0xaf are not relevant */ + +/* + * Table C-32 Floating-point Load/Store + */ +#define LDF_OP 0x0c000000000 +#define LDFS_OP 0x0c100000000 +#define LDFA_OP 0x0c200000000 +#define LDFSA_OP 0x0c300000000 +/* 0x0c6 is irrelevant */ +#define LDFCCLR_OP 0x0c800000000 +#define LDFCNC_OP 0x0c900000000 +/* 0x0cb is irrelevant */ +#define STF_OP 0x0cc00000000 + +/* + * Table C-33 Floating-point Load +Reg + * + * we use the ld->m (bit [36:36]) field to determine whether or not we have + * a load/store of this form. + */ + +/* + * Table C-34 Floating-point Load/Store +Imm + */ +#define LDF_IMM_OP 0x0e000000000 +#define LDFS_IMM_OP 0x0e100000000 +#define LDFA_IMM_OP 0x0e200000000 +#define LDFSA_IMM_OP 0x0e300000000 +/* 0x0e6 is irrelevant */ +#define LDFCCLR_IMM_OP 0x0e800000000 +#define LDFCNC_IMM_OP 0x0e900000000 +#define STF_IMM_OP 0x0ec00000000 + +typedef struct { + unsigned long qp:6; /* [0:5] */ + unsigned long r1:7; /* [6:12] */ + unsigned long imm:7; /* [13:19] */ + unsigned long r3:7; /* [20:26] */ + unsigned long x:1; /* [27:27] */ + unsigned long hint:2; /* [28:29] */ + unsigned long x6_sz:2; /* [30:31] */ + unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */ + unsigned long m:1; /* [36:36] */ + unsigned long op:4; /* [37:40] */ + unsigned long pad:23; /* [41:63] */ +} load_store_t; + + +typedef enum { + UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */ + UPD_REG /* ldXZ r1=[r3],r2 */ +} update_t; + +/* + * We use tables to keep track of the offsets of registers in the saved state. + * This way we save having big switch/case statements. + * + * We use bit 0 to indicate switch_stack or pt_regs. + * The offset is simply shifted by 1 bit. + * A 2-byte value should be enough to hold any kind of offset + * + * In case the calling convention changes (and thus pt_regs/switch_stack) + * simply use RSW instead of RPT or vice-versa. 
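(A short worked example of the offset encoding implemented by the macros just below; RPO()/RSO() compute the same thing as offsetof(), and the two registers picked here, r9 and f30, both appear in the gr_info/fr_info tables that follow.)

    RPT(r9)  == offsetof(struct pt_regs, r9) << 1              /* bit 0 clear: lives in pt_regs    */
    RSW(f30) == (offsetof(struct switch_stack, f30) << 1) | 1  /* bit 0 set: lives in switch_stack */

    GR_OFFS(x)  == gr_info[x] >> 1     /* decode: byte offset within the chosen structure   */
    GR_IN_SW(x) == gr_info[x] & 0x1    /* decode: which structure the offset is relative to */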
+ */ + +#define RPO(x) ((size_t) &((struct pt_regs *)0)->x) +#define RSO(x) ((size_t) &((struct switch_stack *)0)->x) + +#define RPT(x) (RPO(x) << 1) +#define RSW(x) (1| RSO(x)<<1) + +#define GR_OFFS(x) (gr_info[x]>>1) +#define GR_IN_SW(x) (gr_info[x] & 0x1) + +#define FR_OFFS(x) (fr_info[x]>>1) +#define FR_IN_SW(x) (fr_info[x] & 0x1) + +static u16 gr_info[32]={ + 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ + + RPT(r1), RPT(r2), RPT(r3), + + RSW(r4), RSW(r5), RSW(r6), RSW(r7), + + RPT(r8), RPT(r9), RPT(r10), RPT(r11), + RPT(r12), RPT(r13), RPT(r14), RPT(r15), + + RPT(r16), RPT(r17), RPT(r18), RPT(r19), + RPT(r20), RPT(r21), RPT(r22), RPT(r23), + RPT(r24), RPT(r25), RPT(r26), RPT(r27), + RPT(r28), RPT(r29), RPT(r30), RPT(r31) +}; + +static u16 fr_info[32]={ + 0, /* constant : WE SHOULD NEVER GET THIS */ + 0, /* constant : WE SHOULD NEVER GET THIS */ + + RSW(f2), RSW(f3), RSW(f4), RSW(f5), + + RPT(f6), RPT(f7), RPT(f8), RPT(f9), + + RSW(f10), RSW(f11), RSW(f12), RSW(f13), RSW(f14), + RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19), + RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24), + RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29), + RSW(f30), RSW(f31) +}; + +/* Invalidate ALAT entry for integer register REGNO. */ +static void +invala_gr (int regno) +{ +# define F(reg) case reg: __asm__ __volatile__ ("invala.e r%0" :: "i"(reg)); break + + switch (regno) { + F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7); + F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15); + F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23); + F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31); + F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39); + F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47); + F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55); + F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63); + F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71); + F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79); + F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87); + F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95); + F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103); + F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111); + F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119); + F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127); + } +# undef F +} + +/* Invalidate ALAT entry for floating-point register REGNO. 
*/ +static void +invala_fr (int regno) +{ +# define F(reg) case reg: __asm__ __volatile__ ("invala.e f%0" :: "i"(reg)); break + + switch (regno) { + F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7); + F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15); + F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23); + F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31); + F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39); + F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47); + F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55); + F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63); + F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71); + F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79); + F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87); + F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95); + F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103); + F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111); + F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119); + F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127); + } +# undef F +} + +static void +set_rse_reg(struct pt_regs *regs, unsigned long r1, unsigned long val, int nat) +{ + struct switch_stack *sw = (struct switch_stack *)regs - 1; + unsigned long *kbs = ((unsigned long *)current) + IA64_RBS_OFFSET/8; + unsigned long on_kbs; + unsigned long *bsp, *bspstore, *addr, *ubs_end, *slot; + unsigned long rnats; + long nlocals; + + /* + * cr_ifs=[rv:ifm], ifm=[....:sof(6)] + * nlocal=number of locals (in+loc) register of the faulting function + */ + nlocals = (regs->cr_ifs) & 0x7f; + + DPRINT(("sw.bsptore=%lx pt.bspstore=%lx\n", sw->ar_bspstore, regs->ar_bspstore)); + DPRINT(("cr.ifs=%lx sof=%ld sol=%ld\n", + regs->cr_ifs, regs->cr_ifs &0x7f, (regs->cr_ifs>>7)&0x7f)); + + on_kbs = ia64_rse_num_regs(kbs, (unsigned long *)sw->ar_bspstore); + bspstore = (unsigned long *)regs->ar_bspstore; + + DPRINT(("rse_slot_num=0x%lx\n",ia64_rse_slot_num((unsigned long *)sw->ar_bspstore))); + DPRINT(("kbs=%p nlocals=%ld\n", kbs, nlocals)); + DPRINT(("bspstore next rnat slot %p\n", + ia64_rse_rnat_addr((unsigned long *)sw->ar_bspstore))); + DPRINT(("on_kbs=%ld rnats=%ld\n", + on_kbs, ((sw->ar_bspstore-(unsigned long)kbs)>>3) - on_kbs)); + + /* + * See get_rse_reg() for an explanation on the following instructions + */ + ubs_end = ia64_rse_skip_regs(bspstore, on_kbs); + bsp = ia64_rse_skip_regs(ubs_end, -nlocals); + addr = slot = ia64_rse_skip_regs(bsp, r1 - 32); + + DPRINT(("ubs_end=%p bsp=%p addr=%p slot=0x%lx\n", + ubs_end, bsp, addr, ia64_rse_slot_num(addr))); + + ia64_poke(regs, current, (unsigned long)addr, val); + + /* + * addr will now contain the address of the RNAT for the register + */ + addr = ia64_rse_rnat_addr(addr); + + ia64_peek(regs, current, (unsigned long)addr, &rnats); + DPRINT(("rnat @%p = 0x%lx nat=%d rnatval=%lx\n", + addr, rnats, nat, rnats &ia64_rse_slot_num(slot))); + + if ( nat ) { + rnats |= __IA64_UL(1) << ia64_rse_slot_num(slot); + } else { + rnats &= ~(__IA64_UL(1) << ia64_rse_slot_num(slot)); + } + ia64_poke(regs, current, (unsigned long)addr, rnats); + + DPRINT(("rnat changed to @%p = 0x%lx\n", addr, rnats)); +} + + +static void +get_rse_reg(struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat) +{ + struct switch_stack *sw = (struct switch_stack *)regs - 1; + unsigned long *kbs = (unsigned long *)current + IA64_RBS_OFFSET/8; + unsigned long on_kbs; + 
long nlocals; + unsigned long *bsp, *addr, *ubs_end, *slot, *bspstore; + unsigned long rnats; + + /* + * cr_ifs=[rv:ifm], ifm=[....:sof(6)] + * nlocals=number of local registers in the faulting function + */ + nlocals = (regs->cr_ifs) & 0x7f; + + /* + * save_switch_stack does a flushrs and saves bspstore. + * on_kbs = actual number of registers saved on kernel backing store + * (taking into accound potential RNATs) + * + * Note that this number can be greater than nlocals if the dirty + * parititions included more than one stack frame at the time we + * switched to KBS + */ + on_kbs = ia64_rse_num_regs(kbs, (unsigned long *)sw->ar_bspstore); + bspstore = (unsigned long *)regs->ar_bspstore; + + /* + * To simplify the logic, we calculate everything as if there was only + * one backing store i.e., the user one (UBS). We let it to peek/poke + * to figure out whether the register we're looking for really is + * on the UBS or on KBS. + * + * regs->ar_bsptore = address of last register saved on UBS (before switch) + * + * ubs_end = virtual end of the UBS (if everything had been spilled there) + * + * We know that ubs_end is the point where the last register on the + * stack frame we're interested in as been saved. So we need to walk + * our way backward to figure out what the BSP "was" for that frame, + * this will give us the location of r32. + * + * bsp = "virtual UBS" address of r32 for our frame + * + * Finally, get compute the address of the register we're looking for + * using bsp as our base (move up again). + * + * Please note that in our case, we know that the register is necessarily + * on the KBS because we are only interested in the current frame at the moment + * we got the exception i.e., bsp is not changed until we switch to KBS. + */ + ubs_end = ia64_rse_skip_regs(bspstore, on_kbs); + bsp = ia64_rse_skip_regs(ubs_end, -nlocals); + addr = slot = ia64_rse_skip_regs(bsp, r1 - 32); + + DPRINT(("ubs_end=%p bsp=%p addr=%p slot=0x%lx\n", + ubs_end, bsp, addr, ia64_rse_slot_num(addr))); + + ia64_peek(regs, current, (unsigned long)addr, val); + + /* + * addr will now contain the address of the RNAT for the register + */ + addr = ia64_rse_rnat_addr(addr); + + ia64_peek(regs, current, (unsigned long)addr, &rnats); + DPRINT(("rnat @%p = 0x%lx\n", addr, rnats)); + + if ( nat ) *nat = rnats >> ia64_rse_slot_num(slot) & 0x1; +} + + +static void +setreg(unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *)regs -1; + unsigned long addr; + unsigned long bitmask; + unsigned long *unat; + + + /* + * First takes care of stacked registers + */ + if ( regnum >= IA64_FIRST_STACKED_GR ) { + set_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * Using r0 as a target raises a General Exception fault which has + * higher priority than the Unaligned Reference fault. + */ + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + if ( GR_IN_SW(regnum) ) { + addr = (unsigned long)sw; + unat = &sw->ar_unat; + } else { + addr = (unsigned long)regs; + unat = &sw->caller_unat; + } + DPRINT(("tmp_base=%lx switch_stack=%s offset=%d\n", + addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum))); + /* + * add offset from base of struct + * and do it ! 
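(For the UNAT update a few lines below: the NaT bit that corresponds to a spilled register is indexed by bits 8:3 of the address the value was spilled to. A tiny worked example, with a made-up address:)

    addr    = 0xe000000000000138;            /* where the register value was just stored          */
    bit     = (addr >> 3) & 0x3f;            /* (0x138 >> 3) & 0x3f == 39                          */
    bitmask = __IA64_UL(1) << bit;           /* set in *unat for a NaT, cleared for a valid value  */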
*/ + addr += GR_OFFS(regnum); + + *(unsigned long *)addr = val; + + /* + * We need to clear the corresponding UNAT bit to fully emulate the load + * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4 + */ + bitmask = __IA64_UL(1) << (addr >> 3 & 0x3f); + DPRINT(("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, unat, *unat)); + if ( nat ) { + *unat |= bitmask; + } else { + *unat &= ~bitmask; + } + DPRINT(("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, unat,*unat)); +} + +#define IA64_FPH_OFFS(r) (r - IA64_FIRST_ROTATING_FR) + +static void +setfpreg(unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *)regs - 1; + unsigned long addr; + + /* + * From EAS-2.5: FPDisableFault has higher priority than + * Unaligned Fault. Thus, when we get here, we know the partition is + * enabled. + * + * The registers [32-127] are usually saved in the tss. When we get here, + * they are NECESSARILY live because they are only saved explicitly. + * We have 3 ways of updating the values: force a save of the range + * in tss, use a gigantic switch/case statement or generate code on the + * fly to store to the right register. + * For now, we are using the (slow) save/restore way. + */ + if ( regnum >= IA64_FIRST_ROTATING_FR ) { + /* + * force a save of [32-127] to tss + * we use the __() form to avoid fiddling with the dfh bit + */ + __ia64_save_fpu(&current->thread.fph[0]); + + current->thread.fph[IA64_FPH_OFFS(regnum)] = *fpval; + + __ia64_load_fpu(&current->thread.fph[0]); + + /* + * mark the high partition as being used now + * + * This is REQUIRED because the disabled_fph_fault() does + * not set it, it's relying on the faulting instruction to + * do it. In our case the faulty instruction never gets executed + * completely, so we need to toggle the bit. + */ + regs->cr_ipsr |= IA64_PSR_MFH; + } else { + /* + * pt_regs or switch_stack ? + */ + if ( FR_IN_SW(regnum) ) { + addr = (unsigned long)sw; + } else { + addr = (unsigned long)regs; + } + + DPRINT(("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum))); + + addr += FR_OFFS(regnum); + *(struct ia64_fpreg *)addr = *fpval; + + /* + * mark the low partition as being used now + * + * It is highly unlikely that this bit is not already set, but + * let's do it for safety. + */ + regs->cr_ipsr |= IA64_PSR_MFL; + + } +} + +/* + * Those 2 inline functions generate the spilled versions of the constant floating point + * registers which can be used with stfX + */ +static inline void +float_spill_f0(struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("stf.spill [%0]=f0" :: "r"(final) : "memory"); +} + +static inline void +float_spill_f1(struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("stf.spill [%0]=f1" :: "r"(final) : "memory"); +} + +static void +getfpreg(unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *)regs -1; + unsigned long addr; + + /* + * From EAS-2.5: FPDisableFault has higher priority than + * Unaligned Fault. Thus, when we get here, we know the partition is + * enabled. + * + * When regnum > 31, the register is still live and + * we need to force a save to the tss to get access to it. + * See discussion in setfpreg() for reasons and other ways of doing this. + */ + if ( regnum >= IA64_FIRST_ROTATING_FR ) { + + /* + * force a save of [32-127] to tss + * we use the __ia64_save_fpu() form to avoid fiddling with + * the dfh bit.
+ */ + __ia64_save_fpu(¤t->thread.fph[0]); + + *fpval = current->thread.fph[IA64_FPH_OFFS(regnum)]; + } else { + /* + * f0 = 0.0, f1= 1.0. Those registers are constant and are thus + * not saved, we must generate their spilled form on the fly + */ + switch(regnum) { + case 0: + float_spill_f0(fpval); + break; + case 1: + float_spill_f1(fpval); + break; + default: + /* + * pt_regs or switch_stack ? + */ + addr = FR_IN_SW(regnum) ? (unsigned long)sw + : (unsigned long)regs; + + DPRINT(("is_sw=%d tmp_base=%lx offset=0x%x\n", + FR_IN_SW(regnum), addr, FR_OFFS(regnum))); + + addr += FR_OFFS(regnum); + *fpval = *(struct ia64_fpreg *)addr; + } + } +} + + +static void +getreg(unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *)regs -1; + unsigned long addr, *unat; + + if ( regnum >= IA64_FIRST_STACKED_GR ) { + get_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * take care of r0 (read-only always evaluate to 0) + */ + if ( regnum == 0 ) { + *val = 0; + *nat = 0; + return; + } + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + if ( GR_IN_SW(regnum) ) { + addr = (unsigned long)sw; + unat = &sw->ar_unat; + } else { + addr = (unsigned long)regs; + unat = &sw->caller_unat; + } + + DPRINT(("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum))); + + addr += GR_OFFS(regnum); + + *val = *(unsigned long *)addr; + + /* + * do it only when requested + */ + if ( nat ) *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL; +} + +static void +emulate_load_updates(update_t type, load_store_t *ld, struct pt_regs *regs, unsigned long ifa) +{ + /* + * IMPORTANT: + * Given the way we handle unaligned speculative loads, we should + * not get to this point in the code but we keep this sanity check, + * just in case. + */ + if ( ld->x6_op == 1 || ld->x6_op == 3 ) { + printk(KERN_ERR __FUNCTION__": register update on speculative load, error\n"); + die_if_kernel("unaligned reference on specualtive load with register update\n", + regs, 30); + } + + + /* + * at this point, we know that the base register to update is valid i.e., + * it's not r0 + */ + if ( type == UPD_IMMEDIATE ) { + unsigned long imm; + + /* + * Load +Imm: ldXZ r1=[r3],imm(9) + * + * + * form imm9: [13:19] contain the first 7 bits + */ + imm = ld->x << 7 | ld->imm; + + /* + * sign extend (1+8bits) if m set + */ + if (ld->m) imm |= SIGN_EXT9; + + /* + * ifa == r3 and we know that the NaT bit on r3 was clear so + * we can directly use ifa. + */ + ifa += imm; + + setreg(ld->r3, ifa, 0, regs); + + DPRINT(("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld->x, ld->m, imm, ifa)); + + } else if ( ld->m ) { + unsigned long r2; + int nat_r2; + + /* + * Load +Reg Opcode: ldXZ r1=[r3],r2 + * + * Note: that we update r3 even in the case of ldfX.a + * (where the load does not happen) + * + * The way the load algorithm works, we know that r3 does not + * have its NaT bit set (would have gotten NaT consumption + * before getting the unaligned fault). So we can use ifa + * which equals r3 at this point. + * + * IMPORTANT: + * The above statement holds ONLY because we know that we + * never reach this code when trying to do a ldX.s. 
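 *
 * [editor's illustration -- not part of this diff] What the three lines
 * below emulate is the architected post-increment; for a plain
 * "ld8 r1=[r3],r2" the net effect is:
 *
 *	r1       = *r3;			/* the load itself, done by the caller */
 *	r3      += r2;			/* the update emulated here            */
 *	NaT(r3)  = NaT(r2);		/* NaT bit propagated via setreg()     */
 *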
+ * If we ever make it to here on an ldfX.s then + */ + getreg(ld->imm, &r2, &nat_r2, regs); + + ifa += r2; + + /* + * propagate Nat r2 -> r3 + */ + setreg(ld->r3, ifa, nat_r2, regs); + + DPRINT(("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld->imm, r2, ifa, nat_r2)); + } +} + + +static int +emulate_load_int(unsigned long ifa, load_store_t *ld, struct pt_regs *regs) +{ + unsigned long val; + unsigned int len = 1<< ld->x6_sz; + + /* + * the macro supposes sequential access (which is the case) + * if the first byte is an invalid address we return here. Otherwise + * there is a guard page at the top of the user's address page and + * the first access would generate a NaT consumption fault and return + * with a SIGSEGV, which is what we want. + * + * Note: the first argument is ignored + */ + if ( access_ok(VERIFY_READ, (void *)ifa, len) < 0 ) { + DPRINT(("verify area failed on %lx\n", ifa)); + return -1; + } + + /* + * r0, as target, doesn't need to be checked because Illegal Instruction + * faults have higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an + * unaligned reference. + */ + + /* + * ldX.a we don't try to emulate anything but we must + * invalidate the ALAT entry. + * See comment below for explanation on how we handle ldX.a + */ + if ( ld->x6_op != 0x2 ) { + /* + * we rely on the macros in unaligned.h for now i.e., + * we let the compiler figure out how to read memory gracefully. + * + * We need this switch/case because the way the inline function + * works. The code is optimized by the compiler and looks like + * a single switch/case. + */ + switch(len) { + case 2: + val = ia64_get_unaligned((void *)ifa, 2); + break; + case 4: + val = ia64_get_unaligned((void *)ifa, 4); + break; + case 8: + val = ia64_get_unaligned((void *)ifa, 8); + break; + default: + DPRINT(("unknown size: x6=%d\n", ld->x6_sz)); + return -1; + } + + setreg(ld->r1, val, 0, regs); + } + + /* + * check for updates on any kind of loads + */ + if ( ld->op == 0x5 || ld->m ) + emulate_load_updates(ld->op == 0x5 ? UPD_IMMEDIATE: UPD_REG, + ld, regs, ifa); + + /* + * handling of various loads (based on EAS2.4): + * + * ldX.acq (ordered load): + * - acquire semantics would have been used, so force fence instead. + * + * + * ldX.c.clr (check load and clear): + * - if we get to this handler, it's because the entry was not in the ALAT. + * Therefore the operation reverts to a normal load + * + * ldX.c.nc (check load no clear): + * - same as previous one + * + * ldX.c.clr.acq (ordered check load and clear): + * - same as above for c.clr part. The load needs to have acquire semantics. So + * we use the fence semantics which is stronger and thus ensures correctness. + * + * ldX.a (advanced load): + * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the + * address doesn't match requested size alignement. This means that we would + * possibly need more than one load to get the result. + * + * The load part can be handled just like a normal load, however the difficult + * part is to get the right thing into the ALAT. The critical piece of information + * in the base address of the load & size. To do that, a ld.a must be executed, + * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now + * if we use the same target register, we will be okay for the check.a instruction. 
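 *
 * [editor's illustration -- not part of this diff] The pairing referred to
 * above looks like this in the program being emulated:
 *
 *	ld4.a		r1=[r3]		// allocates an ALAT entry for [r3,r3+4)
 *	...
 *	ld4.c.clr	r1=[r3]		// reuses r1 only if that entry survived
 *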
+ * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry + * which would overlap within [r3,r3+X] (the size of the load was store in the + * ALAT). If such an entry is found the entry is invalidated. But this is not good + * enough, take the following example: + * r3=3 + * ld4.a r1=[r3] + * + * Could be emulated by doing: + * ld1.a r1=[r3],1 + * store to temporary; + * ld1.a r1=[r3],1 + * store & shift to temporary; + * ld1.a r1=[r3],1 + * store & shift to temporary; + * ld1.a r1=[r3] + * store & shift to temporary; + * r1=temporary + * + * So int this case, you would get the right value is r1 but the wrong info in + * the ALAT. Notice that you could do it in reverse to finish with address 3 + * but you would still get the size wrong. To get the size right, one needs to + * execute exactly the same kind of load. You could do it from a aligned + * temporary location, but you would get the address wrong. + * + * So no matter what, it is not possible to emulate an advanced load + * correctly. But is that really critical ? + * + * + * Now one has to look at how ld.a is used, one must either do a ld.c.* or + * chck.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no + * entry found in ALAT), and that's perfectly ok because: + * + * - ld.c.*, if the entry is not present a normal load is executed + * - chk.a.*, if the entry is not present, execution jumps to recovery code + * + * In either case, the load can be potentially retried in another form. + * + * So it's okay NOT to do any actual load on an unaligned ld.a. However the ALAT + * must be invalidated for the register (so that's chck.a.*,ld.c.* don't pick up + * a stale entry later) The register base update MUST also be performed. + * + * Now what is the content of the register and its NaT bit in the case we don't + * do the load ? EAS2.4, says (in case an actual load is needed) + * + * - r1 = [r3], Nat = 0 if succeeds + * - r1 = 0 Nat = 0 if trying to access non-speculative memory + * + * For us, there is nothing to do, because both ld.c.* and chk.a.* are going to + * retry and thus eventually reload the register thereby changing Nat and + * register content. + */ + + /* + * when the load has the .acq completer then + * use ordering fence. + */ + if (ld->x6_op == 0x5 || ld->x6_op == 0xa) + mb(); + + /* + * invalidate ALAT entry in case of advanced load + */ + if (ld->x6_op == 0x2) + invala_gr(ld->r1); + + return 0; +} + +static int +emulate_store_int(unsigned long ifa, load_store_t *ld, struct pt_regs *regs) +{ + unsigned long r2; + unsigned int len = 1<< ld->x6_sz; + + /* + * the macro supposes sequential access (which is the case) + * if the first byte is an invalid address we return here. Otherwise + * there is a guard page at the top of the user's address page and + * the first access would generate a NaT consumption fault and return + * with a SIGSEGV, which is what we want. + * + * Note: the first argument is ignored + */ + if ( access_ok(VERIFY_WRITE, (void *)ifa, len) < 0 ) { + DPRINT(("verify area failed on %lx\n",ifa)); + return -1; + } + + /* + * if we get to this handler, Nat bits on both r3 and r2 have already + * been checked. so we don't need to do it + * + * extract the value to be stored + */ + getreg(ld->imm, &r2, 0, regs); + + /* + * we rely on the macros in unaligned.h for now i.e., + * we let the compiler figure out how to read memory gracefully. + * + * We need this switch/case because the way the inline function + * works. 
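 *
 * [editor's illustration -- not part of this diff] Conceptually, for the
 * 4-byte case on a little-endian machine, the put below amounts to:
 *
 *	unsigned char *p = (unsigned char *) ifa;
 *	p[0] = r2; p[1] = r2 >> 8; p[2] = r2 >> 16; p[3] = r2 >> 24;
 *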
The code is optimized by the compiler and looks like + * a single switch/case. + */ + DPRINT(("st%d [%lx]=%lx\n", len, ifa, r2)); + + switch(len) { + case 2: + ia64_put_unaligned(r2, (void *)ifa, 2); + break; + case 4: + ia64_put_unaligned(r2, (void *)ifa, 4); + break; + case 8: + ia64_put_unaligned(r2, (void *)ifa, 8); + break; + default: + DPRINT(("unknown size: x6=%d\n", ld->x6_sz)); + return -1; + } + /* + * stX [r3]=r2,imm(9) + * + * NOTE: + * ld->r3 can never be r0, because r0 would not generate an + * unaligned access. + */ + if ( ld->op == 0x5 ) { + unsigned long imm; + + /* + * form imm9: [12:6] contain first 7bits + */ + imm = ld->x << 7 | ld->r1; + /* + * sign extend (8bits) if m set + */ + if ( ld->m ) imm |= SIGN_EXT9; + /* + * ifa == r3 (NaT is necessarily cleared) + */ + ifa += imm; + + DPRINT(("imm=%lx r3=%lx\n", imm, ifa)); + + setreg(ld->r3, ifa, 0, regs); + } + /* + * we don't have alat_invalidate_multiple() so we need + * to do the complete flush :-<< + */ + ia64_invala(); + + /* + * stX.rel: use fence instead of release + */ + if ( ld->x6_op == 0xd ) mb(); + + return 0; +} + +/* + * floating point operations sizes in bytes + */ +static const unsigned short float_fsz[4]={ + 16, /* extended precision (e) */ + 8, /* integer (8) */ + 4, /* single precision (s) */ + 8 /* double precision (d) */ +}; + +static inline void +mem2float_extended(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldfe f6=[%0];; stf.spill [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +mem2float_integer(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldf8 f6=[%0];; stf.spill [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +mem2float_single(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldfs f6=[%0];; stf.spill [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +mem2float_double(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldfd f6=[%0];; stf.spill [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +float2mem_extended(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldf.fill f6=[%0];; stfe [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +float2mem_integer(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldf.fill f6=[%0];; stf8 [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +float2mem_single(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldf.fill f6=[%0];; stfs [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static inline void +float2mem_double(struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + __asm__ __volatile__ ("ldf.fill f6=[%0];; stfd [%1]=f6" + :: "r"(init), "r"(final) : "f6","memory"); +} + +static int +emulate_load_floatpair(unsigned long ifa, load_store_t *ld, struct pt_regs *regs) +{ + struct ia64_fpreg fpr_init[2]; + struct ia64_fpreg fpr_final[2]; + unsigned long len = float_fsz[ld->x6_sz]; + + if ( access_ok(VERIFY_READ, (void *)ifa, len<<1) < 0 ) { + DPRINT(("verify area failed on %lx\n", ifa)); + return -1; + } + /* + * fr0 & fr1 don't need to be checked because Illegal Instruction + * faults have higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an + * unaligned reference. 
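 *
 * [editor's illustration -- not part of this diff] The two local buffers
 * used below act as staging areas, one element per register of the pair:
 *
 *	fpr_init[i]  : raw bytes copied from the unaligned address
 *	fpr_final[i] : the same value in stf.spill (register) format,
 *	               produced by the mem2float_*() helpers, e.g.
 *	               ldfd f6=[init] ;; stf.spill [final]=f6
 *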
+ */ + + /* + * make sure we get clean buffers + */ + memset(&fpr_init,0, sizeof(fpr_init)); + memset(&fpr_final,0, sizeof(fpr_final)); + + /* + * ldfpX.a: we don't try to emulate anything but we must + * invalidate the ALAT entry and execute updates, if any. + */ + if ( ld->x6_op != 0x2 ) { + /* + * does the unaligned access + */ + memcpy(&fpr_init[0], (void *)ifa, len); + memcpy(&fpr_init[1], (void *)(ifa+len), len); + + DPRINT(("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld->r1, ld->imm, ld->x6_sz)); +#ifdef DEBUG_UNALIGNED_TRAP + { int i; char *c = (char *)&fpr_init; + printk("fpr_init= "); + for(i=0; i < len<<1; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } +#endif + /* + * XXX fixme + * Could optimize inlines by using ldfpX & 2 spills + */ + switch( ld->x6_sz ) { + case 0: + mem2float_extended(&fpr_init[0], &fpr_final[0]); + mem2float_extended(&fpr_init[1], &fpr_final[1]); + break; + case 1: + mem2float_integer(&fpr_init[0], &fpr_final[0]); + mem2float_integer(&fpr_init[1], &fpr_final[1]); + break; + case 2: + mem2float_single(&fpr_init[0], &fpr_final[0]); + mem2float_single(&fpr_init[1], &fpr_final[1]); + break; + case 3: + mem2float_double(&fpr_init[0], &fpr_final[0]); + mem2float_double(&fpr_init[1], &fpr_final[1]); + break; + } +#ifdef DEBUG_UNALIGNED_TRAP + { int i; char *c = (char *)&fpr_final; + printk("fpr_final= "); + for(i=0; i < len<<1; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } +#endif + /* + * XXX fixme + * + * A possible optimization would be to drop fpr_final + * and directly use the storage from the saved context i.e., + * the actual final destination (pt_regs, switch_stack or tss). + */ + setfpreg(ld->r1, &fpr_final[0], regs); + setfpreg(ld->imm, &fpr_final[1], regs); + } + + /* + * Check for updates: only immediate updates are available for this + * instruction. + */ + if ( ld->m ) { + + /* + * the immediate is implicit given the ldsz of the operation: + * single: 8 (2x4) and for all others it's 16 (2x8) + */ + ifa += len<<1; + + /* + * IMPORTANT: + * the fact that we force the NaT of r3 to zero is ONLY valid + * as long as we don't come here with a ldfpX.s. + * For this reason we keep this sanity check + */ + if ( ld->x6_op == 1 || ld->x6_op == 3 ) { + printk(KERN_ERR "%s: register update on speculative load pair, error\n", __FUNCTION__); + } + + + setreg(ld->r3, ifa, 0, regs); + } + + /* + * Invalidate ALAT entries, if any, for both registers. + */ + if ( ld->x6_op == 0x2 ) { + invala_fr(ld->r1); + invala_fr(ld->imm); + } + return 0; +} + + +static int +emulate_load_float(unsigned long ifa, load_store_t *ld, struct pt_regs *regs) +{ + struct ia64_fpreg fpr_init; + struct ia64_fpreg fpr_final; + unsigned long len = float_fsz[ld->x6_sz]; + + /* + * check for load pair because our masking scheme is not fine grain enough + if ( ld->x == 1 ) return emulate_load_floatpair(ifa,ld,regs); + */ + + if ( access_ok(VERIFY_READ, (void *)ifa, len) < 0 ) { + DPRINT(("verify area failed on %lx\n", ifa)); + return -1; + } + /* + * fr0 & fr1 don't need to be checked because Illegal Instruction + * faults have higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an + * unaligned reference. + */ + + + /* + * make sure we get clean buffers + */ + memset(&fpr_init,0, sizeof(fpr_init)); + memset(&fpr_final,0, sizeof(fpr_final)); + + /* + * ldfX.a we don't try to emulate anything but we must + * invalidate the ALAT entry. + * See comments in ldX for descriptions on how the various loads are handled. 
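 *
 * [editor's illustration -- not part of this diff] The access size comes
 * straight from the x6 size field via float_fsz[] above, e.g.:
 *
 *	len = float_fsz[ld->x6_sz];	/* ldfe:16  ldf8:8  ldfs:4  ldfd:8 */
 *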
+ */ + if ( ld->x6_op != 0x2 ) { + + /* + * does the unaligned access + */ + memcpy(&fpr_init, (void *)ifa, len); + + DPRINT(("ld.r1=%d x6_sz=%d\n", ld->r1, ld->x6_sz)); +#ifdef DEBUG_UNALIGNED_TRAP + { int i; char *c = (char *)&fpr_init; + printk("fpr_init= "); + for(i=0; i < len; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } +#endif + /* + * we only do something for x6_op={0,8,9} + */ + switch( ld->x6_sz ) { + case 0: + mem2float_extended(&fpr_init, &fpr_final); + break; + case 1: + mem2float_integer(&fpr_init, &fpr_final); + break; + case 2: + mem2float_single(&fpr_init, &fpr_final); + break; + case 3: + mem2float_double(&fpr_init, &fpr_final); + break; + } +#ifdef DEBUG_UNALIGNED_TRAP + { int i; char *c = (char *)&fpr_final; + printk("fpr_final= "); + for(i=0; i < len; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } +#endif + /* + * XXX fixme + * + * A possible optimization would be to drop fpr_final + * and directly use the storage from the saved context i.e., + * the actual final destination (pt_regs, switch_stack or tss). + */ + setfpreg(ld->r1, &fpr_final, regs); + } + + /* + * check for updates on any loads + */ + if ( ld->op == 0x7 || ld->m ) + emulate_load_updates(ld->op == 0x7 ? UPD_IMMEDIATE: UPD_REG, + ld, regs, ifa); + + + /* + * invalidate ALAT entry in case of advanced floating point loads + */ + if (ld->x6_op == 0x2) + invala_fr(ld->r1); + + return 0; +} + + +static int +emulate_store_float(unsigned long ifa, load_store_t *ld, struct pt_regs *regs) +{ + struct ia64_fpreg fpr_init; + struct ia64_fpreg fpr_final; + unsigned long len = float_fsz[ld->x6_sz]; + + /* + * the macro supposes sequential access (which is the case) + * if the first byte is an invalid address we return here. Otherwise + * there is a guard page at the top of the user's address page and + * the first access would generate a NaT consumption fault and return + * with a SIGSEGV, which is what we want. + * + * Note: the first argument is ignored + */ + if ( access_ok(VERIFY_WRITE, (void *)ifa, len) < 0 ) { + DPRINT(("verify area failed on %lx\n",ifa)); + return -1; + } + + /* + * make sure we get clean buffers + */ + memset(&fpr_init,0, sizeof(fpr_init)); + memset(&fpr_final,0, sizeof(fpr_final)); + + + /* + * if we get to this handler, Nat bits on both r3 and r2 have already + * been checked. so we don't need to do it + * + * extract the value to be stored + */ + getfpreg(ld->imm, &fpr_init, regs); + /* + * during this step, we extract the spilled registers from the saved + * context i.e., we refill. Then we store (no spill) to temporary + * aligned location + */ + switch( ld->x6_sz ) { + case 0: + float2mem_extended(&fpr_init, &fpr_final); + break; + case 1: + float2mem_integer(&fpr_init, &fpr_final); + break; + case 2: + float2mem_single(&fpr_init, &fpr_final); + break; + case 3: + float2mem_double(&fpr_init, &fpr_final); + break; + } + DPRINT(("ld.r1=%d x6_sz=%d\n", ld->r1, ld->x6_sz)); +#ifdef DEBUG_UNALIGNED_TRAP + { int i; char *c = (char *)&fpr_init; + printk("fpr_init= "); + for(i=0; i < len; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } + { int i; char *c = (char *)&fpr_final; + printk("fpr_final= "); + for(i=0; i < len; i++ ) { + printk("%02x ", c[i]&0xff); + } + printk("\n"); + } +#endif + + /* + * does the unaligned store + */ + memcpy((void *)ifa, &fpr_final, len); + + /* + * stfX [r3]=r2,imm(9) + * + * NOTE: + * ld->r3 can never be r0, because r0 would not generate an + * unaligned access. 
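 *
 * [editor's illustration -- not part of this diff] A worked example of the
 * imm9 reconstruction done below, for "stfd [r3]=f2,-16" (assuming
 * SIGN_EXT9 sets bits 8..63):
 *
 *	imm  = ld->x << 7 | ld->r1;	/* x=1, imm7=0x70  ->  0xf0 */
 *	imm |= SIGN_EXT9;		/* m=1 (negative)  ->  -16  */
 *	ifa += imm;			/* r3 is decremented by 16  */
 *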
+ */ + if ( ld->op == 0x7 ) { + unsigned long imm; + + /* + * form imm9: [12:6] contain first 7bits + */ + imm = ld->x << 7 | ld->r1; + /* + * sign extend (8bits) if m set + */ + if ( ld->m ) imm |= SIGN_EXT9; + /* + * ifa == r3 (NaT is necessarily cleared) + */ + ifa += imm; + + DPRINT(("imm=%lx r3=%lx\n", imm, ifa)); + + setreg(ld->r3, ifa, 0, regs); + } + /* + * we don't have alat_invalidate_multiple() so we need + * to do the complete flush :-<< + */ + ia64_invala(); + + return 0; +} + +void +ia64_handle_unaligned(unsigned long ifa, struct pt_regs *regs) +{ + static unsigned long unalign_count; + static long last_time; + + struct ia64_psr *ipsr = ia64_psr(regs); + unsigned long *bundle_addr; + unsigned long opcode; + unsigned long op; + load_store_t *insn; + int ret = -1; + + /* + * We flag unaligned references while in kernel as + * errors: the kernel must be fixed. The switch code + * is in ivt.S at entry 30. + * + * So here we keep a simple sanity check. + */ + if ( !user_mode(regs) ) { + die_if_kernel("Unaligned reference while in kernel\n", regs, 30); + /* NOT_REACHED */ + } + + /* + * Make sure we log the unaligned access, so that user/sysadmin can notice it + * and eventually fix the program. + * + * We don't want to do that for every access so we pace it with jiffies. + */ + if ( unalign_count > 5 && jiffies - last_time > 5*HZ ) unalign_count = 0; + if ( ++unalign_count < 5 ) { + last_time = jiffies; + printk("%s(%d): unaligned trap accessing %016lx (ip=%016lx)\n", + current->comm, current->pid, ifa, regs->cr_iip + ipsr->ri); + + } + + DPRINT(("iip=%lx ifa=%lx isr=%lx\n", regs->cr_iip, ifa, regs->cr_ipsr)); + DPRINT(("ISR.ei=%d ISR.sp=%d\n", ipsr->ri, ipsr->it)); + + bundle_addr = (unsigned long *)(regs->cr_iip); + + /* + * extract the instruction from the bundle given the slot number + */ + switch ( ipsr->ri ) { + case 0: op = *bundle_addr >> 5; + break; + + case 1: op = *bundle_addr >> 46 | (*(bundle_addr+1) & 0x7fffff)<<18; + break; + + case 2: op = *(bundle_addr+1) >> 23; + break; + } + + insn = (load_store_t *)&op; + opcode = op & IA64_OPCODE_MASK; + + DPRINT(("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d " + "ld.x6=0x%x ld.m=%d ld.op=%d\n", + opcode, + insn->qp, + insn->r1, + insn->imm, + insn->r3, + insn->x, + insn->hint, + insn->x6_sz, + insn->m, + insn->op)); + + /* + * IMPORTANT: + * Notice that the swictch statement DOES not cover all possible instructions + * that DO generate unaligned references. This is made on purpose because for some + * instructions it DOES NOT make sense to try and emulate the access. Sometimes it + * is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e., + * the program will get a signal and die: + * + * load/store: + * - ldX.spill + * - stX.spill + * Reason: RNATs are based on addresses + * + * synchronization: + * - cmpxchg + * - fetchadd + * - xchg + * Reason: ATOMIC operations cannot be emulated properly using multiple + * instructions. + * + * speculative loads: + * - ldX.sZ + * Reason: side effects, code must be ready to deal with failure so simpler + * to let the load fail. + * --------------------------------------------------------------------------------- + * XXX fixme + * + * I would like to get rid of this switch case and do something + * more elegant. 
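 *
 * [editor's illustration -- not part of this diff] Typical user code that
 * ends up in the switch below is nothing more exotic than:
 *
 *	char buf[16];
 *	long x = *(long *) (buf + 1);	/* misaligned ld8 -> unaligned fault */
 *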
+ */ + switch(opcode) { + case LDS_OP: + case LDSA_OP: + case LDS_IMM_OP: + case LDSA_IMM_OP: + case LDFS_OP: + case LDFSA_OP: + case LDFS_IMM_OP: + /* + * The instruction will be retried with defered exceptions + * turned on, and we should get Nat bit installed + * + * IMPORTANT: + * When PSR_ED is set, the register & immediate update + * forms are actually executed even though the operation + * failed. So we don't need to take care of this. + */ + DPRINT(("forcing PSR_ED\n")); + regs->cr_ipsr |= IA64_PSR_ED; + return; + + case LD_OP: + case LDA_OP: + case LDBIAS_OP: + case LDACQ_OP: + case LDCCLR_OP: + case LDCNC_OP: + case LDCCLRACQ_OP: + case LD_IMM_OP: + case LDA_IMM_OP: + case LDBIAS_IMM_OP: + case LDACQ_IMM_OP: + case LDCCLR_IMM_OP: + case LDCNC_IMM_OP: + case LDCCLRACQ_IMM_OP: + ret = emulate_load_int(ifa, insn, regs); + break; + case ST_OP: + case STREL_OP: + case ST_IMM_OP: + case STREL_IMM_OP: + ret = emulate_store_int(ifa, insn, regs); + break; + case LDF_OP: + case LDFA_OP: + case LDFCCLR_OP: + case LDFCNC_OP: + case LDF_IMM_OP: + case LDFA_IMM_OP: + case LDFCCLR_IMM_OP: + case LDFCNC_IMM_OP: + ret = insn->x ? + emulate_load_floatpair(ifa, insn, regs): + emulate_load_float(ifa, insn, regs); + break; + case STF_OP: + case STF_IMM_OP: + ret = emulate_store_float(ifa, insn, regs); + } + + DPRINT(("ret=%d\n", ret)); + if ( ret ) { + lock_kernel(); + force_sig(SIGSEGV, current); + unlock_kernel(); + } else { + /* + * given today's architecture this case is not likely to happen + * because a memory access instruction (M) can never be in the + * last slot of a bundle. But let's keep it for now. + */ + if ( ipsr->ri == 2 ) regs->cr_iip += 16; + ipsr->ri = ++ipsr->ri & 3; + } + + DPRINT(("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip)); +} diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c new file mode 100644 index 000000000..c2b772e68 --- /dev/null +++ b/arch/ia64/kernel/unwind.c @@ -0,0 +1,118 @@ +/* + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + */ +#include <linux/kernel.h> +#include <linux/sched.h> + +#include <asm/unwind.h> + +void +ia64_unwind_init_from_blocked_task (struct ia64_frame_info *info, struct task_struct *t) +{ + struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16); + unsigned long sol, limit, top; + + memset(info, 0, sizeof(*info)); + + sol = (sw->ar_pfs >> 7) & 0x7f; /* size of locals */ + + limit = (unsigned long) t + IA64_RBS_OFFSET; + top = sw->ar_bspstore; + if (top - (unsigned long) t >= IA64_STK_OFFSET) + top = limit; + + info->regstk.limit = (unsigned long *) limit; + info->regstk.top = (unsigned long *) top; + info->bsp = ia64_rse_skip_regs(info->regstk.top, -sol); + info->top_rnat = sw->ar_rnat; + info->cfm = sw->ar_pfs; + info->ip = sw->b0; +} + +void +ia64_unwind_init_from_current (struct ia64_frame_info *info, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long sol, sof, *bsp, limit, top; + + limit = (unsigned long) current + IA64_RBS_OFFSET; + top = sw->ar_bspstore; + if (top - (unsigned long) current >= IA64_STK_OFFSET) + top = limit; + + memset(info, 0, sizeof(*info)); + + sol = (sw->ar_pfs >> 7) & 0x7f; /* size of frame */ + info->regstk.limit = (unsigned long *) limit; + info->regstk.top = (unsigned long *) top; + info->top_rnat = sw->ar_rnat; + + /* this gives us the bsp top level frame (kdb interrupt frame): */ + bsp = ia64_rse_skip_regs((unsigned long *) top, -sol); + + /* now skip past the interrupt 
frame: */ + sof = regs->cr_ifs & 0x7f; /* size of frame */ + info->cfm = regs->cr_ifs; + info->bsp = ia64_rse_skip_regs(bsp, -sof); + info->ip = regs->cr_iip; +} + +static unsigned long +read_reg (struct ia64_frame_info *info, int regnum, int *is_nat) +{ + unsigned long *addr, *rnat_addr, rnat; + + addr = ia64_rse_skip_regs(info->bsp, regnum); + if (addr < info->regstk.limit || addr >= info->regstk.top || ((long) addr & 0x7) != 0) { + *is_nat = 1; + return 0xdeadbeefdeadbeef; + } + rnat_addr = ia64_rse_rnat_addr(addr); + + if (rnat_addr >= info->regstk.top) + rnat = info->top_rnat; + else + rnat = *rnat_addr; + *is_nat = (rnat & (1UL << ia64_rse_slot_num(addr))) != 0; + return *addr; +} + +/* + * On entry, info->regstk.top should point to the register backing + * store for r32. + */ +int +ia64_unwind_to_previous_frame (struct ia64_frame_info *info) +{ + unsigned long sol, cfm = info->cfm; + int is_nat; + + sol = (cfm >> 7) & 0x7f; /* size of locals */ + + /* + * In general, we would have to make use of unwind info to + * unwind an IA-64 stack, but for now gcc uses a special + * convention that makes this possible without full-fledged + * unwindo info. Specifically, we expect "rp" in the second + * last, and "ar.pfs" in the last local register, so the + * number of locals in a frame must be at least two. If it's + * less than that, we reached the end of the C call stack. + */ + if (sol < 2) + return -1; + + info->ip = read_reg(info, sol - 2, &is_nat); + if (is_nat) + return -1; + + cfm = read_reg(info, sol - 1, &is_nat); + if (is_nat) + return -1; + + sol = (cfm >> 7) & 0x7f; + + info->cfm = cfm; + info->bsp = ia64_rse_skip_regs(info->bsp, -sol); + return 0; +} |
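Editor's note: a minimal sketch of how the interface above might be used to walk a blocked task's call chain. `task` is a hypothetical `struct task_struct *`, and reading `info.ip` directly assumes the `ia64_frame_info` layout declared in asm/unwind.h; this is an illustration, not code from the commit.

	struct ia64_frame_info info;

	ia64_unwind_init_from_blocked_task(&info, task);
	do {
		printk("ip=0x%lx\n", info.ip);	/* return address of each frame */
	} while (ia64_unwind_to_previous_frame(&info) == 0);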