| author    | Ralf Baechle <ralf@linux-mips.org>                          | 2001-01-11 04:02:40 +0000 |
|-----------|-------------------------------------------------------------|---------------------------|
| committer | Ralf Baechle <ralf@linux-mips.org>                          | 2001-01-11 04:02:40 +0000 |
| commit    | e47f00743fc4776491344f2c618cc8dc2c23bcbc (patch)            |                           |
| tree      | 13e03a113a82a184c51c19c209867cfd3a59b3b9 /arch/ia64/kernel  |                           |
| parent    | b2ad5f821b1381492d792ca10b1eb7a107b48f14 (diff)             |                           |
Merge with Linux 2.4.0.
Diffstat (limited to 'arch/ia64/kernel')
30 files changed, 1933 insertions, 1538 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 7a49511d3..e4ffb3ae6 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -9,20 +9,20 @@ all: kernel.o head.o init_task.o -obj-y := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_ia64.o irq_sapic.o ivt.o \ - machvec.o pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \ - signal.o sys_ia64.o traps.o time.o unaligned.o unwind.o +O_TARGET := kernel.o -obj-$(CONFIG_IA64_GENERIC) += machvec.o +obj-y := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_ia64.o irq_sapic.o ivt.o \ + machvec.o pal.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \ + signal.o sys_ia64.o traps.o time.o unaligned.o unwind.o +obj-$(CONFIG_IA64_GENERIC) += machvec.o iosapic.o +obj-$(CONFIG_IA64_DIG) += iosapic.o obj-$(CONFIG_IA64_PALINFO) += palinfo.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_SMP) += smp.o smpboot.o obj-$(CONFIG_IA64_MCA) += mca.o mca_asm.o obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o -O_TARGET := kernel.o -O_OBJS := $(obj-y) -OX_OBJS := ia64_ksyms.o +export-objs := ia64_ksyms.o clean:: diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index a8c1ead1f..35ed564c9 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -6,6 +6,12 @@ * * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> + * Copyright (C) 2000 Hewlett-Packard Co. + * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 2000 Intel Corp. + * Copyright (C) 2000 J.I. Lee <jung-ik.lee@intel.com> + * ACPI based kernel configuration manager. + * ACPI 2.0 & IA64 ext 0.71 */ #include <linux/config.h> @@ -36,29 +42,87 @@ int __initdata total_cpus; void (*pm_idle)(void); +asm (".weak iosapic_register_legacy_irq"); +asm (".weak iosapic_init"); + +const char * +acpi_get_sysname (void) +{ + /* the following should go away once we have an ACPI parser: */ +#ifdef CONFIG_IA64_GENERIC + return "hpsim"; +#else +# if defined (CONFIG_IA64_HP_SIM) + return "hpsim"; +# elif defined (CONFIG_IA64_SGI_SN1) + return "sn1"; +# elif defined (CONFIG_IA64_DIG) + return "dig"; +# else +# error Unknown platform. Fix acpi.c. +# endif +#endif + +} + /* - * Identify usable CPU's and remember them for SMP bringup later. + * Configure legacy IRQ information. */ static void __init -acpi_lsapic(char *p) +acpi_legacy_irq (char *p) { - int add = 1; - - acpi_entry_lsapic_t *lsapic = (acpi_entry_lsapic_t *) p; + acpi_entry_int_override_t *legacy = (acpi_entry_int_override_t *) p; + unsigned long polarity = 0, edge_triggered = 0; - if ((lsapic->flags & LSAPIC_PRESENT) == 0) + /* + * If the platform we're running doesn't define + * iosapic_register_legacy_irq(), we ignore this info... + */ + if (!iosapic_register_legacy_irq) return; + switch (legacy->flags) { + case 0x5: polarity = 1; edge_triggered = 1; break; + case 0x7: polarity = 0; edge_triggered = 1; break; + case 0xd: polarity = 1; edge_triggered = 0; break; + case 0xf: polarity = 0; edge_triggered = 0; break; + default: + printk(" ACPI Legacy IRQ 0x%02x: Unknown flags 0x%x\n", legacy->isa_irq, + legacy->flags); + break; + } + iosapic_register_legacy_irq(legacy->isa_irq, legacy->pin, polarity, edge_triggered); +} + +/* + * ACPI 2.0 tables parsing functions + */ + +static unsigned long +readl_unaligned(void *p) +{ + unsigned long ret; + + memcpy(&ret, p, sizeof(long)); + return ret; +} + +/* + * Identify usable CPU's and remember them for SMP bringup later. 
+ */ +static void __init +acpi20_lsapic (char *p) +{ + int add = 1; + + acpi20_entry_lsapic_t *lsapic = (acpi20_entry_lsapic_t *) p; printk(" CPU %d (%.04x:%.04x): ", total_cpus, lsapic->eid, lsapic->id); if ((lsapic->flags & LSAPIC_ENABLED) == 0) { printk("Disabled.\n"); add = 0; - } else if (lsapic->flags & LSAPIC_PERFORMANCE_RESTRICTED) { - printk("Performance Restricted; ignoring.\n"); - add = 0; } - + #ifdef CONFIG_SMP smp_boot_data.cpu_phys_id[total_cpus] = -1; #endif @@ -73,87 +137,234 @@ acpi_lsapic(char *p) } /* - * Configure legacy IRQ information in iosapic_vector + * Info on platform interrupt sources: NMI. PMI, INIT, etc. */ static void __init -acpi_legacy_irq(char *p) +acpi20_platform (char *p) { - /* - * This is not good. ACPI is not necessarily limited to CONFIG_IA64_DIG, yet - * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be - * a means for platform_setup() to register ACPI handlers? - */ -#ifdef CONFIG_IA64_IRQ_ACPI - acpi_entry_int_override_t *legacy = (acpi_entry_int_override_t *) p; - unsigned char vector; - int i; + acpi20_entry_platform_src_t *plat = (acpi20_entry_platform_src_t *) p; + + printk("PLATFORM: IOSAPIC %x -> Vector %x on CPU %.04u:%.04u\n", + plat->iosapic_vector, plat->global_vector, plat->eid, plat->id); +} - vector = isa_irq_to_vector(legacy->isa_irq); +/* + * Override the physical address of the local APIC in the MADT stable header. + */ +static void __init +acpi20_lapic_addr_override (char *p) +{ + acpi20_entry_lapic_addr_override_t * lapic = (acpi20_entry_lapic_addr_override_t *) p; + + if (lapic->lapic_address) { + iounmap((void *)ipi_base_addr); + ipi_base_addr = (unsigned long) ioremap(lapic->lapic_address, 0); + + printk("LOCAL ACPI override to 0x%lx(p=0x%lx)\n", + ipi_base_addr, lapic->lapic_address); + } +} + +/* + * Parse the ACPI Multiple APIC Description Table + */ +static void __init +acpi20_parse_madt (acpi_madt_t *madt) +{ + acpi_entry_iosapic_t *iosapic; + char *p, *end; + + /* Base address of IPI Message Block */ + if (madt->lapic_address) { + ipi_base_addr = (unsigned long) ioremap(madt->lapic_address, 0); + printk("Lapic address set to 0x%lx\n", ipi_base_addr); + } else + printk("Lapic address set to default 0x%lx\n", ipi_base_addr); + + p = (char *) (madt + 1); + end = p + (madt->header.length - sizeof(acpi_madt_t)); /* - * Clobber any old pin mapping. It may be that it gets replaced later on + * Splitted entry parsing to ensure ordering. */ - for (i = 0; i < IA64_MAX_VECTORED_IRQ; i++) { - if (i == vector) - continue; - if (iosapic_pin(i) == iosapic_pin(vector)) - iosapic_pin(i) = 0xff; - } - iosapic_pin(vector) = legacy->pin; - iosapic_bus(vector) = BUS_ISA; /* This table only overrides the ISA devices */ - iosapic_busdata(vector) = 0; - - /* - * External timer tick is special... 
- */ - if (vector != TIMER_IRQ) - iosapic_dmode(vector) = IO_SAPIC_LOWEST_PRIORITY; - else - iosapic_dmode(vector) = IO_SAPIC_FIXED; + while (p < end) { + switch (*p) { + case ACPI20_ENTRY_LOCAL_APIC_ADDR_OVERRIDE: + printk("ACPI 2.0 MADT: LOCAL APIC Override\n"); + acpi20_lapic_addr_override(p); + break; + + case ACPI20_ENTRY_LOCAL_SAPIC: + printk("ACPI 2.0 MADT: LOCAL SAPIC\n"); + acpi20_lsapic(p); + break; - /* See MPS 1.4 section 4.3.4 */ - switch (legacy->flags) { - case 0x5: - iosapic_polarity(vector) = IO_SAPIC_POL_HIGH; - iosapic_trigger(vector) = IO_SAPIC_EDGE; - break; - case 0x8: - iosapic_polarity(vector) = IO_SAPIC_POL_LOW; - iosapic_trigger(vector) = IO_SAPIC_EDGE; - break; - case 0xd: - iosapic_polarity(vector) = IO_SAPIC_POL_HIGH; - iosapic_trigger(vector) = IO_SAPIC_LEVEL; - break; - case 0xf: - iosapic_polarity(vector) = IO_SAPIC_POL_LOW; - iosapic_trigger(vector) = IO_SAPIC_LEVEL; - break; - default: - printk(" ACPI Legacy IRQ 0x%02x: Unknown flags 0x%x\n", legacy->isa_irq, - legacy->flags); - break; + case ACPI20_ENTRY_IO_SAPIC: + iosapic = (acpi_entry_iosapic_t *) p; + if (iosapic_init) + iosapic_init(iosapic->address, iosapic->irq_base); + break; + + case ACPI20_ENTRY_PLATFORM_INT_SOURCE: + printk("ACPI 2.0 MADT: PLATFORM INT SOUCE\n"); + acpi20_platform(p); + break; + + case ACPI20_ENTRY_LOCAL_APIC: + printk("ACPI 2.0 MADT: LOCAL APIC entry\n"); break; + case ACPI20_ENTRY_IO_APIC: + printk("ACPI 2.0 MADT: IO APIC entry\n"); break; + case ACPI20_ENTRY_NMI_SOURCE: + printk("ACPI 2.0 MADT: NMI SOURCE entry\n"); break; + case ACPI20_ENTRY_LOCAL_APIC_NMI: + printk("ACPI 2.0 MADT: LOCAL APIC NMI entry\n"); break; + case ACPI20_ENTRY_INT_SRC_OVERRIDE: + break; + default: + printk("ACPI 2.0 MADT: unknown entry skip\n"); break; + break; + } + + p += p[1]; + } + + p = (char *) (madt + 1); + end = p + (madt->header.length - sizeof(acpi_madt_t)); + + while (p < end) { + + switch (*p) { + case ACPI20_ENTRY_INT_SRC_OVERRIDE: + printk("ACPI 2.0 MADT: INT SOURCE Override\n"); + acpi_legacy_irq(p); + break; + default: + break; + } + + p += p[1]; + } + + /* Make bootup pretty */ + printk(" %d CPUs available, %d CPUs total\n", + available_cpus, total_cpus); +} + +int __init +acpi20_parse (acpi20_rsdp_t *rsdp20) +{ + acpi_xsdt_t *xsdt; + acpi_desc_table_hdr_t *hdrp; + int tables, i; + + if (strncmp(rsdp20->signature, ACPI_RSDP_SIG, ACPI_RSDP_SIG_LEN)) { + printk("ACPI 2.0 RSDP signature incorrect!\n"); + return 0; + } else { + printk("ACPI 2.0 Root System Description Ptr at 0x%lx\n", + (unsigned long)rsdp20); + } + + xsdt = __va(rsdp20->xsdt); + hdrp = &xsdt->header; + if (strncmp(hdrp->signature, + ACPI_XSDT_SIG, ACPI_XSDT_SIG_LEN)) { + printk("ACPI 2.0 XSDT signature incorrect. Trying RSDT\n"); + /* RSDT parsing here */ + return 0; + } else { + printk("ACPI 2.0 XSDT at 0x%lx (p=0x%lx)\n", + (unsigned long)xsdt, (unsigned long)rsdp20->xsdt); + } + + printk("ACPI 2.0: %.6s %.8s %d.%d\n", + hdrp->oem_id, + hdrp->oem_table_id, + hdrp->oem_revision >> 16, + hdrp->oem_revision & 0xffff); + +#ifdef CONFIG_ACPI_KERNEL_CONFIG + acpi_cf_init((void *)rsdp20); +#endif + + tables =(hdrp->length -sizeof(acpi_desc_table_hdr_t))>>3; + + for (i = 0; i < tables; i++) { + hdrp = (acpi_desc_table_hdr_t *) __va(readl_unaligned(&xsdt->entry_ptrs[i])); + printk(" :table %4.4s found\n", hdrp->signature); + + /* Only interested int the MADT table for now ... 
*/ + if (strncmp(hdrp->signature, + ACPI_MADT_SIG, ACPI_MADT_SIG_LEN) != 0) + continue; + + acpi20_parse_madt((acpi_madt_t *) hdrp); + } + +#ifdef CONFIG_ACPI_KERNEL_CONFIG + acpi_cf_terminate(); +#endif + +#ifdef CONFIG_SMP + if (available_cpus == 0) { + printk("ACPI: Found 0 CPUS; assuming 1\n"); + available_cpus = 1; /* We've got at least one of these, no? */ + } + smp_boot_data.cpu_count = available_cpus; +#endif + return 1; +} +/* + * ACPI 1.0b with 0.71 IA64 extensions functions; should be removed once all + * platforms start supporting ACPI 2.0 + */ + +/* + * Identify usable CPU's and remember them for SMP bringup later. + */ +static void __init +acpi_lsapic (char *p) +{ + int add = 1; + + acpi_entry_lsapic_t *lsapic = (acpi_entry_lsapic_t *) p; + + if ((lsapic->flags & LSAPIC_PRESENT) == 0) + return; + + printk(" CPU %d (%.04x:%.04x): ", total_cpus, lsapic->eid, lsapic->id); + + if ((lsapic->flags & LSAPIC_ENABLED) == 0) { + printk("Disabled.\n"); + add = 0; + } else if (lsapic->flags & LSAPIC_PERFORMANCE_RESTRICTED) { + printk("Performance Restricted; ignoring.\n"); + add = 0; } -# ifdef ACPI_DEBUG - printk("Legacy ISA IRQ %x -> IA64 Vector %x IOSAPIC Pin %x Active %s %s Trigger\n", - legacy->isa_irq, vector, iosapic_pin(vector), - ((iosapic_polarity(vector) == IO_SAPIC_POL_LOW) ? "Low" : "High"), - ((iosapic_trigger(vector) == IO_SAPIC_LEVEL) ? "Level" : "Edge")); -# endif /* ACPI_DEBUG */ -#endif /* CONFIG_IA64_IRQ_ACPI */ +#ifdef CONFIG_SMP + smp_boot_data.cpu_phys_id[total_cpus] = -1; +#endif + if (add) { + printk("Available.\n"); + available_cpus++; +#ifdef CONFIG_SMP + smp_boot_data.cpu_phys_id[total_cpus] = (lsapic->id << 8) | lsapic->eid; +#endif /* CONFIG_SMP */ + } + total_cpus++; } /* * Info on platform interrupt sources: NMI. PMI, INIT, etc. */ static void __init -acpi_platform(char *p) +acpi_platform (char *p) { acpi_entry_platform_src_t *plat = (acpi_entry_platform_src_t *) p; - printk("PLATFORM: IOSAPIC %x -> Vector %lx on CPU %.04u:%.04u\n", + printk("PLATFORM: IOSAPIC %x -> Vector %x on CPU %.04u:%.04u\n", plat->iosapic_vector, plat->global_vector, plat->eid, plat->id); } @@ -161,8 +372,9 @@ acpi_platform(char *p) * Parse the ACPI Multiple SAPIC Table */ static void __init -acpi_parse_msapic(acpi_sapic_t *msapic) +acpi_parse_msapic (acpi_sapic_t *msapic) { + acpi_entry_iosapic_t *iosapic; char *p, *end; /* Base address of IPI Message Block */ @@ -172,41 +384,31 @@ acpi_parse_msapic(acpi_sapic_t *msapic) end = p + (msapic->header.length - sizeof(acpi_sapic_t)); while (p < end) { - switch (*p) { - case ACPI_ENTRY_LOCAL_SAPIC: + case ACPI_ENTRY_LOCAL_SAPIC: acpi_lsapic(p); break; - case ACPI_ENTRY_IO_SAPIC: - platform_register_iosapic((acpi_entry_iosapic_t *) p); + case ACPI_ENTRY_IO_SAPIC: + iosapic = (acpi_entry_iosapic_t *) p; + if (iosapic_init) + iosapic_init(iosapic->address, iosapic->irq_base); break; - case ACPI_ENTRY_INT_SRC_OVERRIDE: + case ACPI_ENTRY_INT_SRC_OVERRIDE: acpi_legacy_irq(p); break; - - case ACPI_ENTRY_PLATFORM_INT_SOURCE: + + case ACPI_ENTRY_PLATFORM_INT_SOURCE: acpi_platform(p); break; - - default: + + default: break; } /* Move to next table entry. */ -#define BAD_ACPI_TABLE -#ifdef BAD_ACPI_TABLE - /* - * Some prototype Lion's have a bad ACPI table - * requiring this fix. Without this fix, those - * machines crash during bootup. 
- */ - if (p[1] == 0) - p = end; - else -#endif - p += p[1]; + p += p[1]; } /* Make bootup pretty */ @@ -214,24 +416,18 @@ acpi_parse_msapic(acpi_sapic_t *msapic) } int __init -acpi_parse(acpi_rsdp_t *rsdp) +acpi_parse (acpi_rsdp_t *rsdp) { acpi_rsdt_t *rsdt; acpi_desc_table_hdr_t *hdrp; long tables, i; - if (!rsdp) { - printk("Uh-oh, no ACPI Root System Description Pointer table!\n"); - return 0; - } - if (strncmp(rsdp->signature, ACPI_RSDP_SIG, ACPI_RSDP_SIG_LEN)) { printk("Uh-oh, ACPI RSDP signature incorrect!\n"); return 0; } - rsdp->rsdt = __va(rsdp->rsdt); - rsdt = rsdp->rsdt; + rsdt = __va(rsdp->rsdt); if (strncmp(rsdt->header.signature, ACPI_RSDT_SIG, ACPI_RSDT_SIG_LEN)) { printk("Uh-oh, ACPI RDST signature incorrect!\n"); return 0; @@ -256,7 +452,7 @@ acpi_parse(acpi_rsdp_t *rsdp) } #ifdef CONFIG_ACPI_KERNEL_CONFIG - acpi_cf_terminate(); + acpi_cf_terminate(); #endif #ifdef CONFIG_SMP @@ -268,22 +464,3 @@ acpi_parse(acpi_rsdp_t *rsdp) #endif return 1; } - -const char * -acpi_get_sysname (void) -{ - /* the following should go away once we have an ACPI parser: */ -#ifdef CONFIG_IA64_GENERIC - return "hpsim"; -#else -# if defined (CONFIG_IA64_HP_SIM) - return "hpsim"; -# elif defined (CONFIG_IA64_SGI_SN1) - return "sn1"; -# elif defined (CONFIG_IA64_DIG) - return "dig"; -# else -# error Unknown platform. Fix acpi.c. -# endif -#endif -} diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 759db7f52..1ac4e04f4 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -333,6 +333,9 @@ efi_init (void) if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { efi.mps = __va(config_tables[i].table); printk(" MPS=0x%lx", config_tables[i].table); + } else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { + efi.acpi20 = __va(config_tables[i].table); + printk(" ACPI 2.0=0x%lx", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { efi.acpi = __va(config_tables[i].table); printk(" ACPI=0x%lx", config_tables[i].table); @@ -364,7 +367,7 @@ efi_init (void) #if EFI_DEBUG /* print EFI memory map: */ { - efi_memory_desc_t *md = p; + efi_memory_desc_t *md; void *p; for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) { diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index ffb1760ea..f8c647386 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -11,6 +11,17 @@ * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com> */ /* + * ia64_switch_to now places correct virtual mapping in in TR2 for + * kernel stack. This allows us to handle interrupts without changing + * to physical mode. + * + * ar.k4 is now used to hold last virtual map address + * + * Jonathan Nickin <nicklin@missioncriticallinux.com> + * Patrick O'Rourke <orourke@missioncriticallinux.com> + * 11/07/2000 + / +/* * Global (preserved) predicate usage on syscall entry/exit path: * * pKern: See entry.h. 
@@ -27,7 +38,8 @@ #include <asm/processor.h> #include <asm/unistd.h> #include <asm/asmmacro.h> - +#include <asm/pgtable.h> + #include "entry.h" .text @@ -98,6 +110,8 @@ GLOBAL_ENTRY(sys_clone) br.ret.sptk.many rp END(sys_clone) +#define KSTACK_TR 2 + /* * prev_task <- ia64_switch_to(struct task_struct *next) */ @@ -108,22 +122,55 @@ GLOBAL_ENTRY(ia64_switch_to) UNW(.body) adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 - dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff + mov r27=ar.k4 + dep r20=0,in0,61,3 // physical address of "current" + ;; + st8 [r22]=sp // save kernel stack pointer of old task + shr.u r26=r20,_PAGE_SIZE_256M + ;; + cmp.eq p7,p6=r26,r0 // check < 256M adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 ;; - st8 [r22]=sp // save kernel stack pointer of old task - ld8 sp=[r21] // load kernel stack pointer of new task - and r20=in0,r18 // physical address of "current" + /* + * If we've already mapped this task's page, we can skip doing it + * again. + */ +(p6) cmp.eq p7,p6=r26,r27 +(p6) br.cond.dpnt.few .map + ;; +.done: ld8 sp=[r21] // load kernel stack pointer of new task +(p6) ssm psr.ic // if we we had to map, renable the psr.ic bit FIRST!!! ;; - mov ar.k6=r20 // copy "current" into ar.k6 - mov r8=r13 // return pointer to previously running task - mov r13=in0 // set "current" pointer +(p6) srlz.d + mov ar.k6=r20 // copy "current" into ar.k6 + mov r8=r13 // return pointer to previously running task + mov r13=in0 // set "current" pointer ;; +(p6) ssm psr.i // renable psr.i AFTER the ic bit is serialized DO_LOAD_SWITCH_STACK( ) + #ifdef CONFIG_SMP - sync.i // ensure "fc"s done by this CPU are visible on other CPUs -#endif - br.ret.sptk.few rp + sync.i // ensure "fc"s done by this CPU are visible on other CPUs +#endif + br.ret.sptk.few rp // boogie on out in new context + +.map: + rsm psr.i | psr.ic + movl r25=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX + ;; + srlz.d + or r23=r25,r20 // construct PA | page properties + mov r25=_PAGE_SIZE_256M<<2 + ;; + mov cr.itir=r25 + mov cr.ifa=in0 // VA of next task... + ;; + mov r25=KSTACK_TR // use tr entry #2... + mov ar.k4=r26 // remember last page we mapped... + ;; + itr.d dtr[r25]=r23 // wire in new mapping... + br.cond.sptk.many .done + ;; END(ia64_switch_to) #ifndef CONFIG_IA64_NEW_UNWIND @@ -503,7 +550,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) ;; ld4 r2=[r2] ;; - shl r2=r2,SMP_LOG_CACHE_BYTES // can't use shladd here... + shl r2=r2,SMP_CACHE_SHIFT // can't use shladd here... 
;; add r3=r2,r3 #else @@ -542,7 +589,7 @@ back_from_resched: // check & deliver pending signals: (p2) br.call.spnt.few rp=handle_signal_delivery .ret9: -#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_IA64_SOFTSDV_HACKS) +#ifdef CONFIG_IA64_SOFTSDV_HACKS // Check for lost ticks rsm psr.i mov r2 = ar.itc @@ -611,14 +658,13 @@ restore_all: mov ar.ccv=r1 mov ar.fpsr=r13 mov b0=r14 - // turn off interrupts, interrupt collection, & data translation - rsm psr.i | psr.ic | psr.dt + // turn off interrupts, interrupt collection + rsm psr.i | psr.ic ;; srlz.i // EAS 2.5 mov b7=r15 ;; invala // invalidate ALAT - dep r12=0,r12,61,3 // convert sp to physical address bsw.0;; // switch back to bank 0 (must be last in insn group) ;; #ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC @@ -757,7 +803,7 @@ END(invoke_schedule_tail) #endif /* CONFIG_SMP */ -#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_IA64_SOFTSDV_HACKS) +#ifdef CONFIG_IA64_SOFTSDV_HACKS ENTRY(invoke_ia64_reset_itm) UNW(.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)) @@ -772,7 +818,7 @@ ENTRY(invoke_ia64_reset_itm) br.ret.sptk.many rp END(invoke_ia64_reset_itm) -#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC || CONFIG_IA64_SOFTSDV_HACKS */ +#endif /* CONFIG_IA64_SOFTSDV_HACKS */ /* * Invoke do_softirq() while preserving in0-in7, which may be needed @@ -1091,7 +1137,7 @@ sys_call_table: data8 sys_setpriority data8 sys_statfs data8 sys_fstatfs - data8 ia64_ni_syscall + data8 ia64_ni_syscall // 1105 data8 sys_semget data8 sys_semop data8 sys_semctl diff --git a/arch/ia64/kernel/fw-emu.c b/arch/ia64/kernel/fw-emu.c index 34316fe58..e16f23426 100644 --- a/arch/ia64/kernel/fw-emu.c +++ b/arch/ia64/kernel/fw-emu.c @@ -402,7 +402,6 @@ sys_fw_init (const char *args, int arglen) sal_systab->sal_rev_minor = 1; sal_systab->sal_rev_major = 0; sal_systab->entry_count = 1; - sal_systab->ia32_bios_present = 0; #ifdef CONFIG_IA64_GENERIC strcpy(sal_systab->oem_id, "Generic"); diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index e6298b297..abee408f1 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -74,8 +74,8 @@ GLOBAL_ENTRY(_start) ;; #ifdef CONFIG_IA64_EARLY_PRINTK - mov r2=6 - mov r3=(8<<8) | (28<<2) + mov r3=(6<<8) | (28<<2) + movl r2=6<<61 ;; mov rr[r2]=r3 ;; @@ -168,6 +168,11 @@ GLOBAL_ENTRY(ia64_save_debug_regs) add r19=IA64_NUM_DBG_REGS*8,in0 ;; 1: mov r16=dbr[r18] +#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC) \ + || defined(CONFIG_ITANIUM_C0_SPECIFIC) + ;; + srlz.d +#endif mov r17=ibr[r18] add r18=1,r18 ;; @@ -181,7 +186,8 @@ END(ia64_save_debug_regs) GLOBAL_ENTRY(ia64_load_debug_regs) alloc r16=ar.pfs,1,0,0,0 -#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) +#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) \ + || defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC)) lfetch.nta [in0] #endif mov r20=ar.lc // preserve ar.lc @@ -194,6 +200,11 @@ GLOBAL_ENTRY(ia64_load_debug_regs) add r18=1,r18 ;; mov dbr[r18]=r16 +#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC) \ + || defined(CONFIG_ITANIUM_C0_SPECIFIC) + ;; + srlz.d +#endif mov ibr[r18]=r17 br.cloop.sptk.few 1b ;; @@ -754,7 +765,7 @@ GLOBAL_ENTRY(ia64_spinlock_contention) mov tmp=ar.itc (p15) br.cond.sptk .wait ;; - ld1 tmp=[r31] + ld4 tmp=[r31] ;; cmp.ne p15,p0=tmp,r0 mov tmp=ar.itc @@ -764,7 +775,7 @@ GLOBAL_ENTRY(ia64_spinlock_contention) mov tmp=1 ;; IA64_SEMFIX_INSN - cmpxchg1.acq tmp=[r31],tmp,ar.ccv + 
cmpxchg4.acq tmp=[r31],tmp,ar.ccv ;; cmp.eq p15,p0=tmp,r0 diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index d3d2416cf..f831f86d9 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -24,9 +24,8 @@ EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(strstr); EXPORT_SYMBOL(strtok); -#include <linux/pci.h> -EXPORT_SYMBOL(pci_alloc_consistent); -EXPORT_SYMBOL(pci_free_consistent); +#include <asm/hw_irq.h> +EXPORT_SYMBOL(isa_irq_to_vector_map); #include <linux/in6.h> #include <asm/checksum.h> @@ -49,14 +48,6 @@ EXPORT_SYMBOL(disable_irq_nosync); #include <asm/page.h> EXPORT_SYMBOL(clear_page); -#include <asm/pci.h> -EXPORT_SYMBOL(pci_dma_sync_sg); -EXPORT_SYMBOL(pci_dma_sync_single); -EXPORT_SYMBOL(pci_map_sg); -EXPORT_SYMBOL(pci_map_single); -EXPORT_SYMBOL(pci_unmap_sg); -EXPORT_SYMBOL(pci_unmap_single); - #include <asm/processor.h> EXPORT_SYMBOL(cpu_data); EXPORT_SYMBOL(kernel_thread); @@ -92,6 +83,9 @@ EXPORT_SYMBOL(__global_restore_flags); #include <asm/uaccess.h> EXPORT_SYMBOL(__copy_user); EXPORT_SYMBOL(__do_clear_user); +EXPORT_SYMBOL(__strlen_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__strnlen_user); #include <asm/unistd.h> EXPORT_SYMBOL(__ia64_syscall); diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c new file mode 100644 index 000000000..9d8408c3f --- /dev/null +++ b/arch/ia64/kernel/iosapic.c @@ -0,0 +1,498 @@ +/* + * I/O SAPIC support. + * + * Copyright (C) 1999 Intel Corp. + * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com> + * Copyright (C) 1999-2000 Hewlett-Packard Co. + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> + * + * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code. + * In particular, we now have separate handlers for edge + * and level triggered interrupts. + * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation + * PCI to vector mapping, shared PCI interrupts. + * 00/10/27 D. Mosberger Document things a bit more to make them more understandable. + * Clean up much of the old IOSAPIC cruft. + */ +/* + * Here is what the interrupt logic between a PCI device and the CPU looks like: + * + * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The + * device is uniquely identified by its bus-, device-, and slot-number (the function + * number does not matter here because all functions share the same interrupt + * lines). + * + * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC controller. + * Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level + * triggered and use the same polarity). Each interrupt line has a unique IOSAPIC + * irq number which can be calculated as the sum of the controller's base irq number + * and the IOSAPIC pin number to which the line connects. + * + * (3) The IOSAPIC uses an internal table to map the IOSAPIC pin into the IA-64 interrupt + * vector. This interrupt vector is then sent to the CPU. + * + * In other words, there are two levels of indirections involved: + * + * pci pin -> iosapic irq -> IA-64 vector + * + * Note: outside this module, IA-64 vectors are called "irqs". This is because that's + * the traditional name Linux uses for interrupt vectors. 
+ */ +#include <linux/config.h> + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/string.h> +#include <linux/irq.h> + +#include <asm/acpi-ext.h> +#include <asm/delay.h> +#include <asm/io.h> +#include <asm/iosapic.h> +#include <asm/machvec.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> + +#ifdef CONFIG_ACPI_KERNEL_CONFIG +# include <asm/acpikcfg.h> +#endif + +#undef DEBUG_IRQ_ROUTING + +static spinlock_t iosapic_lock = SPIN_LOCK_UNLOCKED; + +/* PCI pin to IOSAPIC irq routing information. This info typically comes from ACPI. */ + +static struct { + int num_routes; + struct pci_vector_struct *route; +} pci_irq; + +/* This tables maps IA-64 vectors to the IOSAPIC pin that generates this vector. */ + +static struct iosapic_irq { + char *addr; /* base address of IOSAPIC */ + unsigned char base_irq; /* first irq assigned to this IOSAPIC */ + char pin; /* IOSAPIC pin (-1 => not an IOSAPIC irq) */ + unsigned char dmode : 3; /* delivery mode (see iosapic.h) */ + unsigned char polarity : 1; /* interrupt polarity (see iosapic.h) */ + unsigned char trigger : 1; /* trigger mode (see iosapic.h) */ +} iosapic_irq[NR_IRQS]; + +/* + * Translate IOSAPIC irq number to the corresponding IA-64 interrupt vector. If no + * entry exists, return -1. + */ +static int +iosapic_irq_to_vector (int irq) +{ + int vector; + + for (vector = 0; vector < NR_IRQS; ++vector) + if (iosapic_irq[vector].base_irq + iosapic_irq[vector].pin == irq) + return vector; + return -1; +} + +/* + * Map PCI pin to the corresponding IA-64 interrupt vector. If no such mapping exists, + * return -1. + */ +static int +pci_pin_to_vector (int bus, int slot, int pci_pin) +{ + struct pci_vector_struct *r; + + for (r = pci_irq.route; r < pci_irq.route + pci_irq.num_routes; ++r) + if (r->bus == bus && (r->pci_id >> 16) == slot && r->pin == pci_pin) + return iosapic_irq_to_vector(r->irq); + return -1; +} + +static void +set_rte (unsigned int vector, unsigned long dest) +{ + unsigned long pol, trigger, dmode; + u32 low32, high32; + char *addr; + int pin; + + pin = iosapic_irq[vector].pin; + if (pin < 0) + return; /* not an IOSAPIC interrupt */ + + addr = iosapic_irq[vector].addr; + pol = iosapic_irq[vector].polarity; + trigger = iosapic_irq[vector].trigger; + dmode = iosapic_irq[vector].dmode; + + low32 = ((pol << IOSAPIC_POLARITY_SHIFT) | + (trigger << IOSAPIC_TRIGGER_SHIFT) | + (dmode << IOSAPIC_DELIVERY_SHIFT) | + vector); + +#ifdef CONFIG_IA64_AZUSA_HACKS + /* set Flush Disable bit */ + if (addr != (char *) 0xc0000000fec00000) + low32 |= (1 << 17); +#endif + + /* dest contains both id and eid */ + high32 = (dest << IOSAPIC_DEST_SHIFT); + + writel(IOSAPIC_RTE_HIGH(pin), addr + IOSAPIC_REG_SELECT); + writel(high32, addr + IOSAPIC_WINDOW); + writel(IOSAPIC_RTE_LOW(pin), addr + IOSAPIC_REG_SELECT); + writel(low32, addr + IOSAPIC_WINDOW); +} + +static void +nop (unsigned int vector) +{ + /* do nothing... */ +} + +static void +mask_irq (unsigned int vector) +{ + unsigned long flags; + char *addr; + u32 low32; + int pin; + + addr = iosapic_irq[vector].addr; + pin = iosapic_irq[vector].pin; + + if (pin < 0) + return; /* not an IOSAPIC interrupt! 
*/ + + spin_lock_irqsave(&iosapic_lock, flags); + { + writel(IOSAPIC_RTE_LOW(pin), addr + IOSAPIC_REG_SELECT); + low32 = readl(addr + IOSAPIC_WINDOW); + + low32 |= (1 << IOSAPIC_MASK_SHIFT); /* set only the mask bit */ + writel(low32, addr + IOSAPIC_WINDOW); + } + spin_unlock_irqrestore(&iosapic_lock, flags); +} + +static void +unmask_irq (unsigned int vector) +{ + unsigned long flags; + char *addr; + u32 low32; + int pin; + + addr = iosapic_irq[vector].addr; + pin = iosapic_irq[vector].pin; + if (pin < 0) + return; /* not an IOSAPIC interrupt! */ + + spin_lock_irqsave(&iosapic_lock, flags); + { + writel(IOSAPIC_RTE_LOW(pin), addr + IOSAPIC_REG_SELECT); + low32 = readl(addr + IOSAPIC_WINDOW); + + low32 &= ~(1 << IOSAPIC_MASK_SHIFT); /* clear only the mask bit */ + writel(low32, addr + IOSAPIC_WINDOW); + } + spin_unlock_irqrestore(&iosapic_lock, flags); +} + + +static void +iosapic_set_affinity (unsigned int vector, unsigned long mask) +{ + printk("iosapic_set_affinity: not implemented yet\n"); +} + +/* + * Handlers for level-triggered interrupts. + */ + +static unsigned int +iosapic_startup_level_irq (unsigned int vector) +{ + unmask_irq(vector); + return 0; +} + +static void +iosapic_end_level_irq (unsigned int vector) +{ + writel(vector, iosapic_irq[vector].addr + IOSAPIC_EOI); +} + +#define iosapic_shutdown_level_irq mask_irq +#define iosapic_enable_level_irq unmask_irq +#define iosapic_disable_level_irq mask_irq +#define iosapic_ack_level_irq nop + +struct hw_interrupt_type irq_type_iosapic_level = { + typename: "IO-SAPIC-level", + startup: iosapic_startup_level_irq, + shutdown: iosapic_shutdown_level_irq, + enable: iosapic_enable_level_irq, + disable: iosapic_disable_level_irq, + ack: iosapic_ack_level_irq, + end: iosapic_end_level_irq, + set_affinity: iosapic_set_affinity +}; + +/* + * Handlers for edge-triggered interrupts. + */ + +static unsigned int +iosapic_startup_edge_irq (unsigned int vector) +{ + unmask_irq(vector); + /* + * IOSAPIC simply drops interrupts pended while the + * corresponding pin was masked, so we can't know if an + * interrupt is pending already. Let's hope not... + */ + return 0; +} + +static void +iosapic_ack_edge_irq (unsigned int vector) +{ + /* + * Once we have recorded IRQ_PENDING already, we can mask the + * interrupt for real. This prevents IRQ storms from unhandled + * devices. + */ + if ((irq_desc[vector].status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED)) + mask_irq(vector); +} + +#define iosapic_enable_edge_irq unmask_irq +#define iosapic_disable_edge_irq nop +#define iosapic_end_edge_irq nop + +struct hw_interrupt_type irq_type_iosapic_edge = { + typename: "IO-SAPIC-edge", + startup: iosapic_startup_edge_irq, + shutdown: iosapic_disable_edge_irq, + enable: iosapic_enable_edge_irq, + disable: iosapic_disable_edge_irq, + ack: iosapic_ack_edge_irq, + end: iosapic_end_edge_irq, + set_affinity: iosapic_set_affinity +}; + +static unsigned int +iosapic_version (char *addr) +{ + /* + * IOSAPIC Version Register return 32 bit structure like: + * { + * unsigned int version : 8; + * unsigned int reserved1 : 8; + * unsigned int pins : 8; + * unsigned int reserved2 : 8; + * } + */ + writel(IOSAPIC_VERSION, addr + IOSAPIC_REG_SELECT); + return readl(IOSAPIC_WINDOW + addr); +} + +/* + * ACPI calls this when it finds an entry for a legacy ISA interrupt. Note that the + * irq_base and IOSAPIC address must be set in iosapic_init(). 
+ */ +void +iosapic_register_legacy_irq (unsigned long irq, + unsigned long pin, unsigned long polarity, + unsigned long edge_triggered) +{ + unsigned int vector = isa_irq_to_vector(irq); + +#ifdef DEBUG_IRQ_ROUTING + printk("ISA: IRQ %u -> IOSAPIC irq 0x%02x (%s, %s) -> vector %02x\n", + (unsigned) irq, (unsigned) pin, + polarity ? "high" : "low", edge_triggered ? "edge" : "level", + vector); +#endif + + iosapic_irq[vector].pin = pin; + iosapic_irq[vector].dmode = IOSAPIC_LOWEST_PRIORITY; + iosapic_irq[vector].polarity = polarity ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW; + iosapic_irq[vector].trigger = edge_triggered ? IOSAPIC_EDGE : IOSAPIC_LEVEL; +} + +void __init +iosapic_init (unsigned long phys_addr, unsigned int base_irq) +{ + struct hw_interrupt_type *irq_type; + int i, irq, max_pin, vector; + unsigned int ver; + char *addr; + static int first_time = 1; + + if (first_time) { + first_time = 0; + + for (vector = 0; vector < NR_IRQS; ++vector) + iosapic_irq[vector].pin = -1; /* mark as unused */ + + /* + * Fetch the PCI interrupt routing table: + */ +#ifdef CONFIG_ACPI_KERNEL_CONFIG + acpi_cf_get_pci_vectors(&pci_irq.route, &pci_irq.num_routes); +#else + pci_irq.route = + (struct pci_vector_struct *) __va(ia64_boot_param.pci_vectors); + pci_irq.num_routes = ia64_boot_param.num_pci_vectors; +#endif + } + + addr = ioremap(phys_addr, 0); + + ver = iosapic_version(addr); + max_pin = (ver >> 16) & 0xff; + + printk("IOSAPIC: version %x.%x, address 0x%lx, IRQs 0x%02x-0x%02x\n", + (ver & 0xf0) >> 4, (ver & 0x0f), phys_addr, base_irq, base_irq + max_pin); + + if (base_irq == 0) + /* + * Map the legacy ISA devices into the IOSAPIC data. Some of these may + * get reprogrammed later on with data from the ACPI Interrupt Source + * Override table. + */ + for (irq = 0; irq < 16; ++irq) { + vector = isa_irq_to_vector(irq); + iosapic_irq[vector].addr = addr; + iosapic_irq[vector].base_irq = 0; + if (iosapic_irq[vector].pin == -1) + iosapic_irq[vector].pin = irq; + iosapic_irq[vector].dmode = IOSAPIC_LOWEST_PRIORITY; + iosapic_irq[vector].trigger = IOSAPIC_EDGE; + iosapic_irq[vector].polarity = IOSAPIC_POL_HIGH; +#ifdef DEBUG_IRQ_ROUTING + printk("ISA: IRQ %u -> IOSAPIC irq 0x%02x (high, edge) -> vector 0x%02x\n", + irq, iosapic_irq[vector].base_irq + iosapic_irq[vector].pin, + vector); +#endif + irq_type = &irq_type_iosapic_edge; + if (irq_desc[vector].handler != irq_type) { + if (irq_desc[vector].handler != &no_irq_type) + printk("iosapic_init: changing vector 0x%02x from %s to " + "%s\n", irq, irq_desc[vector].handler->typename, + irq_type->typename); + irq_desc[vector].handler = irq_type; + } + + /* program the IOSAPIC routing table: */ + set_rte(vector, (ia64_get_lid() >> 16) & 0xffff); + } + +#ifndef CONFIG_IA64_SOFTSDV_HACKS + for (i = 0; i < pci_irq.num_routes; i++) { + irq = pci_irq.route[i].irq; + + if ((unsigned) (irq - base_irq) > max_pin) + /* the interrupt route is for another controller... 
*/ + continue; + + if (irq < 16) + vector = isa_irq_to_vector(irq); + else { + vector = iosapic_irq_to_vector(irq); + if (vector < 0) + /* new iosapic irq: allocate a vector for it */ + vector = ia64_alloc_irq(); + } + + iosapic_irq[vector].addr = addr; + iosapic_irq[vector].base_irq = base_irq; + iosapic_irq[vector].pin = (irq - base_irq); + iosapic_irq[vector].dmode = IOSAPIC_LOWEST_PRIORITY; + iosapic_irq[vector].trigger = IOSAPIC_LEVEL; + iosapic_irq[vector].polarity = IOSAPIC_POL_LOW; + +# ifdef DEBUG_IRQ_ROUTING + printk("PCI: (B%d,I%d,P%d) -> IOSAPIC irq 0x%02x -> vector 0x%02x\n", + pci_irq.route[i].bus, pci_irq.route[i].pci_id>>16, pci_irq.route[i].pin, + iosapic_irq[vector].base_irq + iosapic_irq[vector].pin, vector); +# endif + irq_type = &irq_type_iosapic_level; + if (irq_desc[vector].handler != irq_type){ + if (irq_desc[vector].handler != &no_irq_type) + printk("iosapic_init: changing vector 0x%02x from %s to %s\n", + vector, irq_desc[vector].handler->typename, + irq_type->typename); + irq_desc[vector].handler = irq_type; + } + + /* program the IOSAPIC routing table: */ + set_rte(vector, (ia64_get_lid() >> 16) & 0xffff); + } +#endif /* !CONFIG_IA64_SOFTSDV_HACKS */ +} + +void +iosapic_pci_fixup (int phase) +{ + struct pci_dev *dev; + unsigned char pin; + int vector; + + if (phase != 1) + return; + + pci_for_each_dev(dev) { + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if (pin) { + pin--; /* interrupt pins are numbered starting from 1 */ + vector = pci_pin_to_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); + if (vector < 0 && dev->bus->parent) { + /* go back to the bridge */ + struct pci_dev *bridge = dev->bus->self; + + if (bridge) { + /* allow for multiple bridges on an adapter */ + do { + /* do the bridge swizzle... */ + pin = (pin + PCI_SLOT(dev->devfn)) % 4; + vector = pci_pin_to_vector(bridge->bus->number, + PCI_SLOT(bridge->devfn), + pin); + } while (vector < 0 && (bridge = bridge->bus->self)); + } + if (vector >= 0) + printk(KERN_WARNING + "PCI: using PPB(B%d,I%d,P%d) to get vector %02x\n", + bridge->bus->number, PCI_SLOT(bridge->devfn), + pin, vector); + else + printk(KERN_WARNING + "PCI: Couldn't map irq for (B%d,I%d,P%d)o\n", + bridge->bus->number, PCI_SLOT(bridge->devfn), + pin); + } + if (vector >= 0) { + printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> 0x%02x\n", + dev->bus->number, PCI_SLOT(dev->devfn), pin, vector); + dev->irq = vector; + } + } + /* + * Nothing to fixup + * Fix out-of-range IRQ numbers + */ + if (dev->irq >= NR_IRQS) + dev->irq = 15; /* Spurious interrupts */ + } +} diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index b3646e275..ab8961a54 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -541,6 +541,18 @@ void enable_irq(unsigned int irq) spin_unlock_irqrestore(&desc->lock, flags); } +void do_IRQ_per_cpu(unsigned long irq, struct pt_regs *regs) +{ + irq_desc_t *desc = irq_desc + irq; + int cpu = smp_processor_id(); + + kstat.irqs[cpu][irq]++; + + desc->handler->ack(irq); + handle_IRQ_event(irq, regs, desc->action); + desc->handler->end(irq); +} + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -581,8 +593,7 @@ unsigned int do_IRQ(unsigned long irq, struct pt_regs *regs) if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { action = desc->action; status &= ~IRQ_PENDING; /* we commit to handling */ - if (!(status & IRQ_PER_CPU)) - status |= IRQ_INPROGRESS; /* we are handling it */ + status |= IRQ_INPROGRESS; /* we are handling it */ } 
desc->status = status; diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 2166e205f..155ee66b7 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -7,6 +7,9 @@ * * 6/10/99: Updated to bring in sync with x86 version to facilitate * support for SMP and different interrupt controllers. + * + * 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector + * PCI to vector allocation routine. */ #include <linux/config.h> @@ -35,38 +38,28 @@ #define IRQ_DEBUG 0 -#ifdef CONFIG_ITANIUM_A1_SPECIFIC -spinlock_t ivr_read_lock; -#endif - /* default base addr of IPI table */ unsigned long ipi_base_addr = (__IA64_UNCACHED_OFFSET | IPI_DEFAULT_BASE_ADDR); /* - * Legacy IRQ to IA-64 vector translation table. Any vector not in - * this table maps to itself (ie: irq 0x30 => IA64 vector 0x30) + * Legacy IRQ to IA-64 vector translation table. */ __u8 isa_irq_to_vector_map[16] = { /* 8259 IRQ translation, first 16 entries */ - 0x60, 0x50, 0x10, 0x51, 0x52, 0x53, 0x43, 0x54, - 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x40, 0x41 + 0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, + 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21 }; -#ifdef CONFIG_ITANIUM_A1_SPECIFIC - -int usbfix; - -static int __init -usbfix_option (char *str) +int +ia64_alloc_irq (void) { - printk("irq: enabling USB workaround\n"); - usbfix = 1; - return 1; -} + static int next_irq = FIRST_DEVICE_IRQ; -__setup("usbfix", usbfix_option); - -#endif /* CONFIG_ITANIUM_A1_SPECIFIC */ + if (next_irq > LAST_DEVICE_IRQ) + /* XXX could look for sharable vectors instead of panic'ing... */ + panic("ia64_alloc_irq: out of interrupt vectors!"); + return next_irq++; +} /* * That's where the IVT branches when we get an external @@ -77,42 +70,6 @@ void ia64_handle_irq (unsigned long vector, struct pt_regs *regs) { unsigned long saved_tpr; -#ifdef CONFIG_ITANIUM_A1_SPECIFIC - unsigned long eoi_ptr; - -# ifdef CONFIG_USB - extern void reenable_usb (void); - extern void disable_usb (void); - - if (usbfix) - disable_usb(); -# endif - /* - * Stop IPIs by getting the ivr_read_lock - */ - spin_lock(&ivr_read_lock); - { - unsigned int tmp; - /* - * Disable PCI writes - */ - outl(0x80ff81c0, 0xcf8); - tmp = inl(0xcfc); - outl(tmp | 0x400, 0xcfc); - eoi_ptr = inl(0xcfc); - vector = ia64_get_ivr(); - /* - * Enable PCI writes - */ - outl(tmp, 0xcfc); - } - spin_unlock(&ivr_read_lock); - -# ifdef CONFIG_USB - if (usbfix) - reenable_usb(); -# endif -#endif /* CONFIG_ITANIUM_A1_SPECIFIC */ #if IRQ_DEBUG { @@ -161,7 +118,10 @@ ia64_handle_irq (unsigned long vector, struct pt_regs *regs) ia64_set_tpr(vector); ia64_srlz_d(); - do_IRQ(vector, regs); + if ((irq_desc[vector].status & IRQ_PER_CPU) != 0) + do_IRQ_per_cpu(vector, regs); + else + do_IRQ(vector, regs); /* * Disable interrupts and send EOI: @@ -169,9 +129,6 @@ ia64_handle_irq (unsigned long vector, struct pt_regs *regs) local_irq_disable(); ia64_set_tpr(saved_tpr); ia64_eoi(); -#ifdef CONFIG_ITANIUM_A1_SPECIFIC - break; -#endif vector = ia64_get_ivr(); } while (vector != IA64_SPURIOUS_INT); } @@ -194,8 +151,8 @@ init_IRQ (void) * Disable all local interrupts */ ia64_set_itv(0, 1); - ia64_set_lrr0(0, 1); - ia64_set_lrr1(0, 1); + ia64_set_lrr0(0, 1); + ia64_set_lrr1(0, 1); irq_desc[IA64_SPURIOUS_INT].handler = &irq_type_ia64_sapic; #ifdef CONFIG_SMP @@ -217,14 +174,11 @@ init_IRQ (void) } void -ipi_send (int cpu, int vector, int delivery_mode, int redirect) +ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect) { unsigned long ipi_addr; unsigned long 
ipi_data; unsigned long phys_cpu_id; -#ifdef CONFIG_ITANIUM_A1_SPECIFIC - unsigned long flags; -#endif #ifdef CONFIG_SMP phys_cpu_id = cpu_physical_id(cpu); @@ -239,13 +193,5 @@ ipi_send (int cpu, int vector, int delivery_mode, int redirect) ipi_data = (delivery_mode << 8) | (vector & 0xff); ipi_addr = ipi_base_addr | (phys_cpu_id << 4) | ((redirect & 1) << 3); -#ifdef CONFIG_ITANIUM_A1_SPECIFIC - spin_lock_irqsave(&ivr_read_lock, flags); -#endif - writeq(ipi_data, ipi_addr); - -#ifdef CONFIG_ITANIUM_A1_SPECIFIC - spin_unlock_irqrestore(&ivr_read_lock, flags); -#endif } diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index fa0ad0993..b75cd9dbc 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -6,6 +6,7 @@ * Copyright (C) 1998-2000 David Mosberger <davidm@hpl.hp.com> * * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling for SMP + * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB handler now uses virtual PT. */ /* * This file defines the interrupt vector table used by the CPU. @@ -44,23 +45,13 @@ #include <asm/system.h> #include <asm/unistd.h> -#define MINSTATE_START_SAVE_MIN /* no special action needed */ -#define MINSTATE_END_SAVE_MIN \ - or r2=r2,r14; /* make first base a kernel virtual address */ \ - or r12=r12,r14; /* make sp a kernel virtual address */ \ - or r13=r13,r14; /* make `current' a kernel virtual address */ \ - bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ - ;; - +#define MINSTATE_VIRT /* needed by minstate.h */ #include "minstate.h" #define FAULT(n) \ - rsm psr.dt; /* avoid nested faults due to TLB misses... */ \ - ;; \ - srlz.d; /* ensure everyone knows psr.dt is off... */ \ mov r31=pr; \ mov r19=n;; /* prepare to save predicates */ \ - br.cond.sptk.many dispatch_to_fault_handler + br.sptk.many dispatch_to_fault_handler /* * As we don't (hopefully) use the space available, we need to fill it with @@ -122,15 +113,14 @@ ia64_ivt: (p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place srlz.d // ensure "rsm psr.dt" has taken effect (p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir -(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 -(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 +(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT +(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 ;; (p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 (p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) cmp.eq p7,p6=0,r21 // unused address bits all zeroes? shr.u r18=r16,PMD_SHIFT // shift L2 index into position ;; -(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones? ld8 r17=[r17] // fetch the L1 entry (may be 0) ;; (p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? @@ -145,7 +135,7 @@ ia64_ivt: (p7) ld8 r18=[r21] // read the L3 PTE mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss ;; -(p7) tbit.z p6,p7=r18,0 // page present bit cleared? +(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? mov r22=cr.iha // get the VHPT address that caused the TLB miss ;; // avoid RAW on p7 (p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? @@ -153,7 +143,7 @@ ia64_ivt: ;; (p10) itc.i r18 // insert the instruction TLB entry (p11) itc.d r18 // insert the data TLB entry -(p6) br.spnt.few page_fault // handle bad address/page not present (page fault) +(p6) br.spnt.many page_fault // handle bad address/page not present (page fault) mov cr.ifa=r22 // Now compute and insert the TLB entry for the virtual page table. 
@@ -183,212 +173,117 @@ ia64_ivt: mov pr=r31,-1 // restore predicate registers rfi + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x0400 Entry 1 (size 64 bundles) ITLB (21) /* - * The ITLB basically does the same as the VHPT handler except - * that we always insert exactly one instruction TLB entry. - */ - /* - * Attempt to lookup PTE through virtual linear page table. - * The speculative access will fail if there is no TLB entry - * for the L3 page table page we're trying to access. + * The ITLB handler accesses the L3 PTE via the virtually mapped linear + * page table. If a nested TLB miss occurs, we switch into physical + * mode, walk the page table, and then re-execute the L3 PTE read + * and go on normally after that. */ +itlb_fault: mov r16=cr.ifa // get virtual address - mov r19=cr.iha // get virtual address of L3 PTE - ;; - ld8.s r17=[r19] // try to read L3 PTE + mov r29=b0 // save b0 mov r31=pr // save predicates + mov r17=cr.iha // get virtual address of L3 PTE + movl r30=1f // load nested fault continuation point ;; - tnat.nz p6,p0=r17 // did read succeed? -(p6) br.cond.spnt.many 1f +1: ld8 r18=[r17] // read L3 PTE ;; - itc.i r17 + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? +(p6) br.cond.spnt.many page_fault + ;; + itc.i r18 ;; #ifdef CONFIG_SMP - ld8.s r18=[r19] // try to read L3 PTE again and see if same + ld8 r19=[r17] // read L3 PTE again and see if same mov r20=PAGE_SHIFT<<2 // setup page size for purge ;; - cmp.eq p6,p7=r17,r18 + cmp.ne p7,p0=r18,r19 ;; (p7) ptc.l r16,r20 #endif mov pr=r31,-1 rfi - -#ifdef CONFIG_DISABLE_VHPT -itlb_fault: -#endif -1: rsm psr.dt // use physical addressing for data - mov r19=ar.k7 // get page table base address - shl r21=r16,3 // shift bit 60 into sign bit - shr.u r17=r16,61 // get the region number into r17 ;; - cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? - shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address - ;; -(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place - srlz.d // ensure "rsm psr.dt" has taken effect -(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir -(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 -(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 - ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) - cmp.eq p7,p6=0,r21 // unused address bits all zeroes? - shr.u r18=r16,PMD_SHIFT // shift L2 index into position - ;; -(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones? - ld8 r17=[r17] // fetch the L1 entry (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry - ;; -(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) - shr.u r19=r16,PAGE_SHIFT // shift L3 index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? - dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry - ;; -(p7) ld8 r18=[r17] // read the L3 PTE - ;; -(p7) tbit.z p6,p7=r18,0 // page present bit cleared? 
- ;; -(p7) itc.i r18 // insert the instruction TLB entry -(p6) br.spnt.few page_fault // handle bad address/page not present (page fault) - ;; -#ifdef CONFIG_SMP - ld8 r19=[r17] // re-read the PTE and check if same - ;; - cmp.eq p6,p7=r18,r19 - mov r20=PAGE_SHIFT<<2 - ;; -(p7) ptc.l r16,r20 // PTE changed purge translation -#endif - - mov pr=r31,-1 // restore predicate registers - rfi .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) /* - * The DTLB basically does the same as the VHPT handler except - * that we always insert exactly one data TLB entry. - */ - /* - * Attempt to lookup PTE through virtual linear page table. - * The speculative access will fail if there is no TLB entry - * for the L3 page table page we're trying to access. + * The DTLB handler accesses the L3 PTE via the virtually mapped linear + * page table. If a nested TLB miss occurs, we switch into physical + * mode, walk the page table, and then re-execute the L3 PTE read + * and go on normally after that. */ +dtlb_fault: mov r16=cr.ifa // get virtual address - mov r19=cr.iha // get virtual address of L3 PTE - ;; - ld8.s r17=[r19] // try to read L3 PTE + mov r29=b0 // save b0 mov r31=pr // save predicates + mov r17=cr.iha // get virtual address of L3 PTE + movl r30=1f // load nested fault continuation point + ;; +1: ld8 r18=[r17] // read L3 PTE ;; - tnat.nz p6,p0=r17 // did read succeed? -(p6) br.cond.spnt.many 1f + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? +(p6) br.cond.spnt.many page_fault ;; - itc.d r17 + itc.d r18 ;; #ifdef CONFIG_SMP - ld8.s r18=[r19] // try to read L3 PTE again and see if same + ld8 r19=[r17] // read L3 PTE again and see if same mov r20=PAGE_SHIFT<<2 // setup page size for purge ;; - cmp.eq p6,p7=r17,r18 + cmp.ne p7,p0=r18,r19 ;; (p7) ptc.l r16,r20 #endif mov pr=r31,-1 rfi - -#ifdef CONFIG_DISABLE_VHPT -dtlb_fault: -#endif -1: rsm psr.dt // use physical addressing for data - mov r19=ar.k7 // get page table base address - shl r21=r16,3 // shift bit 60 into sign bit - shr.u r17=r16,61 // get the region number into r17 - ;; - cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? - shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address ;; -(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place - srlz.d // ensure "rsm psr.dt" has taken effect -(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir -(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 -(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 - ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) - cmp.eq p7,p6=0,r21 // unused address bits all zeroes? - shr.u r18=r16,PMD_SHIFT // shift L2 index into position - ;; -(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones? - ld8 r17=[r17] // fetch the L1 entry (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry - ;; -(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) - shr.u r19=r16,PAGE_SHIFT // shift L3 index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? - dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry - ;; -(p7) ld8 r18=[r17] // read the L3 PTE - ;; -(p7) tbit.z p6,p7=r18,0 // page present bit cleared? 
- ;; -(p7) itc.d r18 // insert the instruction TLB entry -(p6) br.spnt.few page_fault // handle bad address/page not present (page fault) - ;; -#ifdef CONFIG_SMP - ld8 r19=[r17] // re-read the PTE and check if same - ;; - cmp.eq p6,p7=r18,r19 - mov r20=PAGE_SHIFT<<2 - ;; -(p7) ptc.l r16,r20 // PTE changed purge translation -#endif - mov pr=r31,-1 // restore predicate registers - rfi .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) mov r16=cr.ifa // get address that caused the TLB miss -#ifdef CONFIG_DISABLE_VHPT + movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX + mov r21=cr.ipsr mov r31=pr ;; - shr.u r21=r16,61 // get the region number into r21 +#ifdef CONFIG_DISABLE_VHPT + shr.u r22=r16,61 // get the region number into r21 ;; - cmp.gt p6,p0=6,r21 // user mode -(p6) br.cond.dptk.many itlb_fault + cmp.gt p8,p0=6,r22 // user mode ;; - mov pr=r31,-1 -#endif - movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RX +(p8) thash r17=r16 ;; +(p8) mov cr.iha=r17 +(p8) br.cond.dptk.many itlb_fault +#endif + extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl shr.u r18=r16,57 // move address bit 61 to bit 4 - dep r16=0,r16,IA64_MAX_PHYS_BITS,(64-IA64_MAX_PHYS_BITS) // clear ed & reserved bits + dep r19=0,r16,IA64_MAX_PHYS_BITS,(64-IA64_MAX_PHYS_BITS) // clear ed & reserved bits ;; andcm r18=0x10,r18 // bit 4=~address-bit(61) - dep r16=r17,r16,0,12 // insert PTE control bits into r16 + cmp.ne p8,p0=r0,r23 // psr.cpl != 0? + dep r19=r17,r19,0,12 // insert PTE control bits into r19 ;; - or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6 + or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 +(p8) br.cond.spnt.many page_fault ;; - itc.i r16 // insert the TLB entry + itc.i r19 // insert the TLB entry + mov pr=r31,-1 rfi + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) mov r16=cr.ifa // get address that caused the TLB miss - movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RW + movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX mov r20=cr.isr mov r21=cr.ipsr mov r31=pr @@ -396,29 +291,40 @@ dtlb_fault: #ifdef CONFIG_DISABLE_VHPT shr.u r22=r16,61 // get the region number into r21 ;; - cmp.gt p8,p0=6,r22 // user mode + cmp.gt p8,p0=6,r22 // access to region 0-5 + ;; +(p8) thash r17=r16 + ;; +(p8) mov cr.iha=r17 (p8) br.cond.dptk.many dtlb_fault #endif + extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? 
shr.u r18=r16,57 // move address bit 61 to bit 4 - dep r16=0,r16,IA64_MAX_PHYS_BITS,(64-IA64_MAX_PHYS_BITS) // clear ed & reserved bits + dep r19=0,r16,IA64_MAX_PHYS_BITS,(64-IA64_MAX_PHYS_BITS) // clear ed & reserved bits ;; - dep r21=-1,r21,IA64_PSR_ED_BIT,1 andcm r18=0x10,r18 // bit 4=~address-bit(61) - dep r16=r17,r16,0,12 // insert PTE control bits into r16 + cmp.ne p8,p0=r0,r23 +(p8) br.cond.spnt.many page_fault + + dep r21=-1,r21,IA64_PSR_ED_BIT,1 + dep r19=r17,r19,0,12 // insert PTE control bits into r19 ;; - or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6 + or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 (p6) mov cr.ipsr=r21 ;; -(p7) itc.d r16 // insert the TLB entry +(p7) itc.d r19 // insert the TLB entry mov pr=r31,-1 rfi - ;; //----------------------------------------------------------------------------------- - // call do_page_fault (predicates are in r31, psr.dt is off, r16 is faulting address) + // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) page_fault: + ssm psr.dt + ;; + srlz.i + ;; SAVE_MIN_WITH_COVER // // Copy control registers to temporary registers, then turn on psr bits, @@ -430,7 +336,7 @@ page_fault: mov r9=cr.isr adds r3=8,r2 // set up second base pointer ;; - ssm psr.ic | psr.dt + ssm psr.ic ;; srlz.i // guarantee that interrupt collection is enabled ;; @@ -445,36 +351,37 @@ page_fault: mov rp=r14 ;; adds out2=16,r12 // out2 = pointer to pt_regs - br.call.sptk.few b6=ia64_do_page_fault // ignore return address + br.call.sptk.many b6=ia64_do_page_fault // ignore return address + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) // - // In the absence of kernel bugs, we get here when the Dirty-bit, Instruction - // Access-bit, or Data Access-bit faults cause a nested fault because the - // dTLB entry for the virtual page table isn't present. In such a case, - // we lookup the pte for the faulting address by walking the page table - // and return to the continuation point passed in register r30. - // In accessing the page tables, we don't need to check for NULL entries - // because if the page tables didn't map the faulting address, it would not - // be possible to receive one of the above faults. + // In the absence of kernel bugs, we get here when the virtually mapped linear page + // table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction + // Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page + // table is missing, a nested TLB miss fault is triggered and control is transferred + // to this point. When this happens, we lookup the pte for the faulting address + // by walking the page table in physical mode and return to the continuation point + // passed in register r30 (or call page_fault if the address is not mapped). 
// // Input: r16: faulting address // r29: saved b0 // r30: continuation address + // r31: saved pr // // Output: r17: physical address of L3 PTE of faulting address // r29: saved b0 // r30: continuation address + // r31: saved pr // - // Clobbered: b0, r18, r19, r21, r31, psr.dt (cleared) + // Clobbered: b0, r18, r19, r21, psr.dt (cleared) // rsm psr.dt // switch to using physical data addressing mov r19=ar.k7 // get the page table base address shl r21=r16,3 // shift bit 60 into sign bit ;; - mov r31=pr // save the predicate registers shr.u r17=r16,61 // get the region number into r17 ;; cmp.eq p6,p7=5,r17 // is faulting address in region 5? @@ -482,26 +389,30 @@ page_fault: ;; (p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place srlz.d -(p6) movl r17=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir -(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1 -(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4 +(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir +(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT +(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 ;; -(p6) dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 (p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? shr.u r18=r16,PMD_SHIFT // shift L2 index into position ;; - ld8 r17=[r17] // fetch the L1 entry + ld8 r17=[r17] // fetch the L1 entry (may be 0) mov b0=r30 ;; +(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry ;; - ld8 r17=[r17] // fetch the L2 entry +(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) shr.u r19=r16,PAGE_SHIFT // shift L3 index into position ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry ;; - mov pr=r31,-1 // restore predicates - br.cond.sptk.few b0 // return to continuation point +(p6) br.cond.spnt.many page_fault + br.sptk.many b0 // return to continuation point + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// @@ -526,33 +437,19 @@ page_fault: // a nested TLB miss hit where we look up the physical address of the L3 PTE // and then continue at label 1 below. 
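In C terms, the lookup the nested-TLB handler above performs (walking the page table in physical mode and bailing out to page_fault when an intermediate entry is NULL) looks roughly like the sketch below. The page-size constant, the types and the helper name are assumptions made for illustration only — they are not the kernel's actual definitions, and the real handler additionally folds the region bits into the pgd index and special-cases region 5 (swapper_pg_dir).

#include <stdint.h>

#define PAGE_SHIFT_X   14                              /* assumed 16KB pages, illustrative */
#define PTRS_PER_PT_X  (1UL << (PAGE_SHIFT_X - 3))     /* 8-byte entries per level */
#define PMD_SHIFT_X    (PAGE_SHIFT_X + (PAGE_SHIFT_X - 3))
#define PGDIR_SHIFT_X  (PMD_SHIFT_X + (PAGE_SHIFT_X - 3))

/* Return the address of the L3 PTE for 'addr', or NULL to signal page_fault. */
uint64_t *lookup_l3_pte(uint64_t *pgd, uint64_t addr)
{
    uint64_t l1_entry, l2_entry;
    uint64_t *pmd, *pte;

    l1_entry = pgd[(addr >> PGDIR_SHIFT_X) & (PTRS_PER_PT_X - 1)];
    if (!l1_entry)
        return NULL;                                   /* L1 entry NULL -> page_fault */

    pmd = (uint64_t *)(uintptr_t) l1_entry;
    l2_entry = pmd[(addr >> PMD_SHIFT_X) & (PTRS_PER_PT_X - 1)];
    if (!l2_entry)
        return NULL;                                   /* L2 entry NULL -> page_fault */

    pte = (uint64_t *)(uintptr_t) l2_entry;
    return &pte[(addr >> PAGE_SHIFT_X) & (PTRS_PER_PT_X - 1)];
}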
// -#ifndef CONFIG_SMP mov r16=cr.ifa // get the address that caused the fault movl r30=1f // load continuation point in case of nested fault ;; thash r17=r16 // compute virtual address of L3 PTE mov r29=b0 // save b0 in case of nested fault - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - or r18=_PAGE_D,r18 // set the dirty bit - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - itc.d r18 // install updated PTE -#else - mov r16=cr.ifa // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - ;; - thash r17=r16 // compute virtual address of L3 PTE + mov r31=pr // save pr +#ifdef CONFIG_SMP mov r28=ar.ccv // save ar.ccv - mov r29=b0 // save b0 in case of nested fault - mov r27=pr ;; 1: ld8 r18=[r17] ;; // avoid RAW on r18 mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_D,r18 // set the dirty bit + or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits ;; cmpxchg8.acq r26=[r17],r25,ar.ccv mov r24=PAGE_SHIFT<<2 @@ -568,70 +465,46 @@ page_fault: (p7) ptc.l r16,r24 mov b0=r29 // restore b0 mov ar.ccv=r28 - mov pr=r27,-1 +#else + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + itc.d r18 // install updated PTE #endif + mov pr=r31,-1 // restore pr rfi + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) // Like Entry 8, except for instruction access mov r16=cr.ifa // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + mov r31=pr // save predicates #ifdef CONFIG_ITANIUM /* - * Erratum 10 (IFA may contain incorrect address) now has - * "NoFix" status. There are no plans for fixing this. + * Erratum 10 (IFA may contain incorrect address) has "NoFix" status. */ mov r17=cr.ipsr - mov r31=pr // save predicates ;; mov r18=cr.iip tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set? ;; (p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa - mov pr=r31,-1 #endif /* CONFIG_ITANIUM */ - -#ifndef CONFIG_SMP - movl r30=1f // load continuation point in case of nested fault ;; thash r17=r16 // compute virtual address of L3 PTE mov r29=b0 // save b0 in case of nested fault) - ;; -1: ld8 r18=[r17] -#if defined(CONFIG_IA32_SUPPORT) && \ - (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B0_SPECIFIC)) - // - // Erratum 85 (Access bit fault could be reported before page not present fault) - // If the PTE is indicates the page is not present, then just turn this into a - // page fault. - // - mov r31=pr // save predicates - ;; - tbit.nz p6,p0=r18,0 // page present bit set? 
-(p6) br.cond.sptk 1f - ;; // avoid WAW on p6 - mov pr=r31,-1 - br.cond.sptk page_fault // page wasn't present -1: mov pr=r31,-1 -#else - ;; // avoid RAW on r18 -#endif - or r18=_PAGE_A,r18 // set the accessed bit - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - itc.i r18 // install updated PTE -#else - movl r30=1f // load continuation point in case of nested fault - ;; - thash r17=r16 // compute virtual address of L3 PTE +#ifdef CONFIG_SMP mov r28=ar.ccv // save ar.ccv - mov r29=b0 // save b0 in case of nested fault) - mov r27=pr ;; 1: ld8 r18=[r17] -#if defined(CONFIG_IA32_SUPPORT) && \ +# if defined(CONFIG_IA32_SUPPORT) && \ (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B0_SPECIFIC)) // // Erratum 85 (Access bit fault could be reported before page not present fault) @@ -639,15 +512,9 @@ page_fault: // page fault. // ;; - tbit.nz p6,p0=r18,0 // page present bit set? -(p6) br.cond.sptk 1f - ;; // avoid WAW on p6 - mov pr=r27,-1 - br.cond.sptk page_fault // page wasn't present -1: -#else - ;; // avoid RAW on r18 -#endif + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? +(p6) br.sptk page_fault // page wasn't present +# endif mov ar.ccv=r18 // set compare value for cmpxchg or r25=_PAGE_A,r18 // set the accessed bit ;; @@ -665,36 +532,42 @@ page_fault: (p7) ptc.l r16,r24 mov b0=r29 // restore b0 mov ar.ccv=r28 - mov pr=r27,-1 -#endif +#else /* !CONFIG_SMP */ + ;; +1: ld8 r18=[r17] + ;; +# if defined(CONFIG_IA32_SUPPORT) && \ + (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B0_SPECIFIC)) + // + // Erratum 85 (Access bit fault could be reported before page not present fault) + // If the PTE is indicates the page is not present, then just turn this into a + // page fault. + // + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? 
+(p6) br.sptk page_fault // page wasn't present +# endif + or r18=_PAGE_A,r18 // set the accessed bit + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + itc.i r18 // install updated PTE +#endif /* !CONFIG_SMP */ + mov pr=r31,-1 rfi + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) // Like Entry 8, except for data access -#ifndef CONFIG_SMP mov r16=cr.ifa // get the address that caused the fault movl r30=1f // load continuation point in case of nested fault ;; thash r17=r16 // compute virtual address of L3 PTE + mov r31=pr mov r29=b0 // save b0 in case of nested fault) - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - or r18=_PAGE_A,r18 // set the accessed bit - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - itc.d r18 // install updated PTE -#else - mov r16=cr.ifa // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - ;; - thash r17=r16 // compute virtual address of L3 PTE +#ifdef CONFIG_SMP mov r28=ar.ccv // save ar.ccv - mov r29=b0 // save b0 in case of nested fault - mov r27=pr ;; 1: ld8 r18=[r17] ;; // avoid RAW on r18 @@ -713,11 +586,20 @@ page_fault: cmp.eq p6,p7=r18,r25 // is it same as the newly installed ;; (p7) ptc.l r16,r24 - mov b0=r29 // restore b0 mov ar.ccv=r28 - mov pr=r27,-1 +#else + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_A,r18 // set the accessed bit + ;; + st8 [r17]=r18 // store back updated PTE + itc.d r18 // install updated PTE #endif + mov b0=r29 // restore b0 + mov pr=r31,-1 rfi + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// @@ -725,16 +607,14 @@ page_fault: mov r16=cr.iim mov r17=__IA64_BREAK_SYSCALL mov r31=pr // prepare to save predicates - rsm psr.dt // avoid nested faults due to TLB misses... ;; - srlz.d // ensure everyone knows psr.dt is off... cmp.eq p0,p7=r16,r17 // is this a system call? (p7 <- false, if so) (p7) br.cond.spnt.many non_syscall SAVE_MIN // uses r31; defines r2: - // turn interrupt collection and data translation back on: - ssm psr.ic | psr.dt + // turn interrupt collection back on: + ssm psr.ic ;; srlz.i // guarantee that interrupt collection is enabled cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 @@ -746,14 +626,13 @@ page_fault: adds r3=8,r2 // set up second base pointer for SAVE_REST ;; SAVE_REST - ;; // avoid WAW on r2 & r3 + br.call.sptk rp=demine_args // clear NaT bits in (potential) syscall args mov r3=255 adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024 adds r2=IA64_TASK_PTRACE_OFFSET,r13 // r2 = ¤t->ptrace - ;; - cmp.geu.unc p6,p7=r3,r15 // (syscall > 0 && syscall <= 1024+255) ? + cmp.geu p6,p7=r3,r15 // (syscall > 0 && syscall <= 1024+255) ? 
movl r16=sys_call_table ;; (p6) shladd r16=r15,3,r16 @@ -788,40 +667,61 @@ page_fault: ;; st8 [r16]=r18 // store new value for cr.isr -(p8) br.call.sptk.few b6=b6 // ignore this return addr - br.call.sptk.few rp=ia64_trace_syscall // rp will be overwritten (ignored) +(p8) br.call.sptk.many b6=b6 // ignore this return addr + br.call.sptk.many rp=ia64_trace_syscall // rp will be overwritten (ignored) // NOT REACHED + .proc demine_args +demine_args: + alloc r2=ar.pfs,8,0,0,0 + tnat.nz p8,p0=in0 + tnat.nz p9,p0=in1 + ;; +(p8) mov in0=-1 + tnat.nz p10,p0=in2 + tnat.nz p11,p0=in3 + +(p9) mov in1=-1 + tnat.nz p12,p0=in4 + tnat.nz p13,p0=in5 + ;; +(p10) mov in2=-1 + tnat.nz p14,p0=in6 + tnat.nz p15,p0=in7 + +(p11) mov in3=-1 +(p12) mov in4=-1 +(p13) mov in5=-1 + ;; +(p14) mov in6=-1 +(p15) mov in7=-1 + br.ret.sptk.many rp + .endp demine_args + .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) - rsm psr.dt // avoid nested faults due to TLB misses... - ;; - srlz.d // ensure everyone knows psr.dt is off... mov r31=pr // prepare to save predicates ;; SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 - ssm psr.ic | psr.dt // turn interrupt collection and data translation back on + ssm psr.ic // turn interrupt collection ;; adds r3=8,r2 // set up second base pointer for SAVE_REST - srlz.i // ensure everybody knows psr.ic and psr.dt are back on + srlz.i // ensure everybody knows psr.ic is back on ;; SAVE_REST ;; alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group -#ifdef CONFIG_ITANIUM_A1_SPECIFIC - mov out0=r0 // defer reading of cr.ivr to handle_irq... -#else mov out0=cr.ivr // pass cr.ivr as first arg -#endif add out1=16,sp // pass pointer to pt_regs as second arg ;; srlz.d // make sure we see the effect of cr.ivr movl r14=ia64_leave_kernel ;; mov rp=r14 - br.call.sptk.few b6=ia64_handle_irq + br.call.sptk.many b6=ia64_handle_irq + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// @@ -855,7 +755,7 @@ dispatch_illegal_op_fault: // The "alloc" can cause a mandatory store which could lead to // an "Alt DTLB" fault which we can handle only if psr.ic is on. 
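Looking back at the break-instruction (system call) path above: the handler biases the incoming number by -1024 and accepts only 256 entries before indexing sys_call_table. A compact, self-contained C restatement of just that range check follows; the table contents, the stub and the sample numbers are placeholders, not the kernel's.

#include <stdio.h>

typedef long (*syscall_fn)(void);

#define SYSCALL_BASE 1024UL
#define NR_ENTRIES   256UL

static long sys_ni_syscall(void) { return -1; }        /* stand-in for the "not implemented" stub */
static long sys_example(void)    { return 0;  }

static syscall_fn sys_call_table[NR_ENTRIES] = { sys_example /* slot 0 == syscall 1024 */ };

static syscall_fn pick_syscall(unsigned long nr)
{
    unsigned long idx = nr - SYSCALL_BASE;             /* "adds r15=-1024,r15" */

    if (idx <= NR_ENTRIES - 1)                         /* "cmp.geu p6,p7=r3,r15" with r3=255 */
        return sys_call_table[idx] ? sys_call_table[idx] : sys_ni_syscall;
    return sys_ni_syscall;                             /* out of range (includes nr < 1024: idx wraps) */
}

int main(void)
{
    printf("%ld %ld\n", pick_syscall(1024)(), pick_syscall(2000)());  /* prints: 0 -1 */
    return 0;
}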
// - ssm psr.ic | psr.dt + ssm psr.ic ;; srlz.i // guarantee that interrupt collection is enabled ;; @@ -867,7 +767,7 @@ dispatch_illegal_op_fault: ;; SAVE_REST ;; - br.call.sptk.few rp=ia64_illegal_op_fault + br.call.sptk.many rp=ia64_illegal_op_fault .ret0: ;; alloc r14=ar.pfs,0,0,3,0 // must be first in insn group mov out0=r9 @@ -881,6 +781,7 @@ dispatch_illegal_op_fault: cmp.ne p6,p0=0,r8 (p6) br.call.dpnt b6=b6 // call returns to ia64_leave_kernel br.sptk ia64_leave_kernel + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// @@ -900,7 +801,7 @@ dispatch_to_ia32_handler: SAVE_MIN ;; mov r14=cr.isr - ssm psr.ic | psr.dt + ssm psr.ic ;; srlz.i // guarantee that interrupt collection is enabled ;; @@ -913,7 +814,7 @@ dispatch_to_ia32_handler: shr r14=r14,16 // Get interrupt number ;; cmp.ne p6,p0=r14,r15 -(p6) br.call.dpnt.few b6=non_ia32_syscall +(p6) br.call.dpnt.many b6=non_ia32_syscall adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp @@ -924,7 +825,7 @@ dispatch_to_ia32_handler: alloc r15=ar.pfs,0,0,6,0 // must first in an insn group ;; ld4 r8=[r14],8 // r8 == EAX (syscall number) - mov r15=190 // sys_vfork - last implemented system call + mov r15=222 // sys_vfork - last implemented system call ;; cmp.leu.unc p6,p7=r8,r15 ld4 out1=[r14],8 // r9 == ecx @@ -961,11 +862,12 @@ non_ia32_syscall: mov out0=r14 // interrupt # add out1=16,sp // pointer to pt_regs ;; // avoid WAW on CFM - br.call.sptk.few rp=ia32_bad_interrupt + br.call.sptk.many rp=ia32_bad_interrupt .ret1: movl r15=ia64_leave_kernel ;; mov rp=r15 br.ret.sptk.many rp + ;; #endif /* CONFIG_IA32_SUPPORT */ @@ -985,8 +887,8 @@ non_syscall: mov r8=cr.iim // get break immediate (must be done while psr.ic is off) adds r3=8,r2 // set up second base pointer for SAVE_REST - // turn interrupt collection and data translation back on: - ssm psr.ic | psr.dt + // turn interrupt collection back on: + ssm psr.ic ;; srlz.i // guarantee that interrupt collection is enabled ;; @@ -1000,7 +902,8 @@ non_syscall: SAVE_REST mov rp=r15 ;; - br.call.sptk.few b6=ia64_bad_break // avoid WAW on CFM and ignore return addr + br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1023,7 +926,7 @@ dispatch_unaligned_handler: // wouldn't get the state to recover. 
// mov r15=cr.ifa - ssm psr.ic | psr.dt + ssm psr.ic ;; srlz.i // guarantee that interrupt collection is enabled ;; @@ -1039,7 +942,8 @@ dispatch_unaligned_handler: adds out1=16,sp // out1 = pointer to pt_regs ;; mov rp=r14 - br.sptk.few ia64_prepare_handle_unaligned + br.sptk.many ia64_prepare_handle_unaligned + ;; .align 1024 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1055,7 +959,6 @@ dispatch_to_fault_handler: // // Input: // psr.ic: off - // psr.dt: off // r19: fault vector number (e.g., 24 for General Exception) // r31: contains saved predicates (pr) // @@ -1071,7 +974,7 @@ dispatch_to_fault_handler: mov r10=cr.iim mov r11=cr.itir ;; - ssm psr.ic | psr.dt + ssm psr.ic ;; srlz.i // guarantee that interrupt collection is enabled ;; @@ -1089,7 +992,9 @@ dispatch_to_fault_handler: movl r14=ia64_leave_kernel ;; mov rp=r14 - br.call.sptk.few b6=ia64_fault + br.call.sptk.many b6=ia64_fault + ;; + // // --- End of long entries, Beginning of short entries // @@ -1099,16 +1004,16 @@ dispatch_to_fault_handler: // 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) mov r16=cr.ifa rsm psr.dt -#if 1 - // If you disable this, you MUST re-enable to update_mmu_cache() code in pgtable.h + // The Linux page fault handler doesn't expect non-present pages to be in + // the TLB. Flush the existing entry now, so we meet that expectation. mov r17=_PAGE_SIZE_4K<<2 ;; ptc.l r16,r17 -#endif ;; mov r31=pr srlz.d - br.cond.sptk.many page_fault + br.sptk.many page_fault + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1118,7 +1023,8 @@ dispatch_to_fault_handler: mov r31=pr ;; srlz.d - br.cond.sptk.many page_fault + br.sptk.many page_fault + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1128,7 +1034,8 @@ dispatch_to_fault_handler: mov r31=pr ;; srlz.d - br.cond.sptk.many page_fault + br.sptk.many page_fault + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1138,31 +1045,32 @@ dispatch_to_fault_handler: mov r31=pr ;; srlz.d - br.cond.sptk.many page_fault + br.sptk.many page_fault + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) mov r16=cr.isr mov r31=pr - rsm psr.dt // avoid nested faults due to TLB misses... ;; - srlz.d // ensure everyone knows psr.dt is off... 
cmp4.eq p6,p0=0,r16 (p6) br.sptk dispatch_illegal_op_fault ;; mov r19=24 // fault number - br.cond.sptk.many dispatch_to_fault_handler + br.sptk.many dispatch_to_fault_handler + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) - rsm psr.dt | psr.dfh // ensure we can access fph + rsm psr.dfh // ensure we can access fph ;; srlz.d mov r31=pr mov r19=25 - br.cond.sptk.many dispatch_to_fault_handler + br.sptk.many dispatch_to_fault_handler + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1204,6 +1112,7 @@ dispatch_to_fault_handler: ;; rfi // and go back + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1218,12 +1127,11 @@ dispatch_to_fault_handler: .align 256 ///////////////////////////////////////////////////////////////////////////////////////// // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) - rsm psr.dt // avoid nested faults due to TLB misses... mov r16=cr.ipsr mov r31=pr // prepare to save predicates ;; - srlz.d // ensure everyone knows psr.dt is off - br.cond.sptk.many dispatch_unaligned_handler + br.sptk.many dispatch_unaligned_handler + ;; .align 256 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1304,9 +1212,6 @@ dispatch_to_fault_handler: ///////////////////////////////////////////////////////////////////////////////////////// // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) #ifdef CONFIG_IA32_SUPPORT - rsm psr.dt - ;; - srlz.d mov r31=pr mov r16=cr.isr ;; @@ -1325,7 +1230,7 @@ dispatch_to_fault_handler: ;; mov pr=r31,-1 // restore predicate registers rfi - + ;; 1: #endif // CONFIG_IA32_SUPPORT FAULT(46) @@ -1334,11 +1239,9 @@ dispatch_to_fault_handler: ///////////////////////////////////////////////////////////////////////////////////////// // 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) #ifdef CONFIG_IA32_SUPPORT - rsm psr.dt - ;; - srlz.d mov r31=pr - br.cond.sptk.many dispatch_to_ia32_handler + br.sptk.many dispatch_to_ia32_handler + ;; #else FAULT(47) #endif diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c index 2afb5613e..df19a8d6f 100644 --- a/arch/ia64/kernel/machvec.c +++ b/arch/ia64/kernel/machvec.c @@ -1,11 +1,13 @@ #include <linux/config.h> + +#ifdef CONFIG_IA64_GENERIC + #include <linux/kernel.h> +#include <linux/string.h> #include <asm/page.h> #include <asm/machvec.h> -#ifdef CONFIG_IA64_GENERIC - struct ia64_machine_vector ia64_mv; /* diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 333258d35..1456b8d96 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -19,6 +19,7 @@ #include <linux/irq.h> #include <linux/smp_lock.h> +#include <asm/machvec.h> #include <asm/page.h> #include <asm/ptrace.h> #include <asm/system.h> @@ -26,7 +27,6 @@ #include <asm/mca.h> #include <asm/irq.h> -#include <asm/machvec.h> typedef struct ia64_fptr { @@ -365,7 +365,7 @@ ia64_mca_wakeup_ipi_wait(void) void ia64_mca_wakeup(int cpu) { - ipi_send(cpu, IA64_MCA_WAKEUP_INT_VECTOR, IA64_IPI_DM_INT, 0); + platform_send_ipi(cpu, IA64_MCA_WAKEUP_INT_VECTOR, IA64_IPI_DM_INT, 0); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; } diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index 15993525d..b148d8b9c 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -3,11 
+3,11 @@ // // Mods by cfleck to integrate into kernel build // 00/03/15 davidm Added various stop bits to get a clean compile -// 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp kstack, -// switch modes, jump to C INIT handler +// +// 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp +// kstack, switch modes, jump to C INIT handler // #include <linux/config.h> - #include <asm/pgtable.h> #include <asm/processor.h> #include <asm/mca_asm.h> @@ -17,14 +17,7 @@ * When we get an machine check, the kernel stack pointer is no longer * valid, so we need to set a new stack pointer. */ -#define MINSTATE_START_SAVE_MIN \ -(pKern) movl sp=ia64_init_stack+IA64_STK_OFFSET-IA64_PT_REGS_SIZE; \ - ;; - -#define MINSTATE_END_SAVE_MIN \ - or r12=r12,r14; /* make sp a kernel virtual address */ \ - or r13=r13,r14; /* make `current' a kernel virtual address */ \ - ;; +#define MINSTATE_PHYS /* Make sure stack access is physical for MINSTATE */ #include "minstate.h" diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h index 8790d49c3..2ea6f1791 100644 --- a/arch/ia64/kernel/minstate.h +++ b/arch/ia64/kernel/minstate.h @@ -20,6 +20,72 @@ #define rR1 r20 /* + * Here start the source dependent macros. + */ + +/* + * For ivt.s we want to access the stack virtually so we dont have to disable translation + * on interrupts. + */ +#define MINSTATE_START_SAVE_MIN_VIRT \ + dep r1=-1,r1,61,3; /* r1 = current (virtual) */ \ +(p7) mov ar.rsc=r0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ + ;; \ +(p7) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \ +(p7) mov rARRNAT=ar.rnat; \ +(pKern) mov r1=sp; /* get sp */ \ + ;; \ +(p7) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ +(p7) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \ + ;; \ +(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ +(p7) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \ + ;; \ +(p7) mov r18=ar.bsp; \ +(p7) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ + +#define MINSTATE_END_SAVE_MIN_VIRT \ + or r13=r13,r14; /* make `current' a kernel virtual address */ \ + bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ + ;; + +/* + * For mca_asm.S we want to access the stack physically since the state is saved before we + * go virtual and dont want to destroy the iip or ipsr. 
+ */ +#define MINSTATE_START_SAVE_MIN_PHYS \ +(pKern) movl sp=ia64_init_stack+IA64_STK_OFFSET-IA64_PT_REGS_SIZE; \ +(p7) mov ar.rsc=r0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ +(p7) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \ + ;; \ +(p7) mov rARRNAT=ar.rnat; \ +(pKern) dep r1=0,sp,61,3; /* compute physical addr of sp */ \ +(p7) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ +(p7) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \ +(p7) dep rKRBS=-1,rKRBS,61,3; /* compute kernel virtual addr of RBS */\ + ;; \ +(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ +(p7) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \ + ;; \ +(p7) mov r18=ar.bsp; \ +(p7) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ + +#define MINSTATE_END_SAVE_MIN_PHYS \ + or r12=r12,r14; /* make sp a kernel virtual address */ \ + or r13=r13,r14; /* make `current' a kernel virtual address */ \ + ;; + +#ifdef MINSTATE_VIRT +# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_VIRT +# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_VIRT +#endif + +#ifdef MINSTATE_PHYS +# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_PHYS +# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_PHYS +#endif + +/* * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves * the minimum state necessary that allows us to turn psr.ic back * on. @@ -31,7 +97,6 @@ * * Upon exit, the state is as follows: * psr.ic: off - * psr.dt: off * r2 = points to &pt_regs.r16 * r12 = kernel sp (kernel virtual address) * r13 = points to current task_struct (kernel virtual address) @@ -50,7 +115,7 @@ mov rCRIPSR=cr.ipsr; \ mov rB6=b6; /* rB6 = branch reg 6 */ \ mov rCRIIP=cr.iip; \ - mov r1=ar.k6; /* r1 = current */ \ + mov r1=ar.k6; /* r1 = current (physical) */ \ ;; \ invala; \ extr.u r16=rCRIPSR,32,2; /* extract psr.cpl */ \ @@ -58,25 +123,11 @@ cmp.eq pKern,p7=r0,r16; /* are we in kernel mode already? (psr.cpl==0) */ \ /* switch from user to kernel RBS: */ \ COVER; \ - ;; \ + ;; \ MINSTATE_START_SAVE_MIN \ -(p7) mov ar.rsc=r0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ -(p7) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \ - ;; \ -(p7) mov rARRNAT=ar.rnat; \ -(pKern) dep r1=0,sp,61,3; /* compute physical addr of sp */ \ -(p7) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ -(p7) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \ -(p7) dep rKRBS=-1,rKRBS,61,3; /* compute kernel virtual addr of RBS */ \ - ;; \ -(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ -(p7) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \ - ;; \ -(p7) mov r18=ar.bsp; \ -(p7) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ - \ - mov r16=r1; /* initialize first base pointer */ \ - adds r17=8,r1; /* initialize second base pointer */ \ + ;; \ + mov r16=r1; /* initialize first base pointer */ \ + adds r17=8,r1; /* initialize second base pointer */ \ ;; \ st8 [r16]=rCRIPSR,16; /* save cr.ipsr */ \ st8 [r17]=rCRIIP,16; /* save cr.iip */ \ diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S index 2e56a428e..fc14cc31c 100644 --- a/arch/ia64/kernel/pal.S +++ b/arch/ia64/kernel/pal.S @@ -52,10 +52,9 @@ END(ia64_pal_default_handler) /* * Make a PAL call using the static calling convention. 
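Stepping back to the minstate.h rework just shown: there is now a single DO_SAVE_MIN body, and each includer picks its stack-addressing flavor by defining MINSTATE_VIRT or MINSTATE_PHYS before the #include (mca_asm.S chooses the physical variant, as seen earlier). Reduced to a self-contained C toy with invented names, the select-by-macro-before-include idiom works like this:

#include <stdio.h>

#define SAVE_MODE_PHYS                 /* the "client" picks a mode up front... */

/* ...and the shared "header" part selects the matching implementation: */
#if defined(SAVE_MODE_VIRT)
# define START_SAVE()  puts("saving state via the virtually mapped stack")
#elif defined(SAVE_MODE_PHYS)
# define START_SAVE()  puts("saving state with physical stack addressing")
#else
# error "define SAVE_MODE_VIRT or SAVE_MODE_PHYS first"
#endif

int main(void)
{
    START_SAVE();                      /* expands to the physical-mode variant here */
    return 0;
}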
* - * in0 Pointer to struct ia64_pal_retval - * in1 Index of PAL service - * in2 - in4 Remaining PAL arguments - * in5 1 ==> clear psr.ic, 0 ==> don't clear psr.ic + * in0 Index of PAL service + * in1 - in3 Remaining PAL arguments + * in4 1 ==> clear psr.ic, 0 ==> don't clear psr.ic * */ GLOBAL_ENTRY(ia64_pal_call_static) @@ -69,7 +68,7 @@ GLOBAL_ENTRY(ia64_pal_call_static) } ;; ld8 loc2 = [loc2] // loc2 <- entry point - tbit.nz p6,p7 = in5, 0 + tbit.nz p6,p7 = in4, 0 adds r8 = 1f-1b,r8 ;; mov loc3 = psr diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c deleted file mode 100644 index 6293cdfa0..000000000 --- a/arch/ia64/kernel/pci-dma.c +++ /dev/null @@ -1,517 +0,0 @@ -/* - * Dynamic DMA mapping support. - * - * This implementation is for IA-64 platforms that do not support - * I/O TLBs (aka DMA address translation hardware). - * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com> - * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com> - */ - -#include <linux/config.h> - -#include <linux/mm.h> -#include <linux/pci.h> -#include <linux/spinlock.h> -#include <linux/string.h> -#include <linux/types.h> - -#include <asm/io.h> -#include <asm/pci.h> -#include <asm/dma.h> - -#ifdef CONFIG_SWIOTLB - -#include <linux/init.h> -#include <linux/bootmem.h> - -#define ALIGN(val, align) ((unsigned long) (((unsigned long) (val) + ((align) - 1)) & ~((align) - 1))) - -/* - * log of the size of each IO TLB slab. The number of slabs is command line - * controllable. - */ -#define IO_TLB_SHIFT 11 - -/* - * Used to do a quick range check in pci_unmap_single and pci_sync_single, to see if the - * memory was in fact allocated by this API. - */ -static char *io_tlb_start, *io_tlb_end; - -/* - * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and io_tlb_end. - * This is command line adjustable via setup_io_tlb_npages. - */ -unsigned long io_tlb_nslabs = 1024; - -/* - * This is a free list describing the number of free entries available from each index - */ -static unsigned int *io_tlb_list; -static unsigned int io_tlb_index; - -/* - * We need to save away the original address corresponding to a mapped entry for the sync - * operations. - */ -static unsigned char **io_tlb_orig_addr; - -/* - * Protect the above data structures in the map and unmap calls - */ -spinlock_t io_tlb_lock = SPIN_LOCK_UNLOCKED; - -static int __init -setup_io_tlb_npages (char *str) -{ - io_tlb_nslabs = simple_strtoul(str, NULL, 0) << (PAGE_SHIFT - IO_TLB_SHIFT); - return 1; -} -__setup("swiotlb=", setup_io_tlb_npages); - -/* - * Statically reserve bounce buffer space and initialize bounce buffer - * data structures for the software IO TLB used to implement the PCI DMA API - */ -void -setup_swiotlb (void) -{ - int i; - - /* - * Get IO TLB memory from the low pages - */ - io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT)); - if (!io_tlb_start) - BUG(); - io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); - - /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size 2^IO_TLB_SHIFT between - * io_tlb_start and io_tlb_end. 
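The comment above describes how the pci-dma.c being deleted finds contiguous bounce-buffer space: io_tlb_list[i] records how many free slabs start at slot i, so a request for nslots slabs only needs to find an entry >= nslots. A small self-contained model of that free list (fixed sizes, no wrap-around or stride refinement, all names invented for the example; nslots is assumed non-zero):

#include <stdio.h>

#define NSLABS 16

static unsigned int io_tlb_list[NSLABS];

static void init_free_list(void)
{
    /* slot i has (NSLABS - i) free slabs ahead of it when everything is free */
    for (unsigned int i = 0; i < NSLABS; i++)
        io_tlb_list[i] = NSLABS - i;
}

/* Returns the first index of a run of 'nslots' free slabs, or -1 if none. */
static int alloc_slots(unsigned int nslots)
{
    for (unsigned int index = 0; index < NSLABS; index += nslots) {
        if (io_tlb_list[index] >= nslots) {
            for (unsigned int i = index; i < index + nslots; i++)
                io_tlb_list[i] = 0;        /* mark the run as in use */
            return (int) index;
        }
    }
    return -1;
}

int main(void)
{
    init_free_list();
    printf("first 4-slab run at slot %d\n", alloc_slots(4));   /* prints 0 */
    printf("next  4-slab run at slot %d\n", alloc_slots(4));   /* prints 4 */
    return 0;
}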
- */ - io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); - for (i = 0; i < io_tlb_nslabs; i++) - io_tlb_list[i] = io_tlb_nslabs - i; - io_tlb_index = 0; - io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *)); - - printk("Placing software IO TLB between 0x%p - 0x%p\n", - (void *) io_tlb_start, (void *) io_tlb_end); -} - -/* - * Allocates bounce buffer and returns its kernel virtual address. - */ -static void * -__pci_map_single (struct pci_dev *hwdev, char *buffer, size_t size, int direction) -{ - unsigned long flags; - char *dma_addr; - unsigned int i, nslots, stride, index, wrap; - - /* - * For mappings greater than a page size, we limit the stride (and hence alignment) - * to a page size. - */ - nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - if (size > (1 << PAGE_SHIFT)) - stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); - else - stride = nslots; - - if (!nslots) - BUG(); - - /* - * Find suitable number of IO TLB entries size that will fit this request and allocate a buffer - * from that IO TLB pool. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - { - wrap = index = ALIGN(io_tlb_index, stride); - do { - /* - * If we find a slot that indicates we have 'nslots' number of - * contiguous buffers, we allocate the buffers from that slot and mark the - * entries as '0' indicating unavailable. - */ - if (io_tlb_list[index] >= nslots) { - for (i = index; i < index + nslots; i++) - io_tlb_list[i] = 0; - dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); - - /* - * Update the indices to avoid searching in the next round. - */ - io_tlb_index = (index + nslots) < io_tlb_nslabs ? (index + nslots) : 0; - - goto found; - } - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - } while (index != wrap); - - /* - * XXX What is a suitable recovery mechanism here? We cannot - * sleep because we are called from with in interrupts! - */ - panic("__pci_map_single: could not allocate software IO TLB (%ld bytes)", size); -found: - } - spin_unlock_irqrestore(&io_tlb_lock, flags); - - /* - * Save away the mapping from the original address to the DMA address. This is needed - * when we sync the memory. Then we sync the buffer if needed. - */ - io_tlb_orig_addr[index] = buffer; - if (direction == PCI_DMA_TODEVICE || direction == PCI_DMA_BIDIRECTIONAL) - memcpy(dma_addr, buffer, size); - - return dma_addr; -} - -/* - * dma_addr is the kernel virtual address of the bounce buffer to unmap. - */ -static void -__pci_unmap_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction) -{ - unsigned long flags; - int i, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; - char *buffer = io_tlb_orig_addr[index]; - - /* - * First, sync the memory before unmapping the entry - */ - if ((direction == PCI_DMA_FROMDEVICE) || (direction == PCI_DMA_BIDIRECTIONAL)) - /* - * bounce... copy the data back into the original buffer - * and delete the bounce buffer. - */ - memcpy(buffer, dma_addr, size); - - /* - * Return the buffer to the free list by setting the corresponding entries to indicate - * the number of contigous entries available. - * While returning the entries to the free list, we merge the entries with slots below - * and above the pool being returned. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - { - int count = ((index + nslots) < io_tlb_nslabs ? 
io_tlb_list[index + nslots] : 0); - /* - * Step 1: return the slots to the free list, merging the slots with superceeding slots - */ - for (i = index + nslots - 1; i >= index; i--) - io_tlb_list[i] = ++count; - /* - * Step 2: merge the returned slots with the preceeding slots, if available (non zero) - */ - for (i = index - 1; (i >= 0) && io_tlb_list[i]; i--) - io_tlb_list[i] += io_tlb_list[index]; - } - spin_unlock_irqrestore(&io_tlb_lock, flags); -} - -static void -__pci_sync_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction) -{ - int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; - char *buffer = io_tlb_orig_addr[index]; - - /* - * bounce... copy the data back into/from the original buffer - * XXX How do you handle PCI_DMA_BIDIRECTIONAL here ? - */ - if (direction == PCI_DMA_FROMDEVICE) - memcpy(buffer, dma_addr, size); - else if (direction == PCI_DMA_TODEVICE) - memcpy(dma_addr, buffer, size); - else - BUG(); -} - -/* - * Map a single buffer of the indicated size for DMA in streaming mode. - * The PCI address to use is returned. - * - * Once the device is given the dma address, the device owns this memory - * until either pci_unmap_single or pci_dma_sync_single is performed. - */ -dma_addr_t -pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction) -{ - unsigned long pci_addr = virt_to_phys(ptr); - - if (direction == PCI_DMA_NONE) - BUG(); - /* - * Check if the PCI device can DMA to ptr... if so, just return ptr - */ - if ((pci_addr & ~hwdev->dma_mask) == 0) - /* - * Device is bit capable of DMA'ing to the - * buffer... just return the PCI address of ptr - */ - return pci_addr; - - /* - * get a bounce buffer: - */ - pci_addr = virt_to_phys(__pci_map_single(hwdev, ptr, size, direction)); - - /* - * Ensure that the address returned is DMA'ble: - */ - if ((pci_addr & ~hwdev->dma_mask) != 0) - panic("__pci_map_single: bounce buffer is not DMA'ble"); - - return pci_addr; -} - -/* - * Unmap a single streaming mode DMA translation. The dma_addr and size - * must match what was provided for in a previous pci_map_single call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guarenteed to see - * whatever the device wrote there. - */ -void -pci_unmap_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction) -{ - char *dma_addr = phys_to_virt(pci_addr); - - if (direction == PCI_DMA_NONE) - BUG(); - if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) - __pci_unmap_single(hwdev, dma_addr, size, direction); -} - -/* - * Make physical memory consistent for a single - * streaming mode DMA translation after a transfer. - * - * If you perform a pci_map_single() but wish to interrogate the - * buffer using the cpu, yet do not wish to teardown the PCI dma - * mapping, you must call this function before doing so. At the - * next point you give the PCI dma address back to the card, the - * device again owns the buffer. - */ -void -pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction) -{ - char *dma_addr = phys_to_virt(pci_addr); - - if (direction == PCI_DMA_NONE) - BUG(); - if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) - __pci_sync_single(hwdev, dma_addr, size, direction); -} - -/* - * Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scather-gather version of the - * above pci_map_single interface. 
Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. - */ -int -pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) -{ - int i; - - if (direction == PCI_DMA_NONE) - BUG(); - - for (i = 0; i < nelems; i++, sg++) { - sg->orig_address = sg->address; - if ((virt_to_phys(sg->address) & ~hwdev->dma_mask) != 0) { - sg->address = __pci_map_single(hwdev, sg->address, sg->length, direction); - } - } - return nelems; -} - -/* - * Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above. - */ -void -pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) -{ - int i; - - if (direction == PCI_DMA_NONE) - BUG(); - - for (i = 0; i < nelems; i++, sg++) - if (sg->orig_address != sg->address) { - __pci_unmap_single(hwdev, sg->address, sg->length, direction); - sg->address = sg->orig_address; - } -} - -/* - * Make physical memory consistent for a set of streaming mode DMA - * translations after a transfer. - * - * The same as pci_dma_sync_single but for a scatter-gather list, - * same rules and usage. - */ -void -pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) -{ - int i; - - if (direction == PCI_DMA_NONE) - BUG(); - - for (i = 0; i < nelems; i++, sg++) - if (sg->orig_address != sg->address) - __pci_sync_single(hwdev, sg->address, sg->length, direction); -} - -#else -/* - * Map a single buffer of the indicated size for DMA in streaming mode. - * The 32-bit bus address to use is returned. - * - * Once the device is given the dma address, the device owns this memory - * until either pci_unmap_single or pci_dma_sync_single is performed. - */ -dma_addr_t -pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - return virt_to_bus(ptr); -} - -/* - * Unmap a single streaming mode DMA translation. The dma_addr and size - * must match what was provided for in a previous pci_map_single call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guarenteed to see - * whatever the device wrote there. - */ -void -pci_unmap_single (struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} -/* - * Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scather-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. 
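Throughout the deleted mapping routines the same test decides whether a buffer can be handed to the device directly or must go through a bounce buffer: does its physical address fit under the device's dma_mask? A minimal sketch of just that decision, with an invented device struct and sample addresses (virt_to_phys() and the bounce allocator are omitted):

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

struct fake_pci_dev {
    uint64_t dma_mask;                       /* e.g. 0xffffffff for a 32-bit-only device */
};

/* same test as "(pci_addr & ~hwdev->dma_mask) != 0" in the removed code */
static bool needs_bounce(const struct fake_pci_dev *dev, uint64_t phys)
{
    return (phys & ~dev->dma_mask) != 0;
}

int main(void)
{
    struct fake_pci_dev dev = { .dma_mask = 0xffffffffULL };

    printf("0x%llx -> %s\n", 0xff000000ULL,
           needs_bounce(&dev, 0xff000000ULL) ? "bounce" : "direct");   /* direct */
    printf("0x%llx -> %s\n", 0x100000000ULL,
           needs_bounce(&dev, 0x100000000ULL) ? "bounce" : "direct");  /* bounce */
    return 0;
}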
- */ -int -pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - return nents; -} - -/* - * Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above. - */ -void -pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} -/* - * Make physical memory consistent for a single - * streaming mode DMA translation after a transfer. - * - * If you perform a pci_map_single() but wish to interrogate the - * buffer using the cpu, yet do not wish to teardown the PCI dma - * mapping, you must call this function before doing so. At the - * next point you give the PCI dma address back to the card, the - * device again owns the buffer. - */ -void -pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} - -/* - * Make physical memory consistent for a set of streaming mode DMA - * translations after a transfer. - * - * The same as pci_dma_sync_single but for a scatter-gather list, - * same rules and usage. - */ -void -pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} - -#endif /* CONFIG_SWIOTLB */ - -void * -pci_alloc_consistent (struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) -{ - unsigned long pci_addr; - int gfp = GFP_ATOMIC; - void *ret; - - if (!hwdev || hwdev->dma_mask <= 0xffffffff) - gfp |= GFP_DMA; /* XXX fix me: should change this to GFP_32BIT or ZONE_32BIT */ - ret = (void *)__get_free_pages(gfp, get_order(size)); - if (!ret) - return NULL; - - memset(ret, 0, size); - pci_addr = virt_to_phys(ret); - if ((pci_addr & ~hwdev->dma_mask) != 0) - panic("pci_alloc_consistent: allocated memory is out of range for PCI device"); - *dma_handle = pci_addr; - return ret; -} - -void -pci_free_consistent (struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) -{ - free_pages((unsigned long) vaddr, get_order(size)); -} diff --git a/arch/ia64/kernel/pci.c b/arch/ia64/kernel/pci.c index 2d814b443..37dbf811a 100644 --- a/arch/ia64/kernel/pci.c +++ b/arch/ia64/kernel/pci.c @@ -1,10 +1,8 @@ /* - * pci.c - Low-Level PCI Access in IA64 + * pci.c - Low-Level PCI Access in IA-64 * * Derived from bios32.c of i386 tree. - * */ - #include <linux/config.h> #include <linux/types.h> @@ -44,19 +42,16 @@ * This interrupt-safe spinlock protects all accesses to PCI * configuration space. */ - spinlock_t pci_lock = SPIN_LOCK_UNLOCKED; -struct pci_fixup pcibios_fixups[] = { { 0 } }; - -#define PCI_NO_CHECKS 0x400 -#define PCI_NO_PEER_FIXUP 0x800 - -static unsigned int pci_probe = PCI_NO_CHECKS; +struct pci_fixup pcibios_fixups[] = { + { 0 } +}; /* Macro to build a PCI configuration address to be passed as a parameter to SAL. 
*/ -#define PCI_CONFIG_ADDRESS(dev, where) (((u64) dev->bus->number << 16) | ((u64) (dev->devfn & 0xff) << 8) | (where & 0xff)) +#define PCI_CONFIG_ADDRESS(dev, where) \ + (((u64) dev->bus->number << 16) | ((u64) (dev->devfn & 0xff) << 8) | (where & 0xff)) static int pci_conf_read_config_byte(struct pci_dev *dev, int where, u8 *value) @@ -109,8 +104,7 @@ pci_conf_write_config_dword (struct pci_dev *dev, int where, u32 value) return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 4, value); } - -static struct pci_ops pci_conf = { +struct pci_ops pci_conf = { pci_conf_read_config_byte, pci_conf_read_config_word, pci_conf_read_config_dword, @@ -120,36 +114,21 @@ static struct pci_ops pci_conf = { }; /* - * Try to find PCI BIOS. This will always work for IA64. - */ - -static struct pci_ops * __init -pci_find_bios(void) -{ - return &pci_conf; -} - -/* * Initialization. Uses the SAL interface */ - -#define PCI_BUSES_TO_SCAN 255 - void __init -pcibios_init(void) +pcibios_init (void) { - struct pci_ops *ops = NULL; +# define PCI_BUSES_TO_SCAN 255 int i; - if ((ops = pci_find_bios()) == NULL) { - printk("PCI: No PCI bus detected\n"); - return; - } + platform_pci_fixup(0); /* phase 0 initialization (before PCI bus has been scanned) */ printk("PCI: Probing PCI hardware\n"); for (i = 0; i < PCI_BUSES_TO_SCAN; i++) - pci_scan_bus(i, ops, NULL); - platform_pci_fixup(); + pci_scan_bus(i, &pci_conf, NULL); + + platform_pci_fixup(1); /* phase 1 initialization (after PCI bus has been scanned) */ return; } @@ -157,16 +136,15 @@ pcibios_init(void) * Called after each bus is probed, but before its children * are examined. */ - void __init -pcibios_fixup_bus(struct pci_bus *b) +pcibios_fixup_bus (struct pci_bus *b) { return; } void __init -pcibios_update_resource(struct pci_dev *dev, struct resource *root, - struct resource *res, int resource) +pcibios_update_resource (struct pci_dev *dev, struct resource *root, + struct resource *res, int resource) { unsigned long where, size; u32 reg; @@ -181,7 +159,7 @@ pcibios_update_resource(struct pci_dev *dev, struct resource *root, } void __init -pcibios_update_irq(struct pci_dev *dev, int irq) +pcibios_update_irq (struct pci_dev *dev, int irq) { pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); @@ -204,18 +182,16 @@ pcibios_enable_device (struct pci_dev *dev) return 0; } +void +pcibios_align_resource (void *data, struct resource *res, unsigned long size) +{ +} + /* * PCI BIOS setup, always defaults to SAL interface */ - char * __init -pcibios_setup(char *str) +pcibios_setup (char *str) { - pci_probe = PCI_NO_CHECKS; return NULL; } - -void -pcibios_align_resource (void *data, struct resource *res, unsigned long size) -{ -} diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index e5efbc8b5..4c7ba4295 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -4,18 +4,20 @@ * * Originaly Written by Ganesh Venkitachalam, IBM Corp. * Modifications by David Mosberger-Tang, Hewlett-Packard Co. + * Modifications by Stephane Eranian, Hewlett-Packard Co. 
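The reflowed PCI_CONFIG_ADDRESS() macro above simply packs the bus number, devfn and register offset into the single 64-bit argument SAL expects. A standalone illustration of that packing (the function name and the sample bus/device values are made up):

#include <stdint.h>
#include <stdio.h>

static uint64_t sal_config_address(uint8_t bus, uint8_t devfn, uint8_t where)
{
    return ((uint64_t) bus << 16) | ((uint64_t) devfn << 8) | where;
}

int main(void)
{
    /* bus 2, device 3 function 0 (devfn 0x18), config register 0x04 */
    printf("0x%llx\n", (unsigned long long) sal_config_address(2, 0x18, 0x04));
    /* prints 0x21804 */
    return 0;
}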
* Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com> * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 2000 Stephane Eranian <eranian@hpl.hp.com> */ #include <linux/config.h> + #include <linux/kernel.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/smp_lock.h> #include <linux/proc_fs.h> -#include <linux/ptrace.h> #include <asm/errno.h> #include <asm/hw_irq.h> @@ -58,19 +60,51 @@ #define MAX_PERF_COUNTER 4 /* true for Itanium, at least */ #define PMU_FIRST_COUNTER 4 /* first generic counter */ -#define WRITE_PMCS_AND_START 0xa0 -#define WRITE_PMCS 0xa1 -#define READ_PMDS 0xa2 -#define STOP_PMCS 0xa3 +#define PFM_WRITE_PMCS 0xa0 +#define PFM_WRITE_PMDS 0xa1 +#define PFM_READ_PMDS 0xa2 +#define PFM_STOP 0xa3 +#define PFM_START 0xa4 +#define PFM_ENABLE 0xa5 /* unfreeze only */ +#define PFM_DISABLE 0xa6 /* freeze only */ +/* + * Those 2 are just meant for debugging. I considered using sysctl() for + * that but it is a little bit too pervasive. This solution is at least + * self-contained. + */ +#define PFM_DEBUG_ON 0xe0 +#define PFM_DEBUG_OFF 0xe1 + +#ifdef CONFIG_SMP +#define cpu_is_online(i) (cpu_online_map & (1UL << i)) +#else +#define cpu_is_online(i) 1 +#endif +#define PMC_IS_IMPL(i) (pmu_conf.impl_regs[i>>6] & (1<< (i&~(64-1)))) +#define PMD_IS_IMPL(i) (pmu_conf.impl_regs[4+(i>>6)] & (1<< (i&~(64-1)))) +#define PMD_IS_COUNTER(i) (i>=PMU_FIRST_COUNTER && i < (PMU_FIRST_COUNTER+pmu_conf.max_counters)) +#define PMC_IS_COUNTER(i) (i>=PMU_FIRST_COUNTER && i < (PMU_FIRST_COUNTER+pmu_conf.max_counters)) /* * this structure needs to be enhanced */ typedef struct { + unsigned long pfr_reg_num; /* which register */ + unsigned long pfr_reg_value; /* configuration (PMC) or initial value (PMD) */ + unsigned long pfr_reg_reset; /* reset value on overflow (PMD) */ + void *pfr_smpl_buf; /* pointer to user buffer for EAR/BTB */ + unsigned long pfr_smpl_size; /* size of user buffer for EAR/BTB */ + pid_t pfr_notify_pid; /* process to notify */ + int pfr_notify_sig; /* signal for notification, 0=no notification */ +} perfmon_req_t; + +#if 0 +typedef struct { unsigned long pmu_reg_data; /* generic PMD register */ unsigned long pmu_reg_num; /* which register number */ } perfmon_reg_t; +#endif /* * This structure is initialize at boot time and contains @@ -78,86 +112,141 @@ typedef struct { * by PAL */ typedef struct { - unsigned long perf_ovfl_val; /* overflow value for generic counters */ - unsigned long max_pmc; /* highest PMC */ - unsigned long max_pmd; /* highest PMD */ - unsigned long max_counters; /* number of generic counter pairs (PMC/PMD) */ + unsigned long perf_ovfl_val; /* overflow value for generic counters */ + unsigned long max_counters; /* upper limit on counter pair (PMC/PMD) */ + unsigned long impl_regs[16]; /* buffer used to hold implememted PMC/PMD mask */ } pmu_config_t; -/* XXX will go static when ptrace() is cleaned */ -unsigned long perf_ovfl_val; /* overflow value for generic counters */ - static pmu_config_t pmu_conf; +/* for debug only */ +static unsigned long pfm_debug=1; /* 0= nodebug, >0= debug output on */ +#define DBprintk(a) {\ + if (pfm_debug >0) { printk a; } \ +} + /* - * could optimize to avoid cache conflicts in SMP + * could optimize to avoid cache line conflicts in SMP */ -unsigned long pmds[NR_CPUS][MAX_PERF_COUNTER]; +static struct task_struct *pmu_owners[NR_CPUS]; -asmlinkage unsigned long -sys_perfmonctl (int cmd, int count, void *ptr, long arg4, long arg5, long arg6, long arg7, 
long arg8, long stack) +static int +do_perfmonctl (struct task_struct *task, int cmd, int flags, perfmon_req_t *req, int count, struct pt_regs *regs) { - struct pt_regs *regs = (struct pt_regs *) &stack; - perfmon_reg_t tmp, *cptr = ptr; - unsigned long cnum; + perfmon_req_t tmp; int i; switch (cmd) { - case WRITE_PMCS: /* Writes to PMC's and clears PMDs */ - case WRITE_PMCS_AND_START: /* Also starts counting */ + case PFM_WRITE_PMCS: + /* we don't quite support this right now */ + if (task != current) return -EINVAL; + + if (!access_ok(VERIFY_READ, req, sizeof(struct perfmon_req_t)*count)) return -EFAULT; + + for (i = 0; i < count; i++, req++) { + copy_from_user(&tmp, req, sizeof(tmp)); + + /* XXX needs to check validity of the data maybe */ + + if (!PMC_IS_IMPL(tmp.pfr_reg_num)) { + DBprintk((__FUNCTION__ " invalid pmc[%ld]\n", tmp.pfr_reg_num)); + return -EINVAL; + } + + /* XXX: for counters, need to some checks */ + if (PMC_IS_COUNTER(tmp.pfr_reg_num)) { + current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].sig = tmp.pfr_notify_sig; + current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].pid = tmp.pfr_notify_pid; + + DBprintk((__FUNCTION__" setting PMC[%ld] send sig %d to %d\n",tmp.pfr_reg_num, tmp.pfr_notify_sig, tmp.pfr_notify_pid)); + } + ia64_set_pmc(tmp.pfr_reg_num, tmp.pfr_reg_value); + + DBprintk((__FUNCTION__" setting PMC[%ld]=0x%lx\n", tmp.pfr_reg_num, tmp.pfr_reg_value)); + } + /* + * we have to set this here event hough we haven't necessarily started monitoring + * because we may be context switched out + */ + current->thread.flags |= IA64_THREAD_PM_VALID; + break; + + case PFM_WRITE_PMDS: + /* we don't quite support this right now */ + if (task != current) return -EINVAL; + + if (!access_ok(VERIFY_READ, req, sizeof(struct perfmon_req_t)*count)) return -EFAULT; + + for (i = 0; i < count; i++, req++) { + copy_from_user(&tmp, req, sizeof(tmp)); + + if (!PMD_IS_IMPL(tmp.pfr_reg_num)) return -EINVAL; + + /* update virtualized (64bits) counter */ + if (PMD_IS_COUNTER(tmp.pfr_reg_num)) { + current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].val = tmp.pfr_reg_value & ~pmu_conf.perf_ovfl_val; + current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].rval = tmp.pfr_reg_reset; + } + /* writes to unimplemented part is ignored, so this is safe */ + ia64_set_pmd(tmp.pfr_reg_num, tmp.pfr_reg_value); + /* to go away */ + ia64_srlz_d(); + DBprintk((__FUNCTION__" setting PMD[%ld]: pmod.val=0x%lx pmd=0x%lx rval=0x%lx\n", tmp.pfr_reg_num, current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].val, ia64_get_pmd(tmp.pfr_reg_num),current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].rval)); + } + /* + * we have to set this here event hough we haven't necessarily started monitoring + * because we may be context switched out + */ + current->thread.flags |= IA64_THREAD_PM_VALID; + break; + + case PFM_START: + /* we don't quite support this right now */ + if (task != current) return -EINVAL; + + pmu_owners[smp_processor_id()] = current; - if (!access_ok(VERIFY_READ, cptr, sizeof(struct perfmon_reg_t)*count)) - return -EFAULT; - - for (i = 0; i < count; i++, cptr++) { - - copy_from_user(&tmp, cptr, sizeof(tmp)); - - /* XXX need to check validity of pmu_reg_num and perhaps data!! */ - - if (tmp.pmu_reg_num > pmu_conf.max_pmc || tmp.pmu_reg_num == 0) return -EFAULT; + /* will start monitoring right after rfi */ + ia64_psr(regs)->up = 1; - ia64_set_pmc(tmp.pmu_reg_num, tmp.pmu_reg_data); + /* + * mark the state as valid. 
+ * this will trigger save/restore at context switch + */ + current->thread.flags |= IA64_THREAD_PM_VALID; - /* to go away */ - if (tmp.pmu_reg_num >= PMU_FIRST_COUNTER && tmp.pmu_reg_num < PMU_FIRST_COUNTER+pmu_conf.max_counters) { - ia64_set_pmd(tmp.pmu_reg_num, 0); - pmds[smp_processor_id()][tmp.pmu_reg_num - PMU_FIRST_COUNTER] = 0; + ia64_set_pmc(0, 0); - printk(__FUNCTION__" setting PMC/PMD[%ld] es=0x%lx pmd[%ld]=%lx\n", tmp.pmu_reg_num, (tmp.pmu_reg_data>>8) & 0x7f, tmp.pmu_reg_num, ia64_get_pmd(tmp.pmu_reg_num)); - } else - printk(__FUNCTION__" setting PMC[%ld]=0x%lx\n", tmp.pmu_reg_num, tmp.pmu_reg_data); - } + break; - if (cmd == WRITE_PMCS_AND_START) { -#if 0 -/* irrelevant with user monitors */ - local_irq_save(flags); + case PFM_ENABLE: + /* we don't quite support this right now */ + if (task != current) return -EINVAL; - dcr = ia64_get_dcr(); - dcr |= IA64_DCR_PP; - ia64_set_dcr(dcr); + pmu_owners[smp_processor_id()] = current; - local_irq_restore(flags); -#endif + /* + * mark the state as valid. + * this will trigger save/restore at context switch + */ + current->thread.flags |= IA64_THREAD_PM_VALID; + /* simply unfreeze */ ia64_set_pmc(0, 0); + break; - /* will start monitoring right after rfi */ - ia64_psr(regs)->up = 1; - } - /* - * mark the state as valid. - * this will trigger save/restore at context switch - */ - current->thread.flags |= IA64_THREAD_PM_VALID; - break; + case PFM_DISABLE: + /* we don't quite support this right now */ + if (task != current) return -EINVAL; + + /* simply unfreeze */ + ia64_set_pmc(0, 1); + ia64_srlz_d(); + break; - case READ_PMDS: - if (count <= 0 || count > MAX_PERF_COUNTER) - return -EINVAL; - if (!access_ok(VERIFY_WRITE, cptr, sizeof(struct perfmon_reg_t)*count)) - return -EFAULT; + case PFM_READ_PMDS: + if (!access_ok(VERIFY_READ, req, sizeof(struct perfmon_req_t)*count)) return -EFAULT; + if (!access_ok(VERIFY_WRITE, req, sizeof(struct perfmon_req_t)*count)) return -EFAULT; /* This looks shady, but IMHO this will work fine. This is * the sequence that I could come up with to avoid races @@ -187,16 +276,31 @@ sys_perfmonctl (int cmd, int count, void *ptr, long arg4, long arg5, long arg6, * is the irq_save/restore needed? 
*/ + for (i = 0; i < count; i++, req++) { + unsigned long val=0; - /* XXX: This needs to change to read more than just the counters */ - for (i = 0, cnum = PMU_FIRST_COUNTER;i < count; i++, cnum++, cptr++) { + copy_from_user(&tmp, req, sizeof(tmp)); - tmp.pmu_reg_data = (pmds[smp_processor_id()][i] - + (ia64_get_pmd(cnum) & pmu_conf.perf_ovfl_val)); + if (!PMD_IS_IMPL(tmp.pfr_reg_num)) return -EINVAL; - tmp.pmu_reg_num = cnum; + if (PMD_IS_COUNTER(tmp.pfr_reg_num)) { + if (task == current){ + val = ia64_get_pmd(tmp.pfr_reg_num) & pmu_conf.perf_ovfl_val; + } else { + val = task->thread.pmd[tmp.pfr_reg_num - PMU_FIRST_COUNTER] & pmu_conf.perf_ovfl_val; + } + val += task->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].val; + } else { + /* for now */ + if (task != current) return -EINVAL; - if (copy_to_user(cptr, &tmp, sizeof(tmp))) return -EFAULT; + val = ia64_get_pmd(tmp.pfr_reg_num); + } + tmp.pfr_reg_value = val; + +DBprintk((__FUNCTION__" reading PMD[%ld]=0x%lx\n", tmp.pfr_reg_num, val)); + + if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT; } #if 0 /* irrelevant with user monitors */ @@ -209,11 +313,18 @@ sys_perfmonctl (int cmd, int count, void *ptr, long arg4, long arg5, long arg6, #endif break; - case STOP_PMCS: + case PFM_STOP: + /* we don't quite support this right now */ + if (task != current) return -EINVAL; + ia64_set_pmc(0, 1); ia64_srlz_d(); - for (i = 0; i < MAX_PERF_COUNTER; ++i) - ia64_set_pmc(4+i, 0); + + ia64_psr(regs)->up = 0; + + current->thread.flags &= ~IA64_THREAD_PM_VALID; + + pmu_owners[smp_processor_id()] = NULL; #if 0 /* irrelevant with user monitors */ @@ -225,48 +336,140 @@ sys_perfmonctl (int cmd, int count, void *ptr, long arg4, long arg5, long arg6, ia64_psr(regs)->up = 0; #endif - current->thread.flags &= ~(IA64_THREAD_PM_VALID); - break; + case PFM_DEBUG_ON: + printk(__FUNCTION__" debuggin on\n"); + pfm_debug = 1; + break; + + case PFM_DEBUG_OFF: + printk(__FUNCTION__" debuggin off\n"); + pfm_debug = 0; + break; + default: + DBprintk((__FUNCTION__" UNknown command 0x%x\n", cmd)); return -EINVAL; break; } return 0; } -static inline void -update_counters (void) +asmlinkage int +sys_perfmonctl (int pid, int cmd, int flags, perfmon_req_t *req, int count, long arg6, long arg7, long arg8, long stack) { - unsigned long mask, i, cnum, val; + struct pt_regs *regs = (struct pt_regs *) &stack; + struct task_struct *child = current; + int ret; + + if (pid != current->pid) { + read_lock(&tasklist_lock); + { + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + } + if (!child) { + read_unlock(&tasklist_lock); + return -ESRCH; + } + /* + * XXX: need to do more checking here + */ + if (child->state != TASK_ZOMBIE) { + DBprintk((__FUNCTION__" warning process %d not in stable state %ld\n", pid, child->state)); + } + } + ret = do_perfmonctl(child, cmd, flags, req, count, regs); - mask = ia64_get_pmc(0) >> 4; - for (i = 0, cnum = PMU_FIRST_COUNTER ; i < pmu_conf.max_counters; cnum++, i++, mask >>= 1) { + if (child != current) read_unlock(&tasklist_lock); + return ret; +} - val = mask & 0x1 ? pmu_conf.perf_ovfl_val + 1 : 0; - if (mask & 0x1) - printk(__FUNCTION__ " PMD%ld overflowed pmd=%lx pmod=%lx\n", cnum, ia64_get_pmd(cnum), pmds[smp_processor_id()][i]); +static inline int +update_counters (u64 pmc0) +{ + unsigned long mask, i, cnum; + struct thread_struct *th; + struct task_struct *ta; - /* since we got an interrupt, might as well clear every pmd. 
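By this point the reworked perfmon interface is fully visible: requests travel as arrays of perfmon_req_t, and the command code selects what is done with them. The sketch below shows how a caller might drive it end to end. The perfmonctl() wrapper, the choice of counter 4 and the event encoding are assumptions for illustration only; the command values and the perfmon_req_t layout are the ones from the diff above.

#include <sys/types.h>

typedef struct {
    unsigned long pfr_reg_num;    /* which PMC/PMD */
    unsigned long pfr_reg_value;  /* configuration (PMC) or initial value (PMD) */
    unsigned long pfr_reg_reset;  /* reset value on overflow (PMD) */
    void         *pfr_smpl_buf;
    unsigned long pfr_smpl_size;
    pid_t         pfr_notify_pid;
    int           pfr_notify_sig;
} perfmon_req_t;

/* assumed user-space wrapper around the sys_perfmonctl() entry point added above */
extern int perfmonctl(pid_t pid, int cmd, int flags, perfmon_req_t *req, int count);

#define PFM_WRITE_PMCS 0xa0
#define PFM_WRITE_PMDS 0xa1
#define PFM_READ_PMDS  0xa2
#define PFM_STOP       0xa3
#define PFM_START      0xa4

long count_one_event(pid_t self, unsigned long pmc4_config)
{
    perfmon_req_t pc = { .pfr_reg_num = 4, .pfr_reg_value = pmc4_config };
    perfmon_req_t pd = { .pfr_reg_num = 4, .pfr_reg_value = 0, .pfr_reg_reset = 0 };

    perfmonctl(self, PFM_WRITE_PMCS, 0, &pc, 1);   /* program counter 4 */
    perfmonctl(self, PFM_WRITE_PMDS, 0, &pd, 1);   /* zero its data register */
    perfmonctl(self, PFM_START, 0, NULL, 0);       /* psr.up is set on return to user */

    /* ... workload under measurement ... */

    perfmonctl(self, PFM_STOP, 0, NULL, 0);
    perfmonctl(self, PFM_READ_PMDS, 0, &pd, 1);
    return (long) pd.pfr_reg_value;                /* 64-bit virtualized count */
}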
 */
- val += ia64_get_pmd(cnum) & pmu_conf.perf_ovfl_val;
+ if (pmu_owners[smp_processor_id()] == NULL) {
+ DBprintk((__FUNCTION__" Spurious overflow interrupt: PMU not owned\n"));
+ return 0;
+ }
+
+ /*
+ * It is never safe to access the task for which the overflow interrupt is destined
+ * using the current variable as the interrupt may occur in the middle of a context switch
+ * where current does not hold the task that is running yet.
+ *
+ * For monitoring, however, we do need to get access to the task which caused the overflow
+ * to account for overflow on the counters.
+ * We accomplish this by maintaining a current owner of the PMU per CPU. During context
+ * switch the ownership is changed in a way such that the reflected owner is always the
+ * valid one, i.e. the one that caused the interrupt.
+ */
+ ta = pmu_owners[smp_processor_id()];
+ th = &pmu_owners[smp_processor_id()]->thread;
- printk(__FUNCTION__ " adding val=%lx to pmod[%ld]=%lx \n", val, i, pmds[smp_processor_id()][i]);
+ /*
+ * Don't think this could happen given first test. Keep as sanity check
+ */
+ if ((th->flags & IA64_THREAD_PM_VALID) == 0) {
+ DBprintk((__FUNCTION__" Spurious overflow interrupt: process %d not using perfmon\n", ta->pid));
+ return 0;
+ }
+
+ /*
+ * if PMU not frozen: spurious from previous context
+ * if PMC[0] = 0x1 : frozen but no overflow reported: leftover from previous context
+ *
+ * in either case we don't touch the state upon return from handler
+ */
+ if ((pmc0 & 0x1) == 0 || pmc0 == 0x1) {
+ DBprintk((__FUNCTION__" Spurious overflow interrupt: process %d freeze=0\n",ta->pid));
+ return 0;
+ }
- pmds[smp_processor_id()][i] += val;
+ mask = pmc0 >> 4;
- ia64_set_pmd(cnum, 0);
+ for (i = 0, cnum = PMU_FIRST_COUNTER; i < pmu_conf.max_counters; cnum++, i++, mask >>= 1) {
+
+ if (mask & 0x1) {
+ DBprintk((__FUNCTION__ " PMD[%ld] overflowed pmd=0x%lx pmod.val=0x%lx\n", cnum, ia64_get_pmd(cnum), th->pmu_counters[i].val));
+
+ /*
+ * Because we sometimes (EARS/BTB) reset to a specific value, we cannot simply use
+ * val to count the number of times we overflowed. Otherwise we would lose the value
+ * currently in the PMD (which can be >0). So to make sure we don't lose
+ * the residual counts we set val to contain the full 64-bit value of the counter.
+ */
+ th->pmu_counters[i].val += 1+pmu_conf.perf_ovfl_val+(ia64_get_pmd(cnum) &pmu_conf.perf_ovfl_val);
+
+ /* writes to upper part are ignored, so this is safe */
+ ia64_set_pmd(cnum, th->pmu_counters[i].rval);
+
+ DBprintk((__FUNCTION__ " pmod[%ld].val=0x%lx pmd=0x%lx\n", i, th->pmu_counters[i].val, ia64_get_pmd(cnum)&pmu_conf.perf_ovfl_val));
+
+ if (th->pmu_counters[i].pid != 0 && th->pmu_counters[i].sig>0) {
+ DBprintk((__FUNCTION__ " should notify process %d with signal %d\n",th->pmu_counters[i].pid, th->pmu_counters[i].sig));
+ }
+ }
 }
+ return 1;
 }
 
 static void
 perfmon_interrupt (int irq, void *arg, struct pt_regs *regs)
 {
- update_counters();
- ia64_set_pmc(0, 0);
- ia64_srlz_d();
+ /* unfreeze if not spurious */
+ if ( update_counters(ia64_get_pmc(0)) ) {
+ ia64_set_pmc(0, 0);
+ ia64_srlz_d();
+ }
 }
 
 static struct irqaction perfmon_irqaction = {
@@ -280,9 +483,13 @@ perfmon_proc_info(char *page)
 {
 char *p = page;
 u64 pmc0 = ia64_get_pmc(0);
+ int i;
 
- p += sprintf(p, "PMC[0]=%lx\n", pmc0);
-
+ p += sprintf(p, "PMC[0]=%lx\nPerfmon debug: %s\n", pmc0, pfm_debug ? "On" : "Off");
+ for(i=0; i < NR_CPUS; i++) {
+ if (cpu_is_online(i))
+ p += sprintf(p, "CPU%d.PMU %d\n", i, pmu_owners[i] ? pmu_owners[i]->pid: -1);
+ }
 return p - page;
 }
 
@@ -308,7 +515,6 @@ void __init
 perfmon_init (void)
 {
 pal_perf_mon_info_u_t pm_info;
- u64 pm_buffer[16];
 s64 status;
 
 irq_desc[PERFMON_IRQ].status |= IRQ_PER_CPU;
@@ -320,15 +526,13 @@ perfmon_init (void)
 printk("perfmon: Initialized vector to %u\n",PERFMON_IRQ);
 
- if ((status=ia64_pal_perf_mon_info(pm_buffer, &pm_info)) != 0) {
+ if ((status=ia64_pal_perf_mon_info(pmu_conf.impl_regs, &pm_info)) != 0) {
 printk(__FUNCTION__ " pal call failed (%ld)\n", status);
 return;
 }
- pmu_conf.perf_ovfl_val = perf_ovfl_val = (1L << pm_info.pal_perf_mon_info_s.width) - 1;
+ pmu_conf.perf_ovfl_val = (1L << pm_info.pal_perf_mon_info_s.width) - 1;
 
 /* XXX need to use PAL instead */
- pmu_conf.max_pmc = 13;
- pmu_conf.max_pmd = 17;
 pmu_conf.max_counters = pm_info.pal_perf_mon_info_s.generic;
 
 printk("perfmon: Counters are %d bits\n", pm_info.pal_perf_mon_info_s.width);
@@ -347,36 +551,137 @@ perfmon_init_percpu (void)
 ia64_srlz_d();
 }
 
+/*
+ * XXX: for system wide this function MUST never be called
+ */
 void
-ia64_save_pm_regs (struct thread_struct *t)
+ia64_save_pm_regs (struct task_struct *ta)
 {
- int i;
+ struct thread_struct *t = &ta->thread;
+ u64 pmc0, psr;
+ int i,j;
+
+ /*
+ * We must make sure that we don't lose any potential overflow
+ * interrupt while saving PMU context. In this code, external
+ * interrupts are always enabled.
+ */
+
+ /*
+ * save current PSR: needed because we modify it
+ */
+ __asm__ __volatile__ ("mov %0=psr;;": "=r"(psr) :: "memory");
+
+ /*
+ * stop monitoring:
+ * This is the only way to stop monitoring without destroying overflow
+ * information in PMC[0..3].
+ * This is the last instruction which can cause overflow when monitoring
+ * in kernel.
+ * By now, we could still have an overflow interrupt in flight.
+ */
+ __asm__ __volatile__ ("rsm psr.up;;"::: "memory");
+
+ /*
+ * read current overflow status:
+ *
+ * We may be reading stale information at this point, if we got an interrupt
+ * just before the read(pmc0) but that's all right. However, if we did
+ * not get the interrupt before, this read reflects LAST state.
+ *
+ */
+ pmc0 = ia64_get_pmc(0);
+ /*
+ * freeze PMU:
+ *
+ * This destroys the overflow information. This is required to make sure
+ * next process does not start with monitoring on if not requested
+ * (PSR.up may not be enough).
+ *
+ * We could still get an overflow interrupt by now. However the handler
+ * will not do anything if it sees PMC[0].fr=1 but no overflow bits
+ * are set. So PMU will stay in frozen state. This implies that pmc0
+ * will still be holding the correct unprocessed information.
+ *
+ */
 ia64_set_pmc(0, 1);
 ia64_srlz_d();
+
+ /*
+ * check for overflow bits set:
+ *
+ * If pmc0 reports PMU frozen, this means we have a pending overflow,
+ * therefore we invoke the handler. Handler is reentrant with regard
+ * to PMC[0] so it is safe to call it twice.
+ *
+ * IF pmc0 reports overflow, we need to reread current PMC[0] value
+ * in case the handler was invoked right after the first pmc0 read.
+ * If it was not invoked then pmc0==PMC[0], otherwise it's been invoked
+ * and overflow information has been processed, so we don't need to call.
+ *
+ * Test breakdown:
+ * - pmc0 & ~0x1: test if overflow happened
+ * - second part: check if current register reflects this as well.
+ *
+ * NOTE: testing for pmc0 & 0x1 is not enough as it would trigger a call
+ * when PM_VALID and PMU.fr are set, which is common when setting up registers
+ * just before actually starting monitors. 
+ * + */ + if ((pmc0 & ~0x1) && ((pmc0=ia64_get_pmc(0)) &~0x1) ) { + printk(__FUNCTION__" Warning: pmc[0]=0x%lx\n", pmc0); + update_counters(pmc0); + /* + * XXX: not sure that's enough. the next task may still get the + * interrupt. + */ + } + + /* + * restore PSR for context switch to save + */ + __asm__ __volatile__ ("mov psr.l=%0;;"::"r"(psr): "memory"); + /* * XXX: this will need to be extended beyong just counters */ - for (i=0; i< IA64_NUM_PM_REGS; i++) { - t->pmd[i] = ia64_get_pmd(4+i); - t->pmod[i] = pmds[smp_processor_id()][i]; - t->pmc[i] = ia64_get_pmc(4+i); + for (i=0,j=4; i< IA64_NUM_PMD_COUNTERS; i++,j++) { + t->pmd[i] = ia64_get_pmd(j); + t->pmc[i] = ia64_get_pmc(j); } + /* + * PMU is frozen, PMU context is saved: nobody owns the PMU on this CPU + * At this point, we should not receive any pending interrupt from the + * 'switched out' task + */ + pmu_owners[smp_processor_id()] = NULL; } void -ia64_load_pm_regs (struct thread_struct *t) +ia64_load_pm_regs (struct task_struct *ta) { - int i; + struct thread_struct *t = &ta->thread; + int i,j; + + /* + * we first restore ownership of the PMU to the 'soon to be current' + * context. This way, if, as soon as we unfreeze the PMU at the end + * of this function, we get an interrupt, we attribute it to the correct + * task + */ + pmu_owners[smp_processor_id()] = ta; /* * XXX: this will need to be extended beyong just counters */ - for (i=0; i< IA64_NUM_PM_REGS ; i++) { - ia64_set_pmd(4+i, t->pmd[i]); - pmds[smp_processor_id()][i] = t->pmod[i]; - ia64_set_pmc(4+i, t->pmc[i]); + for (i=0,j=4; i< IA64_NUM_PMD_COUNTERS; i++,j++) { + ia64_set_pmd(j, t->pmd[i]); + ia64_set_pmc(j, t->pmc[i]); } + /* + * unfreeze PMU + */ ia64_set_pmc(0, 0); ia64_srlz_d(); } diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 41db60a0c..e61843db5 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -137,23 +137,6 @@ cpu_idle (void *unused) check_pgt_cache(); if (pm_idle) (*pm_idle)(); -#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC - local_irq_disable(); - { - u64 itc, itm; - - itc = ia64_get_itc(); - itm = ia64_get_itm(); - if (time_after(itc, itm + 1000)) { - extern void ia64_reset_itm (void); - - printk("cpu_idle: ITM in past (itc=%lx,itm=%lx:%lums)\n", - itc, itm, (itc - itm)/500000); - ia64_reset_itm(); - } - } - local_irq_enable(); -#endif } } @@ -164,7 +147,7 @@ ia64_save_extra (struct task_struct *task) ia64_save_debug_regs(&task->thread.dbr[0]); #ifdef CONFIG_PERFMON if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) - ia64_save_pm_regs(&task->thread); + ia64_save_pm_regs(task); #endif if (IS_IA32_PROCESS(ia64_task_regs(task))) ia32_save_state(&task->thread); @@ -177,7 +160,7 @@ ia64_load_extra (struct task_struct *task) ia64_load_debug_regs(&task->thread.dbr[0]); #ifdef CONFIG_PERFMON if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) - ia64_load_pm_regs(&task->thread); + ia64_load_pm_regs(task); #endif if (IS_IA32_PROCESS(ia64_task_regs(task))) ia32_load_state(&task->thread); @@ -299,6 +282,14 @@ copy_thread (int nr, unsigned long clone_flags, # define THREAD_FLAGS_TO_SET 0 p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR) | THREAD_FLAGS_TO_SET); +#ifdef CONFIG_IA32_SUPPORT + /* + * If we're cloning an IA32 task then save the IA32 extra + * state from the current task to the new task + */ + if (IS_IA32_PROCESS(ia64_task_regs(current))) + ia32_save_state(&p->thread); +#endif return 0; } @@ -554,7 +545,7 @@ exit_thread (void) * we garantee no race. 
this call we also stop * monitoring */ - ia64_save_pm_regs(¤t->thread); + ia64_save_pm_regs(current); /* * make sure that switch_to() will not save context again */ diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 820a87854..0b49bdcaa 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -617,7 +617,6 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data struct switch_stack *sw; struct unw_frame_info info; struct pt_regs *pt; - unsigned long pmd_tmp; pt = ia64_task_regs(child); sw = (struct switch_stack *) (child->thread.ksp + 16); @@ -794,11 +793,7 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data addr); return -1; } - } else -#ifdef CONFIG_PERFMON - if (addr < PT_PMD) -#endif - { + } else { /* access debug registers */ if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) { @@ -820,33 +815,14 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data } ptr += regnum; - } -#ifdef CONFIG_PERFMON - else { - /* - * XXX: will eventually move back to perfmonctl() - */ - unsigned long pmd = (addr - PT_PMD) >> 3; - extern unsigned long perf_ovfl_val; - - /* we just use ptrace to read */ - if (write_access) return -1; - - if (pmd > 3) { - printk("ptrace: rejecting access to PMD[%ld] address 0x%lx\n", pmd, addr); - return -1; - } - /* - * We always need to mask upper 32bits of pmd because value is random - */ - pmd_tmp = child->thread.pmod[pmd]+(child->thread.pmd[pmd]& perf_ovfl_val); - - /*printk(__FUNCTION__" child=%d reading pmd[%ld]=%lx\n", child->pid, pmd, pmd_tmp);*/ - - ptr = &pmd_tmp; + if (write_access) + /* don't let the user set kernel-level breakpoints... */ + *ptr = *data & ~(7UL << 56); + else + *data = *ptr; + return 0; } -#endif if (write_access) *ptr = *data; else @@ -861,7 +837,6 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data { unsigned long *ptr = NULL, *rbs, *bspstore, ndirty, regnum; struct switch_stack *sw; - unsigned long pmd_tmp; struct pt_regs *pt; if ((addr & 0x7) != 0) @@ -977,11 +952,7 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data /* disallow accessing anything else... */ return -1; } - } else -#ifdef CONFIG_PERFMON - if (addr < PT_PMD) -#endif - { + } else { /* access debug registers */ @@ -1002,34 +973,14 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data return -1; ptr += regnum; - } -#ifdef CONFIG_PERFMON - else { - /* - * XXX: will eventually move back to perfmonctl() - */ - unsigned long pmd = (addr - PT_PMD) >> 3; - extern unsigned long perf_ovfl_val; - - /* we just use ptrace to read */ - if (write_access) return -1; - - if (pmd > 3) { - printk("ptrace: rejecting access to PMD[%ld] address 0x%lx\n", pmd, addr); - return -1; - } - /* - * We always need to mask upper 32bits of pmd because value is random - */ - pmd_tmp = child->thread.pmod[pmd]+(child->thread.pmd[pmd]& perf_ovfl_val); - - /*printk(__FUNCTION__" child=%d reading pmd[%ld]=%lx\n", child->pid, pmd, pmd_tmp);*/ - - ptr = &pmd_tmp; + if (write_access) + /* don't let the user set kernel-level breakpoints... 
*/ + *ptr = *data & ~(7UL << 56); + else + *data = *ptr; + return 0; } -#endif - if (write_access) *ptr = *data; else @@ -1107,7 +1058,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, goto out_tsk; if (child->state != TASK_STOPPED) { - if (request != PTRACE_KILL && request != PTRACE_PEEKUSR) + if (request != PTRACE_KILL) goto out_tsk; } diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c index 87c7befea..56059a306 100644 --- a/arch/ia64/kernel/sal.c +++ b/arch/ia64/kernel/sal.c @@ -104,9 +104,11 @@ ia64_sal_init (struct ia64_sal_systab *systab) if (strncmp(systab->signature, "SST_", 4) != 0) printk("bad signature in system table!"); - printk("SAL v%u.%02u: ia32bios=%s, oem=%.32s, product=%.32s\n", + /* + * revisions are coded in BCD, so %x does the job for us + */ + printk("SAL v%x.%02x: oem=%.32s, product=%.32s\n", systab->sal_rev_major, systab->sal_rev_minor, - systab->ia32_bios_present ? "present" : "absent", systab->oem_id, systab->product_id); min = ~0UL; diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index ed091d864..83d5643cd 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -235,6 +235,12 @@ setup_arch (char **cmdline_p) machvec_init(acpi_get_sysname()); #endif +#ifdef CONFIG_ACPI20 + if (efi.acpi20) { + /* Parse the ACPI 2.0 tables */ + acpi20_parse(efi.acpi20); + } else +#endif if (efi.acpi) { /* Parse the ACPI tables */ acpi_parse(efi.acpi); @@ -255,13 +261,6 @@ setup_arch (char **cmdline_p) paging_init(); platform_setup(cmdline_p); - -#ifdef CONFIG_SWIOTLB - { - extern void setup_swiotlb (void); - setup_swiotlb(); - } -#endif } /* @@ -271,9 +270,9 @@ int get_cpuinfo (char *buffer) { #ifdef CONFIG_SMP -# define lps c->loops_per_sec +# define lpj c->loops_per_jiffy #else -# define lps loops_per_sec +# define lpj loops_per_jiffy #endif char family[32], model[32], features[128], *cp, *p = buffer; struct cpuinfo_ia64 *c; @@ -325,7 +324,7 @@ get_cpuinfo (char *buffer) features, c->ppn, c->number, c->proc_freq / 1000000, c->proc_freq % 1000000, c->itc_freq / 1000000, c->itc_freq % 1000000, - lps / 500000, (lps / 5000) % 100); + lpj*HZ/500000, (lpj*HZ/5000) % 100); } return p - buffer; } @@ -376,15 +375,7 @@ identify_cpu (struct cpuinfo_ia64 *c) status = ia64_pal_vm_summary(&vm1, &vm2); if (status == PAL_STATUS_SUCCESS) { -#if 1 - /* - * XXX the current PAL code returns IMPL_VA_MSB==60, which is dead-wrong. - * --davidm 00/05/26 - s*/ - impl_va_msb = 50; -#else impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb; -#endif phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size; } printk("CPU %d: %lu virtual and %lu physical address bits\n", @@ -408,6 +399,8 @@ cpu_init (void) { extern void __init ia64_rid_init (void); extern void __init ia64_tlb_init (void); + pal_vm_info_2_u_t vmi; + unsigned int max_ctx; identify_cpu(&my_cpu_data); @@ -415,15 +408,12 @@ cpu_init (void) memset(ia64_task_regs(current), 0, sizeof(struct pt_regs)); /* - * Initialize default control register to defer speculative - * faults. On a speculative load, we want to defer access - * right, key miss, and key permission faults. We currently - * do NOT defer TLB misses, page-not-present, access bit, or - * debug faults but kernel code should not rely on any - * particular setting of these bits. - ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_PP); + * Initialize default control register to defer all speculative faults. 
The + * kernel MUST NOT depend on a particular setting of these bits (in other words, + * the kernel must have recovery code for all speculative accesses). */ - ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX ); + ia64_set_dcr( IA64_DCR_DM | IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR + | IA64_DCR_DA | IA64_DCR_DD); #ifndef CONFIG_SMP ia64_set_fpu_owner(0); /* initialize ar.k5 */ #endif @@ -444,4 +434,17 @@ cpu_init (void) #ifdef CONFIG_SMP normal_xtp(); #endif + + /* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */ + if (ia64_pal_vm_summary(NULL, &vmi) == 0) + max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1; + else { + printk("ia64_rid_init: PAL VM summary failed, assuming 18 RID bits\n"); + max_ctx = (1U << 15) - 1; /* use architected minimum */ + } + while (max_ctx < ia64_ctx.max_ctx) { + unsigned int old = ia64_ctx.max_ctx; + if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old) + break; + } } diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index e0adf1981..3ffa201aa 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -91,7 +91,7 @@ ia64_rt_sigsuspend (sigset_t *uset, size_t sigsetsize, struct sigscratch *scr) scr->pt.r10 = -1; } while (1) { - set_current_state(TASK_INTERRUPTIBLE); + current->state = TASK_INTERRUPTIBLE; schedule(); if (ia64_do_signal(&oldset, scr, 1)) return -EINTR; @@ -499,9 +499,10 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) /* Let the debugger run. */ current->exit_code = signr; current->thread.siginfo = &info; - set_current_state(TASK_STOPPED); + current->state = TASK_STOPPED; notify_parent(current, SIGCHLD); schedule(); + signr = current->exit_code; current->thread.siginfo = 0; @@ -557,7 +558,7 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) /* FALLTHRU */ case SIGSTOP: - set_current_state(TASK_STOPPED); + current->state = TASK_STOPPED; current->exit_code = signr; if (!(current->p_pptr->sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 694711507..5093341a5 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -6,11 +6,13 @@ * * Lots of stuff stolen from arch/alpha/kernel/smp.c * - * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_sec calibration on each CPU. + * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_jiffy calibration on each CPU. * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id * 00/03/31 Rohit Seth <rohit.seth@intel.com> Fixes for Bootstrap Processor & cpu_online_map * now gets done here (instead of setup.c) * 99/10/05 davidm Update to bring it in sync with new command-line processing scheme. 
+ * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and + * smp_call_function_single to resend IPI on timeouts */ #define __KERNEL_SYSCALLS__ @@ -30,6 +32,7 @@ #include <asm/current.h> #include <asm/delay.h> #include <asm/efi.h> +#include <asm/machvec.h> #include <asm/io.h> #include <asm/irq.h> @@ -78,10 +81,6 @@ struct smp_call_struct { }; static volatile struct smp_call_struct *smp_call_function_data; -#ifdef CONFIG_ITANIUM_A1_SPECIFIC -extern spinlock_t ivr_read_lock; -#endif - #define IPI_RESCHEDULE 0 #define IPI_CALL_FUNC 1 #define IPI_CPU_STOP 2 @@ -269,14 +268,14 @@ handle_IPI(int irq, void *dev_id, struct pt_regs *regs) } static inline void -send_IPI_single(int dest_cpu, int op) +send_IPI_single (int dest_cpu, int op) { if (dest_cpu == -1) return; set_bit(op, &ipi_op[dest_cpu]); - ipi_send(dest_cpu, IPI_IRQ, IA64_IPI_DM_INT, 0); + platform_send_ipi(dest_cpu, IPI_IRQ, IA64_IPI_DM_INT, 0); } static inline void @@ -358,6 +357,7 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int if (pointer_lock(&smp_call_function_data, &data, retry)) return -EBUSY; +resend: /* Send a message to all other CPUs and wait for them to respond */ send_IPI_single(cpuid, IPI_CALL_FUNC); @@ -366,8 +366,12 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int while ((atomic_read(&data.unstarted_count) > 0) && time_before(jiffies, timeout)) barrier(); if (atomic_read(&data.unstarted_count) > 0) { +#if (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) + goto resend; +#else smp_call_function_data = NULL; return -ETIMEDOUT; +#endif } if (wait) while (atomic_read(&data.unfinished_count) > 0) @@ -411,13 +415,23 @@ smp_call_function (void (*func) (void *info), void *info, int retry, int wait) /* Send a message to all other CPUs and wait for them to respond */ send_IPI_allbutself(IPI_CALL_FUNC); +retry: /* Wait for response */ timeout = jiffies + HZ; while ((atomic_read(&data.unstarted_count) > 0) && time_before(jiffies, timeout)) barrier(); if (atomic_read(&data.unstarted_count) > 0) { +#if (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) + int i; + for (i = 0; i < smp_num_cpus; i++) { + if (i != smp_processor_id()) + platform_send_ipi(i, IPI_IRQ, IA64_IPI_DM_INT, 0); + } + goto retry; +#else smp_call_function_data = NULL; return -ETIMEDOUT; +#endif } if (wait) while (atomic_read(&data.unfinished_count) > 0) @@ -430,8 +444,6 @@ smp_call_function (void (*func) (void *info), void *info, int retry, int wait) /* * Flush all other CPU's tlb and then mine. Do this with smp_call_function() as we * want to ensure all TLB's flushed before proceeding. - * - * XXX: Is it OK to use the same ptc.e info on all cpus? 
*/ void smp_flush_tlb_all(void) @@ -502,7 +514,7 @@ smp_callin (void) local_irq_enable(); /* Interrupts have been off until now */ calibrate_delay(); - my_cpu_data.loops_per_sec = loops_per_sec; + my_cpu_data.loops_per_jiffy = loops_per_jiffy; /* allow the master to continue */ set_bit(cpu, &cpu_callin_map); @@ -569,7 +581,7 @@ smp_boot_one_cpu(int cpu) cpu_now_booting = cpu; /* Kick the AP in the butt */ - ipi_send(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0); + platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0); /* wait up to 10s for the AP to start */ for (timeout = 0; timeout < 100000; timeout++) { @@ -603,7 +615,7 @@ smp_boot_cpus(void) __cpu_physical_id[0] = hard_smp_processor_id(); /* on the BP, the kernel already called calibrate_delay_loop() in init/main.c */ - my_cpu_data.loops_per_sec = loops_per_sec; + my_cpu_data.loops_per_jiffy = loops_per_jiffy; #if 0 smp_tune_scheduling(); #endif @@ -653,13 +665,11 @@ smp_boot_cpus(void) bogosum = 0; for (i = 0; i < NR_CPUS; i++) { if (cpu_online_map & (1L << i)) - bogosum += cpu_data[i].loops_per_sec; + bogosum += cpu_data[i].loops_per_jiffy; } - printk(KERN_INFO "SMP: Total of %d processors activated " - "(%lu.%02lu BogoMIPS).\n", - cpu_count, (bogosum + 2500) / 500000, - ((bogosum + 2500) / 5000) % 100); + printk(KERN_INFO "SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + cpu_count, bogosum*HZ/500000, (bogosum*HZ/5000) % 100); smp_num_cpus = cpu_count; } diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index f78512229..2713d7fd9 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -16,8 +16,38 @@ #include <linux/smp_lock.h> #include <linux/highuid.h> +#include <asm/shmparam.h> #include <asm/uaccess.h> +#define COLOR_ALIGN(addr) (((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + +unsigned long +get_unmapped_area (unsigned long addr, unsigned long len) +{ + struct vm_area_struct * vmm; + + if (len > RGN_MAP_LIMIT) + return 0; + if (!addr) + addr = TASK_UNMAPPED_BASE; + + if (current->thread.flags & IA64_THREAD_MAP_SHARED) + addr = COLOR_ALIGN(addr); + else + addr = PAGE_ALIGN(addr); + + for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) { + /* At this point: (!vmm || addr < vmm->vm_end). */ + if (TASK_SIZE - len < addr) + return 0; + if (rgn_offset(addr) + len > RGN_MAP_LIMIT) /* no risk of overflow here... */ + return 0; + if (!vmm || addr + len <= vmm->vm_start) + return addr; + addr = vmm->vm_end; + } +} + asmlinkage long ia64_getpriority (int which, int who, long arg2, long arg3, long arg4, long arg5, long arg6, long arg7, long stack) @@ -34,6 +64,7 @@ ia64_getpriority (int which, int who, long arg2, long arg3, long arg4, long arg5 return prio; } +/* XXX obsolete, but leave it here until the old libc is gone... */ asmlinkage unsigned long sys_getpagesize (void) { @@ -58,16 +89,61 @@ ia64_shmat (int shmid, void *shmaddr, int shmflg, long arg3, long arg4, long arg } asmlinkage unsigned long -ia64_brk (long brk, long arg1, long arg2, long arg3, +ia64_brk (unsigned long brk, long arg1, long arg2, long arg3, long arg4, long arg5, long arg6, long arg7, long stack) { - extern unsigned long sys_brk (unsigned long brk); + extern int vm_enough_memory (long pages); struct pt_regs *regs = (struct pt_regs *) &stack; - unsigned long retval; + unsigned long rlim, retval, newbrk, oldbrk; + struct mm_struct *mm = current->mm; + + /* + * Most of this replicates the code in sys_brk() except for an additional safety + * check and the clearing of r8. 
However, we can't call sys_brk() because we need + * to acquire the mmap_sem before we can do the test... + */ + down(&mm->mmap_sem); - retval = sys_brk(brk); + if (brk < mm->end_code) + goto out; + newbrk = PAGE_ALIGN(brk); + oldbrk = PAGE_ALIGN(mm->brk); + if (oldbrk == newbrk) + goto set_brk; + + /* Always allow shrinking brk. */ + if (brk <= mm->brk) { + if (!do_munmap(mm, newbrk, oldbrk-newbrk)) + goto set_brk; + goto out; + } - regs->r8 = 0; /* ensure large retval isn't mistaken as error code */ + /* Check against unimplemented/unmapped addresses: */ + if ((newbrk - oldbrk) > RGN_MAP_LIMIT || rgn_offset(newbrk) > RGN_MAP_LIMIT) + goto out; + + /* Check against rlimit.. */ + rlim = current->rlim[RLIMIT_DATA].rlim_cur; + if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim) + goto out; + + /* Check against existing mmap mappings. */ + if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE)) + goto out; + + /* Check if we have enough memory.. */ + if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) + goto out; + + /* Ok, looks good - let it rip. */ + if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk) + goto out; +set_brk: + mm->brk = brk; +out: + retval = mm->brk; + up(&mm->mmap_sem); + regs->r8 = 0; /* ensure large retval isn't mistaken as error code */ return retval; } @@ -95,10 +171,8 @@ sys_pipe (long arg0, long arg1, long arg2, long arg3, static inline unsigned long do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff) { - unsigned long loff, hoff; + unsigned long roff; struct file *file = 0; - /* the virtual address space that is mappable in each region: */ -# define OCTANT_SIZE ((PTRS_PER_PGD<<PGDIR_SHIFT)/8) /* * A zero mmap always succeeds in Linux, independent of @@ -107,15 +181,12 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un if (PAGE_ALIGN(len) == 0) return addr; - /* Don't permit mappings into or across the address hole in a region: */ - loff = rgn_offset(addr); - hoff = loff - (RGN_SIZE - OCTANT_SIZE/2); - if ((len | loff | (loff + len)) >= OCTANT_SIZE/2 - && (len | hoff | (hoff + len)) >= OCTANT_SIZE/2) + /* don't permit mappings into unmapped space or the virtual page table of a region: */ + roff = rgn_offset(addr); + if ((len | roff | (roff + len)) >= RGN_MAP_LIMIT) return -EINVAL; - /* Don't permit mappings that would cross a region boundary: */ - + /* don't permit mappings that would cross a region boundary: */ if (rgn_index(addr) != rgn_index(addr + len)) return -EINVAL; @@ -126,10 +197,15 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un return -EBADF; } + if (flags & MAP_SHARED) + current->thread.flags |= IA64_THREAD_MAP_SHARED; + down(¤t->mm->mmap_sem); addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); up(¤t->mm->mmap_sem); + current->thread.flags &= ~IA64_THREAD_MAP_SHARED; + if (file) fput(file); return addr; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 5e54e4f4b..8f65adc2c 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -152,19 +152,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { int cpu = smp_processor_id(); unsigned long new_itm; -#if 0 - static unsigned long last_time; - static unsigned char count; - int printed = 0; -#endif - /* - * Here we are in the timer irq handler. We have irqs locally - * disabled, but we don't know if the timer_bh is running on - * another CPU. We need to avoid to SMP race by acquiring the - * xtime_lock. 
- */ - write_lock(&xtime_lock); new_itm = itm.next[cpu].count; if (!time_after(ia64_get_itc(), new_itm)) @@ -173,48 +161,33 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) while (1) { /* - * Do kernel PC profiling here. We multiply the - * instruction number by four so that we can use a - * prof_shift of 2 to get instruction-level instead of - * just bundle-level accuracy. + * Do kernel PC profiling here. We multiply the instruction number by + * four so that we can use a prof_shift of 2 to get instruction-level + * instead of just bundle-level accuracy. */ if (!user_mode(regs)) do_profile(regs->cr_iip + 4*ia64_psr(regs)->ri); #ifdef CONFIG_SMP smp_do_timer(regs); - if (smp_processor_id() == 0) - do_timer(regs); -#else - do_timer(regs); #endif + if (smp_processor_id() == 0) { + /* + * Here we are in the timer irq handler. We have irqs locally + * disabled, but we don't know if the timer_bh is running on + * another CPU. We need to avoid to SMP race by acquiring the + * xtime_lock. + */ + write_lock(&xtime_lock); + do_timer(regs); + write_unlock(&xtime_lock); + } new_itm += itm.delta; itm.next[cpu].count = new_itm; if (time_after(new_itm, ia64_get_itc())) break; - -#if 0 - /* - * SoftSDV in SMP mode is _slow_, so we do "lose" ticks, - * but it's really OK... - */ - if (count > 0 && jiffies - last_time > 5*HZ) - count = 0; - if (count++ == 0) { - last_time = jiffies; - if (!printed) { - printk("Lost clock tick on CPU %d (now=%lx, next=%lx)!!\n", - cpu, ia64_get_itc(), itm.next[cpu].count); - printed = 1; -# ifdef CONFIG_IA64_DEBUG_IRQ - printk("last_cli_ip=%lx\n", last_cli_ip); -# endif - } - } -#endif } - write_unlock(&xtime_lock); /* * If we're too close to the next clock tick for comfort, we @@ -229,7 +202,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) ia64_set_itm(new_itm); } -#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_IA64_SOFTSDV_HACKS) +#ifdef CONFIG_IA64_SOFTSDV_HACKS /* * Interrupts must be disabled before calling this routine. @@ -240,7 +213,7 @@ ia64_reset_itm (void) timer_interrupt(0, 0, ia64_task_regs(current)); } -#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */ +#endif /* * Encapsulate access to the itm structure for SMP. diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 43340bf85..fd8369291 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -78,7 +78,7 @@ void die_if_kernel (char *str, struct pt_regs *regs, long err) { if (user_mode(regs)) { -#if 1 +#if 0 /* XXX for debugging only */ printk ("!!die_if_kernel: %s(%d): %s %ld\n", current->comm, current->pid, str, err); @@ -484,6 +484,20 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, sprintf(buf, "Disabled FPL fault---not supposed to happen!"); break; + case 26: /* NaT Consumption */ + case 31: /* Unsupported Data Reference */ + if (user_mode(regs)) { + siginfo.si_signo = SIGILL; + siginfo.si_code = ILL_ILLOPN; + siginfo.si_errno = 0; + siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); + siginfo.si_imm = vector; + force_sig_info(SIGILL, &siginfo, current); + return; + } + sprintf(buf, (vector == 26) ? "NaT consumption" : "Unsupported data reference"); + break; + case 29: /* Debug */ case 35: /* Taken Branch Trap */ case 36: /* Single Step Trap */ @@ -522,10 +536,10 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, case 34: /* Unimplemented Instruction Address Trap */ if (user_mode(regs)) { - printk("Woah! 
Unimplemented Instruction Address Trap!\n"); - siginfo.si_code = ILL_BADIADDR; siginfo.si_signo = SIGILL; + siginfo.si_code = ILL_BADIADDR; siginfo.si_errno = 0; + siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); force_sig_info(SIGILL, &siginfo, current); return; } @@ -544,7 +558,8 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, case 46: printk("Unexpected IA-32 intercept trap (Trap 46)\n"); - printk(" iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", regs->cr_iip, ifa, isr); + printk(" iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n", + regs->cr_iip, ifa, isr, iim); force_sig(SIGSEGV, current); return; diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 7cc238a83..a24121a26 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -572,7 +572,8 @@ getreg(unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs) */ if (regnum == 0) { *val = 0; - *nat = 0; + if (nat) + *nat = 0; return; } @@ -1563,9 +1564,13 @@ ia64_handle_unaligned(unsigned long ifa, struct pt_regs *regs) DPRINT(("ret=%d\n", ret)); if (ret) { - lock_kernel(); - force_sig(SIGSEGV, current); - unlock_kernel(); + struct siginfo si; + + si.si_signo = SIGBUS; + si.si_errno = 0; + si.si_code = BUS_ADRALN; + si.si_addr = (void *) ifa; + force_sig_info(SIGBUS, &si, current); } else { /* * given today's architecture this case is not likely to happen diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index 21a2ead16..f5ae7e497 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -46,16 +46,6 @@ #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define p5 5 -/* - * The unwind tables are supposed to be sorted, but the GNU toolchain - * currently fails to produce a sorted table in the presence of - * functions that go into sections other than .text. For example, the - * kernel likes to put initialization code into .text.init, which - * messes up the sort order. Hopefully, this will get fixed sometime - * soon. --davidm 00/05/23 - */ -#define UNWIND_TABLE_SORT_BUG - #define UNW_LOG_CACHE_SIZE 7 /* each unw_script is ~256 bytes in size */ #define UNW_CACHE_SIZE (1 << UNW_LOG_CACHE_SIZE) @@ -531,6 +521,10 @@ push (struct unw_state_record *sr) struct unw_reg_state *rs; rs = alloc_reg_state(); + if (!rs) { + printk("unwind: cannot stack reg state!\n"); + return; + } memcpy(rs, &sr->curr, sizeof(*rs)); rs->next = sr->stack; sr->stack = rs; @@ -1964,23 +1958,6 @@ init_unwind_table (struct unw_table *table, const char *name, unsigned long segm { struct unw_table_entry *start = table_start, *end = table_end; -#ifdef UNWIND_TABLE_SORT_BUG - { - struct unw_table_entry *e1, *e2, tmp; - - /* stupid bubble sort... 
*/ - - for (e1 = start; e1 < end; ++e1) { - for (e2 = e1 + 1; e2 < end; ++e2) { - if (e2->start_offset < e1->start_offset) { - tmp = *e1; - *e1 = *e2; - *e2 = tmp; - } - } - } - } -#endif table->name = name; table->segment_base = segment_base; table->gp = gp; @@ -2023,8 +2000,8 @@ unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned lon void unw_remove_unwind_table (void *handle) { - struct unw_table *table, *prevt; - struct unw_script *tmp, *prev; + struct unw_table *table, *prev; + struct unw_script *tmp; unsigned long flags; long index; @@ -2043,41 +2020,35 @@ unw_remove_unwind_table (void *handle) { /* first, delete the table: */ - for (prevt = (struct unw_table *) &unw.tables; prevt; prevt = prevt->next) - if (prevt->next == table) + for (prev = (struct unw_table *) &unw.tables; prev; prev = prev->next) + if (prev->next == table) break; - if (!prevt) { + if (!prev) { dprintk("unwind: failed to find unwind table %p\n", (void *) table); spin_unlock_irqrestore(&unw.lock, flags); return; } - prevt->next = table->next; + prev->next = table->next; + } + spin_unlock_irqrestore(&unw.lock, flags); - /* next, remove hash table entries for this table */ + /* next, remove hash table entries for this table */ - for (index = 0; index <= UNW_HASH_SIZE; ++index) { - if (unw.hash[index] >= UNW_CACHE_SIZE) - continue; + for (index = 0; index <= UNW_HASH_SIZE; ++index) { + tmp = unw.cache + unw.hash[index]; + if (unw.hash[index] >= UNW_CACHE_SIZE + || tmp->ip < table->start || tmp->ip >= table->end) + continue; - tmp = unw.cache + unw.hash[index]; - prev = 0; - while (1) { - write_lock(&tmp->lock); - { - if (tmp->ip >= table->start && tmp->ip < table->end) { - if (prev) - prev->coll_chain = tmp->coll_chain; - else - unw.hash[index] = -1; - tmp->ip = 0; - } else - prev = tmp; - } - write_unlock(&tmp->lock); + write_lock(&tmp->lock); + { + if (tmp->ip >= table->start && tmp->ip < table->end) { + unw.hash[index] = tmp->coll_chain; + tmp->ip = 0; } } + write_unlock(&tmp->lock); } - spin_unlock_irqrestore(&unw.lock, flags); kfree(table); } |
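
The overflow path added to update_counters() in the perfmon.c hunks above effectively maintains a software-extended 64-bit counter on top of the narrower hardware PMD: on each overflow interrupt it folds one full counter period plus the residual hardware count into the per-thread 64-bit value, then reloads the PMD with its restart value. The standalone C sketch below models only that arithmetic; it is not kernel code, and the names (soft_counter, account_overflow) and the 32-bit example width are illustrative assumptions, not part of the patch.

/*
 * Hypothetical standalone model (not kernel code) of the software-extended
 * counters maintained by perfmon.c's update_counters(): the hardware PMD is
 * only `width' bits wide, so on every overflow interrupt the handler adds the
 * full counter period plus the residual hardware count to a 64-bit value.
 */
#include <stdio.h>
#include <stdint.h>

struct soft_counter {
	uint64_t val;	/* 64-bit software-accumulated count (role of pmu_counters[i].val) */
	uint64_t rval;	/* value the hardware counter gets reloaded with (role of .rval) */
};

/* same role as pmu_conf.perf_ovfl_val: mask of the implemented counter bits */
static uint64_t ovfl_mask(unsigned int width)
{
	return ((uint64_t) 1 << width) - 1;
}

/* called for each counter whose overflow bit is set in PMC[0] */
static void account_overflow(struct soft_counter *c, uint64_t hw_pmd, unsigned int width)
{
	uint64_t mask = ovfl_mask(width);

	/* one full period (mask + 1) plus whatever accumulated after the wrap */
	c->val += 1 + mask + (hw_pmd & mask);

	/* the real handler would now write c->rval back into the PMD */
}

int main(void)
{
	struct soft_counter c = { 0, 0 };

	/* e.g. a 32-bit-wide counter that wrapped and has counted 5 more events since */
	account_overflow(&c, 5, 32);
	printf("soft count = %llu\n", (unsigned long long) c.val);	/* prints 4294967301 */
	return 0;
}

For the example input the sketch prints 4294967301, i.e. one full 2^32 period plus the 5 residual events, which is the same quantity the patch accumulates into pmu_counters[i].val before rewriting the PMD.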