author    Ralf Baechle <ralf@linux-mips.org>  2001-01-11 04:02:40 +0000
committer Ralf Baechle <ralf@linux-mips.org>  2001-01-11 04:02:40 +0000
commit    e47f00743fc4776491344f2c618cc8dc2c23bcbc (patch)
tree      13e03a113a82a184c51c19c209867cfd3a59b3b9 /arch/ia64/kernel
parent    b2ad5f821b1381492d792ca10b1eb7a107b48f14 (diff)
Merge with Linux 2.4.0.
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r--  arch/ia64/kernel/Makefile     |  14
-rw-r--r--  arch/ia64/kernel/acpi.c       | 419
-rw-r--r--  arch/ia64/kernel/efi.c        |   5
-rw-r--r--  arch/ia64/kernel/entry.S      |  84
-rw-r--r--  arch/ia64/kernel/fw-emu.c     |   1
-rw-r--r--  arch/ia64/kernel/head.S       |  21
-rw-r--r--  arch/ia64/kernel/ia64_ksyms.c |  16
-rw-r--r--  arch/ia64/kernel/iosapic.c    | 498
-rw-r--r--  arch/ia64/kernel/irq.c        |  15
-rw-r--r--  arch/ia64/kernel/irq_ia64.c   |  96
-rw-r--r--  arch/ia64/kernel/ivt.S        | 567
-rw-r--r--  arch/ia64/kernel/machvec.c    |   6
-rw-r--r--  arch/ia64/kernel/mca.c        |   4
-rw-r--r--  arch/ia64/kernel/mca_asm.S    |  15
-rw-r--r--  arch/ia64/kernel/minstate.h   |  91
-rw-r--r--  arch/ia64/kernel/pal.S        |   9
-rw-r--r--  arch/ia64/kernel/pci-dma.c    | 517
-rw-r--r--  arch/ia64/kernel/pci.c        |  70
-rw-r--r--  arch/ia64/kernel/perfmon.c    | 523
-rw-r--r--  arch/ia64/kernel/process.c    |  31
-rw-r--r--  arch/ia64/kernel/ptrace.c     |  79
-rw-r--r--  arch/ia64/kernel/sal.c        |   6
-rw-r--r--  arch/ia64/kernel/setup.c      |  55
-rw-r--r--  arch/ia64/kernel/signal.c     |   7
-rw-r--r--  arch/ia64/kernel/smp.c        |  44
-rw-r--r--  arch/ia64/kernel/sys_ia64.c   | 106
-rw-r--r--  arch/ia64/kernel/time.c       |  59
-rw-r--r--  arch/ia64/kernel/traps.c      |  23
-rw-r--r--  arch/ia64/kernel/unaligned.c  |  13
-rw-r--r--  arch/ia64/kernel/unwind.c     |  77
30 files changed, 1933 insertions, 1538 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 7a49511d3..e4ffb3ae6 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -9,20 +9,20 @@
all: kernel.o head.o init_task.o
-obj-y := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_ia64.o irq_sapic.o ivt.o \
- machvec.o pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \
- signal.o sys_ia64.o traps.o time.o unaligned.o unwind.o
+O_TARGET := kernel.o
-obj-$(CONFIG_IA64_GENERIC) += machvec.o
+obj-y := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_ia64.o irq_sapic.o ivt.o \
+ machvec.o pal.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \
+ signal.o sys_ia64.o traps.o time.o unaligned.o unwind.o
+obj-$(CONFIG_IA64_GENERIC) += machvec.o iosapic.o
+obj-$(CONFIG_IA64_DIG) += iosapic.o
obj-$(CONFIG_IA64_PALINFO) += palinfo.o
obj-$(CONFIG_PCI) += pci.o
obj-$(CONFIG_SMP) += smp.o smpboot.o
obj-$(CONFIG_IA64_MCA) += mca.o mca_asm.o
obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o
-O_TARGET := kernel.o
-O_OBJS := $(obj-y)
-OX_OBJS := ia64_ksyms.o
+export-objs := ia64_ksyms.o
clean::
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index a8c1ead1f..35ed564c9 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -6,6 +6,12 @@
*
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
+ * Copyright (C) 2000 Hewlett-Packard Co.
+ * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2000 Intel Corp.
+ * Copyright (C) 2000 J.I. Lee <jung-ik.lee@intel.com>
+ * ACPI based kernel configuration manager.
+ * ACPI 2.0 & IA64 ext 0.71
*/
#include <linux/config.h>
@@ -36,29 +42,87 @@ int __initdata total_cpus;
void (*pm_idle)(void);
+asm (".weak iosapic_register_legacy_irq");
+asm (".weak iosapic_init");
+
+const char *
+acpi_get_sysname (void)
+{
+ /* the following should go away once we have an ACPI parser: */
+#ifdef CONFIG_IA64_GENERIC
+ return "hpsim";
+#else
+# if defined (CONFIG_IA64_HP_SIM)
+ return "hpsim";
+# elif defined (CONFIG_IA64_SGI_SN1)
+ return "sn1";
+# elif defined (CONFIG_IA64_DIG)
+ return "dig";
+# else
+# error Unknown platform. Fix acpi.c.
+# endif
+#endif
+
+}
+
/*
- * Identify usable CPU's and remember them for SMP bringup later.
+ * Configure legacy IRQ information.
*/
static void __init
-acpi_lsapic(char *p)
+acpi_legacy_irq (char *p)
{
- int add = 1;
-
- acpi_entry_lsapic_t *lsapic = (acpi_entry_lsapic_t *) p;
+ acpi_entry_int_override_t *legacy = (acpi_entry_int_override_t *) p;
+ unsigned long polarity = 0, edge_triggered = 0;
- if ((lsapic->flags & LSAPIC_PRESENT) == 0)
+ /*
+ * If the platform we're running doesn't define
+ * iosapic_register_legacy_irq(), we ignore this info...
+ */
+ if (!iosapic_register_legacy_irq)
return;
+ switch (legacy->flags) {
+ case 0x5: polarity = 1; edge_triggered = 1; break;
+ case 0x7: polarity = 0; edge_triggered = 1; break;
+ case 0xd: polarity = 1; edge_triggered = 0; break;
+ case 0xf: polarity = 0; edge_triggered = 0; break;
+ default:
+ printk(" ACPI Legacy IRQ 0x%02x: Unknown flags 0x%x\n", legacy->isa_irq,
+ legacy->flags);
+ break;
+ }
+ iosapic_register_legacy_irq(legacy->isa_irq, legacy->pin, polarity, edge_triggered);
+}
+
+/*
+ * ACPI 2.0 tables parsing functions
+ */
+
+static unsigned long
+readl_unaligned(void *p)
+{
+ unsigned long ret;
+
+ memcpy(&ret, p, sizeof(long));
+ return ret;
+}
+
+/*
+ * Identify usable CPU's and remember them for SMP bringup later.
+ */
+static void __init
+acpi20_lsapic (char *p)
+{
+ int add = 1;
+
+ acpi20_entry_lsapic_t *lsapic = (acpi20_entry_lsapic_t *) p;
printk(" CPU %d (%.04x:%.04x): ", total_cpus, lsapic->eid, lsapic->id);
if ((lsapic->flags & LSAPIC_ENABLED) == 0) {
printk("Disabled.\n");
add = 0;
- } else if (lsapic->flags & LSAPIC_PERFORMANCE_RESTRICTED) {
- printk("Performance Restricted; ignoring.\n");
- add = 0;
}
-
+
#ifdef CONFIG_SMP
smp_boot_data.cpu_phys_id[total_cpus] = -1;
#endif
@@ -73,87 +137,234 @@ acpi_lsapic(char *p)
}
/*
- * Configure legacy IRQ information in iosapic_vector
+ * Info on platform interrupt sources: NMI, PMI, INIT, etc.
*/
static void __init
-acpi_legacy_irq(char *p)
+acpi20_platform (char *p)
{
- /*
- * This is not good. ACPI is not necessarily limited to CONFIG_IA64_DIG, yet
- * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be
- * a means for platform_setup() to register ACPI handlers?
- */
-#ifdef CONFIG_IA64_IRQ_ACPI
- acpi_entry_int_override_t *legacy = (acpi_entry_int_override_t *) p;
- unsigned char vector;
- int i;
+ acpi20_entry_platform_src_t *plat = (acpi20_entry_platform_src_t *) p;
+
+ printk("PLATFORM: IOSAPIC %x -> Vector %x on CPU %.04u:%.04u\n",
+ plat->iosapic_vector, plat->global_vector, plat->eid, plat->id);
+}
- vector = isa_irq_to_vector(legacy->isa_irq);
+/*
+ * Override the physical address of the local APIC in the MADT table header.
+ */
+static void __init
+acpi20_lapic_addr_override (char *p)
+{
+ acpi20_entry_lapic_addr_override_t * lapic = (acpi20_entry_lapic_addr_override_t *) p;
+
+ if (lapic->lapic_address) {
+ iounmap((void *)ipi_base_addr);
+ ipi_base_addr = (unsigned long) ioremap(lapic->lapic_address, 0);
+
+ printk("LOCAL ACPI override to 0x%lx(p=0x%lx)\n",
+ ipi_base_addr, lapic->lapic_address);
+ }
+}
+
+/*
+ * Parse the ACPI Multiple APIC Description Table
+ */
+static void __init
+acpi20_parse_madt (acpi_madt_t *madt)
+{
+ acpi_entry_iosapic_t *iosapic;
+ char *p, *end;
+
+ /* Base address of IPI Message Block */
+ if (madt->lapic_address) {
+ ipi_base_addr = (unsigned long) ioremap(madt->lapic_address, 0);
+ printk("Lapic address set to 0x%lx\n", ipi_base_addr);
+ } else
+ printk("Lapic address set to default 0x%lx\n", ipi_base_addr);
+
+ p = (char *) (madt + 1);
+ end = p + (madt->header.length - sizeof(acpi_madt_t));
/*
- * Clobber any old pin mapping. It may be that it gets replaced later on
+ * Split entry parsing to ensure ordering.
*/
- for (i = 0; i < IA64_MAX_VECTORED_IRQ; i++) {
- if (i == vector)
- continue;
- if (iosapic_pin(i) == iosapic_pin(vector))
- iosapic_pin(i) = 0xff;
- }
- iosapic_pin(vector) = legacy->pin;
- iosapic_bus(vector) = BUS_ISA; /* This table only overrides the ISA devices */
- iosapic_busdata(vector) = 0;
-
- /*
- * External timer tick is special...
- */
- if (vector != TIMER_IRQ)
- iosapic_dmode(vector) = IO_SAPIC_LOWEST_PRIORITY;
- else
- iosapic_dmode(vector) = IO_SAPIC_FIXED;
+ while (p < end) {
+ switch (*p) {
+ case ACPI20_ENTRY_LOCAL_APIC_ADDR_OVERRIDE:
+ printk("ACPI 2.0 MADT: LOCAL APIC Override\n");
+ acpi20_lapic_addr_override(p);
+ break;
+
+ case ACPI20_ENTRY_LOCAL_SAPIC:
+ printk("ACPI 2.0 MADT: LOCAL SAPIC\n");
+ acpi20_lsapic(p);
+ break;
- /* See MPS 1.4 section 4.3.4 */
- switch (legacy->flags) {
- case 0x5:
- iosapic_polarity(vector) = IO_SAPIC_POL_HIGH;
- iosapic_trigger(vector) = IO_SAPIC_EDGE;
- break;
- case 0x8:
- iosapic_polarity(vector) = IO_SAPIC_POL_LOW;
- iosapic_trigger(vector) = IO_SAPIC_EDGE;
- break;
- case 0xd:
- iosapic_polarity(vector) = IO_SAPIC_POL_HIGH;
- iosapic_trigger(vector) = IO_SAPIC_LEVEL;
- break;
- case 0xf:
- iosapic_polarity(vector) = IO_SAPIC_POL_LOW;
- iosapic_trigger(vector) = IO_SAPIC_LEVEL;
- break;
- default:
- printk(" ACPI Legacy IRQ 0x%02x: Unknown flags 0x%x\n", legacy->isa_irq,
- legacy->flags);
- break;
+ case ACPI20_ENTRY_IO_SAPIC:
+ iosapic = (acpi_entry_iosapic_t *) p;
+ if (iosapic_init)
+ iosapic_init(iosapic->address, iosapic->irq_base);
+ break;
+
+ case ACPI20_ENTRY_PLATFORM_INT_SOURCE:
+ printk("ACPI 2.0 MADT: PLATFORM INT SOURCE\n");
+ acpi20_platform(p);
+ break;
+
+ case ACPI20_ENTRY_LOCAL_APIC:
+ printk("ACPI 2.0 MADT: LOCAL APIC entry\n"); break;
+ case ACPI20_ENTRY_IO_APIC:
+ printk("ACPI 2.0 MADT: IO APIC entry\n"); break;
+ case ACPI20_ENTRY_NMI_SOURCE:
+ printk("ACPI 2.0 MADT: NMI SOURCE entry\n"); break;
+ case ACPI20_ENTRY_LOCAL_APIC_NMI:
+ printk("ACPI 2.0 MADT: LOCAL APIC NMI entry\n"); break;
+ case ACPI20_ENTRY_INT_SRC_OVERRIDE:
+ break;
+ default:
+ printk("ACPI 2.0 MADT: unknown entry skip\n"); break;
+ break;
+ }
+
+ p += p[1];
+ }
+
+ p = (char *) (madt + 1);
+ end = p + (madt->header.length - sizeof(acpi_madt_t));
+
+ while (p < end) {
+
+ switch (*p) {
+ case ACPI20_ENTRY_INT_SRC_OVERRIDE:
+ printk("ACPI 2.0 MADT: INT SOURCE Override\n");
+ acpi_legacy_irq(p);
+ break;
+ default:
+ break;
+ }
+
+ p += p[1];
+ }
+
+ /* Make bootup pretty */
+ printk(" %d CPUs available, %d CPUs total\n",
+ available_cpus, total_cpus);
+}
+
+int __init
+acpi20_parse (acpi20_rsdp_t *rsdp20)
+{
+ acpi_xsdt_t *xsdt;
+ acpi_desc_table_hdr_t *hdrp;
+ int tables, i;
+
+ if (strncmp(rsdp20->signature, ACPI_RSDP_SIG, ACPI_RSDP_SIG_LEN)) {
+ printk("ACPI 2.0 RSDP signature incorrect!\n");
+ return 0;
+ } else {
+ printk("ACPI 2.0 Root System Description Ptr at 0x%lx\n",
+ (unsigned long)rsdp20);
+ }
+
+ xsdt = __va(rsdp20->xsdt);
+ hdrp = &xsdt->header;
+ if (strncmp(hdrp->signature,
+ ACPI_XSDT_SIG, ACPI_XSDT_SIG_LEN)) {
+ printk("ACPI 2.0 XSDT signature incorrect. Trying RSDT\n");
+ /* RSDT parsing here */
+ return 0;
+ } else {
+ printk("ACPI 2.0 XSDT at 0x%lx (p=0x%lx)\n",
+ (unsigned long)xsdt, (unsigned long)rsdp20->xsdt);
+ }
+
+ printk("ACPI 2.0: %.6s %.8s %d.%d\n",
+ hdrp->oem_id,
+ hdrp->oem_table_id,
+ hdrp->oem_revision >> 16,
+ hdrp->oem_revision & 0xffff);
+
+#ifdef CONFIG_ACPI_KERNEL_CONFIG
+ acpi_cf_init((void *)rsdp20);
+#endif
+
+ tables =(hdrp->length -sizeof(acpi_desc_table_hdr_t))>>3;
+
+ for (i = 0; i < tables; i++) {
+ hdrp = (acpi_desc_table_hdr_t *) __va(readl_unaligned(&xsdt->entry_ptrs[i]));
+ printk(" :table %4.4s found\n", hdrp->signature);
+
+ /* Only interested in the MADT table for now ... */
+ if (strncmp(hdrp->signature,
+ ACPI_MADT_SIG, ACPI_MADT_SIG_LEN) != 0)
+ continue;
+
+ acpi20_parse_madt((acpi_madt_t *) hdrp);
+ }
+
+#ifdef CONFIG_ACPI_KERNEL_CONFIG
+ acpi_cf_terminate();
+#endif
+
+#ifdef CONFIG_SMP
+ if (available_cpus == 0) {
+ printk("ACPI: Found 0 CPUS; assuming 1\n");
+ available_cpus = 1; /* We've got at least one of these, no? */
+ }
+ smp_boot_data.cpu_count = available_cpus;
+#endif
+ return 1;
+}
+/*
+ * ACPI 1.0b with 0.71 IA64 extensions functions; should be removed once all
+ * platforms start supporting ACPI 2.0
+ */
+
+/*
+ * Identify usable CPU's and remember them for SMP bringup later.
+ */
+static void __init
+acpi_lsapic (char *p)
+{
+ int add = 1;
+
+ acpi_entry_lsapic_t *lsapic = (acpi_entry_lsapic_t *) p;
+
+ if ((lsapic->flags & LSAPIC_PRESENT) == 0)
+ return;
+
+ printk(" CPU %d (%.04x:%.04x): ", total_cpus, lsapic->eid, lsapic->id);
+
+ if ((lsapic->flags & LSAPIC_ENABLED) == 0) {
+ printk("Disabled.\n");
+ add = 0;
+ } else if (lsapic->flags & LSAPIC_PERFORMANCE_RESTRICTED) {
+ printk("Performance Restricted; ignoring.\n");
+ add = 0;
}
-# ifdef ACPI_DEBUG
- printk("Legacy ISA IRQ %x -> IA64 Vector %x IOSAPIC Pin %x Active %s %s Trigger\n",
- legacy->isa_irq, vector, iosapic_pin(vector),
- ((iosapic_polarity(vector) == IO_SAPIC_POL_LOW) ? "Low" : "High"),
- ((iosapic_trigger(vector) == IO_SAPIC_LEVEL) ? "Level" : "Edge"));
-# endif /* ACPI_DEBUG */
-#endif /* CONFIG_IA64_IRQ_ACPI */
+#ifdef CONFIG_SMP
+ smp_boot_data.cpu_phys_id[total_cpus] = -1;
+#endif
+ if (add) {
+ printk("Available.\n");
+ available_cpus++;
+#ifdef CONFIG_SMP
+ smp_boot_data.cpu_phys_id[total_cpus] = (lsapic->id << 8) | lsapic->eid;
+#endif /* CONFIG_SMP */
+ }
+ total_cpus++;
}
/*
* Info on platform interrupt sources: NMI. PMI, INIT, etc.
*/
static void __init
-acpi_platform(char *p)
+acpi_platform (char *p)
{
acpi_entry_platform_src_t *plat = (acpi_entry_platform_src_t *) p;
- printk("PLATFORM: IOSAPIC %x -> Vector %lx on CPU %.04u:%.04u\n",
+ printk("PLATFORM: IOSAPIC %x -> Vector %x on CPU %.04u:%.04u\n",
plat->iosapic_vector, plat->global_vector, plat->eid, plat->id);
}
@@ -161,8 +372,9 @@ acpi_platform(char *p)
* Parse the ACPI Multiple SAPIC Table
*/
static void __init
-acpi_parse_msapic(acpi_sapic_t *msapic)
+acpi_parse_msapic (acpi_sapic_t *msapic)
{
+ acpi_entry_iosapic_t *iosapic;
char *p, *end;
/* Base address of IPI Message Block */
@@ -172,41 +384,31 @@ acpi_parse_msapic(acpi_sapic_t *msapic)
end = p + (msapic->header.length - sizeof(acpi_sapic_t));
while (p < end) {
-
switch (*p) {
- case ACPI_ENTRY_LOCAL_SAPIC:
+ case ACPI_ENTRY_LOCAL_SAPIC:
acpi_lsapic(p);
break;
- case ACPI_ENTRY_IO_SAPIC:
- platform_register_iosapic((acpi_entry_iosapic_t *) p);
+ case ACPI_ENTRY_IO_SAPIC:
+ iosapic = (acpi_entry_iosapic_t *) p;
+ if (iosapic_init)
+ iosapic_init(iosapic->address, iosapic->irq_base);
break;
- case ACPI_ENTRY_INT_SRC_OVERRIDE:
+ case ACPI_ENTRY_INT_SRC_OVERRIDE:
acpi_legacy_irq(p);
break;
-
- case ACPI_ENTRY_PLATFORM_INT_SOURCE:
+
+ case ACPI_ENTRY_PLATFORM_INT_SOURCE:
acpi_platform(p);
break;
-
- default:
+
+ default:
break;
}
/* Move to next table entry. */
-#define BAD_ACPI_TABLE
-#ifdef BAD_ACPI_TABLE
- /*
- * Some prototype Lion's have a bad ACPI table
- * requiring this fix. Without this fix, those
- * machines crash during bootup.
- */
- if (p[1] == 0)
- p = end;
- else
-#endif
- p += p[1];
+ p += p[1];
}
/* Make bootup pretty */
@@ -214,24 +416,18 @@ acpi_parse_msapic(acpi_sapic_t *msapic)
}
int __init
-acpi_parse(acpi_rsdp_t *rsdp)
+acpi_parse (acpi_rsdp_t *rsdp)
{
acpi_rsdt_t *rsdt;
acpi_desc_table_hdr_t *hdrp;
long tables, i;
- if (!rsdp) {
- printk("Uh-oh, no ACPI Root System Description Pointer table!\n");
- return 0;
- }
-
if (strncmp(rsdp->signature, ACPI_RSDP_SIG, ACPI_RSDP_SIG_LEN)) {
printk("Uh-oh, ACPI RSDP signature incorrect!\n");
return 0;
}
- rsdp->rsdt = __va(rsdp->rsdt);
- rsdt = rsdp->rsdt;
+ rsdt = __va(rsdp->rsdt);
if (strncmp(rsdt->header.signature, ACPI_RSDT_SIG, ACPI_RSDT_SIG_LEN)) {
printk("Uh-oh, ACPI RDST signature incorrect!\n");
return 0;
@@ -256,7 +452,7 @@ acpi_parse(acpi_rsdp_t *rsdp)
}
#ifdef CONFIG_ACPI_KERNEL_CONFIG
- acpi_cf_terminate();
+ acpi_cf_terminate();
#endif
#ifdef CONFIG_SMP
@@ -268,22 +464,3 @@ acpi_parse(acpi_rsdp_t *rsdp)
#endif
return 1;
}
-
-const char *
-acpi_get_sysname (void)
-{
- /* the following should go away once we have an ACPI parser: */
-#ifdef CONFIG_IA64_GENERIC
- return "hpsim";
-#else
-# if defined (CONFIG_IA64_HP_SIM)
- return "hpsim";
-# elif defined (CONFIG_IA64_SGI_SN1)
- return "sn1";
-# elif defined (CONFIG_IA64_DIG)
- return "dig";
-# else
-# error Unknown platform. Fix acpi.c.
-# endif
-#endif
-}
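
As an illustrative aside (not part of the patch): the new acpi_legacy_irq() above decodes the MADT interrupt-source-override flags into a polarity and a trigger mode before handing them to iosapic_register_legacy_irq(). A minimal stand-alone C sketch of just that decoding, with a hypothetical decode_inti_flags() helper and made-up test values, could look like this:

#include <stdio.h>

/* Hypothetical stand-alone helper mirroring the switch in acpi_legacy_irq():
 * bits 0-1 of the flags give the polarity, bits 2-3 the trigger mode, so the
 * four values handled above decode as follows.
 */
static void decode_inti_flags(unsigned int flags, unsigned long *polarity,
                              unsigned long *edge_triggered)
{
	switch (flags) {
	case 0x5: *polarity = 1; *edge_triggered = 1; break;  /* active high, edge  */
	case 0x7: *polarity = 0; *edge_triggered = 1; break;  /* active low,  edge  */
	case 0xd: *polarity = 1; *edge_triggered = 0; break;  /* active high, level */
	case 0xf: *polarity = 0; *edge_triggered = 0; break;  /* active low,  level */
	default:  printf("unknown INTI flags 0x%x\n", flags); break;
	}
}

int main(void)
{
	unsigned long pol = 0, edge = 0;

	decode_inti_flags(0xd, &pol, &edge);           /* made-up test value */
	printf("polarity=%lu edge=%lu\n", pol, edge);  /* prints: polarity=1 edge=0 */
	return 0;
}
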
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 759db7f52..1ac4e04f4 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -333,6 +333,9 @@ efi_init (void)
if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
efi.mps = __va(config_tables[i].table);
printk(" MPS=0x%lx", config_tables[i].table);
+ } else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
+ efi.acpi20 = __va(config_tables[i].table);
+ printk(" ACPI 2.0=0x%lx", config_tables[i].table);
} else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
efi.acpi = __va(config_tables[i].table);
printk(" ACPI=0x%lx", config_tables[i].table);
@@ -364,7 +367,7 @@ efi_init (void)
#if EFI_DEBUG
/* print EFI memory map: */
{
- efi_memory_desc_t *md = p;
+ efi_memory_desc_t *md;
void *p;
for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) {
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index ffb1760ea..f8c647386 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -11,6 +11,17 @@
* Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
*/
/*
+ * ia64_switch_to now places the correct virtual mapping in TR2 for
+ * kernel stack. This allows us to handle interrupts without changing
+ * to physical mode.
+ *
+ * ar.k4 is now used to hold last virtual map address
+ *
+ * Jonathan Nickin <nicklin@missioncriticallinux.com>
+ * Patrick O'Rourke <orourke@missioncriticallinux.com>
+ * 11/07/2000
+ */
+/*
* Global (preserved) predicate usage on syscall entry/exit path:
*
* pKern: See entry.h.
@@ -27,7 +38,8 @@
#include <asm/processor.h>
#include <asm/unistd.h>
#include <asm/asmmacro.h>
-
+#include <asm/pgtable.h>
+
#include "entry.h"
.text
@@ -98,6 +110,8 @@ GLOBAL_ENTRY(sys_clone)
br.ret.sptk.many rp
END(sys_clone)
+#define KSTACK_TR 2
+
/*
* prev_task <- ia64_switch_to(struct task_struct *next)
*/
@@ -108,22 +122,55 @@ GLOBAL_ENTRY(ia64_switch_to)
UNW(.body)
adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
- dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff
+ mov r27=ar.k4
+ dep r20=0,in0,61,3 // physical address of "current"
+ ;;
+ st8 [r22]=sp // save kernel stack pointer of old task
+ shr.u r26=r20,_PAGE_SIZE_256M
+ ;;
+ cmp.eq p7,p6=r26,r0 // check < 256M
adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
;;
- st8 [r22]=sp // save kernel stack pointer of old task
- ld8 sp=[r21] // load kernel stack pointer of new task
- and r20=in0,r18 // physical address of "current"
+ /*
+ * If we've already mapped this task's page, we can skip doing it
+ * again.
+ */
+(p6) cmp.eq p7,p6=r26,r27
+(p6) br.cond.dpnt.few .map
+ ;;
+.done: ld8 sp=[r21] // load kernel stack pointer of new task
+(p6) ssm psr.ic // if we had to map, re-enable the psr.ic bit FIRST!!!
;;
- mov ar.k6=r20 // copy "current" into ar.k6
- mov r8=r13 // return pointer to previously running task
- mov r13=in0 // set "current" pointer
+(p6) srlz.d
+ mov ar.k6=r20 // copy "current" into ar.k6
+ mov r8=r13 // return pointer to previously running task
+ mov r13=in0 // set "current" pointer
;;
+(p6) ssm psr.i // re-enable psr.i AFTER the ic bit is serialized
DO_LOAD_SWITCH_STACK( )
+
#ifdef CONFIG_SMP
- sync.i // ensure "fc"s done by this CPU are visible on other CPUs
-#endif
- br.ret.sptk.few rp
+ sync.i // ensure "fc"s done by this CPU are visible on other CPUs
+#endif
+ br.ret.sptk.few rp // boogie on out in new context
+
+.map:
+ rsm psr.i | psr.ic
+ movl r25=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX
+ ;;
+ srlz.d
+ or r23=r25,r20 // construct PA | page properties
+ mov r25=_PAGE_SIZE_256M<<2
+ ;;
+ mov cr.itir=r25
+ mov cr.ifa=in0 // VA of next task...
+ ;;
+ mov r25=KSTACK_TR // use tr entry #2...
+ mov ar.k4=r26 // remember last page we mapped...
+ ;;
+ itr.d dtr[r25]=r23 // wire in new mapping...
+ br.cond.sptk.many .done
+ ;;
END(ia64_switch_to)
#ifndef CONFIG_IA64_NEW_UNWIND
@@ -503,7 +550,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
;;
ld4 r2=[r2]
;;
- shl r2=r2,SMP_LOG_CACHE_BYTES // can't use shladd here...
+ shl r2=r2,SMP_CACHE_SHIFT // can't use shladd here...
;;
add r3=r2,r3
#else
@@ -542,7 +589,7 @@ back_from_resched:
// check & deliver pending signals:
(p2) br.call.spnt.few rp=handle_signal_delivery
.ret9:
-#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_IA64_SOFTSDV_HACKS)
+#ifdef CONFIG_IA64_SOFTSDV_HACKS
// Check for lost ticks
rsm psr.i
mov r2 = ar.itc
@@ -611,14 +658,13 @@ restore_all:
mov ar.ccv=r1
mov ar.fpsr=r13
mov b0=r14
- // turn off interrupts, interrupt collection, & data translation
- rsm psr.i | psr.ic | psr.dt
+ // turn off interrupts, interrupt collection
+ rsm psr.i | psr.ic
;;
srlz.i // EAS 2.5
mov b7=r15
;;
invala // invalidate ALAT
- dep r12=0,r12,61,3 // convert sp to physical address
bsw.0;; // switch back to bank 0 (must be last in insn group)
;;
#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
@@ -757,7 +803,7 @@ END(invoke_schedule_tail)
#endif /* CONFIG_SMP */
-#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_IA64_SOFTSDV_HACKS)
+#ifdef CONFIG_IA64_SOFTSDV_HACKS
ENTRY(invoke_ia64_reset_itm)
UNW(.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8))
@@ -772,7 +818,7 @@ ENTRY(invoke_ia64_reset_itm)
br.ret.sptk.many rp
END(invoke_ia64_reset_itm)
-#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC || CONFIG_IA64_SOFTSDV_HACKS */
+#endif /* CONFIG_IA64_SOFTSDV_HACKS */
/*
* Invoke do_softirq() while preserving in0-in7, which may be needed
@@ -1091,7 +1137,7 @@ sys_call_table:
data8 sys_setpriority
data8 sys_statfs
data8 sys_fstatfs
- data8 ia64_ni_syscall
+ data8 ia64_ni_syscall // 1105
data8 sys_semget
data8 sys_semop
data8 sys_semctl
diff --git a/arch/ia64/kernel/fw-emu.c b/arch/ia64/kernel/fw-emu.c
index 34316fe58..e16f23426 100644
--- a/arch/ia64/kernel/fw-emu.c
+++ b/arch/ia64/kernel/fw-emu.c
@@ -402,7 +402,6 @@ sys_fw_init (const char *args, int arglen)
sal_systab->sal_rev_minor = 1;
sal_systab->sal_rev_major = 0;
sal_systab->entry_count = 1;
- sal_systab->ia32_bios_present = 0;
#ifdef CONFIG_IA64_GENERIC
strcpy(sal_systab->oem_id, "Generic");
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index e6298b297..abee408f1 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -74,8 +74,8 @@ GLOBAL_ENTRY(_start)
;;
#ifdef CONFIG_IA64_EARLY_PRINTK
- mov r2=6
- mov r3=(8<<8) | (28<<2)
+ mov r3=(6<<8) | (28<<2)
+ movl r2=6<<61
;;
mov rr[r2]=r3
;;
@@ -168,6 +168,11 @@ GLOBAL_ENTRY(ia64_save_debug_regs)
add r19=IA64_NUM_DBG_REGS*8,in0
;;
1: mov r16=dbr[r18]
+#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC) \
+ || defined(CONFIG_ITANIUM_C0_SPECIFIC)
+ ;;
+ srlz.d
+#endif
mov r17=ibr[r18]
add r18=1,r18
;;
@@ -181,7 +186,8 @@ END(ia64_save_debug_regs)
GLOBAL_ENTRY(ia64_load_debug_regs)
alloc r16=ar.pfs,1,0,0,0
-#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC))
+#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) \
+ || defined(CONFIG_ITANIUM_B0_SPECIFIC) || defined(CONFIG_ITANIUM_B1_SPECIFIC))
lfetch.nta [in0]
#endif
mov r20=ar.lc // preserve ar.lc
@@ -194,6 +200,11 @@ GLOBAL_ENTRY(ia64_load_debug_regs)
add r18=1,r18
;;
mov dbr[r18]=r16
+#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC) \
+ || defined(CONFIG_ITANIUM_C0_SPECIFIC)
+ ;;
+ srlz.d
+#endif
mov ibr[r18]=r17
br.cloop.sptk.few 1b
;;
@@ -754,7 +765,7 @@ GLOBAL_ENTRY(ia64_spinlock_contention)
mov tmp=ar.itc
(p15) br.cond.sptk .wait
;;
- ld1 tmp=[r31]
+ ld4 tmp=[r31]
;;
cmp.ne p15,p0=tmp,r0
mov tmp=ar.itc
@@ -764,7 +775,7 @@ GLOBAL_ENTRY(ia64_spinlock_contention)
mov tmp=1
;;
IA64_SEMFIX_INSN
- cmpxchg1.acq tmp=[r31],tmp,ar.ccv
+ cmpxchg4.acq tmp=[r31],tmp,ar.ccv
;;
cmp.eq p15,p0=tmp,r0
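
For context only: the last head.S hunk widens the spin-lock contention path from byte to word accesses (ld4 and cmpxchg4.acq instead of ld1 and cmpxchg1.acq), i.e. the lock word is now treated as 32 bits. The sketch below shows the same test-and-test-and-set shape in portable C using GCC __atomic builtins; it only illustrates the word-sized access pattern, not the ia64 kernel's spinlock (which also has the ar.itc timeout logic seen above).

/* Minimal test-and-test-and-set lock on a 32-bit word (illustrative only):
 * spin with plain loads until the word looks free, then attempt a 4-byte
 * compare-and-exchange to take it.
 */
typedef struct { unsigned int lock; } sketch_spinlock_t;

static void sketch_spin_lock(sketch_spinlock_t *l)
{
	unsigned int expected;

	for (;;) {
		while (__atomic_load_n(&l->lock, __ATOMIC_RELAXED) != 0)
			;			/* read-only spin */
		expected = 0;
		if (__atomic_compare_exchange_n(&l->lock, &expected, 1, 0,
						__ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
			return;			/* acquired */
	}
}

static void sketch_spin_unlock(sketch_spinlock_t *l)
{
	__atomic_store_n(&l->lock, 0, __ATOMIC_RELEASE);
}

int main(void)
{
	sketch_spinlock_t l = { 0 };

	sketch_spin_lock(&l);
	/* critical section */
	sketch_spin_unlock(&l);
	return 0;
}
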
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index d3d2416cf..f831f86d9 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -24,9 +24,8 @@ EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(strstr);
EXPORT_SYMBOL(strtok);
-#include <linux/pci.h>
-EXPORT_SYMBOL(pci_alloc_consistent);
-EXPORT_SYMBOL(pci_free_consistent);
+#include <asm/hw_irq.h>
+EXPORT_SYMBOL(isa_irq_to_vector_map);
#include <linux/in6.h>
#include <asm/checksum.h>
@@ -49,14 +48,6 @@ EXPORT_SYMBOL(disable_irq_nosync);
#include <asm/page.h>
EXPORT_SYMBOL(clear_page);
-#include <asm/pci.h>
-EXPORT_SYMBOL(pci_dma_sync_sg);
-EXPORT_SYMBOL(pci_dma_sync_single);
-EXPORT_SYMBOL(pci_map_sg);
-EXPORT_SYMBOL(pci_map_single);
-EXPORT_SYMBOL(pci_unmap_sg);
-EXPORT_SYMBOL(pci_unmap_single);
-
#include <asm/processor.h>
EXPORT_SYMBOL(cpu_data);
EXPORT_SYMBOL(kernel_thread);
@@ -92,6 +83,9 @@ EXPORT_SYMBOL(__global_restore_flags);
#include <asm/uaccess.h>
EXPORT_SYMBOL(__copy_user);
EXPORT_SYMBOL(__do_clear_user);
+EXPORT_SYMBOL(__strlen_user);
+EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(__strnlen_user);
#include <asm/unistd.h>
EXPORT_SYMBOL(__ia64_syscall);
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
new file mode 100644
index 000000000..9d8408c3f
--- /dev/null
+++ b/arch/ia64/kernel/iosapic.c
@@ -0,0 +1,498 @@
+/*
+ * I/O SAPIC support.
+ *
+ * Copyright (C) 1999 Intel Corp.
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999-2000 Hewlett-Packard Co.
+ * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 VA Linux Systems
+ * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
+ *
+ * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code.
+ * In particular, we now have separate handlers for edge
+ * and level triggered interrupts.
+ * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation
+ * PCI to vector mapping, shared PCI interrupts.
+ * 00/10/27 D. Mosberger Document things a bit more to make them more understandable.
+ * Clean up much of the old IOSAPIC cruft.
+ */
+/*
+ * Here is what the interrupt logic between a PCI device and the CPU looks like:
+ *
+ * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The
+ * device is uniquely identified by its bus-, device-, and slot-number (the function
+ * number does not matter here because all functions share the same interrupt
+ * lines).
+ *
+ * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC controller.
+ * Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level
+ * triggered and use the same polarity). Each interrupt line has a unique IOSAPIC
+ * irq number which can be calculated as the sum of the controller's base irq number
+ * and the IOSAPIC pin number to which the line connects.
+ *
+ * (3) The IOSAPIC uses an internal table to map the IOSAPIC pin into the IA-64 interrupt
+ * vector. This interrupt vector is then sent to the CPU.
+ *
+ * In other words, there are two levels of indirections involved:
+ *
+ * pci pin -> iosapic irq -> IA-64 vector
+ *
+ * Note: outside this module, IA-64 vectors are called "irqs". This is because that's
+ * the traditional name Linux uses for interrupt vectors.
+ */
+#include <linux/config.h>
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/string.h>
+#include <linux/irq.h>
+
+#include <asm/acpi-ext.h>
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/iosapic.h>
+#include <asm/machvec.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_ACPI_KERNEL_CONFIG
+# include <asm/acpikcfg.h>
+#endif
+
+#undef DEBUG_IRQ_ROUTING
+
+static spinlock_t iosapic_lock = SPIN_LOCK_UNLOCKED;
+
+/* PCI pin to IOSAPIC irq routing information. This info typically comes from ACPI. */
+
+static struct {
+ int num_routes;
+ struct pci_vector_struct *route;
+} pci_irq;
+
+/* This table maps IA-64 vectors to the IOSAPIC pin that generates this vector. */
+
+static struct iosapic_irq {
+ char *addr; /* base address of IOSAPIC */
+ unsigned char base_irq; /* first irq assigned to this IOSAPIC */
+ char pin; /* IOSAPIC pin (-1 => not an IOSAPIC irq) */
+ unsigned char dmode : 3; /* delivery mode (see iosapic.h) */
+ unsigned char polarity : 1; /* interrupt polarity (see iosapic.h) */
+ unsigned char trigger : 1; /* trigger mode (see iosapic.h) */
+} iosapic_irq[NR_IRQS];
+
+/*
+ * Translate IOSAPIC irq number to the corresponding IA-64 interrupt vector. If no
+ * entry exists, return -1.
+ */
+static int
+iosapic_irq_to_vector (int irq)
+{
+ int vector;
+
+ for (vector = 0; vector < NR_IRQS; ++vector)
+ if (iosapic_irq[vector].base_irq + iosapic_irq[vector].pin == irq)
+ return vector;
+ return -1;
+}
+
+/*
+ * Map PCI pin to the corresponding IA-64 interrupt vector. If no such mapping exists,
+ * return -1.
+ */
+static int
+pci_pin_to_vector (int bus, int slot, int pci_pin)
+{
+ struct pci_vector_struct *r;
+
+ for (r = pci_irq.route; r < pci_irq.route + pci_irq.num_routes; ++r)
+ if (r->bus == bus && (r->pci_id >> 16) == slot && r->pin == pci_pin)
+ return iosapic_irq_to_vector(r->irq);
+ return -1;
+}
+
+static void
+set_rte (unsigned int vector, unsigned long dest)
+{
+ unsigned long pol, trigger, dmode;
+ u32 low32, high32;
+ char *addr;
+ int pin;
+
+ pin = iosapic_irq[vector].pin;
+ if (pin < 0)
+ return; /* not an IOSAPIC interrupt */
+
+ addr = iosapic_irq[vector].addr;
+ pol = iosapic_irq[vector].polarity;
+ trigger = iosapic_irq[vector].trigger;
+ dmode = iosapic_irq[vector].dmode;
+
+ low32 = ((pol << IOSAPIC_POLARITY_SHIFT) |
+ (trigger << IOSAPIC_TRIGGER_SHIFT) |
+ (dmode << IOSAPIC_DELIVERY_SHIFT) |
+ vector);
+
+#ifdef CONFIG_IA64_AZUSA_HACKS
+ /* set Flush Disable bit */
+ if (addr != (char *) 0xc0000000fec00000)
+ low32 |= (1 << 17);
+#endif
+
+ /* dest contains both id and eid */
+ high32 = (dest << IOSAPIC_DEST_SHIFT);
+
+ writel(IOSAPIC_RTE_HIGH(pin), addr + IOSAPIC_REG_SELECT);
+ writel(high32, addr + IOSAPIC_WINDOW);
+ writel(IOSAPIC_RTE_LOW(pin), addr + IOSAPIC_REG_SELECT);
+ writel(low32, addr + IOSAPIC_WINDOW);
+}
+
+static void
+nop (unsigned int vector)
+{
+ /* do nothing... */
+}
+
+static void
+mask_irq (unsigned int vector)
+{
+ unsigned long flags;
+ char *addr;
+ u32 low32;
+ int pin;
+
+ addr = iosapic_irq[vector].addr;
+ pin = iosapic_irq[vector].pin;
+
+ if (pin < 0)
+ return; /* not an IOSAPIC interrupt! */
+
+ spin_lock_irqsave(&iosapic_lock, flags);
+ {
+ writel(IOSAPIC_RTE_LOW(pin), addr + IOSAPIC_REG_SELECT);
+ low32 = readl(addr + IOSAPIC_WINDOW);
+
+ low32 |= (1 << IOSAPIC_MASK_SHIFT); /* set only the mask bit */
+ writel(low32, addr + IOSAPIC_WINDOW);
+ }
+ spin_unlock_irqrestore(&iosapic_lock, flags);
+}
+
+static void
+unmask_irq (unsigned int vector)
+{
+ unsigned long flags;
+ char *addr;
+ u32 low32;
+ int pin;
+
+ addr = iosapic_irq[vector].addr;
+ pin = iosapic_irq[vector].pin;
+ if (pin < 0)
+ return; /* not an IOSAPIC interrupt! */
+
+ spin_lock_irqsave(&iosapic_lock, flags);
+ {
+ writel(IOSAPIC_RTE_LOW(pin), addr + IOSAPIC_REG_SELECT);
+ low32 = readl(addr + IOSAPIC_WINDOW);
+
+ low32 &= ~(1 << IOSAPIC_MASK_SHIFT); /* clear only the mask bit */
+ writel(low32, addr + IOSAPIC_WINDOW);
+ }
+ spin_unlock_irqrestore(&iosapic_lock, flags);
+}
+
+
+static void
+iosapic_set_affinity (unsigned int vector, unsigned long mask)
+{
+ printk("iosapic_set_affinity: not implemented yet\n");
+}
+
+/*
+ * Handlers for level-triggered interrupts.
+ */
+
+static unsigned int
+iosapic_startup_level_irq (unsigned int vector)
+{
+ unmask_irq(vector);
+ return 0;
+}
+
+static void
+iosapic_end_level_irq (unsigned int vector)
+{
+ writel(vector, iosapic_irq[vector].addr + IOSAPIC_EOI);
+}
+
+#define iosapic_shutdown_level_irq mask_irq
+#define iosapic_enable_level_irq unmask_irq
+#define iosapic_disable_level_irq mask_irq
+#define iosapic_ack_level_irq nop
+
+struct hw_interrupt_type irq_type_iosapic_level = {
+ typename: "IO-SAPIC-level",
+ startup: iosapic_startup_level_irq,
+ shutdown: iosapic_shutdown_level_irq,
+ enable: iosapic_enable_level_irq,
+ disable: iosapic_disable_level_irq,
+ ack: iosapic_ack_level_irq,
+ end: iosapic_end_level_irq,
+ set_affinity: iosapic_set_affinity
+};
+
+/*
+ * Handlers for edge-triggered interrupts.
+ */
+
+static unsigned int
+iosapic_startup_edge_irq (unsigned int vector)
+{
+ unmask_irq(vector);
+ /*
+ * IOSAPIC simply drops interrupts pended while the
+ * corresponding pin was masked, so we can't know if an
+ * interrupt is pending already. Let's hope not...
+ */
+ return 0;
+}
+
+static void
+iosapic_ack_edge_irq (unsigned int vector)
+{
+ /*
+ * Once we have recorded IRQ_PENDING already, we can mask the
+ * interrupt for real. This prevents IRQ storms from unhandled
+ * devices.
+ */
+ if ((irq_desc[vector].status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED))
+ mask_irq(vector);
+}
+
+#define iosapic_enable_edge_irq unmask_irq
+#define iosapic_disable_edge_irq nop
+#define iosapic_end_edge_irq nop
+
+struct hw_interrupt_type irq_type_iosapic_edge = {
+ typename: "IO-SAPIC-edge",
+ startup: iosapic_startup_edge_irq,
+ shutdown: iosapic_disable_edge_irq,
+ enable: iosapic_enable_edge_irq,
+ disable: iosapic_disable_edge_irq,
+ ack: iosapic_ack_edge_irq,
+ end: iosapic_end_edge_irq,
+ set_affinity: iosapic_set_affinity
+};
+
+static unsigned int
+iosapic_version (char *addr)
+{
+ /*
+ * IOSAPIC Version Register returns a 32-bit structure like:
+ * {
+ * unsigned int version : 8;
+ * unsigned int reserved1 : 8;
+ * unsigned int pins : 8;
+ * unsigned int reserved2 : 8;
+ * }
+ */
+ writel(IOSAPIC_VERSION, addr + IOSAPIC_REG_SELECT);
+ return readl(IOSAPIC_WINDOW + addr);
+}
+
+/*
+ * ACPI calls this when it finds an entry for a legacy ISA interrupt. Note that the
+ * irq_base and IOSAPIC address must be set in iosapic_init().
+ */
+void
+iosapic_register_legacy_irq (unsigned long irq,
+ unsigned long pin, unsigned long polarity,
+ unsigned long edge_triggered)
+{
+ unsigned int vector = isa_irq_to_vector(irq);
+
+#ifdef DEBUG_IRQ_ROUTING
+ printk("ISA: IRQ %u -> IOSAPIC irq 0x%02x (%s, %s) -> vector %02x\n",
+ (unsigned) irq, (unsigned) pin,
+ polarity ? "high" : "low", edge_triggered ? "edge" : "level",
+ vector);
+#endif
+
+ iosapic_irq[vector].pin = pin;
+ iosapic_irq[vector].dmode = IOSAPIC_LOWEST_PRIORITY;
+ iosapic_irq[vector].polarity = polarity ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW;
+ iosapic_irq[vector].trigger = edge_triggered ? IOSAPIC_EDGE : IOSAPIC_LEVEL;
+}
+
+void __init
+iosapic_init (unsigned long phys_addr, unsigned int base_irq)
+{
+ struct hw_interrupt_type *irq_type;
+ int i, irq, max_pin, vector;
+ unsigned int ver;
+ char *addr;
+ static int first_time = 1;
+
+ if (first_time) {
+ first_time = 0;
+
+ for (vector = 0; vector < NR_IRQS; ++vector)
+ iosapic_irq[vector].pin = -1; /* mark as unused */
+
+ /*
+ * Fetch the PCI interrupt routing table:
+ */
+#ifdef CONFIG_ACPI_KERNEL_CONFIG
+ acpi_cf_get_pci_vectors(&pci_irq.route, &pci_irq.num_routes);
+#else
+ pci_irq.route =
+ (struct pci_vector_struct *) __va(ia64_boot_param.pci_vectors);
+ pci_irq.num_routes = ia64_boot_param.num_pci_vectors;
+#endif
+ }
+
+ addr = ioremap(phys_addr, 0);
+
+ ver = iosapic_version(addr);
+ max_pin = (ver >> 16) & 0xff;
+
+ printk("IOSAPIC: version %x.%x, address 0x%lx, IRQs 0x%02x-0x%02x\n",
+ (ver & 0xf0) >> 4, (ver & 0x0f), phys_addr, base_irq, base_irq + max_pin);
+
+ if (base_irq == 0)
+ /*
+ * Map the legacy ISA devices into the IOSAPIC data. Some of these may
+ * get reprogrammed later on with data from the ACPI Interrupt Source
+ * Override table.
+ */
+ for (irq = 0; irq < 16; ++irq) {
+ vector = isa_irq_to_vector(irq);
+ iosapic_irq[vector].addr = addr;
+ iosapic_irq[vector].base_irq = 0;
+ if (iosapic_irq[vector].pin == -1)
+ iosapic_irq[vector].pin = irq;
+ iosapic_irq[vector].dmode = IOSAPIC_LOWEST_PRIORITY;
+ iosapic_irq[vector].trigger = IOSAPIC_EDGE;
+ iosapic_irq[vector].polarity = IOSAPIC_POL_HIGH;
+#ifdef DEBUG_IRQ_ROUTING
+ printk("ISA: IRQ %u -> IOSAPIC irq 0x%02x (high, edge) -> vector 0x%02x\n",
+ irq, iosapic_irq[vector].base_irq + iosapic_irq[vector].pin,
+ vector);
+#endif
+ irq_type = &irq_type_iosapic_edge;
+ if (irq_desc[vector].handler != irq_type) {
+ if (irq_desc[vector].handler != &no_irq_type)
+ printk("iosapic_init: changing vector 0x%02x from %s to "
+ "%s\n", irq, irq_desc[vector].handler->typename,
+ irq_type->typename);
+ irq_desc[vector].handler = irq_type;
+ }
+
+ /* program the IOSAPIC routing table: */
+ set_rte(vector, (ia64_get_lid() >> 16) & 0xffff);
+ }
+
+#ifndef CONFIG_IA64_SOFTSDV_HACKS
+ for (i = 0; i < pci_irq.num_routes; i++) {
+ irq = pci_irq.route[i].irq;
+
+ if ((unsigned) (irq - base_irq) > max_pin)
+ /* the interrupt route is for another controller... */
+ continue;
+
+ if (irq < 16)
+ vector = isa_irq_to_vector(irq);
+ else {
+ vector = iosapic_irq_to_vector(irq);
+ if (vector < 0)
+ /* new iosapic irq: allocate a vector for it */
+ vector = ia64_alloc_irq();
+ }
+
+ iosapic_irq[vector].addr = addr;
+ iosapic_irq[vector].base_irq = base_irq;
+ iosapic_irq[vector].pin = (irq - base_irq);
+ iosapic_irq[vector].dmode = IOSAPIC_LOWEST_PRIORITY;
+ iosapic_irq[vector].trigger = IOSAPIC_LEVEL;
+ iosapic_irq[vector].polarity = IOSAPIC_POL_LOW;
+
+# ifdef DEBUG_IRQ_ROUTING
+ printk("PCI: (B%d,I%d,P%d) -> IOSAPIC irq 0x%02x -> vector 0x%02x\n",
+ pci_irq.route[i].bus, pci_irq.route[i].pci_id>>16, pci_irq.route[i].pin,
+ iosapic_irq[vector].base_irq + iosapic_irq[vector].pin, vector);
+# endif
+ irq_type = &irq_type_iosapic_level;
+ if (irq_desc[vector].handler != irq_type){
+ if (irq_desc[vector].handler != &no_irq_type)
+ printk("iosapic_init: changing vector 0x%02x from %s to %s\n",
+ vector, irq_desc[vector].handler->typename,
+ irq_type->typename);
+ irq_desc[vector].handler = irq_type;
+ }
+
+ /* program the IOSAPIC routing table: */
+ set_rte(vector, (ia64_get_lid() >> 16) & 0xffff);
+ }
+#endif /* !CONFIG_IA64_SOFTSDV_HACKS */
+}
+
+void
+iosapic_pci_fixup (int phase)
+{
+ struct pci_dev *dev;
+ unsigned char pin;
+ int vector;
+
+ if (phase != 1)
+ return;
+
+ pci_for_each_dev(dev) {
+ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (pin) {
+ pin--; /* interrupt pins are numbered starting from 1 */
+ vector = pci_pin_to_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+ if (vector < 0 && dev->bus->parent) {
+ /* go back to the bridge */
+ struct pci_dev *bridge = dev->bus->self;
+
+ if (bridge) {
+ /* allow for multiple bridges on an adapter */
+ do {
+ /* do the bridge swizzle... */
+ pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+ vector = pci_pin_to_vector(bridge->bus->number,
+ PCI_SLOT(bridge->devfn),
+ pin);
+ } while (vector < 0 && (bridge = bridge->bus->self));
+ }
+ if (vector >= 0)
+ printk(KERN_WARNING
+ "PCI: using PPB(B%d,I%d,P%d) to get vector %02x\n",
+ bridge->bus->number, PCI_SLOT(bridge->devfn),
+ pin, vector);
+ else
+ printk(KERN_WARNING
+ "PCI: Couldn't map irq for (B%d,I%d,P%d)\n",
+ bridge->bus->number, PCI_SLOT(bridge->devfn),
+ pin);
+ }
+ if (vector >= 0) {
+ printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> 0x%02x\n",
+ dev->bus->number, PCI_SLOT(dev->devfn), pin, vector);
+ dev->irq = vector;
+ }
+ }
+ /*
+ * Nothing to fixup
+ * Fix out-of-range IRQ numbers
+ */
+ if (dev->irq >= NR_IRQS)
+ dev->irq = 15; /* Spurious interrupts */
+ }
+}
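
As a side note on iosapic_pci_fixup() above: when a device behind a PCI-to-PCI bridge has no routing entry of its own, its interrupt pin is "swizzled" (rotated by the slot number) at each bridge on the way up until a routing entry matches. The stand-alone sketch below shows the standard form of that walk; the structs, route table, and bus topology are made up for illustration, and pins are 0-based (INTA = 0), as in the kernel code after the pin-- above.

#include <stdio.h>

/* Hypothetical routing table and device tree, standing in for the
 * pci_vector_struct data and struct pci_dev used above.
 */
struct route { unsigned int bus, slot, pin; int vector; };
struct dev   { unsigned int bus, slot; const struct dev *bridge; };

static const struct route routes[] = {
	{ 0, 3, 0, 0x53 },	/* bus 0, slot 3, INTA -> vector 0x53 */
};

static int lookup_vector(unsigned int bus, unsigned int slot, unsigned int pin)
{
	unsigned int i;

	for (i = 0; i < sizeof(routes) / sizeof(routes[0]); i++)
		if (routes[i].bus == bus && routes[i].slot == slot && routes[i].pin == pin)
			return routes[i].vector;
	return -1;
}

static int route_interrupt(const struct dev *dev, unsigned int pin)
{
	int vector = lookup_vector(dev->bus, dev->slot, pin);

	/* No route for the device itself: walk up through the bridges,
	 * rotating the pin by the slot number at each level. */
	while (vector < 0 && dev->bridge) {
		pin = (pin + dev->slot) % 4;	/* the bridge swizzle */
		dev = dev->bridge;
		vector = lookup_vector(dev->bus, dev->slot, pin);
	}
	return vector;		/* -1 if nothing matched */
}

int main(void)
{
	const struct dev bridge = { 0, 3, NULL };	/* PPB at bus 0, slot 3  */
	const struct dev card   = { 1, 1, &bridge };	/* device behind the PPB */

	printf("vector = 0x%x\n", route_interrupt(&card, 3));	/* prints: vector = 0x53 */
	return 0;
}
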
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index b3646e275..ab8961a54 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -541,6 +541,18 @@ void enable_irq(unsigned int irq)
spin_unlock_irqrestore(&desc->lock, flags);
}
+void do_IRQ_per_cpu(unsigned long irq, struct pt_regs *regs)
+{
+ irq_desc_t *desc = irq_desc + irq;
+ int cpu = smp_processor_id();
+
+ kstat.irqs[cpu][irq]++;
+
+ desc->handler->ack(irq);
+ handle_IRQ_event(irq, regs, desc->action);
+ desc->handler->end(irq);
+}
+
/*
* do_IRQ handles all normal device IRQ's (the special
* SMP cross-CPU interrupts have their own specific
@@ -581,8 +593,7 @@ unsigned int do_IRQ(unsigned long irq, struct pt_regs *regs)
if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
action = desc->action;
status &= ~IRQ_PENDING; /* we commit to handling */
- if (!(status & IRQ_PER_CPU))
- status |= IRQ_INPROGRESS; /* we are handling it */
+ status |= IRQ_INPROGRESS; /* we are handling it */
}
desc->status = status;
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 2166e205f..155ee66b7 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -7,6 +7,9 @@
*
* 6/10/99: Updated to bring in sync with x86 version to facilitate
* support for SMP and different interrupt controllers.
+ *
+ * 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector
+ * PCI to vector allocation routine.
*/
#include <linux/config.h>
@@ -35,38 +38,28 @@
#define IRQ_DEBUG 0
-#ifdef CONFIG_ITANIUM_A1_SPECIFIC
-spinlock_t ivr_read_lock;
-#endif
-
/* default base addr of IPI table */
unsigned long ipi_base_addr = (__IA64_UNCACHED_OFFSET | IPI_DEFAULT_BASE_ADDR);
/*
- * Legacy IRQ to IA-64 vector translation table. Any vector not in
- * this table maps to itself (ie: irq 0x30 => IA64 vector 0x30)
+ * Legacy IRQ to IA-64 vector translation table.
*/
__u8 isa_irq_to_vector_map[16] = {
/* 8259 IRQ translation, first 16 entries */
- 0x60, 0x50, 0x10, 0x51, 0x52, 0x53, 0x43, 0x54,
- 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x40, 0x41
+ 0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29,
+ 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21
};
-#ifdef CONFIG_ITANIUM_A1_SPECIFIC
-
-int usbfix;
-
-static int __init
-usbfix_option (char *str)
+int
+ia64_alloc_irq (void)
{
- printk("irq: enabling USB workaround\n");
- usbfix = 1;
- return 1;
-}
+ static int next_irq = FIRST_DEVICE_IRQ;
-__setup("usbfix", usbfix_option);
-
-#endif /* CONFIG_ITANIUM_A1_SPECIFIC */
+ if (next_irq > LAST_DEVICE_IRQ)
+ /* XXX could look for sharable vectors instead of panic'ing... */
+ panic("ia64_alloc_irq: out of interrupt vectors!");
+ return next_irq++;
+}
/*
* That's where the IVT branches when we get an external
@@ -77,42 +70,6 @@ void
ia64_handle_irq (unsigned long vector, struct pt_regs *regs)
{
unsigned long saved_tpr;
-#ifdef CONFIG_ITANIUM_A1_SPECIFIC
- unsigned long eoi_ptr;
-
-# ifdef CONFIG_USB
- extern void reenable_usb (void);
- extern void disable_usb (void);
-
- if (usbfix)
- disable_usb();
-# endif
- /*
- * Stop IPIs by getting the ivr_read_lock
- */
- spin_lock(&ivr_read_lock);
- {
- unsigned int tmp;
- /*
- * Disable PCI writes
- */
- outl(0x80ff81c0, 0xcf8);
- tmp = inl(0xcfc);
- outl(tmp | 0x400, 0xcfc);
- eoi_ptr = inl(0xcfc);
- vector = ia64_get_ivr();
- /*
- * Enable PCI writes
- */
- outl(tmp, 0xcfc);
- }
- spin_unlock(&ivr_read_lock);
-
-# ifdef CONFIG_USB
- if (usbfix)
- reenable_usb();
-# endif
-#endif /* CONFIG_ITANIUM_A1_SPECIFIC */
#if IRQ_DEBUG
{
@@ -161,7 +118,10 @@ ia64_handle_irq (unsigned long vector, struct pt_regs *regs)
ia64_set_tpr(vector);
ia64_srlz_d();
- do_IRQ(vector, regs);
+ if ((irq_desc[vector].status & IRQ_PER_CPU) != 0)
+ do_IRQ_per_cpu(vector, regs);
+ else
+ do_IRQ(vector, regs);
/*
* Disable interrupts and send EOI:
@@ -169,9 +129,6 @@ ia64_handle_irq (unsigned long vector, struct pt_regs *regs)
local_irq_disable();
ia64_set_tpr(saved_tpr);
ia64_eoi();
-#ifdef CONFIG_ITANIUM_A1_SPECIFIC
- break;
-#endif
vector = ia64_get_ivr();
} while (vector != IA64_SPURIOUS_INT);
}
@@ -194,8 +151,8 @@ init_IRQ (void)
* Disable all local interrupts
*/
ia64_set_itv(0, 1);
- ia64_set_lrr0(0, 1);
- ia64_set_lrr1(0, 1);
+ ia64_set_lrr0(0, 1);
+ ia64_set_lrr1(0, 1);
irq_desc[IA64_SPURIOUS_INT].handler = &irq_type_ia64_sapic;
#ifdef CONFIG_SMP
@@ -217,14 +174,11 @@ init_IRQ (void)
}
void
-ipi_send (int cpu, int vector, int delivery_mode, int redirect)
+ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect)
{
unsigned long ipi_addr;
unsigned long ipi_data;
unsigned long phys_cpu_id;
-#ifdef CONFIG_ITANIUM_A1_SPECIFIC
- unsigned long flags;
-#endif
#ifdef CONFIG_SMP
phys_cpu_id = cpu_physical_id(cpu);
@@ -239,13 +193,5 @@ ipi_send (int cpu, int vector, int delivery_mode, int redirect)
ipi_data = (delivery_mode << 8) | (vector & 0xff);
ipi_addr = ipi_base_addr | (phys_cpu_id << 4) | ((redirect & 1) << 3);
-#ifdef CONFIG_ITANIUM_A1_SPECIFIC
- spin_lock_irqsave(&ivr_read_lock, flags);
-#endif
-
writeq(ipi_data, ipi_addr);
-
-#ifdef CONFIG_ITANIUM_A1_SPECIFIC
- spin_unlock_irqrestore(&ivr_read_lock, flags);
-#endif
}
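
For reference, the renamed ia64_send_ipi() above encodes the target processor into the address of the interrupt-delivery write and the vector/delivery mode into the data word. A small stand-alone sketch of just that encoding (no MMIO access; the base address and CPU/vector values are made up for illustration):

#include <stdio.h>

/* Stand-alone sketch of the encoding used by ia64_send_ipi() above: the
 * target CPU's physical id selects the address within the processor
 * interrupt block, while the data word carries delivery mode and vector.
 * ipi_base_addr here is an arbitrary illustrative value, not the real
 * uncached processor interrupt block address.
 */
int main(void)
{
	unsigned long ipi_base_addr = 0xfee00000UL;	/* illustrative only   */
	unsigned long phys_cpu_id = 0x0103;		/* (id << 8) | eid     */
	int vector = 0xd0, delivery_mode = 0, redirect = 0;

	unsigned long ipi_data = (delivery_mode << 8) | (vector & 0xff);
	unsigned long ipi_addr = ipi_base_addr | (phys_cpu_id << 4)
				 | ((redirect & 1) << 3);

	/* the kernel then issues: writeq(ipi_data, ipi_addr); */
	printf("addr=0x%lx data=0x%lx\n", ipi_addr, ipi_data);
	return 0;
}
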
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index fa0ad0993..b75cd9dbc 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -6,6 +6,7 @@
* Copyright (C) 1998-2000 David Mosberger <davidm@hpl.hp.com>
*
* 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling for SMP
+ * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB handler now uses virtual PT.
*/
/*
* This file defines the interrupt vector table used by the CPU.
@@ -44,23 +45,13 @@
#include <asm/system.h>
#include <asm/unistd.h>
-#define MINSTATE_START_SAVE_MIN /* no special action needed */
-#define MINSTATE_END_SAVE_MIN \
- or r2=r2,r14; /* make first base a kernel virtual address */ \
- or r12=r12,r14; /* make sp a kernel virtual address */ \
- or r13=r13,r14; /* make `current' a kernel virtual address */ \
- bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
- ;;
-
+#define MINSTATE_VIRT /* needed by minstate.h */
#include "minstate.h"
#define FAULT(n) \
- rsm psr.dt; /* avoid nested faults due to TLB misses... */ \
- ;; \
- srlz.d; /* ensure everyone knows psr.dt is off... */ \
mov r31=pr; \
mov r19=n;; /* prepare to save predicates */ \
- br.cond.sptk.many dispatch_to_fault_handler
+ br.sptk.many dispatch_to_fault_handler
/*
* As we don't (hopefully) use the space available, we need to fill it with
@@ -122,15 +113,14 @@ ia64_ivt:
(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
srlz.d // ensure "rsm psr.dt" has taken effect
(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir
-(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1
-(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4
+(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
+(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
;;
(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
shr.u r18=r16,PMD_SHIFT // shift L2 index into position
;;
-(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones?
ld8 r17=[r17] // fetch the L1 entry (may be 0)
;;
(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
@@ -145,7 +135,7 @@ ia64_ivt:
(p7) ld8 r18=[r21] // read the L3 PTE
mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss
;;
-(p7) tbit.z p6,p7=r18,0 // page present bit cleared?
+(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared?
mov r22=cr.iha // get the VHPT address that caused the TLB miss
;; // avoid RAW on p7
(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss?
@@ -153,7 +143,7 @@ ia64_ivt:
;;
(p10) itc.i r18 // insert the instruction TLB entry
(p11) itc.d r18 // insert the data TLB entry
-(p6) br.spnt.few page_fault // handle bad address/page not present (page fault)
+(p6) br.spnt.many page_fault // handle bad address/page not present (page fault)
mov cr.ifa=r22
// Now compute and insert the TLB entry for the virtual page table.
@@ -183,212 +173,117 @@ ia64_ivt:
mov pr=r31,-1 // restore predicate registers
rfi
+ ;;
.align 1024
/////////////////////////////////////////////////////////////////////////////////////////
// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
/*
- * The ITLB basically does the same as the VHPT handler except
- * that we always insert exactly one instruction TLB entry.
- */
- /*
- * Attempt to lookup PTE through virtual linear page table.
- * The speculative access will fail if there is no TLB entry
- * for the L3 page table page we're trying to access.
+ * The ITLB handler accesses the L3 PTE via the virtually mapped linear
+ * page table. If a nested TLB miss occurs, we switch into physical
+ * mode, walk the page table, and then re-execute the L3 PTE read
+ * and go on normally after that.
*/
+itlb_fault:
mov r16=cr.ifa // get virtual address
- mov r19=cr.iha // get virtual address of L3 PTE
- ;;
- ld8.s r17=[r19] // try to read L3 PTE
+ mov r29=b0 // save b0
mov r31=pr // save predicates
+ mov r17=cr.iha // get virtual address of L3 PTE
+ movl r30=1f // load nested fault continuation point
;;
- tnat.nz p6,p0=r17 // did read succeed?
-(p6) br.cond.spnt.many 1f
+1: ld8 r18=[r17] // read L3 PTE
;;
- itc.i r17
+ tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
+(p6) br.cond.spnt.many page_fault
+ ;;
+ itc.i r18
;;
#ifdef CONFIG_SMP
- ld8.s r18=[r19] // try to read L3 PTE again and see if same
+ ld8 r19=[r17] // read L3 PTE again and see if same
mov r20=PAGE_SHIFT<<2 // setup page size for purge
;;
- cmp.eq p6,p7=r17,r18
+ cmp.ne p7,p0=r18,r19
;;
(p7) ptc.l r16,r20
#endif
mov pr=r31,-1
rfi
-
-#ifdef CONFIG_DISABLE_VHPT
-itlb_fault:
-#endif
-1: rsm psr.dt // use physical addressing for data
- mov r19=ar.k7 // get page table base address
- shl r21=r16,3 // shift bit 60 into sign bit
- shr.u r17=r16,61 // get the region number into r17
;;
- cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5?
- shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address
- ;;
-(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
- srlz.d // ensure "rsm psr.dt" has taken effect
-(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir
-(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1
-(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4
- ;;
-(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
-(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
- cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
- shr.u r18=r16,PMD_SHIFT // shift L2 index into position
- ;;
-(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones?
- ld8 r17=[r17] // fetch the L1 entry (may be 0)
- ;;
-(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
- dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
- ;;
-(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
- shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
- ;;
-(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
- dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
- ;;
-(p7) ld8 r18=[r17] // read the L3 PTE
- ;;
-(p7) tbit.z p6,p7=r18,0 // page present bit cleared?
- ;;
-(p7) itc.i r18 // insert the instruction TLB entry
-(p6) br.spnt.few page_fault // handle bad address/page not present (page fault)
- ;;
-#ifdef CONFIG_SMP
- ld8 r19=[r17] // re-read the PTE and check if same
- ;;
- cmp.eq p6,p7=r18,r19
- mov r20=PAGE_SHIFT<<2
- ;;
-(p7) ptc.l r16,r20 // PTE changed purge translation
-#endif
-
- mov pr=r31,-1 // restore predicate registers
- rfi
.align 1024
/////////////////////////////////////////////////////////////////////////////////////////
// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
/*
- * The DTLB basically does the same as the VHPT handler except
- * that we always insert exactly one data TLB entry.
- */
- /*
- * Attempt to lookup PTE through virtual linear page table.
- * The speculative access will fail if there is no TLB entry
- * for the L3 page table page we're trying to access.
+ * The DTLB handler accesses the L3 PTE via the virtually mapped linear
+ * page table. If a nested TLB miss occurs, we switch into physical
+ * mode, walk the page table, and then re-execute the L3 PTE read
+ * and go on normally after that.
*/
+dtlb_fault:
mov r16=cr.ifa // get virtual address
- mov r19=cr.iha // get virtual address of L3 PTE
- ;;
- ld8.s r17=[r19] // try to read L3 PTE
+ mov r29=b0 // save b0
mov r31=pr // save predicates
+ mov r17=cr.iha // get virtual address of L3 PTE
+ movl r30=1f // load nested fault continuation point
+ ;;
+1: ld8 r18=[r17] // read L3 PTE
;;
- tnat.nz p6,p0=r17 // did read succeed?
-(p6) br.cond.spnt.many 1f
+ tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
+(p6) br.cond.spnt.many page_fault
;;
- itc.d r17
+ itc.d r18
;;
#ifdef CONFIG_SMP
- ld8.s r18=[r19] // try to read L3 PTE again and see if same
+ ld8 r19=[r17] // read L3 PTE again and see if same
mov r20=PAGE_SHIFT<<2 // setup page size for purge
;;
- cmp.eq p6,p7=r17,r18
+ cmp.ne p7,p0=r18,r19
;;
(p7) ptc.l r16,r20
#endif
mov pr=r31,-1
rfi
-
-#ifdef CONFIG_DISABLE_VHPT
-dtlb_fault:
-#endif
-1: rsm psr.dt // use physical addressing for data
- mov r19=ar.k7 // get page table base address
- shl r21=r16,3 // shift bit 60 into sign bit
- shr.u r17=r16,61 // get the region number into r17
- ;;
- cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5?
- shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of the faulting address
;;
-(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
- srlz.d // ensure "rsm psr.dt" has taken effect
-(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir
-(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1
-(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4
- ;;
-(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
-(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
- cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
- shr.u r18=r16,PMD_SHIFT // shift L2 index into position
- ;;
-(p6) cmp.eq p7,p6=-1,r21 // unused address bits all ones?
- ld8 r17=[r17] // fetch the L1 entry (may be 0)
- ;;
-(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
- dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
- ;;
-(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
- shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
- ;;
-(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
- dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
- ;;
-(p7) ld8 r18=[r17] // read the L3 PTE
- ;;
-(p7) tbit.z p6,p7=r18,0 // page present bit cleared?
- ;;
-(p7) itc.d r18 // insert the instruction TLB entry
-(p6) br.spnt.few page_fault // handle bad address/page not present (page fault)
- ;;
-#ifdef CONFIG_SMP
- ld8 r19=[r17] // re-read the PTE and check if same
- ;;
- cmp.eq p6,p7=r18,r19
- mov r20=PAGE_SHIFT<<2
- ;;
-(p7) ptc.l r16,r20 // PTE changed purge translation
-#endif
- mov pr=r31,-1 // restore predicate registers
- rfi
.align 1024
/////////////////////////////////////////////////////////////////////////////////////////
// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
mov r16=cr.ifa // get address that caused the TLB miss
-#ifdef CONFIG_DISABLE_VHPT
+ movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX
+ mov r21=cr.ipsr
mov r31=pr
;;
- shr.u r21=r16,61 // get the region number into r21
+#ifdef CONFIG_DISABLE_VHPT
+ shr.u r22=r16,61 // get the region number into r21
;;
- cmp.gt p6,p0=6,r21 // user mode
-(p6) br.cond.dptk.many itlb_fault
+ cmp.gt p8,p0=6,r22 // user mode
;;
- mov pr=r31,-1
-#endif
- movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RX
+(p8) thash r17=r16
;;
+(p8) mov cr.iha=r17
+(p8) br.cond.dptk.many itlb_fault
+#endif
+ extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
shr.u r18=r16,57 // move address bit 61 to bit 4
- dep r16=0,r16,IA64_MAX_PHYS_BITS,(64-IA64_MAX_PHYS_BITS) // clear ed & reserved bits
+ dep r19=0,r16,IA64_MAX_PHYS_BITS,(64-IA64_MAX_PHYS_BITS) // clear ed & reserved bits
;;
andcm r18=0x10,r18 // bit 4=~address-bit(61)
- dep r16=r17,r16,0,12 // insert PTE control bits into r16
+ cmp.ne p8,p0=r0,r23 // psr.cpl != 0?
+ dep r19=r17,r19,0,12 // insert PTE control bits into r19
;;
- or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6
+ or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
+(p8) br.cond.spnt.many page_fault
;;
- itc.i r16 // insert the TLB entry
+ itc.i r19 // insert the TLB entry
+ mov pr=r31,-1
rfi
+ ;;
.align 1024
/////////////////////////////////////////////////////////////////////////////////////////
// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
mov r16=cr.ifa // get address that caused the TLB miss
- movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RW
+ movl r17=__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RWX
mov r20=cr.isr
mov r21=cr.ipsr
mov r31=pr
@@ -396,29 +291,40 @@ dtlb_fault:
#ifdef CONFIG_DISABLE_VHPT
shr.u r22=r16,61 // get the region number into r21
;;
- cmp.gt p8,p0=6,r22 // user mode
+ cmp.gt p8,p0=6,r22 // access to region 0-5
+ ;;
+(p8) thash r17=r16
+ ;;
+(p8) mov cr.iha=r17
(p8) br.cond.dptk.many dtlb_fault
#endif
+ extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on?
shr.u r18=r16,57 // move address bit 61 to bit 4
- dep r16=0,r16,IA64_MAX_PHYS_BITS,(64-IA64_MAX_PHYS_BITS) // clear ed & reserved bits
+ dep r19=0,r16,IA64_MAX_PHYS_BITS,(64-IA64_MAX_PHYS_BITS) // clear ed & reserved bits
;;
- dep r21=-1,r21,IA64_PSR_ED_BIT,1
andcm r18=0x10,r18 // bit 4=~address-bit(61)
- dep r16=r17,r16,0,12 // insert PTE control bits into r16
+ cmp.ne p8,p0=r0,r23
+(p8) br.cond.spnt.many page_fault
+
+ dep r21=-1,r21,IA64_PSR_ED_BIT,1
+ dep r19=r17,r19,0,12 // insert PTE control bits into r19
;;
- or r16=r16,r18 // set bit 4 (uncached) if the access was to region 6
+ or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
(p6) mov cr.ipsr=r21
;;
-(p7) itc.d r16 // insert the TLB entry
+(p7) itc.d r19 // insert the TLB entry
mov pr=r31,-1
rfi
-
;;
//-----------------------------------------------------------------------------------
- // call do_page_fault (predicates are in r31, psr.dt is off, r16 is faulting address)
+ // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
page_fault:
+ ssm psr.dt
+ ;;
+ srlz.i
+ ;;
SAVE_MIN_WITH_COVER
//
// Copy control registers to temporary registers, then turn on psr bits,
@@ -430,7 +336,7 @@ page_fault:
mov r9=cr.isr
adds r3=8,r2 // set up second base pointer
;;
- ssm psr.ic | psr.dt
+ ssm psr.ic
;;
srlz.i // guarantee that interrupt collection is enabled
;;
@@ -445,36 +351,37 @@ page_fault:
mov rp=r14
;;
adds out2=16,r12 // out2 = pointer to pt_regs
- br.call.sptk.few b6=ia64_do_page_fault // ignore return address
+ br.call.sptk.many b6=ia64_do_page_fault // ignore return address
+ ;;
.align 1024
/////////////////////////////////////////////////////////////////////////////////////////
// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
//
- // In the absence of kernel bugs, we get here when the Dirty-bit, Instruction
- // Access-bit, or Data Access-bit faults cause a nested fault because the
- // dTLB entry for the virtual page table isn't present. In such a case,
- // we lookup the pte for the faulting address by walking the page table
- // and return to the continuation point passed in register r30.
- // In accessing the page tables, we don't need to check for NULL entries
- // because if the page tables didn't map the faulting address, it would not
- // be possible to receive one of the above faults.
+ // In the absence of kernel bugs, we get here when the virtually mapped linear page
+ // table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction
+ // Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page
+ // table is missing, a nested TLB miss fault is triggered and control is transferred
+ // to this point. When this happens, we lookup the pte for the faulting address
+ // by walking the page table in physical mode and return to the continuation point
+ // passed in register r30 (or call page_fault if the address is not mapped).
//
// Input: r16: faulting address
// r29: saved b0
// r30: continuation address
+ // r31: saved pr
//
// Output: r17: physical address of L3 PTE of faulting address
// r29: saved b0
// r30: continuation address
+ // r31: saved pr
//
- // Clobbered: b0, r18, r19, r21, r31, psr.dt (cleared)
+ // Clobbered: b0, r18, r19, r21, psr.dt (cleared)
//
rsm psr.dt // switch to using physical data addressing
mov r19=ar.k7 // get the page table base address
shl r21=r16,3 // shift bit 60 into sign bit
;;
- mov r31=pr // save the predicate registers
shr.u r17=r16,61 // get the region number into r17
;;
cmp.eq p6,p7=5,r17 // is faulting address in region 5?
@@ -482,26 +389,30 @@ page_fault:
;;
(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
srlz.d
-(p6) movl r17=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir
-(p6) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-1
-(p7) shr r21=r21,PGDIR_SHIFT+PAGE_SHIFT-4
+(p6) movl r19=__pa(SWAPPER_PGD_ADDR) // region 5 is rooted at swapper_pg_dir
+(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
+(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
;;
-(p6) dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
+(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
+ cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
shr.u r18=r16,PMD_SHIFT // shift L2 index into position
;;
- ld8 r17=[r17] // fetch the L1 entry
+ ld8 r17=[r17] // fetch the L1 entry (may be 0)
mov b0=r30
;;
+(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
;;
- ld8 r17=[r17] // fetch the L2 entry
+(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
;;
+(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
;;
- mov pr=r31,-1 // restore predicates
- br.cond.sptk.few b0 // return to continuation point
+(p6) br.cond.spnt.many page_fault
+ br.sptk.many b0 // return to continuation point
+ ;;
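The walk performed by this nested-TLB-miss handler is easier to follow in C. The sketch below shows only the three-level indexing and the new NULL checks that now branch to page_fault; the shift constants are simplified stand-ins and the region-5/swapper_pg_dir special case is omitted.

	#include <stdint.h>
	#include <stddef.h>

	#define PAGE_SHIFT   14			/* example: 16KB pages */
	#define PMD_SHIFT    (PAGE_SHIFT + (PAGE_SHIFT - 3))
	#define PGDIR_SHIFT  (PMD_SHIFT + (PAGE_SHIFT - 3))
	#define PTRS_PER_PTE (1UL << (PAGE_SHIFT - 3))

	typedef uint64_t pte_t;

	/* Return the address of the L3 PTE for 'addr', or NULL to mean "go to page_fault". */
	static pte_t *vhpt_miss_walk (pte_t *pgd, uint64_t addr)
	{
		pte_t *pmd, *pte;

		pmd = (pte_t *) pgd[(addr >> PGDIR_SHIFT) & (PTRS_PER_PTE - 1)];  /* L1 entry (may be 0) */
		if (!pmd)
			return NULL;
		pte = (pte_t *) pmd[(addr >> PMD_SHIFT) & (PTRS_PER_PTE - 1)];	  /* L2 entry (may be 0) */
		if (!pte)
			return NULL;
		return &pte[(addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)];		  /* address of L3 PTE */
	}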
.align 1024
/////////////////////////////////////////////////////////////////////////////////////////
@@ -526,33 +437,19 @@ page_fault:
// a nested TLB miss hit where we look up the physical address of the L3 PTE
// and then continue at label 1 below.
//
-#ifndef CONFIG_SMP
mov r16=cr.ifa // get the address that caused the fault
movl r30=1f // load continuation point in case of nested fault
;;
thash r17=r16 // compute virtual address of L3 PTE
mov r29=b0 // save b0 in case of nested fault
- ;;
-1: ld8 r18=[r17]
- ;; // avoid RAW on r18
- or r18=_PAGE_D,r18 // set the dirty bit
- mov b0=r29 // restore b0
- ;;
- st8 [r17]=r18 // store back updated PTE
- itc.d r18 // install updated PTE
-#else
- mov r16=cr.ifa // get the address that caused the fault
- movl r30=1f // load continuation point in case of nested fault
- ;;
- thash r17=r16 // compute virtual address of L3 PTE
+ mov r31=pr // save pr
+#ifdef CONFIG_SMP
mov r28=ar.ccv // save ar.ccv
- mov r29=b0 // save b0 in case of nested fault
- mov r27=pr
;;
1: ld8 r18=[r17]
;; // avoid RAW on r18
mov ar.ccv=r18 // set compare value for cmpxchg
- or r25=_PAGE_D,r18 // set the dirty bit
+ or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits
;;
cmpxchg8.acq r26=[r17],r25,ar.ccv
mov r24=PAGE_SHIFT<<2
@@ -568,70 +465,46 @@ page_fault:
(p7) ptc.l r16,r24
mov b0=r29 // restore b0
mov ar.ccv=r28
- mov pr=r27,-1
+#else
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.d r18 // install updated PTE
#endif
+ mov pr=r31,-1 // restore pr
rfi
+ ;;
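The SMP path of this Dirty-bit handler amounts to a compare-and-exchange update of the PTE followed by a purge if the entry changed under us. A rough C rendering (the cmpxchg is modeled with a GCC builtin; itc_d() and ptc_l() are hypothetical stand-ins for the privileged itc.d and ptc.l instructions):

	/* hypothetical stand-ins for the privileged TLB insert/purge instructions */
	static void itc_d (unsigned long pte)                      { (void) pte; }
	static void ptc_l (unsigned long va, unsigned long log2sz) { (void) va; (void) log2sz; }

	#define _PAGE_A (1UL << 5)	/* placeholder bit positions, not the pgtable.h values */
	#define _PAGE_D (1UL << 6)
	#define PAGE_SHIFT 14

	static void mark_dirty_accessed (volatile unsigned long *l3_pte, unsigned long ifa)
	{
		unsigned long old_pte = *l3_pte;
		unsigned long new_pte = old_pte | _PAGE_D | _PAGE_A;

		if (__sync_val_compare_and_swap(l3_pte, old_pte, new_pte) == old_pte)
			itc_d(new_pte);			/* we won the race: install the updated PTE */
		if (*l3_pte != new_pte)			/* PTE changed under us (another CPU)? */
			ptc_l(ifa, PAGE_SHIFT << 2);	/* purge the possibly stale translation */
	}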
.align 1024
/////////////////////////////////////////////////////////////////////////////////////////
// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
// Like Entry 8, except for instruction access
mov r16=cr.ifa // get the address that caused the fault
+ movl r30=1f // load continuation point in case of nested fault
+ mov r31=pr // save predicates
#ifdef CONFIG_ITANIUM
/*
- * Erratum 10 (IFA may contain incorrect address) now has
- * "NoFix" status. There are no plans for fixing this.
+ * Erratum 10 (IFA may contain incorrect address) has "NoFix" status.
*/
mov r17=cr.ipsr
- mov r31=pr // save predicates
;;
mov r18=cr.iip
tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set?
;;
(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa
- mov pr=r31,-1
#endif /* CONFIG_ITANIUM */
-
-#ifndef CONFIG_SMP
- movl r30=1f // load continuation point in case of nested fault
;;
thash r17=r16 // compute virtual address of L3 PTE
mov r29=b0 // save b0 in case of nested fault
- ;;
-1: ld8 r18=[r17]
-#if defined(CONFIG_IA32_SUPPORT) && \
- (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B0_SPECIFIC))
- //
- // Erratum 85 (Access bit fault could be reported before page not present fault)
- // If the PTE is indicates the page is not present, then just turn this into a
- // page fault.
- //
- mov r31=pr // save predicates
- ;;
- tbit.nz p6,p0=r18,0 // page present bit set?
-(p6) br.cond.sptk 1f
- ;; // avoid WAW on p6
- mov pr=r31,-1
- br.cond.sptk page_fault // page wasn't present
-1: mov pr=r31,-1
-#else
- ;; // avoid RAW on r18
-#endif
- or r18=_PAGE_A,r18 // set the accessed bit
- mov b0=r29 // restore b0
- ;;
- st8 [r17]=r18 // store back updated PTE
- itc.i r18 // install updated PTE
-#else
- movl r30=1f // load continuation point in case of nested fault
- ;;
- thash r17=r16 // compute virtual address of L3 PTE
+#ifdef CONFIG_SMP
mov r28=ar.ccv // save ar.ccv
- mov r29=b0 // save b0 in case of nested fault)
- mov r27=pr
;;
1: ld8 r18=[r17]
-#if defined(CONFIG_IA32_SUPPORT) && \
+# if defined(CONFIG_IA32_SUPPORT) && \
(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B0_SPECIFIC))
//
// Erratum 85 (Access bit fault could be reported before page not present fault)
@@ -639,15 +512,9 @@ page_fault:
// page fault.
//
;;
- tbit.nz p6,p0=r18,0 // page present bit set?
-(p6) br.cond.sptk 1f
- ;; // avoid WAW on p6
- mov pr=r27,-1
- br.cond.sptk page_fault // page wasn't present
-1:
-#else
- ;; // avoid RAW on r18
-#endif
+ tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
+(p6) br.sptk page_fault // page wasn't present
+# endif
mov ar.ccv=r18 // set compare value for cmpxchg
or r25=_PAGE_A,r18 // set the accessed bit
;;
@@ -665,36 +532,42 @@ page_fault:
(p7) ptc.l r16,r24
mov b0=r29 // restore b0
mov ar.ccv=r28
- mov pr=r27,-1
-#endif
+#else /* !CONFIG_SMP */
+ ;;
+1: ld8 r18=[r17]
+ ;;
+# if defined(CONFIG_IA32_SUPPORT) && \
+ (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B0_SPECIFIC))
+ //
+ // Erratum 85 (Access bit fault could be reported before page not present fault)
+ // If the PTE indicates the page is not present, then just turn this into a
+ // page fault.
+ //
+ tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
+(p6) br.sptk page_fault // page wasn't present
+# endif
+ or r18=_PAGE_A,r18 // set the accessed bit
+ mov b0=r29 // restore b0
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.i r18 // install updated PTE
+#endif /* !CONFIG_SMP */
+ mov pr=r31,-1
rfi
+ ;;
.align 1024
/////////////////////////////////////////////////////////////////////////////////////////
// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
// Like Entry 8, except for data access
-#ifndef CONFIG_SMP
mov r16=cr.ifa // get the address that caused the fault
movl r30=1f // load continuation point in case of nested fault
;;
thash r17=r16 // compute virtual address of L3 PTE
+ mov r31=pr
mov r29=b0 // save b0 in case of nested fault
- ;;
-1: ld8 r18=[r17]
- ;; // avoid RAW on r18
- or r18=_PAGE_A,r18 // set the accessed bit
- mov b0=r29 // restore b0
- ;;
- st8 [r17]=r18 // store back updated PTE
- itc.d r18 // install updated PTE
-#else
- mov r16=cr.ifa // get the address that caused the fault
- movl r30=1f // load continuation point in case of nested fault
- ;;
- thash r17=r16 // compute virtual address of L3 PTE
+#ifdef CONFIG_SMP
mov r28=ar.ccv // save ar.ccv
- mov r29=b0 // save b0 in case of nested fault
- mov r27=pr
;;
1: ld8 r18=[r17]
;; // avoid RAW on r18
@@ -713,11 +586,20 @@ page_fault:
cmp.eq p6,p7=r18,r25 // is it same as the newly installed
;;
(p7) ptc.l r16,r24
- mov b0=r29 // restore b0
mov ar.ccv=r28
- mov pr=r27,-1
+#else
+ ;;
+1: ld8 r18=[r17]
+ ;; // avoid RAW on r18
+ or r18=_PAGE_A,r18 // set the accessed bit
+ ;;
+ st8 [r17]=r18 // store back updated PTE
+ itc.d r18 // install updated PTE
#endif
+ mov b0=r29 // restore b0
+ mov pr=r31,-1
rfi
+ ;;
.align 1024
/////////////////////////////////////////////////////////////////////////////////////////
@@ -725,16 +607,14 @@ page_fault:
mov r16=cr.iim
mov r17=__IA64_BREAK_SYSCALL
mov r31=pr // prepare to save predicates
- rsm psr.dt // avoid nested faults due to TLB misses...
;;
- srlz.d // ensure everyone knows psr.dt is off...
cmp.eq p0,p7=r16,r17 // is this a system call? (p7 <- false, if so)
(p7) br.cond.spnt.many non_syscall
SAVE_MIN // uses r31; defines r2:
- // turn interrupt collection and data translation back on:
- ssm psr.ic | psr.dt
+ // turn interrupt collection back on:
+ ssm psr.ic
;;
srlz.i // guarantee that interrupt collection is enabled
cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
@@ -746,14 +626,13 @@ page_fault:
adds r3=8,r2 // set up second base pointer for SAVE_REST
;;
SAVE_REST
- ;; // avoid WAW on r2 & r3
+ br.call.sptk rp=demine_args // clear NaT bits in (potential) syscall args
mov r3=255
adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024
adds r2=IA64_TASK_PTRACE_OFFSET,r13 // r2 = &current->ptrace
-
;;
- cmp.geu.unc p6,p7=r3,r15 // (syscall > 0 && syscall <= 1024+255) ?
+ cmp.geu p6,p7=r3,r15 // (syscall > 0 && syscall <= 1024+255) ?
movl r16=sys_call_table
;;
(p6) shladd r16=r15,3,r16
@@ -788,40 +667,61 @@ page_fault:
;;
st8 [r16]=r18 // store new value for cr.isr
-(p8) br.call.sptk.few b6=b6 // ignore this return addr
- br.call.sptk.few rp=ia64_trace_syscall // rp will be overwritten (ignored)
+(p8) br.call.sptk.many b6=b6 // ignore this return addr
+ br.call.sptk.many rp=ia64_trace_syscall // rp will be overwritten (ignored)
// NOT REACHED
+ .proc demine_args
+demine_args:
+ alloc r2=ar.pfs,8,0,0,0
+ tnat.nz p8,p0=in0
+ tnat.nz p9,p0=in1
+ ;;
+(p8) mov in0=-1
+ tnat.nz p10,p0=in2
+ tnat.nz p11,p0=in3
+
+(p9) mov in1=-1
+ tnat.nz p12,p0=in4
+ tnat.nz p13,p0=in5
+ ;;
+(p10) mov in2=-1
+ tnat.nz p14,p0=in6
+ tnat.nz p15,p0=in7
+
+(p11) mov in3=-1
+(p12) mov in4=-1
+(p13) mov in5=-1
+ ;;
+(p14) mov in6=-1
+(p15) mov in7=-1
+ br.ret.sptk.many rp
+ .endp demine_args
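In C terms, demine_args replaces every syscall argument whose register carried a NaT with -1, so the C-level syscall handlers never consume a NaT. A conceptual sketch (NaT bits are invisible from C, so the mask parameter is a hypothetical stand-in):

	/* bit i of nat_mask set  =>  incoming argument i had a NaT */
	static void demine_args_c (long args[8], unsigned int nat_mask)
	{
		int i;

		for (i = 0; i < 8; i++)
			if (nat_mask & (1u << i))
				args[i] = -1;	/* poison the argument with a harmless value */
	}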
+
.align 1024
/////////////////////////////////////////////////////////////////////////////////////////
// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
- rsm psr.dt // avoid nested faults due to TLB misses...
- ;;
- srlz.d // ensure everyone knows psr.dt is off...
mov r31=pr // prepare to save predicates
;;
SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3
- ssm psr.ic | psr.dt // turn interrupt collection and data translation back on
+ ssm psr.ic // turn interrupt collection back on
;;
adds r3=8,r2 // set up second base pointer for SAVE_REST
- srlz.i // ensure everybody knows psr.ic and psr.dt are back on
+ srlz.i // ensure everybody knows psr.ic is back on
;;
SAVE_REST
;;
alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
-#ifdef CONFIG_ITANIUM_A1_SPECIFIC
- mov out0=r0 // defer reading of cr.ivr to handle_irq...
-#else
mov out0=cr.ivr // pass cr.ivr as first arg
-#endif
add out1=16,sp // pass pointer to pt_regs as second arg
;;
srlz.d // make sure we see the effect of cr.ivr
movl r14=ia64_leave_kernel
;;
mov rp=r14
- br.call.sptk.few b6=ia64_handle_irq
+ br.call.sptk.many b6=ia64_handle_irq
+ ;;
.align 1024
/////////////////////////////////////////////////////////////////////////////////////////
@@ -855,7 +755,7 @@ dispatch_illegal_op_fault:
// The "alloc" can cause a mandatory store which could lead to
// an "Alt DTLB" fault which we can handle only if psr.ic is on.
//
- ssm psr.ic | psr.dt
+ ssm psr.ic
;;
srlz.i // guarantee that interrupt collection is enabled
;;
@@ -867,7 +767,7 @@ dispatch_illegal_op_fault:
;;
SAVE_REST
;;
- br.call.sptk.few rp=ia64_illegal_op_fault
+ br.call.sptk.many rp=ia64_illegal_op_fault
.ret0: ;;
alloc r14=ar.pfs,0,0,3,0 // must be first in insn group
mov out0=r9
@@ -881,6 +781,7 @@ dispatch_illegal_op_fault:
cmp.ne p6,p0=0,r8
(p6) br.call.dpnt b6=b6 // call returns to ia64_leave_kernel
br.sptk ia64_leave_kernel
+ ;;
.align 1024
/////////////////////////////////////////////////////////////////////////////////////////
@@ -900,7 +801,7 @@ dispatch_to_ia32_handler:
SAVE_MIN
;;
mov r14=cr.isr
- ssm psr.ic | psr.dt
+ ssm psr.ic
;;
srlz.i // guarantee that interrupt collection is enabled
;;
@@ -913,7 +814,7 @@ dispatch_to_ia32_handler:
shr r14=r14,16 // Get interrupt number
;;
cmp.ne p6,p0=r14,r15
-(p6) br.call.dpnt.few b6=non_ia32_syscall
+(p6) br.call.dpnt.many b6=non_ia32_syscall
adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions
adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp
@@ -924,7 +825,7 @@ dispatch_to_ia32_handler:
alloc r15=ar.pfs,0,0,6,0 // must first in an insn group
;;
ld4 r8=[r14],8 // r8 == EAX (syscall number)
- mov r15=190 // sys_vfork - last implemented system call
+ mov r15=222 // last implemented IA-32 system call
;;
cmp.leu.unc p6,p7=r8,r15
ld4 out1=[r14],8 // r9 == ecx
@@ -961,11 +862,12 @@ non_ia32_syscall:
mov out0=r14 // interrupt #
add out1=16,sp // pointer to pt_regs
;; // avoid WAW on CFM
- br.call.sptk.few rp=ia32_bad_interrupt
+ br.call.sptk.many rp=ia32_bad_interrupt
.ret1: movl r15=ia64_leave_kernel
;;
mov rp=r15
br.ret.sptk.many rp
+ ;;
#endif /* CONFIG_IA32_SUPPORT */
@@ -985,8 +887,8 @@ non_syscall:
mov r8=cr.iim // get break immediate (must be done while psr.ic is off)
adds r3=8,r2 // set up second base pointer for SAVE_REST
- // turn interrupt collection and data translation back on:
- ssm psr.ic | psr.dt
+ // turn interrupt collection back on:
+ ssm psr.ic
;;
srlz.i // guarantee that interrupt collection is enabled
;;
@@ -1000,7 +902,8 @@ non_syscall:
SAVE_REST
mov rp=r15
;;
- br.call.sptk.few b6=ia64_bad_break // avoid WAW on CFM and ignore return addr
+ br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr
+ ;;
.align 1024
/////////////////////////////////////////////////////////////////////////////////////////
@@ -1023,7 +926,7 @@ dispatch_unaligned_handler:
// wouldn't get the state to recover.
//
mov r15=cr.ifa
- ssm psr.ic | psr.dt
+ ssm psr.ic
;;
srlz.i // guarantee that interrupt collection is enabled
;;
@@ -1039,7 +942,8 @@ dispatch_unaligned_handler:
adds out1=16,sp // out1 = pointer to pt_regs
;;
mov rp=r14
- br.sptk.few ia64_prepare_handle_unaligned
+ br.sptk.many ia64_prepare_handle_unaligned
+ ;;
.align 1024
/////////////////////////////////////////////////////////////////////////////////////////
@@ -1055,7 +959,6 @@ dispatch_to_fault_handler:
//
// Input:
// psr.ic: off
- // psr.dt: off
// r19: fault vector number (e.g., 24 for General Exception)
// r31: contains saved predicates (pr)
//
@@ -1071,7 +974,7 @@ dispatch_to_fault_handler:
mov r10=cr.iim
mov r11=cr.itir
;;
- ssm psr.ic | psr.dt
+ ssm psr.ic
;;
srlz.i // guarantee that interrupt collection is enabled
;;
@@ -1089,7 +992,9 @@ dispatch_to_fault_handler:
movl r14=ia64_leave_kernel
;;
mov rp=r14
- br.call.sptk.few b6=ia64_fault
+ br.call.sptk.many b6=ia64_fault
+ ;;
+
//
// --- End of long entries, Beginning of short entries
//
@@ -1099,16 +1004,16 @@ dispatch_to_fault_handler:
// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
mov r16=cr.ifa
rsm psr.dt
-#if 1
- // If you disable this, you MUST re-enable to update_mmu_cache() code in pgtable.h
+ // The Linux page fault handler doesn't expect non-present pages to be in
+ // the TLB. Flush the existing entry now, so we meet that expectation.
mov r17=_PAGE_SIZE_4K<<2
;;
ptc.l r16,r17
-#endif
;;
mov r31=pr
srlz.d
- br.cond.sptk.many page_fault
+ br.sptk.many page_fault
+ ;;
.align 256
/////////////////////////////////////////////////////////////////////////////////////////
@@ -1118,7 +1023,8 @@ dispatch_to_fault_handler:
mov r31=pr
;;
srlz.d
- br.cond.sptk.many page_fault
+ br.sptk.many page_fault
+ ;;
.align 256
/////////////////////////////////////////////////////////////////////////////////////////
@@ -1128,7 +1034,8 @@ dispatch_to_fault_handler:
mov r31=pr
;;
srlz.d
- br.cond.sptk.many page_fault
+ br.sptk.many page_fault
+ ;;
.align 256
/////////////////////////////////////////////////////////////////////////////////////////
@@ -1138,31 +1045,32 @@ dispatch_to_fault_handler:
mov r31=pr
;;
srlz.d
- br.cond.sptk.many page_fault
+ br.sptk.many page_fault
+ ;;
.align 256
/////////////////////////////////////////////////////////////////////////////////////////
// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
mov r16=cr.isr
mov r31=pr
- rsm psr.dt // avoid nested faults due to TLB misses...
;;
- srlz.d // ensure everyone knows psr.dt is off...
cmp4.eq p6,p0=0,r16
(p6) br.sptk dispatch_illegal_op_fault
;;
mov r19=24 // fault number
- br.cond.sptk.many dispatch_to_fault_handler
+ br.sptk.many dispatch_to_fault_handler
+ ;;
.align 256
/////////////////////////////////////////////////////////////////////////////////////////
// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
- rsm psr.dt | psr.dfh // ensure we can access fph
+ rsm psr.dfh // ensure we can access fph
;;
srlz.d
mov r31=pr
mov r19=25
- br.cond.sptk.many dispatch_to_fault_handler
+ br.sptk.many dispatch_to_fault_handler
+ ;;
.align 256
/////////////////////////////////////////////////////////////////////////////////////////
@@ -1204,6 +1112,7 @@ dispatch_to_fault_handler:
;;
rfi // and go back
+ ;;
.align 256
/////////////////////////////////////////////////////////////////////////////////////////
@@ -1218,12 +1127,11 @@ dispatch_to_fault_handler:
.align 256
/////////////////////////////////////////////////////////////////////////////////////////
// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
- rsm psr.dt // avoid nested faults due to TLB misses...
mov r16=cr.ipsr
mov r31=pr // prepare to save predicates
;;
- srlz.d // ensure everyone knows psr.dt is off
- br.cond.sptk.many dispatch_unaligned_handler
+ br.sptk.many dispatch_unaligned_handler
+ ;;
.align 256
/////////////////////////////////////////////////////////////////////////////////////////
@@ -1304,9 +1212,6 @@ dispatch_to_fault_handler:
/////////////////////////////////////////////////////////////////////////////////////////
// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
#ifdef CONFIG_IA32_SUPPORT
- rsm psr.dt
- ;;
- srlz.d
mov r31=pr
mov r16=cr.isr
;;
@@ -1325,7 +1230,7 @@ dispatch_to_fault_handler:
;;
mov pr=r31,-1 // restore predicate registers
rfi
-
+ ;;
1:
#endif // CONFIG_IA32_SUPPORT
FAULT(46)
@@ -1334,11 +1239,9 @@ dispatch_to_fault_handler:
/////////////////////////////////////////////////////////////////////////////////////////
// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
#ifdef CONFIG_IA32_SUPPORT
- rsm psr.dt
- ;;
- srlz.d
mov r31=pr
- br.cond.sptk.many dispatch_to_ia32_handler
+ br.sptk.many dispatch_to_ia32_handler
+ ;;
#else
FAULT(47)
#endif
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
index 2afb5613e..df19a8d6f 100644
--- a/arch/ia64/kernel/machvec.c
+++ b/arch/ia64/kernel/machvec.c
@@ -1,11 +1,13 @@
#include <linux/config.h>
+
+#ifdef CONFIG_IA64_GENERIC
+
#include <linux/kernel.h>
+#include <linux/string.h>
#include <asm/page.h>
#include <asm/machvec.h>
-#ifdef CONFIG_IA64_GENERIC
-
struct ia64_machine_vector ia64_mv;
/*
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 333258d35..1456b8d96 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -19,6 +19,7 @@
#include <linux/irq.h>
#include <linux/smp_lock.h>
+#include <asm/machvec.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/system.h>
@@ -26,7 +27,6 @@
#include <asm/mca.h>
#include <asm/irq.h>
-#include <asm/machvec.h>
typedef struct ia64_fptr {
@@ -365,7 +365,7 @@ ia64_mca_wakeup_ipi_wait(void)
void
ia64_mca_wakeup(int cpu)
{
- ipi_send(cpu, IA64_MCA_WAKEUP_INT_VECTOR, IA64_IPI_DM_INT, 0);
+ platform_send_ipi(cpu, IA64_MCA_WAKEUP_INT_VECTOR, IA64_IPI_DM_INT, 0);
ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
}
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index 15993525d..b148d8b9c 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -3,11 +3,11 @@
//
// Mods by cfleck to integrate into kernel build
// 00/03/15 davidm Added various stop bits to get a clean compile
-// 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp kstack,
-// switch modes, jump to C INIT handler
+//
+// 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp
+// kstack, switch modes, jump to C INIT handler
//
#include <linux/config.h>
-
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/mca_asm.h>
@@ -17,14 +17,7 @@
* When we get a machine check, the kernel stack pointer is no longer
* valid, so we need to set a new stack pointer.
*/
-#define MINSTATE_START_SAVE_MIN \
-(pKern) movl sp=ia64_init_stack+IA64_STK_OFFSET-IA64_PT_REGS_SIZE; \
- ;;
-
-#define MINSTATE_END_SAVE_MIN \
- or r12=r12,r14; /* make sp a kernel virtual address */ \
- or r13=r13,r14; /* make `current' a kernel virtual address */ \
- ;;
+#define MINSTATE_PHYS /* Make sure stack access is physical for MINSTATE */
#include "minstate.h"
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index 8790d49c3..2ea6f1791 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -20,6 +20,72 @@
#define rR1 r20
/*
+ * Here start the source dependent macros.
+ */
+
+/*
+ * For ivt.S we want to access the stack virtually so we don't have to disable translation
+ * on interrupts.
+ */
+#define MINSTATE_START_SAVE_MIN_VIRT \
+ dep r1=-1,r1,61,3; /* r1 = current (virtual) */ \
+(p7) mov ar.rsc=r0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
+ ;; \
+(p7) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \
+(p7) mov rARRNAT=ar.rnat; \
+(pKern) mov r1=sp; /* get sp */ \
+ ;; \
+(p7) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
+(p7) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \
+ ;; \
+(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
+(p7) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \
+ ;; \
+(p7) mov r18=ar.bsp; \
+(p7) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
+
+#define MINSTATE_END_SAVE_MIN_VIRT \
+ or r13=r13,r14; /* make `current' a kernel virtual address */ \
+ bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
+ ;;
+
+/*
+ * For mca_asm.S we want to access the stack physically since the state is saved before we
+ * go virtual and don't want to destroy the iip or ipsr.
+ */
+#define MINSTATE_START_SAVE_MIN_PHYS \
+(pKern) movl sp=ia64_init_stack+IA64_STK_OFFSET-IA64_PT_REGS_SIZE; \
+(p7) mov ar.rsc=r0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
+(p7) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \
+ ;; \
+(p7) mov rARRNAT=ar.rnat; \
+(pKern) dep r1=0,sp,61,3; /* compute physical addr of sp */ \
+(p7) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
+(p7) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \
+(p7) dep rKRBS=-1,rKRBS,61,3; /* compute kernel virtual addr of RBS */\
+ ;; \
+(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
+(p7) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \
+ ;; \
+(p7) mov r18=ar.bsp; \
+(p7) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
+
+#define MINSTATE_END_SAVE_MIN_PHYS \
+ or r12=r12,r14; /* make sp a kernel virtual address */ \
+ or r13=r13,r14; /* make `current' a kernel virtual address */ \
+ ;;
+
+#ifdef MINSTATE_VIRT
+# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_VIRT
+# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_VIRT
+#endif
+
+#ifdef MINSTATE_PHYS
+# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_PHYS
+# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_PHYS
+#endif
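Each consumer of minstate.h now picks a variant by defining one of these macros before the include. mca_asm.S above takes the physical flavour; ivt.S presumably takes the virtual one, as the comment suggests, so the pairing looks roughly like this (two separate files, shown together only for illustration):

	/* mca_asm.S -- state must be saved with physical stack accesses: */
	#define MINSTATE_PHYS	/* Make sure stack access is physical for MINSTATE */
	#include "minstate.h"

	/* ivt.S -- stack is accessed virtually, so translation stays on: */
	#define MINSTATE_VIRT
	#include "minstate.h"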
+
+/*
* DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
* the minimum state necessary that allows us to turn psr.ic back
* on.
@@ -31,7 +97,6 @@
*
* Upon exit, the state is as follows:
* psr.ic: off
- * psr.dt: off
* r2 = points to &pt_regs.r16
* r12 = kernel sp (kernel virtual address)
* r13 = points to current task_struct (kernel virtual address)
@@ -50,7 +115,7 @@
mov rCRIPSR=cr.ipsr; \
mov rB6=b6; /* rB6 = branch reg 6 */ \
mov rCRIIP=cr.iip; \
- mov r1=ar.k6; /* r1 = current */ \
+ mov r1=ar.k6; /* r1 = current (physical) */ \
;; \
invala; \
extr.u r16=rCRIPSR,32,2; /* extract psr.cpl */ \
@@ -58,25 +123,11 @@
cmp.eq pKern,p7=r0,r16; /* are we in kernel mode already? (psr.cpl==0) */ \
/* switch from user to kernel RBS: */ \
COVER; \
- ;; \
+ ;; \
MINSTATE_START_SAVE_MIN \
-(p7) mov ar.rsc=r0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
-(p7) addl rKRBS=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \
- ;; \
-(p7) mov rARRNAT=ar.rnat; \
-(pKern) dep r1=0,sp,61,3; /* compute physical addr of sp */ \
-(p7) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
-(p7) mov rARBSPSTORE=ar.bspstore; /* save ar.bspstore */ \
-(p7) dep rKRBS=-1,rKRBS,61,3; /* compute kernel virtual addr of RBS */ \
- ;; \
-(pKern) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
-(p7) mov ar.bspstore=rKRBS; /* switch to kernel RBS */ \
- ;; \
-(p7) mov r18=ar.bsp; \
-(p7) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
- \
- mov r16=r1; /* initialize first base pointer */ \
- adds r17=8,r1; /* initialize second base pointer */ \
+ ;; \
+ mov r16=r1; /* initialize first base pointer */ \
+ adds r17=8,r1; /* initialize second base pointer */ \
;; \
st8 [r16]=rCRIPSR,16; /* save cr.ipsr */ \
st8 [r17]=rCRIIP,16; /* save cr.iip */ \
diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
index 2e56a428e..fc14cc31c 100644
--- a/arch/ia64/kernel/pal.S
+++ b/arch/ia64/kernel/pal.S
@@ -52,10 +52,9 @@ END(ia64_pal_default_handler)
/*
* Make a PAL call using the static calling convention.
*
- * in0 Pointer to struct ia64_pal_retval
- * in1 Index of PAL service
- * in2 - in4 Remaining PAL arguments
- * in5 1 ==> clear psr.ic, 0 ==> don't clear psr.ic
+ * in0 Index of PAL service
+ * in1 - in3 Remaining PAL arguments
+ * in4 1 ==> clear psr.ic, 0 ==> don't clear psr.ic
*
*/
GLOBAL_ENTRY(ia64_pal_call_static)
@@ -69,7 +68,7 @@ GLOBAL_ENTRY(ia64_pal_call_static)
}
;;
ld8 loc2 = [loc2] // loc2 <- entry point
- tbit.nz p6,p7 = in5, 0
+ tbit.nz p6,p7 = in4, 0
adds r8 = 1f-1b,r8
;;
mov loc3 = psr
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
deleted file mode 100644
index 6293cdfa0..000000000
--- a/arch/ia64/kernel/pci-dma.c
+++ /dev/null
@@ -1,517 +0,0 @@
-/*
- * Dynamic DMA mapping support.
- *
- * This implementation is for IA-64 platforms that do not support
- * I/O TLBs (aka DMA address translation hardware).
- * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
- * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
- */
-
-#include <linux/config.h>
-
-#include <linux/mm.h>
-#include <linux/pci.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/types.h>
-
-#include <asm/io.h>
-#include <asm/pci.h>
-#include <asm/dma.h>
-
-#ifdef CONFIG_SWIOTLB
-
-#include <linux/init.h>
-#include <linux/bootmem.h>
-
-#define ALIGN(val, align) ((unsigned long) (((unsigned long) (val) + ((align) - 1)) & ~((align) - 1)))
-
-/*
- * log of the size of each IO TLB slab. The number of slabs is command line
- * controllable.
- */
-#define IO_TLB_SHIFT 11
-
-/*
- * Used to do a quick range check in pci_unmap_single and pci_sync_single, to see if the
- * memory was in fact allocated by this API.
- */
-static char *io_tlb_start, *io_tlb_end;
-
-/*
- * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and io_tlb_end.
- * This is command line adjustable via setup_io_tlb_npages.
- */
-unsigned long io_tlb_nslabs = 1024;
-
-/*
- * This is a free list describing the number of free entries available from each index
- */
-static unsigned int *io_tlb_list;
-static unsigned int io_tlb_index;
-
-/*
- * We need to save away the original address corresponding to a mapped entry for the sync
- * operations.
- */
-static unsigned char **io_tlb_orig_addr;
-
-/*
- * Protect the above data structures in the map and unmap calls
- */
-spinlock_t io_tlb_lock = SPIN_LOCK_UNLOCKED;
-
-static int __init
-setup_io_tlb_npages (char *str)
-{
- io_tlb_nslabs = simple_strtoul(str, NULL, 0) << (PAGE_SHIFT - IO_TLB_SHIFT);
- return 1;
-}
-__setup("swiotlb=", setup_io_tlb_npages);
-
-/*
- * Statically reserve bounce buffer space and initialize bounce buffer
- * data structures for the software IO TLB used to implement the PCI DMA API
- */
-void
-setup_swiotlb (void)
-{
- int i;
-
- /*
- * Get IO TLB memory from the low pages
- */
- io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
- if (!io_tlb_start)
- BUG();
- io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
-
- /*
- * Allocate and initialize the free list array. This array is used
- * to find contiguous free memory regions of size 2^IO_TLB_SHIFT between
- * io_tlb_start and io_tlb_end.
- */
- io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
- for (i = 0; i < io_tlb_nslabs; i++)
- io_tlb_list[i] = io_tlb_nslabs - i;
- io_tlb_index = 0;
- io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
-
- printk("Placing software IO TLB between 0x%p - 0x%p\n",
- (void *) io_tlb_start, (void *) io_tlb_end);
-}
-
-/*
- * Allocates bounce buffer and returns its kernel virtual address.
- */
-static void *
-__pci_map_single (struct pci_dev *hwdev, char *buffer, size_t size, int direction)
-{
- unsigned long flags;
- char *dma_addr;
- unsigned int i, nslots, stride, index, wrap;
-
- /*
- * For mappings greater than a page size, we limit the stride (and hence alignment)
- * to a page size.
- */
- nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- if (size > (1 << PAGE_SHIFT))
- stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
- else
- stride = nslots;
-
- if (!nslots)
- BUG();
-
- /*
- * Find suitable number of IO TLB entries size that will fit this request and allocate a buffer
- * from that IO TLB pool.
- */
- spin_lock_irqsave(&io_tlb_lock, flags);
- {
- wrap = index = ALIGN(io_tlb_index, stride);
- do {
- /*
- * If we find a slot that indicates we have 'nslots' number of
- * contiguous buffers, we allocate the buffers from that slot and mark the
- * entries as '0' indicating unavailable.
- */
- if (io_tlb_list[index] >= nslots) {
- for (i = index; i < index + nslots; i++)
- io_tlb_list[i] = 0;
- dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-
- /*
- * Update the indices to avoid searching in the next round.
- */
- io_tlb_index = (index + nslots) < io_tlb_nslabs ? (index + nslots) : 0;
-
- goto found;
- }
- index += stride;
- if (index >= io_tlb_nslabs)
- index = 0;
- } while (index != wrap);
-
- /*
- * XXX What is a suitable recovery mechanism here? We cannot
- * sleep because we are called from with in interrupts!
- */
- panic("__pci_map_single: could not allocate software IO TLB (%ld bytes)", size);
-found:
- }
- spin_unlock_irqrestore(&io_tlb_lock, flags);
-
- /*
- * Save away the mapping from the original address to the DMA address. This is needed
- * when we sync the memory. Then we sync the buffer if needed.
- */
- io_tlb_orig_addr[index] = buffer;
- if (direction == PCI_DMA_TODEVICE || direction == PCI_DMA_BIDIRECTIONAL)
- memcpy(dma_addr, buffer, size);
-
- return dma_addr;
-}
-
-/*
- * dma_addr is the kernel virtual address of the bounce buffer to unmap.
- */
-static void
-__pci_unmap_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction)
-{
- unsigned long flags;
- int i, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
- char *buffer = io_tlb_orig_addr[index];
-
- /*
- * First, sync the memory before unmapping the entry
- */
- if ((direction == PCI_DMA_FROMDEVICE) || (direction == PCI_DMA_BIDIRECTIONAL))
- /*
- * bounce... copy the data back into the original buffer
- * and delete the bounce buffer.
- */
- memcpy(buffer, dma_addr, size);
-
- /*
- * Return the buffer to the free list by setting the corresponding entries to indicate
- * the number of contigous entries available.
- * While returning the entries to the free list, we merge the entries with slots below
- * and above the pool being returned.
- */
- spin_lock_irqsave(&io_tlb_lock, flags);
- {
- int count = ((index + nslots) < io_tlb_nslabs ? io_tlb_list[index + nslots] : 0);
- /*
- * Step 1: return the slots to the free list, merging the slots with superceeding slots
- */
- for (i = index + nslots - 1; i >= index; i--)
- io_tlb_list[i] = ++count;
- /*
- * Step 2: merge the returned slots with the preceeding slots, if available (non zero)
- */
- for (i = index - 1; (i >= 0) && io_tlb_list[i]; i--)
- io_tlb_list[i] += io_tlb_list[index];
- }
- spin_unlock_irqrestore(&io_tlb_lock, flags);
-}
-
-static void
-__pci_sync_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction)
-{
- int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
- char *buffer = io_tlb_orig_addr[index];
-
- /*
- * bounce... copy the data back into/from the original buffer
- * XXX How do you handle PCI_DMA_BIDIRECTIONAL here ?
- */
- if (direction == PCI_DMA_FROMDEVICE)
- memcpy(buffer, dma_addr, size);
- else if (direction == PCI_DMA_TODEVICE)
- memcpy(dma_addr, buffer, size);
- else
- BUG();
-}
-
-/*
- * Map a single buffer of the indicated size for DMA in streaming mode.
- * The PCI address to use is returned.
- *
- * Once the device is given the dma address, the device owns this memory
- * until either pci_unmap_single or pci_dma_sync_single is performed.
- */
-dma_addr_t
-pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction)
-{
- unsigned long pci_addr = virt_to_phys(ptr);
-
- if (direction == PCI_DMA_NONE)
- BUG();
- /*
- * Check if the PCI device can DMA to ptr... if so, just return ptr
- */
- if ((pci_addr & ~hwdev->dma_mask) == 0)
- /*
- * Device is bit capable of DMA'ing to the
- * buffer... just return the PCI address of ptr
- */
- return pci_addr;
-
- /*
- * get a bounce buffer:
- */
- pci_addr = virt_to_phys(__pci_map_single(hwdev, ptr, size, direction));
-
- /*
- * Ensure that the address returned is DMA'ble:
- */
- if ((pci_addr & ~hwdev->dma_mask) != 0)
- panic("__pci_map_single: bounce buffer is not DMA'ble");
-
- return pci_addr;
-}
-
-/*
- * Unmap a single streaming mode DMA translation. The dma_addr and size
- * must match what was provided for in a previous pci_map_single call. All
- * other usages are undefined.
- *
- * After this call, reads by the cpu to the buffer are guarenteed to see
- * whatever the device wrote there.
- */
-void
-pci_unmap_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction)
-{
- char *dma_addr = phys_to_virt(pci_addr);
-
- if (direction == PCI_DMA_NONE)
- BUG();
- if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
- __pci_unmap_single(hwdev, dma_addr, size, direction);
-}
-
-/*
- * Make physical memory consistent for a single
- * streaming mode DMA translation after a transfer.
- *
- * If you perform a pci_map_single() but wish to interrogate the
- * buffer using the cpu, yet do not wish to teardown the PCI dma
- * mapping, you must call this function before doing so. At the
- * next point you give the PCI dma address back to the card, the
- * device again owns the buffer.
- */
-void
-pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction)
-{
- char *dma_addr = phys_to_virt(pci_addr);
-
- if (direction == PCI_DMA_NONE)
- BUG();
- if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
- __pci_sync_single(hwdev, dma_addr, size, direction);
-}
-
-/*
- * Map a set of buffers described by scatterlist in streaming
- * mode for DMA. This is the scather-gather version of the
- * above pci_map_single interface. Here the scatter gather list
- * elements are each tagged with the appropriate dma address
- * and length. They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- * DMA address/length pairs than there are SG table elements.
- * (for example via virtual mapping capabilities)
- * The routine returns the number of addr/length pairs actually
- * used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-int
-pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction)
-{
- int i;
-
- if (direction == PCI_DMA_NONE)
- BUG();
-
- for (i = 0; i < nelems; i++, sg++) {
- sg->orig_address = sg->address;
- if ((virt_to_phys(sg->address) & ~hwdev->dma_mask) != 0) {
- sg->address = __pci_map_single(hwdev, sg->address, sg->length, direction);
- }
- }
- return nelems;
-}
-
-/*
- * Unmap a set of streaming mode DMA translations.
- * Again, cpu read rules concerning calls here are the same as for
- * pci_unmap_single() above.
- */
-void
-pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction)
-{
- int i;
-
- if (direction == PCI_DMA_NONE)
- BUG();
-
- for (i = 0; i < nelems; i++, sg++)
- if (sg->orig_address != sg->address) {
- __pci_unmap_single(hwdev, sg->address, sg->length, direction);
- sg->address = sg->orig_address;
- }
-}
-
-/*
- * Make physical memory consistent for a set of streaming mode DMA
- * translations after a transfer.
- *
- * The same as pci_dma_sync_single but for a scatter-gather list,
- * same rules and usage.
- */
-void
-pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction)
-{
- int i;
-
- if (direction == PCI_DMA_NONE)
- BUG();
-
- for (i = 0; i < nelems; i++, sg++)
- if (sg->orig_address != sg->address)
- __pci_sync_single(hwdev, sg->address, sg->length, direction);
-}
-
-#else
-/*
- * Map a single buffer of the indicated size for DMA in streaming mode.
- * The 32-bit bus address to use is returned.
- *
- * Once the device is given the dma address, the device owns this memory
- * until either pci_unmap_single or pci_dma_sync_single is performed.
- */
-dma_addr_t
-pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction)
-{
- if (direction == PCI_DMA_NONE)
- BUG();
- return virt_to_bus(ptr);
-}
-
-/*
- * Unmap a single streaming mode DMA translation. The dma_addr and size
- * must match what was provided for in a previous pci_map_single call. All
- * other usages are undefined.
- *
- * After this call, reads by the cpu to the buffer are guarenteed to see
- * whatever the device wrote there.
- */
-void
-pci_unmap_single (struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction)
-{
- if (direction == PCI_DMA_NONE)
- BUG();
- /* Nothing to do */
-}
-/*
- * Map a set of buffers described by scatterlist in streaming
- * mode for DMA. This is the scather-gather version of the
- * above pci_map_single interface. Here the scatter gather list
- * elements are each tagged with the appropriate dma address
- * and length. They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- * DMA address/length pairs than there are SG table elements.
- * (for example via virtual mapping capabilities)
- * The routine returns the number of addr/length pairs actually
- * used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-int
-pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction)
-{
- if (direction == PCI_DMA_NONE)
- BUG();
- return nents;
-}
-
-/*
- * Unmap a set of streaming mode DMA translations.
- * Again, cpu read rules concerning calls here are the same as for
- * pci_unmap_single() above.
- */
-void
-pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction)
-{
- if (direction == PCI_DMA_NONE)
- BUG();
- /* Nothing to do */
-}
-/*
- * Make physical memory consistent for a single
- * streaming mode DMA translation after a transfer.
- *
- * If you perform a pci_map_single() but wish to interrogate the
- * buffer using the cpu, yet do not wish to teardown the PCI dma
- * mapping, you must call this function before doing so. At the
- * next point you give the PCI dma address back to the card, the
- * device again owns the buffer.
- */
-void
-pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction)
-{
- if (direction == PCI_DMA_NONE)
- BUG();
- /* Nothing to do */
-}
-
-/*
- * Make physical memory consistent for a set of streaming mode DMA
- * translations after a transfer.
- *
- * The same as pci_dma_sync_single but for a scatter-gather list,
- * same rules and usage.
- */
-void
-pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction)
-{
- if (direction == PCI_DMA_NONE)
- BUG();
- /* Nothing to do */
-}
-
-#endif /* CONFIG_SWIOTLB */
-
-void *
-pci_alloc_consistent (struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle)
-{
- unsigned long pci_addr;
- int gfp = GFP_ATOMIC;
- void *ret;
-
- if (!hwdev || hwdev->dma_mask <= 0xffffffff)
- gfp |= GFP_DMA; /* XXX fix me: should change this to GFP_32BIT or ZONE_32BIT */
- ret = (void *)__get_free_pages(gfp, get_order(size));
- if (!ret)
- return NULL;
-
- memset(ret, 0, size);
- pci_addr = virt_to_phys(ret);
- if ((pci_addr & ~hwdev->dma_mask) != 0)
- panic("pci_alloc_consistent: allocated memory is out of range for PCI device");
- *dma_handle = pci_addr;
- return ret;
-}
-
-void
-pci_free_consistent (struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle)
-{
- free_pages((unsigned long) vaddr, get_order(size));
-}
diff --git a/arch/ia64/kernel/pci.c b/arch/ia64/kernel/pci.c
index 2d814b443..37dbf811a 100644
--- a/arch/ia64/kernel/pci.c
+++ b/arch/ia64/kernel/pci.c
@@ -1,10 +1,8 @@
/*
- * pci.c - Low-Level PCI Access in IA64
+ * pci.c - Low-Level PCI Access in IA-64
*
* Derived from bios32.c of i386 tree.
- *
*/
-
#include <linux/config.h>
#include <linux/types.h>
@@ -44,19 +42,16 @@
* This interrupt-safe spinlock protects all accesses to PCI
* configuration space.
*/
-
spinlock_t pci_lock = SPIN_LOCK_UNLOCKED;
-struct pci_fixup pcibios_fixups[] = { { 0 } };
-
-#define PCI_NO_CHECKS 0x400
-#define PCI_NO_PEER_FIXUP 0x800
-
-static unsigned int pci_probe = PCI_NO_CHECKS;
+struct pci_fixup pcibios_fixups[] = {
+ { 0 }
+};
/* Macro to build a PCI configuration address to be passed as a parameter to SAL. */
-#define PCI_CONFIG_ADDRESS(dev, where) (((u64) dev->bus->number << 16) | ((u64) (dev->devfn & 0xff) << 8) | (where & 0xff))
+#define PCI_CONFIG_ADDRESS(dev, where) \
+ (((u64) dev->bus->number << 16) | ((u64) (dev->devfn & 0xff) << 8) | (where & 0xff))
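As a quick sanity check of the packing, here is a standalone rendering of the macro with arbitrary example values (bus 2, devfn 0x18, config offset 4); the CFG_ADDR name is just for illustration:

	#include <assert.h>
	#include <stdint.h>

	#define CFG_ADDR(bus, devfn, where) \
		(((uint64_t) (bus) << 16) | ((uint64_t) ((devfn) & 0xff) << 8) | ((where) & 0xff))

	int main (void)
	{
		/* bits 63..16 = bus, 15..8 = devfn, 7..0 = config-space offset */
		assert(CFG_ADDR(2, 0x18, 0x04) == 0x21804);
		return 0;
	}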
static int
pci_conf_read_config_byte(struct pci_dev *dev, int where, u8 *value)
@@ -109,8 +104,7 @@ pci_conf_write_config_dword (struct pci_dev *dev, int where, u32 value)
return ia64_sal_pci_config_write(PCI_CONFIG_ADDRESS(dev, where), 4, value);
}
-
-static struct pci_ops pci_conf = {
+struct pci_ops pci_conf = {
pci_conf_read_config_byte,
pci_conf_read_config_word,
pci_conf_read_config_dword,
@@ -120,36 +114,21 @@ static struct pci_ops pci_conf = {
};
/*
- * Try to find PCI BIOS. This will always work for IA64.
- */
-
-static struct pci_ops * __init
-pci_find_bios(void)
-{
- return &pci_conf;
-}
-
-/*
* Initialization. Uses the SAL interface
*/
-
-#define PCI_BUSES_TO_SCAN 255
-
void __init
-pcibios_init(void)
+pcibios_init (void)
{
- struct pci_ops *ops = NULL;
+# define PCI_BUSES_TO_SCAN 255
int i;
- if ((ops = pci_find_bios()) == NULL) {
- printk("PCI: No PCI bus detected\n");
- return;
- }
+ platform_pci_fixup(0); /* phase 0 initialization (before PCI bus has been scanned) */
printk("PCI: Probing PCI hardware\n");
for (i = 0; i < PCI_BUSES_TO_SCAN; i++)
- pci_scan_bus(i, ops, NULL);
- platform_pci_fixup();
+ pci_scan_bus(i, &pci_conf, NULL);
+
+ platform_pci_fixup(1); /* phase 1 initialization (after PCI bus has been scanned) */
return;
}
@@ -157,16 +136,15 @@ pcibios_init(void)
* Called after each bus is probed, but before its children
* are examined.
*/
-
void __init
-pcibios_fixup_bus(struct pci_bus *b)
+pcibios_fixup_bus (struct pci_bus *b)
{
return;
}
void __init
-pcibios_update_resource(struct pci_dev *dev, struct resource *root,
- struct resource *res, int resource)
+pcibios_update_resource (struct pci_dev *dev, struct resource *root,
+ struct resource *res, int resource)
{
unsigned long where, size;
u32 reg;
@@ -181,7 +159,7 @@ pcibios_update_resource(struct pci_dev *dev, struct resource *root,
}
void __init
-pcibios_update_irq(struct pci_dev *dev, int irq)
+pcibios_update_irq (struct pci_dev *dev, int irq)
{
pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
@@ -204,18 +182,16 @@ pcibios_enable_device (struct pci_dev *dev)
return 0;
}
+void
+pcibios_align_resource (void *data, struct resource *res, unsigned long size)
+{
+}
+
/*
* PCI BIOS setup, always defaults to SAL interface
*/
-
char * __init
-pcibios_setup(char *str)
+pcibios_setup (char *str)
{
- pci_probe = PCI_NO_CHECKS;
return NULL;
}
-
-void
-pcibios_align_resource (void *data, struct resource *res, unsigned long size)
-{
-}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index e5efbc8b5..4c7ba4295 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -4,18 +4,20 @@
*
* Originally written by Ganesh Venkitachalam, IBM Corp.
* Modifications by David Mosberger-Tang, Hewlett-Packard Co.
+ * Modifications by Stephane Eranian, Hewlett-Packard Co.
* Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com>
* Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2000 Stephane Eranian <eranian@hpl.hp.com>
*/
#include <linux/config.h>
+
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/proc_fs.h>
-#include <linux/ptrace.h>
#include <asm/errno.h>
#include <asm/hw_irq.h>
@@ -58,19 +60,51 @@
#define MAX_PERF_COUNTER 4 /* true for Itanium, at least */
#define PMU_FIRST_COUNTER 4 /* first generic counter */
-#define WRITE_PMCS_AND_START 0xa0
-#define WRITE_PMCS 0xa1
-#define READ_PMDS 0xa2
-#define STOP_PMCS 0xa3
+#define PFM_WRITE_PMCS 0xa0
+#define PFM_WRITE_PMDS 0xa1
+#define PFM_READ_PMDS 0xa2
+#define PFM_STOP 0xa3
+#define PFM_START 0xa4
+#define PFM_ENABLE 0xa5 /* unfreeze only */
+#define PFM_DISABLE 0xa6 /* freeze only */
+/*
+ * Those 2 are just meant for debugging. I considered using sysctl() for
+ * that but it is a little bit too pervasive. This solution is at least
+ * self-contained.
+ */
+#define PFM_DEBUG_ON 0xe0
+#define PFM_DEBUG_OFF 0xe1
+
+#ifdef CONFIG_SMP
+#define cpu_is_online(i) (cpu_online_map & (1UL << i))
+#else
+#define cpu_is_online(i) 1
+#endif
+#define PMC_IS_IMPL(i) (pmu_conf.impl_regs[i>>6] & (1UL<< (i&(64-1))))
+#define PMD_IS_IMPL(i) (pmu_conf.impl_regs[4+(i>>6)] & (1UL<< (i&(64-1))))
+#define PMD_IS_COUNTER(i) (i>=PMU_FIRST_COUNTER && i < (PMU_FIRST_COUNTER+pmu_conf.max_counters))
+#define PMC_IS_COUNTER(i) (i>=PMU_FIRST_COUNTER && i < (PMU_FIRST_COUNTER+pmu_conf.max_counters))
/*
* this structure needs to be enhanced
*/
typedef struct {
+ unsigned long pfr_reg_num; /* which register */
+ unsigned long pfr_reg_value; /* configuration (PMC) or initial value (PMD) */
+ unsigned long pfr_reg_reset; /* reset value on overflow (PMD) */
+ void *pfr_smpl_buf; /* pointer to user buffer for EAR/BTB */
+ unsigned long pfr_smpl_size; /* size of user buffer for EAR/BTB */
+ pid_t pfr_notify_pid; /* process to notify */
+ int pfr_notify_sig; /* signal for notification, 0=no notification */
+} perfmon_req_t;
+
+#if 0
+typedef struct {
unsigned long pmu_reg_data; /* generic PMD register */
unsigned long pmu_reg_num; /* which register number */
} perfmon_reg_t;
+#endif
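From user space the new request-based interface would be driven roughly as below. The perfmon_req_t layout and the PFM_* command values come from this patch; the syscall number and the exact user-level entry point are not shown here, so they remain assumptions.

	#include <sys/types.h>

	typedef struct {
		unsigned long pfr_reg_num;	/* which register */
		unsigned long pfr_reg_value;	/* configuration (PMC) or initial value (PMD) */
		unsigned long pfr_reg_reset;	/* reset value on overflow (PMD) */
		void         *pfr_smpl_buf;	/* pointer to user buffer for EAR/BTB */
		unsigned long pfr_smpl_size;	/* size of user buffer for EAR/BTB */
		pid_t         pfr_notify_pid;	/* process to notify */
		int           pfr_notify_sig;	/* signal for notification, 0=no notification */
	} perfmon_req_t;

	#define PFM_WRITE_PMCS 0xa0
	#define PFM_WRITE_PMDS 0xa1
	#define PFM_START      0xa4

	int main (void)
	{
		perfmon_req_t pmc = { 4, 0 /* event encoding */, 0, 0, 0, 0, 0 };
		perfmon_req_t pmd = { 4, 0, 0, 0, 0, 0, 0 };

		/*
		 * With the (unspecified) syscall number this would become, following the
		 * kernel-side prototype sys_perfmonctl(pid, cmd, flags, req, count, ...):
		 *
		 *	syscall(__NR_perfmonctl, getpid(), PFM_WRITE_PMCS, 0, &pmc, 1);
		 *	syscall(__NR_perfmonctl, getpid(), PFM_WRITE_PMDS, 0, &pmd, 1);
		 *	syscall(__NR_perfmonctl, getpid(), PFM_START,      0, (void *) 0, 0);
		 */
		(void) pmc; (void) pmd;
		return 0;
	}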
/*
* This structure is initialize at boot time and contains
@@ -78,86 +112,141 @@ typedef struct {
* by PAL
*/
typedef struct {
- unsigned long perf_ovfl_val; /* overflow value for generic counters */
- unsigned long max_pmc; /* highest PMC */
- unsigned long max_pmd; /* highest PMD */
- unsigned long max_counters; /* number of generic counter pairs (PMC/PMD) */
+ unsigned long perf_ovfl_val; /* overflow value for generic counters */
+ unsigned long max_counters; /* upper limit on counter pair (PMC/PMD) */
+ unsigned long impl_regs[16]; /* buffer used to hold implemented PMC/PMD mask */
} pmu_config_t;
-/* XXX will go static when ptrace() is cleaned */
-unsigned long perf_ovfl_val; /* overflow value for generic counters */
-
static pmu_config_t pmu_conf;
+/* for debug only */
+static unsigned long pfm_debug=1; /* 0= nodebug, >0= debug output on */
+#define DBprintk(a) {\
+ if (pfm_debug >0) { printk a; } \
+}
+
/*
- * could optimize to avoid cache conflicts in SMP
+ * could optimize to avoid cache line conflicts in SMP
*/
-unsigned long pmds[NR_CPUS][MAX_PERF_COUNTER];
+static struct task_struct *pmu_owners[NR_CPUS];
-asmlinkage unsigned long
-sys_perfmonctl (int cmd, int count, void *ptr, long arg4, long arg5, long arg6, long arg7, long arg8, long stack)
+static int
+do_perfmonctl (struct task_struct *task, int cmd, int flags, perfmon_req_t *req, int count, struct pt_regs *regs)
{
- struct pt_regs *regs = (struct pt_regs *) &stack;
- perfmon_reg_t tmp, *cptr = ptr;
- unsigned long cnum;
+ perfmon_req_t tmp;
int i;
switch (cmd) {
- case WRITE_PMCS: /* Writes to PMC's and clears PMDs */
- case WRITE_PMCS_AND_START: /* Also starts counting */
+ case PFM_WRITE_PMCS:
+ /* we don't quite support this right now */
+ if (task != current) return -EINVAL;
+
+ if (!access_ok(VERIFY_READ, req, sizeof(perfmon_req_t)*count)) return -EFAULT;
+
+ for (i = 0; i < count; i++, req++) {
+ copy_from_user(&tmp, req, sizeof(tmp));
+
+ /* XXX needs to check validity of the data maybe */
+
+ if (!PMC_IS_IMPL(tmp.pfr_reg_num)) {
+ DBprintk((__FUNCTION__ " invalid pmc[%ld]\n", tmp.pfr_reg_num));
+ return -EINVAL;
+ }
+
+ /* XXX: for counters, need to do some checks */
+ if (PMC_IS_COUNTER(tmp.pfr_reg_num)) {
+ current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].sig = tmp.pfr_notify_sig;
+ current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].pid = tmp.pfr_notify_pid;
+
+ DBprintk((__FUNCTION__" setting PMC[%ld] send sig %d to %d\n",tmp.pfr_reg_num, tmp.pfr_notify_sig, tmp.pfr_notify_pid));
+ }
+ ia64_set_pmc(tmp.pfr_reg_num, tmp.pfr_reg_value);
+
+ DBprintk((__FUNCTION__" setting PMC[%ld]=0x%lx\n", tmp.pfr_reg_num, tmp.pfr_reg_value));
+ }
+ /*
+ * we have to set this here even though we haven't necessarily started monitoring
+ * because we may be context switched out
+ */
+ current->thread.flags |= IA64_THREAD_PM_VALID;
+ break;
+
+ case PFM_WRITE_PMDS:
+ /* we don't quite support this right now */
+ if (task != current) return -EINVAL;
+
+ if (!access_ok(VERIFY_READ, req, sizeof(perfmon_req_t)*count)) return -EFAULT;
+
+ for (i = 0; i < count; i++, req++) {
+ copy_from_user(&tmp, req, sizeof(tmp));
+
+ if (!PMD_IS_IMPL(tmp.pfr_reg_num)) return -EINVAL;
+
+ /* update virtualized (64bits) counter */
+ if (PMD_IS_COUNTER(tmp.pfr_reg_num)) {
+ current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].val = tmp.pfr_reg_value & ~pmu_conf.perf_ovfl_val;
+ current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].rval = tmp.pfr_reg_reset;
+ }
+ /* writes to the unimplemented part are ignored, so this is safe */
+ ia64_set_pmd(tmp.pfr_reg_num, tmp.pfr_reg_value);
+ /* to go away */
+ ia64_srlz_d();
+ DBprintk((__FUNCTION__" setting PMD[%ld]: pmod.val=0x%lx pmd=0x%lx rval=0x%lx\n", tmp.pfr_reg_num, current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].val, ia64_get_pmd(tmp.pfr_reg_num),current->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].rval));
+ }
+ /*
+ * we have to set this here even though we haven't necessarily started monitoring
+ * because we may be context switched out
+ */
+ current->thread.flags |= IA64_THREAD_PM_VALID;
+ break;
+
+ case PFM_START:
+ /* we don't quite support this right now */
+ if (task != current) return -EINVAL;
+
+ pmu_owners[smp_processor_id()] = current;
- if (!access_ok(VERIFY_READ, cptr, sizeof(struct perfmon_reg_t)*count))
- return -EFAULT;
-
- for (i = 0; i < count; i++, cptr++) {
-
- copy_from_user(&tmp, cptr, sizeof(tmp));
-
- /* XXX need to check validity of pmu_reg_num and perhaps data!! */
-
- if (tmp.pmu_reg_num > pmu_conf.max_pmc || tmp.pmu_reg_num == 0) return -EFAULT;
+ /* will start monitoring right after rfi */
+ ia64_psr(regs)->up = 1;
- ia64_set_pmc(tmp.pmu_reg_num, tmp.pmu_reg_data);
+ /*
+ * mark the state as valid.
+ * this will trigger save/restore at context switch
+ */
+ current->thread.flags |= IA64_THREAD_PM_VALID;
- /* to go away */
- if (tmp.pmu_reg_num >= PMU_FIRST_COUNTER && tmp.pmu_reg_num < PMU_FIRST_COUNTER+pmu_conf.max_counters) {
- ia64_set_pmd(tmp.pmu_reg_num, 0);
- pmds[smp_processor_id()][tmp.pmu_reg_num - PMU_FIRST_COUNTER] = 0;
+ ia64_set_pmc(0, 0);
- printk(__FUNCTION__" setting PMC/PMD[%ld] es=0x%lx pmd[%ld]=%lx\n", tmp.pmu_reg_num, (tmp.pmu_reg_data>>8) & 0x7f, tmp.pmu_reg_num, ia64_get_pmd(tmp.pmu_reg_num));
- } else
- printk(__FUNCTION__" setting PMC[%ld]=0x%lx\n", tmp.pmu_reg_num, tmp.pmu_reg_data);
- }
+ break;
- if (cmd == WRITE_PMCS_AND_START) {
-#if 0
-/* irrelevant with user monitors */
- local_irq_save(flags);
+ case PFM_ENABLE:
+ /* we don't quite support this right now */
+ if (task != current) return -EINVAL;
- dcr = ia64_get_dcr();
- dcr |= IA64_DCR_PP;
- ia64_set_dcr(dcr);
+ pmu_owners[smp_processor_id()] = current;
- local_irq_restore(flags);
-#endif
+ /*
+ * mark the state as valid.
+ * this will trigger save/restore at context switch
+ */
+ current->thread.flags |= IA64_THREAD_PM_VALID;
+ /* simply unfreeze */
ia64_set_pmc(0, 0);
+ break;
- /* will start monitoring right after rfi */
- ia64_psr(regs)->up = 1;
- }
- /*
- * mark the state as valid.
- * this will trigger save/restore at context switch
- */
- current->thread.flags |= IA64_THREAD_PM_VALID;
- break;
+ case PFM_DISABLE:
+ /* we don't quite support this right now */
+ if (task != current) return -EINVAL;
+
+ /* simply unfreeze */
+ ia64_set_pmc(0, 1);
+ ia64_srlz_d();
+ break;
- case READ_PMDS:
- if (count <= 0 || count > MAX_PERF_COUNTER)
- return -EINVAL;
- if (!access_ok(VERIFY_WRITE, cptr, sizeof(struct perfmon_reg_t)*count))
- return -EFAULT;
+ case PFM_READ_PMDS:
+ if (!access_ok(VERIFY_READ, req, sizeof(struct perfmon_req_t)*count)) return -EFAULT;
+ if (!access_ok(VERIFY_WRITE, req, sizeof(struct perfmon_req_t)*count)) return -EFAULT;
/* This looks shady, but IMHO this will work fine. This is
* the sequence that I could come up with to avoid races
@@ -187,16 +276,31 @@ sys_perfmonctl (int cmd, int count, void *ptr, long arg4, long arg5, long arg6,
* is the irq_save/restore needed?
*/
+ for (i = 0; i < count; i++, req++) {
+ unsigned long val=0;
- /* XXX: This needs to change to read more than just the counters */
- for (i = 0, cnum = PMU_FIRST_COUNTER;i < count; i++, cnum++, cptr++) {
+ copy_from_user(&tmp, req, sizeof(tmp));
- tmp.pmu_reg_data = (pmds[smp_processor_id()][i]
- + (ia64_get_pmd(cnum) & pmu_conf.perf_ovfl_val));
+ if (!PMD_IS_IMPL(tmp.pfr_reg_num)) return -EINVAL;
- tmp.pmu_reg_num = cnum;
+ if (PMD_IS_COUNTER(tmp.pfr_reg_num)) {
+ if (task == current){
+ val = ia64_get_pmd(tmp.pfr_reg_num) & pmu_conf.perf_ovfl_val;
+ } else {
+ val = task->thread.pmd[tmp.pfr_reg_num - PMU_FIRST_COUNTER] & pmu_conf.perf_ovfl_val;
+ }
+ val += task->thread.pmu_counters[tmp.pfr_reg_num - PMU_FIRST_COUNTER].val;
+ } else {
+ /* for now */
+ if (task != current) return -EINVAL;
- if (copy_to_user(cptr, &tmp, sizeof(tmp))) return -EFAULT;
+ val = ia64_get_pmd(tmp.pfr_reg_num);
+ }
+ tmp.pfr_reg_value = val;
+
+DBprintk((__FUNCTION__" reading PMD[%ld]=0x%lx\n", tmp.pfr_reg_num, val));
+
+ if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
}
#if 0
/* irrelevant with user monitors */
@@ -209,11 +313,18 @@ sys_perfmonctl (int cmd, int count, void *ptr, long arg4, long arg5, long arg6,
#endif
break;
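Editorial note: on the read side, PFM_READ_PMDS rebuilds the 64-bit virtual counter by adding the software-accumulated part to the masked hardware PMD (or to the saved thread.pmd[] copy when the task is switched out). A small sketch of that recombination, again with a hypothetical 32-bit counter width:

    #include <stdio.h>
    #include <stdint.h>

    /* Reconstruct the 64-bit virtual counter the way PFM_READ_PMDS does:
     * software-accumulated part plus the masked hardware PMD. */
    static uint64_t virtual_pmd(uint64_t soft_val, uint64_t hw_pmd, uint64_t ovfl_mask)
    {
            return soft_val + (hw_pmd & ovfl_mask);
    }

    int main(void)
    {
            uint64_t ovfl_mask = (1ULL << 32) - 1;   /* hypothetical counter width */

            printf("0x%llx\n",
                   (unsigned long long) virtual_pmd(0x500000000ULL, 0x1234ULL, ovfl_mask));
            return 0;
    }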
- case STOP_PMCS:
+ case PFM_STOP:
+ /* we don't quite support this right now */
+ if (task != current) return -EINVAL;
+
ia64_set_pmc(0, 1);
ia64_srlz_d();
- for (i = 0; i < MAX_PERF_COUNTER; ++i)
- ia64_set_pmc(4+i, 0);
+
+ ia64_psr(regs)->up = 0;
+
+ current->thread.flags &= ~IA64_THREAD_PM_VALID;
+
+ pmu_owners[smp_processor_id()] = NULL;
#if 0
/* irrelevant with user monitors */
@@ -225,48 +336,140 @@ sys_perfmonctl (int cmd, int count, void *ptr, long arg4, long arg5, long arg6,
ia64_psr(regs)->up = 0;
#endif
- current->thread.flags &= ~(IA64_THREAD_PM_VALID);
-
break;
+ case PFM_DEBUG_ON:
+	      printk(__FUNCTION__" debugging on\n");
+ pfm_debug = 1;
+ break;
+
+ case PFM_DEBUG_OFF:
+	      printk(__FUNCTION__" debugging off\n");
+ pfm_debug = 0;
+ break;
+
default:
+		DBprintk((__FUNCTION__" Unknown command 0x%x\n", cmd));
return -EINVAL;
break;
}
return 0;
}
-static inline void
-update_counters (void)
+asmlinkage int
+sys_perfmonctl (int pid, int cmd, int flags, perfmon_req_t *req, int count, long arg6, long arg7, long arg8, long stack)
{
- unsigned long mask, i, cnum, val;
+ struct pt_regs *regs = (struct pt_regs *) &stack;
+ struct task_struct *child = current;
+ int ret;
+
+ if (pid != current->pid) {
+ read_lock(&tasklist_lock);
+ {
+ child = find_task_by_pid(pid);
+ if (child)
+ get_task_struct(child);
+ }
+ if (!child) {
+ read_unlock(&tasklist_lock);
+ return -ESRCH;
+ }
+ /*
+ * XXX: need to do more checking here
+ */
+ if (child->state != TASK_ZOMBIE) {
+ DBprintk((__FUNCTION__" warning process %d not in stable state %ld\n", pid, child->state));
+ }
+ }
+ ret = do_perfmonctl(child, cmd, flags, req, count, regs);
- mask = ia64_get_pmc(0) >> 4;
- for (i = 0, cnum = PMU_FIRST_COUNTER ; i < pmu_conf.max_counters; cnum++, i++, mask >>= 1) {
+ if (child != current) read_unlock(&tasklist_lock);
+ return ret;
+}
- val = mask & 0x1 ? pmu_conf.perf_ovfl_val + 1 : 0;
- if (mask & 0x1)
- printk(__FUNCTION__ " PMD%ld overflowed pmd=%lx pmod=%lx\n", cnum, ia64_get_pmd(cnum), pmds[smp_processor_id()][i]);
+static inline int
+update_counters (u64 pmc0)
+{
+ unsigned long mask, i, cnum;
+ struct thread_struct *th;
+ struct task_struct *ta;
- /* since we got an interrupt, might as well clear every pmd. */
- val += ia64_get_pmd(cnum) & pmu_conf.perf_ovfl_val;
+ if (pmu_owners[smp_processor_id()] == NULL) {
+ DBprintk((__FUNCTION__" Spurious overflow interrupt: PMU not owned\n"));
+ return 0;
+ }
+
+ /*
+	 * It is never safe to access the task for which the overflow interrupt is destined
+ * using the current variable as the interrupt may occur in the middle of a context switch
+ * where current does not hold the task that is running yet.
+ *
+ * For monitoring, however, we do need to get access to the task which caused the overflow
+ * to account for overflow on the counters.
+ * We accomplish this by maintaining a current owner of the PMU per CPU. During context
+ * switch the ownership is changed in a way such that the reflected owner is always the
+ * valid one, i.e. the one that caused the interrupt.
+ */
+ ta = pmu_owners[smp_processor_id()];
+ th = &pmu_owners[smp_processor_id()]->thread;
- printk(__FUNCTION__ " adding val=%lx to pmod[%ld]=%lx \n", val, i, pmds[smp_processor_id()][i]);
+ /*
+	 * Don't think this could happen given the first test. Keep as a sanity check.
+ */
+ if ((th->flags & IA64_THREAD_PM_VALID) == 0) {
+ DBprintk((__FUNCTION__" Spurious overflow interrupt: process %d not using perfmon\n", ta->pid));
+ return 0;
+ }
+
+ /*
+ * if PMU not frozen: spurious from previous context
+ * if PMC[0] = 0x1 : frozen but no overflow reported: leftover from previous context
+ *
+ * in either case we don't touch the state upon return from handler
+ */
+ if ((pmc0 & 0x1) == 0 || pmc0 == 0x1) {
+ DBprintk((__FUNCTION__" Spurious overflow interrupt: process %d freeze=0\n",ta->pid));
+ return 0;
+ }
- pmds[smp_processor_id()][i] += val;
+ mask = pmc0 >> 4;
- ia64_set_pmd(cnum, 0);
+ for (i = 0, cnum = PMU_FIRST_COUNTER; i < pmu_conf.max_counters; cnum++, i++, mask >>= 1) {
+
+ if (mask & 0x1) {
+ DBprintk((__FUNCTION__ " PMD[%ld] overflowed pmd=0x%lx pmod.val=0x%lx\n", cnum, ia64_get_pmd(cnum), th->pmu_counters[i].val));
+
+ /*
+			 * Because we sometimes (EARS/BTB) reset to a specific value, we cannot simply use
+			 * val to count the number of times we overflowed. Otherwise we would lose the value
+			 * currently in the PMD (which can be >0). So to make sure we don't lose
+			 * the residual counts we set val to contain the full 64-bit value of the counter.
+ */
+ th->pmu_counters[i].val += 1+pmu_conf.perf_ovfl_val+(ia64_get_pmd(cnum) &pmu_conf.perf_ovfl_val);
+
+ /* writes to upper part are ignored, so this is safe */
+ ia64_set_pmd(cnum, th->pmu_counters[i].rval);
+
+ DBprintk((__FUNCTION__ " pmod[%ld].val=0x%lx pmd=0x%lx\n", i, th->pmu_counters[i].val, ia64_get_pmd(cnum)&pmu_conf.perf_ovfl_val));
+
+ if (th->pmu_counters[i].pid != 0 && th->pmu_counters[i].sig>0) {
+				DBprintk((__FUNCTION__ " should notify process %d with signal %d\n",th->pmu_counters[i].pid, th->pmu_counters[i].sig));
+ }
+ }
}
+ return 1;
}
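Editorial note: on each overflow, update_counters() credits the software counter with one full hardware wrap (1 + perf_ovfl_val, i.e. 2^width) plus whatever residual count is already sitting in the PMD, and then rewrites the PMD with the reset value. A minimal user-space sketch of that bookkeeping, assuming a hypothetical 16-bit counter width:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            const unsigned width = 16;                      /* hypothetical counter width */
            const uint64_t ovfl_mask = (1ULL << width) - 1; /* plays the role of perf_ovfl_val */

            uint64_t soft_val = 0;   /* pmu_counters[i].val  */
            uint64_t rval     = 0;   /* pmu_counters[i].rval */
            uint64_t pmd      = 7;   /* residual count already in the PMD after the wrap */

            /* what update_counters() does for an overflowed counter: */
            soft_val += 1 + ovfl_mask + (pmd & ovfl_mask);  /* one full wrap + residual */
            pmd = rval;                                     /* ia64_set_pmd(cnum, rval)  */

            printf("soft_val=%llu pmd=%llu\n",
                   (unsigned long long) soft_val, (unsigned long long) pmd);
            return 0;
    }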
static void
perfmon_interrupt (int irq, void *arg, struct pt_regs *regs)
{
- update_counters();
- ia64_set_pmc(0, 0);
- ia64_srlz_d();
+ /* unfreeze if not spurious */
+ if ( update_counters(ia64_get_pmc(0)) ) {
+ ia64_set_pmc(0, 0);
+ ia64_srlz_d();
+ }
}
static struct irqaction perfmon_irqaction = {
@@ -280,9 +483,13 @@ perfmon_proc_info(char *page)
{
char *p = page;
u64 pmc0 = ia64_get_pmc(0);
+ int i;
- p += sprintf(p, "PMC[0]=%lx\n", pmc0);
-
+ p += sprintf(p, "PMC[0]=%lx\nPerfmon debug: %s\n", pmc0, pfm_debug ? "On" : "Off");
+ for(i=0; i < NR_CPUS; i++) {
+ if (cpu_is_online(i))
+ p += sprintf(p, "CPU%d.PMU %d\n", i, pmu_owners[i] ? pmu_owners[i]->pid: -1);
+ }
return p - page;
}
@@ -308,7 +515,6 @@ void __init
perfmon_init (void)
{
pal_perf_mon_info_u_t pm_info;
- u64 pm_buffer[16];
s64 status;
irq_desc[PERFMON_IRQ].status |= IRQ_PER_CPU;
@@ -320,15 +526,13 @@ perfmon_init (void)
printk("perfmon: Initialized vector to %u\n",PERFMON_IRQ);
- if ((status=ia64_pal_perf_mon_info(pm_buffer, &pm_info)) != 0) {
+ if ((status=ia64_pal_perf_mon_info(pmu_conf.impl_regs, &pm_info)) != 0) {
printk(__FUNCTION__ " pal call failed (%ld)\n", status);
return;
}
- pmu_conf.perf_ovfl_val = perf_ovfl_val = (1L << pm_info.pal_perf_mon_info_s.width) - 1;
+ pmu_conf.perf_ovfl_val = (1L << pm_info.pal_perf_mon_info_s.width) - 1;
/* XXX need to use PAL instead */
- pmu_conf.max_pmc = 13;
- pmu_conf.max_pmd = 17;
pmu_conf.max_counters = pm_info.pal_perf_mon_info_s.generic;
printk("perfmon: Counters are %d bits\n", pm_info.pal_perf_mon_info_s.width);
@@ -347,36 +551,137 @@ perfmon_init_percpu (void)
ia64_srlz_d();
}
+/*
+ * XXX: for system-wide monitoring this function MUST never be called
+ */
void
-ia64_save_pm_regs (struct thread_struct *t)
+ia64_save_pm_regs (struct task_struct *ta)
{
- int i;
+ struct thread_struct *t = &ta->thread;
+ u64 pmc0, psr;
+ int i,j;
+
+ /*
+	 * We must make sure that we don't lose any potential overflow
+ * interrupt while saving PMU context. In this code, external
+ * interrupts are always enabled.
+ */
+
+ /*
+ * save current PSR: needed because we modify it
+ */
+ __asm__ __volatile__ ("mov %0=psr;;": "=r"(psr) :: "memory");
+
+ /*
+ * stop monitoring:
+ * This is the only way to stop monitoring without destroying overflow
+ * information in PMC[0..3].
+ * This is the last instruction which can cause overflow when monitoring
+ * in kernel.
+ * By now, we could still have an overflow interrupt in flight.
+ */
+ __asm__ __volatile__ ("rsm psr.up;;"::: "memory");
+
+ /*
+ * read current overflow status:
+ *
+	 * We may be reading stale information at this point, if we got the interrupt
+	 * just before the read of pmc0, but that's all right. However, if we did
+	 * not get the interrupt before, this read reflects the LAST state.
+ *
+ */
+ pmc0 = ia64_get_pmc(0);
+ /*
+ * freeze PMU:
+ *
+ * This destroys the overflow information. This is required to make sure
+ * next process does not start with monitoring on if not requested
+ * (PSR.up may not be enough).
+ *
+ * We could still get an overflow interrupt by now. However the handler
+	 * will not do anything if it sees PMC[0].fr=1 but no overflow bits
+ * are set. So PMU will stay in frozen state. This implies that pmc0
+ * will still be holding the correct unprocessed information.
+ *
+ */
ia64_set_pmc(0, 1);
ia64_srlz_d();
+
+ /*
+ * check for overflow bits set:
+ *
+ * If pmc0 reports PMU frozen, this means we have a pending overflow,
+ * therefore we invoke the handler. Handler is reentrant with regards
+ * to PMC[0] so it is safe to call it twice.
+ *
+ * IF pmc0 reports overflow, we need to reread current PMC[0] value
+ * in case the handler was invoked right after the first pmc0 read.
+	 * If it was not invoked then pmc0==PMC[0]; otherwise it has been invoked
+	 * and the overflow information has already been processed, so we don't need to call it.
+ *
+ * Test breakdown:
+ * - pmc0 & ~0x1: test if overflow happened
+ * - second part: check if current register reflects this as well.
+ *
+	 * NOTE: testing for pmc0 & 0x1 is not enough, as it would trigger a call
+	 * when PM_VALID and PMU.fr are set, which is common when setting up registers
+	 * just before actually starting the monitors.
+ *
+ */
+ if ((pmc0 & ~0x1) && ((pmc0=ia64_get_pmc(0)) &~0x1) ) {
+ printk(__FUNCTION__" Warning: pmc[0]=0x%lx\n", pmc0);
+ update_counters(pmc0);
+ /*
+ * XXX: not sure that's enough. the next task may still get the
+ * interrupt.
+ */
+ }
+
+ /*
+ * restore PSR for context switch to save
+ */
+ __asm__ __volatile__ ("mov psr.l=%0;;"::"r"(psr): "memory");
+
/*
	 * XXX: this will need to be extended beyond just counters
*/
- for (i=0; i< IA64_NUM_PM_REGS; i++) {
- t->pmd[i] = ia64_get_pmd(4+i);
- t->pmod[i] = pmds[smp_processor_id()][i];
- t->pmc[i] = ia64_get_pmc(4+i);
+ for (i=0,j=4; i< IA64_NUM_PMD_COUNTERS; i++,j++) {
+ t->pmd[i] = ia64_get_pmd(j);
+ t->pmc[i] = ia64_get_pmc(j);
}
+ /*
+ * PMU is frozen, PMU context is saved: nobody owns the PMU on this CPU
+ * At this point, we should not receive any pending interrupt from the
+ * 'switched out' task
+ */
+ pmu_owners[smp_processor_id()] = NULL;
}
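Editorial note: the decision to invoke update_counters() while saving PMU state boils down to "are any overflow bits (bits above the freeze bit) set, both in the pmc0 value sampled before freezing and in a fresh read?". A small user-space sketch of that predicate, with hypothetical PMC[0] values:

    #include <stdio.h>
    #include <stdint.h>

    /* bit 0 of PMC[0] is the freeze bit, bits 4 and up are per-counter overflow bits */
    static int overflow_pending(uint64_t pmc0_before, uint64_t pmc0_now)
    {
            return (pmc0_before & ~0x1ULL) && (pmc0_now & ~0x1ULL);
    }

    int main(void)
    {
            printf("%d\n", overflow_pending(0x01, 0x01)); /* frozen, no overflow bits -> 0 */
            printf("%d\n", overflow_pending(0x11, 0x11)); /* counter 4 overflowed     -> 1 */
            printf("%d\n", overflow_pending(0x11, 0x01)); /* handler already ran      -> 0 */
            return 0;
    }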
void
-ia64_load_pm_regs (struct thread_struct *t)
+ia64_load_pm_regs (struct task_struct *ta)
{
- int i;
+ struct thread_struct *t = &ta->thread;
+ int i,j;
+
+ /*
+ * we first restore ownership of the PMU to the 'soon to be current'
+	 * context. This way, if we get an interrupt as soon as we unfreeze the PMU
+	 * at the end of this function, we attribute it to the correct task.
+ */
+ pmu_owners[smp_processor_id()] = ta;
/*
	 * XXX: this will need to be extended beyond just counters
*/
- for (i=0; i< IA64_NUM_PM_REGS ; i++) {
- ia64_set_pmd(4+i, t->pmd[i]);
- pmds[smp_processor_id()][i] = t->pmod[i];
- ia64_set_pmc(4+i, t->pmc[i]);
+ for (i=0,j=4; i< IA64_NUM_PMD_COUNTERS; i++,j++) {
+ ia64_set_pmd(j, t->pmd[i]);
+ ia64_set_pmc(j, t->pmc[i]);
}
+ /*
+ * unfreeze PMU
+ */
ia64_set_pmc(0, 0);
ia64_srlz_d();
}
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 41db60a0c..e61843db5 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -137,23 +137,6 @@ cpu_idle (void *unused)
check_pgt_cache();
if (pm_idle)
(*pm_idle)();
-#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
- local_irq_disable();
- {
- u64 itc, itm;
-
- itc = ia64_get_itc();
- itm = ia64_get_itm();
- if (time_after(itc, itm + 1000)) {
- extern void ia64_reset_itm (void);
-
- printk("cpu_idle: ITM in past (itc=%lx,itm=%lx:%lums)\n",
- itc, itm, (itc - itm)/500000);
- ia64_reset_itm();
- }
- }
- local_irq_enable();
-#endif
}
}
@@ -164,7 +147,7 @@ ia64_save_extra (struct task_struct *task)
ia64_save_debug_regs(&task->thread.dbr[0]);
#ifdef CONFIG_PERFMON
if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
- ia64_save_pm_regs(&task->thread);
+ ia64_save_pm_regs(task);
#endif
if (IS_IA32_PROCESS(ia64_task_regs(task)))
ia32_save_state(&task->thread);
@@ -177,7 +160,7 @@ ia64_load_extra (struct task_struct *task)
ia64_load_debug_regs(&task->thread.dbr[0]);
#ifdef CONFIG_PERFMON
if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
- ia64_load_pm_regs(&task->thread);
+ ia64_load_pm_regs(task);
#endif
if (IS_IA32_PROCESS(ia64_task_regs(task)))
ia32_load_state(&task->thread);
@@ -299,6 +282,14 @@ copy_thread (int nr, unsigned long clone_flags,
# define THREAD_FLAGS_TO_SET 0
p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
| THREAD_FLAGS_TO_SET);
+#ifdef CONFIG_IA32_SUPPORT
+ /*
+ * If we're cloning an IA32 task then save the IA32 extra
+ * state from the current task to the new task
+ */
+ if (IS_IA32_PROCESS(ia64_task_regs(current)))
+ ia32_save_state(&p->thread);
+#endif
return 0;
}
@@ -554,7 +545,7 @@ exit_thread (void)
	 * we guarantee no race. This call also stops
	 * monitoring
*/
- ia64_save_pm_regs(&current->thread);
+ ia64_save_pm_regs(current);
/*
* make sure that switch_to() will not save context again
*/
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 820a87854..0b49bdcaa 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -617,7 +617,6 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data
struct switch_stack *sw;
struct unw_frame_info info;
struct pt_regs *pt;
- unsigned long pmd_tmp;
pt = ia64_task_regs(child);
sw = (struct switch_stack *) (child->thread.ksp + 16);
@@ -794,11 +793,7 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data
addr);
return -1;
}
- } else
-#ifdef CONFIG_PERFMON
- if (addr < PT_PMD)
-#endif
- {
+ } else {
/* access debug registers */
if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
@@ -820,33 +815,14 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data
}
ptr += regnum;
- }
-#ifdef CONFIG_PERFMON
- else {
- /*
- * XXX: will eventually move back to perfmonctl()
- */
- unsigned long pmd = (addr - PT_PMD) >> 3;
- extern unsigned long perf_ovfl_val;
-
- /* we just use ptrace to read */
- if (write_access) return -1;
-
- if (pmd > 3) {
- printk("ptrace: rejecting access to PMD[%ld] address 0x%lx\n", pmd, addr);
- return -1;
- }
- /*
- * We always need to mask upper 32bits of pmd because value is random
- */
- pmd_tmp = child->thread.pmod[pmd]+(child->thread.pmd[pmd]& perf_ovfl_val);
-
- /*printk(__FUNCTION__" child=%d reading pmd[%ld]=%lx\n", child->pid, pmd, pmd_tmp);*/
-
- ptr = &pmd_tmp;
+ if (write_access)
+ /* don't let the user set kernel-level breakpoints... */
+ *ptr = *data & ~(7UL << 56);
+ else
+ *data = *ptr;
+ return 0;
}
-#endif
if (write_access)
*ptr = *data;
else
@@ -861,7 +837,6 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data
{
unsigned long *ptr = NULL, *rbs, *bspstore, ndirty, regnum;
struct switch_stack *sw;
- unsigned long pmd_tmp;
struct pt_regs *pt;
if ((addr & 0x7) != 0)
@@ -977,11 +952,7 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data
/* disallow accessing anything else... */
return -1;
}
- } else
-#ifdef CONFIG_PERFMON
- if (addr < PT_PMD)
-#endif
- {
+ } else {
/* access debug registers */
@@ -1002,34 +973,14 @@ access_uarea (struct task_struct *child, unsigned long addr, unsigned long *data
return -1;
ptr += regnum;
- }
-#ifdef CONFIG_PERFMON
- else {
- /*
- * XXX: will eventually move back to perfmonctl()
- */
- unsigned long pmd = (addr - PT_PMD) >> 3;
- extern unsigned long perf_ovfl_val;
-
- /* we just use ptrace to read */
- if (write_access) return -1;
-
- if (pmd > 3) {
- printk("ptrace: rejecting access to PMD[%ld] address 0x%lx\n", pmd, addr);
- return -1;
- }
- /*
- * We always need to mask upper 32bits of pmd because value is random
- */
- pmd_tmp = child->thread.pmod[pmd]+(child->thread.pmd[pmd]& perf_ovfl_val);
-
- /*printk(__FUNCTION__" child=%d reading pmd[%ld]=%lx\n", child->pid, pmd, pmd_tmp);*/
-
- ptr = &pmd_tmp;
+ if (write_access)
+ /* don't let the user set kernel-level breakpoints... */
+ *ptr = *data & ~(7UL << 56);
+ else
+ *data = *ptr;
+ return 0;
}
-#endif
-
if (write_access)
*ptr = *data;
else
@@ -1107,7 +1058,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data,
goto out_tsk;
if (child->state != TASK_STOPPED) {
- if (request != PTRACE_KILL && request != PTRACE_PEEKUSR)
+ if (request != PTRACE_KILL)
goto out_tsk;
}
diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c
index 87c7befea..56059a306 100644
--- a/arch/ia64/kernel/sal.c
+++ b/arch/ia64/kernel/sal.c
@@ -104,9 +104,11 @@ ia64_sal_init (struct ia64_sal_systab *systab)
if (strncmp(systab->signature, "SST_", 4) != 0)
printk("bad signature in system table!");
- printk("SAL v%u.%02u: ia32bios=%s, oem=%.32s, product=%.32s\n",
+ /*
+ * revisions are coded in BCD, so %x does the job for us
+ */
+ printk("SAL v%x.%02x: oem=%.32s, product=%.32s\n",
systab->sal_rev_major, systab->sal_rev_minor,
- systab->ia32_bios_present ? "present" : "absent",
systab->oem_id, systab->product_id);
min = ~0UL;
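Editorial note: because the SAL revision fields are BCD-encoded, printing them with %x rather than %u renders them directly as the intended decimal-looking digits, which is the whole point of the change above. A tiny sketch, assuming a hypothetical revision of 2.01:

    #include <stdio.h>

    int main(void)
    {
            unsigned char sal_rev_major = 0x02;  /* BCD encoding of "2"  */
            unsigned char sal_rev_minor = 0x01;  /* BCD encoding of "01" */

            printf("SAL v%x.%02x\n", sal_rev_major, sal_rev_minor);  /* prints "SAL v2.01" */
            return 0;
    }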
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index ed091d864..83d5643cd 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -235,6 +235,12 @@ setup_arch (char **cmdline_p)
machvec_init(acpi_get_sysname());
#endif
+#ifdef CONFIG_ACPI20
+ if (efi.acpi20) {
+ /* Parse the ACPI 2.0 tables */
+ acpi20_parse(efi.acpi20);
+ } else
+#endif
if (efi.acpi) {
/* Parse the ACPI tables */
acpi_parse(efi.acpi);
@@ -255,13 +261,6 @@ setup_arch (char **cmdline_p)
paging_init();
platform_setup(cmdline_p);
-
-#ifdef CONFIG_SWIOTLB
- {
- extern void setup_swiotlb (void);
- setup_swiotlb();
- }
-#endif
}
/*
@@ -271,9 +270,9 @@ int
get_cpuinfo (char *buffer)
{
#ifdef CONFIG_SMP
-# define lps c->loops_per_sec
+# define lpj c->loops_per_jiffy
#else
-# define lps loops_per_sec
+# define lpj loops_per_jiffy
#endif
char family[32], model[32], features[128], *cp, *p = buffer;
struct cpuinfo_ia64 *c;
@@ -325,7 +324,7 @@ get_cpuinfo (char *buffer)
features,
c->ppn, c->number, c->proc_freq / 1000000, c->proc_freq % 1000000,
c->itc_freq / 1000000, c->itc_freq % 1000000,
- lps / 500000, (lps / 5000) % 100);
+ lpj*HZ/500000, (lpj*HZ/5000) % 100);
}
return p - buffer;
}
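Editorial note: the switch from loops_per_sec to loops_per_jiffy changes the BogoMIPS arithmetic: the per-jiffy value is scaled back up by HZ before the usual division by 500000. A minimal sketch of the equivalence, assuming HZ=1024 and a made-up calibration result:

    #include <stdio.h>

    int main(void)
    {
            const unsigned long hz = 1024;           /* assumed tick rate */
            unsigned long loops_per_jiffy = 488281;  /* hypothetical calibration result */
            unsigned long loops_per_sec   = loops_per_jiffy * hz;

            /* old formula (lps based) and new formula (lpj based) print the same number */
            printf("%lu.%02lu BogoMIPS\n", loops_per_sec / 500000, (loops_per_sec / 5000) % 100);
            printf("%lu.%02lu BogoMIPS\n", loops_per_jiffy * hz / 500000, (loops_per_jiffy * hz / 5000) % 100);
            return 0;
    }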
@@ -376,15 +375,7 @@ identify_cpu (struct cpuinfo_ia64 *c)
status = ia64_pal_vm_summary(&vm1, &vm2);
if (status == PAL_STATUS_SUCCESS) {
-#if 1
- /*
- * XXX the current PAL code returns IMPL_VA_MSB==60, which is dead-wrong.
- * --davidm 00/05/26
- s*/
- impl_va_msb = 50;
-#else
impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb;
-#endif
phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size;
}
printk("CPU %d: %lu virtual and %lu physical address bits\n",
@@ -408,6 +399,8 @@ cpu_init (void)
{
extern void __init ia64_rid_init (void);
extern void __init ia64_tlb_init (void);
+ pal_vm_info_2_u_t vmi;
+ unsigned int max_ctx;
identify_cpu(&my_cpu_data);
@@ -415,15 +408,12 @@ cpu_init (void)
memset(ia64_task_regs(current), 0, sizeof(struct pt_regs));
/*
- * Initialize default control register to defer speculative
- * faults. On a speculative load, we want to defer access
- * right, key miss, and key permission faults. We currently
- * do NOT defer TLB misses, page-not-present, access bit, or
- * debug faults but kernel code should not rely on any
- * particular setting of these bits.
- ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_PP);
+ * Initialize default control register to defer all speculative faults. The
+ * kernel MUST NOT depend on a particular setting of these bits (in other words,
+ * the kernel must have recovery code for all speculative accesses).
*/
- ia64_set_dcr(IA64_DCR_DR | IA64_DCR_DK | IA64_DCR_DX );
+ ia64_set_dcr( IA64_DCR_DM | IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR
+ | IA64_DCR_DA | IA64_DCR_DD);
#ifndef CONFIG_SMP
ia64_set_fpu_owner(0); /* initialize ar.k5 */
#endif
@@ -444,4 +434,17 @@ cpu_init (void)
#ifdef CONFIG_SMP
normal_xtp();
#endif
+
+ /* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
+ if (ia64_pal_vm_summary(NULL, &vmi) == 0)
+ max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
+ else {
+ printk("ia64_rid_init: PAL VM summary failed, assuming 18 RID bits\n");
+ max_ctx = (1U << 15) - 1; /* use architected minimum */
+ }
+ while (max_ctx < ia64_ctx.max_ctx) {
+ unsigned int old = ia64_ctx.max_ctx;
+ if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old)
+ break;
+ }
}
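Editorial note: the max_ctx update above is a lock-free "lower the shared value, never raise it" loop: re-read the current value and only swap if nobody changed it underneath us. A user-space sketch of the same pattern using GCC's __sync_val_compare_and_swap builtin (variable names are hypothetical; the kernel uses its own cmpxchg()):

    #include <stdio.h>

    static unsigned int shared_max_ctx = (1U << 24) - 1;  /* hypothetical current value */

    /* lower shared_max_ctx to at most new_max, racing safely with other updaters */
    static void lower_max_ctx(unsigned int new_max)
    {
            while (new_max < shared_max_ctx) {
                    unsigned int old = shared_max_ctx;
                    if (__sync_val_compare_and_swap(&shared_max_ctx, old, new_max) == old)
                            break;  /* our swap won; done */
                    /* otherwise somebody changed it underneath us: re-test and retry */
            }
    }

    int main(void)
    {
            lower_max_ctx((1U << 15) - 1);
            printf("max_ctx=%u\n", shared_max_ctx);
            return 0;
    }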
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index e0adf1981..3ffa201aa 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -91,7 +91,7 @@ ia64_rt_sigsuspend (sigset_t *uset, size_t sigsetsize, struct sigscratch *scr)
scr->pt.r10 = -1;
}
while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
+ current->state = TASK_INTERRUPTIBLE;
schedule();
if (ia64_do_signal(&oldset, scr, 1))
return -EINTR;
@@ -499,9 +499,10 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
/* Let the debugger run. */
current->exit_code = signr;
current->thread.siginfo = &info;
- set_current_state(TASK_STOPPED);
+ current->state = TASK_STOPPED;
notify_parent(current, SIGCHLD);
schedule();
+
signr = current->exit_code;
current->thread.siginfo = 0;
@@ -557,7 +558,7 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
/* FALLTHRU */
case SIGSTOP:
- set_current_state(TASK_STOPPED);
+ current->state = TASK_STOPPED;
current->exit_code = signr;
if (!(current->p_pptr->sig->action[SIGCHLD-1].sa.sa_flags
& SA_NOCLDSTOP))
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 694711507..5093341a5 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -6,11 +6,13 @@
*
* Lots of stuff stolen from arch/alpha/kernel/smp.c
*
- * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_sec calibration on each CPU.
+ * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_jiffy calibration on each CPU.
* 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id
* 00/03/31 Rohit Seth <rohit.seth@intel.com> Fixes for Bootstrap Processor & cpu_online_map
* now gets done here (instead of setup.c)
* 99/10/05 davidm Update to bring it in sync with new command-line processing scheme.
+ * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and
+ * smp_call_function_single to resend IPI on timeouts
*/
#define __KERNEL_SYSCALLS__
@@ -30,6 +32,7 @@
#include <asm/current.h>
#include <asm/delay.h>
#include <asm/efi.h>
+#include <asm/machvec.h>
#include <asm/io.h>
#include <asm/irq.h>
@@ -78,10 +81,6 @@ struct smp_call_struct {
};
static volatile struct smp_call_struct *smp_call_function_data;
-#ifdef CONFIG_ITANIUM_A1_SPECIFIC
-extern spinlock_t ivr_read_lock;
-#endif
-
#define IPI_RESCHEDULE 0
#define IPI_CALL_FUNC 1
#define IPI_CPU_STOP 2
@@ -269,14 +268,14 @@ handle_IPI(int irq, void *dev_id, struct pt_regs *regs)
}
static inline void
-send_IPI_single(int dest_cpu, int op)
+send_IPI_single (int dest_cpu, int op)
{
if (dest_cpu == -1)
return;
set_bit(op, &ipi_op[dest_cpu]);
- ipi_send(dest_cpu, IPI_IRQ, IA64_IPI_DM_INT, 0);
+ platform_send_ipi(dest_cpu, IPI_IRQ, IA64_IPI_DM_INT, 0);
}
static inline void
@@ -358,6 +357,7 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int
if (pointer_lock(&smp_call_function_data, &data, retry))
return -EBUSY;
+resend:
/* Send a message to all other CPUs and wait for them to respond */
send_IPI_single(cpuid, IPI_CALL_FUNC);
@@ -366,8 +366,12 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int
while ((atomic_read(&data.unstarted_count) > 0) && time_before(jiffies, timeout))
barrier();
if (atomic_read(&data.unstarted_count) > 0) {
+#if (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC))
+ goto resend;
+#else
smp_call_function_data = NULL;
return -ETIMEDOUT;
+#endif
}
if (wait)
while (atomic_read(&data.unfinished_count) > 0)
@@ -411,13 +415,23 @@ smp_call_function (void (*func) (void *info), void *info, int retry, int wait)
/* Send a message to all other CPUs and wait for them to respond */
send_IPI_allbutself(IPI_CALL_FUNC);
+retry:
/* Wait for response */
timeout = jiffies + HZ;
while ((atomic_read(&data.unstarted_count) > 0) && time_before(jiffies, timeout))
barrier();
if (atomic_read(&data.unstarted_count) > 0) {
+#if (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC))
+ int i;
+ for (i = 0; i < smp_num_cpus; i++) {
+ if (i != smp_processor_id())
+ platform_send_ipi(i, IPI_IRQ, IA64_IPI_DM_INT, 0);
+ }
+ goto retry;
+#else
smp_call_function_data = NULL;
return -ETIMEDOUT;
+#endif
}
if (wait)
while (atomic_read(&data.unfinished_count) > 0)
@@ -430,8 +444,6 @@ smp_call_function (void (*func) (void *info), void *info, int retry, int wait)
/*
* Flush all other CPU's tlb and then mine. Do this with smp_call_function() as we
* want to ensure all TLB's flushed before proceeding.
- *
- * XXX: Is it OK to use the same ptc.e info on all cpus?
*/
void
smp_flush_tlb_all(void)
@@ -502,7 +514,7 @@ smp_callin (void)
local_irq_enable(); /* Interrupts have been off until now */
calibrate_delay();
- my_cpu_data.loops_per_sec = loops_per_sec;
+ my_cpu_data.loops_per_jiffy = loops_per_jiffy;
/* allow the master to continue */
set_bit(cpu, &cpu_callin_map);
@@ -569,7 +581,7 @@ smp_boot_one_cpu(int cpu)
cpu_now_booting = cpu;
/* Kick the AP in the butt */
- ipi_send(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0);
+ platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0);
/* wait up to 10s for the AP to start */
for (timeout = 0; timeout < 100000; timeout++) {
@@ -603,7 +615,7 @@ smp_boot_cpus(void)
__cpu_physical_id[0] = hard_smp_processor_id();
/* on the BP, the kernel already called calibrate_delay_loop() in init/main.c */
- my_cpu_data.loops_per_sec = loops_per_sec;
+ my_cpu_data.loops_per_jiffy = loops_per_jiffy;
#if 0
smp_tune_scheduling();
#endif
@@ -653,13 +665,11 @@ smp_boot_cpus(void)
bogosum = 0;
for (i = 0; i < NR_CPUS; i++) {
if (cpu_online_map & (1L << i))
- bogosum += cpu_data[i].loops_per_sec;
+ bogosum += cpu_data[i].loops_per_jiffy;
}
- printk(KERN_INFO "SMP: Total of %d processors activated "
- "(%lu.%02lu BogoMIPS).\n",
- cpu_count, (bogosum + 2500) / 500000,
- ((bogosum + 2500) / 5000) % 100);
+ printk(KERN_INFO "SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+ cpu_count, bogosum*HZ/500000, (bogosum*HZ/5000) % 100);
smp_num_cpus = cpu_count;
}
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index f78512229..2713d7fd9 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -16,8 +16,38 @@
#include <linux/smp_lock.h>
#include <linux/highuid.h>
+#include <asm/shmparam.h>
#include <asm/uaccess.h>
+#define COLOR_ALIGN(addr) (((addr) + SHMLBA - 1) & ~(SHMLBA - 1))
+
+unsigned long
+get_unmapped_area (unsigned long addr, unsigned long len)
+{
+ struct vm_area_struct * vmm;
+
+ if (len > RGN_MAP_LIMIT)
+ return 0;
+ if (!addr)
+ addr = TASK_UNMAPPED_BASE;
+
+ if (current->thread.flags & IA64_THREAD_MAP_SHARED)
+ addr = COLOR_ALIGN(addr);
+ else
+ addr = PAGE_ALIGN(addr);
+
+ for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
+ /* At this point: (!vmm || addr < vmm->vm_end). */
+ if (TASK_SIZE - len < addr)
+ return 0;
+ if (rgn_offset(addr) + len > RGN_MAP_LIMIT) /* no risk of overflow here... */
+ return 0;
+ if (!vmm || addr + len <= vmm->vm_start)
+ return addr;
+ addr = vmm->vm_end;
+ }
+}
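Editorial note: get_unmapped_area() rounds the search address up to a cache-color boundary (SHMLBA) for shared mappings and to a page boundary otherwise, then walks the VMA list first-fit. A sketch of the two alignment computations, with hypothetical page and color sizes:

    #include <stdio.h>

    #define PAGE_SIZE_X   16384UL               /* hypothetical 16KB page size */
    #define SHMLBA_X      (4 * PAGE_SIZE_X)     /* hypothetical cache-color granule */
    #define ALIGN_UP(a,g) (((a) + (g) - 1) & ~((g) - 1))

    int main(void)
    {
            unsigned long addr = 0x20000123UL;

            printf("page-aligned:  0x%lx\n", ALIGN_UP(addr, PAGE_SIZE_X));
            printf("color-aligned: 0x%lx\n", ALIGN_UP(addr, SHMLBA_X));
            return 0;
    }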
+
asmlinkage long
ia64_getpriority (int which, int who, long arg2, long arg3, long arg4, long arg5, long arg6,
long arg7, long stack)
@@ -34,6 +64,7 @@ ia64_getpriority (int which, int who, long arg2, long arg3, long arg4, long arg5
return prio;
}
+/* XXX obsolete, but leave it here until the old libc is gone... */
asmlinkage unsigned long
sys_getpagesize (void)
{
@@ -58,16 +89,61 @@ ia64_shmat (int shmid, void *shmaddr, int shmflg, long arg3, long arg4, long arg
}
asmlinkage unsigned long
-ia64_brk (long brk, long arg1, long arg2, long arg3,
+ia64_brk (unsigned long brk, long arg1, long arg2, long arg3,
long arg4, long arg5, long arg6, long arg7, long stack)
{
- extern unsigned long sys_brk (unsigned long brk);
+ extern int vm_enough_memory (long pages);
struct pt_regs *regs = (struct pt_regs *) &stack;
- unsigned long retval;
+ unsigned long rlim, retval, newbrk, oldbrk;
+ struct mm_struct *mm = current->mm;
+
+ /*
+ * Most of this replicates the code in sys_brk() except for an additional safety
+ * check and the clearing of r8. However, we can't call sys_brk() because we need
+ * to acquire the mmap_sem before we can do the test...
+ */
+ down(&mm->mmap_sem);
- retval = sys_brk(brk);
+ if (brk < mm->end_code)
+ goto out;
+ newbrk = PAGE_ALIGN(brk);
+ oldbrk = PAGE_ALIGN(mm->brk);
+ if (oldbrk == newbrk)
+ goto set_brk;
+
+ /* Always allow shrinking brk. */
+ if (brk <= mm->brk) {
+ if (!do_munmap(mm, newbrk, oldbrk-newbrk))
+ goto set_brk;
+ goto out;
+ }
- regs->r8 = 0; /* ensure large retval isn't mistaken as error code */
+ /* Check against unimplemented/unmapped addresses: */
+ if ((newbrk - oldbrk) > RGN_MAP_LIMIT || rgn_offset(newbrk) > RGN_MAP_LIMIT)
+ goto out;
+
+ /* Check against rlimit.. */
+ rlim = current->rlim[RLIMIT_DATA].rlim_cur;
+ if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+ goto out;
+
+ /* Check against existing mmap mappings. */
+ if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+ goto out;
+
+ /* Check if we have enough memory.. */
+ if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT))
+ goto out;
+
+ /* Ok, looks good - let it rip. */
+ if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
+ goto out;
+set_brk:
+ mm->brk = brk;
+out:
+ retval = mm->brk;
+ up(&mm->mmap_sem);
+ regs->r8 = 0; /* ensure large retval isn't mistaken as error code */
return retval;
}
@@ -95,10 +171,8 @@ sys_pipe (long arg0, long arg1, long arg2, long arg3,
static inline unsigned long
do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff)
{
- unsigned long loff, hoff;
+ unsigned long roff;
struct file *file = 0;
- /* the virtual address space that is mappable in each region: */
-# define OCTANT_SIZE ((PTRS_PER_PGD<<PGDIR_SHIFT)/8)
/*
* A zero mmap always succeeds in Linux, independent of
@@ -107,15 +181,12 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un
if (PAGE_ALIGN(len) == 0)
return addr;
- /* Don't permit mappings into or across the address hole in a region: */
- loff = rgn_offset(addr);
- hoff = loff - (RGN_SIZE - OCTANT_SIZE/2);
- if ((len | loff | (loff + len)) >= OCTANT_SIZE/2
- && (len | hoff | (hoff + len)) >= OCTANT_SIZE/2)
+ /* don't permit mappings into unmapped space or the virtual page table of a region: */
+ roff = rgn_offset(addr);
+ if ((len | roff | (roff + len)) >= RGN_MAP_LIMIT)
return -EINVAL;
- /* Don't permit mappings that would cross a region boundary: */
-
+ /* don't permit mappings that would cross a region boundary: */
if (rgn_index(addr) != rgn_index(addr + len))
return -EINVAL;
@@ -126,10 +197,15 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un
return -EBADF;
}
+ if (flags & MAP_SHARED)
+ current->thread.flags |= IA64_THREAD_MAP_SHARED;
+
down(&current->mm->mmap_sem);
addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
up(&current->mm->mmap_sem);
+ current->thread.flags &= ~IA64_THREAD_MAP_SHARED;
+
if (file)
fput(file);
return addr;
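Editorial note: the new region check in do_mmap2() rejects a mapping when the region offset, the length, or their sum reaches RGN_MAP_LIMIT; OR-ing the three values collapses this into one comparison and is conservative, since the OR can never be smaller than any of the operands. A user-space sketch of the test, with a hypothetical limit:

    #include <stdio.h>
    #include <stdint.h>

    #define RGN_MAP_LIMIT_X (1ULL << 40)   /* hypothetical; the real RGN_MAP_LIMIT depends on PAGE_SHIFT */

    /* Mirror of the do_mmap2() test: reject when the offset, the length, or their sum
     * reaches the limit.  OR-ing the three values is safe because (a | b | c) is always
     * >= each of a, b and c, so an out-of-range mapping can never slip through. */
    static int range_ok(uint64_t roff, uint64_t len)
    {
            return (len | roff | (roff + len)) < RGN_MAP_LIMIT_X;
    }

    int main(void)
    {
            printf("%d\n", range_ok(0x1000, 0x2000));                    /* small mapping: ok (1) */
            printf("%d\n", range_ok(RGN_MAP_LIMIT_X - 0x1000, 0x2000));  /* crosses the limit: rejected (0) */
            return 0;
    }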
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 5e54e4f4b..8f65adc2c 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -152,19 +152,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
int cpu = smp_processor_id();
unsigned long new_itm;
-#if 0
- static unsigned long last_time;
- static unsigned char count;
- int printed = 0;
-#endif
- /*
- * Here we are in the timer irq handler. We have irqs locally
- * disabled, but we don't know if the timer_bh is running on
- * another CPU. We need to avoid to SMP race by acquiring the
- * xtime_lock.
- */
- write_lock(&xtime_lock);
new_itm = itm.next[cpu].count;
if (!time_after(ia64_get_itc(), new_itm))
@@ -173,48 +161,33 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
while (1) {
/*
- * Do kernel PC profiling here. We multiply the
- * instruction number by four so that we can use a
- * prof_shift of 2 to get instruction-level instead of
- * just bundle-level accuracy.
+ * Do kernel PC profiling here. We multiply the instruction number by
+ * four so that we can use a prof_shift of 2 to get instruction-level
+ * instead of just bundle-level accuracy.
*/
if (!user_mode(regs))
do_profile(regs->cr_iip + 4*ia64_psr(regs)->ri);
#ifdef CONFIG_SMP
smp_do_timer(regs);
- if (smp_processor_id() == 0)
- do_timer(regs);
-#else
- do_timer(regs);
#endif
+ if (smp_processor_id() == 0) {
+ /*
+ * Here we are in the timer irq handler. We have irqs locally
+ * disabled, but we don't know if the timer_bh is running on
+ * another CPU. We need to avoid to SMP race by acquiring the
+			 * another CPU. We need to avoid an SMP race by acquiring the
+ */
+ write_lock(&xtime_lock);
+ do_timer(regs);
+ write_unlock(&xtime_lock);
+ }
new_itm += itm.delta;
itm.next[cpu].count = new_itm;
if (time_after(new_itm, ia64_get_itc()))
break;
-
-#if 0
- /*
- * SoftSDV in SMP mode is _slow_, so we do "lose" ticks,
- * but it's really OK...
- */
- if (count > 0 && jiffies - last_time > 5*HZ)
- count = 0;
- if (count++ == 0) {
- last_time = jiffies;
- if (!printed) {
- printk("Lost clock tick on CPU %d (now=%lx, next=%lx)!!\n",
- cpu, ia64_get_itc(), itm.next[cpu].count);
- printed = 1;
-# ifdef CONFIG_IA64_DEBUG_IRQ
- printk("last_cli_ip=%lx\n", last_cli_ip);
-# endif
- }
- }
-#endif
}
- write_unlock(&xtime_lock);
/*
* If we're too close to the next clock tick for comfort, we
@@ -229,7 +202,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
ia64_set_itm(new_itm);
}
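Editorial note: the rewritten timer_interrupt() advances the per-CPU match value in fixed delta steps, doing one tick's worth of work per step, until the match value lies in the future; only CPU 0 takes xtime_lock and calls do_timer(). A minimal sketch of the catch-up loop with made-up cycle numbers:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            const uint64_t delta = 1000;   /* hypothetical ITC cycles per tick (itm.delta) */
            uint64_t itc      = 12345;     /* "current" cycle counter */
            uint64_t next_itm = 9000;      /* stale per-CPU match value (itm.next[cpu].count) */
            unsigned int ticks = 0;

            /* mirror of the catch-up loop: account one tick per step until the next
             * match value lies strictly in the future */
            do {
                    ticks++;               /* per-tick work: profiling, do_timer() on CPU 0 */
                    next_itm += delta;
            } while (next_itm <= itc);

            printf("processed %u ticks, next_itm=%llu\n", ticks, (unsigned long long) next_itm);
            return 0;
    }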
-#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_IA64_SOFTSDV_HACKS)
+#ifdef CONFIG_IA64_SOFTSDV_HACKS
/*
* Interrupts must be disabled before calling this routine.
@@ -240,7 +213,7 @@ ia64_reset_itm (void)
timer_interrupt(0, 0, ia64_task_regs(current));
}
-#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */
+#endif
/*
* Encapsulate access to the itm structure for SMP.
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 43340bf85..fd8369291 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -78,7 +78,7 @@ void
die_if_kernel (char *str, struct pt_regs *regs, long err)
{
if (user_mode(regs)) {
-#if 1
+#if 0
/* XXX for debugging only */
printk ("!!die_if_kernel: %s(%d): %s %ld\n",
current->comm, current->pid, str, err);
@@ -484,6 +484,20 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
sprintf(buf, "Disabled FPL fault---not supposed to happen!");
break;
+ case 26: /* NaT Consumption */
+ case 31: /* Unsupported Data Reference */
+ if (user_mode(regs)) {
+ siginfo.si_signo = SIGILL;
+ siginfo.si_code = ILL_ILLOPN;
+ siginfo.si_errno = 0;
+ siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
+ siginfo.si_imm = vector;
+ force_sig_info(SIGILL, &siginfo, current);
+ return;
+ }
+ sprintf(buf, (vector == 26) ? "NaT consumption" : "Unsupported data reference");
+ break;
+
case 29: /* Debug */
case 35: /* Taken Branch Trap */
case 36: /* Single Step Trap */
@@ -522,10 +536,10 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
case 34: /* Unimplemented Instruction Address Trap */
if (user_mode(regs)) {
- printk("Woah! Unimplemented Instruction Address Trap!\n");
- siginfo.si_code = ILL_BADIADDR;
siginfo.si_signo = SIGILL;
+ siginfo.si_code = ILL_BADIADDR;
siginfo.si_errno = 0;
+ siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
force_sig_info(SIGILL, &siginfo, current);
return;
}
@@ -544,7 +558,8 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
case 46:
printk("Unexpected IA-32 intercept trap (Trap 46)\n");
- printk(" iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", regs->cr_iip, ifa, isr);
+ printk(" iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n",
+ regs->cr_iip, ifa, isr, iim);
force_sig(SIGSEGV, current);
return;
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index 7cc238a83..a24121a26 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -572,7 +572,8 @@ getreg(unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
*/
if (regnum == 0) {
*val = 0;
- *nat = 0;
+ if (nat)
+ *nat = 0;
return;
}
@@ -1563,9 +1564,13 @@ ia64_handle_unaligned(unsigned long ifa, struct pt_regs *regs)
DPRINT(("ret=%d\n", ret));
if (ret) {
- lock_kernel();
- force_sig(SIGSEGV, current);
- unlock_kernel();
+ struct siginfo si;
+
+ si.si_signo = SIGBUS;
+ si.si_errno = 0;
+ si.si_code = BUS_ADRALN;
+ si.si_addr = (void *) ifa;
+ force_sig_info(SIGBUS, &si, current);
} else {
/*
* given today's architecture this case is not likely to happen
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
index 21a2ead16..f5ae7e497 100644
--- a/arch/ia64/kernel/unwind.c
+++ b/arch/ia64/kernel/unwind.c
@@ -46,16 +46,6 @@
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define p5 5
-/*
- * The unwind tables are supposed to be sorted, but the GNU toolchain
- * currently fails to produce a sorted table in the presence of
- * functions that go into sections other than .text. For example, the
- * kernel likes to put initialization code into .text.init, which
- * messes up the sort order. Hopefully, this will get fixed sometime
- * soon. --davidm 00/05/23
- */
-#define UNWIND_TABLE_SORT_BUG
-
#define UNW_LOG_CACHE_SIZE 7 /* each unw_script is ~256 bytes in size */
#define UNW_CACHE_SIZE (1 << UNW_LOG_CACHE_SIZE)
@@ -531,6 +521,10 @@ push (struct unw_state_record *sr)
struct unw_reg_state *rs;
rs = alloc_reg_state();
+ if (!rs) {
+ printk("unwind: cannot stack reg state!\n");
+ return;
+ }
memcpy(rs, &sr->curr, sizeof(*rs));
rs->next = sr->stack;
sr->stack = rs;
@@ -1964,23 +1958,6 @@ init_unwind_table (struct unw_table *table, const char *name, unsigned long segm
{
struct unw_table_entry *start = table_start, *end = table_end;
-#ifdef UNWIND_TABLE_SORT_BUG
- {
- struct unw_table_entry *e1, *e2, tmp;
-
- /* stupid bubble sort... */
-
- for (e1 = start; e1 < end; ++e1) {
- for (e2 = e1 + 1; e2 < end; ++e2) {
- if (e2->start_offset < e1->start_offset) {
- tmp = *e1;
- *e1 = *e2;
- *e2 = tmp;
- }
- }
- }
- }
-#endif
table->name = name;
table->segment_base = segment_base;
table->gp = gp;
@@ -2023,8 +2000,8 @@ unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned lon
void
unw_remove_unwind_table (void *handle)
{
- struct unw_table *table, *prevt;
- struct unw_script *tmp, *prev;
+ struct unw_table *table, *prev;
+ struct unw_script *tmp;
unsigned long flags;
long index;
@@ -2043,41 +2020,35 @@ unw_remove_unwind_table (void *handle)
{
/* first, delete the table: */
- for (prevt = (struct unw_table *) &unw.tables; prevt; prevt = prevt->next)
- if (prevt->next == table)
+ for (prev = (struct unw_table *) &unw.tables; prev; prev = prev->next)
+ if (prev->next == table)
break;
- if (!prevt) {
+ if (!prev) {
dprintk("unwind: failed to find unwind table %p\n", (void *) table);
spin_unlock_irqrestore(&unw.lock, flags);
return;
}
- prevt->next = table->next;
+ prev->next = table->next;
+ }
+ spin_unlock_irqrestore(&unw.lock, flags);
- /* next, remove hash table entries for this table */
+ /* next, remove hash table entries for this table */
- for (index = 0; index <= UNW_HASH_SIZE; ++index) {
- if (unw.hash[index] >= UNW_CACHE_SIZE)
- continue;
+ for (index = 0; index <= UNW_HASH_SIZE; ++index) {
+ tmp = unw.cache + unw.hash[index];
+ if (unw.hash[index] >= UNW_CACHE_SIZE
+ || tmp->ip < table->start || tmp->ip >= table->end)
+ continue;
- tmp = unw.cache + unw.hash[index];
- prev = 0;
- while (1) {
- write_lock(&tmp->lock);
- {
- if (tmp->ip >= table->start && tmp->ip < table->end) {
- if (prev)
- prev->coll_chain = tmp->coll_chain;
- else
- unw.hash[index] = -1;
- tmp->ip = 0;
- } else
- prev = tmp;
- }
- write_unlock(&tmp->lock);
+ write_lock(&tmp->lock);
+ {
+ if (tmp->ip >= table->start && tmp->ip < table->end) {
+ unw.hash[index] = tmp->coll_chain;
+ tmp->ip = 0;
}
}
+ write_unlock(&tmp->lock);
}
- spin_unlock_irqrestore(&unw.lock, flags);
kfree(table);
}
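Editorial note: the rewritten unw_remove_unwind_table() only examines the head script of each hash bucket; a head whose ip falls inside the removed table's range is unlinked (the bucket now points at its collision chain) and invalidated. A simplified user-space sketch of that bucket-head eviction, using -1 as the end-of-chain marker instead of the kernel's UNW_CACHE_SIZE sentinel:

    #include <stdio.h>

    #define NBUCKETS 4

    struct script { unsigned long ip; int coll_chain; };  /* coll_chain: index of next entry, -1 = end */

    int main(void)
    {
            struct script cache[] = {
                    { 0x100, -1 }, { 0x210, 0 }, { 0x320, -1 }, { 0x150, 2 },
            };
            int hash[NBUCKETS] = { 1, 3, -1, -1 };        /* bucket -> index of head entry */
            unsigned long start = 0x200, end = 0x300;     /* range covered by the removed table */

            /* mirror of the rewritten loop: only the head of each bucket is examined;
             * a head whose ip falls inside the removed table is unlinked and invalidated */
            for (int b = 0; b < NBUCKETS; b++) {
                    int h = hash[b];
                    if (h < 0 || cache[h].ip < start || cache[h].ip >= end)
                            continue;
                    hash[b] = cache[h].coll_chain;        /* unw.hash[index] = tmp->coll_chain */
                    cache[h].ip = 0;                      /* tmp->ip = 0 */
            }

            for (int b = 0; b < NBUCKETS; b++)
                    printf("bucket %d head %d\n", b, hash[b]);
            return 0;
    }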