summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorRalf Baechle <ralf@linux-mips.org>2000-03-02 02:36:47 +0000
committerRalf Baechle <ralf@linux-mips.org>2000-03-02 02:36:47 +0000
commit8624512aa908741ba2795200133eae0d7f4557ea (patch)
treed5d3036fccf2604f4c98dedc11e8adb929d6b52e /arch
parent7b8f5d6f1d45d9f9de1d26e7d3c32aa5af11b488 (diff)
Merge with 2.3.48.
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/config.in5
-rw-r--r--arch/alpha/kernel/alpha_ksyms.c6
-rw-r--r--arch/alpha/kernel/core_tsunami.c35
-rw-r--r--arch/alpha/kernel/irq.c68
-rw-r--r--arch/alpha/kernel/pci_iommu.c53
-rw-r--r--arch/alpha/kernel/process.c109
-rw-r--r--arch/alpha/kernel/proto.h3
-rw-r--r--arch/alpha/kernel/semaphore.c4
-rw-r--r--arch/alpha/kernel/setup.c4
-rw-r--r--arch/alpha/kernel/smp.c41
-rw-r--r--arch/alpha/kernel/sys_cabriolet.c26
-rw-r--r--arch/alpha/kernel/sys_dp264.c124
-rw-r--r--arch/alpha/kernel/sys_sio.c2
-rw-r--r--arch/alpha/kernel/time.c1
-rw-r--r--arch/alpha/kernel/traps.c4
-rw-r--r--arch/alpha/vmlinux.lds1
-rw-r--r--arch/arm/kernel/armksyms.c1
-rw-r--r--arch/arm/mm/consistent.c6
-rw-r--r--arch/i386/defconfig3
-rw-r--r--arch/i386/kernel/acpi.c37
-rw-r--r--arch/i386/kernel/apm.c14
-rw-r--r--arch/i386/kernel/entry.S2
-rw-r--r--arch/i386/kernel/i386_ksyms.c4
-rw-r--r--arch/i386/kernel/i8259.c36
-rw-r--r--arch/i386/kernel/io_apic.c102
-rw-r--r--arch/i386/kernel/irq.c419
-rw-r--r--arch/i386/kernel/microcode.c46
-rw-r--r--arch/i386/kernel/mpparse.c29
-rw-r--r--arch/i386/kernel/mtrr.c9
-rw-r--r--arch/i386/kernel/process.c9
-rw-r--r--arch/i386/kernel/semaphore.c5
-rw-r--r--arch/i386/kernel/setup.c17
-rw-r--r--arch/i386/kernel/traps.c5
-rw-r--r--arch/i386/mm/init.c4
-rw-r--r--arch/ia64/ia32/binfmt_elf32.c2
-rw-r--r--arch/ia64/ia32/ia32_entry.S2
-rw-r--r--arch/ia64/ia32/sys_ia32.c21
-rw-r--r--arch/ia64/kdb/kdbsupport.c27
-rw-r--r--arch/ia64/kernel/irq.c3
-rw-r--r--arch/ia64/kernel/irq_internal.c2
-rw-r--r--arch/ia64/kernel/ivt.S2
-rw-r--r--arch/ia64/kernel/process.c8
-rw-r--r--arch/ia64/kernel/time.c20
-rw-r--r--arch/ia64/kernel/traps.c116
-rw-r--r--arch/ia64/kernel/unaligned.c47
-rw-r--r--arch/ia64/lib/copy_user.S440
-rw-r--r--arch/mips/defconfig4
-rw-r--r--arch/mips/kernel/irixelf.c21
-rw-r--r--arch/mips/kernel/irq.c20
-rw-r--r--arch/mips/kernel/mips_ksyms.c4
-rw-r--r--arch/mips/kernel/proc.c5
-rw-r--r--arch/mips/kernel/setup.c4
-rw-r--r--arch/mips/sgi/kernel/indy_int.c20
-rw-r--r--arch/mips64/defconfig1
-rw-r--r--arch/mips64/defconfig-ip221
-rw-r--r--arch/mips64/defconfig-ip271
-rw-r--r--arch/mips64/kernel/proc.c7
-rw-r--r--arch/mips64/kernel/setup.c4
-rw-r--r--arch/mips64/sgi-ip22/ip22-int.c20
-rw-r--r--arch/mips64/sgi-ip27/ip27-irq.c22
-rw-r--r--arch/ppc/chrpboot/main.c20
-rw-r--r--arch/ppc/chrpboot/piggyback.c3
-rw-r--r--arch/ppc/coffboot/piggyback.c3
-rw-r--r--arch/ppc/config.in7
-rw-r--r--arch/ppc/configs/common_defconfig26
-rw-r--r--arch/ppc/defconfig26
-rw-r--r--arch/ppc/kernel/Makefile20
-rw-r--r--arch/ppc/kernel/apus_setup.c159
-rw-r--r--arch/ppc/kernel/entry.S4
-rw-r--r--arch/ppc/kernel/hashtable.S80
-rw-r--r--arch/ppc/kernel/head.S7
-rw-r--r--arch/ppc/kernel/irq.c3
-rw-r--r--arch/ppc/kernel/misc.S11
-rw-r--r--arch/ppc/kernel/mk_defs.c1
-rw-r--r--arch/ppc/kernel/pmac_pic.c24
-rw-r--r--arch/ppc/kernel/ppc_htab.c9
-rw-r--r--arch/ppc/kernel/ppc_ksyms.c17
-rw-r--r--arch/ppc/kernel/process.c2
-rw-r--r--arch/ppc/kernel/prom.c1
-rw-r--r--arch/ppc/kernel/setup.c60
-rw-r--r--arch/ppc/kernel/smp.c3
-rw-r--r--arch/ppc/mm/init.c86
-rw-r--r--arch/ppc/mm/mem_pieces.c2
-rw-r--r--arch/ppc/xmon/xmon.c46
-rw-r--r--arch/sparc/boot/Makefile14
-rw-r--r--arch/sparc/kernel/ioport.c40
-rw-r--r--arch/sparc/kernel/irq.c7
-rw-r--r--arch/sparc/kernel/setup.c6
-rw-r--r--arch/sparc/kernel/sparc_ksyms.c4
-rw-r--r--arch/sparc/lib/locks.S21
-rw-r--r--arch/sparc/mm/init.c4
-rw-r--r--arch/sparc64/defconfig6
-rw-r--r--arch/sparc64/kernel/irq.c7
-rw-r--r--arch/sparc64/kernel/setup.c6
-rw-r--r--arch/sparc64/kernel/signal32.c4
-rw-r--r--arch/sparc64/kernel/sparc64_ksyms.c3
-rw-r--r--arch/sparc64/lib/VIScsum.S693
-rw-r--r--arch/sparc64/lib/VIScsumcopy.S1176
-rw-r--r--arch/sparc64/lib/VIScsumcopyusr.S1162
99 files changed, 3454 insertions, 2377 deletions
diff --git a/arch/alpha/config.in b/arch/alpha/config.in
index 8e44bb0e2..1686fefbc 100644
--- a/arch/alpha/config.in
+++ b/arch/alpha/config.in
@@ -60,6 +60,7 @@ unset CONFIG_ALPHA_LCA CONFIG_ALPHA_APECS CONFIG_ALPHA_CIA
unset CONFIG_ALPHA_T2 CONFIG_ALPHA_PYXIS CONFIG_ALPHA_POLARIS
unset CONFIG_ALPHA_TSUNAMI CONFIG_ALPHA_MCPCIA
unset CONFIG_ALPHA_IRONGATE
+unset CONFIG_ALPHA_BROKEN_IRQ_MASK
# Most of these machines have ISA slots; not exactly sure which don't,
# and this doesn't activate hordes of code, so do it always.
@@ -178,6 +179,10 @@ if [ "$CONFIG_ALPHA_XL" = "y" ]
then
define_bool CONFIG_ALPHA_AVANTI y
fi
+if [ "$CONFIG_ALPHA_GENERIC" = "y" -o "$CONFIG_ALPHA_PC164" = "y" ]
+then
+ define_bool CONFIG_ALPHA_BROKEN_IRQ_MASK y
+fi
if [ "$CONFIG_ALPHA_SABLE" = "y" -o "$CONFIG_ALPHA_RAWHIDE" = "y" \
-o "$CONFIG_ALPHA_DP264" = "y" -o "$CONFIG_ALPHA_GENERIC" = "y" ]
diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c
index 725dd4f51..25d9583dd 100644
--- a/arch/alpha/kernel/alpha_ksyms.c
+++ b/arch/alpha/kernel/alpha_ksyms.c
@@ -98,6 +98,8 @@ EXPORT_SYMBOL(__memset);
EXPORT_SYMBOL(__memsetw);
EXPORT_SYMBOL(__constant_c_memset);
+EXPORT_SYMBOL(__direct_map_base);
+EXPORT_SYMBOL(__direct_map_size);
EXPORT_SYMBOL(pci_alloc_consistent);
EXPORT_SYMBOL(pci_free_consistent);
EXPORT_SYMBOL(pci_map_single);
@@ -144,6 +146,10 @@ EXPORT_SYMBOL(alpha_fp_emul_imprecise);
EXPORT_SYMBOL(alpha_fp_emul);
#endif
+#ifdef CONFIG_ALPHA_BROKEN_IRQ_MASK
+EXPORT_SYMBOL(__min_ipl);
+#endif
+
/*
* The following are specially called from the uaccess assembly stubs.
*/
diff --git a/arch/alpha/kernel/core_tsunami.c b/arch/alpha/kernel/core_tsunami.c
index 5fa112173..1452b6336 100644
--- a/arch/alpha/kernel/core_tsunami.c
+++ b/arch/alpha/kernel/core_tsunami.c
@@ -24,7 +24,6 @@
#include "proto.h"
#include "pci_impl.h"
-int TSUNAMI_bootcpu;
static struct
{
@@ -210,17 +209,23 @@ void
tsunami_pci_tbi(struct pci_controler *hose, dma_addr_t start, dma_addr_t end)
{
tsunami_pchip *pchip = hose->index ? TSUNAMI_pchip1 : TSUNAMI_pchip0;
-
- wmb();
+ volatile unsigned long *csr;
+ unsigned long value;
/* We can invalidate up to 8 tlb entries in a go. The flush
matches against <31:16> in the pci address. */
+ csr = &pchip->tlbia.csr;
if (((start ^ end) & 0xffff0000) == 0)
- pchip->tlbiv.csr = (start & 0xffff0000) >> 12;
- else
- pchip->tlbia.csr = 0;
+ csr = &pchip->tlbiv.csr;
+ /* For TBIA, it doesn't matter what value we write. For TBI,
+ it's the shifted tag bits. */
+ value = (start & 0xffff0000) >> 12;
+
+ wmb();
+ *csr = value;
mb();
+ *csr;
}
#ifdef NXM_MACHINE_CHECKS_ON_TSUNAMI
@@ -229,7 +234,7 @@ tsunami_probe_read(volatile unsigned long *vaddr)
{
long dont_care, probe_result;
int cpu = smp_processor_id();
- int s = swpipl(6); /* Block everything but machine checks. */
+ int s = swpipl(IPL_MCHECK - 1);
mcheck_taken(cpu) = 0;
mcheck_expected(cpu) = 1;
@@ -338,9 +343,13 @@ tsunami_init_one_pchip(tsunami_pchip *pchip, int index)
* because of an idiot-syncrasy of the CYPRESS chip. It may
* respond to a PCI bus address in the last 1MB of the 4GB
* address range.
+ *
+ * Note that the TLB lookup logic uses bitwise concatenation,
+ * not addition, so the required arena alignment is based on
+ * the size of the window.
*/
- hose->sg_isa = iommu_arena_new(0x00800000, 0x00800000, PAGE_SIZE);
- hose->sg_pci = iommu_arena_new(0xc0000000, 0x08000000, PAGE_SIZE);
+ hose->sg_isa = iommu_arena_new(0x00800000, 0x00800000, 0x00800000>>10);
+ hose->sg_pci = iommu_arena_new(0xc0000000, 0x08000000, 0x08000000>>10);
__direct_map_base = 0x40000000;
__direct_map_size = 0x80000000;
@@ -399,8 +408,6 @@ tsunami_init_arch(void)
printk("%s: CSR_STR 0x%lx\n", FN, TSUNAMI_dchip->str.csr);
printk("%s: CSR_DREV 0x%lx\n", FN, TSUNAMI_dchip->drev.csr);
#endif
- TSUNAMI_bootcpu = __hard_smp_processor_id();
-
/* With multiple PCI busses, we play with I/O as physical addrs. */
ioport_resource.end = ~0UL;
iomem_resource.end = ~0UL;
@@ -444,12 +451,10 @@ tsunami_kill_arch(int mode)
static inline void
tsunami_pci_clr_err_1(tsunami_pchip *pchip)
{
- unsigned int jd;
-
- jd = pchip->perror.csr;
+ pchip->perror.csr;
pchip->perror.csr = 0x040;
mb();
- jd = pchip->perror.csr;
+ pchip->perror.csr;
}
static inline void
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index 3d593acf3..613a633ba 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -48,6 +48,12 @@ unsigned long __irq_attempt[NR_IRQS];
#define ACTUAL_NR_IRQS NR_IRQS
#endif
+/* Hack minimum IPL during interupt processing for broken hardware. */
+
+#ifdef CONFIG_ALPHA_BROKEN_IRQ_MASK
+int __min_ipl;
+#endif
+
/*
* Performance counter hook. A module can override this to
* do something useful.
@@ -283,30 +289,32 @@ handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
struct irqaction *action)
{
int status, cpu = smp_processor_id();
- unsigned long ipl;
+ int old_ipl, ipl;
kstat.irqs[cpu][irq]++;
irq_enter(cpu, irq);
status = 1; /* Force the "do bottom halves" bit */
- ipl = rdps() & 7;
+ old_ipl = ipl = getipl();
do {
- unsigned long newipl = (action->flags & SA_INTERRUPT ? 7 : 0);
- if (newipl != ipl) {
- swpipl(newipl);
- ipl = newipl;
+ int new_ipl = IPL_MIN;
+ if (action->flags & SA_INTERRUPT)
+ new_ipl = IPL_MAX;
+ if (new_ipl != ipl) {
+ setipl(new_ipl);
+ ipl = new_ipl;
}
status |= action->flags;
action->handler(irq, action->dev_id, regs);
action = action->next;
} while (action);
+ if (ipl != old_ipl)
+ setipl(old_ipl);
+
if (status & SA_SAMPLE_RANDOM)
add_interrupt_randomness(irq);
- if (ipl == 0)
- __cli();
-
irq_exit(cpu, irq);
return status;
@@ -325,7 +333,7 @@ disable_irq_nosync(unsigned int irq)
spin_lock_irqsave(&irq_controller_lock, flags);
if (!irq_desc[irq].depth++) {
- irq_desc[irq].status |= IRQ_DISABLED;
+ irq_desc[irq].status |= IRQ_DISABLED | IRQ_MASKED;
irq_desc[irq].handler->disable(irq);
}
spin_unlock_irqrestore(&irq_controller_lock, flags);
@@ -356,14 +364,15 @@ enable_irq(unsigned int irq)
switch (irq_desc[irq].depth) {
case 1:
{
- unsigned int status = irq_desc[irq].status & ~IRQ_DISABLED;
- irq_desc[irq].status = status;
- if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
- irq_desc[irq].status = status | IRQ_REPLAY;
+ unsigned int status = irq_desc[irq].status;
+ status &= ~(IRQ_DISABLED | IRQ_MASKED);
+ if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+ status |= IRQ_REPLAY;
/* ??? We can't re-send on (most?) alpha hw.
hw_resend_irq(irq_desc[irq].handler,irq); */
}
+ irq_desc[irq].status = status;
irq_desc[irq].handler->enable(irq);
/* fall-through */
}
@@ -425,7 +434,7 @@ setup_irq(unsigned int irq, struct irqaction * new)
if (!shared) {
irq_desc[irq].depth = 0;
- irq_desc[irq].status &= ~IRQ_DISABLED;
+ irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_MASKED);
irq_desc[irq].handler->startup(irq);
}
spin_unlock_irqrestore(&irq_controller_lock,flags);
@@ -500,7 +509,7 @@ free_irq(unsigned int irq, void *dev_id)
/* Found - now remove it from the list of entries. */
*pp = action->next;
if (!irq_desc[irq].action) {
- irq_desc[irq].status |= IRQ_DISABLED;
+ irq_desc[irq].status |= IRQ_DISABLED|IRQ_MASKED;
irq_desc[irq].handler->shutdown(irq);
}
spin_unlock_irqrestore(&irq_controller_lock,flags);
@@ -669,7 +678,7 @@ __global_cli(void)
* Maximize ipl. If ipl was previously 0 and if this thread
* is not in an irq, then take global_irq_lock.
*/
- if (swpipl(7) == 0 && !local_irq_count(cpu))
+ if (swpipl(IPL_MAX) == IPL_MIN && !local_irq_count(cpu))
get_irqlock(cpu, where);
}
@@ -841,13 +850,25 @@ handle_irq(int irq, struct pt_regs * regs)
desc = irq_desc + irq;
spin_lock_irq(&irq_controller_lock); /* mask also the RTC */
desc->handler->ack(irq);
+ status = desc->status;
+
+#ifndef CONFIG_SMP
+ /* Look for broken irq masking. */
+ if (status & IRQ_MASKED) {
+ static unsigned long last_printed;
+ if (time_after(jiffies, last_printed+HZ)) {
+ printk(KERN_CRIT "Mask didn't work for irq %d!\n", irq);
+ last_printed = jiffies;
+ }
+ }
+#endif
/*
* REPLAY is when Linux resends an IRQ that was dropped earlier.
* WAITING is used by probe to mark irqs that are being tested.
*/
- status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
- status |= IRQ_PENDING; /* we _want_ to handle it */
+ status &= ~(IRQ_REPLAY | IRQ_WAITING);
+ status |= IRQ_PENDING | IRQ_MASKED; /* we _want_ to handle it */
/*
* If the IRQ is disabled for whatever reason, we cannot
@@ -890,9 +911,12 @@ handle_irq(int irq, struct pt_regs * regs)
desc->status &= ~IRQ_PENDING;
spin_unlock(&irq_controller_lock);
}
- desc->status &= ~IRQ_INPROGRESS;
- if (!(desc->status & IRQ_DISABLED))
+ status = desc->status & ~IRQ_INPROGRESS;
+ if (!(status & IRQ_DISABLED)) {
+ status &= ~IRQ_MASKED;
desc->handler->end(irq);
+ }
+ desc->status = status;
spin_unlock(&irq_controller_lock);
}
@@ -1056,7 +1080,7 @@ do_entInt(unsigned long type, unsigned long vector, unsigned long la_ptr,
#ifdef CONFIG_SMP
cpu_data[smp_processor_id()].smp_local_irq_count++;
smp_percpu_timer_interrupt(&regs);
- if (smp_processor_id() == smp_boot_cpuid)
+ if (smp_processor_id() == boot_cpuid)
#endif
handle_irq(RTC_IRQ, &regs);
return;
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 72ce8bcb6..f5a9bd990 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -133,6 +133,9 @@ pci_map_single(struct pci_dev *pdev, void *cpu_addr, long size, int direction)
unsigned long paddr;
dma_addr_t ret;
+ if (direction == PCI_DMA_NONE)
+ BUG();
+
paddr = virt_to_phys(cpu_addr);
/* First check to see if we can use the direct map window. */
@@ -186,12 +189,15 @@ pci_map_single(struct pci_dev *pdev, void *cpu_addr, long size, int direction)
wrote there. */
void
-pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, long size, int direction)
+pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, long size,
+ int direction)
{
struct pci_controler *hose = pdev ? pdev->sysdata : pci_isa_hose;
struct pci_iommu_arena *arena;
long dma_ofs, npages;
+ if (direction == PCI_DMA_NONE)
+ BUG();
if (dma_addr >= __direct_map_base
&& dma_addr < __direct_map_base + __direct_map_size) {
@@ -247,7 +253,8 @@ pci_alloc_consistent(struct pci_dev *pdev, long size, dma_addr_t *dma_addrp)
}
memset(cpu_addr, 0, size);
- *dma_addrp = pci_map_single(pdev, cpu_addr, size, PCI_DMA_BIDIRECTIONAL);
+ *dma_addrp = pci_map_single(pdev, cpu_addr, size,
+ PCI_DMA_BIDIRECTIONAL);
if (*dma_addrp == 0) {
free_pages((unsigned long)cpu_addr, order);
return NULL;
@@ -424,13 +431,17 @@ sg_fill(struct scatterlist *leader, struct scatterlist *end,
}
int
-pci_map_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, int direction)
+pci_map_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents,
+ int direction)
{
struct scatterlist *start, *end, *out;
struct pci_controler *hose;
struct pci_iommu_arena *arena;
dma_addr_t max_dma;
+ if (direction == PCI_DMA_NONE)
+ BUG();
+
/* Fast path single entry scatterlists. */
if (nents == 1) {
sg->dma_length = sg->length;
@@ -499,7 +510,8 @@ error:
above. */
void
-pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, int direction)
+pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents,
+ int direction)
{
struct pci_controler *hose;
struct pci_iommu_arena *arena;
@@ -507,6 +519,9 @@ pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, int direct
dma_addr_t max_dma;
dma_addr_t fstart, fend;
+ if (direction == PCI_DMA_NONE)
+ BUG();
+
if (! alpha_mv.mv_pci_tbi)
return;
@@ -555,3 +570,33 @@ pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, int nents, int direct
DBGA("pci_unmap_sg: %d entries\n", nents - (end - sg));
}
+
+/* Return whether the given PCI device DMA address mask can be
+ supported properly. */
+
+int
+pci_dma_supported(struct pci_dev *pdev, dma_addr_t mask)
+{
+ struct pci_controler *hose;
+ struct pci_iommu_arena *arena;
+
+ /* If there exists a direct map, and the mask fits either
+ MAX_DMA_ADDRESS defined such that GFP_DMA does something
+ useful, or the total system memory as shifted by the
+ map base. */
+ if (__direct_map_size != 0
+ && (__direct_map_base + MAX_DMA_ADDRESS-IDENT_ADDR-1 <= mask
+ || __direct_map_base + (max_low_pfn<<PAGE_SHIFT)-1 <= mask))
+ return 1;
+
+ /* Check that we have a scatter-gather arena that fits. */
+ hose = pdev ? pdev->sysdata : pci_isa_hose;
+ arena = hose->sg_isa;
+ if (arena && arena->dma_base + arena->size <= mask)
+ return 1;
+ arena = hose->sg_pci;
+ if (arena && arena->dma_base + arena->size <= mask)
+ return 1;
+
+ return 0;
+}
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 31a818209..2e462550f 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -90,55 +90,82 @@ cpu_idle(void)
}
}
+
+struct halt_info {
+ int mode;
+ char *restart_cmd;
+};
+
static void
-common_shutdown(int mode, char *restart_cmd)
+common_shutdown_1(void *generic_ptr)
{
- /* The following currently only has any effect on SRM. We should
- fix MILO to understand it. Should be pretty easy. Also we can
- support RESTART2 via the ipc_buffer machinations pictured below,
- which SRM ignores. */
+ struct halt_info *how = (struct halt_info *)generic_ptr;
+ struct percpu_struct *cpup;
+ unsigned long *pflags, flags;
+ int cpuid = smp_processor_id();
- if (alpha_using_srm) {
- struct percpu_struct *cpup;
- unsigned long flags;
-
- cpup = (struct percpu_struct *)
- ((unsigned long)hwrpb + hwrpb->processor_offset);
-
- flags = cpup->flags;
-
- /* Clear reason to "default"; clear "bootstrap in progress". */
- flags &= ~0x00ff0001UL;
-
- if (mode == LINUX_REBOOT_CMD_RESTART) {
- if (!restart_cmd) {
- flags |= 0x00020000UL; /* "cold bootstrap" */
- cpup->ipc_buffer[0] = 0;
- } else {
- flags |= 0x00030000UL; /* "warm bootstrap" */
- strncpy((char *)cpup->ipc_buffer, restart_cmd,
- sizeof(cpup->ipc_buffer));
- }
+ /* No point in taking interrupts anymore. */
+ __cli();
+
+ cpup = (struct percpu_struct *)
+ ((unsigned long)hwrpb + hwrpb->processor_offset
+ + hwrpb->processor_size * cpuid);
+ pflags = &cpup->flags;
+ flags = *pflags;
+
+ /* Clear reason to "default"; clear "bootstrap in progress". */
+ flags &= ~0x00ff0001UL;
+
+#ifdef __SMP__
+ /* Secondaries halt here. */
+ if (cpuid != boot_cpuid) {
+ flags |= 0x00040000UL; /* "remain halted" */
+ *pflags = flags;
+ clear_bit(cpuid, &cpu_present_mask);
+ halt();
+ }
+#endif
+
+ if (how->mode == LINUX_REBOOT_CMD_RESTART) {
+ if (!how->restart_cmd) {
+ flags |= 0x00020000UL; /* "cold bootstrap" */
} else {
- flags |= 0x00040000UL; /* "remain halted" */
+ /* For SRM, we could probably set environment
+ variables to get this to work. We'd have to
+ delay this until after srm_paging_stop unless
+ we ever got srm_fixup working.
+
+ At the moment, SRM will use the last boot device,
+ but the file and flags will be the defaults, when
+ doing a "warm" bootstrap. */
+ flags |= 0x00030000UL; /* "warm bootstrap" */
}
-
- cpup->flags = flags;
- mb();
+ } else {
+ flags |= 0x00040000UL; /* "remain halted" */
+ }
+ *pflags = flags;
- /* reset_for_srm(); */
- set_hae(srm_hae);
+#ifdef __SMP__
+ /* Wait for the secondaries to halt. */
+ clear_bit(boot_cpuid, &cpu_present_mask);
+ while (cpu_present_mask)
+ barrier();
+#endif
+ /* If booted from SRM, reset some of the original environment. */
+ if (alpha_using_srm) {
#ifdef CONFIG_DUMMY_CONSOLE
- /* This has the effect of reseting the VGA video origin. */
+ /* This has the effect of resetting the VGA video origin. */
take_over_console(&dummy_con, 0, MAX_NR_CONSOLES-1, 1);
#endif
+ /* reset_for_srm(); */
+ set_hae(srm_hae);
}
if (alpha_mv.kill_arch)
- alpha_mv.kill_arch(mode);
+ alpha_mv.kill_arch(how->mode);
- if (!alpha_using_srm && mode != LINUX_REBOOT_CMD_RESTART) {
+ if (! alpha_using_srm && how->mode != LINUX_REBOOT_CMD_RESTART) {
/* Unfortunately, since MILO doesn't currently understand
the hwrpb bits above, we can't reliably halt the
processor and keep it halted. So just loop. */
@@ -151,6 +178,18 @@ common_shutdown(int mode, char *restart_cmd)
halt();
}
+static void
+common_shutdown(int mode, char *restart_cmd)
+{
+ struct halt_info args;
+ args.mode = mode;
+ args.restart_cmd = restart_cmd;
+#ifdef __SMP__
+ smp_call_function(common_shutdown_1, &args, 1, 0);
+#endif
+ common_shutdown_1(&args);
+}
+
void
machine_restart(char *restart_cmd)
{
diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h
index dd63de4d2..a8859059b 100644
--- a/arch/alpha/kernel/proto.h
+++ b/arch/alpha/kernel/proto.h
@@ -74,13 +74,14 @@ extern void tsunami_pci_tbi(struct pci_controler *, dma_addr_t, dma_addr_t);
/* setup.c */
extern unsigned long srm_hae;
+extern int boot_cpuid;
/* smp.c */
extern void setup_smp(void);
extern int smp_info(char *buffer);
extern void handle_ipi(struct pt_regs *);
extern void smp_percpu_timer_interrupt(struct pt_regs *);
-extern int smp_boot_cpuid;
+extern unsigned long cpu_present_mask;
/* bios32.c */
/* extern void reset_for_srm(void); */
diff --git a/arch/alpha/kernel/semaphore.c b/arch/alpha/kernel/semaphore.c
index d4793ecb4..dc5209531 100644
--- a/arch/alpha/kernel/semaphore.c
+++ b/arch/alpha/kernel/semaphore.c
@@ -173,7 +173,7 @@ __down_read(struct rw_semaphore *sem, int count)
" subl %0,1,%0\n"
" stl_c %2,%1\n"
" bne %2,2f\n"
- ".section .text2,\"ax\"\n"
+ ".subsection 2\n"
"2: br 1b\n"
".previous"
: "=r"(count), "=m"(sem->count), "=r"(tmp)
@@ -226,7 +226,7 @@ __down_write(struct rw_semaphore *sem, int count)
" ldah %0,%3(%0)\n"
" stl_c %2,%1\n"
" bne %2,2f\n"
- ".section .text2,\"ax\"\n"
+ ".subsection 2\n"
"2: br 1b\n"
".previous"
: "=r"(count), "=m"(sem->count), "=r"(tmp)
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 112976bcb..1311d939b 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -50,6 +50,9 @@
struct hwrpb_struct *hwrpb;
unsigned long srm_hae;
+/* Which processor we booted from. */
+int boot_cpuid;
+
#ifdef CONFIG_ALPHA_GENERIC
struct alpha_machine_vector alpha_mv;
int alpha_using_srm;
@@ -351,6 +354,7 @@ setup_arch(char **cmdline_p)
char *type_name, *var_name, *p;
hwrpb = (struct hwrpb_struct*) __va(INIT_HWRPB->phys_addr);
+ boot_cpuid = hard_smp_processor_id();
/*
* Locate the command line.
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index e3ae30973..be1a6440e 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -62,11 +62,13 @@ spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;
/* Set to a secondary's cpuid when it comes online. */
static unsigned long smp_secondary_alive;
-unsigned long cpu_present_mask; /* Which cpus ids came online. */
-static unsigned long __cpu_present_mask __initdata = 0; /* cpu reported in the hwrpb */
+/* Which cpus ids came online. */
+unsigned long cpu_present_mask;
+
+/* cpus reported in the hwrpb */
+static unsigned long hwrpb_cpu_present_mask __initdata = 0;
static int max_cpus = -1; /* Command-line limitation. */
-int smp_boot_cpuid; /* Which processor we booted from. */
int smp_num_probed; /* Internal processor count */
int smp_num_cpus = 1; /* Number that came online. */
int smp_threads_ready; /* True once the per process idle is forked. */
@@ -486,10 +488,9 @@ setup_smp(void)
struct percpu_struct *cpubase, *cpu;
int i;
- smp_boot_cpuid = hard_smp_processor_id();
- if (smp_boot_cpuid != 0) {
+ if (boot_cpuid != 0) {
printk(KERN_WARNING "SMP: Booting off cpu %d instead of 0?\n",
- smp_boot_cpuid);
+ boot_cpuid);
}
if (hwrpb->nr_processors > 1) {
@@ -508,7 +509,7 @@ setup_smp(void)
if ((cpu->flags & 0x1cc) == 0x1cc) {
smp_num_probed++;
/* Assume here that "whami" == index */
- __cpu_present_mask |= (1L << i);
+ hwrpb_cpu_present_mask |= (1L << i);
cpu->pal_revision = boot_cpu_palrev;
}
@@ -519,12 +520,12 @@ setup_smp(void)
}
} else {
smp_num_probed = 1;
- __cpu_present_mask = (1L << smp_boot_cpuid);
+ hwrpb_cpu_present_mask = (1L << boot_cpuid);
}
- cpu_present_mask = 1L << smp_boot_cpuid;
+ cpu_present_mask = 1L << boot_cpuid;
printk(KERN_INFO "SMP: %d CPUs probed -- cpu_present_mask = %lx\n",
- smp_num_probed, __cpu_present_mask);
+ smp_num_probed, hwrpb_cpu_present_mask);
}
/*
@@ -541,13 +542,13 @@ smp_boot_cpus(void)
memset(__cpu_logical_map, -1, sizeof(__cpu_logical_map));
memset(ipi_data, 0, sizeof(ipi_data));
- __cpu_number_map[smp_boot_cpuid] = 0;
- __cpu_logical_map[0] = smp_boot_cpuid;
- current->processor = smp_boot_cpuid;
+ __cpu_number_map[boot_cpuid] = 0;
+ __cpu_logical_map[0] = boot_cpuid;
+ current->processor = boot_cpuid;
- smp_store_cpu_info(smp_boot_cpuid);
+ smp_store_cpu_info(boot_cpuid);
smp_tune_scheduling();
- smp_setup_percpu_timer(smp_boot_cpuid);
+ smp_setup_percpu_timer(boot_cpuid);
init_idle();
@@ -565,10 +566,10 @@ smp_boot_cpus(void)
cpu_count = 1;
for (i = 0; i < NR_CPUS; i++) {
- if (i == smp_boot_cpuid)
+ if (i == boot_cpuid)
continue;
- if (((__cpu_present_mask >> i) & 1) == 0)
+ if (((hwrpb_cpu_present_mask >> i) & 1) == 0)
continue;
if (smp_boot_one_cpu(i, cpu_count))
@@ -1023,7 +1024,7 @@ debug_spin_lock(spinlock_t * lock, const char *base_file, int line_no)
" stl_c %0,%1\n"
" beq %0,3f\n"
"4: mb\n"
- ".section .text2,\"ax\"\n"
+ ".subsection 2\n"
"2: ldl %0,%1\n"
" subq %2,1,%2\n"
"3: blt %2,4b\n"
@@ -1097,7 +1098,7 @@ void write_lock(rwlock_t * lock)
" stl_c %1,%0\n"
" beq %1,6f\n"
"4: mb\n"
- ".section .text2,\"ax\"\n"
+ ".subsection 2\n"
"6: blt %3,4b # debug\n"
" subl %3,1,%3 # debug\n"
" ldl %1,%0\n"
@@ -1140,7 +1141,7 @@ void read_lock(rwlock_t * lock)
" stl_c %1,%0;"
" beq %1,6f;"
"4: mb\n"
- ".section .text2,\"ax\"\n"
+ ".subsection 2\n"
"6: ldl %1,%0;"
" blt %2,4b # debug\n"
" subl %2,1,%2 # debug\n"
diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c
index 1432496d8..acea58d1e 100644
--- a/arch/alpha/kernel/sys_cabriolet.c
+++ b/arch/alpha/kernel/sys_cabriolet.c
@@ -126,6 +126,30 @@ cabriolet_init_irq(void)
setup_irq(16+4, &isa_cascade_irqaction);
}
+#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_PC164)
+static void
+pc164_device_interrupt(unsigned long v, struct pt_regs *r)
+{
+ /* In theory, the PC164 has the same interrupt hardware as
+ the other Cabriolet based systems. However, something
+ got screwed up late in the development cycle which broke
+ the interrupt masking hardware. Repeat, it is not
+ possible to mask and ack interrupts. At all.
+
+ In an attempt to work around this, while processing
+ interrupts, we do not allow the IPL to drop below what
+ it is currently. This prevents the possibility of
+ recursion.
+
+ ??? Another option might be to force all PCI devices
+ to use edge triggered rather than level triggered
+ interrupts. That might be too invasive though. */
+
+ __min_ipl = getipl();
+ cabriolet_device_interrupt(v, r);
+ __min_ipl = 0;
+}
+#endif
/*
* The EB66+ is very similar to the EB66 except that it does not have
@@ -379,7 +403,7 @@ struct alpha_machine_vector pc164_mv __initmv = {
min_mem_address: CIA_DEFAULT_MEM_BASE,
nr_irqs: 35,
- device_interrupt: cabriolet_device_interrupt,
+ device_interrupt: pc164_device_interrupt,
init_arch: cia_init_arch,
init_irq: cabriolet_init_irq,
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index fbebdd5a5..7414b8cc2 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -33,94 +33,80 @@
#include "machvec_impl.h"
+/* Note mask bit is true for ENABLED irqs. */
static unsigned long cached_irq_mask;
-
-#define TSUNAMI_SET_IRQ_MASK(cpu, value) \
-do { \
- volatile unsigned long *csr; \
- csr = &TSUNAMI_cchip->dim##cpu##.csr; \
- *csr = (value); \
- mb(); \
- *csr; \
-} while(0)
-
-static inline void
-do_flush_irq_mask(unsigned long value)
-{
- switch (TSUNAMI_bootcpu) {
- case 0:
- TSUNAMI_SET_IRQ_MASK(0, value);
- break;
- case 1:
- TSUNAMI_SET_IRQ_MASK(1, value);
- break;
- case 2:
- TSUNAMI_SET_IRQ_MASK(2, value);
- break;
- case 3:
- TSUNAMI_SET_IRQ_MASK(3, value);
- break;
- }
-}
-
-#ifdef CONFIG_SMP
-static inline void
-do_flush_smp_irq_mask(unsigned long value)
-{
- extern unsigned long cpu_present_mask;
- unsigned long other_cpus = cpu_present_mask & ~(1L << TSUNAMI_bootcpu);
-
- if (other_cpus & 1)
- TSUNAMI_SET_IRQ_MASK(0, value);
- if (other_cpus & 2)
- TSUNAMI_SET_IRQ_MASK(1, value);
- if (other_cpus & 4)
- TSUNAMI_SET_IRQ_MASK(2, value);
- if (other_cpus & 8)
- TSUNAMI_SET_IRQ_MASK(3, value);
-}
-#endif
-
static void
-dp264_flush_irq_mask(unsigned long mask)
+tsunami_update_irq_hw(unsigned long mask, unsigned long isa_enable)
{
- unsigned long value;
+ register tsunami_cchip *cchip = TSUNAMI_cchip;
+ register int bcpu = boot_cpuid;
#ifdef CONFIG_SMP
- do_flush_smp_irq_mask(mask);
+ register unsigned long cpm = cpu_present_mask;
+ volatile unsigned long *dim0, *dim1, *dim2, *dim3;
+ unsigned long mask0, mask1, mask2, mask3, maskB, dummy;
+
+ mask0 = mask1 = mask2 = mask3 = mask;
+ maskB = mask | isa_enable;
+ if (bcpu == 0) mask0 = maskB;
+ if (bcpu == 1) mask1 = maskB;
+ if (bcpu == 2) mask2 = maskB;
+ if (bcpu == 3) mask3 = maskB;
+
+ dim0 = &cchip->dim0.csr;
+ dim1 = &cchip->dim1.csr;
+ dim2 = &cchip->dim2.csr;
+ dim3 = &cchip->dim3.csr;
+ if ((cpm & 1) == 0) dim0 = &dummy;
+ if ((cpm & 2) == 0) dim1 = &dummy;
+ if ((cpm & 4) == 0) dim2 = &dummy;
+ if ((cpm & 8) == 0) dim3 = &dummy;
+
+ *dim0 = mask0;
+ *dim1 = mask1;
+ *dim2 = mask2;
+ *dim3 = mask3;
+ mb();
+ *dim0;
+ *dim1;
+ *dim2;
+ *dim3;
+#else
+ volatile unsigned long *dimB = &cchip->dim1.csr;
+ if (bcpu == 0) dimB = &cchip->dim0.csr;
+ if (bcpu == 2) dimB = &cchip->dim2.csr;
+ if (bcpu == 3) dimB = &cchip->dim3.csr;
+ *dimB = mask | isa_enable;
+ mb();
+ *dimB;
#endif
-
- value = mask | (1UL << 55) | 0xffff; /* isa irqs always enabled */
- do_flush_irq_mask(value);
}
-static void
-clipper_flush_irq_mask(unsigned long mask)
+static inline void
+dp264_update_irq_hw(unsigned long mask)
{
- unsigned long value;
-
- value = mask >> 16;
-#ifdef CONFIG_SMP
- do_flush_smp_irq_mask(value);
-#endif
+ tsunami_update_irq_hw(mask, (1UL << 55) | 0xffff);
+}
- value = value | (1UL << 55); /* master ISA enable */
- do_flush_irq_mask(value);
+static inline void
+clipper_update_irq_hw(unsigned long mask)
+{
+ tsunami_update_irq_hw(mask, 1UL << 55);
}
static inline void
dp264_enable_irq(unsigned int irq)
{
cached_irq_mask |= 1UL << irq;
- dp264_flush_irq_mask(cached_irq_mask);
+ dp264_update_irq_hw(cached_irq_mask);
}
static void
dp264_disable_irq(unsigned int irq)
{
cached_irq_mask &= ~(1UL << irq);
- dp264_flush_irq_mask(cached_irq_mask);
+ dp264_update_irq_hw(cached_irq_mask);
}
static unsigned int
@@ -134,14 +120,14 @@ static inline void
clipper_enable_irq(unsigned int irq)
{
cached_irq_mask |= 1UL << irq;
- clipper_flush_irq_mask(cached_irq_mask);
+ clipper_update_irq_hw(cached_irq_mask);
}
static void
clipper_disable_irq(unsigned int irq)
{
cached_irq_mask &= ~(1UL << irq);
- clipper_flush_irq_mask(cached_irq_mask);
+ clipper_update_irq_hw(cached_irq_mask);
}
static unsigned int
@@ -271,7 +257,7 @@ dp264_init_irq(void)
if (alpha_using_srm)
alpha_mv.device_interrupt = dp264_srm_device_interrupt;
- dp264_flush_irq_mask(0UL);
+ dp264_update_irq_hw(0UL);
init_i8259a_irqs();
init_rtc_irq();
@@ -289,7 +275,7 @@ clipper_init_irq(void)
if (alpha_using_srm)
alpha_mv.device_interrupt = clipper_srm_device_interrupt;
- clipper_flush_irq_mask(0UL);
+ clipper_update_irq_hw(0UL);
init_i8259a_irqs();
init_rtc_irq();
diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c
index ccdcf3bdb..0230ec6d9 100644
--- a/arch/alpha/kernel/sys_sio.c
+++ b/arch/alpha/kernel/sys_sio.c
@@ -391,7 +391,7 @@ struct alpha_machine_vector xl_mv __initmv = {
nr_irqs: 16,
device_interrupt: isa_device_interrupt,
- init_arch: lca_init_arch,
+ init_arch: apecs_init_arch,
init_irq: sio_init_irq,
init_rtc: common_init_rtc,
init_pci: noname_init_pci,
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 8211045e8..d7b5cee8c 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -22,7 +22,6 @@
* fixed algorithm in do_gettimeofday() for calculating the precise time
* from processor cycle counter (now taking lost_ticks into account)
*/
-#include <linux/config.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 36b0cc43a..828044b24 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -215,8 +215,10 @@ do_entIF(unsigned long type, unsigned long a1,
/* EV4 does not implement anything except normal
rounding. Everything else will come here as
an illegal instruction. Emulate them. */
- if (alpha_fp_emul(regs.pc - 4))
+ if (alpha_fp_emul(regs.pc)) {
+ regs.pc += 4;
return;
+ }
}
send_sig(SIGILL, current, 1);
break;
diff --git a/arch/alpha/vmlinux.lds b/arch/alpha/vmlinux.lds
index 4b49a5369..4eaac4e42 100644
--- a/arch/alpha/vmlinux.lds
+++ b/arch/alpha/vmlinux.lds
@@ -5,7 +5,6 @@ SECTIONS
. = 0xfffffc0000310000;
_text = .;
.text : { *(.text) }
- .text2 : { *(.text2) }
_etext = .;
/* Exception table */
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 63d4631a4..c445daeee 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -165,7 +165,6 @@ EXPORT_SYMBOL_NOVERS(strncmp);
EXPORT_SYMBOL_NOVERS(strchr);
EXPORT_SYMBOL_NOVERS(strlen);
EXPORT_SYMBOL_NOVERS(strnlen);
-EXPORT_SYMBOL_NOVERS(strspn);
EXPORT_SYMBOL_NOVERS(strpbrk);
EXPORT_SYMBOL_NOVERS(strtok);
EXPORT_SYMBOL_NOVERS(strrchr);
diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c
index 8673d6c1d..7101b2936 100644
--- a/arch/arm/mm/consistent.c
+++ b/arch/arm/mm/consistent.c
@@ -23,7 +23,6 @@ void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle)
{
int order;
unsigned long page;
- struct vm_struct *area;
void *ret;
if (in_interrupt())
@@ -40,15 +39,10 @@ void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle)
*dma_handle = virt_to_bus((void *)page);
- area = get_vm_area(size, VM_IOREMAP); /* maybe new type? */
- if (!area)
- goto no_area;
-
ret = __ioremap(virt_to_phys((void *)page), PAGE_SIZE << order, 0);
if (ret)
return ret;
-no_area:
free_pages(page, order);
no_page:
BUG();
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
index 4ca545255..34b453c2c 100644
--- a/arch/i386/defconfig
+++ b/arch/i386/defconfig
@@ -109,10 +109,12 @@ CONFIG_BLK_DEV_CMD640=y
# CONFIG_BLK_DEV_ISAPNP is not set
CONFIG_BLK_DEV_RZ1000=y
CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_IDEPCI_SHARE_IRQ=y
# CONFIG_BLK_DEV_IDEDMA_PCI is not set
# CONFIG_BLK_DEV_OFFBOARD is not set
# CONFIG_BLK_DEV_AEC6210 is not set
# CONFIG_BLK_DEV_CMD64X is not set
+# CONFIG_BLK_DEV_CS5530 is not set
# CONFIG_IDE_CHIPSETS is not set
# CONFIG_BLK_CPQ_DA is not set
@@ -393,6 +395,7 @@ CONFIG_PSMOUSE=y
# CONFIG_WATCHDOG is not set
# CONFIG_NVRAM is not set
# CONFIG_RTC is not set
+# CONFIG_EFI_RTC is not set
#
# Video For Linux
diff --git a/arch/i386/kernel/acpi.c b/arch/i386/kernel/acpi.c
index 9bdd111d1..6228805db 100644
--- a/arch/i386/kernel/acpi.c
+++ b/arch/i386/kernel/acpi.c
@@ -34,6 +34,7 @@
#include <linux/spinlock.h>
#include <linux/ioport.h>
#include <linux/slab.h>
+#include <linux/mm.h>
#include <linux/pci.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -379,13 +380,14 @@ static struct acpi_table *__init acpi_map_table(u32 addr)
ioremap((unsigned long) addr, table_size);
}
- if (!table) {
- /* ioremap is a pain, it returns NULL if the
- * table starts within mapped physical memory.
- * Hopefully, no table straddles a mapped/unmapped
- * physical memory boundary, ugh
+ if (!table && addr < virt_to_phys(high_memory)) {
+ /* sometimes we see ACPI tables in low memory
+ * and not reserved by the memory map (E820) code,
+ * who is at fault for this? BIOS?
*/
- table = (struct acpi_table*) phys_to_virt(addr);
+ printk(KERN_ERR
+ "ACPI: unreserved table memory @ 0x%p!\n",
+ (void*) addr);
}
}
return table;
@@ -933,9 +935,9 @@ static int acpi_enter_dx(acpi_dstate_t state)
int status = 0;
if (state == ACPI_D0)
- status = pm_send_request(PM_RESUME, (void*) state);
+ status = pm_send_all(PM_RESUME, (void*) state);
else
- status = pm_send_request(PM_SUSPEND, (void*) state);
+ status = pm_send_all(PM_SUSPEND, (void*) state);
return status;
}
@@ -1333,10 +1335,7 @@ static int __init acpi_init(void)
if (acpi_claim_ioports(acpi_facp)) {
printk(KERN_ERR "ACPI: I/O port allocation failed\n");
- if (pci_driver_registered)
- pci_unregister_driver(&acpi_driver);
- acpi_destroy_tables();
- return -ENODEV;
+ goto err_out;
}
if (acpi_facp->sci_int
@@ -1347,12 +1346,7 @@ static int __init acpi_init(void)
acpi_facp)) {
printk(KERN_ERR "ACPI: SCI (IRQ%d) allocation failed\n",
acpi_facp->sci_int);
-
- if (pci_driver_registered)
- pci_unregister_driver(&acpi_driver);
- acpi_destroy_tables();
-
- return -ENODEV;
+ goto err_out;
}
acpi_sysctl = register_sysctl_table(acpi_dir_table, 1);
@@ -1379,6 +1373,13 @@ static int __init acpi_init(void)
pm_idle = acpi_idle;
return 0;
+
+err_out:
+ if (pci_driver_registered)
+ pci_unregister_driver(&acpi_driver);
+ acpi_destroy_tables();
+
+ return -ENODEV;
}
/*
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 4ec5e7993..3d403b93c 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -333,7 +333,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
static struct apm_user * user_list = NULL;
-static char driver_version[] = "1.12"; /* no spaces */
+static char driver_version[] = "1.13"; /* no spaces */
static char * apm_event_name[] = {
"system standby",
@@ -590,7 +590,11 @@ static void apm_cpu_idle(void)
continue;
if (hlt_counter)
continue;
- asm volatile("sti ; hlt" : : : "memory");
+ asm volatile("cli" : : : "memory");
+ if (!current->need_resched)
+ asm volatile("sti ; hlt" : : : "memory");
+ else
+ asm volatile("sti" : : : "memory");
continue;
}
@@ -635,7 +639,7 @@ static void apm_power_off(void)
*/
#ifdef CONFIG_SMP
/* Some bioses don't like being called from CPU != 0 */
- while (cpu_number_map[smp_processor_id()] != 0) {
+ while (cpu_number_map(smp_processor_id()) != 0) {
kernel_thread(apm_magic, NULL,
CLONE_FS | CLONE_FILES | CLONE_SIGHAND | SIGCHLD);
schedule();
@@ -916,7 +920,7 @@ static int send_event(apm_event_t event, struct apm_user *sender)
case APM_CRITICAL_SUSPEND:
case APM_USER_SUSPEND:
/* map all suspends to ACPI D3 */
- if (pm_send_request(PM_SUSPEND, (void *)3)) {
+ if (pm_send_all(PM_SUSPEND, (void *)3)) {
if (apm_bios_info.version > 0x100)
apm_set_power_state(APM_STATE_REJECT);
return 0;
@@ -925,7 +929,7 @@ static int send_event(apm_event_t event, struct apm_user *sender)
case APM_NORMAL_RESUME:
case APM_CRITICAL_RESUME:
/* map all resumes to ACPI D0 */
- (void) pm_send_request(PM_RESUME, (void *)0);
+ (void) pm_send_all(PM_RESUME, (void *)0);
break;
}
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index bcca244c1..0c3cae5d9 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -181,6 +181,8 @@ ret_from_fork:
call SYMBOL_NAME(schedule_tail)
addl $4, %esp
GET_CURRENT(%ebx)
+ testb $0x20,flags(%ebx) # PF_TRACESYS
+ jne tracesys_exit
jmp ret_from_sys_call
/*
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
index cad6ceb17..a3389c5f0 100644
--- a/arch/i386/kernel/i386_ksyms.c
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -144,6 +144,4 @@ EXPORT_SYMBOL(screen_info);
EXPORT_SYMBOL(get_wchan);
-
-EXPORT_SYMBOL(local_bh_count);
-EXPORT_SYMBOL(local_irq_count);
+EXPORT_SYMBOL(irq_stat);
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
index d54f9b503..ec33f2269 100644
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -127,11 +127,14 @@ void (*interrupt[NR_IRQS])(void) = {
* moves to arch independent land
*/
-void enable_8259A_irq(unsigned int irq);
-void disable_8259A_irq(unsigned int irq);
+static spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED;
+
+static void end_8259A_irq (unsigned int irq)
+{
+ if (!(irq_desc[irq].status & IRQ_DISABLED))
+ enable_8259A_irq(irq);
+}
-/* shutdown is same as "disable" */
-#define end_8259A_irq enable_8259A_irq
#define shutdown_8259A_irq disable_8259A_irq
void mask_and_ack_8259A(unsigned int);
@@ -149,7 +152,8 @@ static struct hw_interrupt_type i8259A_irq_type = {
enable_8259A_irq,
disable_8259A_irq,
mask_and_ack_8259A,
- end_8259A_irq
+ end_8259A_irq,
+ NULL
};
/*
@@ -183,30 +187,45 @@ unsigned long io_apic_irqs = 0;
void disable_8259A_irq(unsigned int irq)
{
unsigned int mask = 1 << irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
cached_irq_mask |= mask;
if (irq & 8)
outb(cached_A1,0xA1);
else
outb(cached_21,0x21);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
}
void enable_8259A_irq(unsigned int irq)
{
unsigned int mask = ~(1 << irq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
cached_irq_mask &= mask;
if (irq & 8)
outb(cached_A1,0xA1);
else
outb(cached_21,0x21);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
}
int i8259A_irq_pending(unsigned int irq)
{
unsigned int mask = 1<<irq;
+ unsigned long flags;
+ int ret;
+ spin_lock_irqsave(&i8259A_lock, flags);
if (irq < 8)
- return (inb(0x20) & mask);
- return (inb(0xA0) & (mask >> 8));
+ ret = inb(0x20) & mask;
+ else
+ ret = inb(0xA0) & (mask >> 8);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+
+ return ret;
}
void make_8259A_irq(unsigned int irq)
@@ -247,7 +266,9 @@ static inline int i8259A_irq_real(unsigned int irq)
void mask_and_ack_8259A(unsigned int irq)
{
unsigned int irqmask = 1 << irq;
+ unsigned long flags;
+ spin_lock_irqsave(&i8259A_lock, flags);
/*
* Lightweight spurious IRQ detection. We do not want
* to overdo spurious IRQ handling - it's usually a sign
@@ -278,6 +299,7 @@ handle_real_irq:
outb(cached_21,0x21);
outb(0x20,0x20); /* 'generic EOI' to master */
}
+ spin_unlock_irqrestore(&i8259A_lock, flags);
return;
spurious_8259A_irq:
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 75b2bfb9f..129a587f0 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -28,6 +28,8 @@
#include <asm/smp.h>
#include <asm/desc.h>
+static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED;
+
/*
* # of IO-APICs and # of IRQ routing registers
*/
@@ -87,9 +89,8 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
entry->pin = pin;
}
-#define DO_ACTION(name,R,ACTION, FINAL) \
+#define __DO_ACTION(name,R,ACTION, FINAL) \
\
-static void name##_IO_APIC_irq(unsigned int irq) \
{ \
int pin; \
struct irq_pin_list *entry = irq_2_pin + irq; \
@@ -109,8 +110,31 @@ static void name##_IO_APIC_irq(unsigned int irq) \
FINAL; \
}
-DO_ACTION( mask, 0, |= 0x00010000, io_apic_sync(entry->apic))/* mask = 1 */
-DO_ACTION( unmask, 0, &= 0xfffeffff, ) /* mask = 0 */
+#define DO_ACTION(name,R,ACTION, FINAL) \
+ \
+static void name##_IO_APIC_irq(unsigned int irq) \
+__DO_ACTION(name,R,ACTION, FINAL)
+
+DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic))/* mask = 1 */
+DO_ACTION( __unmask, 0, &= 0xfffeffff, ) /* mask = 0 */
+
+static void mask_IO_APIC_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __mask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_IO_APIC_irq (unsigned int irq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
@@ -537,7 +561,7 @@ void __init setup_IO_APIC_irqs(void)
entry.delivery_mode = dest_LowestPrio;
entry.dest_mode = 1; /* logical delivery */
entry.mask = 0; /* enable IRQ */
- entry.dest.logical.logical_dest = APIC_ALL_CPUS; /* all CPUs */
+ entry.dest.logical.logical_dest = APIC_ALL_CPUS;
idx = find_irq_entry(apic,pin,mp_INT);
if (idx == -1) {
@@ -1026,16 +1050,16 @@ extern atomic_t nmi_counter[NR_CPUS];
static int __init nmi_irq_works(void)
{
- atomic_t tmp[NR_CPUS];
+ irq_cpustat_t tmp[NR_CPUS];
int j, cpu;
- memcpy(tmp, nmi_counter, sizeof(tmp));
+ memcpy(tmp, irq_stat, sizeof(tmp));
sti();
mdelay(50);
for (j = 0; j < smp_num_cpus; j++) {
cpu = cpu_logical_map(j);
- if (atomic_read(nmi_counter+cpu) - atomic_read(tmp+cpu) <= 3) {
+ if (atomic_read(&nmi_counter(cpu)) - atomic_read(&tmp[cpu].__nmi_counter) <= 3) {
printk("CPU#%d NMI appears to be stuck.\n", cpu);
return 0;
}
@@ -1055,14 +1079,9 @@ static int __init nmi_irq_works(void)
* that was delayed but this is now handled in the device
* independent code.
*/
-static void enable_edge_ioapic_irq(unsigned int irq)
-{
- unmask_IO_APIC_irq(irq);
-}
+#define enable_edge_ioapic_irq unmask_IO_APIC_irq
-static void disable_edge_ioapic_irq(unsigned int irq)
-{
-}
+static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ }
/*
* Starting up a edge-triggered IO-APIC interrupt is
@@ -1077,12 +1096,17 @@ static void disable_edge_ioapic_irq(unsigned int irq)
static unsigned int startup_edge_ioapic_irq(unsigned int irq)
{
int was_pending = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
if (irq < 16) {
disable_8259A_irq(irq);
if (i8259A_irq_pending(irq))
was_pending = 1;
}
- enable_edge_ioapic_irq(irq);
+ __unmask_IO_APIC_irq(irq);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
return was_pending;
}
@@ -1093,14 +1117,15 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq)
* interrupt for real. This prevents IRQ storms from unhandled
* devices.
*/
-void static ack_edge_ioapic_irq(unsigned int irq)
+static void ack_edge_ioapic_irq(unsigned int irq)
{
if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
== (IRQ_PENDING | IRQ_DISABLED))
mask_IO_APIC_irq(irq);
ack_APIC_irq();
}
-void static end_edge_ioapic_irq(unsigned int i){}
+
+static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ }
/*
@@ -1108,23 +1133,46 @@ void static end_edge_ioapic_irq(unsigned int i){}
* and shutting down and starting up the interrupt
* is the same as enabling and disabling them -- except
* with a startup need to return a "was pending" value.
+ *
+ * Level triggered interrupts are special because we
+ * do not touch any IO-APIC register while handling
+ * them. We ack the APIC in the end-IRQ handler, not
+ * in the start-IRQ-handler. Protection against reentrance
+ * from the same interrupt is still provided, both by the
+ * generic IRQ layer and by the fact that an unacked local
+ * APIC does not accept IRQs.
*/
-static unsigned int startup_level_ioapic_irq(unsigned int irq)
+static unsigned int startup_level_ioapic_irq (unsigned int irq)
{
unmask_IO_APIC_irq(irq);
+
return 0; /* don't check for pending */
}
#define shutdown_level_ioapic_irq mask_IO_APIC_irq
#define enable_level_ioapic_irq unmask_IO_APIC_irq
#define disable_level_ioapic_irq mask_IO_APIC_irq
-#define end_level_ioapic_irq unmask_IO_APIC_irq
-void static mask_and_ack_level_ioapic_irq(unsigned int i)
+
+static void end_level_ioapic_irq (unsigned int i)
{
- mask_IO_APIC_irq(i);
ack_APIC_irq();
}
+static void mask_and_ack_level_ioapic_irq (unsigned int i) { /* nothing */ }
+
+static void set_ioapic_affinity (unsigned int irq, unsigned int mask)
+{
+ unsigned long flags;
+ /*
+ * Only the first 8 bits are valid.
+ */
+ mask = mask << 24;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __DO_ACTION( target, 1, = mask, )
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
/*
* Level and edge triggered IO-APIC interrupts need different handling,
* so we use two separate IRQ descriptors. Edge triggered IRQs can be
@@ -1141,7 +1189,8 @@ static struct hw_interrupt_type ioapic_edge_irq_type = {
enable_edge_ioapic_irq,
disable_edge_ioapic_irq,
ack_edge_ioapic_irq,
- end_edge_ioapic_irq
+ end_edge_ioapic_irq,
+ set_ioapic_affinity,
};
static struct hw_interrupt_type ioapic_level_irq_type = {
@@ -1151,7 +1200,8 @@ static struct hw_interrupt_type ioapic_level_irq_type = {
enable_level_ioapic_irq,
disable_level_ioapic_irq,
mask_and_ack_level_ioapic_irq,
- end_level_ioapic_irq
+ end_level_ioapic_irq,
+ set_ioapic_affinity,
};
static inline void init_IO_APIC_traps(void)
@@ -1185,12 +1235,12 @@ static inline void init_IO_APIC_traps(void)
}
}
-void static ack_lapic_irq (unsigned int irq)
+static void ack_lapic_irq (unsigned int irq)
{
ack_APIC_irq();
}
-void static end_lapic_irq (unsigned int i) { /* nothing */ }
+static void end_lapic_irq (unsigned int i) { /* nothing */ }
static struct hw_interrupt_type lapic_irq_type = {
"local-APIC-edge",
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 9d4a81041..7054249e6 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -31,21 +31,20 @@
#include <linux/init.h>
#include <linux/kernel_stat.h>
#include <linux/irq.h>
+#include <linux/proc_fs.h>
+#include <linux/irq.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/system.h>
#include <asm/bitops.h>
+#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/delay.h>
#include <asm/desc.h>
#include <asm/irq.h>
-unsigned int local_bh_count[NR_CPUS];
-unsigned int local_irq_count[NR_CPUS];
-
-extern atomic_t nmi_counter[NR_CPUS];
/*
* Linux has a controller-independent x86 interrupt architecture.
@@ -63,17 +62,15 @@ extern atomic_t nmi_counter[NR_CPUS];
* interrupt controllers, without having to do assembly magic.
*/
-/*
- * Micro-access to controllers is serialized over the whole
- * system. We never hold this lock when we call the actual
- * IRQ handler.
- */
-spinlock_t irq_controller_lock = SPIN_LOCK_UNLOCKED;
+irq_cpustat_t irq_stat [NR_CPUS];
+
/*
* Controller mappings for all interrupt sources:
*/
irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned =
- { [0 ... NR_IRQS-1] = { 0, &no_irq_type, }};
+ { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}};
+
+static void register_irq_proc (unsigned int irq);
/*
* Special irq handlers.
@@ -164,7 +161,7 @@ int get_irq_list(char *buf)
p += sprintf(p, "NMI: ");
for (j = 0; j < smp_num_cpus; j++)
p += sprintf(p, "%10u ",
- atomic_read(nmi_counter+cpu_logical_map(j)));
+ atomic_read(&nmi_counter(cpu_logical_map(j))));
p += sprintf(p, "\n");
#if CONFIG_SMP
p += sprintf(p, "LOC: ");
@@ -186,7 +183,6 @@ int get_irq_list(char *buf)
#ifdef CONFIG_SMP
unsigned char global_irq_holder = NO_PROC_ID;
unsigned volatile int global_irq_lock;
-atomic_t global_irq_count;
static void show(char * str)
{
@@ -196,9 +192,9 @@ static void show(char * str)
printk("\n%s, CPU %d:\n", str, cpu);
printk("irq: %d [%d %d]\n",
- atomic_read(&global_irq_count), local_irq_count[0], local_irq_count[1]);
+ irqs_running(), local_irq_count(0), local_irq_count(1));
printk("bh: %d [%d %d]\n",
- spin_is_locked(&global_bh_lock) ? 1 : 0, local_bh_count[0], local_bh_count[1]);
+ spin_is_locked(&global_bh_lock) ? 1 : 0, local_bh_count(0), local_bh_count(1));
stack = (unsigned long *) &stack;
for (i = 40; i ; i--) {
unsigned long x = *++stack;
@@ -248,10 +244,9 @@ static inline void wait_on_irq(int cpu)
* for bottom half handlers unless we're
* already executing in one..
*/
- if (!atomic_read(&global_irq_count)) {
- if (local_bh_count[cpu] || !spin_is_locked(&global_bh_lock))
+ if (!irqs_running())
+ if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock))
break;
- }
/* Duh, we have to loop. Release the lock to avoid deadlocks */
clear_bit(0,&global_irq_lock);
@@ -264,11 +259,11 @@ static inline void wait_on_irq(int cpu)
__sti();
SYNC_OTHER_CORES(cpu);
__cli();
- if (atomic_read(&global_irq_count))
+ if (irqs_running())
continue;
if (global_irq_lock)
continue;
- if (!local_bh_count[cpu] && spin_is_locked(&global_bh_lock))
+ if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock))
continue;
if (!test_and_set_bit(0,&global_irq_lock))
break;
@@ -285,7 +280,7 @@ static inline void wait_on_irq(int cpu)
*/
void synchronize_irq(void)
{
- if (atomic_read(&global_irq_count)) {
+ if (irqs_running()) {
/* Stupid approach */
cli();
sti();
@@ -338,7 +333,7 @@ void __global_cli(void)
if (flags & (1 << EFLAGS_IF_SHIFT)) {
int cpu = smp_processor_id();
__cli();
- if (!local_irq_count[cpu])
+ if (!local_irq_count(cpu))
get_irqlock(cpu);
}
}
@@ -347,7 +342,7 @@ void __global_sti(void)
{
int cpu = smp_processor_id();
- if (!local_irq_count[cpu])
+ if (!local_irq_count(cpu))
release_irqlock(cpu);
__sti();
}
@@ -364,6 +359,7 @@ unsigned long __global_save_flags(void)
int retval;
int local_enabled;
unsigned long flags;
+ int cpu = smp_processor_id();
__save_flags(flags);
local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1;
@@ -371,10 +367,10 @@ unsigned long __global_save_flags(void)
retval = 2 + local_enabled;
/* check for global flags if we're not in an interrupt */
- if (!local_irq_count[smp_processor_id()]) {
+ if (!local_irq_count(cpu)) {
if (local_enabled)
retval = 1;
- if (global_irq_holder == (unsigned char) smp_processor_id())
+ if (global_irq_holder == cpu)
retval = 0;
}
return retval;
@@ -442,16 +438,17 @@ int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction *
* hardware disable after having gotten the irq
* controller lock.
*/
-void disable_irq_nosync(unsigned int irq)
+void inline disable_irq_nosync(unsigned int irq)
{
+ irq_desc_t *desc = irq_desc + irq;
unsigned long flags;
- spin_lock_irqsave(&irq_controller_lock, flags);
- if (!irq_desc[irq].depth++) {
- irq_desc[irq].status |= IRQ_DISABLED;
- irq_desc[irq].handler->disable(irq);
+ spin_lock_irqsave(&desc->lock, flags);
+ if (!desc->depth++) {
+ desc->status |= IRQ_DISABLED;
+ desc->handler->disable(irq);
}
- spin_unlock_irqrestore(&irq_controller_lock, flags);
+ spin_unlock_irqrestore(&desc->lock, flags);
}
/*
@@ -462,7 +459,7 @@ void disable_irq(unsigned int irq)
{
disable_irq_nosync(irq);
- if (!local_irq_count[smp_processor_id()]) {
+ if (!local_irq_count(smp_processor_id())) {
do {
barrier();
} while (irq_desc[irq].status & IRQ_INPROGRESS);
@@ -471,28 +468,29 @@ void disable_irq(unsigned int irq)
void enable_irq(unsigned int irq)
{
+ irq_desc_t *desc = irq_desc + irq;
unsigned long flags;
- spin_lock_irqsave(&irq_controller_lock, flags);
- switch (irq_desc[irq].depth) {
+ spin_lock_irqsave(&desc->lock, flags);
+ switch (desc->depth) {
case 1: {
- unsigned int status = irq_desc[irq].status & ~IRQ_DISABLED;
- irq_desc[irq].status = status;
+ unsigned int status = desc->status & ~IRQ_DISABLED;
+ desc->status = status;
if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
- irq_desc[irq].status = status | IRQ_REPLAY;
- hw_resend_irq(irq_desc[irq].handler,irq);
+ desc->status = status | IRQ_REPLAY;
+ hw_resend_irq(desc->handler,irq);
}
- irq_desc[irq].handler->enable(irq);
+ desc->handler->enable(irq);
/* fall-through */
}
default:
- irq_desc[irq].depth--;
+ desc->depth--;
break;
case 0:
printk("enable_irq() unbalanced from %p\n",
__builtin_return_address(0));
}
- spin_unlock_irqrestore(&irq_controller_lock, flags);
+ spin_unlock_irqrestore(&desc->lock, flags);
}
/*
@@ -514,13 +512,12 @@ asmlinkage unsigned int do_IRQ(struct pt_regs regs)
*/
int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ code */
int cpu = smp_processor_id();
- irq_desc_t *desc;
+ irq_desc_t *desc = irq_desc + irq;
struct irqaction * action;
unsigned int status;
kstat.irqs[cpu][irq]++;
- desc = irq_desc + irq;
- spin_lock(&irq_controller_lock);
+ spin_lock(&desc->lock);
desc->handler->ack(irq);
/*
REPLAY is when Linux resends an IRQ that was dropped earlier
@@ -540,7 +537,6 @@ asmlinkage unsigned int do_IRQ(struct pt_regs regs)
status |= IRQ_INPROGRESS; /* we are handling it */
}
desc->status = status;
- spin_unlock(&irq_controller_lock);
/*
* If there is no IRQ handler or it was disabled, exit early.
@@ -549,7 +545,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs regs)
will take care of it.
*/
if (!action)
- return 1;
+ goto out;
/*
* Edge triggered interrupts need to remember
@@ -562,20 +558,24 @@ asmlinkage unsigned int do_IRQ(struct pt_regs regs)
* SMP environment.
*/
for (;;) {
+ spin_unlock(&desc->lock);
handle_IRQ_event(irq, &regs, action);
- spin_lock(&irq_controller_lock);
+ spin_lock(&desc->lock);
if (!(desc->status & IRQ_PENDING))
break;
desc->status &= ~IRQ_PENDING;
- spin_unlock(&irq_controller_lock);
}
desc->status &= ~IRQ_INPROGRESS;
- if (!(desc->status & IRQ_DISABLED))
- desc->handler->end(irq);
- spin_unlock(&irq_controller_lock);
+out:
+ /*
+ * The ->end() handler has to deal with interrupts which got
+ * disabled while the handler was running.
+ */
+ desc->handler->end(irq);
+ spin_unlock(&desc->lock);
- if (softirq_state[cpu].active&softirq_state[cpu].mask)
+ if (softirq_state[cpu].active & softirq_state[cpu].mask)
do_softirq();
return 1;
}
@@ -627,14 +627,16 @@ int request_irq(unsigned int irq,
void free_irq(unsigned int irq, void *dev_id)
{
+ irq_desc_t *desc;
struct irqaction **p;
unsigned long flags;
if (irq >= NR_IRQS)
return;
- spin_lock_irqsave(&irq_controller_lock,flags);
- p = &irq_desc[irq].action;
+ desc = irq_desc + irq;
+ spin_lock_irqsave(&desc->lock,flags);
+ p = &desc->action;
for (;;) {
struct irqaction * action = *p;
if (action) {
@@ -645,22 +647,22 @@ void free_irq(unsigned int irq, void *dev_id)
/* Found it - now remove it from the list of entries */
*pp = action->next;
- if (!irq_desc[irq].action) {
- irq_desc[irq].status |= IRQ_DISABLED;
- irq_desc[irq].handler->shutdown(irq);
+ if (!desc->action) {
+ desc->status |= IRQ_DISABLED;
+ desc->handler->shutdown(irq);
}
- spin_unlock_irqrestore(&irq_controller_lock,flags);
+ spin_unlock_irqrestore(&desc->lock,flags);
#ifdef CONFIG_SMP
/* Wait to make sure it's not being used on another CPU */
- while (irq_desc[irq].status & IRQ_INPROGRESS)
+ while (desc->status & IRQ_INPROGRESS)
barrier();
#endif
kfree(action);
return;
}
printk("Trying to free free IRQ%d\n",irq);
- spin_unlock_irqrestore(&irq_controller_lock,flags);
+ spin_unlock_irqrestore(&desc->lock,flags);
return;
}
}
@@ -676,21 +678,43 @@ void free_irq(unsigned int irq, void *dev_id)
unsigned long probe_irq_on(void)
{
unsigned int i;
- unsigned long delay;
+ irq_desc_t *desc;
unsigned long val;
+ unsigned long delay;
+
+ /*
+ * something may have generated an irq long ago and we want to
+ * flush such a longstanding irq before considering it as spurious.
+ */
+ for (i = NR_IRQS-1; i > 0; i--) {
+ desc = irq_desc + i;
+
+ spin_lock_irq(&desc->lock);
+ if (!irq_desc[i].action)
+ irq_desc[i].handler->startup(i);
+ spin_unlock_irq(&desc->lock);
+ }
+
+ /* Wait for longstanding interrupts to trigger. */
+ for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
+ /* about 20ms delay */ synchronize_irq();
/*
- * first, enable any unassigned irqs
+ * enable any unassigned irqs
+ * (we must startup again here because if a longstanding irq
+ * happened in the previous stage, it may have masked itself)
*/
- spin_lock_irq(&irq_controller_lock);
for (i = NR_IRQS-1; i > 0; i--) {
- if (!irq_desc[i].action) {
- irq_desc[i].status |= IRQ_AUTODETECT | IRQ_WAITING;
- if(irq_desc[i].handler->startup(i))
- irq_desc[i].status |= IRQ_PENDING;
+ desc = irq_desc + i;
+
+ spin_lock_irq(&desc->lock);
+ if (!desc->action) {
+ desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
+ if (desc->handler->startup(i))
+ desc->status |= IRQ_PENDING;
}
+ spin_unlock_irq(&desc->lock);
}
- spin_unlock_irq(&irq_controller_lock);
/*
* Wait for spurious interrupts to trigger
@@ -702,24 +726,24 @@ unsigned long probe_irq_on(void)
* Now filter out any obviously spurious interrupts
*/
val = 0;
- spin_lock_irq(&irq_controller_lock);
- for (i=0; i<NR_IRQS; i++) {
- unsigned int status = irq_desc[i].status;
-
- if (!(status & IRQ_AUTODETECT))
- continue;
-
- /* It triggered already - consider it spurious. */
- if (!(status & IRQ_WAITING)) {
- irq_desc[i].status = status & ~IRQ_AUTODETECT;
- irq_desc[i].handler->shutdown(i);
- continue;
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc_t *desc = irq_desc + i;
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ /* It triggered already - consider it spurious. */
+ if (!(status & IRQ_WAITING)) {
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ } else
+ if (i < 32)
+ val |= 1 << i;
}
-
- if (i < 32)
- val |= 1 << i;
+ spin_unlock_irq(&desc->lock);
}
- spin_unlock_irq(&irq_controller_lock);
return val;
}
@@ -734,20 +758,22 @@ unsigned int probe_irq_mask(unsigned long val)
unsigned int mask;
mask = 0;
- spin_lock_irq(&irq_controller_lock);
for (i = 0; i < 16; i++) {
- unsigned int status = irq_desc[i].status;
+ irq_desc_t *desc = irq_desc + i;
+ unsigned int status;
- if (!(status & IRQ_AUTODETECT))
- continue;
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
- if (!(status & IRQ_WAITING))
- mask |= 1 << i;
+ if (status & IRQ_AUTODETECT) {
+ if (!(status & IRQ_WAITING))
+ mask |= 1 << i;
- irq_desc[i].status = status & ~IRQ_AUTODETECT;
- irq_desc[i].handler->shutdown(i);
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
+ }
+ spin_unlock_irq(&desc->lock);
}
- spin_unlock_irq(&irq_controller_lock);
return mask & val;
}
@@ -762,22 +788,24 @@ int probe_irq_off(unsigned long val)
nr_irqs = 0;
irq_found = 0;
- spin_lock_irq(&irq_controller_lock);
- for (i=0; i<NR_IRQS; i++) {
- unsigned int status = irq_desc[i].status;
-
- if (!(status & IRQ_AUTODETECT))
- continue;
-
- if (!(status & IRQ_WAITING)) {
- if (!nr_irqs)
- irq_found = i;
- nr_irqs++;
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc_t *desc = irq_desc + i;
+ unsigned int status;
+
+ spin_lock_irq(&desc->lock);
+ status = desc->status;
+
+ if (status & IRQ_AUTODETECT) {
+ if (!(status & IRQ_WAITING)) {
+ if (!nr_irqs)
+ irq_found = i;
+ nr_irqs++;
+ }
+ desc->status = status & ~IRQ_AUTODETECT;
+ desc->handler->shutdown(i);
}
- irq_desc[i].status = status & ~IRQ_AUTODETECT;
- irq_desc[i].handler->shutdown(i);
+ spin_unlock_irq(&desc->lock);
}
- spin_unlock_irq(&irq_controller_lock);
if (nr_irqs > 1)
irq_found = -irq_found;
@@ -788,8 +816,9 @@ int probe_irq_off(unsigned long val)
int setup_irq(unsigned int irq, struct irqaction * new)
{
int shared = 0;
- struct irqaction *old, **p;
unsigned long flags;
+ struct irqaction *old, **p;
+ irq_desc_t *desc = irq_desc + irq;
/*
* Some drivers like serial.c use request_irq() heavily,
@@ -811,12 +840,12 @@ int setup_irq(unsigned int irq, struct irqaction * new)
/*
* The following block of code has to be executed atomically
*/
- spin_lock_irqsave(&irq_controller_lock,flags);
- p = &irq_desc[irq].action;
+ spin_lock_irqsave(&desc->lock,flags);
+ p = &desc->action;
if ((old = *p) != NULL) {
/* Can't share interrupts unless both agree to */
if (!(old->flags & new->flags & SA_SHIRQ)) {
- spin_unlock_irqrestore(&irq_controller_lock,flags);
+ spin_unlock_irqrestore(&desc->lock,flags);
return -EBUSY;
}
@@ -831,11 +860,171 @@ int setup_irq(unsigned int irq, struct irqaction * new)
*p = new;
if (!shared) {
- irq_desc[irq].depth = 0;
- irq_desc[irq].status &= ~IRQ_DISABLED;
- irq_desc[irq].handler->startup(irq);
+ desc->depth = 0;
+ desc->status &= ~IRQ_DISABLED;
+ desc->handler->startup(irq);
}
- spin_unlock_irqrestore(&irq_controller_lock,flags);
+ spin_unlock_irqrestore(&desc->lock,flags);
+
+ register_irq_proc(irq);
return 0;
}
+static struct proc_dir_entry * root_irq_dir;
+static struct proc_dir_entry * irq_dir [NR_IRQS];
+static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
+
+unsigned int irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = 0xffffffff};
+
+#define HEX_DIGITS 8
+
+static int irq_affinity_read_proc (char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ if (count < HEX_DIGITS+1)
+ return -EINVAL;
+ return sprintf (page, "%08x\n", irq_affinity[(int)data]);
+}
+
+static unsigned int parse_hex_value (const char *buffer,
+ unsigned long count, unsigned long *ret)
+{
+ unsigned char hexnum [HEX_DIGITS];
+ unsigned long value;
+ int i;
+
+ if (!count)
+ return -EINVAL;
+ if (count > HEX_DIGITS)
+ count = HEX_DIGITS;
+ if (copy_from_user(hexnum, buffer, count))
+ return -EFAULT;
+
+ /*
+ * Parse the first 8 characters as a hex string, any non-hex char
+ * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same.
+ */
+ value = 0;
+
+ for (i = 0; i < count; i++) {
+ unsigned int c = hexnum[i];
+
+ switch (c) {
+ case '0' ... '9': c -= '0'; break;
+ case 'a' ... 'f': c -= 'a'-10; break;
+ case 'A' ... 'F': c -= 'A'-10; break;
+ default:
+ goto out;
+ }
+ value = (value << 4) | c;
+ }
+out:
+ *ret = value;
+ return 0;
+}
+
+static int irq_affinity_write_proc (struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ int irq = (int) data, full_count = count, err;
+ unsigned long new_value;
+
+ if (!irq_desc[irq].handler->set_affinity)
+ return -EIO;
+
+	err = parse_hex_value(buffer, count, &new_value);
+	if (err) return err;
+#if CONFIG_SMP
+ /*
+	 * Do not allow disabling IRQs completely - it's too easy a
+ * way to make the system unusable accidentally :-) At least
+ * one online CPU still has to be targeted.
+ */
+ if (!(new_value & cpu_online_map))
+ return -EINVAL;
+#endif
+
+ irq_affinity[irq] = new_value;
+ irq_desc[irq].handler->set_affinity(irq, new_value);
+
+ return full_count;
+}
+
+static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ unsigned long *mask = (unsigned long *) data;
+ if (count < HEX_DIGITS+1)
+ return -EINVAL;
+ return sprintf (page, "%08lx\n", *mask);
+}
+
+static int prof_cpu_mask_write_proc (struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ unsigned long *mask = (unsigned long *) data, full_count = count, err;
+ unsigned long new_value;
+
+ err = parse_hex_value(buffer, count, &new_value);
+ if (err)
+ return err;
+
+ *mask = new_value;
+ return full_count;
+}
+
+#define MAX_NAMELEN 10
+
+static void register_irq_proc (unsigned int irq)
+{
+ struct proc_dir_entry *entry;
+ char name [MAX_NAMELEN];
+
+ if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type))
+ return;
+
+ memset(name, 0, MAX_NAMELEN);
+ sprintf(name, "%d", irq);
+
+ /* create /proc/irq/1234 */
+ irq_dir[irq] = proc_mkdir(name, root_irq_dir);
+
+ /* create /proc/irq/1234/smp_affinity */
+ entry = create_proc_entry("smp_affinity", 0700, irq_dir[irq]);
+
+ entry->nlink = 1;
+ entry->data = (void *)irq;
+ entry->read_proc = irq_affinity_read_proc;
+ entry->write_proc = irq_affinity_write_proc;
+
+ smp_affinity_entry[irq] = entry;
+}
+
+unsigned long prof_cpu_mask = -1;
+
+void init_irq_proc (void)
+{
+ struct proc_dir_entry *entry;
+ int i;
+
+ /* create /proc/irq */
+ root_irq_dir = proc_mkdir("irq", 0);
+
+ /* create /proc/irq/prof_cpu_mask */
+ entry = create_proc_entry("prof_cpu_mask", 0700, root_irq_dir);
+
+ entry->nlink = 1;
+ entry->data = (void *)&prof_cpu_mask;
+ entry->read_proc = prof_cpu_mask_read_proc;
+ entry->write_proc = prof_cpu_mask_write_proc;
+
+ /*
+ * Create entries for all existing IRQs.
+ */
+ for (i = 0; i < NR_IRQS; i++) {
+ if (irq_desc[i].handler == &no_irq_type)
+ continue;
+ register_irq_proc(i);
+ }
+}
+
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index 26b6525d8..84490b40b 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -20,6 +20,9 @@
* Initial release.
* 1.01 18 February 2000, Tigran Aivazian <tigran@sco.com>
* Added read() support + cleanups.
+ * 1.02 21 February 2000, Tigran Aivazian <tigran@sco.com>
+ * Added 'device trimming' support. open(O_WRONLY) zeroes
+ * and frees the saved copy of applied microcode.
*/
#include <linux/init.h>
@@ -33,7 +36,7 @@
#include <asm/uaccess.h>
#include <asm/processor.h>
-#define MICROCODE_VERSION "1.01"
+#define MICROCODE_VERSION "1.02"
MODULE_DESCRIPTION("CPU (P6) microcode update driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@ocston.org>");
@@ -53,7 +56,7 @@ static void do_update_one(void *);
/*
* Bits in microcode_status. (31 bits of room for future expansion)
*/
-#define MICROCODE_IS_OPEN 0 /* set if /dev/microcode is in use */
+#define MICROCODE_IS_OPEN 0 /* set if device is in use */
static unsigned long microcode_status = 0;
/* the actual array of microcode blocks, each 2048 bytes */
@@ -68,31 +71,16 @@ static struct file_operations microcode_fops = {
release: microcode_release,
};
-static struct inode_operations microcode_inops = {
- default_file_ops: &microcode_fops,
-};
-
static struct proc_dir_entry *proc_microcode;
static int __init microcode_init(void)
{
- int size;
-
proc_microcode = create_proc_entry("microcode", S_IWUSR|S_IRUSR, proc_root_driver);
if (!proc_microcode) {
printk(KERN_ERR "microcode: can't create /proc/driver/microcode\n");
return -ENOMEM;
}
- proc_microcode->ops = &microcode_inops;
- size = smp_num_cpus * sizeof(struct microcode);
- mc_applied = kmalloc(size, GFP_KERNEL);
- if (!mc_applied) {
- remove_proc_entry("microcode", proc_root_driver);
- printk(KERN_ERR "microcode: can't allocate memory for saved microcode\n");
- return -ENOMEM;
- }
- memset(mc_applied, 0, size); /* so that reading from offsets corresponding to failed
- update makes this obvious */
+ proc_microcode->proc_fops = &microcode_fops;
printk(KERN_INFO "P6 Microcode Update Driver v%s registered\n", MICROCODE_VERSION);
return 0;
}
@@ -100,7 +88,8 @@ static int __init microcode_init(void)
static void __exit microcode_exit(void)
{
remove_proc_entry("microcode", proc_root_driver);
- kfree(mc_applied);
+ if (mc_applied)
+ kfree(mc_applied);
printk(KERN_INFO "P6 Microcode Update Driver v%s unregistered\n", MICROCODE_VERSION);
}
@@ -119,6 +108,15 @@ static int microcode_open(struct inode *inode, struct file *file)
if (test_and_set_bit(MICROCODE_IS_OPEN, &microcode_status))
return -EBUSY;
+ if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
+ proc_microcode->size = 0;
+ if (mc_applied) {
+ memset(mc_applied, 0, smp_num_cpus * sizeof(struct microcode));
+ kfree(mc_applied);
+ mc_applied = NULL;
+ }
+ }
+
MOD_INC_USE_COUNT;
return 0;
@@ -243,6 +241,16 @@ static ssize_t microcode_write(struct file *file, const char *buf, size_t len, l
sizeof(struct microcode));
return -EINVAL;
}
+ if (!mc_applied) {
+ int size = smp_num_cpus * sizeof(struct microcode);
+ mc_applied = kmalloc(size, GFP_KERNEL);
+ if (!mc_applied) {
+ printk(KERN_ERR "microcode: can't allocate memory for saved microcode\n");
+ return -ENOMEM;
+ }
+ memset(mc_applied, 0, size);
+ }
+
lock_kernel();
microcode_num = len/sizeof(struct microcode);
microcode = vmalloc(len);
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index 81685d2f5..030b31647 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -316,11 +316,14 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
return num_processors;
}
+static struct intel_mp_floating *mpf_found;
+
/*
* Scan the memory blocks for an SMP configuration block.
*/
-static int __init smp_get_mpf(struct intel_mp_floating *mpf)
+void __init get_smp_config (void)
{
+ struct intel_mp_floating *mpf = mpf_found;
printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
if (mpf->mpf_feature2 & (1<<7)) {
printk(" IMCR and PIC compatibility mode.\n");
@@ -329,7 +332,6 @@ static int __init smp_get_mpf(struct intel_mp_floating *mpf)
printk(" Virtual Wire compatibility mode.\n");
pic_mode = 0;
}
- smp_found_config = 1;
/*
* default CPU id - if it's different in the mptable
* then we change it before first using it.
@@ -388,7 +390,7 @@ static int __init smp_get_mpf(struct intel_mp_floating *mpf)
default:
printk("???\nUnknown standard configuration %d\n",
mpf->mpf_feature1);
- return 1;
+ return;
}
if (mpf->mpf_feature1 > 4) {
printk("Bus #1 is PCI\n");
@@ -412,10 +414,9 @@ static int __init smp_get_mpf(struct intel_mp_floating *mpf)
/*
* Only use the first configuration found.
*/
- return 1;
}
-static int __init smp_scan_config(unsigned long base, unsigned long length)
+static int __init smp_scan_config (unsigned long base, unsigned long length)
{
unsigned long *bp = phys_to_virt(base);
struct intel_mp_floating *mpf;
@@ -432,9 +433,13 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
((mpf->mpf_specification == 1)
|| (mpf->mpf_specification == 4)) ) {
- printk("found SMP MP-table at %08ld\n",
+ smp_found_config = 1;
+ printk("found SMP MP-table at %08lx\n",
virt_to_phys(mpf));
- smp_get_mpf(mpf);
+ reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
+ if (mpf->mpf_physptr)
+ reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE);
+ mpf_found = mpf;
return 1;
}
bp += 4;
@@ -443,7 +448,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
return 0;
}
-void __init init_intel_smp (void)
+void __init find_intel_smp (void)
{
unsigned int address;
@@ -488,7 +493,7 @@ void __init init_intel_smp (void)
* sense, but it doesnt have a BIOS(-configuration table).
* No problem for Linux.
*/
-void __init init_visws_smp(void)
+void __init find_visws_smp(void)
{
smp_found_config = 1;
@@ -505,13 +510,13 @@ void __init init_visws_smp(void)
* - Intel MP Configuration Table
* - or SGI Visual Workstation configuration
*/
-void __init init_smp_config (void)
+void __init find_smp_config (void)
{
#ifdef CONFIG_X86_IO_APIC
- init_intel_smp();
+ find_intel_smp();
#endif
#ifdef CONFIG_VISWS
- init_visws_smp();
+ find_visws_smp();
#endif
}
diff --git a/arch/i386/kernel/mtrr.c b/arch/i386/kernel/mtrr.c
index a0a4ab851..cc9c7eafe 100644
--- a/arch/i386/kernel/mtrr.c
+++ b/arch/i386/kernel/mtrr.c
@@ -1507,11 +1507,6 @@ static struct file_operations mtrr_fops =
# ifdef CONFIG_PROC_FS
-static struct inode_operations proc_mtrr_inode_operations =
-{
- &mtrr_fops, /* default property file-ops */
-};
-
static struct proc_dir_entry *proc_root_mtrr;
# endif /* CONFIG_PROC_FS */
@@ -1836,9 +1831,9 @@ int __init mtrr_init(void)
#ifdef CONFIG_PROC_FS
proc_root_mtrr = create_proc_entry ("mtrr", S_IWUSR | S_IRUGO, &proc_root);
- proc_root_mtrr->ops = &proc_mtrr_inode_operations;
+ proc_root_mtrr->proc_fops = &mtrr_fops;
#endif
-#ifdev CONFIG_DEVFS_FS
+#ifdef CONFIG_DEVFS_FS
devfs_handle = devfs_register (NULL, "cpu/mtrr", 0, DEVFS_FL_DEFAULT, 0, 0,
S_IFREG | S_IRUGO | S_IWUSR, 0, 0,
&mtrr_fops, NULL);
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 0f61ca543..19f7022a4 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -74,8 +74,13 @@ void enable_hlt(void)
*/
static void default_idle(void)
{
- if (current_cpu_data.hlt_works_ok && !hlt_counter)
- asm volatile("sti ; hlt" : : : "memory");
+ if (current_cpu_data.hlt_works_ok && !hlt_counter) {
+ asm volatile("cli" : : : "memory");
+ if (!current->need_resched)
+ asm volatile("sti ; hlt" : : : "memory");
+ else
+ asm volatile("sti" : : : "memory");
+ }
}
/*
diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c
index c530eece0..febc592ae 100644
--- a/arch/i386/kernel/semaphore.c
+++ b/arch/i386/kernel/semaphore.c
@@ -150,8 +150,9 @@ int __down_interruptible(struct semaphore * sem)
int __down_trylock(struct semaphore * sem)
{
int sleepers;
+ unsigned long flags;
- spin_lock_irq(&semaphore_lock);
+ spin_lock_irqsave(&semaphore_lock, flags);
sleepers = sem->sleepers + 1;
sem->sleepers = 0;
@@ -162,7 +163,7 @@ int __down_trylock(struct semaphore * sem)
if (!atomic_add_negative(sleepers, &sem->count))
wake_up(&sem->wait);
- spin_unlock_irq(&semaphore_lock);
+ spin_unlock_irqrestore(&semaphore_lock, flags);
return 1;
}
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index cd2a3d8af..b5602ebec 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -119,7 +119,7 @@ extern int rd_image_start; /* starting block # of image */
#endif
extern int root_mountflags;
-extern int _text, _etext, _edata, _end;
+extern char _text, _etext, _edata, _end;
extern unsigned long cpu_hz;
/*
@@ -709,9 +709,20 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_IO_APIC
/*
- * Save possible boot-time SMP configuration:
+ * Find and reserve possible boot-time SMP configuration:
*/
- init_smp_config();
+ find_smp_config();
+#endif
+ paging_init();
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * get boot-time SMP configuration:
+ */
+ if (smp_found_config)
+ get_smp_config();
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+ init_apic_mappings();
#endif
#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 07797e760..7400b628b 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -360,8 +360,6 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
printk("Do you have a strange power saving mode enabled?\n");
}
-atomic_t nmi_counter[NR_CPUS];
-
#if CONFIG_X86_IO_APIC
int nmi_watchdog = 1;
@@ -437,7 +435,8 @@ asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
{
unsigned char reason = inb(0x61);
- atomic_inc(nmi_counter+smp_processor_id());
+
+ atomic_inc(&nmi_counter(smp_processor_id()));
if (!(reason & 0xc0)) {
#if CONFIG_X86_IO_APIC
/*
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index c3991b056..8b1324520 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -438,10 +438,6 @@ void __init paging_init(void)
__flush_tlb_all();
-#ifdef CONFIG_X86_LOCAL_APIC
- init_apic_mappings();
-#endif
-
#ifdef CONFIG_HIGHMEM
kmap_init();
#endif
diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
index 685d85b20..af51038e5 100644
--- a/arch/ia64/ia32/binfmt_elf32.c
+++ b/arch/ia64/ia32/binfmt_elf32.c
@@ -82,6 +82,8 @@ void ia64_elf32_init(struct pt_regs *regs)
/* Do all the IA-32 setup here */
+ current->thread.map_base = 0x40000000;
+
/* CS descriptor */
__asm__("mov ar.csd = %0" : /* no outputs */
: "r" IA64_SEG_DESCRIPTOR(0L, 0xFFFFFL, 0xBL, 1L,
diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S
index 82ba58129..bd7b0517b 100644
--- a/arch/ia64/ia32/ia32_entry.S
+++ b/arch/ia64/ia32/ia32_entry.S
@@ -75,7 +75,7 @@ ia32_syscall_table:
data8 sys_unlink /* 10 */
data8 sys32_execve
data8 sys_chdir
- data8 sys_ni_syscall /* sys_time is not supported on ia64 */
+ data8 sys32_time
data8 sys_mknod
data8 sys_chmod /* 15 */
data8 sys_lchown
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index d61f1cfe5..8d4e4a8fd 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -278,7 +278,7 @@ do_mmap_fake(struct file *file, unsigned long addr, unsigned long len,
if (!file)
return -EINVAL;
inode = file->f_dentry->d_inode;
- if (!inode->i_op || !inode->i_op->default_file_ops)
+ if (!inode->i_fop)
return -EINVAL;
if (!file->f_op->read)
return -EINVAL;
@@ -1930,6 +1930,25 @@ out:
return err;
}
+/*
+ * sys_time() can be implemented in user-level using
+ * sys_gettimeofday(). IA64 did this but i386 Linux did not
+ * so we have to implement this system call here.
+ */
+asmlinkage long sys32_time(int * tloc)
+{
+ int i;
+
+ /* SMP: This is fairly trivial. We grab CURRENT_TIME and
+ stuff it to user space. No side effects */
+ i = CURRENT_TIME;
+ if (tloc) {
+ if (put_user(i,tloc))
+ i = -EFAULT;
+ }
+ return i;
+}
+
#ifdef NOTYET /* UNTESTED FOR IA64 FROM HERE DOWN */
/* In order to reduce some races, while at the same time doing additional
diff --git a/arch/ia64/kdb/kdbsupport.c b/arch/ia64/kdb/kdbsupport.c
index 0b574ae6e..d074a01a3 100644
--- a/arch/ia64/kdb/kdbsupport.c
+++ b/arch/ia64/kdb/kdbsupport.c
@@ -28,9 +28,10 @@
#include <linux/stddef.h>
#include <linux/vmalloc.h>
-#include <asm/uaccess.h>
+#include <asm/delay.h>
#include <asm/kdbsupport.h>
#include <asm/rse.h>
+#include <asm/uaccess.h>
extern kdb_state_t kdb_state ;
k_machreg_t dbregs[KDB_DBREGS];
@@ -45,6 +46,21 @@ kdb_setup (char *str)
__setup("kdb", kdb_setup);
static int
+kdb_ia64_itm (int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+ int diag;
+ unsigned long val;
+
+ diag = kdbgetularg(argv[1], &val);
+ if (diag)
+ return diag;
+	kdb_printf("new itm=0x%lx\n", val);
+
+ ia64_set_itm(val);
+ return 0;
+}
+
+static int
kdb_ia64_sir (int argc, const char **argv, const char **envp, struct pt_regs *regs)
{
u64 lid, tpr, lrr0, lrr1, itv, pmv, cmcv;
@@ -53,15 +69,17 @@ kdb_ia64_sir (int argc, const char **argv, const char **envp, struct pt_regs *re
asm ("mov %0=cr.tpr" : "=r"(tpr));
asm ("mov %0=cr.lrr0" : "=r"(lrr0));
asm ("mov %0=cr.lrr1" : "=r"(lrr1));
- printk ("lid=0x%lx, tpr=0x%lx, lrr0=0x%lx, llr1=0x%lx\n", lid, tpr, lrr0, lrr1);
+ printk("lid=0x%lx, tpr=0x%lx, lrr0=0x%lx, llr1=0x%lx\n", lid, tpr, lrr0, lrr1);
asm ("mov %0=cr.itv" : "=r"(itv));
asm ("mov %0=cr.pmv" : "=r"(pmv));
asm ("mov %0=cr.cmcv" : "=r"(cmcv));
- printk ("itv=0x%lx, pmv=0x%lx, cmcv=0x%lx\n", itv, pmv, cmcv);
+ printk("itv=0x%lx, pmv=0x%lx, cmcv=0x%lx\n", itv, pmv, cmcv);
- printk ("irr=0x%016lx,0x%016lx,0x%016lx,0x%016lx\n",
+ printk("irr=0x%016lx,0x%016lx,0x%016lx,0x%016lx\n",
ia64_get_irr0(), ia64_get_irr1(), ia64_get_irr2(), ia64_get_irr3());
+
+ printk("itc=0x%016lx, itm=0x%016lx\n", ia64_get_itc(), ia64_get_itm());
return 0;
}
@@ -90,6 +108,7 @@ kdb_init (void)
kdb_state.bkpt_handling_state = BKPTSTATE_NOT_HANDLED ;
kdb_register("irr", kdb_ia64_sir, "", "Show interrupt registers", 0);
+ kdb_register("itm", kdb_ia64_itm, "", "Set new ITM value", 0);
}
/*
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index 5efe50164..6059e41c6 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -158,6 +158,9 @@ ia64_handle_irq (unsigned long irq, struct pt_regs *regs)
unsigned long eoi_ptr;
# ifdef CONFIG_USB
+ extern void reenable_usb (void);
+ extern void disable_usb (void);
+
if (usbfix)
disable_usb();
# endif
diff --git a/arch/ia64/kernel/irq_internal.c b/arch/ia64/kernel/irq_internal.c
index 1ae904fe8..cc59e0c72 100644
--- a/arch/ia64/kernel/irq_internal.c
+++ b/arch/ia64/kernel/irq_internal.c
@@ -60,7 +60,7 @@ internal_noop (unsigned int irq)
}
struct hw_interrupt_type irq_type_ia64_internal = {
- "IA64 internal",
+ "IA64-internal",
(void (*)(unsigned long)) internal_noop, /* init */
internal_noop, /* startup */
internal_noop, /* shutdown */
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 4c3ac242a..b4592999f 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -1026,7 +1026,7 @@ dispatch_to_fault_handler:
// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
mov r16=cr.ifa
rsm psr.dt
-#if 0
+#if 1
// If you disable this, you MUST re-enable to update_mmu_cache() code in pgtable.h
mov r17=_PAGE_SIZE_4K<<2
;;
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 5b6deb5f5..cc26b8760 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -97,6 +97,14 @@ cpu_idle (void *unused)
check_pgt_cache();
if (pm_idle)
(*pm_idle)();
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+ if (ia64_get_itm() < ia64_get_itc()) {
+ extern void ia64_reset_itm();
+
+ printk("cpu_idle: ITM in past, resetting it...\n");
+ ia64_reset_itm();
+ }
+#endif
}
}
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 7c5ace740..cfcff3063 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -21,6 +21,10 @@
#include <asm/ptrace.h>
#include <asm/sal.h>
#include <asm/system.h>
+#ifdef CONFIG_KDB
+# include <linux/kdb.h>
+#endif
+
extern rwlock_t xtime_lock;
extern volatile unsigned long lost_ticks;
@@ -61,7 +65,7 @@ do_profile (unsigned long ip)
* update to jiffy. The xtime_lock must be at least read-locked when
* calling this routine.
*/
-static inline unsigned long
+static /*inline*/ unsigned long
gettimeoffset (void)
{
unsigned long now = ia64_get_itc();
@@ -186,6 +190,20 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
write_unlock(&xtime_lock);
}
+#ifdef CONFIG_ITANIUM_ASTEP_SPECIFIC
+
+void
+ia64_reset_itm (void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ timer_interrupt(0, 0, current);
+ local_irq_restore(flags);
+}
+
+#endif /* CONFIG_ITANIUM_ASTEP_SPECIFIC */
+
/*
* Encapsulate access to the itm structure for SMP.
*/
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index c242622ec..1f5106036 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -110,15 +110,75 @@ void
ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
{
siginfo_t siginfo;
+ int sig, code;
- /* gdb uses a break number of 0xccccc for debug breakpoints: */
- if (break_num != 0xccccc)
- die_if_kernel("Bad break", regs, break_num);
+ /* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */
+ siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
+ siginfo.si_imm = break_num;
- siginfo.si_signo = SIGTRAP;
- siginfo.si_errno = break_num; /* XXX is it legal to abuse si_errno like this? */
- siginfo.si_code = TRAP_BRKPT;
- send_sig_info(SIGTRAP, &siginfo, current);
+ switch (break_num) {
+ case 0: /* unknown error */
+ sig = SIGILL; code = ILL_ILLOPC;
+ break;
+
+ case 1: /* integer divide by zero */
+ sig = SIGFPE; code = FPE_INTDIV;
+ break;
+
+ case 2: /* integer overflow */
+ sig = SIGFPE; code = FPE_INTOVF;
+ break;
+
+ case 3: /* range check/bounds check */
+ sig = SIGFPE; code = FPE_FLTSUB;
+ break;
+
+ case 4: /* null pointer dereference */
+ sig = SIGSEGV; code = SEGV_MAPERR;
+ break;
+
+ case 5: /* misaligned data */
+ sig = SIGSEGV; code = BUS_ADRALN;
+ break;
+
+ case 6: /* decimal overflow */
+ sig = SIGFPE; code = __FPE_DECOVF;
+ break;
+
+ case 7: /* decimal divide by zero */
+ sig = SIGFPE; code = __FPE_DECDIV;
+ break;
+
+ case 8: /* packed decimal error */
+ sig = SIGFPE; code = __FPE_DECERR;
+ break;
+
+ case 9: /* invalid ASCII digit */
+ sig = SIGFPE; code = __FPE_INVASC;
+ break;
+
+ case 10: /* invalid decimal digit */
+ sig = SIGFPE; code = __FPE_INVDEC;
+ break;
+
+ case 11: /* paragraph stack overflow */
+ sig = SIGSEGV; code = __SEGV_PSTKOVF;
+ break;
+
+ default:
+ if (break_num < 0x40000 || break_num > 0x100000)
+ die_if_kernel("Bad break", regs, break_num);
+
+ if (break_num < 0x80000) {
+ sig = SIGILL; code = __ILL_BREAK;
+ } else {
+ sig = SIGTRAP; code = TRAP_BRKPT;
+ }
+ }
+ siginfo.si_signo = sig;
+ siginfo.si_errno = 0;
+ siginfo.si_code = code;
+ send_sig_info(sig, &siginfo, current);
}
/*
@@ -240,6 +300,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
{
long exception, bundle[2];
unsigned long fault_ip;
+ struct siginfo siginfo;
static int fpu_swa_count = 0;
static unsigned long last_time;
@@ -265,21 +326,41 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
ia64_increment_ip(regs);
} else if (exception == -1) {
printk("handle_fpu_swa: fp_emulate() returned -1\n");
- return -2;
+ return -1;
} else {
/* is next instruction a trap? */
if (exception & 2) {
ia64_increment_ip(regs);
}
- return -1;
+ siginfo.si_signo = SIGFPE;
+ siginfo.si_errno = 0;
+ siginfo.si_code = 0;
+ siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
+ if (isr & 0x11) {
+ siginfo.si_code = FPE_FLTINV;
+ } else if (isr & 0x44) {
+ siginfo.si_code = FPE_FLTDIV;
+ }
+ send_sig_info(SIGFPE, &siginfo, current);
}
} else {
if (exception == -1) {
printk("handle_fpu_swa: fp_emulate() returned -1\n");
- return -2;
+ return -1;
} else if (exception != 0) {
/* raise exception */
- return -1;
+ siginfo.si_signo = SIGFPE;
+ siginfo.si_errno = 0;
+ siginfo.si_code = 0;
+ siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
+ if (isr & 0x880) {
+ siginfo.si_code = FPE_FLTOVF;
+ } else if (isr & 0x1100) {
+ siginfo.si_code = FPE_FLTUND;
+ } else if (isr & 0x2200) {
+ siginfo.si_code = FPE_FLTRES;
+ }
+ send_sig_info(SIGFPE, &siginfo, current);
}
}
return 0;
@@ -369,22 +450,19 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
return;
case 30: /* Unaligned fault */
- sprintf(buf, "Unaligned access in kernel mode---don't do this!");
+ sprintf(buf, "Kernel unaligned trap accessing %016lx (ip=%016lx)!",
+ ifa, regs->cr_iip + ia64_psr(regs)->ri);
break;
case 32: /* fp fault */
case 33: /* fp trap */
- result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr);
+ result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, &isr);
if (result < 0) {
siginfo.si_signo = SIGFPE;
siginfo.si_errno = 0;
- siginfo.si_code = 0; /* XXX fix me */
+ siginfo.si_code = FPE_FLTINV;
siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri);
- send_sig_info(SIGFPE, &siginfo, current);
- if (result == -1)
- send_sig_info(SIGFPE, &siginfo, current);
- else
- force_sig(SIGFPE, current);
+ force_sig(SIGFPE, current);
}
return;
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index 0bd213f6b..014adcf35 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -1384,30 +1384,33 @@ ia64_handle_unaligned(unsigned long ifa, struct pt_regs *regs)
load_store_t *insn;
int ret = -1;
- /*
- * We flag unaligned references while in kernel as
- * errors: the kernel must be fixed. The switch code
- * is in ivt.S at entry 30.
- *
- * So here we keep a simple sanity check.
- */
- if ( !user_mode(regs) ) {
- die_if_kernel("Unaligned reference while in kernel\n", regs, 30);
- /* NOT_REACHED */
+ if (current->thread.flags & IA64_THREAD_UAC_SIGBUS) {
+ struct siginfo si;
+
+ si.si_signo = SIGBUS;
+ si.si_errno = 0;
+ si.si_code = BUS_ADRALN;
+ si.si_addr = (void *) ifa;
+ send_sig_info (SIGBUS, &si, current);
+ return;
}
- /*
- * Make sure we log the unaligned access, so that user/sysadmin can notice it
- * and eventually fix the program.
- *
- * We don't want to do that for every access so we pace it with jiffies.
- */
- if ( unalign_count > 5 && jiffies - last_time > 5*HZ ) unalign_count = 0;
- if ( ++unalign_count < 5 ) {
- last_time = jiffies;
- printk("%s(%d): unaligned trap accessing %016lx (ip=%016lx)\n",
- current->comm, current->pid, ifa, regs->cr_iip + ipsr->ri);
-
+ if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT)) {
+ /*
+ * Make sure we log the unaligned access, so that
+ * user/sysadmin can notice it and eventually fix the
+ * program.
+ *
+ * We don't want to do that for every access so we
+ * pace it with jiffies.
+ */
+ if (unalign_count > 5 && jiffies - last_time > 5*HZ)
+ unalign_count = 0;
+ if (++unalign_count < 5) {
+ last_time = jiffies;
+ printk("%s(%d): unaligned trap accessing %016lx (ip=%016lx)\n",
+ current->comm, current->pid, ifa, regs->cr_iip + ipsr->ri);
+ }
}
DPRINT(("iip=%lx ifa=%lx isr=%lx\n", regs->cr_iip, ifa, regs->cr_ipsr));
diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S
index 03a540a80..58c92876f 100644
--- a/arch/ia64/lib/copy_user.S
+++ b/arch/ia64/lib/copy_user.S
@@ -1,71 +1,375 @@
-/*
- * This routine copies a linear memory buffer across the user/kernel boundary. When
- * reading a byte from the source causes a fault, the remainder of the destination
- * buffer is zeroed out. Note that this can happen only when copying from user
- * to kernel memory and we do this to absolutely guarantee that the
- * kernel doesn't operate on random data.
- *
- * This file is derived from arch/alpha/lib/copy_user.S.
- *
- * Inputs:
- * in0: address of destination buffer
- * in1: address of source buffer
- * in2: length of buffer in bytes
- * Outputs:
- * r8: number of bytes that didn't get copied due to a fault
- *
- * Copyright (C) 1999 Hewlett-Packard Co
- * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
- */
-
-#define EXI(x...) \
-99: x; \
+// The label comes first because our store instruction contains a comma
+// and confuses the preprocessor otherwise
+//
+#undef DEBUG
+#ifdef DEBUG
+#define EX(y,x...) \
+99: x
+#else
+#define EX(y,x...) \
.section __ex_table,"a"; \
- data4 @gprel(99b); \
- data4 .Lexit_in-99b; \
- .previous
+ data4 @gprel(99f); \
+ data4 y-99f; \
+ .previous; \
+99: x
+#endif
-#define EXO(x...) \
-99: x; \
- .section __ex_table,"a"; \
- data4 @gprel(99b); \
- data4 .Lexit_out-99b; \
- .previous
-
- .text
- .psr abi64
- .psr lsb
- .lsb
-
- .align 32
- .global __copy_user
- .proc __copy_user
+//
+// Tuneable parameters
+//
+#define COPY_BREAK 16 // we do byte copy below (must be >=16)
+#define PIPE_DEPTH 4 // pipe depth
+
+#define EPI p[PIPE_DEPTH-1] // PASTE(p,16+PIPE_DEPTH-1)
+
+//
+// arguments
+//
+#define dst in0
+#define src in1
+#define len in2
+
+//
+// local registers
+//
+#define cnt r18
+#define len2 r19
+#define saved_lc r20
+#define saved_pr r21
+#define tmp r22
+#define val r23
+#define src1 r24
+#define dst1 r25
+#define src2 r26
+#define dst2 r27
+#define len1 r28
+#define enddst r29
+#define endsrc r30
+#define saved_pfs r31
+ .text
+ .psr abi64
+ .psr lsb
+
+ .align 16
+ .global __copy_user
+ .proc __copy_user
__copy_user:
- alloc r10=ar.pfs,3,0,0,0
- mov r9=ar.lc // save ar.lc
- mov ar.lc=in2 // set ar.lc to length of buffer
- br.sptk.few .Lentr
-
- // XXX braindead copy loop---this needs to be optimized
-.Loop1:
- EXI(ld1 r8=[in1],1)
- ;;
- EXO(st1 [in0]=r8,1)
-.Lentr: br.cloop.dptk.few .Loop1 // repeat unless ar.lc--==0
- ;; // avoid RAW on ar.lc
-.Lexit_out:
- mov r8=ar.lc // return how many bytes we _didn't_ copy
- mov ar.lc=r9
- br.ret.sptk.few rp
-
-.Lexit_in:
- // clear the remainder of the buffer:
- mov r8=ar.lc // return how many bytes we _didn't_ copy
-.Loop2:
- st1 [in0]=r0,1 // this cannot fault because we get here only on user->kernel copies
- br.cloop.dptk.few .Loop2
- ;; // avoid RAW on ar.lc
- mov ar.lc=r9
- br.ret.sptk.few rp
-
- .endp __copy_user
+ alloc saved_pfs=ar.pfs,3,((2*PIPE_DEPTH+7)&~7),0,((2*PIPE_DEPTH+7)&~7)
+
+ .rotr val1[PIPE_DEPTH],val2[PIPE_DEPTH]
+ .rotp p[PIPE_DEPTH]
+
+ adds len2=-1,len // br.ctop is repeat/until
+ mov ret0=r0
+
+ ;; // RAW of cfm when len=0
+ cmp.eq p8,p0=r0,len // check for zero length
+ mov saved_lc=ar.lc // preserve ar.lc (slow)
+(p8) br.ret.spnt.few rp // empty mempcy()
+ ;;
+	add	enddst=dst,len	// first byte after end of destination
+	add	endsrc=src,len	// first byte after end of source
+ mov saved_pr=pr // preserve predicates
+
+ mov dst1=dst // copy because of rotation
+ mov ar.ec=PIPE_DEPTH
+ mov pr.rot=1<<16 // p16=true all others are false
+
+ mov src1=src // copy because of rotation
+ mov ar.lc=len2 // initialize lc for small count
+ cmp.lt p10,p7=COPY_BREAK,len // if len > COPY_BREAK then long copy
+
+ xor tmp=src,dst // same alignment test prepare
+(p10) br.cond.dptk.few long_memcpy
+ ;; // RAW pr.rot/p16 ?
+ //
+ // Now we do the byte by byte loop with software pipeline
+ //
+ // p7 is necessarily false by now
+1:
+ EX(failure_in_pipe1,(p16) ld1 val1[0]=[src1],1)
+
+ EX(failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
+ br.ctop.dptk.few 1b
+ ;;
+ mov ar.lc=saved_lc
+ mov pr=saved_pr,0xffffffffffff0000
+ mov ar.pfs=saved_pfs // restore ar.ec
+ br.ret.sptk.few rp // end of short memcpy
+
+ //
+	// Beginning of long memcpy (i.e. > 16 bytes)
+ //
+long_memcpy:
+ tbit.nz p6,p7=src1,0 // odd alignement
+ and tmp=7,tmp
+ ;;
+ cmp.eq p10,p8=r0,tmp
+ mov len1=len // copy because of rotation
+(p8) br.cond.dpnt.few 1b // XXX Fixme. memcpy_diff_align
+ ;;
+ // At this point we know we have more than 16 bytes to copy
+ // and also that both src and dest have the same alignment
+ // which may not be the one we want. So for now we must move
+ // forward slowly until we reach 16byte alignment: no need to
+ // worry about reaching the end of buffer.
+ //
+ EX(failure_in1,(p6) ld1 val1[0]=[src1],1) // 1-byte aligned
+(p6) adds len1=-1,len1;;
+ tbit.nz p7,p0=src1,1
+ ;;
+ EX(failure_in1,(p7) ld2 val1[1]=[src1],2) // 2-byte aligned
+(p7) adds len1=-2,len1;;
+ tbit.nz p8,p0=src1,2
+ ;;
+ //
+ // Stop bit not required after ld4 because if we fail on ld4
+ // we have never executed the ld1, therefore st1 is not executed.
+ //
+ EX(failure_in1,(p8) ld4 val2[0]=[src1],4) // 4-byte aligned
+ EX(failure_out,(p6) st1 [dst1]=val1[0],1)
+ tbit.nz p9,p0=src1,3
+ ;;
+ //
+ // Stop bit not required after ld8 because if we fail on ld8
+ // we have never executed the ld2, therefore st2 is not executed.
+ //
+ EX(failure_in1,(p9) ld8 val2[1]=[src1],8) // 8-byte aligned
+ EX(failure_out,(p7) st2 [dst1]=val1[1],2)
+(p8) adds len1=-4,len1
+ ;;
+ EX(failure_out, (p8) st4 [dst1]=val2[0],4)
+(p9) adds len1=-8,len1;;
+ shr.u cnt=len1,4 // number of 128-bit (2x64bit) words
+ ;;
+ EX(failure_out, (p9) st8 [dst1]=val2[1],8)
+ tbit.nz p6,p0=len1,3
+ cmp.eq p7,p0=r0,cnt
+ adds tmp=-1,cnt // br.ctop is repeat/until
+(p7) br.cond.dpnt.few .dotail // we have less than 16 bytes left
+ ;;
+ adds src2=8,src1
+ adds dst2=8,dst1
+ mov ar.lc=tmp
+ ;;
+ //
+ // 16bytes/iteration
+ //
+2:
+ EX(failure_in3,(p16) ld8 val1[0]=[src1],16)
+(p16) ld8 val2[0]=[src2],16
+
+ EX(failure_out, (EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16)
+(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16
+ br.ctop.dptk.few 2b
+ ;; // RAW on src1 when fall through from loop
+ //
+ // Tail correction based on len only
+ //
+ // No matter where we come from (loop or test) the src1 pointer
+ // is 16 byte aligned AND we have less than 16 bytes to copy.
+ //
+.dotail:
+ EX(failure_in1,(p6) ld8 val1[0]=[src1],8) // at least 8 bytes
+ tbit.nz p7,p0=len1,2
+ ;;
+ EX(failure_in1,(p7) ld4 val1[1]=[src1],4) // at least 4 bytes
+ tbit.nz p8,p0=len1,1
+ ;;
+ EX(failure_in1,(p8) ld2 val2[0]=[src1],2) // at least 2 bytes
+ tbit.nz p9,p0=len1,0
+ ;;
+ EX(failure_out, (p6) st8 [dst1]=val1[0],8)
+ ;;
+ EX(failure_in1,(p9) ld1 val2[1]=[src1]) // only 1 byte left
+ mov ar.lc=saved_lc
+ ;;
+ EX(failure_out,(p7) st4 [dst1]=val1[1],4)
+ mov pr=saved_pr,0xffffffffffff0000
+ ;;
+ EX(failure_out, (p8) st2 [dst1]=val2[0],2)
+ mov ar.pfs=saved_pfs
+ ;;
+ EX(failure_out, (p9) st1 [dst1]=val2[1])
+ br.ret.dptk.few rp
+
+
+
+ //
+ // Here we handle the case where the byte by byte copy fails
+ // on the load.
+ // Several factors make the zeroing of the rest of the buffer kind of
+ // tricky:
+ // - the pipeline: loads/stores are not in sync (pipeline)
+ //
+ // In the same loop iteration, the dst1 pointer does not directly
+ // reflect where the faulty load was.
+ //
+ // - pipeline effect
+ // When you get a fault on load, you may have valid data from
+	// previous loads not yet stored (still in transit). Such data must be
+	// stored normally before moving on to zeroing the rest.
+ //
+ // - single/multi dispersal independence.
+ //
+ // solution:
+ // - we don't disrupt the pipeline, i.e. data in transit in
+	// the software pipeline will eventually be moved to memory.
+ // We simply replace the load with a simple mov and keep the
+ // pipeline going. We can't really do this inline because
+ // p16 is always reset to 1 when lc > 0.
+ //
+failure_in_pipe1:
+ sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied
+1:
+(p16) mov val1[0]=r0
+(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1
+ br.ctop.dptk.few 1b
+ ;;
+ mov pr=saved_pr,0xffffffffffff0000
+ mov ar.lc=saved_lc
+ mov ar.pfs=saved_pfs
+ br.ret.dptk.few rp
+
+
+ //
+ // Here we handle the head & tail part when we check for alignment.
+ // The following code handles only the load failures. The
+	// main difficulty comes from the fact that loads/stores are
+ // scheduled. So when you fail on a load, the stores corresponding
+ // to previous successful loads must be executed.
+ //
+ // However some simplifications are possible given the way
+ // things work.
+ //
+ // 1) HEAD
+ // Theory of operation:
+ //
+ // Page A | Page B
+ // ---------|-----
+ // 1|8 x
+ // 1 2|8 x
+ // 4|8 x
+ // 1 4|8 x
+ // 2 4|8 x
+ // 1 2 4|8 x
+ // |1
+ // |2 x
+ // |4 x
+ //
+ // page_size >= 4k (2^12). (x means 4, 2, 1)
+ // Here we suppose Page A exists and Page B does not.
+ //
+ // As we move towards eight byte alignment we may encounter faults.
+ // The numbers on each page show the size of the load (current alignment).
+ //
+ // Key point:
+ // - if you fail on 1, 2, 4 then you have never executed any smaller
+ // size loads, e.g. failing ld4 means no ld1 nor ld2 executed
+ // before.
+ //
+ // This allows us to simplify the cleanup code, because basically you
+ // only have to worry about "pending" stores in the case of a failing
+ // ld8(). Given the way the code is written today, this means only
+ // worry about st2, st4. There we can use the information encapsulated
+ // into the predicates.
+ //
+ // Other key point:
+ // - if you fail on the ld8 in the head, it means you went straight
+	// to it, i.e. 8byte alignment within a nonexistent page.
+	// Again this comes from the fact that if you crossed just for the ld8 then
+ // you are 8byte aligned but also 16byte align, therefore you would
+ // either go for the 16byte copy loop OR the ld8 in the tail part.
+ // The combination ld1, ld2, ld4, ld8 where you fail on ld8 is impossible
+ // because it would mean you had 15bytes to copy in which case you
+ // would have defaulted to the byte by byte copy.
+ //
+ //
+ // 2) TAIL
+	// Here we know we have less than 16 bytes AND we are either 8 or 16 byte
+ // aligned.
+ //
+ // Key point:
+ // This means that we either:
+ // - are right on a page boundary
+ // OR
+ // - are at more than 16 bytes from a page boundary with
+ // at most 15 bytes to copy: no chance of crossing.
+ //
+ // This allows us to assume that if we fail on a load we haven't possibly
+ // executed any of the previous (tail) ones, so we don't need to do
+ // any stores. For instance, if we fail on ld2, this means we had
+ // 2 or 3 bytes left to copy and we did not execute the ld8 nor ld4.
+ //
+	// This means that we are in a situation similar to a fault in the
+ // head part. That's nice!
+ //
+failure_in1:
+// sub ret0=enddst,dst1 // number of bytes to zero, i.e. not copied
+// sub len=enddst,dst1,1
+ sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied
+ sub len=endsrc,src1,1
+ //
+ // we know that ret0 can never be zero at this point
+	// because we failed while trying to do a load, i.e. there is still
+ // some work to do.
+ // The failure_in1bis and length problem is taken care of at the
+ // calling side.
+ //
+ ;;
+failure_in1bis: // from (failure_in3)
+ mov ar.lc=len // Continue with a stupid byte store.
+ ;;
+5:
+ st1 [dst1]=r0,1
+ br.cloop.dptk.few 5b
+ ;;
+skip_loop:
+ mov pr=saved_pr,0xffffffffffff0000
+ mov ar.lc=saved_lc
+ mov ar.pfs=saved_pfs
+ br.ret.dptk.few rp
+
+ //
+ // Here we simply restart the loop but instead
+ // of doing loads we fill the pipeline with zeroes
+ // We can't simply store r0 because we may have valid
+ // data in transit in the pipeline.
+ // ar.lc and ar.ec are setup correctly at this point
+ //
+ // we MUST use src1/endsrc here and not dst1/enddst because
+ // of the pipeline effect.
+ //
+failure_in3:
+ sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied
+ ;;
+2:
+(p16) mov val1[0]=r0
+(p16) mov val2[0]=r0
+(EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16
+(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16
+ br.ctop.dptk.few 2b
+ ;;
+ cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ?
+ sub len=enddst,dst1,1 // precompute len
+(p6) br.cond.dptk.few failure_in1bis
+ ;;
+ mov pr=saved_pr,0xffffffffffff0000
+ mov ar.lc=saved_lc
+ mov ar.pfs=saved_pfs
+ br.ret.dptk.few rp
+
+ //
+ // handling of failures on stores: that's the easy part
+ //
+failure_out:
+ sub ret0=enddst,dst1
+ mov pr=saved_pr,0xffffffffffff0000
+ mov ar.lc=saved_lc
+
+ mov ar.pfs=saved_pfs
+ br.ret.dptk.few rp
+
+
+ .endp __copy_user
+
diff --git a/arch/mips/defconfig b/arch/mips/defconfig
index 5516700c1..bec96e794 100644
--- a/arch/mips/defconfig
+++ b/arch/mips/defconfig
@@ -176,6 +176,8 @@ CONFIG_SCSI_CONSTANTS=y
# SCSI low-level drivers
#
CONFIG_SCSI_SGIWD93=y
+CONFIG_SCSI_SGIWD93=y
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
# CONFIG_SCSI_7000FASST is not set
# CONFIG_SCSI_ACARD is not set
# CONFIG_SCSI_AHA152X is not set
@@ -210,7 +212,6 @@ CONFIG_SCSI_SGIWD93=y
# CONFIG_SCSI_U14_34F is not set
# CONFIG_SCSI_ULTRASTOR is not set
# CONFIG_SCSI_DEBUG is not set
-# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
#
# I2O device support
@@ -269,6 +270,7 @@ CONFIG_PSMOUSE=y
# CONFIG_WATCHDOG is not set
# CONFIG_NVRAM is not set
# CONFIG_RTC is not set
+# CONFIG_EFI_RTC is not set
#
# Video For Linux
diff --git a/arch/mips/kernel/irixelf.c b/arch/mips/kernel/irixelf.c
index b0225f750..41ea11b98 100644
--- a/arch/mips/kernel/irixelf.c
+++ b/arch/mips/kernel/irixelf.c
@@ -1,4 +1,4 @@
-/* $Id: irixelf.c,v 1.23 2000/01/29 01:41:59 ralf Exp $
+/* $Id: irixelf.c,v 1.24 2000/02/04 07:40:23 ralf Exp $
*
* irixelf.c: Code to load IRIX ELF executables which conform to
* the MIPS ABI.
@@ -257,11 +257,11 @@ static unsigned int load_irix_interp(struct elfhdr * interp_elf_ex,
#endif
/* First of all, some simple consistency checks */
- if((interp_elf_ex->e_type != ET_EXEC &&
- interp_elf_ex->e_type != ET_DYN) ||
- !elf_check_arch(interp_elf_ex->e_machine) ||
- (!interpreter_dentry->d_inode->i_op ||
- !interpreter_dentry->d_inode->i_op->default_file_ops->mmap)) {
+ if ((interp_elf_ex->e_type != ET_EXEC &&
+ interp_elf_ex->e_type != ET_DYN) ||
+ !elf_check_arch(interp_elf_ex->e_machine) ||
+ (!interpreter_dentry->d_inode->i_fop ||
+ !interpreter_dentry->d_inode->i_fop->mmap)) {
printk("IRIX interp has bad e_type %d\n", interp_elf_ex->e_type);
return 0xffffffff;
}
@@ -410,9 +410,8 @@ static int verify_binary(struct elfhdr *ehp, struct linux_binprm *bprm)
/* First of all, some simple consistency checks */
if((ehp->e_type != ET_EXEC && ehp->e_type != ET_DYN) ||
!elf_check_arch(ehp->e_machine) ||
- (!bprm->dentry->d_inode->i_op ||
- !bprm->dentry->d_inode->i_op->default_file_ops ||
- !bprm->dentry->d_inode->i_op->default_file_ops->mmap)) {
+ (!bprm->dentry->d_inode->i_fop ||
+ !bprm->dentry->d_inode->i_fop->mmap)) {
return -ENOEXEC;
}
@@ -877,8 +876,8 @@ static inline int do_load_irix_library(struct file *file)
/* First of all, some simple consistency checks. */
if(elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
!elf_check_arch(elf_ex.e_machine) ||
- (!dentry->d_inode->i_op ||
- !dentry->d_inode->i_op->default_file_ops->mmap))
+ (!dentry->d_inode->i_fop ||
+ !dentry->d_inode->i_fop->mmap))
return -ENOEXEC;
/* Now read in all of the header information. */
diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c
index 4e3ba5ad9..239576e4e 100644
--- a/arch/mips/kernel/irq.c
+++ b/arch/mips/kernel/irq.c
@@ -1,4 +1,4 @@
-/* $Id: irq.c,v 1.19 2000/02/04 07:40:23 ralf Exp $
+/* $Id: irq.c,v 1.20 2000/02/23 00:41:00 ralf Exp $
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
@@ -32,6 +32,24 @@
#include <asm/nile4.h>
/*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * every controller has a 'controller-template', that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs assumed to be messaging to Pentium local-APICs)
+ *
+ * the code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
+ */
+
+irq_cpustat_t irq_stat [NR_CPUS];
+
+/*
* This contains the irq mask for both 8259A irq controllers, it's an
* int so we can deal with the third PIC in some systems like the RM300.
* (XXX This is broken for big endian.)
diff --git a/arch/mips/kernel/mips_ksyms.c b/arch/mips/kernel/mips_ksyms.c
index f1c65805f..b8e1e6ec2 100644
--- a/arch/mips/kernel/mips_ksyms.c
+++ b/arch/mips/kernel/mips_ksyms.c
@@ -1,4 +1,4 @@
-/* $Id: mips_ksyms.c,v 1.24 2000/02/04 07:40:23 ralf Exp $
+/* $Id: mips_ksyms.c,v 1.25 2000/02/24 00:12:40 ralf Exp $
*
* Export MIPS-specific functions needed for loadable modules.
*
@@ -56,8 +56,6 @@ EXPORT_SYMBOL_NOVERS(strtok);
EXPORT_SYMBOL_NOVERS(strpbrk);
EXPORT_SYMBOL(_clear_page);
-EXPORT_SYMBOL(local_bh_count);
-EXPORT_SYMBOL(local_irq_count);
EXPORT_SYMBOL(enable_irq);
EXPORT_SYMBOL(disable_irq);
EXPORT_SYMBOL(kernel_thread);
diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index c2ee57b86..9bffcdc96 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -83,3 +83,8 @@ int get_cpuinfo(char *buffer)
return len;
}
+
+void init_irq_proc(void)
+{
+ /* Nothing, for now. */
+}
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 43cf5ad74..f6209461f 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -1,4 +1,4 @@
-/* $Id: setup.c,v 1.21 2000/01/26 00:07:44 ralf Exp $
+/* $Id: setup.c,v 1.22 2000/01/27 01:05:23 ralf Exp $
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
@@ -220,4 +220,6 @@ void __init setup_arch(char **cmdline_p)
*memory_start_p = initrd_end;
}
#endif
+
+ paging_init();
}
diff --git a/arch/mips/sgi/kernel/indy_int.c b/arch/mips/sgi/kernel/indy_int.c
index 916b0873a..cab112c29 100644
--- a/arch/mips/sgi/kernel/indy_int.c
+++ b/arch/mips/sgi/kernel/indy_int.c
@@ -1,4 +1,4 @@
-/* $Id: indy_int.c,v 1.16 1999/12/04 03:59:00 ralf Exp $
+/* $Id: indy_int.c,v 1.17 2000/02/04 07:40:23 ralf Exp $
*
* indy_int.c: Routines for generic manipulation of the INT[23] ASIC
* found on INDY workstations..
@@ -40,6 +40,24 @@
#include <asm/sgialib.h>
#include <asm/gdb-stub.h>
+/*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * every controller has a 'controller-template', that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs assumed to be messaging to Pentium local-APICs)
+ *
+ * the code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
+ */
+
+irq_cpustat_t irq_stat [NR_CPUS];
+
/* #define DEBUG_SGINT */
struct sgi_int2_regs *sgi_i2regs;
diff --git a/arch/mips64/defconfig b/arch/mips64/defconfig
index 039ffd84f..c4f4ba27a 100644
--- a/arch/mips64/defconfig
+++ b/arch/mips64/defconfig
@@ -305,6 +305,7 @@ CONFIG_SERIAL_CONSOLE=y
# CONFIG_WATCHDOG is not set
# CONFIG_NVRAM is not set
# CONFIG_RTC is not set
+# CONFIG_EFI_RTC is not set
#
# Video For Linux
diff --git a/arch/mips64/defconfig-ip22 b/arch/mips64/defconfig-ip22
index 9c6687735..b40469cfc 100644
--- a/arch/mips64/defconfig-ip22
+++ b/arch/mips64/defconfig-ip22
@@ -224,6 +224,7 @@ CONFIG_VT_CONSOLE=y
# CONFIG_WATCHDOG is not set
# CONFIG_NVRAM is not set
# CONFIG_RTC is not set
+# CONFIG_EFI_RTC is not set
#
# Video For Linux
diff --git a/arch/mips64/defconfig-ip27 b/arch/mips64/defconfig-ip27
index 039ffd84f..c4f4ba27a 100644
--- a/arch/mips64/defconfig-ip27
+++ b/arch/mips64/defconfig-ip27
@@ -305,6 +305,7 @@ CONFIG_SERIAL_CONSOLE=y
# CONFIG_WATCHDOG is not set
# CONFIG_NVRAM is not set
# CONFIG_RTC is not set
+# CONFIG_EFI_RTC is not set
#
# Video For Linux
diff --git a/arch/mips64/kernel/proc.c b/arch/mips64/kernel/proc.c
index 6fba1b756..063ac5d88 100644
--- a/arch/mips64/kernel/proc.c
+++ b/arch/mips64/kernel/proc.c
@@ -1,4 +1,4 @@
-/* $Id: proc.c,v 1.1 1999/09/27 16:01:37 ralf Exp $
+/* $Id: proc.c,v 1.1 1999/09/28 22:25:51 ralf Exp $
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
@@ -69,3 +69,8 @@ int get_cpuinfo(char *buffer)
return len;
}
+
+void init_irq_proc(void)
+{
+ /* Nothing, for now. */
+}
diff --git a/arch/mips64/kernel/setup.c b/arch/mips64/kernel/setup.c
index b42271b47..2a7d8a894 100644
--- a/arch/mips64/kernel/setup.c
+++ b/arch/mips64/kernel/setup.c
@@ -1,4 +1,4 @@
-/* $Id: setup.c,v 1.6 2000/01/27 01:05:24 ralf Exp $
+/* $Id: setup.c,v 1.7 2000/02/04 07:40:24 ralf Exp $
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
@@ -183,4 +183,6 @@ void __init setup_arch(char **cmdline_p)
*memory_start_p = initrd_end;
}
#endif
+
+ paging_init();
}
diff --git a/arch/mips64/sgi-ip22/ip22-int.c b/arch/mips64/sgi-ip22/ip22-int.c
index 420e47fc7..a26ad631d 100644
--- a/arch/mips64/sgi-ip22/ip22-int.c
+++ b/arch/mips64/sgi-ip22/ip22-int.c
@@ -1,4 +1,4 @@
-/* $Id: ip22-int.c,v 1.3 1999/12/04 03:59:01 ralf Exp $
+/* $Id: ip22-int.c,v 1.4 2000/02/04 07:40:24 ralf Exp $
*
* indy_int.c: Routines for generic manipulation of the INT[23] ASIC
* found on INDY workstations..
@@ -37,6 +37,24 @@
#include <asm/sgi/sgint23.h>
#include <asm/sgialib.h>
+/*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * every controller has a 'controller-template', that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs assumed to be messaging to Pentium local-APICs)
+ *
+ * the code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
+ */
+
+irq_cpustat_t irq_stat [NR_CPUS];
+
struct sgi_int2_regs *sgi_i2regs;
struct sgi_int3_regs *sgi_i3regs;
struct sgi_ioc_ints *ioc_icontrol;
diff --git a/arch/mips64/sgi-ip27/ip27-irq.c b/arch/mips64/sgi-ip27/ip27-irq.c
index 7f5a36f97..c9e6fe150 100644
--- a/arch/mips64/sgi-ip27/ip27-irq.c
+++ b/arch/mips64/sgi-ip27/ip27-irq.c
@@ -1,4 +1,4 @@
-/* $Id: ip27-irq.c,v 1.5 2000/02/04 07:40:24 ralf Exp $
+/* $Id: ip27-irq.c,v 1.6 2000/02/10 05:58:56 dagum Exp $
*
* ip27-irq.c: Highlevel interrupt handling for IP27 architecture.
*
@@ -35,6 +35,24 @@
#include <asm/sn/sn0/ip27.h>
#include <asm/sn/arch.h>
+/*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * every controller has a 'controller-template', that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs assumed to be messaging to Pentium local-APICs)
+ *
+ * the code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
+ */
+
+irq_cpustat_t irq_stat [NR_CPUS];
+
extern asmlinkage void ip27_irq(void);
int (*irq_cannonicalize)(int irq);
@@ -255,7 +273,7 @@ void irq_debug(void)
printk("PI_INT_MASK0_A = 0x%x\n", LOCAL_HUB_L(PI_INT_MASK0_A));
}
-int setup_irq(int irq, struct irqaction *new)
+int setup_irq(unsigned int irq, struct irqaction *new)
{
int shared = 0;
struct irqaction *old, **p;
diff --git a/arch/ppc/chrpboot/main.c b/arch/ppc/chrpboot/main.c
index d54a429a9..91bf4d8c4 100644
--- a/arch/ppc/chrpboot/main.c
+++ b/arch/ppc/chrpboot/main.c
@@ -10,7 +10,6 @@
#include "../coffboot/zlib.h"
#include <asm/bootinfo.h>
#include <asm/processor.h>
-#define __KERNEL__
#include <asm/page.h>
extern void *finddevice(const char *);
@@ -49,17 +48,8 @@ chrpboot(int a1, int a2, void *prom)
printf("chrpboot starting: loaded at 0x%x\n\r", &_start);
- if (initrd_len) {
- initrd_size = initrd_len;
- initrd_start = (RAM_END - initrd_size) & ~0xFFF;
- a1 = initrd_start;
- a2 = initrd_size;
- printf("initial ramdisk moving 0x%x <- 0x%x (%x bytes)\n\r", initrd_start,
- initrd_data,initrd_size);
- memcpy((char *)initrd_start, initrd_data, initrd_size);
- end_avail = (char *)initrd_start;
- } else
- end_avail = (char *) RAM_END;
+ end_avail = (char *) RAM_END;
+
im = image_data;
len = image_len;
dst = (void *) PROG_START;
@@ -98,7 +88,7 @@ chrpboot(int a1, int a2, void *prom)
rec = (struct bi_record *)((unsigned long)rec + rec->size);
rec->tag = BI_SYSMAP;
- rec->data[0] = sysmap_data;
+ rec->data[0] = (unsigned long)sysmap_data;
rec->data[1] = sysmap_len;
rec->size = sizeof(struct bi_record) + sizeof(unsigned long);
rec = (struct bi_record *)((unsigned long)rec + rec->size);
@@ -129,6 +119,10 @@ void *zalloc(void *x, unsigned items, unsigned size)
void zfree(void *x, void *addr, unsigned nb)
{
+ nb = (nb + 7) & -8;
+ if (addr == (avail_ram - nb)) {
+ avail_ram -= nb;
+ }
}
#define HEAD_CRC 2
diff --git a/arch/ppc/chrpboot/piggyback.c b/arch/ppc/chrpboot/piggyback.c
index 172025802..304bc8f11 100644
--- a/arch/ppc/chrpboot/piggyback.c
+++ b/arch/ppc/chrpboot/piggyback.c
@@ -1,8 +1,9 @@
#include <stdio.h>
+#include <unistd.h>
extern long ce_exec_config[];
-main(int argc, char *argv[])
+int main(int argc, char *argv[])
{
int i, cnt, pos, len;
unsigned int cksum, val;
diff --git a/arch/ppc/coffboot/piggyback.c b/arch/ppc/coffboot/piggyback.c
index 172025802..304bc8f11 100644
--- a/arch/ppc/coffboot/piggyback.c
+++ b/arch/ppc/coffboot/piggyback.c
@@ -1,8 +1,9 @@
#include <stdio.h>
+#include <unistd.h>
extern long ce_exec_config[];
-main(int argc, char *argv[])
+int main(int argc, char *argv[])
{
int i, cnt, pos, len;
unsigned int cksum, val;
diff --git a/arch/ppc/config.in b/arch/ppc/config.in
index 8bb23afa2..7d0ab5fa0 100644
--- a/arch/ppc/config.in
+++ b/arch/ppc/config.in
@@ -47,12 +47,9 @@ if [ "$CONFIG_8xx" = "y" ]; then
fi
if [ "$CONFIG_6xx" = "y" ]; then
choice 'Machine Type' \
- "PowerMac CONFIG_PMAC \
- PReP/MTX CONFIG_PREP \
- CHRP CONFIG_CHRP \
- PowerMac/PReP/CHRP CONFIG_ALL_PPC \
+ "PowerMac/PReP/MTX/CHRP CONFIG_ALL_PPC \
Gemini CONFIG_GEMINI \
- APUS CONFIG_APUS" PowerMac
+ APUS CONFIG_APUS" PowerMac/PReP/MTX/CHRP
fi
if [ "$CONFIG_PPC64" = "y" ]; then
diff --git a/arch/ppc/configs/common_defconfig b/arch/ppc/configs/common_defconfig
index 17217702f..4ba96bde9 100644
--- a/arch/ppc/configs/common_defconfig
+++ b/arch/ppc/configs/common_defconfig
@@ -17,9 +17,6 @@ CONFIG_6xx=y
# CONFIG_PPC64 is not set
# CONFIG_82xx is not set
# CONFIG_8xx is not set
-# CONFIG_PMAC is not set
-# CONFIG_PREP is not set
-# CONFIG_CHRP is not set
CONFIG_ALL_PPC=y
# CONFIG_GEMINI is not set
# CONFIG_APUS is not set
@@ -286,30 +283,29 @@ CONFIG_GMAC=y
# CONFIG_LANCE is not set
# CONFIG_NET_VENDOR_SMC is not set
# CONFIG_NET_VENDOR_RACAL is not set
-# CONFIG_RTL8139 is not set
-# CONFIG_DM9102 is not set
# CONFIG_AT1700 is not set
# CONFIG_DEPCA is not set
# CONFIG_NET_ISA is not set
-CONFIG_NET_EISA=y
+CONFIG_NET_PCI=y
CONFIG_PCNET32=y
# CONFIG_ADAPTEC_STARFIRE is not set
# CONFIG_AC3200 is not set
# CONFIG_APRICOT is not set
-# CONFIG_CS89x0 is not set
CONFIG_DE4X5=y
-# CONFIG_DEC_ELCP is not set
+# CONFIG_TULIP is not set
# CONFIG_DGRS is not set
-# CONFIG_EEXPRESS_PRO100 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_EEPRO100 is not set
# CONFIG_LNE390 is not set
# CONFIG_NE3210 is not set
# CONFIG_NE2K_PCI is not set
+# CONFIG_RTL8129 is not set
+# CONFIG_8139TOO is not set
# CONFIG_SIS900 is not set
# CONFIG_TLAN is not set
# CONFIG_VIA_RHINE is not set
# CONFIG_ES3210 is not set
# CONFIG_EPIC100 is not set
-# CONFIG_ZNET is not set
# CONFIG_NET_POCKET is not set
#
@@ -499,6 +495,7 @@ CONFIG_USB_OHCI=y
# CONFIG_USB_DC2XX is not set
# CONFIG_USB_STORAGE is not set
# CONFIG_USB_DABUSB is not set
+# CONFIG_USB_PLUSB is not set
#
# USB HID
@@ -508,13 +505,15 @@ CONFIG_USB_KBD=y
CONFIG_USB_MOUSE=y
# CONFIG_USB_GRAPHIRE is not set
# CONFIG_USB_WMFORCE is not set
-# CONFIG_INPUT_KEYBDEV is not set
-# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_INPUT_KEYBDEV=y
+CONFIG_INPUT_MOUSEDEV=y
+# CONFIG_INPUT_MOUSEDEV_MIX is not set
+# CONFIG_INPUT_MOUSEDEV_DIGITIZER is not set
# CONFIG_INPUT_JOYDEV is not set
# CONFIG_INPUT_EVDEV is not set
#
-# Filesystems
+# File systems
#
# CONFIG_QUOTA is not set
CONFIG_AUTOFS_FS=y
@@ -535,6 +534,7 @@ CONFIG_ISO9660_FS=y
# CONFIG_NTFS_FS is not set
# CONFIG_HPFS_FS is not set
CONFIG_PROC_FS=y
+# CONFIG_DEVFS_FS is not set
CONFIG_DEVPTS_FS=y
# CONFIG_QNX4FS_FS is not set
# CONFIG_ROMFS_FS is not set
diff --git a/arch/ppc/defconfig b/arch/ppc/defconfig
index 17217702f..4ba96bde9 100644
--- a/arch/ppc/defconfig
+++ b/arch/ppc/defconfig
@@ -17,9 +17,6 @@ CONFIG_6xx=y
# CONFIG_PPC64 is not set
# CONFIG_82xx is not set
# CONFIG_8xx is not set
-# CONFIG_PMAC is not set
-# CONFIG_PREP is not set
-# CONFIG_CHRP is not set
CONFIG_ALL_PPC=y
# CONFIG_GEMINI is not set
# CONFIG_APUS is not set
@@ -286,30 +283,29 @@ CONFIG_GMAC=y
# CONFIG_LANCE is not set
# CONFIG_NET_VENDOR_SMC is not set
# CONFIG_NET_VENDOR_RACAL is not set
-# CONFIG_RTL8139 is not set
-# CONFIG_DM9102 is not set
# CONFIG_AT1700 is not set
# CONFIG_DEPCA is not set
# CONFIG_NET_ISA is not set
-CONFIG_NET_EISA=y
+CONFIG_NET_PCI=y
CONFIG_PCNET32=y
# CONFIG_ADAPTEC_STARFIRE is not set
# CONFIG_AC3200 is not set
# CONFIG_APRICOT is not set
-# CONFIG_CS89x0 is not set
CONFIG_DE4X5=y
-# CONFIG_DEC_ELCP is not set
+# CONFIG_TULIP is not set
# CONFIG_DGRS is not set
-# CONFIG_EEXPRESS_PRO100 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_EEPRO100 is not set
# CONFIG_LNE390 is not set
# CONFIG_NE3210 is not set
# CONFIG_NE2K_PCI is not set
+# CONFIG_RTL8129 is not set
+# CONFIG_8139TOO is not set
# CONFIG_SIS900 is not set
# CONFIG_TLAN is not set
# CONFIG_VIA_RHINE is not set
# CONFIG_ES3210 is not set
# CONFIG_EPIC100 is not set
-# CONFIG_ZNET is not set
# CONFIG_NET_POCKET is not set
#
@@ -499,6 +495,7 @@ CONFIG_USB_OHCI=y
# CONFIG_USB_DC2XX is not set
# CONFIG_USB_STORAGE is not set
# CONFIG_USB_DABUSB is not set
+# CONFIG_USB_PLUSB is not set
#
# USB HID
@@ -508,13 +505,15 @@ CONFIG_USB_KBD=y
CONFIG_USB_MOUSE=y
# CONFIG_USB_GRAPHIRE is not set
# CONFIG_USB_WMFORCE is not set
-# CONFIG_INPUT_KEYBDEV is not set
-# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_INPUT_KEYBDEV=y
+CONFIG_INPUT_MOUSEDEV=y
+# CONFIG_INPUT_MOUSEDEV_MIX is not set
+# CONFIG_INPUT_MOUSEDEV_DIGITIZER is not set
# CONFIG_INPUT_JOYDEV is not set
# CONFIG_INPUT_EVDEV is not set
#
-# Filesystems
+# File systems
#
# CONFIG_QUOTA is not set
CONFIG_AUTOFS_FS=y
@@ -535,6 +534,7 @@ CONFIG_ISO9660_FS=y
# CONFIG_NTFS_FS is not set
# CONFIG_HPFS_FS is not set
CONFIG_PROC_FS=y
+# CONFIG_DEVFS_FS is not set
CONFIG_DEVPTS_FS=y
# CONFIG_QNX4FS_FS is not set
# CONFIG_ROMFS_FS is not set
diff --git a/arch/ppc/kernel/Makefile b/arch/ppc/kernel/Makefile
index ea7c7c6e7..0f7167622 100644
--- a/arch/ppc/kernel/Makefile
+++ b/arch/ppc/kernel/Makefile
@@ -88,24 +88,14 @@ endif
ifeq ($(CONFIG_NVRAM),y)
O_OBJS += pmac_nvram.o
endif
-ifeq ($(CONFIG_6xx),y)
- O_OBJS += open_pic.o indirect_pci.o
-endif
-ifeq ($(CONFIG_PPC64),y)
- O_OBJS += open_pic.o indirect_pci.o
-endif
ifeq ($(CONFIG_APUS),y)
O_OBJS += apus_setup.o
endif
-ifeq ($(CONFIG_PMAC),y)
- O_OBJS += pmac_pic.o pmac_setup.o pmac_time.o feature.o pmac_pci.o prom.o
-endif
-ifeq ($(CONFIG_CHRP),y)
- O_OBJS += chrp_pci.o pmac_pci.o chrp_setup.o i8259.o \
- chrp_time.o pmac_time.o prom.o
-endif
-ifeq ($(CONFIG_PREP),y)
- O_OBJS += prep_pci.o i8259.o prep_setup.o prep_nvram.o prep_time.o residual.o
+ifeq ($(CONFIG_ALL_PPC),y)
+ O_OBJS += pmac_pic.o pmac_setup.o pmac_time.o feature.o pmac_pci.o prom.o \
+ chrp_setup.o chrp_time.o chrp_pci.o open_pic.o indirect_pci.o \
+ prep_pci.o i8259.o prep_nvram.o prep_time.o residual.o
+ OX_OBJS += prep_setup.o
endif
ifeq ($(CONFIG_GEMINI),y)
O_OBJS += gemini_prom.o gemini_pci.o gemini_setup.o
diff --git a/arch/ppc/kernel/apus_setup.c b/arch/ppc/kernel/apus_setup.c
index 5f0c4b06e..a54efc6fd 100644
--- a/arch/ppc/kernel/apus_setup.c
+++ b/arch/ppc/kernel/apus_setup.c
@@ -10,7 +10,7 @@
* TODO:
* This file needs a *really* good cleanup. Restructure and optimize.
* Make sure it can be compiled for non-APUS configs. Begin to move
- * Amiga specific stuff into linux/machine/amiga.
+ * Amiga specific stuff into mach/amiga.
*/
#include <linux/config.h>
@@ -27,6 +27,10 @@
#include <asm/logging.h>
#endif
+/* Needs INITSERIAL call in head.S! */
+#undef APUS_DEBUG
+
+
#include <linux/ide.h>
#define T_CHAR (0x0000) /* char: don't touch */
#define T_SHORT (0x4000) /* short: 12 -> 21 */
@@ -60,37 +64,6 @@ static u_short driveid_types[] = {
#define num_driveid_types (sizeof(driveid_types)/sizeof(*driveid_types))
-#if 0 /* Get rid of this crud */
-/* Get the IDE stuff from the 68k file */
-#define ide_init_hwif_ports m68k_ide_init_hwif_ports
-#define ide_default_irq m68k_ide_default_irq
-#undef ide_request_irq
-#define ide_request_irq m68k_ide_request_irq
-#undef ide_free_irq
-#define ide_free_irq m68k_ide_free_irq
-#define ide_default_io_base m68k_ide_default_io_base
-#define ide_check_region m68k_ide_check_region
-#define ide_request_region m68k_ide_request_region
-#define ide_release_region m68k_ide_release_region
-#define ide_fix_driveid m68k_ide_fix_driveid
-#define ide_init_default_hwifs m68k_ide_init_default_hwifs
-#define select_t m68k_select_t
-//#include <asm/hdreg.h>
-#include <asm-m68k/ide.h>
-#undef ide_free_irq
-#undef select_t
-#undef ide_request_irq
-#undef ide_init_default_hwifs
-#undef ide_init_hwif_ports
-#undef ide_default_irq
-#undef ide_default_io_base
-#undef ide_check_region
-#undef ide_request_region
-#undef ide_release_region
-#undef ide_fix_driveid
-/*-------------------------------------------*/
-#endif
-
#include <asm/bootinfo.h>
#include <asm/setup.h>
#include <asm/amigahw.h>
@@ -764,6 +737,12 @@ void apus_ide_init_hwif_ports (hw_regs_t *hw, ide_ioreg_t data_port,
/****************************************************** IRQ stuff */
__apus
+static unsigned int apus_irq_cannonicalize(unsigned int irq)
+{
+ return irq;
+}
+
+__apus
int apus_get_irq_list(char *buf)
{
#ifdef CONFIG_APUS
@@ -922,6 +901,114 @@ static void apus_kbd_init_hw(void)
}
+/****************************************************** debugging */
+
+/* some serial hardware definitions */
+#define SDR_OVRUN (1<<15)
+#define SDR_RBF (1<<14)
+#define SDR_TBE (1<<13)
+#define SDR_TSRE (1<<12)
+
+#define AC_SETCLR (1<<15)
+#define AC_UARTBRK (1<<11)
+
+#define SER_DTR (1<<7)
+#define SER_RTS (1<<6)
+#define SER_DCD (1<<5)
+#define SER_CTS (1<<4)
+#define SER_DSR (1<<3)
+
+static __inline__ void ser_RTSon(void)
+{
+ ciab.pra &= ~SER_RTS; /* active low */
+}
+
+__apus
+int __debug_ser_out( unsigned char c )
+{
+ custom.serdat = c | 0x100;
+ mb();
+ while (!(custom.serdatr & 0x2000))
+ barrier();
+ return 1;
+}
+
+__apus
+unsigned char __debug_ser_in( void )
+{
+ unsigned char c;
+
+ /* XXX: is that ok?? derived from amiga_ser.c... */
+ while( !(custom.intreqr & IF_RBF) )
+ barrier();
+ c = custom.serdatr;
+ /* clear the interrupt, so that another character can be read */
+ custom.intreq = IF_RBF;
+ return c;
+}
+
+__apus
+int __debug_serinit( void )
+{
+ unsigned long flags;
+
+ save_flags (flags);
+ cli();
+
+ /* turn off Rx and Tx interrupts */
+ custom.intena = IF_RBF | IF_TBE;
+
+ /* clear any pending interrupt */
+ custom.intreq = IF_RBF | IF_TBE;
+
+ restore_flags (flags);
+
+ /*
+ * set the appropriate directions for the modem control flags,
+ * and clear RTS and DTR
+ */
+ ciab.ddra |= (SER_DTR | SER_RTS); /* outputs */
+ ciab.ddra &= ~(SER_DCD | SER_CTS | SER_DSR); /* inputs */
+
+#ifdef CONFIG_KGDB
+ /* turn Rx interrupts on for GDB */
+ custom.intena = IF_SETCLR | IF_RBF;
+ ser_RTSon();
+#endif
+
+ return 0;
+}
+
+__apus
+void __debug_print_hex(unsigned long x)
+{
+ int i;
+ char hexchars[] = "0123456789ABCDEF";
+
+ for (i = 0; i < 8; i++) {
+ __debug_ser_out(hexchars[(x >> 28) & 15]);
+ x <<= 4;
+ }
+ __debug_ser_out('\n');
+ __debug_ser_out('\r');
+}
+
+__apus
+void __debug_print_string(char* s)
+{
+ unsigned char c;
+ while((c = *s++))
+ __debug_ser_out(c);
+ __debug_ser_out('\n');
+ __debug_ser_out('\r');
+}
+
+__apus
+static void apus_progress(char *s, unsigned short value)
+{
+ __debug_print_string(s);
+}
+
/****************************************************** init */
/* The number of spurious interrupts */
@@ -970,7 +1057,7 @@ void apus_init_IRQ(void)
int i;
for ( i = 0 ; i < NR_IRQS ; i++ )
- irq_desc[i].ctl = &amiga_irqctrl;
+ irq_desc[i].handler = &amiga_irqctrl;
for (i = 0; i < NUM_IRQ_NODES; i++)
nodes[i].handler = NULL;
@@ -1015,7 +1102,7 @@ void apus_init(unsigned long r3, unsigned long r4, unsigned long r5,
ppc_md.setup_arch = apus_setup_arch;
ppc_md.setup_residual = NULL;
ppc_md.get_cpuinfo = apus_get_cpuinfo;
- ppc_md.irq_cannonicalize = NULL;
+ ppc_md.irq_cannonicalize = apus_irq_cannonicalize;
ppc_md.init_IRQ = apus_init_IRQ;
ppc_md.get_irq = apus_get_irq;
ppc_md.post_irq = apus_post_irq;
@@ -1023,6 +1110,10 @@ void apus_init(unsigned long r3, unsigned long r4, unsigned long r5,
ppc_md.heartbeat = apus_heartbeat;
ppc_md.heartbeat_count = 1;
#endif
+#ifdef APUS_DEBUG
+ __debug_serinit();
+ ppc_md.progress = apus_progress;
+#endif
ppc_md.init = NULL;
ppc_md.restart = apus_restart;
diff --git a/arch/ppc/kernel/entry.S b/arch/ppc/kernel/entry.S
index 013812afc..cc647a58b 100644
--- a/arch/ppc/kernel/entry.S
+++ b/arch/ppc/kernel/entry.S
@@ -435,7 +435,7 @@ _GLOBAL(fake_interrupt)
* here so it's easy to add arch-specific sections later.
* -- Cort
*/
-#if defined(CONFIG_CHRP) || defined(CONFIG_PMAC) || defined(CONFIG_ALL_PPC)
+#if defined(CONFIG_ALL_PPC)
/*
* On CHRP, the Run-Time Abstraction Services (RTAS) have to be
* called with the MMU off.
@@ -475,4 +475,4 @@ enter_rtas:
mtspr SRR0,r8
mtspr SRR1,r9
rfi /* return to caller */
-#endif /* CONFIG_CHRP || CONFIG_PMAC || CONFIG_ALL_PPC */
+#endif /* CONFIG_ALL_PPC */
diff --git a/arch/ppc/kernel/hashtable.S b/arch/ppc/kernel/hashtable.S
index c87385c53..5593ebe18 100644
--- a/arch/ppc/kernel/hashtable.S
+++ b/arch/ppc/kernel/hashtable.S
@@ -115,11 +115,6 @@ hash_page:
stw r6,0(r2) /* update PTE (accessed/dirty bits) */
/* Convert linux-style PTE to low word of PPC-style PTE */
-#ifdef CONFIG_PPC64
- /* clear the high 32 bits just in case */
- clrldi r6,r6,32
- clrldi r4,r4,32
-#endif /* CONFIG_PPC64 */
rlwinm r4,r6,32-9,31,31 /* _PAGE_HWWRITE -> PP lsb */
rlwimi r6,r6,32-1,31,31 /* _PAGE_USER -> PP (both bits now) */
ori r4,r4,0xe04 /* clear out reserved bits */
@@ -151,10 +146,6 @@ hash_page:
.globl hash_page_patch_A
hash_page_patch_A:
lis r4,Hash_base@h /* base address of hash table */
-#ifdef CONFIG_PPC64
- /* just in case */
- clrldi r4,r4,32
-#endif
rlwimi r4,r5,32-1,26-Hash_bits,25 /* (VSID & hash_mask) << 6 */
rlwinm r0,r3,32-6,26-Hash_bits,25 /* (PI & hash_mask) << 6 */
xor r4,r4,r0 /* make primary hash */
@@ -169,43 +160,89 @@ hash_page_patch_A:
/* Search the primary PTEG for a PTE whose 1st word matches r5 */
mtctr r2
addi r3,r4,-8
-1: lwzu r0,8(r3) /* get next PTE */
+1:
+#ifdef CONFIG_PPC64
+ lwzu r0,16(r3) /* get next PTE */
+#else
+ lwzu r0,8(r3) /* get next PTE */
+#endif
cmp 0,r0,r5
bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
beq+ found_slot
/* Search the secondary PTEG for a matching PTE */
+#ifdef CONFIG_PPC64
+ ori r5,r5,0x2 /* set H (secondary hash) bit */
+#else
ori r5,r5,0x40 /* set H (secondary hash) bit */
+#endif
.globl hash_page_patch_B
hash_page_patch_B:
xoris r3,r4,Hash_msk>>16 /* compute secondary hash */
xori r3,r3,0xffc0
+#ifdef CONFIG_PPC64
+ addi r3,r3,-16
+#else
addi r3,r3,-8
+#endif
mtctr r2
-2: lwzu r0,8(r3)
+2:
+#ifdef CONFIG_PPC64
+ lwzu r0,16(r3)
+#else
+ lwzu r0,8(r3)
+#endif
cmp 0,r0,r5
bdnzf 2,2b
beq+ found_slot
+#ifdef CONFIG_PPC64
+ xori r5,r5,0x2 /* clear H bit again */
+#else
xori r5,r5,0x40 /* clear H bit again */
+#endif
/* Search the primary PTEG for an empty slot */
10: mtctr r2
+#ifdef CONFIG_PPC64
+ addi r3,r4,-16 /* search primary PTEG */
+#else
addi r3,r4,-8 /* search primary PTEG */
-1: lwzu r0,8(r3) /* get next PTE */
+#endif
+1:
+#ifdef CONFIG_PPC64
+ lwzu r0,16(r3) /* get next PTE */
+ andi. r0,r0,1
+#else
+ lwzu r0,8(r3) /* get next PTE */
rlwinm. r0,r0,0,0,0 /* only want to check valid bit */
+#endif
bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
beq+ found_empty
/* Search the secondary PTEG for an empty slot */
+#ifdef CONFIG_PPC64
+ ori r5,r5,0x2 /* set H (secondary hash) bit */
+#else
ori r5,r5,0x40 /* set H (secondary hash) bit */
+#endif
.globl hash_page_patch_C
hash_page_patch_C:
xoris r3,r4,Hash_msk>>16 /* compute secondary hash */
xori r3,r3,0xffc0
+#ifdef CONFIG_PPC64
+ addi r3,r3,-16
+#else
addi r3,r3,-8
+#endif
mtctr r2
-2: lwzu r0,8(r3)
+2:
+#ifdef CONFIG_PPC64
+ lwzu r0,16(r3)
+ andi. r0,r0,1
+#else
+ lwzu r0,8(r3)
rlwinm. r0,r0,0,0,0 /* only want to check valid bit */
+#endif
bdnzf 2,2b
beq+ found_empty
@@ -218,12 +255,21 @@ hash_page_patch_C:
* advantage to putting the PTE in the primary PTEG, we always
* put the PTE in the primary PTEG.
*/
+#ifdef CONFIG_PPC64
+ xori r5,r5,0x2 /* clear H bit again */
+#else
xori r5,r5,0x40 /* clear H bit again */
+#endif
lis r3,next_slot@ha
tophys(r3,r3)
lwz r2,next_slot@l(r3)
+#ifdef CONFIG_PPC64
+ addi r2,r2,16
+ andi. r2,r2,0x78
+#else
addi r2,r2,8
andi. r2,r2,0x38
+#endif
stw r2,next_slot@l(r3)
add r3,r4,r2
11:
@@ -237,9 +283,17 @@ hash_page_patch_C:
#ifndef __SMP__
/* Store PTE in PTEG */
found_empty:
+#ifdef CONFIG_PPC64
+ std r5,0(r3)
+#else
stw r5,0(r3)
+#endif
found_slot:
+#ifdef CONFIG_PPC64
+ std r6,8(r3)
+#else
stw r6,4(r3)
+#endif
sync
#else /* __SMP__ */
diff --git a/arch/ppc/kernel/head.S b/arch/ppc/kernel/head.S
index dd16b8c27..b6d44ecb3 100644
--- a/arch/ppc/kernel/head.S
+++ b/arch/ppc/kernel/head.S
@@ -1328,18 +1328,15 @@ load_up_mmu:
/* Load the SDR1 register (hash table base & size) */
lis r6,_SDR1@ha
tophys(r6,r6)
-#ifdef CONFIG_PPC64
- ld r6,_SDR1@l(r6)
+ lwz r6,_SDR1@l(r6)
mtspr SDR1,r6
+#ifdef CONFIG_PPC64
/* clear the v bit in the ASR so we can
* behave as if we have segment registers
* -- Cort
*/
clrldi r6,r6,63
mtasr r6
-#else
- lwz r6,_SDR1@l(r6)
- mtspr SDR1,r6
#endif /* CONFIG_PPC64 */
li r0,16 /* load up segment register values */
mtctr r0 /* for context 0 */
diff --git a/arch/ppc/kernel/irq.c b/arch/ppc/kernel/irq.c
index fd77fbc36..ffac1871a 100644
--- a/arch/ppc/kernel/irq.c
+++ b/arch/ppc/kernel/irq.c
@@ -70,8 +70,6 @@ volatile unsigned char *chrp_int_ack_special;
#define MAXCOUNT 10000000
-#define NR_MASK_WORDS ((NR_IRQS + 31) / 32)
-
irq_desc_t irq_desc[NR_IRQS];
int ppc_spurious_interrupts = 0;
unsigned int local_bh_count[NR_CPUS];
@@ -81,7 +79,6 @@ unsigned int ppc_cached_irq_mask[NR_MASK_WORDS];
unsigned int ppc_lost_interrupts[NR_MASK_WORDS];
atomic_t ppc_n_lost_interrupts;
-
/* nasty hack for shared irq's since we need to do kmalloc calls but
* can't very early in the boot when we need to do a request irq.
* this needs to be removed.
diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S
index 50f63eeb4..8444bb4a0 100644
--- a/arch/ppc/kernel/misc.S
+++ b/arch/ppc/kernel/misc.S
@@ -241,12 +241,21 @@ _GLOBAL(__flush_page_to_ram)
rlwinm r5,r5,16,16,31
cmpi 0,r5,1
beqlr /* for 601, do nothing */
+ li r4,0x0FFF
+ andc r3,r3,r4 /* Get page base address */
li r4,4096/CACHE_LINE_SIZE /* Number of lines in a page */
mtctr r4
+ mr r6,r3
0: dcbst 0,r3 /* Write line to ram */
addi r3,r3,CACHE_LINE_SIZE
bdnz 0b
sync
+ mtctr r4
+1: icbi 0,r6
+ addi r6,r6,CACHE_LINE_SIZE
+ bdnz 1b
+ sync
+ isync
blr
/*
@@ -270,7 +279,7 @@ _GLOBAL(__flush_icache_page)
sync
isync
blr
-
+
/*
* Clear a page using the dcbz instruction, which doesn't cause any
* memory traffic (except to write out any cache lines which get
diff --git a/arch/ppc/kernel/mk_defs.c b/arch/ppc/kernel/mk_defs.c
index 4f3c6834d..c381ea073 100644
--- a/arch/ppc/kernel/mk_defs.c
+++ b/arch/ppc/kernel/mk_defs.c
@@ -99,6 +99,7 @@ main(void)
DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr));
DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link));
DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr));
+ DEFINE(_MQ, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, mq));
DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer));
DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar));
DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr));
diff --git a/arch/ppc/kernel/pmac_pic.c b/arch/ppc/kernel/pmac_pic.c
index d13875c9f..b0276ca2c 100644
--- a/arch/ppc/kernel/pmac_pic.c
+++ b/arch/ppc/kernel/pmac_pic.c
@@ -39,6 +39,17 @@ extern int pmac_pcibios_read_config_word(unsigned char bus, unsigned char dev_fn
extern int pmac_pcibios_write_config_word(unsigned char bus, unsigned char dev_fn,
unsigned char offset, unsigned short val);
+/*
+ * Mark an irq as "lost". This is only used on the pmac
+ * since it can lose interrupts (see pmac_set_irq_mask).
+ * -- Cort
+ */
+void __pmac __no_use_set_lost(unsigned long irq_nr)
+{
+ if (!test_and_set_bit(irq_nr, ppc_lost_interrupts))
+ atomic_inc(&ppc_n_lost_interrupts);
+}
+
static void pmac_openpic_mask_irq(unsigned int irq_nr)
{
openpic_disable_irq(irq_nr);
@@ -105,10 +116,8 @@ static void __pmac pmac_set_irq_mask(unsigned int irq_nr)
*/
if ((bit & ppc_cached_irq_mask[i])
&& (ld_le32(&pmac_irq_hw[i]->level) & bit)
- && !(ld_le32(&pmac_irq_hw[i]->flag) & bit)) {
- if (!test_and_set_bit(irq_nr, ppc_lost_interrupts))
- atomic_inc(&ppc_n_lost_interrupts);
- }
+ && !(ld_le32(&pmac_irq_hw[i]->flag) & bit))
+ __set_lost((ulong)irq_nr);
}
static void __pmac pmac_mask_irq(unsigned int irq_nr)
@@ -174,6 +183,8 @@ pmac_get_irq(struct pt_regs *regs)
unsigned long bits = 0;
#ifdef __SMP__
+ void pmac_smp_message_recv(void);
+
/* IPI's are a hack on the powersurge -- Cort */
if ( smp_processor_id() != 0 )
{
@@ -182,12 +193,12 @@ pmac_get_irq(struct pt_regs *regs)
if (xmon_2nd)
xmon(regs);
#endif
- smp_message_recv();
+ pmac_smp_message_recv();
return -2; /* ignore, already handled */
}
#endif /* __SMP__ */
- /* Yeah, I know, this could be a separate do_IRQ function */
+ /* Yeah, I know, this could be a separate get_irq function */
if (has_openpic)
{
irq = openpic_irq(smp_processor_id());
@@ -376,6 +387,7 @@ pmac_pic_init(void)
irqctrler = NULL;
}
+ int_control.int_set_lost = __no_use_set_lost;
/*
* G3 powermacs and 1999 G3 PowerBooks have 64 interrupts,
* 1998 G3 Series PowerBooks have 128,
diff --git a/arch/ppc/kernel/ppc_htab.c b/arch/ppc/kernel/ppc_htab.c
index 264a24d48..da46f3c1c 100644
--- a/arch/ppc/kernel/ppc_htab.c
+++ b/arch/ppc/kernel/ppc_htab.c
@@ -44,19 +44,12 @@ extern unsigned long htab_evicts;
extern unsigned long pte_misses;
extern unsigned long pte_errors;
-static struct file_operations ppc_htab_operations = {
+struct file_operations ppc_htab_operations = {
llseek: ppc_htab_lseek,
read: ppc_htab_read,
write: ppc_htab_write,
};
-/*
- * proc files can do almost nothing..
- */
-struct inode_operations proc_ppc_htab_inode_operations = {
- &ppc_htab_operations, /* default proc file-ops */
-};
-
/* these will go into processor.h when I'm done debugging -- Cort */
#define MMCR0 952
#define MMCR0_PMC1_CYCLES (0x1<<7)
diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c
index 757715512..87c8d4082 100644
--- a/arch/ppc/kernel/ppc_ksyms.c
+++ b/arch/ppc/kernel/ppc_ksyms.c
@@ -34,6 +34,7 @@
#include <asm/feature.h>
#include <asm/dma.h>
#include <asm/machdep.h>
+#include <asm/hw_irq.h>
#ifdef __SMP__
#include <asm/smplock.h>
#endif /* __SMP__ */
@@ -50,7 +51,6 @@ extern void AlignmentException(struct pt_regs *regs);
extern void ProgramCheckException(struct pt_regs *regs);
extern void SingleStepException(struct pt_regs *regs);
extern int sys_sigreturn(struct pt_regs *regs);
-extern atomic_t ppc_n_lost_interrupts;
extern void do_lost_interrupts(unsigned long);
extern int do_signal(sigset_t *, struct pt_regs *);
@@ -69,6 +69,7 @@ EXPORT_SYMBOL(ProgramCheckException);
EXPORT_SYMBOL(SingleStepException);
EXPORT_SYMBOL(sys_sigreturn);
EXPORT_SYMBOL(ppc_n_lost_interrupts);
+EXPORT_SYMBOL(ppc_lost_interrupts);
EXPORT_SYMBOL(do_lost_interrupts);
EXPORT_SYMBOL(enable_irq);
EXPORT_SYMBOL(disable_irq);
@@ -88,7 +89,7 @@ EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
EXPORT_SYMBOL(DMA_MODE_READ);
EXPORT_SYMBOL(DMA_MODE_WRITE);
#ifndef CONFIG_8xx
-#if defined(CONFIG_PREP) || defined(CONFIG_ALL_PPC)
+#if defined(CONFIG_ALL_PPC)
EXPORT_SYMBOL(_prep_type);
EXPORT_SYMBOL(ucSystemType);
#endif
@@ -125,7 +126,6 @@ EXPORT_SYMBOL(strtok);
EXPORT_SYMBOL(strstr);
EXPORT_SYMBOL(strlen);
EXPORT_SYMBOL(strnlen);
-EXPORT_SYMBOL(strspn);
EXPORT_SYMBOL(strcmp);
EXPORT_SYMBOL(strncmp);
@@ -227,7 +227,7 @@ EXPORT_SYMBOL(pmu_register_sleep_notifier);
EXPORT_SYMBOL(pmu_unregister_sleep_notifier);
EXPORT_SYMBOL(pmu_enable_irled);
#endif CONFIG_PMAC_PBOOK
-#if defined(CONFIG_PMAC) || defined(CONFIG_ALL_PPC)
+#if defined(CONFIG_ALL_PPC)
EXPORT_SYMBOL(find_devices);
EXPORT_SYMBOL(find_type_devices);
EXPORT_SYMBOL(find_compatible_devices);
@@ -243,8 +243,8 @@ EXPORT_SYMBOL(pci_device_loc);
EXPORT_SYMBOL(feature_set);
EXPORT_SYMBOL(feature_clear);
EXPORT_SYMBOL(feature_test);
-#endif /* defined(CONFIG_PMAC) || defined(CONFIG_ALL_PPC) */
-#if defined(CONFIG_SCSI) && (defined(CONFIG_PMAC) || defined(CONFIG_ALL_PPC))
+#endif /* defined(CONFIG_ALL_PPC) */
+#if defined(CONFIG_SCSI) && defined(CONFIG_ALL_PPC)
EXPORT_SYMBOL(note_scsi_host);
#endif
EXPORT_SYMBOL(kd_mksound);
@@ -270,7 +270,6 @@ EXPORT_SYMBOL(screen_info);
EXPORT_SYMBOL(int_control);
EXPORT_SYMBOL(timer_interrupt_intercept);
EXPORT_SYMBOL(timer_interrupt);
-extern unsigned long do_IRQ_intercept;
EXPORT_SYMBOL(do_IRQ_intercept);
EXPORT_SYMBOL(irq_desc);
void ppc_irq_dispatch_handler(struct pt_regs *, int);
@@ -278,3 +277,7 @@ EXPORT_SYMBOL(ppc_irq_dispatch_handler);
EXPORT_SYMBOL(decrementer_count);
EXPORT_SYMBOL(get_wchan);
EXPORT_SYMBOL(console_drivers);
+#ifdef CONFIG_XMON
+EXPORT_SYMBOL(xmon);
+#endif
+EXPORT_SYMBOL(down_read_failed);
diff --git a/arch/ppc/kernel/process.c b/arch/ppc/kernel/process.c
index 41382b2d7..5c01d3c72 100644
--- a/arch/ppc/kernel/process.c
+++ b/arch/ppc/kernel/process.c
@@ -158,7 +158,7 @@ enable_kernel_altivec(void)
if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
giveup_altivec(current);
else
- giveup_altivec(NULL): /* just enable AltiVec for kernel - force */
+ giveup_altivec(NULL); /* just enable AltiVec for kernel - force */
#else
giveup_altivec(last_task_used_altivec);
#endif /* __SMP __ */
diff --git a/arch/ppc/kernel/prom.c b/arch/ppc/kernel/prom.c
index b86e2a153..4ee638f62 100644
--- a/arch/ppc/kernel/prom.c
+++ b/arch/ppc/kernel/prom.c
@@ -604,7 +604,6 @@ prom_init(int r3, int r4, prom_entry pp)
/* XXX: hack - don't start cpu 0, this cpu -- Cort */
if ( smp_chrp_cpu_nr++ == 0 )
continue;
- RELOC(smp_ibm_chrp_hack) = 1;
prom_print(RELOC("starting cpu "));
prom_print(path);
*(unsigned long *)(0x4) = 0;
diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c
index 7502ad08e..5a57ba8a2 100644
--- a/arch/ppc/kernel/setup.c
+++ b/arch/ppc/kernel/setup.c
@@ -733,17 +733,13 @@ void ppc_generic_ide_fix_driveid(struct hd_driveid *id)
id->eide_dma_time = __le16_to_cpu(id->eide_dma_time);
id->eide_pio = __le16_to_cpu(id->eide_pio);
id->eide_pio_iordy = __le16_to_cpu(id->eide_pio_iordy);
- id->word69 = __le16_to_cpu(id->word69);
- id->word70 = __le16_to_cpu(id->word70);
- id->word71 = __le16_to_cpu(id->word71);
- id->word72 = __le16_to_cpu(id->word72);
- id->word73 = __le16_to_cpu(id->word73);
- id->word74 = __le16_to_cpu(id->word74);
+ for (i=0; i<2; i++)
+ id->words69_70[i] = __le16_to_cpu(id->words69_70[i]);
+ for (i=0; i<4; i++)
+ id->words71_74[i] = __le16_to_cpu(id->words71_74[i]);
id->queue_depth = __le16_to_cpu(id->queue_depth);
- id->word76 = __le16_to_cpu(id->word76);
- id->word77 = __le16_to_cpu(id->word77);
- id->word78 = __le16_to_cpu(id->word78);
- id->word79 = __le16_to_cpu(id->word79);
+ for (i=0; i<4; i++)
+ id->words76_79[i] = __le16_to_cpu(id->words76_79[i]);
id->major_rev_num = __le16_to_cpu(id->major_rev_num);
id->minor_rev_num = __le16_to_cpu(id->minor_rev_num);
id->command_set_1 = __le16_to_cpu(id->command_set_1);
@@ -758,40 +754,14 @@ void ppc_generic_ide_fix_driveid(struct hd_driveid *id)
id->CurAPMvalues = __le16_to_cpu(id->CurAPMvalues);
id->word92 = __le16_to_cpu(id->word92);
id->hw_config = __le16_to_cpu(id->hw_config);
- id->word94 = __le16_to_cpu(id->word94);
- id->word95 = __le16_to_cpu(id->word95);
- id->word96 = __le16_to_cpu(id->word96);
- id->word97 = __le16_to_cpu(id->word97);
- id->word98 = __le16_to_cpu(id->word98);
- id->word99 = __le16_to_cpu(id->word99);
- id->word100 = __le16_to_cpu(id->word100);
- id->word101 = __le16_to_cpu(id->word101);
- id->word102 = __le16_to_cpu(id->word102);
- id->word103 = __le16_to_cpu(id->word103);
- id->word104 = __le16_to_cpu(id->word104);
- id->word105 = __le16_to_cpu(id->word105);
- id->word106 = __le16_to_cpu(id->word106);
- id->word107 = __le16_to_cpu(id->word107);
- id->word108 = __le16_to_cpu(id->word108);
- id->word109 = __le16_to_cpu(id->word109);
- id->word110 = __le16_to_cpu(id->word110);
- id->word111 = __le16_to_cpu(id->word111);
- id->word112 = __le16_to_cpu(id->word112);
- id->word113 = __le16_to_cpu(id->word113);
- id->word114 = __le16_to_cpu(id->word114);
- id->word115 = __le16_to_cpu(id->word115);
- id->word116 = __le16_to_cpu(id->word116);
- id->word117 = __le16_to_cpu(id->word117);
- id->word118 = __le16_to_cpu(id->word118);
- id->word119 = __le16_to_cpu(id->word119);
- id->word120 = __le16_to_cpu(id->word120);
- id->word121 = __le16_to_cpu(id->word121);
- id->word122 = __le16_to_cpu(id->word122);
- id->word123 = __le16_to_cpu(id->word123);
- id->word124 = __le16_to_cpu(id->word124);
- id->word125 = __le16_to_cpu(id->word125);
- id->word126 = __le16_to_cpu(id->word126);
+ for (i=0; i<34; i++)
+ id->words94_125[i] = __le16_to_cpu(id->words94_125[i]);
+ id->last_lun = __le16_to_cpu(id->last_lun);
id->word127 = __le16_to_cpu(id->word127);
- for (i=0; i<127; i++)
- id->reserved[i] = __le16_to_cpu(id->reserved[i]);
+ id->dlf = __le16_to_cpu(id->dlf);
+ id->csfo = __le16_to_cpu(id->csfo);
+ for (i=0; i<31; i++)
+ id->words130_159[i] = __le16_to_cpu(id->words130_159[i]);
+ for (i=0; i<97; i++)
+ id->words160_255[i] = __le16_to_cpu(id->words160_255[i]);
}
diff --git a/arch/ppc/kernel/smp.c b/arch/ppc/kernel/smp.c
index 83dff9246..97543348b 100644
--- a/arch/ppc/kernel/smp.c
+++ b/arch/ppc/kernel/smp.c
@@ -12,6 +12,7 @@
* (troy@microux.com, hozer@drgw.net)
*/
+#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
@@ -445,8 +446,10 @@ void __init smp_callin(void)
*/
if ( _machine & (_MACH_gemini|_MACH_chrp|_MACH_prep) )
do_openpic_setup_cpu();
+#ifdef CONFIG_GEMINI
if ( _machine == _MACH_gemini )
gemini_init_l2();
+#endif
while(!smp_commenced)
barrier();
__sti();
diff --git a/arch/ppc/mm/init.c b/arch/ppc/mm/init.c
index 25d728fdd..2faccd042 100644
--- a/arch/ppc/mm/init.c
+++ b/arch/ppc/mm/init.c
@@ -115,11 +115,7 @@ extern struct task_struct *current_set[NR_CPUS];
PTE *Hash, *Hash_end;
unsigned long Hash_size, Hash_mask;
#if !defined(CONFIG_4xx) && !defined(CONFIG_8xx)
-#ifdef CONFIG_PPC64
-unsigned long long _SDR1;
-#else
unsigned long _SDR1;
-#endif
static void hash_init(void);
union ubat { /* BAT register values to be loaded */
@@ -423,10 +419,9 @@ __ioremap(unsigned long addr, unsigned long size, unsigned long flags)
/*
* Is it a candidate for a BAT mapping?
*/
-
for (i = 0; i < size; i += PAGE_SIZE)
map_page(v+i, p+i, flags);
-out:
+out:
return (void *) (v + (addr & ~PAGE_MASK));
}
@@ -593,7 +588,7 @@ mmu_context_overflow(void)
#if !defined(CONFIG_4xx) && !defined(CONFIG_8xx)
static void get_mem_prop(char *, struct mem_pieces *);
-#if defined(CONFIG_PMAC) || defined(CONFIG_CHRP) || defined(CONFIG_ALL_PPC)
+#if defined(CONFIG_ALL_PPC)
/*
* Read in a property describing some pieces of memory.
*/
@@ -616,7 +611,7 @@ static void __init get_mem_prop(char *name, struct mem_pieces *mp)
mem_pieces_sort(mp);
mem_pieces_coalesce(mp);
}
-#endif /* CONFIG_PMAC || CONFIG_CHRP || CONFIG_ALL_PPC */
+#endif /* CONFIG_ALL_PPC */
/*
* Set up one of the I/D BAT (block address translation) register pairs.
@@ -921,10 +916,11 @@ void __init MMU_init(void)
if ( ppc_md.progress ) ppc_md.progress("MMU:hash init", 0x300);
hash_init();
#ifdef CONFIG_PPC64
- _SDR1 = 0; /* temporary hack to just use bats -- Cort */
-#else
+ _SDR1 = __pa(Hash) | (ffz(~Hash_size) - 7)-11;
+#else
_SDR1 = __pa(Hash) | (Hash_mask >> 10);
-#endif
+#endif
+
ioremap_base = 0xf8000000;
if ( ppc_md.progress ) ppc_md.progress("MMU:mapin", 0x301);
@@ -947,7 +943,7 @@ void __init MMU_init(void)
setbat(0, 0xf8000000, 0xf8000000, 0x08000000, IO_PAGE);
#ifdef CONFIG_PPC64
/* temporary hack to get working until page tables are stable -- Cort*/
- setbat(1, 0x80000000, 0xc0000000, 0x10000000, IO_PAGE);
+/* setbat(1, 0x80000000, 0xc0000000, 0x10000000, IO_PAGE);*/
setbat(3, 0xd0000000, 0xd0000000, 0x10000000, IO_PAGE);
#else
setbat(1, 0x80000000, 0x80000000, 0x10000000, IO_PAGE);
@@ -1118,7 +1114,7 @@ void __init paging_init(void)
/*
* All pages are DMA-able so we put them all in the DMA zone.
*/
- zones_size[0] = virt_to_phys(end_of_DRAM) >> PAGE_SHIFT;
+ zones_size[0] = ((unsigned long)end_of_DRAM - KERNELBASE) >> PAGE_SHIFT;
for (i = 1; i < MAX_NR_ZONES; i++)
zones_size[i] = 0;
free_area_init(zones_size);
@@ -1132,9 +1128,9 @@ void __init mem_init(void)
int codepages = 0;
int datapages = 0;
int initpages = 0;
-#if defined(CONFIG_CHRP) || defined(CONFIG_ALL_PPC)
+#if defined(CONFIG_ALL_PPC)
extern unsigned int rtas_data, rtas_size;
-#endif /* defined(CONFIG_CHRP) || defined(CONFIG_ALL_PPC) */
+#endif /* defined(CONFIG_ALL_PPC) */
max_mapnr = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
num_physpages = max_mapnr; /* RAM is assumed contiguous */
@@ -1150,13 +1146,13 @@ void __init mem_init(void)
}
#endif /* CONFIG_BLK_DEV_INITRD */
-#if defined(CONFIG_CHRP) || defined(CONFIG_ALL_PPC)
+#if defined(CONFIG_ALL_PPC)
/* mark the RTAS pages as reserved */
if ( rtas_data )
for (addr = rtas_data; addr < PAGE_ALIGN(rtas_data+rtas_size) ;
addr += PAGE_SIZE)
SetPageReserved(mem_map + MAP_NR(addr));
-#endif /* defined(CONFIG_CHRP) || defined(CONFIG_ALL_PPC) */
+#endif /* defined(CONFIG_ALL_PPC) */
if ( sysmap_size )
for (addr = (unsigned long)sysmap;
addr < PAGE_ALIGN((unsigned long)sysmap+sysmap_size) ;
@@ -1178,13 +1174,14 @@ void __init mem_init(void)
printk("Memory: %luk available (%dk kernel code, %dk data, %dk init) [%08x,%08lx]\n",
(unsigned long)nr_free_pages()<< (PAGE_SHIFT-10),
- codepages, datapages, initpages,
+ codepages<< (PAGE_SHIFT-10), datapages<< (PAGE_SHIFT-10),
+ initpages<< (PAGE_SHIFT-10),
PAGE_OFFSET, (unsigned long) end_of_DRAM);
mem_init_done = 1;
}
#if !defined(CONFIG_4xx) && !defined(CONFIG_8xx)
-#if defined(CONFIG_PMAC) || defined(CONFIG_CHRP) || defined(CONFIG_ALL_PPC)
+#if defined(CONFIG_ALL_PPC)
/*
* On systems with Open Firmware, collect information about
* physical RAM and which pieces are already in use.
@@ -1195,9 +1192,13 @@ void __init mem_init(void)
unsigned long __init *pmac_find_end_of_memory(void)
{
unsigned long a, total;
-
- /* max amount of RAM we allow -- Cort */
-#define RAM_LIMIT (768<<20)
+ unsigned long ram_limit = 0xf0000000 - KERNELBASE;
+ /* allow 0x08000000 for IO space */
+ if ( _machine & (_MACH_prep|_MACH_Pmac) )
+ ram_limit = 0xd8000000 - KERNELBASE;
+#ifdef CONFIG_PPC64
+ ram_limit = 64<<20;
+#endif
memory_node = find_devices("memory");
if (memory_node == NULL) {
@@ -1222,16 +1223,8 @@ unsigned long __init *pmac_find_end_of_memory(void)
a = phys_mem.regions[0].address;
if (a != 0)
panic("RAM doesn't start at physical address 0");
- /*
- * XXX:
- * Make sure ram mappings don't stomp on IO space
- * This is a temporary hack to keep this from happening
- * until we move the KERNELBASE and can allocate RAM up
- * to our nearest IO area.
- * -- Cort
- */
- if (__max_memory == 0 || __max_memory > RAM_LIMIT)
- __max_memory = RAM_LIMIT;
+ if (__max_memory == 0 || __max_memory > ram_limit)
+ __max_memory = ram_limit;
if (phys_mem.regions[0].size >= __max_memory) {
phys_mem.regions[0].size = __max_memory;
phys_mem.n_regions = 1;
@@ -1247,12 +1240,11 @@ unsigned long __init *pmac_find_end_of_memory(void)
set_phys_avail(&phys_mem);
-#undef RAM_LIMIT
return __va(total);
}
-#endif /* CONFIG_PMAC || CONFIG_CHRP || CONFIG_ALL_PPC */
+#endif /* CONFIG_ALL_PPC */
-#if defined(CONFIG_PREP) || defined(CONFIG_ALL_PPC)
+#if defined(CONFIG_ALL_PPC)
/*
* This finds the amount of physical ram and does necessary
* setup for prep. This is pretty architecture specific so
@@ -1279,7 +1271,7 @@ unsigned long __init *prep_find_end_of_memory(void)
return (__va(total));
}
-#endif /* defined(CONFIG_PREP) || defined(CONFIG_ALL_PPC) */
+#endif /* defined(CONFIG_ALL_PPC) */
#if defined(CONFIG_GEMINI)
@@ -1389,16 +1381,12 @@ static void __init hash_init(void)
* up to a maximum of 2MB.
*/
ramsize = (ulong)end_of_DRAM - KERNELBASE;
-#ifdef CONFIG_PPC64
- Hash_mask = 0;
- for (h = 256<<10; h < ramsize / 256 && h < 4<<20; h *= 2, Hash_mask++)
- ;
- Hash_size = h;
- Hash_mask <<= 10; /* so setting _SDR1 works the same -- Cort */
-#else
for (h = 64<<10; h < ramsize / 256 && h < 2<<20; h *= 2)
;
Hash_size = h;
+#ifdef CONFIG_PPC64
+ Hash_mask = (h >> 7) - 1;
+#else
Hash_mask = (h >> 6) - 1;
#endif
@@ -1433,7 +1421,11 @@ static void __init hash_init(void)
/*
* Patch up the instructions in head.S:hash_page
*/
+#ifdef CONFIG_PPC64
+ Hash_bits = ffz(~Hash_size) - 7;
+#else
Hash_bits = ffz(~Hash_size) - 6;
+#endif
hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff)
| (__pa(Hash) >> 16);
hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0)
@@ -1443,9 +1435,17 @@ static void __init hash_init(void)
hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0)
| ((26 - Hash_bits) << 6);
hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff)
+#ifdef CONFIG_PPC64
+ | (Hash_mask >> 11);
+#else
| (Hash_mask >> 10);
+#endif
hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff)
+#ifdef CONFIG_PPC64
+ | (Hash_mask >> 11);
+#else
| (Hash_mask >> 10);
+#endif
#if 0 /* see hash_page in head.S, note also patch_C ref below */
hash_page_patch_D[0] = (hash_page_patch_D[0] & ~0xffff)
| (Hash_mask >> 10);
diff --git a/arch/ppc/mm/mem_pieces.c b/arch/ppc/mm/mem_pieces.c
index e695d5a0a..309a526f5 100644
--- a/arch/ppc/mm/mem_pieces.c
+++ b/arch/ppc/mm/mem_pieces.c
@@ -127,7 +127,7 @@ mem_pieces_print(struct mem_pieces *mp)
printk("\n");
}
-#if defined(CONFIG_PREP) || defined(CONFIG_APUS) || defined(CONFIG_ALL_PPC)
+#if defined(CONFIG_APUS) || defined(CONFIG_ALL_PPC)
/*
* Add some memory to an array of pieces
*/
diff --git a/arch/ppc/xmon/xmon.c b/arch/ppc/xmon/xmon.c
index a0da2f1b4..d18d74dfd 100644
--- a/arch/ppc/xmon/xmon.c
+++ b/arch/ppc/xmon/xmon.c
@@ -75,11 +75,14 @@ static void take_input(char *);
static unsigned read_spr(int);
static void write_spr(int, unsigned);
static void super_regs(void);
+static void print_sysmap(void);
static void remove_bpts(void);
static void insert_bpts(void);
static struct bpt *at_breakpoint(unsigned pc);
static void bpt_cmds(void);
static void cacheflush(void);
+static char *pretty_lookup_name(unsigned long addr);
+static char *lookup_name(unsigned long addr);
extern int print_insn_big_powerpc(FILE *, unsigned long, unsigned);
extern void printf(const char *fmt, ...);
@@ -101,6 +104,7 @@ Commands:\n\
mm move a block of memory\n\
ms set a block of memory\n\
md compare two blocks of memory\n\
+ M print System.map\n\
r print registers\n\
S print special registers\n\
t print backtrace\n\
@@ -337,6 +341,8 @@ cmds(struct pt_regs *excp)
else
excprint(excp);
break;
+ case 'M':
+ print_sysmap();
case 'S':
super_regs();
break;
@@ -514,8 +520,10 @@ getsp()
void
excprint(struct pt_regs *fp)
{
- printf("vector: %x at pc = %x, msr = %x, sp = %x [%x]\n",
- fp->trap, fp->nip, fp->msr, fp->gpr[1], fp);
+ printf("vector: %x at pc = %x %s",
+ fp->trap, fp->nip,/* pretty_lookup_name(fp->nip)*/"");
+ printf(", msr = %x, sp = %x [%x]\n",
+ fp->msr, fp->gpr[1], fp);
if (fp->trap == 0x300 || fp->trap == 0x600)
printf("dar = %x, dsisr = %x\n", fp->dar, fp->dsisr);
if (current)
@@ -597,6 +605,14 @@ extern char exc_prolog;
extern char dec_exc;
void
+print_sysmap(void)
+{
+ extern char *sysmap;
+ if ( sysmap )
+ printf("System.map: \n%s", sysmap);
+}
+
+void
super_regs()
{
int i, cmd;
@@ -1345,9 +1361,26 @@ char *str;
lineptr = str;
}
+/*
+ * We use this array a lot here. We assume we don't have multiple
+ * instances of xmon running and that we don't use the return value of
+ * any functions other than printing them.
+ * -- Cort
+ */
char last[64];
-char *
-lookup_addr(unsigned long addr)
+static char *pretty_lookup_name(unsigned long addr)
+{
+ if ( lookup_name(addr) )
+ {
+ sprintf(last, " (%s)", lookup_name(addr));
+ return last;
+ }
+ else
+ return NULL;
+}
+
+
+static char *lookup_name(unsigned long addr)
{
extern char *sysmap;
extern unsigned long sysmap_size;
@@ -1357,10 +1390,6 @@ lookup_addr(unsigned long addr)
if ( !sysmap || !sysmap_size )
return NULL;
- /* adjust if addr is relative to kernelbase */
- if ( addr < PAGE_OFFSET )
- addr += PAGE_OFFSET;
-
cmp = simple_strtoul(c, &c, 8);
strcpy( last, strsep( &c, "\n"));
while ( c < (sysmap+sysmap_size) )
@@ -1372,3 +1401,4 @@ lookup_addr(unsigned long addr)
}
return last;
}
+
diff --git a/arch/sparc/boot/Makefile b/arch/sparc/boot/Makefile
index b0f7f63ea..a7a562549 100644
--- a/arch/sparc/boot/Makefile
+++ b/arch/sparc/boot/Makefile
@@ -1,4 +1,4 @@
-# $Id: Makefile,v 1.9 1998/10/26 20:01:03 davem Exp $
+# $Id: Makefile,v 1.10 2000/02/23 08:17:46 jj Exp $
# Makefile for the Sparc boot stuff.
#
# Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
@@ -22,16 +22,20 @@ btfixupprep: btfixupprep.c
clean:
rm -f btfixupprep piggyback tftpboot.img btfix.o btfix.s
-BTOBJS := $(HEAD) init/main.o init/version.o \
- $(CORE_FILES_NO_BTFIX) $(FILESYSTEMS) \
- $(NETWORKS) $(DRIVERS)
+BTOBJS := $(HEAD) init/main.o init/version.o
+BTLIBS := $(CORE_FILES_NO_BTFIX) $(FILESYSTEMS) \
+ $(DRIVERS) $(NETWORKS)
# I wanted to make this depend upon BTOBJS so that a parallel
# build would work, but this fails because $(HEAD) cannot work
# properly as it will cause head.o to be built with the implicit
# rules not the ones in kernel/Makefile. Someone please fix. --DaveM
vmlinux.o: dummy
- $(LD) -r $(patsubst %,$(TOPDIR)/%,$(BTOBJS)) $(LIBS) -o vmlinux.o
+ $(LD) -r $(patsubst %,$(TOPDIR)/%,$(BTOBJS)) \
+ --start-group \
+ $(patsubst %,$(TOPDIR)/%,$(BTLIBS)) \
+ $(LIBS) \
+ --end-group -o vmlinux.o
btfix.s: btfixupprep vmlinux.o
$(OBJDUMP) -x vmlinux.o | ./btfixupprep > btfix.s
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index 8c8903d26..f0dbea065 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -695,42 +695,6 @@ _sparc_io_get_info(char *buf, char **start, off_t fpos, int length, int *eof,
return p-buf;
}
-static struct proc_dir_entry _sparc_iomap_proc_entry = {
- 0, /* Inode number - dynamic */
- 6, /* Length of the file name */
- "io_map", /* The file name */
- S_IFREG | S_IRUGO, /* File mode */
- 1, /* Number of links */
- 0, 0, /* The uid and gid for the file */
- 0, /* The size of the file reported by ls. */
- NULL, /* struct inode_operations * ops */
- NULL, /* get_info: backward compatibility */
- NULL, /* owner */
- NULL, NULL, NULL, /* linkage */
- &sparc_iomap,
- _sparc_io_get_info, /* The read function for this file */
- NULL,
- /* and more stuff */
-};
-
-static struct proc_dir_entry _sparc_dvma_proc_entry = {
- 0, /* Inode number - dynamic */
- 8, /* Length of the file name */
- "dvma_map", /* The file name */
- S_IFREG | S_IRUGO, /* File mode */
- 1, /* Number of links */
- 0, 0, /* The uid and gid for the file */
- 0, /* The size of the file reported by ls. */
- NULL, /* struct inode_operations * ops */
- NULL, /* get_info: backward compatibility */
- NULL, /* owner */
- NULL, NULL, NULL, /* linkage */
- &_sparc_dvma,
- _sparc_io_get_info,
- NULL,
- /* some more stuff */
-};
-
#endif CONFIG_PROC_FS
/*
@@ -782,7 +746,7 @@ void ioport_init(void)
};
#ifdef CONFIG_PROC_FS
- proc_register(&proc_root, &_sparc_iomap_proc_entry);
- proc_register(&proc_root, &_sparc_dvma_proc_entry);
+ create_proc_read_entry("io_map",0,0,_sparc_io_get_info,&sparc_iomap);
+ create_proc_read_entry("dvma_map",0,0,_sparc_io_get_info,&_sparc_dvma);
#endif
}
diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq.c
index 471929a01..07aefa660 100644
--- a/arch/sparc/kernel/irq.c
+++ b/arch/sparc/kernel/irq.c
@@ -1,4 +1,4 @@
-/* $Id: irq.c,v 1.101 2000/02/09 11:15:03 davem Exp $
+/* $Id: irq.c,v 1.102 2000/02/25 05:44:35 davem Exp $
* arch/sparc/kernel/irq.c: Interrupt request handling routines. On the
* Sparc the IRQ's are basically 'cast in stone'
* and you are supposed to probe the prom's device
@@ -713,3 +713,8 @@ void __init init_IRQ(void)
}
btfixup();
}
+
+void init_irq_proc(void)
+{
+ /* For now, nothing... */
+}
diff --git a/arch/sparc/kernel/setup.c b/arch/sparc/kernel/setup.c
index 50d682929..d4ac34932 100644
--- a/arch/sparc/kernel/setup.c
+++ b/arch/sparc/kernel/setup.c
@@ -1,4 +1,4 @@
-/* $Id: setup.c,v 1.114 2000/01/29 01:08:57 anton Exp $
+/* $Id: setup.c,v 1.115 2000/02/26 04:24:31 davem Exp $
* linux/arch/sparc/kernel/setup.c
*
* Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
@@ -294,6 +294,8 @@ static struct console prom_console = {
"PROM", prom_cons_write, 0, 0, 0, 0, 0, CON_PRINTBUFFER, 0, 0, 0
};
+extern void paging_init(void);
+
void __init setup_arch(char **cmdline_p)
{
int i;
@@ -478,6 +480,8 @@ void __init setup_arch(char **cmdline_p)
if (serial_console)
conswitchp = NULL;
+
+ paging_init();
}
asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int on)
diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c
index cdc1f0751..d4585d9d5 100644
--- a/arch/sparc/kernel/sparc_ksyms.c
+++ b/arch/sparc/kernel/sparc_ksyms.c
@@ -1,4 +1,4 @@
-/* $Id: sparc_ksyms.c,v 1.91 2000/02/18 20:23:24 davem Exp $
+/* $Id: sparc_ksyms.c,v 1.93 2000/02/26 11:02:45 anton Exp $
* arch/sparc/kernel/ksyms.c: Sparc specific ksyms support.
*
* Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
@@ -92,7 +92,6 @@ __attribute__((section("__ksymtab"))) = \
/* used by various drivers */
EXPORT_SYMBOL(sparc_cpu_model);
-EXPORT_SYMBOL_PRIVATE(_spinlock_waitfor);
EXPORT_SYMBOL(kernel_thread);
#ifdef SPIN_LOCK_DEBUG
EXPORT_SYMBOL(_do_spin_lock);
@@ -246,7 +245,6 @@ EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strtok);
EXPORT_SYMBOL(strstr);
-EXPORT_SYMBOL(strspn);
/* Special internal versions of library functions. */
EXPORT_SYMBOL(__copy_1page);
diff --git a/arch/sparc/lib/locks.S b/arch/sparc/lib/locks.S
index 102541b18..e199f3813 100644
--- a/arch/sparc/lib/locks.S
+++ b/arch/sparc/lib/locks.S
@@ -1,4 +1,4 @@
-/* $Id: locks.S,v 1.15 1998/10/14 09:18:55 jj Exp $
+/* $Id: locks.S,v 1.16 2000/02/26 11:02:47 anton Exp $
* locks.S: SMP low-level lock primitives on Sparc.
*
* Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
@@ -15,25 +15,6 @@
.text
.align 4
- /* This is called when the initial acquisition attempt of a spin
- * lock fails. The calling convention is weird, return address
- * is in %o7 as usual but we agree with the caller to only touch
- * and use %g2 as a temporary. We are passed a ptr to the lock
- * itself in %g1, %g4 must be restored into %o7 when we return,
- * and the caller wants us to return to him at three instructions
- * previous to the call instruction which got us here. See how
- * this is used in asm/spinlock.h if what I just said confuses
- * you to no end.
- */
- .globl ___spinlock_waitfor
-___spinlock_waitfor:
-1: orcc %g2, 0x0, %g0
- bne,a 1b
- ldub [%g1], %g2
- ldstub [%g1], %g2
- jmpl %o7 - 12, %g0
- mov %g4, %o7
-
/* Read/writer locks, as usual this is overly clever to make it
* as fast as possible.
*/
diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c
index 6736dc9d3..3ac49a10b 100644
--- a/arch/sparc/mm/init.c
+++ b/arch/sparc/mm/init.c
@@ -1,4 +1,4 @@
-/* $Id: init.c,v 1.80 2000/02/09 21:11:06 davem Exp $
+/* $Id: init.c,v 1.81 2000/02/26 11:59:31 anton Exp $
* linux/arch/sparc/mm/init.c
*
* Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
@@ -40,7 +40,7 @@ unsigned long phys_base;
struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS];
unsigned long sparc_unmapped_base;
-struct pgtable_cache_struct pgt_quicklists;
+struct pgtable_cache_struct pgt_quicklists = { 0, 0, 0, 0, SPIN_LOCK_UNLOCKED, SPIN_LOCK_UNLOCKED };
/* References to section boundaries */
extern char __init_begin, __init_end, _start, end, etext , edata;
diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig
index 917bb5e74..12e9432a7 100644
--- a/arch/sparc64/defconfig
+++ b/arch/sparc64/defconfig
@@ -273,9 +273,9 @@ CONFIG_SUNBMAC=m
CONFIG_SUNQE=m
CONFIG_DE4X5=m
CONFIG_VORTEX=m
-CONFIG_RTL8139=m
+CONFIG_8139TOO=m
CONFIG_NE2K_PCI=m
-CONFIG_EEXPRESS_PRO100=m
+CONFIG_EEPRO100=m
CONFIG_ADAPTEC_STARFIRE=m
#
@@ -299,7 +299,7 @@ CONFIG_VIDEO_DEV=y
# CONFIG_VIDEO_BT848 is not set
#
-# Filesystems
+# File systems
#
# CONFIG_QUOTA is not set
CONFIG_AUTOFS_FS=m
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index cb659b655..ed9e49685 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -1,4 +1,4 @@
-/* $Id: irq.c,v 1.83 2000/02/11 06:57:17 jj Exp $
+/* $Id: irq.c,v 1.84 2000/02/25 05:44:41 davem Exp $
* irq.c: UltraSparc IRQ handling/init/registry.
*
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
@@ -1156,3 +1156,8 @@ void __init init_IRQ(void)
: "i" (PSTATE_IE)
: "g1");
}
+
+void init_irq_proc(void)
+{
+ /* For now, nothing... */
+}
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
index 0f280f818..ed2e8bd81 100644
--- a/arch/sparc64/kernel/setup.c
+++ b/arch/sparc64/kernel/setup.c
@@ -1,4 +1,4 @@
-/* $Id: setup.c,v 1.50 1999/12/01 10:44:45 davem Exp $
+/* $Id: setup.c,v 1.51 2000/02/26 04:24:32 davem Exp $
* linux/arch/sparc64/kernel/setup.c
*
* Copyright (C) 1995,1996 David S. Miller (davem@caip.rutgers.edu)
@@ -451,6 +451,8 @@ void register_prom_callbacks(void)
"' linux-.soft2 to .soft2");
}
+extern void paging_init(void);
+
void __init setup_arch(char **cmdline_p)
{
extern int serial_console; /* in console.c, of course */
@@ -587,6 +589,8 @@ void __init setup_arch(char **cmdline_p)
#endif
if (serial_console)
conswitchp = NULL;
+
+ paging_init();
}
asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int on)
diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c
index f226a8ae5..8df2116e7 100644
--- a/arch/sparc64/kernel/signal32.c
+++ b/arch/sparc64/kernel/signal32.c
@@ -1,4 +1,4 @@
-/* $Id: signal32.c,v 1.59 2000/01/21 11:38:52 jj Exp $
+/* $Id: signal32.c,v 1.60 2000/02/25 06:02:37 jj Exp $
* arch/sparc64/kernel/signal32.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
@@ -126,6 +126,8 @@ int copy_siginfo_to_user32(siginfo_t32 *to, siginfo_t *from)
err |= __put_user(from->si_pid, &to->si_pid);
err |= __put_user(from->si_uid, &to->si_uid);
break;
+ case SIGURG:
+ case SIGIO:
case SIGSEGV:
case SIGILL:
case SIGFPE:
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index ff3843651..f798358ce 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -1,4 +1,4 @@
-/* $Id: sparc64_ksyms.c,v 1.74 2000/02/09 11:15:07 davem Exp $
+/* $Id: sparc64_ksyms.c,v 1.75 2000/02/21 15:50:08 davem Exp $
* arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support.
*
* Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
@@ -261,7 +261,6 @@ EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strtok);
EXPORT_SYMBOL(strstr);
-EXPORT_SYMBOL(strspn);
#ifdef CONFIG_SOLARIS_EMUL_MODULE
EXPORT_SYMBOL(getname32);
diff --git a/arch/sparc64/lib/VIScsum.S b/arch/sparc64/lib/VIScsum.S
index aad5d941a..9f77c8cb4 100644
--- a/arch/sparc64/lib/VIScsum.S
+++ b/arch/sparc64/lib/VIScsum.S
@@ -1,14 +1,15 @@
-/* $Id: VIScsum.S,v 1.5 1999/07/30 09:35:36 davem Exp $
+/* $Id: VIScsum.S,v 1.6 2000/02/20 23:21:39 davem Exp $
* VIScsum.S: High bandwidth IP checksumming utilizing the UltraSparc
* Visual Instruction Set.
*
* Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 2000 David S. Miller (davem@redhat.com)
*
* Based on older sparc32/sparc64 checksum.S, which is:
*
* Copyright(C) 1995 Linus Torvalds
* Copyright(C) 1995 Miguel de Icaza
- * Copyright(C) 1996,1997 David S. Miller
+ * Copyright(C) 1996, 1997 David S. Miller
* derived from:
* Linux/Alpha checksum c-code
* Linux/ix86 inline checksum assembly
@@ -38,290 +39,290 @@
* tricks are UltraLinux trade secrets :))
*/
-#define START_THE_TRICK(fz,f0,f2,f4,f6,f8,f10) \
- fcmpgt32 %fz, %f0, %g1 /* FPM Group */; \
- fcmpgt32 %fz, %f2, %g2 /* FPM Group */; \
- fcmpgt32 %fz, %f4, %g3 /* FPM Group */; \
- fcmpgt32 %fz, %f6, %g5 /* FPM Group */; \
- inc %g1 /* IEU0 */; \
- fcmpgt32 %fz, %f8, %g7 /* FPM Group */; \
- srl %g1, 1, %g1 /* IEU0 */; \
- inc %g2 /* IEU1 */; \
- fcmpgt32 %fz, %f10, %o3 /* FPM Group */; \
- srl %g2, 1, %g2 /* IEU0 */; \
- add %o2, %g1, %o2 /* IEU1 */; \
- add %g3, 1, %g3 /* IEU0 Group */; \
- srl %g3, 1, %g3 /* IEU0 Group */; \
- add %o2, %g2, %o2 /* IEU1 */; \
- inc %g5 /* IEU0 Group */; \
- add %o2, %g3, %o2 /* IEU1 */;
+#define START_THE_TRICK(fz,f0,f2,f4,f6,f8,f10) \
+ fcmpgt32 %fz, %f0, %g1 /* FPM Group */; \
+ fcmpgt32 %fz, %f2, %g2 /* FPM Group */; \
+ fcmpgt32 %fz, %f4, %g3 /* FPM Group */; \
+ inc %g1 /* IEU0 Group */; \
+ fcmpgt32 %fz, %f6, %g5 /* FPM */; \
+ srl %g1, 1, %g1 /* IEU0 Group */; \
+ fcmpgt32 %fz, %f8, %g7 /* FPM */; \
+ inc %g2 /* IEU0 Group */; \
+ fcmpgt32 %fz, %f10, %o3 /* FPM */; \
+ srl %g2, 1, %g2 /* IEU0 Group */; \
+ inc %g3 /* IEU1 */; \
+ srl %g3, 1, %g3 /* IEU0 Group */; \
+ add %o2, %g1, %o2 /* IEU1 */; \
+ add %o2, %g2, %o2 /* IEU0 Group */; \
+ inc %g5 /* IEU1 */; \
+ add %o2, %g3, %o2 /* IEU0 Group */;
-#define DO_THE_TRICK(O12,O14,f0,f2,f4,f6,f8,f10,f12,f14,F0,F2,F4,F6,F8,F10,F12,F14) \
- fcmpgt32 %O12, %f12, %o4 /* FPM Group */; \
- srl %g5, 1, %g5 /* IEU0 */; \
- inc %g7 /* IEU1 */; \
- fpadd32 %F0, %f0, %F0 /* FPA */; \
- fcmpgt32 %O14, %f14, %o5 /* FPM Group */; \
- srl %g7, 1, %g7 /* IEU0 */; \
- add %o2, %g5, %o2 /* IEU1 */; \
- fpadd32 %F2, %f2, %F2 /* FPA */; \
- inc %o3 /* IEU0 Group */; \
- add %o2, %g7, %o2 /* IEU1 */; \
- fcmpgt32 %f0, %F0, %g1 /* FPM Group */; \
- srl %o3, 1, %o3 /* IEU0 */; \
- inc %o4 /* IEU1 */; \
- fpadd32 %F4, %f4, %F4 /* FPA */; \
- fcmpgt32 %f2, %F2, %g2 /* FPM Group */; \
- srl %o4, 1, %o4 /* IEU0 */; \
- add %o2, %o3, %o2 /* IEU1 */; \
- fpadd32 %F6, %f6, %F6 /* FPA */; \
- inc %o5 /* IEU0 Group */; \
- add %o2, %o4, %o2 /* IEU1 */; \
- fcmpgt32 %f4, %F4, %g3 /* FPM Group */; \
- srl %o5, 1, %o5 /* IEU0 */; \
- inc %g1 /* IEU1 */; \
- fpadd32 %F8, %f8, %F8 /* FPA */; \
- fcmpgt32 %f6, %F6, %g5 /* FPM Group */; \
- srl %g1, 1, %g1 /* IEU0 */; \
- add %o2, %o5, %o2 /* IEU1 */; \
- fpadd32 %F10, %f10, %F10 /* FPA */; \
- inc %g2 /* IEU0 Group */; \
- add %o2, %g1, %o2 /* IEU1 */; \
- fcmpgt32 %f8, %F8, %g7 /* FPM Group */; \
- srl %g2, 1, %g2 /* IEU0 */; \
- inc %g3 /* IEU1 */; \
- fpadd32 %F12, %f12, %F12 /* FPA */; \
- fcmpgt32 %f10, %F10, %o3 /* FPM Group */; \
- srl %g3, 1, %g3 /* IEU0 */; \
- add %o2, %g2, %o2 /* IEU1 */; \
- fpadd32 %F14, %f14, %F14 /* FPA */; \
- inc %g5 /* IEU0 Group */; \
- add %o2, %g3, %o2 /* IEU1 */;
+#define DO_THE_TRICK(O12,O14,f0,f2,f4,f6,f8,f10,f12,f14,F0,F2,F4,F6,F8,F10,F12,F14) \
+ srl %g5, 1, %g5 /* IEU0 Group */; \
+ fpadd32 %F0, %f0, %F0 /* FPA */; \
+ fcmpgt32 %O12, %f12, %o4 /* FPM */; \
+ inc %g7 /* IEU0 Group */; \
+ fpadd32 %F2, %f2, %F2 /* FPA */; \
+ fcmpgt32 %O14, %f14, %o5 /* FPM */; \
+ add %o2, %g5, %o2 /* IEU1 Group */; \
+ fpadd32 %F4, %f4, %F4 /* FPA */; \
+ fcmpgt32 %f0, %F0, %g1 /* FPM */; \
+ srl %g7, 1, %g7 /* IEU0 Group */; \
+ fpadd32 %F6, %f6, %F6 /* FPA */; \
+ fcmpgt32 %f2, %F2, %g2 /* FPM */; \
+ add %o2, %g7, %o2 /* IEU0 Group */; \
+ fpadd32 %F8, %f8, %F8 /* FPA */; \
+ fcmpgt32 %f4, %F4, %g3 /* FPM */; \
+ inc %o3 /* IEU0 Group */; \
+ fpadd32 %F10, %f10, %F10 /* FPA */; \
+ fcmpgt32 %f6, %F6, %g5 /* FPM */; \
+ srl %o3, 1, %o3 /* IEU0 Group */; \
+ fpadd32 %F12, %f12, %F12 /* FPA */; \
+ fcmpgt32 %f8, %F8, %g7 /* FPM */; \
+ add %o2, %o3, %o2 /* IEU0 Group */; \
+ fpadd32 %F14, %f14, %F14 /* FPA */; \
+ fcmpgt32 %f10, %F10, %o3 /* FPM */; \
+ inc %o4 /* IEU0 Group */; \
+ inc %o5 /* IEU1 */; \
+ srl %o4, 1, %o4 /* IEU0 Group */; \
+ inc %g1 /* IEU1 */; \
+ srl %o5, 1, %o5 /* IEU0 Group */; \
+ add %o2, %o4, %o2 /* IEU1 */; \
+ srl %g1, 1, %g1 /* IEU0 Group */; \
+ add %o2, %o5, %o2 /* IEU1 */; \
+ inc %g2 /* IEU0 Group */; \
+ add %o2, %g1, %o2 /* IEU1 */; \
+ srl %g2, 1, %g2 /* IEU0 Group */; \
+ inc %g3 /* IEU1 */; \
+ srl %g3, 1, %g3 /* IEU0 Group */; \
+ add %o2, %g2, %o2 /* IEU1 */; \
+ inc %g5 /* IEU0 Group */; \
+ add %o2, %g3, %o2 /* IEU0 */;
-#define END_THE_TRICK(O12,O14,f0,f2,f4,f6,f8,f10,f12,f14,S0,S1,S2,S3,T0,T1,U0,fz) \
- fcmpgt32 %O12, %f12, %o4 /* FPM Group */; \
- srl %g5, 1, %g5 /* IEU0 */; \
- inc %g7 /* IEU1 */; \
- fpadd32 %f2, %f0, %S0 /* FPA */; \
- fcmpgt32 %O14, %f14, %o5 /* FPM Group */; \
- srl %g7, 1, %g7 /* IEU0 */; \
- add %o2, %g5, %o2 /* IEU1 */; \
- fpadd32 %f6, %f4, %S1 /* FPA */; \
- inc %o3 /* IEU0 Group */; \
- add %o2, %g7, %o2 /* IEU1 */; \
- fcmpgt32 %f0, %S0, %g1 /* FPM Group */; \
- srl %o3, 1, %o3 /* IEU0 */; \
- inc %o4 /* IEU1 */; \
- fpadd32 %f10, %f8, %S2 /* FPA */; \
- fcmpgt32 %f4, %S1, %g2 /* FPM Group */; \
- srl %o4, 1, %o4 /* IEU0 */; \
- add %o2, %o3, %o2 /* IEU1 */; \
- fpadd32 %f14, %f12, %S3 /* FPA */; \
- inc %o5 /* IEU0 Group */; \
- add %o2, %o4, %o2 /* IEU1 */; \
- fzero %fz /* FPA */; \
- fcmpgt32 %f8, %S2, %g3 /* FPM Group */; \
- srl %o5, 1, %o5 /* IEU0 */; \
- inc %g1 /* IEU1 */; \
- fpadd32 %S0, %S1, %T0 /* FPA */; \
- fcmpgt32 %f12, %S3, %g5 /* FPM Group */; \
- srl %g1, 1, %g1 /* IEU0 */; \
- add %o2, %o5, %o2 /* IEU1 */; \
- fpadd32 %S2, %S3, %T1 /* FPA */; \
- inc %g2 /* IEU0 Group */; \
- add %o2, %g1, %o2 /* IEU1 */; \
- fcmpgt32 %S0, %T0, %g7 /* FPM Group */; \
- srl %g2, 1, %g2 /* IEU0 */; \
- inc %g3 /* IEU1 */; \
- fcmpgt32 %S2, %T1, %o3 /* FPM Group */; \
- srl %g3, 1, %g3 /* IEU0 */; \
- add %o2, %g2, %o2 /* IEU1 */; \
- inc %g5 /* IEU0 Group */; \
- add %o2, %g3, %o2 /* IEU1 */; \
- fcmpgt32 %fz, %f2, %o4 /* FPM Group */; \
- srl %g5, 1, %g5 /* IEU0 */; \
- inc %g7 /* IEU1 */; \
- fpadd32 %T0, %T1, %U0 /* FPA */; \
- fcmpgt32 %fz, %f6, %o5 /* FPM Group */; \
- srl %g7, 1, %g7 /* IEU0 */; \
- add %o2, %g5, %o2 /* IEU1 */; \
- inc %o3 /* IEU0 Group */; \
- add %o2, %g7, %o2 /* IEU1 */; \
- fcmpgt32 %fz, %f10, %g1 /* FPM Group */; \
- srl %o3, 1, %o3 /* IEU0 */; \
- inc %o4 /* IEU1 */; \
- fcmpgt32 %fz, %f14, %g2 /* FPM Group */; \
- srl %o4, 1, %o4 /* IEU0 */; \
- add %o2, %o3, %o2 /* IEU1 */; \
- std %U0, [%sp + STACKOFF] /* Store Group */; \
- inc %o5 /* IEU0 */; \
- sub %o2, %o4, %o2 /* IEU1 */; \
- fcmpgt32 %fz, %S1, %g3 /* FPM Group */; \
- srl %o5, 1, %o5 /* IEU0 */; \
- inc %g1 /* IEU1 */; \
- fcmpgt32 %fz, %S3, %g5 /* FPM Group */; \
- srl %g1, 1, %g1 /* IEU0 */; \
- sub %o2, %o5, %o2 /* IEU1 */; \
- ldx [%sp + STACKOFF], %o5 /* Load Group */; \
- inc %g2 /* IEU0 */; \
- sub %o2, %g1, %o2 /* IEU1 */; \
- fcmpgt32 %fz, %T1, %g7 /* FPM Group */; \
- srl %g2, 1, %g2 /* IEU0 */; \
- inc %g3 /* IEU1 */; \
- fcmpgt32 %T0, %U0, %o3 /* FPM Group */; \
- srl %g3, 1, %g3 /* IEU0 */; \
- sub %o2, %g2, %o2 /* IEU1 */; \
- inc %g5 /* IEU0 Group */; \
- sub %o2, %g3, %o2 /* IEU1 */; \
- fcmpgt32 %fz, %U0, %o4 /* FPM Group */; \
- srl %g5, 1, %g5 /* IEU0 */; \
- inc %g7 /* IEU1 */; \
- srl %g7, 1, %g7 /* IEU0 Group */; \
- sub %o2, %g5, %o2 /* IEU1 */; \
- inc %o3 /* IEU0 Group */; \
- sub %o2, %g7, %o2 /* IEU1 */; \
- srl %o3, 1, %o3 /* IEU0 Group */; \
- inc %o4 /* IEU1 */; \
- srl %o4, 1, %o4 /* IEU0 Group */; \
- add %o2, %o3, %o2 /* IEU1 */; \
- sub %o2, %o4, %o2 /* IEU0 Group */; \
- addcc %o2, %o5, %o2 /* IEU1 Group */; \
- bcs,a,pn %xcc, 33f /* CTI */; \
- add %o2, 1, %o2 /* IEU0 */; \
-33: /* That's it */;
+#define END_THE_TRICK(O12,O14,f0,f2,f4,f6,f8,f10,f12,f14,S0,S1,S2,S3,T0,T1,U0,fz) \
+ srl %g5, 1, %g5 /* IEU0 Group */; \
+ fpadd32 %f2, %f0, %S0 /* FPA */; \
+ fcmpgt32 %O12, %f12, %o4 /* FPM */; \
+ inc %g7 /* IEU0 Group */; \
+ fpadd32 %f6, %f4, %S1 /* FPA */; \
+ fcmpgt32 %O14, %f14, %o5 /* FPM */; \
+ srl %g7, 1, %g7 /* IEU0 Group */; \
+ fpadd32 %f10, %f8, %S2 /* FPA */; \
+ fcmpgt32 %f0, %S0, %g1 /* FPM */; \
+ inc %o3 /* IEU0 Group */; \
+ fpadd32 %f14, %f12, %S3 /* FPA */; \
+ fcmpgt32 %f4, %S1, %g2 /* FPM */; \
+ add %o2, %g5, %o2 /* IEU0 Group */; \
+ fpadd32 %S0, %S1, %T0 /* FPA */; \
+ fcmpgt32 %f8, %S2, %g3 /* FPM */; \
+ add %o2, %g7, %o2 /* IEU0 Group */; \
+ fzero %fz /* FPA */; \
+ fcmpgt32 %f12, %S3, %g5 /* FPM */; \
+ srl %o3, 1, %o3 /* IEU0 Group */; \
+ fpadd32 %S2, %S3, %T1 /* FPA */; \
+ fcmpgt32 %S0, %T0, %g7 /* FPM */; \
+ add %o2, %o3, %o2 /* IEU0 Group */; \
+ fpadd32 %T0, %T1, %U0 /* FPA */; \
+ fcmpgt32 %S2, %T1, %o3 /* FPM */; \
+ inc %o4 /* IEU0 Group */; \
+ inc %o5 /* IEU1 */; \
+ srl %o4, 1, %o4 /* IEU0 Group */; \
+ inc %g1 /* IEU1 */; \
+ add %o2, %o4, %o2 /* IEU0 Group */; \
+ fcmpgt32 %fz, %f2, %o4 /* FPM */; \
+ srl %o5, 1, %o5 /* IEU0 Group */; \
+ inc %g2 /* IEU1 */; \
+ add %o2, %o5, %o2 /* IEU0 Group */; \
+ fcmpgt32 %fz, %f6, %o5 /* FPM */; \
+ srl %g1, 1, %g1 /* IEU0 Group */; \
+ inc %g3 /* IEU1 */; \
+ add %o2, %g1, %o2 /* IEU0 Group */; \
+ fcmpgt32 %fz, %f10, %g1 /* FPM */; \
+ srl %g2, 1, %g2 /* IEU0 Group */; \
+ inc %g5 /* IEU1 */; \
+ add %o2, %g2, %o2 /* IEU0 Group */; \
+ fcmpgt32 %fz, %f14, %g2 /* FPM */; \
+ srl %g3, 1, %g3 /* IEU0 Group */; \
+ inc %g7 /* IEU1 */; \
+ add %o2, %g3, %o2 /* IEU0 Group */; \
+ fcmpgt32 %fz, %S1, %g3 /* FPM */; \
+ srl %g5, 1, %g5 /* IEU0 Group */; \
+ inc %o3 /* IEU1 */; \
+ add %o2, %g5, %o2 /* IEU0 Group */; \
+ fcmpgt32 %fz, %S3, %g5 /* FPM */; \
+ srl %g7, 1, %g7 /* IEU0 Group */; \
+ inc %o4 /* IEU1 */; \
+ add %o2, %g7, %o2 /* IEU0 Group */; \
+ fcmpgt32 %fz, %T1, %g7 /* FPM */; \
+ srl %o3, 1, %o3 /* IEU0 Group */; \
+ inc %o5 /* IEU1 */; \
+ add %o2, %o3, %o2 /* IEU0 Group */; \
+ fcmpgt32 %T0, %U0, %o3 /* FPM */; \
+ srl %o4, 1, %o4 /* IEU0 Group */; \
+ inc %g1 /* IEU1 */; \
+ sub %o2, %o4, %o2 /* IEU0 Group */; \
+ fcmpgt32 %fz, %U0, %o4 /* FPM */; \
+ srl %o5, 1, %o5 /* IEU0 Group */; \
+ inc %g2 /* IEU1 */; \
+ srl %g1, 1, %g1 /* IEU0 Group */; \
+ sub %o2, %o5, %o2 /* IEU1 */; \
+ std %U0, [%sp + STACKOFF] /* Store */; \
+ srl %g2, 1, %g2 /* IEU0 Group */; \
+ sub %o2, %g1, %o2 /* IEU1 */; \
+ inc %g3 /* IEU0 Group */; \
+ sub %o2, %g2, %o2 /* IEU1 */; \
+ srl %g3, 1, %g3 /* IEU0 Group */; \
+ inc %g5 /* IEU1 */; \
+ srl %g5, 1, %g5 /* IEU0 Group */; \
+ sub %o2, %g3, %o2 /* IEU1 */; \
+ ldx [%sp + STACKOFF], %o5 /* Load Group */; \
+ inc %g7 /* IEU0 */; \
+ sub %o2, %g5, %o2 /* IEU1 */; \
+ srl %g7, 1, %g7 /* IEU0 Group */; \
+ inc %o3 /* IEU1 */; \
+ srl %o3, 1, %o3 /* IEU0 Group */; \
+ sub %o2, %g7, %o2 /* IEU1 */; \
+ inc %o4 /* IEU0 Group */; \
+ add %o2, %o3, %o2 /* IEU1 */; \
+ srl %o4, 1, %o4 /* IEU0 Group */; \
+ sub %o2, %o4, %o2 /* IEU0 Group */; \
+ addcc %o2, %o5, %o2 /* IEU1 Group */; \
+ bcs,a,pn %xcc, 33f /* CTI */; \
+ add %o2, 1, %o2 /* IEU0 */; \
+33: /* That's it */;
-#define CSUM_LASTCHUNK(offset) \
- ldx [%o0 - offset - 0x10], %g2; \
- ldx [%o0 - offset - 0x08], %g3; \
- addcc %g2, %o2, %o2; \
- bcs,a,pn %xcc, 31f; \
- add %o2, 1, %o2; \
-31: addcc %g3, %o2, %o2; \
- bcs,a,pn %xcc, 32f; \
- add %o2, 1, %o2; \
+#define CSUM_LASTCHUNK(offset) \
+ ldx [%o0 - offset - 0x10], %g2; \
+ ldx [%o0 - offset - 0x08], %g3; \
+ addcc %g2, %o2, %o2; \
+ bcs,a,pn %xcc, 31f; \
+ add %o2, 1, %o2; \
+31: addcc %g3, %o2, %o2; \
+ bcs,a,pn %xcc, 32f; \
+ add %o2, 1, %o2; \
32:
.text
.globl csum_partial
.align 32
csum_partial:
- andcc %o0, 7, %g0 /* IEU1 Group */
- be,pt %icc, 4f /* CTI */
- andcc %o0, 0x38, %g3 /* IEU1 */
- mov 1, %g5 /* IEU0 Group */
- cmp %o1, 6 /* IEU1 */
- bl,pn %icc, 21f /* CTI */
- andcc %o0, 2, %g0 /* IEU1 Group */
- be,pt %icc, 1f /* CTI */
- and %o0, 4, %g7 /* IEU0 */
- lduh [%o0], %g2 /* Load */
- sub %o1, 2, %o1 /* IEU0 Group */
- add %o0, 2, %o0 /* IEU1 */
- andcc %o0, 4, %g7 /* IEU1 Group */
- sll %g5, 16, %g5 /* IEU0 */
- sll %g2, 16, %g2 /* IEU0 Group */
- addcc %g2, %o2, %o2 /* IEU1 Group (regdep) */
- bcs,a,pn %icc, 1f /* CTI */
- add %o2, %g5, %o2 /* IEU0 */
-1: ld [%o0], %g2 /* Load */
- brz,a,pn %g7, 4f /* CTI+IEU1 Group */
- and %o0, 0x38, %g3 /* IEU0 */
- add %o0, 4, %o0 /* IEU0 Group */
- sub %o1, 4, %o1 /* IEU1 */
- addcc %g2, %o2, %o2 /* IEU1 Group */
- bcs,a,pn %icc, 1f /* CTI */
- add %o2, 1, %o2 /* IEU0 */
-1: and %o0, 0x38, %g3 /* IEU1 Group */
-4: srl %o2, 0, %o2 /* IEU0 Group */
- mov 0x40, %g1 /* IEU1 */
- brz,pn %g3, 3f /* CTI+IEU1 Group */
- sub %g1, %g3, %g1 /* IEU0 */
- cmp %o1, 56 /* IEU1 Group */
- blu,pn %icc, 20f /* CTI */
- andcc %o0, 8, %g0 /* IEU1 Group */
- be,pn %icc, 1f /* CTI */
- ldx [%o0], %g2 /* Load */
- add %o0, 8, %o0 /* IEU0 Group */
- sub %o1, 8, %o1 /* IEU1 */
- addcc %g2, %o2, %o2 /* IEU1 Group */
- bcs,a,pn %xcc, 1f /* CTI */
- add %o2, 1, %o2 /* IEU0 */
-1: andcc %g1, 0x10, %g0 /* IEU1 Group */
- be,pn %icc, 2f /* CTI */
- and %g1, 0x20, %g1 /* IEU0 */
- ldx [%o0], %g2 /* Load */
- ldx [%o0+8], %g3 /* Load Group */
- add %o0, 16, %o0 /* IEU0 */
- sub %o1, 16, %o1 /* IEU1 */
- addcc %g2, %o2, %o2 /* IEU1 Group */
- bcs,a,pn %xcc, 1f /* CTI */
- add %o2, 1, %o2 /* IEU0 */
-1: addcc %g3, %o2, %o2 /* IEU1 Group */
- bcs,a,pn %xcc, 2f /* CTI */
- add %o2, 1, %o2 /* IEU0 */
-2: brz,pn %g1, 3f /* CTI+IEU1 Group */
- ldx [%o0], %g2 /* Load */
- ldx [%o0+8], %g3 /* Load Group */
- ldx [%o0+16], %g5 /* Load Group */
- ldx [%o0+24], %g7 /* Load Group */
- add %o0, 32, %o0 /* IEU0 */
- sub %o1, 32, %o1 /* IEU1 */
- addcc %g2, %o2, %o2 /* IEU1 Group */
- bcs,a,pn %xcc, 1f /* CTI */
- add %o2, 1, %o2 /* IEU0 */
-1: addcc %g3, %o2, %o2 /* IEU1 Group */
- bcs,a,pn %xcc, 1f /* CTI */
- add %o2, 1, %o2 /* IEU0 */
-1: addcc %g5, %o2, %o2 /* IEU1 Group */
- bcs,a,pn %xcc, 1f /* CTI */
- add %o2, 1, %o2 /* IEU0 */
-1: addcc %g7, %o2, %o2 /* IEU1 Group */
- bcs,a,pn %xcc, 3f /* CTI */
- add %o2, 1, %o2 /* IEU0 */
-3: cmp %o1, 0xc0 /* IEU1 Group */
- blu,pn %icc, 20f /* CTI */
- sllx %o2, 32, %g5 /* IEU0 */
+ andcc %o0, 7, %g0 /* IEU1 Group */
+ be,pt %icc, 4f /* CTI */
+ andcc %o0, 0x38, %g3 /* IEU1 */
+ mov 1, %g5 /* IEU0 Group */
+ cmp %o1, 6 /* IEU1 */
+ bl,pn %icc, 21f /* CTI */
+ andcc %o0, 2, %g0 /* IEU1 Group */
+ be,pt %icc, 1f /* CTI */
+ and %o0, 4, %g7 /* IEU0 */
+ lduh [%o0], %g2 /* Load */
+ sub %o1, 2, %o1 /* IEU0 Group */
+ add %o0, 2, %o0 /* IEU1 */
+ andcc %o0, 4, %g7 /* IEU1 Group */
+ sll %g5, 16, %g5 /* IEU0 */
+ sll %g2, 16, %g2 /* IEU0 Group */
+ addcc %g2, %o2, %o2 /* IEU1 Group (regdep) */
+ bcs,a,pn %icc, 1f /* CTI */
+ add %o2, %g5, %o2 /* IEU0 */
+1: ld [%o0], %g2 /* Load */
+ brz,a,pn %g7, 4f /* CTI+IEU1 Group */
+ and %o0, 0x38, %g3 /* IEU0 */
+ add %o0, 4, %o0 /* IEU0 Group */
+ sub %o1, 4, %o1 /* IEU1 */
+ addcc %g2, %o2, %o2 /* IEU1 Group */
+ bcs,a,pn %icc, 1f /* CTI */
+ add %o2, 1, %o2 /* IEU0 */
+1: and %o0, 0x38, %g3 /* IEU1 Group */
+4: srl %o2, 0, %o2 /* IEU0 Group */
+ mov 0x40, %g1 /* IEU1 */
+ brz,pn %g3, 3f /* CTI+IEU1 Group */
+ sub %g1, %g3, %g1 /* IEU0 */
+ cmp %o1, 56 /* IEU1 Group */
+ blu,pn %icc, 20f /* CTI */
+ andcc %o0, 8, %g0 /* IEU1 Group */
+ be,pn %icc, 1f /* CTI */
+ ldx [%o0], %g2 /* Load */
+ add %o0, 8, %o0 /* IEU0 Group */
+ sub %o1, 8, %o1 /* IEU1 */
+ addcc %g2, %o2, %o2 /* IEU1 Group */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %o2, 1, %o2 /* IEU0 */
+1: andcc %g1, 0x10, %g0 /* IEU1 Group */
+ be,pn %icc, 2f /* CTI */
+ and %g1, 0x20, %g1 /* IEU0 */
+ ldx [%o0], %g2 /* Load */
+ ldx [%o0+8], %g3 /* Load Group */
+ add %o0, 16, %o0 /* IEU0 */
+ sub %o1, 16, %o1 /* IEU1 */
+ addcc %g2, %o2, %o2 /* IEU1 Group */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %o2, 1, %o2 /* IEU0 */
+1: addcc %g3, %o2, %o2 /* IEU1 Group */
+ bcs,a,pn %xcc, 2f /* CTI */
+ add %o2, 1, %o2 /* IEU0 */
+2: brz,pn %g1, 3f /* CTI+IEU1 Group */
+ ldx [%o0], %g2 /* Load */
+ ldx [%o0+8], %g3 /* Load Group */
+ ldx [%o0+16], %g5 /* Load Group */
+ ldx [%o0+24], %g7 /* Load Group */
+ add %o0, 32, %o0 /* IEU0 */
+ sub %o1, 32, %o1 /* IEU1 */
+ addcc %g2, %o2, %o2 /* IEU1 Group */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %o2, 1, %o2 /* IEU0 */
+1: addcc %g3, %o2, %o2 /* IEU1 Group */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %o2, 1, %o2 /* IEU0 */
+1: addcc %g5, %o2, %o2 /* IEU1 Group */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %o2, 1, %o2 /* IEU0 */
+1: addcc %g7, %o2, %o2 /* IEU1 Group */
+ bcs,a,pn %xcc, 3f /* CTI */
+ add %o2, 1, %o2 /* IEU0 */
+3: cmp %o1, 0xc0 /* IEU1 Group */
+ blu,pn %icc, 20f /* CTI */
+ sllx %o2, 32, %g5 /* IEU0 */
#ifdef __KERNEL__
VISEntry
#endif
- addcc %o2, %g5, %o2 /* IEU1 Group */
- sub %o1, 0xc0, %o1 /* IEU0 */
- wr %g0, ASI_BLK_P, %asi /* LSU Group */
- membar #StoreLoad /* LSU Group */
- srlx %o2, 32, %o2 /* IEU0 Group */
- bcs,a,pn %xcc, 1f /* CTI */
- add %o2, 1, %o2 /* IEU1 */
-1: andcc %o1, 0x80, %g0 /* IEU1 Group */
- bne,pn %icc, 7f /* CTI */
- andcc %o1, 0x40, %g0 /* IEU1 Group */
- be,pn %icc, 6f /* CTI */
- fzero %f12 /* FPA */
- fzero %f14 /* FPA Group */
+ addcc %o2, %g5, %o2 /* IEU1 Group */
+ sub %o1, 0xc0, %o1 /* IEU0 */
+ wr %g0, ASI_BLK_P, %asi /* LSU Group */
+ membar #StoreLoad /* LSU Group */
+ srlx %o2, 32, %o2 /* IEU0 Group */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %o2, 1, %o2 /* IEU1 */
+1: andcc %o1, 0x80, %g0 /* IEU1 Group */
+ bne,pn %icc, 7f /* CTI */
+ andcc %o1, 0x40, %g0 /* IEU1 Group */
+ be,pn %icc, 6f /* CTI */
+ fzero %f12 /* FPA */
+ fzero %f14 /* FPA Group */
ldda [%o0 + 0x000] %asi, %f16
ldda [%o0 + 0x040] %asi, %f32
ldda [%o0 + 0x080] %asi, %f48
START_THE_TRICK(f12,f16,f18,f20,f22,f24,f26)
ba,a,pt %xcc, 3f
-6: sub %o0, 0x40, %o0 /* IEU0 Group */
- fzero %f28 /* FPA */
- fzero %f30 /* FPA Group */
+6: sub %o0, 0x40, %o0 /* IEU0 Group */
+ fzero %f28 /* FPA */
+ fzero %f30 /* FPA Group */
ldda [%o0 + 0x040] %asi, %f32
ldda [%o0 + 0x080] %asi, %f48
ldda [%o0 + 0x0c0] %asi, %f0
START_THE_TRICK(f28,f32,f34,f36,f38,f40,f42)
ba,a,pt %xcc, 4f
-7: bne,pt %icc, 8f /* CTI */
- fzero %f44 /* FPA */
- add %o0, 0x40, %o0 /* IEU0 Group */
- fzero %f60 /* FPA */
- fzero %f62 /* FPA Group */
+7: bne,pt %icc, 8f /* CTI */
+ fzero %f44 /* FPA */
+ add %o0, 0x40, %o0 /* IEU0 Group */
+ fzero %f60 /* FPA */
+ fzero %f62 /* FPA Group */
ldda [%o0 - 0x040] %asi, %f0
ldda [%o0 + 0x000] %asi, %f16
ldda [%o0 + 0x040] %asi, %f32
START_THE_TRICK(f60,f0,f2,f4,f6,f8,f10)
ba,a,pt %xcc, 2f
-8: add %o0, 0x80, %o0 /* IEU0 Group */
- fzero %f46 /* FPA */
+8: add %o0, 0x80, %o0 /* IEU0 Group */
+ fzero %f46 /* FPA */
ldda [%o0 - 0x080] %asi, %f48
ldda [%o0 - 0x040] %asi, %f0
ldda [%o0 + 0x000] %asi, %f16
@@ -333,36 +334,36 @@ csum_partial:
3: DO_THE_TRICK(f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46)
ldda [%o0 + 0x0c0] %asi, %f0
4: DO_THE_TRICK(f28,f30,f32,f34,f36,f38,f40,f42,f44,f46,f48,f50,f52,f54,f56,f58,f60,f62)
- add %o0, 0x100, %o0 /* IEU0 Group */
- subcc %o1, 0x100, %o1 /* IEU1 */
- bgeu,a,pt %icc, 1b /* CTI */
+ add %o0, 0x100, %o0 /* IEU0 Group */
+ subcc %o1, 0x100, %o1 /* IEU1 */
+ bgeu,a,pt %icc, 1b /* CTI */
ldda [%o0 + 0x000] %asi, %f16
- membar #Sync /* LSU Group */
+ membar #Sync /* LSU Group */
DO_THE_TRICK(f44,f46,f48,f50,f52,f54,f56,f58,f60,f62,f0,f2,f4,f6,f8,f10,f12,f14)
END_THE_TRICK(f60,f62,f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30)
#ifdef __KERNEL__
ldub [%g6 + AOFF_task_thread + AOFF_thread_current_ds], %g7
#endif
- and %o1, 0x3f, %o1 /* IEU0 Group */
+ and %o1, 0x3f, %o1 /* IEU0 Group */
#ifdef __KERNEL__
VISExit
wr %g7, %g0, %asi
#endif
-20: andcc %o1, 0xf0, %g1 /* IEU1 Group */
- be,pn %icc, 23f /* CTI */
- and %o1, 0xf, %o3 /* IEU0 */
+20: andcc %o1, 0xf0, %g1 /* IEU1 Group */
+ be,pn %icc, 23f /* CTI */
+ and %o1, 0xf, %o3 /* IEU0 */
#ifdef __KERNEL__
-22: sll %g1, 1, %o4 /* IEU0 Group */
- sethi %hi(23f), %g7 /* IEU1 */
- sub %g7, %o4, %g7 /* IEU0 Group */
- jmpl %g7 + %lo(23f), %g0 /* CTI Group brk forced */
- add %o0, %g1, %o0 /* IEU0 */
+22: sll %g1, 1, %o4 /* IEU0 Group */
+ sethi %hi(23f), %g7 /* IEU1 */
+ sub %g7, %o4, %g7 /* IEU0 Group */
+ jmpl %g7 + %lo(23f), %g0 /* CTI Group brk forced*/
+ add %o0, %g1, %o0 /* IEU0 */
#else
-22: rd %pc, %g7 /* LSU Group+4bubbles */
- sll %g1, 1, %o4 /* IEU0 Group */
- sub %g7, %o4, %g7 /* IEU0 Group (regdep) */
- jmpl %g7 + (23f - 22b), %g0 /* CTI Group brk forced */
- add %o0, %g1, %o0 /* IEU0 */
+22: rd %pc, %g7 /* LSU Group+4bubbles */
+ sll %g1, 1, %o4 /* IEU0 Group */
+ sub %g7, %o4, %g7 /* IEU0 Group (regdep) */
+ jmpl %g7 + (23f - 22b), %g0 /* CTI Group brk forced*/
+ add %o0, %g1, %o0 /* IEU0 */
#endif
CSUM_LASTCHUNK(0xe0)
CSUM_LASTCHUNK(0xd0)
@@ -379,72 +380,72 @@ csum_partial:
CSUM_LASTCHUNK(0x20)
CSUM_LASTCHUNK(0x10)
CSUM_LASTCHUNK(0x00)
-23: brnz,pn %o3, 26f /* CTI+IEU1 Group */
-24: sllx %o2, 32, %g1 /* IEU0 */
-25: addcc %o2, %g1, %o0 /* IEU1 Group */
- srlx %o0, 32, %o0 /* IEU0 Group (regdep) */
- bcs,a,pn %xcc, 1f /* CTI */
- add %o0, 1, %o0 /* IEU1 */
-1: retl /* CTI Group brk forced */
- srl %o0, 0, %o0 /* IEU0 */
-26: andcc %o1, 8, %g0 /* IEU1 Group */
- be,pn %icc, 1f /* CTI */
- ldx [%o0], %g3 /* Load */
- add %o0, 8, %o0 /* IEU0 Group */
- addcc %g3, %o2, %o2 /* IEU1 Group */
- bcs,a,pn %xcc, 1f /* CTI */
- add %o2, 1, %o2 /* IEU0 */
-1: andcc %o1, 4, %g0 /* IEU1 Group */
- be,a,pn %icc, 1f /* CTI */
- clr %g2 /* IEU0 */
- ld [%o0], %g2 /* Load */
- add %o0, 4, %o0 /* IEU0 Group */
- sllx %g2, 32, %g2 /* IEU0 Group */
-1: andcc %o1, 2, %g0 /* IEU1 */
- be,a,pn %icc, 1f /* CTI */
- clr %o4 /* IEU0 Group */
- lduh [%o0], %o4 /* Load */
- add %o0, 2, %o0 /* IEU1 */
- sll %o4, 16, %o4 /* IEU0 Group */
-1: andcc %o1, 1, %g0 /* IEU1 */
- be,a,pn %icc, 1f /* CTI */
- clr %o5 /* IEU0 Group */
- ldub [%o0], %o5 /* Load */
- sll %o5, 8, %o5 /* IEU0 Group */
-1: or %g2, %o4, %o4 /* IEU1 */
- or %o5, %o4, %o4 /* IEU0 Group (regdep) */
- addcc %o4, %o2, %o2 /* IEU1 Group (regdep) */
- bcs,a,pn %xcc, 1f /* CTI */
- add %o2, 1, %o2 /* IEU0 */
-1: ba,pt %xcc, 25b /* CTI Group */
- sllx %o2, 32, %g1 /* IEU0 */
-21: srl %o2, 0, %o2 /* IEU0 Group */
- cmp %o1, 0 /* IEU1 */
- be,pn %icc, 24b /* CTI */
- andcc %o1, 4, %g0 /* IEU1 Group */
- be,a,pn %icc, 1f /* CTI */
- clr %g2 /* IEU0 */
- lduh [%o0], %g3 /* Load */
- lduh [%o0+2], %g2 /* Load Group */
- add %o0, 4, %o0 /* IEU0 Group */
- sllx %g3, 48, %g3 /* IEU0 Group */
- sllx %g2, 32, %g2 /* IEU0 Group */
- or %g3, %g2, %g2 /* IEU0 Group */
-1: andcc %o1, 2, %g0 /* IEU1 */
- be,a,pn %icc, 1f /* CTI */
- clr %o4 /* IEU0 Group */
- lduh [%o0], %o4 /* Load */
- add %o0, 2, %o0 /* IEU1 */
- sll %o4, 16, %o4 /* IEU0 Group */
-1: andcc %o1, 1, %g0 /* IEU1 */
- be,a,pn %icc, 1f /* CTI */
- clr %o5 /* IEU0 Group */
- ldub [%o0], %o5 /* Load */
- sll %o5, 8, %o5 /* IEU0 Group */
-1: or %g2, %o4, %o4 /* IEU1 */
- or %o5, %o4, %o4 /* IEU0 Group (regdep) */
- addcc %o4, %o2, %o2 /* IEU1 Group (regdep) */
- bcs,a,pn %xcc, 1f /* CTI */
- add %o2, 1, %o2 /* IEU0 */
-1: ba,pt %xcc, 25b /* CTI Group */
- sllx %o2, 32, %g1 /* IEU0 */
+23: brnz,pn %o3, 26f /* CTI+IEU1 Group */
+24: sllx %o2, 32, %g1 /* IEU0 */
+25: addcc %o2, %g1, %o0 /* IEU1 Group */
+ srlx %o0, 32, %o0 /* IEU0 Group (regdep) */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %o0, 1, %o0 /* IEU1 */
+1: retl /* CTI Group brk forced*/
+ srl %o0, 0, %o0 /* IEU0 */
+26: andcc %o1, 8, %g0 /* IEU1 Group */
+ be,pn %icc, 1f /* CTI */
+ ldx [%o0], %g3 /* Load */
+ add %o0, 8, %o0 /* IEU0 Group */
+ addcc %g3, %o2, %o2 /* IEU1 Group */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %o2, 1, %o2 /* IEU0 */
+1: andcc %o1, 4, %g0 /* IEU1 Group */
+ be,a,pn %icc, 1f /* CTI */
+ clr %g2 /* IEU0 */
+ ld [%o0], %g2 /* Load */
+ add %o0, 4, %o0 /* IEU0 Group */
+ sllx %g2, 32, %g2 /* IEU0 Group */
+1: andcc %o1, 2, %g0 /* IEU1 */
+ be,a,pn %icc, 1f /* CTI */
+ clr %o4 /* IEU0 Group */
+ lduh [%o0], %o4 /* Load */
+ add %o0, 2, %o0 /* IEU1 */
+ sll %o4, 16, %o4 /* IEU0 Group */
+1: andcc %o1, 1, %g0 /* IEU1 */
+ be,a,pn %icc, 1f /* CTI */
+ clr %o5 /* IEU0 Group */
+ ldub [%o0], %o5 /* Load */
+ sll %o5, 8, %o5 /* IEU0 Group */
+1: or %g2, %o4, %o4 /* IEU1 */
+ or %o5, %o4, %o4 /* IEU0 Group (regdep) */
+ addcc %o4, %o2, %o2 /* IEU1 Group (regdep) */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %o2, 1, %o2 /* IEU0 */
+1: ba,pt %xcc, 25b /* CTI Group */
+ sllx %o2, 32, %g1 /* IEU0 */
+21: srl %o2, 0, %o2 /* IEU0 Group */
+ cmp %o1, 0 /* IEU1 */
+ be,pn %icc, 24b /* CTI */
+ andcc %o1, 4, %g0 /* IEU1 Group */
+ be,a,pn %icc, 1f /* CTI */
+ clr %g2 /* IEU0 */
+ lduh [%o0], %g3 /* Load */
+ lduh [%o0+2], %g2 /* Load Group */
+ add %o0, 4, %o0 /* IEU0 Group */
+ sllx %g3, 48, %g3 /* IEU0 Group */
+ sllx %g2, 32, %g2 /* IEU0 Group */
+ or %g3, %g2, %g2 /* IEU0 Group */
+1: andcc %o1, 2, %g0 /* IEU1 */
+ be,a,pn %icc, 1f /* CTI */
+ clr %o4 /* IEU0 Group */
+ lduh [%o0], %o4 /* Load */
+ add %o0, 2, %o0 /* IEU1 */
+ sll %o4, 16, %o4 /* IEU0 Group */
+1: andcc %o1, 1, %g0 /* IEU1 */
+ be,a,pn %icc, 1f /* CTI */
+ clr %o5 /* IEU0 Group */
+ ldub [%o0], %o5 /* Load */
+ sll %o5, 8, %o5 /* IEU0 Group */
+1: or %g2, %o4, %o4 /* IEU1 */
+ or %o5, %o4, %o4 /* IEU0 Group (regdep) */
+ addcc %o4, %o2, %o2 /* IEU1 Group (regdep) */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %o2, 1, %o2 /* IEU0 */
+1: ba,pt %xcc, 25b /* CTI Group */
+ sllx %o2, 32, %g1 /* IEU0 */
diff --git a/arch/sparc64/lib/VIScsumcopy.S b/arch/sparc64/lib/VIScsumcopy.S
index 3f89eea29..9b0193022 100644
--- a/arch/sparc64/lib/VIScsumcopy.S
+++ b/arch/sparc64/lib/VIScsumcopy.S
@@ -1,4 +1,4 @@
-/* $Id: VIScsumcopy.S,v 1.7 2000/01/19 04:06:03 davem Exp $
+/* $Id: VIScsumcopy.S,v 1.8 2000/02/20 23:21:39 davem Exp $
* VIScsumcopy.S: High bandwidth IP checksumming with simultaneous
* copying utilizing the UltraSparc Visual Instruction Set.
*
@@ -62,384 +62,386 @@
* per 64bytes checksummed/copied.
*/
-#define LDBLK(O0) \
- ldda [%src] %asi, %O0 /* Load Group */
+#define LDBLK(O0) \
+ ldda [%src] %asi, %O0 /* Load Group */
-#define STBLK \
- stda %f48, [%dst] ASI_BLK_P /* Store */
+#define STBLK \
+ stda %f48, [%dst] ASI_BLK_P /* Store */
-#define ST(fx,off) \
- std %fx, [%dst + off] /* Store */
+#define ST(fx,off) \
+ std %fx, [%dst + off] /* Store */
-#define SYNC \
+#define SYNC \
membar #Sync
#define DO_THE_TRICK(f0,f2,f4,f6,f8,f10,f12,f14,F0,F2,F4,F6,F8,F10,F12,F14,DUMMY1,A0,A2,A4,A6,A8,A10,A12,A14,B14,DUMMY2,LOAD,STORE1,STORE2,STORE3,STORE4,STORE5,STORE6,STORE7,STORE8,DUMMY3,BRANCH...) \
- LOAD /* Load Group */; \
- faligndata %A14, %F0, %A14 /* FPA Group */; \
- inc %x5 /* IEU0 */; \
- STORE1 /* Store (optional) */; \
- faligndata %F0, %F2, %A0 /* FPA Group */; \
- srl %x5, 1, %x5 /* IEU0 */; \
- add %sum, %x4, %sum /* IEU1 */; \
- fpadd32 %F0, %f0, %F0 /* FPA Group */; \
- inc %x6 /* IEU0 */; \
- STORE2 /* Store (optional) */; \
- faligndata %F2, %F4, %A2 /* FPA Group */; \
- srl %x6, 1, %x6 /* IEU0 */; \
- add %sum, %x5, %sum /* IEU1 */; \
- fpadd32 %F2, %f2, %F2 /* FPA Group */; \
- add %src, 64, %src /* IEU0 */; \
- add %dst, 64, %dst /* IEU1 */; \
- fcmpgt32 %f0, %F0, %x1 /* FPM Group */; \
- inc %x7 /* IEU0 */; \
- STORE3 /* Store (optional) */; \
- faligndata %F4, %F6, %A4 /* FPA */; \
- srl %x7, 1, %x7 /* IEU0 Group */; \
- add %sum, %x6, %sum /* IEU1 */; \
- fpadd32 %F4, %f4, %F4 /* FPA */; \
- fcmpgt32 %f2, %F2, %x2 /* FPM Group */; \
- inc %x8 /* IEU0 */; \
- STORE4 /* Store (optional) */; \
- faligndata %F6, %F8, %A6 /* FPA */; \
- srl %x8, 1, %x8 /* IEU0 Group */; \
- add %sum, %x7, %sum /* IEU1 */; \
- fpadd32 %F6, %f6, %F6 /* FPA */; \
- fcmpgt32 %f4, %F4, %x3 /* FPM Group */; \
- inc %x1 /* IEU0 */; \
- STORE5 /* Store (optional) */; \
- faligndata %F8, %F10, %A8 /* FPA */; \
- srl %x1, 1, %x1 /* IEU0 Group */; \
- add %sum, %x8, %sum /* IEU1 */; \
- fpadd32 %F8, %f8, %F8 /* FPA */; \
- fcmpgt32 %f6, %F6, %x4 /* FPM Group */; \
- inc %x2 /* IEU0 */; \
- STORE6 /* Store (optional) */; \
- faligndata %F10, %F12, %A10 /* FPA */; \
- srl %x2, 1, %x2 /* IEU0 Group */; \
- add %sum, %x1, %sum /* IEU1 */; \
- fpadd32 %F10, %f10, %F10 /* FPA */; \
- fcmpgt32 %f8, %F8, %x5 /* FPM Group */; \
- inc %x3 /* IEU0 */; \
- STORE7 /* Store (optional) */; \
- faligndata %F12, %F14, %A12 /* FPA */; \
- srl %x3, 1, %x3 /* IEU0 Group */; \
- add %sum, %x2, %sum /* IEU1 */; \
- fpadd32 %F12, %f12, %F12 /* FPA */; \
- fcmpgt32 %f10, %F10, %x6 /* FPM Group */; \
- inc %x4 /* IEU0 */; \
- STORE8 /* Store (optional) */; \
- fmovd %F14, %B14 /* FPA */; \
- srl %x4, 1, %x4 /* IEU0 Group */; \
- add %sum, %x3, %sum /* IEU1 */; \
- fpadd32 %F14, %f14, %F14 /* FPA */; \
- fcmpgt32 %f12, %F12, %x7 /* FPM Group */; \
- subcc %len, 64, %len /* IEU1 */; \
- BRANCH /* CTI */; \
- fcmpgt32 %f14, %F14, %x8 /* FPM Group */; \
+ LOAD /* Load (Group) */; \
+ faligndata %A14, %F0, %A14 /* FPA Group */; \
+ inc %x5 /* IEU0 */; \
+ STORE1 /* Store (optional) */; \
+ faligndata %F0, %F2, %A0 /* FPA Group */; \
+ srl %x5, 1, %x5 /* IEU0 */; \
+ add %sum, %x4, %sum /* IEU1 */; \
+ fpadd32 %F0, %f0, %F0 /* FPA Group */; \
+ inc %x6 /* IEU0 */; \
+ STORE2 /* Store (optional) */; \
+ faligndata %F2, %F4, %A2 /* FPA Group */; \
+ srl %x6, 1, %x6 /* IEU0 */; \
+ add %sum, %x5, %sum /* IEU1 */; \
+ fpadd32 %F2, %f2, %F2 /* FPA Group */; \
+ add %src, 64, %src /* IEU0 */; \
+ fcmpgt32 %f0, %F0, %x1 /* FPM */; \
+ add %dst, 64, %dst /* IEU1 Group */; \
+ inc %x7 /* IEU0 */; \
+ STORE3 /* Store (optional) */; \
+ faligndata %F4, %F6, %A4 /* FPA */; \
+ fpadd32 %F4, %f4, %F4 /* FPA Group */; \
+ add %sum, %x6, %sum /* IEU1 */; \
+ fcmpgt32 %f2, %F2, %x2 /* FPM */; \
+ srl %x7, 1, %x7 /* IEU0 Group */; \
+ inc %x8 /* IEU1 */; \
+ STORE4 /* Store (optional) */; \
+ faligndata %F6, %F8, %A6 /* FPA */; \
+ fpadd32 %F6, %f6, %F6 /* FPA Group */; \
+ srl %x8, 1, %x8 /* IEU0 */; \
+ fcmpgt32 %f4, %F4, %x3 /* FPM */; \
+ add %sum, %x7, %sum /* IEU0 Group */; \
+ inc %x1 /* IEU1 */; \
+ STORE5 /* Store (optional) */; \
+ faligndata %F8, %F10, %A8 /* FPA */; \
+ fpadd32 %F8, %f8, %F8 /* FPA Group */; \
+ srl %x1, 1, %x1 /* IEU0 */; \
+ fcmpgt32 %f6, %F6, %x4 /* FPM */; \
+ add %sum, %x8, %sum /* IEU0 Group */; \
+ inc %x2 /* IEU1 */; \
+ STORE6 /* Store (optional) */; \
+ faligndata %F10, %F12, %A10 /* FPA */; \
+ fpadd32 %F10, %f10, %F10 /* FPA Group */; \
+ srl %x2, 1, %x2 /* IEU0 */; \
+ fcmpgt32 %f8, %F8, %x5 /* FPM */; \
+ add %sum, %x1, %sum /* IEU0 Group */; \
+ inc %x3 /* IEU1 */; \
+ STORE7 /* Store (optional) */; \
+ faligndata %F12, %F14, %A12 /* FPA */; \
+ fpadd32 %F12, %f12, %F12 /* FPA Group */; \
+ srl %x3, 1, %x3 /* IEU0 */; \
+ fcmpgt32 %f10, %F10, %x6 /* FPM */; \
+ add %sum, %x2, %sum /* IEU0 Group */; \
+ inc %x4 /* IEU1 */; \
+ STORE8 /* Store (optional) */; \
+ fmovd %F14, %B14 /* FPA */; \
+ fpadd32 %F14, %f14, %F14 /* FPA Group */; \
+ srl %x4, 1, %x4 /* IEU0 */; \
+ fcmpgt32 %f12, %F12, %x7 /* FPM */; \
+ add %sum, %x3, %sum /* IEU0 Group */; \
+ subcc %len, 64, %len /* IEU1 */; \
+ BRANCH /* CTI */; \
+ fcmpgt32 %f14, %F14, %x8 /* FPM Group */;
#define END_THE_TRICK(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB,S0,S1,S2,S3,T0,T1,U0,fz) \
- inc %x5 /* IEU0 Group */; \
- fpadd32 %f2, %f0, %S0 /* FPA */; \
- srl %x5, 1, %x5 /* IEU0 Group */; \
- add %sum, %x4, %sum /* IEU1 */; \
- fpadd32 %f6, %f4, %S1 /* FPA */; \
- inc %x6 /* IEU0 Group */; \
- add %sum, %x5, %sum /* IEU1 */; \
- fcmpgt32 %f0, %S0, %x1 /* FPM Group */; \
- srl %x6, 1, %x6 /* IEU0 */; \
- inc %x7 /* IEU1 */; \
- fpadd32 %f10, %f8, %S2 /* FPA */; \
- fcmpgt32 %f4, %S1, %x2 /* FPM Group */; \
- srl %x7, 1, %x7 /* IEU0 */; \
- add %sum, %x6, %sum /* IEU1 */; \
- fpadd32 %f14, %f12, %S3 /* FPA */; \
- inc %x8 /* IEU0 Group */; \
- add %sum, %x7, %sum /* IEU1 */; \
- fzero %fz /* FPA */; \
- fcmpgt32 %f8, %S2, %x3 /* FPM Group */; \
- srl %x8, 1, %x8 /* IEU0 */; \
- inc %x1 /* IEU1 */; \
- fpadd32 %S0, %S1, %T0 /* FPA */; \
- fcmpgt32 %f12, %S3, %x4 /* FPM Group */; \
- srl %x1, 1, %x1 /* IEU0 */; \
- add %sum, %x8, %sum /* IEU1 */; \
- fpadd32 %S2, %S3, %T1 /* FPA */; \
- inc %x2 /* IEU0 Group */; \
- add %sum, %x1, %sum /* IEU1 */; \
- fcmpgt32 %S0, %T0, %x5 /* FPM Group */; \
- srl %x2, 1, %x2 /* IEU0 */; \
- inc %x3 /* IEU1 */; \
- fcmpgt32 %S2, %T1, %x6 /* FPM Group */; \
- srl %x3, 1, %x3 /* IEU0 */; \
- add %sum, %x2, %sum /* IEU1 */; \
- inc %x4 /* IEU0 Group */; \
- add %sum, %x3, %sum /* IEU1 */; \
- fcmpgt32 %fz, %f2, %x7 /* FPM Group */; \
- srl %x4, 1, %x4 /* IEU0 */; \
- inc %x5 /* IEU1 */; \
- fpadd32 %T0, %T1, %U0 /* FPA */; \
- fcmpgt32 %fz, %f6, %x8 /* FPM Group */; \
- srl %x5, 1, %x5 /* IEU0 */; \
- add %sum, %x4, %sum /* IEU1 */; \
- inc %x6 /* IEU0 Group */; \
- add %sum, %x5, %sum /* IEU1 */; \
- fcmpgt32 %fz, %f10, %x1 /* FPM Group */; \
- srl %x6, 1, %x6 /* IEU0 */; \
- inc %x7 /* IEU1 */; \
- fcmpgt32 %fz, %f14, %x2 /* FPM Group */; \
- ba,pt %xcc, ett /* CTI */; \
- fmovd %FA, %FB /* FPA */; \
+ inc %x5 /* IEU0 Group */; \
+ fpadd32 %f2, %f0, %S0 /* FPA */; \
+ add %sum, %x4, %sum /* IEU1 */; \
+ srl %x5, 1, %x5 /* IEU0 Group */; \
+ fpadd32 %f6, %f4, %S1 /* FPA */; \
+ inc %x6 /* IEU1 */; \
+ fpadd32 %f10, %f8, %S2 /* FPA Group */; \
+ add %sum, %x5, %sum /* IEU0 */; \
+ fcmpgt32 %f0, %S0, %x1 /* FPM */; \
+ fpadd32 %f14, %f12, %S3 /* FPA Group */; \
+ srl %x6, 1, %x6 /* IEU0 */; \
+ fcmpgt32 %f4, %S1, %x2 /* FPM */; \
+ add %sum, %x6, %sum /* IEU0 Group */; \
+ fzero %fz /* FPA */; \
+ fcmpgt32 %f8, %S2, %x3 /* FPM */; \
+ inc %x7 /* IEU0 Group */; \
+ inc %x8 /* IEU1 */; \
+ srl %x7, 1, %x7 /* IEU0 Group */; \
+ inc %x1 /* IEU1 */; \
+ fpadd32 %S0, %S1, %T0 /* FPA */; \
+ fpadd32 %S2, %S3, %T1 /* FPA Group */; \
+ add %sum, %x7, %sum /* IEU0 */; \
+ fcmpgt32 %f12, %S3, %x4 /* FPM */; \
+ srl %x8, 1, %x8 /* IEU0 Group */; \
+ inc %x2 /* IEU1 */; \
+ srl %x1, 1, %x1 /* IEU0 Group */; \
+ add %sum, %x8, %sum /* IEU1 */; \
+ add %sum, %x1, %sum /* IEU0 Group */; \
+ fcmpgt32 %S0, %T0, %x5 /* FPM */; \
+ srl %x2, 1, %x2 /* IEU0 Group */; \
+ fcmpgt32 %S2, %T1, %x6 /* FPM */; \
+ inc %x3 /* IEU0 Group */; \
+ add %sum, %x2, %sum /* IEU1 */; \
+ srl %x3, 1, %x3 /* IEU0 Group */; \
+ inc %x4 /* IEU1 */; \
+ fpadd32 %T0, %T1, %U0 /* FPA Group */; \
+ add %sum, %x3, %sum /* IEU0 */; \
+ fcmpgt32 %fz, %f2, %x7 /* FPM */; \
+ srl %x4, 1, %x4 /* IEU0 Group */; \
+ fcmpgt32 %fz, %f6, %x8 /* FPM */; \
+ inc %x5 /* IEU0 Group */; \
+ add %sum, %x4, %sum /* IEU1 */; \
+ srl %x5, 1, %x5 /* IEU0 Group */; \
+ fcmpgt32 %fz, %f10, %x1 /* FPM */; \
+ inc %x6 /* IEU0 Group */; \
+ add %sum, %x5, %sum /* IEU1 */; \
+ fmovd %FA, %FB /* FPA Group */; \
+ fcmpgt32 %fz, %f14, %x2 /* FPM */; \
+ srl %x6, 1, %x6 /* IEU0 Group */; \
+ ba,pt %xcc, ett /* CTI */; \
+ inc %x7 /* IEU1 */;
-#define END_THE_TRICK1(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB) \
+#define END_THE_TRICK1(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB) \
END_THE_TRICK(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB,f48,f50,f52,f54,f56,f58,f60,f62)
-#define END_THE_TRICK2(S0,S1,S2,S3,T0,T1,U0,U1,V0,fz) \
- fpadd32 %U0, %U1, %V0 /* FPA Group */; \
- srl %x7, 1, %x7 /* IEU0 */; \
- add %sum, %x6, %sum /* IEU1 */; \
- std %V0, [%sp + STACKOFF] /* Store Group */; \
- inc %x8 /* IEU0 */; \
- sub %sum, %x7, %sum /* IEU1 */; \
- fcmpgt32 %fz, %S1, %x3 /* FPM Group */; \
- srl %x8, 1, %x8 /* IEU0 */; \
- inc %x1 /* IEU1 */; \
- fcmpgt32 %fz, %S3, %x4 /* FPM Group */; \
- srl %x1, 1, %x1 /* IEU0 */; \
- sub %sum, %x8, %sum /* IEU1 */; \
- ldx [%sp + STACKOFF], %x8 /* Load Group */; \
- inc %x2 /* IEU0 */; \
- sub %sum, %x1, %sum /* IEU1 */; \
- fcmpgt32 %fz, %T1, %x5 /* FPM Group */; \
- srl %x2, 1, %x2 /* IEU0 */; \
- inc %x3 /* IEU1 */; \
- fcmpgt32 %T0, %U0, %x6 /* FPM Group */; \
- srl %x3, 1, %x3 /* IEU0 */; \
- sub %sum, %x2, %sum /* IEU1 */; \
- inc %x4 /* IEU0 Group */; \
- sub %sum, %x3, %sum /* IEU1 */; \
- fcmpgt32 %fz, %U1, %x7 /* FPM Group */; \
- srl %x4, 1, %x4 /* IEU0 */; \
- inc %x5 /* IEU1 */; \
- fcmpgt32 %U0, %V0, %x1 /* FPM Group */; \
- srl %x5, 1, %x5 /* IEU0 */; \
- sub %sum, %x4, %sum /* IEU1 */; \
- fcmpgt32 %fz, %V0, %x2 /* FPM Group */; \
- inc %x6 /* IEU0 */; \
- sub %sum, %x5, %sum /* IEU1 */; \
- srl %x6, 1, %x6 /* IEU0 Group */; \
- inc %x7 /* IEU1 */; \
- srl %x7, 1, %x7 /* IEU0 Group */; \
- add %sum, %x6, %sum /* IEU1 */; \
- inc %x1 /* IEU0 Group */; \
- sub %sum, %x7, %sum /* IEU1 */; \
- srl %x1, 1, %x1 /* IEU0 Group */; \
- inc %x2 /* IEU1 */; \
- srl %x2, 1, %x2 /* IEU0 Group */; \
- add %sum, %x1, %sum /* IEU1 */; \
- sub %sum, %x2, %sum /* IEU0 Group */; \
- addcc %sum, %x8, %sum /* IEU Group */; \
- bcs,a,pn %xcc, 33f /* CTI */; \
- add %sum, 1, %sum /* IEU0 */; \
-33: /* That's it */;
+#define END_THE_TRICK2(S0,S1,S2,S3,T0,T1,U0,U1,V0,fz) \
+ fpadd32 %U0, %U1, %V0 /* FPA Group */; \
+ srl %x7, 1, %x7 /* IEU0 */; \
+ add %sum, %x6, %sum /* IEU1 */; \
+ std %V0, [%sp + STACKOFF] /* Store Group */; \
+ inc %x8 /* IEU0 */; \
+ sub %sum, %x7, %sum /* IEU1 */; \
+ srl %x8, 1, %x8 /* IEU0 Group */; \
+ fcmpgt32 %fz, %S1, %x3 /* FPM */; \
+ inc %x1 /* IEU0 Group */; \
+ fcmpgt32 %fz, %S3, %x4 /* FPM */; \
+ srl %x1, 1, %x1 /* IEU0 Group */; \
+ sub %sum, %x8, %sum /* IEU1 */; \
+ ldx [%sp + STACKOFF], %x8 /* Load Group */; \
+ inc %x2 /* IEU0 */; \
+ sub %sum, %x1, %sum /* IEU1 */; \
+ srl %x2, 1, %x2 /* IEU0 Group */; \
+ fcmpgt32 %fz, %T1, %x5 /* FPM */; \
+ inc %x3 /* IEU0 Group */; \
+ fcmpgt32 %T0, %U0, %x6 /* FPM */; \
+ srl %x3, 1, %x3 /* IEU0 Group */; \
+ sub %sum, %x2, %sum /* IEU1 */; \
+ inc %x4 /* IEU0 Group */; \
+ sub %sum, %x3, %sum /* IEU1 */; \
+ srl %x4, 1, %x4 /* IEU0 Group */; \
+ fcmpgt32 %fz, %U1, %x7 /* FPM */; \
+ inc %x5 /* IEU0 Group */; \
+ fcmpgt32 %U0, %V0, %x1 /* FPM */; \
+ srl %x5, 1, %x5 /* IEU0 Group */; \
+ sub %sum, %x4, %sum /* IEU1 */; \
+ sub %sum, %x5, %sum /* IEU0 Group */; \
+ fcmpgt32 %fz, %V0, %x2 /* FPM */; \
+ inc %x6 /* IEU0 Group */; \
+ inc %x7 /* IEU1 */; \
+ srl %x6, 1, %x6 /* IEU0 Group */; \
+ inc %x1 /* IEU1 */; \
+ srl %x7, 1, %x7 /* IEU0 Group */; \
+ add %sum, %x6, %sum /* IEU1 */; \
+ srl %x1, 1, %x1 /* IEU0 Group */; \
+ sub %sum, %x7, %sum /* IEU1 */; \
+ inc %x2 /* IEU0 Group */; \
+ add %sum, %x1, %sum /* IEU1 */; \
+ srl %x2, 1, %x2 /* IEU0 Group */; \
+ sub %sum, %x2, %sum /* IEU0 Group */; \
+ addcc %sum, %x8, %sum /* IEU1 Group */; \
+ bcs,a,pn %xcc, 33f /* CTI */; \
+ add %sum, 1, %sum /* IEU0 (Group) */; \
+33: /* That's it */;
.text
.globl csum_partial_copy_vis
.align 32
-/* %asi should be either ASI_P or ASI_AIUS for csum_partial_copy resp. csum_partial_copy_from_user */
-/* This assumes that !((%src^%dst)&3) && !((%src|%dst)&1) && %len >= 256 */
+/* %asi should be either ASI_P or ASI_AIUS for csum_partial_copy resp.
+ * csum_partial_copy_from_user
+ * This assumes that !((%src^%dst)&3) && !((%src|%dst)&1) && %len >= 256
+ */
csum_partial_copy_vis:
- andcc %dst, 7, %g0 /* IEU1 Group */
- be,pt %icc, 4f /* CTI */
- and %dst, 0x38, %o4 /* IEU0 */
- mov 1, %g5 /* IEU0 Group */
- andcc %dst, 2, %g0 /* IEU1 */
- be,pt %icc, 1f /* CTI */
- and %dst, 4, %g7 /* IEU0 Group */
- lduha [%src] %asi, %g2 /* Load */
- sub %len, 2, %len /* IEU0 Group */
- add %dst, 2, %dst /* IEU1 */
- andcc %dst, 4, %g7 /* IEU1 Group */
- sll %g5, 16, %g5 /* IEU0 */
- sth %g2, [%dst - 2] /* Store Group */
- sll %g2, 16, %g2 /* IEU0 */
- add %src, 2, %src /* IEU1 */
- addcc %g2, %sum, %sum /* IEU1 Group */
- bcs,a,pn %icc, 1f /* CTI */
- add %sum, %g5, %sum /* IEU0 */
-1: lduwa [%src] %asi, %g2 /* Load */
- brz,a,pn %g7, 4f /* CTI+IEU1 Group */
- and %dst, 0x38, %o4 /* IEU0 */
- add %dst, 4, %dst /* IEU0 Group */
- sub %len, 4, %len /* IEU1 */
- addcc %g2, %sum, %sum /* IEU1 Group */
- bcs,a,pn %icc, 1f /* CTI */
- add %sum, 1, %sum /* IEU0 */
-1: and %dst, 0x38, %o4 /* IEU0 Group */
- stw %g2, [%dst - 4] /* Store */
- add %src, 4, %src /* IEU1 */
+ andcc %dst, 7, %g0 /* IEU1 Group */
+ be,pt %icc, 4f /* CTI */
+ and %dst, 0x38, %o4 /* IEU0 */
+ mov 1, %g5 /* IEU0 Group */
+ andcc %dst, 2, %g0 /* IEU1 */
+ be,pt %icc, 1f /* CTI */
+ and %dst, 4, %g7 /* IEU0 Group */
+ lduha [%src] %asi, %g2 /* Load */
+ sub %len, 2, %len /* IEU0 Group */
+ add %dst, 2, %dst /* IEU1 */
+ andcc %dst, 4, %g7 /* IEU1 Group */
+ sll %g5, 16, %g5 /* IEU0 */
+ sth %g2, [%dst - 2] /* Store Group */
+ sll %g2, 16, %g2 /* IEU0 */
+ add %src, 2, %src /* IEU1 */
+ addcc %g2, %sum, %sum /* IEU1 Group */
+ bcs,a,pn %icc, 1f /* CTI */
+ add %sum, %g5, %sum /* IEU0 */
+1: lduwa [%src] %asi, %g2 /* Load */
+ brz,a,pn %g7, 4f /* CTI+IEU1 Group */
+ and %dst, 0x38, %o4 /* IEU0 */
+ add %dst, 4, %dst /* IEU0 Group */
+ sub %len, 4, %len /* IEU1 */
+ addcc %g2, %sum, %sum /* IEU1 Group */
+ bcs,a,pn %icc, 1f /* CTI */
+ add %sum, 1, %sum /* IEU0 */
+1: and %dst, 0x38, %o4 /* IEU0 Group */
+ stw %g2, [%dst - 4] /* Store */
+ add %src, 4, %src /* IEU1 */
4:
#ifdef __KERNEL__
VISEntry
#endif
- mov %src, %g7 /* IEU1 Group */
- fzero %f48 /* FPA */
- alignaddr %src, %g0, %src /* Single Group */
- subcc %g7, %src, %g7 /* IEU1 Group */
- be,pt %xcc, 1f /* CTI */
- mov 0x40, %g1 /* IEU0 */
- lduwa [%src] %asi, %g2 /* Load Group */
- subcc %sum, %g2, %sum /* IEU1 Group+load stall */
- bcs,a,pn %icc, 1f /* CTI */
- sub %sum, 1, %sum /* IEU0 */
-1: srl %sum, 0, %sum /* IEU0 Group */
- clr %g5 /* IEU1 */
- brz,pn %o4, 3f /* CTI+IEU1 Group */
- sub %g1, %o4, %g1 /* IEU0 */
- ldda [%src] %asi, %f0 /* Load */
- clr %o4 /* IEU0 Group */
- andcc %dst, 8, %g0 /* IEU1 */
- be,pn %icc, 1f /* CTI */
- ldda [%src + 8] %asi, %f2 /* Load Group */
- add %src, 8, %src /* IEU0 */
- sub %len, 8, %len /* IEU1 */
- fpadd32 %f0, %f48, %f50 /* FPA */
- addcc %dst, 8, %dst /* IEU1 Group */
- faligndata %f0, %f2, %f16 /* FPA */
- fcmpgt32 %f48, %f50, %o4 /* FPM Group */
- fmovd %f2, %f0 /* FPA Group */
- ldda [%src + 8] %asi, %f2 /* Load */
- std %f16, [%dst - 8] /* Store */
- fmovd %f50, %f48 /* FPA */
-1: andcc %g1, 0x10, %g0 /* IEU1 Group */
- be,pn %icc, 1f /* CTI */
- and %g1, 0x20, %g1 /* IEU0 */
- fpadd32 %f0, %f48, %f50 /* FPA */
- ldda [%src + 16] %asi, %f4 /* Load Group */
- add %src, 16, %src /* IEU0 */
- add %dst, 16, %dst /* IEU1 */
- faligndata %f0, %f2, %f16 /* FPA */
- fcmpgt32 %f48, %f50, %g5 /* FPM Group */
- sub %len, 16, %len /* IEU0 */
- inc %o4 /* IEU1 */
- std %f16, [%dst - 16] /* Store Group */
- fpadd32 %f2, %f50, %f48 /* FPA */
- srl %o4, 1, %o5 /* IEU0 */
- faligndata %f2, %f4, %f18 /* FPA Group */
- std %f18, [%dst - 8] /* Store */
- fcmpgt32 %f50, %f48, %o4 /* FPM Group */
- add %o5, %sum, %sum /* IEU0 */
- ldda [%src + 8] %asi, %f2 /* Load */
- fmovd %f4, %f0 /* FPA */
-1: brz,a,pn %g1, 4f /* CTI+IEU1 Group */
- rd %asi, %g2 /* LSU Group + 4 bubbles */
- inc %g5 /* IEU0 */
- fpadd32 %f0, %f48, %f50 /* FPA */
- ldda [%src + 16] %asi, %f4 /* Load Group */
- srl %g5, 1, %g5 /* IEU0 */
- add %dst, 32, %dst /* IEU1 */
- faligndata %f0, %f2, %f16 /* FPA */
- fcmpgt32 %f48, %f50, %o5 /* FPM Group */
- inc %o4 /* IEU0 */
- ldda [%src + 24] %asi, %f6 /* Load */
- srl %o4, 1, %o4 /* IEU0 Group */
- add %g5, %sum, %sum /* IEU1 */
- ldda [%src + 32] %asi, %f8 /* Load */
- fpadd32 %f2, %f50, %f48 /* FPA */
- faligndata %f2, %f4, %f18 /* FPA Group */
- sub %len, 32, %len /* IEU0 */
- std %f16, [%dst - 32] /* Store */
- fcmpgt32 %f50, %f48, %g3 /* FPM Group */
- inc %o5 /* IEU0 */
- add %o4, %sum, %sum /* IEU1 */
- fpadd32 %f4, %f48, %f50 /* FPA */
- faligndata %f4, %f6, %f20 /* FPA Group */
- srl %o5, 1, %o5 /* IEU0 */
- fcmpgt32 %f48, %f50, %g5 /* FPM Group */
- add %o5, %sum, %sum /* IEU0 */
- std %f18, [%dst - 24] /* Store */
- fpadd32 %f6, %f50, %f48 /* FPA */
- inc %g3 /* IEU0 Group */
- std %f20, [%dst - 16] /* Store */
- add %src, 32, %src /* IEU1 */
- faligndata %f6, %f8, %f22 /* FPA */
- fcmpgt32 %f50, %f48, %o4 /* FPM Group */
- srl %g3, 1, %g3 /* IEU0 */
- std %f22, [%dst - 8] /* Store */
- add %g3, %sum, %sum /* IEU0 Group */
-3: rd %asi, %g2 /* LSU Group + 4 bubbles */
+ mov %src, %g7 /* IEU1 Group */
+ fzero %f48 /* FPA */
+ alignaddr %src, %g0, %src /* Single Group */
+ subcc %g7, %src, %g7 /* IEU1 Group */
+ be,pt %xcc, 1f /* CTI */
+ mov 0x40, %g1 /* IEU0 */
+ lduwa [%src] %asi, %g2 /* Load Group */
+ subcc %sum, %g2, %sum /* IEU1 Group+load stall*/
+ bcs,a,pn %icc, 1f /* CTI */
+ sub %sum, 1, %sum /* IEU0 */
+1: srl %sum, 0, %sum /* IEU0 Group */
+ clr %g5 /* IEU1 */
+ brz,pn %o4, 3f /* CTI+IEU1 Group */
+ sub %g1, %o4, %g1 /* IEU0 */
+ ldda [%src] %asi, %f0 /* Load */
+ clr %o4 /* IEU0 Group */
+ andcc %dst, 8, %g0 /* IEU1 */
+ be,pn %icc, 1f /* CTI */
+ ldda [%src + 8] %asi, %f2 /* Load Group */
+ add %src, 8, %src /* IEU0 */
+ sub %len, 8, %len /* IEU1 */
+ fpadd32 %f0, %f48, %f50 /* FPA */
+ addcc %dst, 8, %dst /* IEU1 Group */
+ faligndata %f0, %f2, %f16 /* FPA */
+ fcmpgt32 %f48, %f50, %o4 /* FPM Group */
+ fmovd %f2, %f0 /* FPA Group */
+ ldda [%src + 8] %asi, %f2 /* Load */
+ std %f16, [%dst - 8] /* Store */
+ fmovd %f50, %f48 /* FPA */
+1: andcc %g1, 0x10, %g0 /* IEU1 Group */
+ be,pn %icc, 1f /* CTI */
+ and %g1, 0x20, %g1 /* IEU0 */
+ fpadd32 %f0, %f48, %f50 /* FPA */
+ ldda [%src + 16] %asi, %f4 /* Load Group */
+ add %src, 16, %src /* IEU0 */
+ add %dst, 16, %dst /* IEU1 */
+ faligndata %f0, %f2, %f16 /* FPA */
+ fcmpgt32 %f48, %f50, %g5 /* FPM Group */
+ sub %len, 16, %len /* IEU0 */
+ inc %o4 /* IEU1 */
+ std %f16, [%dst - 16] /* Store Group */
+ fpadd32 %f2, %f50, %f48 /* FPA */
+ srl %o4, 1, %o5 /* IEU0 */
+ faligndata %f2, %f4, %f18 /* FPA Group */
+ std %f18, [%dst - 8] /* Store */
+ fcmpgt32 %f50, %f48, %o4 /* FPM Group */
+ add %o5, %sum, %sum /* IEU0 */
+ ldda [%src + 8] %asi, %f2 /* Load */
+ fmovd %f4, %f0 /* FPA */
+1: brz,a,pn %g1, 4f /* CTI+IEU1 Group */
+ rd %asi, %g2 /* LSU Group + 4 bubbles*/
+ inc %g5 /* IEU0 */
+ fpadd32 %f0, %f48, %f50 /* FPA */
+ ldda [%src + 16] %asi, %f4 /* Load Group */
+ srl %g5, 1, %g5 /* IEU0 */
+ add %dst, 32, %dst /* IEU1 */
+ faligndata %f0, %f2, %f16 /* FPA */
+ fcmpgt32 %f48, %f50, %o5 /* FPM Group */
+ inc %o4 /* IEU0 */
+ ldda [%src + 24] %asi, %f6 /* Load */
+ srl %o4, 1, %o4 /* IEU0 Group */
+ add %g5, %sum, %sum /* IEU1 */
+ ldda [%src + 32] %asi, %f8 /* Load */
+ fpadd32 %f2, %f50, %f48 /* FPA */
+ faligndata %f2, %f4, %f18 /* FPA Group */
+ sub %len, 32, %len /* IEU0 */
+ std %f16, [%dst - 32] /* Store */
+ fcmpgt32 %f50, %f48, %g3 /* FPM Group */
+ inc %o5 /* IEU0 */
+ add %o4, %sum, %sum /* IEU1 */
+ fpadd32 %f4, %f48, %f50 /* FPA */
+ faligndata %f4, %f6, %f20 /* FPA Group */
+ srl %o5, 1, %o5 /* IEU0 */
+ fcmpgt32 %f48, %f50, %g5 /* FPM Group */
+ add %o5, %sum, %sum /* IEU0 */
+ std %f18, [%dst - 24] /* Store */
+ fpadd32 %f6, %f50, %f48 /* FPA */
+ inc %g3 /* IEU0 Group */
+ std %f20, [%dst - 16] /* Store */
+ add %src, 32, %src /* IEU1 */
+ faligndata %f6, %f8, %f22 /* FPA */
+ fcmpgt32 %f50, %f48, %o4 /* FPM Group */
+ srl %g3, 1, %g3 /* IEU0 */
+ std %f22, [%dst - 8] /* Store */
+ add %g3, %sum, %sum /* IEU0 Group */
+3: rd %asi, %g2 /* LSU Group + 4 bubbles*/
#ifdef __KERNEL__
-4: sethi %hi(vis0s), %g7 /* IEU0 Group */
- or %g2, ASI_BLK_OR, %g2 /* IEU1 */
+4: sethi %hi(vis0s), %g7 /* IEU0 Group */
+ or %g2, ASI_BLK_OR, %g2 /* IEU1 */
#else
-4: rd %pc, %g7 /* LSU Group + 4 bubbles */
+4: rd %pc, %g7 /* LSU Group + 4 bubbles*/
#endif
- inc %g5 /* IEU0 Group */
- and %src, 0x38, %g3 /* IEU1 */
- membar #StoreLoad /* LSU Group */
- srl %g5, 1, %g5 /* IEU0 */
- inc %o4 /* IEU1 */
- sll %g3, 8, %g3 /* IEU0 Group */
- sub %len, 0xc0, %len /* IEU1 */
- addcc %g5, %sum, %sum /* IEU1 Group */
- srl %o4, 1, %o4 /* IEU0 */
- add %g7, %g3, %g7 /* IEU0 Group */
- add %o4, %sum, %sum /* IEU1 */
+ inc %g5 /* IEU0 Group */
+ and %src, 0x38, %g3 /* IEU1 */
+ membar #StoreLoad /* LSU Group */
+ srl %g5, 1, %g5 /* IEU0 */
+ inc %o4 /* IEU1 */
+ sll %g3, 8, %g3 /* IEU0 Group */
+ sub %len, 0xc0, %len /* IEU1 */
+ addcc %g5, %sum, %sum /* IEU1 Group */
+ srl %o4, 1, %o4 /* IEU0 */
+ add %g7, %g3, %g7 /* IEU0 Group */
+ add %o4, %sum, %sum /* IEU1 */
#ifdef __KERNEL__
- jmpl %g7 + %lo(vis0s), %g0 /* CTI+IEU1 Group */
+ jmpl %g7 + %lo(vis0s), %g0 /* CTI+IEU1 Group */
#else
- jmpl %g7 + (vis0s - 4b), %g0 /* CTI+IEU1 Group */
+ jmpl %g7 + (vis0s - 4b), %g0 /* CTI+IEU1 Group */
#endif
- fzero %f32 /* FPA */
+ fzero %f32 /* FPA */
.align 2048
-vis0s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- add %src, 128, %src /* IEU0 Group */
- ldda [%src-128] %asi, %f0 /* Load Group */
- ldda [%src-64] %asi, %f16 /* Load Group */
- fmovd %f48, %f62 /* FPA Group f0 available */
- faligndata %f0, %f2, %f48 /* FPA Group f2 available */
- fcmpgt32 %f32, %f2, %x1 /* FPM Group f4 available */
- fpadd32 %f0, %f62, %f0 /* FPA */
- fcmpgt32 %f32, %f4, %x2 /* FPM Group f6 available */
- faligndata %f2, %f4, %f50 /* FPA */
- fcmpgt32 %f62, %f0, %x3 /* FPM Group f8 available */
- faligndata %f4, %f6, %f52 /* FPA */
- fcmpgt32 %f32, %f6, %x4 /* FPM Group f10 available */
- inc %x1 /* IEU0 */
- faligndata %f6, %f8, %f54 /* FPA */
- fcmpgt32 %f32, %f8, %x5 /* FPM Group f12 available */
- srl %x1, 1, %x1 /* IEU0 */
- inc %x2 /* IEU1 */
- faligndata %f8, %f10, %f56 /* FPA */
- fcmpgt32 %f32, %f10, %x6 /* FPM Group f14 available */
- srl %x2, 1, %x2 /* IEU0 */
- add %sum, %x1, %sum /* IEU1 */
- faligndata %f10, %f12, %f58 /* FPA */
- fcmpgt32 %f32, %f12, %x7 /* FPM Group */
- inc %x3 /* IEU0 */
- add %sum, %x2, %sum /* IEU1 */
- faligndata %f12, %f14, %f60 /* FPA */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- srl %x3, 1, %x3 /* IEU0 */
- inc %x4 /* IEU1 */
- fmovd %f14, %f62 /* FPA */
- srl %x4, 1, %x4 /* IEU0 Group */
- add %sum, %x3, %sum /* IEU1 */
+vis0s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ add %src, 128, %src /* IEU0 Group */
+ ldda [%src-128] %asi, %f0 /* Load Group */
+ ldda [%src-64] %asi, %f16 /* Load Group */
+ fmovd %f48, %f62 /* FPA Group f0 available*/
+ faligndata %f0, %f2, %f48 /* FPA Group f2 available*/
+ fcmpgt32 %f32, %f2, %x1 /* FPM Group f4 available*/
+ fpadd32 %f0, %f62, %f0 /* FPA */
+ fcmpgt32 %f32, %f4, %x2 /* FPM Group f6 available*/
+ faligndata %f2, %f4, %f50 /* FPA */
+ fcmpgt32 %f62, %f0, %x3 /* FPM Group f8 available*/
+ faligndata %f4, %f6, %f52 /* FPA */
+ fcmpgt32 %f32, %f6, %x4 /* FPM Group f10 available*/
+ inc %x1 /* IEU0 */
+ faligndata %f6, %f8, %f54 /* FPA */
+ fcmpgt32 %f32, %f8, %x5 /* FPM Group f12 available*/
+ srl %x1, 1, %x1 /* IEU0 */
+ inc %x2 /* IEU1 */
+ faligndata %f8, %f10, %f56 /* FPA */
+ fcmpgt32 %f32, %f10, %x6 /* FPM Group f14 available*/
+ srl %x2, 1, %x2 /* IEU0 */
+ add %sum, %x1, %sum /* IEU1 */
+ faligndata %f10, %f12, %f58 /* FPA */
+ fcmpgt32 %f32, %f12, %x7 /* FPM Group */
+ inc %x3 /* IEU0 */
+ add %sum, %x2, %sum /* IEU1 */
+ faligndata %f12, %f14, %f60 /* FPA */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ srl %x3, 1, %x3 /* IEU0 */
+ inc %x4 /* IEU1 */
+ fmovd %f14, %f62 /* FPA */
+ srl %x4, 1, %x4 /* IEU0 Group */
+ add %sum, %x3, %sum /* IEU1 */
vis0: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
- ,f48,f50,f52,f54,f56,f58,f60,f62,f62,
- ,LDBLK(f32), STBLK,,,,,,,,
+ ,f48,f50,f52,f54,f56,f58,f60,f62,f62,
+ ,LDBLK(f32), STBLK,,,,,,,,
,bcs,pn %icc, vis0e1)
DO_THE_TRICK( f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46,
- ,f48,f50,f52,f54,f56,f58,f60,f62,f62,
- ,LDBLK(f0), STBLK,,,,,,,,
+ ,f48,f50,f52,f54,f56,f58,f60,f62,f62,
+ ,LDBLK(f0), STBLK,,,,,,,,
,bcs,pn %icc, vis0e2)
- DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14,
+ DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14,
,f48,f50,f52,f54,f56,f58,f60,f62,f62,
,LDBLK(f16), STBLK,,,,,,,,
,bcc,pt %icc, vis0)
-vis0e3: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
+vis0e3: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f48,f50,f52,f54,f56,f58,f60,f62,f32,
,SYNC, STBLK,ST(f48,64),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40),ST(f60,48),
,add %dst, 56, %dst; add %len, 192 - 8*8, %len; ba,pt %icc, e2)
@@ -447,39 +449,39 @@ vis0e1: DO_THE_TRICK( f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f4
,f48,f50,f52,f54,f56,f58,f60,f62,f0,
,SYNC, STBLK,ST(f48,64),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40),ST(f60,48),
,add %dst, 56, %dst; add %len, 192 - 8*8, %len; ba,pt %icc, e3)
-vis0e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14,
+vis0e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14,
,f48,f50,f52,f54,f56,f58,f60,f62,f16,
,SYNC, STBLK,ST(f48,64),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40),ST(f60,48),
,add %dst, 56, %dst; add %len, 192 - 8*8, %len; ba,pt %icc, e1)
.align 2048
-vis1s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- add %src, 128 - 8, %src /* IEU0 Group */
- ldda [%src-128] %asi, %f0 /* Load Group */
- ldda [%src-64] %asi, %f16 /* Load Group */
- fmovd %f0, %f58 /* FPA Group */
- fmovd %f48, %f0 /* FPA Group */
- fcmpgt32 %f32, %f2, %x2 /* FPM Group */
- faligndata %f2, %f4, %f48 /* FPA */
- fcmpgt32 %f32, %f4, %x3 /* FPM Group */
- faligndata %f4, %f6, %f50 /* FPA */
- fcmpgt32 %f32, %f6, %x4 /* FPM Group */
- faligndata %f6, %f8, %f52 /* FPA */
- fcmpgt32 %f32, %f8, %x5 /* FPM Group */
- inc %x2 /* IEU1 */
- faligndata %f8, %f10, %f54 /* FPA */
- fcmpgt32 %f32, %f10, %x6 /* FPM Group */
- srl %x2, 1, %x2 /* IEU0 */
- faligndata %f10, %f12, %f56 /* FPA */
- fcmpgt32 %f32, %f12, %x7 /* FPM Group */
- inc %x3 /* IEU0 */
- add %sum, %x2, %sum /* IEU1 */
- faligndata %f12, %f14, %f58 /* FPA */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- srl %x3, 1, %x3 /* IEU0 */
- inc %x4 /* IEU1 */
- fmovd %f14, %f60 /* FPA */
- srl %x4, 1, %x4 /* IEU0 Group */
- add %sum, %x3, %sum /* IEU1 */
+vis1s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ add %src, 128 - 8, %src /* IEU0 Group */
+ ldda [%src-128] %asi, %f0 /* Load Group */
+ ldda [%src-64] %asi, %f16 /* Load Group */
+ fmovd %f0, %f58 /* FPA Group */
+ fmovd %f48, %f0 /* FPA Group */
+ fcmpgt32 %f32, %f2, %x2 /* FPM Group */
+ faligndata %f2, %f4, %f48 /* FPA */
+ fcmpgt32 %f32, %f4, %x3 /* FPM Group */
+ faligndata %f4, %f6, %f50 /* FPA */
+ fcmpgt32 %f32, %f6, %x4 /* FPM Group */
+ faligndata %f6, %f8, %f52 /* FPA */
+ fcmpgt32 %f32, %f8, %x5 /* FPM Group */
+ inc %x2 /* IEU1 */
+ faligndata %f8, %f10, %f54 /* FPA */
+ fcmpgt32 %f32, %f10, %x6 /* FPM Group */
+ srl %x2, 1, %x2 /* IEU0 */
+ faligndata %f10, %f12, %f56 /* FPA */
+ fcmpgt32 %f32, %f12, %x7 /* FPM Group */
+ inc %x3 /* IEU0 */
+ add %sum, %x2, %sum /* IEU1 */
+ faligndata %f12, %f14, %f58 /* FPA */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ srl %x3, 1, %x3 /* IEU0 */
+ inc %x4 /* IEU1 */
+ fmovd %f14, %f60 /* FPA */
+ srl %x4, 1, %x4 /* IEU0 Group */
+ add %sum, %x3, %sum /* IEU1 */
vis1: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f62,f48,f50,f52,f54,f56,f58,f60,f60,
,LDBLK(f32), ,STBLK,,,,,,,
@@ -505,31 +507,31 @@ vis1e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14
,SYNC, ,STBLK,ST(f48,0),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40),
,add %dst, 48, %dst; add %len, 192 - 7*8, %len; ba,pt %icc, e1)
.align 2048
-vis2s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- add %src, 128 - 16, %src /* IEU0 Group */
- ldda [%src-128] %asi, %f0 /* Load Group */
- ldda [%src-64] %asi, %f16 /* Load Group */
- fmovd %f0, %f56 /* FPA Group */
- fmovd %f48, %f0 /* FPA Group */
- sub %dst, 64, %dst /* IEU0 */
- fpsub32 %f2, %f2, %f2 /* FPA Group */
- fcmpgt32 %f32, %f4, %x3 /* FPM Group */
- faligndata %f4, %f6, %f48 /* FPA */
- fcmpgt32 %f32, %f6, %x4 /* FPM Group */
- faligndata %f6, %f8, %f50 /* FPA */
- fcmpgt32 %f32, %f8, %x5 /* FPM Group */
- faligndata %f8, %f10, %f52 /* FPA */
- fcmpgt32 %f32, %f10, %x6 /* FPM Group */
- faligndata %f10, %f12, %f54 /* FPA */
- fcmpgt32 %f32, %f12, %x7 /* FPM Group */
- inc %x3 /* IEU0 */
- faligndata %f12, %f14, %f56 /* FPA */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- srl %x3, 1, %x3 /* IEU0 */
- inc %x4 /* IEU1 */
- fmovd %f14, %f58 /* FPA */
- srl %x4, 1, %x4 /* IEU0 Group */
- add %sum, %x3, %sum /* IEU1 */
+vis2s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ add %src, 128 - 16, %src /* IEU0 Group */
+ ldda [%src-128] %asi, %f0 /* Load Group */
+ ldda [%src-64] %asi, %f16 /* Load Group */
+ fmovd %f0, %f56 /* FPA Group */
+ fmovd %f48, %f0 /* FPA Group */
+ sub %dst, 64, %dst /* IEU0 */
+ fpsub32 %f2, %f2, %f2 /* FPA Group */
+ fcmpgt32 %f32, %f4, %x3 /* FPM Group */
+ faligndata %f4, %f6, %f48 /* FPA */
+ fcmpgt32 %f32, %f6, %x4 /* FPM Group */
+ faligndata %f6, %f8, %f50 /* FPA */
+ fcmpgt32 %f32, %f8, %x5 /* FPM Group */
+ faligndata %f8, %f10, %f52 /* FPA */
+ fcmpgt32 %f32, %f10, %x6 /* FPM Group */
+ faligndata %f10, %f12, %f54 /* FPA */
+ fcmpgt32 %f32, %f12, %x7 /* FPM Group */
+ inc %x3 /* IEU0 */
+ faligndata %f12, %f14, %f56 /* FPA */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ srl %x3, 1, %x3 /* IEU0 */
+ inc %x4 /* IEU1 */
+ fmovd %f14, %f58 /* FPA */
+ srl %x4, 1, %x4 /* IEU0 Group */
+ add %sum, %x3, %sum /* IEU1 */
vis2: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f60,f62,f48,f50,f52,f54,f56,f58,f58,
,LDBLK(f32), ,,STBLK,,,,,,
@@ -555,27 +557,27 @@ vis2e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14
,SYNC, ,,STBLK,ST(f48,64),ST(f50,72),ST(f52,80),ST(f54,88),ST(f56,96),
,add %dst, 104, %dst; add %len, 192 - 6*8, %len; ba,pt %icc, e1)
.align 2048
-vis3s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- add %src, 128 - 24, %src /* IEU0 Group */
- ldda [%src-128] %asi, %f0 /* Load Group */
- ldda [%src-64] %asi, %f16 /* Load Group */
- fmovd %f0, %f54 /* FPA Group */
- fmovd %f48, %f0 /* FPA Group */
- sub %dst, 64, %dst /* IEU0 */
- fpsub32 %f2, %f2, %f2 /* FPA Group */
- fpsub32 %f4, %f4, %f4 /* FPA Group */
- fcmpgt32 %f32, %f6, %x4 /* FPM Group */
- faligndata %f6, %f8, %f48 /* FPA */
- fcmpgt32 %f32, %f8, %x5 /* FPM Group */
- faligndata %f8, %f10, %f50 /* FPA */
- fcmpgt32 %f32, %f10, %x6 /* FPM Group */
- faligndata %f10, %f12, %f52 /* FPA */
- fcmpgt32 %f32, %f12, %x7 /* FPM Group */
- faligndata %f12, %f14, %f54 /* FPA */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- fmovd %f14, %f56 /* FPA */
- inc %x4 /* IEU0 */
- srl %x4, 1, %x4 /* IEU0 Group */
+vis3s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ add %src, 128 - 24, %src /* IEU0 Group */
+ ldda [%src-128] %asi, %f0 /* Load Group */
+ ldda [%src-64] %asi, %f16 /* Load Group */
+ fmovd %f0, %f54 /* FPA Group */
+ fmovd %f48, %f0 /* FPA Group */
+ sub %dst, 64, %dst /* IEU0 */
+ fpsub32 %f2, %f2, %f2 /* FPA Group */
+ fpsub32 %f4, %f4, %f4 /* FPA Group */
+ fcmpgt32 %f32, %f6, %x4 /* FPM Group */
+ faligndata %f6, %f8, %f48 /* FPA */
+ fcmpgt32 %f32, %f8, %x5 /* FPM Group */
+ faligndata %f8, %f10, %f50 /* FPA */
+ fcmpgt32 %f32, %f10, %x6 /* FPM Group */
+ faligndata %f10, %f12, %f52 /* FPA */
+ fcmpgt32 %f32, %f12, %x7 /* FPM Group */
+ faligndata %f12, %f14, %f54 /* FPA */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ fmovd %f14, %f56 /* FPA */
+ inc %x4 /* IEU0 */
+ srl %x4, 1, %x4 /* IEU0 Group */
vis3: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f58,f60,f62,f48,f50,f52,f54,f56,f56,
,LDBLK(f32), ,,,STBLK,,,,,
@@ -601,25 +603,25 @@ vis3e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14
,SYNC, ,,,STBLK,ST(f48,64),ST(f50,72),ST(f52,80),ST(f54,88),
,add %dst, 96, %dst; add %len, 192 - 5*8, %len; ba,pt %icc, e1)
.align 2048
-vis4s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- add %src, 128 - 32, %src /* IEU0 Group */
- ldda [%src-128] %asi, %f0 /* Load Group */
- ldda [%src-64] %asi, %f16 /* Load Group */
- fmovd %f0, %f52 /* FPA Group */
- fmovd %f48, %f0 /* FPA Group */
- sub %dst, 64, %dst /* IEU0 */
- fpsub32 %f2, %f2, %f2 /* FPA Group */
- fpsub32 %f4, %f4, %f4 /* FPA Group */
- fpsub32 %f6, %f6, %f6 /* FPA Group */
- clr %x4 /* IEU0 */
- fcmpgt32 %f32, %f8, %x5 /* FPM Group */
- faligndata %f8, %f10, %f48 /* FPA */
- fcmpgt32 %f32, %f10, %x6 /* FPM Group */
- faligndata %f10, %f12, %f50 /* FPA */
- fcmpgt32 %f32, %f12, %x7 /* FPM Group */
- faligndata %f12, %f14, %f52 /* FPA */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- fmovd %f14, %f54 /* FPA */
+vis4s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ add %src, 128 - 32, %src /* IEU0 Group */
+ ldda [%src-128] %asi, %f0 /* Load Group */
+ ldda [%src-64] %asi, %f16 /* Load Group */
+ fmovd %f0, %f52 /* FPA Group */
+ fmovd %f48, %f0 /* FPA Group */
+ sub %dst, 64, %dst /* IEU0 */
+ fpsub32 %f2, %f2, %f2 /* FPA Group */
+ fpsub32 %f4, %f4, %f4 /* FPA Group */
+ fpsub32 %f6, %f6, %f6 /* FPA Group */
+ clr %x4 /* IEU0 */
+ fcmpgt32 %f32, %f8, %x5 /* FPM Group */
+ faligndata %f8, %f10, %f48 /* FPA */
+ fcmpgt32 %f32, %f10, %x6 /* FPM Group */
+ faligndata %f10, %f12, %f50 /* FPA */
+ fcmpgt32 %f32, %f12, %x7 /* FPM Group */
+ faligndata %f12, %f14, %f52 /* FPA */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ fmovd %f14, %f54 /* FPA */
vis4: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f56,f58,f60,f62,f48,f50,f52,f54,f54,
,LDBLK(f32), ,,,,STBLK,,,,
@@ -645,26 +647,26 @@ vis4e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14
,SYNC, ,,,,STBLK,ST(f48,64),ST(f50,72),ST(f52,80),
,add %dst, 88, %dst; add %len, 192 - 4*8, %len; ba,pt %icc, e1)
.align 2048
-vis5s: add %src, 128 - 40, %src /* IEU0 Group */
- ldda [%src-88] %asi, %f10 /* Load Group */
- ldda [%src-80] %asi, %f12 /* Load Group */
- ldda [%src-72] %asi, %f14 /* Load Group */
- wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- ldda [%src-64] %asi, %f16 /* Load Group */
- fmovd %f48, %f0 /* FPA Group */
- fmuld %f32, %f32, %f2 /* FPM */
- clr %x4 /* IEU0 */
- faddd %f32, %f32, %f4 /* FPA Group */
- fmuld %f32, %f32, %f6 /* FPM */
- clr %x5 /* IEU0 */
- faddd %f32, %f32, %f8 /* FPA Group */
- fcmpgt32 %f32, %f10, %x6 /* FPM Group */
- sub %dst, 64, %dst /* IEU0 */
- faligndata %f10, %f12, %f48 /* FPA */
- fcmpgt32 %f32, %f12, %x7 /* FPM Group */
- faligndata %f12, %f14, %f50 /* FPA */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- fmovd %f14, %f52 /* FPA */
+vis5s: add %src, 128 - 40, %src /* IEU0 Group */
+ ldda [%src-88] %asi, %f10 /* Load Group */
+ ldda [%src-80] %asi, %f12 /* Load Group */
+ ldda [%src-72] %asi, %f14 /* Load Group */
+ wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ ldda [%src-64] %asi, %f16 /* Load Group */
+ fmovd %f48, %f0 /* FPA Group */
+ fmuld %f32, %f32, %f2 /* FPM */
+ clr %x4 /* IEU0 */
+ faddd %f32, %f32, %f4 /* FPA Group */
+ fmuld %f32, %f32, %f6 /* FPM */
+ clr %x5 /* IEU0 */
+ faddd %f32, %f32, %f8 /* FPA Group */
+ fcmpgt32 %f32, %f10, %x6 /* FPM Group */
+ sub %dst, 64, %dst /* IEU0 */
+ faligndata %f10, %f12, %f48 /* FPA */
+ fcmpgt32 %f32, %f12, %x7 /* FPM Group */
+ faligndata %f12, %f14, %f50 /* FPA */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ fmovd %f14, %f52 /* FPA */
vis5: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f54,f56,f58,f60,f62,f48,f50,f52,f52,
,LDBLK(f32), ,,,,,STBLK,,,
@@ -690,25 +692,25 @@ vis5e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14
,SYNC, ,,,,,STBLK,ST(f48,64),ST(f50,72),
,add %dst, 80, %dst; add %len, 192 - 3*8, %len; ba,pt %icc, e1)
.align 2048
-vis6s: add %src, 128 - 48, %src /* IEU0 Group */
- ldda [%src-80] %asi, %f12 /* Load Group */
- ldda [%src-72] %asi, %f14 /* Load Group */
- wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- ldda [%src-64] %asi, %f16 /* Load Group */
- fmovd %f48, %f0 /* FPA Group */
- fmuld %f32, %f32, %f2 /* FPM */
- clr %x4 /* IEU0 */
- faddd %f32, %f32, %f4 /* FPA Group */
- fmuld %f32, %f32, %f6 /* FPM */
- clr %x5 /* IEU0 */
- faddd %f32, %f32, %f8 /* FPA Group */
- fmuld %f32, %f32, %f10 /* FPM */
- clr %x6 /* IEU0 */
- fcmpgt32 %f32, %f12, %x7 /* FPM Group */
- sub %dst, 64, %dst /* IEU0 */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- faligndata %f12, %f14, %f48 /* FPA */
- fmovd %f14, %f50 /* FPA Group */
+vis6s: add %src, 128 - 48, %src /* IEU0 Group */
+ ldda [%src-80] %asi, %f12 /* Load Group */
+ ldda [%src-72] %asi, %f14 /* Load Group */
+ wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ ldda [%src-64] %asi, %f16 /* Load Group */
+ fmovd %f48, %f0 /* FPA Group */
+ fmuld %f32, %f32, %f2 /* FPM */
+ clr %x4 /* IEU0 */
+ faddd %f32, %f32, %f4 /* FPA Group */
+ fmuld %f32, %f32, %f6 /* FPM */
+ clr %x5 /* IEU0 */
+ faddd %f32, %f32, %f8 /* FPA Group */
+ fmuld %f32, %f32, %f10 /* FPM */
+ clr %x6 /* IEU0 */
+ fcmpgt32 %f32, %f12, %x7 /* FPM Group */
+ sub %dst, 64, %dst /* IEU0 */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ faligndata %f12, %f14, %f48 /* FPA */
+ fmovd %f14, %f50 /* FPA Group */
vis6: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f52,f54,f56,f58,f60,f62,f48,f50,f50,
,LDBLK(f32), ,,,,,,STBLK,,
@@ -734,24 +736,24 @@ vis6e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14
,SYNC, ,,,,,,STBLK,ST(f48,64),
,add %dst, 72, %dst; add %len, 192 - 2*8, %len; ba,pt %icc, e1)
.align 2048
-vis7s: add %src, 128 - 56, %src /* IEU0 Group */
- ldda [%src-72] %asi, %f14 /* Load Group */
- wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- ldda [%src-64] %asi, %f16 /* Load Group */
- fmovd %f48, %f0 /* FPA Group */
- fmuld %f32, %f32, %f2 /* FPM */
- clr %x4 /* IEU0 */
- faddd %f32, %f32, %f4 /* FPA Group */
- fmuld %f32, %f32, %f6 /* FPM */
- clr %x5 /* IEU0 */
- faddd %f32, %f32, %f8 /* FPA Group */
- fmuld %f32, %f32, %f10 /* FPM */
- clr %x6 /* IEU0 */
- faddd %f32, %f32, %f12 /* FPA Group */
- clr %x7 /* IEU0 */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- sub %dst, 64, %dst /* IEU0 */
- fmovd %f14, %f48 /* FPA */
+vis7s: add %src, 128 - 56, %src /* IEU0 Group */
+ ldda [%src-72] %asi, %f14 /* Load Group */
+ wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ ldda [%src-64] %asi, %f16 /* Load Group */
+ fmovd %f48, %f0 /* FPA Group */
+ fmuld %f32, %f32, %f2 /* FPM */
+ clr %x4 /* IEU0 */
+ faddd %f32, %f32, %f4 /* FPA Group */
+ fmuld %f32, %f32, %f6 /* FPM */
+ clr %x5 /* IEU0 */
+ faddd %f32, %f32, %f8 /* FPA Group */
+ fmuld %f32, %f32, %f10 /* FPM */
+ clr %x6 /* IEU0 */
+ faddd %f32, %f32, %f12 /* FPA Group */
+ clr %x7 /* IEU0 */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ sub %dst, 64, %dst /* IEU0 */
+ fmovd %f14, %f48 /* FPA */
vis7: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f50,f52,f54,f56,f58,f60,f62,f48,f48,
,LDBLK(f32), ,,,,,,,STBLK,
@@ -779,112 +781,112 @@ vis7e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14
e1: END_THE_TRICK1( f0,f2,f4,f6,f8,f10,f12,f14,f16,f6)
e2: END_THE_TRICK1( f16,f18,f20,f22,f24,f26,f28,f30,f32,f6)
e3: END_THE_TRICK1( f32,f34,f36,f38,f40,f42,f44,f46,f0,f6)
-ett: rd %asi, %x4 /* LSU Group+4bubbles */
- rd %gsr, %x3 /* LSU Group+4bubbles */
+ett: rd %asi, %x4 /* LSU Group+4bubbles */
+ rd %gsr, %x3 /* LSU Group+4bubbles */
#ifdef __KERNEL__
- srl %x4, 3, %x5 /* IEU0 Group */
- xor %x4, ASI_BLK_XOR1, %x4 /* IEU1 */
- wr %x4, %x5, %asi /* LSU Group+4bubbles */
+ srl %x4, 3, %x5 /* IEU0 Group */
+ xor %x4, ASI_BLK_XOR1, %x4 /* IEU1 */
+ wr %x4, %x5, %asi /* LSU Group+4bubbles */
#else
- wr %x4, ASI_BLK_XOR, %asi /* LSU Group+4bubbles */
+ wr %x4, ASI_BLK_XOR, %asi /* LSU Group+4bubbles */
#endif
- andcc %x3, 7, %x3 /* IEU1 Group */
- add %dst, 8, %dst /* IEU0 */
- bne,pn %icc, 1f /* CTI */
- fzero %f10 /* FPA */
- brz,a,pn %len, 2f /* CTI+IEU1 Group */
- std %f6, [%dst - 8] /* Store */
-1: cmp %len, 8 /* IEU1 */
- blu,pn %icc, 3f /* CTI */
- sub %src, 64, %src /* IEU0 Group */
-1: ldda [%src] %asi, %f2 /* Load Group */
- fpadd32 %f10, %f2, %f12 /* FPA Group+load stall */
- add %src, 8, %src /* IEU0 */
- add %dst, 8, %dst /* IEU1 */
- faligndata %f6, %f2, %f14 /* FPA Group */
- fcmpgt32 %f10, %f12, %x5 /* FPM Group */
- std %f14, [%dst - 16] /* Store */
- fmovd %f2, %f6 /* FPA */
- fmovd %f12, %f10 /* FPA Group */
- sub %len, 8, %len /* IEU1 */
- fzero %f16 /* FPA Group - FPU nop */
- fzero %f18 /* FPA Group - FPU nop */
- inc %x5 /* IEU0 */
- srl %x5, 1, %x5 /* IEU0 Group (regdep) */
- cmp %len, 8 /* IEU1 */
- bgeu,pt %icc, 1b /* CTI */
- add %x5, %sum, %sum /* IEU0 Group */
-3: brz,a,pt %x3, 2f /* CTI+IEU1 */
- std %f6, [%dst - 8] /* Store Group */
- st %f7, [%dst - 8] /* Store Group */
- sub %dst, 4, %dst /* IEU0 */
- add %len, 4, %len /* IEU1 */
+ andcc %x3, 7, %x3 /* IEU1 Group */
+ add %dst, 8, %dst /* IEU0 */
+ bne,pn %icc, 1f /* CTI */
+ fzero %f10 /* FPA */
+ brz,a,pn %len, 2f /* CTI+IEU1 Group */
+ std %f6, [%dst - 8] /* Store */
+1: cmp %len, 8 /* IEU1 */
+ blu,pn %icc, 3f /* CTI */
+ sub %src, 64, %src /* IEU0 Group */
+1: ldda [%src] %asi, %f2 /* Load Group */
+ fpadd32 %f10, %f2, %f12 /* FPA Group+load stall*/
+ add %src, 8, %src /* IEU0 */
+ add %dst, 8, %dst /* IEU1 */
+ faligndata %f6, %f2, %f14 /* FPA Group */
+ fcmpgt32 %f10, %f12, %x5 /* FPM Group */
+ std %f14, [%dst - 16] /* Store */
+ fmovd %f2, %f6 /* FPA */
+ fmovd %f12, %f10 /* FPA Group */
+ sub %len, 8, %len /* IEU1 */
+ fzero %f16 /* FPA Group - FPU nop */
+ fzero %f18 /* FPA Group - FPU nop */
+ inc %x5 /* IEU0 */
+ srl %x5, 1, %x5 /* IEU0 Group (regdep) */
+ cmp %len, 8 /* IEU1 */
+ bgeu,pt %icc, 1b /* CTI */
+ add %x5, %sum, %sum /* IEU0 Group */
+3: brz,a,pt %x3, 2f /* CTI+IEU1 */
+ std %f6, [%dst - 8] /* Store Group */
+ st %f7, [%dst - 8] /* Store Group */
+ sub %dst, 4, %dst /* IEU0 */
+ add %len, 4, %len /* IEU1 */
2:
#ifdef __KERNEL__
- sub %sp, 8, %sp /* IEU0 Group */
+ sub %sp, 8, %sp /* IEU0 Group */
#endif
END_THE_TRICK2( f48,f50,f52,f54,f56,f58,f60,f10,f12,f62)
- membar #Sync /* LSU Group */
+ membar #Sync /* LSU Group */
#ifdef __KERNEL__
VISExit
- add %sp, 8, %sp /* IEU0 Group */
+ add %sp, 8, %sp /* IEU0 Group */
#endif
-23: brnz,pn %len, 26f /* CTI+IEU1 Group */
-24: sllx %sum, 32, %g1 /* IEU0 */
-25: addcc %sum, %g1, %src /* IEU1 Group */
- srlx %src, 32, %src /* IEU0 Group (regdep) */
- bcs,a,pn %xcc, 1f /* CTI */
- add %src, 1, %src /* IEU1 */
+23: brnz,pn %len, 26f /* CTI+IEU1 Group */
+24: sllx %sum, 32, %g1 /* IEU0 */
+25: addcc %sum, %g1, %src /* IEU1 Group */
+ srlx %src, 32, %src /* IEU0 Group (regdep) */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %src, 1, %src /* IEU1 */
#ifndef __KERNEL__
-1: retl /* CTI Group brk forced */
- srl %src, 0, %src /* IEU0 */
+1: retl /* CTI Group brk forced*/
+ srl %src, 0, %src /* IEU0 */
#else
-1: sethi %uhi(PAGE_OFFSET), %g4 /* IEU0 Group */
- retl /* CTI Group brk forced */
- sllx %g4, 32, %g4 /* IEU0 */
+1: sethi %uhi(PAGE_OFFSET), %g4 /* IEU0 Group */
+ retl /* CTI Group brk forced*/
+ sllx %g4, 32, %g4 /* IEU0 */
#endif
-26: andcc %len, 8, %g0 /* IEU1 Group */
- be,pn %icc, 1f /* CTI */
- lduwa [%src] %asi, %o4 /* Load */
- lduwa [%src+4] %asi, %g2 /* Load Group */
- add %src, 8, %src /* IEU0 */
- add %dst, 8, %dst /* IEU1 */
- sllx %o4, 32, %g5 /* IEU0 Group */
- stw %o4, [%dst - 8] /* Store */
- or %g5, %g2, %g5 /* IEU0 Group */
- stw %g2, [%dst - 4] /* Store */
- addcc %g5, %sum, %sum /* IEU1 Group */
- bcs,a,pn %xcc, 1f /* CTI */
- add %sum, 1, %sum /* IEU0 */
-1: andcc %len, 4, %g0 /* IEU1 Group */
- be,a,pn %icc, 1f /* CTI */
- clr %g2 /* IEU0 */
- lduwa [%src] %asi, %g7 /* Load */
- add %src, 4, %src /* IEU0 Group */
- add %dst, 4, %dst /* IEU1 */
- sllx %g7, 32, %g2 /* IEU0 Group */
- stw %g7, [%dst - 4] /* Store */
-1: andcc %len, 2, %g0 /* IEU1 */
- be,a,pn %icc, 1f /* CTI */
- clr %g3 /* IEU0 Group */
- lduha [%src] %asi, %g7 /* Load */
- add %src, 2, %src /* IEU1 */
- add %dst, 2, %dst /* IEU0 Group */
- sll %g7, 16, %g3 /* IEU0 Group */
- sth %g7, [%dst - 2] /* Store */
-1: andcc %len, 1, %g0 /* IEU1 */
- be,a,pn %icc, 1f /* CTI */
- clr %o5 /* IEU0 Group */
- lduba [%src] %asi, %g7 /* Load */
- sll %g7, 8, %o5 /* IEU0 Group */
- stb %g7, [%dst] /* Store */
-1: or %g2, %g3, %g3 /* IEU1 */
- or %o5, %g3, %g3 /* IEU0 Group (regdep) */
- addcc %g3, %sum, %sum /* IEU1 Group (regdep) */
- bcs,a,pn %xcc, 1f /* CTI */
- add %sum, 1, %sum /* IEU0 */
-1: ba,pt %xcc, 25b /* CTI Group */
- sllx %sum, 32, %g1 /* IEU0 */
+26: andcc %len, 8, %g0 /* IEU1 Group */
+ be,pn %icc, 1f /* CTI */
+ lduwa [%src] %asi, %o4 /* Load */
+ lduwa [%src+4] %asi, %g2 /* Load Group */
+ add %src, 8, %src /* IEU0 */
+ add %dst, 8, %dst /* IEU1 */
+ sllx %o4, 32, %g5 /* IEU0 Group */
+ stw %o4, [%dst - 8] /* Store */
+ or %g5, %g2, %g5 /* IEU0 Group */
+ stw %g2, [%dst - 4] /* Store */
+ addcc %g5, %sum, %sum /* IEU1 Group */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %sum, 1, %sum /* IEU0 */
+1: andcc %len, 4, %g0 /* IEU1 Group */
+ be,a,pn %icc, 1f /* CTI */
+ clr %g2 /* IEU0 */
+ lduwa [%src] %asi, %g7 /* Load */
+ add %src, 4, %src /* IEU0 Group */
+ add %dst, 4, %dst /* IEU1 */
+ sllx %g7, 32, %g2 /* IEU0 Group */
+ stw %g7, [%dst - 4] /* Store */
+1: andcc %len, 2, %g0 /* IEU1 */
+ be,a,pn %icc, 1f /* CTI */
+ clr %g3 /* IEU0 Group */
+ lduha [%src] %asi, %g7 /* Load */
+ add %src, 2, %src /* IEU1 */
+ add %dst, 2, %dst /* IEU0 Group */
+ sll %g7, 16, %g3 /* IEU0 Group */
+ sth %g7, [%dst - 2] /* Store */
+1: andcc %len, 1, %g0 /* IEU1 */
+ be,a,pn %icc, 1f /* CTI */
+ clr %o5 /* IEU0 Group */
+ lduba [%src] %asi, %g7 /* Load */
+ sll %g7, 8, %o5 /* IEU0 Group */
+ stb %g7, [%dst] /* Store */
+1: or %g2, %g3, %g3 /* IEU1 */
+ or %o5, %g3, %g3 /* IEU0 Group (regdep) */
+ addcc %g3, %sum, %sum /* IEU1 Group (regdep) */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %sum, 1, %sum /* IEU0 */
+1: ba,pt %xcc, 25b /* CTI Group */
+ sllx %sum, 32, %g1 /* IEU0 */
#ifdef __KERNEL__
end:
diff --git a/arch/sparc64/lib/VIScsumcopyusr.S b/arch/sparc64/lib/VIScsumcopyusr.S
index 17bbe78b1..4730a1c08 100644
--- a/arch/sparc64/lib/VIScsumcopyusr.S
+++ b/arch/sparc64/lib/VIScsumcopyusr.S
@@ -1,4 +1,4 @@
-/* $Id: VIScsumcopyusr.S,v 1.1 2000/01/19 04:06:04 davem Exp $
+/* $Id: VIScsumcopyusr.S,v 1.2 2000/02/20 23:21:40 davem Exp $
* VIScsumcopyusr.S: High bandwidth IP checksumming with simultaneous
* copying utilizing the UltraSparc Visual Instruction Set.
*
@@ -91,358 +91,360 @@
#define DO_THE_TRICK(f0,f2,f4,f6,f8,f10,f12,f14,F0,F2,F4,F6,F8,F10,F12,F14,DUMMY1,A0,A2,A4,A6,A8,A10,A12,A14,B14,DUMMY2,LOAD,STORE1,STORE2,STORE3,STORE4,STORE5,STORE6,STORE7,STORE8,DUMMY3,BRANCH...) \
- LOAD /* Load Group */; \
- faligndata %A14, %F0, %A14 /* FPA Group */; \
- inc %x5 /* IEU0 */; \
- STORE1 /* Store (optional) */; \
- faligndata %F0, %F2, %A0 /* FPA Group */; \
- srl %x5, 1, %x5 /* IEU0 */; \
- add %sum, %x4, %sum /* IEU1 */; \
- fpadd32 %F0, %f0, %F0 /* FPA Group */; \
- inc %x6 /* IEU0 */; \
- STORE2 /* Store (optional) */; \
- faligndata %F2, %F4, %A2 /* FPA Group */; \
- srl %x6, 1, %x6 /* IEU0 */; \
- add %sum, %x5, %sum /* IEU1 */; \
- fpadd32 %F2, %f2, %F2 /* FPA Group */; \
- add %src, 64, %src /* IEU0 */; \
- add %dst, 64, %dst /* IEU1 */; \
- fcmpgt32 %f0, %F0, %x1 /* FPM Group */; \
- inc %x7 /* IEU0 */; \
- STORE3 /* Store (optional) */; \
- faligndata %F4, %F6, %A4 /* FPA */; \
- srl %x7, 1, %x7 /* IEU0 Group */; \
- add %sum, %x6, %sum /* IEU1 */; \
- fpadd32 %F4, %f4, %F4 /* FPA */; \
- fcmpgt32 %f2, %F2, %x2 /* FPM Group */; \
- inc %x8 /* IEU0 */; \
- STORE4 /* Store (optional) */; \
- faligndata %F6, %F8, %A6 /* FPA */; \
- srl %x8, 1, %x8 /* IEU0 Group */; \
- add %sum, %x7, %sum /* IEU1 */; \
- fpadd32 %F6, %f6, %F6 /* FPA */; \
- fcmpgt32 %f4, %F4, %x3 /* FPM Group */; \
- inc %x1 /* IEU0 */; \
- STORE5 /* Store (optional) */; \
- faligndata %F8, %F10, %A8 /* FPA */; \
- srl %x1, 1, %x1 /* IEU0 Group */; \
- add %sum, %x8, %sum /* IEU1 */; \
- fpadd32 %F8, %f8, %F8 /* FPA */; \
- fcmpgt32 %f6, %F6, %x4 /* FPM Group */; \
- inc %x2 /* IEU0 */; \
- STORE6 /* Store (optional) */; \
- faligndata %F10, %F12, %A10 /* FPA */; \
- srl %x2, 1, %x2 /* IEU0 Group */; \
- add %sum, %x1, %sum /* IEU1 */; \
- fpadd32 %F10, %f10, %F10 /* FPA */; \
- fcmpgt32 %f8, %F8, %x5 /* FPM Group */; \
- inc %x3 /* IEU0 */; \
- STORE7 /* Store (optional) */; \
- faligndata %F12, %F14, %A12 /* FPA */; \
- srl %x3, 1, %x3 /* IEU0 Group */; \
- add %sum, %x2, %sum /* IEU1 */; \
- fpadd32 %F12, %f12, %F12 /* FPA */; \
- fcmpgt32 %f10, %F10, %x6 /* FPM Group */; \
- inc %x4 /* IEU0 */; \
- STORE8 /* Store (optional) */; \
- fmovd %F14, %B14 /* FPA */; \
- srl %x4, 1, %x4 /* IEU0 Group */; \
- add %sum, %x3, %sum /* IEU1 */; \
- fpadd32 %F14, %f14, %F14 /* FPA */; \
- fcmpgt32 %f12, %F12, %x7 /* FPM Group */; \
- subcc %len, 64, %len /* IEU1 */; \
- BRANCH /* CTI */; \
- fcmpgt32 %f14, %F14, %x8 /* FPM Group */; \
+ LOAD /* Load (Group) */; \
+ faligndata %A14, %F0, %A14 /* FPA Group */; \
+ inc %x5 /* IEU0 */; \
+ STORE1 /* Store (optional) */; \
+ faligndata %F0, %F2, %A0 /* FPA Group */; \
+ srl %x5, 1, %x5 /* IEU0 */; \
+ add %sum, %x4, %sum /* IEU1 */; \
+ fpadd32 %F0, %f0, %F0 /* FPA Group */; \
+ inc %x6 /* IEU0 */; \
+ STORE2 /* Store (optional) */; \
+ faligndata %F2, %F4, %A2 /* FPA Group */; \
+ srl %x6, 1, %x6 /* IEU0 */; \
+ add %sum, %x5, %sum /* IEU1 */; \
+ fpadd32 %F2, %f2, %F2 /* FPA Group */; \
+ add %src, 64, %src /* IEU0 */; \
+ fcmpgt32 %f0, %F0, %x1 /* FPM */; \
+ add %dst, 64, %dst /* IEU1 Group */; \
+ inc %x7 /* IEU0 */; \
+ STORE3 /* Store (optional) */; \
+ faligndata %F4, %F6, %A4 /* FPA */; \
+ fpadd32 %F4, %f4, %F4 /* FPA Group */; \
+ add %sum, %x6, %sum /* IEU1 */; \
+ fcmpgt32 %f2, %F2, %x2 /* FPM */; \
+ srl %x7, 1, %x7 /* IEU0 Group */; \
+ inc %x8 /* IEU1 */; \
+ STORE4 /* Store (optional) */; \
+ faligndata %F6, %F8, %A6 /* FPA */; \
+ fpadd32 %F6, %f6, %F6 /* FPA Group */; \
+ srl %x8, 1, %x8 /* IEU0 */; \
+ fcmpgt32 %f4, %F4, %x3 /* FPM */; \
+ add %sum, %x7, %sum /* IEU0 Group */; \
+ inc %x1 /* IEU1 */; \
+ STORE5 /* Store (optional) */; \
+ faligndata %F8, %F10, %A8 /* FPA */; \
+ fpadd32 %F8, %f8, %F8 /* FPA Group */; \
+ srl %x1, 1, %x1 /* IEU0 */; \
+ fcmpgt32 %f6, %F6, %x4 /* FPM */; \
+ add %sum, %x8, %sum /* IEU0 Group */; \
+ inc %x2 /* IEU1 */; \
+ STORE6 /* Store (optional) */; \
+ faligndata %F10, %F12, %A10 /* FPA */; \
+ fpadd32 %F10, %f10, %F10 /* FPA Group */; \
+ srl %x2, 1, %x2 /* IEU0 */; \
+ fcmpgt32 %f8, %F8, %x5 /* FPM */; \
+ add %sum, %x1, %sum /* IEU0 Group */; \
+ inc %x3 /* IEU1 */; \
+ STORE7 /* Store (optional) */; \
+ faligndata %F12, %F14, %A12 /* FPA */; \
+ fpadd32 %F12, %f12, %F12 /* FPA Group */; \
+ srl %x3, 1, %x3 /* IEU0 */; \
+ fcmpgt32 %f10, %F10, %x6 /* FPM */; \
+ add %sum, %x2, %sum /* IEU0 Group */; \
+ inc %x4 /* IEU1 */; \
+ STORE8 /* Store (optional) */; \
+ fmovd %F14, %B14 /* FPA */; \
+ fpadd32 %F14, %f14, %F14 /* FPA Group */; \
+ srl %x4, 1, %x4 /* IEU0 */; \
+ fcmpgt32 %f12, %F12, %x7 /* FPM */; \
+ add %sum, %x3, %sum /* IEU0 Group */; \
+ subcc %len, 64, %len /* IEU1 */; \
+ BRANCH /* CTI */; \
+ fcmpgt32 %f14, %F14, %x8 /* FPM Group */;
#define END_THE_TRICK(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB,S0,S1,S2,S3,T0,T1,U0,fz) \
- inc %x5 /* IEU0 Group */; \
- fpadd32 %f2, %f0, %S0 /* FPA */; \
- srl %x5, 1, %x5 /* IEU0 Group */; \
- add %sum, %x4, %sum /* IEU1 */; \
- fpadd32 %f6, %f4, %S1 /* FPA */; \
- inc %x6 /* IEU0 Group */; \
- add %sum, %x5, %sum /* IEU1 */; \
- fcmpgt32 %f0, %S0, %x1 /* FPM Group */; \
- srl %x6, 1, %x6 /* IEU0 */; \
- inc %x7 /* IEU1 */; \
- fpadd32 %f10, %f8, %S2 /* FPA */; \
- fcmpgt32 %f4, %S1, %x2 /* FPM Group */; \
- srl %x7, 1, %x7 /* IEU0 */; \
- add %sum, %x6, %sum /* IEU1 */; \
- fpadd32 %f14, %f12, %S3 /* FPA */; \
- inc %x8 /* IEU0 Group */; \
- add %sum, %x7, %sum /* IEU1 */; \
- fzero %fz /* FPA */; \
- fcmpgt32 %f8, %S2, %x3 /* FPM Group */; \
- srl %x8, 1, %x8 /* IEU0 */; \
- inc %x1 /* IEU1 */; \
- fpadd32 %S0, %S1, %T0 /* FPA */; \
- fcmpgt32 %f12, %S3, %x4 /* FPM Group */; \
- srl %x1, 1, %x1 /* IEU0 */; \
- add %sum, %x8, %sum /* IEU1 */; \
- fpadd32 %S2, %S3, %T1 /* FPA */; \
- inc %x2 /* IEU0 Group */; \
- add %sum, %x1, %sum /* IEU1 */; \
- fcmpgt32 %S0, %T0, %x5 /* FPM Group */; \
- srl %x2, 1, %x2 /* IEU0 */; \
- inc %x3 /* IEU1 */; \
- fcmpgt32 %S2, %T1, %x6 /* FPM Group */; \
- srl %x3, 1, %x3 /* IEU0 */; \
- add %sum, %x2, %sum /* IEU1 */; \
- inc %x4 /* IEU0 Group */; \
- add %sum, %x3, %sum /* IEU1 */; \
- fcmpgt32 %fz, %f2, %x7 /* FPM Group */; \
- srl %x4, 1, %x4 /* IEU0 */; \
- inc %x5 /* IEU1 */; \
- fpadd32 %T0, %T1, %U0 /* FPA */; \
- fcmpgt32 %fz, %f6, %x8 /* FPM Group */; \
- srl %x5, 1, %x5 /* IEU0 */; \
- add %sum, %x4, %sum /* IEU1 */; \
- inc %x6 /* IEU0 Group */; \
- add %sum, %x5, %sum /* IEU1 */; \
- fcmpgt32 %fz, %f10, %x1 /* FPM Group */; \
- srl %x6, 1, %x6 /* IEU0 */; \
- inc %x7 /* IEU1 */; \
- fcmpgt32 %fz, %f14, %x2 /* FPM Group */; \
- ba,pt %xcc, ett /* CTI */; \
- fmovd %FA, %FB /* FPA */; \
+ inc %x5 /* IEU0 Group */; \
+ fpadd32 %f2, %f0, %S0 /* FPA */; \
+ add %sum, %x4, %sum /* IEU1 */; \
+ srl %x5, 1, %x5 /* IEU0 Group */; \
+ fpadd32 %f6, %f4, %S1 /* FPA */; \
+ inc %x6 /* IEU1 */; \
+ fpadd32 %f10, %f8, %S2 /* FPA Group */; \
+ add %sum, %x5, %sum /* IEU0 */; \
+ fcmpgt32 %f0, %S0, %x1 /* FPM */; \
+ fpadd32 %f14, %f12, %S3 /* FPA Group */; \
+ srl %x6, 1, %x6 /* IEU0 */; \
+ fcmpgt32 %f4, %S1, %x2 /* FPM */; \
+ add %sum, %x6, %sum /* IEU0 Group */; \
+ fzero %fz /* FPA */; \
+ fcmpgt32 %f8, %S2, %x3 /* FPM */; \
+ inc %x7 /* IEU0 Group */; \
+ inc %x8 /* IEU1 */; \
+ srl %x7, 1, %x7 /* IEU0 Group */; \
+ inc %x1 /* IEU1 */; \
+ fpadd32 %S0, %S1, %T0 /* FPA */; \
+ fpadd32 %S2, %S3, %T1 /* FPA Group */; \
+ add %sum, %x7, %sum /* IEU0 */; \
+ fcmpgt32 %f12, %S3, %x4 /* FPM */; \
+ srl %x8, 1, %x8 /* IEU0 Group */; \
+ inc %x2 /* IEU1 */; \
+ srl %x1, 1, %x1 /* IEU0 Group */; \
+ add %sum, %x8, %sum /* IEU1 */; \
+ add %sum, %x1, %sum /* IEU0 Group */; \
+ fcmpgt32 %S0, %T0, %x5 /* FPM */; \
+ srl %x2, 1, %x2 /* IEU0 Group */; \
+ fcmpgt32 %S2, %T1, %x6 /* FPM */; \
+ inc %x3 /* IEU0 Group */; \
+ add %sum, %x2, %sum /* IEU1 */; \
+ srl %x3, 1, %x3 /* IEU0 Group */; \
+ inc %x4 /* IEU1 */; \
+ fpadd32 %T0, %T1, %U0 /* FPA Group */; \
+ add %sum, %x3, %sum /* IEU0 */; \
+ fcmpgt32 %fz, %f2, %x7 /* FPM */; \
+ srl %x4, 1, %x4 /* IEU0 Group */; \
+ fcmpgt32 %fz, %f6, %x8 /* FPM */; \
+ inc %x5 /* IEU0 Group */; \
+ add %sum, %x4, %sum /* IEU1 */; \
+ srl %x5, 1, %x5 /* IEU0 Group */; \
+ fcmpgt32 %fz, %f10, %x1 /* FPM */; \
+ inc %x6 /* IEU0 Group */; \
+ add %sum, %x5, %sum /* IEU1 */; \
+ fmovd %FA, %FB /* FPA Group */; \
+ fcmpgt32 %fz, %f14, %x2 /* FPM */; \
+ srl %x6, 1, %x6 /* IEU0 Group */; \
+ ba,pt %xcc, ett /* CTI */; \
+ inc %x7 /* IEU1 */;
-#define END_THE_TRICK1(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB) \
+#define END_THE_TRICK1(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB) \
END_THE_TRICK(f0,f2,f4,f6,f8,f10,f12,f14,FA,FB,f48,f50,f52,f54,f56,f58,f60,f62)
-#define END_THE_TRICK2(S0,S1,S2,S3,T0,T1,U0,U1,V0,fz) \
- fpadd32 %U0, %U1, %V0 /* FPA Group */; \
- srl %x7, 1, %x7 /* IEU0 */; \
- add %sum, %x6, %sum /* IEU1 */; \
- std %V0, [%sp + STACKOFF] /* Store Group */; \
- inc %x8 /* IEU0 */; \
- sub %sum, %x7, %sum /* IEU1 */; \
- fcmpgt32 %fz, %S1, %x3 /* FPM Group */; \
- srl %x8, 1, %x8 /* IEU0 */; \
- inc %x1 /* IEU1 */; \
- fcmpgt32 %fz, %S3, %x4 /* FPM Group */; \
- srl %x1, 1, %x1 /* IEU0 */; \
- sub %sum, %x8, %sum /* IEU1 */; \
- ldx [%sp + STACKOFF], %x8 /* Load Group */; \
- inc %x2 /* IEU0 */; \
- sub %sum, %x1, %sum /* IEU1 */; \
- fcmpgt32 %fz, %T1, %x5 /* FPM Group */; \
- srl %x2, 1, %x2 /* IEU0 */; \
- inc %x3 /* IEU1 */; \
- fcmpgt32 %T0, %U0, %x6 /* FPM Group */; \
- srl %x3, 1, %x3 /* IEU0 */; \
- sub %sum, %x2, %sum /* IEU1 */; \
- inc %x4 /* IEU0 Group */; \
- sub %sum, %x3, %sum /* IEU1 */; \
- fcmpgt32 %fz, %U1, %x7 /* FPM Group */; \
- srl %x4, 1, %x4 /* IEU0 */; \
- inc %x5 /* IEU1 */; \
- fcmpgt32 %U0, %V0, %x1 /* FPM Group */; \
- srl %x5, 1, %x5 /* IEU0 */; \
- sub %sum, %x4, %sum /* IEU1 */; \
- fcmpgt32 %fz, %V0, %x2 /* FPM Group */; \
- inc %x6 /* IEU0 */; \
- sub %sum, %x5, %sum /* IEU1 */; \
- srl %x6, 1, %x6 /* IEU0 Group */; \
- inc %x7 /* IEU1 */; \
- srl %x7, 1, %x7 /* IEU0 Group */; \
- add %sum, %x6, %sum /* IEU1 */; \
- inc %x1 /* IEU0 Group */; \
- sub %sum, %x7, %sum /* IEU1 */; \
- srl %x1, 1, %x1 /* IEU0 Group */; \
- inc %x2 /* IEU1 */; \
- srl %x2, 1, %x2 /* IEU0 Group */; \
- add %sum, %x1, %sum /* IEU1 */; \
- sub %sum, %x2, %sum /* IEU0 Group */; \
- addcc %sum, %x8, %sum /* IEU Group */; \
- bcs,a,pn %xcc, 33f /* CTI */; \
- add %sum, 1, %sum /* IEU0 */; \
-33: /* That's it */;
+#define END_THE_TRICK2(S0,S1,S2,S3,T0,T1,U0,U1,V0,fz) \
+ fpadd32 %U0, %U1, %V0 /* FPA Group */; \
+ srl %x7, 1, %x7 /* IEU0 */; \
+ add %sum, %x6, %sum /* IEU1 */; \
+ std %V0, [%sp + STACKOFF] /* Store Group */; \
+ inc %x8 /* IEU0 */; \
+ sub %sum, %x7, %sum /* IEU1 */; \
+ srl %x8, 1, %x8 /* IEU0 Group */; \
+ fcmpgt32 %fz, %S1, %x3 /* FPM */; \
+ inc %x1 /* IEU0 Group */; \
+ fcmpgt32 %fz, %S3, %x4 /* FPM */; \
+ srl %x1, 1, %x1 /* IEU0 Group */; \
+ sub %sum, %x8, %sum /* IEU1 */; \
+ ldx [%sp + STACKOFF], %x8 /* Load Group */; \
+ inc %x2 /* IEU0 */; \
+ sub %sum, %x1, %sum /* IEU1 */; \
+ srl %x2, 1, %x2 /* IEU0 Group */; \
+ fcmpgt32 %fz, %T1, %x5 /* FPM */; \
+ inc %x3 /* IEU0 Group */; \
+ fcmpgt32 %T0, %U0, %x6 /* FPM */; \
+ srl %x3, 1, %x3 /* IEU0 Group */; \
+ sub %sum, %x2, %sum /* IEU1 */; \
+ inc %x4 /* IEU0 Group */; \
+ sub %sum, %x3, %sum /* IEU1 */; \
+ srl %x4, 1, %x4 /* IEU0 Group */; \
+ fcmpgt32 %fz, %U1, %x7 /* FPM */; \
+ inc %x5 /* IEU0 Group */; \
+ fcmpgt32 %U0, %V0, %x1 /* FPM */; \
+ srl %x5, 1, %x5 /* IEU0 Group */; \
+ sub %sum, %x4, %sum /* IEU1 */; \
+ sub %sum, %x5, %sum /* IEU0 Group */; \
+ fcmpgt32 %fz, %V0, %x2 /* FPM */; \
+ inc %x6 /* IEU0 Group */; \
+ inc %x7 /* IEU1 */; \
+ srl %x6, 1, %x6 /* IEU0 Group */; \
+ inc %x1 /* IEU1 */; \
+ srl %x7, 1, %x7 /* IEU0 Group */; \
+ add %sum, %x6, %sum /* IEU1 */; \
+ srl %x1, 1, %x1 /* IEU0 Group */; \
+ sub %sum, %x7, %sum /* IEU1 */; \
+ inc %x2 /* IEU0 Group */; \
+ add %sum, %x1, %sum /* IEU1 */; \
+ srl %x2, 1, %x2 /* IEU0 Group */; \
+ sub %sum, %x2, %sum /* IEU0 Group */; \
+ addcc %sum, %x8, %sum /* IEU1 Group */; \
+ bcs,a,pn %xcc, 33f /* CTI */; \
+ add %sum, 1, %sum /* IEU0 (Group) */; \
+33: /* That's it */;
.text
.globl csum_partial_copy_user_vis
.align 32
-/* %asi should be either ASI_P or ASI_AIUS for csum_partial_copy resp. csum_partial_copy_from_user */
-/* This assumes that !((%src^%dst)&3) && !((%src|%dst)&1) && %len >= 256 */
+/* %asi should be either ASI_P or ASI_AIUS for csum_partial_copy resp.
+ * csum_partial_copy_from_user
+ * This assumes that !((%src^%dst)&3) && !((%src|%dst)&1) && %len >= 256
+ */
csum_partial_copy_user_vis:
- andcc %dst, 7, %g0 /* IEU1 Group */
- be,pt %icc, 4f /* CTI */
- and %dst, 0x38, %o4 /* IEU0 */
- mov 1, %g5 /* IEU0 Group */
- andcc %dst, 2, %g0 /* IEU1 */
- be,pt %icc, 1f /* CTI */
- and %dst, 4, %g7 /* IEU0 Group */
- lduh [%src], %g2 /* Load */
- sub %len, 2, %len /* IEU0 Group */
- add %dst, 2, %dst /* IEU1 */
- andcc %dst, 4, %g7 /* IEU1 Group */
- sll %g5, 16, %g5 /* IEU0 */
- stha %g2, [%dst - 2] %asi /* Store Group */
- sll %g2, 16, %g2 /* IEU0 */
- add %src, 2, %src /* IEU1 */
- addcc %g2, %sum, %sum /* IEU1 Group */
- bcs,a,pn %icc, 1f /* CTI */
- add %sum, %g5, %sum /* IEU0 */
-1: lduw [%src], %g2 /* Load */
- brz,a,pn %g7, 4f /* CTI+IEU1 Group */
- and %dst, 0x38, %o4 /* IEU0 */
- add %dst, 4, %dst /* IEU0 Group */
- sub %len, 4, %len /* IEU1 */
- addcc %g2, %sum, %sum /* IEU1 Group */
- bcs,a,pn %icc, 1f /* CTI */
- add %sum, 1, %sum /* IEU0 */
-1: and %dst, 0x38, %o4 /* IEU0 Group */
- stwa %g2, [%dst - 4] %asi /* Store */
- add %src, 4, %src /* IEU1 */
+ andcc %dst, 7, %g0 /* IEU1 Group */
+ be,pt %icc, 4f /* CTI */
+ and %dst, 0x38, %o4 /* IEU0 */
+ mov 1, %g5 /* IEU0 Group */
+ andcc %dst, 2, %g0 /* IEU1 */
+ be,pt %icc, 1f /* CTI */
+ and %dst, 4, %g7 /* IEU0 Group */
+ lduh [%src], %g2 /* Load */
+ sub %len, 2, %len /* IEU0 Group */
+ add %dst, 2, %dst /* IEU1 */
+ andcc %dst, 4, %g7 /* IEU1 Group */
+ sll %g5, 16, %g5 /* IEU0 */
+ stha %g2, [%dst - 2] %asi /* Store Group */
+ sll %g2, 16, %g2 /* IEU0 */
+ add %src, 2, %src /* IEU1 */
+ addcc %g2, %sum, %sum /* IEU1 Group */
+ bcs,a,pn %icc, 1f /* CTI */
+ add %sum, %g5, %sum /* IEU0 */
+1: lduw [%src], %g2 /* Load */
+ brz,a,pn %g7, 4f /* CTI+IEU1 Group */
+ and %dst, 0x38, %o4 /* IEU0 */
+ add %dst, 4, %dst /* IEU0 Group */
+ sub %len, 4, %len /* IEU1 */
+ addcc %g2, %sum, %sum /* IEU1 Group */
+ bcs,a,pn %icc, 1f /* CTI */
+ add %sum, 1, %sum /* IEU0 */
+1: and %dst, 0x38, %o4 /* IEU0 Group */
+ stwa %g2, [%dst - 4] %asi /* Store */
+ add %src, 4, %src /* IEU1 */
4:
#ifdef __KERNEL__
VISEntry
#endif
- mov %src, %g7 /* IEU1 Group */
- fzero %f48 /* FPA */
- alignaddr %src, %g0, %src /* Single Group */
- subcc %g7, %src, %g7 /* IEU1 Group */
- be,pt %xcc, 1f /* CTI */
- mov 0x40, %g1 /* IEU0 */
- lduw [%src], %g2 /* Load Group */
- subcc %sum, %g2, %sum /* IEU1 Group+load stall */
- bcs,a,pn %icc, 1f /* CTI */
- sub %sum, 1, %sum /* IEU0 */
-1: srl %sum, 0, %sum /* IEU0 Group */
- clr %g5 /* IEU1 */
- brz,pn %o4, 3f /* CTI+IEU1 Group */
- sub %g1, %o4, %g1 /* IEU0 */
- ldd [%src], %f0 /* Load */
- clr %o4 /* IEU0 Group */
- andcc %dst, 8, %g0 /* IEU1 */
- be,pn %icc, 1f /* CTI */
- ldd [%src + 8], %f2 /* Load Group */
- add %src, 8, %src /* IEU0 */
- sub %len, 8, %len /* IEU1 */
- fpadd32 %f0, %f48, %f50 /* FPA */
- addcc %dst, 8, %dst /* IEU1 Group */
- faligndata %f0, %f2, %f16 /* FPA */
- fcmpgt32 %f48, %f50, %o4 /* FPM Group */
- fmovd %f2, %f0 /* FPA Group */
- ldd [%src + 8], %f2 /* Load */
- stda %f16, [%dst - 8] %asi /* Store */
- fmovd %f50, %f48 /* FPA */
-1: andcc %g1, 0x10, %g0 /* IEU1 Group */
- be,pn %icc, 1f /* CTI */
- and %g1, 0x20, %g1 /* IEU0 */
- fpadd32 %f0, %f48, %f50 /* FPA */
- ldd [%src + 16], %f4 /* Load Group */
- add %src, 16, %src /* IEU0 */
- add %dst, 16, %dst /* IEU1 */
- faligndata %f0, %f2, %f16 /* FPA */
- fcmpgt32 %f48, %f50, %g5 /* FPM Group */
- sub %len, 16, %len /* IEU0 */
- inc %o4 /* IEU1 */
- stda %f16, [%dst - 16] %asi /* Store Group */
- fpadd32 %f2, %f50, %f48 /* FPA */
- srl %o4, 1, %o5 /* IEU0 */
- faligndata %f2, %f4, %f18 /* FPA Group */
- stda %f18, [%dst - 8] %asi /* Store */
- fcmpgt32 %f50, %f48, %o4 /* FPM Group */
- add %o5, %sum, %sum /* IEU0 */
- ldd [%src + 8], %f2 /* Load */
- fmovd %f4, %f0 /* FPA */
-1: brz,a,pn %g1, 4f /* CTI+IEU1 Group */
- rd %asi, %g2 /* LSU Group + 4 bubbles */
- inc %g5 /* IEU0 */
- fpadd32 %f0, %f48, %f50 /* FPA */
- ldd [%src + 16], %f4 /* Load Group */
- srl %g5, 1, %g5 /* IEU0 */
- add %dst, 32, %dst /* IEU1 */
- faligndata %f0, %f2, %f16 /* FPA */
- fcmpgt32 %f48, %f50, %o5 /* FPM Group */
- inc %o4 /* IEU0 */
- ldd [%src + 24], %f6 /* Load */
- srl %o4, 1, %o4 /* IEU0 Group */
- add %g5, %sum, %sum /* IEU1 */
- ldd [%src + 32], %f8 /* Load */
- fpadd32 %f2, %f50, %f48 /* FPA */
- faligndata %f2, %f4, %f18 /* FPA Group */
- sub %len, 32, %len /* IEU0 */
- stda %f16, [%dst - 32] %asi /* Store */
- fcmpgt32 %f50, %f48, %g3 /* FPM Group */
- inc %o5 /* IEU0 */
- add %o4, %sum, %sum /* IEU1 */
- fpadd32 %f4, %f48, %f50 /* FPA */
- faligndata %f4, %f6, %f20 /* FPA Group */
- srl %o5, 1, %o5 /* IEU0 */
- fcmpgt32 %f48, %f50, %g5 /* FPM Group */
- add %o5, %sum, %sum /* IEU0 */
- stda %f18, [%dst - 24] %asi /* Store */
- fpadd32 %f6, %f50, %f48 /* FPA */
- inc %g3 /* IEU0 Group */
- stda %f20, [%dst - 16] %asi /* Store */
- add %src, 32, %src /* IEU1 */
- faligndata %f6, %f8, %f22 /* FPA */
- fcmpgt32 %f50, %f48, %o4 /* FPM Group */
- srl %g3, 1, %g3 /* IEU0 */
- stda %f22, [%dst - 8] %asi /* Store */
- add %g3, %sum, %sum /* IEU0 Group */
-3: rd %asi, %g2 /* LSU Group + 4 bubbles */
+ mov %src, %g7 /* IEU1 Group */
+ fzero %f48 /* FPA */
+ alignaddr %src, %g0, %src /* Single Group */
+ subcc %g7, %src, %g7 /* IEU1 Group */
+ be,pt %xcc, 1f /* CTI */
+ mov 0x40, %g1 /* IEU0 */
+ lduw [%src], %g2 /* Load Group */
+ subcc %sum, %g2, %sum /* IEU1 Group+load stall*/
+ bcs,a,pn %icc, 1f /* CTI */
+ sub %sum, 1, %sum /* IEU0 */
+1: srl %sum, 0, %sum /* IEU0 Group */
+ clr %g5 /* IEU1 */
+ brz,pn %o4, 3f /* CTI+IEU1 Group */
+ sub %g1, %o4, %g1 /* IEU0 */
+ ldd [%src], %f0 /* Load */
+ clr %o4 /* IEU0 Group */
+ andcc %dst, 8, %g0 /* IEU1 */
+ be,pn %icc, 1f /* CTI */
+ ldd [%src + 8], %f2 /* Load Group */
+ add %src, 8, %src /* IEU0 */
+ sub %len, 8, %len /* IEU1 */
+ fpadd32 %f0, %f48, %f50 /* FPA */
+ addcc %dst, 8, %dst /* IEU1 Group */
+ faligndata %f0, %f2, %f16 /* FPA */
+ fcmpgt32 %f48, %f50, %o4 /* FPM Group */
+ fmovd %f2, %f0 /* FPA Group */
+ ldd [%src + 8], %f2 /* Load */
+ stda %f16, [%dst - 8] %asi /* Store */
+ fmovd %f50, %f48 /* FPA */
+1: andcc %g1, 0x10, %g0 /* IEU1 Group */
+ be,pn %icc, 1f /* CTI */
+ and %g1, 0x20, %g1 /* IEU0 */
+ fpadd32 %f0, %f48, %f50 /* FPA */
+ ldd [%src + 16], %f4 /* Load Group */
+ add %src, 16, %src /* IEU0 */
+ add %dst, 16, %dst /* IEU1 */
+ faligndata %f0, %f2, %f16 /* FPA */
+ fcmpgt32 %f48, %f50, %g5 /* FPM Group */
+ sub %len, 16, %len /* IEU0 */
+ inc %o4 /* IEU1 */
+ stda %f16, [%dst - 16] %asi /* Store Group */
+ fpadd32 %f2, %f50, %f48 /* FPA */
+ srl %o4, 1, %o5 /* IEU0 */
+ faligndata %f2, %f4, %f18 /* FPA Group */
+ stda %f18, [%dst - 8] %asi /* Store */
+ fcmpgt32 %f50, %f48, %o4 /* FPM Group */
+ add %o5, %sum, %sum /* IEU0 */
+ ldd [%src + 8], %f2 /* Load */
+ fmovd %f4, %f0 /* FPA */
+1: brz,a,pn %g1, 4f /* CTI+IEU1 Group */
+ rd %asi, %g2 /* LSU Group + 4 bubbles*/
+ inc %g5 /* IEU0 */
+ fpadd32 %f0, %f48, %f50 /* FPA */
+ ldd [%src + 16], %f4 /* Load Group */
+ srl %g5, 1, %g5 /* IEU0 */
+ add %dst, 32, %dst /* IEU1 */
+ faligndata %f0, %f2, %f16 /* FPA */
+ fcmpgt32 %f48, %f50, %o5 /* FPM Group */
+ inc %o4 /* IEU0 */
+ ldd [%src + 24], %f6 /* Load */
+ srl %o4, 1, %o4 /* IEU0 Group */
+ add %g5, %sum, %sum /* IEU1 */
+ ldd [%src + 32], %f8 /* Load */
+ fpadd32 %f2, %f50, %f48 /* FPA */
+ faligndata %f2, %f4, %f18 /* FPA Group */
+ sub %len, 32, %len /* IEU0 */
+ stda %f16, [%dst - 32] %asi /* Store */
+ fcmpgt32 %f50, %f48, %g3 /* FPM Group */
+ inc %o5 /* IEU0 */
+ add %o4, %sum, %sum /* IEU1 */
+ fpadd32 %f4, %f48, %f50 /* FPA */
+ faligndata %f4, %f6, %f20 /* FPA Group */
+ srl %o5, 1, %o5 /* IEU0 */
+ fcmpgt32 %f48, %f50, %g5 /* FPM Group */
+ add %o5, %sum, %sum /* IEU0 */
+ stda %f18, [%dst - 24] %asi /* Store */
+ fpadd32 %f6, %f50, %f48 /* FPA */
+ inc %g3 /* IEU0 Group */
+ stda %f20, [%dst - 16] %asi /* Store */
+ add %src, 32, %src /* IEU1 */
+ faligndata %f6, %f8, %f22 /* FPA */
+ fcmpgt32 %f50, %f48, %o4 /* FPM Group */
+ srl %g3, 1, %g3 /* IEU0 */
+ stda %f22, [%dst - 8] %asi /* Store */
+ add %g3, %sum, %sum /* IEU0 Group */
+3: rd %asi, %g2 /* LSU Group + 4 bubbles*/
#ifdef __KERNEL__
-4: sethi %hi(vis0s), %g7 /* IEU0 Group */
- or %g2, ASI_BLK_OR, %g2 /* IEU1 */
+4: sethi %hi(vis0s), %g7 /* IEU0 Group */
+ or %g2, ASI_BLK_OR, %g2 /* IEU1 */
#else
-4: rd %pc, %g7 /* LSU Group + 4 bubbles */
+4: rd %pc, %g7 /* LSU Group + 4 bubbles*/
#endif
- inc %g5 /* IEU0 Group */
- and %src, 0x38, %g3 /* IEU1 */
- membar #StoreLoad /* LSU Group */
- srl %g5, 1, %g5 /* IEU0 */
- inc %o4 /* IEU1 */
- sll %g3, 8, %g3 /* IEU0 Group */
- sub %len, 0xc0, %len /* IEU1 */
- addcc %g5, %sum, %sum /* IEU1 Group */
- srl %o4, 1, %o4 /* IEU0 */
- add %g7, %g3, %g7 /* IEU0 Group */
- add %o4, %sum, %sum /* IEU1 */
+ inc %g5 /* IEU0 Group */
+ and %src, 0x38, %g3 /* IEU1 */
+ membar #StoreLoad /* LSU Group */
+ srl %g5, 1, %g5 /* IEU0 */
+ inc %o4 /* IEU1 */
+ sll %g3, 8, %g3 /* IEU0 Group */
+ sub %len, 0xc0, %len /* IEU1 */
+ addcc %g5, %sum, %sum /* IEU1 Group */
+ srl %o4, 1, %o4 /* IEU0 */
+ add %g7, %g3, %g7 /* IEU0 Group */
+ add %o4, %sum, %sum /* IEU1 */
#ifdef __KERNEL__
- jmpl %g7 + %lo(vis0s), %g0 /* CTI+IEU1 Group */
+ jmpl %g7 + %lo(vis0s), %g0 /* CTI+IEU1 Group */
#else
- jmpl %g7 + (vis0s - 4b), %g0 /* CTI+IEU1 Group */
+ jmpl %g7 + (vis0s - 4b), %g0 /* CTI+IEU1 Group */
#endif
- fzero %f32 /* FPA */
+ fzero %f32 /* FPA */
.align 2048
-vis0s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- ldda [%src] ASI_BLK_P, %f0 /* Load Group */
- add %src, 64, %src /* IEU0 Group */
- ldda [%src] ASI_BLK_P, %f16 /* Load Group */
- add %src, 64, %src /* IEU0 Group */
- fmovd %f48, %f62 /* FPA Group f0 available */
- faligndata %f0, %f2, %f48 /* FPA Group f2 available */
- fcmpgt32 %f32, %f2, %x1 /* FPM Group f4 available */
- fpadd32 %f0, %f62, %f0 /* FPA */
- fcmpgt32 %f32, %f4, %x2 /* FPM Group f6 available */
- faligndata %f2, %f4, %f50 /* FPA */
- fcmpgt32 %f62, %f0, %x3 /* FPM Group f8 available */
- faligndata %f4, %f6, %f52 /* FPA */
- fcmpgt32 %f32, %f6, %x4 /* FPM Group f10 available */
- inc %x1 /* IEU0 */
- faligndata %f6, %f8, %f54 /* FPA */
- fcmpgt32 %f32, %f8, %x5 /* FPM Group f12 available */
- srl %x1, 1, %x1 /* IEU0 */
- inc %x2 /* IEU1 */
- faligndata %f8, %f10, %f56 /* FPA */
- fcmpgt32 %f32, %f10, %x6 /* FPM Group f14 available */
- srl %x2, 1, %x2 /* IEU0 */
- add %sum, %x1, %sum /* IEU1 */
- faligndata %f10, %f12, %f58 /* FPA */
- fcmpgt32 %f32, %f12, %x7 /* FPM Group */
- inc %x3 /* IEU0 */
- add %sum, %x2, %sum /* IEU1 */
- faligndata %f12, %f14, %f60 /* FPA */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- srl %x3, 1, %x3 /* IEU0 */
- inc %x4 /* IEU1 */
- fmovd %f14, %f62 /* FPA */
- srl %x4, 1, %x4 /* IEU0 Group */
- add %sum, %x3, %sum /* IEU1 */
+vis0s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ ldda [%src] ASI_BLK_P, %f0 /* Load Group */
+ add %src, 64, %src /* IEU0 Group */
+ ldda [%src] ASI_BLK_P, %f16 /* Load Group */
+ add %src, 64, %src /* IEU0 Group */
+ fmovd %f48, %f62 /* FPA Group f0 available*/
+ faligndata %f0, %f2, %f48 /* FPA Group f2 available*/
+ fcmpgt32 %f32, %f2, %x1 /* FPM Group f4 available*/
+ fpadd32 %f0, %f62, %f0 /* FPA */
+ fcmpgt32 %f32, %f4, %x2 /* FPM Group f6 available*/
+ faligndata %f2, %f4, %f50 /* FPA */
+ fcmpgt32 %f62, %f0, %x3 /* FPM Group f8 available*/
+ faligndata %f4, %f6, %f52 /* FPA */
+ fcmpgt32 %f32, %f6, %x4 /* FPM Group f10 available*/
+ inc %x1 /* IEU0 */
+ faligndata %f6, %f8, %f54 /* FPA */
+ fcmpgt32 %f32, %f8, %x5 /* FPM Group f12 available*/
+ srl %x1, 1, %x1 /* IEU0 */
+ inc %x2 /* IEU1 */
+ faligndata %f8, %f10, %f56 /* FPA */
+ fcmpgt32 %f32, %f10, %x6 /* FPM Group f14 available*/
+ srl %x2, 1, %x2 /* IEU0 */
+ add %sum, %x1, %sum /* IEU1 */
+ faligndata %f10, %f12, %f58 /* FPA */
+ fcmpgt32 %f32, %f12, %x7 /* FPM Group */
+ inc %x3 /* IEU0 */
+ add %sum, %x2, %sum /* IEU1 */
+ faligndata %f12, %f14, %f60 /* FPA */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ srl %x3, 1, %x3 /* IEU0 */
+ inc %x4 /* IEU1 */
+ fmovd %f14, %f62 /* FPA */
+ srl %x4, 1, %x4 /* IEU0 Group */
+ add %sum, %x3, %sum /* IEU1 */
vis0: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f48,f50,f52,f54,f56,f58,f60,f62,f62,
,LDBLK(f32), STBLK,,,,,,,,
@@ -468,36 +470,36 @@ vis0e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14
,SYNC, STBLK_XORASI(x1,x2),ST(f48,64),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40),ST(f60,48),
,add %dst, 56, %dst; add %len, 192 - 8*8, %len; ba,pt %icc, e1)
.align 2048
-vis1s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- sub %src, 8, %src /* IEU0 Group */
- ldda [%src] ASI_BLK_P, %f0 /* Load Group */
- add %src, 64, %src /* IEU0 Group */
- ldda [%src] ASI_BLK_P, %f16 /* Load Group */
- add %src, 64, %src /* IEU0 Group */
- fmovd %f0, %f58 /* FPA Group */
- fmovd %f48, %f0 /* FPA Group */
- fcmpgt32 %f32, %f2, %x2 /* FPM Group */
- faligndata %f2, %f4, %f48 /* FPA */
- fcmpgt32 %f32, %f4, %x3 /* FPM Group */
- faligndata %f4, %f6, %f50 /* FPA */
- fcmpgt32 %f32, %f6, %x4 /* FPM Group */
- faligndata %f6, %f8, %f52 /* FPA */
- fcmpgt32 %f32, %f8, %x5 /* FPM Group */
- inc %x2 /* IEU1 */
- faligndata %f8, %f10, %f54 /* FPA */
- fcmpgt32 %f32, %f10, %x6 /* FPM Group */
- srl %x2, 1, %x2 /* IEU0 */
- faligndata %f10, %f12, %f56 /* FPA */
- fcmpgt32 %f32, %f12, %x7 /* FPM Group */
- inc %x3 /* IEU0 */
- add %sum, %x2, %sum /* IEU1 */
- faligndata %f12, %f14, %f58 /* FPA */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- srl %x3, 1, %x3 /* IEU0 */
- inc %x4 /* IEU1 */
- fmovd %f14, %f60 /* FPA */
- srl %x4, 1, %x4 /* IEU0 Group */
- add %sum, %x3, %sum /* IEU1 */
+vis1s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ sub %src, 8, %src /* IEU0 Group */
+ ldda [%src] ASI_BLK_P, %f0 /* Load Group */
+ add %src, 64, %src /* IEU0 Group */
+ ldda [%src] ASI_BLK_P, %f16 /* Load Group */
+ add %src, 64, %src /* IEU0 Group */
+ fmovd %f0, %f58 /* FPA Group */
+ fmovd %f48, %f0 /* FPA Group */
+ fcmpgt32 %f32, %f2, %x2 /* FPM Group */
+ faligndata %f2, %f4, %f48 /* FPA */
+ fcmpgt32 %f32, %f4, %x3 /* FPM Group */
+ faligndata %f4, %f6, %f50 /* FPA */
+ fcmpgt32 %f32, %f6, %x4 /* FPM Group */
+ faligndata %f6, %f8, %f52 /* FPA */
+ fcmpgt32 %f32, %f8, %x5 /* FPM Group */
+ inc %x2 /* IEU1 */
+ faligndata %f8, %f10, %f54 /* FPA */
+ fcmpgt32 %f32, %f10, %x6 /* FPM Group */
+ srl %x2, 1, %x2 /* IEU0 */
+ faligndata %f10, %f12, %f56 /* FPA */
+ fcmpgt32 %f32, %f12, %x7 /* FPM Group */
+ inc %x3 /* IEU0 */
+ add %sum, %x2, %sum /* IEU1 */
+ faligndata %f12, %f14, %f58 /* FPA */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ srl %x3, 1, %x3 /* IEU0 */
+ inc %x4 /* IEU1 */
+ fmovd %f14, %f60 /* FPA */
+ srl %x4, 1, %x4 /* IEU0 Group */
+ add %sum, %x3, %sum /* IEU1 */
vis1: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f62,f48,f50,f52,f54,f56,f58,f60,f60,
,LDBLK(f32), ,STBLK,,,,,,,
@@ -523,33 +525,33 @@ vis1e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14
,SYNC, ,STBLK_XORASI(x1,x2),ST(f48,0),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40),
,add %dst, 48, %dst; add %len, 192 - 7*8, %len; ba,pt %icc, e1)
.align 2048
-vis2s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- sub %src, 16, %src /* IEU0 Group */
- ldda [%src] ASI_BLK_P, %f0 /* Load Group */
- add %src, 64, %src /* IEU0 Group */
- ldda [%src] ASI_BLK_P, %f16 /* Load Group */
- add %src, 64, %src /* IEU0 Group */
- fmovd %f0, %f56 /* FPA Group */
- fmovd %f48, %f0 /* FPA Group */
- sub %dst, 64, %dst /* IEU0 */
- fpsub32 %f2, %f2, %f2 /* FPA Group */
- fcmpgt32 %f32, %f4, %x3 /* FPM Group */
- faligndata %f4, %f6, %f48 /* FPA */
- fcmpgt32 %f32, %f6, %x4 /* FPM Group */
- faligndata %f6, %f8, %f50 /* FPA */
- fcmpgt32 %f32, %f8, %x5 /* FPM Group */
- faligndata %f8, %f10, %f52 /* FPA */
- fcmpgt32 %f32, %f10, %x6 /* FPM Group */
- faligndata %f10, %f12, %f54 /* FPA */
- fcmpgt32 %f32, %f12, %x7 /* FPM Group */
- inc %x3 /* IEU0 */
- faligndata %f12, %f14, %f56 /* FPA */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- srl %x3, 1, %x3 /* IEU0 */
- inc %x4 /* IEU1 */
- fmovd %f14, %f58 /* FPA */
- srl %x4, 1, %x4 /* IEU0 Group */
- add %sum, %x3, %sum /* IEU1 */
+vis2s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ sub %src, 16, %src /* IEU0 Group */
+ ldda [%src] ASI_BLK_P, %f0 /* Load Group */
+ add %src, 64, %src /* IEU0 Group */
+ ldda [%src] ASI_BLK_P, %f16 /* Load Group */
+ add %src, 64, %src /* IEU0 Group */
+ fmovd %f0, %f56 /* FPA Group */
+ fmovd %f48, %f0 /* FPA Group */
+ sub %dst, 64, %dst /* IEU0 */
+ fpsub32 %f2, %f2, %f2 /* FPA Group */
+ fcmpgt32 %f32, %f4, %x3 /* FPM Group */
+ faligndata %f4, %f6, %f48 /* FPA */
+ fcmpgt32 %f32, %f6, %x4 /* FPM Group */
+ faligndata %f6, %f8, %f50 /* FPA */
+ fcmpgt32 %f32, %f8, %x5 /* FPM Group */
+ faligndata %f8, %f10, %f52 /* FPA */
+ fcmpgt32 %f32, %f10, %x6 /* FPM Group */
+ faligndata %f10, %f12, %f54 /* FPA */
+ fcmpgt32 %f32, %f12, %x7 /* FPM Group */
+ inc %x3 /* IEU0 */
+ faligndata %f12, %f14, %f56 /* FPA */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ srl %x3, 1, %x3 /* IEU0 */
+ inc %x4 /* IEU1 */
+ fmovd %f14, %f58 /* FPA */
+ srl %x4, 1, %x4 /* IEU0 Group */
+ add %sum, %x3, %sum /* IEU1 */
vis2: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f60,f62,f48,f50,f52,f54,f56,f58,f58,
,LDBLK(f32), ,,STBLK,,,,,,
@@ -575,29 +577,29 @@ vis2e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14
,SYNC, ,,STBLK_XORASI(x2,x3),ST(f48,64),ST(f50,72),ST(f52,80),ST(f54,88),ST(f56,96),
,add %dst, 104, %dst; add %len, 192 - 6*8, %len; ba,pt %icc, e1)
.align 2048
-vis3s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- sub %src, 24, %src /* IEU0 Group */
- ldda [%src] ASI_BLK_P, %f0 /* Load Group */
- add %src, 64, %src /* IEU0 Group */
- ldda [%src] ASI_BLK_P, %f16 /* Load Group */
- add %src, 64, %src /* IEU0 Group */
- fmovd %f0, %f54 /* FPA Group */
- fmovd %f48, %f0 /* FPA Group */
- sub %dst, 64, %dst /* IEU0 */
- fpsub32 %f2, %f2, %f2 /* FPA Group */
- fpsub32 %f4, %f4, %f4 /* FPA Group */
- fcmpgt32 %f32, %f6, %x4 /* FPM Group */
- faligndata %f6, %f8, %f48 /* FPA */
- fcmpgt32 %f32, %f8, %x5 /* FPM Group */
- faligndata %f8, %f10, %f50 /* FPA */
- fcmpgt32 %f32, %f10, %x6 /* FPM Group */
- faligndata %f10, %f12, %f52 /* FPA */
- fcmpgt32 %f32, %f12, %x7 /* FPM Group */
- faligndata %f12, %f14, %f54 /* FPA */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- fmovd %f14, %f56 /* FPA */
- inc %x4 /* IEU0 */
- srl %x4, 1, %x4 /* IEU0 Group */
+vis3s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ sub %src, 24, %src /* IEU0 Group */
+ ldda [%src] ASI_BLK_P, %f0 /* Load Group */
+ add %src, 64, %src /* IEU0 Group */
+ ldda [%src] ASI_BLK_P, %f16 /* Load Group */
+ add %src, 64, %src /* IEU0 Group */
+ fmovd %f0, %f54 /* FPA Group */
+ fmovd %f48, %f0 /* FPA Group */
+ sub %dst, 64, %dst /* IEU0 */
+ fpsub32 %f2, %f2, %f2 /* FPA Group */
+ fpsub32 %f4, %f4, %f4 /* FPA Group */
+ fcmpgt32 %f32, %f6, %x4 /* FPM Group */
+ faligndata %f6, %f8, %f48 /* FPA */
+ fcmpgt32 %f32, %f8, %x5 /* FPM Group */
+ faligndata %f8, %f10, %f50 /* FPA */
+ fcmpgt32 %f32, %f10, %x6 /* FPM Group */
+ faligndata %f10, %f12, %f52 /* FPA */
+ fcmpgt32 %f32, %f12, %x7 /* FPM Group */
+ faligndata %f12, %f14, %f54 /* FPA */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ fmovd %f14, %f56 /* FPA */
+ inc %x4 /* IEU0 */
+ srl %x4, 1, %x4 /* IEU0 Group */
vis3: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f58,f60,f62,f48,f50,f52,f54,f56,f56,
,LDBLK(f32), ,,,STBLK,,,,,
@@ -623,27 +625,27 @@ vis3e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14
,SYNC, ,,,STBLK_XORASI(x3,x4),ST(f48,64),ST(f50,72),ST(f52,80),ST(f54,88),
,add %dst, 96, %dst; add %len, 192 - 5*8, %len; ba,pt %icc, e1)
.align 2048
-vis4s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- sub %src, 32, %src /* IEU0 Group */
- ldda [%src] ASI_BLK_P, %f0 /* Load Group */
- add %src, 64, %src /* IEU0 Group */
- ldda [%src] ASI_BLK_P, %f16 /* Load Group */
- add %src, 64, %src /* IEU0 Group */
- fmovd %f0, %f52 /* FPA Group */
- fmovd %f48, %f0 /* FPA Group */
- sub %dst, 64, %dst /* IEU0 */
- fpsub32 %f2, %f2, %f2 /* FPA Group */
- fpsub32 %f4, %f4, %f4 /* FPA Group */
- fpsub32 %f6, %f6, %f6 /* FPA Group */
- clr %x4 /* IEU0 */
- fcmpgt32 %f32, %f8, %x5 /* FPM Group */
- faligndata %f8, %f10, %f48 /* FPA */
- fcmpgt32 %f32, %f10, %x6 /* FPM Group */
- faligndata %f10, %f12, %f50 /* FPA */
- fcmpgt32 %f32, %f12, %x7 /* FPM Group */
- faligndata %f12, %f14, %f52 /* FPA */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- fmovd %f14, %f54 /* FPA */
+vis4s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ sub %src, 32, %src /* IEU0 Group */
+ ldda [%src] ASI_BLK_P, %f0 /* Load Group */
+ add %src, 64, %src /* IEU0 Group */
+ ldda [%src] ASI_BLK_P, %f16 /* Load Group */
+ add %src, 64, %src /* IEU0 Group */
+ fmovd %f0, %f52 /* FPA Group */
+ fmovd %f48, %f0 /* FPA Group */
+ sub %dst, 64, %dst /* IEU0 */
+ fpsub32 %f2, %f2, %f2 /* FPA Group */
+ fpsub32 %f4, %f4, %f4 /* FPA Group */
+ fpsub32 %f6, %f6, %f6 /* FPA Group */
+ clr %x4 /* IEU0 */
+ fcmpgt32 %f32, %f8, %x5 /* FPM Group */
+ faligndata %f8, %f10, %f48 /* FPA */
+ fcmpgt32 %f32, %f10, %x6 /* FPM Group */
+ faligndata %f10, %f12, %f50 /* FPA */
+ fcmpgt32 %f32, %f12, %x7 /* FPM Group */
+ faligndata %f12, %f14, %f52 /* FPA */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ fmovd %f14, %f54 /* FPA */
vis4: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f56,f58,f60,f62,f48,f50,f52,f54,f54,
,LDBLK(f32), ,,,,STBLK,,,,
@@ -669,27 +671,27 @@ vis4e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14
,SYNC, ,,,,STBLK_XORASI(x4,x5),ST(f48,64),ST(f50,72),ST(f52,80),
,add %dst, 88, %dst; add %len, 192 - 4*8, %len; ba,pt %icc, e1)
.align 2048
-vis5s: ldd [%src+0], %f10 /* Load Group */
- ldd [%src+8], %f12 /* Load Group */
- ldd [%src+16], %f14 /* Load Group */
- add %src, 24, %src /* IEU0 Group */
- wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- ldda [%src] ASI_BLK_P, %f16 /* Load Group */
- add %src, 64, %src /* IEU0 Group */
- fmovd %f48, %f0 /* FPA Group */
- fmuld %f32, %f32, %f2 /* FPM */
- clr %x4 /* IEU0 */
- faddd %f32, %f32, %f4 /* FPA Group */
- fmuld %f32, %f32, %f6 /* FPM */
- clr %x5 /* IEU0 */
- faddd %f32, %f32, %f8 /* FPA Group */
- fcmpgt32 %f32, %f10, %x6 /* FPM Group */
- sub %dst, 64, %dst /* IEU0 */
- faligndata %f10, %f12, %f48 /* FPA */
- fcmpgt32 %f32, %f12, %x7 /* FPM Group */
- faligndata %f12, %f14, %f50 /* FPA */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- fmovd %f14, %f52 /* FPA */
+vis5s: ldd [%src+0], %f10 /* Load Group */
+ ldd [%src+8], %f12 /* Load Group */
+ ldd [%src+16], %f14 /* Load Group */
+ add %src, 24, %src /* IEU0 Group */
+ wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ ldda [%src] ASI_BLK_P, %f16 /* Load Group */
+ add %src, 64, %src /* IEU0 Group */
+ fmovd %f48, %f0 /* FPA Group */
+ fmuld %f32, %f32, %f2 /* FPM */
+ clr %x4 /* IEU0 */
+ faddd %f32, %f32, %f4 /* FPA Group */
+ fmuld %f32, %f32, %f6 /* FPM */
+ clr %x5 /* IEU0 */
+ faddd %f32, %f32, %f8 /* FPA Group */
+ fcmpgt32 %f32, %f10, %x6 /* FPM Group */
+ sub %dst, 64, %dst /* IEU0 */
+ faligndata %f10, %f12, %f48 /* FPA */
+ fcmpgt32 %f32, %f12, %x7 /* FPM Group */
+ faligndata %f12, %f14, %f50 /* FPA */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ fmovd %f14, %f52 /* FPA */
vis5: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f54,f56,f58,f60,f62,f48,f50,f52,f52,
,LDBLK(f32), ,,,,,STBLK,,,
@@ -715,26 +717,26 @@ vis5e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14
,SYNC, ,,,,,STBLK_XORASI(x5,x6),ST(f48,64),ST(f50,72),
,add %dst, 80, %dst; add %len, 192 - 3*8, %len; ba,pt %icc, e1)
.align 2048
-vis6s: ldd [%src+0], %f12 /* Load Group */
- ldd [%src+8], %f14 /* Load Group */
- add %src, 16, %src /* IEU0 Group */
- wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- ldda [%src] ASI_BLK_P, %f16 /* Load Group */
- add %src, 64, %src /* IEU0 Group */
- fmovd %f48, %f0 /* FPA Group */
- fmuld %f32, %f32, %f2 /* FPM */
- clr %x4 /* IEU0 */
- faddd %f32, %f32, %f4 /* FPA Group */
- fmuld %f32, %f32, %f6 /* FPM */
- clr %x5 /* IEU0 */
- faddd %f32, %f32, %f8 /* FPA Group */
- fmuld %f32, %f32, %f10 /* FPM */
- clr %x6 /* IEU0 */
- fcmpgt32 %f32, %f12, %x7 /* FPM Group */
- sub %dst, 64, %dst /* IEU0 */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- faligndata %f12, %f14, %f48 /* FPA */
- fmovd %f14, %f50 /* FPA Group */
+vis6s: ldd [%src+0], %f12 /* Load Group */
+ ldd [%src+8], %f14 /* Load Group */
+ add %src, 16, %src /* IEU0 Group */
+ wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ ldda [%src] ASI_BLK_P, %f16 /* Load Group */
+ add %src, 64, %src /* IEU0 Group */
+ fmovd %f48, %f0 /* FPA Group */
+ fmuld %f32, %f32, %f2 /* FPM */
+ clr %x4 /* IEU0 */
+ faddd %f32, %f32, %f4 /* FPA Group */
+ fmuld %f32, %f32, %f6 /* FPM */
+ clr %x5 /* IEU0 */
+ faddd %f32, %f32, %f8 /* FPA Group */
+ fmuld %f32, %f32, %f10 /* FPM */
+ clr %x6 /* IEU0 */
+ fcmpgt32 %f32, %f12, %x7 /* FPM Group */
+ sub %dst, 64, %dst /* IEU0 */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ faligndata %f12, %f14, %f48 /* FPA */
+ fmovd %f14, %f50 /* FPA Group */
vis6: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f52,f54,f56,f58,f60,f62,f48,f50,f50,
,LDBLK(f32), ,,,,,,STBLK,,
@@ -760,25 +762,25 @@ vis6e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14
,SYNC, ,,,,,,STBLK_XORASI(x6,x7),ST(f48,64),
,add %dst, 72, %dst; add %len, 192 - 2*8, %len; ba,pt %icc, e1)
.align 2048
-vis7s: ldd [%src+0], %f14 /* Load Group */
- add %src, 8, %src /* IEU0 Group */
- wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
- ldda [%src] ASI_BLK_P, %f16 /* Load Group */
- add %src, 64, %src /* IEU0 Group */
- fmovd %f48, %f0 /* FPA Group */
- fmuld %f32, %f32, %f2 /* FPM */
- clr %x4 /* IEU0 */
- faddd %f32, %f32, %f4 /* FPA Group */
- fmuld %f32, %f32, %f6 /* FPM */
- clr %x5 /* IEU0 */
- faddd %f32, %f32, %f8 /* FPA Group */
- fmuld %f32, %f32, %f10 /* FPM */
- clr %x6 /* IEU0 */
- faddd %f32, %f32, %f12 /* FPA Group */
- clr %x7 /* IEU0 */
- fcmpgt32 %f32, %f14, %x8 /* FPM Group */
- sub %dst, 64, %dst /* IEU0 */
- fmovd %f14, %f48 /* FPA */
+vis7s: ldd [%src+0], %f14 /* Load Group */
+ add %src, 8, %src /* IEU0 Group */
+ wr %g2, ASI_BLK_XOR, %asi /* LSU Group */
+ ldda [%src] ASI_BLK_P, %f16 /* Load Group */
+ add %src, 64, %src /* IEU0 Group */
+ fmovd %f48, %f0 /* FPA Group */
+ fmuld %f32, %f32, %f2 /* FPM */
+ clr %x4 /* IEU0 */
+ faddd %f32, %f32, %f4 /* FPA Group */
+ fmuld %f32, %f32, %f6 /* FPM */
+ clr %x5 /* IEU0 */
+ faddd %f32, %f32, %f8 /* FPA Group */
+ fmuld %f32, %f32, %f10 /* FPM */
+ clr %x6 /* IEU0 */
+ faddd %f32, %f32, %f12 /* FPA Group */
+ clr %x7 /* IEU0 */
+ fcmpgt32 %f32, %f14, %x8 /* FPM Group */
+ sub %dst, 64, %dst /* IEU0 */
+ fmovd %f14, %f48 /* FPA */
vis7: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
,f50,f52,f54,f56,f58,f60,f62,f48,f48,
,LDBLK(f32), ,,,,,,,STBLK,
@@ -806,104 +808,104 @@ vis7e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14
e1: END_THE_TRICK1( f0,f2,f4,f6,f8,f10,f12,f14,f16,f6)
e2: END_THE_TRICK1( f16,f18,f20,f22,f24,f26,f28,f30,f32,f6)
e3: END_THE_TRICK1( f32,f34,f36,f38,f40,f42,f44,f46,f0,f6)
-ett: rd %gsr, %x3 /* LSU Group+4bubbles */
- andcc %x3, 7, %x3 /* IEU1 Group */
- add %dst, 8, %dst /* IEU0 */
- bne,pn %icc, 1f /* CTI */
- fzero %f10 /* FPA */
- brz,a,pn %len, 2f /* CTI+IEU1 Group */
- stda %f6, [%dst - 8] %asi /* Store */
-1: cmp %len, 8 /* IEU1 */
- blu,pn %icc, 3f /* CTI */
- sub %src, 64, %src /* IEU0 Group */
-1: ldd [%src], %f2 /* Load Group */
- fpadd32 %f10, %f2, %f12 /* FPA Group+load stall */
- add %src, 8, %src /* IEU0 */
- add %dst, 8, %dst /* IEU1 */
- faligndata %f6, %f2, %f14 /* FPA Group */
- fcmpgt32 %f10, %f12, %x5 /* FPM Group */
- stda %f14, [%dst - 16] %asi /* Store */
- fmovd %f2, %f6 /* FPA */
- fmovd %f12, %f10 /* FPA Group */
- sub %len, 8, %len /* IEU1 */
- fzero %f16 /* FPA Group - FPU nop */
- fzero %f18 /* FPA Group - FPU nop */
- inc %x5 /* IEU0 */
- srl %x5, 1, %x5 /* IEU0 Group (regdep) */
- cmp %len, 8 /* IEU1 */
- bgeu,pt %icc, 1b /* CTI */
- add %x5, %sum, %sum /* IEU0 Group */
-3: brz,a,pt %x3, 2f /* CTI+IEU1 */
- stda %f6, [%dst - 8] %asi /* Store Group */
- sta %f7, [%dst - 8] %asi /* Store Group */
- sub %dst, 4, %dst /* IEU0 */
- add %len, 4, %len /* IEU1 */
+ett: rd %gsr, %x3 /* LSU Group+4bubbles */
+ andcc %x3, 7, %x3 /* IEU1 Group */
+ add %dst, 8, %dst /* IEU0 */
+ bne,pn %icc, 1f /* CTI */
+ fzero %f10 /* FPA */
+ brz,a,pn %len, 2f /* CTI+IEU1 Group */
+ stda %f6, [%dst - 8] %asi /* Store */
+1: cmp %len, 8 /* IEU1 */
+ blu,pn %icc, 3f /* CTI */
+ sub %src, 64, %src /* IEU0 Group */
+1: ldd [%src], %f2 /* Load Group */
+ fpadd32 %f10, %f2, %f12 /* FPA Group+load stall*/
+ add %src, 8, %src /* IEU0 */
+ add %dst, 8, %dst /* IEU1 */
+ faligndata %f6, %f2, %f14 /* FPA Group */
+ fcmpgt32 %f10, %f12, %x5 /* FPM Group */
+ stda %f14, [%dst - 16] %asi /* Store */
+ fmovd %f2, %f6 /* FPA */
+ fmovd %f12, %f10 /* FPA Group */
+ sub %len, 8, %len /* IEU1 */
+ fzero %f16 /* FPA Group - FPU nop */
+ fzero %f18 /* FPA Group - FPU nop */
+ inc %x5 /* IEU0 */
+ srl %x5, 1, %x5 /* IEU0 Group (regdep) */
+ cmp %len, 8 /* IEU1 */
+ bgeu,pt %icc, 1b /* CTI */
+ add %x5, %sum, %sum /* IEU0 Group */
+3: brz,a,pt %x3, 2f /* CTI+IEU1 */
+ stda %f6, [%dst - 8] %asi /* Store Group */
+ sta %f7, [%dst - 8] %asi /* Store Group */
+ sub %dst, 4, %dst /* IEU0 */
+ add %len, 4, %len /* IEU1 */
2:
#ifdef __KERNEL__
- sub %sp, 8, %sp /* IEU0 Group */
+ sub %sp, 8, %sp /* IEU0 Group */
#endif
END_THE_TRICK2( f48,f50,f52,f54,f56,f58,f60,f10,f12,f62)
- membar #Sync /* LSU Group */
+ membar #Sync /* LSU Group */
#ifdef __KERNEL__
VISExit
- add %sp, 8, %sp /* IEU0 Group */
+ add %sp, 8, %sp /* IEU0 Group */
#endif
-23: brnz,pn %len, 26f /* CTI+IEU1 Group */
-24: sllx %sum, 32, %g1 /* IEU0 */
-25: addcc %sum, %g1, %src /* IEU1 Group */
- srlx %src, 32, %src /* IEU0 Group (regdep) */
- bcs,a,pn %xcc, 1f /* CTI */
- add %src, 1, %src /* IEU1 */
+23: brnz,pn %len, 26f /* CTI+IEU1 Group */
+24: sllx %sum, 32, %g1 /* IEU0 */
+25: addcc %sum, %g1, %src /* IEU1 Group */
+ srlx %src, 32, %src /* IEU0 Group (regdep) */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %src, 1, %src /* IEU1 */
#ifndef __KERNEL__
-1: retl /* CTI Group brk forced */
- srl %src, 0, %src /* IEU0 */
+1: retl /* CTI Group brk forced*/
+ srl %src, 0, %src /* IEU0 */
#else
-1: sethi %uhi(PAGE_OFFSET), %g4 /* IEU0 Group */
- retl /* CTI Group brk forced */
- sllx %g4, 32, %g4 /* IEU0 */
+1: sethi %uhi(PAGE_OFFSET), %g4 /* IEU0 Group */
+ retl /* CTI Group brk forced*/
+ sllx %g4, 32, %g4 /* IEU0 */
#endif
-26: andcc %len, 8, %g0 /* IEU1 Group */
- be,pn %icc, 1f /* CTI */
- lduw [%src], %o4 /* Load */
- lduw [%src+4], %g2 /* Load Group */
- add %src, 8, %src /* IEU0 */
- add %dst, 8, %dst /* IEU1 */
- sllx %o4, 32, %g5 /* IEU0 Group */
- stwa %o4, [%dst - 8] %asi /* Store */
- or %g5, %g2, %g5 /* IEU0 Group */
- stwa %g2, [%dst - 4] %asi /* Store */
- addcc %g5, %sum, %sum /* IEU1 Group */
- bcs,a,pn %xcc, 1f /* CTI */
- add %sum, 1, %sum /* IEU0 */
-1: andcc %len, 4, %g0 /* IEU1 Group */
- be,a,pn %icc, 1f /* CTI */
- clr %g2 /* IEU0 */
- lduw [%src], %g7 /* Load */
- add %src, 4, %src /* IEU0 Group */
- add %dst, 4, %dst /* IEU1 */
- sllx %g7, 32, %g2 /* IEU0 Group */
- stwa %g7, [%dst - 4] %asi /* Store */
-1: andcc %len, 2, %g0 /* IEU1 */
- be,a,pn %icc, 1f /* CTI */
- clr %g3 /* IEU0 Group */
- lduh [%src], %g7 /* Load */
- add %src, 2, %src /* IEU1 */
- add %dst, 2, %dst /* IEU0 Group */
- sll %g7, 16, %g3 /* IEU0 Group */
- stha %g7, [%dst - 2] %asi /* Store */
-1: andcc %len, 1, %g0 /* IEU1 */
- be,a,pn %icc, 1f /* CTI */
- clr %o5 /* IEU0 Group */
- ldub [%src], %g7 /* Load */
- sll %g7, 8, %o5 /* IEU0 Group */
- stba %g7, [%dst] %asi /* Store */
-1: or %g2, %g3, %g3 /* IEU1 */
- or %o5, %g3, %g3 /* IEU0 Group (regdep) */
- addcc %g3, %sum, %sum /* IEU1 Group (regdep) */
- bcs,a,pn %xcc, 1f /* CTI */
- add %sum, 1, %sum /* IEU0 */
-1: ba,pt %xcc, 25b /* CTI Group */
- sllx %sum, 32, %g1 /* IEU0 */
+26: andcc %len, 8, %g0 /* IEU1 Group */
+ be,pn %icc, 1f /* CTI */
+ lduw [%src], %o4 /* Load */
+ lduw [%src+4], %g2 /* Load Group */
+ add %src, 8, %src /* IEU0 */
+ add %dst, 8, %dst /* IEU1 */
+ sllx %o4, 32, %g5 /* IEU0 Group */
+ stwa %o4, [%dst - 8] %asi /* Store */
+ or %g5, %g2, %g5 /* IEU0 Group */
+ stwa %g2, [%dst - 4] %asi /* Store */
+ addcc %g5, %sum, %sum /* IEU1 Group */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %sum, 1, %sum /* IEU0 */
+1: andcc %len, 4, %g0 /* IEU1 Group */
+ be,a,pn %icc, 1f /* CTI */
+ clr %g2 /* IEU0 */
+ lduw [%src], %g7 /* Load */
+ add %src, 4, %src /* IEU0 Group */
+ add %dst, 4, %dst /* IEU1 */
+ sllx %g7, 32, %g2 /* IEU0 Group */
+ stwa %g7, [%dst - 4] %asi /* Store */
+1: andcc %len, 2, %g0 /* IEU1 */
+ be,a,pn %icc, 1f /* CTI */
+ clr %g3 /* IEU0 Group */
+ lduh [%src], %g7 /* Load */
+ add %src, 2, %src /* IEU1 */
+ add %dst, 2, %dst /* IEU0 Group */
+ sll %g7, 16, %g3 /* IEU0 Group */
+ stha %g7, [%dst - 2] %asi /* Store */
+1: andcc %len, 1, %g0 /* IEU1 */
+ be,a,pn %icc, 1f /* CTI */
+ clr %o5 /* IEU0 Group */
+ ldub [%src], %g7 /* Load */
+ sll %g7, 8, %o5 /* IEU0 Group */
+ stba %g7, [%dst] %asi /* Store */
+1: or %g2, %g3, %g3 /* IEU1 */
+ or %o5, %g3, %g3 /* IEU0 Group (regdep) */
+ addcc %g3, %sum, %sum /* IEU1 Group (regdep) */
+ bcs,a,pn %xcc, 1f /* CTI */
+ add %sum, 1, %sum /* IEU0 */
+1: ba,pt %xcc, 25b /* CTI Group */
+ sllx %sum, 32, %g1 /* IEU0 */
#ifdef __KERNEL__
end: